#!/usr/bin/env python

# webcheck.py - main module of webcheck doing command-line checking
#
# Copyright (C) 1998, 1999 Albert Hopkins (marduk)
# Copyright (C) 2002 Mike Meyer
# Copyright (C) 2005 Arthur de Jong
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

import sys
import time
import os

# taken at import time, before any configuration or crawling happens
start_time = time.time()

# config.py has to be importable. The current directory is put in front of
# the module search path first; if the import still fails, the user is told
# what to check and the program exits with status 1.
try:
    sys.path = ['.'] + sys.path
    import config
except ImportError:
    sys.stdout.write('Please verify that PYTHONPATH knows where to find "config.py"\n')
    sys.exit(1)

import myUrlLib
import debugio
# default log level; parse_args() may change it for --quiet / --debug
debugio.loglevel=debugio.INFO
import version
import plugins


def print_version():
    """Write the program name, version, authors and license notice to stdout."""
    notice = (
        "webcheck "+version.webcheck+"\n"
        "Written by Albert Hopkins (marduk), Mike Meyer and Arthur de Jong.\n"
        "\n"
        "Copyright (C) 1998, 1999, 2002, 2005 Albert Hopkins (marduk), Mike Meyer\n"
        "and Arthur de Jong.\n"
        "This is free software; see the source for copying conditions. There is NO\n"
        "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
    )
    # the trailing newline is the one the print statement used to add
    sys.stdout.write(notice + "\n")

def print_usage():
    """Write the one-line usage summary to stderr."""
    sys.stderr.write("Usage: webcheck [OPTION]... URL\n")


def print_tryhelp():
    """Write a friendly pointer to the --help option to stderr."""
    sys.stderr.write("Try `webcheck --help' for more information.\n")


def print_help():
    """Write the usage line and the full option list to stdout."""
    sys.stdout.write(
        "Usage: webcheck [OPTION]... URL\n"
        "Generate a report for the given URL\n"
        "\n"
        " -x PATTERN mark URLs matching PATTERN as external\n"
        " -y PATTERN do not check URLs matching PATTERN\n"
        " -l URL use URL as logo for the report\n"
        " -b base URLs only: consider any URL not starting with the base\n"
        " URL to be external\n"
        " -a do not check external URLs\n"
        " -q, --quiet, --silent\n"
        " do not print out progress as webcheck traverses a site\n"
        " -d, --debug set loglevel to LEVEL, for programmer-level debugging\n"
        " -o DIRECTORY the directory in which webcheck will generate the reports\n"
        " -f, --force overwrite files without asking\n"
        " -r N the amount of redirects webcheck should follow when following\n"
        " a link, 0 implies follow all redirects.\n"
        " -w, --wait=SECONDS\n"
        " wait SECONDS between retrievals\n"
        " -V, --version output version information and exit\n"
        " -h, --help display this help and exit"
        "\n")


def _int_option(flag, value):
    """Return value converted to int, or report a usage error and exit(1)."""
    try:
        return int(value)
    except ValueError:
        sys.stderr.write("webcheck: option %s requires an integer argument, got %r\n"
                         % (flag, value))
        print_tryhelp()
        sys.exit(1)


def parse_args():
    """Parse the command line and store the result in module-level state.

    Options are written into attributes of the config module and into
    debugio.loglevel. The first positional argument is stored in the global
    URL; any further positional arguments are appended to config.HOSTS.

    The function exits the process with status 1 on an unknown option, a
    missing option argument, a non-numeric value for -r or -w, or when no
    URL is given. It exits with status 0 after handling --version or --help.
    """
    import getopt
    global URL
    try:
        optlist, args = getopt.gnu_getopt(
            sys.argv[1:],
            "x:y:l:baqdo:fr:w:Vh",
            ["quiet","silent","debug","force","wait=","version","help"])
    except getopt.error:
        # the exception object is fetched through sys.exc_info() so that the
        # except clause does not depend on a version-specific binding syntax
        reason = sys.exc_info()[1]
        sys.stderr.write("webcheck: %s" % reason + "\n")
        print_tryhelp()
        sys.exit(1)
    for flag,arg in optlist:
        if flag=='-x':
            config.EXCLUDED_URLS.append(arg)
        elif flag=='-y':
            config.YANKED_URLS.append(arg)
        elif flag=='-l':
            config.LOGO_HREF=arg
        elif flag=='-b':
            config.BASE_URLS_ONLY=1
        elif flag=='-a':
            config.AVOID_EXTERNAL_LINKS=1
        elif flag in ("-q","--quiet","--silent"):
            debugio.loglevel=debugio.ERROR
        elif flag=='-o':
            config.OUTPUT_DIR=arg
        elif flag in ("-f","--force"):
            config.OVERWRITE_FILES=1
        elif flag=='-r':
            # a non-numeric value is a usage error, not a traceback
            config.REDIRECT_DEPTH=_int_option(flag,arg)
        elif flag in ("-w","--wait"):
            config.WAIT_BETWEEN_REQUESTS=_int_option(flag,arg)
        elif flag in ("-V","--version"):
            print_version()
            sys.exit(0)
        elif flag in ("-h","--help"):
            print_help()
            sys.exit(0)
        elif flag in ("-d","--debug"):
            debugio.loglevel=debugio.DEBUG
    if not args:
        print_usage()
        print_tryhelp()
        sys.exit(1)
    URL = args[0]
    config.HOSTS = config.HOSTS + args[1:]


def warn():
    """Warn the user that something has gone wrong.

    Writes a boxed message to stdout that names version.author as the
    contact and includes version.webcheck.
    """
    lines = [
        "*******************************************",
        "* *",
        "* Warning, webcheck has found nothing to *",
        "* report for this site. If you feel this *",
        "* is in error, please contact *",
        "* %s. *" % version.author,
        "* and specify the environment that caused *",
        "* this to occur. *",
        "* *",
        "* webcheck %s *" % version.webcheck,
        "* *",
        "*******************************************",
    ]
    for line in lines:
        sys.stdout.write(line + "\n")


def find_file(fname):
    """Search the python path for the file name and return full path of the file.

    Every entry of sys.path is tried in order; the first one that contains
    a regular file called fname wins. Returns None if no entry does.
    """
    for dname in sys.path:
        res = os.path.join(dname,fname)
        if os.path.isfile(res):
            return res
    return None


def install_file(fname,text=False):
    """Install the given file in the output directory.

    The file is looked up with find_file() and copied to a file with the
    same base name that is opened through plugins.open_file(). If text is
    true the source is opened in universal-newline mode ('rU').

    Raises IOError if fname cannot be found on the python path.
    """
    import shutil
    # TODO: extend so that if
    #  - filename has no slashes in it: search python path
    #  - filename starts with a known scheme: use that
    #  - filename starts with slash: treat is as a file://///// url
    # TODO: make it possible to reference the original location instead of copying the file
    # FIXME: check that source and target are different before opening file for writing
    source = find_file(fname)
    if source is None:
        # without this check open(None) fails with an unhelpful TypeError
        raise IOError("webcheck: file %r not found in python path" % fname)
    target = os.path.basename(fname)
    # open the input file, TODO: use the scheme stuff for doing this
    mode='r'
    if text:
        mode+='U'
    sfp=open(source,mode)
    try:
        # create file in output directory (with overwrite question)
        tfp=plugins.open_file(target)
        try:
            # copy contents
            shutil.copyfileobj(sfp,tfp)
        finally:
            tfp.close()
    finally:
        # the source is closed even if opening the target or copying fails
        sfp.close()


def main():
    """Run webcheck: parse arguments, crawl the site and generate the reports.

    Exits with status 1 if the crawl is interrupted from the keyboard or if
    the resulting site object has no "url" attribute (after calling warn()).
    """
    # parse command-line arguments
    parse_args()
    # indicate that we are starting
    debugio.info('checking site....')
    try:
        site = myUrlLib.Link(URL,None) # this will take a while
    except KeyboardInterrupt:
        sys.stderr.write("Interrupted\n")
        sys.exit(1)
    debugio.info('done.')
    if not hasattr(site,"url"):
        warn()
        sys.exit(1)
    # now we can write out the files
    # start with the frame-description page
    debugio.info('generating reports...')
    # for every plugin, generate a page
    plugins.generate(site, config.PLUGINS)
    # put extra files in the output directory
    install_file('webcheck.css',True)
    debugio.info('done.')


if __name__ == '__main__':
    main()