# __init__.py - plugin function module # # Copyright (C) 1998, 1999 Albert Hopkins (marduk) # Copyright (C) 2002 Mike W. Meyer # Copyright (C) 2005, 2006 Arthur de Jong # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # # The files produced as output from the software do not automatically fall # under the copyright of the software, unless explicitly stated otherwise. 
import sys
import urllib
import string
import debugio
import config
import time

def escape(txt, inattr=False):
    """HTML escape the given string and return an ASCII clean string with
    known entities and character entities for the other values.

    txt is the text to escape; a plain str is first converted to a unicode
    object with errors='replace'.
    inattr selects the extra handling used by the callers that pass True
    (presumably for text placed inside an attribute value): double quotes
    are replaced by their named entity and newlines get their own
    replacement.

    The result is built up one character at a time and returned."""
    import htmlentitydefs
    # the output string
    out = ''
    # convert to unicode object
    if type(txt) is str:
        txt = unicode(txt, errors='replace')
    # loop over the characters of the string
    for c in txt:
        # the double quote also has a named entity, so it is tested before
        # the generic lookup below to keep it literal when inattr is false
        if c == '"':
            if inattr:
                out += '&%s;' % htmlentitydefs.codepoint2name[ord(c)]
            else:
                out += '"'
        elif htmlentitydefs.codepoint2name.has_key(ord(c)):
            # character with a known named entity
            out += '&%s;' % htmlentitydefs.codepoint2name[ord(c)]
        elif ord(c) > 126:
            # no named entity: fall back to a numeric character reference
            out += '&#%d;'% ord(c)
        elif inattr and c == u'\n':
            # NOTE(review): in this copy the replacement is a single space;
            # confirm against the upstream source that nothing was lost here
            out += ' '
        else:
            # remaining characters have code points of at most 126
            out += c.encode('utf-8')
    return out

def get_title(link):
    """Return the title of a link if it is set, otherwise return its url.

    A title of None or the empty string counts as not set."""
    if link.title is None or link.title == '':
        return link.url
    return link.title

def _floatformat(f):
    """Return a float as a string while trying to keep it within three
    characters.

    The value is formatted with one decimal; if that takes more than three
    characters everything from the decimal point onwards is cut off."""
    r = '%.1f' % f
    if len(r) > 3:
        r = r[:r.find('.')]
    return r

def get_size(i):
    """Return the size in bytes as a readable string.

    Values below 1024 are returned as a plain decimal number, larger values
    are divided by 1024, 1024**2 or 1024**3 and get a 'K', 'M' or 'G'
    suffix."""
    K = 1024
    M = K*1024
    G = M*1024
    # the next unit is used from 999 (not 1024) of the previous unit on,
    # presumably so the numeric part stays short
    if i > 1024*1024*999:
        return _floatformat(float(i)/float(G))+'G'
    elif i > 1024*999:
        return _floatformat(float(i)/float(M))+'M'
    elif i >= 1024:
        return _floatformat(float(i)/float(K))+'K'
    else:
        return '%d' % i

def get_info(link):
    """Return a string with a summary of the information in the link.

    The summary consists of one 'name: value' style line per piece of
    information that is set on the link (url, status, title, author,
    internal/external, redirect target, number of parents, modification
    time, size, mime-type, encoding and the link problems). Surrounding
    whitespace is stripped from the result."""
    info = u'url: %s\n' % unicode(link.url, errors='replace')
    if link.status:
        info += '%s\n' % unicode(link.status, errors='replace')
    if link.title:
        info += 'title: %s\n' % link.title.strip()
    if link.author:
        info += 'author: %s\n' % link.author.strip()
    # internal/external and the optional "not checked" remark share one line
    if link.isinternal:
        info += 'internal link'
    else:
        info += 'external link'
    if link.isyanked:
        info += ', not checked\n'
    else:
        info += '\n'
    if link.redirectdepth > 0:
        # the first child is reported as the redirect target
        if len(link.children) > 0:
            info += 'redirect: %s\n' % unicode(link.children[0].url,
                                               errors='replace')
        else:
            info += 'redirect (not followed)\n'
    if len(link.parents) == 1:
        info += 'linked from 1 page\n'
    elif len(link.parents) > 1:
        info += 'linked from %d pages\n' % len(link.parents)
    if link.mtime:
        info += 'last modified: %s\n' % time.ctime(link.mtime)
    if link.size:
        info += 'size: %s\n' % get_size(link.size)
    if link.mimetype:
        info += 'mime-type: %s\n' % unicode(link.mimetype, errors='replace')
    if link.encoding:
        info += 'encoding: %s\n' % unicode(link.encoding, errors='replace')
    for problem in link.linkproblems:
        info += 'problem: %s\n' % unicode(problem, errors='replace')
    # trim trailing newline
    return info.strip()

def make_link(link,title=None):
    """Return an anchor to a url with title.

    link is the link object to point to and title is the text to show; if
    title is None the result of get_title(link) is used instead.
    A css class of 'internal' or 'external' is chosen from link.isinternal
    and a target attribute is prepared if
    config.REPORT_LINKS_IN_NEW_WINDOW is set."""
    # pick the css class from the kind of link
    if link.isinternal:
        cssclass='internal'
    else:
        cssclass='external'
    if title is None:
        title=get_title(link)
    target=''
    if config.REPORT_LINKS_IN_NEW_WINDOW:
        target='target="_blank" '
    # gather some information about the link to report
    # NOTE(review): cssclass and target are not used in the expression below
    # and the surrounding literals are empty in this copy; the anchor markup
    # appears to be missing -- confirm against the upstream source
    return ''+escape(title)+''

def print_parents(fp,link,indent=' '):
    """Write a "referenced from:" section for the parents of link to fp.

    Nothing is written if the link has no parents. Every written line is
    prefixed with indent."""
    # present a list of parents
    parents = link.parents
    # if there are no parents print nothing
    if len(parents) == 0:
        return
    # parents is the same object as link.parents, so this also reorders
    # link.parents itself (sorted by title)
    parents.sort(lambda a, b: cmp(a.title, b.title))
    # NOTE(review): the literals below contain raw line breaks and no code
    # that writes the individual parents is visible in this copy; markup and
    # possibly a loop appear to be missing -- confirm against upstream
    fp.write(
      indent+'
\n'+
      indent+' referenced from:\n'+
      indent+' \n'+
      indent+'
\n' )

def open_file(filename, istext=True):
    """Return an open file object which can be used for writing.

    The file is created in the output directory. The output directory
    (stored in config.OUTPUT_DIR) is created if it does not yet exist.
    If the file already exists and config.OVERWRITE_FILES is not set the
    user is asked whether to overwrite it; answering 'a' sets
    config.OVERWRITE_FILES for the following files and any answer other
    than 'y' or 'a' ends the program.
    If the second parameter is True (default) the file is opened in text
    mode ('w'), otherwise in binary mode ('wb'). No encoding is applied by
    this function; callers are presumably expected to write UTF-8.

    Failure to create the directory or the file is reported through
    debugio.error() and ends the program with exit status 1."""
    import os
    # check if output directory exists and create it if needed
    if not os.path.isdir(config.OUTPUT_DIR):
        try:
            os.mkdir(config.OUTPUT_DIR)
        # Python 2 syntax: unpacks the exception into errno and strerror
        except OSError, (errno, strerror):
            debugio.error('error creating directory %(dir)s: %(strerror)s' %
                          { 'dir': config.OUTPUT_DIR,
                            'strerror': strerror })
            sys.exit(1)
    # build the output file name
    fname = os.path.join(config.OUTPUT_DIR,filename)
    # check if file exists and ask to overwrite
    if os.path.exists(fname) and not config.OVERWRITE_FILES:
        ow = raw_input('webcheck: overwrite %s? [y]es, [a]ll, [q]uit: ' % fname)
        # the appended space makes ow[0] safe when the answer is empty
        ow = ow.lower() + " "
        if ow[0] == 'a':
            config.OVERWRITE_FILES = True
        elif ow[0] != 'y':
            print 'Aborted.'
            sys.exit(1)
    # open the file for writing
    try:
        if istext:
            return open(fname, 'w')
        else:
            return open(fname, 'wb')
    except IOError, (errno, strerror):
        debugio.error('error creating output file %(fname)s: %(strerror)s' %
                      { 'fname': fname,
                        'strerror': strerror })
        sys.exit(1)

def print_navbar(fp, plugins, current):
    """Write a html fragment representing the navigation bar for a page
    to fp.

    generate() passes the list of plugin names as plugins and the name of
    the plugin whose page is being written as current."""
    # NOTE(review): plugins and current are not used in this copy and only a
    # single line is written; the navigation markup appears to be missing --
    # confirm against the upstream source
    fp.write(' \n')

def generate(site, plugins):
    """Generate pages for plugins.

    For every name in plugins the module plugins.<name> is imported, an
    output file is opened with open_file() and the page head, the
    navigation bar, the plugin title (the __title__ of the module) and the
    plugin contents (written by the generate(fp, site) function of the
    module) are written to it, followed by the bottom of the page. The
    page of the first plugin is written to index.html, the others to
    <name>.html."""
    for p in plugins:
        debugio.info(' ' + p)
        # if this is the first plugin, use index.html as filename
        filename = p + '.html'
        if p == plugins[0]:
            filename = 'index.html'
        # the non-empty fromlist makes __import__ return the plugin module
        # itself instead of the top-level plugins package
        report = __import__('plugins.' + p, globals(), locals(), [p])
        fp = open_file(filename)
        # write basic html head
        # TODO: make it possible to use multiple stylesheets (possibly reference external stylesheets)
        # NOTE(review): the literals written in this function contain raw
        # line breaks and lack the placeholders for most of the keys that
        # are passed in; the html markup appears to be missing in this copy
        # -- confirm against the upstream source
        fp.write(
          '\n'
          '\n'
          '\n'
          ' \n'
          ' \n'
          ' Webcheck report for %(sitetitle)s\n'
          ' \n'
          ' \n'
          ' \n'
          ' \n'
          ' \n'
          '

Webcheck report for %(sitetitle)s

\n'
          % { 'sitetitle': escape(get_title(site.linkMap[site.base])),
              'siteurl': site.base,
              'version': config.VERSION })
        # write navigation bar
        print_navbar(fp, plugins, p)
        # write plugin heading
        fp.write('

%s

\n' % escape(report.__title__))
        # write plugin contents
        fp.write('
\n')
        report.generate(fp,site)
        fp.write('
\n')
        # write bottom of page
        fp.write(
          ' \n'
          ' \n'
          '\n'
          % { 'time': escape(time.ctime(time.time())),
              'homepage': config.HOMEPAGE,
              'version': escape(config.VERSION) })
        fp.close()