# __init__.py - plugin function module
#
# Copyright (C) 1998, 1999 Albert Hopkins (marduk)
# Copyright (C) 2002 Mike W. Meyer
# Copyright (C) 2005, 2006, 2007, 2009 Arthur de Jong
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# The files produced as output from the software do not automatically fall
# under the copyright of the software, unless explicitly stated otherwise.

"""This package groups all the plugins.

When generating the report each plugin is called in turn with
the generate() function. Each plugin should export the following
fields:

    generate(site)
        Based on the site generate all the output files as needed.
    __title__
        A short description of the plugin that is used when linking
        to the output from the plugin.
    __author__
        The author(s) of the plugin.
    __outputfile__
        The file the plugin generates (for linking to).
    docstring
        The docstring is used as description of the plugin in the
        report.
Pluings can use the functions exported by this module.""" import sys import debugio import config import time import parsers.html # reference function from html module htmlescape = parsers.html.htmlescape def get_title(link): """Returns the title of a link if it is set otherwise returns url.""" if link.title is None or link.title == '': return link.url return link.title def _floatformat(f): """Return a float as a string while trying to keep it within three characters.""" txt = '%.1f' % f # remove period from too long strings if len(txt) > 3: txt = txt[:txt.find('.')] return txt def get_size(i): """Return the size in bytes as a readble string.""" K = 1024 M = K*1024 G = M*1024 if i > 1024*1024*999: return _floatformat(float(i)/float(G))+'G' elif i > 1024*999: return _floatformat(float(i)/float(M))+'M' elif i >= 1024: return _floatformat(float(i)/float(K))+'K' else: return '%d' % i def _mk_unicode(txt): """Returns a unicode instance of the string.""" if not isinstance(txt, unicode): txt = unicode(txt, errors='replace') return txt def get_info(link): """Return a string with a summary of the information in the link.""" info = u'url: %s\n' % _mk_unicode(link.url) if link.status: info += '%s\n' % _mk_unicode(link.status) if link.title: info += 'title: %s\n' % link.title.strip() if link.author: info += 'author: %s\n' % link.author.strip() if link.isinternal: info += 'internal link' else: info += 'external link' if link.isyanked: if isinstance(link.isyanked, unicode): info += ', not checked (%s)\n' % link.isyanked if isinstance(link.isyanked, str): info += ', not checked (%s)\n' % _mk_unicode(link.isyanked) else: info += ', not checked\n' else: info += '\n' if link.redirectdepth > 0: if len(link.children) > 0: info += 'redirect: %s\n' % _mk_unicode(link.children.copy().pop().url) else: info += 'redirect (not followed)\n' if len(link.parents) == 1: info += 'linked from 1 page\n' elif len(link.parents) > 1: info += 'linked from %d pages\n' % len(link.parents) if link.mtime: 
info += 'last modified: %s\n' % time.ctime(link.mtime) if link.size: info += 'size: %s\n' % get_size(link.size) if link.mimetype: info += 'mime-type: %s\n' % _mk_unicode(link.mimetype) if link.encoding: info += 'encoding: %s\n' % _mk_unicode(link.encoding) for problem in link.linkproblems: info += 'problem: %s\n' % _mk_unicode(problem) # trim trailing newline return info.strip() def make_link(link, title=None): """Return an nchor to a url with title. If url is in the Linklist and is external, insert "class=external" in the tag.""" # try to fetch the link object for this url if link.isinternal: cssclass = 'internal' else: cssclass = 'external' if title is None: title = get_title(link) target = '' if config.REPORT_LINKS_IN_NEW_WINDOW: target = 'target="_blank" ' # gather some information about the link to report return ''+htmlescape(title)+'' def print_parents(fp, link, indent=' '): """Write a list of parents to the output file descriptor. The output is indeted with the specified indent.""" parents = list(link.parents) # if there are no parents print nothing if len(parents) == 0: return parents.sort(lambda a, b: cmp(a.title, b.title) or cmp(a.url, b.url)) fp.write( indent+'