# __init__.py - plugin function module # # Copyright (C) 1998, 1999 Albert Hopkins (marduk) # Copyright (C) 2002 Mike W. Meyer # Copyright (C) 2005 Arthur de Jong # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # # The files produced as output from the software do not automatically fall # under the copyright of the software, unless explicitly stated otherwise. import sys import urllib import string import debugio import config import time def escape(txt, inattr=False): """HTML escape the given string and return an ASCII clean string with known entities and character entities for the other values.""" import htmlentitydefs # the output string out = '' # convert to unicode object if isinstance(txt, str): txt = unicode(txt, 'utf-8') # loop over the characters of the string for c in txt: if c == '"': if inattr: out += '&%s;' % htmlentitydefs.codepoint2name[ord(c)] else: out += '"' elif htmlentitydefs.codepoint2name.has_key(ord(c)): out += '&%s;' % htmlentitydefs.codepoint2name[ord(c)] elif ord(c) > 126: out += '%d;'% ord(c) elif inattr and c == u'\n': out += ' ' else: out += c.encode('utf-8') return out def get_title(link): """Returns the title of a link if it is set otherwise returns url.""" if link.title is None or link.title == '': return link.url return link.title def _floatformat(f): """Return a float as a string while trying to keep it within three characters.""" r = '%.1f' % f if len(r) > 3: r = r[:r.find('.')] return r def get_size(i): """Return the size in bytes as a readble string.""" K = 1024 M = K*1024 G = M*1024 if i > 1024*1024*999: return _floatformat(float(i)/float(G))+'G' elif i > 1024*999: return _floatformat(float(i)/float(M))+'M' elif i >= 1024: return _floatformat(float(i)/float(K))+'K' else: return '%d' % i def get_info(link): """Return a string with a summary of the information in the link.""" info = u'url: %s\n' % link.url if link.status: info += '%s\n' % link.status if link.title: info += 'title: %s\n' % link.title.strip() if link.author: info += 'author: %s\n' % link.author.strip() if link.isinternal: info += 'internal link' else: info += 'external link' if link.isyanked: info += ', not checked\n' else: info += '\n' if link.redirectdepth > 0: if len(link.children) > 0: info += 'redirect: %s\n' % link.children[0].url else: info += 'redirect (not followed)\n' if len(link.parents) == 1: info += 'linked from 1 page\n' elif len(link.parents) > 1: info += 'linked from %d pages\n' % len(link.parents) if link.mtime: info += 'last modified: %s\n' % time.ctime(link.mtime) if link.size: info += 'size: %s\n' % get_size(link.size) if link.mimetype: info += 'mime-type: %s\n' % link.mimetype if link.encoding: info += 'encoding: %s\n' % link.encoding for problem in link.linkproblems: info += 'problem: %s\n' % problem # trim trailing newline return info.strip() def make_link(link,title=None): """Return an nchor to a url with title. If url is in the Linklist and is external, insert "class=external" in the tag.""" # try to fetch the link object for this url if link.isinternal: cssclass='internal' else: cssclass='external' if title is None: title=get_title(link) target='' if config.REPORT_LINKS_IN_NEW_WINDOW: target='target="_blank" ' # gather some information about the link to report return ''+escape(title)+'' def print_parents(fp,link,indent=' '): # present a list of parents parents = link.parents # if there are no parents print nothing if len(parents) == 0: return parents.sort(lambda a, b: cmp(a.title, b.title)) fp.write( indent+'