1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
|
# __init__.py - plugin function module
#
# Copyright (C) 1998, 1999 Albert Hopkins (marduk)
# Copyright (C) 2002 Mike W. Meyer
# Copyright (C) 2005 Arthur de Jong
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
import sys
import urllib
import string
import debugio
import config
import xml.sax.saxutils
import time
# Problems recorded for internal links, grouped per author: maps link.author
# to a list of (type, link) tuples. Filled by add_problem().
problem_db = {}
def get_title(link):
    """Return the title of a link, falling back to its url.

    A title that is None or the empty string counts as "not set".
    """
    title = link.title
    if title is not None and title != '':
        return title
    return link.url
def _floatformat(f):
    """Format a float with one decimal, trying to stay within three
    characters.

    When the one-decimal form is wider than three characters the decimal
    point and everything after it are cut off (truncated, not rounded).
    """
    text = '%.1f' % f
    if len(text) <= 3:
        return text
    return text[:text.find('.')]


def get_size(i):
    """Return the size in bytes as a readable string.

    Values below 1024 are printed as a plain integer; larger values are
    scaled to K, M or G (powers of 1024) and formatted by _floatformat().
    A unit is abandoned for the next larger one once the value exceeds 999
    of that unit, which keeps the number part at most three characters.
    """
    kilo = 1024
    mega = kilo * 1024
    giga = mega * 1024
    if i > 999 * mega:
        divisor, suffix = giga, 'G'
    elif i > 999 * kilo:
        divisor, suffix = mega, 'M'
    elif i >= kilo:
        divisor, suffix = kilo, 'K'
    else:
        return '%d' % i
    return _floatformat(float(i) / float(divisor)) + suffix
def get_info(link):
    """Return a multi-line, human readable summary of a link.

    The summary always contains the url and whether the link is internal
    or external; title, author, redirect target, number of referring
    pages, modification time, size, mime-type and status are added only
    when the link has a value for them. Leading and trailing whitespace is
    stripped from the result.
    """
    pieces = ['url: %s\n' % link.url]
    if link.title:
        pieces.append('title: %s\n' % link.title.strip())
    if link.author:
        pieces.append('author: %s\n' % link.author.strip())
    # internal/external and the "not checked" marker share a single line
    if link.isinternal:
        pieces.append('internal link')
    else:
        pieces.append('external link')
    if link.isyanked:
        pieces.append(', not checked\n')
    else:
        pieces.append('\n')
    if link.redirectdepth > 0:
        if len(link.children) > 0:
            pieces.append('redirect: %s\n' % link.children[0].url)
        else:
            pieces.append('redirect (not followed)\n')
    parent_count = len(link.parents)
    if parent_count == 1:
        pieces.append('linked from 1 page\n')
    elif parent_count > 1:
        pieces.append('linked from %d pages\n' % parent_count)
    if link.mtime:
        pieces.append('last modified: %s\n' % time.ctime(link.mtime))
    if link.size:
        pieces.append('size: %s\n' % get_size(link.size))
    if link.mimetype:
        pieces.append('mime-type: %s\n' % link.mimetype)
    if link.status:
        pieces.append('status: %s\n' % link.status)
    # the last piece ends in a newline; strip() trims it
    return ''.join(pieces).strip()
def make_link(link, title=None):
    """Return an XHTML <a> element that points to link.url.

    link  -- link object; its url, isinternal flag and the fields read by
             get_info() are used
    title -- text to show inside the anchor; defaults to get_title(link)

    The anchor gets class "internal" or "external" depending on
    link.isinternal, and a title attribute holding the summary returned by
    get_info(link).
    """
    if link.isinternal:
        cssclass = 'internal'
    else:
        cssclass = 'external'
    if title is None:
        title = get_title(link)
    quoteattr = xml.sax.saxutils.quoteattr
    # The url comes from crawled pages and may contain '&', '<' or quotes.
    # quoteattr() escapes those and adds the surrounding quotes, so the
    # attribute cannot be broken out of and the output stays well-formed.
    href = quoteattr(link.url)
    # gather some information about the link to report
    info = quoteattr(get_info(link), {'\n': ' '})
    return ('<a href=' + href + ' class="' + cssclass + '" title=' + info + '>'
            + xml.sax.saxutils.escape(title) + '</a>')
def print_parents(fp, link, indent=' '):
    """Write an XHTML list of the pages that reference link to fp.

    fp     -- file-like object; only its write() method is used
    link   -- link object whose parents are listed
    indent -- string put in front of every line that is written

    Nothing is written when the link has no parents. The parents are
    listed ordered by title; link.parents itself is left untouched.
    """
    parents = link.parents
    # if there are no parents print nothing
    if len(parents) == 0:
        return

    def title_key(parent):
        # Untitled (None) parents sort first, as they did with the old
        # cmp()-based ordering; the tuple avoids comparing None with a
        # string directly.
        return (parent.title is not None, parent.title or '')

    # sort a copy so that the caller's parents list is not reordered
    ordered = sorted(parents, key=title_key)
    fp.write(
        indent + '<div class="parents">\n' +
        indent + ' referenced from:\n' +
        indent + ' <ul>\n')
    for parent in ordered:
        fp.write(
            indent + (' <li>%(parent)s</li>\n'
                      % {'parent': make_link(parent)}))
    fp.write(
        indent + ' </ul>\n' +
        indent + '</div>\n')
def add_problem(type, link):
    """Add a problem link to the problem_db database.

    type -- description of the problem that was found
    link -- link object the problem applies to

    Problems are grouped by link.author; every entry is a (type, link)
    tuple. External links are not added.
    """
    if not link.isinternal:
        return
    # problem_db is only mutated, never rebound, so no global statement is
    # needed; setdefault() creates the author's list on first use.
    problem_db.setdefault(link.author, []).append((type, link))
def open_file(filename):
    """Open filename inside config.OUTPUT_DIR for writing and return the
    file object.

    The output directory is created when it does not exist yet, including
    any missing parent directories. When the file already exists and
    config.OVERWRITE_FILES is not set the user is asked what to do:
    "y" overwrites this file, "a" overwrites this and all following files
    (by setting config.OVERWRITE_FILES), anything else prints "Aborted."
    and exits the program with status 0.
    """
    import os
    if not os.path.isdir(config.OUTPUT_DIR):
        # makedirs also handles an output directory nested in directories
        # that do not exist yet, where mkdir would fail
        os.makedirs(config.OUTPUT_DIR)
    fname = os.path.join(config.OUTPUT_DIR, filename)
    if os.path.exists(fname) and not config.OVERWRITE_FILES:
        # prompts of comparable tools, kept for reference:
        # mv: overwrite `/tmp/b'?
        # cp: overwrite `/tmp/b'?
        # zip: replace aap.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename:
        try:
            ow = raw_input('webcheck: overwrite %s? [y]es, [a]ll, [q]uit: ' % fname)
        except EOFError:
            # no answer can be read (stdin closed): treat it as "quit"
            ow = ''
        # only the first character counts; an empty answer means quit
        answer = ow.lower()[:1]
        if answer == 'a':
            config.OVERWRITE_FILES = True
        elif answer != 'y':
            print('Aborted.')
            sys.exit(0)
    return open(fname, 'w')
def generate(site, plugins):
    """Generate the report pages for the plugins.

    site    -- site object; site.base and site.linkMap[site.base] are used
               for the page heading and the object is handed to every
               plugin
    plugins -- list of module names inside the plugins package

    Every plugin module must provide __title__ and generate(fp, site); its
    docstring is used as the tooltip in the navigation bar and an optional
    __description__ is printed below the heading. The first plugin is
    written to index.html, every other plugin to <plugin>.html. Files are
    opened through open_file().
    """
    escape = xml.sax.saxutils.escape
    # escape() leaves double quotes alone; inside a double-quoted attribute
    # value they have to be replaced as well
    attr_entities = {'"': '&quot;'}
    # import every plugin once and remember which file it is written to
    pages = []
    for plugin in plugins:
        # if this is the first plugin use index.html as filename
        filename = plugin + '.html'
        if plugin == plugins[0]:
            filename = 'index.html'
        report = __import__('plugins.' + plugin, globals(), locals(), [plugin])
        pages.append((plugin, filename, report))
    # generate navigation part
    navbar = ' <ul class="navbar">\n'
    for plugin, filename, report in pages:
        navbar += (
            ' <li><a href="%(pluginfile)s" title="%(description)s">%(title)s</a></li>\n'
            % {'pluginfile': filename,
               'title': escape(report.__title__),
               # a plugin without a docstring gets an empty tooltip
               'description': escape(report.__doc__ or '', attr_entities)})
    navbar += ' </ul>\n'
    for plugin, filename, report in pages:
        debugio.info(' ' + plugin)
        fp = open_file(filename)
        # make sure the file is closed even when the plugin raises
        try:
            # write basic html head
            # TODO: make it possible to use multiple stylesheets (possibly reference external stylesheets)
            fp.write(
                '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n'
                '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
                '<html xmlns="http://www.w3.org/1999/xhtml">\n'
                ' <head>\n'
                ' <title>Webcheck report for %(sitetitle)s</title>\n'
                ' <link rel="stylesheet" type="text/css" href="webcheck.css" />\n'
                ' <meta name="Generator" content="webcheck %(version)s" />\n'
                ' </head>\n'
                ' <body>\n'
                ' <h1 class="basename">Webcheck report for <a href="%(siteurl)s">%(sitetitle)s</a></h1>\n'
                % {'sitetitle': escape(get_title(site.linkMap[site.base])),
                   'siteurl': escape(site.base, attr_entities),
                   'version': escape(config.VERSION, attr_entities)})
            # write navigation bar
            fp.write(navbar)
            # write plugin heading
            fp.write(' <h2>%s</h2>\n' % escape(report.__title__))
            if hasattr(report, "__description__"):
                fp.write(' <p class="description">\n %s\n </p>\n' % escape(report.__description__))
            # write plugin contents
            fp.write(' <div class="content">\n')
            report.generate(fp, site)
            fp.write(' </div>\n')
            # write bottom of page
            fp.write(
                ' <p class="footer">\n'
                ' Generated %(time)s by <a href="%(homepage)s">webcheck %(version)s</a>\n'
                ' </p>\n'
                ' </body>\n'
                '</html>\n'
                % {'time': escape(time.ctime(time.time())),
                   'homepage': escape(config.HOMEPAGE, attr_entities),
                   'version': escape(config.VERSION)})
        finally:
            fp.close()
|