From 69d183363cab127659591fa49a335ab91205f0e9 Mon Sep 17 00:00:00 2001 From: Arthur de Jong Date: Thu, 7 Apr 2005 20:05:37 +0000 Subject: tabs to spaces (tabs are evil) git-svn-id: http://arthurdejong.org/svn/webcheck/webcheck@12 86f53f14-5ff3-0310-afe5-9b438ce3f40c --- config.py | 22 ++++++------ contrib/plugins/about.py | 12 +++---- httpcodes.py | 72 +++++++++++++++++++------------------- plugins/badlinks.py | 36 +++++++++---------- plugins/external.py | 6 ++-- plugins/notchkd.py | 16 ++++----- plugins/notitles.py | 12 +++---- plugins/problems.py | 24 ++++++------- plugins/rptlib.py | 78 ++++++++++++++++++++--------------------- plugins/sitemap.py | 12 +++---- plugins/slow.py | 30 ++++++++-------- plugins/whatsnew.py | 14 ++++---- plugins/whatsold.py | 16 ++++----- schemes/filelink.py | 10 +++--- schemes/ftplink.py | 24 ++++++------- schemes/httplink.py | 90 ++++++++++++++++++++++++------------------------ 16 files changed, 237 insertions(+), 237 deletions(-) diff --git a/config.py b/config.py index c7c32de..a5c956c 100644 --- a/config.py +++ b/config.py @@ -21,7 +21,7 @@ BASE_URLS_ONLY=0 # be passed to webcheck via the -x command line switch. Note this should be a # VALID REGULAR EXPRESSION. See also YANKED_URLS below. EXCLUDED_URLS = [r'.*\.gif',r'.*\.tar\.gz',r'.*\.jpeg',r'.*\.jpg', - r'http://www.mired.org/cgi-bin/', r'http://www.mired.org/ATCPFAQ/'] + r'http://www.mired.org/cgi-bin/', r'http://www.mired.org/ATCPFAQ/'] # This is like EXCLUDED_URLS, but YANKED_URLS are not checked at all. Also # you can use the -y command line switch. @@ -29,7 +29,7 @@ EXCLUDED_URLS = [r'.*\.gif',r'.*\.tar\.gz',r'.*\.jpeg',r'.*\.jpg', # raw Python strings (beginning quote preceded with an "r"). Regular expressions # are case insensitive. YANKED_URLS = [r'http://www.amazon.com/exec/obidos/', - r'http://www.mired.org/home/mwm/&me;.txt'] + r'http://www.mired.org/home/mwm/&me;.txt'] # Normally webcheck will check links to "external" sites at the top level to # ensure that your pages don't refer to broken links that are not at your @@ -111,15 +111,15 @@ DEBUG_LEVEL = 1 # Note: Do not include the 'problems' report as it will appear (last) on all # reports automatically PLUGINS = ['sitemap', - 'badlinks', - 'images', - 'whatsold', - 'whatsnew', - 'slow', - 'notitles', - 'external', - 'notchkd'] - + 'badlinks', + 'images', + 'whatsold', + 'whatsnew', + 'slow', + 'notitles', + 'external', + 'notchkd'] + # This is a URL (absolute or relative) of a level 1 Cascading Stylesheet to be # used in all reports. See the default webcheck.css as well as the HTML source # for ideas on making your own .css for Webcheck. diff --git a/contrib/plugins/about.py b/contrib/plugins/about.py index 470d02e..519192e 100644 --- a/contrib/plugins/about.py +++ b/contrib/plugins/about.py @@ -37,11 +37,11 @@ def generate(): print '' print '' for plugin in config.PLUGINS + ['problems']: - report = __import__('plugins.%s' % plugin,globals(),locals(),[plugin]) - author = report.__author__ - version = report.__version__ - print '' % plugin, - print '' % version, - print '' % author + report = __import__('plugins.%s' % plugin,globals(),locals(),[plugin]) + author = report.__author__ + version = report.__version__ + print '' % plugin, + print '' % version, + print '' % author print '

Plugin	Version	Author
%s	%s	%s
%s	%s	%s

' print '' diff --git a/httpcodes.py b/httpcodes.py index 6060c67..e21e394 100644 --- a/httpcodes.py +++ b/httpcodes.py @@ -20,39 +20,39 @@ __version__='0.10' __author__ = 'Mike Meyer ' HTTP_STATUS_CODES= {'100':"Continue", - '101':"Switching Protocols", - '200':"OK", - '201':"Created", - '202':"Accepted", - '204':"No Content", - '205':"Reset Content", - '206':"Partial Content", - '300':"Multiple Choices", - '301':"Moved Permanently", - '302':"Moved Temporarily", - '303':"See Other", - '304':"Not Modified", - '305':"Use Proxy", - '400':"Bad Request", - '401':"Unauthorized", - '402':"Payment Required", - '403':"Forbidden", - '404':"Not Found", - '405':"Method Not Allowed", - '406':"Not Acceptable", - '407':"Proxy Authentication Required", - '408':"Request Time-out", - '409':"Conflict", - '410':"Gone", - '411':"Length Required", - '412':"Precondition Failed", - '413':"Request Entity Too Large", - '414':"Request-URI Too Large", - '415':"Unsupported Media Type", - '500':"Internal Server Error", - '501':"Not Implemented", - '502':"Bad Gateway", - '503':"Service Unavailable", - '504':"Gateway Time-out", - '505':"HTTP Version not supported" - } + '101':"Switching Protocols", + '200':"OK", + '201':"Created", + '202':"Accepted", + '204':"No Content", + '205':"Reset Content", + '206':"Partial Content", + '300':"Multiple Choices", + '301':"Moved Permanently", + '302':"Moved Temporarily", + '303':"See Other", + '304':"Not Modified", + '305':"Use Proxy", + '400':"Bad Request", + '401':"Unauthorized", + '402':"Payment Required", + '403':"Forbidden", + '404':"Not Found", + '405':"Method Not Allowed", + '406':"Not Acceptable", + '407':"Proxy Authentication Required", + '408':"Request Time-out", + '409':"Conflict", + '410':"Gone", + '411':"Length Required", + '412':"Precondition Failed", + '413':"Request Entity Too Large", + '414':"Request-URI Too Large", + '415':"Unsupported Media Type", + '500':"Internal Server Error", + '501':"Not Implemented", + '502':"Bad Gateway", + '503':"Service Unavailable", + '504':"Gateway Time-out", + '505':"HTTP Version not supported" + } diff --git a/plugins/badlinks.py b/plugins/badlinks.py index ef1f229..d55c011 100644 --- a/plugins/badlinks.py +++ b/plugins/badlinks.py @@ -34,23 +34,23 @@ def generate(): print '

' print '' for link in Link.badLinks: - print '\t' - if config.ANCHOR_BAD_LINKS: - print '\t', - print '' - else: - print '\t', - print '' % link - status = str(linkList[link].status) - if status in HTTP_STATUS_CODES.keys(): - status = status + ": " + HTTP_STATUS_CODES[status] - print '\t' % status - print '\t' % len(linkList[link].parents) - parents = linkList[link].parents - parents.sort(sort_by_author) - for parent in parents: - print '\t\t' % make_link(parent,get_title(parent)), - print '\n\t' % (str(linkList[parent].author)) - add_problem("Bad Link: " + link,linkList[parent]) + print '\t' + if config.ANCHOR_BAD_LINKS: + print '\t', + print '' + else: + print '\t', + print '' % link + status = str(linkList[link].status) + if status in HTTP_STATUS_CODES.keys(): + status = status + ": " + HTTP_STATUS_CODES[status] + print '\t' % status + print '\t' % len(linkList[link].parents) + parents = linkList[link].parents + parents.sort(sort_by_author) + for parent in parents: + print '\t\t' % make_link(parent,get_title(parent)), + print '\n\t' % (str(linkList[parent].author)) + add_problem("Bad Link: " + link,linkList[parent]) print '


Link	' +make_link(link,link) +'
Link	%s
Status	%s
Parents	%s	%s

Link	' +make_link(link,link) +'
Link	%s
Status	%s
Parents	%s	%s

' print '

' diff --git a/plugins/external.py b/plugins/external.py index 44e11f3..26d1145 100644 --- a/plugins/external.py +++ b/plugins/external.py @@ -34,7 +34,7 @@ title = 'External Links' def generate(): print '

%s' % make_link(url,get_title(url)) + link=linkList[url] + if link.external: + print '\t
%s' % make_link(url,get_title(url)) print '

' diff --git a/plugins/notchkd.py b/plugins/notchkd.py index d9c08d0..835cea7 100644 --- a/plugins/notchkd.py +++ b/plugins/notchkd.py @@ -34,13 +34,13 @@ def generate(): print '

' print '' for url in Link.notChecked: - print '\t' % make_link(url,url) - print '\t' % len(linkList[url].parents) - for parent in linkList[url].parents: - print '\t\t', - if parent != linkList[url].parents[0]: print '', - print '' % make_link(parent,get_title(parent)), - print '' % (linkList[parent].author) - print '\n\t\n' + print '\t' % make_link(url,url) + print '\t' % len(linkList[url].parents) + for parent in linkList[url].parents: + print '\t\t', + if parent != linkList[url].parents[0]: print '', + print '' % make_link(parent,get_title(parent)), + print '' % (linkList[parent].author) + print '\n\t\n' print '

%s
Parent
%s		%s

%s
Parent
%s		%s

' print '

' diff --git a/plugins/notitles.py b/plugins/notitles.py index aba829a..43cf04c 100644 --- a/plugins/notitles.py +++ b/plugins/notitles.py @@ -37,11 +37,11 @@ def generate(): urls = linkList.keys() urls.sort(sort_by_author) for url in urls: - link = linkList[url] - if link.external: continue - if link.html and (link.title is None): - print '\t%s%s' \ - % (make_link(url,url), link.author) - add_problem("No Title",link) + link = linkList[url] + if link.external: continue + if link.html and (link.title is None): + print '\t%s%s' \ + % (make_link(url,url), link.author) + add_problem("No Title",link) print '' print '' diff --git a/plugins/problems.py b/plugins/problems.py index 2a42f99..2c330a5 100644 --- a/plugins/problems.py +++ b/plugins/problems.py @@ -34,20 +34,20 @@ def generate(): authors=problem_db.keys() authors.sort() if len(authors) > 1: - print '

' - for author in authors[:-1]: - print '%s' % (author, author), - print " | " - print '%s' % (authors[-1], authors[-1]), - print '

' + print '

' + for author in authors[:-1]: + print '%s' % (author, author), + print " | " + print '%s' % (authors[-1], authors[-1]), + print '

' print '

' print '' for author in authors: - print '' % (author,author) - for type,link in problem_db[author]: - url=`link` - title=get_title(url) - print '' % (make_link(url,title), type) - print '\n' + print '' % (author,author) + for type,link in problem_db[author]: + url=`link` + title=get_title(url) + print '' % (make_link(url,title), type) + print '\n' print '

%s
%s %s

%s
%s %s

' print '

' diff --git a/plugins/rptlib.py b/plugins/rptlib.py index 4a0c140..e76ef28 100644 --- a/plugins/rptlib.py +++ b/plugins/rptlib.py @@ -52,7 +52,7 @@ def get_title(url): note that this implies linkList[url] """ link=linkList[url] if link.title is None: - return url + return url return link.title def make_link(url,text): @@ -61,13 +61,13 @@ def make_link(url,text): url = str(url) # because sometimes I lazily pass a Link object. mystring = '' + text + '' return mystring @@ -77,41 +77,41 @@ def add_problem(type,link): global problem_db author = link.author if problem_db.has_key(author): - problem_db[author].append((type,link)) + problem_db[author].append((type,link)) else: - problem_db[author]=[(type,link)] + problem_db[author]=[(type,link)] def sort_by_age(a,b): """ sort helper for url's age. a and b are urls in linkList """ aage, bage = linkList[a].age, linkList[b].age if aage < bage: - return -1 + return -1 if aage == bage: - return sort_by_author(a,b) + return sort_by_author(a,b) return 1 def sort_by_rev_age(a,b): aage, bage = linkList[a].age, linkList[b].age if aage > bage: - return -1 + return -1 if aage == bage: - return sort_by_author(a,b) + return sort_by_author(a,b) return 1 def sort_by_author(a,b): aauthor,bauthor = `linkList[a].author`, `linkList[b].author` if aauthor < bauthor: - return -1 + return -1 if aauthor == bauthor: - return 0 + return 0 return 1 def sort_by_size(a,b): asize, bsize = linkList[a].totalSize, linkList[b].totalSize if asize < bsize: - return 1 + return 1 if asize == bsize: - return 0 + return 0 return -1 def main_index(): @@ -128,9 +128,9 @@ def main_index(): print ' --> ' print '' print '' \ - % config.NAVBAR_WIDTH + % config.NAVBAR_WIDTH print '' \ - % config.NAVBAR_FILENAME + % config.NAVBAR_FILENAME print '' % (webcheck.plugins[0]+'.html') print '' print '' @@ -154,32 +154,32 @@ def nav_bar(plugins): print '' print '

' print '' \ - % (config.NAVBAR_PADDING, config.NAVBAR_SPACING) + % (config.NAVBAR_PADDING, config.NAVBAR_SPACING) # title print '' \ - % (version.home, version.webcheck) + % (version.home, version.webcheck) # labels pointing to each individual page for plugin in plugins + ['problems']: - debugio.write('\t' + plugin,file=stdout) - filename = plugin + '.html' - print '' - - # create the file we just pointed to - tmp = sys.stdout - fp = open_file(filename) - sys.stdout = fp - doTopMain(report) - report.generate() - report_version = report.__version__ - doBotMain() - fp.close() - sys.stdout = tmp + debugio.write('\t' + plugin,file=stdout) + filename = plugin + '.html' + print '' + + # create the file we just pointed to + tmp = sys.stdout + fp = open_file(filename) + sys.stdout = fp + doTopMain(report) + report.generate() + report_version = report.__version__ + doBotMain() + fp.close() + sys.stdout = tmp print print '

', print 'Webcheck %s
', - report = __import__('plugins.' + plugin, globals(), locals(), [plugin]) - print '%s' \ - % (filename, report.__doc__, report.title), - print '
', + report = __import__('plugins.' + plugin, globals(), locals(), [plugin]) + print '%s' \ + % (filename, report.__doc__, report.title), + print '

' @@ -224,7 +224,7 @@ def doTopMain(report): print 'href="%s">

' % (Link.base, config.LOGO_HREF) print '\n

' print '\t%s' \ - % (`Link.base`, get_title(`Link.base`)) + % (`Link.base`, get_title(`Link.base`)) print '

' print '\n\n' print '\t\n

%s

\n' % report.title @@ -235,6 +235,6 @@ def doBotMain(): print '

' print '' \ - % (webcheck.start_time,version.home, version.webcheck) + % (webcheck.start_time,version.home, version.webcheck) print '' print '' diff --git a/plugins/sitemap.py b/plugins/sitemap.py index 8338e4e..e92151f 100644 --- a/plugins/sitemap.py +++ b/plugins/sitemap.py @@ -50,9 +50,9 @@ def explore(link, explored): for i in link.children: # Skip pages that have already been traversed if explored.has_key( i ): continue - if (i in webcheck.Link.badLinks) and not webcheck.config.ANCHOR_BAD_LINKS: - L.append('

%s' % i) - else: + if (i in webcheck.Link.badLinks) and not webcheck.config.ANCHOR_BAD_LINKS: + L.append('

%s' % i) + else: to_explore.append(i) explored[ i ] = 1 # Mark the link as explored @@ -60,8 +60,8 @@ def explore(link, explored): # any pages that are marked as having already been traversed. for i in to_explore: child = webcheck.Link.linkList[i] - L.append('

%s' % (make_link(i,get_title(i)))) - L = L + explore(child, explored) + L.append('

%s' % (make_link(i,get_title(i)))) + L = L + explore(child, explored) L.append( '' ) level=level-1 @@ -73,7 +73,7 @@ def explore(link, explored): return L # site map -def generate(): +def generate(): print make_link(webcheck.Link.base,'Starting Page') L = explore(webcheck.Link.base, {}) for i in L: print i diff --git a/plugins/slow.py b/plugins/slow.py index ab18d2f..b8a2fce 100644 --- a/plugins/slow.py +++ b/plugins/slow.py @@ -42,20 +42,20 @@ def generate(): urls = linkList.keys() urls.sort(sort_by_size) for url in urls: - link = linkList[url] - if not link.html: continue - sizeK = link.totalSize / 1024 - sizek = link.totalSize * 8 / 1000 - if sizeK < config.REPORT_SLOW_URL_SIZE: - break - print '\t%s' % make_link(url, get_title(url)), - print '%s%s' \ - % (sizeK, time.strftime('%H:%M:%S',time.gmtime(int(sizek/28.8)))), - print '%s' \ - % time.strftime('%H:%M:%S',time.gmtime(int(sizek/56))), - print '%s' \ - % time.strftime('%H:%M:%S',time.gmtime(int(sizek/1500))), - print '' - add_problem('Slow Link: %sK' % sizeK, link) + link = linkList[url] + if not link.html: continue + sizeK = link.totalSize / 1024 + sizek = link.totalSize * 8 / 1000 + if sizeK < config.REPORT_SLOW_URL_SIZE: + break + print '\t%s' % make_link(url, get_title(url)), + print '%s%s' \ + % (sizeK, time.strftime('%H:%M:%S',time.gmtime(int(sizek/28.8)))), + print '%s' \ + % time.strftime('%H:%M:%S',time.gmtime(int(sizek/56))), + print '%s' \ + % time.strftime('%H:%M:%S',time.gmtime(int(sizek/1500))), + print '' + add_problem('Slow Link: %sK' % sizeK, link) print '' print '

' diff --git a/plugins/whatsnew.py b/plugins/whatsnew.py index 1c655af..1153866 100644 --- a/plugins/whatsnew.py +++ b/plugins/whatsnew.py @@ -38,12 +38,12 @@ def generate(): urls = linkList.keys() urls.sort(sort_by_age) for url in urls: - link=linkList[url] - if not link.html: continue - age = link.age - if (age is not None)and (age <= config.REPORT_WHATSNEW_URL_AGE): - print '\t%s' % make_link(url,get_title(url)), - print '%s' % link.author, - print '%s' % age + link=linkList[url] + if not link.html: continue + age = link.age + if (age is not None)and (age <= config.REPORT_WHATSNEW_URL_AGE): + print '\t%s' % make_link(url,get_title(url)), + print '%s' % link.author, + print '%s' % age print '' print '' diff --git a/plugins/whatsold.py b/plugins/whatsold.py index 51d1ad2..d5bf149 100644 --- a/plugins/whatsold.py +++ b/plugins/whatsold.py @@ -38,13 +38,13 @@ def generate(): urls = linkList.keys() urls.sort(sort_by_rev_age) for url in urls: - link=linkList[url] - if not link.html: continue - age = link.age - if age and (age >= config.REPORT_WHATSOLD_URL_AGE): - print '\t%s' % make_link(url,get_title(url)), - print '%s' % (link.author), - print '%s' % age - add_problem('Old Link: %s days old' % age ,link) + link=linkList[url] + if not link.html: continue + age = link.age + if age and (age >= config.REPORT_WHATSOLD_URL_AGE): + print '\t%s' % make_link(url,get_title(url)), + print '%s' % (link.author), + print '%s' % age + add_problem('Old Link: %s days old' % age ,link) print '' print '' diff --git a/schemes/filelink.py b/schemes/filelink.py index 0c0cb7c..5ecb2fe 100644 --- a/schemes/filelink.py +++ b/schemes/filelink.py @@ -32,12 +32,12 @@ def init(self, url, parent): parsed = urlparse.urlparse(self.URL,'file',0) filename = parsed[2] if os.name != 'posix': - filename = re.sub("^/\(//\)?\([a-zA-Z]\)[|:]","\\2:",filename) + filename = re.sub("^/\(//\)?\([a-zA-Z]\)[|:]","\\2:",filename) try: - stats = os.stat(filename) + stats = os.stat(filename) except os.error: - self.set_bad_link(self.URL, "No such file or directory") - return + self.set_bad_link(self.URL, "No such file or directory") + return self.size = stats[6] @@ -51,7 +51,7 @@ def get_document(url): parsed = urlparse.urlparse(url,'file',0) filename = parsed[2] if os.name != 'posix': - filename = re.sub("^/\(//\)?\([a-zA-Z]\)[|:]","\\2:",filename) + filename = re.sub("^/\(//\)?\([a-zA-Z]\)[|:]","\\2:",filename) return open(filename,'r').read() diff --git a/schemes/ftplink.py b/schemes/ftplink.py index 08fdeeb..6646def 100644 --- a/schemes/ftplink.py +++ b/schemes/ftplink.py @@ -49,7 +49,7 @@ def init(self, url, parent): if filename not in ftp.nlst(): raise ftplib.error_perm, "No such file or directory" except ftplib.all_errors, errtext: - self.set_bad_link(self.URL, str(errtext)) + self.set_bad_link(self.URL, str(errtext)) try: ftp.quit() except: @@ -118,27 +118,27 @@ def size(pathname,ftpobject): def cwd(dirs, ftpobject): for dir in dirs: - ftpobject.cwd(dir) + ftpobject.cwd(dir) def parseurl(url): parsed = urlparse.urlparse(url) host = parsed[1] if '@' in host: - userpass, host = string.split(host,'@') - if ':' in userpass: - user, passwd = string.split(userpass,':') - else: - user = userpass - passwd = None + userpass, host = string.split(host,'@') + if ':' in userpass: + user, passwd = string.split(userpass,':') + else: + user = userpass + passwd = None else: - user = 'anonymous' + user = 'anonymous' passwd = '' # Filled in by ftplib. if ':' in host: - host, port = string.split(host,':') - port = int(port) + host, port = string.split(host,':') + port = int(port) else: - port = ftplib.FTP_PORT + port = ftplib.FTP_PORT pathname = parsed[2] if not port: port = ftplib.FTP_PORT diff --git a/schemes/httplink.py b/schemes/httplink.py index 594624e..7ecb385 100644 --- a/schemes/httplink.py +++ b/schemes/httplink.py @@ -51,12 +51,12 @@ def get_reply(url): global redirect_depth parsed = urlparse.urlparse(url) if proxies and proxies.has_key('http'): - host = urlparse.urlparse(proxies['http'])[1] - document = url + host = urlparse.urlparse(proxies['http'])[1] + document = url else: - host = parsed[1] - document = string.join(parsed[2:4],'') + host = parsed[1] + document = string.join(parsed[2:4],'') if not document: document = '/' debugio.write('document= %s' % document,3) @@ -67,8 +67,8 @@ def get_reply(url): h.putrequest('HEAD', document) if username and passwd: - auth = string.strip(base64.encodestring(username + ":" + passwd)) - h.putheader('Authorization', 'Basic %s' % auth) + auth = string.strip(base64.encodestring(username + ":" + passwd)) + h.putheader('Authorization', 'Basic %s' % auth) h.putheader('User-Agent','Webcheck %s' % version.webcheck) h.putheader('Host',realhost) @@ -80,22 +80,22 @@ def get_reply(url): debugio.write(errcode,2) debugio.write(errmsg,2) if errcode == 301 or errcode == 302: - redirect_depth += 1 - if redirect_depth > config.REDIRECT_DEPTH: - debugio.write('\tToo many redirects!') - redirect_depth = 0 - return (errcode, errmsg, headers, url) + redirect_depth += 1 + if redirect_depth > config.REDIRECT_DEPTH: + debugio.write('\tToo many redirects!') + redirect_depth = 0 + return (errcode, errmsg, headers, url) redirect = headers['location'] - debugio.write('\tRedirect location: ' + redirect) - redirect = urlparse.urljoin(url,redirect) - if redirect == url: - debugio.write('\tRedirect same as source: %s' % redirect) - redirect_depth = 0 - return (errcode, errmsg, headers, url) - debugio.write('\tRedirected to: ' + redirect) - if Link.linkList.has_key(redirect): - link = Link.linkList[redirect] - return (link.status, link.message, link.headers, link.URL) + debugio.write('\tRedirect location: ' + redirect) + redirect = urlparse.urljoin(url,redirect) + if redirect == url: + debugio.write('\tRedirect same as source: %s' % redirect) + redirect_depth = 0 + return (errcode, errmsg, headers, url) + debugio.write('\tRedirected to: ' + redirect) + if Link.linkList.has_key(redirect): + link = Link.linkList[redirect] + return (link.status, link.message, link.headers, link.URL) return get_reply(redirect) redirect_depth = 0 return (errcode, errmsg, headers, url) @@ -107,26 +107,26 @@ def init(self, url, parent): (self.status, self.message, self.headers, self.URL) = get_reply(myUrlLib.basejoin(parent,url)) Link.linkList[self.URL] = self try: - self.type = self.headers.gettype() + self.type = self.headers.gettype() except AttributeError: - self.type = 'text/html' # is this a good enough default? + self.type = 'text/html' # is this a good enough default? debugio.write('\tContent-type: ' + self.type,2) try: - self.size = int(self.headers['content-length']) + self.size = int(self.headers['content-length']) except (KeyError, TypeError): - self.size = 0 + self.size = 0 if (self.status != 200) and (self.status != 'Not Checked'): - self.set_bad_link(self.URL,str(self.status) + ": " + self.message) - return + self.set_bad_link(self.URL,str(self.status) + ": " + self.message) + return try: - lastMod = time.mktime(self.headers.getdate('Last-Modified')) + lastMod = time.mktime(self.headers.getdate('Last-Modified')) except (OverflowError, TypeError, ValueError): - lastMod = None + lastMod = None if lastMod: - self.age = int((time.time()-lastMod)/myUrlLib.SECS_PER_DAY) + self.age = int((time.time()-lastMod)/myUrlLib.SECS_PER_DAY) def get_document(url): document = opener.open(url).read() @@ -146,26 +146,26 @@ def parse_host(location): at = string.find(location, "@") if at > -1: - userpass = location[:at] - colon = string.find(userpass, ":") - if colon > -1: - user = userpass[:colon] - passw = userpass[colon+1:] - else: - user = userpass - passw = None - hostport = location[at+1:] + userpass = location[:at] + colon = string.find(userpass, ":") + if colon > -1: + user = userpass[:colon] + passw = userpass[colon+1:] + else: + user = userpass + passw = None + hostport = location[at+1:] else: - user = passw = None - hostport = location + user = passw = None + hostport = location colon = string.find(hostport, ":") if colon > -1: - hostname = hostport[:colon] - port = hostport[colon+1:] + hostname = hostport[:colon] + port = hostport[colon+1:] else: - hostname = hostport - port = None + hostname = hostport + port = None debugio.write("parse_host = %s %s %s %s" % (user, passw, hostname, port),3) return (user, passw, hostname, port) -- cgit v1.2.3