Arthur de Jong

Open Source / Free Software developer

summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Arthur de Jong <arthur@arthurdejong.org> 2013-09-24 18:39:42 +0200
committer: Arthur de Jong <arthur@arthurdejong.org> 2013-09-28 15:24:09 +0200
commit: d55b995e35eb18d9d89aed0d4977af46dd8e5b22 (patch)
tree: c241156d19e41be7a2ed6c8e7c1ff706c1f17a43
parent: 3c2b822f966e9b8a0dfb23391ce906ad0e9bcd25 (diff)
Get response size and modified date from request
-rw-r--r-- webcheck/crawler.py 12
1 files changed, 9 insertions, 3 deletions
diff --git a/webcheck/crawler.py b/webcheck/crawler.py
index abd1828..d126ca7 100644
--- a/webcheck/crawler.py
+++ b/webcheck/crawler.py
@@ -353,12 +353,18 @@ class Crawler(object):
if parent:
request.add_header('Referer', parent.url)
response = urllib2.urlopen(request, timeout=config.IOTIMEOUT)
+ info = response.info()
link.mimetype = response.info().gettype()
link.set_encoding(response.headers.getparam('charset'))
- # FIXME: get result code and other stuff
+ # get result code and other stuff
link.status = str(response.code)
- # link.size = int(response.getheader('Content-length'))
- # link.mtime = time.mktime(response.msg.getdate('Last-Modified'))
+ try:
+ link.size = int(info.getheader('Content-length'))
+ except (TypeError, ValueError):
+ pass
+ mtime = info.getdate('Last-Modified')
+ if mtime:
+ link.mtime = datetime.datetime(*mtime[:7])
# if response.status == 301: link.add_linkproblem(str(response.status)+': '+response.reason)
# elif response.status != 200: link.add_linkproblem(str(response.status)+': '+response.reason)
# TODO: add checking for size