diff options
| author | Arthur de Jong <arthur@arthurdejong.org> | 2013-09-24 18:39:42 +0200 |
|---|---|---|
| committer | Arthur de Jong <arthur@arthurdejong.org> | 2013-09-28 15:24:09 +0200 |
| commit | d55b995e35eb18d9d89aed0d4977af46dd8e5b22 (patch) | |
| tree | c241156d19e41be7a2ed6c8e7c1ff706c1f17a43 | |
| parent | 3c2b822f966e9b8a0dfb23391ce906ad0e9bcd25 (diff) | |
Get response size and modified date from request
-rw-r--r-- | webcheck/crawler.py | 12 |
1 files changed, 9 insertions, 3 deletions
```diff
diff --git a/webcheck/crawler.py b/webcheck/crawler.py
index abd1828..d126ca7 100644
--- a/webcheck/crawler.py
+++ b/webcheck/crawler.py
@@ -353,12 +353,18 @@ class Crawler(object):
             if parent:
                 request.add_header('Referer', parent.url)
             response = urllib2.urlopen(request, timeout=config.IOTIMEOUT)
+            info = response.info()
             link.mimetype = response.info().gettype()
             link.set_encoding(response.headers.getparam('charset'))
-            # FIXME: get result code and other stuff
+            # get result code and other stuff
             link.status = str(response.code)
-            # link.size = int(response.getheader('Content-length'))
-            # link.mtime = time.mktime(response.msg.getdate('Last-Modified'))
+            try:
+                link.size = int(info.getheader('Content-length'))
+            except (TypeError, ValueError):
+                pass
+            mtime = info.getdate('Last-Modified')
+            if mtime:
+                link.mtime = datetime.datetime(*mtime[:7])
             # if response.status == 301: link.add_linkproblem(str(response.status)+': '+response.reason)
             # elif response.status != 200: link.add_linkproblem(str(response.status)+': '+response.reason)
             # TODO: add checking for size
```