diff options
author | Arthur de Jong <arthur@arthurdejong.org> | 2012-08-29 17:34:23 +0200 |
---|---|---|
committer | Arthur de Jong <arthur@arthurdejong.org> | 2012-08-29 17:34:23 +0200 |
commit | d26d8299a054be7be491c7210714291c05083e60 (patch) | |
tree | 05763798c1018c684060da69709e0b9a247e77d9 | |
parent | 554a760f311b94fecf74e25a7ae53ebc0f2b8706 (diff) |
pass a string to RobotFileParser because of problems with unicode
git-svn-id: http://arthurdejong.org/svn/webcheck/webcheck@471 86f53f14-5ff3-0310-afe5-9b438ce3f40c
-rw-r--r-- | webcheck/crawler.py | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/webcheck/crawler.py b/webcheck/crawler.py
index 3f0c3b8..0099399 100644
--- a/webcheck/crawler.py
+++ b/webcheck/crawler.py
@@ -285,7 +285,7 @@ class Crawler(object):
             # choose a link from the tocheck list
             link = tocheck.pop()
             link.is_internal = self._is_internal(link.url)
-            link.yanked = self._is_yanked(link.url)
+            link.yanked = self._is_yanked(str(link.url))
             # see if there are any more links to check
             if not tocheck:
                 tocheck = self.get_links_to_crawl(session)
```