From 3e6a8c12558aa3fbe1e471a20ee2dc13d514be6d Mon Sep 17 00:00:00 2001
From: Arthur de Jong
Date: Thu, 27 Apr 2006 21:53:59 +0000
Subject: also add all unfetched links from a site to make this method recallable

git-svn-id: http://arthurdejong.org/svn/webcheck/webcheck@249 86f53f14-5ff3-0310-afe5-9b438ce3f40c
---
 crawler.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/crawler.py b/crawler.py
index c738767..b5b68f5 100644
--- a/crawler.py
+++ b/crawler.py
@@ -236,6 +236,11 @@ class Site:
         # TODO: have some different scheme to crawl a site (e.g. separate
         # internal and external queues, threading, etc)
         tocheck = []
+        # add all unfetched site urls
+        for link in self.linkMap.values():
+            if not link.isyanked and not link.isfetched:
+                tocheck.append(link)
+        # add all internal urls
         for url in self._internal_urls:
             tocheck.append(self.get_link(url))
         # repeat until we have nothing more to check
-- 
cgit v1.2.3