From 44ac994edca503d6b41d04a64fe67d581deb5926 Mon Sep 17 00:00:00 2001 From: Arthur de Jong Date: Tue, 24 Apr 2007 18:53:11 +0000 Subject: handle ID attribute as anchor on any tag git-svn-id: http://arthurdejong.org/svn/webcheck/webcheck@326 86f53f14-5ff3-0310-afe5-9b438ce3f40c --- parsers/html/beautifulsoup.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/parsers/html/beautifulsoup.py b/parsers/html/beautifulsoup.py index a18d1cc..61abb59 100644 --- a/parsers/html/beautifulsoup.py +++ b/parsers/html/beautifulsoup.py @@ -91,13 +91,13 @@ def parse(content, link): link.add_anchor(crawler.urlescape(htmlunescape(a['id']).strip())) # add the anchor link.add_anchor(a_name) - # - for a in soup.findAll('a', id=True): - # skip entries that have a name - if a.has_key('name'): + # + for elem in soup.findAll(id=True): + # skip anchors that have a name + if elem.name == 'a' and elem.has_key('name'): continue # add the anchor - link.add_anchor(crawler.urlescape(htmlunescape(a['id']).strip())) + link.add_anchor(crawler.urlescape(htmlunescape(elem['id']).strip())) # ... for frame in soup.findAll('frame', src=True): embed = crawler.urlescape(htmlunescape(frame['src']).strip()) -- cgit v1.2.3