From 76170833713c2fc72cefbefa28b8c7ef6a47ef80 Mon Sep 17 00:00:00 2001 From: Arthur de Jong Date: Sat, 6 May 2006 15:44:00 +0000 Subject: implement checking for id and name tags in anchors git-svn-id: http://arthurdejong.org/svn/webcheck/webcheck@251 86f53f14-5ff3-0310-afe5-9b438ce3f40c --- parsers/html.py | 51 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/parsers/html.py b/parsers/html.py index b7fe892..fbbe865 100644 --- a/parsers/html.py +++ b/parsers/html.py @@ -139,19 +139,46 @@ class _MyHTMLParser(HTMLParser.HTMLParser): # elif tag == 'img' and attrs.has_key('src'): self.embedded.append(self._cleanurl(attrs['src'])) - # - elif tag == 'a' and attrs.has_key('href'): - self.children.append(self._cleanurl(attrs['href'])) - # - elif tag == 'a' and attrs.has_key('name'): - anchor = self._cleanurl(attrs['name'],'anchor') - if anchor in self.anchors: + # + elif tag == 'a': + # + if attrs.has_key('href'): + self.children.append(self._cleanurl(attrs['href'])) + # + a_name = None + if attrs.has_key('name'): + a_name = self._cleanurl(attrs['name'], 'anchor') + # + a_id = None + if attrs.has_key('id'): + a_id = self._cleanurl(attrs['id'], 'anchor') + # if both id and name are used they should be the same + if a_id and a_name and a_id != a_name: + # add problem self.link.add_pageproblem( - 'anchor "%(anchor)s" defined again %(location)s' - % { 'anchor': anchor, - 'location': self._location() }) - else: - self.anchors.append(anchor) + 'anchors defined in name and id attributes do not match %(location)s' + % { 'location': self._location() }) + elif a_id == a_name: + # ignore id if it's the same as name + a_id = None + # + if a_name: + if a_name in self.anchors: + self.link.add_pageproblem( + 'anchor "%(anchor)s" defined again %(location)s' + % { 'anchor': a_name, + 'location': self._location() }) + else: + self.anchors.append(a_name) + # + if a_id: + if a_id in self.anchors: + self.link.add_pageproblem( + 'anchor "%(anchor)s" defined again %(location)s' + % { 'anchor': a_id, + 'location': self._location() }) + else: + self.anchors.append(a_id) # ... elif tag == 'frame' and attrs.has_key('src'): self.embedded.append(self._cleanurl(attrs['src'])) -- cgit v1.2.3