diff options
author | Arthur de Jong <arthur@arthurdejong.org> | 2006-05-06 17:44:00 +0200 |
---|---|---|
committer | Arthur de Jong <arthur@arthurdejong.org> | 2006-05-06 17:44:00 +0200 |
commit | 76170833713c2fc72cefbefa28b8c7ef6a47ef80 (patch) | |
tree | 9ba8982f9d53b5fb1cfadf7df9d028ec91978321 | |
parent | 049d1d72733bad5e56e1e212cc932274ca64bd37 (diff) |
implement checking for id and name attributes in anchors
git-svn-id: http://arthurdejong.org/svn/webcheck/webcheck@251 86f53f14-5ff3-0310-afe5-9b438ce3f40c
-rw-r--r-- | parsers/html.py | 51 |
1 files changed, 39 insertions, 12 deletions
diff --git a/parsers/html.py b/parsers/html.py index b7fe892..fbbe865 100644 --- a/parsers/html.py +++ b/parsers/html.py @@ -139,19 +139,46 @@ class _MyHTMLParser(HTMLParser.HTMLParser): # <img src="url"> elif tag == 'img' and attrs.has_key('src'): self.embedded.append(self._cleanurl(attrs['src'])) - # <a href="url"> - elif tag == 'a' and attrs.has_key('href'): - self.children.append(self._cleanurl(attrs['href'])) - # <a name="#anchor"> - elif tag == 'a' and attrs.has_key('name'): - anchor = self._cleanurl(attrs['name'],'anchor') - if anchor in self.anchors: + # <a href="url" name="anchor" id="anchor"> + elif tag == 'a': + # <a href="url"> + if attrs.has_key('href'): + self.children.append(self._cleanurl(attrs['href'])) + # <a name="anchor"> + a_name = None + if attrs.has_key('name'): + a_name = self._cleanurl(attrs['name'], 'anchor') + # <a id="anchor"> + a_id = None + if attrs.has_key('id'): + a_id = self._cleanurl(attrs['id'], 'anchor') + # if both id and name are used they should be the same + if a_id and a_name and a_id != a_name: + # add problem self.link.add_pageproblem( - 'anchor "%(anchor)s" defined again %(location)s' - % { 'anchor': anchor, - 'location': self._location() }) - else: - self.anchors.append(anchor) + 'anchors defined in name and id attributes do not match %(location)s' + % { 'location': self._location() }) + elif a_id == a_name: + # ignore id if it's the same as name + a_id = None + # <a name="anchor"> + if a_name: + if a_name in self.anchors: + self.link.add_pageproblem( + 'anchor "%(anchor)s" defined again %(location)s' + % { 'anchor': a_name, + 'location': self._location() }) + else: + self.anchors.append(a_name) + # <a id="anchor"> + if a_id: + if a_id in self.anchors: + self.link.add_pageproblem( + 'anchor "%(anchor)s" defined again %(location)s' + % { 'anchor': a_id, + 'location': self._location() }) + else: + self.anchors.append(a_id) # <frameset><frame src="url"...>...</frameset> elif tag == 'frame' and attrs.has_key('src'): 
self.embedded.append(self._cleanurl(attrs['src'])) |