Arthur de Jong

Open Source / Free Software developer

summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Arthur de Jong <arthur@arthurdejong.org> 2006-05-06 17:44:00 +0200
committer: Arthur de Jong <arthur@arthurdejong.org> 2006-05-06 17:44:00 +0200
commit: 76170833713c2fc72cefbefa28b8c7ef6a47ef80 (patch)
tree: 9ba8982f9d53b5fb1cfadf7df9d028ec91978321
parent: 049d1d72733bad5e56e1e212cc932274ca64bd37 (diff)
implement checking for id and name tags in anchors
git-svn-id: http://arthurdejong.org/svn/webcheck/webcheck@251 86f53f14-5ff3-0310-afe5-9b438ce3f40c
-rw-r--r-- parsers/html.py 51
1 files changed, 39 insertions, 12 deletions
diff --git a/parsers/html.py b/parsers/html.py
index b7fe892..fbbe865 100644
--- a/parsers/html.py
+++ b/parsers/html.py
@@ -139,19 +139,46 @@ class _MyHTMLParser(HTMLParser.HTMLParser):
# <img src="url">
elif tag == 'img' and attrs.has_key('src'):
self.embedded.append(self._cleanurl(attrs['src']))
- # <a href="url">
- elif tag == 'a' and attrs.has_key('href'):
- self.children.append(self._cleanurl(attrs['href']))
- # <a name="#anchor">
- elif tag == 'a' and attrs.has_key('name'):
- anchor = self._cleanurl(attrs['name'],'anchor')
- if anchor in self.anchors:
+ # <a href="url" name="anchor" id="anchor">
+ elif tag == 'a':
+ # <a href="url">
+ if attrs.has_key('href'):
+ self.children.append(self._cleanurl(attrs['href']))
+ # <a name="anchor">
+ a_name = None
+ if attrs.has_key('name'):
+ a_name = self._cleanurl(attrs['name'], 'anchor')
+ # <a id="anchor">
+ a_id = None
+ if attrs.has_key('id'):
+ a_id = self._cleanurl(attrs['id'], 'anchor')
+ # if both id and name are used they should be the same
+ if a_id and a_name and a_id != a_name:
+ # add problem
self.link.add_pageproblem(
- 'anchor "%(anchor)s" defined again %(location)s'
- % { 'anchor': anchor,
- 'location': self._location() })
- else:
- self.anchors.append(anchor)
+ 'anchors defined in name and id attributes do not match %(location)s'
+ % { 'location': self._location() })
+ elif a_id == a_name:
+ # ignore id if it's the same as name
+ a_id = None
+ # <a name="anchor">
+ if a_name:
+ if a_name in self.anchors:
+ self.link.add_pageproblem(
+ 'anchor "%(anchor)s" defined again %(location)s'
+ % { 'anchor': a_name,
+ 'location': self._location() })
+ else:
+ self.anchors.append(a_name)
+ # <a id="anchor">
+ if a_id:
+ if a_id in self.anchors:
+ self.link.add_pageproblem(
+ 'anchor "%(anchor)s" defined again %(location)s'
+ % { 'anchor': a_id,
+ 'location': self._location() })
+ else:
+ self.anchors.append(a_id)
# <frameset><frame src="url"...>...</frameset>
elif tag == 'frame' and attrs.has_key('src'):
self.embedded.append(self._cleanurl(attrs['src']))