Arthur de Jong

Open Source / Free Software developer

summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Arthur de Jong <arthur@arthurdejong.org> 2013-12-02 23:33:46 +0100
committer: Arthur de Jong <arthur@arthurdejong.org> 2013-12-02 23:42:02 +0100
commit: 27bd8bff7d0ad778df6ca0e67380546aa1a80fb1 (patch)
tree: 055a19a4e1282eeefd29331f91490c7d57a57ec4
parent: 55365f948b6697e6bcd02bc203b93d37a285762e (diff)
Only convert content if link has encoding
This fixes an issue with calling tidy when the character encoding of the page could not be determined.
-rw-r--r-- webcheck/parsers/html/calltidy.py 3
1 file changed, 2 insertions, 1 deletion
diff --git a/webcheck/parsers/html/calltidy.py b/webcheck/parsers/html/calltidy.py
index 1404444..57b8efb 100644
--- a/webcheck/parsers/html/calltidy.py
+++ b/webcheck/parsers/html/calltidy.py
@@ -32,7 +32,8 @@ def parse(content, link):
# only call tidy on internal pages
if link.is_internal:
# force encoding of the content to UTF-8
- content = content.decode(link.encoding).encode('utf-8')
+ if link.encoding:
+ content = content.decode(link.encoding).encode('utf-8')
t = tidy.parseString(content, **config.TIDY_OPTIONS)
for err in t.errors:
# error messages are escaped so we unescape them