From 27bd8bff7d0ad778df6ca0e67380546aa1a80fb1 Mon Sep 17 00:00:00 2001
From: Arthur de Jong
Date: Mon, 2 Dec 2013 23:33:46 +0100
Subject: Only convert content if link has encoding

This fixes an issue for calling tidy when the character encoding of the
page could not be determined.
---
 webcheck/parsers/html/calltidy.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/webcheck/parsers/html/calltidy.py b/webcheck/parsers/html/calltidy.py
index 1404444..57b8efb 100644
--- a/webcheck/parsers/html/calltidy.py
+++ b/webcheck/parsers/html/calltidy.py
@@ -32,7 +32,8 @@ def parse(content, link):
     # only call tidy on internal pages
     if link.is_internal:
         # force encoding of the content to UTF-8
-        content = content.decode(link.encoding).encode('utf-8')
+        if link.encoding:
+            content = content.decode(link.encoding).encode('utf-8')
         t = tidy.parseString(content, **config.TIDY_OPTIONS)
         for err in t.errors:
             # error messages are escaped so we unescape them
-- 
cgit v1.2.3