diff options
author | Arthur de Jong <arthur@arthurdejong.org> | 2013-12-02 23:33:46 +0100 |
---|---|---|
committer | Arthur de Jong <arthur@arthurdejong.org> | 2013-12-02 23:42:02 +0100 |
commit | 27bd8bff7d0ad778df6ca0e67380546aa1a80fb1 (patch) | |
tree | 055a19a4e1282eeefd29331f91490c7d57a57ec4 | |
parent | 55365f948b6697e6bcd02bc203b93d37a285762e (diff) |
Only convert content if link has encoding
This fixes an issue with calling tidy when the character encoding of the
page could not be determined.
-rw-r--r-- | webcheck/parsers/html/calltidy.py | 3 |
1 file changed, 2 insertions, 1 deletion
```diff
diff --git a/webcheck/parsers/html/calltidy.py b/webcheck/parsers/html/calltidy.py
index 1404444..57b8efb 100644
--- a/webcheck/parsers/html/calltidy.py
+++ b/webcheck/parsers/html/calltidy.py
@@ -32,7 +32,8 @@ def parse(content, link):
     # only call tidy on internal pages
     if link.is_internal:
         # force encoding of the content to UTF-8
-        content = content.decode(link.encoding).encode('utf-8')
+        if link.encoding:
+            content = content.decode(link.encoding).encode('utf-8')
         t = tidy.parseString(content, **config.TIDY_OPTIONS)
         for err in t.errors:
             # error messages are escaped so we unescape them
```