From fbbb5503b1ed31b350c16b8c60f7de08c7a2ad5e Mon Sep 17 00:00:00 2001 From: Arthur de Jong Date: Sun, 10 Mar 2019 17:23:44 +0100 Subject: Switch update scripts to beautifulsoup4 --- update/isil.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'update/isil.py') diff --git a/update/isil.py b/update/isil.py index 3ef51b6..efa4163 100755 --- a/update/isil.py +++ b/update/isil.py @@ -2,7 +2,7 @@ # update/isil.py - script to donwload ISIL agencies # -# Copyright (C) 2011-2018 Arthur de Jong +# Copyright (C) 2011-2019 Arthur de Jong # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -26,7 +26,11 @@ code prefixes.""" import re import urllib -import BeautifulSoup + +try: + from bs4 import BeautifulSoup +except ImportError: + from BeautifulSoup import BeautifulSoup spaces_re = re.compile(r'\s+', re.UNICODE) @@ -46,7 +50,7 @@ def parse(f): print('# %s' % download_url) # We hack the HTML to insert missing elements content = f.read().replace('', '') - soup = BeautifulSoup.BeautifulSoup(content, convertEntities='html') + soup = BeautifulSoup(content) # find all table rows for tr in soup.findAll('tr'): # find the rows with four columns of text -- cgit v1.2.3