Arthur de Jong

Open Source / Free Software developer

summary | refs | log | tree | commit | diff | stats
path: root/update/isil.py
diff options
context:
space:
mode:
author: Arthur de Jong <arthur@arthurdejong.org> 2019-03-10 17:23:44 +0100
committer: Arthur de Jong <arthur@arthurdejong.org> 2019-03-10 17:23:44 +0100
commit: fbbb5503b1ed31b350c16b8c60f7de08c7a2ad5e (patch)
tree: 41dc0d25ca5e6917249c69e2bf0d4182c4eddee4 /update/isil.py
parent: 61a8a94146ea9bc03fa94af44957b14ad673dc49 (diff)
Switch update scripts to beautifulsoup4
Diffstat (limited to 'update/isil.py')
-rwxr-xr-x update/isil.py 10
1 files changed, 7 insertions, 3 deletions
diff --git a/update/isil.py b/update/isil.py
index 3ef51b6..efa4163 100755
--- a/update/isil.py
+++ b/update/isil.py
@@ -2,7 +2,7 @@
# update/isil.py - script to download ISIL agencies
#
-# Copyright (C) 2011-2018 Arthur de Jong
+# Copyright (C) 2011-2019 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -26,7 +26,11 @@ code prefixes."""
import re
import urllib
-import BeautifulSoup
+
+try:
+ from bs4 import BeautifulSoup
+except ImportError:
+ from BeautifulSoup import BeautifulSoup
spaces_re = re.compile(r'\s+', re.UNICODE)
@@ -46,7 +50,7 @@ def parse(f):
print('# %s' % download_url)
# We hack the HTML to insert missing <TR> elements
content = f.read().replace('</TR>', '</TR><TR>')
- soup = BeautifulSoup.BeautifulSoup(content, convertEntities='html')
+ soup = BeautifulSoup(content)
# find all table rows
for tr in soup.findAll('tr'):
# find the rows with four columns of text