Arthur de Jong

Open Source / Free Software developer

summary | refs | log | tree | commit | diff | stats
path: root/update/isil.py
diff options
context:
space:
mode:
author: Arthur de Jong <arthur@arthurdejong.org> 2018-04-14 14:01:51 +0200
committer: Arthur de Jong <arthur@arthurdejong.org> 2018-04-14 14:01:51 +0200
commit: d9defc8b514e5f2d9c545de23054e416bd7bd2ab (patch)
tree: 4052ae140c42236632e28f815ab1fcf2758a3244 /update/isil.py
parent: e200656d89de638b716d24da928bc57bc41b9e3e (diff)
Get files ready for 1.9 release (tag: 1.9)
Diffstat (limited to 'update/isil.py')
-rwxr-xr-x update/isil.py 6
1 file changed, 4 insertions, 2 deletions
diff --git a/update/isil.py b/update/isil.py
index d86bdef..3ef51b6 100755
--- a/update/isil.py
+++ b/update/isil.py
@@ -32,7 +32,7 @@ import BeautifulSoup
spaces_re = re.compile(r'\s+', re.UNICODE)
# the web page that holds information on the ISIL authorities
-download_url = 'http://biblstandard.dk/isil/'
+download_url = 'https://english.slks.dk/libraries/library-standards/isil/'
def clean(s):
@@ -44,7 +44,9 @@ def parse(f):
"""Parse the specified file."""
print('# generated from ISIL Registration Authority, downloaded from')
print('# %s' % download_url)
- soup = BeautifulSoup.BeautifulSoup(f, convertEntities='html')
+ # We hack the HTML to insert missing <TR> elements
+ content = f.read().replace('</TR>', '</TR><TR>')
+ soup = BeautifulSoup.BeautifulSoup(content, convertEntities='html')
# find all table rows
for tr in soup.findAll('tr'):
# find the rows with four columns of text