From d9defc8b514e5f2d9c545de23054e416bd7bd2ab Mon Sep 17 00:00:00 2001 From: Arthur de Jong <arthur@arthurdejong.org> Date: Sat, 14 Apr 2018 14:01:51 +0200 Subject: Get files ready for 1.9 release --- update/isil.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'update/isil.py') diff --git a/update/isil.py b/update/isil.py index d86bdef..3ef51b6 100755 --- a/update/isil.py +++ b/update/isil.py @@ -32,7 +32,7 @@ import BeautifulSoup spaces_re = re.compile(r'\s+', re.UNICODE) # the web page that holds information on the ISIL authorities -download_url = 'http://biblstandard.dk/isil/' +download_url = 'https://english.slks.dk/libraries/library-standards/isil/' def clean(s): @@ -44,7 +44,9 @@ def parse(f): """Parse the specified file.""" print('# generated from ISIL Registration Authority, downloaded from') print('# %s' % download_url) - soup = BeautifulSoup.BeautifulSoup(f, convertEntities='html') + # We hack the HTML to insert missing <TR> elements + content = f.read().replace('</TR>', '</TR><TR>') + soup = BeautifulSoup.BeautifulSoup(content, convertEntities='html') # find all table rows for tr in soup.findAll('tr'): # find the rows with four columns of text -- cgit v1.2.3