diff options
author | Arthur de Jong <arthur@arthurdejong.org> | 2018-04-14 14:01:51 +0200 |
---|---|---|
committer | Arthur de Jong <arthur@arthurdejong.org> | 2018-04-14 14:01:51 +0200 |
commit | d9defc8b514e5f2d9c545de23054e416bd7bd2ab (patch) | |
tree | 4052ae140c42236632e28f815ab1fcf2758a3244 /update/isil.py | |
parent | e200656d89de638b716d24da928bc57bc41b9e3e (diff) |
Get files ready for 1.9 release (tag: 1.9)
Diffstat (limited to 'update/isil.py')
-rwxr-xr-x | update/isil.py | 6 |
1 files changed, 4 insertions, 2 deletions
```diff
diff --git a/update/isil.py b/update/isil.py
index d86bdef..3ef51b6 100755
--- a/update/isil.py
+++ b/update/isil.py
@@ -32,7 +32,7 @@ import BeautifulSoup
 spaces_re = re.compile(r'\s+', re.UNICODE)
 
 # the web page that holds information on the ISIL authorities
-download_url = 'http://biblstandard.dk/isil/'
+download_url = 'https://english.slks.dk/libraries/library-standards/isil/'
 
 
 def clean(s):
@@ -44,7 +44,9 @@ def parse(f):
     """Parse the specified file."""
     print('# generated from ISIL Registration Authority, downloaded from')
     print('# %s' % download_url)
-    soup = BeautifulSoup.BeautifulSoup(f, convertEntities='html')
+    # We hack the HTML to insert missing <TR> elements
+    content = f.read().replace('</TR>', '</TR><TR>')
+    soup = BeautifulSoup.BeautifulSoup(content, convertEntities='html')
     # find all table rows
     for tr in soup.findAll('tr'):
         # find the rows with four columns of text
```