Arthur de Jong

Open Source / Free Software developer

summary | refs | log | tree | commit | diff | stats
path: root/update
diff options
context:
space:
mode:
author Arthur de Jong <arthur@arthurdejong.org> 2019-03-10 17:23:44 +0100
committer Arthur de Jong <arthur@arthurdejong.org> 2019-03-10 17:23:44 +0100
commit fbbb5503b1ed31b350c16b8c60f7de08c7a2ad5e (patch)
tree 41dc0d25ca5e6917249c69e2bf0d4182c4eddee4 /update
parent 61a8a94146ea9bc03fa94af44957b14ad673dc49 (diff)
Switch update scripts to beautifulsoup4
Diffstat (limited to 'update')
-rwxr-xr-x update/at_postleitzahl.py 10
-rwxr-xr-x update/isil.py 10
-rwxr-xr-x update/my_bp.py 11
-rw-r--r-- update/requirements.txt 2
4 files changed, 23 insertions, 10 deletions
diff --git a/update/at_postleitzahl.py b/update/at_postleitzahl.py
index 4d2a993..173ee95 100755
--- a/update/at_postleitzahl.py
+++ b/update/at_postleitzahl.py
@@ -3,7 +3,7 @@
# update/at_postleitzahl.py - download list of Austrian postal codes
#
-# Copyright (C) 2018 Arthur de Jong
+# Copyright (C) 2018-2019 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -29,7 +29,6 @@ import os.path
import re
import urllib
-import BeautifulSoup
import xlrd
@@ -38,6 +37,11 @@ try:
except ImportError:
from urlparse import urljoin
+try:
+ from bs4 import BeautifulSoup
+except ImportError:
+ from BeautifulSoup import BeautifulSoup
+
# The page that contains a link to the downloadable spreadsheet with current
# Austrian postal codes
@@ -60,7 +64,7 @@ regions = {
def find_download_url():
"""Extract the spreadsheet URL from the Austrian Post website."""
f = urllib.urlopen(base_url)
- soup = BeautifulSoup.BeautifulSoup(f, convertEntities='html')
+ soup = BeautifulSoup(f)
url = soup.find(
'a',
attrs=dict(
diff --git a/update/isil.py b/update/isil.py
index 3ef51b6..efa4163 100755
--- a/update/isil.py
+++ b/update/isil.py
@@ -2,7 +2,7 @@
# update/isil.py - script to donwload ISIL agencies
#
-# Copyright (C) 2011-2018 Arthur de Jong
+# Copyright (C) 2011-2019 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -26,7 +26,11 @@ code prefixes."""
import re
import urllib
-import BeautifulSoup
+
+try:
+ from bs4 import BeautifulSoup
+except ImportError:
+ from BeautifulSoup import BeautifulSoup
spaces_re = re.compile(r'\s+', re.UNICODE)
@@ -46,7 +50,7 @@ def parse(f):
print('# %s' % download_url)
# We hack the HTML to insert missing <TR> elements
content = f.read().replace('</TR>', '</TR><TR>')
- soup = BeautifulSoup.BeautifulSoup(content, convertEntities='html')
+ soup = BeautifulSoup(content)
# find all table rows
for tr in soup.findAll('tr'):
# find the rows with four columns of text
diff --git a/update/my_bp.py b/update/my_bp.py
index ad9bc60..7337db3 100755
--- a/update/my_bp.py
+++ b/update/my_bp.py
@@ -2,7 +2,7 @@
# update/my_bp.py - script to download data from Malaysian government site
#
-# Copyright (C) 2013-2018 Arthur de Jong
+# Copyright (C) 2013-2019 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -25,10 +25,15 @@ birthplace code from the National Registration Department of Malaysia."""
import re
from collections import defaultdict
-import BeautifulSoup
import requests
+try:
+ from bs4 import BeautifulSoup
+except ImportError:
+ from BeautifulSoup import BeautifulSoup
+
+
# URLs that are downloaded
state_list_url = 'http://www.jpn.gov.my/informasi/kod-negeri/'
country_list_url = 'http://www.jpn.gov.my/en/informasi/kod-negara/'
@@ -48,7 +53,7 @@ def clean(s):
def parse(f):
"""Parse the specified file."""
- soup = BeautifulSoup.BeautifulSoup(f, convertEntities='html')
+ soup = BeautifulSoup(f)
# find all table rows
for tr in soup.find('div', {'class': 'box-content'}).findAll('tr'):
# find the rows with four columns of text
diff --git a/update/requirements.txt b/update/requirements.txt
index 0535937..c74ee0c 100644
--- a/update/requirements.txt
+++ b/update/requirements.txt
@@ -1,3 +1,3 @@
-BeautifulSoup
+beautifulsoup4
requests
xlrd