Arthur de Jong

Open Source / Free Software developer

summary | refs | log | tree | commit | diff | stats
path: root/update/my_bp.py
diff options
context:
space:
mode:
author Arthur de Jong <arthur@arthurdejong.org> 2018-10-14 21:24:41 +0200
committer Arthur de Jong <arthur@arthurdejong.org> 2018-10-14 21:24:41 +0200
commit a68f3ca26006f86110f555b6820e836fc944c7f9 (patch)
tree 80d1e7297121f22c76c02b8b2469b126f4591c58 /update/my_bp.py
parent 6b85f91f64c38f7c1641d2a7e3019c27f5329800 (diff)
Get files ready for 1.10 release (tag: 1.10)
Diffstat (limited to 'update/my_bp.py')
-rwxr-xr-x update/my_bp.py 20
1 files changed, 13 insertions, 7 deletions
diff --git a/update/my_bp.py b/update/my_bp.py
index 3cf1227..49d7ffa 100755
--- a/update/my_bp.py
+++ b/update/my_bp.py
@@ -23,17 +23,22 @@
birthplace code from the National Registration Department of Malaysia."""
import re
-import urllib
from collections import defaultdict
import BeautifulSoup
+import requests
+
# URLs that are downloaded
state_list_url = 'http://www.jpn.gov.my/informasi/kod-negeri/'
country_list_url = 'http://www.jpn.gov.my/en/informasi/kod-negara/'
+# The user agent that will be passed in requests
+user_agent = 'Mozilla/5.0 (compatible; python-stdnum updater; +https://arthurdejong.org/python-stdnum/)'
+
+
spaces_re = re.compile(r'\s+', re.UNICODE)
@@ -59,18 +64,19 @@ def parse(f):
if __name__ == '__main__':
+ headers = {
+ 'User-Agent': user_agent,
+ }
results = defaultdict(lambda: defaultdict(set))
# read the states
- # f = open('/tmp/states.html', 'r')
- f = urllib.urlopen(state_list_url)
- for state, bps in parse(f):
+ response = requests.get(state_list_url, headers=headers)
+ for state, bps in parse(response.text):
for bp in bps.split(','):
results[bp.strip()]['state'] = state
results[bp.strip()]['countries'].add('Malaysia')
# read the countries
- # f = open('/tmp/countries.html', 'r')
- f = urllib.urlopen(country_list_url)
- for country, bp in parse(f):
+ response = requests.get(country_list_url, headers=headers)
+ for country, bp in parse(response.text):
results[bp]['countries'].add(country)
# print the results
print('# generated from National Registration Department of Malaysia, downloaded from')