diff options
author | Arthur de Jong <arthur@arthurdejong.org> | 2018-10-14 21:24:41 +0200 |
---|---|---|
committer | Arthur de Jong <arthur@arthurdejong.org> | 2018-10-14 21:24:41 +0200 |
commit | a68f3ca26006f86110f555b6820e836fc944c7f9 (patch) | |
tree | 80d1e7297121f22c76c02b8b2469b126f4591c58 /update | |
parent | 6b85f91f64c38f7c1641d2a7e3019c27f5329800 (diff) |
Get files ready for 1.10 release (tag: 1.10)
Diffstat (limited to 'update')
-rwxr-xr-x | update/my_bp.py | 20 |
1 files changed, 13 insertions, 7 deletions
diff --git a/update/my_bp.py b/update/my_bp.py index 3cf1227..49d7ffa 100755 --- a/update/my_bp.py +++ b/update/my_bp.py @@ -23,17 +23,22 @@ birthplace code from the National Registration Department of Malaysia.""" import re -import urllib from collections import defaultdict import BeautifulSoup +import requests + # URLs that are downloaded state_list_url = 'http://www.jpn.gov.my/informasi/kod-negeri/' country_list_url = 'http://www.jpn.gov.my/en/informasi/kod-negara/' +# The user agent that will be passed in requests +user_agent = 'Mozilla/5.0 (compatible; python-stdnum updater; +https://arthurdejong.org/python-stdnum/)' + + spaces_re = re.compile(r'\s+', re.UNICODE) @@ -59,18 +64,19 @@ def parse(f): if __name__ == '__main__': + headers = { + 'User-Agent': user_agent, + } results = defaultdict(lambda: defaultdict(set)) # read the states - # f = open('/tmp/states.html', 'r') - f = urllib.urlopen(state_list_url) - for state, bps in parse(f): + response = requests.get(state_list_url, headers=headers) + for state, bps in parse(response.text): for bp in bps.split(','): results[bp.strip()]['state'] = state results[bp.strip()]['countries'].add('Malaysia') # read the countries - # f = open('/tmp/countries.html', 'r') - f = urllib.urlopen(country_list_url) - for country, bp in parse(f): + response = requests.get(country_list_url, headers=headers) + for country, bp in parse(response.text): results[bp]['countries'].add(country) # print the results print('# generated from National Registration Department of Malaysia, downloaded from') |