diff options
author | Arthur de Jong <arthur@arthurdejong.org> | 2018-10-14 21:24:41 +0200 |
---|---|---|
committer | Arthur de Jong <arthur@arthurdejong.org> | 2018-10-14 21:24:41 +0200 |
commit | a68f3ca26006f86110f555b6820e836fc944c7f9 (patch) | |
tree | 80d1e7297121f22c76c02b8b2469b126f4591c58 /update | |
parent | 6b85f91f64c38f7c1641d2a7e3019c27f5329800 (diff) |
Get files ready for 1.10 release (tag: 1.10)
Diffstat (limited to 'update')
-rwxr-xr-x | update/my_bp.py | 20 |
1 files changed, 13 insertions, 7 deletions
diff --git a/update/my_bp.py b/update/my_bp.py index 3cf1227..49d7ffa 100755 --- a/update/my_bp.py +++ b/update/my_bp.py @@ -23,17 +23,22 @@ birthplace code from the National Registration Department of Malaysia.""" import re -import urllib from collections import defaultdict import BeautifulSoup +import requests + # URLs that are downloaded state_list_url = 'http://www.jpn.gov.my/informasi/kod-negeri/' country_list_url = 'http://www.jpn.gov.my/en/informasi/kod-negara/' +# The user agent that will be passed in requests +user_agent = 'Mozilla/5.0 (compatible; python-stdnum updater; +https://arthurdejong.org/python-stdnum/)' + + spaces_re = re.compile(r'\s+', re.UNICODE) @@ -59,18 +64,19 @@ def parse(f): if __name__ == '__main__': + headers = { + 'User-Agent': user_agent, + } results = defaultdict(lambda: defaultdict(set)) # read the states - # f = open('/tmp/states.html', 'r') - f = urllib.urlopen(state_list_url) - for state, bps in parse(f): + response = requests.get(state_list_url, headers=headers) + for state, bps in parse(response.text): for bp in bps.split(','): results[bp.strip()]['state'] = state results[bp.strip()]['countries'].add('Malaysia') # read the countries - # f = open('/tmp/countries.html', 'r') - f = urllib.urlopen(country_list_url) - for country, bp in parse(f): + response = requests.get(country_list_url, headers=headers) + for country, bp in parse(response.text): results[bp]['countries'].add(country) # print the results print('# generated from National Registration Department of Malaysia, downloaded from') |