#!/usr/bin/env python
# getismsi.py - script to download data from Wikipedia to build the database
#
# Copyright (C) 2011 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA
from collections import defaultdict
import re
import urllib
# Location of the page that is downloaded and parsed; the action=raw
# query parameter is part of the request so the page source is fetched.
mcc_list_url = (
    'https://en.wikipedia.org/w/index.php'
    '?title=Mobile_country_code&action=raw'
)
# Exact-match replacements applied by cleanup_value() after markup has
# been stripped: the key is the cleaned-up text found on the page, the
# value is the name that is stored instead.
cleanup_replacements = {
    'Anguilla (United Kingdom)': 'Anguilla',
    'Argentina|Argentine Republic': 'Argentina',
    'Aruba (Kingdom of the Netherlands|Netherlands)': 'Aruba',
    'Azerbaijan|Azerbaijani Republic': 'Azerbaijan',
    'Bermuda (United Kingdom)': 'Bermuda',
    'British Virgin Islands (United Kingdom)': 'British Virgin Islands',
    'Brunei|Brunei Darussalam': 'Brunei',
    'Cayman Islands': 'Cayman Islands (United Kingdom)',
    'Cayman Islands (United Kingdom)': 'Cayman Islands (United Kingdom)',
    'Czech Rep.': 'Czech Republic',
    'Democratic People\'s Republic of Korea|Korea, North': 'North Korea',
    'Denmark (Kingdom of Denmark)': 'Denmark',
    'Faroe Islands (Kingdom of Denmark)': 'Faroe Islands (Denmark)',
    'French Polynesia (France)': 'French Polynesia',
    'Gabon|Gabonese Republic': 'Gabon',
    'Georgia (country)|Georgia': 'Georgia',
    'Gibraltar': 'Gibraltar (United Kingdom)',
    'Gibraltar (United Kingdom)': 'Gibraltar (United Kingdom)',
    'Greenland (Kingdom of Denmark)': 'Greenland (Denmark)',
    'Guadeloupe': 'Guadeloupe (France)',
    'Hong Kong (People\'s Republic of China|PRC)': 'Hong Kong (China)',
    'Hong Kong (Special Administrative Region of People\'s Republic of China)': 'Hong Kong (China)',
    'Korea (Rep. of)': 'South Korea',
    'Kyrgyz Republic': 'Kyrgyzstan',
    'Lao People\'s Democratic Republic|Laos': 'Laos',
    'Macau (People\'s Republic of China)': 'Macau (China)',
    'Macau (People\'s Republic of China|PRC)': 'Macau (China)',
    'Martinique': 'Martinique (France)',
    'Moldova (Republic of)': 'Moldova',
    'Montenegro (Republic of)': 'Montenegro',
    'Netherlands (Kingdom of the Netherlands)': 'Netherlands',
    'Palestinian Authority': 'Palestinian territories',
    'Palestinian territories|Palestine': 'Palestinian territories',
    'People\'s Republic of China|China': 'China',
    'Puerto Rico (United States)': 'Puerto Rico',
    'Republic of Ireland|Ireland': 'Ireland',
    'Republic of Korea|Korea, South': 'South Korea',
    'Russian Federation': 'Russian Federation',
    'Rwanda|Rwandese Republic': 'Rwanda',
    'Serbia (Republic of)': 'Serbia',
    'Somali Democratic Republic|Somalia': 'Somalia',
    'Syrian Arab Republic': 'Syria',
    'Syrian Arab Republic|Syria': 'Syria',
    'Turks and Caicos Islands (United Kingdom)': 'Turks and Caicos Islands',
    'United States': 'United States of America',
    'United States Virgin Islands (United States)': 'United States Virgin Islands',
    'Venezuela (Bolivarian Republic of)': 'Venezuela',
    'Vietnam|Viet Nam': 'Vietnam',
}

# Matches wikitext reference markup: either a self-closing <ref ... /> tag
# or a <ref ...>...</ref> pair (non-greedy, so several references in one
# value are removed one by one). A bare character class such as [.*?]
# would instead delete every '.', '*' and '?' from the value, which makes
# keys like 'Czech Rep.' in cleanup_replacements impossible to match.
remove_ref_re = re.compile(r'<ref\b[^>]*?/>|<ref\b[^>]*>.*?</ref>')


def cleanup_value(val):
    """Remove unneeded markup from the value.

    The square brackets of wiki links are dropped, reference markup is
    removed, a few common abbreviated link forms are collapsed and the
    result is finally looked up in cleanup_replacements. If there is no
    entry for it the cleaned-up text itself is returned.
    """
    # remove uninteresting things from value
    val = val.replace('[', '').replace(']', '').strip()
    val = remove_ref_re.sub('', val)
    # collapse "link target|label" pairs that may be embedded in a longer
    # value, so that the exact-match lookup below needs fewer entries
    val = val.replace('United Kingdom|UK', 'United Kingdom')
    val = val.replace('United States|US', 'United States')
    # strip again: removing a trailing reference can leave whitespace behind
    val = val.replace('New Zealand|NZ', 'New Zealand').strip()
    return cleanup_replacements.get(val, val)
def update_mncs(data, mcc, mnc, **kwargs):
    """Store the given properties for the mcc/mnc combination.

    Every keyword argument with a non-empty value is passed through
    cleanup_value() and merged into data[mcc][mnc]; properties that are
    already stored under the same name are overwritten. Keyword
    arguments with an empty value are ignored.
    """
    entry = data[mcc][mnc]
    cleaned = {}
    for name, value in kwargs.items():
        if not value:
            continue
        cleaned[name] = cleanup_value(value)
    entry.update(cleaned)
def get_mncs_from_wikipedia(data):
    """Update the collection of Mobile Country Codes from Wikipedia.

    This parses a Wikipedia page to extract the MCC and MNC, the first
    part of any IMSI, and stores the results.

    The page at mcc_list_url is downloaded and read line by line. A
    line of the form "==== Name - cc ====" sets the country name and
    (optional, lowercased) two-character code that apply to the lines
    that follow. A table row that starts with "| mcc || mnc" and has up
    to five more "||"-separated columns is passed to update_mncs(),
    which stores it in data[mcc][mnc].
    """
    # urlopen() lives in a different module on Python 2 and Python 3
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen
    from contextlib import closing

    # the group names are the ones read back with match.group() below
    mnc_country_re = re.compile(
        r'^====\s+(?P<country>.*?)(\s+-\s+(?P<cc>[^\s]{2}))?\s+====$')
    mnc_line_re = re.compile(
        r'^\|\s+(?P<mcc>[0-9]+)' +
        r'\s+\|\|\s+(?P<mnc>[0-9]+)' +
        r'(\s+\|\|\s+(?P<brand>[^|]*)' +
        r'(\s+\|\|\s+(?P<operator>[^|]*)' +
        r'(\s+\|\|\s+(?P<status>[^|]*)' +
        r'(\s+\|\|\s+(?P<bands>[^|]*)' +
        # a fifth optional column is matched but its value is not used
        r'(\s+\|\|\s+([^|]*)' +
        r')?)?)?)?)?')
    country = cc = ''
    # closing() makes sure the connection is released even if parsing fails
    with closing(urlopen(mcc_list_url)) as f:
        for line in f.readlines():
            # the response yields bytes on Python 3; on Python 2 the lines
            # already are str and are left untouched
            if not isinstance(line, str):
                line = line.decode('utf-8')
            line = line.strip()
            match = mnc_country_re.match(line)
            if match:
                country = match.group('country')
                cc = (match.group('cc') or '').lower()
            match = mnc_line_re.match(line)
            if match:
                update_mncs(data, match.group('mcc'), match.group('mnc'),
                            country=country, cc=cc,
                            brand=match.group('brand'),
                            operator=match.group('operator'),
                            status=match.group('status'),
                            bands=match.group('bands'))
if __name__ == '__main__':
    # download/parse the information
    data = defaultdict(lambda: defaultdict(dict))
    get_mncs_from_wikipedia(data)
    # print header; print() is always called with a single argument so the
    # output is the same whether print is a statement or a function
    print('# generated from various sources')
    print('# %s' % mcc_list_url)
    # go over mccs in sorted order
    for mcc in sorted(data):
        print('%s' % mcc)
        # sorted() returns a real list, also when keys() is a view
        mnc_list = sorted(data[mcc])
        for mnc in mnc_list:
            info = data[mcc][mnc]
            # properties are written as key="value" pairs, sorted by key
            properties = ''.join(
                [' %s="%s"' % (k, info[k]) for k in sorted(info)])
            print(' %s%s' % (mnc, properties))
        # add a range line that is as wide as the first mnc of this mcc
        # (skipped when there are no mncs at all)
        if mnc_list:
            width = len(mnc_list[0])
            print(' %s-%s' % (width * '0', width * '9'))