#!/usr/bin/env python
# coding: utf-8
# update/at_postleitzahl.py - download list of Austrian postal codes
#
# Copyright (C) 2018 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA
"""This download list of postal codes from Austrian Post."""
from __future__ import print_function, unicode_literals

import os
import os.path
import re
import urllib

import BeautifulSoup
import xlrd
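
# urljoin is provided by urllib.parse on Python 3 and by urlparse on
# Python 2, so try the new location first and fall back otherwise.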
try:
    from urllib.parse import urljoin
except ImportError:
    from urlparse import urljoin

# The page that contains a link to the downloadable spreadsheet with current
# Austrian postal codes
base_url = 'https://www.post.at/en/business_advertise_products_and_services_addresses_postcodes.php'
# The list of regions that can be used in the document.
regions = {
    'B': 'Burgenland',
    'K': 'Kärnten',
    'N': 'Niederösterreich',
    'O': 'Oberösterreich',
    'Sa': 'Salzburg',
    'St': 'Steiermark',
    'T': 'Tirol',
    'V': 'Vorarlberg',
    'W': 'Wien',
}

def find_download_url():
    """Extract the spreadsheet URL from the Austrian Post website."""
    f = urllib.urlopen(base_url)
    soup = BeautifulSoup.BeautifulSoup(f, convertEntities='html')
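    # the download link is the anchor whose href points at the
    # PLZ_Verzeichnis spreadsheet in the downloads section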
    url = soup.find(
        'a',
        attrs=dict(
            href=re.compile(r'.*/downloads/PLZ_Verzeichnis.*')))['href']
    return urljoin(base_url, url.split('?')[0])

def get_postal_codes(download_url):
    """Download the Austrian postal codes spreadsheet."""
    document = urllib.urlopen(download_url).read()
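    # parse the spreadsheet from memory, sending xlrd's log output to
    # /dev/null to silence warnings about the workbook's formatting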
    workbook = xlrd.open_workbook(
        file_contents=document, logfile=open(os.devnull, 'w'))
    sheet = workbook.sheet_by_index(0)
    rows = sheet.get_rows()
    # the first row contains the column names
    columns = [column.value.lower() for column in next(rows)]
    # the other rows contain data
    for row in rows:
        data = dict(zip(
            columns,
            [column.value for column in row]))
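        # only keep entries that are marked as addressable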
        if data['adressierbar'].lower() == 'ja':
            yield (
                data['plz'],
                data['ort'],
                regions.get(data['bundesland']))

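# When run as a script, print a header naming the downloaded file followed by
# one line per addressable postal code with its location and region.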
if __name__ == '__main__':
    # download/parse the information
    download_url = find_download_url()
    # print header
    print('# generated from %s downloaded from ' %
          os.path.basename(download_url))
    print('# %s' % base_url)
    # build an ordered list of postal codes
    for code, location, region in sorted(get_postal_codes(download_url)):
        info = '%s location="%s" region="%s"' % (code, location, region)
        print(info.encode('utf-8'))