#!/usr/bin/env python3
# coding: utf-8

# update/at_postleitzahl.py - download list of Austrian postal codes
#
# Copyright (C) 2018-2019 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA

"""This download list of postal codes from Austrian Post."""

from __future__ import print_function, unicode_literals

import os
import os.path

import lxml.html
import requests
import xlrd


try:
    from urllib.parse import urljoin
except ImportError:
    from urlparse import urljoin


# The page on the Austrian Post website that contains a link to the
# downloadable spreadsheet with the current Austrian postal codes
# (find_download_url() fetches this page to locate that link)
base_url = 'https://www.post.at/en/business_advertise_products_and_services_addresses_postcodes.php'

# Mapping of the region abbreviations found in the "bundesland" column of
# the spreadsheet to the full region names that are written to the output
# (abbreviations missing from this mapping are looked up as None)
regions = {
    'B': 'Burgenland',
    'K': 'Kärnten',
    'N': 'Niederösterreich',
    'O': 'Oberösterreich',
    'Sa': 'Salzburg',
    'St': 'Steiermark',
    'T': 'Tirol',
    'V': 'Vorarlberg',
    'W': 'Wien',
}


def find_download_url():
    """Extract the spreadsheet URL from the Austrian Post website.

    Fetches base_url, looks for the first link whose href contains
    '/downloads/PLZ_Verzeichnis' and returns it as an absolute URL with any
    query string removed.

    Raises requests.HTTPError if the page could not be retrieved,
    requests.Timeout if the server does not respond in time and IndexError
    if the page does not contain a matching link.
    """
    # a timeout avoids hanging forever if the server stops responding
    response = requests.get(base_url, timeout=30)
    response.raise_for_status()
    document = lxml.html.document_fromstring(response.content)
    # only the first matching link is needed so stop searching once found
    url = next(
        (a.get('href')
         for a in document.findall('.//a[@href]')
         if '/downloads/PLZ_Verzeichnis' in a.get('href')),
        None)
    if url is None:
        # keep raising IndexError (as the bare [0] lookup did) but explain why
        raise IndexError(
            'no link to the postal code spreadsheet found on %s' % base_url)
    # drop the query string and resolve relative links against the page URL
    return urljoin(base_url, url.split('?')[0])


def get_postal_codes(download_url):
    """Download the Austrian postal codes spreadsheet.

    Yields a (postal code, location, region name) tuple for every row of
    the first sheet whose 'adressierbar' column is 'ja' (compared case
    insensitively). The region name is looked up in regions and is None
    for abbreviations that are not in that mapping.

    Raises requests.HTTPError if the spreadsheet could not be retrieved and
    requests.Timeout if the server does not respond in time.
    """
    # a timeout avoids hanging forever if the server stops responding
    response = requests.get(download_url, timeout=30)
    response.raise_for_status()
    # xlrd messages are discarded; the context manager ensures the handle to
    # the null device is closed once the generator finishes or is discarded
    with open(os.devnull, 'w') as logfile:
        workbook = xlrd.open_workbook(
            file_contents=response.content, logfile=logfile)
        sheet = workbook.sheet_by_index(0)
        rows = sheet.get_rows()
        # the first row contains the column names
        columns = [column.value.lower() for column in next(rows)]
        # the other rows contain data
        for row in rows:
            data = dict(zip(columns, (column.value for column in row)))
            if data['adressierbar'].lower() == 'ja':
                yield (
                    data['plz'],
                    data['ort'],
                    regions.get(data['bundesland']))


if __name__ == '__main__':
    # locate the spreadsheet on the website
    download_url = find_download_url()
    # describe the source of the data in the header
    filename = os.path.basename(download_url)
    print('# generated from %s downloaded from' % filename)
    print('# %s' % base_url)
    # collect all entries and output them ordered by postal code
    entries = list(get_postal_codes(download_url))
    entries.sort()
    for entry in entries:
        # each entry is a (code, location, region) tuple
        print('%s location="%s" region="%s"' % entry)