diff options
Diffstat (limited to 'getnace.py')
-rwxr-xr-x | getnace.py | 74 |
1 files changed, 74 insertions, 0 deletions
diff --git a/getnace.py b/getnace.py new file mode 100755 index 0000000..0830563 --- /dev/null +++ b/getnace.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +# getnace.py - script to get the NACE v2 catalogue +# +# Copyright (C) 2017 Arthur de Jong +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301 USA + +"""This script downloads XML data from the European commission RAMON Eurostat +Metadata Server and extracts the information that is used for validating NACE +codes.""" + +from xml.etree import ElementTree +import cgi +import urllib.request + + +# the location of the ISBN Ranges XML file +download_url = 'http://ec.europa.eu/eurostat/ramon/nomenclatures/index.cfm?TargetUrl=ACT_OTH_CLS_DLD&StrNom=NACE_REV2&StrFormat=XML&StrLanguageCode=EN' + + +def get(f=None): + if f is None: + f = urllib.request.urlopen(download_url) + _, params = cgi.parse_header(f.info().get('Content-Disposition', '')) + filename = params.get('filename', '?') + yield '# generated from %s, downloaded from' % filename + yield '# %s' % download_url + else: + yield '# generated from %s' % f + + # parse XML document + doc = ElementTree.parse(f).getroot() + + # output header + yield '# %s: %s' % ( + doc.find('Classification').get('id'), + doc.find('Classification/Label/LabelText[@language="EN"]').text) + + for item in doc.findall('Classification/Item'): + number = item.get('id') + level = int(item.get('idLevel', 0)) + label = item.find('Label/LabelText[@language="EN"]').text + isic = item.find( + 'Property[@genericName="ISIC4_REF"]/PropertyQualifier/' + + 'PropertyText').text + if level == 1: + section = number + yield '%s label="%s" isic="%s"' % (number, label, isic) + elif level == 2: + yield '%s section="%s" label="%s" isic="%s"' % ( + number, section, label, isic) + else: + yield '%s%s label="%s" isic="%s"' % ( + ' ' * (level - 2), number[level], label, isic) + + +if __name__ == '__main__': + #get('NACE_REV2_20170326_162216.xml') + for row in get(): + print(row) |