Arthur de Jong

Open Source / Free Software developer

summaryrefslogtreecommitdiffstats
path: root/update/eu_nace.py
diff options
context:
space:
mode:
Diffstat (limited to 'update/eu_nace.py')
-rwxr-xr-xupdate/eu_nace.py65
1 files changed, 65 insertions, 0 deletions
diff --git a/update/eu_nace.py b/update/eu_nace.py
new file mode 100755
index 0000000..af831f1
--- /dev/null
+++ b/update/eu_nace.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+
+# update/eu_nace.py - script to get the NACE v2 catalogue
+#
+# Copyright (C) 2017-2018 Arthur de Jong
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+"""This script downloads XML data from the European Commission RAMON Eurostat
+Metadata Server and extracts the information that is used for validating NACE
+codes."""
+
+import cgi
+import urllib.request
+from xml.etree import ElementTree
+
+
+# the location of the NACE Rev. 2 classification XML download
+download_url = 'http://ec.europa.eu/eurostat/ramon/nomenclatures/index.cfm?TargetUrl=ACT_OTH_CLS_DLD&StrNom=NACE_REV2&StrFormat=XML&StrLanguageCode=EN'
+
+
+if __name__ == '__main__':
+    # Download the catalogue XML and write the extracted entries to stdout.
+    # The response is used as a context manager so that the HTTP connection
+    # is closed once the document has been read, even if parsing fails.
+    with urllib.request.urlopen(download_url) as f:
+        # NOTE: the cgi module is deprecated (PEP 594) and removed in
+        # Python 3.13; this call needs replacing before running there.
+        _, params = cgi.parse_header(f.info().get('Content-Disposition', ''))
+        filename = params.get('filename', '?')
+        print('# generated from %s, downloaded from' % filename)
+        print('# %s' % download_url)
+
+        # parse XML document
+        doc = ElementTree.parse(f).getroot()
+
+    # output header
+    print('# %s: %s' % (
+        doc.find('Classification').get('id'),
+        doc.find('Classification/Label/LabelText[@language="EN"]').text))
+
+    for item in doc.findall('Classification/Item'):
+        number = item.get('id')
+        level = int(item.get('idLevel', 0))
+        label = item.find('Label/LabelText[@language="EN"]').text
+        isic = item.find(
+            'Property[@genericName="ISIC4_REF"]/PropertyQualifier/' +
+            'PropertyText').text
+        if level == 1:
+            # remember the most recent level 1 id for the level 2 lines
+            section = number
+            print('%s label="%s" isic="%s"' % (number, label, isic))
+        elif level == 2:
+            # NOTE(review): assumes a level 1 item always precedes the first
+            # level 2 item, otherwise section is unbound here - confirm
+            print('%s section="%s" label="%s" isic="%s"' % (
+                number, section, label, isic))
+        else:
+            # deeper levels are indented by one space per level and only
+            # output the single character at index `level` of the id
+            # (presumably the digit that this level adds - verify against
+            # the downloaded data)
+            print('%s%s label="%s" isic="%s"' % (
+                ' ' * (level - 2), number[level], label, isic))