Arthur de Jong

Open Source / Free Software developer

summaryrefslogtreecommitdiffstats
path: root/getnace.py
diff options
context:
space:
mode:
Diffstat (limited to 'getnace.py')
-rwxr-xr-xgetnace.py74
1 files changed, 74 insertions, 0 deletions
diff --git a/getnace.py b/getnace.py
new file mode 100755
index 0000000..0830563
--- /dev/null
+++ b/getnace.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+# getnace.py - script to get the NACE v2 catalogue
+#
+# Copyright (C) 2017 Arthur de Jong
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+"""This script downloads XML data from the European commission RAMON Eurostat
+Metadata Server and extracts the information that is used for validating NACE
+codes."""
+
+from xml.etree import ElementTree
+import cgi
+import urllib.request
+
+
+# the location of the NACE Rev. 2 classification XML download
+download_url = 'http://ec.europa.eu/eurostat/ramon/nomenclatures/index.cfm?TargetUrl=ACT_OTH_CLS_DLD&StrNom=NACE_REV2&StrFormat=XML&StrLanguageCode=EN'
+
+
def get(f=None):
    """Yield the lines of the NACE data listing built from the XML export.

    If f is None the XML document is downloaded from download_url.
    Otherwise f is handed to ElementTree.parse(), so it may be a file name
    or an open file object.

    The generator first yields '#' comment lines that describe where the
    data came from and which classification it contains, followed by one
    line for every Classification/Item element in the document.
    """
    if f is None:
        # Close the HTTP response as soon as the document has been parsed
        # instead of keeping the connection open for the generator's life.
        with urllib.request.urlopen(download_url) as response:
            # get_filename() reads the filename parameter of the
            # Content-Disposition header and returns '?' when it is absent;
            # this avoids the cgi module, which Python 3.13 removed.
            filename = response.info().get_filename('?')
            doc = ElementTree.parse(response).getroot()
        yield '# generated from %s, downloaded from' % filename
        yield '# %s' % download_url
    else:
        yield '# generated from %s' % f
        # parse XML document
        doc = ElementTree.parse(f).getroot()

    # output header
    yield '# %s: %s' % (
        doc.find('Classification').get('id'),
        doc.find('Classification/Label/LabelText[@language="EN"]').text)

    for item in doc.findall('Classification/Item'):
        number = item.get('id')
        level = int(item.get('idLevel', 0))
        label = item.find('Label/LabelText[@language="EN"]').text
        isic = item.find(
            'Property[@genericName="ISIC4_REF"]/PropertyQualifier/' +
            'PropertyText').text
        if level == 1:
            # remember the most recent level 1 id for the level 2 entries
            section = number
            yield '%s label="%s" isic="%s"' % (number, label, isic)
        elif level == 2:
            yield '%s section="%s" label="%s" isic="%s"' % (
                number, section, label, isic)
        else:
            # Deeper levels are indented by one space per level and only
            # emit a single character of the id (presumably the last digit
            # of ids shaped like "01.1" / "01.11" -- verify against data).
            yield '%s%s label="%s" isic="%s"' % (
                ' ' * (level - 2), number[level], label, isic)
+
+
if __name__ == '__main__':
    # To build the listing from a previously downloaded copy instead of
    # fetching it, pass the file name to get(), for example
    # get('NACE_REV2_20170326_162216.xml').
    for line in get():
        print(line)