#!/usr/bin/env python3 # update/isbn.py - script to get ISBN prefix data # # Copyright (C) 2010-2018 Arthur de Jong # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301 USA """This script downloads XML data from the International ISBN Agency website and provides a compact form of all group prefixes, and registrant ranges for those prefixes suitable for the numdb module. This data is needed to correctly split ISBNs into an EAN.UCC prefix, a group prefix, a registrant, an item number and a check-digit.""" import ssl import urllib.request from xml.etree import ElementTree # the location of the ISBN Ranges XML file download_url = 'https://www.isbn-international.org/export_rangemessage.xml' def ranges(group): """Provide the ranges for the group.""" for rule in group.find('Rules').findall('Rule'): length = int(rule.find('Length').text.strip()) if length: yield '-'.join( x[:length] for x in rule.find('Range').text.strip().split('-')) def wrap(text): """Rewrap the provided text into lines.""" while text: i = len(text) if i > 73: i = text.rindex(',', 20, 73) yield text[:i] text = text[i + 1:] if __name__ == '__main__': print('# generated from RangeMessage.xml, downloaded from') print('# %s' % download_url) ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1) f = urllib.request.urlopen(download_url, context=ctx) # parse XML document msg = ElementTree.parse(f).getroot() # dump data from document print('# file serial %s' % msg.find('MessageSerialNumber').text.strip()) print('# file date %s' % msg.find('MessageDate').text.strip()) top_groups = dict( (x.find('Prefix').text.strip(), x) for x in msg.find('EAN.UCCPrefixes').findall('EAN.UCC')) prevtop = None for group in msg.find('RegistrationGroups').findall('Group'): top, prefix = group.find('Prefix').text.strip().split('-') agency = group.find('Agency').text.strip() if top != prevtop: print(top) for line in wrap(','.join(ranges(top_groups[top]))): print(' %s' % line) prevtop = top print(' %s agency="%s"' % (prefix, agency)) for line in wrap(','.join(ranges(group))): print(' %s' % line)