Arthur de Jong

Open Source / Free Software developer

summaryrefslogtreecommitdiffstats
path: root/stdnum/isbn/ranges.py
blob: 83085c8025cf09bccf8712f2abd9257303d7991f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
# ranges.py - list of ISBN prefix data and utility functions
#
# Copyright (C) 2010 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA

"""This module contains that current ISBN group and registrant prefixes as
they are registered with the International ISBN Agency. This information
is needed to correctly split an ISBN into an EAN.UCC prefix, a group prefix,
a registrant, an item number and a check-digit."""

# The place where the current version of RangeMessage.xml can be downloaded.
# download() fetches from this URL when it is not given one and output()
# mentions it in the header that it generates.
_download_url = 'http://www.isbn-international.org/agency?rmxml=1'

# What follows is a representation of the prefixes that are defined by the
# International ISBN Agency to correctly split ISBNs. See the download()
# and output() functions on how to download and generate this data.

# generated from RangeMessage.xml, downloaded from
# http://www.isbn-international.org/agency?rmxml=1
# serial 0aad2b046ddd9b30e080cb2b24afc868
# date Thu, 20 May 2010 18:36:55 GMT
_prefixes = """
978 0-5 600-649 7-7 80-94 950-989 9900-9989 99900-99999
978-0 00-19 200-699 7000-8499 85000-89999 900000-949999 9500000-9999999
978-1 00-09 100-399 4000-5499 55000-86979 869800-998999 9990000-9999999
978-2 00-19 200-349 35000-39999 400-699 7000-8399 84000-89999 900000-949999
978-2 9500000-9999999
978-3 00-02 030-033 0340-0369 03700-03999 04-19 200-699 7000-8499 85000-89999
978-3 900000-949999 9500000-9539999 95400-96999 9700000-9899999 99000-99499
978-3 99500-99999
978-4 00-19 200-699 7000-8499 85000-89999 900000-949999 9500000-9999999
978-5 00-19 200-420 4210-4299 430-430 4310-4399 440-440 4410-4499 450-699
978-5 7000-8499 85000-89999 900000-909999 91000-91999 9200-9299 93000-94999
978-5 9500000-9500999 9501-9799 98000-98999 9900000-9909999 9910-9999
978-600 00-09 100-499 5000-8999 90000-99999
978-601 00-19 200-699 7000-7999 80000-84999 85-99
978-602 00-19 200-799 8000-9499 95000-99999
978-603 00-04 05-49 500-799 8000-8999 90000-99999
978-604 0-4 50-89 900-979 9800-9999
978-605 01-09 100-399 4000-5999 60000-89999 90-99
978-606 0-0 10-49 500-799 8000-9199 92000-99999
978-607 00-39 400-749 7500-9499 95000-99999
978-608 0-0 10-19 200-449 4500-6499 65000-69999 7-9
978-609 00-39 400-799 8000-9499 95000-99999
978-612 00-29 300-399 4000-4499 45000-49999 50-99
978-613 0-9
978-614 00-39 400-799 8000-9499 95000-99999
978-615 00-09 100-499 5000-7999 80000-89999
978-616 00-19 200-699 7000-8999 90000-99999
978-617 00-49 500-699 7000-8999 90000-99999
978-7 00-09 100-499 5000-7999 80000-89999 900000-999999
978-80 00-19 200-699 7000-8499 85000-89999 900000-999999
978-81 00-19 200-699 7000-8499 85000-89999 900000-999999
978-82 00-19 200-699 7000-8999 90000-98999 990000-999999
978-83 00-19 200-599 60000-69999 7000-8499 85000-89999 900000-999999
978-84 00-14 15000-19999 200-699 7000-8499 85000-89999 9000-9199
978-84 920000-923999 92400-92999 930000-949999 95000-96999 9700-9999
978-85 00-19 200-599 60000-69999 7000-8499 85000-89999 900000-979999
978-85 98000-99999
978-86 00-29 300-599 6000-7999 80000-89999 900000-999999
978-87 00-29 400-649 7000-7999 85000-94999 970000-999999
978-88 00-19 200-599 6000-8499 85000-89999 900000-949999 95000-99999
978-89 00-24 250-549 5500-8499 85000-94999 950000-999999
978-90 00-19 200-499 5000-6999 70000-79999 800000-849999 8500-8999 90-90
978-90 910000-939999 94-94 950000-999999
978-91 0-1 20-49 500-649 7000-7999 85000-94999 970000-999999
978-92 0-5 60-79 800-899 9000-9499 95000-98999 990000-999999
978-93 00-09 100-499 5000-7999 80000-94999 950000-999999
978-94 000-599 6000-8999 90000-99999
978-950 00-49 500-899 9000-9899 99000-99999
978-951 0-1 20-54 550-889 8900-9499 95000-99999
978-952 00-19 200-499 5000-5999 60-65 6600-6699 67000-69999 7000-7999 80-94
978-952 9500-9899 99000-99999
978-953 0-0 10-14 150-549 55000-59999 6000-9499 95000-99999
978-954 00-28 2900-2999 300-799 8000-8999 90000-92999 9300-9999
978-955 0000-1999 20-49 50000-54999 550-799 8000-9499 95000-99999
978-956 00-19 200-699 7000-9999
978-957 00-02 0300-0499 05-19 2000-2099 21-27 28000-30999 31-43 440-819
978-957 8200-9699 97000-99999
978-958 00-56 57000-59999 600-799 8000-9499 95000-99999
978-959 00-19 200-699 7000-8499 85000-99999
978-960 00-19 200-659 6600-6899 690-699 7000-8499 85000-92999 93-93 9400-9799
978-960 98000-99999
978-961 00-19 200-599 6000-8999 90000-94999
978-962 00-19 200-699 7000-8499 85000-86999 8700-8999 900-999
978-963 00-19 200-699 7000-8499 85000-89999 9000-9999
978-964 00-14 150-249 2500-2999 300-549 5500-8999 90000-96999 970-989
978-964 9900-9999
978-965 00-19 200-599 7000-7999 90000-99999
978-966 00-14 1500-1699 170-199 2000-2999 300-699 7000-8999 90000-99999
978-967 00-29 300-499 5000-5999 60-89 900-989 9900-9989 99900-99999
978-968 01-39 400-499 5000-7999 800-899 9000-9999
978-969 0-1 20-39 400-799 8000-9999
978-970 01-59 600-899 9000-9099 91000-96999 9700-9999
978-971 000-015 0160-0199 02-02 0300-0599 06-09 10-49 500-849 8500-9099
978-971 91000-98999 9900-9999
978-972 0-1 20-54 550-799 8000-9499 95000-99999
978-973 0-0 100-169 1700-1999 20-54 550-759 7600-8499 85000-88999 8900-9499
978-973 95000-99999
978-974 00-19 200-699 7000-8499 85000-89999 90000-94999 9500-9999
978-975 00000-00999 01-01 02-24 250-599 6000-9199 92000-98999 990-999
978-976 0-3 40-59 600-799 8000-9499 95000-99999
978-977 00-19 200-499 5000-6999 700-999
978-978 000-199 2000-2999 30000-79999 8000-8999 900-999
978-979 000-099 1000-1499 15000-19999 20-29 3000-3999 400-799 8000-9499
978-979 95000-99999
978-980 00-19 200-599 6000-9999
978-981 00-11 1200-1999 200-289 2900-9999
978-982 00-09 100-699 70-89 9000-9799 98000-99999
978-983 00-01 020-199 2000-3999 40000-44999 45-49 50-79 800-899 9000-9899
978-983 99000-99999
978-984 00-39 400-799 8000-8999 90000-99999
978-985 00-39 400-599 6000-8999 90000-99999
978-986 00-11 120-559 5600-7999 80000-99999
978-987 00-09 1000-1999 20000-29999 30-49 500-899 9000-9499 95000-99999
978-988 00-16 17000-19999 200-799 8000-9699 97000-99999
978-989 0-1 20-54 550-799 8000-9499 95000-99999
978-9927 00-09 100-399 4000-4999
978-9928 00-09 100-399 4000-4999
978-9929 0-3 40-54 550-799 8000-9999
978-9930 00-49 500-939 9400-9999
978-9931 00-29 300-899 9000-9999
978-9932 00-39 400-849 8500-9999
978-9933 0-0 10-39 400-899 9000-9999
978-9934 0-0 10-49 500-799 8000-9999
978-9935 0-0 10-39 400-899 9000-9999
978-9936 0-1 20-39 400-799 8000-9999
978-9937 0-2 30-49 500-799 8000-9999
978-9938 00-79 800-949 9500-9999
978-9939 0-4 50-79 800-899 9000-9999
978-9940 0-1 20-49 500-899 9000-9999
978-9941 0-0 10-39 400-899 9000-9999
978-9942 00-89 900-994 9950-9999
978-9943 00-29 300-399 4000-9999
978-9944 0000-0999 100-499 5000-5999 60-69 700-799 80-89 900-999
978-9945 00-00 010-079 08-39 400-569 57-57 580-849 8500-9999
978-9946 0-1 20-39 400-899 9000-9999
978-9947 0-1 20-79 800-999
978-9948 00-39 400-849 8500-9999
978-9949 0-0 10-39 400-899 9000-9999
978-9950 00-29 300-849 8500-9999
978-9951 00-39 400-849 8500-9999
978-9952 0-1 20-39 400-799 8000-9999
978-9953 0-0 10-39 400-599 60-89 9000-9999
978-9954 0-1 20-39 400-799 8000-9999
978-9955 00-39 400-929 9300-9999
978-9956 0-0 10-39 400-899 9000-9999
978-9957 00-39 400-699 70-84 8500-8799 88-99
978-9958 0-0 10-49 500-899 9000-9999
978-9959 0-1 20-79 800-949 9500-9999
978-9960 00-59 600-899 9000-9999
978-9961 0-2 30-69 700-949 9500-9999
978-9962 00-54 5500-5599 56-59 600-849 8500-9999
978-9963 0-2 30-54 550-734 7350-7499 7500-9999
978-9964 0-6 70-94 950-999
978-9965 00-39 400-899 9000-9999
978-9966 000-199 20-69 7000-7499 750-959 9600-9999
978-9967 00-39 400-899 9000-9999
978-9968 00-49 500-939 9400-9999
978-9970 00-39 400-899 9000-9999
978-9971 0-5 60-89 900-989 9900-9999
978-9972 00-09 1-1 200-249 2500-2999 30-59 600-899 9000-9999
978-9973 00-05 060-089 0900-0999 10-69 700-969 9700-9999
978-9974 0-2 30-54 550-749 7500-9499 95-99
978-9975 0-0 100-399 4000-4499 45-89 900-949 9500-9999
978-9976 0-5 60-89 900-989 9900-9999
978-9977 00-89 900-989 9900-9999
978-9978 00-29 300-399 40-94 950-989 9900-9999
978-9979 0-4 50-64 650-659 66-75 760-899 9000-9999
978-9980 0-3 40-89 900-989 9900-9999
978-9981 00-09 100-159 1600-1999 20-79 800-949 9500-9999
978-9982 00-79 800-989 9900-9999
978-9983 80-94 950-989 9900-9999
978-9984 00-49 500-899 9000-9999
978-9985 0-4 50-79 800-899 9000-9999
978-9986 00-39 400-899 9000-9399 940-969 97-99
978-9987 00-39 400-879 8800-9999
978-9988 0-2 30-54 550-749 7500-9999
978-9989 0-0 100-199 2000-2999 30-59 600-949 9500-9999
978-99901 00-49 500-799 80-99
978-99903 0-1 20-89 900-999
978-99904 0-5 60-89 900-999
978-99905 0-3 40-79 800-999
978-99906 0-2 30-59 600-699 70-89 90-94 950-999
978-99908 0-0 10-89 900-999
978-99909 0-3 40-94 950-999
978-99910 0-2 30-89 900-999
978-99911 00-59 600-999
978-99912 0-3 400-599 60-89 900-999
978-99913 0-2 30-35 600-604
978-99914 0-4 50-89 900-999
978-99915 0-4 50-79 800-999
978-99916 0-2 30-69 700-999
978-99917 0-2 30-89 900-999
978-99918 0-3 40-79 800-999
978-99919 0-2 300-399 40-69 900-999
978-99920 0-4 50-89 900-999
978-99921 0-1 20-69 700-799 8-8 90-99
978-99922 0-3 40-69 700-999
978-99923 0-1 20-79 800-999
978-99924 0-1 20-79 800-999
978-99925 0-3 40-79 800-999
978-99926 0-0 10-59 600-999
978-99927 0-2 30-59 600-999
978-99928 0-0 10-79 800-999
978-99929 0-4 50-79 800-999
978-99930 0-4 50-79 800-999
978-99931 0-4 50-79 800-999
978-99932 0-0 10-59 600-699 7-7 80-99
978-99933 0-2 30-59 600-999
978-99934 0-1 20-79 800-999
978-99935 0-2 30-59 600-699 7-8 90-99
978-99936 0-0 10-59 600-999
978-99937 0-1 20-59 600-999
978-99938 0-1 20-59 600-899 90-99
978-99939 0-5 60-89 900-999
978-99940 0-0 10-69 700-999
978-99941 0-2 30-79 800-999
978-99942 0-4 50-79 800-999
978-99943 0-2 30-59 600-999
978-99944 0-4 50-79 800-999
978-99945 0-5 60-89 900-999
978-99946 0-2 30-59 600-999
978-99947 0-2 30-69 700-999
978-99948 0-4 50-79 800-999
978-99949 0-1 20-89 900-999
978-99950 0-4 50-79 800-999
978-99952 0-4 50-79 800-999
978-99953 0-2 30-79 800-939 94-99
978-99954 0-2 30-69 700-999
978-99955 0-1 20-59 600-799 80-89 90-99
978-99956 00-59 600-859 86-99
978-99957 0-1 20-79 800-999
978-99958 0-4 50-94 950-999
978-99959 0-2 30-59 600-999
978-99960 0-0 10-94 950-999
978-99961 0-3 40-89 900-999
978-99962 0-4 50-79 800-999
978-99963 00-49 500-999
978-99964 0-1 20-79 800-999
978-99965 0-3 40-79 800-999
978-99966 0-2 30-69 700-799
978-99967 0-1 20-59 600-899
979 10-10
979-10 00-19 200-699 7000-8999 90000-97599 976000-999999
"""

def _expand():
    """Ensure that the prefix table is available as a dictionary.

    The table is kept in this module as a compact multi-line string in
    which every non-empty line holds a prefix followed by space-separated
    "low-high" ranges. That form is compact but not efficient for lookups,
    so on first use it is parsed into a dictionary that maps each prefix to
    a list of (length, low, high) tuples, where length is the number of
    characters in low. Lines that repeat a prefix extend the same list.

    The result replaces the module-level _prefixes value. If _prefixes
    already is a dictionary nothing is done."""
    global _prefixes
    # isinstance() also recognises dict subclasses as already expanded
    if isinstance(_prefixes, dict):
        return
    # build a new dictionary of ranges from the string
    expanded = {}
    for line in _prefixes.splitlines():
        if not line:
            continue
        prefix, ranges = line.split(' ', 1)
        range_list = expanded.setdefault(prefix, [])
        for item in ranges.split(' '):
            low, high = item.split('-')
            range_list.append((len(low), low, high))
    # save the dictionary
    _prefixes = expanded

def lookup(prefix, number):
    """Split the provided number using the ranges defined for the prefix.

    The prefix and number together are expected to form a complete ISBN13
    number. The ranges of the prefix are tried in order and for the first
    range that contains the leading characters of the number a tuple with
    those leading characters and the remainder of the number is returned.
    If the prefix cannot be found or the number is not in any of the
    defined ranges a tuple with the number as its only element is returned.

    >>> lookup('978', '9024538270')
    ('90', '24538270')
    >>> lookup('978-0', '471117094')
    ('471', '117094')
    """
    _expand()
    # an unknown prefix simply has no ranges to try
    for width, lower, upper in _prefixes.get(prefix, ()):
        head = number[:width]
        if lower <= head <= upper:
            return head, number[width:]
    return (number,)

def load(fp):
    """Load the prefix data from the specified file object.

    The provided file should match the format of the RangeMessage.xml
    file. Every Rule element that has both a Range and a non-zero Length
    is stored under the most recently seen Prefix as a (length, low, high)
    tuple in which low and high are the two ends of the Range cut down to
    length characters.

    On success the module-level _prefixes table is replaced by the parsed
    data and, when the document contains them, _download_serial and
    _download_date are set from the MessageSerialNumber and MessageDate
    elements. If parsing fails the exception of the parser is propagated
    and the data that was available before the call is left untouched."""
    # this is in-line to avoid importing xml.sax for normal use
    import xml.sax
    global _prefixes, _download_serial, _download_date

    # SAX handler class
    class RangeHandler(xml.sax.ContentHandler):
        """Collect the serial, date and range rules of the document."""

        def __init__(self):
            # initialise the base class as well
            xml.sax.ContentHandler.__init__(self)
            # results, only copied to the module after a successful parse
            self.prefixes = {}
            self.serial = None
            self.date = None
            # parser state
            self._gather = None
            self._prefix = None
            self._range = None
            self._length = None

        def startElement(self, name, attrs):
            # only collect text for the elements that are used below
            if name in ('MessageSerialNumber', 'MessageDate', 'Prefix',
                        'Range', 'Length'):
                self._gather = ''

        def characters(self, content):
            # text may arrive in several pieces, so accumulate it
            if self._gather is not None:
                self._gather += content

        def endElement(self, name):
            text = (self._gather or '').strip()
            if name == 'MessageSerialNumber':
                self.serial = text
            elif name == 'MessageDate':
                self.date = text
            elif name == 'Prefix':
                self._prefix = text
            elif name == 'Range':
                self._range = text
            elif name == 'Length':
                self._length = int(text)
            elif name == 'Rule':
                # rules with a length of zero do not define a range
                if self._length and self._range:
                    ends = tuple(x[:self._length]
                                 for x in self._range.split('-'))
                    rule = (self._length,) + ends
                    self.prefixes.setdefault(self._prefix, []).append(rule)
                # do not let values leak into the next rule
                self._range = None
                self._length = None
            self._gather = None

    # start the actual parsing
    handler = RangeHandler()
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    parser.parse(fp)
    # the whole document was parsed, so the result can be published
    _prefixes = handler.prefixes
    if handler.serial is not None:
        _download_serial = handler.serial
    if handler.date is not None:
        _download_date = handler.date

def download(url=None):
    """Download the RangeMessage.xml data and load it into this module.

    The data is fetched from the specified URL or, if no URL is provided,
    from the International ISBN Agency website (_download_url) and the
    response is passed to load(). The response is closed afterwards, also
    when load() raises an exception."""
    # pick the urlopen() that this version of Python provides
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    response = urlopen(url or _download_url)
    try:
        load(response)
    finally:
        # always release the connection
        response.close()

def _wrap(text, max_len, sep=' '):
    """Generator that returns lines of text that are no longer than
    max_len. The sep argument is the string to split on.

    While the remaining text is longer than max_len it is broken at the
    last occurrence of sep that lies completely within the first max_len
    characters. The separator at which a line is broken is dropped from
    the output. A ValueError is raised if text that is too long does not
    contain sep within its first max_len characters."""
    skip = len(sep)
    while text:
        cut = len(text)
        if cut > max_len:
            # break at the last separator that still fits
            cut = text.rindex(sep, 0, max_len)
        yield text[:cut]
        # continue after the separator that was used as break point
        text = text[cut + skip:]

def output(fp=None):
    """Print the range data to stdout (or a file if one is provided) in
    the compact text format suitable for inclusion in this module.

    For every prefix, in sorted order, the ranges are written as
    space-separated "low-high" pairs on lines that start with the prefix,
    wrapped over several lines where needed. If the serial and date of a
    download are available the data is preceded by a comment header and
    the opening of the _prefixes string and followed by the closing of
    that string; otherwise only the data lines are written."""
    _expand()
    if fp is None:
        import sys
        fp = sys.stdout
    # the header can only be built if download information is available
    try:
        header = ('# generated from RangeMessage.xml, downloaded from\n'
                  '# %(url)s\n'
                  '# serial %(serial)s\n'
                  '# date %(date)s\n'
                  '_prefixes = """\n' % {'url': _download_url,
                                         'serial': _download_serial,
                                         'date': _download_date})
    except NameError:
        header = None
    if header is not None:
        fp.write(header)
    # print the actual prefixes
    for prefix, ranges in sorted(_prefixes.items()):
        text = ' '.join(low + '-' + high for _, low, high in ranges)
        # leave room for the prefix that starts every line
        for line in _wrap(text, 77 - len(prefix)):
            fp.write('%s %s\n' % (prefix, line))
    # print the footer if the header was printed
    if header is not None:
        fp.write('"""\n')