Arthur de Jong

Open Source / Free Software developer

summaryrefslogtreecommitdiffstats
path: root/stdnum/isil.py
blob: fb8ab8e952641f99728155e0f80369568a30a73d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# isil.py - functions for handling identifiers for libraries and related
#           organizations
#
# Copyright (C) 2011-2022 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA

"""ISIL (International Standard Identifier for Libraries).

The ISIL is the International Standard Identifier for Libraries and Related
Organizations (ISO 15511) used to uniquely identify libraries, archives,
museums, and similar organisations.

The identifier can be up to 15 characters that may use digits,
letters (case insensitive), hyphens, colons and slashes. The non-alphanumeric
characters are part of the identifier and are not just for readability.

The identifier consists of two parts separated by a hyphen. The first part is
either a two-letter ISO 3166 country code or a (not two-letter) non-national
prefix that identifies the agency that issued the ISIL. The second part is
the identifier issued by that agency.

Only the first part can be validated since it is registered globally. There
may be some validation possible with the second parts (some agencies provide
web services for validation) but there is no common format to these services.

More information:

* https://en.wikipedia.org/wiki/International_Standard_Identifier_for_Libraries_and_Related_Organizations
* https://biblstandard.dk/isil/
* https://www.iso.org/standard/57332.html

>>> validate('IT-RM0267')
'IT-RM0267'
>>> validate('OCLC-DLC')
'OCLC-DLC'
>>> validate('WW-RM0267')  # unregistered country code
Traceback (most recent call last):
    ...
InvalidComponent: ...
>>> format('it-RM0267')
'IT-RM0267'
"""

from stdnum.exceptions import *
from stdnum.util import clean


# the characters that may appear in an ISIL: digits, upper and lower case
# letters, and the hyphen, colon and slash punctuation (validate() rejects
# any number containing a character outside this set)
_alphabet = set(
    '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-:/')


def compact(number):
    """Return the minimal representation of the ISIL.

    The number is passed through clean() with an empty set of characters
    to delete, after which leading and trailing whitespace is removed.
    """
    cleaned = clean(number, '')
    return cleaned.strip()


def _is_known_agency(agency):
    """Report whether the agency prefix is present in the isil database."""
    # the import is local so numdb is only loaded when a lookup is performed
    from stdnum import numdb
    # the lookup key is the upper-cased prefix followed by '$'
    # NOTE(review): presumably '$' terminates the prefix in the database
    # entries - confirm against the isil data file
    lookup_key = agency.upper() + '$'
    matches = numdb.get('isil').info(lookup_key)
    # exactly one part must be matched ...
    if len(matches) != 1:
        return False
    # ... and that part must carry properties
    return bool(matches[0][1])


def validate(number):
    """Check if the number provided is a valid ISIL.

    The compacted number is returned when it is valid. InvalidFormat is
    raised if the number contains characters outside the ISIL alphabet or
    does not consist of an agency prefix and a non-empty identifier
    separated by a hyphen, InvalidLength is raised if it is longer than 15
    characters and InvalidComponent is raised if the agency prefix is not
    registered.
    """
    number = compact(number)
    if not all(x in _alphabet for x in number):
        raise InvalidFormat()
    if len(number) > 15:
        raise InvalidLength()
    # an ISIL is an agency prefix and an agency-issued identifier joined by
    # a hyphen, so a bare prefix (e.g. 'IT') or an empty identifier (e.g.
    # 'IT-') is not a complete ISIL even if the prefix itself is registered
    agency, hyphen, identifier = number.partition('-')
    if not hyphen or not identifier:
        raise InvalidFormat()
    if not _is_known_agency(agency):
        raise InvalidComponent()
    return number


def is_valid(number):
    """Return whether the number provided is a valid ISIL.

    This wraps validate() and returns False instead of raising a
    ValidationError.
    """
    try:
        validated = validate(number)
    except ValidationError:
        return False
    else:
        # truthiness of the validated number, not a constant True
        return bool(validated)


def format(number):
    """Reformat the number to the standard presentation format.

    The number is compacted and, if it has a hyphen and the part before the
    first hyphen is a known agency, that part is converted to upper case.
    The remainder of the number is left as is.
    """
    number = compact(number)
    agency, hyphen, remainder = number.partition('-')
    # without a hyphen there is no agency prefix to normalise
    if not hyphen:
        return number
    if _is_known_agency(agency):
        agency = agency.upper()
    return agency + hyphen + remainder