Arthur de Jong

Open Source / Free Software developer

summary | refs | log | tree | commit | diff | stats
path: root/stdnum
diff options
context:
space:
mode:
Diffstat (limited to 'stdnum')
-rw-r--r-- stdnum/dk/cpr.py 20
-rw-r--r-- stdnum/iban.py 6
-rw-r--r-- stdnum/imei.py 10
-rw-r--r-- stdnum/isbn.py 21
-rw-r--r-- stdnum/isil.py 45
-rw-r--r-- stdnum/issn.py 12
-rw-r--r-- stdnum/nl/bsn.py 12
-rw-r--r-- stdnum/us/ssn.py 21
-rw-r--r-- stdnum/verhoeff.py 36
9 files changed, 136 insertions, 47 deletions
diff --git a/stdnum/dk/cpr.py b/stdnum/dk/cpr.py
index 4bf4a44..bdbbbb9 100644
--- a/stdnum/dk/cpr.py
+++ b/stdnum/dk/cpr.py
@@ -19,13 +19,21 @@
"""CPR (personnummer, the Danish citizen number).
-The CPR is the national number to identify Danish citizens. The number
+The CPR is the national number to identify Danish citizens and is stored in
+the Det Centrale Personregister (Civil Registration System). The number
consists of 10 digits in the format DDMMYY-SSSS where the first part
-represents the birth date and the second a sequence number. The first
-digit of the sequence number indicates the century.
-
-The numbers used to validate using a checksum but since the sequence
-numbers ran out this was abandoned in 2007.
+represents the birth date and the second a sequence number. The first digit
+of the sequence number indicates the century.
+
+The numbers used to be validated using a checksum but since the sequence
+numbers ran out this was abandoned in 2007. It is also not possible to use the
+checksum only for numbers that have a birth date before that because the
+numbers are also assigned to immigrants.
+
+More information:
+ https://en.wikipedia.org/wiki/Personal_identification_number_(Denmark)
+ https://da.wikipedia.org/wiki/CPR-nummer
+ https://cpr.dk/
>>> validate('211062-5629')
'2110625629'
diff --git a/stdnum/iban.py b/stdnum/iban.py
index ee71786..ff33cdd 100644
--- a/stdnum/iban.py
+++ b/stdnum/iban.py
@@ -1,6 +1,6 @@
# iban.py - functions for handling International Bank Account Numbers (IBANs)
#
-# Copyright (C) 2011, 2012, 2013 Arthur de Jong
+# Copyright (C) 2011-2015 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -27,6 +27,10 @@ for the remainder of the number.
Some countries may also use checksum algorithms within their number but
this is currently not checked by this module.
+More information:
+ https://en.wikipedia.org/wiki/International_Bank_Account_Number
+ https://www.swift.com/products_services/bic_and_iban_format_registration_iban_format_r
+
>>> validate('GR16 0110 1050 0000 1054 7023 795')
'GR1601101050000010547023795'
>>> validate('BE31435411161155')
diff --git a/stdnum/imei.py b/stdnum/imei.py
index 35f76e1..8c0c1a0 100644
--- a/stdnum/imei.py
+++ b/stdnum/imei.py
@@ -1,7 +1,7 @@
# imei.py - functions for handling International Mobile Equipment Identity
# (IMEI) numbers
#
-# Copyright (C) 2010, 2011, 2012, 2013 Arthur de Jong
+# Copyright (C) 2010-2015 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -20,8 +20,12 @@
"""IMEI (International Mobile Equipment Identity).
-The IMEI is used to identify mobile phones. The IMEI may optionally
-include a check digit which is validated using the Luhn algorithm.
+The IMEI is used to identify mobile phones. An IMEI is 14, 15 (when the
+check digit is included) or 16 digits (IMEISV) long. The check digit is
+validated using the Luhn algorithm.
+
+More information:
+ https://en.wikipedia.org/wiki/International_Mobile_Equipment_Identity
>>> validate('35686800-004141-20')
'3568680000414120'
diff --git a/stdnum/isbn.py b/stdnum/isbn.py
index a49d9d7..6b9233b 100644
--- a/stdnum/isbn.py
+++ b/stdnum/isbn.py
@@ -1,6 +1,6 @@
# isbn.py - functions for handling ISBNs
#
-# Copyright (C) 2010, 2011, 2012, 2013 Arthur de Jong
+# Copyright (C) 2010-2015 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -20,8 +20,23 @@
"""ISBN (International Standard Book Number).
The ISBN is the International Standard Book Number, used to identify
-publications. This module supports both numbers in ISBN-10 (10-digit) and
-ISBN-13 (13-digit) format.
+publications. An ISBN is used to identify books. Numbers can either have 10
+digits (in ISBN-10 format) or 13 digits (in ISBN-13, EAN compatible format).
+An ISBN has the following components:
+
+* 3-digit (only in ISBN-13) Bookland code
+* 1 to 5-digit group identifier (identifies country or language)
+* 1 to 7-digit publisher code
+* 1 to 8-digit item number (identifies the book)
+* a check digit
+
+More information:
+ https://en.wikipedia.org/wiki/International_Standard_Book_Number
+ https://www.isbn-international.org/range_file_generation
+
+This module also offers functions for converting to ISBN-13 and formatting
+based on how the number should be split into a bookland code, group
+identifier, publisher code, item number and check digit.
>>> validate('978-9024538270')
'9789024538270'
diff --git a/stdnum/isil.py b/stdnum/isil.py
index b0ee905..219e89e 100644
--- a/stdnum/isil.py
+++ b/stdnum/isil.py
@@ -1,7 +1,7 @@
# isil.py - functions for handling identifiers for libraries and related
# organizations
#
-# Copyright (C) 2011, 2012, 2013 Arthur de Jong
+# Copyright (C) 2011-2015 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -20,8 +20,27 @@
"""ISIL (International Standard Identifier for Libraries).
-The ISIL is the International Standard Identifier for
-Libraries and Related Organizations.
+The ISIL is the International Standard Identifier for Libraries and Related
+Organizations (ISO 15511) used to uniquely identify libraries, archives,
+museums, and similar organisations.
+
+The identifier can be up to 15 characters that may use digits,
+letters (case insensitive), hyphens, colons and slashes. The non-alphanumeric
+characters are part of the identifier and are not just for readability.
+
+The identifier consists of two parts separated by a hyphen. The first part is
+either a two-letter ISO 3166 country code or a (not two-letter) non-national
+prefix that identifies the agency that issued the ISIL. The second part is
+the identifier issued by that agency.
+
+Only the first part can be validated since it is registered globally. There
+may be some validation possible with the second parts (some agencies provide
+web services for validation) but there is no common format to these services.
+
+More information:
+ https://en.wikipedia.org/wiki/International_Standard_Identifier_for_Libraries_and_Related_Organizations
+ http://biblstandard.dk/isil/
+ http://www.iso.org/iso/catalogue_detail?csnumber=57332
>>> validate('IT-RM0267')
'IT-RM0267'
@@ -46,7 +65,8 @@ from stdnum.util import clean
# the valid characters in an ISIL
-_alphabet = set('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-:/')
+_alphabet = set(
+ '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-:/')
def compact(number):
@@ -55,7 +75,7 @@ def compact(number):
return clean(number, '').strip()
-def _known_agency(agency):
+def _is_known_agency(agency):
"""Checks whether the specified agency is valid."""
# look it up in the db
from stdnum import numdb
@@ -65,22 +85,19 @@ def _known_agency(agency):
def validate(number):
- """Checks to see if the number provided is a valid isil (or isilSV)
- number."""
+ """Checks to see if the number provided is a valid ISIL."""
number = compact(number)
- for n in number:
- if n not in _alphabet:
- raise InvalidFormat()
+ if not all(x in _alphabet for x in number):
+ raise InvalidFormat()
if len(number) > 15:
raise InvalidLength()
- if not _known_agency(number.split('-')[0]):
+ if not _is_known_agency(number.split('-')[0]):
raise InvalidComponent()
return number
def is_valid(number):
- """Checks to see if the number provided is a valid isil (or isilSV)
- number."""
+ """Checks to see if the number provided is a valid ISIL."""
try:
return bool(validate(number))
except ValidationError:
@@ -91,6 +108,6 @@ def format(number):
"""Reformat the passed number to the standard format."""
number = compact(number)
parts = number.split('-')
- if len(parts) > 1 and _known_agency(parts[0]):
+ if len(parts) > 1 and _is_known_agency(parts[0]):
parts[0] = parts[0].upper()
return '-'.join(parts)
diff --git a/stdnum/issn.py b/stdnum/issn.py
index 6cfabbf..8be1f2c 100644
--- a/stdnum/issn.py
+++ b/stdnum/issn.py
@@ -1,6 +1,6 @@
# issn.py - functions for handling ISSNs
#
-# Copyright (C) 2010, 2011, 2012, 2013 Arthur de Jong
+# Copyright (C) 2010-2015 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -20,7 +20,15 @@
"""ISSN (International Standard Serial Number).
The ISSN (International Standard Serial Number) is the standard code to
-identify periodical publications. It has a checksum similar to ISBN-10.
+identify periodical publications (e.g. magazines).
+
+An ISSN has 8 digits and is formatted in two groups of 4 digits separated by a
+hyphen. The last digit is a check digit and may be 0-9 or X (similar to
+ISBN-10).
+
+More information:
+ https://en.wikipedia.org/wiki/International_Standard_Serial_Number
+ http://www.issn.org/
>>> validate('0024-9319')
'00249319'
diff --git a/stdnum/nl/bsn.py b/stdnum/nl/bsn.py
index b8926c9..280cc9b 100644
--- a/stdnum/nl/bsn.py
+++ b/stdnum/nl/bsn.py
@@ -1,6 +1,6 @@
# bsn.py - functions for handling BSNs
#
-# Copyright (C) 2010, 2011, 2012, 2013 Arthur de Jong
+# Copyright (C) 2010-2015 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -19,8 +19,14 @@
"""BSN (Burgerservicenummer, Dutch national identification number).
-The BSN is a number with up to 9 digits (the leading 0's are commonly left
-out) which is used as the Dutch national identification number.
+The BSN is a unique personal identifier and has been introduced as the
+successor to the sofinummer. The number consists of up to 9 digits (the
+leading 0's are commonly left out) and contains a simple checksum.
+
+More information:
+ https://en.wikipedia.org/wiki/National_identification_number#Netherlands
+ https://nl.wikipedia.org/wiki/Burgerservicenummer
+ http://www.burgerservicenummer.nl/
>>> validate('1112.22.333')
'111222333'
diff --git a/stdnum/us/ssn.py b/stdnum/us/ssn.py
index d4df834..d4058bf 100644
--- a/stdnum/us/ssn.py
+++ b/stdnum/us/ssn.py
@@ -1,6 +1,6 @@
# ssn.py - functions for handling SSNs
#
-# Copyright (C) 2011, 2012, 2013 Arthur de Jong
+# Copyright (C) 2011-2015 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -20,7 +20,24 @@
"""SSN (U.S. Social Security Number).
The Social Security Number is used to identify individuals for taxation
-purposes.
+purposes. It is a 9-digit number that consists of a 3-digit area number, a
+2-digit group number and a 4-digit serial number. The number does not use a
+check digit.
+
+Some validation options are available but with the introduction of Social
+Security Number Randomization it is no longer possible to validate using the
+High Group History List. Some areas, groups and ranges can be blacklisted
+though.
+
+There are several on-line verification facilities available, either for
+Employers or at a fee but validation requires more information than just the
+number (e.g. name, date of birth, etc). Another means of validation is the
+Death Master File which can be ordered on DVD.
+
+More information:
+ https://en.wikipedia.org/wiki/Social_Security_number
+ https://www.ssa.gov/employer/verifySSN.htm
+ https://en.wikipedia.org/wiki/Death_Master_File
>>> validate('536-90-4399')
'536904399'
diff --git a/stdnum/verhoeff.py b/stdnum/verhoeff.py
index 00037e0..5d6fb15 100644
--- a/stdnum/verhoeff.py
+++ b/stdnum/verhoeff.py
@@ -1,6 +1,6 @@
# verhoeff.py - functions for performing the Verhoeff checksum
#
-# Copyright (C) 2010, 2011, 2012, 2013 Arthur de Jong
+# Copyright (C) 2010-2015 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -19,8 +19,18 @@
"""The Verhoeff algorithm.
-The Verhoeff algorithm uses two tables for permutations and
-multiplications to calculate a checksum.
+The Verhoeff algorithm is a checksum algorithm that should catch most common
+(typing) errors in numbers. The algorithm uses two tables for permutations
+and multiplications and as a result is more complex than the Luhn algorithm.
+
+More information:
+ https://en.wikipedia.org/wiki/Verhoeff_algorithm
+ https://en.wikibooks.org/wiki/Algorithm_Implementation/Checksums/Verhoeff_Algorithm
+
+The module provides the checksum() function to calculate the Verhoeff
+checksum, a calc_check_digit() function to generate a check digit that can be
+appended to an existing number to result in a number with a valid checksum and
+validation functions.
>>> validate('1234')
Traceback (most recent call last):
@@ -41,16 +51,16 @@ from stdnum.exceptions import *
# Verhoeff algorithm.
_multiplication_table = (
- [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
- [1, 2, 3, 4, 0, 6, 7, 8, 9, 5],
- [2, 3, 4, 0, 1, 7, 8, 9, 5, 6],
- [3, 4, 0, 1, 2, 8, 9, 5, 6, 7],
- [4, 0, 1, 2, 3, 9, 5, 6, 7, 8],
- [5, 9, 8, 7, 6, 0, 4, 3, 2, 1],
- [6, 5, 9, 8, 7, 1, 0, 4, 3, 2],
- [7, 6, 5, 9, 8, 2, 1, 0, 4, 3],
- [8, 7, 6, 5, 9, 3, 2, 1, 0, 4],
- [9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
+ (1, 2, 3, 4, 0, 6, 7, 8, 9, 5),
+ (2, 3, 4, 0, 1, 7, 8, 9, 5, 6),
+ (3, 4, 0, 1, 2, 8, 9, 5, 6, 7),
+ (4, 0, 1, 2, 3, 9, 5, 6, 7, 8),
+ (5, 9, 8, 7, 6, 0, 4, 3, 2, 1),
+ (6, 5, 9, 8, 7, 1, 0, 4, 3, 2),
+ (7, 6, 5, 9, 8, 2, 1, 0, 4, 3),
+ (8, 7, 6, 5, 9, 3, 2, 1, 0, 4),
+ (9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
_permutation_table = (
(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),