Arthur de Jong

Open Source / Free Software developer

summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArthur de Jong <arthur@arthurdejong.org>2024-03-17 17:34:26 +0100
committerArthur de Jong <arthur@arthurdejong.org>2024-03-17 18:18:04 +0100
commit26fd25b32a5d4377d2b17e22adc2db0d27503f93 (patch)
treedbdef70eea57c7474d5a5b5e31647d95d2cdcbe4
parent923060485816ba6b897655251e4b6446e4a5dcc0 (diff)
Switch to using openpyxl for parsing XLSX files
The xlrd has dropped support for parsing XLSX files. We still use xlrd for update/be_banks.py because they use the classic XLS format and openpyxl does not support that format.
-rw-r--r--setup.cfg1
-rwxr-xr-xupdate/cfi.py21
-rwxr-xr-xupdate/nz_banks.py16
-rw-r--r--update/requirements.txt1
4 files changed, 19 insertions, 20 deletions
diff --git a/setup.cfg b/setup.cfg
index 757a9f1..afd45e7 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -47,5 +47,6 @@ lines_after_imports = 2
multi_line_output = 4
known_third_party =
lxml
+ openpyxl
requests
xlrd
diff --git a/update/cfi.py b/update/cfi.py
index e47e6bc..f5431a8 100755
--- a/update/cfi.py
+++ b/update/cfi.py
@@ -2,7 +2,7 @@
# update/cfi.py - script to download CFI code list from the SIX group
#
-# Copyright (C) 2022 Arthur de Jong
+# Copyright (C) 2022-2024 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -21,11 +21,12 @@
"""This script downloads the list of CFI codes as published by the SIX group."""
+import io
import re
import lxml.html
+import openpyxl
import requests
-import xlrd
# the location of the Statistical Classification file
@@ -39,8 +40,8 @@ def normalise(value):
def get_categories(sheet):
"""Get the list of top-level CFI categories."""
- for row in sheet.get_rows():
- if len(row[0].value) == 1 and row[1].value:
+ for row in sheet.iter_rows():
+ if row[0].value and len(row[0].value) == 1 and row[1].value:
yield (row[0].value, row[1].value)
@@ -49,7 +50,7 @@ def get_attributes(sheet):
attribute = None
value_list = []
values = None
- for row in sheet.get_rows():
+ for row in sheet.iter_rows():
if row[0].value and not row[1].value and row[2].value:
attribute = normalise(row[2].value)
values = []
@@ -84,15 +85,15 @@ if __name__ == '__main__':
# Download and parse the spreadsheet
response = requests.get(link_url, timeout=30)
response.raise_for_status()
- workbook = xlrd.open_workbook(file_contents=response.content)
+ workbook = openpyxl.load_workbook(io.BytesIO(response.content), read_only=True)
print('# generated from %s, downloaded from' % link_url.split('/')[-1])
print('# %s' % download_url)
- groups = sorted(x for x in workbook.sheet_names() if len(x) == 6 and x.endswith('XXXX'))
- for category, name in sorted(get_categories(workbook.sheet_by_name('Categories'))):
+ groups = sorted(x for x in workbook.sheetnames if len(x) == 6 and x.endswith('XXXX'))
+ for category, name in sorted(get_categories(workbook['Categories'])):
print('%s category="%s"' % (category, name))
for group in (x for x in groups if x.startswith(category)):
- sheet = workbook.sheet_by_name(group)
- print(' %s group="%s"' % (group[1], normalise(sheet.cell(0, 0).value)))
+ sheet = workbook[group]
+ print(' %s group="%s"' % (group[1], normalise(sheet.cell(1, 1).value)))
print_attributes(get_attributes(sheet))
diff --git a/update/nz_banks.py b/update/nz_banks.py
index 816930f..af1a433 100755
--- a/update/nz_banks.py
+++ b/update/nz_banks.py
@@ -3,7 +3,7 @@
# update/nz_banks.py - script to download Bank list from Bank Branch Register
#
-# Copyright (C) 2019-2021 Arthur de Jong
+# Copyright (C) 2019-2024 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -23,16 +23,12 @@
"""This script downloads the list of banks with bank codes as used in the
New Zealand bank account numbers."""
+import io
import re
from collections import OrderedDict, defaultdict
+import openpyxl
import requests
-import xlrd
-
-
-# Monkey patch xlrd avoiding bug in combination with Python 3.9
-xlrd.xlsx.ensure_elementtree_imported(False, None)
-xlrd.xlsx.Element_has_iter = True
# The page that contains a link to the latest XLS version of the codes.
@@ -41,7 +37,7 @@ download_url = 'https://www.paymentsnz.co.nz/resources/industry-registers/bank-b
def get_values(sheet):
"""Return rows from the worksheet as a dict per row."""
- rows = sheet.get_rows()
+ rows = sheet.iter_rows()
# the first row has column names
columns = [column.value.lower().replace(' ', '_') for column in next(rows)]
# go over rows with values
@@ -75,8 +71,8 @@ if __name__ == '__main__':
response.raise_for_status()
content_disposition = response.headers.get('content-disposition', '')
filename = re.findall(r'filename=?(.+)"?', content_disposition)[0].strip('"')
- workbook = xlrd.open_workbook(file_contents=response.content)
- sheet = workbook.sheet_by_index(0)
+ workbook = openpyxl.load_workbook(io.BytesIO(response.content), read_only=True)
+ sheet = workbook.worksheets[0]
# print header
print('# generated from %s downloaded from' % filename)
print('# %s' % download_url)
diff --git a/update/requirements.txt b/update/requirements.txt
index 16e367c..b51c684 100644
--- a/update/requirements.txt
+++ b/update/requirements.txt
@@ -1,3 +1,4 @@
lxml
+openpyxl
requests
xlrd