From 46510bf84a666ed26dd0db029d52421cdfc2b32c Mon Sep 17 00:00:00 2001 From: Arthur de Jong Date: Fri, 3 Jul 2015 18:15:00 +0200 Subject: Improve performance of metadata reading This inlines the clean_meta() function and reads the whole JSON file in memory to greatly reduce the number of function calls that are performed reading the files list. This is especially noticeable in reading the backup files lists. This does mean that more memory is used when reading large files. --- repo.py | 64 ++++++++++++++++++++++------------------------------------------ 1 file changed, 22 insertions(+), 42 deletions(-) diff --git a/repo.py b/repo.py index eda0d2a..9523e10 100644 --- a/repo.py +++ b/repo.py @@ -63,37 +63,16 @@ def generate_backup_name(): return datetime.datetime.now().strftime('%Y%m%d-%H%M%S') -def clean_meta(meta): - """Migrate metadata from previous formats.""" - meta.pop('ctime', None) # ctime is no longer used - if meta.pop('is_dir', None): - meta['type'] = 'D' # replace is_dir with type - mode = meta['mode'] - if mode != stat.S_IMODE(mode): - meta['mode'] = stat.S_IMODE(mode) - if stat.S_ISDIR(mode): - meta['type'] = 'D' - meta['size'] = 0 - elif stat.S_ISCHR(mode): - meta['type'] = 'C' - meta['size'] = 0 - elif stat.S_ISBLK(mode): - meta['type'] = 'B' - meta['size'] = 0 - elif stat.S_ISFIFO(mode): - meta['type'] = 'F' - meta['size'] = 0 - elif stat.S_ISLNK(mode): - meta['type'] = 'L' - meta['size'] = 0 - elif stat.S_ISSOCK(mode): - meta['type'] = 'S' - meta['size'] = 0 - elif stat.S_ISREG(mode): - meta['type'] = 'R' - else: - meta['type'] = '?' 
- return meta +# mapping from stat's modes to our file types +_mode_map = { + stat.S_IFDIR: 'D', + stat.S_IFCHR: 'C', + stat.S_IFBLK: 'B', + stat.S_IFREG: 'R', + stat.S_IFIFO: 'F', + stat.S_IFLNK: 'L', + stat.S_IFSOCK: 'S', +} class Repository(object): @@ -199,18 +178,19 @@ class Repository(object): def read_fileslist(self, filename): """Read a file list from the repository and return information one line at a time.""" - # we take advantage of the fact that the JSON file contains a single - # path per line (otherwise we would have to load the whole file in - # memory) with self.read_file(filename) as f: - line = f.next() - if line != '[\n': - raise ValueError('error on first line') - for line in f: - if line != ']\n': - yield clean_meta(json.loads(line.rstrip('\n,'))) - if line != ']\n': - raise ValueError('error on last line') + for meta in json.load(f): + # migrate metadata from previous formats + meta.pop('ctime', None) # ctime is no longer used + if meta.pop('is_dir', None): + meta['type'] = 'D' # replace is_dir with type + mode = meta['mode'] + if mode != stat.S_IMODE(mode): + meta['mode'] = stat.S_IMODE(mode) + meta['type'] = _mode_map.get(stat.S_IFMT(mode), '?') + if meta['type'] in 'DCBFLS': + meta['size'] = 0 + yield meta def get_passphrase(self): if not self._passphrase: -- cgit v1.2.3