From 46510bf84a666ed26dd0db029d52421cdfc2b32c Mon Sep 17 00:00:00 2001
From: Arthur de Jong <arthur@arthurdejong.org>
Date: Fri, 3 Jul 2015 18:15:00 +0200
Subject: Improve performance of metadata reading

This inlines the clean_meta() function and reads the whole JSON file in
memory to greatly reduce the number of function calls that are performed
reading the files list. This is especially noticeable when reading the
backup files lists.

This does mean that more memory is used when reading large files.
---
 repo.py | 64 ++++++++++++++++++++++------------------------------------------
 1 file changed, 22 insertions(+), 42 deletions(-)

diff --git a/repo.py b/repo.py
index eda0d2a..9523e10 100644
--- a/repo.py
+++ b/repo.py
@@ -63,37 +63,16 @@ def generate_backup_name():
     return datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
 
 
-def clean_meta(meta):
-    """Migrate metadata from previous formats."""
-    meta.pop('ctime', None)  # ctime is no longer used
-    if meta.pop('is_dir', None):
-        meta['type'] = 'D'  # replace is_dir with type
-    mode = meta['mode']
-    if mode != stat.S_IMODE(mode):
-        meta['mode'] = stat.S_IMODE(mode)
-        if stat.S_ISDIR(mode):
-            meta['type'] = 'D'
-            meta['size'] = 0
-        elif stat.S_ISCHR(mode):
-            meta['type'] = 'C'
-            meta['size'] = 0
-        elif stat.S_ISBLK(mode):
-            meta['type'] = 'B'
-            meta['size'] = 0
-        elif stat.S_ISFIFO(mode):
-            meta['type'] = 'F'
-            meta['size'] = 0
-        elif stat.S_ISLNK(mode):
-            meta['type'] = 'L'
-            meta['size'] = 0
-        elif stat.S_ISSOCK(mode):
-            meta['type'] = 'S'
-            meta['size'] = 0
-        elif stat.S_ISREG(mode):
-            meta['type'] = 'R'
-        else:
-            meta['type'] = '?'
-    return meta
+# mapping from stat's modes to our file types
+_mode_map = {
+    stat.S_IFDIR: 'D',
+    stat.S_IFCHR: 'C',
+    stat.S_IFBLK: 'B',
+    stat.S_IFREG: 'R',
+    stat.S_IFIFO: 'F',
+    stat.S_IFLNK: 'L',
+    stat.S_IFSOCK: 'S',
+}
 
 
 class Repository(object):
@@ -199,18 +178,19 @@ class Repository(object):
     def read_fileslist(self, filename):
         """Read a file list from the repository and return information one
         line at a time."""
-        # we take advantage of the fact that the JSON file contains a single
-        # path per line (otherwise we would have to load the whole file in
-        # memory)
         with self.read_file(filename) as f:
-            line = f.next()
-            if line != '[\n':
-                raise ValueError('error on first line')
-            for line in f:
-                if line != ']\n':
-                    yield clean_meta(json.loads(line.rstrip('\n,')))
-            if line != ']\n':
-                raise ValueError('error on last line')
+            for meta in json.load(f):
+                # migrate metadata from previous formats
+                meta.pop('ctime', None)  # ctime is no longer used
+                if meta.pop('is_dir', None):
+                    meta['type'] = 'D'  # replace is_dir with type
+                mode = meta['mode']
+                if mode != stat.S_IMODE(mode):
+                    meta['mode'] = stat.S_IMODE(mode)
+                    meta['type'] = _mode_map.get(stat.S_IFMT(mode), '?')
+                    if meta['type'] in 'DCBFLS':
+                        meta['size'] = 0
+                yield meta
 
     def get_passphrase(self):
         if not self._passphrase:
-- 
cgit v1.2.3