Improve performance of metadata reading (HEAD, master)

This inlines the clean_meta() function and reads the whole JSON file into memory to greatly reduce the number of function calls performed while reading the files list. This is especially noticeable when reading the files lists of backups. It does mean that more memory is used when reading large files.
author: Arthur de Jong <arthur@arthurdejong.org> 2015-07-03 18:15:00 +0200
committer: Arthur de Jong <arthur@arthurdejong.org> 2015-07-03 18:15:11 +0200
commit: 46510bf84a666ed26dd0db029d52421cdfc2b32c (patch)
tree: b94f96ad2fb6e050c3b64d399bb0a67169cd2d22
parent: 9f434bb5f69ff63da3d9fe3e7ff3cb24f7b15c2b (diff)
1 file changed, 22 insertions, 42 deletions
diff --git a/repo.py b/repo.py
index eda0d2a..9523e10 100644
--- a/repo.py
+++ b/repo.py
@@ -63,37 +63,16 @@ def generate_backup_name():
     return datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
 
 
-def clean_meta(meta):
-    """Migrate metadata from previous formats."""
-    meta.pop('ctime', None)  # ctime is no longer used
-    if meta.pop('is_dir', None):
-        meta['type'] = 'D'  # replace is_dir with type
-    mode = meta['mode']
-    if mode != stat.S_IMODE(mode):
-        meta['mode'] = stat.S_IMODE(mode)
-        if stat.S_ISDIR(mode):
-            meta['type'] = 'D'
-            meta['size'] = 0
-        elif stat.S_ISCHR(mode):
-            meta['type'] = 'C'
-            meta['size'] = 0
-        elif stat.S_ISBLK(mode):
-            meta['type'] = 'B'
-            meta['size'] = 0
-        elif stat.S_ISFIFO(mode):
-            meta['type'] = 'F'
-            meta['size'] = 0
-        elif stat.S_ISLNK(mode):
-            meta['type'] = 'L'
-            meta['size'] = 0
-        elif stat.S_ISSOCK(mode):
-            meta['type'] = 'S'
-            meta['size'] = 0
-        elif stat.S_ISREG(mode):
-            meta['type'] = 'R'
-        else:
-            meta['type'] = '?'
-    return meta
+# mapping from stat's modes to our file types
+_mode_map = {
+    stat.S_IFDIR: 'D',
+    stat.S_IFCHR: 'C',
+    stat.S_IFBLK: 'B',
+    stat.S_IFREG: 'R',
+    stat.S_IFIFO: 'F',
+    stat.S_IFLNK: 'L',
+    stat.S_IFSOCK: 'S',
+}
 
 
 class Repository(object):
@@ -199,18 +178,19 @@ class Repository(object):
     def read_fileslist(self, filename):
         """Read a file list from the repository and return information one
         line at a time."""
-        # we take advantage of the fact that the JSON file contains a single
-        # path per line (otherwise we would have to load the whole file in
-        # memory)
         with self.read_file(filename) as f:
-            line = f.next()
-            if line != '[\n':
-                raise ValueError('error on first line')
-            for line in f:
-                if line != ']\n':
-                    yield clean_meta(json.loads(line.rstrip('\n,')))
-            if line != ']\n':
-                raise ValueError('error on last line')
+            for meta in json.load(f):
+                # migrate metadata from previous formats
+                meta.pop('ctime', None)  # ctime is no longer used
+                if meta.pop('is_dir', None):
+                    meta['type'] = 'D'  # replace is_dir with type
+                mode = meta['mode']
+                if mode != stat.S_IMODE(mode):
+                    meta['mode'] = stat.S_IMODE(mode)
+                    meta['type'] = _mode_map.get(stat.S_IFMT(mode), '?')
+                    if meta['type'] in 'DCBFLS':
+                        meta['size'] = 0
+                yield meta
 
     def get_passphrase(self):
         if not self._passphrase:
author	Arthur de Jong <arthur@arthurdejong.org>	2015-07-03 18:15:00 +0200
committer	Arthur de Jong <arthur@arthurdejong.org>	2015-07-03 18:15:11 +0200
commit	46510bf84a666ed26dd0db029d52421cdfc2b32c (patch)
tree	b94f96ad2fb6e050c3b64d399bb0a67169cd2d22
parent	9f434bb5f69ff63da3d9fe3e7ff3cb24f7b15c2b (diff)