Arthur de Jong

Open Source / Free Software developer

summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Arthur de Jong <arthur@arthurdejong.org>, 2015-07-03 18:15:00 +0200
committer: Arthur de Jong <arthur@arthurdejong.org>, 2015-07-03 18:15:11 +0200
commit: 46510bf84a666ed26dd0db029d52421cdfc2b32c (patch)
tree: b94f96ad2fb6e050c3b64d399bb0a67169cd2d22
parent: 9f434bb5f69ff63da3d9fe3e7ff3cb24f7b15c2b (diff)
Improve performance of metadata reading (HEAD, master)
This inlines the clean_meta() function and reads the whole JSON file into memory to greatly reduce the number of function calls that are performed when reading the files list. This is especially noticeable when reading the backup files lists. This does mean that more memory is used when reading large files.
-rw-r--r-- repo.py 64
1 files changed, 22 insertions, 42 deletions
diff --git a/repo.py b/repo.py
index eda0d2a..9523e10 100644
--- a/repo.py
+++ b/repo.py
@@ -63,37 +63,16 @@ def generate_backup_name():
return datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
-def clean_meta(meta):
- """Migrate metadata from previous formats."""
- meta.pop('ctime', None) # ctime is no longer used
- if meta.pop('is_dir', None):
- meta['type'] = 'D' # replace is_dir with type
- mode = meta['mode']
- if mode != stat.S_IMODE(mode):
- meta['mode'] = stat.S_IMODE(mode)
- if stat.S_ISDIR(mode):
- meta['type'] = 'D'
- meta['size'] = 0
- elif stat.S_ISCHR(mode):
- meta['type'] = 'C'
- meta['size'] = 0
- elif stat.S_ISBLK(mode):
- meta['type'] = 'B'
- meta['size'] = 0
- elif stat.S_ISFIFO(mode):
- meta['type'] = 'F'
- meta['size'] = 0
- elif stat.S_ISLNK(mode):
- meta['type'] = 'L'
- meta['size'] = 0
- elif stat.S_ISSOCK(mode):
- meta['type'] = 'S'
- meta['size'] = 0
- elif stat.S_ISREG(mode):
- meta['type'] = 'R'
- else:
- meta['type'] = '?'
- return meta
+# mapping from stat's modes to our file types
+_mode_map = {
+ stat.S_IFDIR: 'D',
+ stat.S_IFCHR: 'C',
+ stat.S_IFBLK: 'B',
+ stat.S_IFREG: 'R',
+ stat.S_IFIFO: 'F',
+ stat.S_IFLNK: 'L',
+ stat.S_IFSOCK: 'S',
+}
class Repository(object):
@@ -199,18 +178,19 @@ class Repository(object):
def read_fileslist(self, filename):
"""Read a file list from the repository and return information one
line at a time."""
- # we take advantage of the fact that the JSON file contains a single
- # path per line (otherwise we would have to load the whole file in
- # memory)
with self.read_file(filename) as f:
- line = f.next()
- if line != '[\n':
- raise ValueError('error on first line')
- for line in f:
- if line != ']\n':
- yield clean_meta(json.loads(line.rstrip('\n,')))
- if line != ']\n':
- raise ValueError('error on last line')
+ for meta in json.load(f):
+ # migrate metadata from previous formats
+ meta.pop('ctime', None) # ctime is no longer used
+ if meta.pop('is_dir', None):
+ meta['type'] = 'D' # replace is_dir with type
+ mode = meta['mode']
+ if mode != stat.S_IMODE(mode):
+ meta['mode'] = stat.S_IMODE(mode)
+ meta['type'] = _mode_map.get(stat.S_IFMT(mode), '?')
+ if meta['type'] in 'DCBFLS':
+ meta['size'] = 0
+ yield meta
def get_passphrase(self):
if not self._passphrase: