author    Arthur de Jong <arthur@arthurdejong.org>  2015-06-30 22:48:24 +0200
committer Arthur de Jong <arthur@arthurdejong.org>  2015-06-30 22:58:58 +0200
commit    1f56ce57c50f15e408ffc004e6f736f2fa3249b6 (patch)
tree      0902504fdaa4e8de43a2763ec709916c5f90095d /backup.py
parent    4c3cbfb8876efe74a918deda71e180be31f785c7 (diff)
Do not cache full backup contents
Storing the full backup contents in SQLite is slow and grows the cache to a huge size. Reading the files lists straight from the repository may be a bit slower, but it saves a lot of space and overhead and removes quite some complexity.
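
The new code paths read a per-backup files list (backups/<name>/files.json) from the repository and filter it in Python. The Repository.read_fileslist() implementation is not part of this diff; the sketch below shows one way it could work, assuming one JSON document per line and a hypothetical repo.open() for reading files from the repository.

    import json

    def read_fileslist(repo, path):
        """Yield one metadata dictionary per entry of a backup files list."""
        # Assumes a JSON-lines format; the real on-disk format is not
        # shown in this commit.
        with repo.open(path) as fileobj:
            for line in fileobj:
                line = line.strip()
                if line:
                    yield json.loads(line)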
Diffstat (limited to 'backup.py')
-rwxr-xr-x  backup.py  106
1 file changed, 33 insertions(+), 73 deletions(-)
diff --git a/backup.py b/backup.py
index a13d4ec..6a45177 100755
--- a/backup.py
+++ b/backup.py
@@ -622,77 +622,49 @@ def list_backups(config, repo):
print(' %s' % ', '.join(extra))
+def _pattern_match(meta, patterns):
+ return not patterns or \
+ meta['path'] in patterns or \
+ any(meta['path'].startswith(pattern + '/')
+ for pattern in patterns)
+
+
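
A few hypothetical calls showing the matching rules of the new helper: an empty pattern list matches everything, and a path matches when it equals a pattern exactly or lies below a pattern directory (a mere name prefix is not enough):

    _pattern_match({'path': 'home/arthur/notes.txt'}, [])
    # True: no patterns means no filtering
    _pattern_match({'path': 'home/arthur/notes.txt'}, ['home/arthur'])
    # True: the entry lies below the home/arthur directory
    _pattern_match({'path': 'home/arthur2'}, ['home/arthur'])
    # False: neither equal to nor below 'home/arthur'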
def list_contents(config, repo):
"""List backup contents and print file listing."""
- from cache import MetaData, escape_like
from ls import ls_format
- backup = config.backup
- # bring metadata cache in sync with repository
- db = MetaData(config.cache_dir, repo.uuid)
- db.resync_backup_contents(repo, backup=backup)
- # filter file list
- pred = []
- args = [backup]
- for pattern in config.files:
- pattern = pattern.rstrip('/')
- pred.append('''`path` = ?''')
- args.append(pattern)
- pred.append('''`path` LIKE ? ESCAPE '\\' ''')
- args.append(escape_like(pattern) + '/%')
- cursor = db.connection.execute('''
- SELECT `path`, `meta`
- FROM `backup_contents`
- WHERE `backup` = ?
- %s %s
- ORDER BY `path`
- ''' % ('AND' if pred else '', ' OR '.join(pred)), args)
- # go over results
- print('%s:' % backup)
- for path, meta in cursor:
- ls_format(json.loads(meta))
+ patterns = [pattern.rstrip('/') for pattern in config.files]
+ print('%s:' % config.backup)
+ for meta in repo.read_fileslist('backups/%s/files.json' % config.backup):
+ if _pattern_match(meta, patterns):
+ ls_format(meta)
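
The meta dictionaries yielded by read_fileslist() stand in for the JSON blobs previously stored in the backup_contents table. Only the 'path' and 'type' keys are visible in this diff; the other keys below are assumptions about what ls_format() displays:

    meta = {
        'path': 'home/arthur/notes.txt',  # used by _pattern_match()
        'type': 'F',                      # 'D' marks a directory (see find())
        'size': 4096,                     # assumed: not shown in this diff
        'mode': 0o644,                    # assumed: not shown in this diff
        'mtime': 1435697304,              # assumed: not shown in this diff
    }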
def find(config, repo):
"""Find archives containing the files."""
- from cache import MetaData
from ls import ls_format
from path import pattern2re
- # bring metadata cache in sync with repository
- db = MetaData(config.cache_dir, repo.uuid)
- db.resync_backup_contents(repo)
- # find backups containing file
- pred = []
- args = []
- for pattern in config.files:
- if pattern.endswith('/'):
- pred.append(
- '''(`path` REGEXP ? AND `meta` LIKE '%"type": "D"%')''')
- else:
- pred.append('`path` REGEXP ?')
- args.append(pattern2re(pattern.rstrip('/')))
- cursor = db.connection.execute('''
- SELECT `backup`, `meta`
- FROM `backup_contents`
- WHERE %s
- ORDER BY `backup`, `path`
- ''' % ' OR '.join(pred), args)
- # print results
- for backup, rows in itertools.groupby(cursor, lambda row: row[0]):
+ patterns = [
+ (pattern2re(pattern.rstrip('/')), pattern.endswith('/'))
+ for pattern in config.files]
+ for backup in repo.list_backups():
print('%s:' % backup)
- for row in rows:
- ls_format(json.loads(row[1]))
+ for meta in repo.read_fileslist('backups/%s/files.json' % backup):
+ if any(pattern.match(meta['path']) and (
+ (not is_dir) or (meta['type'] == 'D'))
+ for pattern, is_dir in patterns):
+ ls_format(meta)
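
find() now pairs each compiled pattern with a directories-only flag: a trailing '/' on the command line restricts the pattern to directory entries, replacing the old LIKE '%"type": "D"%' predicate. For example (pattern2re() from the path module turns a shell-style pattern into an object with a match() method, as used above):

    from path import pattern2re

    patterns = [
        (pattern2re(pattern.rstrip('/')), pattern.endswith('/'))
        for pattern in ['*.txt', 'home/arthur/']]
    # '*.txt'        -> (regex, False): matches files and directories alike
    # 'home/arthur/' -> (regex, True):  only matches entries with type 'D'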
def restore(config, repo):
"""Restore files from a backup in the repository."""
- from cache import MetaData, escape_like
+ from cache import MetaData
from filters import GnuPGKeyEncryption, Reader
repo.keyencryption = GnuPGKeyEncryption()
backup = config.backup
# bring metadata cache in sync with repository
db = MetaData(config.cache_dir, repo.uuid)
db.resync_backups(repo)
- db.resync_backup_contents(repo, backup)
db.resync_archives(repo)
# get list of needed archives
cursor = db.connection.execute('''
@@ -707,27 +679,19 @@ def restore(config, repo):
CREATE TEMPORARY TABLE `tmp_torestore`
( `meta` TEXT NOT NULL );
''')
- pred = []
- args = [backup]
- for pattern in config.files:
- pattern = pattern.rstrip('/')
- pred.append('''`path` = ?''')
- args.append(pattern)
- pred.append('''`path` LIKE ? ESCAPE '\\' ''')
- args.append(escape_like(pattern) + '/%')
+ patterns = [pattern.rstrip('/') for pattern in config.files]
+ print('%s: reading files list' % config.backup)
+ fileslist = repo.read_fileslist('backups/%s/files.json' % backup)
with db.connection:
- pred = ' OR '.join(pred)
- if pred:
- pred = 'AND ( %s )' % pred
- db.connection.execute('''
+ db.connection.executemany('''
INSERT INTO `tmp_torestore`
(`meta`)
- SELECT `meta`
- FROM `backup_contents`
- WHERE `backup` = ?
- %s
- ORDER BY `path`
- ''' % pred, args)
+ VALUES
+ (?)
+ ''', (
+ (json.dumps(meta, sort_keys=True),)
+ for meta in fileslist
+ if _pattern_match(meta, patterns)))
db.connection.executescript('''
CREATE INDEX IF NOT EXISTS `tmp_torestore_meta_idx`
ON `tmp_torestore` (`meta`);
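
A standalone sketch of the streaming insert above: sqlite3's executemany() accepts a generator expression, so the filtered files list is written to the temporary table without ever being materialized as a list in memory (the table and data here are illustrative):

    import json
    import sqlite3

    def _pattern_match(meta, patterns):
        # copy of the helper introduced at the top of this diff
        return not patterns or meta['path'] in patterns or \
            any(meta['path'].startswith(pattern + '/') for pattern in patterns)

    connection = sqlite3.connect(':memory:')
    connection.execute(
        'CREATE TEMPORARY TABLE `tmp_torestore` (`meta` TEXT NOT NULL)')
    fileslist = [{'path': 'etc/passwd'}, {'path': 'home/arthur'}]
    with connection:
        connection.executemany(
            'INSERT INTO `tmp_torestore` (`meta`) VALUES (?)',
            ((json.dumps(meta, sort_keys=True),)
             for meta in fileslist
             if _pattern_match(meta, ['etc'])))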
@@ -888,10 +852,6 @@ def remove_backups(config, repo):
DELETE FROM `backups`
WHERE `backup` = ?
''', (backup, ))
- db.connection.execute('''
- DELETE FROM `backup_contents`
- WHERE `backup` = ?
- ''', (backup, ))
# find archives that are no longer used
cursor = db.connection.execute('''
SELECT `archive`