diff options
author | Arthur de Jong <arthur@arthurdejong.org> | 2015-06-30 22:48:24 +0200 |
---|---|---|
committer | Arthur de Jong <arthur@arthurdejong.org> | 2015-06-30 22:58:58 +0200 |
commit | 1f56ce57c50f15e408ffc004e6f736f2fa3249b6 (patch) | |
tree | 0902504fdaa4e8de43a2763ec709916c5f90095d /backup.py | |
parent | 4c3cbfb8876efe74a918deda71e180be31f785c7 (diff) |
Do not cache full backup contents
Storing this in SQLite is slow and grows the cache to a huge size. The
approach of reading these file lists may be a bit slower but saves a
lot of space and overhead and removes quite some complexity.
Diffstat (limited to 'backup.py')
-rwxr-xr-x | backup.py | 106 |
1 file changed, 33 insertions, 73 deletions
@@ -622,77 +622,49 @@ def list_backups(config, repo): print(' %s' % ', '.join(extra)) +def _pattern_match(meta, patterns): + return not patterns or \ + meta['path'] in patterns or \ + any(meta['path'].startswith(pattern + '/') + for pattern in patterns) + + def list_contents(config, repo): """List backup contents and print file listing.""" - from cache import MetaData, escape_like from ls import ls_format - backup = config.backup - # bring metadata cache in sync with repository - db = MetaData(config.cache_dir, repo.uuid) - db.resync_backup_contents(repo, backup=backup) - # filter file list - pred = [] - args = [backup] - for pattern in config.files: - pattern = pattern.rstrip('/') - pred.append('''`path` = ?''') - args.append(pattern) - pred.append('''`path` LIKE ? ESCAPE '\\' ''') - args.append(escape_like(pattern) + '/%') - cursor = db.connection.execute(''' - SELECT `path`, `meta` - FROM `backup_contents` - WHERE `backup` = ? - %s %s - ORDER BY `path` - ''' % ('AND' if pred else '', ' OR '.join(pred)), args) - # go over results - print('%s:' % backup) - for path, meta in cursor: - ls_format(json.loads(meta)) + patterns = [pattern.rstrip('/') for pattern in config.files] + print('%s:' % config.backup) + for meta in repo.read_fileslist('backups/%s/files.json' % config.backup): + if _pattern_match(meta, patterns): + ls_format(meta) def find(config, repo): """Find archives containing the files.""" - from cache import MetaData from ls import ls_format from path import pattern2re - # bring metadata cache in sync with repository - db = MetaData(config.cache_dir, repo.uuid) - db.resync_backup_contents(repo) - # find backups containing file - pred = [] - args = [] - for pattern in config.files: - if pattern.endswith('/'): - pred.append( - '''(`path` REGEXP ? 
AND `meta` LIKE '%"type": "D"%')''') - else: - pred.append('`path` REGEXP ?') - args.append(pattern2re(pattern.rstrip('/'))) - cursor = db.connection.execute(''' - SELECT `backup`, `meta` - FROM `backup_contents` - WHERE %s - ORDER BY `backup`, `path` - ''' % ' OR '.join(pred), args) - # print results - for backup, rows in itertools.groupby(cursor, lambda row: row[0]): + patterns = [ + (pattern2re(pattern.rstrip('/')), pattern.endswith('/')) + for pattern in config.files] + for backup in repo.list_backups(): print('%s:' % backup) - for row in rows: - ls_format(json.loads(row[1])) + for meta in repo.read_fileslist('backups/%s/files.json' % backup): + ok = False + if any(pattern.match(meta['path']) and ( + (not is_dir) or (meta['type'] == 'D')) + for pattern, is_dir in patterns): + ls_format(meta) def restore(config, repo): """Restore files from a backup in the repository.""" - from cache import MetaData, escape_like + from cache import MetaData from filters import GnuPGKeyEncryption, Reader repo.keyencryption = GnuPGKeyEncryption() backup = config.backup # bring metadata cache in sync with repository db = MetaData(config.cache_dir, repo.uuid) db.resync_backups(repo) - db.resync_backup_contents(repo, backup) db.resync_archives(repo) # get list of needed archives cursor = db.connection.execute(''' @@ -707,27 +679,19 @@ def restore(config, repo): CREATE TEMPORARY TABLE `tmp_torestore` ( `meta` TEXT NOT NULL ); ''') - pred = [] - args = [backup] - for pattern in config.files: - pattern = pattern.rstrip('/') - pred.append('''`path` = ?''') - args.append(pattern) - pred.append('''`path` LIKE ? 
ESCAPE '\\' ''') - args.append(escape_like(pattern) + '/%') + patterns = [pattern.rstrip('/') for pattern in config.files] + print('%s: reading files list' % config.backup) + fileslist = repo.read_fileslist('backups/%s/files.json' % backup) with db.connection: - pred = ' OR '.join(pred) - if pred: - pred = 'AND ( %s )' % pred - db.connection.execute(''' + db.connection.executemany(''' INSERT INTO `tmp_torestore` (`meta`) - SELECT `meta` - FROM `backup_contents` - WHERE `backup` = ? - %s - ORDER BY `path` - ''' % pred, args) + VALUES + (?) + ''', ( + (json.dumps(meta, sort_keys=True),) + for meta in fileslist + if _pattern_match(meta, patterns))) db.connection.executescript(''' CREATE INDEX IF NOT EXISTS `tmp_torestore_meta_idx` ON `tmp_torestore` (`meta`); @@ -888,10 +852,6 @@ def remove_backups(config, repo): DELETE FROM `backups` WHERE `backup` = ? ''', (backup, )) - db.connection.execute(''' - DELETE FROM `backup_contents` - WHERE `backup` = ? - ''', (backup, )) # find archives that are no longer used cursor = db.connection.execute(''' SELECT `archive` |