# repo.py - functions for repository handling
#
# Copyright (C) 2015 Arthur de Jong
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# The files produced as output from the software do not automatically fall
# under the copyright of the software, unless explicitly stated otherwise.

import datetime
import json
import math
import os.path
import random
import re
import stat
import string
import time


# umask used for files created in the repository
# TODO: UMASK = 0077


def base36(i):
    """Return a BASE36 encoded string of the non-negative integer value.

    Zero is encoded as '0'. A ValueError is raised for negative values
    (the digit loop below would otherwise never terminate for them).
    """
    if i < 0:
        raise ValueError('base36() requires a non-negative integer')
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    if i == 0:
        return alphabet[0]
    result = []
    while i != 0:
        i, d = divmod(i, 36)
        result.append(alphabet[d])
    # digits were collected least-significant first
    result.reverse()
    return ''.join(result)


def generate_passphrase(bits, alphabet=string.ascii_letters + string.digits):
    """Return a random passphrase with at least `bits` bits of entropy.

    The characters are picked from `alphabet` using the operating system's
    random source (random.SystemRandom).
    """
    # number of characters needed: log(2 ** bits) / log(len(alphabet));
    # written as bits * log(2) so that large values of bits do not overflow
    length = int(math.ceil(bits * math.log(2) / math.log(len(alphabet))))
    rnd = random.SystemRandom()
    # one extra character is generated on top of the computed minimum
    return ''.join(rnd.choice(alphabet) for _ in range(length + 1))


def generate_archive_name():
    """Generate a name based on a timestamp and random value."""
    name = '%8s-%8s' % (
        base36(int(time.time() * 10)),
        base36(random.randrange(int('zzzzzzzz', 36) + 1)))
    # %8s pads with spaces on the left; turn the padding into zeros
    return name.replace(' ', '0')


def generate_backup_name():
    """Return a name for a new backup based on the current local time."""
    return datetime.datetime.now().strftime('%Y%m%d-%H%M%S')


def clean_meta(meta):
    """Migrate metadata from previous formats.

    The dictionary is modified in place and also returned.
    """
    meta.pop('ctime', None)  # ctime is no longer used
    if meta.pop('is_dir', None):
        meta['type'] = 'D'  # replace is_dir with type
    mode = meta['mode']
    if mode != stat.S_IMODE(mode):
        # the mode still contains file type bits: keep only the permission
        # bits and store the file type in the type field instead
        meta['mode'] = stat.S_IMODE(mode)
        sizeless_types = (
            (stat.S_ISDIR, 'D'),
            (stat.S_ISCHR, 'C'),
            (stat.S_ISBLK, 'B'),
            (stat.S_ISFIFO, 'F'),
            (stat.S_ISLNK, 'L'),
            (stat.S_ISSOCK, 'S'),
        )
        for check, filetype in sizeless_types:
            if check(mode):
                meta['type'] = filetype
                meta['size'] = 0
                break
        else:
            meta['type'] = 'R' if stat.S_ISREG(mode) else '?'
    return meta


class Repository(object):
    """Access to the archives, backups and keys stored in a repository."""

    def __init__(self, repository):
        """Set up access to the repository through a FileBackend."""
        self._passphrase = None
        self._uuid = None
        from backends.file import FileBackend
        self.backend = FileBackend(repository)

    def list_archives(self):
        """Return archives that are found in the repository."""
        pattern = re.compile(
            r'archives/([0-9a-zA-Z]{8}-[0-9a-zA-Z]{8})\.tar')
        for filename in self.backend.listdir('archives'):
            m = pattern.match(filename)
            if m:
                yield m.group(1)

    def remove_archive(self, archive):
        """Remove the files in archives that belong to `archive`.

        The directory listing is fetched once and cached on the instance.
        Errors while removing a file are printed and otherwise ignored.
        """
        # remove archive files that start with `archive`
        if getattr(self, 'archive_files', None) is None:
            self.archive_files = list(self.backend.listdir('archives'))
        for archive_file in self.archive_files:
            if os.path.basename(archive_file).startswith(archive + '.'):
                try:
                    self.backend.remove(archive_file)
                except EnvironmentError:
                    import traceback
                    print(traceback.format_exc())

    def list_backups(self):
        """Return backups that are found in the repository."""
        return sorted(
            os.path.basename(filename)
            for filename in self.backend.listdir('backups'))

    def remove_backup(self, backup):
        """Remove the named backup from the repository.

        Errors while removing are printed and otherwise ignored.
        """
        try:
            self.backend.remove('backups/%s' % backup, recurse=True)
        except EnvironmentError:
            import traceback
            print(traceback.format_exc())

    def write_file(self, filename, executable=False, encryption=None,
                   compression=None):
        """Return an open file handle that can be used to write a file
        contents to."""
        # the compression extension is added first, then the encryption one
        if compression:
            filename = compression.rename(filename)
        if encryption:
            filename = encryption.rename(filename)
        f = self.backend.put(filename, executable)
        # wrap in the reverse order: data written is compressed first and
        # the compressed stream is then encrypted
        if encryption:
            f = encryption.writer(f)
        if compression:
            f = compression.writer(f)
        return f

    def get_filters(self, filename, encryption=None):
        """Strip known filter extensions from the end of filename.

        Returns a tuple of the remaining file name and the list of filters
        that correspond to the removed extensions (outermost first). If an
        encryption filter is passed it takes precedence over the default
        filters for files with its extension.
        """
        from filters import (
            GzipCompression, Bzip2Compression, XZCompression,
            GnuPGEncryption)
        # (extension, callable returning the filter) in order of priority
        candidates = []
        if encryption:
            candidates.append((encryption.extension, lambda: encryption))
        candidates.extend((
            (GnuPGEncryption.extension, lambda: GnuPGEncryption(self)),
            (GzipCompression.extension, GzipCompression),
            (Bzip2Compression.extension, Bzip2Compression),
            (XZCompression.extension, XZCompression),
        ))
        filters = []
        while True:
            for extension, factory in candidates:
                if filename.endswith('.' + extension):
                    filename = filename[:-(len(extension) + 1)]
                    filters.append(factory())
                    break
            else:
                # no known extension left at the end of the file name
                break
        return filename, filters

    def expand(self, filename, encryption=None):
        """Find the stored file that corresponds to filename.

        Returns a tuple of the name of the file as present in the backend
        (including any filter extensions) and the list of filters. If no
        file is found the passed filename and an empty list are returned.
        """
        path = os.path.dirname(filename)
        for f in self.backend.listdir(path):
            if f.startswith(filename):
                name, filters = self.get_filters(f, encryption)
                if name == filename:
                    return f, filters
        return filename, []

    def read_file(self, filename, encryption=None):
        """Returns an open file handle that can be used to read from."""
        filename, filters = self.expand(filename, encryption)
        f = self.backend.get(filename)
        for fltr in filters:
            f = fltr.reader(f)
        return f

    def read_fileslist(self, filename):
        """Read a file list from the repository and return information one
        line at a time.

        A ValueError is raised if the first line is not '[' or the last
        line is not ']'.
        """
        # we take advantage of the fact that the JSON file contains a single
        # path per line (otherwise we would have to load the whole file in
        # memory)
        with self.read_file(filename) as f:
            # an empty file results in None which fails the check below
            line = next(f, None)
            if line != '[\n':
                raise ValueError('error on first line')
            for line in f:
                if line != ']\n':
                    yield clean_meta(json.loads(line.rstrip('\n,')))
            if line != ']\n':
                raise ValueError('error on last line')

    def get_passphrase(self):
        """Return the repository passphrase, reading it from the
        keys/passphrase file on first use."""
        if not self._passphrase:
            # TODO: we should be able to use multiple passphrases for
            #       different parts of the repository
            from filters import GnuPGKeyEncryption
            with self.read_file(
                    'keys/passphrase',
                    encryption=GnuPGKeyEncryption()) as f:
                self._passphrase = str(f.read()).strip()
        return self._passphrase

    def get_or_create_passphrase(self):
        """Return the stored passphrase or a newly generated one if the
        stored one cannot be read (the new passphrase is not saved)."""
        try:
            return self.get_passphrase()
        except EnvironmentError:
            # generate a passphrase with about 256 bits entropy
            return generate_passphrase(256)

    def write_passphrase(self, passphrase, keys):
        """Store the passphrase in the repository, encrypted for keys.

        The passphrase is first written to a .new file and read back to
        verify it (AssertionError is raised on a mismatch). After that any
        current passphrase file is renamed to .old and the new file is put
        in place.
        """
        from filters import GnuPGKeyEncryption
        filename = 'keys/passphrase'
        # remove any existing new files
        newfile, _ = self.expand(filename + '.new')
        if self.backend.exists(newfile):
            self.backend.remove(newfile)
        # write out the new encrypted file
        with self.write_file(
                filename + '.new',
                encryption=GnuPGKeyEncryption(keys)) as f:
            f.write(('%s\n' % passphrase).encode('utf-8'))
        # check that we can actually read the file (raised explicitly so
        # the check is not skipped when running with python -O)
        with self.read_file(
                filename + '.new', encryption=GnuPGKeyEncryption()) as f:
            if str(f.read()).strip() != passphrase:
                raise AssertionError(
                    'passphrase read back differs from passphrase written')
        # figure out the file names
        newfile, _ = self.expand(filename + '.new')
        curfile, _ = self.expand(filename)
        oldfile, _ = self.expand(filename + '.old')
        # remove the old backup
        if self.backend.exists(oldfile):
            self.backend.remove(oldfile)
        # backup the old passphrase
        if self.backend.exists(curfile):
            # insert .old before the first extension (or append it if the
            # file name has no extension)
            oldfile = (curfile + '.').replace('.', '.old.', 1).rstrip('.')
            self.backend.rename(curfile, oldfile)
        # put new passphrase file in place
        self.backend.rename(newfile, newfile.replace('.new', '', 1))

    @property
    def uuid(self):
        """The unique identifier of the repository.

        The value is read from the uuid file in the repository. If it
        cannot be read or is not valid a new one is generated and saved.
        """
        if self._uuid:
            return self._uuid
        # try to read the uuid file
        try:
            with self.read_file('uuid') as f:
                uuid = str(f.read()).strip()
            if re.match(r'^[0-9a-z]{8,16}$', uuid):
                self._uuid = uuid
                return uuid
        except EnvironmentError:
            pass  # ignore reading uuid, generate one instead
        # generate a new uuid
        uuid = '%16s' % base36(random.randrange(int(16 * 'z', 36) + 1))
        # strings are immutable: the zero-padded value has to be assigned
        uuid = uuid.replace(' ', '0')
        self._uuid = uuid
        # save the uuid in the repository
        with self.write_file('uuid') as f:
            f.write(('%s\n' % uuid).encode('utf-8'))
        return uuid