repo.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261

# repo.py - functions for repository handling
#
# Copyright (C) 2015 Arthur de Jong
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
#
# The files produced as output from the software do not automatically fall
# under the copyright of the software, unless explicitly stated otherwise.

import datetime
import json
import math
import os.path
import random
import re
import stat
import string
import time


# umask used for files created in the repository
# TODO: UMASK = 0077

def base36(i):
    """Return a BASE36 encoded string of the integer value.

    The digits are ``0-9`` followed by lowercase ``a-z``, most significant
    digit first. Zero is encoded as ``'0'``.

    Raises ValueError for negative values: repeated floor division of a
    negative number never reaches zero, so the loop would not terminate.
    """
    if i < 0:
        raise ValueError('base36() requires a non-negative integer')
    if i == 0:
        return '0'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    result = []
    while i != 0:
        i, d = divmod(i, 36)
        result.append(alphabet[d])
    # digits were collected least significant first
    result.reverse()
    return ''.join(result)


def generate_passphrase(bits, alphabet=string.ascii_letters + string.digits):
    """Return a random passphrase with at least `bits` bits of entropy.

    The characters are drawn from `alphabet` using random.SystemRandom.
    The length is the number of characters needed to represent `bits`
    bits with the given alphabet, rounded up, plus one extra character.
    """
    # bits * log(2) equals log(2 ** bits) but does not overflow a float
    # for large values of bits
    length = int(math.ceil(bits * math.log(2) / math.log(len(alphabet))))
    rng = random.SystemRandom()
    # range() is available on both Python 2 and Python 3
    return ''.join(rng.choice(alphabet) for _ in range(length + 1))


def generate_archive_name():
    """Return an archive name built from the current time and a random part.

    Both parts are BASE36 encoded and left-padded with zeros to at least
    eight characters; they are joined by a dash.
    """
    # the clock value in tenths of a second
    stamp = base36(int(time.time() * 10))
    # random value covering every eight character BASE36 string
    nonce = base36(random.randrange(int('zzzzzzzz', 36) + 1))
    padded = '%8s-%8s' % (stamp, nonce)
    return padded.replace(' ', '0')


def generate_backup_name():
    """Return a backup name based on the current local date and time.

    The name has the form ``YYYYMMDD-HHMMSS``.
    """
    now = datetime.datetime.now()
    return now.strftime('%Y%m%d-%H%M%S')


# translation table from the file type bits of a stat mode (as returned
# by stat.S_IFMT) to the single letter file types used in file lists
_mode_map = dict([
    (stat.S_IFREG, 'R'),
    (stat.S_IFDIR, 'D'),
    (stat.S_IFLNK, 'L'),
    (stat.S_IFCHR, 'C'),
    (stat.S_IFBLK, 'B'),
    (stat.S_IFIFO, 'F'),
    (stat.S_IFSOCK, 'S'),
])


class Repository(object):

    def __init__(self, repository):
        """Set up access to a repository.

        `repository` is handed unchanged to FileBackend; the passphrase
        and uuid caches start out empty.
        """
        # the backend module is only imported once a Repository is created
        from backends.file import FileBackend
        self.backend = FileBackend(repository)
        self._uuid = None
        self._passphrase = None

    def list_archives(self):
        """Return archives that are found in the repository."""
        filelist = set()
        for filename in self.backend.listdir('archives'):
            m = re.match(r'archives/([0-9a-zA-Z]{8}-[0-9a-zA-Z]{8})\.tar',
                         filename)
            if m:
                yield m.group(1)

    def remove_archive(self, archive):
        # remove archive files that start with `archive`
        if getattr(self, 'archive_files', None) is None:
            self.archive_files = list(self.backend.listdir('archives'))
        for archive_file in self.archive_files:
            if os.path.basename(archive_file).startswith(archive + '.'):
                try:
                    self.backend.remove(archive_file)
                except EnvironmentError:
                    import traceback
                    print(traceback.format_exc())

    def list_backups(self):
        """Return backups that are found in the repository."""
        filelist = list()
        for filename in self.backend.listdir('backups'):
            filelist.append(os.path.basename(filename))
        filelist.sort()
        return filelist

    def remove_backup(self, backup):
        try:
            self.backend.remove('backups/%s' % backup, recurse=True)
        except EnvironmentError:
            import traceback
            print(traceback.format_exc())

    def write_file(self, filename, executable=False,
                   encryption=None, compression=None):
        """Return an open file handle that can be used to write a file
        contents to."""
        if compression:
            filename = compression.rename(filename)
        if encryption:
            filename = encryption.rename(filename)
        f = self.backend.put(filename, executable)
        if encryption:
            f = encryption.writer(f)
        if compression:
            f = compression.writer(f)
        return f

    def get_filters(self, filename, encryption=None):
        """Strip known filter extensions from the end of `filename`.

        Extensions are removed one at a time, from the end of the name,
        until the name no longer ends in a known extension. If `encryption`
        is given its extension is tried before the built-in ones.

        Returns a ``(name, filters)`` tuple with the remaining name and
        the filter objects in the order their extensions were removed.
        """
        from filters import (
            GzipCompression, Bzip2Compression, XZCompression, GnuPGEncryption)
        # (class that defines the extension, callable that creates the
        # filter object), tried in this order
        known = (
            (GnuPGEncryption, lambda: GnuPGEncryption(self)),
            (GzipCompression, GzipCompression),
            (Bzip2Compression, Bzip2Compression),
            (XZCompression, XZCompression),
        )
        found = []
        stripping = True
        while stripping:
            if encryption and filename.endswith('.' + encryption.extension):
                filename = filename[:-(len(encryption.extension) + 1)]
                found.append(encryption)
                continue
            for cls, factory in known:
                suffix = '.' + cls.extension
                if filename.endswith(suffix):
                    filename = filename[:-len(suffix)]
                    found.append(factory())
                    break
            else:
                # no known extension is left at the end of the name
                stripping = False
        return filename, found

    def expand(self, filename, encryption=None):
        path = os.path.dirname(filename)
        for f in self.backend.listdir(path):
            if f.startswith(filename):
                name, filters = self.get_filters(f, encryption)
                if name == filename:
                    return f, filters
        return filename, []

    def read_file(self, filename, encryption=None):
        """Returns an open file handle that can be used to read from."""
        filename, filters = self.expand(filename, encryption)
        f = self.backend.get(filename)
        for fltr in filters:
            f = fltr.reader(f)
        return f

    def read_fileslist(self, filename):
        """Read a file list from the repository and return information one
        line at a time."""
        with self.read_file(filename) as f:
            for meta in json.load(f):
                # migrate metadata from previous formats
                meta.pop('ctime', None)  # ctime is no longer used
                if meta.pop('is_dir', None):
                    meta['type'] = 'D'  # replace is_dir with type
                mode = meta['mode']
                if mode != stat.S_IMODE(mode):
                    meta['mode'] = stat.S_IMODE(mode)
                    meta['type'] = _mode_map.get(stat.S_IFMT(mode), '?')
                    if meta['type'] in 'DCBFLS':
                        meta['size'] = 0
                yield meta

    def get_passphrase(self):
        """Return the passphrase stored in the repository.

        The value is read from ``keys/passphrase`` through
        GnuPGKeyEncryption on first use and kept on the instance after
        that.
        """
        if self._passphrase:
            return self._passphrase
        # TODO: we should be able to use multiple passphrases for
        # different parts of the repository
        from filters import GnuPGKeyEncryption
        key_filter = GnuPGKeyEncryption()
        with self.read_file('keys/passphrase', encryption=key_filter) as f:
            self._passphrase = str(f.read()).strip()
        return self._passphrase

    def get_or_create_passphrase(self):
        """Return the repository passphrase or a freshly generated one.

        A new passphrase is generated when reading the existing one fails
        with an EnvironmentError. The new value is only returned, it is
        not written to the repository here.
        """
        try:
            passphrase = self.get_passphrase()
        except EnvironmentError:
            # generate a passphrase with about 256 bits entropy
            passphrase = generate_passphrase(256)
        return passphrase

    def write_passphrase(self, passphrase, keys):
        """Store `passphrase` in the repository as ``keys/passphrase``.

        The passphrase is written to ``keys/passphrase.new`` through
        GnuPGKeyEncryption(keys) and read back to verify it. Only after
        that is an existing passphrase file renamed to a ``.old`` name
        (replacing any earlier ``.old`` file) and the new file renamed
        into place.

        Raises AssertionError if the value read back does not match
        `passphrase`.
        """
        from filters import GnuPGKeyEncryption
        filename = 'keys/passphrase'
        # remove any existing new files
        newfile, _ = self.expand(filename + '.new')
        if self.backend.exists(newfile):
            self.backend.remove(newfile)
        # write out the new encrypted file
        with self.write_file(
                filename + '.new', encryption=GnuPGKeyEncryption(keys)) as f:
            f.write(('%s\n' % passphrase).encode('utf-8'))
        # check that we can actually read the file; this is an explicit
        # check because assert statements are removed when running with -O
        with self.read_file(
                filename + '.new', encryption=GnuPGKeyEncryption()) as f:
            stored = str(f.read()).strip()
        if stored != passphrase:
            raise AssertionError(
                'passphrase read back from repository does not match')
        # figure out the file names
        newfile, _ = self.expand(filename + '.new')
        curfile, _ = self.expand(filename)
        oldfile, _ = self.expand(filename + '.old')
        # remove the old backup (files are removed through the backend)
        if self.backend.exists(oldfile):
            self.backend.remove(oldfile)
        # backup the old passphrase: insert ".old" before the first
        # extension, or append it if the name has none
        if self.backend.exists(curfile):
            oldfile = (curfile + '.').replace('.', '.old.', 1).rstrip('.')
            self.backend.rename(curfile, oldfile)
        # put new passphrase file in place
        self.backend.rename(newfile, newfile.replace('.new', '', 1))
    @property
    def uuid(self):
        if self._uuid:
            return self._uuid
        # try to read the uuid file
        try:
            with self.read_file('uuid') as f:
                uuid = str(f.read()).strip()
            if re.match(r'^[0-9a-z]{8,16}$', uuid):
                self._uuid = uuid
                return uuid
        except EnvironmentError:
            pass  # ignore reading uuid, generate one instead
        # generate a new uuid
        uuid = '%16s' % base36(random.randrange(int(16 * 'z', 36) + 1))
        uuid.replace(' ', '0')
        self._uuid = uuid
        # save the uuid in the repository
        with self.write_file('uuid') as f:
            f.write(('%s\n' % uuid).encode('utf-8'))
        return uuid