# cache.py - functions for metadata cache
#
# Copyright (C) 2015 Arthur de Jong
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# The files produced as output from the software do not automatically fall
# under the copyright of the software, unless explicitly stated otherwise.

import json
import os
import sqlite3


class MetaData(object):
    """SQLite-backed cache of archive and backup metadata."""

    def __init__(self, cache_dir, uuid):
        # set a default cache directory
        if not cache_dir:
            import xdg.BaseDirectory
            cache_dir = os.path.join(
                xdg.BaseDirectory.xdg_cache_home, 'sloth')
        # create cache directory if needed
        if not os.path.isdir(cache_dir):
            os.makedirs(cache_dir)
        # set up SQLite connection
        self.connection = sqlite3.connect(
            '%s/%s-v2.sqlite' % (cache_dir, uuid))
        # create database tables
        self.connection.executescript('''
            PRAGMA secure_delete = false;
            PRAGMA temp_store = MEMORY;
            PRAGMA cache_size = 20000;
            PRAGMA synchronous = OFF;
            PRAGMA journal_mode = MEMORY;
            CREATE TABLE IF NOT EXISTS `archive_contents` (
                `id` INTEGER PRIMARY KEY,
                `archive` TEXT NOT NULL,
                `path` TEXT NOT NULL,
                `size` INTEGER NOT NULL,
                `meta` TEXT NOT NULL,
                `used` INTEGER NOT NULL DEFAULT 0
            );
            CREATE INDEX IF NOT EXISTS `archive_contents_archive_idx`
                ON `archive_contents` (`archive`);
            CREATE INDEX IF NOT EXISTS `archive_contents_path_idx`
                ON `archive_contents` (`path`);
            CREATE INDEX IF NOT EXISTS `archive_contents_meta_idx`
                ON `archive_contents` (`meta`);
            CREATE TABLE IF NOT EXISTS `backups` (
                `backup` TEXT PRIMARY KEY,
                `json` TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS `backups_backup_idx`
                ON `backups` (`backup`);
            ''')

    def is_crawled(self, path):
        """Check whether the path is present in the `crawled` table."""
        cursor = self.connection.execute('''
            SELECT 1
            FROM `crawled`
            WHERE `path` = ?
            ''', (path, ))
        return bool(list(cursor))

    def resync_archives(self, repo):
        """Update metadata cache with the information from the repository."""
        # set up temporary table to hold list of archives found in repository
        self.connection.executescript('''
            CREATE TEMPORARY TABLE `tmp_archives` (
                `archive` TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS `tmp_archives_archive_idx`
                ON `tmp_archives` (`archive`);
            ''')
        # get list of archives in the repository
        with self.connection:
            self.connection.executemany('''
                INSERT INTO `tmp_archives` (`archive`)
                VALUES (?)
                ''', ((archive, ) for archive in repo.list_archives()))
        # remove archives from database that are not in the repository
        with self.connection:
            self.connection.execute('''
                DELETE FROM `archive_contents`
                WHERE `archive` NOT IN (
                    SELECT `archive` FROM `tmp_archives`)
                ''')
        # read archive metadata that is missing from the database
        cursor = self.connection.execute('''
            SELECT `archive` FROM `tmp_archives`
            WHERE `archive` NOT IN (
                SELECT DISTINCT `archive` FROM `archive_contents`)
            ''')
        # use fetchall because SQLite cannot handle partial reads from a cursor
        # if the database is being modified in another cursor
        for archive, in cursor.fetchall():
            print('Importing archive %s file list' % archive)
            try:
                fileslist = repo.read_fileslist('archives/%s.json' % archive)
                with self.connection:
                    self.connection.executemany('''
                        INSERT INTO `archive_contents`
                            (`archive`, `path`, `size`, `meta`)
                        VALUES (?, ?, ?, ?)
                        ''', ((
                            archive, meta['path'], meta['size'],
                            json.dumps(meta, sort_keys=True))
                            for meta in fileslist))
            except (EnvironmentError, ValueError):
                import traceback
                print(traceback.format_exc())
        # clean up
        self.connection.executescript('''
            DROP TABLE `tmp_archives`;
            ''')

    def _check_backups(self, repo, table, backup=None):
        """Return the list of backups that need to be synced, checking the
        specified table."""
        if backup is not None:
            # if we already have data for the backup, we're done
            cursor = self.connection.execute('''
                SELECT 1
                FROM `%s`
                WHERE `backup` = ?
                ''' % table, (backup, ))
            if bool(list(cursor)):
                return []
            return [backup]
        # get list of backups in the repository
        self.connection.executescript('''
            CREATE TEMPORARY TABLE `tmp_backups` (
                `backup` TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS `tmp_backups_backup_idx`
                ON `tmp_backups` (`backup`);
            ''')
        with self.connection:
            self.connection.executemany('''
                INSERT INTO `tmp_backups` (`backup`)
                VALUES (?)
                ''', ((backup, ) for backup in repo.list_backups()))
        # remove backups from database that are not in the repository
        with self.connection:
            self.connection.execute('''
                DELETE FROM `%s`
                WHERE `backup` NOT IN (
                    SELECT `backup` FROM `tmp_backups`)
                ''' % table)
        # read backup metadata that is missing from the database
        backups = [
            row[0] for row in self.connection.execute('''
                SELECT `backup` FROM `tmp_backups`
                WHERE `backup` NOT IN (
                    SELECT `backup` FROM `%s`)
                ''' % table)]
        # clean up
        self.connection.executescript('''
            DROP TABLE IF EXISTS `tmp_backups`;
            ''')
        return backups

    def resync_backups(self, repo):
        """Update metadata cache with the information from the repository."""
        for backup in self._check_backups(repo, 'backups'):
            print('Importing backup %s metadata' % backup)
            try:
                with repo.read_file('backups/%s/info.json' % backup) as f:
                    info = json.load(f)
                with self.connection:
                    self.connection.execute('''
                        INSERT OR REPLACE INTO `backups` (`backup`, `json`)
                        VALUES (?, ?)
                        ''', (
                            backup, json.dumps(info, sort_keys=True)))
            except (EnvironmentError, ValueError):
                import traceback
                print(traceback.format_exc())
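

# A minimal usage sketch, assuming a repository object that provides the
# interface used above (list_archives(), list_backups(), read_fileslist() and
# read_file()).  _StubRepo and the 'example' uuid are hypothetical stand-ins
# for illustration, not part of the real repository implementation.
if __name__ == '__main__':
    import tempfile

    class _StubRepo(object):
        """Hypothetical repository that contains no archives and no backups."""

        def list_archives(self):
            return []

        def list_backups(self):
            return []

    # create the cache in a throw-away directory and sync it against the
    # (empty) stub repository; this exercises schema creation and the
    # temporary-table bookkeeping without touching any real data
    cache = MetaData(tempfile.mkdtemp(), 'example')
    repo = _StubRepo()
    cache.resync_archives(repo)
    cache.resync_backups(repo)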