# xml.py - module for parsing and writing XML for PSKC files
# coding: utf-8
#
# Copyright (C) 2014-2016 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA

"""Module for parsing XML in PSKC files.

This module provides some utility functions for parsing XML files.
"""

from __future__ import absolute_import

# try to find a usable ElementTree module
try:
    from lxml import etree
except ImportError:  # pragma: no cover (different implementations)
    import xml.etree.ElementTree as etree


# the relevant XML namespaces for PSKC
namespaces = dict(
    # the XML namespace URI for version 1.0 of PSKC
    pskc='urn:ietf:params:xml:ns:keyprov:pskc',
    # the XML Signature namespace
    ds='http://www.w3.org/2000/09/xmldsig#',
    # the XML Encryption namespace
    xenc='http://www.w3.org/2001/04/xmlenc#',
    # the XML Encryption version 1.1 namespace
    xenc11='http://www.w3.org/2009/xmlenc11#',
    # the PKCS #5 namespace
    pkcs5='http://www.rsasecurity.com/rsalabs/pkcs/schemas/pkcs-5v2-0#',
)


# register the namespaces so the correct short names will be used
for ns, namespace in namespaces.items():
    etree.register_namespace(ns, namespace)


def parse(source):
    """Parse the provided file and return an element tree."""
    return etree.parse(source)


def remove_namespaces(tree):
    """Remove namespaces from all elements in the tree."""
    import re
    for elem in tree.getiterator():
        if isinstance(elem.tag, ''.__class__):  # pragma: no branch
            elem.tag = re.sub(r'^\{[^}]*\}', '', elem.tag)


def findall(tree, *matches):
    """Find the child elements."""
    for match in matches:
        for element in tree.findall(match, namespaces=namespaces):
            yield element


def find(tree, *matches):
    """Find a child element that matches any of the patterns (or None)."""
    try:
        return next(findall(tree, *matches))
    except StopIteration:
        pass


def findtext(tree, *matches):
    """Get the text value of an element (or None)."""
    element = find(tree, *matches)
    if element is not None:
        return element.text.strip()


def findint(tree, *matches):
    """Return an element value as an int (or None)."""
    value = findtext(tree, *matches)
    if value:
        return int(value)


def findtime(tree, *matches):
    """Return an element value as a datetime (or None)."""
    value = findtext(tree, *matches)
    if value:
        import dateutil.parser
        return dateutil.parser.parse(value)


def findbin(tree, *matches):
    """Return the binary element value base64 decoded."""
    value = findtext(tree, *matches)
    if value:
        import base64
        return base64.b64decode(value)


def getint(tree, attribute):
    """Return an attribute value as an integer (or None)."""
    value = tree.get(attribute)
    if value:
        return int(value)


def getbool(tree, attribute, default=None):
    """Return an attribute value as a boolean (or None)."""
    value = tree.get(attribute)
    if value:
        value = value.lower()
        if value in ('1', 'true'):
            return True
        elif value in ('0', 'false'):
            return False
        else:
            raise ValueError('invalid boolean value: %r' % value)
    return default


def _format(value):
    import datetime
    if isinstance(value, datetime.datetime):
        value = value.isoformat()
        if value.endswith('+00:00'):
            value = value[:-6] + 'Z'
        return value
    elif value is True:
        return 'true'
    elif value is False:
        return 'false'
    return str(value)


def mk_elem(parent, tag=None, text=None, empty=False, **kwargs):
    """Add element as a child of parent."""
    # special-case the top-level element
    if tag is None:
        tag = parent
        parent = None
        empty = True
    # don't create empty elements
    if not empty and text is None and \
       all(x is None for x in kwargs.values()):
        return
    # replace namespace identifier with URL
    if ':' in tag:
        ns, name = tag.split(':', 1)
        tag = '{%s}%s' % (namespaces[ns], name)
    if parent is None:
        element = etree.Element(tag)
    else:
        element = etree.SubElement(parent, tag)
    # set text of element
    if text is not None:
        element.text = _format(text)
    # set kwargs as attributes
    for k, v in kwargs.items():
        if v is not None:
            element.set(k, _format(v))
    return element


def tostring(element):
    """Return a serialised XML document for the element tree."""
    from xml.dom import minidom
    # if we are using lxml.etree move namespaces to toplevel element
    if hasattr(element, 'nsmap'):  # pragma: no cover (only on lxml)
        # get all used namespaces
        nsmap = {}
        for e in element.iter():
            nsmap.update(e.nsmap)
        # replace toplevel element with all namespaces
        e = etree.Element(element.tag, attrib=element.attrib, nsmap=nsmap)
        for a in element:
            e.append(a)
        element = e
    xml = etree.tostring(element, encoding='UTF-8')
    return minidom.parseString(xml).toprettyxml(
        indent=' ', encoding='UTF-8').strip()