caselawclient.models.documents.xml

 1import os
 2
 3from lxml import etree
 4
 5from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings, get_xpath_nodes
 6
 7
 8def _xslt_path(xslt_file_name: str) -> str:
 9    from caselawclient.Client import ROOT_DIR
10
11    return os.path.join(ROOT_DIR, "xslt", xslt_file_name)
12
13
class NonXMLDocumentError(Exception):
    """
    A document cannot be parsed as XML.

    Raised by ``XML.__init__`` when lxml reports an ``XMLSyntaxError`` for the
    bytes it was given.
    """
16
17
class XML:
    """
    A class for interacting with the raw XML of a document.

    The document is parsed once, on construction, and the resulting lxml tree
    is kept on the instance as ``xml_as_tree``; every other member reads from
    that tree.
    """

    def __init__(self, xml_bytestring: bytes):
        """
        Parse ``xml_bytestring`` and store the resulting tree.

        :param xml_bytestring: The serialised document to parse.

        :raises NonXMLDocumentError: This document is not valid XML
        """
        try:
            self.xml_as_tree: etree._Element = etree.fromstring(xml_bytestring)
        except etree.XMLSyntaxError as exc:
            # Chain explicitly so the parser's diagnostic stays attached as
            # __cause__ of the error callers catch.
            raise NonXMLDocumentError from exc

    @property
    def xml_as_string(self) -> str:
        """
        :return: A string representation of this document's XML tree.
        """
        return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))

    @property
    def root_element(self) -> str:
        """
        :return: The tag of the tree's root element, converted with ``str()``.
        """
        return str(self.xml_as_tree.tag)

    def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
        """
        Run the ``get_xpath_match_string`` helper against this document's tree.

        :param xpath: The XPath expression handed to the helper.
        :param namespaces: Prefix-to-URI mapping handed to the helper.

        :return: Whatever the helper returns for this tree.
        """
        return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)

    def get_xpath_match_strings(
        self,
        xpath: str,
        namespaces: dict[str, str],
    ) -> list[str]:
        """
        Run the ``get_xpath_match_strings`` helper against this document's tree.

        :param xpath: The XPath expression handed to the helper.
        :param namespaces: Prefix-to-URI mapping handed to the helper.

        :return: Whatever the helper returns for this tree.
        """
        return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)

    def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str]) -> list[etree._Element]:
        """
        Run the ``get_xpath_nodes`` helper against this document's tree.

        :param xpath: The XPath expression handed to the helper.
        :param namespaces: Prefix-to-URI mapping handed to the helper.

        :return: Whatever the helper returns for this tree.
        """
        return get_xpath_nodes(self.xml_as_tree, xpath, namespaces)

    def _modified(
        self,
        xslt: str,
        **values: str,
    ) -> bytes:
        """
        XSLT transform this XML, given a stylesheet

        :param xslt: The stylesheet source, parsed with ``etree.fromstring``.
        :param values: Stylesheet parameters; each value is wrapped with
            ``etree.XSLT.strparam`` before being passed to the transform.

        :return: The transform result serialised with lxml's ``c14n2`` method.
        """
        passable_values = {k: etree.XSLT.strparam(v) for k, v in values.items()}
        xslt_transform = etree.XSLT(etree.fromstring(xslt))
        noncanonical_xml = xslt_transform(self.xml_as_tree, profile_run=False, **passable_values)
        return etree.tostring(noncanonical_xml, method="c14n2")

    def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
        """
        XSLT transform this XML, given a path to a stylesheet

        :param xslt_filename: File name of the stylesheet, resolved with ``_xslt_path``.
        :param values: Stylesheet parameters, forwarded unchanged to ``_modified``.

        :return: The bytes returned by ``_modified``.
        """
        full_xslt_filename = _xslt_path(xslt_filename)
        # Decode explicitly as UTF-8 (the XML default) so the stylesheet text
        # does not depend on the platform's locale encoding.
        with open(full_xslt_filename, encoding="utf-8") as f:
            xslt = f.read()
        return self._modified(xslt, **values)
class NonXMLDocumentError(builtins.Exception):
class NonXMLDocumentError(Exception):
    """
    A document cannot be parsed as XML.

    Raised by ``XML.__init__`` when lxml reports an ``XMLSyntaxError`` for the
    bytes it was given.
    """

A document cannot be parsed as XML.

class XML:
class XML:
    """
    A class for interacting with the raw XML of a document.

    The document is parsed once, on construction, and the resulting lxml tree
    is kept on the instance as ``xml_as_tree``; every other member reads from
    that tree.
    """

    def __init__(self, xml_bytestring: bytes):
        """
        Parse ``xml_bytestring`` and store the resulting tree.

        :param xml_bytestring: The serialised document to parse.

        :raises NonXMLDocumentError: This document is not valid XML
        """
        try:
            self.xml_as_tree: etree._Element = etree.fromstring(xml_bytestring)
        except etree.XMLSyntaxError as exc:
            # Chain explicitly so the parser's diagnostic stays attached as
            # __cause__ of the error callers catch.
            raise NonXMLDocumentError from exc

    @property
    def xml_as_string(self) -> str:
        """
        :return: A string representation of this document's XML tree.
        """
        return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))

    @property
    def root_element(self) -> str:
        """
        :return: The tag of the tree's root element, converted with ``str()``.
        """
        return str(self.xml_as_tree.tag)

    def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
        """
        Run the ``get_xpath_match_string`` helper against this document's tree.

        :param xpath: The XPath expression handed to the helper.
        :param namespaces: Prefix-to-URI mapping handed to the helper.

        :return: Whatever the helper returns for this tree.
        """
        return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)

    def get_xpath_match_strings(
        self,
        xpath: str,
        namespaces: dict[str, str],
    ) -> list[str]:
        """
        Run the ``get_xpath_match_strings`` helper against this document's tree.

        :param xpath: The XPath expression handed to the helper.
        :param namespaces: Prefix-to-URI mapping handed to the helper.

        :return: Whatever the helper returns for this tree.
        """
        return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)

    def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str]) -> list[etree._Element]:
        """
        Run the ``get_xpath_nodes`` helper against this document's tree.

        :param xpath: The XPath expression handed to the helper.
        :param namespaces: Prefix-to-URI mapping handed to the helper.

        :return: Whatever the helper returns for this tree.
        """
        return get_xpath_nodes(self.xml_as_tree, xpath, namespaces)

    def _modified(
        self,
        xslt: str,
        **values: str,
    ) -> bytes:
        """
        XSLT transform this XML, given a stylesheet

        :param xslt: The stylesheet source, parsed with ``etree.fromstring``.
        :param values: Stylesheet parameters; each value is wrapped with
            ``etree.XSLT.strparam`` before being passed to the transform.

        :return: The transform result serialised with lxml's ``c14n2`` method.
        """
        passable_values = {k: etree.XSLT.strparam(v) for k, v in values.items()}
        xslt_transform = etree.XSLT(etree.fromstring(xslt))
        noncanonical_xml = xslt_transform(self.xml_as_tree, profile_run=False, **passable_values)
        return etree.tostring(noncanonical_xml, method="c14n2")

    def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
        """
        XSLT transform this XML, given a path to a stylesheet

        :param xslt_filename: File name of the stylesheet, resolved with ``_xslt_path``.
        :param values: Stylesheet parameters, forwarded unchanged to ``_modified``.

        :return: The bytes returned by ``_modified``.
        """
        full_xslt_filename = _xslt_path(xslt_filename)
        # Decode explicitly as UTF-8 (the XML default) so the stylesheet text
        # does not depend on the platform's locale encoding.
        with open(full_xslt_filename, encoding="utf-8") as f:
            xslt = f.read()
        return self._modified(xslt, **values)

A class for interacting with the raw XML of a document.

XML(xml_bytestring: bytes)
24    def __init__(self, xml_bytestring: bytes):
25        """
26        :raises NonXMLDocumentError: This document is not valid XML
27        """
28        try:
29            self.xml_as_tree: etree._Element = etree.fromstring(xml_bytestring)
30        except etree.XMLSyntaxError:
31            raise NonXMLDocumentError
Raises
  • NonXMLDocumentError: This document is not valid XML
xml_as_string: str
33    @property
34    def xml_as_string(self) -> str:
35        """
36        :return: A string representation of this document's XML tree.
37        """
38        return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))
Returns

A string representation of this document's XML tree.

root_element: str
40    @property
41    def root_element(self) -> str:
42        return str(self.xml_as_tree.tag)
def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
44    def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
45        return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)
def get_xpath_match_strings(self, xpath: str, namespaces: dict[str, str]) -> list[str]:
47    def get_xpath_match_strings(
48        self,
49        xpath: str,
50        namespaces: dict[str, str],
51    ) -> list[str]:
52        return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)
def get_xpath_nodes( self, xpath: str, namespaces: dict[str, str]) -> list[lxml.etree._Element]:
54    def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str]) -> list[etree._Element]:
55        return get_xpath_nodes(self.xml_as_tree, xpath, namespaces)
def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
68    def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
69        """XSLT transform this XML, given a path to a stylesheet"""
70        full_xslt_filename = _xslt_path(xslt_filename)
71        with open(full_xslt_filename) as f:
72            xslt = f.read()
73        return self._modified(xslt, **values)

XSLT transform this XML, given a path to a stylesheet