caselawclient.models.documents.xml
import os

from lxml import etree

from caselawclient.xml_helpers import get_xpath_match_string, get_xpath_match_strings, get_xpath_nodes


def _xslt_path(xslt_file_name: str) -> str:
    """
    Build the path to a stylesheet inside the ``xslt`` directory under ``ROOT_DIR``.

    :param xslt_file_name: File name of the stylesheet, relative to the ``xslt`` directory.
    :return: ``ROOT_DIR/xslt/<xslt_file_name>`` joined with ``os.path.join``.
    """
    # NOTE(review): imported at call time rather than module level, presumably to
    # avoid a circular import with caselawclient.Client — confirm before moving it.
    from caselawclient.Client import ROOT_DIR

    return os.path.join(ROOT_DIR, "xslt", xslt_file_name)


class NonXMLDocumentError(Exception):
    """A document cannot be parsed as XML."""


class XML:
    """
    A class for interacting with the raw XML of a document.
    """

    def __init__(self, xml_bytestring: bytes):
        """
        Parse the given bytes into an element tree, stored as ``self.xml_as_tree``.

        :param xml_bytestring: The serialised XML document.
        :raises NonXMLDocumentError: This document is not valid XML
        """
        try:
            self.xml_as_tree: etree._Element = etree.fromstring(xml_bytestring)
        except etree.XMLSyntaxError as exc:
            # Chain explicitly so the parser's diagnostic is kept as the cause.
            raise NonXMLDocumentError from exc

    @property
    def xml_as_string(self) -> str:
        """
        :return: A string representation of this document's XML tree.
        """
        return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))

    @property
    def root_element(self) -> str:
        """
        :return: The tag of the root element of this document's XML tree.
        """
        return str(self.xml_as_tree.tag)

    def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
        """Call the ``xml_helpers`` function of the same name against this document's tree."""
        return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)

    def get_xpath_match_strings(
        self,
        xpath: str,
        namespaces: dict[str, str],
    ) -> list[str]:
        """Call the ``xml_helpers`` function of the same name against this document's tree."""
        return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)

    def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str]) -> list[etree._Element]:
        """Call the ``xml_helpers`` function of the same name against this document's tree."""
        return get_xpath_nodes(self.xml_as_tree, xpath, namespaces)

    def _modified(
        self,
        xslt: str,
        **values: str,
    ) -> bytes:
        """
        XSLT transform this XML, given a stylesheet

        :param xslt: The source text of the stylesheet.
        :param values: Stylesheet parameters; each value is wrapped with
            ``etree.XSLT.strparam`` before being passed to the transform.
        :return: The transform result serialised with lxml's ``c14n2`` method.
        """
        passable_values = {k: etree.XSLT.strparam(v) for k, v in values.items()}
        xslt_transform = etree.XSLT(etree.fromstring(xslt))
        noncanonical_xml = xslt_transform(self.xml_as_tree, profile_run=False, **passable_values)
        return etree.tostring(noncanonical_xml, method="c14n2")

    def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
        """
        XSLT transform this XML, given a path to a stylesheet

        :param xslt_filename: File name of the stylesheet, resolved via ``_xslt_path``.
        :param values: Stylesheet parameters, forwarded to ``_modified``.
        :return: The transformed document as bytes.
        """
        full_xslt_filename = _xslt_path(xslt_filename)
        # Decode explicitly as UTF-8 instead of relying on the locale default, so
        # the stylesheet is read the same way on every platform.
        with open(full_xslt_filename, encoding="utf-8") as f:
            xslt = f.read()
        return self._modified(xslt, **values)
class
NonXMLDocumentError(builtins.Exception):
A document cannot be parsed as XML.
class
XML:
class XML:
    """
    A class for interacting with the raw XML of a document.
    """

    def __init__(self, xml_bytestring: bytes):
        """
        Parse the given bytes into an element tree, stored as ``self.xml_as_tree``.

        :param xml_bytestring: The serialised XML document.
        :raises NonXMLDocumentError: This document is not valid XML
        """
        try:
            self.xml_as_tree: etree._Element = etree.fromstring(xml_bytestring)
        except etree.XMLSyntaxError as exc:
            # Chain explicitly so the parser's diagnostic is kept as the cause.
            raise NonXMLDocumentError from exc

    @property
    def xml_as_string(self) -> str:
        """
        :return: A string representation of this document's XML tree.
        """
        return str(etree.tostring(self.xml_as_tree).decode(encoding="utf-8"))

    @property
    def root_element(self) -> str:
        """
        :return: The tag of the root element of this document's XML tree.
        """
        return str(self.xml_as_tree.tag)

    def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str]) -> str:
        """Call the ``xml_helpers`` function of the same name against this document's tree."""
        return get_xpath_match_string(self.xml_as_tree, xpath, namespaces)

    def get_xpath_match_strings(
        self,
        xpath: str,
        namespaces: dict[str, str],
    ) -> list[str]:
        """Call the ``xml_helpers`` function of the same name against this document's tree."""
        return get_xpath_match_strings(self.xml_as_tree, xpath, namespaces)

    def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str]) -> list[etree._Element]:
        """Call the ``xml_helpers`` function of the same name against this document's tree."""
        return get_xpath_nodes(self.xml_as_tree, xpath, namespaces)

    def _modified(
        self,
        xslt: str,
        **values: str,
    ) -> bytes:
        """
        XSLT transform this XML, given a stylesheet

        :param xslt: The source text of the stylesheet.
        :param values: Stylesheet parameters; each value is wrapped with
            ``etree.XSLT.strparam`` before being passed to the transform.
        :return: The transform result serialised with lxml's ``c14n2`` method.
        """
        passable_values = {k: etree.XSLT.strparam(v) for k, v in values.items()}
        xslt_transform = etree.XSLT(etree.fromstring(xslt))
        noncanonical_xml = xslt_transform(self.xml_as_tree, profile_run=False, **passable_values)
        return etree.tostring(noncanonical_xml, method="c14n2")

    def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
        """
        XSLT transform this XML, given a path to a stylesheet

        :param xslt_filename: File name of the stylesheet, resolved via ``_xslt_path``.
        :param values: Stylesheet parameters, forwarded to ``_modified``.
        :return: The transformed document as bytes.
        """
        full_xslt_filename = _xslt_path(xslt_filename)
        # Decode explicitly as UTF-8 instead of relying on the locale default, so
        # the stylesheet is read the same way on every platform.
        with open(full_xslt_filename, encoding="utf-8") as f:
            xslt = f.read()
        return self._modified(xslt, **values)
A class for interacting with the raw XML of a document.
XML(xml_bytestring: bytes)
def __init__(self, xml_bytestring: bytes):
    """
    Parse the given bytes into an element tree, stored as ``self.xml_as_tree``.

    :param xml_bytestring: The serialised XML document.
    :raises NonXMLDocumentError: This document is not valid XML
    """
    try:
        self.xml_as_tree: etree._Element = etree.fromstring(xml_bytestring)
    except etree.XMLSyntaxError as exc:
        # Chain explicitly so the parser's diagnostic is kept as the cause.
        raise NonXMLDocumentError from exc
Raises
- NonXMLDocumentError: This document is not valid XML
xml_as_string: str
@property
def xml_as_string(self) -> str:
    """
    Serialise this document's XML tree and hand it back as text.

    :return: A string representation of this document's XML tree.
    """
    serialised_tree = etree.tostring(self.xml_as_tree)
    decoded_tree = serialised_tree.decode(encoding="utf-8")
    return str(decoded_tree)
Returns
A string representation of this document's XML tree.
def
apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
    """
    XSLT transform this XML, given a path to a stylesheet

    :param xslt_filename: File name of the stylesheet, resolved via ``_xslt_path``.
    :param values: Stylesheet parameters, forwarded to ``_modified``.
    :return: The transformed document as bytes.
    """
    full_xslt_filename = _xslt_path(xslt_filename)
    # Decode explicitly as UTF-8 instead of relying on the locale default, so
    # the stylesheet is read the same way on every platform.
    with open(full_xslt_filename, encoding="utf-8") as f:
        xslt = f.read()
    return self._modified(xslt, **values)
XSLT transform this XML, given a path to a stylesheet