caselawclient.models.documents.body

View Source

  1import datetime
  2import os
  3import warnings
  4from functools import cache, cached_property
  5from typing import Optional
  6
  7import pytz
  8from ds_caselaw_utils.types import CourtCode
  9from lxml import etree
 10from saxonche import PySaxonProcessor
 11
 12from caselawclient.models.utilities.dates import parse_string_date_as_utc
 13from caselawclient.types import DocumentCategory
 14from caselawclient.xml_helpers import DEFAULT_NAMESPACES
 15
 16from .xml import XML
 17
 18
 19class UnparsableDate(Warning):
 20    pass
 21
 22
 23class DocumentBody:
 24    """
 25    A class for abstracting out interactions with the body of a document.
 26    """
 27
 28    def __init__(self, xml_bytestring: bytes):
 29        self._xml = XML(xml_bytestring=xml_bytestring)
 30        """ This is an instance of the `Document.XML` class for manipulation of the XML document itself. """
 31
 32    def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> str:
 33        return self._xml.get_xpath_match_string(xpath, namespaces)
 34
 35    def get_xpath_match_strings(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[str]:
 36        return self._xml.get_xpath_match_strings(xpath, namespaces)
 37
 38    def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[etree._Element]:
 39        return self._xml.get_xpath_nodes(xpath, namespaces)
 40
 41    @cached_property
 42    def name(self) -> str:
 43        return self.get_xpath_match_string(
 44            "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value"
 45        )
 46
 47    @cached_property
 48    def court(self) -> str:
 49        return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()")
 50
 51    @cached_property
 52    def jurisdiction(self) -> str:
 53        return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()")
 54
 55    @cached_property
 56    def categories(self) -> list[DocumentCategory]:
 57        xpath = "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category"
 58        nodes = self.get_xpath_nodes(xpath, DEFAULT_NAMESPACES)
 59
 60        categories: dict[str, DocumentCategory] = {}
 61        children_map: dict[str, list[DocumentCategory]] = {}
 62
 63        for node in nodes:
 64            name = node.text
 65            if name is None or not name.strip():
 66                continue
 67
 68            category = DocumentCategory(name=name)
 69            categories[name] = category
 70
 71            parent = node.get("parent")
 72
 73            if parent:
 74                children_map.setdefault(parent, []).append(category)
 75
 76        for parent, subcategories in children_map.items():
 77            if parent in categories:
 78                categories[parent].subcategories.extend(subcategories)
 79
 80        top_level_categories = [
 81            categories[name]
 82            for node in nodes
 83            if node.get("parent") is None
 84            if (name := node.text) and name in categories
 85        ]
 86
 87        return top_level_categories
 88
 89    # NOTE: Deprecated - use categories function
 90    @cached_property
 91    def category(self) -> Optional[str]:
 92        return self.get_xpath_match_string(
 93            "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category[not(@parent)][1]/text()"
 94        )
 95
 96    @cached_property
 97    def case_number(self) -> Optional[str]:
 98        return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:caseNumber/text()")
 99
100    @property
101    def court_and_jurisdiction_identifier_string(self) -> CourtCode:
102        if self.jurisdiction != "":
103            return CourtCode("/".join((self.court, self.jurisdiction)))
104        return CourtCode(self.court)
105
106    @cached_property
107    def document_date_as_string(self) -> str:
108        return self.get_xpath_match_string(
109            "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
110        )
111
112    @cached_property
113    def document_date_as_date(self) -> Optional[datetime.date]:
114        if not self.document_date_as_string:
115            return None
116        try:
117            return datetime.datetime.strptime(
118                self.document_date_as_string,
119                "%Y-%m-%d",
120            ).date()
121        except ValueError:
122            warnings.warn(
123                f"Unparsable date encountered: {self.document_date_as_string}",
124                UnparsableDate,
125            )
126            return None
127
128    def get_manifestation_datetimes(
129        self,
130        name: Optional[str] = None,
131    ) -> list[datetime.datetime]:
132        name_filter = f"[@name='{name}']" if name else ""
133        iso_datetimes = self.get_xpath_match_strings(
134            f"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation/akn:FRBRdate{name_filter}/@date",
135        )
136
137        return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
138
139    def get_latest_manifestation_datetime(
140        self,
141        name: Optional[str] = None,
142    ) -> Optional[datetime.datetime]:
143        events = self.get_manifestation_datetimes(name)
144        if not events:
145            return None
146        return max(events)
147
148    def get_latest_manifestation_type(self) -> Optional[str]:
149        return max(
150            (
151                (type, time)
152                for type in ["transform", "tna-enriched"]
153                if (time := self.get_latest_manifestation_datetime(type))
154            ),
155            key=lambda x: x[1],
156        )[0]
157
158    @cached_property
159    def transformation_datetime(self) -> Optional[datetime.datetime]:
160        """When was this document successfully parsed or reparsed (date from XML)"""
161        return self.get_latest_manifestation_datetime("transform")
162
163    @cached_property
164    def enrichment_datetime(self) -> Optional[datetime.datetime]:
165        """When was this document successfully enriched (date from XML)"""
166        return self.get_latest_manifestation_datetime("tna-enriched")
167
168    @cached_property
169    def content_as_xml(self) -> str:
170        return self._xml.xml_as_string
171
172    @cached_property
173    def has_content(self) -> bool:
174        """If we do not have a word document, the XML will not contain
175        the contents of the judgment, but will have content in the header if a judgment.
176        All press summaries (which have <doc> not <judgment> tags) are assumed to have content"""
177        return bool(
178            self._xml.xml_as_tree.xpath("//akn:header[normalize-space(string(.))]", namespaces=DEFAULT_NAMESPACES)
179            or self._xml.xml_as_tree.xpath("//akn:doc", namespaces=DEFAULT_NAMESPACES)
180        )
181
182    @cached_property
183    def has_external_data(self) -> bool:
184        """Is there data which is not present within the source document:
185        is there a spreadsheet which has populated some fields. The current implementation
186        "is there a uk:party tag" is intended as a stopgap whilst we're not importing that data."""
187        return bool(self._xml.xml_as_tree.xpath("//uk:party", namespaces=DEFAULT_NAMESPACES))
188
189    @cache
190    def content_html(self, image_prefix: str) -> Optional[str]:
191        """Convert the XML representation of the Document into HTML for rendering."""
192        """This used to be called content_as_html but we have changed the parameter passed to it from the
193        domain of the assets to the path in which the assets are stored (from assets to assets/d-a1b2c3)
194        and made the image_prefix mandatory"""
195        if not self.has_content:
196            return None
197
198        html_xslt_location = os.path.join(os.path.dirname(os.path.realpath(__file__)), "transforms", "html.xsl")
199
200        with PySaxonProcessor() as proc:
201            xslt_processor = proc.new_xslt30_processor()
202            document = proc.parse_xml(xml_text=self._xml.xml_as_string)
203
204            executable = xslt_processor.compile_stylesheet(stylesheet_file=html_xslt_location)
205
206            if image_prefix:
207                executable.set_parameter("image-prefix", proc.make_string_value(image_prefix))
208
209            return str(executable.transform_to_string(xdm_node=document))
210
211    @cached_property
212    def failed_to_parse(self) -> bool:
213        """
214        Did this document entirely fail to parse?
215
216        :return: `True` if there was a complete parser failure, otherwise `False`
217        """
218        return "error" in self._xml.root_element
219
220    def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
221        return self._xml.apply_xslt(xslt_filename, **values)

class UnparsableDate(builtins.Warning): View Source

20class UnparsableDate(Warning):
21    pass

Base class for warning categories.

class DocumentBody: View Source

 24class DocumentBody:
 25    """
 26    A class for abstracting out interactions with the body of a document.
 27    """
 28
 29    def __init__(self, xml_bytestring: bytes):
 30        self._xml = XML(xml_bytestring=xml_bytestring)
 31        """ This is an instance of the `Document.XML` class for manipulation of the XML document itself. """
 32
 33    def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> str:
 34        return self._xml.get_xpath_match_string(xpath, namespaces)
 35
 36    def get_xpath_match_strings(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[str]:
 37        return self._xml.get_xpath_match_strings(xpath, namespaces)
 38
 39    def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[etree._Element]:
 40        return self._xml.get_xpath_nodes(xpath, namespaces)
 41
 42    @cached_property
 43    def name(self) -> str:
 44        return self.get_xpath_match_string(
 45            "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value"
 46        )
 47
 48    @cached_property
 49    def court(self) -> str:
 50        return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()")
 51
 52    @cached_property
 53    def jurisdiction(self) -> str:
 54        return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()")
 55
 56    @cached_property
 57    def categories(self) -> list[DocumentCategory]:
 58        xpath = "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category"
 59        nodes = self.get_xpath_nodes(xpath, DEFAULT_NAMESPACES)
 60
 61        categories: dict[str, DocumentCategory] = {}
 62        children_map: dict[str, list[DocumentCategory]] = {}
 63
 64        for node in nodes:
 65            name = node.text
 66            if name is None or not name.strip():
 67                continue
 68
 69            category = DocumentCategory(name=name)
 70            categories[name] = category
 71
 72            parent = node.get("parent")
 73
 74            if parent:
 75                children_map.setdefault(parent, []).append(category)
 76
 77        for parent, subcategories in children_map.items():
 78            if parent in categories:
 79                categories[parent].subcategories.extend(subcategories)
 80
 81        top_level_categories = [
 82            categories[name]
 83            for node in nodes
 84            if node.get("parent") is None
 85            if (name := node.text) and name in categories
 86        ]
 87
 88        return top_level_categories
 89
 90    # NOTE: Deprecated - use categories function
 91    @cached_property
 92    def category(self) -> Optional[str]:
 93        return self.get_xpath_match_string(
 94            "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category[not(@parent)][1]/text()"
 95        )
 96
 97    @cached_property
 98    def case_number(self) -> Optional[str]:
 99        return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:caseNumber/text()")
100
101    @property
102    def court_and_jurisdiction_identifier_string(self) -> CourtCode:
103        if self.jurisdiction != "":
104            return CourtCode("/".join((self.court, self.jurisdiction)))
105        return CourtCode(self.court)
106
107    @cached_property
108    def document_date_as_string(self) -> str:
109        return self.get_xpath_match_string(
110            "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
111        )
112
113    @cached_property
114    def document_date_as_date(self) -> Optional[datetime.date]:
115        if not self.document_date_as_string:
116            return None
117        try:
118            return datetime.datetime.strptime(
119                self.document_date_as_string,
120                "%Y-%m-%d",
121            ).date()
122        except ValueError:
123            warnings.warn(
124                f"Unparsable date encountered: {self.document_date_as_string}",
125                UnparsableDate,
126            )
127            return None
128
129    def get_manifestation_datetimes(
130        self,
131        name: Optional[str] = None,
132    ) -> list[datetime.datetime]:
133        name_filter = f"[@name='{name}']" if name else ""
134        iso_datetimes = self.get_xpath_match_strings(
135            f"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation/akn:FRBRdate{name_filter}/@date",
136        )
137
138        return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]
139
140    def get_latest_manifestation_datetime(
141        self,
142        name: Optional[str] = None,
143    ) -> Optional[datetime.datetime]:
144        events = self.get_manifestation_datetimes(name)
145        if not events:
146            return None
147        return max(events)
148
149    def get_latest_manifestation_type(self) -> Optional[str]:
150        return max(
151            (
152                (type, time)
153                for type in ["transform", "tna-enriched"]
154                if (time := self.get_latest_manifestation_datetime(type))
155            ),
156            key=lambda x: x[1],
157        )[0]
158
159    @cached_property
160    def transformation_datetime(self) -> Optional[datetime.datetime]:
161        """When was this document successfully parsed or reparsed (date from XML)"""
162        return self.get_latest_manifestation_datetime("transform")
163
164    @cached_property
165    def enrichment_datetime(self) -> Optional[datetime.datetime]:
166        """When was this document successfully enriched (date from XML)"""
167        return self.get_latest_manifestation_datetime("tna-enriched")
168
169    @cached_property
170    def content_as_xml(self) -> str:
171        return self._xml.xml_as_string
172
173    @cached_property
174    def has_content(self) -> bool:
175        """If we do not have a word document, the XML will not contain
176        the contents of the judgment, but will have content in the header if a judgment.
177        All press summaries (which have <doc> not <judgment> tags) are assumed to have content"""
178        return bool(
179            self._xml.xml_as_tree.xpath("//akn:header[normalize-space(string(.))]", namespaces=DEFAULT_NAMESPACES)
180            or self._xml.xml_as_tree.xpath("//akn:doc", namespaces=DEFAULT_NAMESPACES)
181        )
182
183    @cached_property
184    def has_external_data(self) -> bool:
185        """Is there data which is not present within the source document:
186        is there a spreadsheet which has populated some fields. The current implementation
187        "is there a uk:party tag" is intended as a stopgap whilst we're not importing that data."""
188        return bool(self._xml.xml_as_tree.xpath("//uk:party", namespaces=DEFAULT_NAMESPACES))
189
190    @cache
191    def content_html(self, image_prefix: str) -> Optional[str]:
192        """Convert the XML representation of the Document into HTML for rendering."""
193        """This used to be called content_as_html but we have changed the parameter passed to it from the
194        domain of the assets to the path in which the assets are stored (from assets to assets/d-a1b2c3)
195        and made the image_prefix mandatory"""
196        if not self.has_content:
197            return None
198
199        html_xslt_location = os.path.join(os.path.dirname(os.path.realpath(__file__)), "transforms", "html.xsl")
200
201        with PySaxonProcessor() as proc:
202            xslt_processor = proc.new_xslt30_processor()
203            document = proc.parse_xml(xml_text=self._xml.xml_as_string)
204
205            executable = xslt_processor.compile_stylesheet(stylesheet_file=html_xslt_location)
206
207            if image_prefix:
208                executable.set_parameter("image-prefix", proc.make_string_value(image_prefix))
209
210            return str(executable.transform_to_string(xdm_node=document))
211
212    @cached_property
213    def failed_to_parse(self) -> bool:
214        """
215        Did this document entirely fail to parse?
216
217        :return: `True` if there was a complete parser failure, otherwise `False`
218        """
219        return "error" in self._xml.root_element
220
221    def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
222        return self._xml.apply_xslt(xslt_filename, **values)

A class for abstracting out interactions with the body of a document.

DocumentBody(xml_bytestring: bytes) View Source

29    def __init__(self, xml_bytestring: bytes):
30        self._xml = XML(xml_bytestring=xml_bytestring)
31        """ This is an instance of the `Document.XML` class for manipulation of the XML document itself. """

def get_xpath_match_string( self, xpath: str, namespaces: dict[str, str] = {'uk': 'https://caselaw.nationalarchives.gov.uk/akn', 'akn': 'http://docs.oasis-open.org/legaldocml/ns/akn/3.0'}) -> str: View Source

33    def get_xpath_match_string(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> str:
34        return self._xml.get_xpath_match_string(xpath, namespaces)

def get_xpath_match_strings( self, xpath: str, namespaces: dict[str, str] = {'uk': 'https://caselaw.nationalarchives.gov.uk/akn', 'akn': 'http://docs.oasis-open.org/legaldocml/ns/akn/3.0'}) -> list[str]: View Source

36    def get_xpath_match_strings(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[str]:
37        return self._xml.get_xpath_match_strings(xpath, namespaces)

def get_xpath_nodes( self, xpath: str, namespaces: dict[str, str] = {'uk': 'https://caselaw.nationalarchives.gov.uk/akn', 'akn': 'http://docs.oasis-open.org/legaldocml/ns/akn/3.0'}) -> list[lxml.etree._Element]: View Source

39    def get_xpath_nodes(self, xpath: str, namespaces: dict[str, str] = DEFAULT_NAMESPACES) -> list[etree._Element]:
40        return self._xml.get_xpath_nodes(xpath, namespaces)

name: str View Source

42    @cached_property
43    def name(self) -> str:
44        return self.get_xpath_match_string(
45            "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRname/@value"
46        )

court: str View Source

48    @cached_property
49    def court(self) -> str:
50        return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:court/text()")

jurisdiction: str View Source

52    @cached_property
53    def jurisdiction(self) -> str:
54        return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:jurisdiction/text()")

categories: list[caselawclient.types.DocumentCategory] View Source

56    @cached_property
57    def categories(self) -> list[DocumentCategory]:
58        xpath = "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category"
59        nodes = self.get_xpath_nodes(xpath, DEFAULT_NAMESPACES)
60
61        categories: dict[str, DocumentCategory] = {}
62        children_map: dict[str, list[DocumentCategory]] = {}
63
64        for node in nodes:
65            name = node.text
66            if name is None or not name.strip():
67                continue
68
69            category = DocumentCategory(name=name)
70            categories[name] = category
71
72            parent = node.get("parent")
73
74            if parent:
75                children_map.setdefault(parent, []).append(category)
76
77        for parent, subcategories in children_map.items():
78            if parent in categories:
79                categories[parent].subcategories.extend(subcategories)
80
81        top_level_categories = [
82            categories[name]
83            for node in nodes
84            if node.get("parent") is None
85            if (name := node.text) and name in categories
86        ]
87
88        return top_level_categories

category: Optional[str] View Source

91    @cached_property
92    def category(self) -> Optional[str]:
93        return self.get_xpath_match_string(
94            "/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:category[not(@parent)][1]/text()"
95        )

case_number: Optional[str] View Source

97    @cached_property
98    def case_number(self) -> Optional[str]:
99        return self.get_xpath_match_string("/akn:akomaNtoso/akn:*/akn:meta/akn:proprietary/uk:caseNumber/text()")

court_and_jurisdiction_identifier_string: ds_caselaw_utils.types.CourtCode View Source

101    @property
102    def court_and_jurisdiction_identifier_string(self) -> CourtCode:
103        if self.jurisdiction != "":
104            return CourtCode("/".join((self.court, self.jurisdiction)))
105        return CourtCode(self.court)

document_date_as_string: str View Source

107    @cached_property
108    def document_date_as_string(self) -> str:
109        return self.get_xpath_match_string(
110            "/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRWork/akn:FRBRdate/@date",
111        )

document_date_as_date: Optional[datetime.date] View Source

113    @cached_property
114    def document_date_as_date(self) -> Optional[datetime.date]:
115        if not self.document_date_as_string:
116            return None
117        try:
118            return datetime.datetime.strptime(
119                self.document_date_as_string,
120                "%Y-%m-%d",
121            ).date()
122        except ValueError:
123            warnings.warn(
124                f"Unparsable date encountered: {self.document_date_as_string}",
125                UnparsableDate,
126            )
127            return None

def get_manifestation_datetimes(self, name: Optional[str] = None) -> list[datetime.datetime]: View Source

129    def get_manifestation_datetimes(
130        self,
131        name: Optional[str] = None,
132    ) -> list[datetime.datetime]:
133        name_filter = f"[@name='{name}']" if name else ""
134        iso_datetimes = self.get_xpath_match_strings(
135            f"/akn:akomaNtoso/akn:*/akn:meta/akn:identification/akn:FRBRManifestation/akn:FRBRdate{name_filter}/@date",
136        )
137
138        return [parse_string_date_as_utc(event, pytz.UTC) for event in iso_datetimes]

def get_latest_manifestation_datetime(self, name: Optional[str] = None) -> Optional[datetime.datetime]: View Source

140    def get_latest_manifestation_datetime(
141        self,
142        name: Optional[str] = None,
143    ) -> Optional[datetime.datetime]:
144        events = self.get_manifestation_datetimes(name)
145        if not events:
146            return None
147        return max(events)

def get_latest_manifestation_type(self) -> Optional[str]: View Source

149    def get_latest_manifestation_type(self) -> Optional[str]:
150        return max(
151            (
152                (type, time)
153                for type in ["transform", "tna-enriched"]
154                if (time := self.get_latest_manifestation_datetime(type))
155            ),
156            key=lambda x: x[1],
157        )[0]

transformation_datetime: Optional[datetime.datetime] View Source

159    @cached_property
160    def transformation_datetime(self) -> Optional[datetime.datetime]:
161        """When was this document successfully parsed or reparsed (date from XML)"""
162        return self.get_latest_manifestation_datetime("transform")

When was this document successfully parsed or reparsed (date from XML)

enrichment_datetime: Optional[datetime.datetime] View Source

164    @cached_property
165    def enrichment_datetime(self) -> Optional[datetime.datetime]:
166        """When was this document successfully enriched (date from XML)"""
167        return self.get_latest_manifestation_datetime("tna-enriched")

When was this document successfully enriched (date from XML)

content_as_xml: str View Source

169    @cached_property
170    def content_as_xml(self) -> str:
171        return self._xml.xml_as_string

has_content: bool View Source

173    @cached_property
174    def has_content(self) -> bool:
175        """If we do not have a word document, the XML will not contain
176        the contents of the judgment, but will have content in the header if a judgment.
177        All press summaries (which have <doc> not <judgment> tags) are assumed to have content"""
178        return bool(
179            self._xml.xml_as_tree.xpath("//akn:header[normalize-space(string(.))]", namespaces=DEFAULT_NAMESPACES)
180            or self._xml.xml_as_tree.xpath("//akn:doc", namespaces=DEFAULT_NAMESPACES)
181        )

If we do not have a word document, the XML will not contain the contents of the judgment, but will have content in the header if a judgment. All press summaries (which have `<doc>` not `<judgment>` tags) are assumed to have content.

has_external_data: bool View Source

183    @cached_property
184    def has_external_data(self) -> bool:
185        """Is there data which is not present within the source document:
186        is there a spreadsheet which has populated some fields. The current implementation
187        "is there a uk:party tag" is intended as a stopgap whilst we're not importing that data."""
188        return bool(self._xml.xml_as_tree.xpath("//uk:party", namespaces=DEFAULT_NAMESPACES))

Is there data which is not present within the source document: is there a spreadsheet which has populated some fields. The current implementation "is there a uk:party tag" is intended as a stopgap whilst we're not importing that data.

@cache

def content_html(self, image_prefix: str) -> Optional[str]: View Source

190    @cache
191    def content_html(self, image_prefix: str) -> Optional[str]:
192        """Convert the XML representation of the Document into HTML for rendering."""
193        """This used to be called content_as_html but we have changed the parameter passed to it from the
194        domain of the assets to the path in which the assets are stored (from assets to assets/d-a1b2c3)
195        and made the image_prefix mandatory"""
196        if not self.has_content:
197            return None
198
199        html_xslt_location = os.path.join(os.path.dirname(os.path.realpath(__file__)), "transforms", "html.xsl")
200
201        with PySaxonProcessor() as proc:
202            xslt_processor = proc.new_xslt30_processor()
203            document = proc.parse_xml(xml_text=self._xml.xml_as_string)
204
205            executable = xslt_processor.compile_stylesheet(stylesheet_file=html_xslt_location)
206
207            if image_prefix:
208                executable.set_parameter("image-prefix", proc.make_string_value(image_prefix))
209
210            return str(executable.transform_to_string(xdm_node=document))

Convert the XML representation of the Document into HTML for rendering.

failed_to_parse: bool View Source

212    @cached_property
213    def failed_to_parse(self) -> bool:
214        """
215        Did this document entirely fail to parse?
216
217        :return: `True` if there was a complete parser failure, otherwise `False`
218        """
219        return "error" in self._xml.root_element

Did this document entirely fail to parse?

Returns

True if there was a complete parser failure, otherwise False

def apply_xslt(self, xslt_filename: str, **values: str) -> bytes: View Source

221    def apply_xslt(self, xslt_filename: str, **values: str) -> bytes:
222        return self._xml.apply_xslt(xslt_filename, **values)