caselawclient.responses.search_result

View Source

  1import logging
  2import os
  3from datetime import datetime
  4from enum import Enum
  5from functools import cached_property
  6from typing import Any, Dict, Optional
  7
  8from dateutil import parser as dateparser
  9from dateutil.parser import ParserError
 10from ds_caselaw_utils.courts import Court, CourtNotFoundException, courts
 11from ds_caselaw_utils.types import CourtCode, JurisdictionCode
 12from lxml import etree
 13
 14from caselawclient.Client import MarklogicApiClient
 15from caselawclient.models.identifiers.collection import IdentifiersCollection
 16from caselawclient.models.identifiers.neutral_citation import NeutralCitationNumber
 17from caselawclient.models.identifiers.press_summary_ncn import PressSummaryRelatedNCNIdentifier
 18from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree
 19from caselawclient.types import DocumentURIString
 20from caselawclient.xml_helpers import get_xpath_match_string
 21
 22
class EditorStatus(Enum):
    """
    Enum representing the editor status.

    The member values are the strings returned by
    `SearchResultMetadata.editor_status`, which checks the conditions below
    in the order PUBLISHED, HOLD, IN_PROGRESS and falls back to NEW.
    """

    # Fallback: not published, not on hold and nobody assigned.
    NEW = "new"
    # An editor is assigned (non-empty `assigned_to`).
    IN_PROGRESS = "in progress"
    # The `editor_hold` metadata value is the string "true".
    HOLD = "hold"
    # The `is_published` metadata flag is set.
    PUBLISHED = "published"
 32
 33
class EditorPriority(Enum):
    """
    Enum representing the editor priority.

    The member values are plain strings; `SearchResultMetadata.editor_priority`
    passes `MEDIUM.value` as its fallback when reading the priority.
    """

    LOW = "low"
    # Used as the fallback value by `SearchResultMetadata.editor_priority`.
    MEDIUM = "medium"
    HIGH = "high"
 42
 43
 44class SearchResultMetadata:
 45    """
 46    Represents the metadata of a search result.
 47    """
 48
 49    def __init__(self, node: etree._Element, last_modified: str):
 50        self.node = node
 51        self.last_modified = last_modified
 52
 53    @property
 54    def author(self) -> str:
 55        """
 56        :return: The author of the search result
 57        """
 58
 59        return self._get_xpath_match_string("//source-name/text()")
 60
 61    @property
 62    def author_email(self) -> str:
 63        """
 64        :return: The email address of the author
 65        """
 66
 67        return self._get_xpath_match_string("//source-email/text()")
 68
 69    @property
 70    def consignment_reference(self) -> str:
 71        """
 72        :return: The consignment reference of this document submission
 73        """
 74
 75        return self._get_xpath_match_string("//transfer-consignment-reference/text()")
 76
 77    @property
 78    def assigned_to(self) -> str:
 79        """
 80        :return: The username of the editor assigned to this document
 81        """
 82
 83        return self._get_xpath_match_string("//assigned-to/text()")
 84
 85    @property
 86    def editor_hold(self) -> str:
 87        """
 88        :return: The editor hold status
 89        """
 90
 91        return self._get_xpath_match_string("//editor-hold/text()")
 92
 93    @property
 94    def is_published(self) -> bool:
 95        """
 96        :return:
 97        """
 98        return self._get_xpath_match_string("//published/text()") == "true"
 99
100    @property
101    def editor_priority(self) -> str:
102        """
103        :return: The editor priority
104        """
105
106        return self._get_xpath_match_string(
107            "//editor-priority/text()",
108            EditorPriority.MEDIUM.value,
109        )
110
111    @property
112    def submission_datetime(self) -> datetime:
113        """
114        :return: The submission datetime
115        """
116        if tdr_time := self._get_xpath_match_string("//transfer-received-at/text()"):
117            return datetime.strptime(tdr_time, "%Y-%m-%dT%H:%M:%SZ")
118        if email_time := self._get_xpath_match_string("//email-received-at/text()"):
119            return datetime.strptime(email_time, "%Y-%m-%dT%H:%M:%SZ")
120        return datetime.min
121
122    @property
123    def editor_status(
124        self,
125    ) -> str:
126        """
127        :return: The editor status based on the metadata
128        """
129
130        if self.is_published:
131            return EditorStatus.PUBLISHED.value
132        if self.editor_hold == "true":
133            return EditorStatus.HOLD.value
134        if self.assigned_to:
135            return EditorStatus.IN_PROGRESS.value
136        return EditorStatus.NEW.value
137
138    def _get_xpath_match_string(self, path: str, fallback: str = "") -> str:
139        return get_xpath_match_string(self.node, path, fallback=fallback)
140
141
class SearchResult:
    """
    A single search result, backed by an XML element.

    Most properties are XPath lookups against `node`, resolved using the
    prefixes declared in `NAMESPACES`.
    """

    NAMESPACES: Dict[str, str] = {
        "search": "http://marklogic.com/appservices/search",
        "uk": "https://caselaw.nationalarchives.gov.uk/akn",
        "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
    }
    """ Namespace mappings used in XPath expressions. """

    def __init__(self, node: etree._Element, client: MarklogicApiClient):
        """
        :param node: The XML element representing the search result
        :param client: The API client used by `metadata` to fetch properties and the last-modified value
        """

        self.client = client
        self.node = node

    def __repr__(self) -> str:
        # A result without a preferred identifier must still be printable,
        # so the RuntimeError raised by `slug` is replaced with a placeholder.
        try:
            display_slug = self.slug
        except RuntimeError:
            display_slug = "**NO SLUG**"
        display_name = self.name or "**NO NAME**"
        return "<SearchResult {} {} {} {}>".format(self.uri, display_slug, display_name, self.date)

    @property
    def uri(self) -> DocumentURIString:
        """
        :return: The `@uri` attribute with leading slashes removed and everything
            from the first `.xml` onwards dropped
        """

        raw_uri = self._get_xpath_match_string("@uri")
        stem, _separator, _remainder = raw_uri.lstrip("/").partition(".xml")
        return DocumentURIString(stem)

    @property
    def identifiers(self) -> IdentifiersCollection:
        """
        :return: The identifiers unpacked from the first `identifiers` node under
            this result. A warning is logged unless exactly one such node
            exists; with none, `None` is handed to the unpacker.
        """
        identifiers_etrees = self._get_xpath(".//identifiers")
        count = len(identifiers_etrees)
        if count != 1:
            logging.warning(f"{count} //identifiers nodes found in search result, expected 1.")
        first_identifiers_etree = identifiers_etrees[0] if identifiers_etrees else None
        return unpack_all_identifiers_from_etree(first_identifiers_etree)

    @cached_property
    def slug(self) -> str:
        """
        :return: The URL slug of the preferred identifier, as a string
        :raises RuntimeError: If the result has no preferred identifier
        """
        preferred_identifier = self.identifiers.preferred()
        if preferred_identifier:
            return str(preferred_identifier.url_slug)
        raise RuntimeError("No preferred identifier for search result")

    @property
    def neutral_citation(self) -> Optional[str]:
        """
        :return: The value of the preferred neutral citation; failing that, the
            value of the preferred press-summary related NCN; otherwise `None`.
        """

        # Identifier types are tried in priority order; the first match wins.
        for identifier_type in (NeutralCitationNumber, PressSummaryRelatedNCNIdentifier):
            candidate = self.identifiers.preferred(type=identifier_type)
            if candidate:
                return candidate.value
        return None

    @property
    def name(self) -> str:
        """
        :return: The `@value` of the extracted `akn:FRBRname` element
        """

        document_name = self._get_xpath_match_string("search:extracted/akn:FRBRname/@value")
        return document_name

    @property
    def court(
        self,
    ) -> Optional[Court]:
        """
        :return: The court looked up by court and jurisdiction code when a
            jurisdiction code is present; otherwise, or when that lookup
            fails, the court looked up by court code alone. `None` (with a
            logged warning) when no court can be found.
        """
        court_code = self._get_xpath_match_string("search:extracted/uk:court/text()")
        jurisdiction_code = self._get_xpath_match_string(
            "search:extracted/uk:jurisdiction/text()",
        )

        if jurisdiction_code:
            try:
                court_with_jurisdiction = courts.get_court_with_jurisdiction_by_code(
                    CourtCode(court_code), JurisdictionCode(jurisdiction_code)
                )
            except CourtNotFoundException:
                logging.warning(
                    "Court not found with court code %s and jurisdiction code %s for judgment with NCN %s, falling back to court."
                    % (court_code, jurisdiction_code, self.neutral_citation),
                )
            else:
                if court_with_jurisdiction is not None:
                    return court_with_jurisdiction

        # Either no jurisdiction was given or the combined lookup produced nothing.
        try:
            return courts.get_by_code(CourtCode(court_code))
        except CourtNotFoundException:
            logging.warning(
                "Court not found with court code %s for judgment with NCN %s, returning None."
                % (court_code, self.neutral_citation),
            )
            return None

    @property
    def date(self) -> Optional[datetime]:
        """
        :return: The parsed `@date` of the extracted `akn:FRBRdate` element named
            `judgment` or `decision`, or `None` (with a logged warning) when
            the value cannot be parsed
        """

        date_string = self._get_xpath_match_string(
            "search:extracted/akn:FRBRdate[(@name='judgment' or @name='decision')]/@date",
        )
        try:
            return dateparser.parse(date_string)
        except ParserError as e:
            logging.warning(
                f'Unable to parse document date "{date_string}". Full error: {e}',
            )
            return None

    @property
    def transformation_date(self) -> str:
        """
        :return: The unparsed `@date` of the extracted `akn:FRBRdate` element named `transform`
        """

        transform_date = self._get_xpath_match_string(
            "search:extracted/akn:FRBRdate[@name='transform']/@date",
        )
        return transform_date

    @property
    def content_hash(self) -> str:
        """
        :return: The text of the extracted `uk:hash` element
        """

        hash_text = self._get_xpath_match_string("search:extracted/uk:hash/text()")
        return hash_text

    @property
    def matches(self) -> str:
        """
        :return: The string form of this result's node after applying the
            `xsl/search_match.xsl` stylesheet that sits next to this module
        """

        module_directory = os.path.dirname(__file__)
        stylesheet_path = os.path.join(module_directory, "xsl/search_match.xsl")
        stylesheet = etree.parse(stylesheet_path)
        apply_stylesheet = etree.XSLT(stylesheet)
        transformed = apply_stylesheet(self.node)
        return str(transformed)

    @cached_property
    def metadata(self) -> SearchResultMetadata:
        """
        :return: A `SearchResultMetadata` built from the properties and
            last-modified value fetched through `client` for this result's URI.
            Computed on first access and cached on the instance afterwards.
        """
        properties_xml = self.client.get_properties_for_search_results([self.uri])
        last_modified = self.client.get_last_modified(self.uri)
        properties_root = etree.fromstring(properties_xml)
        return SearchResultMetadata(properties_root, last_modified)

    def _get_xpath_match_string(self, path: str) -> str:
        """
        Delegate to the module-level `get_xpath_match_string` helper.

        :param path: The XPath expression to evaluate against `self.node`
        :return: Whatever string the helper returns, with `NAMESPACES` supplied for prefix resolution
        """
        return get_xpath_match_string(self.node, path, namespaces=self.NAMESPACES)

    def _get_xpath(self, path: str) -> Any:
        """
        Evaluate an XPath expression directly on the node.

        :param path: The XPath expression to evaluate against `self.node`
        :return: The raw result of `node.xpath`, with `NAMESPACES` supplied for prefix resolution
        """
        return self.node.xpath(path, namespaces=self.NAMESPACES)

class EditorStatus(enum.Enum): View Source

class EditorStatus(Enum):
    """
    Enum representing the editor status.

    The member values are the strings returned by
    `SearchResultMetadata.editor_status`, which checks the conditions below
    in the order PUBLISHED, HOLD, IN_PROGRESS and falls back to NEW.
    """

    # Fallback: not published, not on hold and nobody assigned.
    NEW = "new"
    # An editor is assigned (non-empty `assigned_to`).
    IN_PROGRESS = "in progress"
    # The `editor_hold` metadata value is the string "true".
    HOLD = "hold"
    # The `is_published` metadata flag is set.
    PUBLISHED = "published"

Enum representing the editor status.

NEW = <EditorStatus.NEW: 'new'>

IN_PROGRESS = <EditorStatus.IN_PROGRESS: 'in progress'>

HOLD = <EditorStatus.HOLD: 'hold'>

PUBLISHED = <EditorStatus.PUBLISHED: 'published'>

class EditorPriority(enum.Enum): View Source

class EditorPriority(Enum):
    """
    Enum representing the editor priority.

    The member values are plain strings; `SearchResultMetadata.editor_priority`
    passes `MEDIUM.value` as its fallback when reading the priority.
    """

    LOW = "low"
    # Used as the fallback value by `SearchResultMetadata.editor_priority`.
    MEDIUM = "medium"
    HIGH = "high"

Enum representing the editor priority.

LOW = <EditorPriority.LOW: 'low'>

MEDIUM = <EditorPriority.MEDIUM: 'medium'>

HIGH = <EditorPriority.HIGH: 'high'>

caselawclient.responses.search_result

Returns

Returns

Returns

Returns

Returns

Returns

Returns

Returns

Returns

Parameters

Returns

Returns

Returns

Returns

Returns

Returns

Returns

Returns

Returns