caselawclient.responses.search_result
import logging
import os
from datetime import datetime
from enum import Enum
from functools import cached_property
from typing import Any, Dict, Optional

from dateutil import parser as dateparser
from dateutil.parser import ParserError
from ds_caselaw_utils.courts import Court, CourtNotFoundException, courts
from ds_caselaw_utils.types import CourtCode, JurisdictionCode
from lxml import etree

from caselawclient.Client import MarklogicApiClient
from caselawclient.models.identifiers.collection import IdentifiersCollection
from caselawclient.models.identifiers.neutral_citation import NeutralCitationNumber
from caselawclient.models.identifiers.press_summary_ncn import PressSummaryRelatedNCNIdentifier
from caselawclient.models.identifiers.unpacker import unpack_all_identifiers_from_etree
from caselawclient.types import DocumentURIString
from caselawclient.xml_helpers import get_xpath_match_string


class EditorStatus(Enum):
    """
    Enum representing the editor status.

    The member values are the strings returned by `SearchResultMetadata.editor_status`.
    """

    NEW = "new"
    IN_PROGRESS = "in progress"
    HOLD = "hold"
    PUBLISHED = "published"


class EditorPriority(Enum):
    """
    Enum representing the editor priority.

    `MEDIUM` is the fallback used by `SearchResultMetadata.editor_priority` when no priority is found.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"


class SearchResultMetadata:
    """
    Represents the metadata of a search result.

    All values are read from the wrapped XML node with document-wide (`//...`) XPath lookups.
    """

    def __init__(self, node: etree._Element, last_modified: str):
        """
        :param node: The XML element holding the properties of the search result
        :param last_modified: The last-modified value for the document, stored unchanged
        """
        self.node = node
        self.last_modified = last_modified

    @property
    def author(self) -> str:
        """
        :return: The author of the search result, read from `//source-name`
        """

        return self._get_xpath_match_string("//source-name/text()")

    @property
    def author_email(self) -> str:
        """
        :return: The email address of the author, read from `//source-email`
        """

        return self._get_xpath_match_string("//source-email/text()")

    @property
    def consignment_reference(self) -> str:
        """
        :return: The consignment reference of this document submission, read from `//transfer-consignment-reference`
        """

        return self._get_xpath_match_string("//transfer-consignment-reference/text()")

    @property
    def assigned_to(self) -> str:
        """
        :return: The username of the editor assigned to this document, read from `//assigned-to`
        """

        return self._get_xpath_match_string("//assigned-to/text()")

    @property
    def editor_hold(self) -> str:
        """
        :return: The editor hold status as a string, read from `//editor-hold` (compared against `"true"` by `editor_status`)
        """

        return self._get_xpath_match_string("//editor-hold/text()")

    @property
    def is_published(self) -> bool:
        """
        :return: `True` if the text of `//published` is exactly `"true"`, otherwise `False`
        """
        return self._get_xpath_match_string("//published/text()") == "true"

    @property
    def editor_priority(self) -> str:
        """
        :return: The editor priority, read from `//editor-priority`, with `EditorPriority.MEDIUM.value` as the fallback
        """

        return self._get_xpath_match_string(
            "//editor-priority/text()",
            EditorPriority.MEDIUM.value,
        )

    @property
    def submission_datetime(self) -> datetime:
        """
        :return: The submission datetime: `//transfer-received-at` if non-empty, otherwise `//email-received-at`
            if non-empty, otherwise `datetime.min`. Values are parsed with the fixed format `%Y-%m-%dT%H:%M:%SZ`,
            so the result is a naive `datetime`.
        """
        # The transfer timestamp takes precedence over the email timestamp.
        if tdr_time := self._get_xpath_match_string("//transfer-received-at/text()"):
            return datetime.strptime(tdr_time, "%Y-%m-%dT%H:%M:%SZ")
        if email_time := self._get_xpath_match_string("//email-received-at/text()"):
            return datetime.strptime(email_time, "%Y-%m-%dT%H:%M:%SZ")
        return datetime.min

    @property
    def editor_status(
        self,
    ) -> str:
        """
        :return: The editor status based on the metadata, as an `EditorStatus` value string. Checks are made in
            order: published, on hold, assigned to an editor; if none apply the status is "new".
        """

        if self.is_published:
            return EditorStatus.PUBLISHED.value
        if self.editor_hold == "true":
            return EditorStatus.HOLD.value
        if self.assigned_to:
            return EditorStatus.IN_PROGRESS.value
        return EditorStatus.NEW.value

    def _get_xpath_match_string(self, path: str, fallback: str = "") -> str:
        """
        Delegate to `get_xpath_match_string` for this instance's node.

        :param path: The XPath expression to evaluate
        :param fallback: Passed through as the helper's `fallback` (presumably returned when nothing matches)
        :return: The string produced by the helper
        """
        return get_xpath_match_string(self.node, path, fallback=fallback)


class SearchResult:
    """
    Represents a search result obtained from XML data.
    """

    NAMESPACES: Dict[str, str] = {
        "search": "http://marklogic.com/appservices/search",
        "uk": "https://caselaw.nationalarchives.gov.uk/akn",
        "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
    }
    """ Namespace mappings used in XPath expressions. """

    def __init__(self, node: etree._Element, client: MarklogicApiClient):
        """
        :param node: The XML element representing the search result
        :param client: The API client used later to fetch this result's metadata
        """

        self.node = node
        self.client = client

    def __repr__(self) -> str:
        """
        :return: A debug representation containing the URI, slug, name and date, with placeholders when the
            slug cannot be determined or the name is empty
        """
        try:
            slug = self.slug
        except RuntimeError:
            # `slug` raises RuntimeError when there is no preferred identifier.
            slug = "**NO SLUG**"
        name = self.name or "**NO NAME**"
        return f"<SearchResult {self.uri} {slug} {name} {self.date}>"

    @property
    def uri(self) -> DocumentURIString:
        """
        :return: The URI of the search result: the node's `uri` attribute with leading slashes removed and
            everything from the first ".xml" onwards dropped
        """

        return DocumentURIString(
            self._get_xpath_match_string("@uri").lstrip("/").split(".xml")[0],
        )

    @property
    def identifiers(self) -> IdentifiersCollection:
        """
        :return: The identifiers unpacked from the first `identifiers` descendant of the node (`None` is passed
            to the unpacker when there is none). A warning is logged unless exactly one such node exists.
        """
        identifiers_etrees = self._get_xpath(".//identifiers")
        count = len(identifiers_etrees)
        if count != 1:
            logging.warning(f"{count} //identifiers nodes found in search result, expected 1.")
        identifiers_etree = None if not identifiers_etrees else identifiers_etrees[0]
        return unpack_all_identifiers_from_etree(identifiers_etree)

    @cached_property
    def slug(self) -> str:
        """
        :return: The URL slug of the preferred identifier, as a string
        :raises RuntimeError: If the result has no preferred identifier
        """
        preferred = self.identifiers.preferred()
        if not preferred:
            raise RuntimeError("No preferred identifier for search result")
        return str(preferred.url_slug)

    @property
    def neutral_citation(self) -> Optional[str]:
        """
        :return: If present, the value of preferred neutral citation of the document; failing that, the value of
            the preferred press-summary-related NCN; otherwise `None`.
        """

        preferred_ncn = self.identifiers.preferred(type=NeutralCitationNumber)

        # If the result doesn't have a preferred NCN, maybe it has a preferred press summary related NCN?
        if not preferred_ncn:
            preferred_ncn = self.identifiers.preferred(type=PressSummaryRelatedNCNIdentifier)

        return preferred_ncn.value if preferred_ncn else None

    @property
    def name(self) -> str:
        """
        :return: The title of the search result's document, read from the extracted `akn:FRBRname/@value`
        """

        return self._get_xpath_match_string("search:extracted/akn:FRBRname/@value")

    @property
    def court(
        self,
    ) -> Optional[Court]:
        """
        :return: The court of the search result. When a jurisdiction code is present the court-with-jurisdiction
            lookup is tried first; if that yields nothing the plain court-code lookup is used. `None` is returned
            (and a warning logged) when no court is found.
        """
        court: Optional[Court] = None
        court_code = self._get_xpath_match_string("search:extracted/uk:court/text()")
        jurisdiction_code = self._get_xpath_match_string(
            "search:extracted/uk:jurisdiction/text()",
        )
        if jurisdiction_code:
            try:
                court = courts.get_court_with_jurisdiction_by_code(
                    CourtCode(court_code), JurisdictionCode(jurisdiction_code)
                )
            except CourtNotFoundException:
                logging.warning(
                    "Court not found with court code %s and jurisdiction code %s for judgment with NCN %s, falling back to court."
                    % (court_code, jurisdiction_code, self.neutral_citation),
                )
        # Fall back to a lookup by court code alone.
        if court is None:
            try:
                court = courts.get_by_code(CourtCode(court_code))
            except CourtNotFoundException:
                logging.warning(
                    "Court not found with court code %s for judgment with NCN %s, returning None."
                    % (court_code, self.neutral_citation),
                )
                court = None
        return court

    @property
    def date(self) -> Optional[datetime]:
        """
        :return: The date of the search result, parsed from the extracted `akn:FRBRdate` named "judgment" or
            "decision"; `None` (with a warning logged) if the value cannot be parsed
        """

        date_string = self._get_xpath_match_string(
            "search:extracted/akn:FRBRdate[(@name='judgment' or @name='decision')]/@date",
        )
        try:
            date = dateparser.parse(date_string)
        except ParserError as e:
            logging.warning(
                f'Unable to parse document date "{date_string}". Full error: {e}',
            )
            date = None
        return date

    @property
    def transformation_date(self) -> str:
        """
        :return: The transformation date of the search result, as the unparsed string from the extracted
            `akn:FRBRdate` named "transform"
        """

        return self._get_xpath_match_string(
            "search:extracted/akn:FRBRdate[@name='transform']/@date",
        )

    @property
    def content_hash(self) -> str:
        """
        :return: The content hash of the search result, read from the extracted `uk:hash`
        """

        return self._get_xpath_match_string("search:extracted/uk:hash/text()")

    @property
    def matches(self) -> str:
        """
        :return: The search result matches, produced by applying the `xsl/search_match.xsl` stylesheet (located
            next to this module) to the result node. The stylesheet is parsed on every access.
        """

        file_path = os.path.join(os.path.dirname(__file__), "xsl/search_match.xsl")
        xslt_transform = etree.XSLT(etree.parse(file_path))
        return str(xslt_transform(self.node))

    @cached_property
    def metadata(self) -> SearchResultMetadata:
        """
        :return: A `SearchResultMetadata` instance representing the metadata of this result. Two client calls
            are made on first access; the result is cached on the instance afterwards.
        """
        response_text = self.client.get_properties_for_search_results([self.uri])
        last_modified = self.client.get_last_modified(self.uri)
        root = etree.fromstring(response_text)
        return SearchResultMetadata(root, last_modified)

    def _get_xpath_match_string(self, path: str) -> str:
        """
        Delegate to `get_xpath_match_string` for this result's node, using `NAMESPACES`.

        :param path: The XPath expression to evaluate
        :return: The string produced by the helper
        """
        return get_xpath_match_string(self.node, path, namespaces=self.NAMESPACES)

    def _get_xpath(self, path: str) -> Any:
        """
        Evaluate an XPath expression against this result's node, using `NAMESPACES`.

        :param path: The XPath expression to evaluate
        :return: Whatever `node.xpath` returns for the expression
        """
        return self.node.xpath(path, namespaces=self.NAMESPACES)
class EditorStatus(Enum):
    """
    Enum representing the editor status.

    The member values are the strings returned by `SearchResultMetadata.editor_status`.
    """

    # Returned when the document is not published, not on hold and has no assigned editor.
    NEW = "new"
    # Returned when an editor is assigned to the document.
    IN_PROGRESS = "in progress"
    # Returned when the editor-hold flag is "true".
    HOLD = "hold"
    # Returned when the document is published.
    PUBLISHED = "published"
Enum representing the editor status.
class EditorPriority(Enum):
    """
    Enum representing the editor priority.

    `MEDIUM` is the fallback used by `SearchResultMetadata.editor_priority` when no priority is found.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
Enum representing the editor priority.
class SearchResultMetadata:
    """
    Metadata describing a single search result.

    Every value is looked up in the wrapped XML node with a document-wide (`//...`) XPath expression.
    """

    def __init__(self, node: etree._Element, last_modified: str):
        """
        :param node: The XML element holding the properties of the search result
        :param last_modified: The last-modified value for the document, stored unchanged
        """
        self.node, self.last_modified = node, last_modified

    @property
    def author(self) -> str:
        """
        :return: The author of the search result
        """
        xpath = "//source-name/text()"
        return self._get_xpath_match_string(xpath)

    @property
    def author_email(self) -> str:
        """
        :return: The email address of the author
        """
        xpath = "//source-email/text()"
        return self._get_xpath_match_string(xpath)

    @property
    def consignment_reference(self) -> str:
        """
        :return: The consignment reference of this document submission
        """
        xpath = "//transfer-consignment-reference/text()"
        return self._get_xpath_match_string(xpath)

    @property
    def assigned_to(self) -> str:
        """
        :return: The username of the editor assigned to this document
        """
        xpath = "//assigned-to/text()"
        return self._get_xpath_match_string(xpath)

    @property
    def editor_hold(self) -> str:
        """
        :return: The editor hold status, as the raw string found in the XML
        """
        xpath = "//editor-hold/text()"
        return self._get_xpath_match_string(xpath)

    @property
    def is_published(self) -> bool:
        """
        :return: `True` if the text of `//published` is exactly `"true"`, otherwise `False`
        """
        published_flag = self._get_xpath_match_string("//published/text()")
        return published_flag == "true"

    @property
    def editor_priority(self) -> str:
        """
        :return: The editor priority, with `EditorPriority.MEDIUM.value` as the fallback
        """
        default_priority = EditorPriority.MEDIUM.value
        return self._get_xpath_match_string("//editor-priority/text()", default_priority)

    @property
    def submission_datetime(self) -> datetime:
        """
        :return: The submission datetime: the transfer-received time if non-empty, otherwise the
            email-received time if non-empty, otherwise `datetime.min`
        """
        timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
        # Sources are listed in order of precedence; the first non-empty one wins.
        for xpath in ("//transfer-received-at/text()", "//email-received-at/text()"):
            received_at = self._get_xpath_match_string(xpath)
            if received_at:
                return datetime.strptime(received_at, timestamp_format)
        return datetime.min

    @property
    def editor_status(
        self,
    ) -> str:
        """
        :return: The editor status derived from the metadata, as an `EditorStatus` value string
        """
        # Checks are ordered by precedence: published, then on hold, then assigned.
        if self.is_published:
            status = EditorStatus.PUBLISHED
        elif self.editor_hold == "true":
            status = EditorStatus.HOLD
        elif self.assigned_to:
            status = EditorStatus.IN_PROGRESS
        else:
            status = EditorStatus.NEW
        return status.value

    def _get_xpath_match_string(self, path: str, fallback: str = "") -> str:
        """
        Delegate to `get_xpath_match_string` for this instance's node, passing `fallback` through.
        """
        return get_xpath_match_string(self.node, path, fallback=fallback)
Represents the metadata of a search result.
@property
def consignment_reference(self) -> str:
    """
    :return: The consignment reference of this document submission, read from `//transfer-consignment-reference`
    """
    xpath = "//transfer-consignment-reference/text()"
    return self._get_xpath_match_string(xpath)
Returns
The consignment reference of this document submission
@property
def assigned_to(self) -> str:
    """
    :return: The username of the editor assigned to this document, read from `//assigned-to`
    """
    xpath = "//assigned-to/text()"
    return self._get_xpath_match_string(xpath)
Returns
The username of the editor assigned to this document
@property
def editor_hold(self) -> str:
    """
    :return: The editor hold status, as the raw string read from `//editor-hold`
    """
    xpath = "//editor-hold/text()"
    return self._get_xpath_match_string(xpath)
Returns
The editor hold status
@property
def is_published(self) -> bool:
    """
    :return: `True` if the text of `//published` is exactly `"true"`, otherwise `False`
    """
    published_flag = self._get_xpath_match_string("//published/text()")
    return published_flag == "true"
Returns
`True` if the document's `published` property is `"true"`, otherwise `False`
@property
def editor_priority(self) -> str:
    """
    :return: The editor priority, read from `//editor-priority`, with `EditorPriority.MEDIUM.value` as the fallback
    """
    default_priority = EditorPriority.MEDIUM.value
    return self._get_xpath_match_string("//editor-priority/text()", default_priority)
Returns
The editor priority
@property
def submission_datetime(self) -> datetime:
    """
    :return: The submission datetime: the transfer-received time if non-empty, otherwise the
        email-received time if non-empty, otherwise `datetime.min`
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    # Sources are listed in order of precedence; the first non-empty one wins.
    for xpath in ("//transfer-received-at/text()", "//email-received-at/text()"):
        received_at = self._get_xpath_match_string(xpath)
        if received_at:
            return datetime.strptime(received_at, timestamp_format)
    return datetime.min
Returns
The submission datetime
@property
def editor_status(
    self,
) -> str:
    """
    :return: The editor status derived from the metadata, as an `EditorStatus` value string
    """
    # Checks are ordered by precedence: published, then on hold, then assigned.
    if self.is_published:
        status = EditorStatus.PUBLISHED
    elif self.editor_hold == "true":
        status = EditorStatus.HOLD
    elif self.assigned_to:
        status = EditorStatus.IN_PROGRESS
    else:
        status = EditorStatus.NEW
    return status.value
Returns
The editor status based on the metadata
class SearchResult:
    """
    A single search result, backed by an XML node from the search response.
    """

    NAMESPACES: Dict[str, str] = {
        "search": "http://marklogic.com/appservices/search",
        "uk": "https://caselaw.nationalarchives.gov.uk/akn",
        "akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0",
    }
    """ Namespace mappings used in XPath expressions. """

    def __init__(self, node: etree._Element, client: MarklogicApiClient):
        """
        :param node: The XML element representing the search result
        :param client: The API client used later to fetch this result's metadata
        """
        self.node, self.client = node, client

    def __repr__(self) -> str:
        """
        :return: A debug representation with placeholders for a missing slug or an empty name
        """
        try:
            slug = self.slug
        except RuntimeError:
            slug = "**NO SLUG**"
        name = self.name
        if not name:
            name = "**NO NAME**"
        return f"<SearchResult {self.uri} {slug} {name} {self.date}>"

    @property
    def uri(self) -> DocumentURIString:
        """
        :return: The URI of the search result, without leading slashes and cut at the first ".xml"
        """
        raw_uri = self._get_xpath_match_string("@uri")
        stem = raw_uri.lstrip("/").split(".xml")[0]
        return DocumentURIString(stem)

    @property
    def identifiers(self) -> IdentifiersCollection:
        """
        :return: The identifiers unpacked from the first `identifiers` descendant of the node, if any
        """
        identifiers_etrees = self._get_xpath(".//identifiers")
        count = len(identifiers_etrees)
        if count != 1:
            logging.warning(f"{count} //identifiers nodes found in search result, expected 1.")
        first_node = identifiers_etrees[0] if identifiers_etrees else None
        return unpack_all_identifiers_from_etree(first_node)

    @cached_property
    def slug(self) -> str:
        """
        :return: The URL slug of the preferred identifier
        :raises RuntimeError: If the result has no preferred identifier
        """
        preferred = self.identifiers.preferred()
        if preferred:
            return str(preferred.url_slug)
        raise RuntimeError("No preferred identifier for search result")

    @property
    def neutral_citation(self) -> Optional[str]:
        """
        :return: The value of the preferred neutral citation, else of the preferred press summary
            related NCN, else `None`
        """
        # A real NCN is preferred; the press summary related NCN is only consulted as a fallback.
        for identifier_type in (NeutralCitationNumber, PressSummaryRelatedNCNIdentifier):
            candidate = self.identifiers.preferred(type=identifier_type)
            if candidate:
                return candidate.value
        return None

    @property
    def name(self) -> str:
        """
        :return: The title of the search result's document
        """
        xpath = "search:extracted/akn:FRBRname/@value"
        return self._get_xpath_match_string(xpath)

    @property
    def court(
        self,
    ) -> Optional[Court]:
        """
        :return: The court of the search result, or `None` if no lookup succeeds
        """
        code = self._get_xpath_match_string("search:extracted/uk:court/text()")
        jurisdiction = self._get_xpath_match_string(
            "search:extracted/uk:jurisdiction/text()",
        )

        resolved: Optional[Court] = None
        if jurisdiction:
            try:
                resolved = courts.get_court_with_jurisdiction_by_code(
                    CourtCode(code), JurisdictionCode(jurisdiction)
                )
            except CourtNotFoundException:
                logging.warning(
                    "Court not found with court code %s and jurisdiction code %s for judgment with NCN %s, falling back to court."
                    % (code, jurisdiction, self.neutral_citation),
                )
        if resolved is not None:
            return resolved

        # No court-with-jurisdiction match: look the court up by its code alone.
        try:
            return courts.get_by_code(CourtCode(code))
        except CourtNotFoundException:
            logging.warning(
                "Court not found with court code %s for judgment with NCN %s, returning None."
                % (code, self.neutral_citation),
            )
        return None

    @property
    def date(self) -> Optional[datetime]:
        """
        :return: The judgment/decision date of the search result, or `None` if it cannot be parsed
        """
        date_string = self._get_xpath_match_string(
            "search:extracted/akn:FRBRdate[(@name='judgment' or @name='decision')]/@date",
        )
        try:
            return dateparser.parse(date_string)
        except ParserError as e:
            logging.warning(
                f'Unable to parse document date "{date_string}". Full error: {e}',
            )
            return None

    @property
    def transformation_date(self) -> str:
        """
        :return: The transformation date of the search result, as an unparsed string
        """
        xpath = "search:extracted/akn:FRBRdate[@name='transform']/@date"
        return self._get_xpath_match_string(xpath)

    @property
    def content_hash(self) -> str:
        """
        :return: The content hash of the search result
        """
        xpath = "search:extracted/uk:hash/text()"
        return self._get_xpath_match_string(xpath)

    @property
    def matches(self) -> str:
        """
        :return: The search result matches, rendered by the `xsl/search_match.xsl` stylesheet
        """
        module_dir = os.path.dirname(__file__)
        stylesheet = etree.parse(os.path.join(module_dir, "xsl/search_match.xsl"))
        transform = etree.XSLT(stylesheet)
        return str(transform(self.node))

    @cached_property
    def metadata(self) -> SearchResultMetadata:
        """
        :return: A `SearchResultMetadata` instance representing the metadata of this result
        """
        properties_xml = self.client.get_properties_for_search_results([self.uri])
        modified_at = self.client.get_last_modified(self.uri)
        return SearchResultMetadata(etree.fromstring(properties_xml), modified_at)

    def _get_xpath_match_string(self, path: str) -> str:
        """
        Delegate to `get_xpath_match_string` for this result's node, using `NAMESPACES`.
        """
        return get_xpath_match_string(self.node, path, namespaces=self.NAMESPACES)

    def _get_xpath(self, path: str) -> Any:
        """
        Evaluate an XPath expression against this result's node, using `NAMESPACES`.
        """
        return self.node.xpath(path, namespaces=self.NAMESPACES)
Represents a search result obtained from XML data.
def __init__(self, node: etree._Element, client: MarklogicApiClient):
    """
    :param node: The XML element representing the search result
    :param client: The API client stored on the instance for later metadata lookups
    """
    self.node, self.client = node, client
Parameters
- node: The XML element representing the search result
Namespace mappings used in XPath expressions.
@property
def uri(self) -> DocumentURIString:
    """
    :return: The URI of the search result: the node's `uri` attribute without leading slashes and cut at
        the first ".xml"
    """
    raw_uri = self._get_xpath_match_string("@uri")
    stem = raw_uri.lstrip("/").split(".xml")[0]
    return DocumentURIString(stem)
Returns
The URI of the search result
@property
def identifiers(self) -> IdentifiersCollection:
    """
    :return: The identifiers unpacked from the first `identifiers` descendant of the node (`None` is passed
        to the unpacker when there is none). A warning is logged unless exactly one such node exists.
    """
    identifiers_etrees = self._get_xpath(".//identifiers")
    count = len(identifiers_etrees)
    if count != 1:
        logging.warning(f"{count} //identifiers nodes found in search result, expected 1.")
    first_node = identifiers_etrees[0] if identifiers_etrees else None
    return unpack_all_identifiers_from_etree(first_node)
@property
def neutral_citation(self) -> Optional[str]:
    """
    :return: The value of the preferred neutral citation, else of the preferred press summary
        related NCN, else `None`
    """
    # A real NCN is preferred; the press summary related NCN is only consulted as a fallback.
    for identifier_type in (NeutralCitationNumber, PressSummaryRelatedNCNIdentifier):
        candidate = self.identifiers.preferred(type=identifier_type)
        if candidate:
            return candidate.value
    return None
Returns
If present, the value of preferred neutral citation of the document.
@property
def name(self) -> str:
    """
    :return: The title of the search result's document, read from the extracted `akn:FRBRname/@value`
    """
    xpath = "search:extracted/akn:FRBRname/@value"
    return self._get_xpath_match_string(xpath)
Returns
The title of the search result's document
@property
def court(
    self,
) -> Optional[Court]:
    """
    :return: The court of the search result. The court-with-jurisdiction lookup is tried first when a
        jurisdiction code is present; otherwise, or if that yields nothing, the court is looked up by code
        alone. `None` is returned (and a warning logged) when no court is found.
    """
    code = self._get_xpath_match_string("search:extracted/uk:court/text()")
    jurisdiction = self._get_xpath_match_string(
        "search:extracted/uk:jurisdiction/text()",
    )

    resolved: Optional[Court] = None
    if jurisdiction:
        try:
            resolved = courts.get_court_with_jurisdiction_by_code(
                CourtCode(code), JurisdictionCode(jurisdiction)
            )
        except CourtNotFoundException:
            logging.warning(
                "Court not found with court code %s and jurisdiction code %s for judgment with NCN %s, falling back to court."
                % (code, jurisdiction, self.neutral_citation),
            )
    if resolved is not None:
        return resolved

    # No court-with-jurisdiction match: look the court up by its code alone.
    try:
        return courts.get_by_code(CourtCode(code))
    except CourtNotFoundException:
        logging.warning(
            "Court not found with court code %s for judgment with NCN %s, returning None."
            % (code, self.neutral_citation),
        )
    return None
Returns
The court of the search result
@property
def date(self) -> Optional[datetime]:
    """
    :return: The date of the search result, parsed from the extracted `akn:FRBRdate` named "judgment" or
        "decision"; `None` (with a warning logged) if the value cannot be parsed
    """
    date_string = self._get_xpath_match_string(
        "search:extracted/akn:FRBRdate[(@name='judgment' or @name='decision')]/@date",
    )
    try:
        return dateparser.parse(date_string)
    except ParserError as e:
        logging.warning(
            f'Unable to parse document date "{date_string}". Full error: {e}',
        )
        return None
Returns
The date of the search result
@property
def transformation_date(self) -> str:
    """
    :return: The transformation date of the search result, as the unparsed string from the extracted
        `akn:FRBRdate` named "transform"
    """
    xpath = "search:extracted/akn:FRBRdate[@name='transform']/@date"
    return self._get_xpath_match_string(xpath)
Returns
The transformation date of the search result
@property
def content_hash(self) -> str:
    """
    :return: The content hash of the search result, read from the extracted `uk:hash`
    """
    xpath = "search:extracted/uk:hash/text()"
    return self._get_xpath_match_string(xpath)
Returns
The content hash of the search result
@property
def matches(self) -> str:
    """
    :return: The search result matches, produced by applying the `xsl/search_match.xsl` stylesheet (located
        next to this module) to the result node
    """
    module_dir = os.path.dirname(__file__)
    stylesheet = etree.parse(os.path.join(module_dir, "xsl/search_match.xsl"))
    transform = etree.XSLT(stylesheet)
    return str(transform(self.node))
Returns
The search result matches
@cached_property
def metadata(self) -> SearchResultMetadata:
    """
    :return: A `SearchResultMetadata` instance representing the metadata of this result, built from the
        client's properties response and last-modified value for this result's URI
    """
    properties_xml = self.client.get_properties_for_search_results([self.uri])
    modified_at = self.client.get_last_modified(self.uri)
    return SearchResultMetadata(etree.fromstring(properties_xml), modified_at)
Returns
A `SearchResultMetadata` instance representing the metadata of this result