caselawclient.models.identifiers.unpacker
"""Unpack XML representations of identifiers into ``Identifier`` instances."""

from typing import Optional
from warnings import warn

from lxml import etree

from . import IDENTIFIER_UNPACKABLE_ATTRIBUTES, Identifier
from .collection import SUPPORTED_IDENTIFIER_TYPES, IdentifiersCollection
from .exceptions import InvalidIdentifierXMLRepresentationException

# Lookup from each supported identifier type's schema namespace to the class implementing it.
IDENTIFIER_NAMESPACE_MAP: dict[str, type[Identifier]] = {
    identifier_type.schema.namespace: identifier_type for identifier_type in SUPPORTED_IDENTIFIER_TYPES
}


def unpack_all_identifiers_from_etree(identifiers_etree: Optional[etree._Element]) -> IdentifiersCollection:
    """Unpack every ``<identifier>`` child of an ``<identifiers>`` element.

    :param identifiers_etree: The entire ``<identifiers>`` element, or ``None``.
    :return: A collection holding each identifier that could be unpacked; empty
        when ``identifiers_etree`` is ``None``.
    :raises InvalidIdentifierXMLRepresentationException: Propagated from
        ``unpack_an_identifier_from_etree`` for a malformed child.
    """
    identifiers = IdentifiersCollection()
    if identifiers_etree is None:
        return identifiers
    for identifier_etree in identifiers_etree.findall("identifier"):
        identifier = unpack_an_identifier_from_etree(identifier_etree)
        # Identifiers of an unknown type come back as None and are left out.
        if identifier:
            identifiers.add(identifier)
    return identifiers


def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Optional[Identifier]:
    """Unpack a single identifier element into the matching ``Identifier`` class.

    The text of the child ``<namespace>`` element selects the class from
    ``IDENTIFIER_NAMESPACE_MAP``. Each name in ``IDENTIFIER_UNPACKABLE_ATTRIBUTES``
    is then read from the child element of the same name and passed to that
    class as a keyword argument.

    :param identifier_xml: The etree representation of one identifier.
    :return: The unpacked identifier, or ``None`` (after emitting a warning) if
        the namespace is not in ``IDENTIFIER_NAMESPACE_MAP``.
    :raises InvalidIdentifierXMLRepresentationException: If the namespace
        element, or any attribute element other than ``deprecated``, is missing
        or has empty text.
    """
    namespace_element = identifier_xml.find("namespace")

    # NOTE(review): the "Identifer" spelling in the messages below is kept
    # unchanged in case callers or tests match on the text.
    if namespace_element is None or not namespace_element.text:
        raise InvalidIdentifierXMLRepresentationException(
            "Identifer XML representation is not valid: namespace not present or empty"
        )

    # An unknown namespace is not an error: warn and let the caller skip it.
    if namespace_element.text not in IDENTIFIER_NAMESPACE_MAP:
        warn(f"Identifier type {namespace_element.text} is not known.")
        return None

    str_kwargs: dict[str, str] = {}
    deprecated = False

    for attribute in IDENTIFIER_UNPACKABLE_ATTRIBUTES:
        element = identifier_xml.find(attribute)

        # Special case for unpacking deprecation state into a boolean; a
        # missing or empty element leaves the identifier not deprecated.
        if attribute == "deprecated":
            if element is not None and element.text is not None and element.text.lower() == "true":
                deprecated = True

        else:
            # Case for unpacking all other element types
            if element is None or not element.text:
                # Report the attribute name: `element` is None or an element
                # object here, neither of which identifies the missing field.
                raise InvalidIdentifierXMLRepresentationException(
                    f"Identifer XML representation is not valid: {attribute} not present or empty"
                )
            str_kwargs[attribute] = element.text

    return IDENTIFIER_NAMESPACE_MAP[namespace_element.text](deprecated=deprecated, **str_kwargs)
IDENTIFIER_NAMESPACE_MAP: dict[str, type[caselawclient.models.identifiers.Identifier]] =
{'fclid': <class 'caselawclient.models.identifiers.fclid.FindCaseLawIdentifier'>, 'ukncn': <class 'caselawclient.models.identifiers.neutral_citation.NeutralCitationNumber'>, 'uksummaryofncn': <class 'caselawclient.models.identifiers.press_summary_ncn.PressSummaryRelatedNCNIdentifier'>}
def
unpack_all_identifiers_from_etree( identifiers_etree: Optional[lxml.etree._Element]) -> caselawclient.models.identifiers.collection.IdentifiersCollection:
def unpack_all_identifiers_from_etree(identifiers_etree: Optional[etree._Element]) -> IdentifiersCollection:
    """Collect every identifier found inside an ``<identifiers>`` element.

    :param identifiers_etree: The entire ``<identifiers>`` element, or ``None``.
    :return: A collection of the identifiers that could be unpacked; empty when
        ``identifiers_etree`` is ``None``.
    """
    collection = IdentifiersCollection()
    if identifiers_etree is not None:
        for node in identifiers_etree.findall("identifier"):
            unpacked = unpack_an_identifier_from_etree(node)
            # Nothing to add when the identifier could not be unpacked.
            if not unpacked:
                continue
            collection.add(unpacked)
    return collection
This expects the entire `<identifiers>` tag, and unpacks all Identifiers inside it
def
unpack_an_identifier_from_etree( identifier_xml: lxml.etree._Element) -> Optional[caselawclient.models.identifiers.Identifier]:
def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Optional[Identifier]:
    """Unpack a single identifier element into the matching ``Identifier`` class.

    The text of the child ``<namespace>`` element selects the class from
    ``IDENTIFIER_NAMESPACE_MAP``. Each name in ``IDENTIFIER_UNPACKABLE_ATTRIBUTES``
    is then read from the child element of the same name and passed to that
    class as a keyword argument.

    :param identifier_xml: The etree representation of one identifier.
    :return: The unpacked identifier, or ``None`` (after emitting a warning) if
        the namespace is not in ``IDENTIFIER_NAMESPACE_MAP``.
    :raises InvalidIdentifierXMLRepresentationException: If the namespace
        element, or any attribute element other than ``deprecated``, is missing
        or has empty text.
    """
    namespace_element = identifier_xml.find("namespace")

    # NOTE(review): the "Identifer" spelling in the messages below is kept
    # unchanged in case callers or tests match on the text.
    if namespace_element is None or not namespace_element.text:
        raise InvalidIdentifierXMLRepresentationException(
            "Identifer XML representation is not valid: namespace not present or empty"
        )

    # An unknown namespace is not an error: warn and let the caller skip it.
    if namespace_element.text not in IDENTIFIER_NAMESPACE_MAP:
        warn(f"Identifier type {namespace_element.text} is not known.")
        return None

    str_kwargs: dict[str, str] = {}
    deprecated = False

    for attribute in IDENTIFIER_UNPACKABLE_ATTRIBUTES:
        element = identifier_xml.find(attribute)

        # Special case for unpacking deprecation state into a boolean; a
        # missing or empty element leaves the identifier not deprecated.
        if attribute == "deprecated":
            if element is not None and element.text is not None and element.text.lower() == "true":
                deprecated = True

        else:
            # Case for unpacking all other element types
            if element is None or not element.text:
                # Report the attribute name: `element` is None or an element
                # object here, neither of which identifies the missing field.
                raise InvalidIdentifierXMLRepresentationException(
                    f"Identifer XML representation is not valid: {attribute} not present or empty"
                )
            str_kwargs[attribute] = element.text

    return IDENTIFIER_NAMESPACE_MAP[namespace_element.text](deprecated=deprecated, **str_kwargs)
Given an etree representation of a single identifier, unpack it into an appropriate instance of an Identifier if the type is known (otherwise return None).