caselawclient.models.identifiers.unpacker

 1from typing import Optional
 2from warnings import warn
 3
 4from lxml import etree
 5
 6from . import IDENTIFIER_UNPACKABLE_ATTRIBUTES, Identifier
 7from .collection import SUPPORTED_IDENTIFIER_TYPES, IdentifiersCollection
 8from .exceptions import InvalidIdentifierXMLRepresentationException
 9
# Lookup table from the schema namespace of each supported identifier type
# to the identifier class itself.
IDENTIFIER_NAMESPACE_MAP: dict[str, type[Identifier]] = {
    supported_type.schema.namespace: supported_type for supported_type in SUPPORTED_IDENTIFIER_TYPES
}
13
14
def unpack_all_identifiers_from_etree(identifiers_etree: Optional[etree._Element]) -> IdentifiersCollection:
    """Build an IdentifiersCollection from an entire <identifiers> element.

    When `identifiers_etree` is `None` an empty collection is returned. Otherwise
    every `identifier` child is unpacked individually, and any child whose
    unpacking produces a falsy result is left out of the collection.
    """
    collection = IdentifiersCollection()
    child_elements = [] if identifiers_etree is None else identifiers_etree.findall("identifier")
    for child_element in child_elements:
        unpacked = unpack_an_identifier_from_etree(child_element)
        if not unpacked:
            continue
        collection.add(unpacked)
    return collection
25
26
def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Optional[Identifier]:
    """Given an etree representation of a single identifier, unpack it into an appropriate instance of an Identifier.

    The concrete class is looked up in `IDENTIFIER_NAMESPACE_MAP` using the text of the
    `namespace` child element.

    :param identifier_xml: etree representation of a single identifier.
    :return: An instance of the class mapped to the identifier's namespace, or `None`
        (after issuing a warning) if the namespace is not in `IDENTIFIER_NAMESPACE_MAP`.
    :raises InvalidIdentifierXMLRepresentationException: If the `namespace` element, or any
        attribute in `IDENTIFIER_UNPACKABLE_ATTRIBUTES` other than `deprecated`, is missing
        or has empty text.
    """

    namespace_element = identifier_xml.find("namespace")

    if namespace_element is None or not namespace_element.text:
        raise InvalidIdentifierXMLRepresentationException(
            "Identifer XML representation is not valid: namespace not present or empty"
        )

    # An unknown namespace is not an error: warn and let the caller skip this identifier.
    if namespace_element.text not in IDENTIFIER_NAMESPACE_MAP:
        warn(f"Identifier type {namespace_element.text} is not known.")
        return None

    str_kwargs: dict[str, str] = {}
    deprecated = False

    for attribute in IDENTIFIER_UNPACKABLE_ATTRIBUTES:
        element = identifier_xml.find(attribute)

        # Special case for unpacking deprecation state into a boolean.
        # A missing or empty element leaves the default of False in place.
        if attribute == "deprecated":
            if element is not None and element.text is not None and element.text.lower() == "true":
                deprecated = True

        else:
            # Case for unpacking all other element types, which are all required
            if element is None or not element.text:
                # Report the attribute *name*; `element` is None or an opaque element repr here.
                raise InvalidIdentifierXMLRepresentationException(
                    f"Identifer XML representation is not valid: {attribute} not present or empty"
                )
            str_kwargs[attribute] = element.text

    return IDENTIFIER_NAMESPACE_MAP[namespace_element.text](deprecated=deprecated, **str_kwargs)
def unpack_all_identifiers_from_etree( identifiers_etree: Optional[lxml.etree._Element]) -> caselawclient.models.identifiers.collection.IdentifiersCollection:
def unpack_all_identifiers_from_etree(identifiers_etree: Optional[etree._Element]) -> IdentifiersCollection:
    """Build an IdentifiersCollection from an entire <identifiers> element.

    When `identifiers_etree` is `None` an empty collection is returned. Otherwise
    every `identifier` child is unpacked individually, and any child whose
    unpacking produces a falsy result is left out of the collection.
    """
    collection = IdentifiersCollection()
    child_elements = [] if identifiers_etree is None else identifiers_etree.findall("identifier")
    for child_element in child_elements:
        unpacked = unpack_an_identifier_from_etree(child_element)
        if not unpacked:
            continue
        collection.add(unpacked)
    return collection

This expects the entire `<identifiers>` tag, and unpacks all Identifiers inside it.

def unpack_an_identifier_from_etree( identifier_xml: lxml.etree._Element) -> Optional[caselawclient.models.identifiers.Identifier]:
def unpack_an_identifier_from_etree(identifier_xml: etree._Element) -> Optional[Identifier]:
    """Given an etree representation of a single identifier, unpack it into an appropriate instance of an Identifier.

    The concrete class is looked up in `IDENTIFIER_NAMESPACE_MAP` using the text of the
    `namespace` child element.

    :param identifier_xml: etree representation of a single identifier.
    :return: An instance of the class mapped to the identifier's namespace, or `None`
        (after issuing a warning) if the namespace is not in `IDENTIFIER_NAMESPACE_MAP`.
    :raises InvalidIdentifierXMLRepresentationException: If the `namespace` element, or any
        attribute in `IDENTIFIER_UNPACKABLE_ATTRIBUTES` other than `deprecated`, is missing
        or has empty text.
    """

    namespace_element = identifier_xml.find("namespace")

    if namespace_element is None or not namespace_element.text:
        raise InvalidIdentifierXMLRepresentationException(
            "Identifer XML representation is not valid: namespace not present or empty"
        )

    # An unknown namespace is not an error: warn and let the caller skip this identifier.
    if namespace_element.text not in IDENTIFIER_NAMESPACE_MAP:
        warn(f"Identifier type {namespace_element.text} is not known.")
        return None

    str_kwargs: dict[str, str] = {}
    deprecated = False

    for attribute in IDENTIFIER_UNPACKABLE_ATTRIBUTES:
        element = identifier_xml.find(attribute)

        # Special case for unpacking deprecation state into a boolean.
        # A missing or empty element leaves the default of False in place.
        if attribute == "deprecated":
            if element is not None and element.text is not None and element.text.lower() == "true":
                deprecated = True

        else:
            # Case for unpacking all other element types, which are all required
            if element is None or not element.text:
                # Report the attribute *name*; `element` is None or an opaque element repr here.
                raise InvalidIdentifierXMLRepresentationException(
                    f"Identifer XML representation is not valid: {attribute} not present or empty"
                )
            str_kwargs[attribute] = element.text

    return IDENTIFIER_NAMESPACE_MAP[namespace_element.text](deprecated=deprecated, **str_kwargs)

Given an etree representation of a single identifier, unpack it into an appropriate instance of an Identifier if the type is known (otherwise return None).