caselawclient.client_helpers

 1from lxml import etree
 2
 3from caselawclient.xml_helpers import DEFAULT_NAMESPACES
 4
 5from ..models.documents import Document
 6from ..models.judgments import Judgment
 7from ..models.parser_logs import ParserLog
 8from ..models.press_summaries import PressSummary
 9
10
class CannotDetermineDocumentType(Exception):
    """Raised by `get_document_type_class` when the XML matches none of the known top-level structures."""
13
14
def get_document_type_class(xml: bytes) -> type[Document]:
    """Work out which document class matches the top-level structure of the given XML.

    :param xml: The raw XML of the document.

    :return: `Judgment` when the `akomaNtoso` root has a `judgment` child, `PressSummary` when it has a
        `doc` child whose `name` attribute is `pressSummary`, or `ParserLog` when the root element is `error`.

    :raises CannotDetermineDocumentType: If the XML matches none of the structures above.
    """
    root = etree.fromstring(xml)

    # Tried in order: the first XPath expression which selects anything decides the class.
    structure_checks: tuple[tuple[str, type[Document]], ...] = (
        # A `<judgment>` main node means a judgment
        ("/akn:akomaNtoso/akn:judgment", Judgment),
        # A `<doc name='pressSummary'>` main node means a press summary
        ("/akn:akomaNtoso/akn:doc[@name='pressSummary']", PressSummary),
        # A root element of `error` means the document is a parser error log
        ("/error", ParserLog),
    )

    for expression, document_class in structure_checks:
        if root.xpath(expression, namespaces=DEFAULT_NAMESPACES):
            return document_class

    # Nothing matched, so we can't say for sure what this is. Fail out.
    raise CannotDetermineDocumentType("Unable to determine the Document type by its XML")
class CannotDetermineDocumentType(builtins.Exception):
class CannotDetermineDocumentType(Exception):
    """Raised by `get_document_type_class` when the XML matches none of the known top-level structures."""

Raised when the type of a document cannot be determined from the top-level structure of its XML.

def get_document_type_class(xml: bytes) -> type[caselawclient.models.documents.Document]:
def get_document_type_class(xml: bytes) -> type[Document]:
    """Work out which document class matches the top-level structure of the given XML.

    :param xml: The raw XML of the document.

    :return: `Judgment` when the `akomaNtoso` root has a `judgment` child, `PressSummary` when it has a
        `doc` child whose `name` attribute is `pressSummary`, or `ParserLog` when the root element is `error`.

    :raises CannotDetermineDocumentType: If the XML matches none of the structures above.
    """
    root = etree.fromstring(xml)

    # Tried in order: the first XPath expression which selects anything decides the class.
    structure_checks: tuple[tuple[str, type[Document]], ...] = (
        # A `<judgment>` main node means a judgment
        ("/akn:akomaNtoso/akn:judgment", Judgment),
        # A `<doc name='pressSummary'>` main node means a press summary
        ("/akn:akomaNtoso/akn:doc[@name='pressSummary']", PressSummary),
        # A root element of `error` means the document is a parser error log
        ("/error", ParserLog),
    )

    for expression, document_class in structure_checks:
        if root.xpath(expression, namespaces=DEFAULT_NAMESPACES):
            return document_class

    # Nothing matched, so we can't say for sure what this is. Fail out.
    raise CannotDetermineDocumentType("Unable to determine the Document type by its XML")

Attempt to get the type of the document based on the top-level structure of the XML document.