caselawclient.client_helpers
from lxml import etree

from caselawclient.xml_helpers import DEFAULT_NAMESPACES

from ..models.documents import Document
from ..models.judgments import Judgment
from ..models.parser_logs import ParserLog
from ..models.press_summaries import PressSummary


class CannotDetermineDocumentType(Exception):
    """Raised when the type of a document cannot be determined from its XML."""


def get_document_type_class(xml: bytes) -> type[Document]:
    """Work out which document class matches the top-level structure of an XML document.

    The XML is parsed and tested against a fixed, ordered list of XPath
    expressions; the class paired with the first expression that matches is
    returned.

    :param xml: The serialised XML document to inspect.

    :return: ``Judgment``, ``PressSummary`` or ``ParserLog``, depending on
        which structure is found.

    :raises CannotDetermineDocumentType: If none of the known structures is
        present. Exceptions raised by ``etree.fromstring`` while parsing are
        not caught here.
    """
    root = etree.fromstring(xml)

    # Checked in order; the first expression that selects anything wins.
    structure_checks: tuple[tuple[str, type[Document]], ...] = (
        # A `<judgment>` directly under `<akomaNtoso>` means it's a judgment.
        ("/akn:akomaNtoso/akn:judgment", Judgment),
        # A `<doc name='pressSummary'>` under `<akomaNtoso>` means it's a press summary.
        ("/akn:akomaNtoso/akn:doc[@name='pressSummary']", PressSummary),
        # A parser error has a root element of `error`; it's not of a special type.
        ("/error", ParserLog),
    )

    for expression, document_class in structure_checks:
        if root.xpath(expression, namespaces=DEFAULT_NAMESPACES):
            return document_class

    # Nothing matched, so we don't know for sure. Fail out.
    raise CannotDetermineDocumentType(
        "Unable to determine the Document type by its XML",
    )
class
CannotDetermineDocumentType(builtins.Exception):
Raised when the type of a document cannot be determined from its XML.
def get_document_type_class(xml: bytes) -> type[Document]:
    """Work out which document class matches the top-level structure of an XML document.

    The XML is parsed and tested against a fixed, ordered list of XPath
    expressions; the class paired with the first expression that matches is
    returned.

    :param xml: The serialised XML document to inspect.

    :return: ``Judgment``, ``PressSummary`` or ``ParserLog``, depending on
        which structure is found.

    :raises CannotDetermineDocumentType: If none of the known structures is
        present. Exceptions raised by ``etree.fromstring`` while parsing are
        not caught here.
    """
    root = etree.fromstring(xml)

    # Checked in order; the first expression that selects anything wins.
    structure_checks: tuple[tuple[str, type[Document]], ...] = (
        # A `<judgment>` directly under `<akomaNtoso>` means it's a judgment.
        ("/akn:akomaNtoso/akn:judgment", Judgment),
        # A `<doc name='pressSummary'>` under `<akomaNtoso>` means it's a press summary.
        ("/akn:akomaNtoso/akn:doc[@name='pressSummary']", PressSummary),
        # A parser error has a root element of `error`; it's not of a special type.
        ("/error", ParserLog),
    )

    for expression, document_class in structure_checks:
        if root.xpath(expression, namespaces=DEFAULT_NAMESPACES):
            return document_class

    # Nothing matched, so we don't know for sure. Fail out.
    raise CannotDetermineDocumentType(
        "Unable to determine the Document type by its XML",
    )
Attempt to get the type of the document based on the top-level structure of the XML document.