caselawclient.models.identifiers.collection

View Source
  1from typing import TYPE_CHECKING, Optional, Union
  2
  3from lxml import etree
  4
  5from caselawclient.types import SuccessFailureMessageTuple
  6
  7from . import Identifier, IdentifierSchema
  8from .fclid import FindCaseLawIdentifier
  9from .neutral_citation import NeutralCitationNumber
 10from .press_summary_ncn import PressSummaryRelatedNCNIdentifier
 11
 12if TYPE_CHECKING:
 13    from caselawclient.Client import MarklogicApiClient
 14    from caselawclient.models.documents import Document
 15
# Identifier classes considered by `IdentifiersCollection.valid_new_identifier_types`,
# which filters this list by each type's schema settings to decide what may be offered
# for a given document type.
SUPPORTED_IDENTIFIER_TYPES: list[type["Identifier"]] = [
    FindCaseLawIdentifier,
    NeutralCitationNumber,
    PressSummaryRelatedNCNIdentifier,
]
 21
 22
 23class IdentifiersCollection(dict[str, Identifier]):
 24    def validate_uuids_match_keys(self) -> SuccessFailureMessageTuple:
 25        for uuid, identifier in self.items():
 26            if uuid != identifier.uuid:
 27                return SuccessFailureMessageTuple(
 28                    False, [f"Key of {identifier} in Identifiers is {uuid} not {identifier.uuid}"]
 29                )
 30
 31        return SuccessFailureMessageTuple(True, [])
 32
 33    def _list_all_identifiers_by_schema(self) -> dict[type[IdentifierSchema], list[Identifier]]:
 34        """Get a list of all identifiers, grouped by their schema."""
 35        identifiers_by_schema: dict[type[IdentifierSchema], list[Identifier]] = {}
 36
 37        for identifier in self.values():
 38            identifiers_by_schema.setdefault(identifier.schema, []).append(identifier)
 39
 40        return identifiers_by_schema
 41
 42    def check_only_single_non_deprecated_identifier_where_multiples_not_allowed(self) -> SuccessFailureMessageTuple:
 43        """Check that only one non-deprecated identifier exists per schema where that schema does not allow multiples."""
 44
 45        for schema, identifiers in self._list_all_identifiers_by_schema().items():
 46            if schema.allow_multiple:
 47                continue
 48            non_deprecated_identifiers = [i for i in identifiers if not i.deprecated]
 49            if len(non_deprecated_identifiers) > 1:
 50                return SuccessFailureMessageTuple(
 51                    False,
 52                    [
 53                        f"Multiple non-deprecated identifiers found for schema '{schema.name}': {', '.join(i.value for i in non_deprecated_identifiers)}"
 54                    ],
 55                )
 56
 57        return SuccessFailureMessageTuple(True, [])
 58
 59    def _perform_collection_level_validations(self) -> SuccessFailureMessageTuple:
 60        """Perform identifier validations which are only possible at the collection level, such as UUID integrity and identifying exclusivity problems."""
 61
 62        success = True
 63        messages: list[str] = []
 64
 65        collection_validations_to_run: list[SuccessFailureMessageTuple] = [
 66            self.validate_uuids_match_keys(),
 67            self.check_only_single_non_deprecated_identifier_where_multiples_not_allowed(),
 68        ]
 69
 70        for validation in collection_validations_to_run:
 71            if not validation.success:
 72                success = False
 73                messages += validation.messages
 74
 75        return SuccessFailureMessageTuple(success, messages)
 76
 77    def _perform_identifier_level_validations(
 78        self, document_type: type["Document"], api_client: "MarklogicApiClient"
 79    ) -> SuccessFailureMessageTuple:
 80        """Perform identifier validations at the individual identifier level."""
 81
 82        success = True
 83        messages: list[str] = []
 84
 85        for _, identifier in self.items():
 86            validations = identifier.perform_all_validations(document_type=document_type, api_client=api_client)
 87            if validations.success is False:
 88                success = False
 89
 90            messages += validations.messages
 91
 92        return SuccessFailureMessageTuple(success, messages)
 93
 94    def perform_all_validations(
 95        self, document_type: type["Document"], api_client: "MarklogicApiClient"
 96    ) -> SuccessFailureMessageTuple:
 97        """Perform all possible identifier validations on this collection, both at the individual and collection level."""
 98
 99        identifier_level_success, identifier_level_messages = self._perform_identifier_level_validations(
100            document_type=document_type, api_client=api_client
101        )
102        collection_level_success, collection_level_messages = self._perform_collection_level_validations()
103
104        success = all([identifier_level_success, collection_level_success])
105        all_messages = identifier_level_messages + collection_level_messages
106
107        return SuccessFailureMessageTuple(success, all_messages)
108
109    def contains(self, other_identifier: Identifier) -> bool:
110        """Does the identifier's value and namespace already exist in this group?"""
111        return any(other_identifier.same_as(identifier) for identifier in self.values())
112
113    def add(self, identifier: Identifier) -> None:
114        if not self.contains(identifier):
115            self[identifier.uuid] = identifier
116
117    def valid_new_identifier_types(self, document_type: type["Document"]) -> list[type[Identifier]]:
118        """Return a list of identifier types which can be added to a document of the given type, given identifiers already in this collection."""
119        return [
120            t
121            for t in SUPPORTED_IDENTIFIER_TYPES
122            if t.schema.allow_editing
123            and (not t.schema.document_types or document_type.__name__ in t.schema.document_types)
124        ]
125
126    def __delitem__(self, key: Union[Identifier, str]) -> None:
127        if isinstance(key, Identifier):
128            super().__delitem__(key.uuid)
129        else:
130            super().__delitem__(key)
131
132    def of_type(self, identifier_type: type[Identifier]) -> list[Identifier]:
133        """Return a list of all identifiers of a given type."""
134        uuids = self.keys()
135        return [self[uuid] for uuid in list(uuids) if isinstance(self[uuid], identifier_type)]
136
137    def delete_type(self, deleted_identifier_type: type[Identifier]) -> None:
138        "For when we want an identifier to be the only valid identifier of that type, delete the others first"
139        uuids = self.keys()
140        for uuid in list(uuids):
141            # we could use compare to .schema instead, which would have diffferent behaviour for subclasses
142            if isinstance(self[uuid], deleted_identifier_type):
143                del self[uuid]
144
145    @property
146    def as_etree(self) -> etree._Element:
147        """Return an etree representation of all the Document's identifiers."""
148        identifiers_root = etree.Element("identifiers")
149
150        for identifier in self.values():
151            identifiers_root.append(identifier.as_xml_tree)
152
153        return identifiers_root
154
155    def by_score(self, type: Optional[type[Identifier]] = None) -> list[Identifier]:
156        """
157        :param type: Optionally, an identifier type to constrain this list to.
158
159        :return: Return a list of identifiers, sorted by their score in descending order.
160        """
161        identifiers = self.of_type(type) if type else list(self.values())
162        return sorted(identifiers, key=lambda v: v.score, reverse=True)
163
164    def preferred(self, type: Optional[type[Identifier]] = None) -> Optional[Identifier]:
165        """
166        :param type: Optionally, an identifier type to constrain the results to.
167
168        :return: Return the highest scoring identifier of the given type (or of any type, if none is specified). Returns `None` if no identifier is available.
169        """
170        if len(self.by_score(type)) == 0:
171            return None
172        return self.by_score(type)[0]
SUPPORTED_IDENTIFIER_TYPES: list[type[caselawclient.models.identifiers.Identifier]] = [<class 'caselawclient.models.identifiers.fclid.FindCaseLawIdentifier'>, <class 'caselawclient.models.identifiers.neutral_citation.NeutralCitationNumber'>, <class 'caselawclient.models.identifiers.press_summary_ncn.PressSummaryRelatedNCNIdentifier'>]
caselawclient.models.identifiers.collection

Parameters

Returns

Parameters

Returns