caselawclient.models.identifiers

  1from abc import ABC, abstractmethod
  2from typing import TYPE_CHECKING, Any, Optional
  3from uuid import uuid4
  4
  5from lxml import etree
  6
  7from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue, SuccessFailureMessageTuple
  8
  9from .exceptions import IdentifierValidationException
 10
 11if TYPE_CHECKING:
 12    from caselawclient.Client import MarklogicApiClient
 13    from caselawclient.models.documents import Document
 14
 15IDENTIFIER_PACKABLE_ATTRIBUTES: list[str] = [
 16    "uuid",
 17    "value",
 18    "deprecated",
 19    "url_slug",
 20]
 21"""A list of attributes of an Identifier to pack into an XML representation."""
 22
 23IDENTIFIER_UNPACKABLE_ATTRIBUTES: list[str] = [
 24    "uuid",
 25    "value",
 26    "deprecated",
 27]
 28"""A list of attributes to unpack from an XML representation."""
 29
 30
 31class IdentifierSchema(ABC):
 32    """
 33    A base class which describes what an identifier schema should look like.
 34    """
 35
 36    name: str
 37    namespace: str
 38
 39    human_readable: bool
 40    """ Should this identifier type be considered for display as a 'human readable' identifier? """
 41
 42    base_score_multiplier: float = 1.0
 43    """ A multiplier used to adjust the relative ranking of this identifier when calculating preferred identifiers. """
 44
 45    allow_editing: bool = True
 46    """ Should editors be allowed to manually manipulate identifiers under this schema? """
 47
 48    require_globally_unique: bool = True
 49    """ Must this identifier be globally unique? (appear on no other documents) """
 50
 51    allow_multiple: bool = False
 52    """ May documents have more than one non-deprecated identifier of this type? """
 53
 54    document_types: Optional[list[str]] = None
 55    """
 56    If present, a list of the names of document classes which can have this identifier.
 57
 58    If `None`, this identifier is valid for all document types.
 59    """
 60
 61    def __init_subclass__(cls: type["IdentifierSchema"], **kwargs: Any) -> None:
 62        """Ensure that subclasses have the required attributes set."""
 63        for required in (
 64            "name",
 65            "namespace",
 66            "human_readable",
 67        ):
 68            if not hasattr(cls, required):
 69                raise NotImplementedError(f"Can't instantiate IdentifierSchema without {required} attribute.")
 70        super().__init_subclass__(**kwargs)
 71
 72    def __repr__(self) -> str:
 73        return self.name
 74
 75    @classmethod
 76    @abstractmethod
 77    def validate_identifier_value(cls, value: str) -> bool:
 78        """Check that any given identifier value is valid for this schema."""
 79        pass
 80
 81    @classmethod
 82    @abstractmethod
 83    def compile_identifier_url_slug(cls, value: str) -> DocumentIdentifierSlug:
 84        """Convert an identifier into a precompiled URL slug."""
 85        pass
 86
 87
 88class Identifier(ABC):
 89    """A base class for subclasses representing a concrete identifier."""
 90
 91    schema: type[IdentifierSchema]
 92
 93    uuid: str
 94    value: DocumentIdentifierValue
 95
 96    deprecated: bool
 97    """Should this identifier be considered deprecated, ie although we know it refers to a particular document its usage should be discouraged?"""
 98
 99    def __init_subclass__(cls: type["Identifier"], **kwargs: Any) -> None:
100        """Ensure that subclasses have the required attributes set."""
101        for required in ("schema",):
102            if not getattr(cls, required, False):
103                raise NotImplementedError(f"Can't instantiate Identifier without {required} attribute.")
104        super().__init_subclass__(**kwargs)
105
106    def __repr__(self) -> str:
107        representation = f"{self.schema.name} {self.value}: {self.uuid}"
108
109        if self.deprecated:
110            return f"<{representation} (deprecated)> "
111        return f"<{representation}>"
112
113    def __str__(self) -> str:
114        return self.value
115
116    def __init__(self, value: str, uuid: Optional[str] = None, deprecated: bool = False) -> None:
117        if not self.schema.validate_identifier_value(value=value):
118            raise IdentifierValidationException(
119                f'Identifier value "{value}" is not valid according to the {self.schema.name} schema.'
120            )
121
122        self.value = DocumentIdentifierValue(value)
123        if uuid:
124            self.uuid = uuid
125        else:
126            self.uuid = "id-" + str(uuid4())
127
128        self.deprecated = deprecated
129
130    @property
131    def as_xml_tree(self) -> etree._Element:
132        """Convert this Identifier into a packed XML representation for storage."""
133        identifier_root = etree.Element("identifier")
134
135        namespace_attribute = etree.SubElement(identifier_root, "namespace")
136        namespace_attribute.text = self.schema.namespace
137
138        for attribute_name in IDENTIFIER_PACKABLE_ATTRIBUTES:
139            packed_attribute = etree.SubElement(identifier_root, attribute_name)
140            attribute_value = getattr(self, attribute_name)
141            if type(attribute_value) is bool:
142                packed_attribute.text = str(attribute_value).lower()
143            else:
144                packed_attribute.text = getattr(self, attribute_name)
145
146        return identifier_root
147
148    @property
149    def url_slug(self) -> str:
150        return self.schema.compile_identifier_url_slug(self.value)
151
152    @property
153    def score(self) -> float:
154        """Return the score of this identifier, used to calculate the preferred identifier for a document."""
155        if self.deprecated:
156            return 0
157
158        return 1 * self.schema.base_score_multiplier
159
160    def same_as(self, other: "Identifier") -> bool:
161        "Is this the same as another identifier (in value and schema)?"
162        return self.value == other.value and self.schema == other.schema
163
164    def validate_require_globally_unique(self, api_client: "MarklogicApiClient") -> SuccessFailureMessageTuple:
165        """
166        Check against the list of identifiers in the database that this value does not currently exist.
167
168        nb: We don't need to check that the identifier value is unique within a parent `Identifiers` object, because `Identifiers.add()` will only allow one value per namespace.
169        """
170        resolutions = [
171            resolution
172            for resolution in api_client.resolve_from_identifier_value(
173                identifier_value=self.value, published_only=False
174            )
175            if resolution.identifier_namespace == self.schema.namespace and resolution.identifier_uuid != self.uuid
176        ]
177        if len(resolutions) > 0:
178            return SuccessFailureMessageTuple(
179                False,
180                [f'Identifiers in scheme "{self.schema.namespace}" must be unique; "{self.value}" already exists!'],
181            )
182
183        return SuccessFailureMessageTuple(True, [])
184
185    def validate_valid_for_document_type(self, document_type: type["Document"]) -> SuccessFailureMessageTuple:
186        document_type_classname = document_type.__name__
187
188        if self.schema.document_types and document_type_classname not in self.schema.document_types:
189            return SuccessFailureMessageTuple(
190                False,
191                [
192                    f'Document type "{document_type_classname}" is not accepted for identifier schema "{self.schema.name}"'
193                ],
194            )
195
196        return SuccessFailureMessageTuple(True, [])
197
198    def perform_all_validations(
199        self, document_type: type["Document"], api_client: "MarklogicApiClient"
200    ) -> SuccessFailureMessageTuple:
201        """Perform all validations on a given identifier"""
202        validations = [
203            self.validate_require_globally_unique(api_client=api_client),
204            self.validate_valid_for_document_type(document_type=document_type),
205        ]
206
207        success = True
208        messages: list[str] = []
209
210        for validation in validations:
211            if validation.success is False:
212                success = False
213
214            messages += validation.messages
215
216        return SuccessFailureMessageTuple(success, messages)
IDENTIFIER_PACKABLE_ATTRIBUTES: list[str] = ['uuid', 'value', 'deprecated', 'url_slug']

A list of attributes of an Identifier to pack into an XML representation.

IDENTIFIER_UNPACKABLE_ATTRIBUTES: list[str] = ['uuid', 'value', 'deprecated']

A list of attributes to unpack from an XML representation.

class IdentifierSchema(abc.ABC):
class IdentifierSchema(ABC):
    """
    Abstract description of the rules shared by all identifiers of one kind.

    Concrete schemas subclass this and must define `name`, `namespace` and
    `human_readable` as class attributes; a subclass missing any of them is
    rejected when the class is defined.
    """

    name: str
    namespace: str

    human_readable: bool
    """ Whether this identifier type should be considered for display as a 'human readable' identifier. """

    base_score_multiplier: float = 1.0
    """ Multiplier adjusting the relative ranking of this identifier when calculating preferred identifiers. """

    allow_editing: bool = True
    """ Whether editors are allowed to manually manipulate identifiers under this schema. """

    require_globally_unique: bool = True
    """ Whether this identifier must be globally unique (appear on no other documents). """

    allow_multiple: bool = False
    """ Whether documents may have more than one non-deprecated identifier of this type. """

    document_types: Optional[list[str]] = None
    """
    If present, the names of the document classes which can have this identifier.

    If `None`, this identifier is valid for all document types.
    """

    def __init_subclass__(cls: type["IdentifierSchema"], **kwargs: Any) -> None:
        """Reject any subclass that does not define every mandatory class attribute."""
        mandatory = ("name", "namespace", "human_readable")
        # Report the first missing attribute, in declaration order.
        missing = next((attribute for attribute in mandatory if not hasattr(cls, attribute)), None)
        if missing is not None:
            raise NotImplementedError(f"Can't instantiate IdentifierSchema without {missing} attribute.")
        super().__init_subclass__(**kwargs)

    def __repr__(self) -> str:
        """Represent the schema by its `name`."""
        return self.name

    @classmethod
    @abstractmethod
    def validate_identifier_value(cls, value: str) -> bool:
        """Report whether the given identifier value is valid for this schema."""

    @classmethod
    @abstractmethod
    def compile_identifier_url_slug(cls, value: str) -> DocumentIdentifierSlug:
        """Turn an identifier value into its precompiled URL slug."""

A base class which describes what an identifier schema should look like.

name: str
namespace: str
human_readable: bool

Should this identifier type be considered for display as a 'human readable' identifier?

base_score_multiplier: float = 1.0

A multiplier used to adjust the relative ranking of this identifier when calculating preferred identifiers.

allow_editing: bool = True

Should editors be allowed to manually manipulate identifiers under this schema?

require_globally_unique: bool = True

Must this identifier be globally unique? (appear on no other documents)

allow_multiple: bool = False

May documents have more than one non-deprecated identifier of this type?

document_types: Optional[list[str]] = None

If present, a list of the names of document classes which can have this identifier.

If None, this identifier is valid for all document types.

@classmethod
@abstractmethod
def validate_identifier_value(cls, value: str) -> bool:
76    @classmethod
77    @abstractmethod
78    def validate_identifier_value(cls, value: str) -> bool:
79        """Check that any given identifier value is valid for this schema."""
80        pass

Check that any given identifier value is valid for this schema.

@classmethod
@abstractmethod
def compile_identifier_url_slug(cls, value: str) -> caselawclient.types.DocumentIdentifierSlug:
82    @classmethod
83    @abstractmethod
84    def compile_identifier_url_slug(cls, value: str) -> DocumentIdentifierSlug:
85        """Convert an identifier into a precompiled URL slug."""
86        pass

Convert an identifier into a precompiled URL slug.

class Identifier(abc.ABC):
 89class Identifier(ABC):
 90    """A base class for subclasses representing a concrete identifier."""
 91
 92    schema: type[IdentifierSchema]
 93
 94    uuid: str
 95    value: DocumentIdentifierValue
 96
 97    deprecated: bool
 98    """Should this identifier be considered deprecated, ie although we know it refers to a particular document its usage should be discouraged?"""
 99
100    def __init_subclass__(cls: type["Identifier"], **kwargs: Any) -> None:
101        """Ensure that subclasses have the required attributes set."""
102        for required in ("schema",):
103            if not getattr(cls, required, False):
104                raise NotImplementedError(f"Can't instantiate Identifier without {required} attribute.")
105        super().__init_subclass__(**kwargs)
106
107    def __repr__(self) -> str:
108        representation = f"{self.schema.name} {self.value}: {self.uuid}"
109
110        if self.deprecated:
111            return f"<{representation} (deprecated)> "
112        return f"<{representation}>"
113
114    def __str__(self) -> str:
115        return self.value
116
117    def __init__(self, value: str, uuid: Optional[str] = None, deprecated: bool = False) -> None:
118        if not self.schema.validate_identifier_value(value=value):
119            raise IdentifierValidationException(
120                f'Identifier value "{value}" is not valid according to the {self.schema.name} schema.'
121            )
122
123        self.value = DocumentIdentifierValue(value)
124        if uuid:
125            self.uuid = uuid
126        else:
127            self.uuid = "id-" + str(uuid4())
128
129        self.deprecated = deprecated
130
131    @property
132    def as_xml_tree(self) -> etree._Element:
133        """Convert this Identifier into a packed XML representation for storage."""
134        identifier_root = etree.Element("identifier")
135
136        namespace_attribute = etree.SubElement(identifier_root, "namespace")
137        namespace_attribute.text = self.schema.namespace
138
139        for attribute_name in IDENTIFIER_PACKABLE_ATTRIBUTES:
140            packed_attribute = etree.SubElement(identifier_root, attribute_name)
141            attribute_value = getattr(self, attribute_name)
142            if type(attribute_value) is bool:
143                packed_attribute.text = str(attribute_value).lower()
144            else:
145                packed_attribute.text = getattr(self, attribute_name)
146
147        return identifier_root
148
149    @property
150    def url_slug(self) -> str:
151        return self.schema.compile_identifier_url_slug(self.value)
152
153    @property
154    def score(self) -> float:
155        """Return the score of this identifier, used to calculate the preferred identifier for a document."""
156        if self.deprecated:
157            return 0
158
159        return 1 * self.schema.base_score_multiplier
160
161    def same_as(self, other: "Identifier") -> bool:
162        "Is this the same as another identifier (in value and schema)?"
163        return self.value == other.value and self.schema == other.schema
164
165    def validate_require_globally_unique(self, api_client: "MarklogicApiClient") -> SuccessFailureMessageTuple:
166        """
167        Check against the list of identifiers in the database that this value does not currently exist.
168
169        nb: We don't need to check that the identifier value is unique within a parent `Identifiers` object, because `Identifiers.add()` will only allow one value per namespace.
170        """
171        resolutions = [
172            resolution
173            for resolution in api_client.resolve_from_identifier_value(
174                identifier_value=self.value, published_only=False
175            )
176            if resolution.identifier_namespace == self.schema.namespace and resolution.identifier_uuid != self.uuid
177        ]
178        if len(resolutions) > 0:
179            return SuccessFailureMessageTuple(
180                False,
181                [f'Identifiers in scheme "{self.schema.namespace}" must be unique; "{self.value}" already exists!'],
182            )
183
184        return SuccessFailureMessageTuple(True, [])
185
186    def validate_valid_for_document_type(self, document_type: type["Document"]) -> SuccessFailureMessageTuple:
187        document_type_classname = document_type.__name__
188
189        if self.schema.document_types and document_type_classname not in self.schema.document_types:
190            return SuccessFailureMessageTuple(
191                False,
192                [
193                    f'Document type "{document_type_classname}" is not accepted for identifier schema "{self.schema.name}"'
194                ],
195            )
196
197        return SuccessFailureMessageTuple(True, [])
198
199    def perform_all_validations(
200        self, document_type: type["Document"], api_client: "MarklogicApiClient"
201    ) -> SuccessFailureMessageTuple:
202        """Perform all validations on a given identifier"""
203        validations = [
204            self.validate_require_globally_unique(api_client=api_client),
205            self.validate_valid_for_document_type(document_type=document_type),
206        ]
207
208        success = True
209        messages: list[str] = []
210
211        for validation in validations:
212            if validation.success is False:
213                success = False
214
215            messages += validation.messages
216
217        return SuccessFailureMessageTuple(success, messages)

A base class for subclasses representing a concrete identifier.

Identifier(value: str, uuid: Optional[str] = None, deprecated: bool = False)
117    def __init__(self, value: str, uuid: Optional[str] = None, deprecated: bool = False) -> None:
118        if not self.schema.validate_identifier_value(value=value):
119            raise IdentifierValidationException(
120                f'Identifier value "{value}" is not valid according to the {self.schema.name} schema.'
121            )
122
123        self.value = DocumentIdentifierValue(value)
124        if uuid:
125            self.uuid = uuid
126        else:
127            self.uuid = "id-" + str(uuid4())
128
129        self.deprecated = deprecated
schema: type[IdentifierSchema]
uuid: str
deprecated: bool

Should this identifier be considered deprecated, i.e. although we know it refers to a particular document, its usage should be discouraged?

as_xml_tree: lxml.etree._Element
131    @property
132    def as_xml_tree(self) -> etree._Element:
133        """Convert this Identifier into a packed XML representation for storage."""
134        identifier_root = etree.Element("identifier")
135
136        namespace_attribute = etree.SubElement(identifier_root, "namespace")
137        namespace_attribute.text = self.schema.namespace
138
139        for attribute_name in IDENTIFIER_PACKABLE_ATTRIBUTES:
140            packed_attribute = etree.SubElement(identifier_root, attribute_name)
141            attribute_value = getattr(self, attribute_name)
142            if type(attribute_value) is bool:
143                packed_attribute.text = str(attribute_value).lower()
144            else:
145                packed_attribute.text = getattr(self, attribute_name)
146
147        return identifier_root

Convert this Identifier into a packed XML representation for storage.

url_slug: str
149    @property
150    def url_slug(self) -> str:
151        return self.schema.compile_identifier_url_slug(self.value)
score: float
153    @property
154    def score(self) -> float:
155        """Return the score of this identifier, used to calculate the preferred identifier for a document."""
156        if self.deprecated:
157            return 0
158
159        return 1 * self.schema.base_score_multiplier

Return the score of this identifier, used to calculate the preferred identifier for a document.

def same_as(self, other: Identifier) -> bool:
161    def same_as(self, other: "Identifier") -> bool:
162        "Is this the same as another identifier (in value and schema)?"
163        return self.value == other.value and self.schema == other.schema

Is this the same as another identifier (in value and schema)?

def validate_require_globally_unique( self, api_client: caselawclient.Client.MarklogicApiClient) -> caselawclient.types.SuccessFailureMessageTuple:
165    def validate_require_globally_unique(self, api_client: "MarklogicApiClient") -> SuccessFailureMessageTuple:
166        """
167        Check against the list of identifiers in the database that this value does not currently exist.
168
169        nb: We don't need to check that the identifier value is unique within a parent `Identifiers` object, because `Identifiers.add()` will only allow one value per namespace.
170        """
171        resolutions = [
172            resolution
173            for resolution in api_client.resolve_from_identifier_value(
174                identifier_value=self.value, published_only=False
175            )
176            if resolution.identifier_namespace == self.schema.namespace and resolution.identifier_uuid != self.uuid
177        ]
178        if len(resolutions) > 0:
179            return SuccessFailureMessageTuple(
180                False,
181                [f'Identifiers in scheme "{self.schema.namespace}" must be unique; "{self.value}" already exists!'],
182            )
183
184        return SuccessFailureMessageTuple(True, [])

Check against the list of identifiers in the database that this value does not currently exist.

NB: We don't need to check that the identifier value is unique within a parent Identifiers object, because Identifiers.add() will only allow one value per namespace.

def validate_valid_for_document_type( self, document_type: type[caselawclient.models.documents.Document]) -> caselawclient.types.SuccessFailureMessageTuple:
186    def validate_valid_for_document_type(self, document_type: type["Document"]) -> SuccessFailureMessageTuple:
187        document_type_classname = document_type.__name__
188
189        if self.schema.document_types and document_type_classname not in self.schema.document_types:
190            return SuccessFailureMessageTuple(
191                False,
192                [
193                    f'Document type "{document_type_classname}" is not accepted for identifier schema "{self.schema.name}"'
194                ],
195            )
196
197        return SuccessFailureMessageTuple(True, [])
def perform_all_validations( self, document_type: type[caselawclient.models.documents.Document], api_client: caselawclient.Client.MarklogicApiClient) -> caselawclient.types.SuccessFailureMessageTuple:
199    def perform_all_validations(
200        self, document_type: type["Document"], api_client: "MarklogicApiClient"
201    ) -> SuccessFailureMessageTuple:
202        """Perform all validations on a given identifier"""
203        validations = [
204            self.validate_require_globally_unique(api_client=api_client),
205            self.validate_valid_for_document_type(document_type=document_type),
206        ]
207
208        success = True
209        messages: list[str] = []
210
211        for validation in validations:
212            if validation.success is False:
213                success = False
214
215            messages += validation.messages
216
217        return SuccessFailureMessageTuple(success, messages)

Perform all validations on a given identifier