caselawclient.models.identifiers
"""Base classes describing document identifiers and the schemas they conform to."""

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Optional
from uuid import uuid4

from lxml import etree

from caselawclient.types import DocumentIdentifierSlug, DocumentIdentifierValue, SuccessFailureMessageTuple

from .exceptions import IdentifierValidationException

if TYPE_CHECKING:
    from caselawclient.Client import MarklogicApiClient
    from caselawclient.models.documents import Document

IDENTIFIER_PACKABLE_ATTRIBUTES: list[str] = [
    "uuid",
    "value",
    "deprecated",
    "url_slug",
]
"""A list of attributes of an Identifier to pack into an XML representation."""

IDENTIFIER_UNPACKABLE_ATTRIBUTES: list[str] = [
    "uuid",
    "value",
    "deprecated",
]
"""A list of attributes to unpack from an XML representation."""


class IdentifierSchema(ABC):
    """
    A base class which describes what an identifier schema should look like.

    Concrete schemas must define `name`, `namespace` and `human_readable` as class attributes and implement both
    abstract class methods; every other attribute has a default which a schema may override.
    """

    name: str
    namespace: str

    human_readable: bool
    """ Should this identifier type be considered for display as a 'human readable' identifier? """

    base_score_multiplier: float = 1.0
    """ A multiplier used to adjust the relative ranking of this identifier when calculating preferred identifiers. """

    allow_editing: bool = True
    """ Should editors be allowed to manually manipulate identifiers under this schema? """

    require_globally_unique: bool = True
    """ Must this identifier be globally unique? (appear on no other documents) """

    allow_multiple: bool = False
    """ May documents have more than one non-deprecated identifier of this type? """

    document_types: Optional[list[str]] = None
    """
    If present, a list of the names of document classes which can have this identifier.

    If `None`, this identifier is valid for all document types.
    """

    def __init_subclass__(cls: type["IdentifierSchema"], **kwargs: Any) -> None:
        """
        Ensure that subclasses have the required attributes set.

        Raises `NotImplementedError` naming the first required attribute which the subclass does not define.
        """
        for required in (
            "name",
            "namespace",
            "human_readable",
        ):
            if not hasattr(cls, required):
                raise NotImplementedError(f"Can't instantiate IdentifierSchema without {required} attribute.")
        super().__init_subclass__(**kwargs)

    def __repr__(self) -> str:
        """Represent the schema by its `name`."""
        return self.name

    @classmethod
    @abstractmethod
    def validate_identifier_value(cls, value: str) -> bool:
        """Check that any given identifier value is valid for this schema."""

    @classmethod
    @abstractmethod
    def compile_identifier_url_slug(cls, value: str) -> DocumentIdentifierSlug:
        """Convert an identifier into a precompiled URL slug."""


class Identifier(ABC):
    """
    A base class for subclasses representing a concrete identifier.

    Every subclass must set a (truthy) `schema` class attribute; defining a subclass without one raises
    `NotImplementedError`.
    """

    schema: type[IdentifierSchema]

    uuid: str
    value: DocumentIdentifierValue

    deprecated: bool
    """Should this identifier be considered deprecated, i.e. although we know it refers to a particular document, its usage should be discouraged?"""

    def __init_subclass__(cls: type["Identifier"], **kwargs: Any) -> None:
        """Ensure that subclasses have the required attributes set."""
        for required in ("schema",):
            if not getattr(cls, required, False):
                raise NotImplementedError(f"Can't instantiate Identifier without {required} attribute.")
        super().__init_subclass__(**kwargs)

    def __repr__(self) -> str:
        """Return `<schema-name value: uuid>`, with a `(deprecated)` marker where applicable."""
        representation = f"{self.schema.name} {self.value}: {self.uuid}"

        if self.deprecated:
            # No trailing whitespace after the closing bracket, matching the non-deprecated form.
            return f"<{representation} (deprecated)>"
        return f"<{representation}>"

    def __str__(self) -> str:
        """Return the bare identifier value."""
        return self.value

    def __init__(self, value: str, uuid: Optional[str] = None, deprecated: bool = False) -> None:
        """
        Create an identifier, validating `value` against the schema.

        :param value: The identifier value; must satisfy `schema.validate_identifier_value`.
        :param uuid: An existing UUID to reuse. If omitted (or empty) a new `id-<uuid4>` value is generated.
        :param deprecated: Whether this identifier is deprecated.

        :raises IdentifierValidationException: If the schema rejects `value`.
        """
        if not self.schema.validate_identifier_value(value=value):
            raise IdentifierValidationException(
                f'Identifier value "{value}" is not valid according to the {self.schema.name} schema.'
            )

        self.value = DocumentIdentifierValue(value)
        if uuid:
            self.uuid = uuid
        else:
            self.uuid = "id-" + str(uuid4())

        self.deprecated = deprecated

    @property
    def as_xml_tree(self) -> etree._Element:
        """
        Convert this Identifier into a packed XML representation for storage.

        The result is an `<identifier>` element with a `<namespace>` child followed by one child per entry in
        `IDENTIFIER_PACKABLE_ATTRIBUTES`. Boolean values are written as lowercase `true`/`false`.
        """
        identifier_root = etree.Element("identifier")

        namespace_attribute = etree.SubElement(identifier_root, "namespace")
        namespace_attribute.text = self.schema.namespace

        for attribute_name in IDENTIFIER_PACKABLE_ATTRIBUTES:
            packed_attribute = etree.SubElement(identifier_root, attribute_name)
            # Read each attribute exactly once: `url_slug` is a property which is recomputed on every access.
            attribute_value = getattr(self, attribute_name)
            if isinstance(attribute_value, bool):
                packed_attribute.text = str(attribute_value).lower()
            else:
                packed_attribute.text = attribute_value

        return identifier_root

    @property
    def url_slug(self) -> str:
        """The URL slug which the schema compiles from this identifier's value."""
        return self.schema.compile_identifier_url_slug(self.value)

    @property
    def score(self) -> float:
        """
        Return the score of this identifier, used to calculate the preferred identifier for a document.

        Deprecated identifiers always score 0; otherwise the score is the schema's `base_score_multiplier`.
        """
        if self.deprecated:
            return 0

        return 1 * self.schema.base_score_multiplier

    def same_as(self, other: "Identifier") -> bool:
        """Is this the same as another identifier (in value and schema)?"""
        return self.value == other.value and self.schema == other.schema

    def validate_require_globally_unique(self, api_client: "MarklogicApiClient") -> SuccessFailureMessageTuple:
        """
        Check against the list of identifiers in the database that this value does not currently exist.

        Schemas which set `require_globally_unique` to `False` always pass, without querying the database.

        nb: We don't need to check that the identifier value is unique within a parent `Identifiers` object, because `Identifiers.add()` will only allow one value per namespace.
        """
        if not self.schema.require_globally_unique:
            return SuccessFailureMessageTuple(True, [])

        # A clash is a resolution in the same namespace carrying a different UUID. Matching on UUID presumably
        # stops an already-stored identifier from clashing with itself (confirm against callers).
        clash_exists = any(
            resolution.identifier_namespace == self.schema.namespace and resolution.identifier_uuid != self.uuid
            for resolution in api_client.resolve_from_identifier_value(
                identifier_value=self.value, published_only=False
            )
        )
        if clash_exists:
            return SuccessFailureMessageTuple(
                False,
                [f'Identifiers in scheme "{self.schema.namespace}" must be unique; "{self.value}" already exists!'],
            )

        return SuccessFailureMessageTuple(True, [])

    def validate_valid_for_document_type(self, document_type: type["Document"]) -> SuccessFailureMessageTuple:
        """
        Check that this identifier's schema accepts the given document class.

        A schema whose `document_types` is `None` or empty accepts every class; otherwise the class name of
        `document_type` must be listed.
        """
        document_type_classname = document_type.__name__

        if self.schema.document_types and document_type_classname not in self.schema.document_types:
            return SuccessFailureMessageTuple(
                False,
                [
                    f'Document type "{document_type_classname}" is not accepted for identifier schema "{self.schema.name}"'
                ],
            )

        return SuccessFailureMessageTuple(True, [])

    def perform_all_validations(
        self, document_type: type["Document"], api_client: "MarklogicApiClient"
    ) -> SuccessFailureMessageTuple:
        """
        Perform all validations on a given identifier.

        Every validation is run; the result succeeds only if none failed, and carries the messages of all of them.
        """
        validations = [
            self.validate_require_globally_unique(api_client=api_client),
            self.validate_valid_for_document_type(document_type=document_type),
        ]

        success = True
        messages: list[str] = []

        for validation in validations:
            if validation.success is False:
                success = False

            messages += validation.messages

        return SuccessFailureMessageTuple(success, messages)
A list of attributes of an Identifier to pack into an XML representation.
A list of attributes to unpack from an XML representation.
class IdentifierSchema(ABC):
    """
    Abstract description of one family of identifiers.

    A concrete schema must define `name`, `namespace` and `human_readable` as class attributes and implement both
    abstract class methods. All other attributes have defaults which a schema may override.
    """

    name: str
    namespace: str

    human_readable: bool
    """ Should this identifier type be considered for display as a 'human readable' identifier? """

    base_score_multiplier: float = 1.0
    """ A multiplier used to adjust the relative ranking of this identifier when calculating preferred identifiers. """

    allow_editing: bool = True
    """ Should editors be allowed to manually manipulate identifiers under this schema? """

    require_globally_unique: bool = True
    """ Must this identifier be globally unique? (appear on no other documents) """

    allow_multiple: bool = False
    """ May documents have more than one non-deprecated identifier of this type? """

    document_types: Optional[list[str]] = None
    """
    If present, a list of the names of document classes which can have this identifier.

    If `None`, this identifier is valid for all document types.
    """

    def __init_subclass__(cls: type["IdentifierSchema"], **kwargs: Any) -> None:
        """
        Reject any subclass which is defined without one of the required attributes.

        Raises `NotImplementedError` naming the first required attribute which is missing.
        """
        required_attributes = ("name", "namespace", "human_readable")
        missing = next((attribute for attribute in required_attributes if not hasattr(cls, attribute)), None)
        if missing is not None:
            raise NotImplementedError(f"Can't instantiate IdentifierSchema without {missing} attribute.")
        super().__init_subclass__(**kwargs)

    def __repr__(self) -> str:
        """Represent the schema by its `name`."""
        return self.name

    @classmethod
    @abstractmethod
    def validate_identifier_value(cls, value: str) -> bool:
        """Report whether `value` is acceptable as an identifier under this schema."""

    @classmethod
    @abstractmethod
    def compile_identifier_url_slug(cls, value: str) -> DocumentIdentifierSlug:
        """Turn an identifier value into its precompiled URL slug."""
A base class which describes what an identifier schema should look like.
Should this identifier type be considered for display as a 'human readable' identifier?
A multiplier used to adjust the relative ranking of this identifier when calculating preferred identifiers.
Should editors be allowed to manually manipulate identifiers under this schema?
Must this identifier be globally unique? (appear on no other documents)
May documents have more than one non-deprecated identifier of this type?
If present, a list of the names of document classes which can have this identifier.
If None, this identifier is valid for all document types.
@classmethod
@abstractmethod
def validate_identifier_value(cls, value: str) -> bool:
    """
    Report whether `value` is acceptable as an identifier under this schema.

    Abstract: every concrete schema must provide its own implementation.
    """
Check that any given identifier value is valid for this schema.
@classmethod
@abstractmethod
def compile_identifier_url_slug(cls, value: str) -> DocumentIdentifierSlug:
    """
    Turn an identifier value into its precompiled URL slug.

    Abstract: every concrete schema must provide its own implementation.
    """
Convert an identifier into a precompiled URL slug.
class Identifier(ABC):
    """
    A base class for subclasses representing a concrete identifier.

    Every subclass must set a (truthy) `schema` class attribute; defining a subclass without one raises
    `NotImplementedError`.
    """

    schema: type[IdentifierSchema]

    uuid: str
    value: DocumentIdentifierValue

    deprecated: bool
    """Should this identifier be considered deprecated, i.e. although we know it refers to a particular document, its usage should be discouraged?"""

    def __init_subclass__(cls: type["Identifier"], **kwargs: Any) -> None:
        """Ensure that subclasses have the required attributes set."""
        for required in ("schema",):
            if not getattr(cls, required, False):
                raise NotImplementedError(f"Can't instantiate Identifier without {required} attribute.")
        super().__init_subclass__(**kwargs)

    def __repr__(self) -> str:
        """Return `<schema-name value: uuid>`, with a `(deprecated)` marker where applicable."""
        representation = f"{self.schema.name} {self.value}: {self.uuid}"

        if self.deprecated:
            # No trailing whitespace after the closing bracket, matching the non-deprecated form.
            return f"<{representation} (deprecated)>"
        return f"<{representation}>"

    def __str__(self) -> str:
        """Return the bare identifier value."""
        return self.value

    def __init__(self, value: str, uuid: Optional[str] = None, deprecated: bool = False) -> None:
        """
        Create an identifier, validating `value` against the schema.

        :param value: The identifier value; must satisfy `schema.validate_identifier_value`.
        :param uuid: An existing UUID to reuse. If omitted (or empty) a new `id-<uuid4>` value is generated.
        :param deprecated: Whether this identifier is deprecated.

        :raises IdentifierValidationException: If the schema rejects `value`.
        """
        if not self.schema.validate_identifier_value(value=value):
            raise IdentifierValidationException(
                f'Identifier value "{value}" is not valid according to the {self.schema.name} schema.'
            )

        self.value = DocumentIdentifierValue(value)
        if uuid:
            self.uuid = uuid
        else:
            self.uuid = "id-" + str(uuid4())

        self.deprecated = deprecated

    @property
    def as_xml_tree(self) -> etree._Element:
        """
        Convert this Identifier into a packed XML representation for storage.

        The result is an `<identifier>` element with a `<namespace>` child followed by one child per entry in
        `IDENTIFIER_PACKABLE_ATTRIBUTES`. Boolean values are written as lowercase `true`/`false`.
        """
        identifier_root = etree.Element("identifier")

        namespace_attribute = etree.SubElement(identifier_root, "namespace")
        namespace_attribute.text = self.schema.namespace

        for attribute_name in IDENTIFIER_PACKABLE_ATTRIBUTES:
            packed_attribute = etree.SubElement(identifier_root, attribute_name)
            # Read each attribute exactly once: `url_slug` is a property which is recomputed on every access.
            attribute_value = getattr(self, attribute_name)
            if isinstance(attribute_value, bool):
                packed_attribute.text = str(attribute_value).lower()
            else:
                packed_attribute.text = attribute_value

        return identifier_root

    @property
    def url_slug(self) -> str:
        """The URL slug which the schema compiles from this identifier's value."""
        return self.schema.compile_identifier_url_slug(self.value)

    @property
    def score(self) -> float:
        """
        Return the score of this identifier, used to calculate the preferred identifier for a document.

        Deprecated identifiers always score 0; otherwise the score is the schema's `base_score_multiplier`.
        """
        if self.deprecated:
            return 0

        return 1 * self.schema.base_score_multiplier

    def same_as(self, other: "Identifier") -> bool:
        """Is this the same as another identifier (in value and schema)?"""
        return self.value == other.value and self.schema == other.schema

    def validate_require_globally_unique(self, api_client: "MarklogicApiClient") -> SuccessFailureMessageTuple:
        """
        Check against the list of identifiers in the database that this value does not currently exist.

        Schemas which set `require_globally_unique` to `False` always pass, without querying the database.

        nb: We don't need to check that the identifier value is unique within a parent `Identifiers` object, because `Identifiers.add()` will only allow one value per namespace.
        """
        if not self.schema.require_globally_unique:
            return SuccessFailureMessageTuple(True, [])

        # A clash is a resolution in the same namespace carrying a different UUID. Matching on UUID presumably
        # stops an already-stored identifier from clashing with itself (confirm against callers).
        clash_exists = any(
            resolution.identifier_namespace == self.schema.namespace and resolution.identifier_uuid != self.uuid
            for resolution in api_client.resolve_from_identifier_value(
                identifier_value=self.value, published_only=False
            )
        )
        if clash_exists:
            return SuccessFailureMessageTuple(
                False,
                [f'Identifiers in scheme "{self.schema.namespace}" must be unique; "{self.value}" already exists!'],
            )

        return SuccessFailureMessageTuple(True, [])

    def validate_valid_for_document_type(self, document_type: type["Document"]) -> SuccessFailureMessageTuple:
        """
        Check that this identifier's schema accepts the given document class.

        A schema whose `document_types` is `None` or empty accepts every class; otherwise the class name of
        `document_type` must be listed.
        """
        document_type_classname = document_type.__name__

        if self.schema.document_types and document_type_classname not in self.schema.document_types:
            return SuccessFailureMessageTuple(
                False,
                [
                    f'Document type "{document_type_classname}" is not accepted for identifier schema "{self.schema.name}"'
                ],
            )

        return SuccessFailureMessageTuple(True, [])

    def perform_all_validations(
        self, document_type: type["Document"], api_client: "MarklogicApiClient"
    ) -> SuccessFailureMessageTuple:
        """
        Perform all validations on a given identifier.

        Every validation is run; the result succeeds only if none failed, and carries the messages of all of them.
        """
        validations = [
            self.validate_require_globally_unique(api_client=api_client),
            self.validate_valid_for_document_type(document_type=document_type),
        ]

        success = True
        messages: list[str] = []

        for validation in validations:
            if validation.success is False:
                success = False

            messages += validation.messages

        return SuccessFailureMessageTuple(success, messages)
170 """ 171 resolutions = [ 172 resolution 173 for resolution in api_client.resolve_from_identifier_value( 174 identifier_value=self.value, published_only=False 175 ) 176 if resolution.identifier_namespace == self.schema.namespace and resolution.identifier_uuid != self.uuid 177 ] 178 if len(resolutions) > 0: 179 return SuccessFailureMessageTuple( 180 False, 181 [f'Identifiers in scheme "{self.schema.namespace}" must be unique; "{self.value}" already exists!'], 182 ) 183 184 return SuccessFailureMessageTuple(True, []) 185 186 def validate_valid_for_document_type(self, document_type: type["Document"]) -> SuccessFailureMessageTuple: 187 document_type_classname = document_type.__name__ 188 189 if self.schema.document_types and document_type_classname not in self.schema.document_types: 190 return SuccessFailureMessageTuple( 191 False, 192 [ 193 f'Document type "{document_type_classname}" is not accepted for identifier schema "{self.schema.name}"' 194 ], 195 ) 196 197 return SuccessFailureMessageTuple(True, []) 198 199 def perform_all_validations( 200 self, document_type: type["Document"], api_client: "MarklogicApiClient" 201 ) -> SuccessFailureMessageTuple: 202 """Perform all validations on a given identifier""" 203 validations = [ 204 self.validate_require_globally_unique(api_client=api_client), 205 self.validate_valid_for_document_type(document_type=document_type), 206 ] 207 208 success = True 209 messages: list[str] = [] 210 211 for validation in validations: 212 if validation.success is False: 213 success = False 214 215 messages += validation.messages 216 217 return SuccessFailureMessageTuple(success, messages)
A base class for subclasses representing a concrete identifier.
def __init__(self, value: str, uuid: Optional[str] = None, deprecated: bool = False) -> None:
    """
    Create an identifier, validating `value` against the schema.

    :param value: The identifier value; must satisfy `schema.validate_identifier_value`.
    :param uuid: An existing UUID to reuse. If omitted (or empty) a new `id-<uuid4>` value is generated.
    :param deprecated: Whether this identifier is deprecated.

    :raises IdentifierValidationException: If the schema rejects `value`.
    """
    schema = self.schema
    if not schema.validate_identifier_value(value=value):
        raise IdentifierValidationException(
            f'Identifier value "{value}" is not valid according to the {schema.name} schema.'
        )

    self.value = DocumentIdentifierValue(value)
    # Any falsy `uuid` (None or an empty string) results in a freshly generated one.
    self.uuid = uuid if uuid else f"id-{uuid4()}"
    self.deprecated = deprecated
Should this identifier be considered deprecated, i.e. although we know it refers to a particular document, its usage should be discouraged?
@property
def as_xml_tree(self) -> etree._Element:
    """
    Convert this Identifier into a packed XML representation for storage.

    The result is an `<identifier>` element with a `<namespace>` child followed by one child per entry in
    `IDENTIFIER_PACKABLE_ATTRIBUTES`. Boolean values are written as lowercase `true`/`false`.
    """
    identifier_root = etree.Element("identifier")

    namespace_attribute = etree.SubElement(identifier_root, "namespace")
    namespace_attribute.text = self.schema.namespace

    for attribute_name in IDENTIFIER_PACKABLE_ATTRIBUTES:
        packed_attribute = etree.SubElement(identifier_root, attribute_name)
        # Read each attribute exactly once: `url_slug` is a property which is recomputed on every access.
        attribute_value = getattr(self, attribute_name)
        if isinstance(attribute_value, bool):
            packed_attribute.text = str(attribute_value).lower()
        else:
            packed_attribute.text = attribute_value

    return identifier_root
Convert this Identifier into a packed XML representation for storage.
@property
def score(self) -> float:
    """
    The weight of this identifier when choosing a document's preferred identifier.

    Deprecated identifiers always score 0; otherwise the score is the schema's `base_score_multiplier`.
    """
    return 0 if self.deprecated else 1 * self.schema.base_score_multiplier
Return the score of this identifier, used to calculate the preferred identifier for a document.
def same_as(self, other: "Identifier") -> bool:
    """Report whether `other` has both the same value and the same schema as this identifier."""
    mine = (self.value, self.schema)
    theirs = (other.value, other.schema)
    return mine == theirs
Is this the same as another identifier (in value and schema)?
def validate_require_globally_unique(self, api_client: "MarklogicApiClient") -> SuccessFailureMessageTuple:
    """
    Check against the list of identifiers in the database that this value does not currently exist.

    Schemas which set `require_globally_unique` to `False` always pass, without querying the database.

    nb: We don't need to check that the identifier value is unique within a parent `Identifiers` object, because `Identifiers.add()` will only allow one value per namespace.
    """
    if not self.schema.require_globally_unique:
        return SuccessFailureMessageTuple(True, [])

    # A clash is a resolution in the same namespace carrying a different UUID. Matching on UUID presumably
    # stops an already-stored identifier from clashing with itself (confirm against callers).
    clash_exists = any(
        resolution.identifier_namespace == self.schema.namespace and resolution.identifier_uuid != self.uuid
        for resolution in api_client.resolve_from_identifier_value(
            identifier_value=self.value, published_only=False
        )
    )
    if clash_exists:
        return SuccessFailureMessageTuple(
            False,
            [f'Identifiers in scheme "{self.schema.namespace}" must be unique; "{self.value}" already exists!'],
        )

    return SuccessFailureMessageTuple(True, [])
Check against the list of identifiers in the database that this value does not currently exist.
nb: We don't need to check that the identifier value is unique within a parent Identifiers object, because Identifiers.add() will only allow one value per namespace.
def validate_valid_for_document_type(self, document_type: type["Document"]) -> SuccessFailureMessageTuple:
    """
    Check that this identifier's schema accepts the given document class.

    A schema whose `document_types` is `None` or empty accepts every class; otherwise the class name of
    `document_type` must be listed.
    """
    classname = document_type.__name__
    accepted_types = self.schema.document_types

    # Guard clause: no restriction configured, or the class is explicitly listed.
    if not accepted_types or classname in accepted_types:
        return SuccessFailureMessageTuple(True, [])

    failure_message = f'Document type "{classname}" is not accepted for identifier schema "{self.schema.name}"'
    return SuccessFailureMessageTuple(False, [failure_message])
def perform_all_validations(
    self, document_type: type["Document"], api_client: "MarklogicApiClient"
) -> SuccessFailureMessageTuple:
    """
    Run every validation for this identifier and merge the outcomes.

    The uniqueness check runs first, then the document-type check. The merged result succeeds only if no
    validation reported failure, and its messages are those of all validations in the order they ran.
    """
    outcomes = (
        self.validate_require_globally_unique(api_client=api_client),
        self.validate_valid_for_document_type(document_type=document_type),
    )

    all_passed = not any(outcome.success is False for outcome in outcomes)
    collected: list[str] = [message for outcome in outcomes for message in outcome.messages]

    return SuccessFailureMessageTuple(all_passed, collected)
Perform all validations on a given identifier