diff --git a/ebl/app.py b/ebl/app.py index 1dfbbd597..769b91801 100644 --- a/ebl/app.py +++ b/ebl/app.py @@ -23,6 +23,9 @@ from ebl.ebl_ai_client import EblAiClient from ebl.files.infrastructure.grid_fs_file_repository import GridFsFileRepository from ebl.files.web.bootstrap import create_files_route +from ebl.fragmentarium.infrastructure.cropped_sign_images_repository import ( + MongoCroppedSignImagesRepository, +) from ebl.fragmentarium.infrastructure.mongo_annotations_repository import ( MongoAnnotationsRepository, ) @@ -67,6 +70,7 @@ def create_context(): return Context( ebl_ai_client=ebl_ai_client, auth_backend=auth_backend, + cropped_sign_images_repository=MongoCroppedSignImagesRepository(database), word_repository=MongoWordRepository(database), sign_repository=MongoSignRepository(database), public_file_repository=GridFsFileRepository(database, "fs"), diff --git a/ebl/context.py b/ebl/context.py index c66caa90c..c6d6e4b50 100644 --- a/ebl/context.py +++ b/ebl/context.py @@ -15,6 +15,9 @@ from ebl.fragmentarium.application.transliteration_update_factory import ( TransliterationUpdateFactory, ) +from ebl.fragmentarium.infrastructure.cropped_sign_images_repository import ( + MongoCroppedSignImagesRepository, +) from ebl.lemmatization.application.suggestion_finder import LemmaRepository from ebl.transliteration.application.parallel_line_injector import ParallelLineInjector from ebl.transliteration.application.sign_repository import SignRepository @@ -27,6 +30,7 @@ class Context: ebl_ai_client: EblAiClient auth_backend: AuthBackend + cropped_sign_images_repository: MongoCroppedSignImagesRepository word_repository: WordRepository sign_repository: SignRepository public_file_repository: FileRepository diff --git a/ebl/fragmentarium/application/annotations_image_extractor.py b/ebl/fragmentarium/application/annotations_image_extractor.py index d45696cdc..e69de29bb 100644 --- a/ebl/fragmentarium/application/annotations_image_extractor.py +++ b/ebl/fragmentarium/application/annotations_image_extractor.py @@ -1,137 +0,0 @@ -import base64 -from functools import singledispatchmethod -import io -from typing import Sequence, NewType - -import attr -from PIL import Image - -from ebl.files.application.file_repository import FileRepository -from ebl.fragmentarium.application.annotations_repository import AnnotationsRepository -from ebl.fragmentarium.application.fragment_repository import FragmentRepository -from ebl.fragmentarium.domain.annotation import BoundingBox, Annotation -from ebl.transliteration.domain.museum_number import MuseumNumber -from ebl.transliteration.domain.line_label import LineLabel -from ebl.transliteration.domain.line_number import ( - AbstractLineNumber, - LineNumber, - LineNumberRange, -) - -Base64 = NewType("Base64", str) - - -@attr.attrs(auto_attribs=True, frozen=True) -class CroppedAnnotation: - image: Base64 - fragment_number: MuseumNumber - script: str - label: str - - -class AnnotationImageExtractor: - def __init__( - self, - fragment_repository: FragmentRepository, - annotations_repository: AnnotationsRepository, - photos_repository: FileRepository, - ): - self._fragments_repository = fragment_repository - self._annotations_repository = annotations_repository - self._photos_repository = photos_repository - - def _format_label(self, label: LineLabel) -> str: - line_number = label.line_number - column = label.column - surface = label.surface - object = label.object - line_atf = line_number.atf if line_number else "" - column_abbr = column.abbreviation if column else "" - surface_abbr = surface.abbreviation if surface else "" - object_abbr = object.abbreviation if object else "" - return " ".join( - filter( - bool, - [column_abbr, surface_abbr, object_abbr, line_atf.replace(".", "")], - ) - ) - - def _cropped_image_from_annotation( - self, annotation: Annotation, fragment_number: MuseumNumber - ) -> Base64: - fragment_image = self._photos_repository.query_by_file_name( - f"{fragment_number}.jpg" - ) - image_bytes = fragment_image.read() - image = Image.open(io.BytesIO(image_bytes), mode="r") - bounding_box = BoundingBox.from_annotations( - image.size[0], image.size[1], [annotation] - )[0] - area = ( - bounding_box.top_left_x, - bounding_box.top_left_y, - bounding_box.top_left_x + bounding_box.width, - bounding_box.top_left_y + bounding_box.height, - ) - cropped_image = image.crop(area) - buf = io.BytesIO() - cropped_image.save(buf, format="PNG") - return Base64(base64.b64encode(buf.getvalue()).decode("utf-8")) - - @singledispatchmethod - def _is_matching_number(self, line_number: AbstractLineNumber, number: int) -> bool: - raise ValueError("No default for overloading") - - @_is_matching_number.register(LineNumber) - def _(self, line_number: LineNumber, number: int): - return number == line_number.number - - @_is_matching_number.register(LineNumberRange) - def _(self, line_number: LineNumberRange, number: int): - return line_number.start.number <= number <= line_number.end.number - - def _cropped_image_from_annotations( - self, fragment_number: MuseumNumber, annotations: Sequence[Annotation] - ) -> Sequence[CroppedAnnotation]: - cropped_annotations = [] - for annotation in annotations: - fragment = self._fragments_repository.query_by_museum_number( - fragment_number - ) - script = fragment.script - labels = fragment.text.labels - label = next( - ( - label - for label in labels - if self._is_matching_number( - label.line_number, annotation.data.path[0] - ) - ), - None, - ) - - cropped_image = self._cropped_image_from_annotation( - annotation, fragment_number - ) - cropped_annotations.append( - CroppedAnnotation( - cropped_image, - fragment_number, - script, - self._format_label(label) if label else "", - ) - ) - return cropped_annotations - - def cropped_images_from_sign(self, sign: str) -> Sequence[CroppedAnnotation]: - annotations = self._annotations_repository.find_by_sign(sign) - cropped_annotations = [] - for single_annotation in annotations: - fragment_number = single_annotation.fragment_number - cropped_annotations.extend( - self._cropped_image_from_annotations( - fragment_number, single_annotation.annotations - ) - ) - return cropped_annotations diff --git a/ebl/fragmentarium/application/annotations_schema.py b/ebl/fragmentarium/application/annotations_schema.py index 971733acb..97a8af2c5 100644 --- a/ebl/fragmentarium/application/annotations_schema.py +++ b/ebl/fragmentarium/application/annotations_schema.py @@ -1,5 +1,6 @@ -from marshmallow import Schema, fields, post_load, EXCLUDE - +from marshmallow import Schema, fields, post_load, post_dump, EXCLUDE +import pydash +from ebl.fragmentarium.application.cropped_sign_image import CroppedSignSchema from ebl.fragmentarium.domain.annotation import ( Geometry, AnnotationData, @@ -40,18 +41,22 @@ class Meta: geometry = fields.Nested(GeometrySchema(), required=True) data = fields.Nested(AnnotationDataSchema(), required=True) + cropped_sign = fields.Nested( + CroppedSignSchema(), load_default=None, data_key="croppedSign" + ) @post_load def make_annotation(self, data, **kwargs): return Annotation(**data) + @post_dump + def filter_none(self, data, **kwargs): + return pydash.omit_by(data, pydash.is_none) -class AnnotationsSchema(Schema): - class Meta: - unknown = EXCLUDE +class AnnotationsSchema(Schema): fragment_number = fields.String(required=True, data_key="fragmentNumber") - annotations = fields.Nested(AnnotationSchema, many=True, required=True) + annotations = fields.List(fields.Nested(AnnotationSchema(), required=True)) @post_load def make_annotation(self, data, **kwargs): diff --git a/ebl/fragmentarium/application/annotations_service.py b/ebl/fragmentarium/application/annotations_service.py index 7fa7eba7c..68b7e1d0f 100644 --- a/ebl/fragmentarium/application/annotations_service.py +++ b/ebl/fragmentarium/application/annotations_service.py @@ -1,10 +1,29 @@ +from io import BytesIO +from typing import Tuple, Sequence + +import attr +from PIL import Image + from ebl.changelog import Changelog from ebl.ebl_ai_client import EblAiClient -import attr from ebl.files.application.file_repository import FileRepository from ebl.fragmentarium.application.annotations_repository import AnnotationsRepository from ebl.fragmentarium.application.annotations_schema import AnnotationsSchema -from ebl.fragmentarium.domain.annotation import Annotations +from ebl.fragmentarium.application.cropped_sign_image import CroppedSign +from ebl.fragmentarium.application.cropped_sign_images_repository import ( + CroppedSignImage, + CroppedSignImagesRepository, +) +from ebl.fragmentarium.application.fragment_repository import FragmentRepository +from ebl.fragmentarium.domain.annotation import ( + Annotations, + AnnotationValueType, + Annotation, + BoundingBox, + AnnotationValueType, +) + +from ebl.transliteration.domain.line_label import LineLabel from ebl.transliteration.domain.museum_number import MuseumNumber from ebl.users.domain.user import User @@ -15,6 +34,9 @@ class AnnotationsService: _annotations_repository: AnnotationsRepository _photo_repository: FileRepository _changelog: Changelog + _fragments_repository: FragmentRepository + _photos_repository: FileRepository + _cropped_sign_images_repository: CroppedSignImagesRepository def generate_annotations( self, number: MuseumNumber, threshold: float = 0.3 @@ -27,17 +49,94 @@ def generate_annotations( def find(self, number: MuseumNumber) -> Annotations: return self._annotations_repository.query_by_museum_number(number) + def _label_by_line_number( + self, line_number_to_match: int, labels: Sequence[LineLabel] + ) -> str: + matching_label = None + for label in labels: + label_line_number = label.line_number + if label_line_number and label_line_number.is_matching_number( + line_number_to_match + ): + matching_label = label + return matching_label.formatted_label if matching_label else "" + + def _cropped_image_from_annotations_helper( + self, + annotations: Annotations, + image: Image.Image, + script: str, + labels: Sequence[LineLabel], + ) -> Tuple[Annotations, Sequence[CroppedSignImage]]: + cropped_sign_images = [] + updated_cropped_annotations = [] + + for annotation in annotations.annotations: + label = ( + self._label_by_line_number(annotation.data.path[0], labels) + if annotation.data.type != AnnotationValueType.BLANK + else "" + ) + cropped_image = annotation.crop_image(image) + cropped_sign_image = CroppedSignImage.create(cropped_image) + cropped_sign_images.append(cropped_sign_image) + + updated_cropped_annotation = attr.evolve( + annotation, + cropped_sign=CroppedSign( + cropped_sign_image.image_id, + script, + label, + ), + ) + updated_cropped_annotations.append(updated_cropped_annotation) + return ( + attr.evolve(annotations, annotations=updated_cropped_annotations), + cropped_sign_images, + ) + + def _cropped_image_from_annotations( + self, annotations: Annotations + ) -> Tuple[Annotations, Sequence[CroppedSignImage]]: + fragment = self._fragments_repository.query_by_museum_number( + annotations.fragment_number + ) + fragment_image = self._photos_repository.query_by_file_name( + f"{annotations.fragment_number}.jpg" + ) + image_bytes = fragment_image.read() + image = Image.open(BytesIO(image_bytes), mode="r") + return self._cropped_image_from_annotations_helper( + annotations, image, fragment.script, fragment.text.labels + ) + def update(self, annotations: Annotations, user: User) -> Annotations: old_annotations = self._annotations_repository.query_by_museum_number( annotations.fragment_number ) _id = str(annotations.fragment_number) schema = AnnotationsSchema() + ( + annotations_with_image_ids, + cropped_sign_images, + ) = self._cropped_image_from_annotations(annotations) + + self._annotations_repository.create_or_update(annotations_with_image_ids) + self._cropped_sign_images_repository.create_many(cropped_sign_images) + self._changelog.create( "annotations", user.profile, {"_id": _id, **schema.dump(old_annotations)}, - {"_id": _id, **schema.dump(annotations)}, + {"_id": _id, **schema.dump(annotations_with_image_ids)}, ) - self._annotations_repository.create_or_update(annotations) - return annotations + return annotations_with_image_ids + + def migrate(self, annotations: Annotations) -> Annotations: + ( + annotations_with_image_ids, + cropped_sign_images, + ) = self._cropped_image_from_annotations(annotations) + self._annotations_repository.create_or_update(annotations_with_image_ids) + self._cropped_sign_images_repository.create_many(cropped_sign_images) + return annotations_with_image_ids diff --git a/ebl/fragmentarium/application/cropped_annotations_service.py b/ebl/fragmentarium/application/cropped_annotations_service.py new file mode 100644 index 000000000..22b0a9f44 --- /dev/null +++ b/ebl/fragmentarium/application/cropped_annotations_service.py @@ -0,0 +1,38 @@ +from typing import Sequence + +from ebl.fragmentarium.application.annotations_repository import AnnotationsRepository +from ebl.fragmentarium.application.cropped_sign_image import CroppedAnnotation +from ebl.fragmentarium.application.cropped_sign_images_repository import ( + CroppedSignImagesRepository, +) + + +class CroppedAnnotationService: + def __init__( + self, + annotations_repository: AnnotationsRepository, + cropped_sign_images_repository: CroppedSignImagesRepository, + ): + self._annotations_repository = annotations_repository + self._cropped_sign_image_repository = cropped_sign_images_repository + + def find_annotations_by_sign(self, sign: str) -> Sequence[CroppedAnnotation]: + annotations = self._annotations_repository.find_by_sign(sign) + cropped_image_annotations = [] + for annotation in annotations: + for annotation_elem in annotation.annotations: + cropped_sign = annotation_elem.cropped_sign + if cropped_sign: + cropped_sign_image = ( + self._cropped_sign_image_repository.query_by_id( + cropped_sign.image_id + ) + ) + cropped_image_annotations.append( + CroppedAnnotation.from_cropped_sign( + annotation.fragment_number, + cropped_sign_image.image, + cropped_sign, + ) + ) + return cropped_image_annotations diff --git a/ebl/fragmentarium/application/cropped_sign_image.py b/ebl/fragmentarium/application/cropped_sign_image.py new file mode 100644 index 000000000..5863b1395 --- /dev/null +++ b/ebl/fragmentarium/application/cropped_sign_image.py @@ -0,0 +1,72 @@ +import uuid +from typing import NewType + +import attr +from marshmallow import Schema, fields, post_load, post_dump + +from ebl.transliteration.domain.museum_number import MuseumNumber + +Base64 = NewType("Base64", str) + + +@attr.s(auto_attribs=True, frozen=True) +class CroppedSignImage: + image_id: str + image: Base64 + + @classmethod + def create(cls, image: Base64) -> "CroppedSignImage": + return cls(str(uuid.uuid4()), image) + + +class CroppedSignImageSchema(Schema): + image_id = fields.Str(required=True) + image = fields.Str(required=True) + + @post_load + def load(self, data, **kwargs): + return CroppedSignImage(data["_id"], data["image"]) + + @post_dump + def cropped_sign_image_dump(self, data, **kwargs): + return {"_id": data["image_id"], "image": data["image"]} + + +@attr.attrs(auto_attribs=True, frozen=True) +class CroppedSign: + image_id: str + script: str + label: str + + +class CroppedSignSchema(Schema): + image_id = fields.String(required=True, data_key="imageId") + script = fields.String(required=True) + label = fields.String(required=True) + + @post_load + def load(self, data, **kwargs): + return CroppedSign(data["imageId"], data["script"], data["label"]) + + +@attr.attrs(auto_attribs=True, frozen=True) +class CroppedAnnotation(CroppedSign): + fragment_number: MuseumNumber + image: Base64 + + @classmethod + def from_cropped_sign( + cls, fragment_number: MuseumNumber, image: Base64, cropped_sign: CroppedSign + ) -> "CroppedAnnotation": + return cls( + cropped_sign.image_id, + cropped_sign.script, + cropped_sign.label, + fragment_number, + image, + ) + + +class CroppedAnnotationSchema(CroppedSignSchema): + fragment_number = fields.String(required=True, data_key="fragmentNumber") + image = fields.String(required=True) diff --git a/ebl/fragmentarium/application/cropped_sign_images_repository.py b/ebl/fragmentarium/application/cropped_sign_images_repository.py new file mode 100644 index 000000000..4599179bc --- /dev/null +++ b/ebl/fragmentarium/application/cropped_sign_images_repository.py @@ -0,0 +1,14 @@ +from abc import ABC, abstractmethod +from typing import Sequence + +from ebl.fragmentarium.application.cropped_sign_image import CroppedSignImage + + +class CroppedSignImagesRepository(ABC): + @abstractmethod + def query_by_id(self, image_id: str) -> CroppedSignImage: + ... + + @abstractmethod + def create_many(self, cropped_sign_images: Sequence[CroppedSignImage]) -> None: + ... diff --git a/ebl/fragmentarium/domain/annotation.py b/ebl/fragmentarium/domain/annotation.py index dfb198bba..79ec6fb73 100644 --- a/ebl/fragmentarium/domain/annotation.py +++ b/ebl/fragmentarium/domain/annotation.py @@ -1,9 +1,13 @@ +import base64 +import io +from PIL import Image from enum import Enum -from typing import Sequence +from typing import Sequence, Optional from uuid import uuid4 import attr +from ebl.fragmentarium.application.cropped_sign_image import CroppedSign, Base64 from ebl.transliteration.domain.museum_number import MuseumNumber @@ -38,11 +42,27 @@ class AnnotationData: class Annotation: geometry: Geometry data: AnnotationData + cropped_sign: Optional[CroppedSign] + + def crop_image(self, image: Image.Image) -> Base64: + bounding_box = BoundingBox.from_annotations( + image.size[0], image.size[1], [self] + )[0] + area = ( + bounding_box.top_left_x, + bounding_box.top_left_y, + bounding_box.top_left_x + bounding_box.width, + bounding_box.top_left_y + bounding_box.height, + ) + cropped_image = image.crop(area) + buf = io.BytesIO() + cropped_image.save(buf, format="PNG") + return Base64(base64.b64encode(buf.getvalue()).decode("utf-8")) @classmethod def from_prediction(cls, geometry: Geometry) -> "Annotation": data = AnnotationData(uuid4().hex, "", AnnotationValueType.PREDICTED, [], "") - return cls(geometry, data) + return cls(geometry, data, None) @attr.attrs(auto_attribs=True, frozen=True) diff --git a/ebl/fragmentarium/infrastructure/cropped_sign_images_repository.py b/ebl/fragmentarium/infrastructure/cropped_sign_images_repository.py new file mode 100644 index 000000000..39ea6f131 --- /dev/null +++ b/ebl/fragmentarium/infrastructure/cropped_sign_images_repository.py @@ -0,0 +1,26 @@ +from typing import Sequence + +from pymongo.database import Database + +from ebl.fragmentarium.application.cropped_sign_image import CroppedSignImageSchema +from ebl.fragmentarium.application.cropped_sign_images_repository import ( + CroppedSignImagesRepository, + CroppedSignImage, +) +from ebl.mongo_collection import MongoCollection + +COLLECTION = "cropped_sign_images" + + +class MongoCroppedSignImagesRepository(CroppedSignImagesRepository): + def __init__(self, database: Database) -> None: + self._collection = MongoCollection(database, COLLECTION) + + def create_many(self, cropped_sign_images: Sequence[CroppedSignImage]) -> None: + schema = CroppedSignImageSchema(many=True) + self._collection.insert_many(schema.dump(cropped_sign_images)) + + def query_by_id(self, image_id: str) -> CroppedSignImage: + return CroppedSignImageSchema().load( + self._collection.find_one({"_id": image_id}) + ) diff --git a/ebl/fragmentarium/infrastructure/mongo_annotations_repository.py b/ebl/fragmentarium/infrastructure/mongo_annotations_repository.py index 3429a5921..3af26e9d1 100644 --- a/ebl/fragmentarium/infrastructure/mongo_annotations_repository.py +++ b/ebl/fragmentarium/infrastructure/mongo_annotations_repository.py @@ -41,7 +41,7 @@ def retrieve_all_non_empty(self) -> List[Annotations]: result = self._collection.find_many( {"annotations": {"$exists": True, "$ne": []}} ) - return AnnotationsSchema().load(result, unknown=EXCLUDE, many=True) + return AnnotationsSchema(unknown=EXCLUDE).load(result, many=True) def find_by_sign(self, sign: str) -> Sequence[Annotations]: query = {"$regex": re.escape(sign), "$options": "i"} diff --git a/ebl/fragmentarium/migrate_annotations.py b/ebl/fragmentarium/migrate_annotations.py new file mode 100644 index 000000000..bc45c8d4b --- /dev/null +++ b/ebl/fragmentarium/migrate_annotations.py @@ -0,0 +1,21 @@ +from ebl.app import create_context +from ebl.fragmentarium.application.annotations_service import AnnotationsService + +if __name__ == "__main__": + context = create_context() + annotations = context.annotations_repository.retrieve_all_non_empty() + service = AnnotationsService( + context.ebl_ai_client, + context.annotations_repository, + context.photo_repository, + context.changelog, + context.fragment_repository, + context.photo_repository, + context.cropped_sign_images_repository, + ) + print(f"Lenght {len(annotations)}") + for counter, annotation in enumerate(annotations): + print(counter) + service.migrate(annotation) + + print("Update annotations completed!") diff --git a/ebl/fragmentarium/update_annotations.py b/ebl/fragmentarium/update_annotations.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/ebl/fragmentarium/web/annotations.py b/ebl/fragmentarium/web/annotations.py index 7bc5d7d7b..03f4540fb 100644 --- a/ebl/fragmentarium/web/annotations.py +++ b/ebl/fragmentarium/web/annotations.py @@ -12,7 +12,7 @@ def __init__(self, annotation_service: AnnotationsService): self._annotation_service = annotation_service @falcon.before(require_scope, "annotate:fragments") - @validate(AnnotationsSchema(), AnnotationsSchema()) + @validate(AnnotationsSchema()) def on_post(self, req: falcon.Request, resp: falcon.Response, number: str): if number == req.media.get("fragmentNumber"): annotations = self._annotation_service.update( diff --git a/ebl/fragmentarium/web/bootstrap.py b/ebl/fragmentarium/web/bootstrap.py index dba584c31..522a3e6ca 100644 --- a/ebl/fragmentarium/web/bootstrap.py +++ b/ebl/fragmentarium/web/bootstrap.py @@ -39,6 +39,9 @@ def create_fragmentarium_routes(api: falcon.App, context: Context): context.annotations_repository, context.photo_repository, context.changelog, + context.fragment_repository, + context.photo_repository, + context.cropped_sign_images_repository, ) statistics = make_statistics_resource(context.cache, fragmentarium) diff --git a/ebl/mongo_collection.py b/ebl/mongo_collection.py index ed30f2c83..874bab36a 100644 --- a/ebl/mongo_collection.py +++ b/ebl/mongo_collection.py @@ -1,4 +1,4 @@ -from typing import Any, cast +from typing import Any, cast, Sequence import inflect from pymongo.collection import Collection @@ -19,6 +19,9 @@ def __init__(self, database: Database, collection: str): self.__collection = collection self.__resource_noun = singlar(collection) + def insert_many(self, documents: Sequence[dict]): + return self.__get_collection().insert_many(documents).inserted_ids + def insert_one(self, document): try: return self.__get_collection().insert_one(document).inserted_id diff --git a/ebl/signs/web/bootstrap.py b/ebl/signs/web/bootstrap.py index 3223b952e..b13a73b69 100644 --- a/ebl/signs/web/bootstrap.py +++ b/ebl/signs/web/bootstrap.py @@ -1,22 +1,20 @@ import falcon from ebl.context import Context -from ebl.fragmentarium.application.annotations_image_extractor import ( - AnnotationImageExtractor, +from ebl.fragmentarium.application.cropped_annotations_service import ( + CroppedAnnotationService, ) from ebl.signs.web.sign_search import SignsSearch from ebl.signs.web.signs import SignsResource -from ebl.signs.web.sign_images import SignsImageResource +from ebl.signs.web.cropped_annotations import CroppedAnnotationsResource def create_signs_routes(api: falcon.App, context: Context): signs_search = SignsSearch(context.sign_repository) signs = SignsResource(context.sign_repository) - signs_images = SignsImageResource( - AnnotationImageExtractor( - context.fragment_repository, - context.annotations_repository, - context.photo_repository, + signs_images = CroppedAnnotationsResource( + CroppedAnnotationService( + context.annotations_repository, context.cropped_sign_images_repository ) ) api.add_route("/signs", signs_search) diff --git a/ebl/signs/web/cropped_annotation_schema.py b/ebl/signs/web/cropped_annotation_schema.py deleted file mode 100644 index 79d186874..000000000 --- a/ebl/signs/web/cropped_annotation_schema.py +++ /dev/null @@ -1,8 +0,0 @@ -from marshmallow import fields, Schema - - -class CroppedAnnotationSchema(Schema): - image = fields.String(required=True) - fragment_number = fields.String(required=True, data_key="fragmentNumber") - script = fields.String(required=True) - label = fields.String(required=True) diff --git a/ebl/signs/web/cropped_annotations.py b/ebl/signs/web/cropped_annotations.py new file mode 100644 index 000000000..1cabfd51e --- /dev/null +++ b/ebl/signs/web/cropped_annotations.py @@ -0,0 +1,20 @@ +import falcon + +from ebl.fragmentarium.application.cropped_annotations_service import ( + CroppedAnnotationService, +) +from ebl.fragmentarium.application.cropped_sign_image import CroppedAnnotationSchema + +from ebl.users.web.require_scope import require_scope + + +class CroppedAnnotationsResource: + def __init__(self, cropped_annotations_service: CroppedAnnotationService): + self._cropped_annotations_service = cropped_annotations_service + + @falcon.before(require_scope, "read:words") + def on_get(self, _req, resp, sign_name): + cropped_signs = self._cropped_annotations_service.find_annotations_by_sign( + sign_name + ) + resp.media = CroppedAnnotationSchema().dump(cropped_signs, many=True) diff --git a/ebl/signs/web/sign_images.py b/ebl/signs/web/sign_images.py deleted file mode 100644 index 04e34ea38..000000000 --- a/ebl/signs/web/sign_images.py +++ /dev/null @@ -1,17 +0,0 @@ -import falcon - -from ebl.fragmentarium.application.annotations_image_extractor import ( - AnnotationImageExtractor, -) -from ebl.signs.web.cropped_annotation_schema import CroppedAnnotationSchema -from ebl.users.web.require_scope import require_scope - - -class SignsImageResource: - def __init__(self, annotation_image_extractor: AnnotationImageExtractor): - self._image_extractor = annotation_image_extractor - - @falcon.before(require_scope, "read:words") - def on_get(self, _req, resp, sign_name): - cropped_signs = self._image_extractor.cropped_images_from_sign(sign_name) - resp.media = CroppedAnnotationSchema().dump(cropped_signs, many=True) diff --git a/ebl/tests/conftest.py b/ebl/tests/conftest.py index 163b4d8f9..bf3f6d173 100644 --- a/ebl/tests/conftest.py +++ b/ebl/tests/conftest.py @@ -10,6 +10,7 @@ import pytest from PIL import Image from dictdiffer import diff + from falcon import testing from falcon_auth import NoneAuthBackend from falcon_caching import Cache @@ -30,6 +31,7 @@ from ebl.ebl_ai_client import EblAiClient from ebl.files.application.file_repository import File from ebl.files.infrastructure.grid_fs_file_repository import GridFsFileRepository +from ebl.fragmentarium.application.annotations_service import AnnotationsService from ebl.fragmentarium.application.fragment_finder import FragmentFinder from ebl.fragmentarium.application.fragment_matcher import FragmentMatcher from ebl.fragmentarium.application.fragment_updater import FragmentUpdater @@ -37,14 +39,15 @@ from ebl.fragmentarium.application.transliteration_update_factory import ( TransliterationUpdateFactory, ) -from ebl.transliteration.application.parallel_line_injector import ParallelLineInjector -from ebl.transliteration.domain.museum_number import MuseumNumber -from ebl.fragmentarium.infrastructure.mongo_fragment_repository import ( - MongoFragmentRepository, +from ebl.fragmentarium.infrastructure.cropped_sign_images_repository import ( + MongoCroppedSignImagesRepository, ) from ebl.fragmentarium.infrastructure.mongo_annotations_repository import ( MongoAnnotationsRepository, ) +from ebl.fragmentarium.infrastructure.mongo_fragment_repository import ( + MongoFragmentRepository, +) from ebl.lemmatization.infrastrcuture.mongo_suggestions_finder import ( MongoLemmaRepository, ) @@ -53,10 +56,12 @@ SignSchema, ) from ebl.tests.factories.bibliography import BibliographyEntryFactory +from ebl.transliteration.application.parallel_line_injector import ParallelLineInjector from ebl.transliteration.domain import atf from ebl.transliteration.domain.at_line import ColumnAtLine, SurfaceAtLine, ObjectAtLine from ebl.transliteration.domain.labels import ColumnLabel, SurfaceLabel, ObjectLabel from ebl.transliteration.domain.line_number import LineNumber +from ebl.transliteration.domain.museum_number import MuseumNumber from ebl.transliteration.domain.sign import Sign, SignListRecord, Value from ebl.transliteration.domain.sign_tokens import Reading from ebl.transliteration.domain.text import Text @@ -103,6 +108,11 @@ def dictionary(word_repository, changelog): return Dictionary(word_repository, changelog) +@pytest.fixture +def cropped_sign_images_repository(database): + return MongoCroppedSignImagesRepository(database) + + @pytest.fixture def ebl_ai_client(): return EblAiClient("http://localhost:8001") @@ -323,6 +333,25 @@ def lemma_repository(database): return MongoLemmaRepository(database) +@pytest.fixture +def annotations_service( + annotations_repository, + photo_repository, + changelog, + fragment_repository, + cropped_sign_images_repository, +): + return AnnotationsService( + EblAiClient(""), + annotations_repository, + photo_repository, + changelog, + fragment_repository, + photo_repository, + cropped_sign_images_repository, + ) + + @pytest.fixture def user() -> User: return Auth0User( @@ -352,6 +381,7 @@ def user() -> User: @pytest.fixture def context( ebl_ai_client, + cropped_sign_images_repository, word_repository, sign_repository, file_repository, @@ -369,6 +399,7 @@ def context( return ebl.context.Context( ebl_ai_client=ebl_ai_client, auth_backend=NoneAuthBackend(lambda: user), + cropped_sign_images_repository=cropped_sign_images_repository, word_repository=word_repository, sign_repository=sign_repository, public_file_repository=file_repository, diff --git a/ebl/tests/factories/annotation.py b/ebl/tests/factories/annotation.py index da62dac9b..1f4076c16 100644 --- a/ebl/tests/factories/annotation.py +++ b/ebl/tests/factories/annotation.py @@ -1,5 +1,6 @@ import factory.fuzzy +from ebl.fragmentarium.application.cropped_sign_image import CroppedSign from ebl.fragmentarium.domain.annotation import ( Annotation, AnnotationData, @@ -37,12 +38,22 @@ class Meta: ) +class CroppedSignFactory(factory.Factory): + class Meta: + model = CroppedSign + + image_id = factory.Faker("word") + script = factory.Faker("word") + label = factory.Faker("word") + + class AnnotationFactory(factory.Factory): class Meta: model = Annotation geometry = factory.SubFactory(GeometryFactory) data = factory.SubFactory(AnnotationDataFactory) + cropped_sign = factory.SubFactory(CroppedSignFactory) class AnnotationsFactory(factory.Factory): diff --git a/ebl/tests/fragmentarium/test_annotation.py b/ebl/tests/fragmentarium/test_annotation.py index fe0e6a67c..441699e92 100644 --- a/ebl/tests/fragmentarium/test_annotation.py +++ b/ebl/tests/fragmentarium/test_annotation.py @@ -1,3 +1,4 @@ +from ebl.fragmentarium.application.cropped_sign_image import CroppedSign from ebl.fragmentarium.domain.annotation import ( Geometry, AnnotationData, @@ -21,7 +22,13 @@ SIGN_NAME = "KUR" DATA = AnnotationData(ID, VALUE, TYPE, PATH, SIGN_NAME) -ANNOTATION = Annotation(GEOMETRY, DATA) +IMAGE_ID = "image-id" +SCRIPT = "script" +LABEL = "label" + +CROPPED_SIGN = CroppedSign(IMAGE_ID, SCRIPT, LABEL) + +ANNOTATION = Annotation(GEOMETRY, DATA, CROPPED_SIGN) MUSEUM_NUMBER = MuseumNumber("K", "1") ANNOTATIONS = Annotations(MUSEUM_NUMBER, [ANNOTATION]) diff --git a/ebl/tests/fragmentarium/test_annotations_image_extractor.py b/ebl/tests/fragmentarium/test_annotations_image_extractor.py deleted file mode 100644 index 46e4d139a..000000000 --- a/ebl/tests/fragmentarium/test_annotations_image_extractor.py +++ /dev/null @@ -1,124 +0,0 @@ -import pytest - -from ebl.fragmentarium.application.annotations_image_extractor import ( - AnnotationImageExtractor, - CroppedAnnotation, -) -from ebl.tests.conftest import create_test_photo -from ebl.tests.factories.annotation import ( - AnnotationsFactory, - AnnotationFactory, - AnnotationDataFactory, -) -from ebl.tests.factories.fragment import TransliteratedFragmentFactory -from ebl.transliteration.domain import atf -from ebl.transliteration.domain.labels import ColumnLabel, SurfaceLabel, ObjectLabel -from ebl.transliteration.domain.line_label import LineLabel -from ebl.transliteration.domain.line_number import LineNumber, LineNumberRange - - -@pytest.mark.parametrize( - "line_label, expected", - [ - ( - LineLabel( - ColumnLabel.from_int(1), - SurfaceLabel([], atf.Surface.SURFACE, "Stone wig"), - ObjectLabel([], atf.Object.OBJECT, "Stone wig"), - LineNumber(2), - ), - "i Stone wig Stone wig 2", - ), - ( - LineLabel( - None, None, None, LineNumberRange(LineNumber(1, True), LineNumber(3)) - ), - "1'-3", - ), - ], -) -def test_format_line_label( - line_label, expected, annotations_repository, photo_repository, fragment_repository -): - image_extractor = AnnotationImageExtractor( - fragment_repository, annotations_repository, photo_repository - ) - assert image_extractor._format_label(line_label) == expected - - -@pytest.mark.parametrize( - "line_label, line_number, expected", - [ - ( - LineNumber(2), - 2, - True, - ), - ( - LineNumber(2), - 1, - False, - ), - ( - LineNumberRange(LineNumber(1, True), LineNumber(3)), - 2, - True, - ), - ( - LineNumberRange(LineNumber(1, True), LineNumber(3)), - 4, - False, - ), - ], -) -def test_line_label_match_line_number( - line_label, - line_number, - expected, - fragment_repository, - annotations_repository, - photo_repository, -): - image_extractor = AnnotationImageExtractor( - fragment_repository, annotations_repository, photo_repository - ) - assert image_extractor._is_matching_number(line_label, line_number) == expected - - -def test_cropped_images_from_sign( - annotations_repository, - photo_repository, - fragment_repository, - when, - text_with_labels, -): - - image_extractor = AnnotationImageExtractor( - fragment_repository, annotations_repository, photo_repository - ) - - single_annotation = AnnotationFactory.build( - data=AnnotationDataFactory.build(path=[2, 0, 0]) - ) - annotation = AnnotationsFactory.build(annotations=[single_annotation]) - sign = "test-sign" - - fragment = TransliteratedFragmentFactory.build(text=text_with_labels) - (when(annotations_repository).find_by_sign(sign).thenReturn([annotation])) - ( - when(fragment_repository) - .query_by_museum_number(annotation.fragment_number) - .thenReturn(fragment) - ) - ( - when(photo_repository) - .query_by_file_name(f"{annotation.fragment_number}.jpg") - .thenReturn(create_test_photo("K.2")) - ) - - result = image_extractor.cropped_images_from_sign(sign) - assert len(result) > 0 - first_cropped_annotation = result[0] - assert isinstance(first_cropped_annotation, CroppedAnnotation) - assert first_cropped_annotation.script == fragment.script - assert first_cropped_annotation.label == "i Stone wig Stone wig 2" diff --git a/ebl/tests/fragmentarium/test_annotations_route.py b/ebl/tests/fragmentarium/test_annotations_route.py index 2822dcc7e..f4ad88285 100644 --- a/ebl/tests/fragmentarium/test_annotations_route.py +++ b/ebl/tests/fragmentarium/test_annotations_route.py @@ -5,8 +5,11 @@ from ebl.fragmentarium.application.annotations_schema import AnnotationsSchema from ebl.fragmentarium.domain.annotation import Annotations + +from ebl.tests.conftest import create_test_photo +from ebl.tests.factories.annotation import AnnotationsFactory, AnnotationFactory +from ebl.tests.factories.fragment import TransliteratedFragmentFactory from ebl.transliteration.domain.museum_number import MuseumNumber -from ebl.tests.factories.annotation import AnnotationsFactory def test_find_annotations(client): @@ -65,23 +68,37 @@ def test_find_not_allowed(guest_client): assert result.status == falcon.HTTP_FORBIDDEN -def test_update(client): - annotations = AnnotationsFactory.build() - fragment_number = annotations.fragment_number - body = AnnotationsSchema().dumps(annotations) - url = f"/fragments/{fragment_number}/annotations" - post_result = client.simulate_post(url, body=body) +def test_update(client, fragment_repository, photo_repository): + number = MuseumNumber.of("K.123") + annotation_1 = AnnotationFactory(cropped_sign=None) + annotation_2 = AnnotationFactory(cropped_sign=None) + annotations = AnnotationsFactory.build( + fragment_number=number, annotations=[annotation_1, annotation_2] + ) + fragment = TransliteratedFragmentFactory.build(number=number) + fragment_repository.create(fragment) + photo_repository._create(create_test_photo(number)) + + body = AnnotationsSchema().dump(annotations) + url = f"/fragments/{number}/annotations" + post_result = client.simulate_post(url, body=json.dumps(body)) expected_json = AnnotationsSchema().dump(annotations) assert post_result.status == falcon.HTTP_OK - assert post_result.json == expected_json + assert post_result.json["fragmentNumber"] == expected_json["fragmentNumber"] + for annotation, expected_annotation in zip( + post_result.json["annotations"], expected_json["annotations"] + ): + assert annotation["geometry"] == expected_annotation["geometry"] + assert annotation["data"] == expected_annotation["data"] + assert annotation["croppedSign"] is not None get_result = client.simulate_get( - f"/fragments/{fragment_number}/annotations", + f"/fragments/{number}/annotations", params={"generateAnnotations": False}, ) - assert get_result.json == expected_json + assert get_result.json == post_result.json def test_update_number_mismatch(client): diff --git a/ebl/tests/fragmentarium/test_annotations_schema.py b/ebl/tests/fragmentarium/test_annotations_schema.py index 3144d19a5..6c5b58431 100644 --- a/ebl/tests/fragmentarium/test_annotations_schema.py +++ b/ebl/tests/fragmentarium/test_annotations_schema.py @@ -1,4 +1,5 @@ from ebl.fragmentarium.application.annotations_schema import AnnotationsSchema +from ebl.fragmentarium.application.cropped_sign_image import CroppedSign from ebl.fragmentarium.domain.annotation import ( Annotation, Geometry, @@ -18,13 +19,19 @@ TYPE = AnnotationValueType.HAS_SIGN ID = "abc123" SIGN_NAME = "KUR" +IMAGE_ID = "image-id" +SCRIPT = "script" +LABEL = "label" ANNOTATION = Annotation( - Geometry(X, Y, WIDTH, HEIGHT), AnnotationData(ID, VALUE, TYPE, PATH, SIGN_NAME) + Geometry(X, Y, WIDTH, HEIGHT), + AnnotationData(ID, VALUE, TYPE, PATH, SIGN_NAME), + CroppedSign(IMAGE_ID, SCRIPT, LABEL), ) MUSEUM_NUMBER = MuseumNumber("K", "1") ANNOTATIONS = Annotations(MUSEUM_NUMBER, [ANNOTATION]) + SERIALIZED = { "fragmentNumber": str(MUSEUM_NUMBER), "annotations": [ @@ -37,6 +44,7 @@ "signName": SIGN_NAME, "path": PATH, }, + "croppedSign": {"imageId": IMAGE_ID, "script": SCRIPT, "label": LABEL}, } ], } diff --git a/ebl/tests/fragmentarium/test_annotations_service.py b/ebl/tests/fragmentarium/test_annotations_service.py index b616d7721..3b5c4ee18 100644 --- a/ebl/tests/fragmentarium/test_annotations_service.py +++ b/ebl/tests/fragmentarium/test_annotations_service.py @@ -1,16 +1,71 @@ +import attr + from ebl.ebl_ai_client import EblAiClient from ebl.fragmentarium.application.annotations_schema import AnnotationsSchema from ebl.fragmentarium.application.annotations_service import AnnotationsService +from ebl.fragmentarium.application.cropped_sign_image import Base64, CroppedSignImage from ebl.fragmentarium.domain.annotation import Annotations -from ebl.transliteration.domain.museum_number import MuseumNumber from ebl.tests.conftest import create_test_photo -from ebl.tests.factories.annotation import AnnotationsFactory +from ebl.tests.factories.annotation import ( + AnnotationsFactory, + AnnotationFactory, + AnnotationDataFactory, + CroppedSignFactory, +) +from ebl.tests.factories.fragment import TransliteratedFragmentFactory +from ebl.transliteration.domain.museum_number import MuseumNumber SCHEMA = AnnotationsSchema() +def test_label_by_line_number(text_with_labels, annotations_service): + assert ( + annotations_service._label_by_line_number(2, text_with_labels.labels) + == "i Stone wig Stone wig 2" + ) + + +def test_cropped_images_from_sign( + annotations_repository, + fragment_repository, + photo_repository, + when, + text_with_labels, + annotations_service, +): + single_annotation = AnnotationFactory.build( + data=AnnotationDataFactory.build(path=[2, 0, 0]) + ) + annotation = AnnotationsFactory.build(annotations=[single_annotation]) + + fragment = TransliteratedFragmentFactory.build(text=text_with_labels) + ( + when(fragment_repository) + .query_by_museum_number(annotation.fragment_number) + .thenReturn(fragment) + ) + ( + when(photo_repository) + .query_by_file_name(f"{annotation.fragment_number}.jpg") + .thenReturn(create_test_photo("K.2")) + ) + + annotations, cropped_images = annotations_service._cropped_image_from_annotations( + annotation + ) + for annotation, cropped_image in zip(annotations.annotations, cropped_images): + assert annotation.cropped_sign.image_id == cropped_image.image_id + assert annotation.cropped_sign.script == fragment.script + assert annotation.cropped_sign.label == "i Stone wig Stone wig 2" + + def test_generate_annotations( - annotations_repository, photo_repository, changelog, when + annotations_repository, + photo_repository, + changelog, + when, + fragment_repository, + cropped_sign_images_repository, ): fragment_number = MuseumNumber.of("X.0") @@ -21,9 +76,14 @@ def test_generate_annotations( ) ebl_ai_client = EblAiClient("mock-localhost:8001") service = AnnotationsService( - ebl_ai_client, annotations_repository, photo_repository, changelog + ebl_ai_client, + annotations_repository, + photo_repository, + changelog, + fragment_repository, + photo_repository, + cropped_sign_images_repository, ) - expected = Annotations(fragment_number, tuple()) when(ebl_ai_client).generate_annotations(fragment_number, image_file, 0).thenReturn( expected @@ -34,36 +94,75 @@ def test_generate_annotations( assert annotations == expected -def test_find(annotations_repository, photo_repository, changelog, when): +def test_find(annotations_repository, annotations_service, when): annotations = AnnotationsFactory.build() when(annotations_repository).query_by_museum_number( annotations.fragment_number ).thenReturn(annotations) - service = AnnotationsService( - EblAiClient(""), annotations_repository, photo_repository, changelog - ) - assert service.find(annotations.fragment_number) == annotations + assert annotations_service.find(annotations.fragment_number) == annotations -def test_update(annotations_repository, photo_repository, when, user, changelog): +def test_update( + annotations_service, + annotations_repository, + photo_repository, + fragment_repository, + cropped_sign_images_repository, + when, + user, + changelog, + text_with_labels, +): fragment_number = MuseumNumber("K", "1") - annotations = AnnotationsFactory.build(fragment_number=fragment_number) - updated_annotations = AnnotationsFactory.build(fragment_number=fragment_number) + fragment = TransliteratedFragmentFactory( + number=fragment_number, text=text_with_labels + ) + + old_annotations = AnnotationsFactory.build(fragment_number=fragment_number) + + data = AnnotationDataFactory.build(path=[2, 0, 0]) + annotation = AnnotationFactory.build(cropped_sign=None, data=data) + annotations = AnnotationsFactory.build( + fragment_number=fragment_number, annotations=[annotation] + ) when(annotations_repository).query_by_museum_number(fragment_number).thenReturn( - annotations + old_annotations + ) + image = create_test_photo("K.2") + when(fragment_repository).query_by_museum_number(fragment_number).thenReturn( + fragment + ) + ( + when(photo_repository) + .query_by_file_name(f"{annotations.fragment_number}.jpg") + .thenReturn(image) ) - when(annotations_repository).create_or_update(updated_annotations).thenReturn() + + expected_cropped_sign_image = CroppedSignImage("test-id", Base64("test-image")) + annotation_cropped_sign = attr.evolve( + annotation, + cropped_sign=CroppedSignFactory.build( + image_id="test-id", script=fragment.script, label="i Stone wig Stone wig 2" + ), + ) + expected_annotations = attr.evolve( + annotations, annotations=[annotation_cropped_sign] + ) + when(CroppedSignImage).create(...).thenReturn(expected_cropped_sign_image) + + when(annotations_repository).create_or_update(expected_annotations).thenReturn() + when(cropped_sign_images_repository).create_many( + [expected_cropped_sign_image] + ).thenReturn() + schema = AnnotationsSchema() when(changelog).create( "annotations", user.profile, - {"_id": str(fragment_number), **SCHEMA.dump(annotations)}, - {"_id": str(fragment_number), **SCHEMA.dump(updated_annotations)}, + {"_id": str(fragment_number), **schema.dump(old_annotations)}, + {"_id": str(fragment_number), **schema.dump(expected_annotations)}, ).thenReturn() - service = AnnotationsService( - EblAiClient(""), annotations_repository, photo_repository, changelog - ) - - assert service.update(updated_annotations, user) == updated_annotations + result = annotations_service.update(annotations, user) + assert result == expected_annotations diff --git a/ebl/tests/fragmentarium/test_cropped_annotations_service.py b/ebl/tests/fragmentarium/test_cropped_annotations_service.py new file mode 100644 index 000000000..2de7fdcdb --- /dev/null +++ b/ebl/tests/fragmentarium/test_cropped_annotations_service.py @@ -0,0 +1,41 @@ +from ebl.fragmentarium.application.cropped_annotations_service import ( + CroppedAnnotationService, +) +from ebl.fragmentarium.application.cropped_sign_image import ( + Base64, + CroppedSignImage, + CroppedAnnotation, +) +from ebl.tests.factories.annotation import ( + AnnotationsFactory, + AnnotationFactory, +) + + +def test_find_annotations_by_sign( + annotations_repository, cropped_sign_images_repository, when +): + service = CroppedAnnotationService( + annotations_repository, cropped_sign_images_repository + ) + annotation = AnnotationFactory.build_batch(2) + annotations = AnnotationsFactory.build(annotations=annotation) + + image_id_1 = annotation[0].cropped_sign.image_id + image_id_2 = annotation[1].cropped_sign.image_id + + when(annotations_repository).find_by_sign("test-sign").thenReturn([annotations]) + when(cropped_sign_images_repository).query_by_id(image_id_1).thenReturn( + CroppedSignImage(image_id_1, Base64("test-base64-1")) + ) + when(cropped_sign_images_repository).query_by_id(image_id_2).thenReturn( + CroppedSignImage(image_id_2, Base64("test-base64-2")) + ) + fragment_number = annotations.fragment_number + expected_1 = CroppedAnnotation.from_cropped_sign( + fragment_number, Base64("test-base64-1"), annotation[0].cropped_sign + ) + expected_2 = CroppedAnnotation.from_cropped_sign( + fragment_number, Base64("test-base64-2"), annotation[1].cropped_sign + ) + assert service.find_annotations_by_sign("test-sign") == [expected_1, expected_2] diff --git a/ebl/tests/signs/test_sign_images_route.py b/ebl/tests/signs/test_sign_images_route.py index 0d25cb85b..f980b05f9 100644 --- a/ebl/tests/signs/test_sign_images_route.py +++ b/ebl/tests/signs/test_sign_images_route.py @@ -1,12 +1,15 @@ import falcon -from ebl.transliteration.domain.museum_number import MuseumNumber +from ebl.fragmentarium.application.cropped_sign_image import CroppedSignImage, Base64 + from ebl.tests.factories.annotation import ( AnnotationsFactory, AnnotationFactory, AnnotationDataFactory, + CroppedSignFactory, ) from ebl.tests.factories.fragment import TransliteratedFragmentFactory +from ebl.transliteration.domain.museum_number import MuseumNumber def test_signs_get( @@ -16,6 +19,7 @@ def test_signs_get( when, fragment_repository, text_with_labels, + cropped_sign_images_repository, ): fragment = TransliteratedFragmentFactory.build( number=MuseumNumber.of("K.2"), text=text_with_labels @@ -23,7 +27,17 @@ def test_signs_get( fragment_repository.create(fragment) annotation_data = AnnotationDataFactory.build(sign_name="signName", path=[2, 0, 0]) - annotation = AnnotationFactory.build(data=annotation_data) + cropped_sign = CroppedSignFactory.build(script=fragment.script) + annotation = AnnotationFactory.build( + data=annotation_data, cropped_sign=cropped_sign + ) + cropped_sign_images_repository.create_many( + [ + CroppedSignImage( + annotation.cropped_sign.image_id, Base64("test-base64-string") + ) + ] + ) annotations_repository.create_or_update( AnnotationsFactory.build(fragment_number="K.2", annotations=[annotation]) ) @@ -34,8 +48,8 @@ def test_signs_get( result_json = result.json[0] assert result_json["fragmentNumber"] == str(fragment.number) - assert isinstance(result_json["image"], str) - assert result_json["script"] == fragment.script - assert result_json["label"] == "i Stone wig Stone wig 2" + assert result_json["image"] == "test-base64-string" + assert result_json["script"] == cropped_sign.script + assert result_json["label"] == cropped_sign.label assert result.status == falcon.HTTP_OK diff --git a/ebl/tests/test_mongo_collection.py b/ebl/tests/test_mongo_collection.py index 1b4456e6e..0c3c82fa5 100644 --- a/ebl/tests/test_mongo_collection.py +++ b/ebl/tests/test_mongo_collection.py @@ -9,6 +9,15 @@ def collection(database): return MongoCollection(database, "collection") +def test_create_many_and_find_by_id(collection): + documents = [{"data": "payload-1"}, {"data": "payload-2"}] + + insert_ids = collection.insert_many(documents) + assert [ + collection.find_one_by_id(insert_id) for insert_id in insert_ids + ] == documents + + def test_create_and_find_by_id(collection): document = {"data": "payload"} insert_id = collection.insert_one(document) diff --git a/ebl/tests/transliteration/test_line_label.py b/ebl/tests/transliteration/test_line_label.py new file mode 100644 index 000000000..f3c8cd849 --- /dev/null +++ b/ebl/tests/transliteration/test_line_label.py @@ -0,0 +1,30 @@ +import pytest + +from ebl.transliteration.domain import atf +from ebl.transliteration.domain.labels import ColumnLabel, SurfaceLabel, ObjectLabel +from ebl.transliteration.domain.line_label import LineLabel +from ebl.transliteration.domain.line_number import LineNumber, LineNumberRange + + +@pytest.mark.parametrize( + "line_label, expected", + [ + ( + LineLabel( + ColumnLabel.from_int(1), + SurfaceLabel([], atf.Surface.SURFACE, "Stone wig"), + ObjectLabel([], atf.Object.OBJECT, "Stone wig"), + LineNumber(2), + ), + "i Stone wig Stone wig 2", + ), + ( + LineLabel( + None, None, None, LineNumberRange(LineNumber(1, True), LineNumber(3)) + ), + "1'-3", + ), + ], +) +def test_format_line_label(line_label, expected, annotations_service): + assert line_label.formatted_label == expected diff --git a/ebl/tests/transliteration/test_line_number.py b/ebl/tests/transliteration/test_line_number.py index d59210a06..a813a7d9b 100644 --- a/ebl/tests/transliteration/test_line_number.py +++ b/ebl/tests/transliteration/test_line_number.py @@ -47,3 +47,17 @@ def test_line_number_range(start: LineNumber, end: LineNumber) -> None: assert line_number.atf == f"{label}." assert line_number.label == label assert line_number.is_beginning_of_side == start.is_beginning_of_side + + +@pytest.mark.parametrize( + "line_number, matching_number, expected", + [ + (LineNumber(1), 1, True), + (LineNumber(2), 1, False), + (LineNumberRange(LineNumber(1), LineNumber(3)), 1, True), + (LineNumberRange(LineNumber(1), LineNumber(3)), 2, True), + (LineNumberRange(LineNumber(1), LineNumber(3)), 4, False), + ], +) +def test_is_line_matching_number(line_number, matching_number, expected): + assert line_number.is_matching_number(matching_number) == expected diff --git a/ebl/transliteration/domain/line_label.py b/ebl/transliteration/domain/line_label.py index 5298e3e4a..8effe5476 100644 --- a/ebl/transliteration/domain/line_label.py +++ b/ebl/transliteration/domain/line_label.py @@ -26,3 +26,20 @@ def set_object(self, object: Optional[ObjectLabel]) -> "LineLabel": def set_line_number(self, line_number: Optional[AbstractLineNumber]) -> "LineLabel": return attr.evolve(self, line_number=line_number) + + @property + def formatted_label(self) -> str: + line_number = self.line_number + column = self.column + surface = self.surface + object = self.object + line_atf = line_number.atf if line_number else "" + column_abbr = column.abbreviation if column else "" + surface_abbr = surface.abbreviation if surface else "" + object_abbr = object.abbreviation if object else "" + return " ".join( + filter( + bool, + [column_abbr, surface_abbr, object_abbr, line_atf.replace(".", "")], + ) + ) diff --git a/ebl/transliteration/domain/line_number.py b/ebl/transliteration/domain/line_number.py index 7b9992e15..146278511 100644 --- a/ebl/transliteration/domain/line_number.py +++ b/ebl/transliteration/domain/line_number.py @@ -19,6 +19,10 @@ def is_beginning_of_side(self) -> bool: def atf(self) -> str: return f"{self.label}." + @abstractmethod + def is_matching_number(self, number: int) -> bool: + ... + @attr.s(auto_attribs=True, frozen=True) class LineNumber(AbstractLineNumber): @@ -38,6 +42,9 @@ def label(self) -> str: def is_beginning_of_side(self) -> bool: return self.number == 1 and not self.has_prime and self.prefix_modifier is None + def is_matching_number(self, number: int) -> bool: + return number == self.number + @attr.s(auto_attribs=True, frozen=True) class LineNumberRange(AbstractLineNumber): @@ -51,3 +58,6 @@ def label(self) -> str: @property def is_beginning_of_side(self) -> bool: return self.start.is_beginning_of_side + + def is_matching_number(self, number: int) -> bool: + return self.start.number <= number <= self.end.number