diff --git a/async_api/src/models/person.py b/async_api/src/models/person.py index 4e9696f..82ee446 100644 --- a/async_api/src/models/person.py +++ b/async_api/src/models/person.py @@ -1,9 +1,11 @@ from models.base import UUIDBase -class PersonFilmRoles(UUIDBase): - """Модель для хранения информации о ролях актёра в фильме.""" +class PersonFilm(UUIDBase): + """Модель для хранения информации о фильме, в котором участвовал актёр.""" + title: str + imdb_rating: float roles: list[str] @@ -11,4 +13,4 @@ class Person(UUIDBase): """Модель для хранения информации об актёре.""" full_name: str - films: list[PersonFilmRoles] + films: list[PersonFilm] diff --git a/async_api/tests/test_services/test_person_service.py b/async_api/tests/test_services/test_person_service.py index b218f8b..bebef07 100644 --- a/async_api/tests/test_services/test_person_service.py +++ b/async_api/tests/test_services/test_person_service.py @@ -23,8 +23,14 @@ async def test_get_person_by_id(person_service: PersonService): assert person.id == george_lucas_id assert person.full_name == "George Lucas" assert len(person.films) == 46 - assert person.films[0].id == UUID("516f91da-bd70-4351-ba6d-25e16b7713b7") - assert person.films[0].roles == ["director", "writer"] + person_film = next( + (film for film in person.films if film.id == UUID("516f91da-bd70-4351-ba6d-25e16b7713b7")), + None, + ) + assert person_film is not None + assert person_film.title == "Star Wars: Episode III - Revenge of the Sith" + assert person_film.imdb_rating == 7.5 + assert set(person_film.roles) == {"director", "writer"} async def test_search_persons(person_service: PersonService): @@ -50,8 +56,15 @@ async def test_search_persons_with_query(person_service: PersonService): # Assert person = persons[0] + assert person is not None assert person.id == george_lucas_id assert person.full_name == "George Lucas" assert len(person.films) == 46 - assert person.films[0].id == UUID("516f91da-bd70-4351-ba6d-25e16b7713b7") - assert person.films[0].roles == ["director", "writer"] + person_film = next( + (film for film in person.films if film.id == UUID("516f91da-bd70-4351-ba6d-25e16b7713b7")), + None, + ) + assert person_film is not None + assert person_film.title == "Star Wars: Episode III - Revenge of the Sith" + assert person_film.imdb_rating == 7.5 + assert set(person_film.roles) == {"director", "writer"} diff --git a/etl/.env.docker b/etl/.env.docker index 17e91d6..1eee1d8 100644 --- a/etl/.env.docker +++ b/etl/.env.docker @@ -1,3 +1,6 @@ POSTGRES_DSN=postgres://app:123qwe@postgres:5432/movies_database ELASTICSEARCH_HOST=http://elasticsearch:9200 STATE_FILE_PATH=/data/state.json +PERSONS_PER_RUN=10000 +GENRES_PER_RUN=10000 +FILM_WORKS_PER_RUN=10000 diff --git a/etl/etl/dto/elasticsearch.py b/etl/etl/dto/elasticsearch.py index 53c9a3d..a5a8565 100644 --- a/etl/etl/dto/elasticsearch.py +++ b/etl/etl/dto/elasticsearch.py @@ -4,7 +4,7 @@ __all__ = [ "BaseElasticsearchRecord", "PersonMinimalElasticsearchRecord", - "FilmWorkMinimalElasticsearchRecord", + "PersonFilmWorkElasticsearchRecord", "GenreElasticsearchRecord", "PersonElasticsearchRecord", "FilmWorkElasticsearchRecord", @@ -26,9 +26,13 @@ class PersonMinimalElasticsearchRecord(BaseElasticsearchRecord): @dataclass -class FilmWorkMinimalElasticsearchRecord(BaseElasticsearchRecord): - """Модель для хранения краткой информации о кинопроизведении в индексе Elasticsearch.""" +class PersonFilmWorkElasticsearchRecord(BaseElasticsearchRecord): + """Модель для хранения информации о кинопроизведении, + в котором участвовала персона, в индексе Elasticsearch. + """ + title: str + imdb_rating: float roles: list[str] @@ -45,7 +49,7 @@ class PersonElasticsearchRecord(BaseElasticsearchRecord): """Модель для хранения информации о персоне в индексе Elasticsearch.""" full_name: str - films: list[FilmWorkMinimalElasticsearchRecord] + films: list[PersonFilmWorkElasticsearchRecord] @dataclass diff --git a/etl/etl/dto/postgres.py b/etl/etl/dto/postgres.py index 653a083..32a7f64 100644 --- a/etl/etl/dto/postgres.py +++ b/etl/etl/dto/postgres.py @@ -64,6 +64,8 @@ class PersonFilmWorkRecord: person_id: UUID film_work_id: UUID + title: str + rating: float roles: list[str] diff --git a/etl/etl/extractor.py b/etl/etl/extractor.py index 0c9c1a2..a31560f 100644 --- a/etl/etl/extractor.py +++ b/etl/etl/extractor.py @@ -255,10 +255,13 @@ def fetch_persons_film_works( SELECT person_id, film_work_id, + title, + rating, array_agg(role) AS roles - FROM content.person_film_work + FROM content.person_film_work pfw + LEFT JOIN content.film_work fw ON fw.id = pfw.film_work_id WHERE person_id = ANY(%(persons_ids)s) - GROUP BY (person_id, film_work_id) + GROUP BY (person_id, film_work_id, title, rating) """, ({"persons_ids": [person.id for person in persons]}), ) diff --git a/etl/etl/transformer.py b/etl/etl/transformer.py index dfd8271..7a5f661 100644 --- a/etl/etl/transformer.py +++ b/etl/etl/transformer.py @@ -84,7 +84,12 @@ def build_persons_elasticsearch_records( id=person_info.id, full_name=person_info.full_name, films=[ - dto.FilmWorkMinimalElasticsearchRecord(id=film.film_work_id, roles=film.roles) + dto.PersonFilmWorkElasticsearchRecord( + id=film.film_work_id, + title=film.title, + imdb_rating=film.rating, + roles=film.roles, + ) for film in films_by_person_id[person_info.id] ], ) diff --git a/infra/elasticsearch/indexes/persons.json b/infra/elasticsearch/indexes/persons.json index 8db514c..23bf8d0 100644 --- a/infra/elasticsearch/indexes/persons.json +++ b/infra/elasticsearch/indexes/persons.json @@ -55,6 +55,12 @@ "id": { "type": "keyword" }, + "title": { + "type": "text" + }, + "imdb_rating": { + "type": "float" + }, "roles": { "type": "keyword" }