Skip to content

Commit

Permalink
Complement person films info (#21)
Browse files Browse the repository at this point in the history
* Tests for genre service

* Add minimal person service

* Add title and rating to ETL for person films

* Add title and rating to models
  • Loading branch information
a1d4r authored Mar 22, 2024
1 parent 273b36d commit 8061381
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 14 deletions.
8 changes: 5 additions & 3 deletions async_api/src/models/person.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from models.base import UUIDBase


class PersonFilmRoles(UUIDBase):
"""Модель для хранения информации о ролях актёра в фильме."""
class PersonFilm(UUIDBase):
"""Model holding information about a film in which the actor took part."""

# Film title.
title: str
# IMDb rating of the film.
imdb_rating: float
# Roles the person had in this film (the tests expect values such as "director" and "writer").
roles: list[str]


class Person(UUIDBase):
"""Model holding information about an actor."""

full_name: str
films: list[PersonFilmRoles]
films: list[PersonFilm]
21 changes: 17 additions & 4 deletions async_api/tests/test_services/test_person_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,14 @@ async def test_get_person_by_id(person_service: PersonService):
assert person.id == george_lucas_id
assert person.full_name == "George Lucas"
assert len(person.films) == 46
assert person.films[0].id == UUID("516f91da-bd70-4351-ba6d-25e16b7713b7")
assert person.films[0].roles == ["director", "writer"]
person_film = next(
(film for film in person.films if film.id == UUID("516f91da-bd70-4351-ba6d-25e16b7713b7")),
None,
)
assert person_film is not None
assert person_film.title == "Star Wars: Episode III - Revenge of the Sith"
assert person_film.imdb_rating == 7.5
assert set(person_film.roles) == {"director", "writer"}


async def test_search_persons(person_service: PersonService):
Expand All @@ -50,8 +56,15 @@ async def test_search_persons_with_query(person_service: PersonService):

# Assert
person = persons[0]
assert person is not None
assert person.id == george_lucas_id
assert person.full_name == "George Lucas"
assert len(person.films) == 46
assert person.films[0].id == UUID("516f91da-bd70-4351-ba6d-25e16b7713b7")
assert person.films[0].roles == ["director", "writer"]
person_film = next(
(film for film in person.films if film.id == UUID("516f91da-bd70-4351-ba6d-25e16b7713b7")),
None,
)
assert person_film is not None
assert person_film.title == "Star Wars: Episode III - Revenge of the Sith"
assert person_film.imdb_rating == 7.5
assert set(person_film.roles) == {"director", "writer"}
3 changes: 3 additions & 0 deletions etl/.env.docker
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
POSTGRES_DSN=postgres://app:123qwe@postgres:5432/movies_database
ELASTICSEARCH_HOST=http://elasticsearch:9200
STATE_FILE_PATH=/data/state.json
PERSONS_PER_RUN=10000
GENRES_PER_RUN=10000
FILM_WORKS_PER_RUN=10000
12 changes: 8 additions & 4 deletions etl/etl/dto/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
__all__ = [
"BaseElasticsearchRecord",
"PersonMinimalElasticsearchRecord",
"FilmWorkMinimalElasticsearchRecord",
"PersonFilmWorkElasticsearchRecord",
"GenreElasticsearchRecord",
"PersonElasticsearchRecord",
"FilmWorkElasticsearchRecord",
Expand All @@ -26,9 +26,13 @@ class PersonMinimalElasticsearchRecord(BaseElasticsearchRecord):


@dataclass
class FilmWorkMinimalElasticsearchRecord(BaseElasticsearchRecord):
"""Модель для хранения краткой информации о кинопроизведении в индексе Elasticsearch."""
class PersonFilmWorkElasticsearchRecord(BaseElasticsearchRecord):
"""Model holding information about a film work in which
the person took part, as stored in the Elasticsearch index.
"""

title: str
imdb_rating: float
roles: list[str]


Expand All @@ -45,7 +49,7 @@ class PersonElasticsearchRecord(BaseElasticsearchRecord):
"""Модель для хранения информации о персоне в индексе Elasticsearch."""

full_name: str
films: list[FilmWorkMinimalElasticsearchRecord]
films: list[PersonFilmWorkElasticsearchRecord]


@dataclass
Expand Down
2 changes: 2 additions & 0 deletions etl/etl/dto/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ class PersonFilmWorkRecord:

person_id: UUID
film_work_id: UUID
title: str
rating: float
roles: list[str]


Expand Down
7 changes: 5 additions & 2 deletions etl/etl/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,10 +255,13 @@ def fetch_persons_film_works(
SELECT
person_id,
film_work_id,
title,
rating,
array_agg(role) AS roles
FROM content.person_film_work
FROM content.person_film_work pfw
LEFT JOIN content.film_work fw ON fw.id = pfw.film_work_id
WHERE person_id = ANY(%(persons_ids)s)
GROUP BY (person_id, film_work_id)
GROUP BY (person_id, film_work_id, title, rating)
""",
({"persons_ids": [person.id for person in persons]}),
)
Expand Down
7 changes: 6 additions & 1 deletion etl/etl/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ def build_persons_elasticsearch_records(
id=person_info.id,
full_name=person_info.full_name,
films=[
dto.FilmWorkMinimalElasticsearchRecord(id=film.film_work_id, roles=film.roles)
dto.PersonFilmWorkElasticsearchRecord(
id=film.film_work_id,
title=film.title,
imdb_rating=film.rating,
roles=film.roles,
)
for film in films_by_person_id[person_info.id]
],
)
Expand Down
6 changes: 6 additions & 0 deletions infra/elasticsearch/indexes/persons.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@
"id": {
"type": "keyword"
},
"title": {
"type": "text"
},
"imdb_rating": {
"type": "float"
},
"roles": {
"type": "keyword"
}
Expand Down

0 comments on commit 8061381

Please sign in to comment.