Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complement person films info #21

Merged
merged 4 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions async_api/src/models/person.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from models.base import UUIDBase


# NOTE(review): the diff markers were lost in this capture. Judging by the
# "5 additions & 3 deletions" summary, the `PersonFilmRoles` header and its
# docstring look like the removed (pre-rename) lines -- confirm against the PR.
class PersonFilmRoles(UUIDBase):
"""Model for storing information about an actor's roles in a film."""
# Presumably the replacement (post-rename) header, which also gains the
# `title` and `imdb_rating` fields below.
class PersonFilm(UUIDBase):
"""Model for storing information about a film in which the actor took part."""

title: str
imdb_rating: float
roles: list[str]


class Person(UUIDBase):
"""Model for storing information about an actor."""

full_name: str
# NOTE(review): the two `films` annotations below appear to be the removed and
# the added side of the same diff line (element type renamed from
# `PersonFilmRoles` to `PersonFilm`) -- confirm against the PR.
films: list[PersonFilmRoles]
films: list[PersonFilm]
21 changes: 17 additions & 4 deletions async_api/tests/test_services/test_person_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,14 @@ async def test_get_person_by_id(person_service: PersonService):
assert person.id == george_lucas_id
assert person.full_name == "George Lucas"
assert len(person.films) == 46
assert person.films[0].id == UUID("516f91da-bd70-4351-ba6d-25e16b7713b7")
assert person.films[0].roles == ["director", "writer"]
person_film = next(
(film for film in person.films if film.id == UUID("516f91da-bd70-4351-ba6d-25e16b7713b7")),
None,
)
assert person_film is not None
assert person_film.title == "Star Wars: Episode III - Revenge of the Sith"
assert person_film.imdb_rating == 7.5
assert set(person_film.roles) == {"director", "writer"}


async def test_search_persons(person_service: PersonService):
Expand All @@ -50,8 +56,15 @@ async def test_search_persons_with_query(person_service: PersonService):

# Assert
person = persons[0]
assert person is not None
assert person.id == george_lucas_id
assert person.full_name == "George Lucas"
assert len(person.films) == 46
assert person.films[0].id == UUID("516f91da-bd70-4351-ba6d-25e16b7713b7")
assert person.films[0].roles == ["director", "writer"]
person_film = next(
(film for film in person.films if film.id == UUID("516f91da-bd70-4351-ba6d-25e16b7713b7")),
None,
)
assert person_film is not None
assert person_film.title == "Star Wars: Episode III - Revenge of the Sith"
assert person_film.imdb_rating == 7.5
assert set(person_film.roles) == {"director", "writer"}
3 changes: 3 additions & 0 deletions etl/.env.docker
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
POSTGRES_DSN=postgres://app:123qwe@postgres:5432/movies_database
ELASTICSEARCH_HOST=http://elasticsearch:9200
STATE_FILE_PATH=/data/state.json
PERSONS_PER_RUN=10000
GENRES_PER_RUN=10000
FILM_WORKS_PER_RUN=10000
12 changes: 8 additions & 4 deletions etl/etl/dto/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
__all__ = [
"BaseElasticsearchRecord",
"PersonMinimalElasticsearchRecord",
"FilmWorkMinimalElasticsearchRecord",
"PersonFilmWorkElasticsearchRecord",
"GenreElasticsearchRecord",
"PersonElasticsearchRecord",
"FilmWorkElasticsearchRecord",
Expand All @@ -26,9 +26,13 @@ class PersonMinimalElasticsearchRecord(BaseElasticsearchRecord):


@dataclass
# NOTE(review): the diff markers were lost in this capture. Judging by the
# "8 additions & 4 deletions" summary, the `FilmWorkMinimalElasticsearchRecord`
# header and its docstring look like the removed lines -- confirm against the PR.
class FilmWorkMinimalElasticsearchRecord(BaseElasticsearchRecord):
"""Model for storing brief information about a film work in the Elasticsearch index."""
# Presumably the replacement header, which also gains the `title` and
# `imdb_rating` fields below.
class PersonFilmWorkElasticsearchRecord(BaseElasticsearchRecord):
"""Model for storing information about a film work
in which the person took part, in the Elasticsearch index.
"""

title: str
imdb_rating: float
roles: list[str]


Expand All @@ -45,7 +49,7 @@ class PersonElasticsearchRecord(BaseElasticsearchRecord):
"""Модель для хранения информации о персоне в индексе Elasticsearch."""

full_name: str
films: list[FilmWorkMinimalElasticsearchRecord]
films: list[PersonFilmWorkElasticsearchRecord]


@dataclass
Expand Down
2 changes: 2 additions & 0 deletions etl/etl/dto/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ class PersonFilmWorkRecord:

person_id: UUID
film_work_id: UUID
title: str
rating: float
roles: list[str]


Expand Down
7 changes: 5 additions & 2 deletions etl/etl/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,10 +255,13 @@ def fetch_persons_film_works(
SELECT
person_id,
film_work_id,
title,
rating,
array_agg(role) AS roles
FROM content.person_film_work
FROM content.person_film_work pfw
LEFT JOIN content.film_work fw ON fw.id = pfw.film_work_id
WHERE person_id = ANY(%(persons_ids)s)
GROUP BY (person_id, film_work_id)
GROUP BY (person_id, film_work_id, title, rating)
""",
({"persons_ids": [person.id for person in persons]}),
)
Expand Down
7 changes: 6 additions & 1 deletion etl/etl/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ def build_persons_elasticsearch_records(
id=person_info.id,
full_name=person_info.full_name,
films=[
dto.FilmWorkMinimalElasticsearchRecord(id=film.film_work_id, roles=film.roles)
dto.PersonFilmWorkElasticsearchRecord(
id=film.film_work_id,
title=film.title,
imdb_rating=film.rating,
roles=film.roles,
)
for film in films_by_person_id[person_info.id]
],
)
Expand Down
6 changes: 6 additions & 0 deletions infra/elasticsearch/indexes/persons.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@
"id": {
"type": "keyword"
},
"title": {
"type": "text"
},
"imdb_rating": {
"type": "float"
},
"roles": {
"type": "keyword"
}
Expand Down
Loading