From 7b8a83e9e416c66681057523c97cb6d5400854bc Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 25 Jan 2023 15:36:29 +0100 Subject: [PATCH] [#5199] Optimize /project/children endpoint # Counting children This was slow because we made a query for each project. Now we get the children and grand children. A simple dict with each project and a list of their children is then built in python. **Side effect** Paging doesn't work anymore as the queryset includes grand children. Reimplementing it can be done another time if we really do have projects with enormous hierarchies. # Parent retrieval We already know who the parent is since we're getting its children --> no need to make another query. # Locations We weren't retrieving related objects, which we now do. Additionally, we iterated over the list of countries twice when one iteration was enough. --- akvo/rest/serializers/project.py | 54 ++++++++++++------- akvo/rest/views/project.py | 35 +++++++++--- .../rsr/spa/app/modules/hierarchy/services.js | 10 ++-- akvo/rsr/tests/rest/test_project_hierarchy.py | 11 ++-- 4 files changed, 71 insertions(+), 39 deletions(-) diff --git a/akvo/rest/serializers/project.py b/akvo/rest/serializers/project.py index 313eb499a9..97560d7e01 100644 --- a/akvo/rest/serializers/project.py +++ b/akvo/rest/serializers/project.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- - +import functools # Akvo RSR is covered by the GNU Affero General Public License. # See more details in the license.txt file located at the root folder of the Akvo RSR module. # For additional details on the GNU license please see < http://www.gnu.org/licenses/agpl.html >. from datetime import timedelta import logging -from typing import List +from typing import List, Optional from django.conf import settings from django.utils.timezone import now @@ -358,26 +358,44 @@ class ProjectMetadataSerializer(BaseRSRSerializer): children_count = serializers.SerializerMethodField() def get_children_count(self, obj): - return obj.children().count() + if parents_to_children := self.context.get("parents_to_children"): + return len(parents_to_children.get(obj.uuid, [])) + else: + return obj.children().count() def get_locations(self, obj): - countries = {location.country for location in obj.locations.all() if location.country} - return [ - {'country': c.name, 'iso_code': c.iso_code} - for c - in countries - ] + countries = set() + results = [] + for location in obj.locations.all(): + country = location.country + if not country or country in countries: + continue + countries.add(country) + results.append({ + "country": country.name, + "iso_code": country.iso_code, + }) + return results def get_parent(self, obj): - p = obj.parent() - user = self.context['request'].user - if not user.can_view_project(p): - return None - return ( - {'id': p.id, 'title': p.title, 'is_lead': p.is_hierarchy_root()} - if p is not None - else None - ) + if "parent" in self.context: + p = self.context.get("parent") + else: + p = obj.parent() + user = self.context['request'].user + if not user.can_view_project(p): + return None + return self._parent_to_dict(p) + + @staticmethod + @functools.lru_cache + def _parent_to_dict(parent: Project) -> Optional[dict]: + if parent is not None: + return { + 'id': parent.id, + 'title': parent.title, + 'is_lead': parent.is_hierarchy_root() + } def get_editable(self, obj): """Method used by the editable SerializerMethodField""" diff --git a/akvo/rest/views/project.py b/akvo/rest/views/project.py index 0ffd1936d4..498d2b208a 100644 --- a/akvo/rest/views/project.py +++ b/akvo/rest/views/project.py @@ -6,6 +6,8 @@ """ from datetime import timedelta +from typing import Dict, List +from uuid import UUID from django.conf import settings from django.db.models import Count, Q @@ -104,22 +106,39 @@ def external_project(self, request, **kwargs): def children(self, request, **kwargs): project = self.get_object() - queryset = self._filter_queryset(project.children().select_related( + queryset = self._filter_queryset(project.descendants(max_depth=2, with_self=False).select_related( "primary_organisation", + "projecthierarchy", + "publishingstatus", ).prefetch_related( - "locations", + "locations__country", "recipient_countries", "sectors", + "projectrole_set", )) - page = self.paginate_queryset(queryset) - + descendants = list(queryset) serializer_context = self.get_serializer_context() - if page is not None: - serializer = ProjectMetadataSerializer(page, many=True, context=serializer_context) - return self.get_paginated_response(serializer.data) + # Optimization to count the children + # Collect the children of each project in a list + # The alternative is a subquery, but that's extra slow due to lacking psql indices + parent_to_children: Dict[UUID, List[Project]] = dict() + children: List[Project] = [] + parent_uuid = project.uuid + for descendant in descendants: + if not (curr_parent_uuid := descendant.get_parent_uuid()): + continue + parent_to_children.setdefault(curr_parent_uuid, []).append(descendant) + + if curr_parent_uuid == parent_uuid: + children.append(descendant) + + serializer_context["parents_to_children"] = parent_to_children + + # optimization for ProjectMetadataSerializer.get_parent + serializer_context["parent"] = project - serializer = ProjectMetadataSerializer(queryset, many=True, context=serializer_context).data + serializer = ProjectMetadataSerializer(children, many=True, context=serializer_context) return Response(serializer.data) @action( diff --git a/akvo/rsr/spa/app/modules/hierarchy/services.js b/akvo/rsr/spa/app/modules/hierarchy/services.js index 0d722e2c63..bdfd70fa0c 100644 --- a/akvo/rsr/spa/app/modules/hierarchy/services.js +++ b/akvo/rsr/spa/app/modules/hierarchy/services.js @@ -1,13 +1,9 @@ import api from '../../utils/api' import { getProjectUuids } from '../../utils/misc' -export const getChildrenApi = async (id, page = 1) => { - const response = await api.get(`/project/${id}/children/?format=json&page=${page}`) - const { results, next } = response.data - if (next) { - return results?.concat(await getChildrenApi(id, page + 1)) - } - return results +export const getChildrenApi = async (id) => { + const response = await api.get(`/project/${id}/children/?format=json`) + return response.data } export const getProgramApi = (id, successCallback, errorCallback) => { diff --git a/akvo/rsr/tests/rest/test_project_hierarchy.py b/akvo/rsr/tests/rest/test_project_hierarchy.py index b30f87bffb..e1bac34996 100644 --- a/akvo/rsr/tests/rest/test_project_hierarchy.py +++ b/akvo/rsr/tests/rest/test_project_hierarchy.py @@ -88,23 +88,22 @@ def test_project_access_check(self): self.assertFalse(self.user.has_perm('rsr.view_project', self.p5)) self.assertFalse(self.user.has_perm('rsr.view_project', self.p6)) - def assertReturnsProjects(self, response, project_set): - results = response.data["results"] - self.assertEqual(len(project_set), response.data["count"]) + def assertReturnsProjects(self, results, count, project_set): + self.assertEqual(len(project_set), count) self.assertEqual(project_set, {p['id'] for p in results}) self.assertFalse(results[0]['editable']) def test_fetch_root_projects(self): response = self.c.get('/rest/v1/program/?format=json', follow=True) - self.assertReturnsProjects(response, {self.p1.id, self.p5.id}) + self.assertReturnsProjects(response.data["results"], response.data["count"], {self.p1.id, self.p5.id}) def test_get_program_children(self): response = self.c.get(f'/rest/v1/project/{self.p1.id}/children?format=json', follow=True) - self.assertReturnsProjects(response, {self.p2.id}) + self.assertReturnsProjects(response.data, len(response.data), {self.p2.id}) def test_get_project_children(self): response = self.c.get(f'/rest/v1/project/{self.p2.id}/children?format=json', follow=True) - self.assertReturnsProjects(response, {self.p3.id}) + self.assertReturnsProjects(response.data, len(response.data), {self.p3.id}) class RawProjectHierarchyTestCase(BaseTestCase):