Skip to content

Commit

Permalink
gps: Improve user autocomplete
Browse files Browse the repository at this point in the history
Use psql full-text with a generated search vector field and an index on
it
  • Loading branch information
tonial committed Jan 13, 2025
1 parent 41cae5f commit a73e437
Show file tree
Hide file tree
Showing 33 changed files with 393 additions and 34 deletions.
22 changes: 22 additions & 0 deletions itou/users/migrations/0020_add_simple_unaccent_search_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 5.1.4 on 2025-01-10 09:31

from django.db import migrations


class Migration(migrations.Migration):
dependencies = [
("users", "0019_drop_jobseekerprofile_ata_allocation_since"),
]

operations = [
migrations.RunSQL(
"CREATE TEXT SEARCH CONFIGURATION simple_unaccent (COPY=simple);",
reverse_sql="DROP TEXT SEARCH CONFIGURATION simple_unaccent;",
),
migrations.RunSQL(
"ALTER TEXT SEARCH CONFIGURATION simple_unaccent "
"ALTER MAPPING FOR hword, hword_part, word "
"WITH unaccent,simple;",
reverse_sql=migrations.RunSQL.noop,
),
]
26 changes: 26 additions & 0 deletions itou/users/migrations/0021_user_full_name_search_vector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Generated by Django 5.1.4 on 2025-01-10 14:38

import django.contrib.postgres.lookups
import django.contrib.postgres.search
from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("users", "0020_add_simple_unaccent_search_config"),
]

operations = [
migrations.AddField(
model_name="user",
name="full_name_search_vector",
field=models.GeneratedField(
db_persist=True,
expression=django.contrib.postgres.search.SearchVector(
"first_name", "last_name", config="simple_unaccent"
),
output_field=django.contrib.postgres.search.SearchVectorField(),
verbose_name="nom complet utilisé pour rechercher un utilisateur",
),
),
]
22 changes: 22 additions & 0 deletions itou/users/migrations/0022_user_full_name_search_idx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 5.1.4 on 2025-01-10 14:38

import django.contrib.postgres.indexes
from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("auth", "0012_alter_user_first_name_max_length"),
("cities", "0001_initial"),
("users", "0021_user_full_name_search_vector"),
]

operations = [
migrations.AddIndex(
model_name="user",
index=django.contrib.postgres.indexes.GinIndex(
models.F("full_name_search_vector"),
name="full_name_search_idx",
),
),
]
12 changes: 10 additions & 2 deletions itou/users/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from citext import CIEmailField
from django.conf import settings
from django.contrib.auth.models import AbstractUser, UserManager
from django.contrib.postgres.indexes import OpClass
from django.contrib.postgres.indexes import GinIndex, OpClass
from django.contrib.postgres.search import SearchVector, SearchVectorField
from django.core.exceptions import ValidationError
from django.core.serializers.json import DjangoJSONEncoder
from django.core.validators import MaxLengthValidator, MinLengthValidator, RegexValidator
Expand Down Expand Up @@ -170,6 +171,12 @@ class User(AbstractUser, AddressMixin):
default="",
choices=Title.choices,
)
full_name_search_vector = models.GeneratedField(
expression=SearchVector("first_name", "last_name", config="simple_unaccent"),
output_field=SearchVectorField(),
verbose_name="nom complet utilisé pour rechercher un utilisateur",
db_persist=True,
)

email = CIEmailField(
"adresse e-mail",
Expand Down Expand Up @@ -231,7 +238,8 @@ class Meta(AbstractUser.Meta):
models.Index(
OpClass(Upper("email"), name="text_pattern_ops"),
name="users_user_email_upper",
)
),
GinIndex("full_name_search_vector", name="full_name_search_idx"),
]
constraints = [
models.CheckConstraint(
Expand Down
48 changes: 23 additions & 25 deletions itou/www/autocomplete/views.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from datetime import datetime

from django.contrib.auth.decorators import login_not_required
from django.contrib.postgres.search import SearchQuery, SearchRank
from django.db.models import Exists, F, OuterRef, Q, Value
from django.db.models.functions import Least, Lower, NullIf, StrIndex
from django.http import JsonResponse
Expand All @@ -13,7 +14,6 @@
from itou.users.enums import UserKind
from itou.users.models import User
from itou.utils.auth import check_user
from itou.utils.db import or_queries
from itou.www.gps.views import is_allowed_to_use_gps_advanced_features


Expand Down Expand Up @@ -141,37 +141,35 @@ def gps_users_autocomplete(request):
users = []

if term:
# We started by using to_vector queries but it's not suitable for searching names because
# it tries to lemmatize names so for example, henry becomes henri after lemmatization and
# the search doesn't work.
# Then we tried TrigramSimilarity methods but it's too random for accurate searching.
# Fallback to unaccent / icontains for now

search_terms = term.split(" ")
name_q = []
for term in search_terms:
name_q.append(Q(first_name__unaccent__istartswith=term))
name_q.append(Q(last_name__unaccent__istartswith=term))
users_qs = (
User.objects.filter(or_queries(name_q))
.filter(kind=UserKind.JOB_SEEKER)
.exclude(
Exists(
FollowUpGroup.objects.filter(
beneficiary_id=OuterRef("pk"),
memberships__member=current_user,
memberships__is_active=True,
)
users_qs = User.objects.filter(kind=UserKind.JOB_SEEKER).exclude(
Exists(
FollowUpGroup.objects.filter(
beneficiary_id=OuterRef("pk"),
memberships__member=current_user,
memberships__is_active=True,
)
)
)[:10]
)

search_query = SearchQuery(term, config="simple_unaccent")
users_qs = users_qs.filter(full_name_search_vector=search_query)
users_qs = users_qs.annotate(rank=SearchRank("full_name_search_vector", search_query)).order_by("-rank")

def format_user_name(user):
res = ""
if user.title:
res += f"{user.title.capitalize()}. "
res += user.get_full_name()
if getattr(user.jobseeker_profile, "birthdate", None):
res += f" ({user.jobseeker_profile.birthdate})"
return res

users = [
{
"text": user.get_full_name(),
"text": format_user_name(user),
"id": user.pk,
}
for user in users_qs
for user in users_qs[:10]
]

return JsonResponse({"results": users})
2 changes: 2 additions & 0 deletions tests/api/__snapshots__/test_geiq.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down Expand Up @@ -510,6 +511,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down
2 changes: 2 additions & 0 deletions tests/api/employee_record_api/__snapshots__/tests.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down Expand Up @@ -344,6 +345,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down
1 change: 1 addition & 0 deletions tests/companies/__snapshots__/test_admin.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down
5 changes: 5 additions & 0 deletions tests/gps/__snapshots__/test_views.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down Expand Up @@ -376,6 +377,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down Expand Up @@ -485,6 +487,7 @@
T6."ban_api_resolved_address",
T6."insee_city_id",
T6."title",
T6."full_name_search_vector",
T6."email",
T6."phone",
T6."kind",
Expand Down Expand Up @@ -517,6 +520,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down Expand Up @@ -622,6 +626,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down
18 changes: 11 additions & 7 deletions tests/gps/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,23 +46,23 @@ def test_user_autocomplete(client):
FollowUpGroupFactory(beneficiary=third_beneficiary, memberships=3, memberships__member=prescriber)
FollowUpGroupFactory(beneficiary=second_beneficiary, memberships=2)

def get_autocomplete_results(user):
def get_autocomplete_results(user, term="gps"):
client.force_login(user)
response = client.get(reverse("autocomplete:gps_users") + "?term=gps")
return set(r["id"] for r in response.json()["results"])
response = client.get(reverse("autocomplete:gps_users") + f"?term={term}")
return [r["id"] for r in response.json()["results"]]

# Employers should get the 3 job seekers.
results = get_autocomplete_results(EmployerFactory(with_company=True))
assert results == {first_beneficiary.pk, second_beneficiary.pk, third_beneficiary.pk}
assert set(results) == {first_beneficiary.pk, second_beneficiary.pk, third_beneficiary.pk}

# Authorized prescribers should get the 3 job seekers.
org = PrescriberOrganizationWithMembershipFactory(authorized=True)
results = get_autocomplete_results(org.members.get())
assert results == {first_beneficiary.pk, second_beneficiary.pk, third_beneficiary.pk}
assert set(results) == {first_beneficiary.pk, second_beneficiary.pk, third_beneficiary.pk}

# We should not get ourself nor the first and third user user because we are a member of their group
results = get_autocomplete_results(prescriber)
assert results == {second_beneficiary.pk}
assert set(results) == {second_beneficiary.pk}

# Now, if we remove the first user from our group by setting the membership to is_active False
# The autocomplete should return it again
Expand All @@ -73,7 +73,11 @@ def get_autocomplete_results(user):
# We should not get ourself but we should get the first beneficiary (we are is_active=False)
# and the second one (we are not part of his group)
results = get_autocomplete_results(prescriber)
assert results == {first_beneficiary.pk, second_beneficiary.pk}
assert set(results) == {first_beneficiary.pk, second_beneficiary.pk}

# with "martin gps" Martin is the only match
results = get_autocomplete_results(prescriber, term="martin gps")
assert results == [second_beneficiary.pk]


@pytest.mark.parametrize(
Expand Down
2 changes: 2 additions & 0 deletions tests/job_applications/__snapshots__/test_transfer.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down Expand Up @@ -184,6 +185,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down
5 changes: 5 additions & 0 deletions tests/users/__snapshots__/test_admin_views.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down Expand Up @@ -97,6 +98,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down Expand Up @@ -287,6 +289,7 @@
"users_user"."ban_api_resolved_address",
"users_user"."insee_city_id",
"users_user"."title",
"users_user"."full_name_search_vector",
"users_user"."email",
"users_user"."phone",
"users_user"."kind",
Expand Down Expand Up @@ -377,6 +380,7 @@
T3."ban_api_resolved_address",
T3."insee_city_id",
T3."title",
T3."full_name_search_vector",
T3."email",
T3."phone",
T3."kind",
Expand Down Expand Up @@ -467,6 +471,7 @@
T3."ban_api_resolved_address",
T3."insee_city_id",
T3."title",
T3."full_name_search_vector",
T3."email",
T3."phone",
T3."kind",
Expand Down
Loading

0 comments on commit a73e437

Please sign in to comment.