Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP Add unaccented full text search configuration + search in annotation #1101

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 19 additions & 14 deletions docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,21 @@ This project adheres to `Semantic Versioning <http://semver.org/>`_.
Unreleased
==========

Fix
---
- Store references to all subdirectories of the referenced files in the database

Changed
-------
- Bulk delete method silently ignores non-existent objects and objects without
edit permissions instead of raising an exception
- Use ``simple_unaccent`` full text search configuration instead of ``simple``

Added
-----
- Add unaccented full text search configuration ``simple_unaccent``

Changed
-------
- Use ``simple_unaccent`` full text search configuration instead of ``simple``
- Use ``simple_unaccent`` full text search configuration instead of ``simple``
- Create ``simple_unaccent`` full text search index in ``PostgreSQL``


===================
Expand All @@ -42,12 +49,12 @@ Changed
38.3.0 - 2024-01-11
===================

Fix
---
Fixed
-----
- Correctly clear ``Redis`` cache on data restart

Add
---
Added
-----
- Add ``clear_redis_cache`` management command
- Add modified field to the ``AnnotationValue`` model and expose it in API

Expand All @@ -56,8 +63,8 @@ Add
38.2.0 - 2023-12-15
===================

Fix
---
Fixed
-----
- Add default value for ``FLOW_PROCESSES_ALLOW_LIST`` and
``FLOW_PROCESSES_IGNORE_LIST`` in case of missing settings.
- User defined slug must not be changed
Expand Down Expand Up @@ -90,6 +97,7 @@ Changed
-------
- Redis cache in listener is updated when data fields are retrieved from the
database
- Bulk annotations on entity endpoint now accept field path instead of id

Added
-----
Expand All @@ -108,10 +116,6 @@ Fixed
- Already processed messages in listener are ignored for one day so messages
are not processed twice

Changed
-------
- Bulk annotations on entity endpoint now accept field path instead of id


===================
38.0.0 - 2023-11-13
Expand Down Expand Up @@ -158,6 +162,7 @@ Fixed
-----
- Set ``value`` to ``AnnotationValue`` object on duplication when it is created
- Send ``post_duplicate`` signal only on successful duplication

Changed
-------
- Simplify permission checks on ``AnnotationValue`` endpoint
Expand Down
2 changes: 1 addition & 1 deletion resolwe/flow/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ class TextFilterMixin:

def filter_text(self, queryset: QuerySet, name: str, value: str):
"""Full-text search."""
query = SearchQuery(value, config="simple")
query = SearchQuery(value, config="simple_unaccent")
return (
queryset.filter(**{name: query})
# This assumes that field is already a TextSearch vector and thus
Expand Down
24 changes: 23 additions & 1 deletion resolwe/flow/migrations/0001_squashed_0043_full_text_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@
import django.core.validators
import django.db.models.deletion
from django.conf import settings
from django.contrib.postgres.operations import CITextExtension, TrigramExtension
from django.contrib.postgres.operations import (
CITextExtension,
TrigramExtension,
UnaccentExtension,
)

from django.db import migrations, models

import resolwe.flow.models.fields
Expand All @@ -27,6 +32,23 @@ class Migration(migrations.Migration):
operations = [
CITextExtension(),
TrigramExtension(),
UnaccentExtension(),
migrations.RunSQL(
"""
DO
$$BEGIN
CREATE TEXT SEARCH CONFIGURATION simple_unaccent( COPY = simple );
EXCEPTION
WHEN unique_violation THEN
NULL; -- ignore error
END;$$;
"""
),
migrations.RunSQL(
"ALTER TEXT SEARCH CONFIGURATION simple_unaccent "
+ "ALTER MAPPING FOR hword, hword_part, word "
+ "WITH unaccent, simple;"
),
migrations.CreateModel(
name="Collection",
fields=[
Expand Down
13 changes: 13 additions & 0 deletions resolwe/flow/migrations/0022_add_unaccent_extension.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Generated by Django 4.2.9 on 2024-02-13 07:00

from django.db import migrations
from django.contrib.postgres.operations import UnaccentExtension


class Migration(migrations.Migration):
    """Enable the PostgreSQL ``unaccent`` extension for the ``flow`` app."""

    dependencies = [("flow", "0021_annotationvalue_modified")]

    operations = [
        UnaccentExtension(),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 4.2.9 on 2024-02-13 07:01

from django.db import migrations


class Migration(migrations.Migration):
    """Create the ``simple_unaccent`` text search configuration.

    The configuration starts as a copy of ``simple``; word tokens are then
    mapped through the ``unaccent`` dictionary before ``simple``.
    """

    dependencies = [("flow", "0022_add_unaccent_extension")]

    operations = [
        # Step 1: create the configuration. A unique violation raised by the
        # CREATE statement is ignored inside the DO block.
        migrations.RunSQL(
            """
            DO
            $$BEGIN
            CREATE TEXT SEARCH CONFIGURATION simple_unaccent( COPY = simple );
            EXCEPTION
            WHEN unique_violation THEN
            NULL; -- ignore error
            END;$$;
            """
        ),
        # Step 2: route word-like tokens through unaccent, then simple.
        migrations.RunSQL(
            "ALTER TEXT SEARCH CONFIGURATION simple_unaccent "
            "ALTER MAPPING FOR hword, hword_part, word "
            "WITH unaccent, simple;"
        ),
    ]
35 changes: 35 additions & 0 deletions resolwe/flow/migrations/0024_use_unaccent_full_text_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Generated by Django 4.2.9 on 2024-02-13 07:33

import os

from django.db import connection, migrations


def load_triggers(apps, schema_editor):
    """Execute the SQL files that (re)define the search functions and triggers.

    Every file is looked up in the directory of this migration module, read
    in full and sent to the database in one ``execute`` call, in the order
    listed below.

    :param apps: historical app registry supplied by ``RunPython`` (unused).
    :param schema_editor: schema editor supplied by ``RunPython``; its
        connection is the database currently being migrated.
    """
    # NOTE(review): utils.sql is loaded first, presumably because the trigger
    # files rely on helpers it defines -- keep this order unless verified.
    file_names = (
        "utils.sql",
        "triggers_collection.sql",
        "triggers_entity.sql",
        "triggers_data.sql",
    )
    migrations_dir = os.path.dirname(__file__)

    # Run on the connection of the database being migrated rather than the
    # global default one; otherwise ``migrate --database=<alias>`` would
    # install the triggers in the wrong database.
    with schema_editor.connection.cursor() as cursor:
        for file_name in file_names:
            file_path = os.path.join(migrations_dir, file_name)
            # Explicit encoding so the result does not depend on the locale.
            with open(file_path, encoding="utf-8") as handle:
                cursor.execute(handle.read())


class Migration(migrations.Migration):
    """Reload the search SQL and touch existing rows.

    After the SQL files are loaded, every row of the entity, collection and
    data tables is updated in place (``SET id=id``).
    NOTE(review): presumably this fires the update triggers so the stored
    search vectors are rebuilt -- confirm against the trigger definitions.
    """

    dependencies = [("flow", "0023_create_unaccent_full_text_search_config")]

    operations = [
        migrations.RunPython(load_triggers),
        # Update existing entries; the reverse direction is a no-op.
        *[
            migrations.RunSQL(f"UPDATE {table} SET id=id;", migrations.RunSQL.noop)
            for table in ("flow_entity", "flow_collection", "flow_data")
        ],
    ]
101 changes: 59 additions & 42 deletions resolwe/flow/migrations/triggers_collection.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,33 +30,33 @@ CREATE OR REPLACE FUNCTION generate_resolwe_collection_search(collection flow_co

SELECT
-- Collection name.
setweight(to_tsvector('simple', collection.name), 'A') ||
setweight(to_tsvector('simple', get_characters(collection.name)), 'B') ||
setweight(to_tsvector('simple', get_numbers(collection.name)), 'B') ||
setweight(to_tsvector('simple_unaccent', collection.name), 'A') ||
setweight(to_tsvector('simple_unaccent', get_characters(collection.name)), 'B') ||
setweight(to_tsvector('simple_unaccent', get_numbers(collection.name)), 'B') ||
-- Collection description.
setweight(to_tsvector('simple', collection.description), 'B') ||
setweight(to_tsvector('simple_unaccent', collection.description), 'B') ||
-- Contributor username.
setweight(to_tsvector('simple', contributor.usernames), 'B') ||
setweight(to_tsvector('simple', get_characters(contributor.usernames)), 'C') ||
setweight(to_tsvector('simple', get_numbers(contributor.usernames)), 'C') ||
setweight(to_tsvector('simple_unaccent', contributor.usernames), 'B') ||
setweight(to_tsvector('simple_unaccent', get_characters(contributor.usernames)), 'C') ||
setweight(to_tsvector('simple_unaccent', get_numbers(contributor.usernames)), 'C') ||
-- Contributor first name.
setweight(to_tsvector('simple', contributor.first_names), 'B') ||
setweight(to_tsvector('simple_unaccent', contributor.first_names), 'B') ||
-- Contributor last name.
setweight(to_tsvector('simple', contributor.last_names), 'B') ||
setweight(to_tsvector('simple_unaccent', contributor.last_names), 'B') ||
-- Owners usernames. There is no guarantee that it is not NULL.
setweight(to_tsvector('simple', COALESCE(owners.usernames, '')), 'B') ||
setweight(to_tsvector('simple', get_characters(owners.usernames)), 'C') ||
setweight(to_tsvector('simple', get_numbers(owners.usernames)), 'C') ||
setweight(to_tsvector('simple_unaccent', COALESCE(owners.usernames, '')), 'B') ||
setweight(to_tsvector('simple_unaccent', get_characters(owners.usernames)), 'C') ||
setweight(to_tsvector('simple_unaccent', get_numbers(owners.usernames)), 'C') ||
-- Owners first names. There is no guarantee that it is not NULL.
setweight(to_tsvector('simple', COALESCE(owners.first_names, '')), 'B') ||
setweight(to_tsvector('simple_unaccent', COALESCE(owners.first_names, '')), 'B') ||
-- Owners last names. There is no guarantee that it is not NULL.
setweight(to_tsvector('simple', COALESCE(owners.last_names, '')), 'B') ||
setweight(to_tsvector('simple_unaccent', COALESCE(owners.last_names, '')), 'B') ||
-- Collection tags.
setweight(to_tsvector('simple', array_to_string(collection.tags, ' ')), 'B') ||
setweight(to_tsvector('simple_unaccent', array_to_string(collection.tags, ' ')), 'B') ||
-- Collection descriptor.
setweight(to_tsvector('simple', flat_descriptor), 'C') ||
setweight(to_tsvector('simple', get_characters(flat_descriptor)), 'D') ||
setweight(to_tsvector('simple', get_numbers(flat_descriptor)), 'D')
setweight(to_tsvector('simple_unaccent', flat_descriptor), 'C') ||
setweight(to_tsvector('simple_unaccent', get_characters(flat_descriptor)), 'D') ||
setweight(to_tsvector('simple_unaccent', get_numbers(flat_descriptor)), 'D')

INTO search;

Expand All @@ -79,11 +79,14 @@ CREATE OR REPLACE FUNCTION collection_biut()
END;
$$;

CREATE TRIGGER collection_biut
BEFORE INSERT OR UPDATE
ON flow_collection
FOR EACH ROW EXECUTE PROCEDURE collection_biut();

-- Create the trigger unless it already exists, so the script can be re-run.
-- Only the "already exists" condition (duplicate_object) is ignored; any
-- other failure must abort instead of silently leaving the trigger missing.
DO $$ BEGIN
    CREATE TRIGGER collection_biut
        BEFORE INSERT OR UPDATE
        ON flow_collection
        FOR EACH ROW EXECUTE PROCEDURE collection_biut();
EXCEPTION
    WHEN duplicate_object THEN NULL;
END $$;

-- Trigger after update/insert/delete user permission object.
CREATE OR REPLACE FUNCTION handle_userpermission_collection(user_permission permissions_permissionmodel)
Expand Down Expand Up @@ -114,10 +117,14 @@ CREATE OR REPLACE FUNCTION userpermission_collection_aiut()
END;
$$;

CREATE TRIGGER userpermission_collection_aiut
AFTER INSERT OR UPDATE
ON permissions_permissionmodel
FOR EACH ROW EXECUTE PROCEDURE userpermission_collection_aiut();
-- Create the trigger unless it already exists, so the script can be re-run.
-- Only the "already exists" condition (duplicate_object) is ignored; any
-- other failure must abort instead of silently leaving the trigger missing.
DO $$ BEGIN
    CREATE TRIGGER userpermission_collection_aiut
        AFTER INSERT OR UPDATE
        ON permissions_permissionmodel
        FOR EACH ROW EXECUTE PROCEDURE userpermission_collection_aiut();
EXCEPTION
    WHEN duplicate_object THEN NULL;
END $$;

CREATE OR REPLACE FUNCTION userpermission_collection_adt()
RETURNS TRIGGER
Expand All @@ -129,12 +136,15 @@ CREATE OR REPLACE FUNCTION userpermission_collection_adt()
END;
$$;

CREATE TRIGGER userpermission_collection_adt
AFTER DELETE
-- ON guardian_userobjectpermission
ON permissions_permissionmodel
FOR EACH ROW EXECUTE PROCEDURE userpermission_collection_adt();

-- Create the trigger unless it already exists, so the script can be re-run.
-- Only the "already exists" condition (duplicate_object) is ignored; any
-- other failure must abort instead of silently leaving the trigger missing.
DO $$ BEGIN
    CREATE TRIGGER userpermission_collection_adt
        AFTER DELETE
        -- ON guardian_userobjectpermission
        ON permissions_permissionmodel
        FOR EACH ROW EXECUTE PROCEDURE userpermission_collection_adt();
EXCEPTION
    WHEN duplicate_object THEN NULL;
END $$;

-- Trigger after update contributor.
CREATE OR REPLACE FUNCTION collection_contributor_aut()
Expand All @@ -149,11 +159,14 @@ CREATE OR REPLACE FUNCTION collection_contributor_aut()
END;
$$;

CREATE TRIGGER collection_contributor_aut
AFTER UPDATE
ON auth_user
FOR EACH ROW EXECUTE PROCEDURE collection_contributor_aut();

-- Create the trigger unless it already exists, so the script can be re-run.
-- Only the "already exists" condition (duplicate_object) is ignored; any
-- other failure must abort instead of silently leaving the trigger missing.
DO $$ BEGIN
    CREATE TRIGGER collection_contributor_aut
        AFTER UPDATE
        ON auth_user
        FOR EACH ROW EXECUTE PROCEDURE collection_contributor_aut();
EXCEPTION
    WHEN duplicate_object THEN NULL;
END $$;

-- Trigger after update owner.
CREATE OR REPLACE FUNCTION collection_owner_aut()
Expand All @@ -178,7 +191,11 @@ CREATE OR REPLACE FUNCTION collection_owner_aut()
END;
$$;

CREATE TRIGGER collection_owner_aut
AFTER UPDATE
ON auth_user
FOR EACH ROW EXECUTE PROCEDURE collection_owner_aut();
-- Create the trigger unless it already exists, so the script can be re-run.
-- Only the "already exists" condition (duplicate_object) is ignored; any
-- other failure must abort instead of silently leaving the trigger missing.
DO $$ BEGIN
    CREATE TRIGGER collection_owner_aut
        AFTER UPDATE
        ON auth_user
        FOR EACH ROW EXECUTE PROCEDURE collection_owner_aut();
EXCEPTION
    WHEN duplicate_object THEN NULL;
END $$;
Loading
Loading