Skip to content

Commit

Permalink
Add full text search index by entity annotations
Browse files Browse the repository at this point in the history
  • Loading branch information
gregorjerse committed Feb 14, 2024
1 parent f57ac26 commit 81d8454
Show file tree
Hide file tree
Showing 7 changed files with 260 additions and 134 deletions.
6 changes: 6 additions & 0 deletions docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ Added
-----
- Add unaccented full text search configuration ``simple_unaccent``

Changed
-------
- Use ``simple_unaccent`` full text search configuration instead of ``simple``
- Use ``simple_unaccent`` full text search configuration instead of ``simple``
- Create ``simple_unaccent`` full text search index in ``PostgreSQL``


===================
38.3.2 - 2024-01-25
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,15 @@ class Migration(migrations.Migration):

operations = [
migrations.RunSQL(
"CREATE TEXT SEARCH CONFIGURATION simple_unaccent( COPY = simple );"
"""
DO
$$BEGIN
CREATE TEXT SEARCH CONFIGURATION simple_unaccent( COPY = simple );
EXCEPTION
WHEN unique_violation THEN
NULL; -- ignore error
END;$$;
"""
),
migrations.RunSQL(
"ALTER TEXT SEARCH CONFIGURATION simple_unaccent "
Expand Down
35 changes: 35 additions & 0 deletions resolwe/flow/migrations/0024_use_unaccent_full_text_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Generated by Django 4.2.9 on 2024-02-13 07:33

import os

from django.db import connection, migrations


def load_triggers(apps, schema_editor):
file_names = [
"utils.sql",
"triggers_collection.sql",
"triggers_entity.sql",
"triggers_data.sql",
]
with connection.cursor() as c:
for file_name in file_names:
file_path = os.path.join(os.path.dirname(__file__), file_name)
with open(file_path) as fh:
sql_statement = fh.read()
c.execute(sql_statement)


class Migration(migrations.Migration):

dependencies = [
("flow", "0023_create_unaccent_full_text_search_config"),
]

operations = [
migrations.RunPython(load_triggers),
# Update existing entries.
migrations.RunSQL("UPDATE flow_entity SET id=id;", migrations.RunSQL.noop),
migrations.RunSQL("UPDATE flow_collection SET id=id;", migrations.RunSQL.noop),
migrations.RunSQL("UPDATE flow_data SET id=id;", migrations.RunSQL.noop),
]
101 changes: 59 additions & 42 deletions resolwe/flow/migrations/triggers_collection.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,33 +30,33 @@ CREATE OR REPLACE FUNCTION generate_resolwe_collection_search(collection flow_co

SELECT
-- Collection name.
setweight(to_tsvector('simple', collection.name), 'A') ||
setweight(to_tsvector('simple', get_characters(collection.name)), 'B') ||
setweight(to_tsvector('simple', get_numbers(collection.name)), 'B') ||
setweight(to_tsvector('simple_unaccent', collection.name), 'A') ||
setweight(to_tsvector('simple_unaccent', get_characters(collection.name)), 'B') ||
setweight(to_tsvector('simple_unaccent', get_numbers(collection.name)), 'B') ||
-- Collection description.
setweight(to_tsvector('simple', collection.description), 'B') ||
setweight(to_tsvector('simple_unaccent', collection.description), 'B') ||
-- Contributor username.
setweight(to_tsvector('simple', contributor.usernames), 'B') ||
setweight(to_tsvector('simple', get_characters(contributor.usernames)), 'C') ||
setweight(to_tsvector('simple', get_numbers(contributor.usernames)), 'C') ||
setweight(to_tsvector('simple_unaccent', contributor.usernames), 'B') ||
setweight(to_tsvector('simple_unaccent', get_characters(contributor.usernames)), 'C') ||
setweight(to_tsvector('simple_unaccent', get_numbers(contributor.usernames)), 'C') ||
-- Contributor first name.
setweight(to_tsvector('simple', contributor.first_names), 'B') ||
setweight(to_tsvector('simple_unaccent', contributor.first_names), 'B') ||
-- Contributor last name.
setweight(to_tsvector('simple', contributor.last_names), 'B') ||
setweight(to_tsvector('simple_unaccent', contributor.last_names), 'B') ||
-- Owners usernames. There is no guarantee that it is not NULL.
setweight(to_tsvector('simple', COALESCE(owners.usernames, '')), 'B') ||
setweight(to_tsvector('simple', get_characters(owners.usernames)), 'C') ||
setweight(to_tsvector('simple', get_numbers(owners.usernames)), 'C') ||
setweight(to_tsvector('simple_unaccent', COALESCE(owners.usernames, '')), 'B') ||
setweight(to_tsvector('simple_unaccent', get_characters(owners.usernames)), 'C') ||
setweight(to_tsvector('simple_unaccent', get_numbers(owners.usernames)), 'C') ||
-- Owners first names. There is no guarantee that it is not NULL.
setweight(to_tsvector('simple', COALESCE(owners.first_names, '')), 'B') ||
setweight(to_tsvector('simple_unaccent', COALESCE(owners.first_names, '')), 'B') ||
-- Owners last names. There is no guarantee that it is not NULL.
setweight(to_tsvector('simple', COALESCE(owners.last_names, '')), 'B') ||
setweight(to_tsvector('simple_unaccent', COALESCE(owners.last_names, '')), 'B') ||
-- Collection tags.
setweight(to_tsvector('simple', array_to_string(collection.tags, ' ')), 'B') ||
setweight(to_tsvector('simple_unaccent', array_to_string(collection.tags, ' ')), 'B') ||
-- Collection descriptor.
setweight(to_tsvector('simple', flat_descriptor), 'C') ||
setweight(to_tsvector('simple', get_characters(flat_descriptor)), 'D') ||
setweight(to_tsvector('simple', get_numbers(flat_descriptor)), 'D')
setweight(to_tsvector('simple_unaccent', flat_descriptor), 'C') ||
setweight(to_tsvector('simple_unaccent', get_characters(flat_descriptor)), 'D') ||
setweight(to_tsvector('simple_unaccent', get_numbers(flat_descriptor)), 'D')

INTO search;

Expand All @@ -79,11 +79,14 @@ CREATE OR REPLACE FUNCTION collection_biut()
END;
$$;

CREATE TRIGGER collection_biut
BEFORE INSERT OR UPDATE
ON flow_collection
FOR EACH ROW EXECUTE PROCEDURE collection_biut();

DO $$ BEGIN
CREATE TRIGGER collection_biut
BEFORE INSERT OR UPDATE
ON flow_collection
FOR EACH ROW EXECUTE PROCEDURE collection_biut();
EXCEPTION
WHEN others THEN null;
END $$;

-- Trigger after update/insert/delete user permission object.
CREATE OR REPLACE FUNCTION handle_userpermission_collection(user_permission permissions_permissionmodel)
Expand Down Expand Up @@ -114,10 +117,14 @@ CREATE OR REPLACE FUNCTION userpermission_collection_aiut()
END;
$$;

CREATE TRIGGER userpermission_collection_aiut
AFTER INSERT OR UPDATE
ON permissions_permissionmodel
FOR EACH ROW EXECUTE PROCEDURE userpermission_collection_aiut();
DO $$ BEGIN
CREATE TRIGGER userpermission_collection_aiut
AFTER INSERT OR UPDATE
ON permissions_permissionmodel
FOR EACH ROW EXECUTE PROCEDURE userpermission_collection_aiut();
EXCEPTION
WHEN others THEN null;
END $$;

CREATE OR REPLACE FUNCTION userpermission_collection_adt()
RETURNS TRIGGER
Expand All @@ -129,12 +136,15 @@ CREATE OR REPLACE FUNCTION userpermission_collection_adt()
END;
$$;

CREATE TRIGGER userpermission_collection_adt
AFTER DELETE
-- ON guardian_userobjectpermission
ON permissions_permissionmodel
FOR EACH ROW EXECUTE PROCEDURE userpermission_collection_adt();

DO $$ BEGIN
CREATE TRIGGER userpermission_collection_adt
AFTER DELETE
-- ON guardian_userobjectpermission
ON permissions_permissionmodel
FOR EACH ROW EXECUTE PROCEDURE userpermission_collection_adt();
EXCEPTION
WHEN others THEN null;
END $$;

-- Trigger after update contributor.
CREATE OR REPLACE FUNCTION collection_contributor_aut()
Expand All @@ -149,11 +159,14 @@ CREATE OR REPLACE FUNCTION collection_contributor_aut()
END;
$$;

CREATE TRIGGER collection_contributor_aut
AFTER UPDATE
ON auth_user
FOR EACH ROW EXECUTE PROCEDURE collection_contributor_aut();

DO $$ BEGIN
CREATE TRIGGER collection_contributor_aut
AFTER UPDATE
ON auth_user
FOR EACH ROW EXECUTE PROCEDURE collection_contributor_aut();
EXCEPTION
WHEN others THEN null;
END $$;

-- Trigger after update owner.
CREATE OR REPLACE FUNCTION collection_owner_aut()
Expand All @@ -178,7 +191,11 @@ CREATE OR REPLACE FUNCTION collection_owner_aut()
END;
$$;

CREATE TRIGGER collection_owner_aut
AFTER UPDATE
ON auth_user
FOR EACH ROW EXECUTE PROCEDURE collection_owner_aut();
DO $$ BEGIN
CREATE TRIGGER collection_owner_aut
AFTER UPDATE
ON auth_user
FOR EACH ROW EXECUTE PROCEDURE collection_owner_aut();
EXCEPTION
WHEN others THEN null;
END $$;
116 changes: 69 additions & 47 deletions resolwe/flow/migrations/triggers_data.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
-- Trigger after insert/update Data object.
CREATE TYPE process_result AS (
name text,
type text
);
DO $$ BEGIN
CREATE TYPE process_result AS (
name text,
type text
);
EXCEPTION
WHEN duplicate_object THEN null;
END $$;

CREATE OR REPLACE FUNCTION generate_resolwe_data_search(data flow_data)
RETURNS tsvector
Expand Down Expand Up @@ -38,37 +42,37 @@ CREATE OR REPLACE FUNCTION generate_resolwe_data_search(data flow_data)

SELECT
-- Data name.
setweight(to_tsvector('simple', data.name), 'A') ||
setweight(to_tsvector('simple', get_characters(data.name)), 'B') ||
setweight(to_tsvector('simple', get_numbers(data.name)), 'B') ||
setweight(to_tsvector('simple_unaccent', data.name), 'A') ||
setweight(to_tsvector('simple_unaccent', get_characters(data.name)), 'B') ||
setweight(to_tsvector('simple_unaccent', get_numbers(data.name)), 'B') ||
-- Contributor username.
setweight(to_tsvector('simple', contributor.usernames), 'B') ||
setweight(to_tsvector('simple', get_characters(contributor.usernames)), 'C') ||
setweight(to_tsvector('simple', get_numbers(contributor.usernames)), 'C') ||
setweight(to_tsvector('simple_unaccent', contributor.usernames), 'B') ||
setweight(to_tsvector('simple_unaccent', get_characters(contributor.usernames)), 'C') ||
setweight(to_tsvector('simple_unaccent', get_numbers(contributor.usernames)), 'C') ||
-- Contributor first name.
setweight(to_tsvector('simple', contributor.first_names), 'B') ||
setweight(to_tsvector('simple_unaccent', contributor.first_names), 'B') ||
-- Contributor last name.
setweight(to_tsvector('simple', contributor.last_names), 'B') ||
setweight(to_tsvector('simple_unaccent', contributor.last_names), 'B') ||
-- Owners usernames. There is no guarantee that it is not NULL.
setweight(to_tsvector('simple', COALESCE(owners.usernames, '')), 'B') ||
setweight(to_tsvector('simple', get_characters(owners.usernames)), 'C') ||
setweight(to_tsvector('simple', get_numbers(owners.usernames)), 'C') ||
setweight(to_tsvector('simple_unaccent', COALESCE(owners.usernames, '')), 'B') ||
setweight(to_tsvector('simple_unaccent', get_characters(owners.usernames)), 'C') ||
setweight(to_tsvector('simple_unaccent', get_numbers(owners.usernames)), 'C') ||
-- Owners first names. There is no guarantee that it is not NULL.
setweight(to_tsvector('simple', COALESCE(owners.first_names, '')), 'B') ||
setweight(to_tsvector('simple_unaccent', COALESCE(owners.first_names, '')), 'B') ||
-- Owners last names. There is no guarantee that it is not NULL.
setweight(to_tsvector('simple', COALESCE(owners.last_names, '')), 'B') ||
setweight(to_tsvector('simple_unaccent', COALESCE(owners.last_names, '')), 'B') ||
-- Process name.
setweight(to_tsvector('simple', process.name), 'B') ||
setweight(to_tsvector('simple', get_characters(process.name)), 'C') ||
setweight(to_tsvector('simple', get_numbers(process.name)), 'C') ||
setweight(to_tsvector('simple_unaccent', process.name), 'B') ||
setweight(to_tsvector('simple_unaccent', get_characters(process.name)), 'C') ||
setweight(to_tsvector('simple_unaccent', get_numbers(process.name)), 'C') ||
-- Process type.
setweight(to_tsvector('simple', process.type), 'D') ||
setweight(to_tsvector('simple_unaccent', process.type), 'D') ||
-- Data tags.
setweight(to_tsvector('simple', array_to_string(data.tags, ' ')), 'B') ||
setweight(to_tsvector('simple_unaccent', array_to_string(data.tags, ' ')), 'B') ||
-- Data descriptor.
setweight(to_tsvector('simple', flat_descriptor), 'C') ||
setweight(to_tsvector('simple', get_characters(flat_descriptor)), 'D') ||
setweight(to_tsvector('simple', get_numbers(flat_descriptor)), 'D')
setweight(to_tsvector('simple_unaccent', flat_descriptor), 'C') ||
setweight(to_tsvector('simple_unaccent', get_characters(flat_descriptor)), 'D') ||
setweight(to_tsvector('simple_unaccent', get_numbers(flat_descriptor)), 'D')
INTO search;

RETURN search;
Expand All @@ -90,11 +94,14 @@ CREATE OR REPLACE FUNCTION data_biut()
END;
$$;

CREATE TRIGGER data_biut
BEFORE INSERT OR UPDATE
ON flow_data
FOR EACH ROW EXECUTE PROCEDURE data_biut();

DO $$ BEGIN
CREATE TRIGGER data_biut
BEFORE INSERT OR UPDATE
ON flow_data
FOR EACH ROW EXECUTE PROCEDURE data_biut();
EXCEPTION
WHEN others THEN null;
END $$;

-- Trigger after update/insert/delete user permission object.
CREATE OR REPLACE FUNCTION handle_userpermission_data(user_permission permissions_permissionmodel)
Expand Down Expand Up @@ -125,10 +132,14 @@ CREATE OR REPLACE FUNCTION userpermission_data_aiut()
END;
$$;

CREATE TRIGGER userpermission_data_aiut
AFTER INSERT OR UPDATE
ON permissions_permissionmodel
FOR EACH ROW EXECUTE PROCEDURE userpermission_data_aiut();
DO $$ BEGIN
CREATE TRIGGER userpermission_data_aiut
AFTER INSERT OR UPDATE
ON permissions_permissionmodel
FOR EACH ROW EXECUTE PROCEDURE userpermission_data_aiut();
EXCEPTION
WHEN others THEN null;
END $$;

CREATE OR REPLACE FUNCTION userpermission_data_adt()
RETURNS TRIGGER
Expand All @@ -140,10 +151,14 @@ CREATE OR REPLACE FUNCTION userpermission_data_adt()
END;
$$;

CREATE TRIGGER userpermission_data_adt
AFTER DELETE
ON permissions_permissionmodel
FOR EACH ROW EXECUTE PROCEDURE userpermission_data_adt();
DO $$ BEGIN
CREATE TRIGGER userpermission_data_adt
AFTER DELETE
ON permissions_permissionmodel
FOR EACH ROW EXECUTE PROCEDURE userpermission_data_adt();
EXCEPTION
WHEN others THEN null;
END $$;


-- Trigger after update contributor.
Expand All @@ -159,11 +174,14 @@ CREATE OR REPLACE FUNCTION data_contributor_aut()
END;
$$;

CREATE TRIGGER data_contributor_aut
AFTER UPDATE
ON auth_user
FOR EACH ROW EXECUTE PROCEDURE data_contributor_aut();

DO $$ BEGIN
CREATE TRIGGER data_contributor_aut
AFTER UPDATE
ON auth_user
FOR EACH ROW EXECUTE PROCEDURE data_contributor_aut();
EXCEPTION
WHEN others THEN null;
END $$;

-- Trigger after update owner.
CREATE OR REPLACE FUNCTION data_owner_aut()
Expand All @@ -188,7 +206,11 @@ CREATE OR REPLACE FUNCTION data_owner_aut()
END;
$$;

CREATE TRIGGER data_owner_aut
AFTER UPDATE
ON auth_user
FOR EACH ROW EXECUTE PROCEDURE data_owner_aut();
DO $$ BEGIN
CREATE TRIGGER data_owner_aut
AFTER UPDATE
ON auth_user
FOR EACH ROW EXECUTE PROCEDURE data_owner_aut();
EXCEPTION
WHEN others THEN null;
END $$;
Loading

0 comments on commit 81d8454

Please sign in to comment.