diff --git a/tardis/apps/search/api.py b/tardis/apps/search/api.py index 8ae266712..3e3235d12 100644 --- a/tardis/apps/search/api.py +++ b/tardis/apps/search/api.py @@ -1,4 +1,4 @@ -# pylint: disable=C0302,R1702 +# pylint: disable=C0302 """ RESTful API for MyTardis search. Implemented with Tastypie. @@ -6,15 +6,13 @@ .. moduleauthor:: Manish Kumar .. moduleauthor:: Mike Laverick """ -from datetime import datetime import json from django.conf import settings -from django.template.defaultfilters import filesizeformat import pytz from django_elasticsearch_dsl.search import Search -from elasticsearch_dsl import MultiSearch, Q +from elasticsearch_dsl import MultiSearch from tastypie import fields from tastypie.exceptions import ImmediateHttpResponse from tastypie.http import HttpUnauthorized @@ -30,7 +28,17 @@ ParameterName, ) from tardis.apps.projects.models import Project - +from .utils.api import ( + create_user_and_group_query, + query_keywords_and_metadata, + query_apply_filters, + query_add_sorting, + cleaning_acls, + cleaning_ids, + cleaning_preload, + cleaning_parent_filter, + cleaning_results, +) LOCAL_TZ = pytz.timezone(settings.TIME_ZONE) RESULTS_PER_PAGE = settings.RESULTS_PER_PAGE @@ -62,6 +70,8 @@ def to_json(self, data, options=None): class SearchObject(object): + """Basic TastyPie API object to hold search results""" + def __init__(self, hits=None, total_hits=None, id=None): self.hits = hits self.total_hits = total_hits @@ -69,6 +79,8 @@ def __init__(self, hits=None, total_hits=None, id=None): class SchemasObject(object): + """Basic TastyPie API object to hold schemas for filter bar population""" + def __init__(self, schemas=None, id=None): self.schemas = schemas self.id = id @@ -96,6 +108,13 @@ def detail_uri_kwargs(self, bundle_or_obj): return kwargs def get_object_list(self, request): + """ + Populates the API response with schemas and metadata fields that + a user can access. 
+ TODO: Probably separate out PUBLIC_DATA schemas + """ + + # if a user is not logged in, return empty for their schemas if not request.user.is_authenticated: result_dict = { "project": None, @@ -104,40 +123,30 @@ def get_object_list(self, request): "datafile": None, } return [SchemasObject(id=1, schemas=result_dict)] - result_dict = { - "project": [ - *{ - *Project.safe.all(user=request.user) - .prefetch_related("projectparameterset") - .values_list("projectparameterset__schema__id", flat=True) - } - ], - "experiment": [ - *{ - *Experiment.safe.all(user=request.user) - .prefetch_related("experimentparameterset") - .values_list("experimentparameterset__schema__id", flat=True) - } - ], - "dataset": [ - *{ - *Dataset.safe.all(user=request.user) - .prefetch_related("datasetparameterset") - .values_list("datasetparameterset__schema__id", flat=True) - } - ], - "datafile": [ + + # pull out schema IDs for all accessible objects for a user + result_dict = {} + for string, model in { + "project": Project, + "experiment": Experiment, + "dataset": Dataset, + "datafile": DataFile, + }.items(): + result_dict[string] = [ *{ - *DataFile.safe.all(user=request.user) - .prefetch_related("datafileparameterset") - .values_list("datafileparameterset__schema__id", flat=True) + *model.safe.all(user=request.user) + .prefetch_related(string + "parameterset") + .values_list(string + "parameterset__schema__id", flat=True) } - ], - } + ] + + # create a return dictionary of schemas and their non-sensitive metadata fields safe_dict = {} + # iterate over accessible schemas for key, val in result_dict.items(): safe_dict[key] = {} for value in val: + # if object type has schemas, add them to safe_dict if value is not None: schema_id = str(value) schema_dict = { @@ -146,6 +155,7 @@ def get_object_list(self, request): "schema_name": Schema.objects.get(id=value).name, "parameters": {}, } + # get parameter_names associated with schema param_names = ParameterName.objects.filter( schema__id=value, sensitive=False ) @@ -166,7 +176,9 @@ def get_object_list(self, request): "full_name": param.full_name, "data_type": type_dict[param.data_type], } + # append parameter info to relevant schema schema_dict["parameters"][param_id] = param_dict + # add completed schema to schema_dict ready for return safe_dict[key][schema_id] = schema_dict return [SchemasObject(id=1, schemas=safe_dict)] @@ -220,24 +232,35 @@ def create_search_results(self, bundle): # return [SearchObject(id=1, hits=result_dict)] groups = user.groups.all() + # This holds the "text" from all the object specific "keyword" search bars, + # which may ALL have been populated with the same text via the menubar search bar query_text = bundle.data.get("query", None) + # This holds all of the intrinsic and schema-specific metadata filters per object filters = bundle.data.get("filters", None) + # result specific bundles for pagination and sorting request_sorting = bundle.data.get("sort", None) request_size = bundle.data.get("size", 20) request_offset = bundle.data.get("offset", 0) + # result specific bundle to trigger a object specific search update request_type = bundle.data.get("type", None) - # Mock input - # request_for_pag = True - # if request_for_pag: - # request_offset = 37 - # request_size = 50 - # request_sorting = [#{ 'field': ["title"], 'order': "desc" }, - # #{ 'field': ["experiments","title"], 'order': "desc" }, - # { 'field': ["size"], 'order': "desc" }] - # request_type = 'datafile' + + """Mock input + request_for_pag = True + if request_for_pag: + request_offset = 
37 + request_size = 50 + request_sorting = [#{ 'field': ["title"], 'order': "desc" }, + #{ 'field': ["experiments","title"], 'order': "desc" }, + { 'field': ["size"], 'order': "desc" }] + request_type = 'datafile' + """ + + # if API request object type isn't specified default to all object types if request_type is None: index_list = ["project", "experiment", "dataset", "datafile"] - match_list = ["name", "title", "description", "filename"] + title_list = ["name", "title", "description", "filename"] + # If API request object type is specified then specify object type + parent + # heirarchy object types, and their intrinsic "title" field names else: # probably some nicer structure/way to do this type_2_list = { @@ -256,505 +279,41 @@ def create_search_results(self, bundle): }, } index_list = type_2_list[request_type]["index"] - match_list = type_2_list[request_type]["match"] + title_list = type_2_list[request_type]["match"] + # Numerically specify the order of heirarchy hierarchy = {"project": 4, "experiment": 3, "dataset": 2, "datafile": 1} + # Define a numerical filter_level for objects, below which we enforce a + # parent-must-be-in-results criteria filter_level = 0 + + # create multisearch object to search all 4 objects in parallel ms = MultiSearch(index=index_list) + + # iterate over object types required in this search request for idx, obj in enumerate(index_list): - # (1) add user/group criteria to searchers - query_obj = Q( - { - "nested": { - "path": "acls", - "query": Q( - { - "bool": { - "must": [ - Q({"match": {"acls.entityId": user.id}}), - Q({"term": {"acls.pluginId": "django_user"}}), - ] - } - } - ), - } - } - ) - for group in groups: - query_obj_group = Q( - { - "nested": { - "path": "acls", - "query": Q( - { - "bool": { - "must": [ - Q({"match": {"acls.entityId": group.id}}), - Q( - { - "term": { - "acls.pluginId": "django_group" - } - } - ), - ] - } - } - ), - } - } - ) - query_obj = query_obj | query_obj_group - # (2) Search on title/keywords + on non-sensitive metadata + # add user/group criteria to searchers + query_obj = create_user_and_group_query(user=user, groups=groups) + + # Search on title/keywords + on non-sensitive metadata if query_text is not None: + # parent-child filter isn't enforced here right now # if filter_level < hierarchy[obj]: # filter_level = hierarchy[obj] if obj in query_text.keys(): - query_obj_text = Q({"match": {match_list[idx]: query_text[obj]}}) - query_obj_text_meta = Q( - { - "nested": { - "path": "parameters.string", - "query": Q( - { - "bool": { - "must": [ - Q( - { - "match": { - "parameters.string.value": query_text[ - obj - ] - } - } - ), - Q( - { - "term": { - "parameters.string.sensitive": False - } - } - ), - ] - } - } - ), - } - } + query_obj = query_keywords_and_metadata( + query_obj, query_text, obj, idx, title_list ) - query_obj_text_meta = query_obj_text | query_obj_text_meta - query_obj = query_obj & query_obj_text_meta - # (3) Apply intrinsic filters + metadata filters to search + # Apply intrinsic filters + metadata filters to search if filters is not None: # filter_op = filters['op'] This isn't used for now - filterlist = filters["content"] - operator_dict = { - "is": "term", - "contains": "match", - ">=": "gte", - "<=": "lte", - } - num_2_type = { - 1: "experiment", - 2: "dataset", - 3: "datafile", - 6: "project", - } - for filter in filterlist: - oper = operator_dict[filter["op"]] - - # (3.1) Apply Schema-parameter / metadata filters to search - if filter["kind"] == "schemaParameter": - schema_id, param_id = 
filter["target"][0], filter["target"][1] - # check filter is applied to correct object type - if num_2_type[Schema.objects.get(id=schema_id).type] == obj: - if filter_level < hierarchy[obj]: - filter_level = hierarchy[obj] - if filter["type"] == "STRING": - # check if filter query is list of options, or single value - # (elasticsearch can actually handle delimiters in a single string...) - if isinstance(filter["content"], list): - Qdict = {"should": []} - for option in filter["content"]: - qry = Q( - { - "nested": { - "path": "parameters.string", - "query": Q( - { - "bool": { - "must": [ - Q( - { - "match": { - "parameters.string.pn_id": str( - param_id - ) - } - } - ), - Q( - { - oper: { - "parameters.string.value": option - } - } - ), - Q( - { - "term": { - "parameters.string.sensitive": False - } - } - ), - ] - } - } - ), - } - } - ) - Qdict["should"].append(qry) - query_obj_filt = Q({"bool": Qdict}) - else: - query_obj_filt = Q( - { - "nested": { - "path": "parameters.string", - "query": Q( - { - "bool": { - "must": [ - Q( - { - "match": { - "parameters.string.pn_id": str( - param_id - ) - } - } - ), - Q( - { - oper: { - "parameters.string.value": filter[ - "content" - ] - } - } - ), - Q( - { - "term": { - "parameters.string.sensitive": False - } - } - ), - ] - } - } - ), - } - } - ) - elif filter["type"] == "NUMERIC": - query_obj_filt = Q( - { - "nested": { - "path": "parameters.numerical", - "query": Q( - { - "bool": { - "must": [ - Q( - { - "match": { - "parameters.numerical.pn_id": str( - param_id - ) - } - } - ), - Q( - { - "range": { - "parameters.numerical.value": { - oper: filter[ - "content" - ] - } - } - } - ), - Q( - { - "term": { - "parameters.string.sensitive": False - } - } - ), - ] - } - } - ), - } - } - ) - elif filter["type"] == "DATETIME": - query_obj_filt = Q( - { - "nested": { - "path": "parameters.datetime", - "query": Q( - { - "bool": { - "must": [ - Q( - { - "match": { - "parameters.datetime.pn_id": str( - param_id - ) - } - } - ), - Q( - { - "range": { - "parameters.datetime.value": { - oper: filter[ - "content" - ] - } - } - } - ), - Q( - { - "term": { - "parameters.string.sensitive": False - } - } - ), - ] - } - } - ), - } - } - ) - query_obj = query_obj & query_obj_filt - - # (3.2) Apply intrinsic object filters to search - if filter["kind"] == "typeAttribute": - target_objtype, target_fieldtype = ( - filter["target"][0], - filter["target"][1], - ) - if target_objtype == obj: - # Update the heirarchy level at which the - # "parent-in-results" criteria must be applied - if filter_level < hierarchy[obj]: - filter_level = hierarchy[obj] - - # (3.2.1) Apply "Selected Schema" filter - if target_fieldtype == "schema": - # check if filter query is list of options, or single value - if isinstance(filter["content"], list): - Qdict = {"should": []} - for option in filter["content"]: - qry = Q( - { - "nested": { - "path": "parameters.schemas", - "query": Q( - { - oper: { - "parameters.schemas.schema_id": option - } - } - ), - } - } - ) - Qdict["should"].append(qry) - query_obj_filt = Q({"bool": Qdict}) - else: - query_obj_filt = Q( - { - "nested": { - "path": "parameters.schemas", - "query": Q( - { - oper: { - "parameters.schemas.schema_id": filter[ - "content" - ] - } - } - ), - } - } - ) - query_obj = query_obj & query_obj_filt - - # (3.2.2) Apply filters that act on fields which are - # intrinsic to the object (Proj,exp,set,file) - if target_fieldtype in { - "name", - "description", - "title", - "tags", - "filename", - "file_extension", - "created_time", - 
"start_time", - "end_time", - }: - if filter["type"] == "STRING": - if isinstance(filter["content"], list): - Qdict = {"should": []} - for option in filter["content"]: - if target_fieldtype == "file_extension": - if option[0] == ".": - option = option[1:] - qry = Q({oper: {target_fieldtype: option}}) - Qdict["should"].append(qry) - query_obj_filt = Q({"bool": Qdict}) - else: - if target_fieldtype == "file_extension": - if filter["content"][0] == ".": - filter["content"] = filter["content"][ - 1: - ] - query_obj_filt = Q( - { - oper: { - target_fieldtype: filter["content"] - } - } - ) - elif filter["type"] == "DATETIME": - query_obj_filt = Q( - { - "range": { - target_fieldtype: { - oper: filter["content"] - } - } - } - ) - query_obj = query_obj & query_obj_filt - - # (3.2.3) Apply filters that act on fields which are - # intrinsic to related objects (instruments, users, etc) - if target_fieldtype in { - "principal_investigator", - "projects", - "instrument", - "institution", - "experiments", - "dataset", - }: - nested_fieldtype = filter["target"][2] - if isinstance(filter["content"], list): - Qdict = {"should": []} - for option in filter["content"]: - qry = Q( - { - "nested": { - "path": target_fieldtype, - "query": Q( - { - oper: { - ".".join( - [ - target_fieldtype, - nested_fieldtype, - ] - ): option - } - } - ), - } - } - ) - Qdict["should"].append(qry) - query_obj_filt = Q({"bool": Qdict}) - else: - query_obj_filt = Q( - { - "nested": { - "path": target_fieldtype, - "query": Q( - { - oper: { - ".".join( - [ - target_fieldtype, - nested_fieldtype, - ] - ): filter["content"] - } - } - ), - } - } - ) - # Special handling for list of principal investigators - if target_fieldtype == "principal_investigator": - Qdict_lr = {"should": [query_obj_filt]} - if isinstance(filter["content"], list): - Qdict = {"should": []} - for option in filter["content"]: - qry = Q( - { - "nested": { - "path": target_fieldtype, - "query": Q( - { - "term": { - ".".join( - [ - target_fieldtype, - "username", - ] - ): option - } - } - ), - } - } - ) - Qdict["should"].append(qry) - query_obj_filt = Q({"bool": Qdict}) - else: - query_obj_filt = Q( - { - "nested": { - "path": target_fieldtype, - "query": Q( - { - "term": { - ".".join( - [ - target_fieldtype, - "username", - ] - ): filter["content"] - } - } - ), - } - } - ) - Qdict_lr["should"].append(query_obj_filt) - query_obj_filt = Q({"bool": Qdict_lr}) - query_obj = query_obj & query_obj_filt - - # (4) Define fields not to return in the search results (for brevity) + query_obj, filter_level = query_apply_filters( + query_obj, filters, obj, filter_level, hierarchy + ) + + # Define fields not to return in the search results excluded_fields_list = [ "end_time", "institution", @@ -770,58 +329,19 @@ def create_search_results(self, bundle): "parameters.datetime.pn_id", "acls", ] + # "description" field is crucial for datasets, but too verbose for experiments if obj != "dataset": excluded_fields_list.append("description") - ######TODO (5) Do some sorting - # Default sorting + # Apply sorting filters based upon request and defaults sort_dict = {} - if request_sorting is not None: - if obj in request_sorting: - for sort in request_sorting[obj]: - if len(sort["field"]) > 1: - if sort["field"][-1] in { - "fullname", - "name", - "title", - "description", - "filename", - }: - search_field = ".".join(sort["field"]) + ".raw" - else: - search_field = ".".join(sort["field"]) - sort_dict[search_field] = { - "order": sort["order"], - "nested_path": ".".join(sort["field"][:-1]), - } 
- - if len(sort["field"]) == 1: - if sort["field"][0] in { - "principal_investigator", - "name", - "title", - "description", - "filename", - }: - sort_dict[sort["field"][0] + ".raw"] = { - "order": sort["order"] - } - elif sort["field"][0] == "size": - if obj == "datafile": - sort_dict[sort["field"][0]] = { - "order": sort["order"] - } - else: - # DO SOME SORTING AFTER ELASTICSEARCH - pass - else: - sort_dict[sort["field"][0]] = {"order": sort["order"]} + sort_dict = query_add_sorting(request_sorting, obj, sort_dict) # If sort dict is still empty even after filters, add in the defaults if not sort_dict: - sort_dict = {match_list[idx] + ".raw": {"order": "asc"}} + sort_dict = {title_list[idx] + ".raw": {"order": "asc"}} - # (6) Add the search to the multi-search object, ready for execution + # Finally, add the search to the multi-search object, ready for execution ms = ms.add( Search(index=obj) .sort(sort_dict) @@ -830,6 +350,7 @@ def create_search_results(self, bundle): .source(excludes=excluded_fields_list) ) + # execute the multi-search object and return results results = ms.execute() # -------------------- @@ -837,112 +358,13 @@ def create_search_results(self, bundle): # -------------------- # load in object IDs for all objects a user has sensitive access to - # projects_sens = {*Project.safe.all(user, viewsensitive=True).values_list("id", flat=True)} - projects_sens_query = ( - user.projectacls.select_related("project") - .filter(canSensitive=True) - .exclude( - effectiveDate__gte=datetime.today(), expiryDate__lte=datetime.today() - ) - .values_list("project__id", flat=True) - ) - for group in groups: - projects_sens_query |= ( - group.projectacls.select_related("project") - .filter(canSensitive=True) - .exclude( - effectiveDate__gte=datetime.today(), - expiryDate__lte=datetime.today(), - ) - .values_list("project__id", flat=True) - ) - projects_sens = [*projects_sens_query.distinct()] - - # experiments_sens = {*Experiment.safe.all(user, viewsensitive=True).values_list("id", flat=True)} - experiments_sens_query = ( - user.experimentacls.select_related("experiment") - .filter(canSensitive=True) - .exclude( - effectiveDate__gte=datetime.today(), expiryDate__lte=datetime.today() - ) - .values_list("experiment__id", flat=True) - ) - for group in groups: - experiments_sens_query |= ( - group.experimentacls.select_related("experiment") - .filter(canSensitive=True) - .exclude( - effectiveDate__gte=datetime.today(), - expiryDate__lte=datetime.today(), - ) - .values_list("experiment__id", flat=True) - ) - experiments_sens = [*experiments_sens_query.distinct()] - - # datasets_sens = {*Dataset.safe.all(user, viewsensitive=True).values_list("id", flat=True)} - datasets_sens_query = ( - user.datasetacls.select_related("dataset") - .filter(canSensitive=True) - .exclude( - effectiveDate__gte=datetime.today(), expiryDate__lte=datetime.today() - ) - .values_list("dataset__id", flat=True) - ) - for group in groups: - datasets_sens_query |= ( - group.datasetacls.select_related("dataset") - .filter(canSensitive=True) - .exclude( - effectiveDate__gte=datetime.today(), - expiryDate__lte=datetime.today(), - ) - .values_list("dataset__id", flat=True) - ) - datasets_sens = [*datasets_sens_query.distinct()] - - # datafiles_sens = {*DataFile.safe.all(user, viewsensitive=True).values_list("id", flat=True)} - datafiles_sens_query = ( - user.datafileacls.select_related("datafile") - .filter(canSensitive=True) - .exclude( - effectiveDate__gte=datetime.today(), expiryDate__lte=datetime.today() - ) - 
.values_list("datafile__id", flat=True) - ) - for group in groups: - datafiles_sens_query |= ( - group.datafileacls.select_related("datafile") - .filter(canSensitive=True) - .exclude( - effectiveDate__gte=datetime.today(), - expiryDate__lte=datetime.today(), - ) - .values_list("datafile__id", flat=True) - ) - datafiles_sens = [*datafiles_sens_query.distinct()] + projects_sens = cleaning_acls(user, groups, "project", canSensitive=True) + experiments_sens = cleaning_acls(user, groups, "experiment", canSensitive=True) + datasets_sens = cleaning_acls(user, groups, "dataset", canSensitive=True) + datafiles_sens = cleaning_acls(user, groups, "datafile", canSensitive=True) # load in datafile IDs for all datafiles a user has download access to - # datafiles_dl = {*DataFile.safe.all(user, downloadable=True).values_list("id", flat=True)} - - datafiles_dl_query = ( - user.datafileacls.select_related("datafile") - .filter(canDownload=True) - .exclude( - effectiveDate__gte=datetime.today(), expiryDate__lte=datetime.today() - ) - .values_list("datafile__id", flat=True) - ) - for group in groups: - datafiles_dl_query |= ( - group.datafileacls.select_related("datafile") - .filter(canDownload=True) - .exclude( - effectiveDate__gte=datetime.today(), - expiryDate__lte=datetime.today(), - ) - .values_list("datafile__id", flat=True) - ) - datafiles_dl = [*datafiles_dl_query.distinct()] + datafiles_dl = cleaning_acls(user, groups, "datafile", canDownload=True) # re-structure into convenient dictionary preloaded = { @@ -951,318 +373,42 @@ def create_search_results(self, bundle): "dataset": {"sens_list": datasets_sens, "objects": {}}, "datafile": {"sens_list": datafiles_sens, "objects": {}}, } + # load in object IDs for all objects a user has read access to, # and IDs for all of the object's nested-children - regardless of user # access to these child objects (the access check come later) - # projects_values = ["id", "experiment__id", "experiment__datasets__id", - # "experiment__datasets__datafile__id"] - # projects = [*Project.safe.all(user).values_list(*projects_values)] - - projects_query = ( - user.projectacls.select_related("project") - .prefetch_related( - "project__experiments", - "project__experiments__datasets", - "project__experiments__datasets__datafile", - ) - .exclude( - effectiveDate__gte=datetime.today(), expiryDate__lte=datetime.today() - ) - .values_list( - "project__id", - "project__experiments__id", - "project__experiments__datasets__id", - "project__experiments__datasets__datafile__id", - ) + projects = cleaning_ids(user, groups, "project") + experiments = cleaning_ids(user, groups, "experiment") + datasets = cleaning_ids(user, groups, "dataset") + datafiles = cleaning_ids(user, groups, "datafile") + + # add data to preloaded["objects"] dictionary with ID as key + # and nested items as value - key/values. 
+ preloaded = cleaning_preload( + preloaded, projects, experiments, datasets, datafiles ) - for group in groups: - projects_query |= ( - group.projectacls.select_related("project") - .prefetch_related( - "project__experiments", - "project__experiments__datasets", - "project__experiments__datasets__datafile", - ) - .exclude( - effectiveDate__gte=datetime.today(), - expiryDate__lte=datetime.today(), - ) - .values_list( - "project__id", - "project__experiments__id", - "project__experiments__datasets__id", - "project__experiments__datasets__datafile__id", - ) - ) - projects = [*projects_query.distinct()] - - # experiments_values = ["id", "datasets__id", "datasets__datafile__id"] - # experiments = [*Experiment.safe.all(user).values_list(*experiments_values)] - - experiments_query = ( - user.experimentacls.select_related("experiment") - .prefetch_related("experiment__datasets", "experiment__datasets__datafile") - .exclude( - effectiveDate__gte=datetime.today(), expiryDate__lte=datetime.today() - ) - .values_list( - "experiment__id", - "experiment__datasets__id", - "experiment__datasets__datafile__id", - ) - ) - for group in groups: - experiments_query |= ( - group.experimentacls.select_related("experiment") - .prefetch_related( - "experiment__datasets", "experiment__datasets__datafile" - ) - .exclude( - effectiveDate__gte=datetime.today(), - expiryDate__lte=datetime.today(), - ) - .values_list( - "experiment__id", - "experiment__datasets__id", - "experiment__datasets__datafile__id", - ) - ) - experiments = [*experiments_query.distinct()] - - # datasets = [*Dataset.safe.all(user).prefetch_related("datafile").values_list("id", "datafile__id")] - datasets_query = ( - user.datasetacls.select_related("dataset") - .prefetch_related("dataset__datafile") - .exclude( - effectiveDate__gte=datetime.today(), expiryDate__lte=datetime.today() - ) - .values_list("dataset__id", "dataset__datafile__id") - ) - for group in groups: - datasets_query |= ( - group.datasetacls.select_related("dataset") - .exclude( - effectiveDate__gte=datetime.today(), - expiryDate__lte=datetime.today(), - ) - .values_list("dataset__id", "dataset__datafile__id") - ) - datasets = [*datasets_query.distinct()] - - # datafiles = [*DataFile.safe.all(user).values_list("id", "size")] - datafiles_query = ( - user.datafileacls.select_related("datafile") - .exclude( - effectiveDate__gte=datetime.today(), expiryDate__lte=datetime.today() - ) - .values_list("datafile__id", "datafile__size") - ) - for group in groups: - datafiles_query |= ( - group.datafileacls.select_related("datafile") - .exclude( - effectiveDate__gte=datetime.today(), - expiryDate__lte=datetime.today(), - ) - .values_list("datafile__id", "datafile__size") - ) - datafiles = [*datafiles_query.distinct()] - - # add data to preloaded["objects"] dictionary with ID as key and nested items as value - key/values. - # Probably a cleaner/simpler way to do this, but hey ho! 
- for key, value in { - "project": projects, - "experiment": experiments, - "dataset": datasets, - "datafile": datafiles, - }.items(): - for item in value: - name = item[0] - if name in preloaded[key]["objects"]: - if key == "dataset": - preloaded[key]["objects"][name]["dfs"].add(item[1]) - elif key == "experiment": - preloaded[key]["objects"][name]["sets"].add(item[1]) - preloaded[key]["objects"][name]["dfs"].add(item[2]) - elif key == "project": - preloaded[key]["objects"][name]["exps"].add(item[1]) - preloaded[key]["objects"][name]["sets"].add(item[2]) - preloaded[key]["objects"][name]["dfs"].add(item[3]) - else: - new_dict = {} - if key == "datafile": - new_dict["size"] = item[1] - elif key == "dataset": - new_dict["dfs"] = {item[1]} - elif key == "experiment": - new_dict["sets"] = {item[1]} - new_dict["dfs"] = {item[2]} - elif key == "project": - new_dict["exps"] = {item[1]} - new_dict["sets"] = {item[2]} - new_dict["dfs"] = {item[3]} - preloaded[key]["objects"][name] = new_dict # Create the result object which will be returned to the front-end result_dict = {k: [] for k in ["project", "experiment", "dataset", "datafile"]} # If filters are active, enforce the "parent in results" criteria on relevant objects if filter_level: - # Define parent_type for experiment/datafile (N/A for project, hardcoded for dataset) - parent_child = {"experiment": "projects", "dataset": "experiments"} - # Define hierarchy of types for filter levels - hierarch = [3, 2, 1] # {"experiments":3, "datasets":2, "datafiles":1} - for idx, item in enumerate(results[1:]): - # if active filter level higher than current object type: apply "parent-in-result" filter - if hierarch[idx] < filter_level: - parent_ids = [ - objj["_source"]["id"] for objj in results[idx].hits.hits - ] - parent_ids_set = {*parent_ids} - - for obj_idx, obj in reversed([*enumerate(item.hits.hits)]): - if obj["_index"] != "datafile": - parent_es_ids = [ - parent["id"] - for parent in obj["_source"][ - parent_child[obj["_index"]] - ] - ] - if not any(itemm in parent_es_ids for itemm in parent_ids): - results[idx + 1].hits.hits.pop(obj_idx) - else: - if ( - obj["_source"]["dataset"]["id"] not in parent_ids_set - ): # parent object is idx-1, but idx in enumerate is already shifted by -1, so straight idx - results[idx + 1].hits.hits.pop(obj_idx) + results = cleaning_parent_filter(results, filter_level) + # Count the number of search results after elasticsearch + parent filtering total_hits = { index_list[idx]: len(type.hits.hits) for idx, type in enumerate(results) } + # Pagination done before final cleaning to reduce "clean_parent_ids" duration + # Default Pagination handled by response.get if key isn't specified for item in results: item.hits.hits = item.hits.hits[ request_offset : (request_offset + request_size) ] - # Pagination done before final cleaning to reduce "clean_parent_ids" duration - # Default Pagination handled by response.get if key isn't specified - # result_dict = {k:v[request_offset:(request_offset+request_size)] for k,v in result_dict.items()} - # Clean and prepare the results "hit" objects and append them to the results_dict - for item in results: - for hit_attrdict in item.hits.hits: - hit = hit_attrdict.to_dict() - - # Check to see if indexed object actually exists in DB, if not then skip - if int(hit["_source"]["id"]) not in preloaded[hit["_index"]]["objects"]: - continue - - # Default sensitive permission and size of object - sensitive_bool = False - size = 0 - # If user/group has sensitive permission, update flag - if 
hit["_source"]["id"] in preloaded[hit["_index"]]["sens_list"]: - sensitive_bool = True - # Re-package parameters into single parameter list - param_list = [] - if "string" in hit["_source"]["parameters"]: - param_list.extend(hit["_source"]["parameters"]["string"]) - if "numerical" in hit["_source"]["parameters"]: - param_list.extend(hit["_source"]["parameters"]["numerical"]) - if "datetime" in hit["_source"]["parameters"]: - param_list.extend(hit["_source"]["parameters"]["datetime"]) - hit["_source"]["parameters"] = param_list - # Remove unused fields to reduce data sent to front-end - hit.pop("_score") - hit.pop("_id") - # hit.pop("_type") - hit.pop("sort") - - # Get count of all nested objects and download status - if hit["_index"] == "datafile": - if hit["_source"]["id"] in datafiles_dl: - hit["_source"]["userDownloadRights"] = "full" - size = hit["_source"]["size"] - else: - hit["_source"]["userDownloadRights"] = "none" - - else: - safe_nested_dfs_set = { - *preloaded["datafile"]["objects"] - }.intersection( - preloaded[hit["_index"]]["objects"][hit["_source"]["id"]]["dfs"] - ) - safe_nested_dfs_count = len(safe_nested_dfs_set) - if hit["_index"] in {"project", "experiment"}: - safe_nested_set = len( - {*preloaded["dataset"]["objects"]}.intersection( - preloaded[hit["_index"]]["objects"][ - hit["_source"]["id"] - ]["sets"] - ) - ) - # Ugly hack, should do a nicer, less verbose loop+type detection - if hit["_index"] == "project": - safe_nested_exp = len( - {*preloaded["experiment"]["objects"]}.intersection( - preloaded[hit["_index"]]["objects"][ - hit["_source"]["id"] - ]["exps"] - ) - ) - hit["_source"]["counts"] = { - "experiments": safe_nested_exp, - "datasets": safe_nested_set, - "datafiles": (safe_nested_dfs_count), - } - if hit["_index"] == "experiment": - hit["_source"]["counts"] = { - "datasets": safe_nested_set, - "datafiles": safe_nested_dfs_count, - } - if hit["_index"] == "dataset": - hit["_source"]["counts"] = {"datafiles": safe_nested_dfs_count} - # Get downloadable datafiles ultimately belonging to this "hit" object - # and calculate the total size of these files - safe_nested_dfs_dl = [ - *safe_nested_dfs_set.intersection(datafiles_dl) - ] - size = sum( - ( - preloaded["datafile"]["objects"][id]["size"] - for id in safe_nested_dfs_dl - ) - ) - # Determine the download state of the "hit" object - # safe_nested_dfs_dl_bool = [id in datafiles_dl for id in safe_nested_dfs] - if safe_nested_dfs_set.issubset(datafiles_dl): - hit["_source"]["userDownloadRights"] = "full" - elif safe_nested_dfs_set.intersection(datafiles_dl): - hit["_source"]["userDownloadRights"] = "partial" - else: - hit["_source"]["userDownloadRights"] = "none" - - hit["_source"]["size"] = filesizeformat(size) - - # if no sensitive access, remove sensitive metadata from response - for idxx, parameter in reversed( - [*enumerate(hit["_source"]["parameters"])] - ): - if not sensitive_bool: - if parameter["sensitive"]: - hit["_source"]["parameters"].pop(idxx) - else: - hit["_source"]["parameters"][idxx].pop("sensitive") - else: - if not parameter["sensitive"]: - hit["_source"]["parameters"][idxx].pop("sensitive") - - # Append hit to results if not already in results. 
- # Due to non-identical scores in hits for non-sensitive vs sensitive search, - # we require a more complex comparison than just 'is in' as hits are not identical - # if hit["_source"]['id'] not in [objj["_source"]['id'] for objj in result_dict[hit["_index"]+"s"]]: - result_dict[hit["_index"]].append(hit) + result_dict = cleaning_results(results, result_dict, preloaded, datafiles_dl) # Removes parent IDs from hits once parent-filtering applied # Removed for tidiness in returned response to front-end diff --git a/tardis/apps/search/documents.py b/tardis/apps/search/documents.py index 6657c3cc5..9382b2b66 100644 --- a/tardis/apps/search/documents.py +++ b/tardis/apps/search/documents.py @@ -3,10 +3,10 @@ from django.conf import settings from django.contrib.auth.models import User from django.db.models.signals import post_delete - -from elasticsearch_dsl import analyzer, token_filter from django_elasticsearch_dsl import Document, fields from django_elasticsearch_dsl.registries import registry +from elasticsearch_dsl import analyzer, token_filter + from tardis.tardis_portal.models import ( Experiment, @@ -24,14 +24,18 @@ DatafileParameter, DatafileParameterSet, ) - from tardis.apps.projects.models import ( Project, ProjectParameter, ProjectParameterSet, ProjectACL, ) - +from .utils.documents import ( + generic_acl_structure, + generic_parameter_structure, + prepare_generic_acls, + prepare_generic_parameters, +) logger = logging.getLogger(__name__) @@ -58,196 +62,6 @@ ) -def generic_acl_structure(): - """ - Return the ES structure of an ACL. - - - pluginId = type of ACL owner: user/group/token - - entityId = ID of the owner - """ - return fields.NestedField( - properties={ - "pluginId": fields.KeywordField(), - "entityId": fields.KeywordField(), - "canDownload": fields.BooleanField(), - "canSensitive": fields.BooleanField(), - } - ) - - -def generic_parameter_structure(): - """ - Return the ES structure of object parameters and schema. - The parameter structure splits out string/numerical/datetime - parameters so that ES can specifically handle each of their - datatypes. - - - Schemas: - - schema_id: Id of the object schemas - - string/numerical/datetime: - - pn_id: Id of parameter name - - pn_name: Name of parameter name - - value: value of parameter - - sensitive: whether parameter name is sensitive - """ - return fields.NestedField( - properties={ - "string": fields.NestedField( - properties={ - "pn_id": fields.KeywordField(), - "pn_name": fields.KeywordField(), - "value": fields.TextField(), - "sensitive": fields.BooleanField(), - } - ), - "numerical": fields.NestedField( - properties={ - "pn_id": fields.KeywordField(), - "pn_name": fields.KeywordField(), - "value": fields.FloatField(), - "sensitive": fields.BooleanField(), - } - ), - "datetime": fields.NestedField( - properties={ - "pn_id": fields.KeywordField(), - "pn_name": fields.KeywordField(), - "value": fields.DateField(), - "sensitive": fields.BooleanField(), - } - ), - "schemas": fields.NestedField( - properties={"schema_id": fields.KeywordField()} - ), - }, - ) - - -def prepare_generic_acls_build(INSTANCE_ACL_SET, return_list): - """Returns the ACLs associated with this - object, formatted for elasticsearch. 
- """ - for acl in INSTANCE_ACL_SET: - acl_dict = {} - if acl["user__id"] is not None: - acl_dict["pluginId"] = "django_user" - acl_dict["entityId"] = acl["user__id"] - if acl["group__id"] is not None: - acl_dict["pluginId"] = "django_group" - acl_dict["entityId"] = acl["group__id"] - if acl["token__id"] is not None: - # token access shouldn't be added to search - # unless search is given a way of checking token expiry - continue - # add in permission booleans - acl_dict["canDownload"] = acl["canDownload"] - acl_dict["canSensitive"] = acl["canSensitive"] - if acl_dict not in return_list: - return_list.append(acl_dict) - - -def prepare_generic_acls(type, INSTANCE_ACL_SET, INSTANCE_EXPS=None): - """Returns the ACLs associated with this - object, formatted for elasticsearch. - - This function is mostly just a wrapper around "prepare_generic_acls_build" - to account for current macro/micro behaviour. - """ - return_list = [] - if settings.ONLY_EXPERIMENT_ACLS and type != "experiment": - for exp in INSTANCE_EXPS.all(): - prepare_generic_acls_build( - exp.experimentacl_set.select_related("user", "group", "token") - .all() - .exclude(user__id=settings.PUBLIC_USER_ID) - .values( - "user__id", - "group__id", - "token__id", - "canDownload", - "canSensitive", - ), - return_list, - ) - else: - prepare_generic_acls_build( - INSTANCE_ACL_SET.select_related("user", "group", "token") - .all() - .exclude(user__id=settings.PUBLIC_USER_ID) - .values( - "user__id", - "group__id", - "token__id", - "canDownload", - "canSensitive", - ), - return_list, - ) - return return_list - - -def prepare_generic_parameters(instance, type): - """Returns the parameters associated with the provided instance, - formatted for elasticsearch.""" - - type_dict = { - "project": ProjectParameter, - "experiment": ExperimentParameter, - "dataset": DatasetParameter, - "datafile": DatafileParameter, - } - OBJPARAMETERS = type_dict[type] - - # get list of object parametersets - paramsets = list(instance.getParameterSets()) - parameter_groups = { - "string": [], - "numerical": [], - "datetime": [], - "schemas": [], - } - # iterate over parametersets of an object - for paramset in paramsets: - param_type = {1: "datetime", 2: "string", 3: "numerical"} - # query parameters from parameterset - param_glob = OBJPARAMETERS.objects.filter(parameterset=paramset).values_list( - "name", - "datetime_value", - "string_value", - "numerical_value", - ) - # add schema information to dict - parameter_groups["schemas"].append({"schema_id": paramset.schema_id}) - # iterate over parameter info "name/datetime/string/numerical" - for sublist in param_glob: - # query parametername info using "name" - PN = ParameterName.objects.get(id=sublist[0]) - # build dict for param - param_dict = {} - type_idx = 0 - # iterate over datetime/string/numerical info - for idx, value in enumerate(sublist[1:]): - # if datetime/string/numerical atually contains info - if value not in [None, ""]: - # add parametername info to dict - param_dict["pn_id"] = str(PN.id) - param_dict["pn_name"] = str(PN.full_name) - param_dict["sensitive"] = PN.sensitive - type_idx = idx + 1 - # detect type of param, and add value to dict - if type_idx == 1: - param_dict["value"] = value - elif type_idx == 2: - param_dict["value"] = str(value) - elif type_idx == 3: - param_dict["value"] = float(value) - # if parameter with a value is added, add param_dict to - # parameters_dict - if type_idx: - parameter_groups[param_type[type_idx]].append(param_dict) - return parameter_groups - - class 
MyTardisDocument(Document): """ Generalised class for MyTardis objects @@ -605,7 +419,8 @@ def update_es_relations(instance, **kwargs): in the django_elasticsearch_dsl package. This function simply re-indexes relevant documents a second time on post_delete. - Probably clashes with the Async CelerySignalProcessor. + Probably clashes with the Async CelerySignalProcessor, so have forced + non-compatability between this function and CelerySignalProcessor=True. """ if isinstance(instance, ProjectACL): parent = instance.project diff --git a/tardis/apps/search/utils/__init__.py b/tardis/apps/search/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tardis/apps/search/utils/api.py b/tardis/apps/search/utils/api.py new file mode 100644 index 000000000..7114f7108 --- /dev/null +++ b/tardis/apps/search/utils/api.py @@ -0,0 +1,803 @@ +""" +helper functions used in api.py +""" + +from datetime import datetime + +from django.template.defaultfilters import filesizeformat +from elasticsearch_dsl import Q + +from tardis.tardis_portal.models import ( + Schema, +) + + +def cleaning_results(results, result_dict, preloaded, datafiles_dl): + """ + Filter out and clean search result hits based on a number of critera. + """ + + for item in results: + for hit_attrdict in item.hits.hits: + hit = hit_attrdict.to_dict() + + # Check to see if indexed object actually exists in DB, if not then skip + if int(hit["_source"]["id"]) not in preloaded[hit["_index"]]["objects"]: + continue + + # Default sensitive permission and size of object + sensitive_bool = False + size = 0 + # If user/group has sensitive permission, update flag + if hit["_source"]["id"] in preloaded[hit["_index"]]["sens_list"]: + sensitive_bool = True + # Re-package parameters into single parameter list + param_list = [] + if "string" in hit["_source"]["parameters"]: + param_list.extend(hit["_source"]["parameters"]["string"]) + if "numerical" in hit["_source"]["parameters"]: + param_list.extend(hit["_source"]["parameters"]["numerical"]) + if "datetime" in hit["_source"]["parameters"]: + param_list.extend(hit["_source"]["parameters"]["datetime"]) + hit["_source"]["parameters"] = param_list + # Remove unused fields to reduce data sent to front-end + hit.pop("_score") + hit.pop("_id") + # hit.pop("_type") + hit.pop("sort") + + # Get count of all nested objects and download status + if hit["_index"] == "datafile": + if hit["_source"]["id"] in datafiles_dl: + hit["_source"]["userDownloadRights"] = "full" + size = hit["_source"]["size"] + else: + hit["_source"]["userDownloadRights"] = "none" + + else: + safe_nested_dfs_set = {*preloaded["datafile"]["objects"]}.intersection( + preloaded[hit["_index"]]["objects"][hit["_source"]["id"]]["dfs"] + ) + safe_nested_dfs_count = len(safe_nested_dfs_set) + if hit["_index"] in {"project", "experiment"}: + safe_nested_set = len( + {*preloaded["dataset"]["objects"]}.intersection( + preloaded[hit["_index"]]["objects"][hit["_source"]["id"]][ + "sets" + ] + ) + ) + # Ugly hack, should do a nicer, less verbose loop+type detection + if hit["_index"] == "project": + safe_nested_exp = len( + {*preloaded["experiment"]["objects"]}.intersection( + preloaded[hit["_index"]]["objects"][hit["_source"]["id"]][ + "exps" + ] + ) + ) + hit["_source"]["counts"] = { + "experiments": safe_nested_exp, + "datasets": safe_nested_set, + "datafiles": (safe_nested_dfs_count), + } + if hit["_index"] == "experiment": + hit["_source"]["counts"] = { + "datasets": safe_nested_set, + "datafiles": safe_nested_dfs_count, + } + if 
hit["_index"] == "dataset": + hit["_source"]["counts"] = {"datafiles": safe_nested_dfs_count} + # Get downloadable datafiles ultimately belonging to this "hit" object + # and calculate the total size of these files + safe_nested_dfs_dl = [*safe_nested_dfs_set.intersection(datafiles_dl)] + size = sum( + ( + preloaded["datafile"]["objects"][id]["size"] + for id in safe_nested_dfs_dl + ) + ) + # Determine the download state of the "hit" object + # safe_nested_dfs_dl_bool = [id in datafiles_dl for id in safe_nested_dfs] + if safe_nested_dfs_set.issubset(datafiles_dl): + hit["_source"]["userDownloadRights"] = "full" + elif safe_nested_dfs_set.intersection(datafiles_dl): + hit["_source"]["userDownloadRights"] = "partial" + else: + hit["_source"]["userDownloadRights"] = "none" + + hit["_source"]["size"] = filesizeformat(size) + + # if no sensitive access, remove sensitive metadata from response + for idxx, parameter in reversed([*enumerate(hit["_source"]["parameters"])]): + if not sensitive_bool: + if parameter["sensitive"]: + hit["_source"]["parameters"].pop(idxx) + else: + hit["_source"]["parameters"][idxx].pop("sensitive") + else: + if not parameter["sensitive"]: + hit["_source"]["parameters"][idxx].pop("sensitive") + + # Append hit to results if not already in results. + # Due to non-identical scores in hits for non-sensitive vs sensitive search, + # we require a more complex comparison than just 'is in' as hits are not identical + result_dict[hit["_index"]].append(hit) + return result_dict + + +def cleaning_parent_filter(results, filter_level): + """ + filter out hits results based upon level of parent filtering + """ + + # Define parent_type for experiment/datafile + # (N/A for project, hardcoded for dataset) + parent_child = {"experiment": "projects", "dataset": "experiments"} + # Define hierarchy of types for filter levels + hierarch = [3, 2, 1] # {"experiments":3, "datasets":2, "datafiles":1} + for idx, item in enumerate(results[1:]): + # if active filter level higher than current object type: + # apply "parent-in-result" filter + if hierarch[idx] < filter_level: + parent_ids = [objj["_source"]["id"] for objj in results[idx].hits.hits] + parent_ids_set = {*parent_ids} + + for obj_idx, obj in reversed([*enumerate(item.hits.hits)]): + if obj["_index"] != "datafile": + parent_es_ids = [ + parent["id"] + for parent in obj["_source"][parent_child[obj["_index"]]] + ] + if not any(itemm in parent_es_ids for itemm in parent_ids): + results[idx + 1].hits.hits.pop(obj_idx) + else: + if ( + obj["_source"]["dataset"]["id"] not in parent_ids_set + ): # parent object is idx-1, but idx in enumerate + # is already shifted by -1, so straight idx + results[idx + 1].hits.hits.pop(obj_idx) + return results + + +def cleaning_preload(preloaded, projects, experiments, datasets, datafiles): + """ + Populate the preload dictionary with IDs and child IDs of search results. + """ + + # Probably a cleaner/simpler way to do this, but hey ho! 
+ for otype, id_list in { + "project": projects, + "experiment": experiments, + "dataset": datasets, + "datafile": datafiles, + }.items(): + # iterate over objects in list - each list element has the obj_id + # followed by separated lists of any children objects + for ids in id_list: + # extract object ID + obj_id = ids[0] + # Check if the ID already exists in preloaded dict + if obj_id in preloaded[otype]["objects"]: + # add nested children to the dict + # Note: Datafiles don't have children so only in the else clause + if otype == "dataset": + preloaded[otype]["objects"][obj_id]["dfs"].add(ids[1]) + elif otype == "experiment": + preloaded[otype]["objects"][obj_id]["sets"].add(ids[1]) + preloaded[otype]["objects"][obj_id]["dfs"].add(ids[2]) + elif otype == "project": + preloaded[otype]["objects"][obj_id]["exps"].add(ids[1]) + preloaded[otype]["objects"][obj_id]["sets"].add(ids[2]) + preloaded[otype]["objects"][obj_id]["dfs"].add(ids[3]) + else: + # create the new dict for the object and populate with + # children (proj/exp/set) or datafile size (datafile) + new_dict = {} + if otype == "datafile": + new_dict["size"] = ids[1] + elif otype == "dataset": + new_dict["dfs"] = {ids[1]} + elif otype == "experiment": + new_dict["sets"] = {ids[1]} + new_dict["dfs"] = {ids[2]} + elif otype == "project": + new_dict["exps"] = {ids[1]} + new_dict["sets"] = {ids[2]} + new_dict["dfs"] = {ids[3]} + # assign new dict to preloaded dict + preloaded[otype]["objects"][obj_id] = new_dict + return preloaded + + +def cleaning_ids(user, groups, objtype): + """ + Function to build up generic object queries to get ID information + on objects, and specifically also the size of Datafiles. + """ + + if objtype == "project": + prefetch_fields = [ + "project__experiments", + "project__experiments__datasets", + "project__experiments__datasets__datafile", + ] + value_fields = [field + "__id" for field in prefetch_fields] + + if objtype == "experiment": + prefetch_fields = [ + "experiment__datasets", + "experiment__datasets__datafile", + ] + value_fields = [field + "__id" for field in prefetch_fields] + + if objtype == "dataset": + prefetch_fields = [ + "dataset__datafile", + ] + value_fields = [field + "__id" for field in prefetch_fields] + + if objtype == "datafile": + prefetch_fields = None + value_fields = ["datafile__size"] + + id_list = cleaning_acls( + user, + groups, + objtype, + prefetch_fields=prefetch_fields, + value_fields=value_fields, + flat=False, + ) + return id_list + + +def cleaning_acls( + user, + groups, + objtype, + canSensitive=False, + canDownload=False, + prefetch_fields=None, + value_fields=None, + flat=True, +): + """ + Function to build up generic object queries to get ACL or ID information + on objects. 
+ """ + if objtype == "project": + entity = user.projectacls + if objtype == "experiment": + entity = user.experimentacls + if objtype == "dataset": + entity = user.datasetacls + if objtype == "datafile": + entity = user.datafileacls + + query = cleaning_acl_query( + entity, + objtype, + canSensitive=canSensitive, + canDownload=canDownload, + prefetch_fields=prefetch_fields, + value_fields=value_fields, + flat=flat, + ) + for group in groups: + + if objtype == "project": + entity = group.projectacls + if objtype == "experiment": + entity = group.experimentacls + if objtype == "dataset": + entity = group.datasetacls + if objtype == "datafile": + entity = group.datafileacls + + query |= cleaning_acl_query( + entity, + objtype, + canSensitive=canSensitive, + canDownload=canDownload, + prefetch_fields=prefetch_fields, + value_fields=value_fields, + flat=flat, + ) + return [*query.distinct()] + + +def cleaning_acl_query( + entity_acls, + objtype, + canSensitive=False, + canDownload=False, + prefetch_fields=None, + value_fields=None, + flat=True, +): + """ + Function to build up generic object queries to get ACL or ID information + on objects. + """ + # build query on object and related ACLs + query = entity_acls.select_related(objtype) + + # apply specific ACL perm filter + if canSensitive is True: + query = query.filter(canSensitive=True) + if canDownload is True: + query = query.filter(canSensitive=True) + + # if prefetch_fields are specified, add prefetch to query + if prefetch_fields is not None: + query = query.prefetch_related(*prefetch_fields) + + # exclude too-new/expired ACLs + query = query.exclude( + effectiveDate__gte=datetime.today(), + expiryDate__lte=datetime.today(), + ) + + # add OBJ__id to values_list return + value_list_to_add = [objtype + "__id"] + # if list of extra values_list specified, add them to query + if value_fields is not None: + value_list_to_add.extend(value_fields) + + return query.values_list(*value_list_to_add, flat=flat) + + +def query_add_sorting(request_sorting, obj, sort_dict): + """ + Function to build up sorting filters that need to be applied to + a search query + """ + # make sure sorting request contains info and current object/model is in sorting request + if (request_sorting is not None) and (obj in request_sorting): + # check if + # iterate over sort options + for sort in request_sorting[obj]: + + # process nested sort filters + if len(sort["field"]) > 1: + # if in this dict then field adds .raw to end of sort + if sort["field"][-1] in { + "fullname", + "name", + "title", + "description", + "filename", + }: + search_field = ".".join(sort["field"]) + ".raw" + else: + search_field = ".".join(sort["field"]) + # build up sorting dict options + sort_dict[search_field] = { + "order": sort["order"], + "nested_path": ".".join(sort["field"][:-1]), + } + # process non-nested sort filters + if len(sort["field"]) == 1: + # these fields need to have .raw added to them + if sort["field"][0] in { + "principal_investigator", + "name", + "title", + "description", + "filename", + }: + sort_dict[sort["field"][0] + ".raw"] = {"order": sort["order"]} + # size field needs specific handling + elif sort["field"][0] == "size": + # for datafile size is easy to calculate + if obj == "datafile": + sort_dict[sort["field"][0]] = {"order": sort["order"]} + # for parent models we need ACL context for this, + # which is currently not available in ES. 
+ else: + # DO THIS SORTING AFTER ELASTICSEARCH + pass + else: + sort_dict[sort["field"][0]] = {"order": sort["order"]} + return sort_dict + + +def Q_nested(path, query): + """wrapper function for readability of nested ES Queries""" + query = Q({"nested": {"path": path, "query": query}}) + return query + + +def Q_must(query_list): + """wrapper function for readability of must ES Queries""" + query = Q({"bool": {"must": query_list}}) + return query + + +def create_user_and_group_query(user, groups): + """ + This function creates an initial search query object and requires that + any results must have an appropriate User OR Group ACL for a user + any + of their groups. + """ + # query where ACL must match entityId=User.id and pluginId=django_user + query_obj = Q_nested( + path="acls", + query=Q_must( + query_list=[ + Q({"match": {"acls.entityId": user.id}}), + Q({"term": {"acls.pluginId": "django_user"}}), + ] + ), + ) + # queries where ACL must match entityId=group.id and pluginId=django_group + for group in groups: + query_obj_group = Q_nested( + path="acls", + query=Q_must( + query_list=[ + Q({"match": {"acls.entityId": group.id}}), + Q({"term": {"acls.pluginId": "django_group"}}), + ] + ), + ) + # add each group query as an OR to existing query + query_obj = query_obj | query_obj_group + return query_obj + + +def query_keywords_and_metadata(query_obj, query_text, object_type, idx, title_list): + """ + This function takes an existing search query and adds matches for + keywords or non-sensitive metadata fields. + """ + # query where object "title" must match query_text for given object + query_obj_text = Q({"match": {title_list[idx]: query_text[object_type]}}) + # query where non-sensitive parameters of object must match query_text for given object + query_obj_text_meta = Q_nested( + path="parameters.string", + query=Q_must( + query_list=[ + Q({"match": {"parameters.string.value": query_text[object_type]}}), + Q({"term": {"parameters.string.sensitive": False}}), + ] + ), + ) + # stack up query by matching "title" OR "non-sensitive metadata" + query_obj_text_meta = query_obj_text | query_obj_text_meta + # stack keyword+metadata query with existing query via AND (we expect it's the user/group query) + query_obj = query_obj & query_obj_text_meta + return query_obj + + +def _query_filter_on_parameters_type(ptype, param_id, oper, value): + """ + This function is a generic handler to build an individual parameter query. It deals with + the following types of metadata: "string", "numerical", "datetime". + """ + + # cast the type field into the correct spelling for object document structure + type_mapping = {"STRING": "string", "NUMERIC": "numerical", "DATETIME": "datetime"} + ptype = type_mapping[ptype] + + # tweak the operators and value structure based up parameter type + if ptype == "string": + base_oper = oper + query_val = value + + elif ptype in ["numerical", "datetime"]: + base_oper = "range" + query_val = {oper: value} + + # create and return query on parameter using provided inputs + query_obj = Q_nested( + path="parameters." + ptype, + query=Q_must( + query_list=[ + Q({"match": {"parameters." + ptype + ".pn_id": str(param_id)}}), + Q({base_oper: {"parameters." + ptype + ".value": query_val}}), + Q({"term": {"parameters." + ptype + ".sensitive": False}}), + ] + ), + ) + return query_obj + + +def _query_filter_on_parameters(query_obj, filter, obj, filter_level, hierarchy, oper): + """ + This function is a component of the more general "query_apply_filters" function. 
+ Here we build up queries on Schema-parameter / metadata filters. + """ + + # Hardcode of Schema types+numbers here + num_2_type = { + 1: "experiment", + 2: "dataset", + 3: "datafile", + 6: "project", + } + + # extract schema + parameter id from API request + schema_id, param_id = filter["target"][0], filter["target"][1] + + # check filter is applied to correct object type + if num_2_type[Schema.objects.get(id=schema_id).type] == obj: + # redefine "parent-in-results" threshold if required + if filter_level < hierarchy[obj]: + filter_level = hierarchy[obj] + + # check if filter query is list of options or a single value + # (elasticsearch can actually handle delimiters in a single string...) + if isinstance(filter["content"], list): + # if filter is a list, create an "OR" operator list of search queries + # and pass each value individually to build "OR" query + Qdict = {"should": []} + for option in filter["content"]: + query = _query_filter_on_parameters_type( + filter["type"], param_id, oper, option + ) + Qdict["should"].append(query) + # final "OR" query is built up from the individual ones + query_obj_filt = Q({"bool": Qdict}) + + else: + # if filter content not a list of options then pass filter content + # directly as the value to search on + query_obj_filt = _query_filter_on_parameters_type( + filter["type"], param_id, oper, filter["content"] + ) + + # as before, combine filter query with existing query + query_obj = query_obj & query_obj_filt + + # return both the query_obj and the filter level variable + return query_obj, filter_level + + +def _query_filter_on_intrinsic_schemas(query_obj, filter, oper): + """ + This function is applies "filter by schema" to a search query. + """ + # check if filter query is list of options, or single value + if isinstance(filter["content"], list): + # If its a list of filters, build up several queries and combine them with OR + Qdict = {"should": []} + for option in filter["content"]: + qry = Q_nested( + path="parameters.schemas", + query=Q({oper: {"parameters.schemas.schema_id": option}}), + ) + Qdict["should"].append(qry) + query_obj_filt = Q({"bool": Qdict}) + else: + # if its a single filter, just create one query + query_obj_filt = Q_nested( + path="parameters.schemas", + query=Q({oper: {"parameters.schemas.schema_id": filter["content"]}}), + ) + + # combine this filter with the existing query and return it + query_obj = query_obj & query_obj_filt + return query_obj + + +def _query_filter_on_intrinsic_fields(query_obj, filter, oper, target_fieldtype): + """ + This function is applies "filter by intrinsic object fields" to a search query. + It separates the fields into two groups: "string" fields and "datetime" fields. 
+    String fields: name, description, title, tags, filename, file_extension
+    Datetime fields: created_time, start_time, end_time
+    """
+
+    # if the filter type is on a string field
+    if filter["type"] == "STRING":
+        # string filters can be comma-separated lists, so check for list or single
+        if isinstance(filter["content"], list):
+            # if filter is a list, build individual queries for each filter and combine
+            # using an "OR"
+            Qdict = {"should": []}
+            for option in filter["content"]:
+                # Special treatment for file extension to remove leading full stops
+                if target_fieldtype == "file_extension":
+                    if option[0] == ".":
+                        option = option[1:]
+                # add individual query to list of query components
+                qry = Q({oper: {target_fieldtype: option}})
+                Qdict["should"].append(qry)
+            # build final query from individual components
+            query_obj_filt = Q({"bool": Qdict})
+        else:
+            # Special treatment for file extension to remove leading full stops
+            if target_fieldtype == "file_extension":
+                if filter["content"][0] == ".":
+                    filter["content"] = filter["content"][1:]
+            # simply build the filter query
+            query_obj_filt = Q({oper: {target_fieldtype: filter["content"]}})
+    # if the filter type is a datetime field
+    elif filter["type"] == "DATETIME":
+        # simply build the single query
+        query_obj_filt = Q({"range": {target_fieldtype: {oper: filter["content"]}}})
+    # combine the filter query with existing search query and return it
+    query_obj = query_obj & query_obj_filt
+    return query_obj
+
+
+def _query_filter_relations_builder(target_fieldtype, oper, nested_fieldtype, option):
+    """
+    This function is a generic query builder for the relational filter queries.
+    """
+    # build a nested query on the related object's sub-field,
+    # i.e. "<target_fieldtype>.<nested_fieldtype>"
+    query_obj = Q_nested(
+        path=target_fieldtype,
+        query=Q(
+            {
+                oper: {
+                    ".".join(
+                        [
+                            target_fieldtype,
+                            nested_fieldtype,
+                        ]
+                    ): option
+                }
+            }
+        ),
+    )
+
+    return query_obj
+
+
+def _query_filter_on_intrinsic_relations(query_obj, filter, oper, target_fieldtype):
+    """
+    This function applies "filter by intrinsic relations" to a search query.
+    This includes the following relational fields (and the object type each acts on):
+     - principal_investigator (Project),
+     - projects (Experiment),
+     - instrument (Dataset),
+     - institution (Project),
+     - experiments (Dataset),
+     - dataset (Datafile),
+    """
+
+    # extract the nested field type from filter
+    nested_fieldtype = filter["target"][2]
+    # determine if filter is a list of filters or individual search term
+    if isinstance(filter["content"], list):
+        Qdict = {"should": []}
+        # iterate over options and build individual queries to combine with "OR"
+        for option in filter["content"]:
+            qry = _query_filter_relations_builder(
+                target_fieldtype, oper, nested_fieldtype, option
+            )
+            Qdict["should"].append(qry)
+        # build final query out of individual components
+        query_obj_filt = Q({"bool": Qdict})
+    else:
+        # if individual search term, simply build it
+        query_obj_filt = _query_filter_relations_builder(
+            target_fieldtype, oper, nested_fieldtype, filter["content"]
+        )
+    # Special handling for list of principal investigators
+    if target_fieldtype == "principal_investigator":
+        # principal investigator builds on existing intrinsic relation query!
+        Qdict_lr = {"should": [query_obj_filt]}
+        # determine if filter is a list of filters or individual search term
+        if isinstance(filter["content"], list):
+            Qdict = {"should": []}
+            # iterate over options and build individual queries to combine with "OR"
+            for option in filter["content"]:
+                qry = _query_filter_relations_builder(
+                    target_fieldtype, "term", "username", option
+                )
+                Qdict["should"].append(qry)
+            # build final query out of individual components
+            query_obj_filt = Q({"bool": Qdict})
+        else:
+            # if individual search term, simply build it
+            query_obj_filt = _query_filter_relations_builder(
+                target_fieldtype, "term", "username", filter["content"]
+            )
+        # build special "OR" query for principal_investigator search
+        Qdict_lr["should"].append(query_obj_filt)
+        query_obj_filt = Q({"bool": Qdict_lr})
+    # combine intrinsic relation filter with existing search query and return
+    query_obj = query_obj & query_obj_filt
+    return query_obj
+
+
+def _query_filter_on_intrinsics(query_obj, filter, obj, filter_level, hierarchy, oper):
+    """
+    This function is a component of the more general "query_apply_filters" function.
+    Here we build up queries on intrinsic filters such as filter by Schema, filter by object
+    properties, filter by related object properties.
+    """
+
+    # Extract out the filter object type and the filter field/variable name
+    target_objtype, target_fieldtype = (
+        filter["target"][0],
+        filter["target"][1],
+    )
+
+    # Only process the filter if it targets the object type currently being searched
+    if target_objtype == obj:
+        # Update the hierarchy level at which the
+        # "parent-in-results" criteria must be applied
+        if filter_level < hierarchy[obj]:
+            filter_level = hierarchy[obj]
+
+        # Apply "Selected Schema" filter
+        if target_fieldtype == "schema":
+            query_obj = _query_filter_on_intrinsic_schemas(query_obj, filter, oper)
+
+        # Apply filters that act on fields which are intrinsic to the object (Proj,exp,set,file)
+        if target_fieldtype in {
+            "name",
+            "description",
+            "title",
+            "tags",
+            "filename",
+            "file_extension",
+            "created_time",
+            "start_time",
+            "end_time",
+        }:
+            query_obj = _query_filter_on_intrinsic_fields(
+                query_obj, filter, oper, target_fieldtype
+            )
+
+        # Apply filters that act on fields which are intrinsic to related objects (instruments, users, etc)
+        if target_fieldtype in {
+            "principal_investigator",
+            "projects",
+            "instrument",
+            "institution",
+            "experiments",
+            "dataset",
+        }:
+
+            query_obj = _query_filter_on_intrinsic_relations(
+                query_obj, filter, oper, target_fieldtype
+            )
+
+    # return the updated query_obj and filter_level
+    return query_obj, filter_level
+
+
+def query_apply_filters(query_obj, filters, obj, filter_level, hierarchy):
+    """
+    This function takes an existing search query and adds matches for
+    the various filters on metadata that can be applied.
+    """
+
+    # combination logic of filters can theoretically be applied, but not supported yet.
+    # filter_op = filters["op"]  (not used for now)
+
+    filterlist = filters["content"]
+
+    # Define operator translations between API and ES
+    operator_dict = {
+        "is": "term",
+        "contains": "match",
+        ">=": "gte",
+        "<=": "lte",
+    }
+
+    # Iterate over filters in request
+    for filter in filterlist:
+
+        # Extract required operator for filter
+        oper = operator_dict[filter["op"]]
+
+        # Apply Schema-parameter / metadata filters to search
+        if filter["kind"] == "schemaParameter":
+            query_obj, filter_level = _query_filter_on_parameters(
+                query_obj, filter, obj, filter_level, hierarchy, oper
+            )
+
+        # Apply intrinsic object filters to search
+        if filter["kind"] == "typeAttribute":
+            query_obj, filter_level = _query_filter_on_intrinsics(
+                query_obj, filter, obj, filter_level, hierarchy, oper
+            )
+
+    return query_obj, filter_level
diff --git a/tardis/apps/search/utils/documents.py b/tardis/apps/search/utils/documents.py
new file mode 100644
index 000000000..cd9c6fc5e
--- /dev/null
+++ b/tardis/apps/search/utils/documents.py
@@ -0,0 +1,207 @@
+"""
+Helper functions used by the search app's documents.py (Elasticsearch document definitions).
+"""
+
+from django.conf import settings
+from django_elasticsearch_dsl import fields
+
+from tardis.tardis_portal.models import (
+    ParameterName,
+    ExperimentParameter,
+    DatasetParameter,
+    DatafileParameter,
+)
+
+from tardis.apps.projects.models import (
+    ProjectParameter,
+)
+
+
+def generic_acl_structure():
+    """
+    Return the ES structure of an ACL.
+
+    - pluginId = type of ACL owner: user/group/token
+    - entityId = ID of the owner
+    """
+    return fields.NestedField(
+        properties={
+            "pluginId": fields.KeywordField(),
+            "entityId": fields.KeywordField(),
+            "canDownload": fields.BooleanField(),
+            "canSensitive": fields.BooleanField(),
+        }
+    )
+
+
+def generic_parameter_structure():
+    """
+    Return the ES structure of object parameters and schema.
+    The parameter structure splits out string/numerical/datetime
+    parameters so that ES can specifically handle each of their
+    datatypes.
+
+    - schemas:
+        - schema_id: ID of an object schema
+    - string/numerical/datetime:
+        - pn_id: ID of the parameter name
+        - pn_name: full name of the parameter name
+        - value: value of the parameter
+        - sensitive: whether the parameter name is sensitive
+    """
+    return fields.NestedField(
+        properties={
+            "string": fields.NestedField(
+                properties={
+                    "pn_id": fields.KeywordField(),
+                    "pn_name": fields.KeywordField(),
+                    "value": fields.TextField(),
+                    "sensitive": fields.BooleanField(),
+                }
+            ),
+            "numerical": fields.NestedField(
+                properties={
+                    "pn_id": fields.KeywordField(),
+                    "pn_name": fields.KeywordField(),
+                    "value": fields.FloatField(),
+                    "sensitive": fields.BooleanField(),
+                }
+            ),
+            "datetime": fields.NestedField(
+                properties={
+                    "pn_id": fields.KeywordField(),
+                    "pn_name": fields.KeywordField(),
+                    "value": fields.DateField(),
+                    "sensitive": fields.BooleanField(),
+                }
+            ),
+            "schemas": fields.NestedField(
+                properties={"schema_id": fields.KeywordField()}
+            ),
+        },
+    )
+
+
+def prepare_generic_acls_build(INSTANCE_ACL_SET, return_list):
+    """Build the ACLs associated with this object, formatted for
+    elasticsearch, and append them to return_list.
+    """
+    for acl in INSTANCE_ACL_SET:
+        acl_dict = {}
+        if acl["user__id"] is not None:
+            acl_dict["pluginId"] = "django_user"
+            acl_dict["entityId"] = acl["user__id"]
+        if acl["group__id"] is not None:
+            acl_dict["pluginId"] = "django_group"
+            acl_dict["entityId"] = acl["group__id"]
+        if acl["token__id"] is not None:
+            # token access shouldn't be added to search
+            # unless search is given a way of checking token expiry
+            continue
+        # add in permission booleans
+        acl_dict["canDownload"] = acl["canDownload"]
+        acl_dict["canSensitive"] = acl["canSensitive"]
+        if acl_dict not in return_list:
+            return_list.append(acl_dict)
+
+
+def prepare_generic_acls(type, INSTANCE_ACL_SET, INSTANCE_EXPS=None):
+    """Returns the ACLs associated with this
+    object, formatted for elasticsearch.
+
+    This function is mostly just a wrapper around "prepare_generic_acls_build"
+    to account for current macro/micro behaviour.
+    """
+    return_list = []
+    if settings.ONLY_EXPERIMENT_ACLS and type != "experiment":
+        for exp in INSTANCE_EXPS.all():
+            prepare_generic_acls_build(
+                exp.experimentacl_set.select_related("user", "group", "token")
+                .all()
+                .exclude(user__id=settings.PUBLIC_USER_ID)
+                .values(
+                    "user__id",
+                    "group__id",
+                    "token__id",
+                    "canDownload",
+                    "canSensitive",
+                ),
+                return_list,
+            )
+    else:
+        prepare_generic_acls_build(
+            INSTANCE_ACL_SET.select_related("user", "group", "token")
+            .all()
+            .exclude(user__id=settings.PUBLIC_USER_ID)
+            .values(
+                "user__id",
+                "group__id",
+                "token__id",
+                "canDownload",
+                "canSensitive",
+            ),
+            return_list,
+        )
+    return return_list
+
+
+def prepare_generic_parameters(instance, type):
+    """Returns the parameters associated with the provided instance,
+    formatted for elasticsearch."""
+
+    type_dict = {
+        "project": ProjectParameter,
+        "experiment": ExperimentParameter,
+        "dataset": DatasetParameter,
+        "datafile": DatafileParameter,
+    }
+    OBJPARAMETERS = type_dict[type]
+
+    # get list of object parametersets
+    paramsets = list(instance.getParameterSets())
+    parameter_groups = {
+        "string": [],
+        "numerical": [],
+        "datetime": [],
+        "schemas": [],
+    }
+    # iterate over parametersets of an object
+    for paramset in paramsets:
+        param_type = {1: "datetime", 2: "string", 3: "numerical"}
+        # query parameters from parameterset
+        param_glob = OBJPARAMETERS.objects.filter(parameterset=paramset).values_list(
+            "name",
+            "datetime_value",
+            "string_value",
+            "numerical_value",
+        )
+        # add schema information to dict
+        parameter_groups["schemas"].append({"schema_id": paramset.schema_id})
+        # iterate over parameter info "name/datetime/string/numerical"
+        for sublist in param_glob:
+            # query parametername info using "name"
+            PN = ParameterName.objects.get(id=sublist[0])
+            # build dict for param
+            param_dict = {}
+            type_idx = 0
+            # iterate over datetime/string/numerical info
+            for idx, value in enumerate(sublist[1:]):
+                # if datetime/string/numerical actually contains info
+                if value not in [None, ""]:
+                    # add parametername info to dict
+                    param_dict["pn_id"] = str(PN.id)
+                    param_dict["pn_name"] = str(PN.full_name)
+                    param_dict["sensitive"] = PN.sensitive
+                    type_idx = idx + 1
+                    # detect type of param, and add value to dict
+                    if type_idx == 1:
+                        param_dict["value"] = value
+                    elif type_idx == 2:
+                        param_dict["value"] = str(value)
+                    elif type_idx == 3:
+                        param_dict["value"] = float(value)
+            # if a parameter with a value was found, add param_dict
+            # to parameter_groups
+            if type_idx:
+                parameter_groups[param_type[type_idx]].append(param_dict)
+    return parameter_groups
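A minimal sketch (not part of this diff) of how the utils/api.py helpers above might be chained together by the search endpoint. The real call site lives in api.py and is not shown in this hunk, so the wrapper function, the index name passed to Search, and the TITLE_LIST/HIERARCHY constants below are illustrative assumptions only:

    from elasticsearch_dsl import Search

    # Assumed per-object "title" fields and "parent-in-results" weights;
    # the real values are defined in api.py and may differ.
    TITLE_LIST = ["name", "title", "description", "filename"]
    HIERARCHY = {"project": 4, "experiment": 3, "dataset": 2, "datafile": 1}

    def build_search(user, groups, query_text, filters, obj="dataset", idx=2):
        # every hit must carry a matching User OR Group ACL
        query_obj = create_user_and_group_query(user, groups)
        # add keyword / non-sensitive metadata matching if text was submitted
        # for this object type (query_text is assumed to be keyed by object type,
        # since query_keywords_and_metadata indexes it that way)
        if query_text is not None and query_text.get(obj):
            query_obj = query_keywords_and_metadata(
                query_obj, query_text, obj, idx, TITLE_LIST
            )
        # apply intrinsic and schema-parameter filters, tracking the
        # "parent-in-results" threshold
        filter_level = 0
        if filters is not None:
            query_obj, filter_level = query_apply_filters(
                query_obj, filters, obj, filter_level, HIERARCHY
            )
        # hand the combined bool query to elasticsearch-dsl (index name assumed)
        return Search(index=obj).query(query_obj), filter_level

Each helper combines its criterion with the incoming query via & (AND) or | (OR) internally, so the caller only decides which criteria apply.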