From 824e4f57a370b5009d16866351598cf17c357c26 Mon Sep 17 00:00:00 2001 From: Caroline Kery Date: Mon, 22 Apr 2024 16:44:42 -0400 Subject: [PATCH 1/5] fix the get_unlabeled data function and make sure the data gets removed from the data queue --- backend/django/core/utils/utils_annotate.py | 14 +++++++++++--- backend/django/core/views/api_annotate.py | 7 +++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/backend/django/core/utils/utils_annotate.py b/backend/django/core/utils/utils_annotate.py index 687230d7..8cbe6ab3 100644 --- a/backend/django/core/utils/utils_annotate.py +++ b/backend/django/core/utils/utils_annotate.py @@ -267,15 +267,23 @@ def process_irr_label(data, label): def get_unlabeled_data(project_pk): project = Project.objects.get(pk=project_pk) - stuff_in_queue = DataQueue.objects.filter(queue__project=project) - queued_ids = [queued.data.id for queued in stuff_in_queue] + stuff_in_queue = DataQueue.objects.filter( + queue__project=project, queue__type="admin" + ) + in_admin_queue_ids = [queued.data.id for queued in stuff_in_queue] recycle_ids = RecycleBin.objects.filter(data__project=project).values_list( "data__pk", flat=True ) + + assigned_ids = AssignedData.objects.filter(data__project=project).values_list( + "data__pk", flat=True + ) + unlabeled_data = ( project.data_set.filter(datalabel__isnull=True) - .exclude(id__in=queued_ids) + .exclude(id__in=in_admin_queue_ids) + .exclude(id__in=assigned_ids) .exclude(id__in=recycle_ids) .exclude(irr_ind=True) ) diff --git a/backend/django/core/views/api_annotate.py b/backend/django/core/views/api_annotate.py index dc3593a9..8a03ed04 100644 --- a/backend/django/core/views/api_annotate.py +++ b/backend/django/core/views/api_annotate.py @@ -460,6 +460,7 @@ def modify_label(request, data_pk): profile = request.user.profile response = {} project = data.project + normal_queue = Queue.objects.get(project=project, type="normal") label = Label.objects.get(pk=request.data["labelID"]) @@ -478,6 +479,9 @@ def modify_label(request, data_pk): training_set=current_training_set, pre_loaded=False, ) + if DataQueue.objects.filter(data=data, queue=normal_queue).exists(): + DataQueue.objects.get(data=data, queue=normal_queue).delete() + elif "oldLabelID" in request.data: old_label = Label.objects.get(pk=request.data["oldLabelID"]) with transaction.atomic(): @@ -842,6 +846,7 @@ def label_skew_label(request, data_pk): label = Label.objects.get(pk=request.data["labelID"]) profile = request.user.profile update_last_action(project, profile) + normal_queue = Queue.objects.get(project=project, type="normal") response = {} # check if they have the admin lock still. @@ -866,6 +871,8 @@ def label_skew_label(request, data_pk): time_to_label=None, timestamp=timezone.now(), ) + if DataQueue.objects.filter(data=datum, queue=normal_queue).exists(): + DataQueue.objects.get(data=datum, queue=normal_queue).delete() VerifiedDataLabel.objects.create( data_label=dl, verified_timestamp=timezone.now(), verified_by=profile ) From 98aca06cea0735dd3178219a74d10acb060bad49 Mon Sep 17 00:00:00 2001 From: Caroline Kery Date: Mon, 22 Apr 2024 16:51:58 -0400 Subject: [PATCH 2/5] handle REDIS --- backend/django/core/views/api_annotate.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/backend/django/core/views/api_annotate.py b/backend/django/core/views/api_annotate.py index 8a03ed04..e15671cf 100644 --- a/backend/django/core/views/api_annotate.py +++ b/backend/django/core/views/api_annotate.py @@ -469,6 +469,9 @@ def modify_label(request, data_pk): and not DataLabel.objects.filter(data=data).exists() ): current_training_set = project.get_current_training_set() + data_in_normal_queue = DataQueue.objects.filter( + data=data, queue=normal_queue + ).exists() with transaction.atomic(): DataLabel.objects.create( data=data, @@ -479,9 +482,14 @@ def modify_label(request, data_pk): training_set=current_training_set, pre_loaded=False, ) - if DataQueue.objects.filter(data=data, queue=normal_queue).exists(): + if data_in_normal_queue: DataQueue.objects.get(data=data, queue=normal_queue).delete() + if data_in_normal_queue: + settings.REDIS.srem( + redis_serialize_set(normal_queue), redis_serialize_data(data) + ) + elif "oldLabelID" in request.data: old_label = Label.objects.get(pk=request.data["oldLabelID"]) with transaction.atomic(): @@ -862,6 +870,9 @@ def label_skew_label(request, data_pk): current_training_set = project.get_current_training_set() if project_extras.proj_permission_level(datum.project, profile) >= 2: + data_in_normal_queue = DataQueue.objects.filter( + data=datum, queue=normal_queue + ).exists() with transaction.atomic(): dl = DataLabel.objects.create( data=datum, @@ -871,11 +882,15 @@ def label_skew_label(request, data_pk): time_to_label=None, timestamp=timezone.now(), ) - if DataQueue.objects.filter(data=datum, queue=normal_queue).exists(): + if data_in_normal_queue: DataQueue.objects.get(data=datum, queue=normal_queue).delete() VerifiedDataLabel.objects.create( data_label=dl, verified_timestamp=timezone.now(), verified_by=profile ) + if data_in_normal_queue: + settings.REDIS.srem( + redis_serialize_set(normal_queue), redis_serialize_data(datum) + ) else: response["error"] = "Invalid permission. Must be an admin." From a40a8a157b058529858f74d0ae44516aa02a74d5 Mon Sep 17 00:00:00 2001 From: Caroline Kery Date: Mon, 29 Apr 2024 10:26:49 -0400 Subject: [PATCH 3/5] add note to unlabeled data button --- frontend/src/components/History/HistoryTable.jsx | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/src/components/History/HistoryTable.jsx b/frontend/src/components/History/HistoryTable.jsx index cb368d42..9803bd2b 100644 --- a/frontend/src/components/History/HistoryTable.jsx +++ b/frontend/src/components/History/HistoryTable.jsx @@ -229,6 +229,7 @@ const HistoryTable = () => {

Toggle the checkbox below to show/hide unlabeled data:

+ NOTE: Data assigned to someone in the Annotate Data tab will not be returned. Admin can go to the Unassign Coder tab on the Admin page to un-assign data from individual coders. Date: Fri, 3 May 2024 08:40:11 -0400 Subject: [PATCH 4/5] Fix count of unlabelled + unassigned on details page --- backend/django/core/templates/projects/detail.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/django/core/templates/projects/detail.html b/backend/django/core/templates/projects/detail.html index fa134b86..7d217996 100644 --- a/backend/django/core/templates/projects/detail.html +++ b/backend/django/core/templates/projects/detail.html @@ -389,7 +389,7 @@
$('#fully-labeled').append(response.final); $('#fully-labeled-verified').append(response.final_verified); $('#fully-labeled-unverified').append(response.final_unverified); - $('#unlabeled-unassigned').append(response.unlabeled); + $('#unlabeled-unassigned').append(response.unlabeled - response.assigned); $('#awaiting-adjudication').append(response.adjudication); $('#recycled').append(response.recycled); $('#other-labeled').append((response.total - response.final - response.adjudication - response.recycled) - response.unlabeled); From 41115beb0180c34672549f265ee63d7f18aedf2d Mon Sep 17 00:00:00 2001 From: andykawabata Date: Mon, 6 May 2024 10:28:13 -0400 Subject: [PATCH 5/5] replaces js fix with SQL query fix --- .../django/core/templates/projects/detail.html | 2 +- backend/django/core/utils/util.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/backend/django/core/templates/projects/detail.html b/backend/django/core/templates/projects/detail.html index 7d217996..fa134b86 100644 --- a/backend/django/core/templates/projects/detail.html +++ b/backend/django/core/templates/projects/detail.html @@ -389,7 +389,7 @@
$('#fully-labeled').append(response.final); $('#fully-labeled-verified').append(response.final_verified); $('#fully-labeled-unverified').append(response.final_unverified); - $('#unlabeled-unassigned').append(response.unlabeled - response.assigned); + $('#unlabeled-unassigned').append(response.unlabeled); $('#awaiting-adjudication').append(response.adjudication); $('#recycled').append(response.recycled); $('#other-labeled').append((response.total - response.final - response.adjudication - response.recycled) - response.unlabeled); diff --git a/backend/django/core/utils/util.py b/backend/django/core/utils/util.py index 9ef87356..e18a665a 100644 --- a/backend/django/core/utils/util.py +++ b/backend/django/core/utils/util.py @@ -773,25 +773,25 @@ def get_unlabelled_data_objs(project_id: int) -> int: WHERE cd.project_id = %s AND cdl.label_id IS NULL ), queue_ids AS ( - SELECT cdq.id + SELECT cdq.id, cdq.data_id FROM core_dataqueue cdq LEFT JOIN core_queue cq ON cdq.queue_id = cq.id WHERE cq.project_id = %s AND cq.type = 'admin' ), irr_log_ids AS ( - SELECT ci.id + SELECT ci.id, ci.data_id FROM core_irrlog ci LEFT JOIN core_data cd ON ci.data_id = cd.id WHERE cd.project_id = %s ), assigned_ids AS ( - SELECT ca.id + SELECT ca.id, ca.data_id FROM core_assigneddata ca LEFT JOIN core_data cd ON ca.data_id = cd.id WHERE cd.project_id = %s ), recycle_ids AS ( - SELECT cr.id + SELECT cr.id, cr.data_id FROM core_recyclebin cr LEFT JOIN core_data cd ON cr.data_id = cd.id WHERE cd.project_id = %s @@ -800,10 +800,10 @@ def get_unlabelled_data_objs(project_id: int) -> int: FROM ( SELECT p.id FROM project_ids p - LEFT JOIN queue_ids q ON p.id = q.id - LEFT JOIN irr_log_ids irr ON p.id = irr.id - LEFT JOIN assigned_ids a ON p.id = a.id - LEFT JOIN recycle_ids r ON p.id = r.id + LEFT JOIN queue_ids q ON p.id = q.data_id + LEFT JOIN irr_log_ids irr ON p.id = irr.data_id + LEFT JOIN assigned_ids a ON p.id = a.data_id + LEFT JOIN recycle_ids r ON p.id = r.data_id WHERE q.id IS NULL AND irr.id IS NULL AND a.id IS NULL