From ecb104e0ec607465b22edeb062a563f5beca6928 Mon Sep 17 00:00:00 2001 From: Caroline Kery Date: Tue, 1 Oct 2024 12:31:34 -0400 Subject: [PATCH] add more logic to handle uploading prelabeled data --- backend/django/core/utils/util.py | 22 ++++++++++++++++++++-- backend/django/core/utils/utils_form.py | 13 ++++++++++--- frontend/src/actions/skew.js | 2 +- frontend/src/components/Skew/index.jsx | 2 +- 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/backend/django/core/utils/util.py b/backend/django/core/utils/util.py index de370524..fe9c4af1 100644 --- a/backend/django/core/utils/util.py +++ b/backend/django/core/utils/util.py @@ -202,6 +202,16 @@ def create_labels_from_csv(df, project): stream = StringIO() labels = {label.name: label.pk for label in project.labels.all()} + + df["Label"] = df["Label"].apply( + lambda s: s.replace("\\n", "\n").replace("\\t", "\t").replace("\\r", "\r") + ) + + existing_labels = set(labels.keys()) + df_labels = set(df["Label"].tolist()) + + quote_labels = df_labels - existing_labels + df["Label"] = df["Label"].apply(lambda s: f'"{s}"' if s in quote_labels else s) df["data_id"] = df["hash"].apply( lambda x: Data.objects.get(hash=x, project=project).pk ) @@ -841,10 +851,18 @@ def create_label_metadata(project, label_data): df_label_ids = set(label_data["Label"].tolist()) need_quotes = df_label_ids - existing_label_ids - label_data["Label"] = label_data["Label"].apply(lambda s: f'"{s}"'.replace('\\n', '\n').replace('\\t', '\t').replace('\\r', '\r') if s in need_quotes else s) + label_data["Label"] = label_data["Label"].apply( + lambda s: ( + f'"{s}"'.replace("\\n", "\n").replace("\\t", "\t").replace("\\r", "\r") + if s in need_quotes + else s + ) + ) df_label_ids = set(label_data["Label"].tolist()) if len(df_label_ids - existing_label_ids) > 0: - raise ValidationError("ERROR loading in label metadata. Something is going wrong with the label file.") + raise ValidationError( + "ERROR loading in label metadata. Something is going wrong with the label file." + ) label_data = label_data.merge(label_objects, on="Label", how="inner") diff --git a/backend/django/core/utils/utils_form.py b/backend/django/core/utils/utils_form.py index f1971eef..cf17c149 100644 --- a/backend/django/core/utils/utils_form.py +++ b/backend/django/core/utils/utils_form.py @@ -79,9 +79,16 @@ def clean_data_helper( and len(set(labels_in_data) - set(supplied_labels)) > 0 ): just_in_data = set(labels_in_data) - set(supplied_labels) - raise ValidationError( - f"There are extra labels in the file which were not created in step 2: {just_in_data}" - ) + # add a correction for label descriptions with weird characters + labels_in_data_fixed = [ + f'"{s}"'.replace("\\n", "\n").replace("\\t", "\t").replace("\\r", "\r") + for s in just_in_data + ] + list(set(labels_in_data) - just_in_data) + + if len(set(labels_in_data_fixed) - set(supplied_labels)) > 0: + raise ValidationError( + f"There are extra labels in the file which were not in step 2 of project creation: {just_in_data}" + ) if "ID" in data.columns: # there should be no null values diff --git a/frontend/src/actions/skew.js b/frontend/src/actions/skew.js index 957ae7c2..9083c89e 100644 --- a/frontend/src/actions/skew.js +++ b/frontend/src/actions/skew.js @@ -90,7 +90,7 @@ export const skewLabel = (dataID, labelID, projectID) => { dispatch(setMessage(response.error)); } else { dispatch(getUnlabeled(projectID)); - dispatch(getLabelCounts(projectID)); + //dispatch(getLabelCounts(projectID)); } }); }; diff --git a/frontend/src/components/Skew/index.jsx b/frontend/src/components/Skew/index.jsx index 8ccf6627..296874d2 100644 --- a/frontend/src/components/Skew/index.jsx +++ b/frontend/src/components/Skew/index.jsx @@ -38,7 +38,7 @@ class Skew extends React.Component { componentDidMount() { this.props.setFilterStr(""); this.props.getUnlabeled(); - this.props.getLabelCounts(); + //this.props.getLabelCounts(); } getText(row) {