feat: add nutrition extractor

openfoodfacts · Oct 22, 2024 · 74dec15 · 74dec15
1 parent 7686439
commit 74dec15
Show file tree

Hide file tree

Showing 10 changed files with 1,093 additions and 29 deletions.
diff --git a/robotoff/cli/main.py b/robotoff/cli/main.py
@@ -93,12 +93,19 @@ def create_redis_update(
 
     get_logger()
     client = get_redis_client()
+    flavor_to_product_type = {
+        "off": "food",
+        "obf": "beauty",
+        "opff": "petfood",
+        "opf": "product",
+    }
     event = {
         "code": barcode,
         "flavor": flavor,
         "user_id": user_id,
         "action": action,
         "comment": comment,
+        "product_type": flavor_to_product_type[flavor],
     }
 
     diffs: JSONType

diff --git a/robotoff/insights/annotate.py b/robotoff/insights/annotate.py
@@ -653,24 +653,6 @@ def process_annotation(
         return UPDATED_ANNOTATION_RESULT
 
 
-class NutritionTableStructureAnnotator(InsightAnnotator):
-    @classmethod
-    def process_annotation(
-        cls,
-        insight: ProductInsight,
-        data: Optional[dict] = None,
-        auth: Optional[OFFAuthentication] = None,
-        is_vote: bool = False,
-    ) -> AnnotationResult:
-        insight.data["annotation"] = data
-        insight.save()
-        return SAVED_ANNOTATION_RESULT
-
-    @classmethod
-    def is_data_required(cls) -> bool:
-        return True
-
-
 ANNOTATOR_MAPPING: dict[str, Type] = {
     InsightType.packager_code.name: PackagerCodeAnnotator,
     InsightType.label.name: LabelAnnotator,
@@ -681,7 +663,6 @@ def is_data_required(cls) -> bool:
     InsightType.store.name: StoreAnnotator,
     InsightType.packaging.name: PackagingAnnotator,
     InsightType.nutrition_image.name: NutritionImageAnnotator,
-    InsightType.nutrition_table_structure.name: NutritionTableStructureAnnotator,
     InsightType.is_upc_image.name: UPCImageAnnotator,
 }
 

diff --git a/robotoff/insights/importer.py b/robotoff/insights/importer.py
@@ -1524,6 +1524,33 @@ def _keep_prediction(
         )
 
 
+class NutrientExtractionImporter(InsightImporter):
+    @staticmethod
+    def get_type() -> InsightType:
+        return InsightType.nutrient_extraction
+
+    @classmethod
+    def get_required_prediction_types(cls) -> set[PredictionType]:
+        return {PredictionType.nutrient_extraction}
+
+    @classmethod
+    def generate_candidates(
+        cls,
+        product: Optional[Product],
+        predictions: list[Prediction],
+        product_id: ProductIdentifier,
+    ) -> Iterator[ProductInsight]:
+        for prediction in predictions:
+            yield ProductInsight(**prediction.to_dict())
+
+    @classmethod
+    def is_conflicting_insight(
+        cls, candidate: ProductInsight, reference: ProductInsight
+    ) -> bool:
+        # Only one insight per product
+        return True
+
+
 class PackagingElementTaxonomyException(Exception):
     pass
 
@@ -1860,6 +1887,7 @@ def import_product_predictions(
     UPCImageImporter,
     NutritionImageImporter,
     IngredientSpellcheckImporter,
+    NutrientExtractionImporter,
 ]
 
 

diff --git a/robotoff/off.py b/robotoff/off.py
@@ -68,8 +68,16 @@ def get_username(self) -> Optional[str]:
         return None
 
 
-def get_source_from_url(ocr_url: str) -> str:
-    url_path = urlparse(ocr_url).path
+def get_source_from_url(url: str) -> str:
+    """Get the `source_image` field from an image or OCR URL.
+
+    It's the path of the image or OCR JSON file, but without the `/images/products`
+    prefix. It always ends with `.jpg`, whether it's an image or an OCR JSON file.
+
+    :param url: the URL of the image or OCR JSON file
+    :return: the source image path
+    """
+    url_path = urlparse(url).path
 
     if url_path.startswith("/images/products"):
         url_path = url_path[len("/images/products") :]