Skip to content

Commit

Permalink
feat: suggestions for jaquel (#72)
Browse files Browse the repository at this point in the history
* Put suggestions into error messages when parsing jaquel.

* Make sure aggregates get unique column names

* Add Google site verification meta tag

---------

Co-authored-by: a.krantz <a.krantz@peak-solution.de>
  • Loading branch information
totonga and a.krantz authored Oct 28, 2024
1 parent 9edc710 commit 8f1b928
Show file tree
Hide file tree
Showing 9 changed files with 234 additions and 25 deletions.
7 changes: 7 additions & 0 deletions docs/_templates/layout.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!-- _templates/layout.html -->
{# Extend the Sphinx theme's base layout to inject the Google Search Console
   site-verification meta tag (value supplied via html_context in conf.py)
   into every generated page's <head>. #}
{% extends "!layout.html" %}

{% block extrahead %}
<meta name="google-site-verification" content="{{ google_site_verification }}" />
{{ super() }}
{% endblock %}
4 changes: 4 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@
#
html_theme = "sphinx_rtd_theme"

html_context = {"google_site_verification": "M-YV4bEhpyyWVOBQB9VLsSCjKfqO_UpvTBMJ7DS5t_U"}

templates_path = ["_templates"]

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
Expand Down
12 changes: 3 additions & 9 deletions src/odsbox/con_i.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,28 +170,22 @@ def logout(self):
def query_data(
    self,
    query: str | dict | ods.SelectStatement,
    **kwargs,
) -> DataFrame:
    """
    Query the ODS server for content and return the results as a pandas DataFrame.

    :param str | dict | ods.SelectStatement query: Query given as JAQueL query (dict or str)
        or as an ASAM ODS SelectStatement.
    :param kwargs: additional keyword arguments passed through to `to_pandas`
        (e.g. `enum_as_string`, `date_as_timestamp`, `name_separator`).
    :raises requests.HTTPError: If query fails.
    :return DataFrame: The DataMatrices as Pandas.DataFrame. The columns are named as
        `ENTITY_NAME.ATTRIBUTE_NAME[.AGGREGATE]`. `IsNull` values are not marked invalid.
    """
    # A ready-made SelectStatement goes straight to data_read; dict/str is treated as JAQueL.
    data_matrices = (
        self.data_read(query) if isinstance(query, ods.SelectStatement) else self.data_read_jaquel(query)
    )
    return to_pandas(data_matrices, model_cache=self.mc, **kwargs)

def model(self) -> ods.Model:
"""
Expand Down
12 changes: 10 additions & 2 deletions src/odsbox/datamatrices_to_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def to_pandas(
model_cache: ModelCache | None = None,
enum_as_string: bool = False,
date_as_timestamp: bool = False,
name_separator: str = ".",
) -> pd.DataFrame:
"""
Converts data in an ASAM ODS DataMatrices into a pandas DataFrame.
Expand All @@ -178,9 +179,10 @@ def to_pandas(
to the corresponding string values.
:param bool date_as_timestamp: columns of type DT_DATE or DS_DATE are returned as string.
If this is set to True the strings are converted to pandas Timestamp.
:param str name_separator: separator used to concatenate entity and attribute names to define column name.
:return pd.DataFrame: A pandas DataFrame containing all the single matrices in a single frame. The
columns are named by the schema `ENTITY_NAME.ATTRIBUTE_NAME`.
columns are named by the schema `ENTITY_NAME.ATTRIBUTE_NAME[.AGGREGATE]`.
"""
if 0 == len(data_matrices.matrices):
return pd.DataFrame()
Expand All @@ -193,8 +195,14 @@ def to_pandas(
for matrix in data_matrices.matrices:
entity = model_cache.entity(matrix.name) if model_cache is not None else None
for column in matrix.columns:
aggregate_postfix = (
""
if ods.AggregateEnum.AG_NONE == column.aggregate
else name_separator + ods.AggregateEnum.Name(column.aggregate)
)
column_name = f"{matrix.name}{name_separator}{column.name}{aggregate_postfix}"
# The flags are ignored here. There might be NULL in here. Check `column.is_null` for this.
column_dict[matrix.name + "." + column.name] = __get_datamatrix_column_values_ex(
column_dict[column_name] = __get_datamatrix_column_values_ex(
column, model_cache, enum_as_string, entity, date_as_timestamp
)

Expand Down
68 changes: 62 additions & 6 deletions src/odsbox/jaquel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import re
from typing import Tuple, List, Any
from difflib import get_close_matches

from google.protobuf.internal import containers as _containers

Expand Down Expand Up @@ -127,7 +128,60 @@ def __model_get_entity_ex(model: ods.Model, entity_name_or_aid: str | int) -> od
if entity.name.lower() == entity_name_or_aid.lower() or entity.base_name.lower() == entity_name_or_aid.lower():
return entity

raise SyntaxError(f"Entity '{entity_name_or_aid}' is unknown in model.")
raise SyntaxError(
f"Entity '{entity_name_or_aid}' is unknown in model.{__model_get_suggestion_entity(model, entity_name_or_aid)}"
)


def __model_get_suggestion(lower_case_dict: dict, str_val: str) -> str:
    """Build a " Did you mean ...?" hint for an unknown identifier.

    :param dict lower_case_dict: maps lowercased candidate names to their original spelling.
    :param str str_val: the unknown name the caller supplied.
    :return str: hint text (with a leading space) naming the closest candidate,
        or an empty string when nothing is similar enough.
    """
    matches = get_close_matches(str_val.lower(), lower_case_dict, n=1, cutoff=0.3)
    if not matches:
        return ""
    return f" Did you mean '{lower_case_dict[matches[0]]}'?"


def __model_get_enum_suggestion(enumeration: ods.Model.Enumeration, str_val: str) -> str:
    """Return a " Did you mean ...?" hint for an unknown enumeration entry name."""
    candidates = {entry_name.lower(): entry_name for entry_name in enumeration.items}
    return __model_get_suggestion(candidates, str_val)


def __model_get_suggestion_attribute(entity: ods.Model.Entity, attribute_or_relation_name: str) -> str:
    """Return a " Did you mean ...?" hint for a name that is neither attribute nor relation of `entity`.

    :param ods.Model.Entity entity: entity whose attributes and relations are candidates.
    :param str attribute_or_relation_name: the unknown name the caller supplied.
    :return str: suggestion text or empty string if nothing is close.
    """
    # Iterate values() directly; the map keys were never used. Insertion order matters:
    # application names are added last so they override base names on lowercase collisions.
    available: dict = {}
    available.update({relation.base_name.lower(): relation.base_name for relation in entity.relations.values()})
    available.update({attribute.base_name.lower(): attribute.base_name for attribute in entity.attributes.values()})
    available.update({relation.name.lower(): relation.name for relation in entity.relations.values()})
    available.update({attribute.name.lower(): attribute.name for attribute in entity.attributes.values()})
    return __model_get_suggestion(available, attribute_or_relation_name)


def __model_get_suggestion_relation(entity: ods.Model.Entity, relation_name: str) -> str:
    """Return a " Did you mean ...?" hint for an unknown relation name of `entity`.

    :param ods.Model.Entity entity: entity whose relations are candidates.
    :param str relation_name: the unknown relation name the caller supplied.
    :return str: suggestion text or empty string if nothing is close.
    """
    # Iterate values() directly; the map keys were never used. Application names are
    # added last so they override base names on lowercase collisions.
    available: dict = {}
    available.update({relation.base_name.lower(): relation.base_name for relation in entity.relations.values()})
    available.update({relation.name.lower(): relation.name for relation in entity.relations.values()})
    return __model_get_suggestion(available, relation_name)


def __model_get_suggestion_entity(model: ods.Model, entity_name: str) -> str:
    """Return a " Did you mean ...?" hint for an entity name unknown in `model`.

    :param ods.Model model: model whose entities are candidates.
    :param str entity_name: the unknown entity name the caller supplied.
    :return str: suggestion text or empty string if nothing is close.
    """
    # Iterate values() directly; the map keys were never used. Application names are
    # added last so they override base names on lowercase collisions.
    available: dict = {}
    available.update({entity.base_name.lower(): entity.base_name for entity in model.entities.values()})
    available.update({entity.name.lower(): entity.name for entity in model.entities.values()})
    return __model_get_suggestion(available, entity_name)


def __model_get_suggestion_aggregate(aggregate_name: str) -> str:
    """Return a " Did you mean ...?" hint for an unknown `$` aggregate keyword."""
    candidates = {keyword.lower(): keyword for keyword in _jo_aggregates}
    return __model_get_suggestion(candidates, aggregate_name)


def __model_get_suggestion_operators(operator_name: str) -> str:
    """Return a " Did you mean ...?" hint for an unknown `$` condition operator."""
    candidates = {keyword.lower(): keyword for keyword in _jo_operators}
    return __model_get_suggestion(candidates, operator_name)


def __model_get_enum_index(model: ods.Model, entity: ods.Model.Entity, attribute_name: str, str_val: str) -> int:
Expand All @@ -137,7 +191,7 @@ def __model_get_enum_index(model: ods.Model, entity: ods.Model.Entity, attribute
if key.lower() == str_val.lower():
return enum.items[key]

raise SyntaxError('Enum entry for "' + str_val + '" does not exist')
raise SyntaxError(f"Enum entry for '{str_val}' does not exist.{__model_get_enum_suggestion(enum, str_val)}")


def _jo_enum_get_numeric_value(
Expand Down Expand Up @@ -191,7 +245,8 @@ def __parse_path_and_add_joins(
# Must be a relation
relation = __model_get_relation(model, attribute_entity, path_part)
if relation is None:
raise SyntaxError(f"'{path_part}' is no relation of entity '{attribute_entity.name}'")
suggestion_text = __model_get_suggestion_relation(attribute_entity, path_part)
raise SyntaxError(f"'{path_part}' is no relation of entity '{attribute_entity.name}'.{suggestion_text}")
attribute_name = relation.name

# add join
Expand All @@ -216,8 +271,9 @@ def __parse_path_and_add_joins(
else:
relation = __model_get_relation(model, attribute_entity, path_part)
if relation is None:
suggestion_text = __model_get_suggestion_attribute(attribute_entity, path_part)
raise SyntaxError(
f"'{path_part}' is neither attribute nor relation of entity '{attribute_entity.name}'"
f"'{path_part}' is neither attribute nor relation of entity '{attribute_entity.name}'.{suggestion_text}" # noqa: E501
)
attribute_name = relation.name
attribute_type = ods.DataTypeEnum.DT_LONGLONG # its an id
Expand Down Expand Up @@ -281,7 +337,7 @@ def __parse_attributes(
elif "$options" == element:
raise SyntaxError("Actually no $options defined for attributes")
else:
raise SyntaxError('Unknown aggregate "' + element + '"')
raise SyntaxError(f"Unknown aggregate '{element}'.{__model_get_suggestion_aggregate(element)}")
else:
if element_attribute["path"]:
element_attribute["path"] += "."
Expand Down Expand Up @@ -612,7 +668,7 @@ def __parse_conditions(
elif "$options" == elem:
continue
else:
raise SyntaxError('Unknown operator "' + elem + '"')
raise SyntaxError(f"Unknown operator '{elem}'.{__model_get_suggestion_operators(elem)}")
else:
if elem_attribute["path"]:
elem_attribute["path"] += "."
Expand Down
12 changes: 9 additions & 3 deletions src/odsbox/unit_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def query_physical_dimensions(
temperature: int = 0,
molar_amount: int = 0,
luminous_intensity: int = 0,
**kwargs,
) -> DataFrame:
"""
Search for a physical dimension by its SI exponents.
Expand Down Expand Up @@ -57,7 +58,7 @@ def query_physical_dimensions(
ci.condition.aid = physical_dimension_entity.aid
ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name
ci.condition.long_array.values.append(luminous_intensity)
return to_pandas(con_i.data_read(select))
return to_pandas(con_i.data_read(select), **kwargs)


def query_units(
Expand All @@ -69,6 +70,7 @@ def query_units(
temperature: int = 0,
molar_amount: int = 0,
luminous_intensity: int = 0,
**kwargs,
) -> DataFrame:
"""
Search for a unit by its SI exponents.
Expand Down Expand Up @@ -112,7 +114,10 @@ def query_units(
ci.condition.aid = physical_dimension_entity.aid
ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name
ci.condition.long_array.values.append(luminous_intensity)
return to_pandas(con_i.data_read(select))
return to_pandas(
con_i.data_read(select),
**kwargs,
)


def query_quantity(
Expand All @@ -124,6 +129,7 @@ def query_quantity(
temperature: int = 0,
molar_amount: int = 0,
luminous_intensity: int = 0,
**kwargs,
) -> DataFrame:
"""
Search for a quantity by its SI exponents.
Expand Down Expand Up @@ -174,4 +180,4 @@ def query_quantity(
ci.condition.aid = physical_dimension_entity.aid
ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name
ci.condition.long_array.values.append(luminous_intensity)
return to_pandas(con_i.data_read(select))
return to_pandas(con_i.data_read(select), **kwargs)
5 changes: 5 additions & 0 deletions tests/test_con_i.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ def test_con_i():
{"AoMeasurement": {}, "$options": {"$rowlimit": 50}}, date_as_timestamp=True, enum_as_string=True
)

r = con_i.query_data(
{"AoUnit": {}, "$attributes": {"name": 1}, "$options": {"$rowlimit": 1}}, name_separator="::"
)
assert f"{entity.name}::" in r.columns[0]


def test_query_data():
with __create_con_i() as con_i:
Expand Down
34 changes: 34 additions & 0 deletions tests/test_datamatrices_to_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,3 +343,37 @@ def test_unknown_arrays_empty():
assert pdf.to_dict() != {}

assert unknown_array_values(dm.columns[0].unknown_arrays.values[0]) == []


def test_aggregates():
    """Aggregate columns must get unique names and honor a custom name_separator."""
    dms = ods.DataMatrices()
    matrix = dms.matrices.add(aid=4711, name="Aggregates")
    name_column = matrix.columns.add(
        name="Name", base_name="name", aggregate=ods.AggregateEnum.AG_NONE, data_type=ods.DT_STRING
    )
    name_column.string_array.values[:] = ["my_name"]
    max_column = matrix.columns.add(
        name="Maximum", base_name="maximum", aggregate=ods.AggregateEnum.AG_MAX, data_type=ods.DT_DOUBLE
    )
    max_column.double_array.values[:] = [1.2]
    min_column = matrix.columns.add(
        name="Maximum", base_name="maximum", aggregate=ods.AggregateEnum.AG_MIN, data_type=ods.DT_DOUBLE
    )
    min_column.double_array.values[:] = [1.1]

    # Default separator '.' appends the aggregate name to otherwise duplicate columns.
    frame = to_pandas(dms)
    logging.getLogger().info(frame)
    assert frame.shape == (1, 3)
    assert frame.to_dict() == {
        "Aggregates.Name": {0: "my_name"},
        "Aggregates.Maximum.AG_MAX": {0: 1.2},
        "Aggregates.Maximum.AG_MIN": {0: 1.1},
    }
    assert len(frame.to_json()) > 0

    # A custom separator is used between entity, attribute and aggregate alike.
    frame = to_pandas(dms, name_separator="::")
    logging.getLogger().info(frame)
    assert frame.shape == (1, 3)
    assert frame.to_dict() == {
        "Aggregates::Name": {0: "my_name"},
        "Aggregates::Maximum::AG_MAX": {0: 1.2},
        "Aggregates::Maximum::AG_MIN": {0: 1.1},
    }
    assert len(frame.to_json()) > 0
Loading

0 comments on commit 8f1b928

Please sign in to comment.