From 27874432e689af18b4f8b663c2733bc20d698e8e Mon Sep 17 00:00:00 2001 From: "a.krantz" Date: Mon, 28 Oct 2024 14:30:32 +0000 Subject: [PATCH] Make sure aggregates get unique column names --- src/odsbox/con_i.py | 12 +++------- src/odsbox/datamatrices_to_pandas.py | 12 ++++++++-- src/odsbox/unit_utils.py | 12 +++++++--- tests/test_con_i.py | 5 ++++ tests/test_datamatrices_to_pandas.py | 34 ++++++++++++++++++++++++++++ 5 files changed, 61 insertions(+), 14 deletions(-) diff --git a/src/odsbox/con_i.py b/src/odsbox/con_i.py index d3644be..7730b95 100644 --- a/src/odsbox/con_i.py +++ b/src/odsbox/con_i.py @@ -170,18 +170,14 @@ def logout(self): def query_data( self, query: str | dict | ods.SelectStatement, - enum_as_string: bool = False, - date_as_timestamp: bool = False, + **kwargs, ) -> DataFrame: """ Query ods server for content and return the results as Pandas DataFrame :param str | dict | ods.SelectStatement query: Query given as JAQueL query (dict or str) or as an ASAM ODS SelectStatement. - :param bool enum_as_string: columns of type DT_ENUM are returned as int values. - If this is set to True the int values are mapped to the corresponding string values. - :param bool date_as_timestamp: columns of type DT_DATE or DS_DATE are returned as string. - If this is set to True the strings are converted to pandas Timestamp. + :param kwargs: additional arguments passed to `to_pandas`. :raises requests.HTTPError: If query fails. :return DataFrame: The DataMatrices as Pandas.DataFrame. The columns are named as `ENTITY_NAME.ATTRIBUTE_NAME`. `IsNull` values are not marked invalid. 
@@ -189,9 +185,7 @@ def query_data( data_matrices = ( self.data_read(query) if isinstance(query, ods.SelectStatement) else self.data_read_jaquel(query) ) - return to_pandas( - data_matrices, model_cache=self.mc, enum_as_string=enum_as_string, date_as_timestamp=date_as_timestamp - ) + return to_pandas(data_matrices, model_cache=self.mc, **kwargs) def model(self) -> ods.Model: """ diff --git a/src/odsbox/datamatrices_to_pandas.py b/src/odsbox/datamatrices_to_pandas.py index f64ce70..cd932e1 100644 --- a/src/odsbox/datamatrices_to_pandas.py +++ b/src/odsbox/datamatrices_to_pandas.py @@ -167,6 +167,7 @@ def to_pandas( model_cache: ModelCache | None = None, enum_as_string: bool = False, date_as_timestamp: bool = False, + name_separator: str = ".", ) -> pd.DataFrame: """ Converts data in an ASAM ODS DataMatrices into a pandas DataFrame. @@ -178,9 +179,10 @@ def to_pandas( to the corresponding string values. :param bool date_as_timestamp: columns of type DT_DATE or DS_DATE are returned as string. If this is set to True the strings are converted to pandas Timestamp. + :param str name_separator: separator used to concatenate entity and attribute names to define column name. :return pd.DataFrame: A pandas DataFrame containing all the single matrices in a single frame. The - columns are named by the schema `ENTITY_NAME.ATTRIBUTE_NAME`. + columns are named by the schema `ENTITY_NAME.ATTRIBUTE_NAME[.AGGREGATE]`. """ if 0 == len(data_matrices.matrices): return pd.DataFrame() @@ -194,7 +196,13 @@ def to_pandas( entity = model_cache.entity(matrix.name) if model_cache is not None else None for column in matrix.columns: # The flags are ignored here. There might be NULL in here. Check `column.is_null` for this. - column_dict[matrix.name + "." 
+ column.name] = __get_datamatrix_column_values_ex( + aggregate_suffix = ( + "" + if ods.AggregateEnum.AG_NONE == column.aggregate + else name_separator + ods.AggregateEnum.Name(column.aggregate) + ) + column_name = f"{matrix.name}{name_separator}{column.name}{aggregate_suffix}" + column_dict[column_name] = __get_datamatrix_column_values_ex( column, model_cache, enum_as_string, entity, date_as_timestamp ) diff --git a/src/odsbox/unit_utils.py b/src/odsbox/unit_utils.py index 17e45fa..ed92919 100644 --- a/src/odsbox/unit_utils.py +++ b/src/odsbox/unit_utils.py @@ -21,6 +21,7 @@ def query_physical_dimensions( temperature: int = 0, molar_amount: int = 0, luminous_intensity: int = 0, + **kwargs, ) -> DataFrame: """ Search for a physical dimension by its SI exponents. @@ -57,7 +58,7 @@ def query_physical_dimensions( ci.condition.aid = physical_dimension_entity.aid ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name ci.condition.long_array.values.append(luminous_intensity) - return to_pandas(con_i.data_read(select)) + return to_pandas(con_i.data_read(select), **kwargs) def query_units( @@ -69,6 +70,7 @@ def query_units( temperature: int = 0, molar_amount: int = 0, luminous_intensity: int = 0, + **kwargs, ) -> DataFrame: """ Search for a unit by its SI exponents. @@ -112,7 +114,10 @@ def query_units( ci.condition.aid = physical_dimension_entity.aid ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name ci.condition.long_array.values.append(luminous_intensity) - return to_pandas(con_i.data_read(select)) + return to_pandas( + con_i.data_read(select), + **kwargs, + ) def query_quantity( @@ -124,6 +129,7 @@ def query_quantity( temperature: int = 0, molar_amount: int = 0, luminous_intensity: int = 0, + **kwargs, ) -> DataFrame: """ Search for a quantity by its SI exponents. 
@@ -174,4 +180,4 @@ def query_quantity( ci.condition.aid = physical_dimension_entity.aid ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name ci.condition.long_array.values.append(luminous_intensity) - return to_pandas(con_i.data_read(select)) + return to_pandas(con_i.data_read(select), **kwargs) diff --git a/tests/test_con_i.py b/tests/test_con_i.py index 2d2a58c..dd0bf4d 100644 --- a/tests/test_con_i.py +++ b/tests/test_con_i.py @@ -28,6 +28,11 @@ def test_con_i(): {"AoMeasurement": {}, "$options": {"$rowlimit": 50}}, date_as_timestamp=True, enum_as_string=True ) + r = con_i.query_data( + {"AoUnit": {}, "$attributes": {"name": 1}, "$options": {"$rowlimit": 1}}, name_separator="::" + ) + assert f"{entity.name}::" in r.columns[0] + def test_query_data(): with __create_con_i() as con_i: diff --git a/tests/test_datamatrices_to_pandas.py b/tests/test_datamatrices_to_pandas.py index f689a18..cf60507 100644 --- a/tests/test_datamatrices_to_pandas.py +++ b/tests/test_datamatrices_to_pandas.py @@ -343,3 +343,37 @@ def test_unknown_arrays_empty(): assert pdf.to_dict() != {} assert unknown_array_values(dm.columns[0].unknown_arrays.values[0]) == [] + + +def test_aggregates(): + dms = ods.DataMatrices() + dm = dms.matrices.add(aid=4711, name="Aggregates") + dm.columns.add( + name="Name", base_name="name", aggregate=ods.AggregateEnum.AG_NONE, data_type=ods.DT_STRING + ).string_array.values[:] = ["my_name"] + dm.columns.add( + name="Maximum", base_name="maximum", aggregate=ods.AggregateEnum.AG_MAX, data_type=ods.DT_DOUBLE + ).double_array.values[:] = [1.2] + dm.columns.add( + name="Maximum", base_name="maximum", aggregate=ods.AggregateEnum.AG_MIN, data_type=ods.DT_DOUBLE + ).double_array.values[:] = [1.1] + + pdf = to_pandas(dms) + logging.getLogger().info(pdf) + assert pdf.shape == (1, 3) + assert pdf.to_dict() == { + "Aggregates.Name": {0: "my_name"}, + "Aggregates.Maximum.AG_MAX": {0: 1.2}, + 
"Aggregates.Maximum.AG_MIN": {0: 1.1}, + } + assert len(pdf.to_json()) > 0 + + pdf = to_pandas(dms, name_separator="::") + logging.getLogger().info(pdf) + assert pdf.shape == (1, 3) + assert pdf.to_dict() == { + "Aggregates::Name": {0: "my_name"}, + "Aggregates::Maximum::AG_MAX": {0: 1.2}, + "Aggregates::Maximum::AG_MIN": {0: 1.1}, + } + assert len(pdf.to_json()) > 0