From 27874432e689af18b4f8b663c2733bc20d698e8e Mon Sep 17 00:00:00 2001 From: "a.krantz" Date: Mon, 28 Oct 2024 14:30:32 +0000 Subject: [PATCH] Make sure aggregates get unique column names --- src/odsbox/con_i.py | 12 +++------- src/odsbox/datamatrices_to_pandas.py | 12 ++++++++-- src/odsbox/unit_utils.py | 12 +++++++--- tests/test_con_i.py | 5 ++++ tests/test_datamatrices_to_pandas.py | 34 ++++++++++++++++++++++++++++ 5 files changed, 61 insertions(+), 14 deletions(-) diff --git a/src/odsbox/con_i.py b/src/odsbox/con_i.py index d3644be..7730b95 100644 --- a/src/odsbox/con_i.py +++ b/src/odsbox/con_i.py @@ -170,18 +170,14 @@ def logout(self): def query_data( self, query: str | dict | ods.SelectStatement, - enum_as_string: bool = False, - date_as_timestamp: bool = False, + **kwargs, ) -> DataFrame: """ Query ods server for content and return the results as Pandas DataFrame :param str | dict | ods.SelectStatement query: Query given as JAQueL query (dict or str) or as an ASAM ODS SelectStatement. - :param bool enum_as_string: columns of type DT_ENUM are returned as int values. - If this is set to True the int values are mapped to the corresponding string values. - :param bool date_as_timestamp: columns of type DT_DATE or DS_DATE are returned as string. - If this is set to True the strings are converted to pandas Timestamp. + :param kwargs: additional arguments passed to `to_pandas`. :raises requests.HTTPError: If query fails. :return DataFrame: The DataMatrices as Pandas.DataFrame. The columns are named as `ENTITY_NAME.ATTRIBUTE_NAME`. `IsNull` values are not marked invalid. 
@@ -189,9 +185,7 @@ def query_data( data_matrices = ( self.data_read(query) if isinstance(query, ods.SelectStatement) else self.data_read_jaquel(query) ) - return to_pandas( - data_matrices, model_cache=self.mc, enum_as_string=enum_as_string, date_as_timestamp=date_as_timestamp - ) + return to_pandas(data_matrices, model_cache=self.mc, **kwargs) def model(self) -> ods.Model: """ diff --git a/src/odsbox/datamatrices_to_pandas.py b/src/odsbox/datamatrices_to_pandas.py index f64ce70..cd932e1 100644 --- a/src/odsbox/datamatrices_to_pandas.py +++ b/src/odsbox/datamatrices_to_pandas.py @@ -167,6 +167,7 @@ def to_pandas( model_cache: ModelCache | None = None, enum_as_string: bool = False, date_as_timestamp: bool = False, + name_separator: str = ".", ) -> pd.DataFrame: """ Converts data in an ASAM ODS DataMatrices into a pandas DataFrame. @@ -178,9 +179,10 @@ def to_pandas( to the corresponding string values. :param bool date_as_timestamp: columns of type DT_DATE or DS_DATE are returned as string. If this is set to True the strings are converted to pandas Timestamp. + :param str name_separator: separator used to concatenate entity and attribute names to define column name. :return pd.DataFrame: A pandas DataFrame containing all the single matrices in a single frame. The - columns are named by the schema `ENTITY_NAME.ATTRIBUTE_NAME`. + columns are named by the schema `ENTITY_NAME.ATTRIBUTE_NAME[.AGGREGATE]`. """ if 0 == len(data_matrices.matrices): return pd.DataFrame() @@ -194,7 +196,13 @@ def to_pandas( entity = model_cache.entity(matrix.name) if model_cache is not None else None for column in matrix.columns: # The flags are ignored here. There might be NULL in here. Check `column.is_null` for this. - column_dict[matrix.name + "." 
+ column.name] = __get_datamatrix_column_values_ex( + aggregate_suffix = ( + "" + if ods.AggregateEnum.AG_NONE == column.aggregate + else name_separator + ods.AggregateEnum.Name(column.aggregate) + ) + column_name = f"{matrix.name}{name_separator}{column.name}{aggregate_suffix}" + column_dict[column_name] = __get_datamatrix_column_values_ex( column, model_cache, enum_as_string, entity, date_as_timestamp ) diff --git a/src/odsbox/unit_utils.py b/src/odsbox/unit_utils.py index 17e45fa..ed92919 100644 --- a/src/odsbox/unit_utils.py +++ b/src/odsbox/unit_utils.py @@ -21,6 +21,7 @@ def query_physical_dimensions( temperature: int = 0, molar_amount: int = 0, luminous_intensity: int = 0, + **kwargs, ) -> DataFrame: """ Search for a physical dimension by its SI exponents. @@ -57,7 +58,7 @@ def query_physical_dimensions( ci.condition.aid = physical_dimension_entity.aid ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name ci.condition.long_array.values.append(luminous_intensity) - return to_pandas(con_i.data_read(select)) + return to_pandas(con_i.data_read(select), **kwargs) def query_units( @@ -69,6 +70,7 @@ def query_units( temperature: int = 0, molar_amount: int = 0, luminous_intensity: int = 0, + **kwargs, ) -> DataFrame: """ Search for a unit by its SI exponents. @@ -112,7 +114,10 @@ def query_units( ci.condition.aid = physical_dimension_entity.aid ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name ci.condition.long_array.values.append(luminous_intensity) - return to_pandas(con_i.data_read(select)) + return to_pandas( + con_i.data_read(select), + **kwargs, + ) def query_quantity( @@ -124,6 +129,7 @@ def query_quantity( temperature: int = 0, molar_amount: int = 0, luminous_intensity: int = 0, + **kwargs, ) -> DataFrame: """ Search for a quantity by its SI exponents. 
@@ -174,4 +180,4 @@ def query_quantity( ci.condition.aid = physical_dimension_entity.aid ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name ci.condition.long_array.values.append(luminous_intensity) - return to_pandas(con_i.data_read(select)) + return to_pandas(con_i.data_read(select), **kwargs) diff --git a/tests/test_con_i.py b/tests/test_con_i.py index 2d2a58c..dd0bf4d 100644 --- a/tests/test_con_i.py +++ b/tests/test_con_i.py @@ -28,6 +28,11 @@ def test_con_i(): {"AoMeasurement": {}, "$options": {"$rowlimit": 50}}, date_as_timestamp=True, enum_as_string=True ) + r = con_i.query_data( + {"AoUnit": {}, "$attributes": {"name": 1}, "$options": {"$rowlimit": 1}}, name_separator="::" + ) + assert f"{entity.name}::" in r.columns[0] + def test_query_data(): with __create_con_i() as con_i: diff --git a/tests/test_datamatrices_to_pandas.py b/tests/test_datamatrices_to_pandas.py index f689a18..cf60507 100644 --- a/tests/test_datamatrices_to_pandas.py +++ b/tests/test_datamatrices_to_pandas.py @@ -343,3 +343,37 @@ def test_unknown_arrays_empty(): assert pdf.to_dict() != {} assert unknown_array_values(dm.columns[0].unknown_arrays.values[0]) == [] + + +def test_aggregates(): + dms = ods.DataMatrices() + dm = dms.matrices.add(aid=4711, name="Aggregates") + dm.columns.add( + name="Name", base_name="name", aggregate=ods.AggregateEnum.AG_NONE, data_type=ods.DT_STRING + ).string_array.values[:] = ["my_name"] + dm.columns.add( + name="Maximum", base_name="maximum", aggregate=ods.AggregateEnum.AG_MAX, data_type=ods.DT_DOUBLE + ).double_array.values[:] = [1.2] + dm.columns.add( + name="Maximum", base_name="maximum", aggregate=ods.AggregateEnum.AG_MIN, data_type=ods.DT_DOUBLE + ).double_array.values[:] = [1.1] + + pdf = to_pandas(dms) + logging.getLogger().info(pdf) + assert pdf.shape == (1, 3) + assert pdf.to_dict() == { + "Aggregates.Name": {0: "my_name"}, + "Aggregates.Maximum.AG_MAX": {0: 1.2}, + 
"Aggregates.Maximum.AG_MIN": {0: 1.1}, + } + assert len(pdf.to_json()) > 0 + + pdf = to_pandas(dms, name_separator="::") + logging.getLogger().info(pdf) + assert pdf.shape == (1, 3) + assert pdf.to_dict() == { + "Aggregates::Name": {0: "my_name"}, + "Aggregates::Maximum::AG_MAX": {0: 1.2}, + "Aggregates::Maximum::AG_MIN": {0: 1.1}, + } + assert len(pdf.to_json()) > 0