Commit

Make sure aggregates get unique column names
a.krantz committed Oct 28, 2024
1 parent cea586b commit 2787443
Showing 5 changed files with 61 additions and 14 deletions.
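The duplicate names this commit addresses come from the way `to_pandas` keys its columns: until now every column was stored in a plain dict under `ENTITY_NAME.ATTRIBUTE_NAME`, so two aggregates over the same attribute (for example AG_MAX and AG_MIN of a `Maximum` column) shared one key and the later value silently replaced the earlier one. A minimal sketch of that collision with made-up values, not an odsbox call:

import pandas as pd

# Old naming scheme sketched as a plain dict keyed by "ENTITY.ATTRIBUTE".
# Entity, attribute and values are illustrative only.
column_dict = {}
column_dict["Aggregates.Maximum"] = [1.2]  # AG_MAX result
column_dict["Aggregates.Maximum"] = [1.1]  # AG_MIN result overwrites the AG_MAX column

df = pd.DataFrame(column_dict)
print(df.columns.tolist())  # ['Aggregates.Maximum'], one aggregate column is lost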
12 changes: 3 additions & 9 deletions src/odsbox/con_i.py
@@ -170,28 +170,22 @@ def logout(self):
def query_data(
self,
query: str | dict | ods.SelectStatement,
enum_as_string: bool = False,
date_as_timestamp: bool = False,
**kwargs,
) -> DataFrame:
"""
Query ods server for content and return the results as Pandas DataFrame
:param str | dict | ods.SelectStatement query: Query given as JAQueL query (dict or str)
or as an ASAM ODS SelectStatement.
:param bool enum_as_string: columns of type DT_ENUM are returned as int values.
If this is set to True the int values are mapped to the corresponding string values.
:param bool date_as_timestamp: columns of type DT_DATE or DS_DATE are returned as string.
If this is set to True the strings are converted to pandas Timestamp.
:param kwargs: additional arguments passed to `to_pandas`.
:raises requests.HTTPError: If query fails.
:return DataFrame: The DataMatrices as Pandas.DataFrame. The columns are named as `ENTITY_NAME.ATTRIBUTE_NAME`.
`IsNull` values are not marked invalid.
"""
data_matrices = (
self.data_read(query) if isinstance(query, ods.SelectStatement) else self.data_read_jaquel(query)
)
return to_pandas(
data_matrices, model_cache=self.mc, enum_as_string=enum_as_string, date_as_timestamp=date_as_timestamp
)
return to_pandas(data_matrices, model_cache=self.mc, **kwargs)

def model(self) -> ods.Model:
"""
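With the explicit keyword parameters replaced by `**kwargs`, `query_data` now forwards any `to_pandas` option unchanged, including the new `name_separator`. A usage sketch mirroring the call in the tests below; the `ConI` constructor arguments (server URL and credentials) are placeholder assumptions:

from odsbox.con_i import ConI

# URL and credentials are placeholders; adjust them to your ASAM ODS server.
with ConI(url="https://localhost:8080/api", auth=("user", "password")) as con_i:
    df = con_i.query_data(
        {"AoMeasurement": {}, "$options": {"$rowlimit": 50}},
        enum_as_string=True,      # forwarded to to_pandas
        date_as_timestamp=True,   # forwarded to to_pandas
        name_separator="::",      # forwarded to to_pandas, see below
    )
    print(df.columns)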
12 changes: 10 additions & 2 deletions src/odsbox/datamatrices_to_pandas.py
@@ -167,6 +167,7 @@ def to_pandas(
model_cache: ModelCache | None = None,
enum_as_string: bool = False,
date_as_timestamp: bool = False,
name_separator: str = ".",
) -> pd.DataFrame:
"""
Converts data in an ASAM ODS DataMatrices into a pandas DataFrame.
@@ -178,9 +179,10 @@
to the corresponding string values.
:param bool date_as_timestamp: columns of type DT_DATE or DS_DATE are returned as string.
If this is set to True the strings are converted to pandas Timestamp.
:param str name_separator: separator used to concatenate entity and attribute names to define column name.
:return pd.DataFrame: A pandas DataFrame containing all the single matrices in a single frame. The
columns are named by the schema `ENTITY_NAME.ATTRIBUTE_NAME`.
columns are named by the schema `ENTITY_NAME.ATTRIBUTE_NAME[.AGGREGATE]`.
"""
if 0 == len(data_matrices.matrices):
return pd.DataFrame()
@@ -194,7 +196,13 @@ def to_pandas(
entity = model_cache.entity(matrix.name) if model_cache is not None else None
for column in matrix.columns:
# The flags are ignored here. There might be NULL in here. Check `column.is_null` for this.
column_dict[matrix.name + "." + column.name] = __get_datamatrix_column_values_ex(
column_name = f"{
matrix.name}{
name_separator}{
column.name}{
'' if ods.AggregateEnum.AG_NONE == column.aggregate else
name_separator + ods.AggregateEnum.Name(column.aggregate)}"
column_dict[column_name] = __get_datamatrix_column_values_ex(
column, model_cache, enum_as_string, entity, date_as_timestamp
)

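The aggregate suffix is only appended when a column actually carries an aggregate, so plain attribute columns keep their familiar `ENTITY.ATTRIBUTE` names. The naming rule, restated as a small hypothetical helper that is equivalent in effect to the f-string above but independent of the protobuf types:

def aggregate_column_name(entity: str, attribute: str, aggregate: str | None, name_separator: str = ".") -> str:
    """Append the aggregate name only when an aggregate is set."""
    name = f"{entity}{name_separator}{attribute}"
    if aggregate:
        name = f"{name}{name_separator}{aggregate}"
    return name

print(aggregate_column_name("Aggregates", "Name", None))         # Aggregates.Name
print(aggregate_column_name("Aggregates", "Maximum", "AG_MAX"))  # Aggregates.Maximum.AG_MAX
print(aggregate_column_name("Aggregates", "Maximum", "AG_MIN", name_separator="::"))  # Aggregates::Maximum::AG_MIN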
12 changes: 9 additions & 3 deletions src/odsbox/unit_utils.py
@@ -21,6 +21,7 @@ def query_physical_dimensions(
temperature: int = 0,
molar_amount: int = 0,
luminous_intensity: int = 0,
**kwargs,
) -> DataFrame:
"""
Search for a physical dimension by its SI exponents.
@@ -57,7 +58,7 @@
ci.condition.aid = physical_dimension_entity.aid
ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name
ci.condition.long_array.values.append(luminous_intensity)
return to_pandas(con_i.data_read(select))
return to_pandas(con_i.data_read(select), **kwargs)


def query_units(
@@ -69,6 +70,7 @@
temperature: int = 0,
molar_amount: int = 0,
luminous_intensity: int = 0,
**kwargs,
) -> DataFrame:
"""
Search for a unit by its SI exponents.
@@ -112,7 +114,10 @@
ci.condition.aid = physical_dimension_entity.aid
ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name
ci.condition.long_array.values.append(luminous_intensity)
return to_pandas(con_i.data_read(select))
return to_pandas(
con_i.data_read(select),
**kwargs,
)


def query_quantity(
@@ -124,6 +129,7 @@
temperature: int = 0,
molar_amount: int = 0,
luminous_intensity: int = 0,
**kwargs,
) -> DataFrame:
"""
Search for a quantity by its SI exponents.
@@ -174,4 +180,4 @@
ci.condition.aid = physical_dimension_entity.aid
ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name
ci.condition.long_array.values.append(luminous_intensity)
return to_pandas(con_i.data_read(select))
return to_pandas(con_i.data_read(select), **kwargs)
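All three helpers now hand their extra keyword arguments to `to_pandas`, so the same column-name controls work for physical dimensions, units, and quantities. A call sketch; the open connection `con_i` and the chosen exponent are assumptions for illustration:

from odsbox.unit_utils import query_units

# con_i is an open connection as sketched earlier; it is passed first,
# matching how the helper uses it internally (con_i.mc, con_i.data_read).
units = query_units(con_i, temperature=1, name_separator="::")
print(units.columns)  # entity and attribute are now joined by "::"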
5 changes: 5 additions & 0 deletions tests/test_con_i.py
@@ -28,6 +28,11 @@ def test_con_i():
{"AoMeasurement": {}, "$options": {"$rowlimit": 50}}, date_as_timestamp=True, enum_as_string=True
)

r = con_i.query_data(
{"AoUnit": {}, "$attributes": {"name": 1}, "$options": {"$rowlimit": 1}}, name_separator="::"
)
assert f"{entity.name}::" in r.columns[0]


def test_query_data():
with __create_con_i() as con_i:
34 changes: 34 additions & 0 deletions tests/test_datamatrices_to_pandas.py
@@ -343,3 +343,37 @@ def test_unknown_arrays_empty():
assert pdf.to_dict() != {}

assert unknown_array_values(dm.columns[0].unknown_arrays.values[0]) == []


def test_aggregates():
dms = ods.DataMatrices()
dm = dms.matrices.add(aid=4711, name="Aggregates")
dm.columns.add(
name="Name", base_name="name", aggregate=ods.AggregateEnum.AG_NONE, data_type=ods.DT_STRING
).string_array.values[:] = ["my_name"]
dm.columns.add(
name="Maximum", base_name="maximum", aggregate=ods.AggregateEnum.AG_MAX, data_type=ods.DT_DOUBLE
).double_array.values[:] = [1.2]
dm.columns.add(
name="Maximum", base_name="maximum", aggregate=ods.AggregateEnum.AG_MIN, data_type=ods.DT_DOUBLE
).double_array.values[:] = [1.1]

pdf = to_pandas(dms)
logging.getLogger().info(pdf)
assert pdf.shape == (1, 3)
assert pdf.to_dict() == {
"Aggregates.Name": {0: "my_name"},
"Aggregates.Maximum.AG_MAX": {0: 1.2},
"Aggregates.Maximum.AG_MIN": {0: 1.1},
}
assert len(pdf.to_json()) > 0

pdf = to_pandas(dms, name_separator="::")
logging.getLogger().info(pdf)
assert pdf.shape == (1, 3)
assert pdf.to_dict() == {
"Aggregates::Name": {0: "my_name"},
"Aggregates::Maximum::AG_MAX": {0: 1.2},
"Aggregates::Maximum::AG_MIN": {0: 1.1},
}
assert len(pdf.to_json()) > 0
