Skip to content

Commit

Permalink
feat: suggestions for jaquel (#72)
Browse files Browse the repository at this point in the history
* Put suggestions into error messages when parsing jaquel.

* Make sure aggregates get unique column names

* Add Google site verification meta tag

---------

Co-authored-by: a.krantz <a.krantz@peak-solution.de>
  • Loading branch information
totonga and a.krantz authored Oct 28, 2024
1 parent 9edc710 commit 8f1b928
Show file tree
Hide file tree
Showing 9 changed files with 234 additions and 25 deletions.
7 changes: 7 additions & 0 deletions docs/_templates/layout.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!-- _templates/layout.html -->
{# Extend the Sphinx theme's base layout to inject the Google Search Console
   site-verification meta tag (value supplied via html_context in conf.py)
   into every generated page's <head>. #}
{% extends "!layout.html" %}

{% block extrahead %}
<meta name="google-site-verification" content="{{ google_site_verification }}" />
{{ super() }}
{% endblock %}
4 changes: 4 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@
#
html_theme = "sphinx_rtd_theme"

html_context = {"google_site_verification": "M-YV4bEhpyyWVOBQB9VLsSCjKfqO_UpvTBMJ7DS5t_U"}

templates_path = ["_templates"]

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
Expand Down
12 changes: 3 additions & 9 deletions src/odsbox/con_i.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,28 +170,22 @@ def logout(self):
def query_data(
    self,
    query: str | dict | ods.SelectStatement,
    **kwargs,
) -> DataFrame:
    """
    Query the ODS server for content and return the results as a pandas DataFrame.

    :param str | dict | ods.SelectStatement query: Query given as JAQueL query (dict or str)
        or as an ASAM ODS SelectStatement.
    :param kwargs: additional keyword arguments passed through to `to_pandas`
        (e.g. `enum_as_string`, `date_as_timestamp`, `name_separator`).
    :raises requests.HTTPError: If query fails.
    :return DataFrame: The DataMatrices as Pandas.DataFrame. The columns are named as
        `ENTITY_NAME.ATTRIBUTE_NAME[.AGGREGATE]`. `IsNull` values are not marked invalid.
    """
    # A ready-made SelectStatement goes straight to data_read; dict/str is treated as JAQueL.
    data_matrices = (
        self.data_read(query) if isinstance(query, ods.SelectStatement) else self.data_read_jaquel(query)
    )
    return to_pandas(data_matrices, model_cache=self.mc, **kwargs)

def model(self) -> ods.Model:
"""
Expand Down
12 changes: 10 additions & 2 deletions src/odsbox/datamatrices_to_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def to_pandas(
model_cache: ModelCache | None = None,
enum_as_string: bool = False,
date_as_timestamp: bool = False,
name_separator: str = ".",
) -> pd.DataFrame:
"""
Converts data in an ASAM ODS DataMatrices into a pandas DataFrame.
Expand All @@ -178,9 +179,10 @@ def to_pandas(
to the corresponding string values.
:param bool date_as_timestamp: columns of type DT_DATE or DS_DATE are returned as string.
If this is set to True the strings are converted to pandas Timestamp.
:param str name_separator: separator used to concatenate entity and attribute names to define column name.
:return pd.DataFrame: A pandas DataFrame containing all the single matrices in a single frame. The
columns are named by the schema `ENTITY_NAME.ATTRIBUTE_NAME`.
columns are named by the schema `ENTITY_NAME.ATTRIBUTE_NAME[.AGGREGATE]`.
"""
if 0 == len(data_matrices.matrices):
return pd.DataFrame()
Expand All @@ -193,8 +195,14 @@ def to_pandas(
for matrix in data_matrices.matrices:
entity = model_cache.entity(matrix.name) if model_cache is not None else None
for column in matrix.columns:
aggregate_postfix = (
""
if ods.AggregateEnum.AG_NONE == column.aggregate
else name_separator + ods.AggregateEnum.Name(column.aggregate)
)
column_name = f"{matrix.name}{name_separator}{column.name}{aggregate_postfix}"
# The flags are ignored here. There might be NULL in here. Check `column.is_null` for this.
column_dict[matrix.name + "." + column.name] = __get_datamatrix_column_values_ex(
column_dict[column_name] = __get_datamatrix_column_values_ex(
column, model_cache, enum_as_string, entity, date_as_timestamp
)

Expand Down
68 changes: 62 additions & 6 deletions src/odsbox/jaquel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import re
from typing import Tuple, List, Any
from difflib import get_close_matches

from google.protobuf.internal import containers as _containers

Expand Down Expand Up @@ -127,7 +128,60 @@ def __model_get_entity_ex(model: ods.Model, entity_name_or_aid: str | int) -> od
if entity.name.lower() == entity_name_or_aid.lower() or entity.base_name.lower() == entity_name_or_aid.lower():
return entity

raise SyntaxError(f"Entity '{entity_name_or_aid}' is unknown in model.")
raise SyntaxError(
f"Entity '{entity_name_or_aid}' is unknown in model.{__model_get_suggestion_entity(model, entity_name_or_aid)}"
)


def __model_get_suggestion(lower_case_dict: dict, str_val: str) -> str:
    """Build a " Did you mean ...?" hint for an unknown identifier.

    :param dict lower_case_dict: maps lowercased candidate names to their original spelling.
    :param str str_val: the unknown name the caller supplied.
    :return str: hint text (with a leading space) naming the closest candidate,
        or an empty string when nothing is similar enough.
    """
    matches = get_close_matches(str_val.lower(), lower_case_dict, n=1, cutoff=0.3)
    if not matches:
        return ""
    return f" Did you mean '{lower_case_dict[matches[0]]}'?"


def __model_get_enum_suggestion(enumeration: ods.Model.Enumeration, str_val: str) -> str:
    """Return a " Did you mean ...?" hint for an unknown enumeration entry name."""
    candidates = {entry_name.lower(): entry_name for entry_name in enumeration.items}
    return __model_get_suggestion(candidates, str_val)


def __model_get_suggestion_attribute(entity: ods.Model.Entity, attribute_or_relation_name: str) -> str:
    """Return a " Did you mean ...?" hint for a name that is neither attribute nor relation of `entity`.

    :param ods.Model.Entity entity: entity whose attributes and relations are candidates.
    :param str attribute_or_relation_name: the unknown name the caller supplied.
    :return str: suggestion text or empty string if nothing is close.
    """
    # Iterate values() directly; the map keys were never used. Insertion order matters:
    # application names are added last so they override base names on lowercase collisions.
    available: dict = {}
    available.update({relation.base_name.lower(): relation.base_name for relation in entity.relations.values()})
    available.update({attribute.base_name.lower(): attribute.base_name for attribute in entity.attributes.values()})
    available.update({relation.name.lower(): relation.name for relation in entity.relations.values()})
    available.update({attribute.name.lower(): attribute.name for attribute in entity.attributes.values()})
    return __model_get_suggestion(available, attribute_or_relation_name)


def __model_get_suggestion_relation(entity: ods.Model.Entity, relation_name: str) -> str:
    """Return a " Did you mean ...?" hint for an unknown relation name of `entity`.

    :param ods.Model.Entity entity: entity whose relations are candidates.
    :param str relation_name: the unknown relation name the caller supplied.
    :return str: suggestion text or empty string if nothing is close.
    """
    # Iterate values() directly; the map keys were never used. Application names are
    # added last so they override base names on lowercase collisions.
    available: dict = {}
    available.update({relation.base_name.lower(): relation.base_name for relation in entity.relations.values()})
    available.update({relation.name.lower(): relation.name for relation in entity.relations.values()})
    return __model_get_suggestion(available, relation_name)


def __model_get_suggestion_entity(model: ods.Model, entity_name: str) -> str:
    """Return a " Did you mean ...?" hint for an entity name unknown in `model`.

    :param ods.Model model: model whose entities are candidates.
    :param str entity_name: the unknown entity name the caller supplied.
    :return str: suggestion text or empty string if nothing is close.
    """
    # Iterate values() directly; the map keys were never used. Application names are
    # added last so they override base names on lowercase collisions.
    available: dict = {}
    available.update({entity.base_name.lower(): entity.base_name for entity in model.entities.values()})
    available.update({entity.name.lower(): entity.name for entity in model.entities.values()})
    return __model_get_suggestion(available, entity_name)


def __model_get_suggestion_aggregate(aggregate_name: str) -> str:
    """Return a " Did you mean ...?" hint for an unknown `$` aggregate keyword."""
    candidates = {keyword.lower(): keyword for keyword in _jo_aggregates}
    return __model_get_suggestion(candidates, aggregate_name)


def __model_get_suggestion_operators(operator_name: str) -> str:
    """Return a " Did you mean ...?" hint for an unknown `$` condition operator."""
    candidates = {keyword.lower(): keyword for keyword in _jo_operators}
    return __model_get_suggestion(candidates, operator_name)


def __model_get_enum_index(model: ods.Model, entity: ods.Model.Entity, attribute_name: str, str_val: str) -> int:
Expand All @@ -137,7 +191,7 @@ def __model_get_enum_index(model: ods.Model, entity: ods.Model.Entity, attribute
if key.lower() == str_val.lower():
return enum.items[key]

raise SyntaxError('Enum entry for "' + str_val + '" does not exist')
raise SyntaxError(f"Enum entry for '{str_val}' does not exist.{__model_get_enum_suggestion(enum, str_val)}")


def _jo_enum_get_numeric_value(
Expand Down Expand Up @@ -191,7 +245,8 @@ def __parse_path_and_add_joins(
# Must be a relation
relation = __model_get_relation(model, attribute_entity, path_part)
if relation is None:
raise SyntaxError(f"'{path_part}' is no relation of entity '{attribute_entity.name}'")
suggestion_text = __model_get_suggestion_relation(attribute_entity, path_part)
raise SyntaxError(f"'{path_part}' is no relation of entity '{attribute_entity.name}'.{suggestion_text}")
attribute_name = relation.name

# add join
Expand All @@ -216,8 +271,9 @@ def __parse_path_and_add_joins(
else:
relation = __model_get_relation(model, attribute_entity, path_part)
if relation is None:
suggestion_text = __model_get_suggestion_attribute(attribute_entity, path_part)
raise SyntaxError(
f"'{path_part}' is neither attribute nor relation of entity '{attribute_entity.name}'"
f"'{path_part}' is neither attribute nor relation of entity '{attribute_entity.name}'.{suggestion_text}" # noqa: E501
)
attribute_name = relation.name
attribute_type = ods.DataTypeEnum.DT_LONGLONG # its an id
Expand Down Expand Up @@ -281,7 +337,7 @@ def __parse_attributes(
elif "$options" == element:
raise SyntaxError("Actually no $options defined for attributes")
else:
raise SyntaxError('Unknown aggregate "' + element + '"')
raise SyntaxError(f"Unknown aggregate '{element}'.{__model_get_suggestion_aggregate(element)}")
else:
if element_attribute["path"]:
element_attribute["path"] += "."
Expand Down Expand Up @@ -612,7 +668,7 @@ def __parse_conditions(
elif "$options" == elem:
continue
else:
raise SyntaxError('Unknown operator "' + elem + '"')
raise SyntaxError(f"Unknown operator '{elem}'.{__model_get_suggestion_operators(elem)}")
else:
if elem_attribute["path"]:
elem_attribute["path"] += "."
Expand Down
12 changes: 9 additions & 3 deletions src/odsbox/unit_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def query_physical_dimensions(
temperature: int = 0,
molar_amount: int = 0,
luminous_intensity: int = 0,
**kwargs,
) -> DataFrame:
"""
Search for a physical dimension by its SI exponents.
Expand Down Expand Up @@ -57,7 +58,7 @@ def query_physical_dimensions(
ci.condition.aid = physical_dimension_entity.aid
ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name
ci.condition.long_array.values.append(luminous_intensity)
return to_pandas(con_i.data_read(select))
return to_pandas(con_i.data_read(select), **kwargs)


def query_units(
Expand All @@ -69,6 +70,7 @@ def query_units(
temperature: int = 0,
molar_amount: int = 0,
luminous_intensity: int = 0,
**kwargs,
) -> DataFrame:
"""
Search for a unit by its SI exponents.
Expand Down Expand Up @@ -112,7 +114,10 @@ def query_units(
ci.condition.aid = physical_dimension_entity.aid
ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name
ci.condition.long_array.values.append(luminous_intensity)
return to_pandas(con_i.data_read(select))
return to_pandas(
con_i.data_read(select),
**kwargs,
)


def query_quantity(
Expand All @@ -124,6 +129,7 @@ def query_quantity(
temperature: int = 0,
molar_amount: int = 0,
luminous_intensity: int = 0,
**kwargs,
) -> DataFrame:
"""
Search for a quantity by its SI exponents.
Expand Down Expand Up @@ -174,4 +180,4 @@ def query_quantity(
ci.condition.aid = physical_dimension_entity.aid
ci.condition.attribute = con_i.mc.attribute_by_base_name(physical_dimension_entity, "luminous_intensity_exp").name
ci.condition.long_array.values.append(luminous_intensity)
return to_pandas(con_i.data_read(select))
return to_pandas(con_i.data_read(select), **kwargs)
5 changes: 5 additions & 0 deletions tests/test_con_i.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ def test_con_i():
{"AoMeasurement": {}, "$options": {"$rowlimit": 50}}, date_as_timestamp=True, enum_as_string=True
)

r = con_i.query_data(
{"AoUnit": {}, "$attributes": {"name": 1}, "$options": {"$rowlimit": 1}}, name_separator="::"
)
assert f"{entity.name}::" in r.columns[0]


def test_query_data():
with __create_con_i() as con_i:
Expand Down
34 changes: 34 additions & 0 deletions tests/test_datamatrices_to_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,3 +343,37 @@ def test_unknown_arrays_empty():
assert pdf.to_dict() != {}

assert unknown_array_values(dm.columns[0].unknown_arrays.values[0]) == []


def test_aggregates():
    """Aggregate columns must get unique names and honor a custom name_separator."""
    dms = ods.DataMatrices()
    matrix = dms.matrices.add(aid=4711, name="Aggregates")
    name_column = matrix.columns.add(
        name="Name", base_name="name", aggregate=ods.AggregateEnum.AG_NONE, data_type=ods.DT_STRING
    )
    name_column.string_array.values[:] = ["my_name"]
    max_column = matrix.columns.add(
        name="Maximum", base_name="maximum", aggregate=ods.AggregateEnum.AG_MAX, data_type=ods.DT_DOUBLE
    )
    max_column.double_array.values[:] = [1.2]
    min_column = matrix.columns.add(
        name="Maximum", base_name="maximum", aggregate=ods.AggregateEnum.AG_MIN, data_type=ods.DT_DOUBLE
    )
    min_column.double_array.values[:] = [1.1]

    # Default separator '.' appends the aggregate name to otherwise duplicate columns.
    frame = to_pandas(dms)
    logging.getLogger().info(frame)
    assert frame.shape == (1, 3)
    assert frame.to_dict() == {
        "Aggregates.Name": {0: "my_name"},
        "Aggregates.Maximum.AG_MAX": {0: 1.2},
        "Aggregates.Maximum.AG_MIN": {0: 1.1},
    }
    assert len(frame.to_json()) > 0

    # A custom separator is used between entity, attribute and aggregate alike.
    frame = to_pandas(dms, name_separator="::")
    logging.getLogger().info(frame)
    assert frame.shape == (1, 3)
    assert frame.to_dict() == {
        "Aggregates::Name": {0: "my_name"},
        "Aggregates::Maximum::AG_MAX": {0: 1.2},
        "Aggregates::Maximum::AG_MIN": {0: 1.1},
    }
    assert len(frame.to_json()) > 0
Loading

0 comments on commit 8f1b928

Please sign in to comment.