From 54132b27979e205ba6be4c3c337abfe07962c185 Mon Sep 17 00:00:00 2001 From: mdeegen <84499883+mdeegen@users.noreply.github.com> Date: Wed, 28 Feb 2024 13:20:27 +0100 Subject: [PATCH 1/2] Update pandas_utils.py for column names with line breaks in is_valid_variable_name because of exception when column name included a new line --- paderbox/utils/pandas_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paderbox/utils/pandas_utils.py b/paderbox/utils/pandas_utils.py index 82790503..27b276ce 100644 --- a/paderbox/utils/pandas_utils.py +++ b/paderbox/utils/pandas_utils.py @@ -103,8 +103,10 @@ def py_query( keywords = ['index'] + list(data) - def is_valid_variable_name(name): + def is_valid_variable_name(name: str): import ast + if len(name.splitlines()) != 1: + return False # https://stackoverflow.com/a/36331242/5766934 try: ast.parse('{} = None'.format(name)) From a6fef74b50dea845c721373787236e0b98bf8cf1 Mon Sep 17 00:00:00 2001 From: mdeegen <84499883+mdeegen@users.noreply.github.com> Date: Wed, 28 Feb 2024 15:41:17 +0100 Subject: [PATCH 2/2] Update pandas_utils.py doctest with new lines in column name and query --- paderbox/utils/pandas_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paderbox/utils/pandas_utils.py b/paderbox/utils/pandas_utils.py index 27b276ce..ff5f925b 100644 --- a/paderbox/utils/pandas_utils.py +++ b/paderbox/utils/pandas_utils.py @@ -44,10 +44,10 @@ def py_query( To access column names that aren't valid python identifiers (e.g. the name contains a whitespace), you have to use the kwargs dictionary: - >>> df = pd.DataFrame([{'a b': 1, 'b': 2}, {'a b': 3, 'b': 4}]) - >>> py_query(df, 'kwargs["a b"] == 1') - a b b - 0 1 2 + >>> df = pd.DataFrame([{'a b': 1, 'b\\nc': 2}, {'a b': 3, 'b\\nc': 4}]) + >>> py_query(df, 'kwargs["a b"] == 1 and kwargs["b\\\\nc"] == 2') + a b b\\nc + 0 1 2 When you need a package function, you have to specify it in the globals dict. e.g.: