From 3b21e3e00659a2b5fd4b4bf477a4a939bf75f612 Mon Sep 17 00:00:00 2001
From: Dmitri Lebedev
Date: Fri, 3 Sep 2021 14:09:06 +0700
Subject: [PATCH] added tests for subset errors

---
Review notes (free text in this position is commentary, not part of the diff):

What the patch does
  * .github/workflows/python-publish.yml: comments out the `coverage html`
    command that ran after `pytest --cov=erde`.
  * README.md: shortens the `subset` example to a single rename
    (oldname1 -> newname1) and a single dropped column (oldcol2).
  * erde/op/subset.py: inside the per-item loop of parse_str, raises
    ValueError when any colon-separated part of an item is empty, or is a
    lone "-", after stripping whitespace.
  * tests/test_subset.py (new file): checks one well-formed spec against its
    expected parse result, four malformed specs that must raise ValueError,
    five non-string column arguments to subset.main that must raise
    TypeError, and a missing column name that must raise KeyError.

NOTE(review): the pre-existing "0 or 1 colons" message interpolates
  len(i) - 1, where i is the item string, so it reports the string length
  minus one rather than the number of colons (len(j) - 1 looks intended).
  The message also ends with a dangling "in '" and never names the item.
  This patch leaves that line untouched; consider a follow-up.
NOTE(review): tests/test_subset.py is added without a trailing newline.
NOTE(review): this copy of the patch had its whitespace collapsed. Line
  breaks, indentation and blank-line placement below are reconstructed by
  convention and must be checked against the repository before applying.
  The subset.py hunk shows six "+" markers although its header (+4,11) and
  the diffstat both report five insertions - TODO confirm against the
  original commit.

 .github/workflows/python-publish.yml |  2 +-
 README.md                            |  8 ++++----
 erde/op/subset.py                    |  5 +++++
 tests/test_subset.py                 | 30 ++++++++++++++++++++++++++++
 4 files changed, 40 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_subset.py

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 7879505..8a555b9 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -53,4 +53,4 @@ jobs:
     - name: Test with pytest
       run: |
         pytest --cov=erde
-        coverage html
+        # coverage html # not used
diff --git a/README.md b/README.md
index 76c5715..2ccc3c4 100644
--- a/README.md
+++ b/README.md
@@ -47,17 +47,17 @@ See [the example](examples/2_minimal_cli_app/) for more code and instructions.
 
 Instead of doing this:
 
-    df.rename(columns={'oldname1': 'newname1', 'oldname2': 'newname2'}, inplace=True)
-    df.drop(['oldcol3', 'oldcol4'], inplace=True, axis=1, errors='ignore')
+    df.rename(columns={'oldname1': 'newname1'}, inplace=True)
+    df.drop(['oldcol2'], inplace=True, axis=1, errors='ignore')
 
 you can simply do this:
 
     from erde import subset
-    df = subset(df, 'oldname1: newname1, oldname2: newname2, -oldcol3, -oldcol4, *')
+    df = subset(df, 'oldname1: newname1, -oldcol2, *')
 
 Or even run this from command line:
 
-    erde subset old_file.gpkg oldname1:newname1,oldname2:newname2,-oldcol3,-oldcol4,* new_file.gpkg
+    erde subset old_file.gpkg oldname1:newname1,-oldcol2,* new_file.gpkg
 
 ### Routing
 
diff --git a/erde/op/subset.py b/erde/op/subset.py
index 707f573..435fcdb 100644
--- a/erde/op/subset.py
+++ b/erde/op/subset.py
@@ -4,6 +4,11 @@ def parse_str(columns):
     result = []
     for i in columns.split(','):
         j = [k.strip() for k in i.strip().split(':')]
+
+        for k in j:
+            if len(k) == 0 or (len(k) == 1 and k.startswith('-')):
+                raise ValueError(f'Bad column name: "{i}": zero name length.')
+
+
         if len(j) > 2:
             raise ValueError(f"column name must have 0 or 1 colons (:) got {len(i) - 1} in '")
 
diff --git a/tests/test_subset.py b/tests/test_subset.py
new file mode 100644
index 0000000..e7e3bc3
--- /dev/null
+++ b/tests/test_subset.py
@@ -0,0 +1,30 @@
+import pandas as pd
+from erde.op import subset
+from pytest import raises
+
+def test_parse_strings_errors():
+    good_string = 'old1:new1,old2,old3:new3'
+    assert subset.parse_str(good_string) == [['old1', 'new1'], ['old2', None], ['old3', 'new3']]
+
+    bad_string = 'old1:new1,old2:new2:verynew2'
+    with raises(ValueError):
+        subset.parse_str(bad_string)
+
+    with raises(ValueError):
+        subset.parse_str('-old1:new1')
+
+    with raises(ValueError):
+        subset.parse_str('old1,old2:new2,-,old4')
+
+    with raises(ValueError):
+        subset.parse_str('')
+
+
+def test_main_errors():
+    df = pd.DataFrame({'col1': range(5), 'col2': range(10, 15), 'col3': range(20, 25)})
+    for v in [123456, None, True, False, df]:
+        with raises(TypeError):
+            subset.main(df, v)
+
+    with raises(KeyError):
+        subset.main(df, 'missing_column')
\ No newline at end of file