From 3b21e3e00659a2b5fd4b4bf477a4a939bf75f612 Mon Sep 17 00:00:00 2001
From: Dmitri Lebedev <ryba4@ryba4.com>
Date: Fri, 3 Sep 2021 14:09:06 +0700
Subject: [PATCH] added tests for subset errors

---
 .github/workflows/python-publish.yml |  2 +-
 README.md                            |  8 ++++----
 erde/op/subset.py                    |  5 +++++
 tests/test_subset.py                 | 30 ++++++++++++++++++++++++++++
 4 files changed, 40 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_subset.py

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 7879505..8a555b9 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -53,4 +53,4 @@ jobs:
     - name: Test with pytest
       run: |
         pytest --cov=erde
-        coverage html
+        # coverage html  # not used
diff --git a/README.md b/README.md
index 76c5715..2ccc3c4 100644
--- a/README.md
+++ b/README.md
@@ -47,17 +47,17 @@ See [the example](examples/2_minimal_cli_app/) for more code and instructions.
 
 Instead of doing this:
 
-	df.rename(columns={'oldname1': 'newname1', 'oldname2': 'newname2'}, inplace=True)
-	df.drop(['oldcol3', 'oldcol4'], inplace=True, axis=1, errors='ignore')
+	df.rename(columns={'oldname1': 'newname1'}, inplace=True)
+	df.drop(['oldcol2'], inplace=True, axis=1, errors='ignore')
 
 you can simply do this:
 
 	from erde import subset
-	df = subset(df, 'oldname1: newname1, oldname2: newname2, -oldcol3, -oldcol4, *')
+	df = subset(df, 'oldname1: newname1, -oldcol2, *')
 
 Or even run this from command line:
 
-	erde subset old_file.gpkg oldname1:newname1,oldname2:newname2,-oldcol3,-oldcol4,* new_file.gpkg
+	erde subset old_file.gpkg oldname1:newname1,-oldcol2,* new_file.gpkg
 
 ### Routing
 
diff --git a/erde/op/subset.py b/erde/op/subset.py
index 707f573..435fcdb 100644
--- a/erde/op/subset.py
+++ b/erde/op/subset.py
@@ -4,6 +4,11 @@ def parse_str(columns):
 	result = []
 	for i in columns.split(','):
 		j = [k.strip() for k in i.strip().split(':')]
+
+		for k in j:
+			if len(k) == 0 or (len(k) == 1 and k.startswith('-')):
+				raise ValueError(f'Bad column name: "{i}": zero name length.')
+
 		if len(j) > 2:
 			raise ValueError(f"column name must have 0 or 1 colons (:) got {len(i) - 1} in '")
 
diff --git a/tests/test_subset.py b/tests/test_subset.py
new file mode 100644
index 0000000..e7e3bc3
--- /dev/null
+++ b/tests/test_subset.py
@@ -0,0 +1,30 @@
+import pandas as pd
+from erde.op import subset
+from pytest import raises
+
+def test_parse_strings_errors():  # parse_str: one well-formed spec, then malformed specs that must raise ValueError
+	good_string = 'old1:new1,old2,old3:new3'
+	assert subset.parse_str(good_string) == [['old1', 'new1'], ['old2', None], ['old3', 'new3']]  # item without a rename gets None as its new name
+
+	bad_string = 'old1:new1,old2:new2:verynew2'  # second item has two colons, i.e. three parts
+	with raises(ValueError):
+		subset.parse_str(bad_string)
+
+	with raises(ValueError):  # NOTE(review): presumably a '-' (drop) prefix may not be combined with a rename; that check is not visible in this patch - confirm
+		subset.parse_str('-old1:new1')
+
+	with raises(ValueError):  # third item is a bare '-' with no column name after it
+		subset.parse_str('old1,old2:new2,-,old4')
+
+	with raises(ValueError):  # empty input splits into a single zero-length name
+		subset.parse_str('')
+
+
+def test_main_errors():  # subset.main is expected to raise TypeError for non-string specs and KeyError for an unknown column
+	df = pd.DataFrame({'col1': range(5), 'col2': range(10, 15), 'col3': range(20, 25)})
+	for v in [123456, None, True, False, df]:  # int, None, bools and a DataFrame: none of these is a string spec
+		with raises(TypeError):
+			subset.main(df, v)
+
+	with raises(KeyError):  # 'missing_column' is not one of col1, col2, col3
+		subset.main(df, 'missing_column')
\ No newline at end of file