From 65f7d711efefa49fa39919795fd4072af6e537a9 Mon Sep 17 00:00:00 2001 From: CodyCBakerPhD Date: Fri, 16 Aug 2024 15:37:32 -0400 Subject: [PATCH] add and debug zarr example in base test --- tests/examples/reduced_example_0/0.log | 1 + .../cb65c877-882b-4554-8fa1-8f4e986e13a6.tsv | 2 ++ tests/test_reduce_dandi_raw_s3_log.py | 29 +++++++++++-------- 3 files changed, 20 insertions(+), 12 deletions(-) create mode 100644 tests/examples/reduced_example_0/expected_output/zarr/cb65c877-882b-4554-8fa1-8f4e986e13a6.tsv diff --git a/tests/examples/reduced_example_0/0.log b/tests/examples/reduced_example_0/0.log index 43bacc6..41e10c5 100644 --- a/tests/examples/reduced_example_0/0.log +++ b/tests/examples/reduced_example_0/0.log @@ -1,2 +1,3 @@ 8787a3c41bf7ce0d54359d9348ad5b08e16bd5bb8ae5aa4e1508b435773a066e dandiarchive [31/Dec/2021:23:06:42 +0000] 192.0.2.0 - NWC7V1KE70QZYJ5Q REST.GET.OBJECT blobs/a7b/032/a7b032b8-1e31-429f-975f-52a28cec6629 "GET /blobs/a7b/032/a7b032b8-1e31-429f-975f-52a28cec6629?versionId=yn5YAJiwT36Rv78jGYLM71GZumWL.QWn HTTP/1.1" 200 - 1443 1443 35 35 "-" "git-annex/8.20211028-g1c76278" yn5YAJiwT36Rv78jGYLM71GZumWL.QWn ojBg2QLVTSTWsCAe1HoC6IBNLUSPmWH276FdsedhZ/4CQ67DWuZQHcXXB9XUJxYKpnPHpJyBjMM= - ECDHE-RSA-AES128-GCM-SHA256 - dandiarchive.s3.amazonaws.com TLSv1.2 - 8787a3c41bf7ce0d54359d9348ad5b08e16bd5bb8ae5aa4e1508b435773a066e dandiarchive [04/May/2022:05:06:35 +0000] 192.0.2.0 - J42N2W7ET0EC03CV REST.GET.OBJECT blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991 "GET /blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991 HTTP/1.1" 206 - 512 171408 53 52 "-" "-" - DX8oFoKQx0o5V3lwEuWBxF5p2fSXrwINj0rnxmas0YgjWuPqYLK/vnW60Txh23K93aahe0IFw2c= - ECDHE-RSA-AES128-GCM-SHA256 - dandiarchive.s3.amazonaws.com TLSv1.2 - +8787a3c41bf7ce0d54359d9348ad5b08e16bd5bb8ae5aa4e1508b435773a066e dandiarchive [01/Jan/2023:22:42:58 +0000] 192.0.2.0 - W3VJKP0HM8TV2N46 REST.GET.OBJECT zarr/cb65c877-882b-4554-8fa1-8f4e986e13a6/0/0/0/1/5/19 "GET /zarr/cb65c877-882b-4554-8fa1-8f4e986e13a6/0/0/0/1/5/19?versionId=MwI7yXtui4mtwTeuZxHoT4qHap44j3T2 HTTP/1.1" 200 - 1526223 1526223 61 55 "-" "git-annex/10.20220927-geb4a544" MwI7yXtui4mtwTeuZxHoT4qHap44j3T2 U4WvVRIYm+n+VYNArVY/+fjDV3PZesvSaclnyALtK7rsaZ/8sTq8H1JnNAyMj/xuitYxXNUCZ+U= - ECDHE-RSA-AES128-GCM-SHA256 - dandiarchive.s3.amazonaws.com TLSv1.2 - - diff --git a/tests/examples/reduced_example_0/expected_output/zarr/cb65c877-882b-4554-8fa1-8f4e986e13a6.tsv b/tests/examples/reduced_example_0/expected_output/zarr/cb65c877-882b-4554-8fa1-8f4e986e13a6.tsv new file mode 100644 index 0000000..e25b6a1 --- /dev/null +++ b/tests/examples/reduced_example_0/expected_output/zarr/cb65c877-882b-4554-8fa1-8f4e986e13a6.tsv @@ -0,0 +1,2 @@ +timestamp bytes_sent ip_address line_index +2023-01-01 22:42:58 1526223 192.0.2.0 2 diff --git a/tests/test_reduce_dandi_raw_s3_log.py b/tests/test_reduce_dandi_raw_s3_log.py index 0f37892..484f83c 100644 --- a/tests/test_reduce_dandi_raw_s3_log.py +++ b/tests/test_reduce_dandi_raw_s3_log.py @@ -33,23 +33,28 @@ def test_reduce_dandi_raw_s3_log_example_0(tmpdir: py.path.local) -> None: assert number_of_output_files != 0, f"Test expected_output folder ({test_reduced_s3_logs_folder_path}) is empty!" # Increment this over time as more examples are added - expected_number_of_output_files = 2 + expected_number_of_output_files = 3 assert ( number_of_output_files == expected_number_of_output_files ), f"The number of asset files ({number_of_output_files}) does not match expectation!" expected_asset_ids = [path.stem for path in expected_reduced_s3_logs_folder_path.rglob("*.tsv")] - for test_parsed_s3_log_file_path in test_output_file_paths: + for test_reduced_s3_log_file_path in test_output_file_paths: assert ( - test_parsed_s3_log_file_path.stem in expected_asset_ids - ), f"Asset ID {test_parsed_s3_log_file_path.stem} not found in expected asset IDs!" - - test_parsed_s3_log = pandas.read_table(filepath_or_buffer=test_parsed_s3_log_file_path) - - blob_id = test_parsed_s3_log_file_path.stem - expected_parsed_s3_log_file_path = ( - expected_reduced_s3_logs_folder_path / "blobs" / blob_id[:3] / blob_id[3:6] / f"{blob_id}.tsv" - ) + test_reduced_s3_log_file_path.stem in expected_asset_ids + ), f"Asset ID {test_reduced_s3_log_file_path.stem} not found in expected asset IDs!" + + is_asset_zarr = "zarr" in str(test_reduced_s3_log_file_path) + if is_asset_zarr: + blob_id = test_reduced_s3_log_file_path.stem + expected_parsed_s3_log_file_path = expected_reduced_s3_logs_folder_path / "zarr" / f"{blob_id}.tsv" + else: + blob_id = test_reduced_s3_log_file_path.stem + expected_parsed_s3_log_file_path = ( + expected_reduced_s3_logs_folder_path / "blobs" / blob_id[:3] / blob_id[3:6] / f"{blob_id}.tsv" + ) + + test_reduced_s3_log = pandas.read_table(filepath_or_buffer=test_reduced_s3_log_file_path) expected_parsed_s3_log = pandas.read_table(filepath_or_buffer=expected_parsed_s3_log_file_path) - pandas.testing.assert_frame_equal(left=test_parsed_s3_log, right=expected_parsed_s3_log) + pandas.testing.assert_frame_equal(left=test_reduced_s3_log, right=expected_parsed_s3_log)