Skip to content

Commit

Permalink
Merge pull request #22 from biomarkersParkinson/support_legacy_import
Browse files Browse the repository at this point in the history
Support legacy import
  • Loading branch information
kretep authored Sep 16, 2023
2 parents e724160 + 327acea commit d90c736
Show file tree
Hide file tree
Showing 21 changed files with 2,425 additions and 2,171 deletions.
180 changes: 92 additions & 88 deletions docs/processing_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 1,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import tsdf\n",
"import os\n",
"import numpy as np\n",
"import tsdf\n",
"from tsdf.constants import TestConstants as TEST_CONST"
]
},
Expand All @@ -48,18 +48,9 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"outputs": [],
"source": [
"# Reload modules automatically on changes; useful for developing\n",
"%load_ext autoreload\n",
Expand Down Expand Up @@ -87,7 +78,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -101,13 +92,15 @@
}
],
"source": [
"dummy_data_name = \"example_10_3_int16\"\n",
"data_name = \"example_10_3_int16\"\n",
"\n",
"dummy_metadata = tsdf.load_metadata_from_path(os.path.join(TEST_CONST.TEST_DATA_DIR, dummy_data_name + TEST_CONST.METADATA_EXTENSION))[dummy_data_name + TEST_CONST.BINARY_EXTENSION]\n",
"dummy_data = dummy_metadata.load_binary()\n",
"print(f\"Data type used for storing:\\t {dummy_data.dtype}\")\n",
"print(f\"Data dimensions:\\t\\t {dummy_data.shape}\")\n",
"print(f\"Number of rows:\\t\\t\\t {dummy_data.shape[0]}\")"
"metadata = tsdf.load_metadata_from_path(\n",
" os.path.join(TEST_CONST.TEST_DATA_DIR, data_name + TEST_CONST.METADATA_EXTENSION)\n",
")[data_name + TEST_CONST.BINARY_EXTENSION]\n",
"data = metadata.load_binary()\n",
"print(f\"Data type used for storing:\\t {data.dtype}\")\n",
"print(f\"Data dimensions:\\t\\t {data.shape}\")\n",
"print(f\"Number of rows:\\t\\t\\t {data.shape[0]}\")"
]
},
{
Expand All @@ -120,7 +113,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -134,38 +127,10 @@
}
],
"source": [
"processed_dummy_data_1 = (dummy_data / 10).astype('float32')\n",
"print(f\"Data type used for storing:\\t {processed_dummy_data_1.dtype}\")\n",
"print(f\"Data dimensions:\\t\\t {processed_dummy_data_1.shape}\")\n",
"print(f\"Number of rows:\\t\\t\\t {processed_dummy_data_1.shape[0]}\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Metadata available from the data (NumPy array)\n",
"The metadata will be used indirectly to generate the new TSDF metadata file."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Binary formatting that can be inferred from the NumPy array:\n",
"{'data_type': 'float', 'bits': 32, 'endianness': 'little', 'rows': 10}\n"
]
}
],
"source": [
"bin_meta = tsdf.get_metadata_from_ndarray(processed_dummy_data_1)\n",
"print(f\"Binary formatting that can be inferred from the NumPy array:\\n{bin_meta}\")"
"processed_data_1 = (data / 10).astype('float32')\n",
"print(f\"Data type used for storing:\\t {processed_data_1.dtype}\")\n",
"print(f\"Data dimensions:\\t\\t {processed_data_1.shape}\")\n",
"print(f\"Number of rows:\\t\\t\\t {processed_data_1.shape[0]}\")"
]
},
{
Expand All @@ -179,16 +144,16 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"processed_dummy_data_name_1 = \"tmp_test_example_10_3_int16_to_float32\"\n",
"processed_dummy_metadata_1 = tsdf.write_binary_file(\n",
"processed_data_name_1 = \"tmp_test_example_10_3_int16_to_float32\"\n",
"processed_metadata_1 = tsdf.write_binary_file(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR,\n",
" processed_dummy_data_name_1 + TEST_CONST.BINARY_EXTENSION,\n",
" processed_dummy_data_1,\n",
" dummy_metadata.get_plain_tsdf_dict_copy(),\n",
" processed_data_name_1 + TEST_CONST.BINARY_EXTENSION,\n",
" processed_data_1,\n",
" metadata.get_plain_tsdf_dict_copy(),\n",
" )"
]
},
Expand All @@ -204,12 +169,12 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Write new metadata file\n",
"tsdf.write_metadata([processed_dummy_metadata_1], processed_dummy_data_name_1 + TEST_CONST.METADATA_EXTENSION)\n"
"tsdf.write_metadata([processed_metadata_1], processed_data_name_1 + TEST_CONST.METADATA_EXTENSION)\n"
]
},
{
Expand All @@ -222,29 +187,32 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Preprocess the original data to generate another data source\n",
"processed_dummy_data_2 = (dummy_data * 1000).astype('int32')\n",
"processed_data_2 = (data * 1000).astype(\"int32\")\n",
"\n",
"# Adjust the metadata slightly\n",
"updated_dummy_metadata = dummy_metadata.get_plain_tsdf_dict_copy()\n",
"updated_dummy_metadata.pop(\"scale_factors\") #remove the 'scale_factors'\n",
"updated_metadata = metadata.get_plain_tsdf_dict_copy()\n",
"updated_metadata.pop(\"scale_factors\") # remove the 'scale_factors'\n",
"\n",
"\n",
"# Save the new binary file\n",
"processed_dummy_data_name_2 = \"tmp_test_example_10_3_int16_to_int32\"\n",
"processed_dummy_metadata_2 = tsdf.write_binary_file(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR,\n",
" processed_dummy_data_name_2 + TEST_CONST.BINARY_EXTENSION,\n",
" processed_dummy_data_2,\n",
" updated_dummy_metadata,\n",
" )\n",
"processed_data_name_2 = \"tmp_test_example_10_3_int16_to_int32\"\n",
"processed_metadata_2 = tsdf.write_binary_file(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR,\n",
" processed_data_name_2 + TEST_CONST.BINARY_EXTENSION,\n",
" processed_data_2,\n",
" updated_metadata,\n",
")\n",
"\n",
"# Write a metadata file that combines the two binary files\n",
"tsdf.write_metadata([processed_dummy_metadata_1, processed_dummy_metadata_2], \"tmp_test_example_10_3_int16_to_int_n_float_meta.json\")\n"
"tsdf.write_metadata(\n",
" [processed_metadata_1, processed_metadata_2],\n",
" \"tmp_test_example_10_3_int16_to_int_n_float_meta.json\",\n",
")"
]
},
{
Expand All @@ -258,7 +226,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -275,19 +243,25 @@
"new_metadata[\"device_id\"] = \"example\"\n",
"new_metadata[\"endianness\"] = \"little\"\n",
"new_metadata[\"metadata_version\"] = \"0.1\"\n",
"new_metadata[\"start_datetime_unix_ms\"] = 1571135957025,\n",
"new_metadata[\"start_datetime_unix_ms\"] = 1571135957025\n",
"new_metadata[\"start_iso8601\"] = \"2019-10-15T10:39:17.025000+00:00\"\n",
"new_metadata[\"end_datetime_unix_ms\"] = 1571168851826\n",
"new_metadata[\"end_iso8601\"] = \"2019-10-15T19:47:31.826000+00:00\"\n",
"new_metadata[\"channels\"] = [\"x\",\"y\",\"z\"]\n",
"new_metadata[\"units\"] = [\"m/s/s\",\"m/s/s\",\"m/s/s\"]\n",
"new_metadata[\"channels\"] = [\"x\", \"y\", \"z\"]\n",
"new_metadata[\"units\"] = [\"m/s/s\", \"m/s/s\", \"m/s/s\"]\n",
"\n",
"# Write the three binary files based on the provided metadata\n",
"\n",
"file_prefix = \"tmp_test\"\n",
"new_meta_1 = tsdf.write_binary_file(TEST_CONST.TEST_OUTPUT_DATA_DIR, file_prefix + \"_1.bin\", data_1, new_metadata)\n",
"new_meta_2 = tsdf.write_binary_file(TEST_CONST.TEST_OUTPUT_DATA_DIR, file_prefix+\"_2.bin\", data_2, new_metadata)\n",
"new_meta_3 = tsdf.write_binary_file(TEST_CONST.TEST_OUTPUT_DATA_DIR, file_prefix+\"_3.bin\", data_3, new_metadata)\n"
"new_meta_1 = tsdf.write_binary_file(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR, file_prefix + \"_1.bin\", data_1, new_metadata\n",
")\n",
"new_meta_2 = tsdf.write_binary_file(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR, file_prefix + \"_2.bin\", data_2, new_metadata\n",
")\n",
"new_meta_3 = tsdf.write_binary_file(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR, file_prefix + \"_3.bin\", data_3, new_metadata\n",
")"
]
},
{
Expand All @@ -300,7 +274,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -310,12 +284,15 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Combine and write all metadata files\n",
"tsdf.write_metadata([new_meta_1, new_meta_2, new_meta_3], file_prefix + \"_3\" + TEST_CONST.METADATA_EXTENSION)"
"tsdf.write_metadata(\n",
" [new_meta_1, new_meta_2, new_meta_3],\n",
" file_prefix + \"_3\" + TEST_CONST.METADATA_EXTENSION,\n",
")"
]
},
{
Expand All @@ -329,25 +306,52 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"from tsdf.utils_legacy_TSDF import generate_tsdf_metadata_from_tsdb, convert_metadata_tsdb_to_tsdf, convert_metadatas_tsdb_to_tsdf\n",
"from tsdf.legacy_tsdf_utils import (\n",
" generate_tsdf_metadata_from_tsdb,\n",
" convert_file_tsdb_to_tsdf,\n",
" convert_files_tsdb_to_tsdf,\n",
")\n",
"\n",
"# Path to the metadata file\n",
"path_to_file = os.path.join(TEST_CONST.TEST_DATA_DIR, \"ppp_format_meta_legacy.json\")\n",
"path_to_new_file = os.path.join(TEST_CONST.TEST_OUTPUT_DATA_DIR, \"tmp_ppp_format_meta.json\")\n",
"path_to_new_file = os.path.join(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR, \"tmp_ppp_format_meta.json\"\n",
")\n",
"\n",
"# Generate a TSDF metadata file from TSDB\n",
"generate_tsdf_metadata_from_tsdb(path_to_file, path_to_new_file)\n",
"\n",
"# Convert a TSDB metadata file to TSDB format \n",
"# Convert a TSDB metadata file to TSDF format\n",
"# convert_file_tsdb_to_tsdf(path_to_file)\n",
"\n",
"# Convert all metadata files in the directory from TSDB to TSDF format\n",
"# convert_files_tsdb_to_tsdf(path_to_dir)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Verify TSDF format\n",
"Method used to verify formatting of existing files."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# from tsdf import validator\n",
"path_to_metadata_file = os.path.join(TEST_CONST.TEST_DATA_DIR, \"ppp_format_meta.json\")\n",
"# Verify the metadata file\n",
"#validator.validate_tsdf_format(path_to_metadata_file)"
]
}
],
"metadata": {
Expand All @@ -366,7 +370,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.9.13"
},
"orig_nbformat": 4,
"vscode": {
Expand Down
Loading

0 comments on commit d90c736

Please sign in to comment.