Skip to content

Commit

Permalink
Merge pull request #22 from biomarkersParkinson/support_legacy_import
Browse files Browse the repository at this point in the history
Support legacy import
  • Loading branch information
kretep authored Sep 16, 2023
2 parents e724160 + 327acea commit d90c736
Show file tree
Hide file tree
Showing 21 changed files with 2,425 additions and 2,171 deletions.
180 changes: 92 additions & 88 deletions docs/processing_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 1,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import tsdf\n",
"import os\n",
"import numpy as np\n",
"import tsdf\n",
"from tsdf.constants import TestConstants as TEST_CONST"
]
},
Expand All @@ -48,18 +48,9 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"outputs": [],
"source": [
"# Reload modules automatically on changes; useful for developing\n",
"%load_ext autoreload\n",
Expand Down Expand Up @@ -87,7 +78,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -101,13 +92,15 @@
}
],
"source": [
"dummy_data_name = \"example_10_3_int16\"\n",
"data_name = \"example_10_3_int16\"\n",
"\n",
"dummy_metadata = tsdf.load_metadata_from_path(os.path.join(TEST_CONST.TEST_DATA_DIR, dummy_data_name + TEST_CONST.METADATA_EXTENSION))[dummy_data_name + TEST_CONST.BINARY_EXTENSION]\n",
"dummy_data = dummy_metadata.load_binary()\n",
"print(f\"Data type used for storing:\\t {dummy_data.dtype}\")\n",
"print(f\"Data dimensions:\\t\\t {dummy_data.shape}\")\n",
"print(f\"Number of rows:\\t\\t\\t {dummy_data.shape[0]}\")"
"metadata = tsdf.load_metadata_from_path(\n",
" os.path.join(TEST_CONST.TEST_DATA_DIR, data_name + TEST_CONST.METADATA_EXTENSION)\n",
")[data_name + TEST_CONST.BINARY_EXTENSION]\n",
"data = metadata.load_binary()\n",
"print(f\"Data type used for storing:\\t {data.dtype}\")\n",
"print(f\"Data dimensions:\\t\\t {data.shape}\")\n",
"print(f\"Number of rows:\\t\\t\\t {data.shape[0]}\")"
]
},
{
Expand All @@ -120,7 +113,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -134,38 +127,10 @@
}
],
"source": [
"processed_dummy_data_1 = (dummy_data / 10).astype('float32')\n",
"print(f\"Data type used for storing:\\t {processed_dummy_data_1.dtype}\")\n",
"print(f\"Data dimensions:\\t\\t {processed_dummy_data_1.shape}\")\n",
"print(f\"Number of rows:\\t\\t\\t {processed_dummy_data_1.shape[0]}\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Metadata available from the data (NumPy array)\n",
"The metadata will be used indirectly to generate the new TSDF metadata file."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Binary formatting that can be inferred from the NumPy array:\n",
"{'data_type': 'float', 'bits': 32, 'endianness': 'little', 'rows': 10}\n"
]
}
],
"source": [
"bin_meta = tsdf.get_metadata_from_ndarray(processed_dummy_data_1)\n",
"print(f\"Binary formatting that can be inferred from the NumPy array:\\n{bin_meta}\")"
"processed_data_1 = (data / 10).astype('float32')\n",
"print(f\"Data type used for storing:\\t {processed_data_1.dtype}\")\n",
"print(f\"Data dimensions:\\t\\t {processed_data_1.shape}\")\n",
"print(f\"Number of rows:\\t\\t\\t {processed_data_1.shape[0]}\")"
]
},
{
Expand All @@ -179,16 +144,16 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"processed_dummy_data_name_1 = \"tmp_test_example_10_3_int16_to_float32\"\n",
"processed_dummy_metadata_1 = tsdf.write_binary_file(\n",
"processed_data_name_1 = \"tmp_test_example_10_3_int16_to_float32\"\n",
"processed_metadata_1 = tsdf.write_binary_file(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR,\n",
" processed_dummy_data_name_1 + TEST_CONST.BINARY_EXTENSION,\n",
" processed_dummy_data_1,\n",
" dummy_metadata.get_plain_tsdf_dict_copy(),\n",
" processed_data_name_1 + TEST_CONST.BINARY_EXTENSION,\n",
" processed_data_1,\n",
" metadata.get_plain_tsdf_dict_copy(),\n",
" )"
]
},
Expand All @@ -204,12 +169,12 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Write new metadata file\n",
"tsdf.write_metadata([processed_dummy_metadata_1], processed_dummy_data_name_1 + TEST_CONST.METADATA_EXTENSION)\n"
"tsdf.write_metadata([processed_metadata_1], processed_data_name_1 + TEST_CONST.METADATA_EXTENSION)\n"
]
},
{
Expand All @@ -222,29 +187,32 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Preprocess the original data to generate another data source\n",
"processed_dummy_data_2 = (dummy_data * 1000).astype('int32')\n",
"processed_data_2 = (data * 1000).astype(\"int32\")\n",
"\n",
"# Adjust the metadata slightly\n",
"updated_dummy_metadata = dummy_metadata.get_plain_tsdf_dict_copy()\n",
"updated_dummy_metadata.pop(\"scale_factors\") #remove the 'scale_factors'\n",
"updated_metadata = metadata.get_plain_tsdf_dict_copy()\n",
"updated_metadata.pop(\"scale_factors\") # remove the 'scale_factors'\n",
"\n",
"\n",
"# Save the new binary file\n",
"processed_dummy_data_name_2 = \"tmp_test_example_10_3_int16_to_int32\"\n",
"processed_dummy_metadata_2 = tsdf.write_binary_file(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR,\n",
" processed_dummy_data_name_2 + TEST_CONST.BINARY_EXTENSION,\n",
" processed_dummy_data_2,\n",
" updated_dummy_metadata,\n",
" )\n",
"processed_data_name_2 = \"tmp_test_example_10_3_int16_to_int32\"\n",
"processed_metadata_2 = tsdf.write_binary_file(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR,\n",
" processed_data_name_2 + TEST_CONST.BINARY_EXTENSION,\n",
" processed_data_2,\n",
" updated_metadata,\n",
")\n",
"\n",
"# Write a metadata file that combines the two binary files\n",
"tsdf.write_metadata([processed_dummy_metadata_1, processed_dummy_metadata_2], \"tmp_test_example_10_3_int16_to_int_n_float_meta.json\")\n"
"tsdf.write_metadata(\n",
" [processed_metadata_1, processed_metadata_2],\n",
" \"tmp_test_example_10_3_int16_to_int_n_float_meta.json\",\n",
")"
]
},
{
Expand All @@ -258,7 +226,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -275,19 +243,25 @@
"new_metadata[\"device_id\"] = \"example\"\n",
"new_metadata[\"endianness\"] = \"little\"\n",
"new_metadata[\"metadata_version\"] = \"0.1\"\n",
"new_metadata[\"start_datetime_unix_ms\"] = 1571135957025,\n",
"new_metadata[\"start_datetime_unix_ms\"] = 1571135957025\n",
"new_metadata[\"start_iso8601\"] = \"2019-10-15T10:39:17.025000+00:00\"\n",
"new_metadata[\"end_datetime_unix_ms\"] = 1571168851826\n",
"new_metadata[\"end_iso8601\"] = \"2019-10-15T19:47:31.826000+00:00\"\n",
"new_metadata[\"channels\"] = [\"x\",\"y\",\"z\"]\n",
"new_metadata[\"units\"] = [\"m/s/s\",\"m/s/s\",\"m/s/s\"]\n",
"new_metadata[\"channels\"] = [\"x\", \"y\", \"z\"]\n",
"new_metadata[\"units\"] = [\"m/s/s\", \"m/s/s\", \"m/s/s\"]\n",
"\n",
"# Write the three binary files based on the provided metadata\n",
"\n",
"file_prefix = \"tmp_test\"\n",
"new_meta_1 = tsdf.write_binary_file(TEST_CONST.TEST_OUTPUT_DATA_DIR, file_prefix + \"_1.bin\", data_1, new_metadata)\n",
"new_meta_2 = tsdf.write_binary_file(TEST_CONST.TEST_OUTPUT_DATA_DIR, file_prefix+\"_2.bin\", data_2, new_metadata)\n",
"new_meta_3 = tsdf.write_binary_file(TEST_CONST.TEST_OUTPUT_DATA_DIR, file_prefix+\"_3.bin\", data_3, new_metadata)\n"
"new_meta_1 = tsdf.write_binary_file(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR, file_prefix + \"_1.bin\", data_1, new_metadata\n",
")\n",
"new_meta_2 = tsdf.write_binary_file(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR, file_prefix + \"_2.bin\", data_2, new_metadata\n",
")\n",
"new_meta_3 = tsdf.write_binary_file(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR, file_prefix + \"_3.bin\", data_3, new_metadata\n",
")"
]
},
{
Expand All @@ -300,7 +274,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -310,12 +284,15 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Combine and write all metadata files\n",
"tsdf.write_metadata([new_meta_1, new_meta_2, new_meta_3], file_prefix + \"_3\" + TEST_CONST.METADATA_EXTENSION)"
"tsdf.write_metadata(\n",
" [new_meta_1, new_meta_2, new_meta_3],\n",
" file_prefix + \"_3\" + TEST_CONST.METADATA_EXTENSION,\n",
")"
]
},
{
Expand All @@ -329,25 +306,52 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"from tsdf.utils_legacy_TSDF import generate_tsdf_metadata_from_tsdb, convert_metadata_tsdb_to_tsdf, convert_metadatas_tsdb_to_tsdf\n",
"from tsdf.legacy_tsdf_utils import (\n",
" generate_tsdf_metadata_from_tsdb,\n",
" convert_file_tsdb_to_tsdf,\n",
" convert_files_tsdb_to_tsdf,\n",
")\n",
"\n",
"# Path to the metadata file\n",
"path_to_file = os.path.join(TEST_CONST.TEST_DATA_DIR, \"ppp_format_meta_legacy.json\")\n",
"path_to_new_file = os.path.join(TEST_CONST.TEST_OUTPUT_DATA_DIR, \"tmp_ppp_format_meta.json\")\n",
"path_to_new_file = os.path.join(\n",
" TEST_CONST.TEST_OUTPUT_DATA_DIR, \"tmp_ppp_format_meta.json\"\n",
")\n",
"\n",
"# Generate a TSDF metadata file from TSDB\n",
"generate_tsdf_metadata_from_tsdb(path_to_file, path_to_new_file)\n",
"\n",
"# Convert a TSDB metadata file to TSDB format \n",
"# Convert a TSDB metadata file to TSDF format\n",
"# convert_file_tsdb_to_tsdf(path_to_file)\n",
"\n",
"# Convert all metadata files in the directory from TSDB to TSDF format\n",
"# convert_files_tsdb_to_tsdf(path_to_dir)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Verify TSDF format\n",
"Method used to verify formatting of existing files."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# from tsdf import validator\n",
"path_to_metadata_file = os.path.join(TEST_CONST.TEST_DATA_DIR, \"ppp_format_meta.json\")\n",
"# Verify the metadata file\n",
"#validator.validate_tsdf_format(path_to_metadata_file)"
]
}
],
"metadata": {
Expand All @@ -366,7 +370,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.9.13"
},
"orig_nbformat": 4,
"vscode": {
Expand Down
Loading

0 comments on commit d90c736

Please sign in to comment.