Updates instructions for logging executions to the Hamilton UI (#166)

* Updates instructions for logging executions to the Hamilton UI: Added instructions on how to see and log executions using the Hamilton UI. Links to the correct documentation to help people get started. * Adds unit test for catching SDKs not installed: Adding a test to make coverage not complain, and to also validate that, if the environment variables are present, things won't break if the right SDK is not installed. * Adds pragma to skip some lines from coverage report, because we don't want to test them.
IMMM-SFA · May 4, 2024 · 998d0b3 · 998d0b3
1 parent 97d1a98
commit 998d0b3
Show file tree

Hide file tree

Showing 4 changed files with 92 additions and 53 deletions.
diff --git a/docs/source/quickstarter.rst b/docs/source/quickstarter.rst
@@ -71,53 +71,37 @@ This will run all functions required to create the output specified in the `run.
     $ python run.py
 
 
-Run using the `DAGWorks Platform <app.dagworks.io>`_
-----------------------------------------------------
+Track execution with the `Hamilton UI <https://github.com/dagworks-inc/hamilton/tree/main/ui>`_
+___________________________________________________________________________________________________________
+If you would like to track the execution of the `naturf` workflow, there is an interactive UI
+available that allows you to track the progress of the workflow, view logs, and capture summary statistics of outputs.
 
-Set the DAGWorks API Key as an environment variable:
+1. Pre-requisites:
 
-.. code:: bash
-
-    $ export DAGWORKS_API_KEY="<your API Key>"
+* Have the self-hosted Hamilton UI running, with a user and a project already created. If not, follow the instructions in the `Hamilton UI README <https://github.com/dagworks-inc/hamilton/tree/main/ui>`_.
+* Or, have a free account on `DAGWorks Inc. <https://www.dagworks.io/hamilton>`_, and have created a project and an API Key.
+* Have the right SDK installed. If not, install it using the following command:
 
+    .. code:: bash
 
-Start with the `run.py` file from above (using either the example data or your own data) and add the following. Import os and DAGWorks adapters, which contains the tracker:
-
-.. code:: python3
+        $ pip install sf-hamilton[sdk]  # if self-hosting the Hamilton UI
+        $ pip install dagworks-sdk  # if using the hosted Hamilton UI via DAGWorks Inc.
 
-    import os
-    from dagworks import adapters
 
+2. Set the requisite environment variables:
 
-Initialize the DAGWorks tracker:
-
-.. code:: python3
-
-    tracker = adapters.DAGWorksTracker(
-            project_id=<your project ID>,
-            api_key=os.environ["DAGWORKS_API_KEY"],
-            username="<your username>",
-            dag_name="<name of the DAG>",
-            tags={"environment": "DEV", "team": "MY_TEAM", "version": "X"},
-        )
-
-
-Add `tracker` to the `hamilton_adaptors` list:
-
-.. code:: python3
+.. code:: bash
 
-    hamilton_adapters = [
-                base.SimplePythonDataFrameGraphAdapter(),
-                h_tqdm.ProgressBar("Naturf DAG"),
-                tracker,
-            ]
+    $ export HAMILTON_UI_USERNAME="<your username>"
+    $ export HAMILTON_UI_PROJECT_ID="<your project ID>"
+    $ export DAGWORKS_API_KEY="<your DAGWorks API key>"  # set this if you are using the hosted Hamilton UI via DAGWorks Inc.
 
+3. Run the python file (again)!
 
-Run the python file!
+Under the hood, the `naturf` driver will invoke the correct SDK, and the execution will be tracked in the Hamilton UI.
 
 .. code:: bash
 
     $ python run.py
 
-
-You should see a run on the `DAGWorks Platform <app.dagworks.io>`_!
+You should see logs emitted that provide a URL you can click to see the execution!
diff --git a/naturf/driver.py b/naturf/driver.py
@@ -9,8 +9,9 @@
 import naturf.output as output
 
 DAGWORKS_API_KEY = os.environ.get("DAGWORKS_API_KEY")
-DAGWORKS_USERNAME = os.environ.get("DAGWORKS_USERNAME")
-DAGWORKS_PROJECT_ID = os.environ.get("DAGWORKS_PROJECT_ID")
+HAMILTON_UI_PROJECT_ID = os.environ.get("HAMILTON_UI_PROJECT_ID")
+HAMILTON_UI_USERNAME = os.environ.get("HAMILTON_UI_USERNAME")
+ENV = os.environ.get("ENV", "dev")
 
 
 class Model:
@@ -26,18 +27,39 @@ def __init__(self, inputs: dict, outputs: List[str], **kwargs):
             base.SimplePythonDataFrameGraphAdapter(),
             h_tqdm.ProgressBar("Naturf DAG"),
         ]
-        if DAGWORKS_API_KEY and DAGWORKS_USERNAME and DAGWORKS_PROJECT_ID:
-            from dagworks import adapters
-
-            hamilton_adapters.append(
-                adapters.DAGWorksTracker(
-                    project_id=int(DAGWORKS_PROJECT_ID),
-                    api_key=DAGWORKS_API_KEY,
-                    username=DAGWORKS_USERNAME,
-                    dag_name="naturf-dag",
-                    tags={"env": "dev", "status": "development", "version": "1"},
+        # use the hosted version (there's a free tier) of the Hamilton UI to log telemetry to.
+        if DAGWORKS_API_KEY and HAMILTON_UI_USERNAME and HAMILTON_UI_PROJECT_ID:
+            try:
+                from dagworks import adapters
+            except ImportError:
+                # dagworks-sdk not installed
+                pass
+            else:
+                hamilton_adapters.append(  # pragma: no cover
+                    adapters.DAGWorksTracker(
+                        project_id=int(HAMILTON_UI_PROJECT_ID),
+                        api_key=DAGWORKS_API_KEY,
+                        username=HAMILTON_UI_USERNAME,
+                        dag_name="naturf-dag",
+                        tags={"env": ENV},
+                    )
+                )
+        # use the self-hosted version of the Hamilton UI to log telemetry to.
+        elif HAMILTON_UI_USERNAME and HAMILTON_UI_PROJECT_ID:
+            try:
+                from hamilton_sdk import adapters
+            except ImportError:
+                # hamilton-sdk not installed
+                pass
+            else:
+                hamilton_adapters.append(  # pragma: no cover
+                    adapters.HamiltonTracker(
+                        project_id=int(HAMILTON_UI_PROJECT_ID),
+                        username=HAMILTON_UI_USERNAME,
+                        dag_name="naturf-dag",
+                        tags={"env": ENV},
+                    )
                 )
-            )
 
         # instantiate driver with function definitions & adapters
         self.dr = (

diff --git a/notebooks/quickstarter.ipynb b/notebooks/quickstarter.ipynb
@@ -2089,9 +2089,9 @@
    "id": "45b95e28",
    "metadata": {},
    "source": [
-    "## Optional: DAGWorks Interactive Dashboard\n",
+    "## Optional: Hamilton UI's Interactive Dashboard\n",
     "\n",
-    "Since we're using `hamilton` to run `naturf`, users can log each run to DAGWorks (researchers/academics have access to the free tier) by signing up at [dagworks.io](www.dagworks.io) and creating a project. Then either set the environment variables `DAGWORKS_API_KEY`, `DAGWORKS_USERNAME`, and `DAGWORKS_PROJECT_ID` below or set it in the module directly."
+    "Since we're using `hamilton` to run `naturf`, users can log each run either to a [self-hostable Hamilton UI](https://github.com/dagworks-inc/hamilton/tree/main/ui) or to the hosted version by DAGWorks Inc. (researchers/academics have access to the free tier -- email support for details), which requires signing up at [dagworks.io](https://www.dagworks.io/hamilton). For both, one needs to have a project and username created. Then either set the environment variables `DAGWORKS_API_KEY` (if using the hosted version), `HAMILTON_UI_USERNAME`, and `HAMILTON_UI_PROJECT_ID` below or set them in the module directly. When the model is then executed, the run will be logged to the Hamilton UI."
    ]
   },
   {
@@ -2106,9 +2106,9 @@
    },
    "outputs": [],
    "source": [
-    "# driver.DAGWORKS_API_KEY = \"your_api_key\"\n",
-    "# driver.DAGWORKS_USERNAME = \"your_username\"\n",
-    "# driver.DAGWORKS_PROJECT_ID = \"your_project_id\"  # some integer.\n",
+    "# driver.DAGWORKS_API_KEY = \"your_api_key\"  # only required if using the hosted version of the Hamilton UI\n",
+    "# driver.HAMILTON_UI_USERNAME = \"your_username\"\n",
+    "# driver.HAMILTON_UI_PROJECT_ID = \"your_project_id\"  # some integer.\n",
     "\n",
     "# model = driver.Model(inputs, [\"input_shapefile_df\"])\n",
     "# model.execute()"

diff --git a/tests/test_driver.py b/tests/test_driver.py
@@ -0,0 +1,33 @@
+import os
+import unittest
+from unittest.mock import patch
+
+from naturf import driver
+
+
+class TestDriverGuardAgainstSDK(unittest.TestCase):
+    INPUTS = {
+        "input_shapefile": os.path.join("naturf", "data", "C-5.shp"),
+        "radius": 100,
+        "cap_style": 1,
+    }
+
+    # mock the driver module variables
+    @patch("naturf.driver.HAMILTON_UI_PROJECT_ID", "3")
+    @patch("naturf.driver.HAMILTON_UI_USERNAME", "test@test")
+    def tests_sdk_not_installed(self):
+        """tests that things work if the sdk is not installed"""
+        # this line running without error means that our checks worked
+        driver.Model(inputs=TestDriverGuardAgainstSDK.INPUTS, outputs=["input_shapefile_df"])
+
+    @patch("naturf.driver.HAMILTON_UI_PROJECT_ID", "3")
+    @patch("naturf.driver.HAMILTON_UI_USERNAME", "test@test")
+    @patch("naturf.driver.DAGWORKS_API_KEY", "some-api-key")
+    def tests_sdk_not_installed_DW(self):
+        """tests that things work if the sdk is not installed for the DW path"""
+        # this line running without error means that our checks worked
+        driver.Model(inputs=TestDriverGuardAgainstSDK.INPUTS, outputs=["input_shapefile_df"])
+
+
+if __name__ == "__main__":
+    unittest.main()