From e54cb278490254bf78ed5a0611863bc4cab0a823 Mon Sep 17 00:00:00 2001 From: Stefan Krawczyk Date: Fri, 3 May 2024 23:27:11 -0700 Subject: [PATCH 1/3] Updates instructions for logging executions to the Hamilton UI Added instructions on how to see and log executions using the Hamilton UI. Links to the correct documentation to help people get started. --- docs/source/quickstarter.rst | 54 +++++++++++++----------------------- naturf/driver.py | 48 +++++++++++++++++++++++--------- notebooks/quickstarter.ipynb | 10 +++---- 3 files changed, 59 insertions(+), 53 deletions(-) diff --git a/docs/source/quickstarter.rst b/docs/source/quickstarter.rst index 06d879fcbe..9b79158681 100644 --- a/docs/source/quickstarter.rst +++ b/docs/source/quickstarter.rst @@ -71,53 +71,37 @@ This will run all functions required to create the output specified in the `run. $ python run.py -Run using the `DAGWorks Platform `_ ----------------------------------------------------- +Track execution with the `Hamilton UI `_ +___________________________________________________________________________________________________________ +If you would like to track the execution of the `naturf` workflow, there is an interactive UI +available that allows you to track the progress of the workflow, view logs, and capture summary statistics of outputs. -Set the DAGWorks API Key as an environment variable: +1. Pre-requisites: -.. code:: bash - - $ export DAGWORKS_API_KEY="" +* Have the self-hosted Hamilton UI running and you have created a user and project. If not, follow the instructions in the `Hamilton UI README `_. +* Or, have a free account on `DAGWorks Inc. `_, and have created a project and an API Key. +* Have the right SDK installed. If not, install it using the following command: + .. code:: bash -Start with the `run.py` file from above (using either the example data or your own data) and add the following. Import os and DAGWorks adapters, which contains the tracker: - -.. 
code:: python3 + $ pip install sf-hamilton[sdk] # if self-hosting the Hamilton UI + $ pip install dagworks-sdk # if using the hosted Hamilton UI via DAGWorks Inc. - import os - from dagworks import adapters +2. Set the requisite environment variables: -Initialize the DAGWorks tracker: - -.. code:: python3 - - tracker = adapters.DAGWorksTracker( - project_id=, - api_key=os.environ["DAGWORKS_API_KEY"], - username="", - dag_name="", - tags={"environment": "DEV", "team": "MY_TEAM", "version": "X"}, - ) - - -Add `tracker` to the `hamilton_adaptors` list: - -.. code:: python3 +.. code:: bash - hamilton_adapters = [ - base.SimplePythonDataFrameGraphAdapter(), - h_tqdm.ProgressBar("Naturf DAG"), - tracker, - ] + $ export HAMILTON_UI_USERNAME="" + $ export HAMILTON_UI_PROJECT_ID="" + $ export DAGWORKS_API_KEY="" # set this if you are using the hosted Hamilton UI via DAGWorks Inc. +3. Run the python file (again)! -Run the python file! +Underneath, in naturf driver, the correct SDK will be invoked and the execution will be tracked on the Hamilton UI. .. code:: bash $ python run.py - -You should see a run on the `DAGWorks Platform `_! +You should see logs emitted that provide a URL to click to see execution!
diff --git a/naturf/driver.py b/naturf/driver.py index 6114358ee3..7d30bfacb2 100644 --- a/naturf/driver.py +++ b/naturf/driver.py @@ -9,8 +9,9 @@ import naturf.output as output DAGWORKS_API_KEY = os.environ.get("DAGWORKS_API_KEY") -DAGWORKS_USERNAME = os.environ.get("DAGWORKS_USERNAME") -DAGWORKS_PROJECT_ID = os.environ.get("DAGWORKS_PROJECT_ID") +HAMILTON_UI_PROJECT_ID = os.environ.get("HAMILTON_UI_PROJECT_ID") +HAMILTON_UI_USERNAME = os.environ.get("HAMILTON_UI_USERNAME") +ENV = os.environ.get("ENV", "dev") class Model: @@ -26,18 +27,39 @@ def __init__(self, inputs: dict, outputs: List[str], **kwargs): base.SimplePythonDataFrameGraphAdapter(), h_tqdm.ProgressBar("Naturf DAG"), ] - if DAGWORKS_API_KEY and DAGWORKS_USERNAME and DAGWORKS_PROJECT_ID: - from dagworks import adapters - - hamilton_adapters.append( - adapters.DAGWorksTracker( - project_id=int(DAGWORKS_PROJECT_ID), - api_key=DAGWORKS_API_KEY, - username=DAGWORKS_USERNAME, - dag_name="naturf-dag", - tags={"env": "dev", "status": "development", "version": "1"}, + # use the hosted version (there's a free tier) of the Hamilton UI to log telemetry to. + if DAGWORKS_API_KEY and HAMILTON_UI_USERNAME and HAMILTON_UI_PROJECT_ID: + try: + from dagworks import adapters + except ImportError: + # dagworks-sdk not installed + pass + else: + hamilton_adapters.append( + adapters.DAGWorksTracker( + project_id=int(HAMILTON_UI_PROJECT_ID), + api_key=DAGWORKS_API_KEY, + username=HAMILTON_UI_USERNAME, + dag_name="naturf-dag", + tags={"env": ENV}, + ) + ) + # use the self-hosted version of the Hamilton UI to log telemetry to. 
+ elif HAMILTON_UI_USERNAME and HAMILTON_UI_PROJECT_ID: + try: + from hamilton_sdk import adapters + except ImportError: + # hamilton-sdk not installed + pass + else: + hamilton_adapters.append( + adapters.HamiltonTracker( + project_id=int(HAMILTON_UI_PROJECT_ID), + username=HAMILTON_UI_USERNAME, + dag_name="naturf-dag", + tags={"env": ENV}, + ) ) - ) # instantiate driver with function definitions & adapters self.dr = ( diff --git a/notebooks/quickstarter.ipynb b/notebooks/quickstarter.ipynb index 39a7e8d3f3..54a74d55da 100644 --- a/notebooks/quickstarter.ipynb +++ b/notebooks/quickstarter.ipynb @@ -2089,9 +2089,9 @@ "id": "45b95e28", "metadata": {}, "source": [ - "## Optional: DAGWorks Interactive Dashboard\n", + "## Optional: Hamilton UI's Interactive Dashboard\n", "\n", - "Since we're using `hamilton` to run `naturf`, users can log each run to DAGWorks (researchers/academics have access to the free tier) by signing up at [dagworks.io](www.dagworks.io) and creating a project. Then either set the environment variables `DAGWORKS_API_KEY`, `DAGWORKS_USERNAME`, and `DAGWORKS_PROJECT_ID` below or set it in the module directly." + "Since we're using `hamilton` to run `naturf`, users can log each run to a [self-hostable Hamilton UI](https://github.com/dagworks-inc/hamilton/tree/main/ui), or via the hosted version by DAGWorks Inc. (researchers/academics have access to the free tier -- email support for details) by signing up at [dagworks.io](www.dagworks.io/hamilton). For both, one needs to have a project and username created. Then either set the environment variables `DAGWORKS_API_KEY` (if using the hosted version), `HAMILTON_UI_USERNAME`, and `HAMILTON_UI_PROJECT_ID` below or set it in the module directly. Then when the model is executed, the run will be logged to the Hamilton UI." 
] }, { @@ -2106,9 +2106,9 @@ }, "outputs": [], "source": [ - "# driver.DAGWORKS_API_KEY = \"your_api_key\"\n", - "# driver.DAGWORKS_USERNAME = \"your_username\"\n", - "# driver.DAGWORKS_PROJECT_ID = \"your_project_id\" # some integer.\n", + "# driver.DAGWORKS_API_KEY = \"your_api_key\" # only required if using the hosted version of the Hamilton UI\n", + "# driver.HAMILTON_UI_USERNAME = \"your_username\"\n", + "# driver.HAMILTON_UI_PROJECT_ID = \"your_project_id\" # some integer.\n", "\n", "# model = driver.Model(inputs, [\"input_shapefile_df\"])\n", "# model.execute()" From 18edb9c4a21b4f39dfeb829093b8626898ac103b Mon Sep 17 00:00:00 2001 From: Stefan Krawczyk Date: Fri, 3 May 2024 23:42:00 -0700 Subject: [PATCH 2/3] Adds unit test for catching SDKs not installed Adding test to make coverage not complain, and to also validate that if the environment variables are present that things wont break if the right SDK is not installed. --- tests/test_driver.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tests/test_driver.py diff --git a/tests/test_driver.py b/tests/test_driver.py new file mode 100644 index 0000000000..f2ca58413f --- /dev/null +++ b/tests/test_driver.py @@ -0,0 +1,33 @@ +import os +import unittest +from unittest.mock import patch + +from naturf import driver + + +class TestDriverGuardAgainstSDK(unittest.TestCase): + INPUTS = { + "input_shapefile": os.path.join("naturf", "data", "C-5.shp"), + "radius": 100, + "cap_style": 1, + } + + # mock the driver module variables + @patch("naturf.driver.HAMILTON_UI_PROJECT_ID", "3") + @patch("naturf.driver.HAMILTON_UI_USERNAME", "test@test") + def tests_sdk_not_installed(self): + """tests that things work if the sdk is not installed""" + # this line running without error means that our checks worked + driver.Model(inputs=TestDriverGuardAgainstSDK.INPUTS, outputs=["input_shapefile_df"]) + + @patch("naturf.driver.HAMILTON_UI_PROJECT_ID", "3") + 
@patch("naturf.driver.HAMILTON_UI_USERNAME", "test@test") + @patch("naturf.driver.DAGWORKS_API_KEY", "some-api-key") + def tests_sdk_not_installed_DW(self): + """tests that things work if the sdk is not installed for the DW path""" + # this line running without error means that our checks worked + driver.Model(inputs=TestDriverGuardAgainstSDK.INPUTS, outputs=["input_shapefile_df"]) + + +if __name__ == "__main__": + unittest.main() From b5bb7ed964e5c7620e67f8e567eeb00753160fed Mon Sep 17 00:00:00 2001 From: Stefan Krawczyk Date: Fri, 3 May 2024 23:48:02 -0700 Subject: [PATCH 3/3] Adds pragma to skip some lines from coverage report Because we don't want to test them. --- naturf/driver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/naturf/driver.py b/naturf/driver.py index 7d30bfacb2..607676adb9 100644 --- a/naturf/driver.py +++ b/naturf/driver.py @@ -35,7 +35,7 @@ def __init__(self, inputs: dict, outputs: List[str], **kwargs): # dagworks-sdk not installed pass else: - hamilton_adapters.append( + hamilton_adapters.append( # pragma: no cover adapters.DAGWorksTracker( project_id=int(HAMILTON_UI_PROJECT_ID), api_key=DAGWORKS_API_KEY, @@ -52,7 +52,7 @@ def __init__(self, inputs: dict, outputs: List[str], **kwargs): # hamilton-sdk not installed pass else: - hamilton_adapters.append( + hamilton_adapters.append( # pragma: no cover adapters.HamiltonTracker( project_id=int(HAMILTON_UI_PROJECT_ID), username=HAMILTON_UI_USERNAME,