diff --git a/docs/source/quickstarter.rst b/docs/source/quickstarter.rst index 06d879fcbe..9b79158681 100644 --- a/docs/source/quickstarter.rst +++ b/docs/source/quickstarter.rst @@ -71,53 +71,37 @@ This will run all functions required to create the output specified in the `run. $ python run.py -Run using the `DAGWorks Platform `_ ----------------------------------------------------- +Track execution with the `Hamilton UI `_ +___________________________________________________________________________________________________________ +If you would like to track the execution of the `naturf` workflow, there is an interactive UI +available that allows you to track the progress of the workflow, view logs, and capture summary statistics of outputs. -Set the DAGWorks API Key as an environment variable: +1. Pre-requisites: -.. code:: bash - - $ export DAGWORKS_API_KEY="" +* Have the self-hosted Hamilton UI running and you have created a user and project. If not, follow the instructions in the `Hamilton UI README `_. +* Or, have a free account on `DAGWorks Inc. `_, and have created a project and an API Key. +* Have the right SDK installed. If not, install it using the following command: + .. code:: bash -Start with the `run.py` file from above (using either the example data or your own data) and add the following. Import os and DAGWorks adapters, which contains the tracker: - -.. code:: python3 + $ pip install sf-hamilton[sdk] # if self-hosting the Hamilton UI + $ pip install dagworks-sdk # if using the hosted Hamilton UI via DAGWorks Inc. - import os - from dagworks import adapters +2. Set the requisite environment variables: -Initialize the DAGWorks tracker: - -.. code:: python3 - - tracker = adapters.DAGWorksTracker( - project_id=, - api_key=os.environ["DAGWORKS_API_KEY"], - username="", - dag_name="", - tags={"environment": "DEV", "team": "MY_TEAM", "version": "X"}, - ) - - -Add `tracker` to the `hamilton_adaptors` list: - -.. code:: python3 +.. 
code:: bash - hamilton_adapters = [ - base.SimplePythonDataFrameGraphAdapter(), - h_tqdm.ProgressBar("Naturf DAG"), - tracker, - ] + $ export HAMILTON_UI_USERNAME="" + $ export HAMILTON_UI_PROJECT_ID="" + $ export DAGWORKS_API_KEY="" # set this if you are using the hosted Hamilton UI via DAGWorks Inc. +3. Run the python file (again)! -Run the python file! +Underneath, in the naturf driver, the correct SDK will be invoked and the execution will be tracked on the Hamilton UI. .. code:: bash $ python run.py - -You should see a run on the `DAGWorks Platform `_! +You should see logs emitted that provide a URL to click to see execution! diff --git a/naturf/driver.py b/naturf/driver.py index 6114358ee3..607676adb9 100644 --- a/naturf/driver.py +++ b/naturf/driver.py @@ -9,8 +9,9 @@ import naturf.output as output DAGWORKS_API_KEY = os.environ.get("DAGWORKS_API_KEY") -DAGWORKS_USERNAME = os.environ.get("DAGWORKS_USERNAME") -DAGWORKS_PROJECT_ID = os.environ.get("DAGWORKS_PROJECT_ID") +HAMILTON_UI_PROJECT_ID = os.environ.get("HAMILTON_UI_PROJECT_ID") +HAMILTON_UI_USERNAME = os.environ.get("HAMILTON_UI_USERNAME") +ENV = os.environ.get("ENV", "dev") class Model: @@ -26,18 +27,39 @@ def __init__(self, inputs: dict, outputs: List[str], **kwargs): base.SimplePythonDataFrameGraphAdapter(), h_tqdm.ProgressBar("Naturf DAG"), ] - if DAGWORKS_API_KEY and DAGWORKS_USERNAME and DAGWORKS_PROJECT_ID: - from dagworks import adapters - - hamilton_adapters.append( - adapters.DAGWorksTracker( - project_id=int(DAGWORKS_PROJECT_ID), - api_key=DAGWORKS_API_KEY, - username=DAGWORKS_USERNAME, - dag_name="naturf-dag", - tags={"env": "dev", "status": "development", "version": "1"}, + # use the hosted version (there's a free tier) of the Hamilton UI to log telemetry to.
+ if DAGWORKS_API_KEY and HAMILTON_UI_USERNAME and HAMILTON_UI_PROJECT_ID: + try: + from dagworks import adapters + except ImportError: + # dagworks-sdk not installed + pass + else: + hamilton_adapters.append( # pragma: no cover + adapters.DAGWorksTracker( + project_id=int(HAMILTON_UI_PROJECT_ID), + api_key=DAGWORKS_API_KEY, + username=HAMILTON_UI_USERNAME, + dag_name="naturf-dag", + tags={"env": ENV}, + ) + ) + # use the self-hosted version of the Hamilton UI to log telemetry to. + elif HAMILTON_UI_USERNAME and HAMILTON_UI_PROJECT_ID: + try: + from hamilton_sdk import adapters + except ImportError: + # hamilton-sdk not installed + pass + else: + hamilton_adapters.append( # pragma: no cover + adapters.HamiltonTracker( + project_id=int(HAMILTON_UI_PROJECT_ID), + username=HAMILTON_UI_USERNAME, + dag_name="naturf-dag", + tags={"env": ENV}, + ) ) - ) # instantiate driver with function definitions & adapters self.dr = ( diff --git a/notebooks/quickstarter.ipynb b/notebooks/quickstarter.ipynb index 39a7e8d3f3..54a74d55da 100644 --- a/notebooks/quickstarter.ipynb +++ b/notebooks/quickstarter.ipynb @@ -2089,9 +2089,9 @@ "id": "45b95e28", "metadata": {}, "source": [ - "## Optional: DAGWorks Interactive Dashboard\n", + "## Optional: Hamilton UI's Interactive Dashboard\n", "\n", - "Since we're using `hamilton` to run `naturf`, users can log each run to DAGWorks (researchers/academics have access to the free tier) by signing up at [dagworks.io](www.dagworks.io) and creating a project. Then either set the environment variables `DAGWORKS_API_KEY`, `DAGWORKS_USERNAME`, and `DAGWORKS_PROJECT_ID` below or set it in the module directly." + "Since we're using `hamilton` to run `naturf`, users can log each run to a [self-hostable Hamilton UI](https://github.com/dagworks-inc/hamilton/tree/main/ui), or via the hosted version by DAGWorks Inc. (researchers/academics have access to the free tier -- email support for details) by signing up at [dagworks.io](https://www.dagworks.io/hamilton).
For both, one needs to have a project and username created. Then either set the environment variables `DAGWORKS_API_KEY` (if using the hosted version), `HAMILTON_UI_USERNAME`, and `HAMILTON_UI_PROJECT_ID` below or set them in the module directly. Then when the model is executed, the run will be logged to the Hamilton UI." ] }, { @@ -2106,9 +2106,9 @@ }, "outputs": [], "source": [ - "# driver.DAGWORKS_API_KEY = \"your_api_key\"\n", - "# driver.DAGWORKS_USERNAME = \"your_username\"\n", - "# driver.DAGWORKS_PROJECT_ID = \"your_project_id\" # some integer.\n", + "# driver.DAGWORKS_API_KEY = \"your_api_key\" # only required if using the hosted version of the Hamilton UI\n", + "# driver.HAMILTON_UI_USERNAME = \"your_username\"\n", + "# driver.HAMILTON_UI_PROJECT_ID = \"your_project_id\" # some integer.\n", "\n", "# model = driver.Model(inputs, [\"input_shapefile_df\"])\n", "# model.execute()" diff --git a/tests/test_driver.py b/tests/test_driver.py new file mode 100644 index 0000000000..f2ca58413f --- /dev/null +++ b/tests/test_driver.py @@ -0,0 +1,33 @@ +import os +import unittest +from unittest.mock import patch + +from naturf import driver + + +class TestDriverGuardAgainstSDK(unittest.TestCase): + INPUTS = { + "input_shapefile": os.path.join("naturf", "data", "C-5.shp"), + "radius": 100, + "cap_style": 1, + } + + # mock the driver module variables + @patch("naturf.driver.HAMILTON_UI_PROJECT_ID", "3") + @patch("naturf.driver.HAMILTON_UI_USERNAME", "test@test") + def tests_sdk_not_installed(self): + """tests that things work if the sdk is not installed""" + # this line running without error means that our checks worked + driver.Model(inputs=TestDriverGuardAgainstSDK.INPUTS, outputs=["input_shapefile_df"]) + + @patch("naturf.driver.HAMILTON_UI_PROJECT_ID", "3") + @patch("naturf.driver.HAMILTON_UI_USERNAME", "test@test") + @patch("naturf.driver.DAGWORKS_API_KEY", "some-api-key") + def tests_sdk_not_installed_DW(self): + """tests that things work if the sdk is not installed
for the DW path""" + # this line running without error means that our checks worked + driver.Model(inputs=TestDriverGuardAgainstSDK.INPUTS, outputs=["input_shapefile_df"]) + + +if __name__ == "__main__": + unittest.main()