diff --git a/docs/clay_over_aoi.ipynb b/docs/clay_over_aoi.ipynb
new file mode 100644
index 00000000..51dcbb74
--- /dev/null
+++ b/docs/clay_over_aoi.ipynb
@@ -0,0 +1,807 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c67aed17-49b0-4cc3-9bcb-6110c05ff809",
+   "metadata": {},
+   "source": [
+    "# How to run clay over custom AOIs\n",
+    "\n",
+    "This notebook shows, in a few simple steps, how the Clay model can be run for custom areas of interest (AOIs) and over custom date ranges.\n",
+    "\n",
+    "## Download and open global list of MGRS tiles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "b11e3a6a-3fa9-4c36-9f02-8991f7b4ec8e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "# The repo home is our working directory.\n",
+    "wd = Path.cwd().parent\n",
+    "os.chdir(wd)\n",
+    "# Ensure data directories exist\n",
+    "Path(\"data/mgrs\").mkdir(exist_ok=True)\n",
+    "Path(\"data/chips\").mkdir(exist_ok=True)\n",
+    "Path(\"data/checkpoints\").mkdir(exist_ok=True)\n",
+    "Path(\"data/embeddings\").mkdir(exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b65dbec9-725d-4032-bc3e-13e0dbebfcf0",
+   "metadata": {},
+   "source": [
+    "A full list of MGRS tiles has been created as part of the landcover-based sampling strategy. The file is [sourced from a complete MGRS tile list](https://github.com/Clay-foundation/model/blob/main/scripts/landcover.sh#L7), and then [intersected with the WorldCover landcover](https://github.com/Clay-foundation/model/blob/main/scripts/landcover.py) layer, outputting the `mgrs_full.fgb` file that is used below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "6643b779-5f87-42ce-87f6-d4247f60db12",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "--2024-01-15 12:40:58--  https://clay-mgrs-samples.s3.amazonaws.com/mgrs_full.fgb\n",
+      "Resolving clay-mgrs-samples.s3.amazonaws.com (clay-mgrs-samples.s3.amazonaws.com)... 3.5.28.235, 54.231.165.153, 52.217.89.132, ...\n",
+      "Connecting to clay-mgrs-samples.s3.amazonaws.com (clay-mgrs-samples.s3.amazonaws.com)|3.5.28.235|:443... connected.\n",
+      "HTTP request sent, awaiting response... 200 OK\n",
+      "Length: 13787464 (13M) [binary/octet-stream]\n",
+      "Saving to: ‘data/mgrs/mgrs_full.fgb’\n",
+      "\n",
+      "data/mgrs/mgrs_full 100%[===================>]  13.15M  4.30MB/s    in 3.1s    \n",
+      "\n",
+      "2024-01-15 12:41:02 (4.30 MB/s) - ‘data/mgrs/mgrs_full.fgb’ saved [13787464/13787464]\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "import geopandas as gpd\n",
+    "\n",
+    "! wget https://clay-mgrs-samples.s3.amazonaws.com/mgrs_full.fgb -O data/mgrs/mgrs_full.fgb"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "beb8d805-38c6-4b08-bff8-fdb59f598535",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Name</th>\n",
+       "      <th>geometry</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>59CNK</td>\n",
+       "      <td>MULTIPOLYGON (((170.99885 -81.06088, 177.30316...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>59CNJ</td>\n",
+       "      <td>MULTIPOLYGON (((170.99872 -81.95638, 177.99281...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>58CEN</td>\n",
+       "      <td>MULTIPOLYGON (((164.99856 -82.85238, 172.85358...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>58CEP</td>\n",
+       "      <td>MULTIPOLYGON (((164.99872 -81.95638, 171.99281...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>60CVQ</td>\n",
+       "      <td>MULTIPOLYGON (((171.25220 -81.01644, 177.56261...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>56681</th>\n",
+       "      <td>02CMR</td>\n",
+       "      <td>MULTIPOLYGON (((-176.23170 -80.12446, -170.488...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>56682</th>\n",
+       "      <td>02CMQ</td>\n",
+       "      <td>MULTIPOLYGON (((-176.74780 -81.01644, -170.437...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>56683</th>\n",
+       "      <td>01CEK</td>\n",
+       "      <td>MULTIPOLYGON (((-177.00115 -81.06088, -170.696...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>56684</th>\n",
+       "      <td>01CEJ</td>\n",
+       "      <td>MULTIPOLYGON (((-177.00128 -81.95638, -170.007...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>56685</th>\n",
+       "      <td>01CEH</td>\n",
+       "      <td>MULTIPOLYGON (((-177.00144 -82.85238, -169.146...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>56686 rows × 2 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        Name                                           geometry\n",
+       "0      59CNK  MULTIPOLYGON (((170.99885 -81.06088, 177.30316...\n",
+       "1      59CNJ  MULTIPOLYGON (((170.99872 -81.95638, 177.99281...\n",
+       "2      58CEN  MULTIPOLYGON (((164.99856 -82.85238, 172.85358...\n",
+       "3      58CEP  MULTIPOLYGON (((164.99872 -81.95638, 171.99281...\n",
+       "4      60CVQ  MULTIPOLYGON (((171.25220 -81.01644, 177.56261...\n",
+       "...      ...                                                ...\n",
+       "56681  02CMR  MULTIPOLYGON (((-176.23170 -80.12446, -170.488...\n",
+       "56682  02CMQ  MULTIPOLYGON (((-176.74780 -81.01644, -170.437...\n",
+       "56683  01CEK  MULTIPOLYGON (((-177.00115 -81.06088, -170.696...\n",
+       "56684  01CEJ  MULTIPOLYGON (((-177.00128 -81.95638, -170.007...\n",
+       "56685  01CEH  MULTIPOLYGON (((-177.00144 -82.85238, -169.146...\n",
+       "\n",
+       "[56686 rows x 2 columns]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mgrs = gpd.read_file(\"data/mgrs/mgrs_full.fgb\")\n",
+    "mgrs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "05bd8621-f4d0-40e3-8a4c-ac279f377ec0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<Geographic 2D CRS: EPSG:4326>\n",
+       "Name: WGS 84\n",
+       "Axis Info [ellipsoidal]:\n",
+       "- Lat[north]: Geodetic latitude (degree)\n",
+       "- Lon[east]: Geodetic longitude (degree)\n",
+       "Area of Use:\n",
+       "- name: World.\n",
+       "- bounds: (-180.0, -90.0, 180.0, 90.0)\n",
+       "Datum: World Geodetic System 1984 ensemble\n",
+       "- Ellipsoid: WGS 84\n",
+       "- Prime Meridian: Greenwich"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mgrs.crs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a5211112-d169-40b4-83af-1a7cc40c2439",
+   "metadata": {},
+   "source": [
+    "## Create a Geopandas dataframe with AOI\n",
+    "\n",
+    "This example uses a bounding box over the area around Puri, India."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "8a5fb076-e19f-4b34-b938-02b06829fef7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/svg+xml": [
+       "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"85.007528 19.452128000000002 1.1394440000000117 1.154843999999997\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,40.0591)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"0.02309687999999994\" opacity=\"0.6\" d=\"M 86.1042,19.4949 L 86.1042,20.5642 L 85.0503,20.5642 L 85.0503,19.4949 L 86.1042,19.4949 z\" /></g></svg>"
+      ],
+      "text/plain": [
+       "<POLYGON ((86.104 19.495, 86.104 20.564, 85.05 20.564, 85.05 19.495, 86.104 ...>"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import geopandas as gpd\n",
+    "import pandas as pd\n",
+    "from shapely import box\n",
+    "\n",
+    "aoi = gpd.GeoDataFrame(\n",
+    "    pd.DataFrame([\"Puri\"], columns=[\"Region\"]),\n",
+    "    crs=\"EPSG:4326\",\n",
+    "    geometry=[box(85.0503, 19.4949, 86.1042, 20.5642)],\n",
+    ")\n",
+    "aoi.geometry[0]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b1f838fa-4c18-4934-b6cc-dff4702145c9",
+   "metadata": {},
+   "source": [
+    "## Intersect the AOI with the MGRS tile layer\n",
+    "\n",
+    "This will select the MGRS tiles that intersect with your AOI. The processing will then happen for each of the MGRS tiles. This will most likely provide slightly more data than the AOI itself, as the whole tile data will be downloaded for each matched MGRS tile.\n",
+    "\n",
+    "Store the intersected tiles in a file; it will be used by the `datacube.py` script."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "7ba0c7c2-9937-461a-8c48-d9c607e93bd0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/svg+xml": [
+       "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"100.0\" height=\"100.0\" viewBox=\"86.00982935861315 19.775145170632 0.12471890405485908 0.8194030920359978\" preserveAspectRatio=\"xMinYMin meet\"><g transform=\"matrix(1,0,0,-1,0,40.3696934333)\"><path fill-rule=\"evenodd\" fill=\"#66cc99\" stroke=\"#555555\" stroke-width=\"0.016388061840719958\" opacity=\"0.6\" d=\"M 86.0448445217,19.8054934333 L 86.04017762128115,20.5642 L 86.1042,20.5642 L 86.1042,19.80563662693465 L 86.0448445217,19.8054934333 z\" /></g></svg>"
+      ],
+      "text/plain": [
+       "<POLYGON ((86.045 19.805, 86.04 20.564, 86.104 20.564, 86.104 19.806, 86.045...>"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mgrs_aoi = mgrs.overlay(aoi)\n",
+    "# Rename the name column to use lowercase letters for the datacube script to\n",
+    "# pick up the MGRS tile name.\n",
+    "mgrs_aoi = mgrs_aoi.rename(columns={\"Name\": \"name\"})\n",
+    "mgrs_aoi.geometry[2]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "045a0e36-025a-4381-b633-85ab236b1450",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>name</th>\n",
+       "      <th>Region</th>\n",
+       "      <th>geometry</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>45QTC</td>\n",
+       "      <td>Puri</td>\n",
+       "      <td>POLYGON ((85.18408 19.79882, 85.05030 19.79707...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>45QUC</td>\n",
+       "      <td>Puri</td>\n",
+       "      <td>POLYGON ((85.09075 19.79784, 85.08133 20.56420...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>45QVC</td>\n",
+       "      <td>Puri</td>\n",
+       "      <td>POLYGON ((86.04484 19.80549, 86.04018 20.56420...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>45QVB</td>\n",
+       "      <td>Puri</td>\n",
+       "      <td>POLYGON ((86.10420 19.89401, 86.10420 19.49490...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>45QTB</td>\n",
+       "      <td>Puri</td>\n",
+       "      <td>POLYGON ((85.18307 19.88715, 85.18741 19.49490...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>45QUB</td>\n",
+       "      <td>Puri</td>\n",
+       "      <td>POLYGON ((86.10420 19.89408, 86.10420 19.49490...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    name Region                                           geometry\n",
+       "0  45QTC   Puri  POLYGON ((85.18408 19.79882, 85.05030 19.79707...\n",
+       "1  45QUC   Puri  POLYGON ((85.09075 19.79784, 85.08133 20.56420...\n",
+       "2  45QVC   Puri  POLYGON ((86.04484 19.80549, 86.04018 20.56420...\n",
+       "3  45QVB   Puri  POLYGON ((86.10420 19.89401, 86.10420 19.49490...\n",
+       "4  45QTB   Puri  POLYGON ((85.18307 19.88715, 85.18741 19.49490...\n",
+       "5  45QUB   Puri  POLYGON ((86.10420 19.89408, 86.10420 19.49490..."
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mgrs_aoi.to_file(\"data/mgrs/mgrs_aoi.fgb\")\n",
+    "mgrs_aoi"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bd5a5329-3ffd-4de8-9e36-c3b7a5f47e60",
+   "metadata": {},
+   "source": [
+    "## Use the datacube.py script to download imagery\n",
+    "\n",
+    "Each run of the datacube script will take an index as input, which is the index of the MGRS tile within the input file. This is why we need to download the data in a loop.\n",
+    "\n",
+    "A list of date ranges can be specified. The script will look for the least cloudy Sentinel-2 scene for each date range, and match Sentinel-1 dates near the identified Sentinel-2 dates.\n",
+    "\n",
+    "The output folder can be specified as a local folder, or a bucket can be specified to upload the data to S3.\n",
+    "\n",
+    "Note that for the script to run, a Microsoft Planetary Computer token needs to be set up. Consult the [Planetary Computer SDK](https://github.com/microsoft/planetary-computer-sdk-for-python) documentation on how to set up the token."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "a06149e2-116a-4a49-92a8-43aaa053bb43",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Usage: datacube.py [OPTIONS]\n",
+      "\n",
+      "Options:\n",
+      "  --sample TEXT      Location of MGRS tile sample\n",
+      "  --index INTEGER    Index of MGRS tile from sample file that should be\n",
+      "                     processed\n",
+      "  --bucket TEXT      Specify the bucket for where to write the data.\n",
+      "  --subset TEXT      For debugging, subset x and y to this pixel window as a\n",
+      "                     commaseparated string of 4 integers.\n",
+      "  --localpath TEXT   If specified, this path will be used to write the tiles\n",
+      "                     locallyOtherwise a temp dir will be used.\n",
+      "  --dateranges TEXT  Comma separated list of date ranges, each provided as yy-\n",
+      "                     mm-dd/yy-mm-dd.\n",
+      "  --help             Show this message and exit.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Print the help of the script to get a sense of the input parameters.\n",
+    "! python scripts/datacube.py --help"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7b95c426-d82a-4973-94c0-c761affe9b9e",
+   "metadata": {},
+   "source": [
+    "By default, the datacube script will download all the data available for each MGRS tile it processes. So the output might include imagery chips that are outside of the AOI specified.\n",
+    "\n",
+    "To speed up processing in the example below, we use the subset argument to reduce each MGRS tile to a small pixel window. When subsetting, the script will only download a fraction of each MGRS tile. This will lead to discontinuous datasets and should not be used in a real use case. Remove the subset argument when using the script for a real world application, where all the data should be downloaded for each MGRS tile."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "6e176358-1c66-47be-8357-47f27e4359a1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0 name                                                    45QTC\n",
+      "Region                                                   Puri\n",
+      "geometry    POLYGON ((85.1840761005 19.798817712, 85.0503 ...\n",
+      "Name: 0, dtype: object\n",
+      "Starting algorithm for MGRS tile 45QVB with index 0\n",
+      "Processing data for date range 2020-01-01/2020-04-01\n",
+      "Found 29 Sentinel-2 items\n",
+      "EPSG code based on Sentinel-2 item:  32645\n",
+      "Searching S1 in date range 2020-03-27/2020-04-02\n",
+      "Found 1 Sentinel-1 items\n",
+      "Most overlapped orbit:  descending\n",
+      "Found 1 DEM items\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "Subsetting to [1500, 1500, 2524, 2524]\n",
+      "Writing tempfiles to  data/chips\n",
+      "Too much no-data in vv\n",
+      "Too much no-data in vv\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "Too much no-data in vv\n",
+      "Too much no-data in vv\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "Processing data for date range 2021-06-01/2021-09-15\n",
+      "Found 14 Sentinel-2 items\n",
+      "EPSG code based on Sentinel-2 item:  32645\n",
+      "Searching S1 in date range 2021-08-22/2021-08-28\n",
+      "Found 4 Sentinel-1 items\n",
+      "Most overlapped orbit:  descending\n",
+      "Found 2 DEM items\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "Subsetting to [1500, 1500, 2524, 2524]\n",
+      "Writing tempfiles to  data/chips\n",
+      "Too much cloud coverage\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "1 name                                                    45QUC\n",
+      "Region                                                   Puri\n",
+      "geometry    POLYGON ((85.0907496721 19.7978447286, 85.0813...\n",
+      "Name: 1, dtype: object\n",
+      "Starting algorithm for MGRS tile 45QUB with index 1\n",
+      "Processing data for date range 2020-01-01/2020-04-01\n",
+      "Found 14 Sentinel-2 items\n",
+      "EPSG code based on Sentinel-2 item:  32645\n",
+      "Searching S1 in date range 2020-02-09/2020-02-15\n",
+      "Found 1 Sentinel-1 items\n",
+      "Most overlapped orbit:  descending\n",
+      "Found 2 DEM items\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "Subsetting to [1500, 1500, 2524, 2524]\n",
+      "Writing tempfiles to  data/chips\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "Processing data for date range 2021-06-01/2021-09-15\n",
+      "Found 7 Sentinel-2 items\n",
+      "EPSG code based on Sentinel-2 item:  32645\n",
+      "Searching S1 in date range 2021-08-22/2021-08-28\n",
+      "Found 4 Sentinel-1 items\n",
+      "Most overlapped orbit:  descending\n",
+      "Found 2 DEM items\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "Subsetting to [1500, 1500, 2524, 2524]\n",
+      "Writing tempfiles to  data/chips\n",
+      "Too much cloud coverage\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "2 name                                                    45QVC\n",
+      "Region                                                   Puri\n",
+      "geometry    POLYGON ((86.0448445217 19.8054934333, 86.0401...\n",
+      "Name: 2, dtype: object\n",
+      "Starting algorithm for MGRS tile 45QVC with index 2\n",
+      "Processing data for date range 2020-01-01/2020-04-01\n",
+      "Found 27 Sentinel-2 items\n",
+      "EPSG code based on Sentinel-2 item:  32645\n",
+      "Searching S1 in date range 2020-02-11/2020-02-17\n",
+      "Found 1 Sentinel-1 items\n",
+      "Most overlapped orbit:  descending\n",
+      "Found 3 DEM items\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "Subsetting to [1500, 1500, 2524, 2524]\n",
+      "Writing tempfiles to  data/chips\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "Processing data for date range 2021-06-01/2021-09-15\n",
+      "Found 13 Sentinel-2 items\n",
+      "EPSG code based on Sentinel-2 item:  32645\n",
+      "Searching S1 in date range 2021-06-03/2021-06-09\n",
+      "Found 2 Sentinel-1 items\n",
+      "Most overlapped orbit:  descending\n",
+      "Found 4 DEM items\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "Subsetting to [1500, 1500, 2524, 2524]\n",
+      "Writing tempfiles to  data/chips\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "Too much cloud coverage\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "3 name                                                    45QVB\n",
+      "Region                                                   Puri\n",
+      "geometry    POLYGON ((86.1042 19.894006473585993, 86.1042 ...\n",
+      "Name: 3, dtype: object\n",
+      "Starting algorithm for MGRS tile 45QUC with index 3\n",
+      "Processing data for date range 2020-01-01/2020-04-01\n",
+      "Found 16 Sentinel-2 items\n",
+      "EPSG code based on Sentinel-2 item:  32645\n",
+      "Searching S1 in date range 2020-02-09/2020-02-15\n",
+      "Found 2 Sentinel-1 items\n",
+      "Most overlapped orbit:  descending\n",
+      "Found 4 DEM items\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "Subsetting to [1500, 1500, 2524, 2524]\n",
+      "Writing tempfiles to  data/chips\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "Processing data for date range 2021-06-01/2021-09-15\n",
+      "Found 7 Sentinel-2 items\n",
+      "EPSG code based on Sentinel-2 item:  32645\n",
+      "Searching S1 in date range 2021-06-03/2021-06-09\n",
+      "Found 2 Sentinel-1 items\n",
+      "Most overlapped orbit:  descending\n",
+      "Found 4 DEM items\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "Subsetting to [1500, 1500, 2524, 2524]\n",
+      "Writing tempfiles to  data/chips\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "Too much cloud coverage\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "4 name                                                    45QTB\n",
+      "Region                                                   Puri\n",
+      "geometry    POLYGON ((85.183071142 19.8871534399, 85.18741...\n",
+      "Name: 4, dtype: object\n",
+      "Starting algorithm for MGRS tile 45QTC with index 4\n",
+      "Processing data for date range 2020-01-01/2020-04-01\n",
+      "Found 30 Sentinel-2 items\n",
+      "EPSG code based on Sentinel-2 item:  32645\n",
+      "Searching S1 in date range 2020-01-10/2020-01-16\n",
+      "Found 2 Sentinel-1 items\n",
+      "Most overlapped orbit:  descending\n",
+      "Found 4 DEM items\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "Subsetting to [1500, 1500, 2524, 2524]\n",
+      "Writing tempfiles to  data/chips\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "Processing data for date range 2021-06-01/2021-09-15\n",
+      "Found 12 Sentinel-2 items\n",
+      "EPSG code based on Sentinel-2 item:  32645\n",
+      "Searching S1 in date range 2021-06-03/2021-06-09\n",
+      "Found 2 Sentinel-1 items\n",
+      "Most overlapped orbit:  descending\n",
+      "Found 4 DEM items\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "Subsetting to [1500, 1500, 2524, 2524]\n",
+      "Writing tempfiles to  data/chips\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "Too much cloud coverage\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "5 name                                                    45QUB\n",
+      "Region                                                   Puri\n",
+      "geometry    POLYGON ((86.1042 19.894075652432416, 86.1042 ...\n",
+      "Name: 5, dtype: object\n",
+      "Starting algorithm for MGRS tile 45QTB with index 5\n",
+      "Processing data for date range 2020-01-01/2020-04-01\n",
+      "Found 29 Sentinel-2 items\n",
+      "EPSG code based on Sentinel-2 item:  32645\n",
+      "Searching S1 in date range 2020-02-09/2020-02-15\n",
+      "Found 1 Sentinel-1 items\n",
+      "Most overlapped orbit:  descending\n",
+      "Found 2 DEM items\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "Subsetting to [1500, 1500, 2524, 2524]\n",
+      "Writing tempfiles to  data/chips\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "Processing data for date range 2021-06-01/2021-09-15\n",
+      "Found 12 Sentinel-2 items\n",
+      "EPSG code based on Sentinel-2 item:  32645\n",
+      "Searching S1 in date range 2021-06-03/2021-06-09\n",
+      "Found 3 Sentinel-1 items\n",
+      "Most overlapped orbit:  descending\n",
+      "Found 3 DEM items\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "/home/tam/apps/miniforge3/envs/claymodel/lib/python3.11/site-packages/stackstac/prepare.py:408: UserWarning: The argument 'infer_datetime_format' is deprecated and will be removed in a future version. A strict version of it is now the default, see https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. You can safely remove this argument.\n",
+      "  times = pd.to_datetime(\n",
+      "Subsetting to [1500, 1500, 2524, 2524]\n",
+      "Writing tempfiles to  data/chips\n",
+      "Too much cloud coverage\n",
+      "Too much cloud coverage\n",
+      "No bucket specified, skipping S3 sync.\n",
+      "Too much cloud coverage\n",
+      "Too much cloud coverage\n",
+      "No bucket specified, skipping S3 sync.\n"
+     ]
+    }
+   ],
+   "source": [
+    "for index, row in mgrs_aoi.iterrows():\n",
+    "    print(index, row)  # NOTE(review): datacube.py reads tile number `index` from mgrs_aoi.fgb; per the recorded output it can differ from this row - confirm the file order\n",
+    "    ! python scripts/datacube.py --sample data/mgrs/mgrs_aoi.fgb --subset 1500,1500,2524,2524 --localpath data/chips  --index {index} --dateranges 2020-01-01/2020-04-01,2021-06-01/2021-09-15"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1f640c20-87b3-445b-9de8-e91670562df1",
+   "metadata": {},
+   "source": [
+    "## Create the embeddings for each training chip\n",
+    "\n",
+    "The checkpoints can be accessed directly from Hugging Face at https://huggingface.co/made-with-clay/Clay.\n",
+    "\n",
+    "The following command will automatically download and cache the model weights and run the model to create the embeddings."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "91f194ad-781c-4fa3-bdd8-6d1af29b23b5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "W&B disabled.\n",
+      "Seed set to 42\n",
+      "Using 16bit Automatic Mixed Precision (AMP)\n",
+      "GPU available: True (cuda), used: True\n",
+      "TPU available: False, using: 0 TPU cores\n",
+      "IPU available: False, using: 0 IPUs\n",
+      "HPU available: False, using: 0 HPUs\n",
+      "[rank: 0] Seed set to 42\n",
+      "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "distributed_backend=nccl\n",
+      "All distributed processes registered. Starting with 1 processes\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "\n",
+      "Total number of chips: 28\n",
+      "Restoring states from the checkpoint path at https://huggingface.co/made-with-clay/Clay/resolve/main/Clay_v0.1_epoch-24_val-loss-0.46.ckpt\n",
+      "Downloading: \"https://huggingface.co/made-with-clay/Clay/resolve/main/Clay_v0.1_epoch-24_val-loss-0.46.ckpt\" to /home/tam/.cache/torch/hub/checkpoints/Clay_v0.1_epoch-24_val-loss-0.46.ckpt\n",
+      "100%|████████████████████████████████████████| 487M/487M [00:18<00:00, 27.8MB/s]\n",
+      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n",
+      "Loaded model weights from the checkpoint at https://huggingface.co/made-with-clay/Clay/resolve/main/Clay_v0.1_epoch-24_val-loss-0.46.ckpt\n",
+      "Predicting DataLoader 0: 100%|██████████████████| 14/14 [00:02<00:00,  5.97it/s]Saved 7 rows of embeddings of shape (768,) to /home/tam/Documents/repos/model/data/embeddings/45QUB_20200328_20210825_v001.gpq\n",
+      "Saved 8 rows of embeddings of shape (768,) to /home/tam/Documents/repos/model/data/embeddings/45QTB_20200328_20210815_v001.gpq\n",
+      "Saved 8 rows of embeddings of shape (768,) to /home/tam/Documents/repos/model/data/embeddings/44QRG_20200328_20210815_v001.gpq\n",
+      "Saved 1 rows of embeddings of shape (768,) to /home/tam/Documents/repos/model/data/embeddings/45QVC_20210822_20210822_v001.gpq\n",
+      "Saved 2 rows of embeddings of shape (768,) to /home/tam/Documents/repos/model/data/embeddings/44QRH_20200328_20200328_v001.gpq\n",
+      "Saved 2 rows of embeddings of shape (768,) to /home/tam/Documents/repos/model/data/embeddings/45QTC_20200328_20200328_v001.gpq\n",
+      "Predicting DataLoader 0: 100%|██████████████████| 14/14 [00:02<00:00,  5.84it/s]\n",
+      "Done!\n"
+     ]
+    }
+   ],
+   "source": [
+    "! wandb disabled\n",
+    "! python trainer.py predict --ckpt_path=https://huggingface.co/made-with-clay/Clay/resolve/main/Clay_v0.1_epoch-24_val-loss-0.46.ckpt \\\n",
+    "                            --trainer.precision=16-mixed \\\n",
+    "                            --data.data_dir=data/chips \\\n",
+    "                            --data.batch_size=2 \\\n",
+    "                            --data.num_workers=8"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "claymodel",
+   "language": "python",
+   "name": "claymodel"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/pyproject.toml b/ruff.toml
similarity index 79%
rename from pyproject.toml
rename to ruff.toml
index c2b3a2b7..82f49e2d 100644
--- a/pyproject.toml
+++ b/ruff.toml
@@ -1,8 +1,11 @@
-[tool.ruff.format]
+[lint.per-file-ignores]
+"docs/clay_over_aoi.ipynb" = ["E501"]
+
+[format]
 # https://docs.astral.sh/ruff/settings/#format
 line-ending = "lf"  # Use UNIX `\n` line endings for all files
 
-[tool.ruff.lint]
+[lint]
 # https://docs.astral.sh/ruff/rules/
 select = [
     "E",    # pycodestyle errors
@@ -14,5 +17,5 @@ select = [
     "W",    # pycodestyle warnings
 ]
 
-[tool.ruff.lint.pylint]
+[lint.pylint]
 max-args = 6
diff --git a/scripts/datacube.py b/scripts/datacube.py
index 7345cc27..ad4d470f 100755
--- a/scripts/datacube.py
+++ b/scripts/datacube.py
@@ -316,20 +316,20 @@ def make_datasets(s2_items, s1_items, dem_items, resolution):
 
 def process(
     aoi,
-    year,
+    date_range,
     resolution,
     cloud_cover_percentage,
     nodata_pixel_percentage,
 ):
     """
     Process Sentinel-2, Sentinel-1, and Copernicus DEM data for a specified
-    year, area of interest (AOI), resolution, EPSG code, cloud cover
+    date_range, area of interest (AOI), resolution, EPSG code, cloud cover
     percentage, and nodata pixel percentage.
 
     Parameters:
     - aoi (shapely.geometry.base.BaseGeometry): Geometry object for an Area of
         Interest (AOI).
-    - year (int): The year for finding imagery.
+    - date_range (str): Date range string to pass to the catalog search.
     - resolution (int): Spatial resolution.
     - cloud_cover_percentage (int): Maximum acceptable cloud cover percentage
         for Sentinel-2 images.
@@ -339,7 +339,6 @@ def process(
     Returns:
     - xr.Dataset: Merged xarray Dataset containing processed data.
     """
-    date_range = f"{year}-01-01/{year}-12-31"
     catalog = pystac_client.Client.open(STAC_API, modifier=pc.sign_inplace)
 
     for i in range(S1_MATCH_ATTEMPTS):
@@ -364,8 +363,8 @@ def process(
 
     if i == S1_MATCH_ATTEMPTS - 1:
         print(
-            "No match for S1 scenes found for year "
-            f"{year} after {S1_MATCH_ATTEMPTS} attempts."
+            "No match for S1 scenes found for date range "
+            f"{date_range} after {S1_MATCH_ATTEMPTS} attempts."
         )
         return None, None
 
@@ -423,10 +422,26 @@ def convert_attrs_and_coords_objects_to_str(data):
 @click.option(
     "--subset",
     required=False,
+    default="",  # empty string means no subsetting
+    help="For debugging, subset the MGRS tile data to this pixel window. "
+    "Expects a comma separated string of 4 integers.",
+    type=str,
+)
+@click.option(
+    "--localpath",
+    required=False,
     default=None,
-    help="For debugging, subset x and y to this pixel window.",
+    help="If specified, this path will be used to write the tiles locally. "
+    "Otherwise a temp dir will be used.",
+)
+@click.option(
+    "--dateranges",
+    required=False,
+    default="",  # empty string falls back to the shuffled yearly ranges
+    type=str,
+    help="Comma separated list of date ranges, each provided as YYYY-MM-DD/YYYY-MM-DD.",
 )
-def main(sample, index, subset, bucket):
+def main(sample, index, subset, bucket, localpath, dateranges):
     index = int(index)
     tiles = gpd.read_file(sample)
     tile = tiles.iloc[index]
@@ -434,17 +449,27 @@ def main(sample, index, subset, bucket):
 
     print(f"Starting algorithm for MGRS tile {tile['name']} with index {index}")
 
-    # Shuffle years, use index as seed for reproducibility but no
-    # to have the same shuffle every time.
-    years = [2017, 2018, 2019, 2020, 2021, 2022, 2023]
-    random.seed(index)
-    random.shuffle(years)
+    if subset:  # parse the pixel window once, before looping over date ranges
+        subset = [int(dat) for dat in subset.split(",")]
+
+    if dateranges:  # explicit ranges are processed in the order given
+        date_ranges = dateranges.split(",")
+    else:
+        # Default to one range per year, shuffled. Seeding with the tile index
+        # keeps the order reproducible per tile while varying it between tiles.
+        date_ranges = [
+            f"{year}-01-01/{year}-12-31"
+            for year in (2017, 2018, 2019, 2020, 2021, 2022, 2023)
+        ]
+        random.seed(index)
+        random.shuffle(date_ranges)
+
     match_count = 0
-    for year in years:
-        print(f"Processing data for year {year}")
+    for date_range in date_ranges:
+        print(f"Processing data for date range {date_range}")
         date, pixels = process(
             tile.geometry,
-            year,
+            date_range,
             SPATIAL_RESOLUTION,
             CLOUD_COVER_PERCENTAGE,
             NODATA_PIXEL_PERCENTAGE,
@@ -455,7 +480,6 @@ def main(sample, index, subset, bucket):
             match_count += 1
 
         if subset:
-            subset = [int(dat) for dat in subset.split(",")]
             print(f"Subsetting to {subset}")
             pixels = [
                 part[:, subset[1] : subset[3], subset[0] : subset[2]] for part in pixels
@@ -463,7 +487,7 @@ def main(sample, index, subset, bucket):
 
         pixels = [part.compute() for part in pixels]
 
-        tiler(pixels, date, mgrs, bucket)
+        tiler(pixels, date, mgrs, bucket, localpath)
 
         if match_count == DATES_PER_LOCATION:
             break
diff --git a/scripts/tile.py b/scripts/tile.py
index e7e12a4b..41a9056f 100644
--- a/scripts/tile.py
+++ b/scripts/tile.py
@@ -55,7 +55,7 @@ def filter_clouds_nodata(tile):
     return True  # If both conditions pass
 
 
-def tiler(stack, date, mgrs, bucket):
+def tile_to_dir(stack, date, mgrs, bucket, dir):
     """
     Function to tile a multi-dimensional imagery stack while filtering out
     tiles with high cloud coverage or no-data pixels.
@@ -66,67 +66,77 @@ def tiler(stack, date, mgrs, bucket):
     - mgrs (str): MGRS Tile id
     - bucket(str): AWS S3 bucket to write tiles to
     """
+    print("Writing tempfiles to ", dir)
+
     # Calculate the number of full tiles in x and y directions
     num_x_tiles = stack[0].x.size // TILE_SIZE
     num_y_tiles = stack[0].y.size // TILE_SIZE
 
     counter = 0
-    with tempfile.TemporaryDirectory() as dir:
-        print("Writing tempfiles to ", dir)
-        # Iterate through each chunk of x and y dimensions and create tiles
-        for y_idx in range(num_y_tiles):
-            for x_idx in range(num_x_tiles):
-                # Calculate the start and end indices for x and y dimensions
-                # for the current tile
-                x_start = x_idx * TILE_SIZE
-                y_start = y_idx * TILE_SIZE
-                x_end = x_start + TILE_SIZE
-                y_end = y_start + TILE_SIZE
-
-                # Select the subset of data for the current tile
-                parts = [part[:, y_start:y_end, x_start:x_end] for part in stack]
-
-                # Only concat here to save memory, it converts S2 data to float
-                tile = xr.concat(parts, dim="band").rename("tile")
-
-                counter += 1
-                if counter % 100 == 0:
-                    print(f"Counted {counter} tiles")
-
-                if not filter_clouds_nodata(tile):
-                    continue
-
-                tile = tile.drop_sel(band="SCL")
-
-                # Track band names and color interpretation
-                tile.attrs["long_name"] = [str(x.values) for x in tile.band]
-                color = [ColorInterp.blue, ColorInterp.green, ColorInterp.red] + [
-                    ColorInterp.gray
-                ] * (len(tile.band) - 3)
-
-                # Write tile to tempdir
-                name = "{dir}/claytile_{mgrs}_{date}_v{version}_{counter}.tif".format(
-                    dir=dir,
-                    mgrs=mgrs,
-                    date=date.replace("-", ""),
-                    version=VERSION,
-                    counter=str(counter).zfill(4),
-                )
-                tile.rio.to_raster(name, compress="deflate")
-
-                with rasterio.open(name, "r+") as rst:
-                    rst.colorinterp = color
-                    rst.update_tags(date=date)
-
-        print(f"Syncing {dir} with s3://{bucket}/{VERSION}/{mgrs}/{date}")
-        subprocess.run(
-            [
-                "aws",
-                "s3",
-                "sync",
-                dir,
-                f"s3://{bucket}/{VERSION}/{mgrs}/{date}",
-                "--no-progress",
-            ],
-            check=True,
-        )
+    # Iterate through each chunk of x and y dimensions and create tiles
+    for y_idx in range(num_y_tiles):
+        for x_idx in range(num_x_tiles):
+            # Calculate the start and end indices for x and y dimensions
+            # for the current tile
+            x_start = x_idx * TILE_SIZE
+            y_start = y_idx * TILE_SIZE
+            x_end = x_start + TILE_SIZE
+            y_end = y_start + TILE_SIZE
+
+            # Select the subset of data for the current tile
+            parts = [part[:, y_start:y_end, x_start:x_end] for part in stack]
+
+            # Only concat here to save memory, it converts S2 data to float
+            tile = xr.concat(parts, dim="band").rename("tile")
+
+            counter += 1
+            if counter % 100 == 0:
+                print(f"Counted {counter} tiles")
+
+            if not filter_clouds_nodata(tile):
+                continue
+
+            tile = tile.drop_sel(band="SCL")
+
+            # Track band names and color interpretation
+            tile.attrs["long_name"] = [str(x.values) for x in tile.band]
+            color = [ColorInterp.blue, ColorInterp.green, ColorInterp.red] + [
+                ColorInterp.gray
+            ] * (len(tile.band) - 3)
+
+            # Write tile to tempdir
+            name = "{dir}/claytile_{mgrs}_{date}_v{version}_{counter}.tif".format(
+                dir=dir,
+                mgrs=mgrs,
+                date=date.replace("-", ""),
+                version=VERSION,
+                counter=str(counter).zfill(4),
+            )
+            tile.rio.to_raster(name, compress="deflate")
+
+            with rasterio.open(name, "r+") as rst:
+                rst.colorinterp = color
+                rst.update_tags(date=date)
+    if bucket:  # sync once, after every tile row has been written
+        print(f"Syncing {dir} with s3://{bucket}/{VERSION}/{mgrs}/{date}")
+        subprocess.run(
+            [
+                "aws",
+                "s3",
+                "sync",
+                dir,
+                f"s3://{bucket}/{VERSION}/{mgrs}/{date}",
+                "--no-progress",
+            ],
+            check=True,  # raise if the aws CLI exits with a non-zero status
+        )
+    else:
+        print("No bucket specified, skipping S3 sync.")
+
+
+def tiler(stack, date, mgrs, bucket, dir):
+    if not dir:  # no target directory: tile into a temp dir removed on exit
+        with tempfile.TemporaryDirectory() as scratch:
+            tile_to_dir(stack, date, mgrs, bucket, scratch)
+    else:  # tiles are written to the caller-supplied directory
+        tile_to_dir(stack, date, mgrs, bucket, dir)

	Name	geometry
0	59CNK	MULTIPOLYGON (((170.99885 -81.06088, 177.30316...
1	59CNJ	MULTIPOLYGON (((170.99872 -81.95638, 177.99281...
2	58CEN	MULTIPOLYGON (((164.99856 -82.85238, 172.85358...
3	58CEP	MULTIPOLYGON (((164.99872 -81.95638, 171.99281...
4	60CVQ	MULTIPOLYGON (((171.25220 -81.01644, 177.56261...
...	...	...
56681	02CMR	MULTIPOLYGON (((-176.23170 -80.12446, -170.488...
56682	02CMQ	MULTIPOLYGON (((-176.74780 -81.01644, -170.437...
56683	01CEK	MULTIPOLYGON (((-177.00115 -81.06088, -170.696...
56684	01CEJ	MULTIPOLYGON (((-177.00128 -81.95638, -170.007...
56685	01CEH	MULTIPOLYGON (((-177.00144 -82.85238, -169.146...
	name	Region	geometry
0	45QTC	Puri	POLYGON ((85.18408 19.79882, 85.05030 19.79707...
1	45QUC	Puri	POLYGON ((85.09075 19.79784, 85.08133 20.56420...
2	45QVC	Puri	POLYGON ((86.04484 19.80549, 86.04018 20.56420...
3	45QVB	Puri	POLYGON ((86.10420 19.89401, 86.10420 19.49490...
4	45QTB	Puri	POLYGON ((85.18307 19.88715, 85.18741 19.49490...
5	45QUB	Puri	POLYGON ((86.10420 19.89408, 86.10420 19.49490...