Skip to content

Commit

Permalink
Refactor dockerfile for better caching and avoid pbss download in not…
Browse files Browse the repository at this point in the history
…ebook test (#573)

Reorders some of the Dockerfile commands to avoid rebuilding
`tensorstore` from source on every Docker build.
Also removes the pbss download from `test_load_notebook.ipynb`, since
it cannot succeed outside the NVIDIA VPN.

Signed-off-by: Peter St. John <pstjohn@nvidia.com>
  • Loading branch information
pstjohn authored Jan 8, 2025
1 parent fc4b44a commit 26af484
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 49 deletions.
34 changes: 19 additions & 15 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -92,26 +92,13 @@ ENV UV_LINK_MODE=copy \
RUN --mount=type=bind,source=./sub-packages/bionemo-geometric/requirements.txt,target=/requirements-pyg.txt \
uv pip install --no-build-isolation -r /requirements-pyg.txt

WORKDIR /workspace/bionemo2

# Install 3rd-party deps and bionemo submodules.
COPY ./LICENSE /workspace/bionemo2/LICENSE
COPY ./3rdparty /workspace/bionemo2/3rdparty
COPY ./sub-packages /workspace/bionemo2/sub-packages

COPY --from=rust-env /usr/local/cargo /usr/local/cargo
COPY --from=rust-env /usr/local/rustup /usr/local/rustup

ENV PATH="/usr/local/cargo/bin:/usr/local/rustup/bin:${PATH}"
ENV RUSTUP_HOME="/usr/local/rustup"

# Note, we need to mount the .git folder here so that setuptools-scm is able to fetch git tag for version.
# Includes a hack to install tensorstore 0.1.45, which doesn't distribute a pypi wheel for python 3.12, and the metadata
# in the source distribution doesn't match the expected pypi version.
RUN --mount=type=bind,source=./.git,target=./.git \
--mount=type=bind,source=./requirements-test.txt,target=/requirements-test.txt \
--mount=type=bind,source=./requirements-cve.txt,target=/requirements-cve.txt \
<<EOF
RUN <<EOF
set -eo pipefail
uv pip install maturin --no-build-isolation

Expand All @@ -121,6 +108,24 @@ sed -i 's/^Version: 0\.0\.0$/Version: 0.1.45/' \
/usr/local/lib/python3.12/dist-packages/tensorstore-0.0.0.dist-info/METADATA
mv /usr/local/lib/python3.12/dist-packages/tensorstore-0.0.0.dist-info \
/usr/local/lib/python3.12/dist-packages/tensorstore-0.1.45.dist-info
rm -rf /root/.cache/*
EOF

WORKDIR /workspace/bionemo2

# Install 3rd-party deps and bionemo submodules.
COPY ./LICENSE /workspace/bionemo2/LICENSE
COPY ./3rdparty /workspace/bionemo2/3rdparty
COPY ./sub-packages /workspace/bionemo2/sub-packages

# Note, we need to mount the .git folder here so that setuptools-scm is able to fetch the git tag for the version.
# The tensorstore 0.1.45 hack (no pypi wheel is distributed for python 3.12, and the metadata in the source distribution
# doesn't match the expected pypi version) is applied in the separate RUN step above, not in this one.
RUN --mount=type=bind,source=./.git,target=./.git \
--mount=type=bind,source=./requirements-test.txt,target=/requirements-test.txt \
--mount=type=bind,source=./requirements-cve.txt,target=/requirements-cve.txt \
<<EOF
set -eo pipefail

uv pip install --no-build-isolation \
./3rdparty/* \
Expand All @@ -131,7 +136,6 @@ uv pip install --no-build-isolation \
rm -rf ./3rdparty
rm -rf /tmp/*
rm -rf ./sub-packages/bionemo-noodles/target
rm -rf /root/.cache/*
EOF

# In the devcontainer image, we just copy over the finished `dist-packages` folder from the build image back into the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import tempfile\n",
"from pathlib import Path\n",
"\n",
"from bionemo.core.data.load import load"
]
Expand All @@ -21,26 +21,20 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading data from 'nvidia/clara/scdl_sample_test:1.0' to file '/tmp/tmpqif5bfww/7a4237537bf535dfa00301ce8cc7073e0a23d5bc8aa902ad65db9f51b57a6df9-scdl_sample_test.tar.gz'.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Untarring contents of '/tmp/tmpqif5bfww/7a4237537bf535dfa00301ce8cc7073e0a23d5bc8aa902ad65db9f51b57a6df9-scdl_sample_test.tar.gz' to '/tmp/tmpqif5bfww/7a4237537bf535dfa00301ce8cc7073e0a23d5bc8aa902ad65db9f51b57a6df9-scdl_sample_test.tar.gz.untar'\n"
"Downloading data from 'nvidia/clara/scdl_sample_test:1.0' to file '/tmp/tmp7nmjzz19/7a4237537bf535dfa00301ce8cc7073e0a23d5bc8aa902ad65db9f51b57a6df9-scdl_sample_test.tar.gz'.\n",
"Untarring contents of '/tmp/tmp7nmjzz19/7a4237537bf535dfa00301ce8cc7073e0a23d5bc8aa902ad65db9f51b57a6df9-scdl_sample_test.tar.gz' to '/tmp/tmp7nmjzz19/7a4237537bf535dfa00301ce8cc7073e0a23d5bc8aa902ad65db9f51b57a6df9-scdl_sample_test.tar.gz.untar'\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"download_end\": \"2024-12-03 18:39:20\",\n",
" \"download_start\": \"2024-12-03 18:39:03\",\n",
" \"download_time\": \"17s\",\n",
" \"download_end\": \"2025-01-03 15:16:48\",\n",
" \"download_start\": \"2025-01-03 15:16:47\",\n",
" \"download_time\": \"0s\",\n",
" \"files_downloaded\": 1,\n",
" \"local_path\": \"/tmp/tmpqif5bfww/tmprn0ysh0w/scdl_sample_test_v1.0\",\n",
" \"local_path\": \"/tmp/tmp7nmjzz19/tmpfuw2obcq/scdl_sample_test_v1.0\",\n",
" \"size_downloaded\": \"964.91 KB\",\n",
" \"status\": \"COMPLETED\"\n",
"}\n"
Expand All @@ -51,26 +45,6 @@
"with tempfile.TemporaryDirectory() as cache_dir:\n",
" load(\"scdl/sample\", source=\"ngc\", cache_dir=Path(cache_dir))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading data from 's3://bionemo-ci/test-data/scdl_sample_test.tar.gz' to file '/tmp/tmpl6cgwhyn/7a4237537bf535dfa00301ce8cc7073e0a23d5bc8aa902ad65db9f51b57a6df9-scdl_sample_test.tar.gz'.\n",
"s3://bionemo-ci/test-data/scdl_sample_test.tar.gz: 100%|██████████| 988k/988k [00:00<00:00, 2.70MB/s]\n",
"Untarring contents of '/tmp/tmpl6cgwhyn/7a4237537bf535dfa00301ce8cc7073e0a23d5bc8aa902ad65db9f51b57a6df9-scdl_sample_test.tar.gz' to '/tmp/tmpl6cgwhyn/7a4237537bf535dfa00301ce8cc7073e0a23d5bc8aa902ad65db9f51b57a6df9-scdl_sample_test.tar.gz.untar'\n"
]
}
],
"source": [
"with tempfile.TemporaryDirectory() as cache_dir:\n",
" load(\"scdl/sample\", source=\"pbss\", cache_dir=Path(cache_dir))"
]
}
],
"metadata": {
Expand All @@ -89,7 +63,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.12.3"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 26af484

Please sign in to comment.