Update agent and infrastructure

danijar · Apr 26, 2024 · 2411f7d · 2411f7d
1 parent 8fa35f8
commit 2411f7d
Show file tree

Hide file tree

Showing 177 changed files with 10,432 additions and 6,009 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,3 @@
+*.py[cod]
+__pycache__/
+dist
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1 +1 @@
-include requirements.txt
+include dreamerv3/requirements.txt
diff --git a/README.md b/README.md
@@ -37,7 +37,7 @@ outperforming specialized methods. Removing the need for tuning reduces the
 amount of expert knowledge and computational resources needed to apply
 reinforcement learning.
 
-![DreamerV3 Benchmark Scores](https://user-images.githubusercontent.com/2111293/217356042-536a693a-cb5e-42aa-a20f-5303a77cad9c.png)
+![DreamerV3 Benchmark Scores](https://github.com/danijar/dreamerv3/assets/2111293/0fe8f1cf-6970-41ea-9efc-e2e2477e7861)
 
 Due to its robustness, DreamerV3 shows favorable scaling properties. Notably,
 using larger models consistently increases not only its final performance but
@@ -48,24 +48,21 @@ increases data efficiency.
 
 # Instructions
 
-## Package
-
-If you just want to run DreamerV3 on a custom environment, you can `pip install
-dreamerv3` and copy [`example.py`][example] from this repository as a starting
-point.
+The code has been tested on Linux and Mac.
 
 ## Docker
 
-If you want to make modifications to the code, you can either use the provided
-`Dockerfile` that contains instructions or follow the manual instructions
-below.
+You can either use the provided `Dockerfile`, whose header comments contain
+build and run instructions, or follow the manual instructions below.
 
 ## Manual
 
 Install [JAX][jax] and then the other dependencies:
 
 ```sh
-pip install -r requirements.txt
+pip install -U -r embodied/requirements.txt
+pip install -U -r dreamerv3/requirements.txt \
+  -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
 ```
 
 Simple training script:
@@ -77,21 +74,25 @@ python example.py
 Flexible training script:
 
 ```sh
-python dreamerv3/train.py \
-  --logdir ~/logdir/$(date "+%Y%m%d-%H%M%S") \
-  --configs crafter --batch_size 16 --run.train_ratio 32
+python dreamerv3/main.py \
+  --logdir ~/logdir/{timestamp} \
+  --configs crafter \
+  --run.train_ratio 32
 ```
 
+To reproduce results, train on the desired task using the corresponding config,
+such as `--configs atari --task atari_pong`.
+
 # Tips
 
 - All config options are listed in `configs.yaml` and you can override them
-  from the command line.
+  as flags from the command line.
 - The `debug` config block reduces the network size, batch size, duration
   between logs, and so on for fast debugging (but does not learn a good model).
 - By default, the code tries to run on GPU. You can switch to CPU or TPU using
-  the `--jax.platform cpu` flag. Note that multi-GPU support is untested.
-- You can run with multiple config blocks that will override defaults in the
-  order they are specified, for example `--configs crafter large`.
+  the `--jax.platform` flag, for example `--jax.platform cpu`.
+- You can use multiple config blocks that will override defaults in the
+  order they are specified, for example `--configs crafter size50m`.
 - By default, metrics are printed to the terminal, appended to a JSON lines
   file, and written as TensorBoard summaries. Other outputs like WandB can be
   enabled in the training script.
@@ -100,16 +101,12 @@ python dreamerv3/train.py \
   often happens when reusing an old logdir by accident.
 - If you are getting CUDA errors, scroll up because the cause is often just an
   error that happened earlier, such as out of memory or incompatible JAX and
-  CUDA versions.
-- You can use the `small`, `medium`, `large` config blocks to reduce memory
-  requirements. The default is `xlarge`. See the scaling graph above to see how
-  this affects performance.
-- Many environments are included, some of which require installating additional
-  packages. See the installation scripts in `scripts` and the `Dockerfile` for
-  reference.
+  CUDA versions. Try `--batch_size 1` to rule out an out-of-memory error.
+- Many environments are included, some of which require installing additional
+  packages. See the `Dockerfile` for reference.
 - When running on custom environments, make sure to specify the observation
-  keys the agent should be using via `encoder.mlp_keys`, `encode.cnn_keys`,
-  `decoder.mlp_keys` and `decoder.cnn_keys`.
+  keys the agent should be using via `enc.simple.mlp_keys`,
+  `enc.simple.cnn_keys`, `dec.simple.mlp_keys` and `dec.simple.cnn_keys`.
 - To log metrics from environments without showing them to the agent or storing
   them in the replay buffer, return them as observation keys with `log_` prefix
   and enable logging via the `run.log_keys_...` options.

diff --git a/dreamerv3/Dockerfile b/dreamerv3/Dockerfile
@@ -1,63 +1,75 @@
-# 1. Test setup:
-# docker run -it --rm --gpus all nvidia/cuda:11.4.2-cudnn8-runtime-ubuntu20.04 nvidia-smi
+# Instructions
 #
-# If the above does not work, try adding the --privileged flag
-# and changing the command to `sh -c 'ldconfig -v && nvidia-smi'`.
+# 1) Test setup:
 #
-# 2. Start training:
-# docker build -f  dreamerv3/Dockerfile -t img . && \
-# docker run -it --rm --gpus all -v ~/logdir:/logdir img \
-#   sh scripts/xvfb_run.sh python3 dreamerv3/train.py \
-#   --logdir "/logdir/$(date +%Y%m%d-%H%M%S)" \
-#   --configs dmc_vision --task dmc_walker_walk
+#   docker run -it --rm --gpus all --privileged <base image> \
+#     sh -c 'ldconfig; nvidia-smi'
+#
+# 2) Start training:
+#
+#   docker build -f dreamerv3/Dockerfile -t img . && \
+#   docker run -it --rm --gpus all -v ~/logdir/docker:/logdir img \
+#     sh -c 'ldconfig; sh embodied/scripts/xvfb_run.sh python dreamerv3/main.py \
+#       --logdir "/logdir/{timestamp}" --configs atari --task atari_pong'
+#
+# 3) See results:
+#
+#   tensorboard --logdir ~/logdir/docker
 #
-# 3. See results:
-# tensorboard --logdir ~/logdir
 
 # System
-FROM nvidia/cuda:11.4.2-cudnn8-devel-ubuntu20.04
-ARG DEBIAN_FRONTEND=noninteractive
+FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
+ENV DEBIAN_FRONTEND=noninteractive
 ENV TZ=America/San_Francisco
 ENV PYTHONUNBUFFERED 1
-ENV PIP_DISABLE_PIP_VERSION_CHECK 1
 ENV PIP_NO_CACHE_DIR 1
+ENV PIP_ROOT_USER_ACTION=ignore
 RUN apt-get update && apt-get install -y \
-  ffmpeg git python3-pip vim libglew-dev \
-  x11-xserver-utils xvfb \
+  ffmpeg git vim curl software-properties-common \
+  libglew-dev x11-xserver-utils xvfb \
   && apt-get clean
-RUN pip3 install --upgrade pip
+
+# Workdir
+RUN mkdir /app
+WORKDIR /app
+
+# Python
+RUN add-apt-repository ppa:deadsnakes/ppa
+RUN apt-get update && apt-get install -y python3.11-dev python3.11-venv && apt-get clean
+RUN python3.11 -m venv ./venv --upgrade-deps
+ENV PATH="/app/venv/bin:$PATH"
+RUN pip install --upgrade pip setuptools
 
 # Envs
+COPY embodied/scripts/install-minecraft.sh .
+RUN sh install-minecraft.sh
+COPY embodied/scripts/install-dmlab.sh .
+RUN sh install-dmlab.sh
+RUN pip install ale_py autorom[accept-rom-license]
+RUN pip install procgen_mirror
+RUN pip install crafter
+RUN pip install dm_control
+RUN pip install memory_maze
 ENV MUJOCO_GL egl
-ENV DMLAB_DATASET_PATH /dmlab_data
-COPY scripts scripts
-RUN sh scripts/install-dmlab.sh
-RUN sh scripts/install-atari.sh
-RUN sh scripts/install-minecraft.sh
-ENV NUMBA_CACHE_DIR=/tmp
-RUN pip3 install crafter
-RUN pip3 install dm_control
-RUN pip3 install robodesk
-RUN pip3 install bsuite
+ENV NUMBA_CACHE_DIR /tmp
 
 # Agent
-RUN pip3 install jax[cuda11_cudnn82] -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
-RUN pip3 install jaxlib
-RUN pip3 install tensorflow_probability
-RUN pip3 install optax
-RUN pip3 install tensorflow-cpu
+COPY dreamerv3/requirements.txt agent-requirements.txt
+RUN pip install -r agent-requirements.txt \
+  -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
 ENV XLA_PYTHON_CLIENT_MEM_FRACTION 0.8
 
-# Google Cloud DNS cache (optional)
+# Embodied
+COPY embodied/requirements.txt embodied-requirements.txt
+RUN pip install -r embodied-requirements.txt
+
+# Source
+COPY . .
+
+# Cloud
 ENV GCS_RESOLVE_REFRESH_SECS=60
 ENV GCS_REQUEST_CONNECTION_TIMEOUT_SECS=300
 ENV GCS_METADATA_REQUEST_TIMEOUT_SECS=300
 ENV GCS_READ_REQUEST_TIMEOUT_SECS=300
 ENV GCS_WRITE_REQUEST_TIMEOUT_SECS=600
-
-# Embodied
-RUN pip3 install numpy cloudpickle ruamel.yaml rich zmq msgpack
-COPY . /embodied
-RUN chown -R 1000:root /embodied && chmod -R 775 /embodied
-
-WORKDIR embodied
+RUN chown 1000:root . && chmod 775 .
diff --git a/dreamerv3/__init__.py b/dreamerv3/__init__.py
@@ -1,6 +1,2 @@
-import sys, pathlib
-sys.path.append(str(pathlib.Path(__file__).parent))
-
 from .agent import Agent
-configs = Agent.configs
-from .train import wrap_env
+from .main import wrap_env
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		include requirements.txt
		include dreamerv3/requirements.txt