-
Notifications
You must be signed in to change notification settings - Fork 4
/
Dockerfile-analyses
73 lines (65 loc) · 2.67 KB
/
Dockerfile-analyses
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Builds an image for reproducing the efficient-learning-khan analyses.
# Use Dockerfile-experiment to build an environment for running the experiment.
FROM debian:buster-slim

LABEL maintainer="Paxton Fitzpatrick <Paxton.C.Fitzpatrick.GR@Dartmouth.edu>"

# Build arguments (override with `--build-arg`).
# DEBIAN_FRONTEND is declared as an ARG rather than an ENV so it only applies
# while the image is being built and is not baked into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive
ARG notebook_dir="/mnt/code/notebooks"
ARG notebook_ip=0.0.0.0
ARG notebook_port=8888

# Runtime environment.
# The NOTEBOOK_* variables persist the build arguments above into the
# container; presumably they are read by jupyter_notebook_config.py -- TODO
# confirm against that file.
# /opt/conda/bin is prepended to PATH because the RUN step installs Mambaforge
# under /opt/conda.
ENV MAMBA_NO_BANNER=1 \
    NOTEBOOK_DIR="${notebook_dir}" \
    NOTEBOOK_IP="${notebook_ip}" \
    NOTEBOOK_PORT="${notebook_port}" \
    PATH="/opt/conda/bin:${PATH}"
# Stage everything the build step needs in /khan_helpers/: the khan_helpers
# sources, the Jupyter config, and the conda environment spec. The RUN step
# below installs from this directory, moves the Jupyter config out of it, and
# finally deletes it.
COPY code/khan_helpers \
     docker/jupyter_notebook_config.py \
     docker/analysis-environment.yml \
     /khan_helpers/

# Run subsequent RUN instructions under bash with pipefail enabled, so a
# failure anywhere in a pipeline fails the build step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Single build layer: installs temporary build tooling, Mambaforge, the conda
# environment, pip-only requirements, NLTK data, and the khan_helpers package,
# then removes the build tooling and caches. Everything happens in one RUN so
# that files deleted during cleanup are never stored in an earlier image layer.
RUN apt-get update --fix-missing \
    && apt-get install -y --no-install-recommends eatmydata \
    # temporarily install libraries needed to download & build Python
    # requirements, plus a mini text editor for convenience
    # (vim-tiny and eatmydata are not in the purge list below, so they stay
    # in the final image)
    && eatmydata apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gcc \
        git \
        libc6-dev \
        vim-tiny \
    # install mamba (faster C++ conda reimplementation, uses conda-forge);
    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # server's error page as the installer script
    && curl --fail --progress-bar \
        -L "https://github.com/conda-forge/miniforge/releases/download/4.11.0-0/Mambaforge-4.11.0-0-Linux-$(uname -m).sh" \
        -o "$HOME/mambaforge.sh" \
    && /bin/bash "$HOME/mambaforge.sh" -bp /opt/conda \
    && rm "$HOME/mambaforge.sh" \
    # install required packages from environment file
    && mamba env update -f /khan_helpers/analysis-environment.yml --prune \
    # install additional requirements not available via conda
    # wordcloud must be built from source for ARM/Python>3.7
    && pip install --disable-pip-version-check --no-cache-dir \
        hypertools==0.8.0 \
        youtube_transcript_api==0.4.4 \
        git+https://github.com/amueller/word_cloud.git@35ce9b7 \
    # set up jupyter notebook config
    # (-p so an already-existing ~/.jupyter does not abort the build)
    && mkdir -p "$HOME/.jupyter" \
    && mv "/khan_helpers/jupyter_notebook_config.py" "$HOME/.jupyter/" \
    # download NLTK corpora & models
    # (-u: unbuffered output, -B: don't write .pyc files, -W ignore: silence
    # warnings)
    && python -uBW ignore -m nltk.downloader \
        stopwords \
        averaged_perceptron_tagger \
        wordnet \
        omw-1.4 \
    # install khan_helpers package (NLTK data must be downloaded first)
    && pip install --disable-pip-version-check --no-cache-dir /khan_helpers \
    # various cleanup to reduce image size
    && rm -rf /khan_helpers \
    && mamba clean -afy \
    && apt-get purge -y --auto-remove \
        ca-certificates \
        curl \
        gcc \
        git \
        libc6-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    # remove downloaded NLTK zip archives. The glob must sit OUTSIDE the
    # quotes or bash never expands it and nothing is deleted. An archive is
    # only removed when its extracted directory exists next to it, so any
    # package that was left zipped (and may be read directly from the zip)
    # is kept intact.
    && for archive in "$HOME"/nltk_data/*/*.zip; do \
        if [ -d "${archive%.zip}" ]; then rm -f "$archive"; fi; \
    done
# Start containers in /mnt. The default notebook_dir build argument
# (/mnt/code/notebooks) lives under this path, so /mnt is presumably where the
# repository is bind-mounted at run time -- TODO confirm against the project's
# run instructions.
WORKDIR /mnt

# Default command: launch the Jupyter notebook server (exec form, so the
# server runs as PID 1 and receives signals directly). The configuration file
# was moved into $HOME/.jupyter/ during the build.
CMD ["jupyter", "notebook"]