-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
153 lines (134 loc) · 4.55 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# ==================================================================
# module list
# ------------------------------------------------------------------
# jupyter hub+lab latest (pip)
# airflow latest (pip)
# dagster latest (pip)
# MLflow latest (pip)
# polynote latest (github tar)
# Dask latest (pip)
# Ray latest (pip)
# Prefect latest (pip)
# ==================================================================
# Base image. $PIP_INSTALL, $APT_INSTALL, $SPARK_HOME and
# $PYTHON_COMPAT_VERSION are used below but are not defined in the visible
# part of this file — presumably they are exported by this base image
# (TODO confirm).
# NOTE(review): `latest` is a mutable tag, so a rebuild can silently pick up
# a different base; consider pinning a specific tag or digest.
FROM konradmalik/spark:latest
# Switch to root for the package installation steps that follow.
USER root
# ==================================================================
# python
# ------------------------------------------------------------------
# General-purpose Python packages, installed before the individual tools.
RUN ${PIP_INSTALL} \
        setuptools \
        numpy scipy pandas \
        cloudpickle joblib
# ==================================================================
# jupyter hub
# ------------------------------------------------------------------
# nodejs/npm come from apt so that configurable-http-proxy can be installed
# globally with npm; JupyterHub and JupyterLab themselves come from pip.
# The apt step goes through `eval` — presumably because $APT_INSTALL holds a
# compound command (TODO confirm against the base image).
RUN eval ${APT_INSTALL} npm nodejs \
    && npm install -g configurable-http-proxy \
    && ${PIP_INSTALL} jupyterhub jupyterlab \
    && mkdir -p /etc/jupyterhub
# Place the hub configuration in the directory created by the step above.
COPY configs/jupyterhub_config.py /etc/jupyterhub/jupyterhub_config.py
# ==================================================================
# MLflow
# ------------------------------------------------------------------
# Install MLflow, then rewrite every occurrence of 127.0.0.1 in its cli.py
# to 0.0.0.0 — presumably so its servers listen on all interfaces (TODO
# confirm). The dots are escaped so only that literal address is matched.
#
# Miniconda is installed to /miniconda in the same layer. The installer is
# executed as root, so it is fetched over HTTPS from the official Anaconda
# host rather than plain HTTP, and `-f` makes curl fail on an HTTP error
# instead of saving the error page under the installer's name. The
# installer script is deleted once it has run.
RUN $PIP_INSTALL mlflow && \
    sed -i 's/127\.0\.0\.1/0.0.0.0/g' /usr/local/lib/python${PYTHON_COMPAT_VERSION}/dist-packages/mlflow/cli.py && \
    curl -fLO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    bash Miniconda3-latest-Linux-x86_64.sh -p /miniconda -b && \
    rm Miniconda3-latest-Linux-x86_64.sh
# Appended (not prepended), so executables already on PATH keep precedence
# over the ones shipped in /miniconda/bin.
ENV PATH=${PATH}:/miniconda/bin
# Run conda's shell initialisation, but do not auto-activate the base env.
RUN conda init && \
    conda config --set auto_activate_base false
# ==================================================================
# Airflow
# ------------------------------------------------------------------
# apt packages accompanying the Airflow extras installed below:
#   libmysqlclient-dev  - mysql
#   libsasl2-dev        - hive
# The pip requirement is quoted so the shell can never treat the [...]
# extras list as a glob pattern; pip receives it unchanged.
RUN eval $APT_INSTALL \
        libmysqlclient-dev \
        libsasl2-dev && \
    $PIP_INSTALL \
        'apache-airflow[mysql,hive,hdfs,postgres,azure,devel,redis,ssh]'
# NOTE(review): ENV does not perform shell tilde expansion, so the value is
# stored literally as "~/airflow"; whatever reads AIRFLOW_HOME has to expand
# the `~` itself. TODO confirm this is intended.
ENV AIRFLOW_HOME=~/airflow
# ==================================================================
# Dagster
# ------------------------------------------------------------------
# Dagster core and its integration libraries are installed first; dagit is
# installed by a separate pip invocation afterwards because it must be last.
RUN ${PIP_INSTALL} \
        dagster \
        dagster-airflow \
        dagster-dask \
        dagster-aws \
        dagster-bash \
        dagster-cron \
        dagster-pandas \
        dagster-postgres \
        dagster-pyspark \
        dagster-spark \
        dagster-ssh && \
    ${PIP_INSTALL} dagit
# ==================================================================
# Dask
# ------------------------------------------------------------------
# Dask from pip (no version pin, so the current release is installed).
RUN ${PIP_INSTALL} dask
# ==================================================================
# Ray
# ------------------------------------------------------------------
# Ray plus its `debug` extra. The extra is quoted so the shell can never
# treat [debug] as a glob character class; pip receives it unchanged.
RUN $PIP_INSTALL \
        ray 'ray[debug]'
# ==================================================================
# Prefect
# ------------------------------------------------------------------
# Prefect from pip (no version pin, so the current release is installed).
RUN ${PIP_INSTALL} prefect
# ==================================================================
# Polynote
# ------------------------------------------------------------------
# Polynote release to install; the archive URL is derived from the version.
ENV POLYNOTE_VERSION=0.3.11
ENV POLYNOTE_ARCHIVE=https://github.com/polynote/polynote/releases/download/$POLYNOTE_VERSION/polynote-dist.tar.gz
# Download to a file instead of piping curl into tar: without pipefail a
# pipeline only reports tar's exit status, so a failed download is not
# detected directly. `-f` makes curl fail on HTTP errors, and the archive is
# removed in the same layer so it does not add to the image size.
RUN curl -fsSL -o /tmp/polynote-dist.tar.gz "$POLYNOTE_ARCHIVE" && \
    tar -xzf /tmp/polynote-dist.tar.gz -C /usr/local/ && \
    rm /tmp/polynote-dist.tar.gz
# key=value form, consistent with every other ENV instruction in this file.
ENV POLYNOTE_HOME=/usr/local/polynote
# Python packages installed as part of the Polynote section — presumably
# its Python-side requirements (TODO confirm against the Polynote docs).
RUN $PIP_INSTALL \
        jep jedi virtualenv
# ==================================================================
# config & cleanup
# ------------------------------------------------------------------
# Refresh the dynamic linker cache, then clean apt's caches, drop packages
# that are no longer needed, and wipe the apt lists, /tmp and the contents
# of the current user's home directory.
# NOTE(review): this runs in its own layer, so it does not reclaim space
# already committed by the earlier layers.
RUN ldconfig \
    && apt-get clean \
    && apt-get -y autoremove \
    && rm -rf \
        /var/lib/apt/lists/* \
        /tmp/* \
        ~/*
# Default account, created by the add-user.sh helper. The script is
# referenced by a relative path and is not copied in by this file —
# presumably it is provided by the base image (TODO confirm).
ENV DEFAULT_USER=deenv
RUN chmod +x add-user.sh \
    && ./add-user.sh ${DEFAULT_USER}
# Hand ownership of the Spark installation to that account.
RUN chown -R ${DEFAULT_USER}:${DEFAULT_USER} ${SPARK_HOME}
# Unless overridden, the Jupyter notebook token is simply the username.
ENV JUPYTER_LAB_TOKEN=${DEFAULT_USER}
# Put the run-* scripts at the filesystem root and mark them executable.
COPY scripts/run-* /
RUN chmod +x /run-*
# Everything from here on runs as the non-root default user.
USER ${DEFAULT_USER}
# Create the data directory as that user (so it has the right ownership)
# before declaring it a volume.
RUN mkdir -p /home/${DEFAULT_USER}/data
VOLUME /home/${DEFAULT_USER}/data
# Ports documented for the bundled services:
#   3000  dagit
#   8888  jupyterlab
#   8000  jupyterhub
#   4040  spark ui
#   7077  spark master
#   8081  spark worker
#   8192  polynote
EXPOSE 3000 8888 8000 4040 7077 8081 8192