# The FastLLM Dockerfile is used to construct the FastLLM image, which can be
# used directly to run offline batch inference.
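#
# A minimal build/run sketch (the image tag and GPU flag are illustrative
# assumptions, not defined anywhere in this file; BuildKit is required for
# the RUN --mount cache directives below):
#   DOCKER_BUILDKIT=1 docker build -t batchllm:dev .
#   docker run --gpus all batchllm:dev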
#################### BatchLLM dependency IMAGE ####################
# image with dependency installed
FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS batchllm
WORKDIR /workspace
RUN apt-get update -y \
&& apt-get install -y python3-pip git vim wget
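# The BuildKit cache mounts below persist pip's download cache across builds;
# pyodps-int is installed from an internal Aliyun pip mirror.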
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -i http://yum.tbsite.net/aliyun-pypi/simple/ --trusted-host=yum.tbsite.net --extra-index-url https://mirrors.aliyun.com/pypi/simple/ pyodps-int
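# Copy requirements.txt on its own first so this dependency layer stays
# cached when only the vLLM wheel or the entrypoint script changes.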
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements.txt
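# The prebuilt vLLM wheel is expected under release/ in the build context;
# override VLLM_WHL with --build-arg if a different wheel filename is used.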
ARG VLLM_WHL=vllm-0.5.0.post1+cu124-cp310-cp310-linux_x86_64.whl
COPY release/${VLLM_WHL} ${VLLM_WHL}
RUN --mount=type=cache,target=/root/.cache/pip \
pip install ${VLLM_WHL} # -i https://pypi.tuna.tsinghua.edu.cn/simple/
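# entrypoint.sh is assumed to launch the offline batch inference job
# described at the top of this file.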
COPY entrypoint.sh entrypoint.sh
ENTRYPOINT ["bash", "entrypoint.sh"]
#################### BatchLLM dependency IMAGE ####################