# Workflow file for run "ci: build rocm 6.2" (#608).

# Workflow-level settings: name, token permissions, shared environment,
# triggers and concurrency.
name: ci

# The workflow token only gets read access.
permissions:
  contents: read
  pull-requests: read
  actions: read

# Set from the branch or tag name that triggered the run; the Linux jobs pass it
# into their build containers with `--env LLAMA_BOX_BUILD_VERSION`.
env:
  LLAMA_BOX_BUILD_VERSION: "${{ github.ref_name }}"

# Runs manually, on version tags and main/release-branch pushes, and on pull
# requests against main. Changes limited to docs, images or the prune/sync
# workflows do not trigger a run.
on:
  workflow_dispatch: {}
  push:
    tags:
      - "v*.*.*"
    branches:
      - "main"
      - "branch-v*.*"
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
      - ".github/workflows/prune.yml"
      - ".github/workflows/sync.yml"
  pull_request:
    branches:
      - "main"
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
      - ".github/workflows/prune.yml"
      - ".github/workflows/sync.yml"

# `github.head_ref` is only set for pull-request runs, so those share one group
# per ref (a newer run cancels the older one); every other run falls back to
# its unique run id and is therefore never cancelled by a sibling.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
# OpenMP is disabled (-DGGML_OPENMP=off) in the build jobs below,
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
jobs:
# Job `darwin` (an entry of the `jobs:` mapping): CPU-only macOS build of
# llama-box with Accelerate on and Metal off, zipped and uploaded as an artifact.
# The matrix lists only amd64/avx2, which selects the macos-13 runner.
darwin:
  strategy:
    fail-fast: false
    matrix:
      include:
        - arch: 'amd64'
          instruction: 'avx2'
  runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: 'recursive'
    - name: Deps
      run: |
        brew update
    - name: Setup XCode
      uses: maxim-lobanov/setup-xcode@v1
      with:
        xcode-version: '15.2'
    # Configure, build, verify that the binary exists (failing the step
    # otherwise), then zip it into out/.
    - name: Build
      run: |
        echo "===== BUILD ====="
        mkdir -p ${{ github.workspace }}/.cache
        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
          -DGGML_ACCELERATE=on -DGGML_METAL=off \
          -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
          -DGGML_NATIVE=on \
          -DGGML_OPENMP=off \
          -DGGML_RPC=on
        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(sysctl -n hw.physicalcpu)
        echo "===== RESULT ====="
        ls -alh ${{ github.workspace }}/build/bin/
        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
          otool --version
          otool -L ${{ github.workspace }}/build/bin/llama-box || true
        else
          exit 1
        fi
        echo "===== PACKAGE ====="
        mkdir -p ${{ github.workspace }}/out
        zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-${{ matrix.instruction }}.zip ${{ github.workspace }}/build/bin/llama-box
    - name: Upload Artifact
      uses: actions/upload-artifact@v4
      with:
        path: ${{ github.workspace }}/out/*.zip
        name: llama-box-darwin-${{ matrix.arch }}-${{ matrix.instruction }}
# Job `darwin-metal` (an entry of the `jobs:` mapping): macOS build with Metal
# enabled and the Metal library embedded, for amd64 (macos-13) and arm64
# (macos-14).
# NOTE(review): the zip file name carries no version suffix while the artifact
# name does; left as-is because downstream asset names may depend on it.
darwin-metal:
  strategy:
    fail-fast: false
    matrix:
      arch:
        - 'amd64'
        - 'arm64'
      version:
        - '3.0'
  # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
  # https://support.apple.com/en-us/102894.
  runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: 'recursive'
    - name: Deps
      run: |
        brew update
    - name: Setup XCode
      uses: maxim-lobanov/setup-xcode@v1
      with:
        xcode-version: '15.2'
    # Configure, build, verify that the binary exists (failing the step
    # otherwise), then zip it into out/.
    - name: Build
      run: |
        echo "===== BUILD ====="
        mkdir -p ${{ github.workspace }}/.cache
        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
          -DGGML_ACCELERATE=on -DGGML_METAL=on -DGGML_METAL_USE_BF16=on -DGGML_METAL_EMBED_LIBRARY=on \
          -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
          -DGGML_NATIVE=on \
          -DGGML_OPENMP=off \
          -DGGML_RPC=on
        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(sysctl -n hw.physicalcpu)
        echo "===== RESULT ====="
        ls -alh ${{ github.workspace }}/build/bin/
        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
          otool --version
          otool -L ${{ github.workspace }}/build/bin/llama-box || true
        else
          exit 1
        fi
        echo "===== PACKAGE ====="
        mkdir -p ${{ github.workspace }}/out
        zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/build/bin/llama-box
    - name: Upload Artifact
      uses: actions/upload-artifact@v4
      with:
        path: ${{ github.workspace }}/out/*.zip
        name: llama-box-darwin-${{ matrix.arch }}-metal-${{ matrix.version }}
# Job `linux` (an entry of the `jobs:` mapping): static CPU build linked against
# OpenBLAS, one matrix entry per instruction set, each built inside its own
# distro container.
#
# The Build step writes /tmp/entrypoint.sh through an UNQUOTED heredoc, so:
#   - `${{ ... }}` is substituted by Actions, and `${CMAKE_VERSION}` / `$(nproc)`
#     by the runner's shell, while the script is being generated;
#   - `\${...}` and `\$(...)` are left for bash inside the container;
#   - `\\` becomes a single backslash in the generated script.
# The `EOF` and `EOL` terminators must stay at the block scalar's base
# indentation, otherwise the heredocs never close.
linux:
  strategy:
    fail-fast: false
    matrix:
      # AVX2 ==> CentOS 7.
      # AVX512 ==> RockyLinux 8.9.
      # NEON ==> Ubuntu 18.04.
      include:
        - arch: 'amd64'
          instruction: 'avx2'
          distro_container_image: 'centos:7'
        - arch: 'amd64'
          instruction: 'avx512'
          distro_container_image: 'rockylinux:8.9'
        - arch: 'arm64'
          instruction: 'neon'
          distro_container_image: 'ubuntu:18.04'
  runs-on: ubuntu-22.04
  steps:
    - name: Maximize Docker Build Space
      uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
      with:
        deep-clean: false
        root-reserve-mb: 20480
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: 'recursive'
    - name: Setup Cache
      timeout-minutes: 5
      uses: actions/cache@v4
      with:
        key: cache-linux-${{ matrix.arch }}-${{ matrix.instruction }}
        path: |
          ${{ github.workspace }}/.cache
    # Only the arm64 matrix entry needs emulation on the amd64 runner.
    - name: Setup QEMU
      if: ${{ matrix.arch == 'arm64' }}
      uses: docker/setup-qemu-action@v3
      with:
        image: tonistiigi/binfmt:qemu-v8.1.5
        platforms: "arm64"
    # Generates the container entrypoint, runs it in the distro image with the
    # workspace mounted at the same path, then zips the resulting binary.
    # The Rocky branch anchors its version test (`^8\.`) so that only 8.x
    # releases select the `powertools` repository.
    - name: Build
      env:
        CMAKE_VERSION: "3.22.1"
        CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
      run: |
        echo "===== SCRIPT ====="
        cat <<EOF > /tmp/entrypoint.sh
        #!/bin/bash
        if [ -f /etc/os-release ]; then
          source /etc/os-release
          cat /etc/os-release
          if [ "\${ID}" = "ubuntu" ]; then
            apt-get update -y \
              && apt-get install -y binutils pkg-config build-essential libopenblas-dev ccache curl git bc
            if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then
              apt-get update -y \
                && apt-get install -y software-properties-common
              add-apt-repository -y ppa:ubuntu-toolchain-r/test
              apt-get update -y \
                && apt-get install -y gcc-11 g++-11
              update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
              update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
            fi
          elif [ "\${ID}" = "rocky" ]; then
            # NB(thxCode): Enbale EPEL, see
            # https://wiki.rockylinux.org/rocky/repo/#notes-on-epel.
            dnf install -y epel-release
            if [[ "\${VERSION_ID}" =~ ^8\\. ]]; then
              dnf config-manager --set-enabled powertools
            else
              dnf config-manager --set-enabled crb
            fi
            dnf install -y binutils pkgconfig gcc gcc-c++ make glibc-static libstdc++-static openblas-static ccache curl git
            if [[ "\${VERSION_ID}" =~ ^8\\. ]]; then
              dnf install -y gcc-toolset-11
              source scl_source enable gcc-toolset-11
            fi
          elif [ "\${ID}" = "centos" ]; then
            # NB(thxCode): Patch for CentOS, see
            # https://github.com/ROCm/ROCm-docker/blob/db86386c24eeb45f5d3ba73564b00cc66566e537/dev/Dockerfile-centos-7.
            sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \
              && sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \
              && sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
            yum install -y centos-release-scl
            sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \
              && sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \
              && sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
            yum update -y \
              && yum install -y devtoolset-9 devtoolset-9-libatomic-devel devtoolset-9-elfutils-libelf-devel
            export PATH="/opt/rh/devtoolset-9/root/usr/bin:\${PATH}" \
              && export MANPATH="/opt/rh/devtoolset-9/root/usr/share/man:\${MANPATH}" \
              && export INFOPATH="/opt/rh/devtoolset-9/root/usr/share/info:\${INFOPATH}" \
              && export PKG_CONFIG_PATH="/opt/rh/devtoolset-9/root/usr/lib64/pkgconfig:\${PKG_CONFIG_PATH}" \
              && export PCP_DIR="/opt/rh/devtoolset-9/root" \
              && export PERL5LIB="/opt/rh/devtoolset-9/root/usr/lib64/perl5/vendor_perl:/opt/rh/devtoolset-9/root/usr/lib/perl5:/opt/rh/devtoolset-9/root/usr/share/perl5/" \
              && export LD_LIBRARY_PATH="/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:/opt/rh/devtoolset-9/root/usr/lib64/dyninst:/opt/rh/devtoolset-9/root/usr/lib/dyninst:/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:\${LD_LIBRARY_PATH}" \
              && export LDFLAGS="-Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib64 -Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib"
            yum --enablerepo=extras install -y epel-release
            yum install -y https://packages.endpointdev.com/rhel/\${VERSION_ID}/os/\$(uname -m)/endpoint-repo.\$(uname -m).rpm
            yum update -y \
              && yum install -y make glibc-static libstdc++-static openblas-static ccache curl git
            cat <<EOL >/usr/lib64/pkgconfig/openblas.pc
        Name: OpenBLAS
        Description: OpenBLAS library
        Version: 0.3.3
        Libs: -L/usr/lib64 -lopenblas
        Cflags: -I/usr/include/openblas
        EOL
          else
            echo "Unsupport distribution: \${ID}"
            exit 1
          fi
        else
          echo "Unknown distribution"
          exit 1
        fi
        curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1
        git config --system --add safe.directory '*'
        mkdir -p ${{ github.workspace }}/.cache
        echo "===== BUILD ====="
        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
          -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
          -DGGML_NATIVE=off \
          ${{ matrix.instruction == 'avx2' && '-DGGML_AVX=on -DGGML_AVX_VNNI=off -DGGML_AVX2=on' || '' }} \
          ${{ matrix.instruction == 'avx512' && '-DGGML_AVX512=on -DGGML_AVX512_BF16=off -DGGML_AVX512_VBMI=on -DGGML_AVX512_VNNI=on' || '' }} \
          ${{ matrix.instruction == 'neon' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
          -DGGML_BLAS_VENDOR=OpenBLAS \
          -DGGML_STATIC=on \
          -DGGML_BLAS=on \
          -DGGML_OPENMP=off \
          -DGGML_RPC=on
        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
        echo "===== RESULT ====="
        ls -alh ${{ github.workspace }}/build/bin/
        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
          ldd --version
          ldd ${{ github.workspace }}/build/bin/llama-box || true
        else
          exit 1
        fi
        EOF
        chmod +x /tmp/entrypoint.sh
        cat /tmp/entrypoint.sh
        docker run \
          --rm \
          --privileged \
          --platform linux/${{ matrix.arch }} \
          --volume ${{ github.workspace }}:${{ github.workspace }} \
          --workdir ${{ github.workspace }} \
          --env DEBIAN_FRONTEND=noninteractive \
          --env CCACHE_DIR \
          --env LLAMA_BOX_BUILD_VERSION \
          --volume /tmp/entrypoint.sh:/entrypoint.sh \
          --entrypoint /entrypoint.sh \
          ${{ matrix.distro_container_image }}
        echo "===== PACKAGE ====="
        mkdir -p ${{ github.workspace }}/out
        zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-${{ matrix.instruction }}.zip ${{ github.workspace }}/build/bin/llama-box
    - name: Upload Artifact
      uses: actions/upload-artifact@v4
      with:
        path: ${{ github.workspace }}/out/*.zip
        name: llama-box-linux-${{ matrix.arch }}-${{ matrix.instruction }}
# Job `linux-hip` (an entry of the `jobs:` mapping): ROCm/HIP build inside the
# ROCm CentOS 7 image, targeting every GPU architecture listed in `hip_arch`.
#
# The Build step writes /tmp/entrypoint.sh through an UNQUOTED heredoc, so
# `${{ ... }}`, `${CMAKE_VERSION}`, `${AMDGPU_TARGETS}` and `$(nproc)` are
# resolved while the script is generated, whereas `\${...}` / `\$(...)` are
# left for bash inside the container. `EOF` must stay at the block scalar's
# base indentation.
linux-hip:
  strategy:
    fail-fast: false
    matrix:
      # see https://hub.docker.com/r/rocm/dev-centos-7/tags.
      # 6.2 ==> 6.2.4, CentOS 7.
      # build fat binary,
      # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
      # https://llvm.org/docs/AMDGPUUsage.html.
      # official gpu support list,
      # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html.
      include:
        - arch: 'amd64'
          version: '6.2'
          distro_container_image: 'rocm/dev-centos-7:6.2.4-complete'
          hip_arch: 'gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1030;gfx1100;gfx1101;gfx1102'
  runs-on: ubuntu-22.04
  steps:
    - name: Maximize Docker Build Space
      uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
      with:
        deep-clean: false
        root-reserve-mb: 20480
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: 'recursive'
    - name: Setup Cache
      timeout-minutes: 5
      uses: actions/cache@v4
      with:
        key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }}
        path: |
          ${{ github.workspace }}/.cache
    # Skipped for the current matrix, which only lists amd64.
    - name: Setup QEMU
      if: ${{ matrix.arch == 'arm64' }}
      uses: docker/setup-qemu-action@v3
      with:
        image: tonistiigi/binfmt:qemu-v8.1.5
        platforms: "arm64"
    # Generates the container entrypoint, runs it in the ROCm image with the
    # workspace mounted at the same path, then zips the resulting binary.
    - name: Build
      env:
        CMAKE_VERSION: "3.22.1"
        CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        AMDGPU_TARGETS: "${{ matrix.hip_arch }}"
      run: |
        echo "===== SCRIPT ====="
        cat <<EOF > /tmp/entrypoint.sh
        #!/bin/bash
        if [ -f /etc/os-release ]; then
          source /etc/os-release
          cat /etc/os-release
          if [ "\${ID}" = "centos" ]; then
            # NB(thxCode): Patch for CentOS, see
            # https://github.com/ROCm/ROCm-docker/blob/db86386c24eeb45f5d3ba73564b00cc66566e537/dev/Dockerfile-centos-7.
            sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \
              && sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \
              && sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
            yum install -y centos-release-scl
            sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \
              && sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \
              && sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
            yum update -y \
              && yum install -y devtoolset-9 devtoolset-9-libatomic-devel devtoolset-9-elfutils-libelf-devel
            export PATH="/opt/rh/devtoolset-9/root/usr/bin:\${PATH}" \
              && export MANPATH="/opt/rh/devtoolset-9/root/usr/share/man:\${MANPATH}" \
              && export INFOPATH="/opt/rh/devtoolset-9/root/usr/share/info:\${INFOPATH}" \
              && export PKG_CONFIG_PATH="/opt/rh/devtoolset-9/root/usr/lib64/pkgconfig:\${PKG_CONFIG_PATH}" \
              && export PCP_DIR="/opt/rh/devtoolset-9/root" \
              && export PERL5LIB="/opt/rh/devtoolset-9/root/usr/lib64/perl5/vendor_perl:/opt/rh/devtoolset-9/root/usr/lib/perl5:/opt/rh/devtoolset-9/root/usr/share/perl5/" \
              && export LD_LIBRARY_PATH="/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:/opt/rh/devtoolset-9/root/usr/lib64/dyninst:/opt/rh/devtoolset-9/root/usr/lib/dyninst:/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:\${LD_LIBRARY_PATH}" \
              && export LDFLAGS="-Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib64 -Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib"
            yum --enablerepo=extras install -y epel-release
            yum install -y https://packages.endpointdev.com/rhel/\${VERSION_ID}/os/\$(uname -m)/endpoint-repo.\$(uname -m).rpm
            yum update -y \
              && yum install -y make glibc-static libstdc++-static ccache curl git
          else
            echo "Unsupport distribution: \${ID}"
            exit 1
          fi
        else
          echo "Unknown distribution"
          exit 1
        fi
        curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1
        git config --system --add safe.directory '*'
        mkdir -p ${{ github.workspace }}/.cache
        echo "===== BUILD ====="
        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
          -DGGML_HIP=on -DAMDGPU_TARGETS="${AMDGPU_TARGETS}" \
          -DCMAKE_HIP_COMPILER="\$(hipconfig -l)/clang" \
          -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
          -DGGML_NATIVE=off \
          -DGGML_CUDA_DMMV_X=4096 \
          -DGGML_CUDA_MMV_Y=256 \
          -DGGML_OPENMP=off \
          -DGGML_RPC=on
        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
        echo "===== RESULT ====="
        ls -alh ${{ github.workspace }}/build/bin/
        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
          ldd --version
          ldd ${{ github.workspace }}/build/bin/llama-box || true
        else
          exit 1
        fi
        EOF
        chmod +x /tmp/entrypoint.sh
        cat /tmp/entrypoint.sh
        docker run \
          --rm \
          --privileged \
          --platform linux/${{ matrix.arch }} \
          --volume ${{ github.workspace }}:${{ github.workspace }} \
          --workdir ${{ github.workspace }} \
          --env DEBIAN_FRONTEND=noninteractive \
          --env CCACHE_DIR \
          --env AMDGPU_TARGETS \
          --env LLAMA_BOX_BUILD_VERSION \
          --volume /tmp/entrypoint.sh:/entrypoint.sh \
          --entrypoint /entrypoint.sh \
          ${{ matrix.distro_container_image }}
        echo "===== PACKAGE ====="
        mkdir -p ${{ github.workspace }}/out
        zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
    - name: Upload Artifact
      uses: actions/upload-artifact@v4
      with:
        path: ${{ github.workspace }}/out/*.zip
        name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}
# Job `linux-cuda` (an entry of the `jobs:` mapping): CUDA builds (12.4 and
# 11.8) inside the NVIDIA CentOS 7 devel images, compiled for every
# architecture listed in `cuda_arch`.
#
# The Build step writes /tmp/entrypoint.sh through an UNQUOTED heredoc, so
# `${{ ... }}`, `${CMAKE_VERSION}`, `${CUDA_ARCHITECTURES}` and `$(nproc)` are
# resolved while the script is generated, whereas `\${...}` / `\$(...)` are
# left for bash inside the container. `EOF` must stay at the block scalar's
# base indentation.
linux-cuda:
  strategy:
    fail-fast: false
    matrix:
      # see https://hub.docker.com/r/nvidia/cuda/tags?page=&page_size=&ordering=&name=devel.
      # 12.4 ==> 12.4.0, CentOS 7.
      # 11.8 ==> 11.8.0, CentOS 7
      # build fat binary,
      # see https://developer.nvidia.com/cuda-gpus.
      include:
        - arch: 'amd64'
          version: '12.4'
          distro_container_image: 'nvidia/cuda:12.4.0-devel-centos7'
          cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
        - arch: 'amd64'
          version: '11.8'
          distro_container_image: 'nvidia/cuda:11.8.0-devel-centos7'
          cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
  runs-on: ubuntu-22.04
  steps:
    - name: Maximize Docker Build Space
      uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
      with:
        deep-clean: false
        root-reserve-mb: 20480
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: 'recursive'
    - name: Setup Cache
      timeout-minutes: 5
      uses: actions/cache@v4
      with:
        key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }}
        path: |
          ${{ github.workspace }}/.cache
    # Skipped for the current matrix, which only lists amd64.
    - name: Setup QEMU
      if: ${{ matrix.arch == 'arm64' }}
      uses: docker/setup-qemu-action@v3
      with:
        image: tonistiigi/binfmt:qemu-v8.1.5
        platforms: "arm64"
    # Generates the container entrypoint, runs it in the CUDA image with the
    # workspace mounted at the same path, then zips the resulting binary.
    - name: Build
      env:
        CMAKE_VERSION: "3.22.1"
        CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
        CUDA_ARCHITECTURES: "${{ matrix.cuda_arch }}"
      run: |
        echo "===== SCRIPT ====="
        cat <<EOF > /tmp/entrypoint.sh
        #!/bin/bash
        if [ -f /etc/os-release ]; then
          source /etc/os-release
          cat /etc/os-release
          if [ "\${ID}" = "centos" ]; then
            # NB(thxCode): Patch for CentOS, see
            # https://github.com/ROCm/ROCm-docker/blob/db86386c24eeb45f5d3ba73564b00cc66566e537/dev/Dockerfile-centos-7.
            sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \
              && sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \
              && sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
            yum install -y centos-release-scl
            sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo \
              && sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo \
              && sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
            yum update -y \
              && yum install -y devtoolset-9 devtoolset-9-libatomic-devel devtoolset-9-elfutils-libelf-devel
            export PATH="/opt/rh/devtoolset-9/root/usr/bin:\${PATH}" \
              && export MANPATH="/opt/rh/devtoolset-9/root/usr/share/man:\${MANPATH}" \
              && export INFOPATH="/opt/rh/devtoolset-9/root/usr/share/info:\${INFOPATH}" \
              && export PKG_CONFIG_PATH="/opt/rh/devtoolset-9/root/usr/lib64/pkgconfig:\${PKG_CONFIG_PATH}" \
              && export PCP_DIR="/opt/rh/devtoolset-9/root" \
              && export PERL5LIB="/opt/rh/devtoolset-9/root/usr/lib64/perl5/vendor_perl:/opt/rh/devtoolset-9/root/usr/lib/perl5:/opt/rh/devtoolset-9/root/usr/share/perl5/" \
              && export LD_LIBRARY_PATH="/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:/opt/rh/devtoolset-9/root/usr/lib64/dyninst:/opt/rh/devtoolset-9/root/usr/lib/dyninst:/opt/rh/devtoolset-9/root/usr/lib64:/opt/rh/devtoolset-9/root/usr/lib:\${LD_LIBRARY_PATH}" \
              && export LDFLAGS="-Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib64 -Wl,-rpath=/opt/rh/devtoolset-9/root/usr/lib"
            yum --enablerepo=extras install -y epel-release
            yum install -y https://packages.endpointdev.com/rhel/\${VERSION_ID}/os/\$(uname -m)/endpoint-repo.\$(uname -m).rpm
            yum update -y \
              && yum install -y make glibc-static libstdc++-static ccache curl git
          else
            echo "Unsupport distribution: \${ID}"
            exit 1
          fi
        else
          echo "Unknown distribution"
          exit 1
        fi
        curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1
        git config --system --add safe.directory '*'
        mkdir -p ${{ github.workspace }}/.cache
        echo "===== BUILD ====="
        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
          -DGGML_CUDA=on -DGGML_CUDA_F16=on -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHITECTURES}" \
          -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
          -DGGML_NATIVE=off \
          -DGGML_CUDA_DMMV_X=4096 \
          -DGGML_CUDA_MMV_Y=256 \
          -DGGML_OPENMP=off \
          -DGGML_RPC=on
        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
        echo "===== RESULT ====="
        ls -alh ${{ github.workspace }}/build/bin/
        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
          ldd --version
          ldd ${{ github.workspace }}/build/bin/llama-box || true
        else
          exit 1
        fi
        EOF
        chmod +x /tmp/entrypoint.sh
        cat /tmp/entrypoint.sh
        docker run \
          --rm \
          --privileged \
          --platform linux/${{ matrix.arch }} \
          --volume ${{ github.workspace }}:${{ github.workspace }} \
          --workdir ${{ github.workspace }} \
          --env DEBIAN_FRONTEND=noninteractive \
          --env CCACHE_DIR \
          --env CUDA_ARCHITECTURES \
          --env LLAMA_BOX_BUILD_VERSION \
          --volume /tmp/entrypoint.sh:/entrypoint.sh \
          --entrypoint /entrypoint.sh \
          ${{ matrix.distro_container_image }}
        echo "===== PACKAGE ====="
        mkdir -p ${{ github.workspace }}/out
        zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
    - name: Upload Artifact
      uses: actions/upload-artifact@v4
      with:
        path: ${{ github.workspace }}/out/*.zip
        name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}
# Job `linux-oneapi` (an entry of the `jobs:` mapping): SYCL build inside the
# Intel oneAPI Base Kit Ubuntu 22.04 image; the container is started with
# CC=icx and CXX=icpx.
#
# The Build step writes /tmp/entrypoint.sh through an UNQUOTED heredoc, so
# `${{ ... }}`, `${CMAKE_VERSION}` and `$(nproc)` are resolved while the script
# is generated, whereas `\${...}` / `\$(...)` are left for bash inside the
# container. `EOF` must stay at the block scalar's base indentation.
linux-oneapi:
  strategy:
    fail-fast: false
    matrix:
      # see https://hub.docker.com/r/intel/oneapi-basekit/tags?page=&page_size=&ordering=&name=devel.
      # 2025.0 ==> 2025.0.0-0, Ubuntu 22.04.
      include:
        - arch: 'amd64'
          version: '2025.0'
          distro_container_image: 'intel/oneapi-basekit:2025.0.0-0-devel-ubuntu22.04'
  runs-on: ubuntu-22.04
  steps:
    - name: Maximize Docker Build Space
      uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
      with:
        deep-clean: false
        root-reserve-mb: 20480
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: 'recursive'
    - name: Setup Cache
      timeout-minutes: 5
      uses: actions/cache@v4
      with:
        key: cache-linux-oneapi-${{ matrix.arch }}-${{ matrix.version }}
        path: |
          ${{ github.workspace }}/.cache
    # Skipped for the current matrix, which only lists amd64.
    - name: Setup QEMU
      if: ${{ matrix.arch == 'arm64' }}
      uses: docker/setup-qemu-action@v3
      with:
        image: tonistiigi/binfmt:qemu-v8.1.5
        platforms: "arm64"
    # Generates the container entrypoint, runs it in the oneAPI image with the
    # workspace mounted at the same path, then zips the resulting binary.
    - name: Build
      env:
        CMAKE_VERSION: "3.22.1"
        CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
      run: |
        echo "===== SCRIPT ====="
        cat <<EOF > /tmp/entrypoint.sh
        #!/bin/bash
        if [ -f /etc/os-release ]; then
          source /etc/os-release
          cat /etc/os-release
          if [ "\${ID}" = "ubuntu" ]; then
            apt-get update -y \
              && apt-get install -y build-essential ccache curl git bc
            if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then
              apt-get update -y \
                && apt-get install -y software-properties-common
              add-apt-repository -y ppa:ubuntu-toolchain-r/test
              apt-get update -y \
                && apt-get install -y gcc-11 g++-11
              update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
              update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
            fi
          else
            echo "Unsupport distribution: \${ID}"
            exit 1
          fi
        else
          echo "Unknown distribution"
          exit 1
        fi
        curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1
        git config --system --add safe.directory '*'
        mkdir -p ${{ github.workspace }}/.cache
        echo "===== BUILD ====="
        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
          -DGGML_SYCL=on -DGGML_SYCL_F16=on \
          -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
          -DGGML_NATIVE=off \
          -DGGML_OPENMP=off \
          -DGGML_RPC=on
        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
        echo "===== RESULT ====="
        ls -alh ${{ github.workspace }}/build/bin/
        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
          ldd --version
          ldd ${{ github.workspace }}/build/bin/llama-box || true
        else
          exit 1
        fi
        EOF
        chmod +x /tmp/entrypoint.sh
        cat /tmp/entrypoint.sh
        docker run \
          --rm \
          --privileged \
          --platform linux/${{ matrix.arch }} \
          --volume ${{ github.workspace }}:${{ github.workspace }} \
          --workdir ${{ github.workspace }} \
          --env CC=icx \
          --env CXX=icpx \
          --env DEBIAN_FRONTEND=noninteractive \
          --env CCACHE_DIR \
          --env LLAMA_BOX_BUILD_VERSION \
          --volume /tmp/entrypoint.sh:/entrypoint.sh \
          --entrypoint /entrypoint.sh \
          ${{ matrix.distro_container_image }}
        echo "===== PACKAGE ====="
        mkdir -p ${{ github.workspace }}/out
        zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
    - name: Upload Artifact
      uses: actions/upload-artifact@v4
      with:
        path: ${{ github.workspace }}/out/*.zip
        name: llama-box-linux-${{ matrix.arch }}-oneapi-${{ matrix.version }}
linux-cann:
strategy:
fail-fast: false
matrix:
# see https://hub.docker.com/r/ascendai/cann/tags?page=&page_size=&ordering=&name=8.0.rc2.alpha003-910b.
# 8.0 ==> 8.0.rc2.alpha003, Ubuntu 20.04, OpenEuler 20.03
arch:
- 'amd64'
- 'arm64'
version:
- '8.0'
distro_container_image:
- 'ascendai/cann:8.0.rc2.alpha003-910b-openeuler20.03-py3.9'
- 'ascendai/cann:8.0.rc2.alpha003-910b-ubuntu20.04-py3.9'
- 'gpustack/ascendai-cann:8.0.RC2.alpha003-310p-openeuler20.03-py3.9'
- 'gpustack/ascendai-cann:8.0.RC2.alpha003-310p-ubuntu20.04-py3.9'
runs-on: ubuntu-22.04
steps:
- name: Maximize Docker Build Space
uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
with:
deep-clean: false
root-reserve-mb: 20480
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: 'recursive'
- name: Setup Cache
timeout-minutes: 5
uses: actions/cache@v4
with:
key: cache-linux-cann-${{ matrix.arch }}-${{ matrix.version }}-${{ matrix.distro_container_image }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}
path: |
${{ github.workspace }}/.cache
- name: Setup QEMU
if: ${{ matrix.arch == 'arm64' }}
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:qemu-v8.1.5
platforms: "arm64"
- name: Build
env:
CMAKE_VERSION: "3.22.1"
CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
run: |
echo "===== SCRIPT ====="
cat <<EOF > /tmp/entrypoint.sh
#!/bin/bash
if [ -f /etc/os-release ]; then
source /etc/os-release
cat /etc/os-release
if [ "\${ID}" = "ubuntu" ]; then
apt-get update -y \
&& apt-get install -y build-essential ccache curl git bc
if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then
apt-get update -y \
&& apt-get install -y software-properties-common
add-apt-repository -y ppa:ubuntu-toolchain-r/test
apt-get update -y \
&& apt-get install -y gcc-11 g++-11
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
fi
elif [ "\${ID}" = "openEuler" ]; then
yum update -y \
&& yum install -y gcc gcc-c++ gcc-toolset-10-gcc*
yum update -y \
&& yum install -y gcc-toolset-10-libstdc++-static make ccache curl git
export PATH="/opt/openEuler/gcc-toolset-10/root/usr/bin/:\${PATH}" && \
export LD_LIBRARY_PATH="/opt/openEuler/gcc-toolset-10/root/usr/lib64/:\${LD_LIBRARY_PATH}"
else
echo "Unsupport distribution: \${ID}"
exit 1
fi
else
echo "Unknown distribution"
exit 1
fi
source /usr/local/Ascend/ascend-toolkit/set_env.sh
curl -L "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-\$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1
git config --system --add safe.directory '*'
mkdir -p ${{ github.workspace }}/.cache
echo "===== BUILD ====="
cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
-DGGML_CANN=on \
-DSOC_TYPE=${{ contains(matrix.distro_container_image, '910b') && 'Ascend910B3' || 'Ascend310P3' }} \
-DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
-DGGML_NATIVE=off \
${{ matrix.arch == 'arm64' && '-DGGML_CPU_ARM_ARCH="armv8.2-a"' || '' }} \
${{ contains(matrix.distro_container_image, '310p') && '-DGGML_AVX2=off' || '' }} \
-DGGML_OPENMP=off \
-DGGML_RPC=on
cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
echo "===== RESULT ====="
ls -alh ${{ github.workspace }}/build/bin/
if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
ldd --version
ldd ${{ github.workspace }}/build/bin/llama-box || true
else
exit 1
fi
EOF
chmod +x /tmp/entrypoint.sh
cat /tmp/entrypoint.sh
docker run \
--rm \
--privileged \
--platform linux/${{ matrix.arch }} \
--volume ${{ github.workspace }}:${{ github.workspace }} \
--workdir ${{ github.workspace }} \
--env DEBIAN_FRONTEND=noninteractive \
--env CCACHE_DIR \
--env LLAMA_BOX_BUILD_VERSION \
--volume /tmp/entrypoint.sh:/entrypoint.sh \
--entrypoint /entrypoint.sh \
${{ matrix.distro_container_image }}
echo "===== PACKAGE ====="
mkdir -p ${{ github.workspace }}/out
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cann-${{ matrix.version }}${{ contains(matrix.distro_container_image, 'openeuler20.03') && '-openeuler20.03' || '' }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}.zip ${{ github.workspace }}/build/bin/llama-box
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
path: ${{ github.workspace }}/out/*.zip
name: llama-box-linux-${{ matrix.arch }}-cann-${{ matrix.version }}${{ contains(matrix.distro_container_image, 'openeuler20.03') && '-openeuler20.03' || '' }}${{ contains(matrix.distro_container_image, '310p') && '-310p' || '' }}
linux-musa:
  # Builds llama-box with the MUSA backend inside the vendor devel container,
  # then zips the binary and uploads it as a workflow artifact.
  strategy:
    fail-fast: false
    matrix:
      # see https://hub.docker.com/r/mthreads/musa/tags?page_size=&ordering=&name=ubuntu22.04.
      #   rc3.1 ==> rc3.1.0, Ubuntu 22.04.
      include:
        - arch: 'amd64'
          version: 'rc3.1'
          distro_container_image: 'mthreads/musa:rc3.1.0-devel-ubuntu22.04'
  runs-on: ubuntu-22.04
  steps:
    - name: Maximize Docker Build Space
      uses: gpustack/.github/.github/actions/maximize-docker-build-space@main
      with:
        deep-clean: false
        root-reserve-mb: 20480
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: 'recursive'
    - name: Setup Cache
      timeout-minutes: 5
      uses: actions/cache@v4
      with:
        key: cache-linux-musa-${{ matrix.arch }}-${{ matrix.version }}
        path: |
          ${{ github.workspace }}/.cache
    - name: Setup QEMU
      # only needed when the target platform differs from the runner's.
      if: ${{ matrix.arch == 'arm64' }}
      uses: docker/setup-qemu-action@v3
      with:
        image: tonistiigi/binfmt:qemu-v8.1.5
        platforms: "arm64"
    - name: Build
      env:
        CCACHE_DIR: "${{ github.workspace }}/.cache/ccache"
      run: |
        echo "===== SCRIPT ====="
        # Generate the container entrypoint. The heredoc delimiter is unquoted, so
        # unescaped expansions (e.g. the nproc call) are resolved on the runner while
        # the file is written; escaped ones (\$...) are evaluated inside the container.
        # bc is installed explicitly because the Ubuntu version check below pipes into it.
        cat <<EOF > /tmp/entrypoint.sh
        #!/bin/bash
        if [ -f /etc/os-release ]; then
          source /etc/os-release
          cat /etc/os-release
          if [ "\${ID}" = "ubuntu" ]; then
            apt-get update -y \
              && apt-get install -y build-essential ccache curl git cmake bc
            if (( \$(echo "\${VERSION_ID} < 21.04" | bc -l) )); then
              apt-get update -y \
                && apt-get install -y software-properties-common
              add-apt-repository -y ppa:ubuntu-toolchain-r/test
              apt-get update -y \
                && apt-get install -y gcc-11 g++-11
              update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10
              update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10
            fi
          else
            echo "Unsupport distribution: \$ID"
            exit 1
          fi
        else
          echo "Unknown distribution"
          exit 1
        fi
        git config --system --add safe.directory '*'
        mkdir -p ${{ github.workspace }}/.cache
        echo "===== BUILD ====="
        cmake -S ${{ github.workspace }} -B ${{ github.workspace }}/build -DCMAKE_BUILD_TYPE=Release \
          -DGGML_MUSA=on \
          -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} \
          -DGGML_NATIVE=off \
          -DGGML_OPENMP=off \
          -DGGML_RPC=on
        cmake --build ${{ github.workspace }}/build --target llama-box --config Release -- -j $(nproc)
        echo "===== RESULT ====="
        ls -alh ${{ github.workspace }}/build/bin/
        if [ -f ${{ github.workspace }}/build/bin/llama-box ]; then
          ldd --version
          ldd ${{ github.workspace }}/build/bin/llama-box || true
        else
          exit 1
        fi
        EOF
        chmod +x /tmp/entrypoint.sh
        cat /tmp/entrypoint.sh
        # Run the generated script as the container entrypoint; the workspace is
        # mounted at the same path so the build output lands on the runner.
        docker run \
          --rm \
          --privileged \
          --platform linux/${{ matrix.arch }} \
          --volume ${{ github.workspace }}:${{ github.workspace }} \
          --workdir ${{ github.workspace }} \
          --env DEBIAN_FRONTEND=noninteractive \
          --env CCACHE_DIR \
          --env LLAMA_BOX_BUILD_VERSION \
          --volume /tmp/entrypoint.sh:/entrypoint.sh \
          --entrypoint /entrypoint.sh \
          ${{ matrix.distro_container_image }}
        echo "===== PACKAGE ====="
        mkdir -p ${{ github.workspace }}/out
        zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-musa-${{ matrix.version }}.zip ${{ github.workspace }}/build/bin/llama-box
    - name: Upload Artifact
      uses: actions/upload-artifact@v4
      with:
        path: ${{ github.workspace }}/out/*.zip
        name: llama-box-linux-${{ matrix.arch }}-musa-${{ matrix.version }}
windows:
  # CPU-only Windows builds of llama-box, one per matrix entry; each build is
  # zipped and uploaded as a workflow artifact.
  strategy:
    fail-fast: false
    matrix:
      # Every variant below is built on Windows Server 2022:
      #   AVX2   ==> amd64.
      #   AVX512 ==> amd64.
      #   NEON   ==> arm64.
      include:
        - instruction: "avx2"
          arch: "amd64"
        - instruction: "avx512"
          arch: "amd64"
        - instruction: "neon"
          arch: "arm64"
  runs-on: windows-2022
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: "recursive"
        fetch-depth: 0
    - name: Setup Cache
      # doesn't support ccache,
      # see https://stackoverflow.com/questions/72829476/how-to-use-ccache-4-6-1-on-windows-msvc-with-cmake.
      # The build directory itself is part of the cached paths.
      uses: actions/cache@v4
      timeout-minutes: 5
      with:
        path: |
          ${{ github.workspace }}\build
          ${{ github.workspace }}\.cache
        key: cache-windows-${{ matrix.arch }}-${{ matrix.instruction }}
    - name: Deps
      run: |
        $ProgressPreference = 'SilentlyContinue'
        $ErrorActionPreference = "Stop"
        choco install ccache ninja curl -y
    - name: Build
      env:
        CCACHE_DIR: '${{ github.workspace }}\.cache\ccache'
      run: |
        $ProgressPreference = 'SilentlyContinue'
        $ErrorActionPreference = "Stop"

        # Configure and compile. arm64 goes through the Ninja Multi-Config
        # generator plus a toolchain file; amd64 uses CMake's default generator.
        Write-Host "===== BUILD ====="
        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
        cmake ${{ matrix.arch == 'arm64' && '-G "Ninja Multi-Config"' || '' }} -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
          -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} `
          -DGGML_NATIVE=off `
          ${{ matrix.instruction == 'avx2' && '-DGGML_AVX=on -DGGML_AVX_VNNI=off -DGGML_AVX2=on' || '' }} `
          ${{ matrix.instruction == 'avx512' && '-DGGML_AVX512=on -DGGML_AVX512_BF16=off -DGGML_AVX512_VBMI=on -DGGML_AVX512_VNNI=on' || '' }} `
          ${{ matrix.instruction == 'neon' && format('-DCMAKE_TOOLCHAIN_FILE={0}\llama-box\scripts\build-windows-arm64.cmake', github.workspace) || '' }} `
          -DGGML_STATIC=on `
          -DGGML_OPENMP=off `
          -DGGML_RPC=on
        cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- ${{ matrix.arch == 'arm64' && '-j ' || '/m:' }}${env:NUMBER_OF_PROCESSORS}

        # Fail the step when the expected executable was not produced.
        Write-Host "===== RESULT ====="
        Get-ChildItem -Path "${{ github.workspace }}\build\bin\Release\" -File -ErrorAction Ignore
        if (Test-Path -Path "${{ github.workspace }}\build\bin\Release\llama-box.exe") {
          llvm-objdump.exe -p "${{ github.workspace }}\build\bin\Release\llama-box.exe"
        } else {
          exit 1
        }

        Write-Host "===== PACKAGE ====="
        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
        Compress-Archive -Path "${{ github.workspace }}\build\bin\Release\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-${{ matrix.instruction }}.zip"
    - name: Upload Artifact
      uses: actions/upload-artifact@v4
      with:
        name: llama-box-windows-${{ matrix.arch }}-${{ matrix.instruction }}
        path: ${{ github.workspace }}\\out\\*.zip
windows-hip:
  # Builds llama-box for Windows with the HIP backend: installs the AMD HIP SDK,
  # compiles with the SDK's clang, then zips and uploads the executable.
  strategy:
    fail-fast: false
    matrix:
      # see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html.
      #   6.2 ==> 6.2.4, Windows Server 2022.
      # build fat binary,
      # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
      #     https://llvm.org/docs/AMDGPUUsage.html.
      # official gpu support list,
      # see https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html.
      include:
        - arch: 'amd64'
          version: '6.2'
          distro_binary_installer: 'https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe'
          hip_arch: 'gfx1030;gfx1100;gfx1101;gfx1102'
  runs-on: windows-2022
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: 'recursive'
    - name: Setup Cache
      timeout-minutes: 5
      uses: actions/cache@v4
      with:
        key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }}
        path: |
          ${{ github.workspace }}\.cache
    - name: Deps
      run: |
        $ErrorActionPreference = "Stop"
        $ProgressPreference = 'SilentlyContinue'
        choco install ccache curl -y
    - name: Setup HIP
      # Downloads and installs the SDK, then exports HIP_PATH through GITHUB_ENV
      # so the Build step can locate the toolchain.
      run: |
        $ErrorActionPreference = "Stop"
        $ProgressPreference = 'SilentlyContinue'
        # Timestamp format: 'MM' is the month, 'mm' is minutes.
        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] download AMD ROCm HIP SDK"
        curl.exe --retry 5 --retry-delay 5 `
          --output "${{ runner.temp }}\installer.exe" `
          --url "${{ matrix.distro_binary_installer }}"
        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] install AMD ROCm HIP SDK"
        Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
          -ArgumentList '-install'
        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] verify AMD ROCm HIP SDK"
        & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
        # The SDK root is two directory levels above clang.exe.
        $hipPath = "$(Resolve-Path -Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path)"
        "HIP_PATH=${hipPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
    - name: Build
      env:
        CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
        AMDGPU_TARGETS: "${{ matrix.hip_arch }}"
      run: |
        $ErrorActionPreference = "Stop"
        $ProgressPreference = 'SilentlyContinue'
        Write-Host "HIP_PATH=${env:HIP_PATH}"
        Write-Host "===== BUILD ====="
        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
        $env:CMAKE_PREFIX_PATH = "${env:HIP_PATH}"
        cmake -G "Unix Makefiles" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
          -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
          -DGGML_HIP=on -DAMDGPU_TARGETS="${env:AMDGPU_TARGETS}" `
          -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} `
          -DGGML_NATIVE=off `
          -DGGML_CUDA_DMMV_X=4096 `
          -DGGML_CUDA_MMV_Y=256 `
          -DGGML_OPENMP=off `
          -DGGML_RPC=on
        cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
        Write-Host "===== RESULT ====="
        Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
        # Fail the step when the expected executable was not produced.
        if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
          llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
        } else {
          exit 1
        }
        Write-Host "===== PACKAGE ====="
        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
        Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}.zip"
    - name: Upload Artifact
      uses: actions/upload-artifact@v4
      with:
        path: ${{ github.workspace }}\\out\\*.zip
        name: llama-box-windows-${{ matrix.arch }}-hip-${{ matrix.version }}
windows-cuda:
  # Builds llama-box for Windows with the CUDA backend: installs the selected
  # CUDA components, imports the MSVC environment, builds with Ninja, then zips
  # and uploads the executable.
  strategy:
    fail-fast: false
    matrix:
      # see https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=Server2022&target_type=exe_network.
      #   12.4 ==> 12.4.1, Windows Server 2022.
      #   11.8 ==> 11.8.0, Windows Server 2019.
      # build fat binary,
      # see https://developer.nvidia.com/cuda-gpus.
      include:
        - arch: 'amd64'
          version: '12.4'
          distro_binary_installer: 'https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe'
          cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
        - arch: 'amd64'
          version: '11.8'
          distro_binary_installer: 'https://developer.download.nvidia.com/compute/cuda/11.8.0/network_installers/cuda_11.8.0_windows_network.exe'
          cuda_arch: '60-real;61-real;70-real;75-real;80-real;86-real;89-real;90-real'
  runs-on: ${{ matrix.version == '11.8' && 'windows-2019' || 'windows-2022' }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: 'recursive'
    - name: Setup Cache
      timeout-minutes: 5
      uses: actions/cache@v4
      with:
        key: cache-windows-cuda-${{ matrix.arch }}-${{ matrix.version }}
        path: |
          ${{ github.workspace }}\.cache
    - name: Deps
      run: |
        $ErrorActionPreference = "Stop"
        $ProgressPreference = 'SilentlyContinue'
        choco install ccache ninja curl -y
    - name: Setup CUDA
      # Installs the CUDA components listed below and exports CUDA_PATH* plus the
      # variables printed by vcvarsall.bat through GITHUB_ENV for later steps.
      run: |
        $ErrorActionPreference = "Stop"
        $ProgressPreference = 'SilentlyContinue'
        # Timestamp format: 'MM' is the month, 'mm' is minutes.
        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] download NVIDIA CUDA SDK"
        curl.exe --retry 5 --retry-delay 5 `
          --output "${{ runner.temp }}\installer.exe" `
          --url "${{ matrix.distro_binary_installer }}"
        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] install NVIDIA CUDA SDK"
        Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
          -ArgumentList '-s','nvcc_${{ matrix.version }}','cudart_${{ matrix.version }}','cublas_${{ matrix.version }}','cublas_dev_${{ matrix.version }}','thrust_${{ matrix.version }}','visual_studio_integration_${{ matrix.version }}'
        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] verify NVIDIA CUDA SDK"
        & 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\nvcc.exe' --version
        # The toolkit root is two directory levels above nvcc.exe.
        $cudaPath = "$(Resolve-Path -Path 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v*\bin\nvcc.exe' | Split-Path | Split-Path)"
        # Turn the directory name "vX.Y" into "X_Y"; the dot is escaped so it only matches a literal '.'.
        $cudaVersion=($cudaPath | Split-Path -Leaf ) -replace 'v(\d+)\.(\d+)', '$1_$2'
        "CUDA_PATH=${cudaPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
        "CUDA_PATH_V${cudaVersion}=$cudaPath" | Out-File -FilePath $env:GITHUB_ENV -Append
        "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVersion}" | Out-File -FilePath $env:GITHUB_ENV -Append
        # Use the VS 2022 vcvarsall.bat when present, otherwise the VS 2019 one,
        # and persist every NAME=VALUE line it yields.
        if (Test-Path -Path "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat") {
          cmd /c 'call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
            if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
          }
        } else {
          cmd /c 'call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'amd64' && 'amd64' || 'amd64_arm64' }} && set' | ForEach-Object { `
            if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
          }
        }
    - name: Build
      env:
        CCACHE_DIR: "${{ github.workspace }}\\.cache\\ccache"
        CUDA_ARCHITECTURES: "${{ matrix.cuda_arch }}"
      run: |
        $ErrorActionPreference = "Stop"
        $ProgressPreference = 'SilentlyContinue'
        Write-Host "CUDA_PATH=${env:CUDA_PATH}"
        Write-Host "===== BUILD ====="
        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\.cache" -ErrorAction Ignore | Out-Null
        cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
          -DGGML_CUDA=on -DGGML_CUDA_F16=on -DCMAKE_CUDA_ARCHITECTURES="${env:CUDA_ARCHITECTURES}" `
          -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} `
          -DGGML_NATIVE=off `
          -DGGML_CUDA_DMMV_X=4096 `
          -DGGML_CUDA_MMV_Y=256 `
          -DGGML_OPENMP=off `
          -DGGML_RPC=on
        # The ggml target is built first with one job fewer than the processor
        # count; llama-box is then built with the full count.
        cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1))
        cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
        Write-Host "===== RESULT ====="
        Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
        # Fail the step when the expected executable was not produced.
        if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
          llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
        } else {
          exit 1
        }
        Write-Host "===== PACKAGE ====="
        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
        Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip"
    - name: Upload Artifact
      uses: actions/upload-artifact@v4
      with:
        path: ${{ github.workspace }}\\out\\*.zip
        name: llama-box-windows-${{ matrix.arch }}-cuda-${{ matrix.version }}
windows-oneapi:
  # Builds llama-box for Windows with the SYCL backend: installs the selected
  # Intel oneAPI components, imports the setvars.bat environment, builds with
  # Ninja, then zips and uploads the executable.
  strategy:
    fail-fast: false
    matrix:
      # see https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html?operatingsystem=windows&windows-install-type=online.
      #   2025.0 ==> 2025.0.0, Windows Server 2022.
      include:
        - arch: 'amd64'
          version: '2025.0'
          distro_binary_installer: 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882.exe'
  runs-on: windows-2022
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: 'recursive'
    - name: Setup Cache
      # ccache is not supported here,
      # as oneAPI needs its environment variables configured via setvars.bat.
      timeout-minutes: 5
      uses: actions/cache@v4
      with:
        key: cache-windows-oneapi-${{ matrix.arch }}-${{ matrix.version }}
        path: |
          ${{ github.workspace }}\build
    - name: Deps
      run: |
        $ErrorActionPreference = "Stop"
        $ProgressPreference = 'SilentlyContinue'
        choco install ninja curl -y
    - name: Setup oneAPI
      # Installs the oneAPI components listed below and exports ONEAPI_PATH,
      # ONEAPI_ROOT and the variables printed after setvars.bat through GITHUB_ENV.
      run: |
        $ErrorActionPreference = "Stop"
        $ProgressPreference = 'SilentlyContinue'
        # Timestamp format: 'MM' is the month, 'mm' is minutes.
        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] download Intel oneAPI SDK"
        curl.exe --retry 5 --retry-delay 5 `
          --output "${{ runner.temp }}\installer.exe" `
          --url "${{ matrix.distro_binary_installer }}"
        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] install Intel oneAPI SDK"
        Start-Process "${{ runner.temp }}\installer.exe" -NoNewWindow -Wait `
          -ArgumentList '-s','--action=install','--components=intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel','--eula=accept','-p=NEED_VS2017_INTEGRATION=0','-p=NEED_VS2019_INTEGRATION=0','-p=NEED_VS2022_INTEGRATION=0'
        Write-Host "I [$((Get-Date).ToString("yyyy-MM-dd HH:mm:ss"))] verify Intel oneAPI SDK"
        & 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' --version
        # ONEAPI_PATH is two directory levels above icx.exe; ONEAPI_ROOT is its parent.
        $oneapiPath = "$(Resolve-Path -Path 'C:\Program Files (x86)\Intel\oneAPI\*\bin\icx.exe' | Split-Path | Split-Path)"
        "ONEAPI_PATH=${oneapiPath}" | Out-File -FilePath $env:GITHUB_ENV -Append
        $oneapiRoot = "$(Split-Path -Path $oneapiPath)"
        "ONEAPI_ROOT=${oneapiRoot}" | Out-File -FilePath $env:GITHUB_ENV -Append
        # Persist every NAME=VALUE line printed after setvars.bat has run.
        cmd /c "call `"${oneapiRoot}\setvars.bat`" && set" | ForEach-Object { `
          if ($_ -Match '^(.*?)=(.*)$') { $_ | Out-File -FilePath $env:GITHUB_ENV -Append } `
        }
    - name: Build
      run: |
        $ErrorActionPreference = "Stop"
        $ProgressPreference = 'SilentlyContinue'
        Write-Host "ONEAPI_PATH=${env:ONEAPI_PATH}"
        Write-Host "ONEAPI_ROOT=${env:ONEAPI_ROOT}"
        Write-Host "===== BUILD ====="
        cmake -G "Ninja" -S ${{ github.workspace }} -B ${{ github.workspace }}\build -DCMAKE_BUILD_TYPE=Release `
          -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx `
          -DGGML_SYCL=on -DGGML_SYCL_F16=on `
          -DGGML_CPU_AARCH64=${{ matrix.arch == 'arm64' && 'on' || 'off' }} `
          -DGGML_NATIVE=off `
          -DGGML_OPENMP=off `
          -DGGML_RPC=on
        # The ggml target is built first with one job fewer than the processor
        # count; llama-box is then built with the full count.
        cmake --build ${{ github.workspace }}\build --target ggml --config Release -- -j $((${env:NUMBER_OF_PROCESSORS} - 1))
        cmake --build ${{ github.workspace }}\build --target llama-box --config Release -- -j ${env:NUMBER_OF_PROCESSORS}
        Write-Host "===== RESULT ====="
        Get-ChildItem -Path "${{ github.workspace }}\build\bin\" -File -ErrorAction Ignore
        # Fail the step when the expected executable was not produced.
        if (Test-Path -Path "${{ github.workspace }}\build\bin\llama-box.exe") {
          llvm-objdump.exe -p "${{ github.workspace }}\build\bin\llama-box.exe"
        } else {
          exit 1
        }
        Write-Host "===== PACKAGE ====="
        New-Item -Force -ItemType Directory -Path "${{ github.workspace }}\out" -ErrorAction Ignore | Out-Null
        Compress-Archive -Path "${{ github.workspace }}\build\bin\llama-box.exe" -DestinationPath "${{ github.workspace }}\out\llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}.zip"
    - name: Upload Artifact
      uses: actions/upload-artifact@v4
      with:
        path: ${{ github.workspace }}\\out\\*.zip
        name: llama-box-windows-${{ matrix.arch }}-oneapi-${{ matrix.version }}
release:
  # Publishes a GitHub release that carries the artifacts of every build job.
  # Runs only when the workflow was triggered by a tag ref.
  if: ${{ startsWith(github.ref, 'refs/tags/') }}
  permissions:
    contents: write
    actions: read
    id-token: write
  runs-on: ubuntu-22.04
  needs:
    - darwin
    - darwin-metal
    - linux
    - linux-hip
    - linux-cuda
    - linux-oneapi
    - linux-cann
    - linux-musa
    - windows
    - windows-hip
    - windows-cuda
    - windows-oneapi
  steps:
    - name: Download Artifact
      # merge-multiple places the files of all artifacts into the single out directory.
      uses: actions/download-artifact@v4
      with:
        path: ${{ github.workspace }}/out
        merge-multiple: true
    - name: Release
      uses: softprops/action-gh-release@v1
      with:
        fail_on_unmatched_files: true
        # The workflow-level env only defines LLAMA_BOX_BUILD_VERSION (set to
        # github.ref_name, i.e. the tag name for this job); env.VERSION is not
        # defined and would evaluate to an empty string.
        tag_name: "${{ env.LLAMA_BOX_BUILD_VERSION }}"
        # Any ref containing 'rc' is published as a pre-release.
        prerelease: ${{ contains(github.ref, 'rc') }}
        files: ${{ github.workspace }}/out/*