# feat: first commit (#27)
# Workflow file for this run.

name: ci

# Read-only token scopes; the release job widens these for itself.
permissions:
  contents: read
  pull-requests: read
  actions: read

env:
  # Release version, taken from the pushed ref name (e.g. "v1.2.3" on tag builds).
  VERSION: "${{ github.ref_name }}"

on:
  workflow_dispatch: {}
  push:
    tags:
      - "v*.*.*"
    branches:
      - main
    # Documentation/image-only changes do not need a CI build.
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"
  pull_request:
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "**.md"
      - "**.mdx"
      - "**.png"
      - "**.jpg"

# Cancel superseded runs: PR runs group by head_ref; other runs by run_id
# (i.e. never grouped together, so pushes to main/tags are not cancelled).
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
darwin-metal:
if: ${{ false }}
strategy:
fail-fast: false
matrix:
arch: [ amd64, arm64 ]
# see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
# https://support.apple.com/en-us/102894.
runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: 'recursive'
- name: Setup XCode
uses: maxim-lobanov/setup-xcode@v1
with:
xcode-version: '15.2'
- name: Setup Cache
timeout-minutes: 5
uses: actions/cache@v3
with:
key: cache-darwin-metal-${{ matrix.arch }}-${{ hashFiles('**/.gitmodules') }}
path: |
${{ github.workspace }}/.cache
- name: Deps
run: |
brew update && brew install ccache
- name: Build
run: |
echo "===== BUILD ====="
mkdir -p ${{ github.workspace }}/.cache
CCACHE_DIR=${{ github.workspace }}/.cache/ccache make -j LLAMA_METAL=1 LLAMA_NO_OPENMP=1 ${{ matrix.arch == 'amd64' && 'CFLAGS="-mfma -mf16c -mavx -mavx2" CXXFLAGS="-mfma -mf16c -mavx -mavx2"' || '' }}
echo "===== RESULT ====="
if [ -f ${{ github.workspace }}/bin/llama-box ]; then
ldd ${{ github.workspace }}/bin/llama-box
else
exit 1
fi
echo "===== PACKAGE ====="
mkdir -p ${{ github.workspace }}/out
zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/bin/*
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
path: ${{ github.workspace }}/out/*.zip
name: llama-box-darwin-${{ matrix.arch }}-metal
linux-hip:
if: ${{ false }}
strategy:
fail-fast: false
matrix:
arch: [ amd64 ]
version: [ '6.0.2' ]
runs-on: ubuntu-22.04
steps:
- name: Maximize Space
# see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force
- name: Clone
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: 'recursive'
- name: Setup Cache
timeout-minutes: 5
uses: actions/cache@v3
with:
key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }}
path: |
${{ github.workspace }}/.cache
- name: Setup QEMU
if: ${{ matrix.arch == 'arm64' }}
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:qemu-v7.0.0
platforms: "arm64"
- name: Build
# disable OpenMP to support static linking,
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
# build fat binary,
# see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
# https://llvm.org/docs/AMDGPUUsage.html.
env:
GPU_TARGETS: "gfx803 gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1010 gfx1030 gfx1100 gfx1101 gfx1102"
run: |
echo "===== SCRIPT ====="
cat <<EOF > /tmp/entrypoint.sh
#!/bin/bash
apt-get update && apt-get install -y build-essential git libgomp1 ccache
git config --global --add safe.directory /workspace/llama.cpp
make -j LLAMA_HIPBLAS=1 LLAMA_NO_OPENMP=1 ${{ matrix.arch == 'amd64' && 'CFLAGS="-mfma -mf16c -mavx -mavx2" CXXFLAGS="-mfma -mf16c -mavx -mavx2"' || '' }}
EOF
chmod +x /tmp/entrypoint.sh
cat /tmp/entrypoint.sh
echo "===== BUILD ====="
mkdir -p ${{ github.workspace }}/.cache
docker run \
--rm \
--privileged \
--platform linux/${{ matrix.arch }} \
--volume ${{ github.workspace }}:/workspace \
--volume /tmp/entrypoint.sh:/entrypoint.sh \
--entrypoint /entrypoint.sh \
--workdir /workspace \
--env CCACHE_DIR=/workspace/.cache/ccache \
--env CC=/opt/rocm/llvm/bin/clang \
--env CXX=/opt/rocm/llvm/bin/clang++ \
--env GPU_TARGETS="${{ env.GPU_TARGETS }}" \
--env AMDGPU_TARGETS="${{ env.GPU_TARGETS }}" \
rocm/dev-ubuntu-22.04:${{ matrix.version }}-complete
echo "===== RESULT ====="
if [ -f ${{ github.workspace }}/bin/llama-box ]; then
ldd ${{ github.workspace }}/bin/llama-box
else
exit 1
fi
echo "===== PACKAGE ====="
mkdir -p ${{ github.workspace }}/out
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}.zip ${{ github.workspace }}/bin/*
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
path: ${{ github.workspace }}/out/*.zip
name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}
linux-cuda:
if: ${{ false }}
strategy:
fail-fast: false
matrix:
arch: [ amd64 ]
version: [ '12.2.0', '11.7.1' ]
runs-on: ubuntu-22.04
steps:
- name: Maximize Space
# see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force
- name: Clone
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: 'recursive'
- name: Setup Cache
timeout-minutes: 5
uses: actions/cache@v3
with:
key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }}
path: |
${{ github.workspace }}/.cache
- name: Setup QEMU
if: ${{ matrix.arch == 'arm64' }}
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:qemu-v7.0.0
platforms: "arm64"
- name: Build
# disable OpenMP to support static linking,
# see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
# https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
run: |
echo "===== SCRIPT ====="
cat <<EOF > /tmp/entrypoint.sh
#!/bin/bash
apt-get update && apt-get install -y build-essential git libgomp1 ccache
git config --global --add safe.directory /workspace/llama.cpp
make -j LLAMA_CUDA=1 LLAMA_NO_OPENMP=1 LLAMA_CUDA_FORCE_MMQ=1 ${{ matrix.arch == 'amd64' && 'CFLAGS="-mfma -mf16c -mavx -mavx2" CXXFLAGS="-mfma -mf16c -mavx -mavx2"' || '' }}
EOF
chmod +x /tmp/entrypoint.sh
cat /tmp/entrypoint.sh
echo "===== BUILD ====="
mkdir -p ./.cache
docker run \
--rm \
--privileged \
--platform linux/${{ matrix.arch }} \
--volume ${{ github.workspace }}:/workspace \
--workdir /workspace \
--volume /tmp/entrypoint.sh:/entrypoint.sh \
--entrypoint /entrypoint.sh \
--env CCACHE_DIR=/workspace/.cache/ccache \
--env CUDA_DOCKER_ARCH=all \
nvidia/cuda:${{ matrix.version }}-devel-ubuntu22.04
echo "===== RESULT ====="
if [ -f ${{ github.workspace }}/bin/llama-box ]; then
ldd ${{ github.workspace }}/bin/llama-box
else
exit 1
fi
echo "===== PACKAGE ====="
mkdir -p ${{ github.workspace }}/out
zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip ${{ github.workspace }}/bin/*
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
path: ${{ github.workspace }}/out/*.zip
name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}
windows-hip:
strategy:
fail-fast: false
matrix:
arch: [ amd64 ]
# see https://www.amd.com/en/developer/resources/rocm-hub/hip-sdk.html.
version: [ '6.0.2' ]
runs-on: windows-2022
steps:
- name: Clone
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: 'recursive'
- name: Setup Cache
timeout-minutes: 5
uses: actions/cache@v3
with:
key: cache-windows-hip-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }}
path: |
${{ github.workspace }}/.cache
- name: Deps
run: |
$ErrorActionPreference = "Stop"
$WarningPreference = 'SilentlyContinue'
$VerbosePreference = 'SilentlyContinue'
$DebugPreference = 'SilentlyContinue'
$ProgressPreference = 'SilentlyContinue'
Write-Host "I install make"
choco install make
Write-Host "I install ccache"
choco install make ccache
Write-Host "I install AMD HIP SDK"
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ startsWith(matrix.version, '6') && '23.Q4' || '23.Q3' }}-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
Write-Host "I configure the PATH environment variable"
$path = $(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Split-Path | Split-Path)
$newMachinePath = $path + ";" + [System.Environment]::GetEnvironmentVariable("Path","Machine")
[Environment]::SetEnvironmentVariable("Path", $newMachinePath, [System.EnvironmentVariableTarget]::Machine)
$env:Path = $path + ";" + $env:Path
Write-Host "I verify Clang"
clang.exe --version
- name: Build
env:
GPU_TARGETS: "gfx803 gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1010 gfx1030 gfx1100 gfx1101 gfx1102"
run: |
Write-Host "===== BUILD ====="
New-Item -Force -ItemType Directory -Path "${{ github.workspace }}/.cache" -ErrorAction Ignore | Out-Null
$env:CCACHE_DIR = "${{ github.workspace }}/.cache/ccache"
make -j LLAMA_HIPBLAS=1 LLAMA_NO_OPENMP=1 ${{ matrix.arch == 'amd64' && 'CFLAGS="-mfma -mf16c -mavx -mavx2"' || '' }}
Write-Host "===== RESULT ====="
if (Test-Path -Path "${{ github.workspace }}/bin/llama-box.exe") {
llvm-objdump.exe -p "${{ github.workspace }}/bin/llama-box.exe"
} else {
exit 1
}
release:
if: ${{ startsWith(github.ref, 'refs/tags/') }}
permissions:
contents: write
actions: read
id-token: write
runs-on: ubuntu-22.04
needs:
- darwin-metal
- linux-hip
- linux-cuda
steps:
- name: Download Artifact
uses: actions/download-artifact@v4
with:
path: ${{ github.workspace }}/out
merge-multiple: true
- name: Release
uses: softprops/action-gh-release@v1
with:
fail_on_unmatched_files: true
tag_name: "${{ env.VERSION }}"
prerelease: ${{ contains(github.ref, 'rc') }}
files: ${{ github.workspace }}/out/*