feat: first commit

Signed-off-by: thxCode <thxcode0824@gmail.com>
gpustack · Jun 25, 2024 · fc5beec · fc5beec
commit fc5beec
Show file tree

Hide file tree

Showing 14 changed files with 6,070 additions and 0 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1 @@
+# Let Git auto-detect text files and normalize their line endings to LF.
+* text=auto eol=lf
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,263 @@
+# Builds llama-box for each supported platform/accelerator and, for tag refs,
+# publishes the packaged archives through the release job.
+name: ci
+
+# Runs on manual dispatch, on pushes to main or to v*.*.* tags, and on pull
+# requests targeting main. Branch pushes and pull requests whose changes touch
+# only documentation or image files are skipped.
+on:
+  workflow_dispatch: {}
+  push:
+    branches:
+      - main
+    tags:
+      - 'v*.*.*'
+    paths-ignore:
+      - 'docs/**'
+      - '**.md'
+      - '**.mdx'
+      - '**.png'
+      - '**.jpg'
+  pull_request:
+    branches:
+      - main
+    paths-ignore:
+      - 'docs/**'
+      - '**.md'
+      - '**.mdx'
+      - '**.png'
+      - '**.jpg'
+
+# Default token scopes for every job: read-only.
+permissions:
+  actions: read
+  contents: read
+  pull-requests: read
+
+env:
+  # Short name of the branch or tag that triggered the run; the release job
+  # uses it as the tag name.
+  VERSION: '${{ github.ref_name }}'
+
+# Pull-request runs share one group per ref, so a newer run cancels the one in
+# progress; any other event falls back to the unique run id and is never
+# cancelled by a later run.
+concurrency:
+  group: '${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}'
+  cancel-in-progress: true
+
+jobs:
+  # Native macOS build with Metal enabled, one job per CPU architecture.
+  darwin-metal:
+    strategy:
+      fail-fast: false
+      matrix:
+        arch: [ amd64, arm64 ]
+    # amd64 builds run on macos-13 and arm64 builds on macos-14,
+    # see https://github.com/actions/runner-images?tab=readme-ov-file#available-images,
+    #     https://support.apple.com/en-us/102894.
+    runs-on: ${{ matrix.arch == 'amd64' && 'macos-13' || 'macos-14' }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # full history plus all submodules.
+          fetch-depth: 0
+          submodules: 'recursive'
+      - name: Setup XCode
+        uses: maxim-lobanov/setup-xcode@v1
+        with:
+          xcode-version: '15.2'
+      - name: Setup Cache
+        # cache@v4 matches the major version of the other first-party actions
+        # used in this workflow; the key changes whenever .gitmodules changes.
+        timeout-minutes: 5
+        uses: actions/cache@v4
+        with:
+          key: cache-darwin-metal-${{ matrix.arch }}-${{ hashFiles('**/.gitmodules') }}
+          path: |
+            ${{ github.workspace }}/.cache
+      - name: Deps
+        # best effort: a failed ccache install must not fail the job.
+        continue-on-error: true
+        run: |
+          brew update && brew install ccache
+      - name: Build
+        # compile, print the binary's type and linked libraries, then zip
+        # everything under bin/ into out/ for the upload step.
+        run: |
+          echo "===== BUILD ====="
+          mkdir -p ${{ github.workspace }}/.cache
+          CCACHE_DIR=${{ github.workspace }}/.cache/ccache make -j LLAMA_METAL=1
+
+          echo "===== RESULT ====="
+          [ -f ${{ github.workspace }}/bin/llama-box ] && file ${{ github.workspace }}/bin/llama-box && otool -L ${{ github.workspace }}/bin/llama-box
+
+          echo "===== PACKAGE ====="
+          mkdir -p ${{ github.workspace }}/out
+          zip -j ${{ github.workspace }}/out/llama-box-darwin-${{ matrix.arch }}-metal.zip ${{ github.workspace }}/bin/*
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v4
+        with:
+          path: ${{ github.workspace }}/out/*.zip
+          name: llama-box-darwin-${{ matrix.arch }}-metal
+
+  # Linux build against ROCm/HIP, compiled inside the rocm/dev-ubuntu-22.04
+  # "-complete" image for each ROCm version in the matrix.
+  linux-hip:
+    strategy:
+      fail-fast: false
+      matrix:
+        arch: [ amd64 ]
+        version: [ '6.0.2' ]
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Maximize Space
+        # remove preinstalled toolchains and docker images to free disk space,
+        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          # full history plus all submodules.
+          fetch-depth: 0
+          submodules: 'recursive'
+      - name: Setup Cache
+        # cache@v4 matches the major version of the other first-party actions
+        # used in this workflow; the key changes whenever .gitmodules changes.
+        timeout-minutes: 5
+        uses: actions/cache@v4
+        with:
+          key: cache-linux-hip-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }}
+          path: |
+            ${{ github.workspace }}/.cache
+      - name: Setup QEMU
+        # only needed for arm64 matrix entries; the current matrix has none.
+        if: ${{ matrix.arch == 'arm64' }}
+        uses: docker/setup-qemu-action@v3
+        with:
+          image: tonistiigi/binfmt:qemu-v7.0.0
+          platforms: "arm64"
+      - name: Build
+        # disable OpenMP to support static linking,
+        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
+        #     https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
+        # build fat binary,
+        # see https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878,
+        #     https://llvm.org/docs/AMDGPUUsage.html.
+        # the script below is generated on the runner and mounted as the
+        # container entrypoint, with the workspace mounted at /workspace.
+        env:
+          GPU_TARGETS: "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102"
+        run: |
+          echo "===== SCRIPT ====="
+          cat <<EOF > /tmp/entrypoint.sh
+          #!/bin/bash
+          apt-get update && apt-get install -y build-essential git libgomp1 ccache
+          git config --global --add safe.directory /workspace/llama.cpp
+          make -j LLAMA_HIPBLAS=1 LLAMA_NO_OPENMP=1 ${{ matrix.arch == 'amd64' && 'CFLAGS="-mfma -mf16c -mavx" CXXFLAGS="-mfma -mf16c -mavx"' || '' }}
+          EOF
+          chmod +x /tmp/entrypoint.sh
+          cat /tmp/entrypoint.sh
+
+          echo "===== BUILD ====="
+          mkdir -p ${{ github.workspace }}/.cache
+          docker run \
+            --rm \
+            --privileged \
+            --platform linux/${{ matrix.arch }} \
+            --volume ${{ github.workspace }}:/workspace \
+            --volume /tmp/entrypoint.sh:/entrypoint.sh \
+            --entrypoint /entrypoint.sh \
+            --workdir /workspace \
+            --env CCACHE_DIR=/workspace/.cache/ccache \
+            --env CC=/opt/rocm/llvm/bin/clang \
+            --env CXX=/opt/rocm/llvm/bin/clang++ \
+            --env GPU_TARGETS="${{ env.GPU_TARGETS }}" \
+            --env AMDGPU_TARGETS="${{ env.GPU_TARGETS }}" \
+            rocm/dev-ubuntu-22.04:${{ matrix.version }}-complete
+
+          echo "===== RESULT ====="
+          [ -f ${{ github.workspace }}/bin/llama-box ] && file ${{ github.workspace }}/bin/llama-box && ldd ${{ github.workspace }}/bin/llama-box
+
+          echo "===== PACKAGE ====="
+          mkdir -p ${{ github.workspace }}/out
+          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}.zip ${{ github.workspace }}/bin/*
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v4
+        with:
+          path: ${{ github.workspace }}/out/*.zip
+          name: llama-box-linux-${{ matrix.arch }}-hip-${{ matrix.version }}
+
+  # Linux build against CUDA, compiled inside the nvidia/cuda devel image for
+  # each CUDA version in the matrix.
+  linux-cuda:
+    strategy:
+      fail-fast: false
+      matrix:
+        arch: [ amd64 ]
+        version: [ '12.2.0', '11.7.1' ]
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Maximize Space
+        # remove preinstalled toolchains and docker images to free disk space,
+        # see https://github.com/easimon/maximize-build-space/blob/master/action.yml.
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+      - name: Clone
+        uses: actions/checkout@v4
+        with:
+          # full history plus all submodules.
+          fetch-depth: 0
+          submodules: 'recursive'
+      - name: Setup Cache
+        # cache@v4 matches the major version of the other first-party actions
+        # used in this workflow; the key changes whenever .gitmodules changes.
+        timeout-minutes: 5
+        uses: actions/cache@v4
+        with:
+          key: cache-linux-cuda-${{ matrix.arch }}-${{ matrix.version }}-${{ hashFiles('**/.gitmodules') }}
+          path: |
+            ${{ github.workspace }}/.cache
+      - name: Setup QEMU
+        # only needed for arm64 matrix entries; the current matrix has none.
+        if: ${{ matrix.arch == 'arm64' }}
+        uses: docker/setup-qemu-action@v3
+        with:
+          image: tonistiigi/binfmt:qemu-v7.0.0
+          platforms: "arm64"
+      - name: Build
+        # disable OpenMP to support static linking,
+        # see https://github.com/ggerganov/llama.cpp/issues/7743#issuecomment-2148342691,
+        #     https://github.com/ggerganov/llama.cpp/issues/7719#issuecomment-2147631216.
+        # the script below is generated on the runner and mounted as the
+        # container entrypoint, with the workspace mounted at /workspace.
+        run: |
+          echo "===== SCRIPT ====="
+          cat <<EOF > /tmp/entrypoint.sh
+          #!/bin/bash
+          apt-get update && apt-get install -y build-essential git libgomp1 ccache
+          git config --global --add safe.directory /workspace/llama.cpp
+          make -j LLAMA_CUDA=1 LLAMA_NO_OPENMP=1 ${{ matrix.arch == 'amd64' && 'CFLAGS="-mfma -mf16c -mavx" CXXFLAGS="-mfma -mf16c -mavx"' || '' }}
+          EOF
+          chmod +x /tmp/entrypoint.sh
+          cat /tmp/entrypoint.sh
+
+          echo "===== BUILD ====="
+          mkdir -p ${{ github.workspace }}/.cache
+          docker run \
+            --rm \
+            --privileged \
+            --platform linux/${{ matrix.arch }} \
+            --volume ${{ github.workspace }}:/workspace \
+            --volume /tmp/entrypoint.sh:/entrypoint.sh \
+            --entrypoint /entrypoint.sh \
+            --workdir /workspace \
+            --env CCACHE_DIR=/workspace/.cache/ccache \
+            --env CUDA_DOCKER_ARCH=all \
+            nvidia/cuda:${{ matrix.version }}-devel-ubuntu22.04
+
+          echo "===== RESULT ====="
+          [ -f ${{ github.workspace }}/bin/llama-box ] && file ${{ github.workspace }}/bin/llama-box && ldd ${{ github.workspace }}/bin/llama-box
+
+          echo "===== PACKAGE ====="
+          mkdir -p ${{ github.workspace }}/out
+          zip -j ${{ github.workspace }}/out/llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}.zip ${{ github.workspace }}/bin/*
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v4
+        with:
+          path: ${{ github.workspace }}/out/*.zip
+          name: llama-box-linux-${{ matrix.arch }}-cuda-${{ matrix.version }}
+
+  # Collects the archives uploaded by the build jobs and attaches them to a
+  # GitHub release; runs for tag refs only.
+  release:
+    if: ${{ startsWith(github.ref, 'refs/tags/') }}
+    # creating a release and uploading assets needs write access to repository
+    # contents, but the workflow-level default token is read-only. A job-level
+    # permissions block replaces the default, so the actions scope is restated.
+    permissions:
+      contents: write
+      actions: read
+    runs-on: ubuntu-22.04
+    needs:
+      - darwin-metal
+      - linux-hip
+      - linux-cuda
+    steps:
+      - name: Download Artifact
+        uses: actions/download-artifact@v4
+        with:
+          # no artifact name is given, so all artifacts of this run are
+          # downloaded and merged into a single flat directory.
+          path: ${{ github.workspace }}/out
+          merge-multiple: true
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        with:
+          fail_on_unmatched_files: true
+          tag_name: "${{ env.VERSION }}"
+          # any ref containing "rc" is published as a pre-release.
+          prerelease: ${{ contains(github.ref, 'rc') }}
+          files: ${{ github.workspace }}/out/*
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,35 @@
+# Files: OS metadata, compiled objects/libraries/executables, model and other
+# binary blobs, logs, temporary and editor swap files, among others.
+.DS_Store
+*.o
+*.a
+*.so
+*.gguf
+*.bin
+*.exe
+*.exe~
+*.dll
+*.dylib
+*.log
+*.dot
+*.bat
+*.tmp
+*.metallib
+*.out
+*.swp
+*.swo
+.clang-tidy
+version.cpp
+
+# Directories: IDE settings, caches and build output. The CI workflow keeps its
+# ccache under .cache/, expects the built binary in bin/ and writes the
+# packaged archives to out/.
+.idea/
+.vscode/
+.vs/
+.build/
+.cache/
+.ccls-cache/
+.direnv/
+.sbin/
+.dist/
+bin/
+out/
+tmp/
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+# llama.cpp is tracked as a git submodule; the CI workflow checks it out
+# recursively and derives its cache keys from the hash of this file.
+[submodule "llama.cpp"]
+	path = llama.cpp
+	url = https://github.com/ggerganov/llama.cpp
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 The llama-box authors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.