
Commit

Merge pull request #23 from nextcloud/improvements
Improvements
kyteinsky authored Aug 22, 2024
2 parents b6059b1 + dfb01c4 commit fd39799
Showing 9 changed files with 448 additions and 366 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/docker-build-publish.yml
@@ -0,0 +1,53 @@
+# This workflow is provided via the organization template repository
+#
+# https://github.com/nextcloud/.github
+# https://docs.github.com/en/actions/learn-github-actions/sharing-workflows-with-your-organization
+
+name: Docker build and publish
+
+on:
+  push:
+    tags:
+      - '**'
+
+env:
+  APP_ID: translate2
+
+jobs:
+  build_and_publish:
+    runs-on: ubuntu-latest
+
+    # Only allowed to be run on nextcloud repositories
+    if: ${{ github.repository_owner == 'nextcloud' }}
+
+    steps:
+      - name: Checkout app
+        uses: actions/checkout@v4
+
+      - name: Get app version
+        id: appinfo
+        uses: skjnldsv/xpath-action@7e6a7c379d0e9abc8acaef43df403ab4fc4f770c # master
+        with:
+          filename: appinfo/info.xml
+          expression: "/info/version/text()"
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          tags: |
+            ghcr.io/nextcloud/${{ env.APP_ID }}:latest
+            ghcr.io/nextcloud/${{ env.APP_ID }}:${{ fromJson(steps.appinfo.outputs.result).version }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
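For context on the tagging above: the "Get app version" step reads the app version out of appinfo/info.xml so it can be reused as the second image tag. A rough Python equivalent of that XPath lookup, shown as an illustrative sketch only (the file name and the XPath come from the workflow; the helper code is not part of the repo):

    # Mirrors the xpath-action step using the standard library. Assumes appinfo/info.xml
    # has an <info> root with a <version> child, which is what "/info/version/text()" implies.
    import xml.etree.ElementTree as ET

    root = ET.parse("appinfo/info.xml").getroot()   # the <info> element
    version = root.findtext("version")              # e.g. "1.0.0"
    app_id = "translate2"                           # APP_ID from the env block above
    print(f"ghcr.io/nextcloud/{app_id}:{version}")  # the versioned tag pushed above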
1 change: 1 addition & 0 deletions .github/workflows/integration_test.yml
@@ -41,6 +41,7 @@ jobs:
       APP_ID: llm2
       APP_PORT: 9080
       APP_SECRET: 12345
+      COMPUTE_DEVICE: cpu
       NEXTCLOUD_URL: http://localhost:8080
 
     services:
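The added COMPUTE_DEVICE variable is read by the app at startup (see the lib/chains.py hunk further down); pinning it to cpu keeps the CI run from attempting GPU offload on runners without a GPU. A one-line sketch of that lookup, with the "cuda" default taken from the chains.py change:

    import os

    compute_device = os.getenv("COMPUTE_DEVICE", "cuda")  # resolves to "cpu" in this CI job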
26 changes: 16 additions & 10 deletions Dockerfile
@@ -3,23 +3,29 @@ FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN apt update
-RUN apt install -y pipx build-essential
+RUN apt install -y pipx build-essential git vim
 RUN pipx install poetry
 
 ENV DEBIAN_FRONTEND=dialog
 ENV PATH="/root/.local/bin:${PATH}"
+ENV CMAKE_ARGS="-DGGML_CUDA=on"
+
+WORKDIR /app
+
+# Install requirements
+COPY pyproject.toml .
+COPY poetry.lock .
+COPY healthcheck.sh .
+
+RUN poetry install
+RUN ln -s /usr/local/cuda/compat/libcuda.so.1 /usr/lib/x86_64-linux-gnu/
 
 ADD li[b] /app/lib
 ADD model[s] /app/models
 ADD default_confi[g] /app/default_config
 
-# Install requirements
-COPY pyproject.toml /app
-COPY poetry.lock /app
-COPY init.sh /app
-COPY healthcheck.sh /app
-
-WORKDIR /app
-ENTRYPOINT ["bash", "init.sh"]
+WORKDIR /app/lib
+ENTRYPOINT ["poetry", "run", "python3", "main.py"]
 
 LABEL org.opencontainers.image.source=https://github.com/nextcloud/llm2
-HEALTHCHECK --interval=2s --timeout=2s --retries=300 CMD /app/healthcheck.sh
+HEALTHCHECK --interval=2s --timeout=2s --retries=300 CMD /app/healthcheck.sh
53 changes: 28 additions & 25 deletions default_config/config.json
@@ -1,58 +1,61 @@
 {
   "llama-2-7b-chat.Q4_K_M": {
     "prompt": "<|im_start|> system\n{system_prompt}\n<|im_end|>\n<|im_start|> user\n{user_prompt}\n<|im_end|>\n<|im_start|> assistant\n",
-    "gpt4all_config": {
-      "max_tokens": 4096,
-      "n_predict": 2048,
+    "loader_config": {
+      "n_ctx": 4096,
+      "max_tokens": 2048,
       "stop": ["<|im_end|>"]
     }
   },
   "gpt4all-falcon-q4_0": {
     "prompt": "### Instruction: {system_prompt}\n{user_prompt}\n### Response:",
-    "gpt4all_config": {
-      "max_tokens": 4096,
-      "n_predict": 2048,
+    "loader_config": {
+      "n_ctx": 4096,
+      "max_tokens": 2048,
       "stop": ["### Instruction:"]
     }
   },
   "leo-hessianai-13b-chat-bilingual.Q4_K_M": {
     "prompt": "<|im_start|> system\n{system_prompt}\n<|im_end|>\n<|im_start|> user\n{user_prompt}\n<|im_end|>\n<|im_start|> assistant\n",
-    "gpt4all_config": {
-      "max_tokens": 4096,
-      "n_predict": 2048,
+    "loader_config": {
+      "n_ctx": 4096,
+      "max_tokens": 2048,
       "stop": ["<|im_end|>"]
     }
   },
   "neuralbeagle14-7b.Q4_K_M": {
     "prompt": "<|im_start|> system\n{system_prompt}\n<|im_end|>\n<|im_start|> user\n{user_prompt}\n<|im_end|>\n<|im_start|> assistant\n",
-    "gpt4all_config": {
-      "max_tokens": 8000,
-      "n_predict": 4000,
+    "loader_config": {
+      "n_ctx": 8000,
+      "max_tokens": 4000,
       "stop": ["<|im_end|>"]
     }
   },
   "Meta-Llama-3-8B-Instruct.Q4_K_M": {
     "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n{user_prompt}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n",
-    "gpt4all_config": {
-      "max_tokens": 8000,
-      "n_predict": 4000,
-      "stop": ["<|eot_id|>"]
+    "loader_config": {
+      "n_ctx": 8000,
+      "max_tokens": 4000,
+      "stop": ["<|eot_id|>"],
+      "temperature": 0.3
     }
   },
   "Meta-Llama-3.1-8B-Instruct.Q4_K_M": {
     "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n{user_prompt}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n",
-    "gpt4all_config": {
-      "max_tokens": 128000,
-      "n_predict": 60000,
-      "stop": ["<|eot_id|>"]
+    "loader_config": {
+      "n_ctx": 128000,
+      "max_tokens": 60000,
+      "stop": ["<|eot_id|>"],
+      "temperature": 0.3
     }
   },
   "default": {
     "prompt": "<|im_start|> system\n{system_prompt}\n<|im_end|>\n<|im_start|> user\n{user_prompt}\n<|im_end|>\n<|im_start|> assistant\n",
-    "gpt4all_config": {
-      "max_tokens": 4096,
-      "n_predict": 2048,
-      "stop": ["<|im_end|>"]
+    "loader_config": {
+      "n_ctx": 4096,
+      "max_tokens": 2048,
+      "stop": ["<|im_end|>"],
+      "temperature": 0.6
     }
   }
-}
+}
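For reference, each entry above pairs a prompt template with a loader_config block that is passed straight through to the model loader (see the lib/chains.py change below). A minimal sketch of how such an entry might be looked up; the helper name and the fallback logic are illustrative, only the key names and the ".gguf" split appear in this diff:

    import json

    def load_model_entry(config_path: str, model_file: str) -> dict:
        # Fall back to the "default" entry when a model has no dedicated section.
        with open(config_path) as f:
            config = json.load(f)
        model_name = model_file.split(".gguf")[0]
        return config.get(model_name, config["default"])

    entry = load_model_entry("default_config/config.json", "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf")
    prompt_template = entry["prompt"]       # template with {system_prompt} and {user_prompt} slots
    loader_kwargs = entry["loader_config"]  # n_ctx, max_tokens, stop, temperature, ...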
7 changes: 0 additions & 7 deletions init.sh

This file was deleted.

46 changes: 10 additions & 36 deletions lib/chains.py
@@ -2,19 +2,15 @@
"""

import os
import json

from free_prompt import FreePromptChain
from headline import HeadlineChain
from topics import TopicsChain
from summarize import SummarizeChain
from contextwrite import ContextWriteChain
from reformulate import ReformulateChain
from simplify import SimplifyChain
from formalize import FormalizeChain
from langchain_community.llms import LlamaCpp
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import json
from nc_py_api.ex_app import persistent_storage

dir_path = os.path.dirname(os.path.realpath(__file__))
@@ -42,18 +38,6 @@ def get_model_config(file_name):
     return model_config
 
 
-config = {
-    "llama": {
-        "n_batch": 10,
-        "n_ctx": 4096,
-        "n_gpu_layers": -1,
-        "model_kwargs": {
-            "device": "cuda"
-        }
-    }
-}
-
-
 def generate_llm_chain(file_name):
     model_config = get_model_config(file_name)
 
@@ -62,35 +46,24 @@ def generate_llm_chain(file_name):
     if not os.path.exists(path):
         path = os.path.join(persistent_storage(), file_name)
 
+    compute_device = os.getenv("COMPUTE_DEVICE", "cuda")
     try:
         llm = LlamaCpp(
             model_path=path,
-            model_kwargs={'device': config["llama"]["model_kwargs"]["device"]},
-            n_gpu_layers=config["llama"]["n_gpu_layers"],
-            n_ctx=model_config['gpt4all_config']["n_predict"],
-            max_tokens=model_config["gpt4all_config"]["max_tokens"],
-            stop=model_config["gpt4all_config"]["stop"],
-            echo=True
+            **{
+                "n_gpu_layers": (0, -1)[compute_device != "cpu"],
+                **model_config["loader_config"],
+            },
         )
-        print(f'Using: {config["llama"]["model_kwargs"]["device"]}', flush=True)
-    except Exception as gpu_error:
-        try:
-            llm = LlamaCpp(model_path=path, device="cpu",
-                           n_ctx=model_config['gpt4all_config']["n_predict"],
-                           max_tokens=model_config["gpt4all_config"]["max_tokens"],
-                           stop=model_config["gpt4all_config"]["stop"],
-                           echo=True)
-            print("Using: CPU", flush=True)
-        except Exception as cpu_error:
-            raise RuntimeError(f"Error: Failed to initialize the LLM model on both GPU and CPU.", f"{cpu_error}") from cpu_error
+    except Exception as e:
+        print(f"Failed to load model '{path}' with compute device '{compute_device}'")
+        raise e
 
     prompt = PromptTemplate.from_template(model_config['prompt'])
 
     return LLMChain(llm=llm, prompt=prompt)
 
 
-
-
 def generate_chains():
     chains = {}
     for file in os.scandir(models_folder_path):
Expand All @@ -104,6 +77,7 @@ def generate_chains():

return chains


def generate_chain_for_model(file_name, chains):
model_name = file_name.split('.gguf')[0]

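A side note on the loader call in the generate_llm_chain hunk above: (0, -1)[compute_device != "cpu"] is a tuple-indexing idiom, so n_gpu_layers becomes -1 (the llama.cpp convention for offloading all layers) whenever COMPUTE_DEVICE is anything other than cpu, and 0 otherwise. An equivalent, more explicit spelling, purely for illustration:

    # The boolean indexes the two-element tuple: False -> 0, True -> -1.
    compute_device = "cpu"
    n_gpu_layers = (0, -1)[compute_device != "cpu"]
    assert n_gpu_layers == (-1 if compute_device != "cpu" else 0)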
