Improvements #23

Merged
3 commits merged on Aug 22, 2024
53 changes: 53 additions & 0 deletions .github/workflows/docker-build-publish.yml
@@ -0,0 +1,53 @@
# This workflow is provided via the organization template repository
#
# https://github.com/nextcloud/.github
# https://docs.github.com/en/actions/learn-github-actions/sharing-workflows-with-your-organization

name: Docker build and publish

on:
push:
tags:
- '**'

env:
APP_ID: translate2

jobs:
build_and_publish:
runs-on: ubuntu-latest

# Only allowed to be run on nextcloud repositories
if: ${{ github.repository_owner == 'nextcloud' }}

steps:
- name: Checkout app
uses: actions/checkout@v4

- name: Get app version
id: appinfo
uses: skjnldsv/xpath-action@7e6a7c379d0e9abc8acaef43df403ab4fc4f770c # master
with:
filename: appinfo/info.xml
expression: "/info/version/text()"

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Build and push
uses: docker/build-push-action@v6
with:
context: .
push: true
tags: |
ghcr.io/nextcloud/${{ env.APP_ID }}:latest
ghcr.io/nextcloud/${{ env.APP_ID }}:${{ fromJson(steps.appinfo.outputs.result).version }}
cache-from: type=gha
cache-to: type=gha,mode=max
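
The tag list above combines a fixed latest tag with the app version that the skjnldsv/xpath-action step extracts from appinfo/info.xml; judging by fromJson(steps.appinfo.outputs.result).version, the action's result is presumably a JSON object with a version field. A minimal Python sketch of the same lookup, assuming the usual Nextcloud appinfo/info.xml layout with <version> as a direct child of <info>:

import os
import xml.etree.ElementTree as ET

# Read the app version the way the workflow's XPath step does (/info/version/text()).
version = ET.parse("appinfo/info.xml").getroot().findtext("version")
app_id = os.environ.get("APP_ID", "translate2")  # mirrors env.APP_ID in the workflow
print(f"ghcr.io/nextcloud/{app_id}:latest")
print(f"ghcr.io/nextcloud/{app_id}:{version}")
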
1 change: 1 addition & 0 deletions .github/workflows/integration_test.yml
@@ -41,6 +41,7 @@ jobs:
APP_ID: llm2
APP_PORT: 9080
APP_SECRET: 12345
COMPUTE_DEVICE: cpu
NEXTCLOUD_URL: http://localhost:8080

services:
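
The added COMPUTE_DEVICE: cpu entry matches the environment variable that the reworked lib/chains.py (further down in this diff) reads to decide whether model layers are offloaded to a GPU; the CI runner has no GPU, so the integration test pins it to cpu. Roughly, on the app side:

import os

# Mirrors the lookup added in lib/chains.py: unset means "cuda" (GPU offloading),
# while the integration test above exports COMPUTE_DEVICE=cpu to stay CPU-only.
compute_device = os.getenv("COMPUTE_DEVICE", "cuda")
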
26 changes: 16 additions & 10 deletions Dockerfile
@@ -3,23 +3,29 @@ FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04
ENV DEBIAN_FRONTEND=noninteractive

RUN apt update
RUN apt install -y pipx build-essential
RUN apt install -y pipx build-essential git vim
RUN pipx install poetry

ENV DEBIAN_FRONTEND=dialog
ENV PATH="/root/.local/bin:${PATH}"
ENV CMAKE_ARGS="-DGGML_CUDA=on"

WORKDIR /app

# Install requirements
COPY pyproject.toml .
COPY poetry.lock .
COPY healthcheck.sh .

RUN poetry install
RUN ln -s /usr/local/cuda/compat/libcuda.so.1 /usr/lib/x86_64-linux-gnu/

ADD li[b] /app/lib
ADD model[s] /app/models
ADD default_confi[g] /app/default_config

# Install requirements
COPY pyproject.toml /app
COPY poetry.lock /app
COPY init.sh /app
COPY healthcheck.sh /app

WORKDIR /app
ENTRYPOINT ["bash", "init.sh"]
WORKDIR /app/lib
ENTRYPOINT ["poetry", "run", "python3", "main.py"]

LABEL org.opencontainers.image.source=https://github.com/nextcloud/llm2
HEALTHCHECK --interval=2s --timeout=2s --retries=300 CMD /app/healthcheck.sh
HEALTHCHECK --interval=2s --timeout=2s --retries=300 CMD /app/healthcheck.sh
53 changes: 28 additions & 25 deletions default_config/config.json
@@ -1,58 +1,61 @@
{
"llama-2-7b-chat.Q4_K_M": {
"prompt": "<|im_start|> system\n{system_prompt}\n<|im_end|>\n<|im_start|> user\n{user_prompt}\n<|im_end|>\n<|im_start|> assistant\n",
"gpt4all_config": {
"max_tokens": 4096,
"n_predict": 2048,
"loader_config": {
"n_ctx": 4096,
"max_tokens": 2048,
"stop": ["<|im_end|>"]
}
},
"gpt4all-falcon-q4_0": {
"prompt": "### Instruction: {system_prompt}\n{user_prompt}\n### Response:",
"gpt4all_config": {
"max_tokens": 4096,
"n_predict": 2048,
"loader_config": {
"n_ctx": 4096,
"max_tokens": 2048,
"stop": ["### Instruction:"]
}
},
"leo-hessianai-13b-chat-bilingual.Q4_K_M": {
"prompt": "<|im_start|> system\n{system_prompt}\n<|im_end|>\n<|im_start|> user\n{user_prompt}\n<|im_end|>\n<|im_start|> assistant\n",
"gpt4all_config": {
"max_tokens": 4096,
"n_predict": 2048,
"loader_config": {
"n_ctx": 4096,
"max_tokens": 2048,
"stop": ["<|im_end|>"]
}
},
"neuralbeagle14-7b.Q4_K_M": {
"prompt": "<|im_start|> system\n{system_prompt}\n<|im_end|>\n<|im_start|> user\n{user_prompt}\n<|im_end|>\n<|im_start|> assistant\n",
"gpt4all_config": {
"max_tokens": 8000,
"n_predict": 4000,
"loader_config": {
"n_ctx": 8000,
"max_tokens": 4000,
"stop": ["<|im_end|>"]
}
},
"Meta-Llama-3-8B-Instruct.Q4_K_M": {
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n{user_prompt}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n",
"gpt4all_config": {
"max_tokens": 8000,
"n_predict": 4000,
"stop": ["<|eot_id|>"]
"loader_config": {
"n_ctx": 8000,
"max_tokens": 4000,
"stop": ["<|eot_id|>"],
"temperature": 0.3
}
},
"Meta-Llama-3.1-8B-Instruct.Q4_K_M": {
"prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n{user_prompt}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n",
"gpt4all_config": {
"max_tokens": 128000,
"n_predict": 60000,
"stop": ["<|eot_id|>"]
"loader_config": {
"n_ctx": 128000,
"max_tokens": 60000,
"stop": ["<|eot_id|>"],
"temperature": 0.3
}
},
"default": {
"prompt": "<|im_start|> system\n{system_prompt}\n<|im_end|>\n<|im_start|> user\n{user_prompt}\n<|im_end|>\n<|im_start|> assistant\n",
"gpt4all_config": {
"max_tokens": 4096,
"n_predict": 2048,
"stop": ["<|im_end|>"]
"loader_config": {
"n_ctx": 4096,
"max_tokens": 2048,
"stop": ["<|im_end|>"],
"temperature": 0.6
}
}
}
}
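
With this change every model entry in config.json pairs a prompt template with a loader_config block (n_ctx, max_tokens, stop, and optionally temperature) that the reworked chains.py unpacks straight into the LlamaCpp constructor, and the "default" entry presumably serves as the fallback for models without their own section. A small sketch of reading such a file, with the model name and the fallback behaviour as illustrative assumptions:

import json

with open("default_config/config.json") as f:
    config = json.load(f)

model_name = "Meta-Llama-3-8B-Instruct.Q4_K_M"            # example key from the file above
model_config = config.get(model_name, config["default"])  # assumed fallback to "default"

prompt_template = model_config["prompt"]       # chat template with {system_prompt}/{user_prompt}
loader_kwargs = model_config["loader_config"]  # passed as keyword arguments to the model loader
print(prompt_template.format(system_prompt="You are a helpful assistant.", user_prompt="Hello"))
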
7 changes: 0 additions & 7 deletions init.sh

This file was deleted.

46 changes: 10 additions & 36 deletions lib/chains.py
@@ -2,19 +2,15 @@
"""

import os
import json

from free_prompt import FreePromptChain
from headline import HeadlineChain
from topics import TopicsChain
from summarize import SummarizeChain
from contextwrite import ContextWriteChain
from reformulate import ReformulateChain
from simplify import SimplifyChain
from formalize import FormalizeChain
from langchain_community.llms import LlamaCpp
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import json
from nc_py_api.ex_app import persistent_storage

dir_path = os.path.dirname(os.path.realpath(__file__))
@@ -42,18 +38,6 @@ def get_model_config(file_name):
return model_config


config = {
"llama": {
"n_batch": 10,
"n_ctx": 4096,
"n_gpu_layers": -1,
"model_kwargs": {
"device": "cuda"
}
}
}


def generate_llm_chain(file_name):
model_config = get_model_config(file_name)

@@ -62,35 +46,24 @@ def generate_llm_chain(file_name):
if not os.path.exists(path):
path = os.path.join(persistent_storage(), file_name)

compute_device = os.getenv("COMPUTE_DEVICE", "cuda")
try:
llm = LlamaCpp(
model_path=path,
model_kwargs={'device': config["llama"]["model_kwargs"]["device"]},
n_gpu_layers=config["llama"]["n_gpu_layers"],
n_ctx=model_config['gpt4all_config']["n_predict"],
max_tokens=model_config["gpt4all_config"]["max_tokens"],
stop=model_config["gpt4all_config"]["stop"],
echo=True
**{
"n_gpu_layers": (0, -1)[compute_device != "cpu"],
**model_config["loader_config"],
},
)
print(f'Using: {config["llama"]["model_kwargs"]["device"]}', flush=True)
except Exception as gpu_error:
try:
llm = LlamaCpp(model_path=path, device="cpu",
n_ctx=model_config['gpt4all_config']["n_predict"],
max_tokens=model_config["gpt4all_config"]["max_tokens"],
stop=model_config["gpt4all_config"]["stop"],
echo=True)
print("Using: CPU", flush=True)
except Exception as cpu_error:
raise RuntimeError(f"Error: Failed to initialize the LLM model on both GPU and CPU.", f"{cpu_error}") from cpu_error
except Exception as e:
print(f"Failed to load model '{path}' with compute device '{compute_device}'")
raise e

prompt = PromptTemplate.from_template(model_config['prompt'])

return LLMChain(llm=llm, prompt=prompt)




def generate_chains():
chains = {}
for file in os.scandir(models_folder_path):
@@ -104,6 +77,7 @@ def generate_chains():

return chains


def generate_chain_for_model(file_name, chains):
model_name = file_name.split('.gguf')[0]
