Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TTS for long responses #101

Merged
merged 3 commits into from
Oct 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ jobs:
- name: Lint and fix
run: npm run lint:fix
env:
LITELLM_BASE_URL: ${{secrets.LITELLM_BASE_URL}}
LITELLM_API_KEY: ${{secrets.LITELLM_API_KEY}}
ANTHROPIC_API_KEY: ${{secrets.ANTHROPIC_API_KEY}}
TURSO_DB_URL: ${{secrets.TURSO_DB_URL}}
TURSO_DB_AUTH_TOKEN: ${{secrets.TURSO_DB_AUTH_TOKEN}}
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@
"eslint": "8.37.0",
"eslint-config-next": "13.2.4",
"eventsource-parser": "1.1.1",
"ffmpeg-static": "^5.2.0",
"fluent-ffmpeg": "^2.1.3",
"framer-motion": "10.16.2",
"get-blob-duration": "1.2.0",
"html-to-image": "1.11.11",
Expand Down
107 changes: 86 additions & 21 deletions src/app/api/tts/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { eq } from "drizzle-orm";
import { NextRequest, NextResponse } from "next/server";
import OpenAI from "openai";
import * as z from "zod";

export const maxDuration = 180;

const bodyobj = z.object({
Expand All @@ -19,6 +20,67 @@ const bodyobj = z.object({
messages: z.any().optional(),
});

const MAX_CHUNK_LENGTH = 4000; // Slightly below OpenAI's 4096-character TTS input limit, to be safe

/**
 * Splits `text` into chunks no longer than MAX_CHUNK_LENGTH characters so each
 * chunk fits a single TTS request.
 *
 * Strategy: keep paragraphs ("\n\n"-separated) together when possible; split an
 * oversized paragraph into sentences; hard-split a sentence that is itself over
 * the limit. Returned chunks are trimmed and never empty.
 */
function chunkText(text: string): string[] {
  const paragraphs = text.split("\n\n");
  const chunks: string[] = [];
  let currentChunk = "";

  // Flush the accumulator into `chunks`, skipping whitespace-only content so we
  // never emit an empty chunk (the TTS API rejects empty input).
  const flush = () => {
    const trimmed = currentChunk.trim();
    if (trimmed) {
      chunks.push(trimmed);
    }
    currentChunk = "";
  };

  for (const paragraph of paragraphs) {
    if (currentChunk.length + paragraph.length > MAX_CHUNK_LENGTH) {
      flush();
      if (paragraph.length > MAX_CHUNK_LENGTH) {
        // A single paragraph is too long: fall back to sentence boundaries.
        const sentences = paragraph.match(/[^.!?]+[.!?]+/g) ?? [paragraph];
        for (const sentence of sentences) {
          if (sentence.length > MAX_CHUNK_LENGTH) {
            // Even one sentence exceeds the limit (e.g. no punctuation at
            // all): hard-split it into fixed-size slices.
            flush();
            for (let i = 0; i < sentence.length; i += MAX_CHUNK_LENGTH) {
              chunks.push(sentence.slice(i, i + MAX_CHUNK_LENGTH).trim());
            }
          } else if (currentChunk.length + sentence.length > MAX_CHUNK_LENGTH) {
            // Original code pushed here without checking, which emitted an
            // empty chunk when the accumulator had just been reset.
            flush();
            currentChunk = sentence;
          } else {
            currentChunk += " " + sentence;
          }
        }
      } else {
        currentChunk = paragraph;
      }
    } else {
      currentChunk += (currentChunk ? "\n\n" : "") + paragraph;
    }
  }

  flush();

  return chunks;
}

/**
 * Synthesizes speech for a single text chunk with OpenAI's TTS endpoint and
 * returns the raw MP3 bytes as a Node Buffer.
 */
async function generateAudioForChunk(
  openai: OpenAI,
  chunk: string,
): Promise<Buffer> {
  const speech = await openai.audio.speech.create({
    model: "tts-1",
    voice: "alloy",
    input: chunk,
    response_format: "mp3",
  });
  const bytes = await speech.arrayBuffer();
  return Buffer.from(bytes);
}

/**
 * Concatenates the per-chunk MP3 buffers into one audio buffer.
 *
 * This is naive byte-level concatenation of MP3 data: most players handle it,
 * but frame headers/metadata are not re-muxed, so durations may be reported
 * oddly; a robust merge would go through ffmpeg.
 */
async function concatenateAudioBuffers(
  audioBuffers: Buffer[],
): Promise<Buffer> {
  // Buffer is a Uint8Array subclass, so Buffer.concat accepts Buffer[]
  // directly — the original `as unknown as Uint8Array[]` double cast was
  // unnecessary and hid the real types from the checker.
  return Buffer.concat(audioBuffers);
}

export async function POST(request: NextRequest) {
const b = await request.json();
const searchParams = await request.nextUrl.searchParams;
Expand All @@ -32,21 +94,20 @@ export async function POST(request: NextRequest) {
const chatId = body.chatId;
const messages: ChatEntry[] = body.messages;

const Openai = new OpenAI({
const openai = new OpenAI({
apiKey: env.OPEN_AI_API_KEY,
});

if (text && messageId && body.index) {
console.log("got into if");
// handling audio for a single message
const mp3 = await Openai.audio.speech.create({
model: "tts-1",
voice: "alloy",
input: text,
response_format: "aac",
});
const chunks = chunkText(text);
const audioBuffers = await Promise.all(
chunks.map((chunk) => generateAudioForChunk(openai, chunk)),
);

const finalBuffer = await concatenateAudioBuffers(audioBuffers);

const buffer = Buffer.from(await mp3.arrayBuffer());
// fetching the chat
let chatlog: ChatLog = { log: [] };
let fetchedChat: ChatSchema[] = [];
Expand Down Expand Up @@ -81,8 +142,11 @@ export async function POST(request: NextRequest) {

messageId = messageId ? messageId : chatlog.log[body.index].id;

// adding the audio to the message
const audioUrl = await saveAudioMessage({ buffer, chatId, messageId });
const audioUrl = await saveAudioMessage({
buffer: finalBuffer,
chatId,
messageId,
});
message.audio = audioUrl;

await db
Expand All @@ -98,20 +162,22 @@ export async function POST(request: NextRequest) {
);
} else {
// summarize and generate audio for all messages

const summary: string = await summarizeChat(messages);
const mp3 = await Openai.audio.speech.create({
model: "tts-1",
voice: "alloy",
input: summary,
response_format: "aac",
});
const chunks = chunkText(summary);
const audioBuffers = await Promise.all(
chunks.map((chunk) => generateAudioForChunk(openai, chunk)),
);

const finalBuffer = await concatenateAudioBuffers(audioBuffers);

const buffer = Buffer.from(await mp3.arrayBuffer());
const messageId = "summary"; // as it is the summary of the whole chat
const audioUrl = await saveAudioMessage({ buffer, chatId, messageId });
const audioUrl = await saveAudioMessage({
buffer: finalBuffer,
chatId,
messageId,
});

// update the db to save audio url for correspointing chat
// update the db to save audio url for corresponding chat
await db
.update(chats)
.set({
Expand All @@ -120,7 +186,6 @@ export async function POST(request: NextRequest) {
})
.where(eq(chats.id, Number(chatId)))
.run();
// fetching the chat

return new NextResponse(JSON.stringify({ audioUrl: audioUrl }));
}
Expand Down
2 changes: 1 addition & 1 deletion src/app/dashboard/layout.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ export default function LoggedInLayout({
);
}

const SearchButton = (props: React.ComponentProps<typeof Button>) => {
export const SearchButton = (props: React.ComponentProps<typeof Button>) => {
return (
<Button {...props} variant="ghost" className="max-h-[32px]">
<SearchIcon className="w-4 h-4 mr-2" />
Expand Down
6 changes: 6 additions & 0 deletions src/app/env.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ import { z } from "zod";

export const env = createEnv({
server: {
// LITELLM
LITELLM_BASE_URL: z.string().min(1),
LITELLM_API_KEY: z.string().min(10),
// Anthropic
ANTHROPIC_API_KEY: z.string().min(10),
// OpenAI
Expand Down Expand Up @@ -64,6 +67,9 @@ export const env = createEnv({
},

runtimeEnv: {
// LITELLM
LITELLM_BASE_URL: process.env.LITELLM_BASE_URL,
LITELLM_API_KEY: process.env.LITELLM_API_KEY,
// Anthropic
ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY,
// Clerk (Auth)
Expand Down
14 changes: 14 additions & 0 deletions src/app/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ import { ChatType } from "@/lib/types";
import { parseAsString, useQueryState } from "next-usequerystate";
import ChatCardWrapper from "@/components/chatcardwrapper";
import { OrgChatToggler } from "@/components/chatSidebar";
import AudioPlayer from "@/components/audioplayer";
import Search from "@/components/search";
import { SearchButton } from "./dashboard/layout";
import useSearchDialogState from "@/store/searchDialogStore";

const handleSmoothScroll = (): void => {
if (typeof window !== "undefined") {
Expand Down Expand Up @@ -126,6 +130,7 @@ export default function Home() {
console.error("Error creating new chat:", error);
}
};
const { showSearchDialog, toggleSearchDialog } = useSearchDialogState();

return (
<div>
Expand All @@ -137,6 +142,15 @@ export default function Home() {
variant="secondary"
asChild
></Button>
{isSignedIn ? (
<>
<AudioPlayer />
<SearchButton onClick={toggleSearchDialog}>
<span className="hidden sm:inline">Search</span>
</SearchButton>
<Search orgSlug={orgSlug as string} />
</>
) : null}
</Header>
<div className="absolute top-0 w-full y-0 flex flex-col flex-grow h-screen justify-center items-center gap-2 text-center">
<div className="absolute inset-0 -z-5">
Expand Down
8 changes: 4 additions & 4 deletions src/utils/apiHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ export const summarizeChat = async (chat: ChatEntry[]): Promise<string> => {
name: "SummarizeChat",
run_type: "llm",
inputs: {
model: "HuggingFaceH4/zephyr-7b-beta",
model: "llama3.1-8b",
messages: msgs as ChatCompletionMessageParam[],
top_p: 0.7,
max_tokens: 512,
Expand All @@ -423,12 +423,12 @@ export const summarizeChat = async (chat: ChatEntry[]): Promise<string> => {
const parentRun = new RunTree(parentRunConfig);

const openai = new OpenAI({
baseURL: env.ANYSCALE_API_BASE,
apiKey: env.ANYSCALE_API_KEY,
baseURL: env.LITELLM_BASE_URL,
apiKey: env.LITELLM_API_KEY,
});
const stream: OpenAI.Chat.ChatCompletion =
await openai.chat.completions.create({
model: "HuggingFaceH4/zephyr-7b-beta",
model: "llama3.1-8b",
messages: [
{ role: "user", content: msg },
] as ChatCompletionMessageParam[],
Expand Down
Loading