Skip to content

Commit

Permalink
Merge pull request #351 from harmony-one/voice-command
Browse files Browse the repository at this point in the history
Voice command
  • Loading branch information
fegloff authored Jan 27, 2024
2 parents 5184555 + 949701a commit 50d9f79
Show file tree
Hide file tree
Showing 12 changed files with 282 additions and 52 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ yarn.lock
# Sentry Auth Token
.sentryclirc
.DS_Store
public/
4 changes: 3 additions & 1 deletion src/bot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ import { VoiceToTextBot } from './modules/voice-to-text'
import { now } from './utils/perf'
import { hasPrefix } from './modules/open-ai/helpers'
import { VoiceToVoiceGPTBot } from './modules/voice-to-voice-gpt'
import { VoiceCommand } from './modules/voice-command'

Events.EventEmitter.defaultMaxListeners = 30

Expand Down Expand Up @@ -251,6 +252,7 @@ const voiceTranslateBot = new VoiceTranslateBot(payments)
const textToSpeechBot = new TextToSpeechBot(payments)
const voiceToTextBot = new VoiceToTextBot(payments)
const voiceToVoiceGPTBot = new VoiceToVoiceGPTBot(payments)
const voiceCommand = new VoiceCommand(openAiBot)

bot.on('message:new_chat_members:me', async (ctx) => {
try {
Expand Down Expand Up @@ -358,6 +360,7 @@ const writeCommandLog = async (
}

const PayableBots: Record<string, PayableBotConfig> = {
voiceCommand: { bot: voiceCommand },
qrCodeBot: { bot: qrCodeBot },
sdImagesBot: { bot: sdImagesBot },
voiceTranslate: { bot: voiceTranslateBot },
Expand Down Expand Up @@ -397,7 +400,6 @@ const onMessage = async (ctx: OnMessageContext): Promise<void> => {
await telegramPayments.onEvent(ctx)
return
}

for (const config of Object.values(PayableBots)) {
const bot = config.bot

Expand Down
4 changes: 4 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ export default {
? process.env.RESTRICTED_PHRASES.split(', ')
: ['metamask', 'walletconnect']
},
voiceCommand: {
isEnabled: true,
voiceDuration: 30
},
voiceMemo: {
isEnabled: Boolean(parseInt(process.env.VOICE_MEMO_ENABLED ?? '1')),
telegramApiId: parseInt(process.env.TELEGRAM_API_ID ?? ''),
Expand Down
10 changes: 6 additions & 4 deletions src/modules/llms/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,9 @@ export class LlmsBot implements PayableBot {
prompt,
conversation
})
const price = response.price * config.openAi.chatGpt.priceAdjustment
if (
!(await this.payments.pay(ctx as OnMessageContext, response.price))
!(await this.payments.pay(ctx as OnMessageContext, price))
) {
await this.onNotBalanceMessage(ctx)
} else {
Expand Down Expand Up @@ -365,8 +366,9 @@ export class LlmsBot implements PayableBot {
prompt,
conversation
})
const price = response.price * config.openAi.chatGpt.priceAdjustment
if (
!(await this.payments.pay(ctx as OnMessageContext, response.price))
!(await this.payments.pay(ctx as OnMessageContext, price))
) {
if (ctx.chat?.id) {
await ctx.api.deleteMessage(ctx.chat?.id, msgId)
Expand Down Expand Up @@ -421,13 +423,13 @@ export class LlmsBot implements PayableBot {
const result = await llmCheckCollectionStatus(collection?.collectionName ?? '')
if (result.price > 0) {
if (
!(await this.payments.pay(ctx as OnMessageContext, result.price))
!(await this.payments.pay(ctx as OnMessageContext, result.price)) // price 0.05 x collections (chunks)
) {
await this.onNotBalanceMessage(ctx)
} else {
ctx.session.collections.activeCollections.push(collection)
if (collection.msgId) {
const oneFee = await this.payments.getPriceInONE(result.price)
const oneFee = await this.payments.getPriceInONE(result.price) // price in cents
let statusMsg
if (collection.collectionType === 'URL') {
statusMsg = `${collection.url} processed (${this.payments.toONE(oneFee, false).toFixed(2)} ONE fee)`
Expand Down
1 change: 0 additions & 1 deletion src/modules/open-ai/api/openAi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,5 @@ export async function speechToText (readStream: fs.ReadStream): Promise<string>
file: readStream,
model: 'whisper-1'
})

return result.text
}
3 changes: 2 additions & 1 deletion src/modules/open-ai/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ export enum SupportedCommands {
dalleShorter = 'i',
genImgEn = 'genImgEn',
on = 'on',
off = 'off'
off = 'off',
talk = 'talk'
}

export const MAX_TRIES = 3
Expand Down
161 changes: 123 additions & 38 deletions src/modules/open-ai/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
} from '../types'
import {
alterGeneratedImg,
chatCompletion,
getChatModel,
getDalleModel,
getDalleModelPrice,
Expand Down Expand Up @@ -47,6 +48,7 @@ import { AxiosError } from 'axios'
import { Callbacks } from '../types'
import { LlmsBot } from '../llms'
import { type PhotoSize } from 'grammy/types'
import { responseWithVoice } from '../voice-to-voice-gpt/helpers'

const priceAdjustment = config.openAi.chatGpt.priceAdjustment
export class OpenAIBot implements PayableBot {
Expand Down Expand Up @@ -153,6 +155,72 @@ export class OpenAIBot implements PayableBot {
return false
}

/**
 * Routes a voice-transcribed command to the matching OpenAI feature queue.
 * Called (per the commit, by the VoiceCommand module) after speech-to-text:
 * `command` is the recognized command word and `transcribedText` is the full
 * transcription, which still begins with the command word itself.
 * Errors are funneled into the shared onError handler rather than thrown.
 */
public async voiceCommand (ctx: OnMessageContext | OnCallBackQueryData, command: string, transcribedText: string): Promise<void> {
  try {
    // Strip the leading command word plus any trailing punctuation/whitespace
    // ("dalle, draw a cat" -> "draw a cat") to recover the bare user prompt.
    let prompt = transcribedText.slice(command.length).replace(/^[.,\s]+/, '')
    switch (command) {
      case SupportedCommands.vision: {
        // Vision needs an attached photo — from this message or the replied-to one.
        const photo = ctx.message?.photo ?? ctx.message?.reply_to_message?.photo
        if (photo) {
          ctx.session.openAi.imageGen.imgRequestQueue.push({
            prompt,
            photo,
            command
          })
          // Start draining only if a drain loop is not already running; the
          // flag is cleared once the handler resolves.
          if (!ctx.session.openAi.imageGen.isProcessingQueue) {
            ctx.session.openAi.imageGen.isProcessingQueue = true
            await this.onImgRequestHandler(ctx).then(() => {
              ctx.session.openAi.imageGen.isProcessingQueue = false
            })
          }
        }
        // NOTE(review): a voice /vision with no photo is silently ignored —
        // confirm the user should not get feedback here.
        break
      }
      case SupportedCommands.ask:
      case SupportedCommands.talk: {
        if (this.botSuspended) {
          // Suspended bots still report analytics before bailing out.
          ctx.transient.analytics.sessionState = RequestState.Error
          await sendMessage(ctx, 'The bot is suspended').catch(async (e) => { await this.onError(ctx, e) })
          ctx.transient.analytics.actualResponseTime = now()
          return
        }
        // /talk prepends a brevity instruction (voice replies should be short);
        // /ask passes the prompt through unchanged.
        const adaptedPrompt = (SupportedCommands.talk === command
          ? 'Keep it short, like a phone call'
          : '') + await preparePrompt(ctx, prompt)
        // /ask answers as text, /talk as synthesized voice (see promptRequest.outputFormat).
        ctx.session.openAi.chatGpt.requestQueue.push({
          prompt: adaptedPrompt,
          outputFormat: SupportedCommands.ask === command ? 'text' : 'voice'
        })
        if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
          ctx.session.openAi.chatGpt.isProcessingQueue = true
          await this.onChatRequestHandler(ctx).then(() => {
            ctx.session.openAi.chatGpt.isProcessingQueue = false
          })
        }
        break
      }
      case SupportedCommands.dalleImg: {
        // An empty or single-word transcription is replaced with the configured
        // default prompt rather than sending a near-empty request to DALL·E.
        if (!prompt || prompt.split(' ').length === 1) {
          prompt = config.openAi.dalle.defaultPrompt
        }
        ctx.session.openAi.imageGen.imgRequestQueue.push({
          command: 'dalle',
          prompt
        })
        if (!ctx.session.openAi.imageGen.isProcessingQueue) {
          ctx.session.openAi.imageGen.isProcessingQueue = true
          await this.onImgRequestHandler(ctx).then(() => {
            ctx.session.openAi.imageGen.isProcessingQueue = false
          })
        }
        break
      }
      // NOTE(review): no default case — an unrecognized command word is
      // silently dropped with no user feedback. Confirm intended.
    }
  } catch (e: any) {
    await this.onError(ctx, e)
  }
}

public async onEvent (ctx: OnMessageContext | OnCallBackQueryData): Promise<void> {
ctx.transient.analytics.module = this.module
if (!(this.isSupportedEvent(ctx)) && (ctx.chat?.type !== 'private') && !ctx.session.openAi.chatGpt.isFreePromptChatGroups) {
Expand Down Expand Up @@ -346,7 +414,7 @@ export class OpenAIBot implements PayableBot {
)
}

private async promptGen (data: ChatPayload, msgId?: number): Promise< { price: number, chat: ChatConversation[] }> {
private async completionGen (data: ChatPayload, msgId?: number, outputFormat = 'text'): Promise< { price: number, chat: ChatConversation[] }> {
const { conversation, ctx, model } = data
try {
if (!msgId) {
Expand All @@ -359,29 +427,42 @@ export class OpenAIBot implements PayableBot {
})
).message_id
}
const isTypingEnabled = config.openAi.chatGpt.isTypingEnabled
if (isTypingEnabled) {
ctx.chatAction = 'typing'
}
const completion = await streamChatCompletion(
conversation,
ctx,
model,
msgId,
true // telegram messages has a character limit
)
if (isTypingEnabled) {
ctx.chatAction = null
}
if (completion) {
ctx.transient.analytics.sessionState = RequestState.Success
ctx.transient.analytics.actualResponseTime = now()
const price = getPromptPrice(completion, data)
this.logger.info(
`streamChatCompletion result = tokens: ${price.totalTokens} | ${model} | price: ${price.price}¢` // price.promptTokens + price.completionTokens }
if (outputFormat === 'text') {
const isTypingEnabled = config.openAi.chatGpt.isTypingEnabled
if (isTypingEnabled) {
ctx.chatAction = 'typing'
}
const completion = await streamChatCompletion(
conversation,
ctx,
model,
msgId,
true // telegram messages has a character limit
)
if (isTypingEnabled) {
ctx.chatAction = null
}
if (completion) {
ctx.transient.analytics.sessionState = RequestState.Success
ctx.transient.analytics.actualResponseTime = now()
const price = getPromptPrice(completion, data)
this.logger.info(
`streamChatCompletion result = tokens: ${price.totalTokens} | ${model} | price: ${price.price}¢` // price.promptTokens + price.completionTokens }
)
return {
price: price.price,
chat: conversation
}
}
} else {
const response = await chatCompletion(conversation, ChatGPTModelsEnum.GPT_35_TURBO_16K)
conversation.push({
role: 'system',
content: response.completion
})
await responseWithVoice(response.completion, ctx as OnMessageContext, msgId)
return {
price: price.price,
price: response.price,
chat: conversation
}
}
Expand All @@ -408,9 +489,10 @@ export class OpenAIBot implements PayableBot {
}
const { username } = ctx.me
const prompt = ctx.message?.text?.slice(username.length + 1) ?? '' // @
ctx.session.openAi.chatGpt.requestQueue.push(
await preparePrompt(ctx, prompt)
)
ctx.session.openAi.chatGpt.requestQueue.push({
prompt: await preparePrompt(ctx, prompt),
outputFormat: 'text'
})
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
ctx.session.openAi.chatGpt.isProcessingQueue = true
await this.onChatRequestHandler(ctx).then(() => {
Expand All @@ -433,9 +515,10 @@ export class OpenAIBot implements PayableBot {
return
}
const prompt = ctx.message?.text?.slice(prefix.length) ?? ''
ctx.session.openAi.chatGpt.requestQueue.push(
await preparePrompt(ctx, prompt)
)
ctx.session.openAi.chatGpt.requestQueue.push({
prompt: await preparePrompt(ctx, prompt),
outputFormat: 'text'
})
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
ctx.session.openAi.chatGpt.isProcessingQueue = true
await this.onChatRequestHandler(ctx).then(() => {
Expand All @@ -455,9 +538,10 @@ export class OpenAIBot implements PayableBot {
ctx.transient.analytics.actualResponseTime = now()
return
}
ctx.session.openAi.chatGpt.requestQueue.push(
await preparePrompt(ctx, ctx.message?.text ?? '')
)
ctx.session.openAi.chatGpt.requestQueue.push({
prompt: await preparePrompt(ctx, ctx.message?.text ?? ''),
outputFormat: 'text'
})
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
ctx.session.openAi.chatGpt.isProcessingQueue = true
await this.onChatRequestHandler(ctx).then(() => {
Expand Down Expand Up @@ -495,9 +579,10 @@ export class OpenAIBot implements PayableBot {
if (await this.freePromptChatGroup(ctx, prompt as string)) {
return
}
ctx.session.openAi.chatGpt.requestQueue.push(
await preparePrompt(ctx, prompt as string)
)
ctx.session.openAi.chatGpt.requestQueue.push({
prompt: await preparePrompt(ctx, prompt as string),
outputFormat: 'text'
})
if (!ctx.session.openAi.chatGpt.isProcessingQueue) {
ctx.session.openAi.chatGpt.isProcessingQueue = true
await this.onChatRequestHandler(ctx).then(() => {
Expand Down Expand Up @@ -527,7 +612,7 @@ export class OpenAIBot implements PayableBot {
ctx.transient.analytics.actualResponseTime = now()
return
}
const { url, newPrompt } = hasUrl(ctx, prompt)
const { url, newPrompt } = hasUrl(ctx, prompt.prompt)
const hasCode = hasCodeSnippet(ctx)
if (chatConversation.length === 0 && (hasCode || !url)) {
chatConversation.push({
Expand All @@ -540,14 +625,14 @@ export class OpenAIBot implements PayableBot {
} else {
chatConversation.push({
role: 'user',
content: prompt
content: prompt.prompt
})
const payload = {
conversation: chatConversation,
model: model || config.openAi.chatGpt.model,
ctx
}
const result = await this.promptGen(payload)
const result = await this.completionGen(payload, prompt.msgId, prompt.outputFormat)
ctx.session.openAi.chatGpt.chatConversation = [...result.chat]
if (
!(await this.payments.pay(ctx as OnMessageContext, result.price))
Expand Down Expand Up @@ -660,7 +745,7 @@ export class OpenAIBot implements PayableBot {
ctx
})
this.logger.info(
`streamChatCompletion result = tokens: ${
`streamChatVisionCompletion result = tokens: ${
price.promptTokens + price.completionTokens
} | ${model} | price: ${price.price}¢`
)
Expand Down
9 changes: 8 additions & 1 deletion src/modules/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,21 @@ export interface ImageRequest {
photo?: PhotoSize[] | undefined
photoUrl?: string[]
}

/**
 * A single queued chat request for the ChatGPT request queue.
 * NOTE(review): interface names are conventionally PascalCase (PromptRequest);
 * renaming would touch every consumer (e.g. ChatGptSessionData.requestQueue),
 * so it is left as-is here.
 */
export interface promptRequest {
  prompt: string // user prompt with the command word already stripped
  msgId?: number // id of an existing placeholder message to reuse, if any
  outputFormat?: 'text' | 'voice' // reply medium; treated as 'text' when omitted
  commandPrefix?: string // presumably the original command prefix for later dispatch — TODO confirm against callers
}
/**
 * Per-chat ChatGPT session state kept in the bot session store.
 */
export interface ChatGptSessionData {
  model: string // active model identifier for this chat
  isEnabled: boolean // whether the ChatGPT module is active for this chat
  isFreePromptChatGroups: boolean // when true, group chats may prompt without payment checks — TODO confirm semantics
  chatConversation: ChatConversation[] // running conversation history sent with each completion
  usage: number // presumably accumulated token usage — verify against billing code
  price: number // presumably accumulated cost in cents — verify against billing code
  requestQueue: promptRequest[] // pending prompts; drained by onChatRequestHandler
  isProcessingQueue: boolean // guard flag so only one queue-drain loop runs at a time
}

Expand Down
Loading

0 comments on commit 50d9f79

Please sign in to comment.