Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added VoiceToTextBot #316

Merged
merged 1 commit into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/bot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ import { ES } from './es'
import { hydrateFiles } from '@grammyjs/files'
import { VoiceTranslateBot } from './modules/voice-translate'
import { TextToSpeechBot } from './modules/text-to-speech'
import { VoiceToTextBot } from './modules/voice-to-text'

Events.EventEmitter.defaultMaxListeners = 30

Expand Down Expand Up @@ -221,6 +222,7 @@ const documentBot = new DocumentHandler()
const telegramPayments = new TelegramPayments(payments)
const voiceTranslateBot = new VoiceTranslateBot(payments)
const textToSpeechBot = new TextToSpeechBot(payments)
const voiceToTextBot = new VoiceToTextBot(payments)

bot.on('message:new_chat_members:me', async (ctx) => {
try {
Expand Down Expand Up @@ -335,6 +337,7 @@ const PayableBots: Record<string, PayableBotConfig> = {
documentBot: { bot: documentBot },
translateBot: { bot: translateBot },
textToSpeech: { bot: textToSpeechBot },
voiceToText: { bot: voiceToTextBot },
openAiBot: {
enabled: (ctx: OnMessageContext) => ctx.session.openAi.imageGen.isEnabled,
bot: openAiBot
Expand Down
90 changes: 90 additions & 0 deletions src/modules/voice-to-text/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import fs from 'fs'
import pino from 'pino'
import { InputFile } from 'grammy'
import type { Logger } from 'pino'
import type { BotPayments } from '../payment'
import { speechToText } from '../open-ai/api/openAi'
import type { OnMessageContext, PayableBot } from '../types'
import { bot } from '../../bot'
import { SessionState } from '../types'
import { download } from '../../utils/files'

/**
 * Payable bot module that transcribes a voice or audio message to text.
 *
 * It is triggered by the `text` command. The audio is taken from the message
 * the command replies to; when that message carries no audio, the command
 * message itself is used (e.g. the command sent as the caption of an audio file).
 */
export class VoiceToTextBot implements PayableBot {
  public readonly module = 'VoiceToText'

  // Multiplier applied to the audio duration (seconds) to estimate the price.
  // NOTE(review): the currency/unit is defined by the payment module — confirm.
  private static readonly PRICE_PER_SECOND = 0.005

  // Extension used when Telegram's file path does not carry one.
  private static readonly DEFAULT_EXTENSION = 'ogg'

  private readonly payments: BotPayments

  private readonly logger: Logger

  /**
   * @param payments payment service handed over by the bot wiring; stored for
   * later use (not referenced by the methods of this class).
   */
  constructor (payments: BotPayments) {
    this.payments = payments
    this.logger = pino({
      name: 'VoiceToText',
      transport: {
        target: 'pino-pretty',
        options: { colorize: true }
      }
    })
  }

  /** Returns true when the incoming message carries the `text` command. */
  public isSupportedEvent (ctx: OnMessageContext): boolean {
    return ctx.hasCommand('text')
  }

  /**
   * Estimates the price from the duration of the audio that `onEvent` would
   * transcribe. Returns 0 when no audio can be found.
   */
  public getEstimatedPrice (ctx: OnMessageContext): number {
    // Must look at the same audio as onEvent, otherwise the quote does not
    // match the work that is actually performed.
    const seconds = this.getAudioSource(ctx)?.duration ?? 0
    return seconds * VoiceToTextBot.PRICE_PER_SECOND
  }

  /**
   * Transcribes the targeted audio and sends the text back to the chat.
   * Short results are sent as a plain message, results longer than 512
   * characters as a text document with a truncated caption.
   *
   * @throws Error when the chat id is unavailable, or whatever the Telegram
   * API, the download or the speech-to-text call throws.
   */
  public async onEvent (ctx: OnMessageContext): Promise<void> {
    ctx.session.analytics.module = this.module

    const source = this.getAudioSource(ctx)
    if (!source?.file_id) {
      await ctx.reply('The message must include audio content')
      return
    }

    const chatId = ctx.chat?.id
    if (!chatId) {
      throw Error('chat id is undefined')
    }

    const progressMessage = await ctx.reply('Generating...')

    let resultText: string
    try {
      resultText = await this.transcribe(source.file_id)
    } finally {
      // Remove the progress indicator on failure as well; a failed cleanup
      // must not hide the transcription error (or a successful result).
      await ctx.api.deleteMessage(chatId, progressMessage.message_id).catch((e: unknown) => {
        this.logger.warn({ err: e }, 'Failed to delete the progress message')
      })
    }

    if (resultText.length > 512) {
      // Users without a public username would otherwise yield "@undefined".
      const { username, first_name: firstName } = ctx.message.from
      const sender = username !== undefined ? `@${username}` : firstName
      const translationFile = new InputFile(new TextEncoder().encode(resultText), `From ${sender}.txt`)
      await bot.api.sendDocument(chatId, translationFile, {
        reply_to_message_id: ctx.message.message_id,
        caption: resultText.slice(0, 1024)
      })
    } else {
      await ctx.reply(resultText, { message_thread_id: ctx.message?.message_thread_id })
    }

    ctx.session.analytics.sessionState = SessionState.Success
  }

  /**
   * Finds the voice/audio attachment to work on: the replied-to message is
   * preferred, then the command message itself. Voice wins over audio.
   */
  private getAudioSource (ctx: OnMessageContext): { file_id: string, duration: number } | undefined {
    const message = ctx.update.message
    const replied = message.reply_to_message
    return replied?.voice ?? replied?.audio ?? message.voice ?? message.audio
  }

  /**
   * Downloads the Telegram file to a temporary location, runs speech-to-text
   * on it and always removes the temporary file afterwards.
   */
  private async transcribe (fileId: string): Promise<string> {
    const file = await bot.api.getFile(fileId)
    let localPath = await download(file)
    try {
      // The downloaded file is renamed to carry the original extension before
      // it is handed to speechToText.
      // NOTE(review): presumably the API infers the format from the name — confirm.
      const withExtension = `${localPath}.${VoiceToTextBot.extensionOf(file.file_path)}`
      fs.renameSync(localPath, withExtension)
      localPath = withExtension
      return await speechToText(fs.createReadStream(localPath))
    } finally {
      // `force` keeps the cleanup from throwing when the file is already gone.
      fs.rmSync(localPath, { force: true })
    }
  }

  /**
   * Extracts the extension of the last path segment, falling back to the
   * default when the segment has none (so directory names are never returned).
   */
  private static extensionOf (filePath: string | undefined): string {
    const baseName = filePath?.split(/[\\/]/).pop() ?? ''
    const dot = baseName.lastIndexOf('.')
    const hasExtension = dot >= 0 && dot < baseName.length - 1
    return hasExtension ? baseName.slice(dot + 1) : VoiceToTextBot.DEFAULT_EXTENSION
  }
}
53 changes: 53 additions & 0 deletions src/utils/files.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import * as pathLib from 'path'
import * as fs from 'fs'
import * as os from 'os'
import * as https from 'https'
import type { File } from '@grammyjs/types/manage.js'
import config from '../config'

/**
 * Creates a fresh, uniquely named directory inside the (resolved) system temp
 * directory and returns the path of a file named `filedata` within it.
 * Only the directory is created; the file itself is not.
 */
export const createTempFile = async (): Promise<string> => {
  const tempRoot = await fs.promises.realpath(os.tmpdir())
  const uniqueDir = await fs.promises.mkdtemp(tempRoot + pathLib.sep)
  return pathLib.join(uniqueDir, 'filedata')
}

// Alias of Node's promise-based fs.promises.copyFile; `download` uses it when
// the source is a local absolute path instead of a URL.
export const copyFile = fs.promises.copyFile

/**
 * Downloads `url` over HTTPS and stores the response body at `dest`.
 *
 * The destination file is only created once a successful (2xx) response has
 * arrived, and a partially written file is removed when the transfer fails.
 *
 * @param url HTTPS URL to fetch
 * @param dest path of the file to write
 * @throws Error on an invalid URL, a request/response/write failure, a
 * non-2xx status code or a connection that closes before the body is complete
 */
export async function downloadFile (url: string, dest: string): Promise<void> {
  await new Promise<void>((resolve, reject) => {
    let file: fs.WriteStream | undefined
    let settled = false

    // Single failure path: settle once, drop the partial file, and report the
    // original cause rather than a secondary unlink error.
    const fail = (err: Error): void => {
      if (settled) return
      settled = true
      if (file === undefined) {
        reject(err)
        return
      }
      file.destroy()
      fs.unlink(dest, () => { reject(err) })
    }

    const request = https.get(url, (res) => {
      const status = res.statusCode ?? 0
      if (status < 200 || status >= 300) {
        // Drain the body so the socket is released. The URL is deliberately
        // left out of the message because callers may embed credentials in it.
        res.resume()
        fail(new Error(`Download failed with HTTP status ${status}`))
        return
      }

      const out = fs.createWriteStream(dest)
      file = out

      res.on('error', fail)
      res.on('close', () => {
        // A premature close never emits 'end', so the pipe would never finish.
        if (!res.complete) fail(new Error('Connection closed before the download completed'))
      })
      out.on('error', (err) => {
        res.destroy()
        fail(err)
      })
      out.on('finish', () => {
        out.close((err) => {
          if (err != null) {
            fail(err)
            return
          }
          if (settled) return
          settled = true
          resolve()
        })
      })

      res.pipe(out)
    })

    request.on('error', fail)
  })
}

// Joins the API root, the `file` segment, the `bot<token>` segment and the
// file path into a download URL: <root>/file/bot<token>/<path>.
const buildFileUrl = (root: string, token: string, path: string): string =>
  [root, 'file', `bot${token}`, path].join('/')
// Builds the download URL for `path` on https://api.telegram.org using the
// bot token from the project config.
const buildLink = (path: string): string =>
  buildFileUrl('https://api.telegram.org', config.telegramBotAuthToken, path)
/**
 * Resolves where the content of `file` can be fetched from.
 *
 * An absolute `file_path` is returned unchanged; any other path is turned
 * into a download URL via `buildLink`.
 * NOTE(review): absolute paths presumably come from a local Bot API server — confirm.
 *
 * @throws Error when the file has no `file_path`
 */
function getUrl (file: File): string {
  const location = file.file_path
  if (location === undefined) {
    throw new Error(`File path is not available for file '${file.file_id}'`)
  }
  if (pathLib.isAbsolute(location)) {
    return location
  }
  return buildLink(location)
}

/**
 * Fetches the content of `file` into a local file and returns its path.
 *
 * @param file file descriptor whose `file_path` locates the content
 * @param path destination path; a new temp file path is created when omitted
 * @returns the path the content was written to
 * @throws Error when the file has no `file_path`, or when copying/downloading fails
 */
export async function download (file: File, path?: string): Promise<string> {
  const source = getUrl(file)
  const target = path === undefined ? await createTempFile() : path
  // Absolute sources are local files and get copied; everything else is
  // fetched over HTTPS.
  const transfer = pathLib.isAbsolute(source) ? copyFile : downloadFile
  await transfer(source, target)
  return target
}