Skip to content

Commit

Permalink
Merge pull request #316 from harmony-one/voice-to-text
Browse files Browse the repository at this point in the history
Added VoiceToTextBot
  • Loading branch information
theofandrich authored Sep 28, 2023
2 parents 95ede4a + 27f24ec commit ba4f203
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/bot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ import { ES } from './es'
import { hydrateFiles } from '@grammyjs/files'
import { VoiceTranslateBot } from './modules/voice-translate'
import { TextToSpeechBot } from './modules/text-to-speech'
import { VoiceToTextBot } from './modules/voice-to-text'

Events.EventEmitter.defaultMaxListeners = 30

Expand Down Expand Up @@ -221,6 +222,7 @@ const documentBot = new DocumentHandler()
const telegramPayments = new TelegramPayments(payments)
const voiceTranslateBot = new VoiceTranslateBot(payments)
const textToSpeechBot = new TextToSpeechBot(payments)
const voiceToTextBot = new VoiceToTextBot(payments)

bot.on('message:new_chat_members:me', async (ctx) => {
try {
Expand Down Expand Up @@ -335,6 +337,7 @@ const PayableBots: Record<string, PayableBotConfig> = {
documentBot: { bot: documentBot },
translateBot: { bot: translateBot },
textToSpeech: { bot: textToSpeechBot },
voiceToText: { bot: voiceToTextBot },
openAiBot: {
enabled: (ctx: OnMessageContext) => ctx.session.openAi.imageGen.isEnabled,
bot: openAiBot
Expand Down
90 changes: 90 additions & 0 deletions src/modules/voice-to-text/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import fs from 'fs'
import pino from 'pino'
import { InputFile } from 'grammy'
import type { Logger } from 'pino'
import type { BotPayments } from '../payment'
import { speechToText } from '../open-ai/api/openAi'
import type { OnMessageContext, PayableBot } from '../types'
import { bot } from '../../bot'
import { SessionState } from '../types'
import { download } from '../../utils/files'

/**
 * Payable bot module that transcribes a voice or audio message to text.
 *
 * It reacts to the `/text` command sent as a reply to a message that carries
 * voice or audio content. The replied-to media is downloaded, passed to
 * `speechToText`, and the transcript is sent back to the chat.
 */
export class VoiceToTextBot implements PayableBot {
  public readonly module = 'VoiceToText'
  private readonly payments: BotPayments

  private readonly logger: Logger

  constructor (payments: BotPayments) {
    this.payments = payments
    this.logger = pino({
      name: 'VoiceToText',
      transport: {
        target: 'pino-pretty',
        options: { colorize: true }
      }
    })
  }

  /** Returns true when the incoming message contains the `/text` command. */
  public isSupportedEvent (ctx: OnMessageContext): boolean {
    return ctx.hasCommand('text')
  }

  /**
   * Estimates the price of handling the event from the media duration.
   *
   * The duration is read from the replied-to message because that is the
   * media `onEvent` actually transcribes; the command message itself is the
   * `/text` command and carries no media to bill for.
   *
   * @returns duration in seconds multiplied by 0.005, or 0 when the
   * replied-to message has no voice/audio.
   */
  public getEstimatedPrice (ctx: OnMessageContext): number {
    const { voice, audio } = ctx.message.reply_to_message ?? { voice: undefined, audio: undefined }
    const seconds = voice?.duration ?? audio?.duration ?? 0
    return seconds * 0.005
  }

  /**
   * Transcribes the voice/audio of the replied-to message and posts the text.
   *
   * Transcripts longer than 512 characters are sent as a `.txt` document
   * (with the first 1024 characters as the caption); shorter ones are sent as
   * a plain message.
   *
   * @throws Error when the chat id is unavailable, or when downloading or
   * transcribing the file fails.
   */
  public async onEvent (ctx: OnMessageContext): Promise<void> {
    ctx.session.analytics.module = this.module
    const { voice, audio } = ctx.message.reply_to_message ?? { voice: undefined, audio: undefined }
    const media = voice ?? audio

    if (media === undefined) {
      await ctx.reply('The message must include audio content')
      return
    }

    // Validate before posting the progress message so nothing is left behind.
    if (!ctx.chat?.id) {
      throw Error('chat id is undefined')
    }
    const chatId = ctx.chat.id

    const progressMessage = await ctx.reply('Generating...')

    let resultText: string
    try {
      resultText = await this.transcribe(media.file_id)
    } finally {
      // Remove the progress indicator on success and on failure alike.
      await this.deleteProgressMessage(ctx, chatId, progressMessage.message_id)
    }

    if (resultText.length > 512) {
      const { username, first_name: firstName } = ctx.message.from
      // Not every Telegram user has a username; avoid "From @undefined.txt".
      const author = username !== undefined ? `@${username}` : firstName
      const translationFile = new InputFile(new TextEncoder().encode(resultText), `From ${author}.txt`)
      await bot.api.sendDocument(chatId, translationFile, {
        reply_to_message_id: ctx.message.message_id,
        caption: resultText.slice(0, 1024)
      })
    } else {
      await ctx.reply(resultText, { message_thread_id: ctx.message?.message_thread_id })
    }

    ctx.session.analytics.sessionState = SessionState.Success
  }

  /**
   * Downloads the Telegram file, runs it through `speechToText`, and always
   * removes the local copy afterwards, even when a step throws.
   */
  private async transcribe (fileId: string): Promise<string> {
    const file = await bot.api.getFile(fileId)
    // Tracks where the local copy currently lives so `finally` deletes it
    // whether or not the rename below has happened yet.
    let localPath = await download(file)
    try {
      // The file is renamed to carry an extension before being streamed;
      // presumably the transcription API infers the format from the file
      // name - TODO confirm against `speechToText`.
      const withExtension = `${localPath}.${VoiceToTextBot.extensionOf(file.file_path)}`
      await fs.promises.rename(localPath, withExtension)
      localPath = withExtension
      return await speechToText(fs.createReadStream(localPath))
    } finally {
      await fs.promises.rm(localPath, { force: true })
    }
  }

  /** Best-effort removal of the progress message; failures are only logged. */
  private async deleteProgressMessage (ctx: OnMessageContext, chatId: number, messageId: number): Promise<void> {
    try {
      await ctx.api.deleteMessage(chatId, messageId)
    } catch (err) {
      this.logger.warn({ err }, 'Failed to delete the progress message')
    }
  }

  /**
   * Extracts the extension (without the dot) from the last segment of a
   * Telegram file path, falling back to `fallback` when the path is missing
   * or its last segment has no extension.
   */
  private static extensionOf (filePath: string | undefined, fallback = 'ogg'): string {
    const baseName = filePath?.split('/').pop() ?? ''
    const dot = baseName.lastIndexOf('.')
    const hasExtension = dot >= 0 && dot < baseName.length - 1
    return hasExtension ? baseName.slice(dot + 1) : fallback
  }
}
53 changes: 53 additions & 0 deletions src/utils/files.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import * as pathLib from 'path'
import * as fs from 'fs'
import * as os from 'os'
import * as https from 'https'
import type { File } from '@grammyjs/types/manage.js'
import config from '../config'

/**
 * Creates a new uniquely named directory under the resolved OS temp
 * directory and returns the path of a file called `filedata` inside it.
 * Only the directory is created; the file itself is not.
 */
export async function createTempFile (): Promise<string> {
  const tmpRoot = await fs.promises.realpath(os.tmpdir())
  const uniqueDir = await fs.promises.mkdtemp(tmpRoot + pathLib.sep)
  return pathLib.join(uniqueDir, 'filedata')
}

/** Promise-based file copy; a direct re-export of `fs.promises.copyFile`. */
export const { copyFile } = fs.promises

/**
 * Downloads `url` over HTTPS and writes the response body to `dest`.
 *
 * The destination file is only opened once a 2xx response has arrived, so a
 * failed request or an error status never leaves a file behind. If the
 * transfer fails midway, the partial file is removed before rejecting.
 *
 * @param url HTTPS URL to fetch.
 * @param dest Path of the file to write.
 * @throws Error when the request fails, the server answers with a non-2xx
 * status, the connection closes before the body is complete, or writing to
 * `dest` fails.
 */
export async function downloadFile (url: string, dest: string): Promise<void> {
  await new Promise<void>((resolve, reject) => {
    const request = https.get(url, (res) => {
      const status = res.statusCode ?? 0
      if (status < 200 || status >= 300) {
        // Drain the body so the socket is released.
        res.resume()
        // The URL is deliberately left out of the message: callers may pass
        // URLs that embed credentials.
        reject(new Error(`Download failed with HTTP status ${status}`))
        return
      }

      const file = fs.createWriteStream(dest)
      let failed = false
      const fail = (err: Error): void => {
        if (failed) return
        failed = true
        res.destroy()
        file.destroy()
        // Best-effort cleanup; always report the original error.
        fs.unlink(dest, () => { reject(err) })
      }

      res.on('error', fail)
      res.on('close', () => {
        // A response that closes before it is complete never ends the pipe,
        // which would otherwise leave this promise pending forever.
        if (!res.complete) fail(new Error('Connection closed before the download completed'))
      })
      file.on('error', fail)
      // Resolve only after the descriptor is closed so callers can safely
      // rename or remove the file straight away.
      file.on('finish', () => { file.close(() => { resolve() }) })
      res.pipe(file)
    })
    request.on('error', reject)
  })
}

/**
 * Builds a bot file download URL of the form `<root>/file/bot<token>/<path>`.
 * No escaping or normalisation is applied to any of the parts.
 */
function buildFileUrl (root: string, token: string, path: string): string {
  return [root, 'file', 'bot' + token, path].join('/')
}
/**
 * Builds the download URL for `path` on `https://api.telegram.org`, using
 * the bot token from the application config.
 */
function buildLink (path: string): string {
  const apiRoot = 'https://api.telegram.org'
  return buildFileUrl(apiRoot, config.telegramBotAuthToken, path)
}
/**
 * Resolves where the content of `file` can be fetched from.
 *
 * An absolute `file_path` is returned unchanged (presumably a path on the
 * local disk, e.g. from a self-hosted Bot API server - TODO confirm); any
 * other value is turned into a download URL via `buildLink`.
 *
 * @throws Error when `file.file_path` is undefined.
 */
function getUrl (file: File): string {
  const { file_path: filePath, file_id: fileId } = file
  if (filePath === undefined) {
    throw new Error(`File path is not available for file '${fileId}'`)
  }
  if (pathLib.isAbsolute(filePath)) {
    return filePath
  }
  return buildLink(filePath)
}

/**
 * Fetches the content of `file` into a local file and returns its path.
 *
 * @param file File descriptor whose `file_path` locates the content.
 * @param path Destination path; when omitted, a new temp file path from
 * `createTempFile` is used.
 * @returns The path the content was written to.
 * @throws Error when the file has no `file_path`, or when copying or
 * downloading fails.
 */
export async function download (file: File, path?: string): Promise<string> {
  // Resolve the source first so a missing file_path fails before any
  // temporary directory is created.
  const source = getUrl(file)
  const target = path === undefined ? await createTempFile() : path

  if (pathLib.isAbsolute(source)) {
    // The source is already a local file: copy instead of fetching.
    await copyFile(source, target)
  } else {
    await downloadFile(source, target)
  }
  return target
}

0 comments on commit ba4f203

Please sign in to comment.