Skip to content

Commit

Permalink
Merge pull request #332 from harmony-one/v11-voices
Browse files Browse the repository at this point in the history
V11 voices
  • Loading branch information
theofandrich authored Oct 5, 2023
2 parents 8348cf7 + 188856c commit ba563a2
Show file tree
Hide file tree
Showing 10 changed files with 6,393 additions and 55 deletions.
3 changes: 2 additions & 1 deletion src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,5 +162,6 @@ export default {
index: process.env.ES_INDEX
},
deepL: { apikey: process.env.DEEPL_API_KEY ?? '' },
gc: { credentials: process.env.GC_CREDENTIALS ?? '' }
gc: { credentials: process.env.GC_CREDENTIALS ?? '' },
elevenlabs: { apiKey: process.env.ELEVENLABS_API_KEY ?? '' }
}
47 changes: 43 additions & 4 deletions src/elevenlabs/elevenlabsClient.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,37 @@
import axios, { type AxiosInstance } from 'axios'

/**
 * One entry of the `voices` array returned by `GET /v1/voices`.
 *
 * Property types are deliberately wider than a single sample response:
 * values that were observed only as `null`, `[]`, `0`, `false` or one
 * specific string are typed as their general form so that voices with other
 * values still satisfy the interface. Where the non-null shape is not known
 * from this file the type is `unknown` (or `unknown[]`); narrow it at the
 * call site once confirmed against the ElevenLabs API reference.
 */
interface Voice {
  voice_id: string
  name: string
  /** Observed as `null`; element shape is not modelled here. */
  samples: unknown[] | null
  /** Observed value: `'premade'`. */
  category: string
  fine_tuning: {
    /** Observed as `null`; presumably a language code otherwise (to confirm). */
    language: string | null
    is_allowed_to_fine_tune: boolean
    fine_tuning_requested: boolean
    /** Observed value: `'not_started'`. */
    finetuning_state: string
    verification_attempts: unknown[] | null
    verification_failures: unknown[]
    verification_attempts_count: number
    slice_ids: unknown[] | null
    /** Observed as `null`; shape unknown. */
    manual_verification: unknown
    manual_verification_requested: boolean
  }
  /** Free-form descriptive tags; the example values come from a sample response. */
  labels: {
    /** e.g. `'american'` */
    accent: string
    /** e.g. `'strong'` */
    description: string
    /** e.g. `'young'` */
    age: string
    /** e.g. `'female'` */
    gender: string
    /** e.g. `'narration'` */
    'use case': string
  }
  /** Observed as `null`; presumably free text otherwise (to confirm). */
  description: string | null
  preview_url: string
  available_for_tiers: unknown[]
  /** Observed as `null`; shape unknown. */
  settings: unknown
  /** Observed as `null`; shape unknown. */
  sharing: unknown
  high_quality_base_model_ids: unknown[]
}

export class ElevenlabsClient {
private readonly _token: string
private readonly _httpClient: AxiosInstance
Expand All @@ -17,13 +49,20 @@ export class ElevenlabsClient {
}

/**
 * Requests synthesized speech for `text` using the voice `voiceId`.
 *
 * Posts to `/v1/text-to-speech/{voiceId}` with the `eleven_multilingual_v2`
 * model and fixed voice settings, asks the HTTP client for an `arraybuffer`
 * response, and returns the response body wrapped in a `Buffer`.
 *
 * @param text - The text to be spoken (sent as the `text` field of the body).
 * @param voiceId - Identifier of the voice; it becomes a URL path segment.
 * @returns The response body as a `Buffer`.
 */
public async textToSpeech ({ text, voiceId }: { text: string, voiceId: string }): Promise<string | Uint8Array | null | undefined> {
  // Encode the id so characters such as '/' or '?' cannot alter the request path.
  const endpoint = `/v1/text-to-speech/${encodeURIComponent(voiceId)}`

  const response = await this._httpClient.post(endpoint, {
    text,
    model_id: 'eleven_multilingual_v2',
    voice_settings: {
      stability: 0.5,
      similarity_boost: 0.5
    }
  }, { responseType: 'arraybuffer' })

  return Buffer.from(response.data, 'binary')
}

/**
 * Fetches `/v1/voices` and returns the `voices` array from the response body.
 */
public async voiceList (): Promise<Voice[]> {
  const { data } = await this._httpClient.get<{ voices: Voice[] }>('/v1/voices')
  return data.voices
}
}
19 changes: 19 additions & 0 deletions src/elevenlabs/sandbox.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { ElevenlabsClient } from './elevenlabsClient'
import config from '../config'

/**
 * Renders a label map as `key: value; ` pairs concatenated in entry order.
 * Every pair, including the last, is followed by `"; "`; an empty map
 * yields an empty string.
 */
function labelsToString (labels: Record<string, string>): string {
  let rendered = ''
  for (const pair of Object.entries(labels)) {
    rendered += `${pair.join(': ')}; `
  }
  return rendered
}

/**
 * Sandbox script body: builds a client from the configured ElevenLabs API key,
 * fetches the voice list, and prints one line per voice with its id, name and
 * formatted labels.
 */
async function main (): Promise<void> {
  const voices = await new ElevenlabsClient(config.elevenlabs.apiKey).voiceList()

  voices.forEach(({ voice_id: id, name, labels }) => {
    console.log(id, name, '\t', labelsToString(labels))
  })
}

// Script entry point: run main, print a completion marker, and log any rejection.
main()
  .then(() => {
    console.log('### finish')
  })
  .catch(console.log)
Empty file removed src/elevenlabs/test.ts
Empty file.
10 changes: 8 additions & 2 deletions src/google-cloud/gcTextToSpeechClient.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import GcTextToSpeech, { type TextToSpeechClient } from '@google-cloud/text-to-speech'
import config from '../config'
import type { CredentialBody } from 'google-auth-library/build/src/auth/credentials'
import type { google } from '@google-cloud/text-to-speech/build/protos/protos'

/**
 * Parameters accepted by the Google Cloud text-to-speech wrapper.
 *
 * `ssmlGender` and `voiceName` are each declared exactly once, using the
 * wider types, which still admit the earlier `'MALE' | 'FEMALE'` and plain
 * `string` values.
 */
export interface TextToSpeechParams {
  text: string
  languageCode: string
  /** Either a `SsmlVoiceGender` enum value, one of that enum's key names, or `null`. */
  ssmlGender?: google.cloud.texttospeech.v1.SsmlVoiceGender | keyof typeof google.cloud.texttospeech.v1.SsmlVoiceGender | null
  voiceName?: string | null
}

class GcTextToSpeechClient {
Expand Down Expand Up @@ -35,6 +36,11 @@ class GcTextToSpeechClient {

return response.audioContent
}

/**
 * Calls the underlying client's `listVoices` and returns the `voices` field
 * of the first element of its result.
 */
async listVoices (): Promise<google.cloud.texttospeech.v1.IVoice[] | null | undefined> {
  const [listing] = await this._client.listVoices()
  return listing.voices
}
}

// Decode the base64-encoded `config.gc.credentials` string to UTF-8 text and
// parse it as JSON. This runs at module load time.
// NOTE(review): if the configured value is an empty string, the decoded text
// is empty and JSON.parse throws a SyntaxError on import — confirm that the
// credentials are always set wherever this module is loaded.
const credentials = JSON.parse(Buffer.from(config.gc.credentials, 'base64').toString('utf-8'))
Expand Down
Loading

0 comments on commit ba563a2

Please sign in to comment.