Skip to content

Commit

Permalink
Upsample to 48 kHz
Browse files: browse the repository at this point in the history
  • Loading branch information
srhinos committed Jun 5, 2024
1 parent 86ebf8b commit 3becc50
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 2 deletions.
4 changes: 3 additions & 1 deletion vocode/streaming/synthesizer/eleven_labs_synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def __init__(
self.optimize_streaming_latency = synthesizer_config.optimize_streaming_latency
self.words_per_minute = 150
self.upsample = False
self.sample_rate = self.synthesizer_config.sampling_rate

if self.synthesizer_config.audio_encoding == AudioEncoding.LINEAR16:
match self.synthesizer_config.sampling_rate:
Expand All @@ -52,6 +53,7 @@ def __init__(
case SamplingRate.RATE_48000:
self.output_format = "pcm_44100"
self.upsample = SamplingRate.RATE_48000.value
self.sample_rate = SamplingRate.RATE_44100.value
case _:
raise ValueError(
f"Unsupported sampling rate: {self.synthesizer_config.sampling_rate}. Elevenlabs only supports 16000, 22050, 24000, and 44100 Hz."
Expand Down Expand Up @@ -148,7 +150,7 @@ async def get_chunks(
if self.upsample:
chunk = self._resample_chunk(
chunk,
self.synthesizer_config.sampling_rate,
self.sample_rate,
self.upsample,
)
chunk_queue.put_nowait(chunk)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ def __init__(
"writer": None,
}
self.end_of_turn = False
self.upsample = False
self.sample_rate = self.synthesizer_config.sampling_rate

# While this looks useless, we need to assign the response of `asyncio.gather`
# to *something* or we risk garbage collection of the running coroutines spawned
Expand All @@ -124,6 +126,10 @@ def __init__(
self.output_format = "pcm_24000"
case SamplingRate.RATE_44100:
self.output_format = "pcm_44100"
case SamplingRate.RATE_48000:
self.output_format = "pcm_44100"
self.upsample = SamplingRate.RATE_48000.value
self.sample_rate = SamplingRate.RATE_44100.value
case _:
raise ValueError(
f"Unsupported sampling rate: {self.synthesizer_config.sampling_rate}. Elevenlabs only supports 16000, 22050, 24000, and 44100 Hz."
Expand Down Expand Up @@ -212,12 +218,21 @@ async def listen() -> None:
message = await ws.recv()
if "audio" not in message:
continue
response = ElevenLabsWebsocketResponse.parse_raw(message)
response = ElevenLabsWebsocketResponse.model_validate_json(message)
if response.audio:
decoded = base64.b64decode(response.audio)
seconds = len(decoded) / (
self.sample_width * self.synthesizer_config.sampling_rate
)

if self.upsample:
decoded = self._resample_chunk(
decoded,
self.sample_rate,
self.upsample,
)
seconds = len(decoded) / (self.sample_width * self.sample_rate)

if response.alignment:
utterance_chunk = "".join(response.alignment.chars) + " "
self.current_turn_utterances_by_chunk.append((utterance_chunk, seconds))
Expand Down

0 comments on commit 3becc50

Please sign in to comment.