Skip to content

Commit

Permalink
refactor(audioanalyser): ✨ code refactoring and various UI optimisations
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastienrousseau committed Feb 6, 2024
1 parent d454196 commit 93e82b8
Show file tree
Hide file tree
Showing 11 changed files with 2,186 additions and 135 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -114,4 +114,8 @@ ENV/

# Database
*.db-shm
*.db-wal
*.db-wal

# Files
analysis_status.txt
recommendations_status.txt
2 changes: 1 addition & 1 deletion audioanalyser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
# limitations under the License.

"""The Python Audio Analyser module."""
__version__ = "0.0.5"
__version__ = "0.0.6"
19 changes: 19 additions & 0 deletions audioanalyser/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
from audioanalyser.modules.transcribe_audio_files import (
transcribe_audio_files,
)
from audioanalyser.modules.text_to_speech import (
text_to_speech,
)

# Configure logging
logging.basicConfig(
Expand Down Expand Up @@ -80,6 +83,20 @@ async def main():
""",
)

# Flag that triggers the text-to-speech demo (see modules/text_to_speech.py).
# The help text previously duplicated the speech-to-text description.
parser.add_argument(
    "-tts",
    "--text_to_speech",
    action="store_true",
    help="""
    This command synthesizes a sample phrase to speech using the Azure
    Text-to-Speech service and saves the resulting audio file in the
    folder configured by the RECORDS_FOLDER environment variable.
    Requires AZURE_AUDIO_TEXT_KEY and REGION to be set.
    """,
)

parser.add_argument(
"-sum",
"--summary",
Expand Down Expand Up @@ -138,6 +155,8 @@ async def main():
if args.record != "default":
settings = load_audio_settings(args.record)
await audio_recorder(settings)
elif args.text_to_speech:
text_to_speech()
elif args.translate:
azure_translator(*args.translate)
else:
Expand Down
87 changes: 67 additions & 20 deletions audioanalyser/modules/azure_recommendation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
import openai
from pathlib import Path
import logging
import json
import sqlite3
from dotenv import load_dotenv
from typing import Iterator
import os
Expand Down Expand Up @@ -207,30 +209,75 @@ def __init__(self, config: Config) -> None:
def generate_recommendations(self) -> None:
"""
Processes each transcript file and generates recommendations.
The recommendations are saved in the configured recommendations folder.
The recommendations are saved in various formats in the configured folders.
"""
# Ensure the base folders exist
self.config.RECOMMENDATIONS_FOLDER.mkdir(exist_ok=True)

for transcript in Transcript.iter_transcripts(
self.config.TRANSCRIPTS_FOLDER
):
recommendation_text = self.generate_recommendation(
transcript
)
db_filename = self.config.RECOMMENDATIONS_FOLDER / "recommendations.db"
table_name = "recommendations"

recommendation_filename = (
f"azure_recommendation-{transcript.path.name}"
)
with open(
self.config.RECOMMENDATIONS_FOLDER
/ recommendation_filename,
"w",
) as recommendation_file:
recommendation_file.write(recommendation_text)

logger.info(
f"Generated recommendation for {transcript.path.name}"
)
for transcript in Transcript.iter_transcripts(self.config.TRANSCRIPTS_FOLDER):
recommendation_text = self.generate_recommendation(transcript)

# Saving as a text file
recommendation_filename_txt = f"azure_recommendation-{transcript.path.stem}.txt"
self.save_text_to_file(self.config.RECOMMENDATIONS_FOLDER / recommendation_filename_txt, recommendation_text)

# Saving as a JSON file
recommendation_filename_json = f"azure_recommendation-{transcript.path.stem}.json"
self.save_data_to_json(self.config.RECOMMENDATIONS_FOLDER / recommendation_filename_json, {"recommendation": recommendation_text})

# Preparing data for SQLite insertion
data_to_insert = [(transcript.path.name, recommendation_text)]

# Inserting into SQLite database
self.insert_data_to_sqlite(db_filename, table_name, data_to_insert)

logger.info(f"Generated recommendation for {transcript.path.name}")


def save_text_to_file(self, output_path: Path, content: str) -> None:
    """
    Saves text data to a UTF-8 encoded file, creating parent folders.

    Args:
        output_path (Path): The full path to the output file.
        content (str): The text content to write.

    Raises:
        OSError: If the folder cannot be created or the file written.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8: without it the platform default encoding is used,
    # which can raise UnicodeEncodeError (or produce mojibake) for
    # non-ASCII text on systems whose locale is not UTF-8.
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(content)
    logger.info(f"Saved text data to {output_path}")

def save_data_to_json(self, json_path: Path, data) -> None:
    """
    Saves data to a JSON file (4-space indent), creating parent folders.

    Args:
        json_path (Path): The full path to the JSON file.
        data: The data to serialize to JSON.

    Raises:
        TypeError: If ``data`` contains values that are not JSON
            serializable. An existing file is left untouched in that case.
        OSError: If the folder cannot be created or the file written.
    """
    # Serialize first: opening the file in "w" mode truncates it, so a
    # serialization failure after opening would leave a corrupt/partial
    # file behind.
    serialized = json.dumps(data, indent=4)
    json_path.parent.mkdir(parents=True, exist_ok=True)
    with open(json_path, "w", encoding="utf-8") as json_file:
        json_file.write(serialized)
    logger.info(f"Saved data to JSON file {json_path}")

def insert_data_to_sqlite(self, db_path: Path, table_name: str, data: list) -> None:
    """
    Inserts rows into an SQLite table, creating the database file, its
    parent folders and the table if they do not exist yet.

    Args:
        db_path (Path): The path to the SQLite database file.
        table_name (str): The name of the table to insert data into.
            It is interpolated into the SQL text, so it must come from
            trusted code, never from user input.
        data (list): A list of ``(filename, text)`` tuples, one per row.

    Raises:
        sqlite3.Error: If the table cannot be created or a row inserted;
            the transaction is rolled back in that case.
    """
    db_path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(db_path)
    try:
        # "with conn" only commits (or rolls back on error); it does NOT
        # close the connection, hence the explicit close() below.
        with conn:
            conn.execute(
                f"CREATE TABLE IF NOT EXISTS {table_name} "
                "(filename TEXT, transcription TEXT)"
            )
            conn.executemany(
                f"INSERT INTO {table_name} (filename, transcription) "
                "VALUES (?, ?)",
                data,
            )
    finally:
        conn.close()
    logger.info(f"Inserted data into {db_path} in table {table_name}")

def generate_recommendation(self, transcript: Transcript) -> str:
"""
Expand Down
Binary file not shown.
39 changes: 38 additions & 1 deletion audioanalyser/modules/speech_text_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,43 @@ def list_audio_files(self):
"error": "An error occurred while fetching the file list"
}

@cherrypy.expose
def serve_audio(self, filename):
    """
    Endpoint to serve audio files from ``./resources/input``.

    Args:
        filename: Name of the requested file, as supplied by the client.

    Returns:
        The file content produced by ``cherrypy.lib.static.serve_file``
        with content type ``audio/wav``, or an error payload with status
        500 on an unexpected failure.

    Raises:
        cherrypy.NotFound: If the file does not exist or ``filename``
            resolves to a location outside the input folder.
    """
    try:
        files_dir = os.path.abspath("./resources/input")
        full_path = os.path.abspath(os.path.join(files_dir, filename))

        # filename is client-controlled: reject "../" sequences and
        # absolute paths that would escape the input folder.
        if not full_path.startswith(files_dir + os.sep):
            raise cherrypy.NotFound()

        return cherrypy.lib.static.serve_file(full_path, content_type="audio/wav")
    except cherrypy.CherryPyException:
        # CherryPy reports HTTP outcomes (e.g. NotFound for a missing
        # file) by raising; let the framework turn them into the proper
        # response instead of masking them as a 500.
        raise
    except Exception as e:
        cherrypy.log(f"Error serving audio file: {str(e)}")
        cherrypy.response.status = 500
        # NOTE(review): this handler has no json_out tool, so this dict is
        # presumably not rendered as JSON — confirm the intended error body.
        return {
            "error": "An error occurred while serving the audio file"
        }

@cherrypy.expose
def download_audio(self, filename):
    """
    Endpoint to trigger an audio file download from ``./resources/input``.

    Args:
        filename: Name of the requested file, as supplied by the client.

    Returns:
        The file content produced by ``cherrypy.lib.static.serve_file``,
        sent as an ``application/octet-stream`` attachment, or an error
        payload with status 500 on an unexpected failure.

    Raises:
        cherrypy.NotFound: If the file does not exist or ``filename``
            resolves to a location outside the input folder.
    """
    try:
        files_dir = os.path.abspath("./resources/input")
        full_path = os.path.abspath(os.path.join(files_dir, filename))

        # filename is client-controlled: reject "../" sequences and
        # absolute paths that would escape the input folder.
        if not full_path.startswith(files_dir + os.sep):
            raise cherrypy.NotFound()

        # Let serve_file build the Content-Disposition header from the
        # sanitized base name rather than interpolating the raw request
        # value into a header; this also avoids leaving the header set
        # on an error response.
        return cherrypy.lib.static.serve_file(
            full_path,
            content_type="application/octet-stream",
            disposition="attachment",
            name=os.path.basename(full_path),
        )
    except cherrypy.CherryPyException:
        # CherryPy reports HTTP outcomes (e.g. NotFound for a missing
        # file) by raising; let the framework turn them into the proper
        # response instead of masking them as a 500.
        raise
    except Exception as e:
        cherrypy.log(f"Error serving audio file for download: {str(e)}")
        cherrypy.response.status = 500
        # NOTE(review): this handler has no json_out tool, so this dict is
        # presumably not rendered as JSON — confirm the intended error body.
        return {
            "error": "An error occurred while serving the audio file for download"
        }

@cherrypy.expose
@cherrypy.tools.json_out()
def process_text_analysis(self):
Expand Down Expand Up @@ -349,7 +386,7 @@ def process_all_translations(self):
)
thread.start()

message = "Translation process started for country code: "
message = "Translation process started: "
result_message = message + str(countryCode)
return {"result": result_message}

Expand Down
130 changes: 130 additions & 0 deletions audioanalyser/modules/text_to_speech.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import azure.cognitiveservices.speech as speechsdk
import logging
import os
from dotenv import load_dotenv
from pathlib import Path

# Load environment variables (API key, region, folders) from a .env file,
# so that Config below can read them through os.getenv.
load_dotenv()

# Logging setup for this module. NOTE: logging.basicConfig is a no-op when
# the root logger already has handlers (e.g. configured by the application
# entry point), so this mainly matters when the module is run directly.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s app - %(message)s",
)
# Module-level logger used by Config, TextToSpeech and text_to_speech().
logger = logging.getLogger("AzureTextToSpeech")


class Config:
    """
    Configuration parameters for the Azure Text-to-Speech API.

    All values are read from environment variables when the object is
    created, and validated immediately.

    Attributes:
        - api_key (str): The API key (``AZURE_AUDIO_TEXT_KEY``).
        - region (str): The Azure region of the service (``REGION``).
        - OUTPUT_FOLDER (str): The folder path where the synthesized audio
          files will be saved (``RECORDS_FOLDER``).
        - audio_extension (str): The audio file extension for the output
          files (``AUDIO_EXTENSION``, defaults to ``"wav"``).
    """

    def __init__(self):
        self.api_key = os.getenv("AZURE_AUDIO_TEXT_KEY")
        self.region = os.getenv("REGION")
        self.OUTPUT_FOLDER = os.getenv("RECORDS_FOLDER")
        self.audio_extension = os.getenv("AUDIO_EXTENSION", "wav")
        self.validate()

    def validate(self):
        """
        Validate the configuration parameters.

        Logs the names of the environment variables that are not set.

        Raises:
            EnvironmentError: If any required environment variables are
                missing.
        """
        # Map each environment variable name to the value that was read,
        # so the error message can name exactly what is missing. (The
        # previous implementation inspected locals(), which never holds
        # a None value here, so the logged list was always empty.)
        required_vars = {
            "AZURE_AUDIO_TEXT_KEY": self.api_key,
            "REGION": self.region,
            "RECORDS_FOLDER": self.OUTPUT_FOLDER,
            "AUDIO_EXTENSION": self.audio_extension,
        }
        missing = [
            name for name, value in required_vars.items() if value is None
        ]
        if missing:
            logger.error(
                f"Missing environment variables: {', '.join(missing)}"
            )
            raise EnvironmentError(
                "Missing required environment variables."
            )


class TextToSpeech:
    """
    Synthesizes speech from text using the Azure Text-to-Speech API.

    Args:
        config (Config): The configuration parameters for the Azure
            Text-to-Speech API.

    Attributes:
        config (Config): The configuration parameters for the Azure
            Text-to-Speech API.
    """

    def __init__(self, config):
        self.config = config

    def synthesize_text(
        self,
        text: str,
        filename: str,
        language: str = "en-GB",
        voice: str = "en-GB-RyanNeural",
    ):
        """
        Synthesize speech from text and save it to an audio file.

        The file is written to ``<project root>/<config.OUTPUT_FOLDER>``
        as ``<filename>.<config.audio_extension>``. On failure nothing is
        written and the reason is logged.

        Args:
            text (str): The text to synthesize.
            filename (str): The base name for the output audio file,
                without extension.
            language (str): The synthesis language. Defaults to "en-GB".
            voice (str): The synthesis voice name. Defaults to
                "en-GB-RyanNeural".
        """
        speech_config = speechsdk.SpeechConfig(
            subscription=self.config.api_key, region=self.config.region
        )
        speech_config.speech_synthesis_language = language
        speech_config.speech_synthesis_voice_name = voice

        # NOTE(review): with no audio_config the SDK presumably also plays
        # the audio on the default speaker; pass audio_config=None if
        # file-only output is intended — confirm.
        synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
        result = synthesizer.speak_text_async(text).get()

        if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
            logger.error("Failed to synthesize speech from text.")
            # Surface why the service gave up (bad key, region, quota...).
            if result.reason == speechsdk.ResultReason.Canceled:
                details = result.cancellation_details
                logger.error(f"Speech synthesis canceled: {details.reason}")
                if details.reason == speechsdk.CancellationReason.Error:
                    logger.error(f"Error details: {details.error_details}")
            return

        # Resolve the output folder from the configuration (not by
        # re-reading the environment) and without mutating the shared
        # config object. An absolute OUTPUT_FOLDER is used as is.
        project_root = Path(__file__).resolve().parents[2]
        output_dir = project_root / self.config.OUTPUT_FOLDER
        output_dir.mkdir(parents=True, exist_ok=True)

        output_path = output_dir / f"{filename}.{self.config.audio_extension}"
        with open(output_path, "wb") as audio_file:
            audio_file.write(result.audio_data)
        logger.info(f"Audio file saved to {output_path}")


def text_to_speech():
    """
    Run the text-to-speech demo: synthesize a fixed sample phrase and save
    it under the base file name "text_to_speech".

    Any failure (configuration or synthesis) is logged, not raised.
    """
    # Sample input and output base name; adjust for other phrases/files.
    sample_text = "Thank you for your time today!"
    output_name = "text_to_speech"
    try:
        TextToSpeech(Config()).synthesize_text(sample_text, output_name)
    except Exception as error:
        logger.error(f"Failed to synthesize text to speech: {error}")


# Allow running this module directly as a script to synthesize the sample
# phrase defined in text_to_speech().
if __name__ == "__main__":
    text_to_speech()
Loading

0 comments on commit 93e82b8

Please sign in to comment.