refactor(audioanalyser): ✨ code refactoring and various UI optimisations

sebastienrousseau · Feb 6, 2024 · 93e82b8 · 93e82b8
1 parent d454196
commit 93e82b8
Show file tree

Hide file tree

Showing 11 changed files with 2,186 additions and 135 deletions.
diff --git a/.gitignore b/.gitignore
@@ -114,4 +114,8 @@ ENV/
 
 # Database
 *.db-shm
-*.db-wal
+*.db-wal
+
+# Files
+analysis_status.txt
+recommendations_status.txt
diff --git a/audioanalyser/__init__.py b/audioanalyser/__init__.py
@@ -14,4 +14,4 @@
 # limitations under the License.
 
 """The Python Audio Analyser module."""
-__version__ = "0.0.5"
+__version__ = "0.0.6"
diff --git a/audioanalyser/__main__.py b/audioanalyser/__main__.py
@@ -38,6 +38,9 @@
 from audioanalyser.modules.transcribe_audio_files import (
     transcribe_audio_files,
 )
+from audioanalyser.modules.text_to_speech import (
+    text_to_speech,
+)
 
 # Configure logging
 logging.basicConfig(
@@ -80,6 +83,20 @@ async def main():
         """,
     )
 
+    # Text-to-speech: synthesizes audio from text (see modules/text_to_speech.py).
+    parser.add_argument(
+        "-tts",
+        "--text_to_speech",
+        action="store_true",
+        help="""
+This command converts text to speech using Azure Cognitive Services.
+It synthesizes a sample sentence with a British English neural voice
+and saves the resulting audio file in the configured records folder.
+Ideal for generating spoken prompts or audio feedback.
+        """,
+    )
+
     parser.add_argument(
         "-sum",
         "--summary",
@@ -138,6 +155,8 @@ async def main():
             if args.record != "default":
                 settings = load_audio_settings(args.record)
             await audio_recorder(settings)
+        elif args.text_to_speech:
+            text_to_speech()
         elif args.translate:
             azure_translator(*args.translate)
         else:

diff --git a/audioanalyser/modules/azure_recommendation.py b/audioanalyser/modules/azure_recommendation.py
@@ -39,6 +39,8 @@
 import openai
 from pathlib import Path
 import logging
+import json
+import sqlite3
 from dotenv import load_dotenv
 from typing import Iterator
 import os
@@ -207,30 +209,75 @@ def __init__(self, config: Config) -> None:
     def generate_recommendations(self) -> None:
         """
         Processes each transcript file and generates recommendations.
-        The recommendations are saved in the configured recommendations folder.
+        Each recommendation is saved in the configured recommendations
+        folder three times: as a ``.txt`` file, as a ``.json`` file (under
+        the ``"recommendation"`` key), and as a row in the
+        ``recommendations`` table of ``recommendations.db``.
         """
+        # Ensure the recommendations folder exists (parent folders are not
+        # created by this call; the save helpers create them when writing).
         self.config.RECOMMENDATIONS_FOLDER.mkdir(exist_ok=True)
 
-        for transcript in Transcript.iter_transcripts(
-            self.config.TRANSCRIPTS_FOLDER
-        ):
-            recommendation_text = self.generate_recommendation(
-                transcript
-            )
+        # The SQLite database lives next to the text and JSON outputs.
+        db_filename = self.config.RECOMMENDATIONS_FOLDER / "recommendations.db"
+        table_name = "recommendations"
 
-            recommendation_filename = (
-                f"azure_recommendation-{transcript.path.name}"
-            )
-            with open(
-                self.config.RECOMMENDATIONS_FOLDER
-                / recommendation_filename,
-                "w",
-            ) as recommendation_file:
-                recommendation_file.write(recommendation_text)
-
-            logger.info(
-                f"Generated recommendation for {transcript.path.name}"
-            )
+        for transcript in Transcript.iter_transcripts(self.config.TRANSCRIPTS_FOLDER):
+            recommendation_text = self.generate_recommendation(transcript)
+
+            # Saving as a text file; the name is built from the transcript's
+            # stem, so the transcript's own extension is replaced by ".txt".
+            recommendation_filename_txt = f"azure_recommendation-{transcript.path.stem}.txt"
+            self.save_text_to_file(self.config.RECOMMENDATIONS_FOLDER / recommendation_filename_txt, recommendation_text)
+
+            # Saving as a JSON file with the same stem-based name.
+            recommendation_filename_json = f"azure_recommendation-{transcript.path.stem}.json"
+            self.save_data_to_json(self.config.RECOMMENDATIONS_FOLDER / recommendation_filename_json, {"recommendation": recommendation_text})
+
+            # Preparing data for SQLite insertion: one (filename, text) row,
+            # keyed by the transcript's full file name (extension included).
+            data_to_insert = [(transcript.path.name, recommendation_text)]
+
+            # Inserting into SQLite database
+            self.insert_data_to_sqlite(db_filename, table_name, data_to_insert)
+
+            logger.info(f"Generated recommendation for {transcript.path.name}")
+
+
+    def save_text_to_file(self, output_path: Path, content: str) -> None:
+        """
+        Saves text data to a file, creating parent folders as needed.
+
+        The file is always written as UTF-8 so that non-ASCII characters in
+        the content do not depend on the platform's default encoding.
+
+        Args:
+            output_path (Path): The full path to the output file.
+            content (str): The text content to write.
+        """
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(output_path, "w", encoding="utf-8") as file:
+            file.write(content)
+        logger.info(f"Saved text data to {output_path}")
+
+    def save_data_to_json(self, json_path: Path, data) -> None:
+        """
+        Serializes ``data`` as indented JSON and writes it to ``json_path``.
+
+        Missing parent folders of ``json_path`` are created first.
+
+        Args:
+            json_path (Path): The full path to the JSON file.
+            data: The data to serialize to JSON.
+        """
+        target_dir = json_path.parent
+        target_dir.mkdir(parents=True, exist_ok=True)
+        with json_path.open("w") as handle:
+            json.dump(data, handle, indent=4)
+        logger.info(f"Saved data to JSON file {json_path}")
+
+    def insert_data_to_sqlite(self, db_path: Path, table_name: str, data: list) -> None:
+        """
+        Inserts data into an SQLite database.
+
+        The table is created on first use with the columns ``filename`` and
+        ``transcription``. All rows are inserted in a single transaction and
+        the connection is closed before returning.
+
+        Args:
+            db_path (Path): The path to the SQLite database file.
+            table_name (str): The name of the table to insert data into.
+                It is interpolated into the SQL text, so it must be a
+                trusted identifier, never user input.
+            data (list): A list of tuples, where each tuple represents the data to insert per row.
+        """
+        db_path.parent.mkdir(parents=True, exist_ok=True)
+        conn = sqlite3.connect(db_path)
+        try:
+            # ``with conn`` only commits or rolls back the transaction; it
+            # does not close the connection, hence the explicit close below.
+            with conn:
+                conn.execute(f"CREATE TABLE IF NOT EXISTS {table_name} (filename TEXT, transcription TEXT)")
+                conn.executemany(f"INSERT INTO {table_name} (filename, transcription) VALUES (?, ?)", data)
+        finally:
+            conn.close()
+        logger.info(f"Inserted data into {db_path} in table {table_name}")
 
     def generate_recommendation(self, transcript: Transcript) -> str:
         """

diff --git a/audioanalyser/modules/resources/records/text_to_speech..wav b/audioanalyser/modules/resources/records/text_to_speech..wav
diff --git a/audioanalyser/modules/speech_text_server.py b/audioanalyser/modules/speech_text_server.py
@@ -134,6 +134,43 @@ def list_audio_files(self):
                 "error": "An error occurred while fetching the file list"
             }
 
+    @cherrypy.expose
+    def serve_audio(self, filename):
+        """
+        Endpoint to serve audio files from the input folder.
+
+        Args:
+            filename (str): Name of the requested file, relative to
+                ``./resources/input``.
+
+        Returns:
+            The response body produced by ``cherrypy.lib.static.serve_file``.
+
+        Raises:
+            cherrypy.HTTPError: 400 when ``filename`` resolves outside the
+                input folder, 500 when serving fails unexpectedly. HTTP
+                errors and redirects raised by ``serve_file`` itself
+                propagate unchanged.
+        """
+        files_dir = os.path.abspath("./resources/input")
+        full_path = os.path.abspath(os.path.join(files_dir, filename))
+
+        # ``filename`` comes from the request: refuse absolute paths and
+        # ``..`` segments that would escape the input folder.
+        if not full_path.startswith(files_dir + os.sep):
+            raise cherrypy.HTTPError(400, "Invalid audio file name")
+
+        try:
+            return cherrypy.lib.static.serve_file(full_path, content_type="audio/wav")
+        except (cherrypy.HTTPError, cherrypy.HTTPRedirect):
+            # Keep the status chosen by serve_file instead of masking it as 500.
+            raise
+        except Exception as e:
+            cherrypy.log(f"Error serving audio file: {str(e)}")
+            # This handler has no json_out tool, so the failure is reported
+            # through an HTTP error rather than by returning a dict.
+            raise cherrypy.HTTPError(
+                500, "An error occurred while serving the audio file"
+            )
+
+    @cherrypy.expose
+    def download_audio(self, filename):
+        """
+        Endpoint to trigger audio file download from the input folder.
+
+        Args:
+            filename (str): Name of the requested file, relative to
+                ``./resources/input``.
+
+        Returns:
+            The response body produced by ``cherrypy.lib.static.serve_file``,
+            sent with an ``attachment`` Content-Disposition.
+
+        Raises:
+            cherrypy.HTTPError: 400 when ``filename`` resolves outside the
+                input folder, 500 when serving fails unexpectedly. HTTP
+                errors and redirects raised by ``serve_file`` itself
+                propagate unchanged.
+        """
+        files_dir = os.path.abspath("./resources/input")
+        full_path = os.path.abspath(os.path.join(files_dir, filename))
+
+        # ``filename`` comes from the request: refuse absolute paths and
+        # ``..`` segments that would escape the input folder.
+        if not full_path.startswith(files_dir + os.sep):
+            raise cherrypy.HTTPError(400, "Invalid audio file name")
+
+        try:
+            # Let serve_file build the Content-Disposition header from the
+            # validated base name instead of echoing the raw request value.
+            return cherrypy.lib.static.serve_file(
+                full_path,
+                content_type="application/octet-stream",
+                disposition="attachment",
+                name=os.path.basename(full_path),
+            )
+        except (cherrypy.HTTPError, cherrypy.HTTPRedirect):
+            # Keep the status chosen by serve_file instead of masking it as 500.
+            raise
+        except Exception as e:
+            cherrypy.log(f"Error serving audio file for download: {str(e)}")
+            # This handler has no json_out tool, so the failure is reported
+            # through an HTTP error rather than by returning a dict.
+            raise cherrypy.HTTPError(
+                500, "An error occurred while serving the audio file for download"
+            )
+
     @cherrypy.expose
     @cherrypy.tools.json_out()
     def process_text_analysis(self):
@@ -349,7 +386,7 @@ def process_all_translations(self):
             )
             thread.start()
 
-            message = "Translation process started for country code: "
+            message = "Translation process started: "
             result_message = message + str(countryCode)
             return {"result": result_message}
 

diff --git a/audioanalyser/modules/text_to_speech.py b/audioanalyser/modules/text_to_speech.py
@@ -0,0 +1,130 @@
+import azure.cognitiveservices.speech as speechsdk
+import logging
+import os
+from dotenv import load_dotenv
+from pathlib import Path
+
+# Load variables from a .env file into the environment before Config
+# reads them with os.getenv.
+load_dotenv()
+
+# Configure logging at INFO level with a timestamped message format.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s %(levelname)s app - %(message)s",
+)
+# Logger shared by the classes and functions in this module.
+logger = logging.getLogger("AzureTextToSpeech")
+
+
+class Config:
+    """
+    Configuration parameters for the Azure Text-to-Speech API.
+
+    All values are read from environment variables when the instance is
+    created, and the instance validates itself immediately.
+
+    Attributes:
+        - api_key (str): The API key for the Azure Text-to-Speech API
+          (``AZURE_AUDIO_TEXT_KEY``).
+        - region (str): The Azure region where the Text-to-Speech API is
+          located (``REGION``).
+        - OUTPUT_FOLDER (str): The folder path where the synthesized audio
+          files will be saved (``RECORDS_FOLDER``).
+        - audio_extension (str): The audio file extension for the output
+          files (``AUDIO_EXTENSION``, defaults to ``"wav"``).
+    """
+
+    def __init__(self):
+        self.api_key = os.getenv("AZURE_AUDIO_TEXT_KEY")
+        self.region = os.getenv("REGION")
+        self.OUTPUT_FOLDER = os.getenv("RECORDS_FOLDER")
+        self.audio_extension = os.getenv("AUDIO_EXTENSION", "wav")
+        self.validate()
+
+    def validate(self):
+        """
+        Validate the configuration parameters.
+
+        Raises:
+            EnvironmentError: If any required environment variables are
+            missing; the message names the missing variables.
+        """
+        # Keep the environment variable name next to each value so the
+        # error can say exactly which ones are unset.
+        required_vars = {
+            "AZURE_AUDIO_TEXT_KEY": self.api_key,
+            "REGION": self.region,
+            "RECORDS_FOLDER": self.OUTPUT_FOLDER,
+            "AUDIO_EXTENSION": self.audio_extension,
+        }
+        missing = [
+            name for name, value in required_vars.items() if value is None
+        ]
+        if missing:
+            names = ", ".join(missing)
+            logger.error(f"Missing environment variables: {names}")
+            raise EnvironmentError(
+                f"Missing required environment variables: {names}"
+            )
+
+
+class TextToSpeech:
+    """
+    Synthesizes speech from text using the Azure Text-to-Speech API.
+
+    Args:
+        config (Config): The configuration parameters for the Azure Text-to-Speech API.
+
+    Attributes:
+        config (Config): The configuration parameters for the Azure Text-to-Speech API.
+    """
+
+    def __init__(self, config):
+        self.config = config
+
+    def synthesize_text(self, text: str, filename: str):
+        """
+        Synthesize speech from text and save it to an audio file.
+
+        The audio is written to ``<project root>/<OUTPUT_FOLDER>`` as
+        ``<filename>.<audio_extension>``. On failure an error is logged and
+        no file is written.
+
+        Args:
+            text (str): The text to synthesize.
+            filename (str): The base name for the output audio file,
+            without extension.
+        """
+        speech_config = speechsdk.SpeechConfig(
+            subscription=self.config.api_key, region=self.config.region
+        )
+
+        # British English with a fixed neural voice.
+        speech_config.speech_synthesis_language = "en-GB"
+        speech_config.speech_synthesis_voice_name = "en-GB-RyanNeural"
+
+        # NOTE(review): no audio_config is passed, so the SDK's default audio
+        # output applies in addition to the file written below - confirm
+        # this is intended.
+        synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
+        result = synthesizer.speak_text_async(text).get()
+
+        if result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
+            logger.error("Failed to synthesize speech from text.")
+            if result.reason == speechsdk.ResultReason.Canceled:
+                # Surface why the service canceled (e.g. bad key or region).
+                details = result.cancellation_details
+                logger.error(
+                    f"Synthesis canceled: {details.reason}. "
+                    f"Error details: {details.error_details}"
+                )
+            return
+
+        # Resolve the output folder against the project root, two levels
+        # above this module's folder. An already-absolute OUTPUT_FOLDER is
+        # kept as-is by the path join.
+        project_root = Path(__file__).resolve().parents[2]
+        output_dir = project_root / self.config.OUTPUT_FOLDER
+        # The resolved folder is stored back on the config, as before.
+        self.config.OUTPUT_FOLDER = str(output_dir)
+
+        # Ensure the output directory exists
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Accept both "wav" and ".wav" so the name never contains "..".
+        extension = self.config.audio_extension.lstrip(".")
+        output_path = output_dir / f"{filename}.{extension}"
+
+        with open(output_path, "wb") as audio_file:
+            audio_file.write(result.audio_data)
+        logger.info(f"Audio file saved to {output_path}")
+
+
+def text_to_speech(
+    text: str = "Thank you for your time today!",
+    filename: str = "text_to_speech",
+):
+    """
+    Synthesize ``text`` to an audio file using the environment configuration.
+
+    Any failure (missing configuration, synthesis or file errors) is logged
+    and swallowed, so this function never raises.
+
+    Args:
+        text (str): The text to synthesize. Defaults to a sample sentence.
+        filename (str): The base name of the output audio file, without
+            extension. Defaults to ``"text_to_speech"``.
+    """
+    try:
+        config = Config()
+        tts = TextToSpeech(config)
+        tts.synthesize_text(text, filename)
+    except Exception as e:
+        logger.error(f"Failed to synthesize text to speech: {e}")
+
+
+# Run a sample synthesis when this module is executed directly.
+if __name__ == "__main__":
+    text_to_speech()