diff --git a/.gitignore b/.gitignore index 4870f22..caf78c0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,10 @@ .idea/ +output/ +src/__pycache__/ +build/ +dist/ +application.spec .env +*.mkv +*.mp4 *.mp3 \ No newline at end of file diff --git a/README.md b/README.md index 5f39a81..94d8251 100644 --- a/README.md +++ b/README.md @@ -16,19 +16,12 @@ You'll need to create a `.env` file with the `API_KEY` variable with your API ke The script on the repository is configured in French; it's easy to modify the script to adapt it to another language (just translate the few instructions given to GPT). > [!IMPORTANT] -> To use the script, you need to be connected to the Internet so that it can call the OpenAI API. +> To use the application, you need to be connected to the Internet so that it can call the OpenAI API. -You will also need to install the necessary : -```bash -sudo apt-get update - -sudo apt install python3 -sudo apt install python3-pip +## Installation -pip3 install openai -pip3 install python-docx -pip3 install pydub -``` +> [!IMPORTANT] +> For the application to work properly, the `.env` file must be in the same place as the executable. > [!NOTE] > For information, the code has been developed and works with the following library versions: @@ -37,32 +30,80 @@ pip3 install pydub > | openai | 0.28.0 | > | python-docx | 1.1.0 | > | pydub | 0.25.1 | +> | ffmpeg-python | 0.2.0 | +> | customtkinter | 4.0.2 | -## Usage +### Linux +You will also need to install the necessary : ```bash -python3 src/meetingMinutes.py file_name.mp3 -``` +sudo apt-get update && sudo apt-get upgrade -y -> [!WARNING] -> The file must be a `.mp3`. 
+sudo apt install ffmpeg +sudo apt install python3 +sudo apt install python3-pip + +pip3 install customtkinter +pip3 install python-dotenv +pip3 install openai==0.28 +pip3 install pydub +pip3 install python-docx +pip3 install ffmpeg-python +``` -When you run the script, it will ask you whether you want to run the complete script or just the transcript. -The complete script includes transcript, summary, key points and action points. +### Windows + +1. Download and install [Python](https://www.python.org/) +2. Check that Python is installed using the following command: `python --version` or `python3 --version`. +3. Download and install [FFmpeg](https://ffmpeg.org/) + - Extract the downloaded file to a location of your choice on your computer (somewhere easy to access). +4. To add FFmpeg to the Windows PATH, follow these steps: + - Open System Properties: Right-click on the Start button and choose "*System*". Then click on "*Advanced system settings*" on the left-hand side of the window (on Windows 10) or below the "*Device specifications*" section (on Windows 11). + - Environment variables: In the "*Advanced*" tab, click the "*Environment Variables...*" button at the bottom. + - Edit PATH: Under "*System variables*", find the `Path` variable and click on it, then click on "*Edit...*". If you are running Windows 10 or Windows 11, this will open a window with a list of paths. + - Add the FFmpeg path: Click "*New*" and type the path to the bin folder of your FFmpeg installation. If you extracted FFmpeg to `C:\FFmpeg`, the path to add will probably be `C:\FFmpeg\bin`. + - Save and close: Click "*OK*" to close the `Path` editing window, then "*OK*" again to close the "*Environment Variables*" window and "*OK*" once more to close the "*System Properties*". +5. Check that FFmpeg is installed using the following command: `ffmpeg -version`. + - This command should display the version of FFmpeg installed and the configuration. 
If you get an error message saying that FFmpeg is not recognised as an internal or external command, this means that FFmpeg has not been correctly added to the PATH or that the terminal needs to be restarted. -You can change the model used in the code by modifying the `model_gpt` variable. You can find a list of the different GPT models supported on the [OpenAI site](https://platform.openai.com/docs/guides/function-calling), along with the methods of use for API calls. +You will also need to install the necessary dependencies: +```shell +pip install customtkinter +pip install python-dotenv +pip install openai==0.28 +pip install pydub +pip install python-docx +pip install ffmpeg-python +``` +## Build -If you get the error that the `mp3` file is too heavy, feel free to cut it to make it lighter with the following commands: +You can build the application using `PyInstaller`. ```bash -# Tool installation -sudo apt install ffmpeg +pip install pyinstaller -# Using ffmpeg -ffmpeg -i test.mp3 -ss 00:00:30 -to 00:10:00 -c copy output.mp3 # Here from 30 seconds to 10 minutes +pyinstaller --onefile --noconsole src/application.py ``` +## Usage + +Double-click on the executable or use `./application[.exe]` from the command line. + +application + +The only mandatory cells are "*File browser*" and "*Transcription*"; the others, such as "*File names*" and "*Start/End Times*", are optional and have default values. + +The "*File browser*" section is mandatory and takes as input the file to which the application will be applied. +> [!WARNING] +> The file must be a `.mp3` or `.mkv`. + +The "*File names*" section is optional and can take as input the name of the output audio and text files. The default values are "*audio_[date]*" and "*meeting_minutes_[date]*". + +The "*Start/End Times*" section is optional and can take as input the start and end times of the output audio file. By default, their respective values are "*00:00:00*" and "*[file_end_value]*".
The input must be in the format `[00-59]:[00-59]:[00-59]`. + +The "*Transcription*" section is mandatory and offers a choice between "*Transcription only*", which will output a text file with only the transcription of the input audio file, or "*Full execution*", which will output a text file with the transcription of the input audio file as well as a summary, a list of key points and a list of action items. + ## Performance Here are the performances I've seen in use: @@ -79,6 +120,8 @@ Here are the performances I've seen in use: | gpt-4-1106-preview | 18min35 | 3min28 | 0.22$ | | gpt-4 | 27min17 | 4min15 | 0.79$ | +You can change the model used in the code by modifying the `model_gpt` variable. You can find a list of the different GPT models supported on the [OpenAI website](https://platform.openai.com/docs/guides/function-calling), along with the methods of use for API calls. + You can find costs for the various models (including Whisper and GPT-4) on the [OpenAI website](https://openai.com/pricing). You can also find all your consumption for the current month, as well as your payment history, on the [Usage page](https://platform.openai.com/usage). diff --git a/img/application.png b/img/application.png new file mode 100644 index 0000000..59c09bb Binary files /dev/null and b/img/application.png differ diff --git a/src/application.py b/src/application.py new file mode 100644 index 0000000..6ae5c4b --- /dev/null +++ b/src/application.py @@ -0,0 +1,255 @@ +import re +import shutil +import datetime + +import customtkinter +from customtkinter import filedialog + +from meetingMinutes import meeting_minutes_main +from convertMKVtoMP3 import convert_mkv_to_mp3, cutting_mp3 + + +class MyFileDialogFrame(customtkinter.CTkFrame): + """ + Make a frame to select a file. + """ + + def __init__(self, master, title, placeholder, button_text): + """ + Initialise and configure the frame to select a file. 
class MyEntryFrame(customtkinter.CTkFrame):
    """
    Frame made of a title bar and a column of labelled text entries.
    """

    def __init__(self, master, title, data_titles, placeholders):
        """
        Build the title label and one (label, entry) row per data title.
        :param master: Parent passed on to `CTkFrame`
        :param title: Frame title
        :param data_titles: Text in front of entry cells
        :param placeholders: Text in the placeholder
        """
        super().__init__(master)

        self.data_titles = data_titles
        self.placeholders = placeholders
        self.datas = []

        # Column 0 (labels) keeps its natural width, column 1 (entries) stretches
        self.grid_columnconfigure(0, weight=0)
        self.grid_columnconfigure(1, weight=1)

        # The title label spans both columns
        self.title = customtkinter.CTkLabel(self, text=title, fg_color="gray30", corner_radius=6)
        self.title.grid(row=0, column=0, padx=10, pady=10, sticky="ew", columnspan=2)

        # Row 0 is the title, so the data rows start at 1
        rows = zip(self.data_titles, self.placeholders)
        for row, (label_text, hint) in enumerate(rows, start=1):
            label = customtkinter.CTkLabel(self, text=label_text)
            label.grid(row=row, column=0, padx=10, pady=(0, 10), sticky="w")

            entry = customtkinter.CTkEntry(self, placeholder_text=hint)
            entry.grid(row=row, column=1, padx=10, pady=(0, 10), sticky="ew")
            self.datas.append(entry)

    def get(self):
        """
        Read the current content of every entry, in row order.
        :return: Cell values
        """
        return [entry.get() for entry in self.datas]
class App(customtkinter.CTk):
    """
    Main application window.

    Lays out the input frames (file browser, output file names, start/end times,
    transcription mode) and a button that runs the whole processing chain.
    """

    # Accepted timecode format: [00-59]:[00-59]:[00-59] (leading zeros optional)
    _TIMECODE_PATTERN = re.compile(r'^([0-5]?[0-9]):([0-5]?[0-9]):([0-5]?[0-9])$')
    # Directory in which every generated file is written
    _OUTPUT_DIR = "output"

    def __init__(self):
        """
        Initialise and configure the window.
        """
        super().__init__()

        # Window configuration
        self.title("Meeting Minutes")
        self.geometry("600x420")
        self.resizable(False, False)

        self.grid_columnconfigure((0, 1), weight=1)
        self.grid_rowconfigure((0, 1, 2), weight=1)

        # Building the grid
        self.file_selection_frame = MyFileDialogFrame(self, "File browser", placeholder="File path",
                                                      button_text="Choose file")
        self.file_selection_frame.grid(row=0, column=0, padx=10, pady=(10, 5), sticky="nsew", columnspan=2)

        self.file_name_selection_frame = MyEntryFrame(self, "File names", data_titles=["Audio file:", "Meeting file:"],
                                                      placeholders=["audio_[date].mp3", "meeting_minutes_[date].docx"])
        self.file_name_selection_frame.grid(row=1, column=0, padx=10, pady=5, sticky="nsew", columnspan=2)

        self.timecode_selection_frame = MyEntryFrame(self, "Start/End Times", data_titles=["Start:", "End:"],
                                                     placeholders=["00:00:00", "59:59:59"])
        self.timecode_selection_frame.grid(row=2, column=0, padx=(10, 5), pady=5, sticky="nsew")
        self.radio_button_selection_frame = MyRadioButtonFrame(self, "Transcription",
                                                               options=["Transcription only", "Full execution"])
        self.radio_button_selection_frame.grid(row=2, column=1, padx=(5, 10), pady=5, sticky="nsew")

        self.button = customtkinter.CTkButton(self, text="Run the program", command=self.code_execution)
        self.button.grid(row=3, column=0, padx=10, pady=10, sticky="ew", columnspan=2)

        # Setup
        self.radio_button_selection_frame.set("Transcription only")

    def code_execution(self):
        """
        Run the processing chain on the selected file.

        Steps: convert an `.mkv` input to `.mp3`, cut the audio when at least one
        valid timecode was entered, generate the meeting minutes, and finally copy
        the audio into the output directory when no new audio file was produced.
        Nothing happens when no file is selected; an unsupported file type only
        prints an error message.
        """
        # Local import: `os` is not imported at the top of this module
        import os

        # Getting data (each frame is read once)
        start_time, end_time = self.timecode_selection_frame.get()
        name_mp3, name_docx = self.file_name_selection_frame.get()
        path = self.file_selection_frame.get()
        radio = self.radio_button_selection_frame.get()
        new_path = False

        # Checks whether the format of the start and end times is correct
        # (an empty or malformed value simply disables that bound)
        start_time_good = bool(self._TIMECODE_PATTERN.match(start_time))
        end_time_good = bool(self._TIMECODE_PATTERN.match(end_time))

        # Convert file to mp3 (extension compared case-insensitively so that
        # files such as `VIDEO.MKV` are accepted too)
        if path.lower().endswith('.mkv'):
            path = convert_mkv_to_mp3(path, name_mp3) if name_mp3 else convert_mkv_to_mp3(path)
            new_path = True

        # Anything that is not an mp3 at this point cannot be processed
        if not path.lower().endswith('.mp3'):
            if path != "":
                print("Erreur sur le type de fichier")
            return

        # Cutting up the file
        if start_time_good or end_time_good:
            cut_params = {}
            if name_mp3:
                cut_params['name_mp3_file'] = name_mp3
            if start_time_good:
                cut_params['start_time'] = start_time
            if end_time_good:
                cut_params['end_time'] = end_time

            path = cutting_mp3(path, **cut_params)
            new_path = True

        # The following steps write into the output directory: make sure it exists
        # even when neither the conversion nor the cutting step created it
        os.makedirs(self._OUTPUT_DIR, exist_ok=True)

        # Running the MeetingMinutes
        action_type = "Full" if radio == "Full execution" else "Transcription"
        meeting_minutes_main(path, action_type, name_docx or None)

        # Keep a copy of the audio next to the minutes when the input was used as is
        if not new_path:
            if name_mp3:
                destination = f"{self._OUTPUT_DIR}/{name_mp3}.mp3"
            else:
                formatted_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
                destination = f"{self._OUTPUT_DIR}/audio_{formatted_date}.mp3"

            try:
                shutil.copy(path, destination)
            except shutil.SameFileError:
                # The selected file already is the destination: nothing to copy
                pass
def cutting_mp3(mp3_file_path, name_mp3_file=None, start_time=None, end_time=None):
    """
    Cut the audio file to keep just a part of it.

    The result is always written into the `output` directory. When the input file
    itself lives in that directory (typically the intermediate file produced by the
    `.mkv` conversion), it is first moved aside to `output/temp.mp3` so that the
    result may reuse its name, and it is deleted once the cut has succeeded. An
    input file located anywhere else is read in place and left untouched.

    The process exits with status 1 when ffmpeg reports an error; in that case an
    input that had been moved aside is put back under its original name.

    :param mp3_file_path: Path to the input file (`.mp3`)
    :param name_mp3_file: Name of output audio file, without extension (optional)
    :param start_time: Start of cutting time (optional)
    :param end_time: End of cutting time (optional)
    :return: Path to the new file
    """
    # Check that the output directory is present
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)

    # Creating name of the output file
    if name_mp3_file is None:
        formatted_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        output_path = f"{output_dir}/audio_{formatted_date}.mp3"
    else:
        output_path = f"{output_dir}/{name_mp3_file}.mp3"

    # Only a file that really is inside the output directory is treated as an
    # intermediate file; comparing directories (rather than looking for a file with
    # the same base name in `output`) keeps unrelated user files from being moved
    temp_path = f"{output_dir}/temp.mp3"
    input_dir = os.path.dirname(os.path.abspath(mp3_file_path))
    input_is_intermediate = os.path.normcase(input_dir) == os.path.normcase(os.path.abspath(output_dir))

    source_path = mp3_file_path
    if input_is_intermediate:
        # os.replace overwrites a stale temp file on every platform
        os.replace(mp3_file_path, temp_path)
        source_path = temp_path

    # Configuring options for ffmpeg
    ffmpeg_options = {}
    if start_time is not None:
        ffmpeg_options['ss'] = start_time
    if end_time is not None:
        ffmpeg_options['to'] = end_time

    # Attempt to cut the file
    try:
        ffmpeg.input(source_path, **ffmpeg_options).output(output_path, c='copy').run(overwrite_output=True)
    except ffmpeg.Error as e:
        # Put the input back so that a failed cut does not lose it
        if input_is_intermediate:
            os.replace(temp_path, mp3_file_path)
        print(f"Cutting error: {e}")
        sys.exit(1)

    print(f"The file '{mp3_file_path}' has been successfully cut to '{output_path}'.")

    # Cleaning: the temporary file only exists when the input was moved aside
    if input_is_intermediate:
        os.remove(temp_path)

    return output_path
def transcribe_audio(audio_chunks):
    """
    Convert audio files that have been cut into chunks into text.

    Each chunk is exported to a uniquely named temporary `.mp3` file in the
    `output` directory, sent to the transcription model, and the temporary file
    is removed afterwards, whether or not the transcription succeeded.

    :param audio_chunks: Iterable of audio chunks exposing an `export` method
    :return: Transcriptions of all chunks joined with a single space
    :raises ValueError: If an exported chunk is larger than 25 MiB
    """
    output_dir = "output"
    max_chunk_bytes = 25 * 1024 * 1024

    # The temporary files are written here, so the directory must exist
    os.makedirs(output_dir, exist_ok=True)

    transcriptions = []
    for chunk in audio_chunks:
        temp_filename = "temp_audio_{}.mp3".format(uuid.uuid4())
        temp_audio_path = os.path.join(output_dir, temp_filename)

        try:
            chunk.export(temp_audio_path, format="mp3", bitrate="192k")  # You can adjust the bitrate as needed
            file_size = os.path.getsize(temp_audio_path)

            if file_size > max_chunk_bytes:
                raise ValueError("Audio chunk is too large: {} bytes".format(file_size))

            with open(temp_audio_path, 'rb') as f:
                transcription = openai.Audio.transcribe(model_whisper, f)

            transcriptions.append(transcription['text'])
        finally:
            # Cleaning, also when the export or the API call failed
            if os.path.exists(temp_audio_path):
                os.remove(temp_audio_path)

    return " ".join(transcriptions)
@@ -88,6 +101,7 @@ def key_points_extraction(transcription): ) return response['choices'][0]['message']['content'] + def action_item_extraction(transcription): """ From the audio file transcript, create a list of action item. @@ -110,6 +124,7 @@ def action_item_extraction(transcription): ) return response['choices'][0]['message']['content'] + def meeting_minutes(transcription): """ Execution of all extractions. @@ -126,6 +141,7 @@ def meeting_minutes(transcription): 'action_items': action_items } + def save_as_docx(minutes, filename, output_dir): """ Save extraction as docx file. @@ -148,32 +164,21 @@ def save_as_docx(minutes, filename, output_dir): doc.save(filename) -if __name__ == '__main__': +def meeting_minutes_main(audio_file_path, choice, name_docx=None): + """ + Main code for switching from an audio file to a transcription in a text file. + :param audio_file_path: Path to the audio file (`.mp3`) + :param choice: Choice between transcribing only or performing all actions ('Full' or 'Transcription') + :param name_docx: Name of output text file (optional) + """ # Configuration dotenv.load_dotenv() api_key = os.getenv('API_KEY') if api_key is None: print("API_KEY variable is not set: set it.") sys.exit(1) - - if len(sys.argv) != 2: - print("Usage: python3 src/meetingMinutes.py ") - sys.exit(1) - audio_file_path = sys.argv[1] openai.api_key = api_key - # Ask the user if they want to run the full script or just the transcription - while True: - choice = input( - "Voulez-vous exécuter le script complet (entrez 'F') ou seulement la transcription (entrez 'T') ? 
").lower().strip() - if choice in ['f', 't']: - break - else: - print("Option non valide, veuillez saisir 'F' ou 'T'.") - - # Start timer - start = time.time() - # Split audio into chunks audio_chunks = split_audio(audio_file_path) @@ -181,11 +186,11 @@ def save_as_docx(minutes, filename, output_dir): transcription = transcribe_audio(audio_chunks) # Check user's choice - if choice == 'f': + if choice == 'Full': # If user chose 'full', perform all actions minutes = meeting_minutes(transcription) print("Minutes prepared.") - elif choice == 't': + elif choice == 'Transcription': # If user chose 'transcribe', only save the transcription minutes = { 'complete_transcription': transcription @@ -195,11 +200,12 @@ def save_as_docx(minutes, filename, output_dir): print("Invalid option. Exiting.") sys.exit(1) + # Creating the output file output_dir = "output" - now = datetime.datetime.now() - formatted_date = now.strftime("%Y-%m-%d_%H-%M-%S") - filename = f"{output_dir}/meeting_minutes_{formatted_date}.docx" + if name_docx is None: + now = datetime.datetime.now() + formatted_date = now.strftime("%Y-%m-%d_%H-%M-%S") + filename = f"{output_dir}/meeting_minutes_{formatted_date}.docx" + else: + filename = f"{output_dir}/{name_docx}.docx" save_as_docx(minutes, filename, output_dir) - - # End timer - print(f'meetingMinutes.py runtime: {time.strftime("%H:%M:%S", time.gmtime(time.time() - start))}')