diff --git a/make-prompt/dumpf_s.py b/make-prompt/dumpf_s.py
new file mode 100644
index 0000000..63f77b6
--- /dev/null
+++ b/make-prompt/dumpf_s.py
@@ -0,0 +1,2 @@
+#!/usr/bin/env python3
+
diff --git a/make-prompt/main.py b/make-prompt/main.py
index b342ed0..86156b4 100644
--- a/make-prompt/main.py
+++ b/make-prompt/main.py
@@ -1,18 +1,44 @@
+import subprocess
 import json
 import sys
 from openai import OpenAI
 
-prompt = input()
+# CLI options: model, temperature, system_prompt
+import argparse
+
+DEFAULT_MODEL = "gpt-4-1106-preview"  # "gpt-4"
+DEFAULT_TEMPERATURE = 0.2
+DEFAULT_SYSTEM_PROMPT = ("From assemblyAI transcripts expressing a command, "
+                         "generate an Emacs s-expression, without explanations, executable in a Doom Emacs environment with lsp, projectile and magit which executes the command. "
+                         "Utilize fuzzy search for filepaths and names instead of hardcoded placeholders.")
+
+parser = argparse.ArgumentParser(description='Process some arguments.')
+parser.add_argument('--model', type=str, help='Model to use.', default=DEFAULT_MODEL)
+parser.add_argument('--temperature', type=float, help='Temperature value.', default=DEFAULT_TEMPERATURE)
+parser.add_argument('--system-prompt', type=str, help='System prompt to use.', default=DEFAULT_SYSTEM_PROMPT)
+
+args = parser.parse_args()
 
-openai_client = OpenAI()
+def get_api_key():
+    """Return the OpenAI API key stored in the `pass` password manager."""
+    p_api_key = subprocess.run(["pass", "openai/api_key"], capture_output=True)
+    if not p_api_key.stdout:
+        print("ERROR: Failed to retrieve openai/api_key pass entry", file=sys.stderr)
+        sys.exit(3)
+    return str(p_api_key.stdout, encoding="utf-8").strip()
+
+openai_client = OpenAI(api_key=get_api_key())
+
+prompt = input()
 
 # openai api call
 payload = {
-    "model": "gpt-4-1106-preview",
+    "model": args.model,
     "messages": [
-        {"role": "system": "content": "Process AssemblyAI transcripts to extract Emacs commands. Respond with an Emacs Lisp s-expression that executes these commands in a Doom Emacs setup with lsp and magit. Ensure compatibility with Projectile, using fuzzy search to handle filepaths and names without hard-coding."},
+        {"role": "system", "content": args.system_prompt},
         {"role": "user", "content": prompt}
-    ]
+    ],
+    "temperature": args.temperature,
 }
 
 print("Sending transcript to openai...", file=sys.stderr)
diff --git a/make-prompt/shell-script.py b/make-prompt/shell-script.py
new file mode 100644
index 0000000..1bb4ef8
--- /dev/null
+++ b/make-prompt/shell-script.py
@@ -0,0 +1,78 @@
+import subprocess
+import json
+import sys
+from openai import OpenAI
+
+# CLI options: from_speech, script_language, model, temperature, custom_instructions
+import argparse
+
+DEFAULT_MODEL = "gpt-4-1106-preview"  # "gpt-4"
+DEFAULT_TEMPERATURE = 0.2
+
+parser = argparse.ArgumentParser(description='Process some arguments.')
+# store_true, not type=bool: argparse would turn any non-empty string (even "False") into True.
+parser.add_argument('--from-speech', action='store_true', help='Indicate if input is a voice transcript.')
+parser.add_argument('--script-language', type=str, help='Language that should be used for generating the script. One of SHELL, ELISP or PYTHON', required=True)
+parser.add_argument('--model', type=str, help='Model to use.', default=DEFAULT_MODEL)
+parser.add_argument('--temperature', type=float, help='Temperature value.', default=DEFAULT_TEMPERATURE)
+parser.add_argument('--custom-instructions', type=str, help='Extra instructions appended to the system prompt.')
+
+def make_system_prompt(args):
+    """Build the system prompt from a dict of parsed options (e.g. vars(parser.parse_args()))."""
+    system_prompt = "From assemblyAI transcripts expressing a command, " if args.get('from_speech') else ""
+    system_prompt += "generate "
+    match args['script_language']:
+        case 'ELISP':
+            system_prompt += "an Emacs s-expression, without explanations, executable in a Doom Emacs environment with lsp, projectile and magit"
+        case 'SHELL':
+            system_prompt += "a shell script, without explanations, executable in a typical Linux environment"
+        case 'PYTHON':
+            system_prompt += "a python script, without explanations, executable by Python3 with numpy, requests and other standard libraries"
+        case _:
+            system_prompt += f"a script in the {args['script_language']} language, intended to be executed in a typical environment"
+            print(f"WARNING: language {args['script_language']} has only generic version of the prompt", file=sys.stderr)
+    system_prompt += " to execute the command. Utilize fuzzy search for filepaths and names instead of hardcoded placeholders."
+    if args.get('custom_instructions'):
+        system_prompt += f" {args['custom_instructions']}"
+    return system_prompt
+
+
+def make_payload(args, prompt):
+    """Assemble the OpenAI chat-completions request body for the given transcript."""
+    return {
+        "model": args['model'],
+        "messages": [
+            {"role": "system", "content": make_system_prompt(args)},
+            {"role": "user", "content": prompt}
+        ],
+        "temperature": args['temperature'],
+    }
+
+
+args = parser.parse_args()
+
+def get_api_key():
+    """Return the OpenAI API key stored in the `pass` password manager."""
+    p_api_key = subprocess.run(["pass", "openai/api_key"], capture_output=True)
+    if not p_api_key.stdout:
+        print("ERROR: Failed to retrieve openai/api_key pass entry", file=sys.stderr)
+        sys.exit(3)
+    return str(p_api_key.stdout, encoding="utf-8").strip()
+
+openai_client = OpenAI(api_key=get_api_key())
+
+prompt = input()
+
+# openai api call
+payload = make_payload(vars(args), prompt)
+
+print("Sending transcript to openai...", file=sys.stderr)
+response = openai_client.chat.completions.create(**payload)
+
+py_response = response.model_dump()
+
+print(json.dumps(py_response, indent=2), file=sys.stderr)
+
+content = py_response['choices'][0]['message']['content']
+
+print(content)
diff --git a/speech-to-text/speech_reco.py b/speech-to-text/speech_reco.py
index 015bbd6..e40026c 100644
--- a/speech-to-text/speech_reco.py
+++ b/speech-to-text/speech_reco.py
@@ -5,6 +5,7 @@
 It is printed to stdout, and any other output of the program is sent to stderr.
 """
+import subprocess
 import websocket
 import base64
 import pyaudio
@@ -16,6 +17,18 @@
 import wave
 import requests
 from contextlib import closing
 
+import argparse
+
+parser = argparse.ArgumentParser(description='Handle command line arguments')
+parser.add_argument('--input-device', type=int, help='Input device ID')
+parser.add_argument('--sample-rate', type=int, help='Input device sample rate')
+parser.add_argument('--frames-per-buffer', type=int, help='Frames per buffer')
+parser.add_argument('--format', type=str, help='Format of the audio')
+parser.add_argument('--channels', type=int, help='Number of audio channels')
+
+args = parser.parse_args()
+print(args)
+
 p = pyaudio.PyAudio()
 
@@ -23,13 +36,16 @@
 ##################
 #PIPEWIRE_DEVICE_INDEX = 7
 DEFAULT_DEVICE = p.get_default_input_device_info()
+
 DEFAULT_DEVICE_INDEX = DEFAULT_DEVICE['index']
-SAMPLE_RATE = 16000 # int(DEFAULT_DEVICE['defaultSampleRate'])
-FRAMES_PER_BUFFER = int(SAMPLE_RATE / 2) # 3200
+SAMPLE_RATE = 16000  # int(DEFAULT_DEVICE['defaultSampleRate'])
+FRAMES_PER_BUFFER = int(SAMPLE_RATE / 2)  # Sync AssemblyAI's throughput of twice a second
 LATENCY = FRAMES_PER_BUFFER / SAMPLE_RATE
 FORMAT = pyaudio.paInt16
 CHANNELS = 1
+
+
 
 ##############################
 # # Termination logic config #
 ##############################
@@ -57,13 +73,6 @@
 # Time when assemblyAI answers with a SessionTerminated message
 _AAI_SESSION_END_TIME = None
 
-# They both report time differently, so we compute the difference
-# and store it here in order to work with normalized timestamps.
-#_WEBSOCKET_TO_PYAUDIO_CLOCK_DIFF = None
-
-# We use the following two to compute the above
-#_PYAUDIO_TO_CLOCK_DIFF = None
-
 # Buffers to store audio data and transcription results
 WEB_SOCKET_IS_CONNECTING_BUFFER = []
 
@@ -266,17 +275,6 @@ def on_message(ws, msg):
         _LOGGER.write({"PARTIAL_TRANSCRIPT": text, "created": payload['created']})
 
-########################
-# Retrieve credentials #
-########################
-ASSEMBLYAI_API_KEY = os.getenv("ASSEMBLYAI_API_KEY")
-auth_header = {"Authorization": f"{ASSEMBLYAI_API_KEY}"}
-
-if not ASSEMBLYAI_API_KEY:
-    print("ERROR: Failed to retrieve ASSEMBLYAI_API_KEY env variable", file=sys.stderr)
-    p.terminate()
-    sys.exit(1)
-
 #################################
 # Create and start audio stream #
 #################################
 
@@ -301,10 +299,19 @@ def on_error(ws, *err):
 ########################
 # Set up the websocket #
 ########################
+def get_api_key():
+    p_api_key = subprocess.run(["pass", "assemblyai.com/api_key"], capture_output=True)
+    if not p_api_key.stdout:
+        print("ERROR: Failed to retrieve assemblyai.com/api_key pass entry", file=sys.stderr)
+        if not stream.is_stopped(): stream.close()
+        p.terminate()
+        sys.exit(3)
+    return str(p_api_key.stdout, encoding="utf-8").strip()
+
 try:
     ws = websocket.WebSocketApp(
         f"wss://api.assemblyai.com/v2/realtime/ws?sample_rate={SAMPLE_RATE}",
-        header=auth_header,
+        header={"Authorization": get_api_key()},
         on_message=on_message,
         on_error=on_error,
         on_close=on_close,