More generic script for making prompts in various contexts

Jef808 · Dec 20, 2023 · 9c042bc · 9c042bc
1 parent 591ddc4
commit 9c042bc
Show file tree

Hide file tree

Showing 4 changed files with 143 additions and 26 deletions.
diff --git a/make-prompt/dumpf_s.py b/make-prompt/dumpf_s.py
@@ -0,0 +1,2 @@
+#!/usr/bin/env python3
+
diff --git a/make-prompt/main.py b/make-prompt/main.py
@@ -1,18 +1,44 @@
+import subprocess
 import json
 import sys
 from openai import OpenAI
 
-prompt = input()
+# model, temperature, system_prompt
+import argparse
+
+# Defaults for the command-line options declared below.
+DEFAULT_MODEL = "gpt-4-1106-preview"  # alternative: "gpt-4"
+DEFAULT_TEMPERATURE = 0.2
+# Adjacent string literals are joined with no separator, so every fragment
+# except the last carries its own trailing space.
+DEFAULT_SYSTEM_PROMPT = (
+    "From assemblyAI transcripts expressing a command, "
+    "generate an Emacs s-expression, without explanations, executable in a "
+    "Doom Emacs environment with lsp, projectile and magit which executes "
+    "the command. "
+    "Utilize fuzzy search for filepaths and names instead of hardcoded "
+    "placeholders."
+)
+# Shorter alternative wording, kept for reference:
+# "Generate an Emacs s-expression from assemblyAI transcripts command. Output only the executable expression, in Doom Emacs with lsp, projectile, and magit using fuzzy search"
+
+# The option defaults reuse the constants above so each value is defined once.
+parser = argparse.ArgumentParser(description='Process some arguments.')
+parser.add_argument('--model', type=str, help='Model to use.', default=DEFAULT_MODEL)
+parser.add_argument('--temperature', type=float, help='Temperature value.', default=DEFAULT_TEMPERATURE)
+parser.add_argument('--system-prompt', type=str, help='System prompt to use.', default=DEFAULT_SYSTEM_PROMPT)
+
+args = parser.parse_args()
 
-openai_client = OpenAI()
+def get_api_key():
+    """Return the OpenAI API key stored in the `pass` entry `openai/api_key`.
+
+    Runs `pass openai/api_key` and returns its standard output decoded as
+    UTF-8 with surrounding whitespace stripped. If the command produces no
+    output, an error is printed to stderr and the process exits with status 3.
+    """
+    p_api_key = subprocess.run(["pass", "openai/api_key"], capture_output=True)
+    if not p_api_key.stdout:
+        # Name the entry that was actually requested.
+        print("ERROR: Failed to retrieve openai/api_key pass entry", file=sys.stderr)
+        sys.exit(3)
+    return str(p_api_key.stdout, encoding="utf-8").strip()
+
+openai_client = OpenAI(api_key=get_api_key())
+
+prompt = input()
 
 # openai api call
 payload = {
-    "model": "gpt-4-1106-preview",
+    "model": args.model,
     "messages": [
-        {"role": "system": "content": "Process AssemblyAI transcripts to extract Emacs commands. Respond with an Emacs Lisp s-expression that executes these commands in a Doom Emacs setup with lsp and magit. Ensure compatibility with Projectile, using fuzzy search to handle filepaths and names without hard-coding."},
+        {"role": "system", "content": args.system_prompt},
         {"role": "user", "content": prompt}
-    ]
+    ],
+    "temperature": args.temperature,
 }
 
 print("Sending transcript to openai...", file=sys.stderr)

diff --git a/make-prompt/shell-script.py b/make-prompt/shell-script.py
@@ -0,0 +1,82 @@
+import subprocess
+import json
+import sys
+from openai import OpenAI
+
+# model, temperature, system_prompt
+import argparse
+
+# Defaults for the command-line options declared below.
+DEFAULT_MODEL = "gpt-4-1106-preview"  # alternative: "gpt-4"
+DEFAULT_TEMPERATURE = 0.2
+# Adjacent string literals are joined with no separator, so every fragment
+# except the last carries its own trailing space.
+DEFAULT_SYSTEM_PROMPT = (
+    "generate an Emacs s-expression, without explanations, executable in a "
+    "Doom Emacs environment with lsp, projectile and magit which executes "
+    "the command. "
+    "Utilize fuzzy search for filepaths and names instead of hardcoded "
+    "placeholders."
+)
+# Shorter alternative wording, kept for reference:
+# "Generate an Emacs s-expression from assemblyAI transcripts command. Output only the executable expression, in Doom Emacs with lsp, projectile, and magit using fuzzy search"
+
+parser = argparse.ArgumentParser(description='Process some arguments.')
+# `type=bool` would turn every non-empty string, including "False", into
+# True. The option now works as a bare flag (`--from-speech`) and still
+# accepts an explicit value (`--from-speech true` / `--from-speech false`).
+parser.add_argument(
+    '--from-speech',
+    nargs='?',
+    const=True,
+    default=False,
+    type=lambda value: value.strip().lower() in ('1', 'true', 'yes', 'y', 'on'),
+    help='Indicate if input is a voice transcript.',
+)
+parser.add_argument('--script-language', type=str, help='Language that should be used for generating the script. One of SHELL, ELISP or PYTHON', required=True)
+# The option defaults reuse the constants above so each value is defined once.
+parser.add_argument('--model', type=str, help='Model to use.', default=DEFAULT_MODEL)
+parser.add_argument('--temperature', type=float, help='Temperature value.', default=DEFAULT_TEMPERATURE)
+parser.add_argument('--system-prompt', type=str, help='System prompt to use.', default=DEFAULT_SYSTEM_PROMPT)
+
+def make_system_prompt(args):
+    """Build the system prompt matching the given options.
+
+    Args:
+        args: The options, either an ``argparse.Namespace`` or a plain dict.
+            ``script_language`` is required; ``from_speech`` and
+            ``custom_instructions`` are optional.
+
+    Returns:
+        The system prompt as a single string.
+
+    Raises:
+        KeyError: If ``script_language`` is missing from ``args``.
+    """
+    # argparse hands back a Namespace, which is not subscriptable; read both
+    # kinds of input through a plain dict.
+    options = args if isinstance(args, dict) else vars(args)
+    script_language = options['script_language']
+
+    # What the model is asked to produce, keyed by upper-cased language name.
+    artifacts = {
+        'ELISP': "an Emacs s-expression, without explanations, executable in a Doom Emacs environment with lsp, projectile and magit",
+        'SHELL': "a shell script, without explanations, executable in a typical Linux environment",
+        'PYTHON': "a python script, without explanations, executable by Python3 with numpy, requests and other standard libraries",
+    }
+    artifact = artifacts.get(str(script_language).upper())
+    if artifact is None:
+        artifact = f"a script in the {script_language} language, intended to be executed in a typical environment"
+        print(f"WARNING: language {script_language} has only generic version of the prompt", file=sys.stderr)
+
+    # Test the option's value rather than its presence: a parsed Namespace
+    # always defines the attribute, even when the flag was not given.
+    if options.get('from_speech'):
+        system_prompt = f"From assemblyAI transcripts expressing a command, generate {artifact}"
+    else:
+        system_prompt = f"Generate {artifact}"
+
+    system_prompt += " to execute the command. Utilize fuzzy search for filepaths and names instead of hardcoded placeholders."
+
+    custom_instructions = options.get('custom_instructions')
+    if custom_instructions:
+        system_prompt += f" {custom_instructions}"
+
+    return system_prompt
+
+
+def make_payload(args):
+    """Build the keyword arguments for a chat-completion request.
+
+    Only the system message is included; the caller appends the user message
+    to ``payload["messages"]`` before sending.
+
+    Args:
+        args: Parsed options providing ``model`` and ``temperature`` plus
+            everything ``make_system_prompt`` reads.
+
+    Returns:
+        A dict with the ``model``, ``messages`` and ``temperature`` keys.
+    """
+    return {
+        "model": args.model,
+        "messages": [
+            {"role": "system", "content": make_system_prompt(args)},
+        ],
+        "temperature": args.temperature,
+    }
+
+# Parse the command line once; the request code below reads `args`.
+args = parser.parse_args()
+
+def get_api_key():
+    """Return the OpenAI API key stored in the `pass` entry `openai/api_key`.
+
+    Runs `pass openai/api_key` and returns its standard output decoded as
+    UTF-8 with surrounding whitespace stripped. If the command produces no
+    output, an error is printed to stderr and the process exits with status 3.
+    """
+    p_api_key = subprocess.run(["pass", "openai/api_key"], capture_output=True)
+    if not p_api_key.stdout:
+        # Name the entry that was actually requested.
+        print("ERROR: Failed to retrieve openai/api_key pass entry", file=sys.stderr)
+        sys.exit(3)
+    return str(p_api_key.stdout, encoding="utf-8").strip()
+
+# Client for the OpenAI API, authenticated with the key returned by
+# get_api_key().
+openai_client = OpenAI(api_key=get_api_key())
+
+# The user message is one line read from standard input.
+prompt = input()
+
+# openai api call
+# NOTE(review): the system message is taken from args.system_prompt;
+# make_system_prompt() is not called here -- confirm this is intended.
+payload = {
+    "model": args.model,
+    "messages": [
+        {"role": "system", "content": args.system_prompt},
+        {"role": "user", "content": prompt}
+    ],
+    "temperature": args.temperature,
+}
+
+print("Sending transcript to openai...", file=sys.stderr)
+response = openai_client.chat.completions.create(**payload)
+
+# model_dump() yields data that is serialised with json.dumps and indexed
+# like nested dicts/lists below.
+py_response = response.model_dump()
+
+# The full response is logged to stderr; only the generated content of the
+# first choice is written to stdout.
+print(json.dumps(py_response, indent=2), file=sys.stderr)
+
+content = py_response['choices'][0]['message']['content']
+
+print(content)
diff --git a/speech-to-text/speech_reco.py b/speech-to-text/speech_reco.py
@@ -5,6 +5,7 @@
 It is printed to stdout, and any other output of the program is sent to stderr.
 """
 
+import subprocess
 import websocket
 import base64
 import pyaudio
@@ -16,20 +17,35 @@
 import wave
 import requests
 from contextlib import closing
+import argparse
+
+# Command-line options describing the audio input.
+# NOTE(review): confirm where these values are consumed; the audio
+# configuration visible further down is assigned from fixed values.
+parser = argparse.ArgumentParser(description='Handle command line arguments')
+parser.add_argument('--input-device', type=int, help='Input device ID')
+parser.add_argument('--sample-rate', type=int, help='Input device sample rate')
+parser.add_argument('--frames-per-buffer', type=int, help='Frames per buffer')
+parser.add_argument('--format', type=str, help='Format of the audio')
+parser.add_argument('--channels', type=int, help='Number of audio channels')
+
+args = parser.parse_args()
+# Diagnostics go to stderr: the module docstring reserves stdout for the
+# program's result, so printing the parsed options there would pollute it.
+print(args, file=sys.stderr)
+
 
 p = pyaudio.PyAudio()
 ##################
 # # Audio config #
 ##################
 #PIPEWIRE_DEVICE_INDEX = 7
 DEFAULT_DEVICE = p.get_default_input_device_info()
+
 DEFAULT_DEVICE_INDEX = DEFAULT_DEVICE['index']
-SAMPLE_RATE = 16000 # int(DEFAULT_DEVICE['defaultSampleRate'])
-FRAMES_PER_BUFFER = int(SAMPLE_RATE / 2) # 3200
+SAMPLE_RATE = 16000  # int(DEFAULT_DEVICE['defaultSampleRate'])
+FRAMES_PER_BUFFER = int(SAMPLE_RATE / 2)  # Sync AssemblyAI's throughput of twice a second
 LATENCY = FRAMES_PER_BUFFER / SAMPLE_RATE
 FORMAT = pyaudio.paInt16
 CHANNELS = 1
 
+
+
 ##############################
 # # Termination logic config #
 ##############################
@@ -57,13 +73,6 @@
 # Time when assemblyAI answers with a SessionTerminated message
 _AAI_SESSION_END_TIME = None
 
-# They both report time differently, so we compute the difference
-# and store it here in order to work with normalized timestamps.
-#_WEBSOCKET_TO_PYAUDIO_CLOCK_DIFF = None
-
-# We use the following two to compute the above
-#_PYAUDIO_TO_CLOCK_DIFF = None
-
 # Buffers to store audio data and transcription results
 WEB_SOCKET_IS_CONNECTING_BUFFER = []
 
@@ -266,17 +275,6 @@ def on_message(ws, msg):
 
     _LOGGER.write({"PARTIAL_TRANSCRIPT": text, "created": payload['created']})
 
-########################
-# Retrieve credentials #
-########################
-ASSEMBLYAI_API_KEY = os.getenv("ASSEMBLYAI_API_KEY")
-auth_header = {"Authorization": f"{ASSEMBLYAI_API_KEY}"}
-
-if not ASSEMBLYAI_API_KEY:
-    print("ERROR: Failed to retrieve ASSEMBLYAI_API_KEY env variable", file=sys.stderr)
-    p.terminate()
-    sys.exit(1)
-
 #################################
 # Create and start audio stream #
 #################################
@@ -301,10 +299,19 @@ def on_error(ws, *err):
 ########################
 # Set up the websocket #
 ########################
+def get_api_key():
+    """Return the AssemblyAI API key read from the `pass` password store.
+
+    Runs `pass assemblyai.com/api_key` and returns its standard output
+    decoded as UTF-8 with surrounding whitespace stripped.
+
+    If the command produces no output, an error is printed to stderr, the
+    module-level audio `stream` is closed unless it reports being stopped,
+    the PyAudio instance `p` is terminated, and the process exits with
+    status 3.
+    """
+    lookup = subprocess.run(["pass", "assemblyai.com/api_key"], capture_output=True)
+    raw_key = lookup.stdout
+    if raw_key:
+        return raw_key.decode("utf-8").strip()
+
+    # No key available: release the audio resources before exiting.
+    print("ERROR: Failed to retrieve assemblyai.com/api_key pass entry", file=sys.stderr)
+    if not stream.is_stopped():
+        stream.close()
+    p.terminate()
+    sys.exit(3)
+
 try:
     ws = websocket.WebSocketApp(
         f"wss://api.assemblyai.com/v2/realtime/ws?sample_rate={SAMPLE_RATE}",
-        header=auth_header,
+        header={"Authorization": get_api_key()},
         on_message=on_message,
         on_error=on_error,
         on_close=on_close,