Skip to content

Commit

Permalink
More generic script for making prompts in various context
Browse files Browse the repository at this point in the history
  • Loading branch information
Jef808 committed Dec 20, 2023
1 parent 591ddc4 commit 9c042bc
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 26 deletions.
2 changes: 2 additions & 0 deletions make-prompt/dumpf_s.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env python3

36 changes: 31 additions & 5 deletions make-prompt/main.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,44 @@
import subprocess
import json
import sys
from openai import OpenAI

prompt = input()
# model, temperature, system_prompt
import argparse

# Defaults for the command-line options declared below.
DEFAULT_MODEL = "gpt-4-1106-preview"  # "gpt-4"
DEFAULT_TEMPERATURE = 0.2
# Adjacent string literals are concatenated verbatim, so every fragment except
# the last must end with an explicit space; otherwise the sentences run
# together ("command,generate", "command.Utilize").
DEFAULT_SYSTEM_PROMPT = (
    "From assemblyAI transcripts expressing a command, "
    "generate an Emacs s-expression, without explanations, executable in a "
    "Doom Emacs environment with lsp, projectile and magit which executes "
    "the command. "
    "Utilize fuzzy search for filepaths and names instead of hardcoded "
    "placeholders."
)
# Alternative, shorter wording kept for reference:
# "Generate an Emacs s-expression from assemblyAI transcripts command. Output only the executable expression, in Doom Emacs with lsp, projectile, and magit using fuzzy search"

# Every option falls back to the constants above so the defaults live in
# exactly one place.
parser = argparse.ArgumentParser(description='Process some arguments.')
parser.add_argument('--model', type=str, help='Model to use.', default=DEFAULT_MODEL)
parser.add_argument('--temperature', type=float, help='Temperature value.', default=DEFAULT_TEMPERATURE)
parser.add_argument('--system-prompt', type=str, help='System prompt to use.', default=DEFAULT_SYSTEM_PROMPT)

args = parser.parse_args()

openai_client = OpenAI()
def get_api_key(entry="openai/api_key"):
    """Return the API key stored in the `pass` password store.

    Args:
        entry: Name of the pass entry to read. Defaults to the OpenAI key.

    Returns:
        The entry's output decoded as UTF-8 with surrounding whitespace
        removed.

    Exits the process with status 3 (after printing a message to stderr) when
    the `pass` executable is missing or the entry yields no usable output.
    """
    try:
        completed = subprocess.run(["pass", entry], capture_output=True)
    except FileNotFoundError:
        # Without this, a missing `pass` binary surfaces as a raw traceback.
        print("ERROR: `pass` executable not found; cannot retrieve "
              f"{entry} pass entry", file=sys.stderr)
        sys.exit(3)

    api_key = completed.stdout.decode("utf-8").strip()
    if not api_key:
        # Name the entry that was actually requested so the message cannot
        # drift from the lookup.
        print(f"ERROR: Failed to retrieve {entry} pass entry", file=sys.stderr)
        sys.exit(3)
    return api_key

openai_client = OpenAI(api_key=get_api_key())

prompt = input()

# openai api call
payload = {
"model": "gpt-4-1106-preview",
"model": args.model,
"messages": [
{"role": "system": "content": "Process AssemblyAI transcripts to extract Emacs commands. Respond with an Emacs Lisp s-expression that executes these commands in a Doom Emacs setup with lsp and magit. Ensure compatibility with Projectile, using fuzzy search to handle filepaths and names without hard-coding."},
{"role": "system", "content": args.system_prompt},
{"role": "user", "content": prompt}
]
],
"temperature": args.temperature,
}

print("Sending transcript to openai...", file=sys.stderr)
Expand Down
82 changes: 82 additions & 0 deletions make-prompt/shell-script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import subprocess
import json
import sys
from openai import OpenAI

# model, temperature, system_prompt
import argparse

# Defaults for the command-line options declared below.
DEFAULT_MODEL = "gpt-4-1106-preview"  # "gpt-4"
DEFAULT_TEMPERATURE = 0.2
# Adjacent string literals are concatenated verbatim, so the first fragment
# ends with an explicit space; otherwise the text reads "command.Utilize".
DEFAULT_SYSTEM_PROMPT = (
    "generate an Emacs s-expression, without explanations, executable in a "
    "Doom Emacs environment with lsp, projectile and magit which executes "
    "the command. "
    "Utilize fuzzy search for filepaths and names instead of hardcoded "
    "placeholders."
)
# Alternative, shorter wording kept for reference:
# "Generate an Emacs s-expression from assemblyAI transcripts command. Output only the executable expression, in Doom Emacs with lsp, projectile, and magit using fuzzy search"


def _str_to_bool(value):
    """Convert a command-line word such as "true" or "no" to a bool.

    `type=bool` is unsuitable for argparse because bool("False") is True:
    any non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if the word is not a recognised boolean.
    """
    word = value.strip().lower()
    if word in ("1", "true", "t", "yes", "y", "on"):
        return True
    if word in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


parser = argparse.ArgumentParser(description='Process some arguments.')
# Usable both as a bare flag (`--from-speech`) and with an explicit value
# (`--from-speech false`), so existing invocations that pass a value still parse.
parser.add_argument('--from-speech', type=_str_to_bool, nargs='?', const=True, default=False,
                    help='Indicate if input is a voice transcript.')
parser.add_argument('--script-language', type=str, help='Language that should be used for generating the script. One of SHELL, ELISP or PYTHON', required=True)
parser.add_argument('--model', type=str, help='Model to use.', default=DEFAULT_MODEL)
parser.add_argument('--temperature', type=float, help='Temperature value.', default=DEFAULT_TEMPERATURE)
parser.add_argument('--system-prompt', type=str, help='System prompt to use.', default=DEFAULT_SYSTEM_PROMPT)

def make_system_prompt(args):
    """Build the system prompt for the requested script language.

    Args:
        args: Parsed options, either an ``argparse.Namespace`` (or any object
            exposing its options through ``vars()``) or a plain dict. Keys read:
            ``script_language`` (required), ``from_speech`` (optional, truthy
            when the input is a voice transcript) and ``custom_instructions``
            (optional extra text appended to the prompt).

    Returns:
        The assembled prompt string.

    Raises:
        KeyError: if ``script_language`` is absent.
    """
    # argparse.Namespace is not subscriptable, so normalise to a dict first.
    options = args if isinstance(args, dict) else vars(args)
    script_language = options["script_language"]

    targets = {
        "ELISP": "an Emacs s-expression, without explanations, executable in a Doom Emacs environment with lsp, projectile and magit",
        "SHELL": "a shell script, without explanations, executable in a typical Linux environment",
        "PYTHON": "a python script, without explanations, executable by Python3 with numpy, requests and other standard libraries",
    }
    target = targets.get(script_language)
    if target is None:
        target = f"a script in the {script_language} language, intended to be executed in a typical environment"
        print(f"WARNING: language {script_language} has only generic version of the prompt", file=sys.stderr)

    # Test the option's value, not its presence: a parsed Namespace always
    # carries the attribute, even when the flag was not given.
    if options.get("from_speech"):
        opening = "From assemblyAI transcripts expressing a command, generate"
    else:
        opening = "Generate"

    system_prompt = (
        f"{opening} {target} to execute the command. "
        "Utilize fuzzy search for filepaths and names instead of hardcoded placeholders."
    )

    custom_instructions = options.get("custom_instructions")
    if custom_instructions:
        system_prompt += f" {custom_instructions}"

    return system_prompt


def make_payload(args, prompt=None):
    """Build the keyword arguments for a chat-completions request.

    Args:
        args: Parsed command-line options; reads ``model``, ``system_prompt``
            and ``temperature``.
        prompt: Optional user message. When None, only the system message is
            included.

    Returns:
        A dict with ``model``, ``messages`` and ``temperature`` keys.
    """
    # NOTE(review): the original left the system content blank. Using
    # args.system_prompt mirrors the module-level payload further down this
    # file; confirm whether make_system_prompt(args) was intended instead.
    messages = [{"role": "system", "content": args.system_prompt}]
    if prompt is not None:
        messages.append({"role": "user", "content": prompt})

    return {
        "model": args.model,
        "messages": messages,  # the chat API expects "messages", not "message"
        "temperature": args.temperature,
    }

# Parse the process's command-line options; the request payload built further
# down reads model, system_prompt and temperature from this namespace.
args = parser.parse_args()

def get_api_key(entry="openai/api_key"):
    """Return the API key stored in the `pass` password store.

    Args:
        entry: Name of the pass entry to read. Defaults to the OpenAI key.

    Returns:
        The entry's output decoded as UTF-8 with surrounding whitespace
        removed.

    Exits the process with status 3 (after printing a message to stderr) when
    the `pass` executable is missing or the entry yields no usable output.
    """
    try:
        completed = subprocess.run(["pass", entry], capture_output=True)
    except FileNotFoundError:
        # Without this, a missing `pass` binary surfaces as a raw traceback.
        print("ERROR: `pass` executable not found; cannot retrieve "
              f"{entry} pass entry", file=sys.stderr)
        sys.exit(3)

    api_key = completed.stdout.decode("utf-8").strip()
    if not api_key:
        # Name the entry that was actually requested so the message cannot
        # drift from the lookup.
        print(f"ERROR: Failed to retrieve {entry} pass entry", file=sys.stderr)
        sys.exit(3)
    return api_key

# Client authenticated with the key returned by get_api_key().
openai_client = OpenAI(api_key=get_api_key())

# The user message is a single line read from stdin.
prompt = input()

# openai api call
# NOTE(review): the system message is taken directly from --system-prompt.
# In the code visible here, make_system_prompt() and make_payload() are never
# called, so --script-language and --from-speech do not influence the request;
# confirm whether that is intended.
payload = {
"model": args.model,
"messages": [
{"role": "system", "content": args.system_prompt},
{"role": "user", "content": prompt}
],
"temperature": args.temperature,
}

# Progress messages and the full response dump go to stderr.
print("Sending transcript to openai...", file=sys.stderr)
response = openai_client.chat.completions.create(**payload)

py_response = response.model_dump()

print(json.dumps(py_response, indent=2), file=sys.stderr)

content = py_response['choices'][0]['message']['content']

# Only the first choice's message content is written to stdout.
print(content)
49 changes: 28 additions & 21 deletions speech-to-text/speech_reco.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
It is printed to stdout, and any other output of the program is sent to stderr.
"""

import subprocess
import websocket
import base64
import pyaudio
Expand All @@ -16,20 +17,35 @@
import wave
import requests
from contextlib import closing
import argparse

# Audio-capture options. None of them declares a default, so an omitted flag
# parses to None.
parser = argparse.ArgumentParser(description='Handle command line arguments')
for _flag, _caster, _help_text in (
    ('--input-device', int, 'Input device ID'),
    ('--sample-rate', int, 'Input device sample rate'),
    ('--frames-per-buffer', int, 'Frames per buffer'),
    ('--format', str, 'Format of the audio'),
    ('--channels', int, 'Number of audio channels'),
):
    parser.add_argument(_flag, type=_caster, help=_help_text)

args = parser.parse_args()
# Diagnostic output only. The module docstring reserves stdout for the
# transcript and sends all other output to stderr, so the parsed options are
# echoed on stderr.
print(args, file=sys.stderr)


p = pyaudio.PyAudio()
##################
# # Audio config #
##################
#PIPEWIRE_DEVICE_INDEX = 7
DEFAULT_DEVICE = p.get_default_input_device_info()

DEFAULT_DEVICE_INDEX = DEFAULT_DEVICE['index']
SAMPLE_RATE = 16000 # int(DEFAULT_DEVICE['defaultSampleRate'])
FRAMES_PER_BUFFER = int(SAMPLE_RATE / 2) # 3200
SAMPLE_RATE = 16000 # int(DEFAULT_DEVICE['defaultSampleRate'])
FRAMES_PER_BUFFER = int(SAMPLE_RATE / 2) # Sync AssemblyAI's throughput of twice a second
LATENCY = FRAMES_PER_BUFFER / SAMPLE_RATE
FORMAT = pyaudio.paInt16
CHANNELS = 1



##############################
# # Termination logic config #
##############################
Expand Down Expand Up @@ -57,13 +73,6 @@
# Time when assemblyAI answers with a SessionTerminated message
_AAI_SESSION_END_TIME = None

# They both report time differently, so we compute the difference
# and store it here in order to work with normalized timestamps.
#_WEBSOCKET_TO_PYAUDIO_CLOCK_DIFF = None

# We use the following two to compute the above
#_PYAUDIO_TO_CLOCK_DIFF = None

# Buffers to store audio data and transcription results
WEB_SOCKET_IS_CONNECTING_BUFFER = []

Expand Down Expand Up @@ -266,17 +275,6 @@ def on_message(ws, msg):

_LOGGER.write({"PARTIAL_TRANSCRIPT": text, "created": payload['created']})

########################
# Retrieve credentials #
########################
ASSEMBLYAI_API_KEY = os.getenv("ASSEMBLYAI_API_KEY")
auth_header = {"Authorization": f"{ASSEMBLYAI_API_KEY}"}

if not ASSEMBLYAI_API_KEY:
print("ERROR: Failed to retrieve ASSEMBLYAI_API_KEY env variable", file=sys.stderr)
p.terminate()
sys.exit(1)

#################################
# Create and start audio stream #
#################################
Expand All @@ -301,10 +299,19 @@ def on_error(ws, *err):
########################
# Set up the websocket #
########################
def get_api_key():
    """Return the AssemblyAI API key read from the `pass` password store.

    When `pass` produces no output, an error is printed to stderr, the audio
    stream is closed unless it is already stopped, PyAudio is terminated and
    the process exits with status 3.
    """
    lookup = subprocess.run(["pass", "assemblyai.com/api_key"], capture_output=True)
    raw_key = lookup.stdout
    if raw_key:
        return raw_key.decode("utf-8").strip()

    print("ERROR: Failed to retrieve assemblyai.com/api_key pass entry", file=sys.stderr)
    # Release the audio resources before leaving.
    if not stream.is_stopped():
        stream.close()
    p.terminate()
    sys.exit(3)

try:
ws = websocket.WebSocketApp(
f"wss://api.assemblyai.com/v2/realtime/ws?sample_rate={SAMPLE_RATE}",
header=auth_header,
header={"Authorization": get_api_key()},
on_message=on_message,
on_error=on_error,
on_close=on_close,
Expand Down

0 comments on commit 9c042bc

Please sign in to comment.