WIP refactor code

Jef808 · Feb 16, 2024 · 3b37ad2 · 3b37ad2
1 parent 7280e4e
commit 3b37ad2
Show file tree

Hide file tree

Showing 20 changed files with 102,975 additions and 107 deletions.
diff --git a/data/LICENSE.txt b/data/LICENSE.txt
diff --git a/echo_crafter/config/__init__.py b/echo_crafter/config/__init__.py
@@ -0,0 +1,3 @@
+from .config import setup_config
+
+__all__ = ['setup_config']
diff --git a/echo_crafter/config/config.py b/echo_crafter/config/config.py
@@ -0,0 +1,146 @@
+"""Set the global configuration for the Echo Crafter project."""
+
+import os
+import sys
+from pathlib import Path
+
+from .config_base import *
+from .utils import get_openai_api_key, get_picovoice_api_key
+
+# Directory for runtime artefacts such as sockets; falls back to /tmp when
+# XDG_RUNTIME_DIR is not set.
+RUNTIME_DIR = Path(os.environ.get('XDG_RUNTIME_DIR', '/tmp'))
+
+# Resolved once so every default below is derived from the same location.
+_PACKAGE_DIR = Path(__file__).parent.parent
+_DATA_DIR = _PACKAGE_DIR / 'data'
+
+# NOTE: the key getters run when this module is imported; see utils.py for
+# where each key is read from.
+DEFAULT_API_KEYS = ApiKeys(
+    openai_api_key=get_openai_api_key(),
+    picovoice_api_key=get_picovoice_api_key(),
+)
+
+DEFAULT_DIRECTORIES = Directories(
+    project_root=_PACKAGE_DIR,
+    data_dir=_DATA_DIR,
+    tests_dir=_PACKAGE_DIR / 'tests',
+)
+
+DEFAULT_SOCKETS = Sockets(
+    socket_path=str(RUNTIME_DIR / 'echo_crafter.sock'),
+)
+
+# The keys of the sections below use the names declared by the matching
+# TypedDict in config_base.py, so the mappings satisfy their declared types.
+DEFAULT_FILES = Files(
+    cheetah_model_file=_DATA_DIR / 'speech-command-cheetah-v1.pv',
+    porcupine_laptop_keyword_file=_DATA_DIR / 'laptop_en_linux_v3_0_0.ppn',
+    rhino_context_file=_DATA_DIR / 'computer-commands_en_linux_v3_0_0.rhn',
+    transcript_begin_wav=_DATA_DIR / 'transcript_begin.wav',
+    transcript_success_wav=_DATA_DIR / 'transcript_success.wav',
+)
+
+DEFAULT_MICROPHONE_SETTINGS = MicrophoneSettings(
+    device_index=-1,
+    sample_rate=16000,
+    frame_length=512,
+    intent_collection_timeout_secs=5,
+    transcript_collection_timeout_secs=15,
+)
+
+# Each value is the NAME of an environment variable, not its content.
+DEFAULT_ENVIRONMENT_VARIABLES = EnvironmentVariables(
+    wakeword='ECHO_CRAFTER_WAIT_FOR_KEYWORD',
+    intent='ECHO_CRAFTER_INTENT',
+    slots='ECHO_CRAFTER_SLOTS',
+)
+
+def verify_config_types(config: Config) -> bool:
+    """Check that the sections of *config* match their TypedDict declarations.
+
+    A section is accepted when it is a ``dict`` that contains every key
+    declared by its TypedDict and each of those values is an instance of the
+    declared type. Keys that are not declared are ignored.
+
+    ``apiKeys``, ``microphoneSettings`` and ``environmentVariables`` are
+    always checked. ``paths`` is checked only when the attribute holds a
+    mapping; its ``directories``, ``files`` and ``sockets`` entries are then
+    verified the same way.
+
+    Returns:
+        True when every checked section is well-typed, False otherwise.
+    """
+    from typing import get_type_hints
+
+    def section_matches(section, declaration) -> bool:
+        """Return True when *section* satisfies the TypedDict *declaration*."""
+        if not isinstance(section, dict):
+            return False
+        # get_type_hints resolves annotations that are stored as strings.
+        for key, expected in get_type_hints(declaration).items():
+            if key not in section:
+                return False
+            value = section[key]
+            if expected is float:
+                # An int is acceptable wherever a float is declared (PEP 484).
+                if isinstance(value, bool) or not isinstance(value, (int, float)):
+                    return False
+            elif not isinstance(value, expected):
+                return False
+        return True
+
+    flat_sections = (
+        ('apiKeys', ApiKeys),
+        ('microphoneSettings', MicrophoneSettings),
+        ('environmentVariables', EnvironmentVariables),
+    )
+    if not all(
+        section_matches(getattr(config, name, None), declaration)
+        for name, declaration in flat_sections
+    ):
+        return False
+
+    paths = getattr(config, 'paths', None)
+    if isinstance(paths, dict):
+        nested_sections = (
+            ('directories', Directories),
+            ('files', Files),
+            ('sockets', Sockets),
+        )
+        return all(
+            section_matches(paths.get(name), declaration)
+            for name, declaration in nested_sections
+        )
+    return True
+
+
+def validate_paths(directories: Directories) -> bool:
+    """Return True when every entry of *directories* is an existing directory.
+
+    Only the values of the mapping are inspected. Regular files and sockets
+    are not part of a ``Directories`` mapping and are therefore not checked
+    here.
+
+    Args:
+        directories: Mapping from a directory label to its path.
+
+    Returns:
+        True if all paths exist and are directories (vacuously True for an
+        empty mapping), False otherwise.
+    """
+    # Path.is_dir() is already False for a path that does not exist.
+    return all(Path(path).is_dir() for path in directories.values())
+
+
+def validate_api_keys(api_keys: ApiKeys) -> bool:
+    """Return True when every API key is a non-empty string.
+
+    The check is purely local: no request is made to any service, so a key
+    that passes may still be rejected by the provider.
+
+    Args:
+        api_keys: Mapping from a key label to the key itself.
+
+    Returns:
+        True if every value is a ``str`` containing at least one
+        non-whitespace character, False otherwise.
+    """
+    return all(
+        isinstance(api_key, str) and bool(api_key.strip())
+        for api_key in api_keys.values()
+    )
+
+
+def is_source_device(device_info: "DeviceInfo") -> bool:
+    """Return True when the device reports at least one input channel.
+
+    Args:
+        device_info: Mapping describing an audio device; only its
+            ``'maxInputChannels'`` entry is read.
+    """
+    # The annotation is quoted because no ``DeviceInfo`` name is imported in
+    # this module; an unquoted name would raise NameError at definition time.
+    return device_info['maxInputChannels'] > 0
+
+
+def device_is_compatible(p: "PyAudio",
+                         device_info: "DeviceInfo",
+                         sample_rate: int) -> bool:
+    """
+    Verify that the device supports the settings needed by our microphone stream.
+
+    More precisely, this checks that the device can capture a single input
+    channel at *sample_rate* in the ``paInt16`` sample format.
+
+    Args:
+        p: The PyAudio instance used to query the device.
+        device_info: Mapping describing the device; its ``'index'`` entry
+            selects the device to query.
+        sample_rate: Requested sampling rate.
+
+    Returns:
+        True when the configuration is supported, False otherwise.
+    """
+    # NOTE(review): ``PyAudio`` and ``paInt16`` are not imported anywhere in
+    # this module; they presumably come from the ``pyaudio`` package and need
+    # an import before this function can run.
+    try:
+        return p.is_format_supported(
+            sample_rate,
+            input_device=device_info['index'],
+            input_channels=1,
+            input_format=paInt16,
+        )
+    except ValueError:
+        # PyAudio signals an unsupported configuration by raising ValueError
+        # instead of returning False.
+        return False
+
+
+def verify_device_index(device_index: int) -> bool:
+    """Verify that *device_index* designates a usable recording device.
+
+    The process-level stderr (file descriptor 2) is redirected to the null
+    device while PyAudio is queried and is restored before returning,
+    presumably to hide the diagnostics printed by the native audio libraries.
+
+    Args:
+        device_index: Index of the device as understood by PyAudio.
+
+    Returns:
+        True when the device has input channels and supports the configured
+        sample rate; False otherwise, including when any error occurs (the
+        error is reported on stderr once it has been restored).
+    """
+    # Accept both the snake_case key declared by MicrophoneSettings and the
+    # camelCase spelling, so the lookup works with either naming.
+    settings = DEFAULT_MICROPHONE_SETTINGS
+    sample_rate = settings.get('sample_rate', settings.get('sampleRate'))
+
+    p = None
+    saved_stderr = None
+    error = None
+    try:
+        sys.stderr.flush()
+        saved_stderr = os.dup(2)
+        devnull = os.open(os.devnull, os.O_WRONLY)
+        try:
+            os.dup2(devnull, 2)
+        finally:
+            os.close(devnull)
+
+        # NOTE(review): ``PyAudio`` is not imported in this module; it
+        # presumably has to come from the ``pyaudio`` package.
+        p = PyAudio()
+        device_info = p.get_device_info_by_index(device_index)
+        return is_source_device(device_info) and device_is_compatible(p, device_info, sample_rate)
+    except Exception as e:
+        # Reported in ``finally``: stderr still points at the null device here.
+        error = e
+        return False
+    finally:
+        if p is not None:
+            p.terminate()
+        if saved_stderr is not None:
+            # Restore first, close the spare descriptor afterwards.
+            os.dup2(saved_stderr, 2)
+            os.close(saved_stderr)
+        if error is not None:
+            print(f"An error occurred while verifying the device index {device_index} with PyAudio:", error, file=sys.stderr)
+
+
+def validate_microphone_settings(microphone_settings: MicrophoneSettings) -> bool:
+    """Validate that all microphone settings are of the correct type.
+
+    Every field declared by ``MicrophoneSettings`` is an ``int`` or a
+    ``float``, so each value must be a real number (``bool`` is rejected).
+    Model and WAV file paths are not checked here: they belong to the
+    ``Files`` section, not to the microphone settings.
+
+    Args:
+        microphone_settings: Mapping from a setting name to its value.
+
+    Returns:
+        True if every value is numeric, False otherwise.
+    """
+    return all(
+        isinstance(value, (int, float)) and not isinstance(value, bool)
+        for value in microphone_settings.values()
+    )
+
+
+def verify_environment_variables(
+        environment_variables: EnvironmentVariables) -> bool:
+    """Accept any mapping: all members are optional and arbitrary.
+
+    Args:
+        environment_variables: Mapping of environment-variable names; it is
+            not inspected.
+
+    Returns:
+        Always True.
+    """
+    # The parameter is annotated with the TypedDict itself: the ``Config``
+    # dataclass has no ``EnvironmentVariables`` attribute to refer to.
+    return True
+
+
+# Module-wide configuration built from the DEFAULT_* sections defined above.
+# NOTE(review): DEFAULT_DIRECTORIES, DEFAULT_FILES and DEFAULT_SOCKETS are not
+# passed because ``Config`` accepts no ``directories`` keyword; wire them in
+# once ``Config`` exposes a field for them.
+_config = Config(
+    apiKeys=DEFAULT_API_KEYS,
+    microphoneSettings=DEFAULT_MICROPHONE_SETTINGS,
+    environmentVariables=DEFAULT_ENVIRONMENT_VARIABLES,
+)
+
+
+def setup_config():
+    """Return the module-wide Config object after checking its types.
+
+    Raises:
+        TypeError: If ``verify_config_types`` rejects the configuration.
+    """
+    if verify_config_types(_config):
+        return _config
+
+    raise TypeError("The config object has the wrong types.")
diff --git a/echo_crafter/config/config_base.py b/echo_crafter/config/config_base.py
@@ -0,0 +1,76 @@
+"""Declare the Config object's structure here."""
+
+from __future__ import annotations
+
+from os import PathLike
+from dataclasses import dataclass
+from typing import TypedDict
+
+class Directories(TypedDict):
+    """A simple namespace for the directories in the Config object."""
+
+    # Root directory from which the other locations are derived.
+    project_root: PathLike
+    # Directory holding the model, keyword and audio files.
+    data_dir: PathLike
+    # Directory holding the tests; config.py supplies it in its defaults,
+    # so it is declared here as well.
+    tests_dir: PathLike
+
+
+class Files(TypedDict):
+    """A simple namespace for the data files in the Config object."""
+
+    # Cheetah model file (config.py points it at a ``.pv`` file).
+    cheetah_model_file: PathLike
+    # Porcupine keyword file (config.py points it at a ``.ppn`` file).
+    porcupine_laptop_keyword_file: PathLike
+    # Rhino context file (config.py points it at a ``.rhn`` file).
+    rhino_context_file: PathLike
+    # WAV files; presumably played when a transcript starts / succeeds -- confirm.
+    transcript_begin_wav: PathLike
+    transcript_success_wav: PathLike
+
+
+class Sockets(TypedDict):
+    """A simple namespace for the sockets in the Config object."""
+
+    # Socket path, stored as a plain string rather than a PathLike.
+    socket_path: str
+
+
+class ApiKeys(TypedDict):
+    """A simple namespace for the API keys in the Config object."""
+
+    # Key for the OpenAI API.
+    openai_api_key: str
+    # Key for the Picovoice API.
+    picovoice_api_key: str
+
+
+class MicrophoneSettings(TypedDict):
+    """A simple namespace for the microphone settings in the Config object."""
+
+    # Index of the capture device; the defaults in config.py use -1
+    # (presumably "let the audio backend choose" -- TODO confirm).
+    device_index: int
+    # Sampling rate; the defaults in config.py use 16000.
+    sample_rate: int
+    # Frame length; the defaults in config.py use 512
+    # (presumably samples per frame -- TODO confirm).
+    frame_length: int
+    # Timeouts, in seconds according to the field names.
+    intent_collection_timeout_secs: float
+    transcript_collection_timeout_secs: float
+
+
+class EnvironmentVariables(TypedDict):
+    """Use these to manually trigger the wakeword and/or set the intent and slots.
+
+    Each entry holds the NAME of an environment variable; the listener reads
+    the variable with ``os.getenv`` to obtain the actual override.
+    """
+
+    # Name of the variable controlling whether to wait for the wake word.
+    wakeword: str
+    # Name of the variable carrying a preset intent.
+    intent: str
+    # Name of the variable carrying the preset intent's slots.
+    slots: str
+
+
+# Groups every filesystem location known to the configuration.
+Paths = TypedDict('Paths', {
+    'directories': Directories,
+    'files': Files,
+    'sockets': Sockets,
+})
+
+
+@dataclass(frozen=True, kw_only=True)
+class Config:
+    """A simple namespace for the Config object.
+
+    Instances are immutable and every field must be passed by keyword.
+
+    Attributes:
+        apiKeys: Keys for the external services.
+        microphoneSettings: Capture device parameters.
+        environmentVariables: Names of the override environment variables.
+        paths: Directories, files and sockets. Optional (defaults to None) so
+            that a Config can still be built from the three sections above
+            alone.
+    """
+
+    apiKeys: ApiKeys
+    microphoneSettings: MicrophoneSettings
+    environmentVariables: EnvironmentVariables
+    # Annotated so that the dataclass treats it as a real field; an
+    # un-annotated class attribute is ignored by @dataclass and cannot be
+    # set through the constructor.
+    paths: Paths | None = None
+
+
+def setup_config(*args, **kwargs):
+    """Return a Config object built from the given keyword arguments.
+
+    Args:
+        *args: Accepted for compatibility and ignored; ``Config`` is
+            keyword-only.
+        **kwargs: Forwarded unchanged to ``Config``.
+
+    Raises:
+        TypeError: If a field is missing or unknown; re-raised with an
+            "Invalid configuration" prefix.
+        ValueError: If ``Config`` rejects a value; re-raised with the same
+            prefix.
+    """
+    try:
+        return Config(**kwargs)
+    except TypeError as e:
+        # The dataclass constructor reports missing or unexpected fields with
+        # TypeError; keep the exception type and add the same context.
+        raise TypeError(f"Invalid configuration: {e}") from e
+    except ValueError as e:
+        raise ValueError(f"Invalid configuration: {e}") from e
diff --git a/echo_crafter/config/utils.py b/echo_crafter/config/utils.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+
+"""Utility functions for generating the Config object."""
+
+from os import environ
+from pathlib import Path
+
+def get_project_root() -> Path:
+    """Return the root directory of the project.
+
+    This module lives in the ``config`` sub-package, so the root is the
+    parent of this file's directory. That is the same directory config.py
+    uses as ``project_root`` (``Path(__file__).parent.parent``).
+    """
+    # NOTE(review): if the root is meant to be the repository top level
+    # rather than the package directory, go one level further up.
+    return Path(__file__).resolve().parents[1]
+
+
+def get_default_data_dir() -> Path:
+    """Return the ``data`` directory located directly under the project root."""
+    project_root = get_project_root()
+    return project_root.joinpath("data")
+
+
+def get_socket_dir() -> Path:
+    """Return the default socket directory for the project.
+
+    The directory is ``$XDG_RUNTIME_DIR/transcription``; when the variable is
+    unset or empty it falls back to ``~/.local/share/transcription`` with the
+    tilde expanded to the user's home directory.
+    """
+    base_dir = environ.get('XDG_RUNTIME_DIR') or '~/.local/share'
+    # Path() keeps a leading "~" literally, so it has to be expanded explicitly.
+    return Path(base_dir).expanduser() / 'transcription'
+
+
+def generate_path_for_pv_files():
+    """Map each engine name to the path of its binary file in the data directory.
+
+    Returns:
+        A dict with the keys ``cheetah``, ``porcupine`` and ``rhino``.
+    """
+    data_dir = get_default_data_dir()
+    return {
+        "cheetah": data_dir / "speech-command-cheetah-v1.pv",
+        "porcupine": data_dir / "laptop_en_linux_v3_0_0.ppn",
+        "rhino": data_dir / "computer-commands_en_linux_v3_0_0.rhn",
+    }
+
+def get_openai_api_key():
+    """Get the OpenAI API key from the environment or the password store.
+
+    ``OPENAI_API_KEY`` is used when it is set and non-empty; otherwise the
+    key is read by running ``pass openai.com/api_key``.
+
+    Returns:
+        The key with surrounding whitespace removed.
+
+    Raises:
+        FileNotFoundError: If ``pass`` has to be run but is not installed.
+        subprocess.CalledProcessError: If ``pass`` exits with an error.
+    """
+    from os import getenv
+    from subprocess import check_output
+
+    api_key = getenv("OPENAI_API_KEY")
+    if not api_key:
+        api_key = check_output(["pass", "openai.com/api_key"], text=True)
+    # The captured output ends with a newline, which must not become part of
+    # the key.
+    return api_key.strip()
+
+def get_picovoice_api_key():
+    """Get the Picovoice API key from the environment or the password store.
+
+    ``PICOVOICE_API_KEY`` is used when it is set and non-empty; otherwise the
+    key is read by running ``pass picovoice.com/api_key``.
+
+    Returns:
+        The key with surrounding whitespace removed.
+
+    Raises:
+        FileNotFoundError: If ``pass`` has to be run but is not installed.
+        subprocess.CalledProcessError: If ``pass`` exits with an error.
+    """
+    from os import getenv
+    from subprocess import check_output
+
+    api_key = getenv("PICOVOICE_API_KEY")
+    if not api_key:
+        api_key = check_output(["pass", "picovoice.com/api_key"], text=True)
+    # The captured output ends with a newline, which must not become part of
+    # the key.
+    return api_key.strip()
diff --git a/echo_crafter/listener/env_override.py b/echo_crafter/listener/env_override.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+
+from echo_crafter.config import Config
+
+import os
+import sys
+
+
+def EnvOverride():
+    """Build a checker that turns environment overrides into listener settings.
+
+    Returns:
+        A zero-argument function. Each call re-reads the override variables
+        and returns ``(should_wait_for_keyword, preset_intent)``:
+
+        * ``should_wait_for_keyword`` is True unless the wake-word variable
+          is set to ``false`` (case-insensitive).
+        * ``preset_intent`` is None, or ``[True, intent_name, slots]`` where
+          ``slots`` is a dict parsed from comma separated ``key=value``
+          entries.
+
+        Values are recomputed only when a variable changed since the
+        previous call.
+    """
+    last_should_wait_for_keyword_s = ''
+    last_intent_s = ''
+    last_slots_s = ''
+    should_wait_for_keyword = True
+    preset_intent = None
+
+    def parse_slots(raw_slots):
+        """Parse 'k1=v1,k2=v2' into a dict; raise ValueError on a bad entry."""
+        slots = {}
+        for entry in raw_slots.split(','):
+            # partition splits on the first '=' only, so values may contain '='.
+            key, separator, value = entry.partition('=')
+            if not separator:
+                raise ValueError(f"missing '=' in slot entry {entry!r}")
+            slots[key] = value
+        return slots
+
+    def check_for_env_settings():
+        nonlocal last_should_wait_for_keyword_s
+        nonlocal last_intent_s
+        nonlocal last_slots_s
+        nonlocal should_wait_for_keyword
+        nonlocal preset_intent
+
+        # NOTE(review): this reads the variable names as attributes of the
+        # ``Config`` class; the visible declarations make
+        # ``environmentVariables`` a dict-valued instance field, so this
+        # probably needs a Config instance and key lookups -- confirm.
+        env_vars = Config.environmentVariables
+        slots_var_name = env_vars.slots
+        _should_wait_for_keyword_s = os.getenv(env_vars.wakeword) or ''
+        _intent_s = os.getenv(env_vars.intent) or ''
+        _slots_s = os.getenv(slots_var_name) or ''
+
+        if _should_wait_for_keyword_s != last_should_wait_for_keyword_s:
+            last_should_wait_for_keyword_s = _should_wait_for_keyword_s
+            # Keep waiting unless the variable explicitly says "false".
+            should_wait_for_keyword = _should_wait_for_keyword_s.lower() != 'false'
+
+        if _intent_s != last_intent_s or _slots_s != last_slots_s:
+            last_intent_s = _intent_s
+            last_slots_s = _slots_s if _intent_s else ''
+
+            intent = None
+            if _intent_s:
+                intent = [True, _intent_s, {}]
+                # Slots are meaningful only together with an intent.
+                if _slots_s:
+                    try:
+                        intent[2] = parse_slots(_slots_s)
+                    except ValueError:
+                        print(f"{slots_var_name} should be a comma separated list of 'key=value' entries", file=sys.stderr)
+                        intent = None
+            preset_intent = intent
+
+        return should_wait_for_keyword, preset_intent
+
+    return check_for_env_settings