From 24fcbd5061932abf410f8db43732d4d3ecd682f9 Mon Sep 17 00:00:00 2001 From: Elias Bachaalany Date: Mon, 18 Dec 2023 19:00:24 -0800 Subject: [PATCH] idxtool: find_gptfile() WIP --- .scripts/gptparser.py | 24 +++++++++++++-- .scripts/idxtool.py | 68 +++++++++---------------------------------- .vscode/launch.json | 2 +- 3 files changed, 37 insertions(+), 57 deletions(-) diff --git a/.scripts/gptparser.py b/.scripts/gptparser.py index f8da363c..693df49b 100644 --- a/.scripts/gptparser.py +++ b/.scripts/gptparser.py @@ -8,6 +8,8 @@ from collections import namedtuple from typing import Union, Tuple, Generator +compiled_pattern = re.compile(r'^([0-9a-z]{9})_([^\.]+)\.md$', re.IGNORECASE) + GPT_BASE_URL = 'https://chat.openai.com/g/g-' GPT_BASE_URL_L = len(GPT_BASE_URL) FIELD_PREFIX = 'GPT' @@ -15,9 +17,10 @@ GPT_FILE_VERSION_RE = re.compile(r'\[([^]]*)\]\.md$', re.IGNORECASE) GptFieldInfo = namedtuple('FieldInfo', ['order', 'display']) + GptIdentifier = namedtuple('GptIdentifier', ['id', 'name']) +"""Description of the fields supported by GPT markdown files.""" -# Description of the fields supported by GPT markdown files. 
SUPPORTED_FIELDS = { 'url': GptFieldInfo(10, 'URL'), 'title': GptFieldInfo(20, 'Title'), @@ -132,7 +135,7 @@ def get_prompts_path() -> str: return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'prompts', 'gpts')) def enum_gpts() -> Generator[Tuple[bool, Union[GptMarkdownFile, str]], None, None]: - """Enumerate all the GPT files in the prompts directory.""" + """Enumerate all the GPT files in the prompts directory, parse them and return the parsed GPT object.""" prompts_path = get_prompts_path() for file_path in os.listdir(prompts_path): _, ext = os.path.splitext(file_path) @@ -144,3 +147,20 @@ def enum_gpts() -> Generator[Tuple[bool, Union[GptMarkdownFile, str]], None, Non yield (True, gpt) else: yield (False, f"Failed to parse '{file_path}': {gpt}") + +def enum_gpt_files() -> Generator[str, None, None]: + """ + Enumerate all the GPT files in the prompts directory while relying on the files naming convention. + To normalize all the GPT file names, run the `idxtool.py --rename` + """ + pattern = r'[a-z]{9}_[a-z]+\.[a-z]+' + + prompts_path = get_prompts_path() + for file_path in os.listdir(prompts_path): + _, ext = os.path.splitext(file_path) + if ext != '.md': + continue + file_path = os.path.join(prompts_path, file_path) + yield file_path + + diff --git a/.scripts/idxtool.py b/.scripts/idxtool.py index 3a4c5544..0d4fd06e 100644 --- a/.scripts/idxtool.py +++ b/.scripts/idxtool.py @@ -8,7 +8,7 @@ """ import sys, os, argparse -from gptparser import GptMarkdownFile, enum_gpts +from gptparser import GptMarkdownFile, enum_gpts, parse_gpturl from typing import Tuple from urllib.parse import quote @@ -32,7 +32,7 @@ def update_description(filename): print(f"TODO Updating description with file: {filename}") raise NotImplementedError -def rename_gpt(): +def rename_gpts(): nb_ok = nb_total = 0 all_renamed_already = True @@ -63,51 +63,6 @@ def rename_gpt(): return (ok, msg) -def reformat_gpt_files(src_path: str, dst_path: str) -> Tuple[bool, str]: - """ - 
Reformat all the GPT files in the source path and save them to the destination path. - :param src_path: str, path to the source directory. - :param dst_path: str, path to the destination directory. - """ - if not os.path.exists(src_path): - return (False, f"Source path '{src_path}' does not exist.") - - if not os.path.exists(dst_path): - os.makedirs(dst_path) - - print(f"Reformatting GPT files in '{src_path}' and saving them to '{dst_path}'...") - - nb_ok = nb_total = 0 - for src_file_path in os.listdir(src_path): - _, ext = os.path.splitext(src_file_path) - if ext != '.md': - continue - nb_total += 1 - dst_file_path = os.path.join(dst_path, src_file_path) - src_file_path = os.path.join(src_path, src_file_path) - ok, gpt = GptMarkdownFile.parse(src_file_path) - if ok: - ok, msg = gpt.save(dst_file_path) - if ok: - id = gpt.id() - if id: - info = f"; id={id.id}" - if id.name: - info += f", name='{id.name}'" - else: - info = '' - print(f"[+] saved '{os.path.basename(src_file_path)}'{info}") - nb_ok += 1 - else: - print(f"[!] failed to save '{src_file_path}': {msg}") - else: - print(f"[!] failed to parse '{src_file_path}': {gpt}") - - msg = f"Reformatted {nb_ok} out of {nb_total} GPT files." 
- ok = nb_ok == nb_total - return (ok, msg) - - def parse_gpt_file(filename) -> Tuple[bool, str]: ok, gpt = GptMarkdownFile.parse(filename) if ok: @@ -181,16 +136,21 @@ def rebuild_toc(toc_out: str = '') -> Tuple[bool, str]: print(msg) return (ok, msg) - -def find_gptfile(keyword): - print(f"TODO: Finding GPT file with ID or name: {keyword}") - raise NotImplementedError - - def find_gpt_in_toc(gptid_or_string): print(f"TODO: Searching TOC.md for GPT ID or string: {gptid_or_string}") raise NotImplementedError +def find_gptfile(keyword): + keyword = keyword.strip().lower() + # Response file with a set of GPT IDs + if keyword.startswith('@'): + print(f"TODO: Finding GPT file with ID: {keyword}") + if gpt_info := parse_gpturl(keyword): + keyword = gpt_info.id + + print(f"TODO: Finding GPT with ID: {keyword}") + raise NotImplementedError + def main(): parser = argparse.ArgumentParser(description='idxtool: A GPT indexing and searching tool for the CSP repo') @@ -223,7 +183,7 @@ def main(): if args.find_gpttoc: find_gpt_in_toc(args.find_gpttoc) if args.rename: - ok, err = rename_gpt() + ok, err = rename_gpts() if not ok: print(err) diff --git a/.vscode/launch.json b/.vscode/launch.json index 1c2a59ea..2f7e9c9b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -22,7 +22,7 @@ "type": "python", "request": "launch", "program": "${workspaceFolder}/.scripts/idxtool.py", - "args": ["--find-gptfile", "GPT3"], + "args": ["--find-gptfile", "https://chat.openai.com/g/g-svehnI9xP-retro-adventures"], "console": "integratedTerminal" }, {