Skip to content

Commit

Permalink
idxtool: find_gptfile() WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
0xeb committed Dec 19, 2023
1 parent 3cf64c4 commit 24fcbd5
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 57 deletions.
24 changes: 22 additions & 2 deletions .scripts/gptparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,19 @@
from collections import namedtuple
from typing import Union, Tuple, Generator

# Matches normalized GPT file names: a 9-character alphanumeric ID, an
# underscore, a name without dots, and the '.md' extension (case-insensitive).
compiled_pattern = re.compile(r'^([0-9a-z]{9})_([^\.]+)\.md$', re.IGNORECASE)

# Common prefix of GPT URLs, and its length.
GPT_BASE_URL = 'https://chat.openai.com/g/g-'
GPT_BASE_URL_L = len(GPT_BASE_URL)
FIELD_PREFIX = 'GPT'

# Captures the text inside a trailing "[...]" that immediately precedes '.md'.
GPT_FILE_VERSION_RE = re.compile(r'\[([^]]*)\]\.md$', re.IGNORECASE)

# The typename matches the bound name so that repr() and pickling agree with it.
GptFieldInfo = namedtuple('GptFieldInfo', ['order', 'display'])

GptIdentifier = namedtuple('GptIdentifier', ['id', 'name'])
"""Description of the fields supported by GPT markdown files."""

# Description of the fields supported by GPT markdown files.
SUPPORTED_FIELDS = {
'url': GptFieldInfo(10, 'URL'),
'title': GptFieldInfo(20, 'Title'),
Expand Down Expand Up @@ -132,7 +135,7 @@ def get_prompts_path() -> str:
return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'prompts', 'gpts'))

def enum_gpts() -> Generator[Tuple[bool, Union[GptMarkdownFile, str]], None, None]:
"""Enumerate all the GPT files in the prompts directory."""
"""Enumerate all the GPT files in the prompts directory, parse them and return the parsed GPT object."""
prompts_path = get_prompts_path()
for file_path in os.listdir(prompts_path):
_, ext = os.path.splitext(file_path)
Expand All @@ -144,3 +147,20 @@ def enum_gpts() -> Generator[Tuple[bool, Union[GptMarkdownFile, str]], None, Non
yield (True, gpt)
else:
yield (False, f"Failed to parse '{file_path}': {gpt}")

def enum_gpt_files() -> Generator[str, None, None]:
    """
    Enumerate the paths of all the GPT markdown files in the prompts directory.

    Every directory entry with a '.md' extension is yielded; the file naming
    convention itself is not validated here.
    To normalize all the GPT file names, run `idxtool.py --rename`.

    :return: generator of file paths, each joined onto the prompts directory.
    """
    prompts_path = get_prompts_path()
    for file_name in os.listdir(prompts_path):
        _, ext = os.path.splitext(file_name)
        if ext != '.md':
            continue
        yield os.path.join(prompts_path, file_name)


68 changes: 14 additions & 54 deletions .scripts/idxtool.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"""

import sys, os, argparse
from gptparser import GptMarkdownFile, enum_gpts
from gptparser import GptMarkdownFile, enum_gpts, parse_gpturl
from typing import Tuple
from urllib.parse import quote

Expand All @@ -32,7 +32,7 @@ def update_description(filename):
print(f"TODO Updating description with file: {filename}")
raise NotImplementedError

def rename_gpt():
def rename_gpts():
nb_ok = nb_total = 0
all_renamed_already = True

Expand Down Expand Up @@ -63,51 +63,6 @@ def rename_gpt():
return (ok, msg)


def reformat_gpt_files(src_path: str, dst_path: str) -> Tuple[bool, str]:
    """
    Reformat all the GPT files in the source path and save them to the destination path.

    Only entries with a '.md' extension are processed. Each one is parsed with
    `GptMarkdownFile.parse` and written back under the same file name in the
    destination directory, which is created when missing.

    :param src_path: str, path to the source directory.
    :param dst_path: str, path to the destination directory.
    :return: tuple (ok, msg); ok is True only when every '.md' file was
             parsed and saved, msg is a summary or the reason for the failure.
    """
    if not os.path.exists(src_path):
        return (False, f"Source path '{src_path}' does not exist.")

    # A regular file passes the existence check but would make os.listdir()
    # raise; report it through the (ok, msg) contract instead.
    if not os.path.isdir(src_path):
        return (False, f"Source path '{src_path}' is not a directory.")

    if not os.path.exists(dst_path):
        # exist_ok avoids a crash if the directory appears between the check
        # above and this call.
        os.makedirs(dst_path, exist_ok=True)

    print(f"Reformatting GPT files in '{src_path}' and saving them to '{dst_path}'...")

    nb_ok = nb_total = 0
    for file_name in os.listdir(src_path):
        _, ext = os.path.splitext(file_name)
        if ext != '.md':
            continue
        nb_total += 1
        dst_file_path = os.path.join(dst_path, file_name)
        src_file_path = os.path.join(src_path, file_name)

        ok, gpt = GptMarkdownFile.parse(src_file_path)
        if not ok:
            # On failure the second element carries the error description.
            print(f"[!] failed to parse '{src_file_path}': {gpt}")
            continue

        ok, msg = gpt.save(dst_file_path)
        if not ok:
            print(f"[!] failed to save '{src_file_path}': {msg}")
            continue

        # Named gpt_id to avoid shadowing the builtin id().
        gpt_id = gpt.id()
        if gpt_id:
            info = f"; id={gpt_id.id}"
            if gpt_id.name:
                info += f", name='{gpt_id.name}'"
        else:
            info = ''
        print(f"[+] saved '{os.path.basename(src_file_path)}'{info}")
        nb_ok += 1

    msg = f"Reformatted {nb_ok} out of {nb_total} GPT files."
    ok = nb_ok == nb_total
    return (ok, msg)


def parse_gpt_file(filename) -> Tuple[bool, str]:
ok, gpt = GptMarkdownFile.parse(filename)
if ok:
Expand Down Expand Up @@ -181,16 +136,21 @@ def rebuild_toc(toc_out: str = '') -> Tuple[bool, str]:
print(msg)
return (ok, msg)


def find_gptfile(keyword):
    """Placeholder: print a TODO notice for ``keyword`` and raise NotImplementedError."""
    todo_notice = f"TODO: Finding GPT file with ID or name: {keyword}"
    print(todo_notice)
    raise NotImplementedError


def find_gpt_in_toc(gptid_or_string):
    """Placeholder: print a TODO notice for the TOC.md search and raise NotImplementedError."""
    todo_notice = f"TODO: Searching TOC.md for GPT ID or string: {gptid_or_string}"
    print(todo_notice)
    raise NotImplementedError

def find_gptfile(keyword):
    """
    Find a GPT file from a keyword (work in progress).

    The keyword is stripped and lower-cased. When `parse_gpturl` returns a
    truthy result for it, the keyword is replaced by that result's `id`.

    :param keyword: str, the search keyword; a leading '@' is announced as a
                    response file with a set of GPT IDs (not handled yet).
    :raises NotImplementedError: always; the lookup itself is not implemented.
    """
    # str has no tolower() method; lower() is the correct call.
    # NOTE(review): lower-casing happens before URL parsing, so an ID taken
    # from a URL loses its original case - confirm this is intended.
    keyword = keyword.strip().lower()
    # Response file with a set of GPT IDs
    if keyword.startswith('@'):
        print(f"TODO: Finding GPT file with ID: {keyword}")
    # Presumably parse_gpturl returns a falsy value for non-URL input - verify.
    if gpt_info := parse_gpturl(keyword):
        keyword = gpt_info.id

    print(f"TODO: Finding GPT with ID: {keyword}")
    raise NotImplementedError

def main():
parser = argparse.ArgumentParser(description='idxtool: A GPT indexing and searching tool for the CSP repo')

Expand Down Expand Up @@ -223,7 +183,7 @@ def main():
if args.find_gpttoc:
find_gpt_in_toc(args.find_gpttoc)
if args.rename:
ok, err = rename_gpt()
ok, err = rename_gpts()
if not ok:
print(err)

Expand Down
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/.scripts/idxtool.py",
"args": ["--find-gptfile", "GPT3"],
"args": ["--find-gptfile", "https://chat.openai.com/g/g-svehnI9xP-retro-adventures"],
"console": "integratedTerminal"
},
{
Expand Down

0 comments on commit 24fcbd5

Please sign in to comment.