Skip to content

Commit

Permalink
Enable Discordbot Images (#11)
Browse files Browse the repository at this point in the history
* Using the df `code` col as a lookup and using the simplified `images` col as the uri.

* Playwright pinned to version 1.47.0
  • Loading branch information
Acbarakat authored Oct 21, 2024
1 parent a68ec4e commit 4c37e75
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 4 deletions.
6 changes: 5 additions & 1 deletion crystalvision/discordbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,11 @@
try:
from .lang import PROMPTS_JSON, CORPUS_DIR
from .lang.docs import DOCS
from .lang.tools import MultiImageEmbedTool
except (ModuleNotFoundError, ImportError):
from crystalvision.lang import PROMPTS_JSON, CORPUS_DIR
from crystalvision.lang.docs import DOCS
from crystalvision.lang.tools import MultiImageEmbedTool


log = logging.getLogger("discord.crystalvision")
Expand Down Expand Up @@ -144,7 +146,7 @@ def agent(self) -> AgentExecutor:
include_df_in_prompt=None,
allow_dangerous_code=True,
prefix=prefix,
extra_tools=[retriever_tool],
extra_tools=[retriever_tool, MultiImageEmbedTool(self.df)],
)

@cached_property
Expand Down Expand Up @@ -211,6 +213,7 @@ def decode_message(self, message: discord.Message) -> str:
return message.content

CARD_ITALICS = re.compile(r"\[\[i\]\](.*?)\[\[/\]\]")
CARD_SPECIAL = re.compile(r"\[\[s\]\](.*?)\[\[/\]\]")
EX_BURST = re.compile(r"\[\[ex\]\]EX BURS[T|T ]\[\[/\]\]")

def format_message(self, message: str) -> str:
Expand All @@ -222,6 +225,7 @@ def format_message(self, message: str) -> str:
)
answer = re.sub(r"\u2029\s+|\u2029\s", "\n", answer)
answer = self.CARD_ITALICS.sub(r"*\1*", answer)
answer = self.CARD_SPECIAL.sub(r"***\1***", answer)
return answer

async def generate(self, content, context) -> str:
Expand Down
6 changes: 6 additions & 0 deletions crystalvision/lang/df_description.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
"type_en": {
"description": "the English type of the card"
},
"power": {
"description": "the numerical value of power of the card"
},
"isMultiElement": {
"description": "if the card is multielement/multi-element/multi element but not which elements it is"
},
Expand All @@ -22,5 +25,8 @@
},
"ex_burst": {
"description": "if the card has an exburst/ex burst/ex-burst/ex/EX ability"
},
"images": {
"description": "the card image URL"
}
}
12 changes: 10 additions & 2 deletions crystalvision/lang/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,17 @@
TEXTEN_REGEX = re.compile(
"|".join(re.escape(kanji) for kanji in kanji_to_english.keys())
)
CARD_CODE = re.compile(r"\d{1,2}-\d{3}[CRHLS]")
CARD_CODE = re.compile(r"(?:\d{1,2})-\d{3}[CRHLS]|PR-\d{3}")


def explain_database():
with Path(__file__).parent / "df_description.json" as fp:
description = json.loads(fp.read_bytes())

df = make_database().drop(
["id", "images", "thumbs", "element", "power", "multicard", "mono"], axis=1
["id", "thumbs", "element", "power", "multicard", "mono"], axis=1
)
df = df[~df["code"].str.contains("C-")]
for lang in ("de", "fr", "es", "it", "ja"):
df.drop(
[f"name_{lang}", f"text_{lang}", f"job_{lang}", f"type_{lang}"],
Expand All @@ -66,6 +67,13 @@ def explain_database():
df["text_en"] = (
df["text_en"].str.replace(r"\[\[br\]\]", "\u2029", regex=True).str.strip()
)
df["images"] = df["images"].apply(
lambda x: (
f"https://fftcg.cdn.sewest.net/images/cards/full/{x[0]}" if x else None
)
)
df["cost"] = df["cost"].astype(int)
df["power"] = df["power"].astype(float)

for col in df.columns:
if col_attrs := description.get(col):
Expand Down
99 changes: 99 additions & 0 deletions crystalvision/lang/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from typing import Optional, List, Type, Any, Tuple

import pandas as pd
from langchain.tools import BaseTool
from langchain.callbacks.manager import (
AsyncCallbackManagerForToolRun,
CallbackManagerForToolRun,
)
from pydantic import BaseModel, Field, field_validator, field_serializer

try:
from .loaders import CARD_CODE
except (ModuleNotFoundError, ImportError):
from crystalvision.lang.loaders import CARD_CODE


class MultipleImageInput(BaseModel):
    """Argument schema for the image tool: a list of card codes.

    A raw string (for example ``"['1-001H', 'PR-012']"``) is accepted as
    well; every substring matching ``CARD_CODE`` is extracted from it.
    """

    card_codes: List[str] = Field(description="A list of card code(s)")

    @field_validator("card_codes", mode="before")
    @classmethod
    def validate_card_codes(cls, v: Any):
        """Turn a string input into a list of card codes before validation.

        Args:
            v: The raw ``card_codes`` value. Strings are scanned with
                ``CARD_CODE``; any other value is passed through untouched
                for pydantic's regular ``List[str]`` validation.

        Returns:
            The list of matched card codes for string input, otherwise ``v``.

        Raises:
            ValueError: If ``v`` is a string containing no card code.
        """
        if isinstance(v, str):
            # The string is scanned with a regex, never evaluated, so
            # list-looking text such as "['1-001H']" is handled safely.
            v = CARD_CODE.findall(v)
            if not v:
                raise ValueError("No valid card codes provided.")
        return v

    @field_serializer("card_codes")
    @classmethod
    def parse_card_codes(cls, v: Any):
        """Return ``v`` as a list of card codes.

        Strings are scanned with ``CARD_CODE`` (an empty list results when
        nothing matches); any other value is returned unchanged.
        """
        if isinstance(v, str):
            v = CARD_CODE.findall(v)
        return v


class MultiImageEmbedTool(BaseTool):
    """Tool that resolves card codes to image links and Discord embeds.

    Rows of ``df`` whose ``code`` is among the requested card codes are
    turned into a Markdown link (the tool's content) and a
    ``discord.Embed`` showing the image (the tool's artifact).
    """

    name: str = "MultiImageEmbedTool"
    description: str = (
        "Shows multiple images or retrieve image(s) URLs from the given a list of card code(s) formatted for discord."
    )
    return_direct: bool = False
    # Card table; the methods below read its `code`, `name_en` and `images`
    # columns.
    df: pd.DataFrame = pd.DataFrame()
    response_format: str = "content_and_artifact"

    # Define the schema for arguments that the tool will accept
    args_schema: Type[BaseModel] = MultipleImageInput

    def __init__(self, df: Optional[pd.DataFrame] = None, **kwargs: Any) -> None:
        """Create the tool.

        Args:
            df: Card table to look codes up in. When omitted, the empty
                default DataFrame is kept instead of being replaced by
                ``None``.
            **kwargs: Forwarded to ``BaseTool``.
        """
        super().__init__(**kwargs)

        if df is not None:
            self.df = df

    def _collect(self, card_codes: Any) -> Tuple[List[str], List[Any]]:
        """Build the Markdown links and embeds for the matching cards.

        Args:
            card_codes: A list of card codes, or a string containing them.

        Returns:
            A ``(links, embeds)`` pair with one entry per matching row.
        """
        # Imported lazily, as in the original code, so that importing this
        # module does not require discord.
        from discord import Embed

        card_codes = MultipleImageInput.parse_card_codes(card_codes)

        links: List[str] = []
        embeds: List[Any] = []
        if self.df.empty:
            # Nothing to look up (e.g. the tool was built without a table).
            return links, embeds

        # `@card_codes` is resolved by pandas from this function's locals.
        for _, row in self.df.query("`code` in @card_codes").iterrows():
            title = f"{row['name_en'].upper()} {row['code']}"
            # Markdown link syntax is [text](url).
            links.append(f"[{title}]({row['images']})")
            embed = Embed(title=title)
            embed.set_image(url=row["images"])
            embeds.append(embed)

        return links, embeds

    def _run(
        self, card_codes: Any, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> Tuple[str, List[Any]]:
        """Return image links and Discord embeds for the given card codes.

        Args:
            card_codes: A list of card codes, or a string containing them.
            run_manager: Unused callback manager supplied by LangChain.

        Returns:
            ``(content, artifacts)``: the links joined with ``"\\r\\n"`` and
            the list of embeds.
        """
        links, embeds = self._collect(card_codes)
        # The sync and async paths use different separators; both are kept.
        return ("\r\n".join(links), embeds)

    async def _arun(
        self,
        card_codes: Any,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> Tuple[str, List[Any]]:
        """Asynchronous version of the tool.

        Args:
            card_codes: A list of card codes, or a string containing them.
            run_manager: Unused callback manager supplied by LangChain.

        Returns:
            ``(content, artifacts)``: the links joined with ``"\\u2029"`` and
            the list of embeds.
        """
        links, embeds = self._collect(card_codes)
        return ("\u2029".join(links), embeds)
2 changes: 1 addition & 1 deletion docker/Dockerfile.discordbot
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ RUN apt-get update && apt-get upgrade -yqq

RUN apt install libffi-dev libnacl-dev python3-dev -yqq

RUN pip install poetry==1.8.3 playwright
RUN pip install poetry==1.8.3 playwright==1.47.0
RUN poetry config virtualenvs.create false && poetry config cache-dir /poetry-cache
RUN playwright install --with-deps chromium

Expand Down

0 comments on commit 4c37e75

Please sign in to comment.