From 993a1a6cafc08fb90790be1ac45b42c9ebca30e7 Mon Sep 17 00:00:00 2001
From: skylares <93623871+skylares@users.noreply.github.com>
Date: Wed, 15 Jan 2025 10:50:33 -0500
Subject: [PATCH] Add discord daily test (#3676)

* Add discord daily test

* Fix mypy error
---
Review notes (commentary only; not part of the commit message or the diff):

* The skipped test_discord_poll_connector is replaced by
  test_discord_connector_basic, which has no skip marker. It polls from
  timestamp 0 up to time.time() and compares what comes back against the
  expectations stored in test_discord_data.json (added by this patch).
* The discord_connector fixture no longer reads the server_ids,
  channel_names or start_date environment variables; it now constructs
  DiscordConnector() with no arguments.
* The fixture calls get_credentials(), which raises ValueError when the
  DISCORD_BOT_TOKEN environment variable is not set.
* The expected counts (8 documents, 2 channels, 1 thread) are hard-coded in
  the test, while the expected channels, threads, links and texts come from
  the JSON file; the two have to be kept in sync by hand.
* NOTE(review): load_test_data() opens the JSON file without an explicit
  encoding; presumably UTF-8 is intended -- confirm.

 .../discord/test_discord_connector.py         | 71 +++++++++++++------
 .../connectors/discord/test_discord_data.json | 32 +++++++++
 2 files changed, 82 insertions(+), 21 deletions(-)
 create mode 100644 backend/tests/daily/connectors/discord/test_discord_data.json

diff --git a/backend/tests/daily/connectors/discord/test_discord_connector.py b/backend/tests/daily/connectors/discord/test_discord_connector.py
index 98d66e49eda..2616fd7974a 100644
--- a/backend/tests/daily/connectors/discord/test_discord_connector.py
+++ b/backend/tests/daily/connectors/discord/test_discord_connector.py
@@ -1,39 +1,47 @@
+import json
 import os
 import time
+from pathlib import Path
+from typing import Any
 
 import pytest
 
 from onyx.connectors.discord.connector import DiscordConnector
 from onyx.connectors.models import Document
+from onyx.connectors.models import DocumentSource
+
+
+def load_test_data(file_name: str = "test_discord_data.json") -> dict[str, Any]:
+    current_dir = Path(__file__).parent
+    with open(current_dir / file_name, "r") as f:
+        return json.load(f)
 
 
 @pytest.fixture
 def discord_connector() -> DiscordConnector:
-    server_ids: str | None = os.environ.get("server_ids", None)
-    channel_names: str | None = os.environ.get("channel_names", None)
-
-    connector = DiscordConnector(
-        server_ids=server_ids.split(",") if server_ids else [],
-        channel_names=channel_names.split(",") if channel_names else [],
-        start_date=os.environ.get("start_date", None),
-    )
-    connector.load_credentials(
-        {
-            "discord_bot_token": os.environ.get("DISCORD_BOT_TOKEN"),
-        }
-    )
+    connector = DiscordConnector()
+    connector.load_credentials(get_credentials())
     return connector
 
 
-@pytest.mark.skip(reason="Test Discord is not setup yet!")
-def test_discord_poll_connector(discord_connector: DiscordConnector) -> None:
-    end = time.time()
-    start = end - 24 * 60 * 60 * 15  # 1 day
+def get_credentials() -> dict[str, str]:
+    token = os.environ.get("DISCORD_BOT_TOKEN")
+    if token is None:
+        raise ValueError("DISCORD_BOT_TOKEN is not set")
+    return {"discord_bot_token": token}
+
+
+def test_discord_connector_basic(discord_connector: DiscordConnector) -> None:
+    test_data = load_test_data()
+
+    target_doc_id = test_data["target_doc"]["id"]
+    target_doc: Document | None = None
 
     all_docs: list[Document] = []
     channels: set[str] = set()
     threads: set[str] = set()
-    for doc_batch in discord_connector.poll_source(start, end):
+
+    for doc_batch in discord_connector.poll_source(0, time.time()):
         for doc in doc_batch:
             if "Channel" in doc.metadata:
                 assert isinstance(doc.metadata["Channel"], str)
@@ -41,9 +49,30 @@ def test_discord_poll_connector(discord_connector: DiscordConnector) -> None:
             if "Thread" in doc.metadata:
                 assert isinstance(doc.metadata["Thread"], str)
                 threads.add(doc.metadata["Thread"])
+            if doc.id == target_doc_id:
+                target_doc = doc
             all_docs.append(doc)
 
-    # might change based on the channels and servers being used
-    assert len(all_docs) == 10
+    # Check all docs are returned, with the correct number of channels and threads
+    assert len(all_docs) == 8
     assert len(channels) == 2
-    assert len(threads) == 2
+    assert len(threads) == 1
+
+    # Check that all the channels and threads are returned
+    assert channels == set(test_data["channels"])
+    assert threads == set(test_data["threads"])
+
+    # Check the target doc
+    assert target_doc is not None
+    assert target_doc.id == target_doc_id
+    assert target_doc.source == DocumentSource.DISCORD
+    assert target_doc.metadata["Thread"] == test_data["target_doc"]["Thread"]
+    assert target_doc.sections[0].link == test_data["target_doc"]["link"]
+    assert target_doc.sections[0].text == test_data["target_doc"]["text"]
+    assert (
+        target_doc.semantic_identifier == test_data["target_doc"]["semantic_identifier"]
+    )
+
+    # Ensure all the docs section data is returned correctly
+    assert {doc.sections[0].text for doc in all_docs} == set(test_data["texts"])
+    assert {doc.sections[0].link for doc in all_docs} == set(test_data["links"])
diff --git a/backend/tests/daily/connectors/discord/test_discord_data.json b/backend/tests/daily/connectors/discord/test_discord_data.json
new file mode 100644
index 00000000000..cc0b58fc431
--- /dev/null
+++ b/backend/tests/daily/connectors/discord/test_discord_data.json
@@ -0,0 +1,32 @@
+{
+    "target_doc": {
+        "id": "DISCORD_1328834962551603201",
+        "text": "Yes I have figured it out! Shall we discuss further in private? I have much to tell you.",
+        "link": "https://discord.com/channels/1328437578021736479/1328443280685400105/1328834962551603201",
+        "semantic_identifier": "onyx_test_user_1 said in Thread: Anyone figure out the perpetual motion: Yes I have figured it out! Sha...",
+        "Thread": "Anyone figure out the perpetual motion"
+
+    },
+    "channels": ["rules", "general"],
+    "threads": ["Anyone figure out the perpetual motion"],
+    "links": [
+        "https://discord.com/channels/1328437578021736479/1328437578021736482/1328443181779652718",
+        "https://discord.com/channels/1328437578021736479/1328437578021736482/1328443280685400105",
+        "https://discord.com/channels/1328437578021736479/1328443280685400105/1328834962551603201",
+        "https://discord.com/channels/1328437578021736479/1328443280685400105/1328835359529766974",
+        "https://discord.com/channels/1328437578021736479/1328442536066416760/1328828040821604432",
+        "https://discord.com/channels/1328437578021736479/1328442536066416760/1328828065874444359",
+        "https://discord.com/channels/1328437578021736479/1328442536066416760/1328828097956548732",
+        "https://discord.com/channels/1328437578021736479/1328442536066416760/1328828102536855644"
+    ],
+    "texts": [
+        "Hello all! How's everyone doing today?",
+        "Anyone figure out the perpetual motion machine yet? I think we're getting close!",
+        "Yes I have figured it out! Shall we discuss further in private? I have much to tell you.",
+        "Absolutely! let's have continued dialog in a space such that none other than ourselves may be privy to the information revealed.",
+        "1.) Keep opinions to a minimum, we want only to ascertain the truth.",
+        "2.) Answer questions quickly and accurately.",
+        "3.) Respect one another.",
+        "4.) Have fun!"
+    ]
+}