Skip to content

Commit

Permalink
Add assertions to Zendesk connector tests
Browse files Browse the repository at this point in the history
  • Loading branch information
skylares committed Jan 5, 2025
1 parent f895e5f commit a7318b5
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 1 deletion.
4 changes: 4 additions & 0 deletions .github/workflows/pr-python-connector-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ env:
GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
# Slab
SLAB_BOT_TOKEN: ${{ secrets.SLAB_BOT_TOKEN }}
# Zendesk
ZENDESK_SUBDOMAIN: ${{ secrets.ZENDESK_SUBDOMAIN }}
ZENDESK_EMAIL: ${{ secrets.ZENDESK_EMAIL }}
ZENDESK_TOKEN: ${{ secrets.ZENDESK_TOKEN }}
# Salesforce
SF_USERNAME: ${{ secrets.SF_USERNAME }}
SF_PASSWORD: ${{ secrets.SF_PASSWORD }}
Expand Down
43 changes: 42 additions & 1 deletion backend/onyx/connectors/zendesk/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,21 @@
time_str_to_utc,
)
from onyx.connectors.interfaces import GenerateDocumentsOutput
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
from onyx.connectors.interfaces import LoadConnector
from onyx.connectors.interfaces import PollConnector
from onyx.connectors.interfaces import SecondsSinceUnixEpoch
from onyx.connectors.interfaces import SlimConnector
from onyx.connectors.models import BasicExpertInfo
from onyx.connectors.models import Document
from onyx.connectors.models import Section
from onyx.connectors.models import SlimDocument
from onyx.file_processing.html_utils import parse_html_page_basic
from onyx.utils.retry_wrapper import retry_builder


MAX_PAGE_SIZE = 30 # Zendesk API maximum
# Number of SlimDocument entries accumulated by retrieve_all_slim_documents
# before a batch is yielded to the caller.
_SLIM_BATCH_SIZE = 1000


class ZendeskCredentialsNotSetUpError(PermissionError):
Expand Down Expand Up @@ -272,7 +276,7 @@ def _ticket_to_document(
)


class ZendeskConnector(LoadConnector, PollConnector):
class ZendeskConnector(LoadConnector, PollConnector, SlimConnector):
def __init__(
self,
batch_size: int = INDEX_BATCH_SIZE,
Expand Down Expand Up @@ -397,6 +401,43 @@ def _poll_tickets(
if doc_batch:
yield doc_batch

def retrieve_all_slim_documents(
    self,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> GenerateSlimDocumentOutput:
    """Yield batches of ID-only SlimDocuments for the configured content type.

    Args:
        start: Optional lower time bound; when truthy it is truncated to an
            int and forwarded to the fetch helper as ``start_time``.
        end: Accepted as part of the signature; this implementation does
            not read it.

    Yields:
        Lists of at most ``_SLIM_BATCH_SIZE`` SlimDocument objects; a final
        shorter list is yielded when leftovers remain.

    Raises:
        ValueError: If ``self.content_type`` is neither "articles" nor
            "tickets". Because this is a generator, the error surfaces on
            first iteration rather than at call time.
    """
    if self.content_type not in ("articles", "tickets"):
        raise ValueError(f"Unsupported content_type: {self.content_type}")

    start_time = int(start) if start else None

    # Build a lazy stream of document IDs; the ID formats differ per type.
    if self.content_type == "articles":
        doc_ids = (
            f"article:{article['id']}"
            for article in _get_articles(self.client, start_time=start_time)
        )
    else:
        doc_ids = (
            f"zendesk_ticket_{ticket['id']}"
            for ticket in _get_tickets(self.client, start_time=start_time)
        )

    pending: list[SlimDocument] = []
    for doc_id in doc_ids:
        pending.append(SlimDocument(id=doc_id))
        if len(pending) >= _SLIM_BATCH_SIZE:
            yield pending
            pending = []

    # Flush whatever did not fill a complete batch.
    if pending:
        yield pending


if __name__ == "__main__":
import os
Expand Down
110 changes: 110 additions & 0 deletions backend/tests/daily/connectors/zendesk/test_zendesk_connector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import json
import os
import time
from pathlib import Path

import pytest

from onyx.configs.constants import DocumentSource
from onyx.connectors.models import Document
from onyx.connectors.zendesk.connector import ZendeskConnector


def load_test_data(file_name: str = "test_zendesk_data.json") -> dict[str, dict]:
    """Load expected-document data from a JSON file next to this module.

    Args:
        file_name: Name of the JSON file, resolved relative to the directory
            containing this module.

    Returns:
        The parsed JSON content.

    Raises:
        FileNotFoundError: If the file does not exist in this directory.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    data_path = Path(__file__).parent / file_name
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default locale encoding.
    with open(data_path, "r", encoding="utf-8") as f:
        return json.load(f)


@pytest.fixture
def zendesk_article_connector() -> ZendeskConnector:
    """Provide a ZendeskConnector for "articles" with credentials loaded."""
    article_connector = ZendeskConnector(content_type="articles")
    credentials = get_credentials()
    article_connector.load_credentials(credentials)
    return article_connector


@pytest.fixture
def zendesk_ticket_connector() -> ZendeskConnector:
    """Provide a ZendeskConnector for "tickets" with credentials loaded."""
    ticket_connector = ZendeskConnector(content_type="tickets")
    credentials = get_credentials()
    ticket_connector.load_credentials(credentials)
    return ticket_connector


def get_credentials() -> dict[str, str]:
    """Build the Zendesk credential dict from environment variables.

    Reads ZENDESK_SUBDOMAIN, ZENDESK_EMAIL and ZENDESK_TOKEN, in that order.

    Raises:
        KeyError: If any of the three environment variables is unset.
    """
    env = os.environ
    subdomain = env["ZENDESK_SUBDOMAIN"]
    email = env["ZENDESK_EMAIL"]
    token = env["ZENDESK_TOKEN"]
    return dict(
        zendesk_subdomain=subdomain,
        zendesk_email=email,
        zendesk_token=token,
    )


@pytest.mark.parametrize(
    "connector_fixture", ["zendesk_article_connector", "zendesk_ticket_connector"]
)
def test_zendesk_connector_basic(
    request: pytest.FixtureRequest, connector_fixture: str
) -> None:
    """Poll Zendesk and compare one known document against the stored data.

    Runs once per connector fixture. Every document returned by
    ``poll_source`` from epoch 0 until now is collected, the document whose
    ID matches the entry in the JSON test data is located, and its fields
    are compared with the expected values.
    """
    connector = request.getfixturevalue(connector_fixture)
    test_data = load_test_data()

    # Pick the expected record and the ID format matching the content type.
    is_article = connector.content_type == "articles"
    expected = test_data["article"] if is_article else test_data["ticket"]
    target_test_doc_id = (
        f"article:{expected['id']}"
        if is_article
        else f"zendesk_ticket_{expected['id']}"
    )

    all_docs: list[Document] = []
    target_doc: Document | None = None
    for doc_batch in connector.poll_source(0, time.time()):
        for doc in doc_batch:
            all_docs.append(doc)
            if doc.id == target_test_doc_id:
                target_doc = doc

    assert len(all_docs) > 0, "No documents were retrieved from the connector"
    assert (
        target_doc is not None
    ), "Target document was not found in the retrieved documents"
    assert target_doc.source == DocumentSource.ZENDESK, "Document source is not ZENDESK"

    # Fields checked for both articles and tickets.
    assert target_doc.semantic_identifier == expected["semantic_identifier"]
    assert target_doc.sections[0].link == expected["sections"][0]["link"]
    assert target_doc.source == expected["source"]

    if is_article:
        # Fail with a readable message instead of an indexing error when the
        # connector returned no owners.
        assert target_doc.primary_owners, "Article document has no primary owners"
        owner = target_doc.primary_owners[0]
        expected_owner = expected["primary_owners"][0]
        assert owner.display_name == expected_owner["display_name"]
        assert owner.email == expected_owner["email"]
    else:
        expected_metadata = expected["metadata"]
        for key in ("status", "priority", "tags", "ticket_type"):
            assert (
                target_doc.metadata[key] == expected_metadata[key]
            ), f"Ticket metadata mismatch for '{key}'"


def test_zendesk_connector_slim(zendesk_article_connector: ZendeskConnector) -> None:
    """Check that every fully loaded article ID also appears in the slim listing."""
    # IDs of the full documents produced by load_from_state
    all_full_doc_ids = {
        doc.id
        for doc_batch in zendesk_article_connector.load_from_state()
        for doc in doc_batch
    }

    # IDs reported by the slim retrieval path
    all_slim_doc_ids = {
        slim_doc.id
        for slim_doc_batch in zendesk_article_connector.retrieve_all_slim_documents()
        for slim_doc in slim_doc_batch
    }

    # The slim listing must cover at least everything the full load returned.
    assert all_full_doc_ids.issubset(
        all_slim_doc_ids
    ), f"Full doc IDs {all_full_doc_ids} not subset of slim doc IDs {all_slim_doc_ids}"
34 changes: 34 additions & 0 deletions backend/tests/daily/connectors/zendesk/test_zendesk_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"article": {
"id": "32502691728155",
"sections": [
{
"link": "https://d3v-onyx.zendesk.com/hc/en-us/articles/32502691728155-How-can-agents-leverage-knowledge-to-help-customers"
}
],
"source": "zendesk",
"semantic_identifier": "How can agents leverage knowledge to help customers?",
"primary_owners": [
{
"display_name": "Dan Swer",
"email": "admin@onyx-test.com"
}
]
},
"ticket": {
"id": "1",
"sections": [
{
"link": "https://d3v-onyx.zendesk.com/agent/tickets/1"
}
],
"source": "zendesk",
"semantic_identifier": "Ticket #1: SAMPLE TICKET: Meet the ticket",
"metadata": {
"status": "open",
"priority": "normal",
"tags": ["sample", "support", "zendesk"],
"ticket_type": "incident"
}
}
}

0 comments on commit a7318b5

Please sign in to comment.