From c3481c735614a58155f6a59a45642087f7002655 Mon Sep 17 00:00:00 2001
From: skylares <93623871+skylares@users.noreply.github.com>
Date: Wed, 15 Jan 2025 09:40:31 -0500
Subject: [PATCH] Fireflies daily test (#3663)

* Initialize test files for Fireflies

* Finish creating daily test and update parsing of sections

* Add comment
---
 .../onyx/connectors/fireflies/connector.py    | 56 +++++++++------
 .../fireflies/test_fireflies_connector.py     | 62 +++++++++++++++++
 .../fireflies/test_fireflies_data.json        | 68 +++++++++++++++++++
 3 files changed, 165 insertions(+), 21 deletions(-)
 create mode 100644 backend/tests/daily/connectors/fireflies/test_fireflies_connector.py
 create mode 100644 backend/tests/daily/connectors/fireflies/test_fireflies_data.json

diff --git a/backend/onyx/connectors/fireflies/connector.py b/backend/onyx/connectors/fireflies/connector.py
index a9a02a54c41..64099ce957c 100644
--- a/backend/onyx/connectors/fireflies/connector.py
+++ b/backend/onyx/connectors/fireflies/connector.py
@@ -30,13 +30,14 @@
         transcripts(fromDate: $fromDate, toDate: $toDate, limit: $limit, skip: $skip) {
             id
             title
-            host_email
+            organizer_email
             participants
             date
             transcript_url
             sentences {
                 text
                 speaker_name
+                start_time
             }
         }
     }
@@ -44,16 +45,34 @@
 
 
 def _create_doc_from_transcript(transcript: dict) -> Document | None:
-    meeting_text = ""
-    sentences = transcript.get("sentences", [])
-    if sentences:
-        for sentence in sentences:
-            meeting_text += sentence.get("speaker_name") or "Unknown Speaker"
-            meeting_text += ": " + sentence.get("text", "") + "\n\n"
-    else:
-        return None
-
-    meeting_link = transcript["transcript_url"]
+    sections: List[Section] = []
+    current_speaker_name = None
+    current_link = ""
+    current_text = ""
+
+    for sentence in transcript["sentences"]:
+        if sentence["speaker_name"] != current_speaker_name:
+            if current_speaker_name is not None:
+                sections.append(
+                    Section(
+                        link=current_link,
+                        text=current_text.strip(),
+                    )
+                )
+            current_speaker_name = sentence.get("speaker_name") or "Unknown Speaker"
+            current_link = f"{transcript['transcript_url']}?t={sentence['start_time']}"
+            current_text = f"{current_speaker_name}: "
+
+        cleaned_text = sentence["text"].replace("\xa0", " ")
+        current_text += f"{cleaned_text} "
+
+    # Links that include a timestamp sometimes do not work; this is a bug on the Fireflies side.
+    sections.append(
+        Section(
+            link=current_link,
+            text=current_text.strip(),
+        )
+    )
 
     fireflies_id = _FIREFLIES_ID_PREFIX + transcript["id"]
 
@@ -62,27 +81,22 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
     meeting_date_unix = transcript["date"]
     meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)
 
-    meeting_host_email = transcript["host_email"]
-    host_email_user_info = [BasicExpertInfo(email=meeting_host_email)]
+    meeting_organizer_email = transcript["organizer_email"]
+    organizer_email_user_info = [BasicExpertInfo(email=meeting_organizer_email)]
 
     meeting_participants_email_list = []
     for participant in transcript.get("participants", []):
-        if participant != meeting_host_email and participant:
+        if participant != meeting_organizer_email and participant:
             meeting_participants_email_list.append(BasicExpertInfo(email=participant))
 
     return Document(
         id=fireflies_id,
-        sections=[
-            Section(
-                link=meeting_link,
-                text=meeting_text,
-            )
-        ],
+        sections=sections,
         source=DocumentSource.FIREFLIES,
         semantic_identifier=meeting_title,
         metadata={},
         doc_updated_at=meeting_date,
-        primary_owners=host_email_user_info,
+        primary_owners=organizer_email_user_info,
         secondary_owners=meeting_participants_email_list,
     )
 
diff --git a/backend/tests/daily/connectors/fireflies/test_fireflies_connector.py b/backend/tests/daily/connectors/fireflies/test_fireflies_connector.py
new file mode 100644
index 00000000000..67336cd622f
--- /dev/null
+++ b/backend/tests/daily/connectors/fireflies/test_fireflies_connector.py
@@ -0,0 +1,62 @@
+import json
+import os
+import time
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.fireflies.connector import FirefliesConnector
+from onyx.connectors.models import Document
+
+
+def load_test_data(file_name: str = "test_fireflies_data.json") -> dict[str, Any]:
+    current_dir = Path(__file__).parent
+    with open(current_dir / file_name, "r") as f:
+        return json.load(f)
+
+
+@pytest.fixture
+def fireflies_connector() -> FirefliesConnector:
+    connector = FirefliesConnector()
+    connector.load_credentials(get_credentials())
+    return connector
+
+
+def get_credentials() -> dict[str, str]:
+    return {
+        "fireflies_api_key": os.environ["FIREFLIES_API_KEY"],
+    }
+
+
+def test_fireflies_connector_basic(fireflies_connector: FirefliesConnector) -> None:
+    test_data = load_test_data()
+
+    connector_return_data: list[Document] = next(
+        fireflies_connector.poll_source(0, time.time())
+    )
+    target_doc: Document = connector_return_data[0]
+
+    assert target_doc is not None, "No documents were retrieved from the connector"
+    assert (
+        target_doc.primary_owners is not None
+    ), "No primary owners were retrieved from the connector"
+
+    assert target_doc.id == test_data["id"]
+    assert target_doc.semantic_identifier == test_data["semantic_identifier"]
+    assert target_doc.primary_owners[0].email == test_data["primary_owners"]
+    assert target_doc.secondary_owners == test_data["secondary_owners"]
+
+    assert (
+        target_doc.source == DocumentSource.FIREFLIES
+    ), "Document source is not fireflies"
+    assert target_doc.metadata == {}
+
+    # Check that the test data and the connector data contain the same section data
+    assert {section.text for section in target_doc.sections} == {
+        section["text"] for section in test_data["sections"]
+    }
+    assert {section.link for section in target_doc.sections} == {
+        section["link"] for section in test_data["sections"]
+    }
diff --git a/backend/tests/daily/connectors/fireflies/test_fireflies_data.json b/backend/tests/daily/connectors/fireflies/test_fireflies_data.json
new file mode 100644
index 00000000000..fe814b5c926
--- /dev/null
+++ b/backend/tests/daily/connectors/fireflies/test_fireflies_data.json
@@ -0,0 +1,68 @@
+{
+  "id": "FIREFLIES_VcBdZpuV82rImQCA",
+  "semantic_identifier": "Lead Generation Efforts",
+  "primary_owners": "admin@onyx-test.com",
+  "secondary_owners": [],
+  "sections": [
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=153.1",
+      "text": "test_user_1 1: Hey, David, thanks for taking the time today."
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=158.14",
+      "text": "Test Admin Admin: Of course Sarah, It's nice to see you. Whenever you're ready."
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=165.1",
+      "text": "test_user_1 1: All right then, David, let's jump right in. How are the lead generation efforts for the new product launch looking?"
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=171.084",
+      "text": "Test Admin Admin: So far we've seen a good initial response, but we're facing a slight challenge with qualifying leads. The sales team is getting inquiries. Some aren't quite aligned with our ideal customer profile."
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=191.86",
+      "text": "test_user_1 1: That makes sense. Do you think we need to adjust our marketing messaging to better target the right audience?"
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=202.26",
+      "text": "Test Admin Admin: Absolutely. Maybe we could emphasize the key features that are most relevant to our target market in the marketing materials. What are your thoughts on refining the lead capture to gather more specific information?"
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=225.99",
+      "text": "test_user_1 1: I think that's a great idea. We could add additional qualifying questions to ensure we're capturing leads with the right needs."
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=238.56",
+      "text": "Test Admin Admin: On another note, how are the social media campaigns performing? Are we seeing good engagement with the new product launch post?"
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=257.2",
+      "text": "test_user_1 1: The engagement is positive, but we could potentially increase increase reach further with targeted ad campaigns and key platforms."
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=268.91",
+      "text": "Test Admin Admin: Agreed. Let's discuss a strategy to develop targeted ads that focus on the pain points our ideal customers are facing and how our product solves them."
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=270.27",
+      "text": "test_user_1 1: We can collaborate on creating specific ad copy that highlights these benefits."
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=289.06",
+      "text": "Test Admin Admin: All right, so to summarize, let's prioritize refining the lead capture form, develop targeted social media ads, and make sure our marketing method clearly aligns with our ideal customer profile."
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=303.38",
+      "text": "test_user_1 1: Yep. And let's schedule a follow up meeting in a week, review progress and discuss any adjustments."
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=310.9",
+      "text": "Test Admin Admin: Sounds good. I'll send you address updated lead form by the end of the day. Thanks, Sarah."
+    },
+    {
+      "link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=319.19",
+      "text": "test_user_1 1: Thank you David."
+    }
+  ]
+}