diff --git a/npiai/tools/email_organizer/__test__/invoice_organizer.py b/npiai/tools/email_organizer/__test__/invoice_organizer.py index 5b914de..bf785e9 100644 --- a/npiai/tools/email_organizer/__test__/invoice_organizer.py +++ b/npiai/tools/email_organizer/__test__/invoice_organizer.py @@ -17,6 +17,9 @@ async def main(): async with EmailOrganizer(provider=Outlook(creds)) as tool: email_list = [email async for email in tool.list_inbox_stream(limit=10)] + for email in email_list: + print(await tool._to_compact_email_with_pdf_attachments(email)) + print("Raw email list:", json.dumps(email_list, indent=4, ensure_ascii=False)) filtered_emails = [] diff --git a/npiai/tools/email_organizer/app.py b/npiai/tools/email_organizer/app.py index 2e4226e..5a53957 100644 --- a/npiai/tools/email_organizer/app.py +++ b/npiai/tools/email_organizer/app.py @@ -1,19 +1,17 @@ import asyncio import json -from typing import AsyncGenerator, List, cast, Literal - -from typing_extensions import TypedDict, overload +from typing import AsyncGenerator, List, cast import pymupdf from litellm.types.completion import ( ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam, ) +from typing_extensions import TypedDict from npiai import FunctionTool, Context -from npiai.utils import llm_tool_call, concurrent_task_runner, llm_summarize from npiai.tools.shared_types.base_email_tool import BaseEmailTool, EmailMessage - +from npiai.utils import llm_tool_call, concurrent_task_runner, llm_summarize from .prompts import FILTER_PROMPT, SUMMARIZE_PROMPT from .types import FilterResult, Column, EmailSummary @@ -277,21 +275,24 @@ async def _to_compact_email_with_pdf_attachments( if not email["attachments"]: return cast(CompactEmailMessage, email) - attachments = await self._provider.download_attachments_in_message( - email["id"], - filter_by_type="application/pdf", - ) + pdf_attachments = [] - if not attachments: - return cast(CompactEmailMessage, {**email, "attachments": None}) + for attachment in email["attachments"]: + if attachment["filetype"] != "application/pdf": + continue - pdf_attachments = [] + data = attachment["data"] + + if not data: + data = await self._provider.download_attachment( + message_id=attachment["message_id"], + attachment_id=attachment["id"], + ) - for attachment in attachments: - if not attachment["data"]: + if not data: continue - doc = pymupdf.open(stream=attachment["data"], filetype="pdf") + doc = pymupdf.open(stream=data, filetype="pdf") content = "" for page in doc: diff --git a/npiai/tools/google/gmail/app.py b/npiai/tools/google/gmail/app.py index b06159b..cb23dec 100644 --- a/npiai/tools/google/gmail/app.py +++ b/npiai/tools/google/gmail/app.py @@ -1,4 +1,5 @@ import asyncio +import base64 import json import os import re @@ -129,33 +130,25 @@ async def get_message_by_id(self, message_id: str) -> EmailMessage | None: except HttpError: return None - async def download_attachments_in_message( + async def download_attachment( self, message_id: str, - filter_by_type: str = None, - ) -> List[EmailAttachment] | None: + attachment_id: str, + ) -> bytes | None: try: - msg = self._gmail_client.get_message_by_id(message_id) - results: List[EmailAttachment] = [] + # noinspection PyProtectedMember + res = ( + self._gmail_client._service.users() + .messages() + .attachments() + .get(userId="me", messageId=message_id, id=attachment_id) + .execute() + ) - if not msg.attachments: + try: + return base64.urlsafe_b64decode(res["data"]) + except Exception: return None - - for att in msg.attachments: - if filter_by_type and att.filetype != filter_by_type: - continue - - att.download() - results.append( - EmailAttachment( - id=att.id, - message_id=msg.id, - filename=att.filename, - filetype=att.filetype, - data=att.data, - ) - ) - return results except HttpError: return None diff --git a/npiai/tools/outlook/app.py b/npiai/tools/outlook/app.py index 430351b..cd96837 100644 --- a/npiai/tools/outlook/app.py +++ b/npiai/tools/outlook/app.py @@ -187,53 +187,36 @@ async def list_inbox_stream( if not messages.odata_next_link: return - async def download_attachments_in_message( + async def download_attachment( self, message_id: str, - filter_by_type: str = None, - ) -> List[EmailAttachment] | None: + attachment_id: str, + ) -> bytes | None: """ Download attachments in a message Args: message_id: The ID of the message - filter_by_type: Filter the attachments by type. Default is None. + attachment_id: The ID of the attachment """ - attachments = await self._client.me.messages.by_message_id( - message_id - ).attachments.get() + attachment = ( + await self._client.me.messages.by_message_id(message_id) + .attachments.by_attachment_id(attachment_id) + .get() + ) - if not attachments or not attachments.value: + if not attachment or attachment.odata_type != FileAttachment.odata_type: return None - results: List[EmailAttachment] = [] - - for attachment in attachments.value: - if filter_by_type and attachment.content_type != filter_by_type: - continue - - att = cast( - FileAttachment, - await self._client.me.messages.by_message_id(message_id) - .attachments.by_attachment_id(attachment.id) - .get(), - ) + attachment = cast(FileAttachment, attachment) - results.append( - EmailAttachment( - id=att.id, - message_id=message_id, - filename=att.name, - filetype=att.content_type, - data=( - base64.urlsafe_b64decode(att.content_bytes) - if att.content_bytes - else None - ), - ) - ) + if not attachment.content_bytes: + return None - return results + try: + return base64.urlsafe_b64decode(attachment.content_bytes) + except Exception: + return None @function async def search_emails( diff --git a/npiai/tools/shared_types/base_email_tool.py b/npiai/tools/shared_types/base_email_tool.py index 48da3fe..0830ea1 100644 --- a/npiai/tools/shared_types/base_email_tool.py +++ b/npiai/tools/shared_types/base_email_tool.py @@ -41,9 +41,9 @@ async def get_message_by_id(self, message_id: str) -> EmailMessage | None: pass @abstractmethod - async def download_attachments_in_message( + async def download_attachment( self, message_id: str, - filter_by_type: str = None, - ) -> List[EmailAttachment] | None: + attachment_id: str, + ) -> bytes | None: pass