Skip to content

Commit

Permalink
refactor(tools/email): support get attachment by id
Browse files: browse the repository at this point in the history
  • Loading branch information
idiotWu committed Dec 26, 2024
1 parent 5e1dd4f commit cab21be
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 74 deletions.
3 changes: 3 additions & 0 deletions npiai/tools/email_organizer/__test__/invoice_organizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ async def main():
async with EmailOrganizer(provider=Outlook(creds)) as tool:
email_list = [email async for email in tool.list_inbox_stream(limit=10)]

for email in email_list:
print(await tool._to_compact_email_with_pdf_attachments(email))

print("Raw email list:", json.dumps(email_list, indent=4, ensure_ascii=False))

filtered_emails = []
Expand Down
31 changes: 16 additions & 15 deletions npiai/tools/email_organizer/app.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
import asyncio
import json
from typing import AsyncGenerator, List, cast, Literal

from typing_extensions import TypedDict, overload
from typing import AsyncGenerator, List, cast

import pymupdf
from litellm.types.completion import (
ChatCompletionSystemMessageParam,
ChatCompletionUserMessageParam,
)
from typing_extensions import TypedDict

from npiai import FunctionTool, Context
from npiai.utils import llm_tool_call, concurrent_task_runner, llm_summarize
from npiai.tools.shared_types.base_email_tool import BaseEmailTool, EmailMessage

from npiai.utils import llm_tool_call, concurrent_task_runner, llm_summarize
from .prompts import FILTER_PROMPT, SUMMARIZE_PROMPT
from .types import FilterResult, Column, EmailSummary

Expand Down Expand Up @@ -277,21 +275,24 @@ async def _to_compact_email_with_pdf_attachments(
if not email["attachments"]:
return cast(CompactEmailMessage, email)

attachments = await self._provider.download_attachments_in_message(
email["id"],
filter_by_type="application/pdf",
)
pdf_attachments = []

if not attachments:
return cast(CompactEmailMessage, {**email, "attachments": None})
for attachment in email["attachments"]:
if attachment["filetype"] != "application/pdf":
continue

pdf_attachments = []
data = attachment["data"]

if not data:
data = await self._provider.download_attachment(
message_id=attachment["message_id"],
attachment_id=attachment["id"],
)

for attachment in attachments:
if not attachment["data"]:
if not data:
continue

doc = pymupdf.open(stream=attachment["data"], filetype="pdf")
doc = pymupdf.open(stream=data, filetype="pdf")
content = ""

for page in doc:
Expand Down
37 changes: 15 additions & 22 deletions npiai/tools/google/gmail/app.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import base64
import json
import os
import re
Expand Down Expand Up @@ -129,33 +130,25 @@ async def get_message_by_id(self, message_id: str) -> EmailMessage | None:
except HttpError:
return None

async def download_attachments_in_message(
async def download_attachment(
self,
message_id: str,
filter_by_type: str = None,
) -> List[EmailAttachment] | None:
attachment_id: str,
) -> bytes | None:
try:
msg = self._gmail_client.get_message_by_id(message_id)
results: List[EmailAttachment] = []
# noinspection PyProtectedMember
res = (
self._gmail_client._service.users()
.messages()
.attachments()
.get(userId="me", messageId=message_id, id=attachment_id)
.execute()
)

if not msg.attachments:
try:
return base64.urlsafe_b64decode(res["data"])
except Exception:
return None

for att in msg.attachments:
if filter_by_type and att.filetype != filter_by_type:
continue

att.download()
results.append(
EmailAttachment(
id=att.id,
message_id=msg.id,
filename=att.filename,
filetype=att.filetype,
data=att.data,
)
)
return results
except HttpError:
return None

Expand Down
51 changes: 17 additions & 34 deletions npiai/tools/outlook/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,53 +187,36 @@ async def list_inbox_stream(
if not messages.odata_next_link:
return

async def download_attachments_in_message(
async def download_attachment(
self,
message_id: str,
filter_by_type: str = None,
) -> List[EmailAttachment] | None:
attachment_id: str,
) -> bytes | None:
"""
Download a single attachment in a message by its ID
Args:
message_id: The ID of the message
filter_by_type: Filter the attachments by type. Default is None.
attachment_id: The ID of the attachment
"""
attachments = await self._client.me.messages.by_message_id(
message_id
).attachments.get()
attachment = (
await self._client.me.messages.by_message_id(message_id)
.attachments.by_attachment_id(attachment_id)
.get()
)

if not attachments or not attachments.value:
if not attachment or attachment.odata_type != FileAttachment.odata_type:
return None

results: List[EmailAttachment] = []

for attachment in attachments.value:
if filter_by_type and attachment.content_type != filter_by_type:
continue

att = cast(
FileAttachment,
await self._client.me.messages.by_message_id(message_id)
.attachments.by_attachment_id(attachment.id)
.get(),
)
attachment = cast(FileAttachment, attachment)

results.append(
EmailAttachment(
id=att.id,
message_id=message_id,
filename=att.name,
filetype=att.content_type,
data=(
base64.urlsafe_b64decode(att.content_bytes)
if att.content_bytes
else None
),
)
)
if not attachment.content_bytes:
return None

return results
try:
return base64.urlsafe_b64decode(attachment.content_bytes)
except Exception:
return None

@function
async def search_emails(
Expand Down
6 changes: 3 additions & 3 deletions npiai/tools/shared_types/base_email_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ async def get_message_by_id(self, message_id: str) -> EmailMessage | None:
pass

@abstractmethod
async def download_attachments_in_message(
async def download_attachment(
self,
message_id: str,
filter_by_type: str = None,
) -> List[EmailAttachment] | None:
attachment_id: str,
) -> bytes | None:
pass

0 comments on commit cab21be

Please sign in to comment.