Skip to content

Commit

Permalink
joinedload folder and labels
Browse files Browse the repository at this point in the history
  • Loading branch information
squeaky-pl committed Nov 6, 2024
1 parent fa50d15 commit 8e26a62
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 25 deletions.
6 changes: 5 additions & 1 deletion inbox/mailsync/backends/gmail.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,11 @@ def __deduplicate_message_object_creation(self, db_session, raw_messages, accoun
uid.update_flags(raw_message.flags)
uid.update_labels(raw_message.g_labels)
common.update_message_metadata(
db_session, account, message_obj, uid.is_draft
db_session,
account.id,
account.category_type,
message_obj,
uid.is_draft,
)
db_session.commit()

Expand Down
53 changes: 41 additions & 12 deletions inbox/mailsync/backends/imap/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,23 @@
"""

from collections.abc import Iterable
from datetime import datetime
from typing import List, Set

from inbox.models.label import Label
from sqlalchemy import bindparam, desc
from sqlalchemy.orm import Query, Session
from sqlalchemy.orm import joinedload
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy.sql.expression import func

from inbox.contacts.processing import update_contacts_from_message
from inbox.crispin import RawMessage
from inbox.logging import get_logger
from inbox.models import Account, ActionLog, Folder, Message, MessageCategory
from inbox.models.backends.imap import ImapFolderInfo, ImapUid
from inbox.models.account import CategoryType
from inbox.models.backends.imap import ImapFolderInfo, ImapUid, LabelItem
from inbox.models.category import Category
from inbox.models.session import session_scope
from inbox.models.util import reconcile_message
Expand Down Expand Up @@ -77,20 +81,34 @@ def lastseenuid(account_id, session, folder_id):


def update_message_metadata(
session: Session, account: Account, message: Message, is_draft: bool
session: Session,
account_id: int,
category_type: CategoryType,
message: Message,
is_draft: bool,
) -> None:
"""Update the message's metadata"""
# Sort imapuids in a way that the ones that were added later come first.
# There are non-conforming IMAP servers that can list the same message thousands of times
# in the same folder. This is a workaround to limit the memory pressure caused by such
# servers. The metadata is meaningless for such messages anyway.

options = [joinedload(ImapUid.folder).joinedload(Folder.category)]
if category_type == "label":
options.append(
joinedload("labelitems")
.joinedload(LabelItem.label)
.joinedload(Label.category)
)

latest_imapuids = (
imapuids_for_message_query(
account_id=account.id,
account_id=account_id,
message_id=message.id,
only_latest=IMAPUID_PER_MESSAGE_SANITY_LIMIT,
)
.with_session(session)
.options(options)
.all()
)

Expand All @@ -99,11 +117,13 @@ def update_message_metadata(
message.is_draft = is_draft

latest_categories: List[Category] = [
category for imapuid in latest_imapuids for category in imapuid.categories
category
for imapuid in latest_imapuids
for category in imapuid.get_categories(category_type)
]

categories: Set[Category]
if account.category_type == "folder":
if category_type == "folder":
# For generic IMAP we want to deterministically select the last category.
# A message will always be in a single folder but it seems that for some
# on-prem servers we are not able to reliably detect when a message is moved
Expand All @@ -113,7 +133,7 @@ def update_message_metadata(
# from the database. This makes it deterministic and more-correct because a message
# is likely in a folder (and category) it was added to last.
categories = {latest_categories[0]} if latest_categories else set()
elif account.category_type == "label":
elif category_type == "label":
categories = set(latest_categories)
else:
raise AssertionError("Unreachable")
Expand Down Expand Up @@ -204,7 +224,9 @@ def update_metadata(account_id, folder_id, folder_role, new_flags, session):
if changed:
change_count += 1
is_draft = item.is_draft and folder_role in ["drafts", "all"]
update_message_metadata(session, account, item.message, is_draft)
update_message_metadata(
session, account.id, account.category_type, item.message, is_draft
)
session.commit()
log.info("Updated UID metadata", changed=change_count, out_of=len(new_flags))

Expand All @@ -221,7 +243,7 @@ def imapuids_for_message_query(
return query


def remove_deleted_uids(account_id, folder_id, uids):
def remove_deleted_uids(account_id: int, folder_id: int, uids: Iterable[int]) -> None:
"""
Make sure you're holding a db write lock on the account. (We don't try
to grab the lock in here in case the caller needs to put higher-level
Expand All @@ -230,7 +252,12 @@ def remove_deleted_uids(account_id, folder_id, uids):
"""
if not uids:
return

deleted_uid_count = 0

with session_scope(account_id) as db_session:
category_type = Account.get(account_id, db_session).category_type

for uid in uids:
# We do this one-uid-at-a-time because issuing many deletes within a
# single database transaction is problematic. But loading many
Expand All @@ -251,7 +278,8 @@ def remove_deleted_uids(account_id, folder_id, uids):
ImapUid,
"FORCE INDEX (ix_imapuid_account_id_folder_id_msg_uid_desc)",
)
.first()
.options(joinedload(ImapUid.message))
.one_or_none()
)
if imapuid is None:
continue
Expand Down Expand Up @@ -282,9 +310,8 @@ def remove_deleted_uids(account_id, folder_id, uids):
if thread is not None and not thread.messages:
db_session.delete(thread)
else:
account = Account.get(account_id, db_session)
update_message_metadata(
db_session, account, message, message.is_draft
db_session, account_id, category_type, message, message.is_draft
)
if not message_imapuids_exist:
# But don't outright delete messages. Just mark them as
Expand Down Expand Up @@ -353,7 +380,9 @@ def create_imap_message(
is_draft = imapuid.is_draft and (
folder.canonical_name == "drafts" or folder.canonical_name == "all"
)
update_message_metadata(db_session, account, new_message, is_draft)
update_message_metadata(
db_session, account.id, account.category_type, new_message, is_draft
)

update_contacts_from_message(db_session, new_message, account.namespace.id)

Expand Down
19 changes: 12 additions & 7 deletions inbox/models/backends/imap.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from datetime import datetime
from typing import List, Set
from typing import List, assert_never

from sqlalchemy import (
BigInteger,
Expand All @@ -20,7 +20,7 @@
from sqlalchemy.sql.expression import false

from inbox.logging import get_logger
from inbox.models.account import Account
from inbox.models.account import Account, CategoryType
from inbox.models.base import MailSyncBase
from inbox.models.category import Category
from inbox.models.folder import Folder
Expand Down Expand Up @@ -209,11 +209,16 @@ def update_labels(self, new_labels: List[str]) -> None:
def namespace(self):
return self.imapaccount.namespace

@property
def categories(self) -> Set[Category]:
categories = {label.category for label in self.labels}
categories.add(self.folder.category)
return categories
def get_categories(self, category_type: CategoryType) -> set[Category]:
match category_type:
case "label":
return {label.category for label in self.labels} | {
self.folder.category
}
case "folder":
return {self.folder.category}
case _:
assert_never(category_type)

__table_args__ = (
UniqueConstraint("folder_id", "msg_uid", "account_id"),
Expand Down
16 changes: 13 additions & 3 deletions tests/imap/test_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,13 @@ def folder_and_message_maps(db, default_account):
# Create a message in the folder
message = add_fake_message(db.session, default_account.namespace.id, thread)
add_fake_imapuid(db.session, default_account.id, message, folder, 13)
update_message_metadata(db.session, default_account, message, False)
update_message_metadata(
db.session,
default_account.id,
default_account.category_type,
message,
False,
)
db.session.commit()
folder_map[name] = folder
message_map[name] = message
Expand All @@ -46,7 +52,9 @@ def add_inbox_label(db, default_account, message):
imapuid.update_labels(["\\Inbox"])
db.session.commit()
assert {c.name for c in imapuid.categories} == {"all", "inbox"}
update_message_metadata(db.session, default_account, message, False)
update_message_metadata(
db.session, default_account.id, default_account.category_type, message, False
)
db.session.commit()
return message

Expand All @@ -58,7 +66,9 @@ def add_custom_label(db, default_account, message):
imapuid.update_labels(["<3"])
db.session.commit()
assert {c.name for c in imapuid.categories} == {existing, ""}
update_message_metadata(db.session, default_account, message, False)
update_message_metadata(
db.session, default_account.id, default_account.category_type, message, False
)
db.session.commit()
return message

Expand Down
8 changes: 6 additions & 2 deletions tests/imap/test_update_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ def test_update_categories_when_actionlog_entry_missing(
):
message.categories_changes = True
db.session.commit()
update_message_metadata(db.session, imapuid.account, message, False)
update_message_metadata(
db.session, imapuid.account.id, imapuid.account.category_type, message, False
)
assert message.categories == {imapuid.folder.category}


Expand Down Expand Up @@ -112,7 +114,9 @@ def test_categories_from_multiple_imap_folders(
imapuid.updated_at = imapuid.updated_at + datetime.timedelta(seconds=delay)
db.session.commit()

update_message_metadata(db.session, generic_account, message, False)
update_message_metadata(
db.session, generic_account.id, generic_account.category_type, message, False
)
assert {category.name for category in message.categories} == categories

delete_imapuids(db.session)
Expand Down

0 comments on commit 8e26a62

Please sign in to comment.