Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly account for anonymous access in Confluence #3601

Merged
merged 1 commit into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions backend/ee/onyx/configs/app_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY = int(
os.environ.get("CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY") or 5 * 60
)
# This is a boolean that determines if anonymous access is public
# Default behavior is to not make the page public and instead add a group
# that contains all the users that we found in Confluence
CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC = (
os.environ.get("CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC", "").lower() == "true"
)
# In seconds, default is 5 minutes
CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY = int(
os.environ.get("CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60
Expand Down
4 changes: 4 additions & 0 deletions backend/ee/onyx/external_permissions/confluence/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# This is a group that we use to store all the users that we found in Confluence
# Instead of setting a page to public, we just add this group so that the page
# is only accessible to users who have confluence accounts.
ALL_CONF_EMAILS_GROUP_NAME = "All_Confluence_Users_Found_By_Onyx"
88 changes: 70 additions & 18 deletions backend/ee/onyx/external_permissions/confluence/doc_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
"""
from typing import Any

from ee.onyx.configs.app_configs import CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC
from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
from onyx.access.models import DocExternalAccess
from onyx.access.models import ExternalAccess
from onyx.connectors.confluence.connector import ConfluenceConnector
Expand Down Expand Up @@ -31,14 +33,32 @@ def _get_server_space_permissions(
permission_category.get("spacePermissions", [])
)

is_public = False
user_names = set()
group_names = set()
for permission in viewspace_permissions:
if user_name := permission.get("userName"):
user_name = permission.get("userName")
if user_name:
user_names.add(user_name)
if group_name := permission.get("groupName"):
group_name = permission.get("groupName")
if group_name:
group_names.add(group_name)

# It seems that if anonymous access is turned on for the site and space,
# then the space is publicly accessible.
# For confluence server, we make a group that contains all users
# that exist in confluence and then just add that group to the space permissions
# if anonymous access is turned on for the site and space or we set is_public = True
# if they set the env variable CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC to True so
# that we can support confluence server deployments that want anonymous access
# to be public (we cant test this because its paywalled)
if user_name is None and group_name is None:
# Defaults to False
if CONFLUENCE_ANONYMOUS_ACCESS_IS_PUBLIC:
is_public = True
else:
group_names.add(ALL_CONF_EMAILS_GROUP_NAME)

user_emails = set()
for user_name in user_names:
user_email = get_user_email_from_username__server(confluence_client, user_name)
Expand All @@ -50,11 +70,7 @@ def _get_server_space_permissions(
return ExternalAccess(
external_user_emails=user_emails,
external_user_group_ids=group_names,
# TODO: Check if the space is publicly accessible
# Currently, we assume the space is not public
# We need to check if anonymous access is turned on for the site and space
# This information is paywalled so it remains unimplemented
is_public=False,
is_public=is_public,
)


Expand Down Expand Up @@ -134,7 +150,7 @@ def _get_space_permissions(

def _extract_read_access_restrictions(
confluence_client: OnyxConfluence, restrictions: dict[str, Any]
) -> ExternalAccess | None:
) -> tuple[set[str], set[str]]:
"""
Converts a page's restrictions dict into an ExternalAccess object.
If there are no restrictions, then return None
Expand Down Expand Up @@ -177,21 +193,57 @@ def _extract_read_access_restrictions(
group["name"] for group in read_access_group_jsons if group.get("name")
]

return set(read_access_user_emails), set(read_access_group_names)


def _get_all_page_restrictions(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: these functions are confusingly named. It's not clear what the difference between _get_all_page_restrictions and _fetch_all_page_restrictions is.

One should probably named _get_restrictons_for_page_batch and the other _get_restrictions_for_page or something similar.

Let's address this in a follow up.

confluence_client: OnyxConfluence,
perm_sync_data: dict[str, Any],
) -> ExternalAccess | None:
"""
This function gets the restrictions for a page by taking the intersection
of the page's restrictions and the restrictions of all the ancestors
of the page.
If the page/ancestor has no restrictions, then it is ignored (no intersection).
If no restrictions are found anywhere, then return None, indicating that the page
should inherit the space's restrictions.
"""
found_user_emails: set[str] = set()
found_group_names: set[str] = set()

found_user_emails, found_group_names = _extract_read_access_restrictions(
confluence_client=confluence_client,
restrictions=perm_sync_data.get("restrictions", {}),
)

ancestors: list[dict[str, Any]] = perm_sync_data.get("ancestors", [])
for ancestor in ancestors:
ancestor_user_emails, ancestor_group_names = _extract_read_access_restrictions(
confluence_client=confluence_client,
restrictions=ancestor.get("restrictions", {}),
)
if not ancestor_user_emails and not ancestor_group_names:
# This ancestor has no restrictions, so it has no effect on
# the page's restrictions, so we ignore it
continue

found_user_emails.intersection_update(ancestor_user_emails)
found_group_names.intersection_update(ancestor_group_names)

# If there are no restrictions found, then the page
# inherits the space's restrictions so return None
is_space_public = read_access_user_emails == [] and read_access_group_names == []
if is_space_public:
if not found_user_emails and not found_group_names:
return None

return ExternalAccess(
external_user_emails=set(read_access_user_emails),
external_user_group_ids=set(read_access_group_names),
external_user_emails=found_user_emails,
external_user_group_ids=found_group_names,
# there is no way for a page to be individually public if the space isn't public
is_public=False,
)


def _fetch_all_page_restrictions_for_space(
def _fetch_all_page_restrictions(
confluence_client: OnyxConfluence,
slim_docs: list[SlimDocument],
space_permissions_by_space_key: dict[str, ExternalAccess],
Expand All @@ -208,11 +260,11 @@ def _fetch_all_page_restrictions_for_space(
raise ValueError(
f"No permission sync data found for document {slim_doc.id}"
)
restrictions = _extract_read_access_restrictions(

if restrictions := _get_all_page_restrictions(
confluence_client=confluence_client,
restrictions=slim_doc.perm_sync_data.get("restrictions", {}),
)
if restrictions:
perm_sync_data=slim_doc.perm_sync_data,
):
document_restrictions.append(
DocExternalAccess(
doc_id=slim_doc.id,
Expand Down Expand Up @@ -301,7 +353,7 @@ def confluence_doc_sync(
slim_docs.extend(doc_batch)

logger.debug("Fetching all page restrictions for space")
return _fetch_all_page_restrictions_for_space(
return _fetch_all_page_restrictions(
confluence_client=confluence_connector.confluence_client,
slim_docs=slim_docs,
space_permissions_by_space_key=space_permissions_by_space_key,
Expand Down
13 changes: 12 additions & 1 deletion backend/ee/onyx/external_permissions/confluence/group_sync.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from ee.onyx.db.external_perm import ExternalUserGroup
from ee.onyx.external_permissions.confluence.constants import ALL_CONF_EMAILS_GROUP_NAME
from onyx.connectors.confluence.onyx_confluence import build_confluence_client
from onyx.connectors.confluence.onyx_confluence import OnyxConfluence
from onyx.connectors.confluence.utils import get_user_email_from_username__server
from onyx.db.models import ConnectorCredentialPair
from onyx.utils.logger import setup_logger


logger = setup_logger()


Expand Down Expand Up @@ -53,12 +53,23 @@ def confluence_group_sync(
confluence_client=confluence_client,
)
onyx_groups: list[ExternalUserGroup] = []
all_found_emails = set()
for group_id, group_member_emails in group_member_email_map.items():
onyx_groups.append(
ExternalUserGroup(
id=group_id,
user_emails=list(group_member_emails),
)
)
all_found_emails.update(group_member_emails)

# This is so that when we find a public confleunce server page, we can
# give access to all users only in if they have an email in Confluence
if cc_pair.connector.connector_specific_config.get("is_cloud", False):
all_found_group = ExternalUserGroup(
id=ALL_CONF_EMAILS_GROUP_NAME,
user_emails=list(all_found_emails),
)
onyx_groups.append(all_found_group)

return onyx_groups
4 changes: 4 additions & 0 deletions backend/onyx/connectors/confluence/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
"space",
"restrictions.read.restrictions.user",
"restrictions.read.restrictions.group",
"ancestors.restrictions.read.restrictions.user",
"ancestors.restrictions.read.restrictions.group",
]

_SLIM_DOC_BATCH_SIZE = 5000
Expand Down Expand Up @@ -323,9 +325,11 @@ def retrieve_all_slim_documents(
# These will be used by doc_sync.py to sync permissions
page_restrictions = page.get("restrictions")
page_space_key = page.get("space", {}).get("key")
page_ancestors = page.get("ancestors", [])
page_perm_sync_data = {
"restrictions": page_restrictions or {},
"space_key": page_space_key,
"ancestors": page_ancestors or [],
}

doc_metadata_list.append(
Expand Down
Loading