Skip to content

Commit

Permalink
Merge "new rule: Dataflow Worker SA has Dataflow worker role"
Browse files Browse the repository at this point in the history
-- Branch commit log --
commit 67580e2
Author:  gcpdiag team <noreply@google.com>
Date:    2023-07-08T08:50:50Z

    new rule: Dataflow Worker SA has Dataflow worker role

Change-Id: I10cb80c3e9543dc6335da9446b022b564c25640a
GitOrigin-RevId: 2165c56
  • Loading branch information
gcpdiag team authored and copybara-github committed Jul 14, 2023
1 parent 1bf050b commit 106b6b9
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 0 deletions.
114 changes: 114 additions & 0 deletions gcpdiag/lint/dataflow/err_2023_008_dataflow_sa_worker_perm_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#
# Copyright 2021 Google LLC
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Dataflow worker service account has roles/dataflow.worker role
Check that the worker service account used in dataflow job
has the following role: roles/dataflow.worker role
"""

import itertools

from boltons.iterutils import get_path

from gcpdiag import lint, models
from gcpdiag.queries import apis, crm, iam, logs

# Criteria to filter for logs: audit-log entries whose method name is either
# the Dataflow job "create" or the Dataflow job "updateContents" call.
LOG_FILTER = [
    'protoPayload.@type="type.googleapis.com/google.cloud.audit.AuditLog"',
    # The alternatives are parenthesized so that BOTH values are compared
    # against protoPayload.methodName. Written as `field="a" OR "b"`, the
    # second term is not tied to the field and matches "b" in any field.
    ('protoPayload.methodName=("dataflow.jobs.updateContents" OR'
     ' "dataflow.jobs.create")'),
]
# Module-level stores keyed by project id.
# logs_by_project is written by prepare_rule (result of logs.query) and read
# by run_rule; policies_by_project is written by run_rule (project IAM policy).
logs_by_project = {}
policies_by_project = {}

# Roles that run_rule looks for on the worker service account. A service
# account holding any one of them is not reported as failed.
WORKER_ROLE = 'roles/dataflow.worker'
EDITOR_ROLE = 'roles/editor'
OWNER_ROLE = 'roles/owner'


def prefetch_rule(context: models.Context):
  """Fetch the project's IAM policy up front so that it is already cached.

  Args:
    context: lint context; only its ``project_id`` is used.
  """
  project_id = context.project_id
  iam.get_project_policy(project_id)


def prepare_rule(context: models.Context):
  """Call ``logs.query`` for the project and keep the returned object.

  The individual LOG_FILTER clauses are combined with ``AND``. The object
  returned by ``logs.query`` is stored in ``logs_by_project`` under the
  project id, where ``run_rule`` later reads its ``entries``.

  Args:
    context: lint context; only its ``project_id`` is used.
  """
  project_id = context.project_id
  combined_filter = ' AND '.join(LOG_FILTER)
  query = logs.query(
      project_id=project_id,
      resource_type='dataflow_step',
      log_name='log_id("cloudaudit.googleapis.com/activity")',
      filter_str=combined_filter,
  )
  logs_by_project[project_id] = query


def run_rule(context: models.Context, report: lint.LintReportRuleInterface):
  """Check that worker service accounts of Dataflow jobs hold a worker role.

  For every log entry collected by ``prepare_rule`` the service account named
  in ``protoPayload.request.serviceAccount`` is looked up in the project IAM
  policy. The account passes if it has the permissions of any of
  WORKER_ROLE, OWNER_ROLE or EDITOR_ROLE.

  Outcomes reported on the project:
    * skipped - the logging or dataflow API is not enabled.
    * failed  - at least one job used a service account without any of the
                accepted roles (at most 100 findings are listed).
    * ok      - otherwise, including when there are no log entries at all.

  Args:
    context: lint context; only its ``project_id`` is used.
    report: interface used to record the skipped/failed/ok result.
  """
  project_id = context.project_id
  project = crm.get_project(project_id)
  policy = iam.get_project_policy(project_id)
  # Kept for backward compatibility: the module-level dict is still populated.
  policies_by_project[project_id] = policy

  # skip entire rule if logging is disabled
  if not apis.is_enabled(project_id, 'logging'):
    report.add_skipped(project, 'logging api is disabled')
    return

  # skip entire rule if dataflow API is disabled
  if not apis.is_enabled(project_id, 'dataflow'):
    report.add_skipped(project, 'dataflow api is disabled')
    return

  accepted_roles = (WORKER_ROLE, OWNER_ROLE, EDITOR_ROLE)
  failed_jobs = set()

  query = logs_by_project.get(project_id)
  entries = query.entries if query is not None else ()
  for log_entry in entries:
    # default=None keeps a log entry without this field from raising and
    # aborting the whole rule.
    service_account = get_path(
        log_entry,
        ('protoPayload', 'request', 'serviceAccount'),
        default=None,
    )
    if not service_account:
      # No explicit worker service account in the request, so there is nothing
      # to look up in the IAM policy for this entry.
      # NOTE(review): presumably such jobs run with a default service
      # account - confirm whether that account should be checked instead.
      continue

    job_id = get_path(
        log_entry,
        ('protoPayload', 'request', 'job_id'),
        default=None,
    ) or 'unknown job'

    member = f'serviceAccount:{service_account}'
    if any(
        policy.has_role_permissions(member=member, role=role)
        for role in accepted_roles):
      continue

    failed_jobs.add(f'SA {service_account} used in {job_id} '
                    f'does not have the role {WORKER_ROLE}')

  if failed_jobs:
    # Sort before truncating so the reported findings are deterministic.
    report.add_failed(
        project,
        'Some Dataflow jobs in which worker SA did not have Dataflow Worker'
        ' role: ' + ', '.join(itertools.islice(sorted(failed_jobs), 100)),
    )
  else:
    report.add_ok(project)
3 changes: 3 additions & 0 deletions gcpdiag/lint/dataflow/snapshots/ERR_2023_008.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
* dataflow/ERR/2023_008: Dataflow worker service account has roles/dataflow.worker role
(Error: could not access 'serviceAccount' from path ('protoPayload', 'request', 'serviceAccount'), got error: KeyError('serviceAccount')) [SKIP]

24 changes: 24 additions & 0 deletions website/content/en/rules/dataflow/ERR/2023_008.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
title: "dataflow/ERR/2023_008"
linkTitle: "ERR/2023_008"
weight: 1
type: docs
description: >
Dataflow worker service account has roles/dataflow.worker role
---

**Product**: [Dataflow](https://cloud.google.com/dataflow)\
**Rule class**: ERR - Something that is very likely to be wrong

### Description

Check that the worker service account used in dataflow job
has the following role: roles/dataflow.worker role

### Remediation
The Dataflow Worker role (`roles/dataflow.worker`) must be assigned to the worker service account
so that it is able to request and update work from the Dataflow service.

### Further information
Refer to the [Dataflow access control documentation](https://cloud.google.com/dataflow/docs/concepts/access-control#:~:text=The%20Dataflow%20Worker%20role)
to learn more about this role.

0 comments on commit 106b6b9

Please sign in to comment.