Skip to content

Commit

Permalink
Merge pull request #414 from alliance-genome/KANBAN-547-pipeline-logs
Browse files Browse the repository at this point in the history
Use log group dedicated for PAVI pipeline logs (KANBAN-547)
  • Loading branch information
mluypaert authored Oct 16, 2024
2 parents 8c7d76d + 4f095c2 commit 652e3ee
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 2 deletions.
1 change: 1 addition & 0 deletions pipeline/aws_infra/cdk_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
CdkInfraStack(app, "PaviPipelineCdkStack-dev", env_suffix="dev",
shared_seq_retrieval_image_repo='agr_pavi/pipeline_seq_retrieval',
shared_alignment_image_repo='agr_pavi/pipeline_alignment',
shared_logs_group='pavi/pipeline-batch-jobs',
shared_work_dir_bucket='agr-pavi-pipeline-nextflow',
env=agr_aws_environment)

Expand Down
16 changes: 15 additions & 1 deletion pipeline/aws_infra/cdk_classes/aws_batch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from aws_cdk import (
Duration,
aws_batch,
aws_logs as cwl,
aws_ec2 as ec2,
aws_iam as iam,
RemovalPolicy,
Expand All @@ -18,18 +19,20 @@

class PaviExecutionEnvironment:

batch_log_group: cwl.ILogGroup
compute_environment: aws_batch.ManagedEc2EcsComputeEnvironment
job_queue: aws_batch.JobQueue
nf_output_bucket: s3.Bucket | s3.IBucket
nf_aws_execution_policy: iam.ManagedPolicy

def __init__(self, scope: Stack, env_suffix: str, shared_work_dir_bucket: Optional[str]) -> None:
def __init__(self, scope: Stack, env_suffix: str, shared_logs_group: Optional[str], shared_work_dir_bucket: Optional[str]) -> None:
"""
Defines the PAVI execution environment.
Args:
scope: CDK Stack to which the construct belongs
env_suffix: environment suffix, added to created resource names
shared_logs_group: when defined, use CW log group with the defined value as logGroupName as Nextflow batch log group
shared_work_dir_bucket: when defined, use S3 bucket with defined the value as bucketName as Nextflow workdir bucket
"""

Expand Down Expand Up @@ -89,6 +92,17 @@ def __init__(self, scope: Stack, env_suffix: str, shared_work_dir_bucket: Option
cdk_tags.of(instance_role).add("Product", "PAVI") # type: ignore
cdk_tags.of(instance_role).add("Managed_by", "PAVI") # type: ignore

if not shared_logs_group:
self.batch_log_group = cwl.LogGroup(
scope, 'pipeline-log-group',
log_group_name='pavi/pipeline-batch-jobs',
retention=cwl.RetentionDays.INFINITE
)
else:
self.batch_log_group = cwl.LogGroup.from_log_group_name(
scope, 'pipeline-log-group',
log_group_name=shared_logs_group)

ce_name = 'pavi_pipeline_ecs'
if env_suffix:
ce_name += f'_{env_suffix}'
Expand Down
4 changes: 3 additions & 1 deletion pipeline/aws_infra/cdk_classes/cdk_infra_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class CdkInfraStack(Stack):
def __init__(self, scope: Construct, construct_id: str, env_suffix: str = "",
shared_seq_retrieval_image_repo: Optional[str] = None,
shared_alignment_image_repo: Optional[str] = None,
shared_logs_group: Optional[str] = None,
shared_work_dir_bucket: Optional[str] = None,
**kwargs: Any) -> None:
"""
Expand All @@ -30,6 +31,7 @@ def __init__(self, scope: Construct, construct_id: str, env_suffix: str = "",
env_suffix: environment suffix, added to created resource names
shared_seq_retrieval_image_repo: when defined, use ECR repo with defined the value as repoName as seq_retrieval image repo
shared_alignment_image_repo: when defined, use ECR repo with defined the value as repoName as alignment image repo
shared_logs_group: when defined, use CW log group with the defined value as logGroupName as Nextflow batch log group
shared_work_dir_bucket: when defined, use S3 bucket with defined the value as bucketName as Nextflow workdir bucket
"""
super().__init__(scope, construct_id, **kwargs)
Expand All @@ -52,4 +54,4 @@ def __init__(self, scope: Construct, construct_id: str, env_suffix: str = "",
cdk_tags.of(self.alignment_ecr_repo).add("Product", "PAVI")
cdk_tags.of(self.alignment_ecr_repo).add("Managed_by", "PAVI")

self.execution_environment = PaviExecutionEnvironment(self, env_suffix=env_suffix, shared_work_dir_bucket=shared_work_dir_bucket)
self.execution_environment = PaviExecutionEnvironment(self, env_suffix=env_suffix, shared_logs_group=shared_logs_group, shared_work_dir_bucket=shared_work_dir_bucket)
8 changes: 8 additions & 0 deletions pipeline/aws_infra/tests/unit/test_cdk_infra_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,11 @@ def test_pipeline_execution_environment() -> None:
"JobQueueName": "pavi_pipeline"
}
})


def test_pipeline_logs_group() -> None:
template.has_resource(type='AWS::Logs::LogGroup', props={
"Properties": {
"LogGroupName": "pavi/pipeline-batch-jobs"
}
})
3 changes: 3 additions & 0 deletions pipeline/workflow/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ profiles {
}
aws {
region = 'us-east-1'
batch {
logsGroup = 'pavi/pipeline-batch-jobs'
}
}
workDir = "${params.nextflow_output_dir}/work"
}
Expand Down

0 comments on commit 652e3ee

Please sign in to comment.