diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f78dcc2..6a5a6267 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### **Added** +- added `sagemaker-templates` module with `multi_account_basic` project template - bump cdk & ecr deployment version to fix deprecated custom resource runtimes issue in `mlflow-image` - added `sagemaker-jumpstart-fm-endpoint` module - added RDS persistence layer to MLFlow modules diff --git a/README.md b/README.md index 4c641f3a..3a397dfa 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,12 @@ All modules in this repository adhere to the module structure defined in the the ### SageMaker Modules -| Type | Description | -|------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [SageMaker Endpoint Module](modules/sagemaker/sagemaker-endpoint/README.md) | Creates SageMaker real-time inference endpoint for the specified model package or latest approved model from the model package group | -| [SageMaker Studio Module](modules/sagemaker/sagemaker-studio/README.md) | Provisions secure SageMaker Studio Domain environment, creates example User Profiles for Data Scientist and Lead Data Scientist linked to IAM Roles, and adds lifecycle config | -| [SageMaker Notebook Instance Module](modules/sagemaker/sagemaker-notebook/README.md) | Creates SageMaker Notebook Instances | +| Type | Description | +|---------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [SageMaker Studio Module](modules/sagemaker/sagemaker-studio/README.md) | 
Provisions secure SageMaker Studio Domain environment, creates example User Profiles for Data Scientist and Lead Data Scientist linked to IAM Roles, and adds lifecycle config | +| [SageMaker Endpoint Module](modules/sagemaker/sagemaker-endpoint/README.md) | Creates SageMaker real-time inference endpoint for the specified model package or latest approved model from the model package group | +| [SageMaker Project Templates Module](modules/sagemaker/sagemaker-templates/README.md) | Provisions SageMaker Project Templates for an organization. The templates are available using SageMaker Studio Classic or Service Catalog | +| [SageMaker Notebook Instance Module](modules/sagemaker/sagemaker-notebook/README.md) | Creates secure SageMaker Notebook Instance for the Data Scientist, clones the source code to the workspace | ### Mlflow Modules diff --git a/manifests/deployment.yaml b/manifests/deployment.yaml index 0afa3006..92e296a9 100644 --- a/manifests/deployment.yaml +++ b/manifests/deployment.yaml @@ -8,6 +8,8 @@ groups: path: manifests/storage-modules.yaml - name: sagemaker-studio path: manifests/sagemaker-studio-modules.yaml + - name: sagemaker-templates + path: manifests/sagemaker-templates-modules.yaml - name: images path: manifests/images-modules.yaml - name: mlflow diff --git a/manifests/sagemaker-templates-modules.yaml b/manifests/sagemaker-templates-modules.yaml new file mode 100644 index 00000000..8abec0ef --- /dev/null +++ b/manifests/sagemaker-templates-modules.yaml @@ -0,0 +1,10 @@ +name: templates +path: modules/sagemaker/sagemaker-templates +targetAccount: primary +parameters: + - name: portfolio-access-role-arn + valueFrom: + moduleMetadata: + group: sagemaker-studio + name: studio + key: LeadDataScientistRoleArn \ No newline at end of file diff --git a/modules/sagemaker/sagemaker-templates/README.md b/modules/sagemaker/sagemaker-templates/README.md new file mode 100644 index 00000000..a710c3d0 --- /dev/null +++ 
b/modules/sagemaker/sagemaker-templates/README.md @@ -0,0 +1,54 @@ +# SageMaker Project Templates + +This module creates organizational SageMaker Project Templates. + +The templates are registered in Service Catalog and available via SageMaker Studio Classic. + +### Architecture + +![SageMaker Templates Module Architecture](docs/_static/sagemaker-templates-module-architecture.png "SageMaker Templates Module Architecture") + +### Project Templates + +The module contains organizational SageMaker Project Templates vended as Service Catalog Products. The templates can be used through SageMaker Studio Classic and AWS Service Catalog. + +#### Basic Multi-Account Template + +This project template contains an example of a basic multi-account template from the [AWS Enterprise MLOps Framework](https://github.com/aws-samples/aws-enterprise-mlops-framework/blob/main/mlops-multi-account-cdk/mlops-sm-project-template/README.md#sagemaker-project-stack). + +TODO: add detailed description and architecture diagram. 
+ +## Inputs and outputs: +### Required inputs: + - `portfolio-access-role-arn` - the ARN of the IAM Role used to access the Service Catalog Portfolio or SageMaker projects + +### Optional Inputs: + - `portfolio-name` - name of the Service Catalog Portfolio + - `portfolio-owner` - owner of the Service Catalog Portfolio + +### Sample manifest declaration + +```yaml +name: templates +path: modules/sagemaker/sagemaker-templates +targetAccount: primary +parameters: + - name: portfolio-access-role-arn + valueFrom: + moduleMetadata: + group: sagemaker-studio + name: studio + key: LeadDataScientistRoleArn +``` + +### Outputs (module metadata): + - `ServiceCatalogPortfolioName` - the name of the Service Catalog Portfolio + - `ServiceCatalogPortfolioOwner` - the owner of the Service Catalog Portfolio + +### Example Output: +```yaml +{ + "ServiceCatalogPortfolioName": "MLOps SageMaker Project Templates", + "ServiceCatalogPortfolioOwner": "administrator" +} +``` diff --git a/modules/sagemaker/sagemaker-templates/app.py b/modules/sagemaker/sagemaker-templates/app.py new file mode 100644 index 00000000..22261111 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/app.py @@ -0,0 +1,56 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0

# CDK application entry point for the SageMaker Project Templates module.
#
# Reads the module configuration from environment variables, instantiates
# ServiceCatalogStack, exposes the portfolio name/owner as a "metadata" stack
# output and synthesizes the app.

import os

import aws_cdk

from stack import ServiceCatalogStack

# Identifiers read from the SEEDFARMER_* environment variables; joined together
# they are used as the CDK stack id.
project_name = os.getenv("SEEDFARMER_PROJECT_NAME", "")
deployment_name = os.getenv("SEEDFARMER_DEPLOYMENT_NAME", "")
module_name = os.getenv("SEEDFARMER_MODULE_NAME", "")
app_prefix = f"{project_name}-{deployment_name}-{module_name}"

# Fallbacks used when the optional portfolio parameters are not supplied.
DEFAULT_PORTFOLIO_NAME = "MLOps SageMaker Project Templates"
DEFAULT_PORTFOLIO_OWNER = "administrator"


def _param(name: str) -> str:
    """Return the name of the environment variable that carries module parameter *name*."""
    return f"SEEDFARMER_PARAMETER_{name}"


# Target account/region of the deployment. Both variables are mandatory: a
# missing one raises KeyError here, before any construct is created.
environment = aws_cdk.Environment(
    account=os.environ["CDK_DEFAULT_ACCOUNT"],
    region=os.environ["CDK_DEFAULT_REGION"],
)

portfolio_name = os.getenv(_param("PORTFOLIO_NAME"), DEFAULT_PORTFOLIO_NAME)
portfolio_owner = os.getenv(_param("PORTFOLIO_OWNER"), DEFAULT_PORTFOLIO_OWNER)
portfolio_access_role_arn = os.getenv(_param("PORTFOLIO_ACCESS_ROLE_ARN"))

# The access role is the only required parameter; an unset or empty value is rejected.
if not portfolio_access_role_arn:
    raise ValueError("Missing input parameter portfolio-access-role-arn")

app = aws_cdk.App()
stack = ServiceCatalogStack(
    app,
    app_prefix,
    portfolio_name=portfolio_name,
    portfolio_owner=portfolio_owner,
    portfolio_access_role_arn=portfolio_access_role_arn,
    # Previously `environment` was built but never handed to the stack, leaving
    # the stack environment-agnostic; bind it to the resolved account/region.
    env=environment,
)


# Module metadata: a JSON string output describing the created portfolio.
aws_cdk.CfnOutput(
    scope=stack,
    id="metadata",
    value=stack.to_json_string(
        {
            "ServiceCatalogPortfolioName": stack.portfolio_name,
            "ServiceCatalogPortfolioOwner": stack.portfolio_owner,
        }
    ),
)

app.synth()
@@ +publishGenericEnvVariables: true +deploy: + phases: + install: + commands: + - env + # Install whatever additional build libraries + - npm install -g aws-cdk@2.130.0 + - pip install -r requirements.txt + build: + commands: + - cdk deploy --require-approval never --progress events --app "python app.py" --outputs-file ./cdk-exports.json +destroy: + phases: + install: + commands: + # Install whatever additional build libraries + - npm install -g aws-cdk@2.130.0 + - pip install -r requirements.txt + build: + commands: + # execute the CDK + - cdk destroy --force --app "python app.py" \ No newline at end of file diff --git a/modules/sagemaker/sagemaker-templates/images/zip-image/Dockerfile b/modules/sagemaker/sagemaker-templates/images/zip-image/Dockerfile new file mode 100644 index 00000000..eaa6cb07 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/images/zip-image/Dockerfile @@ -0,0 +1,3 @@ +FROM public.ecr.aws/docker/library/alpine:latest + +RUN apk --no-cache add zip make diff --git a/modules/sagemaker/sagemaker-templates/pyproject.toml b/modules/sagemaker/sagemaker-templates/pyproject.toml new file mode 100644 index 00000000..d03cfa02 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/pyproject.toml @@ -0,0 +1,38 @@ +[tool.black] +line-length = 120 +target-version = ["py36", "py37", "py38"] +exclude = ''' +/( + \.eggs + | \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | \.env + | _build + | buck-out + | build + | dist + | codeseeder.out +)/ +''' + +[tool.isort] +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +ensure_newline_before_comments = true +line_length = 120 +py_version = 36 +skip_gitignore = false + +[tool.pytest.ini_options] +addopts = "-v --cov=. --cov-report term --cov-config=coverage.ini --cov-fail-under=80" +pythonpath = [ + "." 
+] +testpaths = [ + "tests" +] \ No newline at end of file diff --git a/modules/sagemaker/sagemaker-templates/requirements.in b/modules/sagemaker/sagemaker-templates/requirements.in new file mode 100644 index 00000000..0e1bed6c --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/requirements.in @@ -0,0 +1,3 @@ +aws-cdk-lib==2.130.0 +cdk-nag==2.28.27 +boto3==1.34.35 diff --git a/modules/sagemaker/sagemaker-templates/requirements.txt b/modules/sagemaker/sagemaker-templates/requirements.txt new file mode 100644 index 00000000..281e1d7c --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/requirements.txt @@ -0,0 +1,84 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# by the following command: +# +# pip-compile --output-file=requirements.txt requirements.in +# +attrs==23.2.0 + # via + # cattrs + # jsii +aws-cdk-asset-awscli-v1==2.2.202 + # via aws-cdk-lib +aws-cdk-asset-kubectl-v20==2.1.2 + # via aws-cdk-lib +aws-cdk-asset-node-proxy-agent-v6==2.0.1 + # via aws-cdk-lib +aws-cdk-lib==2.130.0 + # via + # -r requirements.in + # cdk-nag +boto3==1.34.35 + # via -r requirements.in +botocore==1.34.51 + # via + # boto3 + # s3transfer +cattrs==23.2.3 + # via jsii +cdk-nag==2.28.27 + # via -r requirements.in +constructs==10.3.0 + # via + # aws-cdk-lib + # cdk-nag +exceptiongroup==1.2.0 + # via cattrs +importlib-resources==6.1.2 + # via jsii +jmespath==1.0.1 + # via + # boto3 + # botocore +jsii==1.94.0 + # via + # aws-cdk-asset-awscli-v1 + # aws-cdk-asset-kubectl-v20 + # aws-cdk-asset-node-proxy-agent-v6 + # aws-cdk-lib + # cdk-nag + # constructs +publication==0.0.3 + # via + # aws-cdk-asset-awscli-v1 + # aws-cdk-asset-kubectl-v20 + # aws-cdk-asset-node-proxy-agent-v6 + # aws-cdk-lib + # cdk-nag + # constructs + # jsii +python-dateutil==2.8.2 + # via + # botocore + # jsii +s3transfer==0.10.0 + # via boto3 +six==1.16.0 + # via python-dateutil +typeguard==2.13.3 + # via + # aws-cdk-asset-awscli-v1 + # aws-cdk-asset-kubectl-v20 + # 
aws-cdk-asset-node-proxy-agent-v6 + # aws-cdk-lib + # cdk-nag + # constructs + # jsii +typing-extensions==4.10.0 + # via + # cattrs + # jsii +urllib3==1.26.18 + # via botocore +zipp==3.17.0 + # via importlib-resources diff --git a/modules/sagemaker/sagemaker-templates/setup.cfg b/modules/sagemaker/sagemaker-templates/setup.cfg new file mode 100644 index 00000000..6136e2bb --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/setup.cfg @@ -0,0 +1,28 @@ +[metadata] +license_files = + LICENSE + NOTICE + VERSION + +[flake8] +max-line-length = 120 +extend-ignore = E203, W503 +exclude = + .git, + __pycache__, + docs/source/conf.py, + old, + build, + dist, + .venv, + codeseeder.out, + bundle + +[mypy] +python_version = 3.7 +strict = True +ignore_missing_imports = True +allow_untyped_decorators = True +exclude = + codeseeder.out/|example/|tests/ +warn_unused_ignores = False \ No newline at end of file diff --git a/modules/sagemaker/sagemaker-templates/stack.py b/modules/sagemaker/sagemaker-templates/stack.py new file mode 100644 index 00000000..c00375a4 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/stack.py @@ -0,0 +1,143 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
class ServiceCatalogStack(Stack):
    """Stack that publishes SageMaker project templates through a Service Catalog portfolio.

    Every template sub-directory of ``templates/`` (located next to this file) becomes one
    Service Catalog product: its seed code is zipped and uploaded as S3 assets, its
    ``product_stack`` module is imported and instantiated, and the resulting product is
    added to the portfolio with a shared launch role.

    Attributes set on the instance: ``portfolio_name``, ``portfolio_owner`` and ``portfolio``.
    """

    # Directory containing this file. All on-disk lookups are anchored here so that
    # synthesis does not depend on the working directory of the calling process.
    _MODULE_DIR = os.path.dirname(os.path.abspath(__file__))

    def __init__(
        self,
        scope: Construct,
        id: str,
        portfolio_name: str,
        portfolio_owner: str,
        portfolio_access_role_arn: str,
        **kwargs: Any,
    ) -> None:
        """Create the portfolio, its access/launch roles and one product per template.

        Args:
            scope: Parent construct.
            id: Construct id of the stack.
            portfolio_name: Display name of the Service Catalog portfolio.
            portfolio_owner: Provider name of the portfolio and owner of every product.
            portfolio_access_role_arn: ARN of an existing IAM role that is granted access to
                the portfolio, read access to the seed-code assets, and permission to assume
                the product launch role.
            **kwargs: Forwarded unchanged to ``aws_cdk.Stack``.
        """
        super().__init__(scope, id, **kwargs)

        self.portfolio_name = portfolio_name
        self.portfolio_owner = portfolio_owner
        # Built on first use by upload_assets() and then shared by all templates.
        self._zip_image: Optional[DockerImage] = None

        self.portfolio = servicecatalog.Portfolio(
            self,
            "Portfolio",
            display_name=portfolio_name,
            provider_name=portfolio_owner,
            description="MLOps Unified Templates",
        )

        # A role assumable by the account root principal also gets portfolio access.
        account_root_principal = iam.Role(
            self,
            "AccountRootPrincipal",
            assumed_by=iam.AccountRootPrincipal(),
        )
        self.portfolio.give_access_to_role(account_root_principal)

        portfolio_access_role: iam.IRole = iam.Role.from_role_arn(
            self, "portfolio-access-role", portfolio_access_role_arn
        )
        self.portfolio.give_access_to_role(portfolio_access_role)

        # NOTE(review): the launch role carries AdministratorAccess; consider scoping it
        # down to what the product stacks actually provision.
        product_launch_role = iam.Role(
            self,
            "ProductLaunchRole",
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal("servicecatalog.amazonaws.com"),
                iam.ServicePrincipal("cloudformation.amazonaws.com"),
                iam.ArnPrincipal(portfolio_access_role.role_arn),
            ),
            managed_policies=[iam.ManagedPolicy.from_aws_managed_policy_name("AdministratorAccess")],
        )

        templates_dir = os.path.join(self._MODULE_DIR, "templates")
        for template_name in self._template_names(templates_dir):
            build_app_asset, deploy_app_asset = self.upload_assets(
                portfolio_access_role=portfolio_access_role,
                template_name=template_name,
            )

            # Each template package must expose a `product_stack` module with a `Product` class.
            product_stack_module = importlib.import_module(f"templates.{template_name}.product_stack")
            product_stack: servicecatalog.ProductStack = product_stack_module.Product(
                self,
                f"{template_name}ProductStack",
                build_app_asset=build_app_asset,
                deploy_app_asset=deploy_app_asset,
            )

            # Optional attributes on the product stack override the catalog name/description.
            product_name: str = getattr(product_stack, "TEMPLATE_NAME", template_name)
            product_description: Optional[str] = getattr(product_stack, "DESCRIPTION", None)

            product = servicecatalog.CloudFormationProduct(
                self,
                f"{template_name}CloudFormationProduct",
                owner=portfolio_owner,
                product_name=product_name,
                description=product_description,
                product_versions=[
                    servicecatalog.CloudFormationProductVersion(
                        cloud_formation_template=servicecatalog.CloudFormationTemplate.from_product_stack(
                            product_stack
                        ),
                    )
                ],
            )

            self.portfolio.add_product(product)
            self.portfolio.set_launch_role(product, product_launch_role)

            Tags.of(product).add(key="sagemaker:studio-visibility", value="true")

    @staticmethod
    def _template_names(templates_dir: str) -> Tuple[str, ...]:
        """Return the template directory names directly under *templates_dir*, sorted.

        ``__pycache__`` (created as soon as the ``templates`` package is imported) and
        hidden directories are not templates and are skipped; treating them as templates
        would fail on the missing seed code / ``product_stack`` module.
        """
        return tuple(
            sorted(
                name
                for name in next(os.walk(templates_dir))[1]
                if name != "__pycache__" and not name.startswith(".")
            )
        )

    def _zip_asset(
        self,
        asset_id: str,
        source_dir: str,
        archive_name: str,
        grantee: iam.IRole,
    ) -> s3_assets.Asset:
        """Zip *source_dir* into *archive_name* inside the zip image and grant *grantee* read access."""
        if getattr(self, "_zip_image", None) is None:
            # Build the helper image once instead of once per asset/template.
            self._zip_image = DockerImage.from_build(os.path.join(self._MODULE_DIR, "images", "zip-image"))

        asset = s3_assets.Asset(
            self,
            asset_id,
            path=source_dir,
            bundling=BundlingOptions(
                image=self._zip_image,
                command=[
                    "sh",
                    "-c",
                    f"zip -r /asset-output/{archive_name} .",
                ],
                output_type=BundlingOutput.ARCHIVED,
            ),
        )
        asset.grant_read(grantee=grantee)
        return asset

    def upload_assets(
        self,
        portfolio_access_role: iam.IRole,
        template_name: str,
    ) -> Tuple[s3_assets.Asset, Optional[s3_assets.Asset]]:
        """Create the seed-code assets of one template.

        Args:
            portfolio_access_role: Role that is granted read access to the created assets.
            template_name: Name of the template directory under ``templates/``.

        Returns:
            ``(build_app_asset, deploy_app_asset)``. The second element is ``None`` when the
            template has no ``seed_code/deploy_app`` directory.
        """
        seed_code_dir = os.path.join(self._MODULE_DIR, "templates", template_name, "seed_code")

        build_app_asset = self._zip_asset(
            f"{template_name}BuildAsset",
            os.path.join(seed_code_dir, "build_app"),
            "build_app.zip",
            portfolio_access_role,
        )

        # The deploy app is optional: only package it when the folder exists.
        deploy_app_dir = os.path.join(seed_code_dir, "deploy_app")
        if not os.path.isdir(deploy_app_dir):
            return build_app_asset, None

        deploy_app_asset = self._zip_asset(
            f"{template_name}DeployAsset",
            deploy_app_dir,
            "deploy_app.zip",
            portfolio_access_role,
        )

        return build_app_asset, deploy_app_asset
class BuildPipelineConstruct(Construct):
    """Model-build CI pipeline of a SageMaker project.

    Creates, in order:

    * a CodeCommit repository seeded from ``repo_asset`` (branch ``main``),
    * a CodeBuild service role and a SageMaker execution role that share one policy
      (logs, ECR pull, KMS, CloudWatch metrics, artifact/seed-code bucket access),
    * a CodeBuild project that runs the repository's ``buildspec.yml``,
    * a CodePipeline with a ``Source`` (CodeCommit) and a ``Build`` (CodeBuild) stage.
    """

    def __init__(
        self,
        scope: Construct,
        construct_id: str,
        project_name: str,
        project_id: str,
        s3_artifact: s3.IBucket,
        pipeline_artifact_bucket: s3.IBucket,
        model_package_group_name: str,
        repo_asset: s3_assets.Asset,
        **kwargs: Any,
    ) -> None:
        """Wire up the repository, roles, build project and pipeline.

        Args:
            scope: Parent construct.
            construct_id: Construct id; also used as suffix of the repository, CodeBuild
                project and pipeline names (``{project_name}-{construct_id}``).
            project_name: SageMaker project name; used in resource names and tags.
            project_id: SageMaker project id; used in the pipeline name and tags.
            s3_artifact: Bucket for ML artifacts. Its ``encryption_key`` is dereferenced
                below, so the bucket is expected to have a KMS key attached.
            pipeline_artifact_bucket: Artifact bucket of the CodePipeline.
            model_package_group_name: Model package group the roles may manage.
            repo_asset: Asset holding the seed code for the repository.
            **kwargs: Forwarded unchanged to ``Construct``.
        """
        super().__init__(scope, construct_id, **kwargs)

        # Define resource name
        sagemaker_pipeline_name = f"{project_name}-{project_id}"
        sagemaker_pipeline_description = f"{project_name} Model Build Pipeline"

        # Create source repo from seed bucket/key
        build_app_repository = codecommit.Repository(
            self,
            "Build App Code Repo",
            repository_name=f"{project_name}-{construct_id}",
            code=codecommit.Code.from_asset(
                asset=repo_asset,
                branch="main",
            ),
        )
        aws_cdk.Tags.of(build_app_repository).add("sagemaker:project-id", project_id)
        aws_cdk.Tags.of(build_app_repository).add("sagemaker:project-name", project_name)

        # Referenced by name only; the bucket itself is not created here.
        sagemaker_seedcode_bucket = s3.Bucket.from_bucket_name(
            self,
            "SageMaker Seedcode Bucket",
            f"sagemaker-servicecatalog-seedcode-{Aws.REGION}",
        )

        codebuild_role = iam.Role(
            self,
            "CodeBuild Role",
            assumed_by=iam.ServicePrincipal("codebuild.amazonaws.com"),
            path="/service-role/",
        )

        sagemaker_execution_role = iam.Role(
            self,
            "SageMaker Execution Role",
            assumed_by=iam.ServicePrincipal("sagemaker.amazonaws.com"),
            path="/service-role/",
        )

        # Create a policy statement for SM and ECR pull
        sagemaker_policy = iam.Policy(
            self,
            "SageMaker Policy",
            document=iam.PolicyDocument(
                statements=[
                    iam.PolicyStatement(
                        actions=[
                            "logs:CreateLogGroup",
                            "logs:CreateLogStream",
                            "logs:PutLogEvents",
                        ],
                        resources=["*"],
                    ),
                    iam.PolicyStatement(
                        actions=[
                            "ecr:BatchCheckLayerAvailability",
                            "ecr:BatchGetImage",
                            "ecr:Describe*",
                            "ecr:GetAuthorizationToken",
                            "ecr:GetDownloadUrlForLayer",
                        ],
                        resources=["*"],
                    ),
                    iam.PolicyStatement(
                        actions=[
                            "kms:Encrypt",
                            "kms:ReEncrypt*",
                            "kms:GenerateDataKey*",
                            "kms:Decrypt",
                            "kms:DescribeKey",
                        ],
                        effect=iam.Effect.ALLOW,
                        # Use the partition pseudo parameter like every other ARN in this
                        # construct; a literal "aws" breaks in other partitions.
                        resources=[f"arn:{Aws.PARTITION}:kms:{Aws.REGION}:{Aws.ACCOUNT_ID}:key/*"],
                    ),
                ]
            ),
        )

        # The grants below extend the shared policy, so both roles receive them.
        cloudwatch.Metric.grant_put_metric_data(sagemaker_policy)
        s3_artifact.grant_read_write(sagemaker_policy)
        sagemaker_seedcode_bucket.grant_read_write(sagemaker_policy)

        # CodeBuild and the execution role itself may pass the SageMaker execution role.
        sagemaker_execution_role.grant_pass_role(codebuild_role)
        sagemaker_execution_role.grant_pass_role(sagemaker_execution_role)

        # Attach the policy
        sagemaker_policy.attach_to_role(sagemaker_execution_role)
        sagemaker_policy.attach_to_role(codebuild_role)

        # Grant extra permissions for the SageMaker role
        sagemaker_execution_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "sagemaker:CreateModel",
                    "sagemaker:DeleteModel",
                    "sagemaker:DescribeModel",
                    "sagemaker:CreateProcessingJob",
                    "sagemaker:DescribeProcessingJob",
                    "sagemaker:StopProcessingJob",
                    "sagemaker:CreateTrainingJob",
                    "sagemaker:DescribeTrainingJob",
                    "sagemaker:StopTrainingJob",
                    "sagemaker:AddTags",
                    "sagemaker:DeleteTags",
                    "sagemaker:ListTags",
                ],
                resources=[
                    f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:model/*",
                    f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:processing-job/*",
                    f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:training-job/*",
                ],
            )
        )
        sagemaker_execution_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "sagemaker:CreateModelPackageGroup",
                    "sagemaker:DeleteModelPackageGroup",
                    "sagemaker:DescribeModelPackageGroup",
                    "sagemaker:CreateModelPackage",
                    "sagemaker:DeleteModelPackage",
                    "sagemaker:UpdateModelPackage",
                    "sagemaker:DescribeModelPackage",
                    "sagemaker:ListModelPackages",
                    "sagemaker:AddTags",
                    "sagemaker:DeleteTags",
                    "sagemaker:ListTags",
                ],
                resources=[
                    f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:model-package-group/"
                    f"{model_package_group_name}",
                    f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:model-package/"
                    f"{model_package_group_name}/*",
                ],
            ),
        )

        # Grant extra permissions for the CodeBuild role
        codebuild_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "sagemaker:DescribeModelPackage",
                    "sagemaker:ListModelPackages",
                    "sagemaker:UpdateModelPackage",
                    "sagemaker:AddTags",
                    "sagemaker:DeleteTags",
                    "sagemaker:ListTags",
                ],
                resources=[
                    f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:model-package/"
                    f"{model_package_group_name}/*"
                ],
            )
        )
        codebuild_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "sagemaker:CreatePipeline",
                    "sagemaker:UpdatePipeline",
                    "sagemaker:DeletePipeline",
                    "sagemaker:StartPipelineExecution",
                    "sagemaker:StopPipelineExecution",
                    "sagemaker:DescribePipelineExecution",
                    "sagemaker:ListPipelineExecutionSteps",
                    "sagemaker:AddTags",
                    "sagemaker:DeleteTags",
                    "sagemaker:ListTags",
                ],
                resources=[
                    f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:pipeline/"
                    f"{sagemaker_pipeline_name}",
                    f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:pipeline/"
                    f"{sagemaker_pipeline_name}/execution/*",
                ],
            ),
        )
        codebuild_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "sagemaker:DescribeImageVersion",
                ],
                resources=[
                    f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:image-version/*",
                ],
            )
        )

        # Create the CodeBuild project
        sm_pipeline_build = codebuild.PipelineProject(
            self,
            "SM Pipeline Build",
            project_name=f"{project_name}-{construct_id}",
            role=codebuild_role,  # TODO: figure out what this role actually needs
            build_spec=codebuild.BuildSpec.from_source_filename("buildspec.yml"),
            environment=codebuild.BuildEnvironment(
                build_image=codebuild.LinuxBuildImage.STANDARD_5_0,
                # Values handed to the build as environment variables.
                environment_variables={
                    "SAGEMAKER_PROJECT_NAME": codebuild.BuildEnvironmentVariable(value=project_name),
                    "SAGEMAKER_PROJECT_ID": codebuild.BuildEnvironmentVariable(value=project_id),
                    "MODEL_PACKAGE_GROUP_NAME": codebuild.BuildEnvironmentVariable(value=model_package_group_name),
                    "AWS_REGION": codebuild.BuildEnvironmentVariable(value=Aws.REGION),
                    "SAGEMAKER_PIPELINE_NAME": codebuild.BuildEnvironmentVariable(
                        value=sagemaker_pipeline_name,
                    ),
                    "SAGEMAKER_PIPELINE_DESCRIPTION": codebuild.BuildEnvironmentVariable(
                        value=sagemaker_pipeline_description,
                    ),
                    "SAGEMAKER_PIPELINE_ROLE_ARN": codebuild.BuildEnvironmentVariable(
                        value=sagemaker_execution_role.role_arn,
                    ),
                    "ARTIFACT_BUCKET": codebuild.BuildEnvironmentVariable(value=s3_artifact.bucket_name),
                    # encryption_key is optional on IBucket; an unencrypted bucket fails here.
                    "ARTIFACT_BUCKET_KMS_ID": codebuild.BuildEnvironmentVariable(
                        value=s3_artifact.encryption_key.key_id  # type: ignore[union-attr]
                    ),
                },
            ),
        )

        source_artifact = codepipeline.Artifact(artifact_name="GitSource")

        build_pipeline = codepipeline.Pipeline(
            self,
            "Pipeline",
            pipeline_name=f"{project_name}-{construct_id}",
            artifact_bucket=pipeline_artifact_bucket,
        )

        # add a source stage
        source_stage = build_pipeline.add_stage(stage_name="Source")
        source_stage.add_action(
            codepipeline_actions.CodeCommitSourceAction(
                action_name="Source",
                output=source_artifact,
                repository=build_app_repository,
                branch="main",
            )
        )

        # add a build stage
        build_stage = build_pipeline.add_stage(stage_name="Build")
        build_stage.add_action(
            codepipeline_actions.CodeBuildAction(
                action_name="SMPipeline",
                input=source_artifact,
                project=sm_pipeline_build,
            )
        )
aws_cdk.Tags.of(deploy_app_repository).add("sagemaker:project-id", project_id) + aws_cdk.Tags.of(deploy_app_repository).add("sagemaker:project-name", project_name) + + cdk_synth_build_role = iam.Role( + self, + "CodeBuildRole", + assumed_by=iam.ServicePrincipal("codebuild.amazonaws.com"), + path="/service-role/", + ) + + cdk_synth_build_role.add_to_policy( + iam.PolicyStatement( + actions=["sagemaker:ListModelPackages"], + resources=[ + f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:model-package-group/" + f"{project_name}-{project_id}*", + f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:model-package/" + f"{project_name}-{project_id}/*", + ], + ) + ) + + cdk_synth_build_role.add_to_policy( + iam.PolicyStatement( + actions=["ssm:GetParameter"], + resources=[ + f"arn:{Aws.PARTITION}:ssm:{Aws.REGION}:{Aws.ACCOUNT_ID}:parameter/*", + ], + ) + ) + + cdk_synth_build_role.add_to_policy( + iam.PolicyStatement( + actions=[ + "kms:Encrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:Decrypt", + "kms:DescribeKey", + ], + effect=iam.Effect.ALLOW, + resources=[f"arn:aws:kms:{Aws.REGION}:{Aws.ACCOUNT_ID}:key/*"], + ), + ) + + cdk_synth_build = codebuild.PipelineProject( + self, + "CDKSynthBuild", + role=cdk_synth_build_role, + build_spec=codebuild.BuildSpec.from_object( + { + "version": "0.2", + "phases": { + "build": { + "commands": [ + "npm install -g aws-cdk", + "pip install -r requirements.txt", + 'cdk synth --no-lookups --app "python app.py"', + ] + } + }, + "artifacts": {"base-directory": "cdk.out", "files": "**/*"}, + } + ), + environment=codebuild.BuildEnvironment( + build_image=codebuild.LinuxBuildImage.STANDARD_5_0, + environment_variables={ + "MODEL_PACKAGE_GROUP_NAME": codebuild.BuildEnvironmentVariable(value=model_package_group_name), + "PROJECT_ID": codebuild.BuildEnvironmentVariable(value=project_id), + "PROJECT_NAME": codebuild.BuildEnvironmentVariable(value=project_name), + "DEPLOYMENT_ACCOUNT": 
codebuild.BuildEnvironmentVariable(value=Aws.ACCOUNT_ID), + "DEPLOYMENT_REGION": codebuild.BuildEnvironmentVariable(value=deployment_region), + "PREPROD_ACCOUNT": codebuild.BuildEnvironmentVariable(value=preprod_account), + "PREPROD_REGION": codebuild.BuildEnvironmentVariable(value=preprod_region), + "PROD_ACCOUNT": codebuild.BuildEnvironmentVariable(value=prod_account), + "PROD_REGION": codebuild.BuildEnvironmentVariable(value=prod_region), + }, + ), + ) + + # code build to include security scan over cloudformation template + security_scan = codebuild.Project( + self, + "SecurityScanTooling", + build_spec=codebuild.BuildSpec.from_object( + { + "version": 0.2, + "env": { + "shell": "bash", + "variables": { + "TemplateFolder": "./*.template.json", + "FAIL_BUILD": "true", + }, + }, + "phases": { + "install": { + "runtime-versions": {"ruby": 2.6}, + "commands": [ + "export date=`date +%Y-%m-%dT%H:%M:%S.%NZ`", + "echo Installing cfn_nag - `pwd`", + "gem install cfn-nag", + "echo cfn_nag installation complete `date`", + ], + }, + "build": { + "commands": [ + "echo Starting cfn scanning `date` in `pwd`", + "echo 'RulesToSuppress:\n- id: W58\n reason: W58 is an warning raised due to Lambda " + "functions require permission to write CloudWatch Logs, although the lambda role " + "contains the policy that support these permissions cgn_nag continues to through " + "this problem (https://github.com/stelligent/cfn_nag/issues/422)' > cfn_nag_ignore.yml", + 'mkdir report || echo "dir report exists"', + "SCAN_RESULT=$(cfn_nag_scan --fail-on-warnings --deny-list-path cfn_nag_ignore.yml " + "--input-path ${TemplateFolder} -o json > ./report/cfn_nag.out.json && echo OK || " + "echo FAILED)", + "echo Completed cfn scanning `date`", + "echo $SCAN_RESULT", + "echo $FAIL_BUILD", + """if [[ "$FAIL_BUILD" = "true" && "$SCAN_RESULT" = "FAILED" ]]; then printf "\n\n + Failiing pipeline as possible insecure configurations were detected + \n\n" && exit 1; fi""", + ] + }, + }, + "artifacts": 
{"files": "./report/cfn_nag.out.json"}, + } + ), + environment=codebuild.BuildEnvironment( + build_image=codebuild.LinuxBuildImage.STANDARD_5_0, + environment_variables={ + "DEPLOYMENT_ACCOUNT": codebuild.BuildEnvironmentVariable(value=Aws.ACCOUNT_ID), + "DEPLOYMENT_REGION": codebuild.BuildEnvironmentVariable(value=deployment_region), + "PREPROD_ACCOUNT": codebuild.BuildEnvironmentVariable(value=preprod_account), + "PREPROD_REGION": codebuild.BuildEnvironmentVariable(value=preprod_region), + "PROD_ACCOUNT": codebuild.BuildEnvironmentVariable(value=prod_account), + "PROD_REGION": codebuild.BuildEnvironmentVariable(value=prod_region), + }, + ), + ) + + source_artifact = codepipeline.Artifact(artifact_name="GitSource") + cdk_synth_artifact = codepipeline.Artifact(artifact_name="CDKSynth") + cfn_nag_artifact = codepipeline.Artifact(artifact_name="CfnNagScanReport") + + deploy_code_pipeline = codepipeline.Pipeline( + self, + "DeployPipeline", + cross_account_keys=True, + pipeline_name=pipeline_name, + artifact_bucket=pipeline_artifact_bucket, + ) + + # add a source stage + source_stage = deploy_code_pipeline.add_stage(stage_name="Source") + source_stage.add_action( + codepipeline_actions.CodeCommitSourceAction( + action_name="Source", + output=source_artifact, + repository=deploy_app_repository, + branch="main", + ) + ) + + # add a build stage + build_stage = deploy_code_pipeline.add_stage(stage_name="Build") + + build_stage.add_action( + codepipeline_actions.CodeBuildAction( + action_name="Synth", + input=source_artifact, + outputs=[cdk_synth_artifact], + project=cdk_synth_build, + ) + ) + + # add a security evaluation stage for cloudformation templates + security_stage = deploy_code_pipeline.add_stage(stage_name="SecurityEvaluation") + + security_stage.add_action( + codepipeline_actions.CodeBuildAction( + action_name="CFNNag", + input=cdk_synth_artifact, + outputs=[cfn_nag_artifact], + project=security_scan, + ) + ) + + # add stages to deploy to the different 
environments + deploy_code_pipeline.add_stage( + stage_name="DeployDev", + actions=[ + codepipeline_actions.CloudFormationCreateUpdateStackAction( + action_name="Deploy_CFN_Dev", + run_order=1, + template_path=cdk_synth_artifact.at_path("dev.template.json"), + stack_name=f"{project_name}-{construct_id}-dev", + admin_permissions=False, + replace_on_failure=True, + role=iam.Role.from_role_arn( + self, + "DevActionRole", + f"arn:{Aws.PARTITION}:iam::{Aws.ACCOUNT_ID}:role/" + f"cdk-hnb659fds-deploy-role-{Aws.ACCOUNT_ID}-{Aws.REGION}", + ), + deployment_role=iam.Role.from_role_arn( + self, + "DevDeploymentRole", + f"arn:{Aws.PARTITION}:iam::{Aws.ACCOUNT_ID}:role/" + f"cdk-hnb659fds-cfn-exec-role-{Aws.ACCOUNT_ID}-{Aws.REGION}", + ), + cfn_capabilities=[ + CfnCapabilities.AUTO_EXPAND, + CfnCapabilities.NAMED_IAM, + ], + ), + codepipeline_actions.ManualApprovalAction( + action_name="Approve_PreProd", + run_order=2, + additional_information="Approving deployment for preprod", + ), + ], + ) + + deploy_code_pipeline.add_stage( + stage_name="DeployPreProd", + actions=[ + codepipeline_actions.CloudFormationCreateUpdateStackAction( + action_name="Deploy_CFN_PreProd", + run_order=1, + template_path=cdk_synth_artifact.at_path("preprod.template.json"), + stack_name=f"{project_name}-{construct_id}-preprod", + admin_permissions=False, + replace_on_failure=True, + role=iam.Role.from_role_arn( + self, + "PreProdActionRole", + f"arn:{Aws.PARTITION}:iam::{preprod_account}:role/" + f"cdk-hnb659fds-deploy-role-{preprod_account}-{deployment_region}", + ), + deployment_role=iam.Role.from_role_arn( + self, + "PreProdDeploymentRole", + f"arn:{Aws.PARTITION}:iam::{preprod_account}:role/" + f"cdk-hnb659fds-cfn-exec-role-{preprod_account}-{deployment_region}", + ), + cfn_capabilities=[ + CfnCapabilities.AUTO_EXPAND, + CfnCapabilities.NAMED_IAM, + ], + ), + codepipeline_actions.ManualApprovalAction( + action_name="Approve_Prod", + run_order=2, + additional_information="Approving deployment for 
prod", + ), + ], + ) + + deploy_code_pipeline.add_stage( + stage_name="DeployProd", + actions=[ + codepipeline_actions.CloudFormationCreateUpdateStackAction( + action_name="Deploy_CFN_Prod", + run_order=1, + template_path=cdk_synth_artifact.at_path("prod.template.json"), + stack_name=f"{project_name}-{construct_id}-prod", + admin_permissions=False, + replace_on_failure=True, + role=iam.Role.from_role_arn( + self, + "ProdActionRole", + f"arn:{Aws.PARTITION}:iam::{prod_account}:role/" + f"cdk-hnb659fds-deploy-role-{prod_account}-{deployment_region}", + ), + deployment_role=iam.Role.from_role_arn( + self, + "ProdDeploymentRole", + f"arn:{Aws.PARTITION}:iam::{prod_account}:role/" + f"cdk-hnb659fds-cfn-exec-role-{prod_account}-{deployment_region}", + ), + cfn_capabilities=[ + CfnCapabilities.AUTO_EXPAND, + CfnCapabilities.NAMED_IAM, + ], + ), + ], + ) + + # CloudWatch rule to trigger model pipeline when a status change event happens to the model package group + events.Rule( + self, + "ModelEventRule", + event_pattern=events.EventPattern( + source=["aws.sagemaker"], + detail_type=["SageMaker Model Package State Change"], + detail={ + "ModelPackageGroupName": [model_package_group_name], + "ModelApprovalStatus": ["Approved", "Rejected"], + }, + ), + targets=[targets.CodePipeline(deploy_code_pipeline)], + ) diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/product_stack.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/product_stack.py new file mode 100644 index 00000000..21f2a5d4 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/product_stack.py @@ -0,0 +1,255 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
class Product(servicecatalog.ProductStack):
    """Service Catalog product stack for the multi-account SageMaker project template.

    The stack declares the CloudFormation parameters of the project (project
    name/id plus pre-prod and prod account ids and regions), creates two
    KMS-encrypted, versioned S3 buckets (ML artifacts and CodePipeline
    artifacts), a SageMaker model package group with a cross-account resource
    policy, and finally instantiates the build and deploy pipeline constructs.
    """

    # Human-readable description of the product.
    DESCRIPTION: str = "Creates a SageMaker pipeline which trains a model on Abalone data."
    # Display name of the product/template.
    TEMPLATE_NAME: str = "Train Model on Abalone Data"

    def __init__(
        self,
        scope: Construct,
        id: str,
        build_app_asset: s3_assets.Asset,
        deploy_app_asset: s3_assets.Asset,
    ) -> None:
        """Define all resources of the product.

        Args:
            scope: Parent construct.
            id: Construct id of this product stack.
            build_app_asset: Asset handed to ``BuildPipelineConstruct`` as ``repo_asset``.
            deploy_app_asset: Asset handed to ``DeployPipelineConstruct`` as ``repo_asset``.
        """
        super().__init__(scope, id)

        # ------------------------------------------------------------------
        # CloudFormation parameters of the product
        # ------------------------------------------------------------------
        sagemaker_project_name = CfnParameter(
            self,
            "SageMakerProjectName",
            type="String",
            description="Name of the project.",
        ).value_as_string

        sagemaker_project_id = CfnParameter(
            self,
            "SageMakerProjectId",
            type="String",
            description="Service generated Id of the project.",
        ).value_as_string

        preprod_account_id = CfnParameter(
            self,
            "PreprodAccountId",
            type="String",
            description="Pre-prod account id.",
        ).value_as_string

        preprod_region = CfnParameter(
            self,
            "PreprodRegion",
            type="String",
            description="Pre-prod region.",
        ).value_as_string

        prod_account_id = CfnParameter(
            self,
            "ProdAccountId",
            type="String",
            description="Prod account id.",
        ).value_as_string

        prod_region = CfnParameter(
            self,
            "ProdRegion",
            type="String",
            description="Prod region.",
        ).value_as_string

        # Tag every resource in this stack with the owning SageMaker project.
        Tags.of(self).add("sagemaker:project-id", sagemaker_project_id)
        Tags.of(self).add("sagemaker:project-name", sagemaker_project_name)

        # ------------------------------------------------------------------
        # ML artifacts bucket and its KMS key
        # ------------------------------------------------------------------
        # create kms key to be used by the assets bucket
        artifact_kms_key = kms.Key(
            self,
            "Artifacts Bucket KMS Key",
            description="key used for encryption of data in Amazon S3",
            enable_key_rotation=True,
            policy=iam.PolicyDocument(
                statements=[
                    iam.PolicyStatement(
                        actions=["kms:*"],
                        effect=iam.Effect.ALLOW,
                        resources=["*"],
                        principals=[iam.AccountRootPrincipal()],
                    )
                ]
            ),
        )

        # allow cross account access to the kms key
        artifact_kms_key.add_to_resource_policy(
            iam.PolicyStatement(
                actions=[
                    "kms:Encrypt",
                    "kms:Decrypt",
                    "kms:ReEncrypt*",
                    "kms:GenerateDataKey*",
                    "kms:DescribeKey",
                ],
                resources=[
                    "*",
                ],
                principals=[
                    iam.AccountPrincipal(preprod_account_id),
                    iam.AccountPrincipal(prod_account_id),
                ],
            )
        )

        s3_artifact = s3.Bucket(
            self,
            "S3 Artifact",
            bucket_name=f"mlops-{sagemaker_project_name}-{sagemaker_project_id}-{Aws.ACCOUNT_ID}",
            encryption_key=artifact_kms_key,
            versioned=True,
            removal_policy=RemovalPolicy.DESTROY,
            enforce_ssl=True,  # Blocks insecure requests to the bucket
        )

        # DEV (this) account: full access to the bucket and its objects
        s3_artifact.add_to_resource_policy(
            iam.PolicyStatement(
                sid="AddDevPermissions",
                actions=["s3:*"],
                resources=[
                    s3_artifact.arn_for_objects(key_pattern="*"),
                    s3_artifact.bucket_arn,
                ],
                principals=[
                    iam.AccountRootPrincipal(),
                ],
            )
        )

        # PRE-PROD and PROD accounts: list/get/put access to the bucket and its objects
        s3_artifact.add_to_resource_policy(
            iam.PolicyStatement(
                sid="AddCrossAccountPermissions",
                actions=["s3:List*", "s3:Get*", "s3:Put*"],
                resources=[
                    s3_artifact.arn_for_objects(key_pattern="*"),
                    s3_artifact.bucket_arn,
                ],
                principals=[
                    iam.AccountPrincipal(preprod_account_id),
                    iam.AccountPrincipal(prod_account_id),
                ],
            )
        )

        # ------------------------------------------------------------------
        # Model registry (model package group) shared with pre-prod and prod
        # ------------------------------------------------------------------
        model_package_group_name = f"{sagemaker_project_name}-{sagemaker_project_id}"

        # cross account model registry resource policy
        model_package_group_policy = iam.PolicyDocument(
            statements=[
                iam.PolicyStatement(
                    sid="ModelPackageGroup",
                    actions=[
                        "sagemaker:DescribeModelPackageGroup",
                    ],
                    resources=[
                        f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:model-package-group/"
                        f"{model_package_group_name}"
                    ],
                    principals=[
                        iam.AccountPrincipal(preprod_account_id),
                        iam.AccountPrincipal(prod_account_id),
                    ],
                ),
                iam.PolicyStatement(
                    sid="ModelPackage",
                    actions=[
                        "sagemaker:DescribeModelPackage",
                        "sagemaker:ListModelPackages",
                        "sagemaker:UpdateModelPackage",
                        "sagemaker:CreateModel",
                    ],
                    resources=[
                        f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:model-package/"
                        f"{model_package_group_name}/*"
                    ],
                    principals=[
                        iam.AccountPrincipal(preprod_account_id),
                        iam.AccountPrincipal(prod_account_id),
                    ],
                ),
            ]
        ).to_json()

        sagemaker.CfnModelPackageGroup(
            self,
            "Model Package Group",
            model_package_group_name=model_package_group_name,
            model_package_group_description=f"Model Package Group for {sagemaker_project_name}",
            model_package_group_policy=model_package_group_policy,
            tags=[
                CfnTag(key="sagemaker:project-id", value=sagemaker_project_id),
                CfnTag(key="sagemaker:project-name", value=sagemaker_project_name),
            ],
        )

        # ------------------------------------------------------------------
        # CodePipeline artifact bucket and its own KMS key
        # ------------------------------------------------------------------
        # A separate name is used so this key is not confused with the
        # artifacts-bucket key defined above.
        pipeline_kms_key = kms.Key(
            self,
            "Pipeline Bucket KMS Key",
            description="key used for encryption of data in Amazon S3",
            enable_key_rotation=True,
            policy=iam.PolicyDocument(
                statements=[
                    iam.PolicyStatement(
                        actions=["kms:*"],
                        effect=iam.Effect.ALLOW,
                        resources=["*"],
                        principals=[iam.AccountRootPrincipal()],
                    )
                ]
            ),
        )

        pipeline_artifact_bucket = s3.Bucket(
            self,
            "Pipeline Bucket",
            bucket_name=f"pipeline-{sagemaker_project_name}-{sagemaker_project_id}-{Aws.ACCOUNT_ID}",
            encryption_key=pipeline_kms_key,
            versioned=True,
            removal_policy=RemovalPolicy.DESTROY,
            enforce_ssl=True,  # Same TLS-only policy as the artifacts bucket
        )

        # ------------------------------------------------------------------
        # Build (training) and deploy pipelines
        # ------------------------------------------------------------------
        BuildPipelineConstruct(
            self,
            "build",
            project_name=sagemaker_project_name,
            project_id=sagemaker_project_id,
            s3_artifact=s3_artifact,
            pipeline_artifact_bucket=pipeline_artifact_bucket,
            model_package_group_name=model_package_group_name,
            repo_asset=build_app_asset,
        )

        DeployPipelineConstruct(
            self,
            "deploy",
            project_name=sagemaker_project_name,
            project_id=sagemaker_project_id,
            pipeline_artifact_bucket=pipeline_artifact_bucket,
            model_package_group_name=model_package_group_name,
            repo_asset=deploy_app_asset,
            preprod_account=preprod_account_id,
            preprod_region=preprod_region,
            prod_account=prod_account_id,
            prod_region=prod_region,
            deployment_region=Aws.REGION,
        )
+ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-added-large-files + - id: check-json + - id: check-merge-conflict + # - id: check-yaml + - id: end-of-file-fixer + - id: requirements-txt-fixer + - id: trailing-whitespace +- repo: https://github.com/psf/black + rev: 22.6.0 + hooks: + - id: black + args: ["--line-length=120"] +- repo: https://gitlab.com/PyCQA/flake8 + rev: 3.9.2 + hooks: + - id: flake8 + args: ["--ignore=E231,E501,F841,W503,F403,E266,W605,F541,F401,E302", "--exclude=app.py", "--max-line-length=120"] +- repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.2.0 + hooks: + - id: forbid-crlf + - id: remove-crlf + - id: insert-license + files: \.(py|yaml)$ +- repo: local + hooks: + - id: clear-jupyter-notebooks + name: clear-jupyter-notebooks + entry: bash -c 'find . -type f -name "*.ipynb" -exec jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace "{}" \; && git add . && exit 0' + language: system + pass_filenames: false diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/Makefile b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/Makefile new file mode 100644 index 00000000..ce0bc7b2 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/Makefile @@ -0,0 +1,102 @@ +.PHONY: lint init + +################################################################################# +# GLOBALS # +################################################################################# + +PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) +PROJECT_NAME = gfdtv-dataanalysis-data-models +PYTHON_INTERPRETER = python3 + +ifeq (,$(shell which conda)) +HAS_CONDA=False +else +HAS_CONDA=True +endif + +################################################################################# +# COMMANDS # 
+################################################################################# + +## Lint using flake8 +lint: + flake8 src +## Setup git hooks +init: + git config core.hooksPath .githooks + +clean: + rm -f cdk.staging + rm -rf cdk.out + find . -name '*.egg-info' -exec rm -fr {} + + find . -name '.coverage' -exec rm -fr {} + + find . -name '.pytest_cache' -exec rm -fr {} + + find . -name '.tox' -exec rm -fr {} + + find . -name '__pycache__' -exec rm -fr {} + +################################################################################# +# PROJECT RULES # +################################################################################# + + + + +################################################################################# +# Self Documenting Commands # +################################################################################# + +.DEFAULT_GOAL := help + +# Inspired by +# sed script explained: +# /^##/: +# * save line in hold space +# * purge line +# * Loop: +# * append newline + line to hold space +# * go to next line +# * if line starts with doc comment, strip comment character off and loop +# * remove target prerequisites +# * append hold space (+ newline) to line +# * replace newline plus comments by `---` +# * print line +# Separate expressions are necessary because labels cannot be delimited by +# semicolon; see +.PHONY: help +help: + @echo "$$(tput bold)Available rules:$$(tput sgr0)" + @echo + @sed -n -e "/^## / { \ + h; \ + s/.*//; \ + :doc" \ + -e "H; \ + n; \ + s/^## //; \ + t doc" \ + -e "s/:.*//; \ + G; \ + s/\\n## /---/; \ + s/\\n/ /g; \ + p; \ + }" ${MAKEFILE_LIST} \ + | LC_ALL='C' sort --ignore-case \ + | awk -F '---' \ + -v ncol=$$(tput cols) \ + -v indent=19 \ + -v col_on="$$(tput setaf 6)" \ + -v col_off="$$(tput sgr0)" \ + '{ \ + printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ + n = split($$2, words, " "); \ + line_length = ncol - indent; \ + for (i = 1; i <= n; i++) { \ + line_length -= length(words[i]) + 1; \ + if 
(line_length <= 0) { \ + line_length = ncol - indent - length(words[i]) - 1; \ + printf "\n%*s ", -indent, " "; \ + } \ + printf "%s ", words[i]; \ + } \ + printf "\n"; \ + }' \ + | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars') diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/README.md b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/README.md new file mode 100644 index 00000000..5f37e522 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/README.md @@ -0,0 +1,24 @@ +# SageMaker Build - Train Pipelines + +This folder contains all the SageMaker Pipelines of your project. + +`buildspec.yml` defines how to run a pipeline after each commit to this repository. +`ml_pipelines/` contains the SageMaker pipelines definitions. +The expected output of your main pipeline (here `training/pipeline.py`) is a model registered to SageMaker Model Registry. + +`source_scripts/` contains the underlying scripts run by the steps of your SageMaker Pipelines. For example, if your SageMaker Pipeline runs a Processing Job as part of a Processing Step, the code being run inside the Processing Job should be defined in this folder. +A typical folder structure for `source_scripts/` can contain `helpers`, `preprocessing`, `training`, `postprocessing`, `evaluate`, depending on the nature of the steps run as part of the SageMaker Pipeline. +We provide here an example with the Abalone dataset: training an XGBoost model and evaluating the model on a test set before sending it for manual approval to SageMaker Model Registry inside the SageMaker ModelPackageGroup defined when creating the SageMaker Project. +Additionally, if you use custom containers, the Dockerfile definitions should be found in that folder. 
+ +`tests/` contains the unittests for your `source_scripts/` + +`notebooks/` contains experimentation notebooks. + +# Run pipeline from command line from this folder + +``` +pip install -e . + +run-pipeline --module-name ml_pipelines.training.pipeline --role-arn YOUR_SAGEMAKER_EXECUTION_ROLE_ARN --kwargs '{"region":"eu-west-1"}' +``` diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/buildspec.yml b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/buildspec.yml new file mode 100644 index 00000000..9f9010d1 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/buildspec.yml @@ -0,0 +1,19 @@ +version: 0.2 + +phases: + install: + runtime-versions: + python: 3.8 + commands: + - pip install --upgrade --force-reinstall . "awscli>1.20.30" + + build: + commands: + - export PYTHONUNBUFFERED=TRUE + - export SAGEMAKER_PROJECT_NAME_ID="${SAGEMAKER_PROJECT_NAME}-${SAGEMAKER_PROJECT_ID}" + - | + run-pipeline --module-name ml_pipelines.training.pipeline \ + --role-arn $SAGEMAKER_PIPELINE_ROLE_ARN \ + --tags "[{\"Key\":\"sagemaker:project-name\", \"Value\":\"${SAGEMAKER_PROJECT_NAME}\"}, {\"Key\":\"sagemaker:project-id\", \"Value\":\"${SAGEMAKER_PROJECT_ID}\"}]" \ + --kwargs "{\"region\":\"${AWS_REGION}\",\"role\":\"${SAGEMAKER_PIPELINE_ROLE_ARN}\",\"default_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"model_package_group_name\":\"${MODEL_PACKAGE_GROUP_NAME}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME_ID}\", \"bucket_kms_id\":\"${ARTIFACT_BUCKET_KMS_ID}\"}" + - echo "Create/Update of the SageMaker Pipeline and execution completed." 
diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/ml_pipelines/README.md b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/ml_pipelines/README.md new file mode 100644 index 00000000..f55d6cf5 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/ml_pipelines/README.md @@ -0,0 +1,7 @@ +# SageMaker Pipelines + +This folder contains SageMaker Pipeline definitions and helper scripts to either simply "get" a SageMaker Pipeline definition (JSON dictionary) with `get_pipeline_definition.py`, or "run" a SageMaker Pipeline from a SageMaker pipeline definition with `run_pipeline.py`. + +Those files are generic and can be reused to call any SageMaker Pipeline. + +Each SageMaker Pipeline definition should be treated as a module inside its own folder, for example here the "training" pipeline, contained inside `training/`. diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/ml_pipelines/__init__.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/ml_pipelines/__init__.py new file mode 100644 index 00000000..ff79f21c --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/ml_pipelines/__init__.py @@ -0,0 +1,30 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +# © 2021 Amazon Web Services, Inc. or its affiliates. All Rights Reserved. This +# AWS Content is provided subject to the terms of the AWS Customer Agreement +# available at http://aws.amazon.com/agreement or other written agreement between +# Customer and either Amazon Web Services, Inc. or Amazon Web Services EMEA SARL +# or both. +# +# Any code, applications, scripts, templates, proofs of concept, documentation +# and other items provided by AWS under this SOW are "AWS Content," as defined +# in the Agreement, and are provided for illustration purposes only. All such +# AWS Content is provided solely at the option of AWS, and is subject to the +# terms of the Addendum and the Agreement. Customer is solely responsible for +# using, deploying, testing, and supporting any code and applications provided +# by AWS under this SOW. diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/ml_pipelines/__version__.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/ml_pipelines/__version__.py new file mode 100644 index 00000000..f1578eb6 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/ml_pipelines/__version__.py @@ -0,0 +1,12 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
def get_pipeline_driver(module_name: str, passed_args: Optional[str] = None) -> Any:
    """Import a pipeline module and build its pipeline object.

    The target module must expose a module-level ``get_pipeline(**kwargs)``
    callable.

    Args:
        module_name: Importable (dotted) name of the pipeline module.
        passed_args: Optional Python-literal dict string; once parsed, its
            items are forwarded to ``get_pipeline`` as keyword arguments.

    Returns:
        Whatever the module's ``get_pipeline`` returns.
    """
    # A non-empty ``fromlist`` makes ``__import__`` return the leaf module
    # instead of the top-level package.
    pipeline_module = __import__(module_name, fromlist=["get_pipeline"])
    pipeline_kwargs = convert_struct(passed_args)
    return pipeline_module.get_pipeline(**pipeline_kwargs)


def convert_struct(str_struct: Optional[str] = None) -> Any:
    """Parse a Python-literal string into the object it denotes.

    Args:
        str_struct: Literal expression to evaluate (e.g. a dict or list
            string). ``None`` and the empty string are treated as "no value".

    Returns:
        The evaluated literal, or an empty dict when nothing was supplied.
    """
    if not str_struct:
        return {}
    return ast.literal_eval(str_struct)


def get_pipeline_custom_tags(module_name: str, args: Optional[str], tags: Dict[str, Any]) -> Any:
    """Ask the pipeline module for extra tags, falling back to ``tags``.

    The module's ``get_pipeline_custom_tags`` hook is called with ``tags`` and
    the ``region`` and ``sagemaker_project_arn`` entries of the parsed
    ``args``. This is best-effort: any failure (import error, unparsable
    ``args``, missing key, missing hook, error inside the hook) is printed
    and the incoming ``tags`` object is returned unchanged.

    Args:
        module_name: Importable name of the pipeline module.
        args: Optional Python-literal dict string with the pipeline kwargs.
        tags: Tags to start from.

    Returns:
        The hook's result, or ``tags`` itself when the hook could not be used.
    """
    try:
        tag_module = __import__(module_name, fromlist=["get_pipeline_custom_tags"])
        options = convert_struct(args)
        custom_tags = tag_module.get_pipeline_custom_tags(tags, options["region"], options["sagemaker_project_arn"])
    except Exception as err:
        print(f"Error getting project tags: {err}")
        return tags
    return custom_tags
def main() -> None:  # pragma: no cover
    """Create or update a SageMaker Pipeline, start it and wait for it to finish.

    Command-line options:
        --module-name  Module that provides the pipeline (required).
        --role-arn     Pipeline service execution role ARN (required).
        --kwargs       Dict string of keyword arguments for pipeline generation.
        --description  Optional pipeline description.
        --tags         List-of-dicts string, ``[{"Key": ..., "Value": ...}, ..]``.

    Exits with status 2 (after printing the help text) when ``--module-name``
    or ``--role-arn`` is missing, and with status 1 when building, upserting,
    starting or waiting for the pipeline raises an exception.
    """
    parser = argparse.ArgumentParser("Creates or updates and runs the pipeline for the pipeline script.")

    parser.add_argument(
        "-n",
        "--module-name",
        dest="module_name",
        type=str,
        help="The module name of the pipeline to import.",
    )
    parser.add_argument(
        "-kwargs",
        "--kwargs",
        dest="kwargs",
        default=None,
        help="Dict string of keyword arguments for the pipeline generation (if supported)",
    )
    parser.add_argument(
        "-role-arn",
        "--role-arn",
        dest="role_arn",
        type=str,
        help="The role arn for the pipeline service execution role.",
    )
    parser.add_argument(
        "-description",
        "--description",
        dest="description",
        type=str,
        default=None,
        help="The description of the pipeline.",
    )
    parser.add_argument(
        "-tags",
        "--tags",
        dest="tags",
        default=None,
        help="""List of dict strings of '[{"Key": "string", "Value": "string"}, ..]'""",
    )
    args = parser.parse_args()

    if args.module_name is None or args.role_arn is None:
        parser.print_help()
        sys.exit(2)
    tags = convert_struct(args.tags)

    try:
        pipeline = get_pipeline_driver(args.module_name, args.kwargs)
        print("###### Creating/updating a SageMaker Pipeline with the following definition:")
        parsed = json.loads(pipeline.definition())
        print(json.dumps(parsed, indent=2, sort_keys=True))

        all_tags = get_pipeline_custom_tags(args.module_name, args.kwargs, tags)

        # Upsert exactly once, with the tags. A second, tag-less upsert used to
        # follow immediately; it only repeated the same create/update request.
        upsert_response = pipeline.upsert(role_arn=args.role_arn, description=args.description, tags=all_tags)
        print("\n###### Created/Updated SageMaker Pipeline: Response received:")
        print(upsert_response)

        execution = pipeline.start()
        print(f"\n###### Execution started with PipelineExecutionArn: {execution.arn}")

        # TODO: consider removing this blocking wait, as training can take some time.
        print("Waiting for the execution to finish...")
        execution.wait()
        print("\n#####Execution completed. Execution step details:")

        print(execution.list_steps())
    except Exception as e:  # pylint: disable=W0703
        # Top-level boundary of the CLI: report the error and signal failure
        # to the caller through the exit status.
        print(f"Exception: {e}")
        sys.exit(1)
logger = logging.getLogger(__name__)


def resolve_ecr_uri_from_image_versions(
    sagemaker_session: Any, image_versions: List[Dict[str, Any]], image_name: str
) -> Any:
    """Return the container image URI of the first ``CREATED`` image version.

    Args:
        sagemaker_session: session object exposing a ``sagemaker_client``
            attribute (the SageMaker API client).
        image_versions: image version descriptions, scanned in the given order.
        image_name: name of the SageMaker image the versions belong to.

    Returns:
        The ``ContainerImage`` value of the first version whose
        ``ImageVersionStatus`` is ``"CREATED"``, or ``None`` if there is none.
    """
    for image_version in image_versions:
        if image_version["ImageVersionStatus"] != "CREATED":
            continue
        logger.info("Identified the latest image version: %s", image_version["ImageVersionArn"])
        response = sagemaker_session.sagemaker_client.describe_image_version(
            ImageName=image_name, Version=image_version["Version"]
        )
        return response["ContainerImage"]
    return None


def resolve_ecr_uri(sagemaker_session: Any, image_arn: str) -> Any:
    """Return the ECR URI of the newest usable version of a SageMaker image.

    Args:
        sagemaker_session: session object exposing a ``sagemaker_client``
            attribute (the SageMaker API client).
        image_arn: ARN of the SageMaker image; the image name is the part
            after ``image/``.

    Returns:
        ECR URI of the first ``CREATED`` version, with versions requested in
        descending version order.

    Raises:
        RuntimeError: if no ``CREATED`` version exists, or if the SageMaker
            API call fails (the original client error is chained as cause).
    """
    # Image ARN pattern: ^arn:aws(-[\w]+)*:sagemaker:.+:[0-9]{12}:image/[a-z0-9]([-.]?[a-z0-9])*$
    image_name = image_arn.partition("image/")[2]
    request: Dict[str, Any] = {
        "ImageName": image_name,
        "MaxResults": 100,
        "SortBy": "VERSION",
        "SortOrder": "DESCENDING",
    }
    try:
        while True:
            # NextToken is only sent once the service has handed one out; the
            # first page is requested without it rather than with "".
            response = sagemaker_session.sagemaker_client.list_image_versions(**request)

            ecr_uri = resolve_ecr_uri_from_image_versions(sagemaker_session, response["ImageVersions"], image_name)
            if ecr_uri is not None:
                return ecr_uri

            next_token = response.get("NextToken")
            if not next_token:
                break
            request["NextToken"] = next_token
    except ClientError as e:
        # The client's modeled ResourceNotFound exception derives from
        # ClientError, so this single clause covers both.
        error_message = e.response["Error"]["Message"]
        logger.error(error_message)
        raise RuntimeError(error_message) from e

    # Every page was scanned without finding a usable version.
    error_message = f"No image version found for image name: {image_name}"
    logger.error(error_message)
    raise RuntimeError(error_message)
def get_session(region: str, default_bucket: Optional[str]) -> sagemaker.session.Session:
    """Build a SageMaker session bound to the given region.

    Args:
        region: the AWS region to start the session in.
        default_bucket: the bucket to use for storing the artifacts.

    Returns:
        A ``sagemaker.session.Session`` instance.
    """
    boto_session = boto3.Session(region_name=region)
    return sagemaker.session.Session(
        boto_session=boto_session,
        sagemaker_client=boto_session.client("sagemaker"),
        sagemaker_runtime_client=boto_session.client("sagemaker-runtime"),
        default_bucket=default_bucket,
    )


def _resolve_image_uri(sagemaker_session: sagemaker.session.Session, image_name: str, region: str) -> Any:
    """Return the URI of the named SageMaker image, or the built-in XGBoost image if it does not exist."""
    try:
        return sagemaker_session.sagemaker_client.describe_image_version(ImageName=image_name)["ContainerImage"]
    except sagemaker_session.sagemaker_client.exceptions.ResourceNotFound:
        return sagemaker.image_uris.retrieve(
            framework="xgboost",
            region=region,
            version="1.0-1",
            py_version="py3",
            instance_type="ml.m5.xlarge",
        )


def get_pipeline(
    region: str,
    role: Optional[str] = None,
    default_bucket: Optional[str] = None,
    bucket_kms_id: Optional[str] = None,
    model_package_group_name: str = "AbalonePackageGroup",
    pipeline_name: str = "AbalonePipeline",
    base_job_prefix: str = "Abalone",
    project_id: str = "SageMakerProjectId",
) -> Any:
    """Build the SageMaker pipeline that processes, trains, evaluates and registers the abalone model.

    Args:
        region: AWS region to create and run the pipeline.
        role: IAM role to create and run steps and pipeline; when omitted, the
            session's execution role is looked up.
        default_bucket: the bucket to use for storing the artifacts; when
            omitted, the session's default bucket is used for the model output.
        bucket_kms_id: KMS key passed as ``output_kms_key`` to the processors
            and the estimator.
        model_package_group_name: model package group the model is registered in.
        pipeline_name: name of the pipeline.
        base_job_prefix: prefix for job names and for the model output path.
        project_id: project id used to derive the names of the custom
            processing, training and inference images.

    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # parameters for pipeline execution
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(name="ProcessingInstanceType", default_value="ml.m5.xlarge")
    training_instance_type = ParameterString(name="TrainingInstanceType", default_value="ml.m5.xlarge")
    inference_instance_type = ParameterString(name="InferenceInstanceType", default_value="ml.m5.xlarge")  # noqa: F841
    model_approval_status = ParameterString(name="ModelApprovalStatus", default_value="PendingManualApproval")
    input_data = ParameterString(
        name="InputDataUrl",
        default_value=f"s3://sagemaker-servicecatalog-seedcode-{region}/dataset/abalone-dataset.csv",
    )
    processing_image_name = f"sagemaker-{project_id}-processingimagebuild"
    training_image_name = f"sagemaker-{project_id}-trainingimagebuild"
    inference_image_name = f"sagemaker-{project_id}-inferenceimagebuild"

    # processing step for feature engineering
    processing_image_uri = _resolve_image_uri(sagemaker_session, processing_image_name, region)
    script_processor = ScriptProcessor(
        image_uri=processing_image_uri,
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-abalone-preprocess",
        command=["python3"],
        sagemaker_session=sagemaker_session,
        role=role,
        output_kms_key=bucket_kms_id,
    )
    step_process = ProcessingStep(
        name="PreprocessAbaloneData",
        processor=script_processor,
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        code="source_scripts/preprocessing/prepare_abalone_data/main.py",
        job_arguments=["--input-data", input_data],
    )

    # training step for generating model artifacts
    # Fall back to the session's bucket so that an omitted default_bucket does
    # not produce the literal path "s3://None/...".
    artifact_bucket = default_bucket or sagemaker_session.default_bucket()
    model_path = f"s3://{artifact_bucket}/{base_job_prefix}/AbaloneTrain"

    training_image_uri = _resolve_image_uri(sagemaker_session, training_image_name, region)

    xgb_train = Estimator(
        image_uri=training_image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        base_job_name=f"{base_job_prefix}/abalone-train",
        sagemaker_session=sagemaker_session,
        role=role,
        output_kms_key=bucket_kms_id,
    )
    xgb_train.set_hyperparameters(
        objective="reg:linear",
        num_round=50,
        max_depth=5,
        eta=0.2,
        gamma=4,
        min_child_weight=6,
        subsample=0.7,
        silent=0,
    )
    step_train = TrainingStep(
        name="TrainAbaloneModel",
        estimator=xgb_train,
        inputs={
            "train": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs["validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # processing step for evaluation (runs in the training image)
    script_eval = ScriptProcessor(
        image_uri=training_image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-abalone-eval",
        sagemaker_session=sagemaker_session,
        role=role,
        output_kms_key=bucket_kms_id,
    )
    evaluation_report = PropertyFile(
        name="AbaloneEvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="EvaluateAbaloneModel",
        processor=script_eval,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_process.properties.ProcessingOutputConfig.Outputs["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code="source_scripts/evaluate/evaluate_xgboost/main.py",
        property_files=[evaluation_report],
    )

    # register model step that will be conditionally executed
    evaluation_output_uri = step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri=f"{evaluation_output_uri}/evaluation.json",
            content_type="application/json",
        )
    )

    inference_image_uri = _resolve_image_uri(sagemaker_session, inference_image_name, region)
    step_register = RegisterModel(
        name="RegisterAbaloneModel",
        estimator=xgb_train,
        image_uri=inference_image_uri,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # condition step for evaluating model quality and branching execution:
    # the model is registered only when the reported MSE is at most 6.0
    cond_lte = ConditionLessThanOrEqualTo(
        left=JsonGet(
            step_name=step_eval.name,
            property_file=evaluation_report,
            json_path="regression_metrics.mse.value",
        ),
        right=6.0,
    )
    step_cond = ConditionStep(
        name="CheckMSEAbaloneEvaluation",
        conditions=[cond_lte],
        if_steps=[step_register],
        else_steps=[],
    )

    # pipeline instance
    return Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_process, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )
diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/notebooks/sm_pipelines_runbook.ipynb b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/notebooks/sm_pipelines_runbook.ipynb new file mode 100644 index 00000000..bbc0902e --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/notebooks/sm_pipelines_runbook.ipynb @@ -0,0 +1,534 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import logging\n", + "\n", + "import boto3\n", + "import sagemaker\n", + "import sagemaker.session\n", + "from sagemaker.estimator import Estimator\n", + "from sagemaker.inputs import TrainingInput\n", + "from sagemaker.model_metrics import (\n", + " MetricsSource,\n", + " ModelMetrics,\n", + ")\n", + "from sagemaker.processing import (\n", + " ProcessingInput,\n", + " ProcessingOutput,\n", + " ScriptProcessor,\n", + ")\n", + "from sagemaker.workflow.condition_step import (\n", + " ConditionStep,\n", + ")\n", + "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo\n", + "from sagemaker.workflow.functions import (\n", + " JsonGet,\n", + ")\n", + "from sagemaker.workflow.parameters import (\n", + " ParameterInteger,\n", + " ParameterString,\n", + ")\n", + "from sagemaker.workflow.pipeline import Pipeline\n", + "from sagemaker.workflow.properties import PropertyFile\n", + "from sagemaker.workflow.step_collections import RegisterModel\n", + "from sagemaker.workflow.steps import (\n", + " ProcessingStep,\n", + " TrainingStep,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "logger = logging.getLogger(__name__)\n", + "\n", + "\"\"\"Environment Variables\"\"\"\n", + "proj_dir = \"TO_BE_DEFINED\"\n", + "region = 
\"TO_BE_DEFINED\"\n", + "model_artefact_bucket = \"TO_BE_DEFINED\"\n", + "role = \"TO_BE_DEFINED\"\n", + "project_name = \"TO_BE_DEFINED\"\n", + "stage = \"test\"\n", + "model_package_group_name = \"AbalonePackageGroup\"\n", + "pipeline_name = \"AbalonePipeline\"\n", + "base_job_prefix = \"Abalone\"\n", + "project_id = \"SageMakerProjectId\"\n", + "processing_image_uri = None\n", + "training_image_uri = None\n", + "inference_image_uri = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def get_session(region, default_bucket):\n", + " \"\"\"Gets the sagemaker session based on the region.\n", + "\n", + " Args:\n", + " region: the aws region to start the session\n", + " default_bucket: the bucket to use for storing the artifacts\n", + "\n", + " Returns:\n", + " `sagemaker.session.Session instance\n", + " \"\"\"\n", + "\n", + " boto_session = boto3.Session(region_name=region)\n", + "\n", + " sagemaker_client = boto_session.client(\"sagemaker\")\n", + " runtime_client = boto_session.client(\"sagemaker-runtime\")\n", + " return sagemaker.session.Session(\n", + " boto_session=boto_session,\n", + " sagemaker_client=sagemaker_client,\n", + " sagemaker_runtime_client=runtime_client,\n", + " default_bucket=default_bucket,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "sagemaker_session = get_session(region, model_artefact_bucket)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Feature Engineering\n", + "This section describes the different steps involved in feature engineering which includes loading and transforming different data sources to build the features needed for the ML Use Case" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": 
{ + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", + "processing_instance_type = ParameterString(name=\"ProcessingInstanceType\", default_value=\"ml.m5.xlarge\")\n", + "training_instance_type = ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\")\n", + "inference_instance_type = ParameterString(name=\"InferenceInstanceType\", default_value=\"ml.m5.xlarge\")\n", + "model_approval_status = ParameterString(name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\")\n", + "input_data = ParameterString(\n", + " name=\"InputDataUrl\",\n", + " default_value=f\"s3://sagemaker-servicecatalog-seedcode-{region}/dataset/abalone-dataset.csv\",\n", + ")\n", + "processing_image_name = \"sagemaker-{0}-processingimagebuild\".format(project_id)\n", + "training_image_name = \"sagemaker-{0}-trainingimagebuild\".format(project_id)\n", + "inference_image_name = \"sagemaker-{0}-inferenceimagebuild\".format(project_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# processing step for feature engineering\n", + "try:\n", + " processing_image_uri = sagemaker_session.sagemaker_client.describe_image_version(ImageName=processing_image_name)[\n", + " \"ContainerImage\"\n", + " ]\n", + "\n", + "except sagemaker_session.sagemaker_client.exceptions.ResourceNotFound:\n", + " processing_image_uri = sagemaker.image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version=\"1.0-1\",\n", + " py_version=\"py3\",\n", + " instance_type=processing_instance_type,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Define Script Processor\n", + "script_processor = ScriptProcessor(\n", + " 
image_uri=processing_image_uri,\n", + " instance_type=processing_instance_type,\n", + " instance_count=processing_instance_count,\n", + " base_job_name=f\"{base_job_prefix}/sklearn-abalone-preprocess\",\n", + " command=[\"python3\"],\n", + " sagemaker_session=sagemaker_session,\n", + " role=role,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Define ProcessingStep\n", + "step_process = ProcessingStep(\n", + " name=\"PreprocessAbaloneData\",\n", + " processor=script_processor,\n", + " outputs=[\n", + " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", + " ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", + " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", + " ],\n", + " code=\"source_scripts/preprocessing/prepare_abalone_data/main.py\", # we must figure out this path to get it from step_source directory\n", + " job_arguments=[\"--input-data\", input_data],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Training an XGBoost model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# training step for generating model artifacts\n", + "model_path = f\"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/AbaloneTrain\"\n", + "\n", + "try:\n", + " training_image_uri = sagemaker_session.sagemaker_client.describe_image_version(ImageName=training_image_name)[\n", + " \"ContainerImage\"\n", + " ]\n", + "except sagemaker_session.sagemaker_client.exceptions.ResourceNotFound:\n", + " training_image_uri = sagemaker.image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version=\"1.0-1\",\n", + " py_version=\"py3\",\n", + " 
instance_type=training_instance_type,\n", + " )\n", + "\n", + "xgb_train = Estimator(\n", + " image_uri=training_image_uri,\n", + " instance_type=training_instance_type,\n", + " instance_count=1,\n", + " output_path=model_path,\n", + " base_job_name=f\"{base_job_prefix}/abalone-train\",\n", + " sagemaker_session=sagemaker_session,\n", + " role=role,\n", + ")\n", + "xgb_train.set_hyperparameters(\n", + " objective=\"reg:linear\",\n", + " num_round=50,\n", + " max_depth=5,\n", + " eta=0.2,\n", + " gamma=4,\n", + " min_child_weight=6,\n", + " subsample=0.7,\n", + " silent=0,\n", + ")\n", + "step_train = TrainingStep(\n", + " name=\"TrainAbaloneModel\",\n", + " estimator=xgb_train,\n", + " inputs={\n", + " \"train\": TrainingInput(\n", + " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", + " content_type=\"text/csv\",\n", + " ),\n", + " \"validation\": TrainingInput(\n", + " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"validation\"].S3Output.S3Uri,\n", + " content_type=\"text/csv\",\n", + " ),\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Evaluate the Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# processing step for evaluation\n", + "script_eval = ScriptProcessor(\n", + " image_uri=training_image_uri,\n", + " command=[\"python3\"],\n", + " instance_type=processing_instance_type,\n", + " instance_count=1,\n", + " base_job_name=f\"{base_job_prefix}/script-abalone-eval\",\n", + " sagemaker_session=sagemaker_session,\n", + " role=role,\n", + ")\n", + "evaluation_report = PropertyFile(\n", + " name=\"AbaloneEvaluationReport\",\n", + " output_name=\"evaluation\",\n", + " path=\"evaluation.json\",\n", + ")\n", + "step_eval = ProcessingStep(\n", + " name=\"EvaluateAbaloneModel\",\n", + " 
processor=script_eval,\n", + " inputs=[\n", + " ProcessingInput(\n", + " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + " destination=\"/opt/ml/processing/model\",\n", + " ),\n", + " ProcessingInput(\n", + " source=step_process.properties.ProcessingOutputConfig.Outputs[\"test\"].S3Output.S3Uri,\n", + " destination=\"/opt/ml/processing/test\",\n", + " ),\n", + " ],\n", + " outputs=[\n", + " ProcessingOutput(output_name=\"evaluation\", source=\"/opt/ml/processing/evaluation\"),\n", + " ],\n", + " code=\"source_scripts/evaluate/evaluate_xgboost/main.py\",\n", + " property_files=[evaluation_report],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Conditional step to push model to SageMaker Model Registry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# register model step that will be conditionally executed\n", + "model_metrics = ModelMetrics(\n", + " model_statistics=MetricsSource(\n", + " s3_uri=\"{}/evaluation.json\".format(\n", + " step_eval.arguments[\"ProcessingOutputConfig\"][\"Outputs\"][0][\"S3Output\"][\"S3Uri\"]\n", + " ),\n", + " content_type=\"application/json\",\n", + " )\n", + ")\n", + "\n", + "try:\n", + " inference_image_uri = sagemaker_session.sagemaker_client.describe_image_version(ImageName=inference_image_name)[\n", + " \"ContainerImage\"\n", + " ]\n", + "except sagemaker_session.sagemaker_client.exceptions.ResourceNotFound:\n", + " inference_image_uri = sagemaker.image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version=\"1.0-1\",\n", + " py_version=\"py3\",\n", + " instance_type=inference_instance_type,\n", + " )\n", + "step_register = RegisterModel(\n", + " name=\"RegisterAbaloneModel\",\n", + " estimator=xgb_train,\n", + " image_uri=inference_image_uri,\n", + " 
model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + " content_types=[\"text/csv\"],\n", + " response_types=[\"text/csv\"],\n", + " inference_instances=[\"ml.t2.medium\", \"ml.m5.large\"],\n", + " transform_instances=[\"ml.m5.large\"],\n", + " model_package_group_name=model_package_group_name,\n", + " approval_status=model_approval_status,\n", + " model_metrics=model_metrics,\n", + ")\n", + "\n", + "# condition step for evaluating model quality and branching execution\n", + "cond_lte = ConditionLessThanOrEqualTo(\n", + " left=JsonGet(\n", + " step_name=step_eval.name,\n", + " property_file=evaluation_report,\n", + " json_path=\"regression_metrics.mse.value\",\n", + " ),\n", + " right=6.0,\n", + ")\n", + "step_cond = ConditionStep(\n", + " name=\"CheckMSEAbaloneEvaluation\",\n", + " conditions=[cond_lte],\n", + " if_steps=[step_register],\n", + " else_steps=[],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Create and run the Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# pipeline instance\n", + "pipeline = Pipeline(\n", + " name=pipeline_name,\n", + " parameters=[\n", + " processing_instance_type,\n", + " processing_instance_count,\n", + " training_instance_type,\n", + " model_approval_status,\n", + " input_data,\n", + " ],\n", + " steps=[step_process, step_train, step_eval, step_cond],\n", + " sagemaker_session=sagemaker_session,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import json\n", + "\n", + "definition = json.loads(pipeline.definition())\n", + "definition" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + 
"pipeline.upsert(role_arn=role, description=f\"{stage} pipelines for {project_name}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "pipeline.start()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_python3", + "language": "python", + "name": "conda_python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/setup.cfg b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/setup.cfg new file mode 100644 index 00000000..6f878705 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/setup.cfg @@ -0,0 +1,14 @@ +[tool:pytest] +addopts = + -vv +testpaths = tests + +[aliases] +test=pytest + +[metadata] +description-file = README.md +license_file = LICENSE + +[wheel] +universal = 1 diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/setup.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/setup.py new file mode 100644 index 00000000..9c190fa5 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/setup.py @@ -0,0 +1,64 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0

import os
from typing import Any, Dict

import setuptools

# Package metadata (__title__, __version__, ...) is kept in
# ml_pipelines/__version__.py and loaded by executing that file, so the
# package itself does not have to be importable at build time.
about: Dict[str, Any] = {}
here = os.path.abspath(os.path.dirname(__file__))
with open(os.path.join(here, "ml_pipelines", "__version__.py")) as f:
    exec(f.read(), about)

# Resolve the README next to this file so the build does not depend on the
# directory it is started from.
with open(os.path.join(here, "README.md"), "r") as f:
    readme = f.read()


required_packages = ["sagemaker"]
extras = {
    "test": [
        "black",
        "coverage",
        "flake8",
        "mock",
        "pydocstyle",
        "pytest",
        "pytest-cov",
        "sagemaker",
        "tox",
    ]
}
setuptools.setup(
    name=about["__title__"],
    description=about["__description__"],
    version=about["__version__"],
    author=about["__author__"],
    author_email=about["__author_email__"],
    long_description=readme,
    long_description_content_type="text/markdown",
    url=about["__url__"],
    license=about["__license__"],
    packages=setuptools.find_packages(),
    include_package_data=True,
    python_requires=">=3.6",
    install_requires=required_packages,
    extras_require=extras,
    entry_points={
        "console_scripts": [
            # Both CLIs live in the ml_pipelines package; there is no
            # top-level "pipelines" package in this project.
            "get-pipeline-definition=ml_pipelines.get_pipeline_definition:main",
            "run-pipeline=ml_pipelines.run_pipeline:main",
        ]
    },
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Natural Language :: English",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
    ],
)
b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/README.md new file mode 100644 index 00000000..e69de29b diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/main.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/main.py new file mode 100644 index 00000000..1d3cf199 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/main.py @@ -0,0 +1,71 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

"""Evaluation script for measuring mean squared error."""
import json
import logging
import pathlib
import pickle
import tarfile

import numpy as np
import pandas as pd
import xgboost
from sklearn.metrics import mean_squared_error

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())


if __name__ == "__main__":
    logger.debug("Starting evaluation.")
    model_path = "/opt/ml/processing/model/model.tar.gz"
    # NOTE(review): extractall() trusts member paths inside the archive. The
    # archive is expected to be the training step's own output; if that ever
    # changes, restrict extraction (e.g. tarfile extraction filters).
    with tarfile.open(model_path) as tar:
        tar.extractall(path=".")

    logger.debug("Loading xgboost model.")
    # NOTE(review): pickle executes arbitrary code on load; only safe because
    # the artifact is produced by this pipeline. The context manager closes the
    # file handle, which the previous bare open() never did.
    with open("xgboost-model", "rb") as model_file:
        model = pickle.load(model_file)

    logger.debug("Reading test data.")
    test_path = "/opt/ml/processing/test/test.csv"
    df = pd.read_csv(test_path, header=None)

    # The first column holds the label; the remaining columns are the features.
    logger.debug("Splitting test data into labels and features.")
    y_test = df.iloc[:, 0].to_numpy()
    df.drop(df.columns[0], axis=1, inplace=True)
    X_test = xgboost.DMatrix(df.values)

    logger.info("Performing predictions against test data.")
    predictions = model.predict(X_test)

    logger.debug("Calculating mean squared error.")
    mse = mean_squared_error(y_test, predictions)
    # Standard deviation of the residuals, reported alongside the MSE.
    std = np.std(y_test - predictions)
    report_dict = {
        "regression_metrics": {
            "mse": {"value": mse, "standard_deviation": std},
        },
    }

    output_dir = "/opt/ml/processing/evaluation"
    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    logger.info("Writing out evaluation report with mse: %f", mse)
    evaluation_path = f"{output_dir}/evaluation.json"
    with open(evaluation_path, "w") as f:
        f.write(json.dumps(report_dict))
a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/README.md b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/README.md new file mode 100644 index 00000000..e69de29b diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/logger.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/logger.py new file mode 100644 index 00000000..bc27f7d9 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/logger.py @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/requirements.txt b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/s3_helper.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/s3_helper.py new file mode 100644 index 00000000..bc27f7d9 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/s3_helper.py @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/test/test_a.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/test/test_a.py new file mode 100644 index 00000000..bc27f7d9 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/helpers/test/test_a.py @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/README.md b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/README.md new file mode 100644 index 00000000..e69de29b diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/main.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/main.py new file mode 100644 index 00000000..5f60c85a --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/main.py @@ -0,0 +1,122 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Feature engineers the abalone dataset.""" +import argparse +import logging +import os +import pathlib +from typing import Any, Dict + +import boto3 +import numpy as np +import pandas as pd +from sklearn.compose import ColumnTransformer +from sklearn.impute import SimpleImputer +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler + +logger = logging.getLogger() +logger.setLevel(logging.INFO) +logger.addHandler(logging.StreamHandler()) + + +# Since we get a headerless CSV file we specify the column names here. 
feature_columns_names = [
    "sex",
    "length",
    "diameter",
    "height",
    "whole_weight",
    "shucked_weight",
    "viscera_weight",
    "shell_weight",
]
label_column = "rings"

feature_columns_dtype = {
    "sex": str,
    "length": np.float64,
    "diameter": np.float64,
    "height": np.float64,
    "whole_weight": np.float64,
    "shucked_weight": np.float64,
    "viscera_weight": np.float64,
    "shell_weight": np.float64,
}
label_column_dtype = {"rings": np.float64}


def merge_two_dicts(x: Dict[str, Any], y: Dict[str, Any]) -> Dict[str, Any]:
    """Merge two dicts into a new one; neither input is modified.

    Keys present in both take their value from ``y``.
    """
    z = x.copy()
    z.update(y)
    return z


def parse_s3_uri(uri: str) -> tuple:
    """Split an ``s3://bucket/key`` URI into a ``(bucket, key)`` tuple.

    Raises:
        ValueError: if ``uri`` does not start with ``s3://`` or lacks a bucket
            or an object key.
    """
    scheme = "s3://"
    if not uri.startswith(scheme):
        raise ValueError(f"Expected an S3 URI of the form s3://bucket/key, got: {uri!r}")
    bucket, _, key = uri[len(scheme):].partition("/")
    if not bucket or not key:
        raise ValueError(f"S3 URI must contain both a bucket and a key, got: {uri!r}")
    return bucket, key


if __name__ == "__main__":
    logger.debug("Starting preprocessing.")
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-data", type=str, required=True)
    args = parser.parse_args()

    base_dir = "/opt/ml/processing"
    pathlib.Path(f"{base_dir}/data").mkdir(parents=True, exist_ok=True)
    # Fail early with a clear message instead of a confusing download error.
    bucket, key = parse_s3_uri(args.input_data)

    logger.info("Downloading data from bucket: %s, key: %s", bucket, key)
    fn = f"{base_dir}/data/abalone-dataset.csv"
    s3 = boto3.resource("s3")
    s3.Bucket(bucket).download_file(key, fn)

    logger.debug("Reading downloaded data.")
    df = pd.read_csv(
        fn,
        header=None,
        names=feature_columns_names + [label_column],
        dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype),
    )
    # The raw download is no longer needed once it is loaded into memory.
    os.unlink(fn)

    logger.debug("Defining transformers.")
    numeric_features = list(feature_columns_names)
    numeric_features.remove("sex")
    numeric_transformer = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="median")),
            ("scaler", StandardScaler()),
        ]
    )

    categorical_features = ["sex"]
    categorical_transformer = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="constant", fill_value="missing")),
            ("onehot", OneHotEncoder(handle_unknown="ignore")),
        ]
    )

    preprocess = ColumnTransformer(
        transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer, categorical_features),
        ]
    )

    logger.info("Applying transforms.")
    y = df.pop(label_column)
    X_pre = preprocess.fit_transform(df)
    y_pre = y.to_numpy().reshape(len(y), 1)

    # Label goes in the first column, followed by the transformed features.
    X = np.concatenate((y_pre, X_pre), axis=1)

    logger.info("Splitting %d rows of data into train, validation, test datasets.", len(X))
    np.random.shuffle(X)
    # 70% train / 15% validation / 15% test.
    train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))])

    logger.info("Writing out datasets to %s.", base_dir)
    for split_name, split_data in (("train", train), ("validation", validation), ("test", test)):
        # Create the output directory ourselves so the script also works when
        # the directories have not been pre-created.
        split_dir = pathlib.Path(base_dir) / split_name
        split_dir.mkdir(parents=True, exist_ok=True)
        pd.DataFrame(split_data).to_csv(split_dir / f"{split_name}.csv", header=False, index=False)
@@ -0,0 +1,2 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/training/xgboost/requirements.txt b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/training/xgboost/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/training/xgboost/test/test_a.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/training/xgboost/test/test_a.py new file mode 100644 index 00000000..bc27f7d9 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/build_app/source_scripts/training/xgboost/test/test_a.py @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/.githooks/pre-commit b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/.githooks/pre-commit new file mode 100755 index 00000000..12eaeef7 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/.githooks/pre-commit @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +# File generated by pre-commit: https://pre-commit.com +# ID: 138fd403232d2ddd5efb44317e38bf03 +import os +import sys + +# we try our best, but the shebang of this script is difficult to determine: +# - macos doesn't ship with python3 +# - windows executables are almost always `python.exe` +# therefore we continue to support python2 for this small script +if sys.version_info < (3, 3): + from distutils.spawn import find_executable as which +else: + from shutil import which + +# work around https://github.com/Homebrew/homebrew-core/issues/30445 +os.environ.pop("__PYVENV_LAUNCHER__", None) + +# start templated +INSTALL_PYTHON = "/usr/local/Caskroom/miniconda/base/envs/aws/bin/python" +ARGS = ["hook-impl", "--config=.pre-commit-config.yaml", "--hook-type=pre-commit"] +# end templated +ARGS.extend(("--hook-dir", os.path.realpath(os.path.dirname(__file__)))) +ARGS.append("--") +ARGS.extend(sys.argv[1:]) + +DNE = "`pre-commit` not found. Did you forget to activate your virtualenv?" 
+if os.access(INSTALL_PYTHON, os.X_OK): + CMD = [INSTALL_PYTHON, "-mpre_commit"] +elif which("pre-commit"): + CMD = ["pre-commit"] +else: + raise SystemExit(DNE) + +CMD.extend(ARGS) +if sys.platform == "win32": # https://bugs.python.org/issue19124 + import subprocess + + if sys.version_info < (3, 7): # https://bugs.python.org/issue25942 + raise SystemExit(subprocess.Popen(CMD).wait()) + else: + raise SystemExit(subprocess.call(CMD)) +else: + os.execvp(CMD[0], CMD) diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/.pre-commit-config.yaml b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/.pre-commit-config.yaml new file mode 100644 index 00000000..7a9c7e1c --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/.pre-commit-config.yaml @@ -0,0 +1,52 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-added-large-files + - id: check-json + - id: check-merge-conflict + # - id: check-yaml + - id: end-of-file-fixer + - id: requirements-txt-fixer + - id: trailing-whitespace +- repo: https://github.com/psf/black + rev: 22.6.0 + hooks: + - id: black + args: ["--line-length=120"] +- repo: https://gitlab.com/PyCQA/flake8 + rev: 3.9.2 + hooks: + - id: flake8 + args: ["--ignore=E231,E501,F841,W503,F403,E266,W605,F541,F401,E302", "--exclude=app.py", "--max-line-length=120"] +- repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.2.0 + hooks: + - id: forbid-crlf + - id: remove-crlf + - id: insert-license + files: \.(py|yaml)$ +- repo: local + hooks: + - id: clear-jupyter-notebooks + name: clear-jupyter-notebooks + entry: bash -c 'find . -type f -name "*.ipynb" -exec jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace "{}" \; && git add . && exit 0' + language: system + pass_filenames: false diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/Makefile b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/Makefile new file mode 100644 index 00000000..ce0bc7b2 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/Makefile @@ -0,0 +1,102 @@ +.PHONY: lint init + +################################################################################# +# GLOBALS # +################################################################################# + +PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) +PROJECT_NAME = gfdtv-dataanalysis-data-models +PYTHON_INTERPRETER = python3 + +ifeq (,$(shell which conda)) +HAS_CONDA=False +else +HAS_CONDA=True +endif + +################################################################################# +# COMMANDS # 
+################################################################################# + +## Lint using flake8 +lint: + flake8 src +## Setup git hooks +init: + git config core.hooksPath .githooks + +clean: + rm -f cdk.staging + rm -rf cdk.out + find . -name '*.egg-info' -exec rm -fr {} + + find . -name '.coverage' -exec rm -fr {} + + find . -name '.pytest_cache' -exec rm -fr {} + + find . -name '.tox' -exec rm -fr {} + + find . -name '__pycache__' -exec rm -fr {} + +################################################################################# +# PROJECT RULES # +################################################################################# + + + + +################################################################################# +# Self Documenting Commands # +################################################################################# + +.DEFAULT_GOAL := help + +# Inspired by +# sed script explained: +# /^##/: +# * save line in hold space +# * purge line +# * Loop: +# * append newline + line to hold space +# * go to next line +# * if line starts with doc comment, strip comment character off and loop +# * remove target prerequisites +# * append hold space (+ newline) to line +# * replace newline plus comments by `---` +# * print line +# Separate expressions are necessary because labels cannot be delimited by +# semicolon; see +.PHONY: help +help: + @echo "$$(tput bold)Available rules:$$(tput sgr0)" + @echo + @sed -n -e "/^## / { \ + h; \ + s/.*//; \ + :doc" \ + -e "H; \ + n; \ + s/^## //; \ + t doc" \ + -e "s/:.*//; \ + G; \ + s/\\n## /---/; \ + s/\\n/ /g; \ + p; \ + }" ${MAKEFILE_LIST} \ + | LC_ALL='C' sort --ignore-case \ + | awk -F '---' \ + -v ncol=$$(tput cols) \ + -v indent=19 \ + -v col_on="$$(tput setaf 6)" \ + -v col_off="$$(tput sgr0)" \ + '{ \ + printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ + n = split($$2, words, " "); \ + line_length = ncol - indent; \ + for (i = 1; i <= n; i++) { \ + line_length -= length(words[i]) + 1; \ + if 
(line_length <= 0) { \ + line_length = ncol - indent - length(words[i]) - 1; \ + printf "\n%*s ", -indent, " "; \ + } \ + printf "%s ", words[i]; \ + } \ + printf "\n"; \ + }' \ + | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars') diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/README.md b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/README.md new file mode 100644 index 00000000..79edc834 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/README.md @@ -0,0 +1,61 @@ +# NEEDED: Explanation on VPC and Network pre-requirements +Check https://gitlab.aws.dev/aws-proserve-emea-etip/cdk-pipelines/-/blob/main/README.md + + +# Welcome to your CDK Python project! + +This is a blank project for Python development with CDK. + +The `cdk.json` file tells the CDK Toolkit how to execute your app. + +This project is set up like a standard Python project. The initialization +process also creates a virtualenv within this project, stored under the `.venv` +directory. To create the virtualenv it assumes that there is a `python3` +(or `python` for Windows) executable in your path with access to the `venv` +package. If for any reason the automatic creation of the virtualenv fails, +you can create the virtualenv manually. + +To manually create a virtualenv on macOS and Linux: + +``` +$ python3 -m venv .venv +``` + +After the init process completes and the virtualenv is created, you can use the following +step to activate your virtualenv. + +``` +$ source .venv/bin/activate +``` + +If you are on a Windows platform, you would activate the virtualenv like this: + +``` +% .venv\Scripts\activate.bat +``` + +Once the virtualenv is activated, you can install the required dependencies. + +``` +$ pip install -r requirements.txt +``` + +At this point you can now synthesize the CloudFormation template for this code.
+ +``` +$ cdk synth +``` + +To add additional dependencies, for example other CDK libraries, just add +them to your `setup.py` file and rerun the `pip install -r requirements.txt` +command. + +## Useful commands + + * `cdk ls` list all stacks in the app + * `cdk synth` emits the synthesized CloudFormation template + * `cdk deploy` deploy this stack to your default AWS account/region + * `cdk diff` compare deployed stack with current state + * `cdk docs` open CDK documentation + +Enjoy! diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/app.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/app.py new file mode 100644 index 00000000..4b3da461 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/app.py @@ -0,0 +1,39 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

from abc import ABCMeta
from pathlib import Path
from typing import Any

import constructs
from aws_cdk import Stack, Stage
from dataclasses import dataclass
from yamldataclassconfig.config import YamlDataClassConfig

# Sub-directory (next to this file) used when no stage/stack specific
# configuration is available.
DEFAULT_STAGE_NAME = "dev"
DEFAULT_STACK_NAME = "dev"


def get_config_for_stage(scope: constructs.Construct, path: str) -> Path:
    """Return the config file path for the stage that contains ``scope``.

    The file is looked up under ``<this dir>/<stage name, lower-cased>/<path>``.
    If the construct is not inside a stage, or the stage has no such file, the
    path under ``DEFAULT_STAGE_NAME`` is returned instead (its existence is not
    checked).
    """
    config_dir = Path(__file__).parent
    default_path = config_dir.joinpath(DEFAULT_STAGE_NAME, path)

    # Stage.of() returns None for constructs that are not nested in a Stage,
    # so guard the attribute access instead of letting it raise AttributeError.
    stage = Stage.of(scope)
    stage_name = stage.stage_name if stage is not None else None
    if not stage_name:
        print(f"Stack created without a stage, config {path} not found. Using {default_path} instead")
        return default_path

    config_path = config_dir.joinpath(stage_name.lower(), path)
    if not config_path.exists():
        print(f"Config file {path} for stage {stage_name} not found. Using {default_path} instead")
        return default_path
    return config_path


def get_config_for_stack(scope: constructs.Construct, path: str) -> Path:
    """Return the config file path for the stack that contains ``scope``.

    The file is looked up under ``<this dir>/<stack name, lower-cased>/<path>``.
    If the stack name is empty or no such file exists, the path under
    ``DEFAULT_STACK_NAME`` is returned instead (its existence is not checked).
    """
    config_dir = Path(__file__).parent
    default_path = config_dir.joinpath(DEFAULT_STACK_NAME, path)

    stack_name = Stack.of(scope).stack_name
    if not stack_name:
        print(f"Construct created without a stack name, config {path} not found. Using {default_path} instead")
        return default_path

    config_path = config_dir.joinpath(stack_name.lower(), path)
    if not config_path.exists():
        print(f"Config file {path} for stack {stack_name} not found. Using {default_path} instead")
        return default_path
    return config_path


@dataclass
class StageYamlDataClassConfig(YamlDataClassConfig, metaclass=ABCMeta):  # type:ignore[misc]
    """YAML-backed config that resolves its file relative to this directory,
    with stage- and stack-specific lookup."""

    def load(self) -> Any:
        """Load the config file of the default (``DEFAULT_STAGE_NAME``) stage."""
        # This module already lives in the config directory, so the default
        # stage folder is a direct child. The former extra "config/" segment
        # produced config/config/dev/..., unlike the helper functions above.
        path = Path(__file__).parent.joinpath(DEFAULT_STAGE_NAME, self.FILE_PATH)
        return super().load(path=path)

    def load_for_stage(self, scope: constructs.Construct) -> Any:
        """Look up the stage of ``scope`` and load the matching config file."""
        path = get_config_for_stage(scope, self.FILE_PATH)
        return super().load(path=path)

    def load_for_stack(self, scope: constructs.Construct) -> Any:
        """Look up the stack of ``scope`` and load the matching config file."""
        path = get_config_for_stack(scope, self.FILE_PATH)
        return super().load(path=path)
/dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/config/constants.py @@ -0,0 +1,33 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# Deployment settings for the CDK app, resolved once from the process
# environment when this module is imported.

# Required account/region pairs for the three deployment targets. They are read
# by indexing os.environ, so a missing variable raises KeyError at import time.
DEPLOYMENT_ACCOUNT, DEPLOYMENT_REGION = os.environ["DEPLOYMENT_ACCOUNT"], os.environ["DEPLOYMENT_REGION"]
PREPROD_ACCOUNT, PREPROD_REGION = os.environ["PREPROD_ACCOUNT"], os.environ["PREPROD_REGION"]
PROD_ACCOUNT, PROD_REGION = os.environ["PROD_ACCOUNT"], os.environ["PROD_REGION"]

# Optional settings: each falls back to the default shown when the variable is unset.
PROJECT_NAME = os.environ.get("PROJECT_NAME", "")
PROJECT_ID = os.environ.get("PROJECT_ID", "")
MODEL_PACKAGE_GROUP_NAME = os.environ.get("MODEL_PACKAGE_GROUP_NAME", "")
MODEL_BUCKET_ARN = os.environ.get("MODEL_BUCKET_ARN", "arn:aws:s3:::*mlops*")
ECR_REPO_ARN = os.environ.get("ECR_REPO_ARN")  # None when unset
# Network and sizing settings for the "dev" stage.
# NOTE(review): the IDs below ("vpc-", "subnet-N", "sg-") are incomplete and look
# like placeholders to be filled in per environment - confirm before deploying.
VPC_ID = "vpc-"
# A CIDR block must be written with its network address (all host bits zero);
# the previous value "10.0.0.1/21" pointed at a host inside 10.0.0.0/21.
VPC_CIDR = "10.0.0.0/21"

APP_SUBNETS = ["subnet-1", "subnet-2", "subnet-3"]

BASE_SECURITY_GROUP = "sg-"

# NOTE(review): units are not stated here and no consumer is visible in this
# file - presumably seconds for the timeout and MB for the memory sizes.
DEFAULT_TIMEOUT = 300
DEFAULT_MEMORY_SIZE = 256
INFERENCE_MEMORY_SIZE = 1024
# Network and sizing settings for the "prod" stage.
# NOTE(review): the IDs below ("vpc-", "subnet-N", "sg-") are incomplete and look
# like placeholders to be filled in per environment - confirm before deploying.
VPC_ID = "vpc-"
# A CIDR block must be written with its network address (all host bits zero);
# the previous value "10.0.0.1/21" pointed at a host inside 10.0.0.0/21.
VPC_CIDR = "10.0.0.0/21"

APP_SUBNETS = ["subnet-1", "subnet-2", "subnet-3"]

BASE_SECURITY_GROUP = "sg-"

# NOTE(review): units are not stated here and no consumer is visible in this
# file - presumably seconds for the timeout and MB for the memory sizes.
DEFAULT_TIMEOUT = 300
DEFAULT_MEMORY_SIZE = 256
INFERENCE_MEMORY_SIZE = 1024
00000000..cfc61900 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/config/staging/constants.py @@ -0,0 +1,27 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# Network and sizing settings for the "staging" stage.
# NOTE(review): the IDs below ("vpc-", "subnet-N", "sg-") are incomplete and look
# like placeholders to be filled in per environment - confirm before deploying.
VPC_ID = "vpc-"
# A CIDR block must be written with its network address (all host bits zero);
# the previous value "10.0.0.1/21" pointed at a host inside 10.0.0.0/21.
VPC_CIDR = "10.0.0.0/21"

APP_SUBNETS = ["subnet-1", "subnet-2", "subnet-3"]

BASE_SECURITY_GROUP = "sg-"

# NOTE(review): units are not stated here and no consumer is visible in this
# file - presumably seconds for the timeout and MB for the memory sizes.
DEFAULT_TIMEOUT = 300
DEFAULT_MEMORY_SIZE = 256
INFERENCE_MEMORY_SIZE = 1024
# ---- deploy_endpoint/deploy_endpoint_stack.py ----


@dataclass
class EndpointConfigProductionVariant(StageYamlDataClassConfig):  # type:ignore[misc]
    """
    Endpoint Config Production Variant Dataclass
    a dataclass to handle mapping yml file configs to python class for endpoint configs

    The field values below are the defaults carried by a freshly constructed
    instance; ``FILE_PATH`` names the yml file the ``load*`` methods of the
    base class read.
    """

    initial_instance_count: float = 1
    initial_variant_weight: float = 1
    instance_type: str = "ml.m5.2xlarge"
    variant_name: str = "AllTraffic"

    FILE_PATH: Path = create_file_path_field("endpoint-config.yml", path_is_absolute=True)

    def get_endpoint_config_production_variant(
        self, model_name: str
    ) -> sagemaker.CfnEndpointConfig.ProductionVariantProperty:
        """
        Build the CDK production variant property from this dataclass's fields.

        Parameters:
            model_name: name of the sagemaker model resource the sagemaker endpoint would use

        Returns:
            ProductionVariantProperty: entry for ``CfnEndpointConfig.production_variants``
        """
        return sagemaker.CfnEndpointConfig.ProductionVariantProperty(
            initial_instance_count=self.initial_instance_count,
            initial_variant_weight=self.initial_variant_weight,
            instance_type=self.instance_type,
            variant_name=self.variant_name,
            model_name=model_name,
        )


class DeployEndpointStack(Stack):
    """
    Deploy Endpoint Stack
    Deploy Endpoint stack which provisions SageMaker Model Endpoint resources:
    an execution role, a model built from the latest approved model package,
    a KMS-encrypted endpoint config and the endpoint itself.
    """

    def __init__(
        self,
        scope: constructs.Construct,
        id: str,
        **kwargs: Any,
    ):
        """
        Parameters:
            scope: parent construct of the stack
            id: construct id; also embedded in the model, endpoint config and endpoint names
            **kwargs: forwarded unchanged to ``aws_cdk.Stack``

        Raises:
            Exception: propagated from ``get_approved_package`` when no approved
                model package can be resolved (the lookup runs at synth time).
        """
        super().__init__(scope, id, **kwargs)

        Tags.of(self).add("sagemaker:project-id", PROJECT_ID)
        Tags.of(self).add("sagemaker:project-name", PROJECT_NAME)
        Tags.of(self).add("sagemaker:deployment-stage", Stack.of(self).stack_name)

        # iam role that would be used by the model endpoint to run the inference
        model_execution_policy = iam.ManagedPolicy(
            self,
            "ModelExecutionPolicy",
            document=iam.PolicyDocument(
                statements=[
                    iam.PolicyStatement(
                        actions=[
                            "s3:Put*",
                            "s3:Get*",
                            "s3:List*",
                        ],
                        effect=iam.Effect.ALLOW,
                        resources=[
                            MODEL_BUCKET_ARN,
                            f"{MODEL_BUCKET_ARN}/*",
                        ],
                    ),
                    iam.PolicyStatement(
                        actions=[
                            "kms:Encrypt",
                            "kms:ReEncrypt*",
                            "kms:GenerateDataKey*",
                            "kms:Decrypt",
                            "kms:DescribeKey",
                        ],
                        effect=iam.Effect.ALLOW,
                        resources=[f"arn:aws:kms:{Aws.REGION}:{DEPLOYMENT_ACCOUNT}:key/*"],
                    ),
                ]
            ),
        )

        # ECR read access is only granted when a repository ARN was configured
        if ECR_REPO_ARN:
            model_execution_policy.add_statements(
                iam.PolicyStatement(
                    actions=["ecr:Get*"],
                    effect=iam.Effect.ALLOW,
                    resources=[ECR_REPO_ARN],
                )
            )

        model_execution_role = iam.Role(
            self,
            "ModelExecutionRole",
            assumed_by=iam.ServicePrincipal("sagemaker.amazonaws.com"),
            managed_policies=[
                model_execution_policy,
                iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess"),
            ],
        )

        # Timestamp embedded in the model and endpoint config names so that every
        # synth produces new names. Ask for the current time in UTC directly:
        # datetime.now().replace(tzinfo=utc) would only relabel local wall-clock time.
        timestamp = datetime.now(tz=timezone.utc).strftime("%Y%m%d%H%M%S")

        # get latest approved model package from the model registry (only from a specific model package group)
        latest_approved_model_package = get_approved_package()

        # Sagemaker Model
        model_name = f"{MODEL_PACKAGE_GROUP_NAME}-{id}-{timestamp}"

        model = sagemaker.CfnModel(
            self,
            "Model",
            execution_role_arn=model_execution_role.role_arn,
            model_name=model_name,
            containers=[
                sagemaker.CfnModel.ContainerDefinitionProperty(model_package_name=latest_approved_model_package)
            ],
        )

        # Sagemaker Endpoint Config
        endpoint_config_name = f"{MODEL_PACKAGE_GROUP_NAME}-{id}-ec-{timestamp}"

        endpoint_config_production_variant = EndpointConfigProductionVariant()

        # pick up the endpoint-config.yml that belongs to this stack
        endpoint_config_production_variant.load_for_stack(self)

        # kms key handed to the endpoint config below (kms_key_id)
        kms_key = kms.Key(
            self,
            "endpoint-kms-key",
            description="key used for encryption of data in Amazpn SageMaker Endpoint",
            enable_key_rotation=True,
            policy=iam.PolicyDocument(
                statements=[
                    iam.PolicyStatement(
                        actions=["kms:*"],
                        effect=iam.Effect.ALLOW,
                        resources=["*"],
                        principals=[iam.AccountRootPrincipal()],
                    )
                ]
            ),
        )

        endpoint_config = sagemaker.CfnEndpointConfig(
            self,
            "EndpointConfig",
            endpoint_config_name=endpoint_config_name,
            kms_key_id=kms_key.key_id,
            production_variants=[
                endpoint_config_production_variant.get_endpoint_config_production_variant(
                    model.model_name  # type: ignore[arg-type]
                )
            ],
        )

        # The config refers to the model by its plain name, so the ordering has to be
        # declared explicitly. add_dependency replaces the deprecated add_depends_on.
        endpoint_config.add_dependency(model)

        # Sagemaker Endpoint
        endpoint_name = f"{MODEL_PACKAGE_GROUP_NAME}-{id}-endpoint"

        endpoint = sagemaker.CfnEndpoint(
            self,
            "Endpoint",
            endpoint_config_name=endpoint_config.endpoint_config_name,  # type: ignore[arg-type]
            endpoint_name=endpoint_name,
        )

        endpoint.add_dependency(endpoint_config)

        self.endpoint = endpoint


# ---- deploy_endpoint/get_approved_package.py ----


def get_approved_package() -> Any:
    """Gets the latest approved model package for a model package group.

    Queries ``MODEL_PACKAGE_GROUP_NAME`` through the module-level ``sm_client``.

    Returns:
        The SageMaker Model Package ARN.

    Raises:
        Exception: when the group has no approved package, or when the
            SageMaker API call fails (the ``ClientError`` is chained as the cause).
    """
    request = {
        "ModelPackageGroupName": MODEL_PACKAGE_GROUP_NAME,
        "ModelApprovalStatus": "Approved",
        "SortBy": "CreationTime",
        # Request newest-first explicitly instead of relying on the service
        # default, since the first entry is returned as the "latest" package.
        "SortOrder": "Descending",
        "MaxResults": 100,
    }
    try:
        # Get the latest approved model package
        response = sm_client.list_model_packages(**request)
        approved_packages = response["ModelPackageSummaryList"]
        # Fetch more packages if none returned with continuation token
        while not approved_packages and "NextToken" in response:
            logger.debug(f"Getting more packages for token: {response['NextToken']}")
            response = sm_client.list_model_packages(NextToken=response["NextToken"], **request)
            approved_packages = response["ModelPackageSummaryList"]
    except ClientError as e:
        error_message = e.response["Error"]["Message"]
        logger.error(error_message)
        raise Exception(error_message) from e

    # Return error if no packages found
    if not approved_packages:
        error_message = f"No approved ModelPackage found for ModelPackageGroup: {MODEL_PACKAGE_GROUP_NAME}"
        logger.error(error_message)
        raise Exception(error_message)

    # Return the model package arn
    model_package_arn = approved_packages[0]["ModelPackageArn"]
    logger.info(f"Identified the latest approved model package: {model_package_arn}")
    return model_package_arn
b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/requirements.txt new file mode 100644 index 00000000..c1dbbbff --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/requirements.txt @@ -0,0 +1,4 @@ +aws-cdk-lib +boto3 +constructs +yamldataclassconfig diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/source.bat b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/source.bat new file mode 100644 index 00000000..9e1a8344 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/source.bat @@ -0,0 +1,13 @@ +@echo off + +rem The sole purpose of this script is to make the command +rem +rem source .venv/bin/activate +rem +rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows. +rem On Windows, this command just runs this batch file (the argument is ignored). +rem +rem Now we don't need to document a Windows command for activating a virtualenv. + +echo Executing .venv\Scripts\activate.bat for you +.venv\Scripts\activate.bat diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/README.md b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/README.md new file mode 100644 index 00000000..e69de29b diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/integration_tests/__init__.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/integration_tests/__init__.py new file mode 100644 index 00000000..bc27f7d9 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/integration_tests/__init__.py @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/integration_tests/buildspec.yml b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/integration_tests/buildspec.yml new file mode 100644 index 00000000..6b0c888f --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/integration_tests/buildspec.yml @@ -0,0 +1,16 @@ +version: 0.2 + +phases: + install: + runtime-versions: + python: 3.8 + build: + commands: + # Call the test python code + - python tests/integration_tests/endpoint_test.py --import-build-config $CODEBUILD_SRC_DIR_BuildArtifact/staging-config-export.json --export-test-results $EXPORT_TEST_RESULTS + # Show the test results file + - cat $EXPORT_TEST_RESULTS + +artifacts: + files: + - $EXPORT_TEST_RESULTS diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/integration_tests/endpoint_test.py 
def invoke_endpoint(endpoint_name):
    """
    Add custom logic here to invoke the endpoint and validate response.

    As shipped this sends no request: it echoes the endpoint name and reports success.
    """
    return {"endpoint_name": endpoint_name, "success": True}


def test_endpoint(endpoint_name):
    """
    Describe the endpoint and ensure InService, then invoke endpoint.

    Returns:
        The result of ``invoke_endpoint(endpoint_name)``.

    Raises:
        Exception: when the endpoint is not InService, or when a SageMaker API
            call fails (the ``ClientError`` is chained as the cause).
    """
    try:
        # Ensure endpoint is in service
        response = sm_client.describe_endpoint(EndpointName=endpoint_name)
        status = response["EndpointStatus"]
        if status != "InService":
            error_message = f"SageMaker endpoint: {endpoint_name} status: {status} not InService"
            logger.error(error_message)
            raise Exception(error_message)

        # Output if endpoint has data capture enabled
        endpoint_config_name = response["EndpointConfigName"]
        response = sm_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name)
        if response.get("DataCaptureConfig", {}).get("EnableCapture"):
            logger.info(f"data capture enabled for endpoint config {endpoint_config_name}")

        # Call endpoint to handle
        return invoke_endpoint(endpoint_name)
    except ClientError as e:
        error_message = e.response["Error"]["Message"]
        logger.error(error_message)
        raise Exception(error_message) from e


# This is a helper, not a pytest test. The file name matches pytest's default
# "*_test.py" pattern and the function name starts with "test", so without this
# marker pytest would collect it and fail on the unknown "endpoint_name" fixture.
test_endpoint.__test__ = False


def main():
    """Script entry point: read the build config, test the endpoint, export the results."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--log-level", type=str, default=os.environ.get("LOGLEVEL", "INFO").upper())
    parser.add_argument("--import-build-config", type=str, required=True)
    parser.add_argument("--export-test-results", type=str, required=True)
    args, _ = parser.parse_known_args()

    # Configure logging to output the line number and message
    log_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s"
    logging.basicConfig(format=log_format, level=args.log_level)

    # Load the build config
    with open(args.import_build_config, "r") as f:
        config = json.load(f)

    # Get the endpoint name from sagemaker project name
    # NOTE(review): built as "<SageMakerProjectName>-<StageName>"; confirm this
    # matches the name the deploy stack actually gives the endpoint.
    parameters = config["Parameters"]
    endpoint_name = f"{parameters['SageMakerProjectName']}-{parameters['StageName']}"
    results = test_endpoint(endpoint_name)

    # Print results and write to file
    logger.debug(json.dumps(results, indent=4))
    with open(args.export_test_results, "w") as f:
        json.dump(results, f, indent=4)


if __name__ == "__main__":
    main()
a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/unittests/__init__.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/unittests/__init__.py new file mode 100644 index 00000000..bc27f7d9 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/unittests/__init__.py @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/unittests/test_deploy_app_stack.py b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/unittests/test_deploy_app_stack.py new file mode 100644 index 00000000..33a2a9c9 --- /dev/null +++ b/modules/sagemaker/sagemaker-templates/templates/multi_account_basic/seed_code/deploy_app/tests/unittests/test_deploy_app_stack.py @@ -0,0 +1,33 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
def test_sqs_queue_created():
    """Synthesize DeployEndpointStack and check that the endpoint resources are present.

    NOTE: the function name is a leftover from the ``cdk init`` scaffold; no SQS
    queue is involved. It is kept so existing test selections keep working.
    """
    app = core.App()
    stack = DeployEndpointStack(app, "deploy-app")
    template = assertions.Template.from_stack(stack)

    # The stack defines exactly one model, one endpoint config and one endpoint.
    template.resource_count_is("AWS::SageMaker::Model", 1)
    template.resource_count_is("AWS::SageMaker::EndpointConfig", 1)
    template.resource_count_is("AWS::SageMaker::Endpoint", 1)
@pytest.fixture(scope="function")
def stack_defaults(monkeypatch):
    """Provide the environment ``app`` reads at import and force a fresh import of it.

    The variables are set through ``monkeypatch`` so they are rolled back after
    each test instead of leaking into the rest of the test session.
    """
    monkeypatch.setenv("SEEDFARMER_PROJECT_NAME", "test-project")
    monkeypatch.setenv("SEEDFARMER_DEPLOYMENT_NAME", "test-deployment")
    monkeypatch.setenv("SEEDFARMER_MODULE_NAME", "test-module")
    monkeypatch.setenv("CDK_DEFAULT_ACCOUNT", "111111111111")
    monkeypatch.setenv("CDK_DEFAULT_REGION", "us-east-1")
    monkeypatch.setenv("SEEDFARMER_PARAMETER_PORTFOLIO_ACCESS_ROLE_ARN", "arn:aws:iam::xxxxxxxxxxxx:role/role")
    # Unload the app import so that subsequent tests don't reuse
    sys.modules.pop("app", None)


def test_portfolio_access_role(stack_defaults, monkeypatch):
    """Importing ``app`` without the portfolio access role parameter must fail."""
    monkeypatch.delenv("SEEDFARMER_PARAMETER_PORTFOLIO_ACCESS_ROLE_ARN")

    with pytest.raises(Exception, match="Missing input parameter portfolio-access-role-arn"):
        import app  # noqa: F401
@pytest.fixture(scope="function")
def stack_defaults(monkeypatch):
    """Set the CDK default account/region and force a fresh import of ``stack``.

    The variables are set through ``monkeypatch`` so they are rolled back after
    each test instead of leaking into the rest of the test session.
    """
    monkeypatch.setenv("CDK_DEFAULT_ACCOUNT", "111111111111")
    monkeypatch.setenv("CDK_DEFAULT_REGION", "us-east-1")

    # Unload the app import so that subsequent tests don't reuse
    sys.modules.pop("stack", None)


def test_synthesize_stack(stack_defaults):
    """ServiceCatalogStack synthesizes with one portfolio and one CloudFormation product."""
    import stack

    app = cdk.App()
    project_name = "test-project"
    dep_name = "test-deployment"
    mod_name = "test-module"
    portfolio_name = "portfolio"
    portfolio_owner = "owner"
    portfolio_access_role_arn = "arn:aws:iam::xxxxxxxxxxxx:role/role"

    # Use a separate local name so the imported ``stack`` module is not rebound.
    service_catalog_stack = stack.ServiceCatalogStack(
        app,
        f"{project_name}-{dep_name}-{mod_name}",
        portfolio_name=portfolio_name,
        portfolio_owner=portfolio_owner,
        portfolio_access_role_arn=portfolio_access_role_arn,
        env=cdk.Environment(
            account=os.environ["CDK_DEFAULT_ACCOUNT"],
            region=os.environ["CDK_DEFAULT_REGION"],
        ),
    )

    template = Template.from_stack(service_catalog_stack)

    template.resource_count_is("AWS::ServiceCatalog::Portfolio", 1)
    template.resource_count_is("AWS::ServiceCatalog::CloudFormationProduct", 1)