From 6056d240777955b415d3dc51ec1d35c18e713f3b Mon Sep 17 00:00:00 2001 From: Laren-AWS <57545972+Laren-AWS@users.noreply.github.com> Date: Tue, 19 Sep 2023 15:58:29 -0700 Subject: [PATCH] Python: Resilient service workflow followup (#5393) * Resilient service followup items: tests, metadata, specification, and more! --- .doc_gen/metadata/auto-scaling_metadata.yaml | 33 +- .doc_gen/metadata/cross_metadata.yaml | 7 +- .doc_gen/metadata/ec2_metadata.yaml | 128 ++ .../elastic-load-balancing-v2_metadata.yaml | 143 +++ .doc_gen/metadata/iam_metadata.yaml | 38 + .doc_gen/readmes/includes/important.jinja2 | 2 +- .github/pre_validate/pre_validate.py | 3 +- .../resilient_service/auto_scaler.py | 68 +- .../resilient_service/load_balancer.py | 16 + .../resilient_service/parameters.py | 10 +- .../cross_service/resilient_service/runner.py | 8 +- .../resilient_service/test/conftest.py | 83 ++ .../test/resources/instance_policy.json | 28 + .../test/resources/recommendations.json | 11 + .../test/resources/server.py | 136 +++ .../test/resources/server_startup_script.sh | 7 + .../test/resources/ssm_only_policy.json | 14 + .../resilient_service/test/test_demo.py | 137 +++ .../resilient_service/test/test_deploy.py | 153 +++ .../resilient_service/test/test_destroy.py | 107 ++ .../test/test_runner_integ.py | 35 + python/example_code/auto-scaling/README.md | 49 +- python/example_code/auto-scaling/hello.py | 27 + python/example_code/ec2/README.md | 42 +- .../elastic-load-balancing/README.md | 135 +++ .../elastic-load-balancing/hello.py | 26 + .../elastic-load-balancing/requirements.txt | 2 + python/example_code/iam/README.md | 54 +- python/example_code/s3/README.md | 9 +- python/test_tools/autoscaling_stubber.py | 10 +- python/test_tools/ec2_stubber.py | 48 +- python/test_tools/elbv2_stubber.py | 96 ++ python/test_tools/iam_stubber.py | 20 +- python/test_tools/ssm_stubber.py | 29 +- python/test_tools/stubber_factory.py | 3 + workflows/resilient_service/README.md | 6 +- 
workflows/resilient_service/SPECIFICATION.md | 1046 +++++++++++++++++ .../resilient_service/resources/server.py | 18 +- .../resources/server_startup_script.sh | 2 +- 39 files changed, 2722 insertions(+), 67 deletions(-) create mode 100644 .doc_gen/metadata/elastic-load-balancing-v2_metadata.yaml create mode 100644 python/cross_service/resilient_service/test/conftest.py create mode 100644 python/cross_service/resilient_service/test/resources/instance_policy.json create mode 100644 python/cross_service/resilient_service/test/resources/recommendations.json create mode 100644 python/cross_service/resilient_service/test/resources/server.py create mode 100644 python/cross_service/resilient_service/test/resources/server_startup_script.sh create mode 100644 python/cross_service/resilient_service/test/resources/ssm_only_policy.json create mode 100644 python/cross_service/resilient_service/test/test_demo.py create mode 100644 python/cross_service/resilient_service/test/test_deploy.py create mode 100644 python/cross_service/resilient_service/test/test_destroy.py create mode 100644 python/cross_service/resilient_service/test/test_runner_integ.py create mode 100644 python/example_code/auto-scaling/hello.py create mode 100644 python/example_code/elastic-load-balancing/README.md create mode 100644 python/example_code/elastic-load-balancing/hello.py create mode 100644 python/example_code/elastic-load-balancing/requirements.txt create mode 100644 python/test_tools/elbv2_stubber.py create mode 100644 workflows/resilient_service/SPECIFICATION.md diff --git a/.doc_gen/metadata/auto-scaling_metadata.yaml b/.doc_gen/metadata/auto-scaling_metadata.yaml index 4a3d22a86c1..c2317ac717c 100644 --- a/.doc_gen/metadata/auto-scaling_metadata.yaml +++ b/.doc_gen/metadata/auto-scaling_metadata.yaml @@ -22,6 +22,14 @@ auto-scaling_Hello: - description: snippet_tags: - php.example_code.auto-scaling.basics.helloService + Python: + versions: + - sdk_version: 3 + github: 
python/example_code/auto-scaling + excerpts: + - description: + snippet_tags: + - python.example_code.auto-scaling.Hello Rust: versions: - sdk_version: 1 @@ -154,10 +162,11 @@ auto-scaling_DeleteAutoScalingGroup: - sdk_version: 3 github: python/example_code/auto-scaling excerpts: - - description: + - description: Update the minimum size of an Auto Scaling group to zero, terminate all + instances in the group, and delete the group. snippet_tags: - - python.example_code.auto-scaling.AutoScalingWrapper.decl - - python.example_code.auto-scaling.DeleteAutoScalingGroup + - python.cross_service.resilient_service.AutoScaler.decl + - python.cross_service.resilient_service.auto-scaling.DeleteAutoScalingGroup Rust: versions: - sdk_version: 1 @@ -685,6 +694,24 @@ auto-scaling_DisableMetricsCollection: - cpp.example_code.autoscaling.disable_metrics_collection2 services: auto-scaling: {DisableMetricsCollection} +auto-scaling_AttachLoadBalancerTargetGroups: + title: Attach an ELB target group to an &AS; group using an &AWS; SDK + title_abbrev: Attach an ELB target group to an Auto Scaling group + synopsis: attach an ELB target group to an &AS; group. 
+ category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/auto-scaling + sdkguide: + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.AutoScaler.decl + - python.cross_service.resilient_service.auto-scaling.AttachLoadBalancerTargetGroups + services: + auto-scaling: {AttachLoadBalancerTargetGroups} auto-scaling_Scenario_GroupsAndInstances: title: Manage &AS; groups and instances using an &AWS; SDK title_abbrev: Manage groups and instances diff --git a/.doc_gen/metadata/cross_metadata.yaml b/.doc_gen/metadata/cross_metadata.yaml index 4142d8bd1fc..da78e464480 100644 --- a/.doc_gen/metadata/cross_metadata.yaml +++ b/.doc_gen/metadata/cross_metadata.yaml @@ -634,6 +634,9 @@ cross_ResilientService: - sdk_version: 3 github: python/cross_service/resilient_service excerpts: + - description: Run the interactive scenario at a command prompt. + snippet_tags: + - python.example_code.workflow.ResilientService_Runner - description: Create a class that wraps &AS; and &EC2; actions. snippet_tags: - python.example_code.workflow.ResilientService_AutoScaler @@ -646,9 +649,6 @@ cross_ResilientService: - description: Create a class that wraps &SYS; actions. snippet_tags: - python.example_code.workflow.ResilientService_ParameterHelper - - description: Run the interactive scenario at a command prompt. 
- snippet_tags: - - python.example_code.workflow.ResilientService_Runner services: auto-scaling: { CreateAutoScalingGroup, DescribeAutoScalingGroups, TerminateInstanceInAutoScalingGroup, @@ -661,3 +661,4 @@ cross_ResilientService: elastic-load-balancing-v2: { DescribeLoadBalancers, CreateTargetGroup, DescribeTargetGroups, DeleteTargetGroup, CreateLoadBalancer, CreateListener, DeleteLoadBalancer, DescribeTargetHealth} + iam: {CreateInstanceProfile, DeleteInstanceProfile} diff --git a/.doc_gen/metadata/ec2_metadata.yaml b/.doc_gen/metadata/ec2_metadata.yaml index 54ebebf26d5..c6622a0b7d9 100644 --- a/.doc_gen/metadata/ec2_metadata.yaml +++ b/.doc_gen/metadata/ec2_metadata.yaml @@ -1476,6 +1476,15 @@ ec2_RebootInstances: - description: snippet_tags: - ec2.cpp.reboot_instance.code + Python: + versions: + - sdk_version: 3 + github: python/example_code/ec2 + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.AutoScaler.decl + - python.cross_service.resilient_service.ec2.ReplaceIamInstanceProfileAssociation services: ec2: {RebootInstances} ec2_DescribeImages: @@ -1615,6 +1624,15 @@ ec2_DescribeAvailabilityZones: snippet_tags: - cpp.example_code.ec2.describe_regions.client - ec2.cpp.describe_zones.code + Python: + versions: + - sdk_version: 3 + github: python/example_code/ec2 + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.AutoScaler.decl + - python.cross_service.resilient_service.ec2.DescribeAvailabilityZones services: ec2: {DescribeAvailabilityZones} ec2_CreateTags: @@ -1689,6 +1707,116 @@ ec2_createVpc: services: ec2: {CreateVpc} +ec2_DescribeIamInstanceProfileAssociations: + title: Get data about the instance profile associated with an &EC2; instance using an &AWS; SDK + title_abbrev: Get data about the instance profile associated with an instance + synopsis: get data about the instance profile associated with an &EC2; instance. 
+ category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/ec2 + sdkguide: + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.AutoScaler.decl + - python.cross_service.resilient_service.ec2.DescribeIamInstanceProfileAssociations + services: + ec2: {DescribeIamInstanceProfileAssociations} +ec2_ReplaceIamInstanceProfileAssociation: + title: Replace the instance profile associated with an &EC2; instance using an &AWS; SDK + title_abbrev: Replace the instance profile associated with an instance + synopsis: replace the instance profile associated with an &EC2; instance. + category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/ec2 + sdkguide: + excerpts: + - description: This example replaces the instance profile of a running instance, + reboots the instance, and sends a command to the instance after it starts. + snippet_tags: + - python.cross_service.resilient_service.AutoScaler.decl + - python.cross_service.resilient_service.ec2.ReplaceIamInstanceProfileAssociation + services: + ec2: {ReplaceIamInstanceProfileAssociation} +ec2_CreateLaunchTemplate: + title: Create an &EC2; launch template using an &AWS; SDK + title_abbrev: Create a launch template + synopsis: create an &EC2; launch template. + category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/ec2 + sdkguide: + excerpts: + - description: This example creates a launch template that includes an instance profile + that grants specific permissions to the instance, and a user data Bash script that runs on the instance after it starts. 
+ snippet_tags: + - python.cross_service.resilient_service.AutoScaler.decl + - python.cross_service.resilient_service.ec2.CreateLaunchTemplate + services: + ec2: {CreateLaunchTemplate} +ec2_DeleteLaunchTemplate: + title: Delete an &EC2; launch template using an &AWS; SDK + title_abbrev: Delete a launch template + synopsis: delete an &EC2; launch template. + category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/ec2 + sdkguide: + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.AutoScaler.decl + - python.cross_service.resilient_service.ec2.DeleteLaunchTemplate + services: + ec2: {DeleteLaunchTemplate} +ec2_DescribeVpcs: + title: Get the default VPC using an &AWS; SDK + title_abbrev: Get the default VPC + synopsis: get the default VPC of the current account. + category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/ec2 + sdkguide: + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.AutoScaler.decl + - python.cross_service.resilient_service.ec2.DescribeVpcs + services: + ec2: {DescribeVpcs} +ec2_DescribeSubnets: + title: Get the default subnets for a VPC using an &AWS; SDK + title_abbrev: Get the default subnets for a VPC + synopsis: get the default subnets for a VPC. 
+ category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/ec2 + sdkguide: + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.AutoScaler.decl + - python.cross_service.resilient_service.ec2.DescribeSubnets + services: + ec2: {DescribeSubnets} ec2_Scenario_GetStartedInstances: title: Get started with &EC2; instances using an &AWS; SDK title_abbrev: Get started with instances diff --git a/.doc_gen/metadata/elastic-load-balancing-v2_metadata.yaml b/.doc_gen/metadata/elastic-load-balancing-v2_metadata.yaml new file mode 100644 index 00000000000..6542a86f4fc --- /dev/null +++ b/.doc_gen/metadata/elastic-load-balancing-v2_metadata.yaml @@ -0,0 +1,143 @@ +elastic-load-balancing-v2_Hello: + title: Hello &ELB; + title_abbrev: Hello &ELB; + synopsis: get started using &ELB;. + category: Hello + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/elastic-load-balancing + sdkguide: + excerpts: + - description: + snippet_tags: + - python.example_code.elbv2.Hello + services: + elastic-load-balancing-v2: {DescribeLoadBalancers} +elastic-load-balancing-v2_DescribeLoadBalancers: + title: Get the endpoint of an ELB load balancer using an &AWS; SDK + title_abbrev: Get the endpoint of a load balancer + synopsis: get the endpoint of an ELB load balancer. + category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/elastic-load-balancing + sdkguide: + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.LoadBalancer.decl + - python.cross_service.resilient_service.elbv2.DescribeLoadBalancers + services: + elastic-load-balancing-v2: {DescribeLoadBalancers} +elastic-load-balancing-v2_CreateTargetGroup: + title: Create an ELB target group using an &AWS; SDK + title_abbrev: Create a target group + synopsis: create an ELB target group. 
+ category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/elastic-load-balancing + sdkguide: + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.LoadBalancer.decl + - python.cross_service.resilient_service.elbv2.CreateTargetGroup + services: + elastic-load-balancing-v2: {CreateTargetGroup} +elastic-load-balancing-v2_DeleteTargetGroup: + title: Delete an ELB target group using an &AWS; SDK + title_abbrev: Delete a target group + synopsis: delete an ELB target group. + category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/elastic-load-balancing + sdkguide: + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.LoadBalancer.decl + - python.cross_service.resilient_service.elbv2.DeleteTargetGroup + services: + elastic-load-balancing-v2: {DeleteTargetGroup} +elastic-load-balancing-v2_CreateLoadBalancer: + title: Create an ELB Application Load Balancer using an &AWS; SDK + title_abbrev: Create an Application Load Balancer + synopsis: create an ELB Application Load Balancer. + category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/elastic-load-balancing + sdkguide: + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.LoadBalancer.decl + - python.cross_service.resilient_service.elbv2.CreateLoadBalancer + services: + elastic-load-balancing-v2: {CreateLoadBalancer} +elastic-load-balancing-v2_CreateListener: + title: Create a listener for an ELB load balancer using an &AWS; SDK + title_abbrev: Create a listener for a load balancer + synopsis: create a listener that forwards requests from an ELB load balancer to a target group. 
+ category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/elastic-load-balancing + sdkguide: + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.LoadBalancer.decl + - python.cross_service.resilient_service.elbv2.CreateListener + services: + elastic-load-balancing-v2: {CreateListener} +elastic-load-balancing-v2_DeleteLoadBalancer: + title: Delete an ELB load balancer using an &AWS; SDK + title_abbrev: Delete a load balancer + synopsis: delete an ELB load balancer. + category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/elastic-load-balancing + sdkguide: + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.LoadBalancer.decl + - python.cross_service.resilient_service.elbv2.DeleteLoadBalancer + services: + elastic-load-balancing-v2: {DeleteLoadBalancer} +elastic-load-balancing-v2_DescribeTargetHealth: + title: Get the health of an ELB target group using an &AWS; SDK + title_abbrev: Get the health of a target group + synopsis: get the health of instances in an ELB target group. 
+ category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/elastic-load-balancing + sdkguide: + excerpts: + - description: + snippet_tags: + - python.cross_service.resilient_service.LoadBalancer.decl + - python.cross_service.resilient_service.elbv2.DescribeTargetHealth + services: + elastic-load-balancing-v2: {DescribeTargetHealth} diff --git a/.doc_gen/metadata/iam_metadata.yaml b/.doc_gen/metadata/iam_metadata.yaml index c1736bf0721..2f01f39d8e8 100644 --- a/.doc_gen/metadata/iam_metadata.yaml +++ b/.doc_gen/metadata/iam_metadata.yaml @@ -3355,6 +3355,44 @@ iam_UploadServerCertificate: - javascript.v3.iam.actions.UploadServerCertificate services: iam: {UploadServerCertificate} +iam_CreateInstanceProfile: + title: Create an &IAM; instance profile using an &AWS; SDK + title_abbrev: Create an instance profile + synopsis: create an &IAM; instance profile. + category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/iam + sdkguide: + excerpts: + - description: This example creates a policy, role, and instance profile and + links them all together. + snippet_tags: + - python.cross_service.resilient_service.AutoScaler.decl + - python.cross_service.resilient_service.iam.CreateInstanceProfile + services: + iam: {CreateInstanceProfile} +iam_DeleteInstanceProfile: + title: Delete an &IAM; instance profile using an &AWS; SDK + title_abbrev: Delete an instance profile + synopsis: delete an &IAM; instance profile. + category: + languages: + Python: + versions: + - sdk_version: 3 + github: python/example_code/iam + sdkguide: + excerpts: + - description: This example removes the role from the instance profile, detaches + all policies attached to the role, and deletes all the resources. 
+ snippet_tags: + - python.cross_service.resilient_service.AutoScaler.decl + - python.cross_service.resilient_service.iam.DeleteInstanceProfile + services: + iam: {DeleteInstanceProfile} iam_Scenario_GroupBasics: title: Create an &IAM; group and add a user to the group using an &AWS; SDK title_abbrev: Create a group and add a user diff --git a/.doc_gen/readmes/includes/important.jinja2 b/.doc_gen/readmes/includes/important.jinja2 index a890a6bc44f..945fbba4225 100644 --- a/.doc_gen/readmes/includes/important.jinja2 +++ b/.doc_gen/readmes/includes/important.jinja2 @@ -1,6 +1,6 @@ ## ⚠ Important -* Running this code might result in charges to your AWS account. See [AWS Pricing](https://aws.amazon.com/pricing/?aws-products-pricing.sort-by=item.additionalFields.productNameLowercase&aws-products-pricing.sort-order=asc&awsf.Free%20Tier%20Type=*all&awsf.tech-category=*all) and [Free Tier](https://aws.amazon.com/free/?all-free-tier.sort-by=item.additionalFields.SortRank&all-free-tier.sort-order=asc&awsf.Free%20Tier%20Types=*all&awsf.Free%20Tier%20Categories=*all) for more details. +* Running this code might result in charges to your AWS account. For more details, see [AWS Pricing](https://aws.amazon.com/pricing/?aws-products-pricing.sort-by=item.additionalFields.productNameLowercase&aws-products-pricing.sort-order=asc&awsf.Free%20Tier%20Type=*all&awsf.tech-category=*all) and [Free Tier](https://aws.amazon.com/free/?all-free-tier.sort-by=item.additionalFields.SortRank&all-free-tier.sort-order=asc&awsf.Free%20Tier%20Types=*all&awsf.Free%20Tier%20Categories=*all). * Running the tests might result in charges to your AWS account. * We recommend that you grant your code least privilege. At most, grant only the minimum permissions required to perform the task. For more information, see [Grant least privilege](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html#grant-least-privilege). * This code is not tested in every AWS Region. 
For more information, see [AWS Regional Services](https://aws.amazon.com/about-aws/global-infrastructure/regional-product-services). diff --git a/.github/pre_validate/pre_validate.py b/.github/pre_validate/pre_validate.py index a43e84f1032..c30405d15a6 100644 --- a/.github/pre_validate/pre_validate.py +++ b/.github/pre_validate/pre_validate.py @@ -68,7 +68,8 @@ 'venv', '.venv', 'bin', - 'obj' + 'obj', + '.doc_gen', } # Files to skip. diff --git a/python/cross_service/resilient_service/auto_scaler.py b/python/cross_service/resilient_service/auto_scaler.py index cb19dd3d9ca..03aa3637ed8 100644 --- a/python/cross_service/resilient_service/auto_scaler.py +++ b/python/cross_service/resilient_service/auto_scaler.py @@ -16,6 +16,7 @@ class AutoScalerError(Exception): pass # snippet-start:[python.example_code.workflow.ResilientService_AutoScaler] +# snippet-start:[python.cross_service.resilient_service.AutoScaler.decl] class AutoScaler: """ Encapsulates Amazon EC2 Auto Scaling and EC2 management actions. @@ -33,6 +34,12 @@ def __init__( :param ssm_client: A Boto3 Systems Manager client. :param iam_client: A Boto3 IAM client. 
""" + self.inst_type = inst_type + self.ami_param = ami_param + self.autoscaling_client = autoscaling_client + self.ec2_client = ec2_client + self.ssm_client = ssm_client + self.iam_client = iam_client self.launch_template_name = f"{resource_prefix}-template" self.group_name = f"{resource_prefix}-group" self.instance_policy_name = f"{resource_prefix}-pol" @@ -41,12 +48,7 @@ def __init__( self.bad_creds_policy_name = f"{resource_prefix}-bc-pol" self.bad_creds_role_name = f"{resource_prefix}-bc-role" self.bad_creds_profile_name = f"{resource_prefix}-bc-prof" - self.inst_type = inst_type - self.ami_param = ami_param - self.autoscaling_client = autoscaling_client - self.ec2_client = ec2_client - self.ssm_client = ssm_client - self.iam_client = iam_client +# snippet-end:[python.cross_service.resilient_service.AutoScaler.decl] @classmethod def from_client(cls, resource_prefix): @@ -63,6 +65,7 @@ def from_client(cls, resource_prefix): resource_prefix, 't3.micro', '/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2', as_client, ec2_client, ssm_client, iam_client) + # snippet-start:[python.cross_service.resilient_service.iam.CreateInstanceProfile] def create_instance_profile( self, policy_file, policy_name, role_name, profile_name, aws_managed_policies=()): """ @@ -142,7 +145,9 @@ def create_instance_profile( f"Couldn't create profile {profile_name} and attach it to role\n" f"{role_name}: {err}") return profile_arn + # snippet-end:[python.cross_service.resilient_service.iam.CreateInstanceProfile] + # snippet-start:[python.cross_service.resilient_service.ec2.DescribeIamInstanceProfileAssociations] def get_instance_profile(self, instance_id): """ Gets data about the profile associated with an instance. 
@@ -158,7 +163,9 @@ def get_instance_profile(self, instance_id): f"Couldn't get instance profile association for instance {instance_id}: {err}") else: return response['IamInstanceProfileAssociations'][0] + # snippet-end:[python.cross_service.resilient_service.ec2.DescribeIamInstanceProfileAssociations] + # snippet-start:[python.cross_service.resilient_service.ec2.ReplaceIamInstanceProfileAssociation] def replace_instance_profile( self, instance_id, new_instance_profile_name, profile_association_id): """ @@ -198,7 +205,9 @@ def replace_instance_profile( except ClientError as err: raise AutoScalerError( f"Couldn't replace instance profile for association {profile_association_id}: {err}") + # snippet-end:[python.cross_service.resilient_service.ec2.ReplaceIamInstanceProfileAssociation] + # snippet-start:[python.cross_service.resilient_service.iam.DeleteInstanceProfile] def delete_instance_profile(self, profile_name, role_name): """ Detaches a role from an instance profile, detaches policies from the role, @@ -229,7 +238,9 @@ def delete_instance_profile(self, profile_name, role_name): raise AutoScalerError( f"Couldn't delete instance profile {profile_name} or detach " f"policies and delete role {role_name}: {err}") + # snippet-end:[python.cross_service.resilient_service.iam.DeleteInstanceProfile] + # snippet-start:[python.cross_service.resilient_service.ec2.CreateLaunchTemplate] def create_template(self, server_startup_script_file, instance_policy_file): """ Creates an Amazon EC2 launch template to use with Amazon EC2 Auto Scaling. The @@ -270,7 +281,9 @@ def create_template(self, server_startup_script_file, instance_policy_file): raise AutoScalerError( f"Couldn't create launch template {self.launch_template_name}: {err}.") return template + # snippet-end:[python.cross_service.resilient_service.ec2.CreateLaunchTemplate] + # snippet-start:[python.cross_service.resilient_service.ec2.DeleteLaunchTemplate] def delete_template(self): """ Deletes a launch template. 
@@ -285,7 +298,9 @@ def delete_template(self): else: raise AutoScalerError( f"Couldn't delete launch template {self.launch_template_name}: {err}.") + # snippet-end:[python.cross_service.resilient_service.ec2.DeleteLaunchTemplate] + # snippet-start:[python.cross_service.resilient_service.ec2.DescribeAvailabilityZones] def get_availability_zones(self): """ Gets a list of Availability Zones in the AWS Region of the Amazon EC2 client. @@ -299,7 +314,9 @@ def get_availability_zones(self): raise AutoScalerError(f"Couldn't get availability zones: {err}.") else: return zones + # snippet-end:[python.cross_service.resilient_service.ec2.DescribeAvailabilityZones] + # snippet-start:[python.cross_service.resilient_service.auto-scaling.CreateAutoScalingGroup] def create_group(self, group_size): """ Creates an EC2 Auto Scaling group with the specified size. @@ -327,7 +344,9 @@ def create_group(self, group_size): raise AutoScalerError( f"Couldn't create EC2 Auto Scaling group {self.group_name}: {err}") return zones + # snippet-end:[python.cross_service.resilient_service.auto-scaling.CreateAutoScalingGroup] + # snippet-start:[python.cross_service.resilient_service.auto-scaling.DescribeAutoScalingGroups] def get_instances(self): """ Gets data about the instances in the EC2 Auto Scaling group. 
@@ -338,12 +357,12 @@ def get_instances(self): as_response = self.autoscaling_client.describe_auto_scaling_groups( AutoScalingGroupNames=[self.group_name]) instance_ids = [i['InstanceId'] for i in as_response['AutoScalingGroups'][0]['Instances']] - ec2_response = self.ec2_client.describe_instances(InstanceIds=instance_ids) except ClientError as err: raise AutoScalerError( f"Couldn't get instances for Auto Scaling group {self.group_name}: {err}") else: - return ec2_response['Reservations'][0]['Instances'] + return instance_ids + # snippet-end:[python.cross_service.resilient_service.auto-scaling.DescribeAutoScalingGroups] def terminate_instance(self, instance_id): """ @@ -360,6 +379,7 @@ def terminate_instance(self, instance_id): raise AutoScalerError( f"Couldn't terminate instance {instance_id}: {err}") + # snippet-start:[python.cross_service.resilient_service.auto-scaling.AttachLoadBalancerTargetGroups] def attach_load_balancer_target_group(self, lb_target_group): """ Attaches an Elastic Load Balancing (ELB) target group to this EC2 Auto Scaling group. @@ -378,6 +398,23 @@ def attach_load_balancer_target_group(self, lb_target_group): raise AutoScalerError( f"Couldn't attach load balancer target group {lb_target_group['TargetGroupName']}\n" f"to auto scaling group {self.group_name}") + # snippet-end:[python.cross_service.resilient_service.auto-scaling.AttachLoadBalancerTargetGroups] + + # snippet-start:[python.cross_service.resilient_service.auto-scaling.DeleteAutoScalingGroup] + def _try_terminate_instance(self, inst_id): + stopping = False + log.info(f"Stopping {inst_id}.") + while not stopping: + try: + self.autoscaling_client.terminate_instance_in_auto_scaling_group( + InstanceId=inst_id, ShouldDecrementDesiredCapacity=True) + stopping = True + except ClientError as err: + if err.response['Error']['Code'] == 'ScalingActivityInProgress': + log.info("Scaling activity in progress for %s. 
Waiting...", inst_id) + time.sleep(10) + else: + raise AutoScalerError(f"Couldn't stop instance {inst_id}: {err}.") def _try_delete_group(self): """ @@ -412,16 +449,16 @@ def delete_group(self): AutoScalingGroupName=self.group_name, MinSize=0) instance_ids = [inst['InstanceId'] for inst in groups[0]['Instances']] for inst_id in instance_ids: - log.info(f"Stopping {inst_id}.") - self.autoscaling_client.terminate_instance_in_auto_scaling_group( - InstanceId=inst_id, ShouldDecrementDesiredCapacity=True) + self._try_terminate_instance(inst_id) self._try_delete_group() else: log.info("No groups found named %s, nothing to do.", self.group_name) except ClientError as err: raise AutoScalerError( f"Couldn't delete group {self.group_name}: {err}.") + # snippet-end:[python.cross_service.resilient_service.auto-scaling.DeleteAutoScalingGroup] + # snippet-start:[python.cross_service.resilient_service.ec2.DescribeVpcs] def get_default_vpc(self): """ Gets the default VPC for the account. @@ -434,7 +471,9 @@ def get_default_vpc(self): raise AutoScalerError(f"Couldn't get default VPC: {err}") else: return response['Vpcs'][0] + # snippet-end:[python.cross_service.resilient_service.ec2.DescribeVpcs] + # snippet-start:[python.cross_service.resilient_service.ec2.DescribeSecurityGroups] def verify_inbound_port(self, vpc, port, ip_address): """ Verify the default security group of the specified VPC allows ingress from this @@ -470,7 +509,7 @@ def verify_inbound_port(self, vpc, port, ip_address): if not port_is_open: log.info( "The inbound rule does not appear to be open to either this computer's IP\n" - "address of %s, to all IP addresses (0.0.0.0/0), or to a prefix list ID.") + "address of %s, to all IP addresses (0.0.0.0/0), or to a prefix list ID.", ip_address) else: break except ClientError as err: @@ -478,7 +517,9 @@ def verify_inbound_port(self, vpc, port, ip_address): f"Couldn't verify inbound rule for port {port} for VPC {vpc['VpcId']}: {err}") else: return sec_group, port_is_open 
+ # snippet-end:[python.cross_service.resilient_service.ec2.DescribeSecurityGroups] + # snippet-start:[python.cross_service.resilient_service.ec2.AuthorizeSecurityGroupIngress] def open_inbound_port(self, sec_group_id, port, ip_address): """ Add an ingress rule to the specified security group that allows access on the @@ -495,7 +536,9 @@ def open_inbound_port(self, sec_group_id, port, ip_address): except ClientError as err: raise AutoScalerError( f"Couldn't authorize ingress to {sec_group_id} on port {port} from {ip_address}: {err}") + # snippet-end:[python.cross_service.resilient_service.ec2.AuthorizeSecurityGroupIngress] + # snippet-start:[python.cross_service.resilient_service.ec2.DescribeSubnets] def get_subnets(self, vpc_id, zones): """ Gets the default subnets in a VPC for a specified list of Availability Zones. @@ -516,4 +559,5 @@ def get_subnets(self, vpc_id, zones): raise AutoScalerError(f"Couldn't get subnets: {err}") else: return subnets + # snippet-end:[python.cross_service.resilient_service.ec2.DescribeSubnets] # snippet-end:[python.example_code.workflow.ResilientService_AutoScaler] \ No newline at end of file diff --git a/python/cross_service/resilient_service/load_balancer.py b/python/cross_service/resilient_service/load_balancer.py index 0b2a4c64c96..9f666d2dcea 100644 --- a/python/cross_service/resilient_service/load_balancer.py +++ b/python/cross_service/resilient_service/load_balancer.py @@ -15,6 +15,7 @@ class LoadBalancerError(Exception): pass # snippet-start:[python.example_code.workflow.ResilientService_LoadBalancer] +# snippet-start:[python.cross_service.resilient_service.LoadBalancer.decl] class LoadBalancer: """Encapsulates Elastic Load Balancing (ELB) actions.""" def __init__( @@ -28,6 +29,7 @@ def __init__( self.load_balancer_name = load_balancer_name self.elb_client = elb_client self._endpoint = None +# snippet-end:[python.cross_service.resilient_service.LoadBalancer.decl] @classmethod def from_client(cls, resource_prefix): @@ -42,6 
+44,7 @@ def from_client(cls, resource_prefix): f"{resource_prefix}-lb", elb_client) + # snippet-start:[python.cross_service.resilient_service.elbv2.DescribeLoadBalancers] def endpoint(self): """ Gets the HTTP endpoint of the load balancer. @@ -56,7 +59,9 @@ def endpoint(self): raise LoadBalancerError( f"Couldn't get the endpoint for load balancer {self.load_balancer_name}: {err}") return self._endpoint + # snippet-end:[python.cross_service.resilient_service.elbv2.DescribeLoadBalancers] + # snippet-start:[python.cross_service.resilient_service.elbv2.CreateTargetGroup] def create_target_group(self, protocol, port, vpc_id): """ Creates an Elastic Load Balancing target group. The target group specifies how @@ -85,7 +90,9 @@ def create_target_group(self, protocol, port, vpc_id): f"Couldn't create load balancing target group {self.target_group_name}: {err}") else: return target_group + # snippet-end:[python.cross_service.resilient_service.elbv2.CreateTargetGroup] + # snippet-start:[python.cross_service.resilient_service.elbv2.DeleteTargetGroup] def delete_target_group(self): """ Deletes the target group. 
@@ -108,7 +115,10 @@ def delete_target_group(self): else: raise LoadBalancerError( f"Couldn't delete load balancing target group {self.target_group_name}: {err}") + # snippet-end:[python.cross_service.resilient_service.elbv2.DeleteTargetGroup] + # snippet-start:[python.cross_service.resilient_service.elbv2.CreateLoadBalancer] + # snippet-start:[python.cross_service.resilient_service.elbv2.CreateListener] def create_load_balancer(self, subnet_ids, target_group): """ Creates an Elastic Load Balancing load balancer that uses the specified subnets @@ -143,7 +153,10 @@ def create_load_balancer(self, subnet_ids, target_group): else: self._endpoint = load_balancer['DNSName'] return load_balancer + # snippet-end:[python.cross_service.resilient_service.elbv2.CreateListener] + # snippet-end:[python.cross_service.resilient_service.elbv2.CreateLoadBalancer] + # snippet-start:[python.cross_service.resilient_service.elbv2.DeleteLoadBalancer] def delete_load_balancer(self): """ Deletes a load balancer. @@ -162,6 +175,7 @@ def delete_load_balancer(self): else: raise LoadBalancerError( f"Couldn't delete load balancer {self.load_balancer_name}: {err}") + # snippet-end:[python.cross_service.resilient_service.elbv2.DeleteLoadBalancer] def verify_load_balancer_endpoint(self): """ @@ -183,6 +197,7 @@ def verify_load_balancer_endpoint(self): time.sleep(10) return success + # snippet-start:[python.cross_service.resilient_service.elbv2.DescribeTargetHealth] def check_target_health(self): """ Checks the health of the instances in the target group. 
@@ -198,4 +213,5 @@ def check_target_health(self): f"Couldn't check health of {self.target_group_name} targets: {err}") else: return health_response['TargetHealthDescriptions'] + # snippet-end:[python.cross_service.resilient_service.elbv2.DescribeTargetHealth] # snippet-end:[python.example_code.workflow.ResilientService_LoadBalancer] diff --git a/python/cross_service/resilient_service/parameters.py b/python/cross_service/resilient_service/parameters.py index 6e6d663616e..00cf54cbe9d 100644 --- a/python/cross_service/resilient_service/parameters.py +++ b/python/cross_service/resilient_service/parameters.py @@ -22,14 +22,20 @@ class ParameterHelper: failure_response = 'doc-example-resilient-architecture-failure-response' health_check = 'doc-example-resilient-architecture-health-check' - def __init__(self, table_name): + def __init__(self, table_name, ssm_client): """ :param table_name: The name of the DynamoDB table that is used as a recommendation service. + :param ssm_client: A Boto3 Systems Manager client. """ - self.ssm_client = boto3.client('ssm') + self.ssm_client = ssm_client self.table_name = table_name + @classmethod + def from_client(cls, table_name): + ssm_client = boto3.client('ssm') + return cls(table_name, ssm_client) + def reset(self): """ Resets the Systems Manager parameters to starting values for the demo. 
diff --git a/python/cross_service/resilient_service/runner.py b/python/cross_service/resilient_service/runner.py index 7b8476a3a9a..929c4d965e6 100644 --- a/python/cross_service/resilient_service/runner.py +++ b/python/cross_service/resilient_service/runner.py @@ -12,7 +12,6 @@ import argparse import logging -import time from pprint import pp import sys @@ -184,7 +183,7 @@ def demo(self): ssm_only_policy, self.autoscaler.bad_creds_policy_name, self.autoscaler.bad_creds_role_name, self.autoscaler.bad_creds_profile_name, ['AmazonSSMManagedInstanceCore']) instances = self.autoscaler.get_instances() - bad_instance_id = instances[0]['InstanceId'] + bad_instance_id = instances[0] instance_profile = self.autoscaler.get_instance_profile(bad_instance_id) print(f"\nReplacing the profile for instance {bad_instance_id} with a profile that contains\n" f"bad credentials...\n") @@ -225,6 +224,7 @@ def demo(self): "unhealthy instances, allowing them to fail open and return a static response rather than fail\n" "closed and report failure to the customer.") self.demo_choices() + self.param_helper.reset() def destroy(self): print("This concludes the demo of how to build and manage a resilient service.\n" @@ -246,7 +246,7 @@ def destroy(self): def main(): parser = argparse.ArgumentParser() parser.add_argument( - '--action', choices=['all', 'deploy', 'demo', 'destroy'], + '--action', required=True, choices=['all', 'deploy', 'demo', 'destroy'], help="The action to take for the demo. 
When 'all' is specified, resources are\n" "deployed, the demo is run, and resources are destroyed.") parser.add_argument( @@ -263,7 +263,7 @@ def main(): recommendation = RecommendationService.from_client('doc-example-recommendation-service') autoscaler = AutoScaler.from_client(prefix) loadbalancer = LoadBalancer.from_client(prefix) - param_helper = ParameterHelper(recommendation.table_name) + param_helper = ParameterHelper.from_client(recommendation.table_name) runner = Runner(args.resource_path, recommendation, autoscaler, loadbalancer, param_helper) actions = [args.action] if args.action != 'all' else ['deploy', 'demo', 'destroy'] for action in actions: diff --git a/python/cross_service/resilient_service/test/conftest.py b/python/cross_service/resilient_service/test/conftest.py new file mode 100644 index 00000000000..9af2978910b --- /dev/null +++ b/python/cross_service/resilient_service/test/conftest.py @@ -0,0 +1,83 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Contains common test fixtures used to run unit tests. +""" + +from datetime import datetime +import sys +import boto3 +import pytest + +from auto_scaler import AutoScaler +from load_balancer import LoadBalancer +from parameters import ParameterHelper +from recommendation_service import RecommendationService +import runner + +# This is needed so Python can find test_tools on the path. 
+sys.path.append('../..') +from test_tools.fixtures.common import * + + +class ScenarioData: + def __init__(self, auto_scaling, elb, ddb, ec2, ssm, iam): + self.auto_scaling = auto_scaling + self.elb = elb + self.ddb = ddb + self.ec2 = ec2 + self.ssm = ssm + self.iam = iam + + self.test_resource_path = 'test/resources' + self.table_name = 'doc-example-test-rec-table' + self.resource_prefix = 'doc-example-test-resilience' + self.policy_arn = 'arn:aws:iam:us-west-2:123456789012:policy/test-policy' + self.role_name = f'{self.resource_prefix}-role' + self.profile_name = f'{self.resource_prefix}-prof' + self.lt_name = f'{self.resource_prefix}-template' + self.inst_type = 'test-inst-type' + self.ami_param = 'test-ami-param' + self.instance = { + 'InstanceId': 'test-instance-id', 'AvailabilityZone': 'test-zone', 'LifecycleState': 'active', + 'HealthStatus': 'healthy', 'ProtectedFromScaleIn': True} + self.asg_name = f'{self.resource_prefix}-group' + self.asg_group = { + 'AutoScalingGroupName': self.asg_name, + 'MinSize': 3, 'MaxSize': 3, 'DesiredCapacity': 0, 'DefaultCooldown': 0, + 'AvailabilityZones': ['test-zone'], 'HealthCheckType': 'EC2', + 'CreatedTime': datetime.now(), + 'Instances': [self.instance]} + self.tg_name = f'{self.resource_prefix}-tg' + self.tg_arn = 'arn:aws:elasticloadbalancing:test-region:123456789012:targetgroup/test-group' + self.lb_name = f'{self.resource_prefix}-lb' + self.lb_arn = 'arn:aws:elasticloadbalancing:test-region:123456789012:loadbalancer/test-lb' + self.bad_policy_arn = 'arn:aws:iam:us-west-2:123456789012:policy/test-bad-policy' + self.bad_role_name = f'{self.resource_prefix}-bc-role' + self.bad_prof_name = f'{self.resource_prefix}-bc-prof' + self.scenario = runner.Runner( + self.test_resource_path, + RecommendationService(self.table_name, self.ddb.client), + AutoScaler( + self.resource_prefix, self.inst_type, self.ami_param, + self.auto_scaling.client, self.ec2.client, self.ssm.client, self.iam.client), + LoadBalancer(self.tg_name, 
self.lb_name, self.elb.client), + ParameterHelper(self.table_name, self.ssm.client)) + + +class TestClient: + def __init__(self, service, make_stubber): + self.client = boto3.client(service) + self.stubber = make_stubber(self.client) + + +@pytest.fixture +def scenario_data(make_stubber): + auto_scaling = TestClient('autoscaling', make_stubber) + elb = TestClient('elbv2', make_stubber) + ddb = TestClient('dynamodb', make_stubber) + ec2 = TestClient('ec2', make_stubber) + ssm = TestClient('ssm', make_stubber) + iam = TestClient('iam', make_stubber) + return ScenarioData(auto_scaling, elb, ddb, ec2, ssm, iam) diff --git a/python/cross_service/resilient_service/test/resources/instance_policy.json b/python/cross_service/resilient_service/test/resources/instance_policy.json new file mode 100644 index 00000000000..600afef3b03 --- /dev/null +++ b/python/cross_service/resilient_service/test/resources/instance_policy.json @@ -0,0 +1,28 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "dynamodb:BatchGetItem", + "dynamodb:Describe*", + "dynamodb:List*", + "dynamodb:GetItem", + "dynamodb:Query", + "dynamodb:Scan" + ], + "Resource": [ + "arn:aws:dynamodb:*:*:table/doc-example-recommendation-service" + ] + }, + { + "Effect": "Allow", + "Action": [ + "ssm:GetParameters" + ], + "Resource": [ + "*" + ] + } + ] +} \ No newline at end of file diff --git a/python/cross_service/resilient_service/test/resources/recommendations.json b/python/cross_service/resilient_service/test/resources/recommendations.json new file mode 100644 index 00000000000..6fc964c21bb --- /dev/null +++ b/python/cross_service/resilient_service/test/resources/recommendations.json @@ -0,0 +1,11 @@ +[ + {"MediaType": {"S": "Book"}, "ItemId":{"N": "1"}, "Title": {"S": "Pride and Prejudice"}, "Creator": {"S": "Jane Austen"}}, + {"MediaType": {"S": "Book"}, "ItemId": {"N": "2"}, "Title": {"S": "The Lord of the Rings"}, "Creator": {"S": "J. R. R. 
Tolkien"}}, + {"MediaType": {"S": "Book"}, "ItemId": {"N": "3"}, "Title": {"S": "The Three Musketeers"}, "Creator": {"S": "Alexandre Dumas"}}, + {"MediaType": {"S": "Movie"}, "ItemId": {"N": "1"}, "Title": {"S": "Delicatessen"}, "Creator": {"S": "Jeunet et Caro"}}, + {"MediaType": {"S": "Movie"}, "ItemId": {"N": "2"}, "Title": {"S": "The Princess Bride"}, "Creator": {"S": "Rob Reiner"}}, + {"MediaType": {"S": "Movie"}, "ItemId": {"N": "3"}, "Title": {"S": "12 Angry Men"}, "Creator": {"S": "Sidney Lumet"}}, + {"MediaType": {"S": "Song"}, "ItemId": {"N": "1"}, "Title": {"S": "And Dream of Sheep"}, "Creator": {"S": "Kate Bush"}}, + {"MediaType": {"S": "Song"}, "ItemId": {"N": "2"}, "Title": {"S": "Mirrorball"}, "Creator": {"S": "Elbow"}}, + {"MediaType": {"S": "Song"}, "ItemId": {"N": "3"}, "Title": {"S": "Guardians of Asgaard"}, "Creator": {"S": "Amon Amarth"}} +] \ No newline at end of file diff --git a/python/cross_service/resilient_service/test/resources/server.py b/python/cross_service/resilient_service/test/resources/server.py new file mode 100644 index 00000000000..8c404005888 --- /dev/null +++ b/python/cross_service/resilient_service/test/resources/server.py @@ -0,0 +1,136 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +This is a simple test server for use with the How to Build and Manage a Resilient Service +example. It simulates an engine for recommending books, movies, and songs. + +This code is only for use in this example. 
+*** NOT FOR PRODUCTION USE *** +""" + +import argparse +from http.server import BaseHTTPRequestHandler, HTTPServer +import json +from functools import partial +import random + +import boto3 +from botocore.exceptions import ClientError +from ec2_metadata import ec2_metadata + + +class RequestHandler(BaseHTTPRequestHandler): + """Handles HTTP requests by returning a recommendation or responding to a health check.""" + def __init__(self, dynamodb_client, ssm_client, *args, **kwargs): + """ + :param dynamodb_client: A Boto3 DynamoDB client. + :param ssm_client: A Boto3 Systems Manager client. + """ + self.dynamodb_client = dynamodb_client + self.ssm_client = ssm_client + super().__init__(*args, **kwargs) + + def _respond(self, status_code, payload): + self.send_response(status_code) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(bytes(json.dumps(payload), "utf-8")) + + def do_GET(self): + """ + Responds to an HTTP GET request. This function uses Systems Manager parameters + to determine how to respond to different kinds of requests, in order to simulate + failures or to make the server more resilient to failure. + + Root path '/': + * Returns a recommendation by getting a random item from the recommendations table. + * The table name is specified by a Systems Manager parameter. When this parameter + is set to a non-existent table, either an error response is returned or a static + response is returned. + * This example uses the ec2_metadata package to get metadata about the instance + where the server is running and include metadata in the response. + + Health check path '/healthcheck': + * When shallow checks are specified, always returns success. + * When deep checks are specified, attempts to get data about the DynamoDB table and, + if the connection attempt fails, returns an error response. 
+ """ + print("path: ", self.path) + + table = 'doc-example-resilient-architecture-table' + failure_response = 'doc-example-resilient-architecture-failure-response' + health_check = 'doc-example-resilient-architecture-health-check' + param_response = self.ssm_client.get_parameters(Names=[table, failure_response, health_check]) + parameters = {p['Name']: p['Value'] for p in param_response['Parameters']} + print(parameters) + + if self.path == '/': + try: + media_type = random.choice(['Book', 'Movie', 'Song']) + item_id = random.randint(1, 3) + response = self.dynamodb_client.get_item( + TableName=parameters[table], + Key={'MediaType': {'S': media_type}, 'ItemId': {'N': str(item_id)}}) + payload = response['Item'] + except ClientError as err: + print(f"Recommendation service error: {err}") + if parameters[failure_response] == 'static': + payload = { + "MediaType": {"S": "Book"}, + "ItemId":{"N": "0"}, + "Title": {"S": "404 Not Found: A Coloring Book"}, + "Creator": {"S": "The Oatmeal"}} + else: + raise err + + payload['Metadata'] = { + 'InstanceId': ec2_metadata.instance_id, + 'AvailabilityZone': ec2_metadata.availability_zone} + self._respond(200, payload) + elif self.path == '/healthcheck': + response_code = 200 + success = True + if parameters[health_check] == 'deep': + try: + response = self.dynamodb_client.describe_table(TableName=parameters[table]) + if response['Table']['TableStatus'] == 'ACTIVE': + response_code = 200 + success = True + else: + response_code = 503 + success = False + except ClientError as err: + print(f"Recommendation service health check error: {err}") + response_code = 503 + success = False + self._respond(response_code, {'success': success}) + + +def run(): + """ + Runs a web server that listens for HTTP requests on the specified port. + To simplify the example, the web server is run as the root user and uses a simple + Python web server that is intended only for development and testing. 
+ """ + parser = argparse.ArgumentParser() + parser.add_argument('port', default=80, type=int, help="The port where the HTTP server listens.") + parser.add_argument('region', default='us-west-2', help="The AWS Region of AWS resources used by this example.") + args = parser.parse_args() + + server_port = args.port + server_ip = '0.0.0.0' + + print('Starting server...') + server_address = (server_ip, server_port) + + dynamodb_client = boto3.client('dynamodb', region_name=args.region) + ssm_client = boto3.client('ssm', region_name=args.region) + handler = partial(RequestHandler, dynamodb_client, ssm_client) + httpd = HTTPServer(server_address, handler) + print('Running server...') + httpd.serve_forever() + + +if __name__ == "__main__": + run() diff --git a/python/cross_service/resilient_service/test/resources/server_startup_script.sh b/python/cross_service/resilient_service/test/resources/server_startup_script.sh new file mode 100644 index 00000000000..68db90cedb9 --- /dev/null +++ b/python/cross_service/resilient_service/test/resources/server_startup_script.sh @@ -0,0 +1,7 @@ +#!/bin/bash +yum -y update +sleep 30 # prevent "Error: Rpmdb changed underneath us" +yum install python-pip -y +python3 -m pip install boto3 ec2-metadata +wget -O server.py https://raw.githubusercontent.com/awsdocs/aws-doc-sdk-examples/main/workflows/resilient_service/resources/server.py +python3 server.py 80 us-west-2 diff --git a/python/cross_service/resilient_service/test/resources/ssm_only_policy.json b/python/cross_service/resilient_service/test/resources/ssm_only_policy.json new file mode 100644 index 00000000000..5da5891b218 --- /dev/null +++ b/python/cross_service/resilient_service/test/resources/ssm_only_policy.json @@ -0,0 +1,14 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ssm:GetParameters" + ], + "Resource": [ + "*" + ] + } + ] +} \ No newline at end of file diff --git a/python/cross_service/resilient_service/test/test_demo.py 
b/python/cross_service/resilient_service/test/test_demo.py new file mode 100644 index 00000000000..85c58f4423d --- /dev/null +++ b/python/cross_service/resilient_service/test/test_demo.py @@ -0,0 +1,137 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +from datetime import datetime +import time +from unittest.mock import MagicMock +from botocore.exceptions import ClientError, WaiterError +from botocore.stub import ANY +import pytest +import requests + +from auto_scaler import AutoScalerError +from load_balancer import LoadBalancerError +from parameters import ParameterHelper, ParameterHelperError + + +class MockManager: + def __init__(self, stub_runner, scenario_data, input_mocker): + self.scenario_data = scenario_data + self.scenario_data.endpoint = 'test-endpoint' + self.scenario_data.bad_profile_name = f'{self.scenario_data.resource_prefix}-bc-prof' + self.scenario_data.bad_profile_arn = 'arn:aws:iam:us-west-2:123456789012:instance-profile/test-bad-profile' + self.scenario_data.association_id = 'test-association-id' + self.scenario_args = [] + self.scenario_out = {} + answers = ['1', '2', '3', '3', '3', '3', '3', '3', '3'] + input_mocker.mock_answers(answers) + self.stub_runner = stub_runner + + def setup_stubs(self, error, stop_on): + with self.stub_runner(error, stop_on) as runner: + runner.add(self.scenario_data.ssm.stubber.stub_put_parameter, ParameterHelper.table, self.scenario_data.table_name) + runner.add(self.scenario_data.ssm.stubber.stub_put_parameter, ParameterHelper.failure_response, 'none') + runner.add(self.scenario_data.ssm.stubber.stub_put_parameter, ParameterHelper.health_check, 'shallow') + runner.add(self.scenario_data.elb.stubber.stub_describe_load_balancers, [self.scenario_data.lb_name], [self.scenario_data.endpoint]) + runner.add(self.scenario_data.elb.stubber.stub_describe_target_groups, [self.scenario_data.tg_name], [self.scenario_data.tg_arn]) + runner.add( + 
self.scenario_data.elb.stubber.stub_describe_target_health, self.scenario_data.tg_arn, + [{'id': 'test-id', 'port': 80, 'state': 'unhealthy', 'reason': 'test reason', 'desc': 'test desc'}]) + runner.add(self.scenario_data.ssm.stubber.stub_put_parameter, ParameterHelper.table, 'this-is-not-a-table') + runner.add(self.scenario_data.ssm.stubber.stub_put_parameter, ParameterHelper.failure_response, 'static') + runner.add(self.scenario_data.ssm.stubber.stub_put_parameter, ParameterHelper.table, self.scenario_data.table_name) + runner.add( + self.scenario_data.iam.stubber.stub_create_policy, f'{self.scenario_data.resource_prefix}-bc-pol', + self.scenario_data.bad_policy_arn) + runner.add(self.scenario_data.iam.stubber.stub_create_role, self.scenario_data.bad_role_name) + runner.add( + self.scenario_data.iam.stubber.stub_attach_role_policy, self.scenario_data.bad_role_name, + self.scenario_data.bad_policy_arn) + runner.add(self.scenario_data.iam.stubber.stub_attach_role_policy, self.scenario_data.bad_role_name, ANY) + runner.add( + self.scenario_data.iam.stubber.stub_create_instance_profile, self.scenario_data.bad_profile_name, + self.scenario_data.bad_profile_arn) + runner.add( + self.scenario_data.iam.stubber.stub_get_instance_profile, self.scenario_data.bad_profile_name, + self.scenario_data.bad_profile_arn) + runner.add( + self.scenario_data.iam.stubber.stub_add_role_to_instance_profile, self.scenario_data.bad_profile_name, + self.scenario_data.bad_role_name) + runner.add( + self.scenario_data.auto_scaling.stubber.stub_describe_auto_scaling_groups, [self.scenario_data.asg_name], + [{ + 'AutoScalingGroupName': self.scenario_data.asg_name, + 'MinSize': 3, 'MaxSize': 3, 'DesiredCapacity': 0, 'DefaultCooldown': 0, + 'AvailabilityZones': ['test-zone'], 'HealthCheckType': 'EC2', + 'CreatedTime': datetime.now(), + 'Instances': [self.scenario_data.instance]}]) + runner.add( + self.scenario_data.ec2.stubber.stub_describe_iam_instance_profile_associations, + 
self.scenario_data.instance['InstanceId'], self.scenario_data.association_id) + runner.add( + self.scenario_data.ec2.stubber.stub_replace_iam_instance_profile_association, + self.scenario_data.bad_profile_name, self.scenario_data.association_id) + runner.add( + self.scenario_data.ec2.stubber.stub_reboot_instances, [self.scenario_data.instance['InstanceId']]) + runner.add(self.scenario_data.ssm.stubber.stub_describe_instance_information, [self.scenario_data.instance['InstanceId']]) + runner.add(self.scenario_data.ssm.stubber.stub_send_command, [self.scenario_data.instance['InstanceId']], commands=ANY, timeout=None) + runner.add(self.scenario_data.ssm.stubber.stub_put_parameter, ParameterHelper.health_check, 'deep') + runner.add( + self.scenario_data.auto_scaling.stubber.stub_terminate_instance_in_auto_scaling_group, + self.scenario_data.instance['InstanceId'], False, None) + runner.add(self.scenario_data.ssm.stubber.stub_put_parameter, ParameterHelper.table, 'this-is-not-a-table') + runner.add(self.scenario_data.ssm.stubber.stub_put_parameter, ParameterHelper.table, self.scenario_data.table_name) + runner.add(self.scenario_data.ssm.stubber.stub_put_parameter, ParameterHelper.failure_response, 'none') + runner.add(self.scenario_data.ssm.stubber.stub_put_parameter, ParameterHelper.health_check, 'shallow') + +@pytest.fixture +def mock_mgr(stub_runner, scenario_data, input_mocker): + return MockManager(stub_runner, scenario_data, input_mocker) + + +def test_demo(mock_mgr, monkeypatch): + monkeypatch.setattr(time, 'sleep', lambda x: None) + monkeypatch.setattr(requests, 'get', lambda x: MagicMock(status_code=200, text='test text')) + mock_mgr.setup_stubs(None, None) + + mock_mgr.scenario_data.scenario.demo() + + +@pytest.mark.parametrize('error, stub_name, stop_on_index', [ + (ParameterHelperError, 'stub_put_parameter', 0), + (ParameterHelperError, 'stub_put_parameter', 1), + (ParameterHelperError, 'stub_put_parameter', 2), + (LoadBalancerError, 
'stub_describe_load_balancers', 3), + (LoadBalancerError, 'stub_describe_target_groups', 4), + (LoadBalancerError, 'stub_describe_target_health', 5), + (ParameterHelperError, 'stub_put_parameter', 6), + (ParameterHelperError, 'stub_put_parameter', 7), + (ParameterHelperError, 'stub_put_parameter', 8), + (AutoScalerError, 'stub_create_policy', 9), + (AutoScalerError, 'stub_create_role', 10), + (AutoScalerError, 'stub_attach_role_policy', 11), + (AutoScalerError, 'stub_attach_role_policy', 12), + (AutoScalerError, 'stub_create_instance_profile', 13), + (WaiterError, 'stub_get_instance_profile', 14), + (AutoScalerError, 'stub_add_role_to_instance_profile', 15), + (AutoScalerError, 'stub_describe_auto_scaling_groups', 16), + (AutoScalerError, 'stub_describe_iam_instance_profile_associations', 17), + (AutoScalerError, 'stub_replace_iam_instance_profile_association', 18), + (AutoScalerError, 'stub_reboot_instances', 19), + (AutoScalerError, 'stub_describe_instance_information', 20), + (AutoScalerError, 'stub_send_command', 21), + (ParameterHelperError, 'stub_put_parameter', 22), + (AutoScalerError, 'stub_terminate_instance_in_auto_scaling_group', 23), + (ParameterHelperError, 'stub_put_parameter', 24), + (ParameterHelperError, 'stub_put_parameter', 25), + (ParameterHelperError, 'stub_put_parameter', 26), + (ParameterHelperError, 'stub_put_parameter', 27) +]) +def test_demo_error( + mock_mgr, caplog, error, stub_name, stop_on_index, monkeypatch): + monkeypatch.setattr(time, 'sleep', lambda x: None) + monkeypatch.setattr(requests, 'get', lambda x: MagicMock(status_code=200, text='test text')) + mock_mgr.setup_stubs(error, stop_on_index) + + with pytest.raises(error): + mock_mgr.scenario_data.scenario.demo() diff --git a/python/cross_service/resilient_service/test/test_deploy.py b/python/cross_service/resilient_service/test/test_deploy.py new file mode 100644 index 00000000000..a0ec2569371 --- /dev/null +++ b/python/cross_service/resilient_service/test/test_deploy.py @@ 
-0,0 +1,153 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +import logging +import time +from unittest.mock import MagicMock +from botocore.exceptions import ClientError, WaiterError +from botocore.stub import ANY +import pytest +import requests + +from auto_scaler import AutoScalerError +from load_balancer import LoadBalancerError +from recommendation_service import RecommendationServiceError + +class MockManager: + def __init__(self, stub_runner, scenario_data, input_mocker): + self.scenario_data = scenario_data + self.scenario_data.profile_arn = 'arn:aws:iam:us-west-2:123456789012:instance-profile/test-profile' + self.scenario_data.ami_id = 'test-ami-id' + self.scenario_data.zones = ['test-zone-1', 'test-zone-2'] + self.scenario_data.vpc_id = 'test-vpc' + self.scenario_data.subnet_ids = ['subnet-test-id'] + self.scenario_data.lb_endpoint = 'test-endpoint' + self.scenario_data.sg_id = 'test-sg-id' + self.scenario_data.ip_address = 'test-address' + self.scenario_args = [] + self.scenario_out = {} + answers = ['', '', 'y', ''] + input_mocker.mock_answers(answers) + self.stub_runner = stub_runner + + def setup_stubs(self, error, stop_on): + with self.stub_runner(error, stop_on) as runner: + runner.add( + self.scenario_data.ddb.stubber.stub_create_table, + self.scenario_data.table_name, + [{'name': 'MediaType', 'type': 'S', 'key_type': 'HASH'}, + {'name': 'ItemId', 'type': 'N', 'key_type': 'RANGE'}], + {'read': 5, 'write': 5}) + runner.add(self.scenario_data.ddb.stubber.stub_describe_table, self.scenario_data.table_name) + runner.add(self.scenario_data.ddb.stubber.stub_batch_write_item, ANY) + runner.add( + self.scenario_data.iam.stubber.stub_create_policy, f'{self.scenario_data.resource_prefix}-pol', + self.scenario_data.policy_arn) + runner.add(self.scenario_data.iam.stubber.stub_create_role, self.scenario_data.role_name) + runner.add( + self.scenario_data.iam.stubber.stub_attach_role_policy, 
self.scenario_data.role_name, + self.scenario_data.policy_arn) + runner.add( + self.scenario_data.iam.stubber.stub_create_instance_profile, self.scenario_data.profile_name, + self.scenario_data.profile_arn) + runner.add( + self.scenario_data.iam.stubber.stub_get_instance_profile, self.scenario_data.profile_name, + self.scenario_data.profile_arn) + runner.add( + self.scenario_data.iam.stubber.stub_add_role_to_instance_profile, self.scenario_data.profile_name, + self.scenario_data.role_name) + runner.add(self.scenario_data.ssm.stubber.stub_get_parameter, self.scenario_data.ami_param, self.scenario_data.ami_id) + runner.add( + self.scenario_data.ec2.stubber.stub_create_launch_template, self.scenario_data.lt_name, self.scenario_data.inst_type, + self.scenario_data.ami_id, inst_profile=self.scenario_data.profile_name, user_data=ANY) + runner.add(self.scenario_data.ec2.stubber.stub_describe_availability_zones, self.scenario_data.zones) + runner.add( + self.scenario_data.auto_scaling.stubber.stub_create_auto_scaling_group, self.scenario_data.asg_name, self.scenario_data.zones, + self.scenario_data.lt_name, 3, 3) + runner.add( + self.scenario_data.ec2.stubber.stub_describe_vpcs, {self.scenario_data.vpc_id: True}, + vpc_filters=[{'Name': 'is-default', 'Values': ['true']}]) + runner.add( + self.scenario_data.ec2.stubber.stub_describe_subnets, self.scenario_data.vpc_id, self.scenario_data.zones, + self.scenario_data.subnet_ids) + runner.add( + self.scenario_data.elb.stubber.stub_create_target_group, self.scenario_data.tg_name, 'HTTP', 80, + self.scenario_data.vpc_id, + {'path': '/healthcheck', 'interval': 10, 'timeout': 5, 'thresh_healthy': 2, + 'thresh_unhealthy': 2}, self.scenario_data.tg_arn) + runner.add( + self.scenario_data.elb.stubber.stub_create_load_balancer, self.scenario_data.lb_name, self.scenario_data.subnet_ids, + 'HTTP', 80, self.scenario_data.lb_arn, self.scenario_data.lb_endpoint) + runner.add(self.scenario_data.elb.stubber.stub_describe_load_balancers, 
[self.scenario_data.lb_name]) + runner.add( + self.scenario_data.elb.stubber.stub_create_listener, self.scenario_data.lb_arn, 'HTTP', 80, self.scenario_data.tg_arn) + runner.add( + self.scenario_data.auto_scaling.stubber.stub_attach_load_balancer_target_groups, self.scenario_data.asg_name, + [self.scenario_data.tg_arn]) + runner.add( + self.scenario_data.ec2.stubber.stub_describe_security_groups, + [{'id': self.scenario_data.sg_id, 'group_name': 'default', + 'ip_permissions': [{'FromPort': 80, 'IpRanges': [{'CidrIp': 'test'}], 'PrefixListIds': []}]}], + self.scenario_data.vpc_id) + runner.add( + self.scenario_data.ec2.stubber.stub_authorize_security_group_ingress, self.scenario_data.sg_id, + cidr_ip=f'{self.scenario_data.ip_address}/32', port=80, ip_protocol='tcp') + + +@pytest.fixture +def mock_mgr(stub_runner, scenario_data, input_mocker): + return MockManager(stub_runner, scenario_data, input_mocker) + + +def test_deploy(mock_mgr, caplog, monkeypatch): + caplog.set_level(logging.INFO) + monkeypatch.setattr(time, 'sleep', lambda x: None) + monkeypatch.setattr(requests, 'get', lambda x: MagicMock(status_code=404, text=mock_mgr.scenario_data.ip_address)) + mock_mgr.setup_stubs(None, None) + + mock_mgr.scenario_data.scenario.deploy() + + assert len(caplog.records) > 0 + attrs = [attr for attr in dir(mock_mgr.scenario_data) + if not callable(getattr(mock_mgr.scenario_data, attr)) and not attr.startswith("__") + and attr not in [ + 'ami_param', 'lb_arn', 'lb_endpoint', 'profile_arn', 'tg_arn', 'vpc_id', 'bad_policy_arn', + 'bad_prof_name', 'bad_role_name']] + for attr in attrs: + val = getattr(mock_mgr.scenario_data, attr) + if isinstance(val, str): + assert any(val in rec for rec in caplog.messages), f"'{val}' not in log messages!" 
+ + +@pytest.mark.parametrize('error, stub_name, stop_on_index', [ + (RecommendationServiceError, 'stub_create_table', 0), + (WaiterError, 'stub_describe_table', 1), + (RecommendationServiceError, 'stub_batch_write_item', 2), + (AutoScalerError, 'stub_create_policy', 3), + (AutoScalerError, 'stub_create_role', 4), + (AutoScalerError, 'stub_attach_role_policy', 5), + (AutoScalerError, 'stub_create_instance_profile', 6), + (WaiterError, 'stub_get_instance_profile', 7), + (AutoScalerError, 'stub_add_role_to_instance_profile', 8), + (AutoScalerError, 'stub_get_parameter', 9), + (AutoScalerError, 'stub_create_launch_template', 10), + (AutoScalerError, 'stub_describe_availability_zones', 11), + (AutoScalerError, 'stub_create_auto_scaling_group', 12), + (AutoScalerError, 'stub_describe_vpcs', 13), + (AutoScalerError, 'stub_describe_subnets', 14), + (LoadBalancerError, 'stub_create_target_group', 15), + (LoadBalancerError, 'stub_create_load_balancer', 16), + (WaiterError, 'stub_describe_load_balancers', 17), + (LoadBalancerError, 'stub_create_listener', 18), + (AutoScalerError, 'stub_attach_load_balancer_target_groups', 19), + (AutoScalerError, 'stub_describe_security_groups', 20), + (AutoScalerError, 'stub_authorize_security_group_ingress', 21), +]) +def test_deploy_error( + mock_mgr, capsys, monkeypatch, error, stub_name, stop_on_index): + monkeypatch.setattr(time, 'sleep', lambda x: None) + monkeypatch.setattr(requests, 'get', lambda x: MagicMock(status_code=404, text=mock_mgr.scenario_data.ip_address)) + mock_mgr.setup_stubs(error, stop_on_index) + + with pytest.raises(error): + mock_mgr.scenario_data.scenario.deploy() diff --git a/python/cross_service/resilient_service/test/test_destroy.py b/python/cross_service/resilient_service/test/test_destroy.py new file mode 100644 index 00000000000..4f77b1bfc69 --- /dev/null +++ b/python/cross_service/resilient_service/test/test_destroy.py @@ -0,0 +1,107 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from botocore.exceptions import ClientError, WaiterError +import pytest + +from auto_scaler import AutoScalerError +from load_balancer import LoadBalancerError +from recommendation_service import RecommendationServiceError + + +class MockManager: + def __init__(self, stub_runner, scenario_data, input_mocker): + self.scenario_data = scenario_data + self.scenario_args = [] + self.scenario_out = {} + answers = ['y'] + input_mocker.mock_answers(answers) + self.stub_runner = stub_runner + + def setup_stubs(self, error, stop_on): + with self.stub_runner(error, stop_on) as runner: + runner.add( + self.scenario_data.elb.stubber.stub_describe_load_balancers, [self.scenario_data.lb_name], arns=[self.scenario_data.lb_arn]) + runner.add(self.scenario_data.elb.stubber.stub_delete_load_balancer, self.scenario_data.lb_arn) + runner.add( + self.scenario_data.elb.stubber.stub_describe_load_balancers, [self.scenario_data.lb_name], error_code='LoadBalancerNotFound') + runner.add( + self.scenario_data.elb.stubber.stub_describe_target_groups, [self.scenario_data.tg_name], [self.scenario_data.tg_arn]) + runner.add(self.scenario_data.elb.stubber.stub_delete_target_group, self.scenario_data.tg_arn) + runner.add( + self.scenario_data.auto_scaling.stubber.stub_describe_auto_scaling_groups, [self.scenario_data.asg_name], [self.scenario_data.asg_group]) + runner.add(self.scenario_data.auto_scaling.stubber.stub_update_auto_scaling_group, self.scenario_data.asg_name, 0) + runner.add( + self.scenario_data.auto_scaling.stubber.stub_terminate_instance_in_auto_scaling_group, + self.scenario_data.instance['InstanceId'], True, None) + runner.add(self.scenario_data.auto_scaling.stubber.stub_delete_auto_scaling_group, self.scenario_data.asg_name) + runner.add(self.scenario_data.ec2.stubber.stub_delete_launch_template, self.scenario_data.lt_name) + runner.add( + self.scenario_data.iam.stubber.stub_remove_role_from_instance_profile, + 
self.scenario_data.profile_name, self.scenario_data.role_name) + runner.add(self.scenario_data.iam.stubber.stub_delete_instance_profile, self.scenario_data.profile_name) + runner.add( + self.scenario_data.iam.stubber.stub_list_attached_role_policies, self.scenario_data.role_name, + {'1': self.scenario_data.policy_arn}) + runner.add( + self.scenario_data.iam.stubber.stub_detach_role_policy, self.scenario_data.role_name, self.scenario_data.policy_arn) + runner.add(self.scenario_data.iam.stubber.stub_delete_policy, self.scenario_data.policy_arn) + runner.add(self.scenario_data.iam.stubber.stub_delete_role, self.scenario_data.role_name) + runner.add( + self.scenario_data.iam.stubber.stub_remove_role_from_instance_profile, + self.scenario_data.bad_prof_name, self.scenario_data.bad_role_name) + runner.add(self.scenario_data.iam.stubber.stub_delete_instance_profile, self.scenario_data.bad_prof_name) + runner.add( + self.scenario_data.iam.stubber.stub_list_attached_role_policies, self.scenario_data.bad_role_name, + {'1': self.scenario_data.bad_policy_arn}) + runner.add( + self.scenario_data.iam.stubber.stub_detach_role_policy, self.scenario_data.bad_role_name, self.scenario_data.bad_policy_arn) + runner.add(self.scenario_data.iam.stubber.stub_delete_policy, self.scenario_data.bad_policy_arn) + runner.add(self.scenario_data.iam.stubber.stub_delete_role, self.scenario_data.bad_role_name) + runner.add(self.scenario_data.ddb.stubber.stub_delete_table, self.scenario_data.table_name) + runner.add(self.scenario_data.ddb.stubber.stub_describe_table, self.scenario_data.table_name, error_code='ResourceNotFoundException') + + +@pytest.fixture +def mock_mgr(stub_runner, scenario_data, input_mocker): + return MockManager(stub_runner, scenario_data, input_mocker) + + +def test_destroy(mock_mgr, capsys): + mock_mgr.setup_stubs(None, None) + + mock_mgr.scenario_data.scenario.destroy() + + +@pytest.mark.parametrize('error, stub_name, stop_on_index', [ + (LoadBalancerError, 
'stub_describe_load_balancers', 0), + (LoadBalancerError, 'stub_delete_load_balancer', 1), + (WaiterError, 'stub_describe_load_balancers', 2), + (LoadBalancerError, 'stub_describe_target_groups', 3), + (LoadBalancerError, 'stub_delete_target_group', 4), + (AutoScalerError, 'stub_describe_auto_scaling_groups', 5), + (AutoScalerError, 'stub_update_auto_scaling_group', 6), + (AutoScalerError, 'stub_terminate_instance_in_auto_scaling_group', 7), + (AutoScalerError, 'stub_delete_auto_scaling_group', 8), + (AutoScalerError, 'stub_delete_launch_template', 9), + (AutoScalerError, 'stub_remove_role_from_instance_profile', 10), + (AutoScalerError, 'stub_delete_instance_profile', 11), + (AutoScalerError, 'stub_list_attached_role_policies', 12), + (AutoScalerError, 'stub_detach_role_policy', 13), + (AutoScalerError, 'stub_delete_policy', 14), + (AutoScalerError, 'stub_delete_role', 15), + (AutoScalerError, 'stub_remove_role_from_instance_profile', 16), + (AutoScalerError, 'stub_delete_instance_profile', 17), + (AutoScalerError, 'stub_list_attached_role_policies', 18), + (AutoScalerError, 'stub_detach_role_policy', 19), + (AutoScalerError, 'stub_delete_policy', 20), + (AutoScalerError, 'stub_delete_role', 21), + (RecommendationServiceError, 'stub_delete_table', 22), + (WaiterError, 'stub_describe_table', 23) +]) +def test_destroy_error( + mock_mgr, caplog, error, stub_name, stop_on_index): + mock_mgr.setup_stubs(error, stop_on_index) + + with pytest.raises(error): + mock_mgr.scenario_data.scenario.destroy() diff --git a/python/cross_service/resilient_service/test/test_runner_integ.py b/python/cross_service/resilient_service/test/test_runner_integ.py new file mode 100644 index 00000000000..0f31312dbc4 --- /dev/null +++ b/python/cross_service/resilient_service/test/test_runner_integ.py @@ -0,0 +1,35 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +import logging + +import pytest + +from auto_scaler import AutoScaler +from load_balancer import LoadBalancer +from parameters import ParameterHelper +from recommendation_service import RecommendationService +import runner + +@pytest.mark.integ +def test_runner_integ(input_mocker, caplog): + caplog.set_level(logging.INFO) + prefix = 'doc-example-test-resilience' + table_name = 'doc-example-test-recommendation-service' + recommendation = RecommendationService.from_client(table_name) + autoscaler = AutoScaler.from_client(prefix) + loadbalancer = LoadBalancer.from_client(prefix) + param_helper = ParameterHelper.from_client(recommendation.table_name) + scenario = runner.Runner('test/resources',recommendation, autoscaler, loadbalancer, param_helper) + + input_mocker.mock_answers([ + '', '', 'y', '', # deploy + '1', '2', '3', '3', '3', '3', '3', '3', '3', # demo + 'y', # destroy + ]) + + scenario.deploy() + scenario.demo() + scenario.destroy() + + assert f"Table {table_name} deleted." in caplog.text diff --git a/python/example_code/auto-scaling/README.md b/python/example_code/auto-scaling/README.md index 4fc93eb63b3..578b9e9f7e7 100644 --- a/python/example_code/auto-scaling/README.md +++ b/python/example_code/auto-scaling/README.md @@ -1,4 +1,4 @@ - + # Auto Scaling code examples for the SDK for Python ## Overview @@ -12,7 +12,7 @@ Shows how to use the AWS SDK for Python (Boto3) to work with Amazon EC2 Auto Sca ## ⚠ Important -* Running this code might result in charges to your AWS account. +* Running this code might result in charges to your AWS account. 
For more details, see [AWS Pricing](https://aws.amazon.com/pricing/?aws-products-pricing.sort-by=item.additionalFields.productNameLowercase&aws-products-pricing.sort-order=asc&awsf.Free%20Tier%20Type=*all&awsf.tech-category=*all) and [Free Tier](https://aws.amazon.com/free/?all-free-tier.sort-by=item.additionalFields.SortRank&all-free-tier.sort-order=asc&awsf.Free%20Tier%20Types=*all&awsf.Free%20Tier%20Categories=*all). * Running the tests might result in charges to your AWS account. * We recommend that you grant your code least privilege. At most, grant only the minimum permissions required to perform the task. For more information, see [Grant least privilege](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html#grant-least-privilege). * This code is not tested in every AWS Region. For more information, see [AWS Regional Services](https://aws.amazon.com/about-aws/global-infrastructure/regional-product-services). @@ -35,12 +35,18 @@ python -m pip install -r requirements.txt + +### Get started + +* [Hello Amazon EC2 Auto Scaling](hello.py#L4) (`DescribeAutoScalingGroups`) + ### Single actions Code excerpts that show you how to call individual service functions. +* [Attach an ELB target group to an Auto Scaling group](../../cross_service/resilient_service/auto_scaler.py#L382) (`AttachLoadBalancerTargetGroups`) * [Create a group](action_wrapper.py#L28) (`CreateAutoScalingGroup`) -* [Delete a group](action_wrapper.py#L75) (`DeleteAutoScalingGroup`) +* [Delete a group](../../cross_service/resilient_service/auto_scaler.py#L403) (`DeleteAutoScalingGroup`) * [Disable metrics collection for a group](action_wrapper.py#L219) (`DisableMetricsCollection`) * [Enable metrics collection for a group](action_wrapper.py#L201) (`EnableMetricsCollection`) * [Get information about groups](action_wrapper.py#L93) (`DescribeAutoScalingGroups`) @@ -55,7 +61,8 @@ Code excerpts that show you how to call individual service functions. 
Code examples that show you how to accomplish a specific task by calling multiple functions within the same service. -* [Manage groups and instances](scenario_groups_and_instances.py) +* [Build and manage a resilient service](../../cross_service/resilient_service/runner.py) +* [Manage groups and instances](scenario_groups_and_instances.py) ## Run the examples @@ -65,7 +72,40 @@ functions within the same service. +#### Hello Amazon EC2 Auto Scaling + +This example shows you how to get started using Amazon EC2 Auto Scaling. + +``` +python hello.py +``` + + +#### Build and manage a resilient service + +This example shows you how to create a load-balanced web service that returns book, movie, and song recommendations. The example shows how the service responds to failures, and how to restructure the service for more resilience when failures occur. +* Use an Amazon EC2 Auto Scaling group to create Amazon Elastic Compute Cloud (Amazon EC2) instances based on a launch template and to keep the number of instances in a specified range. +* Handle and distribute HTTP requests with Elastic Load Balancing. +* Monitor the health of instances in an Auto Scaling group and forward requests only to healthy instances. +* Run a Python web server on each EC2 instance to handle HTTP requests. The web server responds with recommendations and health checks. +* Simulate a recommendation service with an Amazon DynamoDB table. +* Control web server response to requests and health checks by updating AWS Systems Manager parameters. + + + + +Start the example by running the following at a command prompt: + +``` +python ../../cross_service/resilient_service/runner.py +``` + + + +Complete details and instructions on how to run this example can be found in the +[README](../../cross_service/resilient_service/README.md) for the example. 
+ #### Manage groups and instances @@ -87,6 +127,7 @@ Start the example by running the following at a command prompt: python scenario_groups_and_instances.py ``` + diff --git a/python/example_code/auto-scaling/hello.py b/python/example_code/auto-scaling/hello.py new file mode 100644 index 00000000000..875b4c7d3af --- /dev/null +++ b/python/example_code/auto-scaling/hello.py @@ -0,0 +1,27 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +# snippet-start:[python.example_code.auto-scaling.Hello] +import boto3 + +def hello_autoscaling(autoscaling_client): + """ + Use the AWS SDK for Python (Boto3) to create an Amazon EC2 Auto Scaling client and list + some of the Auto Scaling groups in your account. + This example uses the default settings specified in your shared credentials + and config files. + + :param autoscaling_client: A Boto3 Amazon EC2 Auto Scaling client object. + """ + print("Hello, Amazon EC2 Auto Scaling! Let's list up to ten of your Auto Scaling groups:") + response = autoscaling_client.describe_auto_scaling_groups() + groups = response.get('AutoScalingGroups', []) + if groups: + for group in groups: + print(f"\t{group['AutoScalingGroupName']}: {group['AvailabilityZones']}") + else: + print("There are no Auto Scaling groups in your account.") + +if __name__ == '__main__': + hello_autoscaling(boto3.client('autoscaling')) +# snippet-end:[python.example_code.auto-scaling.Hello] diff --git a/python/example_code/ec2/README.md b/python/example_code/ec2/README.md index cc271e03432..84cbb618869 100644 --- a/python/example_code/ec2/README.md +++ b/python/example_code/ec2/README.md @@ -1,4 +1,4 @@ - + # Amazon EC2 code examples for the SDK for Python ## Overview @@ -12,7 +12,7 @@ Shows how to use the AWS SDK for Python (Boto3) to work with Amazon Elastic Comp ## ⚠ Important -* Running this code might result in charges to your AWS account. 
+* Running this code might result in charges to your AWS account. For more details, see [AWS Pricing](https://aws.amazon.com/pricing/?aws-products-pricing.sort-by=item.additionalFields.productNameLowercase&aws-products-pricing.sort-order=asc&awsf.Free%20Tier%20Type=*all&awsf.tech-category=*all) and [Free Tier](https://aws.amazon.com/free/?all-free-tier.sort-by=item.additionalFields.SortRank&all-free-tier.sort-order=asc&awsf.Free%20Tier%20Types=*all&awsf.Free%20Tier%20Categories=*all). * Running the tests might result in charges to your AWS account. * We recommend that you grant your code least privilege. At most, grant only the minimum permissions required to perform the task. For more information, see [Grant least privilege](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html#grant-least-privilege). * This code is not tested in every AWS Region. For more information, see [AWS Regional Services](https://aws.amazon.com/about-aws/global-infrastructure/regional-product-services). @@ -46,18 +46,26 @@ Code excerpts that show you how to call individual service functions. 
* [Allocate an Elastic IP address](elastic_ip.py#L32) (`AllocateAddress`) * [Associate an Elastic IP address with an instance](elastic_ip.py#L54) (`AssociateAddress`) +* [Create a launch template](../../cross_service/resilient_service/auto_scaler.py#L243) (`CreateLaunchTemplate`) * [Create a security group](security_group.py#L33) (`CreateSecurityGroup`) * [Create a security key pair](key_pair.py#L38) (`CreateKeyPair`) * [Create and run an instance](instance.py#L32) (`RunInstances`) +* [Delete a launch template](../../cross_service/resilient_service/auto_scaler.py#L286) (`DeleteLaunchTemplate`) * [Delete a security group](security_group.py#L108) (`DeleteSecurityGroup`) * [Delete a security key pair](key_pair.py#L80) (`DeleteKeyPair`) +* [Describe Availability Zones](../../cross_service/resilient_service/auto_scaler.py#L303) (`DescribeAvailabilityZones`) * [Describe instances](instance.py#L74) (`DescribeInstances`) * [Disassociate an Elastic IP address from an instance](elastic_ip.py#L80) (`DisassociateAddress`) * [Get data about Amazon Machine Images](instance.py#L169) (`DescribeImages`) * [Get data about a security group](security_group.py#L85) (`DescribeSecurityGroups`) * [Get data about instance types](instance.py#L188) (`DescribeInstanceTypes`) +* [Get data about the instance profile associated with an instance](../../cross_service/resilient_service/auto_scaler.py#L150) (`DescribeIamInstanceProfileAssociations`) +* [Get the default VPC](../../cross_service/resilient_service/auto_scaler.py#L461) (`DescribeVpcs`) +* [Get the default subnets for a VPC](../../cross_service/resilient_service/auto_scaler.py#L541) (`DescribeSubnets`) * [List security key pairs](key_pair.py#L62) (`DescribeKeyPairs`) +* [Reboot an instance](../../cross_service/resilient_service/auto_scaler.py#L19) (`RebootInstances`) * [Release an Elastic IP address](elastic_ip.py#L100) (`ReleaseAddress`) +* [Replace the instance profile associated with an 
instance](../../cross_service/resilient_service/auto_scaler.py#L168) (`ReplaceIamInstanceProfileAssociation`) * [Set inbound rules for a security group](security_group.py#L55) (`AuthorizeSecurityGroupIngress`) * [Start an instance](instance.py#L123) (`StartInstances`) * [Stop an instance](instance.py#L146) (`StopInstances`) @@ -68,7 +76,8 @@ Code excerpts that show you how to call individual service functions. Code examples that show you how to accomplish a specific task by calling multiple functions within the same service. -* [Get started with instances](scenario_get_started_instances.py) +* [Build and manage a resilient service](../../cross_service/resilient_service/runner.py) +* [Get started with instances](scenario_get_started_instances.py) ## Run the examples @@ -87,6 +96,32 @@ python hello.py ``` +#### Build and manage a resilient service + +This example shows you how to create a load-balanced web service that returns book, movie, and song recommendations. The example shows how the service responds to failures, and how to restructure the service for more resilience when failures occur. + +* Use an Amazon EC2 Auto Scaling group to create Amazon Elastic Compute Cloud (Amazon EC2) instances based on a launch template and to keep the number of instances in a specified range. +* Handle and distribute HTTP requests with Elastic Load Balancing. +* Monitor the health of instances in an Auto Scaling group and forward requests only to healthy instances. +* Run a Python web server on each EC2 instance to handle HTTP requests. The web server responds with recommendations and health checks. +* Simulate a recommendation service with an Amazon DynamoDB table. +* Control web server response to requests and health checks by updating AWS Systems Manager parameters. 
+ + + + +Start the example by running the following at a command prompt: + +``` +python ../../cross_service/resilient_service/runner.py +``` + + + +Complete details and instructions on how to run this example can be found in the +[README](../../cross_service/resilient_service/README.md) for the example. + + #### Get started with instances This example shows you how to do the following: @@ -106,6 +141,7 @@ Start the example by running the following at a command prompt: python scenario_get_started_instances.py ``` + diff --git a/python/example_code/elastic-load-balancing/README.md b/python/example_code/elastic-load-balancing/README.md new file mode 100644 index 00000000000..0098ad5b4ab --- /dev/null +++ b/python/example_code/elastic-load-balancing/README.md @@ -0,0 +1,135 @@ + +# ELB code examples for the SDK for Python + +## Overview + +Shows how to use the AWS SDK for Python (Boto3) to work with Elastic Load Balancing (ELB). + + +Most of the example code for Elastic Load Balancing can be found in the +[python/cross_service/resilient_service](../../cross_service/resilient_service) folder, +which contains the [Build and manage a resilient service](../../cross_service/resilient_service/README.md) +scenario. + + +*ELB automatically distributes your incoming traffic across multiple targets, such as EC2 instances, containers, and IP addresses, in one or more Availability Zones.* + +## ⚠ Important + +* Running this code might result in charges to your AWS account. For more details, see [AWS Pricing](https://aws.amazon.com/pricing/?aws-products-pricing.sort-by=item.additionalFields.productNameLowercase&aws-products-pricing.sort-order=asc&awsf.Free%20Tier%20Type=*all&awsf.tech-category=*all) and [Free Tier](https://aws.amazon.com/free/?all-free-tier.sort-by=item.additionalFields.SortRank&all-free-tier.sort-order=asc&awsf.Free%20Tier%20Types=*all&awsf.Free%20Tier%20Categories=*all). +* Running the tests might result in charges to your AWS account. 
+* We recommend that you grant your code least privilege. At most, grant only the minimum permissions required to perform the task. For more information, see [Grant least privilege](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html#grant-least-privilege). +* This code is not tested in every AWS Region. For more information, see [AWS Regional Services](https://aws.amazon.com/about-aws/global-infrastructure/regional-product-services). + + + + +## Code examples + +### Prerequisites + +For prerequisites, see the [README](../../README.md#Prerequisites) in the `python` folder. + +Install the packages required by these examples by running the following in a virtual environment: + +``` +python -m pip install -r requirements.txt +``` + + + + + +### Get started + +* [Hello ELB](hello.py#L4) (`DescribeLoadBalancers`) + +### Single actions + +Code excerpts that show you how to call individual service functions. + +* [Create a listener for a load balancer](../../cross_service/resilient_service/load_balancer.py#L121) (`CreateListener`) +* [Create a target group](../../cross_service/resilient_service/load_balancer.py#L64) (`CreateTargetGroup`) +* [Create an Application Load Balancer](../../cross_service/resilient_service/load_balancer.py#L120) (`CreateLoadBalancer`) +* [Delete a load balancer](../../cross_service/resilient_service/load_balancer.py#L159) (`DeleteLoadBalancer`) +* [Delete a target group](../../cross_service/resilient_service/load_balancer.py#L95) (`DeleteTargetGroup`) +* [Get the endpoint of a load balancer](../../cross_service/resilient_service/load_balancer.py#L47) (`DescribeLoadBalancers`) +* [Get the health of a target group](../../cross_service/resilient_service/load_balancer.py#L200) (`DescribeTargetHealth`) + +### Scenarios + +Code examples that show you how to accomplish a specific task by calling multiple +functions within the same service. 
+ +* [Build and manage a resilient service](../../cross_service/resilient_service/runner.py) + +## Run the examples + +### Instructions + + + + + +#### Hello ELB + +This example shows you how to get started using ELB. + +``` +python hello.py +``` + + +#### Build and manage a resilient service + +This example shows you how to create a load-balanced web service that returns book, movie, and song recommendations. The example shows how the service responds to failures, and how to restructure the service for more resilience when failures occur. + +* Use an Amazon EC2 Auto Scaling group to create Amazon Elastic Compute Cloud (Amazon EC2) instances based on a launch template and to keep the number of instances in a specified range. +* Handle and distribute HTTP requests with Elastic Load Balancing. +* Monitor the health of instances in an Auto Scaling group and forward requests only to healthy instances. +* Run a Python web server on each EC2 instance to handle HTTP requests. The web server responds with recommendations and health checks. +* Simulate a recommendation service with an Amazon DynamoDB table. +* Control web server response to requests and health checks by updating AWS Systems Manager parameters. + + + + +Start the example by running the following at a command prompt: + +``` +python ../../cross_service/resilient_service/runner.py +``` + + + +Complete details and instructions on how to run this example can be found in the +[README](../../cross_service/resilient_service/README.md) for the example. + + +### Tests + +⚠ Running tests might result in charges to your AWS account. + + +To find instructions for running these tests, see the [README](../../README.md#Tests) +in the `python` folder. 
+ + + + + + +## Additional resources + +* [ELB User Guide](https://docs.aws.amazon.com/elasticloadbalancing/latest/userguide/what-is-load-balancing.html) +* [ELB API Reference](https://docs.aws.amazon.com/elasticloadbalancing/latest/APIReference/Welcome.html) +* [SDK for Python ELB reference](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/elbv2.html) + + + + +--- + +Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + +SPDX-License-Identifier: Apache-2.0 \ No newline at end of file diff --git a/python/example_code/elastic-load-balancing/hello.py b/python/example_code/elastic-load-balancing/hello.py new file mode 100644 index 00000000000..9e712ffe53b --- /dev/null +++ b/python/example_code/elastic-load-balancing/hello.py @@ -0,0 +1,26 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +# snippet-start:[python.example_code.elbv2.Hello] +import boto3 + +def hello_elbv2(elbv2_client): + """ + Use the AWS SDK for Python (Boto3) to create an Elastic Load Balancing V2 client and list + up to ten of the load balancers for your account. + This example uses the default settings specified in your shared credentials + and config files. + + :param elbv2_client: A Boto3 Elastic Load Balancing V2 client object. + """ + print("Hello, Elastic Load Balancing! 
Let's list some of your load balancers:") + load_balancers = elbv2_client.describe_load_balancers(PageSize=10).get('LoadBalancers', []) + if load_balancers: + for lb in load_balancers: + print(f"\t{lb['LoadBalancerName']}: {lb['DNSName']}") + else: + print("Your account doesn't have any load balancers.") + +if __name__ == '__main__': + hello_elbv2(boto3.client('elbv2')) +# snippet-end:[python.example_code.elbv2.Hello] diff --git a/python/example_code/elastic-load-balancing/requirements.txt b/python/example_code/elastic-load-balancing/requirements.txt new file mode 100644 index 00000000000..621e276912d --- /dev/null +++ b/python/example_code/elastic-load-balancing/requirements.txt @@ -0,0 +1,2 @@ +boto3>=1.26.79 +pytest>=7.2.1 diff --git a/python/example_code/iam/README.md b/python/example_code/iam/README.md index 042e61064f5..82c79f793ba 100644 --- a/python/example_code/iam/README.md +++ b/python/example_code/iam/README.md @@ -1,4 +1,4 @@ - + # IAM code examples for the SDK for Python ## Overview @@ -12,7 +12,7 @@ Shows how to use the AWS SDK for Python (Boto3) to work with AWS Identity and Ac ## ⚠ Important -* Running this code might result in charges to your AWS account. +* Running this code might result in charges to your AWS account. For more details, see [AWS Pricing](https://aws.amazon.com/pricing/?aws-products-pricing.sort-by=item.additionalFields.productNameLowercase&aws-products-pricing.sort-order=asc&awsf.Free%20Tier%20Type=*all&awsf.tech-category=*all) and [Free Tier](https://aws.amazon.com/free/?all-free-tier.sort-by=item.additionalFields.SortRank&all-free-tier.sort-order=asc&awsf.Free%20Tier%20Types=*all&awsf.Free%20Tier%20Categories=*all). * Running the tests might result in charges to your AWS account. * We recommend that you grant your code least privilege. At most, grant only the minimum permissions required to perform the task. 
For more information, see [Grant least privilege](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html#grant-least-privilege). * This code is not tested in every AWS Region. For more information, see [AWS Regional Services](https://aws.amazon.com/about-aws/global-infrastructure/regional-product-services). @@ -48,11 +48,13 @@ Code excerpts that show you how to call individual service functions. * [Create a user](user_wrapper.py#L25) (`CreateUser`) * [Create an access key](access_key_wrapper.py#L21) (`CreateAccessKey`) * [Create an alias for an account](account_wrapper.py#L23) (`CreateAccountAlias`) +* [Create an instance profile](../../cross_service/resilient_service/auto_scaler.py#L68) (`CreateInstanceProfile`) * [Delete a policy](policy_wrapper.py#L63) (`DeletePolicy`) * [Delete a role](role_wrapper.py#L94) (`DeleteRole`) * [Delete a user](user_wrapper.py#L44) (`DeleteUser`) * [Delete an access key](access_key_wrapper.py#L43) (`DeleteAccessKey`) * [Delete an account alias](account_wrapper.py#L42) (`DeleteAccountAlias`) +* [Delete an instance profile](../../cross_service/resilient_service/auto_scaler.py#L210) (`DeleteInstanceProfile`) * [Detach a policy from a role](policy_wrapper.py#L229) (`DetachRolePolicy`) * [Detach a policy from a user](user_wrapper.py#L116) (`DetachUserPolicy`) * [Generate a credential report](account_wrapper.py#L121) (`GenerateCredentialReport`) @@ -81,13 +83,14 @@ Code excerpts that show you how to call individual service functions. Code examples that show you how to accomplish a specific task by calling multiple functions within the same service. 
-* [Create a user and assume a role](scenario_create_user_assume_role.py) -* [Create read-only and read-write users](user_wrapper.py) -* [Manage access keys](access_key_wrapper.py) -* [Manage policies](policy_wrapper.py) -* [Manage roles](role_wrapper.py) -* [Manage your account](account_wrapper.py) -* [Roll back a policy version](policy_wrapper.py) +* [Build and manage a resilient service](../../cross_service/resilient_service/runner.py) +* [Create a user and assume a role](scenario_create_user_assume_role.py) +* [Create read-only and read-write users](user_wrapper.py) +* [Manage access keys](access_key_wrapper.py) +* [Manage policies](policy_wrapper.py) +* [Manage roles](role_wrapper.py) +* [Manage your account](account_wrapper.py) +* [Roll back a policy version](policy_wrapper.py) ## Run the examples @@ -99,6 +102,32 @@ functions within the same service. +#### Build and manage a resilient service + +This example shows you how to create a load-balanced web service that returns book, movie, and song recommendations. The example shows how the service responds to failures, and how to restructure the service for more resilience when failures occur. + +* Use an Amazon EC2 Auto Scaling group to create Amazon Elastic Compute Cloud (Amazon EC2) instances based on a launch template and to keep the number of instances in a specified range. +* Handle and distribute HTTP requests with Elastic Load Balancing. +* Monitor the health of instances in an Auto Scaling group and forward requests only to healthy instances. +* Run a Python web server on each EC2 instance to handle HTTP requests. The web server responds with recommendations and health checks. +* Simulate a recommendation service with an Amazon DynamoDB table. +* Control web server response to requests and health checks by updating AWS Systems Manager parameters. 
+ + + + +Start the example by running the following at a command prompt: + +``` +python ../../cross_service/resilient_service/runner.py +``` + + + +Complete details and instructions on how to run this example can be found in the +[README](../../cross_service/resilient_service/README.md) for the example. + + #### Create a user and assume a role This example shows you how to create a user and assume a role. @@ -117,6 +146,7 @@ Start the example by running the following at a command prompt: python scenario_create_user_assume_role.py ``` + @@ -138,6 +168,7 @@ Start the example by running the following at a command prompt: python user_wrapper.py ``` + @@ -158,6 +189,7 @@ Start the example by running the following at a command prompt: python access_key_wrapper.py ``` + @@ -179,6 +211,7 @@ Start the example by running the following at a command prompt: python policy_wrapper.py ``` + @@ -199,6 +232,7 @@ Start the example by running the following at a command prompt: python role_wrapper.py ``` + @@ -220,6 +254,7 @@ Start the example by running the following at a command prompt: python account_wrapper.py ``` + @@ -241,6 +276,7 @@ Start the example by running the following at a command prompt: python policy_wrapper.py ``` + diff --git a/python/example_code/s3/README.md b/python/example_code/s3/README.md index ce3d58d157c..eeadc27e215 100644 --- a/python/example_code/s3/README.md +++ b/python/example_code/s3/README.md @@ -1,4 +1,4 @@ - + # Amazon S3 code examples for the SDK for Python ## Overview @@ -63,7 +63,7 @@ Code excerpts that show you how to call individual service functions. 
* [Get the lifecycle configuration of a bucket](s3_basics/bucket_wrapper.py#L281) (`GetBucketLifecycleConfiguration`) * [Get the policy for a bucket](s3_basics/bucket_wrapper.py#L230) (`GetBucketPolicy`) * [List buckets](s3_basics/bucket_wrapper.py#L80) (`ListBuckets`) -* [List objects in a bucket](s3_basics/object_wrapper.py#L87) (`ListObjects`) +* [List objects in a bucket](s3_basics/object_wrapper.py#L21) (`ListObjectsV2`) * [Set a new ACL for a bucket](s3_basics/bucket_wrapper.py#L115) (`PutBucketAcl`) * [Set the ACL of an object](s3_basics/object_wrapper.py#L207) (`PutObjectAcl`) * [Upload an object to a bucket](s3_basics/object_wrapper.py#L33) (`PutObject`) @@ -118,6 +118,7 @@ Start the example by running the following at a command prompt: python s3_basics/presigned_url.py ``` + @@ -140,6 +141,7 @@ Start the example by running the following at a command prompt: python s3_basics/scenario_getting_started.py ``` + @@ -157,6 +159,7 @@ Start the example by running the following at a command prompt: python s3_versioning/batch_versioning.py ``` + @@ -174,6 +177,7 @@ Start the example by running the following at a command prompt: python file_transfer/file_transfer.py ``` + @@ -196,6 +200,7 @@ Start the example by running the following at a command prompt: python s3_versioning/versioning.py ``` + diff --git a/python/test_tools/autoscaling_stubber.py b/python/test_tools/autoscaling_stubber.py index 3a9448d1f61..38e2d60d69c 100644 --- a/python/test_tools/autoscaling_stubber.py +++ b/python/test_tools/autoscaling_stubber.py @@ -59,7 +59,9 @@ def stub_describe_auto_scaling_groups(self, group_names, groups, error_code=None def stub_terminate_instance_in_auto_scaling_group(self, instance_id, decrement, activity, error_code=None): expected_params = {'InstanceId': instance_id, 'ShouldDecrementDesiredCapacity': decrement} - response = {'Activity': activity} + response = {} + if activity is not None: + response['Activity'] = activity self._stub_bifurcator( 
'terminate_instance_in_auto_scaling_group', expected_params, response, error_code=error_code) @@ -95,3 +97,9 @@ def stub_disable_metrics_collection(self, group_name, error_code=None): response = {} self._stub_bifurcator( 'disable_metrics_collection', expected_params, response, error_code=error_code) + + def stub_attach_load_balancer_target_groups(self, asg_name, tg_arns, error_code=None): + expected_params = {'AutoScalingGroupName': asg_name, 'TargetGroupARNs': tg_arns} + response = {} + self._stub_bifurcator( + 'attach_load_balancer_target_groups', expected_params, response, error_code=error_code) diff --git a/python/test_tools/ec2_stubber.py b/python/test_tools/ec2_stubber.py index 876a3603a93..4f5270dde47 100644 --- a/python/test_tools/ec2_stubber.py +++ b/python/test_tools/ec2_stubber.py @@ -64,6 +64,17 @@ def stub_describe_vpcs(self, vpcs, vpc_filters=None, error_code=None): self._stub_bifurcator( 'describe_vpcs', expected_params, response, error_code=error_code) + def stub_describe_subnets(self, vpc_id, zones, subnet_ids, error_code=None): + expected_params = { + 'Filters': [ + {'Name': 'vpc-id', 'Values': [vpc_id]}, + {'Name': 'availability-zone', 'Values': zones}, + {'Name': 'default-for-az', 'Values': ['true']}]} + response = {'Subnets': [{'SubnetId': sub_id} for sub_id in subnet_ids]} + self._stub_bifurcator( + 'describe_subnets', expected_params, response, error_code=error_code) + + def stub_create_security_group( self, group_name, group_id, group_description=ANY, vpc_id=None, error_code=None): expected_params = { @@ -82,13 +93,20 @@ def stub_delete_security_group(self, group_id, error_code=None): 'delete_security_group', expected_params, error_code=error_code) def stub_authorize_security_group_ingress( - self, group_id, ip_permissions=None, source_group_name=None, - error_code=None): + self, group_id, ip_permissions=None, source_group_name=None, cidr_ip=None,port=None, + ip_protocol=None, error_code=None): expected_params = {'GroupId': group_id} if 
ip_permissions is not None: expected_params['IpPermissions'] = ip_permissions if source_group_name is not None: expected_params['SourceSecurityGroupName'] = source_group_name + if cidr_ip is not None: + expected_params['CidrIp'] = cidr_ip + if port is not None: + expected_params['FromPort'] = port + expected_params['ToPort'] = port + if ip_protocol is not None: + expected_params['IpProtocol'] = ip_protocol response = {'Return': True} self._stub_bifurcator( 'authorize_security_group_ingress', expected_params, response, error_code=error_code) @@ -218,8 +236,12 @@ def stub_modify_network_interface_attribute( 'modify_network_interface_attribute', expected_params, error_code=error_code) - def stub_describe_security_groups(self, groups, error_code=None): + def stub_describe_security_groups(self, groups, vpc_id=None, error_code=None): expected_params = {'GroupIds': [group['id'] for group in groups]} + if vpc_id is not None: + expected_params = {'Filters': [ + {'Name': 'group-name', 'Values': ['default']}, + {'Name': 'vpc-id', 'Values': [vpc_id]}]} response = {'SecurityGroups': [{ 'GroupId': group['id'], 'GroupName': group['group_name'], @@ -244,11 +266,17 @@ def stub_describe_launch_templates(self, template_names, templates, error_code=N self._stub_bifurcator( 'describe_launch_templates', expected_params, response, error_code=error_code) - def stub_create_launch_template(self, template_name, inst_type, ami_id, error_code=None): + def stub_create_launch_template( + self, template_name, inst_type, ami_id, inst_profile=None, user_data=None, + error_code=None): expected_params = { 'LaunchTemplateName': template_name, 'LaunchTemplateData': { 'InstanceType': inst_type, 'ImageId': ami_id}} + if inst_profile is not None: + expected_params['LaunchTemplateData']['IamInstanceProfile'] = {'Name': inst_profile} + if user_data is not None: + expected_params['LaunchTemplateData']['UserData'] = user_data response = {'LaunchTemplate': {'LaunchTemplateName': template_name}} 
self._stub_bifurcator( 'create_launch_template', expected_params, response, error_code=error_code) @@ -280,3 +308,15 @@ def stub_describe_instance_types(self, inst_types, filters=ANY, error_code=None) response = {'InstanceTypes': [{'InstanceType': inst_type} for inst_type in inst_types]} self._stub_bifurcator( 'describe_instance_types', expected_params, response, error_code=error_code) + + def stub_describe_iam_instance_profile_associations(self, instance_id, association_id, error_code=None): + expected_params = {'Filters': [{'Name': 'instance-id', 'Values': [instance_id]}]} + response = {'IamInstanceProfileAssociations': [{'AssociationId': association_id}]} + self._stub_bifurcator( + 'describe_iam_instance_profile_associations', expected_params, response, error_code=error_code) + + def stub_replace_iam_instance_profile_association(self, new_profile_name, association_id, error_code=None): + expected_params = {'IamInstanceProfile': {'Name': new_profile_name}, 'AssociationId': association_id} + response = {} + self._stub_bifurcator( + 'replace_iam_instance_profile_association', expected_params, response, error_code=error_code) diff --git a/python/test_tools/elbv2_stubber.py b/python/test_tools/elbv2_stubber.py new file mode 100644 index 00000000000..e8c39050f4b --- /dev/null +++ b/python/test_tools/elbv2_stubber.py @@ -0,0 +1,96 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Stub functions that are used by the Elastic Load Balancing v2 (ELB) unit tests. + +When tests are run against an actual AWS account, the stubber class does not +set up stubs and passes all calls through to the Boto3 client. +""" + +from test_tools.example_stubber import ExampleStubber + + +class ELBv2Stubber(ExampleStubber): + """ + A class that implements stub functions used by ELB v2 unit tests. 
+ + The stubbed functions expect certain parameters to be passed to them as + part of the tests, and raise errors if the parameters are not as expected. + """ + def __init__(self, client, use_stubs=True): + """ + Initializes the object with a specific client and configures it for + stubbing or AWS passthrough. + + :param client: A Boto3 ELB v2 client. + :param use_stubs: When True, use stubs to intercept requests. Otherwise, + pass requests through to AWS. + """ + super().__init__(client, use_stubs) + + def stub_create_target_group(self, tg_name, protocol, port, vpc_id, healthcheck, tg_arn, error_code=None): + expected_params = { + 'Name': tg_name, 'Protocol': protocol, 'Port': port, 'VpcId': vpc_id, + 'HealthCheckPath': healthcheck['path'], 'HealthCheckIntervalSeconds': healthcheck['interval'], + 'HealthCheckTimeoutSeconds': healthcheck['timeout'], 'HealthyThresholdCount': healthcheck['thresh_healthy'], + 'UnhealthyThresholdCount': healthcheck['thresh_unhealthy']} + response = {'TargetGroups': [{ + 'TargetGroupName': tg_name, 'TargetGroupArn': tg_arn, 'Protocol': protocol, 'Port': port}]} + self._stub_bifurcator( + 'create_target_group', expected_params, response, error_code=error_code) + + def stub_describe_target_groups(self, tg_names, tg_arns, error_code=None): + expected_params = {'Names': tg_names} + response = {'TargetGroups': [{'TargetGroupArn': tg_arn} for tg_arn in tg_arns]} + self._stub_bifurcator( + 'describe_target_groups', expected_params, response, error_code=error_code) + + def stub_create_load_balancer(self, lb_name, subnet_ids, protocol, port, lb_arn, lb_dns_name, error_code=None): + expected_params = {'Name': lb_name, 'Subnets': subnet_ids} + response = {'LoadBalancers': [{'LoadBalancerArn': lb_arn, 'DNSName': lb_dns_name}]} + self._stub_bifurcator( + 'create_load_balancer', expected_params, response, error_code=error_code) + + def stub_describe_load_balancers(self, names, dns_names=None, arns=None, error_code=None): + expected_params = {'Names': 
names} + response = {'LoadBalancers': [{'State': {'Code': 'active'}}]} + if dns_names is not None: + for index, dns_name in enumerate(dns_names): + response['LoadBalancers'][index]['DNSName'] = dns_name + if arns is not None: + for index, arn in enumerate(arns): + response['LoadBalancers'][index]['LoadBalancerArn'] = arn + self._stub_bifurcator( + 'describe_load_balancers', expected_params, response, error_code=error_code) + + def stub_delete_load_balancer(self, arn, error_code=None): + expected_params = {'LoadBalancerArn': arn} + response = {} + self._stub_bifurcator( + 'delete_load_balancer', expected_params, response, error_code=error_code) + + + def stub_create_listener(self, lb_arn, protocol, port, tg_arn, error_code=None): + expected_params = { + 'LoadBalancerArn': lb_arn, 'Protocol': protocol, 'Port': port, + 'DefaultActions': [{'Type': 'forward', 'TargetGroupArn': tg_arn}]} + response = {} + self._stub_bifurcator( + 'create_listener', expected_params, response, error_code=error_code) + + def stub_describe_target_health(self, tg_arn, tg_descs, error_code=None): + expected_params = {'TargetGroupArn': tg_arn} + response = { + 'TargetHealthDescriptions': [ + {'TargetHealth': {'State': desc['state'], 'Reason': desc['reason'], 'Description': desc['desc']}, + 'Target': {'Id': desc['id'], 'Port': desc['port']}, + } for desc in tg_descs]} + self._stub_bifurcator( + 'describe_target_health', expected_params, response, error_code=error_code) + + def stub_delete_target_group(self, tg_arn, error_code=None): + expected_params = {'TargetGroupArn': tg_arn} + response = {} + self._stub_bifurcator( + 'delete_target_group', expected_params, response, error_code=error_code) diff --git a/python/test_tools/iam_stubber.py b/python/test_tools/iam_stubber.py index 7610b982fb8..374dd715198 100644 --- a/python/test_tools/iam_stubber.py +++ b/python/test_tools/iam_stubber.py @@ -437,19 +437,35 @@ def stub_deactivate_mfa_device(self, user_name, serial_number, error_code=None): 
self._stub_bifurcator( 'deactivate_mfa_device', expected_params, error_code=error_code) - def stub_create_instance_profile(self, profile_name, error_code=None): + def stub_create_instance_profile(self, profile_name, profile_arn=None, error_code=None): expected_params = {'InstanceProfileName': profile_name} + if profile_arn is None: + profile_arn = f'arn:aws:iam::123456EXAMPLE:instance-profile/{profile_name}' response = {'InstanceProfile': { 'Path': '/', 'InstanceProfileName': profile_name, 'InstanceProfileId': 'EXAMPLEEXAMPLEEXAMPLE', - 'Arn': f'arn:aws:iam::123456EXAMPLE:instance-profile/{profile_name}', + 'Arn': profile_arn, 'CreateDate': datetime.datetime.now(), 'Roles': [] }} self._stub_bifurcator( 'create_instance_profile', expected_params, response, error_code=error_code) + def stub_get_instance_profile(self, profile_name, profile_arn, error_code=None): + expected_params = {'InstanceProfileName': profile_name} + response = { + 'InstanceProfile': { + 'Path': '/', + 'InstanceProfileName': profile_name, + 'InstanceProfileId': 'EXAMPLEEXAMPLEEXAMPLE', + 'Arn': profile_arn, + 'CreateDate': datetime.datetime.now(), + 'Roles': []}, + 'ResponseMetadata': {'HTTPStatusCode': 200}} + self._stub_bifurcator( + 'get_instance_profile', expected_params, response, error_code=error_code) + def stub_add_role_to_instance_profile( self, profile_name, role_name, error_code=None): expected_params = {'InstanceProfileName': profile_name, 'RoleName': role_name} diff --git a/python/test_tools/ssm_stubber.py b/python/test_tools/ssm_stubber.py index 926f9710891..130d5cb206c 100644 --- a/python/test_tools/ssm_stubber.py +++ b/python/test_tools/ssm_stubber.py @@ -32,13 +32,16 @@ def __init__(self, client, use_stubs=True): """ super().__init__(client, use_stubs) - def stub_send_command(self, instance_ids, commands, command_id, error_code=None): + def stub_send_command(self, instance_ids, commands, command_id=None, timeout=3600, error_code=None): expected_parameters = { 'InstanceIds': 
instance_ids, 'DocumentName': 'AWS-RunShellScript', - 'Parameters': {'commands': commands}, - 'TimeoutSeconds': 3600} - response = {'Command': {'CommandId': command_id}} + 'Parameters': {'commands': commands}} + if timeout is not None: + expected_parameters['TimeoutSeconds'] = timeout + response = {} + if command_id is not None: + response['Command'] = {'CommandId': command_id} self._stub_bifurcator( 'send_command', expected_parameters, response, error_code=error_code) @@ -54,3 +57,21 @@ def stub_get_parameters_by_path(self, names, values, path=ANY, error_code=None): response = {'Parameters': [{'Name': name, 'Value': value} for name, value in zip(names, values)]} self._stub_bifurcator( 'get_parameters_by_path', expected_params, response, error_code=error_code) + + def stub_get_parameter(self, name, value, error_code=None): + expected_params = {'Name': name} + response = {'Parameter': {'Value': value}} + self._stub_bifurcator( + 'get_parameter', expected_params, response, error_code=error_code) + + def stub_put_parameter(self, name, value, error_code=None): + expected_params = {'Name': name, 'Value': value, 'Overwrite': True} + response = {} + self._stub_bifurcator( + 'put_parameter', expected_params, response, error_code=error_code) + + def stub_describe_instance_information(self, instance_ids, error_code=None): + expected_params = {} + response = {'InstanceInformationList': [{'InstanceId': instance_id} for instance_id in instance_ids]} + self._stub_bifurcator( + 'describe_instance_information', expected_params, response, error_code=error_code) diff --git a/python/test_tools/stubber_factory.py b/python/test_tools/stubber_factory.py index a7812ed752c..f650e04959a 100644 --- a/python/test_tools/stubber_factory.py +++ b/python/test_tools/stubber_factory.py @@ -23,6 +23,7 @@ from test_tools.config_stubber import ConfigStubber from test_tools.dynamodb_stubber import DynamoStubber from test_tools.ec2_stubber import Ec2Stubber +from test_tools.elbv2_stubber import 
ELBv2Stubber from test_tools.emr_stubber import EmrStubber from test_tools.eventbridge_stubber import EventBridgeStubber from test_tools.glacier_stubber import GlacierStubber @@ -93,6 +94,8 @@ def stubber_factory(service_name): return DynamoStubber elif service_name == 'ec2': return Ec2Stubber + elif service_name == 'elbv2': + return ELBv2Stubber elif service_name == 'emr': return EmrStubber elif service_name == 'events': diff --git a/workflows/resilient_service/README.md b/workflows/resilient_service/README.md index 9566e49fef7..b2bcc44ea1b 100644 --- a/workflows/resilient_service/README.md +++ b/workflows/resilient_service/README.md @@ -32,7 +32,11 @@ an interactive demo that runs at a command prompt. This example is implemented in the following languages: -* [Python](../python/cross_service/resilient_service/README.md) +* [Python](../../python/cross_service/resilient_service/README.md) + +## Additional reading + +* [Community.aws: How to build and manage a resilient service using AWS SDKs](https://community.aws/posts/build-and-manage-a-resilient-service-using-aws-sdks) --- diff --git a/workflows/resilient_service/SPECIFICATION.md b/workflows/resilient_service/SPECIFICATION.md new file mode 100644 index 00000000000..eb1e87e97ac --- /dev/null +++ b/workflows/resilient_service/SPECIFICATION.md @@ -0,0 +1,1046 @@ +# Build and manage a resilient service technical specification + +This document contains the technical specifications for *Build and manage a resilient service*, +a workflow scenario that showcases AWS services and SDKs. It is primarily intended for the AWS code +examples team to use while developing this example in additional languages. + +This document explains the following: + +- Deploying AWS resources and their configurations. +- Flow of the demo and the AWS Systems Manager parameters that simulate failures and how the web + server responds to them. +- Destroying the AWS resources at the end of the example. 
+ +For an introduction to *Build and manage a resilient service*, see the [README.md](README.md). + +--- + +### Table of contents + +- [Architecture](#architecture) +- [HTTP API specification](#http-api-specification) +- [User actions](#user-actions) +- [Common resources](#common-resources) +- [Deploy](#deploy) +- [Demo](#demo) +- [Destroy](#destroy) +- [Other material](#other-material) + +--- + +## Architecture + +This example sets up a web server that is load balanced and made increasingly resilient to failure during +the course of the demonstration. + +1. This example relies on the default VPC hosted by Amazon Virtual Private Cloud (Amazon VPC). +The default VPC in your account should contain all the needed settings for the example to run +and provides an isolated environment for the resources used by the example. +2. An Amazon DynamoDB table acts as a service that recommends books, movies, and songs. The web +service depends on the table to make recommendations. +3. An Amazon EC2 Auto Scaling group creates and manages three EC2 instances in three separate +Availability Zones. Each instance runs a simple Python web server that handles requests, gets +recommendations from the DynamoDB table, and sends responses. The web server also responds +to health checks from the load balancer. +4. An Elastic Load Balancer provides a single endpoint that receives HTTP requests and distributes +them among the EC2 instances. +5. A set of Systems Manager parameters are used to change the behavior of the web server by +simulating failures and acting in a more resilient manner. + +--- + +## HTTP API specification + +The Python web server handles two kinds of HTTP requests. These are common to every language +variation and do not need any additional implementation. The web server is defined in +[server.py](resources/server.py). + +##### GET / + +Returns a recommendation from the DynamoDB table with Amazon EC2 metadata attached that specifies which instance +handled the request. 
+ +###### Response +Content-type: application/json +``` +{ + "MediaType": {"S": "Book"}, + "ItemId":{"N": "0"}, + "Title": {"S": "404 Not Found: A Coloring Book"}, + "Creator": {"S": "The Oatmeal"}, + "Metadata": { + "InstanceId": "i-123456789", + "AvailabilityZone": "us-west-2b"} +} +``` + +##### GET /healthcheck + +Indicates to the load balancer whether the server is healthy by returning a success code (200) +or a failure code (503). + +###### Response +Content-type: application/json +``` +{"success": "True"} +``` + +--- + +## User actions + +This example runs as a console application that goes through a series of stages that +are controlled by Systems Manager parameters. After each stage, the user is presented +with a short menu of choices: + +1. Send a GET request to the load balancer endpoint. +2. Check the health of the load balancer targets. +3. Continue to the next stage. + +The user can select the first two choices multiple times to see how the situation changes as the +underlying resources update, such as when an instance becomes unhealthy. + +For more detail on how this is implemented, see [Demo](#demo). + +--- + +## Common resources + +This example has a set of common resources that are stored in the [resources](resources) folder. + +* [instance_policy.json](resources/instance_policy.json) contains an IAM policy that is used + by the instance profile for each instance in this example. It grants permission to access + the DynamoDB recommendation table and get parameters from Systems Manager. +* [recommendations.json](resources/recommendations.json) contains sample data used to populate + the recommendation table. +* [server.py](resources/server.py) contains Python code that runs the example web server. +* [server_startup_script.sh](resources/server_startup_script.sh) contains a Bash script that + is run when an instance starts. It installs Python packages and starts the Python web server. 
+* [ssm_only_policy.json](resources/ssm_only_policy.json) contains an IAM policy that is used + to set one instance profile to a set of credentials that don't allow access to DynamoDB, + to simulate bad credentials. + +This example uses a set of Systems Manager parameters to simulate failures and control how the +web server responds to them. They must use the exact names and be set to specific values +for the example to work correctly. + +* `doc-example-resilient-architecture-table` specifies the name of the DynamoDB table that the + web server uses to get recommendations. + * [name of your DynamoDB table]: the web server successfully gets items. + * 'this-is-not-a-table': the web server fails to get items. +* `doc-example-resilient-architecture-failure-response` specifies how the web server responds to + a failure to get recommendations from the table. + * 'none': the web server returns a failure code on failure. + * 'static': the web server returns a success code and a static JSON payload on failure. +* `doc-example-resilient-architecture-health-check` specifies how the web server responds to + health checks. + * 'shallow': always return a success code. + * 'deep': return a failure code if it can't connect to the recommendation table. + +--- + +## Deploy + +The reference implementation for this example is in Python. You can find it in +[python/cross_service/resilient_service](../../python/cross_service/resilient_service). + +Break deployment into three phases to make it more comprehensible to the user: introduction, +web server, and load balancing. + +### Introduction + +Start by introducing the workflow and asking permission to continue. This gives the user +a chance to stop the demo if they like. + +``` +---------------------------------------------------------------------------------------- +Welcome to the demonstration of How to Build and Manage a Resilient Service! 
+---------------------------------------------------------------------------------------- +INFO: Found credentials in shared credentials file: ~/.aws/credentials + +For this demo, we'll use an AWS SDK to create several AWS resources to set up a load-balanced +web service endpoint and explore some ways to make it resilient against various kinds of failures. + +Some of the resources created by this demo are: + + * A DynamoDB table that the web service depends on to provide book, movie, and song recommendations. + * An EC2 launch template that defines EC2 instances that each contain a Python web server. + * An EC2 Auto Scaling group that manages EC2 instances across several Availability Zones. + * An Elastic Load Balancing (ELB) load balancer that targets the Auto Scaling group to distribute requests. +---------------------------------------------------------------------------------------- +Press Enter when you're ready to start deploying resources. +``` + +### Web server + +#### Recommendation service + +The recommendation service is a mock service and is a DynamoDB table that the web server calls +directly. Create a DynamoDB table using DynamoDB.CreateTable and specify the following schema: + +``` +AttributeDefinitions=[{ + 'AttributeName': 'MediaType', + 'AttributeType': 'S'},{ + 'AttributeName': 'ItemId', + 'AttributeType': 'N'}], +KeySchema=[{ + 'AttributeName': 'MediaType', + 'KeyType': 'HASH'}, { + 'AttributeName': 'ItemId', + 'KeyType': 'RANGE'}], +``` + +Populate the table by reading [recommendations.json](resources/recommendations.json) and +sending it to DynamoDB by using DynamoDB.BatchWriteItem. + +Output: + +``` +Creating and populating a DynamoDB table named 'doc-example-recommendation-service'. +INFO: Creating table doc-example-recommendation-service... +INFO: Table doc-example-recommendation-service created. +INFO: Populated table doc-example-recommendation-service with items from ../../../workflows/resilient_service/resources/recommendations.json.
+---------------------------------------------------------------------------------------- +``` + +#### Permissions, EC2 launch template, and Auto Scaling group + +Tell the user what you're going to do: + +``` +Creating an EC2 launch template that runs '../../../workflows/resilient_service/resources/server_startup_script.sh' when an instance starts. +This script starts a Python web server defined in the `server.py` script. The web server +listens to HTTP requests on port 80 and responds to requests to '/' and to '/healthcheck'. +For demo purposes, this server is run as the root user. In production, the best practice is to +run a web server, such as Apache, with least-privileged credentials. + +The template also defines an IAM policy that each instance uses to assume a role that grants +permissions to access the DynamoDB recommendation table and Systems Manager parameters +that control the flow of the demo. +``` + +##### Permissions + +As you create permissions, use waiters if your SDK has them. You might also have to insert +pauses when the waiter is not sufficient. + +1. Use IAM.CreatePolicy to create an IAM policy from [instance_policy.json](resources/instance_policy.json). +2. Use IAM.CreateRole to create an IAM role. + Specify an AssumeRolePolicyDocument that lets Amazon EC2 assume the role: + ``` + { + "Version": "2012-10-17", + "Statement": [{ + "Effect": "Allow", + "Principal": {"Service": "ec2.amazonaws.com"}, + "Action": "sts:AssumeRole"}] + } + ``` +3. Use IAM.AttachRolePolicy to attach the policy to the role. +4. Use IAM.CreateInstanceProfile and IAM.AddRoleToInstanceProfile to create an IAM instance + profile and add the role. + +Output: + +``` +INFO: Created policy with ARN arn:aws:iam::123456789012:policy/doc-example-resilience-pol. +INFO: Created role doc-example-resilience-role and attached policy arn:aws:iam::123456789012:policy/doc-example-resilience-pol. +INFO: Created profile doc-example-resilience-prof and added role doc-example-resilience-role. 
+---------------------------------------------------------------------------------------- +``` + +##### EC2 launch template + +1. Get the ID of a current AMI by calling SystemsManager.GetParameter for + `/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2`. +2. Get the startup Bash script for the launch template by reading from + [server_startup_script.sh](resources/server_startup_script.sh). +3. Create a launch template by calling EC2.CreateLaunchTemplate. Specify a small instance type, + such as 't3.micro', the ID of the AMI, the instance profile name, and the start script + as UserData. You must encode the UserData as base64. + ``` + LaunchTemplateData={ + 'InstanceType': 't3.micro', + 'ImageId': ami_id, + 'IamInstanceProfile': {'Name': self.instance_profile_name}, + 'UserData': base64.b64encode(start_server_script.encode(encoding='utf-8')).decode(encoding='utf-8')}) + ``` + +Output: + +``` +INFO: Created launch template doc-example-resilience-template for AMI ami-0167739a362a4b484 on t3.micro. +---------------------------------------------------------------------------------------- +``` + +##### Auto Scaling group + +1. Use EC2.DescribeAvailabilityZones to get the names of the Availability Zones for the +current Region. +2. Use AutoScaling.CreateAutoScalingGroup to create an Auto Scaling group that uses the launch template, +targets the Availability Zones for the Region, and specifies three minimum and maximum instances. + ``` + AvailabilityZones=zones, + LaunchTemplate={ + 'LaunchTemplateName': launch_template_name, 'Version': '$Default'}, + MinSize=group_size, MaxSize=group_size) + ``` + +Output: + +``` +Creating an EC2 Auto Scaling group that maintains three EC2 instances, each in a different +Availability Zone. +INFO: Created EC2 Auto Scaling group doc-example-resilience-template with availability zones ['us-west-2a', 'us-west-2b', 'us-west-2c', 'us-west-2d']. 
+---------------------------------------------------------------------------------------- +At this point, you have EC2 instances created. After each instance starts, it listens for +HTTP requests. You can see these instances in the console or continue with the demo. +---------------------------------------------------------------------------------------- +``` + +Pause at this point to let the user read about what's happened, and to give the instances a +chance to start. + +### Load balancer + +Elastic Load Balancer has two clients. This example uses an Application Load Balancer, so it +targets V2. Be aware that the Auto Scaling group is not the same as the load balancer +target group, although the two are linked. + +1. Use EC2.DescribeVpcs with a Filter of Name = `'is-default'` and Values = `['true']` to +get the default VPC. +2. Use EC2.DescribeSubnets with a Filter to get the default subnets for the VPC. + ``` + Filters=[ + {'Name': 'vpc-id', 'Values': [vpc_id]}, + {'Name': 'availability-zone', 'Values': zones}, + {'Name': 'default-for-az', 'Values': ['true']}]) + ``` +3. Use ELBv2.CreateTargetGroup to create the target group for the load balancer. + Specify the arguments shown in the following snippet: + ``` + response = self.elb_client.create_target_group( + Name=self.target_group_name, + Protocol='HTTP', + Port=80, + HealthCheckPath='/healthcheck', + HealthCheckIntervalSeconds=10, + HealthCheckTimeoutSeconds=5, + HealthyThresholdCount=2, + UnhealthyThresholdCount=2, + VpcId=vpc_id) + ``` +4. Use ELBv2.CreateLoadBalancer to create an Application Load Balancer. Specify the default +subnets for Subnets. +5. Cache the DNSName field of the load balancer. This is the endpoint where you will send + GET requests for the example. +6. 
Use ELBv2.CreateListener to add a listener that forwards requests from the load balancer + endpoint to the target group: + ``` + self.elb_client.create_listener( + LoadBalancerArn=load_balancer['LoadBalancerArn'], + Protocol=target_group['Protocol'], + Port=target_group['Port'], + DefaultActions=[{'Type': 'forward', 'TargetGroupArn': target_group['TargetGroupArn']}]) + ``` +7. Use AutoScaling.AttachLoadBalancerTargetGroups to associate the Auto Scaling group +with the load balancer target group. This completes the linkage between the load balancer +and the instances in the Auto Scaling group. + +Output: + +``` +Creating an Elastic Load Balancing target group and load balancer. The target group +defines how the load balancer connects to instances. The load balancer provides a +single endpoint where clients connect and dispatches requests to instances in the group. + +INFO: Found 4 subnets for the specified zones. +INFO: Created load balancing target group doc-example-resilience-tg. +INFO: Created load balancer doc-example-resilience-lb. +INFO: Waiting for load balancer to be available... +INFO: Load balancer is available! +INFO: Created listener to forward traffic from load balancer doc-example-resilience-lb to target group doc-example-resilience-tg. +INFO: Attached load balancer target group doc-example-resilience-tg to auto scaling group doc-example-resilience-group. +``` + +#### Verify endpoint + +1. Verify that the load balancer endpoint responds to requests by sending it a GET request using + an appropriate HTTP client. You might want to retry a few times with a pause between tries, + in order to give the system a chance to settle. + ``` + lb_response = requests.get(f'http://{self.endpoint()}') + ``` +2. If the request succeeds, display that to the user along with the endpoint. +3. If the request fails, the most likely culprit is the default security group for the VPC. 
+ Use EC2.DescribeSecurityGroups to get the default security group and examine its + IpPermissions to find whether it has port 80 open either to the current computer's IP + address (you can find this programmatically by sending a GET request to http://checkip.amazonaws.com), + to all IP addresses (0.0.0.0/0) or to a VPN/Corpnet prefix: + ``` + response = self.ec2_client.describe_security_groups( + Filters=[ + {'Name': 'group-name', 'Values': ['default']}, + {'Name': 'vpc-id', 'Values': [vpc['VpcId']]}]) + sec_group = response['SecurityGroups'][0] + port_is_open = False + log.info("Found default security group %s.", sec_group['GroupId']) + for ip_perm in sec_group['IpPermissions']: + if ip_perm.get('FromPort', 0) == port: + log.info("Found inbound rule: %s", ip_perm) + for ip_range in ip_perm['IpRanges']: + cidr = ip_range.get('CidrIp', '') + if cidr.startswith(ip_address) or cidr == '0.0.0.0/0': + port_is_open = True + if ip_perm['PrefixListIds']: + port_is_open = True + if not port_is_open: + log.info( + "The inbound rule does not appear to be open to either this computer's IP\n" + "address of %s, to all IP addresses (0.0.0.0/0), or to a prefix list ID.", ip_address) + else: + break + ``` +4. If there is not a rule, ask the user if they want to add one for the current IP address: + ``` + if q.ask(f"Do you want to add a rule to security group {sec_group['GroupId']} to allow\n" + f"inbound traffic on port {self.port} from your computer's IP address of {current_ip_address}? (y/n) ", + q.is_yesno): + self.ec2_client.authorize_security_group_ingress( + GroupId=sec_group_id, CidrIp=f'{ip_address}/32', FromPort=port, ToPort=port, IpProtocol='tcp') + ``` +5. Try again to send a GET request to the endpoint. If this still fails, bail out and tell + the customer to do some troubleshooting of their own. + +Output when a rule exists but the request fails: + +``` +Verifying access to the load balancer endpoint...
+INFO: Got connection error from load balancer endpoint, retrying... +INFO: Got connection error from load balancer endpoint, retrying... +INFO: Got connection error from load balancer endpoint, retrying... +Couldn't connect to the load balancer, verifying that the port is open... +INFO: Found default security group sg-0bf0814e. +INFO: Found inbound rule: {'FromPort': 80, 'IpProtocol': 'tcp', 'IpRanges': [], 'Ipv6Ranges': [], 'PrefixListIds': [{'PrefixListId': 'pl-123456789'}], 'ToPort': 80, 'UserIdGroupPairs': []} +INFO: Got connection error from load balancer endpoint, retrying... +INFO: Got connection error from load balancer endpoint, retrying... +INFO: Got connection error from load balancer endpoint, retrying... +Couldn't get a successful response from the load balancer endpoint. Troubleshoot by +manually verifying that your VPC and security group are configured correctly and that +you can successfully make a GET request to the load balancer endpoint: + + http://doc-example-resilience-lb-1234567890.us-west-2.elb.amazonaws.com +---------------------------------------------------------------------------------------- +``` + +## Demo + +The demo phase of this example cycles through several stages, setting Systems Manager +parameters along the way to simulate failures and instruct the web server to take increasingly +resilient actions. + +### Choices + +After each stage, present the user with three choices: + +``` +See the current state of the service by selecting one of the following choices: + +1. Send a GET request to the load balancer endpoint. +2. Check the health of load balancer targets. +3. Go to the next part of the demo. + +Which action would you like to take? +``` + +#### 1. Send a GET request + +Send a GET request to the load balancer endpoint. Depending on the stage of the demo, the +response is either successful and contains a JSON payload that contains the recommendation +and server metadata, or fails and returns a failure code. 
+ +``` +Request: + +GET http://doc-example-resilience-lb-1282105285.us-west-2.elb.amazonaws.com + +Response: + +200 +{'Title': {'S': '12 Angry Men'}, + 'Creator': {'S': 'Sidney Lumet'}, + 'MediaType': {'S': 'Movie'}, + 'ItemId': {'N': '3'}, + 'Metadata': {'InstanceId': 'i-03ca37214fa45fe33', + 'AvailabilityZone': 'us-west-2a'}} +---------------------------------------------------------------------------------------- +``` + +#### 2. Check health + +Use ELBv2.DescribeTargetHealth to get the health of the load balancer targets. + +``` +Checking the health of load balancer targets: + + Target i-03ca37214fa45fe33 on port 80 is healthy + Target i-087544ae95640b911 on port 80 is healthy + Target i-0fa75af663ba14821 on port 80 is healthy + +Note that it can take a minute or two for the health check to update +after changes are made. + +---------------------------------------------------------------------------------------- +``` + +#### 3. Next stage + +Move to the next stage of the demo and show the choice menu again. + +### Stages + +The following are the stages of the demo, which must be performed in the order shown. + +#### Set parameters + +Before you start, use SSM.PutParameter to set the Systems Manager parameters to the starting +values. The names of the parameters must exactly match these names, because they are used by +the web server to get the parameters at runtime. +``` +table = 'doc-example-resilient-architecture-table' +failure_response = 'doc-example-resilient-architecture-failure-response' +health_check = 'doc-example-resilient-architecture-health-check' + +self.ssm_client.put_parameter(Name=table, Value='doc-example-recommendation-service', Overwrite=True) +self.ssm_client.put_parameter(Name=failure_response, Value='none', Overwrite=True) +self.ssm_client.put_parameter(Name=health_check, Value='shallow', Overwrite=True) +``` + +#### Initial state + +At the beginning, the recommendation service successfully responds and all instances are healthy.
+ +```bash +---------------------------------------------------------------------------------------- +Request: + +GET http://doc-example-resilience-lb-1317068782.us-west-2.elb.amazonaws.com + +Response: + +200 +{'Title': {'S': 'Pride and Prejudice'}, + 'Creator': {'S': 'Jane Austen'}, + 'MediaType': {'S': 'Book'}, + 'ItemId': {'N': '1'}, + 'Metadata': {'InstanceId': 'i-05387127cb2ebbea1', + 'AvailabilityZone': 'us-west-2c'}} +---------------------------------------------------------------------------------------- +``` + +```bash +---------------------------------------------------------------------------------------- + +Checking the health of load balancer targets: + + Target i-02d98d9d0726c4b2d on port 80 is healthy + Target i-0e4b7104cfaf8e056 on port 80 is healthy + Target i-05387127cb2ebbea1 on port 80 is healthy +---------------------------------------------------------------------------------------- +``` + +#### Broken dependency + +The next phase simulates a broken dependency by setting the table name parameter to a +non-existent table name. When the web server tries to get a recommendation, it fails because +the table doesn't exist. + +Use SSM.PutParameter to set the `doc-example-resilient-architecture-table` parameter to a value +other than the name of your DynamoDB recommendation table, such as `this-is-not-a-table`. + +``` +The web service running on the EC2 instances gets recommendations by querying a DynamoDB table. +The table name is contained in a Systems Manager parameter named 'doc-example-resilient-architecture-table'. +To simulate a failure of the recommendation service, let's set this parameter to name a non-existent table. + +INFO: Setting demo parameter doc-example-resilient-architecture-table to 'this-is-not-a-table'. + +Now, sending a GET request to the load balancer endpoint returns a failure code. But, the service reports as +healthy to the load balancer because shallow health checks don't check for failure of the recommendation service. 
+---------------------------------------------------------------------------------------- +``` + +Response to GET: + +```bash +---------------------------------------------------------------------------------------- +Request: + +GET http://doc-example-resilience-lb-1317068782.us-west-2.elb.amazonaws.com + +Response: + +502 +---------------------------------------------------------------------------------------- +``` + +All instances report as healthy because they use shallow health checks, which means +that they simply report success under all conditions. + +#### Static response + +The next phase sets a parameter that instructs the web server to return a static response when +it cannot get a recommendation from the recommendation service. The static response is to always +suggest the *404 Not Found* coloring book. + +Use SSM.PutParameter to set the `doc-example-resilient-architecture-failure-response` parameter +to `static`. + +``` +Instead of failing when the recommendation service fails, the web service can return a static response. +While this is not a perfect solution, it presents the customer with a somewhat better experience than failure. + +INFO: Setting demo parameter doc-example-resilient-architecture-failure-response to 'static'. + +Now, sending a GET request to the load balancer endpoint returns a static response. +The service still reports as healthy because health checks are still shallow. 
+ +---------------------------------------------------------------------------------------- +``` + +Response to GET request: + +```bash +Request: + +GET http://doc-example-resilience-lb-1317068782.us-west-2.elb.amazonaws.com + +Response: + +200 +{'MediaType': {'S': 'Book'}, + 'ItemId': {'N': '0'}, + 'Title': {'S': '404 Not Found: A Coloring Book'}, + 'Creator': {'S': 'The Oatmeal'}, + 'Metadata': {'InstanceId': 'i-05387127cb2ebbea1', + 'AvailabilityZone': 'us-west-2c'}} +---------------------------------------------------------------------------------------- +``` + +#### Bad credentials + +The next phase replaces the credentials on a single instance with credentials that don't allow +access to the recommendation service. + +Use SSM.PutParameter to set the `doc-example-resilient-architecture-table` parameter back to +the name of your DynamoDB recommendation table. + +##### Create an instance profile with bad credentials + +Create all the pieces needed for an instance profile that does not allow permission to the +DynamoDB recommendation table. + +1. Use IAM.CreatePolicy to create an IAM policy from [ssm_only_policy.json](resources/ssm_only_policy.json). +2. Use IAM.CreateRole to create an IAM role. + Specify an AssumeRolePolicyDocument that lets EC2 assume the role: + ``` + { + "Version": "2012-10-17", + "Statement": [{ + "Effect": "Allow", + "Principal": {"Service": "ec2.amazonaws.com"}, + "Action": "sts:AssumeRole"}] + } + ``` +3. Use IAM.AttachRolePolicy to attach the policy to the role. +4. Use IAM.AttachRolePolicy to attach the managed policy `AmazonSSMManagedInstanceCore` to the role. + This is required so that Systems Manager can restart the web server on the instance. +5. Use IAM.CreateInstanceProfile and IAM.AddRoleToInstanceProfile to create an IAM instance + profile and add the role. + +Select an instance, replace its instance profile, and reboot the instance. + +1. Use AutoScaling.DescribeAutoScalingGroups to get the instance IDs for the group.
Pick one + to poison. +2. Use EC2.DescribeIamInstanceProfileAssociations to get the profile association ID for the + instance. +3. Use EC2.ReplaceIamInstanceProfileAssociation to replace the profile for the instance with + the new instance profile that contains bad credentials. +4. Use EC2.RebootInstances to reboot the instance. +5. Use SSM.DescribeInstanceInformation in a loop (with pauses) until the instance is in the + returned list. This indicates that the instance can receive Systems Manager commands. +6. Use SSM.SendCommand to restart the web server on the instance. + ``` + self.ssm_client.send_command( + InstanceIds=[instance_id], + DocumentName='AWS-RunShellScript', + Parameters={'commands': ['cd / && sudo python3 server.py 80 us-west-2']}) + ``` + +Tell the user all about it: + +``` +Let's reinstate the recommendation service. + +INFO: Setting demo parameter doc-example-resilient-architecture-table to 'doc-example-recommendation-service'. + +Let's also substitute bad credentials for one of the instances in the target group so that it can't +access the DynamoDB recommendation table. + +INFO: Created policy with ARN arn:aws:iam::123456789012:policy/doc-example-resilience-bc-pol. +INFO: Created role doc-example-resilience-bc-role and attached policy arn:aws:iam::123456789012:policy/doc-example-resilience-bc-pol. +INFO: Created profile doc-example-resilience-bc-prof and added role doc-example-resilience-bc-role. + +Replacing the profile for instance i-03ca37214fa45fe33 with a profile that contains +bad credentials... + +INFO: Replaced instance profile for association iip-assoc-05ceb4b8735f72381 with profile doc-example-resilience-bc-prof. +INFO: Rebooting instance i-03ca37214fa45fe33 and waiting for it to to be ready. +INFO: Restarted the Python web server on instance i-03ca37214fa45fe33. +Now, sending a GET request to the load balancer endpoint returns either a recommendation or a static response, +depending on which instance is selected by the load balancer. 
+ +---------------------------------------------------------------------------------------- +``` + +An instance on us-west-2a gives real recommendations: + +```bash +---------------------------------------------------------------------------------------- +Request: + +GET http://doc-example-resilience-lb-1317068782.us-west-2.elb.amazonaws.com + +Response: + +200 +{'Title': {'S': 'Delicatessen'}, + 'Creator': {'S': 'Jeunet et Caro'}, + 'MediaType': {'S': 'Movie'}, + 'ItemId': {'N': '1'}, + 'Metadata': {'InstanceId': 'i-02d98d9d0726c4b2d', + 'AvailabilityZone': 'us-west-2a'}} +---------------------------------------------------------------------------------------- +``` + +While the bad instance on us-west-2b gives a static response: + +```bash +---------------------------------------------------------------------------------------- +Request: + +GET http://doc-example-resilience-lb-1317068782.us-west-2.elb.amazonaws.com + +Response: + +200 +{'MediaType': {'S': 'Book'}, + 'ItemId': {'N': '0'}, + 'Title': {'S': '404 Not Found: A Coloring Book'}, + 'Creator': {'S': 'The Oatmeal'}, + 'Metadata': {'InstanceId': 'i-0e4b7104cfaf8e056', + 'AvailabilityZone': 'us-west-2b'}} +---------------------------------------------------------------------------------------- +``` + +#### Deep health checks + +The next phase sets a parameter that instructs the web server to use a deep health check. +This means that the web server returns an error code when it can't connect to the recommendations +service. + +Note that the deep health check is only for ELB routing and not for Auto Scaling instance health. +This kind of deep health check is not recommended for Auto Scaling instance health, see +[Choosing the right health check with Elastic Load Balancing and EC2 Auto Scaling](https://aws.amazon.com/blogs/networking-and-content-delivery/choosing-the-right-health-check-with-elastic-load-balancing-and-ec2-auto-scaling/) +for more information. 
+ +Use SSM.PutParameter to set the `doc-example-resilient-architecture-health-check` to 'deep'. + +Output: + +``` +Let's implement a deep health check. For this demo, a deep health check tests whether +the web service can access the DynamoDB table that it depends on for recommendations. Note that +the deep health check is only for ELB routing and not for Auto Scaling instance health. +This kind of deep health check is not recommended for Auto Scaling instance health, because it +risks accidental termination of all instances in the Auto Scaling group when a dependent service fails. + +By implementing deep health checks, the load balancer can detect when one of the instances is failing +and take that instance out of rotation. + +INFO: Setting demo parameter doc-example-resilient-architecture-health-check to 'deep'. + +Now, checking target health indicates that the instance with bad credentials (i-041efe367831f49b5) +is unhealthy. Note that it might take a minute or two for the load balancer to detect the unhealthy +instance. Sending a GET request to the load balancer endpoint always returns a recommendation, because +the load balancer takes unhealthy instances out of its rotation. 
+ +---------------------------------------------------------------------------------------- +``` + +After this change, the instance with bad credentials reports as unhealthy: + +```bash +---------------------------------------------------------------------------------------- + +Checking the health of load balancer targets: + + Target i-02d98d9d0726c4b2d on port 80 is healthy + Target i-0e4b7104cfaf8e056 on port 80 is unhealthy + Target.ResponseCodeMismatch: Health checks failed with these codes: [503] + + Target i-05387127cb2ebbea1 on port 80 is healthy + +---------------------------------------------------------------------------------------- +``` + +#### Replace the failing instance + +This next phase uses an SDK action to terminate the unhealthy instance, at which point Auto Scaling +automatically starts a new instance. + +Use AutoScaling.TerminateInstanceInAutoScalingGroup with ShouldDecrementDesiredCapacity=False +to stop the instance with bad credentials. + +Output: + +``` +Because the instances in this demo are controlled by an auto scaler, the simplest way to fix an unhealthy +instance is to terminate it and let the auto scaler start a new instance to replace it. + +INFO: Terminated instance i-041efe367831f49b5. + +Even while the instance is terminating and the new instance is starting, sending a GET +request to the web service continues to get a successful recommendation response because +the load balancer routes requests to the healthy instances. After the replacement instance +starts and reports as healthy, it is included in the load balancing rotation. + +Note that terminating and replacing an instance typically takes several minutes, during which time you +can see the changing health check status until the new instance is running and healthy. 
+ +---------------------------------------------------------------------------------------- +``` + +While the instances are transitioning, you will see various results from the health check, for example: + +```bash +---------------------------------------------------------------------------------------- + +Checking the health of load balancer targets: + + Target i-02d98d9d0726c4b2d on port 80 is healthy + Target i-05387127cb2ebbea1 on port 80 is healthy + Target i-0e4b7104cfaf8e056 on port 80 is draining + Target.DeregistrationInProgress: Target deregistration is in progress + + Target i-0c8df865e77bbb943 on port 80 is unhealthy + Target.FailedHealthChecks: Health checks failed + +---------------------------------------------------------------------------------------- +``` + +After the new instance starts, it reports as healthy and is again included in the load balancer's rotation. + +#### Fail open + +This last phase of the example again sets the table name parameter to a non-existent table to simulate a failure of the +recommendation service. This causes all instances to report as unhealthy. + +Use SSM.PutParameter to set the `doc-example-resilient-architecture-table` parameter to a value +other than the name of your DynamoDB recommendation table, such as `this-is-not-a-table`. + +Output: + +``` +If the recommendation service fails now, deep health checks mean all instances report as unhealthy. + +INFO: Setting demo parameter doc-example-resilient-architecture-table to 'this-is-not-a-table'. + +When all instances are unhealthy, the load balancer continues to route requests even to +unhealthy instances, allowing them to fail open and return a static response rather than fail +closed and report failure to the customer. 
+---------------------------------------------------------------------------------------- +``` + +Health check now shows all instances are unhealthy: + +```bash +---------------------------------------------------------------------------------------- + +Checking the health of load balancer targets: + + Target i-02d98d9d0726c4b2d on port 80 is unhealthy + Target.ResponseCodeMismatch: Health checks failed with these codes: [503] + + Target i-05387127cb2ebbea1 on port 80 is unhealthy + Target.ResponseCodeMismatch: Health checks failed with these codes: [503] + + Target i-0c8df865e77bbb943 on port 80 is unhealthy + Target.ResponseCodeMismatch: Health checks failed with these codes: [503] + +---------------------------------------------------------------------------------------- +``` + +When all instances in a target group are unhealthy, the load balancer continues to forward requests to +them, allowing for a fail open behavior. + +## Destroy + +After the demo portion of the example, give the user the option to destroy all resources, +and then do so. Use waiters as necessary if your SDK provides them. + +1. Use ELBv2.DeleteLoadBalancer to delete the load balancer. Wait for it to be deleted. +2. Use ELBv2.DeleteTargetGroup to delete the load balancer target group. You might get a + 'ResourceInUse' error, in which case you'll have to wait and try again. +3. Use AutoScaling.UpdateAutoScalingGroup to have MinSize=0. +4. Use AutoScaling.TerminateInstanceInAutoScalingGroup to terminate all instances in the group. + Wait for all instances to terminate. This is required before you can delete the Auto Scaling group. +5. Use AutoScaling.DeleteAutoScalingGroup to delete the Auto Scaling group. You might get + a 'ScalingActivityInProgress' error, in which case you'll have to wait and try again. +6. Use EC2.DeleteLaunchTemplate to delete the launch template. +7. Do the following steps for the main profile and the one with bad credentials: + 1. 
Use IAM.RemoveRoleFromInstanceProfile to remove the role from the instance profile. + 2. Use IAM.DeleteInstanceProfile to delete the instance profile. + 3. Use IAM.ListAttachedRolePolicies to get all policies attached to the role. + 4. Use IAM.DetachRolePolicy and IAM.DeletePolicy to detach and delete each policy. Don't delete + AWS managed policies, which have ARNs that start with 'arn:aws:iam::aws'. + 5. Use IAM.DeleteRole to delete the role. +8. Use DynamoDB.DeleteTable to delete the recommendations table. + +Output: + +``` +This concludes the demo of how to build and manage a resilient service. +To keep things tidy and to avoid unwanted charges on your account, we can clean up all AWS resources +that were created for this demo. +Do you want to clean up all demo resources? (y/n) y +INFO: Deleted load balancer doc-example-resilience-lb. +INFO: Waiting for load balancer to be deleted... +INFO: Target group not yet released from load balancer, waiting... +INFO: Deleted load balancing target group doc-example-resilience-tg. +INFO: Stopping i-041efe367831f49b5. +INFO: Stopping i-08022d9ebb1041b55. +INFO: Stopping i-08763def19ccbcbd6. +INFO: Stopping i-0f2c8709826fe8bf6. +INFO: Some instances are still running. Waiting for them to stop... +INFO: Some instances are still running. Waiting for them to stop... +INFO: Some instances are still running. Waiting for them to stop... +INFO: Some instances are still running. Waiting for them to stop... +INFO: Some instances are still running. Waiting for them to stop... +INFO: Some instances are still running. Waiting for them to stop... +INFO: Some instances are still running. Waiting for them to stop... +INFO: Deleted EC2 Auto Scaling group doc-example-resilience-group. +INFO: Deleted instance profile doc-example-resilience-prof. +INFO: Detached and deleted policy doc-example-resilience-pol. +INFO: Deleted role doc-example-resilience-role. +INFO: Launch template doc-example-resilience-template deleted. 
+INFO: Deleted instance profile doc-example-resilience-bc-prof. +INFO: Detached and deleted policy doc-example-resilience-bc-pol. +INFO: Detached and deleted policy AmazonSSMManagedInstanceCore. +INFO: Deleted role doc-example-resilience-bc-role. +INFO: Deleting table doc-example-recommendation-service... +INFO: Table doc-example-recommendation-service deleted. +---------------------------------------------------------------------------------------- +Thanks for watching! +---------------------------------------------------------------------------------------- +``` + +--- + +## Hello Service + +Most services used in this example already have an MVP defined. The only new service +to add is Elastic Load Balancing. + +* ELBv2.DescribeLoadBalancers. List LoadBalancerName and DNSName of up to 10 load balancers. +There might not be any if the customer has not yet defined any. + +Output: + +``` +Hello, Elastic Load Balancing! Let's list some of your load balancers: + test-load-balancer: test-load-balancer-1921885376.us-west-2.elb.amazonaws.com +``` + +--- + +## Actions + +**Elastic Load Balancing V2** + +* `DescribeLoadBalancers` +* `CreateTargetGroup` +* `DescribeTargetGroups` +* `DeleteTargetGroup` +* `CreateLoadBalancer` +* `CreateListener` +* `DeleteLoadBalancer` +* `DescribeTargetHealth` + +**Amazon EC2 Auto Scaling** + +In addition to the actions implemented as part of the MVP for Amazon EC2 Auto Scaling: + +`AttachLoadBalancerTargetGroups` + +This example implements several actions that overlap with the actions for the MVP. +If you find that the actions for this example differ enough from the MVP actions, add +them as a second excerpt (with differentiating description) to the existing example.
+ +**IAM** + +* `CreateInstanceProfile` +* `DeleteInstanceProfile` + +**EC2** + +* `DescribeIamInstanceProfileAssociations` +* `ReplaceIamInstanceProfileAssociation` +* `CreateLaunchTemplate` +* `DeleteLaunchTemplate` +* `DescribeVpcs` +* `DescribeSubnets` + +--- + +## Metadata + +**elastic-load-balancing-v2_metadata.yaml** + +* elastic-load-balancing-v2_DescribeLoadBalancers +* elastic-load-balancing-v2_CreateTargetGroup +* elastic-load-balancing-v2_DescribeTargetGroups +* elastic-load-balancing-v2_DeleteTargetGroup +* elastic-load-balancing-v2_CreateLoadBalancer +* elastic-load-balancing-v2_CreateListener +* elastic-load-balancing-v2_DeleteLoadBalancer +* elastic-load-balancing-v2_DescribeTargetHealth + +**auto-scaling_metadata.yaml** + +* auto-scaling_AttachLoadBalancerTargetGroups + +**iam_metadata.yaml** + +* iam_CreateInstanceProfile +* iam_DeleteInstanceProfile + +**ec2_metadata.yaml** + +* ec2_DescribeIamInstanceProfileAssociations +* ec2_ReplaceIamInstanceProfileAssociation +* ec2_CreateLaunchTemplate +* ec2_DeleteLaunchTemplate +* ec2_DescribeVpcs +* ec2_DescribeSubnets + +--- + +# Other material + +If technical details are not what you seek, try these instead: + +* [High-level summary](README.md) +* [Community.aws: How to build and manage a resilient service using AWS SDKs](https://community.aws/posts/build-and-manage-a-resilient-service-using-aws-sdks) + diff --git a/workflows/resilient_service/resources/server.py b/workflows/resilient_service/resources/server.py index d1255ec92a5..8c404005888 100644 --- a/workflows/resilient_service/resources/server.py +++ b/workflows/resilient_service/resources/server.py @@ -31,6 +31,12 @@ def __init__(self, dynamodb_client, ssm_client, *args, **kwargs): self.ssm_client = ssm_client super().__init__(*args, **kwargs) + def _respond(self, status_code, payload): + self.send_response(status_code) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(bytes(json.dumps(payload),
"utf-8")) + def do_GET(self): """ Responds to an HTTP GET request. This function uses Systems Manager parameters @@ -81,11 +87,7 @@ def do_GET(self): payload['Metadata'] = { 'InstanceId': ec2_metadata.instance_id, 'AvailabilityZone': ec2_metadata.availability_zone} - - self.send_response(200) - self.send_header('Content-type', 'application/json') - self.end_headers() - self.wfile.write(bytes(json.dumps(payload), "utf-8")) + self._respond(200, payload) elif self.path == '/healthcheck': response_code = 200 success = True @@ -102,11 +104,7 @@ def do_GET(self): print(f"Recommendation service health check error: {err}") response_code = 503 success = False - - self.send_response(response_code) - self.send_header('Content-type', 'application/json') - self.end_headers() - self.wfile.write(bytes(json.dumps({'success': success}), "utf-8")) + self._respond(response_code, {'success': success}) def run(): diff --git a/workflows/resilient_service/resources/server_startup_script.sh b/workflows/resilient_service/resources/server_startup_script.sh index 9a10de83d08..68db90cedb9 100644 --- a/workflows/resilient_service/resources/server_startup_script.sh +++ b/workflows/resilient_service/resources/server_startup_script.sh @@ -3,5 +3,5 @@ yum -y update sleep 30 # prevent "Error: Rpmdb changed underneath us" yum install python-pip -y python3 -m pip install boto3 ec2-metadata -wget -O server.py https://raw.githubusercontent.com/Laren-AWS/aws-doc-sdk-examples/resilient-architecture-python/workflows/resilient_service/resources/server.py +wget -O server.py https://raw.githubusercontent.com/awsdocs/aws-doc-sdk-examples/main/workflows/resilient_service/resources/server.py python3 server.py 80 us-west-2