Skip to content

Commit

Permalink
Add instance name to DiscoverEC2 User Task failed instances list (#47712
Browse files Browse the repository at this point in the history
)

* Add instance name to DiscoverEC2 User Task failed instances list

* use stdlib slices and maps
  • Loading branch information
marcoandredinis committed Oct 28, 2024
1 parent b230518 commit 8aa3d4d
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 36 deletions.
2 changes: 1 addition & 1 deletion lib/srv/discovery/discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -989,10 +989,10 @@ func (s *Server) handleEC2RemoteInstallation(instances *server.EC2Instances) err
installerScript: req.InstallerScriptName(),
},
&usertasksv1.DiscoverEC2Instance{
// TODO(marco): add instance name
DiscoveryConfig: instances.DiscoveryConfig,
DiscoveryGroup: s.DiscoveryGroup,
InstanceId: instance.InstanceID,
Name: instance.InstanceName,
SyncTime: timestamppb.New(s.clock.Now()),
},
)
Expand Down
2 changes: 1 addition & 1 deletion lib/srv/discovery/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,12 +310,12 @@ func (s *Server) ReportEC2SSMInstallationResult(ctx context.Context, result *ser
installerScript: result.InstallerScript,
},
&usertasksv1.DiscoverEC2Instance{
// TODO(marco): add instance name
InvocationUrl: result.SSMRunEvent.InvocationURL,
DiscoveryConfig: result.DiscoveryConfig,
DiscoveryGroup: s.DiscoveryGroup,
SyncTime: timestamppb.New(result.SSMRunEvent.Time),
InstanceId: result.SSMRunEvent.InstanceID,
Name: result.InstanceName,
},
)

Expand Down
4 changes: 4 additions & 0 deletions lib/srv/server/ec2_watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ type EC2Instances struct {
// discovered.
type EC2Instance struct {
InstanceID string
InstanceName string
Tags map[string]string
OriginalInstance ec2.Instance
}
Expand All @@ -92,6 +93,9 @@ func toEC2Instance(originalInst *ec2.Instance) EC2Instance {
for _, tag := range originalInst.Tags {
if key := aws.StringValue(tag.Key); key != "" {
inst.Tags[key] = aws.StringValue(tag.Value)
if key == "Name" {
inst.InstanceName = aws.StringValue(tag.Value)
}
}
}
return inst
Expand Down
82 changes: 78 additions & 4 deletions lib/srv/server/ec2_watcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,16 @@ func TestEC2Watcher(t *testing.T) {

present := ec2.Instance{
InstanceId: aws.String("instance-present"),
Tags: []*ec2.Tag{{
Key: aws.String("teleport"),
Value: aws.String("yes"),
}},
Tags: []*ec2.Tag{
{
Key: aws.String("teleport"),
Value: aws.String("yes"),
},
{
Key: aws.String("Name"),
Value: aws.String("Present"),
},
},
State: &ec2.InstanceState{
Name: aws.String(ec2.InstanceStateNameRunning),
},
Expand Down Expand Up @@ -360,3 +366,71 @@ func TestMakeEvents(t *testing.T) {
})
}
}

func TestToEC2Instances(t *testing.T) {
sampleInstance := &ec2.Instance{
InstanceId: aws.String("instance-001"),
Tags: []*ec2.Tag{
{
Key: aws.String("teleport"),
Value: aws.String("yes"),
},
{
Key: aws.String("Name"),
Value: aws.String("MyInstanceName"),
},
},
State: &ec2.InstanceState{
Name: aws.String(ec2.InstanceStateNameRunning),
},
}

sampleInstanceWithoutName := &ec2.Instance{
InstanceId: aws.String("instance-001"),
Tags: []*ec2.Tag{
{
Key: aws.String("teleport"),
Value: aws.String("yes"),
},
},
State: &ec2.InstanceState{
Name: aws.String(ec2.InstanceStateNameRunning),
},
}

for _, tt := range []struct {
name string
input []*ec2.Instance
expected []EC2Instance
}{
{
name: "with name",
input: []*ec2.Instance{sampleInstance},
expected: []EC2Instance{{
InstanceID: "instance-001",
Tags: map[string]string{
"Name": "MyInstanceName",
"teleport": "yes",
},
InstanceName: "MyInstanceName",
OriginalInstance: *sampleInstance,
}},
},
{
name: "without name",
input: []*ec2.Instance{sampleInstanceWithoutName},
expected: []EC2Instance{{
InstanceID: "instance-001",
Tags: map[string]string{
"teleport": "yes",
},
OriginalInstance: *sampleInstanceWithoutName,
}},
},
} {
t.Run(tt.name, func(t *testing.T) {
got := ToEC2Instances(tt.input)
require.Equal(t, tt.expected, got)
})
}
}
78 changes: 49 additions & 29 deletions lib/srv/server/ssm_install.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"errors"
"fmt"
"log/slog"
"maps"
"slices"
"strings"

"github.com/aws/aws-sdk-go/aws"
Expand Down Expand Up @@ -67,6 +69,9 @@ type SSMInstallationResult struct {
SSMDocumentName string
// InstallerScript is the Teleport Installer script name used to install Teleport into the instance.
InstallerScript string
// InstanceName is the Instance's name.
// Might be empty.
InstanceName string
}

// SSMInstaller handles running SSM commands that install Teleport on EC2 instances.
Expand Down Expand Up @@ -134,18 +139,18 @@ func NewSSMInstaller(cfg SSMInstallerConfig) (*SSMInstaller, error) {

// Run executes the SSM document and then blocks until the command has completed.
func (si *SSMInstaller) Run(ctx context.Context, req SSMRunRequest) error {
ids := make([]string, 0, len(req.Instances))
instances := make(map[string]string, len(req.Instances))
for _, inst := range req.Instances {
ids = append(ids, inst.InstanceID)
instances[inst.InstanceID] = inst.InstanceName
}

params := make(map[string][]*string)
for k, v := range req.Params {
params[k] = []*string{aws.String(v)}
}

validInstances := ids
instancesState, err := si.describeSSMAgentState(ctx, req, ids)
validInstances := instances
instancesState, err := si.describeSSMAgentState(ctx, req, instances)
switch {
case trace.IsAccessDenied(err):
// describeSSMAgentState uses `ssm:DescribeInstanceInformation` to gather all the instances information.
Expand All @@ -169,9 +174,10 @@ func (si *SSMInstaller) Run(ctx context.Context, req SSMRunRequest) error {
validInstances = instancesState.valid
}

validInstanceIDs := instanceIDsFrom(validInstances)
output, err := req.SSM.SendCommandWithContext(ctx, &ssm.SendCommandInput{
DocumentName: aws.String(req.DocumentName),
InstanceIds: aws.StringSlice(validInstances),
InstanceIds: aws.StringSlice(validInstanceIDs),
Parameters: params,
})
if err != nil {
Expand All @@ -190,7 +196,7 @@ func (si *SSMInstaller) Run(ctx context.Context, req SSMRunRequest) error {
delete(params, ParamSSHDConfigPath)
output, err = req.SSM.SendCommandWithContext(ctx, &ssm.SendCommandInput{
DocumentName: aws.String(req.DocumentName),
InstanceIds: aws.StringSlice(validInstances),
InstanceIds: aws.StringSlice(validInstanceIDs),
Parameters: params,
})
if err != nil {
Expand All @@ -200,16 +206,17 @@ func (si *SSMInstaller) Run(ctx context.Context, req SSMRunRequest) error {

g, ctx := errgroup.WithContext(ctx)
g.SetLimit(10)
for _, inst := range validInstances {
inst := inst
for instanceID, instanceName := range validInstances {
instanceID := instanceID
instanceName := instanceName
g.Go(func() error {
return trace.Wrap(si.checkCommand(ctx, req, output.Command.CommandId, &inst))
return trace.Wrap(si.checkCommand(ctx, req, output.Command.CommandId, &instanceID, instanceName))
})
}
return trace.Wrap(g.Wait())
}

func invalidSSMInstanceInstallationResult(req SSMRunRequest, instanceID, status, issueType string) *SSMInstallationResult {
func invalidSSMInstanceInstallationResult(req SSMRunRequest, instanceID, instanceName, status, issueType string) *SSMInstallationResult {
return &SSMInstallationResult{
SSMRunEvent: &apievents.SSMRun{
Metadata: apievents.Metadata{
Expand All @@ -228,13 +235,14 @@ func invalidSSMInstanceInstallationResult(req SSMRunRequest, instanceID, status,
IssueType: issueType,
SSMDocumentName: req.DocumentName,
InstallerScript: req.InstallerScriptName(),
InstanceName: instanceName,
}
}

func (si *SSMInstaller) emitInvalidInstanceEvents(ctx context.Context, req SSMRunRequest, instanceIDsState *instanceIDsSSMState) error {
var errs []error
for _, instanceID := range instanceIDsState.missing {
installationResult := invalidSSMInstanceInstallationResult(req, instanceID,
for instanceID, instanceName := range instanceIDsState.missing {
installationResult := invalidSSMInstanceInstallationResult(req, instanceID, instanceName,
"EC2 Instance is not registered in SSM. Make sure that the instance has AmazonSSMManagedInstanceCore policy assigned.",
usertasks.AutoDiscoverEC2IssueSSMInstanceNotRegistered,
)
Expand All @@ -243,8 +251,8 @@ func (si *SSMInstaller) emitInvalidInstanceEvents(ctx context.Context, req SSMRu
}
}

for _, instanceID := range instanceIDsState.connectionLost {
installationResult := invalidSSMInstanceInstallationResult(req, instanceID,
for instanceID, instanceName := range instanceIDsState.connectionLost {
installationResult := invalidSSMInstanceInstallationResult(req, instanceID, instanceName,
"SSM Agent in EC2 Instance is not connecting to SSM Service. Restart or reinstall the SSM service. See https://docs.aws.amazon.com/systems-manager/latest/userguide/ami-preinstalled-agent.html#verify-ssm-agent-status for more details.",
usertasks.AutoDiscoverEC2IssueSSMInstanceConnectionLost,
)
Expand All @@ -253,8 +261,8 @@ func (si *SSMInstaller) emitInvalidInstanceEvents(ctx context.Context, req SSMRu
}
}

for _, instanceID := range instanceIDsState.unsupportedOS {
installationResult := invalidSSMInstanceInstallationResult(req, instanceID,
for instanceID, instanceName := range instanceIDsState.unsupportedOS {
installationResult := invalidSSMInstanceInstallationResult(req, instanceID, instanceName,
"EC2 instance is running an unsupported Operating System. Only Linux is supported.",
usertasks.AutoDiscoverEC2IssueSSMInstanceUnsupportedOS,
)
Expand All @@ -268,19 +276,29 @@ func (si *SSMInstaller) emitInvalidInstanceEvents(ctx context.Context, req SSMRu

// instanceIDsSSMState contains a list of EC2 Instance IDs for a given state.
type instanceIDsSSMState struct {
valid []string
missing []string
connectionLost []string
unsupportedOS []string
valid map[string]string
missing map[string]string
connectionLost map[string]string
unsupportedOS map[string]string
}

func instanceIDsFrom(m map[string]string) []string {
return slices.Collect(maps.Keys(m))
}

// describeSSMAgentState returns the instanceIDsSSMState for all the instances.
func (si *SSMInstaller) describeSSMAgentState(ctx context.Context, req SSMRunRequest, allInstanceIDs []string) (*instanceIDsSSMState, error) {
ret := &instanceIDsSSMState{}
func (si *SSMInstaller) describeSSMAgentState(ctx context.Context, req SSMRunRequest, allInstances map[string]string) (*instanceIDsSSMState, error) {
ret := &instanceIDsSSMState{
valid: make(map[string]string),
missing: make(map[string]string),
connectionLost: make(map[string]string),
unsupportedOS: make(map[string]string),
}
instanceIDs := instanceIDsFrom(allInstances)

ssmInstancesInfo, err := req.SSM.DescribeInstanceInformationWithContext(ctx, &ssm.DescribeInstanceInformationInput{
Filters: []*ssm.InstanceInformationStringFilter{
{Key: aws.String(ssm.InstanceInformationFilterKeyInstanceIds), Values: aws.StringSlice(allInstanceIDs)},
{Key: aws.String(ssm.InstanceInformationFilterKeyInstanceIds), Values: aws.StringSlice(instanceIDs)},
},
MaxResults: aws.Int64(awsEC2APIChunkSize),
})
Expand All @@ -294,24 +312,24 @@ func (si *SSMInstaller) describeSSMAgentState(ctx context.Context, req SSMRunReq
instanceStateByInstanceID[aws.StringValue(instanceState.InstanceId)] = instanceState
}

for _, instanceID := range allInstanceIDs {
for instanceID, instanceName := range allInstances {
instanceState, found := instanceStateByInstanceID[instanceID]
if !found {
ret.missing = append(ret.missing, instanceID)
ret.missing[instanceID] = instanceName
continue
}

if aws.StringValue(instanceState.PingStatus) == ssm.PingStatusConnectionLost {
ret.connectionLost = append(ret.connectionLost, instanceID)
ret.connectionLost[instanceID] = instanceName
continue
}

if aws.StringValue(instanceState.PlatformType) != ssm.PlatformTypeLinux {
ret.unsupportedOS = append(ret.unsupportedOS, instanceID)
ret.unsupportedOS[instanceID] = instanceName
continue
}

ret.valid = append(ret.valid, instanceID)
ret.valid[instanceID] = instanceName
}

return ret, nil
Expand All @@ -330,7 +348,7 @@ func skipAWSWaitErr(err error) error {
return trace.Wrap(err)
}

func (si *SSMInstaller) checkCommand(ctx context.Context, req SSMRunRequest, commandID, instanceID *string) error {
func (si *SSMInstaller) checkCommand(ctx context.Context, req SSMRunRequest, commandID, instanceID *string, instanceName string) error {
err := req.SSM.WaitUntilCommandExecutedWithContext(ctx, &ssm.GetCommandInvocationInput{
CommandId: commandID,
InstanceId: instanceID,
Expand Down Expand Up @@ -377,6 +395,7 @@ func (si *SSMInstaller) checkCommand(ctx context.Context, req SSMRunRequest, com
IssueType: usertasks.AutoDiscoverEC2IssueSSMScriptFailure,
SSMDocumentName: req.DocumentName,
InstallerScript: req.InstallerScriptName(),
InstanceName: instanceName,
}))
}

Expand All @@ -393,6 +412,7 @@ func (si *SSMInstaller) checkCommand(ctx context.Context, req SSMRunRequest, com
IssueType: usertasks.AutoDiscoverEC2IssueSSMScriptFailure,
SSMDocumentName: req.DocumentName,
InstallerScript: req.InstallerScriptName(),
InstanceName: instanceName,
}))
}
}
Expand Down
3 changes: 2 additions & 1 deletion lib/srv/server/ssm_install_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ func TestSSMInstaller(t *testing.T) {
name: "ssm run was successful",
req: SSMRunRequest{
Instances: []EC2Instance{
{InstanceID: "instance-id-1"},
{InstanceID: "instance-id-1", InstanceName: "my-instance-name"},
},
DocumentName: document,
Params: map[string]string{"token": "abcdefg"},
Expand Down Expand Up @@ -146,6 +146,7 @@ func TestSSMInstaller(t *testing.T) {
},
IssueType: "ec2-ssm-script-failure",
SSMDocumentName: "ssmdocument",
InstanceName: "my-instance-name",
}},
},
{
Expand Down

0 comments on commit 8aa3d4d

Please sign in to comment.