Skip to content

Commit

Permalink
Scale up correct number of instances (#7416)
Browse files Browse the repository at this point in the history
* Add helper function to compute instances to scale

* Standardize types to int where possible, finish function to compute instances to scale

* Fix bad rebase

* Return 0 instead of 1

* Fix generate capacity map
  • Loading branch information
Mauricio Araujo authored Sep 30, 2022
1 parent 76e0616 commit ae1db4d
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func TestMandelboxAssign(t *testing.T) {

var tests = []struct {
name string
capacity int64
capacity int
regions []string
clientSHA, want string
shouldBeAssigned bool
Expand Down Expand Up @@ -72,7 +72,7 @@ func TestMandelboxAssign(t *testing.T) {
Region: "us-east-1",
IPAddress: "1.1.1.1/24",
ClientSHA: "test-sha",
RemainingCapacity: int64(tt.capacity),
RemainingCapacity: tt.capacity,
},
{
ID: "test-assign-instance-2",
Expand All @@ -83,7 +83,7 @@ func TestMandelboxAssign(t *testing.T) {
Region: "us-west-1",
IPAddress: "1.1.1.1/24",
ClientSHA: "test-sha",
RemainingCapacity: int64(tt.capacity),
RemainingCapacity: tt.capacity,
},
{
ID: "test-assign-instance-3",
Expand All @@ -94,7 +94,7 @@ func TestMandelboxAssign(t *testing.T) {
Region: "ap-south-1",
IPAddress: "1.1.1.1/24",
ClientSHA: "test-sha",
RemainingCapacity: int64(tt.capacity),
RemainingCapacity: tt.capacity,
},
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ var (
// defaultInstanceBuffer is the number of instances with space to run
// mandelboxes. This value is used when deciding how many instances to
// scale up if we don't have enough capacity.
defaultInstanceBuffer = 1
defaultInstanceBuffer int = 1
// desiredFreeMandelboxesPerRegion is the number of free mandelboxes we always
// want available in a region. This value is set per-region and it represents
// the free mandelboxes we want on each.
Expand Down Expand Up @@ -90,17 +90,17 @@ var (
// generateInstanceCapacityMap uses the global instanceTypeToGPUNum and instanceTypeToVCPUNum maps
// to generate the maximum mandelbox capacity for each instance type in the intersection
// of their keys.
func generateInstanceCapacityMap(instanceToGPUMap, instanceToVCPUMap map[string]int) map[string]int64 {
func generateInstanceCapacityMap(instanceToGPUMap, instanceToVCPUMap map[string]int) map[string]int {
// Initialize the instance capacity map
capacityMap := map[string]int64{}
capacityMap := map[string]int{}
for instanceType, gpuNum := range instanceToGPUMap {
// Only populate for instances that are in both maps
vcpuNum, ok := instanceToVCPUMap[instanceType]
if !ok {
continue
}
min := utils.Min(gpuNum*constants.MaxMandelboxesPerGPU, vcpuNum/VCPUsPerMandelbox)
capacityMap[instanceType] = int64(min)
capacityMap[instanceType] = min
}
return capacityMap
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func TestGenerateInstanceCapacityMap(t *testing.T) {
name string
gpuMap map[string]int
vcpuMap map[string]int
expected map[string]int64
expected map[string]int
}{
{
name: "gpu limited",
Expand All @@ -26,7 +26,7 @@ func TestGenerateInstanceCapacityMap(t *testing.T) {
"a": 1000,
"b": 2000,
},
expected: map[string]int64{
expected: map[string]int{
"a": constants.MaxMandelboxesPerGPU,
"b": 2 * constants.MaxMandelboxesPerGPU,
},
Expand All @@ -41,7 +41,7 @@ func TestGenerateInstanceCapacityMap(t *testing.T) {
"a": 4,
"b": 8,
},
expected: map[string]int64{
expected: map[string]int{
"a": 4 / VCPUsPerMandelbox,
"b": 8 / VCPUsPerMandelbox,
},
Expand All @@ -56,7 +56,7 @@ func TestGenerateInstanceCapacityMap(t *testing.T) {
"c": 4,
"d": 8,
},
expected: map[string]int64{},
expected: map[string]int{},
},
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,27 @@ func (s *DefaultScalingAlgorithm) UpgradeImage(scalingCtx context.Context, event
fakeMandelboxesForDb []subscriptions.Mandelbox
)

// Compute the size of the buffer we want to start. We pass in a current capacity of 0
// because we want a full buffer of instances for the new version.
// TODO: change to a different instance type once we support more cloud providers/types
instancesToScale := helpers.ComputeInstancesToScale(desiredFreeMandelboxesPerRegion[event.Region], 0, instanceCapacity["g4dn.2xlarge"])

// If we are running on a local or testing environment, spinup "fake" instances to avoid
// creating them on a cloud provider. In any other case we call the host handler to create
// them on the cloud provider for us.
if metadata.IsLocalEnv() && !metadata.IsRunningInCI() {
logger.Infow("Running on localdev so scaling up fake instances.", contextFields)
instancesForDb, fakeMandelboxesForDb = helpers.SpinUpFakeInstances(defaultInstanceBuffer, newImage.ImageID, event.Region)
} else {
instancesForDb, err = s.Host.SpinUpInstances(scalingCtx, int32(defaultInstanceBuffer), maxWaitTimeReady, newImage)
instancesForDb, err = s.Host.SpinUpInstances(scalingCtx, int32(instancesToScale), maxWaitTimeReady, newImage)
if err != nil {
return utils.MakeError("failed to create instance buffer: %s", err)
}

// Set the instance capacity field and add to the slice
// that will be passed to the database.
for i := 0; i < len(instancesForDb); i++ {
instancesForDb[i].RemainingCapacity = int64(instanceCapacity[instancesForDb[i].Type])
instancesForDb[i].RemainingCapacity = instanceCapacity[instancesForDb[i].Type]
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (s *DefaultScalingAlgorithm) ScaleDownIfNecessary(scalingCtx context.Contex
// Extra capacity is considered once we have a full instance worth of capacity
// more than the desired free mandelboxes. TODO: Set the instance type once we
// have support for more instance types. For now default to `g4dn.2xlarge`.
extraCapacity := int64(desiredFreeMandelboxesPerRegion[event.Region]) + (int64(defaultInstanceBuffer) * instanceCapacity["g4dn.2xlarge"])
extraCapacity := desiredFreeMandelboxesPerRegion[event.Region] + (defaultInstanceBuffer * instanceCapacity["g4dn.2xlarge"])

// Acquire lock on protected from scale down map
s.protectedMapLock.Lock()
Expand Down Expand Up @@ -205,9 +205,6 @@ func (s *DefaultScalingAlgorithm) ScaleUpIfNecessary(instancesToScale int, scali
logger.Infow("Starting scale up action.", contextFields)
defer logger.Infow("Finished scale up action.", contextFields)

// Try scale up in given region
instanceNum := int32(instancesToScale)

var (
// Slice that will hold the instances and pass them to the dbclient
instancesForDb []subscriptions.Instance
Expand All @@ -224,7 +221,7 @@ func (s *DefaultScalingAlgorithm) ScaleUpIfNecessary(instancesToScale int, scali
instancesForDb, fakeMandelboxesForDb = helpers.SpinUpFakeInstances(instancesToScale, image.ImageID, event.Region)
} else {
// Call the host handler to handle the instance spinup in the cloud provider
instancesForDb, err = s.Host.SpinUpInstances(scalingCtx, instanceNum, maxWaitTimeReady, image)
instancesForDb, err = s.Host.SpinUpInstances(scalingCtx, int32(instancesToScale), maxWaitTimeReady, image)
if err != nil {
return utils.MakeError("failed to spin up instances: %s", err)
}
Expand All @@ -235,7 +232,7 @@ func (s *DefaultScalingAlgorithm) ScaleUpIfNecessary(instancesToScale int, scali
}

for i := 0; i < len(instancesForDb); i++ {
instancesForDb[i].RemainingCapacity = int64(instanceCapacity[instancesForDb[i].Type])
instancesForDb[i].RemainingCapacity = instanceCapacity[instancesForDb[i].Type]
logger.Infow(utils.Sprintf("Created tagged instance with ID %s", instancesForDb[i].ID), contextFields)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,15 @@ func (s *DefaultScalingAlgorithm) VerifyCapacity(scalingCtx context.Context, eve

// We consider the expected mandelbox capacity (active instances + starting instances)
// to account for warmup time and so that we don't scale up unnecesary instances.
if mandelboxCapacity < int64(desiredFreeMandelboxesPerRegion[event.Region]) {
if mandelboxCapacity < desiredFreeMandelboxesPerRegion[event.Region] {

// TODO: Change to a different instance type once we support more types or cloud providers
instancesToScale := helpers.ComputeInstancesToScale(desiredFreeMandelboxesPerRegion[event.Region], mandelboxCapacity, instanceCapacity["g4dn.2xlarge"])

logger.Infow(utils.Sprintf("Current mandelbox capacity of %d is less than desired %d. Scaling up %d instances to satisfy minimum desired capacity.",
mandelboxCapacity, desiredFreeMandelboxesPerRegion[event.Region], defaultInstanceBuffer), contextFields)
err = s.ScaleUpIfNecessary(defaultInstanceBuffer, scalingCtx, event, latestImage)
mandelboxCapacity, desiredFreeMandelboxesPerRegion[event.Region], instancesToScale), contextFields)

err = s.ScaleUpIfNecessary(instancesToScale, scalingCtx, event, latestImage)
if err != nil {
return err
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,33 @@ some functions that make it easier to test the scaling service locally.
package helpers

import (
"math"

"github.com/whisthq/whist/backend/services/subscriptions"
)

// ComputeRealMandelboxCapacity is a helper function to compute the real mandelbox capacity.
// Real capcity is the number of free mandelboxes available on instances with active status.
func ComputeRealMandelboxCapacity(imageID string, activeInstances []subscriptions.Instance) int64 {
var (
realMandelboxCapacity int
)
func ComputeRealMandelboxCapacity(imageID string, activeInstances []subscriptions.Instance) int {
var realMandelboxCapacity int

// Loop over active instances (status ACTIVE), only consider the ones with the current image
for _, instance := range activeInstances {
if instance.ImageID == imageID {
realMandelboxCapacity += int(instance.RemainingCapacity)
realMandelboxCapacity += instance.RemainingCapacity
}
}

return int64(realMandelboxCapacity)
return realMandelboxCapacity
}

// ComputeExpectedMandelboxCapacity is a helper function to compute the expected mandelbox capacity.
// Expected capacity is the number of free mandelboxes available on instances with active status and
// in starting instances.
func ComputeExpectedMandelboxCapacity(imageID string, activeInstances []subscriptions.Instance, startingInstances []subscriptions.Instance) int64 {
func ComputeExpectedMandelboxCapacity(imageID string, activeInstances []subscriptions.Instance, startingInstances []subscriptions.Instance) int {
var (
realMandelboxCapacity int64
expectedMandelboxCapacity int64
realMandelboxCapacity int
expectedMandelboxCapacity int
)

// Get the capacity from active instances
Expand All @@ -49,5 +49,21 @@ func ComputeExpectedMandelboxCapacity(imageID string, activeInstances []subscrip
}

expectedMandelboxCapacity += realMandelboxCapacity
return int64(expectedMandelboxCapacity)
return expectedMandelboxCapacity
}

// ComputeInstancesToScale computes the number of instances we want to scale, according to the current number of desired mandelboxes.
// desiredMandelboxes: the number of free mandelboxes we want at all times. Set in the config db.
// currentCapacity: the current free mandelboxes in the database. Obtained by querying database and computing capacity.
// instanceCapacity: the mandelbox capacity for the specific instance type in use (e.g. a "g4dn.2xlarge" instance can run 2 mandelboxes).
func ComputeInstancesToScale(desiredMandelboxes int, currentCapacity int, instanceCapacity int) int {
// This will never happen realistically, but we should try to handle all cases.
if instanceCapacity <= 0 {
return 0
}

desiredCapacity := desiredMandelboxes - currentCapacity
instancesToScale := math.Ceil(float64(desiredCapacity) / float64(instanceCapacity))

return int(instancesToScale)
}
4 changes: 2 additions & 2 deletions backend/services/subscriptions/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ type Instance struct {
ClientSHA string `json:"client_sha"` // Commit hash
IPAddress string `json:"ip_addr"` // Public IPv4 address
Type string `json:"instance_type"` // Instance type
RemainingCapacity int64 `json:"remaining_capacity"` // Number of mandelboxes that can be allocated
RemainingCapacity int `json:"remaining_capacity"` // Number of mandelboxes that can be allocated
Status string `json:"status"` // Current status of the instance
CreatedAt time.Time `json:"created_at"` // Timestamp when the instance was creates
UpdatedAt time.Time `json:"updated_at"` // Timestamp when the instance was last updated
Expand Down Expand Up @@ -215,7 +215,7 @@ func ToInstances(dbInstances []WhistInstance) []Instance {
ClientSHA: string(instance.ClientSHA),
IPAddress: instance.IPAddress,
Type: string(instance.Type),
RemainingCapacity: int64(instance.RemainingCapacity),
RemainingCapacity: int(instance.RemainingCapacity),
Status: string(instance.Status),
CreatedAt: instance.CreatedAt,
UpdatedAt: instance.UpdatedAt,
Expand Down

0 comments on commit ae1db4d

Please sign in to comment.