Skip to content

Commit

Permalink
Tests for multi-container readiness and liveness probes (knative#15180)
Browse files Browse the repository at this point in the history
* Tests for multi-container readiness and liveness probes

* Use PORT instead of HEALTHCHECK_PORT for serving container

* Fixes

* Pass FORWARD_PORT in service_test

* Revert readiness image to only fail readiness probe

handleMain does not reflect the readiness state like it was before

* Use proxy to start failing sidecar

This reverts commit c6929e6e0e1190b15c990c7873b269abdae9609d.

* Test liveness failure

* Use /healthz/readiness in multicontainer test

* Address review feedback

* start package-private functions with lowercase
* use already existing const for "user-container"
* rename getPort to getServerPort to make it more specific (avoid
confusion with FORWARD_PORT)

* Fix lint
  • Loading branch information
mgencur committed May 9, 2024
1 parent 6bb5c4d commit be530b6
Show file tree
Hide file tree
Showing 13 changed files with 602 additions and 60 deletions.
3 changes: 0 additions & 3 deletions pkg/apis/serving/k8s_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -441,10 +441,7 @@ func validateContainers(ctx context.Context, containers []corev1.Container, volu
} else {
allNames.Insert(containers[i].Name)
}
// Probes are not allowed on other than serving container,
// ref: http://bit.ly/probes-condition
if len(containers[i].Ports) == 0 {
// Note, if we allow readiness/liveness checks on sidecars, we should pass in an *empty* port here, not the main container's port.
errs = errs.Also(validateSidecarContainer(WithinSidecarContainer(ctx), containers[i], volumes).ViaFieldIndex("containers", i))
} else {
errs = errs.Also(ValidateUserContainer(WithinUserContainer(ctx), containers[i], volumes, port).ViaFieldIndex("containers", i))
Expand Down
8 changes: 8 additions & 0 deletions pkg/testing/v1/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,14 @@ func WithReadinessProbe(p *corev1.Probe) ServiceOption {
}
}

// WithLivenessProbe sets the provided probe to be the liveness
// probe on the service.
func WithLivenessProbe(p *corev1.Probe) ServiceOption {
return func(s *v1.Service) {
s.Spec.Template.Spec.Containers[0].LivenessProbe = p
}
}

// MarkConfigurationNotReconciled calls the function of the same name on the Service's status.
func MarkConfigurationNotReconciled(service *v1.Service) {
service.Status.MarkConfigurationNotReconciled()
Expand Down
3 changes: 3 additions & 0 deletions test/conformance/api/v1/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,9 @@ func TestServiceCreateWithMultipleContainers(t *testing.T) {
Ports: []corev1.ContainerPort{{
ContainerPort: 8881,
}},
Env: []corev1.EnvVar{
{Name: "FORWARD_PORT", Value: "8882"},
},
}, {
Image: pkgtest.ImagePath(names.Sidecars[0]),
}}
Expand Down
181 changes: 181 additions & 0 deletions test/conformance/runtime/liveness_probe_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
//go:build e2e
// +build e2e

/*
Copyright 2024 The Knative Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package runtime

import (
"context"
"net/http"
"strconv"
"strings"
"testing"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
pkgtest "knative.dev/pkg/test"
"knative.dev/pkg/test/spoof"
"knative.dev/serving/pkg/apis/config"
resourcenames "knative.dev/serving/pkg/reconciler/revision/resources/names"
v1opts "knative.dev/serving/pkg/testing/v1"
"knative.dev/serving/test"
v1test "knative.dev/serving/test/v1"
)

const (
livenessPath = "/healthz/liveness"
livenessCounterPath = "/healthz/livenessCounter"
)

func TestLivenessWithFail(t *testing.T) {
t.Parallel()
if test.ServingFlags.DisableOptionalAPI {
t.Skip("Container.livenessProbe is not required by Knative Serving API Specification")
}
clients := test.Setup(t)

names := test.ResourceNames{
Service: test.ObjectNameForTest(t),
Image: test.Readiness,
}

test.EnsureTearDown(t, clients, &names)

t.Log("Creating a new Service")
resources, err := v1test.CreateServiceReady(t, clients, &names,
v1opts.WithLivenessProbe(
&corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: livenessPath,
Port: intstr.FromInt32(8080),
},
},
PeriodSeconds: 1,
FailureThreshold: 3,
}))

if err != nil {
t.Fatalf("Failed to create initial Service: %v: %v", names.Service, err)
}

// Wait for the liveness probe to be executed a few times before introducing a failure.
url := resources.Route.Status.URL.URL()
url.Path = livenessCounterPath
if _, err = pkgtest.WaitForEndpointState(
context.Background(),
clients.KubeClient,
t.Logf,
url,
atLeastNumLivenessChecks(t, 5),
"livenessIsReady",
test.ServingFlags.ResolvableDomain,
test.AddRootCAtoTransport(context.Background(), t.Logf, clients, test.ServingFlags.HTTPS),
); err != nil {
t.Fatalf("The endpoint for Route %s at %s didn't return success: %v", names.Route, url, err)
}

// Check that user-container hasn't been restarted yet.
deploymentName := resourcenames.Deployment(resources.Revision)
podList, err := clients.KubeClient.CoreV1().Pods(test.ServingFlags.TestNamespace).List(context.Background(), metav1.ListOptions{})
if err != nil {
t.Fatal("Unable to get pod list: ", err)
}
for i := range podList.Items {
pod := &podList.Items[i]
if strings.Contains(pod.Name, deploymentName) && userContainerRestarted(pod) {
t.Fatal("User container unexpectedly restarted")
}
}

t.Log("POST to /start-failing")
client, err := pkgtest.NewSpoofingClient(context.Background(),
clients.KubeClient,
t.Logf,
url.Hostname(),
test.ServingFlags.ResolvableDomain,
test.AddRootCAtoTransport(context.Background(), t.Logf, clients, test.ServingFlags.HTTPS))
if err != nil {
t.Fatalf("Failed to create spoofing client: %v", err)
}

url.Path = "/start-failing"
startFailing, err := http.NewRequest(http.MethodPost, url.String(), nil)
if err != nil {
t.Fatal(err)
}
if _, err := client.Do(startFailing); err != nil {
t.Fatalf("POST to /start-failing failed: %v", err)
}

// Wait for the user-container to be restarted.
if err := pkgtest.WaitForPodListState(
context.Background(),
clients.KubeClient,
func(p *corev1.PodList) (bool, error) {
for i := range p.Items {
pod := &p.Items[i]
if strings.Contains(pod.Name, deploymentName) && userContainerRestarted(pod) {
return true, nil
}
}
return false, nil
},
"WaitForContainerRestart", test.ServingFlags.TestNamespace); err != nil {
t.Fatalf("Failed waiting for user-container to be restarted: %v", err)
}

// After restart, verify the liveness probe passes a few times again.
url.Path = livenessCounterPath
if _, err = pkgtest.WaitForEndpointState(
context.Background(),
clients.KubeClient,
t.Logf,
url,
atLeastNumLivenessChecks(t, 5),
"livenessIsReady",
test.ServingFlags.ResolvableDomain,
test.AddRootCAtoTransport(context.Background(), t.Logf, clients, test.ServingFlags.HTTPS),
); err != nil {
t.Fatalf("The endpoint for Route %s at %s didn't return success: %v", names.Route, url, err)
}
}

func atLeastNumLivenessChecks(t *testing.T, expectedChecks int) spoof.ResponseChecker {
return func(resp *spoof.Response) (bool, error) {
actualChecks, err := strconv.Atoi(string(resp.Body))
// Some errors are temporarily expected after container restart.
if err != nil {
t.Logf("Unable to parse num checks, status: %d, body: %s", resp.StatusCode, string(resp.Body))
}
if actualChecks >= expectedChecks {
return true, nil
}
return false, nil
}
}

func userContainerRestarted(pod *corev1.Pod) bool {
for _, status := range pod.Status.ContainerStatuses {
if status.Name == config.DefaultUserContainerName && status.RestartCount > 0 {
return true
}
}
return false
}
31 changes: 17 additions & 14 deletions test/conformance/runtime/readiness_probe_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,20 @@ import (
v1test "knative.dev/serving/test/v1"
)

// readinessPropagationTime is how long to poll to allow for readiness probe
// changes to propagate to ingresses/activator.
//
// When Readiness.PeriodSeconds=0 the underlying Pods use the K8s
// defaults for readiness. Those are:
// - Readiness.PeriodSeconds=10
// - Readiness.FailureThreshold=3
//
// Thus it takes at a mininum 30 seconds for the Pod to become
// unready. To account for this we bump max propagation time
const readinessPropagationTime = time.Minute
const (
// readinessPropagationTime is how long to poll to allow for readiness probe
// changes to propagate to ingresses/activator.
//
// When Readiness.PeriodSeconds=0 the underlying Pods use the K8s
// defaults for readiness. Those are:
// - Readiness.PeriodSeconds=10
// - Readiness.FailureThreshold=3
//
// Thus it takes at a mininum 30 seconds for the Pod to become
// unready. To account for this we bump max propagation time
readinessPropagationTime = time.Minute
readinessPath = "/healthz/readiness"
)

func TestProbeRuntime(t *testing.T) {
t.Parallel()
Expand All @@ -72,7 +75,7 @@ func TestProbeRuntime(t *testing.T) {
}},
handler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz",
Path: readinessPath,
},
},
}, {
Expand Down Expand Up @@ -130,7 +133,7 @@ func TestProbeRuntime(t *testing.T) {

// Once the service reports ready we should immediately be able to curl it.
url := resources.Route.Status.URL.URL()
url.Path = "/healthz"
url.Path = readinessPath
if _, err = pkgtest.CheckEndpointState(
context.Background(),
clients.KubeClient,
Expand Down Expand Up @@ -225,7 +228,7 @@ func waitReadyThenStartFailing(t *testing.T, clients *test.Clients, names test.R
PeriodSeconds: probePeriod,
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz",
Path: readinessPath,
},
},
}))
Expand Down
18 changes: 18 additions & 0 deletions test/e2e/e2e.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,21 @@ func RevisionFromConfiguration(clients *test.Clients, configName string) (string
}
return "", fmt.Errorf("no valid revision name found in configuration %s", configName)
}

// PrivateServiceName returns the private service name for the given revision.
func PrivateServiceName(t *testing.T, clients *test.Clients, revision string) string {
var privateServiceName string

if err := wait.PollUntilContextTimeout(context.Background(), time.Second, 1*time.Minute, true, func(context.Context) (bool, error) {
sks, err := clients.NetworkingClient.ServerlessServices.Get(context.Background(), revision, metav1.GetOptions{})
if err != nil {
return false, nil
}
privateServiceName = sks.Status.PrivateServiceName
return privateServiceName != "", nil
}); err != nil {
t.Fatalf("Error retrieving sks %q: %v", revision, err)
}

return privateServiceName
}
21 changes: 2 additions & 19 deletions test/e2e/minscale_readiness_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func TestMinScale(t *testing.T) {
}

revName := latestRevisionName(t, clients, names.Config, "")
serviceName := privateServiceName(t, clients, revName)
serviceName := PrivateServiceName(t, clients, revName)

t.Log("Waiting for revision to scale to minScale before becoming ready")
if lr, err := waitForDesiredScale(clients, serviceName, gte(minScale)); err != nil {
Expand Down Expand Up @@ -116,7 +116,7 @@ func TestMinScale(t *testing.T) {
}

newRevName := latestRevisionName(t, clients, names.Config, revName)
newServiceName := privateServiceName(t, clients, newRevName)
newServiceName := PrivateServiceName(t, clients, newRevName)

t.Log("Waiting for new revision to scale to minScale after update")
if lr, err := waitForDesiredScale(clients, newServiceName, gte(minScale)); err != nil {
Expand Down Expand Up @@ -208,23 +208,6 @@ func latestRevisionName(t *testing.T, clients *test.Clients, configName, oldRevN
return config.Status.LatestCreatedRevisionName
}

func privateServiceName(t *testing.T, clients *test.Clients, revisionName string) string {
var privateServiceName string

if err := wait.PollUntilContextTimeout(context.Background(), time.Second, 1*time.Minute, true, func(context.Context) (bool, error) {
sks, err := clients.NetworkingClient.ServerlessServices.Get(context.Background(), revisionName, metav1.GetOptions{})
if err != nil {
return false, nil
}
privateServiceName = sks.Status.PrivateServiceName
return privateServiceName != "", nil
}); err != nil {
t.Fatalf("Error retrieving sks %q: %v", revisionName, err)
}

return privateServiceName
}

// waitForDesiredScale returns the last observed number of pods and/or error if the cond
// callback is never satisfied.
func waitForDesiredScale(clients *test.Clients, serviceName string, cond func(int) bool) (latestReady int, err error) {
Expand Down
3 changes: 3 additions & 0 deletions test/e2e/multicontainer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ func TestMultiContainer(t *testing.T) {
Ports: []corev1.ContainerPort{{
ContainerPort: 8881,
}},
Env: []corev1.EnvVar{
{Name: "FORWARD_PORT", Value: "8882"},
},
}, {
Image: pkgTest.ImagePath(names.Sidecars[0]),
}}
Expand Down
Loading

0 comments on commit be530b6

Please sign in to comment.