Skip to content

Commit

Permalink
http2: fix reported protocol error from graceful upstream close (#36205)
Browse files Browse the repository at this point in the history
Risk Level: Medium
Testing: Added new tests
Release Notes: added
Runtime guard:
envoy.reloadable_features.http2_no_protocol_error_upon_clean_close
Fixes #21522

---------

Signed-off-by: Greg Greenway <ggreenway@apple.com>
  • Loading branch information
ggreenway committed Sep 20, 2024
1 parent a65a5b6 commit bb22882
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 5 deletions.
6 changes: 6 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,12 @@ bug_fixes:
change: |
RBAC will now allow stat prefixes configured in per-route config to override the base config's
stat prefix.
- area: http2
change: |
Fixed bug where an upstream that sent a GOAWAY and gracefully closed a connection would result in an increment of
the cluster stat ``upstream_cx_protocol_error`` and setting the ``UpstreamProtocolError`` response flag. This behavior
can be reverted by setting the runtime guard ``envoy.reloadable_features.http2_no_protocol_error_upon_clean_close``
to false.
- area: http3
change: |
Fixed a bug where an empty trailers block could be sent. This would occur if a filter removed
Expand Down
13 changes: 9 additions & 4 deletions source/common/http/codec_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,15 @@ void CodecClient::onData(Buffer::Instance& data) {
if (!status.ok()) {
ENVOY_CONN_LOG(debug, "Error dispatching received data: {}", *connection_, status.message());

// Don't count 408 responses where we have no active requests as protocol errors
if (!isPrematureResponseError(status) ||
(!active_requests_.empty() ||
getPrematureResponseHttpCode(status) != Code::RequestTimeout)) {
// Don't count 408 responses where we have no active requests as protocol errors.
// Don't count graceful GOAWAY closes.
const bool not_408 =
!isPrematureResponseError(status) ||
(!active_requests_.empty() || getPrematureResponseHttpCode(status) != Code::RequestTimeout);
const bool is_goaway = isGoAwayGracefulCloseError(status);
if (not_408 &&
(!is_goaway || !Runtime::runtimeFeatureEnabled(
"envoy.reloadable_features.http2_no_protocol_error_upon_clean_close"))) {
host_->cluster().trafficStats()->upstream_cx_protocol_error_.inc();
protocol_error_ = true;
}
Expand Down
6 changes: 5 additions & 1 deletion source/common/http/http2/codec_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1704,7 +1704,11 @@ int ConnectionImpl::setAndCheckCodecCallbackStatus(Status&& status) {
// error statuses are silently discarded.
codec_callback_status_.Update(std::move(status));
if (codec_callback_status_.ok() && connection_.state() != Network::Connection::State::Open) {
codec_callback_status_ = codecProtocolError("Connection was closed while dispatching frames");
if (!active_streams_.empty() || !raised_goaway_) {
codec_callback_status_ = codecProtocolError("Connection was closed while dispatching frames");
} else {
codec_callback_status_ = goAwayGracefulCloseError();
}
}

return codec_callback_status_.ok() ? 0 : ERR_CALLBACK_FAILURE;
Expand Down
12 changes: 12 additions & 0 deletions source/common/http/status.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ absl::string_view statusCodeToString(StatusCode code) {
return "InboundFramesWithEmptyPayloadError";
case StatusCode::EnvoyOverloadError:
return "EnvoyOverloadError";
case StatusCode::GoAwayGracefulClose:
return "GoAwayGracefulClose";
}
return "";
}
Expand Down Expand Up @@ -124,6 +126,12 @@ Status envoyOverloadError(absl::string_view message) {
return status;
}

Status goAwayGracefulCloseError() {
absl::Status status(absl::StatusCode::kInternal, {});
storePayload(status, EnvoyStatusPayload(StatusCode::GoAwayGracefulClose));
return status;
}

// Methods for checking and extracting error information
StatusCode getStatusCode(const Status& status) {
return status.ok() ? StatusCode::Ok : getPayload(status).status_code_;
Expand Down Expand Up @@ -160,5 +168,9 @@ bool isEnvoyOverloadError(const Status& status) {
return getStatusCode(status) == StatusCode::EnvoyOverloadError;
}

bool isGoAwayGracefulCloseError(const Status& status) {
return getStatusCode(status) == StatusCode::GoAwayGracefulClose;
}

} // namespace Http
} // namespace Envoy
7 changes: 7 additions & 0 deletions source/common/http/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ enum class StatusCode : int {
* Indicates that Envoy is overloaded and may shed load.
*/
EnvoyOverloadError = 6,

/**
* Indicates the connection was gracefully closed due to GOAWAY.
*/
GoAwayGracefulClose = 7,
};

using Status = absl::Status;
Expand All @@ -100,6 +105,7 @@ Status prematureResponseError(absl::string_view message, Http::Code http_code);
Status codecClientError(absl::string_view message);
Status inboundFramesWithEmptyPayloadError();
Status envoyOverloadError(absl::string_view message);
Status goAwayGracefulCloseError();

/**
* Returns Envoy::StatusCode of the given status object.
Expand All @@ -116,6 +122,7 @@ ABSL_MUST_USE_RESULT bool isPrematureResponseError(const Status& status);
ABSL_MUST_USE_RESULT bool isCodecClientError(const Status& status);
ABSL_MUST_USE_RESULT bool isInboundFramesWithEmptyPayloadError(const Status& status);
ABSL_MUST_USE_RESULT bool isEnvoyOverloadError(const Status& status);
ABSL_MUST_USE_RESULT bool isGoAwayGracefulCloseError(const Status& status);

/**
* Returns Http::Code value of the PrematureResponseError status.
Expand Down
1 change: 1 addition & 0 deletions source/common/runtime/runtime_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ RUNTIME_GUARD(envoy_reloadable_features_http1_balsa_disallow_lone_cr_in_chunk_ex
// Ignore the automated "remove this flag" issue: we should keep this for 1 year.
RUNTIME_GUARD(envoy_reloadable_features_http1_use_balsa_parser);
RUNTIME_GUARD(envoy_reloadable_features_http2_discard_host_header);
RUNTIME_GUARD(envoy_reloadable_features_http2_no_protocol_error_upon_clean_close);
// Ignore the automated "remove this flag" issue: we should keep this for 1 year.
RUNTIME_GUARD(envoy_reloadable_features_http2_use_visitor_for_data);
RUNTIME_GUARD(envoy_reloadable_features_http3_happy_eyeballs);
Expand Down
112 changes: 112 additions & 0 deletions test/integration/multiplexed_integration_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2147,6 +2147,9 @@ TEST_P(Http2FrameIntegrationTest, AdjustUpstreamSettingsMaxStreams) {
}

TEST_P(Http2FrameIntegrationTest, UpstreamSettingsMaxStreamsAfterGoAway) {
config_helper_.addRuntimeOverride(
"envoy.reloadable_features.http2_no_protocol_error_upon_clean_close", "true");

beginSession();
FakeRawConnectionPtr fake_upstream_connection;

Expand All @@ -2170,6 +2173,115 @@ TEST_P(Http2FrameIntegrationTest, UpstreamSettingsMaxStreamsAfterGoAway) {
std::string(settings_max_connections_frame))));

test_server_->waitForCounterGe("cluster.cluster_0.upstream_cx_close_notify", 1);
EXPECT_EQ(0, test_server_->counter("cluster.cluster_0.upstream_cx_protocol_error")->value());

// Cleanup.
tcp_client_->close();
}

TEST_P(Http2FrameIntegrationTest, UpstreamGoAway) {
config_helper_.addRuntimeOverride(
"envoy.reloadable_features.http2_no_protocol_error_upon_clean_close", "true");

beginSession();
FakeRawConnectionPtr fake_upstream_connection;

const uint32_t client_stream_idx = 1;
// Start a request and wait for it to reach the upstream.
sendFrame(Http2Frame::makePostRequest(client_stream_idx, "host", "/path/to/long/url"));
ASSERT_TRUE(fake_upstreams_[0]->waitForRawConnection(fake_upstream_connection));
const Http2Frame settings_frame = Http2Frame::makeEmptySettingsFrame();
ASSERT_TRUE(fake_upstream_connection->write(std::string(settings_frame)));
test_server_->waitForGaugeEq("cluster.cluster_0.upstream_rq_active", 1);

const Http2Frame rst_stream =
Http2Frame::makeResetStreamFrame(client_stream_idx, Http2Frame::ErrorCode::FlowControlError);
const Http2Frame go_away_frame =
Http2Frame::makeEmptyGoAwayFrame(12345, Http2Frame::ErrorCode::NoError);
ASSERT_TRUE(fake_upstream_connection->write(
absl::StrCat(std::string(rst_stream), std::string(go_away_frame))));
ASSERT_TRUE(fake_upstream_connection->close());

test_server_->waitForCounterGe("cluster.cluster_0.upstream_cx_close_notify", 1);
EXPECT_EQ(0, test_server_->counter("cluster.cluster_0.upstream_cx_protocol_error")->value());

// Cleanup.
tcp_client_->close();
}

TEST_P(Http2FrameIntegrationTest, UpstreamGoAwayLegacy) {
config_helper_.addRuntimeOverride(
"envoy.reloadable_features.http2_no_protocol_error_upon_clean_close", "false");

beginSession();
FakeRawConnectionPtr fake_upstream_connection;

const uint32_t client_stream_idx = 1;
// Start a request and wait for it to reach the upstream.
sendFrame(Http2Frame::makePostRequest(client_stream_idx, "host", "/path/to/long/url"));
ASSERT_TRUE(fake_upstreams_[0]->waitForRawConnection(fake_upstream_connection));
const Http2Frame settings_frame = Http2Frame::makeEmptySettingsFrame();
ASSERT_TRUE(fake_upstream_connection->write(std::string(settings_frame)));
test_server_->waitForGaugeEq("cluster.cluster_0.upstream_rq_active", 1);

const Http2Frame rst_stream =
Http2Frame::makeResetStreamFrame(client_stream_idx, Http2Frame::ErrorCode::FlowControlError);
const Http2Frame go_away_frame =
Http2Frame::makeEmptyGoAwayFrame(12345, Http2Frame::ErrorCode::NoError);
ASSERT_TRUE(fake_upstream_connection->write(
absl::StrCat(std::string(rst_stream), std::string(go_away_frame))));
ASSERT_TRUE(fake_upstream_connection->close());

test_server_->waitForCounterGe("cluster.cluster_0.upstream_cx_close_notify", 1);
test_server_->waitForCounterGe("cluster.cluster_0.upstream_cx_protocol_error", 1);

// Cleanup.
tcp_client_->close();
}

// Test that sending an invalid frame results in `upstream_cx_protocol_error`.
TEST_P(Http2FrameIntegrationTest, UpstreamProtocolError) {
config_helper_.addRuntimeOverride(
"envoy.reloadable_features.http2_no_protocol_error_upon_clean_close", "true");

beginSession();
FakeRawConnectionPtr fake_upstream_connection;

const uint32_t client_stream_idx = 1;
// Start a request and wait for it to reach the upstream.
sendFrame(Http2Frame::makePostRequest(client_stream_idx, "host", "/path/to/long/url"));
ASSERT_TRUE(fake_upstreams_[0]->waitForRawConnection(fake_upstream_connection));
const Http2Frame settings_frame = Http2Frame::makeEmptySettingsFrame();
ASSERT_TRUE(fake_upstream_connection->write(std::string(settings_frame)));
test_server_->waitForGaugeEq("cluster.cluster_0.upstream_rq_active", 1);

ASSERT_TRUE(fake_upstream_connection->write("abcdefg this is not a valid h2 frame"));

test_server_->waitForCounterGe("cluster.cluster_0.upstream_cx_protocol_error", 1);

// Cleanup.
tcp_client_->close();
}

// Test that sending an invalid frame results in `upstream_cx_protocol_error`.
TEST_P(Http2FrameIntegrationTest, UpstreamProtocolErrorLegacy) {
config_helper_.addRuntimeOverride(
"envoy.reloadable_features.http2_no_protocol_error_upon_clean_close", "false");

beginSession();
FakeRawConnectionPtr fake_upstream_connection;

const uint32_t client_stream_idx = 1;
// Start a request and wait for it to reach the upstream.
sendFrame(Http2Frame::makePostRequest(client_stream_idx, "host", "/path/to/long/url"));
ASSERT_TRUE(fake_upstreams_[0]->waitForRawConnection(fake_upstream_connection));
const Http2Frame settings_frame = Http2Frame::makeEmptySettingsFrame();
ASSERT_TRUE(fake_upstream_connection->write(std::string(settings_frame)));
test_server_->waitForGaugeEq("cluster.cluster_0.upstream_rq_active", 1);

ASSERT_TRUE(fake_upstream_connection->write("abcdefg this is not a valid h2 frame"));

test_server_->waitForCounterGe("cluster.cluster_0.upstream_cx_protocol_error", 1);

// Cleanup.
tcp_client_->close();
Expand Down

0 comments on commit bb22882

Please sign in to comment.