From 38cceb37ea8a2ebcd8f93f1dbbadc44f7bd90f36 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Mon, 12 Aug 2024 20:41:01 +0100 Subject: [PATCH 1/6] add tests for accept-charset --- .../directives/MarshallingDirectivesSpec.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala index 641377876..f17353ffd 100644 --- a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala +++ b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala @@ -275,4 +275,19 @@ class MarshallingDirectivesSpec extends RoutingSpec with Inside { } } } + + "The marshalling infrastructure for text" should { + val foo = "Hällö" + "render text with UTF-8 encoding if no `Accept-Charset` request header is present" in { + Get() ~> complete(foo) ~> check { + responseEntity shouldEqual HttpEntity(ContentType(`text/plain`, `UTF-8`), foo) + } + } + "rrender text with UTF-8 encoding if an `Accept-Charset` request header requests a non-UTF-8 encoding" in { + Get() ~> `Accept-Charset`(`ISO-8859-1`) ~> complete(foo) ~> check { + responseEntity shouldEqual HttpEntity(ContentType(`text/plain`, `ISO-8859-1`), foo) + } + } + + } } From 479dfa10cd2b923cad9e85d1cfd547fe4db1ca32 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Mon, 12 Aug 2024 21:49:08 +0100 Subject: [PATCH 2/6] default to utf-8 if charset is invalid scalafmt --- .../directives/MarshallingDirectivesSpec.scala | 8 ++++++-- .../marshalling/PredefinedToEntityMarshallers.scala | 12 +++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala index f17353ffd..6ec5475fa 100644 --- a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala +++ b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala @@ -283,11 +283,15 @@ class MarshallingDirectivesSpec extends RoutingSpec with Inside { responseEntity shouldEqual HttpEntity(ContentType(`text/plain`, `UTF-8`), foo) } } - "rrender text with UTF-8 encoding if an `Accept-Charset` request header requests a non-UTF-8 encoding" in { + "render text with requested encoding if an `Accept-Charset` request header requests a non-UTF-8 encoding" in { Get() ~> `Accept-Charset`(`ISO-8859-1`) ~> complete(foo) ~> check { responseEntity shouldEqual HttpEntity(ContentType(`text/plain`, `ISO-8859-1`), foo) } } - + "render text with UTF-8 encoding if an `Accept-Charset` request header requests an unknown encoding" in { + Get() ~> `Accept-Charset`(HttpCharset("unknown")(Nil)) ~> complete(foo) ~> check { + responseEntity shouldEqual HttpEntity(ContentType(`text/plain`, `UTF-8`), foo) + } + } } } diff --git a/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala b/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala index c4064b434..51f8ab6f4 100644 --- a/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala +++ b/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala @@ -14,6 +14,7 @@ package org.apache.pekko.http.scaladsl.marshalling import java.nio.CharBuffer +import java.nio.charset.UnsupportedCharsetException import org.apache.pekko import pekko.http.scaladsl.model.MediaTypes._ @@ -55,7 +56,16 @@ trait PredefinedToEntityMarshallers extends MultipartMarshallers { implicit val StringMarshaller: ToEntityMarshaller[String] = stringMarshaller(`text/plain`) def stringMarshaller(mediaType: MediaType.WithOpenCharset): ToEntityMarshaller[String] = - Marshaller.withOpenCharset(mediaType) { (s, cs) => HttpEntity(mediaType.withCharset(cs), s) } + Marshaller.withOpenCharset(mediaType) { (s, cs) => + // https://github.com/apache/pekko-http/issues/300 + // ignore issues with invalid charset - use UTF-8 instead + try { + HttpEntity(mediaType.withCharset(cs), s) + } catch { + case _: UnsupportedCharsetException => + HttpEntity(mediaType.withCharset(HttpCharsets.`UTF-8`), s) + } + } def stringMarshaller(mediaType: MediaType.WithFixedCharset): ToEntityMarshaller[String] = Marshaller.withFixedContentType(mediaType) { s => HttpEntity(mediaType, s) } From 3ccd053be002e14126459becaf7e4cd4b7454b6f Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Tue, 13 Aug 2024 18:03:25 +0100 Subject: [PATCH 3/6] xml tests (1 broken still) --- .../MarshallingDirectivesSpec.scala | 25 +++++++++++++++++++ .../PredefinedToEntityMarshallers.scala | 9 ++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala index 6ec5475fa..a2b2ba02c 100644 --- a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala +++ b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/server/directives/MarshallingDirectivesSpec.scala @@ -250,6 +250,11 @@ class MarshallingDirectivesSpec extends RoutingSpec with Inside { rejection shouldEqual UnacceptedResponseContentTypeRejection(Set(ContentType(`application/json`))) } } + "reject JSON rendering if an `Accept-Charset` request header requests an unknown encoding" in { + Get() ~> `Accept-Charset`(HttpCharset("unknown")(Nil)) ~> complete(foo) ~> check { + rejection shouldEqual UnacceptedResponseContentTypeRejection(Set(ContentType(`application/json`))) + } + } val acceptHeaderUtf = Accept.parseFromValueString("application/json;charset=utf8").right.get val acceptHeaderNonUtf = Accept.parseFromValueString("application/json;charset=ISO-8859-1").right.get "render JSON response when `Accept` header is present with the `charset` parameter ignoring it" in { @@ -294,4 +299,24 @@ class MarshallingDirectivesSpec extends RoutingSpec with Inside { } } } + + "The marshalling infrastructure for text/xml" should { + val foo = Hällö + "render XML with UTF-8 encoding if no `Accept-Charset` request header is present" in { + Get() ~> complete(foo) ~> check { + responseEntity shouldEqual HttpEntity(ContentType(`text/xml`, `UTF-8`), foo.toString) + } + } + "render XML with requested encoding if an `Accept-Charset` request header requests a non-UTF-8 encoding" in { + Get() ~> `Accept-Charset`(`ISO-8859-1`) ~> complete(foo) ~> check { + responseEntity shouldEqual HttpEntity(ContentType(`text/xml`, `ISO-8859-1`), foo.toString) + } + } + "render XML with UTF-8 encoding if an `Accept-Charset` request header requests an unknown encoding" ignore { + // still returns Content-Type: text/xml; charset=unknown + Get() ~> `Accept-Charset`(HttpCharset("unknown")(Nil)) ~> complete(foo) ~> check { + responseEntity shouldEqual HttpEntity(ContentType(`text/xml`, `UTF-8`), foo.toString) + } + } + } } diff --git a/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala b/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala index 51f8ab6f4..78fdaa98d 100644 --- a/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala +++ b/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala @@ -35,7 +35,14 @@ trait PredefinedToEntityMarshallers extends MultipartMarshallers { implicit val CharArrayMarshaller: ToEntityMarshaller[Array[Char]] = charArrayMarshaller(`text/plain`) def charArrayMarshaller(mediaType: MediaType.WithOpenCharset): ToEntityMarshaller[Array[Char]] = Marshaller.withOpenCharset(mediaType) { (value, charset) => - marshalCharArray(value, mediaType.withCharset(charset)) + // https://github.com/apache/pekko-http/issues/300 + // ignore issues with invalid charset - use UTF-8 instead + try { + marshalCharArray(value, mediaType.withCharset(charset)) + } catch { + case _: UnsupportedCharsetException => + marshalCharArray(value, mediaType.withCharset(HttpCharsets.`UTF-8`)) + } } def charArrayMarshaller(mediaType: MediaType.WithFixedCharset): ToEntityMarshaller[Array[Char]] = Marshaller.withFixedContentType(mediaType) { value => marshalCharArray(value, mediaType) } From c8514e78919d4bc41cc459aee9cb88a0a94043a6 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Fri, 16 Aug 2024 16:06:13 +0100 Subject: [PATCH 4/6] add charsetWithUtf8Failover function --- .../pekko/http/scaladsl/model/HttpCharset.scala | 13 +++++++++++++ .../PredefinedToEntityMarshallers.scala | 15 ++------------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/http-core/src/main/scala/org/apache/pekko/http/scaladsl/model/HttpCharset.scala b/http-core/src/main/scala/org/apache/pekko/http/scaladsl/model/HttpCharset.scala index 46967e1ed..c74ec3384 100644 --- a/http-core/src/main/scala/org/apache/pekko/http/scaladsl/model/HttpCharset.scala +++ b/http-core/src/main/scala/org/apache/pekko/http/scaladsl/model/HttpCharset.scala @@ -66,6 +66,19 @@ final case class HttpCharset private[http] (override val value: String)(val alia /** Returns the Charset for this charset if available or throws an exception otherwise */ def nioCharset: Charset = _nioCharset.get + /** + * @return this HttpCharset instance if this charset can be parsed to a + * java.nio.charset.Charset instance, otherwise returns the UTF-8 charset. + * @since 1.1.0 + */ + def charsetWithUtf8Failover: HttpCharset = { + if (_nioCharset.isSuccess) { + this + } else { + HttpCharsets.`UTF-8` + } + } + private def readObject(in: java.io.ObjectInputStream): Unit = { in.defaultReadObject() _nioCharset = HttpCharset.findNioCharset(value) diff --git a/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala b/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala index 78fdaa98d..0a5ee275d 100644 --- a/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala +++ b/http/src/main/scala/org/apache/pekko/http/scaladsl/marshalling/PredefinedToEntityMarshallers.scala @@ -14,7 +14,6 @@ package org.apache.pekko.http.scaladsl.marshalling import java.nio.CharBuffer -import java.nio.charset.UnsupportedCharsetException import org.apache.pekko import pekko.http.scaladsl.model.MediaTypes._ @@ -37,12 +36,7 @@ trait PredefinedToEntityMarshallers extends MultipartMarshallers { Marshaller.withOpenCharset(mediaType) { (value, charset) => // https://github.com/apache/pekko-http/issues/300 // ignore issues with invalid charset - use UTF-8 instead - try { - marshalCharArray(value, mediaType.withCharset(charset)) - } catch { - case _: UnsupportedCharsetException => - marshalCharArray(value, mediaType.withCharset(HttpCharsets.`UTF-8`)) - } + marshalCharArray(value, mediaType.withCharset(charset.charsetWithUtf8Failover)) } def charArrayMarshaller(mediaType: MediaType.WithFixedCharset): ToEntityMarshaller[Array[Char]] = Marshaller.withFixedContentType(mediaType) { value => marshalCharArray(value, mediaType) } @@ -66,12 +60,7 @@ trait PredefinedToEntityMarshallers extends MultipartMarshallers { Marshaller.withOpenCharset(mediaType) { (s, cs) => // https://github.com/apache/pekko-http/issues/300 // ignore issues with invalid charset - use UTF-8 instead - try { - HttpEntity(mediaType.withCharset(cs), s) - } catch { - case _: UnsupportedCharsetException => - HttpEntity(mediaType.withCharset(HttpCharsets.`UTF-8`), s) - } + HttpEntity(mediaType.withCharset(cs.charsetWithUtf8Failover), s) } def stringMarshaller(mediaType: MediaType.WithFixedCharset): ToEntityMarshaller[String] = Marshaller.withFixedContentType(mediaType) { s => HttpEntity(mediaType, s) } From 1ed3fd760bd9fecdd405d7c631f35cddf54369f1 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Fri, 16 Aug 2024 16:23:21 +0100 Subject: [PATCH 5/6] Update MarshallingSpec.scala --- .../scaladsl/marshalling/MarshallingSpec.scala | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/marshalling/MarshallingSpec.scala b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/marshalling/MarshallingSpec.scala index fe0cb7446..6a8f135da 100644 --- a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/marshalling/MarshallingSpec.scala +++ b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/marshalling/MarshallingSpec.scala @@ -56,6 +56,23 @@ class MarshallingSpec extends AnyFreeSpec with Matchers with BeforeAndAfterAll w } } + "The PredefinedToEntityMarshallers" - { + "StringMarshaller should marshal response to `text/plain` content in UTF-8 when accept-charset is invalid" in { + val invalidAcceptCharsetHeader = `Accept-Charset`(HttpCharsetRange(HttpCharset.custom("invalid"))) + val request = HttpRequest().withHeaders(Seq(invalidAcceptCharsetHeader)) + val responseEntity = marshalToResponse("Ha“llo", request).entity + responseEntity.contentType.charsetOption shouldEqual Some(HttpCharsets.`UTF-8`) + responseEntity.contentType.mediaType shouldEqual MediaTypes.`text/plain` + } + "CharArrayMarshaller should marshal response to `text/plain` content in UTF-8 when accept-charset is invalid" in { + val invalidAcceptCharsetHeader = `Accept-Charset`(HttpCharsetRange(HttpCharset.custom("invalid"))) + val request = HttpRequest().withHeaders(Seq(invalidAcceptCharsetHeader)) + val responseEntity = marshalToResponse("Ha“llo".toCharArray(), request).entity + responseEntity.contentType.charsetOption shouldEqual Some(HttpCharsets.`UTF-8`) + responseEntity.contentType.mediaType shouldEqual MediaTypes.`text/plain` + } + } + "The PredefinedToResponseMarshallers" - { "fromStatusCode should properly marshal a status code that doesn't allow an entity" in { marshalToResponse(StatusCodes.NoContent) shouldEqual HttpResponse(StatusCodes.NoContent, From 6c4aaf7344d00f6ddcb3811b17090adcbed56cf1 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Fri, 16 Aug 2024 16:56:40 +0100 Subject: [PATCH 6/6] scala 2.12 compile issue --- .../pekko/http/scaladsl/marshalling/MarshallingSpec.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/marshalling/MarshallingSpec.scala b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/marshalling/MarshallingSpec.scala index 6a8f135da..952b581b3 100644 --- a/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/marshalling/MarshallingSpec.scala +++ b/http-tests/src/test/scala/org/apache/pekko/http/scaladsl/marshalling/MarshallingSpec.scala @@ -59,14 +59,14 @@ class MarshallingSpec extends AnyFreeSpec with Matchers with BeforeAndAfterAll w "The PredefinedToEntityMarshallers" - { "StringMarshaller should marshal response to `text/plain` content in UTF-8 when accept-charset is invalid" in { val invalidAcceptCharsetHeader = `Accept-Charset`(HttpCharsetRange(HttpCharset.custom("invalid"))) - val request = HttpRequest().withHeaders(Seq(invalidAcceptCharsetHeader)) + val request = HttpRequest().withHeaders(invalidAcceptCharsetHeader) val responseEntity = marshalToResponse("Ha“llo", request).entity responseEntity.contentType.charsetOption shouldEqual Some(HttpCharsets.`UTF-8`) responseEntity.contentType.mediaType shouldEqual MediaTypes.`text/plain` } "CharArrayMarshaller should marshal response to `text/plain` content in UTF-8 when accept-charset is invalid" in { val invalidAcceptCharsetHeader = `Accept-Charset`(HttpCharsetRange(HttpCharset.custom("invalid"))) - val request = HttpRequest().withHeaders(Seq(invalidAcceptCharsetHeader)) + val request = HttpRequest().withHeaders(invalidAcceptCharsetHeader) val responseEntity = marshalToResponse("Ha“llo".toCharArray(), request).entity responseEntity.contentType.charsetOption shouldEqual Some(HttpCharsets.`UTF-8`) responseEntity.contentType.mediaType shouldEqual MediaTypes.`text/plain`