From 06154b87dd9d8654f90eff33b6dda827ee4f7232 Mon Sep 17 00:00:00 2001 From: Piotr Date: Tue, 5 Nov 2024 09:12:11 +0100 Subject: [PATCH] LibWeb: Support for "content-language" http-equiv state Implemented support for setting the pragma-set default language in the `<meta>` tag with an `http-equiv` attribute `content-language`. --- .../Text/expected/HTML/lang-pragma-set-1.txt | 1 + .../Text/expected/HTML/lang-pragma-set-2.txt | 1 + .../Text/expected/HTML/lang-pragma-set-3.txt | 1 + .../Text/expected/HTML/lang-pragma-set-4.txt | 1 + .../Text/expected/HTML/lang-pragma-set-5.txt | 1 + .../Text/input/HTML/lang-pragma-set-1.html | 25 +++++++++++++++ .../Text/input/HTML/lang-pragma-set-2.html | 27 ++++++++++++++++ .../Text/input/HTML/lang-pragma-set-3.html | 25 +++++++++++++++ .../Text/input/HTML/lang-pragma-set-4.html | 25 +++++++++++++++ .../Text/input/HTML/lang-pragma-set-5.html | 25 +++++++++++++++ Userland/Libraries/LibWeb/DOM/Document.h | 4 +++ Userland/Libraries/LibWeb/DOM/Element.cpp | 4 +++ .../Libraries/LibWeb/HTML/HTMLMetaElement.cpp | 32 +++++++++++++++++++ 13 files changed, 172 insertions(+) create mode 100644 Tests/LibWeb/Text/expected/HTML/lang-pragma-set-1.txt create mode 100644 Tests/LibWeb/Text/expected/HTML/lang-pragma-set-2.txt create mode 100644 Tests/LibWeb/Text/expected/HTML/lang-pragma-set-3.txt create mode 100644 Tests/LibWeb/Text/expected/HTML/lang-pragma-set-4.txt create mode 100644 Tests/LibWeb/Text/expected/HTML/lang-pragma-set-5.txt create mode 100644 Tests/LibWeb/Text/input/HTML/lang-pragma-set-1.html create mode 100644 Tests/LibWeb/Text/input/HTML/lang-pragma-set-2.html create mode 100644 Tests/LibWeb/Text/input/HTML/lang-pragma-set-3.html create mode 100644 Tests/LibWeb/Text/input/HTML/lang-pragma-set-4.html create mode 100644 Tests/LibWeb/Text/input/HTML/lang-pragma-set-5.html diff --git a/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-1.txt b/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-1.txt new file mode 100644 index 
000000000000..d86bac9de59a --- /dev/null +++ b/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-1.txt @@ -0,0 +1 @@ +OK diff --git a/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-2.txt b/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-2.txt new file mode 100644 index 000000000000..d86bac9de59a --- /dev/null +++ b/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-2.txt @@ -0,0 +1 @@ +OK diff --git a/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-3.txt b/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-3.txt new file mode 100644 index 000000000000..d86bac9de59a --- /dev/null +++ b/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-3.txt @@ -0,0 +1 @@ +OK diff --git a/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-4.txt b/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-4.txt new file mode 100644 index 000000000000..d86bac9de59a --- /dev/null +++ b/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-4.txt @@ -0,0 +1 @@ +OK diff --git a/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-5.txt b/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-5.txt new file mode 100644 index 000000000000..d86bac9de59a --- /dev/null +++ b/Tests/LibWeb/Text/expected/HTML/lang-pragma-set-5.txt @@ -0,0 +1 @@ +OK diff --git a/Tests/LibWeb/Text/input/HTML/lang-pragma-set-1.html b/Tests/LibWeb/Text/input/HTML/lang-pragma-set-1.html new file mode 100644 index 000000000000..2cfd7d42bf48 --- /dev/null +++ b/Tests/LibWeb/Text/input/HTML/lang-pragma-set-1.html @@ -0,0 +1,25 @@ + + + + + + + +
TEST
+ + + + + diff --git a/Tests/LibWeb/Text/input/HTML/lang-pragma-set-2.html b/Tests/LibWeb/Text/input/HTML/lang-pragma-set-2.html new file mode 100644 index 000000000000..bec09897030a --- /dev/null +++ b/Tests/LibWeb/Text/input/HTML/lang-pragma-set-2.html @@ -0,0 +1,27 @@ + + + + + + + +
TEST
+ + + + + diff --git a/Tests/LibWeb/Text/input/HTML/lang-pragma-set-3.html b/Tests/LibWeb/Text/input/HTML/lang-pragma-set-3.html new file mode 100644 index 000000000000..c0fd8df70182 --- /dev/null +++ b/Tests/LibWeb/Text/input/HTML/lang-pragma-set-3.html @@ -0,0 +1,25 @@ + + + + + + + +
TEST
+ + + + + diff --git a/Tests/LibWeb/Text/input/HTML/lang-pragma-set-4.html b/Tests/LibWeb/Text/input/HTML/lang-pragma-set-4.html new file mode 100644 index 000000000000..db17403fefa2 --- /dev/null +++ b/Tests/LibWeb/Text/input/HTML/lang-pragma-set-4.html @@ -0,0 +1,25 @@ + + + + + + + +
TEST
+ + + + + diff --git a/Tests/LibWeb/Text/input/HTML/lang-pragma-set-5.html b/Tests/LibWeb/Text/input/HTML/lang-pragma-set-5.html new file mode 100644 index 000000000000..83eedf9513db --- /dev/null +++ b/Tests/LibWeb/Text/input/HTML/lang-pragma-set-5.html @@ -0,0 +1,25 @@ + + + + + + + +
TEST
+ + + + + diff --git a/Userland/Libraries/LibWeb/DOM/Document.h b/Userland/Libraries/LibWeb/DOM/Document.h index 758ca510e300..eb1b29509bd8 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.h +++ b/Userland/Libraries/LibWeb/DOM/Document.h @@ -380,6 +380,9 @@ class Document String const& content_type() const { return m_content_type; } void set_content_type(String content_type) { m_content_type = move(content_type); } + Optional const& pragma_set_default_language() const { return m_pragma_set_default_language; } + void set_pragma_set_default_language(String language) { m_pragma_set_default_language = move(language); } + bool has_encoding() const { return m_encoding.has_value(); } Optional const& encoding() const { return m_encoding; } String encoding_or_default() const { return m_encoding.value_or("UTF-8"_string); } @@ -822,6 +825,7 @@ class Document HTML::DocumentReadyState m_readiness { HTML::DocumentReadyState::Loading }; String m_content_type { "application/xml"_string }; + Optional m_pragma_set_default_language; Optional m_encoding; bool m_ready_for_post_load_tasks { false }; diff --git a/Userland/Libraries/LibWeb/DOM/Element.cpp b/Userland/Libraries/LibWeb/DOM/Element.cpp index 31dd0bb2adf0..f534feeacbf1 100644 --- a/Userland/Libraries/LibWeb/DOM/Element.cpp +++ b/Userland/Libraries/LibWeb/DOM/Element.cpp @@ -2934,6 +2934,10 @@ Optional Element::lang() const // 5. Otherwise // - If there is a pragma-set default language set, then that is the language of the node. + if (document().pragma_set_default_language().has_value()) { + return document().pragma_set_default_language(); + } + // - If there is no pragma-set default language set, then language information from a higher-level protocol (such as HTTP), // if any, must be used as the final fallback language instead. 
// - In the absence of any such language information, and in cases where the higher-level protocol reports multiple languages, diff --git a/Userland/Libraries/LibWeb/HTML/HTMLMetaElement.cpp b/Userland/Libraries/LibWeb/HTML/HTMLMetaElement.cpp index 10219129fa4f..d9255a513d00 100644 --- a/Userland/Libraries/LibWeb/HTML/HTMLMetaElement.cpp +++ b/Userland/Libraries/LibWeb/HTML/HTMLMetaElement.cpp @@ -5,6 +5,7 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include #include #include #include @@ -121,6 +122,37 @@ void HTMLMetaElement::inserted() // For meta elements with an http-equiv attribute in the X-UA-Compatible state, the content attribute must have a value that is an ASCII case-insensitive match for the string "IE=edge". // User agents are required to ignore this pragma. break; + case HttpEquivAttributeState::ContentLanguage: { + // https://html.spec.whatwg.org/multipage/semantics.html#attr-meta-http-equiv-content-language + // 1. If the meta element has no content attribute, then return. + if (!has_attribute(AttributeNames::content)) + break; + + // 2. If the element's content attribute contains a U+002C COMMA character (,) then return. + auto content = get_attribute_value(AttributeNames::content); + if (content.contains(","sv)) + break; + + // 3. Let input be the value of the element's content attribute. + // 4. Let position point at the first character of input. + GenericLexer lexer { content }; + + // 5. Skip ASCII whitespace within input given position. + lexer.ignore_while(Web::Infra::is_ascii_whitespace); + + // 6. Collect a sequence of code points that are not ASCII whitespace from input given position. + // 7. Let candidate be the string that resulted from the previous step. + auto candidate = lexer.consume_until(Web::Infra::is_ascii_whitespace); + + // 8. If candidate is the empty string, return. + if (candidate.is_empty()) + break; + + // 9. Set the pragma-set default language to candidate. 
+ auto language = String::from_utf8_without_validation(candidate.bytes()); + document().set_pragma_set_default_language(language); + break; + } default: dbgln("FIXME: Implement '{}' http-equiv state", get_attribute_value(AttributeNames::http_equiv)); break;