Skip to content

Commit

Permalink
LibWeb: Support for "content-language" http-equiv state
Browse files Browse the repository at this point in the history
Implemented support for setting the pragma-set default language via a
`<meta>` element whose `http-equiv` attribute is `content-language`.
  • Loading branch information
pbrw authored and awesomekling committed Nov 6, 2024
1 parent 413cf6b commit 06154b8
Show file tree
Hide file tree
Showing 13 changed files with 172 additions and 0 deletions.
1 change: 1 addition & 0 deletions Tests/LibWeb/Text/expected/HTML/lang-pragma-set-1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
OK
1 change: 1 addition & 0 deletions Tests/LibWeb/Text/expected/HTML/lang-pragma-set-2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
OK
1 change: 1 addition & 0 deletions Tests/LibWeb/Text/expected/HTML/lang-pragma-set-3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
OK
1 change: 1 addition & 0 deletions Tests/LibWeb/Text/expected/HTML/lang-pragma-set-4.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
OK
1 change: 1 addition & 0 deletions Tests/LibWeb/Text/expected/HTML/lang-pragma-set-5.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
OK
25 changes: 25 additions & 0 deletions Tests/LibWeb/Text/input/HTML/lang-pragma-set-1.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<!-- Test: a pragma-set default language of "ko" (from the Content-Language meta pragma) makes :lang(ko) match #box. -->
<meta http-equiv="Content-Language" content="ko" >
<style type='text/css'>
.test div { width: 50px; background-color: red; }
#box:lang(ko) { width: 100px; background-color: limegreen; }
</style>
</head>
<body>
<div class="test"><div id="box">TEST</div></div>
<script src="../include.js"></script>
<script>
asyncTest((done) => {
    // #box is 100px wide only when the #box:lang(ko) rule matched.
    if (document.getElementById('box').offsetWidth === 100) {
        println("OK");
    } else {
        println("FAIL. If there is a pragma-set default language set, then that is the language of the node.");
    }
    done();
});
</script>
</body>
</html>
27 changes: 27 additions & 0 deletions Tests/LibWeb/Text/input/HTML/lang-pragma-set-2.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<!DOCTYPE html>
<html>
<head>
<!-- Test: only the first run of non-whitespace characters in the content attribute ("ko") becomes the pragma-set default language. -->
<meta http-equiv="Content-Language" content="
ko
som! ?et #$% hi;0 ng " >
<style type='text/css'>
.test div { width: 50px; background-color: red; }
#box:lang(ko) { width: 100px; background-color: limegreen; }
</style>
</head>
<body>
<div class="test"><div id="box">TEST</div></div>
<script src="../include.js"></script>
<script>
asyncTest((done) => {
    // #box is 100px wide only when the #box:lang(ko) rule matched.
    if (document.getElementById('box').offsetWidth === 100) {
        println("OK");
    } else {
        println("FAIL. If there is a pragma-set default language set, then that is the language of the node. It should match the first non-whitespace code points from the content attribute.");
    }
    done();
});
</script>
</body>
</html>
25 changes: 25 additions & 0 deletions Tests/LibWeb/Text/input/HTML/lang-pragma-set-3.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<!-- Test: a content attribute containing a comma sets no pragma-set default language, so :lang(ko) must not match #box. -->
<meta http-equiv="Content-Language" content="ko,pl" >
<style type='text/css'>
.test div { width: 50px; background-color: limegreen; }
#box:lang(ko) { width: 100px; background-color: red; }
</style>
</head>
<body>
<div class="test"><div id="box">TEST</div></div>
<script src="../include.js"></script>
<script>
asyncTest((done) => {
    // #box stays 50px wide when the #box:lang(ko) rule does not match.
    if (document.getElementById('box').offsetWidth === 50) {
        println("OK");
    } else {
        println("FAIL. If the element's content attribute contains a U+002C COMMA character (,) then return.");
    }
    done();
});
</script>
</body>
</html>
25 changes: 25 additions & 0 deletions Tests/LibWeb/Text/input/HTML/lang-pragma-set-4.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<!-- Test: an empty content attribute sets no pragma-set default language, so :lang(ko) must not match #box. -->
<meta http-equiv="Content-Language" content="" >
<style type='text/css'>
.test div { width: 50px; background-color: limegreen; }
#box:lang(ko) { width: 100px; background-color: red; }
</style>
</head>
<body>
<div class="test"><div id="box">TEST</div></div>
<script src="../include.js"></script>
<script>
asyncTest((done) => {
    // #box stays 50px wide when the #box:lang(ko) rule does not match.
    if (document.getElementById('box').offsetWidth === 50) {
        println("OK");
    } else {
        println("FAIL. If candidate is the empty string, return.");
    }
    done();
});
</script>
</body>
</html>
25 changes: 25 additions & 0 deletions Tests/LibWeb/Text/input/HTML/lang-pragma-set-5.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<!DOCTYPE html>
<html>
<head>
<!-- Test: a Content-Language meta pragma without a content attribute sets no pragma-set default language, so :lang(ko) must not match #box. -->
<meta http-equiv="Content-Language" >
<style type='text/css'>
.test div { width: 50px; background-color: limegreen; }
#box:lang(ko) { width: 100px; background-color: red; }
</style>
</head>
<body>
<div class="test"><div id="box">TEST</div></div>
<script src="../include.js"></script>
<script>
asyncTest((done) => {
    // #box stays 50px wide when the #box:lang(ko) rule does not match.
    if (document.getElementById('box').offsetWidth === 50) {
        println("OK");
    } else {
        println("FAIL. If the meta element has no content attribute, then return.");
    }
    done();
});
</script>
</body>
</html>
4 changes: 4 additions & 0 deletions Userland/Libraries/LibWeb/DOM/Document.h
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,9 @@ class Document
// Returns a reference to the document's stored content type string.
String const& content_type() const
{
    return m_content_type;
}
// Replaces the stored content type; the by-value argument is moved into the member.
void set_content_type(String content_type)
{
    m_content_type = move(content_type);
}

// Returns the document's pragma-set default language; the Optional is empty
// when no language has been stored via set_pragma_set_default_language().
Optional<String> const& pragma_set_default_language() const
{
    return m_pragma_set_default_language;
}
// Stores `language` as the document's pragma-set default language, replacing
// any previous value; the by-value argument is moved into the member.
void set_pragma_set_default_language(String language)
{
    m_pragma_set_default_language = move(language);
}

// Reports whether an encoding has been recorded for this document.
bool has_encoding() const
{
    return m_encoding.has_value();
}
// Returns the recorded encoding; the Optional is empty when none has been set.
Optional<String> const& encoding() const
{
    return m_encoding;
}
// Returns the recorded encoding, falling back to "UTF-8" when none has been set.
String encoding_or_default() const
{
    return m_encoding.value_or("UTF-8"_string);
}
Expand Down Expand Up @@ -822,6 +825,7 @@ class Document

HTML::DocumentReadyState m_readiness { HTML::DocumentReadyState::Loading };
String m_content_type { "application/xml"_string };
Optional<String> m_pragma_set_default_language;
Optional<String> m_encoding;

bool m_ready_for_post_load_tasks { false };
Expand Down
4 changes: 4 additions & 0 deletions Userland/Libraries/LibWeb/DOM/Element.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2934,6 +2934,10 @@ Optional<String> Element::lang() const

// 5. Otherwise
// - If there is a pragma-set default language set, then that is the language of the node.
if (document().pragma_set_default_language().has_value()) {
return document().pragma_set_default_language();
}

// - If there is no pragma-set default language set, then language information from a higher-level protocol (such as HTTP),
// if any, must be used as the final fallback language instead.
// - In the absence of any such language information, and in cases where the higher-level protocol reports multiple languages,
Expand Down
32 changes: 32 additions & 0 deletions Userland/Libraries/LibWeb/HTML/HTMLMetaElement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* SPDX-License-Identifier: BSD-2-Clause
*/

#include <AK/GenericLexer.h>
#include <LibWeb/Bindings/HTMLMetaElementPrototype.h>
#include <LibWeb/Bindings/Intrinsics.h>
#include <LibWeb/CSS/Parser/Parser.h>
Expand Down Expand Up @@ -121,6 +122,37 @@ void HTMLMetaElement::inserted()
// For meta elements with an http-equiv attribute in the X-UA-Compatible state, the content attribute must have a value that is an ASCII case-insensitive match for the string "IE=edge".
// User agents are required to ignore this pragma.
break;
case HttpEquivAttributeState::ContentLanguage: {
// Content-Language pragma: derive the document's pragma-set default language from this
// element's content attribute. Every "return" in the spec steps below is implemented as
// a `break` out of this case.
// https://html.spec.whatwg.org/multipage/semantics.html#attr-meta-http-equiv-content-language
// 1. If the meta element has no content attribute, then return.
if (!has_attribute(AttributeNames::content))
break;

// 2. If the element's content attribute contains a U+002C COMMA character (,) then return.
auto content = get_attribute_value(AttributeNames::content);
if (content.contains(","sv))
break;

// 3. Let input be the value of the element's content attribute.
// 4. Let position point at the first character of input.
GenericLexer lexer { content };

// 5. Skip ASCII whitespace within input given position.
lexer.ignore_while(Web::Infra::is_ascii_whitespace);

// 6. Collect a sequence of code points that are not ASCII whitespace from input given position.
// 7. Let candidate be the string that resulted from the previous step.
auto candidate = lexer.consume_until(Web::Infra::is_ascii_whitespace);

// 8. If candidate is the empty string, return.
if (candidate.is_empty())
break;

// 9. Set the pragma-set default language to candidate.
// NOTE(review): UTF-8 validation is skipped here. Presumably safe because candidate is a
// slice of the attribute value that is only cut at ASCII whitespace - confirm that
// get_attribute_value() always yields valid UTF-8.
auto language = String::from_utf8_without_validation(candidate.bytes());
// NOTE(review): `language` is passed by copy and not used afterwards; it could be moved
// into the by-value setter parameter.
document().set_pragma_set_default_language(language);
break;
}
default:
dbgln("FIXME: Implement '{}' http-equiv state", get_attribute_value(AttributeNames::http_equiv));
break;
Expand Down

0 comments on commit 06154b8

Please sign in to comment.