Skip to content

Commit

Permalink
fix double escape + tests
Browse files Browse the repository at this point in the history
  • Loading branch information
leandrocp committed Jan 11, 2025
1 parent 3f454a6 commit 9c1ca44
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 21 deletions.
39 changes: 33 additions & 6 deletions lib/mdex.ex
Original file line number Diff line number Diff line change
Expand Up @@ -603,11 +603,38 @@ defmodule MDEx do
# Strips leading and trailing whitespace from the payload of an `{:ok, _}`
# tuple; any other value is handed back unchanged.
defp maybe_trim({:ok, html}) do
  {:ok, String.trim(html)}
end

defp maybe_trim(other) do
  other
end

# TODO: spec/docs
def safe_html(unsafe_html, opts) do
sanitize = Keyword.get(opts, :sanitize, true)
escape_tags = Keyword.get(opts, :escape_tags, true)
escape_curly_braces_in_code = Keyword.get(opts, :escape_curly_braces_in_code, true)
Native.safe_html(unsafe_html, sanitize, escape_tags, escape_curly_braces_in_code)
@doc """
Utility function to sanitize and escape HTML.

Sanitizing, escaping content, and escaping curly braces inside `<code>` tags
are all enabled by default; each one can be switched off individually.

## Examples

    iex> MDEx.safe_html("<script>console.log('attack')</script>")
    ""

    iex> MDEx.safe_html("<span>Hello</span>")
    "&lt;span&gt;Hello&lt;&#x2f;span&gt;"

    iex> MDEx.safe_html("<h1>{'Example:'}</h1><code>{:ok, 'MDEx'}</code>")
    "&lt;h1&gt;{&#x27;Example:&#x27;}&lt;&#x2f;h1&gt;&lt;code&gt;&lbrace;:ok, &#x27;MDEx&#x27;&rbrace;&lt;&#x2f;code&gt;"

## Options

  * `:sanitize` - clean HTML using these rules https://docs.rs/ammonia/latest/ammonia/fn.clean.html. Defaults to `true`.
  * `:escape` - keyword list of booleans selecting which entities should be escaped.
    Defaults to `[content: true, curly_braces_in_code: true]`; a key that is left out stays enabled.
    `:content` - escape common chars like `<`, `>`, `&`, and others in the HTML content;
    `:curly_braces_in_code` - escape `{` and `}` only inside `<code>` tags, particularly useful for compiling HTML in LiveView;

For example, `escape: [content: false]` disables content escaping only.
"""
@spec safe_html(String.t(), keyword()) :: String.t()
def safe_html(unsafe_html, opts \\ []) when is_binary(unsafe_html) and is_list(opts) do
  sanitize = opt(opts, [:sanitize], true)

  # `:escape` is a nested keyword list, so each flag is looked up by path and
  # falls back to `true` when it (or `:escape` itself) is absent.
  escape_content = opt(opts, [:escape, :content], true)
  escape_curly_braces_in_code = opt(opts, [:escape, :curly_braces_in_code], true)

  Native.safe_html(unsafe_html, sanitize, escape_content, escape_curly_braces_in_code)
end

# Looks up the option reached by following `keys` through `opts` and returns
# `default` when that lookup yields `nil`. An explicit `false` is returned
# as-is, which is what lets callers turn a feature off.
defp opt(opts, keys, default) do
  value = get_in(opts, keys)

  if is_nil(value), do: default, else: value
end
end
23 changes: 14 additions & 9 deletions native/comrak_nif/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -382,18 +382,16 @@ fn do_safe_html(
false => unsafe_html,
};

let html = match escape_tags {
true => v_htmlescape::escape(&html).to_string(),
false => html,
};

let html = match escape_curly_braces_in_code {
true => rewrite_str(
&html,
RewriteStrSettings {
element_content_handlers: vec![text!("code", |t| {
t.replace(
&t.as_str().replace('{', "&lbrace;").replace('}', "&rbrace;"),
element_content_handlers: vec![text!("code", |chunk| {
chunk.replace(
&chunk
.as_str()
.replace('{', "&lbrace;")
.replace('}', "&rbrace;"),
ContentType::Html,
);

Expand All @@ -406,5 +404,12 @@ fn do_safe_html(
false => html,
};

html
let html = match escape_tags {
true => v_htmlescape::escape(&html).to_string(),
false => html,
};

// TODO: not so clean solution to undo double escaping, could be better
html.replace("&amp;lbrace;", "&lbrace;")
.replace("&amp;rbrace;", "&rbrace;")
}
22 changes: 16 additions & 6 deletions test/mdex_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -231,19 +231,29 @@ defmodule MDExTest do
test "sanitize" do
assert MDEx.safe_html("<span>tag</span><script>console.log('hello')</script>",
sanitize: true,
escape_tags: false,
escape_curly_braces_in_code: false
escape: [content: false, curly_braces_in_code: false]
) == "<span>tag</span>"
end

test "escape tags" do
assert MDEx.safe_html("<span>tag</span>", sanitize: false, escape_tags: true, escape_curly_braces_in_code: false) ==
"&lt;span&gt;tag&lt;&#x2f;span&gt;"
assert MDEx.safe_html("<span>content</span>",
sanitize: false,
escape: [content: true, curly_braces_in_code: false]
) == "&lt;span&gt;content&lt;&#x2f;span&gt;"
end

test "escape curly braces in code tags" do
assert MDEx.safe_html("<h1>{test}</h1><code>{:foo}</code>", sanitize: false, escape_tags: false, escape_curly_braces_in_code: true) ==
"<h1>{test}</h1><code>&lbrace;:foo&rbrace;</code>"
assert MDEx.safe_html("<h1>{test}</h1><code>{:foo}</code>",
sanitize: false,
escape: [content: false, curly_braces_in_code: true]
) == "<h1>{test}</h1><code>&lbrace;:foo&rbrace;</code>"
end

test "enable all by default" do
  # Called without options, so every default of `safe_html/2` is in effect.
  input =
    "<span>{:example} <code class=\"lang-ex\" data-foo=\"{:val}\">{:ok, 'foo'}</code></span><script>console.log('hello')</script>"

  expected =
    "&lt;span&gt;{:example} &lt;code&gt;&lbrace;:ok, &#x27;foo&#x27;&rbrace;&lt;&#x2f;code&gt;&lt;&#x2f;span&gt;"

  assert MDEx.safe_html(input) == expected
end
end

Expand Down

0 comments on commit 9c1ca44

Please sign in to comment.