Skip to content

Commit

Permalink
Escape HTML to prevent injection
Browse files Browse the repository at this point in the history
  • Loading branch information
aazuspan committed Jan 10, 2025
1 parent de23d73 commit 43371f0
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 8 deletions.
12 changes: 12 additions & 0 deletions eerepr/html.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import html
from datetime import datetime, timezone
from itertools import chain
from typing import Any, Hashable
Expand All @@ -20,6 +21,17 @@
DATE_FORMAT = "%Y-%m-%d %H:%M:%S"


def escape_object(obj: Any) -> Any:
    """Return a copy of ``obj`` in which every string has been HTML-escaped.

    Dictionaries and lists are walked recursively and rebuilt as plain ``dict``
    and ``list`` objects; both the keys and the values of a dictionary are
    escaped. Strings are passed through ``html.escape``. Any other value
    (numbers, ``None``, tuples, ...) is returned unchanged.
    """
    if isinstance(obj, dict):
        escaped_mapping = {}
        for name, item in obj.items():
            # Keys can carry markup too, so they get the same treatment as values.
            escaped_mapping[escape_object(name)] = escape_object(item)
        return escaped_mapping
    elif isinstance(obj, list):
        return list(map(escape_object, obj))
    elif isinstance(obj, str):
        return html.escape(obj)
    else:
        return obj


def convert_to_html(obj: Any, key: Hashable | None = None) -> str:
"""Converts a Python object (not list or dict) to an HTML <li> element.
Expand Down
11 changes: 6 additions & 5 deletions eerepr/repr.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from __future__ import annotations

import html
import uuid
from functools import _lru_cache_wrapper, lru_cache
from html import escape
from typing import Any, Literal, Union
from warnings import warn

import ee

from eerepr.config import Config
from eerepr.html import convert_to_html
from eerepr.html import convert_to_html, escape_object

REPR_HTML = "_repr_html_"
EEObject = Union[ee.Element, ee.ComputedObject]
Expand Down Expand Up @@ -64,7 +64,8 @@ def _is_nondeterministic(obj: EEObject) -> bool:
@lru_cache(maxsize=None)
def _repr_html_(obj: EEObject) -> str:
"""Generate an HTML representation of an EE object."""
info = obj.getInfo()
# Escape all strings in object info to prevent injection
info = escape_object(obj.getInfo())
css = _load_css()
body = convert_to_html(info)

Expand Down Expand Up @@ -96,7 +97,7 @@ def _ee_repr(obj: EEObject) -> str:
f"Getting info failed with: '{e}'. Falling back to string repr.",
stacklevel=2,
)
return f"<pre>{escape(repr(obj))}</pre>"
return f"<pre>{html.escape(repr(obj))}</pre>"

mbs = len(rep) / 1e6
if mbs > options.max_repr_mbs:
Expand All @@ -109,7 +110,7 @@ def _ee_repr(obj: EEObject) -> str:
),
stacklevel=2,
)
return f"<pre>{escape(repr(obj))}</pre>"
return f"<pre>{html.escape(repr(obj))}</pre>"

return rep

Expand Down
23 changes: 20 additions & 3 deletions tests/test_reprs.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import ee

import eerepr
from eerepr.repr import _repr_html_


def test_full_repr(data_regression):
    """Regression test the full HTML repr (with CSS and JS) of a nested EE object."""
    from tests.test_html import get_test_objects

    # Call initialize() first, as the other tests in this module do before
    # invoking `_repr_html_` on an EE object.
    eerepr.initialize()

    # Only the objects themselves are needed, not their names; wrap them all in
    # a single ee.List so one repr covers every test object.
    objects = get_test_objects().values()
    rendered = ee.List(list(objects))._repr_html_()
    data_regression.check(rendered)


Expand Down Expand Up @@ -38,3 +39,19 @@ def _repr_html_(self):
# reset shouldn't remove the existing repr
eerepr.reset()
assert obj._repr_html_() == "foo"


def test_scripts_sanitized():
    """Check that a script tag embedded in an EE object never reaches the repr raw."""
    eerepr.initialize()

    payload = "<script>alert('foo')</script>"

    # Payload as a bare string.
    string_repr = ee.String(payload)._repr_html_()
    assert "<script>" not in string_repr

    # Payload nested inside a list.
    list_repr = ee.List([payload])._repr_html_()
    assert "<script>" not in list_repr

    # Payload used both as a dictionary key and as dictionary values.
    dict_repr = ee.Dictionary({payload: payload, "type": payload})._repr_html_()
    assert "<script>" not in dict_repr

0 comments on commit 43371f0

Please sign in to comment.