Skip to content

Commit

Permalink
Make relative URLs in e-* properties absolute (#201)
Browse files Browse the repository at this point in the history
  • Loading branch information
angelogladding authored Nov 30, 2023
1 parent ab580f6 commit a0edfdb
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
All notable changes to this project will be documented in this file.

## 2.0 - unreleased
- make relative URLs in e-* properties absolute (#201)

## 1.1.3 - 2023-06-28
- reduce instances where photo is implied (#135)
Expand Down
4 changes: 4 additions & 0 deletions mf2py/parse_property.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ def datetime(el, default_date=None):

def embedded(el, base_url=""):
"""Process e-* properties"""
for tag in el.find_all():
for attr in ("href", "src", "cite", "data", "poster"):
if attr in tag.attrs:
tag.attrs[attr] = try_urljoin(base_url, tag.attrs[attr])
return {
"html": el.decode_contents().strip(), # secret bs4 method to get innerHTML
"value": get_textContent(el, replace_img=True, base_url=base_url),
Expand Down
13 changes: 13 additions & 0 deletions test/examples/relative_url_in_e.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Relative URLs in e-content</title>
<!-- Fixture for test_relative_url_in_e: the e-content below holds a relative
     href and a relative src; the test expects both to come back in the parsed
     "html" value as absolute http://example.com/ URLs (the base href below). -->
<base href="http://example.com/" />
</head>
<body>
<div class="h-entry">
<div class="e-content"><p><a href=/cat.html>Cat <img src=cat.jpg></a></p></div>
</div>
</body>
</html>
10 changes: 10 additions & 0 deletions test/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,16 @@ def test_complex_e_content():
} == result["items"][0]


def test_relative_url_in_e():
    """Relative URLs inside an e-* property's HTML come back absolute."""
    parsed = parse_fixture("relative_url_in_e.html")

    # The fixture has a single h-entry whose only e-content holds a relative
    # link and a relative image source.
    content_html = parsed["items"][0]["properties"]["content"][0]["html"]
    expected_html = (
        '<p><a href="http://example.com/cat.html">Cat '
        '<img src="http://example.com/cat.jpg"/></a></p>'
    )

    assert expected_html == content_html


def test_nested_values():
"""When parsing nested microformats, check that value is the value of
the simple property element"""
Expand Down

0 comments on commit a0edfdb

Please sign in to comment.