diff --git a/README.md b/README.md index f49cb09..a4f8cc9 100644 --- a/README.md +++ b/README.md @@ -26,11 +26,11 @@ Import the parser using: ``` -### Parse an HTML File +### Parse an HTML Document from a file or string ```pycon ->>> with open("test/examples/eras.html", "r") as file: -... mf2json = mf2py.parse(doc=file) +>>> with open("test/examples/eras.html") as fp: +... mf2json = mf2py.parse(doc=fp) >>> mf2json {'items': [{'type': ['h-entry'], 'properties': {'name': ['Excited for the Taylor Swift Eras Tour'], @@ -57,19 +57,7 @@ Import the parser using: ``` -### Parse an HTML String - -```pycon ->>> html = '''

The best time -... to plant a tree was 30 years ago, and the second best time to plant a -... tree is now.

''' ->>> mf2py.parse(doc=html)["items"] -[{'type': ['h-entry'], 'properties': {'content': ['The best time to plant -a tree was 30 years ago, and the second best time to plant a tree is now.']}}] - -``` - -### Parse an HTML Document Retrieved from a URL +### Parse an HTML Document from a URL ```pycon >>> mf2json = mf2py.parse(url="https://events.indieweb.org") @@ -95,17 +83,12 @@ found. ## Advanced Usage -`parse` is a convenience method that delegates to `Parser`. More sophisticated -behaviors are available by invoking the parser object directly. +`parse` is a convenience function for `Parser`. More sophisticated behaviors are +available by invoking the parser object directly. ```pycon ->>> html = '''

Frank and Cosmo

-...

It's time for the Festivus feats of -... strength.

-...

It's a Festivus miracle!

-...

The tradition of Festivus begins with -... the airing of grievances.

''' ->>> mf2parser = mf2py.Parser(doc=html) +>>> with open("test/examples/festivus.html") as fp: +... mf2parser = mf2py.Parser(doc=fp) ``` @@ -114,11 +97,11 @@ behaviors are available by invoking the parser object directly. ```pycon >>> mf2json = mf2parser.to_dict() >>> len(mf2json["items"]) -5 +7 >>> len(mf2parser.to_dict(filter_by_type="h-card")) -2 ->>> len(mf2parser.to_dict(filter_by_type="h-entry")) 3 +>>> len(mf2parser.to_dict(filter_by_type="h-entry")) +4 ``` diff --git a/test/examples/festivus.html b/test/examples/festivus.html new file mode 100644 index 0000000..11d94f7 --- /dev/null +++ b/test/examples/festivus.html @@ -0,0 +1,10 @@ + +

Jerry

+

Happy Festivus!

+

Frank

+

It's time for the Festivus feats of +strength.

+

The tradition of Festivus begins with +the airing of grievances.

+

Cosmo

+

It's a Festivus miracle!

diff --git a/test/test_parser.py b/test/test_parser.py index a04825a..7a5cea5 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -1198,3 +1198,16 @@ def test_language(): assert result["items"][2]["lang"] == "sv" assert result["items"][2]["properties"]["content"][0]["lang"] == "en" assert result["items"][2]["properties"]["content"][1]["lang"] == "sv" + + +def test_parser_object(): + with open(os.path.join(TEST_DIR, "festivus.html")) as f: + p = Parser(doc=f) + assert len(p.to_dict(filter_by_type="h-card")) == 3 + assert len(p.to_dict(filter_by_type="h-entry")) == 4 + assert ( + p.to_json(filter_by_type="h-card") + == '[{"type": ["h-card"], "properties": {"name": ["Jerry"]}}, {"type": ' + '["h-card"], "properties": {"name": ["Frank"]}}, {"type": ["h-card"], ' + '"properties": {"name": ["Cosmo"]}}]' + )