Skip to content

Commit

Permalink
Parse metaformats by default and place in meta-items property
Browse files Browse the repository at this point in the history
  • Loading branch information
angelogladding authored Dec 4, 2023
1 parent 0354be0 commit 9982662
Showing 1 changed file with 3 additions and 13 deletions.
16 changes: 3 additions & 13 deletions mf2py/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from .version import __version__


def parse(doc=None, url=None, html_parser=None, expose_dom=False, metaformats=False):
def parse(doc=None, url=None, html_parser=None, expose_dom=False):
"""
Parse a microformats2 document or url and return a json dictionary.
Expand All @@ -33,8 +33,6 @@ def parse(doc=None, url=None, html_parser=None, expose_dom=False, metaformats=Fa
options from the BeautifulSoup documentation are:
"html", "xml", "html5", "lxml", "html5lib", and "html.parser"
expose_dom (boolean): optional, expose the DOM of embedded properties.
metaformats (boolean): whether to include metaformats extracted from OGP
and Twitter card data: https://microformats.org/wiki/metaformats
Return: a json dict representing the structured data in this document.
"""
Expand All @@ -43,7 +41,6 @@ def parse(doc=None, url=None, html_parser=None, expose_dom=False, metaformats=Fa
url,
html_parser,
expose_dom=expose_dom,
metaformats=metaformats,
).to_dict()


Expand All @@ -62,8 +59,6 @@ class Parser(object):
"html", "xml", "html5", "lxml", "html5lib", and "html.parser"
defaults to "html5lib"
expose_dom (boolean): optional, expose the DOM of embedded properties.
metaformats (boolean): whether to include metaformats extracted from OGP
and Twitter card data: https://microformats.org/wiki/metaformats
Attributes:
useragent (string): the User-Agent string for the Parser
Expand All @@ -79,7 +74,6 @@ def __init__(
url=None,
html_parser=None,
expose_dom=False,
metaformats=False,
):
self.__url__ = None
self.__doc__ = None
Expand All @@ -94,7 +88,6 @@ def __init__(
"version": __version__,
},
}
self.__metaformats = metaformats
self.expose_dom = expose_dom
self.lang = None

Expand Down Expand Up @@ -513,15 +506,12 @@ def parse_el(el, ctx):

ctx = []

if self.__metaformats:
# extract out a metaformats item, if available
self.__metaformats_item = metaformats.parse(self.__doc__, url=self.__url__)
if metaformats_item := metaformats.parse(self.__doc__, url=self.__url__):
self.__parsed__["meta-item"] = metaformats_item

# start parsing at root element of the document
parse_el(self.__doc__, ctx)
self.__parsed__["items"] = ctx
if self.__metaformats and self.__metaformats_item:
self.__parsed__["items"].append(self.__metaformats_item)

# parse for rel values
for el in get_descendents(self.__doc__):
Expand Down

0 comments on commit 9982662

Please sign in to comment.