"""Convert HTML to markdown, plain text, or rich text.

Python binding to the Rust rust-html2text library.

Install with: pip install -U html2text_rs
"""
def text_markdown(html: str, width: int = 100):
    """Convert HTML to markdown text.

    Args:
        html (str): input html text.
        width (int): wrap text to width columns. Default is 100.

    Returns:
        The converted markdown text.

    Example:
        import html2text_rs
        import requests

        resp = requests.get("https://en.wikipedia.org/wiki/AGM-88_HARM")
        markdown = html2text_rs.text_markdown(resp.text)
        print(markdown)
    """
def text_plain(html: str, width: int = 100):
    """Convert HTML to plain text.

    Args:
        html (str): input html text.
        width (int): wrap text to width columns. Default is 100.

    Returns:
        The converted plain text.

    Example:
        import html2text_rs
        import requests

        resp = requests.get("https://en.wikipedia.org/wiki/AGM-88_HARM")
        plain = html2text_rs.text_plain(resp.text)
        print(plain)
    """
def text_rich(html: str, width: int = 100):
    """Convert HTML to rich text.

    Args:
        html (str): input html text.
        width (int): wrap text to width columns. Default is 100.

    Returns:
        The converted rich text.

    Example:
        import html2text_rs
        import requests

        resp = requests.get("https://en.wikipedia.org/wiki/AGM-88_HARM")
        rich = html2text_rs.text_rich(resp.text)
        print(rich)
    """