diff --git a/tests/golden/pull2140.tsv b/tests/golden/pull2140.tsv index 5ea08efdc..502e7998f 100644 --- a/tests/golden/pull2140.tsv +++ b/tests/golden/pull2140.tsv @@ -1,3 +1,3 @@ -name rows cols id classes title aria_label caption summary -table_0 0 0 test_empty -links 0 5 +name rows cols id classes title aria_label caption summary heading +table_0 0 0 test_empty +links 0 5 diff --git a/visidata/loaders/html.py b/visidata/loaders/html.py index 4293547ec..b1523e0b9 100644 --- a/visidata/loaders/html.py +++ b/visidata/loaders/html.py @@ -37,6 +37,7 @@ class HtmlTablesSheet(IndexSheet): Column('aria_label', getter=lambda col,row: row.html.attrib.get('aria-label')), Column('caption', getter=lambda col,row: row.html.xpath('normalize-space(./caption)') if row.html.xpath('./caption') else None, cache=True), Column('summary', getter=lambda col,row: row.html.attrib.get('summary')), + Column('heading', getter=lambda col,row: row.html.xpath('normalize-space(./preceding-sibling::*[self::h1 or self::h2 or self::h3 or self::h4 or self::h5 or self::h6][1])') or None, cache=True), ] def iterload(self): lxml = vd.importExternal('lxml')