From 8e1f170924cc83b77eb8293edc5416bf0f16f8b4 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Thu, 2 May 2024 20:41:31 +0900
Subject: [PATCH 01/37] html_tools/fix: Add forest_transplanting to handle
 invalid DOM

---
 changedetectionio/html_tools.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index a03653b9eda..96ffbec997a 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -110,6 +110,24 @@ def elementpath_tostring(obj):
 
     return str(obj)
 
+def forest_transplanting(root):
+    """
+    libxml2 violates DOM rules. it means there can be multiple root element
+    nodes. So I choose just transplating them to a new root by default.
+    See also, https://gitlab.gnome.org/GNOME/libxml2/-/issues/716
+    This will emulate xpath1 of html of libxml2 like '/html[2]/*'.
+    To make this function work, 'fragment=True' in elementpath.select is required.
+    """
+    from lxml import etree
+    from itertools import chain
+    root_siblings_preceding = [ s for s in root.itersiblings(preceding=True)]
+    root_siblings_preceding.reverse()
+    root_siblings = [s for s in root.itersiblings()]
+    new_root = etree.Element("new_root")
+    for node in chain(root_siblings_preceding, [root], root_siblings):
+        new_root.append(node)
+    return new_root
+
 # Return str Utf-8 of matched rules
 def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
     from lxml import etree, html
@@ -123,9 +141,10 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
         parser = etree.XMLParser(strip_cdata=False)
 
     tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
+    tree = forest_transplanting(tree)
     html_block = ""
 
-    r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
+    r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser, fragment=True)
     #@note: //title/text() wont work where <title>CDATA..
 
     if type(r) != list:

From 1f776ff8f69f1a19060f55ae9a6df13177d97d74 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Thu, 2 May 2024 20:43:45 +0900
Subject: [PATCH 02/37] requirements/fix: Upgrade and pin elementpath to
 support fragment option

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 76e88c8f42f..3084e5ab20d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -55,7 +55,7 @@ beautifulsoup4
 lxml >=4.8.0,<6
 
 # XPath 2.0-3.1 support - 4.2.0 broke something?
-elementpath==4.1.5
+elementpath==4.4.0
 
 selenium~=4.14.0
 

From bf5c2c7b0055bc25fc37a033504d2698c3a5d7f8 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Fri, 3 May 2024 02:16:22 +0900
Subject: [PATCH 03/37] html_tools/fix: Pass item=tree[0] to elementpath.select

---
 changedetectionio/html_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 96ffbec997a..a3ca75f579a 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -144,7 +144,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
     tree = forest_transplanting(tree)
     html_block = ""
 
-    r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser, fragment=True)
+    r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser, fragment=True, item=tree[0])
     #@note: //title/text() wont work where <title>CDATA..
 
     if type(r) != list:

From 9f0cb3544f3316d7324f5bd4938bd41f7dacab46 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Fri, 3 May 2024 02:34:32 +0900
Subject: [PATCH 04/37] html_tools/fix: Pass item=tree instead of tree[0]

---
 changedetectionio/html_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index a3ca75f579a..0ea3401bd60 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -144,7 +144,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
     tree = forest_transplanting(tree)
     html_block = ""
 
-    r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser, fragment=True, item=tree[0])
+    r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser, fragment=True, item=tree)
     #@note: //title/text() wont work where <title>CDATA..
 
     if type(r) != list:

From 879d0b2c06644826a1042930122f8e5897e1009b Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Tue, 7 May 2024 15:27:52 +0900
Subject: [PATCH 05/37] html_tools/fix: Transplant only when root has
 element siblings

---
 changedetectionio/html_tools.py | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 0ea3401bd60..a53d86fa8a4 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -121,12 +121,28 @@ def forest_transplanting(root):
     from lxml import etree
     from itertools import chain
     root_siblings_preceding = [ s for s in root.itersiblings(preceding=True)]
-    root_siblings_preceding.reverse()
     root_siblings = [s for s in root.itersiblings()]
-    new_root = etree.Element("new_root")
-    for node in chain(root_siblings_preceding, [root], root_siblings):
-        new_root.append(node)
-    return new_root
+
+    Is_fragment=False
+    # If element node exsits in root element node's sibilings, it is fragment.
+    for node in chain(root_siblings_preceding, root_siblings):
+        if not hasattr(node.tag, '__name__'):
+            Is_fragment=True
+            # early exit. because the root is already root element.
+            # So, two root element nodes are detected. DOM violation.
+            break
+
+    if Is_fragment:
+        new_root = etree.Element("new_root")
+        root_siblings_preceding.reverse()
+        #tree = etree.ElementTree(new_root)
+        for node in chain(root_siblings_preceding, [root], root_siblings):
+            new_root.append(node)
+        #print(new_root.getchildren())
+        return new_root, True
+
+    return root, False
+
 
 # Return str Utf-8 of matched rules
 def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
@@ -141,10 +157,10 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
         parser = etree.XMLParser(strip_cdata=False)
 
     tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
-    tree = forest_transplanting(tree)
+    tree, is_fragment = forest_transplanting(tree)
     html_block = ""
 
-    r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser, fragment=True, item=tree)
+    r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser, fragment=is_fragment)
     #@note: //title/text() wont work where <title>CDATA..
 
     if type(r) != list:

From ed2aaf4cab615fe5cdfd9316dc22e035afad99f1 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Tue, 7 May 2024 22:56:52 +0900
Subject: [PATCH 06/37] tests/test_xpath_selector_unit/test: Add test.

---
 .../tests/test_xpath_selector_unit.py         | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index b4dda08068a..3f08de17ea5 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -201,3 +201,27 @@ def test_trips(html_content, xpath, answer):
     html_content = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)
     assert type(html_content) == str
     assert answer in html_content
+
+DOM_violation_two_html_root_element = ="""<!DOCTYPE html>
+<html>
+  <body>
+    <h1>Hello absurd world</h1>
+    <p>First paragraph.</p>
+  </body>
+</html>
+<html>
+  <body>
+    <h1>Hello absurd world</h1>
+    <p>Browsers parse this part by fixing it but lxml doesn't and returns two root element node</p>
+    <p>Therefore, if the path is /html/body/p[1], lxml(libxml2) returns two element nodes not one.</p>
+  </body>
+</html>"""
+
+@pytest.mark.parametrize("html_content", [DOM_violation_two_html_root_element])
+@pytest.mark.parametrize("xpath, answer", [
+    ("/html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
+                          ])
+def test_trips(html_content, xpath, answer):
+    html_content = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)
+    assert type(html_content) == str
+    assert answer not in html_content

From dd8b4fe9222b4cb129bd12fab9e2805d222e2d49 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Tue, 7 May 2024 22:58:47 +0900
Subject: [PATCH 07/37] html_tools/docs: Remove comments

---
 changedetectionio/html_tools.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index a53d86fa8a4..6ae4ef07587 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -135,10 +135,8 @@ def forest_transplanting(root):
     if Is_fragment:
         new_root = etree.Element("new_root")
         root_siblings_preceding.reverse()
-        #tree = etree.ElementTree(new_root)
         for node in chain(root_siblings_preceding, [root], root_siblings):
             new_root.append(node)
-        #print(new_root.getchildren())
         return new_root, True
 
     return root, False

From fbd55129eddcf3ac3e5cf0964de407f4a3f75f08 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Tue, 7 May 2024 23:09:57 +0900
Subject: [PATCH 08/37] tests/test_xpath_selector_unit/fix: Typo

---
 changedetectionio/tests/test_xpath_selector_unit.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 3f08de17ea5..131054c5610 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -202,7 +202,7 @@ def test_trips(html_content, xpath, answer):
     assert type(html_content) == str
     assert answer in html_content
 
-DOM_violation_two_html_root_element = ="""<!DOCTYPE html>
+DOM_violation_two_html_root_element = """<!DOCTYPE html>
 <html>
   <body>
     <h1>Hello absurd world</h1>
@@ -216,7 +216,6 @@ def test_trips(html_content, xpath, answer):
     <p>Therefore, if the path is /html/body/p[1], lxml(libxml2) returns two element nodes not one.</p>
   </body>
 </html>"""
-
 @pytest.mark.parametrize("html_content", [DOM_violation_two_html_root_element])
 @pytest.mark.parametrize("xpath, answer", [
     ("/html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),

From 20195e7a79b279b3f92ca1b135534066cf5d8d98 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Tue, 7 May 2024 23:25:45 +0900
Subject: [PATCH 09/37] tests/test_xpath_selector_unit/test: Fix test and add
 more small tests for fragment

---
 changedetectionio/tests/test_xpath_selector_unit.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 131054c5610..7b9c57d10be 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -205,22 +205,27 @@ def test_trips(html_content, xpath, answer):
 DOM_violation_two_html_root_element = """<!DOCTYPE html>
 <html>
   <body>
-    <h1>Hello absurd world</h1>
+    <h1>Hello world</h1>
     <p>First paragraph.</p>
   </body>
 </html>
 <html>
   <body>
-    <h1>Hello absurd world</h1>
+    <h1>Hello world</h1>
     <p>Browsers parse this part by fixing it but lxml doesn't and returns two root element node</p>
     <p>Therefore, if the path is /html/body/p[1], lxml(libxml2) returns two element nodes not one.</p>
   </body>
 </html>"""
 @pytest.mark.parametrize("html_content", [DOM_violation_two_html_root_element])
 @pytest.mark.parametrize("xpath, answer", [
+    ("/html/body/p[1]", "First paragraph."),
     ("/html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
+    ("//html/body/p[1]", "First paragraph."),
+    ("//html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
+    ("//body/p[1]", "First paragraph."),
+    ("//body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
                           ])
 def test_trips(html_content, xpath, answer):
     html_content = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)
     assert type(html_content) == str
-    assert answer not in html_content
+    assert answer in html_content

From 220f484ee97e049b11a8901c298c2ad8cb9fec4c Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 00:00:08 +0900
Subject: [PATCH 10/37] tests/test_xpath_selector_unit/test: Check error
 occurs.

---
 changedetectionio/tests/test_xpath_selector_unit.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 7b9c57d10be..315a50fbc3b 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -226,6 +226,18 @@ def test_trips(html_content, xpath, answer):
     ("//body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
                           ])
 def test_trips(html_content, xpath, answer):
+
+
+    # In normal situation, DOM's root element node is only one. So Exception occurs.
+    with pytest.raises(Exception):
+        from lxml import etree, html
+        import elementpath
+        from elementpath.xpath3 import XPath3Parser
+        parser = etree.HTMLParser()
+        tree = html.fromstring(bytes(doc, encoding='utf-8'), parser=parser)
+        # Error will occur.
+        r = elementpath.select(tree, path.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
+
     html_content = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)
     assert type(html_content) == str
     assert answer in html_content

From e84b9f1c0f129209b8ef911ff7eca11e0a1f5981 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 00:04:02 +0900
Subject: [PATCH 11/37] tests/test_xpath_selector_unit/test: Fix

---
 changedetectionio/tests/test_xpath_selector_unit.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 315a50fbc3b..e3ea3894023 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -234,9 +234,10 @@ def test_trips(html_content, xpath, answer):
         import elementpath
         from elementpath.xpath3 import XPath3Parser
         parser = etree.HTMLParser()
-        tree = html.fromstring(bytes(doc, encoding='utf-8'), parser=parser)
+        tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
+        # just example xpath
         # Error will occur.
-        r = elementpath.select(tree, path.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
+        r = elementpath.select(tree, xpath.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
 
     html_content = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)
     assert type(html_content) == str

From 60777e429a183e5d5d8e669204ca2ecbb3f4c3c7 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 00:22:02 +0900
Subject: [PATCH 12/37] tests/test_xpath_selector_unit/test: Add more
 unintuitive tests

---
 .../tests/test_xpath_selector_unit.py         | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index e3ea3894023..1045a73837c 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -224,11 +224,15 @@ def test_trips(html_content, xpath, answer):
     ("//html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
     ("//body/p[1]", "First paragraph."),
     ("//body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
+    ("/html[2]/body/p[1]", "First paragraph."),
+    ("/html[2]/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
+    ("//html[2]/body/p[1]", "First paragraph."),
+    ("//html[2]/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
                           ])
 def test_trips(html_content, xpath, answer):
 
 
-    # In normal situation, DOM's root element node is only one. So Exception occurs.
+    # In normal situation, DOM's root element node is only one. So when DOM violation happens, Exception occurs.
     with pytest.raises(Exception):
         from lxml import etree, html
         import elementpath
@@ -242,3 +246,16 @@ def test_trips(html_content, xpath, answer):
     html_content = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)
     assert type(html_content) == str
     assert answer in html_content
+
+@pytest.mark.parametrize("html_content", [DOM_violation_two_html_root_element])
+@pytest.mark.parametrize("xpath, answer", [
+    ("/html[2]/body/p[1]", "First paragraph."),
+    ("//html[2]/body/p[1]", "First paragraph."),
+                          ])
+def test_trips(html_content, xpath, answer):
+    # In normal situation, DOM's root element node is only one. So when DOM violation happens, Exception occurs.
+
+    html_content = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)
+    assert type(html_content) == str
+    # check the answer is not in the html_content
+    assert answer not in html_content

From e325e029672ef3372f8d52c7a68c680b6d0342f6 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 01:04:41 +0900
Subject: [PATCH 13/37] tests/test_xpath_selector_unit/test: Trigger test again

---
 changedetectionio/tests/test_xpath_selector_unit.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 1045a73837c..f976b67ad87 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -254,8 +254,7 @@ def test_trips(html_content, xpath, answer):
                           ])
 def test_trips(html_content, xpath, answer):
     # In normal situation, DOM's root element node is only one. So when DOM violation happens, Exception occurs.
-
     html_content = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)
     assert type(html_content) == str
-    # check the answer is not in the html_content
+    # check the answer is *not in* the html_content
     assert answer not in html_content

From 6a2e1cf9138e7f6d4236922dfc490240175c3b3f Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 01:20:17 +0900
Subject: [PATCH 14/37] tests/test_xpath_selector_unit/fix: Trigger test again.
 why it doesn't work like my repo

---
 changedetectionio/tests/test_xpath_selector_unit.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index f976b67ad87..ebbaf23a92d 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -230,8 +230,6 @@ def test_trips(html_content, xpath, answer):
     ("//html[2]/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
                           ])
 def test_trips(html_content, xpath, answer):
-
-
     # In normal situation, DOM's root element node is only one. So when DOM violation happens, Exception occurs.
     with pytest.raises(Exception):
         from lxml import etree, html

From 55b2c6c63e3e73b408002f4c17713c843be1297d Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 01:36:04 +0900
Subject: [PATCH 15/37] tests/test_xpath_selector_unit/test: Oops fix test name

---
 changedetectionio/tests/test_xpath_selector_unit.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index ebbaf23a92d..95bdb525f47 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -229,7 +229,7 @@ def test_trips(html_content, xpath, answer):
     ("//html[2]/body/p[1]", "First paragraph."),
     ("//html[2]/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
                           ])
-def test_trips(html_content, xpath, answer):
+def test_broken_DOM_01(html_content, xpath, answer):
     # In normal situation, DOM's root element node is only one. So when DOM violation happens, Exception occurs.
     with pytest.raises(Exception):
         from lxml import etree, html
@@ -250,7 +250,7 @@ def test_trips(html_content, xpath, answer):
     ("/html[2]/body/p[1]", "First paragraph."),
     ("//html[2]/body/p[1]", "First paragraph."),
                           ])
-def test_trips(html_content, xpath, answer):
+def test_Broken_DOM_02(html_content, xpath, answer):
     # In normal situation, DOM's root element node is only one. So when DOM violation happens, Exception occurs.
     html_content = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)
     assert type(html_content) == str

From 93a9585fc6e2f340170796f83f799215ffd4c1f5 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 01:47:10 +0900
Subject: [PATCH 16/37] tests/test_xpath_selector_unit/test: Drop html[2]
 cases that are expected to fail

---
 changedetectionio/tests/test_xpath_selector_unit.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 95bdb525f47..3d8d84806c2 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -224,9 +224,7 @@ def test_trips(html_content, xpath, answer):
     ("//html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
     ("//body/p[1]", "First paragraph."),
     ("//body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
-    ("/html[2]/body/p[1]", "First paragraph."),
     ("/html[2]/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
-    ("//html[2]/body/p[1]", "First paragraph."),
     ("//html[2]/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
                           ])
 def test_broken_DOM_01(html_content, xpath, answer):

From e6b13c9ad3b0ec5ead2fc38494bdf1fdb03aaf40 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 02:16:37 +0900
Subject: [PATCH 17/37] tests/test_xpath_selector_unit/test: Add count test

---
 changedetectionio/tests/test_xpath_selector_unit.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 3d8d84806c2..0cbbb045570 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -220,6 +220,11 @@ def test_trips(html_content, xpath, answer):
 @pytest.mark.parametrize("xpath, answer", [
     ("/html/body/p[1]", "First paragraph."),
     ("/html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
+    ("count(/html/body/p[1])", "2"),
+    ("count(/html)", "2"),
+    ("count(//html)", "2"),
+    ("count(//body)", "2"),
+    ("count(/html/body)", "2"),
     ("//html/body/p[1]", "First paragraph."),
     ("//html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
     ("//body/p[1]", "First paragraph."),

From 2e3e7811ef9dc949e17c9c4bbc34320cae1a242c Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 02:30:34 +0900
Subject: [PATCH 18/37] tests/test_xpath_selector_unit/chore: Trigger CICD

---
 changedetectionio/tests/test_xpath_selector_unit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 0cbbb045570..958e2e29b9f 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -257,5 +257,5 @@ def test_Broken_DOM_02(html_content, xpath, answer):
     # In normal situation, DOM's root element node is only one. So when DOM violation happens, Exception occurs.
     html_content = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True)
     assert type(html_content) == str
-    # check the answer is *not in* the html_content
+    # Check the answer is *not in* the html_content
     assert answer not in html_content

From c295c5e40dce6b58517ba44c34ebb0fbecf96356 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 02:50:21 +0900
Subject: [PATCH 19/37] tests/test_xpath_selector_unit/test: Add same behavior
 for xpath 1

---
 .../tests/test_xpath_selector_unit.py          | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 958e2e29b9f..cfee3080e45 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -259,3 +259,21 @@ def test_Broken_DOM_02(html_content, xpath, answer):
     assert type(html_content) == str
     # Check the answer is *not in* the html_content
     assert answer not in html_content
+
+@pytest.mark.parametrize("html_content", [DOM_violation_two_html_root_element])
+@pytest.mark.parametrize("xpath, answer", [
+    ("/html/body/p[1]", 2),
+    ("/html", 2),
+    ("//html", 2),
+    ("//body", 2),
+    ("/html/body", 2),
+                          ])
+def test_Broken_DOM_03(html_content, xpath, answer):
+    # In normal situation, DOM's root element node is only one. So when DOM violation happens, Exception occurs.
+
+    from lxml import etree, html
+    parser = etree.HTMLParser()
+    tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
+
+    # test xpath 1
+    assert len(tree.xpath(xpath)) == 2

From 5acd31fb1ec347e8e07f4017940a29a8bcab55e5 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 02:52:04 +0900
Subject: [PATCH 20/37] tests/test_xpath_selector_unit/test: Fix misc

---
 changedetectionio/tests/test_xpath_selector_unit.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index cfee3080e45..0d1ac6d36ea 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -269,8 +269,7 @@ def test_Broken_DOM_02(html_content, xpath, answer):
     ("/html/body", 2),
                           ])
 def test_Broken_DOM_03(html_content, xpath, answer):
-    # In normal situation, DOM's root element node is only one. So when DOM violation happens, Exception occurs.
-
+    """just test for xpath1"""
     from lxml import etree, html
     parser = etree.HTMLParser()
     tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)

From de7b66bc8e132211ca67054847e7f81f930cbf71 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 02:54:19 +0900
Subject: [PATCH 21/37] tests/test_xpath_selector_unit/test: Fix answer

---
 changedetectionio/tests/test_xpath_selector_unit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 0d1ac6d36ea..3f2b86d282e 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -275,4 +275,4 @@ def test_Broken_DOM_03(html_content, xpath, answer):
     tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
 
     # test xpath 1
-    assert len(tree.xpath(xpath)) == 2
+    assert len(tree.xpath(xpath)) == answer

From 66a7dae381367ede985202598913b5e97019b5fb Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 8 May 2024 03:00:50 +0900
Subject: [PATCH 22/37] html_tools/docs: Fix old comment

---
 changedetectionio/html_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 6ae4ef07587..276a6219487 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -113,7 +113,7 @@ def elementpath_tostring(obj):
 def forest_transplanting(root):
     """
     libxml2 violates DOM rules. it means there can be multiple root element
-    nodes. So I choose just transplating them to a new root by default.
+    nodes. So I choose just transplating them to a new root when the violation happens.
     See also, https://gitlab.gnome.org/GNOME/libxml2/-/issues/716
     This will emulate xpath1 of html of libxml2 like '/html[2]/*'.
     To make this function work, 'fragment=True' in elementpath.select is required.

From 4d266cac9f33d62ae1c662a3128d043d9a0579fd Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Fri, 10 May 2024 00:08:49 +0900
Subject: [PATCH 23/37] html_tools/feat: Do forest_transplanting by
 default

---
 changedetectionio/html_tools.py               | 22 +++++--------------
 .../tests/test_xpath_selector_unit.py         |  3 +++
 2 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 276a6219487..8a7bbd929d6 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -123,24 +123,12 @@ def forest_transplanting(root):
     root_siblings_preceding = [ s for s in root.itersiblings(preceding=True)]
     root_siblings = [s for s in root.itersiblings()]
 
-    Is_fragment=False
-    # If element node exsits in root element node's sibilings, it is fragment.
-    for node in chain(root_siblings_preceding, root_siblings):
-        if not hasattr(node.tag, '__name__'):
-            Is_fragment=True
-            # early exit. because the root is already root element.
-            # So, two root element nodes are detected. DOM violation.
-            break
-
-    if Is_fragment:
-        new_root = etree.Element("new_root")
-        root_siblings_preceding.reverse()
-        for node in chain(root_siblings_preceding, [root], root_siblings):
-            new_root.append(node)
-        return new_root, True
-
-    return root, False
+    new_root = etree.Element("new_root")
 
+    root_siblings_preceding.reverse()
+    for node in chain(root_siblings_preceding, [root], root_siblings):
+        new_root.append(node)
+    return new_root, True
 
 # Return str Utf-8 of matched rules
 def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 3f2b86d282e..047191ab03a 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -218,6 +218,9 @@ def test_trips(html_content, xpath, answer):
 </html>"""
 @pytest.mark.parametrize("html_content", [DOM_violation_two_html_root_element])
 @pytest.mark.parametrize("xpath, answer", [
+    (".", "First paragraph."),
+    ("/*", "First paragraph."),
+    ("/html", "First paragraph."),
     ("/html/body/p[1]", "First paragraph."),
     ("/html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
     ("count(/html/body/p[1])", "2"),

From ebf7fd4ef3e754520718b54527d207dd4641424e Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Fri, 10 May 2024 00:36:27 +0900
Subject: [PATCH 24/37] tests/test_xpath_selector_unit/test: Fix tests

---
 .../tests/test_xpath_selector_unit.py         | 50 +++++++++----------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 047191ab03a..0d839d5afd7 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -68,7 +68,7 @@
                           ("some $i in //hotel/branch/staff satisfies $i/age < 20", "false"),
                           ("every $i in /hotel/branch/staff satisfies $i/age > 20", "true"),
                           ("every $i in //hotel/branch/staff satisfies $i/age > 20 ", "true"),
-                          ("let $x := branch[@location = 'California'], $y := branch[@location = 'Las Vegas'] return (avg($x/staff/age), avg($y/staff/age))", "27.5"),
+                          ("let $x := hotel/branch[@location = 'California'], $y := hotel/branch[@location = 'Las Vegas'] return (avg($x/staff/age), avg($y/staff/age))", "27.5"),
                           ("let $x := //branch[@location = 'California'], $y := //branch[@location = 'Las Vegas'] return (avg($x/staff/age), avg($y/staff/age))", "27.5"),
                           ("let $nu := 1, $de := 1000 return  'probability = ' || $nu div $de * 100 || '%'", "0.1%"),
                           ("let $nu := 2, $probability := function ($argument) { 'probability = ' ||  $nu div $argument  * 100 || '%'}, $de := 5 return $probability($de)", "40%"),
@@ -99,45 +99,45 @@ def test_hotels(html_content, xpath, answer):
  </branches_to_visit>"""
 @pytest.mark.parametrize("html_content", [branches_to_visit])
 @pytest.mark.parametrize("xpath, answer", [
-    ("manager[@name = 'Godot']/branch union manager[@name = 'Freya']/branch", "Area 51"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch union branches_to_visit/manager[@name = 'Freya']/branch", "Area 51"),
     ("//manager[@name = 'Godot']/branch union //manager[@name = 'Freya']/branch", "Stalsk12"),
-    ("manager[@name = 'Godot']/branch | manager[@name = 'Freya']/branch", "Stalsk12"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch | branches_to_visit/manager[@name = 'Freya']/branch", "Stalsk12"),
     ("//manager[@name = 'Godot']/branch | //manager[@name = 'Freya']/branch", "Stalsk12"),
-    ("manager/branch intersect manager[@name = 'Godot']/branch", "A place with no name"),
+    ("branches_to_visit/manager/branch intersect branches_to_visit/manager[@name = 'Godot']/branch", "A place with no name"),
     ("//manager/branch intersect //manager[@name = 'Godot']/branch", "A place with no name"),
-    ("manager[@name = 'Godot']/branch intersect manager[@name = 'Freya']/branch", ""),
-    ("manager/branch except manager[@name = 'Godot']/branch", "Barcelona"),
-    ("manager[@name = 'Godot']/branch[1]  eq 'Area 51'", "true"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch intersect branches_to_visit/manager[@name = 'Freya']/branch", ""),
+    ("branches_to_visit/manager/branch except branches_to_visit/manager[@name = 'Godot']/branch", "Barcelona"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch[1]  eq 'Area 51'", "true"),
     ("//manager[@name = 'Godot']/branch[1]  eq 'Area 51'", "true"),
-    ("manager[@name = 'Godot']/branch[1]  eq 'Seoul'", "false"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch[1]  eq 'Seoul'", "false"),
     ("//manager[@name = 'Godot']/branch[1]  eq 'Seoul'", "false"),
-    ("manager[@name = 'Godot']/branch[2] eq manager[@name = 'Freya']/branch[2]", "false"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch[2] eq branches_to_visit/manager[@name = 'Freya']/branch[2]", "false"),
     ("//manager[@name = 'Godot']/branch[2] eq //manager[@name = 'Freya']/branch[2]", "false"),
-    ("manager[1]/@room_no lt manager[2]/@room_no", "false"),
+    ("branches_to_visit/manager[1]/@room_no lt branches_to_visit/manager[2]/@room_no", "false"),
     ("//manager[1]/@room_no lt //manager[2]/@room_no", "false"),
-    ("manager[1]/@room_no gt manager[2]/@room_no", "true"),
+    ("branches_to_visit/manager[1]/@room_no gt branches_to_visit/manager[2]/@room_no", "true"),
     ("//manager[1]/@room_no gt //manager[2]/@room_no", "true"),
-    ("manager[@name = 'Godot']/branch[1]  = 'Area 51'", "true"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch[1]  = 'Area 51'", "true"),
     ("//manager[@name = 'Godot']/branch[1]  = 'Area 51'", "true"),
-    ("manager[@name = 'Godot']/branch[1]  = 'Seoul'", "false"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch[1]  = 'Seoul'", "false"),
     ("//manager[@name = 'Godot']/branch[1]  = 'Seoul'", "false"),
-    ("manager[@name = 'Godot']/branch  = 'Area 51'", "true"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch  = 'Area 51'", "true"),
     ("//manager[@name = 'Godot']/branch  = 'Area 51'", "true"),
-    ("manager[@name = 'Godot']/branch  = 'Barcelona'", "false"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch  = 'Barcelona'", "false"),
     ("//manager[@name = 'Godot']/branch  = 'Barcelona'", "false"),
-    ("manager[1]/@room_no > manager[2]/@room_no", "true"),
+    ("branches_to_visit/manager[1]/@room_no > branches_to_visit/manager[2]/@room_no", "true"),
     ("//manager[1]/@room_no > //manager[2]/@room_no", "true"),
-    ("manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is manager[1]/branch[1]", "false"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is branches_to_visit/manager[1]/branch[1]", "false"),
     ("//manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is //manager[1]/branch[1]", "false"),
-    ("manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is manager[1]/branch[3]", "true"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is branches_to_visit/manager[1]/branch[3]", "true"),
     ("//manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is //manager[1]/branch[3]", "true"),
-    ("manager[@name = 'Godot']/branch[ . = 'Stalsk12'] <<  manager[1]/branch[1]", "false"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch[ . = 'Stalsk12'] <<  branches_to_visit/manager[1]/branch[1]", "false"),
     ("//manager[@name = 'Godot']/branch[ . = 'Stalsk12'] <<  //manager[1]/branch[1]", "false"),
-    ("manager[@name = 'Godot']/branch[ . = 'Stalsk12']  >>  manager[1]/branch[1]", "true"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch[ . = 'Stalsk12']  >>  branches_to_visit/manager[1]/branch[1]", "true"),
     ("//manager[@name = 'Godot']/branch[ . = 'Stalsk12'] >>  //manager[1]/branch[1]", "true"),
-    ("manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is manager[@name = 'Freya']/branch[ . = 'Stalsk12']", "false"),
+    ("branches_to_visit/manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is branches_to_visit/manager[@name = 'Freya']/branch[ . = 'Stalsk12']", "false"),
     ("//manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is //manager[@name = 'Freya']/branch[ . = 'Stalsk12']", "false"),
-    ("manager[1]/@name || manager[2]/@name", "GodotFreya"),
+    ("branches_to_visit/manager[1]/@name || branches_to_visit/manager[2]/@name", "GodotFreya"),
     ("//manager[1]/@name || //manager[2]/@name", "GodotFreya"),
                           ])
 def test_branches_to_visit(html_content, xpath, answer):
@@ -170,10 +170,10 @@ def test_branches_to_visit(html_content, xpath, answer):
     ("(1 + 9 * 9 + 5) div 6", "14.5"),
     ("23 idiv 3", "7"),
     ("23 div 3", "7.66666666"),
-    ("for $i in ./trip return $i/traveler/duration * $i/traveler/price", "21002.04"),
-    ("for $i in ./trip return $i/traveler/duration ", "4"),
+    ("for $i in ./trips/trip return $i/traveler/duration * $i/traveler/price", "21002.04"),
+    ("for $i in ./trips/trip return $i/traveler/duration ", "4"),
     ("for $i in .//trip return $i/traveler/duration * $i/traveler/price", "21002.04"),
-    ("sum(for $i in ./trip return $i/traveler/duration * $i/traveler/price)", "29002.04"),
+    ("sum(for $i in ./trips/trip return $i/traveler/duration * $i/traveler/price)", "29002.04"),
     ("sum(for $i in .//trip return $i/traveler/duration * $i/traveler/price)", "29002.04"),
     #("trip[1]/depart - trip[1]/arrive", "fail_to_get_answer"),
     #("//trip[1]/depart - //trip[1]/arrive", "fail_to_get_answer"),

From 26e4a58cba1a16aff0fc07cee0928005393d6a0c Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Fri, 10 May 2024 01:44:06 +0900
Subject: [PATCH 25/37] tests/test_xpath_selector_unit/test: Add context node
 related tests

---
 .../tests/test_xpath_selector_unit.py         | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index 0d839d5afd7..db3d7f03dd4 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -205,22 +205,39 @@ def test_trips(html_content, xpath, answer):
 DOM_violation_two_html_root_element = """<!DOCTYPE html>
 <html>
   <body>
-    <h1>Hello world</h1>
+    <h1>Hello world1</h1>
     <p>First paragraph.</p>
   </body>
 </html>
 <html>
   <body>
-    <h1>Hello world</h1>
+    <h1>Hello world2</h1>
     <p>Browsers parse this part by fixing it but lxml doesn't and returns two root element node</p>
     <p>Therefore, if the path is /html/body/p[1], lxml(libxml2) returns two element nodes not one.</p>
   </body>
 </html>"""
 @pytest.mark.parametrize("html_content", [DOM_violation_two_html_root_element])
 @pytest.mark.parametrize("xpath, answer", [
+    (".", "Hello world1"),
     (".", "First paragraph."),
+    (".", "Hello world2"),
+    (".", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
+    (".", "Therefore, if the path is /html/body/p[1], lxml(libxml2) returns two element nodes not one."),
+    ("/*", "Hello world1"),
     ("/*", "First paragraph."),
+    ("/*", "Hello world2"),
+    ("/*", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
+    ("/*", "Therefore, if the path is /html/body/p[1], lxml(libxml2) returns two element nodes not one."),
+    ("html", "Hello world1"),
+    ("html", "First paragraph."),
+    ("html", "Hello world2"),
+    ("html", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
+    ("html", "Therefore, if the path is /html/body/p[1], lxml(libxml2) returns two element nodes not one."),
+    ("/html", "Hello world1"),
     ("/html", "First paragraph."),
+    ("/html", "Hello world2"),
+    ("/html", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
+    ("/html", "Therefore, if the path is /html/body/p[1], lxml(libxml2) returns two element nodes not one."),
     ("/html/body/p[1]", "First paragraph."),
     ("/html/body/p[1]", "Browsers parse this part by fixing it but lxml doesn't and returns two root element node"),
     ("count(/html/body/p[1])", "2"),

From dbf4e87b3174be8c3c76cf6aa0a1477520b65311 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Thu, 16 May 2024 14:33:13 +0900
Subject: [PATCH 26/37] requirements/chore: Change minimum version of
 elementpath

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 3084e5ab20d..36017f4a985 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -55,7 +55,7 @@ beautifulsoup4
 lxml >=4.8.0,<6
 
 # XPath 2.0-3.1 support - 4.2.0 broke something?
-elementpath==4.4.0
+elementpath>=4.2.1
 
 selenium~=4.14.0
 

From 7cd764f101cb994acafa768f1547674672c805fc Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Fri, 17 May 2024 18:27:17 +0900
Subject: [PATCH 27/37] html_tools/fix: Improve speed for function calls

---
 changedetectionio/html_tools.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 8a7bbd929d6..3425dc0809c 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -9,6 +9,10 @@
 from xml.sax.saxutils import escape as xml_escape
 import json
 import re
+from itertools import chain
+from elementpath import select as elementpath_select
+# xpath 2.0-3.1
+from elementpath.xpath3 import XPath3Parser
 
 
 # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
@@ -119,7 +123,7 @@ def forest_transplanting(root):
     To make this function work, 'fragment=True' in elementpath.select is required.
     """
     from lxml import etree
-    from itertools import chain
+
     root_siblings_preceding = [ s for s in root.itersiblings(preceding=True)]
     root_siblings = [s for s in root.itersiblings()]
 
@@ -133,9 +137,6 @@ def forest_transplanting(root):
 # Return str Utf-8 of matched rules
 def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
     from lxml import etree, html
-    import elementpath
-    # xpath 2.0-3.1
-    from elementpath.xpath3 import XPath3Parser
 
     parser = etree.HTMLParser()
     if is_rss:
@@ -146,7 +147,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
     tree, is_fragment = forest_transplanting(tree)
     html_block = ""
 
-    r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser, fragment=is_fragment)
+    r = elementpath_select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser, fragment=is_fragment)
     #@note: //title/text() wont work where <title>CDATA..
 
     if type(r) != list:

From 361987796e8e98729be6e39eb88b406a91b14cee Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Sun, 26 May 2024 19:07:14 +0900
Subject: [PATCH 28/37] Revert "html_tools/docs: Fix old comment"

This reverts commit 66a7dae381367ede985202598913b5e97019b5fb.
---
 changedetectionio/html_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 3425dc0809c..a4e283d6dad 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -117,7 +117,7 @@ def elementpath_tostring(obj):
 def forest_transplanting(root):
     """
     libxml2 violates DOM rules. it means there can be multiple root element
-    nodes. So I choose just transplating them to a new root when the violation happens.
+    nodes. So I choose just transplating them to a new root by default.
     See also, https://gitlab.gnome.org/GNOME/libxml2/-/issues/716
     This will emulate xpath1 of html of libxml2 like '/html[2]/*'.
     To make this function work, 'fragment=True' in elementpath.select is required.

From 827f81a293e9f6083f25c537a8ffef5717c4cd1d Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Thu, 1 Aug 2024 18:32:28 +0900
Subject: [PATCH 29/37] Update html_tools.py description

Clarify in the forest_transplanting docstring that it is the HTML parser of libxml2 that violates DOM rules.
---
 changedetectionio/html_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 74c5fef15e8..1ca26e2f786 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -114,7 +114,7 @@ def elementpath_tostring(obj):
 
 def forest_transplanting(root):
     """
-    libxml2 violates DOM rules. it means there can be multiple root element
+    The html parser of libxml2 violates DOM rules. It means there can be multiple root element
     nodes. So I choose just transplating them to a new root by default.
     See also, https://gitlab.gnome.org/GNOME/libxml2/-/issues/716
     This will emulate xpath1 of html of libxml2 like '/html[2]/*'.

From e6ac28598a2d435656a24dc8657834933b079438 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 11 Sep 2024 02:40:08 +0900
Subject: [PATCH 30/37] Revert "tests/test_xpath_selector_unit/test: Fix tests"

This reverts commit ebf7fd4ef3e754520718b54527d207dd4641424e.
---
 .../tests/test_xpath_selector_unit.py         | 50 +++++++++----------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py
index db3d7f03dd4..e56ef823c36 100644
--- a/changedetectionio/tests/test_xpath_selector_unit.py
+++ b/changedetectionio/tests/test_xpath_selector_unit.py
@@ -68,7 +68,7 @@
                           ("some $i in //hotel/branch/staff satisfies $i/age < 20", "false"),
                           ("every $i in /hotel/branch/staff satisfies $i/age > 20", "true"),
                           ("every $i in //hotel/branch/staff satisfies $i/age > 20 ", "true"),
-                          ("let $x := hotel/branch[@location = 'California'], $y := hotel/branch[@location = 'Las Vegas'] return (avg($x/staff/age), avg($y/staff/age))", "27.5"),
+                          ("let $x := branch[@location = 'California'], $y := branch[@location = 'Las Vegas'] return (avg($x/staff/age), avg($y/staff/age))", "27.5"),
                           ("let $x := //branch[@location = 'California'], $y := //branch[@location = 'Las Vegas'] return (avg($x/staff/age), avg($y/staff/age))", "27.5"),
                           ("let $nu := 1, $de := 1000 return  'probability = ' || $nu div $de * 100 || '%'", "0.1%"),
                           ("let $nu := 2, $probability := function ($argument) { 'probability = ' ||  $nu div $argument  * 100 || '%'}, $de := 5 return $probability($de)", "40%"),
@@ -99,45 +99,45 @@ def test_hotels(html_content, xpath, answer):
  </branches_to_visit>"""
 @pytest.mark.parametrize("html_content", [branches_to_visit])
 @pytest.mark.parametrize("xpath, answer", [
-    ("branches_to_visit/manager[@name = 'Godot']/branch union branches_to_visit/manager[@name = 'Freya']/branch", "Area 51"),
+    ("manager[@name = 'Godot']/branch union manager[@name = 'Freya']/branch", "Area 51"),
     ("//manager[@name = 'Godot']/branch union //manager[@name = 'Freya']/branch", "Stalsk12"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch | branches_to_visit/manager[@name = 'Freya']/branch", "Stalsk12"),
+    ("manager[@name = 'Godot']/branch | manager[@name = 'Freya']/branch", "Stalsk12"),
     ("//manager[@name = 'Godot']/branch | //manager[@name = 'Freya']/branch", "Stalsk12"),
-    ("branches_to_visit/manager/branch intersect branches_to_visit/manager[@name = 'Godot']/branch", "A place with no name"),
+    ("manager/branch intersect manager[@name = 'Godot']/branch", "A place with no name"),
     ("//manager/branch intersect //manager[@name = 'Godot']/branch", "A place with no name"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch intersect branches_to_visit/manager[@name = 'Freya']/branch", ""),
-    ("branches_to_visit/manager/branch except branches_to_visit/manager[@name = 'Godot']/branch", "Barcelona"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch[1]  eq 'Area 51'", "true"),
+    ("manager[@name = 'Godot']/branch intersect manager[@name = 'Freya']/branch", ""),
+    ("manager/branch except manager[@name = 'Godot']/branch", "Barcelona"),
+    ("manager[@name = 'Godot']/branch[1]  eq 'Area 51'", "true"),
     ("//manager[@name = 'Godot']/branch[1]  eq 'Area 51'", "true"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch[1]  eq 'Seoul'", "false"),
+    ("manager[@name = 'Godot']/branch[1]  eq 'Seoul'", "false"),
     ("//manager[@name = 'Godot']/branch[1]  eq 'Seoul'", "false"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch[2] eq branches_to_visit/manager[@name = 'Freya']/branch[2]", "false"),
+    ("manager[@name = 'Godot']/branch[2] eq manager[@name = 'Freya']/branch[2]", "false"),
     ("//manager[@name = 'Godot']/branch[2] eq //manager[@name = 'Freya']/branch[2]", "false"),
-    ("branches_to_visit/manager[1]/@room_no lt branches_to_visit/manager[2]/@room_no", "false"),
+    ("manager[1]/@room_no lt manager[2]/@room_no", "false"),
     ("//manager[1]/@room_no lt //manager[2]/@room_no", "false"),
-    ("branches_to_visit/manager[1]/@room_no gt branches_to_visit/manager[2]/@room_no", "true"),
+    ("manager[1]/@room_no gt manager[2]/@room_no", "true"),
     ("//manager[1]/@room_no gt //manager[2]/@room_no", "true"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch[1]  = 'Area 51'", "true"),
+    ("manager[@name = 'Godot']/branch[1]  = 'Area 51'", "true"),
     ("//manager[@name = 'Godot']/branch[1]  = 'Area 51'", "true"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch[1]  = 'Seoul'", "false"),
+    ("manager[@name = 'Godot']/branch[1]  = 'Seoul'", "false"),
     ("//manager[@name = 'Godot']/branch[1]  = 'Seoul'", "false"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch  = 'Area 51'", "true"),
+    ("manager[@name = 'Godot']/branch  = 'Area 51'", "true"),
     ("//manager[@name = 'Godot']/branch  = 'Area 51'", "true"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch  = 'Barcelona'", "false"),
+    ("manager[@name = 'Godot']/branch  = 'Barcelona'", "false"),
     ("//manager[@name = 'Godot']/branch  = 'Barcelona'", "false"),
-    ("branches_to_visit/manager[1]/@room_no > branches_to_visit/manager[2]/@room_no", "true"),
+    ("manager[1]/@room_no > manager[2]/@room_no", "true"),
     ("//manager[1]/@room_no > //manager[2]/@room_no", "true"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is branches_to_visit/manager[1]/branch[1]", "false"),
+    ("manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is manager[1]/branch[1]", "false"),
     ("//manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is //manager[1]/branch[1]", "false"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is branches_to_visit/manager[1]/branch[3]", "true"),
+    ("manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is manager[1]/branch[3]", "true"),
     ("//manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is //manager[1]/branch[3]", "true"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch[ . = 'Stalsk12'] <<  branches_to_visit/manager[1]/branch[1]", "false"),
+    ("manager[@name = 'Godot']/branch[ . = 'Stalsk12'] <<  manager[1]/branch[1]", "false"),
     ("//manager[@name = 'Godot']/branch[ . = 'Stalsk12'] <<  //manager[1]/branch[1]", "false"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch[ . = 'Stalsk12']  >>  branches_to_visit/manager[1]/branch[1]", "true"),
+    ("manager[@name = 'Godot']/branch[ . = 'Stalsk12']  >>  manager[1]/branch[1]", "true"),
     ("//manager[@name = 'Godot']/branch[ . = 'Stalsk12'] >>  //manager[1]/branch[1]", "true"),
-    ("branches_to_visit/manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is branches_to_visit/manager[@name = 'Freya']/branch[ . = 'Stalsk12']", "false"),
+    ("manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is manager[@name = 'Freya']/branch[ . = 'Stalsk12']", "false"),
     ("//manager[@name = 'Godot']/branch[ . = 'Stalsk12'] is //manager[@name = 'Freya']/branch[ . = 'Stalsk12']", "false"),
-    ("branches_to_visit/manager[1]/@name || branches_to_visit/manager[2]/@name", "GodotFreya"),
+    ("manager[1]/@name || manager[2]/@name", "GodotFreya"),
     ("//manager[1]/@name || //manager[2]/@name", "GodotFreya"),
                           ])
 def test_branches_to_visit(html_content, xpath, answer):
@@ -170,10 +170,10 @@ def test_branches_to_visit(html_content, xpath, answer):
     ("(1 + 9 * 9 + 5) div 6", "14.5"),
     ("23 idiv 3", "7"),
     ("23 div 3", "7.66666666"),
-    ("for $i in ./trips/trip return $i/traveler/duration * $i/traveler/price", "21002.04"),
-    ("for $i in ./trips/trip return $i/traveler/duration ", "4"),
+    ("for $i in ./trip return $i/traveler/duration * $i/traveler/price", "21002.04"),
+    ("for $i in ./trip return $i/traveler/duration ", "4"),
     ("for $i in .//trip return $i/traveler/duration * $i/traveler/price", "21002.04"),
-    ("sum(for $i in ./trips/trip return $i/traveler/duration * $i/traveler/price)", "29002.04"),
+    ("sum(for $i in ./trip return $i/traveler/duration * $i/traveler/price)", "29002.04"),
     ("sum(for $i in .//trip return $i/traveler/duration * $i/traveler/price)", "29002.04"),
     #("trip[1]/depart - trip[1]/arrive", "fail_to_get_answer"),
     #("//trip[1]/depart - //trip[1]/arrive", "fail_to_get_answer"),

From 0a0f281d805651a07b81f24c7400bf5509f0925a Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 11 Sep 2024 02:41:55 +0900
Subject: [PATCH 31/37] Revert "tests/test_xpath_selector_unit/feat: Do
 forest_transplanting by default"

This reverts commit 4d266cac9f33d62ae1c662a3128d043d9a0579fd.
---
 changedetectionio/html_tools.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 1ca26e2f786..990e0af1902 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -125,12 +125,24 @@ def forest_transplanting(root):
     root_siblings_preceding = [ s for s in root.itersiblings(preceding=True)]
     root_siblings = [s for s in root.itersiblings()]
 
-    new_root = etree.Element("new_root")
+    Is_fragment=False
+    # If element node exsits in root element node's sibilings, it is fragment.
+    for node in chain(root_siblings_preceding, root_siblings):
+        if not hasattr(node.tag, '__name__'):
+            Is_fragment=True
+            # early exit. because the root is already root element.
+            # So, two root element nodes are detected. DOM violation.
+            break
+
+    if Is_fragment:
+        new_root = etree.Element("new_root")
+        root_siblings_preceding.reverse()
+        for node in chain(root_siblings_preceding, [root], root_siblings):
+            new_root.append(node)
+        return new_root, True
+
+    return root, False
 
-    root_siblings_preceding.reverse()
-    for node in chain(root_siblings_preceding, [root], root_siblings):
-        new_root.append(node)
-    return new_root, True
 
 # Return str Utf-8 of matched rules
 def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):

From 322382096c1b88abc1a03167c184cd7ab335ab37 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 11 Sep 2024 02:50:59 +0900
Subject: [PATCH 32/37] Reapply "html_tools/docs: Fix old comment"

This reverts commit 361987796e8e98729be6e39eb88b406a91b14cee.
---
 changedetectionio/html_tools.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 990e0af1902..6bbe2236064 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -114,11 +114,12 @@ def elementpath_tostring(obj):
 
 def forest_transplanting(root):
     """
-    The html parser of libxml2 violates DOM rules. It means there can be multiple root element
-    nodes. So I choose just transplating them to a new root by default.
-    See also, https://gitlab.gnome.org/GNOME/libxml2/-/issues/716
-    This will emulate xpath1 of html of libxml2 like '/html[2]/*'.
-    To make this function work, 'fragment=True' in elementpath.select is required.
+    The html parser of libxml2 violates DOM rules. It means there can be
+    multiple root element nodes. So I choose just transplating them to a new
+    root when the violation happens.  See also,
+    https://gitlab.gnome.org/GNOME/libxml2/-/issues/716 This will emulate
+    xpath1 of html of libxml2 like '/html[2]/*'.  To make this function work,
+    'fragment=True' in elementpath.select is required.
     """
     from lxml import etree
 

From 93950c0f3db1791d6cb702554ae013bd28e5946f Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 11 Sep 2024 03:23:52 +0900
Subject: [PATCH 33/37] Update html_tools.py to trigger test

Whitespace-only changes.
---
 changedetectionio/html_tools.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 6bbe2236064..dc21d034477 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -123,14 +123,14 @@ def forest_transplanting(root):
     """
     from lxml import etree
 
-    root_siblings_preceding = [ s for s in root.itersiblings(preceding=True)]
+    root_siblings_preceding = [s for s in root.itersiblings(preceding=True)]
     root_siblings = [s for s in root.itersiblings()]
 
-    Is_fragment=False
+    Is_fragment = False
     # If element node exsits in root element node's sibilings, it is fragment.
     for node in chain(root_siblings_preceding, root_siblings):
         if not hasattr(node.tag, '__name__'):
-            Is_fragment=True
+            Is_fragment = True
             # early exit. because the root is already root element.
             # So, two root element nodes are detected. DOM violation.
             break

From 0e66cb072eaf36775a815bf331b1e9c8094abf1d Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 11 Sep 2024 04:06:04 +0900
Subject: [PATCH 34/37] Update html_tools.py docstrings to trigger test

---
 changedetectionio/html_tools.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index dc21d034477..ae13e636b1d 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -73,7 +73,7 @@ def element_removal(selectors: List[str], html_content):
 
 def elementpath_tostring(obj):
     """
-    change elementpath.select results to string type
+    change elementpath.select results(XDM) to string type
     # The MIT License (MIT), Copyright (c), 2018-2021, SISSA (Scuola Internazionale Superiore di Studi Avanzati)
     # https://github.com/sissaschool/elementpath/blob/dfcc2fd3d6011b16e02bf30459a7924f547b47d0/elementpath/xpath_tokens.py#L1038
     """
@@ -116,9 +116,9 @@ def forest_transplanting(root):
     """
     The html parser of libxml2 violates DOM rules. It means there can be
     multiple root element nodes. So I choose just transplating them to a new
-    root when the violation happens.  See also,
+    root when the violation happens. See also,
     https://gitlab.gnome.org/GNOME/libxml2/-/issues/716 This will emulate
-    xpath1 of html of libxml2 like '/html[2]/*'.  To make this function work,
+    xpath1 of html of libxml2 like '/html[2]/*'. To make this function work,
     'fragment=True' in elementpath.select is required.
     """
     from lxml import etree

From 889fdbbcffe2b4b19b5287c51397a4a89cf3814d Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Wed, 11 Sep 2024 04:38:50 +0900
Subject: [PATCH 35/37] Update html_tools.py comment to trigger test

---
 changedetectionio/html_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index ae13e636b1d..6ff4c4e6f7d 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -116,7 +116,7 @@ def forest_transplanting(root):
     """
     The html parser of libxml2 violates DOM rules. It means there can be
     multiple root element nodes. So I choose just transplating them to a new
-    root when the violation happens. See also,
+    root element when the violation happens. See also,
     https://gitlab.gnome.org/GNOME/libxml2/-/issues/716 This will emulate
     xpath1 of html of libxml2 like '/html[2]/*'. To make this function work,
     'fragment=True' in elementpath.select is required.

From 4043e9adb48eef8913ecf947cae00509d30331c2 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Sat, 14 Sep 2024 02:28:15 +0900
Subject: [PATCH 36/37] html_tools/feat: Add logger for forest transplanting.

---
 changedetectionio/html_tools.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index 6ff4c4e6f7d..20fbc0356e1 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -11,6 +11,7 @@
 from elementpath import select as elementpath_select
 # xpath 2.0-3.1
 from elementpath.xpath3 import XPath3Parser
+from loguru import logger
 
 
 # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
@@ -136,6 +137,7 @@ def forest_transplanting(root):
             break
 
     if Is_fragment:
+        logger.debug("forest_transplanting is triggered.")
         new_root = etree.Element("new_root")
         root_siblings_preceding.reverse()
         for node in chain(root_siblings_preceding, [root], root_siblings):

From 912470fb0d3bb4ba674f60025f6df90c0a2f53d6 Mon Sep 17 00:00:00 2001
From: Constantin Hong <hongconstantin@gmail.com>
Date: Sat, 14 Sep 2024 04:09:59 +0900
Subject: [PATCH 37/37] html_tools/docs: Add string to trigger test

---
 changedetectionio/html_tools.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index d4d920461b5..83277d73be0 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -116,7 +116,8 @@ def forest_transplanting(root):
     root element when the violation happens. See also,
     https://gitlab.gnome.org/GNOME/libxml2/-/issues/716 This will emulate
     xpath1 of html of libxml2 like '/html[2]/*'. To make this function work,
-    'fragment=True' in elementpath.select is required.
+    'fragment=True' in elementpath.select is required. This part is where I
+    violate the spec.
     """
     from lxml import etree