Use xml from Python 3.11

nvdaes · Nov 2, 2023 · 2719945 · 2719945
1 parent 5161ece
commit 2719945
Show file tree

Hide file tree

Showing 12 changed files with 837 additions and 207 deletions.
diff --git a/addon/globalPlugins/readFeeds/xml/dom/expatbuilder.py b/addon/globalPlugins/readFeeds/xml/dom/expatbuilder.py
@@ -204,11 +204,11 @@ def parseFile(self, file):
                 buffer = file.read(16*1024)
                 if not buffer:
                     break
-                parser.Parse(buffer, 0)
+                parser.Parse(buffer, False)
                 if first_buffer and self.document.documentElement:
                     self._setup_subset(buffer)
                 first_buffer = False
-            parser.Parse("", True)
+            parser.Parse(b"", True)
         except ParseEscape:
             pass
         doc = self.document
@@ -637,7 +637,7 @@ def parseString(self, string):
         nsattrs = self._getNSattrs() # get ns decls from node's ancestors
         document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs)
         try:
-            parser.Parse(document, 1)
+            parser.Parse(document, True)
         except:
             self.reset()
             raise
@@ -697,7 +697,7 @@ def external_entity_ref_handler(self, context, base, systemId, publicId):
             self.fragment = self.document.createDocumentFragment()
             self.curNode = self.fragment
             try:
-                parser.Parse(self._source, 1)
+                parser.Parse(self._source, True)
             finally:
                 self.curNode = old_cur_node
                 self.document = old_document

diff --git a/addon/globalPlugins/readFeeds/xml/dom/minidom.py b/addon/globalPlugins/readFeeds/xml/dom/minidom.py
@@ -43,10 +43,11 @@ class Node(xml.dom.Node):
     def __bool__(self):
         return True
 
-    def toxml(self, encoding=None):
-        return self.toprettyxml("", "", encoding)
+    def toxml(self, encoding=None, standalone=None):
+        return self.toprettyxml("", "", encoding, standalone)
 
-    def toprettyxml(self, indent="\t", newl="\n", encoding=None):
+    def toprettyxml(self, indent="\t", newl="\n", encoding=None,
+                    standalone=None):
         if encoding is None:
             writer = io.StringIO()
         else:
@@ -56,7 +57,7 @@ def toprettyxml(self, indent="\t", newl="\n", encoding=None):
                                       newline='\n')
         if self.nodeType == Node.DOCUMENT_NODE:
             # Can pass encoding only to document, to put it into XML header
-            self.writexml(writer, "", indent, newl, encoding)
+            self.writexml(writer, "", indent, newl, encoding, standalone)
         else:
             self.writexml(writer, "", indent, newl)
         if encoding is None:
@@ -357,6 +358,8 @@ def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
         self._name = qName
         self.namespaceURI = namespaceURI
         self._prefix = prefix
+        if localName is not None:
+            self._localName = localName
         self.childNodes = NodeList()
 
         # Add the single child node that represents the value of the attr
@@ -718,6 +721,14 @@ def unlink(self):
         Node.unlink(self)
 
     def getAttribute(self, attname):
+        """Returns the value of the specified attribute.
+
+        Returns the value of the element's attribute named attname as
+        a string. An empty string is returned if the element does not
+        have such an attribute. Note that an empty string may also be
+        returned as an explicitly given attribute value, use the
+        hasAttribute method to distinguish these two cases.
+        """
         if self._attrs is None:
             return ""
         try:
@@ -823,10 +834,16 @@ def removeAttributeNode(self, node):
         # Restore this since the node is still useful and otherwise
         # unlinked
         node.ownerDocument = self.ownerDocument
+        return node
 
     removeAttributeNodeNS = removeAttributeNode
 
     def hasAttribute(self, name):
+        """Checks whether the element has an attribute with the specified name.
+
+        Returns True if the element has an attribute with the specified name.
+        Otherwise, returns False.
+        """
         if self._attrs is None:
             return False
         return name in self._attrs
@@ -837,6 +854,11 @@ def hasAttributeNS(self, namespaceURI, localName):
         return (namespaceURI, localName) in self._attrsNS
 
     def getElementsByTagName(self, name):
+        """Returns all descendant elements with the given tag name.
+
+        Returns the list of all descendant elements (not direct children
+        only) with the specified tag name.
+        """
         return _get_elements_by_tagName_helper(self, name, NodeList())
 
     def getElementsByTagNameNS(self, namespaceURI, localName):
@@ -847,22 +869,27 @@ def __repr__(self):
         return "<DOM Element: %s at %#x>" % (self.tagName, id(self))
 
     def writexml(self, writer, indent="", addindent="", newl=""):
+        """Write an XML element to a file-like object
+
+        Write the element to the writer object that must provide
+        a write method (e.g. a file or StringIO object).
+        """
         # indent = current indentation
         # addindent = indentation to add to higher levels
         # newl = newline string
         writer.write(indent+"<" + self.tagName)
 
         attrs = self._get_attributes()
-        a_names = sorted(attrs.keys())
 
-        for a_name in a_names:
+        for a_name in attrs.keys():
             writer.write(" %s=\"" % a_name)
             _write_data(writer, attrs[a_name].value)
             writer.write("\"")
         if self.childNodes:
             writer.write(">")
             if (len(self.childNodes) == 1 and
-                self.childNodes[0].nodeType == Node.TEXT_NODE):
+                self.childNodes[0].nodeType in (
+                        Node.TEXT_NODE, Node.CDATA_SECTION_NODE)):
                 self.childNodes[0].writexml(writer, '', '', '')
             else:
                 writer.write(newl)
@@ -1786,12 +1813,17 @@ def importNode(self, node, deep):
             raise xml.dom.NotSupportedErr("cannot import document type nodes")
         return _clone_node(node, deep, self)
 
-    def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
-        if encoding is None:
-            writer.write('<?xml version="1.0" ?>'+newl)
-        else:
-            writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
-                encoding, newl))
+    def writexml(self, writer, indent="", addindent="", newl="", encoding=None,
+                 standalone=None):
+        declarations = []
+
+        if encoding:
+            declarations.append(f'encoding="{encoding}"')
+        if standalone is not None:
+            declarations.append(f'standalone="{"yes" if standalone else "no"}"')
+
+        writer.write(f'<?xml version="1.0" {" ".join(declarations)}?>{newl}')
+
         for node in self.childNodes:
             node.writexml(writer, indent, addindent, newl)
 

diff --git a/addon/globalPlugins/readFeeds/xml/dom/pulldom.py b/addon/globalPlugins/readFeeds/xml/dom/pulldom.py
@@ -216,12 +216,6 @@ def reset(self):
         self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
         self.parser.setContentHandler(self.pulldom)
 
-    def __getitem__(self, pos):
-        rc = self.getEvent()
-        if rc:
-            return rc
-        raise IndexError
-
     def __next__(self):
         rc = self.getEvent()
         if rc:

diff --git a/addon/globalPlugins/readFeeds/xml/dom/xmlbuilder.py b/addon/globalPlugins/readFeeds/xml/dom/xmlbuilder.py
@@ -1,7 +1,6 @@
 """Implementation of the DOM Level 3 'LS-Load' feature."""
 
 import copy
-import warnings
 import xml.dom
 
 from xml.dom.NodeFilter import NodeFilter

diff --git a/addon/globalPlugins/readFeeds/xml/etree/ElementInclude.py b/addon/globalPlugins/readFeeds/xml/etree/ElementInclude.py
@@ -42,26 +42,36 @@
 # --------------------------------------------------------------------
 
 # Licensed to PSF under a Contributor Agreement.
-# See http://www.python.org/psf/license for licensing details.
+# See https://www.python.org/psf/license for licensing details.
 
 ##
 # Limited XInclude support for the ElementTree package.
 ##
 
 import copy
 from . import ElementTree
+from urllib.parse import urljoin
 
 XINCLUDE = "{http://www.w3.org/2001/XInclude}"
 
 XINCLUDE_INCLUDE = XINCLUDE + "include"
 XINCLUDE_FALLBACK = XINCLUDE + "fallback"
 
+# For security reasons, the inclusion depth is limited to this read-only value by default.
+DEFAULT_MAX_INCLUSION_DEPTH = 6
+
+
 ##
 # Fatal include error.
 
 class FatalIncludeError(SyntaxError):
     pass
 
+
+class LimitedRecursiveIncludeError(FatalIncludeError):
+    pass
+
+
 ##
 # Default loader.  This loader reads an included resource from disk.
 #
@@ -92,28 +102,58 @@ def default_loader(href, parse, encoding=None):
 # @param loader Optional resource loader.  If omitted, it defaults
 #     to {@link default_loader}.  If given, it should be a callable
 #     that implements the same interface as <b>default_loader</b>.
+# @param base_url The base URL of the original file, to resolve
+#     relative include file references.
+# @param max_depth The maximum number of recursive inclusions.
+#     Limited to reduce the risk of malicious content explosion.
+#     Pass None to disable the limitation (negative values raise ValueError).
+# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
 # @throws FatalIncludeError If the function fails to include a given
 #     resource, or if the tree contains malformed XInclude elements.
-# @throws OSError If the function fails to load a given resource.
+# @throws IOError If the function fails to load a given resource.
+# @returns None.  XInclude nodes are expanded in place within the given tree.
 
-def include(elem, loader=None):
+def include(elem, loader=None, base_url=None,
+            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
+    if max_depth is None:
+        max_depth = -1
+    elif max_depth < 0:
+        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)
+
+    if hasattr(elem, 'getroot'):
+        elem = elem.getroot()
     if loader is None:
         loader = default_loader
+
+    _include(elem, loader, base_url, max_depth, set())
+
+
+def _include(elem, loader, base_url, max_depth, _parent_hrefs):
     # look for xinclude elements
     i = 0
     while i < len(elem):
         e = elem[i]
         if e.tag == XINCLUDE_INCLUDE:
             # process xinclude directive
             href = e.get("href")
+            if base_url:
+                href = urljoin(base_url, href)
             parse = e.get("parse", "xml")
             if parse == "xml":
+                if href in _parent_hrefs:
+                    raise FatalIncludeError("recursive include of %s" % href)
+                if max_depth == 0:
+                    raise LimitedRecursiveIncludeError(
+                        "maximum xinclude depth reached when including file %s" % href)
+                _parent_hrefs.add(href)
                 node = loader(href, parse)
                 if node is None:
                     raise FatalIncludeError(
                         "cannot load %r as %r" % (href, parse)
                         )
-                node = copy.copy(node)
+                node = copy.copy(node)  # FIXME: this makes little sense with recursive includes
+                _include(node, loader, href, max_depth - 1, _parent_hrefs)
+                _parent_hrefs.remove(href)
                 if e.tail:
                     node.tail = (node.tail or "") + e.tail
                 elem[i] = node
@@ -123,11 +163,13 @@ def include(elem, loader=None):
                     raise FatalIncludeError(
                         "cannot load %r as %r" % (href, parse)
                         )
+                if e.tail:
+                    text += e.tail
                 if i:
                     node = elem[i-1]
-                    node.tail = (node.tail or "") + text + (e.tail or "")
+                    node.tail = (node.tail or "") + text
                 else:
-                    elem.text = (elem.text or "") + text + (e.tail or "")
+                    elem.text = (elem.text or "") + text
                 del elem[i]
                 continue
             else:
@@ -139,5 +181,5 @@ def include(elem, loader=None):
                 "xi:fallback tag must be child of xi:include (%r)" % e.tag
                 )
         else:
-            include(e, loader)
-        i = i + 1
+            _include(e, loader, base_url, max_depth, _parent_hrefs)
+        i += 1