From 136589d127bc2fb488c540d2a0abf1e9936120e9 Mon Sep 17 00:00:00 2001 From: Will Kahn-Greene Date: Mon, 28 Oct 2024 13:34:17 -0400 Subject: [PATCH] Remove six dependency from html5lib (#618) This way lies madness, but at least we don't have a six dependency anymore. The way this works is that we vendored html5lib 1.1, but then this applies a 01_html5lib_six.patch to that which changes imports from six to import from bleach.six_shim. This updates the vendor management code and vendorverify to install html5lib 1.1 and then apply the patch and then compare with what's in the tree. If we end up applying further patches in the future, we can use this model to do that. --- bleach/_vendor/01_html5lib_six.patch | 167 ++++++++++++++++++ bleach/_vendor/html5lib/_inputstream.py | 4 +- bleach/_vendor/html5lib/_tokenizer.py | 2 +- bleach/_vendor/html5lib/_trie/py.py | 2 +- bleach/_vendor/html5lib/_utils.py | 2 +- bleach/_vendor/html5lib/filters/lint.py | 2 +- bleach/_vendor/html5lib/filters/sanitizer.py | 2 +- bleach/_vendor/html5lib/html5parser.py | 4 +- bleach/_vendor/html5lib/serializer.py | 2 +- bleach/_vendor/html5lib/treebuilders/base.py | 2 +- bleach/_vendor/html5lib/treebuilders/etree.py | 2 +- .../html5lib/treebuilders/etree_lxml.py | 2 +- bleach/_vendor/html5lib/treewalkers/etree.py | 2 +- .../html5lib/treewalkers/etree_lxml.py | 2 +- bleach/_vendor/vendor_install.sh | 4 + scripts/vendor_verify.sh | 12 +- 16 files changed, 195 insertions(+), 18 deletions(-) create mode 100644 bleach/_vendor/01_html5lib_six.patch diff --git a/bleach/_vendor/01_html5lib_six.patch b/bleach/_vendor/01_html5lib_six.patch new file mode 100644 index 00000000..40df7611 --- /dev/null +++ b/bleach/_vendor/01_html5lib_six.patch @@ -0,0 +1,167 @@ +diff --git bleach/_vendor/html5lib/_inputstream.py bleach/_vendor/html5lib/_inputstream.py +index 0207dd2..0976251 100644 +--- bleach/_vendor/html5lib/_inputstream.py ++++ bleach/_vendor/html5lib/_inputstream.py +@@ -1,7 +1,7 @@ + from __future__ import 
absolute_import, division, unicode_literals + +-from six import text_type +-from six.moves import http_client, urllib ++from bleach.six_shim import text_type ++from bleach.six_shim import http_client, urllib + + import codecs + import re +diff --git bleach/_vendor/html5lib/_tokenizer.py bleach/_vendor/html5lib/_tokenizer.py +index 4748a19..d884801 100644 +--- bleach/_vendor/html5lib/_tokenizer.py ++++ bleach/_vendor/html5lib/_tokenizer.py +@@ -1,6 +1,6 @@ + from __future__ import absolute_import, division, unicode_literals + +-from six import unichr as chr ++from bleach.six_shim import unichr as chr + + from collections import deque, OrderedDict + from sys import version_info +diff --git bleach/_vendor/html5lib/_trie/py.py bleach/_vendor/html5lib/_trie/py.py +index c2ba3da..56f66bd 100644 +--- bleach/_vendor/html5lib/_trie/py.py ++++ bleach/_vendor/html5lib/_trie/py.py +@@ -1,5 +1,5 @@ + from __future__ import absolute_import, division, unicode_literals +-from six import text_type ++from bleach.six_shim import text_type + + from bisect import bisect_left + +diff --git bleach/_vendor/html5lib/_utils.py bleach/_vendor/html5lib/_utils.py +index 9ea5794..635bb02 100644 +--- bleach/_vendor/html5lib/_utils.py ++++ bleach/_vendor/html5lib/_utils.py +@@ -7,7 +7,7 @@ try: + except ImportError: + from collections import Mapping + +-from six import text_type, PY3 ++from bleach.six_shim import text_type, PY3 + + if PY3: + import xml.etree.ElementTree as default_etree +diff --git bleach/_vendor/html5lib/filters/lint.py bleach/_vendor/html5lib/filters/lint.py +index acd4d7a..1340d97 100644 +--- bleach/_vendor/html5lib/filters/lint.py ++++ bleach/_vendor/html5lib/filters/lint.py +@@ -1,6 +1,6 @@ + from __future__ import absolute_import, division, unicode_literals + +-from six import text_type ++from bleach.six_shim import text_type + + from . 
import base + from ..constants import namespaces, voidElements +diff --git bleach/_vendor/html5lib/filters/sanitizer.py bleach/_vendor/html5lib/filters/sanitizer.py +index 70ef906..5c31e97 100644 +--- bleach/_vendor/html5lib/filters/sanitizer.py ++++ bleach/_vendor/html5lib/filters/sanitizer.py +@@ -12,7 +12,7 @@ import re + import warnings + from xml.sax.saxutils import escape, unescape + +-from six.moves import urllib_parse as urlparse ++from bleach.six_shim import urllib_parse as urlparse + + from . import base + from ..constants import namespaces, prefixes +diff --git bleach/_vendor/html5lib/html5parser.py bleach/_vendor/html5lib/html5parser.py +index 74d829d..5427b7d 100644 +--- bleach/_vendor/html5lib/html5parser.py ++++ bleach/_vendor/html5lib/html5parser.py +@@ -1,5 +1,5 @@ + from __future__ import absolute_import, division, unicode_literals +-from six import with_metaclass, viewkeys ++from bleach.six_shim import viewkeys + + import types + +@@ -423,7 +423,7 @@ def getPhases(debug): + return type + + # pylint:disable=unused-argument +- class Phase(with_metaclass(getMetaclass(debug, log))): ++ class Phase(metaclass=getMetaclass(debug, log)): + """Base class for helper object that implements each phase of processing + """ + __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") +diff --git bleach/_vendor/html5lib/serializer.py bleach/_vendor/html5lib/serializer.py +index c66df68..5666f49 100644 +--- bleach/_vendor/html5lib/serializer.py ++++ bleach/_vendor/html5lib/serializer.py +@@ -1,5 +1,5 @@ + from __future__ import absolute_import, division, unicode_literals +-from six import text_type ++from bleach.six_shim import text_type + + import re + +diff --git bleach/_vendor/html5lib/treebuilders/base.py bleach/_vendor/html5lib/treebuilders/base.py +index e4a3d71..2869da0 100644 +--- bleach/_vendor/html5lib/treebuilders/base.py ++++ bleach/_vendor/html5lib/treebuilders/base.py +@@ -1,5 +1,5 @@ + from __future__ import absolute_import, division, 
unicode_literals +-from six import text_type ++from bleach.six_shim import text_type + + from ..constants import scopingElements, tableInsertModeElements, namespaces + +diff --git bleach/_vendor/html5lib/treebuilders/etree.py bleach/_vendor/html5lib/treebuilders/etree.py +index 086bed4..5ccfc4d 100644 +--- bleach/_vendor/html5lib/treebuilders/etree.py ++++ bleach/_vendor/html5lib/treebuilders/etree.py +@@ -1,7 +1,7 @@ + from __future__ import absolute_import, division, unicode_literals + # pylint:disable=protected-access + +-from six import text_type ++from bleach.six_shim import text_type + + import re + +diff --git bleach/_vendor/html5lib/treebuilders/etree_lxml.py bleach/_vendor/html5lib/treebuilders/etree_lxml.py +index e73de61..f462232 100644 +--- bleach/_vendor/html5lib/treebuilders/etree_lxml.py ++++ bleach/_vendor/html5lib/treebuilders/etree_lxml.py +@@ -28,7 +28,7 @@ from . import etree as etree_builders + from .. import _ihatexml + + import lxml.etree as etree +-from six import PY3, binary_type ++from bleach.six_shim import PY3, binary_type + + + fullTree = True +diff --git bleach/_vendor/html5lib/treewalkers/etree.py bleach/_vendor/html5lib/treewalkers/etree.py +index 4465337..a9d9450 100644 +--- bleach/_vendor/html5lib/treewalkers/etree.py ++++ bleach/_vendor/html5lib/treewalkers/etree.py +@@ -3,7 +3,7 @@ from __future__ import absolute_import, division, unicode_literals + from collections import OrderedDict + import re + +-from six import string_types ++from bleach.six_shim import string_types + + from . 
import base + from .._utils import moduleFactoryFactory +diff --git bleach/_vendor/html5lib/treewalkers/etree_lxml.py bleach/_vendor/html5lib/treewalkers/etree_lxml.py +index a614ac5..ef42163 100644 +--- bleach/_vendor/html5lib/treewalkers/etree_lxml.py ++++ bleach/_vendor/html5lib/treewalkers/etree_lxml.py +@@ -1,5 +1,5 @@ + from __future__ import absolute_import, division, unicode_literals +-from six import text_type ++from bleach.six_shim import text_type + + from collections import OrderedDict + diff --git a/bleach/_vendor/html5lib/_inputstream.py b/bleach/_vendor/html5lib/_inputstream.py index 0207dd21..09762517 100644 --- a/bleach/_vendor/html5lib/_inputstream.py +++ b/bleach/_vendor/html5lib/_inputstream.py @@ -1,7 +1,7 @@ from __future__ import absolute_import, division, unicode_literals -from six import text_type -from six.moves import http_client, urllib +from bleach.six_shim import text_type +from bleach.six_shim import http_client, urllib import codecs import re diff --git a/bleach/_vendor/html5lib/_tokenizer.py b/bleach/_vendor/html5lib/_tokenizer.py index 4748a197..d8848016 100644 --- a/bleach/_vendor/html5lib/_tokenizer.py +++ b/bleach/_vendor/html5lib/_tokenizer.py @@ -1,6 +1,6 @@ from __future__ import absolute_import, division, unicode_literals -from six import unichr as chr +from bleach.six_shim import unichr as chr from collections import deque, OrderedDict from sys import version_info diff --git a/bleach/_vendor/html5lib/_trie/py.py b/bleach/_vendor/html5lib/_trie/py.py index c2ba3da7..56f66bd5 100644 --- a/bleach/_vendor/html5lib/_trie/py.py +++ b/bleach/_vendor/html5lib/_trie/py.py @@ -1,5 +1,5 @@ from __future__ import absolute_import, division, unicode_literals -from six import text_type +from bleach.six_shim import text_type from bisect import bisect_left diff --git a/bleach/_vendor/html5lib/_utils.py b/bleach/_vendor/html5lib/_utils.py index 9ea57942..635bb024 100644 --- a/bleach/_vendor/html5lib/_utils.py +++ 
b/bleach/_vendor/html5lib/_utils.py @@ -7,7 +7,7 @@ except ImportError: from collections import Mapping -from six import text_type, PY3 +from bleach.six_shim import text_type, PY3 if PY3: import xml.etree.ElementTree as default_etree diff --git a/bleach/_vendor/html5lib/filters/lint.py b/bleach/_vendor/html5lib/filters/lint.py index acd4d7a2..1340d972 100644 --- a/bleach/_vendor/html5lib/filters/lint.py +++ b/bleach/_vendor/html5lib/filters/lint.py @@ -1,6 +1,6 @@ from __future__ import absolute_import, division, unicode_literals -from six import text_type +from bleach.six_shim import text_type from . import base from ..constants import namespaces, voidElements diff --git a/bleach/_vendor/html5lib/filters/sanitizer.py b/bleach/_vendor/html5lib/filters/sanitizer.py index 70ef9066..5c31e974 100644 --- a/bleach/_vendor/html5lib/filters/sanitizer.py +++ b/bleach/_vendor/html5lib/filters/sanitizer.py @@ -12,7 +12,7 @@ import warnings from xml.sax.saxutils import escape, unescape -from six.moves import urllib_parse as urlparse +from bleach.six_shim import urllib_parse as urlparse from . 
import base from ..constants import namespaces, prefixes diff --git a/bleach/_vendor/html5lib/html5parser.py b/bleach/_vendor/html5lib/html5parser.py index 74d829d9..5427b7dd 100644 --- a/bleach/_vendor/html5lib/html5parser.py +++ b/bleach/_vendor/html5lib/html5parser.py @@ -1,5 +1,5 @@ from __future__ import absolute_import, division, unicode_literals -from six import with_metaclass, viewkeys +from bleach.six_shim import viewkeys import types @@ -423,7 +423,7 @@ def getMetaclass(use_metaclass, metaclass_func): return type # pylint:disable=unused-argument - class Phase(with_metaclass(getMetaclass(debug, log))): + class Phase(metaclass=getMetaclass(debug, log)): """Base class for helper object that implements each phase of processing """ __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") diff --git a/bleach/_vendor/html5lib/serializer.py b/bleach/_vendor/html5lib/serializer.py index c66df683..5666f49a 100644 --- a/bleach/_vendor/html5lib/serializer.py +++ b/bleach/_vendor/html5lib/serializer.py @@ -1,5 +1,5 @@ from __future__ import absolute_import, division, unicode_literals -from six import text_type +from bleach.six_shim import text_type import re diff --git a/bleach/_vendor/html5lib/treebuilders/base.py b/bleach/_vendor/html5lib/treebuilders/base.py index e4a3d710..2869da00 100644 --- a/bleach/_vendor/html5lib/treebuilders/base.py +++ b/bleach/_vendor/html5lib/treebuilders/base.py @@ -1,5 +1,5 @@ from __future__ import absolute_import, division, unicode_literals -from six import text_type +from bleach.six_shim import text_type from ..constants import scopingElements, tableInsertModeElements, namespaces diff --git a/bleach/_vendor/html5lib/treebuilders/etree.py b/bleach/_vendor/html5lib/treebuilders/etree.py index 086bed4e..5ccfc4d6 100644 --- a/bleach/_vendor/html5lib/treebuilders/etree.py +++ b/bleach/_vendor/html5lib/treebuilders/etree.py @@ -1,7 +1,7 @@ from __future__ import absolute_import, division, unicode_literals # 
pylint:disable=protected-access -from six import text_type +from bleach.six_shim import text_type import re diff --git a/bleach/_vendor/html5lib/treebuilders/etree_lxml.py b/bleach/_vendor/html5lib/treebuilders/etree_lxml.py index e73de61a..f4622322 100644 --- a/bleach/_vendor/html5lib/treebuilders/etree_lxml.py +++ b/bleach/_vendor/html5lib/treebuilders/etree_lxml.py @@ -28,7 +28,7 @@ from .. import _ihatexml import lxml.etree as etree -from six import PY3, binary_type +from bleach.six_shim import PY3, binary_type fullTree = True diff --git a/bleach/_vendor/html5lib/treewalkers/etree.py b/bleach/_vendor/html5lib/treewalkers/etree.py index 44653372..a9d9450c 100644 --- a/bleach/_vendor/html5lib/treewalkers/etree.py +++ b/bleach/_vendor/html5lib/treewalkers/etree.py @@ -3,7 +3,7 @@ from collections import OrderedDict import re -from six import string_types +from bleach.six_shim import string_types from . import base from .._utils import moduleFactoryFactory diff --git a/bleach/_vendor/html5lib/treewalkers/etree_lxml.py b/bleach/_vendor/html5lib/treewalkers/etree_lxml.py index a614ac5b..ef42163b 100644 --- a/bleach/_vendor/html5lib/treewalkers/etree_lxml.py +++ b/bleach/_vendor/html5lib/treewalkers/etree_lxml.py @@ -1,5 +1,5 @@ from __future__ import absolute_import, division, unicode_literals -from six import text_type +from bleach.six_shim import text_type from collections import OrderedDict diff --git a/bleach/_vendor/vendor_install.sh b/bleach/_vendor/vendor_install.sh index 6e61c348..6c896ee4 100755 --- a/bleach/_vendor/vendor_install.sh +++ b/bleach/_vendor/vendor_install.sh @@ -7,8 +7,12 @@ set -o pipefail BLEACH_VENDOR_DIR=${BLEACH_VENDOR_DIR:-"."} DEST=${DEST:-"."} +# Install with no dependencies pip install --no-binary all --no-compile --no-deps -r "${BLEACH_VENDOR_DIR}/vendor.txt" --target "${DEST}" +# Apply patches +(cd "${DEST}" && patch -p2 < 01_html5lib_six.patch) + # install Python 3.6.14 urllib.urlparse for #536 curl --proto '=https' --tlsv1.2 -o 
"${DEST}/parse.py" https://raw.githubusercontent.com/python/cpython/v3.6.14/Lib/urllib/parse.py (cd "${DEST}" && sha256sum parse.py > parse.py.SHA256SUM) diff --git a/scripts/vendor_verify.sh b/scripts/vendor_verify.sh index 6a0fe317..679bae48 100755 --- a/scripts/vendor_verify.sh +++ b/scripts/vendor_verify.sh @@ -5,7 +5,8 @@ set -e # Install vendored packages into /tmp and then compare with what's in # bleach/_vendor/. -DEST=/tmp/vendor-test +export DEST=/tmp/vendor-test +export BLEACH_VENDOR_DIR=bleach/_vendor if [[ -e "${DEST}" ]]; then echo "${DEST} exists. Please remove." @@ -14,11 +15,16 @@ fi mkdir "${DEST}" -# Get versions of pip and python +# Get diagnostic information pip --version +echo "DEST: ${DEST}" +echo "BLEACH_VENDOR_DIR: ${BLEACH_VENDOR_DIR}" + +# Copy patch files to dest directory +cp bleach/_vendor/*.patch "${DEST}" # Install vendored dependencies into temp directory -BLEACH_VENDOR_DIR=bleach/_vendor DEST="${DEST}" bleach/_vendor/vendor_install.sh +bleach/_vendor/vendor_install.sh # Diff contents of temp directory and bleach/_vendor/ excluding vendoring # infrastructure