From fe7c7132c0c7ef06cd461d3389e614b7c63dfa29 Mon Sep 17 00:00:00 2001 From: Nikos Koukis Date: Wed, 24 Jan 2024 15:52:43 +0200 Subject: [PATCH 1/7] [MISC] Fix compilation on python3.10, correct workflow file for python3.10 --- .github/workflows/pytest.yml | 2 +- uri/part/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 6f24b8c..105fa5e 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -15,7 +15,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [3.8, 3.9, 3.10, 3.11] + python-version: [3.8, 3.9, "3.10", 3.11] steps: - uses: actions/checkout@v2 diff --git a/uri/part/base.py b/uri/part/base.py index 882e075..80f2f6f 100644 --- a/uri/part/base.py +++ b/uri/part/base.py @@ -51,7 +51,7 @@ def __set__(self, obj, value:Optional[Stringy]) -> None: class GroupPart(Part): __slots__: Tuple[str, ...] = () - attributes: Iterable[str, ...] = () + attributes: Iterable[str] = () sep: str = '' def __get__(self, obj, cls:Optional[type]=None) -> Union[str, 'GroupPart']: From eead56c1d353c7a3d4ec7d24e615048d4569f316 Mon Sep 17 00:00:00 2001 From: Nikos Koukis Date: Wed, 24 Jan 2024 15:59:31 +0200 Subject: [PATCH 2/7] [MISC] Fix .pre-comit-config.yaml --- .pre-commit-config.yaml | 49 +++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e397c2c..c477e5a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,31 +1,32 @@ -- repo: https://github.com/pre-commit/pre-commit-hooks.git - sha: 5bf6c09bfa1297d3692cadd621ef95f1284e33c0 +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks.git + rev: "v4.5.0" hooks: - - id: check-added-large-files - - id: check-ast - - id: check-byte-order-marker - - id: check-docstring-first - - id: check-merge-conflict - - id: check-symlinks - - id: debug-statements - - id: detect-private-key - - id: end-of-file-fixer - - id: forbid-new-submodules - - id: check-json - - id: check-xml - - id: check-yaml -- repo: https://github.com/Lucas-C/pre-commit-hooks-safety - sha: v1.1.0 + - id: check-added-large-files + - id: check-ast + - id: check-byte-order-marker + - id: check-docstring-first + - id: check-merge-conflict + - id: check-symlinks + - id: debug-statements + - id: detect-private-key + - id: end-of-file-fixer + - id: forbid-new-submodules + - id: check-json + - id: check-xml + - id: check-yaml + - repo: https://github.com/Lucas-C/pre-commit-hooks-safety + rev: v1.1.0 hooks: - - id: python-safety-dependencies-check -- repo: https://github.com/Lucas-C/pre-commit-hooks-bandit - sha: v1.0.3 + - id: python-safety-dependencies-check + - repo: https://github.com/Lucas-C/pre-commit-hooks-bandit + rev: v1.0.3 hooks: - - id: python-bandit-vulnerability-check -- repo: local + - id: python-bandit-vulnerability-check + - repo: local hooks: - - id: py.test + - id: py.test name: py.test language: system entry: sh -c 'TEST_SKIP_CAPPED=1 py.test' - files: '' + files: "" From 134d3c3a709c4a188978d306c49261478ce2227d Mon Sep 17 00:00:00 2001 From: Nikos Koukis Date: Wed, 24 Jan 2024 16:36:45 +0200 Subject: [PATCH 3/7] [MISC] Run black on codebase --- .gitignore | 2 + .pre-commit-config.yaml | 8 + Makefile | 1 - pyproject.toml | 1 - test/test_003_path_like_division.py | 46 +- test/test_bucket.py | 111 +++-- test/test_parser_dburi.py | 87 ++-- test/test_qso.py | 599 ++++++++++++------------ test/test_rfc3986_5_4_1.py | 140 +++--- test/test_uri.py | 703 +++++++++++++++------------- test/test_url_normalize.py | 184 ++++---- test/test_whatwg.py | 674 +++++++++++++------------- uri/__init__.py | 15 +- uri/bucket.py | 115 +++-- uri/parse/db.py | 60 +-- uri/part/auth.py | 16 +- uri/part/authority.py | 6 +- uri/part/base.py | 117 ++--- uri/part/fragment.py | 8 +- uri/part/heir.py | 6 +- uri/part/host.py | 80 ++-- uri/part/password.py | 8 +- uri/part/path.py | 74 +-- uri/part/port.py | 10 +- uri/part/query.py | 40 +- uri/part/scheme.py | 113 ++--- uri/part/uri.py | 72 +-- uri/part/user.py | 6 +- uri/qso.py | 565 +++++++++++----------- uri/scheme.py | 76 +-- uri/typing.py | 52 +- uri/uri.py | 587 ++++++++++++----------- 32 files changed, 2346 insertions(+), 2236 deletions(-) diff --git a/.gitignore b/.gitignore index 00636d5..313964a 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,5 @@ htmlcov .mypy_cache .ropeproject tags + +.coverage.* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c477e5a..652c4af 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,3 +30,11 @@ repos: language: system entry: sh -c 'TEST_SKIP_CAPPED=1 py.test' files: "" + - id: black + name: Black formatter + description: "Black: The uncompromising Python code formatter" + entry: black + language: system + minimum_pre_commit_version: 2.9.2 + require_serial: true + types_or: [python, pyi] diff --git a/Makefile b/Makefile index bb21d98..4bab38d 100644 --- a/Makefile +++ b/Makefile @@ -37,4 +37,3 @@ release: ${PROJECT}.egg-info/PKG-INFO: pyproject.toml @mkdir -p ${VIRTUAL_ENV}/lib/pip-cache pip install --cache-dir "${VIRTUAL_ENV}/lib/pip-cache" -Ue ".[${USE}]" - diff --git a/pyproject.toml b/pyproject.toml index d59a6d6..27461e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -144,4 +144,3 @@ warn_no_return = false # required to support namespace packages # https://github.com/python/mypy/issues/14057 explicit_package_bases = true - diff --git a/test/test_003_path_like_division.py b/test/test_003_path_like_division.py index 16ea14d..ece40c9 100644 --- a/test/test_003_path_like_division.py +++ b/test/test_003_path_like_division.py @@ -2,35 +2,35 @@ def test_issue_003_path_like_division_trailing(): - base = URI("http://example.com/foo/") - assert str(base) == "http://example.com/foo/" - assert str(base / "bar.html") == "http://example.com/foo/bar.html" - - base = URI("http://example.com/foo") - assert str(base) == "http://example.com/foo" - assert str(base / "bar.html") == "http://example.com/bar.html" + base = URI("http://example.com/foo/") + assert str(base) == "http://example.com/foo/" + assert str(base / "bar.html") == "http://example.com/foo/bar.html" + + base = URI("http://example.com/foo") + assert str(base) == "http://example.com/foo" + assert str(base / "bar.html") == "http://example.com/bar.html" def test_issue_003_path_like_division_operators(): - base = URI("http://example.com/foo/bar.html") - assert str(base / "baz.html") == 'http://example.com/foo/baz.html' - assert str(base // "cdn.example.com" / "baz.html") == 'http://cdn.example.com/baz.html' - assert str(base / "/diz") == 'http://example.com/diz' - assert str(base / "#diz") == 'http://example.com/foo/bar.html#diz' - assert str(base / "https://example.com") == 'https://example.com/' + base = URI("http://example.com/foo/bar.html") + assert str(base / "baz.html") == "http://example.com/foo/baz.html" + assert str(base // "cdn.example.com" / "baz.html") == "http://cdn.example.com/baz.html" + assert str(base / "/diz") == "http://example.com/diz" + assert str(base / "#diz") == "http://example.com/foo/bar.html#diz" + assert str(base / "https://example.com") == "https://example.com/" def test_issue_003_path_on_path_division(): - base = URI("http://ats.example.com/job/listing") - - # scrape the listing, identify a job URL from that listing - target = URI("detail/sample-job") # oh no, it's relative! - - # And it's resolved. - assert str(base / target) == "http://ats.example.com/job/detail/sample-job" + base = URI("http://ats.example.com/job/listing") + + # scrape the listing, identify a job URL from that listing + target = URI("detail/sample-job") # oh no, it's relative! + + # And it's resolved. + assert str(base / target) == "http://ats.example.com/job/detail/sample-job" def test_pathlike_construction(): - target = URI("http:") // "example.com" - assert str(target) == "http://example.com/" - assert str(target / "foo") == "http://example.com/foo" + target = URI("http:") // "example.com" + assert str(target) == "http://example.com/" + assert str(target / "foo") == "http://example.com/foo" diff --git a/test/test_bucket.py b/test/test_bucket.py index 593af07..e97947e 100644 --- a/test/test_bucket.py +++ b/test/test_bucket.py @@ -3,66 +3,63 @@ from uri.bucket import Bucket EXAMPLES = [ - # String, Arguments, Name, Value, Valid - - ( '', ('', ), None, '', True ), - ( 'foo', ('foo', ), None, 'foo', True ), - ( 'foo', (None, 'foo'), None, 'foo', True ), - ( 'foo=bar', ('foo', 'bar'), 'foo', 'bar', True ), - - ( '=foo=bar', ('=foo=bar', ), '', 'foo=bar', False ), - ( '=foo=bar', ('=foo', 'bar'), '=foo', 'bar', False ), - ( '=foo=bar', ('', 'foo=bar'), '', 'foo=bar', False ), - ( 'foo=bar=', ('foo', 'bar='), 'foo', 'bar=', False ), - ( 'foo==bar=', ('foo=', 'bar='), 'foo=', 'bar=', False ), - ( 'foo==bar=', ('foo==bar=', ), 'foo', '=bar=', False ), - ( '=foo=bar=', ('=foo=bar=', ), '', 'foo=bar=', False ), - - ] + # String, Arguments, Name, Value, Valid + ("", ("",), None, "", True), + ("foo", ("foo",), None, "foo", True), + ("foo", (None, "foo"), None, "foo", True), + ("foo=bar", ("foo", "bar"), "foo", "bar", True), + ("=foo=bar", ("=foo=bar",), "", "foo=bar", False), + ("=foo=bar", ("=foo", "bar"), "=foo", "bar", False), + ("=foo=bar", ("", "foo=bar"), "", "foo=bar", False), + ("foo=bar=", ("foo", "bar="), "foo", "bar=", False), + ("foo==bar=", ("foo=", "bar="), "foo=", "bar=", False), + ("foo==bar=", ("foo==bar=",), "foo", "=bar=", False), + ("=foo=bar=", ("=foo=bar=",), "", "foo=bar=", False), +] -@pytest.mark.parametrize('string,args,name,value,valid', EXAMPLES) +@pytest.mark.parametrize("string,args,name,value,valid", EXAMPLES) class TestBucketExamples: - def test_string_identity(self, string, args, name, value, valid): - bucket = Bucket(string) - assert str(bucket) == string - - def test_names(self, string, args, name, value, valid): - bucket = Bucket(*args) - assert bucket.name == name - - def test_values(self, string, args, name, value, valid): - bucket = Bucket(*args) - assert bucket.value == value - - def test_validity(self, string, args, name, value, valid): - bucket = Bucket(string) - assert bucket.valid == valid - - def test_identity_comparison(self, string, args, name, value, valid): - bucket = Bucket(string) - assert bucket == string - - def test_unequal_comparison(self, string, args, name, value, valid): - bucket = Bucket(*args) - assert not (bucket == "xxx") - - def test_not_equal_comparison(self, string, args, name, value, valid): - bucket = Bucket(*args) - assert bucket != "xxx" - - def test_repr(self, string, args, name, value, valid): - bucket = Bucket(*args) - assert repr(bucket) == "Bucket(" + str(bucket) + ")" - - def test_length(self, string, args, name, value, valid): - bucket = Bucket(*args) - expected = 2 if '=' in string else 1 - assert len(bucket) == expected + def test_string_identity(self, string, args, name, value, valid): + bucket = Bucket(string) + assert str(bucket) == string + def test_names(self, string, args, name, value, valid): + bucket = Bucket(*args) + assert bucket.name == name -@pytest.mark.parametrize('string,args,name,value,valid', [i for i in EXAMPLES if not i[4]]) + def test_values(self, string, args, name, value, valid): + bucket = Bucket(*args) + assert bucket.value == value + + def test_validity(self, string, args, name, value, valid): + bucket = Bucket(string) + assert bucket.valid == valid + + def test_identity_comparison(self, string, args, name, value, valid): + bucket = Bucket(string) + assert bucket == string + + def test_unequal_comparison(self, string, args, name, value, valid): + bucket = Bucket(*args) + assert not (bucket == "xxx") + + def test_not_equal_comparison(self, string, args, name, value, valid): + bucket = Bucket(*args) + assert bucket != "xxx" + + def test_repr(self, string, args, name, value, valid): + bucket = Bucket(*args) + assert repr(bucket) == "Bucket(" + str(bucket) + ")" + + def test_length(self, string, args, name, value, valid): + bucket = Bucket(*args) + expected = 2 if "=" in string else 1 + assert len(bucket) == expected + + +@pytest.mark.parametrize("string,args,name,value,valid", [i for i in EXAMPLES if not i[4]]) class TestBucketExamplesInvalid: - def test_strict_string_failure(self, string, args, name, value, valid): - with pytest.raises(ValueError): - Bucket(string, strict=True) + def test_strict_string_failure(self, string, args, name, value, valid): + with pytest.raises(ValueError): + Bucket(string, strict=True) diff --git a/test/test_parser_dburi.py b/test/test_parser_dburi.py index 9e88c93..36fbbb5 100644 --- a/test/test_parser_dburi.py +++ b/test/test_parser_dburi.py @@ -5,53 +5,46 @@ from uri.qso import SENTINEL EXAMPLES = { - # Examples from: https://github.com/ferrix/dj-mongohq-url/blob/master/test_dj_mongohq_url.py - '': { - 'name': '', - 'host': None, - 'user': None, - 'password': None, - 'port': None - }, - 'mongodb://heroku:wegauwhgeuioweg@linus.mongohq.com:10031/app4523234': { - 'engine': 'mongodb', - 'name': 'app4523234', - 'host': 'linus.mongohq.com', - 'user': 'heroku', - 'password': 'wegauwhgeuioweg', - 'port': 10031 - }, - 'postgis://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com:5431/d8r82722r2kuvn': { - 'engine': 'postgis', - 'name': 'd8r82722r2kuvn', - 'host': 'ec2-107-21-253-135.compute-1.amazonaws.com', - 'user': 'uf07k1i6d8ia0v', - 'password': 'wegauwhgeuioweg', - 'port': 5431 - }, - - # '': { - # 'engine': '', - # 'name': '' - # 'host': '' - # 'user': '' - # 'password': '' - # 'port': - # }, - } + # Examples from: https://github.com/ferrix/dj-mongohq-url/blob/master/test_dj_mongohq_url.py + "": {"name": "", "host": None, "user": None, "password": None, "port": None}, + "mongodb://heroku:wegauwhgeuioweg@linus.mongohq.com:10031/app4523234": { + "engine": "mongodb", + "name": "app4523234", + "host": "linus.mongohq.com", + "user": "heroku", + "password": "wegauwhgeuioweg", + "port": 10031, + }, + "postgis://uf07k1i6d8ia0v:wegauwhgeuioweg@ec2-107-21-253-135.compute-1.amazonaws.com:5431/d8r82722r2kuvn": { + "engine": "postgis", + "name": "d8r82722r2kuvn", + "host": "ec2-107-21-253-135.compute-1.amazonaws.com", + "user": "uf07k1i6d8ia0v", + "password": "wegauwhgeuioweg", + "port": 5431, + }, + # '': { + # 'engine': '', + # 'name': '' + # 'host': '' + # 'user': '' + # 'password': '' + # 'port': + # }, +} -@pytest.mark.parametrize('string,attributes', EXAMPLES.items()) +@pytest.mark.parametrize("string,attributes", EXAMPLES.items()) class TestDBURIParsing: - @pytest.mark.parametrize('component', URI.__all_parts__ | {'base', 'qs', 'summary', 'relative'}) - def test_component(self, string, attributes, component): - return - - instance = URI(string) - value = getattr(instance, component, SENTINEL) - - if component not in attributes: - assert value in (None, SENTINEL, '') - return - - assert value == attributes[component] + @pytest.mark.parametrize("component", URI.__all_parts__ | {"base", "qs", "summary", "relative"}) + def test_component(self, string, attributes, component): + return + + instance = URI(string) + value = getattr(instance, component, SENTINEL) + + if component not in attributes: + assert value in (None, SENTINEL, "") + return + + assert value == attributes[component] diff --git a/test/test_qso.py b/test/test_qso.py index 64dd465..85110dd 100644 --- a/test/test_qso.py +++ b/test/test_qso.py @@ -3,324 +3,327 @@ from uri.qso import QSO, SENTINEL, Bucket EXAMPLES = [ - # Abstract - ('', (), {}), - ('foo=bar', (), {'foo': 'bar'}), - - # Multiple Arguments - ('foo&bar&baz&diz', ('foo', 'bar', 'baz', 'diz'), {}), - - # From Wikipedia - https://en.wikipedia.org/wiki/Query_string - ('name=ferret', (), {'name': 'ferret'}), - ('name=ferret&color=purple', (), {'name': 'ferret', 'color': 'purple'}), - ('field1=value1&field2=value2&field3=value3', (), {'field1': 'value1', 'field2': 'value2', 'field3': 'value3'}), - ('argument1+argument2+argument3', ('argument1 argument2 argument3', ), {}), - - # RFC 3986 (URI) - http://pretty-rfc.herokuapp.com/RFC3986 - ('objectClass?one', ('objectClass?one', ), {}), - ('objectClass/one', ('objectClass/one', ), {}), - - #('', (), {}), - ] + # Abstract + ("", (), {}), + ("foo=bar", (), {"foo": "bar"}), + # Multiple Arguments + ("foo&bar&baz&diz", ("foo", "bar", "baz", "diz"), {}), + # From Wikipedia - https://en.wikipedia.org/wiki/Query_string + ("name=ferret", (), {"name": "ferret"}), + ("name=ferret&color=purple", (), {"name": "ferret", "color": "purple"}), + ("field1=value1&field2=value2&field3=value3", (), {"field1": "value1", "field2": "value2", "field3": "value3"}), + ("argument1+argument2+argument3", ("argument1 argument2 argument3",), {}), + # RFC 3986 (URI) - http://pretty-rfc.herokuapp.com/RFC3986 + ("objectClass?one", ("objectClass?one",), {}), + ("objectClass/one", ("objectClass/one",), {}), + # ('', (), {}), +] MULTI_VALUE_EXAMPLES = [ - ('key=value1&key=value2&key=value3', {'key': ['value1', 'value2', 'value3']}), - ('key=value1&foo=bar&key=value2', {'key': ['value1', 'value2']}), - ('key=value1&foo&key=value2&bar&key=value3', {'key': ['value1', 'value2', 'value3'], None: ['foo', 'bar']}), - ('foo&key=value1&foo=bar&key=value2&foo=baz&diz', {'key': ['value1', 'value2'], None: ['foo', 'diz'], 'foo': ['bar', 'baz']}), - ('foo=bar&key=value1&diz=foo&key=value2&foo=baz', {'foo': ['bar', 'baz'], 'key': ['value1', 'value2'], 'diz': 'foo'}), - #('', {}), - ] + ("key=value1&key=value2&key=value3", {"key": ["value1", "value2", "value3"]}), + ("key=value1&foo=bar&key=value2", {"key": ["value1", "value2"]}), + ("key=value1&foo&key=value2&bar&key=value3", {"key": ["value1", "value2", "value3"], None: ["foo", "bar"]}), + ( + "foo&key=value1&foo=bar&key=value2&foo=baz&diz", + {"key": ["value1", "value2"], None: ["foo", "diz"], "foo": ["bar", "baz"]}, + ), + ( + "foo=bar&key=value1&diz=foo&key=value2&foo=baz", + {"foo": ["bar", "baz"], "key": ["value1", "value2"], "diz": "foo"}, + ), + # ('', {}), +] ASSIGNMENT_EXAMPLES = [ - ('key=value1&key=value2', 0, 'value3', 'key=value3&key=value2'), - ('key=value1&key=value2', 1, 'value3', 'key=value1&key=value3'), - ('key=value1&key=value2', 0, ('foo', 'value'), 'foo=value&key=value2'), - ('key=value1&key=value2', 1, ('foo', 'value'), 'key=value1&foo=value'), - ('key=value1&key=value2', 'key', ('foo', 'value'), 'foo=value'), - ('bar=baz&key=value1&key=value2', 'key', ('foo', 'value'), 'bar=baz&foo=value'), - ('key=value1&bar=baz&key=value2', 'key', ('foo', 'value'), 'bar=baz&foo=value'), - ('key=value1&key=value2&bar=baz', 'key', ('foo', 'value'), 'bar=baz&foo=value'), - ('bar=baz&key=value1&key=value2', 1, ('foo', 'value'), 'bar=baz&foo=value&key=value2'), - ('key=value1&bar=baz&key=value2', 0, ('foo', 'value'), 'foo=value&bar=baz&key=value2'), - ('key=value1&bar=baz&key=value2', 2, ('foo', 'value'), 'key=value1&bar=baz&foo=value'), - ('key=value1&key=value2&bar=baz', 0, ('foo', 'value'), 'foo=value&key=value2&bar=baz'), - ('key=value1&key=value2&bar=baz', 1, ('foo', 'value'), 'key=value1&foo=value&bar=baz'), - ('key=value1&key=value2&bar=baz', 2, ('foo', 'value'), 'key=value1&key=value2&foo=value'), - #('', , '', ''), - ] + ("key=value1&key=value2", 0, "value3", "key=value3&key=value2"), + ("key=value1&key=value2", 1, "value3", "key=value1&key=value3"), + ("key=value1&key=value2", 0, ("foo", "value"), "foo=value&key=value2"), + ("key=value1&key=value2", 1, ("foo", "value"), "key=value1&foo=value"), + ("key=value1&key=value2", "key", ("foo", "value"), "foo=value"), + ("bar=baz&key=value1&key=value2", "key", ("foo", "value"), "bar=baz&foo=value"), + ("key=value1&bar=baz&key=value2", "key", ("foo", "value"), "bar=baz&foo=value"), + ("key=value1&key=value2&bar=baz", "key", ("foo", "value"), "bar=baz&foo=value"), + ("bar=baz&key=value1&key=value2", 1, ("foo", "value"), "bar=baz&foo=value&key=value2"), + ("key=value1&bar=baz&key=value2", 0, ("foo", "value"), "foo=value&bar=baz&key=value2"), + ("key=value1&bar=baz&key=value2", 2, ("foo", "value"), "key=value1&bar=baz&foo=value"), + ("key=value1&key=value2&bar=baz", 0, ("foo", "value"), "foo=value&key=value2&bar=baz"), + ("key=value1&key=value2&bar=baz", 1, ("foo", "value"), "key=value1&foo=value&bar=baz"), + ("key=value1&key=value2&bar=baz", 2, ("foo", "value"), "key=value1&key=value2&foo=value"), + # ('', , '', ''), +] DELETION_EXAMPLES = [ - ('key=value1&key=value2&bar=baz', 0, 'key=value2&bar=baz', ['key', 'value1']), - ('key=value1&key=value2&bar=baz', 1, 'key=value1&bar=baz', ['key', 'value2']), - ('key=value1&key=value2&bar=baz', 2, 'key=value1&key=value2', ['bar', 'baz']), - ('key=value1&key=value2&bar=baz', 'key', 'bar=baz', [Bucket('key', 'value1'), Bucket('key', 'value2')]), - ('key=value1&key=value2&bar=baz', 'bar', 'key=value1&key=value2', ['bar', 'baz']), - ('key=value1&bar=baz&key=value2', 'bar', 'key=value1&key=value2', ['bar', 'baz']), - #('', , ''), - ] + ("key=value1&key=value2&bar=baz", 0, "key=value2&bar=baz", ["key", "value1"]), + ("key=value1&key=value2&bar=baz", 1, "key=value1&bar=baz", ["key", "value2"]), + ("key=value1&key=value2&bar=baz", 2, "key=value1&key=value2", ["bar", "baz"]), + ("key=value1&key=value2&bar=baz", "key", "bar=baz", [Bucket("key", "value1"), Bucket("key", "value2")]), + ("key=value1&key=value2&bar=baz", "bar", "key=value1&key=value2", ["bar", "baz"]), + ("key=value1&bar=baz&key=value2", "bar", "key=value1&key=value2", ["bar", "baz"]), + # ('', , ''), +] POP_EXAMPLES = [ - ('key=value1&key=value2&bar=baz', 0, 'key=value2&bar=baz', Bucket('key', 'value1')), - ('key=value1&key=value2&bar=baz', 1, 'key=value1&bar=baz', Bucket('key', 'value2')), - ('key=value1&key=value2&bar=baz', 2, 'key=value1&key=value2', Bucket('bar', 'baz')), - ('key=value1&key=value2&bar=baz', 'key', 'key=value1&bar=baz', 'value2'), - ('key=value1&key=value2&bar=baz', 'bar', 'key=value1&key=value2', 'baz'), - ('key=value1&bar=baz&key=value2', 'bar', 'key=value1&key=value2', 'baz'), - ('key=value1&bar=baz&key=value2', SENTINEL, 'key=value1&bar=baz', Bucket('key', 'value2')), - #('', , ''), - ] + ("key=value1&key=value2&bar=baz", 0, "key=value2&bar=baz", Bucket("key", "value1")), + ("key=value1&key=value2&bar=baz", 1, "key=value1&bar=baz", Bucket("key", "value2")), + ("key=value1&key=value2&bar=baz", 2, "key=value1&key=value2", Bucket("bar", "baz")), + ("key=value1&key=value2&bar=baz", "key", "key=value1&bar=baz", "value2"), + ("key=value1&key=value2&bar=baz", "bar", "key=value1&key=value2", "baz"), + ("key=value1&bar=baz&key=value2", "bar", "key=value1&key=value2", "baz"), + ("key=value1&bar=baz&key=value2", SENTINEL, "key=value1&bar=baz", Bucket("key", "value2")), + # ('', , ''), +] UPDATE_EXAMPLES = [ - ('key=value1&key=value2&bar=baz', 'foo=bar', 'key=value1&key=value2&bar=baz&foo=bar'), - ('key=value1&key=value2&bar=baz', 'key=value3', 'bar=baz&key=value3'), - ('key=value1&key=value2&bar=baz', 'bar=diz', 'key=value1&key=value2&bar=diz'), - ('key=value1&key=value2&bar=baz', dict(foo='bar'), 'key=value1&key=value2&bar=baz&foo=bar'), - ('key=value1&key=value2&bar=baz', ('foo=bar', 'baz=diz'), 'key=value1&key=value2&bar=baz&foo=bar&baz=diz'), - ('key=value1&key=value2&bar=baz', Bucket('foo', 'bar'), 'key=value1&key=value2&bar=baz&foo=bar'), - ('key=value1&key=value2&bar=baz', QSO("foo=baz&bar=diz"), 'key=value1&key=value2&bar=diz&foo=baz'), - ] + ("key=value1&key=value2&bar=baz", "foo=bar", "key=value1&key=value2&bar=baz&foo=bar"), + ("key=value1&key=value2&bar=baz", "key=value3", "bar=baz&key=value3"), + ("key=value1&key=value2&bar=baz", "bar=diz", "key=value1&key=value2&bar=diz"), + ("key=value1&key=value2&bar=baz", dict(foo="bar"), "key=value1&key=value2&bar=baz&foo=bar"), + ("key=value1&key=value2&bar=baz", ("foo=bar", "baz=diz"), "key=value1&key=value2&bar=baz&foo=bar&baz=diz"), + ("key=value1&key=value2&bar=baz", Bucket("foo", "bar"), "key=value1&key=value2&bar=baz&foo=bar"), + ("key=value1&key=value2&bar=baz", QSO("foo=baz&bar=diz"), "key=value1&key=value2&bar=diz&foo=baz"), +] COMBINATION_EXAMPLES = [ - ('key=value1&key=value2&bar=baz', 'foo=bar', 'key=value1&key=value2&bar=baz&foo=bar'), - ('key=value1&key=value2&bar=baz', 'key=value3', 'key=value1&key=value2&bar=baz&key=value3'), - ('key=value1&key=value2&bar=baz', 'bar=diz', 'key=value1&key=value2&bar=baz&bar=diz'), - ('key=value1&key=value2&bar=baz', dict(foo='bar'), 'key=value1&key=value2&bar=baz&foo=bar'), - ('key=value1&key=value2&bar=baz', ('foo=bar', ), 'key=value1&key=value2&bar=baz&foo=bar'), - ('key=value1&key=value2&bar=baz', Bucket('foo', 'bar'), 'key=value1&key=value2&bar=baz&foo=bar'), - ('key=value1&key=value2&bar=baz', QSO("foo=baz&bar=diz"), 'key=value1&key=value2&bar=baz&foo=baz&bar=diz'), - ] + ("key=value1&key=value2&bar=baz", "foo=bar", "key=value1&key=value2&bar=baz&foo=bar"), + ("key=value1&key=value2&bar=baz", "key=value3", "key=value1&key=value2&bar=baz&key=value3"), + ("key=value1&key=value2&bar=baz", "bar=diz", "key=value1&key=value2&bar=baz&bar=diz"), + ("key=value1&key=value2&bar=baz", dict(foo="bar"), "key=value1&key=value2&bar=baz&foo=bar"), + ("key=value1&key=value2&bar=baz", ("foo=bar",), "key=value1&key=value2&bar=baz&foo=bar"), + ("key=value1&key=value2&bar=baz", Bucket("foo", "bar"), "key=value1&key=value2&bar=baz&foo=bar"), + ("key=value1&key=value2&bar=baz", QSO("foo=baz&bar=diz"), "key=value1&key=value2&bar=baz&foo=baz&bar=diz"), +] COMPARISON_EXAMPLES = [ - ('', ''), - ('key=value1&key=value2&bar=baz', QSO('key=value1&key=value2&bar=baz')), - ] + ("", ""), + ("key=value1&key=value2&bar=baz", QSO("key=value1&key=value2&bar=baz")), +] class TestQSO: - @pytest.mark.parametrize('string,values', MULTI_VALUE_EXAMPLES) - def test_multiple_values(self, string, values): - instance = QSO(string) - - for key in values: - if not isinstance(values[key], list): continue - result = list(instance[key]) - assert result == values[key] - - @pytest.mark.parametrize('src,key,value,expect', ASSIGNMENT_EXAMPLES) - def test_multiple_reassignment(self, src, key, value, expect): - instance = QSO(src) - instance[key] = value - assert str(instance) == expect - - def test_numeric_deletion(self): - instance = QSO('key=value1&key=value2&bar=baz') - assert len(instance) == 3 - assert len(instance.groups['key']) == 2 - del instance[0] - assert len(instance) == 2 - assert len(instance.groups['key']) == 1 - assert str(instance) == 'key=value2&bar=baz' - - @pytest.mark.parametrize('src,key,expect,value', DELETION_EXAMPLES) - def test_deletion_examples(self, src, key, expect, value): - instance = QSO(src) - del instance[key] - assert str(instance) == expect - - @pytest.mark.parametrize('src,change,expect', UPDATE_EXAMPLES) - def test_update(self, src, change, expect): - instance = QSO(src) - instance.update(change) - assert str(instance) == expect - - def test_update_keywords(self): - instance = QSO("key=value1&key=value2&bar=baz") - instance.update(bar="diz") - assert str(instance) == "key=value1&key=value2&bar=diz" - instance.update(diz="doz") - assert str(instance) == "key=value1&key=value2&bar=diz&diz=doz" - instance.update(key="value3") - assert str(instance) == "bar=diz&diz=doz&key=value3" - - @pytest.mark.parametrize('src,change,expect', COMBINATION_EXAMPLES) - def test_inline_add(self, src, change, expect): - instance = QSO(src) - instance += change - assert str(instance) == expect - - def test_index(self): - instance = QSO("foo=bar&baz=diz") - assert instance.index('foo=bar') == 0 - assert instance.index('baz=diz') == 1 - - with pytest.raises(ValueError): - instance.index('diz') - - def test_count(self): - instance = QSO("") - assert instance.count('foo') == 0 - - instance = QSO("foo&bar=value1&baz=diz&bar=value2") - assert instance.count('foo') == 1 - assert instance.count('bar') == 2 - assert instance.count('baz') == 1 - - def test_insert(self): - instance = QSO("foo&bar&baz") - instance.insert(0, "diz") - assert str(instance) == "diz&foo&bar&baz" - instance.insert(-1, "doz") - assert str(instance) == "diz&foo&bar&doz&baz" - assert len(instance.groups[None]) == 5 - instance.insert(99, "twentyseven") - assert str(instance) == "diz&foo&bar&doz&baz&twentyseven" - - @pytest.mark.parametrize('src,value', COMPARISON_EXAMPLES) - def test_comparison(self, src, value): - instance = QSO(src) - assert instance == value - assert not (instance != value) - - @pytest.mark.parametrize('src,key,expect,value', POP_EXAMPLES) - def test_pop_examples(self, src, key, expect, value): - instance = QSO(src) - result = instance.pop(key) - assert str(instance) == expect - assert result == value - - @pytest.mark.parametrize('key', ['baz', 2, SENTINEL]) - def test_pop_failures(self, key): - instance = QSO() - - with pytest.raises(KeyError): - instance.pop(key) - - def test_pop_defaults(self): - instance = QSO() - - assert instance.pop(default=None) is None - assert instance.pop(0, None) is None - assert instance.pop('named', None) is None - - def test_pop_failure(self): - instance = QSO() - - with pytest.raises(KeyError): - instance.pop('key') - - def test_reverse(self): - instance = QSO("key=value1&key=value2&bar=baz") - instance.reverse() - assert str(instance) == "bar=baz&key=value2&key=value1" - assert tuple(instance['key']) == ("value2", "value1") - - def test_keys(self): - instance = QSO("key=value1&key=value2&bar=baz") - assert tuple(instance.keys()) == ('key', 'key', 'bar') - - def test_items(self): - instance = QSO("key=value1&key=value2&bar=baz") - assert tuple(instance.items()) == (('key', 'value1'), ('key', 'value2'), ('bar', 'baz')) - - def test_values(self): - instance = QSO("key=value1&key=value2&bar=baz") - assert tuple(instance.values()) == ('value1', 'value2', 'baz') - - def test_get(self): - instance = QSO("key=value1&key=value2&bar=baz") - assert tuple(instance.get('key')) == ('value1', 'value2') - assert instance.get('bar') == 'baz' - assert instance.get('baz') is None - - def test_clear(self): - instance = QSO("key=value1&key=value2&bar=baz") - assert len(instance) == 3 - instance.clear() - assert len(instance) == 0 - assert not instance - assert not instance.groups - - -@pytest.mark.parametrize('string,args,kw', EXAMPLES) + @pytest.mark.parametrize("string,values", MULTI_VALUE_EXAMPLES) + def test_multiple_values(self, string, values): + instance = QSO(string) + + for key in values: + if not isinstance(values[key], list): + continue + result = list(instance[key]) + assert result == values[key] + + @pytest.mark.parametrize("src,key,value,expect", ASSIGNMENT_EXAMPLES) + def test_multiple_reassignment(self, src, key, value, expect): + instance = QSO(src) + instance[key] = value + assert str(instance) == expect + + def test_numeric_deletion(self): + instance = QSO("key=value1&key=value2&bar=baz") + assert len(instance) == 3 + assert len(instance.groups["key"]) == 2 + del instance[0] + assert len(instance) == 2 + assert len(instance.groups["key"]) == 1 + assert str(instance) == "key=value2&bar=baz" + + @pytest.mark.parametrize("src,key,expect,value", DELETION_EXAMPLES) + def test_deletion_examples(self, src, key, expect, value): + instance = QSO(src) + del instance[key] + assert str(instance) == expect + + @pytest.mark.parametrize("src,change,expect", UPDATE_EXAMPLES) + def test_update(self, src, change, expect): + instance = QSO(src) + instance.update(change) + assert str(instance) == expect + + def test_update_keywords(self): + instance = QSO("key=value1&key=value2&bar=baz") + instance.update(bar="diz") + assert str(instance) == "key=value1&key=value2&bar=diz" + instance.update(diz="doz") + assert str(instance) == "key=value1&key=value2&bar=diz&diz=doz" + instance.update(key="value3") + assert str(instance) == "bar=diz&diz=doz&key=value3" + + @pytest.mark.parametrize("src,change,expect", COMBINATION_EXAMPLES) + def test_inline_add(self, src, change, expect): + instance = QSO(src) + instance += change + assert str(instance) == expect + + def test_index(self): + instance = QSO("foo=bar&baz=diz") + assert instance.index("foo=bar") == 0 + assert instance.index("baz=diz") == 1 + + with pytest.raises(ValueError): + instance.index("diz") + + def test_count(self): + instance = QSO("") + assert instance.count("foo") == 0 + + instance = QSO("foo&bar=value1&baz=diz&bar=value2") + assert instance.count("foo") == 1 + assert instance.count("bar") == 2 + assert instance.count("baz") == 1 + + def test_insert(self): + instance = QSO("foo&bar&baz") + instance.insert(0, "diz") + assert str(instance) == "diz&foo&bar&baz" + instance.insert(-1, "doz") + assert str(instance) == "diz&foo&bar&doz&baz" + assert len(instance.groups[None]) == 5 + instance.insert(99, "twentyseven") + assert str(instance) == "diz&foo&bar&doz&baz&twentyseven" + + @pytest.mark.parametrize("src,value", COMPARISON_EXAMPLES) + def test_comparison(self, src, value): + instance = QSO(src) + assert instance == value + assert not (instance != value) + + @pytest.mark.parametrize("src,key,expect,value", POP_EXAMPLES) + def test_pop_examples(self, src, key, expect, value): + instance = QSO(src) + result = instance.pop(key) + assert str(instance) == expect + assert result == value + + @pytest.mark.parametrize("key", ["baz", 2, SENTINEL]) + def test_pop_failures(self, key): + instance = QSO() + + with pytest.raises(KeyError): + instance.pop(key) + + def test_pop_defaults(self): + instance = QSO() + + assert instance.pop(default=None) is None + assert instance.pop(0, None) is None + assert instance.pop("named", None) is None + + def test_pop_failure(self): + instance = QSO() + + with pytest.raises(KeyError): + instance.pop("key") + + def test_reverse(self): + instance = QSO("key=value1&key=value2&bar=baz") + instance.reverse() + assert str(instance) == "bar=baz&key=value2&key=value1" + assert tuple(instance["key"]) == ("value2", "value1") + + def test_keys(self): + instance = QSO("key=value1&key=value2&bar=baz") + assert tuple(instance.keys()) == ("key", "key", "bar") + + def test_items(self): + instance = QSO("key=value1&key=value2&bar=baz") + assert tuple(instance.items()) == (("key", "value1"), ("key", "value2"), ("bar", "baz")) + + def test_values(self): + instance = QSO("key=value1&key=value2&bar=baz") + assert tuple(instance.values()) == ("value1", "value2", "baz") + + def test_get(self): + instance = QSO("key=value1&key=value2&bar=baz") + assert tuple(instance.get("key")) == ("value1", "value2") + assert instance.get("bar") == "baz" + assert instance.get("baz") is None + + def test_clear(self): + instance = QSO("key=value1&key=value2&bar=baz") + assert len(instance) == 3 + instance.clear() + assert len(instance) == 0 + assert not instance + assert not instance.groups + + +@pytest.mark.parametrize("string,args,kw", EXAMPLES) class TestQSOExamples: - def test_repr(self, string, args, kw): - instance = QSO(string) - assert repr(instance) == 'QSO("' + string + '")' - - def test_str(self, string, args, kw): - instance = QSO(string) - assert str(instance) == string - - def test_length(self, string, args, kw): - instance = QSO(string) - - if len(instance) != (len(args) + len(kw)): - __import__('pudb').set_trace() - instance = QSO(string) - - assert len(instance) == (len(args) + len(kw)) - - def test_contains(self, string, args, kw): - instance = QSO(string) - - for i in range(len(args) + len(kw)): - assert i in instance - - def test_named_assignment(self, string, args, kw): - instance = QSO(string) - instance['doz'] = '27' - - assert str(instance).endswith(('&' if (args or kw) else '') + 'doz=27') - - -@pytest.mark.parametrize('string,args,kw', [i for i in EXAMPLES if i[1]]) + def test_repr(self, string, args, kw): + instance = QSO(string) + assert repr(instance) == 'QSO("' + string + '")' + + def test_str(self, string, args, kw): + instance = QSO(string) + assert str(instance) == string + + def test_length(self, string, args, kw): + instance = QSO(string) + + if len(instance) != (len(args) + len(kw)): + __import__("pudb").set_trace() + instance = QSO(string) + + assert len(instance) == (len(args) + len(kw)) + + def test_contains(self, string, args, kw): + instance = QSO(string) + + for i in range(len(args) + len(kw)): + assert i in instance + + def test_named_assignment(self, string, args, kw): + instance = QSO(string) + instance["doz"] = "27" + + assert str(instance).endswith(("&" if (args or kw) else "") + "doz=27") + + +@pytest.mark.parametrize("string,args,kw", [i for i in EXAMPLES if i[1]]) class TestQSOPositionalUse: - def test_iteration_view(self, string, args, kw): - instance = QSO(string) - - for bucket, arg in zip(instance, args): - assert bucket.value == arg + def test_iteration_view(self, string, args, kw): + instance = QSO(string) + + for bucket, arg in zip(instance, args): + assert bucket.value == arg -@pytest.mark.parametrize('string,args,kw', [i for i in EXAMPLES if i[2]]) +@pytest.mark.parametrize("string,args,kw", [i for i in EXAMPLES if i[2]]) class TestQSOKeywordUse: - def test_contains(self, string, args, kw): - instance = QSO(string) - - for key in kw: - assert key in instance - - def test_grouped_indexing(self, string, args, kw): - instance = QSO(string) - - for key, value in kw.items(): - assert instance[key] == value - - def test_grouped_replacement(self, string, args, kw): - instance = QSO(string) - - instance['foo'] = 'doz' - - assert 'foo=doz' in str(instance) - - -@pytest.mark.parametrize('string,args,kw', [i for i in EXAMPLES if len(i[1]) > 1]) + def test_contains(self, string, args, kw): + instance = QSO(string) + + for key in kw: + assert key in instance + + def test_grouped_indexing(self, string, args, kw): + instance = QSO(string) + + for key, value in kw.items(): + assert instance[key] == value + + def test_grouped_replacement(self, string, args, kw): + instance = QSO(string) + + instance["foo"] = "doz" + + assert "foo=doz" in str(instance) + + +@pytest.mark.parametrize("string,args,kw", [i for i in EXAMPLES if len(i[1]) > 1]) class TestQSOMultiplePositional: - def test_reversing(self, string, args, kw): - instance = QSO(string) - result = list(reversed(instance)) - - assert len(result) == len(args) - assert tuple(i.value for i in result) == args[::-1] - - def test_numeric_indexing(self, string, args, kw): - instance = QSO(string) - - for i, arg in enumerate(args): - assert instance[i] == arg - - def test_indexed_replacement(self, string, args, kw): - instance = QSO(string) - instance[1] = 'doz' - assert '&doz' + def test_reversing(self, string, args, kw): + instance = QSO(string) + result = list(reversed(instance)) + + assert len(result) == len(args) + assert tuple(i.value for i in result) == args[::-1] + + def test_numeric_indexing(self, string, args, kw): + instance = QSO(string) + + for i, arg in enumerate(args): + assert instance[i] == arg + + def test_indexed_replacement(self, string, args, kw): + instance = QSO(string) + instance[1] = "doz" + assert "&doz" diff --git a/test/test_rfc3986_5_4_1.py b/test/test_rfc3986_5_4_1.py index 76559ea..1200985 100644 --- a/test/test_rfc3986_5_4_1.py +++ b/test/test_rfc3986_5_4_1.py @@ -6,81 +6,77 @@ class ReferenceResolutionExample: - """As defined by the preface of section 5.4, these examples utilize a defined base URI. - - Reference: https://pretty-rfc.herokuapp.com/RFC3986#reference-examples - """ - - BASE = URI("http://a/b/c/d;p?q") + """As defined by the preface of section 5.4, these examples utilize a defined base URI. + + Reference: https://pretty-rfc.herokuapp.com/RFC3986#reference-examples + """ + + BASE = URI("http://a/b/c/d;p?q") class TestNormalExamples(ReferenceResolutionExample): - """Examples provided by section 5.4.1 of RFC 3986.""" - - EXAMPLES = { - "g:h": "g:h", - "g": "http://a/b/c/g", - "./g": "http://a/b/c/g", - "g/": "http://a/b/c/g/", - "/g": "http://a/g", - # "//g": "http://g", # This commented out case is the "correct" one. - "//g": "http://g/", # We force URL with authorities to have paths. - "?y": "http://a/b/c/d;p?y", - "g?y": "http://a/b/c/g?y", - "#s": "http://a/b/c/d;p?q#s", - "g#s": "http://a/b/c/g#s", - "g?y#s": "http://a/b/c/g?y#s", - ";x": "http://a/b/c/;x", - "g;x": "http://a/b/c/g;x", - "g;x?y#s": "http://a/b/c/g;x?y#s", - "": "http://a/b/c/d;p?q", - ".": "http://a/b/c/", - "./": "http://a/b/c/", - "..": "http://a/b/", - "../": "http://a/b/", - "../g": "http://a/b/g", - "../..": "http://a/", - "../../": "http://a/", - "../../g": "http://a/g" - } - - @pytest.mark.parametrize('href,result', EXAMPLES.items()) - def test_resolution_equivalence(self, href, result): - resolved = self.BASE.resolve(href) - assert str(resolved) == result + """Examples provided by section 5.4.1 of RFC 3986.""" + + EXAMPLES = { + "g:h": "g:h", + "g": "http://a/b/c/g", + "./g": "http://a/b/c/g", + "g/": "http://a/b/c/g/", + "/g": "http://a/g", + # "//g": "http://g", # This commented out case is the "correct" one. + "//g": "http://g/", # We force URL with authorities to have paths. + "?y": "http://a/b/c/d;p?y", + "g?y": "http://a/b/c/g?y", + "#s": "http://a/b/c/d;p?q#s", + "g#s": "http://a/b/c/g#s", + "g?y#s": "http://a/b/c/g?y#s", + ";x": "http://a/b/c/;x", + "g;x": "http://a/b/c/g;x", + "g;x?y#s": "http://a/b/c/g;x?y#s", + "": "http://a/b/c/d;p?q", + ".": "http://a/b/c/", + "./": "http://a/b/c/", + "..": "http://a/b/", + "../": "http://a/b/", + "../g": "http://a/b/g", + "../..": "http://a/", + "../../": "http://a/", + "../../g": "http://a/g", + } + + @pytest.mark.parametrize("href,result", EXAMPLES.items()) + def test_resolution_equivalence(self, href, result): + resolved = self.BASE.resolve(href) + assert str(resolved) == result class TestAbnormalExamples(ReferenceResolutionExample): - """Examples provided by section 5.4.2 of RFC 3986.""" - - EXAMPLES = { - "../../../g": "http://a/g", - "../../../../g": "http://a/g", - - "/./g": "http://a/g", - "/../g": "http://a/g", - "g.": "http://a/b/c/g.", - ".g": "http://a/b/c/.g", - "g..": "http://a/b/c/g..", - "..g": "http://a/b/c/..g", - - "./../g": "http://a/b/g", - "./g/.": "http://a/b/c/g/", - "g/./h": "http://a/b/c/g/h", - "g/../h": "http://a/b/c/h", - "g;x=1/./y": "http://a/b/c/g;x=1/y", - "g;x=1/../y": "http://a/b/c/y", - - "g?y/./x": "http://a/b/c/g?y/./x", - "g?y/../x": "http://a/b/c/g?y/../x", - "g#s/./x": "http://a/b/c/g#s/./x", - "g#s/../x": "http://a/b/c/g#s/../x", - - # "http:g": "http:g", # for strict parsers - "http:g": "http://a/b/c/g", # for backward compatibility - } - - @pytest.mark.parametrize('href,result', EXAMPLES.items()) - def test_resolution_equivalence(self, href, result): - resolved = self.BASE.resolve(href) - assert str(resolved) == result + """Examples provided by section 5.4.2 of RFC 3986.""" + + EXAMPLES = { + "../../../g": "http://a/g", + "../../../../g": "http://a/g", + "/./g": "http://a/g", + "/../g": "http://a/g", + "g.": "http://a/b/c/g.", + ".g": "http://a/b/c/.g", + "g..": "http://a/b/c/g..", + "..g": "http://a/b/c/..g", + "./../g": "http://a/b/g", + "./g/.": "http://a/b/c/g/", + "g/./h": "http://a/b/c/g/h", + "g/../h": "http://a/b/c/h", + "g;x=1/./y": "http://a/b/c/g;x=1/y", + "g;x=1/../y": "http://a/b/c/y", + "g?y/./x": "http://a/b/c/g?y/./x", + "g?y/../x": "http://a/b/c/g?y/../x", + "g#s/./x": "http://a/b/c/g#s/./x", + "g#s/../x": "http://a/b/c/g#s/../x", + # "http:g": "http:g", # for strict parsers + "http:g": "http://a/b/c/g", # for backward compatibility + } + + @pytest.mark.parametrize("href,result", EXAMPLES.items()) + def test_resolution_equivalence(self, href, result): + resolved = self.BASE.resolve(href) + assert str(resolved) == result diff --git a/test/test_uri.py b/test/test_uri.py index 5fb853d..cae1664 100644 --- a/test/test_uri.py +++ b/test/test_uri.py @@ -7,356 +7,397 @@ from uri.uri import URI URI_COMPONENTS = [ - ('http://', dict( - relative = True, - scheme = 'http', - base = 'http://', - path = Path('.'), - )), - ('https://', dict( - relative = True, - scheme = 'https', - base = 'https://', - path = Path('.'), - )), - ('/foo', dict( - relative = True, - path = Path('/foo'), - base = '/foo', - summary = '/foo', - heirarchical = '/foo', - resource = '/foo', - )), - ('http://user:pass@example.com/over/there?name=ferret#anchor', dict( - authority = 'user:pass@example.com', - fragment = 'anchor', - user = 'user', - username = 'user', - password = 'pass', - heirarchical = 'user:pass@example.com/over/there', - host = 'example.com', - path = Path('/over/there'), - query = 'name=ferret', - scheme = 'http', - authentication = 'user:pass', - auth = 'user:pass', - base = 'http://user:pass@example.com/over/there', - summary = 'example.com/over/there', - relative = False, - resource = '/over/there?name=ferret#anchor', - )), - - # From Wikipedia - https://en.wikipedia.org/wiki/Query_string - ('http://example.com/over/there?name=ferret', dict( - authority = 'example.com', - heirarchical = 'example.com/over/there', - host = 'example.com', - path = Path('/over/there'), - query = 'name=ferret', - base = 'http://example.com/over/there', - scheme = 'http', - summary = 'example.com/over/there', - relative = False, - resource = '/over/there?name=ferret', - )), - ('http://example.com/path/to/page?name=ferret&color=purple', dict( - authority = 'example.com', - heirarchical = 'example.com/path/to/page', - host = 'example.com', - path = Path('/path/to/page'), - query = 'name=ferret&color=purple', - scheme = 'http', - base = 'http://example.com/path/to/page', - summary = 'example.com/path/to/page', - relative = False, - resource = '/path/to/page?name=ferret&color=purple', - )), - - # RFC 3986 (URI) - http://pretty-rfc.herokuapp.com/RFC3986 - ('ftp://ftp.is.co.za/rfc/rfc1808.txt', dict( - authority = 'ftp.is.co.za', - host = 'ftp.is.co.za', - path = Path('/rfc/rfc1808.txt'), - heirarchical = 'ftp.is.co.za/rfc/rfc1808.txt', - scheme = 'ftp', - base = 'ftp://ftp.is.co.za/rfc/rfc1808.txt', - summary = 'ftp.is.co.za/rfc/rfc1808.txt', - relative = False, - resource = '/rfc/rfc1808.txt', - )), - ('ldap://[2001:db8::7]/c=GB?objectClass?one', dict( - authority = '[2001:db8::7]', - path = Path('/c=GB'), - scheme = 'ldap', - query = 'objectClass?one', - host = '2001:db8::7', - heirarchical = '[2001:db8::7]/c=GB', - base = 'ldap://[2001:db8::7]/c=GB', - summary = '[2001:db8::7]/c=GB', - relative = False, - resource = '/c=GB?objectClass?one', - )), - ('http://www.ietf.org/rfc/rfc2396.txt', dict( - authority = 'www.ietf.org', - scheme = 'http', - host = 'www.ietf.org', - path = Path('/rfc/rfc2396.txt'), - heirarchical = 'www.ietf.org/rfc/rfc2396.txt', - base = 'http://www.ietf.org/rfc/rfc2396.txt', - summary = 'www.ietf.org/rfc/rfc2396.txt', - relative = False, - resource = '/rfc/rfc2396.txt', - )), - ('mailto:John.Doe@example.com', dict( - scheme = 'mailto', - path = Path('John.Doe@example.com'), - heirarchical = 'John.Doe@example.com', - summary = 'John.Doe@example.com', - base = 'mailto:John.Doe@example.com', - relative = False, - resource = 'John.Doe@example.com', - )), - ('tel:+1-816-555-1212', dict( - scheme = 'tel', - path = Path('+1-816-555-1212'), - heirarchical = '+1-816-555-1212', - summary = '+1-816-555-1212', - base = 'tel:+1-816-555-1212', - relative = False, - resource = '+1-816-555-1212', - )), - ('telnet://192.0.2.16:80/', dict( - port = 80, - scheme = 'telnet', - host = '192.0.2.16', - authority = '192.0.2.16:80', - path = Path('/'), - heirarchical = '192.0.2.16:80/', - summary = '192.0.2.16/', - base = 'telnet://192.0.2.16:80/', - relative = False, - resource = '/', - )), - ('urn:oasis:names:specification:docbook:dtd:xml:4.1.2', dict( - scheme = 'urn', - path = Path('oasis:names:specification:docbook:dtd:xml:4.1.2'), # TODO - heirarchical = 'oasis:names:specification:docbook:dtd:xml:4.1.2', - summary = 'oasis:names:specification:docbook:dtd:xml:4.1.2', - base = 'urn:oasis:names:specification:docbook:dtd:xml:4.1.2', - relative = False, - resource = 'oasis:names:specification:docbook:dtd:xml:4.1.2', - )), - - # IDNA (Internationalized Domain Name) Encoding - ('https://💩.la/', dict( - scheme = 'https', - path = Path('/'), - host = '💩.la', - authority = 'xn--ls8h.la', - heirarchical = 'xn--ls8h.la/', - summary = 'xn--ls8h.la/', - base = 'https://xn--ls8h.la/', - relative = False, - resource = '/', - uri = 'https://xn--ls8h.la/', - )) - ] + ( + "http://", + dict( + relative=True, + scheme="http", + base="http://", + path=Path("."), + ), + ), + ( + "https://", + dict( + relative=True, + scheme="https", + base="https://", + path=Path("."), + ), + ), + ( + "/foo", + dict( + relative=True, + path=Path("/foo"), + base="/foo", + summary="/foo", + heirarchical="/foo", + resource="/foo", + ), + ), + ( + "http://user:pass@example.com/over/there?name=ferret#anchor", + dict( + authority="user:pass@example.com", + fragment="anchor", + user="user", + username="user", + password="pass", + heirarchical="user:pass@example.com/over/there", + host="example.com", + path=Path("/over/there"), + query="name=ferret", + scheme="http", + authentication="user:pass", + auth="user:pass", + base="http://user:pass@example.com/over/there", + summary="example.com/over/there", + relative=False, + resource="/over/there?name=ferret#anchor", + ), + ), + # From Wikipedia - https://en.wikipedia.org/wiki/Query_string + ( + "http://example.com/over/there?name=ferret", + dict( + authority="example.com", + heirarchical="example.com/over/there", + host="example.com", + path=Path("/over/there"), + query="name=ferret", + base="http://example.com/over/there", + scheme="http", + summary="example.com/over/there", + relative=False, + resource="/over/there?name=ferret", + ), + ), + ( + "http://example.com/path/to/page?name=ferret&color=purple", + dict( + authority="example.com", + heirarchical="example.com/path/to/page", + host="example.com", + path=Path("/path/to/page"), + query="name=ferret&color=purple", + scheme="http", + base="http://example.com/path/to/page", + summary="example.com/path/to/page", + relative=False, + resource="/path/to/page?name=ferret&color=purple", + ), + ), + # RFC 3986 (URI) - http://pretty-rfc.herokuapp.com/RFC3986 + ( + "ftp://ftp.is.co.za/rfc/rfc1808.txt", + dict( + authority="ftp.is.co.za", + host="ftp.is.co.za", + path=Path("/rfc/rfc1808.txt"), + heirarchical="ftp.is.co.za/rfc/rfc1808.txt", + scheme="ftp", + base="ftp://ftp.is.co.za/rfc/rfc1808.txt", + summary="ftp.is.co.za/rfc/rfc1808.txt", + relative=False, + resource="/rfc/rfc1808.txt", + ), + ), + ( + "ldap://[2001:db8::7]/c=GB?objectClass?one", + dict( + authority="[2001:db8::7]", + path=Path("/c=GB"), + scheme="ldap", + query="objectClass?one", + host="2001:db8::7", + heirarchical="[2001:db8::7]/c=GB", + base="ldap://[2001:db8::7]/c=GB", + summary="[2001:db8::7]/c=GB", + relative=False, + resource="/c=GB?objectClass?one", + ), + ), + ( + "http://www.ietf.org/rfc/rfc2396.txt", + dict( + authority="www.ietf.org", + scheme="http", + host="www.ietf.org", + path=Path("/rfc/rfc2396.txt"), + heirarchical="www.ietf.org/rfc/rfc2396.txt", + base="http://www.ietf.org/rfc/rfc2396.txt", + summary="www.ietf.org/rfc/rfc2396.txt", + relative=False, + resource="/rfc/rfc2396.txt", + ), + ), + ( + "mailto:John.Doe@example.com", + dict( + scheme="mailto", + path=Path("John.Doe@example.com"), + heirarchical="John.Doe@example.com", + summary="John.Doe@example.com", + base="mailto:John.Doe@example.com", + relative=False, + resource="John.Doe@example.com", + ), + ), + ( + "tel:+1-816-555-1212", + dict( + scheme="tel", + path=Path("+1-816-555-1212"), + heirarchical="+1-816-555-1212", + summary="+1-816-555-1212", + base="tel:+1-816-555-1212", + relative=False, + resource="+1-816-555-1212", + ), + ), + ( + "telnet://192.0.2.16:80/", + dict( + port=80, + scheme="telnet", + host="192.0.2.16", + authority="192.0.2.16:80", + path=Path("/"), + heirarchical="192.0.2.16:80/", + summary="192.0.2.16/", + base="telnet://192.0.2.16:80/", + relative=False, + resource="/", + ), + ), + ( + "urn:oasis:names:specification:docbook:dtd:xml:4.1.2", + dict( + scheme="urn", + path=Path("oasis:names:specification:docbook:dtd:xml:4.1.2"), # TODO + heirarchical="oasis:names:specification:docbook:dtd:xml:4.1.2", + summary="oasis:names:specification:docbook:dtd:xml:4.1.2", + base="urn:oasis:names:specification:docbook:dtd:xml:4.1.2", + relative=False, + resource="oasis:names:specification:docbook:dtd:xml:4.1.2", + ), + ), + # IDNA (Internationalized Domain Name) Encoding + ( + "https://💩.la/", + dict( + scheme="https", + path=Path("/"), + host="💩.la", + authority="xn--ls8h.la", + heirarchical="xn--ls8h.la/", + summary="xn--ls8h.la/", + base="https://xn--ls8h.la/", + relative=False, + resource="/", + uri="https://xn--ls8h.la/", + ), + ), +] for _uri, _parts in URI_COMPONENTS: - _parts.setdefault('uri', _uri) - if 'query' in _parts: _parts['qs'] = _parts['query'] - if 'host' in _parts: _parts['hostname'] = _parts['host'] - _parts.setdefault('length', len(_parts.get('uri', _uri))) + _parts.setdefault("uri", _uri) + if "query" in _parts: + _parts["qs"] = _parts["query"] + if "host" in _parts: + _parts["hostname"] = _parts["host"] + _parts.setdefault("length", len(_parts.get("uri", _uri))) @pytest.fixture def instance(): - return URI('http://user:pass@example.com/over/there?name=ferret#anchor') + return URI("http://user:pass@example.com/over/there?name=ferret#anchor") @pytest.fixture def empty(): - return URI('http://example.com/over/there') + return URI("http://example.com/over/there") def test_wsgi_unpacking(): - webob = pytest.importorskip('webob') - - url = 'https://example.com/foo/bar?baz=27' - - request = webob.Request.blank(url) - uri = URI.from_wsgi(request) - - assert str(uri) == url + webob = pytest.importorskip("webob") + url = "https://example.com/foo/bar?baz=27" -@pytest.mark.parametrize('string,attributes', URI_COMPONENTS) + request = webob.Request.blank(url) + uri = URI.from_wsgi(request) + + assert str(uri) == url + + +@pytest.mark.parametrize("string,attributes", URI_COMPONENTS) class TestURI: - def test_truthiness(self, string, attributes): - instance = URI(string) - assert instance - - def test_identity(self, string, attributes): - instance = URI(string) - assert str(instance) == attributes['uri'] - - def test_identity_bytes(self, string, attributes): - instance = URI(string) - assert bytes(instance) == attributes['uri'].encode('utf-8') - - def test_identity_comparison(self, string, attributes): - instance = URI(string) - assert instance == attributes['uri'] - - def test_inverse_bad_comparison(self, string, attributes): - instance = URI(string) - assert instance != "fnord" - - def test_length(self, string, attributes): - instance = URI(string) - assert len(instance) == attributes['length'] - - @pytest.mark.parametrize('component', URI.__all_parts__ | {'base', 'qs', 'summary', 'relative'}) - def test_component(self, string, attributes, component): - instance = URI(string) - value = getattr(instance, component, SENTINEL) - - if component not in attributes: - assert value in (None, SENTINEL, '') - return - - assert value == attributes[component] + def test_truthiness(self, string, attributes): + instance = URI(string) + assert instance + + def test_identity(self, string, attributes): + instance = URI(string) + assert str(instance) == attributes["uri"] + + def test_identity_bytes(self, string, attributes): + instance = URI(string) + assert bytes(instance) == attributes["uri"].encode("utf-8") + + def test_identity_comparison(self, string, attributes): + instance = URI(string) + assert instance == attributes["uri"] + + def test_inverse_bad_comparison(self, string, attributes): + instance = URI(string) + assert instance != "fnord" + + def test_length(self, string, attributes): + instance = URI(string) + assert len(instance) == attributes["length"] + + @pytest.mark.parametrize("component", URI.__all_parts__ | {"base", "qs", "summary", "relative"}) + def test_component(self, string, attributes, component): + instance = URI(string) + value = getattr(instance, component, SENTINEL) + + if component not in attributes: + assert value in (None, SENTINEL, "") + return + + assert value == attributes[component] class TestURIBasics: - def test_uri_error(self): - with pytest.raises(TypeError): - URI(foo="bar") - - def test_empty(self): - instance = URI() - assert str(instance) == "" - assert not instance - - def test_html_representation(self, instance): - markupsafe = pytest.importorskip('markupsafe') - - html = markupsafe.escape(instance) - expect = 'example.com/over/there' - - assert html == expect - - def test_protocol_relative_shortcut(self, instance): - https = URI("https://") - - instance = https // instance - assert str(instance) == "https://user:pass@example.com/over/there?name=ferret#anchor" - - def test_rooted(self, instance): - instance = instance / "/foo" - assert str(instance) == "http://user:pass@example.com/foo" - - def test_relative(self, instance): - instance = instance / "foo" - assert str(instance) == "http://user:pass@example.com/over/foo" - - def test_relative_assignment(self, instance): - instance /= "bar" - assert str(instance) == "http://user:pass@example.com/over/bar" - - def test_resolution_by_uri(self, instance): - assert str(instance.resolve('/baz')) == "http://user:pass@example.com/baz" - assert str(instance.resolve('baz')) == "http://user:pass@example.com/over/baz" - - def test_resolution_overriding(self, instance): - expect = "http://example.com/over/there?name=ferret#anchor" - assert str(instance.resolve(user=None, password=None)) == expect - - def test_resolution_error(self, instance): - with pytest.raises(TypeError): - instance.resolve(unknown="fnord") - - def test_qs_assignment(self): - instance = URI("http://example.com") - assert str(instance) == "http://example.com/" - - instance.qs = "foo=bar" - assert str(instance) == "http://example.com/?foo=bar" - - def test_path_usage(self): - path = Path("/foo/bar/baz") - instance = URI(path) - assert instance.scheme == 'file' - assert str(instance) == "file:///foo/bar/baz" - - def test_group_assignment(self, empty): - with pytest.raises(TypeError): - empty.authority = "bobdole.com" - - def test_protocol_assignment(self, empty): - assert empty.scheme == 'http' - - empty.scheme = b'ftp' - assert empty.scheme == 'ftp' - - def test_empty_protocol_assignment(self, empty): - assert empty.scheme == 'http' - - empty.scheme = None - assert str(empty) == "//example.com/over/there" - - def test_bad_assignment(self, empty): - with pytest.raises(AttributeError): - empty.safe_uri = 'http://example.com' - - def test_rooted_path_authority_resolution(self): - uri = URI('http://example.com/diz') - uri.path = '/foo/bar' - assert str(uri) == "http://example.com/foo/bar" - - def test_rootless_path_authority_error(self): - uri = URI('http://example.com') - - with pytest.raises(ValueError): - uri.path = 'foo/bar' + def test_uri_error(self): + with pytest.raises(TypeError): + URI(foo="bar") + + def test_empty(self): + instance = URI() + assert str(instance) == "" + assert not instance + + def test_html_representation(self, instance): + markupsafe = pytest.importorskip("markupsafe") + + html = markupsafe.escape(instance) + expect = 'example.com/over/there' + + assert html == expect + + def test_protocol_relative_shortcut(self, instance): + https = URI("https://") + + instance = https // instance + assert str(instance) == "https://user:pass@example.com/over/there?name=ferret#anchor" + + def test_rooted(self, instance): + instance = instance / "/foo" + assert str(instance) == "http://user:pass@example.com/foo" + + def test_relative(self, instance): + instance = instance / "foo" + assert str(instance) == "http://user:pass@example.com/over/foo" + + def test_relative_assignment(self, instance): + instance /= "bar" + assert str(instance) == "http://user:pass@example.com/over/bar" + + def test_resolution_by_uri(self, instance): + assert str(instance.resolve("/baz")) == "http://user:pass@example.com/baz" + assert str(instance.resolve("baz")) == "http://user:pass@example.com/over/baz" + + def test_resolution_overriding(self, instance): + expect = "http://example.com/over/there?name=ferret#anchor" + assert str(instance.resolve(user=None, password=None)) == expect + + def test_resolution_error(self, instance): + with pytest.raises(TypeError): + instance.resolve(unknown="fnord") + + def test_qs_assignment(self): + instance = URI("http://example.com") + assert str(instance) == "http://example.com/" + + instance.qs = "foo=bar" + assert str(instance) == "http://example.com/?foo=bar" + + def test_path_usage(self): + path = Path("/foo/bar/baz") + instance = URI(path) + assert instance.scheme == "file" + assert str(instance) == "file:///foo/bar/baz" + + def test_group_assignment(self, empty): + with pytest.raises(TypeError): + empty.authority = "bobdole.com" + + def test_protocol_assignment(self, empty): + assert empty.scheme == "http" + + empty.scheme = b"ftp" + assert empty.scheme == "ftp" + + def test_empty_protocol_assignment(self, empty): + assert empty.scheme == "http" + + empty.scheme = None + assert str(empty) == "//example.com/over/there" + + def test_bad_assignment(self, empty): + with pytest.raises(AttributeError): + empty.safe_uri = "http://example.com" + + def test_rooted_path_authority_resolution(self): + uri = URI("http://example.com/diz") + uri.path = "/foo/bar" + assert str(uri) == "http://example.com/foo/bar" + + def test_rootless_path_authority_error(self): + uri = URI("http://example.com") + + with pytest.raises(ValueError): + uri.path = "foo/bar" class TestURIDictlike: - def test_get(self, instance): - assert instance['name'] == 'ferret' - - def test_get_authenticated(self, instance): - secure = instance['username':'password'] - assert instance is not secure - assert secure.user == 'username' - assert secure.password == 'password' - assert str(secure) == 'http://username:password@example.com/over/there?name=ferret#anchor' - - def test_set_new(self, instance, empty): - instance['foo'] = 'bar' - assert str(instance) == 'http://user:pass@example.com/over/there?name=ferret&foo=bar#anchor' - - empty['bar'] = 'baz' - assert str(empty) == 'http://example.com/over/there?bar=baz' - - def test_set_replace(self, instance): - instance['name'] = 'lemur' - assert str(instance) == 'http://user:pass@example.com/over/there?name=lemur#anchor' - - def test_del(self, instance): - del instance['name'] - assert str(instance) == 'http://user:pass@example.com/over/there#anchor' - - def test_iter(self, instance): - assert list(instance) == ["name=ferret"] - - def test_get_fail(self, instance, empty): - with pytest.raises(KeyError): - instance['foo'] - - with pytest.raises(KeyError): - empty['name'] - - def test_repr(self, instance, empty): - assert repr(instance) == "URI('http://user@example.com/over/there?name=ferret#anchor')" - assert repr(empty) == "URI('http://example.com/over/there')" + def test_get(self, instance): + assert instance["name"] == "ferret" + + def test_get_authenticated(self, instance): + secure = instance["username":"password"] + assert instance is not secure + assert secure.user == "username" + assert secure.password == "password" + assert str(secure) == "http://username:password@example.com/over/there?name=ferret#anchor" + + def test_set_new(self, instance, empty): + instance["foo"] = "bar" + assert str(instance) == "http://user:pass@example.com/over/there?name=ferret&foo=bar#anchor" + + empty["bar"] = "baz" + assert str(empty) == "http://example.com/over/there?bar=baz" + + def test_set_replace(self, instance): + instance["name"] = "lemur" + assert str(instance) == "http://user:pass@example.com/over/there?name=lemur#anchor" + + def test_del(self, instance): + del instance["name"] + assert str(instance) == "http://user:pass@example.com/over/there#anchor" + + def test_iter(self, instance): + assert list(instance) == ["name=ferret"] + + def test_get_fail(self, instance, empty): + with pytest.raises(KeyError): + instance["foo"] + + with pytest.raises(KeyError): + empty["name"] + + def test_repr(self, instance, empty): + assert repr(instance) == "URI('http://user@example.com/over/there?name=ferret#anchor')" + assert repr(empty) == "URI('http://example.com/over/there')" diff --git a/test/test_url_normalize.py b/test/test_url_normalize.py index 0d56442..d993b59 100644 --- a/test/test_url_normalize.py +++ b/test/test_url_normalize.py @@ -9,102 +9,112 @@ from uri.uri import URI URI_COMPONENTS = [ - # From test_deconstruct_url.py EXPECTED_DATA - ('http://site.com/', dict( # ** for identity test to pass, must have path - scheme = 'http', - authority = 'site.com', - heirarchical = 'site.com/', - password = None, - host = 'site.com', - port = None, - path = Path('/'), # ** - relative = False, - summary = 'site.com/', - base = 'http://site.com/', - )), - ('http://user@www.example.com:8080/path/index.html?param=val#fragment', dict( - scheme = 'http', - auth = 'user', - authentication = 'user', - authority = 'user@www.example.com:8080', - heirarchical = 'user@www.example.com:8080/path/index.html', - host = 'www.example.com', - port = 8080, - path = Path('/path/index.html'), # ** - user = 'user', - username = 'user', - relative = False, - summary = 'www.example.com/path/index.html', - base = 'http://user@www.example.com:8080/path/index.html', - query = 'param=val', - qs = 'param=val', - fragment = 'fragment', - )), - # From test_normalize_host.py - ('http://пример.испытание/', dict( # ** for identity test to pass, must provide encoded form - scheme = 'http', - authority = 'xn--e1afmkfd.xn--80akhbyknj4f', - heirarchical = 'xn--e1afmkfd.xn--80akhbyknj4f/', - password = None, - host = 'пример.испытание', - port = None, - path = Path('/'), - relative = False, - summary = 'пример.испытание/', - base = 'http://xn--e1afmkfd.xn--80akhbyknj4f/', - uri = 'http://xn--e1afmkfd.xn--80akhbyknj4f/', - )), - ] + # From test_deconstruct_url.py EXPECTED_DATA + ( + "http://site.com/", + dict( # ** for identity test to pass, must have path + scheme="http", + authority="site.com", + heirarchical="site.com/", + password=None, + host="site.com", + port=None, + path=Path("/"), # ** + relative=False, + summary="site.com/", + base="http://site.com/", + ), + ), + ( + "http://user@www.example.com:8080/path/index.html?param=val#fragment", + dict( + scheme="http", + auth="user", + authentication="user", + authority="user@www.example.com:8080", + heirarchical="user@www.example.com:8080/path/index.html", + host="www.example.com", + port=8080, + path=Path("/path/index.html"), # ** + user="user", + username="user", + relative=False, + summary="www.example.com/path/index.html", + base="http://user@www.example.com:8080/path/index.html", + query="param=val", + qs="param=val", + fragment="fragment", + ), + ), + # From test_normalize_host.py + ( + "http://пример.испытание/", + dict( # ** for identity test to pass, must provide encoded form + scheme="http", + authority="xn--e1afmkfd.xn--80akhbyknj4f", + heirarchical="xn--e1afmkfd.xn--80akhbyknj4f/", + password=None, + host="пример.испытание", + port=None, + path=Path("/"), + relative=False, + summary="пример.испытание/", + base="http://xn--e1afmkfd.xn--80akhbyknj4f/", + uri="http://xn--e1afmkfd.xn--80akhbyknj4f/", + ), + ), +] for _uri, _parts in URI_COMPONENTS: - _parts.setdefault('uri', _uri) - if 'query' in _parts: _parts['qs'] = _parts['query'] - if 'host' in _parts: _parts['hostname'] = _parts['host'] - _parts.setdefault('length', len(_parts.get('uri', _uri))) + _parts.setdefault("uri", _uri) + if "query" in _parts: + _parts["qs"] = _parts["query"] + if "host" in _parts: + _parts["hostname"] = _parts["host"] + _parts.setdefault("length", len(_parts.get("uri", _uri))) def test_normalize_scheme(): - instance = URI('http://site.com/') - assert instance.scheme == 'http' - - instance = URI('HTTP://site.com/') - assert instance.scheme == 'http' + instance = URI("http://site.com/") + assert instance.scheme == "http" + + instance = URI("HTTP://site.com/") + assert instance.scheme == "http" def test_normalize_host(): - instance = URI('http://SITE.COM/') - assert instance.host == 'site.com' - - instance = URI('http://site.com./') - assert instance.host == 'site.com' + instance = URI("http://SITE.COM/") + assert instance.host == "site.com" + + instance = URI("http://site.com./") + assert instance.host == "site.com" -@pytest.mark.parametrize('string,attributes', URI_COMPONENTS) +@pytest.mark.parametrize("string,attributes", URI_COMPONENTS) class TestURLNormalize: - def test_truthiness(self, string, attributes): - instance = URI(string) - assert instance - - def test_identity(self, string, attributes): - instance = URI(string) - assert str(instance) == attributes['uri'] - - def test_identity_bytes(self, string, attributes): - instance = URI(string) - assert bytes(instance) == attributes['uri'].encode('utf-8') - - def test_identity_comparison(self, string, attributes): - instance = URI(string) - assert instance == attributes['uri'] - - @pytest.mark.parametrize('component', URI.__all_parts__ | {'base', 'qs', 'summary', 'relative'}) - def test_component(self, string, attributes, component): - instance = URI(string) - value = getattr(instance, component, SENTINEL) - - if component not in attributes: - assert value in (None, SENTINEL, '') - return - - assert value == attributes[component] + def test_truthiness(self, string, attributes): + instance = URI(string) + assert instance + + def test_identity(self, string, attributes): + instance = URI(string) + assert str(instance) == attributes["uri"] + + def test_identity_bytes(self, string, attributes): + instance = URI(string) + assert bytes(instance) == attributes["uri"].encode("utf-8") + + def test_identity_comparison(self, string, attributes): + instance = URI(string) + assert instance == attributes["uri"] + + @pytest.mark.parametrize("component", URI.__all_parts__ | {"base", "qs", "summary", "relative"}) + def test_component(self, string, attributes, component): + instance = URI(string) + value = getattr(instance, component, SENTINEL) + + if component not in attributes: + assert value in (None, SENTINEL, "") + return + assert value == attributes[component] diff --git a/test/test_whatwg.py b/test/test_whatwg.py index 14a349e..53e4c13 100644 --- a/test/test_whatwg.py +++ b/test/test_whatwg.py @@ -9,341 +9,375 @@ from uri.uri import URI URI_COMPONENTS = [ - # From test_url.py - ('http://www.google.com:443/', dict( # test_url_scheme ** Changing scheme does not alter port. - scheme = 'http', # ** We do not "correct" this, either. - authority = 'www.google.com:443', - heirarchical = 'www.google.com:443/', - password = None, - host = 'www.google.com', - hostname = 'www.google.com', - port = 443, - path = Path('/'), # ** - relative = False, - summary = 'www.google.com/', - base = 'http://www.google.com:443/', - )), - ('https://www.google.com/', dict( # test_url_host ** - scheme = 'https', - authority = 'www.google.com', - heirarchical = 'www.google.com/', - host = 'www.google.com', - hostname = 'www.google.com', - path = Path('/'), # ** - relative = False, - summary = 'www.google.com/', - base = 'https://www.google.com/', - )), - - # From test_special_cases.py - ('http://1.1.1.1 &@2.2.2.2/# @3.3.3.3', dict( # test_spaces_with_multiple_ipv4_addresses ** - scheme = 'http', - authority = '1.1.1.1 &@2.2.2.2', # ** - heirarchical = '1.1.1.1 &@2.2.2.2/', - auth = '1.1.1.1 &', # ** - authentication = '1.1.1.1 &', # ** - user = '1.1.1.1 &', # ** - username = '1.1.1.1 &', # ** - host = '2.2.2.2', - fragment = ' @3.3.3.3', - path = Path('/'), - relative = False, - summary = '2.2.2.2/', - base = 'http://1.1.1.1 &@2.2.2.2/', - )), - ('http://google.com/#@evil.com/', dict( # test_fragment_with_hostname ** - scheme = 'http', - authority = 'google.com', - heirarchical = 'google.com/', - host = 'google.com', - path = Path('/'), - fragment = '@evil.com/', - relative = False, - base = 'http://google.com/', - summary = 'google.com/', - )), - ('http://foo@evil.com:80@google.com/', dict( # test_multiple_ats_within_authority - scheme = 'http', - authority = 'foo@evil.com:80@google.com', - auth = 'foo@evil.com:80', - heirarchical = 'foo@evil.com:80@google.com/', - host = 'google.com', - user = 'foo@evil.com', # ** - password = '80', - path = Path('/'), - summary = 'google.com/', - authentication = 'foo@evil.com:80', - relative = False, - base = 'http://foo@evil.com:80@google.com/', - username = 'foo@evil.com', - )), - ('http://foo@evil.com:80 @google.com/', dict( # test_multiple_ats_and_space_within_authority ** - scheme = 'http', - authority = 'foo@evil.com:80 @google.com', - authentication = 'foo@evil.com:80 ', - heirarchical = 'foo@evil.com:80 @google.com/', - host = 'google.com', - user = 'foo@evil.com', # ** - username = 'foo@evil.com', # ** - password = '80 ', # ** - path = Path('/'), - auth = 'foo@evil.com:80 ', - relative = False, - summary = 'google.com/', - base = 'http://foo@evil.com:80 @google.com/', - )), - ('http://orange.tw/sandbox/NN/passwd', dict( # test_unicode_double_dot_if_stripped_bom - scheme = 'http', - authority = 'orange.tw', - heirarchical = 'orange.tw/sandbox/NN/passwd', - host = 'orange.tw', - path = Path('/sandbox/NN/passwd'), # ** - relative = False, - summary = 'orange.tw/sandbox/NN/passwd', - base = 'http://orange.tw/sandbox/NN/passwd', - )), - ('http://127.0.0.1\tfoo.google.com/', dict( # test_host_contains_tab_in_authority ** - scheme = 'http', - authority = '127.0.0.1\tfoo.google.com', - heirarchical = '127.0.0.1\tfoo.google.com/', - host = '127.0.0.1\tfoo.google.com', # ** - path = Path('/'), - relative = False, - base = 'http://127.0.0.1\tfoo.google.com/', - summary = '127.0.0.1\tfoo.google.com/', - )), - # Omitted: test_host_contains_tab_in_authority_single_or_double_encoded, test_injection_within_authority - ('http://localhost\\@google.com:12345/', dict( # test_backslash_within_authority ** - scheme = 'http', - authority = 'localhost\\@google.com:12345', - auth = 'localhost\\', - authentication = 'localhost\\', - heirarchical = 'localhost\\@google.com:12345/', - user = 'localhost\\', - username = 'localhost\\', - host = 'google.com', # ** - port = 12345, - path = Path('/'), # ** - relative = False, - base = 'http://localhost\\@google.com:12345/', - summary = 'google.com/', - )), - ] + # From test_url.py + ( + "http://www.google.com:443/", + dict( # test_url_scheme ** Changing scheme does not alter port. + scheme="http", # ** We do not "correct" this, either. + authority="www.google.com:443", + heirarchical="www.google.com:443/", + password=None, + host="www.google.com", + hostname="www.google.com", + port=443, + path=Path("/"), # ** + relative=False, + summary="www.google.com/", + base="http://www.google.com:443/", + ), + ), + ( + "https://www.google.com/", + dict( # test_url_host ** + scheme="https", + authority="www.google.com", + heirarchical="www.google.com/", + host="www.google.com", + hostname="www.google.com", + path=Path("/"), # ** + relative=False, + summary="www.google.com/", + base="https://www.google.com/", + ), + ), + # From test_special_cases.py + ( + "http://1.1.1.1 &@2.2.2.2/# @3.3.3.3", + dict( # test_spaces_with_multiple_ipv4_addresses ** + scheme="http", + authority="1.1.1.1 &@2.2.2.2", # ** + heirarchical="1.1.1.1 &@2.2.2.2/", + auth="1.1.1.1 &", # ** + authentication="1.1.1.1 &", # ** + user="1.1.1.1 &", # ** + username="1.1.1.1 &", # ** + host="2.2.2.2", + fragment=" @3.3.3.3", + path=Path("/"), + relative=False, + summary="2.2.2.2/", + base="http://1.1.1.1 &@2.2.2.2/", + ), + ), + ( + "http://google.com/#@evil.com/", + dict( # test_fragment_with_hostname ** + scheme="http", + authority="google.com", + heirarchical="google.com/", + host="google.com", + path=Path("/"), + fragment="@evil.com/", + relative=False, + base="http://google.com/", + summary="google.com/", + ), + ), + ( + "http://foo@evil.com:80@google.com/", + dict( # test_multiple_ats_within_authority + scheme="http", + authority="foo@evil.com:80@google.com", + auth="foo@evil.com:80", + heirarchical="foo@evil.com:80@google.com/", + host="google.com", + user="foo@evil.com", # ** + password="80", + path=Path("/"), + summary="google.com/", + authentication="foo@evil.com:80", + relative=False, + base="http://foo@evil.com:80@google.com/", + username="foo@evil.com", + ), + ), + ( + "http://foo@evil.com:80 @google.com/", + dict( # test_multiple_ats_and_space_within_authority ** + scheme="http", + authority="foo@evil.com:80 @google.com", + authentication="foo@evil.com:80 ", + heirarchical="foo@evil.com:80 @google.com/", + host="google.com", + user="foo@evil.com", # ** + username="foo@evil.com", # ** + password="80 ", # ** + path=Path("/"), + auth="foo@evil.com:80 ", + relative=False, + summary="google.com/", + base="http://foo@evil.com:80 @google.com/", + ), + ), + ( + "http://orange.tw/sandbox/NN/passwd", + dict( # test_unicode_double_dot_if_stripped_bom + scheme="http", + authority="orange.tw", + heirarchical="orange.tw/sandbox/NN/passwd", + host="orange.tw", + path=Path("/sandbox/NN/passwd"), # ** + relative=False, + summary="orange.tw/sandbox/NN/passwd", + base="http://orange.tw/sandbox/NN/passwd", + ), + ), + ( + "http://127.0.0.1\tfoo.google.com/", + dict( # test_host_contains_tab_in_authority ** + scheme="http", + authority="127.0.0.1\tfoo.google.com", + heirarchical="127.0.0.1\tfoo.google.com/", + host="127.0.0.1\tfoo.google.com", # ** + path=Path("/"), + relative=False, + base="http://127.0.0.1\tfoo.google.com/", + summary="127.0.0.1\tfoo.google.com/", + ), + ), + # Omitted: test_host_contains_tab_in_authority_single_or_double_encoded, test_injection_within_authority + ( + "http://localhost\\@google.com:12345/", + dict( # test_backslash_within_authority ** + scheme="http", + authority="localhost\\@google.com:12345", + auth="localhost\\", + authentication="localhost\\", + heirarchical="localhost\\@google.com:12345/", + user="localhost\\", + username="localhost\\", + host="google.com", # ** + port=12345, + path=Path("/"), # ** + relative=False, + base="http://localhost\\@google.com:12345/", + summary="google.com/", + ), + ), +] for _uri, _parts in URI_COMPONENTS: - _parts['uri'] = _uri - if 'query' in _parts: _parts['qs'] = _parts['query'] - if 'host' in _parts: _parts['hostname'] = _parts['host'] + _parts["uri"] = _uri + if "query" in _parts: + _parts["qs"] = _parts["query"] + if "host" in _parts: + _parts["hostname"] = _parts["host"] @pytest.fixture def instance(): - return URI('http://user:pass@example.com/over/there?name=ferret#anchor') + return URI("http://user:pass@example.com/over/there?name=ferret#anchor") @pytest.fixture def empty(): - return URI('http://example.com/over/there') + return URI("http://example.com/over/there") -@pytest.mark.parametrize('string,attributes', URI_COMPONENTS) +@pytest.mark.parametrize("string,attributes", URI_COMPONENTS) class TestWhatwgURI: - def test_truthiness(self, string, attributes): - instance = URI(string) - assert instance - - def test_identity(self, string, attributes): - instance = URI(string) - assert str(instance) == attributes['uri'] - - def test_identity_bytes(self, string, attributes): - instance = URI(string) - assert bytes(instance) == attributes['uri'].encode('utf-8') - - def test_identity_comparison(self, string, attributes): - instance = URI(string) - assert instance == attributes['uri'] - - def test_inverse_bad_comparison(self, string, attributes): - instance = URI(string) - assert instance != "fnord" - - def test_length(self, string, attributes): - instance = URI(string) - assert len(instance) == len(string) - - @pytest.mark.parametrize('component', URI.__all_parts__ | {'base', 'qs', 'summary', 'relative'}) - def test_component(self, string, attributes, component): - instance = URI(string) - value = getattr(instance, component, SENTINEL) - - if component not in attributes: - assert value in (None, SENTINEL, '') - return - - assert value == attributes[component] + def test_truthiness(self, string, attributes): + instance = URI(string) + assert instance + + def test_identity(self, string, attributes): + instance = URI(string) + assert str(instance) == attributes["uri"] + + def test_identity_bytes(self, string, attributes): + instance = URI(string) + assert bytes(instance) == attributes["uri"].encode("utf-8") + + def test_identity_comparison(self, string, attributes): + instance = URI(string) + assert instance == attributes["uri"] + + def test_inverse_bad_comparison(self, string, attributes): + instance = URI(string) + assert instance != "fnord" + + def test_length(self, string, attributes): + instance = URI(string) + assert len(instance) == len(string) + + @pytest.mark.parametrize("component", URI.__all_parts__ | {"base", "qs", "summary", "relative"}) + def test_component(self, string, attributes, component): + instance = URI(string) + value = getattr(instance, component, SENTINEL) + + if component not in attributes: + assert value in (None, SENTINEL, "") + return + + assert value == attributes[component] class TestWhatwgURL: - def test_url_scheme(self): - url = URI('http://www.google.com:443/') # ** - url.scheme = 'https' - - assert url.scheme == 'https' - assert url.port == 443 # ** Whatwg-URL clears port on scheme changes. Unsure why. Only if default? - assert str(url) == 'https://www.google.com:443/' # ** We do not elide default port numbers. - - def test_url_host(self): - url = URI("https://www.google.com") - url.hostname = "example.com" - - assert url.hostname == "example.com" - assert str(url) == "https://example.com/" - - def test_url_port(self): - url = URI("https://www.example.com") - url.port = 123 - - assert url.port == 123 - assert url.host == "www.example.com" # ** We do not include port number in host name. - assert url.authority == "www.example.com:123" # It is includes in the authority, however. - assert str(url) == "https://www.example.com:123/" - - url.port = 443 - - assert url.port == 443 # ** Similarly, we don't treat default assignments as None assignments. - assert url.host == "www.example.com" - assert str(url) == "https://www.example.com:443/" # ** - - def test_relative_url_with_url_contained(self, instance): - url = URI('https://www.google.com').resolve('/redirect?target=http://localhost:61020/') - - assert url.scheme == 'https' - assert url.host == 'www.google.com' - assert url.path == Path('/redirect') - assert str(url.query) == "target=http%3A//localhost%3A61020/" # ** We automatically encode and correct. - - def test_url_user_info(self): - url = URI("https://github.com") - url.user = "username" - - assert url.username == "username" - assert url.password is None - assert str(url) == "https://username@github.com/" - - url.password = "password" - - assert url.username == "username" - assert url.password == "password" - assert str(url) == "https://username:password@github.com/" - - url.username = None - - assert url.username is None - assert url.password == "password" - assert str(url) == "https://:password@github.com/" - - url.password = None - - assert url.username is None - assert url.password is None - assert str(url) == "https://github.com/" - - def test_url_query(self): - url = URI("https://www.google.com") - url.query = "a=1" # ** Don't include the prefix yourself. - - assert url.qs == "a=1" - assert str(url) == "https://www.google.com/?a=1" - - url.query = "" - - assert url.query == "" - assert str(url) == "https://www.google.com/" # ** If empty or None, we do not emit the separator. - - url.query = None - - assert not url.query # ** It isn't literally None, but it is falsy if omitted or empty. - assert str(url) == "https://www.google.com/" - # The above is due to the fact that `.query` returns a rich, dict-like object which permits mutation. - # Assigning None just clears this mutable structure. - - def test_url_fragment(self): - url = URI("https://www.google.com") - url.fragment = "abc" - - assert url.fragment == "abc" - assert str(url) == "https://www.google.com/#abc" - - url.fragment = "" - - assert url.fragment == "" - assert str(url) == "https://www.google.com/" # ** None and an empty string are both interpreted as "none". - - url.fragment = None - - assert url.fragment is None - assert str(url) == "https://www.google.com/" - - def test_url_origin(self): # ** Not _entirely_ the same, as the components come back recombined, not as a tuple. - url = URI("https://www.google.com") - assert url.origin == "https://www.google.com" - - @pytest.mark.xfail(reason="Need to look into definition of 'origin' for URI generally.") - def test_url_blob_origin(self): - url = URI("blob:https://www.google.com") - - assert url.origin == URI("https://www.google.com").origin - - -@pytest.mark.parametrize('url', [ - "https://www.google.com/", - "http://user:pass@www.example.com/", - "http://:pass@www.example.com/", - "http://user@www.example.com/", - "http://www.example.com:432/", - "http://www.example.com/?a=1;B=c", - "http://www.example.com/#Fragment", - "http://username:password@www.example.com:1234/?query=string#fragment", - ]) -@pytest.mark.parametrize('attr', ['netloc', 'hostname', 'port', 'path', 'query', 'fragment', 'username', 'password']) + def test_url_scheme(self): + url = URI("http://www.google.com:443/") # ** + url.scheme = "https" + + assert url.scheme == "https" + assert url.port == 443 # ** Whatwg-URL clears port on scheme changes. Unsure why. Only if default? + assert str(url) == "https://www.google.com:443/" # ** We do not elide default port numbers. + + def test_url_host(self): + url = URI("https://www.google.com") + url.hostname = "example.com" + + assert url.hostname == "example.com" + assert str(url) == "https://example.com/" + + def test_url_port(self): + url = URI("https://www.example.com") + url.port = 123 + + assert url.port == 123 + assert url.host == "www.example.com" # ** We do not include port number in host name. + assert url.authority == "www.example.com:123" # It is includes in the authority, however. + assert str(url) == "https://www.example.com:123/" + + url.port = 443 + + assert url.port == 443 # ** Similarly, we don't treat default assignments as None assignments. + assert url.host == "www.example.com" + assert str(url) == "https://www.example.com:443/" # ** + + def test_relative_url_with_url_contained(self, instance): + url = URI("https://www.google.com").resolve("/redirect?target=http://localhost:61020/") + + assert url.scheme == "https" + assert url.host == "www.google.com" + assert url.path == Path("/redirect") + assert str(url.query) == "target=http%3A//localhost%3A61020/" # ** We automatically encode and correct. + + def test_url_user_info(self): + url = URI("https://github.com") + url.user = "username" + + assert url.username == "username" + assert url.password is None + assert str(url) == "https://username@github.com/" + + url.password = "password" + + assert url.username == "username" + assert url.password == "password" + assert str(url) == "https://username:password@github.com/" + + url.username = None + + assert url.username is None + assert url.password == "password" + assert str(url) == "https://:password@github.com/" + + url.password = None + + assert url.username is None + assert url.password is None + assert str(url) == "https://github.com/" + + def test_url_query(self): + url = URI("https://www.google.com") + url.query = "a=1" # ** Don't include the prefix yourself. + + assert url.qs == "a=1" + assert str(url) == "https://www.google.com/?a=1" + + url.query = "" + + assert url.query == "" + assert str(url) == "https://www.google.com/" # ** If empty or None, we do not emit the separator. + + url.query = None + + assert not url.query # ** It isn't literally None, but it is falsy if omitted or empty. + assert str(url) == "https://www.google.com/" + # The above is due to the fact that `.query` returns a rich, dict-like object which permits mutation. + # Assigning None just clears this mutable structure. + + def test_url_fragment(self): + url = URI("https://www.google.com") + url.fragment = "abc" + + assert url.fragment == "abc" + assert str(url) == "https://www.google.com/#abc" + + url.fragment = "" + + assert url.fragment == "" + assert str(url) == "https://www.google.com/" # ** None and an empty string are both interpreted as "none". + + url.fragment = None + + assert url.fragment is None + assert str(url) == "https://www.google.com/" + + def test_url_origin(self): # ** Not _entirely_ the same, as the components come back recombined, not as a tuple. + url = URI("https://www.google.com") + assert url.origin == "https://www.google.com" + + @pytest.mark.xfail(reason="Need to look into definition of 'origin' for URI generally.") + def test_url_blob_origin(self): + url = URI("blob:https://www.google.com") + + assert url.origin == URI("https://www.google.com").origin + + +@pytest.mark.parametrize( + "url", + [ + "https://www.google.com/", + "http://user:pass@www.example.com/", + "http://:pass@www.example.com/", + "http://user@www.example.com/", + "http://www.example.com:432/", + "http://www.example.com/?a=1;B=c", + "http://www.example.com/#Fragment", + "http://username:password@www.example.com:1234/?query=string#fragment", + ], +) +@pytest.mark.parametrize("attr", ["netloc", "hostname", "port", "path", "query", "fragment", "username", "password"]) def test_assert_same_urlparse_result(url, attr): - urllib = urlparse(url) - uri = URI(url) - - urllib_value = getattr(urllib, attr) - uri_value = getattr(uri, attr) - - if urllib_value == "" and uri_value is None: - pytest.xfail("URI uses None where urllib uses empty strings") - - elif isinstance(uri_value, Path): - assert urllib_value == str(uri_value) # First, ensure the string versions are equal... - pytest.xfail("URI uses rich Path objects where urllib uses strings, which compared OK") - - assert urllib_value == uri_value - - -@pytest.mark.parametrize(('base', 'href', 'expected'), [ - ("http://www.google.com/", "", "http://www.google.com/"), - ("http://www.google.com/", "/", "http://www.google.com/"), - ("http://www.google.com/", "maps/", "http://www.google.com/maps/"), - ("http://www.google.com/", "one/two/", "http://www.google.com/one/two/"), - ("http://www.google.com/mail", "/maps/", "http://www.google.com/maps/"), - ("http://www.google.com/", "./", "http://www.google.com/"), - ("http://www.google.com/maps", "..", "http://www.google.com/"), - ("http://www.google.com/", "https://www.google.com/", "https://www.google.com/"), - ("http://www.google.com/", "https://maps.google.com/", "https://maps.google.com/"), - ("https://www.google.com/", "https://www.google.com:1234/", "https://www.google.com:1234/"), - ("https://www.google.com/", "?query=string", "https://www.google.com/?query=string"), - ("https://www.google.com/", "#fragment", "https://www.google.com/#fragment"), - ("http://www.google.com/", "http://user:pass@www.google.com/", "http://user:pass@www.google.com/"), - ("http://www.google.com/", "http://user@www.google.com/", "http://user@www.google.com/"), - ("http://www.google.com/", "http://:pass@www.google.com/", "http://:pass@www.google.com/"), - ]) + urllib = urlparse(url) + uri = URI(url) + + urllib_value = getattr(urllib, attr) + uri_value = getattr(uri, attr) + + if urllib_value == "" and uri_value is None: + pytest.xfail("URI uses None where urllib uses empty strings") + + elif isinstance(uri_value, Path): + assert urllib_value == str(uri_value) # First, ensure the string versions are equal... + pytest.xfail("URI uses rich Path objects where urllib uses strings, which compared OK") + + assert urllib_value == uri_value + + +@pytest.mark.parametrize( + ("base", "href", "expected"), + [ + ("http://www.google.com/", "", "http://www.google.com/"), + ("http://www.google.com/", "/", "http://www.google.com/"), + ("http://www.google.com/", "maps/", "http://www.google.com/maps/"), + ("http://www.google.com/", "one/two/", "http://www.google.com/one/two/"), + ("http://www.google.com/mail", "/maps/", "http://www.google.com/maps/"), + ("http://www.google.com/", "./", "http://www.google.com/"), + ("http://www.google.com/maps", "..", "http://www.google.com/"), + ("http://www.google.com/", "https://www.google.com/", "https://www.google.com/"), + ("http://www.google.com/", "https://maps.google.com/", "https://maps.google.com/"), + ("https://www.google.com/", "https://www.google.com:1234/", "https://www.google.com:1234/"), + ("https://www.google.com/", "?query=string", "https://www.google.com/?query=string"), + ("https://www.google.com/", "#fragment", "https://www.google.com/#fragment"), + ("http://www.google.com/", "http://user:pass@www.google.com/", "http://user:pass@www.google.com/"), + ("http://www.google.com/", "http://user@www.google.com/", "http://user@www.google.com/"), + ("http://www.google.com/", "http://:pass@www.google.com/", "http://:pass@www.google.com/"), + ], +) def test_assert_same_urljoin_result(base, href, expected): - urllib = urljoin(base, href) - uri_resolve = URI(base).resolve(href) - uri_division = str(URI(base) / href) - - assert urllib == uri_resolve == uri_division == expected + urllib = urljoin(base, href) + uri_resolve = URI(base).resolve(href) + uri_division = str(URI(base) / href) + + assert urllib == uri_resolve == uri_division == expected diff --git a/uri/__init__.py b/uri/__init__.py index 24d73b1..ce6a1d6 100644 --- a/uri/__init__.py +++ b/uri/__init__.py @@ -13,14 +13,15 @@ from .uri import URI # The primary class exposed by this package to represent a URL or URI. try: # Discover installed package metadata... - _package = _metadata('uri') - __version__ = ", ".join(_package.get_all('version')) - __author__ = "\n".join(_package.get_all('author-email')) + _package = _metadata("uri") + __version__ = ", ".join(_package.get_all("version")) + __author__ = "\n".join(_package.get_all("author-email")) except _NotFound: # ...or generate "local development" version and author information. - __version__ = 'dev' - __author__ = f"Local Development <{getlogin()}@{gethostname()}>" + __version__ = "dev" + __author__ = f"Local Development <{getlogin()}@{gethostname()}>" __license__ = "MIT" -__all__ = list(set(i for i in locals() if not i.startswith('_')) - set(__all__)) # Declare module exports for `import *` use. - +__all__ = list( + set(i for i in locals() if not i.startswith("_")) - set(__all__) +) # Declare module exports for `import *` use. diff --git a/uri/bucket.py b/uri/bucket.py index 6614248..9d035c9 100644 --- a/uri/bucket.py +++ b/uri/bucket.py @@ -2,62 +2,61 @@ class Bucket: - """A bucket is a mutable container for an optionally named scalar value.""" - - __slots__ = ('name', 'value', 'sep', 'valid') - - def __init__(self, name, value='', sep="=", strict=False): - self.valid = True - self.sep = sep - - if not value: - if isinstance(name, str): - if name.count(sep) > 1: - if strict: raise ValueError(f"Multiple occurrences of separator {sep!r} in: '{name}'") - self.valid = False - - name, value = self.split(name) - - elif isinstance(name, Bucket): - name, value = name.name, name.value - - else: - name, value = name - - self.name = name - self.value = value - - def __eq__(self, other): - return str(self) == str(other) - - def __ne__(self, other): - return not str(self) == str(other) - - def split(self, string): - name, match, value = string.partition(self.sep) - - name = unquote_plus(name) - value = unquote_plus(value) - - return name if match else None, value if match else name - - def __repr__(self): - return "{}({})".format( - self.__class__.__name__, - str(self) - ) - - def __iter__(self): - if self.name is not None: # XXX: Confirm that empty string is permissible. - yield self.name - - yield self.value - - def __len__(self): - return 1 if self.name is None else 2 - - def __str__(self): - # Certain symbols are explicitly allowed, ref: http://pretty-rfc.herokuapp.com/RFC3986#query - iterator = (quote_plus(i.encode('utf8')).replace('%3F', '?').replace('%2F', '/') for i in self) if self.valid else self - return self.sep.join(iterator) + """A bucket is a mutable container for an optionally named scalar value.""" + __slots__ = ("name", "value", "sep", "valid") + + def __init__(self, name, value="", sep="=", strict=False): + self.valid = True + self.sep = sep + + if not value: + if isinstance(name, str): + if name.count(sep) > 1: + if strict: + raise ValueError(f"Multiple occurrences of separator {sep!r} in: '{name}'") + self.valid = False + + name, value = self.split(name) + + elif isinstance(name, Bucket): + name, value = name.name, name.value + + else: + name, value = name + + self.name = name + self.value = value + + def __eq__(self, other): + return str(self) == str(other) + + def __ne__(self, other): + return not str(self) == str(other) + + def split(self, string): + name, match, value = string.partition(self.sep) + + name = unquote_plus(name) + value = unquote_plus(value) + + return name if match else None, value if match else name + + def __repr__(self): + return "{}({})".format(self.__class__.__name__, str(self)) + + def __iter__(self): + if self.name is not None: # XXX: Confirm that empty string is permissible. + yield self.name + + yield self.value + + def __len__(self): + return 1 if self.name is None else 2 + + def __str__(self): + # Certain symbols are explicitly allowed, ref: http://pretty-rfc.herokuapp.com/RFC3986#query + iterator = ( + (quote_plus(i.encode("utf8")).replace("%3F", "?").replace("%2F", "/") for i in self) if self.valid else self + ) + return self.sep.join(iterator) diff --git a/uri/parse/db.py b/uri/parse/db.py index 029935c..4f663fe 100644 --- a/uri/parse/db.py +++ b/uri/parse/db.py @@ -5,32 +5,34 @@ from .. import URI -def parse_dburi(url:str, uppercase:bool=False) -> dict: - """Parse a given URL or URI string and return the component parts relevant for database connectivity. - - These come in the general UNIX form: - - engine://[user:pass@]host[:port]/database[?options] - """ - - uri = URI(url) - - parts = { - 'engine': str(uri.scheme), - 'name': uri.path.parts[0], - 'host': uri.host, - 'user': uri.user, - 'password': uri.password, - 'port': uri.port, - 'options': uri.query, - } - - if not uri.scheme: del parts['engine'] # Parity with dj-mongohq-url - - if ',' in parts['host']: - parts['hosts'] = [i.strip() for i in parts.pop('host').split(',')] - - if uppercase: - for k in list(parts): parts[k.upper()] = parts.pop(k) - - return parts +def parse_dburi(url: str, uppercase: bool = False) -> dict: + """Parse a given URL or URI string and return the component parts relevant for database connectivity. + + These come in the general UNIX form: + + engine://[user:pass@]host[:port]/database[?options] + """ + + uri = URI(url) + + parts = { + "engine": str(uri.scheme), + "name": uri.path.parts[0], + "host": uri.host, + "user": uri.user, + "password": uri.password, + "port": uri.port, + "options": uri.query, + } + + if not uri.scheme: + del parts["engine"] # Parity with dj-mongohq-url + + if "," in parts["host"]: + parts["hosts"] = [i.strip() for i in parts.pop("host").split(",")] + + if uppercase: + for k in list(parts): + parts[k.upper()] = parts.pop(k) + + return parts diff --git a/uri/part/auth.py b/uri/part/auth.py index b3c1a4a..72f5997 100644 --- a/uri/part/auth.py +++ b/uri/part/auth.py @@ -2,14 +2,14 @@ class AuthenticationPart(GroupPart): - __slots__ = () - - attributes = ('user', 'password') - suffix = '@' + __slots__ = () + + attributes = ("user", "password") + suffix = "@" class SafeAuthenticationPart(ProxyPart): - __slots__ = () - - attribute = 'user' - suffix = '@' + __slots__ = () + + attribute = "user" + suffix = "@" diff --git a/uri/part/authority.py b/uri/part/authority.py index 98da044..5d10e24 100644 --- a/uri/part/authority.py +++ b/uri/part/authority.py @@ -2,6 +2,6 @@ class AuthorityPart(GroupPart): - __slots__ = () - - attributes = ('auth', 'host', 'port') + __slots__ = () + + attributes = ("auth", "host", "port") diff --git a/uri/part/base.py b/uri/part/base.py index 80f2f6f..0bedd7d 100644 --- a/uri/part/base.py +++ b/uri/part/base.py @@ -6,69 +6,72 @@ class Part: - """Descriptor protocol objects for combinatorial string parts with validation.""" - - __slots__: Tuple[str, ...] = () - - valid: Pattern = r(r'.*') - prefix: str = '' - suffix: str = '' - empty: str = '' - - def render(self, obj, value:Optional[Stringy], raw:bool=False) -> str: - if not value: return self.empty - return self.prefix + str(value) + self.suffix - - @abstractmethod - def __get__(self, obj, cls:Optional[type]=None): - pass - - @abstractmethod - def __set__(self, obj, value) -> None: - pass + """Descriptor protocol objects for combinatorial string parts with validation.""" + + __slots__: Tuple[str, ...] = () + + valid: Pattern = r(r".*") + prefix: str = "" + suffix: str = "" + empty: str = "" + + def render(self, obj, value: Optional[Stringy], raw: bool = False) -> str: + if not value: + return self.empty + return self.prefix + str(value) + self.suffix + + @abstractmethod + def __get__(self, obj, cls: Optional[type] = None): + pass + + @abstractmethod + def __set__(self, obj, value) -> None: + pass class ProxyPart(Part): - __slots__: Tuple[str, ...] = () - - attribute: str - cast: Callable[[Any], str] = str - - def __get__(self, obj, cls=None) -> Union[str, 'ProxyPart']: - if obj is None: return self - return getattr(obj, self.attribute) - - def __set__(self, obj, value:Optional[Stringy]) -> None: - if value == b'': - value = None - - if value is not None: - value = self.cast(value) - - setattr(obj, self.attribute, value) + __slots__: Tuple[str, ...] = () + + attribute: str + cast: Callable[[Any], str] = str + + def __get__(self, obj, cls=None) -> Union[str, "ProxyPart"]: + if obj is None: + return self + return getattr(obj, self.attribute) + + def __set__(self, obj, value: Optional[Stringy]) -> None: + if value == b"": + value = None + + if value is not None: + value = self.cast(value) + + setattr(obj, self.attribute, value) class GroupPart(Part): - __slots__: Tuple[str, ...] = () - - attributes: Iterable[str] = () - sep: str = '' - - def __get__(self, obj, cls:Optional[type]=None) -> Union[str, 'GroupPart']: - if obj is None: return self - - cls = obj.__class__ - attrs = (getattr(cls, attr).render for attr in self.attributes) - values = (getattr(obj, attr) for attr in self.attributes) - pipeline = (attr(obj, value) for attr, value in zip(attrs, values)) - - return self.sep.join(i for i in pipeline if i) - - def __set__(self, obj, value): - raise TypeError("{0.__class__.__name__} is not assignable.".format(self)) + __slots__: Tuple[str, ...] = () + + attributes: Iterable[str] = () + sep: str = "" + + def __get__(self, obj, cls: Optional[type] = None) -> Union[str, "GroupPart"]: + if obj is None: + return self + + cls = obj.__class__ + attrs = (getattr(cls, attr).render for attr in self.attributes) + values = (getattr(obj, attr) for attr in self.attributes) + pipeline = (attr(obj, value) for attr, value in zip(attrs, values)) + + return self.sep.join(i for i in pipeline if i) + + def __set__(self, obj, value): + raise TypeError("{0.__class__.__name__} is not assignable.".format(self)) class BasePart(GroupPart): - __slots__: Tuple[str, ...] = () - - attributes: Tuple[str, ...] = ('scheme', 'heirarchical') + __slots__: Tuple[str, ...] = () + + attributes: Tuple[str, ...] = ("scheme", "heirarchical") diff --git a/uri/part/fragment.py b/uri/part/fragment.py index 4129293..da80def 100644 --- a/uri/part/fragment.py +++ b/uri/part/fragment.py @@ -2,7 +2,7 @@ class FragmentPart(ProxyPart): - __slots__ = () - - attribute = '_fragment' - prefix = '#' + __slots__ = () + + attribute = "_fragment" + prefix = "#" diff --git a/uri/part/heir.py b/uri/part/heir.py index c303a81..5aa7d79 100644 --- a/uri/part/heir.py +++ b/uri/part/heir.py @@ -2,6 +2,6 @@ class HeirarchicalPart(GroupPart): - __slots__ = () - - attributes = ('auth', 'host', 'port', 'path') + __slots__ = () + + attributes = ("auth", "host", "port", "path") diff --git a/uri/part/host.py b/uri/part/host.py index bc865ef..69b24a8 100644 --- a/uri/part/host.py +++ b/uri/part/host.py @@ -6,43 +6,43 @@ class HostPart(ProxyPart): - __slots__ = () - - attribute = '_host' - - def cast(self, value:str) -> str: - value = value.rstrip('.') # Remove extraneous "DNS root authority" notation. - - if value.startswith('xn--'): # Process IDNA - internationalized domain names. - value = value.encode('ascii').decode('idna') - - return value - - def render(self, obj, value, raw:bool=False) -> str: - result = super().render(obj, value) - - if result: - try: - if not raw: - result.encode('ascii') - except UnicodeEncodeError: - result = result.encode('idna').decode('ascii') - - try: # Identify and armour IPv6 address literals. - inet_pton(AF_INET6, value) - except SocketError: - pass - else: - result = '[' + result + ']' - - return result - - def __set__(self, obj, value): - if isinstance(value, bytes): - value = value.decode('idna') - elif value.startswith('xn--'): - value = value.encode('ascii').decode('idna') - - value = value.lower().rstrip('.') - - super().__set__(obj, value) + __slots__ = () + + attribute = "_host" + + def cast(self, value: str) -> str: + value = value.rstrip(".") # Remove extraneous "DNS root authority" notation. + + if value.startswith("xn--"): # Process IDNA - internationalized domain names. + value = value.encode("ascii").decode("idna") + + return value + + def render(self, obj, value, raw: bool = False) -> str: + result = super().render(obj, value) + + if result: + try: + if not raw: + result.encode("ascii") + except UnicodeEncodeError: + result = result.encode("idna").decode("ascii") + + try: # Identify and armour IPv6 address literals. + inet_pton(AF_INET6, value) + except SocketError: + pass + else: + result = "[" + result + "]" + + return result + + def __set__(self, obj, value): + if isinstance(value, bytes): + value = value.decode("idna") + elif value.startswith("xn--"): + value = value.encode("ascii").decode("idna") + + value = value.lower().rstrip(".") + + super().__set__(obj, value) diff --git a/uri/part/password.py b/uri/part/password.py index 8adec4b..7afa472 100644 --- a/uri/part/password.py +++ b/uri/part/password.py @@ -2,7 +2,7 @@ class PasswordPart(ProxyPart): - __slots__ = () - - attribute = '_password' - prefix = ':' + __slots__ = () + + attribute = "_password" + prefix = ":" diff --git a/uri/part/path.py b/uri/part/path.py index f80b1c5..94aa38e 100644 --- a/uri/part/path.py +++ b/uri/part/path.py @@ -5,40 +5,40 @@ class PathPart(ProxyPart): - __slots__ = () - - attribute = '_path' - cast = Path - empty = '/' - - def __get__(self, obj, cls=None): - value = super(PathPart, self).__get__(obj, cls) - - if value is None: - value = Path() - obj._trailing = False - - return value - - def __set__(self, obj, value): - value = str(value) - obj._trailing = value.endswith('/') - - if obj.authority and not value.startswith('/'): - raise ValueError("Can only assign rooted paths to URI with authority.") - - super(PathPart, self).__set__(obj, value) - - def render(self, obj, value, raw=False): - result = super(PathPart, self).render(obj, value, raw) - - if result is None or result == '.': - if not obj._host: - return '' - - return self.empty - - if obj._trailing and not result.endswith('/'): - result += '/' - - return result + __slots__ = () + + attribute = "_path" + cast = Path + empty = "/" + + def __get__(self, obj, cls=None): + value = super(PathPart, self).__get__(obj, cls) + + if value is None: + value = Path() + obj._trailing = False + + return value + + def __set__(self, obj, value): + value = str(value) + obj._trailing = value.endswith("/") + + if obj.authority and not value.startswith("/"): + raise ValueError("Can only assign rooted paths to URI with authority.") + + super(PathPart, self).__set__(obj, value) + + def render(self, obj, value, raw=False): + result = super(PathPart, self).render(obj, value, raw) + + if result is None or result == ".": + if not obj._host: + return "" + + return self.empty + + if obj._trailing and not result.endswith("/"): + result += "/" + + return result diff --git a/uri/part/port.py b/uri/part/port.py index 985f646..81197db 100644 --- a/uri/part/port.py +++ b/uri/part/port.py @@ -2,8 +2,8 @@ class PortPart(ProxyPart): - __slots__ = () - - attribute = '_port' - prefix = ':' - cast = int + __slots__ = () + + attribute = "_port" + prefix = ":" + cast = int diff --git a/uri/part/query.py b/uri/part/query.py index c88bfbe..38475ad 100644 --- a/uri/part/query.py +++ b/uri/part/query.py @@ -9,23 +9,23 @@ class QueryPart(ProxyPart): - __slots__ = () - - attribute = '_query' - prefix = '?' - terminator = '#' - cast = QSO - - def __get__(self, obj, cls=None) -> QSO: - result = super(QueryPart, self).__get__(obj, cls) - - if result is None: - result = obj._query = QSO() - - return result - - def __set__(self, obj, value:QSOLike): - if value is None: - value = '' - - super(QueryPart, self).__set__(obj, value) + __slots__ = () + + attribute = "_query" + prefix = "?" + terminator = "#" + cast = QSO + + def __get__(self, obj, cls=None) -> QSO: + result = super(QueryPart, self).__get__(obj, cls) + + if result is None: + result = obj._query = QSO() + + return result + + def __set__(self, obj, value: QSOLike): + if value is None: + value = "" + + super(QueryPart, self).__set__(obj, value) diff --git a/uri/part/scheme.py b/uri/part/scheme.py index 00ef045..78d389a 100644 --- a/uri/part/scheme.py +++ b/uri/part/scheme.py @@ -1,5 +1,5 @@ from importlib.metadata import entry_points -from typing import Any, ClassVar, Dict, Optional, Union #, Self +from typing import Any, ClassVar, Dict, Optional, Union # , Self from re import compile as r, Pattern from .base import Part @@ -7,56 +7,61 @@ class SchemePart(Part): - __slots__: tuple = () # Do not populate a __dict__ dictionary attribute; only allocate space for these. - - registry: ClassVar[Dict[str, Optional[Scheme]]] = {'': None} # Singleton cache of Scheme instances, by name. - suffix: str = ':' # Protocol suffix when utilized as part of a complete URI; e.g. ':' or '://'. - valid: Pattern = r(r'[a-z][a-z0-9+.+-]*') # Protocol/scheme name validated when run without optimization. - - def load(self, plugin:str) -> Scheme: - """Attempt to retrieve a Scheme for the given named protocol. - - Utilizes a cache, which results in URI utilizing singletons of each named protocol. - """ - - assert self.valid.match(plugin), f"Invalid plugin name: {plugin!r}" - if plugin in self.registry: return self.registry[plugin] # Short circuit if we've seen this before. - - # If we haven't, attempt to load the explicit Scheme subclass to utilize for this named scheme. - try: result = entry_points(group='uri.scheme')[plugin].load() - except KeyError: result = Scheme(plugin) # Can't look up by registered name? It's generic. - else: result = result(plugin) # Otherwise, instantiate the subclass, informing it of its name. - - self.registry[plugin] = result # Record the instance in a local registry / cache and return it. - - return result - - def render(self, obj, value, raw=False): - """Render the scheme component of a whole URI.""" - result = super(SchemePart, self).render(obj, value, raw) - - if obj._scheme and obj.scheme.slashed: - result = result + '//' - - elif not obj._scheme and obj.authority: - result = '//' - - return result - - def __get__(self, obj:Any, cls:Optional[type]=None) -> Optional[Union['SchemePart', Scheme]]: - """Accessed as a class attribute, return this instance, otherwise decant a Scheme from the containing object.""" - - if obj is None: return self - return None if obj._scheme is None else self.load(obj._scheme) - - def __set__(self, obj:Any, value:Optional[Union[str,bytes]]) -> None: - """Assign a new named scheme to this URI.""" - - if isinstance(value, bytes): - value = value.decode('ascii') - - if not value: - obj._scheme = None - return - - obj._scheme = self.load(value).name # This gives the plugin registry a chance to normalize the recorded name. + __slots__: tuple = () # Do not populate a __dict__ dictionary attribute; only allocate space for these. + + registry: ClassVar[Dict[str, Optional[Scheme]]] = {"": None} # Singleton cache of Scheme instances, by name. + suffix: str = ":" # Protocol suffix when utilized as part of a complete URI; e.g. ':' or '://'. + valid: Pattern = r(r"[a-z][a-z0-9+.+-]*") # Protocol/scheme name validated when run without optimization. + + def load(self, plugin: str) -> Scheme: + """Attempt to retrieve a Scheme for the given named protocol. + + Utilizes a cache, which results in URI utilizing singletons of each named protocol. + """ + + assert self.valid.match(plugin), f"Invalid plugin name: {plugin!r}" + if plugin in self.registry: + return self.registry[plugin] # Short circuit if we've seen this before. + + # If we haven't, attempt to load the explicit Scheme subclass to utilize for this named scheme. + try: + result = entry_points(group="uri.scheme")[plugin].load() + except KeyError: + result = Scheme(plugin) # Can't look up by registered name? It's generic. + else: + result = result(plugin) # Otherwise, instantiate the subclass, informing it of its name. + + self.registry[plugin] = result # Record the instance in a local registry / cache and return it. + + return result + + def render(self, obj, value, raw=False): + """Render the scheme component of a whole URI.""" + result = super(SchemePart, self).render(obj, value, raw) + + if obj._scheme and obj.scheme.slashed: + result = result + "//" + + elif not obj._scheme and obj.authority: + result = "//" + + return result + + def __get__(self, obj: Any, cls: Optional[type] = None) -> Optional[Union["SchemePart", Scheme]]: + """Accessed as a class attribute, return this instance, otherwise decant a Scheme from the containing object.""" + + if obj is None: + return self + return None if obj._scheme is None else self.load(obj._scheme) + + def __set__(self, obj: Any, value: Optional[Union[str, bytes]]) -> None: + """Assign a new named scheme to this URI.""" + + if isinstance(value, bytes): + value = value.decode("ascii") + + if not value: + obj._scheme = None + return + + obj._scheme = self.load(value).name # This gives the plugin registry a chance to normalize the recorded name. diff --git a/uri/part/uri.py b/uri/part/uri.py index 06c0ad4..b673064 100644 --- a/uri/part/uri.py +++ b/uri/part/uri.py @@ -4,39 +4,39 @@ class URIPart: - __slots__ = ('parts', 'writeable', 'raw') - - def __init__(self, parts, writeable=True, raw=False): - self.parts = parts - self.writeable = writeable - self.raw = raw - - def __get__(self, obj, cls=None): - components = [] - - for part in self.parts: - value = getattr(obj, part) - part = getattr(cls, part) - - components.append(part.render(obj, value, self.raw)) - - return "".join(components) - - def __set__(self, obj, value): - if not self.writeable: - raise AttributeError("Can not assign to read-only URI views.") - - for part in obj.__slots__: - setattr(obj, part, None) - - if not value: - return - - result = urlsplit(str(value)) - - obj._trailing = result.path.endswith('/') - - for part in ('scheme', 'username', 'password', 'hostname', 'port', 'path', 'query', 'fragment'): - pvalue = getattr(result, part) - if pvalue: - setattr(obj, part, pvalue) + __slots__ = ("parts", "writeable", "raw") + + def __init__(self, parts, writeable=True, raw=False): + self.parts = parts + self.writeable = writeable + self.raw = raw + + def __get__(self, obj, cls=None): + components = [] + + for part in self.parts: + value = getattr(obj, part) + part = getattr(cls, part) + + components.append(part.render(obj, value, self.raw)) + + return "".join(components) + + def __set__(self, obj, value): + if not self.writeable: + raise AttributeError("Can not assign to read-only URI views.") + + for part in obj.__slots__: + setattr(obj, part, None) + + if not value: + return + + result = urlsplit(str(value)) + + obj._trailing = result.path.endswith("/") + + for part in ("scheme", "username", "password", "hostname", "port", "path", "query", "fragment"): + pvalue = getattr(result, part) + if pvalue: + setattr(obj, part, pvalue) diff --git a/uri/part/user.py b/uri/part/user.py index fdb256e..576f03d 100644 --- a/uri/part/user.py +++ b/uri/part/user.py @@ -2,6 +2,6 @@ class UserPart(ProxyPart): - __slots__ = () - - attribute = '_user' + __slots__ = () + + attribute = "_user" diff --git a/uri/qso.py b/uri/qso.py index 6142bec..68fa80e 100644 --- a/uri/qso.py +++ b/uri/qso.py @@ -8,288 +8,289 @@ class QSO: - """A representation of a query string or parameter list. - - Acts as an ordered list of bucketed values, optionally with associated key names. Values are retrievable by index - or by name. In the event of multiple values for a given name, a view of the associated values will be returned. - - Acting as both a list and dictionary may be... odd. In our case, because preserving order is the priority, most - methods that "conflict" between the two protocols favour list-like operation, using Bucket instances as the basic - unit of communication. This helps to preserve the original position if values are updated, as one benefit. - Dictionary-like view methods are provided if you want to "break it down", however, again to preserve order, - values are iterated in their original order and keys may be repeated. - """ - - __slots__ = ('buckets', 'groups', 'assignment', 'separator', 'strict') - - def _parts(self, thing): - if isinstance(thing, QSO): - return (str(part) for part in thing.buckets) - - if isinstance(thing, Bucket): - return (str(thing), ) - - if isinstance(thing, Mapping): - return thing.items() - - if isinstance(thing, str): - if self.separator in thing: - return thing.split(self.separator) - else: - return (thing, ) - - return iter(thing) - - def __init__(self, q=None, assignment="=", separator="&", strict=False): - self.buckets = [] - self.groups = {} - self.assignment = assignment - self.separator = separator - self.strict = strict - - if q: - self.extend(q) - - # Core Python Protocols - - def __repr__(self): - return '{}("{}")'.format(self.__class__.__name__, str(self)) - - def __str__(self): - return self.separator.join(str(bucket) for bucket in self.buckets) - - # ABC Protocol Methods - - def __contains__(self, value): # Container, Collection - """Test if a given key is set.""" - - if isinstance(value, int): - return 0 <= value < len(self.buckets) - - return value in self.groups - - def __iter__(self): # Iterable, Collection - """Iterate the individual buckets.""" - - return iter(self.buckets) - - def __len__(self): # Sized, Collection - """The number of assigned buckets.""" - - return len(self.buckets) - - def __reversed__(self): # Reversible - """Iterate individual buckets, backwards.""" - - return reversed(self.buckets) - - def __getitem__(self, index): # Sequence - """Look up a bucket or buckets by numeric index or key.""" - - if isinstance(index, int): - return self.buckets[index] - - group = self.groups[index] - - if len(group) == 1: - return group[0].value - - return (bucket.value for bucket in group) - - def __setitem__(self, index, value): # MutableSequence - """Assign a value or bucket to a given index, or set a value by key. - - If there are multiple values for a key, all will be removed and a new value appended. - """ - - value = Bucket(value, sep=self.assignment, strict=self.strict) - - if isinstance(index, int): - bucket = self.buckets[index] - - if value.name is not None: - bucket.name = value.name - - bucket.value = value.value - return - - value.name = value.name or index - buckets = self.groups.get(index) - - if buckets: - if len(buckets) == 1: - buckets[0].name = value.name - buckets[0].value = value.value - return - - for bucket in list(buckets): - self.remove(bucket) - - self.append(value) - - def __delitem__(self, item): # MutableSequence - """Remove a specific bucket, bucket by numeric index, or remove all buckets with the given key. - - >>> base = QSO("foo=27&bar&baz=42&bar&diz&name=ferret") - >>> del base['foo'] - >>> base - QSO("bar&baz=42&bar&diz&name=ferret") - - >>> del base[1] - >>> base - QSO("bar&bar&diz&name=ferret") - - >>> del base['bar'] - >>> base - QSO("diz&name=ferret") - - >>> del base[base.buckets[1]] - >>> base - QSO("diz") - """ - - if isinstance(item, int): - item = self.buckets[item] - - if isinstance(item, Bucket): - self.buckets.remove(item) - self.groups[item.name].remove(item) - - if not self.groups[item.name]: # Clean up after ourselves. - del self.groups[item.name] - - return - - for bucket in list(self.groups[item]): - del self[bucket] - - def __iadd__(self, other): # MutableSequence - """Extend a current set of arguments with another set. - - Allows for "addition" of a variety of things, as per `QSO.extend`: - - >>> base = QSO("foo=27") - >>> base += "bar" - >>> base += {'baz': "42"} - >>> base += ['bar', 'diz'] - >>> base += Bucket('name', 'ferret') - >>> base - QSO("foo=27&bar&baz=42&bar&diz&name=ferret") - """ - - self.extend(other) - return self - - def __eq__(self, other): # Mapping - return str(self) == str(other) - - def __ne__(self, other): # Mapping - return not (self == other) - - # ABC Public Methods - - def index(self, bucket, start=None, stop=None): # Sequence - bucket = Bucket(bucket, sep=self.assignment, strict=self.strict) - return self.buckets.index(bucket) - - def count(self, thing): # Sequence - if not self.buckets: - return 0 - - if thing in self.groups: - return len(self.groups[thing]) - - return self.groups.get(None, []).count(Bucket(thing, sep=self.assignment, strict=self.strict)) - - def append(self, bucket): # MutableSequence - bucket = Bucket(bucket, sep=self.assignment, strict=self.strict) - self.buckets.append(bucket) - self.groups.setdefault(bucket.name, []).append(bucket) - - def insert(self, index, value): # MutableSequence - if index < 0: # Allow insertions at end-relative positions. - index = len(self.buckets) + index - index = min(len(self.buckets), index) - - bucket = Bucket(value, sep=self.assignment, strict=self.strict) - - count = 0 - for i, b in enumerate(self.buckets): - if i >= index: break - if b.name == bucket.name: - count += 1 - - self.buckets.insert(index, bucket) - self.groups.setdefault(b.name, []).insert(count, bucket) - - def extend(self, *args): # MutableSequence - for parts in args: - for part in self._parts(parts): - self.append(part) - - def remove(self, bucket): # MutableSequence - del self[bucket] - - def pop(self, key=SENTINEL, default=SENTINEL): # MutableSequence, MutableMapping - if key is SENTINEL: - key = -1 - - if isinstance(key, int): - try: - bucket = self.buckets[key] - except IndexError: - if default is SENTINEL: - raise KeyError() - return default - - del self[bucket] - return bucket - - try: - bucket = self.groups[key].pop() - except KeyError: - if default is SENTINEL: - raise - return default - - self.buckets.remove(bucket) - return bucket.value - - def reverse(self): # MutableSequence - self.buckets.reverse() - - for group in self.groups.values(): - group.reverse() - - def keys(self): # Mapping - return (bucket.name for bucket in self.buckets) - - def items(self): # Mapping - return (tuple(bucket) for bucket in self.buckets) - - def values(self): # Mapping - return (bucket.value for bucket in self.buckets) - - def get(self, bucket, default=None): # Mapping - if bucket in self: - return self[bucket] - - return default - - def clear(self): # MutableMapping - """Clear all values from this query string object.""" - - del self.buckets[:] - self.groups.clear() - - def update(self, *args, **kw): # MutableMapping - for parts in args: - for bucket in self._parts(parts): - bucket = Bucket(bucket, sep=self.assignment, strict=self.strict) - self[bucket.name] = bucket.value - - for key in kw: - self[key] = kw[key] + """A representation of a query string or parameter list. + + Acts as an ordered list of bucketed values, optionally with associated key names. Values are retrievable by index + or by name. In the event of multiple values for a given name, a view of the associated values will be returned. + + Acting as both a list and dictionary may be... odd. In our case, because preserving order is the priority, most + methods that "conflict" between the two protocols favour list-like operation, using Bucket instances as the basic + unit of communication. This helps to preserve the original position if values are updated, as one benefit. + Dictionary-like view methods are provided if you want to "break it down", however, again to preserve order, + values are iterated in their original order and keys may be repeated. + """ + + __slots__ = ("buckets", "groups", "assignment", "separator", "strict") + + def _parts(self, thing): + if isinstance(thing, QSO): + return (str(part) for part in thing.buckets) + + if isinstance(thing, Bucket): + return (str(thing),) + + if isinstance(thing, Mapping): + return thing.items() + + if isinstance(thing, str): + if self.separator in thing: + return thing.split(self.separator) + else: + return (thing,) + + return iter(thing) + + def __init__(self, q=None, assignment="=", separator="&", strict=False): + self.buckets = [] + self.groups = {} + self.assignment = assignment + self.separator = separator + self.strict = strict + + if q: + self.extend(q) + + # Core Python Protocols + + def __repr__(self): + return '{}("{}")'.format(self.__class__.__name__, str(self)) + + def __str__(self): + return self.separator.join(str(bucket) for bucket in self.buckets) + + # ABC Protocol Methods + + def __contains__(self, value): # Container, Collection + """Test if a given key is set.""" + + if isinstance(value, int): + return 0 <= value < len(self.buckets) + + return value in self.groups + + def __iter__(self): # Iterable, Collection + """Iterate the individual buckets.""" + + return iter(self.buckets) + + def __len__(self): # Sized, Collection + """The number of assigned buckets.""" + + return len(self.buckets) + + def __reversed__(self): # Reversible + """Iterate individual buckets, backwards.""" + + return reversed(self.buckets) + + def __getitem__(self, index): # Sequence + """Look up a bucket or buckets by numeric index or key.""" + + if isinstance(index, int): + return self.buckets[index] + + group = self.groups[index] + + if len(group) == 1: + return group[0].value + + return (bucket.value for bucket in group) + + def __setitem__(self, index, value): # MutableSequence + """Assign a value or bucket to a given index, or set a value by key. + + If there are multiple values for a key, all will be removed and a new value appended. + """ + + value = Bucket(value, sep=self.assignment, strict=self.strict) + + if isinstance(index, int): + bucket = self.buckets[index] + + if value.name is not None: + bucket.name = value.name + + bucket.value = value.value + return + + value.name = value.name or index + buckets = self.groups.get(index) + + if buckets: + if len(buckets) == 1: + buckets[0].name = value.name + buckets[0].value = value.value + return + + for bucket in list(buckets): + self.remove(bucket) + + self.append(value) + + def __delitem__(self, item): # MutableSequence + """Remove a specific bucket, bucket by numeric index, or remove all buckets with the given key. + + >>> base = QSO("foo=27&bar&baz=42&bar&diz&name=ferret") + >>> del base['foo'] + >>> base + QSO("bar&baz=42&bar&diz&name=ferret") + + >>> del base[1] + >>> base + QSO("bar&bar&diz&name=ferret") + + >>> del base['bar'] + >>> base + QSO("diz&name=ferret") + + >>> del base[base.buckets[1]] + >>> base + QSO("diz") + """ + + if isinstance(item, int): + item = self.buckets[item] + + if isinstance(item, Bucket): + self.buckets.remove(item) + self.groups[item.name].remove(item) + + if not self.groups[item.name]: # Clean up after ourselves. + del self.groups[item.name] + + return + + for bucket in list(self.groups[item]): + del self[bucket] + + def __iadd__(self, other): # MutableSequence + """Extend a current set of arguments with another set. + + Allows for "addition" of a variety of things, as per `QSO.extend`: + + >>> base = QSO("foo=27") + >>> base += "bar" + >>> base += {'baz': "42"} + >>> base += ['bar', 'diz'] + >>> base += Bucket('name', 'ferret') + >>> base + QSO("foo=27&bar&baz=42&bar&diz&name=ferret") + """ + + self.extend(other) + return self + + def __eq__(self, other): # Mapping + return str(self) == str(other) + + def __ne__(self, other): # Mapping + return not (self == other) + + # ABC Public Methods + + def index(self, bucket, start=None, stop=None): # Sequence + bucket = Bucket(bucket, sep=self.assignment, strict=self.strict) + return self.buckets.index(bucket) + + def count(self, thing): # Sequence + if not self.buckets: + return 0 + + if thing in self.groups: + return len(self.groups[thing]) + + return self.groups.get(None, []).count(Bucket(thing, sep=self.assignment, strict=self.strict)) + + def append(self, bucket): # MutableSequence + bucket = Bucket(bucket, sep=self.assignment, strict=self.strict) + self.buckets.append(bucket) + self.groups.setdefault(bucket.name, []).append(bucket) + + def insert(self, index, value): # MutableSequence + if index < 0: # Allow insertions at end-relative positions. + index = len(self.buckets) + index + index = min(len(self.buckets), index) + + bucket = Bucket(value, sep=self.assignment, strict=self.strict) + + count = 0 + for i, b in enumerate(self.buckets): + if i >= index: + break + if b.name == bucket.name: + count += 1 + + self.buckets.insert(index, bucket) + self.groups.setdefault(b.name, []).insert(count, bucket) + + def extend(self, *args): # MutableSequence + for parts in args: + for part in self._parts(parts): + self.append(part) + + def remove(self, bucket): # MutableSequence + del self[bucket] + + def pop(self, key=SENTINEL, default=SENTINEL): # MutableSequence, MutableMapping + if key is SENTINEL: + key = -1 + + if isinstance(key, int): + try: + bucket = self.buckets[key] + except IndexError: + if default is SENTINEL: + raise KeyError() + return default + + del self[bucket] + return bucket + + try: + bucket = self.groups[key].pop() + except KeyError: + if default is SENTINEL: + raise + return default + + self.buckets.remove(bucket) + return bucket.value + + def reverse(self): # MutableSequence + self.buckets.reverse() + + for group in self.groups.values(): + group.reverse() + + def keys(self): # Mapping + return (bucket.name for bucket in self.buckets) + + def items(self): # Mapping + return (tuple(bucket) for bucket in self.buckets) + + def values(self): # Mapping + return (bucket.value for bucket in self.buckets) + + def get(self, bucket, default=None): # Mapping + if bucket in self: + return self[bucket] + + return default + + def clear(self): # MutableMapping + """Clear all values from this query string object.""" + + del self.buckets[:] + self.groups.clear() + + def update(self, *args, **kw): # MutableMapping + for parts in args: + for bucket in self._parts(parts): + bucket = Bucket(bucket, sep=self.assignment, strict=self.strict) + self[bucket.name] = bucket.value + + for key in kw: + self[key] = kw[key] MutableMapping.register(QSO) diff --git a/uri/scheme.py b/uri/scheme.py index 010c4df..0d225cf 100644 --- a/uri/scheme.py +++ b/uri/scheme.py @@ -2,46 +2,46 @@ class Scheme: - __slots__ = ('name', ) - - slashed = False # Do NOT include // separator between scheme and remainder. - - def __init__(self, name:Stringy): - self.name = str(name).strip().lower() - - def __eq__(self, other:'SchemeLike'): - if isinstance(other, str): - return self.name == other - - if isinstance(other, self.__class__): - return self is other - - def __hash__(self) -> int: - return hash(self.name) - - def __neq__(self, other:'SchemeLike') -> bool: - return not (self == other) - - def __bytes__(self) -> bytes: - return self.name.encode('ascii') - - def __str__(self) -> str: - return self.name - - def __repr__(self): - return f"{self.__class__.__name__}('{self.name}')" - - def is_relative(self, uri) -> bool: - return False + __slots__ = ("name",) + + slashed = False # Do NOT include // separator between scheme and remainder. + + def __init__(self, name: Stringy): + self.name = str(name).strip().lower() + + def __eq__(self, other: "SchemeLike"): + if isinstance(other, str): + return self.name == other + + if isinstance(other, self.__class__): + return self is other + + def __hash__(self) -> int: + return hash(self.name) + + def __neq__(self, other: "SchemeLike") -> bool: + return not (self == other) + + def __bytes__(self) -> bytes: + return self.name.encode("ascii") + + def __str__(self) -> str: + return self.name + + def __repr__(self): + return f"{self.__class__.__name__}('{self.name}')" + + def is_relative(self, uri) -> bool: + return False class URLScheme(Scheme): - __slots__ = () - - slashed = True # DO include // separator between scheme and remainder. - - def is_relative(self, uri) -> bool: - return not uri._host or not uri._path.is_absolute() + __slots__ = () + + slashed = True # DO include // separator between scheme and remainder. + + def is_relative(self, uri) -> bool: + return not uri._host or not uri._path.is_absolute() -SchemeLike = Union[Stringy,Scheme] +SchemeLike = Union[Stringy, Scheme] diff --git a/uri/typing.py b/uri/typing.py index 349c646..f549787 100644 --- a/uri/typing.py +++ b/uri/typing.py @@ -2,41 +2,41 @@ class Stringy(Protocol): - """Objects implementing this protocol may be cast to strings by way of `str()`.""" - - __slots__ = () - - @abstractmethod - def __str__(self) -> str: - pass + """Objects implementing this protocol may be cast to strings by way of `str()`.""" + + __slots__ = () + + @abstractmethod + def __str__(self) -> str: + pass class PathURI(Protocol): - """Some objects may implement an `as_uri` method which returns a URI instance.""" - - __slots__ = () - - @abstractmethod - def as_uri(self) -> Optional['URI']: - pass + """Some objects may implement an `as_uri` method which returns a URI instance.""" + + __slots__ = () + + @abstractmethod + def as_uri(self) -> Optional["URI"]: + pass class Linkable(Protocol): - """Some objects may expose a URI instance by way of `__link__` attribute.""" - - __slots__ = () - - __link__: 'URI' + """Some objects may expose a URI instance by way of `__link__` attribute.""" + + __slots__ = () + + __link__: "URI" class LinkableMethod(Protocol): - """The dynamic version of Linkable, where `__link__` is a method similar to `as_uri`.""" - - __slots__ = () - - @abstractmethod - def __link__(self) -> 'URI': - pass + """The dynamic version of Linkable, where `__link__` is a method similar to `as_uri`.""" + + __slots__ = () + + @abstractmethod + def __link__(self) -> "URI": + pass # Any object that may in some way provide a URI. diff --git a/uri/uri.py b/uri/uri.py index f88b56c..8fbbfa6 100644 --- a/uri/uri.py +++ b/uri/uri.py @@ -22,291 +22,308 @@ class URI: - """An object representing a URI (absolute or relative) and its components. - - Acts as a mutable mapping for manipulation of query string arguments. If the query string is not URL - "form encoded" attempts at mapping access or manipulation will fail with a ValueError. No effort is made to - preserve original query string key order. Repeated keys will have lists as values. - """ - - # Skip allocation of a dictionary per instance by pre-defining available slots. - __slots__ = ('_scheme', '_user', '_password', '_host', '_port', '_path', '_trailing', '_query', '_fragment') - - __parts__ = ('scheme', 'authority', 'path', 'query', 'fragment') - __origin_parts__ = ('scheme', 'authority') - __safe_parts__ = ('scheme', '_safe_auth', 'host', 'port', 'path', 'query', 'fragment') - __all_parts__ = {'scheme', 'user', 'password', 'host', 'port', 'path', 'query', 'fragment', 'auth', 'authority', - 'heirarchical', 'uri', 'username', 'hostname', 'authentication'} - - # Scalar Parts - scheme:Union[str, Scheme] = SchemePart() - user:str = UserPart() - password:str = PasswordPart() - host:str = HostPart() - port:int = PortPart() - path:Union[str, Path] = PathPart() - query = QueryPart() - fragment:Optional[str] = FragmentPart() - - # Compound Parts - auth = AuthenticationPart() - _safe_auth = SafeAuthenticationPart() - authority = netloc = AuthorityPart() - heirarchical = HeirarchicalPart() - - # Additional Compound Interfaces - uri = URIPart(__parts__) # Whole-URI retrieval or storage as string. - origin = URIPart(__origin_parts__) # The top-level "origin" for this URL. - safe = safe_uri = URIPart(__safe_parts__, False) # URI retrieval without password component, useful for logging. - base = BasePart() - origin = URIPart(('scheme', 'host', 'port'), False) - summary = URIPart(('host', 'path'), False, True) - resource = URIPart(('path', 'query', 'fragment'), False) - defrag = URIPart(tuple([i for i in __all_parts__ if i != 'fragment'])) # Fragments are not sent to web servers. - - # Common Aliases - username = user - hostname = host - credentials = authentication = userinfo = auth - netloc = authority - - # Factories - - @classmethod - def from_wsgi(URI, environ) -> 'URI': - if hasattr(environ, 'environ'): # Incidentally support passing of a variety of Request object wrappers. - environ = environ.environ - - scheme = environ['wsgi.url_scheme'] - - uri = URI( - scheme = scheme, - host = environ['SERVER_NAME'], - path = environ['SCRIPT_NAME'] + environ['PATH_INFO'], - query = environ['QUERY_STRING'] - ) - - # Handled this way to automatically elide default port numbers. - service = getservbyname(scheme) - port = int(environ['SERVER_PORT']) - if not service or service != port: uri.port = port - - return uri - - # Shortcuts - - @property - def qs(self) -> str: - query = self.query - return str(query) if query else "" - - @qs.setter - def qs(self, value) -> None: - self.query = value - - # Python Object Protocol - - def __init__(self, _uri:Optional[URILike]=None, **parts) -> None: - """Initialize a new URI from a passed in string and/or named parts. - - If both a base URI and parts are supplied than the parts will override those present in the URI. - """ - - if hasattr(_uri, '__link__'): # We utilize a custom object protocol to retrieve links to things. - _uri = _uri.__link__ - - # To allow for simpler cases, this attribute does not need to be callable. - if callable(_uri): _uri = _uri() - - if hasattr(_uri, 'as_uri'): # Support pathlib method protocol. - _uri = _uri.as_uri() - - self.uri = _uri # If None, this will also handle setting defaults. - - if parts: # If not given a base URI, defines a new URI, otherwise update the given URI. - for part, value in parts.items(): - if part not in self.__all_parts__: - raise TypeError("Unknown URI component: " + part) - - setattr(self, part, value) - - # Python Datatype Protocols - - def __repr__(self): - """Return a "safe" programmers' representation that omits passwords.""" - - return "{0}('{1}')".format(self.__class__.__name__, self.safe_uri) - - def __str__(self): - """Return the Unicode text representation of this URI, including passwords.""" - - return self.uri - - def __bytes__(self): - """Return the binary string representation of this URI, including passwords.""" - - return self.uri.encode('utf-8') - - # Python Comparison Protocol - - def __eq__(self, other): - """Compare this URI against another value.""" - - if not isinstance(other, self.__class__): - other = self.__class__(other) - - # Because things like query string argument order may differ, but still be equivalent... - for part in self.__parts__: - ours = getattr(self, part, None) - theirs = getattr(other, part, None) - - if ours != theirs: - return False - - return True - - def __ne__(self, other): - """Inverse comparison support.""" - - return not self == other - - def __bool__(self): - """Truthyness comparison.""" - - return bool(self.uri) - - # Python Mapping Protocol - - def __getitem__(self, name): - """Shortcut for retrieval of a query string argument or syntax sugar to apply a username:password pair. - - For example: - - url = URI("http://example.com/hello?name=world") - url['name'] == 'world' - - Alternatively: - - url = URI("http://example.com/hello") - authd_url = url['username':'password'] - """ - - if isinstance(name, slice): - self = self.__class__(str(self)) # We do not mutate ourselves; instead, mutate a clone. - self.user, self.password = name.start, name.stop - return self - - return self.query[name] - - def __setitem__(self, name, value): - """Shortcut for (re)assignment of query string arguments.""" - - self.query[name] = str(value) - - def __delitem__(self, name): - """Shortcut for removal of a query string argument.""" - - del self.query[name] - - def __iter__(self): - """Retrieve the query string argument names.""" - - return iter(self._query) - - def __len__(self): - """The length of the URI as a string.""" - return len(str(self.uri)) - - # Path-like behaviours. - - def __div__(self, other): - sother = str(other) - - if sother == '.': # This URI without fragment or query. - return self.__class__(self, query=None, fragment=None) - - if sother.startswith('#'): # Fragment change only. - return self.__class__(self, fragment=other[1:]) - - if '://' in sother: # Whole-uri switch. - return self.__class__(other) - - # Otherwise resolve path. - base = str(self.path) or '.' - trailing = False if base in ('/', '.') else self._trailing - - if base == '.': - base = '/' - - elif trailing: - base += '/' - - return self.__class__(self, path=urljoin(base, sother), query=None, fragment=None) - - __idiv__ = __div__ - __truediv__ = __div__ - - def __floordiv__(self, other): - other = str(other) - - if '//' in other: - _, _, other = other.partition('//') - - return self.__class__(str(self.scheme) + "://" + other) - - __ifloordiv__ = __floordiv__ - - # Support Protocols - - __link__ = __str__ # Various - make_uri = __str__ # Path - - def __html__(self): # Markupsafe - """Return an HTML representation of this link. - - A link to http://example.com/foo/bar will result in: - - example.com/foo/bar - """ - - from markupsafe import escape - - return '{summary}'.format( - address = escape(self.uri), - summary = escape(self.summary), - ) - - geturl = __str__ # API compatibility with urllib. - - @property - def relative(self): - """Identify if this URI is relative to some "current context". - - For example, if the protocol is missing, it's protocol-relative. If the host is missing, it's host-relative, etc. - """ - - scheme = self.scheme - - if not scheme: - return True - - return scheme.is_relative(self) - - def resolve(self, uri=None, **parts): - """Attempt to resolve a new URI given an updated URI, partial or complete.""" - - if uri: - result = self.__class__(urljoin(str(self), str(uri))) - else: - result = self.__class__(self) - - for part, value in parts.items(): - if part not in self.__all_parts__: - raise TypeError("Unknown URI component: " + part) - - setattr(result, part, value) - - return result + """An object representing a URI (absolute or relative) and its components. + + Acts as a mutable mapping for manipulation of query string arguments. If the query string is not URL + "form encoded" attempts at mapping access or manipulation will fail with a ValueError. No effort is made to + preserve original query string key order. Repeated keys will have lists as values. + """ + + # Skip allocation of a dictionary per instance by pre-defining available slots. + __slots__ = ("_scheme", "_user", "_password", "_host", "_port", "_path", "_trailing", "_query", "_fragment") + + __parts__ = ("scheme", "authority", "path", "query", "fragment") + __origin_parts__ = ("scheme", "authority") + __safe_parts__ = ("scheme", "_safe_auth", "host", "port", "path", "query", "fragment") + __all_parts__ = { + "scheme", + "user", + "password", + "host", + "port", + "path", + "query", + "fragment", + "auth", + "authority", + "heirarchical", + "uri", + "username", + "hostname", + "authentication", + } + + # Scalar Parts + scheme: Union[str, Scheme] = SchemePart() + user: str = UserPart() + password: str = PasswordPart() + host: str = HostPart() + port: int = PortPart() + path: Union[str, Path] = PathPart() + query = QueryPart() + fragment: Optional[str] = FragmentPart() + + # Compound Parts + auth = AuthenticationPart() + _safe_auth = SafeAuthenticationPart() + authority = netloc = AuthorityPart() + heirarchical = HeirarchicalPart() + + # Additional Compound Interfaces + uri = URIPart(__parts__) # Whole-URI retrieval or storage as string. + origin = URIPart(__origin_parts__) # The top-level "origin" for this URL. + safe = safe_uri = URIPart(__safe_parts__, False) # URI retrieval without password component, useful for logging. + base = BasePart() + origin = URIPart(("scheme", "host", "port"), False) + summary = URIPart(("host", "path"), False, True) + resource = URIPart(("path", "query", "fragment"), False) + defrag = URIPart(tuple([i for i in __all_parts__ if i != "fragment"])) # Fragments are not sent to web servers. + + # Common Aliases + username = user + hostname = host + credentials = authentication = userinfo = auth + netloc = authority + + # Factories + + @classmethod + def from_wsgi(URI, environ) -> "URI": + if hasattr(environ, "environ"): # Incidentally support passing of a variety of Request object wrappers. + environ = environ.environ + + scheme = environ["wsgi.url_scheme"] + + uri = URI( + scheme=scheme, + host=environ["SERVER_NAME"], + path=environ["SCRIPT_NAME"] + environ["PATH_INFO"], + query=environ["QUERY_STRING"], + ) + + # Handled this way to automatically elide default port numbers. + service = getservbyname(scheme) + port = int(environ["SERVER_PORT"]) + if not service or service != port: + uri.port = port + + return uri + + # Shortcuts + + @property + def qs(self) -> str: + query = self.query + return str(query) if query else "" + + @qs.setter + def qs(self, value) -> None: + self.query = value + + # Python Object Protocol + + def __init__(self, _uri: Optional[URILike] = None, **parts) -> None: + """Initialize a new URI from a passed in string and/or named parts. + + If both a base URI and parts are supplied than the parts will override those present in the URI. + """ + + if hasattr(_uri, "__link__"): # We utilize a custom object protocol to retrieve links to things. + _uri = _uri.__link__ + + # To allow for simpler cases, this attribute does not need to be callable. + if callable(_uri): + _uri = _uri() + + if hasattr(_uri, "as_uri"): # Support pathlib method protocol. + _uri = _uri.as_uri() + + self.uri = _uri # If None, this will also handle setting defaults. + + if parts: # If not given a base URI, defines a new URI, otherwise update the given URI. + for part, value in parts.items(): + if part not in self.__all_parts__: + raise TypeError("Unknown URI component: " + part) + + setattr(self, part, value) + + # Python Datatype Protocols + + def __repr__(self): + """Return a "safe" programmers' representation that omits passwords.""" + + return "{0}('{1}')".format(self.__class__.__name__, self.safe_uri) + + def __str__(self): + """Return the Unicode text representation of this URI, including passwords.""" + + return self.uri + + def __bytes__(self): + """Return the binary string representation of this URI, including passwords.""" + + return self.uri.encode("utf-8") + + # Python Comparison Protocol + + def __eq__(self, other): + """Compare this URI against another value.""" + + if not isinstance(other, self.__class__): + other = self.__class__(other) + + # Because things like query string argument order may differ, but still be equivalent... + for part in self.__parts__: + ours = getattr(self, part, None) + theirs = getattr(other, part, None) + + if ours != theirs: + return False + + return True + + def __ne__(self, other): + """Inverse comparison support.""" + + return not self == other + + def __bool__(self): + """Truthyness comparison.""" + + return bool(self.uri) + + # Python Mapping Protocol + + def __getitem__(self, name): + """Shortcut for retrieval of a query string argument or syntax sugar to apply a username:password pair. + + For example: + + url = URI("http://example.com/hello?name=world") + url['name'] == 'world' + + Alternatively: + + url = URI("http://example.com/hello") + authd_url = url['username':'password'] + """ + + if isinstance(name, slice): + self = self.__class__(str(self)) # We do not mutate ourselves; instead, mutate a clone. + self.user, self.password = name.start, name.stop + return self + + return self.query[name] + + def __setitem__(self, name, value): + """Shortcut for (re)assignment of query string arguments.""" + + self.query[name] = str(value) + + def __delitem__(self, name): + """Shortcut for removal of a query string argument.""" + + del self.query[name] + + def __iter__(self): + """Retrieve the query string argument names.""" + + return iter(self._query) + + def __len__(self): + """The length of the URI as a string.""" + return len(str(self.uri)) + + # Path-like behaviours. + + def __div__(self, other): + sother = str(other) + + if sother == ".": # This URI without fragment or query. + return self.__class__(self, query=None, fragment=None) + + if sother.startswith("#"): # Fragment change only. + return self.__class__(self, fragment=other[1:]) + + if "://" in sother: # Whole-uri switch. + return self.__class__(other) + + # Otherwise resolve path. + base = str(self.path) or "." + trailing = False if base in ("/", ".") else self._trailing + + if base == ".": + base = "/" + + elif trailing: + base += "/" + + return self.__class__(self, path=urljoin(base, sother), query=None, fragment=None) + + __idiv__ = __div__ + __truediv__ = __div__ + + def __floordiv__(self, other): + other = str(other) + + if "//" in other: + _, _, other = other.partition("//") + + return self.__class__(str(self.scheme) + "://" + other) + + __ifloordiv__ = __floordiv__ + + # Support Protocols + + __link__ = __str__ # Various + make_uri = __str__ # Path + + def __html__(self): # Markupsafe + """Return an HTML representation of this link. + + A link to http://example.com/foo/bar will result in: + + example.com/foo/bar + """ + + from markupsafe import escape + + return '{summary}'.format( + address=escape(self.uri), + summary=escape(self.summary), + ) + + geturl = __str__ # API compatibility with urllib. + + @property + def relative(self): + """Identify if this URI is relative to some "current context". + + For example, if the protocol is missing, it's protocol-relative. If the host is missing, it's host-relative, etc. + """ + + scheme = self.scheme + + if not scheme: + return True + + return scheme.is_relative(self) + + def resolve(self, uri=None, **parts): + """Attempt to resolve a new URI given an updated URI, partial or complete.""" + + if uri: + result = self.__class__(urljoin(str(self), str(uri))) + else: + result = self.__class__(self) + + for part, value in parts.items(): + if part not in self.__all_parts__: + raise TypeError("Unknown URI component: " + part) + + setattr(result, part, value) + + return result MutableMapping.register(URI) From c849975aac5c0cf53430e5af1120259bd5e754fc Mon Sep 17 00:00:00 2001 From: Nikos Koukis Date: Wed, 24 Jan 2024 16:37:31 +0200 Subject: [PATCH 4/7] [MISC] Add .git-blame-ignore-revs --- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..214f1c4 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1 @@ +f48a22c390eed8c49d762af35006aaa3a225a468 From 87a2f824bcceedbf8d84ec175be6a3313637f741 Mon Sep 17 00:00:00 2001 From: Nikos Koukis Date: Wed, 24 Jan 2024 16:39:40 +0200 Subject: [PATCH 5/7] [MISC] Fix linter warning re __eq__ not returning on all paths --- uri/scheme.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/uri/scheme.py b/uri/scheme.py index 0d225cf..dec1c47 100644 --- a/uri/scheme.py +++ b/uri/scheme.py @@ -16,6 +16,8 @@ def __eq__(self, other: "SchemeLike"): if isinstance(other, self.__class__): return self is other + raise RuntimeError(f"Cannot compare objects - incompatible types, " f"self: {type(self)}, other: {type(other)}") + def __hash__(self) -> int: return hash(self.name) From c29d019e938d2ca2724d94fb7706f9b9759033e5 Mon Sep 17 00:00:00 2001 From: Nikos Koukis Date: Wed, 24 Jan 2024 22:20:12 +0200 Subject: [PATCH 6/7] Depende on importlib_metadata instead of `importlib.metadata` This solves the following issue: ``` TypeError: entry_points() got an unexpected keyword argument 'group' ``` This comes due to an old implementation of `importlib` and specifically when I'm running my app inside a virtual environment. Attempts to `pip3 install --force-reinstall importlib`, inside the virtualenv, did not work since it seems that `importlib` is brought from the standard python distribution regardless of whether the environment is sourced or not. To accommodate running the app inside and outside a virtualenv I'm instead depending on `importlib_metadata` which seems to resolve the aforementioned issue. --- pyproject.toml | 2 +- uri/part/scheme.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 27461e4..b311b1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ classifiers = [ "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Utilities" ] -dependencies = [] # URI has no direct runtime dependencies. +dependencies = ["importlib_metadata"] dynamic = ["version"] [project.urls] diff --git a/uri/part/scheme.py b/uri/part/scheme.py index 78d389a..49ff5c8 100644 --- a/uri/part/scheme.py +++ b/uri/part/scheme.py @@ -1,4 +1,4 @@ -from importlib.metadata import entry_points +from importlib_metadata import entry_points from typing import Any, ClassVar, Dict, Optional, Union # , Self from re import compile as r, Pattern @@ -25,6 +25,7 @@ def load(self, plugin: str) -> Scheme: # If we haven't, attempt to load the explicit Scheme subclass to utilize for this named scheme. try: + import pdb; pdb.set_trace() result = entry_points(group="uri.scheme")[plugin].load() except KeyError: result = Scheme(plugin) # Can't look up by registered name? It's generic. From 53f87ef2a5c8fc972c602c54d51dc431a76795fe Mon Sep 17 00:00:00 2001 From: Nikos Koukis Date: Wed, 24 Jan 2024 22:23:47 +0200 Subject: [PATCH 7/7] Add to .gitignore --- .gitignore | 1 + uri/part/scheme.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 313964a..d12e04f 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,4 @@ htmlcov tags .coverage.* +.tool-versions diff --git a/uri/part/scheme.py b/uri/part/scheme.py index 49ff5c8..cd54d60 100644 --- a/uri/part/scheme.py +++ b/uri/part/scheme.py @@ -25,7 +25,6 @@ def load(self, plugin: str) -> Scheme: # If we haven't, attempt to load the explicit Scheme subclass to utilize for this named scheme. try: - import pdb; pdb.set_trace() result = entry_points(group="uri.scheme")[plugin].load() except KeyError: result = Scheme(plugin) # Can't look up by registered name? It's generic.