Skip to content

Commit

Permalink
Merge pull request #466 from python-jsonschema/fix-rebuild-bug
Browse files Browse the repository at this point in the history
Fix caching issues which render remote ref caching ineffective
  • Loading branch information
sirosen authored Jul 27, 2024
2 parents 0b580cc + f9645ee commit 60edab9
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 13 deletions.
12 changes: 3 additions & 9 deletions .flake8
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
[flake8]
exclude = .git,.tox,__pycache__,.eggs,dist,.venv*,docs,build,_build
# we enforce 80 char width with `black` "loosely", so flake8 should be set to
# not fail on up to 90 chars of width
exclude = .git,.tox,__pycache__,dist,.venv*,docs,build
max-line-length = 90

# based on the flake8 conf for `black` itself:
# https://github.com/ambv/black/blob/master/.flake8
#
# W503/W504 conflict, black causes E203
ignore = W503,W504,E203,
# black related: W503/W504 conflict, black causes E203
ignore = W503,W504,E203,B019
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ repos:
rev: 7.1.0
hooks:
- id: flake8
args: ['--config', '.flake8']
additional_dependencies:
- 'flake8-bugbear==24.1.17'
- 'flake8-typing-as-t==0.0.3'
Expand Down
11 changes: 11 additions & 0 deletions src/check_jsonschema/schema_loader/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import functools
import pathlib
import typing as t
import urllib.error
Expand Down Expand Up @@ -130,11 +131,21 @@ def get_validator(
instance_doc: dict[str, t.Any],
format_opts: FormatOptions,
fill_defaults: bool,
) -> jsonschema.protocols.Validator:
return self._get_validator(format_opts, fill_defaults)

@functools.lru_cache
def _get_validator(
self,
format_opts: FormatOptions,
fill_defaults: bool,
) -> jsonschema.protocols.Validator:
retrieval_uri = self.get_schema_retrieval_uri()
schema = self.get_schema()

schema_dialect = schema.get("$schema")
if schema_dialect is not None and not isinstance(schema_dialect, str):
schema_dialect = None

# format checker (which may be None)
format_checker = make_format_checker(format_opts, schema_dialect)
Expand Down
8 changes: 4 additions & 4 deletions src/check_jsonschema/schema_loader/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ def retrieve_reference(uri: str) -> referencing.Resource[Schema]:
else:
full_uri = uri

if full_uri in cache._cache:
return cache[uri]
if full_uri in cache:
return cache[full_uri]

full_uri_scheme = urllib.parse.urlsplit(full_uri).scheme
if full_uri_scheme in ("http", "https"):
Expand All @@ -100,8 +100,8 @@ def validation_callback(content: bytes) -> None:
else:
parsed_object = get_local_file(full_uri)

cache[uri] = parsed_object
return cache[uri]
cache[full_uri] = parsed_object
return cache[full_uri]

return retrieve_reference

Expand Down
57 changes: 57 additions & 0 deletions tests/acceptance/test_remote_ref_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,3 +244,60 @@ def test_ref_resolution_with_custom_base_uri(run_line, tmp_path, check_passes):
assert result.exit_code == 0, output
else:
assert result.exit_code == 1, output


@pytest.mark.parametrize("num_instances", (1, 2, 10))
@pytest.mark.parametrize("check_passes", (True, False))
def test_remote_ref_resolution_callout_count_is_scale_free_in_instancefiles(
run_line, tmp_path, num_instances, check_passes
):
"""
Test that for any N > 1, validation of a schema with a ref against N instance files
has exactly the same number of callouts as validation when N=1
This proves that the validator and caching are working correctly, and we aren't
repeating callouts to rebuild state.
"""
schema_uri = "https://example.org/schemas/main.json"
ref_uri = "https://example.org/schemas/title_schema.json"

main_schema = {
"$id": schema_uri,
"$schema": "http://json-schema.org/draft-07/schema",
"properties": {
"title": {"$ref": "./title_schema.json"},
},
"additionalProperties": False,
}
title_schema = {"type": "string"}
responses.add("GET", schema_uri, json=main_schema)
responses.add("GET", ref_uri, json=title_schema)

# write N documents
instance_doc = {"title": "doc one" if check_passes else 2}
instance_paths = []
for i in range(num_instances):
instance_path = tmp_path / f"instance{i}.json"
instance_path.write_text(json.dumps(instance_doc))
instance_paths.append(str(instance_path))

result = run_line(
[
"check-jsonschema",
"--schemafile",
schema_uri,
]
+ instance_paths
)
output = f"\nstdout:\n{result.stdout}\n\nstderr:\n{result.stderr}"
if check_passes:
assert result.exit_code == 0, output
else:
assert result.exit_code == 1, output

# this is the moment of the "real" test run here:
# no matter how many instances there were, there should only have been two calls
# (one for the schema and one for the $ref)
assert len(responses.calls) == 2
assert len([c for c in responses.calls if c.request.url == schema_uri]) == 1
assert len([c for c in responses.calls if c.request.url == ref_uri]) == 1

0 comments on commit 60edab9

Please sign in to comment.