diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..8231339b3c --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,7 @@ +RELEASE_TYPE: patch + +When Hypothesis replays examples from its test database that it knows were previously fully shrunk, it will no longer try to shrink them again. + +This should significantly speed up development workflows for slow tests, as the shrinking could contribute a substantial delay when rerunning the tests. + +In some rare cases this may cause minor reductions in example quality. This was considered an acceptable tradeoff for the improved test runtime. diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index 188fb6d1a4..36f21ec88d 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -1732,7 +1732,7 @@ def _get_fuzz_target() -> ( state = StateForActualGivenExecution( stuff, test, settings, random, wrapped_test ) - digest = function_digest(test) + database_key = function_digest(test) + b".secondary" # We track the minimal-so-far example for each distinct origin, so # that we track log-n instead of n examples for long runs. 
In particular # it means that we saturate for common errors in long runs instead of @@ -1758,7 +1758,7 @@ def fuzz_one_input( if settings.database is not None and ( known is None or sort_key(buffer) <= sort_key(known) ): - settings.database.save(digest, buffer) + settings.database.save(database_key, buffer) minimal_failures[data.interesting_origin] = buffer raise return bytes(data.buffer) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py index 7cf4e72ed8..8d4f94ff24 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py @@ -266,6 +266,8 @@ def __init__( self.__data_cache = LRUReusedCache(CACHE_SIZE) self.__data_cache_ir = LRUReusedCache(CACHE_SIZE) + self.reused_previously_shrunk_test_case = False + self.__pending_call_explanation: Optional[str] = None self._switch_to_hypothesis_provider: bool = False @@ -825,6 +827,7 @@ def reuse_existing_examples(self) -> None: ) factor = 0.1 if (Phase.generate in self.settings.phases) else 1 desired_size = max(2, ceil(factor * self.settings.max_examples)) + primary_corpus_size = len(corpus) if len(corpus) < desired_size: extra_corpus = list(self.settings.database.fetch(self.secondary_key)) @@ -838,11 +841,29 @@ def reuse_existing_examples(self) -> None: extra.sort(key=sort_key) corpus.extend(extra) - for existing in corpus: + # We want a fast path where every primary entry in the database was + # interesting. 
+ found_interesting_in_primary = False + all_interesting_in_primary_were_exact = True + + for i, existing in enumerate(corpus): + if i >= primary_corpus_size and found_interesting_in_primary: + break data = self.cached_test_function(existing, extend=BUFFER_SIZE) if data.status != Status.INTERESTING: self.settings.database.delete(self.database_key, existing) self.settings.database.delete(self.secondary_key, existing) + else: + if i < primary_corpus_size: + found_interesting_in_primary = True + assert not isinstance(data, _Overrun) + if existing != data.buffer: + all_interesting_in_primary_were_exact = False + if not self.settings.report_multiple_bugs: + break + if found_interesting_in_primary: + if all_interesting_in_primary_were_exact: + self.reused_previously_shrunk_test_case = True # Because self.database is not None (because self.has_existing_examples()) # and self.database_key is not None (because we fetched using it above), @@ -1241,6 +1262,11 @@ def _run(self) -> None: self._switch_to_hypothesis_provider = True with self._log_phase_statistics("reuse"): self.reuse_existing_examples() + # Fast path for development: If the database gave us interesting + # examples from the previously stored primary key, don't try + # shrinking it again as it's unlikely to work. + if self.reused_previously_shrunk_test_case: + self.exit_with(ExitReason.finished) # ...but we should use the supplied provider when generating... self._switch_to_hypothesis_provider = False with self._log_phase_statistics("generate"): diff --git a/hypothesis-python/tests/conjecture/test_engine.py b/hypothesis-python/tests/conjecture/test_engine.py index 8ae60ff17a..e3dc7bccf2 100644 --- a/hypothesis-python/tests/conjecture/test_engine.py +++ b/hypothesis-python/tests/conjecture/test_engine.py @@ -1631,3 +1631,72 @@ def test_mildly_complicated_strategies(strategy, condition): # covered by shrinking any mildly compliated strategy and aren't worth # testing explicitly for. This covers those. 
minimal(strategy, condition) + + +def test_does_not_shrink_if_replaying_from_database(): + db = InMemoryExampleDatabase() + key = b"foo" + + def f(data): + if data.draw_integer(0, 255) == 123: + data.mark_interesting() + + runner = ConjectureRunner(f, settings=settings(database=db), database_key=key) + b = bytes([123]) + runner.save_buffer(b) + runner.shrink_interesting_examples = None + runner.run() + (last_data,) = runner.interesting_examples.values() + assert last_data.buffer == b + + +def test_does_shrink_if_replaying_inexact_from_database(): + db = InMemoryExampleDatabase() + key = b"foo" + + def f(data): + data.draw_integer(0, 255) + data.mark_interesting() + + runner = ConjectureRunner(f, settings=settings(database=db), database_key=key) + b = bytes([123, 2]) + runner.save_buffer(b) + runner.run() + (last_data,) = runner.interesting_examples.values() + assert last_data.buffer == bytes([0]) + + +def test_stops_if_hits_interesting_early_and_only_want_one_bug(): + db = InMemoryExampleDatabase() + key = b"foo" + + def f(data): + data.draw_integer(0, 255) + data.mark_interesting() + + runner = ConjectureRunner( + f, settings=settings(database=db, report_multiple_bugs=False), database_key=key + ) + for i in range(256): + runner.save_buffer(bytes([i])) + runner.run() + assert runner.call_count == 1 + + +def test_skips_secondary_if_interesting_is_found(): + db = InMemoryExampleDatabase() + key = b"foo" + + def f(data): + data.draw_integer(0, 255) + data.mark_interesting() + + runner = ConjectureRunner( + f, + settings=settings(max_examples=1000, database=db, report_multiple_bugs=True), + database_key=key, + ) + for i in range(256): + db.save(runner.database_key if i < 10 else runner.secondary_key, bytes([i])) + runner.reuse_existing_examples() + assert runner.call_count == 10 diff --git a/hypothesis-python/tests/cover/test_debug_information.py b/hypothesis-python/tests/cover/test_debug_information.py index efb19ace91..9981d82249 100644 --- 
a/hypothesis-python/tests/cover/test_debug_information.py +++ b/hypothesis-python/tests/cover/test_debug_information.py @@ -13,13 +13,16 @@ import pytest from hypothesis import Verbosity, given, settings, strategies as st +from hypothesis.database import InMemoryExampleDatabase from tests.common.utils import capture_out def test_reports_passes(): @given(st.integers()) - @settings(verbosity=Verbosity.debug, max_examples=1000) + @settings( + verbosity=Verbosity.debug, max_examples=1000, database=InMemoryExampleDatabase() + ) def test(i): assert i < 10 diff --git a/hypothesis-python/tests/cover/test_flakiness.py b/hypothesis-python/tests/cover/test_flakiness.py index 5a360ba025..d486124735 100644 --- a/hypothesis-python/tests/cover/test_flakiness.py +++ b/hypothesis-python/tests/cover/test_flakiness.py @@ -44,7 +44,7 @@ def test_gives_flaky_error_if_assumption_is_flaky(): seen = set() @given(integers()) - @settings(verbosity=Verbosity.quiet) + @settings(verbosity=Verbosity.quiet, database=None) def oops(s): assume(s not in seen) seen.add(s) diff --git a/hypothesis-python/tests/cover/test_replay_logic.py b/hypothesis-python/tests/cover/test_replay_logic.py new file mode 100644 index 0000000000..9d13f0f55b --- /dev/null +++ b/hypothesis-python/tests/cover/test_replay_logic.py @@ -0,0 +1,145 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import pytest + +from hypothesis import given, settings, strategies as st +from hypothesis.database import InMemoryExampleDatabase +from hypothesis.internal.compat import ExceptionGroup + + +def test_does_not_shrink_on_replay(): + database = InMemoryExampleDatabase() + + call_count = 0 + + is_first = True + last = None + + @settings( + database=database, + report_multiple_bugs=False, + derandomize=False, + max_examples=1000, + ) + @given(st.lists(st.integers(), unique=True, min_size=3)) + def test(ls): + nonlocal call_count, is_first, last + if is_first and last is not None: + assert ls == last + is_first = False + last = ls + call_count += 1 + raise AssertionError + + with pytest.raises(AssertionError): + test() + + assert last is not None + + call_count = 0 + is_first = True + + with pytest.raises(AssertionError): + test() + + assert call_count == 2 + + +def test_does_not_shrink_on_replay_with_multiple_bugs(): + database = InMemoryExampleDatabase() + + call_count = 0 + + tombstone = 1000093 + + @settings( + database=database, + report_multiple_bugs=True, + derandomize=False, + max_examples=1000, + ) + @given(st.integers()) + def test(i): + nonlocal call_count + call_count += 1 + if i > tombstone: + raise AssertionError + elif i == tombstone: + raise AssertionError + + with pytest.raises(ExceptionGroup): + test() + + call_count = 0 + + with pytest.raises(ExceptionGroup): + test() + + assert call_count == 4 + + +def test_will_always_shrink_if_previous_example_does_not_replay(): + database = InMemoryExampleDatabase() + + good = set() + last = None + + @settings( + database=database, + report_multiple_bugs=True, + derandomize=False, + max_examples=1000, + ) + @given(st.integers(min_value=0)) + def test(i): + nonlocal last + if i not in good: + last = i + raise AssertionError + + for i in range(20): + with pytest.raises(AssertionError): + test() + assert last == i + good.add(last) + + +def test_will_shrink_if_the_previous_example_does_not_look_right(): + database = 
InMemoryExampleDatabase() + + last = None + + first_test = True + + @settings(database=database, report_multiple_bugs=True, derandomize=False) + @given(st.data()) + def test(data): + nonlocal last + m = data.draw(st.integers()) + last = m + if first_test: + data.draw(st.integers()) + assert m < 10000 + else: + raise AssertionError + + with pytest.raises(AssertionError): + test() + + assert last is not None + assert last > 0 + + first_test = False + + with pytest.raises(AssertionError): + test() + + assert last == 0 diff --git a/hypothesis-python/tests/cover/test_statistical_events.py b/hypothesis-python/tests/cover/test_statistical_events.py index a8ef19823c..e1db8e32fc 100644 --- a/hypothesis-python/tests/cover/test_statistical_events.py +++ b/hypothesis-python/tests/cover/test_statistical_events.py @@ -237,7 +237,7 @@ def do(self, item): def test_statistics_for_threshold_problem(): - @settings(max_examples=100) + @settings(max_examples=100, database=None) @given(st.floats(min_value=0, allow_infinity=False)) def threshold(error): target(error, label="error")