Add fast path for replaying already shrunk test cases

HypothesisWorks · Nov 2, 2024 · 3a74210 · 3a74210
1 parent d16b183
commit 3a74210
Show file tree

Hide file tree

Showing 8 changed files with 256 additions and 6 deletions.
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,7 @@
+RELEASE_TYPE: patch
+
+When Hypothesis replays examples from its test database that it knows were previously fully shrunk, it will no longer try to shrink them again.
+
+This should significantly speed up development workflows for slow tests, as the shrinking could contribute a significant delay when rerunning the tests.
+
+In some rare cases this may cause minor reductions in example quality. This was considered an acceptable tradeoff for the improved test runtime.
diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
@@ -1732,7 +1732,7 @@ def _get_fuzz_target() -> (
             state = StateForActualGivenExecution(
                 stuff, test, settings, random, wrapped_test
             )
-            digest = function_digest(test)
+            database_key = function_digest(test) + b".secondary"
             # We track the minimal-so-far example for each distinct origin, so
             # that we track log-n instead of n examples for long runs.  In particular
             # it means that we saturate for common errors in long runs instead of
@@ -1758,7 +1758,7 @@ def fuzz_one_input(
                     if settings.database is not None and (
                         known is None or sort_key(buffer) <= sort_key(known)
                     ):
-                        settings.database.save(digest, buffer)
+                        settings.database.save(database_key, buffer)
                         minimal_failures[data.interesting_origin] = buffer
                     raise
                 return bytes(data.buffer)

diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
@@ -266,6 +266,8 @@ def __init__(
         self.__data_cache = LRUReusedCache(CACHE_SIZE)
         self.__data_cache_ir = LRUReusedCache(CACHE_SIZE)
 
+        self.reused_previously_shrunk_test_case = False
+
         self.__pending_call_explanation: Optional[str] = None
         self._switch_to_hypothesis_provider: bool = False
 
@@ -825,6 +827,7 @@ def reuse_existing_examples(self) -> None:
             )
             factor = 0.1 if (Phase.generate in self.settings.phases) else 1
             desired_size = max(2, ceil(factor * self.settings.max_examples))
+            primary_corpus_size = len(corpus)
 
             if len(corpus) < desired_size:
                 extra_corpus = list(self.settings.database.fetch(self.secondary_key))
@@ -838,11 +841,29 @@ def reuse_existing_examples(self) -> None:
                 extra.sort(key=sort_key)
                 corpus.extend(extra)
 
-            for existing in corpus:
+            # We want a fast path when at least one primary entry is still interesting
+            # and every such entry replays to exactly the buffer that was stored.
+            found_interesting_in_primary = False
+            all_interesting_in_primary_were_exact = True
+
+            for i, existing in enumerate(corpus):
+                if i >= primary_corpus_size and found_interesting_in_primary:
+                    break
                 data = self.cached_test_function(existing, extend=BUFFER_SIZE)
                 if data.status != Status.INTERESTING:
                     self.settings.database.delete(self.database_key, existing)
                     self.settings.database.delete(self.secondary_key, existing)
+                else:
+                    if i < primary_corpus_size:
+                        found_interesting_in_primary = True
+                        assert not isinstance(data, _Overrun)
+                        if existing != data.buffer:
+                            all_interesting_in_primary_were_exact = False
+                    if not self.settings.report_multiple_bugs:
+                        break
+            if found_interesting_in_primary:
+                if all_interesting_in_primary_were_exact:
+                    self.reused_previously_shrunk_test_case = True
 
             # Because self.database is not None (because self.has_existing_examples())
             # and self.database_key is not None (because we fetched using it above),
@@ -1241,6 +1262,11 @@ def _run(self) -> None:
         self._switch_to_hypothesis_provider = True
         with self._log_phase_statistics("reuse"):
             self.reuse_existing_examples()
+        # Fast path for development: If the database gave us interesting
+        # examples from the previously stored primary key, don't try
+        # shrinking them again, as that is unlikely to work.
+        if self.reused_previously_shrunk_test_case:
+            self.exit_with(ExitReason.finished)
         # ...but we should use the supplied provider when generating...
         self._switch_to_hypothesis_provider = False
         with self._log_phase_statistics("generate"):

diff --git a/hypothesis-python/tests/conjecture/test_engine.py b/hypothesis-python/tests/conjecture/test_engine.py
@@ -1631,3 +1631,72 @@ def test_mildly_complicated_strategies(strategy, condition):
     # covered by shrinking any mildly complicated strategy and aren't worth
     # testing explicitly for. This covers those.
     minimal(strategy, condition)
+
+
+def test_does_not_shrink_if_replaying_from_database():
+    db = InMemoryExampleDatabase()
+    key = b"foo"
+
+    def f(data):
+        if data.draw_integer(0, 255) == 123:
+            data.mark_interesting()
+
+    runner = ConjectureRunner(f, settings=settings(database=db), database_key=key)
+    b = bytes([123])
+    runner.save_buffer(b)
+    runner.shrink_interesting_examples = None
+    runner.run()
+    (last_data,) = runner.interesting_examples.values()
+    assert last_data.buffer == b
+
+
+def test_does_shrink_if_replaying_inexact_from_database():
+    db = InMemoryExampleDatabase()
+    key = b"foo"
+
+    def f(data):
+        data.draw_integer(0, 255)
+        data.mark_interesting()
+
+    runner = ConjectureRunner(f, settings=settings(database=db), database_key=key)
+    b = bytes([123, 2])
+    runner.save_buffer(b)
+    runner.run()
+    (last_data,) = runner.interesting_examples.values()
+    assert last_data.buffer == bytes([0])
+
+
+def test_stops_if_hits_interesting_early_and_only_want_one_bug():
+    db = InMemoryExampleDatabase()
+    key = b"foo"
+
+    def f(data):
+        data.draw_integer(0, 255)
+        data.mark_interesting()
+
+    runner = ConjectureRunner(
+        f, settings=settings(database=db, report_multiple_bugs=False), database_key=key
+    )
+    for i in range(256):
+        runner.save_buffer(bytes([i]))
+    runner.run()
+    assert runner.call_count == 1
+
+
+def test_skips_secondary_if_interesting_is_found():
+    db = InMemoryExampleDatabase()
+    key = b"foo"
+
+    def f(data):
+        data.draw_integer(0, 255)
+        data.mark_interesting()
+
+    runner = ConjectureRunner(
+        f,
+        settings=settings(max_examples=1000, database=db, report_multiple_bugs=True),
+        database_key=key,
+    )
+    for i in range(256):
+        db.save(runner.database_key if i < 10 else runner.secondary_key, bytes([i]))
+    runner.reuse_existing_examples()
+    assert runner.call_count == 10
diff --git a/hypothesis-python/tests/cover/test_debug_information.py b/hypothesis-python/tests/cover/test_debug_information.py
@@ -13,13 +13,16 @@
 import pytest
 
 from hypothesis import Verbosity, given, settings, strategies as st
+from hypothesis.database import InMemoryExampleDatabase
 
 from tests.common.utils import capture_out
 
 
 def test_reports_passes():
     @given(st.integers())
-    @settings(verbosity=Verbosity.debug, max_examples=1000)
+    @settings(
+        verbosity=Verbosity.debug, max_examples=1000, database=InMemoryExampleDatabase()
+    )
     def test(i):
         assert i < 10
 

diff --git a/hypothesis-python/tests/cover/test_flakiness.py b/hypothesis-python/tests/cover/test_flakiness.py
@@ -44,7 +44,7 @@ def test_gives_flaky_error_if_assumption_is_flaky():
     seen = set()
 
     @given(integers())
-    @settings(verbosity=Verbosity.quiet)
+    @settings(verbosity=Verbosity.quiet, database=None)
     def oops(s):
         assume(s not in seen)
         seen.add(s)

diff --git a/hypothesis-python/tests/cover/test_replay_logic.py b/hypothesis-python/tests/cover/test_replay_logic.py
@@ -0,0 +1,145 @@
+# This file is part of Hypothesis, which may be found at
+# https://github.com/HypothesisWorks/hypothesis/
+#
+# Copyright the Hypothesis Authors.
+# Individual contributors are listed in AUTHORS.rst and the git log.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
+import pytest
+
+from hypothesis import given, settings, strategies as st
+from hypothesis.database import InMemoryExampleDatabase
+from hypothesis.internal.compat import ExceptionGroup
+
+
+def test_does_not_shrink_on_replay():
+    database = InMemoryExampleDatabase()
+
+    call_count = 0
+
+    is_first = True
+    last = None
+
+    @settings(
+        database=database,
+        report_multiple_bugs=False,
+        derandomize=False,
+        max_examples=1000,
+    )
+    @given(st.lists(st.integers(), unique=True, min_size=3))
+    def test(ls):
+        nonlocal call_count, is_first, last
+        if is_first and last is not None:
+            assert ls == last
+        is_first = False
+        last = ls
+        call_count += 1
+        raise AssertionError
+
+    with pytest.raises(AssertionError):
+        test()
+
+    assert last is not None
+
+    call_count = 0
+    is_first = True
+
+    with pytest.raises(AssertionError):
+        test()
+
+    assert call_count == 2
+
+
+def test_does_not_shrink_on_replay_with_multiple_bugs():
+    database = InMemoryExampleDatabase()
+
+    call_count = 0
+
+    tombstone = 1000093
+
+    @settings(
+        database=database,
+        report_multiple_bugs=True,
+        derandomize=False,
+        max_examples=1000,
+    )
+    @given(st.integers())
+    def test(i):
+        nonlocal call_count
+        call_count += 1
+        if i > tombstone:
+            raise AssertionError
+        elif i == tombstone:
+            raise AssertionError
+
+    with pytest.raises(ExceptionGroup):
+        test()
+
+    call_count = 0
+
+    with pytest.raises(ExceptionGroup):
+        test()
+
+    assert call_count == 4
+
+
+def test_will_always_shrink_if_previous_example_does_not_replay():
+    database = InMemoryExampleDatabase()
+
+    good = set()
+    last = None
+
+    @settings(
+        database=database,
+        report_multiple_bugs=True,
+        derandomize=False,
+        max_examples=1000,
+    )
+    @given(st.integers(min_value=0))
+    def test(i):
+        nonlocal last
+        if i not in good:
+            last = i
+            raise AssertionError
+
+    for i in range(20):
+        with pytest.raises(AssertionError):
+            test()
+        assert last == i
+        good.add(last)
+
+
+def test_will_shrink_if_the_previous_example_does_not_look_right():
+    database = InMemoryExampleDatabase()
+
+    last = None
+
+    first_test = True
+
+    @settings(database=database, report_multiple_bugs=True, derandomize=False)
+    @given(st.data())
+    def test(data):
+        nonlocal last
+        m = data.draw(st.integers())
+        last = m
+        if first_test:
+            data.draw(st.integers())
+            assert m < 10000
+        else:
+            raise AssertionError
+
+    with pytest.raises(AssertionError):
+        test()
+
+    assert last is not None
+    assert last > 0
+
+    first_test = False
+
+    with pytest.raises(AssertionError):
+        test()
+
+    assert last == 0
diff --git a/hypothesis-python/tests/cover/test_statistical_events.py b/hypothesis-python/tests/cover/test_statistical_events.py
@@ -237,7 +237,7 @@ def do(self, item):
 
 
 def test_statistics_for_threshold_problem():
-    @settings(max_examples=100)
+    @settings(max_examples=100, database=None)
     @given(st.floats(min_value=0, allow_infinity=False))
     def threshold(error):
         target(error, label="error")