From c7816d04324fb77f27b1f15b0a52deca197397e6 Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Thu, 10 Oct 2024 10:25:55 -0700 Subject: [PATCH] Mark failures for crosshair to fix? --- hypothesis-python/tests/common/utils.py | 3 ++- hypothesis-python/tests/cover/test_datetimes.py | 2 ++ hypothesis-python/tests/cover/test_filter_rewriting.py | 1 + hypothesis-python/tests/cover/test_lookup.py | 1 + hypothesis-python/tests/cover/test_reproduce_failure.py | 1 + hypothesis-python/tests/cover/test_sampled_from.py | 1 + hypothesis-python/tests/cover/test_stateful.py | 1 + hypothesis-python/tests/cover/test_targeting.py | 3 +++ hypothesis-python/tests/cover/test_testdecorators.py | 3 +++ hypothesis-python/tests/datetime/test_dateutil_timezones.py | 2 ++ hypothesis-python/tests/datetime/test_pytz_timezones.py | 1 + hypothesis-python/tests/datetime/test_zoneinfo_timezones.py | 2 ++ hypothesis-python/tests/nocover/test_characters.py | 3 +++ hypothesis-python/tests/nocover/test_database_usage.py | 1 + hypothesis-python/tests/nocover/test_duplication.py | 3 +++ hypothesis-python/tests/nocover/test_flatmap.py | 3 +++ hypothesis-python/tests/nocover/test_floating.py | 4 +++- hypothesis-python/tests/nocover/test_recursive.py | 3 ++- hypothesis-python/tests/nocover/test_regressions.py | 5 +++++ hypothesis-python/tests/nocover/test_sampled_from.py | 3 ++- hypothesis-python/tests/nocover/test_simple_numbers.py | 3 +++ 21 files changed, 45 insertions(+), 4 deletions(-) diff --git a/hypothesis-python/tests/common/utils.py b/hypothesis-python/tests/common/utils.py index 9557c98e90..208ef0e2c7 100644 --- a/hypothesis-python/tests/common/utils.py +++ b/hypothesis-python/tests/common/utils.py @@ -265,6 +265,7 @@ class Why(enum.Enum): # nested_given: https://github.com/pschanely/hypothesis-crosshair/issues/11 nested_given = "nested @given decorators don't work with crosshair" + undiscovered = "crosshair may not find the failing input" other = "reasons not elsewhere categorized" @@ -276,7 +277,7 @@ def xfail_on_crosshair(why: Why, /, *, strict=True, as_marks=False): current_backend = settings.get_profile(settings._current_profile).backend kw = { - "strict": strict, + "strict": strict and why != Why.undiscovered, "reason": f"Expected failure due to: {why.value}", "condition": current_backend == "crosshair", } diff --git a/hypothesis-python/tests/cover/test_datetimes.py b/hypothesis-python/tests/cover/test_datetimes.py index 51677c7df2..a4c3aded1b 100644 --- a/hypothesis-python/tests/cover/test_datetimes.py +++ b/hypothesis-python/tests/cover/test_datetimes.py @@ -16,6 +16,7 @@ from hypothesis.strategies import dates, datetimes, timedeltas, times from tests.common.debug import assert_simple_property, find_any, minimal +from tests.common.utils import Why, xfail_on_crosshair def test_can_find_positive_delta(): @@ -104,6 +105,7 @@ def test_single_date(val): assert find_any(dates(val, val)) is val +@xfail_on_crosshair(Why.undiscovered) def test_can_find_midnight(): find_any(times(), lambda x: x.hour == x.minute == x.second == 0) diff --git a/hypothesis-python/tests/cover/test_filter_rewriting.py b/hypothesis-python/tests/cover/test_filter_rewriting.py index 33a419142b..f38b4a3649 100644 --- a/hypothesis-python/tests/cover/test_filter_rewriting.py +++ b/hypothesis-python/tests/cover/test_filter_rewriting.py @@ -181,6 +181,7 @@ def test_rewrite_unsatisfiable_filter(s, pred): assert s.filter(pred).is_empty +@xfail_on_crosshair(Why.undiscovered) @pytest.mark.parametrize( "pred", [ diff --git a/hypothesis-python/tests/cover/test_lookup.py b/hypothesis-python/tests/cover/test_lookup.py index 3972e49ee7..2020fed0bf 100644 --- a/hypothesis-python/tests/cover/test_lookup.py +++ b/hypothesis-python/tests/cover/test_lookup.py @@ -876,6 +876,7 @@ def test_supportsop_types_support_protocol(protocol, data): assert issubclass(type(value), protocol) +@xfail_on_crosshair(Why.undiscovered) @pytest.mark.parametrize("restrict_custom_strategy", [True, False]) def test_generic_aliases_can_be_conditionally_resolved_by_registered_function( restrict_custom_strategy, diff --git a/hypothesis-python/tests/cover/test_reproduce_failure.py b/hypothesis-python/tests/cover/test_reproduce_failure.py index 18e5f85894..58a0ef43f9 100644 --- a/hypothesis-python/tests/cover/test_reproduce_failure.py +++ b/hypothesis-python/tests/cover/test_reproduce_failure.py @@ -168,6 +168,7 @@ def test(data): assert "@reproduce_failure" not in o.getvalue() +@xfail_on_crosshair(Why.undiscovered) def test_does_not_print_reproduction_for_large_data_examples_by_default(): @settings(phases=no_shrink, print_blob=False) @given(st.data()) diff --git a/hypothesis-python/tests/cover/test_sampled_from.py b/hypothesis-python/tests/cover/test_sampled_from.py index 11450ba7e4..c6be4a49cf 100644 --- a/hypothesis-python/tests/cover/test_sampled_from.py +++ b/hypothesis-python/tests/cover/test_sampled_from.py @@ -138,6 +138,7 @@ def stupid_sampled_sets(draw): return result +@xfail_on_crosshair(Why.undiscovered) @given(stupid_sampled_sets()) def test_efficient_sets_of_samples_with_chained_transformations_slow_path(x): # This deliberately exercises the standard filtering logic without going diff --git a/hypothesis-python/tests/cover/test_stateful.py b/hypothesis-python/tests/cover/test_stateful.py index b02fcb1f90..4ac86660f6 100644 --- a/hypothesis-python/tests/cover/test_stateful.py +++ b/hypothesis-python/tests/cover/test_stateful.py @@ -1277,6 +1277,7 @@ def fail_fast(self, a1, a2, a3, b1, b2, b3): ) +@xfail_on_crosshair(Why.undiscovered) def test_multiple_common_targets(): class Machine(RuleBasedStateMachine): a = Bundle("a") diff --git a/hypothesis-python/tests/cover/test_targeting.py b/hypothesis-python/tests/cover/test_targeting.py index 67784f06b2..9fc70c131c 100644 --- a/hypothesis-python/tests/cover/test_targeting.py +++ b/hypothesis-python/tests/cover/test_targeting.py @@ -16,6 +16,8 @@ from hypothesis.control import current_build_context from hypothesis.errors import InvalidArgument +from tests.common.utils import Why, xfail_on_crosshair + @example(0.0, "this covers the branch where context.data is None") @given( @@ -100,6 +102,7 @@ def test_cannot_target_same_label_twice(_): target(1.0, label="label") +@xfail_on_crosshair(Why.undiscovered) @given(st.none()) def test_cannot_target_default_label_twice(_): target(0.0) diff --git a/hypothesis-python/tests/cover/test_testdecorators.py b/hypothesis-python/tests/cover/test_testdecorators.py index f225f78f1a..812919de98 100644 --- a/hypothesis-python/tests/cover/test_testdecorators.py +++ b/hypothesis-python/tests/cover/test_testdecorators.py @@ -139,6 +139,7 @@ def test_can_be_given_keyword_args(x, name): assert len(name) < x +@xfail_on_crosshair(Why.undiscovered) @fails @given(one_of(floats(), booleans()), one_of(floats(), booleans())) def test_one_of_produces_different_values(x, y): @@ -185,6 +186,7 @@ def test_removing_an_element_from_a_unique_list(xs, y): assert y not in xs +@xfail_on_crosshair(Why.undiscovered) @fails @given(lists(integers(), min_size=2), data()) def test_removing_an_element_from_a_non_unique_list(xs, data): @@ -208,6 +210,7 @@ def test_can_mix_sampling_with_generating(x, y): assert type(x) == type(y) +@xfail_on_crosshair(Why.undiscovered) @fails @given(frozensets(integers())) def test_can_find_large_sum_frozenset(xs): diff --git a/hypothesis-python/tests/datetime/test_dateutil_timezones.py b/hypothesis-python/tests/datetime/test_dateutil_timezones.py index 933b574a91..d7849924e5 100644 --- a/hypothesis-python/tests/datetime/test_dateutil_timezones.py +++ b/hypothesis-python/tests/datetime/test_dateutil_timezones.py @@ -109,6 +109,7 @@ def test_dateutil_exists_our_not_exists_are_inverse(value): assert datetime_does_not_exist(value) == (not tz.datetime_exists(value)) +@xfail_on_crosshair(Why.undiscovered) def test_datetimes_can_exclude_imaginary(): find_any( datetimes(**DAY_WITH_IMAGINARY_HOUR_KWARGS, allow_imaginary=True), @@ -120,6 +121,7 @@ def test_datetimes_can_exclude_imaginary(): ) +@xfail_on_crosshair(Why.undiscovered) @fails_with(FailedHealthCheck) @given( datetimes( diff --git a/hypothesis-python/tests/datetime/test_pytz_timezones.py b/hypothesis-python/tests/datetime/test_pytz_timezones.py index b48987f0e9..5ce5b38b72 100644 --- a/hypothesis-python/tests/datetime/test_pytz_timezones.py +++ b/hypothesis-python/tests/datetime/test_pytz_timezones.py @@ -105,6 +105,7 @@ def test_time_bounds_must_be_naive(name, val): times(**{name: val}).validate() +@xfail_on_crosshair(Why.undiscovered) @pytest.mark.parametrize( "bound", [ diff --git a/hypothesis-python/tests/datetime/test_zoneinfo_timezones.py b/hypothesis-python/tests/datetime/test_zoneinfo_timezones.py index 1772321cbb..6c2dbe9cb8 100644 --- a/hypothesis-python/tests/datetime/test_zoneinfo_timezones.py +++ b/hypothesis-python/tests/datetime/test_zoneinfo_timezones.py @@ -17,12 +17,14 @@ from hypothesis.errors import InvalidArgument from tests.common.debug import assert_no_examples, find_any, minimal +from tests.common.utils import Why, xfail_on_crosshair def test_utc_is_minimal(): assert minimal(st.timezones()) is zoneinfo.ZoneInfo("UTC") +@xfail_on_crosshair(Why.undiscovered) def test_can_generate_non_utc(): find_any( st.datetimes(timezones=st.timezones()).filter(lambda d: d.tzinfo.key != "UTC") diff --git a/hypothesis-python/tests/nocover/test_characters.py b/hypothesis-python/tests/nocover/test_characters.py index f938f8d162..48f9c69770 100644 --- a/hypothesis-python/tests/nocover/test_characters.py +++ b/hypothesis-python/tests/nocover/test_characters.py @@ -15,6 +15,8 @@ from hypothesis import given, settings, strategies as st +from tests.common.utils import Why, xfail_on_crosshair + IDENTIFIER_CHARS = string.ascii_letters + string.digits + "_" @@ -23,6 +25,7 @@ def test_large_blacklist(c): assert c not in IDENTIFIER_CHARS +@xfail_on_crosshair(Why.symbolic_outside_context) # seems like a crosshair bug here @given(st.data()) def test_arbitrary_blacklist(data): blacklist = data.draw(st.text(st.characters(max_codepoint=1000), min_size=1)) diff --git a/hypothesis-python/tests/nocover/test_database_usage.py b/hypothesis-python/tests/nocover/test_database_usage.py index ba79bb7b9d..5548ba0564 100644 --- a/hypothesis-python/tests/nocover/test_database_usage.py +++ b/hypothesis-python/tests/nocover/test_database_usage.py @@ -32,6 +32,7 @@ def has_a_non_zero_byte(x): return any(bytes(x)) +@xfail_on_crosshair(Why.undiscovered) def test_saves_incremental_steps_in_database(): key = b"a database key" database = InMemoryExampleDatabase() diff --git a/hypothesis-python/tests/nocover/test_duplication.py b/hypothesis-python/tests/nocover/test_duplication.py index ff89a9978f..566abefda9 100644 --- a/hypothesis-python/tests/nocover/test_duplication.py +++ b/hypothesis-python/tests/nocover/test_duplication.py @@ -15,6 +15,8 @@ from hypothesis import given, settings from hypothesis.strategies._internal import SearchStrategy +from tests.common.utils import Why, xfail_on_crosshair + class Blocks(SearchStrategy): def __init__(self, n): @@ -37,6 +39,7 @@ def test(b): assert set(counts.values()) == {1} +@xfail_on_crosshair(Why.other, strict=False) # CrosshairInternal for n>0 @pytest.mark.parametrize("n", range(1, 5)) def test_mostly_does_not_duplicate_blocks_even_when_failing(n): counts = Counter() diff --git a/hypothesis-python/tests/nocover/test_flatmap.py b/hypothesis-python/tests/nocover/test_flatmap.py index 239d256a8c..aa238f1fc9 100644 --- a/hypothesis-python/tests/nocover/test_flatmap.py +++ b/hypothesis-python/tests/nocover/test_flatmap.py @@ -26,6 +26,7 @@ ) from tests.common.debug import find_any, minimal +from tests.common.utils import Why, xfail_on_crosshair ConstantLists = integers().flatmap(lambda i: lists(just(i))) @@ -97,6 +98,7 @@ def criterion(ls): assert set(result) == {False, ""} +@xfail_on_crosshair(Why.undiscovered) # for n >= 8 at least @pytest.mark.parametrize("n", range(1, 10)) def test_can_shrink_through_a_binding(n): bool_lists = integers(0, 100).flatmap( @@ -105,6 +107,7 @@ def test_can_shrink_through_a_binding(n): assert minimal(bool_lists, lambda x: x.count(True) >= n) == [True] * n +@xfail_on_crosshair(Why.undiscovered) # for n >= 8 at least @pytest.mark.parametrize("n", range(1, 10)) def test_can_delete_in_middle_of_a_binding(n): bool_lists = integers(1, 100).flatmap( diff --git a/hypothesis-python/tests/nocover/test_floating.py b/hypothesis-python/tests/nocover/test_floating.py index fd679c3429..77ebfab28a 100644 --- a/hypothesis-python/tests/nocover/test_floating.py +++ b/hypothesis-python/tests/nocover/test_floating.py @@ -20,7 +20,7 @@ from hypothesis.strategies import data, floats, lists from tests.common.debug import find_any -from tests.common.utils import fails +from tests.common.utils import Why, fails, xfail_on_crosshair TRY_HARDER = settings( max_examples=1000, suppress_health_check=[HealthCheck.filter_too_much] @@ -93,6 +93,7 @@ def test_is_not_int(x): assert x != int(x) +@xfail_on_crosshair(Why.undiscovered) @fails @given(floats()) @TRY_HARDER @@ -128,6 +129,7 @@ def test_floats_are_in_range(x, y, data): assert x <= t <= y +@xfail_on_crosshair(Why.undiscovered) @pytest.mark.parametrize("neg", [False, True]) @pytest.mark.parametrize("snan", [False, True]) def test_can_find_negative_and_signaling_nans(neg, snan): diff --git a/hypothesis-python/tests/nocover/test_recursive.py b/hypothesis-python/tests/nocover/test_recursive.py index b47ec10b7e..c12a2ae105 100644 --- a/hypothesis-python/tests/nocover/test_recursive.py +++ b/hypothesis-python/tests/nocover/test_recursive.py @@ -17,7 +17,7 @@ from hypothesis import HealthCheck, given, settings, strategies as st from tests.common.debug import find_any, minimal -from tests.common.utils import flaky +from tests.common.utils import Why, flaky, xfail_on_crosshair def test_can_generate_with_large_branching(): @@ -79,6 +79,7 @@ def test_drawing_many_near_boundary(): assert len(ls) == size +@xfail_on_crosshair(Why.undiscovered) def test_can_use_recursive_data_in_sets(): nested_sets = st.recursive(st.booleans(), st.frozensets, max_leaves=3) find_any(nested_sets, settings=settings(deadline=None)) diff --git a/hypothesis-python/tests/nocover/test_regressions.py b/hypothesis-python/tests/nocover/test_regressions.py index 980be13891..0870daf75c 100644 --- a/hypothesis-python/tests/nocover/test_regressions.py +++ b/hypothesis-python/tests/nocover/test_regressions.py @@ -14,7 +14,10 @@ from hypothesis._settings import note_deprecation from hypothesis.errors import HypothesisDeprecationWarning +from tests.common.utils import Why, xfail_on_crosshair + +@xfail_on_crosshair(Why.other) def test_note_deprecation_blames_right_code_issue_652(): msg = "this is an arbitrary deprecation warning message" @@ -58,6 +61,8 @@ def test_unique_floats_with_nan_is_not_flaky_3926(ls): # this will take a while to find the regression, but will eventually trigger it. # min_value=0 is critical to trigger the probing behavior which exhausts our buffer. +# https://github.com/pschanely/CrossHair/issues/285 for an upstream fix. +@xfail_on_crosshair(Why.other, strict=False) @given(st.integers(min_value=0, max_value=1 << 25_000)) def test_overrun_during_datatree_simulation_3874(n): pass diff --git a/hypothesis-python/tests/nocover/test_sampled_from.py b/hypothesis-python/tests/nocover/test_sampled_from.py index f72beef0ab..bf2bcb2389 100644 --- a/hypothesis-python/tests/nocover/test_sampled_from.py +++ b/hypothesis-python/tests/nocover/test_sampled_from.py @@ -21,7 +21,7 @@ from hypothesis.strategies._internal.strategies import SampledFromStrategy from tests.common.debug import find_any, minimal -from tests.common.utils import fails_with +from tests.common.utils import Why, fails_with, xfail_on_crosshair @pytest.mark.parametrize("size", [100, 10**5, 10**6, 2**25]) @@ -101,6 +101,7 @@ def test_flag_enum_repr_uses_class_not_a_list(): assert lazy_repr == "sampled_from(tests.nocover.test_sampled_from.AFlag)" +@xfail_on_crosshair(Why.undiscovered) def test_exhaustive_flags(): # Generate powerset of flag combinations. There are only 2^3 of them, so # we can reasonably expect that they are all are found. diff --git a/hypothesis-python/tests/nocover/test_simple_numbers.py b/hypothesis-python/tests/nocover/test_simple_numbers.py index 8b66aaedd2..29cd20a2da 100644 --- a/hypothesis-python/tests/nocover/test_simple_numbers.py +++ b/hypothesis-python/tests/nocover/test_simple_numbers.py @@ -17,6 +17,7 @@ from hypothesis.strategies import floats, integers, lists from tests.common.debug import minimal +from tests.common.utils import Why, xfail_on_crosshair def test_minimize_negative_int(): @@ -116,6 +117,7 @@ def test_can_minimal_infinite_negative_float(): assert minimal(floats(), lambda x: x < -sys.float_info.max) +@xfail_on_crosshair(Why.undiscovered) # sometimes def test_can_minimal_float_on_boundary_of_representable(): minimal(floats(), lambda x: x + 1 == x and not math.isinf(x)) @@ -153,6 +155,7 @@ def test_minimal_fractional_float(): assert minimal(floats(), lambda x: x >= 1.5) == 2 +@xfail_on_crosshair(Why.undiscovered) def test_minimizes_lists_of_negative_ints_up_to_boundary(): result = minimal( lists(integers(), min_size=10),