Skip to content

Commit

Permalink
Mark expected failures under crosshair
Browse files Browse the repository at this point in the history
  • Loading branch information
Zac-HD committed Oct 10, 2024
1 parent e455d5d commit 049dc2f
Show file tree
Hide file tree
Showing 33 changed files with 163 additions and 36 deletions.
34 changes: 34 additions & 0 deletions hypothesis-python/tests/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# obtain one at https://mozilla.org/MPL/2.0/.

import contextlib
import enum
import sys
import warnings
from io import StringIO
Expand Down Expand Up @@ -249,3 +250,36 @@ def capture_observations():
# config option, so *linking against* something built this way can break us.
# Everything is terrible
PYTHON_FTZ = next_down(sys.float_info.min) == 0.0


class Why(enum.Enum):
    """Categorised reasons for expecting a test to fail on the crosshair backend.

    Each member's value is a human-readable explanation; ``xfail_on_crosshair``
    interpolates it into the xfail reason string.  An enum (rather than free-form
    strings) keeps the categories easy to search for, or to exclude, later on.
    """

    # --- Failures we would like to see fixed ---
    flaky_replay = "Inconsistent results from replaying a failing test..."
    symbolic_outside_context = "CrosshairInternal error (using value outside context)"
    floats = "crosshair doesn't reason about signed zero (and other edge cases?)"
    no_unsatisfiable = "doesn't raise Unsatisfiable for some reason"

    # --- Failures that are basically fine to leave alone ---

    # Tracked upstream: https://github.com/pschanely/hypothesis-crosshair/issues/11
    nested_given = "nested @given decorators don't work with crosshair"
    # Catch-all for anything without a category of its own.
    other = "reasons not elsewhere categorized"


def xfail_on_crosshair(why: Why, /, *, strict=True, as_marks=False):
    """Mark a test as an expected failure when the crosshair backend is active.

    Args:
        why: The ``Why`` member categorising the failure; its value is embedded
            in the xfail reason.
        strict: Forwarded as the ``strict`` argument of ``pytest.mark.xfail``.
        as_marks: When true, return the marks themselves as a tuple, for use
            with ``pytest.param(..., marks=xfail_on_crosshair(...))``.

    Returns:
        A decorator applying both the ``xf_crosshair`` and ``xfail`` marks, or
        (with ``as_marks=True``) a ``(xf_crosshair, xfail)`` tuple of marks.
        If pytest cannot be imported, an identity decorator is returned
        regardless of ``as_marks``.
    """
    try:
        import pytest
    except ImportError:
        # No pytest means nothing to mark; hand back the function untouched.
        def passthrough(fn):
            return fn

        return passthrough

    # The xfail condition is computed once, from the profile that is current
    # at the time this helper is called.
    active_profile = settings.get_profile(settings._current_profile)
    xfail_kwargs = {
        "strict": strict,
        "reason": f"Expected failure due to: {why.value}",
        "condition": active_profile.backend == "crosshair",
    }

    if as_marks:
        return (pytest.mark.xf_crosshair, pytest.mark.xfail(**xfail_kwargs))

    def decorate(fn):
        # Apply xfail first, then tag with xf_crosshair so these tests can be
        # selected by marker.
        xfailed = pytest.mark.xfail(**xfail_kwargs)(fn)
        return pytest.mark.xf_crosshair(xfailed)

    return decorate
1 change: 1 addition & 0 deletions hypothesis-python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def pytest_configure(config):
"markers",
"xp_min_version(api_version): run when greater or equal to api_version",
)
config.addinivalue_line("markers", "xf_crosshair: selection for xfailing symbolics")


def pytest_addoption(parser):
Expand Down
3 changes: 2 additions & 1 deletion hypothesis-python/tests/cover/test_cache_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from hypothesis.errors import InvalidArgument
from hypothesis.internal.cache import GenericCache, LRUCache, LRUReusedCache

from tests.common.utils import skipif_emscripten
from tests.common.utils import Why, skipif_emscripten, xfail_on_crosshair


class LRUCacheAlternative(GenericCache):
Expand Down Expand Up @@ -116,6 +116,7 @@ def test_behaves_like_a_dict_with_losses(implementation, writes, size):
assert len(target) <= min(len(model), size)


@xfail_on_crosshair(Why.symbolic_outside_context)
@settings(
suppress_health_check={HealthCheck.too_slow}
| set(settings.get_profile(settings._current_profile).suppress_health_check),
Expand Down
4 changes: 4 additions & 0 deletions hypothesis-python/tests/cover/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
from hypothesis.database import InMemoryExampleDatabase
from hypothesis.errors import InvalidArgument, NoSuchExample, Unsatisfiable

from tests.common.utils import Why, xfail_on_crosshair


@xfail_on_crosshair(Why.other)
def test_stops_after_max_examples_if_satisfying():
tracker = []

Expand All @@ -33,6 +36,7 @@ def track(x):
assert len(tracker) == max_examples


@xfail_on_crosshair(Why.symbolic_outside_context)
def test_stops_after_ten_times_max_examples_if_not_satisfying():
count = [0]

Expand Down
11 changes: 7 additions & 4 deletions hypothesis-python/tests/cover/test_filter_rewriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from hypothesis.strategies._internal.strings import BytesStrategy, TextStrategy

from tests.common.debug import check_can_generate_examples
from tests.common.utils import fails_with
from tests.common.utils import Why, fails_with, xfail_on_crosshair

A_FEW = 15 # speed up massively-parametrized tests

Expand Down Expand Up @@ -374,9 +374,11 @@ def test_isidentifier_filter_properly_rewritten(al, data):
assert example.isidentifier()


@pytest.mark.parametrize("al", ["¥¦§©"])
def test_isidentifer_filter_unsatisfiable(al):
fs = st.text(alphabet=al).filter(str.isidentifier)
@xfail_on_crosshair(Why.no_unsatisfiable)  # maybe a bug?
def test_isidentifer_filter_unsatisfiable():
    """Filtering text over a non-identifier alphabet must raise Unsatisfiable."""
    alphabet = "¥¦§©"
    # Sanity check: none of these characters is valid even as a
    # non-leading identifier character.
    for char in alphabet:
        assert not f"_{char}".isidentifier()
    identifiers_only = st.text(alphabet=alphabet).filter(str.isidentifier)
    with pytest.raises(Unsatisfiable):
        check_can_generate_examples(identifiers_only)

Expand Down Expand Up @@ -571,6 +573,7 @@ def test_filter_rewriting_lambda_len_unique_elements(
assert predicate(value)


@xfail_on_crosshair(Why.no_unsatisfiable)
@pytest.mark.parametrize(
"predicate",
[
Expand Down
3 changes: 3 additions & 0 deletions hypothesis-python/tests/cover/test_find.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@

from hypothesis import Phase, find, settings, strategies as st

from tests.common.utils import Why, xfail_on_crosshair


@xfail_on_crosshair(Why.symbolic_outside_context)
def test_find_uses_provided_random():
prev = None

Expand Down
6 changes: 5 additions & 1 deletion hypothesis-python/tests/cover/test_flakiness.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
from hypothesis.internal.scrutineer import Tracer
from hypothesis.strategies import booleans, composite, integers, lists, random_module

from tests.common.utils import no_shrink
from tests.common.utils import Why, no_shrink, xfail_on_crosshair


class Nope(Exception):
pass


@xfail_on_crosshair(Why.symbolic_outside_context)
def test_fails_only_once_is_flaky():
first_call = [True]

Expand All @@ -40,6 +41,7 @@ def rude(x):
assert isinstance(exceptions[0], Nope)


@xfail_on_crosshair(Why.symbolic_outside_context)
def test_gives_flaky_error_if_assumption_is_flaky():
seen = set()

Expand Down Expand Up @@ -78,6 +80,7 @@ def test(x):
assert isinstance(exceptions[0], ZeroDivisionError)


@xfail_on_crosshair(Why.symbolic_outside_context)
def test_does_not_attempt_to_shrink_flaky_errors():
values = []

Expand Down Expand Up @@ -109,6 +112,7 @@ def single_bool_lists(draw):
return result


@xfail_on_crosshair(Why.nested_given)
@example([True, False, False, False], [3], None)
@example([False, True, False, False], [3], None)
@example([False, False, True, False], [3], None)
Expand Down
3 changes: 3 additions & 0 deletions hypothesis-python/tests/cover/test_float_nastiness.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
)

from tests.common.debug import find_any, minimal
from tests.common.utils import Why, xfail_on_crosshair

try:
import numpy
Expand Down Expand Up @@ -66,6 +67,7 @@ def test_does_not_generate_negative_if_right_boundary_is_positive(x):
assert math.copysign(1, x) == 1


@xfail_on_crosshair(Why.floats)
@given(st.floats(-1.0, -0.0))
def test_does_not_generate_positive_if_right_boundary_is_negative(x):
    # The upper bound is negative zero, so every drawn value (including a
    # zero) must carry a negative sign bit.
    sign = math.copysign(1, x)
    assert sign == -1
Expand All @@ -76,6 +78,7 @@ def test_half_bounded_generates_zero():
find_any(st.floats(max_value=1.0), lambda x: x == 0.0)


@xfail_on_crosshair(Why.floats)
@given(st.floats(max_value=-0.0))
def test_half_bounded_respects_sign_of_upper_bound(x):
    # With only an upper bound of negative zero, every drawn value must
    # still carry a negative sign bit.
    sign = math.copysign(1, x)
    assert sign == -1
Expand Down
14 changes: 6 additions & 8 deletions hypothesis-python/tests/cover/test_given_error_conditions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,14 @@
from hypothesis.errors import InvalidArgument, Unsatisfiable
from hypothesis.strategies import booleans, integers, nothing

from tests.common.utils import fails_with
from tests.common.utils import Why, fails_with, xfail_on_crosshair


def test_raises_unsatisfiable_if_all_false_in_finite_set():
@given(booleans())
def test_assume_false(x):
reject()

with pytest.raises(Unsatisfiable):
test_assume_false()
@xfail_on_crosshair(Why.no_unsatisfiable)
@fails_with(Unsatisfiable)
@given(booleans())
def test_raises_unsatisfiable_if_all_false_in_finite_set(x):
reject()


def test_does_not_raise_unsatisfiable_if_some_false_in_finite_set():
Expand Down
3 changes: 2 additions & 1 deletion hypothesis-python/tests/cover/test_interactive_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from hypothesis.internal.compat import WINDOWS

from tests.common.debug import find_any
from tests.common.utils import fails_with, skipif_emscripten
from tests.common.utils import Why, fails_with, skipif_emscripten, xfail_on_crosshair

pytest_plugins = "pytester"

Expand All @@ -45,6 +45,7 @@ def test_exception_in_compare_can_still_have_example():
st.one_of(st.none().map(lambda n: Decimal("snan")), st.just(Decimal(0))).example()


@xfail_on_crosshair(Why.symbolic_outside_context)
def test_does_not_always_give_the_same_example():
    """One hundred ``.example()`` calls should yield at least ten distinct values."""
    strategy = st.integers()
    distinct = {strategy.example() for _ in range(100)}
    assert len(distinct) >= 10
Expand Down
2 changes: 1 addition & 1 deletion hypothesis-python/tests/cover/test_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
find_any,
minimal,
)
from tests.common.utils import fails_with, temp_registered
from tests.common.utils import Why, fails_with, temp_registered, xfail_on_crosshair

sentinel = object()
BUILTIN_TYPES = tuple(
Expand Down
3 changes: 2 additions & 1 deletion hypothesis-python/tests/cover/test_reproduce_failure.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from hypothesis.core import decode_failure, encode_failure
from hypothesis.errors import DidNotReproduce, InvalidArgument, UnsatisfiedAssumption

from tests.common.utils import capture_out, no_shrink
from tests.common.utils import Why, capture_out, no_shrink, xfail_on_crosshair


@example(bytes(20)) # shorter compressed
Expand Down Expand Up @@ -118,6 +118,7 @@ def test(x):
test()


@xfail_on_crosshair(Why.symbolic_outside_context)
def test_prints_reproduction_if_requested():
failing_example = [None]

Expand Down
5 changes: 4 additions & 1 deletion hypothesis-python/tests/cover/test_sampled_from.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
assert_simple_property,
check_can_generate_examples,
)
from tests.common.utils import fails_with
from tests.common.utils import Why, fails_with, xfail_on_crosshair

an_enum = enum.Enum("A", "a b c")
a_flag = enum.Flag("A", "a b c")
Expand Down Expand Up @@ -69,6 +69,7 @@ def test_unsat_filtered_sampling(x):
raise AssertionError


@xfail_on_crosshair(Why.no_unsatisfiable)
@fails_with(Unsatisfiable)
@settings(suppress_health_check=[])
@given(sampled_from(range(2)).filter(lambda x: x < 0))
Expand Down Expand Up @@ -144,12 +145,14 @@ def test_efficient_sets_of_samples_with_chained_transformations_slow_path(x):
assert x == {x * 2 for x in range(20) if x % 3}


@xfail_on_crosshair(Why.no_unsatisfiable)
@fails_with(Unsatisfiable)
@given(FilteredStrategy(st.sampled_from([None, False, ""]), conditions=(bool,)))
def test_unsatisfiable_explicit_filteredstrategy_sampled(x):
raise AssertionError("Unreachable because there are no valid examples")


@xfail_on_crosshair(Why.no_unsatisfiable)
@fails_with(Unsatisfiable)
@given(FilteredStrategy(st.none(), conditions=(bool,)))
def test_unsatisfiable_explicit_filteredstrategy_just(x):
Expand Down
3 changes: 2 additions & 1 deletion hypothesis-python/tests/cover/test_searchstrategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from hypothesis.strategies._internal.utils import to_jsonable

from tests.common.debug import assert_simple_property, check_can_generate_examples
from tests.common.utils import checks_deprecated_behaviour
from tests.common.utils import Why, checks_deprecated_behaviour, xfail_on_crosshair


def test_or_errors_when_given_non_strategy():
Expand Down Expand Up @@ -69,6 +69,7 @@ def test_can_map():
assert_simple_property(s, lambda v: v == "foo")


@xfail_on_crosshair(Why.no_unsatisfiable)
def test_example_raises_unsatisfiable_when_too_filtered():
    """A filter that rejects every value must surface as Unsatisfiable."""
    always_rejected = integers().filter(lambda x: False)
    with pytest.raises(Unsatisfiable):
        check_can_generate_examples(always_rejected)
Expand Down
3 changes: 3 additions & 0 deletions hypothesis-python/tests/cover/test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@
from hypothesis.utils.conventions import not_set

from tests.common.utils import (
Why,
checks_deprecated_behaviour,
counts_calls,
fails_with,
skipif_emscripten,
validate_deprecation,
xfail_on_crosshair,
)


Expand Down Expand Up @@ -297,6 +299,7 @@ def test_database_is_reference_preserved():
assert s.database is s.database


@xfail_on_crosshair(Why.other)
@settings(verbosity=Verbosity.verbose)
@example(x=99)
@given(st.integers())
Expand Down
9 changes: 8 additions & 1 deletion hypothesis-python/tests/cover/test_stateful.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,12 @@
)
from hypothesis.strategies import binary, data, integers, just, lists

from tests.common.utils import capture_out, validate_deprecation
from tests.common.utils import (
Why,
capture_out,
validate_deprecation,
xfail_on_crosshair,
)
from tests.nocover.test_stateful import DepthMachine

NO_BLOB_SETTINGS = Settings(print_blob=False, phases=tuple(Phase)[:-1])
Expand Down Expand Up @@ -1175,6 +1180,8 @@ def teardown(self):
assert self.a >= 2


# Replay overruns after we trigger a crosshair.util.IgnoreAttempt exception for n=3
@xfail_on_crosshair(Why.other)
def test_min_steps_argument():
# You must pass a non-negative integer...
for n_steps in (-1, "nan", 5.0):
Expand Down
5 changes: 5 additions & 0 deletions hypothesis-python/tests/cover/test_testdecorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@
)

from tests.common.utils import (
Why,
assert_falsifying_output,
capture_out,
fails,
fails_with,
no_shrink,
raises,
skipif_emscripten,
xfail_on_crosshair,
)

# This particular test file is run under both pytest and nose, so it can't
Expand Down Expand Up @@ -304,6 +306,7 @@ def test_has_ascii(x):
assert any(c in ascii_characters for c in x)


@xfail_on_crosshair(Why.symbolic_outside_context, strict=False)
def test_can_derandomize():
values = []

Expand Down Expand Up @@ -393,6 +396,7 @@ def test_mixed_text(x):
assert set(x).issubset(set("abcdefg"))


@xfail_on_crosshair(Why.other, strict=False) # runs ~five failing examples
def test_when_set_to_no_simplifies_runs_failing_example_twice():
failing = []

Expand Down Expand Up @@ -478,6 +482,7 @@ def test_empty_lists(xs):
assert xs == []


@xfail_on_crosshair(Why.other, strict=False)
def test_given_usable_inline_on_lambdas():
xs = []
given(booleans())(lambda x: xs.append(x))()
Expand Down
Loading

0 comments on commit 049dc2f

Please sign in to comment.