Merge pull request NixOS#7954 from obsidiansystems/lang-test-improvem…

…ents Language test improvements
obsidiansystems · Jul 12, 2023 · cafb5e8 · cafb5e8
2 parents 4a880c3 + c704844
commit cafb5e8
Show file tree

Hide file tree

Showing 74 changed files with 762 additions and 36 deletions.
diff --git a/.gitignore b/.gitignore
@@ -95,6 +95,7 @@ perl/Makefile.config
 # /tests/lang/
 /tests/lang/*.out
 /tests/lang/*.out.xml
+/tests/lang/*.err
 /tests/lang/*.ast
 
 /perl/lib/Nix/Config.pm

diff --git a/doc/manual/src/contributing/testing.md b/doc/manual/src/contributing/testing.md
@@ -86,6 +86,31 @@ GNU gdb (GDB) 12.1
 One can debug the Nix invocation in all the usual ways.
 For example, enter `run` to start the Nix invocation.
 
+### Characterization testing
+
+Occasionally, Nix utilizes a technique called [Characterization Testing](https://en.wikipedia.org/wiki/Characterization_test) as part of the functional tests.
+This technique is to include the exact output/behavior of a former version of Nix in a test in order to check that Nix continues to produce the same behavior going forward.
+
+For example, this technique is used for the language tests, to check both the printed final value if evaluation was successful, and any errors and warnings encountered.
+
+It is frequently useful to regenerate the expected output.
+To do that, rerun the failed test with `_NIX_TEST_ACCEPT=1`.
+(At least, this is the convention we've used for `tests/lang.sh`.
+If we add more characterization testing we should always strive to be consistent.)
+
+An interesting situation to document is the case when these tests are "overfitted".
+The language tests are, again, an example of this.
+The expected successful output of evaluation is supposed to be highly stable – we do not intend to make breaking changes to (the stable parts of) the Nix language.
+However, the errors and warnings during evaluation (successful or not) are not stable in this way.
+We are free to change how they are displayed at any time.
+
+It may be surprising that we would test non-normative behavior like diagnostic outputs.
+Diagnostic outputs are indeed not a stable interface, but they still are important to users.
+By recording the expected output, the test suite guards against accidental changes, and ensures the *result* (not just the code that implements it) of the diagnostic code paths is under code review.
+Regressions are caught, and improvements always show up in code review.
+
+To ensure that characterization testing doesn't make it harder to intentionally change these interfaces, there always must be an easy way to regenerate the expected output, as we do with `_NIX_TEST_ACCEPT=1`.
+
 ## Integration tests
 
 The integration tests are defined in the Nix flake under the `hydraJobs.tests` attribute.

diff --git a/tests/dependencies.sh b/tests/dependencies.sh
@@ -15,6 +15,9 @@ if test -n "$dot"; then
     $dot < $TEST_ROOT/graph
 fi
 
+# Test GraphML graph generation
+nix-store -q --graphml "$drvPath" > $TEST_ROOT/graphml
+
 outPath=$(nix-store -rvv "$drvPath") || fail "build failed"
 
 # Test Graphviz graph generation.

diff --git a/tests/lang-test-infra.sh b/tests/lang-test-infra.sh
@@ -0,0 +1,86 @@
+# Test the function for lang.sh
+source common.sh
+
+source lang/framework.sh
+
+# We are testing this, so don't want outside world to affect us.
+unset _NIX_TEST_ACCEPT
+
+# We'll only modify this in subshells so we don't need to reset it.
+badDiff=0
+
+# matches non-empty
+echo Hi! > "$TEST_ROOT/got"
+cp "$TEST_ROOT/got" "$TEST_ROOT/expected"
+(
+  diffAndAcceptInner test "$TEST_ROOT/got" "$TEST_ROOT/expected"
+  (( "$badDiff" == 0 ))
+)
+
+# matches empty, non-existent file is the same as empty file
+echo -n > "$TEST_ROOT/got"
+(
+  diffAndAcceptInner test "$TEST_ROOT/got" "$TEST_ROOT/does-not-exist"
+  (( "$badDiff" == 0 ))
+)
+
+# doesn't match non-empty, non-existent file is the same as empty file
+echo Hi! > "$TEST_ROOT/got"
+(
+  diffAndAcceptInner test "$TEST_ROOT/got" "$TEST_ROOT/does-not-exist"
+  (( "$badDiff" == 1 ))
+)
+
+# doesn't match, `badDiff` set, file unchanged
+echo Hi! > "$TEST_ROOT/got"
+echo Bye! > "$TEST_ROOT/expected"
+(
+  diffAndAcceptInner test "$TEST_ROOT/got" "$TEST_ROOT/expected"
+  (( "$badDiff" == 1 ))
+)
+[[ "$(echo Bye! )" == $(< "$TEST_ROOT/expected") ]]
+
+# _NIX_TEST_ACCEPT=1 matches non-empty
+echo Hi! > "$TEST_ROOT/got"
+cp "$TEST_ROOT/got" "$TEST_ROOT/expected"
+(
+  _NIX_TEST_ACCEPT=1 diffAndAcceptInner test "$TEST_ROOT/got" "$TEST_ROOT/expected"
+  (( "$badDiff" == 0 ))
+)
+
+# _NIX_TEST_ACCEPT doesn't match, `badDiff=1` set, file changed (was previously non-empty)
+echo Hi! > "$TEST_ROOT/got"
+echo Bye! > "$TEST_ROOT/expected"
+(
+  _NIX_TEST_ACCEPT=1 diffAndAcceptInner test "$TEST_ROOT/got" "$TEST_ROOT/expected"
+  (( "$badDiff" == 1 ))
+)
+[[ "$(echo Hi! )" == $(< "$TEST_ROOT/expected") ]]
+# second time succeeds
+(
+  diffAndAcceptInner test "$TEST_ROOT/got" "$TEST_ROOT/expected"
+  (( "$badDiff" == 0 ))
+)
+
+# _NIX_TEST_ACCEPT matches empty, non-existent file not created
+echo -n > "$TEST_ROOT/got"
+(
+  _NIX_TEST_ACCEPT=1 diffAndAcceptInner test "$TEST_ROOT/got" "$TEST_ROOT/does-not-exist"
+  (( "$badDiff" == 0 ))
+)
+[[ ! -f "$TEST_ROOT/does-not-exist" ]]
+
+# _NIX_TEST_ACCEPT doesn't match, output empty, file deleted
+echo -n > "$TEST_ROOT/got"
+echo Bye! > "$TEST_ROOT/expected"
+badDiff=0
+(
+  _NIX_TEST_ACCEPT=1 diffAndAcceptInner test "$TEST_ROOT/got" "$TEST_ROOT/expected"
+  (( "$badDiff" == 1 ))
+)
+[[ ! -f "$TEST_ROOT/expected" ]]
+# second time succeeds
+(
+  diffAndAcceptInner test "$TEST_ROOT/got" "$TEST_ROOT/expected"
+  (( "$badDiff" == 0 ))
+)
diff --git a/tests/lang.sh b/tests/lang.sh
@@ -1,5 +1,17 @@
 source common.sh
 
+set -o pipefail
+
+source lang/framework.sh
+
+# Specialize diffAndAcceptInner for this script: $1 is the test name, $2/$3 are file suffixes expanded to lang/<testName>.<suffix>.
+function diffAndAccept() {
+    local -r testName="$1"
+    local -r got="lang/$testName.$2"
+    local -r expected="lang/$testName.$3"
+    diffAndAcceptInner "$testName" "$got" "$expected"
+}
+
 export TEST_VAR=foo # for eval-okay-getenv.nix
 export NIX_REMOTE=dummy://
 export NIX_STORE_DIR=/nix/store
@@ -20,63 +32,114 @@ nix-instantiate --eval -E 'let x = { repeating = x; tracing = builtins.trace x t
 
 set +x
 
-fail=0
+badDiff=0
+badExitCode=0
 
 for i in lang/parse-fail-*.nix; do
     echo "parsing $i (should fail)";
-    i=$(basename $i .nix)
-    if ! expect 1 nix-instantiate --parse - < lang/$i.nix; then
+    i=$(basename "$i" .nix)
+    if expectStderr 1 nix-instantiate --parse - < "lang/$i.nix" > "lang/$i.err"
+    then
+        diffAndAccept "$i" err err.exp
+    else
         echo "FAIL: $i shouldn't parse"
-        fail=1
+        badExitCode=1
     fi
 done
 
 for i in lang/parse-okay-*.nix; do
     echo "parsing $i (should succeed)";
-    i=$(basename $i .nix)
-    if ! expect 0 nix-instantiate --parse - < lang/$i.nix > lang/$i.out; then
+    i=$(basename "$i" .nix)
+    if
+        expect 0 nix-instantiate --parse - < "lang/$i.nix" \
+            1> >(sed "s!$(pwd)!/pwd!g" > "lang/$i.out") \
+            2> >(sed "s!$(pwd)!/pwd!g" > "lang/$i.err")
+    then
+        diffAndAccept "$i" out exp
+        diffAndAccept "$i" err err.exp
+    else
         echo "FAIL: $i should parse"
-        fail=1
+        badExitCode=1
     fi
 done
 
 for i in lang/eval-fail-*.nix; do
     echo "evaluating $i (should fail)";
-    i=$(basename $i .nix)
-    if ! expect 1 nix-instantiate --eval lang/$i.nix; then
+    i=$(basename "$i" .nix)
+    if
+        expectStderr 1 nix-instantiate --show-trace "lang/$i.nix" \
+            | sed "s!$(pwd)!/pwd!g" > "lang/$i.err"
+    then
+        diffAndAccept "$i" err err.exp
+    else
         echo "FAIL: $i shouldn't evaluate"
-        fail=1
+        badExitCode=1
     fi
 done
 
 for i in lang/eval-okay-*.nix; do
     echo "evaluating $i (should succeed)";
-    i=$(basename $i .nix)
+    i=$(basename "$i" .nix)
 
-    if test -e lang/$i.exp; then
-        flags=
-        if test -e lang/$i.flags; then
-            flags=$(cat lang/$i.flags)
-        fi
-        if ! expect 0 env NIX_PATH=lang/dir3:lang/dir4 HOME=/fake-home nix-instantiate $flags --eval --strict lang/$i.nix > lang/$i.out; then
+    if test -e "lang/$i.exp.xml"; then
+        if expect 0 nix-instantiate --eval --xml --no-location --strict \
+                "lang/$i.nix" > "lang/$i.out.xml"
+        then
+            diffAndAccept "$i" out.xml exp.xml
+        else
             echo "FAIL: $i should evaluate"
-            fail=1
-        elif ! diff <(< lang/$i.out sed -e "s|$(pwd)|/pwd|g") lang/$i.exp; then
-            echo "FAIL: evaluation result of $i not as expected"
-            fail=1
+            badExitCode=1
+        fi
+    elif test ! -e "lang/$i.exp-disabled"; then
+        declare -a flags=()
+        if test -e "lang/$i.flags"; then
+            read -r -a flags < "lang/$i.flags"
         fi
-    fi
 
-    if test -e lang/$i.exp.xml; then
-        if ! expect 0 nix-instantiate --eval --xml --no-location --strict \
-                lang/$i.nix > lang/$i.out.xml; then
+        if
+            expect 0 env \
+                NIX_PATH=lang/dir3:lang/dir4 \
+                HOME=/fake-home \
+                nix-instantiate "${flags[@]}" --eval --strict "lang/$i.nix" \
+                1> "lang/$i.out" \
+                2> "lang/$i.err"
+        then
+            sed -i "s!$(pwd)!/pwd!g" "lang/$i.out" "lang/$i.err"
+            diffAndAccept "$i" out exp
+            diffAndAccept "$i" err err.exp
+        else
             echo "FAIL: $i should evaluate"
-            fail=1
-        elif ! cmp -s lang/$i.out.xml lang/$i.exp.xml; then
-            echo "FAIL: XML evaluation result of $i not as expected"
-            fail=1
+            badExitCode=1
         fi
     fi
 done
 
-exit $fail
+if test -n "${_NIX_TEST_ACCEPT-}"; then
+    if (( "$badDiff" )); then
+        echo 'Output did not match, but accepted output as the persisted expected output.'
+        echo 'That means the next time the tests are run, they should pass.'
+    else
+        echo 'NOTE: Environment variable _NIX_TEST_ACCEPT is defined,'
+        echo 'indicating the unexpected output should be accepted as the expected output going forward,'
+        echo 'but no tests had unexpected output so there was no expected output to update.'
+    fi
+    if (( "$badExitCode" )); then
+        exit "$badExitCode"
+    else
+        skipTest "regenerating golden masters"
+    fi
+else
+    if (( "$badDiff" )); then
+        echo ''
+        echo 'You can rerun this test with:'
+        echo ''
+        echo '    _NIX_TEST_ACCEPT=1 make tests/lang.sh.test'
+        echo ''
+        echo 'to regenerate the files containing the expected output,'
+        echo 'and then view the git diff to decide whether a change is'
+        echo 'good/intentional or bad/unintentional.'
+        echo 'If the diff contains arbitrary or impure information,'
+        echo 'please improve the normalization that the test applies to the output.'
+    fi
+    exit $(( "$badExitCode" + "$badDiff" ))
+fi
diff --git a/tests/lang/empty.exp b/tests/lang/empty.exp
diff --git a/tests/lang/eval-fail-abort.err.exp b/tests/lang/eval-fail-abort.err.exp
@@ -0,0 +1,10 @@
+error:
+       … while calling the 'abort' builtin
+
+         at /pwd/lang/eval-fail-abort.nix:1:14:
+
+            1| if true then abort "this should fail" else 1
+             |              ^
+            2|
+
+       error: evaluation aborted with the following error message: 'this should fail'
diff --git a/tests/lang/eval-fail-antiquoted-path.err.exp b/tests/lang/eval-fail-antiquoted-path.err.exp
@@ -0,0 +1 @@
+error: getting attributes of path ‘PWD/lang/fnord’: No such file or directory
diff --git a/tests/lang/eval-fail-assert.err.exp b/tests/lang/eval-fail-assert.err.exp
@@ -0,0 +1,36 @@
+error:
+       … while evaluating the attribute 'body'
+
+         at /pwd/lang/eval-fail-assert.nix:4:3:
+
+            3|
+            4|   body = x "x";
+             |   ^
+            5| }
+
+       … from call site
+
+         at /pwd/lang/eval-fail-assert.nix:4:10:
+
+            3|
+            4|   body = x "x";
+             |          ^
+            5| }
+
+       … while calling 'x'
+
+         at /pwd/lang/eval-fail-assert.nix:2:7:
+
+            1| let {
+            2|   x = arg: assert arg == "y"; 123;
+             |       ^
+            3|
+
+       error: assertion '(arg == "y")' failed
+
+       at /pwd/lang/eval-fail-assert.nix:2:12:
+
+            1| let {
+            2|   x = arg: assert arg == "y"; 123;
+             |            ^
+            3|
diff --git a/tests/lang/eval-fail-bad-antiquote-1.err.exp b/tests/lang/eval-fail-bad-antiquote-1.err.exp
@@ -0,0 +1,10 @@
+error:
+       … while evaluating a path segment
+
+         at /pwd/lang/eval-fail-bad-antiquote-1.nix:1:2:
+
+            1| "${x: x}"
+             |  ^
+            2|
+
+       error: cannot coerce a function to a string
diff --git a/tests/lang/eval-fail-bad-antiquote-2.err.exp b/tests/lang/eval-fail-bad-antiquote-2.err.exp
@@ -0,0 +1 @@
+error: operation 'addToStoreFromDump' is not supported by store 'dummy'
diff --git a/tests/lang/eval-fail-bad-antiquote-3.err.exp b/tests/lang/eval-fail-bad-antiquote-3.err.exp
@@ -0,0 +1,10 @@
+error:
+       … while evaluating a path segment
+
+         at /pwd/lang/eval-fail-bad-antiquote-3.nix:1:3:
+
+            1| ''${x: x}''
+             |   ^
+            2|
+
+       error: cannot coerce a function to a string
diff --git a/tests/lang/eval-fail-bad-string-interpolation-1.err.exp b/tests/lang/eval-fail-bad-string-interpolation-1.err.exp
@@ -0,0 +1,10 @@
+error:
+       … while evaluating a path segment
+
+         at /pwd/lang/eval-fail-bad-string-interpolation-1.nix:1:2:
+
+            1| "${x: x}"
+             |  ^
+            2|
+
+       error: cannot coerce a function to a string
diff --git a/tests/lang/eval-fail-bad-string-interpolation-2.err.exp b/tests/lang/eval-fail-bad-string-interpolation-2.err.exp
@@ -0,0 +1 @@
+error: operation 'addToStoreFromDump' is not supported by store 'dummy'
diff --git a/tests/lang/eval-fail-bad-string-interpolation-3.err.exp b/tests/lang/eval-fail-bad-string-interpolation-3.err.exp
@@ -0,0 +1,10 @@
+error:
+       … while evaluating a path segment
+
+         at /pwd/lang/eval-fail-bad-string-interpolation-3.nix:1:3:
+
+            1| ''${x: x}''
+             |   ^
+            2|
+
+       error: cannot coerce a function to a string