From 00bf598af3fe9cd50bf88e7736a21d859fc619c4 Mon Sep 17 00:00:00 2001 From: Austin Cory Bart Date: Wed, 11 Dec 2024 15:02:45 -0500 Subject: [PATCH] Example of Verify command in Pedal command line --- docsrc/conf.py | 2 +- docsrc/index.rst | 1 + docsrc/teachers/testing.rst | 220 ++++++++++++++++++ docsrc/teachers/verify_examples/correct.out | 10 + docsrc/teachers/verify_examples/correct.py | 2 + .../teachers/verify_examples/extra_space.out | 10 + .../teachers/verify_examples/extra_space.py | 2 + .../verify_examples/literal_space.out | 12 + .../teachers/verify_examples/literal_space.py | 2 + .../verify_examples/missing_space.out | 16 ++ .../teachers/verify_examples/missing_space.py | 2 + .../verify_examples/tried_printing_answer.out | 12 + .../verify_examples/tried_printing_answer.py | 3 + 13 files changed, 293 insertions(+), 1 deletion(-) create mode 100644 docsrc/teachers/testing.rst create mode 100644 docsrc/teachers/verify_examples/correct.out create mode 100644 docsrc/teachers/verify_examples/correct.py create mode 100644 docsrc/teachers/verify_examples/extra_space.out create mode 100644 docsrc/teachers/verify_examples/extra_space.py create mode 100644 docsrc/teachers/verify_examples/literal_space.out create mode 100644 docsrc/teachers/verify_examples/literal_space.py create mode 100644 docsrc/teachers/verify_examples/missing_space.out create mode 100644 docsrc/teachers/verify_examples/missing_space.py create mode 100644 docsrc/teachers/verify_examples/tried_printing_answer.out create mode 100644 docsrc/teachers/verify_examples/tried_printing_answer.py diff --git a/docsrc/conf.py b/docsrc/conf.py index be3e650..69a8216 100644 --- a/docsrc/conf.py +++ b/docsrc/conf.py @@ -188,7 +188,7 @@ add_module_names = False # Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} # Configuration for autodoc autodoc_typehints_format = 'short' diff --git a/docsrc/index.rst b/docsrc/index.rst index e720500..1741fda 100644 --- a/docsrc/index.rst +++ b/docsrc/index.rst @@ -31,6 +31,7 @@ rather than an afterthought. teachers/examples teachers/reference teachers/cli + teachers/testing teachers/integrations developers/ffs developers/api diff --git a/docsrc/teachers/testing.rst b/docsrc/teachers/testing.rst new file mode 100644 index 0000000..828b659 --- /dev/null +++ b/docsrc/teachers/testing.rst @@ -0,0 +1,220 @@ +Testing Your Tests +================== + +A major feature of Pedal is the ability to "test your tests". +You can create example student ``Submission``, and then also create +the expected output of those submissions. You can then run your +autograding script on those submissions, and see if the feedback +you expect is generated. + +The system is also capable of saving the output generated from a given +submission, for future comparisons, as a way of "freezing" the expected +output. This is basically the same as regression testing, but for +autograding feedback! + +Verify Mode +----------- + +The ``verify`` mode is a special mode of the ``pedal`` command line tool +that allows you to run your autograding script on a submission, and +compare the output to a "frozen" version of the output. You can +run the command as follows: + +.. code-block:: console + + pedal verify grade_assignment.py submissions/ + +The names of the ``grade_assignment.py`` script and the ``submissions/`` folder +are completely under your control. They simply expect the path to a +Pedal autograding script, and a folder containing submissions. +The system will run the script on all of the Python files in the +submissions folder, and compare them to the output files with the same +names. 
+ +If you have not yet created the output files, you can use the ``--create_output`` +argument to generate them (note that this will delete existing output files): + +.. code-block:: console + + pedal verify grade_assignment.py submissions/ --create_output + +This will run the autograding script on all of the submissions, and save +the output to the output files. The expected output is stored in easy-to-read +``.out`` files using a format similar to ``ini`` files. The results are shown +using the built-in unittest module, so inconsistencies will be shown as passing +and failing unit tests. + +Example +------- + +Let's try an example of testing a simple autograding script. Let us say +that the students were assigned the following prompt: + + The code below stores the string value ``"order out of"`` into the variable + ``a_phrase``. Using only string subscripting and the addition operator, + print the phrase ``"out of order"``. + +And they had the following starting code: + +.. code-block:: python + + a_phrase = "order out of" + print(a_phrase) + +The autograding script might look like this: + +.. code-block:: python + + from pedal import * + + ensure_literal("order out of") + prevent_ast("Str", at_most=1) + ensure_ast("Subscript", at_least=2) + prevent_ast("Subscript", at_most=5) + ensure_function_call("print") + + assert_not_output(student, "out of order ", exact_strings=True, + message="There's an extra space at the end!", + label="printing_extra_space") + assert_output(student, "out of order", exact_strings=True) + +This shows off a number of the features of Pedal: + +* `ensure_literal` checks that the student has the string "order out of" in their code. +* `ensure_ast` and `prevent_ast` check that the student has at least 2, but no more than 5, subscripts. +* `prevent_ast` stops them from adding any string literals to their code, besides the one given. +* `ensure_function_call` checks that they are using the `print` function. 
+* `assert_not_output` checks that they are not printing "out of order " with an extra space at the end, with a custom + message and label. +* `assert_output` actually checks that they are printing "out of order" in the end. + +A correct solution to this problem is provided on the left, and the output file that would be generated +is on the right. The exact syntax of the output file is explained further below, but for now you can see that +the file lists the fields and their values of the `Feedback` object that was generated by the autograding script. + ++-------------------------------------------------------+-------------------------------------------------------+ +| **Correct.py** | **Expected Output** | ++=======================================================+=======================================================+ +| .. literalinclude:: verify_examples/correct.py | .. literalinclude:: verify_examples/correct.out | +| | :language: ini | +| | | ++-------------------------------------------------------+-------------------------------------------------------+ + +This is a difficult problem for students, and they might make a number of mistakes. +For example, they could forget to add the space in the middle of the phrase: + ++-----------------------------------------------------------+-------------------------------------------------------+ +| **Incorrect: Missing Space** | **Expected Output** | ++===========================================================+=======================================================+ +| .. literalinclude:: verify_examples/missing_space.py | .. 
literalinclude:: verify_examples/missing_space.out | +| | :language: ini | +| | | ++-----------------------------------------------------------+-------------------------------------------------------+ + +One of the most commonly made mistakes (especially when ChatGPT is asked) is to ignore the +instructions and add a space as a string literal: + ++-----------------------------------------------------------+-------------------------------------------------------+ +| **Incorrect: Space Literal** | **Expected Output** | ++===========================================================+=======================================================+ +| .. literalinclude:: verify_examples/literal_space.py | .. literalinclude:: verify_examples/literal_space.out | +| | :language: ini | +| | | ++-----------------------------------------------------------+-------------------------------------------------------+ + +Another common mistake is to add an extra space at the end (``"out of order "`` instead of ``"out of order"``), +by incorrectly grabbing one character too many (the space after ``order``) in the third subscript: + ++-----------------------------------------------------------+-------------------------------------------------------+ +| **Incorrect: Extra Space** | **Expected Output** | ++===========================================================+=======================================================+ +| .. literalinclude:: verify_examples/extra_space.py | .. literalinclude:: verify_examples/extra_space.out | +| | :language: ini | +| | | ++-----------------------------------------------------------+-------------------------------------------------------+ + +Another common mistake is to try to print the answer directly, which is not allowed. 
+Note that the student tries to be clever here, and includes an unused reference to the variable `a_phrase`, +so that the system does not complain about the unused variable: + ++---------------------------------------------------------------+-----------------------------------------------------------------+ +| **Incorrect: Printing Answer** | **Expected Output** | ++===============================================================+=================================================================+ +| .. literalinclude:: verify_examples/tried_printing_answer.py | .. literalinclude:: verify_examples/tried_printing_answer.out | +| | :language: ini | +| | | ++---------------------------------------------------------------+-----------------------------------------------------------------+ + +If we had all of the output files generated, we could run the following command: + +.. code-block:: console + + pedal verify grade_assignment.py verify_examples/ + +And that would generate the following unittest output: + +.. code-block:: console + + ..... + ---------------------------------------------------------------------- + Ran 5 tests in 0.001s + + OK + +This would show that all of the tests passed, and that the autograding script is working as expected. +But what if we decided later to get rid of the custom message for the extra space mistake, without +updating our output files? Then, the output would look like: + +.. 
code-block:: diff + + ====================================================================== + FAIL: validate.py, using extra_space.py + ---------------------------------------------------------------------- + Traceback (most recent call last): + File "pedal\command_line\modes.py", line 450, in new_test + self.assertEqual(entire_expected, entire_actual, + AssertionError: "correct: False\nsuccess: False\nscore: 0[156 chars]ne\n" != 'correct: False\nsuccess: False\nscore: 0[265 chars]ne\n' + correct: False + success: False + score: 0 + scores: [] + category: specification + - label: printing_extra_space + + label: assert_output + title: Failed Instructor Test + - message: There's an extra space at the end! + + message: Student code failed instructor test. + + I ran your code. + + The printed output was: + + out of order + + But I expected the output to be: + + out of order + location: None + : Wrong value for 'label', 'message' in extra_space. + +You can see the helpfulness of the custom feedback message, since the two strings look very similar. +Most likely, you would want to keep the assertion with its custom message. +But more importantly, the verification system is showing you the lines that are different between the actual +and expected output. + + +Output File Format +------------------ + +The output files use a format similar to ``ini`` format, which is a simple key-value format. +Sections are divided by square brackets, and each key-value pair is separated by a colon. +A value can be a string, a number, or any other ``repr``-able object. +The results will be compared as strings, so be careful with floating point numbers! +Indentation is used in the message field to allow for multi-line messages. + +Only the included fields found in the file are compared, so you can include as much or as little information +as you want in the output files. If a field is not important, then you don't need to include it +in the output file. 
So if you wanted to confirm that a feedback was not ``correct``, but didn't +care what the exact message was, you could have a file like this: + +.. code-block:: ini + + [standard.final] + correct: False + +This would only check that the feedback was not correct, and would ignore the message, the location, and any other fields. \ No newline at end of file diff --git a/docsrc/teachers/verify_examples/correct.out b/docsrc/teachers/verify_examples/correct.out new file mode 100644 index 0000000..8293dc0 --- /dev/null +++ b/docsrc/teachers/verify_examples/correct.out @@ -0,0 +1,10 @@ +[standard.final] +correct: True +success: True +score: 1 +scores: [] +category: complete +label: set_correct_no_errors +title: Complete +message: Great work! +location: None diff --git a/docsrc/teachers/verify_examples/correct.py b/docsrc/teachers/verify_examples/correct.py new file mode 100644 index 0000000..46ea3c1 --- /dev/null +++ b/docsrc/teachers/verify_examples/correct.py @@ -0,0 +1,2 @@ +a_phrase = "order out of" +print(a_phrase[6:] + a_phrase[5] + a_phrase[:5]) \ No newline at end of file diff --git a/docsrc/teachers/verify_examples/extra_space.out b/docsrc/teachers/verify_examples/extra_space.out new file mode 100644 index 0000000..3c58656 --- /dev/null +++ b/docsrc/teachers/verify_examples/extra_space.out @@ -0,0 +1,10 @@ +[standard.final] +correct: False +success: False +score: 0 +scores: [] +category: specification +label: printing_extra_space +title: Failed Instructor Test +message: There's an extra space at the end! 
+location: None diff --git a/docsrc/teachers/verify_examples/extra_space.py b/docsrc/teachers/verify_examples/extra_space.py new file mode 100644 index 0000000..722ff5a --- /dev/null +++ b/docsrc/teachers/verify_examples/extra_space.py @@ -0,0 +1,2 @@ +a_phrase = "order out of" +print(a_phrase[6:] + a_phrase[5] + a_phrase[:6]) \ No newline at end of file diff --git a/docsrc/teachers/verify_examples/literal_space.out b/docsrc/teachers/verify_examples/literal_space.out new file mode 100644 index 0000000..01223bf --- /dev/null +++ b/docsrc/teachers/verify_examples/literal_space.out @@ -0,0 +1,12 @@ +[standard.final] +correct: False +success: False +score: 0 +scores: [] +category: specification +label: prevent_ast +title: May Not Use Code +message: You used a string literal on line 2. + You may not use that more than 1 times, + but you used it 2 times. +location: diff --git a/docsrc/teachers/verify_examples/literal_space.py b/docsrc/teachers/verify_examples/literal_space.py new file mode 100644 index 0000000..b12ec47 --- /dev/null +++ b/docsrc/teachers/verify_examples/literal_space.py @@ -0,0 +1,2 @@ +a_phrase = "order out of" +print(a_phrase[6:] + " " + a_phrase[:5]) \ No newline at end of file diff --git a/docsrc/teachers/verify_examples/missing_space.out b/docsrc/teachers/verify_examples/missing_space.out new file mode 100644 index 0000000..103d1f4 --- /dev/null +++ b/docsrc/teachers/verify_examples/missing_space.out @@ -0,0 +1,16 @@ +[standard.final] +correct: False +success: False +score: 0 +scores: [] +category: specification +label: assert_output +title: Failed Instructor Test +message: + Student code failed instructor test. + I ran your code. 
+ The printed output was: + out oforder + But I expected the output to be: + out of order +location: None diff --git a/docsrc/teachers/verify_examples/missing_space.py b/docsrc/teachers/verify_examples/missing_space.py new file mode 100644 index 0000000..9c4a46c --- /dev/null +++ b/docsrc/teachers/verify_examples/missing_space.py @@ -0,0 +1,2 @@ +a_phrase = "order out of" +print(a_phrase[6:] + a_phrase[:5]) \ No newline at end of file diff --git a/docsrc/teachers/verify_examples/tried_printing_answer.out b/docsrc/teachers/verify_examples/tried_printing_answer.out new file mode 100644 index 0000000..581c9b2 --- /dev/null +++ b/docsrc/teachers/verify_examples/tried_printing_answer.out @@ -0,0 +1,12 @@ +[standard.final] +correct: False +success: False +score: 0 +scores: [] +category: specification +label: prevent_ast +title: May Not Use Code +message: You used a string literal on line 3. + You may not use that more than 1 times, + but you used it 2 times. +location: diff --git a/docsrc/teachers/verify_examples/tried_printing_answer.py b/docsrc/teachers/verify_examples/tried_printing_answer.py new file mode 100644 index 0000000..082831e --- /dev/null +++ b/docsrc/teachers/verify_examples/tried_printing_answer.py @@ -0,0 +1,3 @@ +a_phrase = "order out of" +a_phrase +print("out of order") \ No newline at end of file