dont merge CI test #20

Closed
wants to merge 47 commits

47 commits
a7bed7e
initial conversion of autotest code from 0.7.x to 0.9.x
trevorcampbell Aug 21, 2023
19b089e
added docs html files for new problems to .gitignore
trevorcampbell Aug 21, 2023
6705227
minor bugfix: shortened --source_with_tests flag in tests
trevorcampbell Aug 21, 2023
d823f3e
output for source_with_tests set to the right directory
trevorcampbell Aug 22, 2023
48837d9
simplified kernel execution code
trevorcampbell Aug 23, 2023
f0140fc
store kernel name in resources
trevorcampbell Aug 23, 2023
924a26d
minor ed
trevorcampbell Aug 23, 2023
9ea6577
remove problem3.ipynb from the ui-tests
trevorcampbell Aug 23, 2023
5a521ac
removed commented out timestamp code from earlier snippet execution
trevorcampbell Aug 25, 2023
a8ab39f
tests.yml -> autotests.yml
trevorcampbell Aug 28, 2023
50e40c6
logging string interpolation
trevorcampbell Aug 28, 2023
be7f998
optional success code (and fix execute success code bug)
trevorcampbell Aug 28, 2023
ec63545
reraise filenotfound error when find autotest directive but no autote…
trevorcampbell Aug 28, 2023
e8c3d7e
hash optional; raise error if not set
trevorcampbell Aug 28, 2023
2ceab61
minor comment rephrasing
trevorcampbell Aug 28, 2023
c53a319
better if statement formatting
trevorcampbell Aug 29, 2023
52ae394
remove unnecessary spaces in start new kernel arg
trevorcampbell Aug 29, 2023
577c733
clear outputs from test notebook
trevorcampbell Aug 29, 2023
702d10d
added jinja to pyproject
trevorcampbell Aug 29, 2023
da8c972
initial commit of advanced topics docs, minor polish on creating assi…
trevorcampbell Aug 29, 2023
db9b410
done advanced docs section on autotest
trevorcampbell Aug 29, 2023
a881d2b
moved problem3 to it's own problem set ps1_autotest
trevorcampbell Aug 29, 2023
1af6da2
moved problem3 to its own ps1_autotest assignment
trevorcampbell Aug 29, 2023
0ca7287
remove problem3 from ps1
trevorcampbell Aug 29, 2023
cf1d605
ignore autotest assignment files; make quickstart rename ps1_autotest…
trevorcampbell Aug 30, 2023
4b76b89
remove ps2 R assignment; check for autotest directive in quickstart t…
trevorcampbell Aug 30, 2023
2702e2c
minor code tags syntax fix in docs
trevorcampbell Aug 30, 2023
6964065
set kernel working dir to notebook resources path
trevorcampbell Aug 31, 2023
cb09c37
Merge pull request #19 from UBC-DSCI/set-kernel-resource-directory
trevorcampbell Aug 31, 2023
d271f9c
Convert to deterministic salt computed using cell source/index
trevorcampbell Aug 31, 2023
c58646d
minor ed
trevorcampbell Aug 31, 2023
cc5acbf
remove extra jinja from pyproject
trevorcampbell Aug 31, 2023
2dffa50
added tests for kernel workingdir, release consistency
trevorcampbell Aug 31, 2023
91eeaa0
minor quotes bugfix in workdir kernel test
trevorcampbell Aug 31, 2023
abb35d0
refresh nb between tests for workdir kernel test
trevorcampbell Aug 31, 2023
45ca48e
fail test to see existing log
trevorcampbell Aug 31, 2023
7d45d16
trying to provoke failure
trevorcampbell Aug 31, 2023
037141d
try error
trevorcampbell Aug 31, 2023
91b5789
trying to figure out workdir test
trevorcampbell Aug 31, 2023
bc77526
more error output
trevorcampbell Aug 31, 2023
644c116
more observability
trevorcampbell Aug 31, 2023
ec86a9a
test debugging
trevorcampbell Aug 31, 2023
95174e9
move autotests file
trevorcampbell Aug 31, 2023
39f6ac1
remove tests to speed things up
trevorcampbell Aug 31, 2023
bf37442
remove nbgrader format tests
trevorcampbell Aug 31, 2023
dd136fe
extra tests
trevorcampbell Aug 31, 2023
204b265
ed
trevorcampbell Aug 31, 2023
2 changes: 2 additions & 0 deletions .gitignore
@@ -92,6 +92,8 @@ nbgrader/docs/source/user_guide/release/ps1/problem2.html
nbgrader/docs/source/user_guide/source/header.html
nbgrader/docs/source/user_guide/source/ps1/problem1.html
nbgrader/docs/source/user_guide/source/ps1/problem2.html
nbgrader/docs/source/user_guide/source/ps1_autotest/problem1.html
nbgrader/docs/source/user_guide/source/ps1_autotest/problem2.html

# components stuff
node_modules
32 changes: 29 additions & 3 deletions nbgrader/apps/generateassignmentapp.py
@@ -2,10 +2,11 @@

import sys

from traitlets import default
from traitlets import default, Bool
from textwrap import dedent

from .baseapp import NbGrader, nbgrader_aliases, nbgrader_flags
from ..converters import BaseConverter, GenerateAssignment, NbGraderException
from ..converters import BaseConverter, GenerateAssignment, NbGraderException, GenerateSourceWithTests
from traitlets.traitlets import MetaHasTraits
from typing import List, Any
from traitlets.config.loader import Config
@@ -51,6 +52,12 @@
{'BaseConverter': {'force': True}},
"Overwrite an assignment/submission if it already exists."
),
'source_with_tests': (
{'GenerateAssignmentApp': {'source_with_tests': True}},
"Generate intermediate notebooks that contain both the autogenerated test code and the solutions. "
"Results will be saved in the source_with_tests/ folder. "
"This is useful for instructors to debug problematic autogenerated test code."
),
})


@@ -62,6 +69,17 @@ class GenerateAssignmentApp(NbGrader):
aliases = aliases
flags = flags

source_with_tests = Bool(
False,
help=dedent(
"""
Generate intermediate notebooks that contain both the autogenerated test code and the solutions.
Results will be saved in the source_with_tests/ folder.
This is useful for instructors to debug issues in autogenerated test code.
"""
)
).tag(config=True)

examples = """
Produce the version of the assignment that is intended to be released to
students. This performs several modifications to the original assignment:
@@ -112,7 +130,7 @@ class GenerateAssignmentApp(NbGrader):
@default("classes")
def _classes_default(self) -> List[MetaHasTraits]:
classes = super(GenerateAssignmentApp, self)._classes_default()
classes.extend([BaseConverter, GenerateAssignment])
classes.extend([BaseConverter, GenerateAssignment, GenerateSourceWithTests])
return classes

def _load_config(self, cfg: Config, **kwargs: Any) -> None:
@@ -141,6 +159,14 @@ def start(self) -> None:
elif len(self.extra_args) == 1:
self.coursedir.assignment_id = self.extra_args[0]


if self.source_with_tests:
converter = GenerateSourceWithTests(coursedir=self.coursedir, parent=self)
try:
converter.start()
except NbGraderException:
sys.exit(1)

converter = GenerateAssignment(coursedir=self.coursedir, parent=self)
try:
converter.start()
25 changes: 22 additions & 3 deletions nbgrader/apps/quickstartapp.py
@@ -40,6 +40,15 @@
"""
)
),
'autotest': (
{'QuickStartApp': {'autotest': True}},
dedent(
"""
Create notebook assignments that have examples of automatic test generation via
### AUTOTEST and ### HASHED AUTOTEST statements.
"""
)
),
}

class QuickStartApp(NbGrader):
@@ -73,6 +82,8 @@ class QuickStartApp(NbGrader):

force = Bool(False, help="Whether to overwrite existing files").tag(config=True)

autotest = Bool(False, help="Whether to use automatic test generation in example files").tag(config=True)

@default("classes")
def _classes_default(self):
classes = super(QuickStartApp, self)._classes_default()
@@ -115,12 +126,20 @@ def start(self):
if not os.path.isdir(course_path):
os.mkdir(course_path)

# populating it with an example
# populate it with an example
self.log.info("Copying example from the user guide...")
example = os.path.abspath(os.path.join(
os.path.dirname(__file__), '..', 'docs', 'source', 'user_guide', 'source'))
ignore_html = shutil.ignore_patterns("*.html")
shutil.copytree(example, os.path.join(course_path, "source"), ignore=ignore_html)
if self.autotest:
tests_file_path = os.path.abspath(os.path.join(
os.path.dirname(__file__), '..', 'docs', 'source', 'user_guide', 'autotests.yml'))
shutil.copyfile(tests_file_path, os.path.join(course_path, 'autotests.yml'))
ignored_files = shutil.ignore_patterns("*.html", "ps1")
shutil.copytree(example, os.path.join(course_path, "source"), ignore=ignored_files)
os.rename(os.path.join(course_path, "source", "ps1_autotest"), os.path.join(course_path, "source", "ps1"))
else:
ignored_files = shutil.ignore_patterns("*.html", "autotests.yml", "ps1_autotest")
shutil.copytree(example, os.path.join(course_path, "source"), ignore=ignored_files)

# create the config file
self.log.info("Generating example config file...")
4 changes: 3 additions & 1 deletion nbgrader/converters/__init__.py
@@ -5,6 +5,7 @@
from .feedback import Feedback
from .generate_feedback import GenerateFeedback
from .generate_solution import GenerateSolution
from .generate_source_with_tests import GenerateSourceWithTests

__all__ = [
"BaseConverter",
@@ -14,5 +15,6 @@
"Autograde",
"Feedback",
"GenerateFeedback",
"GenerateSolution"
"GenerateSolution",
"GenerateSourceWithTests"
]
2 changes: 2 additions & 0 deletions nbgrader/converters/generate_assignment.py
@@ -8,6 +8,7 @@
from .base import BaseConverter, NbGraderException
from ..preprocessors import (
IncludeHeaderFooter,
InstantiateTests,
ClearSolutions,
LockCells,
ComputeChecksums,
@@ -57,6 +58,7 @@ def _output_directory(self) -> str:

preprocessors = List([
IncludeHeaderFooter,
InstantiateTests,
LockCells,
ClearSolutions,
ClearOutput,
49 changes: 49 additions & 0 deletions nbgrader/converters/generate_source_with_tests.py
@@ -0,0 +1,49 @@
import os
import re

from traitlets import List, default

from .base import BaseConverter
from ..preprocessors import (
InstantiateTests,
ClearOutput,
CheckCellMetadata
)
from traitlets.config.loader import Config
from typing import Any
from ..coursedir import CourseDirectory


class GenerateSourceWithTests(BaseConverter):

@default("permissions")
def _permissions_default(self) -> int:
return 664 if self.coursedir.groupshared else 644

@property
def _input_directory(self) -> str:
return self.coursedir.source_directory

@property
def _output_directory(self) -> str:
return self.coursedir.source_with_tests_directory

preprocessors = List([
InstantiateTests,
ClearOutput,
CheckCellMetadata
]).tag(config=True)

def _load_config(self, cfg: Config, **kwargs: Any) -> None:
super(GenerateSourceWithTests, self)._load_config(cfg, **kwargs)

def __init__(self, coursedir: CourseDirectory = None, **kwargs: Any) -> None:
super(GenerateSourceWithTests, self).__init__(coursedir=coursedir, **kwargs)

def start(self) -> None:
old_student_id = self.coursedir.student_id
self.coursedir.student_id = '.'
try:
super(GenerateSourceWithTests, self).start()
finally:
self.coursedir.student_id = old_student_id
12 changes: 12 additions & 0 deletions nbgrader/coursedir.py
@@ -142,6 +142,18 @@ def _validate_notebook_id(self, proposal: Bunch) -> str:
)
).tag(config=True)

source_with_tests_directory = Unicode(
'source_with_tests',
help=dedent(
"""
The name of the directory that contains notebooks with both solutions
and instantiated test code (i.e., all AUTOTEST directives are removed
and replaced by actual test code). This corresponds to the
`nbgrader_step` variable in the `directory_structure` config option.
"""
)
).tag(config=True)

submitted_directory = Unicode(
'submitted',
help=dedent(
157 changes: 157 additions & 0 deletions nbgrader/docs/source/user_guide/advanced.rst
@@ -194,3 +194,160 @@ containerization system. For details on using ``envkernel`` with
singularity, see the `README
<https://github.com/NordicHPC/envkernel/blob/master/README.md>`_ of
``envkernel``.

.. _customizing-autotests:

Automatic test code generation
---------------------------------------

.. versionadded:: 0.9.0

.. seealso::

:ref:`autograder-tests-cell-automatic-test-code`
General introduction to automatic test code generation.


nbgrader now supports generating test code automatically
using ``### AUTOTEST`` and ``### HASHED AUTOTEST`` statements.
In this section, you can find more detail on how this works and
how to customize the test generation process.
Suppose you ask students to create a ``foo`` function that adds 5 to
an integer. In the source copy of the notebook, you might write something like

.. code:: python

    ### BEGIN SOLUTION
    def foo(x):
        return x + 5
    ### END SOLUTION

In a test cell, you would normally then write test code manually to probe various aspects of the solution.
For example, you might check that the function correctly maps 3 to 8, and that the type
of the output is an integer.

.. code:: python

assert isinstance(foo(3), int), "incrementing an int by 5 should return an int"
assert foo(3) == 8, "3+5 should be 8"

nbgrader now provides functionality to automate this process. Instead of writing tests explicitly,
you can specify *what you want to test* and let nbgrader decide *how to test it* automatically.

.. code:: python

### AUTOTEST foo(3)

This directive indicates that you want to check ``foo(3)`` in the student's notebook, and make sure it
aligns with the value of ``foo(3)`` in the current source copy. You can write any valid expression (in the
language of your notebook) after the ``### AUTOTEST`` directive. For example, you could write

.. code:: python

### AUTOTEST (foo(3) - 5 == 3)

to generate test code for the expression ``foo(3) - 5 == 3`` (i.e., a boolean value), and make sure that the result of evaluating
the student's copy of this expression matches the source version (i.e., ``True``). You can write multiple
``### AUTOTEST`` directives in one cell. You can also separate multiple expressions on one line with semicolons:

.. code:: python

### AUTOTEST foo(3); foo(4); foo(5) != 8

These directives insert test code into the student notebook in which the expected answers appear in plaintext. If you want to
obfuscate the answers in the student copy, use a ``### HASHED AUTOTEST`` directive instead, which produces
a student notebook where the answers are hashed and not viewable by students.
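
For example, assuming the hashed directive accepts an expression after the keyword in the same way as ``### AUTOTEST``
(a reasonable reading of the examples in this section), the hashed analogue of the earlier directive would be:

.. code:: python

    ### HASHED AUTOTEST foo(3)

The generated test then compares a salted hash of the student's normalized result against a stored hash rather than
against the plaintext value; see the ``hash`` template described below.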

When you generate an assignment containing ``### AUTOTEST`` (or ``### HASHED AUTOTEST``) statements, nbgrader looks for a file
named ``autotests.yml`` that contains instructions on how to generate test code. It first looks
in the assignment directory itself (in case you want to specify special tests for just that assignment), and if it is
not found there, nbgrader searches in the course root directory.
The ``autotests.yml`` file is a `YAML <https://yaml.org/>`__ file that looks something like this:

.. code:: yaml

    python3:
      setup: "from hashlib import sha1"
      hash: 'sha1({{snippet}}.encode("utf-8")+b"{{salt}}").hexdigest()'
      dispatch: "type({{snippet}})"
      normalize: "str({{snippet}})"
      check: 'assert {{snippet}} == """{{value}}""", """{{message}}"""'
      success: "print('Success!')"

      templates:
        default:
          - test: "type({{snippet}})"
            fail: "type of {{snippet}} is not correct"

          - test: "{{snippet}}"
            fail: "value of {{snippet}} is not correct"

        int:
          - test: "type({{snippet}})"
            fail: "type of {{snippet}} is not int. Please make sure it is int and not np.int64, etc. You can cast your value into an int using int()"

          - test: "{{snippet}}"
            fail: "value of {{snippet}} is not correct"
The outermost level in the YAML file (the example shows an entry for ``python3``) specifies which kernel the configuration applies to. ``autotests.yml`` can
have separate sections for multiple kernels / languages. The ``autotests.yml`` file uses `Jinja templates <https://jinja.palletsprojects.com/en/3.1.x/>`__ to
specify snippets of code that will be executed/inserted into Jupyter notebooks in the process of generating the assignment. You should familiarize yourself
with the basics of Jinja templates before proceeding. For each kernel, there are a few configuration settings possible:

- **dispatch:** When you write ``### AUTOTEST foo(3)``, nbgrader needs to know how to test ``foo(3)``. It does so by executing ``foo(3)``, then checking its *type*,
and then running tests corresponding to that type in the ``autotests.yml`` file. Specifically, when generating an assignment, nbgrader substitutes the ``{{snippet}}`` template
variable with the expression ``foo(3)``, and then evaluates the dispatch code based on that. In this case, nbgrader runs ``type(foo(3))``, which will
return ``int``, so nbgrader will know to test ``foo(3)`` using tests for integer variables.
- **templates:** Once nbgrader determines the type of the expression ``foo(3)``, it will look for that type in the list of templates for the kernel. In this case,
it will find the ``int`` type in the list (it will use the **default** if the type is not found). Each type will have associated with it a
list of **test**/**fail** template pairs, which tell nbgrader what tests to run
and what messages to print in the event of a failure. Once again, ``{{snippet}}`` will be replaced by the ``foo(3)`` expression. In ``autotests.yml`` above, the
``int`` type has two tests: one that checks type of the expression, and one that checks its value. In this case, the student notebook will have
two tests: one that checks the value of ``type(foo(3))``, and one that checks the value of ``foo(3)``.
- **normalize:** For each test code expression (for example, ``type(foo(3))`` as mentioned previously), nbgrader will execute code using the corresponding
Jupyter kernel, which will respond with a result in the form of a *string*. So nbgrader now knows that if it runs ``type(foo(3))`` at this
point in the notebook, and converts the output to a string (i.e., *normalizes it*), it should obtain ``"int"``. However, nbgrader does not know how to convert output to a string; that
depends on the kernel! So the normalize code template tells nbgrader how to convert an expression to a string. In the ``autotests.yml`` example above, the
normalize template suggests that nbgrader should try to compare ``str(type(foo(3)))`` to ``"int"``.
- **check:** This is the code template that will be inserted into the student notebook to run each test. The template has three variables: ``{{snippet}}`` is the normalized
  test code, ``{{value}}`` is the result of evaluating that test code in the source notebook, and ``{{message}}`` is
  the text that will be printed in the event of a test failure. In the example above, the check code template tells nbgrader to insert an ``assert`` statement to run the test (a sketch of a fully generated test cell appears after this list).
- **hash (optional):** This is a code template that is responsible for hashing (i.e., obfuscating) the answers in the student notebook. The template has two variables.
  ``{{snippet}}`` represents the expression that will be hashed, and ``{{salt}}`` is used for nbgrader to insert a `salt <https://en.wikipedia.org/wiki/Salt_(cryptography)>`__
  prior to hashing. The salt prevents students from recognizing the hashes of answers to common question types. For example, a true/false question has only two possible answers;
  without a salt, students would be able to recognize the hashes of ``True`` and ``False`` in their notebooks. By adding a salt, nbgrader makes the hashed version of the answer
  different for each question, preventing students from identifying answers based on their hashes.
- **setup (optional):** This is a code template that will be run at the beginning of all test cells containing ``### AUTOTEST`` or ``### HASHED AUTOTEST`` directives. It is often used to import
special packages that only the test code requires. In the example above, the setup code is used to import the ``sha1`` function from ``hashlib``, which is necessary
for hashed test generation.
- **success (optional):** This is a code template that will be added to the end of all test cells containing ``### AUTOTEST`` or ``### HASHED AUTOTEST`` directives. In the
generated student version of the notebook,
this code will run if all the tests pass. In the example ``autotests.yml`` file above, the success code is used to run ``print('Success!')``, i.e., simply print a message to
indicate that all tests in the cell passed.
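
Putting these templates together, below is a hand-written sketch of the kind of (non-hashed) test cell that ``### AUTOTEST foo(3)``
could produce with the example ``autotests.yml`` above. The captured values and exact formatting depend on the kernel's output,
the configured sanitizers, and your templates, so treat this as illustrative rather than the literal output of nbgrader:

.. code:: python

    # setup template: imports required by the generated tests
    from hashlib import sha1

    # int template, first test: check the type of foo(3).
    # normalize wraps the test in str(...); the expected value was captured from the
    # source notebook (depending on the kernel and sanitizers it may be recorded as
    # "<class 'int'>" or simply "int")
    assert str(type(foo(3))) == """<class 'int'>""", """type of foo(3) is not int. Please make sure it is int and not np.int64, etc. You can cast your value into an int using int()"""

    # int template, second test: check the value of foo(3)
    assert str(foo(3)) == """8""", """value of foo(3) is not correct"""

    # success template: runs only if every assert above passed
    print('Success!')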

.. note::

For assignments with ``### AUTOTEST`` and ``### HASHED AUTOTEST`` directives, it is often handy
to have an editable copy of the assignment with solutions *and* test code inserted. You can
use ``nbgrader generate_assignment --source_with_tests`` to generate this version of an assignment,
which will appear in the ``source_with_tests/`` folder in the course repository.

.. warning::

The default ``autotests.yml`` test templates file included with the repository has tests for many
common data types (``int``, ``dict``, ``list``, ``float``, etc.). It also has a ``default`` test template
that it will try to apply to any types that do not have specified tests. If you want to automatically
generate your own tests for custom types, you will need to implement those test templates in ``autotests.yml``. That being said, custom
object types often have standard Python types as class attributes. Sometimes an easier option is to use nbgrader to test these
attributes automatically instead. For example, if ``obj`` is a complicated type with no specific test template available,
but ``obj`` has an ``int`` attribute ``x``, you could consider testing that attribute directly, e.g., ``### AUTOTEST obj.x``.
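
For instance, with a hypothetical ``Circle`` class (not part of nbgrader) whose ``radius`` attribute is a plain ``int``,
you could target the attribute rather than the object itself:

.. code:: python

    # a hypothetical custom class defined somewhere in the assignment;
    # there is no Circle test template in autotests.yml, but its radius
    # attribute is a plain int, which the built-in int template can test
    class Circle:
        def __init__(self, radius):
            self.radius = radius

    c = Circle(5)

    ### AUTOTEST c.radius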

.. warning::

The InstantiateTests preprocessor in nbgrader is responsible for generating test code from ``### AUTOTEST``
directives and the ``autotests.yml`` file. It has some configuration parameters not yet mentioned here.
The most important of these is the ``InstantiateTests.sanitizers`` dictionary, which tells nbgrader how to
clean up the string output from each kind of Jupyter kernel before using it in the process of generating tests. We have
implemented sanitizers for popular kernels in nbgrader already, but you might need to add your own.

