From 5d5e71458e38a89137bd5dd73fc9dfea08004362 Mon Sep 17 00:00:00 2001
From: Thilo von Neumann <tvn@mail.upb.de>
Date: Mon, 10 Jun 2024 14:37:20 +0200
Subject: [PATCH 1/6] Add testcase that runs code blocks in readme

---
 README.md          | 11 +++---
 tests/test_docs.py | 99 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_docs.py
diff --git a/README.md b/README.md
index 0855f74a..b7e98007 100644
--- a/README.md
+++ b/README.md
@@ -80,7 +80,7 @@ Each segment should have a minimum set of keys `"session_id"` and `"words"`.
 Depending on the metric, additional keys may be required (`"speaker"`, `"start_time"`, `"end_time"`).
 
 An example is shown below:
-```py
+```python
 [
     {
         "session_id": "recordingA", # Required
@@ -98,7 +98,7 @@ Another example can be found [here](example_files/hyp.seglst.json).
 #### [Segmental Time Mark (STM)](https://github.com/usnistgov/SCTK/blob/master/doc/infmts.htm#L75)
 Each line in an `STM` file represents one "utterance" and is defined as
 
-```STM
+```
 STM :== <filename> <channel> <speaker_id> <begin_time> <end_time> <transcript>
 ```
 where
@@ -110,13 +110,12 @@ where
 - `transcript`: space-separated list of words
 
 for example:
-```
+```STM
 recording1 1 Alice 0 0 Hello Bob.
 recording1 1 Bob 1 0 Hello Alice.
 recording1 1 Alice 2 0 How are you?
-...
 recording2 1 Alice 0 0 Hello Carol.
-...
+;; ...
 ```
 
 An example `STM` file can be found in [here](example_files/ref.stm).
@@ -124,7 +123,7 @@ An example `STM` file can be found in [here](example_files/ref.stm).
 #### [Time Marked Conversation (CTM)](https://github.com/usnistgov/SCTK/blob/master/doc/infmts.htm#L286)
 The CTM format is defined as
 
-```CTM
+```
 CTM :== <filename> <channel> <begin_time> <duration> <word> [<confidence>]
 ```
 
diff --git a/tests/test_docs.py b/tests/test_docs.py
new file mode 100644
index 00000000..7e7e661e
--- /dev/null
+++ b/tests/test_docs.py
@@ -0,0 +1,99 @@
+import pytest
+import re
+from pathlib import Path
+import ast
+
+FENCED_CODE_BLOCK_REGEX = re.compile(r'```([^`\n]*)?\n((?:.|\n)*?)\n```')
+
+# List of language blocks that are not tested
+LANG_BLACKLIST = ['shell', 'bibtex', '']
+
+
+def get_fenced_code_blocks(markdown_string: str):
+    """
+    Returns a list of tuples (lang, code, lineno) for each fenced code block in the markdown string.
+    lineno is the 1-based line number of the opening ``` fence, so the code itself starts on line lineno + 1.
+    """
+    def get_lineno(offset):
+        return markdown_string[:offset].count('\n') + 1
+
+    return [
+        (m.group(1), m.group(2), get_lineno(m.span()[0]))
+        for m in FENCED_CODE_BLOCK_REGEX.finditer(markdown_string)
+    ]
+
+
+def split_code_block_comment_output(code):
+    """Splits a code block where a line starts with `print` and the following
+    line is a comment.
+    The comment is expected to be the output of the print statement.
+
+    Returns a list of tuples with the code block and the expected output.
+    The print statement in the code block is replaced with `__output = ` so
+    that the result can be inspected after `exec`.
+    """
+    def get_line_offset(offset):
+        return code[:offset].count('\n')
+
+    last_match = 0
+    blocks = []
+    for m in list(re.finditer(r'print\((.*)\)\n((?:#.*\n)*#.*)', code)):
+        expected_output = '\n'.join(s[2:] for s in m.group(2).split('\n'))
+
+        blocks.append((
+            code[last_match:m.span()[0]] + '__output = ' + m.group(1),
+            expected_output,
+            get_line_offset(last_match)
+        ))
+        last_match = m.span()[1]
+    if last_match < len(code) - 1:
+        blocks.append((code[last_match:], None, get_line_offset(last_match)))
+    return blocks
+
+
+def exec_with_source(code, filename, lineno, globals_=None, locals_=None):
+    """
+    Like `compile` followed by `exec`, but sets the correct line number for the code block.
+    This is required for correct traceback display.
+    """
+    compiled = ast.parse(code, str(filename), 'exec')
+    ast.increment_lineno(compiled, lineno)
+    compiled = compile(compiled, str(filename), 'exec', optimize=0)
+    exec(compiled, globals_, locals_)
+
+
+README = Path(__file__).parent.parent / 'README.md'
+
+
+@pytest.mark.parametrize('codeblock', get_fenced_code_blocks(README.read_text()))
+def test_readme(codeblock):
+    """Run fenced code blocks in readme isolated"""
+    import os
+    os.chdir(Path(__file__).parent.parent)
+    lang, code, lineno = codeblock
+    if lang in LANG_BLACKLIST:
+        return
+
+    try:
+        if lang == 'python':
+            globals_ = {}
+            for code, expected_output, line_offset in split_code_block_comment_output(code):
+                exec_with_source(code, str(README), lineno + line_offset, globals_)
+                output = str(globals_.pop('__output', None))
+                if expected_output is not None:
+                    # Check that the output is equal to the expected output, but we want to ignore whitespace
+                    # for formatting / clarity reasons.
+                    # This is a very basic check that ignores all whitespace, but it should be
+                    # sufficient for most cases.
+                    output_ = output.replace(' ', '').replace('\n', '')
+                    expected_output_ = expected_output.replace(' ', '').replace('\n', '')
+                    assert output_ == expected_output_, f'Output mismatch: {output} != {expected_output}'
+        elif lang == 'STM':
+            # Test if the STM code block is valid.
+            import meeteval
+            meeteval.io.STM.parse(code)
+        else:
+            raise ValueError(f'Unsupported language: {lang}')
+    except Exception:
+        print(f'Error in {lang} code block:\n', code)
+        raise

From 16c136e5414454df4eb425d60f082dbd007ed77f Mon Sep 17 00:00:00 2001
From: Thilo von Neumann <tvn@mail.upb.de>
Date: Wed, 26 Jun 2024 13:43:13 +0200
Subject: [PATCH 2/6] Search for all markdown files and track global state

---
 tests/test_docs.py | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/tests/test_docs.py b/tests/test_docs.py
index 7e7e661e..41751acc 100644
--- a/tests/test_docs.py
+++ b/tests/test_docs.py
@@ -3,11 +3,20 @@
 from pathlib import Path
 import ast
 
+MEETEVAL_ROOT = Path(__file__).parent.parent
+
 FENCED_CODE_BLOCK_REGEX = re.compile(r'```([^`\n]*)?\n((?:.|\n)*?)\n```')
 
 # List of language blocks that are not tested
 LANG_BLACKLIST = ['shell', 'bibtex', '']
 
+KEEP_CONTEXT = ['doc/algorithms.md']
+
+
+@pytest.fixture(scope='session')
+def global_state():
+    return {}
+
 
 def get_fenced_code_blocks(markdown_string: str):
     """
@@ -62,23 +71,31 @@ def exec_with_source(code, filename, lineno, globals_=None, locals_=None):
     exec(compiled, globals_, locals_)
 
 
-README = Path(__file__).parent.parent / 'README.md'
-
-
-@pytest.mark.parametrize('codeblock', get_fenced_code_blocks(README.read_text()))
-def test_readme(codeblock):
+@pytest.mark.parametrize(
+        ('filename', 'codeblock'), 
+        [
+            # pytest.param(filename, codeblock, id=f'{str(filename.relative_to(MEETEVAL_ROOT))}-codeblock{codeblock_index}')
+            (str(filename.relative_to(MEETEVAL_ROOT)), codeblock)
+            for filename in MEETEVAL_ROOT.glob('**/*.md')
+            for codeblock_index, codeblock in enumerate(get_fenced_code_blocks(filename.read_text()))
+        ]
+)
+def test_readme(filename, codeblock, global_state):
     """Run fenced code blocks in readme isolated"""
     import os
-    os.chdir(Path(__file__).parent.parent)
+    os.chdir(MEETEVAL_ROOT)
     lang, code, lineno = codeblock
     if lang in LANG_BLACKLIST:
         return
 
     try:
         if lang == 'python':
-            globals_ = {}
+            if filename in KEEP_CONTEXT:
+                globals_ = global_state.setdefault(filename, {})
+            else:
+                globals_ = {}
             for code, expected_output, line_offset in split_code_block_comment_output(code):
-                exec_with_source(code, str(README), lineno + line_offset, globals_)
+                exec_with_source(code, str(filename), lineno + line_offset, globals_)
                 output = str(globals_.pop('__output', None))
                 if expected_output is not None:
                     # Check that the output is equal to the expected output, but we want to ignore whitespace

From f3ac1b09279a7293206bbaa4131404275fd3b6db Mon Sep 17 00:00:00 2001
From: Thilo von Neumann <tvn@mail.upb.de>
Date: Wed, 26 Jun 2024 14:59:54 +0200
Subject: [PATCH 3/6] Improve splitting at comments by using ast and capture
 prints by redirect_stdout

---
 tests/test_docs.py | 45 +++++++++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/tests/test_docs.py b/tests/test_docs.py
index 41751acc..2fd86c02 100644
--- a/tests/test_docs.py
+++ b/tests/test_docs.py
@@ -15,6 +15,7 @@
 
 @pytest.fixture(scope='session')
 def global_state():
+    """Used to track global state across code blocks in the files listed in `KEEP_CONTEXT`."""
     return {}
 
 
@@ -41,22 +42,22 @@ def split_code_block_comment_output(code):
     The print statement in the code block is replaced with `__output = ` so
     that the result can be inspected after `exec`.
     """
-    def get_line_offset(offset):
-        return code[:offset].count('\n')
-
+    c = ast.parse(code)
+    lines = code.splitlines()
     last_match = 0
     blocks = []
-    for m in list(re.finditer(r'print\((.*)\)\n((?:#.*\n)*#.*)', code)):
-        expected_output = '\n'.join(s[2:] for s in m.group(2).split('\n'))
-
-        blocks.append((
-            code[last_match:m.span()[0]] + '__output = ' + m.group(1),
-            expected_output,
-            get_line_offset(last_match)
-        ))
-        last_match = m.span()[1]
-    if last_match < len(code) - 1:
-        blocks.append((code[last_match:], None, get_line_offset(last_match)))
+    for s in c.body:
+        # If we parsed a print statement at the root level
+        if isinstance(s, ast.Expr) and isinstance(s.value, ast.Call) and isinstance(s.value.func, ast.Name) and s.value.func.id == 'print':
+            # Collect any lines that follow directly and start with a #
+            output = []
+            l = s.end_lineno
+            while l < len(lines) and lines[l].startswith('#'):
+                output.append(lines[l][1:])
+                l += 1
+            blocks.append(('\n'.join(lines[last_match:s.end_lineno]), '\n'.join(output), last_match))
+            last_match = l
+    blocks.append(('\n'.join(lines[last_match:]), '', last_match))
     return blocks
 
 
@@ -68,20 +69,25 @@ def exec_with_source(code, filename, lineno, globals_=None, locals_=None):
     compiled = ast.parse(code, str(filename), 'exec')
     ast.increment_lineno(compiled, lineno)
     compiled = compile(compiled, str(filename), 'exec', optimize=0)
-    exec(compiled, globals_, locals_)
+    from io import StringIO
+    from contextlib import redirect_stdout
+
+    f = StringIO()
+    with redirect_stdout(f):
+        exec(compiled, globals_, locals_)
+    return f.getvalue()
 
 
 @pytest.mark.parametrize(
         ('filename', 'codeblock'), 
         [
-            # pytest.param(filename, codeblock, id=f'{str(filename.relative_to(MEETEVAL_ROOT))}-codeblock{codeblock_index}')
             (str(filename.relative_to(MEETEVAL_ROOT)), codeblock)
             for filename in MEETEVAL_ROOT.glob('**/*.md')
-            for codeblock_index, codeblock in enumerate(get_fenced_code_blocks(filename.read_text()))
+            for codeblock in get_fenced_code_blocks(filename.read_text())
         ]
 )
 def test_readme(filename, codeblock, global_state):
-    """Run fenced code blocks in readme isolated"""
+    """Run fenced code blocks in markdown files in the MeetEval repository."""
     import os
     os.chdir(MEETEVAL_ROOT)
     lang, code, lineno = codeblock
@@ -95,8 +101,7 @@ def test_readme(filename, codeblock, global_state):
             else:
                 globals_ = {}
             for code, expected_output, line_offset in split_code_block_comment_output(code):
-                exec_with_source(code, str(filename), lineno + line_offset, globals_)
-                output = str(globals_.pop('__output', None))
+                output = exec_with_source(code, str(filename), lineno + line_offset, globals_)
                 if expected_output is not None:
                     # Check that the output is equal to the expected output, but we want to ignore whitespace
                     # for formatting / clarity reasons.

From 83c630b34a36e1f194ffe269fb67c76cc16c3f11 Mon Sep 17 00:00:00 2001
From: Thilo von Neumann <tvn@mail.upb.de>
Date: Wed, 26 Jun 2024 15:06:13 +0200
Subject: [PATCH 4/6] Remove py3.7 tests and add py3.12 tests

---
 .github/workflows/pytest.yml | 2 +-
 setup.py                     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index e7a6729a..c6ad0aab 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -17,7 +17,7 @@ jobs:
       fail-fast: false
       matrix:
         # Remember to update "classifiers" in setup.py when changing Python version
-        python-version: [3.7, 3.8, 3.9, '3.10', '3.11']
+        python-version: [3.8, 3.9, '3.10', '3.11', '3.12']
 
     steps:
     - uses: actions/checkout@v3
diff --git a/setup.py b/setup.py
index 044638e4..1c5fd411 100644
--- a/setup.py
+++ b/setup.py
@@ -100,11 +100,11 @@
 
         # Specify the Python versions you support here. In particular, ensure
         # that you indicate whether you support Python 2, Python 3 or both.
-        'Programming Language :: Python :: 3.7',
         'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
         'Programming Language :: Python :: 3.10',
         'Programming Language :: Python :: 3.11',
+        'Programming Language :: Python :: 3.12',
     ],
 
     python_requires=">=3.5",

From 3ab77ba1f269b37a06a54514b5209eb9469fad03 Mon Sep 17 00:00:00 2001
From: Thilo von Neumann <tvn@mail.upb.de>
Date: Thu, 27 Jun 2024 08:28:25 +0200
Subject: [PATCH 5/6] Update docstrings

---
 tests/test_docs.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/test_docs.py b/tests/test_docs.py
index 2fd86c02..e82e36c6 100644
--- a/tests/test_docs.py
+++ b/tests/test_docs.py
@@ -10,6 +10,7 @@
 # List of language blocks that are not tested
 LANG_BLACKLIST = ['shell', 'bibtex', '']
 
+# Markdown files for which the context is kept between code blocks
 KEEP_CONTEXT = ['doc/algorithms.md']
 
 
@@ -37,10 +38,6 @@ def split_code_block_comment_output(code):
     """Splits a code block where a line starts with `print` and the following
     line is a comment.
     The comment is expected to be the output of the print statement.
-
-    Returns a list of tuples with the code block and the expected output.
-    The print statement in the code block is replaced with `__output = ` so
-    that the result can be inspected after `exec`.
     """
     c = ast.parse(code)
     lines = code.splitlines()
@@ -65,6 +62,7 @@ def exec_with_source(code, filename, lineno, globals_=None, locals_=None):
     """
     Like `compile` followed by `exec`, but sets the correct line number for the code block.
     This is required for correct traceback display.
+    Captures stdout and returns it as a string.
     """
     compiled = ast.parse(code, str(filename), 'exec')
     ast.increment_lineno(compiled, lineno)

From 6bab678080c36534e98de42c4a18d80aa8afb234 Mon Sep 17 00:00:00 2001
From: Thilo von Neumann <tvn@mail.upb.de>
Date: Thu, 27 Jun 2024 12:33:35 +0200
Subject: [PATCH 6/6] Review comments

---
 tests/test_docs.py | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/tests/test_docs.py b/tests/test_docs.py
index e82e36c6..1c26e619 100644
--- a/tests/test_docs.py
+++ b/tests/test_docs.py
@@ -38,23 +38,47 @@ def split_code_block_comment_output(code):
     """Splits a code block where a line starts with `print` and the following
     line is a comment.
     The comment is expected to be the output of the print statement.
+
+    >>> split_code_block_comment_output(r'''
+    ... # this is a comment
+    ... print('hello')
+    ... print('world')
+    ... # hello
+    ... # world
+    ...
+    ... # Here starts the second block
+    ... a = 2
+    ...
+    ... print(
+    ...     a
+    ... )
+    ... # 2
+    ... ''')
+    [("\\n# this is a comment\\nprint('hello')\\nprint('world')", ' hello\\n world', 0), ('\\n# Here starts the second block\\na = 2\\n\\nprint(\\n    a\\n)', ' 2', 6)]
     """
     c = ast.parse(code)
     lines = code.splitlines()
     last_match = 0
     blocks = []
+    l = 0
     for s in c.body:
+        if l > s.end_lineno:
+            continue
+
         # If we parsed a print statement at the root level
         if isinstance(s, ast.Expr) and isinstance(s.value, ast.Call) and isinstance(s.value.func, ast.Name) and s.value.func.id == 'print':
             # Collect any lines that follow directly and start with a #
             output = []
             l = s.end_lineno
+            if l >= len(lines) or not lines[l].startswith('#'):  # print may be the last line
+                continue
             while l < len(lines) and lines[l].startswith('#'):
                 output.append(lines[l][1:])
                 l += 1
             blocks.append(('\n'.join(lines[last_match:s.end_lineno]), '\n'.join(output), last_match))
             last_match = l
-    blocks.append(('\n'.join(lines[last_match:]), '', last_match))
+    if last_match < len(lines):
+        blocks.append(('\n'.join(lines[last_match:]), '', last_match))
     return blocks
 
 
@@ -84,10 +108,12 @@ def exec_with_source(code, filename, lineno, globals_=None, locals_=None):
             for codeblock in get_fenced_code_blocks(filename.read_text())
         ]
 )
-def test_readme(filename, codeblock, global_state):
+def test_readme(filename, codeblock, global_state, monkeypatch):
     """Run fenced code blocks in markdown files in the MeetEval repository."""
-    import os
-    os.chdir(MEETEVAL_ROOT)
+    # Some code blocks in the readme file must run in the meeteval root directory
+    # because they access the example files in `MEETEVAL_ROOT/example_files`
+    monkeypatch.chdir(MEETEVAL_ROOT)
+
     lang, code, lineno = codeblock
     if lang in LANG_BLACKLIST:
         return