Skip to content

Commit

Permalink
Merge pull request #7 from znamlab/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
ablot authored Sep 6, 2024
2 parents 16450df + b21e229 commit 3a03699
Show file tree
Hide file tree
Showing 7 changed files with 374 additions and 49 deletions.
12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@

## Changelog

### [v0.9] - 2024-06-06

- Bugfix: slurm_it runs if dependency is an empty list.

### [v0.8] - 2024-05-31

- Feature: Option to run batch jobs in parallel slurm jobs. If `batch_param_names` and
`batch_param_values` are provided, the function will be called for each tuple of values
in `batch_param_values`


### [v0.7] - 2024-05-14

- Feature: `pathlib.Path` are automatically converted to strings in the main slurm
- Feature: `pathlib.Path` are automatically converted to strings in the main slurm
python script. This avoids crashing because `PosixPath` and co are not imported.

### [v0.6] - 2024-04-04
Expand Down
11 changes: 9 additions & 2 deletions ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def analysis_step(param1, param2):

## Calling the decorated function

The decorated function will have 4 new keyword arguments:
The decorated function will have 6 new keyword arguments:

```
use_slurm (bool): whether to use slurm or not
Expand All @@ -95,11 +95,17 @@ slurm_folder (str): where to write the slurm script and logs
scripts_name (str): name of the slurm script and python file
slurm_options (dict): options to pass to sbatch, will update the default options
provided in the decorator.
batch_param_names (list): list of parameters on which the function should be batched
batch_param_values (list): list of values for the batched parameters
```

When `use_slurm = True`, `slurm_folder` must be provided.
If `scripts_name` is false, the name of the function is used instead.

If `batch_param_names` is provided, `batch_param_values` must be a list of tuples the
same length as `batch_param_names`. The function will be called for each tuple of
values in `batch_param_values`.

Calling:

```python
Expand All @@ -115,9 +121,10 @@ jobid = analysis_step(param1, param2, use_slurm=True, slurm_folder='~/somewhere'
will create `~/somewhere/run2.py` and `~/somewhere/run2.sh`, then `sbatch` the `sh` script without
dependencies.


## Limitations:

IMPORT and paramter types (to document)
IMPORT and parameter types (to document)

# Slurm utils

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="znamutils",
version="v0.6-dev",
version="v0.9",
packages=find_packages(exclude=["tests"]),
url="https://github.com/znamlab/znamutils",
license="MIT",
Expand Down
63 changes: 61 additions & 2 deletions tests/test_decorators.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import time
from pathlib import Path
from znamutils import slurm_it

Expand Down Expand Up @@ -96,7 +97,6 @@ def test_dependencies():
slurm_folder = (
Path(flz.PARAMETERS["data_root"]["processed"]) / "test" / "test_slurm_it"
)
import time

@slurm_it(conda_env="cottage_analysis")
def slow_func(a, b):
Expand All @@ -115,6 +115,22 @@ def slow_func(a, b):
slurm_folder=slurm_folder,
job_dependency=",".join([o1, o2, o3]),
)
# it works with a list
o5 = slow_func(
1,
2,
use_slurm=True,
slurm_folder=slurm_folder,
job_dependency=[o1, o2, o3, o4],
)
# it works with empty list
o6 = slow_func(
1,
2,
use_slurm=True,
slurm_folder=slurm_folder,
job_dependency=[],
)


def test_update_slurm_options():
Expand Down Expand Up @@ -150,5 +166,48 @@ def test_func(a, b):
assert "#SBATCH --time=00:02:00" in txt


def test_batch_run(tmpdir):
    """Check that a batched slurm run generates the expected sh/py scripts.

    Calling the decorated function with `batch_param_names`/`batch_param_list`
    should produce one sbatch script that forwards `--a`/`--b` to a python
    wrapper, and a wrapper that parses those parameters with argparse.
    """

    @slurm_it(conda_env="cottage_analysis", slurm_options={"time": "00:01:00"})
    def batch_test_func(tardir, a=None, b=None):
        # Write a small marker file so a real slurm run leaves a trace.
        target = str(tardir) + f"/test_{a}.txt"
        with open(target, "w") as f:
            f.write(f"{a} {b}")
        return target

    batch_test_func(
        str(tmpdir),
        use_slurm=True,
        scripts_name="batch_test_func_with_dep",
        slurm_folder=str(tmpdir),
        batch_param_list=[[1, 2], [3, 4]],
        batch_param_names=["a", "b"],
    )
    sh_file = tmpdir / "batch_test_func_with_dep.sh"
    assert sh_file.exists()
    with open(sh_file, "r") as f:
        txt = f.read()
    # The sh script must forward the batched parameters to the python wrapper.
    assert "batch_test_func_with_dep.py --a $a --b $b" in txt
    python_file = tmpdir / "batch_test_func_with_dep.py"
    assert python_file.exists()
    with open(python_file, "r") as f:
        txt = f.read()
    lines = [
        "import argparse",
        "",
        "from test_decorators import batch_test_func",
        "",
        "parser = argparse.ArgumentParser()",
        "parser.add_argument('--a')",
        "parser.add_argument('--b')",
        "args = parser.parse_args()",
        "",
        f"batch_test_func(tardir='{str(tmpdir)}', use_slurm=False, a=args.a, b=args.b, )",
        "",
    ]
    # Compare the whole prefix at once: a zip() over the two sequences would
    # silently skip expected lines when the generated file is shorter.
    actual_lines = txt.split("\n")
    assert actual_lines[: len(lines)] == lines, f"unexpected script:\n{txt}"


if __name__ == "__main__":
    # Manual entry point: these tests submit real slurm jobs, so they are run
    # directly on the cluster rather than through pytest.
    test_slurm_my_func()
    # NOTE(review): `flz` (presumably flexiznam) is imported outside this view
    # — the processed-data root is used as a scratch folder for the batch test.
    tmpdir = Path(flz.PARAMETERS["data_root"]["processed"]) / "test"
    test_batch_run(tmpdir)
167 changes: 162 additions & 5 deletions tests/test_slurm_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,175 @@
raise ImportError("flexiznam is required to run this test")


def test_create_slurm_sbatch(tmpdir):
    """Check the sbatch script written by `slurm_helper.create_slurm_sbatch`.

    Three variants are exercised: with the job-id echo line, without it, and
    with environment variables passed through to the python script (which also
    switches the log file name to the per-job `%j` pattern).
    """
    slurm_helper.create_slurm_sbatch(
        tmpdir,
        print_job_id=True,
        conda_env="cottage_analysis",
        python_script="test.py",
        script_name="test.sh",
    )
    with open(tmpdir / "test.sh") as f:
        txt = f.read()
    lines = [
        "#!/bin/bash",
        "#SBATCH --ntasks=1",
        "#SBATCH --time=12:00:00",
        "#SBATCH --mem=32G",
        "#SBATCH --partition=ncpu",
        f"#SBATCH --output={tmpdir}/test.out",
        'echo "Job ID: $SLURM_JOB_ID"',
        "ml Anaconda3",
        "source activate base",
        "conda activate cottage_analysis",
        "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:~/.conda/envs/cottage_analysis/lib/",
        "",
        "",
        "python test.py",
        "",
    ]
    for actual, expected in zip(txt.split("\n"), lines):
        assert actual == expected

    # Without print_job_id the echo line (index 6) must be absent.
    slurm_helper.create_slurm_sbatch(
        tmpdir,
        print_job_id=False,
        conda_env="cottage_analysis",
        python_script="test.py",
        script_name="test.sh",
    )
    with open(tmpdir / "test.sh") as f:
        txt = f.read()
    for actual, expected in zip(txt.split("\n"), lines[:6] + lines[7:]):
        assert actual == expected

    slurm_helper.create_slurm_sbatch(
        tmpdir,
        print_job_id=True,
        conda_env="cottage_analysis",
        python_script="test.py",
        script_name="test.sh",
        env_vars_to_pass={"param": "test", "param2": "test2"},
    )
    with open(tmpdir / "test.sh") as f:
        txt = f.read()

    # running in batch will add the job id to output file
    lines[5] = f"#SBATCH --output={tmpdir}/test_%j.out"
    lines[-2] = "python test.py --param $test --param2 $test2"
    for actual, expected in zip(txt.split("\n"), lines):
        assert actual == expected


def test_python_script_single_func(tmpdir):
    """Check the python wrapper written by `python_script_single_func`.

    Covers: bare call, keyword arguments (with and without Path-to-string
    conversion), plain and from-imports, and argparse-parsed variables.
    """
    target_file = tmpdir / "test.py"
    slurm_helper.python_script_single_func(
        target_file,
        function_name="test",
        arguments=None,
        vars2parse=None,
        imports=None,
        from_imports=None,
        path2string=True,
    )
    with open(target_file) as f:
        txt = f.read()
    assert txt == "\ntest()\n"

    # With path2string=False the path argument is written as its repr.
    slurm_helper.python_script_single_func(
        target_file,
        function_name="test",
        arguments={"dir": tmpdir},
        vars2parse=None,
        imports=None,
        from_imports=None,
        path2string=False,
    )
    with open(target_file) as f:
        txt = f.read()
    assert txt == f"\ntest(dir={repr(tmpdir)}, )\n"

    slurm_helper.python_script_single_func(
        target_file,
        function_name="test",
        arguments=None,
        vars2parse=None,
        imports="numpy",
        from_imports=dict(flexiznam="Dataset"),
        path2string=True,
    )
    with open(target_file) as f:
        txt = f.read()
    assert txt == "import numpy\n\nfrom flexiznam import Dataset\n\ntest()\n"

    slurm_helper.python_script_single_func(
        target_file,
        function_name="test",
        arguments=dict(arg1=1, arg2=2),
        vars2parse=None,
        imports=None,
        from_imports=None,
        path2string=True,
    )
    with open(target_file) as f:
        txt = f.read()
    assert txt == "\ntest(arg1=1, arg2=2, )\n"

    slurm_helper.python_script_single_func(
        target_file,
        function_name="test",
        arguments=dict(arg1=1, arg2=2),
        vars2parse=dict(var1="v", var2="vv"),
        imports=None,
        from_imports=None,
        path2string=True,
    )
    with open(target_file) as f:
        txt = f.read()
    lines = [
        "import argparse",
        "",
        "parser = argparse.ArgumentParser()",
        "parser.add_argument('--v')",
        "parser.add_argument('--vv')",
        "args = parser.parse_args()",
        "",
        "test(arg1=1, arg2=2, var1=args.v, var2=args.vv, )",
        "",
    ]
    # Compare the whole prefix at once: a zip() over the two sequences would
    # silently skip expected lines when the generated file is shorter.
    assert txt.split("\n")[: len(lines)] == lines, f"unexpected script:\n{txt}"


def test_run_slurm_batch():
    """Dry-run `run_slurm_batch` and verify the sbatch command it would issue."""
    script_path = "testpath/testscript.sh"
    # Each case is (extra keyword arguments, expected sbatch command line).
    cases = [
        (
            {},
            f"sbatch {script_path}",
        ),
        (
            {"dependency_type": "afterok", "job_dependency": "134"},
            f"sbatch --dependency=afterok:134 {script_path}",
        ),
        (
            {"dependency_type": "dp", "job_dependency": "134"},
            f"sbatch --dependency=dp:134 {script_path}",
        ),
        (
            {"env_vars": {"var": "value"}},
            f"sbatch --export=var=value {script_path}",
        ),
        (
            {"env_vars": {"var": "value", "var2": 1}},
            f"sbatch --export=var=value,var2=1 {script_path}",
        ),
        (
            {"env_vars": {"var": "value"}, "job_dependency": 12},
            f"sbatch --export=var=value --dependency=afterok:12 {script_path}",
        ),
    ]
    for kwargs, expected in cases:
        cmd = slurm_helper.run_slurm_batch(script_path, dry_run=True, **kwargs)
        assert cmd == expected


if __name__ == "__main__":
    # Manual entry point: run the helpers against a real flexiznam setup.
    # (The stale pre-refactor call `test_create_slurm_sbatch()` without a
    # tmpdir argument was removed — the function now requires a folder.)
    tmpdir = Path(flz.PARAMETERS["data_root"]["processed"]) / "test"
    test_run_slurm_batch()
    test_python_script_single_func(tmpdir)
    test_create_slurm_sbatch(tmpdir)
    print("ok")
Loading

0 comments on commit 3a03699

Please sign in to comment.