Skip to content

Commit

Permalink
Merge pull request #7 from znamlab/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
ablot authored Sep 6, 2024
2 parents 16450df + b21e229 commit 3a03699
Show file tree
Hide file tree
Showing 7 changed files with 374 additions and 49 deletions.
12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@

## Changelog

### [v0.9] - 2024-06-06

- Bugfix: slurm_it runs if dependency is an empty list.

### [v0.8] - 2024-05-31

- Feature: Option to run batch jobs in parallel slurm jobs. If `batch_param_names` and
`batch_param_values` are provided, the function will be called for each tuple of values
in `batch_param_values`


### [v0.7] - 2024-05-14

- Feature: `pathlib.Path` are automatically converted to strings in the main slurm
- Feature: `pathlib.Path` are automatically converted to strings in the main slurm
python script. This avoids crashing because `PosixPath` and co are not imported.

### [v0.6] - 2024-04-04
Expand Down
11 changes: 9 additions & 2 deletions ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def analysis_step(param1, param2):

## Calling the decorated function

The decorated function will have 4 new keyword arguments:
The decorated function will have 6 new keyword arguments:

```
use_slurm (bool): whether to use slurm or not
Expand All @@ -95,11 +95,17 @@ slurm_folder (str): where to write the slurm script and logs
scripts_name (str): name of the slurm script and python file
slurm_options (dict): options to pass to sbatch, will update the default options
provided in the decorator.
batch_param_names (list): list of parameters on which the function should be batched
batch_param_values (list): list of values for the batched parameters
```

When `use_slurm = True`, `slurm_folder` must be provided.
If `scripts_name` is false, the name of the function is used instead.

If `batch_param_names` is provided, `batch_param_values` must be a list of tuples the
same length as `batch_param_names`. The function will be called for each tuple of
values in `batch_param_values`.

Calling:

```python
Expand All @@ -115,9 +121,10 @@ jobid = analysis_step(param1, param2, use_slurm=True, slurm_folder='~/somewhere'
will create `~/somewhere/run2.py` and `~/somewhere/run2.sh`, then `sbatch` the `sh` script without
dependencies.


## Limitations:

IMPORT and paramter types (to document)
IMPORT and parameter types (to document)

# Slurm utils

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="znamutils",
version="v0.6-dev",
version="v0.9",
packages=find_packages(exclude=["tests"]),
url="https://github.com/znamlab/znamutils",
license="MIT",
Expand Down
63 changes: 61 additions & 2 deletions tests/test_decorators.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import time
from pathlib import Path
from znamutils import slurm_it

Expand Down Expand Up @@ -96,7 +97,6 @@ def test_dependencies():
slurm_folder = (
Path(flz.PARAMETERS["data_root"]["processed"]) / "test" / "test_slurm_it"
)
import time

@slurm_it(conda_env="cottage_analysis")
def slow_func(a, b):
Expand All @@ -115,6 +115,22 @@ def slow_func(a, b):
slurm_folder=slurm_folder,
job_dependency=",".join([o1, o2, o3]),
)
# it works with a list
o5 = slow_func(
1,
2,
use_slurm=True,
slurm_folder=slurm_folder,
job_dependency=[o1, o2, o3, o4],
)
# it works with empty list
o6 = slow_func(
1,
2,
use_slurm=True,
slurm_folder=slurm_folder,
job_dependency=[],
)


def test_update_slurm_options():
Expand Down Expand Up @@ -150,5 +166,48 @@ def test_func(a, b):
assert "#SBATCH --time=00:02:00" in txt


def test_batch_run(tmpdir):
    """Check that a batched slurm run generates the expected sh/py scripts.

    Calling the decorated function with `batch_param_names`/`batch_param_list`
    should produce one sbatch script that forwards `--a`/`--b` to a python
    wrapper, and a wrapper that parses those parameters with argparse.
    """

    @slurm_it(conda_env="cottage_analysis", slurm_options={"time": "00:01:00"})
    def batch_test_func(tardir, a=None, b=None):
        # Write a small marker file so a real slurm run leaves a trace.
        target = str(tardir) + f"/test_{a}.txt"
        with open(target, "w") as f:
            f.write(f"{a} {b}")
        return target

    batch_test_func(
        str(tmpdir),
        use_slurm=True,
        scripts_name="batch_test_func_with_dep",
        slurm_folder=str(tmpdir),
        batch_param_list=[[1, 2], [3, 4]],
        batch_param_names=["a", "b"],
    )
    sh_file = tmpdir / "batch_test_func_with_dep.sh"
    assert sh_file.exists()
    with open(sh_file, "r") as f:
        txt = f.read()
    # The sh script must forward the batched parameters to the python wrapper.
    assert "batch_test_func_with_dep.py --a $a --b $b" in txt
    python_file = tmpdir / "batch_test_func_with_dep.py"
    assert python_file.exists()
    with open(python_file, "r") as f:
        txt = f.read()
    lines = [
        "import argparse",
        "",
        "from test_decorators import batch_test_func",
        "",
        "parser = argparse.ArgumentParser()",
        "parser.add_argument('--a')",
        "parser.add_argument('--b')",
        "args = parser.parse_args()",
        "",
        f"batch_test_func(tardir='{str(tmpdir)}', use_slurm=False, a=args.a, b=args.b, )",
        "",
    ]
    # Compare the whole prefix at once: a zip() over the two sequences would
    # silently skip expected lines when the generated file is shorter.
    actual_lines = txt.split("\n")
    assert actual_lines[: len(lines)] == lines, f"unexpected script:\n{txt}"


if __name__ == "__main__":
    # Manual entry point: these tests submit real slurm jobs, so they are run
    # directly on the cluster rather than through pytest.
    test_slurm_my_func()
    # NOTE(review): `flz` (presumably flexiznam) is imported outside this view
    # — the processed-data root is used as a scratch folder for the batch test.
    tmpdir = Path(flz.PARAMETERS["data_root"]["processed"]) / "test"
    test_batch_run(tmpdir)
167 changes: 162 additions & 5 deletions tests/test_slurm_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,175 @@
raise ImportError("flexiznam is required to run this test")


def test_create_slurm_sbatch(tmpdir):
    """Check the sbatch script written by `slurm_helper.create_slurm_sbatch`.

    Three variants are exercised: with the job-id echo line, without it, and
    with environment variables passed through to the python script (which also
    switches the log file name to the per-job `%j` pattern).
    """
    slurm_helper.create_slurm_sbatch(
        tmpdir,
        print_job_id=True,
        conda_env="cottage_analysis",
        python_script="test.py",
        script_name="test.sh",
    )
    with open(tmpdir / "test.sh") as f:
        txt = f.read()
    lines = [
        "#!/bin/bash",
        "#SBATCH --ntasks=1",
        "#SBATCH --time=12:00:00",
        "#SBATCH --mem=32G",
        "#SBATCH --partition=ncpu",
        f"#SBATCH --output={tmpdir}/test.out",
        'echo "Job ID: $SLURM_JOB_ID"',
        "ml Anaconda3",
        "source activate base",
        "conda activate cottage_analysis",
        "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:~/.conda/envs/cottage_analysis/lib/",
        "",
        "",
        "python test.py",
        "",
    ]
    for actual, expected in zip(txt.split("\n"), lines):
        assert actual == expected

    # Without print_job_id the echo line (index 6) must be absent.
    slurm_helper.create_slurm_sbatch(
        tmpdir,
        print_job_id=False,
        conda_env="cottage_analysis",
        python_script="test.py",
        script_name="test.sh",
    )
    with open(tmpdir / "test.sh") as f:
        txt = f.read()
    for actual, expected in zip(txt.split("\n"), lines[:6] + lines[7:]):
        assert actual == expected

    slurm_helper.create_slurm_sbatch(
        tmpdir,
        print_job_id=True,
        conda_env="cottage_analysis",
        python_script="test.py",
        script_name="test.sh",
        env_vars_to_pass={"param": "test", "param2": "test2"},
    )
    with open(tmpdir / "test.sh") as f:
        txt = f.read()

    # running in batch will add the job id to output file
    lines[5] = f"#SBATCH --output={tmpdir}/test_%j.out"
    lines[-2] = "python test.py --param $test --param2 $test2"
    for actual, expected in zip(txt.split("\n"), lines):
        assert actual == expected


def test_python_script_single_func(tmpdir):
    """Check the python wrapper written by `python_script_single_func`.

    Covers: bare call, keyword arguments (with and without Path-to-string
    conversion), plain and from-imports, and argparse-parsed variables.
    """
    target_file = tmpdir / "test.py"
    slurm_helper.python_script_single_func(
        target_file,
        function_name="test",
        arguments=None,
        vars2parse=None,
        imports=None,
        from_imports=None,
        path2string=True,
    )
    with open(target_file) as f:
        txt = f.read()
    assert txt == "\ntest()\n"

    # With path2string=False the path argument is written as its repr.
    slurm_helper.python_script_single_func(
        target_file,
        function_name="test",
        arguments={"dir": tmpdir},
        vars2parse=None,
        imports=None,
        from_imports=None,
        path2string=False,
    )
    with open(target_file) as f:
        txt = f.read()
    assert txt == f"\ntest(dir={repr(tmpdir)}, )\n"

    slurm_helper.python_script_single_func(
        target_file,
        function_name="test",
        arguments=None,
        vars2parse=None,
        imports="numpy",
        from_imports=dict(flexiznam="Dataset"),
        path2string=True,
    )
    with open(target_file) as f:
        txt = f.read()
    assert txt == "import numpy\n\nfrom flexiznam import Dataset\n\ntest()\n"

    slurm_helper.python_script_single_func(
        target_file,
        function_name="test",
        arguments=dict(arg1=1, arg2=2),
        vars2parse=None,
        imports=None,
        from_imports=None,
        path2string=True,
    )
    with open(target_file) as f:
        txt = f.read()
    assert txt == "\ntest(arg1=1, arg2=2, )\n"

    slurm_helper.python_script_single_func(
        target_file,
        function_name="test",
        arguments=dict(arg1=1, arg2=2),
        vars2parse=dict(var1="v", var2="vv"),
        imports=None,
        from_imports=None,
        path2string=True,
    )
    with open(target_file) as f:
        txt = f.read()
    lines = [
        "import argparse",
        "",
        "parser = argparse.ArgumentParser()",
        "parser.add_argument('--v')",
        "parser.add_argument('--vv')",
        "args = parser.parse_args()",
        "",
        "test(arg1=1, arg2=2, var1=args.v, var2=args.vv, )",
        "",
    ]
    # Compare the whole prefix at once: a zip() over the two sequences would
    # silently skip expected lines when the generated file is shorter.
    assert txt.split("\n")[: len(lines)] == lines, f"unexpected script:\n{txt}"


def test_run_slurm_batch():
    """Dry-run `run_slurm_batch` and verify the sbatch command it would issue."""
    script_path = "testpath/testscript.sh"
    # Each case is (extra keyword arguments, expected sbatch command line).
    cases = [
        (
            {},
            f"sbatch {script_path}",
        ),
        (
            {"dependency_type": "afterok", "job_dependency": "134"},
            f"sbatch --dependency=afterok:134 {script_path}",
        ),
        (
            {"dependency_type": "dp", "job_dependency": "134"},
            f"sbatch --dependency=dp:134 {script_path}",
        ),
        (
            {"env_vars": {"var": "value"}},
            f"sbatch --export=var=value {script_path}",
        ),
        (
            {"env_vars": {"var": "value", "var2": 1}},
            f"sbatch --export=var=value,var2=1 {script_path}",
        ),
        (
            {"env_vars": {"var": "value"}, "job_dependency": 12},
            f"sbatch --export=var=value --dependency=afterok:12 {script_path}",
        ),
    ]
    for kwargs, expected in cases:
        cmd = slurm_helper.run_slurm_batch(script_path, dry_run=True, **kwargs)
        assert cmd == expected


if __name__ == "__main__":
    # Manual entry point: run the helpers against a real flexiznam setup.
    # (The stale pre-refactor call `test_create_slurm_sbatch()` without a
    # tmpdir argument was removed — the function now requires a folder.)
    tmpdir = Path(flz.PARAMETERS["data_root"]["processed"]) / "test"
    test_run_slurm_batch()
    test_python_script_single_func(tmpdir)
    test_create_slurm_sbatch(tmpdir)
    print("ok")
Loading

0 comments on commit 3a03699

Please sign in to comment.