hpcugent · boegel · Aug 14, 2023 · Aug 4, 2023 · Aug 4, 2023 · Aug 4, 2023
diff --git a/.github/workflows/script_module_list.yml b/.github/workflows/script_module_list.yml
@@ -0,0 +1,49 @@
+name: Module overview script (lint + test)
+on:
+  push:
+    paths:
+      - 'scripts/**'
+      - '.github/**'  # filters are matched from the repository root, so a leading './' never matches
+  pull_request:
+    paths:
+      - 'scripts/**'
+      - '.github/**'  # same filter as for push events
+
+# Declare default permissions as read only.
+permissions: read-all
+jobs:
+  # Lint scripts/module_overview with flake8, allowing lines of up to 120 characters.
+  flake8-lint:
+    runs-on: ubuntu-20.04
+    name: Lint
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v3
+      - name: Set up Python environment
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.6"
+      - name: flake8 Lint
+        uses: py-actions/flake8@v2
+        with:
+          max-line-length: "120"
+          path: "scripts/module_overview"
+  # Install the requirements of the script and run its tests through test.sh.
+  pytest-tests:
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.6'
+      - name: Install dependencies
+        run: |
+          cd scripts/module_overview
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install -r requirements_tests.txt
+      - name: Test with pytest
+        run: |
+          cd scripts/module_overview
+          ./test.sh
diff --git a/scripts/README.md b/scripts/README.md
@@ -0,0 +1,3 @@
+Scripts that can be used to automatically generate markdown files can be found here.
+
+* [`module_overview`](module_overview): script to generate an overview of the available environment modules.
diff --git a/scripts/module_overview/README.md b/scripts/module_overview/README.md
@@ -0,0 +1,95 @@
+# Module overview
+
+A script to generate a table overview of all available module files in Markdown format,
+which indicates on which clusters each module is available.
+
+## Requirements
+- Required Python packages are listed in `requirements.txt` and `requirements_tests.txt`
+- [Lmod](https://github.com/TACC/Lmod) must be available, and `$LMOD_CMD` must specify the path to the `lmod` binary.
+
+
+### Creating a virtual environment (optional)
+
+If the required Python packages are not available in your Python setup,
+you can easily create a dedicated virtual environment as follows:
+
+```shell
+python -m venv module_overview_venv
+source module_overview_venv/bin/activate
+pip install -r requirements.txt
+pip install -r requirements_tests.txt
+# to exit the virtual environment, run 'deactivate'
+```
+
+## Usage
+You can run the script with the following command:
+
+```shell
+python module_overview.py
+```
+
+## Testing
+You can run the tests by running the `test.sh` script.
+```shell
+./test.sh
+```
+
+The tests make use of a mocked `$LMOD_CMD` script, which you can find [here](tests/data/lmod_mock.sh).
+
+### Write tests
+If you want to write additional tests and use the script effectively, follow these guidelines:
+
+
+1. **Setting up mocked Lmod:**
+
+   Before each test, ensure that you set the path to the script that mocks the `lmod` binary.
+   This can be done within the setup_class function.
+   ```python
+   path = os.path.dirname(os.path.realpath(__file__))
+
+   @classmethod
+   def setup_class(cls):
+       os.environ["LMOD_CMD"] = cls.path + "/data/lmod_mock.sh"
+   ```
+
+2. **Mocking the output of the `module avail cluster/` command:**
+
+   The output of the command `module avail cluster/` can be put in a `.txt` file. 
+   Set the path to this file in the `$MOCK_FILE_AVAIL_CLUSTER` environment variable.
+   ```python
+   os.environ["MOCK_FILE_AVAIL_CLUSTER"] = path + "/data/data_avail_cluster_simple.txt"
+   ```
+
+3. **Mocking the `module swap` command:**
+
+   For mocking the `module swap` command, assign the path to the swap files to the `$MOCK_FILE_SWAP` environment variable.
+   Ensure that the filename contains the placeholder '`CLUSTER`', 
+   which will later be replaced with the actual cluster name when performing the swap.
+
+   ```python
+   os.environ["MOCK_FILE_SWAP"] = path + "/data/data_swap_CLUSTER.txt"
+   ```
+   When trying to swap to, for example, the `cluster/pikachu` cluster,
+   it will use the `data_swap_pikachu.txt` file as output for the swap command.
+
+### Example 
+An example of a possible `setup_class` function is given below.
+```python
+import os
+
+@classmethod
+def setup_class(cls):
+    os.environ["TESTS_PATH"] = cls.path
+    os.environ["LMOD_CMD"] = cls.path + "/data/lmod_mock.sh"
+    os.environ["MOCK_FILE_AVAIL_CLUSTER"] = cls.path + "/data/data_avail_cluster_simple.txt"
+    os.environ["MOCK_FILE_SWAP"] = cls.path + "/data/data_swap_CLUSTER.txt"
+```
+
+This does multiple things:
+1. Set the path of the tests folder in `$TESTS_PATH`
+2. Set the path to the `lmod_mock.sh` script in the environment variable `$LMOD_CMD`
+3. Set the file with the mocked output of `module avail cluster/` in the `$MOCK_FILE_AVAIL_CLUSTER` variable.
+   The actual output can be found in the `data/data_avail_cluster_simple.txt` file.
+4. Set the path pattern of the swap output files in the `$MOCK_FILE_SWAP` variable.
+   Files with swap output are named following the `data/data_swap_CLUSTER.txt` pattern.
+   For example, `data/data_swap_pikachu.txt` could be a possible file.
diff --git a/scripts/module_overview/module_overview.py b/scripts/module_overview/module_overview.py
@@ -0,0 +1,225 @@
+#
+# Copyright 2023-2023 Ghent University
+#
+# This file is part of vsc_user_docs,
+# originally created by the HPC team of Ghent University (http://ugent.be/hpc/en),
+# with support of Ghent University (http://ugent.be/hpc),
+# the Flemish Supercomputer Centre (VSC) (https://www.vscentrum.be),
+# the Flemish Research Foundation (FWO) (http://www.fwo.be/en)
+# and the Department of Economy, Science and Innovation (EWI) (http://www.ewi-vlaanderen.be/en).
+#
+# https://github.com/hpcugent/vsc_user_docs
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+"""
+Python script to generate an overview of available modules across different clusters, in MarkDown format.
+
+@author: Michiel Lachaert (Ghent University)
+"""
+
+import numpy as np
+import os
+import subprocess
+from mdutils.mdutils import MdUtils
+from typing import Union, Tuple
+
+
+# --------------------------------------------------------------------------------------------------------
+# Functions to run "module" commands
+# --------------------------------------------------------------------------------------------------------
+
+def module(*args, filter_fn=lambda x: x) -> np.ndarray:
+    """
+    Function to run "module" commands.
+
+    @param args: Extra arguments for the module command.
+    @param filter_fn: Filter function on the ouput.
+    @return: Array with the output of the module command.
+    """
+    lmod = os.getenv('LMOD_CMD')
+    proc = subprocess.run(
+        [lmod, "python", "--terse"] + list(args),
+        encoding="utf-8",
+        stderr=subprocess.PIPE,
+        stdout=subprocess.PIPE
+    )
+    exec(proc.stdout)
+    return filter_fn(np.array(proc.stderr.split()))
+
+
+def module_avail(name: str = "", filter_fn=lambda x: x) -> np.ndarray:
+    """
+    Function to run "module avail" commands.
+
+    @param name: Module name, or empty string to return all available modules.
+    @param filter_fn: Filter on the output.
+    @return: List of all available modules of name, or all if name is not given.
+    """
+    return module("avail", name, filter_fn=filter_fn)
+
+
+def module_swap(name: str) -> None:
+    """
+    Function to run "module swap" commands.
+
+    @param name: Name of module you want to swap to.
+    """
+    module("swap", name)
+
+
+# --------------------------------------------------------------------------------------------------------
+# Fetch data
+# --------------------------------------------------------------------------------------------------------
+
+def filter_fn_gent_cluster(data: np.ndarray) -> np.ndarray:
+    """
+    Filter function for output of "module avail" commands on HPC-UGent infrastructure.
+
+    Filters out lines ending with ':' (which are paths to module files),
+    and lines starting with 'env/' or 'cluster/default', which are not actually software modules
+    @param data: Output
+    @return: Filtered output
+    """
+    return data[~np.char.endswith(data, ":") &
+                ~np.char.startswith(data, "env/") &
+                ~np.char.startswith(data, "cluster/default")
+                ]
+
+
+def filter_fn_gent_modules(data: np.ndarray) -> np.ndarray:
+    """
+    Filter function for the output of all software modules (excl. `cluster` and `env` modules).
+    @param data: Output
+    @return: Filtered output
+    """
+    return data[~np.char.endswith(data, ":") &
+                ~np.char.startswith(data, "env/") &
+                ~np.char.startswith(data, "cluster/")
+                ]
+
+
+def clusters_ugent() -> np.ndarray:
+    """
+    Returns all the cluster names of the HPC at UGent.
+    @return: cluster names
+    """
+
+    return module_avail(name="cluster/", filter_fn=filter_fn_gent_cluster)
+
+
+def modules_ugent() -> dict:
+    """
+    Returns names of all software module that are installed on the HPC on UGent.
+    They are grouped by cluster.
+    @return: Dictionary with all the modules per cluster
+    """
+    print("Start collecting modules:")
+    data = {}
+    for cluster in clusters_ugent():
+        print(f"\t Collecting available modules for {cluster}... ", end="", flush=True)
+        module_swap(cluster)
+        data[cluster] = module_avail(filter_fn=filter_fn_gent_modules)
+        print(f"found {len(data[cluster])} modules!")
+
+    print("All data collected!\n")
+    return data
+
+
+# --------------------------------------------------------------------------------------------------------
+# Util functions
+# --------------------------------------------------------------------------------------------------------
+
+def mod_names_to_software_names(mod_list: np.ndarray) -> np.ndarray:
+    """
+    Convert a list of module names to a list of the software names.
+
+    @param mod_list: List of the module names
+    @return: List of the corresponding software names
+    """
+    return np.unique([entry.split("/")[0] for entry in mod_list])
+
+
+def get_unique_software_names(data: Union[dict, list, np.ndarray]) -> Union[dict, list, np.ndarray]:
+    """
+    Simplify list of modules by removing versions and duplicates.
+
+    @param data: List of modules
+    @return: List of software names.
+    """
+
+    if isinstance(data, dict):
+        simplified_data = {cluster: mod_names_to_software_names(data[cluster]) for cluster in data}
+    else:
+        simplified_data = mod_names_to_software_names(data)
+
+    return simplified_data
+
+
+# --------------------------------------------------------------------------------------------------------
+# Generate markdown
+# --------------------------------------------------------------------------------------------------------
+
+def generate_table_data(avail_mods: dict) -> Tuple[np.ndarray, int, int]:
+    """
+    Generate data that can be used to construct a MarkDown table.
+
+    @param avail_mods: Available modules
+    @return: Returns tuple (Table data, #col, #row)
+    """
+    avail_mods = get_unique_software_names(avail_mods)
+    all_modules = get_unique_software_names(np.concatenate(list(avail_mods.values())))
+
+    final = np.array([" "])
+    cluster_names = [x.split('/')[1] for x in avail_mods.keys()]
+    final = np.append(final, cluster_names)
+
+    for package in all_modules:
+        final = np.append(final, package)
+
+        for cluster in avail_mods:
+            final = np.append(final, "X" if package in avail_mods[cluster] else " ")
+
+    return final, len(cluster_names) + 1, len(all_modules) + 1
+
+
+def generate_module_table(data: dict, md_file: MdUtils) -> None:
+    """
+    Generate the general table of the overview.
+
+    @param data: Dict with all the data. Keys are the cluster names.
+    @param md_file: MdUtils object.
+    """
+    print("Generating markdown table... ", end="", flush=True)
+    structured, col, row = generate_table_data(data)
+    md_file.new_table(columns=col, rows=row, text=list(structured), text_align='center')
+    print("Done!")
+
+
+def generate_general_overview() -> None:
+    """
+    Generate the general overview in a markdown file.
+    It generates a list of all the available software and indicates on which cluster it is available.
+    """
+    md_fn = 'module_overview.md'
+    md_file = MdUtils(file_name=md_fn, title='Overview of available modules per cluster')
+    data = modules_ugent()
+    generate_module_table(data, md_file)
+    md_file.create_md_file()
+    print(f"Module overview created at {md_fn}")
+
+
+if __name__ == '__main__':
+    # Generate the overview only when this file is executed as a script, not when it is imported
+    generate_general_overview()
diff --git a/scripts/module_overview/requirements.txt b/scripts/module_overview/requirements.txt
@@ -0,0 +1,2 @@
+mdutils
+numpy
diff --git a/scripts/module_overview/requirements_tests.txt b/scripts/module_overview/requirements_tests.txt
@@ -0,0 +1,2 @@
+flake8
+pytest
diff --git a/scripts/module_overview/test.sh b/scripts/module_overview/test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Run pytest with the current directory on PYTHONPATH; -v gives verbose output, -s disables output capturing.
+PYTHONPATH=$PWD:$PYTHONPATH pytest -v -s
diff --git a/scripts/module_overview/tests/data/data_avail_cluster_simple.txt b/scripts/module_overview/tests/data/data_avail_cluster_simple.txt
@@ -0,0 +1,4 @@
+/etc/modulefiles/vsc:
+cluster/dialga
+cluster/pikachu
+cluster/default