Skip to content

Commit

Permalink
Adding Parallelworks CI runscripts to .github/.parallelworks Updating…
Browse files Browse the repository at this point in the history
… workflows to use a new version of the Parallelworks CI runscripts Updating the cleanup workflow to clean up the weekly cron job CI in addition to the PR CI rundirs (#359)
  • Loading branch information
laurenchilutti authored Dec 11, 2024
1 parent 44e20a7 commit 970884a
Show file tree
Hide file tree
Showing 6 changed files with 282 additions and 47 deletions.
6 changes: 6 additions & 0 deletions .github/.parallelworks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# .parallelworks Directory

The .parallelworks directory stores the CI scripts that reside on Parallelworks.
These scripts are executed via the GitHub Actions workflows in .github/workflows.

On Parallelworks these scripts are installed at: /contrib/fv3/GFDL_atmos_cubed_sphere_CI
59 changes: 59 additions & 0 deletions .github/.parallelworks/checkout.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/bash
##############################################################################
## checkout.sh
##
## Creates a fresh CI test directory, clones SHiELD_build into it, runs its
## CHECKOUT_code script, and merges the requested ref into the
## GFDL_atmos_cubed_sphere checkout.
##
## Usage: checkout.sh [-b|--branch <ref>] [-h|--hash <commit>]
##   -b, --branch  ref fetched from origin and merged (default: main)
##   -h, --hash    commit id; used here only to name the test directory
##                 (default: none)
##
## bash is required ([[ ]] and pipefail are used), so the interpreter is
## /bin/bash and the options are set with `set` rather than on the shebang
## line, where they would be lost when the script is run as `bash checkout.sh`.
##############################################################################
set -xe
# Without pipefail a failing command piped into tee would be reported as success
set -o pipefail

##############################################################################
## User set up variables
## Root directory for CI
dirRoot=/contrib/fv3
## Intel version to be used
intelVersion=2023.2.0
##############################################################################
## HPC-ME container
## (not referenced by this script; kept so the settings match compile.sh)
container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif
container_env_script=/contrib/containers/load_spack_noaa-intel.sh
##############################################################################

# Abort with a message when an option was given without its value.
#   $1 - option name as typed, $2 - number of arguments still to be parsed
require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "missing value for $1"
    exit 1
  fi
}

#Parse Arguments
branch=main
commit=none
while [[ $# -gt 0 ]]; do
  case "$1" in
    -b|--branch)
      require_value "$1" "$#"
      branch="$2"
      shift 2 # past argument and value
      ;;
    -h|--hash)
      require_value "$1" "$#"
      commit="$2"
      shift 2 # past argument and value
      ;;
    *)
      echo "unknown argument"
      exit 1
      ;;
  esac
done

# An empty branch or commit would make testDir collapse onto a parent
# directory, and that directory is removed recursively below.
if [[ -z "${branch}" || -z "${commit}" ]]; then
  echo "branch and commit must not be empty"
  exit 1
fi

echo "branch is $branch"
echo "commit is $commit"

## Set up the directories
testDir="${dirRoot}/${intelVersion}/GFDL_atmos_cubed_sphere/${branch}/${commit}"
logDir="${testDir}/log"
export MODULESHOME=/usr/share/lmod/lmod
## create directories (any previous checkout for this branch/commit is wiped)
rm -rf -- "${testDir:?}"
mkdir -p -- "${logDir}"
# salloc commands to start up
#2 tests layout 8,8 (16 nodes)
#2 tests layout 4,8 (8 nodes)
#9 tests layout 4,4 (18 nodes)
#5 tests layout 4,1 (5 nodes)
#17 tests layout 2,2 (17 nodes)
#salloc --partition=p2 -N 64 -J ${branch} sleep 20m &

## clone code
# Each step is its own command so that `set -e` stops the script at the first
# failure; inside an `a && b && c` list only the last command is checked.
cd "${testDir}"
git clone --recursive https://github.com/NOAA-GFDL/SHiELD_build.git
cd SHiELD_build
./CHECKOUT_code 2>&1 | tee "${logDir}/checkout.log"
## Check out the PR
# NOTE(review): SHiELD_SRC is presumably created by CHECKOUT_code - confirm.
cd "${testDir}/SHiELD_SRC/GFDL_atmos_cubed_sphere"
git fetch origin "${branch}:toMerge"
git merge toMerge
84 changes: 84 additions & 0 deletions .github/.parallelworks/compile.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/bash
##############################################################################
## compile.sh
##
## Builds SHiELD inside the HPC-ME container for one combination of
## config / hydro / bit / mode, writing the build output to the log directory
## of the test tree identified by branch and commit.
##
## Usage: compile.sh [-b|--branch <ref>] [-h|--hash <commit>]
##                   -c|--config <config> --hydro <hydro> --bit <bit>
##                   -m|--mode <mode>
##   -b, --branch  ref naming the test directory (default: main)
##   -h, --hash    commit naming the test directory (default: none)
##   config, hydro, bit and mode are required and are passed to ./COMPILE.
##
## bash is required ([[ ]] and pipefail are used), so the interpreter is
## /bin/bash and the options are set with `set` rather than on the shebang
## line, where they would be lost when the script is run as `bash compile.sh`.
##############################################################################
set -xe
# Make the build pipeline fail when COMPILE fails, not only when tee fails
set -o pipefail

##############################################################################
## User set up variables
## Root directory for CI
dirRoot=/contrib/fv3
## Intel version to be used
intelVersion=2023.2.0
##############################################################################
## HPC-ME container
container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif
container_env_script=/contrib/containers/load_spack_noaa-intel.sh
##############################################################################

# Abort with a message when an option was given without its value.
#   $1 - option name as typed, $2 - number of arguments still to be parsed
require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "missing value for $1"
    exit 1
  fi
}

#Parse Arguments
branch=main
commit=none
while [[ $# -gt 0 ]]; do
  case "$1" in
    -b|--branch)
      require_value "$1" "$#"
      branch="$2"
      shift 2 # past argument and value
      ;;
    -h|--hash)
      require_value "$1" "$#"
      commit="$2"
      shift 2 # past argument and value
      ;;
    -c|--config)
      require_value "$1" "$#"
      config="$2"
      shift 2 # past argument and value
      ;;
    --hydro)
      require_value "$1" "$#"
      hydro="$2"
      shift 2 # past argument and value
      ;;
    --bit)
      require_value "$1" "$#"
      bit="$2"
      shift 2 # past argument and value
      ;;
    -m|--mode)
      require_value "$1" "$#"
      mode="$2"
      shift 2 # past argument and value
      ;;
    *)
      echo "unknown argument"
      exit 1
      ;;
  esac
done

# Quoted tests: the unquoted form only worked for empty values by accident
# and broke on values containing whitespace.
if [[ -z "${mode}" || -z "${bit}" || -z "${hydro}" || -z "${config}" ]]; then
  echo "must specify config, hydro, bit, and mode options for compile"
  exit 1
fi

echo "branch is $branch"
echo "commit is $commit"
echo "mode is $mode"
echo "bit is $bit"
echo "hydro is $hydro"
echo "config is $config"

# The sw/shield combination is skipped and reported as success.
if [[ "${hydro}" == "sw" && "${config}" == "shield" ]]; then
  echo "this combination should not be tested"
  exit 0
fi

## Set up the directories
testDir="${dirRoot}/${intelVersion}/GFDL_atmos_cubed_sphere/${branch}/${commit}"
logDir="${testDir}/log"
# Set up build
cd "${testDir}/SHiELD_build/Build"
#Define External Libs path
export EXTERNAL_LIBS="${dirRoot}/externallibs"
# Build SHiELD
# The COMPILE command line is passed to the container env script as a single
# argument; stdout and stderr are both captured in the per-build log.
singularity exec -B /contrib "${container}" "${container_env_script}" \
  "./COMPILE ${config} ${hydro} ${bit} ${mode} intel clean" 2>&1 \
  | tee "${logDir}/compile_${config}_${hydro}_${bit}_${mode}_intel.out"
79 changes: 79 additions & 0 deletions .github/.parallelworks/run_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/bin/bash
##############################################################################
## run_test.sh
##
## Runs one CI test script from SHiELD_build/RTS/CI inside the HPC-ME
## container and then compares the files found in the baseline directory for
## that test against the same-named files in the run's RESTART directory
## using nccmp.
##
## Usage: run_test.sh -t|--test <testname> [-b|--branch <ref>]
##                    [-h|--hash <commit>]
##   -t, --test    name of the test script to execute (required)
##   -b, --branch  ref naming the test directory (default: main)
##   -h, --hash    commit naming the test directory (default: none)
##
## Options are set with `set` rather than on the shebang line, where they
## would be lost when the script is run as `bash run_test.sh`.
##############################################################################
set -xe
# Make the test pipeline fail when the test fails, not only when tee fails
set -o pipefail
ulimit -s unlimited
##############################################################################
## User set up variables
## Root directory for CI
dirRoot=/contrib/fv3
## Intel version to be used
intelVersion=2023.2.0
##############################################################################
## HPC-ME container
container=/contrib/containers/noaa-intel-prototype_2023.09.25.sif
container_env_script=/contrib/containers/load_spack_noaa-intel-mlong.sh

# Abort with a message when an option was given without its value.
#   $1 - option name as typed, $2 - number of arguments still to be parsed
require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "missing value for $1"
    exit 1
  fi
}

#Parse Arguments
branch=main
commit=none
while [[ $# -gt 0 ]]; do
  case "$1" in
    -b|--branch)
      require_value "$1" "$#"
      branch="$2"
      shift 2 # past argument and value
      ;;
    -h|--hash)
      require_value "$1" "$#"
      commit="$2"
      shift 2 # past argument and value
      ;;
    -t|--test)
      require_value "$1" "$#"
      testname="$2"
      shift 2 # past argument and value
      ;;
    *)
      echo "unknown argument"
      exit 1
      ;;
  esac
done

if [[ -z "${testname}" ]]; then
  echo "must specify a test name with -t"
  exit 1
fi

echo "branch is $branch"
echo "commit is $commit"
echo "test is $testname"

## Set up the directories
MODULESHOME=/usr/share/lmod/lmod
testDir="${dirRoot}/${intelVersion}/GFDL_atmos_cubed_sphere/${branch}/${commit}"
logDir="${testDir}/log"
baselineDir="${dirRoot}/baselines/intel/${intelVersion}"

## Run the CI Test
# Define the builddir testscriptdir and rundir BUILDDIR is used by test scripts
# Set the BUILDDIR for the test script to use
export BUILDDIR="${testDir}/SHiELD_build"
testscriptDir="${BUILDDIR}/RTS/CI"
runDir="${BUILDDIR}/CI/BATCH-CI"

# Run CI test scripts
cd "${testscriptDir}"
# Execute the test piping output to log file
# The launcher options and container command are handed to the test script as
# one argument; stdout and stderr are both captured in the per-test log.
"./${testname}" " --partition=compute --mpi=pmi2 --job-name=${commit}_${testname} singularity exec -B /contrib -B /apps ${container} ${container_env_script}" 2>&1 \
  | tee "${logDir}/run_${testname}.log"

## Compare Restarts to Baseline
source "${MODULESHOME}/init/sh"
export MODULEPATH=/mnt/shared/manual_modules:/usr/share/modulefiles/Linux:/usr/share/modulefiles/Core:/usr/share/lmod/lmod/modulefiles/Core:/apps/modules/modulefiles:/apps/modules/modulefamilies/intel
module load intel/2022.1.2
module load netcdf
module load nccmp

# Collect the baseline files with a glob instead of parsing `ls`, so names
# are never word-split. nullglob makes a missing or empty baseline directory
# yield an empty list rather than the literal pattern.
shopt -s nullglob
baselineFiles=("${baselineDir}/${testname}"/*)
shopt -u nullglob

# As before, a test without baseline files still succeeds, but say so.
# NOTE(review): confirm whether a missing baseline should fail the job instead.
if [[ ${#baselineFiles[@]} -eq 0 ]]; then
  echo "warning: no baseline files found in ${baselineDir}/${testname}; nothing compared" >&2
fi

# With `set -e` the first differing file stops the script with a failure.
for baselineFile in "${baselineFiles[@]}"; do
  resFile="${baselineFile##*/}"
  nccmp -d "${baselineFile}" "${runDir}/${testname}/RESTART/${resFile}"
done
98 changes: 52 additions & 46 deletions .github/workflows/SHiELD_parallelworks_intel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ jobs:
# so this salloc will prompt 46 nodes to startup and stay active for 20 min
# this is enough nodes for the first 17 tests to run in parallel, and we
# have 17 runners configured.
- run: salloc --partition=p2 -N 46 -J $GITHUB_SHA sleep 20m &
- run: /contrib/fv3/GFDL_atmos_cubed_sphere_CI/checkout.sh $GITHUB_REF $GITHUB_SHA
- run: salloc --partition=compute -N 46 -J $GITHUB_SHA sleep 20m &
- run: /contrib/fv3/GFDL_atmos_cubed_sphere_CI/checkout.sh -b $GITHUB_REF -h $GITHUB_SHA

build:
if: github.repository == 'NOAA-GFDL/GFDL_atmos_cubed_sphere'
Expand All @@ -43,15 +43,21 @@ jobs:
needs: [checkout]
strategy:
fail-fast: true
max-parallel: 3
max-parallel: 17
matrix:
runpath: [/contrib/fv3/GFDL_atmos_cubed_sphere_CI/]
runscript: [swcompile.sh, nhcompile.sh, hydrocompile.sh]
runscript: [/contrib/fv3/GFDL_atmos_cubed_sphere_CI/compile.sh]
config: [solo]
hydro: [sw, nh, hydro]
bit: [64bit]
mode: [repro]
steps:
- env:
RUNPATH: ${{ matrix.runpath }}
RUNSCRIPT: ${{ matrix.runscript }}
run: $RUNPATH/$RUNSCRIPT $GITHUB_REF $GITHUB_SHA
CONFIG: ${{ matrix.config }}
HYDRO: ${{ matrix.hydro }}
BIT: ${{ matrix.bit }}
MODE: ${{ matrix.mode }}
run: $RUNSCRIPT -b $GITHUB_REF -h $GITHUB_SHA -c $CONFIG --hydro $HYDRO --bit $BIT -m $MODE

test:
if: github.repository == 'NOAA-GFDL/GFDL_atmos_cubed_sphere'
Expand All @@ -62,55 +68,55 @@ jobs:
fail-fast: false
max-parallel: 17
matrix:
runpath: [/contrib/fv3/GFDL_atmos_cubed_sphere_CI/]
runscript:
runscript: [/contrib/fv3/GFDL_atmos_cubed_sphere_CI/run_test.sh]
argument:
# These are placed in order of largest to smallest jobs
#layout 8,8 needs 8 nodes on dvcimultiintel cluster
- C512r20.solo.superC.sh
- C768.sw.BTwave.sh
- C512r20.solo.superC
- C768.sw.BTwave
#layout 4,8 needs 4 nodes on dvcimultiintel cluster
- C256r20.solo.superC.sh
- C384.sw.BLvortex.sh
- C256r20.solo.superC
- C384.sw.BLvortex
#layout 4,4 needs 2 nodes on dvcimultiintel cluster
- C128r20.solo.superC.sh
- C128r3.solo.TC.d1.sh
- C128r3.solo.TC.h6.sh
- C128r3.solo.TC.sh
- C128r3.solo.TC.tr8.sh
- C192.sw.BLvortex.sh
- C192.sw.BTwave.sh
- C192.sw.modon.sh
- C384.sw.BTwave.sh
- C128r20.solo.superC
- C128r3.solo.TC.d1
- C128r3.solo.TC.h6
- C128r3.solo.TC
- C128r3.solo.TC.tr8
- C192.sw.BLvortex
- C192.sw.BTwave
- C192.sw.modon
- C384.sw.BTwave
#layout 4,1 and 2,2 need 1 node on dvcimultiintel cluster
- C96.solo.BCdry.hyd.sh
- C96.solo.BCdry.sh
- C96.solo.BCmoist.hyd.d3.sh
- C96.solo.BCmoist.hyd.sh
- C96.solo.BCmoist.nhK.sh
- C96.solo.BCmoist.sh
- C96.solo.mtn_rest.hyd.diff2.sh
- C96.solo.mtn_rest.hyd.sh
- C96.solo.mtn_rest.nonmono.diff2.sh
- C96.solo.mtn_rest.sh
- C96.sw.BLvortex.sh
- C96.sw.BTwave.sh
- C96.sw.modon.sh
- C96.sw.RHwave.sh
- d96_1k.solo.mtn_rest_shear.olddamp.sh
- d96_1k.solo.mtn_rest_shear.sh
- d96_1k.solo.mtn_schar.mono.sh
- d96_1k.solo.mtn_schar.sh
- d96_2k.solo.bubble.n0.sh
- d96_2k.solo.bubble.nhK.sh
- d96_2k.solo.bubble.sh
- d96_500m.solo.mtn_schar.sh
- C96.solo.BCdry.hyd
- C96.solo.BCdry
- C96.solo.BCmoist.hyd.d3
- C96.solo.BCmoist.hyd
- C96.solo.BCmoist.nhK
- C96.solo.BCmoist
- C96.solo.mtn_rest.hyd.diff2
- C96.solo.mtn_rest.hyd
- C96.solo.mtn_rest.nonmono.diff2
- C96.solo.mtn_rest
- C96.sw.BLvortex
- C96.sw.BTwave
- C96.sw.modon
- C96.sw.RHwave
- d96_1k.solo.mtn_rest_shear.olddamp
- d96_1k.solo.mtn_rest_shear
- d96_1k.solo.mtn_schar.mono
- d96_1k.solo.mtn_schar
- d96_2k.solo.bubble.n0
- d96_2k.solo.bubble.nhK
- d96_2k.solo.bubble
- d96_500m.solo.mtn_schar
steps:
# This will end the slurm job started in the checkout job
- run: scancel -n $GITHUB_SHA
- env:
RUNPATH: ${{ matrix.runpath }}
RUNSCRIPT: ${{ matrix.runscript }}
run: $RUNPATH/$RUNSCRIPT $GITHUB_REF $GITHUB_SHA
ARG1: ${{ matrix.argument }}
run: $RUNSCRIPT -t $ARG1 -b $GITHUB_REF -h $GITHUB_SHA
shutdown:
if: always() && github.repository == 'NOAA-GFDL/GFDL_atmos_cubed_sphere'
runs-on: [gfdlacsciintel]
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/daily_cleanup_parallelworks.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Old Build Cleanup

# This GitHub Action Workflow is runing on the GFDL_ACS_CIINTEL cluster
# This GitHub Action Workflow is running on the gclustercigfdlacs cluster
# This will delete all build directories older than 30 days
# Build directories are on the cloud at /contrib/fv3/2023.2.0

Expand All @@ -16,3 +16,4 @@ jobs:
name: Delete Builds
steps:
- run: find /contrib/fv3/2023.2.0/GFDL_atmos_cubed_sphere/refs/pull -maxdepth 1 -mindepth 1 -mtime +30 -type d -print -exec rm -rf "{}" \;
- run: find /contrib/fv3/2023.2.0/GFDL_atmos_cubed_sphere/refs/heads -maxdepth 1 -mindepth 1 -mtime +30 -type d -print -exec rm -rf "{}" \;

0 comments on commit 970884a

Please sign in to comment.