nextflow.config

/*
================================================================================
    GBIF phylogenetic diversity pipeline config
================================================================================
    Default config options for all compute environments
--------------------------------------------------------------------------------
*/


// Global default params, used in configs.
// Loaded first because later sections of this file read `params.*` values
// (e.g. `params.tracedir` in the execution-report settings).
// NOTE(review): presumably `tracedir`, `max_cpus`, `max_memory` and `max_time` are defined in this file - confirm
includeConfig 'conf/params.config'


// Per-process resource settings.
// Only the `cpus` directive is set in this scope; container images are specified
// in the profile-specific configs included from the `profiles` scope of this file.
process {

    // The maximum number of process instances that can be executed in parallel
    // maxForks = params.max_cpus - 1


    //// Process-specific config
    //// Each `withName` selector applies its settings to the process with that name

    // Occurrence filtering, stage I
    withName:occ_filter{
        cpus = 4
    }

    // Counting the total number of records per H3 cell
    withName:record_count{
        cpus = 4
    }

    // Prepare a list of DOIs for the GBIF datasets
    withName:derived_datasets {
        cpus = 4
    }

    // Outlier filtering, stage II - without DBSCAN, all low abundant species
    withName:outl_low{
        cpus = 4
    }

    // Outlier filtering, stage II - with DBSCAN, independently by species
    withName:outl_high{
        cpus = 1
    }

    // Prepare a list of OpenTree IDs
    withName:prep_ott_ids{
        cpus = 1
    }

    // Get an induced subtree from OpenTree
    withName:get_ott_tree{
        cpus = 1
    }

    // Merge filtered species occurrences and prep data for Biodiverse
    withName:merge_occ{
        cpus = 4
    }

    // Create Biodiverse input files
    // and estimate observed phylogenetic diversity
    withName:prep_biodiv {
        cpus = 1
    }

    // Prepare a shapefile to spatially constrain randomizations
    withName:prep_shapefile {
        cpus = 1
    }

    // Perform Biodiverse randomizations
    withName:phylodiv {
        cpus = 1
    }

    // Perform spatially-constrained randomizations
    // NOTE(review): the selector is spelled `phylodiv_constrianed`; it has to match
    // the process name in the workflow script exactly - confirm before renaming
    withName:phylodiv_constrianed {
        cpus = 1
    }

    // Prepare a file with paths of `phylodiv` output
    withName:rand_filelist {
        cpus = 1
    }

    // Aggregate randomization results
    withName:aggregate_rnds_biodiv {
        cpus = 1
    }

    // Export Biodiverse results into CSV
    withName:div_to_csv {
        cpus = 1
    }

    // Plot PD indices (non-interactive maps)
    withName:plot_pd {
        cpus = 1
    }

    // Plot PD indices (interactive map - Leaflet-based choropleth)
    withName:plot_leaflet {
        cpus = 1
    }

}  // end of process configs


// // Executor configs
// executor {
//
//   // The number of tasks the executor will handle in a parallel manner (default: 100)
//   queueSize = 100
//
//   // executor.cpus define the max cpus that can be used by all processes
//   name = 'local'
//   // cpus = 8
//   cpus = ${params.max_cpus}
//   memory = '150 GB'
//
//   $local {
//       max_cpus = 8
//       max_memory = '32 GB'
//   }
// }


// Configuration profiles, selected at launch time with `-profile <name>`.
// Each container-engine profile enables exactly one engine and explicitly
// disables the others, so engines cannot be active simultaneously.
profiles {

    // By convention the standard profile is implicitly used when no other profile is specified by the user
    // standard {
    //     docker.enabled = true
    // }

    // Test config (built-in data, custom phylogenetic tree)
    test  { includeConfig 'conf/test.config' }

    // Test config (built-in data, tree from Open Tree of Life)
    test_ott  { includeConfig 'conf/test_ott.config' }

    // Docker-based profile
    docker {
        docker.enabled         = true
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false

        // Container specifications are here
        includeConfig 'conf/docker.config'
    }

    // Singularity-based profile
    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false

        // Container specifications are here
        includeConfig 'conf/singularity.config'
    }

    // Azure Batch from the command line
    // azurebatch { includeConfig 'conf/azure_batch_cli.config' }

    // Azure Batch service
    // azurebatch { includeConfig 'conf/azure_batch.config' }

    // Azure Kubernetes service (AKS)
    azurekube { includeConfig 'conf/azure_kubernetes.config' }

    // Debugging profile: runs `echo $HOSTNAME` before the script of every process
    debug { process.beforeScript = 'echo $HOSTNAME' }

    // Conda-based profile (currently not implemented, Biodiverse is not on conda)
    // NOTE(review): only the `params.enable_conda` flag is set here; recent Nextflow
    // releases switch Conda on via `conda.enabled` - revisit if this profile gets implemented
    conda {
        params.enable_conda    = true
        docker.enabled         = false
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
    }

    // Podman-based profile (no container specifications are included here)
    podman {
        podman.enabled         = true
        docker.enabled         = false
        singularity.enabled    = false
        shifter.enabled        = false
        charliecloud.enabled   = false
    }
    // Shifter-based profile (no container specifications are included here)
    shifter {
        shifter.enabled        = true
        docker.enabled         = false
        singularity.enabled    = false
        podman.enabled         = false
        charliecloud.enabled   = false
    }
    // Charliecloud-based profile (no container specifications are included here)
    charliecloud {
        charliecloud.enabled   = true
        docker.enabled         = false
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
    }

}

// Export these variables to prevent local Python/R libraries from conflicting with those in the container
env {
    // Tell Python not to add the user site-packages directory to its module search path
    PYTHONNOUSERSITE = 1
    // Point R's per-user profile and environment files at fixed paths under `/`
    // NOTE(review): presumably these files do not exist in the containers, so no user-level R settings get loaded - confirm
    R_PROFILE_USER   = "/.Rprofile"
    R_ENVIRON_USER   = "/.Renviron"
}

// Capture exit codes from upstream processes when piping.
// Task scripts run under bash with:
//   -e           exit as soon as a command fails
//   -u           treat the use of an unset variable as an error
//   -o pipefail  a pipeline fails if any command in it fails, not only the last one
process.shell = ['/bin/bash', '-euo', 'pipefail']


// Execution reports.
// The timestamp is computed once, when the config is parsed, so that all four
// files written under `params.tracedir` share the same suffix.
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
// Timeline of task execution (HTML)
timeline {
    enabled = true
    file    = "${params.tracedir}/execution_timeline_${trace_timestamp}.html"
}
// Execution report (HTML)
report {
    enabled = true
    file    = "${params.tracedir}/execution_report_${trace_timestamp}.html"
}
// Trace file (plain text)
trace {
    enabled = true
    file    = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
}
// Pipeline DAG (SVG)
// NOTE(review): rendering the DAG as SVG presumably needs Graphviz on the launching host - confirm
dag {
    enabled = true
    file    = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg"
}


// Pipeline metadata: name, authors, home page, description, entry script,
// Nextflow version constraint, and pipeline version
manifest {
    name            = 'PhyloNext'
    author          = 'Vladimir Mikryukov, Kessy Abarenkov, Shawn Laffan'
    homePage        = 'https://github.com/vmikk/PhyloNext'
    description     = 'GBIF phylogenetic diversity pipeline'
    mainScript      = 'main.nf'
    // Nextflow version constraint for this pipeline (22.10.0 or newer)
    nextflowVersion = '>=22.10.0'
    version         = '1.4.2'
}


// Load modules.config for DSL2 module specific options
// includeConfig 'conf/modules.config'


// Function to ensure that resource requirements don't go beyond
// a maximum limit.
//
// Arguments:
//   obj  - the requested amount of the resource
//   type - which limit to apply: 'memory' (params.max_memory),
//          'time' (params.max_time) or 'cpus' (params.max_cpus)
//
// Returns `obj` when it does not exceed the limit, otherwise the limit itself
// (converted to nextflow.util.MemoryUnit, nextflow.util.Duration or int).
// If the limit cannot be converted or compared, an error message is printed
// and `obj` is returned unchanged. An unrecognised `type` also returns `obj`
// unchanged.
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            def limit = params.max_memory as nextflow.util.MemoryUnit
            // compareTo() only guarantees the sign of its result,
            // so test for `> 0` instead of `== 1`
            return obj.compareTo(limit) > 0 ? limit : obj
        } catch (all) {
            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    }
    if (type == 'time') {
        try {
            def limit = params.max_time as nextflow.util.Duration
            // Same sign-based comparison as for memory
            return obj.compareTo(limit) > 0 ? limit : obj
        } catch (all) {
            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    }
    if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
    // Unrecognised resource type: hand the request back untouched
    // rather than implicitly returning null
    return obj
}