From 1f42d4b844552b366c7c08bb333e676e3cfc5a4f Mon Sep 17 00:00:00 2001 From: Eric Charles Date: Thu, 12 Oct 2023 15:17:49 -0700 Subject: [PATCH] Added example specification files --- examples/bps_config_template.yaml | 9 ++ examples/bps_script_template.sh | 20 ++++ examples/config_dc2_steps.yaml | 121 ++++++++++++++++++++++ examples/config_dc2_test_med.yaml | 85 +++++++++++++++ examples/config_standard_elements.yaml | 137 +++++++++++++++++++++++++ examples/config_standard_scripts.yaml | 66 ++++++++++++ examples/manifest_script_template.sh | 13 +++ examples/notes.txt | 29 ++++++ 8 files changed, 480 insertions(+) create mode 100644 examples/bps_config_template.yaml create mode 100644 examples/bps_script_template.sh create mode 100644 examples/config_dc2_steps.yaml create mode 100644 examples/config_dc2_test_med.yaml create mode 100644 examples/config_standard_elements.yaml create mode 100644 examples/config_standard_scripts.yaml create mode 100644 examples/manifest_script_template.sh create mode 100644 examples/notes.txt diff --git a/examples/bps_config_template.yaml b/examples/bps_config_template.yaml new file mode 100644 index 000000000..b7443bf55 --- /dev/null +++ b/examples/bps_config_template.yaml @@ -0,0 +1,9 @@ +includeConfigs: +- ${CTRL_BPS_PANDA_DIR}/config/bps_usdf.yaml +- ${CM_PROD_DIR}/src/lsst/cm/prod/configs/HSC/test/requestMemory.yaml + +executionButler: + requestMemory: 64000 + queue: "SLAC_Rubin_Merge" + +numberOfRetries: 3 diff --git a/examples/bps_script_template.sh b/examples/bps_script_template.sh new file mode 100644 index 000000000..e5e14d4d0 --- /dev/null +++ b/examples/bps_script_template.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env -S -i CM_PROD_DIR="${CM_PROD_DIR}" HOME="${HOME}" bash + +# The shebang lines above are needed b/c setup lsst_distrib is putting +# the lsst python _after_ the virtual env python in the PATH, which +# is causing errors + +# setup LSST env. 
+export WEEKLY='{lsst_version}' +source /cvmfs/sw.lsst.eu/linux-x86_64/lsst_distrib/${WEEKLY}/loadLSST.bash +setup lsst_distrib + +# setup PanDA env. +latest_panda=$(ls -td /cvmfs/sw.lsst.eu/linux-x86_64/panda_env/v* | head -1) +setupScript=${latest_panda}/setup_panda_s3df.sh +source $setupScript ${WEEKLY} + +env | grep PANDA + +# let's drop a panda_auth status here for kicks +panda_auth status diff --git a/examples/config_dc2_steps.yaml b/examples/config_dc2_steps.yaml new file mode 100644 index 000000000..04f75f58f --- /dev/null +++ b/examples/config_dc2_steps.yaml @@ -0,0 +1,121 @@ +# This is an example of the template specification file for Steps used in DC2 campaigns + +# Import the common element templates +- import: "${CM_CONFIGS}/config_standard_elements.yaml" +# Now define the steps +- SpecBlock: + name: dc2_step1 + includes: ['step'] + data: + pipeline_yaml: "${DRP_PIPE_DIR}/pipelines/LSSTCam-imSim/DRP-test-med-1.yaml#step1" + child_config: + spec_block: group + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_method: split_by_query + split_dataset: raw + split_field: exposure +- SpecBlock: + name: dc2_step2 + includes: ['step'] + data: + pipeline_yaml: "${DRP_PIPE_DIR}/pipelines/LSSTCam-imSim/DRP-test-med-1.yaml#step2" + child_config: + spec_block: group + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_method: no_split +- SpecBlock: + name: dc2_step3 + includes: ['step'] + data: + pipeline_yaml: "${DRP_PIPE_DIR}/pipelines/LSSTCam-imSim/DRP-test-med-1.yaml#step3" + child_config: + spec_block: group + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_method: split_by_vals + split_field: tract +- SpecBlock: + name: dc2_step4 + includes: ['step'] + data: + pipeline_yaml: "${DRP_PIPE_DIR}/pipelines/LSSTCam-imSim/DRP-test-med-1.yaml#step4" + child_config: + spec_block: group + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_method: split_by_query + split_dataset: calexp + split_field: visit 
+- SpecBlock: + name: dc2_step5 + includes: ['step'] + data: + pipeline_yaml: "${DRP_PIPE_DIR}/pipelines/LSSTCam-imSim/DRP-test-med-1.yaml#step5" + child_config: + spec_block: group + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_method: split_by_vals + split_field: tract +- SpecBlock: + name: dc2_step6 + includes: ['step'] + data: + pipeline_yaml: "${DRP_PIPE_DIR}/pipelines/LSSTCam-imSim/DRP-test-med-1.yaml#step6" + child_config: + spec_block: group + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_method: split_by_query + split_dataset: calexp + split_field: visit +- SpecBlock: + name: dc2_step7 + includes: ['step'] + data: + pipeline_yaml: "${DRP_PIPE_DIR}/pipelines/LSSTCam-imSim/DRP-test-med-1.yaml#step7" + child_config: + spec_block: group + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_method: no_split +- SpecBlock: + name: dc2_step8 + includes: ['step'] + data: + pipeline_yaml: "${DRP_PIPE_DIR}/pipelines/LSSTCam-imSim/DRP-test-med-1.yaml#step8" + child_config: + spec_block: group + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_method: no_split +- SpecBlock: + name: dc2_faro_visit + includes: ['step'] + data: + pipeline_yaml: "${DRP_PIPE_DIR}/pipelines/LSSTCam-imSim/DRP-test-med-1.yaml#faro_visit" + child_config: + spec_block: group + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_method: no_split +- SpecBlock: + name: dc2_faro_matched + includes: ['step'] + data: + pipeline_yaml: "${DRP_PIPE_DIR}/pipelines/LSSTCam-imSim/DRP-test-med-1.yaml#faro_matched" + child_config: + spec_block: group + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_method: no_split +- SpecBlock: + name: dc2_faro_tract + includes: ['step'] + data: + pipeline_yaml: "${DRP_PIPE_DIR}/pipelines/LSSTCam-imSim/DRP-test-med-1.yaml#faro_tract" + child_config: + spec_block: group + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_method: no_split +- SpecBlock: + 
name: dc2_plots + includes: ['step'] + data: + pipeline_yaml: "${DRP_PIPE_DIR}/pipelines/LSSTCam-imSim/DRP-test-med-1.yaml#analysis_coadd_plots" + child_config: + spec_block: group + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_method: no_split diff --git a/examples/config_dc2_test_med.yaml b/examples/config_dc2_test_med.yaml new file mode 100644 index 000000000..c6320acd3 --- /dev/null +++ b/examples/config_dc2_test_med.yaml @@ -0,0 +1,85 @@ +# This is an example of the campaign level template specification file for dc2-test-med type campaigns + +# Import the Step templates for DC2 +- import: "${CM_CONFIGS}/config_dc2_steps.yaml" +# Now build the Campaign Template for DC2 test med +- SpecBlock: + name: d2c_campaign + # Define collections associated with the campaign + collections: + root: 'cm/dc2_test_med' + # Define the steps, their connections and override parameters as needed + child_config: + step1: + spec_block: dc2_step1 + child_config: + base_query: "instrument='LSSTCam-imSim' and skymap='DC2' and tract in (3828, 3829)" + split_min_groups: 3 + step2: + spec_block: dc2_step2 + prerequisites: ['step1'] + child_config: + base_query: "instrument='LSSTCam-imSim' and skymap='DC2' and tract in (3828, 3829)" + step3: + spec_block: dc2_step3 + prerequisites: ['step2'] + child_config: + base_query: "instrument='LSSTCam-imSim' and skymap='DC2'" + split_vals: + - 3828 + - 3829 + step4: + spec_block: dc2_step4 + prerequisites: ['step3'] + child_config: + base_query: "instrument='LSSTCam-imSim' and skymap='DC2' and tract in (3828, 3829)" + split_min_groups: 4 + step5: + spec_block: dc2_step5 + prerequisites: ['step4'] + child_config: + base_query: "instrument='LSSTCam-imSim' and skymap='DC2' and tract in (3828, 3829)" + split_vals: + - 3828 + - 3829 + step6: + spec_block: dc2_step6 + prerequisites: ['step4'] + child_config: + base_query: "instrument='LSSTCam-imSim' and skymap='DC2' and tract in (3828, 3829)" + split_min_groups: 4 + step7: + 
spec_block: dc2_step7 + prerequisites: ['step3'] + child_config: + base_query: "instrument='LSSTCam-imSim' and skymap='DC2' and tract in (3828, 3829)" + step8: + spec_block: dc2_step8 + prerequisites: ['step3'] + child_config: + base_query: "instrument='LSSTCam-imSim' and skymap='DC2' and tract in (3828, 3829)" + faro_visit: + spec_block: dc2_faro_visit + prerequisites: ['step6'] + child_config: + base_query: "instrument='LSSTCam-imSim' and skymap='DC2' and tract in (3828, 3829)" + faro_matched: + spec_block: dc2_faro_matched + prerequisites: ['step6'] + child_config: + base_query: "instrument='LSSTCam-imSim' and skymap='DC2' and tract in (3828, 3829)" + faro_tract: + spec_block: dc2_faro_tract + prerequisites: ['step3'] + child_config: + base_query: "instrument='LSSTCam-imSim' and skymap='DC2' and tract in (3828, 3829)" + plots: + spec_block: dc2_plots + prerequisites: ['step3'] + child_config: + base_query: "instrument='LSSTCam-imSim' and skymap='DC2' and tract in (3828, 3829)" + # Campaign level parameters + data: + butler_repo: '/repo/dc2' + data_query: "instrument='LSSTCam-imSim' and skymap='DC2' and tract in (3828, 3829)" + lsst_version: "${WEEKLY}" diff --git a/examples/config_standard_elements.yaml b/examples/config_standard_elements.yaml new file mode 100644 index 000000000..d3d09c541 --- /dev/null +++ b/examples/config_standard_elements.yaml @@ -0,0 +1,137 @@ +# This is an example of the template specification file for the standard cm-service processing setup +# for 'Elements', i.e., Jobs, Group, Steps, Campaign + + +# Import the common script templates +- import: "${CM_CONFIGS}/config_standard_scripts.yaml" +# Now define the templates for the standard elements +- SpecBlock: + # Job Template, runs a single workflow + name: job + handler: lsst.cmservice.handlers.job_handler.JobHandler + collections: + # This is the output RUN collection for the Job + job_run: "{root}/{campaign}/{step}/{group}/{job}" + scripts: + # The bps script and configuration + - 
Script: + name: bps + spec_block: panda_script + collections: + run: "{job_run}" + inputs: ["{step_input}", "{campaign_input}", "{campaign_ancillary}"] + # Uses bps report to collect information on the bps job and know when it is done + - Script: + name: bps_report + spec_block: panda_report_script + prerequisites: ['bps'] + collections: + run: "{job_run}" + inputs: ["{step_input}", "{campaign_input}", "{campaign_ancillary}"] + # Uses manifest checker to gather information about the workflow + - Script: + name: manifest_report + spec_block: manifest_report_script + prerequisites: ['bps_report'] + collections: + run: "{job_run}" + data: + rescue: false +- SpecBlock: + # Group Template, runs a group, hopefully using a single job to do so + name: group + handler: lsst.cmservice.handlers.element_handler.ElementHandler + collections: + group_output: "{root}/{campaign}/{step}/{group}" + group_validation: "{root}/{campaign}/{step}/{group}/validate" + scripts: + - Script: + name: run + spec_block: run_jobs + child_config: + spec_block: job +- SpecBlock: + name: step + handler: lsst.cmservice.handlers.element_handler.ElementHandler + # Define collections associated with the step + collections: + step_input: "{root}/{campaign}/{step}/input" + step_output: "{root}/{campaign}/{step}_ouput" + step_public_output: "{root}/{campaign}/{step}" + step_validation: "{root}/{campaign}/{step}/validate" + scripts: + # Prepare the step by building input collection + - Script: + name: prepare + spec_block: prepare_step_script + collections: + output: "{step_input}" + inputs: ["{campaign_input}", "{campaign_ancillary}"] + # Prepare the groups for this step + - Script: + name: run + prerequisites: ['prepare'] + spec_block: run_groups + # Build the step output collection by chaining the job RUN collections + - Script: + name: collect_groups + prerequisites: ['run'] + spec_block: chain_collect_jobs_script + collections: + inputs: [] + output: "{step_output}" + # Build the 'public' step output 
collection by also chaining in the campaign inputs + - Script: + name: make_step_public_output + prerequisites: ['collect_groups'] + spec_block: chain_create_script + collections: + inputs: ["{step_output}", "{campaign_input}", "{campaign_ancillary}"] + output: "{step_public_output}" +# Now build the generic Campaign Template +- SpecBlock: + name: campaign + handler: lsst.cmservice.handlers.element_handler.ElementHandler + # Define collections associated with the campaign + collections: + campaign_source: /prod/raw/all + campaign_input: "{root}/{campaign}/input" + campaign_output: "{root}/{campaign}" + campaign_ancillary: "{root}/{campaign}/ancillary" + campaign_validation: "{root}/{campaign}/validate" + # Define scripts associated with the campaign + scripts: + # Tag the inputs for the campaign + - Script: + name: tag_inputs + spec_block: tag_inputs_script + collections: + input: "{campaign_source}" + output: "{campaign_input}" + # Create the ancillary chain for the campaign + - Script: + name: ancillary + spec_block: chain_create_script + collections: + inputs: + - calib_input + - other_calib_input + output: "{campaign_ancillary}" + # Run the steps of the campaign + - Script: + name: run + spec_block: run_steps + prerequisites: ['tag_inputs', 'ancillary'] + # Build the chained collection for the campaign + - Script: + name: collect_steps + prerequisites: ['run'] + spec_block: chain_collect_steps_script + collections: + inputs: [] + output: "{campaign_output}" + data: + prod_area: 'output/archive' + bps_yaml_template: "${CM_CONFIGS}/bps_config_template.yaml" + bps_script_template: "${CM_CONFIGS}/bps_script_template.sh" + manifest_script_template: "${CM_CONFIGS}/manifest_script_template.sh" diff --git a/examples/config_standard_scripts.yaml b/examples/config_standard_scripts.yaml new file mode 100644 index 000000000..9e6376515 --- /dev/null +++ b/examples/config_standard_scripts.yaml @@ -0,0 +1,66 @@ +# create an empty CHAINED collection +- SpecBlock: + name: 
chain_create_script + handler: lsst.cmservice.handlers.scripts.ChainCreateScriptHandler +# prepend to a CHAINED collection +- SpecBlock: + name: chain_prepend_script + handler: lsst.cmservice.handlers.scripts.ChainPrependScriptHandler +# collect Job RUN collections into a CHAINED collection +- SpecBlock: + name: chain_collect_jobs_script + handler: lsst.cmservice.handlers.scripts.ChainCollectScriptHandler + data: + collect: jobs +# collect Step CHAINED collections into a CHAINED collection +- SpecBlock: + name: chain_collect_steps_script + handler: lsst.cmservice.handlers.scripts.ChainCollectScriptHandler + data: + collect: steps +# make a TAGGED collection with a query +- SpecBlock: + name: tag_inputs_script + handler: lsst.cmservice.handlers.scripts.TagInputsScriptHandler +# make an empty TAGGED collection +- SpecBlock: + name: tag_create_script + handler: lsst.cmservice.handlers.scripts.TagCreateScriptHandler +# add datasets to an existing TAGGED collection +- SpecBlock: + name: tag_associate_script + handler: lsst.cmservice.handlers.scripts.TagAssociateScriptHandler +# Prepare a step by making an input collection +- SpecBlock: + name: prepare_step_script + handler: lsst.cmservice.handlers.scripts.PrepareStepScriptHandler + collections: + global_inputs: "{campaign_input}" +# Run a validation script (probably a query into sasquatch) +- SpecBlock: + name: validate_script + handler: lsst.cmservice.handlers.scripts.ValidateScriptHandler +# Run a bps submit script +- SpecBlock: + name: bps_submit_script + handler: lsst.cmservice.handlers.jobs.BpsSubmitScriptHandler +# Run a bps report script +- SpecBlock: + name: bps_report_script + handler: lsst.cmservice.handlers.jobs.BpsReportScriptHandler +# Run a manifest checker report script +- SpecBlock: + name: manifest_report_script + handler: lsst.cmservice.handlers.jobs.ManifestReportScriptHandler +# Create and run Jobs associated to a Group +- SpecBlock: + name: run_jobs + handler: 
lsst.cmservice.handlers.elements.RunJobsScriptHandler +# Create and run Groups associated to a Step +- SpecBlock: + name: run_groups + handler: lsst.cmservice.handlers.elements.RunGroupsScriptHandler +# Create and run Steps associated to a Campaign +- SpecBlock: + name: run_steps + handler: lsst.cmservice.handlers.elements.RunStepsScriptHandler diff --git a/examples/manifest_script_template.sh b/examples/manifest_script_template.sh new file mode 100644 index 000000000..894a39729 --- /dev/null +++ b/examples/manifest_script_template.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env -S -i CM_PROD_DIR="${CM_PROD_DIR}" HOME="${HOME}" bash + +# The shebang lines above are needed b/c setup lsst_distrib is putting +# the lsst python _after_ the virtual env python in the PATH, which +# is causing errors + +# setup LSST env. +export WEEKLY='{lsst_version}' +source /cvmfs/sw.lsst.eu/linux-x86_64/lsst_distrib/${WEEKLY}/loadLSST.bash +setup lsst_distrib + +# setup cm service. +setup -j -r ${CM_SERVICE_DIR} diff --git a/examples/notes.txt b/examples/notes.txt new file mode 100644 index 000000000..293cdd08e --- /dev/null +++ b/examples/notes.txt @@ -0,0 +1,29 @@ + +# Required CM setup for now +export CM_SERVICE_DIR= +export CM_CONFIGS= + + +# Initial setup of the campaign template. This only happens once per campaign type + +# Create a production as a namespace +cm-service add production --name dc2_test_med + +# Load the related specification. 
+cm-service load specification --yaml_file examples/config_dc2_test_med.yaml --production_name dc2_test_med --spec_name v0 --set-as-default + +# If you want a new version of the campaign (say, because you added a step), you can create a new specification +cm-service load specification --yaml_file examples/config_dc2_test_med.yaml --production_name dc2_test_med --spec_name v1 --set-as-default + + +# Creating and launching a campaign, happens many times + + +# Create a specific instance of a campaign +cm-service add campaign --parent_name dc2_test_med --name w_2023_38 --override_data "lsst_weekly=w_2023_38" # use the default specification for this production, in this case v1 + +cm-service add campaign --parent_name dc2_test_med --name w_2023_37 --spec_name v0 --override_data "lsst_weekly=w_2023_37" # use a non-default specification for this production + + +# Put a campaign in the processing queue +cm-service queue add --fullname dc2_test_med/w_2023_38