feat: remove old jecs & retrieve lumi from web

fix: rename campaign scheme feat: restructure metadata
Ming-Yan · Oct 23, 2024 · 0c54a20 · 0c54a20
1 parent b3908b3
commit 0c54a20
Show file tree

Hide file tree

Showing 1,166 changed files with 3,346 additions and 323,040 deletions.
diff --git a/metadata/94X_doublemu_PFNano.json → metadata/94X/94X_doublemu_PFNano.json b/metadata/94X_doublemu_PFNano.json → metadata/94X/94X_doublemu_PFNano.json
diff --git a/metadata/94X_singlemu_PFNano.json → metadata/94X/94X_singlemu_PFNano.json b/metadata/94X_singlemu_PFNano.json → metadata/94X/94X_singlemu_PFNano.json
diff --git a/metadata/ctag_DY_mu_PFNano.json → metadata/94X/ctag_DY_mu_PFNano.json b/metadata/ctag_DY_mu_PFNano.json → metadata/94X/ctag_DY_mu_PFNano.json
diff --git a/metadata/nano_UL17_test.json → metadata/94X/nano_UL17_test.json b/metadata/nano_UL17_test.json → metadata/94X/nano_UL17_test.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...E_Run3_2022_BTV_Comm_v2_NanoV12_noPF.json → ...E_Run3_2022_BTV_Comm_v2_NanoV12_noPF.json b/...E_Run3_2022_BTV_Comm_v2_NanoV12_noPF.json → ...E_Run3_2022_BTV_Comm_v2_NanoV12_noPF.json
diff --git a/...EE_Run3_2022_qcd_BTV_Comm_v2_NanoV12.json → ...EE_Run3_2022_qcd_BTV_Comm_v2_NanoV12.json b/...EE_Run3_2022_qcd_BTV_Comm_v2_NanoV12.json → ...EE_Run3_2022_qcd_BTV_Comm_v2_NanoV12.json
diff --git a/..._Run3_2022_qcdmu_BTV_Comm_v2_NanoV12.json → ..._Run3_2022_qcdmu_BTV_Comm_v2_NanoV12.json b/..._Run3_2022_qcdmu_BTV_Comm_v2_NanoV12.json → ..._Run3_2022_qcdmu_BTV_Comm_v2_NanoV12.json
diff --git a/...mmer22_2022_BTV_Comm_v2_NanoV12_noPF.json → ...mmer22_2022_BTV_Comm_v2_NanoV12_noPF.json b/...mmer22_2022_BTV_Comm_v2_NanoV12_noPF.json → ...mmer22_2022_BTV_Comm_v2_NanoV12_noPF.json
diff --git a/...22_Run3_2022_qcd_BTV_Comm_v2_NanoV12.json → ...22_Run3_2022_qcd_BTV_Comm_v2_NanoV12.json b/...22_Run3_2022_qcd_BTV_Comm_v2_NanoV12.json → ...22_Run3_2022_qcd_BTV_Comm_v2_NanoV12.json
diff --git a/..._Run3_2022_qcdmu_BTV_Comm_v2_NanoV12.json → ..._Run3_2022_qcdmu_BTV_Comm_v2_NanoV12.json b/..._Run3_2022_qcdmu_BTV_Comm_v2_NanoV12.json → ..._Run3_2022_qcdmu_BTV_Comm_v2_NanoV12.json
diff --git a/...mmer22EE_2022_em_BTV_Comm_v1_NanoV12.json → ...mmer22EE_2022_em_BTV_Comm_v1_NanoV12.json b/...mmer22EE_2022_em_BTV_Comm_v1_NanoV12.json → ...mmer22EE_2022_em_BTV_Comm_v1_NanoV12.json
diff --git a/...Summer22_2022_mu_BTV_Comm_v1_NanoV12.json → ...Summer22_2022_mu_BTV_Comm_v1_NanoV12.json b/...Summer22_2022_mu_BTV_Comm_v1_NanoV12.json → ...Summer22_2022_mu_BTV_Comm_v1_NanoV12.json
diff --git a/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2022_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json
diff --git a/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json b/...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json → ...BTV_Run3_2023_Comm_MINIAODv4_NanoV12.json
diff --git a/...ta/MC_Winter22_BTV_Run3_2022_Comm_v1.json → ...22/MC_Winter22_BTV_Run3_2022_Comm_v1.json b/...ta/MC_Winter22_BTV_Run3_2022_Comm_v1.json → ...22/MC_Winter22_BTV_Run3_2022_Comm_v1.json
diff --git a/...a_Winter22_emu_BTV_Run3_2022_Comm_v1.json → ...a_Winter22_emu_BTV_Run3_2022_Comm_v1.json b/...a_Winter22_emu_BTV_Run3_2022_Comm_v1.json → ...a_Winter22_emu_BTV_Run3_2022_Comm_v1.json
diff --git a/...ta_Winter22_mu_BTV_Run3_2022_Comm_v1.json → ...ta_Winter22_mu_BTV_Run3_2022_Comm_v1.json b/...ta_Winter22_mu_BTV_Run3_2022_Comm_v1.json → ...ta_Winter22_mu_BTV_Run3_2022_Comm_v1.json
diff --git a/..._Winter22_mumu_BTV_Run3_2022_Comm_v1.json → ..._Winter22_mumu_BTV_Run3_2022_Comm_v1.json b/..._Winter22_mumu_BTV_Run3_2022_Comm_v1.json → ..._Winter22_mumu_BTV_Run3_2022_Comm_v1.json
diff --git a/metadata/MC_Winter24_MC_NanoAODv13.json → ...a/Winter24/MC_Winter24_MC_NanoAODv13.json b/metadata/MC_Winter24_MC_NanoAODv13.json → ...a/Winter24/MC_Winter24_MC_NanoAODv13.json
diff --git a/metadata/data_Winter24_emu_NanoAODv13.json → ...inter24/data_Winter24_emu_NanoAODv13.json b/metadata/data_Winter24_emu_NanoAODv13.json → ...inter24/data_Winter24_emu_NanoAODv13.json
diff --git a/metadata/data_Winter24_mu_NanoAODv13.json → ...Winter24/data_Winter24_mu_NanoAODv13.json b/metadata/data_Winter24_mu_NanoAODv13.json → ...Winter24/data_Winter24_mu_NanoAODv13.json
diff --git a/metadata/data_Summer22EE_2022_e_BTV_Run3_2022_Comm_v3_NanoV12.json b/metadata/data_Summer22EE_2022_e_BTV_Run3_2022_Comm_v3_NanoV12.json
diff --git a/scripts/suball.py b/scripts/suball.py
@@ -1,6 +1,7 @@
 import os, argparse
 from BTVNanoCommissioning.workflows import workflows
 from BTVNanoCommissioning.utils.sample import predefined_sample
+from BTVNanoCommissioning.utils.AK4_parameters import correction_config
 import os, sys, inspect
 
 current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
@@ -10,6 +11,42 @@
 from runner import config_parser, scaleout_parser, debug_parser
 
 
+# Get lumi
+def get_lumi_from_web(year):
+    """Download the latest Golden JSON lumi mask for ``year``; return its file name.
+
+    Scans the CMS DQM certification listing for ``Collisions<YY>``, ignores
+    links whose name contains 'era', and saves the lexicographically last
+    match under ``src/BTVNanoCommissioning/data/lumiMasks/``.
+
+    Raises ``requests.HTTPError`` on a failed request and ``FileNotFoundError``
+    when the listing contains no matching file.
+    """
+    import requests
+    import re
+
+    year = str(year)
+    # The trailing slash is part of the URL, so file names are appended directly.
+    url = (
+        f"https://cms-service-dqmdc.web.cern.ch/CAF/certification/Collisions{year[2:]}/"
+    )
+    response = requests.get(url, timeout=60)
+    response.raise_for_status()
+    # Capture only the href targets naming a Golden.json, then drop 'era' files.
+    goldenjson_files = re.findall(r'href="([^"]*Golden\.json[^"]*)"', response.text)
+    goldenjson_files = [file for file in goldenjson_files if "era" not in file]
+    if not goldenjson_files:
+        raise FileNotFoundError(
+            f"No files for Year{year} containing 'Golden.json' (excluding 'era') were found."
+        )
+    latest_file = sorted(goldenjson_files)[-1]  # assumes names sort by date
+    download = requests.get(f"{url}{latest_file}", timeout=60)
+    download.raise_for_status()
+    with open(f"src/BTVNanoCommissioning/data/lumiMasks/{latest_file}", "wb") as mask:
+        mask.write(download.content)
+    return latest_file
+
+
 ### Manage workflow in one script
 # EXAMPLE: python scripts/suball.py --scheme default_comissioning --campaign Summer23  --DAS_campaign "*Run2023D*Sep2023*,*Run3Summer23BPixNanoAODv12-130X*" --year 2023
 # prerequest a new campaign should create a entry in AK4_parameters.py
@@ -26,7 +63,7 @@
     parser.add_argument(
         "-sc",
         "--scheme",
-        default="CAMPAIGN_prompt_dataMC",
+        default="Validation",
         choices=list(workflows.keys()) + ["Validation", "SF", "default_comissioning"],
         help="Choose the function for dump luminosity(`lumi`)/failed files(`failed`) into json",
     )
@@ -43,6 +80,11 @@
         action="store_true",
         help="not transfered to https://btvweb.web.cern.ch/Commissioning/dataMC/",
     )
+    parser.add_argument(
+        "--debug",
+        action="store_true",
+        help="Run local debug test with small set of dataset with iterative executor",
+    )
 
     args = parser.parse_args()
     # summarize diffeerent group for study
@@ -61,11 +103,31 @@
             # "QCD_mu_sf"
         ],
     }
+    if args.debug:
+        args.local = True
     if args.scheme in workflows.keys():
         workflow_group["test"] = [args.scheme]
         args.scheme = "test"
+    # Check lumiMask exists and replace the Validation
+    input_lumi_json = correction_config[args.campaign]["lumiMask"]
+    if args.campaign != "prompt_dataMC" and not os.path.exists(
+        f"src/BTVNanoCommissioning/data/lumiMasks/{input_lumi_json}"
+    ):
+        raise f"src/BTVNanoCommissioning/data/lumiMasks/{input_lumi_json} not exist"
+
+    if (
+        args.campaign == "prompt_dataMC"
+        and correction_config[args.campaign]["lumiMask"] == "$PROMPT_DATAMC"
+    ):
+        input_lumi_json = get_lumi_from_web(args.year)
+        os.system(
+            f"sed -i 's/$PROMPT_DATAMC/{input_lumi_json}/g' src/BTVNanoCommissioning/utils/AK4_parameters.py"
+        )
+        print(f"======>{input_lumi_json} is used for {args.year}")
 
     for wf in workflow_group[args.scheme]:
+        if args.debug:
+            print(f"Start running {wf} workflow!!!")
         overwrite = "--overwrite" if args.overwrite else ""
         ## creating dataset
         if (
@@ -100,6 +162,7 @@
                         "DAS_campaign",
                         "version",
                         "local",
+                        "debug",
                     ]:
                         continue
                     if key in [
@@ -112,12 +175,24 @@
                         if value == True:
                             runner_config += f" --{key}"
                     elif value is not None:
-                        if "Validation" == args.scheme and types == "MC":
+                        if (
+                            "Validation" == args.scheme
+                            and types == "MC"
+                            and "limit" not in key
+                        ):
                             runner_config += " --limit 50"
+                        elif args.debug:
+                            runner_config += " --limit 1 --executor iterative"
                         else:
                             runner_config += f" --{key}={value}"
                 runner_config = runner_config_required + runner_config
                 print(runner_config)
+                with open(
+                    f"config_{args.year}_{args.campaign}_{args.scheme}_{args.version}.txt",
+                    "w",
+                ) as config_list:
+                    config_list.write(runner_config)
+
                 os.system(runner_config)
 
         # Get luminosity
@@ -174,3 +249,8 @@
                 raise Exception(
                     f"No input coffea hists_{wf}_data_{args.campaign}_{args.year}_{wf}/hists_{wf}_data_{args.campaign}_{args.year}_{wf}.coffea"
                 )
+    # revert prompt_dataMC lumimask
+    if args.campaign == "prompt_dataMC":
+        os.system(
+            f"sed -i 's/{input_lumi_json}/$PROMPT_DATAMC/g' src/BTVNanoCommissioning/utils/AK4_parameters.py"
+        )