diff --git a/README.md b/README.md
index dbdf4e3..7565dbd 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ Further general loading notes can be found in this [Notion page](https://www.not
 See [below](#collaborative-and-publication-workflows) for special cases like publications or collaborative efforts
 ## I have everything and I know I am doing
 Below assumes you have already created the necessary tables from dbt
-1. Run commands as outlined in `scripts/get_study_metadata.py`. Copy/move those files to the cBio loader ec2 instance
+1. Run commands as outlined in [scripts/get_study_metadata.py](#scriptsget_study_metadatapy). Copy/move those files to the cBio loader ec2 instance
 1. Copy over the appropriate aws account key and download files. Example using `pbta_all` study:
 
    ```sh
@@ -107,6 +107,8 @@ optional arguments:
                         ini profile name
   -c CONFIG_FILE, --config CONFIG_FILE
                         json config file with meta information; see REFS/pbta_all_case_meta_config.json example
+  -r REF_DIR, --ref-dir REF_DIR
+                        dir name containing template data_clinical* header files
 ```
 
 ### From D3b Warehouse
diff --git a/REFS/aws_bucket_key_pairs.txt b/REFS/aws_bucket_key_pairs.txt
index 82ac5d6..ebd78a8 100644
--- a/REFS/aws_bucket_key_pairs.txt
+++ b/REFS/aws_bucket_key_pairs.txt
@@ -2,4 +2,5 @@ s3://cds-246-phs002517-p30-fy20	NCI-AR
 s3://cds-246-phs002517-sequencefiles-p30-fy20	NCI-AR
 s3://cds-306-phs002517-x01	NCI-X01
 s3://d3b-cds-working-bucket	d3b
+s3://kf-strides-study-us-east-1-prd-sd-bhjxbdqk	kf
 s3://kf-study-us-east-1-prd-sd-8y99qzjj	saml
\ No newline at end of file
diff --git a/REFS/template_patient_header.txt b/REFS/template_patient_header.txt
new file mode 100644
index 0000000..3f9e8d7
--- /dev/null
+++ b/REFS/template_patient_header.txt
@@ -0,0 +1,5 @@
+#Patient Identifier	External Patient Identifier	SEX	RACE	ETHNICITY	AGE	AGE_IN_DAYS	OS_STATUS	OS_MONTHS	EFS_MONTHS	EFS_STATUS	germline_sex_estimate	cancer_predispositions
+#Patient identifier	Patient ID used by generator of data	Sex of the patient	racial demographic	ethnic demographic	Age at which the condition or disease was first diagnosed, in years	Patient age in days at initial diagnosis	Overall patient survival status	Overall survival in months since initial diagnosis	Event free (months) since initial treatment	Event free status	germline sex estimate	cancer predispositions
+#STRING	STRING	STRING	STRING	STRING	NUMBER	NUMBER	STRING	NUMBER	NUMBER	STRING	STRING	STRING
+#11	10	9	5	4	8	7	3	2	1	1	1	2
+PATIENT_ID	EXTERNAL_PATIENT_ID	SEX	RACE	ETHNICITY	AGE	AGE_IN_DAYS	OS_STATUS	OS_MONTHS	EFS_MONTHS	EFS_STATUS	GERMLINE_SEX_ESTIMATE	CANCER_PREDISPOSITIONS
diff --git a/REFS/template_sample_header.txt b/REFS/template_sample_header.txt
new file mode 100644
index 0000000..c586dc0
--- /dev/null
+++ b/REFS/template_sample_header.txt
@@ -0,0 +1,5 @@
+#Patient Identifier	COLLECTION_EVENT_ID	Sample Identifier	SPECIMEN_ID	CANCER_TYPE	CANCER_TYPE_DETAILED	ONCOTREE_CODE	TUMOR_TISSUE_SITE	TUMOR_TYPE	SAMPLE_TYPE	MATCHED_NORMAL_SAMPLE_ID	MATCHED_NORMAL_SPECIMEN_ID	CBTN_TUMOR_TYPE	MOLECULAR_SUBTYPE	HARMONIZED_DIAGNOSIS	BROAD_HISTOLOGY	CANCER_GROUP	EXPERIMENT_STRATEGY	pathology_free_text_diagnosis	tumor_fraction	tumor_ploidy	CNS_region
+#Patient identifier	Unifying ID of child sequencing events from a single biological sample	Sample Identifier using external_sample_id	kfdrc tumor biopsecimen ID	Study-defined cancer type	Study-defined cancer type detail	OncoTree alphanumeric code value for CANCER_TYPE_DETAILED	tumor tissue location	primary v metastatic tumor designation	patient tissue sample or cell line	matched normal external_sample_id	kfdrc matched normal biospecimen ID	CBTN-assigned tumor type	contains molecular subtypes for tumor types selected from pathology_diagnosis and pathology_free_text_diagnosis fields, following World Health Organization 2016 classification criteria	Disease with molecular subtype and grade	broad histology	cancer group	sequencing strategies of sample represented by this SAMPLE_ID	pathology free text diagnosis	tumor fraction	tumor ploidy	CNS region
+#STRING	STRING	STRING	STRING	STRING	STRING	STRING	STRING	STRING	STRING	STRING	STRING	STRING	STRING	STRING	STRING	STRING	STRING	STRING	NUMBER	NUMBER	STRING
+#14	3	13	12	11	10	9	8	7	6	2	1	7	5	5	5	5	6	1	3	4	5
+PATIENT_ID	COLLECTION_EVENT_ID	SAMPLE_ID	SPECIMEN_ID	CANCER_TYPE	CANCER_TYPE_DETAILED	ONCOTREE_CODE	TUMOR_TISSUE_TYPE	TUMOR_TYPE	SAMPLE_TYPE	MATCHED_NORMAL_SAMPLE_ID	MATCHED_NORMAL_SPECIMEN_ID	CBTN_TUMOR_TYPE	MOLECULAR_SUBTYPE	HARMONIZED_DIAGNOSIS	BROAD_HISTOLOGY	CANCER_GROUP	EXPERIMENT_STRATEGY	PATHOLOGY_FREE_TEXT_DIAGNOSIS	TUMOR_FRACTION	TUMOR_PLOIDY	CNS_REGION
diff --git a/STUDY_CONFIGS/pbta_all_case_meta_config.json b/STUDY_CONFIGS/pbta_all_case_meta_config.json
index ff39f76..7638316 100644
--- a/STUDY_CONFIGS/pbta_all_case_meta_config.json
+++ b/STUDY_CONFIGS/pbta_all_case_meta_config.json
@@ -25,7 +25,7 @@
                         "out_file": "Desired output file name"
                     }
                 },
-                "x_head": "Special header file table for data_clinical(sample/patient). cBio data_clinical headers have 5 header rows, and which columns are used are determined by the x_file table",
+                "x_head": "Special header tsv file for data_clinical(sample/patient). cBio data_clinical headers have 5 header rows, and which columns are used is determined by the x_file table. Should be just the file name; the directory containing it is given by --ref-dir",
                 "x_file": "sample or patient tables with corresponding metadata at the sample and patient levels",
                 "genomics_etl": "a helper file with relevant cBio sample names and individual genomic files names for ETL merging",
                 "seq_center": "only if project has RNA data, a helper file to fill in missing sequencing center information for genomics etl",
@@ -229,6 +229,11 @@
                 "file_type": ["RSEM_gene","annofuse_filtered_fusions_tsv","annotated_public_outputs","ctrlfreec_pval","ctrlfreec_info","ctrlfreec_bam_seg"],
                 "out_file": "x01_genomics_file_manifest.txt"
             },
+            "cbtn_extra": {
+                "table": "bix_genomics_file.sd_bhjxbdqk_x01_extra-genomics_file_manifest",
+                "file_type": ["RSEM_gene","annofuse_filtered_fusions_tsv","annotated_public_outputs","ctrlfreec_pval","ctrlfreec_info","ctrlfreec_bam_seg"],
+                "out_file": "cbtn_extra_genomics_file_manifest.txt"
+            },
             "pnoc": {
                 "table": "bix_genomics_file.sd_8y99qzjj-genomics_file_manifest",
                 "file_type": ["RSEM_gene","annofuse_filtered_fusions_tsv","annotated_public_outputs","ctrlfreec_pval","ctrlfreec_info","ctrlfreec_bam_seg"],
@@ -236,14 +241,14 @@
             }
         },
         "sample_head": {
-            "table": "bix_workflows.data_clinical_sample_header"
+            "table": "template_sample_header.txt"
         },
         "sample_file": {
             "table": "prod_cbio.pbta_all_data_clinical_sample",
             "out_file": "data_clinical_sample.txt"
         },
         "patient_head": {
-            "table": "bix_workflows.data_clinical_patient_header"
+            "table": "template_patient_header.txt"
         },
         "patient_file": {
             "table": "prod_cbio.pbta_all_data_clinical_patient",
diff --git a/scripts/convert_fusion_as_sv.py b/scripts/convert_fusion_as_sv.py
index de8df24..0b7c35c 100755
--- a/scripts/convert_fusion_as_sv.py
+++ b/scripts/convert_fusion_as_sv.py
@@ -197,11 +197,14 @@ def init_cbio_master(fusion_results, mode, rna_metadata):
     if args.mode == 'openX':
         r_ext = "rsem"
     elif args.mode == 'dgd':
-        r_ext = "rsem"
+        r_ext = "DGD_FUSION"
     # ensure sample name is imported as str
     all_file_meta = pd.read_csv(args.table, sep="\t", dtype={'Cbio_Tumor_Name': str})
-        
+    # File_Type label differs between pbta and openpedcan tables; fall back to 'rsem' when r_ext matches no rows
     rna_subset = all_file_meta.loc[all_file_meta["File_Type"] == r_ext]
+    if rna_subset.empty:
+        r_ext = 'rsem'
+        rna_subset = all_file_meta.loc[all_file_meta["File_Type"] == r_ext]
     # reset index so that references work later while iterating
     rna_subset = rna_subset.reset_index(drop=True)
     project_list = rna_subset.Cbio_project.unique()
diff --git a/scripts/get_study_metadata.py b/scripts/get_study_metadata.py
index dfa0828..7c82f9b 100644
--- a/scripts/get_study_metadata.py
+++ b/scripts/get_study_metadata.py
@@ -8,7 +8,6 @@
 from configparser import ConfigParser
 import argparse
 import json
-import pdb
 
 
 def config(filename='database.ini', section='postgresql'):
@@ -59,7 +58,7 @@ def generic_print(out_file, rows, colnames):
     return 0
 
 
-def get_data_clinical(db_cur, config_dict, prefix):
+def get_data_clinical(db_cur, config_dict, prefix, ref_dir):
     """
     Depending on the prefix of patient or sample, will pull from related tables,
     only use related header info present in table, and print the combined results.
@@ -74,19 +73,20 @@ def get_data_clinical(db_cur, config_dict, prefix):
     (rows, colnames) = generic_pull(db_cur, tbl_name)
 
     # use table header from colnames, and use to select file header
-    head_name = config_dict['database_pulls'][prefix + '_head']['table']
-    # get sample table contents, have to split if format schema.table
-    if '.' not in head_name:
-        head_sql = sql.SQL('SELECT {} FROM {};').format(sql.SQL(',').join(map(sql.Identifier, colnames)), sql.Identifier(head_name))
-    else:
-        (schema, table) = head_name.split('.')
-        head_sql = sql.SQL('SELECT {} FROM {}.{};').format(sql.SQL(',').join(map(sql.Identifier, colnames)), sql.Identifier(schema), sql.Identifier(table))
-    db_cur.execute(head_sql)
-    head = db_cur.fetchall()
+    with open(ref_dir + config_dict['database_pulls'][prefix + '_head']['table']) as head_file:
+        # read every template header line up front so the handle is closed before output is written
+        head_lines = head_file.readlines()
     # create output file and combine results for final product
     out_file = open(datasheet_dir + "/" + config_data['database_pulls'][prefix + '_file']['out_file'], 'w')
-    for row in head:
-        out_file.write("\t".join(row) + "\n")
+    # get indices of matching head lines, then print corresponding cBio header values
+    col_i = []
+    # the last row, and the header of the data clinical table should have overlapping values
+    head_search = head_lines[-1].rstrip('\n').split('\t')
+    for col in colnames:
+        col_i.append(head_search.index(col))
+    for i in range(0, len(head_lines) -1, 1):
+        head = [head_lines[i].rstrip('\n').split('\t')[j] for j in col_i]
+        out_file.write("\t".join(head) + "\n")
     generic_print(out_file, rows, colnames)
     return 0
 
@@ -101,11 +101,19 @@ def get_manifests(db_cur, config_dict):
         try:
             tbl_name = manifests[manifest]['table']
             file_types = manifests[manifest]['file_type']
-            if '.' not in tbl_name:
-                manifest_sql = sql.SQL('SELECT * FROM {} WHERE file_type in ({});').format(sql.Identifier(tbl_name), sql.SQL(',').join(map(sql.Literal, file_types)))
+            if args.all:
+                if '.' not in tbl_name:
+                    manifest_sql = sql.SQL('SELECT * FROM {} WHERE file_type in ({});').format(sql.Identifier(tbl_name), sql.SQL(',').join(map(sql.Literal, file_types)))
+                else:
+                    (schema, table) = tbl_name.split('.')
+                    manifest_sql = sql.SQL('SELECT * FROM {}.{} WHERE file_type in ({});').format(sql.Identifier(schema), sql.Identifier(table), sql.SQL(',').join(map(sql.Literal, file_types)))
             else:
-                (schema, table) = tbl_name.split('.')
-                manifest_sql = sql.SQL('SELECT * FROM {}.{} WHERE file_type in ({});').format(sql.Identifier(schema), sql.Identifier(table), sql.SQL(',').join(map(sql.Literal, file_types)))
+                if '.' not in tbl_name:  # default path: restrict to status=active rows
+                    manifest_sql = sql.SQL('SELECT * FROM {} WHERE file_type in ({}) and status={};').format(sql.Identifier(tbl_name), sql.SQL(',').join(map(sql.Literal, file_types)), sql.Literal("active"))
+                else:
+                    (schema, table) = tbl_name.split('.')
+                    manifest_sql = sql.SQL('SELECT * FROM {}.{} WHERE file_type in ({}) and status={};').format(sql.Identifier(schema), sql.Identifier(table), sql.SQL(',').join(map(sql.Literal, file_types)), sql.Literal("active"))
+
             db_cur.execute(manifest_sql)
             rows = db_cur.fetchall()
             colnames = [desc[0] for desc in db_cur.description]
@@ -122,11 +130,14 @@ def get_manifests(db_cur, config_dict):
 
 parser.add_argument("-d", "--db-ini", action="store", dest="db_ini", help="Database config file - formatting like aws or sbg creds")
 parser.add_argument("-p", "--profile", action="store", dest="profile", help="ini profile name", default="postgresql")
-parser.add_argument("-c", "--config", action="store", dest="config_file", help="json config file with meta information; see REFS/pbta_all_case_meta_config.json example",)
+parser.add_argument("-c", "--config", action="store", dest="config_file", help="json config file with meta information; see REFS/pbta_all_case_meta_config.json example")
+parser.add_argument("-r", "--ref-dir", action="store", dest="ref_dir", help="dir name containing template data_clinical* header files")
+parser.add_argument("-a", "--all", action="store_true", dest="all", help="flag to include all relevant files, not just status=active files, NOT RECOMMENDED")
 
 args = parser.parse_args()
 # Load database login info
 params = config(filename=args.db_ini, section=args.profile)
+
 datasheet_dir = 'datasheets'
 # Load json config file with database pull info
 with open(args.config_file) as f:
@@ -137,13 +148,15 @@ def get_manifests(db_cur, config_dict):
         
     # dict to track keys with specific database calls
     special_keys = {"sample_head": 0, "sample_file": 0, "patient_head": 0, "patient_file": 0, "manifests": 0}
-
+    if not args.ref_dir:  # exit with a usage error instead of a TypeError/IndexError on a missing or empty value
+        parser.error("-r/--ref-dir is required: dir name containing template data_clinical* header files")
+    ref_dir = args.ref_dir if args.ref_dir.endswith('/') else args.ref_dir + '/'  # file names are concatenated onto this
     try:
         os.mkdir(datasheet_dir)
     except Exception as e:
         print(str(e) + ' IGNORE!')
-    get_data_clinical(cur, config_data, 'sample')
-    get_data_clinical(cur, config_data, 'patient')
+    get_data_clinical(cur, config_data, 'sample', ref_dir)
+    get_data_clinical(cur, config_data, 'patient', ref_dir)
     get_manifests(cur, config_data)
 
     # For all other tables to be printed simply, not in special_keys