From f7b91f9ba293e4e440255b7154892a9f577b2b6c Mon Sep 17 00:00:00 2001 From: derekwong90 <78445989+derekwong90@users.noreply.github.com> Date: Fri, 30 Jun 2023 13:49:51 -0700 Subject: [PATCH] added griffin --- griffin/runners/GC_correction.sh | 71 ++ griffin/runners/nucleosome_profiling.sh | 107 +++ griffin/scripts/griffin_GC_bias.py | 459 +++++++++++ griffin/scripts/griffin_GC_counts.py | 261 ++++++ griffin/scripts/griffin_calc_GC_frequency.py | 199 +++++ griffin/scripts/griffin_calc_coverage.py | 758 ++++++++++++++++++ griffin/scripts/griffin_filter_sites.py | 293 +++++++ griffin/scripts/griffin_plot.py | 160 ++++ griffin/site_configs/DHS_sites.yaml | 18 + .../site_configs/Immune_Calderon_sites.yaml | 42 + griffin/site_configs/Immune_sites.yaml | 15 + griffin/site_configs/LFS_sites.yaml | 7 + griffin/site_configs/TCGA_sites.yaml | 25 + griffin/site_configs/TFBS_Ulz_sites.yaml | 506 ++++++++++++ griffin/site_configs/TFBS_sites.yaml | 337 ++++++++ griffin/site_configs/TP53_sites.yaml | 12 + griffin/site_configs/hematopoietic_sites.yaml | 3 + griffin/site_configs/housekeeping_sites.yaml | 8 + griffin/site_configs/maneTSS_sites.yaml | 2 + griffin/site_configs/uveal_sites.yaml | 6 + 20 files changed, 3289 insertions(+) create mode 100644 griffin/runners/GC_correction.sh create mode 100644 griffin/runners/nucleosome_profiling.sh create mode 100755 griffin/scripts/griffin_GC_bias.py create mode 100755 griffin/scripts/griffin_GC_counts.py create mode 100755 griffin/scripts/griffin_calc_GC_frequency.py create mode 100755 griffin/scripts/griffin_calc_coverage.py create mode 100755 griffin/scripts/griffin_filter_sites.py create mode 100755 griffin/scripts/griffin_plot.py create mode 100755 griffin/site_configs/DHS_sites.yaml create mode 100755 griffin/site_configs/Immune_Calderon_sites.yaml create mode 100755 griffin/site_configs/Immune_sites.yaml create mode 100755 griffin/site_configs/LFS_sites.yaml create mode 100755 griffin/site_configs/TCGA_sites.yaml create mode 100755 griffin/site_configs/TFBS_Ulz_sites.yaml create mode 100755 griffin/site_configs/TFBS_sites.yaml create mode 100755 griffin/site_configs/TP53_sites.yaml create mode 100755 griffin/site_configs/hematopoietic_sites.yaml create mode 100755 griffin/site_configs/housekeeping_sites.yaml create mode 100755 griffin/site_configs/maneTSS_sites.yaml create mode 100755 griffin/site_configs/uveal_sites.yaml diff --git a/griffin/runners/GC_correction.sh b/griffin/runners/GC_correction.sh new file mode 100644 index 0000000..1ec4eb4 --- /dev/null +++ b/griffin/runners/GC_correction.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +griffin=/cluster/projects/pughlab/bin/Griffin/v0.2.0 +basedir=/cluster/projects/pughlab/projects/CHARM/LFS/griffin2 +ref=/cluster/projects/pughlab/references/TGL/hg38/hg38_random.fa +input=/cluster/projects/pughlab/external_data/TGL49_CHARM/LFS/LFS_WG/bams +outdir=$basedir/output/GC_correction +shdir=$basedir/sh_scripts/GC_correction + +mkdir -p $outdir +mkdir -p $shdir +mkdir -p $outdir/mappability_bias +mkdir -p $outdir/mappability_plots +mkdir -p $outdir/tmp + +cd $input +ls *bam > $shdir/bams + +cd $shdir +sed 's/....$//' bams > bam +mv bam bams + +for bam in $(cat bams);do + +name=${bam:0:25} +echo $bam +echo $name + +mkdir -p $outdir/tmp/$name + +echo -e "#!/bin/bash +source activate base +conda activate griffin2" > $shdir/${name}.sh + +echo -e "$griffin/scripts/griffin_mappability_correction.py \ +--bam_file $input/${bam}.bam \ +--bam_file_name $name \ +--output $outdir/mappability_bias/${name}.mappability_bias.txt \ 
+--output_plot $outdir/mappability_plots/${name}.mappability_bias.pdf \ +--mappability $griffin/Ref/k50.Umap.MultiTrackMappability.hg38.bw \ +--exclude_paths $griffin/Ref/encode_unified_GRCh38_exclusion_list.bed \ +--chrom_sizes $griffin/Ref/hg38.standard.chrom.sizes \ +--map_quality 20 \ +--CPU 8 \ +--tmp_dir $outdir/tmp/$name" >> $shdir/${name}.sh + +echo -e "$griffin/scripts/griffin_GC_counts.py \ +--bam_file $input/${bam}.bam \ +--bam_file_name $name \ +--mappable_regions_path $griffin/Ref/k100_minus_exclusion_lists.mappable_regions.hg38.bed \ +--ref_seq $ref \ +--chrom_sizes $griffin/Ref/hg38.standard.chrom.sizes \ +--out_dir $outdir \ +--map_q 20 \ +--size_range 15 500 \ +--CPU 8" >> $shdir/${name}.sh + +echo -e "$griffin/scripts/griffin_GC_bias.py \ +--bam_file_name $name \ +--mappable_name k100_minus_exclusion_lists.mappable_regions.hg38 \ +--genome_GC_frequency $griffin/Ref/genome_GC_frequency \ +--out_dir $outdir/ \ +--size_range 15 500" >> $shdir/${name}.sh + +done + +cd $shdir +ls *.sh > files +for file in $(cat files);do +sbatch -p all -c 8 --mem 8G -t 24:00:00 $file +done diff --git a/griffin/runners/nucleosome_profiling.sh b/griffin/runners/nucleosome_profiling.sh new file mode 100644 index 0000000..89fcee0 --- /dev/null +++ b/griffin/runners/nucleosome_profiling.sh @@ -0,0 +1,107 @@ +#!/bin/bash + +analysis=hematopoetic +CPU=1 +mem=8G + +griffin=/cluster/projects/pughlab/bin/Griffin/v0.2.0 +basedir=/cluster/projects/pughlab/projects/CHARM/LFS/griffin2 +sites=$griffin/site_configs/${analysis}_sites.yaml +ref=/cluster/projects/pughlab/references/TGL/hg38/hg38_random.fa +input=/cluster/projects/pughlab/external_data/TGL49_CHARM/LFS/LFS_WG/bams +counts=$basedir/output/GC_correction +outdir=$basedir/output/nucleosome_profiling/$analysis +shdir=$basedir/sh_scripts/nucleosome_profiling/$analysis + +encode_exclude=$griffin/Ref/encode_unified_GRCh38_exclusion_list.bed +centromere_path=$griffin/Ref/hg38_centromeres.bed +gap_path=$griffin/Ref/hg38_gaps.bed +patch_path=$griffin/Ref/hg38_fix_patches.bed +alternative_haplotype_path=$griffin/Ref/hg38_alternative_haplotypes.bed + +mkdir -p $outdir +mkdir -p $outdir/tmp +mkdir -p $outdir/results +mkdir -p $shdir + +cd $input +ls *bam > $shdir/bams + +cd $shdir +sed 's/....$//' bams > bam +mv bam bams + +for bam in $(cat bams);do + +name=${bam:0:25} +echo $bam +echo $name + +echo -e "#!/bin/bash\n +source activate base\n +conda activate griffin2\n" > $shdir/${name}.sh + +echo -e "$griffin/scripts/griffin_coverage.py \ +--sample_name $name \ +--bam $input/${bam}.bam \ +--GC_bias $counts/GC_bias/${name}.GC_bias.txt \ +--mappability_bias $counts/mappability_bias/${name}.mappability_bias.txt \ +--mappability_correction True \ +--tmp_dir $outdir/tmp \ +--reference_genome $ref \ +--mappability_bw $griffin/Ref/k50.Umap.MultiTrackMappability.hg38.bw \ +--chrom_sizes_path $griffin/Ref/hg38.standard.chrom.sizes \ +--sites_yaml $sites \ +--griffin_scripts $griffin/scripts \ +--chrom_column Chrom \ +--position_column position \ +--strand_column Strand \ +--chroms chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 \ +--norm_window -5000 5000 \ +--size_range 100 200 \ +--map_quality 20 \ +--number_of_sites none \ +--sort_by none \ +--ascending none \ +--CPU $CPU\n" >> $shdir/${name}.sh + +echo -e "$griffin/scripts/griffin_merge_sites.py \ +--sample_name $name \ +--uncorrected_bw_path $outdir/tmp/$name/tmp_bigWig/${name}.uncorrected.bw \ +--GC_corrected_bw_path 
$outdir/tmp/$name/tmp_bigWig/${name}.GC_corrected.bw \ +--GC_map_corrected_bw_path $outdir/tmp/$name/tmp_bigWig/${name}.GC_map_corrected.bw \ +--mappability_correction False \ +--tmp_dir $outdir/tmp \ +--results_dir $outdir/results \ +--mappability_bw $griffin/Ref/k50.Umap.MultiTrackMappability.hg38.bw \ +--chrom_sizes_path $griffin/Ref/hg38.standard.chrom.sizes \ +--sites_yaml $sites \ +--griffin_scripts $griffin/scripts \ +--chrom_column Chrom \ +--position_column position \ +--strand_column Strand \ +--chroms chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 \ +--norm_window -5000 5000 \ +--save_window -1000 1000 \ +--fft_window -960 960 \ +--fft_index 10 \ +--smoothing_length 165 \ +--exclude_paths $encode_exclude $centromere_path $gap_path $patch_path $alternative_haplotype_path \ +--step 15 \ +--CNA_normalization False \ +--individual False \ +--smoothing True \ +--exclude_outliers True \ +--exclude_zero_mappability True \ +--number_of_sites none \ +--sort_by none \ +--ascending none \ +--CPU $CPU\n" >> $shdir/${name}.sh + +done + +cd $shdir +ls *.sh > files +for file in $(cat files);do +sbatch -c $CPU --mem $mem -t 24:00:00 $file +done diff --git a/griffin/scripts/griffin_GC_bias.py b/griffin/scripts/griffin_GC_bias.py new file mode 100755 index 0000000..8dce11a --- /dev/null +++ b/griffin/scripts/griffin_GC_bias.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +import pysam +import os +#import pybedtools #not used +import pandas as pd +import numpy as np +import time +import argparse +import sys +from matplotlib import pyplot as plt + + +# In[ ]: + + +# %matplotlib inline + +# bam_file_name = 'MBC_1041_1_ULP' +# mapable_name = 'repeat_masker.mapable.k50.Umap.hg38' +# genome_GC_frequency = '/fh/fast/ha_g/user/adoebley/projects/griffin_paper/genome_GC_frequency/results' +# out_dir = 'tmp' +# size_range = [15,500] + + +# In[ ]: + + +parser = argparse.ArgumentParser() + +parser.add_argument('--bam_file_name', help='sample name (does not need to match actual file name)', required=True) + +parser.add_argument('--mapable_name', help='name of mapable regions file (with .bed removed)', required=True) + +parser.add_argument('--genome_GC_frequency',help='folder containing GC counts in the reference sequence (made by generate_reference_files.snakemake)',required=True) + +parser.add_argument('--out_dir',help='folder for GC bias results',required=True) + +parser.add_argument('--size_range',help='range of read sizes to be included',nargs=2, type=int, required=True) + +args = parser.parse_args() + +bam_file_name = args.bam_file_name +mapable_name=args.mapable_name +genome_GC_frequency = args.genome_GC_frequency +out_dir = args.out_dir +size_range = args.size_range + + +# In[ ]: + + +print('arguments provided:') + +print('\tbam_file_name = "'+bam_file_name+'"') +print('\tmapable_name = "'+mapable_name+'"') + +print('\tgenome_GC_frequency = "'+genome_GC_frequency+'"') +out_dir = out_dir.rstrip('/') +print('\tout_dir = "'+out_dir+'"') + +print('\tsize_range = '+str(size_range)) + + +# In[ ]: + + +#For now I'm going to keep the smoothing bin size as a set variable +GC_smoothing_step = 20 + + +# In[ ]: + + +#input is the out_file from the previous step +in_file = out_dir +'/'+mapable_name+'/GC_counts/'+ bam_file_name+'.GC_counts.txt' +print('in_file:',in_file) + +#output is smoothed version +smoothed_out_file = out_dir +'/'+mapable_name+'/GC_bias/'+ bam_file_name+'.GC_bias.txt' + +#plot files 
+plot_file1 = out_dir +'/'+mapable_name+'/GC_plots/'+ bam_file_name+'.GC_bias.pdf' +plot_file2 = out_dir +'/'+mapable_name+'/GC_plots/'+ bam_file_name+'.GC_bias.summary.pdf' +plot_file3 = out_dir +'/'+mapable_name+'/GC_plots/'+ bam_file_name+'.GC_bias.key_lengths.pdf' + +print('out_file:',smoothed_out_file) +sys.stdout.flush() + + +# In[ ]: + + +#create output folders if needed +if not os.path.exists(out_dir +'/'+mapable_name+'/GC_plots/'): + os.mkdir(out_dir +'/'+mapable_name+'/GC_plots/') +if not os.path.exists(out_dir +'/'+mapable_name+'/GC_bias/'): + os.mkdir(out_dir +'/'+mapable_name+'/GC_bias/') + + +# In[ ]: + + +#import the GC info from the genome +frequency_prefix = genome_GC_frequency+'/'+mapable_name+'.' + +GC_freq = pd.DataFrame() +for i in range(size_range[0],size_range[1]+1): + current_path = frequency_prefix+str(i)+'bp.GC_frequency.txt' + current_data = pd.read_csv(current_path,sep='\t') + GC_freq = GC_freq.append(current_data, ignore_index=True) + +GC_freq['GC_content']=GC_freq['num_GC']/GC_freq['length'] +GC_freq = GC_freq.sort_values(by=['GC_content','length']).reset_index(drop=True) + + +# In[ ]: + + +#import GC counts from the sample +GC_df = pd.read_csv(in_file, sep='\t') + +GC_df['GC_content']=GC_df['num_GC']/GC_df['length'] +GC_df = GC_df.sort_values(by=['GC_content','length']).reset_index(drop=True) + + +# In[ ]: + + +#calculate the GC_bias +new_df = pd.DataFrame() +for length in range(size_range[0],size_range[1]+1): + current = GC_df[GC_df['length']==length].copy().reset_index(drop=True) + current_freq = GC_freq[GC_freq['length']==length].copy().reset_index(drop=True) + + #save the frequency of each GC content in the genome + current['number_of_positions']=current_freq['number_of_fragments'] + + #calculate the GC bias + current_bias = current['number_of_fragments']/current['number_of_positions'] + current['GC_bias'] = current_bias + + #normalize to a mean of 1 for each fragment length(compute GC bias does this same thing) + current['GC_bias'] = current['GC_bias']/np.nanmean(current['GC_bias']) + new_df = new_df.append(current, ignore_index=True) + + #print(length,len(current['GC_bias']),np.nanmean(current['GC_bias'])) + +new_df = new_df.sort_values(by=['GC_content','length']).reset_index(drop=True) + + +# In[ ]: + + +def median_smoothing(current,fraction): + bin_size=int(len(current)*fraction) + if bin_size<50: + bin_size=50 + medians = [] + + for i in range(len(current)): + start = int(i-bin_size/2) + end = int(i+bin_size/2) + #if the bin starts before the beginning, just take the first bin + if start<0: + start=0 + end=bin_size + #if the bin extends beyond the end, take the last bin + if end>=len(current): + start=len(current)-bin_size + end=len(current) + current_median = np.nanmedian(current['GC_bias'].iloc[start:end]) + medians.append(current_median) + return(medians) + + +# In[ ]: + + + + + +# In[ ]: + + +#smooth GC bias by size bin + +start_time = time.time() + +new_df2 = pd.DataFrame() +for length in new_df['length'].unique(): + if length%20==0: + print(length, time.time()-start_time) + sys.stdout.flush() + + #get a bin of similar sized fragments + min_len = int(length - (GC_smoothing_step/2)) + max_len = int(length + (GC_smoothing_step/2)) + + current = new_df[(new_df['length']>=min_len) & (new_df['length']<=max_len)].copy() + + #perform smoothing + fit = median_smoothing(current,.05) + current['smoothed_GC_bias']=fit + + #only keep smoothed values for the selected length + current = current[current['length']==length] + + #get rid of values for GC contents 
that are never observed + current['smoothed_GC_bias'] = np.where(current['number_of_positions']==0,np.nan,current['smoothed_GC_bias']) + + #normalize to a mean of 1 + current['smoothed_GC_bias'] = current['smoothed_GC_bias']/np.nanmean(current['smoothed_GC_bias']) + + new_df2 = new_df2.append(current,ignore_index=True) + + #print(length,len(current),np.nanmean(current['smoothed_GC_bias'])) + +new_df = new_df2 + + +# In[ ]: + + +new_df[new_df['length']==200]#['GC_bias'].sum()/len(new_df[new_df['length']==200]) + + +# In[ ]: + + +#export results +new_df2.to_csv(smoothed_out_file,sep='\t',index=False) + + +# In[ ]: + + +#generate one plot per size bin + +#set up a figure for plotting +plot_indexes = np.arange(size_range[0]+GC_smoothing_step,size_range[1]+GC_smoothing_step,GC_smoothing_step) +lengths_to_plot = plot_indexes +x_dim = 6 +y_dim = int(np.ceil(len(plot_indexes)/6)) +empty_plots = int(x_dim*y_dim - len(plot_indexes)) +plot_indexes = np.append(plot_indexes,[np.nan for m in range(empty_plots)]) +plot_indexes = np.reshape(plot_indexes,(y_dim,x_dim)) +fig, axes = plt.subplots(y_dim,x_dim, figsize = (5*x_dim,3.5*y_dim), sharex = True, sharey = True) +axes = axes.reshape(y_dim,x_dim) #make sure the axes array is two dimension (just in case it has less than 7 value) + +#do the plotting +min_len = 0 +for max_len in lengths_to_plot: + if max_len%20==0: + print(max_len) + + #pick the axis + current_index = np.where(plot_indexes==max_len) + current_index = (current_index[0][0],current_index[1][0]) + current_ax = axes[current_index] + + #pick the data + current1 = new_df2[(new_df2['length']>min_len) & (new_df2['length']<=max_len)].copy() + + #plot the smoothed data over top + for length2 in current1['length'].unique(): + current2 = current1[current1['length']==length2] + current_ax.plot(current2['GC_content'],current2['smoothed_GC_bias'], label=str(length2)+'bp') + + current_ax.set_title(str(min_len) + 'bp to '+str(max_len)+'bp') + current_ax.legend(ncol = 2) + + min_len = max_len + +for i in range(x_dim): + axes[y_dim-1,i].set_xlabel('GC content') + +for i in range(y_dim): + axes[i,0].set_ylabel('coverage bias') + +ylim = axes[0,0].get_ylim() + +old_title = axes[0,0].get_title() +axes[0,0].set_title(bam_file_name+'\n'+mapable_name + '\n' + old_title) + +fig.tight_layout() + +plt.savefig(plot_file1) + +plt.close('all') + + +# In[ ]: + + +#key lengths +selected_lengths = np.arange(100,201,GC_smoothing_step) + +fig,ax = plt.subplots(1) + +# for_color = len(selected_lengths)-1 +# color = (1-(i/for_color),.5*(1-(i/for_color)), i/for_color) + +for i,length in enumerate(selected_lengths): + current = new_df2[new_df2['length']==length] + ax.plot(current['GC_content'],current['smoothed_GC_bias'], label = str(length)+'bp') + +ax.legend(ncol = 2, bbox_to_anchor = [1,1], loc = 'upper left') + +ax.set_xlabel('GC content') +ax.set_ylabel('coverage bias') +ax.set_title(bam_file_name+'\n'+mapable_name) + +fig.tight_layout() +fig.savefig(plot_file3) +plt.close('all') + + +# In[ ]: + + +#summary figure +selected_lengths = np.arange(size_range[0],size_range[1],GC_smoothing_step) + +fig,ax = plt.subplots(1) + +for length in selected_lengths: + current = new_df2[new_df2['length']==length] + ax.plot(current['GC_content'],current['smoothed_GC_bias'], label = str(length)+'bp') +ax.legend(ncol = 2, bbox_to_anchor = [1,1], loc = 'upper left') + +ax.set_xlabel('GC content') +ax.set_ylabel('coverage bias') +ax.set_title(bam_file_name+'\n'+mapable_name) + +fig.tight_layout() +fig.savefig(plot_file2) +plt.close('all') + + 
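+# In[ ]:
+
+
+#optional sanity check (a sketch, not required by the workflow; check_df and per_length_mean
+#are illustrative names): reload the smoothed GC bias table written above and confirm that
+#smoothed_GC_bias averages ~1 within each fragment length, as enforced by the per-length
+#normalization step earlier in this script.
+# check_df = pd.read_csv(smoothed_out_file, sep='\t')
+# per_length_mean = check_df.groupby('length')['smoothed_GC_bias'].mean()
+# print(per_length_mean.describe()) #means should cluster tightly around 1.0
+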
+# In[ ]: + + + + + +# In[ ]: + + +# plot_file4 = out_dir +'/'+mapable_name+'/GC_plots/'+ bam_file_name+'.GC_bias.test.pdf' + +# selected_lengths = np.arange(size_range[0],size_range[1],GC_smoothing_step) + +# fig,ax = plt.subplots(1) + +# for length in selected_lengths: +# current = new_df2[new_df2['length']==length] + +# ax.plot(current['GC_content'],current['GC_bias'],alpha=.2,marker='.') + +# #reset the color cycle +# # for Matplotlib version >= 1.5 +# plt.gca().set_prop_cycle(None) + + +# for length in selected_lengths: +# current = new_df2[new_df2['length']==length] + +# ax.plot(current['GC_content'],current['smoothed_GC_bias'], label = length) + + +# ax.legend(ncol = 2, bbox_to_anchor = [1,1]) + +# ax.set_xlabel('GC content') +# ax.set_ylabel('coverage bias') +# ax.set_title(bam_file_name+'\n'+mapable_name) +# ax.set_ylim(-.1,new_df2['smoothed_GC_bias'].max()+.1) + +# fig.tight_layout() +# fig.savefig(plot_file4) + + +# In[ ]: + + +# #generate one plot per size bin +# #raw_data +# plot_file4 = out_dir +'/'+mapable_name+'/GC_plots/'+ bam_file_name+'.GC_bias.test.pdf' + +# #set up a figure for plotting +# plot_indexes = np.arange(size_range[0]+GC_smoothing_step,size_range[1]+GC_smoothing_step,GC_smoothing_step) +# lengths_to_plot = plot_indexes +# x_dim = 6 +# y_dim = int(np.ceil(len(plot_indexes)/6)) +# empty_plots = int(x_dim*y_dim - len(plot_indexes)) +# plot_indexes = np.append(plot_indexes,[np.nan for m in range(empty_plots)]) +# plot_indexes = np.reshape(plot_indexes,(y_dim,x_dim)) +# fig, axes = plt.subplots(y_dim,x_dim, figsize = (5*x_dim,3.5*y_dim), sharex = True, sharey = True) +# axes = axes.reshape(y_dim,x_dim) #make sure the axes array is two dimension (just in case it has less than 7 value) + +# #do the plotting +# min_len = 0 +# for max_len in lengths_to_plot: +# if max_len%20==0: +# print(max_len) + +# #pick the axis +# current_index = np.where(plot_indexes==max_len) +# current_index = (current_index[0][0],current_index[1][0]) +# current_ax = axes[current_index] + +# #pick the data +# current1 = new_df2[(new_df2['length']>min_len) & (new_df2['length']<=max_len)].copy() + +# #plot the raw data +# for length2 in current1['length'].unique(): +# current2 = current1[current1['length']==length2] +# current_ax.plot(current2['GC_content'],current2['GC_bias'],alpha=.2,marker='.') + +# #reset the color cycle +# # for Matplotlib version >= 1.5 +# plt.gca().set_prop_cycle(None) + +# #plot the smoothed data over top +# for length2 in current1['length'].unique(): +# current2 = current1[current1['length']==length2] +# current_ax.plot(current2['GC_content'],current2['smoothed_GC_bias'], label=length2) + +# current_ax.set_title(str(min_len) + 'bp to '+str(max_len)+'bp') +# current_ax.legend(ncol = 2) + +# min_len = max_len + +# for i in range(x_dim): +# axes[y_dim-1,i].set_xlabel('GC content') + +# for i in range(y_dim): +# axes[i,0].set_ylabel('coverage bias') + +# axes[0,0].set_ylim(ylim) + +# old_title = axes[0,0].get_title() +# axes[0,0].set_title(bam_file_name+'\n'+mapable_name + '\n' + old_title) + +# fig.tight_layout() + +# plt.savefig(plot_file4) +# plt.close('all') + + +# In[ ]: + + + + diff --git a/griffin/scripts/griffin_GC_counts.py b/griffin/scripts/griffin_GC_counts.py new file mode 100755 index 0000000..5399748 --- /dev/null +++ b/griffin/scripts/griffin_GC_counts.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +import pysam +import os + +import pandas as pd +import numpy as np +import time +import argparse +import sys + +from multiprocessing 
import Pool + + +# In[ ]: + + +# ##arguments for testing + +# bam_file_path = '/fh/scratch/delete90/ha_g/realigned_bams/cfDNA_MBC_ULP_hg38/realign_bam_paired_snakemake-master/results/MBC_1041_1_ULP/MBC_1041_1_ULP_recalibrated.bam' +# bam_file_name = 'MBC_1041_1_ULP' +# mapable_path = '../../downloads/genome/repeat_masker.mapable.k50.Umap.hg38.bedGraph' + +# ref_seq_path = '/fh/fast/ha_g/grp/reference/GRCh38/GRCh38.fa' +# chrom_sizes_path = '/fh/fast/ha_g/grp/reference/GRCh38/hg38.standard.chrom.sizes' + +# out_dir = './tmp/' + +# map_q = 20 +# size_range = [15,500] + +# CPU = 4 + + +# In[ ]: + + +parser = argparse.ArgumentParser() + +parser.add_argument('--bam_file', help='sample_bam_file', required=True) +parser.add_argument('--bam_file_name', help='sample name (does not need to match actual file name)', required=True) +parser.add_argument('--mapable_regions', help='highly mapable regions to be used in GC correction, bedGraph or bed foramt', required=True) + +parser.add_argument('--ref_seq',help='reference sequence (fasta format)',required=True) +parser.add_argument('--chrom_sizes',help='path to chromosome sizes for the reference seq',required=True) + +parser.add_argument('--out_dir',help='folder for GC bias results',required=True) + +parser.add_argument('--map_q',help='minimum mapping quality for reads to be considered',type=int,required=True) +parser.add_argument('--size_range',help='range of read sizes to be included',nargs=2, type=int, required=True) + +parser.add_argument('--CPU',help='number of CPU for parallelizing', type=int, required=True) + +args = parser.parse_args() + +bam_file_path = args.bam_file +bam_file_name = args.bam_file_name +mapable_path=args.mapable_regions + +ref_seq_path = args.ref_seq +chrom_sizes_path = args.chrom_sizes +out_dir = args.out_dir + +map_q = args.map_q +size_range = args.size_range +CPU = args.CPU + + +# In[ ]: + + +print('arguments provided:') + +print('\tbam_file_path = "'+bam_file_path+'"') +print('\tbam_file_name = "'+bam_file_name+'"') +print('\tmapable_regions = "'+mapable_path+'"') + +print('\tref_seq_path = "'+ref_seq_path+'"') +print('\tchrom_sizes_path = "'+chrom_sizes_path+'"') +print('\tout_dir = "'+out_dir+'"') + +print('\tmap_q = '+str(map_q)) +print('\tsize_range = '+str(size_range)) +print('\tCPU = '+str(CPU)) + + +# In[ ]: + + +mapable_name = mapable_path.rsplit('/',1)[1].rsplit('.',1)[0] +out_file = out_dir +'/'+mapable_name+'/GC_counts/'+ bam_file_name+'.GC_counts.txt' + +print('out_file',out_file) + + +# In[ ]: + + +#create a directory for the GC data +if not os.path.exists(out_dir +'/'+mapable_name): + os.mkdir(out_dir +'/'+mapable_name) +if not os.path.exists(out_dir +'/'+mapable_name+'/GC_counts/'): + os.mkdir(out_dir +'/'+mapable_name+'/GC_counts/') + + +# In[ ]: + + +#import filter +mapable_intervals = pd.read_csv(mapable_path, sep='\t', header=None) + +#remove non standard chromosomes and X and Y +chroms = ['chr'+str(m) for m in range(1,23)] +mapable_intervals = mapable_intervals[mapable_intervals[0].isin(chroms)] + +print('chroms:', chroms) +print('number_of_intervals:',len(mapable_intervals)) + +sys.stdout.flush() + + +# In[ ]: + + +def collect_reads(sublist): + #create a dict for holding the frequency of each read length and GC content + GC_dict = {} + for length in range(size_range[0],size_range[1]+1): + GC_dict[length]={} + for num_GC in range(0,length+1): + GC_dict[length][num_GC]=0 + + #import the bam file + #this needs to be done within the loop otherwise it gives a truncated file warning + bam_file = 
pysam.AlignmentFile(bam_file_path, "rb") + print('sublist intervals:',len(sublist)) + + #this might also need to be in the loop + #import the ref_seq + ref_seq=pysam.FastaFile(ref_seq_path) + + for i in range(len(sublist)): + chrom = sublist.iloc[i][0] + start = sublist.iloc[i][1] + end = sublist.iloc[i][2] + if i%5000==0: + print('interval',i,':',chrom,start,end,'seconds:',np.round(time.time()-start_time)) + sys.stdout.flush() + #fetch any read that overlaps the inteterval (don't need to extend the interval because the fetch function does this automatically) + fetched = bam_file.fetch(chrom,start,end) + for read in fetched: + #use both fw (positive template length) and rv (negative template length) reads + if (read.is_reverse==False and read.template_length>=size_range[0] and read.template_length<=size_range[1]) or (read.is_reverse==True and -read.template_length>=size_range[0] and -read.template_length<=size_range[1]): + #qc filters, some longer fragments are considered 'improper pairs' but I would like to keep these + if read.is_paired==True and read.mapping_quality>=map_q and read.is_duplicate==False and read.is_qcfail==False: + if read.is_reverse==False: + read_start = read.reference_start + read_end = read.reference_start+read.template_length + elif read.is_reverse==True: + read_end = read.reference_start + read.reference_length + read_start = read_end + read.template_length + + fragment_seq = ref_seq.fetch(read.reference_name,read_start,read_end) + #tally up the GC content + fragment_seq=fragment_seq.replace('g','G').replace('c','C').replace('a','A').replace('t','T').replace('n','N') + + # ################# + # ##logic check#### + # ################# + # if read.is_reverse==False: + # if fragment_seq[0:read.reference_length]==read.query_sequence and len(fragment_seq)==read.template_length: + # print('fw match',read.reference_length) + # else: + # print(fragment_seq[0:read.reference_length],read.reference_length,'fw') + # print(read.query_sequence,len(read.query_sequence),'fw') + # print(len(fragment_seq),read.template_length) + # print('\n') + # elif read.is_reverse==True: + # if fragment_seq[-read.reference_length:]==read.query_sequence and len(fragment_seq)==-read.template_length: + # print('rv match',read.reference_length) + # else: + # print(fragment_seq[-read.reference_length:],read.reference_length,'rv') + # print(read.query_sequence,len(read.query_sequence),'rv') + # print(len(fragment_seq),read.template_length) + # print('\n') + # ################# + + #split and convert to numpy array + fragment_seq = np.array(list(fragment_seq)) + #replace with values + fragment_seq[(fragment_seq=='G') | (fragment_seq=='C')]=1 + fragment_seq[(fragment_seq=='A') | (fragment_seq=='T')]=0 + fragment_seq[(fragment_seq=='N')]=np.random.randint(2) #choose a random 0 or 1 for N (so that you always get an integer) #should be very rare if the filter is done right + fragment_seq = fragment_seq.astype(int) + + num_GC = int(fragment_seq.sum()) + GC_dict[abs(read.template_length)][num_GC]+=1 + + print('done') + return(GC_dict) + + +# In[ ]: + + +start_time = time.time() +p = Pool(processes=CPU) #use the available CPU +sublists = np.array_split(mapable_intervals,CPU) #split the list into sublists, one per CPU + +GC_dict_list = p.map(collect_reads, sublists, 1) + + +# In[ ]: + + +all_GC_df = pd.DataFrame() +for i,GC_dict in enumerate(GC_dict_list): + GC_df = pd.DataFrame() + for length in GC_dict.keys(): + current = pd.Series(GC_dict[length]).reset_index() + current = 
current.rename(columns={'index':'num_GC',0:'number_of_fragments'}) + current['length']=length + current = current[['length','num_GC','number_of_fragments']] + GC_df = GC_df.append(current, ignore_index=True) + GC_df = GC_df.set_index(['length','num_GC']) + all_GC_df[i] = GC_df['number_of_fragments'] + del(GC_df,GC_dict) + +all_GC_df = all_GC_df.sum(axis=1) +all_GC_df = pd.DataFrame(all_GC_df).rename(columns = {0:'number_of_fragments'}) +all_GC_df = all_GC_df.reset_index() +all_GC_df.to_csv(out_file,sep='\t',index=False) + + +# In[ ]: + + +print('done') + + +# In[ ]: + + + + + +# In[ ]: + + + + + +# In[ ]: + + + + diff --git a/griffin/scripts/griffin_calc_GC_frequency.py b/griffin/scripts/griffin_calc_GC_frequency.py new file mode 100755 index 0000000..fcc1ca1 --- /dev/null +++ b/griffin/scripts/griffin_calc_GC_frequency.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[1]: + + +import pysam +import os +import pandas as pd +import numpy as np +import time +import argparse +import sys + + +# In[2]: + + +#This script calculates the frequency of each GC content for fragments that overlap the non-blacklisted areas +#This is performed for each fragment size in the range specified +#this only needs to be performed once for each filter + + +# In[3]: + + +# #arguments for testing +# mapable_path = '/fh/fast/ha_g/user/adoebley/projects/griffin_paper/downloads/genome/repeat_masker.mapable.k50.Umap.hg38.bedGraph' + +# ref_seq_path = '/fh/fast/ha_g/grp/reference/GRCh38/GRCh38.fa' +# chrom_sizes_path = '/fh/fast/ha_g/grp/reference/GRCh38/hg38.standard.chrom.sizes' +# out_dir = './tmp' + +# # step = 1 +# length = 50 #fragment length + + +# In[4]: + + +parser = argparse.ArgumentParser() + +parser.add_argument('--mapable_regions', help='highly mapable regions to be used in GC correction, bed or bedGraph format', required=True) +parser.add_argument('--ref_seq',help='reference sequence (fasta format)',required=True) +parser.add_argument('--chrom_sizes',help='path to chromosome sizes for the reference seq',required=True) +parser.add_argument('--out_dir',help='folder for results',required=True) +parser.add_argument('--fragment_length',help='length of fragment (in bp) for which GC will be calculated',type=int, required=True) + +args = parser.parse_args() + +mapable_path=args.mapable_regions +ref_seq_path = args.ref_seq +chrom_sizes_path = args.chrom_sizes +out_dir = args.out_dir +length = args.fragment_length + + +# In[5]: + + +print('arguments provided:') + +print('\tmapable_path = "'+mapable_path+'"') +print('\tref_seq_path = "'+ref_seq_path+'"') +print('\tchrom_sizes_path = "'+chrom_sizes_path+'"') +print('\tout_dir = "'+out_dir+'"') +print('\tlength = '+str(length)) + + +# In[6]: + + +mapable_name = mapable_path.rsplit('/',1)[1].rsplit('.',1)[0] +out_file = out_dir+'/'+mapable_name+'.'+str(length)+'bp.GC_frequency.txt' +print('output path:',out_file) + +if not os.path.exists(out_dir): + os.mkdir(out_dir) + + +# In[7]: + + +sys.stdout.flush() + + +# In[8]: + + +#import filter +mapable_intervals = pd.read_csv(mapable_path, sep='\t', header=None) + +#keep autosomes only +chroms = ['chr'+str(m) for m in range(1,23)] +mapable_intervals = mapable_intervals[mapable_intervals[0].isin(chroms)] + +print('chroms:', chroms) +print('number_of_intervals:',len(mapable_intervals)) +sys.stdout.flush() + + +# In[9]: + + +#get chrom sizes info +chrom_sizes = pd.read_csv(chrom_sizes_path, sep='\t', header=None) + +#also keep as a dict +chrom_size_dict = chrom_sizes.set_index(0).to_dict()[1] + + +# In[10]: + 
+ +#import the ref_seq +ref_seq=pysam.FastaFile(ref_seq_path) + + +# In[11]: + + +#create the GC frequencies dict +GC_dict = {} + +GC_dict={} +for num_GC in range(0,length+1): + GC_dict[num_GC]=0 + + +# In[12]: + + +start_time = time.time() + +k = length #just keeping this compatable with the previous version + +for i in range(len(mapable_intervals)): + chrom = mapable_intervals.iloc[i][0] + start = mapable_intervals.iloc[i][1] + end = mapable_intervals.iloc[i][2] + if i%5000==0: + print('interval',i,':',chrom,start,end,'seconds:',np.round(time.time()-start_time)) + sys.stdout.flush() + #adjust the start and end so it includes all fragments that overlap the interval + adjusted_start = start-k + adjusted_end = end+k + + if adjusted_start<0: + adjusted_start = 0 + if adjusted_end>chrom_size_dict[chrom]: + adjusted_end = chrom_sizes_dict[chrom] + print(chrom,chrom_sizes_dict[chrom],'adjusting_end') + + fetched = ref_seq.fetch(chrom,adjusted_start,adjusted_end) + fetched = fetched.replace('g','G').replace('c','C').replace('a','A').replace('t','T').replace('n','N') + fetched = np.array(list(fetched.replace('G','1').replace('C','1').replace('A','0').replace('T','0').replace('N','2')),dtype=float) + + #swap the 2 for a random 1 or 0 #there has to be a better way to do this but I can't figure it out + #the 0 or 1 is required because the sliding window sum algorithm only does integers + #unknown nucleotides should be quite rare if the filter is done correctly + fetched[fetched==2]=np.random.randint(2) #random integer in range(2) (i.e. 0 or 1) + + n=len(fetched) + + window_sum = int(sum(fetched[:k])) + #print(k,len(fetched[:k]),window_sum) + + GC_dict[window_sum]+=1 + for i in range(n-k): + window_sum = int(window_sum - fetched[i] + fetched[i+k]) + #print(k,window_sum) + GC_dict[window_sum]+=1 + + +# In[13]: + + +#convert to df and export +GC_df = pd.DataFrame() +#save GC dict +current = pd.Series(GC_dict).reset_index() +current = current.rename(columns={'index':'num_GC',0:'number_of_fragments'}) +current['length']=length +current = current[['length','num_GC','number_of_fragments']] +GC_df = GC_df.append(current, ignore_index=True) +GC_df.to_csv(out_file,sep='\t',index=False) + + +# In[14]: + + +print('done') + + +# In[ ]: + + + + diff --git a/griffin/scripts/griffin_calc_coverage.py b/griffin/scripts/griffin_calc_coverage.py new file mode 100755 index 0000000..3fec814 --- /dev/null +++ b/griffin/scripts/griffin_calc_coverage.py @@ -0,0 +1,758 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +import os +import sys +import argparse +import pandas as pd +import pysam +import numpy as np +import time +from scipy.signal import savgol_filter +import yaml +from multiprocessing import Pool + +# import warnings +# warnings.filterwarnings('error') + + +# In[ ]: + + +# from matplotlib import pyplot as plt +# %matplotlib inline + +# #sample specific params for testing +# #ER_pos_merged: +# sample_name = 'HD45.ctDNA.WGS.FC19269447' +# bam_path = '/fh/scratch/delete90/ha_g/realigned_bams/cfDNA_deepWGS_hg38/deepWGS_fastq_to_bam_paired_snakemake/results/HD45.ctDNA.WGS.FC19269447/HD45.ctDNA.WGS.FC19269447_recalibrated.bam' +# GC_bias_path = '/fh/fast/ha_g/user/adoebley/projects/griffin_paper/GC_correction/MBC_GC_correction/results/repeat_masker.mapable.k50.Umap.hg38/GC_bias/HD45.ctDNA.WGS.FC19269447.GC_bias.txt' +# background_normalization = 'none' +# ref_seq_path = '/fh/fast/ha_g/grp/reference/GRCh38/GRCh38.fa' + +# # #additional params for testing +# sites_yaml = 
'/fh/fast/ha_g/user/adoebley/projects/griffin_paper/tests/test_site_lists/test_sites_locations.yaml' +# results_dir = 'tmp' + +# chrom_col = 'Chrom' +# chroms = ['chr'+str(m) for m in np.arange(1,23)] +# norm_window = [-5000, 5000] #for testing +# plot_window = [-500, 1000]#for testing +# fragment_length = 165 + +# step = 15 +# sz_range = [100, 200] +# map_q = 20 +# strand_col = 'Strand' + +# individual = 'False' +# smoothing = 'True' + +# number_of_sites = 1000 +# sort_by = 'Chrom' +# #sort_by = 'peak.count' +# ascending = 'False' + +# CPU = 4 +# erase_intermediates = 'True' + +# debugging = True + + +# In[ ]: + + +parser = argparse.ArgumentParser() + +parser.add_argument('--sample_name', help='name of sample', required=True) +parser.add_argument('--bam', help='bam file', required=True) +parser.add_argument('--GC_bias', help='GC bias info', required=True) +parser.add_argument('--background_normalization', help='None or local', required=True) +parser.add_argument('--reference_genome',help = 'path to the reference genome',required=True) + +parser.add_argument('--sites_yaml', help='.bed file of sites', required=True) +parser.add_argument('--results_dir', help='directory for coverage_data', required=True) + +parser.add_argument('--chrom_column',help='name of column containing chromosome number', default='Chrom') +parser.add_argument('--chroms', help='chroms to include when selecting sites', nargs='*', default=['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr21', 'chr22']) +parser.add_argument('--norm_window',help='start and end of the window to be used for normalization',nargs=2, type=int, default=(-5000,5000)) +parser.add_argument('--plot_window',help='start and end of window to be plotted',nargs=2, type=int, default=(-1000,1000)) +parser.add_argument('--fragment_length',help='length of fragment (in bp) for which GC will be calculated, default 165',type=int, default=165) + +parser.add_argument('--step',help='step size when calculating coverage', type=int, default=5) +parser.add_argument('--size_range',help='acceptable size range for fragments (to filter out genomic contamination)',nargs=2, type=int, default=(0,500)) +parser.add_argument('--map_quality',help='minimum mapping quality', type=int, default=60) +parser.add_argument('--strand_column',help='name of column containing the strand (+ or -)', default='Strand') + +parser.add_argument('--individual',help='save individual site coverage. TRUE WILL RESULT IN HUGE OUTPUT FILES. 
(True/False)',default='False', required = True) +parser.add_argument('--smoothing',help='whether to use a savgol filter to smooth sites (True/False)', required = True) + +parser.add_argument('--num_sites',help='number of sites to analyze', default='NA') +parser.add_argument('--sort_by',help='how to select the sites to analyze', default='none') +parser.add_argument('--ascending',help='whether to sort in ascending or descending order when selecting sites', default='NA') + +parser.add_argument('--cpu',help='cpu available for parallelizing', type = int, required = True) +parser.add_argument('--erase_intermediates',help='whether to erase intermediate files to save space', type = str, default = 'True') + + +args = parser.parse_args() + + +sample_name=args.sample_name +bam_path=args.bam +GC_bias_path=args.GC_bias +background_normalization = args.background_normalization +ref_seq_path = args.reference_genome + +sites_yaml=args.sites_yaml +results_dir=args.results_dir + +chrom_col=args.chrom_column +chroms = args.chroms +norm_window =args.norm_window +plot_window=args.plot_window +fragment_length=args.fragment_length + +step=args.step +sz_range=args.size_range +map_q=args.map_quality +strand_col=args.strand_column + +individual=args.individual +smoothing = args.smoothing + +number_of_sites=args.num_sites +sort_by=args.sort_by +ascending=args.ascending + +CPU = args.cpu +erase_intermediates = args.erase_intermediates + +debugging = False + + +# In[ ]: + + +if ascending.lower()=='false': + ascending=False +elif ascending.lower()=='true': + ascending=True +else: + ascending='none' + +print('\narguments provided:') + +print('\tsample_name = "'+sample_name+'"') +print('\tbam_path = "'+bam_path+'"') +print('\tGC_bias_path = "'+GC_bias_path+'"') +print('\tbackground_normalization = "'+background_normalization+'"') +print('\tref_seq_path = "'+ref_seq_path+'"') +#print('\ttumor_fraction =',tumor_fraction) + +print('\tsites_yaml = "'+sites_yaml+'"') +print('\tresults_dir = "'+results_dir+'"') + +print('\tchrom_col = "'+chrom_col+'"') +print('\tchroms = ',chroms) +print('\tnorm_window = ', norm_window) +norm_window=[int(np.ceil(norm_window[0]/step)*step),int(np.floor(norm_window[1]/step)*step)] #round to the nearest step inside the window +print('\t#norm_window rounded to step',norm_window) +print('\tplot_window = '+str(plot_window)) +plot_window=[int(np.ceil(plot_window[0]/step)*step),int(np.floor(plot_window[1]/step)*step)] #round to the nearest step inside the window +print('\t#plot_window rounded to step:',plot_window) +print('\tfragment_length =',fragment_length) +fragment_length=int(np.ceil(fragment_length/step)*step) #round fragment length to the nearest step +print('\t#fragment_length_rounded_up_to_step:',fragment_length) + +print('\tstep =',step) +print('\tsz_range =',sz_range) +print('\tmap_q =',map_q) +print('\tstrand_col = "'+strand_col+'"') + +print('\tindividual = "'+individual+'"') +if smoothing.lower()=='true' or smoothing.lower()=='false': + print('\tsmoothing = "'+smoothing+'"') +else: + print('smoothing must be True or False! 
:',smoothing) + sys.exit() + +print('\tnumber_of_sites = "'+str(number_of_sites)+'"') +print('\tsort_by = "'+sort_by+'"') +print('\tascending = "'+str(ascending)+'"') + +print('\tCPU =',CPU) +print('\n') +sys.stdout.flush() + + +# In[ ]: + + +#define global parameters and open global files + +#set up global variables +norm_columns = np.arange(norm_window[0],norm_window[1],step) +plot_columns = np.arange(plot_window[0],plot_window[1],step) + +# #import the site files +with open(sites_yaml,'r') as f: + site_files = yaml.safe_load(f) +site_files = site_files['site_files'] +site_files = [[key,site_files[key]] for key in site_files.keys()] +to_do_list = site_files + +######################################## +#GET GC BIAS +######################################## +#open the GC_bias file +GC_bias = pd.read_csv(GC_bias_path, sep='\t') + +#get rid of extremely low GC bias values +#these fragments will now be excluded +#these fragments are extremely rare so it is difficult to get a good estimate of GC bias +GC_bias['smoothed_GC_bias'] = np.where(GC_bias['smoothed_GC_bias']<0.05,np.nan,GC_bias['smoothed_GC_bias']) + +GC_bias = GC_bias[['length','num_GC','smoothed_GC_bias']] +GC_bias = GC_bias.set_index(['num_GC','length']).unstack() + +#convert to a dictionary +GC_bias = GC_bias.to_dict() + +#get rid of values where the num_GC is greater than the length (included due to the way I made the dict) +GC_bias2 = {} +for key in GC_bias.keys(): + length = key[1] + GC_bias2[length] = {} + for num_GC in range(0,length+1): + bias = GC_bias[key][num_GC] + GC_bias2[length][num_GC]=bias +GC_bias = GC_bias2 +del(GC_bias2) + + +# step_1_import_sites + +# In[ ]: + + +def step_1_import_sites(sites_file,site_name): #number of sites needed so it can be changed to an int + all_sites=pd.read_csv(sites_file,sep='\t') + print(site_name,'total sites',len(all_sites)) + + #throw out sites that aren't on the selected chroms + all_sites = all_sites[all_sites[chrom_col].isin(chroms)] + print(site_name,'sites on selected chromosomes',len(all_sites)) + + #select the sites to use if specified + if sort_by.lower()=='none': #if using all sites + print(site_name,'processing all '+str(len(all_sites))+' sites') + + else: #othewise sort by the specified column + print(site_name,'sorting by',sort_by,'and selecting the top',number_of_sites,',ascending:',ascending) + print(site_name,sort_by,'initial range: ',min(all_sites[sort_by]),'to',max(all_sites[sort_by])) + all_sites=all_sites.sort_values(by=sort_by,ascending=ascending).reset_index(drop=True)#sort and reset index + all_sites=all_sites.iloc[0:int(number_of_sites)] + print(site_name,sort_by,'range after sorting: ',min(all_sites[sort_by]),'to',max(all_sites[sort_by])) + + #add a site_name column + all_sites['site_name']=site_name + #add a sample column + all_sites['sample']=sample_name + #add background normalizatino + all_sites['background_normalization']=background_normalization + + #split the list of all sites into groups for quicker processing + max_chunk_size=500 + n_chunks=np.ceil(len(all_sites)/max_chunk_size) + site_groups=np.array_split(all_sites.index.values,n_chunks) + + return(all_sites, site_groups) + + +# step_2_collect_coverage + +# In[ ]: + + +def collect_fragments(sites,window_start,window_end,direction): + #open the bam file for each pool worker (otherwise individual pool workers can close it) + bam_file = pysam.AlignmentFile(bam_path) + + #open the ref seq + ref_seq=pysam.FastaFile(ref_seq_path) + + #extend the window by half the max fragment length in each direction + 
max_adjustment = int(np.ceil(sz_range[1]/2)) + adjusted_start = window_start-max_adjustment + adjusted_end = window_end+max_adjustment + + window_columns = np.arange(window_start,window_end,step) + + #make sure Chrom is a string + sites[chrom_col]=sites[chrom_col].astype(str) + + cov_pd={} #set up dictionary to hold data + GC_cov_pd = {} #set up a dictionary to hold GC corrected data + + + for endpoint in ['start','end']: + cov_pd[endpoint]=pd.DataFrame(columns = window_columns) + GC_cov_pd[endpoint] = pd.DataFrame(columns = window_columns) + + #workaround for NCBI style formatted bams (1,2,3 etc) with UCSC sites + if len(sites)>0: + test_chrom=sites.iloc[0][chrom_col] + try: + bam_file.get_reference_length(test_chrom) + NCBI=False + except: + bam_file.get_reference_length(test_chrom.split('chr')[-1]) + NCBI=True + + #run analysis on each site + for i in range(len(sites)): #for each location in the genome + if i%100==0: + print (sites.iloc[0]['site_name'],i,time.time()-start_time) + sys.stdout.flush() + + ############# + #make dicts to hold output for this individual site + ############# + #for now, catch all reads that start or end within a fragment length of the window + cov_dict = {} + GC_cov_dict = {} + for item in ['start','end']: + cov_dict[item]={m:0 for m in range(adjusted_start,adjusted_end)} + GC_cov_dict[item]={m:0 for m in range(adjusted_start,adjusted_end)} + + #################### + #fetch reads + #################### + #identify the analysis window for that site + chrom = sites.iloc[i][chrom_col] + position = sites.iloc[i]['position'] + + if NCBI: + chrom=chrom.split('chr')[-1] + + #these regions have been filtered so they should all be fetchable + fetched=bam_file.fetch(contig=chrom, start=adjusted_start+position, stop=adjusted_end+position) #fetch reads that map to the region of interest + + ######################## + #count coverage + ######################## + for read in fetched: + #filter out reads + if abs(read.template_length)>=sz_range[0] and abs(read.template_length)<=sz_range[1] and read.is_paired==True and read.mapping_quality>=map_q and read.is_duplicate==False and read.is_qcfail==False: + read_start=read.reference_start-position #read start (with respect to the position of the current region of interest) + + #find the place where the fragment starts or ends with respect to the window + if read.is_reverse==False and read.template_length>0: + fragment_start = read_start + fragment_end = read_start+read.template_length + read_type='start' + + #adjusted_loc = fragment_start+int(np.ceil(fragment_length/2)) #where to count the coverage + midpoint = int(np.floor((fragment_start+fragment_end)/2)) + + elif read.is_reverse==True and read.template_length<0: + read_len=read.reference_length #this is the read length only (for adjusting the start position) + fragment_start = read_start+read_len+read.template_length + fragment_end = read_start+read_len + read_type='end' + + #adjusted_loc = fragment_end-int(np.ceil(fragment_length/2)) #where to count the coverage + midpoint = int(np.floor((fragment_start+fragment_end)/2)) + + else: + continue + + #get the fragment seq for GC content + #seq_string = (ref_seq.fetch(chrom,fragment_start+position,fragment_end+position)).upper() #for printing + + fragment_seq = (ref_seq.fetch(chrom,fragment_start+position,fragment_end+position)).upper() + fragment_seq = list(fragment_seq.replace('T','0').replace('A','0').replace('C','1').replace('G','1').replace('N',str(np.random.randint(0,2)))) + fragment_seq = [int(m) for m in fragment_seq] + + 
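+
+                    #at this point fragment_seq is a list of per-base flags (1 for G or C, 0 for A or T,
+                    #and a random 0/1 for any N), so sum(fragment_seq) gives the fragment GC count;
+                    #this is the same encoding used in griffin_GC_counts.py when building the GC bias table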
##check work + ############ + #print(read_type,read.query_sequence) + #if read.is_reverse == False: + # print(read_type,seq_string[0:read.reference_length]) + #elif read.is_reverse == True: + # print(read_type,seq_string[-read.reference_length:]) + #print(sum(fragment_seq),len(fragment_seq),read.template_length) + #print('\n') + ########### + + #check that the site is in the window + if midpoint>=adjusted_start and midpoint0: + #take the means (ignore nans) + mean_value = np.nanmean(out_data[data_type][endpoint].astype(float).values) + #normalize to 1 for an average site + out_data[data_type][endpoint] = out_data[data_type][endpoint]/mean_value + + #normalize individual sites to 1 if background_normalization == 'local' + if background_normalization == 'local': + print(site_name,'normalizing individual sites to one') + for data_type in ['reads','GC_corr']: + for endpoint in ['start','end']: + mean_data = out_data[data_type][endpoint].values.mean(axis=1,keepdims=True) + #replace zero with nan + mean_data = np.where(mean_data==0,np.nan,mean_data) + out_data[data_type][endpoint] = out_data[data_type][endpoint]/mean_data + + + #if not saving individual sites, take the mean + if individual.lower()=='false': + print(site_name,'taking means') + for data_type in ['reads','GC_corr']: + for endpoint in ['start','end']: + #not sure why 'astype' is needed but it seems to be required + #number of sites will be the same for all data types and endpoints + number_of_sites_used = len(out_data[data_type][endpoint][~(np.isnan(out_data[data_type][endpoint].values.mean(axis=1).astype(float)))]) + + out_data[data_type][endpoint] = out_data[data_type][endpoint].mean(axis=0) + #convert back to dataframe for further processing + out_data[data_type][endpoint] = pd.DataFrame(out_data[data_type][endpoint]).T + print(site_name,'mean of',number_of_sites_used, 'sites') + + ################# + #smooth data + ################# + if smoothing.lower()=='true': + #savgol window should be approx one fragment length but it must be odd + savgol_window=np.floor(fragment_length/step) + if savgol_window%2==0: + savgol_window=savgol_window+1 + savgol_window=int(savgol_window) + + print(site_name,'smoothing') + sys.stdout.flush() + + for data_type in ['reads','GC_corr']: + for endpoint in ['start','end']: + out_data[data_type][endpoint][norm_columns]=savgol_filter(out_data[data_type][endpoint][norm_columns], savgol_window, 3) + ################# + + #keep only the plotting columns + for data_type in ['reads','GC_corr']: + out_data[data_type]['start'] = out_data[data_type]['start'][plot_columns] + out_data[data_type]['end'] = out_data[data_type]['end'][plot_columns] + + + #if taking the mean of all sites, get metadata for a mean site + if individual.lower()=='false': + print(site_name,'taking metadata means') + metadata = pd.DataFrame(all_sites.iloc[0][['site_name','sample','background_normalization']]).T + metadata['total_read_starts'] = np.mean(all_sites['total_read_starts']) + metadata['total_read_ends'] = np.mean(all_sites['total_read_ends']) + metadata['GC_corrected_total_read_starts'] = np.mean(all_sites['GC_corrected_total_read_starts']) + metadata['GC_corrected_total_read_ends'] = np.mean(all_sites['GC_corrected_total_read_ends']) + metadata['number_of_sites'] = number_of_sites_used + all_sites = metadata + + #merge all the different dataframes + final_data = pd.DataFrame() + for data_type in ['reads','GC_corr']: + for endpoint in ['start','end']: + current_data = all_sites.merge(out_data[data_type][endpoint], left_index=True 
,right_index=True) + + if data_type == 'GC_corr': + current_data['GC_correction']='GC_corrected' + elif data_type == 'reads': + current_data['GC_correction']='none' + current_data['endpoint']=endpoint + + final_data = final_data.append(current_data,ignore_index=True, sort=True) + + + #rearrange the columns back into a logical order + metadata_columns = list(all_sites.columns) + ['GC_correction','endpoint'] + + final_data = final_data[metadata_columns+list(plot_columns)] + + #start and end traces should be nearly the same (because they are being used to calculate midpoints) + #keep only start for export + final_data = final_data[final_data['endpoint']=='start'] + final_data['endpoint']='midpoint' + + #export + print(site_name,'exporting') + sys.stdout.flush() + final_data.to_csv(current_out_file,sep='\t',float_format='%.5f', index=False) + + print('done',site_name) + sys.stdout.flush() + + if debugging == True: + print('debugging!') + return(final_data) + elif debugging == False: + pass + + +# In[ ]: + + +#run the analysis + +#start the timer +start_time=time.time() + +p = Pool(processes=CPU) #use the specified number of processes +p.map(run_full_analysis, to_do_list, 1) #run the analysis on all TFs in TFs_list. Send only one item to each processor at a time. + + +# In[ ]: + + +print('merging all sites') +start_time = time.time() + +# merge results together and export +merged_out_file = results_dir+'/coverage/all_sites/'+sample_name+'.all_sites.coverage.txt' +if not os.path.exists(results_dir+'/coverage/all_sites'): #make any necessary directories for output + os.mkdir(results_dir+'/coverage/all_sites') + +merged_output=pd.DataFrame() +for j,line in enumerate(to_do_list): + if j%50 == 0: + print(line[0],time.time()-start_time) + sys.stdout.flush() + site_name = line[0] + indiv_out_file = results_dir+'/coverage/'+site_name+'/'+sample_name+'.'+site_name+'.coverage.txt' + current_data = pd.read_csv(indiv_out_file,sep='\t') + merged_output = merged_output.append(current_data,ignore_index=True,sort=True) + +merged_output.columns +merged_output.to_csv(merged_out_file,sep='\t',index=False) + +if erase_intermediates.lower()=='true': + for j,line in enumerate(to_do_list): + site_name = line[0] + indiv_out_file = results_dir+'/coverage/'+site_name+'/'+sample_name+'.'+site_name+'.coverage.txt' + os.remove(indiv_out_file) + +print('done with merge') + + +# In[ ]: + + +# #cell for testing +# #start the timer +# start_time=time.time() + +# #for testing use a single CPU +# final_data=run_full_analysis(to_do_list[0]) + + +# In[ ]: + + +# ##plot for testing +# current = final_data[(final_data['endpoint']=='midpoint') & (final_data['GC_correction']=='GC_corrected')][plot_columns].mean() +# plt.plot(plot_columns,current) + + +# In[ ]: + + + + + +# In[ ]: + + + + + +# In[ ]: + + + + + +# In[ ]: + + + + + +# In[ ]: + + + + + +# In[ ]: + + + + diff --git a/griffin/scripts/griffin_filter_sites.py b/griffin/scripts/griffin_filter_sites.py new file mode 100755 index 0000000..7d1a2ca --- /dev/null +++ b/griffin/scripts/griffin_filter_sites.py @@ -0,0 +1,293 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[ ]: + + +#import stuff +import pandas as pd +import numpy as np +import math +import time +import pyBigWig +import sys +import argparse + + +# In[ ]: + + +# #TSS panel test +# # #define paths and parameters +# in_file='/fh/fast/ha_g/user/adoebley/data/SCLC_targeted_panel_sites/all_sites/TSS_targets.bed' +# in_file_name = 'TSS_targets' +# out_dir='./' +# 
mapability_file='../../downloads/genome/k50.Umap.MultiTrackMappability.hg38.bw' + +# # #define the columns in the TFBS files +# chrom_col='Chrom' +# start_col='TSS' +# end_col='TSS' +# strand_col='Strand' +# chroms = ['chr1', 'chr2', 'chr3', 'chr4', 'chr5', 'chr6', 'chr7', 'chr8', 'chr9', 'chr10', 'chr11', 'chr12', 'chr13', 'chr14', 'chr15', 'chr16', 'chr17', 'chr18', 'chr19', 'chr20', 'chr21', 'chr22','chrX','chrY'] + +# window_values=(-100,255) #norm window +# targeted_window_columns=('window_start','window_end') +# targeted_panel = 'True' + +# threshold = 0.95 + + +# In[ ]: + + + + + +# In[ ]: + + +parser = argparse.ArgumentParser() + +parser.add_argument('-i','--in_file', help='file of TFBS to annotate', required=True) +parser.add_argument('--name', help='in file name', required=True) +parser.add_argument('-o','--out_dir', help='directory for output files', required=True) +parser.add_argument('-m','--mapability_file',help='.bw file with mapability values e.g. UCSC hg38 k50.Umap.MultiTrackMappability.bw', required=True) + +parser.add_argument('-c','--chrom_column',help='name of column containing chromosome number', default='Chrom') +parser.add_argument('-s','--start_column',help='name of column containing the start of the TFBS (this will be averaged with the end to identify the TFBS center)', default='Start') +parser.add_argument('-e','--end_column',help='name of column containing the end of the TFBS (this will be averaged with the start to identify the TFBS center)', default='End') +parser.add_argument('--strand_column',help='name of column containing the strand (+ or -)', default='Strand') +parser.add_argument('--chroms', help='chromosomes to include when selecting sites', nargs='*') +parser.add_argument('--window_values',help='start and end of window to be analyzed around each TFBS',nargs=2, type=int, required=True) +parser.add_argument('--targeted_panel',help="whether the sites are from a targeted panel",default='False') +parser.add_argument('--targeted_window_columns',help='column names that specify the start and end of the window for a targeted region',nargs=2,default=('NA','NA')) + +parser.add_argument('--threshold',help='define cutoff for high mapability', default=0.95, type=float) + +args = parser.parse_args() + +in_file=args.in_file +in_file_name = args.name +out_dir=args.out_dir.rstrip('/')+'/' +mapability_file=args.mapability_file + +chrom_col=args.chrom_column +start_col=args.start_column +end_col=args.end_column +strand_col = args.strand_column +chroms = args.chroms + +window_values=args.window_values +targeted_panel=args.targeted_panel +targeted_window_columns=args.targeted_window_columns + +threshold=args.threshold + + +# In[ ]: + + +#set up parameters +print('\narguments provided:') +print('\tin_file = "'+in_file+'"') +print('\tin_file_name = "'+in_file_name+'"') +print('\tout_dir = "'+out_dir+'"') +print('\tmapability_file = "'+mapability_file+'"') + +print('\tchrom_col = "'+chrom_col+'"') +print('\tstart_col = "'+start_col+'"') +print('\tend_col = "'+end_col+'"') +print('\tstrand_col = "'+strand_col+'"') +print('\tchroms = ',chroms) + +print('\twindow_values=',window_values) +print('\ttargeted_panel = "'+targeted_panel+'"') + +window_start_column=targeted_window_columns[0] +window_end_column=targeted_window_columns[1] +print('\ttargeted_window_columns = ',targeted_window_columns) + +print('\tthreshold = '+str(threshold)) + +print('\n') + + +# In[ ]: + + +#import info about the sites +sites=pd.read_csv(in_file, sep='\t') +print('number_of_sites:',len(sites)) 
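+#sites is expected to be a tab-separated table containing, at minimum, the chromosome, start,
+#and end columns named by --chrom_column/--start_column/--end_column (defaults Chrom/Start/End);
+#a strand column (default Strand) is optional and, when present, is used below to flip the
+#normalization window around '-' strand sites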
+sys.stdout.flush() + +#identify the TF position +sites['position'] = (sites[start_col]+sites[end_col])/2 +sites['position'] = np.floor(sites['position']) +sites['position'] = sites['position'].astype(int) + +#identify the window around each site to be used for mapability +sites['norm_window_start']=sites['position']+window_values[0] +sites['norm_window_end']=sites['position']+window_values[1] + +#identify the window around reverse sites if direction is specified +if strand_col in sites.columns: + print('flipping_reverse_sites') + rv_sites = sites[sites[strand_col]=='-'].copy() + rv_sites['norm_window_start']=rv_sites['position']-window_values[1] + rv_sites['norm_window_end']=rv_sites['position']-window_values[0] + sites[sites[strand_col]=='-'] = rv_sites.copy() #replace the rv sites with the flipped sites + del(rv_sites) + + +#drop any sites that don't span the full window +if targeted_panel.lower()=='true': + + sites['relative_window_start']=sites[window_start_column]-sites['position'] + sites['relative_window_end']=sites[window_end_column]-sites['position'] + + if strand_col in sites.columns: #flip the orientation of the window for reverse sites + print('flipping_reverse_sites') + rv_sites = sites[sites[strand_col]=='-'].copy() + rv_sites[['relative_window_start','relative_window_end']]=rv_sites[['relative_window_end','relative_window_start']]*-1 + sites[sites[strand_col]=='-'] = rv_sites #replace the rv sites with the flipped sites + + sites=sites[(sites['relative_window_start']<=window_values[0]) & (sites['relative_window_end']>=window_values[1])] + #drop the new columns as they are no longer needed + sites = sites.drop(columns=['relative_window_start','relative_window_end']) + + print('sites that span the window:',len(sites)) + + + +# In[ ]: + + +if strand_col in sites.columns: + print('fw_sites:',len(sites[sites[strand_col]=='+'])) + print('rv_sites:',len(sites[sites[strand_col]=='-'])) + + +# In[ ]: + + +sites = sites[sites[chrom_col].isin(chroms)] +print('sites_after_removing_non_specified_chroms:',len(sites)) + + +# In[ ]: + + +mapability = pyBigWig.open(mapability_file) +chroms = sites[chrom_col].unique() +for chrom in chroms: + try: + mapability.values(chrom,100000,100010) + except: + print('###\n###\nChromosome names dont match chromosomes in mapability file, check chromosome formatting\n###\n###\n') + sys.exit(1) + + + +# In[ ]: + + +#run analysis +start_time=time.time() + +print(in_file_name) +sys.stdout.flush() + +#import the mapability data +mapability = pyBigWig.open(mapability_file) + +#make list to hold mean values +mean_values=[] + +#get 1% intervals for tracking progress +one_percent=int(len(sites)/100) +if one_percent==0: #if there are less than 100 sites + one_percent=1 + +window_len = window_values[1]-window_values[0] + +for i in range(len(sites)): + if i%one_percent==0: + print(i,time.time()-start_time) + sys.stdout.flush() + #get the location of the site (chromosome and center of site) + chrom = sites.iloc[i][chrom_col] + + position=sites.iloc[i]['position'] + start=sites.iloc[i]['norm_window_start'] + end=sites.iloc[i]['norm_window_end'] + +# print(sites.iloc[i][strand_col],start-position) + #fetch the values for that site + try: + fetched=mapability.values(chrom, start, end) + #convert nan values in the data to zeros and convert fetched to np array + fetched=np.nan_to_num(fetched) + + except: #if the site can't be fetched + fetched=[0 for m in range(window_len)] + #convert the data to an np array + fetched=np.array(fetched) + + #reshape the data for adding to 
the array for all sites + try: + fetched=fetched.reshape(1,window_len) + + except: #if the full site wasn't fetched + fetched=np.array([0 for m in range(window_len)]) + fetched=fetched.reshape(1,window_len) + + #add the fetched data to the array of data for all sites + mean_values.append(fetched.mean()) + + del (fetched,position,chrom) + +#drop the start and end columns because these will be recalculated based on step in future analyses +sites = sites.drop(columns=['norm_window_start','norm_window_end']) +print(time.time()-start_time) +sys.stdout.flush() + + +# In[ ]: + + +#calculate the mean value per site +sites['mean_mapability']=mean_values + +overall_TF_mapability={'total_sites':len(sites)} + +#split_list_of_sites for export and count list lengths + +high_sites=sites[(sites['mean_mapability']>=threshold)] +low_sites=sites[(sites['mean_mapability']