-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_run.py
67 lines (54 loc) · 1.73 KB
/
test_run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
## Required Modules
import pandas as pd
import numpy as np
import Bio
import os
from Bio import Entrez, SeqIO
import math
import torch
from torch import nn
import h5py
import seaborn as sb
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
import time
import pickle
from tqdm import tqdm
if __name__ == '__main__':
    # Script entry point: reads the SNP table and the feature annotation
    # table, builds the Beluga model from saved weights, loads the hg38 FASTA
    # into memory and prints a readiness message. All names are bound at
    # module level (no wrapping function), exactly as in the original script.

    # multiprocessing is not imported at the top of the file, so it is
    # imported here; `time` is already imported at file level.
    import multiprocessing

    ## Run configuration (input locations are hard-coded absolute paths)
    trial_run = 10000
    summarystat = '/Input/Multi_Specto/Random_Million_SNPs_Part3.txt'
    args_model = '/Input/Multi_Specto/deepsea.beluga.pth'
    # Kept separate from `features` so the path string is not overwritten by
    # the DataFrame that is read from it.
    features_path = '/Input/Multi_Specto/deepsea_beluga_2002_features.tsv.txt'
    numsnps = 1000      # not used in this file: no top-N filtering happens below
    randomset = True    # not used in this file

    ## Loading the SNP table
    print("Loading random SNPs chromosome and position..")
    ss = pd.read_csv(summarystat, sep='\t')
    # The whole table is kept; `top_n_snps` is the same object as `ss`.
    top_n_snps = ss

    ## Obtaining features
    print("Loading feature info...")
    features = pd.read_csv(features_path, sep='\t')
    # Composite label: "<Cell type>__<Assay>__<Assay type>".
    features['feature_names'] = (
        features['Cell type'] + '__' + features['Assay'] + '__' + features['Assay type']
    )
    # NOTE(review): each of these is a one-element list wrapping a boolean
    # Series, not the Series itself. Preserved as-is; confirm the list
    # wrapper is what downstream code expects.
    features_ids_dnase = [features['Assay type'] == 'DNase']
    features_ids_tf = [features['Assay type'] == 'TF']
    features_ids_histone = [features['Assay type'] == 'Histone']
    feature_names = features['feature_names']

    ## Inputing the resources for Expect.py
    # None of these settings is consumed in this file.
    inputsize = 2000
    batchSize = 32
    maxshift = 800
    args_cuda = True    # the model is never moved to a GPU in this file

    ## Importing the DL Model
    # NOTE(review): `Beluga` is neither defined nor imported in this file; it
    # must be made available before this line or it raises NameError.
    model = Beluga()
    # map_location='cpu' lets the checkpoint load on machines without the
    # device it was saved from; load_state_dict then copies the values into
    # the model's own parameters.
    model.load_state_dict(torch.load(args_model, map_location='cpu'))
    model.eval()

    print("Loading Fasta sequence ...")
    fasta_available = True
    # Reads every record of the FASTA into an in-memory dict keyed by record
    # id. The path is relative to the current working directory.
    fasta_whole_genome = SeqIO.to_dict(SeqIO.parse("Hg38/hg38.fa", "fasta"))

    data_matrix_ind_snp_p = {}
    print("Overall Check done. Ready for deployment!")