-
Notifications
You must be signed in to change notification settings - Fork 0
/
runscript_Estar.py
86 lines (59 loc) · 2.69 KB
/
runscript_Estar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from datetime import datetime
import split_csv
import table2VecModel
import makeVectorIndex
import bm25Model
import scoreQuery
# file_name = input("Enter the dataset name : ")
#file_name = "sp_table2VecH1.csv"
#split_filename = "table2VecH.csv"
#model_name = "model_table2VecH"
#index_files = "table2VecH.idx"
#query_file_name = "queries.txt"
#ranked_file = "bm25_table2VecH.idx"
#output_rank_file = "table2VecH.out"
# Active run configuration (the "E_star" variant). These values are passed to
# the pipeline stage calls further down in this script.
file_name = "sp_table2VecE_star.csv"
split_filename = "table2VecE_star.csv"
model_name = "model_table2VecE_star"
index_files = "table2VecE_star.idx"
query_file_name = "queries.txt"
ranked_file = "bm25_table2VecE_star.idx"
output_rank_file = "table2VecE_star.out"

# The folders are defined before they are reported so that the printed paths
# are built from the very values the pipeline receives, instead of from
# duplicated string literals that could drift apart.
input_folder = "/scratch/cse/phd/anz208486/col873_project/NLP_project/Dataset/"
output_folder = "/scratch/cse/phd/anz208486/col873_project/NLP_project/output"

# input_folder already ends with "/", so plain concatenation gives the full
# dataset path.
print("Path of the file is : " + input_folder + file_name)
print("Output folder path : " + output_folder)
# split_filename = input("Enter split file name : ")
def _print_timestamp(message):
    """Print ``message`` followed by the current local time as HH:MM:SS.

    Replaces the repeated ``now = datetime.now()`` / ``print(...)`` pair that
    used to surround every pipeline stage.
    """
    print(message, datetime.now().strftime("%H:%M:%S"))


# Stage 1: split the dataset; split_fun returns the train and test locations.
_print_timestamp("Spliting the file started at:")
train_dataset, test_dataset = split_csv.split_fun(
    input_folder, output_folder, file_name, split_filename
)
_print_timestamp("Spliting the file completed at:")
print("Location of train dataset :", train_dataset)
print("Location of test dataset :", test_dataset)

# Stage 2: train the model on the train split.
_print_timestamp("Model creation started at:")
model_file = table2VecModel.Tab2Vec_train(model_name, train_dataset, output_folder)
_print_timestamp("Model creation completed at:")
print("Location of Model file :", model_file)

# Stage 3: build the vector index for the dataset.
# NOTE(review): model_file from stage 2 is only printed; this stage and the
# ranking stage are given model_name instead. Presumably they derive the model
# path from output_folder + model_name -- confirm against makeVectorIndex and
# scoreQuery.
_print_timestamp("Vectorization started at:")
vector_file = makeVectorIndex.indexing(
    index_files, input_folder, file_name, model_name, output_folder
)
_print_timestamp("Vectorization completed at:")
print("Location of vector :", vector_file)

# Stage 4: BM25 pass over the dataset for the queries in query_file_name.
_print_timestamp("bm25 started at:")
bm25_file = bm25Model.bm25(
    input_folder, query_file_name, file_name, ranked_file, output_folder
)
_print_timestamp("bm25 completed at:")
print("BM25 file location :", bm25_file)

# Stage 5: score the queries using the vector index and the BM25 output.
_print_timestamp("ranking started at:")
rank_file = scoreQuery.score_query(
    input_folder,
    output_folder,
    query_file_name,
    index_files,
    ranked_file,
    model_name,
    output_rank_file,
)
_print_timestamp("ranking completed at:")
print("Ranking file location :", rank_file)