-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
87 lines (73 loc) · 2.84 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# main
from flask import Flask, render_template, request
import time
import os
import cred
import pandas as pd
app = Flask(__name__)
import urllib.request
from data_extraction import extract_data
from bs4 import BeautifulSoup
from analytics.eda import main as eda_main
import analytics.preProcessing as pP
import analytics.spark_ml as spark_ml
from analytics.classifiers import clf_main
from analytics.classifiers import pred_success
@app.route('/')
def student():
    """Serve the landing page by rendering the ``home.html`` template."""
    home_page = render_template('home.html')
    return home_page
@app.route('/result', methods=['POST', 'GET'])
def result():
    """Run the full analysis for a submitted channel URL and render the report.

    On POST the ``ch1`` form field is passed to ``process_result``, the
    resulting frame goes through ``generate_visualizations``, the pandas
    classifiers (``ml_classifiers``) and the Spark pipeline
    (``spark_ml.spark_main``) are each timed, and ``result.html`` is rendered
    with the figures, metrics, timings, frame shape and class sizes.

    On any other method the home page is rendered instead, so the view
    always returns a response.

    Side effects: sets ``cred.channel_name`` (and, through
    ``process_result``, ``cred.channel_id``).
    """
    if request.method != 'POST':
        # The route also accepts GET; without this branch the view returned
        # None, which Flask rejects as an invalid response (HTTP 500).
        return render_template('home.html')

    channel_url = request.form["ch1"]
    df, channel_id = process_result(channel_url)

    # Use the last path segment as the display name. Strip a trailing slash
    # first, otherwise ".../channelname/" would yield an empty name.
    cred.channel_name = channel_url.rstrip("/").split("/")[-1]

    # generate_visualizations returns (status, frame, frame); only the last
    # element is used from here on, exactly as before.
    _status, _eda_df, df = generate_visualizations(df, channel_id)

    exe_time = {}
    ml_start_time = time.time()
    ml_results = ml_classifiers(df, channel_id)
    exe_time["pd_ml"] = round(time.time() - ml_start_time, 2)

    spark_start_time = time.time()
    ml_spark = spark_ml.spark_main(channel_id)
    exe_time["spark_ml"] = round(time.time() - spark_start_time, 2)

    # Rows per class of the binary "like_count_1" column (0 -> low, 1 -> high).
    class_size = {
        "low": len(df[df["like_count_1"] == 0]),
        "high": len(df[df["like_count_1"] == 1]),
    }

    hists = create_figUrl(channel_id)
    return render_template(
        'result.html',
        hlst=hists,
        ml=ml_results,
        extime=exe_time,
        mlspark=ml_spark,
        dshape=df.shape,
        csize=class_size,
        channel_name=cred.channel_name,
    )
@app.route('/predict', methods=['GET', 'POST'])
def predict():
    """Render ``predict.html``, with a prediction when a form was posted.

    On POST the ``title`` and ``desc`` form fields are joined with a single
    space, cleaned with ``pP.clean``, vectorised with ``cred.vect`` through
    ``pP.fit_vectorizer`` and passed to ``pred_success`` together with
    ``cred.channel_id``. On GET the prediction stays an empty string.
    """
    prediction = ""
    if request.method == 'POST':
        form = request.form
        raw_text = form["title"] + " " + form["desc"]
        cleaned = pP.clean(raw_text)
        # Single-row frame so the vectoriser receives a pandas Series.
        frame = pd.DataFrame(data={"text": [cleaned]})
        features = pP.fit_vectorizer(cred.vect, frame["text"])
        prediction = pred_success(features.toarray(), cred.channel_id)
    return render_template(
        'predict.html',
        predn=prediction,
        channel_name=cred.channel_name,
    )
def parse(res):
    """Return the channel id advertised by the page at ``res``.

    The page is fetched, parsed with BeautifulSoup's ``html.parser`` and the
    ``content`` attribute of the first ``<meta itemprop="channelId">`` tag is
    returned.

    Args:
        res: URL (or ``urllib.request.Request``) to open.

    Returns:
        The tag's ``content`` attribute, or ``None`` if the tag has none.

    Raises:
        IndexError: if the page contains no ``channelId`` meta tag.
        Any error raised by ``urllib.request.urlopen`` propagates unchanged.
    """
    # NOTE(review): in this file ``res`` comes straight from a submitted form
    # field, and urlopen accepts non-HTTP schemes too - consider validating
    # the scheme/host before fetching.
    # The context manager closes the HTTP response; it was previously leaked.
    with urllib.request.urlopen(res) as page:
        html = BeautifulSoup(page.read(), "html.parser")

    tags = html.find_all('meta', itemprop="channelId")
    if not tags:
        # Same exception type as the old bare ``[0]`` lookup, but with a
        # message that says what was actually missing.
        raise IndexError(
            "no <meta itemprop=\"channelId\"> tag found at {!r}".format(res)
        )
    return tags[0].get('content')
def process_result(result):
    """Resolve a channel URL to its id and fetch that channel's data.

    ``result`` is handed to ``parse``; the id it returns is stored on
    ``cred.channel_id`` and passed to ``extract_data``.

    Returns:
        A ``(data, channel_id)`` tuple, where ``data`` is whatever
        ``extract_data`` returned.
    """
    resolved_id = parse(result)
    # Publish the id on the shared ``cred`` module before extracting.
    cred.channel_id = resolved_id
    return extract_data(resolved_id), resolved_id
def generate_visualizations(df, channel_id):
    """Run ``eda_main`` and return a status flag plus its first two results.

    ``eda_main`` must return exactly five values; the last three are
    discarded here.

    Returns:
        ``(1, first, second)`` where ``first`` and ``second`` are the first
        two values produced by ``eda_main(df, channel_id)``.
    """
    eda_first, eda_second, _tag, _title, _disc = eda_main(df, channel_id)
    return 1, eda_first, eda_second
def ml_classifiers(df, channel_id):
    """Split ``df`` with ``pP.main``, run ``clf_main`` and return its metrics.

    Returns:
        The second element of the pair returned by ``clf_main``; the first
        element is discarded.
    """
    train_x, test_x, train_y, test_y = pP.main(df)
    _models, metrics = clf_main(train_x, test_x, train_y, test_y, channel_id)
    return metrics
def create_figUrl(channel_id):
    """List the files under ``static/<channel_id>`` as relative paths.

    Args:
        channel_id: name of the sub-directory of ``static`` to list.

    Returns:
        A list of ``'static/<channel_id>/<entry>'`` strings, one per
        directory entry, sorted by entry name. Empty if the directory is
        empty.

    Raises:
        OSError: if the directory cannot be listed (for example, it does not
            exist).
    """
    base = 'static/' + channel_id
    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # stable between calls. Paths are joined with "/" rather than
    # os.path.join so the separator is always a forward slash.
    return [base + "/" + name for name in sorted(os.listdir(base))]
if __name__ == '__main__':
    # The server binds to every interface (0.0.0.0). With debug mode on, the
    # Werkzeug interactive debugger would be reachable from the network, so
    # debug is now opt-in: set FLASK_DEBUG=1 (or true/yes/on) to enable it.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in (
        "1", "true", "yes", "on",
    )
    app.run(debug=debug_enabled, host="0.0.0.0", port=3080)