-
Notifications
You must be signed in to change notification settings - Fork 1
/
sava_classifier.py
66 lines (50 loc) · 1.81 KB
/
sava_classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 25 14:19:02 2019
@author: jeetu
"""
import nltk
import random
from nltk.corpus import movie_reviews
import pickle
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
#from sklearn.svm import SVC, LinearSVC, NuSVC
#from sklearn.exceptions import NotFittedError
# One (token list, sentiment label) pair per review file, grouped by label in
# the order the corpus reader reports its categories.
documents = [
    (list(movie_reviews.words(review_id)), label)
    for label in movie_reviews.categories()
    for review_id in movie_reviews.fileids(label)
]
# The comprehension above is equivalent to this explicit loop:
'''
documents = []
for category in movie_reviews.categories():
    for fileid in movie_reviews.fileids(category):
        documents.append((list(movie_reviews.words(fileid)), category))
'''
# Shuffle in place so the positional train/test split made later in the script
# does not simply follow the category-by-category order the list was built in.
random.shuffle(documents)
#print (documents)

# Count how often each lower-cased token occurs across the whole corpus.
all_words = nltk.FreqDist(w.lower() for w in movie_reviews.words())

# Use the 3000 most frequent words as the feature vocabulary.  FreqDist is a
# Counter subclass in NLTK 3, so keys() follows insertion order rather than
# frequency; slicing keys() would just take the first 3000 distinct words seen.
# NOTE: a classifier pickled with the old vocabulary should be retrained.
word_features = [word for word, _count in all_words.most_common(3000)]
def find_features(document, vocabulary=None):
    """Map each vocabulary word to whether it occurs in *document*.

    Args:
        document: Iterable of word tokens for a single review.
        vocabulary: Optional iterable of words to use as feature names.
            When omitted, the module-level ``word_features`` list is used,
            which matches the original single-argument behaviour.

    Returns:
        A dict with one key per vocabulary word; the value is ``True`` when
        that word appears in ``document`` and ``False`` otherwise.
    """
    if vocabulary is None:
        # Looked up at call time so the global may be defined after this def.
        vocabulary = word_features
    # Build the set once so each membership test below is a hash lookup.
    present = set(document)
    return {word: word in present for word in vocabulary}
# Example: inspect the feature dict produced for a single review.
#print((find_features(movie_reviews.words('neg/cv000_29416.txt'))))

# Convert every (tokens, label) document into a (feature dict, label) pair.
featuresets = [(find_features(rev), category) for (rev, category) in documents]

# The first 1900 shuffled examples are for training; the rest are held out.
training_set = featuresets[:1900]
testing_set = featuresets[1900:]

# Reuse the previously pickled classifier when one exists; otherwise train a
# fresh one so the script also works on a first run (it used to crash here).
# NOTE(security): pickle.load can execute arbitrary code from the file; only
# load a pickle that this script or another trusted source produced.
# NOTE(review): `documents` is reshuffled on every run, so a classifier loaded
# from an earlier run may have been trained on reviews that now fall into
# `testing_set`, which would inflate the accuracy printed below -- confirm.
try:
    with open("naivebayes.pickle", "rb") as classifier_file:
        classifier = pickle.load(classifier_file)
except FileNotFoundError:
    classifier = nltk.NaiveBayesClassifier.train(training_set)

print ("Naive Bayes Algo accuracy in percentage:- ",(nltk.classify.accuracy(classifier,testing_set))*100)
classifier.show_most_informative_features(15)

# Persist the classifier; `with` closes the file even if dumping fails.
with open("naivebayes.pickle", "wb") as save_classifier:
    pickle.dump(classifier, save_classifier)