-
Notifications
You must be signed in to change notification settings - Fork 0
/
globals.py
executable file
·68 lines (53 loc) · 2.05 KB
/
globals.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import re
from datetime import datetime
from enum import Enum

import pytz
import tweepy
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.linear_model import LogisticRegression
from textblob.classifiers import NaiveBayesClassifier, DecisionTreeClassifier, NLTKClassifier
from textblob.utils import strip_punc
def clean_tweet(tweet):
    """Strip @mentions, URLs and punctuation from a tweet.

    Every @mention, every ``scheme://...`` URL and every character that is
    not an ASCII letter, digit, space or tab is replaced by a space. Runs of
    whitespace are then collapsed to single spaces and the ends are trimmed.

    Args:
        tweet: Raw tweet text.

    Returns:
        The cleaned text, with the remaining words joined by single spaces.
    """
    # Raw string: the pattern contains regex escapes that are not valid
    # Python string escapes, which a plain literal triggers warnings for.
    # The mention alternative includes "_" so that a handle such as
    # @foo_bar is removed whole instead of leaving "bar" behind.
    noise = r"(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+://\S+)"
    return ' '.join(re.sub(noise, " ", tweet).split())
def twitter_time_to_datetime(twitter_time):
    """Convert a Twitter timestamp string into a UTC-aware ``datetime``.

    Args:
        twitter_time: Timestamp text shaped like
            ``"Wed Aug 27 13:08:45 +0000 2008"``. The offset must be the
            literal ``+0000``, because the format string matches it verbatim.

    Returns:
        The parsed ``datetime`` with ``pytz.UTC`` attached as its tzinfo.

    Raises:
        ValueError: If the text does not match the expected layout.
    """
    parsed = datetime.strptime(twitter_time, '%a %b %d %H:%M:%S +0000 %Y')
    # The parse result is naive; stamp it as UTC without shifting the clock.
    return parsed.replace(tzinfo=pytz.UTC)
class Classifications(Enum):
    """Closed set of labels used for classification, numbered from 1."""

    democrat = 1
    republican = 2
    third = 3
class MyClassifier(NLTKClassifier):
    """TextBlob classifier backed by scikit-learn logistic regression.

    Each instance owns its own ``SklearnClassifier(LogisticRegression())``
    backend instead of sharing a single class-level instance.
    """

    def __init__(self, *args, **kwargs):
        """Create the per-instance backend, then defer to ``NLTKClassifier``.

        All positional and keyword arguments are forwarded unchanged to the
        base-class constructor.
        """
        # A class-level SklearnClassifier instance would be shared by every
        # MyClassifier, so training one model would overwrite the state of
        # all the others. Building it here keeps models independent.
        # NOTE(review): relies on the base class looking up ``nltk_class``
        # through ``self`` and calling its ``train`` method -- confirm
        # against the installed TextBlob version.
        self.nltk_class = SklearnClassifier(LogisticRegression())
        super().__init__(*args, **kwargs)

    def prob_classify(self, text):
        """Return the label probability distribution for *text*.

        Args:
            text: Raw text to classify.

        Returns:
            Whatever the trained backend's ``prob_classify`` returns for the
            features extracted from *text*.
        """
        # NOTE(review): presumably the plain NLTKClassifier base does not
        # define prob_classify itself (only some subclasses do) -- verify.
        features = self.extract_features(text)
        return self.classifier.prob_classify(features)
class Pclassifier:
    """Bag-of-words text classifier built on ``MyClassifier``.

    Each document is turned into one boolean ``contains(<word>)`` feature
    per word in ``wordset``. ``classifier`` stays ``None`` until ``train``
    has been called; the prediction methods require a trained classifier.
    """

    def __init__(self, filename, wordset):
        """Store the configuration without building a model.

        Args:
            filename: Kept on the instance as-is; not used by this class.
            wordset: Iterable of vocabulary words, one feature per word.
        """
        self.filename = filename
        self.wordset = wordset
        self.classifier = None

    def normalize(self, t):
        """Return token *t* lower-cased after ``strip_punc(t, all=True)``."""
        return strip_punc(t, all=True).lower()

    def custom_extractor(self, document, train_set):
        """Build the ``contains(<word>)`` feature dict for *document*.

        Args:
            document: Text to featurize; it is split on whitespace and each
                token is passed through ``normalize``.
            train_set: Unused. NOTE(review): presumably present because the
                classifier calls extractors with two arguments -- confirm.

        Returns:
            A dict mapping ``"contains(<word>)"`` to ``True`` when the word
            is among the normalized tokens, for every word in ``wordset``.
        """
        # A set makes each vocabulary lookup constant-time instead of a
        # scan over the whole token list.
        present = {self.normalize(token) for token in document.split()}
        return {
            u'contains({0})'.format(word): word in present
            for word in self.wordset
        }

    def train(self, train_set):
        """Replace ``classifier`` with a ``MyClassifier`` built on *train_set*."""
        self.classifier = MyClassifier(train_set, self.custom_extractor)

    def classify(self, text):
        """Return the classifier's label for *text*."""
        return self.classifier.classify(text)

    def probs(self, text):
        """Return a ``{str(label): probability}`` dict for *text*.

        The classifier must expose ``prob_classify`` and ``labels``.
        """
        prob_dist = self.classifier.prob_classify(text)
        return {
            str(label): prob_dist.prob(label)
            for label in self.classifier.labels()
        }

    def notable_features(self):
        """Ask the classifier to display its most informative features.

        NOTE(review): requires the classifier to provide
        ``show_informative_features`` -- confirm the backend supports it.
        """
        self.classifier.show_informative_features()

    def test(self, test_set):
        """Print the classifier's accuracy on *test_set* and return it.

        Returns:
            The value reported by the classifier's ``accuracy`` method, so
            callers can use the score as well as see it printed.
        """
        accuracy = self.classifier.accuracy(test_set)
        print(accuracy)
        return accuracy
# Twitter API credentials. They are read from the environment so that real
# secrets never have to be written into this file; each one falls back to
# the empty string when its variable is unset.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', '')
access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '')

# Module-level Tweepy client built from the credentials above. Order
# matters: the handler needs the consumer pair first, then the access
# token, before it is handed to the API wrapper.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)