-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsentiment_data_collector.py
40 lines (32 loc) · 1.13 KB
/
sentiment_data_collector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
"""
An extremely simple script to help with collection and labelling of sentiment
training data. Simply adjust the parameters and run to quickly collect & label
data without having to do any of the writing yourself.
NOTE: please don't ever "commit" this file unless you've actually improved it
"""
from tweet_snagger import TweetSnagger
import json
#################################
TOPIC = "Mudryk"  # Change
INTENT = "trade"  # these
NUM_TWEETS = 10  # things
#################################

# Labelled examples are appended to this file, one JSON object per line.
FILENAME = "sentiment-analysis/dataset.json"
MODES = {
    "append": "a",
    "write": "w+",
    "read": "r",
}

# Labels accepted from the operator. The original range check only rejected
# values above 2; 0..2 is presumably the intended label set -- confirm against
# whatever consumes the dataset.
_VALID_LABELS = (0, 1, 2)


def _parse_label(raw):
    """Return ``raw`` as an int if it is one of ``_VALID_LABELS``, else None.

    Non-numeric input (including an empty string) yields None instead of
    raising, so the caller can simply re-prompt.
    """
    try:
        value = int(raw)
    except ValueError:
        return None
    return value if value in _VALID_LABELS else None


ts = TweetSnagger()
tweets = ts.snag_tweets([TOPIC], intent=INTENT, num_tweets=NUM_TWEETS)

# Append mode: every run adds to the existing dataset rather than replacing it.
with open(FILENAME, mode=MODES["append"]) as fptr:
    for tweet in tweets:
        # Show the tweet text so the operator can decide on a label.
        print(tweet["content"])
        label = _parse_label(input("label for this tweet: "))
        # Keep asking until the answer is a valid label; a typo must not
        # abort the session or write an out-of-range label to the dataset.
        while label is None:
            label = _parse_label(input("try again, label for this tweet: "))
        obj = {
            "text": tweet["content"],
            "labels": label,
        }
        # One JSON document per line (JSON Lines layout).
        fptr.write(json.dumps(obj) + '\n')