This repository has been archived by the owner on Aug 2, 2024. It is now read-only.
forked from natgaertner/candidate_classifier
-
Notifications
You must be signed in to change notification settings - Fork 1
/
get_training.py
77 lines (56 loc) · 2.01 KB
/
get_training.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from argparse import ArgumentParser
from csv import DictReader, DictWriter
def get_args():
    """Parse the command-line switches that choose a social link type.

    Four independent boolean switches are registered: -t/--twitter,
    -f/--facebook, -w/--web and -y/--youtube. Nothing here prevents
    several from being passed at once.

    Returns:
        The argparse namespace with boolean attributes ``twitter``,
        ``facebook``, ``web`` and ``youtube``; each is True only when
        its switch appeared on the command line.
    """
    # (short option, long option, help text) for every supported switch.
    flag_specs = (
        ('-t', '--twitter', 'finds twitter links'),
        ('-f', '--facebook', 'find facebook links'),
        ('-w', '--web', 'find website'),
        ('-y', '--youtube', 'find youtube'),
    )

    parser = ArgumentParser(description='specify which type of social')
    for short_opt, long_opt, help_text in flag_specs:
        parser.add_argument(short_opt, long_opt,
                            help=help_text,
                            action='store_true')
    return parser.parse_args()
def read_data(infile):
    """Load a CSV file into memory as a header list plus row dicts.

    Args:
        infile: Path of the CSV file to read; its first line is used as
            the header row.

    Returns:
        A ``(fields, data)`` tuple: ``fields`` is the reader's
        ``fieldnames`` and ``data`` is a list with one mapping per
        remaining row, keyed by column name.
    """
    with open(infile, 'r') as csv_handle:
        reader = DictReader(csv_handle)
        # Reading fieldnames consumes the header line before the rows.
        header = reader.fieldnames
        rows = list(reader)
    return header, rows
def write_data(outfile, data, fields):
    """Write row dicts to a CSV file, header line first.

    Args:
        outfile: Path of the CSV file to create or overwrite.
        data: Iterable of dicts, one per output row.
        fields: Column names; they form the header and fix the column
            order of every row.
    """
    with open(outfile, 'w') as csv_handle:
        csv_writer = DictWriter(csv_handle, fieldnames=fields)
        csv_writer.writeheader()
        # Rows are emitted one at a time, in the order given.
        for record in data:
            csv_writer.writerow(record)
def main():
    """Filter the candidate report down to rows that have a social link.

    Reads ``all_states.csv`` from the hard-coded reports directory,
    keeps the rows whose selected social column is non-empty, prints how
    many rows were kept, and writes them to a ``*_training.csv`` file in
    the same directory.

    The column and output file are chosen by the command-line switches
    from ``get_args``. When several switches are passed, the first one
    in the order twitter, facebook, web, youtube wins.

    Raises:
        SystemExit: If no switch was passed, or if the input file has no
            column with the selected name.
    """
    import os  # local import: only this function needs path joining

    args = get_args()
    path = '/Users/jcolazzi/Dropbox/BIP Production/candidates/reports/'
    infile = 'all_states.csv'

    # (switch value, CSV column, output file name); list order is the
    # precedence used when more than one switch is set.
    targets = [
        (args.twitter, 'Twitter Name', 'twitter_training.csv'),
        (args.facebook, 'Facebook URL', 'facebook_training.csv'),
        (args.web, 'Website', 'website_training.csv'),
        (args.youtube, 'Youtube', 'youtube_training.csv'),
    ]
    selected = next(
        ((column, out_name) for chosen, column, out_name in targets if chosen),
        None)
    if selected is None:
        # Without a switch there is no column to filter on; stop before
        # touching any files instead of failing on an empty column name.
        raise SystemExit('error: specify one of -t/--twitter, '
                         '-f/--facebook, -w/--web, -y/--youtube')
    social_field, outfile = selected

    fields, data = read_data(os.path.join(path, infile))
    # fieldnames is None for an empty input file.
    if social_field not in (fields or []):
        raise SystemExit('error: column {!r} not found in {}'.format(
            social_field, infile))

    # Short rows carry None for missing cells, so test truthiness rather
    # than calling len() on the value.
    has_social = [d for d in data if d.get(social_field)]
    print('NUMBER OF CANDIDATES WITH SOCIAL: {}'.format(len(has_social)))
    write_data(os.path.join(path, outfile), has_social, fields)
# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()