-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmicroPasts.py
135 lines (89 loc) · 2.93 KB
/
microPasts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
##### Extract the completed projects from the MicroPasts website #####
# This code was developed for the MicroPasts project by mkrzyzanska.
# It downloads all the completed projects from MicroPasts through the Pybossa API.
# Fragments of the code are based on: https://github.com/Scifabric/pybossa/issues/1260, accessed on 18.07.2017
# Get the libraries.
# These must come before the first use of `os`: the original called
# os.chdir() ahead of `import os`, which raises NameError at startup.
import csv
import json
import os
import time

import requests

# Set the working directory. The output files written by this script use
# relative names, so they end up in this directory.
# Replace the placeholder with a real path before running.
os.chdir('path_to_directory')
# Get the list of all the completed tasks.
# The API returns at most 100 records per request, so every page after the
# first is requested with `last_id` set to the id of the last task received,
# until an empty page comes back.
TASK_URL = 'http://crowdsourced.micropasts.org/api/task'

# List the extracted tasks are appended to (one dict per task).
tasks = []
last_id = None
while True:
    params = {'state': 'completed', 'limit': 100}
    if last_id is not None:
        params['last_id'] = last_id
    res = requests.get(TASK_URL, params=params)
    # Fail loudly on an HTTP error instead of parsing an error body as tasks.
    res.raise_for_status()
    data = res.json()
    if not isinstance(data, list):
        # Extending `tasks` with a dict would silently append its keys and
        # break the `["id"]` lookup on the next page.
        raise RuntimeError('Unexpected response from task API: %r' % (data,))
    if not data:
        break
    tasks.extend(data)
    print(len(tasks))
    last_id = data[-1]['id']
    # Same back-off rule as the task-run download: pause when the rate-limit
    # budget reported by the server is nearly used up.
    remaining = res.headers.get('X-RateLimit-Remaining')
    if remaining is not None and int(remaining) < 10:
        time.sleep(300)  # Sleep for 5 minutes

# NOTE: the original ended with an unconditional
# `lid = tasks[len(tasks)-1][0]["id"]` fallback. Each task is a dict, so that
# line raised KeyError (or IndexError when no tasks were returned).
# Malformed responses are now rejected inside the loop instead.
# Persist the downloaded tasks to tasks.json.
serialized_tasks = json.dumps(tasks)
with open('tasks.json', 'w') as json_out:
    json_out.write(serialized_tasks)

# Read the tasks back from tasks.json.
with open('tasks.json') as json_in:
    tasks = json.loads(json_in.read())
# Write the tasks to tasks.csv: one header row, then one row per task.
#
# Column names are the union of every task's keys in first-seen order.
# The original wrote each dict's .values() under the first task's header,
# so a task with missing, extra or re-ordered keys would put values in the
# wrong columns. DictWriter places each value under its own key instead.
fieldnames = list(dict.fromkeys(key for task in tasks for key in task))

# newline='' is what the csv module requires; without it Windows output
# gets a blank line after every row. The with-block closes the file even
# if writing a row fails.
with open('tasks.csv', 'w', newline='') as taskList:
    csvwriter = csv.DictWriter(taskList, fieldnames=fieldnames, restval='')
    # With no tasks there are no columns; leave the file empty, as before.
    if fieldnames:
        csvwriter.writeheader()
    csvwriter.writerows(tasks)
# Extract all the task runs for every task in `tasks`.
#
# Changes from the original loop:
#   * pages of 100 runs are requested instead of `limit=1`, which cost one
#     HTTP request per task run (100 is the page size the task download
#     already uses);
#   * a response received while the rate-limit budget is low is still used
#     before sleeping, instead of being discarded and requested again;
#   * a missing X-RateLimit-Remaining header no longer raises KeyError.
TASKRUN_URL = 'http://crowdsourced.micropasts.org/api/taskrun'
PAGE_SIZE = 100

# List the extracted task runs are appended to.
t = []
for i, task in enumerate(tasks):
    print(i)
    task_id = task["id"]
    offset = 0
    while True:
        res = requests.get(
            TASKRUN_URL,
            params={'task_id': task_id, 'limit': PAGE_SIZE, 'offset': offset},
        )
        # Fail loudly on an HTTP error instead of parsing an error body.
        res.raise_for_status()
        data = res.json()
        if not isinstance(data, list):
            raise RuntimeError(
                'Unexpected response from taskrun API: %r' % (data,))
        # An empty page means every run of this task has been collected.
        if not data:
            break
        t.extend(data)
        # Advance by what was actually received, in case the server returns
        # fewer records than requested.
        offset += len(data)
        # Pause when the rate-limit budget reported by the server is low.
        remaining = res.headers.get('X-RateLimit-Remaining')
        if remaining is not None and int(remaining) < 10:
            time.sleep(300)  # Sleep for 5 minutes
# Persist the downloaded task runs to tasksRuns.json.
serialized_runs = json.dumps(t)
with open('tasksRuns.json', 'w') as json_out:
    json_out.write(serialized_runs)

# Read the task runs back from tasksRuns.json.
with open('tasksRuns.json') as json_in:
    t = json.loads(json_in.read())
# Write the task runs to taskRuns.csv: one header row, then one row per run.
#
# Column names are the union of every task run's keys in first-seen order.
# The original wrote each dict's .values() under the first run's header,
# so a run with missing, extra or re-ordered keys would put values in the
# wrong columns. DictWriter places each value under its own key instead.
fieldnames = list(dict.fromkeys(key for run in t for key in run))

# newline='' is what the csv module requires; without it Windows output
# gets a blank line after every row. The with-block closes the file even
# if writing a row fails.
with open('taskRuns.csv', 'w', newline='') as taskRuns:
    csvwriter = csv.DictWriter(taskRuns, fieldnames=fieldnames, restval='')
    # With no task runs there are no columns; leave the file empty, as before.
    if fieldnames:
        csvwriter.writeheader()
    csvwriter.writerows(t)