-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSplit_Questions_Answers.py
133 lines (104 loc) · 4.35 KB
/
Split_Questions_Answers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# coding: utf-8
# In[89]:
from collections import Counter
import scipy.stats as stats
import pandas as pd
import os
import skimage.io as io
import matplotlib.pyplot as plt
import csv
import errno
import sys
import random
#reload(sys)
#sys.setdefaultencoding('utf8')
def chunks(listItems, groupSize):
    """Yield consecutive slices of ``listItems`` with up to ``groupSize`` items.

    The last slice is shorter when ``len(listItems)`` is not a multiple of
    ``groupSize``. Nothing is yielded for an empty ``listItems``.
    """
    total = len(listItems)
    for start in range(0, total, groupSize):
        stop = start + groupSize
        yield listItems[start:stop]
def produceLabel(answerList):
    """Return the entropy of the frequency distribution of ``answerList``.

    The distribution is built by ``createFreqDistribution`` and handed to
    ``scipy.stats.entropy``.
    """
    distribution = createFreqDistribution(answerList)
    return stats.entropy(distribution)
def createFreqDistribution(answerList, numSamples=10):
    """Build a zero-padded relative-frequency vector for a list of answers.

    Answers are lower-cased before counting, so "Yes" and "yes" are the same
    answer. Each distinct answer contributes ``count / numSamples``; the
    vector is then padded with zeros until it holds ``numSamples`` entries.

    Args:
        answerList: iterable of answer strings.
        numSamples: divisor for the counts and target length of the padded
            vector. Defaults to 10, the value that used to be hard-coded.

    Returns:
        A list with one frequency per distinct answer (in the Counter's
        iteration order) followed by the zero padding. No padding is added
        when there are ``numSamples`` or more distinct answers.

    Raises:
        ValueError: if ``numSamples`` is not positive.
    """
    if numSamples <= 0:
        raise ValueError("numSamples must be positive")

    # Count once; the original built the same Counter twice.
    answerCounts = Counter(ans.lower() for ans in answerList)

    frequencyVector = [
        float(count) / float(numSamples) for count in answerCounts.values()
    ]

    # A negative numZeros multiplies to an empty list, i.e. no padding.
    numZeros = numSamples - len(answerCounts)
    frequencyVector.extend([0] * numZeros)
    return frequencyVector
# In[90]:
def search_dictionary(key, value, list_of_dictionaries):
    """Return the dictionaries whose ``key`` entry equals ``value``.

    The input order is preserved. Each dictionary is indexed with ``key``
    directly, so a dictionary without that key raises ``KeyError``.
    """
    matches = []
    for candidate in list_of_dictionaries:
        if candidate[key] == value:
            matches.append(candidate)
    return matches
# User-configurable parameters
baseDir = "D:/Study !/CPU-Vision/project/test/VQACrowdSourcing/data/"
imgDirPath = "https://www.cs.utexas.edu/~dgurari/Projects/vqAnswerCollection/" + "Images/"
vqasPath = baseDir + "Answers/answers.xls"
destPath = baseDir + "Res-Test/"

# Give pandas the path so it opens and closes the workbook itself; the
# previous open(vqasPath, 'rb') handle was never closed. The sheet name is
# passed positionally because the keyword was renamed from ``sheetname`` to
# ``sheet_name`` between pandas releases.
df = pd.read_excel(vqasPath, 'answers')
allHITResults = df['hitResult'].values

questionList = []      # one {'imageName', 'question'} entry per parsed record
answersList = {}       # imageName -> list of answers seen so far
entropyMoreHalf = []                 # label 1: 0.5 < entropy <= 1.0
entropyBetweenHalfandThreeHalf = []  # label 2: 1.0 < entropy <= 1.5
entropyMoreThanThreeHalf = []        # label 3: entropy > 1.5
checkCount = 0         # questions whose entropy was <= 0.5
samplesPerBucket = 16  # rows sampled from each entropy bucket

# csv.writer needs a binary file on Python 2 and a text file opened with
# newline='' on Python 3; an unconditional 'wb' raises TypeError on the
# first writerow() under Python 3.
if sys.version_info[0] < 3:
    csvOpenMode = 'wb'
    csvOpenKwargs = {}
else:
    csvOpenMode = 'w'
    csvOpenKwargs = {'newline': ''}

with open(destPath + '../testInput.input', csvOpenMode, **csvOpenKwargs) as csvfile:
    VQAWriter = csv.writer(csvfile, delimiter='\t', quoting=csv.QUOTE_MINIMAL)
    VQAWriter.writerow(["i1", "i2", "i3", "i4"])

    for hit in allHITResults:
        # Strip the list/quote/opening-brace characters so that the closing
        # brace is the only record delimiter left.
        processedhit = hit
        for unwanted in ("[", "]", "\"", "{"):
            processedhit = processedhit.replace(unwanted, "")
        vqas = processedhit.split('}')

        for vqa in vqas:
            if len(vqa) == 0:
                # The split leaves an empty fragment after the last '}'.
                continue
            if vqa[0] == ',':
                # Drop the separator that preceded this record.
                vqa = vqa[1:]

            imgIndex = vqa.find("imgID:")
            questionIndex = vqa.find("question:")
            answerIndex = vqa.find("answer:")
            answerConfIndex = vqa.find("ansConf:")

            # Each value runs from just after its label to one character
            # before the next label (presumably the separating comma).
            imageName = vqa[imgIndex + 6:questionIndex - 1]
            question = vqa[questionIndex + 9:answerIndex - 1]
            answer = vqa[answerIndex + 7:answerConfIndex - 1]

            # Renamed from ``dict`` so the builtin is no longer shadowed.
            questionEntry = {'imageName': imageName, 'question': question}
            questionList.append(questionEntry)
            answersList.setdefault(imageName, []).append(answer)

            # Not read again in this script.
            imgPath = imgDirPath + imageName
            imgText = question + '\n'

            curAnswers = answersList.get(imageName)
            answers = '#'.join(map(str, curAnswers))

            # Label the question at the moment its 10th answer arrives.
            if len(curAnswers) == 10:
                entropyVal = produceLabel(curAnswers)
                # The middle test used to be 1.0 < entropy < 1.5, so a value
                # of exactly 1.5 fell through to label 1.
                if entropyVal > 1.5:
                    label, bucket = 3, entropyMoreThanThreeHalf
                elif entropyVal > 1.0:
                    label, bucket = 2, entropyBetweenHalfandThreeHalf
                elif entropyVal > 0.5:
                    label, bucket = 1, entropyMoreHalf
                else:
                    # NOTE(review): source indentation was lost; this else is
                    # assumed to close the entropy chain - confirm.
                    label, bucket = None, None
                    checkCount += 1

                if bucket is not None:
                    bucket.append(
                        imageName + "|" + question + "|" + answers + "|" + str(label) + "|" + "V"
                    )

    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample size at what each bucket actually holds.
    entropyMoreThanThreeHalf = random.sample(
        entropyMoreThanThreeHalf,
        min(samplesPerBucket, len(entropyMoreThanThreeHalf)))
    entropyBetweenHalfandThreeHalf = random.sample(
        entropyBetweenHalfandThreeHalf,
        min(samplesPerBucket, len(entropyBetweenHalfandThreeHalf)))
    entropyMoreHalf = random.sample(
        entropyMoreHalf,
        min(samplesPerBucket, len(entropyMoreHalf)))

    # One output row per group of four entries, highest-entropy bucket first.
    for sampledBucket in (entropyMoreThanThreeHalf,
                          entropyBetweenHalfandThreeHalf,
                          entropyMoreHalf):
        for element in chunks(sampledBucket, 4):
            VQAWriter.writerow(element)
# In[60]:
# In[ ]: