-
Notifications
You must be signed in to change notification settings - Fork 0
/
read_hist_svm_youtube.cpp
312 lines (275 loc) · 10.6 KB
/
read_hist_svm_youtube.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
#include "youtubedata.h"
#include "readHist_SVM.h"
#include "read_reduce.h"
using namespace std;
/**
Adjust class weights to take into account the number of samples.
Cross validation for class weights
Also::
Splitting them randomly ! so read all training codewords together with the labels !
**/
// Do 2 things:
// 1. Combine all positive and negative examples and then split them while keeping their labels intact, i.e. do not split positives and negatives separately.
// 2. Do N-class SVM classification instead of 2-class classification.
// Can do it tonight?? Challenge!! - 4 hrs coding marathon.
// Managing class weights.
// Changing the number of train and test examples!
// Function which reads the histograms and creates an SVM from either an individual feature or a combination of 2 or more features:
//void readHist_svm_youtube()
//{
// cout<<"Read histogram and combine features";
// for youtube data, i have histograms video wise..
/**
Read all codewords of the positive category -->
need to split them into N sets: per video? Randomly in terms of shots?
Have 18 videos. Split into 6 sets, with 3 videos in each set?
Train on 5, validate on 1, repeated N times.
For negative examples:
cat 5
cow 6
bird 9
horse 6
into 6 sets >>
Go to the first read video of bird --> select one number from 1-N and assign the video to that set.
If all are finished, start from the beginning.
Reading in the histograms and dividing them into N sets simultaneously!
How about the combination codewords?
First combine and then split them! So a separate function can be written to combine them as and when they are being assigned to one of the N sets.
**/
void readHist_getSVM_youtube()
{
vector<string> poscat;
posCatNames(poscat);
int N = 3;
vector<string> negcat;
negCatNames(negcat);
vector<string> allconfigs;
getallconfig(allconfigs);
cout<<"size of all config is "<<allconfigs.size()<<endl;
for(int i=0; i<allconfigs.size(); i++)
{
vector<string> feature;
getparts(allconfigs[i],feature);
stringstream svmfilenameis;
for(int f=0; f<feature.size(); f++)
{
svmfilenameis<<feature[f]<<"_";
}
svmfilenameis<<poscat[0]<<"Pos";
vector<cv::Mat> codewordsP(N);
vector<cv::Mat> labelsP(N);
readPositiveCategoryHistogram_mulFeatures(N,poscat[0],feature,codewordsP);
int totalP = 0;
// fill the labels vector
for(int i=0; i<N; i++)
{
int row = codewordsP[i].rows;
cout<<"In index "<<i<<", The number of histograms are "<<row<<endl;
totalP = totalP + row;
labelsP[i] = cvCreateMat(row,1,CV_32FC1);
for(int j=0; j<row; j++)
{
labelsP[i].at<float>(j,0) = 1;
}
}
cout<<"Total positive codewords is "<<totalP<<endl;
cout<<"The codeword dimesion is "<<codewordsP[0].cols<<endl;
vector<cv::Mat> codewordsN(N);
vector<cv::Mat> labelsN(N);
readNegativeCategoryHistogram_mulFeatures(N, negcat,feature,codewordsN);
int totalN = 0;
for(int i=0; i<N; i++)
{
int row = codewordsN[i].rows;
cout<<"In index "<<i<<", The number of histograms are "<<row<<endl;
totalN = totalN + row;
labelsN[i] = cvCreateMat(row,1,CV_32FC1);
for(int j=0; j<row; j++)
{
labelsN[i].at<float>(j,0) = -1;
}
}
cout<<"The total negative codewords are "<<totalN<<endl;
cout<<"The codeword dimesion is "<<codewordsN[0].cols<<endl;
cv::Mat alltraining = cvCreateMat(0,dictionarySize*feature.size(),CV_32FC1);
cv::Mat alllabels = cvCreateMat(0,1,CV_32FC1);
for(int i=0; i<N; i++)
{
alltraining.push_back(codewordsP[i]);
alllabels.push_back(labelsP[i]);
alltraining.push_back(codewordsN[i]);
alllabels.push_back(labelsN[i]);
}
cout<<"Total number of training examples are "<<alltraining.rows<<endl;
vector<float> accuracies;
float best_C_value;
svmCrossVal(alltraining,codewordsP,labelsP,codewordsN, labelsN, N, svmfilenameis.str(),accuracies, best_C_value);
createAndWriteSvm(alltraining,alllabels,svmfilenameis.str(),best_C_value, false);
}
}
// new version, one shot per file
void readHist_video_youtube(cv::Mat& histogram, string category, vector<string> featurename,int video,vector<int> shots)
{
int numShots = shots.size();
cv::Mat temphistogram = cvCreateMat(numShots,dictionarySize*featurename.size(),CV_32FC1);
for(int i=0; i<numShots; i++)
{
// for each shot ! read all the features !
for(int f=0; f<featurename.size(); f++)
{
// read this featurename shots
stringstream histfilename;
histfilename<<getRootDir()<<category<<"/histogram/"<<dictionarySize<<"/hist_"<<category<<"_"<<featurename[f]<<"_"<<video<<"_"<<shots[i];
ifstream hist(histfilename.str().c_str(),ios::in);
if(!hist.good())
{
cout<<"Cannot read hist file "<<histfilename.str()<<endl;
exit(0);
}
// add it to the big concatenated shot
for(int j = f*dictionarySize; j<(f+1)*dictionarySize; j++)
{
hist>>temphistogram.at<float>(i,j) ;
}
hist.close();
}
}
histogram.push_back(temphistogram);
}
// old version
void readHist_video_youtube(cv::Mat& histogram, int video,int numShots,string category, vector<string> featurename,vector<int> shots)
{
cv::Mat temphistogram = cvCreateMat(numShots,dictionarySize*featurename.size(),CV_32FC1);
float norm_factor = featurename.size();
for(int i=0; i<featurename.size(); i++)
{
stringstream histfilename;
histfilename<<getRootDir()<<category<<"/shothist_"<<category<<featurename[i]<<"_"<<video;
ifstream hist(histfilename.str().c_str(),ios::in);
// cout<<"Reading file "<<histfilename.str()<<endl;
if(!hist.good())
{
cout<<"Cannot read hist file "<<histfilename.str()<<endl;
exit(0);
}
int n =0; // number of shots stored
while(hist.good())
{
int shotnum;
hist>>shotnum;
// cout<<"The shot number read is "<<shotnum<<endl;
if(find(shots.begin(),shots.end(),shotnum) != shots.end())
{
// this is one of the shots !
// cout<<"This is one of the shots needed !"<<endl;
float tempvalue;
for(int j = i*dictionarySize; j<(i+1)*dictionarySize; j++)
{
hist>>tempvalue;
temphistogram.at<float>(n,j) = tempvalue / norm_factor;
}
n++;
}
else
{
// cout<<"This shot is not needed !"<<endl;
// just read off the value !
float notneeded;
for(int kk=0; kk<dictionarySize; kk++)
{
hist>>notneeded;
}
}
if(n == numShots)
{
// cout<<"Read all the shots needed !"<<endl;
break;
}
}
}
histogram.push_back(temphistogram);
return ;
}
void readNegativeCategoryHistogram_mulFeatures(int N, vector<string> category,vector<string> featurename,vector<cv::Mat>& codewordsN)
{
cout<<"Reading negative codewords"<<endl;
for(int i=0; i<N; i++)
{
codewordsN[i] = cvCreateMat(0,dictionarySize*featurename.size(),CV_32FC1);
}
int globalvideocount = 0;
for(int c=0; c<category.size(); c++)
{
int shotInCat = 0;
cout<<"In category "<<category[c]<<endl;
vector<int> videoindx;
getVideoIndx_youtubedata(videoindx,category[c],"train");
vector<int> V;
vector<int> S;
//vector<float> numFeat;
writingFeat_readVSFileName(V,S,category[c],"train");
for(int tv=0; tv<videoindx.size(); tv++)
{
cout<<"Inside for video "<<videoindx[tv]<<endl;
cout<<"This video goes to the index "<<globalvideocount % N<<endl;
int indxN = globalvideocount % N ;
globalvideocount ++;
int numShots = 0;
vector<int> shots;
for(int vd=0; vd<V.size(); vd++)
{
if(V[vd] == videoindx[tv])
{
// int shotforthisvideo = S[vd];
// cout<<"Shot : "<<shotforthisvideo<<" for Video: "<<videoindx[tv]<<endl;
shots.push_back(S[vd]);
numShots ++;
} // get codewords for this video !
}
shotInCat = shotInCat + numShots;
cout<<"Video: "<<videoindx[tv]<<". NumShots: "<<numShots<<endl;
// read features of each of the features and get a combined codeword !
readHist_video_youtube(codewordsN[indxN],videoindx[tv],numShots,category[c],featurename,shots);
}
cout<<"Completed category "<<category[c]<<" with shots "<<shotInCat<<endl;
}
}
void readPositiveCategoryHistogram_mulFeatures(int N, string category,vector<string> featurename,vector<cv::Mat>& codewordsP)
{
srand((unsigned)time(0));
// read the number of videos. .
cout<<"Reading positive codewords"<<endl;
for(int i=0; i<N; i++)
{
codewordsP[i] = cvCreateMat(0,dictionarySize*featurename.size(),CV_32FC1);
}
vector<int> videoindx;
getVideoIndx_youtubedata(videoindx,category,"train");
vector<int> V;
vector<int> S;
//vector<float> numFeat;
writingFeat_readVSFileName(V,S,category,"train");
for(int tv=0; tv<videoindx.size(); tv++)
{
cout<<"Inside for video "<<videoindx[tv]<<endl;
// int randomnum = rand() % N ;
int indxN = tv % N;
cout<<"This video goes to the index "<<indxN<<endl;
int numShots = 0;
vector<int> shots;
for(int vd=0; vd<V.size(); vd++)
{
if(V[vd] == videoindx[tv])
{
// int shotforthisvideo = S[vd];
// cout<<"Shot : "<<shotforthisvideo<<" for Video: "<<videoindx[tv]<<endl;
shots.push_back(S[vd]);
numShots ++;
}
// get codewords for this video !
}
cout<<"Video: "<<videoindx[tv]<<". NumShots: "<<numShots<<endl;
// read multipled feature codeword
readHist_video_youtube(codewordsP[indxN],videoindx[tv],numShots,category,featurename,shots);
}
}