-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsegmentation.cu
191 lines (153 loc) · 6.47 KB
/
segmentation.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#include "segmentation.h"

#include <cstdio>
// Edge length, in threads, of the square THREADS x THREADS blocks that the
// host wrappers in this file use when launching the 2D image kernels.
#define THREADS 16
// Number of threshold levels (and matching grey levels) generated by
// calculateThresholdValues / calculateGreyValues for the variable-threshold mode.
#define thresholdVar 2
// Per-block sum reduction.
//
// Each block loads blockDim.x consecutive ints from g_idata (starting at
// blockIdx.x * blockDim.x), sums them in dynamic shared memory and writes the
// block total to g_odata[blockIdx.x].
//
// Launch requirements:
//   * 1D blocks; the dynamic shared-memory size (3rd launch argument) must be
//     at least blockDim.x * sizeof(int) bytes.
//   * g_idata must hold gridDim.x * blockDim.x elements: the kernel has no
//     length parameter and therefore performs no bounds check on its loads.
//   * g_odata must hold gridDim.x elements.
//
// blockDim.x may be any positive value; it does not have to be a power of two.
__global__ void histogramSum(int* g_idata, int* g_odata) {
    extern __shared__ int temp[];
    int tid = threadIdx.x;
    temp[tid] = g_idata[tid + blockIdx.x * blockDim.x];

    // Fold the upper part of the active range onto the lower part until one
    // element is left. Using ceil(active / 2) as the fold distance keeps the
    // middle element of an odd-sized range in play instead of dropping it,
    // which a plain "blockDim.x >> 1" halving would do for non-power-of-two
    // block sizes. For power-of-two sizes this is the classic halving loop.
    for (int active = blockDim.x; active > 1; ) {
        int half = (active + 1) >> 1;
        // The loop bounds depend only on blockDim.x, so every thread of the
        // block reaches this barrier the same number of times.
        __syncthreads();
        if (tid + half < active) temp[tid] += temp[tid + half];
        active = half;
    }

    // Thread 0 was the last writer of temp[0], so no further barrier is needed.
    if (tid == 0) {
        g_odata[blockIdx.x] = temp[0];
    }
}
// Host-side reference sum: stores idata[0] + ... + idata[len - 1] in *odata.
// *odata is reset to 0 first, so a non-positive len yields 0.
void reduction_gold(int* odata, int* idata, int len)
{
    *odata = 0;
    int index = 0;
    while (index < len) {
        // Accumulate through the output pointer, one element at a time.
        *odata = *odata + idata[index];
        ++index;
    }
}
// Converts an interleaved multi-channel image into a single-channel grey image.
//
// Expects a 2D launch in which each thread handles the pixel at
// (blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
// threads that fall outside width x height do nothing.
//
//   inImg    interleaved input, `channels` bytes per pixel, row-major
//   outImg   one int per pixel, row-major (width * height entries)
//   channels bytes per input pixel:
//              >= 3 : grey = (c0 + c1 + c2) / 3 (integer division); any
//                     further channels are ignored
//              1, 2 : grey = first channel (reading channels 1 and 2 here
//                     would run past the pixel / the buffer)
//              < 1  : nothing is written
__global__ void greyscale(unsigned char* inImg, int* outImg, int width, int height, int channels) {
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= width || y >= height || channels < 1) return;

    // size_t offsets: width * height * channels can exceed INT_MAX on large images.
    size_t grayOffset = (size_t)y * (size_t)width + (size_t)x;
    size_t rgbOffset = grayOffset * (size_t)channels;

    if (channels >= 3) {
        int r = inImg[rgbOffset];
        int g = inImg[rgbOffset + 1];
        int b = inImg[rgbOffset + 2];
        outImg[grayOffset] = (r + g + b) / 3;
    } else {
        outImg[grayOffset] = inImg[rgbOffset];
    }
}
// Fills thresholdValue[0 .. thresholdVar - 1] with successive multiples of
// (mean / thresholdVar), using integer division: step, 2 * step, ...
//
//   mean            value the step is derived from
//   thresholdValue  output array with room for thresholdVar ints
__device__ void calculateThresholdValues(int mean, int* thresholdValue) {
    const int increment = mean / thresholdVar;
    int level = 0;
    for (int* slot = thresholdValue; slot != thresholdValue + thresholdVar; ++slot) {
        level += increment;
        *slot = level;
    }
}
// Fills greyValue[0 .. thresholdVar - 1] with successive multiples of
// (255 / thresholdVar), using integer division: step, 2 * step, ...
//
//   greyValue  output array with room for thresholdVar ints
__device__ void calculateGreyValues(int* greyValue) {
    const int brightest = 255;
    const int increment = brightest / thresholdVar;
    for (int i = 0, level = increment; i < thresholdVar; ++i, level += increment) {
        greyValue[i] = level;
    }
}
// Thresholds a single-channel grey image into an interleaved output image.
//
// Expects a 2D launch in which each thread handles the pixel at
// (blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y);
// threads that fall outside width x height do nothing.
//
//   inImg    one int per pixel, row-major
//   outImg   interleaved output, `channels` bytes per pixel, row-major
//   channels bytes per output pixel; the result is written to the first
//            min(channels, 3) of them. Channels beyond the third are left
//            untouched, and nothing is written when channels < 1.
//   mean     pixels strictly greater than `mean` become 255, all others 0
//   variable when non-zero, pixels strictly between the two levels produced by
//            calculateThresholdValues(mean, ...) are additionally mapped to a
//            grey level from calculateGreyValues(...), selected by `variable`.
__global__ void threshold(int* inImg, unsigned char* outImg, unsigned int width, unsigned int height, int channels, int mean, int variable = 0) {
    unsigned int x = threadIdx.x + blockIdx.x * blockDim.x;
    unsigned int y = threadIdx.y + blockIdx.y * blockDim.y;
    if (x >= width || y >= height || channels < 1) return;

    // size_t offsets: width * height * channels can exceed INT_MAX on large images.
    size_t pixel = (size_t)y * (size_t)width + (size_t)x;
    int grey = inImg[pixel];

    int value = 0;
    if (grey > mean) value = 255;

    if (variable) {
        int thresholdValues[thresholdVar];
        int greyValues[thresholdVar];
        calculateThresholdValues(mean, thresholdValues);
        calculateGreyValues(greyValues);

        // NOTE(review): the grey level is indexed by `variable` itself, so only
        // variable == 1 keeps both lookups inside greyValues[]; the fixed
        // indices 0 and 1 also assume thresholdVar >= 2. Left as-is because the
        // intended banding is not evident from this file - confirm with the author.
        if (grey < mean && grey > thresholdValues[1]) value = greyValues[variable];
        if (grey < thresholdValues[1] && grey > thresholdValues[0]) value = greyValues[variable - 1];
    }

    // Write only channels that exist: with fewer than three channels, writing
    // three bytes would spill into neighbouring pixels (racing with their
    // threads) and past the end of the buffer.
    int colourChannels = channels < 3 ? channels : 3;
    size_t base = pixel * (size_t)channels;
    for (int c = 0; c < colourChannels; ++c) {
        outImg[base + c] = (unsigned char)value;
    }
}
// Host wrapper: converts an interleaved image to greyscale on the GPU.
//
// Copies inImg to the device, runs the greyscale kernel with THREADS x THREADS
// blocks, and copies the result back into outImg.
//
//   inImg    host buffer, width * height * channels bytes
//   outImg   host buffer, width * height ints (receives the grey values)
//
// Does nothing when width, height or channels is not positive. If any CUDA
// call fails, the error is reported on stderr, outImg is left as it was before
// the failing step, and all device memory is still released.
void convertToGreyscale(unsigned char* inImg, int* outImg, int width, int height, int channels)
{
    if (width <= 0 || height <= 0 || channels <= 0) return;

    // size_t byte counts: width * height * channels can exceed INT_MAX.
    const size_t pixelCount = (size_t)width * (size_t)height;
    const size_t inBytes = pixelCount * (size_t)channels * sizeof(unsigned char);
    const size_t outBytes = pixelCount * sizeof(int);

    // Ceil-division so the grid covers the image without a spare block when
    // the dimensions are exact multiples of THREADS.
    const dim3 dimBlock(THREADS, THREADS, 1);
    const dim3 dimGrid((width + THREADS - 1) / THREADS, (height + THREADS - 1) / THREADS, 1);

    unsigned char* d_originalImg = NULL;
    int* d_greyImg = NULL;

    // Each step runs only if everything before it succeeded.
    cudaError_t err = cudaMalloc((void**)&d_originalImg, inBytes);
    if (err == cudaSuccess) err = cudaMalloc((void**)&d_greyImg, outBytes);
    if (err == cudaSuccess) err = cudaMemcpy(d_originalImg, inImg, inBytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess) {
        greyscale<<<dimGrid, dimBlock>>>(d_originalImg, d_greyImg, width, height, channels);
        // Kernel launches return nothing; launch failures surface here.
        err = cudaGetLastError();
    }
    // The blocking copy also reports errors raised while the kernel was running.
    if (err == cudaSuccess) err = cudaMemcpy(outImg, d_greyImg, outBytes, cudaMemcpyDeviceToHost);

    if (err != cudaSuccess) {
        fprintf(stderr, "convertToGreyscale: CUDA error: %s\n", cudaGetErrorString(err));
    }

    // cudaFree on a null pointer is a no-op, so this is safe after a partial failure.
    cudaFree(d_originalImg);
    cudaFree(d_greyImg);
}
// Host wrapper: thresholds a grey image around its mean intensity on the GPU.
//
// Computes the integer mean of inImg on the host, then runs the threshold
// kernel with THREADS x THREADS blocks and copies the result into outImg.
//
//   inImg             host buffer, width * height ints (grey values)
//   outImg            host buffer, width * height * channels bytes
//   variableThreshold non-zero selects the kernel's variable-threshold mode
//                     (passed to the kernel as 1), zero the plain binary mode
//
// Does nothing when width, height or channels is not positive. If any CUDA
// call fails, the error is reported on stderr, outImg is not written, and all
// device memory is still released.
void thresholdFilter(int* inImg, unsigned char* outImg, int width, int height, int channels, int variableThreshold = 0)
{
    if (width <= 0 || height <= 0 || channels <= 0) return;

    // size_t byte counts: width * height * channels can exceed INT_MAX.
    const size_t pixelCount = (size_t)width * (size_t)height;
    const size_t greyBytes = pixelCount * sizeof(int);
    const size_t outBytes = pixelCount * (size_t)channels;

    // Mean grey value, computed on the host. A 64-bit accumulator keeps the
    // sum from overflowing on large images; pixelCount > 0 is guaranteed by
    // the guard above, so the division is safe.
    long long sum = 0;
    for (size_t i = 0; i < pixelCount; ++i) sum += inImg[i];
    const int mean = (int)(sum / (long long)pixelCount);

    // Ceil-division so the grid covers the image without a spare block when
    // the dimensions are exact multiples of THREADS.
    const dim3 dimBlock(THREADS, THREADS, 1);
    const dim3 dimGrid((width + THREADS - 1) / THREADS, (height + THREADS - 1) / THREADS, 1);

    int* d_grey = NULL;
    unsigned char* d_threshold = NULL;

    // Each step runs only if everything before it succeeded.
    cudaError_t err = cudaMalloc((void**)&d_grey, greyBytes);
    if (err == cudaSuccess) err = cudaMalloc((void**)&d_threshold, outBytes);
    // The kernel writes at most three channels per pixel. Pre-filling with 0xFF
    // gives any further channel a defined value instead of uninitialised
    // device memory (presumably a 4th channel is alpha, so 0xFF = opaque -
    // TODO confirm against the image loader).
    if (err == cudaSuccess) err = cudaMemset(d_threshold, 0xFF, outBytes);
    if (err == cudaSuccess) err = cudaMemcpy(d_grey, inImg, greyBytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess) {
        threshold<<<dimGrid, dimBlock>>>(d_grey, d_threshold, width, height, channels, mean, variableThreshold ? 1 : 0);
        // Kernel launches return nothing; launch failures surface here.
        err = cudaGetLastError();
    }
    // The blocking copy also reports errors raised while the kernel was running.
    if (err == cudaSuccess) err = cudaMemcpy(outImg, d_threshold, outBytes, cudaMemcpyDeviceToHost);

    if (err != cudaSuccess) {
        fprintf(stderr, "thresholdFilter: CUDA error: %s\n", cudaGetErrorString(err));
    }

    // cudaFree on a null pointer is a no-op, so this is safe after a partial failure.
    cudaFree(d_grey);
    cudaFree(d_threshold);
}