シンプルな二値化のサンプルを追加
[cuda.git] / binarize / binarize.cu
1 /*
2         Copyright (C) 2012  fmaj7b5.info
3
4         This program is free software: you can redistribute it and/or modify
5         it under the terms of the GNU General Public License as published by
6         the Free Software Foundation, either version 2 of the License, or
7         (at your option) any later version.
8
9         This program is distributed in the hope that it will be useful,
10         but WITHOUT ANY WARRANTY; without even the implied warranty of
11         MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12         GNU General Public License for more details.
13
14         You should have received a copy of the GNU General Public License
15         along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "stdafx.h"
19 #include <cuda.h>
20
21 #include "binarize.cuh"
22
23 using namespace FM7b5;
24
/**
 * Binarize an image on the GPU.
 *
 * Copies `in` into pitched device memory, runs the FM7b5::binarize kernel over
 * it, and copies the result back into `out`. Per the kernel, every byte that is
 * below `thres` becomes 0 and every other byte becomes 255.
 *
 * @param out    destination image; must have the same width and height as `in`.
 * @param in     source image.
 * @param thres  threshold value forwarded to the kernel.
 *
 * @throws std::runtime_error  if the image sizes differ, or if a CUDA runtime
 *                             call or the kernel launch fails (the message is
 *                             the CUDA error string).
 */
void
FM7b5::binarize_gpu(ImageGray& out, const ImageGray& in, const uint8_t thres)
{
        // Owns one pitched device allocation and frees it on every exit path,
        // including when an exception is thrown further down.
        struct DeviceImage {
                uint8_t* ptr;
                size_t pitch;

                DeviceImage() : ptr(nullptr), pitch(0) {}
                ~DeviceImage()
                {
                        if (ptr != nullptr) {
                                cudaFree(ptr);
                        }
                }

        private:
                // not copyable: a copy would free the same allocation twice
                DeviceImage(const DeviceImage&);
                DeviceImage& operator=(const DeviceImage&);
        };

        // Turns a failed CUDA status into the exception type callers already expect.
        struct Cuda {
                static void check(const cudaError_t status)
                {
                        if (status != cudaSuccess) {
                                throw std::runtime_error(cudaGetErrorString(status));
                        }
                }
        };

        if (in.width() != out.width() || in.height() != out.height()) {
                throw std::runtime_error("sizes of input and output images are diferent.");
        }

        const size_t width(in.width()), height(in.height()), bpp(in.bpp());

        // Nothing to process; a grid with a zero dimension is an invalid launch configuration.
        if (width == 0 || height == 0) {
                return;
        }

        // NOTE(review): the kernel addresses exactly one byte per pixel, so only the
        // first `width` bytes of each row are written when bpp > 1 -- confirm that
        // ImageGray always reports bpp() == 1.
        const size_t row_bytes(width * bpp);

        const size_t threads_per_dim(32);
        const dim3 threads_per_block(static_cast<unsigned int>(threads_per_dim),
                                     static_cast<unsigned int>(threads_per_dim));
        // round up so that partially covered tiles at the right/bottom edges are processed
        const dim3 blocks_per_grid(static_cast<unsigned int>((width + threads_per_dim - 1) / threads_per_dim),
                                   static_cast<unsigned int>((height + threads_per_dim - 1) / threads_per_dim));

        DeviceImage d_in;
        DeviceImage d_out;

        // allocate input/output memories
        Cuda::check(cudaMallocPitch(&d_in.ptr, &d_in.pitch, row_bytes, height));
        Cuda::check(cudaMallocPitch(&d_out.ptr, &d_out.pitch, row_bytes, height));

        // copy an input image to device memory
        Cuda::check(cudaMemcpy2D(d_in.ptr, d_in.pitch, in.data(), in.stride(), row_bytes, height, cudaMemcpyHostToDevice));

        // launch kernel
        binarize<<<blocks_per_grid, threads_per_block>>>(d_out.ptr, d_out.pitch, d_in.ptr, d_in.pitch, width, height, thres);

        // A rejected launch (e.g. a block size the device does not support) is only
        // visible through cudaGetLastError(); without this check the copy below
        // would return uninitialized device memory as the result.
        Cuda::check(cudaGetLastError());

        // copy the result back to host memory
        Cuda::check(cudaMemcpy2D(out.data(), out.stride(), d_out.ptr, d_out.pitch, row_bytes, height, cudaMemcpyDeviceToHost));

        // device memories are released by the DeviceImage destructors
}
86
/**
 * Binarization kernel: one thread handles one byte of the image.
 *
 * The thread's x/y position in the launch grid selects the column/row.
 * Threads that fall outside width x height do nothing, so the grid may be
 * larger than the image.
 *
 * @param out        destination buffer.
 * @param out_pitch  distance in bytes between consecutive rows of `out`.
 * @param in         source buffer.
 * @param in_pitch   distance in bytes between consecutive rows of `in`.
 * @param width      number of columns to process.
 * @param height     number of rows to process.
 * @param thres      values below this become 0, all other values become 255.
 */
__global__
void
FM7b5::binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres)
{
        const size_t col(blockDim.x * blockIdx.x + threadIdx.x);
        const size_t row(blockDim.y * blockIdx.y + threadIdx.y);

        if (col < width && row < height) {
                // rows are `pitch` bytes apart, which may be more than `width`
                const uint8_t* const src_row(in + in_pitch * row);
                uint8_t* const dst_row(out + out_pitch * row);

                dst_row[col] = (src_row[col] >= thres) ? 255 : 0;
        }
}