X-Git-Url: http://www.fmaj7b5.info/git?p=cuda.git;a=blobdiff_plain;f=binarize%2Fbinarize.cu;fp=binarize%2Fbinarize.cu;h=4f5e44723ee59dc9b53e87b89c413de3e879406b;hp=0000000000000000000000000000000000000000;hb=7b77a912a4a1202f677ae9dbff672758e2b945e4;hpb=8b2c111ed060599c6bc93b48732d36f8aa48eb2b diff --git a/binarize/binarize.cu b/binarize/binarize.cu new file mode 100644 index 0000000..4f5e447 --- /dev/null +++ b/binarize/binarize.cu @@ -0,0 +1,99 @@ +/* + Copyright (C) 2012 fmaj7b5.info + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/

#include "stdafx.h"
#include <stdexcept>

#include "binarize.cuh"

using namespace FM7b5;

namespace {

// Owns one pitched device allocation and releases it when the scope is left,
// whether by normal return or by an exception.
struct DevicePitchedBuffer {
    uint8_t* ptr;
    size_t pitch;

    DevicePitchedBuffer() : ptr(nullptr), pitch(0) {}
    ~DevicePitchedBuffer() { cudaFree(ptr); }  // freeing a null pointer is a no-op

    DevicePitchedBuffer(const DevicePitchedBuffer&) = delete;
    DevicePitchedBuffer& operator=(const DevicePitchedBuffer&) = delete;
};

// Turns a failed CUDA status into a std::runtime_error carrying the CUDA error string.
void throw_if_cuda_failed(const cudaError_t status)
{
    if (status != cudaSuccess) {
        throw std::runtime_error(cudaGetErrorString(status));
    }
}

} // namespace

/*
    Thresholds `in` on the GPU and stores the result in `out`.

    The input rows are copied to a pitched device buffer, the `binarize`
    kernel is launched with one thread per (column, row) position, and the
    result rows are copied back into `out`.

    out   : destination image; must have the same width and height as `in`.
    in    : source image.
    thres : bytes below this value become 0, all others become 255.

    Throws std::runtime_error when the image sizes differ or when any CUDA
    call (allocation, copy, kernel launch) reports an error.

    NOTE(review): each row is allocated and copied as `width * bpp` bytes, but
    the kernel only visits the first `width` bytes of a row. This is only
    consistent if bpp() is 1 for ImageGray -- confirm against the ImageGray
    definition.
*/
void
FM7b5::binarize_gpu(ImageGray& out, const ImageGray& in, const uint8_t thres)
{
    if (in.width() != out.width() || in.height() != out.height()) {
        throw std::runtime_error("sizes of input and output images are diferent.");
    }

    const size_t width(in.width()), height(in.height()), bpp(in.bpp());

    // An empty image needs no work, and a grid with a zero dimension is an
    // invalid launch configuration, so bail out before touching the device.
    if (width == 0 || height == 0) {
        return;
    }

    const size_t row_bytes(width * bpp);

    // 32 x 32 threads per block; the grid is rounded up so it covers the whole image.
    const unsigned int threads_per_dim(32);
    const dim3 threads_per_block(threads_per_dim, threads_per_dim);
    const dim3 blocks_per_grid(
        static_cast<unsigned int>((width + threads_per_block.x - 1) / threads_per_block.x),
        static_cast<unsigned int>((height + threads_per_block.y - 1) / threads_per_block.y));

    // Destroyed in reverse order of declaration: d_out is freed first, then d_in.
    DevicePitchedBuffer d_in;
    DevicePitchedBuffer d_out;

    // allocate input/output memories
    throw_if_cuda_failed(cudaMallocPitch(&d_in.ptr, &d_in.pitch, row_bytes, height));
    throw_if_cuda_failed(cudaMallocPitch(&d_out.ptr, &d_out.pitch, row_bytes, height));

    // copy an input image to device memory
    throw_if_cuda_failed(cudaMemcpy2D(d_in.ptr, d_in.pitch, in.data(), in.stride(),
                                      row_bytes, height, cudaMemcpyHostToDevice));

    // launch kernel
    binarize<<<blocks_per_grid, threads_per_block>>>(d_out.ptr, d_out.pitch, d_in.ptr, d_in.pitch, width, height, thres);

    // A launch does not return a status itself; ask the runtime for it explicitly.
    throw_if_cuda_failed(cudaGetLastError());

    // copy the result back to host memory
    throw_if_cuda_failed(cudaMemcpy2D(out.data(), out.stride(), d_out.ptr, d_out.pitch,
                                      row_bytes, height, cudaMemcpyDeviceToHost));
}

/*
    Kernel: writes 0 or 255 to out for every byte of in, depending on thres.

    Expects a 2D launch in which x indexes the column and y indexes the row;
    threads that fall outside width x height return without touching memory,
    so the grid may overhang the image.

    out, in             : base pointers of the output / input buffers.
    out_pitch, in_pitch : distance in bytes between the starts of two
                          consecutive rows of the respective buffer.
    width, height       : number of columns / rows to process.
    thres               : input bytes below this value yield 0, others 255.
*/
__global__
void
FM7b5::binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres)
{
    // Widen before multiplying so the product is computed in size_t, not in 32 bits.
    const size_t w(static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x);
    const size_t h(static_cast<size_t>(blockDim.y) * blockIdx.y + threadIdx.y);

    if (w >= width || h >= height) {
        return;
    }

    out[out_pitch * h + w] = (in[in_pitch * h + w] < thres) ? 0 : 255;
}