/*
	Copyright (C) 2012 fmaj7b5.info

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 2 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "stdafx.h"

#include <stdexcept>

#include "binarize.cuh"

using namespace FM7b5;

/*
	Binarize a gray image on the GPU.

	Uploads `in` to pitched device memory, runs the `binarize` kernel with one
	thread per pixel, and downloads the result into `out`.  Pixels whose value
	is below `thres` become 0, all others become 255.

	out   : destination image; must have the same width and height as `in`.
	in    : source image.
	thres : threshold value.

	Throws std::runtime_error when the image sizes differ, or when any CUDA
	call (allocation, copy, kernel launch) reports an error; the message is
	then the text returned by cudaGetErrorString().  Device buffers are
	released on every exit path.

	NOTE(review): rows are allocated and copied as `width * bpp` bytes, but
	the kernel only touches the first `width` bytes of each row.  This
	presumably relies on ImageGray having bpp() == 1 -- confirm.
*/
void FM7b5::binarize_gpu(ImageGray& out, const ImageGray& in, const uint8_t thres)
{
	if (in.width() != out.width() || in.height() != out.height()) {
		throw std::runtime_error("sizes of input and output images are diferent.");
	}

	const size_t width(in.width()), height(in.height()), bpp(in.bpp());

	// Nothing to process; also avoids launching a kernel with a zero-sized
	// grid, which is an invalid launch configuration.
	if (width == 0 || height == 0) {
		return;
	}

	// NOTE: every local is declared before the first `goto` so that no jump
	// crosses an initialization.
	uint8_t* d_in(nullptr);
	uint8_t* d_out(nullptr);
	size_t in_pitch(0), out_pitch(0);

	// 32 x 32 threads per block, enough blocks to cover the whole image
	// (rounded up; the kernel discards out-of-range threads).
	const size_t threads_per_dim(32);
	dim3 threads_per_block(threads_per_dim, threads_per_dim);
	dim3 blocks_per_grid((width + threads_per_block.x - 1) / threads_per_block.x,
	                     (height + threads_per_block.y - 1) / threads_per_block.y);

	cudaError_t status;

	// allocate input/output memories
	status = cudaMallocPitch(&d_in, &in_pitch, width * bpp, height);
	if (status != cudaSuccess) {
		goto on_error_in;
	}

	status = cudaMallocPitch(&d_out, &out_pitch, width * bpp, height);
	if (status != cudaSuccess) {
		goto on_error_out;
	}

	// copy an input image to device memory
	status = cudaMemcpy2D(d_in, in_pitch, in.data(), in.stride(), bpp * width, height, cudaMemcpyHostToDevice);
	if (status != cudaSuccess) {
		goto on_error_proc;
	}

	// launch kernel
	binarize<<<blocks_per_grid, threads_per_block>>>(d_out, out_pitch, d_in, in_pitch, width, height, thres);

	// A kernel launch does not return a status itself; a rejected launch
	// (e.g. a block size the device does not support) is only visible through
	// cudaGetLastError().  Without this check a failed launch would go
	// unnoticed and uninitialized device memory would be copied back.
	status = cudaGetLastError();
	if (status != cudaSuccess) {
		goto on_error_proc;
	}

	// copy the result back to host memory
	status = cudaMemcpy2D(out.data(), out.stride(), d_out, out_pitch, bpp * width, height, cudaMemcpyDeviceToHost);
	if (status != cudaSuccess) {
		goto on_error_proc;
	}

	// free device memories
	cudaFree(d_out);
	cudaFree(d_in);

	return;

	// error handling: labels fall through so that everything allocated so far
	// is released before the error is reported.
on_error_proc:
	cudaFree(d_out);
on_error_out:
	cudaFree(d_in);
on_error_in:
	throw std::runtime_error(cudaGetErrorString(status));
}

/*
	Threshold kernel: one thread handles one pixel.

	Expects a 2D launch in which x indexes columns and y indexes rows; the
	grid may be larger than the image, surplus threads return immediately.
	Uses no shared memory.

	out, in              : device buffers, one byte per pixel.
	out_pitch, in_pitch  : distance in bytes between the starts of two
	                       consecutive rows of the respective buffer.
	width, height        : image size in pixels.
	thres                : pixels below this value become 0, others 255.
*/
__global__
void FM7b5::binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres)
{
	// Widen before multiplying so the index is computed in size_t rather
	// than in 32-bit unsigned arithmetic.
	const size_t w(static_cast<size_t>(blockDim.x) * blockIdx.x + threadIdx.x);
	const size_t h(static_cast<size_t>(blockDim.y) * blockIdx.y + threadIdx.y);

	// The grid is rounded up to whole blocks; ignore threads outside the image.
	if (w >= width || h >= height) {
		return;
	}

	out[out_pitch * h + w] = (in[in_pitch * h + w] < thres) ? 0 : 255;
}