binarize/binarize.cu

   1 /*
   2         Copyright (C) 2012  fmaj7b5.info
   3
   4         This program is free software: you can redistribute it and/or modify
   5         it under the terms of the GNU General Public License as published by
   6         the Free Software Foundation, either version 2 of the License, or
   7         (at your option) any later version.
   8
   9         This program is distributed in the hope that it will be useful,
  10         but WITHOUT ANY WARRANTY; without even the implied warranty of
  11         MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12         GNU General Public License for more details.
  13
  14         You should have received a copy of the GNU General Public License
  15         along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16 */
  17
  18 #include "stdafx.h"
  19 #include <cuda.h>
  20
  21 #include "binarize.cuh"
  22
  23 using namespace FM7b5;
  24
  25 void
  26 FM7b5::binarize_gpu(ImageGray& out, const ImageGray& in, const uint8_t thres)
  27 {
  28
  29         if (in.width() != out.width() || in.height() != out.height()) {
  30                 throw std::runtime_error("sizes of input and output images are diferent.");
  31         }
  32
  33         const size_t width(in.width()), height(in.height()), bpp(in.bpp());
  34
  35         uint8_t* d_in(nullptr);
  36         uint8_t* d_out(nullptr);
  37         size_t in_pitch(0), out_pitch(0);
  38
  39         const size_t threads_per_dim(32);
  40         dim3 threads_per_block(threads_per_dim, threads_per_dim);
  41         dim3 blocks_per_grid((width + threads_per_block.x - 1)/ threads_per_block.x,
  42                              (height + threads_per_block.y - 1)/ threads_per_block.y);
  43
  44         cudaError_t status;
  45
  46         // allocate input/output memories
  47         status = cudaMallocPitch(&d_in, &in_pitch, width * bpp, height);
  48         if (status != cudaSuccess) {
  49                 goto on_error_in;
  50         }
  51
  52         status = cudaMallocPitch(&d_out, &out_pitch, width * bpp, height);
  53         if (status != cudaSuccess) {
  54                 goto on_error_out;
  55         }
  56
  57         // copy an input image to device memory
  58         status = cudaMemcpy2D(d_in, in_pitch, in.data(), in.stride(), bpp * width, height, cudaMemcpyHostToDevice);
  59         if (status != cudaSuccess) {
  60                 goto on_error_proc;
  61         }
  62
  63         // launch kernel
  64         binarize<<<blocks_per_grid, threads_per_block>>>(d_out, out_pitch, d_in, in_pitch, width, height, thres);
  65
  66         // copy the result back to host memory
  67         status = cudaMemcpy2D(out.data(), out.stride(), d_out, out_pitch, bpp * width, height, cudaMemcpyDeviceToHost);
  68         if (status != cudaSuccess) {
  69                 goto on_error_proc;
  70         }
  71
  72         // free device memories
  73         cudaFree(d_out);
  74         cudaFree(d_in);
  75
  76         return;
  77
  78         // error handling
  79 on_error_proc:
  80         cudaFree(d_out);
  81 on_error_out:
  82         cudaFree(d_in);
  83 on_error_in:
  84                 throw std::runtime_error(cudaGetErrorString(status));
  85 }
  86
  87 __global__
  88 void
  89 FM7b5::binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres)
  90 {
  91         const size_t w(blockDim.x * blockIdx.x + threadIdx.x);
  92         const size_t h(blockDim.y * blockIdx.y + threadIdx.y);
  93
  94         if (w >= width || h >= height) {
  95                 return;
  96         }
  97
  98         out[out_pitch * h + w] = (in[in_pitch * h + w] < thres) ? 0 : 255;
  99 }