メモリ周りの補助クラスを追加 (Add helper classes for memory management)

[cuda.git] / binarize / binarize.cu
diff --git a/binarize/binarize.cu b/binarize/binarize.cu

index 4f5e447..3d8a8bd 100644 (file)
--- a/binarize/binarize.cu
+++ b/binarize/binarize.cu
@@ -19,81 +19,33 @@
  #include <cuda.h>
  
  #include "binarize.cuh"
+#include "DeviceMemory.cuh"
  
  using namespace FM7b5;
  
  void
  FM7b5::binarize_gpu(ImageGray& out, const ImageGray& in, const uint8_t thres)
  {
-
         if (in.width() != out.width() || in.height() != out.height()) {
                 throw std::runtime_error("sizes of input and output images are diferent.");
         }
  
         const size_t width(in.width()), height(in.height()), bpp(in.bpp());
  
-       uint8_t* d_in(nullptr);
-       uint8_t* d_out(nullptr);
-       size_t in_pitch(0), out_pitch(0);
-
         const size_t threads_per_dim(32);
         dim3 threads_per_block(threads_per_dim, threads_per_dim);
         dim3 blocks_per_grid((width + threads_per_block.x - 1)/ threads_per_block.x,
                              (height + threads_per_block.y - 1)/ threads_per_block.y);
  
-       cudaError_t status;
-
         // allocate input/output memories
-       status = cudaMallocPitch(&d_in, &in_pitch, width * bpp, height);
-       if (status != cudaSuccess) {
-               goto on_error_in;
-       }
-
-       status = cudaMallocPitch(&d_out, &out_pitch, width * bpp, height);
-       if (status != cudaSuccess) {
-               goto on_error_out;
-       }
+       memory::LinearPitch<uint8_t> d_in(width, height), d_out(width, height);
  
         // copy an input image to device memory
-       status = cudaMemcpy2D(d_in, in_pitch, in.data(), in.stride(), bpp * width, height, cudaMemcpyHostToDevice);
-       if (status != cudaSuccess) {
-               goto on_error_proc;
-       }
+       d_in.copy_from(in.data(), bpp * width, height, in.stride());
  
         // launch kernel
-       binarize<<<blocks_per_grid, threads_per_block>>>(d_out, out_pitch, d_in, in_pitch, width, height, thres);
+       binarize<<<blocks_per_grid, threads_per_block>>>(d_out.ref(), d_in.ref(), width, height, thres);
  
         // copy the result back to host memory
-       status = cudaMemcpy2D(out.data(), out.stride(), d_out, out_pitch, bpp * width, height, cudaMemcpyDeviceToHost);
-       if (status != cudaSuccess) {
-               goto on_error_proc;
-       }
-
-       // free device memories
-       cudaFree(d_out);
-       cudaFree(d_in);
-
-       return;
-
-       // error handling
-on_error_proc:
-       cudaFree(d_out);
-on_error_out:
-       cudaFree(d_in);
-on_error_in:
-               throw std::runtime_error(cudaGetErrorString(status));
-}
-
-__global__
-void
-FM7b5::binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres)
-{
-       const size_t w(blockDim.x * blockIdx.x + threadIdx.x);
-       const size_t h(blockDim.y * blockIdx.y + threadIdx.y);
-
-       if (w >= width || h >= height) {
-               return;
-       }
-
-       out[out_pitch * h + w] = (in[in_pitch * h + w] < thres) ? 0 : 255;
+       d_out.copy_to(out.data(), bpp * width, height, out.stride());
  }