int _tmain(int argc, _TCHAR* argv[])
{
- const uint8_t thres(128);
- ImageGray image;
-
try {
+ const uint8_t thres(128);
+ ImageGray image;
+
image = loadPGM("..\\img\\sine.pgm");
+
+ ImageGray out(image.width(), image.height());
+ binarize_cpu(out, image, thres);
+ savePNM(out, "result_cpu.pgm");
+
+ ImageGray out_gpu(image.width(), image.height());
+ binarize_gpu(out_gpu, image, thres);
+ savePNM(out_gpu, "result_gpu.pgm");
}
catch (std::exception& e)
{
return -1;
}
- ImageGray out(image.width(), image.height());
- binarize_cpu(out, image, thres);
- savePNM(out, "result_cpu.pgm");
-
- ImageGray out_gpu(image.width(), image.height());
- binarize_gpu(out_gpu, image, thres);
- savePNM(out_gpu, "result_gpu.pgm");
-
return 0;
}
#include <cuda.h>
#include "binarize.cuh"
+#include "DeviceMemory.cuh"
using namespace FM7b5;
void
FM7b5::binarize_gpu(ImageGray& out, const ImageGray& in, const uint8_t thres)
{
-
if (in.width() != out.width() || in.height() != out.height()) {
		throw std::runtime_error("sizes of input and output images are different.");
}
const size_t width(in.width()), height(in.height()), bpp(in.bpp());
- uint8_t* d_in(nullptr);
- uint8_t* d_out(nullptr);
- size_t in_pitch(0), out_pitch(0);
-
const size_t threads_per_dim(32);
dim3 threads_per_block(threads_per_dim, threads_per_dim);
dim3 blocks_per_grid((width + threads_per_block.x - 1)/ threads_per_block.x,
(height + threads_per_block.y - 1)/ threads_per_block.y);
- cudaError_t status;
-
// allocate input/output memories
- status = cudaMallocPitch(&d_in, &in_pitch, width * bpp, height);
- if (status != cudaSuccess) {
- goto on_error_in;
- }
-
- status = cudaMallocPitch(&d_out, &out_pitch, width * bpp, height);
- if (status != cudaSuccess) {
- goto on_error_out;
- }
+ memory::LinearPitch<uint8_t> d_in(width, height), d_out(width, height);
// copy an input image to device memory
- status = cudaMemcpy2D(d_in, in_pitch, in.data(), in.stride(), bpp * width, height, cudaMemcpyHostToDevice);
- if (status != cudaSuccess) {
- goto on_error_proc;
- }
+ d_in.copy_from(in.data(), bpp * width, height, in.stride());
// launch kernel
- binarize<<<blocks_per_grid, threads_per_block>>>(d_out, out_pitch, d_in, in_pitch, width, height, thres);
+ binarize<<<blocks_per_grid, threads_per_block>>>(d_out.ref(), d_in.ref(), width, height, thres);
// copy the result back to host memory
- status = cudaMemcpy2D(out.data(), out.stride(), d_out, out_pitch, bpp * width, height, cudaMemcpyDeviceToHost);
- if (status != cudaSuccess) {
- goto on_error_proc;
- }
-
- // free device memories
- cudaFree(d_out);
- cudaFree(d_in);
-
- return;
-
- // error handling
-on_error_proc:
- cudaFree(d_out);
-on_error_out:
- cudaFree(d_in);
-on_error_in:
- throw std::runtime_error(cudaGetErrorString(status));
-}
-
-__global__
-void
-FM7b5::binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres)
-{
- const size_t w(blockDim.x * blockIdx.x + threadIdx.x);
- const size_t h(blockDim.y * blockIdx.y + threadIdx.y);
-
- if (w >= width || h >= height) {
- return;
- }
-
- out[out_pitch * h + w] = (in[in_pitch * h + w] < thres) ? 0 : 255;
+ d_out.copy_to(out.data(), bpp * width, height, out.stride());
}
#define FM7b5_BINARIZE_CUH
#include "binarize.h"
+#include "MDView.cuh"
namespace FM7b5
{
- __global__ void binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres = 128);
+ template <class T, class U>
+ __global__ void
+ binarize(const MDView<T, 2> out, const MDView<U, 2> in, const size_t width, const size_t height, const uint8_t thres)
+ {
+ const size_t w(blockDim.x * blockIdx.x + threadIdx.x);
+ const size_t h(blockDim.y * blockIdx.y + threadIdx.y);
+
+ if (w >= width || h >= height) {
+ return;
+ }
+
+ out[h][w] = (in[h][w] < thres) ? 0 : 255;
+ }
}
#endif /* FM7b5_BINARIZE_CUH */
--- /dev/null
+/*
+ Copyright (C) 2012 fmaj7b5.info
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef FM7b5_DEVICE_MEMORY_CUH
+#define FM7b5_DEVICE_MEMORY_CUH
+
+#include <stdexcept>
+#include <type_traits>
+#include <cuda.h>
+
+#include "MDView.cuh"
+
+namespace FM7b5
+{
+ namespace memory
+ {
+ //! Linear 1-D memory
+ template <class T>
+ class Linear
+ {
+ public:
+ typedef T value_type;
+ typedef typename std::add_pointer<T>::type pointer_type;
+ typedef typename std::add_const<pointer_type>::type const_pointer_type;
+ typedef typename std::add_reference<T>::type reference_type;
+
+ Linear(const size_t count = 0)
+ : m_count(count)
+ {
+ m_extent = sizeof(value_type);
+ md_data = static_cast<pointer_type>(allocate(m_extent * count));
+ }
+
+ ~Linear()
+ {
+ deallocate(md_data);
+ }
+
+ void* allocate(const size_t byte)
+ {
+ void* p(nullptr);
+ cudaError_t status;
+
+ status = cudaMalloc(&p, byte);
+ if (status != cudaSuccess) {
+					throw std::runtime_error(cudaGetErrorString(status));
+ }
+
+ return p;
+ }
+
+ void deallocate(void* p)
+ {
+ cudaFree(p);
+ }
+
+ void copy_from(const void* const src, const size_t byte)
+ {
+ cudaMemcpy(md_data, src, byte, cudaMemcpyHostToDevice);
+ }
+
+ void copy_to(void* const dst, const size_t byte)
+ {
+ cudaMemcpy(dst, md_data, byte, cudaMemcpyDeviceToHost);
+ }
+
+ size_t size() const { return m_extent * m_count; }
+ size_t count() const { return m_count; }
+ pointer_type data() const { return md_data; }
+
+ MDView<value_type, 1> ref() const
+ {
+ return MDView<value_type, 1>(md_data, &m_extent);
+ }
+
+ protected:
+ size_t m_extent;
+ size_t m_count;
+ pointer_type md_data;
+ };
+
+ //! Linear 2-D memory
+ template <class T>
+ class LinearPitch
+ {
+ public:
+ typedef T value_type;
+ typedef typename std::add_pointer<T>::type pointer_type;
+ typedef typename std::add_const<pointer_type>::type const_pointer_type;
+ typedef typename std::add_reference<T>::type reference_type;
+
+ LinearPitch(const size_t width, const size_t height)
+ : m_width(width), m_height(height)
+ {
+ md_data = static_cast<pointer_type>(allocate(width, height, &m_extent[0]));
+ m_extent[1] = sizeof(value_type);
+ }
+
+ ~LinearPitch()
+ {
+ deallocate(md_data);
+ }
+
+ void* allocate(const size_t width, const size_t height, size_t* pitch)
+ {
+ void* p(nullptr);
+ cudaError_t status;
+
+ status = cudaMallocPitch(&p, pitch, sizeof(value_type) * width, height);
+ if (status != cudaSuccess) {
+					throw std::runtime_error(cudaGetErrorString(status));
+ }
+
+ return p;
+ }
+
+ void deallocate(void* p)
+ {
+ cudaFree(p);
+ }
+
+ void copy_from(const void* const src, const size_t width, const size_t height, size_t spitch = 0)
+ {
+ if (spitch < 1) {
+ spitch = width;
+ }
+ cudaMemcpy2D(md_data, pitch(), src, spitch, width, height, cudaMemcpyHostToDevice);
+ }
+
+ void copy_to(void* const dst, const size_t width, const size_t height, size_t dpitch = 0)
+ {
+ if (dpitch < 1) {
+ dpitch = width;
+ }
+ cudaMemcpy2D(dst, dpitch, md_data, pitch(), width, height, cudaMemcpyDeviceToHost);
+ }
+
+			size_t size() const { return m_extent[0] * m_height; }
+ size_t width() const { return m_width; }
+ size_t height() const { return m_height; }
+ size_t pitch() const { return m_extent[0]; }
+ pointer_type data() const { return md_data; }
+
+ MDView<value_type, 2> ref() const
+ {
+ return MDView<value_type, 2>(md_data, m_extent);
+ }
+
+ protected:
+ size_t m_width;
+ size_t m_height;
+ size_t m_extent[2];
+ pointer_type md_data;
+ };
+ }
+}
+
+#endif /* FM7b5_DEVICE_MEMORY_CUH */
--- /dev/null
+/*
+ Copyright (C) 2012 fmaj7b5.info
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef FM7b5_MDVIEW_CUH
+#define FM7b5_MDVIEW_CUH
+
+#include <cstdint>
+#include <type_traits>
+
+namespace FM7b5
+{
+ namespace detail
+ {
+ //! copy const qualifier of From to To
+ template <typename From, typename To> struct copy_const { typedef To type; };
+ template <typename From, typename To> struct copy_const<const From, To> { typedef typename std::add_const<To>::type type; };
+
+ //! array subscript access for multi dimentional data
+ template <typename T, size_t D>
+ class MDView
+ {
+ public:
+ typedef T value_type;
+ typedef typename copy_const<T, uint8_t>::type* byte_pointer_type;
+ typedef typename copy_const<T, void>::type* void_pointer_type;
+
+ protected:
+ //! temporal class for resolving the operator[]
+			template <typename U, size_t E>
+			class ref
+			{
+				byte_pointer_type const m_p;
+				const size_t* const m_extent;
+
+			public:
+				typedef ref<U, E-1> result_type;
+
+				__device__
+				ref(void_pointer_type p, const size_t* extent)
+					: m_p(static_cast<byte_pointer_type>(p)), m_extent(extent)
+				{}
+
+				__device__
+				result_type operator[](size_t i) const
+				{
+					return result_type(m_p + *m_extent * i, m_extent + 1);
+				}
+			};
+
+			template <typename U>
+			class ref<U, 1>
+			{
+				byte_pointer_type const m_p;
+				const size_t* const m_extent;
+
+			public:
+				typedef typename std::add_reference<U>::type result_type;
+
+				__device__
+				ref(void_pointer_type p, const size_t* extent)
+					: m_p(static_cast<byte_pointer_type>(p)), m_extent(extent)
+				{}
+
+				__device__
+				result_type operator[](size_t i) const
+				{
+					return *reinterpret_cast<U*>(m_p + *m_extent * i);
+				}
+			};
+
+ byte_pointer_type m_data;
+ size_t m_extent[D];
+
+ public:
+ MDView() {}
+
+ MDView(void_pointer_type p, const size_t extent[D])
+ : m_data(static_cast<byte_pointer_type>(p))
+ {
+ for (size_t i = 0; i < D; ++i) {
+ m_extent[i] = extent[i];
+ }
+ }
+
+ __device__
+ typename ref<T, D>::result_type operator[](size_t i) const
+ {
+ ref<T, D> r(m_data, m_extent);
+ return r[i];
+ }
+ };
+ }
+
+ using detail::MDView;
+}
+
+#endif /* FM7b5_MDVIEW_CUH */
<None Include="ReadMe.txt" />
</ItemGroup>
<ItemGroup>
+ <ClInclude Include="DeviceMemory.cuh" />
<ClInclude Include="Image.h" />
<ClInclude Include="ImageIO.h" />
+ <ClInclude Include="MDView.cuh" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
</ItemGroup>
<ClInclude Include="ImageIO.h">
<Filter>ヘッダー ファイル</Filter>
</ClInclude>
+ <ClInclude Include="DeviceMemory.cuh">
+ <Filter>ヘッダー ファイル</Filter>
+ </ClInclude>
+ <ClInclude Include="MDView.cuh">
+ <Filter>ヘッダー ファイル</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="stdafx.cpp">