int _tmain(int argc, _TCHAR* argv[])
{
- const uint8_t thres(128);
- ImageGray image;
-
try {
+ const uint8_t thres(128);
+ ImageGray image;
+
image = loadPGM("..\\img\\sine.pgm");
+
+ ImageGray out(image.width(), image.height());
+ binarize_cpu(out, image, thres);
+ savePNM(out, "result_cpu.pgm");
+
+ ImageGray out_gpu(image.width(), image.height());
+ binarize_gpu(out_gpu, image, thres);
+ savePNM(out_gpu, "result_gpu.pgm");
}
catch (std::exception& e)
{
return -1;
}
- ImageGray out(image.width(), image.height());
- binarize_cpu(out, image, thres);
- savePNM(out, "result_cpu.pgm");
-
- ImageGray out_gpu(image.width(), image.height());
- binarize_gpu(out_gpu, image, thres);
- savePNM(out_gpu, "result_gpu.pgm");
-
return 0;
}
#include <cuda.h>
#include "binarize.cuh"
+#include "DeviceMemory.cuh"
using namespace FM7b5;
void
FM7b5::binarize_gpu(ImageGray& out, const ImageGray& in, const uint8_t thres)
{
-
if (in.width() != out.width() || in.height() != out.height()) {
		throw std::runtime_error("sizes of input and output images are different.");
}
const size_t width(in.width()), height(in.height()), bpp(in.bpp());
- uint8_t* d_in(nullptr);
- uint8_t* d_out(nullptr);
- size_t in_pitch(0), out_pitch(0);
-
const size_t threads_per_dim(32);
dim3 threads_per_block(threads_per_dim, threads_per_dim);
dim3 blocks_per_grid((width + threads_per_block.x - 1)/ threads_per_block.x,
(height + threads_per_block.y - 1)/ threads_per_block.y);
- cudaError_t status;
-
// allocate input/output memories
- status = cudaMallocPitch(&d_in, &in_pitch, width * bpp, height);
- if (status != cudaSuccess) {
- goto on_error_in;
- }
-
- status = cudaMallocPitch(&d_out, &out_pitch, width * bpp, height);
- if (status != cudaSuccess) {
- goto on_error_out;
- }
+ memory::LinearPitch<uint8_t> d_in(width, height), d_out(width, height);
// copy an input image to device memory
- status = cudaMemcpy2D(d_in, in_pitch, in.data(), in.stride(), bpp * width, height, cudaMemcpyHostToDevice);
- if (status != cudaSuccess) {
- goto on_error_proc;
- }
+ d_in.copy_from(in.data(), bpp * width, height, in.stride());
// launch kernel
- binarize<<<blocks_per_grid, threads_per_block>>>(d_out, out_pitch, d_in, in_pitch, width, height, thres);
+ binarize<<<blocks_per_grid, threads_per_block>>>(d_out.ref(), d_in.ref(), width, height, thres);
// copy the result back to host memory
- status = cudaMemcpy2D(out.data(), out.stride(), d_out, out_pitch, bpp * width, height, cudaMemcpyDeviceToHost);
- if (status != cudaSuccess) {
- goto on_error_proc;
- }
-
- // free device memories
- cudaFree(d_out);
- cudaFree(d_in);
-
- return;
-
- // error handling
-on_error_proc:
- cudaFree(d_out);
-on_error_out:
- cudaFree(d_in);
-on_error_in:
- throw std::runtime_error(cudaGetErrorString(status));
-}
-
-__global__
-void
-FM7b5::binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres)
-{
- const size_t w(blockDim.x * blockIdx.x + threadIdx.x);
- const size_t h(blockDim.y * blockIdx.y + threadIdx.y);
-
- if (w >= width || h >= height) {
- return;
- }
-
- out[out_pitch * h + w] = (in[in_pitch * h + w] < thres) ? 0 : 255;
+ d_out.copy_to(out.data(), bpp * width, height, out.stride());
}
#define FM7b5_BINARIZE_CUH
#include "binarize.h"
+#include "MDView.cuh"
namespace FM7b5
{
- __global__ void binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres = 128);
+ template <class T, class U>
+ __global__ void
+ binarize(const MDView<T, 2> out, const MDView<U, 2> in, const size_t width, const size_t height, const uint8_t thres)
+ {
+ const size_t w(blockDim.x * blockIdx.x + threadIdx.x);
+ const size_t h(blockDim.y * blockIdx.y + threadIdx.y);
+
+ if (w >= width || h >= height) {
+ return;
+ }
+
+ out[h][w] = (in[h][w] < thres) ? 0 : 255;
+ }
}
#endif /* FM7b5_BINARIZE_CUH */
--- /dev/null
+/*
+ Copyright (C) 2012 fmaj7b5.info
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef FM7b5_DEVICE_MEMORY_CUH
+#define FM7b5_DEVICE_MEMORY_CUH
+
+#include <stdexcept>
+#include <type_traits>
+#include <cuda.h>
+
+#include "MDView.cuh"
+
+namespace FM7b5
+{
+ namespace memory
+ {
+ //! Linear 1-D memory
+ template <class T>
+ class Linear
+ {
+ public:
+ typedef T value_type;
+ typedef typename std::add_pointer<T>::type pointer_type;
+ typedef typename std::add_const<pointer_type>::type const_pointer_type;
+ typedef typename std::add_reference<T>::type reference_type;
+
+ Linear(const size_t count = 0)
+ : m_count(count)
+ {
+ m_extent = sizeof(value_type);
+ md_data = static_cast<pointer_type>(allocate(m_extent * count));
+ }
+
+ ~Linear()
+ {
+ deallocate(md_data);
+ }
+
+ void* allocate(const size_t byte)
+ {
+ void* p(nullptr);
+ cudaError_t status;
+
+ status = cudaMalloc(&p, byte);
+ if (status != cudaSuccess) {
+					throw std::runtime_error(cudaGetErrorString(status));
+ }
+
+ return p;
+ }
+
+ void deallocate(void* p)
+ {
+ cudaFree(p);
+ }
+
+ void copy_from(const void* const src, const size_t byte)
+ {
+ cudaMemcpy(md_data, src, byte, cudaMemcpyHostToDevice);
+ }
+
+ void copy_to(void* const dst, const size_t byte)
+ {
+ cudaMemcpy(dst, md_data, byte, cudaMemcpyDeviceToHost);
+ }
+
+ size_t size() const { return m_extent * m_count; }
+ size_t count() const { return m_count; }
+ pointer_type data() const { return md_data; }
+
+ MDView<value_type, 1> ref() const
+ {
+ return MDView<value_type, 1>(md_data, &m_extent);
+ }
+
+ protected:
+ size_t m_extent;
+ size_t m_count;
+ pointer_type md_data;
+ };
+
+ //! Linear 2-D memory
+ template <class T>
+ class LinearPitch
+ {
+ public:
+ typedef T value_type;
+ typedef typename std::add_pointer<T>::type pointer_type;
+ typedef typename std::add_const<pointer_type>::type const_pointer_type;
+ typedef typename std::add_reference<T>::type reference_type;
+
+ LinearPitch(const size_t width, const size_t height)
+ : m_width(width), m_height(height)
+ {
+ md_data = static_cast<pointer_type>(allocate(width, height, &m_extent[0]));
+ m_extent[1] = sizeof(value_type);
+ }
+
+ ~LinearPitch()
+ {
+ deallocate(md_data);
+ }
+
+ void* allocate(const size_t width, const size_t height, size_t* pitch)
+ {
+ void* p(nullptr);
+ cudaError_t status;
+
+ status = cudaMallocPitch(&p, pitch, sizeof(value_type) * width, height);
+ if (status != cudaSuccess) {
+					throw std::runtime_error(cudaGetErrorString(status));
+ }
+
+ return p;
+ }
+
+ void deallocate(void* p)
+ {
+ cudaFree(p);
+ }
+
+ void copy_from(const void* const src, const size_t width, const size_t height, size_t spitch = 0)
+ {
+ if (spitch < 1) {
+ spitch = width;
+ }
+ cudaMemcpy2D(md_data, pitch(), src, spitch, width, height, cudaMemcpyHostToDevice);
+ }
+
+ void copy_to(void* const dst, const size_t width, const size_t height, size_t dpitch = 0)
+ {
+ if (dpitch < 1) {
+ dpitch = width;
+ }
+ cudaMemcpy2D(dst, dpitch, md_data, pitch(), width, height, cudaMemcpyDeviceToHost);
+ }
+
+			size_t size() const { return m_extent[0] * m_height; }
+ size_t width() const { return m_width; }
+ size_t height() const { return m_height; }
+ size_t pitch() const { return m_extent[0]; }
+ pointer_type data() const { return md_data; }
+
+ MDView<value_type, 2> ref() const
+ {
+ return MDView<value_type, 2>(md_data, m_extent);
+ }
+
+ protected:
+ size_t m_width;
+ size_t m_height;
+ size_t m_extent[2];
+ pointer_type md_data;
+ };
+ }
+}
+
+#endif /* FM7b5_DEVICE_MEMORY_CUH */
--- /dev/null
+/*
+ Copyright (C) 2012 fmaj7b5.info
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef FM7b5_MDVIEW_CUH
+#define FM7b5_MDVIEW_CUH
+
+#include <cstdint>
+#include <type_traits>
+
+namespace FM7b5
+{
+ namespace detail
+ {
+ //! copy const qualifier of From to To
+ template <typename From, typename To> struct copy_const { typedef To type; };
+ template <typename From, typename To> struct copy_const<const From, To> { typedef typename std::add_const<To>::type type; };
+
+ //! array subscript access for multi dimentional data
+ template <typename T, size_t D>
+ class MDView
+ {
+ public:
+ typedef T value_type;
+ typedef typename copy_const<T, uint8_t>::type* byte_pointer_type;
+ typedef typename copy_const<T, void>::type* void_pointer_type;
+
+ protected:
+ //! temporal class for resolving the operator[]
+			template <typename U, size_t E>
+			class ref
+			{
+				byte_pointer_type const m_p;
+				const size_t* const m_extent;
+
+			public:
+				typedef ref<U, E-1> result_type;
+
+				__device__
+				ref(void_pointer_type p, const size_t* extent)
+					: m_p(static_cast<byte_pointer_type>(p)), m_extent(extent)
+				{}
+
+				__device__
+				result_type operator[](size_t i) const
+				{
+					return result_type(m_p + *m_extent * i, m_extent + 1);
+				}
+			};
+
+			template <typename U>
+			class ref<U, 1>
+			{
+				byte_pointer_type const m_p;
+				const size_t* const m_extent;
+
+			public:
+				typedef typename std::add_reference<U>::type result_type;
+
+				__device__
+				ref(void_pointer_type p, const size_t* extent)
+					: m_p(static_cast<byte_pointer_type>(p)), m_extent(extent)
+				{}
+
+				__device__
+				result_type operator[](size_t i) const
+				{
+					return *reinterpret_cast<U*>(m_p + *m_extent * i);
+				}
+			};
+
+ byte_pointer_type m_data;
+ size_t m_extent[D];
+
+ public:
+ MDView() {}
+
+ MDView(void_pointer_type p, const size_t extent[D])
+ : m_data(static_cast<byte_pointer_type>(p))
+ {
+ for (size_t i = 0; i < D; ++i) {
+ m_extent[i] = extent[i];
+ }
+ }
+
+ __device__
+ typename ref<T, D>::result_type operator[](size_t i) const
+ {
+ ref<T, D> r(m_data, m_extent);
+ return r[i];
+ }
+ };
+ }
+
+ using detail::MDView;
+}
+
+#endif /* FM7b5_MDVIEW_CUH */
<None Include="ReadMe.txt" />
</ItemGroup>
<ItemGroup>
+ <ClInclude Include="DeviceMemory.cuh" />
<ClInclude Include="Image.h" />
<ClInclude Include="ImageIO.h" />
+ <ClInclude Include="MDView.cuh" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="targetver.h" />
</ItemGroup>
<ClInclude Include="ImageIO.h">
<Filter>ヘッダー ファイル</Filter>
</ClInclude>
+ <ClInclude Include="DeviceMemory.cuh">
+ <Filter>ヘッダー ファイル</Filter>
+ </ClInclude>
+ <ClInclude Include="MDView.cuh">
+ <Filter>ヘッダー ファイル</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="stdafx.cpp">