From 869779783d0c06c34e02062ac7d23dac316e73a9 Mon Sep 17 00:00:00 2001 From: dev Date: Mon, 5 Nov 2012 22:51:26 +0900 Subject: [PATCH] =?utf8?q?=E3=83=A1=E3=83=A2=E3=83=AA=E5=91=A8=E3=82=8A=E3=81?= =?utf8?q?=AE=E8=A3=9C=E5=8A=A9=E3=82=AF=E3=83=A9=E3=82=B9=E3=82=92=E8=BF=BD?= =?utf8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- binarize/binarize.cpp | 22 ++--- binarize/binarize.cu | 58 ++----------- binarize/binarize.cuh | 15 +++- libutils/DeviceMemory.cuh | 172 ++++++++++++++++++++++++++++++++++++++ libutils/MDView.cuh | 110 ++++++++++++++++++++++++ libutils/libutils.vcxproj | 2 + libutils/libutils.vcxproj.filters | 6 ++ 7 files changed, 320 insertions(+), 65 deletions(-) create mode 100644 libutils/DeviceMemory.cuh create mode 100644 libutils/MDView.cuh diff --git a/binarize/binarize.cpp b/binarize/binarize.cpp index 940132e..792b290 100644 --- a/binarize/binarize.cpp +++ b/binarize/binarize.cpp @@ -31,11 +31,19 @@ static void binarize_cpu(ImageGray& out, const ImageGray& in, const uint8_t thre int _tmain(int argc, _TCHAR* argv[]) { - const uint8_t thres(128); - ImageGray image; - try { + const uint8_t thres(128); + ImageGray image; + image = loadPGM("..\\img\\sine.pgm"); + + ImageGray out(image.width(), image.height()); + binarize_cpu(out, image, thres); + savePNM(out, "result_cpu.pgm"); + + ImageGray out_gpu(image.width(), image.height()); + binarize_gpu(out_gpu, image, thres); + savePNM(out_gpu, "result_gpu.pgm"); } catch (std::exception& e) { @@ -43,14 +51,6 @@ int _tmain(int argc, _TCHAR* argv[]) return -1; } - ImageGray out(image.width(), image.height()); - binarize_cpu(out, image, thres); - savePNM(out, "result_cpu.pgm"); - - ImageGray out_gpu(image.width(), image.height()); - binarize_gpu(out_gpu, image, thres); - savePNM(out_gpu, "result_gpu.pgm"); - return 0; } diff --git a/binarize/binarize.cu b/binarize/binarize.cu index 4f5e447..3d8a8bd 100644 --- a/binarize/binarize.cu +++ b/binarize/binarize.cu @@ -19,81 +19,33 @@ #include #include "binarize.cuh" +#include "DeviceMemory.cuh" using namespace FM7b5; void FM7b5::binarize_gpu(ImageGray& out, const ImageGray& in, const uint8_t thres) { - if (in.width() != out.width() || in.height() != out.height()) { throw std::runtime_error("sizes of input and output images are diferent."); } const size_t width(in.width()), height(in.height()), bpp(in.bpp()); - uint8_t* d_in(nullptr); - uint8_t* d_out(nullptr); - size_t in_pitch(0), out_pitch(0); - const size_t threads_per_dim(32); dim3 threads_per_block(threads_per_dim, threads_per_dim); dim3 blocks_per_grid((width + threads_per_block.x - 1)/ threads_per_block.x, (height + threads_per_block.y - 1)/ threads_per_block.y); - cudaError_t status; - // allocate input/output memories - status = cudaMallocPitch(&d_in, &in_pitch, width * bpp, height); - if (status != cudaSuccess) { - goto on_error_in; - } - - status = cudaMallocPitch(&d_out, &out_pitch, width * bpp, height); - if (status != cudaSuccess) { - goto on_error_out; - } + memory::LinearPitch d_in(width, height), d_out(width, height); // copy an input image to device memory - status = cudaMemcpy2D(d_in, in_pitch, in.data(), in.stride(), bpp * width, height, cudaMemcpyHostToDevice); - if (status != cudaSuccess) { - goto on_error_proc; - } + d_in.copy_from(in.data(), bpp * width, height, in.stride()); // launch kernel - binarize<<>>(d_out, out_pitch, d_in, in_pitch, width, height, thres); + binarize<<>>(d_out.ref(), d_in.ref(), width, height, thres); // copy the result back to host memory - status = cudaMemcpy2D(out.data(), out.stride(), d_out, out_pitch, bpp * width, height, cudaMemcpyDeviceToHost); - if (status != cudaSuccess) { - goto on_error_proc; - } - - // free device memories - cudaFree(d_out); - cudaFree(d_in); - - return; - - // error handling -on_error_proc: - cudaFree(d_out); -on_error_out: - cudaFree(d_in); -on_error_in: - throw std::runtime_error(cudaGetErrorString(status)); -} - -__global__ -void -FM7b5::binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres) -{ - const size_t w(blockDim.x * blockIdx.x + threadIdx.x); - const size_t h(blockDim.y * blockIdx.y + threadIdx.y); - - if (w >= width || h >= height) { - return; - } - - out[out_pitch * h + w] = (in[in_pitch * h + w] < thres) ? 0 : 255; + d_out.copy_to(out.data(), bpp * width, height, out.stride()); } diff --git a/binarize/binarize.cuh b/binarize/binarize.cuh index 0ea3dd7..e8b690e 100644 --- a/binarize/binarize.cuh +++ b/binarize/binarize.cuh @@ -19,10 +19,23 @@ #define FM7b5_BINARIZE_CUH #include "binarize.h" +#include "MDView.cuh" namespace FM7b5 { - __global__ void binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres = 128); + template + __global__ void + binarize(const MDView out, const MDView in, const size_t width, const size_t height, const uint8_t thres) + { + const size_t w(blockDim.x * blockIdx.x + threadIdx.x); + const size_t h(blockDim.y * blockIdx.y + threadIdx.y); + + if (w >= width || h >= height) { + return; + } + + out[h][w] = (in[h][w] < thres) ? 0 : 255; + } } #endif /* FM7b5_BINARIZE_CUH */ diff --git a/libutils/DeviceMemory.cuh b/libutils/DeviceMemory.cuh new file mode 100644 index 0000000..803b75a --- /dev/null +++ b/libutils/DeviceMemory.cuh @@ -0,0 +1,172 @@ +/* + Copyright (C) 2012 fmaj7b5.info + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef FM7b5_DEVICE_MEMORY_CUH +#define FM7b5_DEVICE_MEMORY_CUH + +#include +#include +#include + +#include "MDView.cuh" + +namespace FM7b5 +{ + namespace memory + { + //! Linear 1-D memory + template + class Linear + { + public: + typedef T value_type; + typedef typename std::add_pointer::type pointer_type; + typedef typename std::add_const::type const_pointer_type; + typedef typename std::add_reference::type reference_type; + + Linear(const size_t count = 0) + : m_count(count) + { + m_extent = sizeof(value_type); + md_data = static_cast(allocate(m_extent * count)); + } + + ~Linear() + { + deallocate(md_data); + } + + void* allocate(const size_t byte) + { + void* p(nullptr); + cudaError_t status; + + status = cudaMalloc(&p, byte); + if (status != cudaSuccess) { + throw std::bad_alloc(cudaGetErrorString(status)); + } + + return p; + } + + void deallocate(void* p) + { + cudaFree(p); + } + + void copy_from(const void* const src, const size_t byte) + { + cudaMemcpy(md_data, src, byte, cudaMemcpyHostToDevice); + } + + void copy_to(void* const dst, const size_t byte) + { + cudaMemcpy(dst, md_data, byte, cudaMemcpyDeviceToHost); + } + + size_t size() const { return m_extent * m_count; } + size_t count() const { return m_count; } + pointer_type data() const { return md_data; } + + MDView ref() const + { + return MDView(md_data, &m_extent); + } + + protected: + size_t m_extent; + size_t m_count; + pointer_type md_data; + }; + + //! Linear 2-D memory + template + class LinearPitch + { + public: + typedef T value_type; + typedef typename std::add_pointer::type pointer_type; + typedef typename std::add_const::type const_pointer_type; + typedef typename std::add_reference::type reference_type; + + LinearPitch(const size_t width, const size_t height) + : m_width(width), m_height(height) + { + md_data = static_cast(allocate(width, height, &m_extent[0])); + m_extent[1] = sizeof(value_type); + } + + ~LinearPitch() + { + deallocate(md_data); + } + + void* allocate(const size_t width, const size_t height, size_t* pitch) + { + void* p(nullptr); + cudaError_t status; + + status = cudaMallocPitch(&p, pitch, sizeof(value_type) * width, height); + if (status != cudaSuccess) { + throw std::bad_alloc(cudaGetErrorString(status)); + } + + return p; + } + + void deallocate(void* p) + { + cudaFree(p); + } + + void copy_from(const void* const src, const size_t width, const size_t height, size_t spitch = 0) + { + if (spitch < 1) { + spitch = width; + } + cudaMemcpy2D(md_data, pitch(), src, spitch, width, height, cudaMemcpyHostToDevice); + } + + void copy_to(void* const dst, const size_t width, const size_t height, size_t dpitch = 0) + { + if (dpitch < 1) { + dpitch = width; + } + cudaMemcpy2D(dst, dpitch, md_data, pitch(), width, height, cudaMemcpyDeviceToHost); + } + + size_t size() const { return m_extent[1] * m_height; } + size_t width() const { return m_width; } + size_t height() const { return m_height; } + size_t pitch() const { return m_extent[0]; } + pointer_type data() const { return md_data; } + + MDView ref() const + { + return MDView(md_data, m_extent); + } + + protected: + size_t m_width; + size_t m_height; + size_t m_extent[2]; + pointer_type md_data; + }; + } +} + +#endif /* FM7b5_DEVICE_MEMORY_CUH */ diff --git a/libutils/MDView.cuh b/libutils/MDView.cuh new file mode 100644 index 0000000..4035de4 --- /dev/null +++ b/libutils/MDView.cuh @@ -0,0 +1,110 @@ +/* + Copyright (C) 2012 fmaj7b5.info + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef FM7b5_MDVIEW_CUH +#define FM7b5_MDVIEW_CUH + +#include + +namespace FM7b5 +{ + namespace detail + { + //! copy const qualifier of From to To + template struct copy_const { typedef To type; }; + template struct copy_const { typedef typename std::add_const::type type; }; + + //! array subscript access for multi dimentional data + template + class MDView + { + public: + typedef T value_type; + typedef typename copy_const::type* byte_pointer_type; + typedef typename copy_const::type* void_pointer_type; + + protected: + //! temporal class for resolving the operator[] + template + class ref + { + byte_pointer_type const m_p; + const size_t* const m_extent; + + public: + typedef ref result_type; + + __device__ + ref(void_pointer_type p, const size_t* extent) + : m_p(static_cast(p)), m_extent(extent) + {} + + __device__ + result_type operator[](size_t i) const + { + return result_type(m_p + *m_extent * i, m_extent + 1); + } + }; + + template + class ref + { + byte_pointer_type const m_p; + const size_t* const m_extent; + + public: + typedef typename std::add_reference::type result_type; + + __device__ + ref(void_pointer_type p, const size_t* extent) + : m_p(static_cast(p)), m_extent(extent) + {} + + __device__ + result_type operator[](size_t i) const + { + return *reinterpret_cast(m_p + *m_extent * i); + } + }; + + byte_pointer_type m_data; + size_t m_extent[D]; + + public: + MDView() {} + + MDView(void_pointer_type p, const size_t extent[D]) + : m_data(static_cast(p)) + { + for (size_t i = 0; i < D; ++i) { + m_extent[i] = extent[i]; + } + } + + __device__ + typename ref::result_type operator[](size_t i) const + { + ref r(m_data, m_extent); + return r[i]; + } + }; + } + + using detail::MDView; +} + +#endif /* FM7b5_MDVIEW_CUH */ diff --git a/libutils/libutils.vcxproj b/libutils/libutils.vcxproj index 48cd1fb..09f2ea2 100644 --- a/libutils/libutils.vcxproj +++ b/libutils/libutils.vcxproj @@ -71,8 +71,10 @@ + + diff --git a/libutils/libutils.vcxproj.filters b/libutils/libutils.vcxproj.filters index 2db704b..f5687b6 100644 --- a/libutils/libutils.vcxproj.filters +++ b/libutils/libutils.vcxproj.filters @@ -31,6 +31,12 @@ ヘッダー ファイル + + ヘッダー ファイル + + + ヘッダー ファイル + -- 1.7.12.4