X-Git-Url: http://www.fmaj7b5.info/git?p=cuda.git;a=blobdiff_plain;f=binarize%2Fbinarize.cu;h=3d8a8bde3a95eae4193e8acb8e97455620b3b290;hp=4f5e44723ee59dc9b53e87b89c413de3e879406b;hb=869779783d0c06c34e02062ac7d23dac316e73a9;hpb=7b77a912a4a1202f677ae9dbff672758e2b945e4 diff --git a/binarize/binarize.cu b/binarize/binarize.cu index 4f5e447..3d8a8bd 100644 --- a/binarize/binarize.cu +++ b/binarize/binarize.cu @@ -19,81 +19,33 @@ #include #include "binarize.cuh" +#include "DeviceMemory.cuh" using namespace FM7b5; void FM7b5::binarize_gpu(ImageGray& out, const ImageGray& in, const uint8_t thres) { - if (in.width() != out.width() || in.height() != out.height()) { throw std::runtime_error("sizes of input and output images are diferent."); } const size_t width(in.width()), height(in.height()), bpp(in.bpp()); - uint8_t* d_in(nullptr); - uint8_t* d_out(nullptr); - size_t in_pitch(0), out_pitch(0); - const size_t threads_per_dim(32); dim3 threads_per_block(threads_per_dim, threads_per_dim); dim3 blocks_per_grid((width + threads_per_block.x - 1)/ threads_per_block.x, (height + threads_per_block.y - 1)/ threads_per_block.y); - cudaError_t status; - // allocate input/output memories - status = cudaMallocPitch(&d_in, &in_pitch, width * bpp, height); - if (status != cudaSuccess) { - goto on_error_in; - } - - status = cudaMallocPitch(&d_out, &out_pitch, width * bpp, height); - if (status != cudaSuccess) { - goto on_error_out; - } + memory::LinearPitch d_in(width, height), d_out(width, height); // copy an input image to device memory - status = cudaMemcpy2D(d_in, in_pitch, in.data(), in.stride(), bpp * width, height, cudaMemcpyHostToDevice); - if (status != cudaSuccess) { - goto on_error_proc; - } + d_in.copy_from(in.data(), bpp * width, height, in.stride()); // launch kernel - binarize<<>>(d_out, out_pitch, d_in, in_pitch, width, height, thres); + binarize<<>>(d_out.ref(), d_in.ref(), width, height, thres); // copy the result back to host memory - status = cudaMemcpy2D(out.data(), out.stride(), d_out, out_pitch, bpp * width, height, cudaMemcpyDeviceToHost); - if (status != cudaSuccess) { - goto on_error_proc; - } - - // free device memories - cudaFree(d_out); - cudaFree(d_in); - - return; - - // error handling -on_error_proc: - cudaFree(d_out); -on_error_out: - cudaFree(d_in); -on_error_in: - throw std::runtime_error(cudaGetErrorString(status)); -} - -__global__ -void -FM7b5::binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres) -{ - const size_t w(blockDim.x * blockIdx.x + threadIdx.x); - const size_t h(blockDim.y * blockIdx.y + threadIdx.y); - - if (w >= width || h >= height) { - return; - } - - out[out_pitch * h + w] = (in[in_pitch * h + w] < thres) ? 0 : 255; + d_out.copy_to(out.data(), bpp * width, height, out.stride()); }