X-Git-Url: http://www.fmaj7b5.info/git?p=cuda.git;a=blobdiff_plain;f=binarize%2Fbinarize.cu;fp=binarize%2Fbinarize.cu;h=4f5e44723ee59dc9b53e87b89c413de3e879406b;hp=0000000000000000000000000000000000000000;hb=7b77a912a4a1202f677ae9dbff672758e2b945e4;hpb=8b2c111ed060599c6bc93b48732d36f8aa48eb2b

diff --git a/binarize/binarize.cu b/binarize/binarize.cu
new file mode 100644
index 0000000..4f5e447
--- /dev/null
+++ b/binarize/binarize.cu
@@ -0,0 +1,99 @@
+/*
+	Copyright (C) 2012  fmaj7b5.info
+
+	This program is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 2 of the License, or
+	(at your option) any later version.
+
+	This program is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "stdafx.h"
+#include <cuda.h>
+
+#include "binarize.cuh"
+
+using namespace FM7b5;
+
+/*
+	Binarize a gray image on the GPU.
+
+	Copies `in` to device memory, runs the binarize kernel over it and copies
+	the result back into `out`: every byte that is below `thres` becomes 0,
+	every other byte becomes 255.
+
+	out    destination image; must have the same width and height as `in`
+	in     source image
+	thres  threshold each source byte is compared against
+
+	Throws std::runtime_error when the image sizes differ, or with the CUDA
+	error string when an allocation, a copy or the kernel launch fails.
+	Device buffers are released on every path.
+
+	NOTE(review): the kernel is launched over `width` one-byte columns while the
+	device buffers and copies use `width * bpp` bytes per row. This presumably
+	relies on bpp == 1 for ImageGray -- confirm.
+*/
+void
+FM7b5::binarize_gpu(ImageGray& out, const ImageGray& in, const uint8_t thres)
+{
+	if (in.width() != out.width() || in.height() != out.height()) {
+		throw std::runtime_error("sizes of input and output images are diferent.");
+	}
+
+	const size_t width(in.width()), height(in.height()), bpp(in.bpp());
+
+	// an empty image has no pixels to process, and a grid with zero blocks
+	// is not a valid launch configuration
+	if (width == 0 || height == 0) {
+		return;
+	}
+
+	uint8_t* d_in(nullptr);
+	uint8_t* d_out(nullptr);
+	size_t in_pitch(0), out_pitch(0);
+
+	// 32 x 32 threads per block; the grid is rounded up so that it covers the whole image
+	const size_t threads_per_dim(32);
+	dim3 threads_per_block(threads_per_dim, threads_per_dim);
+	dim3 blocks_per_grid((width + threads_per_block.x - 1)/ threads_per_block.x,
+	                     (height + threads_per_block.y - 1)/ threads_per_block.y);
+
+	// all declarations stay above the first goto so that no jump skips an initialization
+	cudaError_t status;
+
+	// allocate input/output memories
+	status = cudaMallocPitch(&d_in, &in_pitch, width * bpp, height);
+	if (status != cudaSuccess) {
+		goto on_error_in;
+	}
+
+	status = cudaMallocPitch(&d_out, &out_pitch, width * bpp, height);
+	if (status != cudaSuccess) {
+		goto on_error_out;
+	}
+
+	// copy an input image to device memory
+	status = cudaMemcpy2D(d_in, in_pitch, in.data(), in.stride(), bpp * width, height, cudaMemcpyHostToDevice);
+	if (status != cudaSuccess) {
+		goto on_error_proc;
+	}
+
+	// launch kernel
+	binarize<<<blocks_per_grid, threads_per_block>>>(d_out, out_pitch, d_in, in_pitch, width, height, thres);
+
+	// a launch returns no status by itself; a rejected configuration is only
+	// visible through cudaGetLastError(), so ask for it before using the output
+	status = cudaGetLastError();
+	if (status != cudaSuccess) {
+		goto on_error_proc;
+	}
+
+	// copy the result back to host memory
+	// (this blocking copy also reports errors raised while the kernel was running)
+	status = cudaMemcpy2D(out.data(), out.stride(), d_out, out_pitch, bpp * width, height, cudaMemcpyDeviceToHost);
+	if (status != cudaSuccess) {
+		goto on_error_proc;
+	}
+
+	// free device memories
+	cudaFree(d_out);
+	cudaFree(d_in);
+
+	return;
+
+	// error handling: each label releases what had been allocated before the
+	// failing step, then falls through to the throw
+on_error_proc:
+	cudaFree(d_out);
+on_error_out:
+	cudaFree(d_in);
+on_error_in:
+	throw std::runtime_error(cudaGetErrorString(status));
+}
+
+/*
+	Kernel: threshold one byte per thread.
+
+	Each thread derives a column from its x block/thread indices and a row from
+	its y block/thread indices. Threads that fall outside width x height do
+	nothing. Rows of `in` and `out` are in_pitch / out_pitch bytes apart.
+	A source byte below `thres` is written as 0, any other byte as 255.
+*/
+__global__
+void
+FM7b5::binarize(uint8_t* out, const size_t out_pitch, const uint8_t* in, const size_t in_pitch, const size_t width, const size_t height, const uint8_t thres)
+{
+	const size_t col = blockDim.x * blockIdx.x + threadIdx.x;
+	const size_t row = blockDim.y * blockIdx.y + threadIdx.y;
+
+	// the grid is rounded up to whole blocks, so only in-range threads touch memory
+	if (col < width && row < height) {
+		const uint8_t* src_row = in + in_pitch * row;
+		uint8_t* dst_row = out + out_pitch * row;
+
+		if (src_row[col] < thres) {
+			dst_row[col] = 0;
+		}
+		else {
+			dst_row[col] = 255;
+		}
+	}
+}