diff --git a/libs/MVS/cuda/MeshTextureCUDA.cu b/libs/MVS/cuda/MeshTextureCUDA.cu
new file mode 100644
index 0000000..82298b4
--- /dev/null
+++ b/libs/MVS/cuda/MeshTextureCUDA.cu
@@ -0,0 +1,273 @@
+#include "MeshTextureCUDA.h"
+#include <iostream>
+
+bool MeshTextureCUDA::cudaInitialized = false;
+
+namespace {
+
+// Thread-block edge length shared by every 2D image kernel launch in this file.
+constexpr unsigned kBlockDim = 16;
+
+// Owns one cudaMalloc allocation and frees it when it leaves scope, so every
+// early return in the host wrappers below releases its device memory.
+template <typename T>
+struct DeviceBuffer {
+    T* ptr = nullptr;
+
+    DeviceBuffer() = default;
+    DeviceBuffer(const DeviceBuffer&) = delete;
+    DeviceBuffer& operator=(const DeviceBuffer&) = delete;
+    ~DeviceBuffer() { if (ptr != nullptr) cudaFree(ptr); }
+
+    cudaError_t allocate(size_t bytes) { return cudaMalloc(&ptr, bytes); }
+};
+
+// Returns true on cudaSuccess; otherwise logs prefix followed by the CUDA
+// error string and returns false.
+bool cudaOk(cudaError_t status, const char* prefix) {
+    if (status == cudaSuccess) return true;
+    std::cerr << prefix << cudaGetErrorString(status) << std::endl;
+    return false;
+}
+
+// Grid of kBlockDim x kBlockDim blocks that covers a width x height image.
+dim3 gridFor(int width, int height) {
+    return dim3((width + kBlockDim - 1) / kBlockDim,
+                (height + kBlockDim - 1) / kBlockDim);
+}
+
+} // namespace
+
+// Blends src into dst wherever mask is non-zero:
+//   dst = src * bias + dst * (1 - bias)
+// All buffers are tightly packed (no row padding): dst and src hold
+// width * height * channels floats, mask holds width * height bytes.
+// Expects a 2D launch covering width x height, one thread per pixel.
+__global__ void poissonBlendKernel(float* dst, const float* src, const uchar* mask,
+                                   int width, int height, int channels, float bias) {
+    const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= width || y >= height) return;
+
+    // Index in size_t: pixel * channels can exceed INT_MAX on very large images.
+    const size_t pixel = static_cast<size_t>(y) * width + x;
+    if (mask[pixel] == 0) return; // empty
+
+    const size_t base = pixel * channels;
+    for (int c = 0; c < channels; c++) {
+        dst[base + c] = src[base + c] * bias + dst[base + c] * (1.0f - bias);
+    }
+}
+
+// Clears (sets to 0) every mask pixel that lies within stripWidth pixels of
+// the image border. mask is tightly packed: width * height bytes.
+// Expects a 2D launch covering width x height, one thread per pixel.
+__global__ void processMaskKernel(uchar* mask, int width, int height, int stripWidth) {
+    const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= width || y >= height) return;
+
+    // Simplified mask handling - extend to match the real requirements.
+    if (x < stripWidth || x >= width - stripWidth ||
+        y < stripWidth || y >= height - stripWidth) {
+        mask[static_cast<size_t>(y) * width + x] = 0; // mark as empty
+    }
+}
+
+// Converts a 3-channel uint8 image to float, multiplying each sample by scale.
+// srcStep is the source row pitch in bytes; dstStep is the destination row
+// pitch in floats. Expects a 2D launch covering width x height.
+__global__ void convertToFloatKernel(const uchar* src, float* dst,
+                                     int width, int height,
+                                     int srcStep, int dstStep,
+                                     float scale) {
+    const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x >= width || y >= height) return;
+
+    const size_t srcIdx = static_cast<size_t>(y) * srcStep + x * 3; // 3 channels (BGR)
+    const size_t dstIdx = static_cast<size_t>(y) * dstStep + x * 3;
+
+    dst[dstIdx]     = src[srcIdx]     * scale; // Blue
+    dst[dstIdx + 1] = src[srcIdx + 1] * scale; // Green
+    dst[dstIdx + 2] = src[srcIdx + 2] * scale; // Red
+}
+
+// Selects CUDA device 0. Returns false when no usable device is available.
+bool MeshTextureCUDA::Initialize() {
+    if (cudaInitialized) return true;
+
+    int deviceCount = 0;
+    const cudaError_t error = cudaGetDeviceCount(&deviceCount);
+    if (error != cudaSuccess || deviceCount == 0) {
+        std::cerr << "CUDA initialization failed: No CUDA devices found" << std::endl;
+        return false;
+    }
+
+    // Use the first device, and fail here rather than in a later, unrelated call.
+    if (!cudaOk(cudaSetDevice(0), "CUDA initialization failed: ")) {
+        return false;
+    }
+
+    cudaInitialized = true;
+    return true;
+}
+
+// Blends src into dst in place on the GPU wherever mask is non-zero.
+// dst and src must be CV_32FC3, mask CV_8U, all the same non-empty size.
+// Returns false on invalid input or on any CUDA failure.
+bool MeshTextureCUDA::PoissonBlendCUDA(cv::Mat& dst, const cv::Mat& src, const cv::Mat& mask, float bias) {
+    if (!cudaInitialized && !Initialize()) {
+        return false;
+    }
+
+    // Validate the inputs; an empty image would also yield an invalid zero-sized grid.
+    if (dst.empty() || dst.size() != src.size() || dst.size() != mask.size() ||
+        dst.type() != CV_32FC3 || src.type() != CV_32FC3 || mask.type() != CV_8U) {
+        return false;
+    }
+
+    const int width = dst.cols;
+    const int height = dst.rows;
+    const int channels = 3;
+    // Row sizes in bytes, computed in size_t so large images cannot overflow int.
+    const size_t rowBytes = static_cast<size_t>(width) * channels * sizeof(float);
+    const size_t maskRowBytes = static_cast<size_t>(width) * sizeof(uchar);
+
+    DeviceBuffer<float> d_dst, d_src;
+    DeviceBuffer<uchar> d_mask;
+    if (!cudaOk(d_dst.allocate(rowBytes * height), "CUDA malloc error (dst): ") ||
+        !cudaOk(d_src.allocate(rowBytes * height), "CUDA malloc error (src): ") ||
+        !cudaOk(d_mask.allocate(maskRowBytes * height), "CUDA malloc error (mask): ")) {
+        return false;
+    }
+
+    // Copy row by row using the host pitch (Mat::step): the Mats may be ROIs or
+    // otherwise non-continuous, while the device buffers are tightly packed.
+    if (!cudaOk(cudaMemcpy2D(d_dst.ptr, rowBytes, dst.data, dst.step, rowBytes, height,
+                             cudaMemcpyHostToDevice), "CUDA memcpy error (dst to device): ") ||
+        !cudaOk(cudaMemcpy2D(d_src.ptr, rowBytes, src.data, src.step, rowBytes, height,
+                             cudaMemcpyHostToDevice), "CUDA memcpy error (src to device): ") ||
+        !cudaOk(cudaMemcpy2D(d_mask.ptr, maskRowBytes, mask.data, mask.step, maskRowBytes, height,
+                             cudaMemcpyHostToDevice), "CUDA memcpy error (mask to device): ")) {
+        return false;
+    }
+
+    const dim3 blockSize(kBlockDim, kBlockDim);
+    poissonBlendKernel<<<gridFor(width, height), blockSize>>>(
+        d_dst.ptr, d_src.ptr, d_mask.ptr, width, height, channels, bias);
+
+    // cudaGetLastError reports launch failures; the synchronize reports faults
+    // raised while the kernel was running.
+    if (!cudaOk(cudaGetLastError(), "CUDA kernel error: ") ||
+        !cudaOk(cudaDeviceSynchronize(), "CUDA synchronize error: ")) {
+        return false;
+    }
+
+    // Copy the blended image back into dst.
+    return cudaOk(cudaMemcpy2D(dst.data, dst.step, d_dst.ptr, rowBytes, rowBytes, height,
+                               cudaMemcpyDeviceToHost), "CUDA memcpy error (dst to host): ");
+}
+
+// Zeroes a stripWidth-pixel border of a non-empty CV_8U mask on the GPU.
+// Returns false on invalid input or on any CUDA failure.
+bool MeshTextureCUDA::ProcessMaskCUDA(cv::Mat& mask, int stripWidth) {
+    if (!cudaInitialized && !Initialize()) {
+        return false;
+    }
+
+    // The kernel addresses one byte per pixel, so only single-channel 8-bit
+    // masks are valid; an empty mask would yield an invalid zero-sized grid.
+    if (mask.empty() || mask.type() != CV_8U) {
+        return false;
+    }
+
+    const int width = mask.cols;
+    const int height = mask.rows;
+    const size_t rowBytes = static_cast<size_t>(width) * sizeof(uchar);
+
+    DeviceBuffer<uchar> d_mask;
+    if (!cudaOk(d_mask.allocate(rowBytes * height), "CUDA malloc error (mask): ")) {
+        return false;
+    }
+
+    // Pitched copy, so non-continuous masks (e.g. ROIs) are handled correctly.
+    if (!cudaOk(cudaMemcpy2D(d_mask.ptr, rowBytes, mask.data, mask.step, rowBytes, height,
+                             cudaMemcpyHostToDevice), "CUDA memcpy error (mask to device): ")) {
+        return false;
+    }
+
+    const dim3 blockSize(kBlockDim, kBlockDim);
+    processMaskKernel<<<gridFor(width, height), blockSize>>>(d_mask.ptr, width, height, stripWidth);
+
+    if (!cudaOk(cudaGetLastError(), "CUDA kernel error: ") ||
+        !cudaOk(cudaDeviceSynchronize(), "CUDA synchronize error: ")) {
+        return false;
+    }
+
+    return cudaOk(cudaMemcpy2D(mask.data, mask.step, d_mask.ptr, rowBytes, rowBytes, height,
+                               cudaMemcpyDeviceToHost), "CUDA memcpy error (mask to host): ");
+}
+
+// Converts a CV_8UC3 image to CV_32FC3 on the GPU, multiplying every sample
+// by scale. dst is (re)allocated as needed. Returns false on invalid input
+// or on any CUDA failure.
+bool MeshTextureCUDA::ConvertToCUDA(const cv::Mat& src, cv::Mat& dst, float scale) {
+    // Validate the input image.
+    if (src.empty() || src.type() != CV_8UC3) {
+        std::cerr << "Invalid source image: must be CV_8UC3" << std::endl;
+        return false;
+    }
+
+    if (!cudaInitialized && !Initialize()) {
+        return false;
+    }
+
+    const int width = src.cols;
+    const int height = src.rows;
+    const int channels = 3;
+
+    // Mat::create is a no-op when dst already has this size and type.
+    dst.create(height, width, CV_32FC3);
+
+    // Device rows are tightly packed regardless of the host pitch, so a ROI of
+    // a large image does not inflate the allocation.
+    const int rowElems = width * channels;
+    const size_t srcRowBytes = static_cast<size_t>(rowElems) * sizeof(uchar);
+    const size_t dstRowBytes = static_cast<size_t>(rowElems) * sizeof(float);
+
+    DeviceBuffer<uchar> d_src;
+    DeviceBuffer<float> d_dst;
+    if (!cudaOk(d_src.allocate(srcRowBytes * height), "CUDA malloc error (src): ") ||
+        !cudaOk(d_dst.allocate(dstRowBytes * height), "CUDA malloc error (dst): ")) {
+        return false;
+    }
+
+    if (!cudaOk(cudaMemcpy2D(d_src.ptr, srcRowBytes, src.data, src.step, srcRowBytes, height,
+                             cudaMemcpyHostToDevice), "CUDA memcpy error (src to device): ")) {
+        return false;
+    }
+
+    // srcStep is in bytes and dstStep in floats; both equal rowElems here.
+    const dim3 blockSize(kBlockDim, kBlockDim);
+    convertToFloatKernel<<<gridFor(width, height), blockSize>>>(
+        d_src.ptr, d_dst.ptr, width, height, rowElems, rowElems, scale);
+
+    if (!cudaOk(cudaGetLastError(), "CUDA kernel error: ") ||
+        !cudaOk(cudaDeviceSynchronize(), "CUDA synchronize error: ")) {
+        return false;
+    }
+
+    return cudaOk(cudaMemcpy2D(dst.data, dst.step, d_dst.ptr, dstRowBytes, dstRowBytes, height,
+                               cudaMemcpyDeviceToHost), "CUDA memcpy error (dst to host): ");
+}
+
+// NOTE(review): cudaDeviceReset() destroys every allocation and all state on
+// the current device for the whole process, including those of any other
+// CUDA code in the application. Call this only at process shutdown.
+void MeshTextureCUDA::Cleanup() {
+    if (cudaInitialized) {
+        cudaDeviceReset();
+        cudaInitialized = false;
+    }
+}
diff --git a/libs/MVS/cuda/MeshTextureCUDA.h b/libs/MVS/cuda/MeshTextureCUDA.h
new file mode 100644
index 0000000..f59b31e
--- /dev/null
+++ b/libs/MVS/cuda/MeshTextureCUDA.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <opencv2/opencv.hpp>
+#include <cuda_runtime.h>
+
+// Static CUDA helpers used by mesh texturing. Each image routine uploads its
+// inputs, runs one kernel, and downloads the result before returning.
+// The initialized flag is a plain static: not safe for concurrent first use.
+class MeshTextureCUDA {
+public:
+    // Initializes the CUDA environment (selects device 0).
+    // Returns false when no usable CUDA device is available.
+    static bool Initialize();
+
+    // Blends src into dst in place wherever mask is non-zero:
+    //   dst = src * bias + dst * (1 - bias)
+    // dst and src must be CV_32FC3, mask CV_8U, all the same non-empty size.
+    static bool PoissonBlendCUDA(cv::Mat& dst, const cv::Mat& src, const cv::Mat& mask, float bias);
+
+    // Zeroes every pixel of a CV_8U mask within stripWidth pixels of the border.
+    static bool ProcessMaskCUDA(cv::Mat& mask, int stripWidth);
+
+    // Converts a CV_8UC3 image to CV_32FC3, multiplying every sample by scale.
+    static bool ConvertToCUDA(const cv::Mat& src, cv::Mat& dst, float scale = 1.0f/255.0f);
+
+    // Resets the CUDA device (cudaDeviceReset) if Initialize() succeeded.
+    static void Cleanup();
+
+private:
+    static bool cudaInitialized;
+};
+