/*
* PatchMatchCUDA.cpp
*
* Copyright (c) 2014-2021 SEACAVE
*
* Author(s):
*
*      cDc <cdc.seacave@gmail.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*
* Additional Terms:
*
*      You are required to preserve legal notices and author attributions in
*      that material or in the Appropriate Legal Notices displayed by works
*      containing it.
*/

#include "Common.h"
#include "PatchMatchCUDA.h"
#include "DepthMap.h"

#ifdef _USE_CUDA

using namespace MVS;


// D E F I N E S ///////////////////////////////////////////////////


// S T R U C T S ///////////////////////////////////////////////////

PatchMatchCUDA::PatchMatchCUDA(int device)
{
	// initialize CUDA device if needed
	if (CUDA::devices.IsEmpty())
		CUDA::initDevice(device);
}

PatchMatchCUDA::~PatchMatchCUDA()
{
	Release();
}

void PatchMatchCUDA::Release()
{
	if (images.empty())
		return;
	FOREACH(i, cudaImageArrays) {
		cudaDestroyTextureObject(textureImages[i]);
		cudaFreeArray(cudaImageArrays[i]);
	}
	cudaImageArrays.clear();
	if (params.bGeomConsistency) {
		FOREACH(i, cudaDepthArrays) {
			cudaDestroyTextureObject(textureDepths[i]);
			cudaFreeArray(cudaDepthArrays[i]);
		}
		cudaDepthArrays.clear();
	}
	images.clear();
	cameras.clear();
	ReleaseCUDA();
}

void PatchMatchCUDA::ReleaseCUDA()
{
	cudaFree(cudaTextureImages);
	cudaFree(cudaCameras);
	cudaFree(cudaDepthNormalEstimates);
	cudaFree(cudaDepthNormalCosts);
	cudaFree(cudaRandStates);
	cudaFree(cudaSelectedViews);
	if (params.bGeomConsistency)
		cudaFree(cudaTextureDepths);
	delete[] depthNormalEstimates;
}

void PatchMatchCUDA::Init(bool bGeomConsistency)
{
	if (bGeomConsistency) {
		params.bGeomConsistency = true;
		params.nEstimationIters = 1;
	} else {
		params.bGeomConsistency = false;
		params.nEstimationIters = OPTDENSE::nEstimationIters;
	}
}

void PatchMatchCUDA::AllocatePatchMatchCUDA(const cv::Mat1f& image)
{
	const size_t num_images = images.size();
	CUDA::checkCudaCall(cudaMalloc((void**)&cudaTextureImages, sizeof(cudaTextureObject_t) * num_images));
	CUDA::checkCudaCall(cudaMalloc((void**)&cudaCameras, sizeof(Camera) * num_images));
	if (params.bGeomConsistency)
		CUDA::checkCudaCall(cudaMalloc((void**)&cudaTextureDepths, sizeof(cudaTextureObject_t) * (num_images-1)));

	const size_t size = image.size().area();
	depthNormalEstimates = new Point4[size];
	CUDA::checkCudaCall(cudaMalloc((void**)&cudaDepthNormalEstimates, sizeof(Point4) * size));
	CUDA::checkCudaCall(cudaMalloc((void**)&cudaDepthNormalCosts, sizeof(float) * size));
	CUDA::checkCudaCall(cudaMalloc((void**)&cudaSelectedViews, sizeof(unsigned) * size));
	CUDA::checkCudaCall(cudaMalloc((void**)&cudaRandStates, sizeof(curandState) * size));
}
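// Each pixel owns one Point4 estimate laid out as (nx, ny, nz, depth), plus a
// float matching cost, a 32-bit selected-views bit-mask and a curand state;
// these buffers are sized from the reference image and reallocated only when
// its resolution changes. The next function creates the CUDA array and texture
// object backing image i and, when geometric consistency is enabled, the
// depth-map of neighbor view i-1: a single 32-bit float channel with wrap
// addressing, bilinear filtering and unnormalized (pixel) coordinates.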
void PatchMatchCUDA::AllocateImageCUDA(size_t i, const cv::Mat1f& image, bool bInitImage, bool bInitDepthMap)
{
	const cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
	if (bInitImage) {
		CUDA::checkCudaCall(cudaMallocArray(&cudaImageArrays[i], &channelDesc, image.cols, image.rows));

		struct cudaResourceDesc resDesc;
		memset(&resDesc, 0, sizeof(cudaResourceDesc));
		resDesc.resType = cudaResourceTypeArray;
		resDesc.res.array.array = cudaImageArrays[i];

		struct cudaTextureDesc texDesc;
		memset(&texDesc, 0, sizeof(cudaTextureDesc));
		texDesc.addressMode[0] = cudaAddressModeWrap;
		texDesc.addressMode[1] = cudaAddressModeWrap;
		texDesc.filterMode = cudaFilterModeLinear;
		texDesc.readMode = cudaReadModeElementType;
		texDesc.normalizedCoords = 0;

		CUDA::checkCudaCall(cudaCreateTextureObject(&textureImages[i], &resDesc, &texDesc, NULL));
	}

	if (params.bGeomConsistency && i > 0) {
		if (!bInitDepthMap) {
			textureDepths[i-1] = 0;
			cudaDepthArrays[i-1] = NULL;
			return;
		}
		CUDA::checkCudaCall(cudaMallocArray(&cudaDepthArrays[i-1], &channelDesc, image.cols, image.rows));

		struct cudaResourceDesc resDesc;
		memset(&resDesc, 0, sizeof(cudaResourceDesc));
		resDesc.resType = cudaResourceTypeArray;
		resDesc.res.array.array = cudaDepthArrays[i-1];

		struct cudaTextureDesc texDesc;
		memset(&texDesc, 0, sizeof(cudaTextureDesc));
		texDesc.addressMode[0] = cudaAddressModeWrap;
		texDesc.addressMode[1] = cudaAddressModeWrap;
		texDesc.filterMode = cudaFilterModeLinear;
		texDesc.readMode = cudaReadModeElementType;
		texDesc.normalizedCoords = 0;

		CUDA::checkCudaCall(cudaCreateTextureObject(&textureDepths[i-1], &resDesc, &texDesc, NULL));
	}
}
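// Illustrative call sequence (a sketch, not code from this file), assuming a
// DepthData populated with at least two views, as in the dense-reconstruction
// driver:
//   PatchMatchCUDA pm(0);            // initializes CUDA device 0 if none is active
//   pm.Init(false);                  // photometric pass, OPTDENSE::nEstimationIters iterations
//   pm.EstimateDepthMap(depthData);  // fills depth, normal, confidence and views maps
//   pm.Init(true);                   // optional geometric-consistency refinement pass
//   pm.EstimateDepthMap(depthData);  // re-estimates using the neighbor depth-maps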
void PatchMatchCUDA::EstimateDepthMap(DepthData& depthData)
{
	TD_TIMER_STARTD();

	ASSERT(depthData.images.size() > 1);

	// multi-resolution
	DepthData& fullResDepthData(depthData);
	const unsigned totalScaleNumber(params.bGeomConsistency ? 0u : OPTDENSE::nSubResolutionLevels);
	DepthMap lowResDepthMap;
	NormalMap lowResNormalMap;
	ViewsMap lowResViewsMap;
	IIndex prevNumImages = (IIndex)images.size();
	const IIndex numImages = depthData.images.size();
	params.nNumViews = (int)numImages-1;
	params.nInitTopK = std::min(params.nInitTopK, params.nNumViews);
	params.fDepthMin = depthData.dMin;
	params.fDepthMax = depthData.dMax;
	if (prevNumImages < numImages) {
		images.resize(numImages);
		cameras.resize(numImages);
		cudaImageArrays.resize(numImages);
		textureImages.resize(numImages);
	}
	if (params.bGeomConsistency && cudaDepthArrays.size() < (size_t)params.nNumViews) {
		cudaDepthArrays.resize(params.nNumViews);
		textureDepths.resize(params.nNumViews);
	}
	const int maxPixelViews(MINF(params.nNumViews, 4));
	for (unsigned scaleNumber = totalScaleNumber+1; scaleNumber-- > 0; ) {
		// initialize
		const float scale = 1.f / POWI(2, scaleNumber);
		DepthData currentDepthData(DepthMapsData::ScaleDepthData(fullResDepthData, scale));
		DepthData& depthData(scaleNumber==0 ? fullResDepthData : currentDepthData);
		const Image8U::Size size(depthData.images.front().image.size());
		params.bLowResProcessed = false;
		if (scaleNumber != totalScaleNumber) {
			// all resolutions, but the smallest one, if multi-resolution is enabled
			params.bLowResProcessed = true;
			cv::resize(lowResDepthMap, depthData.depthMap, size, 0, 0, cv::INTER_LINEAR);
			cv::resize(lowResNormalMap, depthData.normalMap, size, 0, 0, cv::INTER_NEAREST);
			cv::resize(lowResViewsMap, depthData.viewsMap, size, 0, 0, cv::INTER_NEAREST);
			CUDA::checkCudaCall(cudaMalloc((void**)&cudaLowDepths, sizeof(float) * size.area()));
		} else {
			if (totalScaleNumber > 0) {
				// smallest resolution, when multi-resolution is enabled
				fullResDepthData.depthMap.release();
				fullResDepthData.normalMap.release();
				fullResDepthData.confMap.release();
				fullResDepthData.viewsMap.release();
			}
			// smallest resolution if multi-resolution is enabled; highest otherwise
			if (depthData.viewsMap.empty())
				depthData.viewsMap.create(size);
		}
		if (scaleNumber == 0) {
			// highest resolution
			if (depthData.confMap.empty())
				depthData.confMap.create(size);
		}

		// set the cost threshold used to keep estimates:
		params.fThresholdKeepCost = OPTDENSE::fNCCThresholdKeep;
		if (totalScaleNumber) {
			// multi-resolution enabled
			if (scaleNumber > 0 && scaleNumber != totalScaleNumber) {
				// all sub-resolutions, but the smallest and highest
				params.fThresholdKeepCost = 0.f; // disable filtering
			} else
			if (scaleNumber == totalScaleNumber || (!params.bGeomConsistency && OPTDENSE::nEstimationGeometricIters)) {
				// smallest sub-resolution OR highest resolution and geometric consistency is not running but enabled
				params.fThresholdKeepCost = OPTDENSE::fNCCThresholdKeep*1.2f;
			}
		} else {
			// multi-resolution disabled
			if (!params.bGeomConsistency && OPTDENSE::nEstimationGeometricIters) {
				// geometric consistency is not running but enabled
				params.fThresholdKeepCost = OPTDENSE::fNCCThresholdKeep*1.2f;
			}
		}

		for (IIndex i = 0; i < numImages; ++i) {
			const DepthData::ViewData& view = depthData.images[i];
			Image32F image = view.image;
			Camera camera;
			camera.K = Eigen::Map<const SEACAVE::Matrix3x3::EMat>(view.camera.K.val).cast<float>();
			camera.R = Eigen::Map<const SEACAVE::Matrix3x3::EMat>(view.camera.R.val).cast<float>();
			camera.C = Eigen::Map<const SEACAVE::Point3::EVec>(view.camera.C.ptr()).cast<float>();
			camera.height = image.rows;
			camera.width = image.cols;
			// store camera and image
			if (i == 0 && (prevNumImages < numImages || images[0].size() != image.size())) {
				// allocate/reallocate PatchMatch CUDA memory
				if (prevNumImages > 0)
					ReleaseCUDA();
				AllocatePatchMatchCUDA(image);
			}
			if (i >= prevNumImages) {
				// allocate image CUDA memory
				AllocateImageCUDA(i, image, true, !view.depthMap.empty());
			} else
			if (images[i].size() != image.size()) {
				// reallocate image CUDA memory
				cudaDestroyTextureObject(textureImages[i]);
				cudaFreeArray(cudaImageArrays[i]);
				if (params.bGeomConsistency && i > 0) {
					cudaDestroyTextureObject(textureDepths[i-1]);
					cudaFreeArray(cudaDepthArrays[i-1]);
				}
				AllocateImageCUDA(i, image, true, !view.depthMap.empty());
			} else
			if (params.bGeomConsistency && i > 0 && (view.depthMap.empty() != (cudaDepthArrays[i-1] == NULL))) {
				// reallocate depth CUDA memory
				if (cudaDepthArrays[i-1]) {
					cudaDestroyTextureObject(textureDepths[i-1]);
					cudaFreeArray(cudaDepthArrays[i-1]);
				}
				AllocateImageCUDA(i, image, false, !view.depthMap.empty());
			}
			CUDA::checkCudaCall(cudaMemcpy2DToArray(cudaImageArrays[i], 0, 0, image.ptr(), image.step[0], image.cols * sizeof(float), image.rows, cudaMemcpyHostToDevice));
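			// In the geometric-consistency pass each neighbor view also carries the
			// depth-map estimated by the previous (photometric) pass; it is uploaded
			// here so the kernel can score reprojection agreement across views.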
			if (params.bGeomConsistency && i > 0 && !view.depthMap.empty()) {
				// set previously computed depth-map
				DepthMap depthMap(view.depthMap);
				if (depthMap.size() != image.size())
					cv::resize(depthMap, depthMap, image.size(), 0, 0, cv::INTER_LINEAR);
				CUDA::checkCudaCall(cudaMemcpy2DToArray(cudaDepthArrays[i-1], 0, 0, depthMap.ptr(), depthMap.step[0], sizeof(float) * depthMap.cols, depthMap.rows, cudaMemcpyHostToDevice));
			}
			images[i] = std::move(image);
			cameras[i] = std::move(camera);
		}
		if (params.bGeomConsistency && cudaDepthArrays.size() > numImages - 1) {
			for (IIndex i = numImages; i < prevNumImages; ++i) {
				// free depth CUDA memory
				cudaDestroyTextureObject(textureDepths[i-1]);
				cudaFreeArray(cudaDepthArrays[i-1]);
			}
			cudaDepthArrays.resize(params.nNumViews);
			textureDepths.resize(params.nNumViews);
		}
		if (prevNumImages > numImages) {
			for (IIndex i = numImages; i < prevNumImages; ++i) {
				// free image CUDA memory
				cudaDestroyTextureObject(textureImages[i]);
				cudaFreeArray(cudaImageArrays[i]);
			}
			images.resize(numImages);
			cameras.resize(numImages);
			cudaImageArrays.resize(numImages);
			textureImages.resize(numImages);
		}
		prevNumImages = numImages;

		// setup CUDA memory
		CUDA::checkCudaCall(cudaMemcpy(cudaTextureImages, textureImages.data(), sizeof(cudaTextureObject_t) * numImages, cudaMemcpyHostToDevice));
		CUDA::checkCudaCall(cudaMemcpy(cudaCameras, cameras.data(), sizeof(Camera) * numImages, cudaMemcpyHostToDevice));
		if (params.bGeomConsistency) {
			// set previously computed depth-maps
			ASSERT(depthData.depthMap.size() == depthData.GetView().image.size());
			CUDA::checkCudaCall(cudaMemcpy(cudaTextureDepths, textureDepths.data(), sizeof(cudaTextureObject_t) * params.nNumViews, cudaMemcpyHostToDevice));
		}

		// load depth-map and normal-map into CUDA memory
		for (int r = 0; r < depthData.depthMap.rows; ++r) {
			const int baseIndex = r * depthData.depthMap.cols;
			for (int c = 0; c < depthData.depthMap.cols; ++c) {
				const Normal& n = depthData.normalMap(r, c);
				const int index = baseIndex + c;
				Point4& depthNormal = depthNormalEstimates[index];
				depthNormal.topLeftCorner<3, 1>() = Eigen::Map<const Point3f::EVec>(n.ptr());
				depthNormal.w() = depthData.depthMap(r, c);
			}
		}
		CUDA::checkCudaCall(cudaMemcpy(cudaDepthNormalEstimates, depthNormalEstimates, sizeof(Point4) * depthData.depthMap.size().area(), cudaMemcpyHostToDevice));

		// load low resolution depth-map into CUDA memory
		if (params.bLowResProcessed) {
			ASSERT(depthData.depthMap.isContinuous());
			CUDA::checkCudaCall(cudaMemcpy(cudaLowDepths, depthData.depthMap.ptr(), sizeof(float) * depthData.depthMap.size().area(), cudaMemcpyHostToDevice));
		}

		// run CUDA patch-match
		ASSERT(!depthData.viewsMap.empty());
		RunCUDA(depthData.confMap.getData(), (uint32_t*)depthData.viewsMap.getData());
		CUDA::checkCudaCall(cudaGetLastError());
		if (params.bLowResProcessed)
			CUDA::checkCudaCall(cudaFree(cudaLowDepths));

		// load depth-map, normal-map and confidence-map from CUDA memory
		for (int r = 0; r < depthData.depthMap.rows; ++r) {
			for (int c = 0; c < depthData.depthMap.cols; ++c) {
				const int index = r * depthData.depthMap.cols + c;
				const Point4& depthNormal = depthNormalEstimates[index];
				const Depth depth = depthNormal.w();
				ASSERT(ISFINITE(depth));
				depthData.depthMap(r, c) = depth;
				depthData.normalMap(r, c) = depthNormal.topLeftCorner<3, 1>();
				if (scaleNumber == 0) {
					// convert the ZNCC score in [0,2], where 0 is best, to a confidence in [0,1], where 1 is best
					ASSERT(!depthData.confMap.empty());
					float& conf = depthData.confMap(r, c);
					conf = conf >= 1.f ? 0.f : 1.f - conf;
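					// The kernel returns the pixel's views as a 32-bit mask (bit i set means
					// view i contributed); it is unpacked below into at most maxPixelViews
					// indices padded with 255, e.g. mask 0b0110 becomes (1, 2, 255, 255).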
					// map pixel views from bit-mask to index
					ASSERT(!depthData.viewsMap.empty());
					ViewsID& views = depthData.viewsMap(r, c);
					if (depth > 0) {
						const uint32_t bitviews(*reinterpret_cast<const uint32_t*>(views.val));
						int j = 0;
						for (int i = 0; i < 32; ++i) {
							if (bitviews & (1 << i)) {
								views[j] = i;
								if (++j == maxPixelViews)
									break;
							}
						}
						while (j < 4)
							views[j++] = 255;
					} else
						views = ViewsID(255, 255, 255, 255);
				}
			}
		}

		// remember sub-resolution estimates for next iteration
		if (scaleNumber > 0) {
			lowResDepthMap = depthData.depthMap;
			lowResNormalMap = depthData.normalMap;
			lowResViewsMap = depthData.viewsMap;
		}
	}

	// apply ignore mask
	if (OPTDENSE::nIgnoreMaskLabel >= 0) {
		const DepthData::ViewData& view = depthData.GetView();
		BitMatrix mask;
		if (DepthEstimator::ImportIgnoreMask(*view.pImageData, depthData.depthMap.size(), (uint16_t)OPTDENSE::nIgnoreMaskLabel, mask))
			depthData.ApplyIgnoreMask(mask);
	}

	DEBUG_EXTRA("Depth-map for image %3u %s: %dx%d (%s)", depthData.images.front().GetID(),
		depthData.images.GetSize() > 2 ?
			String::FormatString("estimated using %2u images", depthData.images.size()-1).c_str() :
			String::FormatString("with image %3u estimated", depthData.images[1].GetID()).c_str(),
		images.front().cols, images.front().rows, TD_TIMER_GET_FMT().c_str());
}
/*----------------------------------------------------------------*/

#endif // _USE_CUDA