You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

419 lines
15 KiB

/*
* PatchMatchCUDA.cpp
*
* Copyright (c) 2014-2021 SEACAVE
*
* Author(s):
*
* cDc <cdc.seacave@gmail.com>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*
* Additional Terms:
*
* You are required to preserve legal notices and author attributions in
* that material or in the Appropriate Legal Notices displayed by works
* containing it.
*/
#include "Common.h"
#include "PatchMatchCUDA.h"
#include "DepthMap.h"
#ifdef _USE_CUDA
using namespace MVS;
// D E F I N E S ///////////////////////////////////////////////////
// S T R U C T S ///////////////////////////////////////////////////
PatchMatchCUDA::PatchMatchCUDA(int device)
{
// initialize CUDA device if needed
if (CUDA::devices.IsEmpty())
CUDA::initDevice(device);
}
PatchMatchCUDA::~PatchMatchCUDA()
{
Release();
}
void PatchMatchCUDA::Release()
{
if (images.empty())
return;
FOREACH(i, cudaImageArrays) {
cudaDestroyTextureObject(textureImages[i]);
cudaFreeArray(cudaImageArrays[i]);
}
cudaImageArrays.clear();
if (params.bGeomConsistency) {
FOREACH(i, cudaDepthArrays) {
cudaDestroyTextureObject(textureDepths[i]);
cudaFreeArray(cudaDepthArrays[i]);
}
cudaDepthArrays.clear();
}
images.clear();
cameras.clear();
ReleaseCUDA();
}
void PatchMatchCUDA::ReleaseCUDA()
{
cudaFree(cudaTextureImages);
cudaFree(cudaCameras);
cudaFree(cudaDepthNormalEstimates);
cudaFree(cudaDepthNormalCosts);
cudaFree(cudaRandStates);
cudaFree(cudaSelectedViews);
if (params.bGeomConsistency)
cudaFree(cudaTextureDepths);
delete[] depthNormalEstimates;
}
void PatchMatchCUDA::Init(bool bGeomConsistency)
{
if (bGeomConsistency) {
params.bGeomConsistency = true;
params.nEstimationIters = 1;
} else {
params.bGeomConsistency = false;
params.nEstimationIters = OPTDENSE::nEstimationIters;
}
}
void PatchMatchCUDA::AllocatePatchMatchCUDA(const cv::Mat1f& image)
{
const size_t num_images = images.size();
CUDA::checkCudaCall(cudaMalloc((void**)&cudaTextureImages, sizeof(cudaTextureObject_t) * num_images));
CUDA::checkCudaCall(cudaMalloc((void**)&cudaCameras, sizeof(Camera) * num_images));
if (params.bGeomConsistency)
CUDA::checkCudaCall(cudaMalloc((void**)&cudaTextureDepths, sizeof(cudaTextureObject_t) * (num_images-1)));
const size_t size = image.size().area();
depthNormalEstimates = new Point4[size];
CUDA::checkCudaCall(cudaMalloc((void**)&cudaDepthNormalEstimates, sizeof(Point4) * size));
CUDA::checkCudaCall(cudaMalloc((void**)&cudaDepthNormalCosts, sizeof(float) * size));
CUDA::checkCudaCall(cudaMalloc((void**)&cudaSelectedViews, sizeof(unsigned) * size));
CUDA::checkCudaCall(cudaMalloc((void**)&cudaRandStates, sizeof(curandState) * size));
}
void PatchMatchCUDA::AllocateImageCUDA(size_t i, const cv::Mat1f& image, bool bInitImage, bool bInitDepthMap)
{
const cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
if (bInitImage) {
CUDA::checkCudaCall(cudaMallocArray(&cudaImageArrays[i], &channelDesc, image.cols, image.rows));
struct cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(cudaResourceDesc));
resDesc.resType = cudaResourceTypeArray;
resDesc.res.array.array = cudaImageArrays[i];
struct cudaTextureDesc texDesc;
memset(&texDesc, 0, sizeof(cudaTextureDesc));
texDesc.addressMode[0] = cudaAddressModeWrap;
texDesc.addressMode[1] = cudaAddressModeWrap;
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeElementType;
texDesc.normalizedCoords = 0;
CUDA::checkCudaCall(cudaCreateTextureObject(&textureImages[i], &resDesc, &texDesc, NULL));
}
if (params.bGeomConsistency && i > 0) {
if (!bInitDepthMap) {
textureDepths[i-1] = 0;
cudaDepthArrays[i-1] = NULL;
return;
}
CUDA::checkCudaCall(cudaMallocArray(&cudaDepthArrays[i-1], &channelDesc, image.cols, image.rows));
struct cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(cudaResourceDesc));
resDesc.resType = cudaResourceTypeArray;
resDesc.res.array.array = cudaDepthArrays[i-1];
struct cudaTextureDesc texDesc;
memset(&texDesc, 0, sizeof(cudaTextureDesc));
texDesc.addressMode[0] = cudaAddressModeWrap;
texDesc.addressMode[1] = cudaAddressModeWrap;
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeElementType;
texDesc.normalizedCoords = 0;
CUDA::checkCudaCall(cudaCreateTextureObject(&textureDepths[i-1], &resDesc, &texDesc, NULL));
}
}
void PatchMatchCUDA::EstimateDepthMap(DepthData& depthData)
{
TD_TIMER_STARTD();
ASSERT(depthData.images.size() > 1);
// multi-resolution
DepthData& fullResDepthData(depthData);
const unsigned totalScaleNumber(params.bGeomConsistency ? 0u : OPTDENSE::nSubResolutionLevels);
DepthMap lowResDepthMap;
NormalMap lowResNormalMap;
ViewsMap lowResViewsMap;
IIndex prevNumImages = (IIndex)images.size();
const IIndex numImages = depthData.images.size();
params.nNumViews = (int)numImages-1;
params.nInitTopK = std::min(params.nInitTopK, params.nNumViews);
params.fDepthMin = depthData.dMin;
params.fDepthMax = depthData.dMax;
if (prevNumImages < numImages) {
images.resize(numImages);
cameras.resize(numImages);
cudaImageArrays.resize(numImages);
textureImages.resize(numImages);
}
if (params.bGeomConsistency && cudaDepthArrays.size() < (size_t)params.nNumViews) {
cudaDepthArrays.resize(params.nNumViews);
textureDepths.resize(params.nNumViews);
}
const int maxPixelViews(MINF(params.nNumViews, 4));
for (unsigned scaleNumber = totalScaleNumber+1; scaleNumber-- > 0; ) {
// initialize
const float scale = 1.f / POWI(2, scaleNumber);
DepthData currentDepthData(DepthMapsData::ScaleDepthData(fullResDepthData, scale));
DepthData& depthData(scaleNumber==0 ? fullResDepthData : currentDepthData);
const Image8U::Size size(depthData.images.front().image.size());
params.bLowResProcessed = false;
if (scaleNumber != totalScaleNumber) {
// all resolutions, but the smallest one, if multi-resolution is enabled
params.bLowResProcessed = true;
cv::resize(lowResDepthMap, depthData.depthMap, size, 0, 0, cv::INTER_LINEAR);
cv::resize(lowResNormalMap, depthData.normalMap, size, 0, 0, cv::INTER_NEAREST);
cv::resize(lowResViewsMap, depthData.viewsMap, size, 0, 0, cv::INTER_NEAREST);
CUDA::checkCudaCall(cudaMalloc((void**)&cudaLowDepths, sizeof(float) * size.area()));
} else {
if (totalScaleNumber > 0) {
// smallest resolution, when multi-resolution is enabled
fullResDepthData.depthMap.release();
fullResDepthData.normalMap.release();
fullResDepthData.confMap.release();
fullResDepthData.viewsMap.release();
}
// smallest resolution if multi-resolution is enabled; highest otherwise
if (depthData.viewsMap.empty())
depthData.viewsMap.create(size);
}
if (scaleNumber == 0) {
// highest resolution
if (depthData.confMap.empty())
depthData.confMap.create(size);
}
// set keep threshold to:
params.fThresholdKeepCost = OPTDENSE::fNCCThresholdKeep;
if (totalScaleNumber) {
// multi-resolution enabled
if (scaleNumber > 0 && scaleNumber != totalScaleNumber) {
// all sub-resolutions, but the smallest and highest
params.fThresholdKeepCost = 0.f; // disable filtering
} else if (scaleNumber == totalScaleNumber || (!params.bGeomConsistency && OPTDENSE::nEstimationGeometricIters)) {
// smallest sub-resolution OR highest resolution and geometric consistency is not running but enabled
params.fThresholdKeepCost = OPTDENSE::fNCCThresholdKeep*1.2f;
}
} else {
// multi-resolution disabled
if (!params.bGeomConsistency && OPTDENSE::nEstimationGeometricIters) {
// geometric consistency is not running but enabled
params.fThresholdKeepCost = OPTDENSE::fNCCThresholdKeep*1.2f;
}
}
for (IIndex i = 0; i < numImages; ++i) {
const DepthData::ViewData& view = depthData.images[i];
Image32F image = view.image;
Camera camera;
camera.K = Eigen::Map<const SEACAVE::Matrix3x3::EMat>(view.camera.K.val).cast<float>();
camera.R = Eigen::Map<const SEACAVE::Matrix3x3::EMat>(view.camera.R.val).cast<float>();
camera.C = Eigen::Map<const SEACAVE::Point3::EVec>(view.camera.C.ptr()).cast<float>();
camera.height = image.rows;
camera.width = image.cols;
// store camera and image
if (i == 0 && (prevNumImages < numImages || images[0].size() != image.size())) {
// allocate/reallocate PatchMatch CUDA memory
if (prevNumImages > 0)
ReleaseCUDA();
AllocatePatchMatchCUDA(image);
}
if (i >= prevNumImages) {
// allocate image CUDA memory
AllocateImageCUDA(i, image, true, !view.depthMap.empty());
} else
if (images[i].size() != image.size()) {
// reallocate image CUDA memory
cudaDestroyTextureObject(textureImages[i]);
cudaFreeArray(cudaImageArrays[i]);
if (params.bGeomConsistency && i > 0) {
cudaDestroyTextureObject(textureDepths[i-1]);
cudaFreeArray(cudaDepthArrays[i-1]);
}
AllocateImageCUDA(i, image, true, !view.depthMap.empty());
} else
if (params.bGeomConsistency && i > 0 && (view.depthMap.empty() != (cudaDepthArrays[i-1] == NULL))) {
// reallocate depth CUDA memory
if (cudaDepthArrays[i-1]) {
cudaDestroyTextureObject(textureDepths[i-1]);
cudaFreeArray(cudaDepthArrays[i-1]);
}
AllocateImageCUDA(i, image, false, !view.depthMap.empty());
}
CUDA::checkCudaCall(cudaMemcpy2DToArray(cudaImageArrays[i], 0, 0, image.ptr<float>(), image.step[0], image.cols * sizeof(float), image.rows, cudaMemcpyHostToDevice));
if (params.bGeomConsistency && i > 0 && !view.depthMap.empty()) {
// set previously computed depth-map
DepthMap depthMap(view.depthMap);
if (depthMap.size() != image.size())
cv::resize(depthMap, depthMap, image.size(), 0, 0, cv::INTER_LINEAR);
CUDA::checkCudaCall(cudaMemcpy2DToArray(cudaDepthArrays[i-1], 0, 0, depthMap.ptr<float>(), depthMap.step[0], sizeof(float) * depthMap.cols, depthMap.rows, cudaMemcpyHostToDevice));
}
images[i] = std::move(image);
cameras[i] = std::move(camera);
}
if (params.bGeomConsistency && cudaDepthArrays.size() > numImages - 1) {
for (IIndex i = numImages; i < prevNumImages; ++i) {
// free image CUDA memory
cudaDestroyTextureObject(textureDepths[i-1]);
cudaFreeArray(cudaDepthArrays[i-1]);
}
cudaDepthArrays.resize(params.nNumViews);
textureDepths.resize(params.nNumViews);
}
if (prevNumImages > numImages) {
for (IIndex i = numImages; i < prevNumImages; ++i) {
// free image CUDA memory
cudaDestroyTextureObject(textureImages[i]);
cudaFreeArray(cudaImageArrays[i]);
}
images.resize(numImages);
cameras.resize(numImages);
cudaImageArrays.resize(numImages);
textureImages.resize(numImages);
}
prevNumImages = numImages;
// setup CUDA memory
CUDA::checkCudaCall(cudaMemcpy(cudaTextureImages, textureImages.data(), sizeof(cudaTextureObject_t) * numImages, cudaMemcpyHostToDevice));
CUDA::checkCudaCall(cudaMemcpy(cudaCameras, cameras.data(), sizeof(Camera) * numImages, cudaMemcpyHostToDevice));
if (params.bGeomConsistency) {
// set previously computed depth-maps
ASSERT(depthData.depthMap.size() == depthData.GetView().image.size());
CUDA::checkCudaCall(cudaMemcpy(cudaTextureDepths, textureDepths.data(), sizeof(cudaTextureObject_t) * params.nNumViews, cudaMemcpyHostToDevice));
}
// load depth-map and normal-map into CUDA memory
for (int r = 0; r < depthData.depthMap.rows; ++r) {
const int baseIndex = r * depthData.depthMap.cols;
for (int c = 0; c < depthData.depthMap.cols; ++c) {
const Normal& n = depthData.normalMap(r, c);
const int index = baseIndex + c;
Point4& depthNormal = depthNormalEstimates[index];
depthNormal.topLeftCorner<3, 1>() = Eigen::Map<const Normal::EVec>(n.ptr());
depthNormal.w() = depthData.depthMap(r, c);
}
}
CUDA::checkCudaCall(cudaMemcpy(cudaDepthNormalEstimates, depthNormalEstimates, sizeof(Point4) * depthData.depthMap.size().area(), cudaMemcpyHostToDevice));
// load low resolution depth-map into CUDA memory
if (params.bLowResProcessed) {
ASSERT(depthData.depthMap.isContinuous());
CUDA::checkCudaCall(cudaMemcpy(cudaLowDepths, depthData.depthMap.ptr<float>(), sizeof(float) * depthData.depthMap.size().area(), cudaMemcpyHostToDevice));
}
// run CUDA patch-match
ASSERT(!depthData.viewsMap.empty());
RunCUDA(depthData.confMap.getData(), (uint32_t*)depthData.viewsMap.getData());
CUDA::checkCudaCall(cudaGetLastError());
if (params.bLowResProcessed)
CUDA::checkCudaCall(cudaFree(cudaLowDepths));
// load depth-map, normal-map and confidence-map from CUDA memory
for (int r = 0; r < depthData.depthMap.rows; ++r) {
for (int c = 0; c < depthData.depthMap.cols; ++c) {
const int index = r * depthData.depthMap.cols + c;
const Point4& depthNormal = depthNormalEstimates[index];
const Depth depth = depthNormal.w();
ASSERT(ISFINITE(depth));
depthData.depthMap(r, c) = depth;
depthData.normalMap(r, c) = depthNormal.topLeftCorner<3, 1>();
if (scaleNumber == 0) {
// converted ZNCC [0-2] score, where 0 is best, to [0-1] confidence, where 1 is best
ASSERT(!depthData.confMap.empty());
float& conf = depthData.confMap(r, c);
conf = conf >= 1.f ? 0.f : 1.f - conf;
// map pixel views from bit-mask to index
ASSERT(!depthData.viewsMap.empty());
ViewsID& views = depthData.viewsMap(r, c);
if (depth > 0) {
const uint32_t bitviews(*reinterpret_cast<const uint32_t*>(views.val));
int j = 0;
for (int i = 0; i < 32; ++i) {
if (bitviews & (1 << i)) {
views[j] = i;
if (++j == maxPixelViews)
break;
}
}
while (j < 4)
views[j++] = 255;
} else
views = ViewsID(255, 255, 255, 255);
}
}
}
// remember sub-resolution estimates for next iteration
if (scaleNumber > 0) {
lowResDepthMap = depthData.depthMap;
lowResNormalMap = depthData.normalMap;
lowResViewsMap = depthData.viewsMap;
}
}
// apply ignore mask
if (OPTDENSE::nIgnoreMaskLabel >= 0) {
const DepthData::ViewData& view = depthData.GetView();
BitMatrix mask;
if (DepthEstimator::ImportIgnoreMask(*view.pImageData, depthData.depthMap.size(), (uint16_t)OPTDENSE::nIgnoreMaskLabel, mask))
depthData.ApplyIgnoreMask(mask);
}
DEBUG_EXTRA("Depth-map for image %3u %s: %dx%d (%s)", depthData.images.front().GetID(),
depthData.images.GetSize() > 2 ?
String::FormatString("estimated using %2u images", depthData.images.size()-1).c_str() :
String::FormatString("with image %3u estimated", depthData.images[1].GetID()).c_str(),
images.front().cols, images.front().rows, TD_TIMER_GET_FMT().c_str());
}
/*----------------------------------------------------------------*/
#endif // _USE_CUDA