You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
662 lines
27 KiB
662 lines
27 KiB
/* |
|
* PatchMatchCUDA.cu |
|
* |
|
* Copyright (c) 2014-2021 SEACAVE |
|
* |
|
* Author(s): |
|
* |
|
* cDc <cdc.seacave@gmail.com> |
|
* |
|
* |
|
* This program is free software: you can redistribute it and/or modify |
|
* it under the terms of the GNU Affero General Public License as published by |
|
* the Free Software Foundation, either version 3 of the License, or |
|
* (at your option) any later version. |
|
* |
|
* This program is distributed in the hope that it will be useful, |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
* GNU Affero General Public License for more details. |
|
* |
|
* You should have received a copy of the GNU Affero General Public License |
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
* |
|
* |
|
* Additional Terms: |
|
* |
|
* You are required to preserve legal notices and author attributions in |
|
* that material or in the Appropriate Legal Notices displayed by works |
|
* containing it. |
|
*/ |
|
|
|
#include "PatchMatchCUDA.inl" |
|
|
|
// static max supported views |
|
#define MAX_VIEWS 32 |
|
|
|
// samples used to perform views selection |
|
#define NUM_SAMPLES 32 |
|
|
|
// patch window radius |
|
#define nSizeHalfWindow 4 |
|
|
|
// patch stepping |
|
#define nSizeStep 2 |
|
|
|
using namespace MVS; |
|
|
|
typedef Eigen::Matrix<int,2,1> Point2i; |
|
typedef Eigen::Matrix<float,2,1> Point2; |
|
typedef Eigen::Matrix<float,3,1> Point3; |
|
typedef Eigen::Matrix<float,4,1> Point4; |
|
typedef Eigen::Matrix<float,3,3> Matrix3; |
|
|
|
#define ImagePixels cudaTextureObject_t |
|
#define RandState curandState |
|
|
|
// square the given value |
|
__device__ inline constexpr float Square(float v) { |
|
return v * v; |
|
} |
|
|
|
// set/check a bit |
|
__device__ inline constexpr void SetBit(unsigned& input, unsigned i) { |
|
input |= (1u << i); |
|
} |
|
__device__ inline constexpr int IsBitSet(unsigned input, unsigned i) { |
|
return (input >> i) & 1u; |
|
} |
|
|
|
// swap the given values |
|
__device__ inline constexpr void Swap(float& v0, float& v1) { |
|
const float tmp = v0; |
|
v0 = v1; |
|
v1 = tmp; |
|
} |
|
|
|
// convert 2d to 1d coordinates and back |
|
__device__ inline int Point2Idx(const Point2i& p, int width) { |
|
return p.y() * width + p.x(); |
|
} |
|
__device__ inline Point2i Idx2Point(int idx, int width) { |
|
return Point2i(idx % width, idx / width); |
|
} |
|
|
|
// project and back-project a 3D point |
|
__device__ inline Point2 ProjectPoint(const PatchMatchCUDA::Camera& camera, const Point3& X) { |
|
const Point3 x = camera.K * camera.R * (X - camera.C); |
|
return x.hnormalized(); |
|
} |
|
__device__ inline Point3 BackProjectPointCamera(const PatchMatchCUDA::Camera& camera, const Point2& p, const float depth = 1.f) { |
|
return Point3( |
|
depth * (p.x() - camera.K(0,2)) / camera.K(0,0), |
|
depth * (p.y() - camera.K(1,2)) / camera.K(1,1), |
|
depth); |
|
} |
|
__device__ inline Point3 BackProjectPoint(const PatchMatchCUDA::Camera& camera, const Point2& p, const float depth) { |
|
const Point3 camX = BackProjectPointCamera(camera, p, depth); |
|
return camera.R.transpose() * camX + camera.C; |
|
} |
|
|
|
// compute camera ray direction for the given pixel |
|
__device__ inline Point3 ViewDirection(const PatchMatchCUDA::Camera& camera, const Point2i& p) { |
|
return BackProjectPointCamera(camera, p.cast<float>()).normalized(); |
|
} |
|
|
|
// sort the given values array using bubble sort algorithm |
|
__device__ inline void Sort(const float* values, float* sortedValues, int n) { |
|
for (int i = 0; i < n; ++i) |
|
sortedValues[i] = values[i]; |
|
do { |
|
int newn = 0; |
|
for (int i = 1; i < n; ++i) { |
|
if (sortedValues[i-1] > sortedValues[i]) { |
|
Swap(sortedValues[i-1], sortedValues[i]); |
|
newn = i; |
|
} |
|
} |
|
n = newn; |
|
} while(n); |
|
} |
|
|
|
// find the index of the minimum value in the given values array |
|
__device__ inline int FindMinIndex(const float* values, const int n) { |
|
float minValue = values[0]; |
|
int minValueIdx = 0; |
|
for (int i = 1; i < n; ++i) { |
|
if (minValue > values[i]) { |
|
minValue = values[i]; |
|
minValueIdx = i; |
|
} |
|
} |
|
return minValueIdx; |
|
} |
|
|
|
// convert Probability Density Function (PDF) to Cumulative Distribution Function (CDF) |
|
__device__ inline void PDF2CDF(float* probs, const int numProbs) { |
|
float probSum = 0.f; |
|
for (int i = 0; i < numProbs; ++i) |
|
probSum += probs[i]; |
|
const float invProbSum = 1.f / probSum; |
|
float sumProb = 0.f; |
|
for (int i = 0; i < numProbs-1; ++i) { |
|
sumProb += probs[i] * invProbSum; |
|
probs[i] = sumProb; |
|
} |
|
probs[numProbs-1] = 1.f; |
|
} |
|
/*----------------------------------------------------------------*/ |
|
|
|
|
|
// generate a random normal |
|
__device__ inline Point3 GenerateRandomNormal(const PatchMatchCUDA::Camera& camera, const Point2i& p, RandState* randState) |
|
{ |
|
float q1, q2, s; |
|
do { |
|
q1 = 2.f * curand_uniform(randState) - 1.f; |
|
q2 = 2.f * curand_uniform(randState) - 1.f; |
|
s = q1 * q1 + q2 * q2; |
|
} while (s >= 1.f); |
|
const float sq = sqrt(1.f - s); |
|
Point3 normal( |
|
2.f * q1 * sq, |
|
2.f * q2 * sq, |
|
1.f - 2.f * s); |
|
|
|
const Point3 viewDirection = ViewDirection(camera, p); |
|
if (normal.dot(viewDirection) > 0.f) |
|
normal = -normal; |
|
return normal.normalized(); |
|
} |
|
|
|
// randomly perturb a normal |
|
__device__ inline Point3 GeneratePerturbedNormal(const PatchMatchCUDA::Camera& camera, const Point2i& p, const Point3& normal, RandState* randState, const float perturbation) |
|
{ |
|
const Point3 viewDirection = ViewDirection(camera, p); |
|
|
|
const float a1 = (curand_uniform(randState) - 0.5f) * perturbation; |
|
const float a2 = (curand_uniform(randState) - 0.5f) * perturbation; |
|
const float a3 = (curand_uniform(randState) - 0.5f) * perturbation; |
|
|
|
const float sinA1 = sin(a1); |
|
const float sinA2 = sin(a2); |
|
const float sinA3 = sin(a3); |
|
const float cosA1 = cos(a1); |
|
const float cosA2 = cos(a2); |
|
const float cosA3 = cos(a3); |
|
|
|
Matrix3 perturb; perturb << |
|
cosA2 * cosA3, |
|
cosA3 * sinA1 * sinA2 - cosA1 * sinA3, |
|
sinA1 * sinA3 + cosA1 * cosA3 * sinA2, |
|
cosA2 * sinA3, |
|
cosA1 * cosA3 + sinA1 * sinA2 * sinA3, |
|
cosA1 * sinA2 * sinA3 - cosA3 * sinA1, |
|
-sinA2, |
|
cosA2 * sinA1, |
|
cosA1 * cosA2; |
|
|
|
Point3 normalPerturbed = perturb * normal.topLeftCorner<3,1>(); |
|
if (normalPerturbed.dot(viewDirection) >= 0.f) |
|
return normal; |
|
return normalPerturbed.normalized(); |
|
} |
|
|
|
// randomly perturb a normal |
|
__device__ inline float GeneratePerturbedDepth(float depth, RandState* randState, const float perturbation, const PatchMatchCUDA::Params& params) |
|
{ |
|
const float depthMinPerturbed = (1.f - perturbation) * depth; |
|
const float depthMaxPerturbed = (1.f + perturbation) * depth; |
|
float depthPerturbed; |
|
do { |
|
depthPerturbed = curand_uniform(randState) * (depthMaxPerturbed - depthMinPerturbed) + depthMinPerturbed; |
|
} while (depthPerturbed < params.fDepthMin && depthPerturbed > params.fDepthMax); |
|
return depthPerturbed; |
|
} |
|
|
|
// interpolate given pixel's estimate to the current position |
|
__device__ inline float InterpolatePixel(const PatchMatchCUDA::Camera& camera, const Point2i& p, const Point2i& np, float depth, const Point3& normal, const PatchMatchCUDA::Params& params) |
|
{ |
|
float depthNew; |
|
if (p.x() == np.x()) { |
|
const float nx1 = (p.y() - camera.K(1,2)) / camera.K(1,1); |
|
const float denom = normal.z() + nx1 * normal.y(); |
|
if (abs(denom) < FLT_EPSILON) |
|
return depth; |
|
const float x1 = (np.y() - camera.K(1,2)) / camera.K(1,1); |
|
const float nom = depth * (normal.z() + x1 * normal.y()); |
|
depthNew = nom / denom; |
|
} else if (p.y() == np.y()) { |
|
const float nx1 = (p.x() - camera.K(0,2)) / camera.K(0,0); |
|
const float denom = normal.z() + nx1 * normal.x(); |
|
if (abs(denom) < FLT_EPSILON) |
|
return depth; |
|
const float x1 = (np.x() - camera.K(0,2)) / camera.K(0,0); |
|
const float nom = depth * (normal.z() + x1 * normal.x()); |
|
depthNew = nom / denom; |
|
} else { |
|
const float planeD = normal.dot(BackProjectPointCamera(camera, np.cast<float>(), depth)); |
|
depthNew = planeD / normal.dot(BackProjectPointCamera(camera, p.cast<float>())); |
|
} |
|
return (depthNew >= params.fDepthMin && depthNew <= params.fDepthMax) ? depthNew : depth; |
|
} |
|
|
|
// compute normal to the surface given the 4 neighbors |
|
__device__ inline Point3 ComputeDepthGradient(const Matrix3& K, float depth, const Point2i& pos, const Point4& ndepth) { |
|
constexpr float2 nposg[4] = {{0,-1}, {0,1}, {-1,0}, {1,0}}; |
|
Point2 dg(0,0); |
|
// add neighbor depths at the gradient locations |
|
for (int i=0; i<4; ++i) |
|
dg += Point2(nposg[i].x,nposg[i].y) * (ndepth[i] - depth); |
|
// compute depth gradient |
|
const Point2 d = dg*0.5f; |
|
// compute normal from depth gradient |
|
return Point3( |
|
K(0,0)*d.x(), |
|
K(1,1)*d.y(), |
|
(K(0,2)-pos.x())*d.x()+(K(1,2)-pos.y())*d.y()-depth).normalized(); |
|
} |
|
|
|
// compose tho homography matrix that transforms a point from reference to source camera through the given plane |
|
__device__ inline Matrix3 ComputeHomography(const PatchMatchCUDA::Camera& refCamera, const PatchMatchCUDA::Camera& trgCamera, const Point2& p, const Point4& plane) |
|
{ |
|
const Point3 X = BackProjectPointCamera(refCamera, p, plane.w()); |
|
const Point3 normal = plane.topLeftCorner<3,1>(); |
|
const Point3 t = (refCamera.C - trgCamera.C) / (normal.dot(X)); |
|
const Matrix3 H = trgCamera.R * (refCamera.R.transpose() + t*normal.transpose()); |
|
return trgCamera.K * H * refCamera.K.inverse(); |
|
} |
|
|
|
// weight a neighbor texel based on color similarity and distance to the center texel |
|
__device__ inline float ComputeBilateralWeight(int xDist, int yDist, float pix, float centerPix) |
|
{ |
|
constexpr float sigmaSpatial = -1.f / (2.f * (nSizeHalfWindow-1)*(nSizeHalfWindow-1)); |
|
constexpr float sigmaColor = -1.f / (2.f * 25.f/255.f*25.f/255.f); |
|
const float spatialDistSq = float(xDist * xDist + yDist * yDist); |
|
const float colorDistSq = Square(pix - centerPix); |
|
return exp(spatialDistSq * sigmaSpatial + colorDistSq * sigmaColor); |
|
} |
|
|
|
// compute the geometric consistency weight |
|
__device__ inline float GeometricConsistencyWeight(const ImagePixels depthImage, const PatchMatchCUDA::Camera& refCamera, const PatchMatchCUDA::Camera& trgCamera, const Point4& plane, const Point2i& p) |
|
{ |
|
if (depthImage == NULL) |
|
return 0.f; |
|
constexpr float maxDist = 4.f; |
|
const Point3 forwardPoint = BackProjectPoint(refCamera, p.cast<float>(), plane.w()); |
|
const Point2 trgPt = ProjectPoint(trgCamera, forwardPoint); |
|
const float trgDepth = tex2D<float>(depthImage, trgPt.x() + 0.5f, trgPt.y() + 0.5f); |
|
if (trgDepth == 0.f) |
|
return maxDist; |
|
const Point3 trgX = BackProjectPoint(trgCamera, trgPt, trgDepth); |
|
const Point2 backwardPoint = ProjectPoint(refCamera, trgX); |
|
const Point2 diff = p.cast<float>() - backwardPoint; |
|
const float dist = diff.norm(); |
|
return min(maxDist, sqrt(dist*(dist+2.f))); |
|
} |
|
|
|
// compute photometric score using weighted ZNCC |
|
__device__ float ScorePlane(const ImagePixels refImage, const PatchMatchCUDA::Camera& refCamera, const ImagePixels trgImage, const PatchMatchCUDA::Camera& trgCamera, const Point2i& p, const Point4& plane, const float lowDepth, const PatchMatchCUDA::Params& params) |
|
{ |
|
constexpr float maxCost = 1.2f; |
|
|
|
Matrix3 H = ComputeHomography(refCamera, trgCamera, p.cast<float>(), plane); |
|
const Point2 pt = (H * p.cast<float>().homogeneous()).hnormalized(); |
|
if (pt.x() >= trgCamera.width || pt.x() < 0.f || pt.y() >= trgCamera.height || pt.y() < 0.f) |
|
return maxCost; |
|
Point3 X = H * Point2(p.x()-nSizeHalfWindow, p.y()-nSizeHalfWindow).homogeneous(); |
|
Point3 baseX(X); |
|
H *= float(nSizeStep); |
|
|
|
float sumRef = 0.f; |
|
float sumRefRef = 0.f; |
|
float sumTrg = 0.f; |
|
float sumTrgTrg = 0.f; |
|
float sumRefTrg = 0.f; |
|
float bilateralWeightSum = 0.f; |
|
const float refCenterPix = tex2D<float>(refImage, p.x() + 0.5f, p.y() + 0.5f); |
|
for (int i = -nSizeHalfWindow; i <= nSizeHalfWindow; i += nSizeStep) { |
|
for (int j = -nSizeHalfWindow; j <= nSizeHalfWindow; j += nSizeStep) { |
|
const Point2i refPt = Point2i(p.x() + j, p.y() + i); |
|
const Point2 trgPt = X.hnormalized(); |
|
const float refPix = tex2D<float>(refImage, refPt.x() + 0.5f, refPt.y() + 0.5f); |
|
const float trgPix = tex2D<float>(trgImage, trgPt.x() + 0.5f, trgPt.y() + 0.5f); |
|
const float weight = ComputeBilateralWeight(j, i, refPix, refCenterPix); |
|
const float weightRefPix = weight * refPix; |
|
const float weightTrgPix = weight * trgPix; |
|
sumRef += weightRefPix; |
|
sumTrg += weightTrgPix; |
|
sumRefRef += weightRefPix * refPix; |
|
sumTrgTrg += weightTrgPix * trgPix; |
|
sumRefTrg += weightRefPix * trgPix; |
|
bilateralWeightSum += weight; |
|
X += H.col(0); |
|
} |
|
baseX += H.col(1); |
|
X = baseX; |
|
} |
|
|
|
const float varRef = sumRefRef * bilateralWeightSum - sumRef * sumRef; |
|
if (lowDepth <= 0 && varRef < 1e-8f) |
|
return maxCost; |
|
const float varTrg = sumTrgTrg * bilateralWeightSum - sumTrg * sumTrg; |
|
const float varRefTrg = varRef * varTrg; |
|
if (varRefTrg < 1e-16f) |
|
return maxCost; |
|
const float covarTrgRef = sumRefTrg * bilateralWeightSum - sumRef * sumTrg; |
|
float ncc = 1.f - covarTrgRef / sqrt(varRefTrg); |
|
|
|
// apply depth prior weight based on patch textureless |
|
if (lowDepth > 0) { |
|
const float depth(plane.w()); |
|
const float deltaDepth(MIN((abs(lowDepth-depth) / lowDepth), 0.5f)); |
|
constexpr float smoothSigmaDepth(-1.f / (1.f * 0.02f)); // 0.12: patch texture variance below 0.02 (0.12^2) is considered texture-less |
|
const float factorDeltaDepth(exp(varRef * smoothSigmaDepth)); |
|
ncc = (1.f-factorDeltaDepth)*ncc + factorDeltaDepth*deltaDepth; |
|
} |
|
return max(0.f, min(2.f, ncc)); |
|
} |
|
|
|
// compute photometric score for all neighbor images |
|
__device__ inline void MultiViewScorePlane(const ImagePixels *images, const ImagePixels* depthImages, const PatchMatchCUDA::Camera* cameras, const Point2i& p, const Point4& plane, const float lowDepth, float* costVector, const PatchMatchCUDA::Params& params) |
|
{ |
|
for (int imgId = 1; imgId <= params.nNumViews; ++imgId) |
|
costVector[imgId-1] = ScorePlane(images[0], cameras[0], images[imgId], cameras[imgId], p, plane, lowDepth, params); |
|
if (params.bGeomConsistency) |
|
for (int imgId = 0; imgId < params.nNumViews; ++imgId) |
|
costVector[imgId] += 0.1f * GeometricConsistencyWeight(depthImages[imgId], cameras[0], cameras[imgId+1], plane, p); |
|
} |
|
// same as above, but interpolate the plane to current pixel position |
|
__device__ inline float MultiViewScoreNeighborPlane(const ImagePixels* images, const ImagePixels* depthImages, const PatchMatchCUDA::Camera* cameras, const Point2i& p, const Point2i& np, Point4 plane, const float lowDepth, float* costVector, const PatchMatchCUDA::Params& params) |
|
{ |
|
plane.w() = InterpolatePixel(cameras[0], p, np, plane.w(), plane.topLeftCorner<3,1>(), params); |
|
MultiViewScorePlane(images, depthImages, cameras, p, plane, lowDepth, costVector, params); |
|
return plane.w(); |
|
} |
|
|
|
// aggregate photometric score from all images |
|
__device__ inline float AggregateMultiViewScores(const unsigned* viewWeights, const float* costVector, int numViews) |
|
{ |
|
float cost = 0; |
|
for (int imgId = 0; imgId < numViews; ++imgId) |
|
if (viewWeights[imgId]) |
|
cost += viewWeights[imgId] * costVector[imgId]; |
|
return cost / NUM_SAMPLES; |
|
} |
|
|
|
// propagate and refine the plane estimate for the current pixel employing the asymmetric approach described in: |
|
// "Multi-View Stereo with Asymmetric Checkerboard Propagation and Multi-Hypothesis Joint View Selection", 2018 |
|
__device__ void ProcessPixel(const ImagePixels* images, const ImagePixels* depthImages, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, RandState* randStates, unsigned* selectedViews, const Point2i& p, const PatchMatchCUDA::Params& params, const int iter) |
|
{ |
|
int width = cameras[0].width; |
|
int height = cameras[0].height; |
|
if (p.x() >= width || p.y() >= height) |
|
return; |
|
const int idx = Point2Idx(p, width); |
|
RandState* randState = &randStates[idx]; |
|
float lowDepth = 0; |
|
if (params.bLowResProcessed) |
|
lowDepth = lowDepths[idx]; |
|
|
|
// adaptive sampling: 0 up-near, 1 down-near, 2 left-near, 3 right-near, 4 up-far, 5 down-far, 6 left-far, 7 right-far |
|
static constexpr int2 dirs[8][11] = { |
|
{{ 0,-1},{-1,-2},{ 1,-2},{-2,-3},{ 2,-3},{-3,-4},{ 3,-4}}, |
|
{{ 0, 1},{-1, 2},{ 1, 2},{-2, 3},{ 2, 3},{-3, 4},{ 3, 4}}, |
|
{{-1, 0},{-2,-1},{-2, 1},{-3,-2},{-3, 2},{-4,-3},{-4, 3}}, |
|
{{ 1, 0},{ 2,-1},{ 2, 1},{ 3,-2},{ 3, 2},{ 4,-3},{ 4, 3}}, |
|
{{0,-3},{0,-5},{0,-7},{0,-9},{0,-11},{0,-13},{0,-15},{0,-17},{0,-19},{0,-21},{0,-23}}, |
|
{{0, 3},{0, 5},{0, 7},{0, 9},{0, 11},{0, 13},{0, 15},{0, 17},{0, 19},{0, 21},{0, 23}}, |
|
{{-3,0},{-5,0},{-7,0},{-9,0},{-11,0},{-13,0},{-15,0},{-17,0},{-19,0},{-21,0},{-23,0}}, |
|
{{ 3,0},{ 5,0},{ 7,0},{ 9,0},{ 11,0},{ 13,0},{ 15,0},{ 17,0},{ 19,0},{ 21,0},{ 23,0}} |
|
}; |
|
static constexpr int numDirs[8] = {7, 7, 7, 7, 11, 11, 11, 11}; |
|
const int neighborPositions[4] = { |
|
idx - width, |
|
idx + width, |
|
idx - 1, |
|
idx + 1, |
|
}; |
|
bool valid[8] = {false, false, false, false, false, false, false, false}; |
|
int positions[8]; |
|
float neighborDepths[8]; |
|
float costArray[8][MAX_VIEWS]; |
|
|
|
for (int posId=0; posId<8; ++posId) { |
|
const int2* samples = dirs[posId]; |
|
Point2i bestNx; float bestConf(FLT_MAX); |
|
for (int dirId=0; dirId<numDirs[posId]; ++dirId) { |
|
const int2& offset = samples[dirId]; |
|
const Point2i np(p.x()+offset.x, p.y()+offset.y); |
|
if (!(np.x()>=0 && np.y()>=0 && np.x()<width && np.y()<height)) |
|
continue; |
|
const int nidx = Point2Idx(np, width); |
|
const float nconf = costs[nidx]; |
|
if (bestConf > nconf) { |
|
bestNx = np; |
|
bestConf = nconf; |
|
} |
|
} |
|
if (bestConf < FLT_MAX) { |
|
valid[posId] = true; |
|
positions[posId] = Point2Idx(bestNx, width); |
|
neighborDepths[posId] = MultiViewScoreNeighborPlane(images, depthImages, cameras, p, bestNx, planes[positions[posId]], lowDepth, costArray[posId], params); |
|
} |
|
} |
|
|
|
// multi-hypothesis view selection |
|
float viewSelectionPriors[MAX_VIEWS] = {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; |
|
for (int posId = 0; posId < 4; ++posId) { |
|
if (valid[posId]) { |
|
const unsigned selectedView = selectedViews[neighborPositions[posId]]; |
|
for (int j = 0; j < params.nNumViews; ++j) |
|
viewSelectionPriors[j] += (IsBitSet(selectedView, j) ? 0.9f : 0.1f); |
|
} |
|
} |
|
float samplingProbs[MAX_VIEWS]; |
|
constexpr float thCostBad = 1.2f; |
|
const float thCost = 0.8f * exp(Square((float)iter) / (-2.f * 4.f*4.f)); |
|
for (int imgId = 0; imgId < params.nNumViews; ++imgId) { |
|
float sumW = 0; |
|
unsigned count = 0; |
|
unsigned countBad = 0; |
|
for (int posId = 0; posId < 8; posId++) { |
|
if (valid[posId]) { |
|
if (costArray[posId][imgId] < thCost) { |
|
sumW += exp(Square(costArray[posId][imgId]) / (-2.f * 0.3f*0.3f)); |
|
++count; |
|
} else if (costArray[posId][imgId] > thCostBad) { |
|
++countBad; |
|
} |
|
} |
|
} |
|
if (count > 2 && countBad < 3) { |
|
samplingProbs[imgId] = viewSelectionPriors[imgId] * sumW / count; |
|
} else if (countBad < 3) { |
|
samplingProbs[imgId] = viewSelectionPriors[imgId] * exp(Square(thCost) / (-2.f * 0.4f*0.4f)); |
|
} else { |
|
samplingProbs[imgId] = 0.f; |
|
} |
|
} |
|
PDF2CDF(samplingProbs, params.nNumViews); |
|
unsigned viewWeights[MAX_VIEWS] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; |
|
for (int sample = 0; sample < NUM_SAMPLES; ++sample) { |
|
const float randProb = curand_uniform(randState); |
|
for (int imgId = 0; imgId < params.nNumViews; ++imgId) { |
|
if (samplingProbs[imgId] > randProb) { |
|
++viewWeights[imgId]; |
|
break; |
|
} |
|
} |
|
} |
|
|
|
// propagate best neighbor plane |
|
Point4& plane = planes[idx]; |
|
float& cost = costs[idx]; |
|
unsigned newSelectedViews = 0; |
|
for (int imgId = 0; imgId < params.nNumViews; ++imgId) |
|
if (viewWeights[imgId]) |
|
SetBit(newSelectedViews, imgId); |
|
float finalCosts[8]; |
|
for (int posId = 0; posId < 8; ++posId) |
|
finalCosts[posId] = AggregateMultiViewScores(viewWeights, costArray[posId], params.nNumViews); |
|
const int minCostIdx = FindMinIndex(finalCosts, 8); |
|
float costVector[MAX_VIEWS]; |
|
MultiViewScorePlane(images, depthImages, cameras, p, plane, lowDepth, costVector, params); |
|
cost = AggregateMultiViewScores(viewWeights, costVector, params.nNumViews); |
|
if (finalCosts[minCostIdx] < cost && valid[minCostIdx]) { |
|
plane = planes[positions[minCostIdx]]; |
|
plane.w() = neighborDepths[minCostIdx]; |
|
cost = finalCosts[minCostIdx]; |
|
selectedViews[idx] = newSelectedViews; |
|
} |
|
const float depth = plane.w(); |
|
|
|
// refine estimate |
|
constexpr float perturbationDepth = 0.005f; |
|
constexpr float perturbationNormal = 0.01f * (float)M_PI; |
|
const float depthPerturbed = GeneratePerturbedDepth(depth, randState, perturbationDepth, params); |
|
const Point3 perturbedNormal = GeneratePerturbedNormal(cameras[0], p, plane.topLeftCorner<3,1>(), randState, perturbationNormal); |
|
const Point3 normalRand = GenerateRandomNormal(cameras[0], p, randState); |
|
int numValidPlanes = 3; |
|
Point3 surfaceNormal; |
|
if (valid[0] && valid[1] && valid[2] && valid[3]) { |
|
// estimate normal from surrounding surface |
|
const Point4 ndepths( |
|
planes[neighborPositions[0]].w(), |
|
planes[neighborPositions[1]].w(), |
|
planes[neighborPositions[2]].w(), |
|
planes[neighborPositions[3]].w() |
|
); |
|
surfaceNormal = ComputeDepthGradient(cameras[0].K, depth, p, ndepths); |
|
numValidPlanes = 4; |
|
} |
|
constexpr int numPlanes = 4; |
|
const float depths[numPlanes] = {depthPerturbed, depth, depth, depth}; |
|
const Point3 normals[numPlanes] = {plane.topLeftCorner<3,1>(), perturbedNormal, normalRand, surfaceNormal}; |
|
for (int i = 0; i < numValidPlanes; ++i) { |
|
Point4 newPlane; |
|
newPlane.topLeftCorner<3,1>() = normals[i]; |
|
newPlane.w() = depths[i]; |
|
MultiViewScorePlane(images, depthImages, cameras, p, newPlane, lowDepth, costVector, params); |
|
const float costPlane = AggregateMultiViewScores(viewWeights, costVector, params.nNumViews); |
|
if (cost > costPlane) { |
|
cost = costPlane; |
|
plane = newPlane; |
|
} |
|
} |
|
} |
|
|
|
// compute the score of the current plane estimate |
|
__device__ void InitializePixelScore(const ImagePixels *images, const ImagePixels* depthImages, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, RandState* randStates, unsigned* selectedViews, const Point2i& p, const PatchMatchCUDA::Params params) |
|
{ |
|
const int width = cameras[0].width; |
|
const int height = cameras[0].height; |
|
if (p.x() >= width || p.y() >= height) |
|
return; |
|
const int idx = Point2Idx(p, width); |
|
float lowDepth = 0; |
|
if (params.bLowResProcessed) |
|
lowDepth = lowDepths[idx]; |
|
// initialize estimate randomly if not set |
|
RandState* randState = &randStates[idx]; |
|
curand_init(1234/*threadIdx.x*/, p.y(), p.x(), randState); |
|
Point4& plane = planes[idx]; |
|
float depth = plane.w(); |
|
if (depth <= 0.f) { |
|
// generate random plane |
|
plane.topLeftCorner<3,1>() = GenerateRandomNormal(cameras[0], p, randState); |
|
plane.w() = curand_uniform(randState) * (params.fDepthMax - params.fDepthMin) + params.fDepthMin; |
|
} else if (plane.topLeftCorner<3,1>().dot(ViewDirection(cameras[0], p)) >= 0.f) { |
|
// generate random normal |
|
plane.topLeftCorner<3,1>() = GenerateRandomNormal(cameras[0], p, randState); |
|
} |
|
// compute costs |
|
float costVector[MAX_VIEWS]; |
|
MultiViewScorePlane(images, depthImages, cameras, p, plane, lowDepth, costVector, params); |
|
// select best views |
|
float costVectorSorted[MAX_VIEWS]; |
|
Sort(costVector, costVectorSorted, params.nNumViews); |
|
float cost = 0.f; |
|
for (int i = 0; i < params.nInitTopK; ++i) |
|
cost += costVectorSorted[i]; |
|
const float costThreshold = costVectorSorted[params.nInitTopK - 1]; |
|
unsigned& selectedView = selectedViews[idx]; |
|
selectedView = 0; |
|
for (int imgId = 0; imgId < params.nNumViews; ++imgId) |
|
if (costVector[imgId] <= costThreshold) |
|
SetBit(selectedView, imgId); |
|
costs[idx] = cost / params.nInitTopK; |
|
} |
|
__global__ void InitializeScore(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatchCUDA::Params params) |
|
{ |
|
const Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y); |
|
InitializePixelScore((const ImagePixels*)textureImages, (const ImagePixels*)textureDepths, cameras, planes, lowDepths, costs, (RandState*)randStates, selectedViews, p, params); |
|
} |
|
|
|
// traverse image in a back/red checkerboard pattern |
|
__global__ void BlackPixelProcess(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatchCUDA::Params params, const int iter) |
|
{ |
|
Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y); |
|
p.y() = p.y() * 2 + (threadIdx.x % 2 == 0 ? 0 : 1); |
|
ProcessPixel((const ImagePixels*)textureImages, (const ImagePixels*)textureDepths, cameras, planes, lowDepths, costs, (RandState*)randStates, selectedViews, p, params, iter); |
|
} |
|
__global__ void RedPixelProcess(const cudaTextureObject_t* textureImages, const cudaTextureObject_t* textureDepths, const PatchMatchCUDA::Camera* cameras, Point4* planes, const float* lowDepths, float* costs, curandState* randStates, unsigned* selectedViews, const PatchMatchCUDA::Params params, const int iter) |
|
{ |
|
Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y); |
|
p.y() = p.y() * 2 + (threadIdx.x % 2 == 0 ? 1 : 0); |
|
ProcessPixel((const ImagePixels*)textureImages, (const ImagePixels*)textureDepths, cameras, planes, lowDepths, costs, (RandState*)randStates, selectedViews, p, params, iter); |
|
} |
|
|
|
// filter depth/normals |
|
__global__ void FilterPlanes(Point4* planes, float* costs, unsigned* selectedViews, int width, int height, const PatchMatchCUDA::Params params) |
|
{ |
|
const Point2i p = Point2i(blockIdx.x * blockDim.x + threadIdx.x, blockIdx.y * blockDim.y + threadIdx.y); |
|
if (p.x() >= width || p.y() >= height) |
|
return; |
|
const int idx = Point2Idx(p, width); |
|
// filter estimates if the score is not good enough |
|
Point4& plane = planes[idx]; |
|
float conf = costs[idx]; |
|
if (plane.w() <= 0 || conf >= params.fThresholdKeepCost) { |
|
conf = 0; |
|
plane = Point4::Zero(); |
|
selectedViews[idx] = 0; |
|
} |
|
} |
|
/*----------------------------------------------------------------*/ |
|
|
|
|
|
__host__ void PatchMatchCUDA::RunCUDA(float* ptrCostMap, uint32_t* ptrViewsMap) |
|
{ |
|
const unsigned width = cameras[0].width; |
|
const unsigned height = cameras[0].height; |
|
|
|
constexpr unsigned BLOCK_W = 32; |
|
constexpr unsigned BLOCK_H = (BLOCK_W / 2); |
|
|
|
const dim3 blockSize(BLOCK_W, BLOCK_H, 1); |
|
const dim3 gridSizeFull((width + BLOCK_H - 1) / BLOCK_H, (height + BLOCK_H - 1) / BLOCK_H, 1); |
|
const dim3 gridSizeCheckerboard((width + BLOCK_W - 1) / BLOCK_W, ((height / 2) + BLOCK_H - 1) / BLOCK_H, 1); |
|
|
|
InitializeScore<<<gridSizeFull, blockSize>>>(cudaTextureImages, cudaTextureDepths, cudaCameras, cudaDepthNormalEstimates, cudaLowDepths, cudaDepthNormalCosts, cudaRandStates, cudaSelectedViews, params); |
|
cudaDeviceSynchronize(); |
|
|
|
for (int iter = 0; iter < params.nEstimationIters; ++iter) { |
|
BlackPixelProcess<<<gridSizeCheckerboard, blockSize>>>(cudaTextureImages, cudaTextureDepths, cudaCameras, cudaDepthNormalEstimates, cudaLowDepths, cudaDepthNormalCosts, cudaRandStates, cudaSelectedViews, params, iter); |
|
cudaDeviceSynchronize(); |
|
RedPixelProcess<<<gridSizeCheckerboard, blockSize>>>(cudaTextureImages, cudaTextureDepths, cudaCameras, cudaDepthNormalEstimates, cudaLowDepths, cudaDepthNormalCosts, cudaRandStates, cudaSelectedViews, params, iter); |
|
cudaDeviceSynchronize(); |
|
} |
|
|
|
if (params.fThresholdKeepCost > 0) |
|
FilterPlanes<<<gridSizeFull, blockSize>>>(cudaDepthNormalEstimates, cudaDepthNormalCosts, cudaSelectedViews, width, height, params); |
|
|
|
cudaMemcpy(depthNormalEstimates, cudaDepthNormalEstimates, sizeof(Point4) * width * height, cudaMemcpyDeviceToHost); |
|
if (ptrCostMap) |
|
cudaMemcpy(ptrCostMap, cudaDepthNormalCosts, sizeof(float) * width * height, cudaMemcpyDeviceToHost); |
|
if (ptrViewsMap) |
|
cudaMemcpy(ptrViewsMap, cudaSelectedViews, sizeof(uint32_t) * width * height, cudaMemcpyDeviceToHost); |
|
|
|
cudaDeviceSynchronize(); |
|
} |
|
/*----------------------------------------------------------------*/
|
|
|