PhysicsBasedAnimationToolkit 0.0.10
Cross-platform C++20 library of algorithms and data structures commonly used in computer graphics research on physically-based simulation.
Loading...
Searching...
No Matches
Bvh.cuh
1#ifndef PBAT_GPU_IMPL_GEOMETRY_BVH_CUH
2#define PBAT_GPU_IMPL_GEOMETRY_BVH_CUH
3
4#include "Aabb.cuh"
5#include "Morton.cuh"
6#include "pbat/common/Queue.h"
7#include "pbat/common/Stack.h"
10#include "pbat/gpu/Aliases.h"
11#include "pbat/gpu/impl/common/Buffer.cuh"
14
15#include <exception>
16#include <string>
17#include <thrust/execution_policy.h>
18#include <thrust/for_each.h>
19#include <thrust/iterator/counting_iterator.h>
20#include <type_traits>
21
22namespace pbat {
23namespace gpu {
24namespace impl {
25namespace geometry {
26
33class Bvh
34{
35 public:
36 static auto constexpr kDims = 3;
37 using OverlapType = cuda::std::pair<GpuIndex, GpuIndex>;
38 using MortonCodeType = pbat::geometry::MortonCodeType;
39
40 friend class BvhQuery;
41
42 static_assert(
43 std::is_same_v<GpuIndex, std::int32_t>,
44 "gpu::BvhImpl only supported for 32-bit signed integer indices");
45
50 Bvh(GpuIndex nBoxes);
51
58 void Build(Aabb<kDims>& aabbs, Morton::Bound const& min, Morton::Bound const& max);
66 void SortByMortonCode(Aabb<kDims>& aabbs, Morton::Bound const& min, Morton::Bound const& max);
72 void BuildTree(GpuIndex n);
78 void ConstructBoxes(Aabb<kDims>& aabbs);
87 template <class FOnOverlapDetected>
88 void DetectOverlaps(Aabb<kDims> const& aabbs, FOnOverlapDetected&& fOnOverlapDetected);
99 template <class FOnOverlapDetected, auto kStackSize = 64>
100 void DetectOverlaps(
101 Aabb<kDims> const& leafAabbs,
102 Aabb<kDims> const& queryAabbs,
103 FOnOverlapDetected fOnOverlapDetected);
131 template <
132 class FGetQueryObject,
133 class FMinDistanceToBox,
134 class FDistanceToLeaf,
135 class FDistanceUpperBound,
136 class FOnNearestNeighbourFound,
137 auto kQueueSize = 8,
138 auto kStackSize = 64>
140 Aabb<kDims> const& aabbs,
141 GpuIndex nQueries,
142 FGetQueryObject fGetQueryObject,
143 FMinDistanceToBox fMinDistanceToBox,
144 FDistanceToLeaf fDistanceToleaf,
145 FDistanceUpperBound fDistanceUpperBound,
146 FOnNearestNeighbourFound fOnNearestNeighbourFound,
147 GpuScalar eps = GpuScalar(0));
172 template <
173 class FGetQueryObject,
174 class FMinDistanceToBox,
175 class FDistanceToLeaf,
176 class FDistanceUpperBound,
177 class FOnFound,
178 auto kStackSize = 64>
179 void RangeSearch(
180 Aabb<kDims> const& aabbs,
181 GpuIndex nQueries,
182 FGetQueryObject fGetQueryObject,
183 FMinDistanceToBox fMinDistanceToBox,
184 FDistanceToLeaf fDistanceToleaf,
185 FDistanceUpperBound fDistanceUpperBound,
186 FOnFound fOnFound);
187
203};
204
205template <class FOnOverlapDetected>
206inline void Bvh::DetectOverlaps(Aabb<kDims> const& aabbs, FOnOverlapDetected&& fOnOverlapDetected)
207{
208 PBAT_PROFILE_CUDA_NAMED_SCOPE("pbat.gpu.impl.geometry.Bvh.DetectOverlaps");
209 auto const nLeafBoxes = aabbs.Size();
210 thrust::for_each(
211 thrust::device,
212 thrust::make_counting_iterator(0),
213 thrust::make_counting_iterator(nLeafBoxes),
214 [inds = inds.Raw(),
215 child = child.Raw(),
216 rightmost = rightmost.Raw(),
217 b = aabbs.b.Raw(),
218 e = aabbs.e.Raw(),
219 ib = iaabbs.b.Raw(),
220 ie = iaabbs.e.Raw(),
221 leafBegin = nLeafBoxes - 1,
222 fOnOverlapDetected =
223 std::forward<FOnOverlapDetected>(fOnOverlapDetected)] PBAT_DEVICE(GpuIndex s) mutable {
224 // Traverse nodes depth-first starting from the root=0 node
226 using namespace pbat::math::linalg;
227 using namespace pbat::geometry;
228 auto const leaf = leafBegin + s;
229 auto Ls = mini::FromBuffers<3, 1>(b, s);
230 auto Us = mini::FromBuffers<3, 1>(e, s);
231 Stack<GpuIndex, 64> stack{};
232 stack.Push(0);
233 do
234 {
235 assert(not stack.IsFull());
236 GpuIndex const node = stack.Pop();
237 // Check each child node for overlap.
238 GpuIndex const lc = child[0][node];
239 GpuIndex const rc = child[1][node];
240 bool const bIsLeftLeaf = lc >= leafBegin;
241 bool const bIsRightLeaf = rc >= leafBegin;
242 auto Llc = bIsLeftLeaf ? mini::FromBuffers<3, 1>(b, lc - leafBegin) :
243 mini::FromBuffers<3, 1>(ib, lc);
244 auto Ulc = bIsLeftLeaf ? mini::FromBuffers<3, 1>(e, lc - leafBegin) :
245 mini::FromBuffers<3, 1>(ie, lc);
246 auto Lrc = bIsRightLeaf ? mini::FromBuffers<3, 1>(b, rc - leafBegin) :
247 mini::FromBuffers<3, 1>(ib, rc);
248 auto Urc = bIsRightLeaf ? mini::FromBuffers<3, 1>(e, rc - leafBegin) :
249 mini::FromBuffers<3, 1>(ie, rc);
250 bool const bLeftBoxOverlaps =
251 OverlapQueries::AxisAlignedBoundingBoxes(Ls, Us, Llc, Ulc) and
252 (rightmost[0][node] > leaf);
253 bool const bRightBoxOverlaps =
254 OverlapQueries::AxisAlignedBoundingBoxes(Ls, Us, Lrc, Urc) and
255 (rightmost[1][node] > leaf);
256
257 // Leaf overlaps another leaf node
258 if (bLeftBoxOverlaps and bIsLeftLeaf)
259 {
260 GpuIndex const si = inds[s];
261 GpuIndex const sj = inds[lc - leafBegin];
262 fOnOverlapDetected(si, sj);
263 }
264 if (bRightBoxOverlaps and bIsRightLeaf)
265 {
266 GpuIndex const si = inds[s];
267 GpuIndex const sj = inds[rc - leafBegin];
268 fOnOverlapDetected(si, sj);
269 }
270
271 // Leaf overlaps an internal node -> traverse
272 bool const bTraverseLeft = bLeftBoxOverlaps and not bIsLeftLeaf;
273 bool const bTraverseRight = bRightBoxOverlaps and not bIsRightLeaf;
274 if (bTraverseLeft)
275 stack.Push(lc);
276 if (bTraverseRight)
277 stack.Push(rc);
278 } while (not stack.IsEmpty());
279 });
280}
281
282template <class FOnOverlapDetected, auto kStackSize>
284 Aabb<kDims> const& leafAabbs,
285 Aabb<kDims> const& queryAabbs,
286 FOnOverlapDetected fOnOverlapDetected)
287{
288 PBAT_PROFILE_CUDA_NAMED_SCOPE("pbat.gpu.impl.geometry.Bvh.DetectOverlaps");
289
290 static_assert(
291 std::is_invocable_v<FOnOverlapDetected, GpuIndex, GpuIndex>,
292 "FOnOverlapDetected must be callable with signature void f(GpuIndex,GpuIndex)");
293
294 using namespace pbat::math::linalg;
295 auto fGetQueryObject = [b = queryAabbs.b.Raw(),
296 e = queryAabbs.e.Raw()] PBAT_DEVICE(GpuIndex q) {
298 LU.Col(0) = mini::FromBuffers<3, 1>(b, q);
299 LU.Col(1) = mini::FromBuffers<3, 1>(e, q);
300 return LU;
301 };
302 auto fMinDistanceToBox = [] PBAT_DEVICE(
304 mini::SVector<GpuScalar, 3> const& L2,
305 mini::SVector<GpuScalar, 3> const& U2) {
307 LU1.Col(0),
308 LU1.Col(1),
309 L2,
310 U2);
311 return static_cast<GpuScalar>(not bBoxesOverlap);
312 };
313 auto fDistanceToLeaf = [] PBAT_DEVICE(
314 [[maybe_unused]] GpuIndex q,
315 [[maybe_unused]] mini::SMatrix<GpuScalar, 3, 2> const& LUQ,
316 [[maybe_unused]] GpuIndex leaf,
317 [[maybe_unused]] GpuIndex i) {
318 return GpuScalar(0);
319 };
320 auto fQueryUpperBound = [] PBAT_DEVICE(GpuIndex q) {
321 return GpuScalar(0);
322 };
323 auto fOnFound = [fOnOverlapDetected] PBAT_DEVICE(
324 GpuIndex q,
325 [[maybe_unused]] GpuIndex leaf,
326 GpuIndex i,
327 [[maybe_unused]] GpuScalar d) mutable {
328 fOnOverlapDetected(q, i);
329 };
330
331 this->RangeSearch<
332 decltype(fGetQueryObject),
333 decltype(fMinDistanceToBox),
334 decltype(fDistanceToLeaf),
335 decltype(fQueryUpperBound),
336 decltype(fOnFound),
337 kStackSize>(
338 leafAabbs,
339 queryAabbs.Size(),
340 fGetQueryObject,
341 fMinDistanceToBox,
342 fDistanceToLeaf,
343 fQueryUpperBound,
344 fOnFound);
345}
346
347template <
348 class FGetQueryObject,
349 class FMinDistanceToBox,
350 class FDistanceToLeaf,
351 class FDistanceUpperBound,
352 class FOnNearestNeighbourFound,
353 auto kQueueSize,
354 auto kStackSize>
356 Aabb<kDims> const& aabbs,
357 GpuIndex nQueries,
358 FGetQueryObject fGetQueryObject,
359 FMinDistanceToBox fMinDistanceToBox,
360 FDistanceToLeaf fDistanceToLeaf,
361 FDistanceUpperBound fDistanceUpperBound,
362 FOnNearestNeighbourFound fOnNearestNeighbourFound,
363 GpuScalar eps)
364{
365 PBAT_PROFILE_CUDA_NAMED_SCOPE("pbat.gpu.impl.geometry.Bvh.NearestNeighbours");
366
367 using TQuery = std::invoke_result_t<FGetQueryObject, GpuIndex>;
368 using Point = pbat::math::linalg::mini::SVector<GpuScalar, kDims>;
369
370 static_assert(
371 std::is_invocable_v<FGetQueryObject, GpuIndex> and not std::is_same_v<TQuery, void>,
372 "FGetQueryObject must be callable with signature TQuery f(GpuIndex)");
373 static_assert(
374 std::is_invocable_v<FMinDistanceToBox, TQuery, Point, Point> and
375 std::is_convertible_v<
376 std::invoke_result_t<FMinDistanceToBox, TQuery, Point, Point>,
377 GpuScalar>,
378 "FMinDistanceToBox must be callable with signature GpuScalar f(TQuery, Point, Point) where "
379 "Point=pbat::math::linalg::mini::SVector<GpuScalar, 3>");
380 static_assert(
381 std::is_invocable_v<FDistanceToLeaf, GpuIndex, TQuery, GpuIndex, GpuIndex> and
382 std::is_convertible_v<
383 std::invoke_result_t<FDistanceToLeaf, GpuIndex, TQuery, GpuIndex, GpuIndex>,
384 GpuScalar>,
385 "FDistanceToLeaf must be callable with signature GpuScalar f(GpuIndex, TQuery query, "
386 "GpuIndex leaf, "
387 "GpuIndex i)");
388 static_assert(
389 std::is_invocable_v<FDistanceUpperBound, GpuIndex> and
390 std::is_convertible_v<std::invoke_result_t<FDistanceUpperBound, GpuIndex>, GpuScalar>,
391 "FDistanceUpperBound must be callable with signature GpuScalar f(GpuIndex)");
392 static_assert(
393 std::is_invocable_v<FOnNearestNeighbourFound, GpuIndex, GpuIndex, GpuScalar, GpuIndex>,
394 "FOnNearestNeighbourFound must be callable with signature void f(GpuIndex query, GpuIndex "
395 "leaf, GpuScalar dmin, GpuIndex k)");
396
397 thrust::for_each(
398 thrust::device,
399 thrust::make_counting_iterator(0),
400 thrust::make_counting_iterator(nQueries),
401 [eps,
402 fGetQueryObject,
403 fMinDistanceToBox,
404 fDistanceToLeaf,
405 fDistanceUpperBound,
406 fOnNearestNeighbourFound,
407 leafBegin = aabbs.Size() - 1,
408 inds = inds.Raw(),
409 b = aabbs.b.Raw(),
410 e = aabbs.e.Raw(),
411 ib = iaabbs.b.Raw(),
412 ie = iaabbs.e.Raw(),
413 child = child.Raw()] PBAT_DEVICE(GpuIndex q) mutable {
414 using pbat::math::linalg::mini::FromBuffers;
417
418 // Depth-first branch and bound distance minimization
419 Stack dfs{};
420 Queue nn{};
421 GpuScalar dmin{fDistanceUpperBound(q)};
422 TQuery const query{fGetQueryObject(q)};
423
424 dfs.Push(0 /*root*/);
425 do
426 {
427 assert(not dfs.IsFull());
428 GpuIndex nodeIdx = dfs.Pop();
429 bool const bIsLeafNode = nodeIdx >= leafBegin;
430 auto lo = dmin - eps;
431 auto hi = dmin + eps;
432 if (not bIsLeafNode)
433 {
434 auto L = FromBuffers<3, 1>(ib, nodeIdx);
435 auto U = FromBuffers<3, 1>(ie, nodeIdx);
436 auto db = fMinDistanceToBox(query, L, U);
437 if (db <= hi)
438 {
439 dfs.Push(child[0][nodeIdx]);
440 dfs.Push(child[1][nodeIdx]);
441 }
442 }
443 else
444 {
445 nodeIdx -= leafBegin;
446 auto L = FromBuffers<3, 1>(b, nodeIdx);
447 auto U = FromBuffers<3, 1>(e, nodeIdx);
448 GpuScalar const db = fMinDistanceToBox(query, L, U);
449 if (db <= hi)
450 {
451 GpuIndex const i = inds[nodeIdx];
452 GpuScalar const d = fDistanceToLeaf(q, query, nodeIdx, i);
453 if (d < lo)
454 {
455 nn.Clear();
456 nn.Push(i);
457 dmin = d;
458 }
459 else if (d <= hi and not nn.IsFull())
460 {
461 nn.Push(i);
462 }
463 }
464 }
465 } while (not dfs.IsEmpty());
466 GpuIndex k{0};
467 while (not nn.IsEmpty())
468 {
469 GpuIndex const i = nn.Top();
470 nn.Pop();
471 fOnNearestNeighbourFound(q, i, dmin, k++);
472 }
473 });
474}
475
476template <
477 class FGetQueryObject,
478 class FMinDistanceToBox,
479 class FDistanceToLeaf,
480 class FDistanceUpperBound,
481 class FOnFound,
482 auto kStackSize>
484 Aabb<kDims> const& aabbs,
485 GpuIndex nQueries,
486 FGetQueryObject fGetQueryObject,
487 FMinDistanceToBox fMinDistanceToBox,
488 FDistanceToLeaf fDistanceToleaf,
489 FDistanceUpperBound fDistanceUpperBound,
490 FOnFound fOnFound)
491{
492 PBAT_PROFILE_CUDA_NAMED_SCOPE("pbat.gpu.impl.geometry.Bvh.RangeSearch");
493
494 using TQuery = std::invoke_result_t<FGetQueryObject, GpuIndex>;
495 using Point = pbat::math::linalg::mini::SVector<GpuScalar, kDims>;
496 static_assert(
497 std::is_invocable_v<FGetQueryObject, GpuIndex> and not std::is_same_v<TQuery, void>,
498 "FGetQueryObject must be callable with signature TQuery f(GpuIndex)");
499 static_assert(
500 std::is_invocable_v<FMinDistanceToBox, TQuery, Point, Point> and
501 std::is_convertible_v<
502 std::invoke_result_t<FMinDistanceToBox, TQuery, Point, Point>,
503 GpuScalar>,
504 "FMinDistanceToBox must be callable with signature GpuScalar f(TQuery, Point, Point) where "
505 "Point=pbat::math::linalg::mini::SVector<GpuScalar, 3>");
506 static_assert(
507 std::is_invocable_v<FDistanceToLeaf, GpuIndex, TQuery, GpuIndex, GpuIndex> and
508 std::is_convertible_v<
509 std::invoke_result_t<FDistanceToLeaf, GpuIndex, TQuery, GpuIndex, GpuIndex>,
510 GpuScalar>,
511 "FDistanceToLeaf must be callable with signature GpuScalar f(GpuIndex q, TQuery query, "
512 "GpuIndex leaf, "
513 "GpuIndex i)");
514 static_assert(
515 std::is_invocable_v<FDistanceUpperBound, GpuIndex> and
516 std::is_convertible_v<std::invoke_result_t<FDistanceUpperBound, GpuIndex>, GpuScalar>,
517 "FDistanceUpperBound must be callable with signature GpuScalar f(GpuIndex)");
518 static_assert(
519 std::is_invocable_v<FOnFound, GpuIndex, GpuIndex, GpuIndex, GpuScalar>,
520 "FOnFound must be callable with signature void f(GpuIndex q, GpuIndex "
521 "leafIdx, GpuIndex i, GpuScalar d)");
522
523 thrust::for_each(
524 thrust::device,
525 thrust::make_counting_iterator(0),
526 thrust::make_counting_iterator(nQueries),
527 [leafBegin = aabbs.Size() - 1,
528 inds = inds.Raw(),
529 b = aabbs.b.Raw(),
530 e = aabbs.e.Raw(),
531 ib = iaabbs.b.Raw(),
532 ie = iaabbs.e.Raw(),
533 child = child.Raw(),
534 fGetQueryObject = std::forward<FGetQueryObject>(fGetQueryObject),
535 fMinDistanceToBox = std::forward<FMinDistanceToBox>(fMinDistanceToBox),
536 fDistanceToLeaf = std::forward<FDistanceToLeaf>(fDistanceToleaf),
537 fDistanceUpperBound = std::forward<FDistanceUpperBound>(fDistanceUpperBound),
538 fOnFound = std::forward<FOnFound>(fOnFound)] PBAT_DEVICE(GpuIndex q) mutable {
539 using pbat::math::linalg::mini::FromBuffers;
541
542 // Depth-first branch and bound search
543 Stack dfs{};
544 GpuScalar const upper{fDistanceUpperBound(q)};
545 TQuery const query{fGetQueryObject(q)};
546
547 dfs.Push(0 /*root*/);
548 do
549 {
550 assert(not dfs.IsFull());
551 GpuIndex nodeIdx = dfs.Pop();
552 bool const bIsLeafNode = nodeIdx >= leafBegin;
553 if (not bIsLeafNode)
554 {
555 auto L = FromBuffers<3, 1>(ib, nodeIdx);
556 auto U = FromBuffers<3, 1>(ie, nodeIdx);
557 if (fMinDistanceToBox(query, L, U) <= upper)
558 {
559 dfs.Push(child[0][nodeIdx]);
560 dfs.Push(child[1][nodeIdx]);
561 }
562 }
563 else
564 {
565 nodeIdx -= leafBegin;
566 auto L = FromBuffers<3, 1>(b, nodeIdx);
567 auto U = FromBuffers<3, 1>(e, nodeIdx);
568 GpuScalar const db = fMinDistanceToBox(query, L, U);
569 if (db <= upper)
570 {
571 GpuIndex const i = inds[nodeIdx];
572 GpuScalar const d = fDistanceToLeaf(q, query, nodeIdx, i);
573 if (d <= upper)
574 {
575 fOnFound(q, nodeIdx, i, d);
576 }
577 }
578 }
579 } while (not dfs.IsEmpty());
580 });
581}
582
583} // namespace geometry
584} // namespace impl
585} // namespace gpu
586} // namespace pbat
587
588#endif // PBAT_GPU_GEOMETRY_BVHIMPL_H
This file includes all the mini linear algebra headers.
This file contains functions to compute Morton codes.
This file contains functions to answer overlap queries.
Fixed-size queue implementation usable in both host and device code.
Fixed-size queue implementation.
Definition Queue.h:26
Fixed-size stack implementation.
Definition Stack.h:26
Definition Buffer.cuh:21
void ConstructBoxes(Aabb< kDims > &aabbs)
Computes internal node bounding boxes, assuming the BVH's hierarchy is built.
Definition Bvh.cu:175
common::Buffer< GpuIndex, 2 > child
Definition Bvh.cuh:191
Aabb< kDims > iaabbs
Definition Bvh.cuh:199
common::Buffer< GpuIndex > parent
Definition Bvh.cuh:194
common::Buffer< GpuIndex, 2 > rightmost
Definition Bvh.cuh:197
void NearestNeighbours(Aabb< kDims > const &aabbs, GpuIndex nQueries, FGetQueryObject fGetQueryObject, FMinDistanceToBox fMinDistanceToBox, FDistanceToLeaf fDistanceToleaf, FDistanceUpperBound fDistanceUpperBound, FOnNearestNeighbourFound fOnNearestNeighbourFound, GpuScalar eps=GpuScalar(0))
Definition Bvh.cuh:355
void DetectOverlaps(Aabb< kDims > const &aabbs, FOnOverlapDetected &&fOnOverlapDetected)
Definition Bvh.cuh:206
common::Buffer< GpuIndex > inds
n leaf box indices
Definition Bvh.cuh:188
void SortByMortonCode(Aabb< kDims > &aabbs, Morton::Bound const &min, Morton::Bound const &max)
Definition Bvh.cu:139
Morton morton
Morton codes of leaf boxes.
Definition Bvh.cuh:189
common::Buffer< GpuIndex > visits
Definition Bvh.cuh:201
void BuildTree(GpuIndex n)
Builds the BVH's hierarchy, assuming primitives have been sorted.
Definition Bvh.cu:159
void Build(Aabb< kDims > &aabbs, Morton::Bound const &min, Morton::Bound const &max)
Build BVH from primitive aabbs.
Definition Bvh.cu:130
void RangeSearch(Aabb< kDims > const &aabbs, GpuIndex nQueries, FGetQueryObject fGetQueryObject, FMinDistanceToBox fMinDistanceToBox, FDistanceToLeaf fDistanceToleaf, FDistanceUpperBound fDistanceUpperBound, FOnFound fOnFound)
Definition Bvh.cuh:483
Bvh(GpuIndex nBoxes)
Definition Bvh.cu:118
Definition Morton.cuh:16
Definition Matrix.h:121
Fixed-size stack implementation usable in both host and device code.
Type aliases for GPU code.
Profiling utilities for host-side GPU code.
PBAT_HOST_DEVICE bool AxisAlignedBoundingBoxes(TMatrixL1 const &L1, TMatrixU1 const &U1, TMatrixL2 const &L2, TMatrixU2 const &U2)
Tests for overlap between axis-aligned bounding box (L1,U1) and axis-aligned bounding box (L2,...
Definition OverlapQueries.h:608
Geometric queries, quantities and data structures.
Definition AabbKdTreeHierarchy.h:23
std::uint32_t MortonCodeType
Type used to represent Morton codes.
Definition Morton.h:24
GPU algorithm implementations.
Definition VertexTriangleMixedCcdDcd.h:21
GPU related public functionality.
Definition Buffer.cu:16
Linear Algebra related functionality.
Definition FilterEigenvalues.h:7
The main namespace of the library.
Definition Aliases.h:15
float GpuScalar
Scalar type for GPU code.
Definition Aliases.h:19
std::int32_t GpuIndex
Index type for GPU code.
Definition Aliases.h:20
Definition Aabb.cuh:24