PhysicsBasedAnimationToolkit 0.0.10
Cross-platform C++20 library of algorithms and data structures commonly used in computer graphics research on physically-based simulation.
Loading...
Searching...
No Matches
SweepAndPrune.cuh
1#ifndef PBAT_GPU_IMPL_GEOMETRY_SWEEPANDPRUNE_H
2#define PBAT_GPU_IMPL_GEOMETRY_SWEEPANDPRUNE_H
3
4#include "Aabb.cuh"
5#include "pbat/gpu/Aliases.h"
6#include "pbat/gpu/impl/common/Buffer.cuh"
8
9#include <cuda/functional>
10#include <thrust/execution_policy.h>
11#include <thrust/for_each.h>
12#include <thrust/iterator/counting_iterator.h>
13#include <thrust/reduce.h>
14#include <thrust/sequence.h>
15#include <thrust/sort.h>
16
17namespace pbat {
18namespace gpu {
19namespace impl {
20namespace geometry {
21
29{
30 public:
31 static auto constexpr kDims = 3;
32
38 SweepAndPrune(std::size_t nPrimitives = 0ULL);
44 void Reserve(std::size_t nPrimitives);
49 template <class FOnOverlapDetected>
50 void SortAndSweep(Aabb<kDims>& aabbs, FOnOverlapDetected&& fOnOverlapDetected);
51
52 private:
54};
55
56template <class FOnOverlapDetected>
57inline void SweepAndPrune::SortAndSweep(Aabb<kDims>& aabbs, FOnOverlapDetected&& fOnOverlapDetected)
58{
59 PBAT_PROFILE_CUDA_NAMED_SCOPE("pbat.gpu.impl.geometry.SweepAndPrune.SortAndSweep");
60
61 // 1. Preprocess internal data
62 auto const nBoxes = static_cast<GpuIndex>(aabbs.Size());
63 if (inds.Size() < nBoxes)
64 inds.Resize(nBoxes);
65 thrust::sequence(thrust::device, inds.Data(), inds.Data() + nBoxes);
66
67 // 2. Compute mean and variance of bounding box centers
68 // NOTE:
69 // We could use the streams API here to parallelize the computation of mu and sigma along each
70 // dimension.
71 PBAT_PROFILE_CUDA_NAMED_HOST_SCOPE_START(
72 muSigmaCtx,
73 "pbat.gpu.impl.geometry.SweepAndPrune.MeanVariance");
74 auto& b = aabbs.b;
75 auto& e = aabbs.e;
76 std::array<GpuScalar, kDims> mu{}, sigma{};
77 for (auto d = 0; d < kDims; ++d)
78 {
79 mu[d] = thrust::transform_reduce(
80 thrust::device,
81 thrust::make_counting_iterator(0),
82 thrust::make_counting_iterator(nBoxes),
83 cuda::proclaim_return_type<GpuScalar>(
84 [b = b.Raw()[d], e = e.Raw()[d], div = 2 * nBoxes] PBAT_DEVICE(
85 GpuIndex i) -> GpuScalar { return (b[i] + e[i]) / div; }),
86 GpuScalar(0),
87 thrust::plus<GpuScalar>());
88 }
89 for (auto d = 0; d < kDims; ++d)
90 {
91 sigma[d] = thrust::transform_reduce(
92 thrust::device,
93 thrust::make_counting_iterator(0),
94 thrust::make_counting_iterator(nBoxes),
95 cuda::proclaim_return_type<GpuScalar>(
96 [b = b.Raw()[d], e = e.Raw()[d], mu = mu[d], nBoxes] PBAT_DEVICE(
97 GpuIndex i) -> GpuScalar {
98 GpuScalar cd = (b[i] + e[i]) / GpuScalar(2);
99 GpuScalar const dx = cd - mu;
100 return dx * dx / nBoxes;
101 }),
102 GpuScalar(0),
103 thrust::plus<GpuScalar>());
104 }
105 PBAT_PROFILE_CUDA_HOST_SCOPE_END(muSigmaCtx);
106
107 // 3. Sort bounding boxes along largest variance axis
108 GpuIndex const saxis =
109 (sigma[0] > sigma[1]) ? (sigma[0] > sigma[2] ? 0 : 2) : (sigma[1] > sigma[2] ? 1 : 2);
110 std::array<GpuIndex, kDims - 1> axis{};
111 pbat::common::ForRange<1, kDims>([&]<auto d>() { axis[d - 1] = (saxis + d) % kDims; });
112 PBAT_PROFILE_CUDA_NAMED_HOST_SCOPE_START(sortCtx, "pbat.gpu.impl.geometry.SweepAndPrune.Sort");
113 auto zip = thrust::make_zip_iterator(
114 b[axis[0]].begin(),
115 b[axis[1]].begin(),
116 e[saxis].begin(),
117 e[axis[0]].begin(),
118 e[axis[1]].begin(),
119 inds.Data());
120 thrust::sort_by_key(thrust::device, b[saxis].begin(), b[saxis].end(), zip);
121 PBAT_PROFILE_CUDA_HOST_SCOPE_END(sortCtx);
122
123 // 4. Sweep to find overlaps
124 PBAT_PROFILE_CUDA_NAMED_HOST_SCOPE_START(
125 sweepCtx,
126 "pbat.gpu.impl.geometry.SweepAndPrune.Sweep");
127 thrust::for_each(
128 thrust::device,
129 thrust::make_counting_iterator(0),
130 thrust::make_counting_iterator(nBoxes),
131 [nBoxes,
132 saxis,
133 axis,
134 b = b.Raw(),
135 e = e.Raw(),
136 inds = inds.Raw(),
137 fOnOverlapDetected =
138 std::forward<FOnOverlapDetected>(fOnOverlapDetected)] PBAT_DEVICE(GpuIndex i) mutable {
139 for (auto j = i + 1; (j < nBoxes) and (e[saxis][i] >= b[saxis][j]); ++j)
140 {
141 // NOTE:
142 // We only need to compare along non-major axis'. Thus, we're not using the box-box
143 // overlap test from OverlapQueries, which naturally compares all axis'.
144 bool const bBoxesOverlap =
145 (e[axis[0]][i] >= b[axis[0]][j]) and (b[axis[0]][i] <= e[axis[0]][j]) and
146 (e[axis[1]][i] >= b[axis[1]][j]) and (b[axis[1]][i] <= e[axis[1]][j]);
147 if (bBoxesOverlap)
148 {
149 fOnOverlapDetected(inds[i], inds[j]);
150 }
151 }
152 });
153 PBAT_PROFILE_CUDA_HOST_SCOPE_END(sweepCtx);
154}
155
156} // namespace geometry
157} // namespace impl
158} // namespace gpu
159} // namespace pbat
160
161#endif // PBAT_GPU_IMPL_GEOMETRY_SWEEPANDPRUNE_H
Definition Buffer.cuh:21
SweepAndPrune(std::size_t nPrimitives=0ULL)
Construct a new Sweep And Prune object.
Definition SweepAndPrune.cu:12
void Reserve(std::size_t nPrimitives)
Definition SweepAndPrune.cu:14
void SortAndSweep(Aabb< kDims > &aabbs, FOnOverlapDetected &&fOnOverlapDetected)
Compute overlapping boxes in aabbs.
Definition SweepAndPrune.cuh:57
Type aliases for GPU code.
Profiling utilities for host-side GPU code.
constexpr void ForRange(F &&f)
Compile-time for loop over a range of values.
Definition ConstexprFor.h:55
GPU algorithm implementations.
Definition VertexTriangleMixedCcdDcd.h:21
GPU related public functionality.
Definition Buffer.cu:16
The main namespace of the library.
Definition Aliases.h:15
float GpuScalar
Scalar type for GPU code.
Definition Aliases.h:19
std::int32_t GpuIndex
Index type for GPU code.
Definition Aliases.h:20
Definition Aabb.cuh:24