59 PBAT_PROFILE_CUDA_NAMED_SCOPE(
"pbat.gpu.impl.geometry.SweepAndPrune.SortAndSweep");
62 auto const nBoxes =
static_cast<GpuIndex>(aabbs.Size());
63 if (inds.Size() < nBoxes)
65 thrust::sequence(thrust::device, inds.Data(), inds.Data() + nBoxes);
71 PBAT_PROFILE_CUDA_NAMED_HOST_SCOPE_START(
73 "pbat.gpu.impl.geometry.SweepAndPrune.MeanVariance");
76 std::array<GpuScalar, kDims> mu{}, sigma{};
77 for (
auto d = 0; d < kDims; ++d)
79 mu[d] = thrust::transform_reduce(
81 thrust::make_counting_iterator(0),
82 thrust::make_counting_iterator(nBoxes),
83 cuda::proclaim_return_type<GpuScalar>(
84 [b = b.Raw()[d], e = e.Raw()[d], div = 2 * nBoxes] PBAT_DEVICE(
87 thrust::plus<GpuScalar>());
89 for (
auto d = 0; d < kDims; ++d)
91 sigma[d] = thrust::transform_reduce(
93 thrust::make_counting_iterator(0),
94 thrust::make_counting_iterator(nBoxes),
95 cuda::proclaim_return_type<GpuScalar>(
96 [b = b.Raw()[d], e = e.Raw()[d], mu = mu[d], nBoxes] PBAT_DEVICE(
100 return dx * dx / nBoxes;
103 thrust::plus<GpuScalar>());
105 PBAT_PROFILE_CUDA_HOST_SCOPE_END(muSigmaCtx);
109 (sigma[0] > sigma[1]) ? (sigma[0] > sigma[2] ? 0 : 2) : (sigma[1] > sigma[2] ? 1 : 2);
110 std::array<
GpuIndex, kDims - 1> axis{};
112 PBAT_PROFILE_CUDA_NAMED_HOST_SCOPE_START(sortCtx,
"pbat.gpu.impl.geometry.SweepAndPrune.Sort");
113 auto zip = thrust::make_zip_iterator(
120 thrust::sort_by_key(thrust::device, b[saxis].begin(), b[saxis].end(), zip);
121 PBAT_PROFILE_CUDA_HOST_SCOPE_END(sortCtx);
124 PBAT_PROFILE_CUDA_NAMED_HOST_SCOPE_START(
126 "pbat.gpu.impl.geometry.SweepAndPrune.Sweep");
129 thrust::make_counting_iterator(0),
130 thrust::make_counting_iterator(nBoxes),
138 std::forward<FOnOverlapDetected>(fOnOverlapDetected)] PBAT_DEVICE(
GpuIndex i)
mutable {
139 for (
auto j = i + 1; (j < nBoxes) and (e[saxis][i] >= b[saxis][j]); ++j)
144 bool const bBoxesOverlap =
145 (e[axis[0]][i] >= b[axis[0]][j]) and (b[axis[0]][i] <= e[axis[0]][j]) and
146 (e[axis[1]][i] >= b[axis[1]][j]) and (b[axis[1]][i] <= e[axis[1]][j]);
149 fOnOverlapDetected(inds[i], inds[j]);
153 PBAT_PROFILE_CUDA_HOST_SCOPE_END(sweepCtx);