PhysicsBasedAnimationToolkit 0.0.10
Cross-platform C++20 library of algorithms and data structures commonly used in computer graphics research on physically-based simulation.
Loading...
Searching...
No Matches
Kernels.cuh
Go to the documentation of this file.
1
10
11#ifndef PBAT_GPU_IMPL_VBD_KERNELS_H
12#define PBAT_GPU_IMPL_VBD_KERNELS_H
13
14#include "pbat/HostDevice.h"
15#include "pbat/gpu/Aliases.h"
18#include "pbat/sim/vbd/Kernels.h"
19
20#include <array>
21#include <cstddef>
22#include <cub/block/block_reduce.cuh>
23#include <cuda/api/device.hpp>
24#include <cuda/api/launch_config_builder.hpp>
25#include <cuda/std/tuple>
26#include <limits>
27
33
34using namespace pbat::math::linalg::mini;
35
40{
44 std::array<GpuScalar*, 3> xtilde;
45 std::array<GpuScalar*, 3> xt;
46 std::array<GpuScalar*, 3> x;
47 std::array<GpuScalar*, 3> xb;
48
49 std::array<GpuIndex*, 4> T;
54 // GpuScalar const* kD; ///< |#elements| array of damping coefficients
55
59
64 static auto constexpr kMaxCollidingTrianglesPerVertex =
65 8;
68 std::array<GpuIndex*, 3> F;
71
74};
75
80template <auto kMaxContacts>
82{
92 PBAT_HOST_DEVICE
94 : f(FromFlatBuffer<kMaxContacts, 1>(fc, i)),
95 nContacts(Dot(Ones<GpuIndex, kMaxContacts>(), f >= 0)),
96 fa(Zeros<GpuIndex, kMaxContacts>()),
97 kC()
98 {
99 // Scale contact energies via mesh vertex areas and triangle areas to achieve
100 // pseudo mesh-independent contact response
101 for (auto c = 0; c < nContacts; ++c)
102 fa(c) = FA[f(c)];
103 auto sumfa = Dot(fa, Ones<GpuScalar, kMaxContacts>()); // Total triangle area
104 kC = (XVA[i] * muC) / sumfa; // Area-scaled collision penalty
105 }
106 PBAT_HOST_DEVICE GpuIndex Triangle(GpuIndex c) const { return f(c); }
107 PBAT_HOST_DEVICE GpuScalar Penalty(GpuIndex c) const { return kC * fa(c); }
108
109 SVector<GpuIndex, kMaxContacts> f;
111 SVector<GpuScalar, kMaxContacts> fa;
114};
115
122template <auto kBlockThreads>
123__global__ void VbdIteration(BackwardEulerMinimization BDF);
124
129template <auto kBlockThreads>
131{
132 public:
135 cub::BlockReduce<ElasticDerivativeStorageType, kBlockThreads>;
136 using BlockStorage = typename BlockReduce::TempStorage;
137
138 static auto constexpr kDynamicSharedMemorySize =
139 sizeof(BlockStorage);
140
145 static auto Kernel() { return &VbdIteration<kBlockThreads>; }
146};
147
148template <auto kBlockThreads>
150{
151 // Get thread info
153 using BlockReduce = typename Traits::BlockReduce;
154 using BlockStorage = typename Traits::BlockStorage;
155 extern __shared__ __align__(alignof(BlockStorage)) char shared[];
156 auto tid = threadIdx.x;
157 auto bid = blockIdx.x;
158 // Vertex index
159 GpuIndex i = BDF.partition[bid];
160 // Get vertex-tet adjacency information
161 GpuIndex GVTbegin = BDF.GVTp[i];
162 GpuIndex nAdjacentElements = BDF.GVTp[i + 1] - GVTbegin;
163 // 1. Compute vertex-element elastic energy derivatives w.r.t. i and store them in shared
164 // memory
166 auto Hi = Hgi.Slice<3, 3>(0, 0);
167 auto gi = Hgi.Col(3);
168 for (auto elocal = tid; elocal < nAdjacentElements; elocal += kBlockThreads)
169 {
170 GpuIndex e = BDF.GVTn[GVTbegin + elocal];
171 GpuIndex ilocal = BDF.GVTilocal[GVTbegin + elocal];
172 SVector<GpuIndex, 4> Te = FromBuffers<4, 1>(BDF.T, e);
173 SMatrix<GpuScalar, 4, 3> GPe = FromFlatBuffer<4, 3>(BDF.GP, e);
174 SMatrix<GpuScalar, 3, 4> xe = FromBuffers(BDF.x, Te.Transpose());
175 SVector<GpuScalar, 2> lamee = FromFlatBuffer<2, 1>(BDF.lame, e);
176 GpuScalar wg = BDF.wg[e];
177 SMatrix<GpuScalar, 3, 3> Fe = xe * GPe;
179 SVector<GpuScalar, 9> gF;
181 Psi.gradAndHessian(Fe, lamee(0), lamee(1), gF, HF);
182 using pbat::sim::vbd::kernels::AccumulateElasticGradient;
183 using pbat::sim::vbd::kernels::AccumulateElasticHessian;
184 AccumulateElasticHessian(ilocal, wg, GPe, HF, Hi);
185 AccumulateElasticGradient(ilocal, wg, GPe, gF, gi);
186 }
187
188 // 2. Compute total vertex hessian and gradient via parallel reduction
189 Hgi = BlockReduce(reinterpret_cast<BlockStorage&>(shared)).Sum(Hgi);
190 if (tid > 0)
191 return;
192
193 // Load vertex data
194 GpuScalar mi = BDF.m[i];
195 SVector<GpuScalar, 3> xti = FromBuffers<3, 1>(BDF.xt, i);
196 SVector<GpuScalar, 3> xitilde = FromBuffers<3, 1>(BDF.xtilde, i);
197 SVector<GpuScalar, 3> xi = FromBuffers<3, 1>(BDF.x, i);
198
199 // 3. Add stiffness damping
200 using pbat::sim::vbd::kernels::AddDamping;
201 AddDamping(BDF.dt, xti, xi, BDF.kD, gi, Hi);
202
203 // 3. Add contact energy
204 static auto constexpr kMaxContacts = BackwardEulerMinimization::kMaxCollidingTrianglesPerVertex;
205 kernels::ContactPenalty<kMaxContacts> cp{i, BDF.fc, BDF.XVA, BDF.FA, BDF.muC};
206 for (auto c = 0; c < cp.nContacts; ++c)
207 {
208 using pbat::sim::vbd::kernels::AccumulateVertexTriangleContact;
209 auto finds = FromBuffers<3, 1>(BDF.F, cp.Triangle(c));
210 auto xtf = FromBuffers(BDF.xt, finds.Transpose());
211 auto xf = FromBuffers(BDF.x, finds.Transpose());
212 AccumulateVertexTriangleContact(
213 xti,
214 xi,
215 xtf,
216 xf,
217 BDF.dt,
218 cp.Penalty(c),
219 BDF.muF,
220 BDF.epsv,
221 &gi,
222 &Hi);
223 }
224
225 // 4. Add inertial term
226 using pbat::sim::vbd::kernels::AddInertiaDerivatives;
227 AddInertiaDerivatives(BDF.dt2, mi, xitilde, xi, gi, Hi);
228
229 // 5. Integrate positions
230 using pbat::sim::vbd::kernels::IntegratePositions;
231 IntegratePositions(gi, Hi, xi, BDF.detHZero);
232 ToBuffers(xi, BDF.xb, i);
233}
234
244template <template <auto> class TKernelTraits, class... TArgs>
245void Invoke(GpuIndex nBlocks, GpuIndex nThreads, TArgs&&... args)
246{
247 pbat::common::ForValues<32, 64, 128, 256, 512>([&]<auto kBlockThreads>() {
248 if (nThreads > kBlockThreads / 2 and nThreads <= kBlockThreads)
249 {
250 using KernelTraitsType = TKernelTraits<kBlockThreads>;
251 auto kDynamicSharedMemorySize = static_cast<cuda::memory::shared::size_t>(
252 sizeof(KernelTraitsType::kDynamicSharedMemorySize));
253 auto kernelLaunchConfiguration =
254 cuda::launch_config_builder()
255 .block_size(kBlockThreads)
256 .dynamic_shared_memory_size(kDynamicSharedMemorySize)
257 .grid_size(nBlocks)
258 .build();
259 cuda::device::current::get().launch(
260 KernelTraitsType::Kernel(),
261 kernelLaunchConfiguration,
262 std::forward<TArgs>(args)...);
263 }
264 });
265}
266
267} // namespace pbat::gpu::impl::vbd::kernels
268
269#endif // PBAT_GPU_IMPL_VBD_KERNELS_H
This file includes all the mini linear algebra headers.
Stable Neo-Hookean smith2018snh hyperelastic energy.
Definition Matrix.h:22
Definition Matrix.h:121
Definition Matrix.h:60
Type aliases for GPU code.
constexpr void ForValues(F &&f)
Compile-time for loop over values.
Definition ConstexprFor.h:41
Device-side VBD kernels.
void Invoke(GpuIndex nBlocks, GpuIndex nThreads, TArgs &&... args)
Invokes a VBD kernel on the GPU with the specified number of blocks and threads.
Definition Kernels.cuh:245
__global__ void VbdIteration(BackwardEulerMinimization BDF)
VBD iteration kernel.
Definition Kernels.cuh:149
Mini linear algebra related functionality.
Definition Assign.h:12
float GpuScalar
Scalar type for GPU code.
Definition Aliases.h:19
std::int32_t GpuIndex
Index type for GPU code.
Definition Aliases.h:20
Device-side BFD1 minimization problem.
Definition Kernels.cuh:40
GpuIndex * GVTp
Vertex-tetrahedron adjacency list's prefix sum.
Definition Kernels.cuh:56
GpuScalar epsv
IPC smooth friction transition function's relative velocity threshold.
Definition Kernels.cuh:63
GpuScalar * lame
2x|# elements| of 1st and 2nd Lame coefficients
Definition Kernels.cuh:52
GpuScalar detHZero
Numerical zero for hessian determinant check.
Definition Kernels.cuh:53
std::array< GpuScalar *, 3 > xt
Previous vertex positions.
Definition Kernels.cuh:45
GpuScalar * XVA
|# vertices| array of vertex areas
Definition Kernels.cuh:69
GpuScalar muC
Collision penalty.
Definition Kernels.cuh:61
GpuIndex * partition
List of vertex indices that can be processed independently, i.e. in parallel.
Definition Kernels.cuh:73
GpuScalar kD
Rayleigh damping coefficient.
Definition Kernels.cuh:60
GpuScalar * wg
|# elements| array of quadrature weights
Definition Kernels.cuh:50
GpuScalar * FA
|# collision triangles| array of face areas
Definition Kernels.cuh:70
GpuScalar muF
Coefficient of friction.
Definition Kernels.cuh:62
std::array< GpuIndex *, 3 > F
3x|# collision triangles| array of triangles
Definition Kernels.cuh:68
GpuScalar * m
Lumped mass matrix.
Definition Kernels.cuh:43
std::array< GpuScalar *, 3 > x
Vertex positions.
Definition Kernels.cuh:46
std::array< GpuScalar *, 3 > xb
Vertex position write buffer.
Definition Kernels.cuh:47
GpuIndex * GVTilocal
Vertex-tetrahedron adjacency list's ilocal property.
Definition Kernels.cuh:58
GpuIndex * GVTn
Vertex-tetrahedron adjacency list's neighbour list.
Definition Kernels.cuh:57
GpuScalar dt2
Squared time step.
Definition Kernels.cuh:42
std::array< GpuScalar *, 3 > xtilde
Inertial target.
Definition Kernels.cuh:44
static auto constexpr kMaxCollidingTrianglesPerVertex
Maximum number of colliding triangles per vertex.
Definition Kernels.cuh:64
GpuScalar dt
Time step.
Definition Kernels.cuh:41
std::array< GpuIndex *, 4 > T
4x|# elements| array of tetrahedra
Definition Kernels.cuh:49
GpuScalar * GP
4x3x|# elements| array of shape function gradients
Definition Kernels.cuh:51
Penalty rescaler for mesh independent contact response.
Definition Kernels.cuh:82
SVector< GpuScalar, kMaxContacts > fa
Triangle areas.
Definition Kernels.cuh:111
SVector< GpuIndex, kMaxContacts > f
Contacting triangles.
Definition Kernels.cuh:109
GpuIndex nContacts
Number of contacts.
Definition Kernels.cuh:110
GpuScalar kC
Area-scaled collision penalty multiplier s.t. muC = kC*fa(c) for a given contact c.
Definition Kernels.cuh:113
PBAT_HOST_DEVICE ContactPenalty(GpuIndex i, GpuIndex *fc, GpuScalar *XVA, GpuScalar *FA, GpuScalar muC)
Construct a new ContactPenalty object.
Definition Kernels.cuh:93
Traits for VBD iteration kernel.
Definition Kernels.cuh:131
SMatrix< GpuScalar, 3, 4 > ElasticDerivativeStorageType
Type of data to reduce.
Definition Kernels.cuh:133
static auto Kernel()
Get the raw kernel.
Definition Kernels.cuh:145
typename BlockReduce::TempStorage BlockStorage
Storage for reduction.
Definition Kernels.cuh:136
cub::BlockReduce< ElasticDerivativeStorageType, kBlockThreads > BlockReduce
Reduction.
Definition Kernels.cuh:134
static auto constexpr kDynamicSharedMemorySize
Dynamic shared memory size.
Definition Kernels.cuh:138
Definition StableNeoHookeanEnergy.h:23