11#ifndef PBAT_GPU_IMPL_VBD_KERNELS_H
12#define PBAT_GPU_IMPL_VBD_KERNELS_H
14#include "pbat/HostDevice.h"
18#include "pbat/sim/vbd/Kernels.h"
22#include <cub/block/block_reduce.cuh>
23#include <cuda/api/device.hpp>
24#include <cuda/api/launch_config_builder.hpp>
25#include <cuda/std/tuple>
45 std::array<GpuScalar*, 3>
xt;
46 std::array<GpuScalar*, 3>
x;
47 std::array<GpuScalar*, 3>
xb;
49 std::array<GpuIndex*, 4>
T;
68 std::array<GpuIndex*, 3>
F;
80template <auto kMaxContacts>
94 :
f(FromFlatBuffer<kMaxContacts, 1>(fc, i)),
104 kC = (XVA[i] * muC) / sumfa;
109 SVector<GpuIndex, kMaxContacts>
f;
111 SVector<GpuScalar, kMaxContacts>
fa;
122template <auto kBlockThreads>
129template <auto kBlockThreads>
135 cub::BlockReduce<ElasticDerivativeStorageType, kBlockThreads>;
148template <auto kBlockThreads>
153 using BlockReduce =
typename Traits::BlockReduce;
154 using BlockStorage =
typename Traits::BlockStorage;
155 extern __shared__ __align__(
alignof(BlockStorage))
char shared[];
156 auto tid = threadIdx.x;
157 auto bid = blockIdx.x;
162 GpuIndex nAdjacentElements = BDF.
GVTp[i + 1] - GVTbegin;
166 auto Hi = Hgi.Slice<3, 3>(0, 0);
167 auto gi = Hgi.Col(3);
168 for (
auto elocal = tid; elocal < nAdjacentElements; elocal += kBlockThreads)
172 SVector<GpuIndex, 4> Te = FromBuffers<4, 1>(BDF.
T, e);
175 SVector<GpuScalar, 2> lamee = FromFlatBuffer<2, 1>(BDF.
lame, e);
179 SVector<GpuScalar, 9> gF;
181 Psi.gradAndHessian(Fe, lamee(0), lamee(1), gF, HF);
182 using pbat::sim::vbd::kernels::AccumulateElasticGradient;
183 using pbat::sim::vbd::kernels::AccumulateElasticHessian;
184 AccumulateElasticHessian(ilocal, wg, GPe, HF, Hi);
185 AccumulateElasticGradient(ilocal, wg, GPe, gF, gi);
189 Hgi = BlockReduce(
reinterpret_cast<BlockStorage&
>(shared)).Sum(Hgi);
195 SVector<GpuScalar, 3> xti = FromBuffers<3, 1>(BDF.
xt, i);
196 SVector<GpuScalar, 3> xitilde = FromBuffers<3, 1>(BDF.
xtilde, i);
197 SVector<GpuScalar, 3> xi = FromBuffers<3, 1>(BDF.
x, i);
200 using pbat::sim::vbd::kernels::AddDamping;
201 AddDamping(BDF.
dt, xti, xi, BDF.
kD, gi, Hi);
206 for (
auto c = 0; c < cp.nContacts; ++c)
208 using pbat::sim::vbd::kernels::AccumulateVertexTriangleContact;
209 auto finds = FromBuffers<3, 1>(BDF.
F, cp.Triangle(c));
210 auto xtf = FromBuffers(BDF.
xt, finds.Transpose());
211 auto xf = FromBuffers(BDF.
x, finds.Transpose());
212 AccumulateVertexTriangleContact(
226 using pbat::sim::vbd::kernels::AddInertiaDerivatives;
227 AddInertiaDerivatives(BDF.
dt2, mi, xitilde, xi, gi, Hi);
230 using pbat::sim::vbd::kernels::IntegratePositions;
231 IntegratePositions(gi, Hi, xi, BDF.
detHZero);
232 ToBuffers(xi, BDF.
xb, i);
244template <
template <auto>
class TKernelTraits,
class... TArgs>
248 if (nThreads > kBlockThreads / 2 and nThreads <= kBlockThreads)
250 using KernelTraitsType = TKernelTraits<kBlockThreads>;
251 auto kDynamicSharedMemorySize =
static_cast<cuda::memory::shared::size_t
>(
252 sizeof(KernelTraitsType::kDynamicSharedMemorySize));
253 auto kernelLaunchConfiguration =
254 cuda::launch_config_builder()
255 .block_size(kBlockThreads)
256 .dynamic_shared_memory_size(kDynamicSharedMemorySize)
259 cuda::device::current::get().launch(
260 KernelTraitsType::Kernel(),
261 kernelLaunchConfiguration,
262 std::forward<TArgs>(args)...);
This file includes all the mini linear algebra headers.
Stable Neo-Hookean smith2018snh hyperelastic energy.
Type aliases for GPU code.
constexpr void ForValues(F &&f)
Compile-time for loop over values.
Definition ConstexprFor.h:41
void Invoke(GpuIndex nBlocks, GpuIndex nThreads, TArgs &&... args)
Invokes a VBD kernel on the GPU with the specified number of blocks and threads.
Definition Kernels.cuh:245
__global__ void VbdIteration(BackwardEulerMinimization BDF)
VBD iteration kernel.
Definition Kernels.cuh:149
Mini linear algebra related functionality.
Definition Assign.h:12
float GpuScalar
Scalar type for GPU code.
Definition Aliases.h:19
std::int32_t GpuIndex
Index type for GPU code.
Definition Aliases.h:20
Device-side BFD1 minimization problem.
Definition Kernels.cuh:40
GpuIndex * GVTp
Vertex-tetrahedron adjacency list's prefix sum.
Definition Kernels.cuh:56
GpuScalar epsv
IPC smooth friction transition function's relative velocity threshold.
Definition Kernels.cuh:63
GpuScalar * lame
2x|# elements| of 1st and 2nd Lame coefficients
Definition Kernels.cuh:52
GpuScalar detHZero
Numerical zero for hessian determinant check.
Definition Kernels.cuh:53
std::array< GpuScalar *, 3 > xt
Previous vertex positions.
Definition Kernels.cuh:45
GpuScalar * XVA
|# vertices| array of vertex areas
Definition Kernels.cuh:69
GpuScalar muC
Collision penalty.
Definition Kernels.cuh:61
GpuIndex * partition
List of vertex indices that can be processed independently, i.e. in parallel.
Definition Kernels.cuh:73
GpuScalar kD
Rayleigh damping coefficient.
Definition Kernels.cuh:60
GpuScalar * wg
|# elements| array of quadrature weights
Definition Kernels.cuh:50
GpuScalar * FA
|# collision triangles| array of face areas
Definition Kernels.cuh:70
GpuScalar muF
Coefficient of friction.
Definition Kernels.cuh:62
std::array< GpuIndex *, 3 > F
3x|# collision triangles| array of triangles
Definition Kernels.cuh:68
GpuIndex * fc
Definition Kernels.cuh:66
GpuScalar * m
Lumped mass matrix.
Definition Kernels.cuh:43
std::array< GpuScalar *, 3 > x
Vertex positions.
Definition Kernels.cuh:46
std::array< GpuScalar *, 3 > xb
Vertex position write buffer.
Definition Kernels.cuh:47
GpuIndex * GVTilocal
Vertex-tetrahedron adjacency list's ilocal property.
Definition Kernels.cuh:58
GpuIndex * GVTn
Vertex-tetrahedron adjacency list's neighbour list.
Definition Kernels.cuh:57
GpuScalar dt2
Squared time step.
Definition Kernels.cuh:42
std::array< GpuScalar *, 3 > xtilde
Inertial target.
Definition Kernels.cuh:44
static auto constexpr kMaxCollidingTrianglesPerVertex
Maximum number of colliding triangles per vertex.
Definition Kernels.cuh:64
GpuScalar dt
Time step.
Definition Kernels.cuh:41
std::array< GpuIndex *, 4 > T
4x|# elements| array of tetrahedra
Definition Kernels.cuh:49
GpuScalar * GP
4x3x|# elements| array of shape function gradients
Definition Kernels.cuh:51
Traits for VBD iteration kernel.
Definition Kernels.cuh:131
SMatrix< GpuScalar, 3, 4 > ElasticDerivativeStorageType
Type of data to reduce.
Definition Kernels.cuh:133
static auto Kernel()
Get the raw kernel.
Definition Kernels.cuh:145
typename BlockReduce::TempStorage BlockStorage
Storage for reduction.
Definition Kernels.cuh:136
cub::BlockReduce< ElasticDerivativeStorageType, kBlockThreads > BlockReduce
Reduction.
Definition Kernels.cuh:134
static auto constexpr kDynamicSharedMemorySize
Dynamic shared memory size.
Definition Kernels.cuh:138
Definition StableNeoHookeanEnergy.h:23