Browse Source

bullet: update code to most recent GitHub branch.

undefined
Sam Hocevar 8 years ago
parent
commit
52529cd8b9
100 changed files with 32417 additions and 176 deletions
  1. +1
    -0
      build/autotools/common.am
  2. +0
    -176
      src/bullet/Bullet-C-Api.h
  3. +40
    -0
      src/bullet/Bullet3Collision/BroadPhaseCollision/b3BroadphaseCallback.h
  4. +1295
    -0
      src/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp
  5. +1268
    -0
      src/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h
  6. +804
    -0
      src/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.cpp
  7. +208
    -0
      src/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h
  8. +72
    -0
      src/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPair.h
  9. +638
    -0
      src/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.cpp
  10. +474
    -0
      src/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h
  11. +59
    -0
      src/bullet/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h
  12. +41
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/b3Config.h
  13. +46
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h
  14. +520
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.cpp
  15. +62
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h
  16. +323
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.cpp
  17. +105
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h
  18. +24
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h
  19. +30
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h
  20. +20
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h
  21. +126
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h
  22. +188
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h
  23. +76
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h
  24. +40
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h
  25. +523
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h
  26. +162
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactSphereSphere.h
  27. +40
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h
  28. +832
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h
  29. +206
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h
  30. +920
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h
  31. +196
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h
  32. +90
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h
  33. +97
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h
  34. +34
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h
  35. +40
    -0
      src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h
  36. +181
    -0
      src/bullet/Bullet3Common/b3AlignedAllocator.cpp
  37. +107
    -0
      src/bullet/Bullet3Common/b3AlignedAllocator.h
  38. +517
    -0
      src/bullet/Bullet3Common/b3AlignedObjectArray.h
  39. +106
    -0
      src/bullet/Bullet3Common/b3CommandLineArgs.h
  40. +138
    -0
      src/bullet/Bullet3Common/b3FileUtils.h
  41. +450
    -0
      src/bullet/Bullet3Common/b3HashMap.h
  42. +160
    -0
      src/bullet/Bullet3Common/b3Logging.cpp
  43. +77
    -0
      src/bullet/Bullet3Common/b3Logging.h
  44. +1362
    -0
      src/bullet/Bullet3Common/b3Matrix3x3.h
  45. +71
    -0
      src/bullet/Bullet3Common/b3MinMax.h
  46. +121
    -0
      src/bullet/Bullet3Common/b3PoolAllocator.h
  47. +245
    -0
      src/bullet/Bullet3Common/b3QuadWord.h
  48. +893
    -0
      src/bullet/Bullet3Common/b3Quaternion.h
  49. +50
    -0
      src/bullet/Bullet3Common/b3Random.h
  50. +661
    -0
      src/bullet/Bullet3Common/b3Scalar.h
  51. +116
    -0
      src/bullet/Bullet3Common/b3StackAlloc.h
  52. +304
    -0
      src/bullet/Bullet3Common/b3Transform.h
  53. +228
    -0
      src/bullet/Bullet3Common/b3TransformUtil.h
  54. +1631
    -0
      src/bullet/Bullet3Common/b3Vector3.cpp
  55. +1343
    -0
      src/bullet/Bullet3Common/b3Vector3.h
  56. +97
    -0
      src/bullet/Bullet3Common/shared/b3Float4.h
  57. +64
    -0
      src/bullet/Bullet3Common/shared/b3Int2.h
  58. +68
    -0
      src/bullet/Bullet3Common/shared/b3Int4.h
  59. +179
    -0
      src/bullet/Bullet3Common/shared/b3Mat3x3.h
  60. +41
    -0
      src/bullet/Bullet3Common/shared/b3PlatformDefinitions.h
  61. +103
    -0
      src/bullet/Bullet3Common/shared/b3Quat.h
  62. +159
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3ContactSolverInfo.h
  63. +108
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.cpp
  64. +35
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.h
  65. +807
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.cpp
  66. +550
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.h
  67. +155
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h
  68. +1814
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp
  69. +149
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h
  70. +209
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.cpp
  71. +159
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h
  72. +302
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h
  73. +80
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h
  74. +161
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.cpp
  75. +483
    -0
      src/bullet/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h
  76. +484
    -0
      src/bullet/Bullet3Dynamics/b3CpuRigidBodyPipeline.cpp
  77. +67
    -0
      src/bullet/Bullet3Dynamics/b3CpuRigidBodyPipeline.h
  78. +34
    -0
      src/bullet/Bullet3Dynamics/shared/b3ContactConstraint4.h
  79. +153
    -0
      src/bullet/Bullet3Dynamics/shared/b3ConvertConstraint4.h
  80. +15
    -0
      src/bullet/Bullet3Dynamics/shared/b3Inertia.h
  81. +113
    -0
      src/bullet/Bullet3Dynamics/shared/b3IntegrateTransforms.h
  82. +232
    -0
      src/bullet/Bullet3Geometry/b3AabbUtil.h
  83. +2755
    -0
      src/bullet/Bullet3Geometry/b3ConvexHullComputer.cpp
  84. +103
    -0
      src/bullet/Bullet3Geometry/b3ConvexHullComputer.h
  85. +185
    -0
      src/bullet/Bullet3Geometry/b3GeometryUtil.cpp
  86. +42
    -0
      src/bullet/Bullet3Geometry/b3GeometryUtil.h
  87. +117
    -0
      src/bullet/Bullet3Geometry/b3GrahamScan2dConvexHull.h
  88. +44
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h
  89. +384
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp
  90. +88
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h
  91. +577
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp
  92. +125
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h
  93. +80
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp
  94. +66
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h
  95. +1322
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp
  96. +151
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h
  97. +14
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h
  98. +216
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl
  99. +199
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h
  100. +767
    -0
      src/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl

+ 1
- 0
build/autotools/common.am View File

@@ -6,6 +6,7 @@
CLEANFILES =
SUFFIXES =
EXTRA_DIST =
NULL =


#


+ 0
- 176
src/bullet/Bullet-C-Api.h View File

@@ -1,176 +0,0 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

/*
Draft high-level generic physics C-API. For low-level access, use the physics SDK's native APIs.
Work in progress; functionality will be added on demand.

If possible, use the richer Bullet C++ API by including "btBulletDynamicsCommon.h".
*/

#ifndef BULLET_C_API_H
#define BULLET_C_API_H

/* Declares `name` as a pointer to a dedicated dummy struct (`name##__`, holding a single unused int), so that every handle type declared through this macro is a distinct pointer type. */
#define PL_DECLARE_HANDLE(name) typedef struct name##__ { int unused; } *name

#ifdef BT_USE_DOUBLE_PRECISION
typedef double plReal;
#else
typedef float plReal;
#endif

typedef plReal plVector3[3];
typedef plReal plQuaternion[4];

#ifdef __cplusplus
extern "C" {
#endif

/** Particular physics SDK (C-API) */
PL_DECLARE_HANDLE(plPhysicsSdkHandle);

/** Dynamics world, belonging to some physics SDK (C-API)*/
PL_DECLARE_HANDLE(plDynamicsWorldHandle);

/** Rigid Body that can be part of a Dynamics World (C-API)*/
PL_DECLARE_HANDLE(plRigidBodyHandle);

/** Collision Shape/Geometry, property of a Rigid Body (C-API)*/
PL_DECLARE_HANDLE(plCollisionShapeHandle);

/** Constraint for Rigid Bodies (C-API)*/
PL_DECLARE_HANDLE(plConstraintHandle);

/** Triangle Mesh interface (C-API)*/
PL_DECLARE_HANDLE(plMeshInterfaceHandle);

/** Broadphase Scene/Proxy Handles (C-API)*/
PL_DECLARE_HANDLE(plCollisionBroadphaseHandle);
PL_DECLARE_HANDLE(plBroadphaseProxyHandle);
PL_DECLARE_HANDLE(plCollisionWorldHandle);

/**
Create and Delete a Physics SDK
*/

extern plPhysicsSdkHandle plNewBulletSdk(void); //this could be also another sdk, like ODE, PhysX etc.
extern void plDeletePhysicsSdk(plPhysicsSdkHandle physicsSdk);

/** Collision World. Not strictly necessary: you can also just create a Dynamics World with Rigid Bodies, which internally manages the Collision World with Collision Objects. */

typedef void(*btBroadphaseCallback)(void* clientData, void* object1,void* object2);

extern plCollisionBroadphaseHandle plCreateSapBroadphase(btBroadphaseCallback beginCallback,btBroadphaseCallback endCallback);

extern void plDestroyBroadphase(plCollisionBroadphaseHandle bp);

extern plBroadphaseProxyHandle plCreateProxy(plCollisionBroadphaseHandle bp, void* clientData, plReal minX,plReal minY,plReal minZ, plReal maxX,plReal maxY, plReal maxZ);

extern void plDestroyProxy(plCollisionBroadphaseHandle bp, plBroadphaseProxyHandle proxyHandle);

extern void plSetBoundingBox(plBroadphaseProxyHandle proxyHandle, plReal minX,plReal minY,plReal minZ, plReal maxX,plReal maxY, plReal maxZ);

/* todo: add pair cache support with queries like add/remove/find pair */
extern plCollisionWorldHandle plCreateCollisionWorld(plPhysicsSdkHandle physicsSdk);

/* todo: add/remove objects */

/* Dynamics World */

extern plDynamicsWorldHandle plCreateDynamicsWorld(plPhysicsSdkHandle physicsSdk);

extern void plDeleteDynamicsWorld(plDynamicsWorldHandle world);

extern void plStepSimulation(plDynamicsWorldHandle, plReal timeStep);

extern void plAddRigidBody(plDynamicsWorldHandle world, plRigidBodyHandle object);

extern void plRemoveRigidBody(plDynamicsWorldHandle world, plRigidBodyHandle object);


/* Rigid Body */

extern plRigidBodyHandle plCreateRigidBody( void* user_data, float mass, plCollisionShapeHandle cshape );

extern void plDeleteRigidBody(plRigidBodyHandle body);


/* Collision Shape definition */

extern plCollisionShapeHandle plNewSphereShape(plReal radius);
extern plCollisionShapeHandle plNewBoxShape(plReal x, plReal y, plReal z);
extern plCollisionShapeHandle plNewCapsuleShape(plReal radius, plReal height);
extern plCollisionShapeHandle plNewConeShape(plReal radius, plReal height);
extern plCollisionShapeHandle plNewCylinderShape(plReal radius, plReal height);
extern plCollisionShapeHandle plNewCompoundShape(void);
extern void plAddChildShape(plCollisionShapeHandle compoundShape,plCollisionShapeHandle childShape, plVector3 childPos,plQuaternion childOrn);

extern void plDeleteShape(plCollisionShapeHandle shape);

/* Convex Meshes */
extern plCollisionShapeHandle plNewConvexHullShape(void);
extern void plAddVertex(plCollisionShapeHandle convexHull, plReal x,plReal y,plReal z);
/* Concave static triangle meshes */
extern plMeshInterfaceHandle plNewMeshInterface(void);
extern void plAddTriangle(plMeshInterfaceHandle meshHandle, plVector3 v0,plVector3 v1,plVector3 v2);
extern plCollisionShapeHandle plNewStaticTriangleMeshShape(plMeshInterfaceHandle);

extern void plSetScaling(plCollisionShapeHandle shape, plVector3 scaling);

/* SOLID has Response Callback/Table/Management */
/* PhysX has Triggers, User Callbacks and filtering */
/* ODE has the typedef void dNearCallback (void *data, dGeomID o1, dGeomID o2); */

/* typedef void plUpdatedPositionCallback(void* userData, plRigidBodyHandle rbHandle, plVector3 pos); */
/* typedef void plUpdatedOrientationCallback(void* userData, plRigidBodyHandle rbHandle, plQuaternion orientation); */

/* get world transform */
extern void plGetOpenGLMatrix(plRigidBodyHandle object, plReal* matrix);
extern void plGetPosition(plRigidBodyHandle object,plVector3 position);
extern void plGetOrientation(plRigidBodyHandle object,plQuaternion orientation);

/* set world transform (position/orientation) */
extern void plSetPosition(plRigidBodyHandle object, const plVector3 position);
extern void plSetOrientation(plRigidBodyHandle object, const plQuaternion orientation);
extern void plSetEuler(plReal yaw,plReal pitch,plReal roll, plQuaternion orient);
extern void plSetOpenGLMatrix(plRigidBodyHandle object, plReal* matrix);

/* Result record used by plRayCast. */
/* NOTE(review): field meanings below are inferred from their names only; confirm against the implementation. */
typedef struct plRayCastResult {
plRigidBodyHandle m_body; /* presumably the rigid body that was hit */
plCollisionShapeHandle m_shape; /* presumably the collision shape that was hit */
plVector3 m_positionWorld; /* presumably the hit position in world space */
plVector3 m_normalWorld; /* presumably the surface normal at the hit, in world space */
} plRayCastResult;

extern int plRayCast(plDynamicsWorldHandle world, const plVector3 rayStart, const plVector3 rayEnd, plRayCastResult res);

/* Sweep API */

/* extern plRigidBodyHandle plObjectCast(plDynamicsWorldHandle world, const plVector3 rayStart, const plVector3 rayEnd, plVector3 hitpoint, plVector3 normal); */

/* Continuous Collision Detection API */
// needed for source/blender/blenkernel/intern/collision.c
double plNearestPoints(float p1[3], float p2[3], float p3[3], float q1[3], float q2[3], float q3[3], float *pa, float *pb, float normal[3]);

#ifdef __cplusplus
}
#endif


#endif //BULLET_C_API_H


+ 40
- 0
src/bullet/Bullet3Collision/BroadPhaseCollision/b3BroadphaseCallback.h View File

@@ -0,0 +1,40 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_BROADPHASE_CALLBACK_H
#define B3_BROADPHASE_CALLBACK_H

#include "Bullet3Common/b3Vector3.h"
struct b3BroadphaseProxy;


/// Abstract callback interface: implementers override process(), which receives one broadphase proxy per call.
struct b3BroadphaseAabbCallback
{
/// Virtual so that implementations can be destroyed through a pointer to this interface.
virtual ~b3BroadphaseAabbCallback() {}
/// Called with a proxy to handle.
/// NOTE(review): the meaning of the bool result is not specified in this header - confirm with the callers.
virtual bool process(const b3BroadphaseProxy* proxy) = 0;
};


/// Callback interface for ray queries; extends b3BroadphaseAabbCallback with per-ray data.
/// This struct does not compute the members below; whoever creates the callback has to fill them in.
struct b3BroadphaseRayCallback : public b3BroadphaseAabbCallback
{
///added some cached data to accelerate ray-AABB tests
b3Vector3 m_rayDirectionInverse; // presumably the component-wise reciprocal of the ray direction - TODO confirm
unsigned int m_signs[3]; // presumably one sign flag per axis of the ray direction - TODO confirm
b3Scalar m_lambda_max; // presumably the maximum ray parameter to consider - TODO confirm

virtual ~b3BroadphaseRayCallback() {}
};

#endif //B3_BROADPHASE_CALLBACK_H

+ 1295
- 0
src/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp
File diff suppressed because it is too large
View File


+ 1268
- 0
src/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h
File diff suppressed because it is too large
View File


+ 804
- 0
src/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.cpp View File

@@ -0,0 +1,804 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

///b3DynamicBvhBroadphase implementation by Nathanael Presson

#include "b3DynamicBvhBroadphase.h"
#include "b3OverlappingPair.h"

//
// Profiling
//

#if B3_DBVT_BP_PROFILE||B3_DBVT_BP_ENABLE_BENCHMARK
#include <stdio.h>
#endif

#if B3_DBVT_BP_PROFILE
/// Scoped timer: stores the clock reading taken at construction and, when destroyed,
/// adds the microseconds elapsed since then to the counter supplied by the caller.
struct b3ProfileScope
{
    b3Clock*        m_clock;   ///< clock that is sampled at both ends of the scope
    unsigned long*  m_value;   ///< counter that receives the elapsed time
    unsigned long   m_base;    ///< clock reading taken in the constructor

    __forceinline b3ProfileScope(b3Clock& clock,unsigned long& value)
        : m_clock(&clock)
        , m_value(&value)
        , m_base(clock.getTimeMicroseconds())
    {
    }

    __forceinline ~b3ProfileScope()
    {
        *m_value += m_clock->getTimeMicroseconds() - m_base;
    }
};
#define b3SPC(_value_) b3ProfileScope spc_scope(m_clock,_value_)
#else
#define b3SPC(_value_)
#endif

//
// Helpers
//

//
/// Inserts `item` at the front of the intrusive doubly linked list whose head pointer is `list`.
/// Nodes are chained through `links`: links[0] is the previous node, links[1] the next one.
template <typename T>
static inline void b3ListAppend(T* item,T*& list)
{
    item->links[0] = 0;

    T* const formerHead = list;
    item->links[1] = formerHead;
    if (formerHead != 0)
    {
        formerHead->links[0] = item;
    }

    list = item;
}

//
/// Unlinks `item` from the intrusive doubly linked list headed by `list`.
/// When `item` has no predecessor it is the head, so the head pointer moves to its successor.
/// The links stored inside `item` itself are left untouched.
template <typename T>
static inline void b3ListRemove(T* item,T*& list)
{
    if (item->links[0] == 0)
    {
        list = item->links[1];
    }
    else
    {
        item->links[0]->links[1] = item->links[1];
    }

    if (item->links[1] != 0)
    {
        item->links[1]->links[0] = item->links[0];
    }
}

//
/// Counts the nodes reachable from `root` by following links[1]; a null `root` yields 0.
template <typename T>
static inline int b3ListCount(T* root)
{
    int count = 0;
    for (T* node = root; node != 0; node = node->links[1])
    {
        ++count;
    }
    return count;
}

//
/// Resets `value` by assigning to it from a function-local static object of a type derived from T.
/// The static object is never written to, so it keeps the state it was initialized with.
template <typename T>
static inline void b3Clear(T& value)
{
// ZeroDummy adds no members of its own; it exists only to provide a static instance that converts to T.
static const struct ZeroDummy : T {} zerodummy;
value=zerodummy;
}

//
// Colliders
//

/* Tree collider */
/// Collision policy handed to the b3DynamicBvh traversal routines: every reported pair of
/// distinct leaves is added to the broadphase's pair cache.
///
/// `proxy` is only needed by the single-node Process() overload and must be assigned by the
/// caller before a traversal that uses that overload. It now starts out null instead of
/// indeterminate, so a forgotten assignment no longer dereferences a garbage pointer.
struct b3DbvtTreeCollider : b3DynamicBvh::ICollide
{
    b3DynamicBvhBroadphase* pbp;   ///< broadphase whose pair cache and m_newpairs counter are updated
    b3DbvtProxy* proxy;            ///< second party for the single-node overload; null until set by the caller

    b3DbvtTreeCollider(b3DynamicBvhBroadphase* p) : pbp(p), proxy(0) {}

    /// Records the pair (na, nb) in the pair cache; a node paired with itself is ignored.
    void Process(const b3DbvtNode* na,const b3DbvtNode* nb)
    {
        if(na!=nb)
        {
            b3DbvtProxy* pa=(b3DbvtProxy*)na->data;
            b3DbvtProxy* pb=(b3DbvtProxy*)nb->data;
#if B3_DBVT_BP_SORTPAIRS
            // Order the two proxies by unique id before adding the pair.
            if(pa->m_uniqueId>pb->m_uniqueId)
                b3Swap(pa,pb);
#endif
            pbp->m_paircache->addOverlappingPair(pa->getUid(),pb->getUid());
            ++pbp->m_newpairs;
        }
    }

    /// Pairs node `n` with the leaf of `proxy`.
    void Process(const b3DbvtNode* n)
    {
        Process(n,proxy->leaf);
    }
};

//
// b3DynamicBvhBroadphase
//

//
/// Constructs the broadphase.
/// @param proxyCapacity  number of proxy slots that m_proxies is resized to
/// @param paircache      pair cache to use; when null, a b3HashedOverlappingPairCache is
///                       allocated here and released again by the destructor
b3DynamicBvhBroadphase::b3DynamicBvhBroadphase(int proxyCapacity, b3OverlappingPairCache* paircache)
{
    // Pair cache: adopt the caller's instance, or create one that this object owns.
    if (paircache != 0)
    {
        m_releasepaircache = false;
        m_paircache = paircache;
    }
    else
    {
        m_releasepaircache = true;
        void* storage = b3AlignedAlloc(sizeof(b3HashedOverlappingPairCache),16);
        m_paircache = new(storage) b3HashedOverlappingPairCache();
    }

    // Flags and stage bookkeeping.
    m_deferedcollide    = false;
    m_needcleanup       = true;
    m_prediction        = 0;
    m_stageCurrent      = 0;
    m_fixedleft         = 0;

    // Update rates and counters.
    m_fupdates          = 1;
    m_dupdates          = 0;
    m_cupdates          = 10;
    m_newpairs          = 1;
    m_updates_call      = 0;
    m_updates_done      = 0;
    m_updates_ratio     = 0;
    m_pid               = 0;
    m_cid               = 0;

    // One list head per stage, including the extra slot at index STAGECOUNT.
    for (int stage = 0; stage <= STAGECOUNT; ++stage)
    {
        m_stageRoots[stage] = 0;
    }

#if B3_DBVT_BP_PROFILE
    b3Clear(m_profiling);
#endif

    m_proxies.resize(proxyCapacity);
}

//
/// Destroys and frees the pair cache, but only when m_releasepaircache says this object owns it.
b3DynamicBvhBroadphase::~b3DynamicBvhBroadphase()
{
    if (!m_releasepaircache)
    {
        return;
    }

    // The cache was placement-constructed in aligned storage, so tear it down the same way.
    m_paircache->~b3OverlappingPairCache();
    b3AlignedFree(m_paircache);
}

//
/// Constructs a proxy in slot m_proxies[objectId], inserts it into the dynamic tree
/// (m_sets[0]) and links it into the list of the current stage. Unless collision is
/// deferred, the new volume is immediately collided against both trees.
/// @param aabbMin,aabbMax        bounds of the new proxy
/// @param objectId               slot index; also stored as the proxy's unique id
/// @param userPtr                forwarded to the b3DbvtProxy constructor
/// @param collisionFilterGroup   forwarded to the b3DbvtProxy constructor
/// @param collisionFilterMask    forwarded to the b3DbvtProxy constructor
/// @return the proxy constructed in the slot
b3BroadphaseProxy* b3DynamicBvhBroadphase::createProxy( const b3Vector3& aabbMin,
                                                        const b3Vector3& aabbMax,
                                                        int objectId,
                                                        void* userPtr,
                                                        short int collisionFilterGroup,
                                                        short int collisionFilterMask)
{
    // Build the proxy in place inside its pre-sized slot.
    b3DbvtProxy* const created = new(&m_proxies[objectId]) b3DbvtProxy(aabbMin,aabbMax,userPtr,
                                                                       collisionFilterGroup,
                                                                       collisionFilterMask);

    b3DbvtAabbMm volume = b3DbvtVolume::FromMM(aabbMin,aabbMax);

    created->stage      = m_stageCurrent;
    created->m_uniqueId = objectId;
    created->leaf       = m_sets[0].insert(volume,created);
    b3ListAppend(created,m_stageRoots[m_stageCurrent]);

    if (m_deferedcollide)
    {
        return(created);
    }

    b3DbvtTreeCollider pairCollector(this);
    pairCollector.proxy = created;
    m_sets[0].collideTV(m_sets[0].m_root,volume,pairCollector);
    m_sets[1].collideTV(m_sets[1].m_root,volume,pairCollector);
    return(created);
}

//
/// Removes a proxy from the broadphase: its leaf is taken out of the tree it lives in, it is
/// unlinked from its stage list, and all cached pairs that contain it are removed.
/// @param absproxy    proxy to remove; treated as a b3DbvtProxy
/// @param dispatcher  forwarded to the pair cache
void b3DynamicBvhBroadphase::destroyProxy( b3BroadphaseProxy* absproxy,
                                           b3Dispatcher* dispatcher)
{
    b3DbvtProxy* const doomed = (b3DbvtProxy*)absproxy;

    // Proxies at stage STAGECOUNT live in the fixed tree (m_sets[1]), all others in m_sets[0].
    const int treeIndex = (doomed->stage == STAGECOUNT) ? 1 : 0;
    m_sets[treeIndex].remove(doomed->leaf);

    b3ListRemove(doomed,m_stageRoots[doomed->stage]);
    m_paircache->removeOverlappingPairsContainingProxy(doomed->getUid(),dispatcher);
    m_needcleanup = true;
}

void b3DynamicBvhBroadphase::getAabb(int objectId,b3Vector3& aabbMin, b3Vector3& aabbMax ) const
{
const b3DbvtProxy* proxy=&m_proxies[objectId];
aabbMin = proxy->m_aabbMin;
aabbMax = proxy->m_aabbMax;
}
/*
void b3DynamicBvhBroadphase::getAabb(b3BroadphaseProxy* absproxy,b3Vector3& aabbMin, b3Vector3& aabbMax ) const
{
b3DbvtProxy* proxy=(b3DbvtProxy*)absproxy;
aabbMin = proxy->m_aabbMin;
aabbMax = proxy->m_aabbMax;
}
*/


struct BroadphaseRayTester : b3DynamicBvh::ICollide
{
b3BroadphaseRayCallback& m_rayCallback;
BroadphaseRayTester(b3BroadphaseRayCallback& orgCallback)
:m_rayCallback(orgCallback)
{
}
void Process(const b3DbvtNode* leaf)
{
b3DbvtProxy* proxy=(b3DbvtProxy*)leaf->data;
m_rayCallback.process(proxy);
}
};

void b3DynamicBvhBroadphase::rayTest(const b3Vector3& rayFrom,const b3Vector3& rayTo, b3BroadphaseRayCallback& rayCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax)
{
BroadphaseRayTester callback(rayCallback);

m_sets[0].rayTestInternal( m_sets[0].m_root,
rayFrom,
rayTo,
rayCallback.m_rayDirectionInverse,
rayCallback.m_signs,
rayCallback.m_lambda_max,
aabbMin,
aabbMax,
callback);

m_sets[1].rayTestInternal( m_sets[1].m_root,
rayFrom,
rayTo,
rayCallback.m_rayDirectionInverse,
rayCallback.m_signs,
rayCallback.m_lambda_max,
aabbMin,
aabbMax,
callback);

}


struct BroadphaseAabbTester : b3DynamicBvh::ICollide
{
b3BroadphaseAabbCallback& m_aabbCallback;
BroadphaseAabbTester(b3BroadphaseAabbCallback& orgCallback)
:m_aabbCallback(orgCallback)
{
}
void Process(const b3DbvtNode* leaf)
{
b3DbvtProxy* proxy=(b3DbvtProxy*)leaf->data;
m_aabbCallback.process(proxy);
}
};

void b3DynamicBvhBroadphase::aabbTest(const b3Vector3& aabbMin,const b3Vector3& aabbMax,b3BroadphaseAabbCallback& aabbCallback)
{
BroadphaseAabbTester callback(aabbCallback);

const B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) bounds=b3DbvtVolume::FromMM(aabbMin,aabbMax);
//process all children, that overlap with the given AABB bounds
m_sets[0].collideTV(m_sets[0].m_root,bounds,callback);
m_sets[1].collideTV(m_sets[1].m_root,bounds,callback);

}



//
/// Updates the AABB of the proxy stored in slot `objectId`, moves the proxy to the list of the
/// current stage and, unless collision is deferred, collides its leaf against both trees.
/// @param objectId  index into m_proxies
/// @param aabbMin   new minimum corner
/// @param aabbMax   new maximum corner
/// The dispatcher parameter is unnamed and unused.
void b3DynamicBvhBroadphase::setAabb(int objectId,
const b3Vector3& aabbMin,
const b3Vector3& aabbMax,
b3Dispatcher* /*dispatcher*/)
{
b3DbvtProxy* proxy=&m_proxies[objectId];
// b3DbvtProxy* proxy=(b3DbvtProxy*)absproxy;
B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) aabb=b3DbvtVolume::FromMM(aabbMin,aabbMax);
#if B3_DBVT_BP_PREVENTFALSEUPDATE
// When enabled, the whole update below is skipped if the new volume equals the leaf's current one.
if(b3NotEqual(aabb,proxy->leaf->volume))
#endif
{
bool docollide=false;
if(proxy->stage==STAGECOUNT)
{/* fixed -> dynamic set */
// The proxy currently lives in m_sets[1]; re-insert it into m_sets[0] with the new volume.
m_sets[1].remove(proxy->leaf);
proxy->leaf=m_sets[0].insert(aabb,proxy);
docollide=true;
}
else
{/* dynamic set */
++m_updates_call;
if(b3Intersect(proxy->leaf->volume,aabb))
{/* Moving */

// Predicted motion: half the extent of the previously stored AABB scaled by m_prediction,
// with each component's sign flipped where the minimum corner moved in the negative direction.
const b3Vector3 delta=aabbMin-proxy->m_aabbMin;
b3Vector3 velocity(((proxy->m_aabbMax-proxy->m_aabbMin)/2)*m_prediction);
if(delta[0]<0) velocity[0]=-velocity[0];
if(delta[1]<0) velocity[1]=-velocity[1];
if(delta[2]<0) velocity[2]=-velocity[2];
if (
#ifdef B3_DBVT_BP_MARGIN
m_sets[0].update(proxy->leaf,aabb,velocity,B3_DBVT_BP_MARGIN)
#else
m_sets[0].update(proxy->leaf,aabb,velocity)
#endif
)
{
// The update only counts as done, and only triggers a collide, when update() returns true.
++m_updates_done;
docollide=true;
}
}
else
{/* Teleporting */
// The new volume does not intersect the leaf's current one: update without a velocity hint.
m_sets[0].update(proxy->leaf,aabb);
++m_updates_done;
docollide=true;
}
}
// Re-link the proxy into the list of the current stage and store the new bounds.
b3ListRemove(proxy,m_stageRoots[proxy->stage]);
proxy->m_aabbMin = aabbMin;
proxy->m_aabbMax = aabbMax;
proxy->stage = m_stageCurrent;
b3ListAppend(proxy,m_stageRoots[m_stageCurrent]);
if(docollide)
{
m_needcleanup=true;
if(!m_deferedcollide)
{
// Collide the updated leaf against the fixed tree first, then the dynamic tree.
b3DbvtTreeCollider collider(this);
m_sets[1].collideTTpersistentStack(m_sets[1].m_root,proxy->leaf,collider);
m_sets[0].collideTTpersistentStack(m_sets[0].m_root,proxy->leaf,collider);
}
}
}
}


//
/// Unconditionally updates a proxy's AABB: the tree is always updated (without a velocity
/// hint) and cleanup is always requested, whether or not the volume actually changed.
/// @param absproxy  proxy to update; treated as a b3DbvtProxy
/// @param aabbMin   new minimum corner
/// @param aabbMax   new maximum corner
/// The dispatcher parameter is unnamed and unused.
void b3DynamicBvhBroadphase::setAabbForceUpdate( b3BroadphaseProxy* absproxy,
                                                 const b3Vector3& aabbMin,
                                                 const b3Vector3& aabbMax,
                                                 b3Dispatcher* /*dispatcher*/)
{
    b3DbvtProxy* const moved = (b3DbvtProxy*)absproxy;
    B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) newVolume=b3DbvtVolume::FromMM(aabbMin,aabbMax);

    if (moved->stage != STAGECOUNT)
    {
        /* dynamic set: treated as a teleport */
        ++m_updates_call;
        m_sets[0].update(moved->leaf,newVolume);
        ++m_updates_done;
    }
    else
    {
        /* fixed -> dynamic set */
        m_sets[1].remove(moved->leaf);
        moved->leaf = m_sets[0].insert(newVolume,moved);
    }

    // Re-link the proxy into the list of the current stage and store the new bounds.
    b3ListRemove(moved,m_stageRoots[moved->stage]);
    moved->m_aabbMin = aabbMin;
    moved->m_aabbMax = aabbMax;
    moved->stage     = m_stageCurrent;
    b3ListAppend(moved,m_stageRoots[m_stageCurrent]);

    // Both branches above change a tree, so cleanup is always needed.
    m_needcleanup = true;
    if (m_deferedcollide)
    {
        return;
    }

    b3DbvtTreeCollider pairCollector(this);
    m_sets[1].collideTTpersistentStack(m_sets[1].m_root,moved->leaf,pairCollector);
    m_sets[0].collideTTpersistentStack(m_sets[0].m_root,moved->leaf,pairCollector);
}

//
/// Runs one broadphase step: collide(), an optional profiling report, then the deferred
/// removal pass over the pair cache.
/// @param dispatcher  forwarded to collide() and performDeferredRemoval()
void b3DynamicBvhBroadphase::calculateOverlappingPairs(b3Dispatcher* dispatcher)
{
    collide(dispatcher);
#if B3_DBVT_BP_PROFILE
    // Print a report once every B3_DBVT_BP_PROFILING_RATE steps, then reset the counters.
    if(0==(m_pid%B3_DBVT_BP_PROFILING_RATE))
    {
        // The profiling counters are unsigned long (b3ProfileScope binds them by
        // unsigned long&), so every value is cast explicitly and printed with a matching
        // conversion instead of being passed to %u.
        printf("fixed(%u) dynamics(%u) pairs(%u)\r\n",
               (unsigned int)m_sets[1].m_leaves,
               (unsigned int)m_sets[0].m_leaves,
               (unsigned int)m_paircache->getNumOverlappingPairs());

        // Kept at full width (no truncation to unsigned int); clamped to 1 to avoid dividing by zero.
        unsigned long total=m_profiling.m_total;
        if(total==0) total=1;

        printf("ddcollide: %lu%% (%luus)\r\n",
               (unsigned long)((50+m_profiling.m_ddcollide*100)/total),
               (unsigned long)(m_profiling.m_ddcollide/B3_DBVT_BP_PROFILING_RATE));
        printf("fdcollide: %lu%% (%luus)\r\n",
               (unsigned long)((50+m_profiling.m_fdcollide*100)/total),
               (unsigned long)(m_profiling.m_fdcollide/B3_DBVT_BP_PROFILING_RATE));
        printf("cleanup: %lu%% (%luus)\r\n",
               (unsigned long)((50+m_profiling.m_cleanup*100)/total),
               (unsigned long)(m_profiling.m_cleanup/B3_DBVT_BP_PROFILING_RATE));
        printf("total: %luus\r\n",(unsigned long)(total/B3_DBVT_BP_PROFILING_RATE));

        const unsigned long sum=m_profiling.m_ddcollide+
                                m_profiling.m_fdcollide+
                                m_profiling.m_cleanup;
        printf("leaked: %lu%% (%luus)\r\n",
               (unsigned long)(100-((50+sum*100)/total)),
               (unsigned long)((total-sum)/B3_DBVT_BP_PROFILING_RATE));

        // With no leaves in either tree the divisor would be zero; report 0% in that case.
        const unsigned long leafSamples=(unsigned long)(m_sets[0].m_leaves+m_sets[1].m_leaves)*B3_DBVT_BP_PROFILING_RATE;
        printf("job counts: %lu%%\r\n",
               leafSamples ? (unsigned long)((m_profiling.m_jobcount*100)/leafSamples) : 0ul);

        b3Clear(m_profiling);
        m_clock.reset();
    }
#endif

    performDeferredRemoval(dispatcher);

}

/// When the pair cache uses deferred removal, drops duplicate pairs and pairs whose leaf
/// volumes no longer intersect. Such pairs are cleaned, marked invalid with x = y = -1,
/// sorted to the end of the array and then cut off by shrinking it.
/// @param dispatcher  forwarded to the pair cache when a pair is cleaned
void b3DynamicBvhBroadphase::performDeferredRemoval(b3Dispatcher* dispatcher)
{
    if (!m_paircache->hasDeferredRemoval())
    {
        return;
    }

    b3BroadphasePairArray& pairArray = m_paircache->getOverlappingPairArray();

    // First sort: makes duplicates adjacent and pushes already-invalid pairs to the end.
    pairArray.quickSort(b3BroadphasePairSortPredicate());

    int numInvalidated = 0;
    b3BroadphasePair lastSeen = b3MakeBroadphasePair(-1,-1);

    for (int index = 0; index < pairArray.size(); ++index)
    {
        b3BroadphasePair& pair = pairArray[index];

        const bool duplicate = (pair == lastSeen);
        lastSeen = pair;

        // A duplicate is always dropped (it should have no algorithm attached). Any other
        // pair survives only if its two leaf volumes still intersect - it is important that
        // this AABB check is consistent with the broadphase.
        bool keepPair = false;
        if (!duplicate)
        {
            b3DbvtProxy* pa=&m_proxies[pair.x];
            b3DbvtProxy* pb=&m_proxies[pair.y];
            keepPair = b3Intersect(pa->leaf->volume,pb->leaf->volume);
        }

        if (keepPair)
        {
            continue;
        }

        m_paircache->cleanOverlappingPair(pair,dispatcher);
        pair.x = -1;
        pair.y = -1;
        ++numInvalidated;
    }

    // Second sort: moves the pairs invalidated above to the end, where they are cut off.
    pairArray.quickSort(b3BroadphasePairSortPredicate());
    pairArray.resize(pairArray.size() - numInvalidated);
}

//
/// Performs one broadphase step: incrementally optimizes both trees, advances the current
/// stage and moves the proxies still linked at the new stage into the fixed tree (m_sets[1]),
/// runs the tree-vs-tree collisions when collision is deferred, and incrementally removes
/// cached pairs whose leaf volumes no longer intersect.
/// @param dispatcher  forwarded to the pair cache when pairs are removed
void b3DynamicBvhBroadphase::collide(b3Dispatcher* dispatcher)
{
/*printf("---------------------------------------------------------\n");
printf("m_sets[0].m_leaves=%d\n",m_sets[0].m_leaves);
printf("m_sets[1].m_leaves=%d\n",m_sets[1].m_leaves);
printf("numPairs = %d\n",getOverlappingPairCache()->getNumOverlappingPairs());
{
int i;
for (i=0;i<getOverlappingPairCache()->getNumOverlappingPairs();i++)
{
printf("pair[%d]=(%d,%d),",i,getOverlappingPairCache()->getOverlappingPairArray()[i].m_pProxy0->getUid(),
getOverlappingPairCache()->getOverlappingPairArray()[i].m_pProxy1->getUid());
}
printf("\n");
}
*/



// With profiling enabled this times the whole function into m_profiling.m_total; otherwise it expands to nothing.
b3SPC(m_profiling.m_total);
/* optimize */
// The number of optimization passes is 1 plus m_dupdates percent of the dynamic tree's leaf count.
m_sets[0].optimizeIncremental(1+(m_sets[0].m_leaves*m_dupdates)/100);
if(m_fixedleft)
{
// The fixed tree is optimized only while m_fixedleft is non-zero; each step reduces it by `count`.
const int count=1+(m_sets[1].m_leaves*m_fupdates)/100;
m_sets[1].optimizeIncremental(1+(m_sets[1].m_leaves*m_fupdates)/100);
m_fixedleft=b3Max<int>(0,m_fixedleft-count);
}
/* dynamic -> fixed set */
// setAabb()/setAabbForceUpdate() re-link an updated proxy into the list of the stage that is
// current at that time, so whatever is still linked at the new current stage is moved to the fixed tree.
m_stageCurrent=(m_stageCurrent+1)%STAGECOUNT;
b3DbvtProxy* current=m_stageRoots[m_stageCurrent];
if(current)
{
b3DbvtTreeCollider collider(this);
do {
// Remember the successor first: re-linking `current` overwrites its links.
b3DbvtProxy* next=current->links[1];
b3ListRemove(current,m_stageRoots[current->stage]);
b3ListAppend(current,m_stageRoots[STAGECOUNT]);
#if B3_DBVT_BP_ACCURATESLEEPING
// NOTE(review): this section passes the proxy pointer where destroyProxy() passes getUid(), and reads
// current->aabb where the rest of this file uses m_aabbMin/m_aabbMax - confirm it still compiles when enabled.
m_paircache->removeOverlappingPairsContainingProxy(current,dispatcher);
collider.proxy=current;
b3DynamicBvh::collideTV(m_sets[0].m_root,current->aabb,collider);
b3DynamicBvh::collideTV(m_sets[1].m_root,current->aabb,collider);
#endif
// Move the leaf from the dynamic tree to the fixed tree, using the proxy's stored AABB.
m_sets[0].remove(current->leaf);
B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) curAabb=b3DbvtVolume::FromMM(current->m_aabbMin,current->m_aabbMax);
current->leaf = m_sets[1].insert(curAabb,current);
current->stage = STAGECOUNT;
current = next;
} while(current);
// The fixed tree changed: schedule incremental optimization for all of its leaves.
m_fixedleft=m_sets[1].m_leaves;
m_needcleanup=true;
}
/* collide dynamics */
{
b3DbvtTreeCollider collider(this);
// Both tree-vs-tree passes run only when collision is deferred.
if(m_deferedcollide)
{
// dynamic tree against fixed tree
b3SPC(m_profiling.m_fdcollide);
m_sets[0].collideTTpersistentStack(m_sets[0].m_root,m_sets[1].m_root,collider);
}
if(m_deferedcollide)
{
// dynamic tree against itself
b3SPC(m_profiling.m_ddcollide);
m_sets[0].collideTTpersistentStack(m_sets[0].m_root,m_sets[0].m_root,collider);
}
}
/* clean up */
if(m_needcleanup)
{
b3SPC(m_profiling.m_cleanup);
b3BroadphasePairArray& pairs=m_paircache->getOverlappingPairArray();
if(pairs.size()>0)
{

// Check at most `ni` pairs per step, starting at the rolling index m_cid:
// ni = min(pair count, max(m_newpairs, m_cupdates percent of the pair count)).
int ni=b3Min(pairs.size(),b3Max<int>(m_newpairs,(pairs.size()*m_cupdates)/100));
for(int i=0;i<ni;++i)
{
b3BroadphasePair& p=pairs[(m_cid+i)%pairs.size()];
b3DbvtProxy* pa=&m_proxies[p.x];
b3DbvtProxy* pb=&m_proxies[p.y];
if(!b3Intersect(pa->leaf->volume,pb->leaf->volume))
{
#if B3_DBVT_BP_SORTPAIRS
if(pa->m_uniqueId>pb->m_uniqueId)
b3Swap(pa,pb);
#endif
m_paircache->removeOverlappingPair(pa->getUid(),pb->getUid(),dispatcher);
// Revisit the same index with one fewer pair left to check; presumably the removal
// shrinks the array and puts another pair into this slot - TODO confirm.
--ni;--i;
}
}
// Advance the rolling start index for the next step (reset when no pairs remain).
if(pairs.size()>0) m_cid=(m_cid+ni)%pairs.size(); else m_cid=0;
}
}
++m_pid;
m_newpairs=1;
m_needcleanup=false;
// Ratio of updates done to updates requested; both counters are then halved.
if(m_updates_call>0)
{ m_updates_ratio=m_updates_done/(b3Scalar)m_updates_call; }
else
{ m_updates_ratio=0; }
m_updates_done/=2;
m_updates_call/=2;
}

//
void b3DynamicBvhBroadphase::optimize()
{
m_sets[0].optimizeTopDown();
m_sets[1].optimizeTopDown();
}

//
/// Returns the pair cache this broadphase writes overlapping pairs into (mutable access).
b3OverlappingPairCache* b3DynamicBvhBroadphase::getOverlappingPairCache()
{
	return m_paircache;
}

//
/// Returns the pair cache this broadphase writes overlapping pairs into (read-only access).
const b3OverlappingPairCache* b3DynamicBvhBroadphase::getOverlappingPairCache() const
{
	return m_paircache;
}

//
/// Computes the axis-aligned bounds enclosing everything stored in the broadphase.
///
/// - both trees populated: the merge of the two root volumes
/// - only one tree populated: that tree's root volume
/// - both trees empty: a zero-radius volume centred on the origin
///
/// @param aabbMin receives the minimum corner of the bounds
/// @param aabbMax receives the maximum corner of the bounds
void b3DynamicBvhBroadphase::getBroadphaseAabb(b3Vector3& aabbMin,b3Vector3& aabbMax) const
{
	B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume) bounds;

	const bool dynamicEmpty = m_sets[0].empty();
	const bool fixedEmpty = m_sets[1].empty();

	if (dynamicEmpty && fixedEmpty)
	{
		bounds = b3DbvtVolume::FromCR(b3MakeVector3(0,0,0),0);
	}
	else if (dynamicEmpty)
	{
		bounds = m_sets[1].m_root->volume;
	}
	else if (fixedEmpty)
	{
		bounds = m_sets[0].m_root->volume;
	}
	else
	{
		b3Merge(m_sets[0].m_root->volume, m_sets[1].m_root->volume, bounds);
	}

	aabbMin = bounds.Mins();
	aabbMax = bounds.Maxs();
}

/// Restores the broadphase bookkeeping to fixed start-up values.
///
/// The reset only happens when neither tree holds any leaf; if at least one
/// leaf is still present the call does nothing. m_prediction, m_paircache and
/// m_releasepaircache are not touched here.
///
/// @param dispatcher not used by this function
void b3DynamicBvhBroadphase::resetPool(b3Dispatcher* dispatcher)
{
	const int liveLeaves = m_sets[0].m_leaves + m_sets[1].m_leaves;
	if (liveLeaves != 0)
	{
		return;
	}

	// Drop the internal dynamic tree data structures.
	m_sets[0].clear();
	m_sets[1].clear();

	// Flags.
	m_deferedcollide = false;
	m_needcleanup = true;

	// Stage / optimisation state.
	m_stageCurrent = 0;
	m_fixedleft = 0;

	// Per-frame update percentages.
	m_fupdates = 1;
	m_dupdates = 0;
	m_cupdates = 10;

	// Counters and statistics.
	m_newpairs = 1;
	m_updates_call = 0;
	m_updates_done = 0;
	m_updates_ratio = 0;
	m_pid = 0;
	m_cid = 0;

	// Empty every stage list (STAGECOUNT+1 entries).
	for (int stage = 0; stage < STAGECOUNT + 1; ++stage)
	{
		m_stageRoots[stage] = 0;
	}
}

//
/// Statistics hook; this implementation prints nothing.
void b3DynamicBvhBroadphase::printStats()
{
}

//
#if B3_DBVT_BP_ENABLE_BENCHMARK

/// Helper types and utilities for the (optional) broadphase benchmark.
/// Only compiled when B3_DBVT_BP_ENABLE_BENCHMARK is non-zero.
struct b3BroadphaseBenchmark
{
	/// Parameters of a single benchmark run.
	struct Experiment
	{
		const char* name;       ///< label printed in the report
		int object_count;       ///< number of objects to create
		int update_count;       ///< benchmark() treats this as a percentage of object_count
		int spawn_count;        ///< benchmark() treats this as a percentage of object_count
		int iterations;         ///< number of update/collide rounds
		b3Scalar speed;         ///< amount added to Object::time per update
		b3Scalar amplitude;     ///< scale of the generated motion
	};

	/// One moving box driven along a trigonometric path.
	struct Object
	{
		b3Vector3 center;
		b3Vector3 extents;
		b3BroadphaseProxy* proxy;
		b3Scalar time;

		/// Advances the object's clock by @p speed, recomputes its centre and
		/// pushes the new AABB (center -/+ extents) to the broadphase @p pbi.
		void update(b3Scalar speed,b3Scalar amplitude,b3BroadphaseInterface* pbi)
		{
			time += speed;
			center[0] = b3Cos(time*(b3Scalar)2.17)*amplitude+
				b3Sin(time)*amplitude/2;
			center[1] = b3Cos(time*(b3Scalar)1.38)*amplitude+
				b3Sin(time)*amplitude;
			center[2] = b3Sin(time*(b3Scalar)0.777)*amplitude;
			pbi->setAabb(proxy,center-extents,center+extents,0);
		}
	};

	/// Pseudo-random integer in [0, range], taken from rand().
	static int UnsignedRand(int range=RAND_MAX-1) { return(rand()%(range+1)); }

	/// Pseudo-random scalar in [0, 1], quantised to 1/16384 steps.
	static b3Scalar UnitRand() { return(UnsignedRand(16384)/(b3Scalar)16384); }

	/// Prints the time elapsed on clock @p c in microseconds and rounded
	/// milliseconds; when @p count is non-zero also prints count per second.
	static void OutputTime(const char* name,b3Clock& c,unsigned count=0)
	{
		const unsigned long us=c.getTimeMicroseconds();
		const unsigned long ms=(us+500)/1000;
		const b3Scalar sec=us/(b3Scalar)(1000*1000);
		// us and ms are unsigned long, so they must be printed with %lu:
		// passing them to %u is a format/argument mismatch.
		if(count>0)
			printf("%s : %lu us (%lu ms), %.2f/s\r\n",name,us,ms,count/sec);
		else
			printf("%s : %lu us (%lu ms)\r\n",name,us,ms);
	}
};

/// Runs the built-in broadphase benchmark against @p pbi and prints timings.
///
/// For each entry of the experiment table: seeds rand() with a fixed value,
/// creates the objects, updates all of them once, then repeatedly updates the
/// first update_count objects and recomputes the overlapping pairs, and
/// finally destroys every proxy. Each phase is timed with a b3Clock.
///
/// Only compiled when B3_DBVT_BP_ENABLE_BENCHMARK is non-zero.
void b3DynamicBvhBroadphase::benchmark(b3BroadphaseInterface* pbi)
{
	static const b3BroadphaseBenchmark::Experiment experiments[]=
	{
		{"1024o.10%",1024,10,0,8192,(b3Scalar)0.005,(b3Scalar)100},
		/*{"4096o.10%",4096,10,0,8192,(b3Scalar)0.005,(b3Scalar)100},
		{"8192o.10%",8192,10,0,8192,(b3Scalar)0.005,(b3Scalar)100},*/
	};
	static const int nexperiments=sizeof(experiments)/sizeof(experiments[0]);
	b3AlignedObjectArray<b3BroadphaseBenchmark::Object*> objects;
	b3Clock wallclock;
	/* Begin */
	for(int iexp=0;iexp<nexperiments;++iexp)
	{
		const b3BroadphaseBenchmark::Experiment& experiment=experiments[iexp];
		const int object_count=experiment.object_count;
		// update_count / spawn_count are given as percentages of object_count.
		const int update_count=(object_count*experiment.update_count)/100;
		const int spawn_count=(object_count*experiment.spawn_count)/100;
		const b3Scalar speed=experiment.speed;
		const b3Scalar amplitude=experiment.amplitude;
		// All of these values are int, so they are printed with %d (not %u).
		printf("Experiment #%d '%s':\r\n",iexp,experiment.name);
		printf("\tObjects: %d\r\n",object_count);
		printf("\tUpdate: %d\r\n",update_count);
		printf("\tSpawn: %d\r\n",spawn_count);
		printf("\tSpeed: %f\r\n",speed);
		printf("\tAmplitude: %f\r\n",amplitude);
		// Fixed seed so every run generates the same objects.
		srand(180673);
		/* Create objects */
		wallclock.reset();
		objects.reserve(object_count);
		for(int i=0;i<object_count;++i)
		{
			b3BroadphaseBenchmark::Object* po=new b3BroadphaseBenchmark::Object();
			po->center[0]=b3BroadphaseBenchmark::UnitRand()*50;
			po->center[1]=b3BroadphaseBenchmark::UnitRand()*50;
			po->center[2]=b3BroadphaseBenchmark::UnitRand()*50;
			po->extents[0]=b3BroadphaseBenchmark::UnitRand()*2+2;
			po->extents[1]=b3BroadphaseBenchmark::UnitRand()*2+2;
			po->extents[2]=b3BroadphaseBenchmark::UnitRand()*2+2;
			po->time=b3BroadphaseBenchmark::UnitRand()*2000;
			po->proxy=pbi->createProxy(po->center-po->extents,po->center+po->extents,0,po,1,1,0,0);
			objects.push_back(po);
		}
		b3BroadphaseBenchmark::OutputTime("\tInitialization",wallclock);
		/* First update */
		wallclock.reset();
		for(int i=0;i<objects.size();++i)
		{
			objects[i]->update(speed,amplitude,pbi);
		}
		b3BroadphaseBenchmark::OutputTime("\tFirst update",wallclock);
		/* Updates */
		wallclock.reset();
		for(int i=0;i<experiment.iterations;++i)
		{
			for(int j=0;j<update_count;++j)
			{
				objects[j]->update(speed,amplitude,pbi);
			}
			pbi->calculateOverlappingPairs(0);
		}
		b3BroadphaseBenchmark::OutputTime("\tUpdate",wallclock,experiment.iterations);
		/* Clean up */
		wallclock.reset();
		for(int i=0;i<objects.size();++i)
		{
			pbi->destroyProxy(objects[i]->proxy,0);
			delete objects[i];
		}
		objects.resize(0);
		b3BroadphaseBenchmark::OutputTime("\tRelease",wallclock);
	}

}
#else
/*void b3DynamicBvhBroadphase::benchmark(b3BroadphaseInterface*)
{}
*/
#endif

#if B3_DBVT_BP_PROFILE
#undef b3SPC
#endif


+ 208
- 0
src/bullet/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h View File

@@ -0,0 +1,208 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

///b3DynamicBvhBroadphase implementation by Nathanael Presson
#ifndef B3_DBVT_BROADPHASE_H
#define B3_DBVT_BROADPHASE_H

#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.h"
#include "Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h"
#include "Bullet3Common/b3AlignedObjectArray.h"

#include "b3BroadphaseCallback.h"

//
// Compile time config
//

#define B3_DBVT_BP_PROFILE 0
//#define B3_DBVT_BP_SORTPAIRS 1
#define B3_DBVT_BP_PREVENTFALSEUPDATE 0
#define B3_DBVT_BP_ACCURATESLEEPING 0
#define B3_DBVT_BP_ENABLE_BENCHMARK 0
#define B3_DBVT_BP_MARGIN (b3Scalar)0.05

#if B3_DBVT_BP_PROFILE
#define B3_DBVT_BP_PROFILING_RATE 256

#endif




/// Per-object record kept by the broadphase: the client pointer, the collision
/// filter group/mask, a unique id and the object's AABB.
B3_ATTRIBUTE_ALIGNED16(struct) b3BroadphaseProxy
{

B3_DECLARE_ALIGNED_ALLOCATOR();
///optional filtering to cull potential collisions
enum CollisionFilterGroups
{
DefaultFilter = 1,
StaticFilter = 2,
KinematicFilter = 4,
DebrisFilter = 8,
SensorTrigger = 16,
CharacterFilter = 32,
AllFilter = -1 //all bits set, i.e. every filter above (including CharacterFilter)
};

//Usually the client b3CollisionObject or Rigidbody class
void* m_clientObject;
short int m_collisionFilterGroup;
short int m_collisionFilterMask;
void* m_multiSapParentProxy;
int m_uniqueId;//m_uniqueId is introduced for paircache. could get rid of this, by calculating the address offset etc.

b3Vector3 m_aabbMin;
b3Vector3 m_aabbMax;

/// Returns m_uniqueId.
B3_FORCE_INLINE int getUid() const
{
return m_uniqueId;
}

//used for memory pools
//NOTE: only the two pointer members are zeroed here; the filter group/mask,
//m_uniqueId and the AABB are not assigned by this constructor.
b3BroadphaseProxy() :m_clientObject(0),m_multiSapParentProxy(0)
{
}

/// Initialises the client pointer, filter group/mask, AABB and the multi-SAP
/// parent pointer. m_uniqueId is not assigned by this constructor.
b3BroadphaseProxy(const b3Vector3& aabbMin,const b3Vector3& aabbMax,void* userPtr,short int collisionFilterGroup, short int collisionFilterMask,void* multiSapParentProxy=0)
:m_clientObject(userPtr),
m_collisionFilterGroup(collisionFilterGroup),
m_collisionFilterMask(collisionFilterMask),
m_aabbMin(aabbMin),
m_aabbMax(aabbMax)
{
m_multiSapParentProxy = multiSapParentProxy;
}
};





//
// b3DbvtProxy
//
/// Broadphase proxy extended with the bookkeeping used by b3DynamicBvhBroadphase:
/// the tree leaf holding the proxy, two list links and the stage index.
struct b3DbvtProxy : b3BroadphaseProxy
{
/* Fields */
//b3DbvtAabbMm aabb;
b3DbvtNode* leaf;       // tree node currently holding this proxy
b3DbvtProxy* links[2];  // list links; the broadphase follows links[1] as "next" when walking a stage list
int stage;              // index of the stage list this proxy is in
/* ctor */

// NOTE: this constructor assigns none of leaf, links or stage.
explicit b3DbvtProxy() {}
// Forwards to the b3BroadphaseProxy constructor and clears both links;
// leaf and stage are not assigned here.
b3DbvtProxy(const b3Vector3& aabbMin,const b3Vector3& aabbMax,void* userPtr,short int collisionFilterGroup, short int collisionFilterMask) :
b3BroadphaseProxy(aabbMin,aabbMax,userPtr,collisionFilterGroup,collisionFilterMask)
{
links[0]=links[1]=0;
}
};

typedef b3AlignedObjectArray<b3DbvtProxy*> b3DbvtProxyArray;

///The b3DynamicBvhBroadphase implements a broadphase using two dynamic AABB bounding volume hierarchies/trees (see b3DynamicBvh).
///One tree is used for static/non-moving objects, and another tree is used for dynamic objects. Objects can move from one tree to the other.
///This is a very fast broadphase, especially for very dynamic worlds where many objects are moving. Its insert/add and remove of objects is generally faster than the sweep and prune broadphases b3AxisSweep3 and b332BitAxisSweep3.
// NOTE(review): several member functions below are virtual while the destructor
// is not; confirm that instances are never deleted through a pointer to this
// type when the dynamic type is a derived class.
struct b3DynamicBvhBroadphase
{
/* Config */
enum {
DYNAMIC_SET = 0, /* Dynamic set index */
FIXED_SET = 1, /* Fixed set index */
STAGECOUNT = 2 /* Number of stages */
};
/* Fields */
b3DynamicBvh m_sets[2]; // Dbvt sets
b3DbvtProxy* m_stageRoots[STAGECOUNT+1]; // Stages list

b3AlignedObjectArray<b3DbvtProxy> m_proxies; // Proxy storage
b3OverlappingPairCache* m_paircache; // Pair cache
b3Scalar m_prediction; // Velocity prediction
int m_stageCurrent; // Current stage
int m_fupdates; // % of fixed updates per frame
int m_dupdates; // % of dynamic updates per frame
int m_cupdates; // % of cleanup updates per frame
int m_newpairs; // Number of pairs created
int m_fixedleft; // Fixed optimization left
unsigned m_updates_call; // Number of update calls
unsigned m_updates_done; // Number of updates done
b3Scalar m_updates_ratio; // m_updates_done/m_updates_call
int m_pid; // Parse id
int m_cid; // Cleanup index
bool m_releasepaircache; // Release pair cache on delete
bool m_deferedcollide; // Defer dynamic/static collision to collide call
bool m_needcleanup; // Need to run cleanup?
#if B3_DBVT_BP_PROFILE
// Profiling data, only present when B3_DBVT_BP_PROFILE is non-zero
b3Clock m_clock;
struct {
unsigned long m_total;
unsigned long m_ddcollide;
unsigned long m_fdcollide;
unsigned long m_cleanup;
unsigned long m_jobcount;
} m_profiling;
#endif
/* Methods */
b3DynamicBvhBroadphase(int proxyCapacity, b3OverlappingPairCache* paircache=0);
~b3DynamicBvhBroadphase();
void collide(b3Dispatcher* dispatcher);
// Runs optimizeTopDown() on both sets
void optimize();
/* b3BroadphaseInterface Implementation */
b3BroadphaseProxy* createProxy(const b3Vector3& aabbMin,const b3Vector3& aabbMax,int objectIndex,void* userPtr,short int collisionFilterGroup,short int collisionFilterMask);
virtual void destroyProxy(b3BroadphaseProxy* proxy,b3Dispatcher* dispatcher);
virtual void setAabb(int objectId,const b3Vector3& aabbMin,const b3Vector3& aabbMax,b3Dispatcher* dispatcher);
virtual void rayTest(const b3Vector3& rayFrom,const b3Vector3& rayTo, b3BroadphaseRayCallback& rayCallback, const b3Vector3& aabbMin=b3MakeVector3(0,0,0), const b3Vector3& aabbMax = b3MakeVector3(0,0,0));
virtual void aabbTest(const b3Vector3& aabbMin, const b3Vector3& aabbMax, b3BroadphaseAabbCallback& callback);

//virtual void getAabb(b3BroadphaseProxy* proxy,b3Vector3& aabbMin, b3Vector3& aabbMax ) const;
virtual void getAabb(int objectId,b3Vector3& aabbMin, b3Vector3& aabbMax ) const;
virtual void calculateOverlappingPairs(b3Dispatcher* dispatcher=0);
// Both overloads return m_paircache
virtual b3OverlappingPairCache* getOverlappingPairCache();
virtual const b3OverlappingPairCache* getOverlappingPairCache() const;
// Bounds of everything stored in both sets (a zero-size volume at the origin when both are empty)
virtual void getBroadphaseAabb(b3Vector3& aabbMin,b3Vector3& aabbMax) const;
// Empty in this implementation
virtual void printStats();


///reset broadphase internal structures, to ensure determinism/reproducibility
///(the implementation only resets when both sets contain no leaves)
virtual void resetPool(b3Dispatcher* dispatcher);

void performDeferredRemoval(b3Dispatcher* dispatcher);
// Sets m_prediction
void setVelocityPrediction(b3Scalar prediction)
{
m_prediction = prediction;
}
// Returns m_prediction
b3Scalar getVelocityPrediction() const
{
return m_prediction;
}

///this setAabbForceUpdate is similar to setAabb but always forces the aabb update.
///it is not part of the b3BroadphaseInterface but specific to b3DynamicBvhBroadphase.
///it bypasses certain optimizations that prevent aabb updates (when the aabb shrinks), see
///http://code.google.com/p/bullet/issues/detail?id=223
void setAabbForceUpdate( b3BroadphaseProxy* absproxy,const b3Vector3& aabbMin,const b3Vector3& aabbMax,b3Dispatcher* /*dispatcher*/);

//static void benchmark(b3BroadphaseInterface*);


};

#endif

+ 72
- 0
src/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPair.h View File

@@ -0,0 +1,72 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_OVERLAPPING_PAIR_H
#define B3_OVERLAPPING_PAIR_H

#include "Bullet3Common/shared/b3Int4.h"

#define B3_NEW_PAIR_MARKER -1
#define B3_REMOVED_PAIR_MARKER -2

typedef b3Int4 b3BroadphasePair;

/// Builds a broadphase pair from two ids.
///
/// The smaller id is stored in .x and the larger in .y, so (a,b) and (b,a)
/// produce the same pair. .z and .w are both set to B3_NEW_PAIR_MARKER.
inline b3Int4 b3MakeBroadphasePair(int xx,int yy)
{
	const bool alreadyOrdered = xx < yy;

	b3Int4 result;
	result.x = alreadyOrdered ? xx : yy;
	result.y = alreadyOrdered ? yy : xx;
	result.z = B3_NEW_PAIR_MARKER;
	result.w = B3_NEW_PAIR_MARKER;
	return result;
}

/*struct b3BroadphasePair : public b3Int4
{
explicit b3BroadphasePair(){}
};
*/

/// Ordering predicate for broadphase pairs: compares .x first and, when the
/// .x values are equal, .y. Returns true when @p a has the greater key, i.e.
/// it describes a descending order on (x, y).
class b3BroadphasePairSortPredicate
{
public:

	bool operator() ( const b3BroadphasePair& a, const b3BroadphasePair& b ) const
	{
		if (a.x != b.x)
		{
			return a.x > b.x;
		}
		return a.y > b.y;
	}
};

/// Two pairs are equal when both ids (.x and .y) match; .z and .w are ignored.
B3_FORCE_INLINE bool operator==(const b3BroadphasePair& a, const b3BroadphasePair& b)
{
	if (a.x != b.x)
	{
		return false;
	}
	return a.y == b.y;
}

#endif //B3_OVERLAPPING_PAIR_H


+ 638
- 0
src/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.cpp View File

@@ -0,0 +1,638 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/



#include "b3OverlappingPairCache.h"

//#include "b3Dispatcher.h"
//#include "b3CollisionAlgorithm.h"
#include "Bullet3Geometry/b3AabbUtil.h"

#include <stdio.h>

int b3g_overlappingPairs = 0;
int b3g_removePairs =0;
int b3g_addedPairs =0;
int b3g_findPairs =0;




/// Creates an empty cache: no filter callback, not blocked for changes, room
/// reserved for two pairs, and hash tables sized to match that capacity.
b3HashedOverlappingPairCache::b3HashedOverlappingPairCache():
	m_overlapFilterCallback(0),
	m_blockedForChanges(false)
{
	const int startingCapacity = 2;
	m_overlappingPairArray.reserve(startingCapacity);
	// Size m_hashTable / m_next to the pair array's capacity.
	growTables();
}




/// Nothing to release explicitly; members clean up after themselves.
b3HashedOverlappingPairCache::~b3HashedOverlappingPairCache()
{
}



/// Per-pair cleanup hook. Currently a no-op: the previous implementation,
/// which destroyed and freed a collision algorithm attached to the pair
/// through the dispatcher, is disabled because b3BroadphasePair carries no
/// algorithm pointer.
void b3HashedOverlappingPairCache::cleanOverlappingPair(b3BroadphasePair& pair,b3Dispatcher* dispatcher)
{
}




void b3HashedOverlappingPairCache::cleanProxyFromPairs(int proxy,b3Dispatcher* dispatcher)
{

class CleanPairCallback : public b3OverlapCallback
{
int m_cleanProxy;
b3OverlappingPairCache* m_pairCache;
b3Dispatcher* m_dispatcher;

public:
CleanPairCallback(int cleanProxy,b3OverlappingPairCache* pairCache,b3Dispatcher* dispatcher)
:m_cleanProxy(cleanProxy),
m_pairCache(pairCache),
m_dispatcher(dispatcher)
{
}
virtual bool processOverlap(b3BroadphasePair& pair)
{
if ((pair.x == m_cleanProxy) ||
(pair.y == m_cleanProxy))
{
m_pairCache->cleanOverlappingPair(pair,m_dispatcher);
}
return false;
}
};

CleanPairCallback cleanPairs(proxy,this,dispatcher);

processAllOverlappingPairs(&cleanPairs,dispatcher);

}




/// Removes every cached pair in which @p proxy appears as either member.
void b3HashedOverlappingPairCache::removeOverlappingPairsContainingProxy(int proxy,b3Dispatcher* dispatcher)
{
	// Visitor that flags (returns true for) pairs referencing the obsolete proxy;
	// processAllOverlappingPairs() removes the flagged pairs.
	class ObsoletePairMatcher : public b3OverlapCallback
	{
		int m_staleProxy;

	public:
		ObsoletePairMatcher(int staleProxy)
			:m_staleProxy(staleProxy)
		{
		}
		virtual bool processOverlap(b3BroadphasePair& pair)
		{
			if (pair.x == m_staleProxy)
			{
				return true;
			}
			return pair.y == m_staleProxy;
		}
	};

	ObsoletePairMatcher matcher(proxy);
	processAllOverlappingPairs(&matcher,dispatcher);
}





/// Looks up the cached pair for two proxy ids (in either order).
///
/// Increments the b3g_findPairs counter on every call.
///
/// @return pointer to the stored pair, or NULL when the pair is not cached
b3BroadphasePair* b3HashedOverlappingPairCache::findPair(int proxy0, int proxy1)
{
	b3g_findPairs++;

	// Pairs are keyed with the smaller id first.
	if (proxy0 > proxy1)
	{
		b3Swap(proxy0,proxy1);
	}
	const int lowId = proxy0;
	const int highId = proxy1;

	const int bucket = static_cast<int>(getHash(static_cast<unsigned int>(lowId), static_cast<unsigned int>(highId)) & (m_overlappingPairArray.capacity()-1));

	if (bucket >= m_hashTable.size())
	{
		return NULL;
	}

	// Walk the bucket's chain until the pair is found or the chain ends.
	int cursor = m_hashTable[bucket];
	for (;;)
	{
		if (cursor == B3_NULL_PAIR)
		{
			return NULL;
		}
		if (!(equalsPair(m_overlappingPairArray[cursor], lowId, highId) == false))
		{
			break;
		}
		cursor = m_next[cursor];
	}

	b3Assert(cursor < m_overlappingPairArray.size());

	return &m_overlappingPairArray[cursor];
}

//#include <stdio.h>

void b3HashedOverlappingPairCache::growTables()
{

int newCapacity = m_overlappingPairArray.capacity();

if (m_hashTable.size() < newCapacity)
{
//grow hashtable and next table
int curHashtableSize = m_hashTable.size();

m_hashTable.resize(newCapacity);
m_next.resize(newCapacity);


int i;

for (i= 0; i < newCapacity; ++i)
{
m_hashTable[i] = B3_NULL_PAIR;
}
for (i = 0; i < newCapacity; ++i)
{
m_next[i] = B3_NULL_PAIR;
}

for(i=0;i<curHashtableSize;i++)
{
const b3BroadphasePair& pair = m_overlappingPairArray[i];
int proxyId1 = pair.x;
int proxyId2 = pair.y;
/*if (proxyId1 > proxyId2)
b3Swap(proxyId1, proxyId2);*/
int hashValue = static_cast<int>(getHash(static_cast<unsigned int>(proxyId1),static_cast<unsigned int>(proxyId2)) & (m_overlappingPairArray.capacity()-1)); // New hash value with new mask
m_next[i] = m_hashTable[hashValue];
m_hashTable[hashValue] = i;
}


}
}

/// Returns the cached pair for the two ids, creating it when it does not exist.
///
/// A new pair is appended to the pair array and linked at the head of its hash
/// bucket. If appending enlarged the array's capacity, the hash tables are
/// grown first and the bucket is recomputed with the new mask.
b3BroadphasePair* b3HashedOverlappingPairCache::internalAddPair(int proxy0, int proxy1)
{
	// Pairs are keyed with the smaller id first.
	if (proxy0 > proxy1)
	{
		b3Swap(proxy0,proxy1);
	}
	const int lowId = proxy0;
	const int highId = proxy1;

	int bucket = static_cast<int>(getHash(static_cast<unsigned int>(lowId),static_cast<unsigned int>(highId)) & (m_overlappingPairArray.capacity()-1));

	b3BroadphasePair* existing = internalFindPair(proxy0, proxy1, bucket);
	if (existing != NULL)
	{
		return existing;
	}

	// Index the new pair will occupy, and the capacity before appending.
	const int newIndex = m_overlappingPairArray.size();
	const int capacityBefore = m_overlappingPairArray.capacity();
	b3BroadphasePair* added = &m_overlappingPairArray.expandNonInitializing();

	const int capacityAfter = m_overlappingPairArray.capacity();
	if (capacityBefore < capacityAfter)
	{
		growTables();
		// The mask changed with the capacity, so hash again.
		bucket = static_cast<int>(getHash(static_cast<unsigned int>(lowId),static_cast<unsigned int>(highId)) & (m_overlappingPairArray.capacity()-1));
	}

	*added = b3MakeBroadphasePair(proxy0,proxy1);

	// Push the new pair at the head of its bucket's chain.
	m_next[newIndex] = m_hashTable[bucket];
	m_hashTable[bucket] = newIndex;

	return added;
}



/// Removes the pair formed by the two proxy ids (in either order) from the cache.
///
/// The pair is cleaned, unlinked from its hash chain, and its array slot is
/// filled with the last pair of the array (whose chain entry is re-linked under
/// the new index) so the pair array stays dense. Increments b3g_removePairs on
/// every call.
///
/// @return always 0, whether or not the pair was found
void* b3HashedOverlappingPairCache::removeOverlappingPair(int proxy0, int proxy1,b3Dispatcher* dispatcher)
{
b3g_removePairs++;
// Pairs are keyed with the smaller id first.
if(proxy0>proxy1)
b3Swap(proxy0,proxy1);
int proxyId1 = proxy0;
int proxyId2 = proxy1;

/*if (proxyId1 > proxyId2)
b3Swap(proxyId1, proxyId2);*/

int hash = static_cast<int>(getHash(static_cast<unsigned int>(proxyId1),static_cast<unsigned int>(proxyId2)) & (m_overlappingPairArray.capacity()-1));

b3BroadphasePair* pair = internalFindPair(proxy0, proxy1, hash);
if (pair == NULL)
{
// Nothing to remove.
return 0;
}

cleanOverlappingPair(*pair,dispatcher);


// Index of the pair inside the pair array.
int pairIndex = int(pair - &m_overlappingPairArray[0]);
b3Assert(pairIndex < m_overlappingPairArray.size());

// Remove the pair from the hash table.
int index = m_hashTable[hash];
b3Assert(index != B3_NULL_PAIR);

// Find the chain entry that precedes pairIndex (if any).
int previous = B3_NULL_PAIR;
while (index != pairIndex)
{
previous = index;
index = m_next[index];
}

if (previous != B3_NULL_PAIR)
{
b3Assert(m_next[previous] == pairIndex);
m_next[previous] = m_next[pairIndex];
}
else
{
// The pair was the head of its bucket.
m_hashTable[hash] = m_next[pairIndex];
}

// We now move the last pair into spot of the
// pair being removed. We need to fix the hash
// table indices to support the move.

int lastPairIndex = m_overlappingPairArray.size() - 1;

//if (m_ghostPairCallback)
// m_ghostPairCallback->removeOverlappingPair(proxy0, proxy1,dispatcher);

// If the removed pair is the last pair, we are done.
if (lastPairIndex == pairIndex)
{
m_overlappingPairArray.pop_back();
return 0;
}

// Remove the last pair from the hash table.
const b3BroadphasePair* last = &m_overlappingPairArray[lastPairIndex];
/* missing swap here too, Nat. */
int lastHash = static_cast<int>(getHash(static_cast<unsigned int>(last->x), static_cast<unsigned int>(last->y)) & (m_overlappingPairArray.capacity()-1));

index = m_hashTable[lastHash];
b3Assert(index != B3_NULL_PAIR);

// Find the chain entry that precedes lastPairIndex (if any).
previous = B3_NULL_PAIR;
while (index != lastPairIndex)
{
previous = index;
index = m_next[index];
}

if (previous != B3_NULL_PAIR)
{
b3Assert(m_next[previous] == lastPairIndex);
m_next[previous] = m_next[lastPairIndex];
}
else
{
// The last pair was the head of its bucket.
m_hashTable[lastHash] = m_next[lastPairIndex];
}

// Copy the last pair into the remove pair's spot.
m_overlappingPairArray[pairIndex] = m_overlappingPairArray[lastPairIndex];

// Insert the last pair into the hash table
m_next[pairIndex] = m_hashTable[lastHash];
m_hashTable[lastHash] = pairIndex;

m_overlappingPairArray.pop_back();

return 0;
}
//#include <stdio.h>

/// Hands every cached pair to @p callback.
///
/// When the callback returns true the pair is removed (and b3g_overlappingPairs
/// is decremented); the same index is then visited again because removal moves
/// another pair into that slot. Otherwise iteration advances to the next pair.
void b3HashedOverlappingPairCache::processAllOverlappingPairs(b3OverlapCallback* callback,b3Dispatcher* dispatcher)
{
	int cursor = 0;
	while (cursor < m_overlappingPairArray.size())
	{
		b3BroadphasePair* current = &m_overlappingPairArray[cursor];
		if (!callback->processOverlap(*current))
		{
			cursor++;
			continue;
		}

		removeOverlappingPair(current->x,current->y,dispatcher);
		b3g_overlappingPairs--;
	}
}





/// Re-orders the cached pairs according to b3BroadphasePairSortPredicate.
///
/// The hash tables index pairs by array position, so the cache is rebuilt:
/// every pair is copied out, removed, the copies are sorted, and the pairs are
/// added back in sorted order.
void b3HashedOverlappingPairCache::sortOverlappingPairs(b3Dispatcher* dispatcher)
{
	// 1. Snapshot the current pairs.
	b3BroadphasePairArray snapshot;
	for (int n = 0; n < m_overlappingPairArray.size(); n++)
	{
		snapshot.push_back(m_overlappingPairArray[n]);
	}

	// 2. Empty the cache.
	for (int n = 0; n < snapshot.size(); n++)
	{
		removeOverlappingPair(snapshot[n].x,snapshot[n].y,dispatcher);
	}

	// 3. Clear all chain links.
	for (int n = 0; n < m_next.size(); n++)
	{
		m_next[n] = B3_NULL_PAIR;
	}

	// 4. Sort the snapshot and re-insert.
	snapshot.quickSort(b3BroadphasePairSortPredicate());

	for (int n = 0; n < snapshot.size(); n++)
	{
		addOverlappingPair(snapshot[n].x ,snapshot[n].y);
	}
}


/// Removes the pair formed by the two proxy ids, unless deferred removal is
/// enabled (in which case the call does nothing).
///
/// The pair is located with a linear search; when found it is cleaned, swapped
/// with the last element of the array and popped, and b3g_overlappingPairs is
/// decremented.
///
/// @return always 0
void* b3SortedOverlappingPairCache::removeOverlappingPair(int proxy0,int proxy1, b3Dispatcher* dispatcher )
{
	if (hasDeferredRemoval())
	{
		return 0;
	}

	b3BroadphasePair findPair = b3MakeBroadphasePair(proxy0,proxy1);

	int findIndex = m_overlappingPairArray.findLinearSearch(findPair);
	if (findIndex < m_overlappingPairArray.size())
	{
		b3g_overlappingPairs--;
		b3BroadphasePair& pair = m_overlappingPairArray[findIndex];
		cleanOverlappingPair(pair,dispatcher);
		//if (m_ghostPairCallback)
		//	m_ghostPairCallback->removeOverlappingPair(proxy0, proxy1,dispatcher);

		// Swap with the last *used* slot (size()-1), not capacity()-1: the
		// capacity can exceed the size, and swapping with a slot beyond the
		// live range would pull an unused entry into the array while
		// pop_back() discards a valid pair instead of the one being removed.
		m_overlappingPairArray.swap(findIndex,m_overlappingPairArray.size()-1);
		m_overlappingPairArray.pop_back();
	}

	return 0;
}








/// Appends a pair for the two proxy ids, provided needsBroadphaseCollision()
/// accepts them. No check for an already existing identical pair is made here.
/// Increments b3g_overlappingPairs and b3g_addedPairs when a pair is added.
///
/// @return pointer to the new pair, or 0 when the pair was filtered out
b3BroadphasePair* b3SortedOverlappingPairCache::addOverlappingPair(int proxy0,int proxy1)
{
	// A proxy must never be paired with itself.
	b3Assert(proxy0 != proxy1);

	if (needsBroadphaseCollision(proxy0,proxy1))
	{
		b3BroadphasePair* added = &m_overlappingPairArray.expandNonInitializing();
		*added = b3MakeBroadphasePair(proxy0,proxy1);
		b3g_overlappingPairs++;
		b3g_addedPairs++;
		return added;
	}

	return 0;
}

///this findPair becomes really slow. Either sort the list to speedup the query, or
///use a different solution. It is mainly used for Removing overlapping pairs. Removal could be delayed.
///we could keep a linked list in each proxy, and store pair in one of the proxies (with lowest memory address)
///Also we can use a 2D bitmap, which can be useful for a future GPU implementation
/// Linear search for the pair formed by the two proxy ids.
///
/// @return pointer to the stored pair, or 0 when the pair is filtered out by
///         needsBroadphaseCollision() or is not present
b3BroadphasePair* b3SortedOverlappingPairCache::findPair(int proxy0,int proxy1)
{
	if (needsBroadphaseCollision(proxy0,proxy1))
	{
		b3BroadphasePair key = b3MakeBroadphasePair(proxy0,proxy1);
		const int hit = m_overlappingPairArray.findLinearSearch(key);

		// An index equal to size() means "not found".
		if (hit < m_overlappingPairArray.size())
		{
			return &m_overlappingPairArray[hit];
		}
	}
	return 0;
}










//#include <stdio.h>

/// Hands every stored pair to @p callback.
///
/// When the callback returns true the pair is cleaned, its ids are set to -1,
/// it is swapped with the last element and popped, and b3g_overlappingPairs is
/// decremented; the same index is then visited again because it now holds the
/// former last pair. Otherwise iteration advances.
void b3SortedOverlappingPairCache::processAllOverlappingPairs(b3OverlapCallback* callback,b3Dispatcher* dispatcher)
{
	int cursor = 0;
	while (cursor < m_overlappingPairArray.size())
	{
		b3BroadphasePair* current = &m_overlappingPairArray[cursor];
		if (!callback->processOverlap(*current))
		{
			cursor++;
			continue;
		}

		cleanOverlappingPair(*current,dispatcher);
		current->x = -1;
		current->y = -1;
		m_overlappingPairArray.swap(cursor,m_overlappingPairArray.size()-1);
		m_overlappingPairArray.pop_back();
		b3g_overlappingPairs--;
	}
}




/// Creates an empty cache: not blocked for changes, deferred removal enabled,
/// no filter callback, and room reserved for two pairs.
b3SortedOverlappingPairCache::b3SortedOverlappingPairCache():
	m_blockedForChanges(false),
	m_hasDeferredRemoval(true),
	m_overlapFilterCallback(0)
{
	const int startingCapacity = 2;
	m_overlappingPairArray.reserve(startingCapacity);
}

/// Nothing to release explicitly; members clean up after themselves.
b3SortedOverlappingPairCache::~b3SortedOverlappingPairCache()
{
}

/// Per-pair cleanup hook. Currently a no-op: the previous implementation,
/// which destroyed and freed a collision algorithm attached to the pair
/// through the dispatcher (and decremented b3g_removePairs), is disabled
/// because b3BroadphasePair carries no algorithm pointer.
void b3SortedOverlappingPairCache::cleanOverlappingPair(b3BroadphasePair& pair,b3Dispatcher* dispatcher)
{
}


void b3SortedOverlappingPairCache::cleanProxyFromPairs(int proxy,b3Dispatcher* dispatcher)
{

class CleanPairCallback : public b3OverlapCallback
{
int m_cleanProxy;
b3OverlappingPairCache* m_pairCache;
b3Dispatcher* m_dispatcher;

public:
CleanPairCallback(int cleanProxy,b3OverlappingPairCache* pairCache,b3Dispatcher* dispatcher)
:m_cleanProxy(cleanProxy),
m_pairCache(pairCache),
m_dispatcher(dispatcher)
{
}
virtual bool processOverlap(b3BroadphasePair& pair)
{
if ((pair.x == m_cleanProxy) ||
(pair.y == m_cleanProxy))
{
m_pairCache->cleanOverlappingPair(pair,m_dispatcher);
}
return false;
}
};

CleanPairCallback cleanPairs(proxy,this,dispatcher);

processAllOverlappingPairs(&cleanPairs,dispatcher);

}


/// Removes every stored pair in which @p proxy appears as either member.
void b3SortedOverlappingPairCache::removeOverlappingPairsContainingProxy(int proxy,b3Dispatcher* dispatcher)
{
	// Visitor that flags (returns true for) pairs referencing the obsolete proxy;
	// processAllOverlappingPairs() removes the flagged pairs.
	class ObsoletePairMatcher : public b3OverlapCallback
	{
		int m_staleProxy;

	public:
		ObsoletePairMatcher(int staleProxy)
			:m_staleProxy(staleProxy)
		{
		}
		virtual bool processOverlap(b3BroadphasePair& pair)
		{
			if (pair.x == m_staleProxy)
			{
				return true;
			}
			return pair.y == m_staleProxy;
		}
	};

	ObsoletePairMatcher matcher(proxy);
	processAllOverlappingPairs(&matcher,dispatcher);
}

/// Intentionally does nothing: the pairs of this cache are expected to be
/// sorted already.
void b3SortedOverlappingPairCache::sortOverlappingPairs(b3Dispatcher* dispatcher)
{
}


+ 474
- 0
src/bullet/Bullet3Collision/BroadPhaseCollision/b3OverlappingPairCache.h View File

@@ -0,0 +1,474 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_OVERLAPPING_PAIR_CACHE_H
#define B3_OVERLAPPING_PAIR_CACHE_H

#include "Bullet3Common/shared/b3Int2.h"
#include "Bullet3Common/b3AlignedObjectArray.h"

class b3Dispatcher;
#include "b3OverlappingPair.h"



typedef b3AlignedObjectArray<b3BroadphasePair> b3BroadphasePairArray;

/// Visitor interface passed to b3OverlappingPairCache::processAllOverlappingPairs().
struct b3OverlapCallback
{
virtual ~b3OverlapCallback()
{}
//return true for deletion of the pair
virtual bool processOverlap(b3BroadphasePair& pair) = 0;

};

/// User-supplied filter deciding whether two proxies should form a pair;
/// installed on a cache with setOverlapFilterCallback().
struct b3OverlapFilterCallback
{
virtual ~b3OverlapFilterCallback()
{}
// return true when pairs need collision
virtual bool needBroadphaseCollision(int proxy0,int proxy1) const = 0;
};







extern int b3g_removePairs;
extern int b3g_addedPairs;
extern int b3g_findPairs;

const int B3_NULL_PAIR=0xffffffff;

///The b3OverlappingPairCache provides an interface for overlapping pair management (add, remove, storage), used by the b3BroadphaseInterface broadphases.
///The b3HashedOverlappingPairCache and b3SortedOverlappingPairCache classes are two implementations.
// Abstract interface for storing, querying and removing broadphase pairs.
// Proxies are identified by plain int ids. Every member below is pure virtual;
// b3HashedOverlappingPairCache, b3SortedOverlappingPairCache and b3NullPairCache
// in this header implement it.
class b3OverlappingPairCache
{
public:
virtual ~b3OverlappingPairCache() {} // this is needed so we can get to the derived class destructor

// Raw pointer access to the pair storage (mutable and read-only variants).
virtual b3BroadphasePair* getOverlappingPairArrayPtr() = 0;
virtual const b3BroadphasePair* getOverlappingPairArrayPtr() const = 0;

// Access to the pair container itself.
virtual b3BroadphasePairArray& getOverlappingPairArray() = 0;

virtual void cleanOverlappingPair(b3BroadphasePair& pair,b3Dispatcher* dispatcher) = 0;

// Number of pairs reported by the cache.
virtual int getNumOverlappingPairs() const = 0;

virtual void cleanProxyFromPairs(int proxy,b3Dispatcher* dispatcher) = 0;

// Install (or clear, with a null pointer) the user filter callback.
virtual void setOverlapFilterCallback(b3OverlapFilterCallback* callback) = 0;

// Run the callback over the stored pairs; see b3OverlapCallback::processOverlap
// for the meaning of its return value.
virtual void processAllOverlappingPairs(b3OverlapCallback*,b3Dispatcher* dispatcher) = 0;

// Look up the pair for (proxy0, proxy1); the implementations in this header
// return a null pointer when nothing is found.
virtual b3BroadphasePair* findPair(int proxy0, int proxy1) = 0;

virtual bool hasDeferredRemoval() = 0;

//virtual void setInternalGhostPairCallback(b3OverlappingPairCallback* ghostPairCallback)=0;

// Pair insertion / removal by proxy ids.
virtual b3BroadphasePair* addOverlappingPair(int proxy0,int proxy1)=0;
virtual void* removeOverlappingPair(int proxy0,int proxy1,b3Dispatcher* dispatcher)=0;
virtual void removeOverlappingPairsContainingProxy(int /*proxy0*/,b3Dispatcher* /*dispatcher*/)=0;

virtual void sortOverlappingPairs(b3Dispatcher* dispatcher) = 0;


};

/// Hash-space based Pair Cache, thanks to Erin Catto, Box2D, http://www.box2d.org, and Pierre Terdiman, Codercorner, http://codercorner.com
// Pair cache that keeps its pairs in a flat array (m_overlappingPairArray) and finds
// them through a hash table (m_hashTable) whose buckets are chained via m_next.
// Out-of-line members (constructor, add/remove internals, growTables, ...) are
// defined outside this header.
class b3HashedOverlappingPairCache : public b3OverlappingPairCache
{
// Storage of all pairs; hash chains hold indices into this array.
b3BroadphasePairArray m_overlappingPairArray;
// Optional user filter consulted by needsBroadphaseCollision(); may be null.
b3OverlapFilterCallback* m_overlapFilterCallback;
// NOTE(review): not referenced anywhere in this header; presumably used by the .cpp - confirm.
bool m_blockedForChanges;


public:
b3HashedOverlappingPairCache();
virtual ~b3HashedOverlappingPairCache();

virtual void removeOverlappingPairsContainingProxy(int proxy,b3Dispatcher* dispatcher);

virtual void* removeOverlappingPair(int proxy0,int proxy1,b3Dispatcher* dispatcher);
// Returns the filter callback's verdict when a callback is installed; otherwise
// always true (the group/mask test is commented out).
B3_FORCE_INLINE bool needsBroadphaseCollision(int proxy0,int proxy1) const
{
if (m_overlapFilterCallback)
return m_overlapFilterCallback->needBroadphaseCollision(proxy0,proxy1);

bool collides = true;//(proxy0->m_collisionFilterGroup & proxy1->m_collisionFilterMask) != 0;
//collides = collides && (proxy1->m_collisionFilterGroup & proxy0->m_collisionFilterMask);
return collides;
}

// Add a pair and return the new pair. If the pair already exists,
// no new pair is created and the old one is returned.
// Returns 0 when the filter rejects the pair. b3g_addedPairs is incremented on
// every call, including rejected ones.
virtual b3BroadphasePair* addOverlappingPair(int proxy0,int proxy1)
{
b3g_addedPairs++;

if (!needsBroadphaseCollision(proxy0,proxy1))
return 0;

return internalAddPair(proxy0,proxy1);
}


void cleanProxyFromPairs(int proxy,b3Dispatcher* dispatcher);

virtual void processAllOverlappingPairs(b3OverlapCallback*,b3Dispatcher* dispatcher);

// Address of the first stored pair.
// NOTE(review): indexes element 0 even when the array is empty - confirm that
// b3AlignedObjectArray::operator[] tolerates this in all build configurations.
virtual b3BroadphasePair* getOverlappingPairArrayPtr()
{
return &m_overlappingPairArray[0];
}

const b3BroadphasePair* getOverlappingPairArrayPtr() const
{
return &m_overlappingPairArray[0];
}

b3BroadphasePairArray& getOverlappingPairArray()
{
return m_overlappingPairArray;
}

const b3BroadphasePairArray& getOverlappingPairArray() const
{
return m_overlappingPairArray;
}

void cleanOverlappingPair(b3BroadphasePair& pair,b3Dispatcher* dispatcher);



b3BroadphasePair* findPair(int proxy0, int proxy1);

// Same value as getNumOverlappingPairs().
int GetCount() const { return m_overlappingPairArray.size(); }
// b3BroadphasePair* GetPairs() { return m_pairs; }

b3OverlapFilterCallback* getOverlapFilterCallback()
{
return m_overlapFilterCallback;
}

// Stores the pointer as-is; nothing in this header deletes it.
void setOverlapFilterCallback(b3OverlapFilterCallback* callback)
{
m_overlapFilterCallback = callback;
}

int getNumOverlappingPairs() const
{
return m_overlappingPairArray.size();
}
private:
b3BroadphasePair* internalAddPair(int proxy0,int proxy1);

void growTables();

// True when the pair's x/y fields match the two ids in exactly this order
// (no swapped comparison is attempted).
B3_FORCE_INLINE bool equalsPair(const b3BroadphasePair& pair, int proxyId1, int proxyId2)
{
return pair.x == proxyId1 && pair.y == proxyId2;
}

/*
// Thomas Wang's hash, see: http://www.concentric.net/~Ttwang/tech/inthash.htm
// This assumes proxyId1 and proxyId2 are 16-bit.
B3_FORCE_INLINE int getHash(int proxyId1, int proxyId2)
{
int key = (proxyId2 << 16) | proxyId1;
key = ~key + (key << 15);
key = key ^ (key >> 12);
key = key + (key << 2);
key = key ^ (key >> 4);
key = key * 2057;
key = key ^ (key >> 16);
return key;
}
*/


// Combines the two ids into one key (proxyId1 OR'ed with proxyId2 shifted left by 16)
// and scrambles it with a fixed sequence of shift/add/xor steps. The result is the
// raw hash; this function does not reduce it to a table index.
B3_FORCE_INLINE unsigned int getHash(unsigned int proxyId1, unsigned int proxyId2)
{
int key = static_cast<int>(((unsigned int)proxyId1) | (((unsigned int)proxyId2) <<16));
// Thomas Wang's hash

key += ~(key << 15);
key ^= (key >> 10);
key += (key << 3);
key ^= (key >> 6);
key += ~(key << 11);
key ^= (key >> 16);
return static_cast<unsigned int>(key);
}




// Walks the chain that starts at m_hashTable[hash], following m_next, until it
// finds a pair equal to (proxy0, proxy1) or reaches B3_NULL_PAIR.
// 'hash' is used directly as an index into m_hashTable, so the caller must pass
// a value already reduced to the table size. Returns NULL when no pair matches.
B3_FORCE_INLINE b3BroadphasePair* internalFindPair(int proxy0, int proxy1, int hash)
{
int proxyId1 = proxy0;
int proxyId2 = proxy1;
#if 0 // wrong, 'equalsPair' use unsorted uids, copy-past devil striked again. Nat.
if (proxyId1 > proxyId2)
b3Swap(proxyId1, proxyId2);
#endif

int index = m_hashTable[hash];
while( index != B3_NULL_PAIR && equalsPair(m_overlappingPairArray[index], proxyId1, proxyId2) == false)
{
index = m_next[index];
}

if ( index == B3_NULL_PAIR )
{
return NULL;
}

b3Assert(index < m_overlappingPairArray.size());

return &m_overlappingPairArray[index];
}

// NOTE(review): this and sortOverlappingPairs() sit after the 'private:' label, so
// they can only be called through the b3OverlappingPairCache interface, not on a
// b3HashedOverlappingPairCache directly.
virtual bool hasDeferredRemoval()
{
return false;
}

/* virtual void setInternalGhostPairCallback(b3OverlappingPairCallback* ghostPairCallback)
{
m_ghostPairCallback = ghostPairCallback;
}
*/

virtual void sortOverlappingPairs(b3Dispatcher* dispatcher);

protected:
// Bucket heads: index of the first pair of each bucket, or B3_NULL_PAIR.
b3AlignedObjectArray<int> m_hashTable;
// Per-pair link to the next pair in the same bucket, or B3_NULL_PAIR.
b3AlignedObjectArray<int> m_next;
// b3OverlappingPairCallback* m_ghostPairCallback;
};




///b3SortedOverlappingPairCache maintains the objects with overlapping AABB
///Typically managed by the Broadphase, Axis3Sweep or b3SimpleBroadphase
// Pair cache backed by a single pair array; the add/find/remove/sort logic is
// defined outside this header. Only the trivial accessors are inline here.
class b3SortedOverlappingPairCache : public b3OverlappingPairCache
{
protected:
//avoid brute-force finding all the time
b3BroadphasePairArray m_overlappingPairArray;

//during the dispatch, check that user doesn't destroy/create proxy
bool m_blockedForChanges;

///by default, do the removal during the pair traversal
// Value reported by hasDeferredRemoval().
bool m_hasDeferredRemoval;
//if set, use the callback instead of the built in filter in needBroadphaseCollision
b3OverlapFilterCallback* m_overlapFilterCallback;

// b3OverlappingPairCallback* m_ghostPairCallback;

public:
b3SortedOverlappingPairCache();
virtual ~b3SortedOverlappingPairCache();

virtual void processAllOverlappingPairs(b3OverlapCallback*,b3Dispatcher* dispatcher);

void* removeOverlappingPair(int proxy0,int proxy1,b3Dispatcher* dispatcher);

void cleanOverlappingPair(b3BroadphasePair& pair,b3Dispatcher* dispatcher);
b3BroadphasePair* addOverlappingPair(int proxy0,int proxy1);

b3BroadphasePair* findPair(int proxy0,int proxy1);
void cleanProxyFromPairs(int proxy,b3Dispatcher* dispatcher);

virtual void removeOverlappingPairsContainingProxy(int proxy,b3Dispatcher* dispatcher);


// Returns the filter callback's verdict when a callback is installed; otherwise
// always true (the group/mask test is commented out).
inline bool needsBroadphaseCollision(int proxy0,int proxy1) const
{
if (m_overlapFilterCallback)
return m_overlapFilterCallback->needBroadphaseCollision(proxy0,proxy1);

bool collides = true;//(proxy0->m_collisionFilterGroup & proxy1->m_collisionFilterMask) != 0;
//collides = collides && (proxy1->m_collisionFilterGroup & proxy0->m_collisionFilterMask);
return collides;
}
b3BroadphasePairArray& getOverlappingPairArray()
{
return m_overlappingPairArray;
}

const b3BroadphasePairArray& getOverlappingPairArray() const
{
return m_overlappingPairArray;
}



// Address of the first stored pair.
// NOTE(review): indexes element 0 even when the array is empty - confirm that
// b3AlignedObjectArray::operator[] tolerates this in all build configurations.
b3BroadphasePair* getOverlappingPairArrayPtr()
{
return &m_overlappingPairArray[0];
}

const b3BroadphasePair* getOverlappingPairArrayPtr() const
{
return &m_overlappingPairArray[0];
}

int getNumOverlappingPairs() const
{
return m_overlappingPairArray.size();
}
b3OverlapFilterCallback* getOverlapFilterCallback()
{
return m_overlapFilterCallback;
}

// Stores the pointer as-is; nothing in this header deletes it.
void setOverlapFilterCallback(b3OverlapFilterCallback* callback)
{
m_overlapFilterCallback = callback;
}

virtual bool hasDeferredRemoval()
{
return m_hasDeferredRemoval;
}

/* virtual void setInternalGhostPairCallback(b3OverlappingPairCallback* ghostPairCallback)
{
m_ghostPairCallback = ghostPairCallback;
}
*/
virtual void sortOverlappingPairs(b3Dispatcher* dispatcher);

};



///b3NullPairCache skips add/removal of overlapping pairs. Useful for benchmarking and unit testing.
///Every mutating call is a no-op, lookups return 0 and the reported pair count is always 0.
class b3NullPairCache : public b3OverlappingPairCache
{
	// Never written to by this class; it only backs the array accessors below.
	b3BroadphasePairArray	m_overlappingPairArray;

public:

	// --- storage accessors -------------------------------------------------
	virtual b3BroadphasePair* getOverlappingPairArrayPtr() { return &m_overlappingPairArray[0]; }
	const b3BroadphasePair* getOverlappingPairArrayPtr() const { return &m_overlappingPairArray[0]; }
	b3BroadphasePairArray& getOverlappingPairArray() { return m_overlappingPairArray; }

	// --- no-op maintenance -------------------------------------------------
	virtual void cleanOverlappingPair(b3BroadphasePair& /*pair*/,b3Dispatcher* /*dispatcher*/) {}

	virtual int getNumOverlappingPairs() const { return 0; }

	virtual void cleanProxyFromPairs(int /*proxy*/,b3Dispatcher* /*dispatcher*/) {}

	virtual void setOverlapFilterCallback(b3OverlapFilterCallback* /*callback*/) {}

	virtual void processAllOverlappingPairs(b3OverlapCallback*,b3Dispatcher* /*dispatcher*/) {}

	// --- queries -----------------------------------------------------------
	virtual b3BroadphasePair* findPair(int /*proxy0*/, int /*proxy1*/) { return 0; }

	virtual bool hasDeferredRemoval() { return true; }

	//	virtual	void	setInternalGhostPairCallback(b3OverlappingPairCallback* /* ghostPairCallback */)
	//	{
	//
	//	}

	// --- add / remove (ignored) --------------------------------------------
	virtual b3BroadphasePair* addOverlappingPair(int /*proxy0*/,int /*proxy1*/) { return 0; }

	virtual void* removeOverlappingPair(int /*proxy0*/,int /*proxy1*/,b3Dispatcher* /*dispatcher*/) { return 0; }

	virtual void removeOverlappingPairsContainingProxy(int /*proxy0*/,b3Dispatcher* /*dispatcher*/) {}

	virtual void sortOverlappingPairs(b3Dispatcher* /*dispatcher*/) {}
};


#endif //B3_OVERLAPPING_PAIR_CACHE_H



+ 59
- 0
src/bullet/Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h View File

@@ -0,0 +1,59 @@

#ifndef B3_AABB_H
#define B3_AABB_H


#include "Bullet3Common/shared/b3Float4.h"
#include "Bullet3Common/shared/b3Mat3x3.h"

// Alias so the type can be named without the 'struct' keyword.
typedef struct b3Aabb b3Aabb_t;

// Axis-aligned bounding box stored as two 4-component corners (min and max).
// Each corner is an anonymous union, so the same storage can be read as a
// float[4], as a b3Float4, or as int[4].
// NOTE(review): what the fourth component / the integer view carry is not
// visible in this header - confirm against the code that fills these fields.
struct b3Aabb
{
union
{
float m_min[4];
b3Float4 m_minVec;
int m_minIndices[4];
};
union
{
float m_max[4];
b3Float4 m_maxVec;
int m_signedMaxIndices[4];
};
};

/// Computes an axis-aligned box that bounds a local-space box after it has been
/// grown by 'margin' on x/y/z and transformed by the rotation 'orn' and the
/// translation 'pos'. The resulting corners are written to *aabbMinOut / *aabbMaxOut.
inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,
						b3Float4ConstArg pos,
						b3QuatConstArg orn,
						b3Float4* aabbMinOut,b3Float4* aabbMaxOut)
{
	// Half size of the local box, enlarged by the margin (w stays untouched by the margin).
	b3Float4 halfExtents = 0.5f*(localAabbMax-localAabbMin);
	halfExtents += b3MakeFloat4(margin,margin,margin,0.f);

	// Centre of the local box, moved into the target frame.
	b3Float4 centerLocal = 0.5f*(localAabbMax+localAabbMin);
	b3Float4 centerWorld = b3TransformPoint(centerLocal,pos,orn);

	// Component-wise absolute value of the rotation matrix: projecting the half
	// extents onto its rows yields the half size along each target axis.
	b3Mat3x3 rotation;
	rotation = b3QuatGetRotationMatrix(orn);
	b3Mat3x3 absRotation = b3AbsoluteMat3x3(rotation);

	b3Float4 halfSizeWorld = b3MakeFloat4(
		b3Dot3F4(halfExtents,b3GetRow(absRotation,0)),
		b3Dot3F4(halfExtents,b3GetRow(absRotation,1)),
		b3Dot3F4(halfExtents,b3GetRow(absRotation,2)),
		0.f);

	*aabbMinOut = centerWorld-halfSizeWorld;
	*aabbMaxOut = centerWorld+halfSizeWorld;
}

/// Conservative test for overlap between two AABBs.
/// Returns false as soon as the boxes are separated along x, z or y (strict
/// comparisons, so boxes that merely touch still count as overlapping);
/// returns true otherwise. Only the x, y and z components are examined.
inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,
								b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)
{
	if (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x)
	{
		return false;
	}
	if (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z)
	{
		return false;
	}
	if (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y)
	{
		return false;
	}
	return true;
}

#endif //B3_AABB_H

+ 41
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/b3Config.h View File

@@ -0,0 +1,41 @@
#ifndef B3_CONFIG_H
#define B3_CONFIG_H

/// Capacity limits handed to the narrow phase. The default constructor fills in
/// every field; several defaults are derived from m_maxConvexBodies.
struct b3Config
{
	int	m_maxConvexBodies;
	int	m_maxConvexShapes;
	int	m_maxBroadphasePairs;
	int	m_maxContactCapacity;
	int	m_compoundPairCapacity;

	int	m_maxVerticesPerFace;
	int	m_maxFacesPerShape;
	int	m_maxConvexVertices;
	int	m_maxConvexIndices;
	int	m_maxConvexUniqueEdges;
	int	m_maxCompoundChildShapes;
	int	m_maxTriConvexPairCapacity;

	// Members are initialised in declaration order, so the derived defaults
	// below may safely read the members listed before them.
	b3Config()
		: m_maxConvexBodies(32*1024),
		  m_maxConvexShapes(m_maxConvexBodies),
		  m_maxBroadphasePairs(16*m_maxConvexBodies),
		  m_maxContactCapacity(m_maxBroadphasePairs),
		  m_compoundPairCapacity(1024*1024),
		  m_maxVerticesPerFace(64),
		  m_maxFacesPerShape(12),
		  m_maxConvexVertices(8192),
		  m_maxConvexIndices(81920),
		  m_maxConvexUniqueEdges(8192),
		  m_maxCompoundChildShapes(8192),
		  m_maxTriConvexPairCapacity(256*1024)
	{
	}
};


#endif//B3_CONFIG_H


+ 46
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/b3Contact4.h View File

@@ -0,0 +1,46 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_CONTACT4_H
#define B3_CONTACT4_H

#include "Bullet3Common/b3Vector3.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"

/// Accessor layer over b3Contact4Data; adds no data members of its own.
B3_ATTRIBUTE_ALIGNED16(struct) b3Contact4 : public b3Contact4Data
{
	B3_DECLARE_ALIGNED_ALLOCATOR();

	// Body references are stored with a sign: the magnitude is the body index,
	// a negative value marks the body as fixed.
	int getBodyA()const
	{
		return abs(m_bodyAPtrAndSignBit);
	}
	int getBodyB()const
	{
		return abs(m_bodyBPtrAndSignBit);
	}
	bool isBodyAFixed()const
	{
		return m_bodyAPtrAndSignBit<0;
	}
	bool isBodyBFixed()const
	{
		return m_bodyBPtrAndSignBit<0;
	}

	//	todo. make it safer
	int& getBatchIdx()
	{
		return m_batchIdx;
	}
	const int& getBatchIdx() const
	{
		return m_batchIdx;
	}

	// Restitution and friction are kept in compressed form: the setters scale a
	// value in [0,1] by 0xffff and cast it to unsigned short, the getters divide
	// the stored value by 0xffff again.
	float getRestituitionCoeff() const
	{
		return ((float)m_restituitionCoeffCmp/(float)0xffff);
	}
	void setRestituitionCoeff( float c )
	{
		b3Assert( c >= 0.f && c <= 1.f );
		m_restituitionCoeffCmp = (unsigned short)(c*0xffff);
	}
	float getFrictionCoeff() const
	{
		return ((float)m_frictionCoeffCmp/(float)0xffff);
	}
	void setFrictionCoeff( float c )
	{
		b3Assert( c >= 0.f && c <= 1.f );
		m_frictionCoeffCmp = (unsigned short)(c*0xffff);
	}

	//float& getNPoints() { return m_worldNormal[3]; }
	// The point count is read from the w component of the contact normal.
	int getNPoints() const
	{
		return (int) m_worldNormalOnB.w;
	}

	// Penetration of contact point 'idx' is read from the w component of its position.
	float getPenetration(int idx) const
	{
		return m_worldPosB[idx].w;
	}

	// A contact counts as invalid when either body index is 0.
	bool isInvalid() const
	{
		return !(getBodyA()!=0 && getBodyB()!=0);
	}
};

#endif //B3_CONTACT4_H

+ 520
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.cpp View File

@@ -0,0 +1,520 @@
/*
Copyright (c) 2012 Advanced Micro Devices, Inc.

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans


#include "b3ConvexUtility.h"
#include "Bullet3Geometry/b3ConvexHullComputer.h"
#include "Bullet3Geometry/b3GrahamScan2dConvexHull.h"
#include "Bullet3Common/b3Quaternion.h"
#include "Bullet3Common/b3HashMap.h"





// Nothing to release explicitly: the member arrays clean up after themselves.
b3ConvexUtility::~b3ConvexUtility() {}

// Builds the polyhedral description (m_vertices, m_faces and, through initialize(),
// m_uniqueEdges / m_localCenter) from a point cloud.
//
// Steps:
//  1. compute the convex hull of the input points with b3ConvexHullComputer;
//  2. copy the hull vertices and, per hull face, collect its vertex indices and a plane;
//  3. group faces whose normals are nearly parallel (dot > 0.999) and try to merge each
//     group into one polygon with a 2D Graham scan; a merge is rejected when a vertex
//     dropped by the scan is still used by a face outside the group;
//  4. call initialize().
//
// orgVertices / numPoints: input points (element 0 is dereferenced unconditionally).
// mergeCoplanarTriangles: NOTE(review) - never read in this body; merging is always attempted.
// Returns true on every path.
bool b3ConvexUtility::initializePolyhedralFeatures(const b3Vector3* orgVertices, int numPoints, bool mergeCoplanarTriangles)
{

b3ConvexHullComputer conv;
conv.compute(&orgVertices[0].getX(), sizeof(b3Vector3),numPoints,0.f,0.f);

b3AlignedObjectArray<b3Vector3> faceNormals;
int numFaces = conv.faces.size();
faceNormals.resize(numFaces);
b3ConvexHullComputer* convexUtil = &conv;

// One temporary face per hull face; merged/copied into m_faces further down.
b3AlignedObjectArray<b3MyFace> tmpFaces;
tmpFaces.resize(numFaces);

int numVertices = convexUtil->vertices.size();
m_vertices.resize(numVertices);
for (int p=0;p<numVertices;p++)
{
m_vertices[p] = convexUtil->vertices[p];
}


for (int i=0;i<numFaces;i++)
{
int face = convexUtil->faces[i];
//printf("face=%d\n",face);
const b3ConvexHullComputer::Edge* firstEdge = &convexUtil->edges[face];
const b3ConvexHullComputer::Edge* edge = firstEdge;

// Only the first two edge directions are kept; they define the face normal.
b3Vector3 edges[3];
int numEdges = 0;
//compute face normals

// Walk the edge loop of the face once, recording every source vertex index.
do
{
int src = edge->getSourceVertex();
tmpFaces[i].m_indices.push_back(src);
int targ = edge->getTargetVertex();
b3Vector3 wa = convexUtil->vertices[src];

b3Vector3 wb = convexUtil->vertices[targ];
b3Vector3 newEdge = wb-wa;
newEdge.normalize();
if (numEdges<2)
edges[numEdges++] = newEdge;

edge = edge->getNextEdgeOfFace();
} while (edge!=firstEdge);

// Starts as a large sentinel; lowered to the minimum vertex projection below.
b3Scalar planeEq = 1e30f;

if (numEdges==2)
{
faceNormals[i] = edges[0].cross(edges[1]);
faceNormals[i].normalize();
tmpFaces[i].m_plane[0] = faceNormals[i].getX();
tmpFaces[i].m_plane[1] = faceNormals[i].getY();
tmpFaces[i].m_plane[2] = faceNormals[i].getZ();
tmpFaces[i].m_plane[3] = planeEq;

}
else
{
// Fewer than two edges: the plane normal components are not assigned on this path.
b3Assert(0);//degenerate?
faceNormals[i].setZero();
}

// Plane constant = -(smallest projection of the face's vertices onto the normal).
for (int v=0;v<tmpFaces[i].m_indices.size();v++)
{
b3Scalar eq = m_vertices[tmpFaces[i].m_indices[v]].dot(faceNormals[i]);
if (planeEq>eq)
{
planeEq=eq;
}
}
tmpFaces[i].m_plane[3] = -planeEq;
}

//merge coplanar faces and copy them to m_polyhedron

// Two faces are grouped when the dot product of their normals exceeds this value.
b3Scalar faceWeldThreshold= 0.999f;
b3AlignedObjectArray<int> todoFaces;
for (int i=0;i<tmpFaces.size();i++)
todoFaces.push_back(i);

// Repeatedly take the last pending face as reference and pull every pending face
// with a near-parallel normal into its group.
while (todoFaces.size())
{
b3AlignedObjectArray<int> coplanarFaceGroup;
int refFace = todoFaces[todoFaces.size()-1];

coplanarFaceGroup.push_back(refFace);
b3MyFace& faceA = tmpFaces[refFace];
todoFaces.pop_back();

b3Vector3 faceNormalA = b3MakeVector3(faceA.m_plane[0],faceA.m_plane[1],faceA.m_plane[2]);
// Iterates backwards because entries are removed from todoFaces inside the loop.
for (int j=todoFaces.size()-1;j>=0;j--)
{
int i = todoFaces[j];
b3MyFace& faceB = tmpFaces[i];
b3Vector3 faceNormalB = b3MakeVector3(faceB.m_plane[0],faceB.m_plane[1],faceB.m_plane[2]);
if (faceNormalA.dot(faceNormalB)>faceWeldThreshold)
{
coplanarFaceGroup.push_back(i);
todoFaces.remove(i);
}
}


bool did_merge = false;
if (coplanarFaceGroup.size()>1)
{
//do the merge: use Graham Scan 2d convex hull

b3AlignedObjectArray<b3GrahamVector3> orgpoints;
b3Vector3 averageFaceNormal = b3MakeVector3(0,0,0);

// Gather the distinct vertices of all faces in the group and sum their normals.
for (int i=0;i<coplanarFaceGroup.size();i++)
{
// m_polyhedron->m_faces.push_back(tmpFaces[coplanarFaceGroup[i]]);

b3MyFace& face = tmpFaces[coplanarFaceGroup[i]];
b3Vector3 faceNormal = b3MakeVector3(face.m_plane[0],face.m_plane[1],face.m_plane[2]);
averageFaceNormal+=faceNormal;
for (int f=0;f<face.m_indices.size();f++)
{
int orgIndex = face.m_indices[f];
b3Vector3 pt = m_vertices[orgIndex];
bool found = false;

// Note: this inner 'i' shadows the group loop's 'i' for the duration of the search.
for (int i=0;i<orgpoints.size();i++)
{
//if ((orgpoints[i].m_orgIndex == orgIndex) || ((rotatedPt-orgpoints[i]).length2()<0.0001))
if (orgpoints[i].m_orgIndex == orgIndex)
{
found=true;
break;
}
}
if (!found)
orgpoints.push_back(b3GrahamVector3(pt,orgIndex));
}
}


// The merged face reuses the plane of the first face of the group.
b3MyFace combinedFace;
for (int i=0;i<4;i++)
combinedFace.m_plane[i] = tmpFaces[coplanarFaceGroup[0]].m_plane[i];

b3AlignedObjectArray<b3GrahamVector3> hull;

averageFaceNormal.normalize();
b3GrahamScanConvexHull2D(orgpoints,hull,averageFaceNormal);

// Copy the hull's vertex indices into the merged face and mark each used
// entry of orgpoints with -1, so the leftovers are the rejected vertices.
for (int i=0;i<hull.size();i++)
{
combinedFace.m_indices.push_back(hull[i].m_orgIndex);
for(int k = 0; k < orgpoints.size(); k++)
{
if(orgpoints[k].m_orgIndex == hull[i].m_orgIndex)
{
orgpoints[k].m_orgIndex = -1; // invalidate...
break;
}
}
}

// are there rejected vertices?
bool reject_merge = false;


for(int i = 0; i < orgpoints.size(); i++) {
if(orgpoints[i].m_orgIndex == -1)
continue; // this is in the hull...
// this vertex is rejected -- is anybody else using this vertex?
for(int j = 0; j < tmpFaces.size(); j++) {
b3MyFace& face = tmpFaces[j];
// is this a face of the current coplanar group?
bool is_in_current_group = false;
for(int k = 0; k < coplanarFaceGroup.size(); k++) {
if(coplanarFaceGroup[k] == j) {
is_in_current_group = true;
break;
}
}
if(is_in_current_group) // ignore this face...
continue;
// does this face use this rejected vertex?
for(int v = 0; v < face.m_indices.size(); v++) {
if(face.m_indices[v] == orgpoints[i].m_orgIndex) {
// this rejected vertex is used in another face -- reject merge
reject_merge = true;
break;
}
}
if(reject_merge)
break;
}
if(reject_merge)
break;
}

if (!reject_merge)
{
// do this merge!
did_merge = true;
m_faces.push_back(combinedFace);
}
}
// No merge (single-face group or rejected merge): keep the group's faces as they are.
if(!did_merge)
{
for (int i=0;i<coplanarFaceGroup.size();i++)
{
b3MyFace face = tmpFaces[coplanarFaceGroup[i]];
m_faces.push_back(face);
}

}



}

// Derive unique edges and the local centre from the final face list.
initialize();

return true;
}






// True unless the absolute value of at least one of x, y, z exceeds 1e-6.
inline bool IsAlmostZero(const b3Vector3& v)
{
	const bool anyComponentLarge =
		fabsf(v.getX())>1e-6 ||
		fabsf(v.getY())>1e-6 ||
		fabsf(v.getZ())>1e-6;
	return !anyComponentLarge;
}

// Hash-map key for an undirected edge: the two vertex indices are stored with the
// larger one in m_v0 and the smaller one in m_v1, so (a,b) and (b,a) give equal keys.
struct b3InternalVertexPair
{
	b3InternalVertexPair(short int v0,short int v1)
		:m_v0(v1>v0 ? v1 : v0),
		m_v1(v1>v0 ? v0 : v1)
	{
	}
	short int m_v0;
	short int m_v1;

	// m_v1 goes into the upper 16 bits, m_v0 is added on top.
	int getHash() const
	{
		return (m_v1<<16)+m_v0;
	}

	// Keys are equal when both stored indices match.
	bool equals(const b3InternalVertexPair& other) const
	{
		return !(m_v0!=other.m_v0 || m_v1!=other.m_v1);
	}
};

// Per-edge record of the (up to) two faces sharing the edge; -1 means "not set yet".
struct b3InternalEdge
{
	short int m_face0;
	short int m_face1;

	b3InternalEdge()
	{
		m_face0 = -1;
		m_face1 = -1;
	}
};

//

#ifdef TEST_INTERNAL_OBJECTS
// Checks whether the box centred at m_localCenter with half sizes m_extents lies
// inside the polyhedron: returns false as soon as one of its 8 corners is on the
// positive side of any face plane, true otherwise.
bool b3ConvexUtility::testContainment() const
{
	for(int p=0;p<8;p++)
	{
		// Bits 2, 1 and 0 of 'p' select the negative side along x, y and z,
		// which enumerates the corners in the order +++, ++-, +-+, ... , ---.
		const b3Scalar ex = (p&4) ? -m_extents[0] : m_extents[0];
		const b3Scalar ey = (p&2) ? -m_extents[1] : m_extents[1];
		const b3Scalar ez = (p&1) ? -m_extents[2] : m_extents[2];

		// Vectors are built with b3MakeVector3, like everywhere else in this file.
		const b3Vector3 LocalPt = m_localCenter + b3MakeVector3(ex, ey, ez);

		for(int i=0;i<m_faces.size();i++)
		{
			const b3Vector3 Normal = b3MakeVector3(m_faces[i].m_plane[0], m_faces[i].m_plane[1], m_faces[i].m_plane[2]);
			const b3Scalar d = LocalPt.dot(Normal) + m_faces[i].m_plane[3];
			if(d>0.0f)
				return false;
		}
	}
	return true;
}
#endif

// Derives secondary data from m_vertices / m_faces:
//  - m_uniqueEdges: normalized edge directions, one entry per direction (a direction
//    and its opposite count as the same); entries are appended, the array is not
//    cleared first;
//  - m_localCenter: area-weighted average of the triangle centroids of all faces,
//    left at (0,0,0) when the total area is not positive;
//  - with USE_CONNECTED_FACES: per-face adjacency;
//  - with TEST_INTERNAL_OBJECTS: m_radius, mC, mE and the inner box m_extents.
void	b3ConvexUtility::initialize()
{

	// Maps an undirected vertex pair to the faces that share that edge.
	b3HashMap<b3InternalVertexPair,b3InternalEdge> edges;

	b3Scalar TotalArea = 0.0f;

	m_localCenter.setValue(0, 0, 0);
	for(int i=0;i<m_faces.size();i++)
	{
		int numVertices = m_faces[i].m_indices.size();
		int NbTris = numVertices;
		for(int j=0;j<NbTris;j++)
		{
			int k = (j+1)%numVertices;
			b3InternalVertexPair vp(m_faces[i].m_indices[j],m_faces[i].m_indices[k]);
			b3InternalEdge* edptr = edges.find(vp);
			b3Vector3 edge = m_vertices[vp.m_v1]-m_vertices[vp.m_v0];
			edge.normalize();

			bool found = false;
			b3Vector3 diff,diff2;

			// Linear search: is this direction (or its opposite) already recorded?
			for (int p=0;p<m_uniqueEdges.size();p++)
			{
				diff = m_uniqueEdges[p]-edge;
				diff2 = m_uniqueEdges[p]+edge;

				//	if ((diff.length2()==0.f) ||
				//	(diff2.length2()==0.f))

				if (IsAlmostZero(diff) ||
				IsAlmostZero(diff2))
				{
					found = true;
					break;
				}
			}

			if (!found)
			{
				m_uniqueEdges.push_back(edge);
			}

			if (edptr)
			{
				//TBD: figure out why I added this assert
				//				b3Assert(edptr->m_face0>=0);
				//			b3Assert(edptr->m_face1<0);
				edptr->m_face1 = i;
			} else
			{
				b3InternalEdge ed;
				ed.m_face0 = i;
				edges.insert(vp,ed);
			}
		}
	}

#ifdef USE_CONNECTED_FACES
	for(int i=0;i<m_faces.size();i++)
	{
		int numVertices = m_faces[i].m_indices.size();
		m_faces[i].m_connectedFaces.resize(numVertices);

		for(int j=0;j<numVertices;j++)
		{
			int k = (j+1)%numVertices;
			b3InternalVertexPair vp(m_faces[i].m_indices[j],m_faces[i].m_indices[k]);
			b3InternalEdge* edptr = edges.find(vp);
			b3Assert(edptr);
			b3Assert(edptr->m_face0>=0);
			b3Assert(edptr->m_face1>=0);

			// The neighbour across this edge is whichever of the two faces is not 'i'.
			int connectedFace = (edptr->m_face0==i)?edptr->m_face1:edptr->m_face0;
			m_faces[i].m_connectedFaces[j] = connectedFace;
		}
	}
#endif//USE_CONNECTED_FACES

	// Fan-triangulate every face around its first vertex and accumulate
	// area-weighted triangle centroids.
	for(int i=0;i<m_faces.size();i++)
	{
		int numVertices = m_faces[i].m_indices.size();
		int NbTris = numVertices-2;

		const b3Vector3& p0 = m_vertices[m_faces[i].m_indices[0]];
		for(int j=1;j<=NbTris;j++)
		{
			int k = (j+1)%numVertices;
			const b3Vector3& p1 = m_vertices[m_faces[i].m_indices[j]];
			const b3Vector3& p2 = m_vertices[m_faces[i].m_indices[k]];
			b3Scalar Area = ((p0 - p1).cross(p0 - p2)).length() * 0.5f;
			b3Vector3 Center = (p0+p1+p2)/3.0f;
			m_localCenter += Area * Center;
			TotalArea += Area;
		}
	}
	// With no faces, or only zero-area faces, TotalArea is 0 and dividing would
	// turn the centre into NaN; keep the accumulated (zero) centre instead.
	if (TotalArea > 0.0f)
	{
		m_localCenter /= TotalArea;
	}




#ifdef TEST_INTERNAL_OBJECTS
	if(1)
	{
		// m_radius: smallest distance from the centre to any face plane.
		m_radius = FLT_MAX;
		for(int i=0;i<m_faces.size();i++)
		{
			const b3Vector3 Normal = b3MakeVector3(m_faces[i].m_plane[0], m_faces[i].m_plane[1], m_faces[i].m_plane[2]);
			const b3Scalar dist = b3Fabs(m_localCenter.dot(Normal) + m_faces[i].m_plane[3]);
			if(dist<m_radius)
				m_radius = dist;
		}

		// Axis-aligned bounds of all vertices.
		b3Scalar MinX = FLT_MAX;
		b3Scalar MinY = FLT_MAX;
		b3Scalar MinZ = FLT_MAX;
		b3Scalar MaxX = -FLT_MAX;
		b3Scalar MaxY = -FLT_MAX;
		b3Scalar MaxZ = -FLT_MAX;
		for(int i=0; i<m_vertices.size(); i++)
		{
			const b3Vector3& pt = m_vertices[i];
			if(pt.getX()<MinX)	MinX = pt.getX();
			if(pt.getX()>MaxX)	MaxX = pt.getX();
			if(pt.getY()<MinY)	MinY = pt.getY();
			if(pt.getY()>MaxY)	MaxY = pt.getY();
			if(pt.getZ()<MinZ)	MinZ = pt.getZ();
			if(pt.getZ()>MaxZ)	MaxZ = pt.getZ();
		}
		// mC holds max+min and mE holds max-min per axis (neither is halved here).
		mC.setValue(MaxX+MinX, MaxY+MinY, MaxZ+MinZ);
		mE.setValue(MaxX-MinX, MaxY-MinY, MaxZ-MinZ);



		//		const b3Scalar r = m_radius / sqrtf(2.0f);
		const b3Scalar r = m_radius / sqrtf(3.0f);
		const int LargestExtent = mE.maxAxis();
		const b3Scalar Step = (mE[LargestExtent]*0.5f - r)/1024.0f;

		// Start with a box of half size r that is stretched to the full half extent
		// along the largest axis, then shrink that axis until the box fits.
		m_extents[0] = m_extents[1] = m_extents[2] = r;
		m_extents[LargestExtent] = mE[LargestExtent]*0.5f;
		bool FoundBox = false;
		for(int j=0;j<1024;j++)
		{
			if(testContainment())
			{
				FoundBox = true;
				break;
			}

			m_extents[LargestExtent] -= Step;
		}
		if(!FoundBox)
		{
			m_extents[0] = m_extents[1] = m_extents[2] = r;
		}
		else
		{
			// Refine the box
			// Grow the two other axes together until the box no longer fits,
			// then roll back the last step.
			const b3Scalar RefineStep = (m_radius - r)/1024.0f;
			const int e0 = (1<<LargestExtent) & 3;
			const int e1 = (1<<e0) & 3;

			for(int j=0;j<1024;j++)
			{
				const b3Scalar Saved0 = m_extents[e0];
				const b3Scalar Saved1 = m_extents[e1];
				m_extents[e0] += RefineStep;
				m_extents[e1] += RefineStep;

				if(!testContainment())
				{
					m_extents[e0] = Saved0;
					m_extents[e1] = Saved1;
					break;
				}
			}
		}
	}
#endif
}

+ 62
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h View File

@@ -0,0 +1,62 @@

/*
Copyright (c) 2012 Advanced Micro Devices, Inc.

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans

#ifndef _BT_CONVEX_UTILITY_H
#define _BT_CONVEX_UTILITY_H

#include "Bullet3Common/b3AlignedObjectArray.h"
#include "Bullet3Common/b3Transform.h"




// One polygonal face of a b3ConvexUtility.
struct b3MyFace
{
// Indices into b3ConvexUtility::m_vertices, in the order the face's edge loop was walked.
b3AlignedObjectArray<int> m_indices;
// Plane of the face: [0..2] is the normal; [3] is set by initializePolyhedralFeatures()
// to minus the smallest dot(vertex, normal) over the face's vertices.
b3Scalar m_plane[4];
};

// Holds the polyhedral features (vertices, faces, unique edge directions) of a convex
// shape, built from a point cloud by initializePolyhedralFeatures().
B3_ATTRIBUTE_ALIGNED16(class) b3ConvexUtility
{
public:
B3_DECLARE_ALIGNED_ALLOCATOR();

// Area-weighted centre of the faces, computed by initialize().
b3Vector3 m_localCenter;
// m_extents, mC, mE and m_radius are only written by initialize() when
// TEST_INTERNAL_OBJECTS is defined; the constructor below does not set them.
b3Vector3 m_extents;
b3Vector3 mC;
b3Vector3 mE;
b3Scalar m_radius;
// Hull vertices, filled by initializePolyhedralFeatures().
b3AlignedObjectArray<b3Vector3> m_vertices;
// Faces after the coplanar-merge step.
b3AlignedObjectArray<b3MyFace> m_faces;
// Normalized edge directions, one per direction (opposites are treated as equal).
b3AlignedObjectArray<b3Vector3> m_uniqueEdges;

b3ConvexUtility()
{
}
virtual ~b3ConvexUtility();

// Builds m_vertices/m_faces from the given points and then calls initialize().
bool initializePolyhedralFeatures(const b3Vector3* orgVertices, int numVertices, bool mergeCoplanarTriangles=true);
// Recomputes the derived data (unique edges, local centre) from m_vertices/m_faces.
void initialize();
// NOTE(review): declared unconditionally, but the definition in b3ConvexUtility.cpp is
// compiled only when TEST_INTERNAL_OBJECTS is defined.
bool testContainment() const;



};
#endif

+ 323
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.cpp View File

@@ -0,0 +1,323 @@
#include "b3CpuNarrowPhase.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h"

#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h"


// Private state of b3CpuNarrowPhase, kept behind its m_data pointer.
struct b3CpuNarrowPhaseInternalData
{
b3AlignedObjectArray<b3Aabb> m_localShapeAABBCPU;
// Indexed by collidable index (see getCollidableCpu()).
b3AlignedObjectArray<b3Collidable> m_collidablesCPU;
b3AlignedObjectArray<b3ConvexUtility*> m_convexData;
// Copy of the configuration passed to the b3CpuNarrowPhase constructor.
b3Config m_config;


b3AlignedObjectArray<b3ConvexPolyhedronData> m_convexPolyhedra;
b3AlignedObjectArray<b3Vector3> m_uniqueEdges;
b3AlignedObjectArray<b3Vector3> m_convexVertices;
b3AlignedObjectArray<int> m_convexIndices;
b3AlignedObjectArray<b3GpuFace> m_convexFaces;

// Contact storage; computeContacts() resizes it to m_config.m_maxContactCapacity.
b3AlignedObjectArray<b3Contact4Data> m_contacts;

// Set to 0 by the b3CpuNarrowPhase constructor.
int m_numAcceleratedShapes;
};


// Read-only view of the contact array held in the internal data.
const b3AlignedObjectArray<b3Contact4Data>& b3CpuNarrowPhase::getContacts() const
{
	const b3CpuNarrowPhaseInternalData& internal = *m_data;
	return internal.m_contacts;
}

// Mutable access to the collidable stored at 'collidableIndex'.
b3Collidable& b3CpuNarrowPhase::getCollidableCpu(int collidableIndex)
{
	b3AlignedObjectArray<b3Collidable>& collidables = m_data->m_collidablesCPU;
	return collidables[collidableIndex];
}

// Read-only access to the collidable stored at 'collidableIndex'.
const b3Collidable& b3CpuNarrowPhase::getCollidableCpu(int collidableIndex) const
{
	const b3AlignedObjectArray<b3Collidable>& collidables = m_data->m_collidablesCPU;
	return collidables[collidableIndex];
}


// Allocates the internal data block, stores a copy of 'config' in it and starts
// with zero accelerated shapes. The block is released by the destructor.
b3CpuNarrowPhase::b3CpuNarrowPhase(const struct b3Config& config)
{
	b3CpuNarrowPhaseInternalData* internal = new b3CpuNarrowPhaseInternalData;
	internal->m_numAcceleratedShapes = 0;
	internal->m_config = config;
	m_data = internal;
}

// Releases the internal data block allocated by the constructor.
b3CpuNarrowPhase::~b3CpuNarrowPhase() { delete m_data; }

// Computes contacts for every broadphase pair on the CPU.
//
// pairs           : overlapping body pairs (x = body A, y = body B); on success
//                   the z component of a pair receives the index of its contact.
// aabbsWorldSpace : not read by this implementation.
// bodies          : rigid bodies; only m_collidableIdx is read here.
//
// The contact array is first grown to the configured maximum capacity and is
// shrunk to the number of contacts actually produced before returning.
//
// Only convex-hull vs convex-hull pairs generate contacts. The other shape
// combinations this function used to branch on (sphere/convex, plane/convex,
// compound/compound, plane/compound) had their contact routines commented out
// and therefore produce nothing.
void b3CpuNarrowPhase::computeContacts(b3AlignedObjectArray<b3Int4>& pairs, b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace, b3AlignedObjectArray<b3RigidBodyData>& bodies)
{
	int contactCapacity = m_data->m_config.m_maxContactCapacity;
	int contactCount = 0;
	const int pairCount = pairs.size();

	m_data->m_contacts.resize(contactCapacity);

	for (int pairIdx = 0; pairIdx < pairCount; ++pairIdx)
	{
		int bodyA = pairs[pairIdx].x;
		int bodyB = pairs[pairIdx].y;
		int collidableA = bodies[bodyA].m_collidableIdx;
		int collidableB = bodies[bodyB].m_collidableIdx;

		const int shapeTypeA = m_data->m_collidablesCPU[collidableA].m_shapeType;
		const int shapeTypeB = m_data->m_collidablesCPU[collidableB].m_shapeType;

		// Everything except convex/convex is currently a no-op.
		if (shapeTypeA != SHAPE_CONVEX_HULL || shapeTypeB != SHAPE_CONVEX_HULL)
		{
			continue;
		}

		int contactSlot = b3ContactConvexConvexSAT(pairIdx, bodyA, bodyB, collidableA, collidableB, bodies,
			m_data->m_collidablesCPU, m_data->m_convexPolyhedra, m_data->m_convexVertices, m_data->m_uniqueEdges,
			m_data->m_convexIndices, m_data->m_convexFaces, m_data->m_contacts, contactCount, contactCapacity);

		// A negative result means no contact was stored for this pair.
		if (contactSlot >= 0)
		{
			pairs[pairIdx].z = contactSlot;
		}
	}

	m_data->m_contacts.resize(contactCount);
}

// Registers a convex hull described by an already-initialized b3ConvexUtility.
//
// Steps:
//  1. allocate a collidable slot (fails with a negative index when the
//     configured shape limit is reached),
//  2. store the average of the hull vertices in utilPtr->m_localCenter,
//  3. copy the hull into the internal arrays (registerConvexHullShapeInternal),
//  4. append the hull's local-space AABB to m_localShapeAABBCPU.
//
// Returns the collidable index, or a negative value if allocation failed.
int b3CpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* utilPtr)
{
	int collidableIndex = allocateCollidable();
	if (collidableIndex<0)
		return collidableIndex;

	b3Collidable& col = m_data->m_collidablesCPU[collidableIndex];
	col.m_shapeType = SHAPE_CONVEX_HULL;
	col.m_shapeIndex = -1;

	const int numVertices = utilPtr->m_vertices.size();
	{
		b3Vector3 localCenter=b3MakeVector3(0,0,0);
		for (int i=0;i<numVertices;i++)
			localCenter+=utilPtr->m_vertices[i];

		// Guard the average: with an empty hull the unguarded 1.f/0 scale is
		// infinite and turns the zero vector into NaNs.
		if (numVertices>0)
			localCenter*= (1.f/numVertices);
		utilPtr->m_localCenter = localCenter;

		col.m_shapeIndex = registerConvexHullShapeInternal(utilPtr,col);
	}

	if (col.m_shapeIndex>=0)
	{
		// Start from an inverted box so the first vertex initializes both corners.
		// (An empty hull leaves the box inverted.)
		b3Aabb aabb;
		b3Vector3 myAabbMin=b3MakeVector3(1e30f,1e30f,1e30f);
		b3Vector3 myAabbMax=b3MakeVector3(-1e30f,-1e30f,-1e30f);

		for (int i=0;i<numVertices;i++)
		{
			myAabbMin.setMin(utilPtr->m_vertices[i]);
			myAabbMax.setMax(utilPtr->m_vertices[i]);
		}
		aabb.m_min[0] = myAabbMin[0];
		aabb.m_min[1] = myAabbMin[1];
		aabb.m_min[2] = myAabbMin[2];
		aabb.m_minIndices[3] = 0;

		aabb.m_max[0] = myAabbMax[0];
		aabb.m_max[1] = myAabbMax[1];
		aabb.m_max[2] = myAabbMax[2];
		aabb.m_signedMaxIndices[3] = 0;

		m_data->m_localShapeAABBCPU.push_back(aabb);
	}
	return collidableIndex;
}

// Appends one collidable slot and returns its index.
// When the array already holds m_config.m_maxConvexShapes entries, an error is
// reported through b3Error and -1 is returned instead.
int b3CpuNarrowPhase::allocateCollidable()
{
	const int nextIndex = m_data->m_collidablesCPU.size();

	if (nextIndex >= m_data->m_config.m_maxConvexShapes)
	{
		b3Error("allocateCollidable out-of-range %d\n",m_data->m_config.m_maxConvexShapes);
		return -1;
	}

	m_data->m_collidablesCPU.expand();
	return nextIndex;
}

int b3CpuNarrowPhase::registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling)
{
b3AlignedObjectArray<b3Vector3> verts;

unsigned char* vts = (unsigned char*) vertices;
for (int i=0;i<numVertices;i++)
{
float* vertex = (float*) &vts[i*strideInBytes];
verts.push_back(b3MakeVector3(vertex[0]*scaling[0],vertex[1]*scaling[1],vertex[2]*scaling[2]));
}

b3ConvexUtility* utilPtr = new b3ConvexUtility();
bool merge = true;
if (numVertices)
{
utilPtr->initializePolyhedralFeatures(&verts[0],verts.size(),merge);
}

int collidableIndex = registerConvexHullShape(utilPtr);

delete utilPtr;
return collidableIndex;
}


// Copies a convex hull into the flat arrays held by m_data.
//
// A new b3ConvexPolyhedronData record is appended, and the hull's unique edges,
// faces (plane + index list) and vertices are appended to the shared arrays;
// the record stores the offset and count of each appended range.
// The utility pointer itself is stored in m_data->m_convexData.
//
// `col` is not used by this function.
// Returns the index of the new shape (the previous m_numAcceleratedShapes).
int b3CpuNarrowPhase::registerConvexHullShapeInternal(b3ConvexUtility* convexPtr,b3Collidable& col)
{
	m_data->m_convexData.resize(m_data->m_numAcceleratedShapes+1);
	m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes+1);

	b3ConvexPolyhedronData& poly = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1);

	// Per-hull properties.
	poly.mC = convexPtr->mC;
	poly.mE = convexPtr->mE;
	poly.m_extents = convexPtr->m_extents;
	poly.m_localCenter = convexPtr->m_localCenter;
	poly.m_radius = convexPtr->m_radius;

	// Unique edges.
	const int srcEdgeCount = convexPtr->m_uniqueEdges.size();
	const int edgeBase = m_data->m_uniqueEdges.size();
	poly.m_numUniqueEdges = srcEdgeCount;
	poly.m_uniqueEdgesOffset = edgeBase;
	m_data->m_uniqueEdges.resize(edgeBase+poly.m_numUniqueEdges);
	for (int e = 0; e < srcEdgeCount; ++e)
	{
		m_data->m_uniqueEdges[edgeBase+e] = convexPtr->m_uniqueEdges[e];
	}

	// Faces: plane equation plus a slice of the shared index array.
	const int srcFaceCount = convexPtr->m_faces.size();
	const int faceBase = m_data->m_convexFaces.size();
	poly.m_faceOffset = faceBase;
	poly.m_numFaces = srcFaceCount;
	m_data->m_convexFaces.resize(faceBase+poly.m_numFaces);

	for (int f = 0; f < srcFaceCount; ++f)
	{
		const int dstFace = poly.m_faceOffset+f;

		m_data->m_convexFaces[dstFace].m_plane = b3MakeVector3(
			convexPtr->m_faces[f].m_plane[0],
			convexPtr->m_faces[f].m_plane[1],
			convexPtr->m_faces[f].m_plane[2],
			convexPtr->m_faces[f].m_plane[3]);

		const int indexBase = m_data->m_convexIndices.size();
		const int faceIndexCount = convexPtr->m_faces[f].m_indices.size();
		m_data->m_convexFaces[dstFace].m_numIndices = faceIndexCount;
		m_data->m_convexFaces[dstFace].m_indexOffset = indexBase;

		m_data->m_convexIndices.resize(indexBase+faceIndexCount);
		for (int k = 0; k < faceIndexCount; ++k)
		{
			m_data->m_convexIndices[indexBase+k] = convexPtr->m_faces[f].m_indices[k];
		}
	}

	// Vertices.
	const int srcVertexCount = convexPtr->m_vertices.size();
	const int vertexBase = m_data->m_convexVertices.size();
	poly.m_numVertices = srcVertexCount;
	poly.m_vertexOffset = vertexBase;
	m_data->m_convexVertices.resize(vertexBase+poly.m_numVertices);
	for (int v = 0; v < srcVertexCount; ++v)
	{
		m_data->m_convexVertices[vertexBase+v] = convexPtr->m_vertices[v];
	}

	(m_data->m_convexData)[m_data->m_numAcceleratedShapes] = convexPtr;
	return m_data->m_numAcceleratedShapes++;
}

// Returns the entry of m_localShapeAABBCPU at position collidableIndex.
// No range check is performed here.
const b3Aabb& b3CpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const
{
	const b3Aabb& localAabb = m_data->m_localShapeAABBCPU[collidableIndex];
	return localAabb;
}

+ 105
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h View File

@@ -0,0 +1,105 @@
#ifndef B3_CPU_NARROWPHASE_H
#define B3_CPU_NARROWPHASE_H

#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
#include "Bullet3Common/b3AlignedObjectArray.h"
#include "Bullet3Common/b3Vector3.h"
#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
#include "Bullet3Common/shared/b3Int4.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"

// CPU narrow phase: registers collision shapes as "collidables" and computes
// contacts for pairs of bodies (see computeContacts).
// Most members are only declared here; their definitions live in the .cpp file.
class b3CpuNarrowPhase
{
protected:

// Opaque implementation data; the struct is not defined in this header.
struct b3CpuNarrowPhaseInternalData* m_data;
int m_acceleratedCompanionShapeIndex;
int m_planeBodyIndex;
// Exposed read-only through getStatic0Index().
int m_static0Index;

// Copies the hull held by convexPtr into the internal arrays and returns the new shape index.
int registerConvexHullShapeInternal(class b3ConvexUtility* convexPtr,b3Collidable& col);
int registerConcaveMeshShape(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices, b3Collidable& col, const float* scaling);

public:



b3CpuNarrowPhase(const struct b3Config& config);

virtual ~b3CpuNarrowPhase(void);

// Shape registration. The convex-hull overloads return a collidable index, or a
// negative value when no collidable could be allocated; the other register*
// functions presumably follow the same convention - TODO confirm in the .cpp.
int registerSphereShape(float radius);
int registerPlaneShape(const b3Vector3& planeNormal, float planeConstant);

int registerCompoundShape(b3AlignedObjectArray<b3GpuChildShape>* childShapes);
int registerFace(const b3Vector3& faceNormal, float faceConstant);
int registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices,const float* scaling);
//do they need to be merged?
// Registers a hull from an already initialized b3ConvexUtility.
int registerConvexHullShape(b3ConvexUtility* utilPtr);
// Registers a hull built from a strided buffer of xyz floats, scaled per axis by scaling[0..2].
int registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling);

//int registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMin, const float* aabbMax,bool writeToGpu);
void setObjectTransform(const float* position, const float* orientation , int bodyIndex);

void writeAllBodiesToGpu();
void reset();
void readbackAllBodiesToCpu();
bool getObjectTransformFromCpu(float* position, float* orientation , int bodyIndex) const;

void setObjectTransformCpu(float* position, float* orientation , int bodyIndex);
void setObjectVelocityCpu(float* linVel, float* angVel, int bodyIndex);

//virtual void computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWorldSpace, int numObjects);
// Computes contacts for the given body pairs; the z component of a pair is set
// to the index of its contact when one is produced.
virtual void computeContacts(b3AlignedObjectArray<b3Int4>& pairs, b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace, b3AlignedObjectArray<b3RigidBodyData>& bodies);


const struct b3RigidBodyData* getBodiesCpu() const;
//struct b3RigidBodyData* getBodiesCpu();

int getNumBodiesGpu() const;

int getNumBodyInertiasGpu() const;

const struct b3Collidable* getCollidablesCpu() const;
int getNumCollidablesGpu() const;


/*const struct b3Contact4* getContactsCPU() const;

int getNumContactsGpu() const;
*/

const b3AlignedObjectArray<b3Contact4Data>& getContacts() const;
int getNumRigidBodies() const;

// Appends a collidable slot and returns its index, or -1 when the configured
// maximum number of convex shapes has been reached.
int allocateCollidable();

int getStatic0Index() const
{
return m_static0Index;
}
b3Collidable& getCollidableCpu(int collidableIndex);
const b3Collidable& getCollidableCpu(int collidableIndex) const;

// Read-only access to the opaque implementation data.
const b3CpuNarrowPhaseInternalData* getInternalData() const
{
return m_data;
}

// Returns the stored local-space AABB at position collidableIndex (no range check).
const struct b3Aabb& getLocalSpaceAabb(int collidableIndex) const;
};

#endif //B3_CPU_NARROWPHASE_H


+ 24
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h View File

@@ -0,0 +1,24 @@

#ifndef B3_RAYCAST_INFO_H
#define B3_RAYCAST_INFO_H

#include "Bullet3Common/b3Vector3.h"

// Describes one ray by two points.
// B3_ATTRIBUTE_ALIGNED16 is defined elsewhere; presumably it requests 16-byte
// alignment for the struct - TODO confirm.
B3_ATTRIBUTE_ALIGNED16(struct) b3RayInfo
{
// Ray start point (coordinate space is not stated in this header).
b3Vector3 m_from;
// Ray end point.
b3Vector3 m_to;
};

// Result record for a ray query.
// NOTE(review): the field meanings below are inferred from the names only;
// the code that fills this struct is not part of this header.
B3_ATTRIBUTE_ALIGNED16(struct) b3RayHit
{
// Presumably the parametric position of the hit along the ray - TODO confirm range.
b3Scalar m_hitFraction;
// Presumably the index of the body that was hit.
int m_hitBody;
int m_hitResult1;
int m_hitResult2;
b3Vector3 m_hitPoint;
b3Vector3 m_hitNormal;
};

#endif //B3_RAYCAST_INFO_H


+ 30
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h View File

@@ -0,0 +1,30 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_RIGID_BODY_CL
#define B3_RIGID_BODY_CL

#include "Bullet3Common/b3Scalar.h"
#include "Bullet3Common/b3Matrix3x3.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"


// Returns the m_invMass field of the given rigid body record.
inline float b3GetInvMass(const b3RigidBodyData& body)
{
	const float invMass = body.m_invMass;
	return invMass;
}


#endif//B3_RIGID_BODY_CL

+ 20
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h View File

@@ -0,0 +1,20 @@

#ifndef B3_BVH_SUBTREE_INFO_DATA_H
#define B3_BVH_SUBTREE_INFO_DATA_H

typedef struct b3BvhSubtreeInfoData b3BvhSubtreeInfoData_t;

// Header describing one subtree of a quantized BVH: its quantized bounding box
// plus the node range it covers. b3BvhTraversal tests the box first and then
// walks nodes [m_rootNodeIndex, m_rootNodeIndex + m_subtreeSize).
// With 2-byte shorts and 4-byte ints the members add up to 32 bytes.
struct b3BvhSubtreeInfoData
{
//12 bytes
// Quantized AABB corners, three 16-bit components each.
unsigned short int m_quantizedAabbMin[3];
unsigned short int m_quantizedAabbMax[3];
//4 bytes, points to the root of the subtree
int m_rootNodeIndex;
//4 bytes
// Number of nodes in the subtree, counted from m_rootNodeIndex.
int m_subtreeSize;
// Unused filler (three ints).
int m_padding[3];
};

#endif //B3_BVH_SUBTREE_INFO_DATA_H


+ 126
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h View File

@@ -0,0 +1,126 @@


#include "Bullet3Common/shared/b3Int4.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h"



// work-in-progress
// Collects (concave triangle, convex shape) candidate pairs for one broadphase pair.
//
// The pair is pairs[id]. Work is only done when body A is a concave triangle
// mesh, body B is a convex hull, sphere or compound of convex hulls, and the
// two bodies are not both static (m_invMass == 0).
//
// Body B's world AABB is quantized into the mesh BVH's space, and every leaf
// whose quantized box overlaps it emits an entry into concavePairsOut:
//   (bodyIndexA, bodyIndexB, triangleIndex, childShapeIndexB or 0).
// For a compound B one entry is emitted per child shape. Output slots are
// reserved atomically through numConcavePairsOut; entries that would land at or
// beyond maxNumConcavePairsCapacity are dropped while the counter still advances.
//
// NOTE(review): numPairs is not used and `id` is not range-checked here -
// callers presumably guarantee id < numPairs; confirm.
void b3BvhTraversal( __global const b3Int4* pairs,
	__global const b3RigidBodyData* rigidBodies,
	__global const b3Collidable* collidables,
	__global b3Aabb* aabbs,
	__global b3Int4* concavePairsOut,
	__global volatile int* numConcavePairsOut,
	__global const b3BvhSubtreeInfo* subtreeHeadersRoot,
	__global const b3QuantizedBvhNode* quantizedNodesRoot,
	__global const b3BvhInfo* bvhInfos,
	int numPairs,
	int maxNumConcavePairsCapacity,
	int id)
{
	int bodyIndexA = pairs[id].x;
	int bodyIndexB = pairs[id].y;
	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;

	//once the broadphase avoids static-static pairs, we can remove this test
	if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))
		return;

	if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)
		return;

	int shapeTypeB = collidables[collidableIndexB].m_shapeType;
	int shapeBSupported = (shapeTypeB==SHAPE_CONVEX_HULL) ||
		(shapeTypeB==SHAPE_SPHERE) ||
		(shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS);
	if (!shapeBSupported)
		return;

	// m_numChildShapes shares storage with m_bvhIndex in b3Collidable, so for a
	// concave mesh this value selects the BVH description.
	b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];

	b3Float4 bvhAabbMin = bvhInfo.m_aabbMin;
	b3Float4 bvhAabbMax = bvhInfo.m_aabbMax;
	b3Float4 bvhQuantization = bvhInfo.m_quantization;
	int numSubtreeHeaders = bvhInfo.m_numSubTrees;
	__global const b3BvhSubtreeInfoData* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];
	__global const b3QuantizedBvhNodeData* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];

	// Quantize body B's AABB once; all box tests below are on 16-bit values.
	unsigned short int quantizedQueryAabbMin[3];
	unsigned short int quantizedQueryAabbMax[3];
	b3QuantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_minVec,false,bvhAabbMin, bvhAabbMax,bvhQuantization);
	b3QuantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_maxVec,true ,bvhAabbMin, bvhAabbMax,bvhQuantization);

	for (int s=0;s<numSubtreeHeaders;s++)
	{
		b3BvhSubtreeInfoData subtree = subtreeHeaders[s];

		int subtreeOverlaps = b3TestQuantizedAabbAgainstQuantizedAabbSlow(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
		if (subtreeOverlaps == 0)
			continue;

		int curIndex = subtree.m_rootNodeIndex;
		int endNodeIndex = subtree.m_rootNodeIndex+subtree.m_subtreeSize;

		// Stackless walk over the subtree's node range.
		while (curIndex < endNodeIndex)
		{
			b3QuantizedBvhNodeData node = quantizedNodes[curIndex];
			int nodeOverlaps = b3TestQuantizedAabbAgainstQuantizedAabbSlow(quantizedQueryAabbMin,quantizedQueryAabbMax,node.m_quantizedAabbMin,node.m_quantizedAabbMax);
			int nodeIsLeaf = b3IsLeaf(&node);

			if (nodeOverlaps && nodeIsLeaf)
			{
				int triangleIndex = b3GetTriangleIndex(&node);

				if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)
				{
					// Reserve one output slot per child shape of B.
					int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
					int pairIdx = b3AtomicAdd (numConcavePairsOut,numChildrenB);
					for (int b=0;b<numChildrenB;b++)
					{
						if ((pairIdx+b)<maxNumConcavePairsCapacity)
						{
							int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;
							b3Int4 newPair = b3MakeInt4(bodyIndexA,bodyIndexB,triangleIndex,childShapeIndexB);
							concavePairsOut[pairIdx+b] = newPair;
						}
					}
				}
				else
				{
					int pairIdx = b3AtomicInc(numConcavePairsOut);
					if (pairIdx<maxNumConcavePairsCapacity)
					{
						b3Int4 newPair = b3MakeInt4(bodyIndexA,bodyIndexB,triangleIndex,0);
						concavePairsOut[pairIdx] = newPair;
					}
				}
			}

			// Overlapping nodes and leaves advance to the next node; a
			// non-overlapping internal node is skipped via its escape index.
			if (nodeOverlaps || nodeIsLeaf)
			{
				curIndex++;
			}
			else
			{
				curIndex += b3GetEscapeIndex(&node);
			}
		}
	}
}

+ 188
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h View File

@@ -0,0 +1,188 @@
#ifndef B3_CLIP_FACES_H
#define B3_CLIP_FACES_H


#include "Bullet3Common/shared/b3Int4.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"


// Linear interpolation between a and b on x, y and z: a + (b - a) * t.
// The fourth component of the result is always 0.
inline b3Float4 b3Lerp3(b3Float4ConstArg a,b3Float4ConstArg b, float t)
{
	b3Float4 blended = b3MakeFloat4(
		a.x + (b.x - a.x) * t,
		a.y + (b.y - a.y) * t,
		a.z + (b.z - a.z) * t,
		0.f);
	return blended;
}

// Clips a polygon against a plane, keeping the part on the negative side
// (signed distance dot(planeNormalWS, v) + planeEqWS < 0).
//
// pVtxIn / numVertsIn : input polygon vertices, in order.
// planeNormalWS       : plane normal.
// planeEqWS           : plane constant added to the dot product.
// ppVtxOut            : receives the clipped polygon; each input edge emits at
//                       most two vertices. No capacity check is done here.
//
// Returns the number of vertices written to ppVtxOut.
int clipFaceGlobal(__global const b3Float4* pVtxIn, int numVertsIn, b3Float4ConstArg planeNormalWS,float planeEqWS, __global b3Float4* ppVtxOut)
{
	// An empty polygon has nothing to clip. Without this guard the code below
	// would read pVtxIn[-1] and pVtxIn[0]; the clipping kernel can legitimately
	// pass 0 here after an earlier plane removed every vertex.
	if (numVertsIn < 1)
		return 0;

	int ve;
	float ds, de;
	int numVertsOut = 0;

	// Edges are walked as (previous vertex -> current vertex), starting with the
	// closing edge from the last vertex to the first.
	b3Float4 firstVertex=pVtxIn[numVertsIn-1];
	b3Float4 endVertex = pVtxIn[0];
	ds = b3Dot(planeNormalWS,firstVertex)+planeEqWS;
	for (ve = 0; ve < numVertsIn; ve++)
	{
		endVertex=pVtxIn[ve];
		de = b3Dot(planeNormalWS,endVertex)+planeEqWS;
		if (ds<0)
		{
			if (de<0)
			{
				// Start < 0, end < 0, so output endVertex
				ppVtxOut[numVertsOut++] = endVertex;
			}
			else
			{
				// Start < 0, end >= 0, so output intersection
				ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
			}
		}
		else
		{
			if (de<0)
			{
				// Start >= 0, end < 0 so output intersection and end
				ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
				ppVtxOut[numVertsOut++] = endVertex;
			}
		}
		firstVertex = endVertex;
		ds = de;
	}
	return numVertsOut;
}


// For one pair (pairIndex) that has a separating axis flagged, clips the face
// polygon of B stored in worldVertsB1 against the side planes of the face
// polygon of A stored in worldVertsA1, then keeps the clipped points whose
// signed distance to A's face plane is at most maxDist (0.02).
//
// Every pair owns a slice of vertexFaceCapacity entries in worldVertsA1,
// worldVertsB1 and worldVertsB2, starting at pairIndex*vertexFaceCapacity.
// clippingFacesOut[pairIndex] is read as (closestFaceA, closestFaceB,
// numVertsInA, numVertsInB); on exit its w component holds the number of
// contact points produced. Each contact is stored as (x, y, z, depth).
__kernel void clipFacesAndFindContactsKernel( __global const b3Float4* separatingNormals,
__global const int* hasSeparatingAxis,
__global b3Int4* clippingFacesOut,
__global b3Float4* worldVertsA1,
__global b3Float4* worldNormalsA1,
__global b3Float4* worldVertsB1,
__global b3Float4* worldVertsB2,
int vertexFaceCapacity,
int pairIndex
)
{
// int i = get_global_id(0);
//int pairIndex = i;
int i = pairIndex;
// Depths below minDist are clamped to it; points deeper than maxDist are rejected.
float minDist = -1e30f;
float maxDist = 0.02f;
// if (i<numPairs)
{
if (hasSeparatingAxis[i])
{
// int bodyIndexA = pairs[i].x;
// int bodyIndexB = pairs[i].y;
int numLocalContactsOut = 0;

int capacityWorldVertsB2 = vertexFaceCapacity;
// Ping-pong buffers for the clipping passes: input starts in B1, output in B2.
__global b3Float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];
__global b3Float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];

{
__global b3Int4* clippingFaces = clippingFacesOut;
int closestFaceA = clippingFaces[pairIndex].x;
int closestFaceB = clippingFaces[pairIndex].y;
int numVertsInA = clippingFaces[pairIndex].z;
int numVertsInB = clippingFaces[pairIndex].w;
int numVertsOut = 0;
if (closestFaceA>=0)
{
// clip polygon to back of planes of all faces of hull A that are adjacent to witness face
for(int e0=0;e0<numVertsInA;e0++)
{
// Edge e0 of face A runs between vertex e0 and its cyclic successor.
const b3Float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];
const b3Float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];
const b3Float4 WorldEdge0 = aw - bw;
b3Float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];
// Side plane through the edge: normal = -(edge x face normal), passing through aw.
b3Float4 planeNormalWS1 = -b3Cross(WorldEdge0,worldPlaneAnormal1);
b3Float4 worldA1 = aw;
float planeEqWS1 = -b3Dot(worldA1,planeNormalWS1);
b3Float4 planeNormalWS = planeNormalWS1;
float planeEqWS=planeEqWS1;
numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);
// Swap buffers so this pass's output feeds the next pass.
__global b3Float4* tmp = pVtxOut;
pVtxOut = pVtxIn;
pVtxIn = tmp;
numVertsInB = numVertsOut;
numVertsOut = 0;
}
// Plane of face A: its normal, passing through the first vertex of face A.
b3Float4 planeNormalWS = worldNormalsA1[pairIndex];
float planeEqWS=-b3Dot(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);
// This loop's `i` shadows the outer `i`.
for (int i=0;i<numVertsInB;i++)
{
float depth = b3Dot(planeNormalWS,pVtxIn[i])+planeEqWS;
if (depth <=minDist)
{
depth = minDist;
}
/*
static float maxDepth = 0.f;
if (depth < maxDepth)
{
maxDepth = depth;
if (maxDepth < -10)
{
printf("error at framecount %d?\n",myframecount);
}
printf("maxDepth = %f\n", maxDepth);

}
*/
if (depth <=maxDist)
{
b3Float4 pointInWorld = pVtxIn[i];
pVtxOut[numLocalContactsOut++] = b3MakeFloat4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);
}
}
}
clippingFaces[pairIndex].w =numLocalContactsOut;

}
// Copy the contacts into whichever buffer is currently pVtxIn; which of
// worldVertsB1/worldVertsB2 that is depends on how many swaps happened above.
for (int i=0;i<numLocalContactsOut;i++)
pVtxIn[i] = pVtxOut[i];
}// if (hasSeparatingAxis[i])
}// if (i<numPairs)
}

#endif //B3_CLIP_FACES_H


+ 76
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h View File

@@ -0,0 +1,76 @@

#ifndef B3_COLLIDABLE_H
#define B3_COLLIDABLE_H


#include "Bullet3Common/shared/b3Float4.h"
#include "Bullet3Common/shared/b3Quat.h"

// Shape type tags stored in b3Collidable::m_shapeType and
// b3GpuChildShape::m_shapeType. The value 2 is not assigned to any shape.
enum b3ShapeTypes
{
SHAPE_HEIGHT_FIELD=1,

SHAPE_CONVEX_HULL=3,
SHAPE_PLANE=4,
SHAPE_CONCAVE_TRIMESH=5,
SHAPE_COMPOUND_OF_CONVEX_HULLS=6,
SHAPE_SPHERE=7,
// One past the last shape tag (evaluates to 8).
MAX_NUM_SHAPE_TYPES,
};

typedef struct b3Collidable b3Collidable_t;


// Compact description of one collision shape. Several members are unions, so
// which name is meaningful depends on m_shapeType.
struct b3Collidable
{
union {
// Number of child shapes (read for SHAPE_COMPOUND_OF_CONVEX_HULLS).
int m_numChildShapes;
// Same storage; b3BvhTraversal uses it to index the BVH info array for SHAPE_CONCAVE_TRIMESH.
int m_bvhIndex;
};
union
{
float m_radius;
int m_compoundBvhIndex;
};

// One of the b3ShapeTypes values.
int m_shapeType;
union
{
// For convex hulls: index of the convex polyhedron record; for compounds:
// index of the first child shape.
int m_shapeIndex;
float m_height;
};
};

typedef struct b3GpuChildShape b3GpuChildShape_t;
// One child of a compound shape: a transform (position + orientation) plus a
// shape reference. As in b3Collidable, the unions overlay alternative
// meanings that depend on m_shapeType.
struct b3GpuChildShape
{
b3Float4 m_childPosition;
b3Quat m_childOrientation;
union
{
int m_shapeIndex;//used for SHAPE_COMPOUND_OF_CONVEX_HULLS
int m_capsuleAxis;
};
union
{
float m_radius;//used for childshape of SHAPE_COMPOUND_OF_SPHERES or SHAPE_COMPOUND_OF_CAPSULES
int m_numChildShapes;//used for compound shape
};
union
{
float m_height;//used for childshape of SHAPE_COMPOUND_OF_CAPSULES
int m_collidableShapeIndex;
};
// NOTE(review): presumably a b3ShapeTypes value; the comments above mention
// SHAPE_COMPOUND_OF_SPHERES/CAPSULES, which that enum does not define.
int m_shapeType;
};

// Identifies a pair of bodies together with one child shape index on each side.
// NOTE(review): presumably produced when compound shapes overlap; the code
// that fills it is not part of this header.
struct b3CompoundOverlappingPair
{
int m_bodyIndexA;
int m_bodyIndexB;
// int m_pairType;
int m_childShapeIndexA;
int m_childShapeIndexB;
};

#endif //B3_COLLIDABLE_H

+ 40
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h View File

@@ -0,0 +1,40 @@
#ifndef B3_CONTACT4DATA_H
#define B3_CONTACT4DATA_H

#include "Bullet3Common/shared/b3Float4.h"

typedef struct b3Contact4Data b3Contact4Data_t;

// A contact record holding up to four points that share one normal.
// The number of valid points is stored as a float in m_worldNormalOnB.w; use
// b3Contact4Data_getNumPoints / b3Contact4Data_setNumPoints to access it.
struct b3Contact4Data
{
// Up to four contact positions.
b3Float4 m_worldPosB[4];
// b3Float4 m_localPosA[4];
// b3Float4 m_localPosB[4];
b3Float4 m_worldNormalOnB; // w: m_nPoints
// Restitution and friction coefficients in a 16-bit encoding (encoding not shown here).
unsigned short m_restituitionCoeffCmp;
unsigned short m_frictionCoeffCmp;
int m_batchIdx;
int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr
int m_bodyBPtrAndSignBit;

int m_childIndexA;
int m_childIndexB;
int m_unused1;
int m_unused2;


};

// Returns the number of contact points, which is stored as a float in the
// w component of m_worldNormalOnB.
inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)
{
	const int numPoints = (int)contact->m_worldNormalOnB.w;
	return numPoints;
}

// Stores the number of contact points as a float in the w component of
// m_worldNormalOnB.
inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)
{
	const float encodedCount = (float)numPoints;
	contact->m_worldNormalOnB.w = encodedCount;
}



#endif //B3_CONTACT4DATA_H

+ 523
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactConvexConvexSAT.h View File

@@ -0,0 +1,523 @@

#ifndef B3_CONTACT_CONVEX_CONVEX_SAT_H
#define B3_CONTACT_CONVEX_CONVEX_SAT_H


#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h"

#define B3_MAX_VERTS 1024



// Linear interpolation between a and b on x, y and z: a + (b - a) * t.
// The fourth component of the result is always 0.
inline b3Float4 b3Lerp3(const b3Float4& a,const b3Float4& b, float t)
{
	b3Float4 blended = b3MakeVector3(
		a.x + (b.x - a.x) * t,
		a.y + (b.y - a.y) * t,
		a.z + (b.z - a.z) * t,
		0.f);
	return blended;
}


// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut
inline int b3ClipFace(const b3Float4* pVtxIn, int numVertsIn, b3Float4& planeNormalWS,float planeEqWS, b3Float4* ppVtxOut)
{
int ve;
float ds, de;
int numVertsOut = 0;
if (numVertsIn < 2)
return 0;

b3Float4 firstVertex=pVtxIn[numVertsIn-1];
b3Float4 endVertex = pVtxIn[0];
ds = b3Dot3F4(planeNormalWS,firstVertex)+planeEqWS;

for (ve = 0; ve < numVertsIn; ve++)
{
endVertex=pVtxIn[ve];

de = b3Dot3F4(planeNormalWS,endVertex)+planeEqWS;

if (ds<0)
{
if (de<0)
{
// Start < 0, end < 0, so output endVertex
ppVtxOut[numVertsOut++] = endVertex;
}
else
{
// Start < 0, end >= 0, so output intersection
ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
}
}
else
{
if (de<0)
{
// Start >= 0, end < 0 so output intersection and end
ppVtxOut[numVertsOut++] = b3Lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );
ppVtxOut[numVertsOut++] = endVertex;
}
}
firstVertex = endVertex;
ds = de;
}
return numVertsOut;
}


inline int b3ClipFaceAgainstHull(const b3Float4& separatingNormal, const b3ConvexPolyhedronData* hullA,
const b3Float4& posA, const b3Quaternion& ornA, b3Float4* worldVertsB1, int numWorldVertsB1,
b3Float4* worldVertsB2, int capacityWorldVertsB2,
const float minDist, float maxDist,
const b3AlignedObjectArray<b3Float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA,
//const b3Float4* verticesB, const b3GpuFace* facesB, const int* indicesB,
b3Float4* contactsOut,
int contactCapacity)
{
int numContactsOut = 0;

b3Float4* pVtxIn = worldVertsB1;
b3Float4* pVtxOut = worldVertsB2;
int numVertsIn = numWorldVertsB1;
int numVertsOut = 0;

int closestFaceA=-1;
{
float dmin = FLT_MAX;
for(int face=0;face<hullA->m_numFaces;face++)
{
const b3Float4 Normal = b3MakeVector3(
facesA[hullA->m_faceOffset+face].m_plane.x,
facesA[hullA->m_faceOffset+face].m_plane.y,
facesA[hullA->m_faceOffset+face].m_plane.z,0.f);
const b3Float4 faceANormalWS = b3QuatRotate(ornA,Normal);
float d = b3Dot3F4(faceANormalWS,separatingNormal);
if (d < dmin)
{
dmin = d;
closestFaceA = face;
}
}
}
if (closestFaceA<0)
return numContactsOut;

b3GpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA];

// clip polygon to back of planes of all faces of hull A that are adjacent to witness face
int numContacts = numWorldVertsB1;
int numVerticesA = polyA.m_numIndices;
for(int e0=0;e0<numVerticesA;e0++)
{
const b3Float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];
const b3Float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];
const b3Float4 edge0 = a - b;
const b3Float4 WorldEdge0 = b3QuatRotate(ornA,edge0);
b3Float4 planeNormalA = b3MakeFloat4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);
b3Float4 worldPlaneAnormal1 = b3QuatRotate(ornA,planeNormalA);

b3Float4 planeNormalWS1 = -b3Cross3(WorldEdge0,worldPlaneAnormal1);
b3Float4 worldA1 = b3TransformPoint(a,posA,ornA);
float planeEqWS1 = -b3Dot3F4(worldA1,planeNormalWS1);
b3Float4 planeNormalWS = planeNormalWS1;
float planeEqWS=planeEqWS1;
//clip face
//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);
numVertsOut = b3ClipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);

//btSwap(pVtxIn,pVtxOut);
b3Float4* tmp = pVtxOut;
pVtxOut = pVtxIn;
pVtxIn = tmp;
numVertsIn = numVertsOut;
numVertsOut = 0;
}

// only keep points that are behind the witness face
{
b3Float4 localPlaneNormal = b3MakeFloat4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);
float localPlaneEq = polyA.m_plane.w;
b3Float4 planeNormalWS = b3QuatRotate(ornA,localPlaneNormal);
float planeEqWS=localPlaneEq-b3Dot3F4(planeNormalWS,posA);
for (int i=0;i<numVertsIn;i++)
{
float depth = b3Dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;
if (depth <=minDist)
{
depth = minDist;
}
if (numContactsOut<contactCapacity)
{
if (depth <=maxDist)
{
b3Float4 pointInWorld = pVtxIn[i];
//resultOut.addContactPoint(separatingNormal,point,depth);
contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);
//printf("depth=%f\n",depth);
}
} else
{
b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut,contactCapacity);
}
}
}

return numContactsOut;
}



inline int b3ClipHullAgainstHull(const b3Float4& separatingNormal,
const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
const b3Float4& posA, const b3Quaternion& ornA,const b3Float4& posB, const b3Quaternion& ornB,
b3Float4* worldVertsB1, b3Float4* worldVertsB2, int capacityWorldVerts,
const float minDist, float maxDist,
const b3AlignedObjectArray<b3Float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA,
const b3AlignedObjectArray<b3Float4>& verticesB, const b3AlignedObjectArray<b3GpuFace>& facesB, const b3AlignedObjectArray<int>& indicesB,

b3Float4* contactsOut,
int contactCapacity)
{
int numContactsOut = 0;
int numWorldVertsB1= 0;
B3_PROFILE("clipHullAgainstHull");

float curMaxDist=maxDist;
int closestFaceB=-1;
float dmax = -FLT_MAX;

{
//B3_PROFILE("closestFaceB");
if (hullB.m_numFaces!=1)
{
//printf("wtf\n");
}
static bool once = true;
//printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z);
for(int face=0;face<hullB.m_numFaces;face++)
{
#ifdef BT_DEBUG_SAT_FACE
if (once)
printf("face %d\n",face);
const b3GpuFace* faceB = &facesB[hullB.m_faceOffset+face];
if (once)
{
for (int i=0;i<faceB->m_numIndices;i++)
{
b3Float4 vert = verticesB[hullB.m_vertexOffset+indicesB[faceB->m_indexOffset+i]];
printf("vert[%d] = %f,%f,%f\n",i,vert.x,vert.y,vert.z);
}
}
#endif //BT_DEBUG_SAT_FACE
//if (facesB[hullB.m_faceOffset+face].m_numIndices>2)
{
const b3Float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset+face].m_plane.x,
facesB[hullB.m_faceOffset+face].m_plane.y, facesB[hullB.m_faceOffset+face].m_plane.z,0.f);
const b3Float4 WorldNormal = b3QuatRotate(ornB, Normal);
#ifdef BT_DEBUG_SAT_FACE
if (once)
printf("faceNormal = %f,%f,%f\n",Normal.x,Normal.y,Normal.z);
#endif
float d = b3Dot3F4(WorldNormal,separatingNormal);
if (d > dmax)
{
dmax = d;
closestFaceB = face;
}
}
}
once = false;
}

b3Assert(closestFaceB>=0);
{
//B3_PROFILE("worldVertsB1");
const b3GpuFace& polyB = facesB[hullB.m_faceOffset+closestFaceB];
const int numVertices = polyB.m_numIndices;
for(int e0=0;e0<numVertices;e0++)
{
const b3Float4& b = verticesB[hullB.m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];
worldVertsB1[numWorldVertsB1++] = b3TransformPoint(b,posB,ornB);
}
}

if (closestFaceB>=0)
{
//B3_PROFILE("clipFaceAgainstHull");
numContactsOut = b3ClipFaceAgainstHull((b3Float4&)separatingNormal, &hullA,
posA,ornA,
worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,
verticesA, facesA, indicesA,
contactsOut,contactCapacity);
}

return numContactsOut;
}




/**
 * Builds one contact manifold for a pair of convex hulls by clipping hull B
 * against hull A along an already known separating normal.
 *
 * The clipped points returned by b3ClipHullAgainstHull are reduced with
 * b3ReduceContacts. When at least one point was produced and
 * nContacts < maxContactCapacity, a new b3Contact4Data is appended to
 * *globalContactOut and nContacts is incremented.
 *
 * @param bodyIndexA,bodyIndexB   indices into *bodyBuf; stored (negated when the
 *                                body has zero inverse mass) in the contact
 * @param posA,ornA,posB,ornB     position / orientation of body A and body B
 * @param collidableIndexA,collidableIndexB  indices into hostCollidablesA / hostCollidablesB,
 *                                used to look up each hull's shape index
 * @param bodyBuf                 rigid body array
 * @param globalContactOut        contact array that receives the new contact
 * @param nContacts               in/out: running contact count, also the slot that is written
 * @param hostConvexDataA,hostConvexDataB    convex hull descriptors
 * @param verticesA,facesA,indicesA          geometry buffers for hull A
 * @param verticesB,facesB,indicesB          geometry buffers for hull B
 * @param uniqueEdgesA,uniqueEdgesB          not used by this function
 * @param hostCollidablesA,hostCollidablesB  collidable arrays
 * @param sepNormalWorldSpace     separating normal (only x, y, z are read)
 * @param maxContactCapacity      upper bound for nContacts
 * @return index of the contact that was written, or -1 when nothing was written
 */
inline int b3ClipHullHullSingle(
	int bodyIndexA, int bodyIndexB,
	const b3Float4& posA,
	const b3Quaternion& ornA,
	const b3Float4& posB,
	const b3Quaternion& ornB,

	int collidableIndexA, int collidableIndexB,

	const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf,
	b3AlignedObjectArray<b3Contact4Data>* globalContactOut,
	int& nContacts,
	const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA,
	const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB,
	const b3AlignedObjectArray<b3Vector3>& verticesA,
	const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,
	const b3AlignedObjectArray<b3GpuFace>& facesA,
	const b3AlignedObjectArray<int>& indicesA,
	const b3AlignedObjectArray<b3Vector3>& verticesB,
	const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,
	const b3AlignedObjectArray<b3GpuFace>& facesB,
	const b3AlignedObjectArray<int>& indicesB,

	const b3AlignedObjectArray<b3Collidable>& hostCollidablesA,
	const b3AlignedObjectArray<b3Collidable>& hostCollidablesB,
	const b3Vector3& sepNormalWorldSpace,
	int maxContactCapacity )
{
	int contactIndex = -1;

	// Scratch buffer for the clipped contact points.
	b3Float4 contactsOut[B3_MAX_VERTS];
	int localContactCapacity = B3_MAX_VERTS;

#ifdef _WIN32
	b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x));
	b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x));
#endif
	{
		// Ping-pong vertex buffers used while clipping.
		b3Float4 worldVertsB1[B3_MAX_VERTS];
		b3Float4 worldVertsB2[B3_MAX_VERTS];
		int capacityWorldVerts = B3_MAX_VERTS;

		b3Float4 hostNormal = b3MakeFloat4(sepNormalWorldSpace.x,sepNormalWorldSpace.y,sepNormalWorldSpace.z,0.f);
		int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex;
		int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex;

		b3Scalar minDist = -1;
		b3Scalar maxDist = 0.;

		// The poses are routed through b3Transform (quaternion -> basis -> quaternion)
		// exactly as before, so the values handed to the clipper are unchanged.
		b3Transform trA,trB;
		{
			trA.setOrigin(b3MakeVector3(posA.x,posA.y,posA.z));
			trA.setRotation(b3Quaternion(ornA.x,ornA.y,ornA.z,ornA.w));
			trB.setOrigin(b3MakeVector3(posB.x,posB.y,posB.z));
			trB.setRotation(b3Quaternion(ornB.x,ornB.y,ornB.z,ornB.w));
		}

		b3Quaternion trAorn = trA.getRotation();
		b3Quaternion trBorn = trB.getRotation();
		int numContactsOut = b3ClipHullAgainstHull(hostNormal,
			hostConvexDataA.at(shapeA),
			hostConvexDataB.at(shapeB),
			(b3Float4&)trA.getOrigin(), (b3Quaternion&)trAorn,
			(b3Float4&)trB.getOrigin(), (b3Quaternion&)trBorn,
			worldVertsB1,worldVertsB2,capacityWorldVerts,
			minDist, maxDist,
			verticesA, facesA,indicesA,
			verticesB, facesB,indicesB,
			contactsOut,localContactCapacity);

		if (numContactsOut>0)
		{
			B3_PROFILE("overlap");

			b3Float4 normalOnSurfaceB = hostNormal;
			b3Int4 contactIdx;
			contactIdx.x = 0;
			contactIdx.y = 1;
			contactIdx.z = 2;
			contactIdx.w = 3;
			int numPoints = 0;
			{
				B3_PROFILE("extractManifold");
				numPoints = b3ReduceContacts(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx);
			}
			b3Assert(numPoints);
			if (nContacts<maxContactCapacity)
			{
				contactIndex = nContacts;
				// NOTE(review): the new element is fetched with at(nContacts), which
				// presumes the array size equals nContacts before expand() - confirm with callers.
				globalContactOut->expand();
				b3Contact4Data& contact = globalContactOut->at(nContacts);
				contact.m_batchIdx = 0;
				contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA;
				contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB;

				// Presumably 0.7 in 16-bit fixed point (0.7 * 0xffff) - TODO confirm.
				contact.m_frictionCoeffCmp = 45874;
				contact.m_restituitionCoeffCmp = 0;

				// Set the normal once, before the points: it does not depend on the
				// point index and must be initialised even when no point is copied.
				contact.m_worldNormalOnB = normalOnSurfaceB;
				for (int p=0;p<numPoints;p++)
				{
					contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]];//check if it is actually on B
				}
				// The w component of the normal carries the number of contact points.
				contact.m_worldNormalOnB.w = (b3Scalar)numPoints;
				nContacts++;
			} else
			{
				b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts,maxContactCapacity);
			}
		}
	}
	return contactIndex;
}




inline int b3ContactConvexConvexSAT(
int pairIndex,
int bodyIndexA, int bodyIndexB,
int collidableIndexA, int collidableIndexB,
const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies,
const b3AlignedObjectArray<b3Collidable>& collidables,
const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes,
const b3AlignedObjectArray<b3Float4>& convexVertices,
const b3AlignedObjectArray<b3Float4>& uniqueEdges,
const b3AlignedObjectArray<int>& convexIndices,
const b3AlignedObjectArray<b3GpuFace>& faces,
b3AlignedObjectArray<b3Contact4Data>& globalContactsOut,
int& nGlobalContactsOut,
int maxContactCapacity)
{
int contactIndex = -1;


b3Float4 posA = rigidBodies[bodyIndexA].m_pos;
b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
b3Float4 posB = rigidBodies[bodyIndexB].m_pos;
b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat;

b3ConvexPolyhedronData hullA, hullB;
b3Float4 sepNormalWorldSpace;


b3Collidable colA = collidables[collidableIndexA];
hullA = convexShapes[colA.m_shapeIndex];
//printf("numvertsA = %d\n",hullA.m_numVertices);
b3Collidable colB = collidables[collidableIndexB];
hullB = convexShapes[colB.m_shapeIndex];
//printf("numvertsB = %d\n",hullB.m_numVertices);
// b3Float4 contactsOut[B3_MAX_VERTS];
int contactCapacity = B3_MAX_VERTS;
int numContactsOut=0;


#ifdef _WIN32
b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x));
b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x));
#endif
bool foundSepAxis = b3FindSeparatingAxis(hullA,hullB,
posA,
ornA,
posB,
ornB,

convexVertices,uniqueEdges,faces,convexIndices,
convexVertices,uniqueEdges,faces,convexIndices,
sepNormalWorldSpace
);

if (foundSepAxis)
{
contactIndex = b3ClipHullHullSingle(
bodyIndexA, bodyIndexB,
posA,ornA,
posB,ornB,
collidableIndexA, collidableIndexB,
&rigidBodies,
&globalContactsOut,
nGlobalContactsOut,
convexShapes,
convexShapes,
convexVertices,
uniqueEdges,
faces,
convexIndices,
convexVertices,
uniqueEdges,
faces,
convexIndices,

collidables,
collidables,
sepNormalWorldSpace,
maxContactCapacity);
}

return contactIndex;
}

#endif //B3_CONTACT_CONVEX_CONVEX_SAT_H

+ 162
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ContactSphereSphere.h View File

@@ -0,0 +1,162 @@

#ifndef B3_CONTACT_SPHERE_SPHERE_H
#define B3_CONTACT_SPHERE_SPHERE_H





void computeContactSphereConvex(int pairIndex,
int bodyIndexA, int bodyIndexB,
int collidableIndexA, int collidableIndexB,
const b3RigidBodyData* rigidBodies,
const b3Collidable* collidables,
const b3ConvexPolyhedronData* convexShapes,
const b3Vector3* convexVertices,
const int* convexIndices,
const b3GpuFace* faces,
b3Contact4* globalContactsOut,
int& nGlobalContactsOut,
int maxContactCapacity)
{

float radius = collidables[collidableIndexA].m_radius;
float4 spherePos1 = rigidBodies[bodyIndexA].m_pos;
b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat;



float4 pos = rigidBodies[bodyIndexB].m_pos;

b3Quaternion quat = rigidBodies[bodyIndexB].m_quat;

b3Transform tr;
tr.setIdentity();
tr.setOrigin(pos);
tr.setRotation(quat);
b3Transform trInv = tr.inverse();

float4 spherePos = trInv(spherePos1);

int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx;
int shapeIndex = collidables[collidableIndex].m_shapeIndex;
int numFaces = convexShapes[shapeIndex].m_numFaces;
float4 closestPnt = b3MakeVector3(0, 0, 0, 0);
float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0);
float minDist = -1000000.f; // TODO: What is the largest/smallest float?
bool bCollide = true;
int region = -1;
float4 localHitNormal;
for ( int f = 0; f < numFaces; f++ )
{
b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];
float4 planeEqn;
float4 localPlaneNormal = b3MakeVector3(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);
float4 n1 = localPlaneNormal;//quatRotate(quat,localPlaneNormal);
planeEqn = n1;
planeEqn[3] = face.m_plane.w;

float4 pntReturn;
float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);

if ( dist > radius)
{
bCollide = false;
break;
}

if ( dist > 0 )
{
//might hit an edge or vertex
b3Vector3 out;

bool isInPoly = IsPointInPolygon(spherePos,
&face,
&convexVertices[convexShapes[shapeIndex].m_vertexOffset],
convexIndices,
&out);
if (isInPoly)
{
if (dist>minDist)
{
minDist = dist;
closestPnt = pntReturn;
localHitNormal = planeEqn;
region=1;
}
} else
{
b3Vector3 tmp = spherePos-out;
b3Scalar l2 = tmp.length2();
if (l2<radius*radius)
{
dist = b3Sqrt(l2);
if (dist>minDist)
{
minDist = dist;
closestPnt = out;
localHitNormal = tmp/dist;
region=2;
}
} else
{
bCollide = false;
break;
}
}
}
else
{
if ( dist > minDist )
{
minDist = dist;
closestPnt = pntReturn;
localHitNormal = planeEqn;
region=3;
}
}
}
static int numChecks = 0;
numChecks++;

if (bCollide && minDist > -10000)
{
float4 normalOnSurfaceB1 = tr.getBasis()*localHitNormal;//-hitNormalWorld;
float4 pOnB1 = tr(closestPnt);
//printf("dist ,%f,",minDist);
float actualDepth = minDist-radius;
if (actualDepth<0)
{
//printf("actualDepth = ,%f,", actualDepth);
//printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z);
//printf("region=,%d,\n", region);
pOnB1[3] = actualDepth;

int dstIdx;
// dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
if (nGlobalContactsOut < maxContactCapacity)
{
dstIdx=nGlobalContactsOut;
nGlobalContactsOut++;

b3Contact4* c = &globalContactsOut[dstIdx];
c->m_worldNormalOnB = normalOnSurfaceB1;
c->setFrictionCoeff(0.7);
c->setRestituitionCoeff(0.f);

c->m_batchIdx = pairIndex;
c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;
c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;
c->m_worldPosB[0] = pOnB1;
int numPoints = 1;
c->m_worldNormalOnB.w = (b3Scalar)numPoints;
}//if (dstIdx < numPairs)
}
}//if (hasCollision)
}
#endif //B3_CONTACT_SPHERE_SPHERE_H

+ 40
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h View File

@@ -0,0 +1,40 @@

#ifndef B3_CONVEX_POLYHEDRON_DATA_H
#define B3_CONVEX_POLYHEDRON_DATA_H



#include "Bullet3Common/shared/b3Float4.h"
#include "Bullet3Common/shared/b3Quat.h"

// Alias for code that names the face type without the struct keyword.
typedef struct b3GpuFace b3GpuFace_t;
// One polygonal face of a convex hull, stored in a flat face array that is
// addressed through b3ConvexPolyhedronData::m_faceOffset.
struct b3GpuFace
{
// Face plane: the collision code reads xyz as the face normal and w as the plane constant.
b3Float4 m_plane;
// Position of this face's first entry in the vertex-index array.
int m_indexOffset;
// Number of vertex indices (polygon corners) that belong to this face.
int m_numIndices;
// Unused; presumably pads the struct to a 16-byte multiple for GPU buffers - TODO confirm.
int m_unusedPadding1;
int m_unusedPadding2;
};

// Alias for code that names the hull type without the struct keyword.
typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;

// Descriptor of one convex hull. The geometry itself lives in shared flat
// arrays (vertices, unique edges, faces); this struct stores the offsets and
// counts that select the hull's slice of each array.
struct b3ConvexPolyhedronData
{
// Hull centre in the hull's local frame.
b3Float4 m_localCenter;
// NOTE(review): m_extents, mC and mE are not referenced in the visible code;
// presumably local bounding-box data - confirm before relying on them.
b3Float4 m_extents;
b3Float4 mC;
b3Float4 mE;

// NOTE(review): not referenced in the visible code; presumably a bounding radius - confirm.
float m_radius;
// First face of this hull in the shared face array, and the number of faces.
int m_faceOffset;
int m_numFaces;
// Number of vertices of this hull (see m_vertexOffset for where they start).
int m_numVertices;

// First vertex of this hull in the shared vertex array.
int m_vertexOffset;
// First unique edge of this hull in the shared edge array, and the number of unique edges.
int m_uniqueEdgesOffset;
int m_numUniqueEdges;
// Unused; presumably padding - TODO confirm.
int m_unused;
};

#endif //B3_CONVEX_POLYHEDRON_DATA_H

+ 832
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h View File

@@ -0,0 +1,832 @@
#ifndef B3_FIND_CONCAVE_SEPARATING_AXIS_H
#define B3_FIND_CONCAVE_SEPARATING_AXIS_H

#define B3_TRIANGLE_NUM_CONVEX_FACES 5


#include "Bullet3Common/shared/b3Int4.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"


// Projects every vertex of a hull onto the direction *dir and returns the
// covered interval in *min / *max.
//
// The direction is rotated into the hull's local frame once, so the vertices
// can be used untransformed; the projection of the hull position onto *dir is
// added afterwards to shift the interval.
//
//   hull      hull descriptor (vertex count and vertex offset are read)
//   pos, orn  position and orientation of the hull
//   dir       projection direction
//   vertices  shared vertex array
//   min, max  outputs: lower and upper end of the projected interval
inline void b3Project(__global const b3ConvexPolyhedronData* hull, b3Float4ConstArg pos, b3QuatConstArg orn,
	const b3Float4* dir, __global const b3Float4* vertices, float* min, float* max)
{
	const int vertexCount = hull->m_numVertices;
	const b3Float4 axisLocal = b3QuatRotate(b3QuatInverse(orn),*dir);
	const float shift = b3Dot(pos,*dir);

	float lowest = FLT_MAX;
	float highest = -FLT_MAX;
	for (int v = 0; v < vertexCount; v++)
	{
		const float proj = b3Dot(vertices[hull->m_vertexOffset+v],axisLocal);
		if (proj < lowest)
		{
			lowest = proj;
		}
		if (proj > highest)
		{
			highest = proj;
		}
	}

	// Only true when no vertex updated the bounds (they still hold the initial
	// FLT_MAX / -FLT_MAX); the ends are then swapped.
	if (lowest > highest)
	{
		const float keep = lowest;
		lowest = highest;
		highest = keep;
	}

	*min = lowest + shift;
	*max = highest + shift;
}


// Tests one candidate axis: projects both hulls onto *sep_axis and compares
// the two intervals.
//
// Returns false when the intervals do not overlap (the axis separates the
// hulls). Otherwise returns true and stores in *depth the smaller of the two
// overlap amounts. *depth is left untouched when false is returned.
inline bool b3TestSepAxis(const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB,
	b3Float4ConstArg posA,b3QuatConstArg ornA,
	b3Float4ConstArg posB,b3QuatConstArg ornB,
	b3Float4* sep_axis, const b3Float4* verticesA, __global const b3Float4* verticesB,float* depth)
{
	float loA,hiA;
	float loB,hiB;
	b3Project(hullA,posA,ornA,sep_axis,verticesA, &loA, &hiA);
	b3Project(hullB,posB,ornB, sep_axis,verticesB, &loB, &hiB);

	const bool apart = (hiA<loB) || (hiB<loA);
	if (apart)
	{
		return false;
	}

	const float overlapAB = hiA - loB;
	const float overlapBA = hiB - loA;
	if (overlapAB<overlapBA)
	{
		*depth = overlapAB;
	}
	else
	{
		*depth = overlapBA;
	}
	return true;
}


// Face-normal pass of the separating-axis test: tries every face normal of
// hullA as a candidate axis.
//
// Returns false as soon as one face normal separates the two hulls. Otherwise
// returns true; whenever a tested axis has a smaller overlap than *dmin, *dmin
// and *sep are updated with that overlap and axis. Before returning true, *sep
// is negated if it has a positive dot product with -DeltaC2.
//
//   hullA, hullB      hull descriptors (only faces of hullA are iterated)
//   posA1, posB1      hull positions; their w component is ignored
//   ornA, ornB        hull orientations
//   DeltaC2           offset between the two hull centres, used to orient axes
//   verticesA/B       vertex arrays used for the projections
//   facesA            face array of hullA
//   uniqueEdgesA/B, indicesA/B, facesB   not used by this function
//   sep, dmin         in/out: best axis so far and its overlap depth
bool b3FindSeparatingAxis(	const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB,
	b3Float4ConstArg posA1,
	b3QuatConstArg ornA,
	b3Float4ConstArg posB1,
	b3QuatConstArg ornB,
	b3Float4ConstArg DeltaC2,
	const b3Float4* verticesA,
	const b3Float4* uniqueEdgesA,
	const b3GpuFace* facesA,
	const int* indicesA,

	__global const b3Float4* verticesB,
	__global const b3Float4* uniqueEdgesB,
	__global const b3GpuFace* facesB,
	__global const int* indicesB,
	b3Float4* sep,
	float* dmin)
{
	b3Float4 posA = posA1;
	posA.w = 0.f;
	b3Float4 posB = posB1;
	posB.w = 0.f;

	// Test normals from hullA
	const int numFacesA = hullA->m_numFaces;
	for(int i=0;i<numFacesA;i++)
	{
		const b3Float4 normal = facesA[hullA->m_faceOffset+i].m_plane;
		b3Float4 faceANormalWS = b3QuatRotate(ornA,normal);
		// Orient the candidate axis consistently with the centre offset.
		if (b3Dot(DeltaC2,faceANormalWS)<0)
			faceANormalWS*=-1.f;

		float d;
		if(!b3TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))
			return false;

		if(d<*dmin)
		{
			*dmin = d;
			*sep = faceANormalWS;
		}
	}

	if((b3Dot(-DeltaC2,*sep))>0.0f)
	{
		*sep = -(*sep);
	}
	return true;
}


// Table of 162 fixed direction vectors. b3FindSeparatingAxisEdgeEdge uses them
// as candidate separating axes when it is asked not to test every edge pair.
// The name suggests unit-length points distributed over a sphere (presumably a
// subdivided icosahedron) - TODO confirm.
b3Vector3 unitSphere162[]=
{
b3MakeVector3(0.000000,-1.000000,0.000000),
b3MakeVector3(0.203181,-0.967950,0.147618),
b3MakeVector3(-0.077607,-0.967950,0.238853),
b3MakeVector3(0.723607,-0.447220,0.525725),
b3MakeVector3(0.609547,-0.657519,0.442856),
b3MakeVector3(0.812729,-0.502301,0.295238),
b3MakeVector3(-0.251147,-0.967949,0.000000),
b3MakeVector3(-0.077607,-0.967950,-0.238853),
b3MakeVector3(0.203181,-0.967950,-0.147618),
b3MakeVector3(0.860698,-0.251151,0.442858),
b3MakeVector3(-0.276388,-0.447220,0.850649),
b3MakeVector3(-0.029639,-0.502302,0.864184),
b3MakeVector3(-0.155215,-0.251152,0.955422),
b3MakeVector3(-0.894426,-0.447216,0.000000),
b3MakeVector3(-0.831051,-0.502299,0.238853),
b3MakeVector3(-0.956626,-0.251149,0.147618),
b3MakeVector3(-0.276388,-0.447220,-0.850649),
b3MakeVector3(-0.483971,-0.502302,-0.716565),
b3MakeVector3(-0.436007,-0.251152,-0.864188),
b3MakeVector3(0.723607,-0.447220,-0.525725),
b3MakeVector3(0.531941,-0.502302,-0.681712),
b3MakeVector3(0.687159,-0.251152,-0.681715),
b3MakeVector3(0.687159,-0.251152,0.681715),
b3MakeVector3(-0.436007,-0.251152,0.864188),
b3MakeVector3(-0.956626,-0.251149,-0.147618),
b3MakeVector3(-0.155215,-0.251152,-0.955422),
b3MakeVector3(0.860698,-0.251151,-0.442858),
b3MakeVector3(0.276388,0.447220,0.850649),
b3MakeVector3(0.483971,0.502302,0.716565),
b3MakeVector3(0.232822,0.657519,0.716563),
b3MakeVector3(-0.723607,0.447220,0.525725),
b3MakeVector3(-0.531941,0.502302,0.681712),
b3MakeVector3(-0.609547,0.657519,0.442856),
b3MakeVector3(-0.723607,0.447220,-0.525725),
b3MakeVector3(-0.812729,0.502301,-0.295238),
b3MakeVector3(-0.609547,0.657519,-0.442856),
b3MakeVector3(0.276388,0.447220,-0.850649),
b3MakeVector3(0.029639,0.502302,-0.864184),
b3MakeVector3(0.232822,0.657519,-0.716563),
b3MakeVector3(0.894426,0.447216,0.000000),
b3MakeVector3(0.831051,0.502299,-0.238853),
b3MakeVector3(0.753442,0.657515,0.000000),
b3MakeVector3(-0.232822,-0.657519,0.716563),
b3MakeVector3(-0.162456,-0.850654,0.499995),
b3MakeVector3(0.052790,-0.723612,0.688185),
b3MakeVector3(0.138199,-0.894429,0.425321),
b3MakeVector3(0.262869,-0.525738,0.809012),
b3MakeVector3(0.361805,-0.723611,0.587779),
b3MakeVector3(0.531941,-0.502302,0.681712),
b3MakeVector3(0.425323,-0.850654,0.309011),
b3MakeVector3(0.812729,-0.502301,-0.295238),
b3MakeVector3(0.609547,-0.657519,-0.442856),
b3MakeVector3(0.850648,-0.525736,0.000000),
b3MakeVector3(0.670817,-0.723611,-0.162457),
b3MakeVector3(0.670817,-0.723610,0.162458),
b3MakeVector3(0.425323,-0.850654,-0.309011),
b3MakeVector3(0.447211,-0.894428,0.000001),
b3MakeVector3(-0.753442,-0.657515,0.000000),
b3MakeVector3(-0.525730,-0.850652,0.000000),
b3MakeVector3(-0.638195,-0.723609,0.262864),
b3MakeVector3(-0.361801,-0.894428,0.262864),
b3MakeVector3(-0.688189,-0.525736,0.499997),
b3MakeVector3(-0.447211,-0.723610,0.525729),
b3MakeVector3(-0.483971,-0.502302,0.716565),
b3MakeVector3(-0.232822,-0.657519,-0.716563),
b3MakeVector3(-0.162456,-0.850654,-0.499995),
b3MakeVector3(-0.447211,-0.723611,-0.525727),
b3MakeVector3(-0.361801,-0.894429,-0.262863),
b3MakeVector3(-0.688189,-0.525736,-0.499997),
b3MakeVector3(-0.638195,-0.723609,-0.262863),
b3MakeVector3(-0.831051,-0.502299,-0.238853),
b3MakeVector3(0.361804,-0.723612,-0.587779),
b3MakeVector3(0.138197,-0.894429,-0.425321),
b3MakeVector3(0.262869,-0.525738,-0.809012),
b3MakeVector3(0.052789,-0.723611,-0.688186),
b3MakeVector3(-0.029639,-0.502302,-0.864184),
b3MakeVector3(0.956626,0.251149,0.147618),
b3MakeVector3(0.956626,0.251149,-0.147618),
b3MakeVector3(0.951058,-0.000000,0.309013),
b3MakeVector3(1.000000,0.000000,0.000000),
b3MakeVector3(0.947213,-0.276396,0.162458),
b3MakeVector3(0.951058,0.000000,-0.309013),
b3MakeVector3(0.947213,-0.276396,-0.162458),
b3MakeVector3(0.155215,0.251152,0.955422),
b3MakeVector3(0.436007,0.251152,0.864188),
b3MakeVector3(-0.000000,-0.000000,1.000000),
b3MakeVector3(0.309017,0.000000,0.951056),
b3MakeVector3(0.138199,-0.276398,0.951055),
b3MakeVector3(0.587786,0.000000,0.809017),
b3MakeVector3(0.447216,-0.276398,0.850648),
b3MakeVector3(-0.860698,0.251151,0.442858),
b3MakeVector3(-0.687159,0.251152,0.681715),
b3MakeVector3(-0.951058,-0.000000,0.309013),
b3MakeVector3(-0.809018,0.000000,0.587783),
b3MakeVector3(-0.861803,-0.276396,0.425324),
b3MakeVector3(-0.587786,0.000000,0.809017),
b3MakeVector3(-0.670819,-0.276397,0.688191),
b3MakeVector3(-0.687159,0.251152,-0.681715),
b3MakeVector3(-0.860698,0.251151,-0.442858),
b3MakeVector3(-0.587786,-0.000000,-0.809017),
b3MakeVector3(-0.809018,-0.000000,-0.587783),
b3MakeVector3(-0.670819,-0.276397,-0.688191),
b3MakeVector3(-0.951058,0.000000,-0.309013),
b3MakeVector3(-0.861803,-0.276396,-0.425324),
b3MakeVector3(0.436007,0.251152,-0.864188),
b3MakeVector3(0.155215,0.251152,-0.955422),
b3MakeVector3(0.587786,-0.000000,-0.809017),
b3MakeVector3(0.309017,-0.000000,-0.951056),
b3MakeVector3(0.447216,-0.276398,-0.850648),
b3MakeVector3(0.000000,0.000000,-1.000000),
b3MakeVector3(0.138199,-0.276398,-0.951055),
b3MakeVector3(0.670820,0.276396,0.688190),
b3MakeVector3(0.809019,-0.000002,0.587783),
b3MakeVector3(0.688189,0.525736,0.499997),
b3MakeVector3(0.861804,0.276394,0.425323),
b3MakeVector3(0.831051,0.502299,0.238853),
b3MakeVector3(-0.447216,0.276397,0.850649),
b3MakeVector3(-0.309017,-0.000001,0.951056),
b3MakeVector3(-0.262869,0.525738,0.809012),
b3MakeVector3(-0.138199,0.276397,0.951055),
b3MakeVector3(0.029639,0.502302,0.864184),
b3MakeVector3(-0.947213,0.276396,-0.162458),
b3MakeVector3(-1.000000,0.000001,0.000000),
b3MakeVector3(-0.850648,0.525736,-0.000000),
b3MakeVector3(-0.947213,0.276397,0.162458),
b3MakeVector3(-0.812729,0.502301,0.295238),
b3MakeVector3(-0.138199,0.276397,-0.951055),
b3MakeVector3(-0.309016,-0.000000,-0.951057),
b3MakeVector3(-0.262869,0.525738,-0.809012),
b3MakeVector3(-0.447215,0.276397,-0.850649),
b3MakeVector3(-0.531941,0.502302,-0.681712),
b3MakeVector3(0.861804,0.276396,-0.425322),
b3MakeVector3(0.809019,0.000000,-0.587782),
b3MakeVector3(0.688189,0.525736,-0.499997),
b3MakeVector3(0.670821,0.276397,-0.688189),
b3MakeVector3(0.483971,0.502302,-0.716565),
b3MakeVector3(0.077607,0.967950,0.238853),
b3MakeVector3(0.251147,0.967949,0.000000),
b3MakeVector3(0.000000,1.000000,0.000000),
b3MakeVector3(0.162456,0.850654,0.499995),
b3MakeVector3(0.361800,0.894429,0.262863),
b3MakeVector3(0.447209,0.723612,0.525728),
b3MakeVector3(0.525730,0.850652,0.000000),
b3MakeVector3(0.638194,0.723610,0.262864),
b3MakeVector3(-0.203181,0.967950,0.147618),
b3MakeVector3(-0.425323,0.850654,0.309011),
b3MakeVector3(-0.138197,0.894430,0.425320),
b3MakeVector3(-0.361804,0.723612,0.587778),
b3MakeVector3(-0.052790,0.723612,0.688185),
b3MakeVector3(-0.203181,0.967950,-0.147618),
b3MakeVector3(-0.425323,0.850654,-0.309011),
b3MakeVector3(-0.447210,0.894429,0.000000),
b3MakeVector3(-0.670817,0.723611,-0.162457),
b3MakeVector3(-0.670817,0.723611,0.162457),
b3MakeVector3(0.077607,0.967950,-0.238853),
b3MakeVector3(0.162456,0.850654,-0.499995),
b3MakeVector3(-0.138197,0.894430,-0.425320),
b3MakeVector3(-0.052790,0.723612,-0.688185),
b3MakeVector3(-0.361804,0.723612,-0.587778),
b3MakeVector3(0.361800,0.894429,-0.262863),
b3MakeVector3(0.638194,0.723610,-0.262864),
b3MakeVector3(0.447209,0.723612,-0.525728)
};


bool b3FindSeparatingAxisEdgeEdge( const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB,
b3Float4ConstArg posA1,
b3QuatConstArg ornA,
b3Float4ConstArg posB1,
b3QuatConstArg ornB,
b3Float4ConstArg DeltaC2,
const b3Float4* verticesA,
const b3Float4* uniqueEdgesA,
const b3GpuFace* facesA,
const int* indicesA,
__global const b3Float4* verticesB,
__global const b3Float4* uniqueEdgesB,
__global const b3GpuFace* facesB,
__global const int* indicesB,
b3Float4* sep,
float* dmin,
bool searchAllEdgeEdge)
{


b3Float4 posA = posA1;
posA.w = 0.f;
b3Float4 posB = posB1;
posB.w = 0.f;

int curPlaneTests=0;

int curEdgeEdge = 0;
// Test edges
static int maxEdgeTests = 0;
int curEdgeTests = hullA->m_numUniqueEdges * hullB->m_numUniqueEdges;
if (curEdgeTests >maxEdgeTests )
{
maxEdgeTests = curEdgeTests ;
printf("maxEdgeTests = %d\n",maxEdgeTests );
printf("hullA->m_numUniqueEdges = %d\n",hullA->m_numUniqueEdges);
printf("hullB->m_numUniqueEdges = %d\n",hullB->m_numUniqueEdges);

}

if (searchAllEdgeEdge)
{
for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)
{
const b3Float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];
b3Float4 edge0World = b3QuatRotate(ornA,edge0);

for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)
{
const b3Float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];
b3Float4 edge1World = b3QuatRotate(ornB,edge1);


b3Float4 crossje = b3Cross(edge0World,edge1World);

curEdgeEdge++;
if(!b3IsAlmostZero(crossje))
{
crossje = b3Normalized(crossje);
if (b3Dot(DeltaC2,crossje)<0)
crossje *= -1.f;

float dist;
bool result = true;
{
float Min0,Max0;
float Min1,Max1;
b3Project(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);
b3Project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);
if(Max0<Min1 || Max1<Min0)
return false;
float d0 = Max0 - Min1;
float d1 = Max1 - Min0;
dist = d0<d1 ? d0:d1;
result = true;

}

if(dist<*dmin)
{
*dmin = dist;
*sep = crossje;
}
}
}

}
} else
{
int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3);
//printf("numDirections =%d\n",numDirections );


for(int i=0;i<numDirections;i++)
{
b3Float4 crossje = unitSphere162[i];
{
//if (b3Dot(DeltaC2,crossje)>0)
{
float dist;
bool result = true;
{
float Min0,Max0;
float Min1,Max1;
b3Project(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);
b3Project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);
if(Max0<Min1 || Max1<Min0)
return false;
float d0 = Max0 - Min1;
float d1 = Max1 - Min0;
dist = d0<d1 ? d0:d1;
result = true;

}

if(dist<*dmin)
{
*dmin = dist;
*sep = crossje;
}
}
}
}

}

if((b3Dot(-DeltaC2,*sep))>0.0f)
{
*sep = -(*sep);
}
return true;
}



// Selects the two faces that will be clipped against each other for one pair
// and stores their world-space data in the per-pair output slots.
//
//  - Face of hull B whose rotated normal has the largest dot product with
//    separatingNormal; its vertices go to worldVertsB1.
//  - Face of hull A whose rotated normal has the smallest dot product with
//    separatingNormal; its vertices go to worldVertsA1 and its rotated normal
//    to worldNormalsA1[pairIndex].
//  - clippingFaces[pairIndex] = (face A, face B, vertex count A, vertex count B).
//
// Vertex outputs for a pair start at pairIndex*capacityWorldVerts.
// NOTE(review): the number of face vertices is not checked against
// capacityWorldVerts; callers must size the buffers accordingly.
//
// If no face can be selected for a hull (no faces, or no comparison succeeds),
// the face index is reported as -1 with a vertex count of 0 and nothing is read
// from or written for that hull's face.
//
// minDist and maxDist are not used. The function always returns 0.
inline int b3FindClippingFaces(b3Float4ConstArg separatingNormal,
	__global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,
	b3Float4ConstArg posA, b3QuatConstArg ornA,b3Float4ConstArg posB, b3QuatConstArg ornB,
	__global b3Float4* worldVertsA1,
	__global b3Float4* worldNormalsA1,
	__global b3Float4* worldVertsB1,
	int capacityWorldVerts,
	const float minDist, float maxDist,
	__global const b3Float4* verticesA,
	__global const b3GpuFace_t* facesA,
	__global const int* indicesA,
	__global const b3Float4* verticesB,
	__global const b3GpuFace_t* facesB,
	__global const int* indicesB,

	__global b3Int4* clippingFaces, int pairIndex)
{
	// --- hull B: face most aligned with the separating normal ---
	int closestFaceB=-1;
	{
		float dmax = -FLT_MAX;
		for(int face=0;face<hullB->m_numFaces;face++)
		{
			const b3Float4 Normal = b3MakeFloat4(facesB[hullB->m_faceOffset+face].m_plane.x,
				facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);
			const b3Float4 WorldNormal = b3QuatRotate(ornB, Normal);
			float d = b3Dot(WorldNormal,separatingNormal);
			if (d > dmax)
			{
				dmax = d;
				closestFaceB = face;
			}
		}
	}

	int numWorldVertsB1= 0;
	if (closestFaceB>=0)
	{
		const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];
		const int numVertices = polyB.m_numIndices;
		for(int e0=0;e0<numVertices;e0++)
		{
			const b3Float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];
			worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = b3TransformPoint(b,posB,ornB);
		}
	}

	// --- hull A: face most opposed to the separating normal ---
	int closestFaceA=-1;
	{
		float dmin = FLT_MAX;
		for(int face=0;face<hullA->m_numFaces;face++)
		{
			const b3Float4 Normal = b3MakeFloat4(
				facesA[hullA->m_faceOffset+face].m_plane.x,
				facesA[hullA->m_faceOffset+face].m_plane.y,
				facesA[hullA->m_faceOffset+face].m_plane.z,
				0.f);
			const b3Float4 faceANormalWS = b3QuatRotate(ornA,Normal);
			float d = b3Dot(faceANormalWS,separatingNormal);
			if (d < dmin)
			{
				dmin = d;
				closestFaceA = face;
				worldNormalsA1[pairIndex] = faceANormalWS;
			}
		}
	}

	int numVerticesA = 0;
	if (closestFaceA>=0)
	{
		numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;
		for(int e0=0;e0<numVerticesA;e0++)
		{
			const b3Float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];
			worldVertsA1[pairIndex*capacityWorldVerts+e0] = b3TransformPoint(a, posA,ornA);
		}
	}

	clippingFaces[pairIndex].x = closestFaceA;
	clippingFaces[pairIndex].y = closestFaceB;
	clippingFaces[pairIndex].z = numVerticesA;
	clippingFaces[pairIndex].w = numWorldVertsB1;

	// No contacts are generated here; the return value is always 0.
	return 0;
}



// Separating-axis test for one concave pair: a single triangle of body A
// against the convex shape (or one child of a compound) of body B.
//
// Input, per pair (concavePairs[pairIdx]):
//   x, y  body indices of A and B
//   z     index of the face of shape A that is used as the triangle
//   w     child shape index when B is a compound; set to -1 by this function
//         to mark the pair as inactive
//
// Steps:
//   1. Build a temporary convex polyhedron from the triangle (3 vertices,
//      3 unique edges, B3_TRIANGLE_NUM_CONVEX_FACES faces: front, back and
//      one plane per edge).
//   2. Reject early when the triangle's bounding box does not overlap
//      aabbs[bodyIndexB].
//   3. Run the face-normal pass for A, the face-normal pass for B and the
//      exhaustive edge-edge pass.
//   4. If no pass reported separation: set hasSeparatingNormals[i] = 1,
//      store the axis (w = overlap depth) in concaveSeparatingNormalsOut
//      and let b3FindClippingFaces fill the clipping outputs.
//      Otherwise mark the pair inactive.
//
// The work item is selected by the pairIdx argument; the get_global_id based
// selection is commented out.
__kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs,
__global const b3RigidBodyData* rigidBodies,
__global const b3Collidable* collidables,
__global const b3ConvexPolyhedronData* convexShapes,
__global const b3Float4* vertices,
__global const b3Float4* uniqueEdges,
__global const b3GpuFace* faces,
__global const int* indices,
__global const b3GpuChildShape* gpuChildShapes,
__global b3Aabb* aabbs,
__global b3Float4* concaveSeparatingNormalsOut,
__global b3Int4* clippingFacesOut,
__global b3Vector3* worldVertsA1Out,
__global b3Vector3* worldNormalsA1Out,
__global b3Vector3* worldVertsB1Out,
__global int* hasSeparatingNormals,
int vertexFaceCapacity,
int numConcavePairs,
int pairIdx
)
{
int i = pairIdx;
/* int i = get_global_id(0);
if (i>=numConcavePairs)
return;
int pairIdx = i;
*/

int bodyIndexA = concavePairs[i].x;
int bodyIndexB = concavePairs[i].y;

int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;

int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;

// Only convex hulls and compounds of convex hulls are supported for body B.
// NOTE(review): this early return happens before hasSeparatingNormals[i] is
// cleared, so that entry keeps its previous value - confirm this is intended.
if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&
collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)
{
concavePairs[pairIdx].w = -1;
return;
}

hasSeparatingNormals[i] = 0;

// numFacesA and numActualConcaveConvexTests are not read afterwards.
int numFacesA = convexShapes[shapeIndexA].m_numFaces;
int numActualConcaveConvexTests = 0;
// Face of shape A that acts as the triangle for this pair.
int f = concavePairs[i].z;
bool overlap = false;
// Temporary polyhedron describing the triangle; its offsets index the local
// arrays verticesA / uniqueEdgesA / facesA / indicesA declared below.
b3ConvexPolyhedronData convexPolyhedronA;

//add 3 vertices of the triangle
convexPolyhedronA.m_numVertices = 3;
convexPolyhedronA.m_vertexOffset = 0;
b3Float4 localCenter = b3MakeFloat4(0.f,0.f,0.f,0.f);

b3GpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];
b3Aabb triAabb;
triAabb.m_minVec = b3MakeFloat4(1e30f,1e30f,1e30f,0.f);
triAabb.m_maxVec = b3MakeFloat4(-1e30f,-1e30f,-1e30f,0.f);
b3Float4 verticesA[3];
// Gather the triangle vertices, their sum (for the centre) and their bounds.
// This loop variable shadows the outer i (= pairIdx) inside the loop only.
for (int i=0;i<3;i++)
{
int index = indices[face.m_indexOffset+i];
b3Float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];
verticesA[i] = vert;
localCenter += vert;
triAabb.m_minVec = b3MinFloat4(triAabb.m_minVec,vert);
triAabb.m_maxVec = b3MaxFloat4(triAabb.m_maxVec,vert);

}

// Bounding-box rejection test, one axis at a time.
// NOTE(review): triAabb is built from the untransformed vertices of shape A
// while aabbs[bodyIndexB] is read as-is; presumably both are expressed in the
// same space (e.g. body A is static at the origin) - confirm.
overlap = true;
overlap = (triAabb.m_minVec.x > aabbs[bodyIndexB].m_maxVec.x || triAabb.m_maxVec.x < aabbs[bodyIndexB].m_minVec.x) ? false : overlap;
overlap = (triAabb.m_minVec.z > aabbs[bodyIndexB].m_maxVec.z || triAabb.m_maxVec.z < aabbs[bodyIndexB].m_minVec.z) ? false : overlap;
overlap = (triAabb.m_minVec.y > aabbs[bodyIndexB].m_maxVec.y || triAabb.m_maxVec.y < aabbs[bodyIndexB].m_minVec.y) ? false : overlap;
if (overlap)
{
float dmin = FLT_MAX;
// Both initial values are placeholders; they are overwritten before being used.
int hasSeparatingAxis=5;
b3Float4 sepAxis=b3MakeFloat4(1,2,3,4);

// localCC is not read afterwards.
int localCC=0;
numActualConcaveConvexTests++;

//a triangle has 3 unique edges
convexPolyhedronA.m_numUniqueEdges = 3;
convexPolyhedronA.m_uniqueEdgesOffset = 0;
b3Float4 uniqueEdgesA[3];
uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);
uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);
uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);


convexPolyhedronA.m_faceOffset = 0;
b3Float4 normal = b3MakeFloat4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);
b3GpuFace facesA[B3_TRIANGLE_NUM_CONVEX_FACES];
// Index storage: 3 (front) + 3 (back) + 2 per edge plane.
int indicesA[3+3+2+2+2];
int curUsedIndices=0;
int fidx=0;

//front side of triangle
{
facesA[fidx].m_indexOffset=curUsedIndices;
indicesA[0] = 0;
indicesA[1] = 1;
indicesA[2] = 2;
curUsedIndices+=3;
float c = face.m_plane.w;
facesA[fidx].m_plane.x = normal.x;
facesA[fidx].m_plane.y = normal.y;
facesA[fidx].m_plane.z = normal.z;
facesA[fidx].m_plane.w = c;
facesA[fidx].m_numIndices=3;
}
fidx++;
//back side of triangle: reversed winding and negated normal
{
facesA[fidx].m_indexOffset=curUsedIndices;
indicesA[3]=2;
indicesA[4]=1;
indicesA[5]=0;
curUsedIndices+=3;
// The plane constant is recomputed from the first vertex; c1 is not used.
float c = b3Dot(normal,verticesA[0]);
float c1 = -face.m_plane.w;
facesA[fidx].m_plane.x = -normal.x;
facesA[fidx].m_plane.y = -normal.y;
facesA[fidx].m_plane.z = -normal.z;
facesA[fidx].m_plane.w = c;
facesA[fidx].m_numIndices=3;
}
fidx++;

// One additional two-vertex face per triangle edge, with a normal that is
// perpendicular to both the triangle normal and the edge.
bool addEdgePlanes = true;
if (addEdgePlanes)
{
int numVertices=3;
int prevVertex = numVertices-1;
for (int i=0;i<numVertices;i++)
{
b3Float4 v0 = verticesA[i];
b3Float4 v1 = verticesA[prevVertex];
b3Float4 edgeNormal = b3Normalized(b3Cross(normal,v1-v0));
float c = -b3Dot(edgeNormal,v0);

facesA[fidx].m_numIndices = 2;
facesA[fidx].m_indexOffset=curUsedIndices;
indicesA[curUsedIndices++]=i;
indicesA[curUsedIndices++]=prevVertex;
facesA[fidx].m_plane.x = edgeNormal.x;
facesA[fidx].m_plane.y = edgeNormal.y;
facesA[fidx].m_plane.z = edgeNormal.z;
facesA[fidx].m_plane.w = c;
fidx++;
prevVertex = i;
}
}
convexPolyhedronA.m_numFaces = B3_TRIANGLE_NUM_CONVEX_FACES;
// Centre of the triangle = average of its three vertices.
convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);


b3Float4 posA = rigidBodies[bodyIndexA].m_pos;
posA.w = 0.f;
b3Float4 posB = rigidBodies[bodyIndexB].m_pos;
posB.w = 0.f;

b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
b3Quaternion ornB =rigidBodies[bodyIndexB].m_quat;



///////////////////
///compound shape support

// For a compound, replace B's pose and shape by those of the selected child:
// the child's transform is composed with the body transform.
if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
{
int compoundChild = concavePairs[pairIdx].w;
int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;
int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
b3Float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
b3Quaternion childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
b3Float4 newPosB = b3TransformPoint(childPosB,posB,ornB);
b3Quaternion newOrnB = b3QuatMul(ornB,childOrnB);
posB = newPosB;
ornB = newOrnB;
shapeIndexB = collidables[childColIndexB].m_shapeIndex;
}
//////////////////

// Offset between the transformed centres of the two shapes; used by the SAT
// passes to orient candidate axes.
b3Float4 c0local = convexPolyhedronA.m_localCenter;
b3Float4 c0 = b3TransformPoint(c0local, posA, ornA);
b3Float4 c1local = convexShapes[shapeIndexB].m_localCenter;
b3Float4 c1 = b3TransformPoint(c1local,posB,ornB);
const b3Float4 DeltaC2 = c0 - c1;


// Pass 1: face normals of the triangle polyhedron.
bool sepA = b3FindSeparatingAxis( &convexPolyhedronA, &convexShapes[shapeIndexB],
posA,ornA,
posB,ornB,
DeltaC2,
verticesA,uniqueEdgesA,facesA,indicesA,
vertices,uniqueEdges,faces,indices,
&sepAxis,&dmin);
hasSeparatingAxis = 4;
if (!sepA)
{
hasSeparatingAxis = 0;
} else
{
// Pass 2: face normals of shape B (hull arguments swapped, same DeltaC2).
bool sepB = b3FindSeparatingAxis( &convexShapes[shapeIndexB],&convexPolyhedronA,
posB,ornB,
posA,ornA,
DeltaC2,
vertices,uniqueEdges,faces,indices,
verticesA,uniqueEdgesA,facesA,indicesA,
&sepAxis,&dmin);

if (!sepB)
{
hasSeparatingAxis = 0;
} else
{
// Pass 3: exhaustive edge-edge search (searchAllEdgeEdge = true).
bool sepEE = b3FindSeparatingAxisEdgeEdge( &convexPolyhedronA, &convexShapes[shapeIndexB],
posA,ornA,
posB,ornB,
DeltaC2,
verticesA,uniqueEdgesA,facesA,indicesA,
vertices,uniqueEdges,faces,indices,
&sepAxis,&dmin,true);
if (!sepEE)
{
hasSeparatingAxis = 0;
} else
{
hasSeparatingAxis = 1;
}
}
}
// hasSeparatingAxis is non-zero only when none of the three passes returned
// false, i.e. when the shapes overlap on every tested axis.
if (hasSeparatingAxis)
{
hasSeparatingNormals[i]=1;
// The w component of the stored axis carries the smallest overlap depth.
sepAxis.w = dmin;
concaveSeparatingNormalsOut[pairIdx]=sepAxis;

//now compute clipping faces A and B, and world-space clipping vertices A and B...

float minDist = -1e30f;
float maxDist = 0.02f;

b3FindClippingFaces(sepAxis,
&convexPolyhedronA,
&convexShapes[shapeIndexB],
posA,ornA,
posB,ornB,
worldVertsA1Out,
worldNormalsA1Out,
worldVertsB1Out,
vertexFaceCapacity,
minDist, maxDist,
verticesA,
facesA,
indicesA,
vertices,
faces,
indices,
clippingFacesOut, pairIdx);

} else
{
//mark this pair as inactive
concavePairs[pairIdx].w = -1;
}
}
else
{
//mark this pair as inactive
concavePairs[pairIdx].w = -1;
}
}


#endif //B3_FIND_CONCAVE_SEPARATING_AXIS_H


+ 206
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3FindSeparatingAxis.h View File

@@ -0,0 +1,206 @@
#ifndef B3_FIND_SEPARATING_AXIS_H
#define B3_FIND_SEPARATING_AXIS_H


// Projects the vertices of `hull` (placed in the world by `pos`/`orn`) onto
// the world-space direction `dir` and returns the covered interval in
// `min`/`max`.
//
// The direction is rotated into the hull's local frame once, so each vertex
// only costs one dot product; the translation part is added afterwards as a
// single scalar offset (dot(pos, dir)).
//
// When the hull has no vertices the loop does not run, the initial sentinels
// are swapped by the min>max guard, and the result is
// [-FLT_MAX + offset, FLT_MAX + offset].
inline void b3ProjectAxis(const b3ConvexPolyhedronData& hull, const b3Float4& pos, const b3Quaternion& orn, const b3Float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max)
{
	min = FLT_MAX;
	max = -FLT_MAX;
	int numVerts = hull.m_numVertices;

	const b3Float4 localDir = b3QuatRotate(orn.inverse(),dir);

	b3Scalar offset = b3Dot3F4(pos,dir);

	for(int i=0;i<numVerts;i++)
	{
		// The vertex is read in place; no per-vertex copy is needed.
		b3Scalar dp = b3Dot3F4((b3Float4&)vertices[hull.m_vertexOffset+i],localDir);
		if(dp < min) min = dp;
		if(dp > max) max = dp;
	}
	if(min>max)
	{
		b3Scalar tmp = min;
		min = max;
		max = tmp;
	}
	min += offset;
	max += offset;
}


inline bool b3TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
const b3Float4& posA,const b3Quaternion& ornA,
const b3Float4& posB,const b3Quaternion& ornB,
const b3Float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA,const b3AlignedObjectArray<b3Vector3>& verticesB,b3Scalar& depth)
{
b3Scalar Min0,Max0;
b3Scalar Min1,Max1;
b3ProjectAxis(hullA,posA,ornA,sep_axis,verticesA, Min0, Max0);
b3ProjectAxis(hullB,posB,ornB, sep_axis,verticesB, Min1, Max1);

if(Max0<Min1 || Max1<Min0)
return false;

b3Scalar d0 = Max0 - Min1;
b3Assert(d0>=0.0f);
b3Scalar d1 = Max1 - Min0;
b3Assert(d1>=0.0f);
depth = d0<d1 ? d0:d1;
return true;
}


// Separating-axis test between two convex hulls placed in the world by
// (posA1, ornA) and (posB1, ornB).
//
// Candidate axes are tried in three passes: the face normals of hullA, the
// face normals of hullB, and the normalized cross products of every pair of
// unique edges (one edge from each hull). The function returns false as soon
// as b3TestSepAxis reports non-overlapping projections on a candidate. If all
// candidates overlap it returns true and `sep` holds the candidate with the
// smallest overlap depth, flipped at the end if needed so that its dot product
// with (centerA - centerB) is not negative.
//
// NOTE(review): `sep` is only assigned when at least one candidate axis is
// tested; otherwise it keeps whatever the caller passed in (apart from the
// final flip) - confirm callers never pass hulls without faces.
inline bool b3FindSeparatingAxis( const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
const b3Float4& posA1,
const b3Quaternion& ornA,
const b3Float4& posB1,
const b3Quaternion& ornB,
const b3AlignedObjectArray<b3Vector3>& verticesA,
const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,
const b3AlignedObjectArray<b3GpuFace>& facesA,
const b3AlignedObjectArray<int>& indicesA,
const b3AlignedObjectArray<b3Vector3>& verticesB,
const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,
const b3AlignedObjectArray<b3GpuFace>& facesB,
const b3AlignedObjectArray<int>& indicesB,

b3Vector3& sep)
{
B3_PROFILE("findSeparatingAxis");

// Work on copies of the positions with w cleared.
b3Float4 posA = posA1;
posA.w = 0.f;
b3Float4 posB = posB1;
posB.w = 0.f;
//#ifdef TEST_INTERNAL_OBJECTS
b3Float4 c0local = (b3Float4&)hullA.m_localCenter;

// World-space hull centers; deltaC2 points from B's center to A's center.
b3Float4 c0 = b3TransformPoint(c0local, posA, ornA);
b3Float4 c1local = (b3Float4&)hullB.m_localCenter;
b3Float4 c1 = b3TransformPoint(c1local,posB,ornB);
const b3Float4 deltaC2 = c0 - c1;
//#endif

// Smallest overlap depth found so far over all tested axes.
b3Scalar dmin = FLT_MAX;
// Counts tested face normals; not otherwise used in this function.
int curPlaneTests=0;

int numFacesA = hullA.m_numFaces;
// Test normals from hullA
for(int i=0;i<numFacesA;i++)
{
const b3Float4& normal = (b3Float4&)facesA[hullA.m_faceOffset+i].m_plane;
b3Float4 faceANormalWS = b3QuatRotate(ornA,normal);

// Orient the candidate so it does not point against deltaC2.
if (b3Dot3F4(deltaC2,faceANormalWS)<0)
faceANormalWS*=-1.f;

curPlaneTests++;
// NOTE(review): transA, transB and DeltaC2 are not declared in this function;
// this block presumably only compiles if they exist elsewhere - TODO confirm.
#ifdef TEST_INTERNAL_OBJECTS
gExpectedNbTests++;
if(gUseInternalObject && !TestInternalObjects(transA,transB, DeltaC2, faceANormalWS, hullA, hullB, dmin))
continue;
gActualNbTests++;
#endif

b3Scalar d;
// No overlap on this axis: the hulls are separated.
if(!b3TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,faceANormalWS, verticesA, verticesB,d))
return false;

if(d<dmin)
{
dmin = d;
sep = (b3Vector3&)faceANormalWS;
}
}

int numFacesB = hullB.m_numFaces;
// Test normals from hullB
for(int i=0;i<numFacesB;i++)
{
b3Float4 normal = (b3Float4&)facesB[hullB.m_faceOffset+i].m_plane;
b3Float4 WorldNormal = b3QuatRotate(ornB, normal);

// Orient the candidate so it does not point against deltaC2.
if (b3Dot3F4(deltaC2,WorldNormal)<0)
{
WorldNormal*=-1.f;
}
curPlaneTests++;
#ifdef TEST_INTERNAL_OBJECTS
gExpectedNbTests++;
if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, WorldNormal, hullA, hullB, dmin))
continue;
gActualNbTests++;
#endif

b3Scalar d;
if(!b3TestSepAxis(hullA, hullB,posA,ornA,posB,ornB,WorldNormal,verticesA,verticesB,d))
return false;

if(d<dmin)
{
dmin = d;
sep = (b3Vector3&)WorldNormal;
}
}

// b3Vector3 edgeAstart,edgeAend,edgeBstart,edgeBend;

// Counts examined edge pairs; not otherwise used in this function.
int curEdgeEdge = 0;
// Test edges
for(int e0=0;e0<hullA.m_numUniqueEdges;e0++)
{
const b3Float4& edge0 = (b3Float4&) uniqueEdgesA[hullA.m_uniqueEdgesOffset+e0];
b3Float4 edge0World = b3QuatRotate(ornA,(b3Float4&)edge0);

for(int e1=0;e1<hullB.m_numUniqueEdges;e1++)
{
const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset+e1];
b3Float4 edge1World = b3QuatRotate(ornB,(b3Float4&)edge1);


b3Float4 crossje = b3Cross3(edge0World,edge1World);

curEdgeEdge++;
// Skip pairs whose cross product is (almost) zero: they give no usable axis.
if(!b3IsAlmostZero((b3Vector3&)crossje))
{
crossje = b3FastNormalized3(crossje);
if (b3Dot3F4(deltaC2,crossje)<0)
crossje*=-1.f;


#ifdef TEST_INTERNAL_OBJECTS
gExpectedNbTests++;
if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, Cross, hullA, hullB, dmin))
continue;
gActualNbTests++;
#endif

b3Scalar dist;
if(!b3TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,crossje, verticesA,verticesB,dist))
return false;

if(dist<dmin)
{
dmin = dist;
sep = (b3Vector3&)crossje;
}
}
}

}

// Final orientation: flip `sep` if it points along -deltaC2.
if((b3Dot3F4(-deltaC2,(b3Float4&)sep))>0.0f)
sep = -sep;

return true;
}

#endif //B3_FIND_SEPARATING_AXIS_H


+ 920
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h View File

@@ -0,0 +1,920 @@

/***
* ---------------------------------
* Copyright (c)2012 Daniel Fiser <danfis@danfis.cz>
*
* This file was ported from mpr.c file, part of libccd.
* The Minkowski Portal Refinement implementation was ported
* to OpenCL by Erwin Coumans for the Bullet 3 Physics library.
* at http://github.com/erwincoumans/bullet3
*
* Distributed under the OSI-approved BSD License (the "License");
* see <http://www.opensource.org/licenses/bsd-license.php>.
* This software is distributed WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the License for more information.
*/




#ifndef B3_MPR_PENETRATION_H
#define B3_MPR_PENETRATION_H

#include "Bullet3Common/shared/b3PlatformDefinitions.h"
#include "Bullet3Common/shared/b3Float4.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"




// Square root used by the MPR helpers: sqrtf when compiled as C++, sqrt otherwise.
#ifdef __cplusplus
#define B3_MPR_SQRT sqrtf
#else
#define B3_MPR_SQRT sqrt
#endif
// Minimum of two values. Each argument appears twice in the expansion, so
// only pass expressions without side effects.
#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y))
// Absolute value.
#define B3_MPR_FABS fabs

// Threshold compared against in portalReachTolerance.
#define B3_MPR_TOLERANCE 1E-6f
// Upper bound on the number of loop passes in b3RefinePortal and b3FindPenetr.
#define B3_MPR_MAX_ITERATIONS 1000

// One support sample: a point on each object plus their difference.
// b3MprSupport fills it as v = v1 - v2.
struct _b3MprSupport_t
{
b3Float4 v; //!< Support point in minkowski sum
b3Float4 v1; //!< Support point in obj1
b3Float4 v2; //!< Support point in obj2
};
typedef struct _b3MprSupport_t b3MprSupport_t;

// Fixed-capacity simplex of up to four support points. The number of points
// in use is last + 1 (see b3MprSimplexSize / b3MprSimplexSetSize).
struct _b3MprSimplex_t
{
b3MprSupport_t ps[4];
int last; //!< index of last added point
};
typedef struct _b3MprSimplex_t b3MprSimplex_t;

// Returns a writable pointer to simplex point `idx`. No bounds check is done.
inline b3MprSupport_t* b3MprSimplexPointW(b3MprSimplex_t *s, int idx)
{
	b3MprSupport_t *points = s->ps;
	return points + idx;
}

// Declares that the simplex holds `size` points by storing the index of the
// last one (size - 1).
inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)
{
	const int lastIndex = size - 1;
	s->last = lastIndex;
}


// Number of points currently in the simplex (index of the last point + 1).
inline int b3MprSimplexSize(const b3MprSimplex_t *s)
{
	const int count = s->last + 1;
	return count;
}


// Returns a read-only pointer to simplex point `idx`.
// The index is not validated against the array bounds.
inline const b3MprSupport_t* b3MprSimplexPoint(const b3MprSimplex_t* s, int idx)
{
	const b3MprSupport_t *points = s->ps;
	return points + idx;
}

// Copies the whole support point *s into *d.
inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)
{
	d[0] = s[0];
}

// Overwrites simplex point `pos` with a copy of *a.
inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)
{
	b3MprSupport_t *slot = &s->ps[pos];
	b3MprSupportCopy(slot, a);
}


// Exchanges simplex points `pos1` and `pos2` through a temporary copy.
inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)
{
	b3MprSupport_t *first = &s->ps[pos1];
	b3MprSupport_t *second = &s->ps[pos2];
	b3MprSupport_t saved;

	b3MprSupportCopy(&saved, first);
	b3MprSupportCopy(first, second);
	b3MprSupportCopy(second, &saved);
}


// Returns 1 when |val| is below FLT_EPSILON, 0 otherwise.
inline int b3MprIsZero(float val)
{
	if (B3_MPR_FABS(val) < FLT_EPSILON)
	{
		return 1;
	}
	return 0;
}



// Approximate float equality. Returns 1 when the absolute difference is below
// FLT_EPSILON, or below FLT_EPSILON scaled by the larger of |_a| and |_b|;
// returns 0 otherwise.
inline int b3MprEq(float _a, float _b)
{
	float diff = B3_MPR_FABS(_a - _b);
	if (B3_MPR_FABS(diff) < FLT_EPSILON)
	{
		return 1;
	}

	float magA = B3_MPR_FABS(_a);
	float magB = B3_MPR_FABS(_b);

	// Relative comparison against the larger magnitude.
	float larger = (magB > magA) ? magB : magA;
	return diff < FLT_EPSILON * larger;
}


// Component-wise approximate equality of the x, y and z parts of two vectors
// (w is not compared). Stops at the first component that differs.
inline int b3MprVec3Eq(const b3Float4* a, const b3Float4 *b)
{
	if (!b3MprEq((*a).x, (*b).x))
	{
		return 0;
	}
	if (!b3MprEq((*a).y, (*b).y))
	{
		return 0;
	}
	return b3MprEq((*a).z, (*b).z) ? 1 : 0;
}



// Returns the hull vertex selected by b3MaxDot for direction `supportVec`,
// looking only at the hull's own slice of `verticesA`
// (m_vertexOffset .. m_vertexOffset + m_numVertices). A hull without vertices
// yields the zero vector.
inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec,__global const b3ConvexPolyhedronData_t* hull, b3ConstArray(b3Float4) verticesA)
{
	if (hull->m_numVertices <= 0)
	{
		return b3MakeFloat4(0,0,0,0);
	}

	float bestDot = -B3_LARGE_FLOAT;
	const b3Float4 direction = supportVec;
	int best = b3MaxDot(direction, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &bestDot);
	return verticesA[hull->m_vertexOffset+best];
}


// Computes the world-space support point of body `bodyIndex` in world-space
// direction `*_dir` and stores it in `*outp`.
//
// The direction is rotated into the body's local frame, the support vertex of
// the body's convex hull is looked up there, and the vertex is transformed
// back to world space with the body's position and orientation.
//
// `pairIndex`, `sepAxis` and `logme` are accepted for interface compatibility
// and are not used. Previously the `logme` path duplicated the vertex lookup
// inline and left the local vertex uninitialized for a hull without vertices;
// both paths now share b3LocalGetSupportVertex, which returns the zero vector
// in that case.
B3_STATIC void b3MprConvexSupport(int pairIndex,int bodyIndex, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
	b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData,
	b3ConstArray(b3Collidable_t) cpuCollidables,
	b3ConstArray(b3Float4) cpuVertices,
	__global b3Float4* sepAxis,
	const b3Float4* _dir, b3Float4* outp, int logme)
{
	//dir is in worldspace, move to local space
	b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;
	b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;
	b3Float4 dir = b3MakeFloat4((*_dir).x,(*_dir).y,(*_dir).z,0.f);
	const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),dir);

	//find local support vertex
	int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;
	b3Assert(cpuCollidables[colIndex].m_shapeType==SHAPE_CONVEX_HULL);
	__global const b3ConvexPolyhedronData_t* hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];
	b3Float4 pInA = b3LocalGetSupportVertex(localDir,hull,cpuVertices);

	//move vertex to world space
	*outp = b3TransformPoint(pInA,pos,orn);
}

inline void b3MprSupport(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData,
b3ConstArray(b3Collidable_t) cpuCollidables,
b3ConstArray(b3Float4) cpuVertices,
__global b3Float4* sepAxis,
const b3Float4* _dir, b3MprSupport_t *supp)
{
b3Float4 dir;
dir = *_dir;
b3MprConvexSupport(pairIndex,bodyIndexA,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v1,0);
dir = *_dir*-1.f;
b3MprConvexSupport(pairIndex,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v2,0);
supp->v = supp->v1 - supp->v2;
}









inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)
{

center->v1 = cpuBodyBuf[bodyIndexA].m_pos;
center->v2 = cpuBodyBuf[bodyIndexB].m_pos;
center->v = center->v1 - center->v2;
}

// Sets *v to (x, y, z) with the w component cleared to zero.
inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)
{
	*v = b3MakeFloat4(x, y, z, 0.f);
}

// Adds the x, y and z components of *w to *v in place. The w component of *v
// is left unchanged.
inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)
{
	(*v).x = (*v).x + (*w).x;
	(*v).y = (*v).y + (*w).y;
	(*v).z = (*v).z + (*w).z;
}

// Copies the whole vector *w (all components) into *v.
inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)
{
	v[0] = w[0];
}

// Multiplies *d in place by the scalar k, using the vector type's own
// compound-multiply operator.
// NOTE(review): how the w component is treated depends on that operator for
// the active b3Float4 backend - confirm before relying on w after scaling.
inline void b3MprVec3Scale(b3Float4 *d, float k)
{
*d *= k;
}

// Dot product of *a and *b as computed by b3Dot3F4.
inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)
{
	const float result = b3Dot3F4(*a,*b);
	return result;
}


// Squared length of *v (dot product of the vector with itself).
inline float b3MprVec3Len2(const b3Float4 *v)
{
	const float lengthSquared = b3MprVec3Dot(v, v);
	return lengthSquared;
}

// Scales *d in place by the reciprocal of its length.
// There is no guard for a zero-length input: the reciprocal is then computed
// from a zero square root.
inline void b3MprVec3Normalize(b3Float4 *d)
{
	float invLength = 1.f / B3_MPR_SQRT(b3MprVec3Len2(d));
	b3MprVec3Scale(d, invLength);
}

// Stores the cross product of *a and *b (via b3Cross3) in *d.
inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)
{
	const b3Float4 product = b3Cross3(*a,*b);
	*d = product;
}


// Stores the difference *v - *w in *d.
inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)
{
	const b3Float4 difference = *v - *w;
	*d = difference;
}

inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)
{
b3Float4 v2v1, v3v1;

b3MprVec3Sub2(&v2v1, &b3MprSimplexPoint(portal, 2)->v,
&b3MprSimplexPoint(portal, 1)->v);
b3MprVec3Sub2(&v3v1, &b3MprSimplexPoint(portal, 3)->v,
&b3MprSimplexPoint(portal, 1)->v);
b3MprVec3Cross(dir, &v2v1, &v3v1);
b3MprVec3Normalize(dir);
}


// Returns 1 when the dot product of `dir` with portal point 1 is positive or
// approximately zero, 0 otherwise.
inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal,
	const b3Float4 *dir)
{
	const float along = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);
	if (b3MprIsZero(along))
	{
		return 1;
	}
	return (along > 0.f) ? 1 : 0;
}

// Measures how far the new support point v4 lies beyond each portal vertex
// (points 1..3) along `dir`, takes the smallest of the three gaps, and
// returns 1 when that gap is below or approximately equal to
// B3_MPR_TOLERANCE, 0 otherwise.
inline int portalReachTolerance(const b3MprSimplex_t *portal,
	const b3MprSupport_t *v4,
	const b3Float4 *dir)
{
	const float first = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);
	const float second = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);
	const float third = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);
	const float candidate = b3MprVec3Dot(&v4->v, dir);

	const float gap1 = candidate - first;
	const float gap2 = candidate - second;
	const float gap3 = candidate - third;

	// find the smallest of the three gaps
	float smallest = B3_MPR_FMIN(gap1, gap2);
	smallest = B3_MPR_FMIN(smallest, gap3);

	if (b3MprEq(smallest, B3_MPR_TOLERANCE))
	{
		return 1;
	}
	return (smallest < B3_MPR_TOLERANCE) ? 1 : 0;
}

// Returns 1 when the dot product of the new support point v4 with `dir` is
// positive or approximately zero, 0 otherwise. `portal` is not read.
inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal,
	const b3MprSupport_t *v4,
	const b3Float4 *dir)
{
	const float along = b3MprVec3Dot(&v4->v, dir);
	if (b3MprIsZero(along))
	{
		return 1;
	}
	return (along > 0.f) ? 1 : 0;
}

// Replaces one of the portal's triangle vertices (points 1..3) with the new
// support point v4. The vertex to replace is chosen from the signs of the dot
// products of the portal vertices with cross(v4, point 0).
inline void b3ExpandPortal(b3MprSimplex_t *portal,
	const b3MprSupport_t *v4)
{
	b3Float4 v4v0;
	b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v);

	int replaced;
	if (b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0) > 0.f)
	{
		replaced = (b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0) > 0.f) ? 1 : 3;
	}
	else
	{
		replaced = (b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0) > 0.f) ? 2 : 1;
	}

	b3MprSimplexSet(portal, replaced, v4);
}



// Phase 1 of MPR: builds the initial portal (simplex points 0..3) for the
// pair of bodies.
//
// Return values, as produced by the code below:
//   0  - a four-point portal was built
//   1  - origin lies on v1
//   2  - origin lies on the v0-v1 segment
//  -1  - a support point was not past the origin in the search direction
//        (the caller treats this as no collision)
//
// `hasSepAxis` is not read or written here; `pairIndex` and `sepAxis` are
// only forwarded to b3MprSupport.
B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData,
b3ConstArray(b3Collidable_t) cpuCollidables,
b3ConstArray(b3Float4) cpuVertices,
__global b3Float4* sepAxis,
__global int* hasSepAxis,
b3MprSimplex_t *portal)
{
b3Float4 dir, va, vb;
float dot;
int cont;

// vertex 0 is center of portal
b3FindOrigin(bodyIndexA,bodyIndexB,cpuBodyBuf, b3MprSimplexPointW(portal, 0));
// the simplex now holds exactly one point
b3MprSimplexSetSize(portal, 1);


b3Float4 zero = b3MakeFloat4(0,0,0,0);
b3Float4* b3mpr_vec3_origin = &zero;

if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 0)->v, b3mpr_vec3_origin)){
// Portal's center lies on origin (0,0,0) => we know that objects
// intersect but we would need to know penetration info.
// So move center little bit...
b3MprVec3Set(&va, FLT_EPSILON * 10.f, 0.f, 0.f);
b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &va);
}


// vertex 1 = support in direction of origin
b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v);
b3MprVec3Scale(&dir, -1.f);
b3MprVec3Normalize(&dir);


b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 1));

b3MprSimplexSetSize(portal, 2);

// test if origin isn't outside of v1
dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir);

if (b3MprIsZero(dot) || dot < 0.f)
return -1;


// vertex 2
b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v,
&b3MprSimplexPoint(portal, 1)->v);
// A (near) zero cross product means v0 and v1 are parallel as vectors.
if (b3MprIsZero(b3MprVec3Len2(&dir))){
if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, b3mpr_vec3_origin)){
// origin lies on v1
return 1;
}else{
// origin lies on v0-v1 segment
return 2;
}
}

b3MprVec3Normalize(&dir);
b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 2));
dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir);
if (b3MprIsZero(dot) || dot < 0.f)
return -1;

b3MprSimplexSetSize(portal, 3);

// vertex 3 direction
b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,
&b3MprSimplexPoint(portal, 0)->v);
b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,
&b3MprSimplexPoint(portal, 0)->v);
b3MprVec3Cross(&dir, &va, &vb);
b3MprVec3Normalize(&dir);

// it is better to form portal faces to be oriented "outside" origin
dot = b3MprVec3Dot(&dir, &b3MprSimplexPoint(portal, 0)->v);
if (dot > 0.f){
b3MprSimplexSwap(portal, 1, 2);
b3MprVec3Scale(&dir, -1.f);
}

// Keep searching for point 3 until neither side test below asks for another
// pass; only then is the size set to 4, which ends the loop.
while (b3MprSimplexSize(portal) < 4){
b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 3));
dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir);
if (b3MprIsZero(dot) || dot < 0.f)
return -1;

cont = 0;

// test if origin is outside (v1, v0, v3) - set v2 as v3 and
// continue
b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 1)->v,
&b3MprSimplexPoint(portal, 3)->v);
dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);
if (dot < 0.f && !b3MprIsZero(dot)){
b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));
cont = 1;
}

if (!cont){
// test if origin is outside (v3, v0, v2) - set v1 as v3 and
// continue
b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 3)->v,
&b3MprSimplexPoint(portal, 2)->v);
dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);
if (dot < 0.f && !b3MprIsZero(dot)){
b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));
cont = 1;
}
}

if (cont){
// A vertex was replaced: recompute the search direction from the new
// v0-v1-v2 triangle.
b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,
&b3MprSimplexPoint(portal, 0)->v);
b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,
&b3MprSimplexPoint(portal, 0)->v);
b3MprVec3Cross(&dir, &va, &vb);
b3MprVec3Normalize(&dir);
}else{
b3MprSimplexSetSize(portal, 4);
}
}

return 0;
}


// Phase 2 of MPR: repeatedly pushes the portal outward until it is shown to
// contain the origin.
//
// Returns 0 as soon as portalEncapsulesOrigin succeeds. Returns -1 when the
// next support point cannot bring the origin inside, when the expansion has
// reached tolerance, or when B3_MPR_MAX_ITERATIONS passes were used up.
B3_STATIC int b3RefinePortal(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
	b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData,
	b3ConstArray(b3Collidable_t) cpuCollidables,
	b3ConstArray(b3Float4) cpuVertices,
	__global b3Float4* sepAxis,
	b3MprSimplex_t *portal)
{
	b3Float4 outward;
	b3MprSupport_t candidate;
	int passesLeft = B3_MPR_MAX_ITERATIONS;

	while (passesLeft > 0)
	{
		passesLeft--;

		// direction out of the portal, through the v1,v2,v3 face
		b3PortalDir(portal, &outward);

		// done once the origin is inside the portal
		if (portalEncapsulesOrigin(portal, &outward))
		{
			return 0;
		}

		// next support point in that direction
		b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&outward, &candidate);

		// give up if the candidate cannot enclose the origin ...
		if (!portalCanEncapsuleOrigin(portal, &candidate, &outward))
		{
			return -1;
		}
		// ... or if expanding no longer makes progress
		if (portalReachTolerance(portal, &candidate, &outward))
		{
			return -1;
		}

		// swap the candidate into the portal triangle
		b3ExpandPortal(portal, &candidate);
	}

	return -1;
}

// Estimates the contact position for a refined portal and stores it in *pos.
//
// Four weights b[0..3] are computed from triple products of the portal's
// Minkowski-difference points. If their sum is zero or negative, b[0] is
// dropped and the remaining three weights are recomputed against the portal
// direction. The same weights are then applied to the per-object support
// points (v1 and v2) of each simplex point, each result is divided by the
// weight sum, and *pos is the midpoint of the two.
//
// NOTE(review): `sum` is not checked again before computing 1/sum; a zero sum
// after the fallback would divide by zero - confirm this cannot occur.
B3_STATIC void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos)
{
	b3Float4 zero = b3MakeFloat4(0,0,0,0);
	b3Float4* b3mpr_vec3_origin = &zero;

	b3Float4 dir;
	size_t i;
	float b[4], sum, inv;
	b3Float4 vec, p1, p2;

	b3PortalDir(portal, &dir);

	// use barycentric coordinates of tetrahedron to find origin
	b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,
		&b3MprSimplexPoint(portal, 2)->v);
	b[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);

	b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,
		&b3MprSimplexPoint(portal, 2)->v);
	b[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);

	b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v,
		&b3MprSimplexPoint(portal, 1)->v);
	b[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);

	b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,
		&b3MprSimplexPoint(portal, 1)->v);
	b[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);

	sum = b[0] + b[1] + b[2] + b[3];

	// Fallback: weights from the portal triangle only, measured along dir.
	if (b3MprIsZero(sum) || sum < 0.f){
		b[0] = 0.f;

		b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,
			&b3MprSimplexPoint(portal, 3)->v);
		b[1] = b3MprVec3Dot(&vec, &dir);
		b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,
			&b3MprSimplexPoint(portal, 1)->v);
		b[2] = b3MprVec3Dot(&vec, &dir);
		b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,
			&b3MprSimplexPoint(portal, 2)->v);
		b[3] = b3MprVec3Dot(&vec, &dir);

		sum = b[1] + b[2] + b[3];
	}

	inv = 1.f / sum;

	// Weighted sums of the support points on object 1 (p1) and object 2 (p2).
	b3MprVec3Copy(&p1, b3mpr_vec3_origin);
	b3MprVec3Copy(&p2, b3mpr_vec3_origin);
	for (i = 0; i < 4; i++){
		b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v1);
		b3MprVec3Scale(&vec, b[i]);
		b3MprVec3Add(&p1, &vec);

		b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v2);
		b3MprVec3Scale(&vec, b[i]);
		b3MprVec3Add(&p2, &vec);
	}
	b3MprVec3Scale(&p1, inv);
	b3MprVec3Scale(&p2, inv);

	// Midpoint of the two weighted points. The literal is a float (0.5f) so
	// no double-precision constant is introduced.
	b3MprVec3Copy(pos, &p1);
	b3MprVec3Add(pos, &p2);
	b3MprVec3Scale(pos, 0.5f);
}

inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)
{
b3Float4 ab;
b3MprVec3Sub2(&ab, a, b);
return b3MprVec3Len2(&ab);
}

// Returns the squared distance from point P to the segment x0-b. When
// `witness` is not null it receives the point on the segment that the
// distance was measured to.
// NOTE(review): the parameter t is divided by the squared segment length
// without a zero check - confirm callers never pass x0 == b.
inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,
const b3Float4 *x0,
const b3Float4 *b,
b3Float4 *witness)
{
// The computation comes from solving equation of segment:
// S(t) = x0 + t.d
// where - x0 is initial point of segment
// - d is direction of segment from x0 (|d| > 0)
// - t belongs to <0, 1> interval
//
// Then, distance from a segment to some point P can be expressed:
// D(t) = |x0 + t.d - P|^2
// which is distance from any point on segment. Minimization
// of this function brings distance from P to segment.
// Minimization of D(t) leads to a simple quadratic equation that is
// straightforward to solve.
//
// Bonus of this method is witness point for free.

float dist, t;
b3Float4 d, a;

// direction of segment
b3MprVec3Sub2(&d, b, x0);

// precompute vector from P to x0
b3MprVec3Sub2(&a, x0, P);

t = -1.f * b3MprVec3Dot(&a, &d);
t /= b3MprVec3Len2(&d);

// t at or below 0: x0 is used; t at or above 1: b is used;
// otherwise the interior point x0 + t.d is used.
if (t < 0.f || b3MprIsZero(t)){
dist = b3MprVec3Dist2(x0, P);
if (witness)
b3MprVec3Copy(witness, x0);
}else if (t > 1.f || b3MprEq(t, 1.f)){
dist = b3MprVec3Dist2(b, P);
if (witness)
b3MprVec3Copy(witness, b);
}else{
if (witness){
b3MprVec3Copy(witness, &d);
b3MprVec3Scale(witness, t);
b3MprVec3Add(witness, x0);
dist = b3MprVec3Dist2(witness, P);
}else{
// recycling variables
b3MprVec3Scale(&d, t);
b3MprVec3Add(&d, &a);
dist = b3MprVec3Len2(&d);
}
}

return dist;
}


// Returns the squared distance from point P to the triangle (x0, B, C). When
// `witness` is not null it receives the point on the triangle that the
// distance was measured to.
// NOTE(review): s and t are computed with divisions by (w*v - r*r) and w
// without zero checks - confirm callers never pass a degenerate triangle.
inline float b3MprVec3PointTriDist2(const b3Float4 *P,
const b3Float4 *x0, const b3Float4 *B,
const b3Float4 *C,
b3Float4 *witness)
{
// Computation comes from analytic expression for triangle (x0, B, C)
// T(s, t) = x0 + s.d1 + t.d2, where d1 = B - x0 and d2 = C - x0 and
// Then equation for distance is:
// D(s, t) = | T(s, t) - P |^2
// This leads to minimization of quadratic function of two variables.
// The solution is taken only if s is between 0 and 1, t is
// between 0 and 1 and t + s < 1, otherwise distance from segment is
// computed.

b3Float4 d1, d2, a;
float u, v, w, p, q, r;
float s, t, dist, dist2;
b3Float4 witness2;

b3MprVec3Sub2(&d1, B, x0);
b3MprVec3Sub2(&d2, C, x0);
b3MprVec3Sub2(&a, x0, P);

// Dot products used by the closed-form minimizer below.
u = b3MprVec3Dot(&a, &a);
v = b3MprVec3Dot(&d1, &d1);
w = b3MprVec3Dot(&d2, &d2);
p = b3MprVec3Dot(&a, &d1);
q = b3MprVec3Dot(&a, &d2);
r = b3MprVec3Dot(&d1, &d2);

s = (q * r - w * p) / (w * v - r * r);
t = (-s * r - q) / w;

// Accept (s, t) only when it falls inside the triangle, with approximate
// comparisons at the boundaries.
if ((b3MprIsZero(s) || s > 0.f)
&& (b3MprEq(s, 1.f) || s < 1.f)
&& (b3MprIsZero(t) || t > 0.f)
&& (b3MprEq(t, 1.f) || t < 1.f)
&& (b3MprEq(t + s, 1.f) || t + s < 1.f)){

if (witness){
b3MprVec3Scale(&d1, s);
b3MprVec3Scale(&d2, t);
b3MprVec3Copy(witness, x0);
b3MprVec3Add(witness, &d1);
b3MprVec3Add(witness, &d2);

dist = b3MprVec3Dist2(witness, P);
}else{
// Expanded form of D(s, t); no witness point is needed.
dist = s * s * v;
dist += t * t * w;
dist += 2.f * s * t * r;
dist += 2.f * s * p;
dist += 2.f * t * q;
dist += u;
}
}else{
// Otherwise take the smallest distance to the three edges.
dist = _b3MprVec3PointSegmentDist2(P, x0, B, witness);

dist2 = _b3MprVec3PointSegmentDist2(P, x0, C, &witness2);
if (dist2 < dist){
dist = dist2;
if (witness)
b3MprVec3Copy(witness, &witness2);
}

dist2 = _b3MprVec3PointSegmentDist2(P, B, C, &witness2);
if (dist2 < dist){
dist = dist2;
if (witness)
b3MprVec3Copy(witness, &witness2);
}
}

return dist;
}


// Phase 3 of MPR: expands the portal until tolerance is reached, then writes
// the penetration depth (*depth), direction (*pdir) and position (*pos).
//
// The depth is the distance from the origin to the portal triangle
// (points 1, 2, 3); the direction is the normalized closest point on that
// triangle, or the portal direction when that point is approximately zero.
B3_STATIC void b3FindPenetr(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData,
b3ConstArray(b3Collidable_t) cpuCollidables,
b3ConstArray(b3Float4) cpuVertices,
__global b3Float4* sepAxis,
b3MprSimplex_t *portal,
float *depth, b3Float4 *pdir, b3Float4 *pos)
{
b3Float4 dir;
b3MprSupport_t v4;
unsigned long iterations;

b3Float4 zero = b3MakeFloat4(0,0,0,0);
b3Float4* b3mpr_vec3_origin = &zero;


// `iterations` starts at 1 and grows by one per pass, so it equals
// B3_MPR_MAX_ITERATIONS on the last pass of the loop; the branch below is
// therefore taken at the latest on that pass.
iterations = 1UL;
for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)
//while (1)
{
// compute portal direction and obtain next support point
b3PortalDir(portal, &dir);
b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);


// reached tolerance -> find penetration info
if (portalReachTolerance(portal, &v4, &dir)
|| iterations ==B3_MPR_MAX_ITERATIONS)
{
// Squared distance from the origin to the portal triangle; the closest
// point is written to *pdir.
*depth = b3MprVec3PointTriDist2(b3mpr_vec3_origin,&b3MprSimplexPoint(portal, 1)->v,&b3MprSimplexPoint(portal, 2)->v,&b3MprSimplexPoint(portal, 3)->v,pdir);
*depth = B3_MPR_SQRT(*depth);
// Closest point is (approximately) the origin: fall back to the portal
// direction so the normalization below has a usable vector.
if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z))
{
*pdir = dir;
}
b3MprVec3Normalize(pdir);
// barycentric coordinates:
b3FindPos(portal, pos);


return;
}

b3ExpandPortal(portal, &v4);

iterations++;
}
}

// Touching contact on the portal's point 1: the depth is zero, the direction
// is set to the zero vector, and the position is the midpoint of the two
// per-object support points stored in point 1.
B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal,float *depth, b3Float4 *dir, b3Float4 *pos)
{
	*depth = 0.f;
	b3Float4 zero = b3MakeFloat4(0,0,0,0);
	b3Float4* b3mpr_vec3_origin = &zero;

	b3MprVec3Copy(dir, b3mpr_vec3_origin);

	// Float literal (0.5f), consistent with b3FindPenetrSegment; avoids a
	// double-precision constant in float-only code.
	b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);
	b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);
	b3MprVec3Scale(pos, 0.5f);
}

// Origin lies on the v0-v1 segment of the portal.
// The position is the midpoint of point 1's two per-object support points;
// the direction is point 1's Minkowski-difference vector, normalized, and the
// depth is that vector's length before normalization.
B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal,
	float *depth, b3Float4 *dir, b3Float4 *pos)
{
	const b3MprSupport_t *pointOne = b3MprSimplexPoint(portal, 1);

	// position: halfway between the two support points
	b3MprVec3Copy(pos, &pointOne->v1);
	b3MprVec3Add(pos, &pointOne->v2);
	b3MprVec3Scale(pos, 0.5f);

	// direction and depth from the difference vector
	b3MprVec3Copy(dir, &pointOne->v);
	*depth = B3_MPR_SQRT(b3MprVec3Len2(dir));
	b3MprVec3Normalize(dir);
}



// Runs the three MPR phases for one pair of bodies.
//
// Returns 0 when contact information was written to depthOut / dirOut /
// posOut, and -1 when portal discovery or refinement reports no collision.
//
// hasSepAxis[pairIndex] is cleared on entry and set to 1 only on the full
// penetration path (discovery result 0 followed by successful refinement), in
// which case sepAxis[pairIndex] receives the negated penetration direction.
// The touching (1) and segment (2) results return 0 but leave
// hasSepAxis[pairIndex] at 0 and do not write sepAxis.
inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,
	b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,
	b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData,
	b3ConstArray(b3Collidable_t) cpuCollidables,
	b3ConstArray(b3Float4) cpuVertices,
	__global b3Float4* sepAxis,
	__global int* hasSepAxis,
	float *depthOut, b3Float4* dirOut, b3Float4* posOut)
{
	b3MprSimplex_t portal;

	hasSepAxis[pairIndex] = 0;
	int res;

	// Phase 1: Portal discovery
	res = b3DiscoverPortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,hasSepAxis, &portal);

	switch (res)
	{
		case 0:
		{
			// Phase 2: Portal refinement
			res = b3RefinePortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal);
			if (res < 0)
				return -1;

			// Phase 3. Penetration info
			b3FindPenetr(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal, depthOut, dirOut, posOut);
			hasSepAxis[pairIndex] = 1;
			sepAxis[pairIndex] = -*dirOut;
			break;
		}
		case 1:
		{
			// Touching contact on portal's v1.
			b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);
			break;
		}
		case 2:
		{
			// Origin lies on the v0-v1 segment.
			b3FindPenetrSegment( &portal, depthOut, dirOut, posOut);
			break;
		}
		default:
		{
			hasSepAxis[pairIndex]=0;
			// Origin isn't inside portal - no collision.
			return -1;
		}
	}
	return 0;
}



#endif //B3_MPR_PENETRATION_H

+ 196
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h View File

@@ -0,0 +1,196 @@

#ifndef B3_NEW_CONTACT_REDUCTION_H
#define B3_NEW_CONTACT_REDUCTION_H

#include "Bullet3Common/shared/b3Float4.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"

// The number of contact points of a contact is stored in the w component of
// its m_worldNormalOnB field; this macro names that storage location.
#define GET_NPOINTS(x) (x).m_worldNormalOnB.w


// Reduces a set of contact points to at most four and writes their indices
// into contactIdx[0].
//
// Returns 0 for no points and nPoints itself when there are four or fewer
// (contactIdx is not written in either case). Otherwise at most the first 64
// points are considered, indices are chosen along four sampled directions in
// the plane perpendicular to `nearNormal`, and 4 is returned.
// The w component of each point is compared to find the "deepest" point (the
// smallest w).
int b3ExtractManifoldSequentialGlobal(__global const b3Float4* p, int nPoints, b3Float4ConstArg nearNormal, b3Int4* contactIdx)
{
if( nPoints == 0 )
return 0;
if (nPoints <=4)
return nPoints;
if (nPoints >64)
nPoints = 64;
// Average of the considered points.
b3Float4 center = b3MakeFloat4(0,0,0,0);
{
for (int i=0;i<nPoints;i++)
center += p[i];
center /= (float)nPoints;
}
// sample 4 directions
// u and v are built from cross products with nearNormal; the directions
// sampled below are u, -u, v and -v.
b3Float4 aVector = p[0] - center;
b3Float4 u = b3Cross( nearNormal, aVector );
b3Float4 v = b3Cross( nearNormal, u );
u = b3Normalized( u );
v = b3Normalized( v );
//keep point with deepest penetration
float minW= FLT_MAX;
int minIndex=-1;
// NOTE(review): despite the name, maxDots starts at FLT_MIN (the smallest
// positive normalized float) and is updated when a projection is SMALLER, so
// each slot tracks the minimum projection along its direction. Because both
// signs of u and v are sampled, extremes in all four directions are still
// selected - confirm this is the intended behavior.
// A slot whose projections are never below FLT_MIN keeps the index the caller
// passed in.
b3Float4 maxDots;
maxDots.x = FLT_MIN;
maxDots.y = FLT_MIN;
maxDots.z = FLT_MIN;
maxDots.w = FLT_MIN;
// idx, distance
for(int ie = 0; ie<nPoints; ie++ )
{
if (p[ie].w<minW)
{
minW = p[ie].w;
minIndex=ie;
}
float f;
b3Float4 r = p[ie]-center;
f = b3Dot( u, r );
if (f<maxDots.x)
{
maxDots.x = f;
contactIdx[0].x = ie;
}
f = b3Dot( -u, r );
if (f<maxDots.y)
{
maxDots.y = f;
contactIdx[0].y = ie;
}
f = b3Dot( v, r );
if (f<maxDots.z)
{
maxDots.z = f;
contactIdx[0].z = ie;
}
f = b3Dot( -v, r );
if (f<maxDots.w)
{
maxDots.w = f;
contactIdx[0].w = ie;
}
}
// Make sure the point with the smallest w is part of the result.
if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
{
//replace the first contact with minimum (todo: replace contact with least penetration)
contactIdx[0].x = minIndex;
}
return 4;
}

// Builds the output contact for one pair (`pairIndex`).
//
// If the pair has a separating axis flag set and clippingFaces[pairIndex].w
// reports at least one point, the pair's points (its slice of worldVertsB2,
// vertexFaceCapacity entries per pair) are reduced to at most four, a slot in
// globalContactsOut is reserved by atomically incrementing
// *nGlobalContactsOut, and the contact is written there. The reserved slot
// index is also stored in pairs[pairIndex].w.
//
// The counter is incremented before the capacity check, so it keeps growing
// even for contacts that are dropped because dstIdx >= contactCapacity.
__kernel void b3NewContactReductionKernel( __global b3Int4* pairs,
__global const b3RigidBodyData_t* rigidBodies,
__global const b3Float4* separatingNormals,
__global const int* hasSeparatingAxis,
__global struct b3Contact4Data* globalContactsOut,
__global b3Int4* clippingFaces,
__global b3Float4* worldVertsB2,
volatile __global int* nGlobalContactsOut,
int vertexFaceCapacity,
int contactCapacity,
int numPairs,
int pairIndex
)
{
// int i = get_global_id(0);
//int pairIndex = i;
// Here the pair index is supplied by the caller; i and pairIndex are the
// same value.
int i = pairIndex;

// Default selection: the first four points, used when the reduction leaves
// contactIdx untouched (four or fewer points).
b3Int4 contactIdx;
contactIdx=b3MakeInt4(0,1,2,3);
if (i<numPairs)
{
if (hasSeparatingAxis[i])
{
int nPoints = clippingFaces[pairIndex].w;
if (nPoints>0)
{

__global b3Float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];
b3Float4 normal = -separatingNormals[i];
int nReducedContacts = b3ExtractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);
int dstIdx;
dstIdx = b3AtomicInc( nGlobalContactsOut);
//#if 0
b3Assert(dstIdx < contactCapacity);
if (dstIdx < contactCapacity)
{

__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
// -normal is the original separatingNormals[i] again.
c->m_worldNormalOnB = -normal;
// Hard-coded coefficients: 0 and 0.7, each scaled by 0xffff.
c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
c->m_batchIdx = pairIndex;
int bodyA = pairs[pairIndex].x;
int bodyB = pairs[pairIndex].y;

pairs[pairIndex].w = dstIdx;

// A body with zero inverse mass is stored with a negated index.
// NOTE(review): index 0 cannot be distinguished this way (-0 == 0).
c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;
c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;
c->m_childIndexA =-1;
c->m_childIndexB =-1;

// Intentional fall-through: case N copies point N-1 and then every point
// below it.
switch (nReducedContacts)
{
case 4:
c->m_worldPosB[3] = pointsIn[contactIdx.w];
case 3:
c->m_worldPosB[2] = pointsIn[contactIdx.z];
case 2:
c->m_worldPosB[1] = pointsIn[contactIdx.y];
case 1:
c->m_worldPosB[0] = pointsIn[contactIdx.x];
default:
{
}
};
// Stored in m_worldNormalOnB.w (see GET_NPOINTS), overwriting the w written
// with the normal above.
GET_NPOINTS(*c) = nReducedContacts;
}
//#endif
}// if (numContactsOut>0)
}// if (hasSeparatingAxis[i])
}// if (i<numPairs)

}
#endif

+ 90
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h View File

@@ -0,0 +1,90 @@


#ifndef B3_QUANTIZED_BVH_NODE_H
#define B3_QUANTIZED_BVH_NODE_H

#include "Bullet3Common/shared/b3Float4.h"

#define B3_MAX_NUM_PARTS_IN_BITS 10

///b3QuantizedBvhNodeData is a compressed aabb node, 16 bytes.
///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).
typedef struct b3QuantizedBvhNodeData b3QuantizedBvhNodeData_t;

// One BVH node with a quantized bounding box (three unsigned 16-bit values per corner)
// plus a single int that is interpreted either as a triangle index or as an escape index.
struct b3QuantizedBvhNodeData
{
//12 bytes
unsigned short int m_quantizedAabbMin[3];
unsigned short int m_quantizedAabbMax[3];
//4 bytes
// >= 0: leaf node, value holds the triangle index (see b3IsLeaf / b3GetTriangleIndex)
// <  0: internal node, the negated value is the escape index (see b3GetEscapeIndex)
int m_escapeIndexOrTriangleIndex;
};

// Returns the triangle index stored in a leaf node: the node's index field with
// every bit from position (31-B3_MAX_NUM_PARTS_IN_BITS) upward cleared.
inline int b3GetTriangleIndex(const b3QuantizedBvhNodeData* rootNode)
{
	// All-ones shifted left leaves the high bits set; inverting it below keeps only the low bits.
	const unsigned int highBitsMask = (~0u) << (31-B3_MAX_NUM_PARTS_IN_BITS);
	return (rootNode->m_escapeIndexOrTriangleIndex & ~highBitsMask);
}

// Returns 1 when the node is a leaf (stored index is non-negative), 0 for an internal node.
inline int b3IsLeaf(const b3QuantizedBvhNodeData* rootNode)
{
	// a negative value marks an internal node (it encodes the skip/escape index)
	if (rootNode->m_escapeIndexOrTriangleIndex < 0)
	{
		return 0;
	}
	return 1;
}
// Returns the escape index of an internal node, i.e. the negation of the stored (negative) value.
inline int b3GetEscapeIndex(const b3QuantizedBvhNodeData* rootNode)
{
	const int storedIndex = rootNode->m_escapeIndexOrTriangleIndex;
	return -storedIndex;
}

inline void b3QuantizeWithClamp(unsigned short* out, b3Float4ConstArg point2,int isMax, b3Float4ConstArg bvhAabbMin, b3Float4ConstArg bvhAabbMax, b3Float4ConstArg bvhQuantization)
{
b3Float4 clampedPoint = b3MaxFloat4(point2,bvhAabbMin);
clampedPoint = b3MinFloat4 (clampedPoint, bvhAabbMax);

b3Float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;
if (isMax)
{
out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1));
out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1));
out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1));
} else
{
out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe));
out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe));
out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe));
}

}


// Overlap test for two quantized axis-aligned boxes, each given as 3-element min/max arrays.
// Returns 1 when the boxes overlap or touch on all three axes, 0 as soon as one axis separates them.
inline int b3TestQuantizedAabbAgainstQuantizedAabbSlow(
const unsigned short int* aabbMin1,
const unsigned short int* aabbMax1,
const unsigned short int* aabbMin2,
const unsigned short int* aabbMax2)
{
	for (int axis = 0; axis < 3; ++axis)
	{
		// separated on this axis: box 1 lies entirely above or entirely below box 2
		if (aabbMin1[axis] > aabbMax2[axis] || aabbMax1[axis] < aabbMin2[axis])
		{
			return 0;
		}
	}
	return 1;
}


#endif //B3_QUANTIZED_BVH_NODE_H

+ 97
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3ReduceContacts.h View File

@@ -0,0 +1,97 @@
#ifndef B3_REDUCE_CONTACTS_H
#define B3_REDUCE_CONTACTS_H

// Picks up to four representative contact points out of p[0..nPoints-1].
//
// p          - candidate points; the w component is compared to find the smallest value
// nPoints    - number of candidates; values above 64 are clamped to 64
// nearNormal - normal used to build the two in-plane sampling directions
// contactIdx - output: contactIdx[0].x/.y/.z/.w receive the chosen indices
//
// Returns 0 for no points, nPoints when there are four or fewer (contactIdx is NOT
// written in that case, so the caller's initial values stay in effect), otherwise 4.
inline int b3ReduceContacts(const b3Float4* p, int nPoints, const b3Float4& nearNormal, b3Int4* contactIdx)
{
if( nPoints == 0 )
return 0;
if (nPoints <=4)
return nPoints;
if (nPoints >64)
nPoints = 64;
// centroid of the (possibly clamped) point set
b3Float4 center = b3MakeFloat4(0,0,0,0);
{
for (int i=0;i<nPoints;i++)
center += p[i];
center /= (float)nPoints;
}
// sample 4 directions
// u and v are built from cross products with nearNormal, starting from the offset of p[0] to the centroid
b3Float4 aVector = p[0] - center;
b3Float4 u = b3Cross3( nearNormal, aVector );
b3Float4 v = b3Cross3( nearNormal, u );
u = b3FastNormalized3( u );
v = b3FastNormalized3( v );
//keep point with deepest penetration
float minW= FLT_MAX;
int minIndex=-1;
// NOTE(review): despite its name, maxDots tracks the SMALLEST dot product seen per direction
// (the comparisons below are '<'), and it starts at FLT_MIN, the smallest positive float,
// not the most negative one. A direction therefore only records an index once some point
// has a dot product below FLT_MIN.
b3Float4 maxDots;
maxDots.x = FLT_MIN;
maxDots.y = FLT_MIN;
maxDots.z = FLT_MIN;
maxDots.w = FLT_MIN;
// idx, distance
for(int ie = 0; ie<nPoints; ie++ )
{
if (p[ie].w<minW)
{
minW = p[ie].w;
minIndex=ie;
}
float f;
// offset of this point from the centroid, projected on u, -u, v and -v in turn
b3Float4 r = p[ie]-center;
f = b3Dot3F4( u, r );
if (f<maxDots.x)
{
maxDots.x = f;
contactIdx[0].x = ie;
}
f = b3Dot3F4( -u, r );
if (f<maxDots.y)
{
maxDots.y = f;
contactIdx[0].y = ie;
}
f = b3Dot3F4( v, r );
if (f<maxDots.z)
{
maxDots.z = f;
contactIdx[0].z = ie;
}
f = b3Dot3F4( -v, r );
if (f<maxDots.w)
{
maxDots.w = f;
contactIdx[0].w = ie;
}
}
// make sure the point with the smallest w is always part of the result
if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
{
//replace the first contact with minimum (todo: replace contact with least penetration)
contactIdx[0].x = minIndex;
}
return 4;
}

#endif //B3_REDUCE_CONTACTS_H

+ 34
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h View File

@@ -0,0 +1,34 @@
#ifndef B3_RIGIDBODY_DATA_H
#define B3_RIGIDBODY_DATA_H

#include "Bullet3Common/shared/b3Float4.h"
#include "Bullet3Common/shared/b3Quat.h"
#include "Bullet3Common/shared/b3Mat3x3.h"

typedef struct b3RigidBodyData b3RigidBodyData_t;


// Per-body state record shared between host and kernel code.
struct b3RigidBodyData
{
b3Float4 m_pos; // position
b3Quat m_quat; // orientation
b3Float4 m_linVel; // linear velocity
b3Float4 m_angVel; // angular velocity

int m_collidableIdx; // index into the collidables array
float m_invMass; // inverse mass; code in this file special-cases the value 0 (presumably a static body - confirm)
float m_restituitionCoeff; // restitution coefficient
float m_frictionCoeff; // friction coefficient
};

typedef struct b3InertiaData b3InertiaData_t;

// Pair of 3x3 inverse-inertia matrices kept per body.
struct b3InertiaData
{
b3Mat3x3 m_invInertiaWorld; // NOTE(review): presumably the inverse inertia in world space - confirm
b3Mat3x3 m_initInvInertia; // NOTE(review): presumably the initial (local) inverse inertia - confirm
};


#endif //B3_RIGIDBODY_DATA_H

+ 40
- 0
src/bullet/Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h View File

@@ -0,0 +1,40 @@
#ifndef B3_UPDATE_AABBS_H
#define B3_UPDATE_AABBS_H



#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"



// Computes the world-space AABB of one body and stores it in worldAabbs[bodyId].
//
// bodyId         - index of the body in 'bodies' and of the output slot in 'worldAabbs'
// bodies         - rigid body array (position, orientation, collidable index, inverse mass)
// collidables    - collidable array, indexed by the body's m_collidableIdx
// localShapeAABB - local-space AABBs, indexed by the collidable index
// worldAabbs     - output array; left untouched when the collidable's shape index is negative
//
// Besides the transformed bounds, the body id is stored in m_minIndices[3] and
// m_signedMaxIndices[3] is set to 0 when the body's inverse mass is zero, 1 otherwise.
void b3ComputeWorldAabb( int bodyId, __global const b3RigidBodyData_t* bodies, __global const b3Collidable_t* collidables, __global const b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs)
{
	__global const b3RigidBodyData_t* body = &bodies[bodyId];

	b3Float4 position = body->m_pos;
	b3Quat orientation = body->m_quat;
	int collidableIndex = body->m_collidableIdx;
	int shapeIndex = collidables[collidableIndex].m_shapeIndex;
	if (shapeIndex>=0)
	{
		b3Aabb_t localAabb = localShapeAABB[collidableIndex];
		b3Aabb_t worldAabb;
		b3Float4 aabbAMinOut,aabbAMaxOut;
		float margin = 0.f;
		b3TransformAabb2(localAabb.m_minVec,localAabb.m_maxVec,margin,position,orientation,&aabbAMinOut,&aabbAMaxOut);
		worldAabb.m_minVec =aabbAMinOut;
		worldAabb.m_minIndices[3] = bodyId;
		worldAabb.m_maxVec = aabbAMaxOut;
		// 'body' already points at bodies[bodyId]; indexing it with bodyId again would read
		// bodies[2*bodyId] (wrong body, possibly out of bounds).
		worldAabb.m_signedMaxIndices[3] = body->m_invMass==0.f? 0 : 1;
		worldAabbs[bodyId] = worldAabb;
	}
}

#endif //B3_UPDATE_AABBS_H

+ 181
- 0
src/bullet/Bullet3Common/b3AlignedAllocator.cpp View File

@@ -0,0 +1,181 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#include "b3AlignedAllocator.h"

// Global allocation statistics, updated by b3AlignedAllocInternal / b3AlignedFreeInternal.
int b3g_numAlignedAllocs = 0; // number of aligned allocation calls
int b3g_numAlignedFree = 0; // number of aligned free calls
int b3g_totalBytesAlignedAllocs = 0;//detect memory leaks (only maintained by the B3_DEBUG_MEMORY_ALLOCATIONS variants)

// Default unaligned allocation hook: forwards to malloc.
static void *b3AllocDefault(size_t size)
{
return malloc(size);
}

// Default unaligned free hook: forwards to free.
static void b3FreeDefault(void *ptr)
{
free(ptr);
}

static b3AllocFunc* b3s_allocFunc = b3AllocDefault;
static b3FreeFunc* b3s_freeFunc = b3FreeDefault;



#if defined (B3_HAS_ALIGNED_ALLOCATOR)
#include <malloc.h>
static void *b3AlignedAllocDefault(size_t size, int alignment)
{
return _aligned_malloc(size, (size_t)alignment);
}

static void b3AlignedFreeDefault(void *ptr)
{
_aligned_free(ptr);
}
#elif defined(__CELLOS_LV2__)
#include <stdlib.h>

static inline void *b3AlignedAllocDefault(size_t size, int alignment)
{
return memalign(alignment, size);
}

static inline void b3AlignedFreeDefault(void *ptr)
{
free(ptr);
}
#else





// Portable aligned allocation built on the unaligned hook (b3s_allocFunc).
// Over-allocates by one pointer plus (alignment-1) bytes, aligns the address that follows
// the pointer-sized header, and stores the raw block address in the slot just before the
// returned pointer so b3AlignedFreeDefault can recover it. Returns NULL when the hook fails.
static inline void *b3AlignedAllocDefault(size_t size, int alignment)
{
	char *rawBlock = (char *)b3s_allocFunc(size + sizeof(void *) + (alignment-1));
	if (!rawBlock)
	{
		return (void *)(rawBlock);
	}

	void *aligned = b3AlignPointer(rawBlock + sizeof(void *),alignment);
	// remember where the underlying block really starts
	*((void **)(aligned)-1) = (void *)(rawBlock);
	return (aligned);
}

// Releases a block obtained from the portable b3AlignedAllocDefault.
// Reads the raw block address stored just before 'ptr' and hands it to the unaligned
// free hook. A NULL pointer is ignored.
static inline void b3AlignedFreeDefault(void *ptr)
{
	if (!ptr)
	{
		return;
	}

	void* rawBlock = *((void **)(ptr)-1);
	b3s_freeFunc(rawBlock);
}
#endif


static b3AlignedAllocFunc* b3s_alignedAllocFunc = b3AlignedAllocDefault;
static b3AlignedFreeFunc* b3s_alignedFreeFunc = b3AlignedFreeDefault;

// Installs custom aligned allocate/free hooks. A NULL argument restores the
// corresponding default implementation.
void b3AlignedAllocSetCustomAligned(b3AlignedAllocFunc *allocFunc, b3AlignedFreeFunc *freeFunc)
{
	if (allocFunc)
	{
		b3s_alignedAllocFunc = allocFunc;
	}
	else
	{
		b3s_alignedAllocFunc = b3AlignedAllocDefault;
	}

	if (freeFunc)
	{
		b3s_alignedFreeFunc = freeFunc;
	}
	else
	{
		b3s_alignedFreeFunc = b3AlignedFreeDefault;
	}
}

// Installs custom unaligned allocate/free hooks. A NULL argument restores the
// corresponding default implementation.
void b3AlignedAllocSetCustom(b3AllocFunc *allocFunc, b3FreeFunc *freeFunc)
{
	if (allocFunc)
	{
		b3s_allocFunc = allocFunc;
	}
	else
	{
		b3s_allocFunc = b3AllocDefault;
	}

	if (freeFunc)
	{
		b3s_freeFunc = freeFunc;
	}
	else
	{
		b3s_freeFunc = b3FreeDefault;
	}
}

#ifdef B3_DEBUG_MEMORY_ALLOCATIONS
//this generic allocator provides the total allocated number of bytes
#include <stdio.h>

// Debug variant of the aligned allocator: tracks the number of allocations and the
// total number of live bytes, and logs every call together with its call site.
//
// Block layout, in front of the returned (aligned) pointer 'ret':
//   ((void**)ret)[-1] : address of the raw block returned by the unaligned hook
//   ((void**)ret)[-2] : pointer-sized slot whose first int holds the requested size
// The two slots must not overlap: on platforms where pointers are wider than int, an
// int written at ((int*)ret)[-2] would land inside the raw-pointer slot and corrupt
// the address later passed to the free hook.
//
// Returns NULL when the underlying hook fails.
void* b3AlignedAllocInternal (size_t size, int alignment,int line,char* filename)
{
	void *ret;
	char *real;

	b3g_numAlignedAllocs++;

	real = (char *)b3s_allocFunc(size + 2*sizeof(void *) + (alignment-1));
	if (real) {
		ret = (void*) b3AlignPointer(real + 2*sizeof(void *), alignment);
		*((void **)(ret)-1) = (void *)(real);
		*((int*)((void **)(ret)-2)) = (int)size;
		// only count bytes that were really handed out, so a failed allocation
		// does not show up as a leak
		b3g_totalBytesAlignedAllocs += (int)size;
	} else {
		ret = (void *)(real);
	}

	b3Printf("allocation#%d at address %p, from %s,line %d, size %d\n",b3g_numAlignedAllocs,(void*)real, filename,line,(int)size);

	// debug marker in the first int of the user block; skipped when the allocation
	// failed or the block is too small to hold it
	if (ret && size >= sizeof(int))
	{
		int* ptr = (int*)ret;
		*ptr = 12;
	}
	return (ret);
}

// Debug variant of the aligned free: reads back the size and raw block address stored
// by the debug b3AlignedAllocInternal, updates the statistics, logs the call and
// releases the raw block through the unaligned hook. A NULL pointer is only logged.
void b3AlignedFreeInternal (void* ptr,int line,char* filename)
{

	void* real;
	b3g_numAlignedFree++;

	if (ptr) {
		real = *((void **)(ptr)-1);
		int size = *((int*)((void **)(ptr)-2));
		b3g_totalBytesAlignedAllocs -= size;

		b3Printf("free #%d at address %p, from %s,line %d, size %d\n",b3g_numAlignedFree,real, filename,line,size);

		b3s_freeFunc(real);
	} else
	{
		b3Printf("NULL ptr\n");
	}
}

#else //B3_DEBUG_MEMORY_ALLOCATIONS

// Release variant of the aligned allocator: counts the call and forwards to the
// currently installed aligned allocation hook.
void* b3AlignedAllocInternal (size_t size, int alignment)
{
	++b3g_numAlignedAllocs;
	return b3s_alignedAllocFunc(size, alignment);
}

// Release variant of the aligned free: NULL is ignored (and not counted); any other
// pointer is counted and forwarded to the currently installed aligned free hook.
void b3AlignedFreeInternal (void* ptr)
{
	if (ptr)
	{
		++b3g_numAlignedFree;
		b3s_alignedFreeFunc(ptr);
	}
}

#endif //B3_DEBUG_MEMORY_ALLOCATIONS


+ 107
- 0
src/bullet/Bullet3Common/b3AlignedAllocator.h View File

@@ -0,0 +1,107 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_ALIGNED_ALLOCATOR
#define B3_ALIGNED_ALLOCATOR

///we probably replace this with our own aligned memory allocator
///so we replace _aligned_malloc and _aligned_free with our own
///that is better portable and more predictable

#include "b3Scalar.h"
//#define B3_DEBUG_MEMORY_ALLOCATIONS 1
#ifdef B3_DEBUG_MEMORY_ALLOCATIONS

#define b3AlignedAlloc(a,b) \
b3AlignedAllocInternal(a,b,__LINE__,__FILE__)

#define b3AlignedFree(ptr) \
b3AlignedFreeInternal(ptr,__LINE__,__FILE__)

void* b3AlignedAllocInternal (size_t size, int alignment,int line,char* filename);

void b3AlignedFreeInternal (void* ptr,int line,char* filename);

#else
void* b3AlignedAllocInternal (size_t size, int alignment);
void b3AlignedFreeInternal (void* ptr);

#define b3AlignedAlloc(size,alignment) b3AlignedAllocInternal(size,alignment)
#define b3AlignedFree(ptr) b3AlignedFreeInternal(ptr)

#endif
typedef int btSizeType;

typedef void *(b3AlignedAllocFunc)(size_t size, int alignment);
typedef void (b3AlignedFreeFunc)(void *memblock);
typedef void *(b3AllocFunc)(size_t size);
typedef void (b3FreeFunc)(void *memblock);

///The developer can let all Bullet memory allocations go through a custom memory allocator, using b3AlignedAllocSetCustom
void b3AlignedAllocSetCustom(b3AllocFunc *allocFunc, b3FreeFunc *freeFunc);
///If the developer already has a custom aligned allocator, then b3AlignedAllocSetCustomAligned can be used. The default aligned allocator pre-allocates extra memory using the non-aligned allocator, and instruments it.
void b3AlignedAllocSetCustomAligned(b3AlignedAllocFunc *allocFunc, b3AlignedFreeFunc *freeFunc);


///The b3AlignedAllocator is a portable class for aligned memory allocations.
///Default implementations for unaligned and aligned allocations can be overridden by a custom allocator using b3AlignedAllocSetCustom and b3AlignedAllocSetCustomAligned.
// Stateless allocator for objects of type T whose storage is aligned to 'Alignment' bytes.
// All memory is obtained and released through the b3AlignedAlloc / b3AlignedFree macros.
template < typename T , unsigned Alignment >
class b3AlignedAllocator {
typedef b3AlignedAllocator< T , Alignment > self_type;
public:

//just going down a list:
b3AlignedAllocator() {}
/*
b3AlignedAllocator( const self_type & ) {}
*/

// converting constructor from an allocator for another element type; there is no state to copy
template < typename Other >
b3AlignedAllocator( const b3AlignedAllocator< Other , Alignment > & ) {}

typedef const T* const_pointer;
typedef const T& const_reference;
typedef T* pointer;
typedef T& reference;
typedef T value_type;

pointer address ( reference ref ) const { return &ref; }
const_pointer address ( const_reference ref ) const { return &ref; }
// allocates raw, unconstructed storage for n objects; 'hint' is ignored
pointer allocate ( btSizeType n , const_pointer * hint = 0 ) {
(void)hint;
return reinterpret_cast< pointer >(b3AlignedAlloc( sizeof(value_type) * n , Alignment ));
}
// copy-constructs 'value' into already allocated storage at ptr
void construct ( pointer ptr , const value_type & value ) { new (ptr) value_type( value ); }
// releases storage obtained from allocate(); does not run destructors
void deallocate( pointer ptr ) {
b3AlignedFree( reinterpret_cast< void * >( ptr ) );
}
// runs the destructor of the object at ptr without releasing its storage
void destroy ( pointer ptr ) { ptr->~value_type(); }

template < typename O > struct rebind {
typedef b3AlignedAllocator< O , Alignment > other;
};
// assignment from any allocator with the same alignment is a no-op
template < typename O >
self_type & operator=( const b3AlignedAllocator< O , Alignment > & ) { return *this; }

// allocators of the same type always compare equal
friend bool operator==( const self_type & , const self_type & ) { return true; }
};



#endif //B3_ALIGNED_ALLOCATOR


+ 517
- 0
src/bullet/Bullet3Common/b3AlignedObjectArray.h View File

@@ -0,0 +1,517 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/


#ifndef B3_OBJECT_ARRAY__
#define B3_OBJECT_ARRAY__

#include "b3Scalar.h" // has definitions like B3_FORCE_INLINE
#include "b3AlignedAllocator.h"

///If the platform doesn't support placement new, you can disable B3_USE_PLACEMENT_NEW
///then the b3AlignedObjectArray doesn't support objects with virtual methods, and non-trivial constructors/destructors
///You can enable B3_USE_MEMCPY, then swapping elements in the array will use memcpy instead of operator=
///see discussion here: http://continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1231 and
///http://www.continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1240

#define B3_USE_PLACEMENT_NEW 1
//#define B3_USE_MEMCPY 1 //disable, because it is cumbersome to find out for each platform where memcpy is defined. It can be in <memory.h> or <string.h> or otherwise...
#define B3_ALLOW_ARRAY_COPY_OPERATOR // enabling this can accidentally perform deep copies of data if you are not careful

#ifdef B3_USE_MEMCPY
#include <memory.h>
#include <string.h>
#endif //B3_USE_MEMCPY

#ifdef B3_USE_PLACEMENT_NEW
#include <new> //for placement new
#endif //B3_USE_PLACEMENT_NEW


///The b3AlignedObjectArray template class uses a subset of the stl::vector interface for its methods
///It is developed to replace stl::vector to avoid portability issues, including STL alignment issues to add SIMD/SSE data
template <typename T>
//template <class T>
class b3AlignedObjectArray
{
b3AlignedAllocator<T , 16> m_allocator;

int m_size;
int m_capacity;
T* m_data;
//PCK: added this line
bool m_ownsMemory;

#ifdef B3_ALLOW_ARRAY_COPY_OPERATOR
public:
B3_FORCE_INLINE b3AlignedObjectArray<T>& operator=(const b3AlignedObjectArray<T> &other)
{
copyFromArray(other);
return *this;
}
#else//B3_ALLOW_ARRAY_COPY_OPERATOR
private:
B3_FORCE_INLINE b3AlignedObjectArray<T>& operator=(const b3AlignedObjectArray<T> &other);
#endif//B3_ALLOW_ARRAY_COPY_OPERATOR

protected:
B3_FORCE_INLINE int allocSize(int size)
{
return (size ? size*2 : 1);
}
B3_FORCE_INLINE void copy(int start,int end, T* dest) const
{
int i;
for (i=start;i<end;++i)
#ifdef B3_USE_PLACEMENT_NEW
new (&dest[i]) T(m_data[i]);
#else
dest[i] = m_data[i];
#endif //B3_USE_PLACEMENT_NEW
}

B3_FORCE_INLINE void init()
{
//PCK: added this line
m_ownsMemory = true;
m_data = 0;
m_size = 0;
m_capacity = 0;
}
B3_FORCE_INLINE void destroy(int first,int last)
{
int i;
for (i=first; i<last;i++)
{
m_data[i].~T();
}
}

B3_FORCE_INLINE void* allocate(int size)
{
if (size)
return m_allocator.allocate(size);
return 0;
}

B3_FORCE_INLINE void deallocate()
{
if(m_data) {
//PCK: enclosed the deallocation in this block
if (m_ownsMemory)
{
m_allocator.deallocate(m_data);
}
m_data = 0;
}
}



public:
b3AlignedObjectArray()
{
init();
}

~b3AlignedObjectArray()
{
clear();
}

///Generally it is best to avoid using the copy constructor of an b3AlignedObjectArray, and use a (const) reference to the array instead.
b3AlignedObjectArray(const b3AlignedObjectArray& otherArray)
{
init();

int otherSize = otherArray.size();
resize (otherSize);
otherArray.copy(0, otherSize, m_data);
}

/// return the number of elements in the array
B3_FORCE_INLINE int size() const
{
return m_size;
}
B3_FORCE_INLINE const T& at(int n) const
{
b3Assert(n>=0);
b3Assert(n<size());
return m_data[n];
}

B3_FORCE_INLINE T& at(int n)
{
b3Assert(n>=0);
b3Assert(n<size());
return m_data[n];
}

B3_FORCE_INLINE const T& operator[](int n) const
{
b3Assert(n>=0);
b3Assert(n<size());
return m_data[n];
}

B3_FORCE_INLINE T& operator[](int n)
{
b3Assert(n>=0);
b3Assert(n<size());
return m_data[n];
}

///Clears the array: destroys all elements, deallocates the memory and resets the array to its initial empty state.
///Generally it is better to use array.resize(0), to reduce performance overhead of run-time memory (de)allocations.
B3_FORCE_INLINE void clear()
{
destroy(0,size());
deallocate();
init();
}

B3_FORCE_INLINE void pop_back()
{
b3Assert(m_size>0);
m_size--;
m_data[m_size].~T();
}


///resizeNoInitialize changes the number of elements in the array without constructing or destroying any element.
///When growing, storage is reserved but the new elements are left uninitialized.
///When shrinking, no destructor is called and no memory is freed; only the element count changes.
B3_FORCE_INLINE void resizeNoInitialize(int newsize)
{
int curSize = size();

if (newsize < curSize)
{
} else
{
if (newsize > size())
{
reserve(newsize);
}
//leave this uninitialized
}
m_size = newsize;
}
///resize changes the number of elements in the array. If the new size is larger, the new elements are copy-constructed from the optional second argument
///(only when B3_USE_PLACEMENT_NEW is defined; otherwise they are left as is).
///When the new number of elements is smaller, the destructor of each removed element is called, but memory is not freed, to reduce the overhead of run-time memory (de)allocations.
B3_FORCE_INLINE void resize(int newsize, const T& fillData=T())
{
int curSize = size();

if (newsize < curSize)
{
for(int i = newsize; i < curSize; i++)
{
m_data[i].~T();
}
} else
{
if (newsize > size())
{
reserve(newsize);
}
#ifdef B3_USE_PLACEMENT_NEW
for (int i=curSize;i<newsize;i++)
{
new ( &m_data[i]) T(fillData);
}
#endif //B3_USE_PLACEMENT_NEW

}

m_size = newsize;
}
B3_FORCE_INLINE T& expandNonInitializing( )
{
int sz = size();
if( sz == capacity() )
{
reserve( allocSize(size()) );
}
m_size++;

return m_data[sz];
}


B3_FORCE_INLINE T& expand( const T& fillValue=T())
{
int sz = size();
if( sz == capacity() )
{
reserve( allocSize(size()) );
}
m_size++;
#ifdef B3_USE_PLACEMENT_NEW
new (&m_data[sz]) T(fillValue); //use the in-place new (not really allocating heap memory)
#endif

return m_data[sz];
}


B3_FORCE_INLINE void push_back(const T& _Val)
{
int sz = size();
if( sz == capacity() )
{
reserve( allocSize(size()) );
}
#ifdef B3_USE_PLACEMENT_NEW
new ( &m_data[m_size] ) T(_Val);
#else
m_data[size()] = _Val;
#endif //B3_USE_PLACEMENT_NEW

m_size++;
}

/// return the pre-allocated (reserved) elements, this is at least as large as the total number of elements,see size() and reserve()
B3_FORCE_INLINE int capacity() const
{
return m_capacity;
}
/// Ensures the array can hold at least _Count elements without further allocation.
/// Does nothing when the current capacity is already sufficient. Otherwise new storage is
/// allocated, the existing elements are copied into it, the old elements are destroyed and
/// the old storage is released (only if this array owned it). After a reallocation the
/// array always owns its storage.
B3_FORCE_INLINE void reserve(int _Count)
{ // determine new minimum length of allocated storage
if (capacity() < _Count)
{ // not enough room, reallocate
T* s = (T*)allocate(_Count);
b3Assert(s);
if (s==0)
{
// allocation failed: report it and fall through with size and capacity forced to 0,
// so the copy/destroy calls below operate on an empty range
b3Error("b3AlignedObjectArray reserve out-of-memory\n");
_Count=0;
m_size=0;
}
copy(0, size(), s);

destroy(0,size());

deallocate();
//PCK: added this line
m_ownsMemory = true;

m_data = s;
m_capacity = _Count;

}
}


class less
{
public:

bool operator() ( const T& a, const T& b )
{
return ( a < b );
}
};

template <typename L>
void quickSortInternal(const L& CompareFunc,int lo, int hi)
{
// lo is the lower index, hi is the upper index
// of the region of array a that is to be sorted
int i=lo, j=hi;
T x=m_data[(lo+hi)/2];

// partition
do
{
while (CompareFunc(m_data[i],x))
i++;
while (CompareFunc(x,m_data[j]))
j--;
if (i<=j)
{
swap(i,j);
i++; j--;
}
} while (i<=j);

// recursion
if (lo<j)
quickSortInternal( CompareFunc, lo, j);
if (i<hi)
quickSortInternal( CompareFunc, i, hi);
}


template <typename L>
void quickSort(const L& CompareFunc)
{
//don't sort 0 or 1 elements
if (size()>1)
{
quickSortInternal(CompareFunc,0,size()-1);
}
}


///heap sort from http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Sort/Heap/
template <typename L>
void downHeap(T *pArr, int k, int n, const L& CompareFunc)
{
/* PRE: a[k+1..N] is a heap */
/* POST: a[k..N] is a heap */
T temp = pArr[k - 1];
/* k has child(s) */
while (k <= n/2)
{
int child = 2*k;
if ((child < n) && CompareFunc(pArr[child - 1] , pArr[child]))
{
child++;
}
/* pick larger child */
if (CompareFunc(temp , pArr[child - 1]))
{
/* move child up */
pArr[k - 1] = pArr[child - 1];
k = child;
}
else
{
break;
}
}
pArr[k - 1] = temp;
} /*downHeap*/

/// Exchanges the elements at index0 and index1.
/// With B3_USE_MEMCPY the raw bytes are swapped; otherwise a temporary copy and operator= are used.
void swap(int index0,int index1)
{
#ifdef B3_USE_MEMCPY
char temp[sizeof(T)];
memcpy(temp,&m_data[index0],sizeof(T));
memcpy(&m_data[index0],&m_data[index1],sizeof(T));
memcpy(&m_data[index1],temp,sizeof(T));
#else
T temp = m_data[index0];
m_data[index0] = m_data[index1];
m_data[index1] = temp;
#endif //B3_USE_MEMCPY

}

template <typename L>
void heapSort(const L& CompareFunc)
{
/* sort a[0..N-1], N.B. 0 to N-1 */
int k;
int n = m_size;
for (k = n/2; k > 0; k--)
{
downHeap(m_data, k, n, CompareFunc);
}

/* a[1..N] is now a heap */
while ( n>=1 )
{
swap(0,n-1); /* largest of a[0..n-1] */


n = n - 1;
/* restore a[1..i-1] heap */
downHeap(m_data, 1, n, CompareFunc);
}
}

///non-recursive binary search, assumes sorted array
int findBinarySearch(const T& key) const
{
int first = 0;
int last = size()-1;

//assume sorted array
while (first <= last) {
int mid = (first + last) / 2; // compute mid point.
if (key > m_data[mid])
first = mid + 1; // repeat search in top half.
else if (key < m_data[mid])
last = mid - 1; // repeat search in bottom half.
else
return mid; // found it. return position /////
}
return size(); // failed to find key
}


/// Linear search using operator==.
/// Returns the index of the first element equal to key, or size() when there is none.
int findLinearSearch(const T& key) const
{
	const int count = size();
	for (int pos = 0; pos < count; ++pos)
	{
		if (m_data[pos] == key)
		{
			return pos;
		}
	}
	return count;
}

void remove(const T& key)
{

int findIndex = findLinearSearch(key);
if (findIndex<size())
{
swap( findIndex,size()-1);
pop_back();
}
}

//PCK: whole function
/// Makes the array use an externally provided buffer instead of its own allocation.
/// The current contents are cleared first. The array does not take ownership: the buffer
/// is not freed by this array unless a later reserve() replaces it with owned storage.
/// 'size' elements are treated as already present; 'capacity' is the buffer's element capacity.
void initializeFromBuffer(void *buffer, int size, int capacity)
{
clear();
m_ownsMemory = false;
m_data = (T*)buffer;
m_size = size;
m_capacity = capacity;
}

void copyFromArray(const b3AlignedObjectArray& otherArray)
{
int otherSize = otherArray.size();
resize (otherSize);
otherArray.copy(0, otherSize, m_data);
}

};

#endif //B3_OBJECT_ARRAY__

+ 106
- 0
src/bullet/Bullet3Common/b3CommandLineArgs.h View File

@@ -0,0 +1,106 @@
#ifndef COMMAND_LINE_ARGS_H
#define COMMAND_LINE_ARGS_H

/******************************************************************************
* Command-line parsing
******************************************************************************/
#include <map>
#include <algorithm>
#include <string>
#include <cstring>
#include <sstream>
/// Minimal parser for "--key" and "--key=value" command-line arguments.
/// Arguments that do not start with "--" are ignored, argv[0] is never inspected,
/// and the first occurrence of a key wins.
class b3CommandLineArgs
{
protected:

	// parsed arguments: key -> value ("" for a bare flag)
	std::map<std::string, std::string> pairs;

public:

	/// Parses argv[1..argc-1].
	b3CommandLineArgs(int argc, char **argv)
	{
		addArgs(argc,argv);
	}

	/// Parses argv[1..argc-1] and adds every key that is not already present.
	void addArgs(int argc, char**argv)
	{
		for (int argIndex = 1; argIndex < argc; ++argIndex)
		{
			const std::string token(argv[argIndex]);

			// only "--..." tokens are options
			if (token.compare(0, 2, "--") != 0)
			{
				continue;
			}

			std::string key, val;
			const std::string::size_type eqPos = token.find('=');
			if (eqPos == std::string::npos)
			{
				// bare flag: everything after the leading "--" is the key
				key = token.substr(2);
			}
			else
			{
				key = token.substr(2, eqPos - 2);
				val = token.substr(eqPos + 1);
			}

			// insert() leaves an existing entry untouched, so earlier keys are kept
			pairs.insert(std::map<std::string, std::string>::value_type(key, val));
		}
	}

	/// Returns true when the given key was present on the command line.
	bool CheckCmdLineFlag(const char* arg_name)
	{
		return pairs.find(arg_name) != pairs.end();
	}

	/// Looks up a key and converts its value to T; defined after the class.
	template <typename T>
	bool GetCmdLineArgument(const char *arg_name, T &val);

	/// Returns the number of distinct keys parsed so far.
	int ParsedArgc()
	{
		return (int)pairs.size();
	}
};

/// Looks up arg_name and, when present, stream-extracts its value into val.
/// Returns true when the key exists (even if the extraction itself fails),
/// false when the key is absent, in which case val is left untouched.
template <typename T>
inline bool b3CommandLineArgs::GetCmdLineArgument(const char *arg_name, T &val)
{
	std::map<std::string, std::string>::iterator found = pairs.find(arg_name);
	if (found == pairs.end())
	{
		return false;
	}

	std::istringstream parser(found->second);
	parser >> val;
	return true;
}

/// Specialization for C strings: on success val receives a malloc()-allocated,
/// NUL-terminated copy of the value, which the caller must release with free().
/// Returns false and sets val to NULL when the key is absent or the copy cannot be allocated.
template <>
inline bool b3CommandLineArgs::GetCmdLineArgument<char*>(const char* arg_name, char* &val)
{
	val = NULL;

	std::map<std::string, std::string>::iterator itr = pairs.find(arg_name);
	if (itr == pairs.end())
	{
		return false;
	}

	const std::string& s = itr->second;
	char* copy = (char*) malloc(sizeof(char) * (s.length() + 1));
	if (!copy)
	{
		// out of memory: report "not available" instead of writing through NULL
		return false;
	}
	std::strcpy(copy, s.c_str());
	val = copy;
	return true;
}


#endif //COMMAND_LINE_ARGS_H

+ 138
- 0
src/bullet/Bullet3Common/b3FileUtils.h View File

@@ -0,0 +1,138 @@
#ifndef B3_FILE_UTILS_H
#define B3_FILE_UTILS_H

#include <stdio.h>
#include "b3Scalar.h"
#include <stddef.h>//ptrdiff_t
#include <string.h>

struct b3FileUtils
{
b3FileUtils()
{
}
virtual ~b3FileUtils()
{
}

/// Tries to locate orgFileName, first as given and then below a fixed list of
/// directories relative to the current working directory.
///
/// orgFileName               - file name to look for
/// relativeFileName          - output buffer receiving the path that could be opened
/// maxRelativeFileNameMaxLen - size of relativeFileName in bytes, including the terminating NUL
///
/// Returns true and stores the path when a candidate can be opened for reading.
/// Candidates that do not fit into the output buffer are skipped instead of being
/// written past its end; if the unmodified name exists but does not fit, false is returned.
static bool findFile(const char* orgFileName, char* relativeFileName, int maxRelativeFileNameMaxLen)
{
	if (!orgFileName || !relativeFileName || maxRelativeFileNameMaxLen <= 0)
	{
		return false;
	}

	const size_t capacity = (size_t)maxRelativeFileNameMaxLen;
	const size_t orgLen = strlen(orgFileName);

	FILE* f = fopen(orgFileName,"rb");
	if (f)
	{
		fclose(f);
		if (orgLen + 1 > capacity)
		{
			return false;
		}
		// memmove: tolerate a caller passing the same buffer for input and output
		memmove(relativeFileName, orgFileName, orgLen + 1);
		return true;
	}

	// try various directories, relative to the current working directory
	const char* prefix[]={"./","./data/","../data/","../../data/","../../../data/","../../../../data/"};
	const int numPrefixes = sizeof(prefix)/sizeof(const char*);

	for (int i=0;i<numPrefixes;i++)
	{
		const size_t prefixLen = strlen(prefix[i]);
		if (prefixLen + orgLen + 1 > capacity)
		{
			// this candidate would overflow the output buffer
			continue;
		}
		memcpy(relativeFileName, prefix[i], prefixLen);
		memcpy(relativeFileName + prefixLen, orgFileName, orgLen + 1);

		f = fopen(relativeFileName,"rb");
		if (f)
		{
			fclose(f);
			return true;
		}
	}
	return false;
}

/// Returns a pointer into 'name' just past the last occurrence of 'pattern'
/// (occurrences are searched left to right, each search resuming after the previous match).
/// Returns 'name' itself when the pattern does not occur or is empty.
static const char* strip2(const char* name, const char* pattern)
{
	size_t const patlen = strlen(pattern);
	if (patlen == 0)
	{
		// strstr() matches an empty pattern at every position, which would never advance
		return name;
	}

	const char * oriptr = name;
	const char * patloc;
	// skip past each occurrence of the pattern in turn
	while ((patloc = strstr(oriptr, pattern)) != 0)
	{
		oriptr = patloc + patlen;
	}
	return oriptr;
}


/// Copies the directory part of fileName (everything up to and including the last
/// separator found by the two strip2 passes: first "/", then "\\" within the remainder)
/// into path and NUL-terminates it.
/// Returns the number of characters copied. When there is no directory part, or it
/// does not fit into maxPathLength bytes, path is set to the empty string (if
/// maxPathLength > 0) and 0 is returned.
static int extractPath(const char* fileName, char* path, int maxPathLength)
{
	const char* baseName = strip2(strip2(fileName, "/"), "\\");

	const ptrdiff_t dirLen = baseName-fileName;
	b3Assert((dirLen+1)<maxPathLength);

	const bool fits = ((dirLen+1)<maxPathLength);
	if (dirLen == 0 || !fits)
	{
		b3Assert(maxPathLength>0);
		if (maxPathLength>0)
		{
			path[0] = 0;
		}
		return 0;
	}

	for (int k=0;k<dirLen;k++)
	{
		path[k] = fileName[k];
	}
	path[dirLen]=0;
	return (int)dirLen;
}

/// Converts an upper-case letter 'A'..'Z' to lower case; every other character is returned unchanged.
static char toLowerChar(const char t)
{
	const bool isUpper = (t>=(char)'A' && t<=(char)'Z');
	if (!isUpper)
	{
		return t;
	}
	return t + ((char)'a' - (char)'A');
}


/// Converts the NUL-terminated string str to lower case in place, one character at a time via toLowerChar.
static void toLower(char* str)
{
	for (char* cursor = str; *cursor; ++cursor)
	{
		*cursor = toLowerChar(*cursor);
	}
}


/*static const char* strip2(const char* name, const char* pattern)
{
size_t const patlen = strlen(pattern);
size_t patcnt = 0;
const char * oriptr;
const char * patloc;
// find how many times the pattern occurs in the original string
for (oriptr = name; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen)
{
patcnt++;
}
return oriptr;
}
*/

};
#endif //B3_FILE_UTILS_H

+ 450
- 0
src/bullet/Bullet3Common/b3HashMap.h View File

@@ -0,0 +1,450 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/


#ifndef B3_HASH_MAP_H
#define B3_HASH_MAP_H

#include "b3AlignedObjectArray.h"

///very basic hashable string implementation, compatible with b3HashMap
// Hash-map key wrapping a C string. The hash is computed once in the constructor.
// Only the pointer is stored, the characters are not copied, so the string passed in
// must stay valid for as long as the key is used.
struct b3HashString
{
const char* m_string;
unsigned int m_hash;

// returns the hash computed at construction time
B3_FORCE_INLINE unsigned int getHash()const
{
return m_hash;
}

b3HashString(const char* name)
:m_string(name)
{
/* magic numbers from http://www.isthe.com/chongo/tech/comp/fnv/ */
static const unsigned int InitialFNV = 2166136261u;
static const unsigned int FNVMultiple = 16777619u;

/* Fowler / Noll / Vo (FNV) Hash */
unsigned int hash = InitialFNV;
for(int i = 0; m_string[i]; i++)
{
// NOTE(review): m_string[i] is a plain char; where char is signed, characters above 127
// are sign-extended before the xor, so more than the low 8 bits change - confirm intent.
hash = hash ^ (m_string[i]); /* xor the low 8 bits */
hash = hash * FNVMultiple; /* multiply by the magic number */
}
m_hash = hash;
}

// strcmp-like comparison on unsigned char values: returns -1, 0 or 1
int portableStringCompare(const char* src, const char* dst) const
{
int ret = 0 ;

// advance while the characters are equal and the end of dst has not been reached
while( ! (ret = *(unsigned char *)src - *(unsigned char *)dst) && *dst)
++src, ++dst;

if ( ret < 0 )
ret = -1 ;
else if ( ret > 0 )
ret = 1 ;

return( ret );
}

// keys are equal when they point at the same string or the strings have identical contents
bool equals(const b3HashString& other) const
{
return (m_string == other.m_string) ||
(0==portableStringCompare(m_string,other.m_string));

}

};

///Sentinel index meaning "no entry": b3HashMap uses it to terminate bucket chains and returns it from findIndex when a key is absent.
///NOTE(review): 0xffffffff does not fit in a 32-bit signed int, so the initializer relies on the conversion yielding -1 -- confirm on the target compilers.
const int B3_HASH_NULL=0xffffffff;


class b3HashInt
{
	int	m_uid;	//the integer used both as identity and as hash input
public:
	///Wraps 'uid' as a b3HashMap key.
	b3HashInt(int uid)	:m_uid(uid)
	{
	}

	///Returns the wrapped integer.
	int	getUid1() const { return m_uid; }

	///Replaces the wrapped integer.
	void	setUid1(int uid) { m_uid = uid; }

	///Two keys are equal when they wrap the same integer.
	bool equals(const b3HashInt& other) const
	{
		return m_uid == other.m_uid;
	}

	///Mixes the bits of the wrapped integer (Thomas Wang's hash); all steps operate on a signed int.
	B3_FORCE_INLINE	unsigned int getHash()const
	{
		int mixed = m_uid;
		mixed += ~(mixed << 15);
		mixed ^=  (mixed >> 10);
		mixed +=  (mixed << 3);
		mixed ^=  (mixed >> 6);
		mixed += ~(mixed << 11);
		mixed ^=  (mixed >> 16);
		return mixed;
	}
};



class b3HashPtr
{
	//the pointer and two ints share storage so the hash can read the pointer's bits as integers
	union
	{
		const void*	m_pointer;
		int	m_hashValues[2];
	};

public:

	///Wraps 'ptr' as a b3HashMap key; only the address is stored.
	b3HashPtr(const void* ptr)
		:m_pointer(ptr)
	{
	}

	///Returns the wrapped pointer.
	const void*	getPointer() const { return m_pointer; }

	///Two keys are equal when they wrap the same address.
	bool equals(const b3HashPtr& other) const
	{
		return m_pointer == other.m_pointer;
	}

	///Hashes the address: with 8-byte pointers both int halves are summed first,
	///otherwise only the first int is used; the result goes through Thomas Wang's hash.
	B3_FORCE_INLINE	unsigned int getHash()const
	{
		const bool VOID_IS_8 = ((sizeof(void*)==8));

		int mixed = m_hashValues[0];
		if (VOID_IS_8)
		{
			mixed = m_hashValues[0]+m_hashValues[1];
		}

		mixed += ~(mixed << 15);
		mixed ^=  (mixed >> 10);
		mixed +=  (mixed << 3);
		mixed ^=  (mixed >> 6);
		mixed += ~(mixed << 11);
		mixed ^=  (mixed >> 16);
		return mixed;
	}

};


template <class Value>
class b3HashKeyPtr
{
	int	m_uid;	//integer id used as identity and as hash input
public:

	///Wraps 'uid' as a key; the Value template parameter is not used by any member.
	b3HashKeyPtr(int uid)    :m_uid(uid)
	{
	}

	///Returns the wrapped id.
	int	getUid1() const { return m_uid; }

	///Two keys are equal when their ids match.
	bool equals(const b3HashKeyPtr<Value>& other) const
	{
		return m_uid == other.m_uid;
	}

	///Thomas Wang's integer hash of the id, computed on a signed int.
	B3_FORCE_INLINE	unsigned int getHash()const
	{
		int mixed = m_uid;
		mixed += ~(mixed << 15);
		mixed ^=  (mixed >> 10);
		mixed +=  (mixed << 3);
		mixed ^=  (mixed >> 6);
		mixed += ~(mixed << 11);
		mixed ^=  (mixed >> 16);
		return mixed;
	}

};


template <class Value>
class b3HashKey
{
	int	m_uid;	//integer id used as identity and as hash input
public:

	///Wraps 'uid' as a key; the Value template parameter is not used by any member.
	b3HashKey(int uid)    :m_uid(uid)
	{
	}

	///Returns the wrapped id.
	int	getUid1() const { return m_uid; }

	///Two keys are equal when their ids match.
	bool equals(const b3HashKey<Value>& other) const
	{
		return m_uid == other.m_uid;
	}

	///Thomas Wang's integer hash of the id, computed on a signed int.
	B3_FORCE_INLINE	unsigned int getHash()const
	{
		int mixed = m_uid;
		mixed += ~(mixed << 15);
		mixed ^=  (mixed >> 10);
		mixed +=  (mixed << 3);
		mixed ^=  (mixed >> 6);
		mixed += ~(mixed << 11);
		mixed ^=  (mixed >> 16);
		return mixed;
	}
};


///The b3HashMap template class implements a generic and lightweight hashmap.
///A basic sample of how to use b3HashMap is located in Demos\BasicDemo\main.cpp
template <class Key, class Value>
class b3HashMap
{

protected:
	b3AlignedObjectArray<int>		m_hashTable;	//bucket heads: index of the first entry of each chain, or B3_HASH_NULL
	b3AlignedObjectArray<int>		m_next;			//per entry: index of the next entry in the same bucket, or B3_HASH_NULL
	b3AlignedObjectArray<Value>		m_valueArray;	//values, densely packed in insertion order (remove() moves the last entry into the hole)
	b3AlignedObjectArray<Key>		m_keyArray;		//keys, parallel to m_valueArray

	//Every bucket index in this class is computed as getHash() & (capacity-1).
	//NOTE(review): using capacity-1 as a bit mask assumes the value array's capacity is a power of two -- confirm against b3AlignedObjectArray's growth policy.

	///Brings m_hashTable/m_next up to the value array's capacity and re-links the
	///entries that were in the table before the resize under the new mask.
	void	growTables(const Key& /*key*/)
	{
		int newCapacity = m_valueArray.capacity();

		if (m_hashTable.size() < newCapacity)
		{
			//grow hashtable and next table
			int curHashtableSize = m_hashTable.size();

			m_hashTable.resize(newCapacity);
			m_next.resize(newCapacity);

			int i;

			for (i= 0; i < newCapacity; ++i)
			{
				m_hashTable[i] = B3_HASH_NULL;
			}
			for (i = 0; i < newCapacity; ++i)
			{
				m_next[i] = B3_HASH_NULL;
			}

			//re-insert the first curHashtableSize entries; the entry that triggered the growth is linked by insert() itself
			for(i=0;i<curHashtableSize;i++)
			{
				int	hashValue = m_keyArray[i].getHash() & (m_valueArray.capacity()-1);	// New hash value with new mask
				m_next[i] = m_hashTable[hashValue];
				m_hashTable[hashValue] = i;
			}
		}
	}

	public:

	///Associates 'value' with 'key'. If the key is already present its value is overwritten,
	///otherwise the pair is appended and linked at the head of its bucket.
	void insert(const Key& key, const Value& value) {
		int hash = key.getHash() & (m_valueArray.capacity()-1);

		//replace value if the key is already there
		int index = findIndex(key);
		if (index != B3_HASH_NULL)
		{
			m_valueArray[index]=value;
			return;
		}

		int count = m_valueArray.size();
		int oldCapacity = m_valueArray.capacity();
		m_valueArray.push_back(value);
		m_keyArray.push_back(key);

		int newCapacity = m_valueArray.capacity();
		if (oldCapacity < newCapacity)
		{
			growTables(key);
			//hash with new capacity
			hash = key.getHash() & (m_valueArray.capacity()-1);
		}
		m_next[count] = m_hashTable[hash];
		m_hashTable[hash] = count;
	}

	///Removes 'key' and its value if present; does nothing otherwise.
	///The last stored pair is moved into the freed slot, so indices previously
	///obtained for that last pair change.
	void remove(const Key& key) {

		int hash = key.getHash() & (m_valueArray.capacity()-1);

		int pairIndex = findIndex(key);

		if (pairIndex ==B3_HASH_NULL)
		{
			return;
		}

		// Remove the pair from the hash table.
		int index = m_hashTable[hash];
		b3Assert(index != B3_HASH_NULL);

		int previous = B3_HASH_NULL;
		while (index != pairIndex)
		{
			previous = index;
			index = m_next[index];
		}

		if (previous != B3_HASH_NULL)
		{
			b3Assert(m_next[previous] == pairIndex);
			m_next[previous] = m_next[pairIndex];
		}
		else
		{
			m_hashTable[hash] = m_next[pairIndex];
		}

		// We now move the last pair into spot of the
		// pair being removed. We need to fix the hash
		// table indices to support the move.

		int lastPairIndex = m_valueArray.size() - 1;

		// If the removed pair is the last pair, we are done.
		if (lastPairIndex == pairIndex)
		{
			m_valueArray.pop_back();
			m_keyArray.pop_back();
			return;
		}

		// Remove the last pair from the hash table.
		int lastHash = m_keyArray[lastPairIndex].getHash() & (m_valueArray.capacity()-1);

		index = m_hashTable[lastHash];
		b3Assert(index != B3_HASH_NULL);

		previous = B3_HASH_NULL;
		while (index != lastPairIndex)
		{
			previous = index;
			index = m_next[index];
		}

		if (previous != B3_HASH_NULL)
		{
			b3Assert(m_next[previous] == lastPairIndex);
			m_next[previous] = m_next[lastPairIndex];
		}
		else
		{
			m_hashTable[lastHash] = m_next[lastPairIndex];
		}

		// Copy the last pair into the remove pair's spot.
		m_valueArray[pairIndex] = m_valueArray[lastPairIndex];
		m_keyArray[pairIndex] = m_keyArray[lastPairIndex];

		// Insert the last pair into the hash table
		m_next[pairIndex] = m_hashTable[lastHash];
		m_hashTable[lastHash] = pairIndex;

		m_valueArray.pop_back();
		m_keyArray.pop_back();

	}


	///Number of key/value pairs currently stored.
	int size() const
	{
		return m_valueArray.size();
	}

	///Returns the value stored at position 'index' (0 <= index < size()),
	///or NULL when 'index' is out of range (asserts in debug builds).
	const Value* getAtIndex(int index) const
	{
		b3Assert(index >= 0);
		b3Assert(index < m_valueArray.size());

		//release builds compile the asserts away, so guard the array access explicitly
		if (index < 0 || index >= m_valueArray.size())
		{
			return NULL;
		}
		return &m_valueArray[index];
	}

	///Returns the value stored at position 'index' (0 <= index < size()),
	///or NULL when 'index' is out of range (asserts in debug builds).
	Value* getAtIndex(int index)
	{
		b3Assert(index >= 0);
		b3Assert(index < m_valueArray.size());

		//release builds compile the asserts away, so guard the array access explicitly
		if (index < 0 || index >= m_valueArray.size())
		{
			return NULL;
		}
		return &m_valueArray[index];
	}

	///Same as find(): pointer to the value for 'key', or NULL when absent. Never inserts.
	Value* operator[](const Key& key) {
		return find(key);
	}

	///Const overload of operator[]: pointer to the value for 'key', or NULL when absent.
	const Value* operator[](const Key& key) const {
		return find(key);
	}

	///Returns a pointer to the value stored for 'key', or NULL when the key is absent.
	const Value*	find(const Key& key) const
	{
		int index = findIndex(key);
		if (index == B3_HASH_NULL)
		{
			return NULL;
		}
		return &m_valueArray[index];
	}

	///Returns a pointer to the value stored for 'key', or NULL when the key is absent.
	Value*	find(const Key& key)
	{
		int index = findIndex(key);
		if (index == B3_HASH_NULL)
		{
			return NULL;
		}
		return &m_valueArray[index];
	}


	///Returns the position of 'key' in the key/value arrays, or B3_HASH_NULL when absent.
	int	findIndex(const Key& key) const
	{
		unsigned int hash = key.getHash() & (m_valueArray.capacity()-1);

		//the bucket table is empty (or smaller than the mask) before the first growth
		if (hash >= (unsigned int)m_hashTable.size())
		{
			return B3_HASH_NULL;
		}

		//walk the bucket chain until the key matches or the chain ends
		int index = m_hashTable[hash];
		while ((index != B3_HASH_NULL) && key.equals(m_keyArray[index]) == false)
		{
			index = m_next[index];
		}
		return index;
	}

	///Removes every entry by clearing all four internal arrays.
	void	clear()
	{
		m_hashTable.clear();
		m_next.clear();
		m_valueArray.clear();
		m_keyArray.clear();
	}

};

#endif //B3_HASH_MAP_H

+ 160
- 0
src/bullet/Bullet3Common/b3Logging.cpp View File

@@ -0,0 +1,160 @@
/*
Copyright (c) 2013 Advanced Micro Devices, Inc.

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans

#include "b3Logging.h"

#include <stdio.h>
#include <stdarg.h>

#ifdef _WIN32
#include <windows.h>
#endif //_WIN32


void b3PrintfFuncDefault(const char* msg)
{
	//Default sink for b3Printf output: on Windows the text also goes to the debugger output,
	//and on every platform it is written to stdout, which is flushed right away.
#if defined(_WIN32)
	OutputDebugStringA(msg);
#endif
	printf("%s",msg);
	fflush(stdout);	//is this portable?
}

void b3WarningMessageFuncDefault(const char* msg)
{
	//Default sink for b3Warning output: on Windows the text also goes to the debugger output,
	//and on every platform it is written to stdout, which is flushed right away.
#if defined(_WIN32)
	OutputDebugStringA(msg);
#endif
	printf("%s",msg);
	fflush(stdout);	//is this portable?
}


void b3ErrorMessageFuncDefault(const char* msg)
{
	//Default sink for b3Error output: on Windows the text also goes to the debugger output,
	//and on every platform it is written to stdout (not stderr), which is flushed right away.
#if defined(_WIN32)
	OutputDebugStringA(msg);
#endif
	printf("%s",msg);
	fflush(stdout);	//is this portable?
}



//Currently installed output callbacks. Each one starts out pointing at the matching
//default function and is replaced through the corresponding b3SetCustom...Func setter.
static b3PrintfFunc* b3s_printfFunc = b3PrintfFuncDefault;
static b3WarningMessageFunc* b3s_warningMessageFunc = b3WarningMessageFuncDefault;
static b3ErrorMessageFunc* b3s_errorMessageFunc = b3ErrorMessageFuncDefault;


///The developer can route b3Printf output using their own implementation
void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc)
{
	//Installs 'printfFunc' as the receiver of b3Printf output.
	//The callback is invoked unconditionally on every message, so a null pointer
	//restores the built-in default instead of being stored.
	if (printfFunc)
	{
		b3s_printfFunc = printfFunc;
	}
	else
	{
		b3s_printfFunc = b3PrintfFuncDefault;
	}
}
void b3SetCustomWarningMessageFunc(b3PrintfFunc* warningMessageFunc)
{
b3s_warningMessageFunc = warningMessageFunc;
}
void b3SetCustomErrorMessageFunc(b3PrintfFunc* errorMessageFunc)
{
b3s_errorMessageFunc = errorMessageFunc;
}

//#define B3_MAX_DEBUG_STRING_LENGTH 2048
#define B3_MAX_DEBUG_STRING_LENGTH 32768


void b3OutputPrintfVarArgsInternal(const char *str, ...)
{
char strDebug[B3_MAX_DEBUG_STRING_LENGTH]={0};
va_list argList;
va_start(argList, str);
#ifdef _MSC_VER
vsprintf_s(strDebug,B3_MAX_DEBUG_STRING_LENGTH,str,argList);
#else
vsnprintf(strDebug,B3_MAX_DEBUG_STRING_LENGTH,str,argList);
#endif
(b3s_printfFunc)(strDebug);
va_end(argList);
}
void b3OutputWarningMessageVarArgsInternal(const char *str, ...)
{
char strDebug[B3_MAX_DEBUG_STRING_LENGTH]={0};
va_list argList;
va_start(argList, str);
#ifdef _MSC_VER
vsprintf_s(strDebug,B3_MAX_DEBUG_STRING_LENGTH,str,argList);
#else
vsnprintf(strDebug,B3_MAX_DEBUG_STRING_LENGTH,str,argList);
#endif
(b3s_warningMessageFunc)(strDebug);
va_end(argList);
}
void b3OutputErrorMessageVarArgsInternal(const char *str, ...)
{
char strDebug[B3_MAX_DEBUG_STRING_LENGTH]={0};
va_list argList;
va_start(argList, str);
#ifdef _MSC_VER
vsprintf_s(strDebug,B3_MAX_DEBUG_STRING_LENGTH,str,argList);
#else
vsnprintf(strDebug,B3_MAX_DEBUG_STRING_LENGTH,str,argList);
#endif
(b3s_errorMessageFunc)(strDebug);
va_end(argList);

}


void b3EnterProfileZoneDefault(const char* /*name*/)
{
	//default enter hook: intentionally does nothing
}
void b3LeaveProfileZoneDefault()
{
	//default leave hook: intentionally does nothing
}
//Currently installed profile-zone hooks; both start out as the no-op defaults and are
//replaced through b3SetCustomEnterProfileZoneFunc / b3SetCustomLeaveProfileZoneFunc.
static b3EnterProfileZoneFunc* b3s_enterFunc = b3EnterProfileZoneDefault;
static b3LeaveProfileZoneFunc* b3s_leaveFunc = b3LeaveProfileZoneDefault;
void b3EnterProfileZone(const char* name)
{
	//forward to whichever enter hook is currently installed
	b3s_enterFunc(name);
}
void b3LeaveProfileZone()
{
	//forward to whichever leave hook is currently installed
	b3s_leaveFunc();
}

///Installs 'enterFunc' as the hook called by b3EnterProfileZone.
void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc)
{
	//b3EnterProfileZone calls the hook unconditionally, so a null pointer
	//restores the no-op default instead of being stored
	if (enterFunc)
	{
		b3s_enterFunc = enterFunc;
	}
	else
	{
		b3s_enterFunc = b3EnterProfileZoneDefault;
	}
}
///Installs 'leaveFunc' as the hook called by b3LeaveProfileZone.
void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc)
{
	//b3LeaveProfileZone calls the hook unconditionally, so a null pointer
	//restores the no-op default instead of being stored
	if (leaveFunc)
	{
		b3s_leaveFunc = leaveFunc;
	}
	else
	{
		b3s_leaveFunc = b3LeaveProfileZoneDefault;
	}
}




#ifndef _MSC_VER
#undef vsprintf_s
#endif


+ 77
- 0
src/bullet/Bullet3Common/b3Logging.h View File

@@ -0,0 +1,77 @@

#ifndef B3_LOGGING_H
#define B3_LOGGING_H

#ifdef __cplusplus
extern "C" {
#endif
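///The multi-statement macros below (b3Warning, b3Error) are wrapped in do { ... } while(0) so that each use expands to a single statement and stays valid in code such as "if (condition) b3Warning("test"); else {...}"
///You can also customize b3Printf by uncommenting a different definition below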
#define b3Printf(...) b3OutputPrintfVarArgsInternal(__VA_ARGS__)
//#define b3Printf(...) do {b3OutputPrintfVarArgsInternal("b3Printf[%s,%d]:",__FILE__,__LINE__);b3OutputPrintfVarArgsInternal(__VA_ARGS__); } while(0)
//#define b3Printf b3OutputPrintfVarArgsInternal
//#define b3Printf(...) printf(__VA_ARGS__)
//#define b3Printf(...)

///b3Warning / b3Error make two calls: the first emits a "b3Warning[file,line]:" or "b3Error[file,line]:" header line,
///the second emits the caller's printf-style message. Both go through the warning / error output function respectively.
#define b3Warning(...) do {b3OutputWarningMessageVarArgsInternal("b3Warning[%s,%d]:\n",__FILE__,__LINE__);b3OutputWarningMessageVarArgsInternal(__VA_ARGS__); }while(0)
#define b3Error(...) do {b3OutputErrorMessageVarArgsInternal("b3Error[%s,%d]:\n",__FILE__,__LINE__);b3OutputErrorMessageVarArgsInternal(__VA_ARGS__); } while(0)


#ifndef B3_NO_PROFILE

void b3EnterProfileZone(const char* name);
void b3LeaveProfileZone();
#ifdef __cplusplus

///Scope guard for profiling: the constructor calls b3EnterProfileZone(name),
///the destructor calls b3LeaveProfileZone().
class b3ProfileZone
{
public:
	b3ProfileZone(const char* name) { b3EnterProfileZone( name ); }

	~b3ProfileZone() { b3LeaveProfileZone(); }
};

#define B3_PROFILE( name ) b3ProfileZone __profile( name )
#endif

#else //B3_NO_PROFILE

#define B3_PROFILE( name )
#define b3StartProfile(a)
#define b3StopProfile

#endif //#ifndef B3_NO_PROFILE


//Callback signatures. The three message callbacks receive the already formatted text;
//the enter-profile-zone callback receives the zone name; the leave callback takes no arguments.
//Note these are function types, so the setters below take pointers to them.
typedef void (b3PrintfFunc)(const char* msg);
typedef void (b3WarningMessageFunc)(const char* msg);
typedef void (b3ErrorMessageFunc)(const char* msg);
typedef void (b3EnterProfileZoneFunc)(const char* msg);
typedef void (b3LeaveProfileZoneFunc)();

///The developer can route b3Printf output using their own implementation
void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc);
void b3SetCustomWarningMessageFunc(b3WarningMessageFunc* warningMsgFunc);
void b3SetCustomErrorMessageFunc(b3ErrorMessageFunc* errorMsgFunc);

///Set custom profile zone functions (zones can be nested)
void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc);
void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc);

///Don't use those internal functions directly, use the b3Printf or b3SetCustomPrintfFunc instead (or warning/error version)
void b3OutputPrintfVarArgsInternal(const char *str, ...);
void b3OutputWarningMessageVarArgsInternal(const char *str, ...);
void b3OutputErrorMessageVarArgsInternal(const char *str, ...);

#ifdef __cplusplus
}
#endif

#endif//B3_LOGGING_H

+ 1362
- 0
src/bullet/Bullet3Common/b3Matrix3x3.h
File diff suppressed because it is too large
View File


+ 71
- 0
src/bullet/Bullet3Common/b3MinMax.h View File

@@ -0,0 +1,71 @@
/*
Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/



#ifndef B3_GEN_MINMAX_H
#define B3_GEN_MINMAX_H

#include "b3Scalar.h"

template <class T>
B3_FORCE_INLINE const T& b3Min(const T& a, const T& b)
{
	//returns a reference to the smaller argument; when neither is smaller, b is returned
	if (a < b)
	{
		return a;
	}
	return b;
}

template <class T>
B3_FORCE_INLINE const T& b3Max(const T& a, const T& b)
{
	//returns a reference to the larger argument; when neither is larger, b is returned
	if (a > b)
	{
		return a;
	}
	return b;
}

template <class T>
B3_FORCE_INLINE const T& b3Clamped(const T& a, const T& lb, const T& ub)
{
	//returns a reference to lb when a is below it, to ub when a is above it, otherwise to a itself;
	//the lower bound is tested first
	if (a < lb)
	{
		return lb;
	}
	if (ub < a)
	{
		return ub;
	}
	return a;
}

template <class T>
B3_FORCE_INLINE void b3SetMin(T& a, const T& b)
{
	//lowers a to b when b is strictly smaller; otherwise a is left untouched
	if (!(b < a))
	{
		return;
	}
	a = b;
}

template <class T>
B3_FORCE_INLINE void b3SetMax(T& a, const T& b)
{
	//raises a to b when a is strictly smaller; otherwise a is left untouched
	if (!(a < b))
	{
		return;
	}
	a = b;
}

template <class T>
B3_FORCE_INLINE void b3Clamp(T& a, const T& lb, const T& ub)
{
	//clamps a in place; the lower bound is tested first and, when it applies, the upper bound is not checked
	if (a < lb)
	{
		a = lb;
		return;
	}
	if (ub < a)
	{
		a = ub;
	}
}

#endif //B3_GEN_MINMAX_H

+ 121
- 0
src/bullet/Bullet3Common/b3PoolAllocator.h View File

@@ -0,0 +1,121 @@
/*
Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/


#ifndef _BT_POOL_ALLOCATOR_H
#define _BT_POOL_ALLOCATOR_H

#include "b3Scalar.h"
#include "b3AlignedAllocator.h"

///The b3PoolAllocator class allows to efficiently allocate a large pool of objects, instead of dynamically allocating them separately.
class b3PoolAllocator
{
	int				m_elemSize;		//size in bytes of one slot
	int				m_maxElements;	//number of slots requested at construction
	int				m_freeCount;	//number of slots currently on the free list
	void*			m_firstFree;	//head of the free list, 0 when no slot is free
	unsigned char*	m_pool;			//backing storage, 0 when the pool is empty

public:

	///Creates a pool of 'maxElements' slots of 'elemSize' bytes each.
	///A free slot stores the address of the next free slot in its first bytes,
	///so 'elemSize' has to be at least sizeof(void*).
	///With maxElements <= 0 (or if the allocation yields no memory) the pool is
	///simply empty: getFreeCount() is 0 and allocate() returns 0.
	b3PoolAllocator(int elemSize, int maxElements)
		:m_elemSize(elemSize),
		m_maxElements(maxElements),
		m_freeCount(0),
		m_firstFree(0),
		m_pool(0)
	{
		//an empty pool owns no storage and has no free list to build
		if (m_maxElements <= 0)
		{
			return;
		}

		m_pool = (unsigned char*) b3AlignedAlloc( static_cast<unsigned int>(m_elemSize*m_maxElements),16);
		if (!m_pool)
		{
			return;
		}

		//thread the slots into a singly linked list: every slot but the last points at its successor
		unsigned char* p = m_pool;
		for (int i = 1; i < m_maxElements; ++i)
		{
			*(void**)p = (p + m_elemSize);
			p += m_elemSize;
		}
		*(void**)p = 0;

		m_firstFree = m_pool;
		m_freeCount = m_maxElements;
	}

	///Releases the backing storage; outstanding slots become invalid.
	~b3PoolAllocator()
	{
		if (m_pool)
		{
			b3AlignedFree( m_pool);
		}
	}

	///Number of slots that can still be handed out.
	int	getFreeCount() const
	{
		return m_freeCount;
	}

	///Number of slots currently handed out.
	int getUsedCount() const
	{
		return m_maxElements - m_freeCount;
	}

	///The slot count passed to the constructor.
	int getMaxCount() const
	{
		return m_maxElements;
	}

	///Hands out one slot. 'size' is only checked (debug builds) against the slot size.
	///Returns 0 when no slot is free; debug builds additionally assert in that case.
	void*	allocate(int size)
	{
		// release mode fix
		(void)size;
		b3Assert(!size || size<=m_elemSize);
		b3Assert(m_freeCount>0);

		void* result = m_firstFree;
		//with asserts compiled out an exhausted pool must not dereference the null list head
		if (result)
		{
			m_firstFree = *(void**)m_firstFree;
			--m_freeCount;
		}
		return result;
	}

	///True when 'ptr' is non-null and lies inside the pool's storage.
	///It is not checked that 'ptr' sits on a slot boundary.
	bool validPtr(void* ptr)
	{
		if (!ptr)
		{
			return false;
		}
		return ((unsigned char*)ptr >= m_pool && (unsigned char*)ptr < m_pool + m_maxElements * m_elemSize);
	}

	///Returns the slot at 'ptr' to the pool by pushing it on the free list; a null 'ptr' is ignored.
	///Debug builds assert that 'ptr' lies inside the pool.
	void	freeMemory(void* ptr)
	{
		 if (ptr) {
			b3Assert((unsigned char*)ptr >= m_pool && (unsigned char*)ptr < m_pool + m_maxElements * m_elemSize);

			*(void**)ptr = m_firstFree;
			m_firstFree = ptr;
			++m_freeCount;
		}
	}

	///Size in bytes of one slot.
	int	getElementSize() const
	{
		return m_elemSize;
	}

	///Start of the backing storage (0 for an empty pool).
	unsigned char*	getPoolAddress()
	{
		return m_pool;
	}

	///Start of the backing storage (0 for an empty pool).
	const unsigned char*	getPoolAddress() const
	{
		return m_pool;
	}

};

#endif //_BT_POOL_ALLOCATOR_H

+ 245
- 0
src/bullet/Bullet3Common/b3QuadWord.h View File

@@ -0,0 +1,245 @@
/*
Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/


#ifndef B3_SIMD_QUADWORD_H
#define B3_SIMD_QUADWORD_H

#include "b3Scalar.h"
#include "b3MinMax.h"





#if defined (__CELLOS_LV2) && defined (__SPU__)
#include <altivec.h>
#endif

/**@brief The b3QuadWord class is base class for b3Vector3 and b3Quaternion.
* Some issues under PS3 Linux with IBM 2.1 SDK, gcc compiler prevent from using aligned quadword.
*/
#ifndef USE_LIBSPE2
B3_ATTRIBUTE_ALIGNED16(class) b3QuadWord
#else
class b3QuadWord
#endif
{
protected:

//Storage: four b3Scalar values in m_floats. Depending on the build they share a union with a
//SIMD register (mVec128) and, outside the SPU build, with the named members x,y,z,w.
#if defined (__SPU__) && defined (__CELLOS_LV2__)
union {
vec_float4 mVec128;
b3Scalar m_floats[4];
};
public:
/**@brief Return the four values as one SIMD register */
vec_float4 get128() const
{
return mVec128;
}

#else //__CELLOS_LV2__ __SPU__

#if defined(B3_USE_SSE) || defined(B3_USE_NEON)
public:
union {
b3SimdFloat4 mVec128;
b3Scalar m_floats[4];
struct {b3Scalar x,y,z,w;};
};
public:
/**@brief Return the four values as one SIMD register */
B3_FORCE_INLINE b3SimdFloat4 get128() const
{
return mVec128;
}
/**@brief Overwrite all four values from one SIMD register */
B3_FORCE_INLINE void set128(b3SimdFloat4 v128)
{
mVec128 = v128;
}
#else
public:
union
{
b3Scalar m_floats[4];
struct {b3Scalar x,y,z,w;};
};
#endif // B3_USE_SSE

#endif //__CELLOS_LV2__ __SPU__

public:
#if defined(B3_USE_SSE) || defined(B3_USE_NEON)

// Set Vector
B3_FORCE_INLINE b3QuadWord(const b3SimdFloat4 vec)
{
mVec128 = vec;
}

// Copy constructor
B3_FORCE_INLINE b3QuadWord(const b3QuadWord& rhs)
{
mVec128 = rhs.mVec128;
}

// Assignment Operator
B3_FORCE_INLINE b3QuadWord&
operator=(const b3QuadWord& v)
{
mVec128 = v.mVec128;
return *this;
}
#endif

/**@brief Return the x value */
B3_FORCE_INLINE const b3Scalar& getX() const { return m_floats[0]; }
/**@brief Return the y value */
B3_FORCE_INLINE const b3Scalar& getY() const { return m_floats[1]; }
/**@brief Return the z value */
B3_FORCE_INLINE const b3Scalar& getZ() const { return m_floats[2]; }
/**@brief Set the x value */
B3_FORCE_INLINE void setX(b3Scalar _x) { m_floats[0] = _x;};
/**@brief Set the y value */
B3_FORCE_INLINE void setY(b3Scalar _y) { m_floats[1] = _y;};
/**@brief Set the z value */
B3_FORCE_INLINE void setZ(b3Scalar _z) { m_floats[2] = _z;};
/**@brief Set the w value */
B3_FORCE_INLINE void setW(b3Scalar _w) { m_floats[3] = _w;};
//there is no getW() in this class; the fourth value is read through m_floats[3] (or the w member where the union provides it)


//B3_FORCE_INLINE b3Scalar& operator[](int i) { return (&m_floats[0])[i]; }
//B3_FORCE_INLINE const b3Scalar& operator[](int i) const { return (&m_floats[0])[i]; }
///operator b3Scalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
B3_FORCE_INLINE operator b3Scalar *() { return &m_floats[0]; }
B3_FORCE_INLINE operator const b3Scalar *() const { return &m_floats[0]; }

/**@brief Return true when all four values (w included) compare equal */
B3_FORCE_INLINE bool operator==(const b3QuadWord& other) const
{
#ifdef B3_USE_SSE
//compare the four lanes at once; the move mask is 0xf only when every lane compared equal
return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
#else
return ((m_floats[3]==other.m_floats[3]) &&
(m_floats[2]==other.m_floats[2]) &&
(m_floats[1]==other.m_floats[1]) &&
(m_floats[0]==other.m_floats[0]));
#endif
}

/**@brief Return true when at least one of the four values differs */
B3_FORCE_INLINE bool operator!=(const b3QuadWord& other) const
{
return !(*this == other);
}

/**@brief Set x,y,z and zero w
* @param _x Value of x
* @param _y Value of y
* @param _z Value of z
*/
B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
{
m_floats[0]=_x;
m_floats[1]=_y;
m_floats[2]=_z;
m_floats[3] = 0.f;
}

/* void getValue(b3Scalar *m) const
{
m[0] = m_floats[0];
m[1] = m_floats[1];
m[2] = m_floats[2];
}
*/
/**@brief Set the values
* @param _x Value of x
* @param _y Value of y
* @param _z Value of z
* @param _w Value of w
*/
B3_FORCE_INLINE void setValue(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z,const b3Scalar& _w)
{
m_floats[0]=_x;
m_floats[1]=_y;
m_floats[2]=_z;
m_floats[3]=_w;
}
/**@brief No initialization constructor: the four values are left uninitialized */
B3_FORCE_INLINE b3QuadWord()
// :m_floats[0](b3Scalar(0.)),m_floats[1](b3Scalar(0.)),m_floats[2](b3Scalar(0.)),m_floats[3](b3Scalar(0.))
{
}
/**@brief Three argument constructor (zeros w)
* @param _x Value of x
* @param _y Value of y
* @param _z Value of z
*/
B3_FORCE_INLINE b3QuadWord(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z)
{
m_floats[0] = _x, m_floats[1] = _y, m_floats[2] = _z, m_floats[3] = 0.0f;
}

/**@brief Initializing constructor
* @param _x Value of x
* @param _y Value of y
* @param _z Value of z
* @param _w Value of w
*/
B3_FORCE_INLINE b3QuadWord(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z,const b3Scalar& _w)
{
m_floats[0] = _x, m_floats[1] = _y, m_floats[2] = _z, m_floats[3] = _w;
}

/**@brief Set each element to the max of the current values and the values of another b3QuadWord
* @param other The other b3QuadWord to compare with
*/
B3_FORCE_INLINE void setMax(const b3QuadWord& other)
{
#ifdef B3_USE_SSE
mVec128 = _mm_max_ps(mVec128, other.mVec128);
#elif defined(B3_USE_NEON)
mVec128 = vmaxq_f32(mVec128, other.mVec128);
#else
b3SetMax(m_floats[0], other.m_floats[0]);
b3SetMax(m_floats[1], other.m_floats[1]);
b3SetMax(m_floats[2], other.m_floats[2]);
b3SetMax(m_floats[3], other.m_floats[3]);
#endif
}
/**@brief Set each element to the min of the current values and the values of another b3QuadWord
* @param other The other b3QuadWord to compare with
*/
B3_FORCE_INLINE void setMin(const b3QuadWord& other)
{
#ifdef B3_USE_SSE
mVec128 = _mm_min_ps(mVec128, other.mVec128);
#elif defined(B3_USE_NEON)
mVec128 = vminq_f32(mVec128, other.mVec128);
#else
b3SetMin(m_floats[0], other.m_floats[0]);
b3SetMin(m_floats[1], other.m_floats[1]);
b3SetMin(m_floats[2], other.m_floats[2]);
b3SetMin(m_floats[3], other.m_floats[3]);
#endif
}



};

#endif //B3_SIMD_QUADWORD_H

+ 893
- 0
src/bullet/Bullet3Common/b3Quaternion.h View File

@@ -0,0 +1,893 @@
/*
Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/



#ifndef B3_SIMD__QUATERNION_H_
#define B3_SIMD__QUATERNION_H_


#include "b3Vector3.h"
#include "b3QuadWord.h"





#ifdef B3_USE_SSE

//four lanes of 1.0f
const __m128 B3_ATTRIBUTE_ALIGNED16(b3vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};

#endif

#if defined(B3_USE_SSE) || defined(B3_USE_NEON)

//Sign masks: -0.0f has only the sign bit set, so XOR-ing a vector with one of these
//flips the sign of exactly the lanes that hold -0.0f here.
//b3vQInv marks the x,y,z lanes. NOTE(review): presumably used to conjugate/invert a quaternion -- its use is outside this part of the file, confirm.
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
//b3vPPPM marks only the w lane; the quaternion operator*= XORs with it to negate the last element.
const b3SimdFloat4 B3_ATTRIBUTE_ALIGNED16(b3vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};

#endif

/**@brief The b3Quaternion implements quaternion to perform linear algebra rotations in combination with b3Matrix3x3, b3Vector3 and b3Transform. */
class b3Quaternion : public b3QuadWord {
public:
/**@brief No initialization constructor */
//the four components are left uninitialized: the b3QuadWord default constructor has an empty body
b3Quaternion() {}

//The SIMD constructor and the register-wide copy/assignment below are only compiled for NEON builds,
//or for SSE builds that also define B3_USE_SSE_IN_API.
#if (defined(B3_USE_SSE_IN_API) && defined(B3_USE_SSE))|| defined(B3_USE_NEON)
// Set Vector
B3_FORCE_INLINE b3Quaternion(const b3SimdFloat4 vec)
{
mVec128 = vec;
}

// Copy constructor
B3_FORCE_INLINE b3Quaternion(const b3Quaternion& rhs)
{
mVec128 = rhs.mVec128;
}

// Assignment Operator
B3_FORCE_INLINE b3Quaternion&
operator=(const b3Quaternion& v)
{
mVec128 = v.mVec128;
return *this;
}
#endif

// template <typename b3Scalar>
// explicit Quaternion(const b3Scalar *v) : Tuple4<b3Scalar>(v) {}
/**@brief Constructor from scalars */
b3Quaternion(const b3Scalar& _x, const b3Scalar& _y, const b3Scalar& _z, const b3Scalar& _w)
	: b3QuadWord(_x, _y, _z, _w)
{
	//the four components are stored as given; nothing is normalized or validated
	//b3Assert(!((_x==1.f) && (_y==0.f) && (_z==0.f) && (_w==0.f)));
}
/**@brief Axis angle Constructor
* @param axis The axis which the rotation is around
* @param angle The magnitude of the rotation around the axis (Radians) */
b3Quaternion(const b3Vector3& _axis, const b3Scalar& _angle)
{
	//all of the work is delegated to setRotation
	setRotation(_axis, _angle);
}
/**@brief Constructor from Euler angles
* @param yaw Angle around Y unless B3_EULER_DEFAULT_ZYX defined then Z
* @param pitch Angle around X unless B3_EULER_DEFAULT_ZYX defined then Y
* @param roll Angle around Z unless B3_EULER_DEFAULT_ZYX defined then X */
b3Quaternion(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
{
	//the Euler convention is selected at compile time
#ifdef B3_EULER_DEFAULT_ZYX
	setEulerZYX(yaw, pitch, roll);
#else
	setEuler(yaw, pitch, roll);
#endif
}
/**@brief Set the rotation using axis angle notation
* @param axis The axis around which to rotate
* @param angle The magnitude of the rotation in Radians */
void setRotation(const b3Vector3& axis, const b3Scalar& _angle)
{
b3Scalar d = axis.length();
b3Assert(d != b3Scalar(0.0));
b3Scalar s = b3Sin(_angle * b3Scalar(0.5)) / d;
setValue(axis.getX() * s, axis.getY() * s, axis.getZ() * s,
b3Cos(_angle * b3Scalar(0.5)));
}
/**@brief Set the quaternion using Euler angles
* @param yaw Angle around Y
* @param pitch Angle around X
* @param roll Angle around Z */
void setEuler(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
{
b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);
b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);
b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);
b3Scalar cosYaw = b3Cos(halfYaw);
b3Scalar sinYaw = b3Sin(halfYaw);
b3Scalar cosPitch = b3Cos(halfPitch);
b3Scalar sinPitch = b3Sin(halfPitch);
b3Scalar cosRoll = b3Cos(halfRoll);
b3Scalar sinRoll = b3Sin(halfRoll);
setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
}
/**@brief Set the quaternion using euler angles
* @param yaw Angle around Z
* @param pitch Angle around Y
* @param roll Angle around X */
void setEulerZYX(const b3Scalar& yaw, const b3Scalar& pitch, const b3Scalar& roll)
{
b3Scalar halfYaw = b3Scalar(yaw) * b3Scalar(0.5);
b3Scalar halfPitch = b3Scalar(pitch) * b3Scalar(0.5);
b3Scalar halfRoll = b3Scalar(roll) * b3Scalar(0.5);
b3Scalar cosYaw = b3Cos(halfYaw);
b3Scalar sinYaw = b3Sin(halfYaw);
b3Scalar cosPitch = b3Cos(halfPitch);
b3Scalar sinPitch = b3Sin(halfPitch);
b3Scalar cosRoll = b3Cos(halfRoll);
b3Scalar sinRoll = b3Sin(halfRoll);
setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
}
/**@brief Add two quaternions
* @param q The quaternion to add to this one */
B3_FORCE_INLINE b3Quaternion& operator+=(const b3Quaternion& q)
{
//component-wise addition of all four values; the three branches differ only in how the lanes are added
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
mVec128 = _mm_add_ps(mVec128, q.mVec128);
#elif defined(B3_USE_NEON)
mVec128 = vaddq_f32(mVec128, q.mVec128);
#else
m_floats[0] += q.getX();
m_floats[1] += q.getY();
m_floats[2] += q.getZ();
m_floats[3] += q.m_floats[3];
#endif
//returns *this so operations can be chained
return *this;
}

/**@brief Subtract out a quaternion
* @param q The quaternion to subtract from this one */
b3Quaternion& operator-=(const b3Quaternion& q)
{
//component-wise subtraction of all four values; the three branches differ only in how the lanes are subtracted
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
mVec128 = _mm_sub_ps(mVec128, q.mVec128);
#elif defined(B3_USE_NEON)
mVec128 = vsubq_f32(mVec128, q.mVec128);
#else
m_floats[0] -= q.getX();
m_floats[1] -= q.getY();
m_floats[2] -= q.getZ();
m_floats[3] -= q.m_floats[3];
#endif
//returns *this so operations can be chained
return *this;
}

/**@brief Scale this quaternion
* @param s The scalar to scale by */
b3Quaternion& operator*=(const b3Scalar& s)
{
//multiplies all four values by s
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
//broadcast s into every lane, then multiply lane by lane
__m128 vs = _mm_load_ss(&s); // (S 0 0 0)
vs = b3_pshufd_ps(vs, 0); // (S S S S)
mVec128 = _mm_mul_ps(mVec128, vs);
#elif defined(B3_USE_NEON)
mVec128 = vmulq_n_f32(mVec128, s);
#else
m_floats[0] *= s;
m_floats[1] *= s;
m_floats[2] *= s;
m_floats[3] *= s;
#endif
//returns *this so operations can be chained
return *this;
}

/**@brief Multiply this quaternion by q on the right
* @param q The other quaternion
* Equivalent to this = this * q */
b3Quaternion& operator*=(const b3Quaternion& q)
{
//Quaternion product this * q. The SSE and NEON branches build the result from four shuffled
//lane-wise products (named AB0..AB3 in the comments): AB1+AB2 gets the sign of its last lane
//flipped via b3vPPPM and is added to AB0-AB3. The scalar branch spells the product out per component.
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
__m128 vQ2 = q.get128();
__m128 A1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(0,1,2,0));
__m128 B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3,3,3,0));
A1 = A1 * B1;
__m128 A2 = b3_pshufd_ps(mVec128, B3_SHUFFLE(1,2,0,1));
__m128 B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1));
A2 = A2 * B2;
B1 = b3_pshufd_ps(mVec128, B3_SHUFFLE(2,0,1,2));
B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2));
B1 = B1 * B2; // A3 *= B3
mVec128 = b3_splat_ps(mVec128, 3); // A0
mVec128 = mVec128 * vQ2; // A0 * B0
A1 = A1 + A2; // AB12
mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
mVec128 = mVec128+ A1; // AB03 + AB12

#elif defined(B3_USE_NEON)

float32x4_t vQ1 = mVec128;
float32x4_t vQ2 = q.get128();
float32x4_t A0, A1, B1, A2, B2, A3, B3;
float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
{
float32x2x2_t tmp;
tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
vQ1zx = tmp.val[0];

tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
vQ2zx = tmp.val[0];
}
vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);

vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);

vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);

A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X

A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));

A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z

A1 = vmulq_f32(A1, B1);
A2 = vmulq_f32(A2, B2);
A3 = vmulq_f32(A3, B3); // A3 *= B3
A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0

A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
// change the sign of the last element
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
A0 = vaddq_f32(A0, A1); // AB03 + AB12
mVec128 = A0;
#else
//all four arguments are evaluated from the old components before setValue overwrites them
setValue(
m_floats[3] * q.getX() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.getZ() - m_floats[2] * q.getY(),
m_floats[3] * q.getY() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.getX() - m_floats[0] * q.getZ(),
m_floats[3] * q.getZ() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.getY() - m_floats[1] * q.getX(),
m_floats[3] * q.m_floats[3] - m_floats[0] * q.getX() - m_floats[1] * q.getY() - m_floats[2] * q.getZ());
#endif
return *this;
}
/**@brief Four-component dot product of this quaternion with another
* @param q The other quaternion
* @return x*q.x + y*q.y + z*q.z + w*q.w */
b3Scalar dot(const b3Quaternion& q) const
{
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
	// Multiply lane-wise, then fold the four products into lane 0.
	__m128 prod = _mm_mul_ps(mVec128, q.mVec128);
	__m128 fold = _mm_movehl_ps(prod, prod);
	prod = _mm_add_ps(prod, fold);
	fold = _mm_shuffle_ps(prod, prod, 0x55);
	prod = _mm_add_ss(prod, fold);
	return _mm_cvtss_f32(prod);
#elif defined(B3_USE_NEON)
	const float32x4_t prod = vmulq_f32(mVec128, q.mVec128);
	float32x2_t pairSum = vpadd_f32(vget_low_f32(prod), vget_high_f32(prod));
	pairSum = vpadd_f32(pairSum, pairSum);
	return vget_lane_f32(pairSum, 0);
#else
	return m_floats[0] * q.getX() +
		m_floats[1] * q.getY() +
		m_floats[2] * q.getZ() +
		m_floats[3] * q.m_floats[3];
#endif
}

/**@brief Return the length squared of the quaternion
* @return The dot product of this quaternion with itself */
b3Scalar length2() const
{
return dot(*this);
}

/**@brief Return the length of the quaternion
* @return The square root of length2() */
b3Scalar length() const
{
return b3Sqrt(length2());
}

/**@brief Scale this quaternion in place to unit length
* Afterwards x^2 + y^2 + z^2 + w^2 = 1
* @return Reference to this quaternion
* No check is made for a zero-length quaternion in the SSE path; the other
* path divides through operator/=, which asserts on a zero divisor. */
b3Quaternion& normalize()
{
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
	// Sum of squares, folded into lane 0.
	__m128 sq = _mm_mul_ps(mVec128, mVec128);
	__m128 fold = _mm_movehl_ps(sq, sq);
	sq = _mm_add_ps(sq, fold);
	fold = _mm_shuffle_ps(sq, sq, 0x55);
	sq = _mm_add_ss(sq, fold);

	// 1 / sqrt(sum), broadcast to all lanes, then applied.
	__m128 invLen = _mm_sqrt_ss(sq);
	invLen = _mm_div_ss(b3vOnes, invLen);
	invLen = b3_pshufd_ps(invLen, 0); // splat
	mVec128 = _mm_mul_ps(mVec128, invLen);
	return *this;
#else
	const b3Scalar len = length();
	return *this /= len;
#endif
}

/**@brief Build a copy of this quaternion with every component multiplied by s
* @param s The scale factor
* @return The scaled quaternion; this object is not modified */
B3_FORCE_INLINE b3Quaternion
operator*(const b3Scalar& s) const
{
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
	__m128 factor = _mm_load_ss(&s); // (S 0 0 0)
	factor = b3_pshufd_ps(factor, 0x00); // (S S S S)
	return b3Quaternion(_mm_mul_ps(mVec128, factor));
#elif defined(B3_USE_NEON)
	return b3Quaternion(vmulq_n_f32(mVec128, s));
#else
	const b3Scalar sx = getX() * s;
	const b3Scalar sy = getY() * s;
	const b3Scalar sz = getZ() * s;
	const b3Scalar sw = m_floats[3] * s;
	return b3Quaternion(sx, sy, sz, sw);
#endif
}

/**@brief Return an inversely scaled version of this quaternion
* @param s The divisor; must be non-zero (asserted when b3Assert is active)
* @return This quaternion multiplied by 1/s */
b3Quaternion operator/(const b3Scalar& s) const
{
	b3Assert(s != b3Scalar(0.0));
	const b3Scalar reciprocal = b3Scalar(1.0) / s;
	return (*this) * reciprocal;
}

/**@brief Divide every component of this quaternion by a scalar, in place
* @param s The divisor; must be non-zero (asserted when b3Assert is active)
* @return Reference to this quaternion after it has been multiplied by 1/s */
b3Quaternion& operator/=(const b3Scalar& s)
{
	b3Assert(s != b3Scalar(0.0));
	const b3Scalar reciprocal = b3Scalar(1.0) / s;
	return (*this) *= reciprocal;
}

/**@brief Return a unit-length copy of this quaternion
* @return This quaternion divided by its length; this object is not modified */
b3Quaternion normalized() const
{
	const b3Scalar len = length();
	return *this / len;
}
/**@brief Return the angle between this quaternion and the other
* @param q The other quaternion
* @return acos of the dot product divided by the product of both lengths
* Neither quaternion may have zero length (asserted when b3Assert is active). */
b3Scalar angle(const b3Quaternion& q) const
{
	const b3Scalar lengthProduct = b3Sqrt(length2() * q.length2());
	b3Assert(lengthProduct != b3Scalar(0.0));
	const b3Scalar cosine = dot(q) / lengthProduct;
	return b3Acos(cosine);
}
/**@brief Return the angle of rotation represented by this quaternion
* @return Twice the arc cosine of the w component */
b3Scalar getAngle() const
{
	const b3Scalar halfAngle = b3Acos(m_floats[3]);
	return b3Scalar(2.) * halfAngle;
}

/**@brief Return the axis of the rotation represented by this quaternion
* @return (x, y, z) divided by sqrt(1 - w^2), or the arbitrary axis (1, 0, 0)
* when 1 - w^2 is too small to divide by safely */
b3Vector3 getAxis() const
{
	const b3Scalar sinSquared = 1.f-m_floats[3]*m_floats[3];
	if (sinSquared < b3Scalar(10.) * B3_EPSILON) //Check for divide by zero
	{
		return b3MakeVector3(1.0, 0.0, 0.0); // Arbitrary
	}
	const b3Scalar invSin = 1.f/b3Sqrt(sinSquared);
	return b3MakeVector3(m_floats[0] * invSin, m_floats[1] * invSin, m_floats[2] * invSin);
}

/**@brief Return the inverse of this quaternion
* The scalar branch negates x, y and z and keeps w, i.e. it returns the
* conjugate. NOTE: the conjugate equals the multiplicative inverse only for a
* unit-length quaternion; no normalization is performed here.
* The SIMD branches XOR with the b3vQInv mask (defined elsewhere; presumably a
* sign-bit mask for the first three lanes - confirm against its definition). */
b3Quaternion inverse() const
{
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
return b3Quaternion(_mm_xor_ps(mVec128, b3vQInv));
#elif defined(B3_USE_NEON)
return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vQInv));
#else
return b3Quaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
#endif
}

/**@brief Component-wise sum of this quaternion and another
* @param q2 The other quaternion
* @return A new quaternion holding the sum; neither operand is modified */
B3_FORCE_INLINE b3Quaternion
operator+(const b3Quaternion& q2) const
{
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
	return b3Quaternion(_mm_add_ps(mVec128, q2.mVec128));
#elif defined(B3_USE_NEON)
	return b3Quaternion(vaddq_f32(mVec128, q2.mVec128));
#else
	return b3Quaternion(
		getX() + q2.getX(),
		getY() + q2.getY(),
		getZ() + q2.getZ(),
		m_floats[3] + q2.m_floats[3]);
#endif
}

/**@brief Component-wise difference of this quaternion and another
* @param q2 The quaternion to subtract
* @return A new quaternion holding (this - q2); neither operand is modified */
B3_FORCE_INLINE b3Quaternion
operator-(const b3Quaternion& q2) const
{
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
	return b3Quaternion(_mm_sub_ps(mVec128, q2.mVec128));
#elif defined(B3_USE_NEON)
	return b3Quaternion(vsubq_f32(mVec128, q2.mVec128));
#else
	return b3Quaternion(
		getX() - q2.getX(),
		getY() - q2.getY(),
		getZ() - q2.getZ(),
		m_floats[3] - q2.m_floats[3]);
#endif
}

/**@brief Return the negative of this quaternion
* All four components, including w, have their sign flipped.
* @return A new quaternion; this object is not modified */
B3_FORCE_INLINE b3Quaternion operator-() const
{
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
	return b3Quaternion(_mm_xor_ps(mVec128, b3vMzeroMask));
#elif defined(B3_USE_NEON)
	return b3Quaternion((b3SimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)b3vMzeroMask) );
#else
	return b3Quaternion( - getX(), - getY(), - getZ(), - m_floats[3]);
#endif
}
/**@brief Of qd and -qd, return the one farther from this quaternion
* Distance is compared through the squared length of (this - qd) versus
* (this + qd); on a tie, -qd is returned.
* @param qd The candidate quaternion */
B3_FORCE_INLINE b3Quaternion farthest( const b3Quaternion& qd) const
{
	const b3Quaternion diff = *this - qd;
	const b3Quaternion sum = *this + qd;
	const b3Scalar distToQd = diff.dot(diff);
	const b3Scalar distToNegQd = sum.dot(sum);
	if( distToQd > distToNegQd )
	{
		return qd;
	}
	return (-qd);
}

/**@brief Of qd and -qd, return the one nearer to this quaternion
* Distance is compared through the squared length of (this - qd) versus
* (this + qd); on a tie, -qd is returned.
* @param qd The candidate quaternion */
B3_FORCE_INLINE b3Quaternion nearest( const b3Quaternion& qd) const
{
	const b3Quaternion diff = *this - qd;
	const b3Quaternion sum = *this + qd;
	const b3Scalar distToQd = diff.dot(diff);
	const b3Scalar distToNegQd = sum.dot(sum);
	if( distToQd < distToNegQd )
	{
		return qd;
	}
	return (-qd);
}


/**@brief Spherical linear interpolation between this quaternion and another
* @param q The other quaternion to interpolate with
* @param t The interpolation ratio. If t = 0 the result is this, if t = 1 the result is q
* (or -q when the two quaternions are more than 90 degrees apart in 4D, so the short arc is taken).
* @return The interpolated quaternion; *this is returned unchanged when the
* normalized dot product has magnitude 1 or more.
* Slerp interpolates assuming constant velocity. */
b3Quaternion slerp(const b3Quaternion& q, const b3Scalar& t) const
{
	const b3Scalar magnitude = b3Sqrt(length2() * q.length2());
	b3Assert(magnitude > b3Scalar(0));

	const b3Scalar product = dot(q) / magnitude;

	// Nothing to interpolate along when the inputs are (anti)parallel.
	if (!(b3Fabs(product) < b3Scalar(1)))
	{
		return *this;
	}

	// Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
	const b3Scalar sign = (product < 0) ? b3Scalar(-1) : b3Scalar(1);

	const b3Scalar theta = b3Acos(sign * product);
	const b3Scalar weightQ = b3Sin(sign * t * theta);
	const b3Scalar invSinTheta = b3Scalar(1.0) / b3Sin(theta);
	const b3Scalar weightThis = b3Sin((b3Scalar(1.0) - t) * theta);

	return b3Quaternion(
		(m_floats[0] * weightThis + q.getX() * weightQ) * invSinTheta,
		(m_floats[1] * weightThis + q.getY() * weightQ) * invSinTheta,
		(m_floats[2] * weightThis + q.getZ() * weightQ) * invSinTheta,
		(m_floats[3] * weightThis + q.m_floats[3] * weightQ) * invSinTheta);
}

/**@brief Return a reference to a shared identity quaternion (0, 0, 0, 1)
* The object is a function-local static, constructed on first call. */
static const b3Quaternion& getIdentity()
{
static const b3Quaternion identityQuat(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.),b3Scalar(1.));
return identityQuat;
}

/**@brief Return the w component, stored in m_floats[3] */
B3_FORCE_INLINE const b3Scalar& getW() const { return m_floats[3]; }

};





/**@brief Return the product of two quaternions
* @param q1 The left operand
* @param q2 The right operand
* @return q1 * q2; the operation is not commutative, so operand order matters */
B3_FORCE_INLINE b3Quaternion
operator*(const b3Quaternion& q1, const b3Quaternion& q2)
{
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
// SSE path: three shuffled partial products plus (q1.w * q2), combined below.
// The trailing "// v..." notes name the NEON operations used for the same lanes.
__m128 vQ1 = q1.get128();
__m128 vQ2 = q2.get128();
__m128 A0, A1, B1, A2, B2;
A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0,1,2,0)); // X Y z x // vtrn
B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3,3,3,0)); // W W W X // vdup vext

A1 = A1 * B1;
A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1,2,0,1)); // Y Z X Y // vext
B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1)); // z x Y Y // vtrn vdup

A2 = A2 * B2;

// B1/B2 are reused here as the third operand pair (A3/B3).
B1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2,0,1,2)); // z x Y Z // vtrn vext
B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2)); // Y Z x z // vext vtrn
B1 = B1 * B2; // A3 *= B3

A0 = b3_splat_ps(vQ1, 3); // A0
A0 = A0 * vQ2; // A0 * B0

A1 = A1 + A2; // AB12
A0 = A0 - B1; // AB03 = AB0 - AB3
A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
A0 = A0 + A1; // AB03 + AB12
return b3Quaternion(A0);

#elif defined(B3_USE_NEON)

// NEON path: same partial products, assembled from 2-lane halves.
float32x4_t vQ1 = q1.get128();
float32x4_t vQ2 = q2.get128();
float32x4_t A0, A1, B1, A2, B2, A3, B3;
float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
{
float32x2x2_t tmp;
tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
vQ1zx = tmp.val[0];

tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
vQ2zx = tmp.val[0];
}
vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);

vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);

vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);

A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X

A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));

A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z

A1 = vmulq_f32(A1, B1);
A2 = vmulq_f32(A2, B2);
A3 = vmulq_f32(A3, B3); // A3 *= B3
A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0

A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
// change the sign of the last element
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
A0 = vaddq_f32(A0, A1); // AB03 + AB12
return b3Quaternion(A0);

#else
// Scalar path: the four expressions are the x, y, z and w components of q1 * q2.
return b3Quaternion(
q1.getW() * q2.getX() + q1.getX() * q2.getW() + q1.getY() * q2.getZ() - q1.getZ() * q2.getY(),
q1.getW() * q2.getY() + q1.getY() * q2.getW() + q1.getZ() * q2.getX() - q1.getX() * q2.getZ(),
q1.getW() * q2.getZ() + q1.getZ() * q2.getW() + q1.getX() * q2.getY() - q1.getY() * q2.getX(),
q1.getW() * q2.getW() - q1.getX() * q2.getX() - q1.getY() * q2.getY() - q1.getZ() * q2.getZ());
#endif
}

/**@brief Multiply a quaternion by a vector on the right
* The scalar branch is the quaternion product q * (w.x, w.y, w.z, 0): it is the
* full product formula with every term that involves the vector's fourth
* component left out.
* @param q The quaternion (left operand)
* @param w The vector (right operand)
* @return The resulting quaternion, which is generally not of unit length */
B3_FORCE_INLINE b3Quaternion
operator*(const b3Quaternion& q, const b3Vector3& w)
{
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
// SSE path: three shuffled partial products; there is no (w * w) term to add.
__m128 vQ1 = q.get128();
__m128 vQ2 = w.get128();
__m128 A1, B1, A2, B2, A3, B3;
A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(3,3,3,0));
B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(0,1,2,0));

A1 = A1 * B1;
A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1,2,0,1));
B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1));

A2 = A2 * B2;

A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2,0,1,2));
B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2));
A3 = A3 * B3; // A3 *= B3

A1 = A1 + A2; // AB12
A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
A1 = A1 - A3; // AB123 = AB12 - AB3
return b3Quaternion(A1);
#elif defined(B3_USE_NEON)

// NEON path: same partial products, assembled from 2-lane halves.
float32x4_t vQ1 = q.get128();
float32x4_t vQ2 = w.get128();
float32x4_t A1, B1, A2, B2, A3, B3;
float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
{
float32x2x2_t tmp;

tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
vQ2zx = tmp.val[0];

tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
vQ1zx = tmp.val[0];
}

vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);

vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);

A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x

A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));

A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z

A1 = vmulq_f32(A1, B1);
A2 = vmulq_f32(A2, B2);
A3 = vmulq_f32(A3, B3); // A3 *= B3

A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
// change the sign of the last element
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
return b3Quaternion(A1);
#else
// Scalar path: x, y, z and w components of the product, in that order.
return b3Quaternion(
q.getW() * w.getX() + q.getY() * w.getZ() - q.getZ() * w.getY(),
q.getW() * w.getY() + q.getZ() * w.getX() - q.getX() * w.getZ(),
q.getW() * w.getZ() + q.getX() * w.getY() - q.getY() * w.getX(),
-q.getX() * w.getX() - q.getY() * w.getY() - q.getZ() * w.getZ());
#endif
}

/**@brief Multiply a vector by a quaternion on the right
* The scalar branch is the quaternion product (w.x, w.y, w.z, 0) * q: it is the
* full product formula with every term that involves the vector's fourth
* component left out.
* @param w The vector (left operand)
* @param q The quaternion (right operand)
* @return The resulting quaternion, which is generally not of unit length */
B3_FORCE_INLINE b3Quaternion
operator*(const b3Vector3& w, const b3Quaternion& q)
{
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
// SSE path: three shuffled partial products; there is no (w * w) term to add.
__m128 vQ1 = w.get128();
__m128 vQ2 = q.get128();
__m128 A1, B1, A2, B2, A3, B3;
A1 = b3_pshufd_ps(vQ1, B3_SHUFFLE(0,1,2,0)); // X Y z x
B1 = b3_pshufd_ps(vQ2, B3_SHUFFLE(3,3,3,0)); // W W W X

A1 = A1 * B1;
A2 = b3_pshufd_ps(vQ1, B3_SHUFFLE(1,2,0,1));
B2 = b3_pshufd_ps(vQ2, B3_SHUFFLE(2,0,1,1));

A2 = A2 *B2;

A3 = b3_pshufd_ps(vQ1, B3_SHUFFLE(2,0,1,2));
B3 = b3_pshufd_ps(vQ2, B3_SHUFFLE(1,2,0,2));
A3 = A3 * B3; // A3 *= B3

A1 = A1 + A2; // AB12
A1 = _mm_xor_ps(A1, b3vPPPM); // change sign of the last element
A1 = A1 - A3; // AB123 = AB12 - AB3
return b3Quaternion(A1);

#elif defined(B3_USE_NEON)

// NEON path: same partial products, assembled from 2-lane halves.
float32x4_t vQ1 = w.get128();
float32x4_t vQ2 = q.get128();
float32x4_t A1, B1, A2, B2, A3, B3;
float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
{
float32x2x2_t tmp;
tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
vQ1zx = tmp.val[0];

tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
vQ2zx = tmp.val[0];
}
vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);

vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);

vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);

A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X

A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));

A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z

A1 = vmulq_f32(A1, B1);
A2 = vmulq_f32(A2, B2);
A3 = vmulq_f32(A3, B3); // A3 *= B3

A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
// change the sign of the last element
A1 = (b3SimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)b3vPPPM);
A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
return b3Quaternion(A1);
#else
// Scalar path: x, y, z and w components of the product, in that order.
return b3Quaternion(
+w.getX() * q.getW() + w.getY() * q.getZ() - w.getZ() * q.getY(),
+w.getY() * q.getW() + w.getZ() * q.getX() - w.getX() * q.getZ(),
+w.getZ() * q.getW() + w.getX() * q.getY() - w.getY() * q.getX(),
-w.getX() * q.getX() - w.getY() * q.getY() - w.getZ() * q.getZ());
#endif
}

/**@brief Calculate the dot product between two quaternions
* Free-function form of q1.dot(q2).
* @param q1 The first quaternion
* @param q2 The second quaternion */
B3_FORCE_INLINE b3Scalar
b3Dot(const b3Quaternion& q1, const b3Quaternion& q2)
{
return q1.dot(q2);
}


/**@brief Return the length of a quaternion
* Free-function form of q.length().
* @param q The quaternion to measure */
B3_FORCE_INLINE b3Scalar
b3Length(const b3Quaternion& q)
{
return q.length();
}

/**@brief Return the angle between two quaternions
* Free-function form of q1.angle(q2).
* @param q1 The first quaternion
* @param q2 The second quaternion */
B3_FORCE_INLINE b3Scalar
b3Angle(const b3Quaternion& q1, const b3Quaternion& q2)
{
return q1.angle(q2);
}

/**@brief Return the inverse of a quaternion
* Free-function form of q.inverse().
* @param q The quaternion to invert */
B3_FORCE_INLINE b3Quaternion
b3Inverse(const b3Quaternion& q)
{
return q.inverse();
}

/**@brief Return the result of spherical linear interpolation between two quaternions
* Free-function form of q1.slerp(q2, t).
* @param q1 The first quaternion
* @param q2 The second quaternion
* @param t The ratio between q1 and q2. t = 0 returns q1, t = 1 returns q2
* Slerp assumes constant velocity between positions. */
B3_FORCE_INLINE b3Quaternion
b3Slerp(const b3Quaternion& q1, const b3Quaternion& q2, const b3Scalar& t)
{
return q1.slerp(q2, t);
}

/**@brief Return the quaternion product rot0 * rot1
* @param rot0 The left operand
* @param rot1 The right operand */
B3_FORCE_INLINE b3Quaternion
b3QuatMul(const b3Quaternion& rot0, const b3Quaternion& rot1)
{
return rot0*rot1;
}

/**@brief Return a normalized copy of a quaternion
* Free-function form of orn.normalized().
* @param orn The quaternion to normalize; it is not modified */
B3_FORCE_INLINE b3Quaternion
b3QuatNormalized(const b3Quaternion& orn)
{
return orn.normalized();
}



/**@brief Rotate a vector by a quaternion
* Computes rotation * v * rotation.inverse() and returns its x, y, z part.
* @param rotation The rotation to apply
* @param v The vector to rotate
* @return The rotated vector */
B3_FORCE_INLINE b3Vector3
b3QuatRotate(const b3Quaternion& rotation, const b3Vector3& v)
{
	b3Quaternion rotated = rotation * v;
	rotated *= rotation.inverse();
#if defined (B3_USE_SSE_IN_API) && defined (B3_USE_SSE)
	// Mask the result with b3vFFF0fMask before handing it to the vector.
	const __m128 masked = _mm_and_ps(rotated.get128(), b3vFFF0fMask);
	return b3MakeVector3(masked);
#elif defined(B3_USE_NEON)
	return b3MakeVector3((float32x4_t)vandq_s32((int32x4_t)rotated.get128(), b3vFFF0Mask));
#else
	return b3MakeVector3(rotated.getX(),rotated.getY(),rotated.getZ());
#endif
}

/**@brief Build the quaternion for the shortest-arc rotation from v0 to v1
* Source: Game Programming Gems 2.10. Both inputs must already be normalized.
* When the vectors are (nearly) opposite, the result uses a direction
* obtained from b3PlaneSpace1(v0, ...) with w = 0.
* @param v0 The start direction (unit length)
* @param v1 The target direction (unit length) */
B3_FORCE_INLINE b3Quaternion
b3ShortestArcQuat(const b3Vector3& v0, const b3Vector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
{
	const b3Vector3 axis = v0.cross(v1);
	const b3Scalar cosAngle = v0.dot(v1);

	const bool nearlyOpposite = cosAngle < -1.0 + B3_EPSILON;
	if (nearlyOpposite)
	{
		b3Vector3 n,unused;
		b3PlaneSpace1(v0,n,unused);
		return b3Quaternion(n.getX(),n.getY(),n.getZ(),0.0f); // just pick any vector that is orthogonal to v0
	}

	const b3Scalar s = b3Sqrt((1.0f + cosAngle) * 2.0f);
	const b3Scalar invS = 1.0f / s;

	return b3Quaternion(axis.getX()*invS,axis.getY()*invS,axis.getZ()*invS,s * 0.5f);
}

/**@brief Normalize both vectors, then return b3ShortestArcQuat(v0, v1)
* @param v0 The start direction; normalized IN PLACE by this call
* @param v1 The target direction; normalized IN PLACE by this call */
B3_FORCE_INLINE b3Quaternion
b3ShortestArcQuatNormalize2(b3Vector3& v0,b3Vector3& v1)
{
v0.normalize();
v1.normalize();
return b3ShortestArcQuat(v0,v1);
}

#endif //B3_SIMD__QUATERNION_H_




+ 50
- 0
src/bullet/Bullet3Common/b3Random.h View File

@@ -0,0 +1,50 @@
/*
Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/



#ifndef B3_GEN_RANDOM_H
#define B3_GEN_RANDOM_H

#include "b3Scalar.h"

// Random source selection: when MT19937 is defined the generator comes from
// <mt19937.h> (init_genrand / genrand_int32), otherwise from the C library
// (srand / rand). B3_RAND_MAX is the largest value b3rand() is expected to return.
#ifdef MT19937

#include <limits.h>
#include <mt19937.h>

#define B3_RAND_MAX UINT_MAX

/// Seed the generator.
B3_FORCE_INLINE void b3Srand(unsigned int seed) { init_genrand(seed); }
/// Return the next random value.
B3_FORCE_INLINE unsigned int b3rand() { return genrand_int32(); }

#else

#include <stdlib.h>

#define B3_RAND_MAX RAND_MAX

/// Seed the generator.
B3_FORCE_INLINE void b3Srand(unsigned int seed) { srand(seed); }
/// Return the next random value; rand()'s int result is converted to unsigned int.
B3_FORCE_INLINE unsigned int b3rand() { return rand(); }

#endif

/**@brief Return a random scalar between minRange and maxRange
* b3rand() is divided by (B3_RAND_MAX + 1) and the result is mapped linearly
* onto the requested interval.
* NOTE(review): in single precision B3_RAND_MAX + 1 may round to B3_RAND_MAX,
* so the result can presumably reach maxRange itself - confirm if an exclusive
* upper bound is required.
* @param minRange Lower end of the interval
* @param maxRange Upper end of the interval */
inline b3Scalar b3RandRange(b3Scalar minRange, b3Scalar maxRange)
{
	const b3Scalar unit = b3rand() / (b3Scalar(B3_RAND_MAX) + b3Scalar(1.0));
	const b3Scalar span = maxRange - minRange;
	return unit * span + minRange;
}


#endif //B3_GEN_RANDOM_H


+ 661
- 0
src/bullet/Bullet3Common/b3Scalar.h View File

@@ -0,0 +1,661 @@
/*
Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/



#ifndef B3_SCALAR_H
#define B3_SCALAR_H

#ifdef B3_MANAGED_CODE
//Aligned data types not supported in managed code
#pragma unmanaged
#endif



#include <math.h>
#include <stdlib.h>//size_t for MSVC 6.0
#include <float.h>
#include <string.h>//memcpy

//Original repository is at http://github.com/erwincoumans/bullet3
#define B3_BULLET_VERSION 300

/**@brief Report the library version number
* @return The value of B3_BULLET_VERSION */
inline int b3GetVersion()
{
	const int version = B3_BULLET_VERSION;
	return version;
}

#if defined(DEBUG) || defined (_DEBUG)
#define B3_DEBUG
#endif

#include "b3Logging.h"//for b3Error


// Per-platform configuration. Every branch below defines B3_FORCE_INLINE, the
// B3_ATTRIBUTE_ALIGNED16/64/128 macros, b3Assert, b3FullAssert and
// b3Likely/b3Unlikely. Some branches additionally enable a SIMD flavour
// (B3_USE_SSE, B3_USE_SSE_IN_API, B3_USE_NEON, B3_USE_VMX128).
#ifdef _WIN32

// MinGW, Cygwin and MSVC older than 7.0: plain inline, alignment macros are no-ops.
#if defined(__MINGW32__) || defined(__CYGWIN__) || (defined (_MSC_VER) && _MSC_VER < 1300)

#define B3_FORCE_INLINE inline
#define B3_ATTRIBUTE_ALIGNED16(a) a
#define B3_ATTRIBUTE_ALIGNED64(a) a
#define B3_ATTRIBUTE_ALIGNED128(a) a
#else
//#define B3_HAS_ALIGNED_ALLOCATOR
#pragma warning(disable : 4324) // disable padding warning
// #pragma warning(disable:4530) // Disable the exception disable but used in MSCV Stl warning.
// #pragma warning(disable:4996) //Turn off warnings about deprecated C routines
// #pragma warning(disable:4786) // Disable the "debug name too long" warning

#define B3_FORCE_INLINE __forceinline
#define B3_ATTRIBUTE_ALIGNED16(a) __declspec(align(16)) a
#define B3_ATTRIBUTE_ALIGNED64(a) __declspec(align(64)) a
#define B3_ATTRIBUTE_ALIGNED128(a) __declspec (align(128)) a
#ifdef _XBOX
#define B3_USE_VMX128

#include <ppcintrinsics.h>
#define B3_HAVE_NATIVE_FSEL
#define b3Fsel(a,b,c) __fsel((a),(b),(c))
#else

// SSE is enabled for MSVC 8.0 and newer, single precision only.
#if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined (B3_USE_DOUBLE_PRECISION))
#define B3_USE_SSE
#ifdef B3_USE_SSE
//B3_USE_SSE_IN_API is disabled under Windows by default, because
//it makes it harder to integrate Bullet into your application under Windows
//(structured embedding Bullet structs/classes need to be 16-byte aligned)
//with relatively little performance gain
//If you are not embedded Bullet data in your classes, or make sure that you align those classes on 16-byte boundaries
//you can manually enable this line or set it in the build system for a bit of performance gain (a few percent, dependent on usage)
//#define B3_USE_SSE_IN_API
#endif //B3_USE_SSE
#include <emmintrin.h>
#endif

#endif//_XBOX

#endif //__MINGW32__ || __CYGWIN__ || old MSVC

// Windows assert: active only when B3_DEBUG is defined.
#ifdef B3_DEBUG
#ifdef _MSC_VER
#include <stdio.h>
#define b3Assert(x) { if(!(x)){b3Error("Assert "__FILE__ ":%u ("#x")\n", __LINE__);__debugbreak(); }}
#else//_MSC_VER
#include <assert.h>
#define b3Assert assert
#endif//_MSC_VER
#else
#define b3Assert(x)
#endif
//b3FullAssert is optional, slows down a lot
#define b3FullAssert(x)

#define b3Likely(_c) _c
#define b3Unlikely(_c) _c

#else // not _WIN32
#if defined (__CELLOS_LV2__)
#define B3_FORCE_INLINE inline __attribute__((always_inline))
#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
#ifndef assert
#include <assert.h>
#endif
#ifdef B3_DEBUG
#ifdef __SPU__
#include <spu_printf.h>
// NOTE: this redirects every later use of printf to spu_printf.
#define printf spu_printf
#define b3Assert(x) {if(!(x)){b3Error("Assert "__FILE__ ":%u ("#x")\n", __LINE__);spu_hcmpeq(0,0);}}
#else
#define b3Assert assert
#endif
#else
#define b3Assert(x)
#endif
//b3FullAssert is optional, slows down a lot
#define b3FullAssert(x)

#define b3Likely(_c) _c
#define b3Unlikely(_c) _c

#else // not __CELLOS_LV2__

#ifdef USE_LIBSPE2

#define B3_FORCE_INLINE __inline
#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
#ifndef assert
#include <assert.h>
#endif
#ifdef B3_DEBUG
#define b3Assert assert
#else
#define b3Assert(x)
#endif
//b3FullAssert is optional, slows down a lot
#define b3FullAssert(x)


// The only branch in which the likely/unlikely hints reach the compiler.
#define b3Likely(_c) __builtin_expect((_c), 1)
#define b3Unlikely(_c) __builtin_expect((_c), 0)

#else
//non-windows systems

// Apple, single precision: SSE on x86, NEON on armv7 when compiling with clang.
#if (defined (__APPLE__) && (!defined (B3_USE_DOUBLE_PRECISION)))
#if defined (__i386__) || defined (__x86_64__)
#define B3_USE_SSE
//B3_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries
//if apps run into issues, we will disable the next line
#define B3_USE_SSE_IN_API
#ifdef B3_USE_SSE
// include appropriate SSE level
#if defined (__SSE4_1__)
#include <smmintrin.h>
#elif defined (__SSSE3__)
#include <tmmintrin.h>
#elif defined (__SSE3__)
#include <pmmintrin.h>
#else
#include <emmintrin.h>
#endif
#endif //B3_USE_SSE
#elif defined( __armv7__ )
#ifdef __clang__
#define B3_USE_NEON 1

#if defined B3_USE_NEON && defined (__clang__)
#include <arm_neon.h>
#endif//B3_USE_NEON
#endif //__clang__
#endif// __i386__ || __x86_64__ / __armv7__

#define B3_FORCE_INLINE inline __attribute__ ((always_inline))
///@todo: check out alignment methods for other platforms/compilers
#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
#ifndef assert
#include <assert.h>
#endif

// This branch tests DEBUG/_DEBUG directly instead of B3_DEBUG.
#if defined(DEBUG) || defined (_DEBUG)
#if defined (__i386__) || defined (__x86_64__)
#include <stdio.h>
#define b3Assert(x)\
{\
if(!(x))\
{\
b3Error("Assert %s in line %d, file %s\n",#x, __LINE__, __FILE__);\
asm volatile ("int3");\
}\
}
#else//defined (__i386__) || defined (__x86_64__)
#define b3Assert assert
#endif//defined (__i386__) || defined (__x86_64__)
#else//defined(DEBUG) || defined (_DEBUG)
#define b3Assert(x)
#endif//defined(DEBUG) || defined (_DEBUG)

//b3FullAssert is optional, slows down a lot
#define b3FullAssert(x)
#define b3Likely(_c) _c
#define b3Unlikely(_c) _c

#else
// Every other platform, plus Apple builds in double precision: no SIMD flag is set here.

#define B3_FORCE_INLINE inline
///@todo: check out alignment methods for other platforms/compilers
#define B3_ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
#define B3_ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
#define B3_ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
///#define B3_ATTRIBUTE_ALIGNED16(a) a
///#define B3_ATTRIBUTE_ALIGNED64(a) a
///#define B3_ATTRIBUTE_ALIGNED128(a) a
#ifndef assert
#include <assert.h>
#endif

#if defined(DEBUG) || defined (_DEBUG)
#define b3Assert assert
#else
#define b3Assert(x)
#endif

//b3FullAssert is optional, slows down a lot
#define b3FullAssert(x)
#define b3Likely(_c) _c
#define b3Unlikely(_c) _c
#endif //__APPLE__

#endif // LIBSPE2

#endif //__CELLOS_LV2__
#endif //_WIN32


///The b3Scalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
///Define B3_USE_DOUBLE_PRECISION to make b3Scalar a double; otherwise it is a float.
///B3_LARGE_FLOAT is a large finite value for the selected precision.
#if defined(B3_USE_DOUBLE_PRECISION)
typedef double b3Scalar;
//this number could be bigger in double precision
#define B3_LARGE_FLOAT 1e30
#else
typedef float b3Scalar;
//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX
#define B3_LARGE_FLOAT 1e18f
#endif

///With SSE enabled, b3SimdFloat4 is the 128-bit SSE register type.
#ifdef B3_USE_SSE
typedef __m128 b3SimdFloat4;
#endif//B3_USE_SSE

// Helpers available only when SSE is used in the public API. The Windows
// variant relies on intrinsics for casts and supplies arithmetic operators for
// __m128; the other variant uses C-style vector casts and a brace initializer.
#if defined B3_USE_SSE_IN_API && defined (B3_USE_SSE)
#ifdef _WIN32

// B3_NAN reinterprets the int bit pattern 0x7F800001 as a float.
// NOTE(review): the pointer cast reads an int object through a float lvalue.
#ifndef B3_NAN
static int b3NanMask = 0x7F800001;
#define B3_NAN (*(float*)&b3NanMask)
#endif

// B3_INFINITY_MASK reinterprets the int bit pattern 0x7F800000 as a float.
#ifndef B3_INFINITY_MASK
static int b3InfinityMask = 0x7F800000;
#define B3_INFINITY_MASK (*(float*)&b3InfinityMask)
#endif

/// Lane-wise addition of two SSE vectors.
inline __m128 operator + (const __m128 A, const __m128 B)
{
return _mm_add_ps(A, B);
}

/// Lane-wise subtraction of two SSE vectors.
inline __m128 operator - (const __m128 A, const __m128 B)
{
return _mm_sub_ps(A, B);
}

/// Lane-wise multiplication of two SSE vectors.
inline __m128 operator * (const __m128 A, const __m128 B)
{
return _mm_mul_ps(A, B);
}

// Conversions between the float, double and integer 128-bit vector types,
// and construction of a vector from four scalars (r0 goes to the lowest lane).
#define b3CastfTo128i(a) (_mm_castps_si128(a))
#define b3CastfTo128d(a) (_mm_castps_pd(a))
#define b3CastiTo128f(a) (_mm_castsi128_ps(a))
#define b3CastdTo128f(a) (_mm_castpd_ps(a))
#define b3CastdTo128i(a) (_mm_castpd_si128(a))
#define b3Assign128(r0,r1,r2,r3) _mm_setr_ps(r0,r1,r2,r3)

#else//_WIN32

// Same macro set, expressed with C-style vector casts and a brace initializer.
#define b3CastfTo128i(a) ((__m128i)(a))
#define b3CastfTo128d(a) ((__m128d)(a))
#define b3CastiTo128f(a) ((__m128) (a))
#define b3CastdTo128f(a) ((__m128) (a))
#define b3CastdTo128i(a) ((__m128i)(a))
#define b3Assign128(r0,r1,r2,r3) (__m128){r0,r1,r2,r3}
#endif//_WIN32
#endif //B3_USE_SSE_IN_API

// NEON configuration: the SIMD vector type plus infinity/NaN constants and a
// four-scalar vector constructor.
// NOTE(review): B3_INFINITY is defined again further down in this header
// (as DBL_MAX / FLT_MAX); confirm which value is intended when NEON is enabled.
#ifdef B3_USE_NEON
#include <arm_neon.h>

typedef float32x4_t b3SimdFloat4;
#define B3_INFINITY INFINITY
#define B3_NAN NAN
#define b3Assign128(r0,r1,r2,r3) (float32x4_t){r0,r1,r2,r3}
#endif





// Place inside a class body to give it class-specific operator new/delete
// (scalar and array forms) that allocate through b3AlignedAlloc with a 16-byte
// alignment argument and release through b3AlignedFree. The placement forms
// return the supplied pointer unchanged and their matching deletes do nothing.
// The last line ends with a line continuation, so the line that follows the
// macro must stay blank.
#define B3_DECLARE_ALIGNED_ALLOCATOR() \
B3_FORCE_INLINE void* operator new(size_t sizeInBytes) { return b3AlignedAlloc(sizeInBytes,16); } \
B3_FORCE_INLINE void operator delete(void* ptr) { b3AlignedFree(ptr); } \
B3_FORCE_INLINE void* operator new(size_t, void* ptr) { return ptr; } \
B3_FORCE_INLINE void operator delete(void*, void*) { } \
B3_FORCE_INLINE void* operator new[](size_t sizeInBytes) { return b3AlignedAlloc(sizeInBytes,16); } \
B3_FORCE_INLINE void operator delete[](void* ptr) { b3AlignedFree(ptr); } \
B3_FORCE_INLINE void* operator new[](size_t, void* ptr) { return ptr; } \
B3_FORCE_INLINE void operator delete[](void*, void*) { } \



#if defined(B3_USE_DOUBLE_PRECISION) || defined(B3_FORCE_DOUBLE_FUNCTIONS)
// Math wrappers that call the double-named C library functions (sqrt, cos, ...).
// Selected for double precision builds or when B3_FORCE_DOUBLE_FUNCTIONS is defined.
B3_FORCE_INLINE b3Scalar b3Sqrt(b3Scalar x) { return sqrt(x); }
B3_FORCE_INLINE b3Scalar b3Fabs(b3Scalar x) { return fabs(x); }
B3_FORCE_INLINE b3Scalar b3Cos(b3Scalar x) { return cos(x); }
B3_FORCE_INLINE b3Scalar b3Sin(b3Scalar x) { return sin(x); }
B3_FORCE_INLINE b3Scalar b3Tan(b3Scalar x) { return tan(x); }
// b3Acos and b3Asin clamp their argument to [-1, 1] before calling the library function.
B3_FORCE_INLINE b3Scalar b3Acos(b3Scalar x) { if (x<b3Scalar(-1)) x=b3Scalar(-1); if (x>b3Scalar(1)) x=b3Scalar(1); return acos(x); }
B3_FORCE_INLINE b3Scalar b3Asin(b3Scalar x) { if (x<b3Scalar(-1)) x=b3Scalar(-1); if (x>b3Scalar(1)) x=b3Scalar(1); return asin(x); }
B3_FORCE_INLINE b3Scalar b3Atan(b3Scalar x) { return atan(x); }
// Arguments are forwarded in order: the parameter named x is atan2's first argument.
B3_FORCE_INLINE b3Scalar b3Atan2(b3Scalar x, b3Scalar y) { return atan2(x, y); }
B3_FORCE_INLINE b3Scalar b3Exp(b3Scalar x) { return exp(x); }
B3_FORCE_INLINE b3Scalar b3Log(b3Scalar x) { return log(x); }
B3_FORCE_INLINE b3Scalar b3Pow(b3Scalar x,b3Scalar y) { return pow(x,y); }
B3_FORCE_INLINE b3Scalar b3Fmod(b3Scalar x,b3Scalar y) { return fmod(x,y); }

#else
/**@brief Square root of y (single precision build)
* By default this forwards to sqrtf. When USE_APPROXIMATION is defined, an
* estimate of 1/sqrt(y) is seeded from the bit pattern of y and refined with
* five iterations of x = 1.5*x - x*x*(x*z), z = y/2; the result is x * y.
* @param y The value to take the square root of
* @return The (possibly approximated) square root */
B3_FORCE_INLINE b3Scalar b3Sqrt(b3Scalar y)
{
#ifdef USE_APPROXIMATION
	double x, z, tempf;
	// The seed is computed on one 32-bit half of the double. The bytes are
	// copied in and out with memcpy instead of dereferencing
	// ((unsigned long*)&tempf) + 1: where unsigned long is 64 bits wide that
	// pointer addresses memory past the end of tempf, and the cast also breaks
	// strict aliasing. Like the original code, this assumes a little-endian
	// layout (words[1] is the upper half), 32-bit unsigned int and 64-bit double.
	unsigned int words[2];

	tempf = y;
	memcpy(words, &tempf, sizeof(tempf));
	words[1] = (0xbfcdd90a - words[1])>>1; /* estimate of 1/sqrt(y) */
	memcpy(&tempf, words, sizeof(tempf));
	x = tempf;
	z = y*b3Scalar(0.5);
	x = (b3Scalar(1.5)*x)-(x*x)*(x*z); /* iteration formula */
	x = (b3Scalar(1.5)*x)-(x*x)*(x*z);
	x = (b3Scalar(1.5)*x)-(x*x)*(x*z);
	x = (b3Scalar(1.5)*x)-(x*x)*(x*z);
	x = (b3Scalar(1.5)*x)-(x*x)*(x*z);
	return x*y;
#else
	return sqrtf(y);
#endif
}
// Math wrappers that call the float-suffixed C library functions (fabsf, cosf, ...).
B3_FORCE_INLINE b3Scalar b3Fabs(b3Scalar x) { return fabsf(x); }
B3_FORCE_INLINE b3Scalar b3Cos(b3Scalar x) { return cosf(x); }
B3_FORCE_INLINE b3Scalar b3Sin(b3Scalar x) { return sinf(x); }
B3_FORCE_INLINE b3Scalar b3Tan(b3Scalar x) { return tanf(x); }
/// Arc cosine; the argument is clamped to [-1, 1] first.
B3_FORCE_INLINE b3Scalar b3Acos(b3Scalar x) {
if (x<b3Scalar(-1))
x=b3Scalar(-1);
if (x>b3Scalar(1))
x=b3Scalar(1);
return acosf(x);
}
/// Arc sine; the argument is clamped to [-1, 1] first.
B3_FORCE_INLINE b3Scalar b3Asin(b3Scalar x) {
if (x<b3Scalar(-1))
x=b3Scalar(-1);
if (x>b3Scalar(1))
x=b3Scalar(1);
return asinf(x);
}
B3_FORCE_INLINE b3Scalar b3Atan(b3Scalar x) { return atanf(x); }
// Arguments are forwarded in order: the parameter named x is atan2f's first argument.
B3_FORCE_INLINE b3Scalar b3Atan2(b3Scalar x, b3Scalar y) { return atan2f(x, y); }
B3_FORCE_INLINE b3Scalar b3Exp(b3Scalar x) { return expf(x); }
B3_FORCE_INLINE b3Scalar b3Log(b3Scalar x) { return logf(x); }
B3_FORCE_INLINE b3Scalar b3Pow(b3Scalar x,b3Scalar y) { return powf(x,y); }
B3_FORCE_INLINE b3Scalar b3Fmod(b3Scalar x,b3Scalar y) { return fmodf(x,y); }
#endif

// Angle constants. Everything is derived from B3_2_PI (2*pi) and wrapped in b3Scalar(...)
// so the constants take the configured scalar type.
#define B3_2_PI b3Scalar(6.283185307179586232)
#define B3_PI (B3_2_PI * b3Scalar(0.5))
#define B3_HALF_PI (B3_2_PI * b3Scalar(0.25))
#define B3_RADS_PER_DEG (B3_2_PI / b3Scalar(360.0))
#define B3_DEGS_PER_RAD (b3Scalar(360.0) / B3_2_PI)
// sqrt(1/2)
#define B3_SQRT12 b3Scalar(0.7071067811865475244008443621048490)

// 1 / b3Sqrt(x), with both the argument and the result converted to b3Scalar.
#define b3RecipSqrt(x) ((b3Scalar)(b3Scalar(1.0)/b3Sqrt(b3Scalar(x)))) /* reciprocal square root */


// Precision-dependent limits. Note that B3_INFINITY is the largest finite value of the
// scalar type (DBL_MAX / FLT_MAX), not an IEEE infinity.
#ifdef B3_USE_DOUBLE_PRECISION
#define B3_EPSILON DBL_EPSILON
#define B3_INFINITY DBL_MAX
#else
#define B3_EPSILON FLT_EPSILON
#define B3_INFINITY FLT_MAX
#endif

/// Cheap approximation of atan2(y, x) built from a single division per call.
/// The ratio r is formed from x and |y|, mapped linearly onto an angle in [0, pi],
/// and the sign of y is applied at the end.
/// @param y ordinate
/// @param x abscissa
/// @return approximate angle in radians; 0 when both inputs are zero
B3_FORCE_INLINE b3Scalar b3Atan2Fast(b3Scalar y, b3Scalar x)
{
    const b3Scalar coeff_1 = B3_PI / 4.0f;
    const b3Scalar coeff_2 = 3.0f * coeff_1;
    const b3Scalar abs_y = b3Fabs(y);

    // At the origin both ratios below become 0/0 and the result would be NaN,
    // which then propagates into every caller. Return 0 instead, matching atan2(0, 0).
    if (abs_y == 0.0f && x == 0.0f)
    {
        return b3Scalar(0.0);
    }

    b3Scalar angle;
    if (x >= 0.0f) {
        // Right half-plane: r runs from 1 (on the +x axis) to -1 (on the y axis).
        b3Scalar r = (x - abs_y) / (x + abs_y);
        angle = coeff_1 - coeff_1 * r;
    } else {
        // Left half-plane: same construction, offset by 3*pi/4.
        b3Scalar r = (x + abs_y) / (abs_y - x);
        angle = coeff_2 - coeff_1 * r;
    }
    return (y < 0.0f) ? -angle : angle;
}

/// True when |x| is strictly smaller than B3_EPSILON.
B3_FORCE_INLINE bool b3FuzzyZero(b3Scalar x) { return b3Fabs(x) < B3_EPSILON; }

/// True when a lies within [-eps, eps], i.e. a is "equal to zero" within tolerance eps.
B3_FORCE_INLINE bool b3Equal(b3Scalar a, b3Scalar eps) {
    const bool notAboveUpper = (a <= eps);
    const bool belowLower = (a < -eps);
    return notAboveUpper && !belowLower;
}
/// Returns the negation of (a <= eps): true when a is strictly greater than eps,
/// and also when the comparison is unordered (NaN). Despite the name, a == eps yields false.
B3_FORCE_INLINE bool b3GreaterEqual (b3Scalar a, b3Scalar eps) {
return (!((a) <= eps));
}


/// Returns 1 when x compares less than zero, otherwise 0 (so -0.0 and NaN give 0).
B3_FORCE_INLINE int b3IsNegative(b3Scalar x) {
    if (x < b3Scalar(0.0))
    {
        return 1;
    }
    return 0;
}

/// Converts degrees to radians by multiplying with B3_RADS_PER_DEG.
B3_FORCE_INLINE b3Scalar b3Radians(b3Scalar x) { return x * B3_RADS_PER_DEG; }
/// Converts radians to degrees by multiplying with B3_DEGS_PER_RAD.
B3_FORCE_INLINE b3Scalar b3Degrees(b3Scalar x) { return x * B3_DEGS_PER_RAD; }

#define B3_DECLARE_HANDLE(name) typedef struct name##__ { int unused; } *name

#ifndef b3Fsel
/// Floating-point select: returns b when a >= 0, otherwise c (including when a is NaN).
/// Only compiled when no b3Fsel macro has been defined beforehand.
B3_FORCE_INLINE b3Scalar b3Fsel(b3Scalar a, b3Scalar b, b3Scalar c)
{
return a >= 0 ? b : c;
}
#endif
#define b3Fsels(a,b,c) (b3Scalar)b3Fsel(a,b,c)


/// Runtime endianness probe: stores the integer 1 and inspects the byte at its lowest address.
/// @return true when that byte holds the least significant byte (little endian)
B3_FORCE_INLINE bool b3MachineIsLittleEndian()
{
    long int probe = 1;
    const char *firstByte = (const char *) &probe;
    // On a little-endian machine the lowest address contains the least significant byte.
    return firstByte[0] == 1;
}



///b3Select avoids branches, which makes performance much better for consoles like Playstation 3 and XBox 360
///Thanks Phil Knight. See also http://www.cellperformance.com/articles/2006/04/more_techniques_for_eliminatin_1.html
///@param condition selector; any nonzero value picks valueIfConditionNonZero
///@return valueIfConditionNonZero when condition != 0, otherwise valueIfConditionZero
B3_FORCE_INLINE unsigned b3Select(unsigned condition, unsigned valueIfConditionNonZero, unsigned valueIfConditionZero)
{
    // A value or'ed with its two's-complement negation has its top bit set exactly when
    // the value is nonzero. The mask is built entirely in unsigned arithmetic: the previous
    // signed version negated (int)condition, which overflows for 0x80000000, and relied on
    // an implementation-defined arithmetic right shift of a negative number.
    const unsigned topBitShift = (unsigned)(sizeof(unsigned) * 8 - 1);
    const unsigned isNonZero = (condition | (0u - condition)) >> topBitShift; // 0 or 1
    // Expand the single bit to an all-ones / all-zeros mask.
    const unsigned testNz = 0u - isNonZero;
    const unsigned testEqz = ~testNz;
    return ((valueIfConditionNonZero & testNz) | (valueIfConditionZero & testEqz));
}
/// Branch-free select for int values.
/// @param condition selector; any nonzero value picks valueIfConditionNonZero
/// @return valueIfConditionNonZero when condition != 0, otherwise valueIfConditionZero
B3_FORCE_INLINE int b3Select(unsigned condition, int valueIfConditionNonZero, int valueIfConditionZero)
{
    // Build the all-ones / all-zeros mask in unsigned arithmetic. The previous signed version
    // negated (int)condition, which overflows for 0x80000000, and relied on an
    // implementation-defined arithmetic right shift of a negative number.
    const unsigned topBitShift = (unsigned)(sizeof(unsigned) * 8 - 1);
    const unsigned isNonZero = (condition | (0u - condition)) >> topBitShift; // 0 or 1
    const unsigned testNz = 0u - isNonZero;
    const unsigned testEqz = ~testNz;
    const unsigned picked = (static_cast<unsigned>(valueIfConditionNonZero) & testNz)
                          | (static_cast<unsigned>(valueIfConditionZero) & testEqz);
    return static_cast<int>(picked);
}
/// Select for float values.
/// With B3_HAVE_NATIVE_FSEL the choice goes through b3Fsel on (condition - 1), which is
/// non-negative exactly when condition >= 1; otherwise a plain comparison is used.
/// @return valueIfConditionNonZero when condition != 0, otherwise valueIfConditionZero
B3_FORCE_INLINE float b3Select(unsigned condition, float valueIfConditionNonZero, float valueIfConditionZero)
{
#ifdef B3_HAVE_NATIVE_FSEL
    return (float)b3Fsel((b3Scalar)condition - b3Scalar(1.0f), valueIfConditionNonZero, valueIfConditionZero);
#else
    if (condition != 0)
    {
        return valueIfConditionNonZero;
    }
    return valueIfConditionZero;
#endif
}

/// Exchanges the values of a and b through a temporary copy.
/// T needs to be copy-constructible and copy-assignable.
template<typename T> B3_FORCE_INLINE void b3Swap(T& a, T& b)
{
T tmp = a;
a = b;
b = tmp;
}


//PCK: endian swapping functions
/// Reverses the order of the four low bytes of val (byte 0 <-> byte 3, byte 1 <-> byte 2).
B3_FORCE_INLINE unsigned b3SwapEndian(unsigned val)
{
    const unsigned topToBottom   = (val >> 24) & 0x000000ffu;
    const unsigned upperToLower  = (val >> 8)  & 0x0000ff00u;
    const unsigned lowerToUpper  = (val << 8)  & 0x00ff0000u;
    const unsigned bottomToTop   = (val << 24) & 0xff000000u;
    return topToBottom | upperToLower | lowerToUpper | bottomToTop;
}

/// Exchanges the two bytes of a 16-bit value.
B3_FORCE_INLINE unsigned short b3SwapEndian(unsigned short val)
{
    const unsigned widened = val;
    const unsigned highToLow = (widened >> 8) & 0x00ffu;
    const unsigned lowToHigh = (widened << 8) & 0xff00u;
    return static_cast<unsigned short>(highToLow | lowToHigh);
}

/// Signed overload: casts val to unsigned and forwards to the unsigned byte swap.
/// Note that the result is returned as unsigned, not int.
B3_FORCE_INLINE unsigned b3SwapEndian(int val)
{
return b3SwapEndian((unsigned)val);
}

/// Signed 16-bit overload: casts val to unsigned short and forwards to the unsigned byte swap.
/// Note that the result is returned as unsigned short, not short.
B3_FORCE_INLINE unsigned short b3SwapEndian(short val)
{
return b3SwapEndian((unsigned short) val);
}

///b3SwapEndianFloat reverses the byte order of a float using char pointers.
///b3SwapEndianFloat/b3SwapEndianDouble do NOT return a float/double, because the machine might 'correct' invalid floating point values.
///Not all values of sign/exponent/mantissa are valid floating point numbers according to IEEE 754.
///When a floating point unit is faced with an invalid value, it may actually change the value, or worse, throw an exception.
///In most systems, running user mode code, you wouldn't get an exception, but instead the hardware/os/runtime will 'fix' the number for you.
///So instead of returning a float/double, the swapped bytes are returned as an integer (float) or written to a byte buffer (double).
B3_FORCE_INLINE unsigned int b3SwapEndianFloat(float d)
{
    unsigned int swapped = 0;
    const unsigned char *in = (const unsigned char *)&d;
    unsigned char *out = (unsigned char *)&swapped;

    // Copy the four bytes in reverse order.
    for (int i = 0; i < 4; ++i)
    {
        out[i] = in[3 - i];
    }
    return swapped;
}

// Inverse of b3SwapEndianFloat: reverses the four bytes of `a` and reinterprets them as a float.
B3_FORCE_INLINE float b3UnswapEndianFloat(unsigned int a)
{
    float restored = 0.0f;
    const unsigned char *in = (const unsigned char *)&a;
    unsigned char *out = (unsigned char *)&restored;

    // Copy the four bytes in reverse order.
    for (int i = 0; i < 4; ++i)
    {
        out[i] = in[3 - i];
    }

    return restored;
}


// Writes the eight bytes of d into dst in reverse order.
// dst must provide room for at least 8 bytes.
B3_FORCE_INLINE void b3SwapEndianDouble(double d, unsigned char* dst)
{
    const unsigned char *in = (const unsigned char *)&d;

    for (int i = 0; i < 8; ++i)
    {
        dst[i] = in[7 - i];
    }
}

// Inverse of b3SwapEndianDouble: reads eight bytes from src in reverse order and
// reinterprets them as a double. src must point to at least 8 readable bytes.
B3_FORCE_INLINE double b3UnswapEndianDouble(const unsigned char *src)
{
    double restored = 0.0;
    unsigned char *out = (unsigned char *)&restored;

    for (int i = 0; i < 8; ++i)
    {
        out[i] = src[7 - i];
    }

    return restored;
}

// Returns the angle wrapped into the range [-B3_PI, B3_PI].
// b3Fmod first reduces the input to (-2*pi, 2*pi); one extra turn is then added or removed if needed.
B3_FORCE_INLINE b3Scalar b3NormalizeAngle(b3Scalar angleInRadians)
{
    const b3Scalar wrapped = b3Fmod(angleInRadians, B3_2_PI);
    if (wrapped < -B3_PI)
    {
        return wrapped + B3_2_PI;
    }
    if (wrapped > B3_PI)
    {
        return wrapped - B3_2_PI;
    }
    return wrapped;
}

///Rudimentary base that tags an object with an integer type id.
struct b3TypedObject
{
    /// Type id supplied at construction; publicly readable and writable.
    int m_objectType;

    /// Stores objectType as the type id.
    b3TypedObject(int objectType) : m_objectType(objectType) {}

    /// Returns the stored type id.
    inline int getObjectType() const { return m_objectType; }
};


///Rounds a pointer up to the next multiple of `alignment` (returns it unchanged when already aligned).
///The mask arithmetic is only correct when alignment is a power of two.
template <typename T>T* b3AlignPointer(T* unalignedPtr, size_t alignment)
{
    // View the pointer as an integer of the same storage.
    union
    {
        T* ptr;
        size_t integer;
    } bits;

    bits.ptr = unalignedPtr;
    // Add alignment-1, then clear the low bits.
    bits.integer = (bits.integer + (alignment - 1)) & ~(alignment - 1);
    return bits.ptr;
}

#endif //B3_SCALAR_H

+ 116
- 0
src/bullet/Bullet3Common/b3StackAlloc.h View File

@@ -0,0 +1,116 @@
/*
Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

/*
StackAlloc extracted from GJK-EPA collision solver by Nathanael Presson
Nov.2006
*/

#ifndef B3_STACK_ALLOC
#define B3_STACK_ALLOC

#include "b3Scalar.h" //for b3Assert
#include "b3AlignedAllocator.h"

///The b3Block class is an internal structure for the b3StackAlloc memory allocator.
///Each b3Block is a marker stored inside the allocator's own buffer by b3StackAlloc::beginBlock().
struct b3Block
{
// Marker that was current when this one was created (restored by endBlock).
b3Block* previous;
// Position in the buffer right after this marker, i.e. where the block's allocations start.
unsigned char* address;
};

///The StackAlloc class provides some fast stack-based memory allocator (LIFO last-in first-out)
///Memory is carved linearly out of one 16-byte aligned buffer; beginBlock()/endBlock() pairs
///release everything allocated in between in one step.
///NOTE(review): the class owns its buffer but is still copyable; copying an instance would free
///the buffer twice. Left unchanged to keep the interface as is.
class b3StackAlloc
{
public:

    /// Creates an allocator backed by a buffer of `size` bytes.
    b3StackAlloc(unsigned int size) { ctor();create(size); }
    ~b3StackAlloc() { destroy(); }

    /// Releases the current buffer (if unused) and allocates a new one of `size` bytes.
    inline void create(unsigned int size)
    {
        destroy();
        // destroy() refuses to release a buffer that still has live allocations.
        // Keep using that buffer instead of overwriting the pointer and leaking it.
        if(data)
            return;
        data = (unsigned char*) b3AlignedAlloc(size,16);
        // Only advertise capacity that actually exists; a failed allocation leaves an empty allocator.
        totalsize = data ? size : 0;
    }

    /// Frees the buffer. Does nothing (besides asserting) while allocations are outstanding.
    inline void destroy()
    {
        b3Assert(usedsize==0);
        //Raise(L"StackAlloc is still in use");

        if(usedsize==0)
        {
            if(!ischild && data)
                b3AlignedFree(data);

            data = 0;
            usedsize = 0;
            // Without a buffer there is no capacity and no open block either;
            // otherwise allocate() would hand out offsets from a null pointer.
            totalsize = 0;
            current = 0;
        }
    }

    /// Number of bytes not yet handed out.
    int getAvailableMemory() const
    {
        return static_cast<int>(totalsize - usedsize);
    }

    /// Hands out `size` bytes from the top of the stack.
    /// @return pointer to the bytes, or 0 (after asserting) when the request does not fit
    unsigned char* allocate(unsigned int size)
    {
        // Compare against the remaining space instead of forming usedsize+size,
        // which can wrap around for very large requests and then appear to fit.
        // (usedsize never exceeds totalsize, so the subtraction cannot underflow.)
        if(data && size<(totalsize-usedsize))
        {
            unsigned char* result = data+usedsize;
            usedsize += size;
            return(result);
        }
        b3Assert(0);
        //&& (L"Not enough memory"));
        return(0);
    }

    /// Opens a block: stores a b3Block marker in the buffer and makes it current.
    /// @return the marker to pass to endBlock(), or 0 when the marker itself does not fit
    B3_FORCE_INLINE b3Block* beginBlock()
    {
        b3Block* pb = (b3Block*)allocate(sizeof(b3Block));
        // allocate() returns 0 when the buffer is exhausted; do not write through it.
        if(!pb)
            return(0);
        pb->previous = current;
        pb->address = data+usedsize;
        current = pb;
        return(pb);
    }

    /// Closes the most recently opened block and releases everything allocated since beginBlock().
    /// Blocks must be closed in reverse order of opening; anything else asserts and is ignored.
    B3_FORCE_INLINE void endBlock(b3Block* block)
    {
        b3Assert(block==current);
        //Raise(L"Unmatched blocks");
        // The null check covers a failed beginBlock() on an allocator with no open block.
        if(block && block==current)
        {
            current = block->previous;
            // Rewind to just before the marker itself.
            usedsize = (unsigned int)((block->address-data)-sizeof(b3Block));
        }
    }

private:
    /// Puts every member into the empty state; called before the first create().
    void ctor()
    {
        data = 0;
        totalsize = 0;
        usedsize = 0;
        current = 0;
        ischild = false;
    }
    unsigned char* data;      // start of the buffer (0 when none)
    unsigned int totalsize;   // capacity of the buffer in bytes
    unsigned int usedsize;    // bytes handed out so far
    b3Block* current;         // innermost open block (0 when none)
    bool ischild;             // when true, destroy() does not free data; never set to true in this class
};

#endif //B3_STACK_ALLOC

+ 304
- 0
src/bullet/Bullet3Common/b3Transform.h View File

@@ -0,0 +1,304 @@
/*
Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/


#ifndef B3_TRANSFORM_H
#define B3_TRANSFORM_H


#include "b3Matrix3x3.h"

#ifdef B3_USE_DOUBLE_PRECISION
#define b3TransformData b3TransformDoubleData
#else
#define b3TransformData b3TransformFloatData
#endif




/**@brief The b3Transform class supports rigid transforms with only translation and rotation and no scaling/shear.
*It can be used in combination with b3Vector3, b3Quaternion and b3Matrix3x3 linear algebra classes.
*A transform is stored as a 3x3 basis (rotation) plus an origin (translation). */
B3_ATTRIBUTE_ALIGNED16(class) b3Transform {
///Storage for the rotation
b3Matrix3x3 m_basis;
///Storage for the translation
b3Vector3 m_origin;

public:
/**@brief No initialization constructor: the body is empty, so nothing is assigned here */
b3Transform() {}
/**@brief Constructor from b3Quaternion (optional b3Vector3 )
* @param q Rotation from quaternion
* @param c Translation from Vector (default 0,0,0) */
explicit B3_FORCE_INLINE b3Transform(const b3Quaternion& q,
const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0)))
: m_basis(q),
m_origin(c)
{}

/**@brief Constructor from b3Matrix3x3 (optional b3Vector3)
* @param b Rotation from Matrix
* @param c Translation from Vector default (0,0,0)*/
explicit B3_FORCE_INLINE b3Transform(const b3Matrix3x3& b,
const b3Vector3& c = b3MakeVector3(b3Scalar(0), b3Scalar(0), b3Scalar(0)))
: m_basis(b),
m_origin(c)
{}
/**@brief Copy constructor */
B3_FORCE_INLINE b3Transform (const b3Transform& other)
: m_basis(other.m_basis),
m_origin(other.m_origin)
{
}
/**@brief Assignment Operator */
B3_FORCE_INLINE b3Transform& operator=(const b3Transform& other)
{
m_basis = other.m_basis;
m_origin = other.m_origin;
return *this;
}


/**@brief Set the current transform as the value of the product of two transforms
* @param t1 Transform 1
* @param t2 Transform 2
* This = Transform1 * Transform2 (basis is t1.basis * t2.basis, origin is t1 applied to t2's origin) */
B3_FORCE_INLINE void mult(const b3Transform& t1, const b3Transform& t2) {
m_basis = t1.m_basis * t2.m_basis;
m_origin = t1(t2.m_origin);
}

/* void multInverseLeft(const b3Transform& t1, const b3Transform& t2) {
b3Vector3 v = t2.m_origin - t1.m_origin;
m_basis = b3MultTransposeLeft(t1.m_basis, t2.m_basis);
m_origin = v * t1.m_basis;
}
*/

/**@brief Return the transform of the vector: x.dot3 against the three basis rows, plus the origin */
B3_FORCE_INLINE b3Vector3 operator()(const b3Vector3& x) const
{
return x.dot3(m_basis[0], m_basis[1], m_basis[2]) + m_origin;
}

/**@brief Return the transform of the vector (same as operator()) */
B3_FORCE_INLINE b3Vector3 operator*(const b3Vector3& x) const
{
return (*this)(x);
}

/**@brief Return the transform of the b3Quaternion: this transform's rotation times q (the origin is not involved) */
B3_FORCE_INLINE b3Quaternion operator*(const b3Quaternion& q) const
{
return getRotation() * q;
}

/**@brief Return the basis matrix for the rotation */
B3_FORCE_INLINE b3Matrix3x3& getBasis() { return m_basis; }
/**@brief Return the basis matrix for the rotation */
B3_FORCE_INLINE const b3Matrix3x3& getBasis() const { return m_basis; }

/**@brief Return the origin vector translation */
B3_FORCE_INLINE b3Vector3& getOrigin() { return m_origin; }
/**@brief Return the origin vector translation */
B3_FORCE_INLINE const b3Vector3& getOrigin() const { return m_origin; }

/**@brief Return a quaternion representing the rotation (computed from the basis on every call) */
b3Quaternion getRotation() const {
b3Quaternion q;
m_basis.getRotation(q);
return q;
}
/**@brief Set from an array
* @param m A pointer to an OpenGL-style matrix array. The rotation part is read by
* b3Matrix3x3::setFromOpenGLSubMatrix and the translation from m[12], m[13], m[14],
* so at least 15 elements must be readable. */
void setFromOpenGLMatrix(const b3Scalar *m)
{
m_basis.setFromOpenGLSubMatrix(m);
m_origin.setValue(m[12],m[13],m[14]);
}

/**@brief Fill an array representation
* @param m A pointer to a 16 element array. The rotation part is written by
* b3Matrix3x3::getOpenGLSubMatrix, the translation goes to m[12], m[13], m[14] and m[15] is set to 1. */
void getOpenGLMatrix(b3Scalar *m) const
{
m_basis.getOpenGLSubMatrix(m);
m[12] = m_origin.getX();
m[13] = m_origin.getY();
m[14] = m_origin.getZ();
m[15] = b3Scalar(1.0);
}

/**@brief Set the translational element
* @param origin The vector to set the translation to */
B3_FORCE_INLINE void setOrigin(const b3Vector3& origin)
{
m_origin = origin;
}

/**@brief Apply the inverse of this transform to a vector (defined below the class) */
B3_FORCE_INLINE b3Vector3 invXform(const b3Vector3& inVec) const;


/**@brief Set the rotational element by b3Matrix3x3 */
B3_FORCE_INLINE void setBasis(const b3Matrix3x3& basis)
{
m_basis = basis;
}

/**@brief Set the rotational element by b3Quaternion */
B3_FORCE_INLINE void setRotation(const b3Quaternion& q)
{
m_basis.setRotation(q);
}


/**@brief Set this transformation to the identity */
void setIdentity()
{
m_basis.setIdentity();
m_origin.setValue(b3Scalar(0.0), b3Scalar(0.0), b3Scalar(0.0));
}

/**@brief Multiply this Transform by another(this = this * another)
* @param t The other transform */
b3Transform& operator*=(const b3Transform& t)
{
// The origin must be updated first: it needs the basis as it was before the multiplication.
m_origin += m_basis * t.m_origin;
m_basis *= t.m_basis;
return *this;
}

/**@brief Return the inverse of this transform.
* The basis is inverted by transposing it, which is only an inverse for a pure rotation. */
b3Transform inverse() const
{
b3Matrix3x3 inv = m_basis.transpose();
return b3Transform(inv, inv * -m_origin);
}

/**@brief Return the inverse of this transform times the other transform
* @param t The other transform
* return this.inverse() * the other */
b3Transform inverseTimes(const b3Transform& t) const;

/**@brief Return the product of this transform and the other */
b3Transform operator*(const b3Transform& t) const;

/**@brief Return an identity transform (a function-local static built from b3Matrix3x3::getIdentity()) */
static const b3Transform& getIdentity()
{
static const b3Transform identityTransform(b3Matrix3x3::getIdentity());
return identityTransform;
}

/**@brief Write basis and origin into the precision-dependent b3TransformData */
void serialize(struct b3TransformData& dataOut) const;

/**@brief Write basis and origin into the float serialization struct */
void serializeFloat(struct b3TransformFloatData& dataOut) const;

/**@brief Read basis and origin from the precision-dependent b3TransformData */
void deSerialize(const struct b3TransformData& dataIn);

/**@brief Read basis and origin from the double serialization struct */
void deSerializeDouble(const struct b3TransformDoubleData& dataIn);

/**@brief Read basis and origin from the float serialization struct */
void deSerializeFloat(const struct b3TransformFloatData& dataIn);

};


/**@brief Apply the inverse of this transform to a vector:
* subtract the origin, then multiply by the transposed basis. */
B3_FORCE_INLINE b3Vector3
b3Transform::invXform(const b3Vector3& inVec) const
{
    const b3Vector3 relative = inVec - m_origin;
    const b3Matrix3x3 basisTransposed = m_basis.transpose();
    return (basisTransposed * relative);
}

/**@brief Return this.inverse() * t without building the inverse explicitly:
* the basis is transposeTimes(t.basis) and the origin is (t.origin - origin) * basis. */
B3_FORCE_INLINE b3Transform
b3Transform::inverseTimes(const b3Transform& t) const
{
    const b3Vector3 originDelta = t.getOrigin() - m_origin;
    const b3Matrix3x3 relativeBasis = m_basis.transposeTimes(t.m_basis);
    return b3Transform(relativeBasis, originDelta * m_basis);
}

/**@brief Return the product this * t: the bases are multiplied and t's origin is mapped through this transform. */
B3_FORCE_INLINE b3Transform
b3Transform::operator*(const b3Transform& t) const
{
return b3Transform(m_basis * t.m_basis,
(*this)(t.m_origin));
}

/**@brief Test if two transforms have all elements equal (basis first, then origin) */
B3_FORCE_INLINE bool operator==(const b3Transform& t1, const b3Transform& t2)
{
    if (!(t1.getBasis() == t2.getBasis()))
    {
        return false;
    }
    return t1.getOrigin() == t2.getOrigin();
}


///for serialization
///Single-precision image of a b3Transform: basis followed by origin.
struct b3TransformFloatData
{
b3Matrix3x3FloatData m_basis;
b3Vector3FloatData m_origin;
};

///Double-precision image of a b3Transform: basis followed by origin.
struct b3TransformDoubleData
{
b3Matrix3x3DoubleData m_basis;
b3Vector3DoubleData m_origin;
};



// Each (de)serialization routine below simply forwards to the matching routine of
// m_basis and m_origin, basis first.

/// Writes this transform into the precision-dependent b3TransformData.
B3_FORCE_INLINE void b3Transform::serialize(b3TransformData& dataOut) const
{
m_basis.serialize(dataOut.m_basis);
m_origin.serialize(dataOut.m_origin);
}

/// Writes this transform into the float serialization struct.
B3_FORCE_INLINE void b3Transform::serializeFloat(b3TransformFloatData& dataOut) const
{
m_basis.serializeFloat(dataOut.m_basis);
m_origin.serializeFloat(dataOut.m_origin);
}


/// Reads this transform from the precision-dependent b3TransformData.
B3_FORCE_INLINE void b3Transform::deSerialize(const b3TransformData& dataIn)
{
m_basis.deSerialize(dataIn.m_basis);
m_origin.deSerialize(dataIn.m_origin);
}

/// Reads this transform from the float serialization struct.
B3_FORCE_INLINE void b3Transform::deSerializeFloat(const b3TransformFloatData& dataIn)
{
m_basis.deSerializeFloat(dataIn.m_basis);
m_origin.deSerializeFloat(dataIn.m_origin);
}

/// Reads this transform from the double serialization struct.
B3_FORCE_INLINE void b3Transform::deSerializeDouble(const b3TransformDoubleData& dataIn)
{
m_basis.deSerializeDouble(dataIn.m_basis);
m_origin.deSerializeDouble(dataIn.m_origin);
}


#endif //B3_TRANSFORM_H







+ 228
- 0
src/bullet/Bullet3Common/b3TransformUtil.h View File

@@ -0,0 +1,228 @@
/*
Copyright (c) 2003-2013 Gino van den Bergen / Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/


#ifndef B3_TRANSFORM_UTIL_H
#define B3_TRANSFORM_UTIL_H

#include "b3Transform.h"
// Largest rotation angle (in radians) applied in a single integration step: a quarter of pi.
// Parenthesized so the product stays one unit wherever the macro is expanded
// (e.g. as a divisor or next to a higher-precedence operator).
#define B3_ANGULAR_MOTION_THRESHOLD (b3Scalar(0.5)*B3_HALF_PI)




/// Support point of an origin-centred box: for every axis, pick -halfExtent when the
/// support direction component is negative and +halfExtent otherwise.
B3_FORCE_INLINE b3Vector3 b3AabbSupport(const b3Vector3& halfExtents,const b3Vector3& supportDir)
{
    const b3Scalar zero = b3Scalar(0.0);

    b3Scalar cornerX = halfExtents.getX();
    if (supportDir.getX() < zero)
        cornerX = -cornerX;

    b3Scalar cornerY = halfExtents.getY();
    if (supportDir.getY() < zero)
        cornerY = -cornerY;

    b3Scalar cornerZ = halfExtents.getZ();
    if (supportDir.getZ() < zero)
        cornerZ = -cornerZ;

    return b3MakeVector3(cornerX, cornerY, cornerZ);
}






/// Utils related to temporal transforms: integrating a transform over a time step and
/// recovering linear/angular velocity from two poses.
class b3TransformUtil
{

public:

/// Advances curTrans by linvel and angvel over timeStep and stores the result in predictedTransform.
/// The origin is moved by linvel * timeStep; the rotation is advanced by a quaternion built from angvel.
static void integrateTransform(const b3Transform& curTrans,const b3Vector3& linvel,const b3Vector3& angvel,b3Scalar timeStep,b3Transform& predictedTransform)
{
predictedTransform.setOrigin(curTrans.getOrigin() + linvel * timeStep);
// #define QUATERNION_DERIVATIVE
#ifdef QUATERNION_DERIVATIVE
// Alternative path: first-order update using the quaternion derivative.
b3Quaternion predictedOrn = curTrans.getRotation();
predictedOrn += (angvel * predictedOrn) * (timeStep * b3Scalar(0.5));
predictedOrn.normalize();
#else
//Exponential map
//google for "Practical Parameterization of Rotations Using the Exponential Map", F. Sebastian Grassia

b3Vector3 axis;
b3Scalar fAngle = angvel.length();
//limit the angular motion
// NOTE(review): only fAngle is clamped here; angvel itself keeps its full length in the
// expressions below, and the result is re-normalized afterwards -- confirm this is intended.
if (fAngle*timeStep > B3_ANGULAR_MOTION_THRESHOLD)
{
fAngle = B3_ANGULAR_MOTION_THRESHOLD / timeStep;
}

if ( fAngle < b3Scalar(0.001) )
{
// use the Taylor expansion of sin(0.5*fAngle*timeStep)/fAngle to avoid dividing by a tiny angle:
// 0.5*t - (t^3/48)*fAngle^2, with 1/48 = 0.020833333333
axis = angvel*( b3Scalar(0.5)*timeStep-(timeStep*timeStep*timeStep)*(b3Scalar(0.020833333333))*fAngle*fAngle );
}
else
{
// scale factor sin(0.5*fAngle*timeStep)/fAngle
axis = angvel*( b3Sin(b3Scalar(0.5)*fAngle*timeStep)/fAngle );
}
// Incremental rotation: vector part from the scaled angular velocity, scalar part cos(half angle).
b3Quaternion dorn (axis.getX(),axis.getY(),axis.getZ(),b3Cos( fAngle*timeStep*b3Scalar(0.5) ));
b3Quaternion orn0 = curTrans.getRotation();

b3Quaternion predictedOrn = dorn * orn0;
predictedOrn.normalize();
#endif
predictedTransform.setRotation(predictedOrn);
}

/// Computes the linear and angular velocity that move (pos0, orn0) to (pos1, orn1) in timeStep.
/// angVel is set to zero when the two orientations compare equal. timeStep is used as a divisor.
static void calculateVelocityQuaternion(const b3Vector3& pos0,const b3Vector3& pos1,const b3Quaternion& orn0,const b3Quaternion& orn1,b3Scalar timeStep,b3Vector3& linVel,b3Vector3& angVel)
{
linVel = (pos1 - pos0) / timeStep;
b3Vector3 axis;
b3Scalar angle;
if (orn0 != orn1)
{
calculateDiffAxisAngleQuaternion(orn0,orn1,axis,angle);
angVel = axis * angle / timeStep;
} else
{
angVel.setValue(0,0,0);
}
}

/// Computes axis and angle of the rotation orn1a * inverse(orn0), after replacing orn1a
/// by orn0.nearest(orn1a). The axis is normalized; (1,0,0) is returned when it is (nearly) zero.
static void calculateDiffAxisAngleQuaternion(const b3Quaternion& orn0,const b3Quaternion& orn1a,b3Vector3& axis,b3Scalar& angle)
{
b3Quaternion orn1 = orn0.nearest(orn1a);
b3Quaternion dorn = orn1 * orn0.inverse();
angle = dorn.getAngle();
axis = b3MakeVector3(dorn.getX(),dorn.getY(),dorn.getZ());
// clear the fourth component so it does not take part in the length computation below
axis[3] = b3Scalar(0.);
//check for axis length
b3Scalar len = axis.length2();
if (len < B3_EPSILON*B3_EPSILON)
axis = b3MakeVector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.));
else
axis /= b3Sqrt(len);
}

/// Computes the linear and angular velocity that move transform0 to transform1 in timeStep.
/// timeStep is used as a divisor.
static void calculateVelocity(const b3Transform& transform0,const b3Transform& transform1,b3Scalar timeStep,b3Vector3& linVel,b3Vector3& angVel)
{
linVel = (transform1.getOrigin() - transform0.getOrigin()) / timeStep;
b3Vector3 axis;
b3Scalar angle;
calculateDiffAxisAngle(transform0,transform1,axis,angle);
angVel = axis * angle / timeStep;
}

/// Computes axis and angle of the rotation transform1.basis * inverse(transform0.basis).
/// The axis is normalized; (1,0,0) is returned when it is (nearly) zero.
static void calculateDiffAxisAngle(const b3Transform& transform0,const b3Transform& transform1,b3Vector3& axis,b3Scalar& angle)
{
b3Matrix3x3 dmat = transform1.getBasis() * transform0.getBasis().inverse();
b3Quaternion dorn;
dmat.getRotation(dorn);

///floating point inaccuracy can lead to a w component > 1, which breaks the angle extraction, so normalize first
dorn.normalize();
angle = dorn.getAngle();
axis = b3MakeVector3(dorn.getX(),dorn.getY(),dorn.getZ());
// clear the fourth component so it does not take part in the length computation below
axis[3] = b3Scalar(0.);
//check for axis length
b3Scalar len = axis.length2();
if (len < B3_EPSILON*B3_EPSILON)
axis = b3MakeVector3(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.));
else
axis /= b3Sqrt(len);
}

};


///The b3ConvexSeparatingDistanceUtil can help speed up convex collision detection
///by conservatively updating a cached separating distance/vector instead of re-calculating the closest distance
class b3ConvexSeparatingDistanceUtil
{
b3Quaternion m_ornA;
b3Quaternion m_ornB;
b3Vector3 m_posA;
b3Vector3 m_posB;
b3Vector3 m_separatingNormal;

b3Scalar m_boundingRadiusA;
b3Scalar m_boundingRadiusB;
b3Scalar m_separatingDistance;

public:

b3ConvexSeparatingDistanceUtil(b3Scalar boundingRadiusA,b3Scalar boundingRadiusB)
:m_boundingRadiusA(boundingRadiusA),
m_boundingRadiusB(boundingRadiusB),
m_separatingDistance(0.f)
{
}

b3Scalar getConservativeSeparatingDistance()
{
return m_separatingDistance;
}

void updateSeparatingDistance(const b3Transform& transA,const b3Transform& transB)
{
const b3Vector3& toPosA = transA.getOrigin();
const b3Vector3& toPosB = transB.getOrigin();
b3Quaternion toOrnA = transA.getRotation();
b3Quaternion toOrnB = transB.getRotation();

if (m_separatingDistance>0.f)
{

b3Vector3 linVelA,angVelA,linVelB,angVelB;
b3TransformUtil::calculateVelocityQuaternion(m_posA,toPosA,m_ornA,toOrnA,b3Scalar(1.),linVelA,angVelA);
b3TransformUtil::calculateVelocityQuaternion(m_posB,toPosB,m_ornB,toOrnB,b3Scalar(1.),linVelB,angVelB);
b3Scalar maxAngularProjectedVelocity = angVelA.length() * m_boundingRadiusA + angVelB.length() * m_boundingRadiusB;
b3Vector3 relLinVel = (linVelB-linVelA);
b3Scalar relLinVelocLength = relLinVel.dot(m_separatingNormal);
if (relLinVelocLength<0.f)
{
relLinVelocLength = 0.f;
}
b3Scalar projectedMotion = maxAngularProjectedVelocity +relLinVelocLength;
m_separatingDistance -= projectedMotion;
}
m_posA = toPosA;
m_posB = toPosB;
m_ornA = toOrnA;
m_ornB = toOrnB;
}

void initSeparatingDistance(const b3Vector3& separatingVector,b3Scalar separatingDistance,const b3Transform& transA,const b3Transform& transB)
{
m_separatingDistance = separatingDistance;

if (m_separatingDistance>0.f)
{
m_separatingNormal = separatingVector;
const b3Vector3& toPosA = transA.getOrigin();
const b3Vector3& toPosB = transB.getOrigin();
b3Quaternion toOrnA = transA.getRotation();
b3Quaternion toOrnB = transB.getRotation();
m_posA = toPosA;
m_posB = toPosB;
m_ornA = toOrnA;
m_ornB = toOrnB;
}
}

};


#endif //B3_TRANSFORM_UTIL_H


+ 1631
- 0
src/bullet/Bullet3Common/b3Vector3.cpp
File diff suppressed because it is too large
View File


+ 1343
- 0
src/bullet/Bullet3Common/b3Vector3.h
File diff suppressed because it is too large
View File


+ 97
- 0
src/bullet/Bullet3Common/shared/b3Float4.h View File

@@ -0,0 +1,97 @@
#ifndef B3_FLOAT4_H
#define B3_FLOAT4_H

#include "Bullet3Common/shared/b3PlatformDefinitions.h"

#ifdef __cplusplus
#include "Bullet3Common/b3Vector3.h"
#define b3Float4 b3Vector3
#define b3Float4ConstArg const b3Vector3&
#define b3Dot3F4 b3Dot
#define b3Cross3 b3Cross
#define b3MakeFloat4 b3MakeVector3
/// C++ build: returns vec.normalized().
inline b3Vector3 b3Normalized(const b3Vector3& vec)
{
return vec.normalized();
}

/// C++ build: identical to b3Normalized -- it also just returns v.normalized().
inline b3Float4 b3FastNormalized3(b3Float4ConstArg v)
{
return v.normalized();
}

/// Returns a copy of a after calling setMax(b) on it.
inline b3Float4 b3MaxFloat4 (const b3Float4& a, const b3Float4& b)
{
    b3Float4 result = a;
    result.setMax(b);
    return result;
}
/// Returns a copy of a after calling setMin(b) on it.
inline b3Float4 b3MinFloat4 (const b3Float4& a, const b3Float4& b)
{
    b3Float4 result = a;
    result.setMin(b);
    return result;
}



#else
typedef float4 b3Float4;
#define b3Float4ConstArg const b3Float4
#define b3MakeFloat4 (float4)
// Non-C++ (OpenCL) build: 3-component dot product. Both operands are rebuilt from their
// xyz lanes with w forced to 0, so the w lanes of the inputs never contribute.
float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)
{
float4 a1 = b3MakeFloat4(v0.xyz,0.f);
float4 b1 = b3MakeFloat4(v1.xyz,0.f);
return dot(a1, b1);
}
// Non-C++ (OpenCL) build: 3-component cross product. Both operands are rebuilt from their
// xyz lanes with w forced to 0 before calling cross().
b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)
{
float4 a1 = b3MakeFloat4(v0.xyz,0.f);
float4 b1 = b3MakeFloat4(v1.xyz,0.f);
return cross(a1, b1);
}
#define b3MinFloat4 min
#define b3MaxFloat4 max

#define b3Normalized(a) normalize(a)

#endif


/// True when none of the x, y, z components exceeds 1e-6 in magnitude (w is not examined).
inline bool b3IsAlmostZero(b3Float4ConstArg v)
{
    const bool anyComponentLarge = (b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6);
    return !anyComponentLarge;
}


/// Finds the entry of vecArray with the largest 3-component dot product against vec.
/// @param vec      direction to test against
/// @param vecArray candidate vectors
/// @param vecLen   number of candidates
/// @param dotOut   receives the largest dot product found (-B3_INFINITY when nothing qualified)
/// @return index of the best candidate; 0 (after asserting) when no candidate qualified
inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )
{
    float bestDot = -B3_INFINITY;
    int ptIndex = -1;
    int k;
    for( k = 0; k < vecLen; ++k )
    {
        const float candidate = b3Dot3F4(vecArray[k],vec);
        if( candidate > bestDot )
        {
            bestDot = candidate;
            ptIndex = k;
        }
    }
    b3Assert(ptIndex>=0);
    // Fall back to the first entry so callers always receive a usable index.
    if (ptIndex<0)
    {
        ptIndex = 0;
    }
    *dotOut = bestDot;
    return ptIndex;
}



#endif //B3_FLOAT4_H

+ 64
- 0
src/bullet/Bullet3Common/shared/b3Int2.h View File

@@ -0,0 +1,64 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_INT2_H
#define B3_INT2_H

#ifdef __cplusplus

/// Pair of unsigned ints, reachable either by name (x, y) or by index (s[0], s[1]).
/// Both views overlay the same storage through the anonymous union.
struct b3UnsignedInt2
{
union
{
struct
{
unsigned int x,y;
};
struct
{
unsigned int s[2];
};
};
};

/// Pair of ints, reachable either by name (x, y) or by index (s[0], s[1]).
/// Both views overlay the same storage through the anonymous union.
struct b3Int2
{
union
{
struct
{
int x,y;
};
struct
{
int s[2];
};
};
};

/// Builds a b3Int2 holding (x, y).
inline b3Int2 b3MakeInt2(int x, int y)
{
    b3Int2 pair;
    pair.s[0] = x;
    pair.s[1] = y;
    return pair;
}
#else

#define b3UnsignedInt2 uint2
#define b3Int2 int2
#define b3MakeInt2 (int2)

#endif //__cplusplus
#endif

+ 68
- 0
src/bullet/Bullet3Common/shared/b3Int4.h View File

@@ -0,0 +1,68 @@
#ifndef B3_INT4_H
#define B3_INT4_H

#ifdef __cplusplus

#include "Bullet3Common/b3Scalar.h"


/// Four unsigned ints, reachable either by name (x, y, z, w) or by index (s[0]..s[3]).
/// Both views overlay the same storage through the anonymous union.
B3_ATTRIBUTE_ALIGNED16(struct) b3UnsignedInt4
{
B3_DECLARE_ALIGNED_ALLOCATOR();

union
{
struct
{
unsigned int x,y,z,w;
};
struct
{
unsigned int s[4];
};
};
};

/// Four ints, reachable either by name (x, y, z, w) or by index (s[0]..s[3]).
/// Both views overlay the same storage through the anonymous union.
B3_ATTRIBUTE_ALIGNED16(struct) b3Int4
{
B3_DECLARE_ALIGNED_ALLOCATOR();

union
{
struct
{
int x,y,z,w;
};
struct
{
int s[4];
};
};
};

/// Builds a b3Int4 holding (x, y, z, w); w defaults to 0.
B3_FORCE_INLINE b3Int4 b3MakeInt4(int x, int y, int z, int w = 0)
{
    b3Int4 quad;
    quad.s[0] = x;
    quad.s[1] = y;
    quad.s[2] = z;
    quad.s[3] = w;
    return quad;
}

/// Builds a b3UnsignedInt4 holding (x, y, z, w); w defaults to 0.
B3_FORCE_INLINE b3UnsignedInt4 b3MakeUnsignedInt4(unsigned int x, unsigned int y, unsigned int z, unsigned int w = 0)
{
    b3UnsignedInt4 quad;
    quad.s[0] = x;
    quad.s[1] = y;
    quad.s[2] = z;
    quad.s[3] = w;
    return quad;
}

#else


#define b3UnsignedInt4 uint4
#define b3Int4 int4
#define b3MakeInt4 (int4)
#define b3MakeUnsignedInt4 (uint4)


#endif //__cplusplus

#endif //B3_INT4_H

+ 179
- 0
src/bullet/Bullet3Common/shared/b3Mat3x3.h View File

@@ -0,0 +1,179 @@

#ifndef B3_MAT3x3_H
#define B3_MAT3x3_H

#include "Bullet3Common/shared/b3Quat.h"


#ifdef __cplusplus

#include "Bullet3Common/b3Matrix3x3.h"

#define b3Mat3x3 b3Matrix3x3
#define b3Mat3x3ConstArg const b3Matrix3x3&

/// C++ build: constructs the matrix directly from the quaternion.
inline b3Mat3x3 b3QuatGetRotationMatrix(b3QuatConstArg quat)
{
return b3Mat3x3(quat);
}

/// C++ build: returns mat.absolute().
inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg mat)
{
return mat.absolute();
}

#define b3GetRow(m,row) m.getRow(row)

/// C++ build: returns b*a (matrix operand on the left).
/// NOTE(review): the non-C++ definition of mtMul3 further down in this header dots `a` with the
/// columns of `b` (row vector times matrix). Whether `b*a` yields the same result depends on
/// b3Matrix3x3's operator*, which is not visible here -- confirm the two builds agree.
__inline
b3Float4 mtMul3(b3Float4ConstArg a, b3Mat3x3ConstArg b)
{
return b*a;
}


#else

// Non-C++ (OpenCL) build: a 3x3 matrix stored as three b3Float4 rows.
typedef struct
{
b3Float4 m_row[3];
}b3Mat3x3;

#define b3Mat3x3ConstArg const b3Mat3x3
#define b3GetRow(m,row) (m.m_row[row])

// Non-C++ (OpenCL) build: expands quaternion (x, y, z, w) into a 3x3 matrix, one row at a time.
// The w lane of every row is set to 0.
// NOTE(review): the formula uses no normalization term, so it presumably expects a unit quaternion -- confirm.
inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)
{
// squared components (x^2, y^2, z^2), reused on the diagonal
b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);
b3Mat3x3 out;

out.m_row[0].x=1-2*quat2.y-2*quat2.z;
out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;
out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;
out.m_row[0].w = 0.f;

out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;
out.m_row[1].y=1-2*quat2.x-2*quat2.z;
out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;
out.m_row[1].w = 0.f;

out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;
out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;
out.m_row[2].z=1-2*quat2.x-2*quat2.y;
out.m_row[2].w = 0.f;

return out;
}

// Non-C++ (OpenCL) build: applies fabs() to each of the three rows.
inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)
{
b3Mat3x3 out;
out.m_row[0] = fabs(matIn.m_row[0]);
out.m_row[1] = fabs(matIn.m_row[1]);
out.m_row[2] = fabs(matIn.m_row[2]);
return out;
}


__inline
b3Mat3x3 mtZero();

__inline
b3Mat3x3 mtIdentity();

__inline
b3Mat3x3 mtTranspose(b3Mat3x3 m);

__inline
b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);

__inline
b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);

__inline
b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);

// Returns a matrix whose three rows are all zero.
__inline
b3Mat3x3 mtZero()
{
b3Mat3x3 m;
m.m_row[0] = (b3Float4)(0.f);
m.m_row[1] = (b3Float4)(0.f);
m.m_row[2] = (b3Float4)(0.f);
return m;
}

// Returns the identity matrix (w lane of each row is 0).
__inline
b3Mat3x3 mtIdentity()
{
b3Mat3x3 m;
m.m_row[0] = (b3Float4)(1,0,0,0);
m.m_row[1] = (b3Float4)(0,1,0,0);
m.m_row[2] = (b3Float4)(0,0,1,0);
return m;
}

// Returns the transpose of m: row i of the result is column i of m, with w set to 0.
__inline
b3Mat3x3 mtTranspose(b3Mat3x3 m)
{
b3Mat3x3 out;
out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);
out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);
out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);
return out;
}

// Matrix product a * b. b is transposed first so that every output element becomes a
// dot product of a row of a with a row of the transposed b.
__inline
b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)
{
b3Mat3x3 transB;
transB = mtTranspose( b );
b3Mat3x3 ans;
// The w lanes of a are cleared here, before the loop. The original author noted that
// clearing them inside the loop (see the commented-out line) did not work for them.
a.m_row[0].w = 0.f;
a.m_row[1].w = 0.f;
a.m_row[2].w = 0.f;
for(int i=0; i<3; i++)
{
// a.m_row[i].w = 0.f;
ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);
ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);
ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);
ans.m_row[i].w = 0.f;
}
return ans;
}

// Matrix * vector: each result component is the 3-component dot product of
// the matching row of a with b. The w lane of the result is 0.
__inline
b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)
{
    b3Float4 product;
    product.w = 0.f;
    product.x = b3Dot3F4( a.m_row[0], b );
    product.y = b3Dot3F4( a.m_row[1], b );
    product.z = b3Dot3F4( a.m_row[2], b );
    return product;
}

// Vector * matrix: each result component is the 3-component dot product of
// a with the matching column of b.
// The w lane of the result is explicitly set to 0 (as mtMul1 does); it was
// previously left unwritten, so callers received an indeterminate value.
__inline
b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)
{
    // Gather the columns of b as vectors.
    b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);
    b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);
    b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);

    b3Float4 ans;
    ans.x = b3Dot3F4( a, colx );
    ans.y = b3Dot3F4( a, coly );
    ans.z = b3Dot3F4( a, colz );
    ans.w = 0.f;
    return ans;
}


#endif






#endif //B3_MAT3x3_H

+ 41
- 0
src/bullet/Bullet3Common/shared/b3PlatformDefinitions.h View File

@@ -0,0 +1,41 @@
#ifndef B3_PLATFORM_DEFINITIONS_H
#define B3_PLATFORM_DEFINITIONS_H

// Single-int struct; nothing else in the visible part of this header refers
// to it. NOTE(review): looks like a leftover test type - confirm it is
// unused before removing.
struct MyTest
{
int bla;
};

#ifdef __cplusplus
//#define b3ConstArray(a) const b3AlignedObjectArray<a>&
// Read-only array argument: a plain pointer to const elements of type a.
#define b3ConstArray(a) const a*
// Post-increments the int pointed to by a and yields the previous value.
// This is a plain (non-atomic) increment. The argument is parenthesized so
// that pointer expressions such as `counters + i` expand correctly.
#define b3AtomicInc(a) ((*(a))++)

// Adds val to *p and returns the value *p held before the addition.
// Implemented as a plain read followed by a plain write: despite the name
// there is no atomic instruction or lock involved.
inline int b3AtomicAdd (volatile int *p, int val)
{
    const int previous = *p;
    *p = previous + val;
    return previous;
}

#define __global

#define B3_STATIC static
#else
// Definitions used when this header is NOT compiled as C++.
//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX
#define B3_LARGE_FLOAT 1e18f
// "Infinity" is the same finite value as B3_LARGE_FLOAT on this path.
#define B3_INFINITY 1e18f
// Assertions expand to nothing on this path.
#define b3Assert(a)
// Read-only array argument: pointer to const elements in __global memory.
#define b3ConstArray(a) __global const a*
// Atomic helpers map directly onto the atomic_inc / atomic_add built-ins.
#define b3AtomicInc atomic_inc
#define b3AtomicAdd atomic_add
// Math helpers; sqrt/sin/cos use the native_* variants.
#define b3Fabs fabs
#define b3Sqrt native_sqrt
#define b3Sin native_sin
#define b3Cos native_cos

// B3_STATIC expands to nothing here (the C++ path defines it as `static`).
#define B3_STATIC
#endif

#endif

+ 103
- 0
src/bullet/Bullet3Common/shared/b3Quat.h View File

@@ -0,0 +1,103 @@
#ifndef B3_QUAT_H
#define B3_QUAT_H

#include "Bullet3Common/shared/b3PlatformDefinitions.h"
#include "Bullet3Common/shared/b3Float4.h"

#ifdef __cplusplus
#include "Bullet3Common/b3Quaternion.h"
#include "Bullet3Common/b3Transform.h"

#define b3Quat b3Quaternion
#define b3QuatConstArg const b3Quaternion&
// C++ path: forwards to b3Quaternion::inverse().
inline b3Quat b3QuatInverse(b3QuatConstArg orn)
{
    b3Quat inverted = orn.inverse();
    return inverted;
}

// C++ path: builds a b3Transform from `translation` and `orientation` and
// applies it to `point`.
inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)
{
    b3Transform xform;
    xform.setOrigin(translation);
    xform.setRotation(orientation);
    b3Float4 transformed = xform(point);
    return transformed;
}

#else
typedef float4 b3Quat;
#define b3QuatConstArg const b3Quat
// Normalizes the xyz part of v with fast_normalize(); the w lane is forced
// to 0 before normalizing so it does not contribute to the length.
inline float4 b3FastNormalize4(float4 v)
{
    float4 xyzOnly = (float4)(v.xyz,0.f);
    return fast_normalize(xyzOnly);
}
// Forward declarations of the quaternion helpers defined below.
// b3Quat is a typedef of float4 on this path, so the definitions that
// spell their return type as float4 match these prototypes.
inline b3Quat b3QuatMul(b3Quat a, b3Quat b);
inline b3Quat b3QuatNormalized(b3QuatConstArg in);
inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);
inline b3Quat b3QuatInvert(b3QuatConstArg q);
inline b3Quat b3QuatInverse(b3QuatConstArg q);

// Quaternion product a*b (x, y, z = vector part, w = scalar part).
inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)
{
    // Vector part: cross(a, b) + a.w*b + b.w*a
    b3Quat product = b3Cross3( a, b );
    product += a.w*b+b.w*a;
    // Scalar part, overwriting whatever the vector arithmetic left in w:
    // a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z)
    product.w = a.w*b.w - b3Dot3F4(a, b);
    return product;
}

// Returns `in` scaled to unit length (all four components are used for the
// length). If the length is not greater than zero, the identity quaternion
// (0, 0, 0, 1) is returned instead.
inline b3Quat b3QuatNormalized(b3QuatConstArg in)
{
    b3Quat q = in;
    float len = native_sqrt(dot(q, q));
    if (!(len > 0.f))
    {
        // Degenerate input: fall back to the identity rotation.
        q.x = q.y = q.z = 0.f;
        q.w = 1.f;
        return q;
    }
    q *= 1.f / len;
    return q;
}
// Rotates `vec` by quaternion `q`, computed as q * (vec.xyz, 0) * invert(q).
inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)
{
    b3Quat conj = b3QuatInvert( q );
    // Treat the vector as a quaternion with zero scalar part.
    float4 pureVec = vec;
    pureVec.w = 0.f;
    float4 halfRotated = b3QuatMul(q,pureVec);
    return b3QuatMul(halfRotated,conj);
}



// Returns q with its vector part negated and w unchanged (the conjugate).
// Same computation as b3QuatInvert.
inline b3Quat b3QuatInverse(b3QuatConstArg q)
{
    b3Quat conj = (b3Quat)(-q.xyz, q.w);
    return conj;
}

// Returns q with its vector part negated and w unchanged (the conjugate).
inline b3Quat b3QuatInvert(b3QuatConstArg q)
{
    b3Quat flipped = q;
    flipped.xyz = -q.xyz;
    return flipped;
}

// Rotates `vec` by the inverted quaternion, i.e. undoes b3QuatRotate(q, .).
inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)
{
    b3Quat inverted = b3QuatInvert( q );
    return b3QuatRotate( inverted, vec );
}

// Rotates `point` by `orientation`, then adds `translation`.
inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)
{
    b3Float4 rotated = b3QuatRotate( orientation, point );
    return rotated + (translation);
}
#endif

#endif //B3_QUAT_H

+ 159
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3ContactSolverInfo.h View File

@@ -0,0 +1,159 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_CONTACT_SOLVER_INFO
#define B3_CONTACT_SOLVER_INFO

#include "Bullet3Common/b3Scalar.h"

// Solver option flags. Every enumerator is a distinct power of two, so the
// values can be OR-ed together (b3ContactSolverInfoData::m_solverMode holds
// such a combination). The value 8 is not assigned to any enumerator.
enum b3SolverMode
{
B3_SOLVER_RANDMIZE_ORDER = 1,
B3_SOLVER_FRICTION_SEPARATE = 2,
B3_SOLVER_USE_WARMSTARTING = 4,
B3_SOLVER_USE_2_FRICTION_DIRECTIONS = 16,
B3_SOLVER_ENABLE_FRICTION_DIRECTION_CACHING = 32,
B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION = 64,
B3_SOLVER_CACHE_FRIENDLY = 128,
B3_SOLVER_SIMD = 256,
B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS = 512,
B3_SOLVER_ALLOW_ZERO_LENGTH_FRICTION_DIRECTIONS = 1024
};

// Plain data holder for the solver tuning parameters. It has no constructor
// of its own; b3ContactSolverInfo derives from it and assigns the defaults.
struct b3ContactSolverInfoData
{

b3Scalar m_tau;
b3Scalar m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
b3Scalar m_friction;
b3Scalar m_timeStep;
b3Scalar m_restitution;
int m_numIterations;
b3Scalar m_maxErrorReduction;
b3Scalar m_sor;
b3Scalar m_erp;//used as Baumgarte factor
b3Scalar m_erp2;//used in Split Impulse
b3Scalar m_globalCfm;//constraint force mixing
int m_splitImpulse;
b3Scalar m_splitImpulsePenetrationThreshold;
b3Scalar m_splitImpulseTurnErp;
b3Scalar m_linearSlop;
b3Scalar m_warmstartingFactor;

int m_solverMode; // combination of b3SolverMode flags
int m_restingContactRestitutionThreshold;
int m_minimumSolverBatchSize;
b3Scalar m_maxGyroscopicForce;
b3Scalar m_singleAxisRollingFrictionThreshold;


};

// b3ContactSolverInfoData plus a constructor that assigns the default value
// of every parameter.
struct b3ContactSolverInfo : public b3ContactSolverInfoData
{
    inline b3ContactSolverInfo()
    {
        // --- general solver parameters ---
        m_tau = b3Scalar(0.6);
        m_damping = b3Scalar(1.0);
        m_friction = b3Scalar(0.3);
        m_timeStep = b3Scalar(1.f/60.f);
        m_restitution = b3Scalar(0.);
        m_numIterations = 10;
        m_maxErrorReduction = b3Scalar(20.);
        m_sor = b3Scalar(1.);

        // --- error reduction / constraint force mixing ---
        m_erp = b3Scalar(0.2);
        m_erp2 = b3Scalar(0.8);
        m_globalCfm = b3Scalar(0.);

        // --- split impulse ---
        m_splitImpulse = true;
        m_splitImpulsePenetrationThreshold = -.04f;
        m_splitImpulseTurnErp = 0.1f;

        m_linearSlop = b3Scalar(0.0);
        m_warmstartingFactor = b3Scalar(0.85);

        // Default mode: warm starting + SIMD. Other b3SolverMode flags
        // (friction direction options, B3_SOLVER_RANDMIZE_ORDER, ...) are
        // left off.
        m_solverMode = B3_SOLVER_USE_WARMSTARTING | B3_SOLVER_SIMD;

        // unused as of 2.81
        m_restingContactRestitutionThreshold = 2;
        // try to combine islands until the amount of constraints reaches this limit
        m_minimumSolverBatchSize = 128;
        // only used to clamp forces for bodies that have their gyroscopic
        // force flag set (using b3RigidBody::setFlag)
        m_maxGyroscopicForce = 100.f;
        // if the velocity is above this threshold, a single constraint row
        // (axis) is used, otherwise 3 rows
        m_singleAxisRollingFrictionThreshold = 1e30f;
    }
};

///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
// Double-precision serialization record of the solver parameters: sixteen
// doubles, then five ints, then four bytes of explicit padding. Field order
// and types are part of the serialized format (see the warning above).
struct b3ContactSolverInfoDoubleData
{
double m_tau;
double m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
double m_friction;
double m_timeStep;
double m_restitution;
double m_maxErrorReduction;
double m_sor;
double m_erp;//used as Baumgarte factor
double m_erp2;//used in Split Impulse
double m_globalCfm;//constraint force mixing
double m_splitImpulsePenetrationThreshold;
double m_splitImpulseTurnErp;
double m_linearSlop;
double m_warmstartingFactor;
double m_maxGyroscopicForce;
double m_singleAxisRollingFrictionThreshold;

int m_numIterations;
int m_solverMode;
int m_restingContactRestitutionThreshold;
int m_minimumSolverBatchSize;
int m_splitImpulse;
char m_padding[4]; // explicit filler after the five ints

};
///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
// Single-precision serialization record of the solver parameters: sixteen
// floats, then five ints, then four bytes of explicit padding. Field order
// and types are part of the serialized format (see the warning above).
struct b3ContactSolverInfoFloatData
{
float m_tau;
float m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
float m_friction;
float m_timeStep;

float m_restitution;
float m_maxErrorReduction;
float m_sor;
float m_erp;//used as Baumgarte factor

float m_erp2;//used in Split Impulse
float m_globalCfm;//constraint force mixing
float m_splitImpulsePenetrationThreshold;
float m_splitImpulseTurnErp;

float m_linearSlop;
float m_warmstartingFactor;
float m_maxGyroscopicForce;
float m_singleAxisRollingFrictionThreshold;

int m_numIterations;
int m_solverMode;
int m_restingContactRestitutionThreshold;
int m_minimumSolverBatchSize;

int m_splitImpulse;
char m_padding[4]; // explicit filler after the five ints
};



#endif //B3_CONTACT_SOLVER_INFO

+ 108
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.cpp View File

@@ -0,0 +1,108 @@

#include "b3FixedConstraint.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
#include "Bullet3Common/b3TransformUtil.h"
#include <new>


// Creates a fixed constraint between bodies rbA and rbB (indices into the
// rigid body array passed to getInfo1/getInfo2). The origins of the two
// frames become the pivots, and the relative target rotation is
// rotation(frameInA) * inverse(rotation(frameInB)).
b3FixedConstraint::b3FixedConstraint(int rbA,int rbB, const b3Transform& frameInA,const b3Transform& frameInB)
    : b3TypedConstraint(B3_FIXED_CONSTRAINT_TYPE,rbA,rbB)
{
    m_pivotInA = frameInA.getOrigin();
    m_pivotInB = frameInB.getOrigin();
    m_relTargetAB = frameInA.getRotation()*frameInB.getRotation().inverse();
}

// Nothing to release: the destructor body is empty.
b3FixedConstraint::~b3FixedConstraint ()
{
}

// Reports the size of this constraint: always 6 rows, with nub set to 6 as
// well. `bodies` is not read.
void b3FixedConstraint::getInfo1 (b3ConstraintInfo1* info,const b3RigidBodyData* bodies)
{
    const int kRows = 6; // 3 linear + 3 angular
    info->m_numConstraintRows = kRows;
    info->nub = kRows;
}

// Fills the six constraint rows of `info` for the body pair
// (bodies[m_rbA], bodies[m_rbB]): rows 0-2 pin the two pivots together,
// rows 3-5 drive the relative orientation towards m_relTargetAB.
// Row r of each Jacobian array starts at offset r*info->rowskip.
void b3FixedConstraint::getInfo2 (b3ConstraintInfo2* info, const b3RigidBodyData* bodies)
{
//fix the 3 linear degrees of freedom

const b3Vector3& worldPosA = bodies[m_rbA].m_pos;
const b3Quaternion& worldOrnA = bodies[m_rbA].m_quat;
const b3Vector3& worldPosB= bodies[m_rbB].m_pos;
const b3Quaternion& worldOrnB = bodies[m_rbB].m_quat;

// linear Jacobian of body A: 1 on the diagonal of rows 0..2
info->m_J1linearAxis[0] = 1;
info->m_J1linearAxis[info->rowskip+1] = 1;
info->m_J1linearAxis[2*info->rowskip+2] = 1;

// pivot A rotated into world orientation
b3Vector3 a1 = b3QuatRotate(worldOrnA,m_pivotInA);
{
// angular Jacobian of body A, rows 0..2: skew-symmetric matrix of -a1
b3Vector3* angular0 = (b3Vector3*)(info->m_J1angularAxis);
b3Vector3* angular1 = (b3Vector3*)(info->m_J1angularAxis+info->rowskip);
b3Vector3* angular2 = (b3Vector3*)(info->m_J1angularAxis+2*info->rowskip);
b3Vector3 a1neg = -a1;
a1neg.getSkewSymmetricMatrix(angular0,angular1,angular2);
}
if (info->m_J2linearAxis)
{
// linear Jacobian of body B: -1 on the diagonal of rows 0..2
info->m_J2linearAxis[0] = -1;
info->m_J2linearAxis[info->rowskip+1] = -1;
info->m_J2linearAxis[2*info->rowskip+2] = -1;
}
// pivot B rotated into world orientation
b3Vector3 a2 = b3QuatRotate(worldOrnB,m_pivotInB);
{
// angular Jacobian of body B, rows 0..2: skew-symmetric matrix of a2.
// NOTE(review): m_J2angularAxis is written here without a null check,
// although it is null-checked further down - confirm it can never be null.
// b3Vector3 a2n = -a2;
b3Vector3* angular0 = (b3Vector3*)(info->m_J2angularAxis);
b3Vector3* angular1 = (b3Vector3*)(info->m_J2angularAxis+info->rowskip);
b3Vector3* angular2 = (b3Vector3*)(info->m_J2angularAxis+2*info->rowskip);
a2.getSkewSymmetricMatrix(angular0,angular1,angular2);
}

// set right hand side for the linear dofs
b3Scalar k = info->fps * info->erp;
// world-space pivot separation (pivot B minus pivot A), scaled by k
b3Vector3 linearError = k*(a2+worldPosB-a1-worldPosA);
int j;
for (j=0; j<3; j++)
{
info->m_constraintError[j*info->rowskip] = linearError[j];
//printf("info->m_constraintError[%d]=%f\n",j,info->m_constraintError[j]);
}

//fix the 3 angular degrees of freedom

int start_row = 3;
int s = info->rowskip;
int start_index = start_row * s;

// 3 rows to make body rotations equal
info->m_J1angularAxis[start_index] = 1;
info->m_J1angularAxis[start_index + s + 1] = 1;
info->m_J1angularAxis[start_index + s*2+2] = 1;
if ( info->m_J2angularAxis)
{
info->m_J2angularAxis[start_index] = -1;
info->m_J2angularAxis[start_index + s+1] = -1;
info->m_J2angularAxis[start_index + s*2+2] = -1;
}


// set right hand side for the angular dofs

b3Vector3 diff;
b3Scalar angle;
// current relative rotation, built the same way as m_relTargetAB
b3Quaternion qrelCur = worldOrnA *worldOrnB.inverse();

// axis (diff) and angle between the target and the current relative rotation
b3TransformUtil::calculateDiffAxisAngleQuaternion(m_relTargetAB,qrelCur,diff,angle);
diff*=-angle;
for (j=0; j<3; j++)
{
info->m_constraintError[(3+j)*info->rowskip] = k * diff[j];
}

}

+ 35
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3FixedConstraint.h View File

@@ -0,0 +1,35 @@

#ifndef B3_FIXED_CONSTRAINT_H
#define B3_FIXED_CONSTRAINT_H

#include "b3TypedConstraint.h"

// Constraint that locks the relative position and orientation of two rigid
// bodies (see getInfo2 in the .cpp for the rows it emits).
B3_ATTRIBUTE_ALIGNED16(class) b3FixedConstraint : public b3TypedConstraint
{
// origins of the frames passed to the constructor
b3Vector3 m_pivotInA;
b3Vector3 m_pivotInB;
// target relative rotation, computed once in the constructor
b3Quaternion m_relTargetAB;

public:
b3FixedConstraint(int rbA,int rbB, const b3Transform& frameInA,const b3Transform& frameInB);
virtual ~b3FixedConstraint();

virtual void getInfo1 (b3ConstraintInfo1* info,const b3RigidBodyData* bodies);

virtual void getInfo2 (b3ConstraintInfo2* info, const b3RigidBodyData* bodies);

// No parameter is supported: setting one only trips the assertion.
virtual void setParam(int num, b3Scalar value, int axis = -1)
{
b3Assert(0);
}
// No parameter is supported: trips the assertion and returns 0.
virtual b3Scalar getParam(int num, int axis = -1) const
{
b3Assert(0);
return 0.f;
}

};

#endif //B3_FIXED_CONSTRAINT_H

+ 807
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.cpp View File

@@ -0,0 +1,807 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
/*
2007-09-09
Refactored by Francisco León
email: projectileman@yahoo.com
http://gimpact.sf.net
*/

#include "b3Generic6DofConstraint.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"

#include "Bullet3Common/b3TransformUtil.h"
#include "Bullet3Common/b3TransformUtil.h"
#include <new>



#define D6_USE_OBSOLETE_METHOD false
#define D6_USE_FRAME_OFFSET true






// Creates a 6-DOF constraint between bodies rbA and rbB (indices into
// `bodies`). The constraint frames are stored, frame offsets are enabled via
// D6_USE_FRAME_OFFSET, no per-axis parameter overrides are set (m_flags = 0),
// and the derived transforms are computed immediately from `bodies`.
b3Generic6DofConstraint::b3Generic6DofConstraint(int rbA,int rbB, const b3Transform& frameInA, const b3Transform& frameInB, bool useLinearReferenceFrameA, const b3RigidBodyData* bodies)
    : b3TypedConstraint(B3_D6_CONSTRAINT_TYPE, rbA, rbB),
      m_frameInA(frameInA),
      m_frameInB(frameInB),
      m_useLinearReferenceFrameA(useLinearReferenceFrameA),
      m_useOffsetForConstraintFrame(D6_USE_FRAME_OFFSET),
      m_flags(0)
{
    calculateTransforms(bodies);
}






#define GENERIC_D6_DISABLE_WARMSTARTING 1



// Returns mat[index % 3][index / 3], i.e. `index` (0..8) walks the first
// subscript fastest.
b3Scalar btGetMatrixElem(const b3Matrix3x3& mat, int index);
b3Scalar btGetMatrixElem(const b3Matrix3x3& mat, int index)
{
    const int first = index % 3;
    const int second = index / 3;
    return mat[first][second];
}



///MatrixToEulerXYZ from http://www.geometrictools.com/LibFoundation/Mathematics/Wm4Matrix3.inl.html
bool matrixToEulerXYZ(const b3Matrix3x3& mat,b3Vector3& xyz);
bool matrixToEulerXYZ(const b3Matrix3x3& mat,b3Vector3& xyz)
{
// // rot = cy*cz -cy*sz sy
// // cz*sx*sy+cx*sz cx*cz-sx*sy*sz -cy*sx
// // -cx*cz*sy+sx*sz cz*sx+cx*sy*sz cx*cy
//

b3Scalar fi = btGetMatrixElem(mat,2);
if (fi < b3Scalar(1.0f))
{
if (fi > b3Scalar(-1.0f))
{
xyz[0] = b3Atan2(-btGetMatrixElem(mat,5),btGetMatrixElem(mat,8));
xyz[1] = b3Asin(btGetMatrixElem(mat,2));
xyz[2] = b3Atan2(-btGetMatrixElem(mat,1),btGetMatrixElem(mat,0));
return true;
}
else
{
// WARNING. Not unique. XA - ZA = -atan2(r10,r11)
xyz[0] = -b3Atan2(btGetMatrixElem(mat,3),btGetMatrixElem(mat,4));
xyz[1] = -B3_HALF_PI;
xyz[2] = b3Scalar(0.0);
return false;
}
}
else
{
// WARNING. Not unique. XAngle + ZAngle = atan2(r10,r11)
xyz[0] = b3Atan2(btGetMatrixElem(mat,3),btGetMatrixElem(mat,4));
xyz[1] = B3_HALF_PI;
xyz[2] = 0.0;
}
return false;
}

//////////////////////////// b3RotationalLimitMotor ////////////////////////////////////

// Classifies `test_value` against [m_loLimit, m_hiLimit] and stores the
// result in m_currentLimit:
//   0 = free (also when m_loLimit > m_hiLimit, which disables the limit),
//   1 = below the low limit, 2 = above the high limit.
// On a violation m_currentLimitError receives the signed distance to the
// violated limit, wrapped once by B3_2_PI if it lies outside (-B3_PI, B3_PI).
// When the value is free m_currentLimitError is left untouched.
int b3RotationalLimitMotor::testLimitValue(b3Scalar test_value)
{
    int state = 0;
    b3Scalar violatedBound = b3Scalar(0.f);

    if (!(m_loLimit>m_hiLimit))
    {
        if (test_value < m_loLimit)
        {
            state = 1; // low limit violation
            violatedBound = m_loLimit;
        }
        else if (test_value> m_hiLimit)
        {
            state = 2; // high limit violation
            violatedBound = m_hiLimit;
        }
    }

    m_currentLimit = state;
    if (state != 0)
    {
        b3Scalar error = test_value - violatedBound;
        if (error>B3_PI)
        {
            error-=B3_2_PI;
        }
        else if (error<-B3_PI)
        {
            error+=B3_2_PI;
        }
        m_currentLimitError = error;
    }
    return state;
}




//////////////////////////// End b3RotationalLimitMotor ////////////////////////////////////




//////////////////////////// b3TranslationalLimitMotor ////////////////////////////////////


int b3TranslationalLimitMotor::testLimitValue(int limitIndex, b3Scalar test_value)
{
b3Scalar loLimit = m_lowerLimit[limitIndex];
b3Scalar hiLimit = m_upperLimit[limitIndex];
if(loLimit > hiLimit)
{
m_currentLimit[limitIndex] = 0;//Free from violation
m_currentLimitError[limitIndex] = b3Scalar(0.f);
return 0;
}

if (test_value < loLimit)
{
m_currentLimit[limitIndex] = 2;//low limit violation
m_currentLimitError[limitIndex] = test_value - loLimit;
return 2;
}
else if (test_value> hiLimit)
{
m_currentLimit[limitIndex] = 1;//High limit violation
m_currentLimitError[limitIndex] = test_value - hiLimit;
return 1;
};

m_currentLimit[limitIndex] = 0;//Free from violation
m_currentLimitError[limitIndex] = b3Scalar(0.f);
return 0;
}



//////////////////////////// b3TranslationalLimitMotor ////////////////////////////////////

void b3Generic6DofConstraint::calculateAngleInfo()
{
b3Matrix3x3 relative_frame = m_calculatedTransformA.getBasis().inverse()*m_calculatedTransformB.getBasis();
matrixToEulerXYZ(relative_frame,m_calculatedAxisAngleDiff);
// in euler angle mode we do not actually constrain the angular velocity
// along the axes axis[0] and axis[2] (although we do use axis[1]) :
//
// to get constrain w2-w1 along ...not
// ------ --------------------- ------
// d(angle[0])/dt = 0 ax[1] x ax[2] ax[0]
// d(angle[1])/dt = 0 ax[1]
// d(angle[2])/dt = 0 ax[0] x ax[1] ax[2]
//
// constraining w2-w1 along an axis 'a' means that a'*(w2-w1)=0.
// to prove the result for angle[0], write the expression for angle[0] from
// GetInfo1 then take the derivative. to prove this for angle[2] it is
// easier to take the euler rate expression for d(angle[2])/dt with respect
// to the components of w and set that to 0.
b3Vector3 axis0 = m_calculatedTransformB.getBasis().getColumn(0);
b3Vector3 axis2 = m_calculatedTransformA.getBasis().getColumn(2);

m_calculatedAxis[1] = axis2.cross(axis0);
m_calculatedAxis[0] = m_calculatedAxis[1].cross(axis2);
m_calculatedAxis[2] = axis0.cross(m_calculatedAxis[1]);

m_calculatedAxis[0].normalize();
m_calculatedAxis[1].normalize();
m_calculatedAxis[2].normalize();

}

// Builds a transform from the body's orientation (m_quat) and position (m_pos).
static b3Transform getCenterOfMassTransform(const b3RigidBodyData& body)
{
    return b3Transform(body.m_quat,body.m_pos);
}

void b3Generic6DofConstraint::calculateTransforms(const b3RigidBodyData* bodies)
{
b3Transform transA;
b3Transform transB;
transA = getCenterOfMassTransform(bodies[m_rbA]);
transB = getCenterOfMassTransform(bodies[m_rbB]);
calculateTransforms(transA,transB,bodies);
}

void b3Generic6DofConstraint::calculateTransforms(const b3Transform& transA,const b3Transform& transB,const b3RigidBodyData* bodies)
{
m_calculatedTransformA = transA * m_frameInA;
m_calculatedTransformB = transB * m_frameInB;
calculateLinearInfo();
calculateAngleInfo();
if(m_useOffsetForConstraintFrame)
{ // get weight factors depending on masses
b3Scalar miA = bodies[m_rbA].m_invMass;
b3Scalar miB = bodies[m_rbB].m_invMass;
m_hasStaticBody = (miA < B3_EPSILON) || (miB < B3_EPSILON);
b3Scalar miS = miA + miB;
if(miS > b3Scalar(0.f))
{
m_factA = miB / miS;
}
else
{
m_factA = b3Scalar(0.5f);
}
m_factB = b3Scalar(1.0f) - m_factA;
}
}







bool b3Generic6DofConstraint::testAngularLimitMotor(int axis_index)
{
b3Scalar angle = m_calculatedAxisAngleDiff[axis_index];
angle = b3AdjustAngleToLimits(angle, m_angularLimits[axis_index].m_loLimit, m_angularLimits[axis_index].m_hiLimit);
m_angularLimits[axis_index].m_currentPosition = angle;
//test limits
m_angularLimits[axis_index].testLimitValue(angle);
return m_angularLimits[axis_index].needApplyTorques();
}




// Refreshes the constraint state from `bodies` and reports how many rows
// are needed: one per linear axis that needs a force plus one per angular
// axis that needs a torque. nub is 6 minus that row count.
void b3Generic6DofConstraint::getInfo1 (b3ConstraintInfo1* info,const b3RigidBodyData* bodies)
{
    // prepare constraint
    calculateTransforms(getCenterOfMassTransform(bodies[m_rbA]),getCenterOfMassTransform(bodies[m_rbB]),bodies);

    int activeRows = 0;

    // test linear limits
    for (int axis = 0; axis < 3; ++axis)
    {
        if (m_linearLimits.needApplyForce(axis))
        {
            ++activeRows;
        }
    }

    // test angular limits (this also updates the angular motors)
    for (int axis = 0; axis < 3; ++axis)
    {
        if (testAngularLimitMotor(axis))
        {
            ++activeRows;
        }
    }

    info->m_numConstraintRows = activeRows;
    info->nub = 6 - activeRows;
}

// Reports the maximum size without inspecting any state: all 6 rows are
// pre-allocated and nub is 0. `bodies` is not read.
void b3Generic6DofConstraint::getInfo1NonVirtual (b3ConstraintInfo1* info,const b3RigidBodyData* bodies)
{
    info->nub = 0;
    info->m_numConstraintRows = 6;
}


// Emits the constraint rows for the current state of both bodies. This
// function does not recompute the constraint transforms or limit states
// itself; it uses whatever the members currently hold.
void b3Generic6DofConstraint::getInfo2 (b3ConstraintInfo2* info,const b3RigidBodyData* bodies)
{
    const b3RigidBodyData& bodyA = bodies[m_rbA];
    const b3RigidBodyData& bodyB = bodies[m_rbB];

    b3Transform transA = getCenterOfMassTransform(bodyA);
    b3Transform transB = getCenterOfMassTransform(bodyB);
    const b3Vector3& linVelA = bodyA.m_linVel;
    const b3Vector3& linVelB = bodyB.m_linVel;
    const b3Vector3& angVelA = bodyA.m_angVel;
    const b3Vector3& angVelB = bodyB.m_angVel;

    if (m_useOffsetForConstraintFrame)
    {
        // for stability better to solve angular limits first
        const int nextRow = setAngularLimits(info, 0,transA,transB,linVelA,linVelB,angVelA,angVelB);
        setLinearLimits(info, nextRow, transA,transB,linVelA,linVelB,angVelA,angVelB);
        return;
    }

    // leave old version for compatibility: linear rows first
    const int nextRow = setLinearLimits(info, 0, transA,transB,linVelA,linVelB,angVelA,angVelB);
    setAngularLimits(info, nextRow,transA,transB,linVelA,linVelB,angVelA,angVelB);
}


// Same as getInfo2, but the body transforms and velocities are supplied by
// the caller, and the constraint state (transforms, angular limit motors) is
// refreshed here before the rows are emitted.
void b3Generic6DofConstraint::getInfo2NonVirtual (b3ConstraintInfo2* info, const b3Transform& transA,const b3Transform& transB,const b3Vector3& linVelA,const b3Vector3& linVelB,const b3Vector3& angVelA,const b3Vector3& angVelB,const b3RigidBodyData* bodies)
{
    // prepare constraint
    calculateTransforms(transA,transB,bodies);

    for (int axis = 0; axis < 3; ++axis)
    {
        testAngularLimitMotor(axis);
    }

    if (m_useOffsetForConstraintFrame)
    {
        // for stability better to solve angular limits first
        const int nextRow = setAngularLimits(info, 0,transA,transB,linVelA,linVelB,angVelA,angVelB);
        setLinearLimits(info, nextRow, transA,transB,linVelA,linVelB,angVelA,angVelB);
        return;
    }

    // leave old version for compatibility: linear rows first
    const int nextRow = setLinearLimits(info, 0, transA,transB,linVelA,linVelB,angVelA,angVelB);
    setAngularLimits(info, nextRow,transA,transB,linVelA,linVelB,angVelA,angVelB);
}



// Emits one constraint row for every linear axis that needs a force,
// starting at row index `row`, and returns the index of the next free row.
// The per-axis linear limit data is copied into a temporary
// b3RotationalLimitMotor so that get_limit_motor_info2 can be shared with
// the angular code path.
int b3Generic6DofConstraint::setLinearLimits(b3ConstraintInfo2* info, int row, const b3Transform& transA,const b3Transform& transB,const b3Vector3& linVelA,const b3Vector3& linVelB,const b3Vector3& angVelA,const b3Vector3& angVelB)
{
// int row = 0;
//solve linear limits
b3RotationalLimitMotor limot;
for (int i=0;i<3 ;i++ )
{
if(m_linearLimits.needApplyForce(i))
{ // re-use rotational motor code
limot.m_bounce = b3Scalar(0.f);
limot.m_currentLimit = m_linearLimits.m_currentLimit[i];
limot.m_currentPosition = m_linearLimits.m_currentLinearDiff[i];
limot.m_currentLimitError = m_linearLimits.m_currentLimitError[i];
limot.m_damping = m_linearLimits.m_damping;
limot.m_enableMotor = m_linearLimits.m_enableMotor[i];
limot.m_hiLimit = m_linearLimits.m_upperLimit[i];
limot.m_limitSoftness = m_linearLimits.m_limitSoftness;
limot.m_loLimit = m_linearLimits.m_lowerLimit[i];
limot.m_maxLimitForce = b3Scalar(0.f);
limot.m_maxMotorForce = m_linearLimits.m_maxMotorForce[i];
limot.m_targetVelocity = m_linearLimits.m_targetVelocity[i];
// the constraint axis is column i of frame A's world-space basis
b3Vector3 axis = m_calculatedTransformA.getBasis().getColumn(i);
// per-axis override flags for this linear axis (set through setParam);
// without an override the global values from info are used
int flags = m_flags >> (i * B3_6DOF_FLAGS_AXIS_SHIFT);
limot.m_normalCFM = (flags & B3_6DOF_FLAGS_CFM_NORM) ? m_linearLimits.m_normalCFM[i] : info->cfm[0];
limot.m_stopCFM = (flags & B3_6DOF_FLAGS_CFM_STOP) ? m_linearLimits.m_stopCFM[i] : info->cfm[0];
limot.m_stopERP = (flags & B3_6DOF_FLAGS_ERP_STOP) ? m_linearLimits.m_stopERP[i] : info->erp;
if(m_useOffsetForConstraintFrame)
{
int indx1 = (i + 1) % 3;
int indx2 = (i + 2) % 3;
int rotAllowed = 1; // rotations around orthos to current axis
// rotation counts as blocked only when BOTH other angular axes are at a limit
if(m_angularLimits[indx1].m_currentLimit && m_angularLimits[indx2].m_currentLimit)
{
rotAllowed = 0;
}
row += get_limit_motor_info2(&limot, transA,transB,linVelA,linVelB,angVelA,angVelB, info, row, axis, 0, rotAllowed);
}
else
{
// rotAllowed takes its default value here (declared outside this file section)
row += get_limit_motor_info2(&limot, transA,transB,linVelA,linVelB,angVelA,angVelB, info, row, axis, 0);
}
}
}
return row;
}



// Emits one constraint row for every angular axis whose motor needs torques,
// starting at row index `row_offset`, and returns the index of the next free
// row. For parameters that were not overridden through setParam, the motor's
// CFM/ERP values are refreshed from the global values in `info`.
int b3Generic6DofConstraint::setAngularLimits(b3ConstraintInfo2 *info, int row_offset, const b3Transform& transA,const b3Transform& transB,const b3Vector3& linVelA,const b3Vector3& linVelB,const b3Vector3& angVelA,const b3Vector3& angVelB)
{
    int nextRow = row_offset;

    // solve angular limits
    for (int axisIdx = 0; axisIdx < 3; ++axisIdx)
    {
        if (!getRotationalLimitMotor(axisIdx)->needApplyTorques())
        {
            continue;
        }

        b3Vector3 axis = getAxis(axisIdx);

        // override flags of angular axis i live at slot (i + 3)
        const int axisFlags = m_flags >> ((axisIdx + 3) * B3_6DOF_FLAGS_AXIS_SHIFT);
        if ((axisFlags & B3_6DOF_FLAGS_CFM_NORM) == 0)
        {
            m_angularLimits[axisIdx].m_normalCFM = info->cfm[0];
        }
        if ((axisFlags & B3_6DOF_FLAGS_CFM_STOP) == 0)
        {
            m_angularLimits[axisIdx].m_stopCFM = info->cfm[0];
        }
        if ((axisFlags & B3_6DOF_FLAGS_ERP_STOP) == 0)
        {
            m_angularLimits[axisIdx].m_stopERP = info->erp;
        }

        nextRow += get_limit_motor_info2(getRotationalLimitMotor(axisIdx),
            transA,transB,linVelA,linVelB,angVelA,angVelB, info,nextRow,axis,1);
    }

    return nextRow;
}




// Intentionally does nothing; the cast only marks timeStep as used.
void b3Generic6DofConstraint::updateRHS(b3Scalar timeStep)
{
(void)timeStep;

}


// Replaces both constraint frames and immediately recomputes the derived
// transforms from the current state of `bodies`.
void b3Generic6DofConstraint::setFrames(const b3Transform& frameA, const b3Transform& frameB,const b3RigidBodyData* bodies)
{
    m_frameInB = frameB;
    m_frameInA = frameA;
    calculateTransforms(bodies);
}



// Returns a copy of the constraint axis last computed by calculateAngleInfo
// for `axis_index` (0..2).
b3Vector3 b3Generic6DofConstraint::getAxis(int axis_index) const
{
    const b3Vector3& axis = m_calculatedAxis[axis_index];
    return axis;
}


// Returns component `axisIndex` of the linear offset last computed by
// calculateLinearInfo.
b3Scalar b3Generic6DofConstraint::getRelativePivotPosition(int axisIndex) const
{
    const b3Scalar offset = m_calculatedLinearDiff[axisIndex];
    return offset;
}


// Returns the Euler angle last computed by calculateAngleInfo for
// `axisIndex` (0..2).
b3Scalar b3Generic6DofConstraint::getAngle(int axisIndex) const
{
    const b3Scalar angle = m_calculatedAxisAngleDiff[axisIndex];
    return angle;
}



void b3Generic6DofConstraint::calcAnchorPos(const b3RigidBodyData* bodies)
{
b3Scalar imA = bodies[m_rbA].m_invMass;
b3Scalar imB = bodies[m_rbB].m_invMass;
b3Scalar weight;
if(imB == b3Scalar(0.0))
{
weight = b3Scalar(1.0);
}
else
{
weight = imA / (imA + imB);
}
const b3Vector3& pA = m_calculatedTransformA.getOrigin();
const b3Vector3& pB = m_calculatedTransformB.getOrigin();
m_AnchorPos = pA * weight + pB * (b3Scalar(1.0) - weight);
return;
}



void b3Generic6DofConstraint::calculateLinearInfo()
{
m_calculatedLinearDiff = m_calculatedTransformB.getOrigin() - m_calculatedTransformA.getOrigin();
m_calculatedLinearDiff = m_calculatedTransformA.getBasis().inverse() * m_calculatedLinearDiff;
for(int i = 0; i < 3; i++)
{
m_linearLimits.m_currentLinearDiff[i] = m_calculatedLinearDiff[i];
m_linearLimits.testLimitValue(i, m_calculatedLinearDiff[i]);
}
}



// Fills one constraint row (index `row`) of `info` for a single motor/limit
// `limot` acting along axis `ax1`.
//   rotational : non-zero for an angular axis, zero for a linear axis
//   rotAllowed : only read on the linear + frame-offset path (see below)
// Returns 1 if a row was written (motor enabled or limit active), else 0.
// All per-row arrays, including info->cfm, are indexed with
// srow = row * info->rowskip.
int b3Generic6DofConstraint::get_limit_motor_info2(
b3RotationalLimitMotor * limot,
const b3Transform& transA,const b3Transform& transB,const b3Vector3& linVelA,const b3Vector3& linVelB,const b3Vector3& angVelA,const b3Vector3& angVelB,
b3ConstraintInfo2 *info, int row, b3Vector3& ax1, int rotational,int rotAllowed)
{
int srow = row * info->rowskip;
int powered = limot->m_enableMotor;
int limit = limot->m_currentLimit;
if (powered || limit)
{ // if the joint is powered, or has joint limits, add in the extra row
// primary Jacobian: +ax1 for body A, -ax1 for body B
b3Scalar *J1 = rotational ? info->m_J1angularAxis : info->m_J1linearAxis;
b3Scalar *J2 = rotational ? info->m_J2angularAxis : info->m_J2linearAxis;
if (J1)
{
J1[srow+0] = ax1[0];
J1[srow+1] = ax1[1];
J1[srow+2] = ax1[2];
}
if (J2)
{
J2[srow+0] = -ax1[0];
J2[srow+1] = -ax1[1];
J2[srow+2] = -ax1[2];
}
// linear rows additionally need angular Jacobian terms
// NOTE(review): m_J1angularAxis / m_J2angularAxis are written below
// without the null checks applied to J1 / J2 above - confirm non-null.
if((!rotational))
{
if (m_useOffsetForConstraintFrame)
{
b3Vector3 tmpA, tmpB, relA, relB;
// get vector from bodyB to frameB in WCS
relB = m_calculatedTransformB.getOrigin() - transB.getOrigin();
// get its projection to constraint axis
b3Vector3 projB = ax1 * relB.dot(ax1);
// get vector directed from bodyB to constraint axis (and orthogonal to it)
b3Vector3 orthoB = relB - projB;
// same for bodyA
relA = m_calculatedTransformA.getOrigin() - transA.getOrigin();
b3Vector3 projA = ax1 * relA.dot(ax1);
b3Vector3 orthoA = relA - projA;
// get desired offset between frames A and B along constraint axis
b3Scalar desiredOffs = limot->m_currentPosition - limot->m_currentLimitError;
// desired vector from projection of center of bodyA to projection of center of bodyB to constraint axis
b3Vector3 totalDist = projA + ax1 * desiredOffs - projB;
// get offset vectors relA and relB
relA = orthoA + totalDist * m_factA;
relB = orthoB - totalDist * m_factB;
tmpA = relA.cross(ax1);
tmpB = relB.cross(ax1);
// with a static body and blocked rotation the angular terms are scaled
// by the mass weights
if(m_hasStaticBody && (!rotAllowed))
{
tmpA *= m_factA;
tmpB *= m_factB;
}
int i;
for (i=0; i<3; i++) info->m_J1angularAxis[srow+i] = tmpA[i];
for (i=0; i<3; i++) info->m_J2angularAxis[srow+i] = -tmpB[i];
} else
{
b3Vector3 ltd; // Linear Torque Decoupling vector
b3Vector3 c = m_calculatedTransformB.getOrigin() - transA.getOrigin();
ltd = c.cross(ax1);
info->m_J1angularAxis[srow+0] = ltd[0];
info->m_J1angularAxis[srow+1] = ltd[1];
info->m_J1angularAxis[srow+2] = ltd[2];

c = m_calculatedTransformB.getOrigin() - transB.getOrigin();
ltd = -c.cross(ax1);
info->m_J2angularAxis[srow+0] = ltd[0];
info->m_J2angularAxis[srow+1] = ltd[1];
info->m_J2angularAxis[srow+2] = ltd[2];
}
}
// if we're limited low and high simultaneously, the joint motor is
// ineffective
if (limit && (limot->m_loLimit == limot->m_hiLimit)) powered = 0;
// the right-hand side starts at zero; motor and limit terms are added below
info->m_constraintError[srow] = b3Scalar(0.f);
if (powered)
{
info->cfm[srow] = limot->m_normalCFM;
if(!limit)
{
// the sign-adjusted tag_vel is only passed to getMotorFactor; the error
// term below uses the unadjusted m_targetVelocity
b3Scalar tag_vel = rotational ? limot->m_targetVelocity : -limot->m_targetVelocity;

b3Scalar mot_fact = getMotorFactor( limot->m_currentPosition,
limot->m_loLimit,
limot->m_hiLimit,
tag_vel,
info->fps * limot->m_stopERP);
info->m_constraintError[srow] += mot_fact * limot->m_targetVelocity;
// motor force is bounded symmetrically by m_maxMotorForce
info->m_lowerLimit[srow] = -limot->m_maxMotorForce;
info->m_upperLimit[srow] = limot->m_maxMotorForce;
}
}
if(limit)
{
b3Scalar k = info->fps * limot->m_stopERP;
// the limit error enters with opposite sign for linear and angular rows
if(!rotational)
{
info->m_constraintError[srow] += k * limot->m_currentLimitError;
}
else
{
info->m_constraintError[srow] += -k * limot->m_currentLimitError;
}
info->cfm[srow] = limot->m_stopCFM;
if (limot->m_loLimit == limot->m_hiLimit)
{ // limited low and high simultaneously
info->m_lowerLimit[srow] = -B3_INFINITY;
info->m_upperLimit[srow] = B3_INFINITY;
}
else
{
// one-sided limit: limit code 1 allows only non-negative values, any
// other code only non-positive ones
if (limit == 1)
{
info->m_lowerLimit[srow] = 0;
info->m_upperLimit[srow] = B3_INFINITY;
}
else
{
info->m_lowerLimit[srow] = -B3_INFINITY;
info->m_upperLimit[srow] = 0;
}
// deal with bounce
if (limot->m_bounce > 0)
{
// calculate joint velocity
b3Scalar vel;
if (rotational)
{
vel = angVelA.dot(ax1);
//make sure that if no body -> angVelB == zero vec
// if (body1)
vel -= angVelB.dot(ax1);
}
else
{
vel = linVelA.dot(ax1);
//make sure that if no body -> angVelB == zero vec
// if (body1)
vel -= linVelB.dot(ax1);
}
// only apply bounce if the velocity is incoming, and if the
// resulting c[] exceeds what we already have.
if (limit == 1)
{
if (vel < 0)
{
b3Scalar newc = -limot->m_bounce* vel;
if (newc > info->m_constraintError[srow])
info->m_constraintError[srow] = newc;
}
}
else
{
if (vel > 0)
{
b3Scalar newc = -limot->m_bounce * vel;
if (newc < info->m_constraintError[srow])
info->m_constraintError[srow] = newc;
}
}
}
}
}
return 1;
}
else return 0;
}






///Override the locally stored value of a solver parameter (stop ERP, stop CFM or normal CFM) for one axis.
///Axes 0..2 address the linear limits and axes 3..5 the angular limits. Any other axis value, or any
///other parameter id, ends up in b3AssertConstrParams(0) and stores nothing.
void b3Generic6DofConstraint::setParam(int num, b3Scalar value, int axis)
{
	const bool onLinearAxis = (axis >= 0) && (axis < 3);
	const bool onAngularAxis = (axis >= 3) && (axis < 6);
	if (!onLinearAxis && !onAngularAxis)
	{
		b3AssertConstrParams(0);
		return;
	}

	// every axis owns B3_6DOF_FLAGS_AXIS_SHIFT consecutive bits of m_flags
	const int axisFlagShift = axis * B3_6DOF_FLAGS_AXIS_SHIFT;

	if (num == B3_CONSTRAINT_STOP_ERP)
	{
		if (onLinearAxis)
		{
			m_linearLimits.m_stopERP[axis] = value;
		}
		else
		{
			m_angularLimits[axis - 3].m_stopERP = value;
		}
		m_flags |= (B3_6DOF_FLAGS_ERP_STOP << axisFlagShift);
	}
	else if (num == B3_CONSTRAINT_STOP_CFM)
	{
		if (onLinearAxis)
		{
			m_linearLimits.m_stopCFM[axis] = value;
		}
		else
		{
			m_angularLimits[axis - 3].m_stopCFM = value;
		}
		m_flags |= (B3_6DOF_FLAGS_CFM_STOP << axisFlagShift);
	}
	else if (num == B3_CONSTRAINT_CFM)
	{
		if (onLinearAxis)
		{
			m_linearLimits.m_normalCFM[axis] = value;
		}
		else
		{
			m_angularLimits[axis - 3].m_normalCFM = value;
		}
		m_flags |= (B3_6DOF_FLAGS_CFM_NORM << axisFlagShift);
	}
	else
	{
		// unknown parameter id
		b3AssertConstrParams(0);
	}
}

///Return the locally stored value of a solver parameter for one axis (0..2 linear, 3..5 angular).
///Each lookup first asserts, through b3AssertConstrParams, that the matching override bit is set in m_flags.
///An unknown parameter id or an axis outside 0..5 asserts and yields 0.
b3Scalar b3Generic6DofConstraint::getParam(int num, int axis) const
{
	const bool onLinearAxis = (axis >= 0) && (axis < 3);
	const bool onAngularAxis = (axis >= 3) && (axis < 6);
	if (!onLinearAxis && !onAngularAxis)
	{
		b3AssertConstrParams(0);
		return 0;
	}

	if (num == B3_CONSTRAINT_STOP_ERP)
	{
		b3AssertConstrParams(m_flags & (B3_6DOF_FLAGS_ERP_STOP << (axis * B3_6DOF_FLAGS_AXIS_SHIFT)));
		if (onLinearAxis)
		{
			return m_linearLimits.m_stopERP[axis];
		}
		return m_angularLimits[axis - 3].m_stopERP;
	}

	if (num == B3_CONSTRAINT_STOP_CFM)
	{
		b3AssertConstrParams(m_flags & (B3_6DOF_FLAGS_CFM_STOP << (axis * B3_6DOF_FLAGS_AXIS_SHIFT)));
		if (onLinearAxis)
		{
			return m_linearLimits.m_stopCFM[axis];
		}
		return m_angularLimits[axis - 3].m_stopCFM;
	}

	if (num == B3_CONSTRAINT_CFM)
	{
		b3AssertConstrParams(m_flags & (B3_6DOF_FLAGS_CFM_NORM << (axis * B3_6DOF_FLAGS_AXIS_SHIFT)));
		if (onLinearAxis)
		{
			return m_linearLimits.m_normalCFM[axis];
		}
		return m_angularLimits[axis - 3].m_normalCFM;
	}

	// unknown parameter id
	b3AssertConstrParams(0);
	return 0;
}


void b3Generic6DofConstraint::setAxis(const b3Vector3& axis1,const b3Vector3& axis2, const b3RigidBodyData* bodies)
{
b3Vector3 zAxis = axis1.normalized();
b3Vector3 yAxis = axis2.normalized();
b3Vector3 xAxis = yAxis.cross(zAxis); // we want right coordinate system
b3Transform frameInW;
frameInW.setIdentity();
frameInW.getBasis().setValue( xAxis[0], yAxis[0], zAxis[0],
xAxis[1], yAxis[1], zAxis[1],
xAxis[2], yAxis[2], zAxis[2]);
// now get constraint frame in local coordinate systems
m_frameInA = getCenterOfMassTransform(bodies[m_rbA]).inverse() * frameInW;
m_frameInB = getCenterOfMassTransform(bodies[m_rbB]).inverse() * frameInW;
calculateTransforms(bodies);
}

+ 550
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.h View File

@@ -0,0 +1,550 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

/// 2009 March: b3Generic6DofConstraint refactored by Roman Ponomarev
/// Added support for generic constraint solver through getInfo1/getInfo2 methods

/*
2007-09-09
b3Generic6DofConstraint Refactored by Francisco León
email: projectileman@yahoo.com
http://gimpact.sf.net
*/


#ifndef B3_GENERIC_6DOF_CONSTRAINT_H
#define B3_GENERIC_6DOF_CONSTRAINT_H

#include "Bullet3Common/b3Vector3.h"
#include "b3JacobianEntry.h"
#include "b3TypedConstraint.h"

struct b3RigidBodyData;




//! Rotation Limit structure for generic joints
class b3RotationalLimitMotor
{
public:
//! limit_parameters
//!@{
b3Scalar m_loLimit;//!< joint limit
b3Scalar m_hiLimit;//!< joint limit
b3Scalar m_targetVelocity;//!< target motor velocity
b3Scalar m_maxMotorForce;//!< max force on motor
b3Scalar m_maxLimitForce;//!< max force on limit
b3Scalar m_damping;//!< Damping.
b3Scalar m_limitSoftness;//! Relaxation factor
b3Scalar m_normalCFM;//!< Constraint force mixing factor
b3Scalar m_stopERP;//!< Error tolerance factor when joint is at limit
b3Scalar m_stopCFM;//!< Constraint force mixing factor when joint is at limit
b3Scalar m_bounce;//!< restitution factor
bool m_enableMotor;

//!@}

//! temp_variables
//!@{
b3Scalar m_currentLimitError;//! How much is violated this limit
b3Scalar m_currentPosition; //! current value of angle
int m_currentLimit;//!< 0=free, 1=at lo limit, 2=at hi limit
b3Scalar m_accumulatedImpulse;
//!@}

b3RotationalLimitMotor()
{
m_accumulatedImpulse = 0.f;
m_targetVelocity = 0;
m_maxMotorForce = 0.1f;
m_maxLimitForce = 300.0f;
m_loLimit = 1.0f;
m_hiLimit = -1.0f;
m_normalCFM = 0.f;
m_stopERP = 0.2f;
m_stopCFM = 0.f;
m_bounce = 0.0f;
m_damping = 1.0f;
m_limitSoftness = 0.5f;
m_currentLimit = 0;
m_currentLimitError = 0;
m_enableMotor = false;
}

b3RotationalLimitMotor(const b3RotationalLimitMotor & limot)
{
m_targetVelocity = limot.m_targetVelocity;
m_maxMotorForce = limot.m_maxMotorForce;
m_limitSoftness = limot.m_limitSoftness;
m_loLimit = limot.m_loLimit;
m_hiLimit = limot.m_hiLimit;
m_normalCFM = limot.m_normalCFM;
m_stopERP = limot.m_stopERP;
m_stopCFM = limot.m_stopCFM;
m_bounce = limot.m_bounce;
m_currentLimit = limot.m_currentLimit;
m_currentLimitError = limot.m_currentLimitError;
m_enableMotor = limot.m_enableMotor;
}



//! Is limited
bool isLimited()
{
if(m_loLimit > m_hiLimit) return false;
return true;
}

//! Need apply correction
bool needApplyTorques()
{
if(m_currentLimit == 0 && m_enableMotor == false) return false;
return true;
}

//! calculates error
/*!
calculates m_currentLimit and m_currentLimitError.
*/
int testLimitValue(b3Scalar test_value);

//! apply the correction impulses for two bodies
b3Scalar solveAngularLimits(b3Scalar timeStep,b3Vector3& axis, b3Scalar jacDiagABInv,b3RigidBodyData * body0, b3RigidBodyData * body1);

};



class b3TranslationalLimitMotor
{
public:
b3Vector3 m_lowerLimit;//!< the constraint lower limits
b3Vector3 m_upperLimit;//!< the constraint upper limits
b3Vector3 m_accumulatedImpulse;
//! Linear_Limit_parameters
//!@{
b3Vector3 m_normalCFM;//!< Constraint force mixing factor
b3Vector3 m_stopERP;//!< Error tolerance factor when joint is at limit
b3Vector3 m_stopCFM;//!< Constraint force mixing factor when joint is at limit
b3Vector3 m_targetVelocity;//!< target motor velocity
b3Vector3 m_maxMotorForce;//!< max force on motor
b3Vector3 m_currentLimitError;//! How much is violated this limit
b3Vector3 m_currentLinearDiff;//! Current relative offset of constraint frames
b3Scalar m_limitSoftness;//!< Softness for linear limit
b3Scalar m_damping;//!< Damping for linear limit
b3Scalar m_restitution;//! Bounce parameter for linear limit
//!@}
bool m_enableMotor[3];
int m_currentLimit[3];//!< 0=free, 1=at lower limit, 2=at upper limit

b3TranslationalLimitMotor()
{
m_lowerLimit.setValue(0.f,0.f,0.f);
m_upperLimit.setValue(0.f,0.f,0.f);
m_accumulatedImpulse.setValue(0.f,0.f,0.f);
m_normalCFM.setValue(0.f, 0.f, 0.f);
m_stopERP.setValue(0.2f, 0.2f, 0.2f);
m_stopCFM.setValue(0.f, 0.f, 0.f);

m_limitSoftness = 0.7f;
m_damping = b3Scalar(1.0f);
m_restitution = b3Scalar(0.5f);
for(int i=0; i < 3; i++)
{
m_enableMotor[i] = false;
m_targetVelocity[i] = b3Scalar(0.f);
m_maxMotorForce[i] = b3Scalar(0.f);
}
}

b3TranslationalLimitMotor(const b3TranslationalLimitMotor & other )
{
m_lowerLimit = other.m_lowerLimit;
m_upperLimit = other.m_upperLimit;
m_accumulatedImpulse = other.m_accumulatedImpulse;

m_limitSoftness = other.m_limitSoftness ;
m_damping = other.m_damping;
m_restitution = other.m_restitution;
m_normalCFM = other.m_normalCFM;
m_stopERP = other.m_stopERP;
m_stopCFM = other.m_stopCFM;

for(int i=0; i < 3; i++)
{
m_enableMotor[i] = other.m_enableMotor[i];
m_targetVelocity[i] = other.m_targetVelocity[i];
m_maxMotorForce[i] = other.m_maxMotorForce[i];
}
}

//! Test limit
/*!
- free means upper < lower,
- locked means upper == lower
- limited means upper > lower
- limitIndex: first 3 are linear, next 3 are angular
*/
inline bool isLimited(int limitIndex)
{
return (m_upperLimit[limitIndex] >= m_lowerLimit[limitIndex]);
}
inline bool needApplyForce(int limitIndex)
{
if(m_currentLimit[limitIndex] == 0 && m_enableMotor[limitIndex] == false) return false;
return true;
}
int testLimitValue(int limitIndex, b3Scalar test_value);


b3Scalar solveLinearAxis(
b3Scalar timeStep,
b3Scalar jacDiagABInv,
b3RigidBodyData& body1,const b3Vector3 &pointInA,
b3RigidBodyData& body2,const b3Vector3 &pointInB,
int limit_index,
const b3Vector3 & axis_normal_on_a,
const b3Vector3 & anchorPos);


};

// One bit per parameter that can be stored per axis on a 6-DoF constraint.
// A per-axis flag is formed as FLAG << (axis * B3_6DOF_FLAGS_AXIS_SHIFT).
enum b36DofFlags
{
	B3_6DOF_FLAGS_CFM_NORM = 1 << 0,
	B3_6DOF_FLAGS_CFM_STOP = 1 << 1,
	B3_6DOF_FLAGS_ERP_STOP = 1 << 2
};
#define B3_6DOF_FLAGS_AXIS_SHIFT 3 // bits per axis


/// b3Generic6DofConstraint between two rigidbodies each with a pivotpoint that describes the axis location in local space
/*!
b3Generic6DofConstraint can leave any of the 6 degree of freedom 'free' or 'locked'.
currently this limit supports rotational motors<br>
<ul>
<li> For Linear limits, use b3Generic6DofConstraint.setLinearUpperLimit, b3Generic6DofConstraint.setLinearLowerLimit. You can set the parameters with the b3TranslationalLimitMotor structure accessible through the b3Generic6DofConstraint.getTranslationalLimitMotor method.
At this moment translational motors are not supported. They may be added in the future. </li>

<li> For Angular limits, use the b3RotationalLimitMotor structure for configuring the limit.
This is accessible through b3Generic6DofConstraint.getLimitMotor method,
This brings support for limit parameters and motors. </li>

<li> Angular limits have these possible ranges:
<table border=1 >
<tr>
<td><b>AXIS</b></td>
<td><b>MIN ANGLE</b></td>
<td><b>MAX ANGLE</b></td>
</tr><tr>
<td>X</td>
<td>-PI</td>
<td>PI</td>
</tr><tr>
<td>Y</td>
<td>-PI/2</td>
<td>PI/2</td>
</tr><tr>
<td>Z</td>
<td>-PI</td>
<td>PI</td>
</tr>
</table>
</li>
</ul>

*/
B3_ATTRIBUTE_ALIGNED16(class) b3Generic6DofConstraint : public b3TypedConstraint
{
protected:

	//! relative_frames
	//!@{
	b3Transform m_frameInA;//!< the constraint space w.r.t body A
	b3Transform m_frameInB;//!< the constraint space w.r.t body B
	//!@}

	//! Jacobians
	//!@{
//	b3JacobianEntry m_jacLinear[3];//!< 3 orthogonal linear constraints
//	b3JacobianEntry m_jacAng[3];//!< 3 orthogonal angular constraints
	//!@}

	//! Linear_Limit_parameters
	//!@{
	b3TranslationalLimitMotor m_linearLimits;
	//!@}


	//! hinge_parameters
	//!@{
	b3RotationalLimitMotor m_angularLimits[3];
	//!@}


protected:
	//! temporal variables
	//!@{
	b3Transform m_calculatedTransformA;
	b3Transform m_calculatedTransformB;
	b3Vector3 m_calculatedAxisAngleDiff;
	b3Vector3 m_calculatedAxis[3];
	b3Vector3 m_calculatedLinearDiff;
	b3Scalar m_timeStep;
	b3Scalar m_factA;
	b3Scalar m_factB;
	bool m_hasStaticBody;
	b3Vector3 m_AnchorPos; // point between pivots of bodies A and B to solve linear axes

	bool m_useLinearReferenceFrameA;
	bool m_useOffsetForConstraintFrame;
	int m_flags; //!< per-axis b36DofFlags bits, B3_6DOF_FLAGS_AXIS_SHIFT bits per axis

	//!@}

	//! Assignment is not supported: the operator is protected and asserts when reached.
	b3Generic6DofConstraint& operator=(b3Generic6DofConstraint& other)
	{
		b3Assert(0);
		(void) other;
		return *this;
	}


	int setAngularLimits(b3ConstraintInfo2 *info, int row_offset,const b3Transform& transA,const b3Transform& transB,const b3Vector3& linVelA,const b3Vector3& linVelB,const b3Vector3& angVelA,const b3Vector3& angVelB);

	int setLinearLimits(b3ConstraintInfo2 *info, int row, const b3Transform& transA,const b3Transform& transB,const b3Vector3& linVelA,const b3Vector3& linVelB,const b3Vector3& angVelA,const b3Vector3& angVelB);


	// tests linear limits
	void calculateLinearInfo();

	//! calcs the euler angles between the two bodies.
	void calculateAngleInfo();



public:

	B3_DECLARE_ALIGNED_ALLOCATOR();
	b3Generic6DofConstraint(int rbA, int rbB, const b3Transform& frameInA, const b3Transform& frameInB ,bool useLinearReferenceFrameA,const b3RigidBodyData* bodies);
	//! Calcs global transform of the offsets
	/*!
	Calcs the global transform for the joint offset for body A and B, and also calcs the angle differences between the bodies.
	\sa b3Generic6DofConstraint.getCalculatedTransformA , b3Generic6DofConstraint.getCalculatedTransformB, b3Generic6DofConstraint.calculateAngleInfo
	*/
	void calculateTransforms(const b3Transform& transA,const b3Transform& transB,const b3RigidBodyData* bodies);

	void calculateTransforms(const b3RigidBodyData* bodies);

	//! Gets the global transform of the offset for body A
	/*!
	\sa b3Generic6DofConstraint.getFrameOffsetA, b3Generic6DofConstraint.getFrameOffsetB, b3Generic6DofConstraint.calculateAngleInfo.
	*/
	const b3Transform & getCalculatedTransformA() const
	{
		return m_calculatedTransformA;
	}

	//! Gets the global transform of the offset for body B
	/*!
	\sa b3Generic6DofConstraint.getFrameOffsetA, b3Generic6DofConstraint.getFrameOffsetB, b3Generic6DofConstraint.calculateAngleInfo.
	*/
	const b3Transform & getCalculatedTransformB() const
	{
		return m_calculatedTransformB;
	}

	//! Read-only access to the constraint frame stored relative to body A
	const b3Transform & getFrameOffsetA() const
	{
		return m_frameInA;
	}

	//! Read-only access to the constraint frame stored relative to body B
	const b3Transform & getFrameOffsetB() const
	{
		return m_frameInB;
	}


	//! Mutable access to the constraint frame stored relative to body A
	b3Transform & getFrameOffsetA()
	{
		return m_frameInA;
	}

	//! Mutable access to the constraint frame stored relative to body B
	b3Transform & getFrameOffsetB()
	{
		return m_frameInB;
	}



	virtual void getInfo1 (b3ConstraintInfo1* info,const b3RigidBodyData* bodies);

	void getInfo1NonVirtual (b3ConstraintInfo1* info,const b3RigidBodyData* bodies);

	virtual void getInfo2 (b3ConstraintInfo2* info,const b3RigidBodyData* bodies);

	void getInfo2NonVirtual (b3ConstraintInfo2* info,const b3Transform& transA,const b3Transform& transB,const b3Vector3& linVelA,const b3Vector3& linVelB,const b3Vector3& angVelA,const b3Vector3& angVelB,const b3RigidBodyData* bodies);


	void updateRHS(b3Scalar timeStep);

	//! Get the rotation axis in global coordinates
	b3Vector3 getAxis(int axis_index) const;

	//! Get the relative Euler angle
	/*!
	\pre b3Generic6DofConstraint::calculateTransforms() must be called previously.
	*/
	b3Scalar getAngle(int axis_index) const;

	//! Get the relative position of the constraint pivot
	/*!
	\pre b3Generic6DofConstraint::calculateTransforms() must be called previously.
	*/
	b3Scalar getRelativePivotPosition(int axis_index) const;

	void setFrames(const b3Transform & frameA, const b3Transform & frameB, const b3RigidBodyData* bodies);

	//! Test angular limit.
	/*!
	Calculates angular correction and returns true if limit needs to be corrected.
	\pre b3Generic6DofConstraint::calculateTransforms() must be called previously.
	*/
	bool testAngularLimitMotor(int axis_index);

	void setLinearLowerLimit(const b3Vector3& linearLower)
	{
		m_linearLimits.m_lowerLimit = linearLower;
	}

	//! Copies the linear lower limits into linearLower
	void getLinearLowerLimit(b3Vector3& linearLower) const
	{
		linearLower = m_linearLimits.m_lowerLimit;
	}

	void setLinearUpperLimit(const b3Vector3& linearUpper)
	{
		m_linearLimits.m_upperLimit = linearUpper;
	}

	//! Copies the linear upper limits into linearUpper
	void getLinearUpperLimit(b3Vector3& linearUpper) const
	{
		linearUpper = m_linearLimits.m_upperLimit;
	}

	//! Stores the three lower angular limits; each component is passed through b3NormalizeAngle first
	void setAngularLowerLimit(const b3Vector3& angularLower)
	{
		for(int i = 0; i < 3; i++)
			m_angularLimits[i].m_loLimit = b3NormalizeAngle(angularLower[i]);
	}

	//! Copies the stored (already normalized) lower angular limits into angularLower
	void getAngularLowerLimit(b3Vector3& angularLower) const
	{
		for(int i = 0; i < 3; i++)
			angularLower[i] = m_angularLimits[i].m_loLimit;
	}

	//! Stores the three upper angular limits; each component is passed through b3NormalizeAngle first
	void setAngularUpperLimit(const b3Vector3& angularUpper)
	{
		for(int i = 0; i < 3; i++)
			m_angularLimits[i].m_hiLimit = b3NormalizeAngle(angularUpper[i]);
	}

	//! Copies the stored (already normalized) upper angular limits into angularUpper
	void getAngularUpperLimit(b3Vector3& angularUpper) const
	{
		for(int i = 0; i < 3; i++)
			angularUpper[i] = m_angularLimits[i].m_hiLimit;
	}

	//! Retrieves the angular limit information
	b3RotationalLimitMotor * getRotationalLimitMotor(int index)
	{
		return &m_angularLimits[index];
	}

	//! Retrieves the limit information
	b3TranslationalLimitMotor * getTranslationalLimitMotor()
	{
		return &m_linearLimits;
	}

	//! Set the lower and upper limit of one axis
	/*!
	first 3 are linear, next 3 are angular; angular values are normalized with b3NormalizeAngle before being stored.
	*/
	void setLimit(int axis, b3Scalar lo, b3Scalar hi)
	{
		// anything outside 0..5 would index past the limit storage
		b3Assert(axis >= 0 && axis < 6);
		if(axis<3)
		{
			m_linearLimits.m_lowerLimit[axis] = lo;
			m_linearLimits.m_upperLimit[axis] = hi;
		}
		else
		{
			lo = b3NormalizeAngle(lo);
			hi = b3NormalizeAngle(hi);
			m_angularLimits[axis-3].m_loLimit = lo;
			m_angularLimits[axis-3].m_hiLimit = hi;
		}
	}

	//! Test limit
	/*!
	- free means upper < lower,
	- locked means upper == lower
	- limited means upper > lower
	- limitIndex: first 3 are linear, next 3 are angular
	*/
	bool isLimited(int limitIndex)
	{
		if(limitIndex<3)
		{
			return m_linearLimits.isLimited(limitIndex);

		}
		return m_angularLimits[limitIndex-3].isLimited();
	}

	virtual void calcAnchorPos(const b3RigidBodyData* bodies); // overridable

	int get_limit_motor_info2( b3RotationalLimitMotor * limot,
		const b3Transform& transA,const b3Transform& transB,const b3Vector3& linVelA,const b3Vector3& linVelB,const b3Vector3& angVelA,const b3Vector3& angVelB,
		b3ConstraintInfo2 *info, int row, b3Vector3& ax1, int rotational, int rotAllowed = false);

	// access for UseFrameOffset
	bool getUseFrameOffset() const { return m_useOffsetForConstraintFrame; }
	void setUseFrameOffset(bool frameOffsetOnOff) { m_useOffsetForConstraintFrame = frameOffsetOnOff; }

	///override the default global value of a parameter (such as ERP or CFM) for one axis:
	///0..2 are the linear axes, 3..5 the angular axes.
	///NOTE(review): the default axis of -1 is not a usable value for this constraint; pass an explicit axis.
	virtual void setParam(int num, b3Scalar value, int axis = -1);
	///return the local value of parameter (same axis convention as setParam)
	virtual b3Scalar getParam(int num, int axis = -1) const;

	void setAxis( const b3Vector3& axis1, const b3Vector3& axis2,const b3RigidBodyData* bodies);



};





#endif //B3_GENERIC_6DOF_CONSTRAINT_H

+ 155
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3JacobianEntry.h View File

@@ -0,0 +1,155 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_JACOBIAN_ENTRY_H
#define B3_JACOBIAN_ENTRY_H

#include "Bullet3Common/b3Matrix3x3.h"


//notes:
// Another memory optimization would be to store m_1MinvJt in the remaining 3 w components
// which makes the b3JacobianEntry memory layout 16 bytes
// if you only are interested in angular part, just feed massInvA and massInvB zero

/// Jacobian entry is an abstraction that allows to describe constraints
/// it can be used in combination with a constraint solver
/// Can be used to relate the effect of an impulse to the constraint error
B3_ATTRIBUTE_ALIGNED16(class) b3JacobianEntry
{
public:
	//! Leaves every member unset; use one of the other constructors to get a usable entry.
	b3JacobianEntry() {}
	//constraint between two different rigidbodies
	// m_Adiag = massInvA + (inertiaInvA*aJ).aJ + massInvB + (inertiaInvB*bJ).bJ, asserted to be positive
	b3JacobianEntry(
		const b3Matrix3x3& world2A,
		const b3Matrix3x3& world2B,
		const b3Vector3& rel_pos1,const b3Vector3& rel_pos2,
		const b3Vector3& jointAxis,
		const b3Vector3& inertiaInvA,
		const b3Scalar massInvA,
		const b3Vector3& inertiaInvB,
		const b3Scalar massInvB)
		:m_linearJointAxis(jointAxis)
	{
		m_aJ = world2A*(rel_pos1.cross(m_linearJointAxis));
		m_bJ = world2B*(rel_pos2.cross(-m_linearJointAxis));
		m_0MinvJt = inertiaInvA * m_aJ;
		m_1MinvJt = inertiaInvB * m_bJ;
		m_Adiag = massInvA + m_0MinvJt.dot(m_aJ) + massInvB + m_1MinvJt.dot(m_bJ);

		b3Assert(m_Adiag > b3Scalar(0.0));
	}

	//angular constraint between two different rigidbodies
	// the linear axis is zero; the joint axis is rotated into each body with world2A / world2B
	b3JacobianEntry(const b3Vector3& jointAxis,
		const b3Matrix3x3& world2A,
		const b3Matrix3x3& world2B,
		const b3Vector3& inertiaInvA,
		const b3Vector3& inertiaInvB)
		:m_linearJointAxis(b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)))
	{
		m_aJ= world2A*jointAxis;
		m_bJ = world2B*-jointAxis;
		m_0MinvJt = inertiaInvA * m_aJ;
		m_1MinvJt = inertiaInvB * m_bJ;
		m_Adiag = m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ);

		b3Assert(m_Adiag > b3Scalar(0.0));
	}

	//angular constraint between two different rigidbodies
	// same as above, but the axes are supplied per body and used without any rotation
	b3JacobianEntry(const b3Vector3& axisInA,
		const b3Vector3& axisInB,
		const b3Vector3& inertiaInvA,
		const b3Vector3& inertiaInvB)
		: m_linearJointAxis(b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)))
		, m_aJ(axisInA)
		, m_bJ(-axisInB)
	{
		m_0MinvJt = inertiaInvA * m_aJ;
		m_1MinvJt = inertiaInvB * m_bJ;
		m_Adiag = m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ);

		b3Assert(m_Adiag > b3Scalar(0.0));
	}

	//constraint on one rigidbody
	// both angular parts are rotated with world2A; the second body contributes nothing to m_Adiag
	b3JacobianEntry(
		const b3Matrix3x3& world2A,
		const b3Vector3& rel_pos1,const b3Vector3& rel_pos2,
		const b3Vector3& jointAxis,
		const b3Vector3& inertiaInvA,
		const b3Scalar massInvA)
		:m_linearJointAxis(jointAxis)
	{
		m_aJ= world2A*(rel_pos1.cross(jointAxis));
		m_bJ = world2A*(rel_pos2.cross(-jointAxis));
		m_0MinvJt = inertiaInvA * m_aJ;
		m_1MinvJt = b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.));
		m_Adiag = massInvA + m_0MinvJt.dot(m_aJ);

		b3Assert(m_Adiag > b3Scalar(0.0));
	}

	//! Returns m_Adiag as computed by the constructor.
	b3Scalar getDiagonal() const { return m_Adiag; }

	// for two constraints on the same rigidbody (for example vehicle friction)
	b3Scalar getNonDiagonal(const b3JacobianEntry& jacB, const b3Scalar massInvA) const
	{
		const b3JacobianEntry& jacA = *this;
		b3Scalar lin = massInvA * jacA.m_linearJointAxis.dot(jacB.m_linearJointAxis);
		b3Scalar ang = jacA.m_0MinvJt.dot(jacB.m_aJ);
		return lin + ang;
	}


	// for two constraints on sharing two same rigidbodies (for example two contact points between two rigidbodies)
	b3Scalar getNonDiagonal(const b3JacobianEntry& jacB,const b3Scalar massInvA,const b3Scalar massInvB) const
	{
		const b3JacobianEntry& jacA = *this;
		// the vector products below are combined into one vector first, then its three components are summed
		b3Vector3 lin = jacA.m_linearJointAxis * jacB.m_linearJointAxis;
		b3Vector3 ang0 = jacA.m_0MinvJt * jacB.m_aJ;
		b3Vector3 ang1 = jacA.m_1MinvJt * jacB.m_bJ;
		b3Vector3 lin0 = massInvA * lin ;
		b3Vector3 lin1 = massInvB * lin;
		b3Vector3 sum = ang0+ang1+lin0+lin1;
		return sum[0]+sum[1]+sum[2];
	}

	//! Relative velocity of the two bodies along this jacobian entry.
	/*!
	Sums the components of (linvelA - linvelB)*m_linearJointAxis + angvelA*m_aJ + angvelB*m_bJ
	and adds B3_EPSILON to the result. Reads members only, hence const.
	*/
	b3Scalar getRelativeVelocity(const b3Vector3& linvelA,const b3Vector3& angvelA,const b3Vector3& linvelB,const b3Vector3& angvelB) const
	{
		b3Vector3 linrel = linvelA - linvelB;
		b3Vector3 angvela = angvelA * m_aJ;
		b3Vector3 angvelb = angvelB * m_bJ;
		linrel *= m_linearJointAxis;
		angvela += angvelb;
		angvela += linrel;
		b3Scalar rel_vel2 = angvela[0]+angvela[1]+angvela[2];
		return rel_vel2 + B3_EPSILON;
	}
//private:

	b3Vector3 m_linearJointAxis;
	b3Vector3 m_aJ;
	b3Vector3 m_bJ;
	b3Vector3 m_0MinvJt;
	b3Vector3 m_1MinvJt;
	//Optimization: can be stored in the w/last component of one of the vectors
	b3Scalar m_Adiag;

};

#endif //B3_JACOBIAN_ENTRY_H

+ 1814
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp
File diff suppressed because it is too large
View File


+ 149
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h View File

@@ -0,0 +1,149 @@
#ifndef B3_PGS_JACOBI_SOLVER
#define B3_PGS_JACOBI_SOLVER


struct b3Contact4;
struct b3ContactPoint;


class b3Dispatcher;

#include "b3TypedConstraint.h"
#include "b3ContactSolverInfo.h"
#include "b3SolverBody.h"
#include "b3SolverConstraint.h"

struct b3RigidBodyData;
struct b3InertiaData;

/// Constraint solver working on b3RigidBodyData / b3InertiaData arrays, b3Contact4 contacts and b3TypedConstraint joints.
/// NOTE(review): the class name and the usePgs constructor flag suggest a choice between a
/// projected Gauss-Seidel and a Jacobi iteration scheme; confirm against the implementation file.
class b3PgsJacobiSolver
{

protected:
	// temporary pools, one per kind of solver row
	b3AlignedObjectArray<b3SolverBody>	m_tmpSolverBodyPool;
	b3ConstraintArray					m_tmpSolverContactConstraintPool;
	b3ConstraintArray					m_tmpSolverNonContactConstraintPool;
	b3ConstraintArray					m_tmpSolverContactFrictionConstraintPool;
	b3ConstraintArray					m_tmpSolverContactRollingFrictionConstraintPool;

	b3AlignedObjectArray<int>			m_orderTmpConstraintPool;
	b3AlignedObjectArray<int>			m_orderNonContactConstraintPool;
	b3AlignedObjectArray<int>			m_orderFrictionConstraintPool;
	b3AlignedObjectArray<b3TypedConstraint::b3ConstraintInfo1>	m_tmpConstraintSizesPool;
	b3AlignedObjectArray<int>			m_bodyCount;
	b3AlignedObjectArray<int>			m_bodyCountCheck;
	b3AlignedObjectArray<b3Vector3>		m_deltaLinearVelocities;
	b3AlignedObjectArray<b3Vector3>		m_deltaAngularVelocities;

	bool	m_usePgs;
	void	averageVelocities();

	int		m_maxOverrideNumSolverIterations;

	int		m_numSplitImpulseRecoveries;

	/// Returns a fixed threshold of 0.02; the contact argument is not consulted.
	b3Scalar getContactProcessingThreshold(b3Contact4* contact)
	{
		return 0.02f;
	}

	void setupFrictionConstraint(
		b3RigidBodyData* bodies, b3InertiaData* inertias,
		b3SolverConstraint& solverConstraint,
		const b3Vector3& normalAxis,
		int solverBodyIdA, int solverBodyIdB,
		b3ContactPoint& cp,
		const b3Vector3& rel_pos1, const b3Vector3& rel_pos2,
		b3RigidBodyData* colObj0, b3RigidBodyData* colObj1,
		b3Scalar relaxation,
		b3Scalar desiredVelocity=0., b3Scalar cfmSlip=0.);

	void setupRollingFrictionConstraint(
		b3RigidBodyData* bodies, b3InertiaData* inertias,
		b3SolverConstraint& solverConstraint,
		const b3Vector3& normalAxis,
		int solverBodyIdA, int solverBodyIdB,
		b3ContactPoint& cp,
		const b3Vector3& rel_pos1, const b3Vector3& rel_pos2,
		b3RigidBodyData* colObj0, b3RigidBodyData* colObj1,
		b3Scalar relaxation,
		b3Scalar desiredVelocity=0., b3Scalar cfmSlip=0.);

	b3SolverConstraint& addFrictionConstraint(
		b3RigidBodyData* bodies, b3InertiaData* inertias,
		const b3Vector3& normalAxis,
		int solverBodyIdA, int solverBodyIdB, int frictionIndex,
		b3ContactPoint& cp,
		const b3Vector3& rel_pos1, const b3Vector3& rel_pos2,
		b3RigidBodyData* colObj0, b3RigidBodyData* colObj1,
		b3Scalar relaxation,
		b3Scalar desiredVelocity=0., b3Scalar cfmSlip=0.);

	b3SolverConstraint& addRollingFrictionConstraint(
		b3RigidBodyData* bodies, b3InertiaData* inertias,
		const b3Vector3& normalAxis,
		int solverBodyIdA, int solverBodyIdB, int frictionIndex,
		b3ContactPoint& cp,
		const b3Vector3& rel_pos1, const b3Vector3& rel_pos2,
		b3RigidBodyData* colObj0, b3RigidBodyData* colObj1,
		b3Scalar relaxation,
		b3Scalar desiredVelocity=0, b3Scalar cfmSlip=0.f);


	void setupContactConstraint(
		b3RigidBodyData* bodies, b3InertiaData* inertias,
		b3SolverConstraint& solverConstraint,
		int solverBodyIdA, int solverBodyIdB,
		b3ContactPoint& cp,
		const b3ContactSolverInfo& infoGlobal,
		b3Vector3& vel, b3Scalar& rel_vel, b3Scalar& relaxation,
		b3Vector3& rel_pos1, b3Vector3& rel_pos2);

	void setFrictionConstraintImpulse(
		b3RigidBodyData* bodies, b3InertiaData* inertias,
		b3SolverConstraint& solverConstraint,
		int solverBodyIdA, int solverBodyIdB,
		b3ContactPoint& cp,
		const b3ContactSolverInfo& infoGlobal);

	///m_btSeed2 is used for re-arranging the constraint rows. improves convergence/quality of friction
	unsigned long	m_btSeed2;

	b3Scalar restitutionCurve(b3Scalar rel_vel, b3Scalar restitution);

	void convertContact(
		b3RigidBodyData* bodies, b3InertiaData* inertias,
		b3Contact4* manifold,
		const b3ContactSolverInfo& infoGlobal);


	void resolveSplitPenetrationSIMD(
		b3SolverBody& bodyA, b3SolverBody& bodyB,
		const b3SolverConstraint& contactConstraint);

	void resolveSplitPenetrationImpulseCacheFriendly(
		b3SolverBody& bodyA, b3SolverBody& bodyB,
		const b3SolverConstraint& contactConstraint);

	//internal method
	int		getOrInitSolverBody(int bodyIndex, b3RigidBodyData* bodies, b3InertiaData* inertias);
	void	initSolverBody(int bodyIndex, b3SolverBody* solverBody, b3RigidBodyData* collisionObject);

	// per-row solve steps, each in a plain and a SIMD-named variant
	void	resolveSingleConstraintRowGeneric(b3SolverBody& bodyA, b3SolverBody& bodyB, const b3SolverConstraint& contactConstraint);

	void	resolveSingleConstraintRowGenericSIMD(b3SolverBody& bodyA, b3SolverBody& bodyB, const b3SolverConstraint& contactConstraint);
	void	resolveSingleConstraintRowLowerLimit(b3SolverBody& bodyA, b3SolverBody& bodyB, const b3SolverConstraint& contactConstraint);
	void	resolveSingleConstraintRowLowerLimitSIMD(b3SolverBody& bodyA, b3SolverBody& bodyB, const b3SolverConstraint& contactConstraint);
protected:

	// overridable phases of solving one group
	virtual b3Scalar solveGroupCacheFriendlySetup(
		b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies,
		b3Contact4* manifoldPtr, int numManifolds,
		b3TypedConstraint** constraints, int numConstraints,
		const b3ContactSolverInfo& infoGlobal);


	virtual b3Scalar solveGroupCacheFriendlyIterations(
		b3TypedConstraint** constraints, int numConstraints,
		const b3ContactSolverInfo& infoGlobal);
	virtual void solveGroupCacheFriendlySplitImpulseIterations(
		b3TypedConstraint** constraints, int numConstraints,
		const b3ContactSolverInfo& infoGlobal);
	b3Scalar solveSingleIteration(
		int iteration,
		b3TypedConstraint** constraints, int numConstraints,
		const b3ContactSolverInfo& infoGlobal);


	virtual b3Scalar solveGroupCacheFriendlyFinish(
		b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies,
		const b3ContactSolverInfo& infoGlobal);


public:

	B3_DECLARE_ALIGNED_ALLOCATOR();
	b3PgsJacobiSolver(bool usePgs);
	virtual ~b3PgsJacobiSolver();

//	void	solveContacts(int numBodies, b3RigidBodyData* bodies, b3InertiaData* inertias, int numContacts, b3Contact4* contacts);
	void	solveContacts(
		int numBodies, b3RigidBodyData* bodies, b3InertiaData* inertias,
		int numContacts, b3Contact4* contacts,
		int numConstraints, b3TypedConstraint** constraints);

	b3Scalar	solveGroup(
		b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies,
		b3Contact4* manifoldPtr, int numManifolds,
		b3TypedConstraint** constraints, int numConstraints,
		const b3ContactSolverInfo& infoGlobal);

	///clear internal cached data and reset random seed
	virtual	void	reset();
	unsigned long	b3Rand2();

	int	b3RandInt2 (int n);

	/// Overwrite the seed stored in m_btSeed2.
	void	setRandSeed(unsigned long seed)
	{
		m_btSeed2 = seed;
	}
	/// Read back the seed stored in m_btSeed2.
	unsigned long	getRandSeed() const
	{
		return m_btSeed2;
	}




};

#endif //B3_PGS_JACOBI_SOLVER


+ 209
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.cpp View File

@@ -0,0 +1,209 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/


#include "b3Point2PointConstraint.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"

#include <new>





/// Creates a point-to-point constraint between the bodies at indices rbA and rbB.
///
/// @param rbA       index of the first body, forwarded to b3TypedConstraint
/// @param rbB       index of the second body, forwarded to b3TypedConstraint
/// @param pivotInA  pivot stored in m_pivotInA (rotated by body A's basis in getInfo2NonVirtual)
/// @param pivotInB  pivot stored in m_pivotInB (rotated by body B's basis in getInfo2NonVirtual)
///
/// No per-constraint ERP/CFM override is active after construction (m_flags == 0).
b3Point2PointConstraint::b3Point2PointConstraint(int rbA,int rbB, const b3Vector3& pivotInA,const b3Vector3& pivotInB)
	:b3TypedConstraint(B3_POINT2POINT_CONSTRAINT_TYPE,rbA,rbB),
	m_pivotInA(pivotInA),
	m_pivotInB(pivotInB),
	m_flags(0),
	// m_erp / m_cfm are only consulted once setParam() has raised the matching
	// flag, but getParam() returns them right after an assertion that may be
	// compiled out. Give them a defined value so no indeterminate scalar can
	// ever be read.
	m_erp(b3Scalar(0.)),
	m_cfm(b3Scalar(0.))
{
}

/*
b3Point2PointConstraint::b3Point2PointConstraint(int rbA,const b3Vector3& pivotInA)
:b3TypedConstraint(B3_POINT2POINT_CONSTRAINT_TYPE,rbA),m_pivotInA(pivotInA),m_pivotInB(rbA.getCenterOfMassTransform()(pivotInA)),
m_flags(0),
m_useSolveConstraintObsolete(false)
{
}
*/


/// Virtual entry point of the b3TypedConstraint interface: reports the row
/// counts of this constraint by forwarding to getInfo1NonVirtual().
void b3Point2PointConstraint::getInfo1 (b3ConstraintInfo1* info,const b3RigidBodyData* bodies)
{
getInfo1NonVirtual(info,bodies);
}

/// Reports the size of this constraint: three rows, matching the three rows
/// that getInfo2NonVirtual() fills in.
///
/// @param info    receives m_numConstraintRows and nub (both set to 3)
/// @param bodies  not used by this constraint type
void b3Point2PointConstraint::getInfo1NonVirtual (b3ConstraintInfo1* info,const b3RigidBodyData* bodies)
{
	const int rowCount = 3;

	info->m_numConstraintRows = rowCount;
	info->nub = rowCount;
}




/// Virtual entry point of the b3TypedConstraint interface: builds a transform
/// for each constrained body from its m_pos / m_quat entry in the body array
/// and hands both to getInfo2NonVirtual().
///
/// @param info    solver buffers to fill in
/// @param bodies  body array indexed by m_rbA and m_rbB
void b3Point2PointConstraint::getInfo2 (b3ConstraintInfo2* info, const b3RigidBodyData* bodies)
{
	const b3RigidBodyData& bodyA = bodies[m_rbA];
	const b3RigidBodyData& bodyB = bodies[m_rbB];

	b3Transform worldA;
	worldA.setIdentity();
	worldA.setOrigin(bodyA.m_pos);
	worldA.setRotation(bodyA.m_quat);

	b3Transform worldB;
	worldB.setIdentity();
	worldB.setOrigin(bodyB.m_pos);
	worldB.setRotation(bodyB.m_quat);

	getInfo2NonVirtual(info, worldA, worldB);
}

/// Fills the three solver rows of the point-to-point constraint.
///
/// For each row j the Jacobian gets a unit linear entry for body A (and -1 for
/// body B), the angular parts come from the skew-symmetric matrices of the
/// rotated pivots, and the right-hand side is the scaled difference between
/// the two pivot positions.
///
/// @param info         solver buffers; consecutive rows are info->rowskip
///                     scalars apart. m_J2linearAxis / m_J2angularAxis may be 0
///                     and are then skipped.
/// @param body0_trans  transform of body A
/// @param body1_trans  transform of body B
void b3Point2PointConstraint::getInfo2NonVirtual (b3ConstraintInfo2* info, const b3Transform& body0_trans, const b3Transform& body1_trans)
{
	const int skip = info->rowskip;

	// linear Jacobian of body A: a 1 on the diagonal of the three rows
	info->m_J1linearAxis[0] = 1;
	info->m_J1linearAxis[skip+1] = 1;
	info->m_J1linearAxis[2*skip+2] = 1;

	// pivot A rotated by body A's basis (offset from body A's origin)
	b3Vector3 a1 = body0_trans.getBasis()*getPivotInA();
	{
		// angular Jacobian of body A: skew-symmetric matrix of -a1, one row each
		b3Vector3* angular0 = (b3Vector3*)(info->m_J1angularAxis);
		b3Vector3* angular1 = (b3Vector3*)(info->m_J1angularAxis+skip);
		b3Vector3* angular2 = (b3Vector3*)(info->m_J1angularAxis+2*skip);
		b3Vector3 a1neg = -a1;
		a1neg.getSkewSymmetricMatrix(angular0,angular1,angular2);
	}

	if (info->m_J2linearAxis)
	{
		// linear Jacobian of body B: -1 on the diagonal
		info->m_J2linearAxis[0] = -1;
		info->m_J2linearAxis[skip+1] = -1;
		info->m_J2linearAxis[2*skip+2] = -1;
	}

	// pivot B rotated by body B's basis (offset from body B's origin)
	b3Vector3 a2 = body1_trans.getBasis()*getPivotInB();
	if (info->m_J2angularAxis)
	{
		// angular Jacobian of body B: skew-symmetric matrix of +a2.
		// Guarded like the linear part because the J2 pointers may be 0.
		b3Vector3* angular0 = (b3Vector3*)(info->m_J2angularAxis);
		b3Vector3* angular1 = (b3Vector3*)(info->m_J2angularAxis+skip);
		b3Vector3* angular2 = (b3Vector3*)(info->m_J2angularAxis+2*skip);
		a2.getSkewSymmetricMatrix(angular0,angular1,angular2);
	}

	// right hand side: positional error scaled by fps * erp, where the
	// per-constraint m_erp replaces info->erp only if it was set via setParam()
	const b3Scalar currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp;
	const b3Scalar k = info->fps * currERP;
	for (int j=0; j<3; j++)
	{
		info->m_constraintError[j*skip] = k * (a2[j] + body1_trans.getOrigin()[j] - a1[j] - body0_trans.getOrigin()[j]);
	}

	// per-constraint CFM override, only when set via setParam()
	if (m_flags & B3_P2P_FLAGS_CFM)
	{
		for (int j=0; j<3; j++)
		{
			info->cfm[j*skip] = m_cfm;
		}
	}

	// a positive impulse clamp narrows the row limits symmetrically; otherwise
	// the limits supplied by the caller are left untouched
	const b3Scalar impulseClamp = m_setting.m_impulseClamp;
	if (impulseClamp > 0)
	{
		for (int j=0; j<3; j++)
		{
			info->m_lowerLimit[j*skip] = -impulseClamp;
			info->m_upperLimit[j*skip] = impulseClamp;
		}
	}

	info->m_damping = m_setting.m_damping;
}



/// Intentionally does nothing; the argument is only referenced to avoid an
/// unused-parameter warning.
void b3Point2PointConstraint::updateRHS(b3Scalar timeStep)
{
(void)timeStep;

}

/// Overrides the global ERP or CFM value for this constraint.
///
/// @param num    B3_CONSTRAINT_ERP / B3_CONSTRAINT_STOP_ERP store value in m_erp,
///               B3_CONSTRAINT_CFM / B3_CONSTRAINT_STOP_CFM store it in m_cfm;
///               the matching B3_P2P_FLAGS_* bit is raised so the override is used.
///               Any other value triggers b3AssertConstrParams(0).
/// @param value  the new parameter value
/// @param axis   must be -1 (the default); any other axis triggers
///               b3AssertConstrParams(0) and changes nothing.
void b3Point2PointConstraint::setParam(int num, b3Scalar value, int axis)
{
	if (axis != -1)
	{
		b3AssertConstrParams(0);
		return;
	}

	const bool wantsErp = (num == B3_CONSTRAINT_ERP) || (num == B3_CONSTRAINT_STOP_ERP);
	const bool wantsCfm = (num == B3_CONSTRAINT_CFM) || (num == B3_CONSTRAINT_STOP_CFM);

	if (wantsErp)
	{
		m_erp = value;
		m_flags |= B3_P2P_FLAGS_ERP;
	}
	else if (wantsCfm)
	{
		m_cfm = value;
		m_flags |= B3_P2P_FLAGS_CFM;
	}
	else
	{
		b3AssertConstrParams(0);
	}
}

/// Returns the per-constraint ERP or CFM override.
///
/// @param num   B3_CONSTRAINT_ERP / B3_CONSTRAINT_STOP_ERP select m_erp,
///              B3_CONSTRAINT_CFM / B3_CONSTRAINT_STOP_CFM select m_cfm.
///              The matching B3_P2P_FLAGS_* bit is asserted to be set.
/// @param axis  must be -1 (the default)
/// @return the stored value, or B3_INFINITY when axis or num is not supported
///         (both cases also trigger b3AssertConstrParams(0)).
b3Scalar b3Point2PointConstraint::getParam(int num, int axis) const
{
	if (axis != -1)
	{
		b3AssertConstrParams(0);
		return b3Scalar(B3_INFINITY);
	}

	if (num == B3_CONSTRAINT_ERP || num == B3_CONSTRAINT_STOP_ERP)
	{
		b3AssertConstrParams(m_flags & B3_P2P_FLAGS_ERP);
		return m_erp;
	}

	if (num == B3_CONSTRAINT_CFM || num == B3_CONSTRAINT_STOP_CFM)
	{
		b3AssertConstrParams(m_flags & B3_P2P_FLAGS_CFM);
		return m_cfm;
	}

	b3AssertConstrParams(0);
	return b3Scalar(B3_INFINITY);
}

+ 159
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h View File

@@ -0,0 +1,159 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_POINT2POINTCONSTRAINT_H
#define B3_POINT2POINTCONSTRAINT_H

#include "Bullet3Common/b3Vector3.h"
//#include "b3JacobianEntry.h"
#include "b3TypedConstraint.h"

class b3RigidBody;


#ifdef B3_USE_DOUBLE_PRECISION
#define b3Point2PointConstraintData b3Point2PointConstraintDoubleData
#define b3Point2PointConstraintDataName "b3Point2PointConstraintDoubleData"
#else
#define b3Point2PointConstraintData b3Point2PointConstraintFloatData
#define b3Point2PointConstraintDataName "b3Point2PointConstraintFloatData"
#endif //B3_USE_DOUBLE_PRECISION

/// Tuning values carried by a b3Point2PointConstraint (member m_setting).
struct b3ConstraintSetting
{
	b3Scalar m_tau;           // defaults to 0.3
	b3Scalar m_damping;       // defaults to 1; copied into b3ConstraintInfo2::m_damping
	b3Scalar m_impulseClamp;  // defaults to 0; only values > 0 clamp the row limits

	/// Initialises every field to its default value.
	b3ConstraintSetting()
		: m_tau(b3Scalar(0.3))
		, m_damping(b3Scalar(1.))
		, m_impulseClamp(b3Scalar(0.))
	{
	}
};

/// Bit flags stored in b3Point2PointConstraint::m_flags; each bit records that
/// the corresponding per-constraint override has been set through setParam().
enum b3Point2PointFlags
{
// m_erp overrides the solver-wide ERP
B3_P2P_FLAGS_ERP = 1,
// m_cfm is written into the rows' CFM entries
B3_P2P_FLAGS_CFM = 2
};

/// point to point constraint between two rigidbodies each with a pivotpoint that describes the 'ballsocket' location in local space
///
/// The bodies are referenced by index (see b3TypedConstraint::m_rbA / m_rbB); the
/// solver passes the body array to getInfo1()/getInfo2().
B3_ATTRIBUTE_ALIGNED16(class) b3Point2PointConstraint : public b3TypedConstraint
{
// The data members below are private unless IN_PARALLELL_SOLVER is defined.
#ifdef IN_PARALLELL_SOLVER
public:
#endif
// pivot expressed in body A's local frame
b3Vector3 m_pivotInA;
// pivot expressed in body B's local frame
b3Vector3 m_pivotInB;
// bitmask of b3Point2PointFlags, raised by setParam()
int m_flags;
// per-constraint ERP, used only while B3_P2P_FLAGS_ERP is set
b3Scalar m_erp;
// per-constraint CFM, used only while B3_P2P_FLAGS_CFM is set
b3Scalar m_cfm;
public:

B3_DECLARE_ALIGNED_ALLOCATOR();

// public tuning block; getInfo2NonVirtual() reads m_impulseClamp and m_damping
b3ConstraintSetting m_setting;

/// Constrains body index rbA to body index rbB at the given local pivots.
b3Point2PointConstraint(int rbA,int rbB, const b3Vector3& pivotInA,const b3Vector3& pivotInB);

//b3Point2PointConstraint(int rbA,const b3Vector3& pivotInA);



/// Virtual wrapper around getInfo1NonVirtual().
virtual void getInfo1 (b3ConstraintInfo1* info,const b3RigidBodyData* bodies);

/// Reports three constraint rows.
void getInfo1NonVirtual (b3ConstraintInfo1* info,const b3RigidBodyData* bodies);

/// Builds both body transforms from the body array and calls getInfo2NonVirtual().
virtual void getInfo2 (b3ConstraintInfo2* info, const b3RigidBodyData* bodies);

/// Fills Jacobian rows, right-hand side, CFM, limits and damping for the given transforms.
void getInfo2NonVirtual (b3ConstraintInfo2* info, const b3Transform& body0_trans, const b3Transform& body1_trans);

/// Currently a no-op.
void updateRHS(b3Scalar timeStep);

/// Replaces the pivot in body A's local frame.
void setPivotA(const b3Vector3& pivotA)
{
m_pivotInA = pivotA;
}

/// Replaces the pivot in body B's local frame.
void setPivotB(const b3Vector3& pivotB)
{
m_pivotInB = pivotB;
}

/// Returns the pivot in body A's local frame.
const b3Vector3& getPivotInA() const
{
return m_pivotInA;
}

/// Returns the pivot in body B's local frame.
const b3Vector3& getPivotInB() const
{
return m_pivotInB;
}

///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5).
///If no axis is provided, it uses the default axis for this constraint.
///This implementation only accepts axis == -1.
virtual void setParam(int num, b3Scalar value, int axis = -1);
///return the local value of parameter
virtual b3Scalar getParam(int num, int axis = -1) const;

// virtual int calculateSerializeBufferSize() const;

///fills the dataBuffer and returns the struct name (and 0 on failure)
// virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const;


};

///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
///Single-precision layout; b3Point2PointConstraintData maps to this struct
///unless B3_USE_DOUBLE_PRECISION is defined.
struct b3Point2PointConstraintFloatData
{
// base-class fields
b3TypedConstraintData m_typeConstraintData;
// pivots of body A and body B
b3Vector3FloatData m_pivotInA;
b3Vector3FloatData m_pivotInB;
};

///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
///Double-precision layout; b3Point2PointConstraintData maps to this struct
///when B3_USE_DOUBLE_PRECISION is defined.
struct b3Point2PointConstraintDoubleData
{
// base-class fields
b3TypedConstraintData m_typeConstraintData;
// pivots of body A and body B
b3Vector3DoubleData m_pivotInA;
b3Vector3DoubleData m_pivotInB;
};

/*
B3_FORCE_INLINE int b3Point2PointConstraint::calculateSerializeBufferSize() const
{
return sizeof(b3Point2PointConstraintData);

}

///fills the dataBuffer and returns the struct name (and 0 on failure)
B3_FORCE_INLINE const char* b3Point2PointConstraint::serialize(void* dataBuffer, b3Serializer* serializer) const
{
b3Point2PointConstraintData* p2pData = (b3Point2PointConstraintData*)dataBuffer;

b3TypedConstraint::serialize(&p2pData->m_typeConstraintData,serializer);
m_pivotInA.serialize(p2pData->m_pivotInA);
m_pivotInB.serialize(p2pData->m_pivotInB);

return b3Point2PointConstraintDataName;
}
*/

#endif //B3_POINT2POINTCONSTRAINT_H

+ 302
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3SolverBody.h View File

@@ -0,0 +1,302 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_SOLVER_BODY_H
#define B3_SOLVER_BODY_H


#include "Bullet3Common/b3Vector3.h"
#include "Bullet3Common/b3Matrix3x3.h"

#include "Bullet3Common/b3AlignedAllocator.h"
#include "Bullet3Common/b3TransformUtil.h"

///Until we get other contributions, only use SIMD on Windows, when using Visual Studio 2008 or later, and not double precision
#ifdef B3_USE_SSE
#define USE_SIMD 1
#endif //


#ifdef USE_SIMD

/// Thin wrapper around one __m128 register, only compiled when USE_SIMD is
/// defined (otherwise b3SimdScalar is a macro for b3Scalar).
struct b3SimdScalar
{
/// Leaves the register uninitialised.
B3_FORCE_INLINE b3SimdScalar()
{

}

/// Broadcasts fl into all four lanes.
B3_FORCE_INLINE b3SimdScalar(float fl)
:m_vec128 (_mm_set1_ps(fl))
{
}

/// Wraps an existing register value.
B3_FORCE_INLINE b3SimdScalar(__m128 v128)
:m_vec128(v128)
{
}
// All members of this union share the same storage.
// NOTE(review): x, y, z and w are four separate union members, so each of
// them overlays the first float rather than lanes 0..3 — confirm whether an
// anonymous struct was intended before relying on them.
union
{
__m128 m_vec128;
float m_floats[4];
float x,y,z,w;
int m_ints[4];
b3Scalar m_unusedPadding;
};
/// Returns the wrapped register by value.
B3_FORCE_INLINE __m128 get128()
{
return m_vec128;
}

/// Returns the wrapped register by value (const overload).
B3_FORCE_INLINE const __m128 get128() const
{
return m_vec128;
}

/// Replaces the wrapped register.
B3_FORCE_INLINE void set128(__m128 v128)
{
m_vec128 = v128;
}

/// Implicit conversion to the raw register.
B3_FORCE_INLINE operator __m128()
{
return m_vec128;
}
/// Implicit conversion to the raw register (const overload).
B3_FORCE_INLINE operator const __m128() const
{
return m_vec128;
}
/// Implicit conversion to float: yields the first lane only.
B3_FORCE_INLINE operator float() const
{
return m_floats[0];
}

};

///@brief Multiply two b3SimdScalar values lane by lane (_mm_mul_ps)
B3_FORCE_INLINE b3SimdScalar operator*(const b3SimdScalar& v1, const b3SimdScalar& v2)
{
	const __m128 lanes = _mm_mul_ps(v1.get128(), v2.get128());
	return b3SimdScalar(lanes);
}

///@brief Add two b3SimdScalar values lane by lane (_mm_add_ps)
B3_FORCE_INLINE b3SimdScalar operator+(const b3SimdScalar& v1, const b3SimdScalar& v2)
{
	const __m128 lanes = _mm_add_ps(v1.get128(), v2.get128());
	return b3SimdScalar(lanes);
}


#else
#define b3SimdScalar b3Scalar
#endif

///The b3SolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance.
///
///Velocity changes produced while solving are accumulated in the m_delta*
///members and folded into m_linearVelocity / m_angularVelocity by the
///writeback methods. m_pushVelocity / m_turnVelocity are accumulated
///separately and only affect the transform (see writebackVelocityAndTransform).
B3_ATTRIBUTE_ALIGNED16 (struct) b3SolverBody
{
B3_DECLARE_ALIGNED_ALLOCATOR();
b3Transform m_worldTransform;
// velocity change accumulated by applyImpulse()/internalApplyImpulse()
b3Vector3 m_deltaLinearVelocity;
b3Vector3 m_deltaAngularVelocity;
// per-axis scale applied to impulses before they are accumulated
b3Vector3 m_angularFactor;
b3Vector3 m_linearFactor;
b3Vector3 m_invMass;
// accumulated by internalApplyPushImpulse()
b3Vector3 m_pushVelocity;
b3Vector3 m_turnVelocity;
b3Vector3 m_linearVelocity;
b3Vector3 m_angularVelocity;

// Both members share storage. Several methods below test m_originalBody for
// non-zero before touching the body.
// NOTE(review): how a body stored through m_originalBodyIndex (e.g. index 0)
// interacts with those pointer tests cannot be determined from this file.
union
{
void* m_originalBody;
int m_originalBodyIndex;
};

int padding[3];


void setWorldTransform(const b3Transform& worldTransform)
{
m_worldTransform = worldTransform;
}

const b3Transform& getWorldTransform() const
{
return m_worldTransform;
}
/// Velocity (including pending deltas) of the point at offset rel_pos;
/// zero when m_originalBody is null.
B3_FORCE_INLINE void getVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity ) const
{
if (m_originalBody)
velocity = m_linearVelocity+m_deltaLinearVelocity + (m_angularVelocity+m_deltaAngularVelocity).cross(rel_pos);
else
velocity.setValue(0,0,0);
}

/// Angular velocity including the pending delta; zero when m_originalBody is null.
B3_FORCE_INLINE void getAngularVelocity(b3Vector3& angVel) const
{
if (m_originalBody)
angVel =m_angularVelocity+m_deltaAngularVelocity;
else
angVel.setValue(0,0,0);
}


//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
/// Accumulates a scaled impulse into the delta velocities; no-op when m_originalBody is null.
B3_FORCE_INLINE void applyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent,const b3Scalar impulseMagnitude)
{
if (m_originalBody)
{
m_deltaLinearVelocity += linearComponent*impulseMagnitude*m_linearFactor;
m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
}
}

/// Same as applyImpulse() but accumulates into the push/turn velocities.
B3_FORCE_INLINE void internalApplyPushImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent,b3Scalar impulseMagnitude)
{
if (m_originalBody)
{
m_pushVelocity += linearComponent*impulseMagnitude*m_linearFactor;
m_turnVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
}
}



const b3Vector3& getDeltaLinearVelocity() const
{
return m_deltaLinearVelocity;
}

const b3Vector3& getDeltaAngularVelocity() const
{
return m_deltaAngularVelocity;
}

const b3Vector3& getPushVelocity() const
{
return m_pushVelocity;
}

const b3Vector3& getTurnVelocity() const
{
return m_turnVelocity;
}


////////////////////////////////////////////////
///some internal methods, don't use them
///(unlike the methods above, these do not check m_originalBody)
b3Vector3& internalGetDeltaLinearVelocity()
{
return m_deltaLinearVelocity;
}

b3Vector3& internalGetDeltaAngularVelocity()
{
return m_deltaAngularVelocity;
}

const b3Vector3& internalGetAngularFactor() const
{
return m_angularFactor;
}

const b3Vector3& internalGetInvMass() const
{
return m_invMass;
}

void internalSetInvMass(const b3Vector3& invMass)
{
m_invMass = invMass;
}
b3Vector3& internalGetPushVelocity()
{
return m_pushVelocity;
}

b3Vector3& internalGetTurnVelocity()
{
return m_turnVelocity;
}

/// Unchecked variant of getVelocityInLocalPointObsolete().
B3_FORCE_INLINE void internalGetVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity ) const
{
velocity = m_linearVelocity+m_deltaLinearVelocity + (m_angularVelocity+m_deltaAngularVelocity).cross(rel_pos);
}

/// Unchecked variant of getAngularVelocity().
B3_FORCE_INLINE void internalGetAngularVelocity(b3Vector3& angVel) const
{
angVel = m_angularVelocity+m_deltaAngularVelocity;
}


//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
/// Unchecked variant of applyImpulse().
B3_FORCE_INLINE void internalApplyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent,const b3Scalar impulseMagnitude)
{
//if (m_originalBody)
{
m_deltaLinearVelocity += linearComponent*impulseMagnitude*m_linearFactor;
m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
}
}

/// Adds the accumulated deltas to the velocities; the deltas themselves are not reset here.
void writebackVelocity()
{
//if (m_originalBody>=0)
{
m_linearVelocity +=m_deltaLinearVelocity;
m_angularVelocity += m_deltaAngularVelocity;
//m_originalBody->setCompanionId(-1);
}
}


/// Adds the accumulated deltas to the velocities and, if any push/turn
/// component is non-zero, integrates the world transform over timeStep with
/// m_pushVelocity and m_turnVelocity*splitImpulseTurnErp.
/// No-op when m_originalBody is null.
void writebackVelocityAndTransform(b3Scalar timeStep, b3Scalar splitImpulseTurnErp)
{
(void) timeStep;
if (m_originalBody)
{
m_linearVelocity += m_deltaLinearVelocity;
m_angularVelocity += m_deltaAngularVelocity;
//correct the position/orientation based on push/turn recovery
b3Transform newTransform;
if (m_pushVelocity[0]!=0.f || m_pushVelocity[1]!=0 || m_pushVelocity[2]!=0 || m_turnVelocity[0]!=0.f || m_turnVelocity[1]!=0 || m_turnVelocity[2]!=0)
{
// b3Quaternion orn = m_worldTransform.getRotation();
b3TransformUtil::integrateTransform(m_worldTransform,m_pushVelocity,m_turnVelocity*splitImpulseTurnErp,timeStep,newTransform);
m_worldTransform = newTransform;
}
//m_worldTransform.setRotation(orn);
//m_originalBody->setCompanionId(-1);
}
}


};

#endif //B3_SOLVER_BODY_H



+ 80
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3SolverConstraint.h View File

@@ -0,0 +1,80 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_SOLVER_CONSTRAINT_H
#define B3_SOLVER_CONSTRAINT_H


#include "Bullet3Common/b3Vector3.h"
#include "Bullet3Common/b3Matrix3x3.h"
//#include "b3JacobianEntry.h"
#include "Bullet3Common/b3AlignedObjectArray.h"

//#define NO_FRICTION_TANGENTIALS 1
#include "b3SolverBody.h"


///1D constraint along a normal axis between bodyA and bodyB. It can be combined to solve contact and friction constraints.
///Plain data record, 16-byte aligned; the explicit padding members suggest the
///layout is deliberate, so keep the member order unchanged.
B3_ATTRIBUTE_ALIGNED16 (struct) b3SolverConstraint
{
B3_DECLARE_ALIGNED_ALLOCATOR();

b3Vector3 m_relpos1CrossNormal;
b3Vector3 m_contactNormal;

b3Vector3 m_relpos2CrossNormal;
//b3Vector3 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal

b3Vector3 m_angularComponentA;
b3Vector3 m_angularComponentB;
// mutable: may be updated through a const reference to the constraint
mutable b3SimdScalar m_appliedPushImpulse;
mutable b3SimdScalar m_appliedImpulse;
int m_padding1;
int m_padding2;
b3Scalar m_friction;
b3Scalar m_jacDiagABInv;
b3Scalar m_rhs;
b3Scalar m_cfm;
b3Scalar m_lowerLimit;
b3Scalar m_upperLimit;
b3Scalar m_rhsPenetration;
// both members share storage
union
{
void* m_originalContactPoint;
b3Scalar m_unusedPadding4;
};

int m_overrideNumSolverIterations;
int m_frictionIndex;
// indices of the two solver bodies this row acts on
int m_solverBodyIdA;
int m_solverBodyIdB;

enum b3SolverConstraintType
{
B3_SOLVER_CONTACT_1D = 0,
B3_SOLVER_FRICTION_1D
};
};

typedef b3AlignedObjectArray<b3SolverConstraint> b3ConstraintArray;


#endif //B3_SOLVER_CONSTRAINT_H




+ 161
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.cpp View File

@@ -0,0 +1,161 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/


#include "b3TypedConstraint.h"
//#include "Bullet3Common/b3Serializer.h"


#define B3_DEFAULT_DEBUGDRAW_SIZE b3Scalar(0.3f)



/// Initialises the shared constraint state.
///
/// @param type  constraint type tag, forwarded to b3TypedObject
/// @param rbA   index of the first constrained body
/// @param rbB   index of the second constrained body
b3TypedConstraint::b3TypedConstraint(b3TypedConstraintType type, int rbA,int rbB)
	: b3TypedObject(type)
	, m_userConstraintType(-1)                      // no user type assigned
	, m_userConstraintPtr((void*)-1)                // user id/pointer union starts as -1
	, m_breakingImpulseThreshold(B3_INFINITY)
	, m_isEnabled(true)
	, m_needsFeedback(false)
	, m_overrideNumSolverIterations(-1)             // -1: use the solver's default count
	, m_rbA(rbA)
	, m_rbB(rbB)
	, m_appliedImpulse(b3Scalar(0.))
	, m_dbgDrawSize(B3_DEFAULT_DEBUGDRAW_SIZE)
	, m_jointFeedback(0)
{
}




/// Computes a factor for a motor velocity near a limit.
///
/// @param pos       current position
/// @param lowLim    lower limit
/// @param uppLim    upper limit
/// @param vel       motor velocity
/// @param timeFact  divisor applied to vel to obtain the per-step displacement
/// @return 1 when lowLim > uppLim; 0 when the limits coincide, when the
///         displacement is zero, or when pos is already beyond the limit being
///         approached; a ratio (limit - pos) / displacement when the step would
///         cross that limit; 1 otherwise.
b3Scalar b3TypedConstraint::getMotorFactor(b3Scalar pos, b3Scalar lowLim, b3Scalar uppLim, b3Scalar vel, b3Scalar timeFact)
{
	if (lowLim > uppLim)
	{
		return b3Scalar(1.0f);
	}
	if (lowLim == uppLim)
	{
		return b3Scalar(0.0f);
	}

	const b3Scalar step = vel / timeFact;

	if (step < b3Scalar(0.0f))
	{
		// moving towards the lower limit
		if ((pos >= lowLim) && (pos < (lowLim - step)))
		{
			return (lowLim - pos) / step;
		}
		return (pos < lowLim) ? b3Scalar(0.0f) : b3Scalar(1.0f);
	}

	if (step > b3Scalar(0.0f))
	{
		// moving towards the upper limit
		if ((pos <= uppLim) && (pos > (uppLim - step)))
		{
			return (uppLim - pos) / step;
		}
		return (pos > uppLim) ? b3Scalar(0.0f) : b3Scalar(1.0f);
	}

	// no displacement
	return b3Scalar(0.0f);
}



/// Stores the limit as a normalized centre angle plus a half range, together
/// with the three tuning factors.
///
/// @param low, high          limit angles; half the difference becomes m_halfRange
/// @param _softness          stored in m_softness
/// @param _biasFactor        stored in m_biasFactor
/// @param _relaxationFactor  stored in m_relaxationFactor
void b3AngularLimit::set(b3Scalar low, b3Scalar high, b3Scalar _softness, b3Scalar _biasFactor, b3Scalar _relaxationFactor)
{
	m_softness = _softness;
	m_biasFactor = _biasFactor;
	m_relaxationFactor = _relaxationFactor;

	m_halfRange = (high - low) / 2.0f;
	m_center = b3NormalizeAngle(low + m_halfRange);
}

void b3AngularLimit::test(const b3Scalar angle)
{
m_correction = 0.0f;
m_sign = 0.0f;
m_solveLimit = false;

if (m_halfRange >= 0.0f)
{
b3Scalar deviation = b3NormalizeAngle(angle - m_center);
if (deviation < -m_halfRange)
{
m_solveLimit = true;
m_correction = - (deviation + m_halfRange);
m_sign = +1.0f;
}
else if (deviation > m_halfRange)
{
m_solveLimit = true;
m_correction = m_halfRange - deviation;
m_sign = -1.0f;
}
}
}


/// Returns the product of the correction and sign recorded by the last test().
b3Scalar b3AngularLimit::getError() const
{
return m_correction * m_sign;
}

/// Replaces angle with getHigh() or getLow() unless b3Equal() accepts its
/// normalized offset from m_center against m_halfRange. Does nothing when the
/// half range is not positive.
/// NOTE(review): b3Equal(a, eps) is presumably an "|a| <= eps" test, which would
/// make this a clamp into the limit range — confirm against b3Scalar.h.
///
/// @param angle  in/out angle
void b3AngularLimit::fit(b3Scalar& angle) const
{
	if (!(m_halfRange > 0.0f))
	{
		return;
	}

	const b3Scalar offset = b3NormalizeAngle(angle - m_center);
	if (b3Equal(offset, m_halfRange))
	{
		return;
	}

	// positive offsets go to the upper bound, the rest to the lower one
	if (offset > 0.0f)
	{
		angle = getHigh();
	}
	else
	{
		angle = getLow();
	}
}

/// Returns the lower bound: centre minus half range, normalized.
b3Scalar b3AngularLimit::getLow() const
{
	const b3Scalar lowerBound = m_center - m_halfRange;
	return b3NormalizeAngle(lowerBound);
}

/// Returns the upper bound: centre plus half range, normalized.
b3Scalar b3AngularLimit::getHigh() const
{
	const b3Scalar upperBound = m_center + m_halfRange;
	return b3NormalizeAngle(upperBound);
}

+ 483
- 0
src/bullet/Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h View File

@@ -0,0 +1,483 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2010 Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_TYPED_CONSTRAINT_H
#define B3_TYPED_CONSTRAINT_H


#include "Bullet3Common/b3Scalar.h"
#include "b3SolverConstraint.h"

class b3Serializer;

//Don't change any of the existing enum values, so add enum types at the end for serialization compatibility
//Numbering starts at 3; each following enumerator is one higher than the previous.
enum b3TypedConstraintType
{
B3_POINT2POINT_CONSTRAINT_TYPE=3,
B3_HINGE_CONSTRAINT_TYPE,
B3_CONETWIST_CONSTRAINT_TYPE,
B3_D6_CONSTRAINT_TYPE,
B3_SLIDER_CONSTRAINT_TYPE,
B3_CONTACT_CONSTRAINT_TYPE,
B3_D6_SPRING_CONSTRAINT_TYPE,
B3_GEAR_CONSTRAINT_TYPE,
B3_FIXED_CONSTRAINT_TYPE,
// last enumerator of the list
B3_MAX_CONSTRAINT_TYPE
};


/// Parameter selectors accepted as the `num` argument of
/// b3TypedConstraint::setParam() / getParam().
enum b3ConstraintParams
{
B3_CONSTRAINT_ERP=1,
B3_CONSTRAINT_STOP_ERP,
B3_CONSTRAINT_CFM,
B3_CONSTRAINT_STOP_CFM
};

#if 1
#define b3AssertConstrParams(_par) b3Assert(_par)
#else
#define b3AssertConstrParams(_par)
#endif


/// Force/torque record for the two bodies of a constraint. A constraint only
/// stores a pointer to it (see b3TypedConstraint::setJointFeedback).
B3_ATTRIBUTE_ALIGNED16(struct) b3JointFeedback
{
b3Vector3 m_appliedForceBodyA;
b3Vector3 m_appliedTorqueBodyA;
b3Vector3 m_appliedForceBodyB;
b3Vector3 m_appliedTorqueBodyB;
};


struct b3RigidBodyData;


///TypedConstraint is the baseclass for Bullet constraints and vehicles
///
///Bodies are referenced by integer index (m_rbA / m_rbB). Derived classes
///implement getInfo1()/getInfo2() to describe their rows to the solver and
///setParam()/getParam() for per-constraint parameter overrides.
B3_ATTRIBUTE_ALIGNED16(class) b3TypedConstraint : public b3TypedObject
{
int m_userConstraintType;

// The user id and the user pointer share storage: writing one overwrites the other.
union
{
int m_userConstraintId;
void* m_userConstraintPtr;
};

b3Scalar m_breakingImpulseThreshold;
bool m_isEnabled;
bool m_needsFeedback;
// -1 means "use the solver default" (see setOverrideNumSolverIterations)
int m_overrideNumSolverIterations;


// Private and asserting: constraints are not meant to be assigned.
b3TypedConstraint& operator=(b3TypedConstraint& other)
{
b3Assert(0);
(void) other;
return *this;
}

protected:
// indices of the two constrained bodies
int m_rbA;
int m_rbB;
b3Scalar m_appliedImpulse;
b3Scalar m_dbgDrawSize;
// optional feedback record; 0 when none was set
b3JointFeedback* m_jointFeedback;

///internal method used by the constraint solver, don't use them directly
b3Scalar getMotorFactor(b3Scalar pos, b3Scalar lowLim, b3Scalar uppLim, b3Scalar vel, b3Scalar timeFact);

public:

B3_DECLARE_ALIGNED_ALLOCATOR();

virtual ~b3TypedConstraint() {};
b3TypedConstraint(b3TypedConstraintType type, int bodyA,int bodyB);

// Filled in by getInfo1(): the row counts of the constraint.
struct b3ConstraintInfo1 {
int m_numConstraintRows,nub;
};


// Buffers handed to getInfo2(); the constraint writes its rows into them.
struct b3ConstraintInfo2 {
// integrator parameters: frames per second (1/stepsize), default error
// reduction parameter (0..1).
b3Scalar fps,erp;

// for the first and second body, pointers to two (linear and angular)
// n*3 jacobian sub matrices, stored by rows. these matrices will have
// been initialized to 0 on entry. if the second body is zero then the
// J2xx pointers may be 0.
b3Scalar *m_J1linearAxis,*m_J1angularAxis,*m_J2linearAxis,*m_J2angularAxis;

// elements to jump from one row to the next in J's
int rowskip;

// right hand sides of the equation J*v = c + cfm * lambda. cfm is the
// "constraint force mixing" vector. c is set to zero on entry, cfm is
// set to a constant value (typically very small or zero) value on entry.
b3Scalar *m_constraintError,*cfm;

// lo and hi limits for variables (set to -/+ infinity on entry).
b3Scalar *m_lowerLimit,*m_upperLimit;

// findex vector for variables. see the LCP solver interface for a
// description of what this does. this is set to -1 on entry.
// note that the returned indexes are relative to the first index of
// the constraint.
int *findex;
// number of solver iterations
int m_numIterations;

//damping of the velocity
b3Scalar m_damping;
};

int getOverrideNumSolverIterations() const
{
return m_overrideNumSolverIterations;
}

///override the number of constraint solver iterations used to solve this constraint
///-1 will use the default number of iterations, as specified in SolverInfo.m_numIterations
void setOverrideNumSolverIterations(int overideNumIterations)
{
m_overrideNumSolverIterations = overideNumIterations;
}


///internal method used by the constraint solver, don't use them directly
///(the default implementation ignores all arguments)
virtual void setupSolverConstraint(b3ConstraintArray& ca, int solverBodyA,int solverBodyB, b3Scalar timeStep)
{
(void)ca;
(void)solverBodyA;
(void)solverBodyB;
(void)timeStep;
}
///internal method used by the constraint solver, don't use them directly
virtual void getInfo1 (b3ConstraintInfo1* info,const b3RigidBodyData* bodies)=0;

///internal method used by the constraint solver, don't use them directly
virtual void getInfo2 (b3ConstraintInfo2* info, const b3RigidBodyData* bodies)=0;

///internal method used by the constraint solver, don't use them directly
void internalSetAppliedImpulse(b3Scalar appliedImpulse)
{
m_appliedImpulse = appliedImpulse;
}
///internal method used by the constraint solver, don't use them directly
b3Scalar internalGetAppliedImpulse()
{
return m_appliedImpulse;
}


b3Scalar getBreakingImpulseThreshold() const
{
return m_breakingImpulseThreshold;
}

void setBreakingImpulseThreshold(b3Scalar threshold)
{
m_breakingImpulseThreshold = threshold;
}

bool isEnabled() const
{
return m_isEnabled;
}

void setEnabled(bool enabled)
{
m_isEnabled=enabled;
}


///internal method used by the constraint solver, don't use them directly
virtual void solveConstraintObsolete(b3SolverBody& /*bodyA*/,b3SolverBody& /*bodyB*/,b3Scalar /*timeStep*/) {};

/// Index of the first constrained body.
int getRigidBodyA() const
{
return m_rbA;
}
/// Index of the second constrained body.
int getRigidBodyB() const
{
return m_rbB;
}


// Non-const overloads; they return the same indices as the const versions above.
int getRigidBodyA()
{
return m_rbA;
}
int getRigidBodyB()
{
return m_rbB;
}

int getUserConstraintType() const
{
return m_userConstraintType ;
}

void setUserConstraintType(int userConstraintType)
{
m_userConstraintType = userConstraintType;
};

/// Stores a user id; shares storage with the user pointer.
void setUserConstraintId(int uid)
{
m_userConstraintId = uid;
}

int getUserConstraintId() const
{
return m_userConstraintId;
}

/// Stores a user pointer; shares storage with the user id.
void setUserConstraintPtr(void* ptr)
{
m_userConstraintPtr = ptr;
}

void* getUserConstraintPtr()
{
return m_userConstraintPtr;
}

/// Stores the pointer only; nothing in this class allocates or frees the record.
void setJointFeedback(b3JointFeedback* jointFeedback)
{
m_jointFeedback = jointFeedback;
}

const b3JointFeedback* getJointFeedback() const
{
return m_jointFeedback;
}

b3JointFeedback* getJointFeedback()
{
return m_jointFeedback;
}


/// Same value as getUserConstraintId().
int getUid() const
{
return m_userConstraintId;
}

bool needsFeedback() const
{
return m_needsFeedback;
}

///enableFeedback will allow to read the applied linear and angular impulse
///use getAppliedImpulse, getAppliedLinearImpulse and getAppliedAngularImpulse to read feedback information
void enableFeedback(bool needsFeedback)
{
m_needsFeedback = needsFeedback;
}

///getAppliedImpulse is an estimated total applied impulse.
///This feedback could be used to determine breaking constraints or playing sounds.
///Asserts that feedback was enabled through enableFeedback(true).
b3Scalar getAppliedImpulse() const
{
b3Assert(m_needsFeedback);
return m_appliedImpulse;
}

/// The object type stored by the b3TypedObject base, cast back to the enum.
b3TypedConstraintType getConstraintType () const
{
return b3TypedConstraintType(m_objectType);
}
void setDbgDrawSize(b3Scalar dbgDrawSize)
{
m_dbgDrawSize = dbgDrawSize;
}
b3Scalar getDbgDrawSize()
{
return m_dbgDrawSize;
}

///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5).
///If no axis is provided, it uses the default axis for this constraint.
virtual void setParam(int num, b3Scalar value, int axis = -1) = 0;

///return the local value of parameter
virtual b3Scalar getParam(int num, int axis = -1) const = 0;
// virtual int calculateSerializeBufferSize() const;

///fills the dataBuffer and returns the struct name (and 0 on failure)
//virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const;

};

// Returns the angle, possibly shifted by one full turn (B3_2_PI), so that it is
// closest to one of the limits; the result lies in [-B3_2_PI, B3_2_PI].
// All arguments should be normalized angles (i.e. in range [-B3_PI, B3_PI]).
// The angle is returned unchanged when it is inside the limits or when the
// lower limit is not below the upper limit.
B3_FORCE_INLINE b3Scalar b3AdjustAngleToLimits(b3Scalar angleInRadians, b3Scalar angleLowerLimitInRadians, b3Scalar angleUpperLimitInRadians)
{
	if (angleLowerLimitInRadians >= angleUpperLimitInRadians)
	{
		return angleInRadians;
	}

	if (angleInRadians < angleLowerLimitInRadians)
	{
		// below the range: keep the angle if the lower limit is the nearer one,
		// otherwise wrap it upwards by one turn
		const b3Scalar distToLower = b3Fabs(b3NormalizeAngle(angleLowerLimitInRadians - angleInRadians));
		const b3Scalar distToUpper = b3Fabs(b3NormalizeAngle(angleUpperLimitInRadians - angleInRadians));
		if (distToLower < distToUpper)
		{
			return angleInRadians;
		}
		return angleInRadians + B3_2_PI;
	}

	if (angleInRadians > angleUpperLimitInRadians)
	{
		// above the range: wrap downwards by one turn if the lower limit is the
		// nearer one, otherwise keep the angle
		const b3Scalar distToUpper = b3Fabs(b3NormalizeAngle(angleInRadians - angleUpperLimitInRadians));
		const b3Scalar distToLower = b3Fabs(b3NormalizeAngle(angleInRadians - angleLowerLimitInRadians));
		if (distToLower < distToUpper)
		{
			return angleInRadians - B3_2_PI;
		}
		return angleInRadians;
	}

	return angleInRadians;
}

///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
///Flat record of the b3TypedConstraint state; embedded as the first member of
///the per-constraint serialization structs (e.g. b3Point2PointConstraintFloatData).
struct b3TypedConstraintData
{
// the two constrained bodies
int m_bodyA;
int m_bodyB;
char *m_name;

int m_objectType;
int m_userConstraintType;
int m_userConstraintId;
// boolean stored as int
int m_needsFeedback;

float m_appliedImpulse;
float m_dbgDrawSize;

int m_disableCollisionsBetweenLinkedBodies;
int m_overrideNumSolverIterations;

float m_breakingImpulseThreshold;
// boolean stored as int
int m_isEnabled;
};

/*B3_FORCE_INLINE int b3TypedConstraint::calculateSerializeBufferSize() const
{
return sizeof(b3TypedConstraintData);
}
*/


/// Describes an angular limit (center + half range) together with the solver
/// tuning values and the result of the most recent test() call.
class b3AngularLimit
{
private:
	b3Scalar m_center;
	b3Scalar m_halfRange;
	b3Scalar m_softness;
	b3Scalar m_biasFactor;
	b3Scalar m_relaxationFactor;
	b3Scalar m_correction;
	b3Scalar m_sign;

	bool m_solveLimit;

public:
	/// Default constructor initializes the limit as inactive, allowing free constraint movement.
	b3AngularLimit()
		: m_center(0.0f),
		  m_halfRange(-1.0f),
		  m_softness(0.9f),
		  m_biasFactor(0.3f),
		  m_relaxationFactor(1.0f),
		  m_correction(0.0f),
		  m_sign(0.0f),
		  m_solveLimit(false)
	{
	}

	/// Sets all of the limit's parameters.
	/// When low > high the limit becomes inactive.
	/// When high - low > 2PI the limit is ineffective too, because no angle can exceed it.
	void set(b3Scalar low, b3Scalar high, b3Scalar _softness = 0.9f, b3Scalar _biasFactor = 0.3f, b3Scalar _relaxationFactor = 1.0f);

	/// Checks the constraint angle against the limit. If the limit is active and the
	/// angle violates it, the correction is calculated.
	void test(const b3Scalar angle);

	/// Returns the limit's softness.
	b3Scalar getSoftness() const { return m_softness; }

	/// Returns the limit's bias factor.
	b3Scalar getBiasFactor() const { return m_biasFactor; }

	/// Returns the limit's relaxation factor.
	b3Scalar getRelaxationFactor() const { return m_relaxationFactor; }

	/// Returns the correction value evaluated when test() was invoked.
	b3Scalar getCorrection() const { return m_correction; }

	/// Returns the sign value evaluated when test() was invoked.
	b3Scalar getSign() const { return m_sign; }

	/// Gives half of the distance between the min and max limit angle.
	b3Scalar getHalfRange() const { return m_halfRange; }

	/// Returns true when the last test() invocation recognized a limit violation.
	bool isLimit() const { return m_solveLimit; }

	/// Checks the given angle against the limit. If the limit is active and the angle
	/// doesn't fit it, the angle is modified so it equals the limit closest to it.
	void fit(b3Scalar& angle) const;

	/// Returns the correction value multiplied by the sign value.
	b3Scalar getError() const;

	b3Scalar getLow() const;

	b3Scalar getHigh() const;
};



#endif //B3_TYPED_CONSTRAINT_H

+ 484
- 0
src/bullet/Bullet3Dynamics/b3CpuRigidBodyPipeline.cpp View File

@@ -0,0 +1,484 @@
#include "b3CpuRigidBodyPipeline.h"

#include "Bullet3Dynamics/shared/b3IntegrateTransforms.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3CpuNarrowPhase.h"
#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
#include "Bullet3Common/b3Vector3.h"
#include "Bullet3Dynamics/shared/b3ContactConstraint4.h"
#include "Bullet3Dynamics/shared/b3Inertia.h"


// Private state of b3CpuRigidBodyPipeline, kept out of the public header.
struct b3CpuRigidBodyPipelineInternalData
{
// per-body arrays; m_aabbWorldSpace is indexed with the same index as m_rigidBodies
// in updateAabbWorldSpace()
b3AlignedObjectArray<b3RigidBodyData> m_rigidBodies;
b3AlignedObjectArray<b3Inertia> m_inertias;
b3AlignedObjectArray<b3Aabb> m_aabbWorldSpace;

// broadphase / narrowphase supplied to the pipeline constructor; the pipeline
// destructor only deletes this struct, not the objects these point to
b3DynamicBvhBroadphase* m_bp;
b3CpuNarrowPhase* m_np;
b3Config m_config;
};

// Allocates the internal data block and records the collaborators handed in by
// the caller. The narrowphase and broadphase pointers are stored as-is; the
// config is copied.
b3CpuRigidBodyPipeline::b3CpuRigidBodyPipeline(class b3CpuNarrowPhase* narrowphase, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config)
{
	b3CpuRigidBodyPipelineInternalData* internalData = new b3CpuRigidBodyPipelineInternalData;
	internalData->m_np = narrowphase;
	internalData->m_bp = broadphaseDbvt;
	internalData->m_config = config;

	m_data = internalData;
}

// Releases the internal data block allocated by the constructor. The
// narrowphase/broadphase pointers stored inside it are not deleted here.
b3CpuRigidBodyPipeline::~b3CpuRigidBodyPipeline()
{
delete m_data;
}

// Recomputes the world-space AABB of every registered body from its current
// position/orientation and pushes the result to the broadphase.
// Bodies without a valid collidable (negative m_collidableIdx) or whose
// collidable has no shape (negative m_shapeIndex) are skipped.
void b3CpuRigidBodyPipeline::updateAabbWorldSpace()
{
	const int numBodies = this->getNumBodies();

	for (int i=0;i<numBodies;i++)
	{
		b3RigidBodyData* body = &m_data->m_rigidBodies[i];

		int collidableIndex = body->m_collidableIdx;
		if (collidableIndex<0)
		{
			// registerPhysicsInstance() stores bodies even when their collidable
			// index is invalid; never use such an index for a lookup
			continue;
		}

		b3Float4 position = body->m_pos;
		b3Quat orientation = body->m_quat;

		b3Collidable& collidable = m_data->m_np->getCollidableCpu(collidableIndex);
		int shapeIndex = collidable.m_shapeIndex;
		if (shapeIndex>=0)
		{
			b3Aabb localAabb = m_data->m_np->getLocalSpaceAabb(shapeIndex);
			b3Aabb& worldAabb = m_data->m_aabbWorldSpace[i];
			float margin=0.f;
			b3TransformAabb2(localAabb.m_minVec,localAabb.m_maxVec,margin,position,orientation,&worldAabb.m_minVec,&worldAabb.m_maxVec);
			m_data->m_bp->setAabb(i,worldAabb.m_minVec,worldAabb.m_maxVec,0);
		}
	}
}

// Runs the broadphase pair computation and prints the resulting pair count.
void b3CpuRigidBodyPipeline::computeOverlappingPairs()
{
	// pair count before the update; it is fetched but not used any further
	int pairCount = m_data->m_bp->getOverlappingPairCache()->getNumOverlappingPairs();

	m_data->m_bp->calculateOverlappingPairs();

	// pair count after the update, reported on stdout
	pairCount = m_data->m_bp->getOverlappingPairCache()->getNumOverlappingPairs();
	printf("numPairs=%d\n",pairCount);
}

void b3CpuRigidBodyPipeline::computeContactPoints()
{
b3AlignedObjectArray<b3Int4>& pairs = m_data->m_bp->getOverlappingPairCache()->getOverlappingPairArray();
m_data->m_np->computeContacts(pairs,m_data->m_aabbWorldSpace, m_data->m_rigidBodies);

}
// Advances the pipeline by deltaTime: AABB refresh, broadphase, narrowphase,
// then transform integration.
// NOTE(review): solveContactConstraints() is not invoked from here, so contacts
// are computed but not solved during a step - confirm this is intended.
void b3CpuRigidBodyPipeline::stepSimulation(float deltaTime)
{
	// 1. refresh the world-space AABBs
	updateAabbWorldSpace();

	// 2. broadphase: overlapping pairs
	computeOverlappingPairs();

	// 3. narrowphase: contact points
	computeContactPoints();

	// 4. (contact solving would go here)

	// 5. integrate the body transforms
	integrate(deltaTime);
}


// Returns the sum of four dot products: l0.linVel0 + a0.angVel0 + l1.linVel1 + a1.angVel1,
// i.e. the body velocities projected onto the given linear (l0, l1) and angular
// (a0, a1) directions. The result is returned as float.
static inline float b3CalcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1,
const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1)
{
return b3Dot(l0, linVel0) + b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1);
}


// Fills the three output vectors from a direction n and two lever arms r0, r1:
//   linear   = -n
//   angular0 = -(r0 x n)
//   angular1 =   r1 x n
// The outputs are written in that order.
static inline void b3SetLinearAndAngular(const b3Vector3& n, const b3Vector3& r0, const b3Vector3& r1,
b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1)
{
linear = -n;
angular0 = -b3Cross(r0, n);
angular1 = b3Cross(r1, n);
}



// Performs one solver pass over the four contact slots of a contact constraint.
// For every slot with a non-zero m_jacCoeffInv the impulse increment is computed
// from the current body velocities plus the slot's bias m_b, the accumulated
// impulse is clamped to [minRambdaDt[ic], maxRambdaDt[ic]], and the clamped
// increment is applied to the linear and angular velocities of both bodies.
//
// cs                      constraint; m_appliedRambdaDt is updated in place
// posA/posB               body positions used to build the lever arms
// linVel*/angVel*         body velocities, modified in place
// invMass*/invInertia*    inverse mass and inverse inertia of each body
// maxRambdaDt/minRambdaDt per-slot bounds for the accumulated impulse
static inline void b3SolveContact(b3ContactConstraint4& cs,
	const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
	const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
	float maxRambdaDt[4], float minRambdaDt[4])
{
	for(int ic=0; ic<4; ic++)
	{
		// a zero coefficient would scale the impulse to zero anyway, so skip the slot
		if( cs.m_jacCoeffInv[ic] == 0.f ) continue;

		{
			b3Vector3 angular0, angular1, linear;
			b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA;
			b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB;
			b3SetLinearAndAngular( (const b3Vector3 &)-cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, linear, angular0, angular1 );

			float rambdaDt = b3CalcRelVel((const b3Vector3 &)cs.m_linear,(const b3Vector3 &) -cs.m_linear, angular0, angular1,
				linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic];
			rambdaDt *= cs.m_jacCoeffInv[ic];

			{
				// clamp the accumulated impulse, then apply only the clamped delta
				float prevSum = cs.m_appliedRambdaDt[ic];
				float updated = prevSum;
				updated += rambdaDt;
				updated = b3Max( updated, minRambdaDt[ic] );
				updated = b3Min( updated, maxRambdaDt[ic] );
				rambdaDt = updated - prevSum;
				cs.m_appliedRambdaDt[ic] = updated;
			}

			b3Vector3 linImp0 = invMassA*linear*rambdaDt;
			b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt;
			b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
			b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
#ifdef _WIN32
			b3Assert(_finite(linImp0.getX()));
			b3Assert(_finite(linImp1.getX()));
#endif
			{
				linVelA += linImp0;
				angVelA += angImp0;
				linVelB += linImp1;
				angVelB += angImp1;
			}
		}
	}
}





// Performs one friction pass for a contact constraint.
// Two tangent directions are derived from the contact normal (-cs.m_linear);
// for each, the impulse increment is computed at cs.m_center, the accumulated
// impulse m_fAppliedRambdaDt[i] is clamped to [minRambdaDt[i], maxRambdaDt[i]]
// and the clamped delta is applied to both bodies. Afterwards 10% of the
// angular velocity about the normal is removed from both bodies when the
// contact center is nearly in line with the body centers or either body has
// zero inverse mass.
//
// Only indices 0 and 1 of maxRambdaDt/minRambdaDt are read.
static inline void b3SolveFriction(b3ContactConstraint4& cs,
	const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
	const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
	float maxRambdaDt[4], float minRambdaDt[4])
{
	// nothing to solve when neither tangent direction has a usable coefficient
	// (the second operand previously re-tested index 0 instead of index 1)
	if( cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[1] == 0 ) return;
	const b3Vector3& center = (const b3Vector3&)cs.m_center;

	b3Vector3 n = -(const b3Vector3&)cs.m_linear;

	b3Vector3 tangent[2];

	b3PlaneSpace1 (n, tangent[0],tangent[1]);

	b3Vector3 angular0, angular1, linear;
	b3Vector3 r0 = center - posA;
	b3Vector3 r1 = center - posB;
	for(int i=0; i<2; i++)
	{
		b3SetLinearAndAngular( tangent[i], r0, r1, linear, angular0, angular1 );
		float rambdaDt = b3CalcRelVel(linear, -linear, angular0, angular1,
			linVelA, angVelA, linVelB, angVelB );
		rambdaDt *= cs.m_fJacCoeffInv[i];

		{
			// clamp the accumulated friction impulse, then apply only the clamped delta
			float prevSum = cs.m_fAppliedRambdaDt[i];
			float updated = prevSum;
			updated += rambdaDt;
			updated = b3Max( updated, minRambdaDt[i] );
			updated = b3Min( updated, maxRambdaDt[i] );
			rambdaDt = updated - prevSum;
			cs.m_fAppliedRambdaDt[i] = updated;
		}

		b3Vector3 linImp0 = invMassA*linear*rambdaDt;
		b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt;
		b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
		b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
#ifdef _WIN32
		b3Assert(_finite(linImp0.getX()));
		b3Assert(_finite(linImp1.getX()));
#endif
		linVelA += linImp0;
		angVelA += angImp0;
		linVelB += linImp1;
		angVelB += angImp1;
	}

	{ // angular damping for point constraint
		b3Vector3 ab = ( posB - posA ).normalized();
		b3Vector3 ac = ( center - posA ).normalized();
		if( b3Dot( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
		{
			float angNA = b3Dot( n, angVelA );
			float angNB = b3Dot( n, angVelB );

			angVelA -= (angNA*0.1f)*n;
			angVelB -= (angNB*0.1f)*n;
		}
	}
}





// Solves a contiguous range of contact constraints, batch by batch.
// The task keeps references to the body, inertia and constraint arrays; run()
// walks batches 0..m_maxNumBatches-1 and, for every constraint in the range
// [m_start, m_start+m_nConstraints) that belongs to the current batch, runs
// either the contact pass (m_solveFriction == false) or the friction pass
// (m_solveFriction == true, the constructor default).
struct b3SolveTask// : public ThreadPool::Task
{
	// wgUsedBodies may be null; when set, run() marks in wgUsedBodies[curWgidx]
	// every body with non-zero inverse mass that it touched.
	b3SolveTask(b3AlignedObjectArray<b3RigidBodyData>& bodies,
		b3AlignedObjectArray<b3Inertia>& shapes,
		b3AlignedObjectArray<b3ContactConstraint4>& constraints,
		int start, int nConstraints,
		int maxNumBatches,
		b3AlignedObjectArray<int>* wgUsedBodies, int curWgidx
		)
		// initializers are listed in member declaration order
		: m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ),
		m_wgUsedBodies(wgUsedBodies), m_curWgidx(curWgidx),
		m_start( start ), m_nConstraints( nConstraints ),
		m_solveFriction( true ), m_maxNumBatches(maxNumBatches)
	{}

	unsigned short int getType(){ return 0; }

	// tIdx is not used by the current implementation.
	void run(int tIdx)
	{
		b3AlignedObjectArray<int> usedBodies;

		for (int bb=0;bb<m_maxNumBatches;bb++)
		{
			usedBodies.resize(0);
			// constraints of the range are visited back to front
			for(int ic=m_nConstraints-1; ic>=0; ic--)
			{
				int i = m_start + ic;
				if (m_constraints[i].m_batchIdx != bb)
					continue;

				int aIdx = (int)m_constraints[i].m_bodyA;
				int bIdx = (int)m_constraints[i].m_bodyB;
				b3RigidBodyData& bodyA = m_bodies[aIdx];
				b3RigidBodyData& bodyB = m_bodies[bIdx];

				// grow the per-batch usage table so both body indices are addressable
				if (usedBodies.size()<(aIdx+1))
				{
					usedBodies.resize(aIdx+1,0);
				}
				if (usedBodies.size()<(bIdx+1))
				{
					usedBodies.resize(bIdx+1,0);
				}

				// within one batch a body with non-zero inverse mass may only appear once
				if (bodyA.m_invMass)
				{
					b3Assert(usedBodies[aIdx]==0);
					usedBodies[aIdx]++;
				}
				if (bodyB.m_invMass)
				{
					b3Assert(usedBodies[bIdx]==0);
					usedBodies[bIdx]++;
				}

				if( !m_solveFriction )
				{
					// contact impulses are only bounded from below (by zero)
					float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
					float minRambdaDt[4] = {0.f,0.f,0.f,0.f};

					b3SolveContact( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld,
						(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld,
						maxRambdaDt, minRambdaDt );
				}
				else
				{
					// friction bounds are +/- (coefficient * total applied contact impulse)
					float sum = 0;
					for(int j=0; j<4; j++)
					{
						sum +=m_constraints[i].m_appliedRambdaDt[j];
					}

					// hard-coded coefficient; the value stored in the constraint is not used here
					const float frictionCoeff = 0.7f;

					float maxRambdaDt[4];
					float minRambdaDt[4];
					for(int j=0; j<4; j++)
					{
						maxRambdaDt[j] = frictionCoeff*sum;
						minRambdaDt[j] = -maxRambdaDt[j];
					}

					b3SolveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld,
						(b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld,
						maxRambdaDt, minRambdaDt );
				}
			}

			// optionally publish which bodies this batch touched
			if (m_wgUsedBodies)
			{
				if (m_wgUsedBodies[m_curWgidx].size()<usedBodies.size())
				{
					m_wgUsedBodies[m_curWgidx].resize(usedBodies.size());
				}
				for (int i=0;i<usedBodies.size();i++)
				{
					if (usedBodies[i])
					{
						m_wgUsedBodies[m_curWgidx][i]=1;
					}
				}
			}
		}
	}

	b3AlignedObjectArray<b3RigidBodyData>& m_bodies;
	b3AlignedObjectArray<b3Inertia>& m_shapes;
	b3AlignedObjectArray<b3ContactConstraint4>& m_constraints;
	b3AlignedObjectArray<int>* m_wgUsedBodies;
	int m_curWgidx;
	int m_start;
	int m_nConstraints;
	bool m_solveFriction;
	int m_maxNumBatches;
};

// Runs the contact solver: a fixed number of contact iterations followed by the
// same number of friction iterations, each executed through a b3SolveTask.
// NOTE(review): the constraint array is created empty and the narrowphase
// contacts are never converted into it, so the tasks currently receive zero
// constraints.
void b3CpuRigidBodyPipeline::solveContactConstraints()
{
	const int numIterations = 4;
	const int maxNumBatches = 250;

	b3AlignedObjectArray<b3ContactConstraint4> contactConstraints;
	const b3AlignedObjectArray<b3Contact4Data>& contacts = m_data->m_np->getContacts();
	int numConstraints = contactConstraints.size();
	//convert contacts...

	// pass 0 solves contacts, pass 1 solves friction
	for (int pass=0; pass<2; pass++)
	{
		const bool frictionPass = (pass == 1);
		for (int iter=0; iter<numIterations; iter++)
		{
			b3SolveTask task( m_data->m_rigidBodies, m_data->m_inertias, contactConstraints, 0, numConstraints ,maxNumBatches,0,0);
			task.m_solveFriction = frictionPass;
			task.run(0);
		}
	}
}

void b3CpuRigidBodyPipeline::integrate(float deltaTime)
{
float angDamping=0.f;
b3Vector3 gravityAcceleration=b3MakeVector3(0,-9,0);

//integrate transforms (external forces/gravity should be moved into constraint solver)
for (int i=0;i<m_data->m_rigidBodies.size();i++)
{
b3IntegrateTransform(&m_data->m_rigidBodies[i],deltaTime,angDamping,gravityAcceleration);
}

}

// Registers a new rigid body and returns its index in the body array.
//
// mass            body mass; 0 produces an inverse mass of 0
// position        3 floats (x,y,z)
// orientation     4 floats (x,y,z,w) forwarded to the quaternion setters
// collidableIndex index passed to the narrowphase to obtain the local AABB;
//                 a negative value is reported through b3Error and the body is
//                 stored without a broadphase proxy
// userData        currently unused
//
// A world-space AABB entry is always appended so that m_aabbWorldSpace stays
// indexable by body index (updateAabbWorldSpace() relies on that). Previously
// the entry was only appended for valid collidables, which shifted the AABB
// slots of every body registered afterwards.
int b3CpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userData)
{
	b3RigidBodyData body;
	int bodyIndex = m_data->m_rigidBodies.size();
	body.m_invMass = mass ? 1.f/mass : 0.f;
	body.m_angVel.setValue(0,0,0);
	body.m_collidableIdx = collidableIndex;
	body.m_frictionCoeff = 0.3f;
	body.m_linVel.setValue(0,0,0);
	body.m_pos.setValue(position[0],position[1],position[2]);
	body.m_quat.setValue(orientation[0],orientation[1],orientation[2],orientation[3]);
	body.m_restituitionCoeff = 0.f;

	m_data->m_rigidBodies.push_back(body);

	// one AABB slot per body, valid collidable or not
	b3Aabb& worldAabb = m_data->m_aabbWorldSpace.expand();

	if (collidableIndex>=0)
	{
		b3Aabb localAabb = m_data->m_np->getLocalSpaceAabb(collidableIndex);
		b3Vector3 localAabbMin=b3MakeVector3(localAabb.m_min[0],localAabb.m_min[1],localAabb.m_min[2]);
		b3Vector3 localAabbMax=b3MakeVector3(localAabb.m_max[0],localAabb.m_max[1],localAabb.m_max[2]);
		b3Scalar margin = 0.01f;
		b3Transform t;
		t.setIdentity();
		t.setOrigin(b3MakeVector3(position[0],position[1],position[2]));
		t.setRotation(b3Quaternion(orientation[0],orientation[1],orientation[2],orientation[3]));
		b3TransformAabb(localAabbMin,localAabbMax, margin,t,worldAabb.m_minVec,worldAabb.m_maxVec);

		m_data->m_bp->createProxy(worldAabb.m_minVec,worldAabb.m_maxVec,bodyIndex,0,1,1);
	} else
	{
		// no shape to bound: store a degenerate box at the body position
		worldAabb.m_minVec = b3MakeVector3(position[0],position[1],position[2]);
		worldAabb.m_maxVec = worldAabb.m_minVec;

		b3Error("registerPhysicsInstance using invalid collidableIndex\n");
	}

	return bodyIndex;
}


// Returns a pointer to the first registered body, or 0 when no body has been
// registered.
const struct b3RigidBodyData* b3CpuRigidBodyPipeline::getBodyBuffer() const
{
	if (m_data->m_rigidBodies.size())
	{
		return &m_data->m_rigidBodies[0];
	}
	return 0;
}

// Returns the number of bodies registered so far.
int b3CpuRigidBodyPipeline::getNumBodies() const
{
	const int bodyCount = m_data->m_rigidBodies.size();
	return bodyCount;
}

+ 67
- 0
src/bullet/Bullet3Dynamics/b3CpuRigidBodyPipeline.h View File

@@ -0,0 +1,67 @@
/*
Copyright (c) 2013 Advanced Micro Devices, Inc.

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans

#ifndef B3_CPU_RIGIDBODY_PIPELINE_H
#define B3_CPU_RIGIDBODY_PIPELINE_H



#include "Bullet3Common/b3AlignedObjectArray.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h"

/// CPU rigid body pipeline: owns an opaque internal data block and exposes the
/// individual simulation stages as virtual methods.
/// NOTE(review): the class holds a raw owning pointer (m_data is deleted in the
/// destructor) but declares no copy constructor/assignment - copying an instance
/// looks unsafe; confirm it is never copied.
class b3CpuRigidBodyPipeline
{
protected:
// opaque implementation state, allocated in the constructor
struct b3CpuRigidBodyPipelineInternalData* m_data;

int allocateCollidable();

public:


/// Stores the given narrowphase/broadphase pointers and a copy of the config.
b3CpuRigidBodyPipeline(class b3CpuNarrowPhase* narrowphase, struct b3DynamicBvhBroadphase* broadphaseDbvt, const struct b3Config& config);
virtual ~b3CpuRigidBodyPipeline();

/// Runs one simulation step: AABB update, overlapping pairs, contact points, integration.
virtual void stepSimulation(float deltaTime);
virtual void integrate(float timeStep);
virtual void updateAabbWorldSpace();
virtual void computeOverlappingPairs();
virtual void computeContactPoints();
virtual void solveContactConstraints();

int registerConvexPolyhedron(class b3ConvexUtility* convex);

/// Adds a body and returns its index; position is 3 floats, orientation 4 floats.
int registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, int userData);
// NOTE(review): no definitions for the methods from here down to castRays() were
// seen alongside the other member definitions - confirm they exist before calling.
void writeAllInstancesToGpu();
void copyConstraintsToHost();
void setGravity(const float* grav);
void reset();
int createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB,float breakingThreshold);
int createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold);
void removeConstraintByUid(int uid);

void addConstraint(class b3TypedConstraint* constraint);
void removeConstraint(b3TypedConstraint* constraint);

void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults);

/// Returns a pointer to the first body, or 0 when there are none.
const struct b3RigidBodyData* getBodyBuffer() const;

/// Returns the number of registered bodies.
int getNumBodies() const;

};

#endif //B3_CPU_RIGIDBODY_PIPELINE_H

+ 34
- 0
src/bullet/Bullet3Dynamics/shared/b3ContactConstraint4.h View File

@@ -0,0 +1,34 @@
#ifndef B3_CONTACT_CONSTRAINT5_H
#define B3_CONTACT_CONSTRAINT5_H

#include "Bullet3Common/shared/b3Float4.h"

typedef struct b3ContactConstraint4 b3ContactConstraint4_t;


// Solver-side representation of a contact with up to four contact points plus
// two friction directions.
// NOTE(review): lives under shared/, so the layout is presumably consumed by
// device code as well - do not reorder or resize members without checking.
struct b3ContactConstraint4
{

b3Float4 m_linear;//normal?   (the w component is read as the friction coefficient by b3GetFrictionCoeff)
b3Float4 m_worldPos[4];   // one entry per contact point slot
b3Float4 m_center; // friction
float m_jacCoeffInv[4];   // per-point coefficient; the contact solver skips a slot when this is 0
float m_b[4];   // per-point bias added to the relative velocity by the contact solver
float m_appliedRambdaDt[4];   // per-point accumulated impulse
float m_fJacCoeffInv[2]; // friction
float m_fAppliedRambdaDt[2]; // friction

unsigned int m_bodyA;
unsigned int m_bodyB;
int m_batchIdx;
unsigned int m_paddings;

};

//inline void setFrictionCoeff(float value) { m_linear[3] = value; }
// Returns the friction coefficient of a constraint, which is stored in the w
// component of m_linear.
inline float b3GetFrictionCoeff(b3ContactConstraint4_t* constraint)
{
	float frictionCoeff = constraint->m_linear.w;
	return frictionCoeff;
}

#endif //B3_CONTACT_CONSTRAINT5_H

+ 153
- 0
src/bullet/Bullet3Dynamics/shared/b3ConvertConstraint4.h View File

@@ -0,0 +1,153 @@


#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
#include "Bullet3Dynamics/shared/b3ContactConstraint4.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"


// Builds two vectors p and q spanning the plane perpendicular to n.
// Only the x, y and z components of *p and *q are written; w is left untouched.
// NOTE(review): the formulas assume n has unit length - confirm at call sites.
void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q);
void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q)
{
	if (b3Fabs(n.z) > 0.70710678f)
	{
		// n is mostly along z: pick p in the y-z plane
		float lenSq = n.y*n.y + n.z*n.z;
		float invLen = 1.f/sqrt(lenSq);
		p->x = 0;
		p->y = -n.z*invLen;
		p->z = n.y*invLen;
		// q = n x p
		q->x = lenSq*invLen;
		q->y = -n.x*p->z;
		q->z = n.x*p->y;
	}
	else
	{
		// otherwise pick p in the x-y plane
		float lenSq = n.x*n.x + n.y*n.y;
		float invLen = 1.f/sqrt(lenSq);
		p->x = -n.y*invLen;
		p->y = n.x*invLen;
		p->z = 0;
		// q = n x p
		q->x = -n.z*p->y;
		q->y = n.z*p->x;
		q->z = lenSq*invLen;
	}
}


// Fills the three outputs from a direction n and two lever arms r0, r1:
//   *linear   = (n.x, n.y, n.z, 0)
//   *angular0 =   r0 x n
//   *angular1 = -(r1 x n)
// The outputs are written in that order.
void setLinearAndAngular( b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1)
{
*linear = b3MakeFloat4(n.x,n.y,n.z,0.f);
*angular0 = b3Cross3(r0, n);
*angular1 = -b3Cross3(r1, n);
}


// Returns l0.linVel0 + a0.angVel0 + l1.linVel1 + a1.angVel1 using 3-component
// dot products, i.e. the body velocities projected onto the given linear
// (l0, l1) and angular (a0, a1) directions.
float calcRelVel( b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0,
b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1 )
{
return b3Dot3F4(l0, linVel0) + b3Dot3F4(a0, angVel0) + b3Dot3F4(l1, linVel1) + b3Dot3F4(a1, angVel1);
}


// Returns -1 / (invMass0 + a0.(I0^-1 a0) + invMass1 + a1.(I1^-1 a1)).
// linear0 and linear1 are not read: they are taken to be normalized, so their
// contribution reduces to the inverse masses.
// NOTE(review): the denominator is not checked for zero.
float calcJacCoeff(b3Float4ConstArg linear0, b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1,
	float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1)
{
	float linTerm0 = invMass0;
	float angTerm0 = b3Dot3F4(mtMul3(angular0,*invInertia0), angular0);
	float linTerm1 = invMass1;
	float angTerm1 = b3Dot3F4(mtMul3(angular1,*invInertia1), angular1);

	return -1.f/(linTerm0+angTerm0+linTerm1+angTerm1);
}


// Converts one narrowphase contact (src) into a solver constraint (dstC).
// The number of contact points is read from src->m_worldNormalOnB.w; slots at
// or beyond that count get a zero m_jacCoeffInv and a zero world position.
// For each used slot the function stores the inverse Jacobian coefficient and
// the bias m_b; when at least one point exists it also stores the averaged
// contact center and the two friction coefficients.
// NOTE(review): dt is used as a divisor (1/dt) without a zero check.
void setConstraint4( b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA,
b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB,
__global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,
b3ContactConstraint4_t* dstC )
{
// the source fields may carry a sign, which abs() strips before storing
dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);

float dtInv = 1.f/dt;
for(int ic=0; ic<4; ic++)
{
dstC->m_appliedRambdaDt[ic] = 0.f;
}
dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;


// copy the normal, then overwrite w with a fixed friction coefficient of 0.7
dstC->m_linear = src->m_worldNormalOnB;
dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );
for(int ic=0; ic<4; ic++)
{
b3Float4 r0 = src->m_worldPosB[ic] - posA;
b3Float4 r1 = src->m_worldPosB[ic] - posB;

if( ic >= src->m_worldNormalOnB.w )//npoints
{
// unused slot: a zero coefficient makes the solver skip it
dstC->m_jacCoeffInv[ic] = 0.f;
continue;
}

float relVelN;
{
b3Float4 linear, angular0, angular1;
setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);

dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
invMassA, &invInertiaA, invMassB, &invInertiaB );

relVelN = calcRelVel(linear, -linear, angular0, angular1,
linVelA, angVelA, linVelB, angVelB);

// restitution is fixed at 0, so the e*relVelN term below is always 0
float e = 0.f;//src->getRestituitionCoeff();
if( relVelN*relVelN < 0.004f ) e = 0.f;

dstC->m_b[ic] = e*relVelN;
//float penetration = src->m_worldPosB[ic].w;
// positional bias: (w of the contact position + drift) * coeff / dt
dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;
dstC->m_appliedRambdaDt[ic] = 0.f;
}
}

if( src->m_worldNormalOnB.w > 0 )//npoints
{ // prepare friction
// friction acts at the average of the contact positions
b3Float4 center = b3MakeFloat4(0.f,0.f,0.f,0.f);
for(int i=0; i<src->m_worldNormalOnB.w; i++)
center += src->m_worldPosB[i];
center /= (float)src->m_worldNormalOnB.w;

b3Float4 tangent[2];
b3PlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);
b3Float4 r[2];
r[0] = center - posA;
r[1] = center - posB;

for(int i=0; i<2; i++)
{
b3Float4 linear, angular0, angular1;
setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);

dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
invMassA, &invInertiaA, invMassB, &invInertiaB );
dstC->m_fAppliedRambdaDt[i] = 0.f;
}
dstC->m_center = center;
}

// copy the used contact positions; zero the remaining slots
for(int i=0; i<4; i++)
{
if( i<src->m_worldNormalOnB.w )
{
dstC->m_worldPos[i] = src->m_worldPosB[i];
}
else
{
dstC->m_worldPos[i] = b3MakeFloat4(0.f,0.f,0.f,0.f);
}
}
}

+ 15
- 0
src/bullet/Bullet3Dynamics/shared/b3Inertia.h View File

@@ -0,0 +1,15 @@


#ifndef B3_INERTIA_H
#define B3_INERTIA_H

#include "Bullet3Common/shared/b3Mat3x3.h"

// Per-body inverse inertia data: one 3x3 matrix named for world space and one
// named as the initial inverse inertia.
// NOTE(review): presumably m_initInvInertia is the local/body-space tensor that
// m_invInertiaWorld is derived from - confirm against the code that fills them.
struct b3Inertia
{
b3Mat3x3 m_invInertiaWorld;
b3Mat3x3 m_initInvInertia;
};


#endif //B3_INERTIA_H

+ 113
- 0
src/bullet/Bullet3Dynamics/shared/b3IntegrateTransforms.h View File

@@ -0,0 +1,113 @@


#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"



// Integrates the transform of bodies[nodeID] over timeStep.
// Bodies with zero inverse mass are left untouched. Otherwise the angular
// velocity is scaled by angularDamping, the orientation is advanced by the
// (step-limited) angular velocity, the position is advanced by the current
// linear velocity, and finally gravity is added to the linear velocity.
inline void integrateSingleTransform( __global b3RigidBodyData_t* bodies,int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)
{
	if (bodies[nodeID].m_invMass == 0.f)
	{
		return;
	}

	// largest rotation angle allowed in a single step
	float maxStepAngle = (0.25f * 3.14159254f);

	//angular velocity
	{
		// scale the stored angular velocity by the damping factor
		bodies[nodeID].m_angVel.x *= angularDamping;
		bodies[nodeID].m_angVel.y *= angularDamping;
		bodies[nodeID].m_angVel.z *= angularDamping;
		b3Float4 omega = bodies[nodeID].m_angVel;

		float omegaLen = b3Sqrt(b3Dot3F4(omega, omega));
		// limit the angular motion
		if (omegaLen*timeStep > maxStepAngle)
		{
			omegaLen = maxStepAngle / timeStep;
		}

		b3Float4 rotVec;
		if (omegaLen < 0.001f)
		{
			// small angle: use the Taylor expansion of the sinc function
			rotVec = omega * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * omegaLen * omegaLen);
		}
		else
		{
			// sin(half angle) / |omega|
			rotVec = omega * ( b3Sin(0.5f * omegaLen * timeStep) / omegaLen);
		}

		b3Quat deltaOrn;
		deltaOrn.x = rotVec.x;
		deltaOrn.y = rotVec.y;
		deltaOrn.z = rotVec.z;
		deltaOrn.w = b3Cos(omegaLen * timeStep * 0.5f);

		b3Quat currentOrn = bodies[nodeID].m_quat;
		b3Quat nextOrn = b3QuatMul(deltaOrn, currentOrn);
		nextOrn = b3QuatNormalized(nextOrn);
		bodies[nodeID].m_quat=nextOrn;
	}

	//linear velocity
	bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep;

	//apply gravity
	bodies[nodeID].m_linVel += gravityAcceleration * timeStep;
}

// Integrates the transform of a single body over timeStep.
// Bodies with zero inverse mass are left untouched. Otherwise the angular
// velocity is scaled by angularDamping, the orientation is advanced by the
// (step-limited) angular velocity, gravity is added to the linear velocity,
// and the position is advanced by the updated linear velocity.
inline void b3IntegrateTransform( __global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)
{
	// largest rotation angle allowed in a single step
	float maxStepAngle = (0.25f * 3.14159254f);

	if (body->m_invMass == 0.f)
	{
		return;
	}

	//angular velocity
	{
		// scale the stored angular velocity by the damping factor
		body->m_angVel.x *= angularDamping;
		body->m_angVel.y *= angularDamping;
		body->m_angVel.z *= angularDamping;
		b3Float4 omega = body->m_angVel;

		float omegaLen = b3Sqrt(b3Dot3F4(omega, omega));
		// limit the angular motion
		if (omegaLen*timeStep > maxStepAngle)
		{
			omegaLen = maxStepAngle / timeStep;
		}

		b3Float4 rotVec;
		if (omegaLen < 0.001f)
		{
			// small angle: use the Taylor expansion of the sinc function
			rotVec = omega * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * omegaLen * omegaLen);
		}
		else
		{
			// sin(half angle) / |omega|
			rotVec = omega * ( b3Sin(0.5f * omegaLen * timeStep) / omegaLen);
		}

		b3Quat deltaOrn;
		deltaOrn.x = rotVec.x;
		deltaOrn.y = rotVec.y;
		deltaOrn.z = rotVec.z;
		deltaOrn.w = b3Cos(omegaLen * timeStep * 0.5f);

		b3Quat currentOrn = body->m_quat;
		b3Quat nextOrn = b3QuatMul(deltaOrn, currentOrn);
		nextOrn = b3QuatNormalized(nextOrn);
		body->m_quat=nextOrn;
	}

	//apply gravity
	body->m_linVel += gravityAcceleration * timeStep;

	//linear velocity
	body->m_pos += body->m_linVel * timeStep;
}

+ 232
- 0
src/bullet/Bullet3Geometry/b3AabbUtil.h View File

@@ -0,0 +1,232 @@
/*
Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/



#ifndef B3_AABB_UTIL2
#define B3_AABB_UTIL2

#include "Bullet3Common/b3Transform.h"
#include "Bullet3Common/b3Vector3.h"
#include "Bullet3Common/b3MinMax.h"



/// Offsets both corners of an AABB in place: expansionMin is added to aabbMin
/// and expansionMax is added to aabbMax. To grow the box, expansionMin must
/// therefore have non-positive components and expansionMax non-negative ones.
B3_FORCE_INLINE void b3AabbExpand (b3Vector3& aabbMin,
b3Vector3& aabbMax,
const b3Vector3& expansionMin,
const b3Vector3& expansionMax)
{
aabbMin = aabbMin + expansionMin;
aabbMax = aabbMax + expansionMax;
}

/// Tests whether a point lies inside an AABB. Points exactly on the boundary
/// count as inside.
B3_FORCE_INLINE bool b3TestPointAgainstAabb2(const b3Vector3 &aabbMin1, const b3Vector3 &aabbMax1,
	const b3Vector3 &point)
{
	if (aabbMin1.getX() > point.getX() || aabbMax1.getX() < point.getX())
	{
		return false;
	}
	if (aabbMin1.getZ() > point.getZ() || aabbMax1.getZ() < point.getZ())
	{
		return false;
	}
	if (aabbMin1.getY() > point.getY() || aabbMax1.getY() < point.getY())
	{
		return false;
	}
	return true;
}


/// Conservative test for overlap between two AABBs. Boxes that merely touch
/// count as overlapping.
B3_FORCE_INLINE bool b3TestAabbAgainstAabb2(const b3Vector3 &aabbMin1, const b3Vector3 &aabbMax1,
	const b3Vector3 &aabbMin2, const b3Vector3 &aabbMax2)
{
	// the boxes are disjoint as soon as they are separated along one axis
	if (aabbMin1.getX() > aabbMax2.getX() || aabbMax1.getX() < aabbMin2.getX())
	{
		return false;
	}
	if (aabbMin1.getZ() > aabbMax2.getZ() || aabbMax1.getZ() < aabbMin2.getZ())
	{
		return false;
	}
	if (aabbMin1.getY() > aabbMax2.getY() || aabbMax1.getY() < aabbMin2.getY())
	{
		return false;
	}
	return true;
}

/// Conservative test for overlap between a triangle and an AABB: compares the
/// triangle's per-axis extent with the box, so it may report an overlap for a
/// triangle that only intersects the box's bounding slabs.
/// vertices must point to three vertices.
B3_FORCE_INLINE bool b3TestTriangleAgainstAabb2(const b3Vector3 *vertices,
	const b3Vector3 &aabbMin, const b3Vector3 &aabbMax)
{
	const b3Vector3 &v0 = vertices[0];
	const b3Vector3 &v1 = vertices[1];
	const b3Vector3 &v2 = vertices[2];

	// axes are checked in the order x, z, y
	const int axisOrder[3] = {0, 2, 1};
	for (int k = 0; k < 3; ++k)
	{
		const int axis = axisOrder[k];
		if (b3Min(b3Min(v0[axis], v1[axis]), v2[axis]) > aabbMax[axis]) return false;
		if (b3Max(b3Max(v0[axis], v1[axis]), v2[axis]) < aabbMin[axis]) return false;
	}
	return true;
}


/// Classifies point p against a box centered at the origin with the given half
/// extents and returns a 6-bit mask:
///   0x01 / 0x02 / 0x04 : p is below -halfExtent on x / y / z
///   0x08 / 0x10 / 0x20 : p is above +halfExtent on x / y / z
/// A result of 0 means p is inside or on the box.
B3_FORCE_INLINE int b3Outcode(const b3Vector3& p,const b3Vector3& halfExtent)
{
	int outcode = 0x0;

	if (p.getX() < -halfExtent.getX()) outcode |= 0x01;
	if (p.getX() > halfExtent.getX()) outcode |= 0x08;
	if (p.getY() < -halfExtent.getY()) outcode |= 0x02;
	if (p.getY() > halfExtent.getY()) outcode |= 0x10;
	if (p.getZ() < -halfExtent.getZ()) outcode |= 0x4;
	if (p.getZ() > halfExtent.getZ()) outcode |= 0x20;

	return outcode;
}



/// Slab test of a ray against an AABB given as bounds[0] (min) / bounds[1] (max).
/// rayInvDirection holds the per-component reciprocal of the ray direction and
/// raySign[i] selects which of the two bounds gives the near plane on axis i.
/// tmin is an output: it is overwritten with the running entry parameter and is
/// modified even when the function returns false.
/// Returns true when the entry/exit interval overlaps (lambda_min, lambda_max).
B3_FORCE_INLINE bool b3RayAabb2(const b3Vector3& rayFrom,
const b3Vector3& rayInvDirection,
const unsigned int raySign[3],
const b3Vector3 bounds[2],
b3Scalar& tmin,
b3Scalar lambda_min,
b3Scalar lambda_max)
{
b3Scalar tmax, tymin, tymax, tzmin, tzmax;
// entry/exit parameters for the x and y slabs
tmin = (bounds[raySign[0]].getX() - rayFrom.getX()) * rayInvDirection.getX();
tmax = (bounds[1-raySign[0]].getX() - rayFrom.getX()) * rayInvDirection.getX();
tymin = (bounds[raySign[1]].getY() - rayFrom.getY()) * rayInvDirection.getY();
tymax = (bounds[1-raySign[1]].getY() - rayFrom.getY()) * rayInvDirection.getY();

// the x and y intervals do not overlap: miss
if ( (tmin > tymax) || (tymin > tmax) )
return false;

// intersect the x and y intervals
if (tymin > tmin)
tmin = tymin;

if (tymax < tmax)
tmax = tymax;

tzmin = (bounds[raySign[2]].getZ() - rayFrom.getZ()) * rayInvDirection.getZ();
tzmax = (bounds[1-raySign[2]].getZ() - rayFrom.getZ()) * rayInvDirection.getZ();

// the z interval does not overlap the running interval: miss
if ( (tmin > tzmax) || (tzmin > tmax) )
return false;
if (tzmin > tmin)
tmin = tzmin;
if (tzmax < tmax)
tmax = tzmax;
return ( (tmin < lambda_max) && (tmax > lambda_min) );
}

/// Clips the segment rayFrom -> rayTo against an AABB using outcodes.
/// param is in/out: on entry it is the largest allowed exit parameter, on a hit
/// it receives the entry parameter along the segment. normal receives the axis
/// vector recorded for the last entry plane and is only written on a hit.
/// Returns true when the entry parameter does not exceed the exit parameter.
B3_FORCE_INLINE bool b3RayAabb(const b3Vector3& rayFrom,
const b3Vector3& rayTo,
const b3Vector3& aabbMin,
const b3Vector3& aabbMax,
b3Scalar& param, b3Vector3& normal)
{
// work in a frame centered on the box
b3Vector3 aabbHalfExtent = (aabbMax-aabbMin)* b3Scalar(0.5);
b3Vector3 aabbCenter = (aabbMax+aabbMin)* b3Scalar(0.5);
b3Vector3 source = rayFrom - aabbCenter;
b3Vector3 target = rayTo - aabbCenter;
int sourceOutcode = b3Outcode(source,aabbHalfExtent);
int targetOutcode = b3Outcode(target,aabbHalfExtent);
// if both endpoints lie outside the same face plane the segment cannot hit the box
if ((sourceOutcode & targetOutcode) == 0x0)
{
b3Scalar lambda_enter = b3Scalar(0.0);
b3Scalar lambda_exit = param;
b3Vector3 r = target - source;
int i;
b3Scalar normSign = 1;
b3Vector3 hitNormal = b3MakeVector3(0,0,0);
int bit=1;

// j selects the face set (first the three faces at -halfExtent, then the three
// at +halfExtent); bit walks through the outcode bits in the same order
for (int j=0;j<2;j++)
{
for (i = 0; i != 3; ++i)
{
if (sourceOutcode & bit)
{
// the source is outside this face: the segment may enter through it
b3Scalar lambda = (-source[i] - aabbHalfExtent[i]*normSign) / r[i];
if (lambda_enter <= lambda)
{
lambda_enter = lambda;
hitNormal.setValue(0,0,0);
hitNormal[i] = normSign;
}
}
else if (targetOutcode & bit)
{
// the target is outside this face: the segment leaves through it
b3Scalar lambda = (-source[i] - aabbHalfExtent[i]*normSign) / r[i];
b3SetMin(lambda_exit, lambda);
}
bit<<=1;
}
normSign = b3Scalar(-1.);
}
if (lambda_enter <= lambda_exit)
{
param = lambda_enter;
normal = hitNormal;
return true;
}
}
return false;
}



/// Computes the bounds of a box given by its half extents (grown by 'margin' on every axis) after applying
/// transform t. The output box is centred on t.getOrigin(); its extent is derived from the padded half
/// extents and the component-wise absolute value of t's basis.
B3_FORCE_INLINE void b3TransformAabb(const b3Vector3& halfExtents, b3Scalar margin,const b3Transform& t,b3Vector3& aabbMinOut,b3Vector3& aabbMaxOut)
{
	b3Vector3 paddedHalfExtents = halfExtents+b3MakeVector3(margin,margin,margin);

	b3Matrix3x3 absBasis = t.getBasis().absolute();
	b3Vector3 worldExtent = paddedHalfExtents.dot3( absBasis[0], absBasis[1], absBasis[2] );

	b3Vector3 worldCenter = t.getOrigin();
	aabbMinOut = worldCenter - worldExtent;
	aabbMaxOut = worldCenter + worldExtent;
}


/// Computes the bounds of the local box [localAabbMin, localAabbMax] (grown by 'margin' on every axis)
/// after applying transform 'trans'. The local box centre is transformed by 'trans'; the extent is derived
/// from the padded half extents and the component-wise absolute value of the basis.
B3_FORCE_INLINE void b3TransformAabb(const b3Vector3& localAabbMin,const b3Vector3& localAabbMax, b3Scalar margin,const b3Transform& trans,b3Vector3& aabbMinOut,b3Vector3& aabbMaxOut)
{
	//b3Assert(localAabbMin.getX() <= localAabbMax.getX());
	//b3Assert(localAabbMin.getY() <= localAabbMax.getY());
	//b3Assert(localAabbMin.getZ() <= localAabbMax.getZ());

	// Half size of the local box, then padded by the margin.
	b3Vector3 paddedHalfExtents = b3Scalar(0.5)*(localAabbMax-localAabbMin);
	paddedHalfExtents+=b3MakeVector3(margin,margin,margin);

	b3Vector3 boxCenterLocal = b3Scalar(0.5)*(localAabbMax+localAabbMin);

	b3Matrix3x3 absBasis = trans.getBasis().absolute();
	b3Vector3 boxCenterWorld = trans(boxCenterLocal);
	b3Vector3 worldExtent = paddedHalfExtents.dot3( absBasis[0], absBasis[1], absBasis[2] );

	aabbMinOut = boxCenterWorld-worldExtent;
	aabbMaxOut = boxCenterWorld+worldExtent;
}

#define B3_USE_BANCHLESS 1
#ifdef B3_USE_BANCHLESS
//Branch-free variant, selected when B3_USE_BANCHLESS is defined. It replaces the fallback below and returns a 32 bit value instead of an 8 bit one for improved performance (~3x on XBox 360).
//Tests two AABBs given as quantized (unsigned short) min/max triples for overlap.
//The six per-axis comparisons are combined with bitwise '&' (no short-circuit) and the combined flag is passed to b3Select with 1 and 0 as the two candidate results.
B3_FORCE_INLINE unsigned b3TestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
{
return static_cast<unsigned int>(b3Select((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0])
& (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2])
& (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])),
1, 0));
}
#else
//Fallback variant, compiled only when B3_USE_BANCHLESS is not defined.
//Returns true when the two quantized AABBs overlap on all three axes (touching bounds count as overlap).
B3_FORCE_INLINE bool b3TestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
{
bool overlap = true;
//each line clears the flag when the boxes are separated on that axis (axes are visited in the order 0, 2, 1)
overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? false : overlap;
overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? false : overlap;
overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? false : overlap;
return overlap;
}
#endif //B3_USE_BANCHLESS

#endif //B3_AABB_UTIL2



+ 2755
- 0
src/bullet/Bullet3Geometry/b3ConvexHullComputer.cpp
File diff suppressed because it is too large
View File


+ 103
- 0
src/bullet/Bullet3Geometry/b3ConvexHullComputer.h View File

@@ -0,0 +1,103 @@
/*
Copyright (c) 2011 Ole Kniemeyer, MAXON, www.maxon.net

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_CONVEX_HULL_COMPUTER_H
#define B3_CONVEX_HULL_COMPUTER_H

#include "Bullet3Common/b3Vector3.h"
#include "Bullet3Common/b3AlignedObjectArray.h"

/// Convex hull implementation based on Preparata and Hong
/// See http://code.google.com/p/bullet/issues/detail?id=275
/// Ole Kniemeyer, MAXON Computer GmbH
class b3ConvexHullComputer
{
private:
// Shared implementation behind the two public compute() overloads.
// 'doubleCoords' tells it whether 'coords' points to double (true) or float (false) data;
// the public overloads pass false for float input and true for double input.
b3Scalar compute(const void* coords, bool doubleCoords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp);

public:

// Half-edge record of the output hull. 'next' and 'reverse' are element offsets relative to this
// edge's own address (they are applied as 'this + offset'), so an Edge is only meaningful while it
// sits in the array it was created in (presumably the 'edges' member below).
class Edge
{
private:
int next; // offset to the next edge around the same source vertex
int reverse; // offset to the oppositely directed edge
int targetVertex; // index of the vertex this edge points to

friend class b3ConvexHullComputer;

public:
// The source vertex is the target of the reverse edge.
int getSourceVertex() const
{
return (this + reverse)->targetVertex;
}

int getTargetVertex() const
{
return targetVertex;
}

const Edge* getNextEdgeOfVertex() const // clockwise list of all edges of a vertex
{
return this + next;
}

const Edge* getNextEdgeOfFace() const // counter-clockwise list of all edges of a face
{
return (this + reverse)->getNextEdgeOfVertex();
}

const Edge* getReverseEdge() const
{
return this + reverse;
}
};


// Vertices of the output hull
b3AlignedObjectArray<b3Vector3> vertices;

// Edges of the output hull
b3AlignedObjectArray<Edge> edges;

// Faces of the convex hull. Each entry is an index into the "edges" array pointing to an edge of the face. Faces are planar n-gons
b3AlignedObjectArray<int> faces;

/*
Compute convex hull of "count" vertices stored in "coords". "stride" is the difference in bytes
between the addresses of consecutive vertices. If "shrink" is positive, the convex hull is shrunken
by that amount (each face is moved by "shrink" length units towards the center along its normal).
If "shrinkClamp" is positive, "shrink" is clamped to not exceed "shrinkClamp * innerRadius", where "innerRadius"
is the minimum distance of a face to the center of the convex hull.

The returned value is the amount by which the hull has been shrunken. If it is negative, the amount was so large
that the resulting convex hull is empty.

The output convex hull can be found in the member variables "vertices", "edges", "faces".
*/
b3Scalar compute(const float* coords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp)
{
return compute(coords, false, stride, count, shrink, shrinkClamp);
}

// same as above, but double precision
b3Scalar compute(const double* coords, int stride, int count, b3Scalar shrink, b3Scalar shrinkClamp)
{
return compute(coords, true, stride, count, shrink, shrinkClamp);
}
};


#endif //B3_CONVEX_HULL_COMPUTER_H


+ 185
- 0
src/bullet/Bullet3Geometry/b3GeometryUtil.cpp View File

@@ -0,0 +1,185 @@
/*
Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/



#include "b3GeometryUtil.h"


/*
Make sure this dummy function never changes so that it
can be used by probes that are checking whether the
library is actually installed.

It is declared with C linkage, takes no arguments and has an empty body.
The separate declaration before the definition is intentional; keep both.
*/
extern "C"
{
void b3BulletMathProbe ();

void b3BulletMathProbe () {}
}


bool b3GeometryUtil::isPointInsidePlanes(const b3AlignedObjectArray<b3Vector3>& planeEquations, const b3Vector3& point, b3Scalar margin)
{
int numbrushes = planeEquations.size();
for (int i=0;i<numbrushes;i++)
{
const b3Vector3& N1 = planeEquations[i];
b3Scalar dist = b3Scalar(N1.dot(point))+b3Scalar(N1[3])-margin;
if (dist>b3Scalar(0.))
{
return false;
}
}
return true;
}


bool b3GeometryUtil::areVerticesBehindPlane(const b3Vector3& planeNormal, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar margin)
{
int numvertices = vertices.size();
for (int i=0;i<numvertices;i++)
{
const b3Vector3& N1 = vertices[i];
b3Scalar dist = b3Scalar(planeNormal.dot(N1))+b3Scalar(planeNormal[3])-margin;
if (dist>b3Scalar(0.))
{
return false;
}
}
return true;
}

bool notExist(const b3Vector3& planeEquation,const b3AlignedObjectArray<b3Vector3>& planeEquations);

bool notExist(const b3Vector3& planeEquation,const b3AlignedObjectArray<b3Vector3>& planeEquations)
{
int numbrushes = planeEquations.size();
for (int i=0;i<numbrushes;i++)
{
const b3Vector3& N1 = planeEquations[i];
if (planeEquation.dot(N1) > b3Scalar(0.999))
{
return false;
}
}
return true;
}

/// Brute-force search for the bounding planes of a vertex set.
/// Every vertex triple yields two candidate planes (one per normal direction). A candidate is appended to
/// 'planeEquationsOut' when its normal is not degenerate, notExist() accepts it against the planes collected
/// so far, and areVerticesBehindPlane() accepts all vertices with a margin of 0.01.
/// Planes are stored as (unit normal x, y, z, offset in component 3).
void b3GeometryUtil::getPlaneEquationsFromVertices(b3AlignedObjectArray<b3Vector3>& vertices, b3AlignedObjectArray<b3Vector3>& planeEquationsOut )
{
	const int vertexCount = vertices.size();

	for (int ia=0;ia<vertexCount;ia++)
	{
		const b3Vector3& va = vertices[ia];

		for (int ib=ia+1;ib<vertexCount;ib++)
		{
			const b3Vector3& vb = vertices[ib];

			for (int ic=ib+1;ic<vertexCount;ic++)
			{
				const b3Vector3& vc = vertices[ic];

				b3Vector3 edgeAB = vb-va;
				b3Vector3 edgeAC = vc-va;

				// Try the triangle normal first, then its opposite.
				for (int side=0;side<2;side++)
				{
					const b3Scalar normalSign = (side==0) ? b3Scalar(1.) : b3Scalar(-1.);
					b3Vector3 candidate = normalSign * edgeAB.cross(edgeAC);

					// Skip (nearly) collinear triples.
					if (!(candidate.length2() > b3Scalar(0.0001)))
						continue;

					candidate.normalize();
					if (!notExist(candidate,planeEquationsOut))
						continue;

					candidate[3] = -candidate.dot(va);

					// Keep the plane only if every vertex is accepted by it.
					if (areVerticesBehindPlane(candidate,vertices,b3Scalar(0.01)))
					{
						planeEquationsOut.push_back(candidate);
					}
				}
			}
		}
	}

}

void b3GeometryUtil::getVerticesFromPlaneEquations(const b3AlignedObjectArray<b3Vector3>& planeEquations , b3AlignedObjectArray<b3Vector3>& verticesOut )
{
const int numbrushes = planeEquations.size();
// brute force:
for (int i=0;i<numbrushes;i++)
{
const b3Vector3& N1 = planeEquations[i];

for (int j=i+1;j<numbrushes;j++)
{
const b3Vector3& N2 = planeEquations[j];
for (int k=j+1;k<numbrushes;k++)
{

const b3Vector3& N3 = planeEquations[k];

b3Vector3 n2n3; n2n3 = N2.cross(N3);
b3Vector3 n3n1; n3n1 = N3.cross(N1);
b3Vector3 n1n2; n1n2 = N1.cross(N2);
if ( ( n2n3.length2() > b3Scalar(0.0001) ) &&
( n3n1.length2() > b3Scalar(0.0001) ) &&
( n1n2.length2() > b3Scalar(0.0001) ) )
{
//point P out of 3 plane equations:

// d1 ( N2 * N3 ) + d2 ( N3 * N1 ) + d3 ( N1 * N2 )
//P = -------------------------------------------------------------------------
// N1 . ( N2 * N3 )


b3Scalar quotient = (N1.dot(n2n3));
if (b3Fabs(quotient) > b3Scalar(0.000001))
{
quotient = b3Scalar(-1.) / quotient;
n2n3 *= N1[3];
n3n1 *= N2[3];
n1n2 *= N3[3];
b3Vector3 potentialVertex = n2n3;
potentialVertex += n3n1;
potentialVertex += n1n2;
potentialVertex *= quotient;

//check if inside, and replace supportingVertexOut if needed
if (isPointInsidePlanes(planeEquations,potentialVertex,b3Scalar(0.01)))
{
verticesOut.push_back(potentialVertex);
}
}
}
}
}
}
}


+ 42
- 0
src/bullet/Bullet3Geometry/b3GeometryUtil.h View File

@@ -0,0 +1,42 @@
/*
Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/


#ifndef B3_GEOMETRY_UTIL_H
#define B3_GEOMETRY_UTIL_H

#include "Bullet3Common/b3Vector3.h"
#include "Bullet3Common/b3AlignedObjectArray.h"

///The b3GeometryUtil helper class provides a few methods to convert between plane equations and vertices.
///Plane equations are carried in a b3Vector3: components 0..2 hold the normal and component 3 the offset.
class b3GeometryUtil
{
public:
///Appends to planeEquationsOut the planes found by testing every vertex triple (brute force).
static void getPlaneEquationsFromVertices(b3AlignedObjectArray<b3Vector3>& vertices, b3AlignedObjectArray<b3Vector3>& planeEquationsOut );

///Appends to verticesOut the intersection points found by testing every plane triple (brute force).
static void getVerticesFromPlaneEquations(const b3AlignedObjectArray<b3Vector3>& planeEquations , b3AlignedObjectArray<b3Vector3>& verticesOut );
///NOTE(review): no definition of isInside appears in b3GeometryUtil.cpp; confirm it is defined elsewhere or remove the declaration.
static bool isInside(const b3AlignedObjectArray<b3Vector3>& vertices, const b3Vector3& planeNormal, b3Scalar margin);
///Returns true when normal.dot(point) + offset - margin is not positive for every plane.
static bool isPointInsidePlanes(const b3AlignedObjectArray<b3Vector3>& planeEquations, const b3Vector3& point, b3Scalar margin);

///Returns true when planeNormal.dot(vertex) + offset - margin is not positive for every vertex.
static bool areVerticesBehindPlane(const b3Vector3& planeNormal, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar margin);

};


#endif //B3_GEOMETRY_UTIL_H


+ 117
- 0
src/bullet/Bullet3Geometry/b3GrahamScan2dConvexHull.h View File

@@ -0,0 +1,117 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/


#ifndef B3_GRAHAM_SCAN_2D_CONVEX_HULL_H
#define B3_GRAHAM_SCAN_2D_CONVEX_HULL_H


#include "Bullet3Common/b3Vector3.h"
#include "Bullet3Common/b3AlignedObjectArray.h"

struct b3GrahamVector3 : public b3Vector3
{
b3GrahamVector3(const b3Vector3& org, int orgIndex)
:b3Vector3(org),
m_orgIndex(orgIndex)
{
}
b3Scalar m_angle;
int m_orgIndex;
};


struct b3AngleCompareFunc {
b3Vector3 m_anchor;
b3AngleCompareFunc(const b3Vector3& anchor)
: m_anchor(anchor)
{
}
bool operator()(const b3GrahamVector3& a, const b3GrahamVector3& b) const {
if (a.m_angle != b.m_angle)
return a.m_angle < b.m_angle;
else
{
b3Scalar al = (a-m_anchor).length2();
b3Scalar bl = (b-m_anchor).length2();
if (al != bl)
return al < bl;
else
{
return a.m_orgIndex < b.m_orgIndex;
}
}
}
};

/// Graham scan of a point set in the plane perpendicular to 'normalAxis'.
/// @param originalPoints  input points; the array is reordered in place (anchor moved to slot 0, the rest
///                        sorted) and every m_angle is overwritten
/// @param hull            receives the hull points in scan order; points are appended, the array is not cleared
/// @param normalAxis      normal of the plane in which the hull is computed
///
/// Compared with the straightforward scan this version
///  - gives a point that coincides with the anchor the angle 0 instead of dividing by a zero length
///    (the resulting NaN would make the sort comparator inconsistent), and
///  - re-adds the current point when back-tracking has reduced the hull to the anchor alone, so the
///    point is not silently dropped.
inline void b3GrahamScanConvexHull2D(b3AlignedObjectArray<b3GrahamVector3>& originalPoints, b3AlignedObjectArray<b3GrahamVector3>& hull, const b3Vector3& normalAxis)
{
	b3Vector3 axis0,axis1;
	b3PlaneSpace1(normalAxis,axis0,axis1);

	// Zero or one point: the hull is the input itself.
	if (originalPoints.size()<=1)
	{
		for (int i=0;i<originalPoints.size();i++)
			hull.push_back(originalPoints[0]);
		return;
	}

	//step1 : find anchor point with smallest projection on axis0 and move it to first location
	for (int i=0;i<originalPoints.size();i++)
	{
		b3Scalar projL = originalPoints[i].dot(axis0);
		b3Scalar projR = originalPoints[0].dot(axis0);
		if (projL < projR)
		{
			originalPoints.swap(0,i);
		}
	}

	//also precompute angles
	originalPoints[0].m_angle = -1e30f;
	for (int i=1;i<originalPoints.size();i++)
	{
		b3Vector3 ar = originalPoints[i]-originalPoints[0];
		b3Scalar arLength = ar.length();
		if (arLength > b3Scalar(0.))
		{
			originalPoints[i].m_angle = b3Cross(axis0, ar).dot(normalAxis) / arLength;
		}
		else
		{
			// Duplicate of the anchor: avoid 0/0. The comparator then orders it by distance (zero) and index.
			originalPoints[i].m_angle = b3Scalar(0.);
		}
	}

	//step 2: sort all points, based on 'angle' with this anchor
	b3AngleCompareFunc comp(originalPoints[0]);
	originalPoints.quickSortInternal(comp,1,originalPoints.size()-1);

	int i;
	for (i = 0; i<2; i++)
		hull.push_back(originalPoints[i]);

	//step 3: keep all 'convex' points and discard concave points (using back tracking)
	for (; i != originalPoints.size(); i++)
	{
		bool isConvex = false;
		while (!isConvex&& hull.size()>1) {
			b3Vector3& a = hull[hull.size()-2];
			b3Vector3& b = hull[hull.size()-1];
			isConvex = b3Cross(a-b,a-originalPoints[i]).dot(normalAxis)> 0;
			if (!isConvex)
				hull.pop_back();
			else
				hull.push_back(originalPoints[i]);
		}

		// Back-tracking removed everything but one point: the current point has not been added yet.
		if (hull.size()==1)
		{
			hull.push_back(originalPoints[i]);
		}
	}
}

#endif //B3_GRAHAM_SCAN_2D_CONVEX_HULL_H

+ 44
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h View File

@@ -0,0 +1,44 @@

#ifndef B3_GPU_BROADPHASE_INTERFACE_H
#define B3_GPU_BROADPHASE_INTERFACE_H

#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
#include "Bullet3Common/b3Vector3.h"
#include "b3SapAabb.h"
#include "Bullet3Common/shared/b3Int2.h"
#include "Bullet3Common/shared/b3Int4.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"

// Abstract interface for GPU (OpenCL) broadphase implementations.
// All operations are pure virtual; the order of the virtual functions must be kept stable.
class b3GpuBroadphaseInterface
{
public:

// Signature of a factory function that creates a broadphase for the given OpenCL context, device and queue.
typedef class b3GpuBroadphaseInterface* (CreateFunc)(cl_context ctx,cl_device_id device, cl_command_queue q);

virtual ~b3GpuBroadphaseInterface()
{
}

// Register an AABB. 'userPtr' is an integer supplied by the caller and stored with the AABB.
// createLargeProxy is the variant for AABBs the caller wants treated as "large".
virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)=0;
virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)=0;

// Compute the overlapping pairs, keeping at most 'maxPairs'. The ...Host variant presumably runs on the CPU - confirm per implementation.
virtual void calculateOverlappingPairs(int maxPairs)=0;
virtual void calculateOverlappingPairsHost(int maxPairs)=0;

//call writeAabbsToGpu after done making all changes (createProxy etc)
virtual void writeAabbsToGpu()=0;

// Raw OpenCL handles and the pair count of the last computation.
virtual cl_mem getAabbBufferWS()=0;
virtual int getNumOverlap()=0;
virtual cl_mem getOverlappingPairBuffer()=0;

// Direct access to the implementation's arrays; references are returned, so callers can modify them.
virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU()=0;
virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU()=0;
virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU() = 0;
virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU() = 0;
virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU() = 0;

};

#endif //B3_GPU_BROADPHASE_INTERFACE_H

+ 384
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp View File

@@ -0,0 +1,384 @@

#include "b3GpuGridBroadphase.h"
#include "Bullet3Geometry/b3AabbUtil.h"
#include "kernels/gridBroadphaseKernels.h"
#include "kernels/sapKernels.h"
//#include "kernels/gridBroadphase.cl"


#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"



#define B3_BROADPHASE_SAP_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl"
#define B3_GRID_BROADPHASE_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl"

// OpenCL kernel handles used by b3GpuGridBroadphase. These are file-level globals with external linkage
// (not class members, despite the m_ prefix on two of them); the constructor assigns them and the
// destructor releases them.
// NOTE(review): because the handles are shared, constructing a second b3GpuGridBroadphase overwrites them
// and each destructor releases the same handles again - confirm that only one instance is alive at a time.
cl_kernel kCalcHashAABB;
cl_kernel kClearCellStart;
cl_kernel kFindCellStart;
cl_kernel kFindOverlappingPairs;
cl_kernel m_copyAabbsKernel;
cl_kernel m_sap2Kernel;





//int maxPairsPerBody = 64;
// Stored into m_paramsCPU.m_gridSize[3] by the constructor.
int maxBodiesPerCell = 256;//??

// Creates the broadphase for the given OpenCL context, device and queue:
// fills in the grid parameters, uploads them, compiles the SAP and grid kernels and creates the radix sorter.
b3GpuGridBroadphase::b3GpuGridBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q )
:m_context(ctx),
m_device(device),
m_queue(q),
m_allAabbsGPU1(ctx,q),
m_smallAabbsMappingGPU(ctx,q),
m_largeAabbsMappingGPU(ctx,q),
m_gpuPairs(ctx,q),

m_hashGpu(ctx,q),
m_paramsGPU(ctx,q),
m_cellStartGpu(ctx,q)
{

// Despite its name, this local is only used to derive m_invCellSize (its reciprocal);
// the grid dimensions themselves are the 128 x 128 x 128 set below.
b3Vector3 gridSize = b3MakeVector3(3,3,3);
b3Vector3 invGridSize = b3MakeVector3(1.f/gridSize[0],1.f/gridSize[1],1.f/gridSize[2]);

m_paramsCPU.m_gridSize[0] = 128;
m_paramsCPU.m_gridSize[1] = 128;
m_paramsCPU.m_gridSize[2] = 128;
// setMaxBodiesPerCell() writes the same slot (m_gridSize[3]), so the next two lines are redundant with each other.
m_paramsCPU.m_gridSize[3] = maxBodiesPerCell;
m_paramsCPU.setMaxBodiesPerCell(maxBodiesPerCell);
m_paramsCPU.m_invCellSize[0] = invGridSize[0];
m_paramsCPU.m_invCellSize[1] = invGridSize[1];
m_paramsCPU.m_invCellSize[2] = invGridSize[2];
m_paramsCPU.m_invCellSize[3] = 0.f;
// Copy of the parameters that is later bound as a kernel buffer.
m_paramsGPU.push_back(m_paramsCPU);

cl_int errNum=0;

// Kernels from the SAP program.
{
const char* sapSrc = sapCL;
cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"",B3_BROADPHASE_SAP_PATH);
b3Assert(errNum==CL_SUCCESS);
// NOTE(review): errNum is not checked between the next two calls, so a failure of the first
// can be masked by the second.
m_copyAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "copyAabbsKernel",&errNum,sapProg );
m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg );
b3Assert(errNum==CL_SUCCESS);
}

// Kernels from the grid broadphase program.
{
cl_program gridProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,gridBroadphaseCL,&errNum,"",B3_GRID_BROADPHASE_PATH);
b3Assert(errNum==CL_SUCCESS);

kCalcHashAABB = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kCalcHashAABB",&errNum,gridProg);
b3Assert(errNum==CL_SUCCESS);
kClearCellStart = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kClearCellStart",&errNum,gridProg);
b3Assert(errNum==CL_SUCCESS);

kFindCellStart = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kFindCellStart",&errNum,gridProg);
b3Assert(errNum==CL_SUCCESS);

kFindOverlappingPairs = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kFindOverlappingPairs",&errNum,gridProg);
b3Assert(errNum==CL_SUCCESS);

}

// Owned by this object; deleted in the destructor.
m_sorter = new b3RadixSort32CL(m_context,m_device,m_queue);

}
// Releases the six kernel handles assigned by the constructor and deletes the radix sorter.
// The kernel handles are file-level globals, not members of this object.
b3GpuGridBroadphase::~b3GpuGridBroadphase()
{
clReleaseKernel( kCalcHashAABB);
clReleaseKernel( kClearCellStart);
clReleaseKernel( kFindCellStart);
clReleaseKernel( kFindOverlappingPairs);
clReleaseKernel( m_sap2Kernel);
clReleaseKernel( m_copyAabbsKernel);
delete m_sorter;
}



void b3GpuGridBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
{
b3SapAabb aabb;
aabb.m_minVec = aabbMin;
aabb.m_maxVec = aabbMax;
aabb.m_minIndices[3] = userPtr;
aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size();//NOT userPtr;
m_smallAabbsMappingCPU.push_back(m_allAabbsCPU1.size());

m_allAabbsCPU1.push_back(aabb);

}
void b3GpuGridBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
{
b3SapAabb aabb;
aabb.m_minVec = aabbMin;
aabb.m_maxVec = aabbMax;
aabb.m_minIndices[3] = userPtr;
aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size();//NOT userPtr;
m_largeAabbsMappingCPU.push_back(m_allAabbsCPU1.size());

m_allAabbsCPU1.push_back(aabb);
}

// Computes the overlapping pairs on the GPU and leaves them in m_gpuPairs (at most maxPairs).
//
// Two passes share one pair counter:
//  1. large-vs-small AABBs with the SAP "two arrays" kernel (only when both sets are non-empty);
//  2. small-vs-small AABBs with the uniform grid: hash every small AABB, sort the hashes, find the start of
//     each cell, then search for overlaps.
// Afterwards m_gpuPairs is resized to the number of pairs found, clamped to maxPairs.
// When there are no small AABBs neither pass runs, and m_gpuPairs is emptied so that getNumOverlap()
// does not report the maxPairs scratch entries as real pairs.
void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
{
	B3_PROFILE("b3GpuGridBroadphase::calculateOverlappingPairs");

	// Debug switch: set to 1 to use the brute-force host implementation instead.
	if (0)
	{
		calculateOverlappingPairsHost(maxPairs);
		/*
		b3AlignedObjectArray<b3Int4> cpuPairs;
		m_gpuPairs.copyToHost(cpuPairs);
		printf("host m_gpuPairs.size()=%d\n",m_gpuPairs.size());
		for (int i=0;i<m_gpuPairs.size();i++)
		{
			printf("host pair %d = %d,%d\n",i,cpuPairs[i].x,cpuPairs[i].y);
		}
		*/
		return;
	}


	int numSmallAabbs = m_smallAabbsMappingGPU.size();

	// Single-element counter that both passes increment on the device.
	b3OpenCLArray<int> pairCount(m_context,m_queue);
	pairCount.push_back(0);
	// Scratch capacity for the kernels; shrunk to the real count at the end.
	m_gpuPairs.resize(maxPairs);//numSmallAabbs*maxPairsPerBody);

	{
		int numLargeAabbs = m_largeAabbsMappingGPU.size();
		if (numLargeAabbs && numSmallAabbs)
		{
			B3_PROFILE("sap2Kernel");
			b3BufferInfoCL bInfo[] = {
				b3BufferInfoCL( m_allAabbsGPU1.getBufferCL() ),
				b3BufferInfoCL( m_largeAabbsMappingGPU.getBufferCL() ),
				b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL() ),
				b3BufferInfoCL( m_gpuPairs.getBufferCL() ),
				b3BufferInfoCL(pairCount.getBufferCL())};
			b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel");
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst( numLargeAabbs );
			launcher.setConst( numSmallAabbs);
			launcher.setConst( 0 );//axis is not used
			launcher.setConst( maxPairs );
			//@todo: use actual maximum work item sizes of the device instead of hardcoded values
			launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64);
			// Read back only to report an overflow; the grid pass below keeps using the same counter.
			int numPairs = pairCount.at(0);
			if (numPairs >maxPairs)
			{
				b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
				numPairs =maxPairs;
			}
		}
	}




	if (numSmallAabbs)
	{
		B3_PROFILE("gridKernel");
		m_hashGpu.resize(numSmallAabbs);
		{
			B3_PROFILE("kCalcHashAABB");
			b3LauncherCL launch(m_queue,kCalcHashAABB,"kCalcHashAABB");
			launch.setConst(numSmallAabbs);
			launch.setBuffer(m_allAabbsGPU1.getBufferCL());
			launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL());
			launch.setBuffer(m_hashGpu.getBufferCL());
			launch.setBuffer(this->m_paramsGPU.getBufferCL());
			launch.launch1D(numSmallAabbs);
		}

		m_sorter->execute(m_hashGpu);
		int numCells = this->m_paramsCPU.m_gridSize[0]*this->m_paramsCPU.m_gridSize[1]*this->m_paramsCPU.m_gridSize[2];
		m_cellStartGpu.resize(numCells);
		//b3AlignedObjectArray<int > cellStartCpu;
		{
			B3_PROFILE("kClearCellStart");
			b3LauncherCL launch(m_queue,kClearCellStart,"kClearCellStart");
			launch.setConst(numCells);
			launch.setBuffer(m_cellStartGpu.getBufferCL());
			launch.launch1D(numCells);
			//m_cellStartGpu.copyToHost(cellStartCpu);
			//printf("??\n");

		}


		{
			B3_PROFILE("kFindCellStart");
			b3LauncherCL launch(m_queue,kFindCellStart,"kFindCellStart");
			launch.setConst(numSmallAabbs);
			launch.setBuffer(m_hashGpu.getBufferCL());
			launch.setBuffer(m_cellStartGpu.getBufferCL());
			launch.launch1D(numSmallAabbs);
			//m_cellStartGpu.copyToHost(cellStartCpu);
			//printf("??\n");

		}
		{
			B3_PROFILE("kFindOverlappingPairs");
			b3LauncherCL launch(m_queue,kFindOverlappingPairs,"kFindOverlappingPairs");
			launch.setConst(numSmallAabbs);
			launch.setBuffer(m_allAabbsGPU1.getBufferCL());
			launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL());
			launch.setBuffer(m_hashGpu.getBufferCL());
			launch.setBuffer(m_cellStartGpu.getBufferCL());
			launch.setBuffer(m_paramsGPU.getBufferCL());
			//launch.setBuffer(0);
			launch.setBuffer(pairCount.getBufferCL());
			launch.setBuffer(m_gpuPairs.getBufferCL());
			launch.setConst(maxPairs);
			launch.launch1D(numSmallAabbs);

			// Final count covers both passes; clamp it to the capacity that was allocated.
			int numPairs = pairCount.at(0);
			if (numPairs >maxPairs)
			{
				b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
				numPairs =maxPairs;
			}
			m_gpuPairs.resize(numPairs);
			if (0)
			{
				b3AlignedObjectArray<b3Int4> pairsCpu;
				m_gpuPairs.copyToHost(pairsCpu);

				int sz = m_gpuPairs.size();
				printf("m_gpuPairs.size()=%d\n",sz);
				for (int i=0;i<m_gpuPairs.size();i++)
				{
					printf("pair %d = %d,%d\n",i,pairsCpu[i].x,pairsCpu[i].y);
				}

				printf("?!?\n");
			}
		}

	}
	else
	{
		// No kernel ran, so nothing was written: drop the maxPairs scratch entries.
		m_gpuPairs.resize(0);
	}




	//calculateOverlappingPairsHost(maxPairs);
}
// Brute-force CPU reference: copies the AABBs back from the GPU, tests every pair (i < j) with
// b3TestAabbAgainstAabb2 and uploads the result into m_gpuPairs.
// Each pair stores the two m_minIndices[3] values with the smaller one in x; pairs found after
// maxPairs have been collected are discarded.
void b3GpuGridBroadphase::calculateOverlappingPairsHost(int maxPairs)
{
	m_hostPairs.resize(0);
	m_allAabbsGPU1.copyToHost(m_allAabbsCPU1);

	for (int first=0;first<m_allAabbsCPU1.size();first++)
	{
		for (int second=first+1;second<m_allAabbsCPU1.size();second++)
		{
			const bool overlapping = b3TestAabbAgainstAabb2(m_allAabbsCPU1[first].m_minVec, m_allAabbsCPU1[first].m_maxVec,
				m_allAabbsCPU1[second].m_minVec,m_allAabbsCPU1[second].m_maxVec);
			if (!overlapping)
				continue;

			int idSecond = m_allAabbsCPU1[second].m_minIndices[3];
			int idFirst = m_allAabbsCPU1[first].m_minIndices[3];

			// Store the pair ordered: smaller id in x, larger in y.
			b3Int4 pair;
			const bool secondIsSmaller = (idSecond<=idFirst);
			pair.x = secondIsSmaller ? idSecond : idFirst;
			pair.y = secondIsSmaller ? idFirst : idSecond;

			if (m_hostPairs.size()<maxPairs)
			{
				m_hostPairs.push_back(pair);
			}
		}
	}

	m_gpuPairs.copyFromHost(m_hostPairs);
}

//call writeAabbsToGpu after done making all changes (createProxy etc)
// Uploads the host-side AABB array and the small/large index mappings to their GPU counterparts.
void b3GpuGridBroadphase::writeAabbsToGpu()
{
m_allAabbsGPU1.copyFromHost(m_allAabbsCPU1);
m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU);
m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU);

}

// Returns the OpenCL buffer behind the GPU AABB array (m_allAabbsGPU1).
cl_mem b3GpuGridBroadphase::getAabbBufferWS()
{
return this->m_allAabbsGPU1.getBufferCL();
}
// Returns the current size of the GPU pair array (m_gpuPairs).
int b3GpuGridBroadphase::getNumOverlap()
{
return m_gpuPairs.size();
}
// Returns the OpenCL buffer behind the GPU pair array (m_gpuPairs).
cl_mem b3GpuGridBroadphase::getOverlappingPairBuffer()
{
return m_gpuPairs.getBufferCL();
}

// Returns a mutable reference to the GPU AABB array.
b3OpenCLArray<b3SapAabb>& b3GpuGridBroadphase::getAllAabbsGPU()
{
return m_allAabbsGPU1;
}

// Returns a mutable reference to the host-side AABB array.
b3AlignedObjectArray<b3SapAabb>& b3GpuGridBroadphase::getAllAabbsCPU()
{
return m_allAabbsCPU1;
}

// Returns a mutable reference to the GPU pair array.
b3OpenCLArray<b3Int4>& b3GpuGridBroadphase::getOverlappingPairsGPU()
{
return m_gpuPairs;
}
// Returns a mutable reference to the GPU mapping of small-AABB indices.
b3OpenCLArray<int>& b3GpuGridBroadphase::getSmallAabbIndicesGPU()
{
return m_smallAabbsMappingGPU;
}
// Returns a mutable reference to the GPU mapping of large-AABB indices.
b3OpenCLArray<int>& b3GpuGridBroadphase::getLargeAabbIndicesGPU()
{
return m_largeAabbsMappingGPU;
}


+ 88
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h View File

@@ -0,0 +1,88 @@
#ifndef B3_GPU_GRID_BROADPHASE_H
#define B3_GPU_GRID_BROADPHASE_H

#include "b3GpuBroadphaseInterface.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"

// Parameter block for the grid broadphase kernels. An instance is stored in a b3OpenCLArray and bound
// as a kernel buffer, so the field order and sizes presumably have to match the kernel-side
// declaration - verify against gridBroadphase.cl before changing the layout.
struct b3ParamsGridBroadphaseCL
{

float m_invCellSize[4]; // reciprocal cell size per axis in [0..2]; [3] is set to 0 by b3GpuGridBroadphase
int m_gridSize[4]; // number of cells per axis in [0..2]; [3] doubles as the max-bodies-per-cell value

// Reads the max-bodies-per-cell value kept in m_gridSize[3].
int getMaxBodiesPerCell() const
{
return m_gridSize[3];
}

// Stores the max-bodies-per-cell value in m_gridSize[3].
void setMaxBodiesPerCell(int maxOverlap)
{
m_gridSize[3] = maxOverlap;
}
};


// Uniform-grid implementation of b3GpuBroadphaseInterface.
// AABBs are collected on the host (createProxy / createLargeProxy), uploaded with writeAabbsToGpu(),
// and paired by calculateOverlappingPairs().
class b3GpuGridBroadphase : public b3GpuBroadphaseInterface
{
protected:
// OpenCL objects passed to the constructor; this class only stores them.
cl_context m_context;
cl_device_id m_device;
cl_command_queue m_queue;

// All AABBs (small and large), GPU copy and host copy.
b3OpenCLArray<b3SapAabb> m_allAabbsGPU1;
b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU1;

// Indices into the AABB array of the proxies created with createProxy().
b3OpenCLArray<int> m_smallAabbsMappingGPU;
b3AlignedObjectArray<int> m_smallAabbsMappingCPU;

// Indices into the AABB array of the proxies created with createLargeProxy().
b3OpenCLArray<int> m_largeAabbsMappingGPU;
b3AlignedObjectArray<int> m_largeAabbsMappingCPU;

// Overlapping pairs: host list (filled by calculateOverlappingPairsHost) and GPU list.
b3AlignedObjectArray<b3Int4> m_hostPairs;
b3OpenCLArray<b3Int4> m_gpuPairs;

// Per-AABB hash entries (sorted by m_sorter) and per-cell start table used by the grid kernels.
b3OpenCLArray<b3SortData> m_hashGpu;
b3OpenCLArray<int> m_cellStartGpu;

// Grid parameters: host value and the single-element GPU array holding a copy of it.
b3ParamsGridBroadphaseCL m_paramsCPU;
b3OpenCLArray<b3ParamsGridBroadphaseCL> m_paramsGPU;

// Created in the constructor, deleted in the destructor.
class b3RadixSort32CL* m_sorter;

public:

b3GpuGridBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q );
virtual ~b3GpuGridBroadphase();

// Factory returning a heap-allocated instance; the caller owns the returned object.
static b3GpuBroadphaseInterface* CreateFunc(cl_context ctx,cl_device_id device, cl_command_queue q)
{
return new b3GpuGridBroadphase(ctx,device,q);
}



virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);
virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);

virtual void calculateOverlappingPairs(int maxPairs);
virtual void calculateOverlappingPairsHost(int maxPairs);

//call writeAabbsToGpu after done making all changes (createProxy etc)
virtual void writeAabbsToGpu();

virtual cl_mem getAabbBufferWS();
virtual int getNumOverlap();
virtual cl_mem getOverlappingPairBuffer();

virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU();
virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU();
virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU();
virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU();
virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU();

};

#endif //B3_GPU_GRID_BROADPHASE_H

+ 577
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp View File

@@ -0,0 +1,577 @@
/*
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Initial Author Jackson Lee, 2014

#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"

#include "b3GpuParallelLinearBvh.h"

///Constructs the GPU arrays, sizes the single-element arrays, and compiles the
///OpenCL program together with every kernel used by this class.
///Each compilation result is checked with b3Assert().
///@param context OpenCL context passed to the radix sorter, the GPU arrays and the kernel compiler.
///@param device OpenCL device passed to the radix sorter and the kernel compiler.
///@param queue Command queue stored in m_queue and passed to the radix sorter and the GPU arrays.
b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue)
	: m_queue(queue),
	  m_radixSorter(context, device, queue),
	  m_rootNodeIndex(context, queue),
	  m_maxDistanceFromRoot(context, queue),
	  m_temp(context, queue),
	  m_internalNodeAabbs(context, queue),
	  m_internalNodeLeafIndexRanges(context, queue),
	  m_internalNodeChildNodes(context, queue),
	  m_internalNodeParentNodes(context, queue),
	  m_commonPrefixes(context, queue),
	  m_commonPrefixLengths(context, queue),
	  m_distanceFromRoot(context, queue),
	  m_leafNodeParentNodes(context, queue),
	  m_mortonCodesAndAabbIndicies(context, queue),
	  m_mergedAabb(context, queue),
	  m_leafNodeAabbs(context, queue),
	  m_largeAabbs(context, queue)
{
	//These three arrays always hold exactly 1 element
	m_rootNodeIndex.resize(1);
	m_maxDistanceFromRoot.resize(1);
	m_temp.resize(1);

	//Path handed to the program compiler alongside the embedded kernel source string
	const char* programPath = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl";
	const char* source = parallelLinearBvhCL;
	cl_int status;
	char* macros = 0;

	m_parallelLinearBvhProgram = b3OpenCLUtils::compileCLProgramFromString(context, device, source, &status, macros, programPath);
	b3Assert(m_parallelLinearBvhProgram);

	//Kernels used to prepare the input of the BVH construction
	m_separateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "separateAabbs", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_separateAabbsKernel);

	m_findAllNodesMergedAabbKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "findAllNodesMergedAabb", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_findAllNodesMergedAabbKernel);

	m_assignMortonCodesAndAabbIndiciesKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "assignMortonCodesAndAabbIndicies", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_assignMortonCodesAndAabbIndiciesKernel);

	//Binary radix tree construction kernels
	m_computeAdjacentPairCommonPrefixKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "computeAdjacentPairCommonPrefix", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_computeAdjacentPairCommonPrefixKernel);

	m_buildBinaryRadixTreeLeafNodesKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "buildBinaryRadixTreeLeafNodes", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_buildBinaryRadixTreeLeafNodesKernel);

	m_buildBinaryRadixTreeInternalNodesKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "buildBinaryRadixTreeInternalNodes", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_buildBinaryRadixTreeInternalNodesKernel);

	m_findDistanceFromRootKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "findDistanceFromRoot", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_findDistanceFromRootKernel);

	m_buildBinaryRadixTreeAabbsRecursiveKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "buildBinaryRadixTreeAabbsRecursive", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_buildBinaryRadixTreeAabbsRecursiveKernel);

	m_findLeafIndexRangesKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "findLeafIndexRanges", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_findLeafIndexRangesKernel);

	//Traversal kernels
	m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "plbvhCalculateOverlappingPairs", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_plbvhCalculateOverlappingPairsKernel);

	m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "plbvhRayTraverse", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_plbvhRayTraverseKernel);

	m_plbvhLargeAabbAabbTestKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "plbvhLargeAabbAabbTest", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_plbvhLargeAabbAabbTestKernel);

	m_plbvhLargeAabbRayTestKernel = b3OpenCLUtils::compileCLKernelFromString(
		context, device, source, "plbvhLargeAabbRayTest", &status, m_parallelLinearBvhProgram, macros);
	b3Assert(m_plbvhLargeAabbRayTestKernel);
}

///Releases every kernel compiled by the constructor, then the program they were compiled from.
b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh()
{
	//Listed in the same order in which the constructor created them
	cl_kernel ownedKernels[] =
	{
		m_separateAabbsKernel,
		m_findAllNodesMergedAabbKernel,
		m_assignMortonCodesAndAabbIndiciesKernel,
		m_computeAdjacentPairCommonPrefixKernel,
		m_buildBinaryRadixTreeLeafNodesKernel,
		m_buildBinaryRadixTreeInternalNodesKernel,
		m_findDistanceFromRootKernel,
		m_buildBinaryRadixTreeAabbsRecursiveKernel,
		m_findLeafIndexRangesKernel,
		m_plbvhCalculateOverlappingPairsKernel,
		m_plbvhRayTraverseKernel,
		m_plbvhLargeAabbAabbTestKernel,
		m_plbvhLargeAabbRayTestKernel
	};
	const int ownedKernelCount = sizeof(ownedKernels) / sizeof(ownedKernels[0]);

	for(int i = 0; i < ownedKernelCount; ++i)
	{
		clReleaseKernel(ownedKernels[i]);
	}

	//The program is released last, after all of its kernels
	clReleaseProgram(m_parallelLinearBvhProgram);
}

void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, const b3OpenCLArray<int>& smallAabbIndices,
const b3OpenCLArray<int>& largeAabbIndices)
{
B3_PROFILE("b3ParallelLinearBvh::build()");
int numLargeAabbs = largeAabbIndices.size();
int numSmallAabbs = smallAabbIndices.size();
//Since all AABBs(both large and small) are input as a contiguous array,
//with 2 additional arrays used to indicate the indices of large and small AABBs,
//it is necessary to separate the AABBs so that the large AABBs will not degrade the quality of the BVH.
{
B3_PROFILE("Separate large and small AABBs");
m_largeAabbs.resize(numLargeAabbs);
m_leafNodeAabbs.resize(numSmallAabbs);
//Write large AABBs into m_largeAabbs
{
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
b3BufferInfoCL( largeAabbIndices.getBufferCL() ),
b3BufferInfoCL( m_largeAabbs.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numLargeAabbs);
launcher.launch1D(numLargeAabbs);
}
//Write small AABBs into m_leafNodeAabbs
{
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
b3BufferInfoCL( smallAabbIndices.getBufferCL() ),
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numSmallAabbs);
launcher.launch1D(numSmallAabbs);
}
clFinish(m_queue);
}
//
int numLeaves = numSmallAabbs; //Number of leaves in the BVH == Number of rigid bodies with small AABBs
int numInternalNodes = numLeaves - 1;
if(numLeaves < 2)
{
//Number of leaf nodes is checked in calculateOverlappingPairs() and testRaysAgainstBvhAabbs(),
//so it does not matter if numLeaves == 0 and rootNodeIndex == -1
int rootNodeIndex = numLeaves - 1;
m_rootNodeIndex.copyFromHostPointer(&rootNodeIndex, 1);
//Since the AABBs need to be rearranged(sorted) for the BVH construction algorithm,
//m_mortonCodesAndAabbIndicies.m_value is used to map a sorted AABB index to the unsorted AABB index
//instead of directly moving the AABBs. It needs to be set for the ray cast traversal kernel to work.
//( m_mortonCodesAndAabbIndicies[].m_value == unsorted index == index of m_leafNodeAabbs )
if(numLeaves == 1)
{
b3SortData leaf;
leaf.m_value = 0; //1 leaf so index is always 0; leaf.m_key does not need to be set
m_mortonCodesAndAabbIndicies.resize(1);
m_mortonCodesAndAabbIndicies.copyFromHostPointer(&leaf, 1);
}
return;
}
//
{
m_internalNodeAabbs.resize(numInternalNodes);
m_internalNodeLeafIndexRanges.resize(numInternalNodes);
m_internalNodeChildNodes.resize(numInternalNodes);
m_internalNodeParentNodes.resize(numInternalNodes);

m_commonPrefixes.resize(numInternalNodes);
m_commonPrefixLengths.resize(numInternalNodes);
m_distanceFromRoot.resize(numInternalNodes);
m_leafNodeParentNodes.resize(numLeaves);
m_mortonCodesAndAabbIndicies.resize(numLeaves);
m_mergedAabb.resize(numLeaves);
}
//Find the merged AABB of all small AABBs; this is used to define the size of
//each cell in the virtual grid for the next kernel(2^10 cells in each dimension).
{
B3_PROFILE("Find AABB of merged nodes");
m_mergedAabb.copyFromOpenCLArray(m_leafNodeAabbs); //Need to make a copy since the kernel modifies the array
for(int numAabbsNeedingMerge = numLeaves; numAabbsNeedingMerge >= 2;
numAabbsNeedingMerge = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2)
{
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_mergedAabb.getBufferCL() ) //Resulting AABB is stored in m_mergedAabb[0]
};
b3LauncherCL launcher(m_queue, m_findAllNodesMergedAabbKernel, "m_findAllNodesMergedAabbKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numAabbsNeedingMerge);
launcher.launch1D(numAabbsNeedingMerge);
}
clFinish(m_queue);
}
//Insert the center of the AABBs into a virtual grid,
//then convert the discrete grid coordinates into a morton code
//For each element in m_mortonCodesAndAabbIndicies, set
// m_key == morton code (value to sort by)
// m_value == small AABB index
{
B3_PROFILE("Assign morton codes");
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
b3BufferInfoCL( m_mergedAabb.getBufferCL() ),
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_assignMortonCodesAndAabbIndiciesKernel, "m_assignMortonCodesAndAabbIndiciesKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numLeaves);
launcher.launch1D(numLeaves);
clFinish(m_queue);
}
//
{
B3_PROFILE("Sort leaves by morton codes");
m_radixSorter.execute(m_mortonCodesAndAabbIndicies);
clFinish(m_queue);
}
//
constructBinaryRadixTree();
//Since it is a sorted binary radix tree, each internal node contains a contiguous subset of leaf node indices.
//The root node contains leaf node indices in the range [0, numLeafNodes - 1].
//The child nodes of each node split their parent's index range into 2 contiguous halves.
//
//For example, if the root has indices [0, 31], its children might partition that range into [0, 11] and [12, 31].
//The next level in the tree could then split those ranges into [0, 2], [3, 11], [12, 22], and [23, 31].
//
//This property can be used for optimizing calculateOverlappingPairs(), to avoid testing each AABB pair twice
{
B3_PROFILE("m_findLeafIndexRangesKernel");
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_findLeafIndexRangesKernel, "m_findLeafIndexRangesKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numInternalNodes);
launcher.launch1D(numInternalNodes);
clFinish(m_queue);
}
}

///Collects overlapping AABB pairs into out_overlappingPairs.
///Small-small pairs are found with the BVH traversal kernel (only when there is more than 1 leaf),
///small-large pairs with a separate kernel (only when there is at least 1 large AABB and 1 leaf).
///@param out_overlappingPairs On entry, its size() is the maximum number of pairs that may be written.
///On exit it is resized to the number of pairs reported by the GPU counter, clamped to [0, maxPairs].
void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray<b3Int4>& out_overlappingPairs)
{
	int maxPairs = out_overlappingPairs.size();

	//m_temp serves as the pair counter handed to both kernels; it starts at 0
	b3OpenCLArray<int>& numPairsGpu = m_temp;

	int reset = 0;
	numPairsGpu.copyFromHostPointer(&reset, 1);

	//
	if( m_leafNodeAabbs.size() > 1 )
	{
		B3_PROFILE("PLBVH small-small AABB test");

		int numQueryAabbs = m_leafNodeAabbs.size();

		b3BufferInfoCL bufferInfo[] =
		{
			b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),

			b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ),
			b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
			b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ),
			b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ),
			b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),

			b3BufferInfoCL( numPairsGpu.getBufferCL() ),
			b3BufferInfoCL( out_overlappingPairs.getBufferCL() )
		};

		b3LauncherCL launcher(m_queue, m_plbvhCalculateOverlappingPairsKernel, "m_plbvhCalculateOverlappingPairsKernel");
		launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
		launcher.setConst(maxPairs);
		launcher.setConst(numQueryAabbs);

		launcher.launch1D(numQueryAabbs);
		clFinish(m_queue);
	}

	int numLargeAabbRigids = m_largeAabbs.size();
	if( numLargeAabbRigids > 0 && m_leafNodeAabbs.size() > 0 )
	{
		B3_PROFILE("PLBVH large-small AABB test");

		int numQueryAabbs = m_leafNodeAabbs.size();

		b3BufferInfoCL bufferInfo[] =
		{
			b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
			b3BufferInfoCL( m_largeAabbs.getBufferCL() ),

			b3BufferInfoCL( numPairsGpu.getBufferCL() ),
			b3BufferInfoCL( out_overlappingPairs.getBufferCL() )
		};

		b3LauncherCL launcher(m_queue, m_plbvhLargeAabbAabbTestKernel, "m_plbvhLargeAabbAabbTestKernel");
		launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
		launcher.setConst(maxPairs);
		launcher.setConst(numLargeAabbRigids);
		launcher.setConst(numQueryAabbs);

		launcher.launch1D(numQueryAabbs);
		clFinish(m_queue);
	}

	//Read the pair counter back; -1 is a sentinel that remains if the read-back does not overwrite it
	int numPairs = -1;
	numPairsGpu.copyToHostPointer(&numPairs, 1);

	if(numPairs < 0)
	{
		//A negative count must never reach resize(): report it and expose no pairs instead
		b3Error("Error reading back number of pairs: numPairs = %d.\n", numPairs);
		numPairs = 0;
		numPairsGpu.copyFromHostPointer(&numPairs, 1);
	}
	else if(numPairs > maxPairs)
	{
		//The counter exceeded the capacity of the output array; clamp the counter on both host and GPU
		b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
		numPairs = maxPairs;
		numPairsGpu.copyFromHostPointer(&maxPairs, 1);
	}

	out_overlappingPairs.resize(numPairs);
}


void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayInfo>& rays,
b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs)
{
B3_PROFILE("PLBVH testRaysAgainstBvhAabbs()");
int numRays = rays.size();
int maxRayRigidPairs = out_rayRigidPairs.size();
int reset = 0;
out_numRayRigidPairs.copyFromHostPointer(&reset, 1);
//
if( m_leafNodeAabbs.size() > 0 )
{
B3_PROFILE("PLBVH ray test small AABB");
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ),
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ),
b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ),
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
b3BufferInfoCL( rays.getBufferCL() ),
b3BufferInfoCL( out_numRayRigidPairs.getBufferCL() ),
b3BufferInfoCL( out_rayRigidPairs.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_plbvhRayTraverseKernel, "m_plbvhRayTraverseKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(maxRayRigidPairs);
launcher.setConst(numRays);
launcher.launch1D(numRays);
clFinish(m_queue);
}
int numLargeAabbRigids = m_largeAabbs.size();
if(numLargeAabbRigids > 0)
{
B3_PROFILE("PLBVH ray test large AABB");
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_largeAabbs.getBufferCL() ),
b3BufferInfoCL( rays.getBufferCL() ),
b3BufferInfoCL( out_numRayRigidPairs.getBufferCL() ),
b3BufferInfoCL( out_rayRigidPairs.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_plbvhLargeAabbRayTestKernel, "m_plbvhLargeAabbRayTestKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numLargeAabbRigids);
launcher.setConst(maxRayRigidPairs);
launcher.setConst(numRays);
launcher.launch1D(numRays);
clFinish(m_queue);
}
//
int numRayRigidPairs = -1;
out_numRayRigidPairs.copyToHostPointer(&numRayRigidPairs, 1);
if(numRayRigidPairs > maxRayRigidPairs)
b3Error("Error running out of rayRigid pairs: numRayRigidPairs = %d, maxRayRigidPairs = %d.\n", numRayRigidPairs, maxRayRigidPairs);
}

void b3GpuParallelLinearBvh::constructBinaryRadixTree()
{
B3_PROFILE("b3GpuParallelLinearBvh::constructBinaryRadixTree()");
int numLeaves = m_leafNodeAabbs.size();
int numInternalNodes = numLeaves - 1;
//Each internal node is placed in between 2 leaf nodes.
//By using this arrangement and computing the common prefix between
//these 2 adjacent leaf nodes, it is possible to quickly construct a binary radix tree.
{
B3_PROFILE("m_computeAdjacentPairCommonPrefixKernel");
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
b3BufferInfoCL( m_commonPrefixes.getBufferCL() ),
b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_computeAdjacentPairCommonPrefixKernel, "m_computeAdjacentPairCommonPrefixKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numInternalNodes);
launcher.launch1D(numInternalNodes);
clFinish(m_queue);
}
//For each leaf node, select its parent node by
//comparing the 2 nearest internal nodes and assign child node indices
{
B3_PROFILE("m_buildBinaryRadixTreeLeafNodesKernel");
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ),
b3BufferInfoCL( m_leafNodeParentNodes.getBufferCL() ),
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeLeafNodesKernel, "m_buildBinaryRadixTreeLeafNodesKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numLeaves);
launcher.launch1D(numLeaves);
clFinish(m_queue);
}
//For each internal node, perform 2 binary searches among the other internal nodes
//to its left and right to find its potential parent nodes and assign child node indices
{
B3_PROFILE("m_buildBinaryRadixTreeInternalNodesKernel");
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_commonPrefixes.getBufferCL() ),
b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ),
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ),
b3BufferInfoCL( m_rootNodeIndex.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeInternalNodesKernel, "m_buildBinaryRadixTreeInternalNodesKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numInternalNodes);
launcher.launch1D(numInternalNodes);
clFinish(m_queue);
}
//Find the number of nodes seperating each internal node and the root node
//so that the AABBs can be set using the next kernel.
//Also determine the maximum number of nodes separating an internal node and the root node.
{
B3_PROFILE("m_findDistanceFromRootKernel");
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ),
b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ),
b3BufferInfoCL( m_maxDistanceFromRoot.getBufferCL() ),
b3BufferInfoCL( m_distanceFromRoot.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_findDistanceFromRootKernel, "m_findDistanceFromRootKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numInternalNodes);
launcher.launch1D(numInternalNodes);
clFinish(m_queue);
}
//Starting from the internal nodes nearest to the leaf nodes, recursively move up
//the tree towards the root to set the AABBs of each internal node; each internal node
//checks its children and merges their AABBs
{
B3_PROFILE("m_buildBinaryRadixTreeAabbsRecursiveKernel");
int maxDistanceFromRoot = -1;
{
B3_PROFILE("copy maxDistanceFromRoot to CPU");
m_maxDistanceFromRoot.copyToHostPointer(&maxDistanceFromRoot, 1);
clFinish(m_queue);
}
for(int distanceFromRoot = maxDistanceFromRoot; distanceFromRoot >= 0; --distanceFromRoot)
{
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_distanceFromRoot.getBufferCL() ),
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeAabbsRecursiveKernel, "m_buildBinaryRadixTreeAabbsRecursiveKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(maxDistanceFromRoot);
launcher.setConst(distanceFromRoot);
launcher.setConst(numInternalNodes);
//It may seem inefficent to launch a thread for each internal node when a
//much smaller number of nodes is actually processed, but this is actually
//faster than determining the exact nodes that are ready to merge their child AABBs.
launcher.launch1D(numInternalNodes);
}
clFinish(m_queue);
}
}


+ 125
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h View File

@@ -0,0 +1,125 @@
/*
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Initial Author Jackson Lee, 2014

#ifndef B3_GPU_PARALLEL_LINEAR_BVH_H
#define B3_GPU_PARALLEL_LINEAR_BVH_H

//#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
#include "Bullet3Common/shared/b3Int2.h"
#include "Bullet3Common/shared/b3Int4.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h"

#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h"

#include "Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h"

#define b3Int64 cl_long

///@brief GPU Parallel Linearized Bounding Volume Hierarchy(LBVH) that is reconstructed every frame
///@remarks
///See presentation in docs/b3GpuParallelLinearBvh.pdf for algorithm details.
///@par
///Related papers: \n
///"Fast BVH Construction on GPUs" [Lauterbach et al. 2009] \n
///"Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d trees" [Karras 2012] \n
///@par
///The basic algorithm for building the BVH as presented in [Lauterbach et al. 2009] consists of 4 stages:
/// - [fully parallel] Assign morton codes for each AABB using its center (after quantizing the AABB centers into a virtual grid)
/// - [fully parallel] Sort morton codes
/// - [somewhat parallel] Build binary radix tree (assign parent/child pointers for internal nodes of the BVH)
/// - [somewhat parallel] Set internal node AABBs
///@par
///[Karras 2012] improves on the algorithm by introducing fully parallel methods for the last 2 stages.
///The BVH implementation here shares many concepts with [Karras 2012], but a different method is used for constructing the tree.
///Instead of searching for the child nodes of each internal node, we search for the parent node of each node.
///Additionally, a non-atomic traversal that starts from the leaf nodes and moves towards the root node is used to set the AABBs.
class b3GpuParallelLinearBvh
{
//Command queue on which every kernel launch and clFinish() of this class is issued
cl_command_queue m_queue;
//Program from which all kernels below are compiled; released in the destructor
cl_program m_parallelLinearBvhProgram;
//Input preparation kernels used by build()
cl_kernel m_separateAabbsKernel;
cl_kernel m_findAllNodesMergedAabbKernel;
cl_kernel m_assignMortonCodesAndAabbIndiciesKernel;
//Binary radix tree construction kernels
cl_kernel m_computeAdjacentPairCommonPrefixKernel;
cl_kernel m_buildBinaryRadixTreeLeafNodesKernel;
cl_kernel m_buildBinaryRadixTreeInternalNodesKernel;
cl_kernel m_findDistanceFromRootKernel;
cl_kernel m_buildBinaryRadixTreeAabbsRecursiveKernel;
cl_kernel m_findLeafIndexRangesKernel;
//Traversal kernels
cl_kernel m_plbvhCalculateOverlappingPairsKernel;
cl_kernel m_plbvhRayTraverseKernel;
cl_kernel m_plbvhLargeAabbAabbTestKernel;
cl_kernel m_plbvhLargeAabbRayTestKernel;
//Sorts m_mortonCodesAndAabbIndicies in build()
b3RadixSort32CL m_radixSorter;
//1 element
b3OpenCLArray<int> m_rootNodeIndex; //Most significant bit(0x80000000) is set to indicate internal node
b3OpenCLArray<int> m_maxDistanceFromRoot; //Max number of internal nodes between an internal node and the root node
b3OpenCLArray<int> m_temp; //Used to hold the number of pairs in calculateOverlappingPairs()
//1 element per internal node (number_of_internal_nodes == number_of_leaves - 1)
b3OpenCLArray<b3SapAabb> m_internalNodeAabbs;
b3OpenCLArray<b3Int2> m_internalNodeLeafIndexRanges; //x == min leaf index, y == max leaf index
b3OpenCLArray<b3Int2> m_internalNodeChildNodes; //x == left child, y == right child; msb(0x80000000) is set to indicate internal node
b3OpenCLArray<int> m_internalNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal
//1 element per internal node; for binary radix tree construction
b3OpenCLArray<b3Int64> m_commonPrefixes;
b3OpenCLArray<int> m_commonPrefixLengths;
b3OpenCLArray<int> m_distanceFromRoot; //Number of internal nodes between this node and the root
//1 element per leaf node (leaf nodes only include small AABBs)
b3OpenCLArray<int> m_leafNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key == morton code, m_value == aabb index in m_leafNodeAabbs
b3OpenCLArray<b3SapAabb> m_mergedAabb; //m_mergedAabb[0] contains the merged AABB of all leaf nodes
b3OpenCLArray<b3SapAabb> m_leafNodeAabbs; //Contains only small AABBs
//1 element per large AABB, which is not stored in the BVH
b3OpenCLArray<b3SapAabb> m_largeAabbs;
public:
///Creates the GPU arrays and compiles the OpenCL program and kernels.
b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue);
///Releases the kernels and the program.
virtual ~b3GpuParallelLinearBvh();
///Must be called before any other function
///@param worldSpaceAabbs Contiguous array of all AABBs, both large and small.
///@param smallAabbIndices Indices into worldSpaceAabbs of the small AABBs; these become the leaves of the BVH.
///@param largeAabbIndices Indices into worldSpaceAabbs of the large AABBs; these are stored outside the BVH.
void build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, const b3OpenCLArray<int>& smallAabbIndices,
const b3OpenCLArray<int>& largeAabbIndices);
///calculateOverlappingPairs() uses the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs.
///@param out_overlappingPairs The size() of this array is used to determine the max number of pairs.
///If the number of overlapping pairs is < out_overlappingPairs.size(), out_overlappingPairs is resized.
void calculateOverlappingPairs(b3OpenCLArray<b3Int4>& out_overlappingPairs);
///@param rays The rays to test against the AABBs passed to build().
///@param out_numRayRigidPairs Array of length 1; contains the number of detected ray-rigid AABB intersections;
///this value may be greater than out_rayRigidPairs.size() if out_rayRigidPairs is not large enough.
///@param out_rayRigidPairs Contains an array of rays intersecting rigid AABBs; x == ray index, y == rigid body index.
///If the size of this array is insufficient to hold all ray-rigid AABB intersections, additional intersections are discarded.
void testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayInfo>& rays,
b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs);
private:
///Builds the binary radix tree and the internal node AABBs; called by build() after the leaves are sorted.
void constructBinaryRadixTree();
};

#endif

+ 80
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp View File

@@ -0,0 +1,80 @@
/*
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Initial Author Jackson Lee, 2014

#include "b3GpuParallelLinearBvhBroadphase.h"

///Constructs the BVH and the GPU-side arrays; no other work is done here.
///@param context OpenCL context forwarded to the BVH and to every GPU array.
///@param device OpenCL device forwarded to the BVH.
///@param queue OpenCL command queue forwarded to the BVH and to every GPU array.
b3GpuParallelLinearBvhBroadphase::b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue) :
m_plbvh(context, device, queue),
m_overlappingPairsGpu(context, queue),
m_aabbsGpu(context, queue),
m_smallAabbsMappingGpu(context, queue),
m_largeAabbsMappingGpu(context, queue)
{
}

void b3GpuParallelLinearBvhBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask)
{
int newAabbIndex = m_aabbsCpu.size();

b3SapAabb aabb;
aabb.m_minVec = aabbMin;
aabb.m_maxVec = aabbMax;
aabb.m_minIndices[3] = userPtr;
aabb.m_signedMaxIndices[3] = newAabbIndex;
m_smallAabbsMappingCpu.push_back(newAabbIndex);
m_aabbsCpu.push_back(aabb);
}
void b3GpuParallelLinearBvhBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask)
{
int newAabbIndex = m_aabbsCpu.size();

b3SapAabb aabb;
aabb.m_minVec = aabbMin;
aabb.m_maxVec = aabbMax;
aabb.m_minIndices[3] = userPtr;
aabb.m_signedMaxIndices[3] = newAabbIndex;
m_largeAabbsMappingCpu.push_back(newAabbIndex);
m_aabbsCpu.push_back(aabb);
}

///Rebuilds the BVH from the GPU-side AABB arrays and collects the overlapping pairs into m_overlappingPairsGpu.
///Only the GPU-side arrays are read here; the CPU-side arrays are copied to them by writeAabbsToGpu().
///@param maxPairs Size given to m_overlappingPairsGpu before the query; the BVH uses that size as the pair limit.
void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairs(int maxPairs)
{
//Reconstruct BVH
m_plbvh.build(m_aabbsGpu, m_smallAabbsMappingGpu, m_largeAabbsMappingGpu);
//
//The pair array must be sized first: its size() tells the BVH how many pairs it may write
m_overlappingPairsGpu.resize(maxPairs);
m_plbvh.calculateOverlappingPairs(m_overlappingPairsGpu);
}
///CPU version of calculateOverlappingPairs(); not implemented for this broadphase.
///@param maxPairs Unused.
void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairsHost(int maxPairs)
{
b3Assert(0); //CPU version not implemented
}

///Copies the CPU-side AABB array and the small/large AABB index mappings into their GPU-side counterparts.
void b3GpuParallelLinearBvhBroadphase::writeAabbsToGpu()
{
m_aabbsGpu.copyFromHost(m_aabbsCpu);
m_smallAabbsMappingGpu.copyFromHost(m_smallAabbsMappingCpu);
m_largeAabbsMappingGpu.copyFromHost(m_largeAabbsMappingCpu);
}

+ 66
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h View File

@@ -0,0 +1,66 @@
/*
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Initial Author Jackson Lee, 2014

#ifndef B3_GPU_PARALLEL_LINEAR_BVH_BROADPHASE_H
#define B3_GPU_PARALLEL_LINEAR_BVH_BROADPHASE_H

#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"

#include "b3GpuParallelLinearBvh.h"

///Broadphase implementation of b3GpuBroadphaseInterface backed by b3GpuParallelLinearBvh.
///AABBs are collected on the CPU by createProxy()/createLargeProxy(), copied to the GPU by
///writeAabbsToGpu(), and the BVH is rebuilt on every calculateOverlappingPairs() call.
class b3GpuParallelLinearBvhBroadphase : public b3GpuBroadphaseInterface
{
b3GpuParallelLinearBvh m_plbvh;
//Result of the last calculateOverlappingPairs() call
b3OpenCLArray<b3Int4> m_overlappingPairsGpu;
//GPU-side copies of the CPU arrays below, filled by writeAabbsToGpu()
b3OpenCLArray<b3SapAabb> m_aabbsGpu;
b3OpenCLArray<int> m_smallAabbsMappingGpu;
b3OpenCLArray<int> m_largeAabbsMappingGpu;
//All AABBs, small and large, in creation order
b3AlignedObjectArray<b3SapAabb> m_aabbsCpu;
//Indices into m_aabbsCpu of the AABBs added by createProxy()
b3AlignedObjectArray<int> m_smallAabbsMappingCpu;
//Indices into m_aabbsCpu of the AABBs added by createLargeProxy()
b3AlignedObjectArray<int> m_largeAabbsMappingCpu;
public:
b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue);
virtual ~b3GpuParallelLinearBvhBroadphase() {}

//Add a small (createProxy) or large (createLargeProxy) AABB to the CPU-side arrays
virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask);
virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask);
//Rebuilds the BVH and collects at most maxPairs overlapping pairs
virtual void calculateOverlappingPairs(int maxPairs);
//CPU version; not implemented
virtual void calculateOverlappingPairsHost(int maxPairs);

//call writeAabbsToGpu after done making all changes (createProxy etc)
virtual void writeAabbsToGpu();
//Size of the overlapping pair array
virtual int getNumOverlap() { return m_overlappingPairsGpu.size(); }
virtual cl_mem getOverlappingPairBuffer() { return m_overlappingPairsGpu.getBufferCL(); }

//Accessors for the underlying buffers and arrays
virtual cl_mem getAabbBufferWS() { return m_aabbsGpu.getBufferCL(); }
virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() { return m_aabbsGpu; }
virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU() { return m_overlappingPairsGpu; }
virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU() { return m_smallAabbsMappingGpu; }
virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU() { return m_largeAabbsMappingGpu; }
virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() { return m_aabbsCpu; }
//Factory function; the caller receives a heap-allocated instance created with new
static b3GpuBroadphaseInterface* CreateFunc(cl_context context, cl_device_id device, cl_command_queue queue)
{
return new b3GpuParallelLinearBvhBroadphase(context, device, queue);
}
};

#endif

+ 1322
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp
File diff suppressed because it is too large
View File


+ 151
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h View File

@@ -0,0 +1,151 @@
#ifndef B3_GPU_SAP_BROADPHASE_H
#define B3_GPU_SAP_BROADPHASE_H

#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2
class b3Vector3;
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"

#include "b3SapAabb.h"
#include "Bullet3Common/shared/b3Int2.h"

#include "b3GpuBroadphaseInterface.h"


///Implementation of b3GpuBroadphaseInterface named after sweep-and-prune (SAP).
///The kernel variant is chosen at construction through b3GpuSapKernelType; the static
///CreateFunc* helpers construct an instance for each variant.
///Note that most of the working arrays are public data members.
class b3GpuSapBroadphase : public b3GpuBroadphaseInterface
{
//OpenCL handles; the context, device and queue presumably come from the constructor arguments - confirm in the .cpp
cl_context m_context;
cl_device_id m_device;
cl_command_queue m_queue;
cl_kernel m_flipFloatKernel;
cl_kernel m_scatterKernel ;
cl_kernel m_copyAabbsKernel;
cl_kernel m_sapKernel;
cl_kernel m_sap2Kernel;
cl_kernel m_prepareSumVarianceKernel;

class b3RadixSort32CL* m_sorter;

///test for 3d SAP
//Indexed [3][2]; presumably [axis][buffer] with m_currentBuffer selecting the buffer - TODO confirm
b3AlignedObjectArray<b3SortData> m_sortedAxisCPU[3][2];
b3AlignedObjectArray<b3UnsignedInt2> m_objectMinMaxIndexCPU[3][2];
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0;
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1;
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2;
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0prev;
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1prev;
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2prev;

b3OpenCLArray<b3SortData> m_sortedAxisGPU0;
b3OpenCLArray<b3SortData> m_sortedAxisGPU1;
b3OpenCLArray<b3SortData> m_sortedAxisGPU2;
b3OpenCLArray<b3SortData> m_sortedAxisGPU0prev;
b3OpenCLArray<b3SortData> m_sortedAxisGPU1prev;
b3OpenCLArray<b3SortData> m_sortedAxisGPU2prev;


b3OpenCLArray<b3Int4> m_addedHostPairsGPU;
b3OpenCLArray<b3Int4> m_removedHostPairsGPU;
b3OpenCLArray<int> m_addedCountGPU;
b3OpenCLArray<int> m_removedCountGPU;
int m_currentBuffer;

public:

b3OpenCLArray<int> m_pairCount;


b3OpenCLArray<b3SapAabb> m_allAabbsGPU;
b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU;

//Mutable access to the AABB arrays declared just above
virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU()
{
return m_allAabbsGPU;
}
virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU()
{
return m_allAabbsCPU;
}

b3OpenCLArray<b3Vector3> m_sum;
b3OpenCLArray<b3Vector3> m_sum2;
b3OpenCLArray<b3Vector3> m_dst;

b3OpenCLArray<int> m_smallAabbsMappingGPU;
b3AlignedObjectArray<int> m_smallAabbsMappingCPU;

b3OpenCLArray<int> m_largeAabbsMappingGPU;
b3AlignedObjectArray<int> m_largeAabbsMappingCPU;

b3OpenCLArray<b3Int4> m_overlappingPairs;

//temporary gpu work memory
b3OpenCLArray<b3SortData> m_gpuSmallSortData;
b3OpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs;

class b3PrefixScanFloat4CL* m_prefixScanFloat4;

//Selects the pair-finding variant; values start at 1 and increase in declaration order
enum b3GpuSapKernelType
{
B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU=1,
B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU,
B3_GPU_SAP_KERNEL_ORIGINAL,
B3_GPU_SAP_KERNEL_BARRIER,
B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY
};

//kernelType defaults to B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY
b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q , b3GpuSapKernelType kernelType=B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
virtual ~b3GpuSapBroadphase();
//Factories, one per kernel type. Each returns an object allocated with new,
//so the caller is responsible for deleting it.
static b3GpuBroadphaseInterface* CreateFuncBruteForceCpu(cl_context ctx,cl_device_id device, cl_command_queue q)
{
return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU);
}

static b3GpuBroadphaseInterface* CreateFuncBruteForceGpu(cl_context ctx,cl_device_id device, cl_command_queue q)
{
return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU);
}

static b3GpuBroadphaseInterface* CreateFuncOriginal(cl_context ctx,cl_device_id device, cl_command_queue q)
{
return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_ORIGINAL);
}
static b3GpuBroadphaseInterface* CreateFuncBarrier(cl_context ctx,cl_device_id device, cl_command_queue q)
{
return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BARRIER);
}
static b3GpuBroadphaseInterface* CreateFuncLocalMemory(cl_context ctx,cl_device_id device, cl_command_queue q)
{
return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
}

virtual void calculateOverlappingPairs(int maxPairs);
virtual void calculateOverlappingPairsHost(int maxPairs);
void reset();

void init3dSap();
virtual void calculateOverlappingPairsHostIncremental3Sap();

virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);
virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);

//call writeAabbsToGpu after done making all changes (createProxy etc)
virtual void writeAabbsToGpu();

//Accessors required by b3GpuBroadphaseInterface; defined out of line
virtual cl_mem getAabbBufferWS();
virtual int getNumOverlap();
virtual cl_mem getOverlappingPairBuffer();
virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU();
virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU();
virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU();
};

#endif //B3_GPU_SAP_BROADPHASE_H

+ 14
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h View File

@@ -0,0 +1,14 @@
#ifndef B3_SAP_AABB_H
#define B3_SAP_AABB_H

#include "Bullet3Common/b3Scalar.h"
#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"

///b3SapAabb adds no members to b3Aabb; it exists only to apply
///B3_ATTRIBUTE_ALIGNED16 so that the b3Aabb is 16-byte aligned.
B3_ATTRIBUTE_ALIGNED16(struct) b3SapAabb : public b3Aabb
{

};


#endif //B3_SAP_AABB_H

+ 216
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl View File

@@ -0,0 +1,216 @@


// Converts an integer grid cell into its linear index in the cell table.
// pParams[1] is reinterpreted as an int4 whose x/y/z are the grid dimensions.
// Each coordinate is wrapped with the mask (dimension - 1), which only acts as
// a modulo when the dimension is a power of two.
int getPosHash(int4 gridPos, __global float4* pParams)
{
	const int4 dims = *((__global int4*)(pParams + 1));
	const int cellX = gridPos.x & (dims.x - 1);
	const int cellY = gridPos.y & (dims.y - 1);
	const int cellZ = gridPos.z & (dims.z - 1);
	// row-major layout: x varies fastest, then y, then z
	return cellZ * dims.y * dims.x + cellY * dims.x + cellX;
}

// Quantizes a world-space position into a wrapped integer grid cell.
// pParams[0].xyz are the per-axis scale factors applied before floor();
// pParams[1] is reinterpreted as an int4 holding the grid dimensions.
// Only x, y and z of the result are written; the w component is left unset.
int4 getGridPos(float4 worldPos, __global float4* pParams)
{
	const int4 dims = *((__global int4*)(pParams + 1));
	int4 cell;
	cell.x = ((int)floor(worldPos.x * pParams[0].x)) & (dims.x - 1);
	cell.y = ((int)floor(worldPos.y * pParams[0].y)) & (dims.y - 1);
	cell.z = ((int)floor(worldPos.z * pParams[0].z)) & (dims.z - 1);
	return cell;
}


// One work-item per small AABB: hashes the AABB centre into a grid cell and
// stores (cell hash, object index) in pHash[index].
// Each AABB occupies two consecutive float4 entries (min, max) in allpAABB and
// is located through smallAabbMapping.
__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )
{
	const int index = get_global_id(0);
	if (index < numObjects)
	{
		const float4 lower = allpAABB[smallAabbMapping[index]*2];
		const float4 upper = allpAABB[smallAabbMapping[index]*2 + 1];

		// centre of the box; w is not a coordinate and is zeroed
		float4 center;
		center.x = (lower.x + upper.x) * 0.5f;
		center.y = (lower.y + upper.y) * 0.5f;
		center.z = (lower.z + upper.z) * 0.5f;
		center.w = 0.f;

		const int4 cell = getGridPos(center, pParams);

		// x = cell hash, y = index of this object
		int2 entry;
		entry.x = getPosHash(cell, pParams);
		entry.y = index;
		pHash[index] = entry;
	}
}

// Fills the first numCells entries of pCellStart with -1, the value
// findPairsInCell() treats as "cell empty".
__kernel void kClearCellStart(	int numCells, 
								__global int* pCellStart )
{
	const int cell = get_global_id(0);
	if (cell < numCells)
	{
		pCellStart[cell] = -1;
	}
}

// For every position in pHash whose hash differs from its predecessor's,
// records that position in cellStart[hash].
// Each work-item publishes its hash in local memory at slot (local id + 1) and the
// first work-item of the group loads the predecessor's hash into slot 0, so the
// comparison needs only one global read per work-item.
// The local array has 513 slots, so the work-group size must not exceed 512.
__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart )
{
	__local int sharedHash[513];
	const int index = get_global_id(0);
	const int localIdx = get_local_id(0);
	const bool inRange = (index < numObjects);
	int myHash = 0;	// only meaningful when inRange

	if (inRange)
	{
		myHash = pHash[index].x;
		sharedHash[localIdx + 1] = myHash;
		if ((index > 0) && (localIdx == 0))
		{
			// the predecessor belongs to the previous work-group
			sharedHash[0] = pHash[index - 1].x;
		}
	}

	// reached by every work-item of the group, in range or not
	barrier(CLK_LOCAL_MEM_FENCE);

	if (inRange && ((index == 0) || (myHash != sharedHash[localIdx])))
	{
		cellStart[myHash] = index;
	}
}

// Returns 1 when the two boxes overlap or touch on all three axes, 0 otherwise.
// The w components are ignored.
int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)
{
	const int overlapX = (min0.x <= max1.x) && (min1.x <= max0.x);
	const int overlapY = (min0.y <= max1.y) && (min1.y <= max0.y);
	const int overlapZ = (min0.z <= max1.z) && (min1.z <= max0.z);
	return overlapX && overlapY && overlapZ;
}




// Tests the AABB at sorted position 'index' against the other AABBs hashed into cell gridPos.
// The cell's entries are walked in pHash starting at pCellStart[hash]; the walk stops at the
// first entry with a different hash, after pGridDim.w entries, or at numObjects.
// A pair is emitted only when this AABB's handle (integer bits of min.w) is smaller than the
// other one's. pairCount is incremented for every such pair, also when the pair is not stored
// because maxPairs was reached, so it can end up larger than maxPairs.
void findPairsInCell(	int numObjects,
						int4 gridPos,
						int index,
						__global int2* pHash,
						__global int* pCellStart,
						__global float4* allpAABB, 
						__global const int* smallAabbMapping,
						__global float4* pParams,
						volatile __global int* pairCount,
						__global int4* pPairBuff2,
						int maxPairs
					)
{
	const int4 gridParams = *((__global int4*)(pParams + 1));
	const int maxBodiesPerCell = gridParams.w;
	const int cellHash = getPosHash(gridPos, pParams);

	const int firstEntry = pCellStart[cellHash];
	if (firstEntry == -1)
	{
		return;	// cell empty
	}

	// the AABB being tested
	const int selfBody = pHash[index].y;
	const float4 selfMin = allpAABB[smallAabbMapping[selfBody]*2 + 0];
	const float4 selfMax = allpAABB[smallAabbMapping[selfBody]*2 + 1];
	const int selfHandle = as_int(selfMin.w);

	int lastEntry = firstEntry + maxBodiesPerCell;
	if (lastEntry > numObjects)
	{
		lastEntry = numObjects;
	}

	for (int entry = firstEntry; entry < lastEntry; ++entry)
	{
		const int2 candidate = pHash[entry];
		if (candidate.x != cellHash)
		{
			break;	// no longer in same bucket
		}

		const int otherBody = candidate.y;
		if (otherBody == selfBody)
		{
			continue;	// do not collide with self
		}

		const float4 otherMin = allpAABB[smallAabbMapping[otherBody]*2 + 0];
		const float4 otherMax = allpAABB[smallAabbMapping[otherBody]*2 + 1];
		if (!testAABBOverlap(selfMin, selfMax, otherMin, otherMax))
		{
			continue;
		}
		if (!pairCount)
		{
			continue;	// nowhere to count or store pairs
		}

		const int otherHandle = as_int(otherMin.w);
		if (selfHandle >= otherHandle)
		{
			continue;	// the work-item owning the smaller handle reports the pair
		}

		const int slot = atomic_add(pairCount, 1);
		if (slot < maxPairs)
		{
			int4 found;
			found.x = selfHandle;
			found.y = otherHandle;
			found.z = -1;
			found.w = -1;
			pPairBuff2[slot] = found;
		}
	}
}

// One work-item per sorted hash entry: locates the grid cell of the entry's AABB centre
// and runs findPairsInCell() on that cell and its 26 neighbours (offsets -1..1 per axis).
__kernel void kFindOverlappingPairs(	int numObjects,
										__global float4* allpAABB, 
										__global const int* smallAabbMapping,
										__global int2* pHash, 
										__global int* pCellStart, 
										__global float4* pParams ,
										volatile __global int* pairCount,
										__global int4* pPairBuff2,
										int maxPairs
									)

{
	const int index = get_global_id(0);
	if (index >= numObjects)
	{
		return;
	}

	const int body = pHash[index].y;
	const float4 lower = allpAABB[smallAabbMapping[body]*2 + 0];
	const float4 upper = allpAABB[smallAabbMapping[body]*2 + 1];

	// centre of the box; w is left unset, getGridPos() reads x, y and z only
	float4 center;
	center.x = (lower.x + upper.x) * 0.5f;
	center.y = (lower.y + upper.y) * 0.5f;
	center.z = (lower.z + upper.z) * 0.5f;

	const int4 homeCell = getGridPos(center, pParams);
	int4 neighbour;

	// examine only neighbouring cells
	for (int dz = -1; dz <= 1; dz++)
	{
		neighbour.z = homeCell.z + dz;
		for (int dy = -1; dy <= 1; dy++)
		{
			neighbour.y = homeCell.y + dy;
			for (int dx = -1; dx <= 1; dx++)
			{
				neighbour.x = homeCell.x + dx;
				findPairsInCell(numObjects, neighbour, index, pHash, pCellStart, allpAABB, smallAabbMapping, pParams, pairCount, pPairBuff2, maxPairs);
			}
		}
	}
}






+ 199
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h View File

@@ -0,0 +1,199 @@
//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
//gridBroadphaseCL holds the OpenCL C source of the grid broadphase kernels
//(kCalcHashAABB, kClearCellStart, kFindCellStart, kFindOverlappingPairs and their helpers)
//as one string literal, one source line per quoted fragment.
//Do not edit the string by hand: change the .cl source and regenerate this header.
static const char* gridBroadphaseCL= \
"int getPosHash(int4 gridPos, __global float4* pParams)\n"
"{\n"
" int4 gridDim = *((__global int4*)(pParams + 1));\n"
" gridPos.x &= gridDim.x - 1;\n"
" gridPos.y &= gridDim.y - 1;\n"
" gridPos.z &= gridDim.z - 1;\n"
" int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x;\n"
" return hash;\n"
"} \n"
"int4 getGridPos(float4 worldPos, __global float4* pParams)\n"
"{\n"
" int4 gridPos;\n"
" int4 gridDim = *((__global int4*)(pParams + 1));\n"
" gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1);\n"
" gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1);\n"
" gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1);\n"
" return gridPos;\n"
"}\n"
"// calculate grid hash value for each body using its AABB\n"
"__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )\n"
"{\n"
" int index = get_global_id(0);\n"
" if(index >= numObjects)\n"
" {\n"
" return;\n"
" }\n"
" float4 bbMin = allpAABB[smallAabbMapping[index]*2];\n"
" float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];\n"
" float4 pos;\n"
" pos.x = (bbMin.x + bbMax.x) * 0.5f;\n"
" pos.y = (bbMin.y + bbMax.y) * 0.5f;\n"
" pos.z = (bbMin.z + bbMax.z) * 0.5f;\n"
" pos.w = 0.f;\n"
" // get address in grid\n"
" int4 gridPos = getGridPos(pos, pParams);\n"
" int gridHash = getPosHash(gridPos, pParams);\n"
" // store grid hash and body index\n"
" int2 hashVal;\n"
" hashVal.x = gridHash;\n"
" hashVal.y = index;\n"
" pHash[index] = hashVal;\n"
"}\n"
"__kernel void kClearCellStart( int numCells, \n"
" __global int* pCellStart )\n"
"{\n"
" int index = get_global_id(0);\n"
" if(index >= numCells)\n"
" {\n"
" return;\n"
" }\n"
" pCellStart[index] = -1;\n"
"}\n"
"__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart )\n"
"{\n"
" __local int sharedHash[513];\n"
" int index = get_global_id(0);\n"
" int2 sortedData;\n"
" if(index < numObjects)\n"
" {\n"
" sortedData = pHash[index];\n"
" // Load hash data into shared memory so that we can look \n"
" // at neighboring body's hash value without loading\n"
" // two hash values per thread\n"
" sharedHash[get_local_id(0) + 1] = sortedData.x;\n"
" if((index > 0) && (get_local_id(0) == 0))\n"
" {\n"
" // first thread in block must load neighbor body hash\n"
" sharedHash[0] = pHash[index-1].x;\n"
" }\n"
" }\n"
" barrier(CLK_LOCAL_MEM_FENCE);\n"
" if(index < numObjects)\n"
" {\n"
" if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))\n"
" {\n"
" cellStart[sortedData.x] = index;\n"
" }\n"
" }\n"
"}\n"
"int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)\n"
"{\n"
" return (min0.x <= max1.x)&& (min1.x <= max0.x) && \n"
" (min0.y <= max1.y)&& (min1.y <= max0.y) && \n"
" (min0.z <= max1.z)&& (min1.z <= max0.z); \n"
"}\n"
"//search for AABB 'index' against other AABBs' in this cell\n"
"void findPairsInCell( int numObjects,\n"
" int4 gridPos,\n"
" int index,\n"
" __global int2* pHash,\n"
" __global int* pCellStart,\n"
" __global float4* allpAABB, \n"
" __global const int* smallAabbMapping,\n"
" __global float4* pParams,\n"
" volatile __global int* pairCount,\n"
" __global int4* pPairBuff2,\n"
" int maxPairs\n"
" )\n"
"{\n"
" int4 pGridDim = *((__global int4*)(pParams + 1));\n"
" int maxBodiesPerCell = pGridDim.w;\n"
" int gridHash = getPosHash(gridPos, pParams);\n"
" // get start of bucket for this cell\n"
" int bucketStart = pCellStart[gridHash];\n"
" if (bucketStart == -1)\n"
" {\n"
" return; // cell empty\n"
" }\n"
" // iterate over bodies in this cell\n"
" int2 sortedData = pHash[index];\n"
" int unsorted_indx = sortedData.y;\n"
" float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; \n"
" float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n"
" int handleIndex = as_int(min0.w);\n"
" \n"
" int bucketEnd = bucketStart + maxBodiesPerCell;\n"
" bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd;\n"
" for(int index2 = bucketStart; index2 < bucketEnd; index2++) \n"
" {\n"
" int2 cellData = pHash[index2];\n"
" if (cellData.x != gridHash)\n"
" {\n"
" break; // no longer in same bucket\n"
" }\n"
" int unsorted_indx2 = cellData.y;\n"
" //if (unsorted_indx2 < unsorted_indx) // check not colliding with self\n"
" if (unsorted_indx2 != unsorted_indx) // check not colliding with self\n"
" { \n"
" float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];\n"
" float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];\n"
" if(testAABBOverlap(min0, max0, min1, max1))\n"
" {\n"
" if (pairCount)\n"
" {\n"
" int handleIndex2 = as_int(min1.w);\n"
" if (handleIndex<handleIndex2)\n"
" {\n"
" int curPair = atomic_add(pairCount,1);\n"
" if (curPair<maxPairs)\n"
" {\n"
" int4 newpair;\n"
" newpair.x = handleIndex;\n"
" newpair.y = handleIndex2;\n"
" newpair.z = -1;\n"
" newpair.w = -1;\n"
" pPairBuff2[curPair] = newpair;\n"
" }\n"
" }\n"
" \n"
" }\n"
" }\n"
" }\n"
" }\n"
"}\n"
"__kernel void kFindOverlappingPairs( int numObjects,\n"
" __global float4* allpAABB, \n"
" __global const int* smallAabbMapping,\n"
" __global int2* pHash, \n"
" __global int* pCellStart, \n"
" __global float4* pParams ,\n"
" volatile __global int* pairCount,\n"
" __global int4* pPairBuff2,\n"
" int maxPairs\n"
" )\n"
"{\n"
" int index = get_global_id(0);\n"
" if(index >= numObjects)\n"
" {\n"
" return;\n"
" }\n"
" int2 sortedData = pHash[index];\n"
" int unsorted_indx = sortedData.y;\n"
" float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];\n"
" float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n"
" float4 pos;\n"
" pos.x = (bbMin.x + bbMax.x) * 0.5f;\n"
" pos.y = (bbMin.y + bbMax.y) * 0.5f;\n"
" pos.z = (bbMin.z + bbMax.z) * 0.5f;\n"
" // get address in grid\n"
" int4 gridPosA = getGridPos(pos, pParams);\n"
" int4 gridPosB; \n"
" // examine only neighbouring cells\n"
" for(int z=-1; z<=1; z++) \n"
" {\n"
" gridPosB.z = gridPosA.z + z;\n"
" for(int y=-1; y<=1; y++) \n"
" {\n"
" gridPosB.y = gridPosA.y + y;\n"
" for(int x=-1; x<=1; x++) \n"
" {\n"
" gridPosB.x = gridPosA.x + x;\n"
" findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);\n"
" }\n"
" }\n"
" }\n"
"}\n"
;

+ 767
- 0
src/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl View File

@@ -0,0 +1,767 @@
/*
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Initial Author Jackson Lee, 2014

//Aliases that let the kernels below use b3-prefixed names:
//b3Scalar is float, b3Vector3 is a float4 (the functions in this file that take a
//b3Vector3 use only x, y and z), and b3Max/b3Min/b3Sqrt map to the OpenCL built-ins.
typedef float b3Scalar;
typedef float4 b3Vector3;
#define b3Max max
#define b3Min min
#define b3Sqrt sqrt

//Key/value pair of two 32-bit unsigned integers.
//assignMortonCodesAndAabbIndicies() stores the morton code in m_key and the leaf/AABB index in m_value.
typedef struct
{
unsigned int m_key;
unsigned int m_value;
} SortDataCL;

//Axis-aligned bounding box stored as two float4 (min and max corner).
//Each corner is a union, so the same 16 bytes can be accessed as a float4,
//as four floats, or as four ints. The kernels in this file read m_minIndices[3]
//(the bits of m_min.w) as the integer identifier written into the output pairs.
typedef struct
{
union
{
float4 m_min;
float m_minElems[4];
int m_minIndices[4];
};
union
{
float4 m_max;
float m_maxElems[4];
int m_maxIndices[4];
};
} b3AabbCL;


//Spreads the low 10 bits of x so that input bit i ends up at output bit 3*i,
//leaving two zero bits between consecutive input bits. Bits of x above bit 9 are discarded.
//
//Layout after each step (digits 1..A are the ten input bits, most significant first):
//  ........ ........ ......12 3456789A   input, masked to 10 bits
//  ......12 ........ ........ 3456789A   after shift 16, mask 0xFF0000FF
//  ......12 ........ 3456.... ....789A   after shift  8, mask 0x0300F00F
//  ......12 ....34.. ..56.... 78....9A   after shift  4, mask 0x030C30C3
//  ....1..2 ..3..4.. 5..6..7. .8..9..A   after shift  2, mask 0x09249249
//
//At every step the shifted copy and the unshifted value have no set bit in common,
//so combining them with OR yields exactly the same result as XOR.
unsigned int interleaveBits(unsigned int x)
{
	unsigned int spread = x & 0x000003ff;

	spread = (spread | (spread << 16)) & 0xff0000ff;
	spread = (spread | (spread <<  8)) & 0x0300f00f;
	spread = (spread | (spread <<  4)) & 0x030c30c3;
	spread = (spread | (spread <<  2)) & 0x09249249;

	return spread;
}
//Builds a 30-bit morton code (z-curve index) from three 10-bit coordinates:
//bits of x land at positions 0, 3, 6, ..., bits of y at 1, 4, 7, ... and bits of z at 2, 5, 8, ...
unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)
{
	const unsigned int xBits = interleaveBits(x);
	const unsigned int yBits = interleaveBits(y) << 1;
	const unsigned int zBits = interleaveBits(z) << 2;
	return xBits | yBits | zBits;
}

//Gathers AABBs into a compact array: out_aabbs[i] = unseparatedAabbs[ aabbIndices[i] ]
//for every i in [0, numAabbsToSeparate). One work-item per output element.
__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate)
{
	const int dst = get_global_id(0);
	if(dst < numAabbsToSeparate)
	{
		const int src = aabbIndices[dst];
		out_aabbs[dst] = unseparatedAabbs[src];
	}
}

//One pass of an in-place reduction that merges AABBs pairwise.
//Should replace with an optimized parallel reduction.
//
//Every enqueue of this kernel roughly halves the number of AABBs still to be merged:
//with N = numAabbsNeedingMerge, ceil(N / 2) entries remain afterwards and the upper
//floor(N / 2) entries are folded into the lower ones.
//Example with N == 159: 80 entries remain, 79 merges are performed,
//and entries [0, 78] are merged with entries [80, 158].
__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)
{
	const int numKept = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2;
	const int numMerges = numAabbsNeedingMerge - numKept;

	const int target = get_global_id(0);
	if(target < numMerges)
	{
		const b3AabbCL lower = out_mergedAabb[target];
		const b3AabbCL upper = out_mergedAabb[target + numKept];

		b3AabbCL merged;
		merged.m_min = b3Min(lower.m_min, upper.m_min);
		merged.m_max = b3Max(lower.m_max, upper.m_max);
		out_mergedAabb[target] = merged;
	}
}

//Assigns a 30-bit morton code to every AABB, one work-item per AABB.
//
//mergedAabbOfAllNodes[0] must hold the AABB enclosing all input AABBs. That box is divided
//into 1024 cells per axis; the centre of each AABB is quantized to a cell coordinate in
//[0, 1023] per axis and the three coordinates are interleaved with getMortonCode().
//Output: out_mortonCodesAndAabbIndices[i] = { m_key = morton code, m_value = i }.
__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, 
											__global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs)
{
	int leafNodeIndex = get_global_id(0);	//Leaf node index == AABB index
	if(leafNodeIndex >= numAabbs) return;
	
	b3AabbCL mergedAabb = mergedAabbOfAllNodes[0];
	b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f;
	b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024;
	
	b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];
	b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f;
	b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter;
	
	//Quantize into integer coordinates
	//floor() is needed to prevent the center cell, at (0,0,0) from being twice the size
	b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize;
	
	int4 discretePosition;
	discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );
	discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) );
	discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) );
	discretePosition.w = 0;	//w is not a coordinate; give it a defined value before the vector operations below
	
	//Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023]
	//The vector operand comes first: the scalar-bound overloads of the integer min()/max()
	//built-ins are declared as (gentype, sgentype), so max(scalar, vector) matches no overload.
	discretePosition = b3Max( b3Min(discretePosition, 511), -512 );
	discretePosition += 512;
	
	//Interleave bits(assign a morton code, also known as a z-curve)
	unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z);
	
	//
	SortDataCL mortonCodeIndexPair;
	mortonCodeIndexPair.m_key = mortonCode;
	mortonCodeIndexPair.m_value = leafNodeIndex;
	
	out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;
}

//Number of entries in the per-work-item node stack used by the traversal kernels
//(plbvhCalculateOverlappingPairs and plbvhRayTraverse). When pushing both children of a
//node would exceed this size, those kernels skip the children instead of pushing them.
#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128

//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes.
//If it is set, then the index is for an internal node; otherwise, it is a leaf node.
//In both cases, the bit should be cleared to access the actual node index.
//Returns 1 when the marker bit (bit 31, the sign bit) is clear, i.e. the index is a leaf index; 0 otherwise.
int isLeafNode(int index)
{
	return index >= 0;
}
//Clears the marker bit (bit 31) and returns the remaining 31-bit node index.
int getIndexWithInternalNodeMarkerRemoved(int index)
{
	return index & 0x7FFFFFFF;
}
//Returns index unchanged for a leaf (isLeaf != 0); otherwise returns index with the
//internal-node marker bit (bit 31) set.
int getIndexWithInternalNodeMarkerSet(int isLeaf, int index)
{
	if (isLeaf)
	{
		return index;
	}
	return index | 0x80000000;
}

//From sap.cl
//Value the pair-producing kernels in this file write into the z and w components of every output pair
#define NEW_PAIR_MARKER -1

//Returns true when the two AABBs overlap or touch on all three axes.
//An axis separates the boxes when one box's minimum is greater than the other's maximum;
//the w components are ignored.
bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)
{
	if(aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) return false;
	if(aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) return false;
	if(aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) return false;
	return true;
}
//From sap.cl

//Finds overlapping AABB pairs by traversing the BVH once per leaf (one work-item per leaf).
//
//The traversal uses an explicit stack of node indices that starts with *rootNodeIndex.
//A node is skipped when its highest leaf index is <= the querying leaf's index, so each pair
//is reported once and a leaf is never tested against itself.
//out_numPairs is incremented for every overlap found, including those that are not stored
//because maxPairs was reached. If pushing two children would overflow the stack, the
//children are silently skipped.
__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, 

											__global int* rootNodeIndex, 
											__global int2* internalNodeChildIndices, 
											__global b3AabbCL* internalNodeAabbs,
											__global int2* internalNodeLeafIndexRanges,
											__global SortDataCL* mortonCodesAndAabbIndices,
											__global int* out_numPairs, __global int4* out_overlappingPairs, 
											int maxPairs, int numQueryAabbs)
{
	//Using get_group_id()/get_local_id() is faster than get_global_id(0) since
	//mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent)
	const int queryLeaf = get_group_id(0) * get_local_size(0) + get_local_id(0);
	if(queryLeaf >= numQueryAabbs) return;
	
	const int queryRigid = mortonCodesAndAabbIndices[queryLeaf].m_value;
	b3AabbCL queryAabb = rigidAabbs[queryRigid];
	
	int pending[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];
	int numPending = 0;
	pending[numPending++] = *rootNodeIndex;
	
	while(numPending > 0)
	{
		const int taggedNode = pending[--numPending];
		const int isLeaf = isLeafNode(taggedNode);	//Internal node if false
		const int node = getIndexWithInternalNodeMarkerRemoved(taggedNode);
		
		//Optimization - if the BVH is structured as a binary radix tree, then
		//each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).
		//This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself.
		int highestLeaf;
		if(isLeaf) highestLeaf = node;
		else highestLeaf = internalNodeLeafIndexRanges[node].y;
		if(highestLeaf <= queryLeaf) continue;
		
		if(isLeaf)
		{
			const int otherRigid = mortonCodesAndAabbIndices[node].m_value;
			b3AabbCL otherAabb = rigidAabbs[otherRigid];
			if( !TestAabbAgainstAabb2(&queryAabb, &otherAabb) ) continue;
			
			const int4 pair = (int4)( rigidAabbs[queryRigid].m_minIndices[3],
									rigidAabbs[otherRigid].m_minIndices[3],
									NEW_PAIR_MARKER, NEW_PAIR_MARKER );
			
			const int slot = atomic_inc(out_numPairs);
			if(slot < maxPairs) out_overlappingPairs[slot] = pair;
		}
		else	//Internal node
		{
			b3AabbCL nodeAabb = internalNodeAabbs[node];
			if( !TestAabbAgainstAabb2(&queryAabb, &nodeAabb) ) continue;
			
			//No room for both children means the subtree is dropped without reporting an error
			if(numPending + 2 <= B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)
			{
				pending[ numPending++ ] = internalNodeChildIndices[node].x;
				pending[ numPending++ ] = internalNodeChildIndices[node].y;
			}
		}
	}
}


//From rayCastKernels.cl
//A ray segment given by its two end points. The ray kernels in this file derive the
//direction from (m_to - m_from) and use the length of that vector as the ray length.
typedef struct
{
float4 m_from;
float4 m_to;
} b3RayInfo;
//From rayCastKernels.cl

//Normalizes the x, y, z part of v. The w component is zeroed first because
//OpenCL normalize() on a float4 takes all four components into account.
b3Vector3 b3Vector3_normalize(b3Vector3 v)
{
	v.w = 0.f;	//v is a by-value copy, the caller's vector is untouched
	return normalize(v);
}
//Squared length of the x, y, z part of v (w is ignored)
b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; }
//Dot product of the x, y, z parts of a and b (w is ignored)
b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }

//Slab test: returns non-zero when the ray segment starting at rayOrigin, running along
//rayNormalizedDirection for rayLength, intersects aabb; returns 0 otherwise.
//Only the x, y and z components of the vectors take part in the final comparison.
int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)
{
//AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ).
//t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane.
//
//if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane
//and min.x will be the far plane; otherwise, it is reversed.
//
//In order for there to be a collision, the t_min and t_max of each pair must overlap.
//This can be tested for by selecting the highest t_min and lowest t_max and comparing them.
int4 isNegative = isless( rayNormalizedDirection, ((b3Vector3){0.0f, 0.0f, 0.0f, 0.0f}) ); //isless(x,y) returns (x < y)
//When using vector types, the select() function checks the most significant bit,
//but isless() sets the least significant bit.
//NOTE(review): the OpenCL specification states that vector relational functions return -1
//(all bits set) for true; in that case the shift below is redundant but harmless,
//since the most significant bit is set either way - confirm before removing it.
isNegative <<= 31;

//select(b, a, condition) == condition ? a : b
//When using select() with vector types, (condition[i]) is true if its most significant bit is 1
b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection;
b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection;
//The valid parameter interval starts as [0, rayLength] and is narrowed by each axis below
b3Scalar t_min_final = 0.0f;
b3Scalar t_max_final = rayLength;
//Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned.
//Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4])
//Since the innermost fmin()/fmax() is always not NaN, this should never return NaN.
t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) );
t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) );
//Hit when the latest entry does not come after the earliest exit
return (t_min_final <= t_max_final);
}

//Casts one ray per work-item through the BVH and appends a (ray index, rigid identifier)
//pair for every leaf AABB the ray segment intersects.
//
//The traversal uses an explicit stack of node indices that starts with *rootNodeIndex.
//out_numRayRigidPairs is incremented for every hit, including hits that are not stored
//because maxRayRigidPairs was reached. If pushing two children would overflow the stack,
//the children are silently skipped.
__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,

								__global int* rootNodeIndex, 
								__global int2* internalNodeChildIndices, 
								__global b3AabbCL* internalNodeAabbs,
								__global int2* internalNodeLeafIndexRanges,
								__global SortDataCL* mortonCodesAndAabbIndices,
								__global b3RayInfo* rays,
								__global int* out_numRayRigidPairs, 
								__global int2* out_rayRigidPairs,
								int maxRayRigidPairs, int numRays)
{
	const int rayIndex = get_global_id(0);
	if(rayIndex >= numRays) return;
	
	//Segment end points, unit direction and length
	const b3Vector3 rayFrom = rays[rayIndex].m_from;
	const b3Vector3 rayTo = rays[rayIndex].m_to;
	const b3Vector3 rayDelta = rayTo - rayFrom;
	const b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayDelta);
	const b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayDelta) );
	
	int pending[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];
	int numPending = 0;
	pending[numPending++] = *rootNodeIndex;
	
	while(numPending > 0)
	{
		const int taggedNode = pending[--numPending];
		const int isLeaf = isLeafNode(taggedNode);	//Internal node if false
		const int node = getIndexWithInternalNodeMarkerRemoved(taggedNode);
		
		if(isLeaf)
		{
			const int rigid = mortonCodesAndAabbIndices[node].m_value;
			b3AabbCL leafAabb = rigidAabbs[rigid];
			if( !rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, leafAabb) ) continue;
			
			const int2 rayRigidPair = (int2)( rayIndex, rigidAabbs[rigid].m_minIndices[3] );
			
			const int slot = atomic_inc(out_numRayRigidPairs);
			if(slot < maxRayRigidPairs) out_rayRigidPairs[slot] = rayRigidPair;
		}
		else	//Internal node
		{
			b3AabbCL nodeAabb = internalNodeAabbs[node];
			if( !rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, nodeAabb) ) continue;
			
			//No room for both children means the subtree is dropped without reporting an error
			if(numPending + 2 <= B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)
			{
				pending[ numPending++ ] = internalNodeChildIndices[node].x;
				pending[ numPending++ ] = internalNodeChildIndices[node].y;
			}
		}
	}
}

//Brute-force test of every small AABB against every large AABB; one work-item per small AABB.
//Each overlap is appended as (large identifier, small identifier, NEW_PAIR_MARKER, NEW_PAIR_MARKER).
//out_numPairs is incremented for every overlap, including those that are not stored
//because maxPairs was reached.
__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, 
									__global int* out_numPairs, __global int4* out_overlappingPairs, 
									int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids)
{
	const int smallIdx = get_global_id(0);
	if(smallIdx >= numSmallAabbRigids) return;
	
	b3AabbCL smallBox = smallAabbs[smallIdx];
	
	for(int largeIdx = 0; largeIdx < numLargeAabbRigids; ++largeIdx)
	{
		b3AabbCL largeBox = largeAabbs[largeIdx];
		if( !TestAabbAgainstAabb2(&smallBox, &largeBox) ) continue;
		
		const int4 pair = (int4)( largeBox.m_minIndices[3], smallBox.m_minIndices[3],
								NEW_PAIR_MARKER, NEW_PAIR_MARKER );
		
		const int slot = atomic_inc(out_numPairs);
		if(slot < maxPairs) out_overlappingPairs[slot] = pair;
	}
}
//Brute-force ray cast: one work-item per ray, checked against every large AABB.
//Each hit is emitted as (ray index, m_minIndices[3] of the AABB that was hit).
//out_numRayRigidPairs is incremented for every hit, including hits that are not stored
//because the slot obtained from the counter is at or beyond maxRayRigidPairs.
__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays,
									__global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs,
									int numLargeAabbRigids, int maxRayRigidPairs, int numRays)
{
	int rayIndex = get_global_id(0);
	if(rayIndex >= numRays) return;

	b3Vector3 rayFrom = rays[rayIndex].m_from;
	b3Vector3 rayTo = rays[rayIndex].m_to;

	//Direction and length are both derived from the same from->to displacement
	b3Vector3 rayDelta = rayTo - rayFrom;
	b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayDelta);
	b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayDelta) );

	int aabbIndex = 0;
	while(aabbIndex < numLargeAabbRigids)
	{
		b3AabbCL candidateAabb = largeRigidAabbs[aabbIndex];
		++aabbIndex;

		if( !rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, candidateAabb) ) continue;

		int2 hit;
		hit.x = rayIndex;
		hit.y = candidateAabb.m_minIndices[3];

		//Reserve an output slot; the counter keeps growing even when the buffer is full
		int slot = atomic_inc(out_numRayRigidPairs);
		if(slot < maxRayRigidPairs) out_rayRigidPairs[slot] = hit;
	}
}


//Sentinel prefix length used where a neighbor does not exist.
//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.
//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.
//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.
//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).
#define B3_PLBVH_INVALID_COMMON_PREFIX 128

//Value stored as the parent index of the root node.
//Parenthesized so that the negative literal cannot combine with an adjacent operator at the expansion site.
#define B3_PLBVH_ROOT_NODE_MARKER (-1)

//Integer type used to hold a morton code combined with a leaf index(see upsample() usage below)
#define b3Int64 long

//Returns the number of leading zero bits of (i ^ j), that is, how many of the
//most significant bits of i and j are identical.
int computeCommonPrefixLength(b3Int64 i, b3Int64 j)
{
	b3Int64 differingBits = i ^ j;
	return (int)clz(differingBits);
}
//Returns the bits shared by i and j within their common prefix; all bits after the common prefix are cleared.
//
//This function only needs to return (i & j) in order for the algorithm to work,
//but it may help with debugging to mask out the lower bits.
b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j)
{
	b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j);

	//With no common prefix the shift amount below would be 64, the full width of the type.
	//OpenCL C reduces the shift amount modulo the width, so the mask would become all ones
	//instead of all zeros. An empty prefix has no shared bits, so return 0 directly.
	if(commonPrefixLength <= 0) return 0;

	b3Int64 sharedBits = i & j;
	b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength);	//Set all bits after the common prefix to 0

	return sharedBits & bitmask;
}

//Variant of computeCommonPrefixLength() for prefixes that may have different lengths:
//the number of matching leading bits is capped by the shorter of the two prefix lengths.
int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)
{
	int shorterPrefixLength = b3Min(prefixLengthA, prefixLengthB);
	int matchingLeadingBits = computeCommonPrefixLength(prefixA, prefixB);

	return b3Min(matchingLeadingBits, shorterPrefixLength);
}

//For each pair of adjacent leaves (k, k + 1), stores the common prefix and the common prefix length
//of their morton codes at index k. One work-item per pair; there are numInternalNodes pairs.
__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices,
											__global b3Int64* out_commonPrefixes,
											__global int* out_commonPrefixLengths,
											int numInternalNodes)
{
	int pairIndex = get_global_id(0);
	if (pairIndex >= numInternalNodes) return;

	//The right leaf index (pairIndex + 1) stays in bounds:
	//the number of internal nodes is always numLeafNodes - 1
	int leftLeaf = pairIndex;
	int rightLeaf = pairIndex + 1;

	int leftKey = mortonCodesAndAabbIndices[leftLeaf].m_key;
	int rightKey = mortonCodesAndAabbIndices[rightLeaf].m_key;

	//The binary radix tree construction fails on duplicate morton codes, so each code is made
	//unique by appending the index of its leaf. Sorted(ascending) order, which the algorithm
	//also needs, is preserved because leftLeaf < rightLeaf always holds.
	//
	//upsample(a, b) == ( ((b3Int64)a) << 32) | b
	b3Int64 leftUniqueCode = upsample(leftKey, leftLeaf);
	b3Int64 rightUniqueCode = upsample(rightKey, rightLeaf);

	out_commonPrefixLengths[pairIndex] = computeCommonPrefixLength(leftUniqueCode, rightUniqueCode);
	out_commonPrefixes[pairIndex] = computeCommonPrefix(leftUniqueCode, rightUniqueCode);
}


//Attaches each leaf node to its parent internal node. One work-item per leaf.
//
//Leaf k sits between split k - 1 (to its left) and split k (to its right); commonPrefixLengths[] holds one entry
//per split. The neighboring split with the higher common prefix length becomes the parent.
//Outputs: out_leafNodeParentNodes[k] receives the parent index, and the matching slot
//(x == left child, y == right child) of out_childNodes[parent] receives the leaf index.
__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes,
											__global int2* out_childNodes, int numLeafNodes)
{
	int leafNodeIndex = get_global_id(0);
	if (leafNodeIndex >= numLeafNodes) return;

	int numInternalNodes = numLeafNodes - 1;

	int leftSplitIndex = leafNodeIndex - 1;
	int rightSplitIndex = leafNodeIndex;

	//A split outside [0, numInternalNodes) does not exist; it gets the sentinel so that it is never preferred
	int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
	int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;

	//Parent node is the highest adjacent common prefix that is lower than the node's common prefix
	//Leaf nodes are considered as having the highest common prefix
	int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);

	//Handle cases for the edge nodes; the first and last node
	//With 2 or more leaves, leftCommonPrefix and rightCommonPrefix are never both B3_PLBVH_INVALID_COMMON_PREFIX
	if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;
	if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;

	int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex;
	out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex;

	//With a single leaf there are no internal nodes: both neighbors are invalid and parentNodeIndex is -1.
	//Writing the child slot would then land before the start of out_childNodes, so it is skipped.
	if(parentNodeIndex < 0 || parentNodeIndex >= numInternalNodes) return;

	int isRightChild = (isLeftHigherCommonPrefix);	//If the left node is the parent, then this node is its right child and vice versa

	//out_childNodesAsInt[0] == int2.x == left child
	//out_childNodesAsInt[1] == int2.y == right child
	int isLeaf = 1;
	__global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);
	out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);
}

//Links each internal node of the binary radix tree to its parent internal node. One work-item per internal node.
//
//For the node handled by this work-item, a candidate is looked up on each side: a node whose shared prefix with
//this node(see getSharedPrefixLength()) is shorter than this node's own prefix length. Of the two candidates,
//the one with the higher prefix length becomes the parent. A node without a candidate on either side is the root.
//
//Inputs: commonPrefixes[] / commonPrefixLengths[] hold one entry per internal node.
//Outputs: out_internalNodeParentNodes[node] receives the parent index, or B3_PLBVH_ROOT_NODE_MARKER for the root;
//one slot(x == left child, y == right child) of out_childNodes[parent] receives this node's index;
//*out_rootNodeIndex is written only by the work-item that finds itself to be the root.
__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,
__global int2* out_childNodes,
__global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,
int numInternalNodes)
{
//NOTE(review): computed from group/local ids rather than get_global_id(0); presumably the kernel
//is enqueued without a global work offset, in which case both give the same index - confirm against the host code
int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);
if(internalNodeIndex >= numInternalNodes) return;
b3Int64 nodePrefix = commonPrefixes[internalNodeIndex];
int nodePrefixLength = commonPrefixLengths[internalNodeIndex];
//Uncomment the define below to replace the binary search with a straightforward linear scan
//#define USE_LINEAR_SEARCH
#ifdef USE_LINEAR_SEARCH
//-1 means that no candidate was found on that side
int leftIndex = -1;
int rightIndex = -1;
//Find nearest element to left with a lower common prefix
for(int i = internalNodeIndex - 1; i >= 0; --i)
{
int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);
if(nodeLeftSharedPrefixLength < nodePrefixLength)
{
leftIndex = i;
break;
}
}
//Find nearest element to right with a lower common prefix
for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)
{
int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);
if(nodeRightSharedPrefixLength < nodePrefixLength)
{
rightIndex = i;
break;
}
}
#else //Use binary search

//Find nearest element to left with a lower common prefix
//NOTE(review): the search presumably relies on the qualifying elements forming one contiguous run
//at the far end of the searched range, so that the element bordering the non-qualifying ones is the nearest - confirm
int leftIndex = -1;	//-1 means that no candidate was found on this side
{
//Search range is [0, internalNodeIndex - 1], i.e. every node to the left of this one
int lower = 0;
int upper = internalNodeIndex - 1;
while(lower <= upper)
{
int mid = (lower + upper) / 2;
b3Int64 midPrefix = commonPrefixes[mid];
int midPrefixLength = commonPrefixLengths[mid];
int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);
if(nodeMidSharedPrefixLength < nodePrefixLength)
{
//mid qualifies; check whether its right neighbor, which is closer to this node, qualifies too
int right = mid + 1;
if(right < internalNodeIndex)
{
b3Int64 rightPrefix = commonPrefixes[right];
int rightPrefixLength = commonPrefixLengths[right];
int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength);
if(nodeRightSharedPrefixLength < nodePrefixLength)
{
//The neighbor qualifies as well: remember it and continue searching from it towards this node
lower = right;
leftIndex = right;
}
else
{
//The neighbor does not qualify, so mid is taken as the result
leftIndex = mid;
break;
}
}
else
{
//mid is directly to the left of this node; nothing closer exists
leftIndex = mid;
break;
}
}
else upper = mid - 1;	//mid does not qualify; continue in the half further away from this node
}
}
//Find nearest element to right with a lower common prefix
//(mirror image of the search above)
int rightIndex = -1;	//-1 means that no candidate was found on this side
{
//Search range is [internalNodeIndex + 1, numInternalNodes - 1], i.e. every node to the right of this one
int lower = internalNodeIndex + 1;
int upper = numInternalNodes - 1;
while(lower <= upper)
{
int mid = (lower + upper) / 2;
b3Int64 midPrefix = commonPrefixes[mid];
int midPrefixLength = commonPrefixLengths[mid];
int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);
if(nodeMidSharedPrefixLength < nodePrefixLength)
{
//mid qualifies; check whether its left neighbor, which is closer to this node, qualifies too
int left = mid - 1;
if(left > internalNodeIndex)
{
b3Int64 leftPrefix = commonPrefixes[left];
int leftPrefixLength = commonPrefixLengths[left];
int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength);
if(nodeLeftSharedPrefixLength < nodePrefixLength)
{
//The neighbor qualifies as well: remember it and continue searching from it towards this node
upper = left;
rightIndex = left;
}
else
{
//The neighbor does not qualify, so mid is taken as the result
rightIndex = mid;
break;
}
}
else
{
//mid is directly to the right of this node; nothing closer exists
rightIndex = mid;
break;
}
}
else lower = mid + 1;	//mid does not qualify; continue in the half further away from this node
}
}
#endif
//Select parent
{
//A missing candidate gets the sentinel length, which is then special-cased below so that it is never chosen
int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
//Of the two candidates, the one with the higher prefix length is the parent
int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength);
if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false;
else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true;
int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex;
//No candidate on either side: this node is the root
int isRootNode = (leftIndex == -1 && rightIndex == -1);
out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER;
int isLeaf = 0;
if(!isRootNode)
{
int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa
//out_childNodesAsInt[0] == int2.x == left child
//out_childNodesAsInt[1] == int2.y == right child
__global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);
out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);
}
else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);
}
}

//Computes, for each internal node, the number of parent links between the node and the root, and
//accumulates the maximum of those distances. One work-item per internal node.
//
//rootNodeIndex: not read by this kernel.
//internalNodeParentNodes: parent index of each internal node; B3_PLBVH_ROOT_NODE_MARKER for the root.
//out_maxDistanceFromRoot: receives the maximum distance over all internal nodes.
//out_distanceFromRoot: receives the distance of each internal node.
__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes,
									__global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes)
{
	//NOTE(review): this reset is not synchronized with the atomic_max() calls made by other work-groups;
	//presumably the host also clears the value, or a single work-group ordering is relied upon - confirm
	if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0);

	int internalNodeIndex = get_global_id(0);

	//Work-items past the last node must not return here: every work-item of a work-group
	//has to reach the barrier() calls below. They take part with a distance of 0 instead,
	//which cannot change a maximum of non-negative distances.
	int isValidNode = (internalNodeIndex < numInternalNodes);

	//Walk up the parent links until the root is reached
	int distanceFromRoot = 0;
	if(isValidNode)
	{
		int parentIndex = internalNodeParentNodes[internalNodeIndex];
		while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER)
		{
			parentIndex = internalNodeParentNodes[parentIndex];
			++distanceFromRoot;
		}

		out_distanceFromRoot[internalNodeIndex] = distanceFromRoot;
	}

	//Reduce to one maximum per work-group in local memory first,
	//so that only one global atomic is issued per work-group
	__local int localMaxDistanceFromRoot;
	if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0;
	barrier(CLK_LOCAL_MEM_FENCE);

	atomic_max(&localMaxDistanceFromRoot, distanceFromRoot);
	barrier(CLK_LOCAL_MEM_FENCE);

	if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot);
}

//Sets the AABB of every internal node whose distance from the root equals processedDistance
//to the union of the AABBs of its two children. One work-item per internal node.
//A leaf child's AABB is read from leafNodeAabbs[] through mortonCodesAndAabbIndices[].m_value;
//an internal child's AABB is read from internalNodeAabbs[].
//NOTE(review): presumably launched once per tree level, deepest level first, so that the AABBs of
//internal children are already final when they are read - confirm against the host code.
//maxDistanceFromRoot is not read by this kernel.
__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices,
												__global int2* childNodes,
												__global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,
												int maxDistanceFromRoot, int processedDistance, int numInternalNodes)
{
	int nodeIndex = get_global_id(0);
	if(nodeIndex >= numInternalNodes) return;

	//Nodes on other levels are left untouched in this pass
	if(distanceFromRoot[nodeIndex] != processedDistance) return;

	int2 children = childNodes[nodeIndex];

	int leftIsLeaf = isLeafNode(children.x);
	int rightIsLeaf = isLeafNode(children.y);

	int leftIndex = getIndexWithInternalNodeMarkerRemoved(children.x);
	int rightIndex = getIndexWithInternalNodeMarkerRemoved(children.y);

	b3AabbCL leftAabb;
	if(leftIsLeaf) leftAabb = leafNodeAabbs[ mortonCodesAndAabbIndices[leftIndex].m_value ];
	else leftAabb = internalNodeAabbs[leftIndex];

	b3AabbCL rightAabb;
	if(rightIsLeaf) rightAabb = leafNodeAabbs[ mortonCodesAndAabbIndices[rightIndex].m_value ];
	else rightAabb = internalNodeAabbs[rightIndex];

	//Union of both child volumes
	b3AabbCL unionAabb;
	unionAabb.m_min = b3Min(leftAabb.m_min, rightAabb.m_min);
	unionAabb.m_max = b3Max(leftAabb.m_max, rightAabb.m_max);

	internalNodeAabbs[nodeIndex] = unionAabb;
}

//For each internal node, finds the leaf reached by always following left children and the leaf reached by
//always following right children, and stores them as (x == min leaf index, y == max leaf index).
//One work-item per internal node.
__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes)
{
	int nodeIndex = get_global_id(0);
	if(nodeIndex >= numInternalNodes) return;

	int2 children = internalNodeChildNodes[nodeIndex];

	//Descend through left children(int2.x) until a leaf is reached
	int leftmost = children.x;
	while( !isLeafNode(leftmost) )
	{
		int internalIndex = getIndexWithInternalNodeMarkerRemoved(leftmost);
		leftmost = internalNodeChildNodes[internalIndex].x;
	}

	//Descend through right children(int2.y) until a leaf is reached
	int rightmost = children.y;
	while( !isLeafNode(rightmost) )
	{
		int internalIndex = getIndexWithInternalNodeMarkerRemoved(rightmost);
		rightmost = internalNodeChildNodes[internalIndex].y;
	}

	int2 coveredLeaves;
	coveredLeaves.x = leftmost;
	coveredLeaves.y = rightmost;
	out_leafIndexRanges[nodeIndex] = coveredLeaves;
}

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save