diff --git a/src/Makefile.am b/src/Makefile.am
index ea21d08b..ce2b1622 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -35,6 +35,7 @@ liblol_a_SOURCES = \
     $(sdl_sources) \
     $(d3d9_sources) \
     $(android_sources) \
+    $(bullet_sources) \
     \
     thread/threadbase.h thread/thread.h \
     \
@@ -60,7 +61,7 @@ liblol_a_SOURCES = \
     debug/fps.cpp debug/fps.h debug/sphere.cpp debug/sphere.h \
     debug/record.cpp debug/record.h debug/stats.cpp debug/stats.h \
     debug/quad.cpp debug/quad.h
-liblol_a_CPPFLAGS = @LOL_CFLAGS@
+liblol_a_CPPFLAGS = @LOL_CFLAGS@ -I$(srcdir)/bullet
 
 SUFFIXES = .lolfx
 .lolfx.o:
@@ -113,3 +114,545 @@ android_sources = \
     image/codec/android-image.cpp \
     platform/android/androidapp.cpp platform/android/androidapp.h
 
+bullet_sources =
+# Bullet's multi-threaded backend; only built if the FALSE conditional is true (presumably never; confirm in configure.ac).
+if FALSE #CONDITIONAL_BUILD_MULTITHREADED
+bullet_sources += \
+    bullet/BulletMultiThreaded/PosixThreadSupport.h \
+    bullet/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h \
+    bullet/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h \
+    bullet/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h \
+    bullet/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h \
+    bullet/BulletMultiThreaded/PpuAddressSpace.h \
+    bullet/BulletMultiThreaded/SpuCollisionTaskProcess.h \
+    bullet/BulletMultiThreaded/PlatformDefinitions.h \
+    bullet/BulletMultiThreaded/vectormath2bullet.h \
+    bullet/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h \
+    bullet/BulletMultiThreaded/SpuCollisionObjectWrapper.h \
+    bullet/BulletMultiThreaded/SpuSampleTaskProcess.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h \
+    bullet/BulletMultiThreaded/SpuSync.h \
+    bullet/BulletMultiThreaded/btThreadSupportInterface.h \
+    bullet/BulletMultiThreaded/SpuLibspe2Support.h \
+    bullet/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h \
+    bullet/BulletMultiThreaded/SpuFakeDma.h \
+    bullet/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h \
+    bullet/BulletMultiThreaded/SpuDoubleBuffer.h \
+    bullet/BulletMultiThreaded/Win32ThreadSupport.h \
+    bullet/BulletMultiThreaded/SequentialThreadSupport.h
+# Extra include path for the scalar vectormath headers, attached to liblol because that is the library these sources are compiled into.
+liblol_a_CPPFLAGS += -I$(srcdir)/bullet/BulletMultiThreaded/vectormath/scalar/cpp
+bullet_sources += \
+    bullet/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp \
+    bullet/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp \
+    bullet/BulletMultiThreaded/SpuLibspe2Support.cpp \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp \
+    bullet/BulletMultiThreaded/btThreadSupportInterface.cpp \
+    bullet/BulletMultiThreaded/SequentialThreadSupport.cpp \
+    bullet/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp \
+    bullet/BulletMultiThreaded/Win32ThreadSupport.cpp \
+    bullet/BulletMultiThreaded/SpuFakeDma.cpp \
+    bullet/BulletMultiThreaded/PosixThreadSupport.cpp \
+    bullet/BulletMultiThreaded/SpuCollisionTaskProcess.cpp \
+    bullet/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp \
+    bullet/BulletMultiThreaded/SpuSampleTaskProcess.cpp \
+    bullet/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h \
+    bullet/BulletMultiThreaded/PpuAddressSpace.h \
+    bullet/BulletMultiThreaded/SpuSampleTaskProcess.h \
+    bullet/BulletMultiThreaded/SequentialThreadSupport.h \
+    bullet/BulletMultiThreaded/PlatformDefinitions.h \
+    bullet/BulletMultiThreaded/Win32ThreadSupport.h \
+    bullet/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h \
+    bullet/BulletMultiThreaded/btThreadSupportInterface.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h \
+    bullet/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h \
+    bullet/BulletMultiThreaded/SpuFakeDma.h \
+    bullet/BulletMultiThreaded/SpuSync.h \
+    bullet/BulletMultiThreaded/SpuCollisionObjectWrapper.h \
+    bullet/BulletMultiThreaded/SpuDoubleBuffer.h \
+    bullet/BulletMultiThreaded/SpuCollisionTaskProcess.h \
+    bullet/BulletMultiThreaded/PosixThreadSupport.h \
+    bullet/BulletMultiThreaded/SpuLibspe2Support.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h \
+    bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h
+endif
+# Bullet LinearMath sources and headers.
+bullet_sources += \
+    bullet/LinearMath/btQuickprof.cpp \
+    bullet/LinearMath/btGeometryUtil.cpp \
+    bullet/LinearMath/btAlignedAllocator.cpp \
+    bullet/LinearMath/btSerializer.cpp \
+    bullet/LinearMath/btConvexHull.cpp \
+    bullet/LinearMath/btConvexHullComputer.cpp \
+    bullet/LinearMath/btHashMap.h \
+    bullet/LinearMath/btConvexHull.h \
+    bullet/LinearMath/btAabbUtil2.h \
+    bullet/LinearMath/btGeometryUtil.h \
+    bullet/LinearMath/btQuadWord.h \
+    bullet/LinearMath/btPoolAllocator.h \
+    bullet/LinearMath/btScalar.h \
+    bullet/LinearMath/btMinMax.h \
+    bullet/LinearMath/btVector3.h \
+    bullet/LinearMath/btList.h \
+    bullet/LinearMath/btStackAlloc.h \
+    bullet/LinearMath/btMatrix3x3.h \
+    bullet/LinearMath/btMotionState.h \
+    bullet/LinearMath/btAlignedAllocator.h \
+    bullet/LinearMath/btQuaternion.h \
+    bullet/LinearMath/btAlignedObjectArray.h \
+    bullet/LinearMath/btQuickprof.h \
+    bullet/LinearMath/btSerializer.h \
+    bullet/LinearMath/btTransformUtil.h \
+    bullet/LinearMath/btTransform.h \
+    bullet/LinearMath/btDefaultMotionState.h \
+    bullet/LinearMath/btIDebugDraw.h \
+    bullet/LinearMath/btRandom.h
+# Bullet BulletCollision sources and headers (narrow phase, dispatch, shapes, broad phase, Gimpact).
+bullet_sources += \
+    bullet/BulletCollision/NarrowPhaseCollision/btRaycastCallback.cpp \
+    bullet/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.cpp \
+    bullet/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.cpp \
+    bullet/BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.cpp \
+    bullet/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.cpp \
+    bullet/BulletCollision/NarrowPhaseCollision/btPersistentManifold.cpp \
+    bullet/BulletCollision/NarrowPhaseCollision/btConvexCast.cpp \
+    bullet/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.cpp \
+    bullet/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.cpp \
+    bullet/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp \
+    bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp \
+    bullet/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.cpp \
+    bullet/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.cpp \
+    bullet/BulletCollision/CollisionDispatch/btCollisionObject.cpp \
+    bullet/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.cpp \
+    bullet/BulletCollision/CollisionDispatch/btGhostObject.cpp \
+    bullet/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.cpp \
+    bullet/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.cpp \
+    bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.cpp \
+    bullet/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.cpp \
+    bullet/BulletCollision/CollisionDispatch/btSimulationIslandManager.cpp \
+    bullet/BulletCollision/CollisionDispatch/btBoxBoxDetector.cpp \
+    bullet/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.cpp \
+    bullet/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.cpp \
+    bullet/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.cpp \
+    bullet/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.cpp \
+    bullet/BulletCollision/CollisionDispatch/SphereTriangleDetector.cpp \
+    bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp \
+    bullet/BulletCollision/CollisionDispatch/btManifoldResult.cpp \
+    bullet/BulletCollision/CollisionDispatch/btCollisionWorld.cpp \
+    bullet/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.cpp \
+    bullet/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.cpp \
+    bullet/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.cpp \
+    bullet/BulletCollision/CollisionDispatch/btUnionFind.cpp \
+    bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp \
+    bullet/BulletCollision/CollisionShapes/btTetrahedronShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btShapeHull.cpp \
+    bullet/BulletCollision/CollisionShapes/btMinkowskiSumShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btCompoundShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btConeShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btConvexPolyhedron.cpp \
+    bullet/BulletCollision/CollisionShapes/btMultiSphereShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btUniformScalingShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btSphereShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.cpp \
+    bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btTriangleMeshShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btTriangleBuffer.cpp \
+    bullet/BulletCollision/CollisionShapes/btStaticPlaneShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btPolyhedralConvexShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btEmptyShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btCollisionShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btConvexShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btConvex2dShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btConvexInternalShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btConvexHullShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btTriangleCallback.cpp \
+    bullet/BulletCollision/CollisionShapes/btCapsuleShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btConcaveShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btConvexPointCloudShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btBoxShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btBox2dShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp \
+    bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btCylinderShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.cpp \
+    bullet/BulletCollision/CollisionShapes/btStridingMeshInterface.cpp \
+    bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexMaterialArray.cpp \
+    bullet/BulletCollision/CollisionShapes/btTriangleMesh.cpp \
+    bullet/BulletCollision/BroadphaseCollision/btAxisSweep3.cpp \
+    bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp \
+    bullet/BulletCollision/BroadphaseCollision/btDbvtBroadphase.cpp \
+    bullet/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.cpp \
+    bullet/BulletCollision/BroadphaseCollision/btDispatcher.cpp \
+    bullet/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp \
+    bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp \
+    bullet/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.cpp \
+    bullet/BulletCollision/BroadphaseCollision/btDbvt.cpp \
+    bullet/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp \
+    bullet/BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btConvexCast.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btPointCollector.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btRaycastCallback.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btPersistentManifold.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btManifoldPoint.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h \
+    bullet/BulletCollision/CollisionDispatch/btCollisionObject.h \
+    bullet/BulletCollision/CollisionDispatch/btGhostObject.h \
+    bullet/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btCollisionCreateFunc.h \
+    bullet/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btBoxBoxDetector.h \
+    bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.h \
+    bullet/BulletCollision/CollisionDispatch/SphereTriangleDetector.h \
+    bullet/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btUnionFind.h \
+    bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btSimulationIslandManager.h \
+    bullet/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h \
+    bullet/BulletCollision/CollisionDispatch/btCollisionWorld.h \
+    bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.h \
+    bullet/BulletCollision/CollisionDispatch/btManifoldResult.h \
+    bullet/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btCollisionConfiguration.h \
+    bullet/BulletCollision/CollisionShapes/btConvexShape.h \
+    bullet/BulletCollision/CollisionShapes/btConvex2dShape.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleCallback.h \
+    bullet/BulletCollision/CollisionShapes/btPolyhedralConvexShape.h \
+    bullet/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.h \
+    bullet/BulletCollision/CollisionShapes/btCompoundShape.h \
+    bullet/BulletCollision/CollisionShapes/btBoxShape.h \
+    bullet/BulletCollision/CollisionShapes/btBox2dShape.h \
+    bullet/BulletCollision/CollisionShapes/btMultiSphereShape.h \
+    bullet/BulletCollision/CollisionShapes/btCollisionMargin.h \
+    bullet/BulletCollision/CollisionShapes/btConcaveShape.h \
+    bullet/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h \
+    bullet/BulletCollision/CollisionShapes/btEmptyShape.h \
+    bullet/BulletCollision/CollisionShapes/btUniformScalingShape.h \
+    bullet/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h \
+    bullet/BulletCollision/CollisionShapes/btMaterial.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleInfoMap.h \
+    bullet/BulletCollision/CollisionShapes/btSphereShape.h \
+    bullet/BulletCollision/CollisionShapes/btConvexPointCloudShape.h \
+    bullet/BulletCollision/CollisionShapes/btCapsuleShape.h \
+    bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h \
+    bullet/BulletCollision/CollisionShapes/btCollisionShape.h \
+    bullet/BulletCollision/CollisionShapes/btStaticPlaneShape.h \
+    bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleMeshShape.h \
+    bullet/BulletCollision/CollisionShapes/btStridingMeshInterface.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleMesh.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleBuffer.h \
+    bullet/BulletCollision/CollisionShapes/btShapeHull.h \
+    bullet/BulletCollision/CollisionShapes/btMinkowskiSumShape.h \
+    bullet/BulletCollision/CollisionShapes/btOptimizedBvh.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleShape.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexMaterialArray.h \
+    bullet/BulletCollision/CollisionShapes/btCylinderShape.h \
+    bullet/BulletCollision/CollisionShapes/btTetrahedronShape.h \
+    bullet/BulletCollision/CollisionShapes/btConvexInternalShape.h \
+    bullet/BulletCollision/CollisionShapes/btConeShape.h \
+    bullet/BulletCollision/CollisionShapes/btConvexHullShape.h \
+    bullet/BulletCollision/BroadphaseCollision/btAxisSweep3.h \
+    bullet/BulletCollision/BroadphaseCollision/btDbvtBroadphase.h \
+    bullet/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h \
+    bullet/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.h \
+    bullet/BulletCollision/BroadphaseCollision/btDbvt.h \
+    bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCallback.h \
+    bullet/BulletCollision/BroadphaseCollision/btDispatcher.h \
+    bullet/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h \
+    bullet/BulletCollision/BroadphaseCollision/btBroadphaseProxy.h \
+    bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h \
+    bullet/BulletCollision/BroadphaseCollision/btBroadphaseInterface.h \
+    bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.h \
+    bullet/BulletCollision/Gimpact/btGImpactBvh.cpp \
+    bullet/BulletCollision/Gimpact/btGImpactQuantizedBvh.cpp \
+    bullet/BulletCollision/Gimpact/btTriangleShapeEx.cpp \
+    bullet/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.cpp \
+    bullet/BulletCollision/Gimpact/btGImpactShape.cpp \
+    bullet/BulletCollision/Gimpact/gim_box_set.cpp \
+    bullet/BulletCollision/Gimpact/gim_contact.cpp \
+    bullet/BulletCollision/Gimpact/gim_memory.cpp \
+    bullet/BulletCollision/Gimpact/gim_tri_collision.cpp
+# Bullet BulletDynamics sources and headers (dynamics, constraint solver, vehicle, character).
+bullet_sources += \
+    bullet/BulletDynamics/Dynamics/btRigidBody.cpp \
+    bullet/BulletDynamics/Dynamics/btSimpleDynamicsWorld.cpp \
+    bullet/BulletDynamics/Dynamics/Bullet-C-API.cpp \
+    bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp \
+    bullet/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.cpp \
+    bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.cpp \
+    bullet/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.cpp \
+    bullet/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.cpp \
+    bullet/BulletDynamics/ConstraintSolver/btTypedConstraint.cpp \
+    bullet/BulletDynamics/ConstraintSolver/btContactConstraint.cpp \
+    bullet/BulletDynamics/ConstraintSolver/btSliderConstraint.cpp \
+    bullet/BulletDynamics/ConstraintSolver/btConeTwistConstraint.cpp \
+    bullet/BulletDynamics/ConstraintSolver/btHingeConstraint.cpp \
+    bullet/BulletDynamics/ConstraintSolver/btHinge2Constraint.cpp \
+    bullet/BulletDynamics/ConstraintSolver/btUniversalConstraint.cpp \
+    bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp \
+    bullet/BulletDynamics/Vehicle/btWheelInfo.cpp \
+    bullet/BulletDynamics/Vehicle/btRaycastVehicle.cpp \
+    bullet/BulletDynamics/Character/btKinematicCharacterController.cpp \
+    bullet/BulletDynamics/Character/btKinematicCharacterController.h \
+    bullet/BulletDynamics/Character/btCharacterControllerInterface.h \
+    bullet/BulletDynamics/Dynamics/btActionInterface.h \
+    bullet/BulletDynamics/Dynamics/btSimpleDynamicsWorld.h \
+    bullet/BulletDynamics/Dynamics/btRigidBody.h \
+    bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h \
+    bullet/BulletDynamics/Dynamics/btDynamicsWorld.h \
+    bullet/BulletDynamics/ConstraintSolver/btSolverBody.h \
+    bullet/BulletDynamics/ConstraintSolver/btConstraintSolver.h \
+    bullet/BulletDynamics/ConstraintSolver/btConeTwistConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btTypedConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h \
+    bullet/BulletDynamics/ConstraintSolver/btContactConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btJacobianEntry.h \
+    bullet/BulletDynamics/ConstraintSolver/btSolverConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h \
+    bullet/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btSliderConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btHingeConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btHinge2Constraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btUniversalConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.h \
+    bullet/BulletDynamics/Vehicle/btVehicleRaycaster.h \
+    bullet/BulletDynamics/Vehicle/btRaycastVehicle.h \
+    bullet/BulletDynamics/Vehicle/btWheelInfo.h
+# Bullet BulletSoftBody sources and headers.
+bullet_sources += \
+    bullet/BulletSoftBody/btDefaultSoftBodySolver.cpp \
+    bullet/BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.cpp \
+    bullet/BulletSoftBody/btSoftBody.cpp \
+    bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp \
+    bullet/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.cpp \
+    bullet/BulletSoftBody/btSoftRigidDynamicsWorld.cpp \
+    bullet/BulletSoftBody/btSoftBodyHelpers.cpp \
+    bullet/BulletSoftBody/btSoftSoftCollisionAlgorithm.cpp \
+    bullet/BulletSoftBody/btSparseSDF.h \
+    bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.h \
+    bullet/BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.h \
+    bullet/BulletSoftBody/btSoftBody.h \
+    bullet/BulletSoftBody/btSoftSoftCollisionAlgorithm.h \
+    bullet/BulletSoftBody/btSoftBodyInternals.h \
+    bullet/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.h \
+    bullet/BulletSoftBody/btSoftRigidDynamicsWorld.h \
+    bullet/BulletSoftBody/btSoftBodyHelpers.h
+# Header-only list: mostly repeats headers named in the groups above, and adds the Gimpact headers.
+bullet_sources += \
+    bullet/BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.h \
+    bullet/BulletSoftBody/btSoftBodyInternals.h \
+    bullet/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.h \
+    bullet/BulletSoftBody/btSoftSoftCollisionAlgorithm.h \
+    bullet/BulletSoftBody/btSoftBody.h \
+    bullet/BulletSoftBody/btSoftBodyHelpers.h \
+    bullet/BulletSoftBody/btSparseSDF.h \
+    bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.h \
+    bullet/BulletSoftBody/btSoftRigidDynamicsWorld.h \
+    bullet/BulletDynamics/Vehicle/btRaycastVehicle.h \
+    bullet/BulletDynamics/Vehicle/btWheelInfo.h \
+    bullet/BulletDynamics/Vehicle/btVehicleRaycaster.h \
+    bullet/BulletDynamics/Dynamics/btActionInterface.h \
+    bullet/BulletDynamics/Dynamics/btRigidBody.h \
+    bullet/BulletDynamics/Dynamics/btDynamicsWorld.h \
+    bullet/BulletDynamics/Dynamics/btSimpleDynamicsWorld.h \
+    bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h \
+    bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h \
+    bullet/BulletDynamics/ConstraintSolver/btSolverConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btTypedConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btSliderConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btConstraintSolver.h \
+    bullet/BulletDynamics/ConstraintSolver/btContactConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h \
+    bullet/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btJacobianEntry.h \
+    bullet/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btConeTwistConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btHingeConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btHinge2Constraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btUniversalConstraint.h \
+    bullet/BulletDynamics/ConstraintSolver/btSolverBody.h \
+    bullet/BulletDynamics/Character/btCharacterControllerInterface.h \
+    bullet/BulletDynamics/Character/btKinematicCharacterController.h \
+    bullet/BulletCollision/CollisionShapes/btShapeHull.h \
+    bullet/BulletCollision/CollisionShapes/btConcaveShape.h \
+    bullet/BulletCollision/CollisionShapes/btCollisionMargin.h \
+    bullet/BulletCollision/CollisionShapes/btCompoundShape.h \
+    bullet/BulletCollision/CollisionShapes/btConvexHullShape.h \
+    bullet/BulletCollision/CollisionShapes/btCylinderShape.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleMesh.h \
+    bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h \
+    bullet/BulletCollision/CollisionShapes/btUniformScalingShape.h \
+    bullet/BulletCollision/CollisionShapes/btConvexPointCloudShape.h \
+    bullet/BulletCollision/CollisionShapes/btTetrahedronShape.h \
+    bullet/BulletCollision/CollisionShapes/btCapsuleShape.h \
+    bullet/BulletCollision/CollisionShapes/btSphereShape.h \
+    bullet/BulletCollision/CollisionShapes/btMultiSphereShape.h \
+    bullet/BulletCollision/CollisionShapes/btConvexInternalShape.h \
+    bullet/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h \
+    bullet/BulletCollision/CollisionShapes/btStridingMeshInterface.h \
+    bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h \
+    bullet/BulletCollision/CollisionShapes/btEmptyShape.h \
+    bullet/BulletCollision/CollisionShapes/btOptimizedBvh.h \
+    bullet/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleCallback.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexMaterialArray.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleInfoMap.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleBuffer.h \
+    bullet/BulletCollision/CollisionShapes/btConvexShape.h \
+    bullet/BulletCollision/CollisionShapes/btConvex2dShape.h \
+    bullet/BulletCollision/CollisionShapes/btStaticPlaneShape.h \
+    bullet/BulletCollision/CollisionShapes/btConeShape.h \
+    bullet/BulletCollision/CollisionShapes/btCollisionShape.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleShape.h \
+    bullet/BulletCollision/CollisionShapes/btBoxShape.h \
+    bullet/BulletCollision/CollisionShapes/btBox2dShape.h \
+    bullet/BulletCollision/CollisionShapes/btMinkowskiSumShape.h \
+    bullet/BulletCollision/CollisionShapes/btTriangleMeshShape.h \
+    bullet/BulletCollision/CollisionShapes/btMaterial.h \
+    bullet/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.h \
+    bullet/BulletCollision/CollisionShapes/btPolyhedralConvexShape.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btConvexCast.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btPersistentManifold.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btManifoldPoint.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btRaycastCallback.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btPointCollector.h \
+    bullet/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h \
+    bullet/BulletCollision/BroadphaseCollision/btDbvt.h \
+    bullet/BulletCollision/BroadphaseCollision/btDispatcher.h \
+    bullet/BulletCollision/BroadphaseCollision/btDbvtBroadphase.h \
+    bullet/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h \
+    bullet/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h \
+    bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCallback.h \
+    bullet/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.h \
+    bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.h \
+    bullet/BulletCollision/BroadphaseCollision/btAxisSweep3.h \
+    bullet/BulletCollision/BroadphaseCollision/btBroadphaseInterface.h \
+    bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h \
+    bullet/BulletCollision/BroadphaseCollision/btBroadphaseProxy.h \
+    bullet/BulletCollision/CollisionDispatch/btUnionFind.h \
+    bullet/BulletCollision/CollisionDispatch/btCollisionConfiguration.h \
+    bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.h \
+    bullet/BulletCollision/CollisionDispatch/SphereTriangleDetector.h \
+    bullet/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btCollisionWorld.h \
+    bullet/BulletCollision/CollisionDispatch/btCollisionCreateFunc.h \
+    bullet/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btCollisionObject.h \
+    bullet/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h \
+    bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btGhostObject.h \
+    bullet/BulletCollision/CollisionDispatch/btSimulationIslandManager.h \
+    bullet/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btBoxBoxDetector.h \
+    bullet/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h \
+    bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.h \
+    bullet/BulletCollision/CollisionDispatch/btManifoldResult.h \
+    bullet/BulletCollision/Gimpact/gim_memory.h \
+    bullet/BulletCollision/Gimpact/gim_clip_polygon.h \
+    bullet/BulletCollision/Gimpact/gim_bitset.h \
+    bullet/BulletCollision/Gimpact/gim_linear_math.h \
+    bullet/BulletCollision/Gimpact/btGeometryOperations.h \
+    bullet/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.h \
+    bullet/BulletCollision/Gimpact/btGImpactBvh.h \
+    bullet/BulletCollision/Gimpact/gim_box_set.h \
+    bullet/BulletCollision/Gimpact/gim_array.h \
+    bullet/BulletCollision/Gimpact/btGImpactShape.h \
+    bullet/BulletCollision/Gimpact/btTriangleShapeEx.h \
+    bullet/BulletCollision/Gimpact/btClipPolygon.h \
+    bullet/BulletCollision/Gimpact/gim_box_collision.h \
+    bullet/BulletCollision/Gimpact/gim_tri_collision.h \
+    bullet/BulletCollision/Gimpact/gim_geometry.h \
+    bullet/BulletCollision/Gimpact/gim_math.h \
+    bullet/BulletCollision/Gimpact/btQuantization.h \
+    bullet/BulletCollision/Gimpact/btGImpactQuantizedBvh.h \
+    bullet/BulletCollision/Gimpact/gim_geom_types.h \
+    bullet/BulletCollision/Gimpact/gim_basic_geometry_operations.h \
+    bullet/BulletCollision/Gimpact/gim_contact.h \
+    bullet/BulletCollision/Gimpact/gim_hash_table.h \
+    bullet/BulletCollision/Gimpact/gim_radixsort.h \
+    bullet/BulletCollision/Gimpact/btGImpactMassUtil.h \
+    bullet/BulletCollision/Gimpact/btGenericPoolAllocator.h \
+    bullet/BulletCollision/Gimpact/btBoxCollision.h \
+    bullet/BulletCollision/Gimpact/btContactProcessing.h \
+    bullet/LinearMath/btGeometryUtil.h \
+    bullet/LinearMath/btConvexHull.h \
+    bullet/LinearMath/btList.h \
+    bullet/LinearMath/btMatrix3x3.h \
+    bullet/LinearMath/btVector3.h \
+    bullet/LinearMath/btPoolAllocator.h \
+    bullet/LinearMath/btScalar.h \
+    bullet/LinearMath/btDefaultMotionState.h \
+    bullet/LinearMath/btTransform.h \
+    bullet/LinearMath/btQuadWord.h \
+    bullet/LinearMath/btAabbUtil2.h \
+    bullet/LinearMath/btTransformUtil.h \
+    bullet/LinearMath/btRandom.h \
+    bullet/LinearMath/btQuaternion.h \
+    bullet/LinearMath/btMinMax.h \
+    bullet/LinearMath/btMotionState.h \
+    bullet/LinearMath/btIDebugDraw.h \
+    bullet/LinearMath/btAlignedAllocator.h \
+    bullet/LinearMath/btStackAlloc.h \
+    bullet/LinearMath/btAlignedObjectArray.h \
+    bullet/LinearMath/btHashMap.h \
+    bullet/LinearMath/btQuickprof.h \
+    bullet/LinearMath/btSerializer.h
+
diff --git a/src/bullet/Bullet-C-Api.h b/src/bullet/Bullet-C-Api.h
new file mode 100644
index 00000000..f27a17d5
--- /dev/null
+++ b/src/bullet/Bullet-C-Api.h
@@ -0,0 +1,176 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/*
+	Draft high-level generic physics C-API. For low-level access, use the physics SDK native API's.
+	Work in progress, functionality will be added on demand.
+
+	If possible, use the richer Bullet C++ API, by including "btBulletDynamicsCommon.h"
+*/
+
+#ifndef BULLET_C_API_H
+#define BULLET_C_API_H
+
+/* Declares `name` as a pointer to a dummy struct `name##__`, so every handle kind
+   gets its own distinct pointer type. */
+#define PL_DECLARE_HANDLE(name) typedef struct name##__ { int unused; } *name
+
+/* Scalar type of the C API: double when BT_USE_DOUBLE_PRECISION is defined, float otherwise. */
+#ifdef BT_USE_DOUBLE_PRECISION
+typedef double	plReal;
+#else
+typedef float	plReal;
+#endif
+
+/* Fixed-size arrays of plReal. As function parameters they decay to plReal*, so a callee
+   can write through a non-const plVector3/plQuaternion argument. */
+/* NOTE(review): component order (presumably x,y,z and x,y,z,w) is not visible here - confirm against the implementation. */
+typedef plReal	plVector3[3];
+typedef plReal	plQuaternion[4];
+
+#ifdef __cplusplus
+extern "C" { 
+#endif
+
+/* Handle types of the C API. Each one is declared through PL_DECLARE_HANDLE and is
+   therefore a distinct pointer type. */
+
+/** Particular physics SDK (C-API) */
+	PL_DECLARE_HANDLE(plPhysicsSdkHandle);
+
+/** Dynamics world, belonging to some physics SDK (C-API) */
+	PL_DECLARE_HANDLE(plDynamicsWorldHandle);
+
+/** Rigid Body that can be part of a Dynamics World (C-API) */
+	PL_DECLARE_HANDLE(plRigidBodyHandle);
+
+/** Collision Shape/Geometry, property of a Rigid Body (C-API) */
+	PL_DECLARE_HANDLE(plCollisionShapeHandle);
+
+/** Constraint for Rigid Bodies (C-API) */
+	PL_DECLARE_HANDLE(plConstraintHandle);
+
+/** Triangle Mesh interface (C-API) */
+	PL_DECLARE_HANDLE(plMeshInterfaceHandle);
+
+/** Broadphase Scene/Proxy Handles (C-API) */
+	PL_DECLARE_HANDLE(plCollisionBroadphaseHandle);
+	PL_DECLARE_HANDLE(plBroadphaseProxyHandle);
+	PL_DECLARE_HANDLE(plCollisionWorldHandle);
+
+/**
+	Create and Delete a Physics SDK	
+*/
+
+	/* Returns a new physics SDK handle; plDeletePhysicsSdk is its counterpart. */
+	extern	plPhysicsSdkHandle	plNewBulletSdk(void); /* this could be also another sdk, like ODE, PhysX etc. */
+	extern	void		plDeletePhysicsSdk(plPhysicsSdkHandle	physicsSdk);
+
+/** Collision World, not strictly necessary, you can also just create a Dynamics World with Rigid Bodies which internally manages the Collision World with Collision Objects */
+
+	/* Callback type taken by plCreateSapBroadphase. */
+	/* NOTE(review): presumably object1/object2 are the clientData pointers of the two proxies involved - confirm against the implementation. */
+	typedef void(*btBroadphaseCallback)(void* clientData, void* object1,void* object2);
+
+	/* Creates a broadphase from a begin callback and an end callback. */
+	/* NOTE(review): "Sap" presumably stands for sweep-and-prune - confirm. */
+	extern plCollisionBroadphaseHandle	plCreateSapBroadphase(btBroadphaseCallback beginCallback,btBroadphaseCallback endCallback);
+
+	extern void	plDestroyBroadphase(plCollisionBroadphaseHandle bp);
+
+	/* Creates a proxy in broadphase `bp` from a client data pointer and six bounds (min/max per axis). */
+	extern 	plBroadphaseProxyHandle plCreateProxy(plCollisionBroadphaseHandle bp, void* clientData, plReal minX,plReal minY,plReal minZ, plReal maxX,plReal maxY, plReal maxZ);
+
+	extern void plDestroyProxy(plCollisionBroadphaseHandle bp, plBroadphaseProxyHandle proxyHandle);
+
+	/* Takes new min/max bounds for an existing proxy. Unlike plDestroyProxy, no broadphase handle is passed. */
+	extern void plSetBoundingBox(plBroadphaseProxyHandle proxyHandle, plReal minX,plReal minY,plReal minZ, plReal maxX,plReal maxY, plReal maxZ);
+
+/* todo: add pair cache support with queries like add/remove/find pair */
+	
+	extern plCollisionWorldHandle plCreateCollisionWorld(plPhysicsSdkHandle physicsSdk);
+
+/* todo: add/remove objects */
+	
+
+/* Dynamics World */
+
+	/* Creates a dynamics world for the given SDK handle; plDeleteDynamicsWorld is its counterpart. */
+	extern  plDynamicsWorldHandle plCreateDynamicsWorld(plPhysicsSdkHandle physicsSdk);
+
+	extern  void           plDeleteDynamicsWorld(plDynamicsWorldHandle world);
+
+	/* NOTE(review): the unit of timeStep (presumably seconds) is not visible here - confirm. */
+	extern	void	plStepSimulation(plDynamicsWorldHandle,	plReal	timeStep);
+
+	extern  void plAddRigidBody(plDynamicsWorldHandle world, plRigidBodyHandle object);
+
+	extern  void plRemoveRigidBody(plDynamicsWorldHandle world, plRigidBodyHandle object);
+
+
+/* Rigid Body  */
+
+	/* NOTE(review): `mass` is declared as float, not plReal, so it stays single precision
+	   even when BT_USE_DOUBLE_PRECISION is defined. */
+	extern  plRigidBodyHandle plCreateRigidBody(	void* user_data,  float mass, plCollisionShapeHandle cshape );
+
+	extern  void plDeleteRigidBody(plRigidBodyHandle body);
+
+
+/* Collision Shape definition */
+
+	/* Shape constructors: each returns a new plCollisionShapeHandle. */
+	extern  plCollisionShapeHandle plNewSphereShape(plReal radius);
+	/* NOTE(review): whether x/y/z are full extents or half extents is not visible here - confirm. */
+	extern  plCollisionShapeHandle plNewBoxShape(plReal x, plReal y, plReal z);
+	extern  plCollisionShapeHandle plNewCapsuleShape(plReal radius, plReal height);	
+	extern  plCollisionShapeHandle plNewConeShape(plReal radius, plReal height);
+	extern  plCollisionShapeHandle plNewCylinderShape(plReal radius, plReal height);
+	extern	plCollisionShapeHandle plNewCompoundShape(void);
+	/* Takes a compound shape, a child shape, and a position and orientation for the child. */
+	extern	void	plAddChildShape(plCollisionShapeHandle compoundShape,plCollisionShapeHandle childShape, plVector3 childPos,plQuaternion childOrn);
+
+	extern  void plDeleteShape(plCollisionShapeHandle shape);
+
+	/* Convex Meshes */
+	extern  plCollisionShapeHandle plNewConvexHullShape(void);
+	extern  void		plAddVertex(plCollisionShapeHandle convexHull, plReal x,plReal y,plReal z);
+/* Concave static triangle meshes */
+	extern  plMeshInterfaceHandle		   plNewMeshInterface(void);
+	/* v0, v1, v2 are the three vertices; they are declared non-const although named as inputs. */
+	extern  void		plAddTriangle(plMeshInterfaceHandle meshHandle, plVector3 v0,plVector3 v1,plVector3 v2);
+	extern  plCollisionShapeHandle plNewStaticTriangleMeshShape(plMeshInterfaceHandle);
+
+	extern  void plSetScaling(plCollisionShapeHandle shape, plVector3 scaling);
+
+/* SOLID has Response Callback/Table/Management */
+/* PhysX has Triggers, User Callbacks and filtering */
+/* ODE has the typedef void dNearCallback (void *data, dGeomID o1, dGeomID o2); */
+
+/*	typedef void plUpdatedPositionCallback(void* userData, plRigidBodyHandle	rbHandle, plVector3 pos); */
+/*	typedef void plUpdatedOrientationCallback(void* userData, plRigidBodyHandle	rbHandle, plQuaternion orientation); */
+
+	/* get world transform */
+	/* The getters take non-const output arguments (arrays decay to pointers). */
+	/* NOTE(review): `matrix` is presumably a 16-element OpenGL-style matrix - confirm size and layout. */
+	extern void	plGetOpenGLMatrix(plRigidBodyHandle object, plReal* matrix);
+	extern void	plGetPosition(plRigidBodyHandle object,plVector3 position);
+	extern void plGetOrientation(plRigidBodyHandle object,plQuaternion orientation);
+
+	/* set world transform (position/orientation) */
+	extern  void plSetPosition(plRigidBodyHandle object, const plVector3 position);
+	extern  void plSetOrientation(plRigidBodyHandle object, const plQuaternion orientation);
+	/* Takes no rigid body: the only non-scalar argument is the quaternion `orient`, */
+	/* presumably the output built from yaw/pitch/roll - confirm against the implementation. */
+	extern	void plSetEuler(plReal yaw,plReal pitch,plReal roll, plQuaternion orient);
+	/* NOTE(review): `matrix` is non-const although named as an input - confirm it is only read. */
+	extern	void plSetOpenGLMatrix(plRigidBodyHandle object, plReal* matrix);
+
+	/* Result record for plRayCast. */
+	typedef struct plRayCastResult {
+		plRigidBodyHandle		m_body;  
+		plCollisionShapeHandle	m_shape; 		
+		plVector3				m_positionWorld; 		
+		plVector3				m_normalWorld;
+	} plRayCastResult;
+
+	/* NOTE(review): `res` is passed BY VALUE, so anything the callee writes into it is not
+	   visible to the caller; only the int return value comes back. A pointer parameter
+	   would be needed to return hit data - confirm intent before relying on this call. */
+	extern  int plRayCast(plDynamicsWorldHandle world, const plVector3 rayStart, const plVector3 rayEnd, plRayCastResult res);
+
+	/* Sweep API */
+
+	/* extern  plRigidBodyHandle plObjectCast(plDynamicsWorldHandle world, const plVector3 rayStart, const plVector3 rayEnd, plVector3 hitpoint, plVector3 normal); */
+
+	/* Continuous Collision Detection API */
+	
+	/* needed for source/blender/blenkernel/intern/collision.c */
+	/* NOTE(review): unlike the rest of this header, this declaration uses float/double directly */
+	/* instead of plReal and has no explicit `extern`. */
+	/* p1..p3 and q1..q3 are presumably the vertices of two triangles, with pa/pb/normal as outputs - confirm. */
+	double plNearestPoints(float p1[3], float p2[3], float p3[3], float q1[3], float q2[3], float q3[3], float *pa, float *pb, float normal[3]);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif //BULLET_C_API_H
+
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btAxisSweep3.cpp b/src/bullet/BulletCollision/BroadphaseCollision/btAxisSweep3.cpp
new file mode 100644
index 00000000..77763305
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btAxisSweep3.cpp
@@ -0,0 +1,37 @@
+
+//Bullet Continuous Collision Detection and Physics Library
+//Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+
+//
+// btAxisSweep3
+//
+// Copyright (c) 2006 Simon Hobbs
+//
+// This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source distribution.
+#include "btAxisSweep3.h"
+
+
+/// 16-bit variant: forwards everything to btAxisSweep3Internal<unsigned short int>,
+/// using 0xfffe as the handle mask and 0xffff as the handle sentinel.
+btAxisSweep3::btAxisSweep3(const btVector3& worldAabbMin,const btVector3& worldAabbMax, unsigned short int maxHandles, btOverlappingPairCache* pairCache, bool disableRaycastAccelerator)
+	: btAxisSweep3Internal<unsigned short int>(
+		worldAabbMin,
+		worldAabbMax,
+		0xfffe,
+		0xffff,
+		maxHandles,
+		pairCache,
+		disableRaycastAccelerator)
+{
+	// One handle is reserved as the sentinel, so the requested count must stay inside (1, 32767).
+	btAssert(maxHandles > 1 && maxHandles < 32767);
+}
+
+
+/// 32-bit variant: forwards everything to btAxisSweep3Internal<unsigned int>,
+/// using 0xfffffffe as the handle mask and 0x7fffffff as the handle sentinel.
+bt32BitAxisSweep3::bt32BitAxisSweep3(const btVector3& worldAabbMin,const btVector3& worldAabbMax, unsigned int maxHandles , btOverlappingPairCache* pairCache , bool disableRaycastAccelerator)
+	: btAxisSweep3Internal<unsigned int>(
+		worldAabbMin,
+		worldAabbMax,
+		0xfffffffe,
+		0x7fffffff,
+		maxHandles,
+		pairCache,
+		disableRaycastAccelerator)
+{
+	// One handle is reserved as the sentinel, so the requested count must stay inside (1, 2147483647).
+	btAssert(maxHandles > 1 && maxHandles < 2147483647);
+}
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btAxisSweep3.h b/src/bullet/BulletCollision/BroadphaseCollision/btAxisSweep3.h
new file mode 100644
index 00000000..4f4d94b3
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btAxisSweep3.h
@@ -0,0 +1,1051 @@
+//Bullet Continuous Collision Detection and Physics Library
+//Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+//
+// btAxisSweep3.h
+//
+// Copyright (c) 2006 Simon Hobbs
+//
+// This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
+//
+// Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
+//
+// 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+//
+// 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+//
+// 3. This notice may not be removed or altered from any source distribution.
+
+#ifndef BT_AXIS_SWEEP_3_H
+#define BT_AXIS_SWEEP_3_H
+
+#include "LinearMath/btVector3.h"
+#include "btOverlappingPairCache.h"
+#include "btBroadphaseInterface.h"
+#include "btBroadphaseProxy.h"
+#include "btOverlappingPairCallback.h"
+#include "btDbvtBroadphase.h"
+
+//#define DEBUG_BROADPHASE 1
+#define USE_OVERLAP_TEST_ON_REMOVES 1
+
+/// The internal template class btAxisSweep3Internal implements the sweep and prune broadphase.
+/// It uses quantized integers to represent the begin and end points for each of the 3 axes.
+/// Don't use this class directly, use btAxisSweep3 or bt32BitAxisSweep3 instead.
+/// BP_FP_INT_TYPE is the integer type used for quantized positions, edge indices and handle indices.
+template <typename BP_FP_INT_TYPE>
+class btAxisSweep3Internal : public btBroadphaseInterface
+{
+protected:
+
+	/// mask applied to a quantized coordinate in quantize() before the min/max flag is OR-ed in
+	BP_FP_INT_TYPE	m_bpHandleMask;
+	/// upper end of the quantized range; also the position given to the upper boundary sentinel edge
+	BP_FP_INT_TYPE	m_handleSentinel;
+
+public:
+	
+ BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	/// One end point (begin or end) of a handle's interval on a single axis.
+	class Edge
+	{
+	public:
+		BP_FP_INT_TYPE m_pos;			// quantized position; low bit is min/max (0 = min edge, 1 = max edge)
+		BP_FP_INT_TYPE m_handle;		// index of the owning Handle (see getHandle)
+
+		/// Returns the low bit of m_pos: 1 for a max edge, 0 for a min edge.
+		BP_FP_INT_TYPE IsMax() const {return static_cast<BP_FP_INT_TYPE>(m_pos & 1);}
+	};
+
+public:
+	/// Broadphase proxy as stored in the handle pool, extended with its edge indices.
+	class	Handle : public btBroadphaseProxy
+	{
+	public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
+		// indexes into the edge arrays, one min and one max index per axis
+		BP_FP_INT_TYPE m_minEdges[3], m_maxEdges[3];		// 6 * 2 = 12 bytes when BP_FP_INT_TYPE is 16 bits wide
+//		BP_FP_INT_TYPE m_uniqueId;
+		btBroadphaseProxy*	m_dbvtProxy;//for faster raycast: matching proxy inside m_raycastAccelerator
+		//void* m_pOwner; this is now in btBroadphaseProxy.m_clientObject
+	
+		// While a handle is on the free list, m_minEdges[0] doubles as the link to the next free handle.
+		SIMD_FORCE_INLINE void SetNextFree(BP_FP_INT_TYPE next) {m_minEdges[0] = next;}
+		SIMD_FORCE_INLINE BP_FP_INT_TYPE GetNextFree() const {return m_minEdges[0];}
+	};		// NOTE(review): legacy size note read "24 bytes + 24 for Edge structures = 44 bytes total per entry"; the figures do not add up, treat as approximate
+
+	
+protected:
+	btVector3 m_worldAabbMin;						// overall system bounds
+	btVector3 m_worldAabbMax;						// overall system bounds
+
+	btVector3 m_quantize;						// scaling factor for quantization
+
+	BP_FP_INT_TYPE m_numHandles;						// number of active handles
+	BP_FP_INT_TYPE m_maxHandles;						// size of the handle pool (requested maximum + 1 for the sentinel handle)
+	Handle* m_pHandles;						// handles pool
+	
+	BP_FP_INT_TYPE m_firstFreeHandle;		// head of the free handles list (0 terminates the list)
+
+	Edge* m_pEdges[3];						// edge arrays for the 3 axes (each array has m_maxHandles * 2 entries, two of which are the boundary sentinels)
+	void* m_pEdgesRawPtr[3];				// raw allocations backing m_pEdges, released in the destructor
+
+	btOverlappingPairCache* m_pairCache;
+
+	///btOverlappingPairCallback is an additional optional user callback for adding/removing overlapping pairs, similar interface to btOverlappingPairCache.
+	btOverlappingPairCallback* m_userPairCallback;
+	
+	///true when m_pairCache was created by the constructor and must be destroyed by the destructor
+	bool	m_ownsPairCache;
+
+	///number of invalidated pairs that calculateOverlappingPairs trims from the end of the pair array
+	int	m_invalidPair;
+
+	///additional dynamic aabb structure, used to accelerate ray cast queries.
+	///can be disabled using an optional argument in the constructor
+	btDbvtBroadphase*	m_raycastAccelerator;
+	///pair cache handed to m_raycastAccelerator; only created when the accelerator is enabled
+	btOverlappingPairCache*	m_nullPairCache;
+
+
+	// allocation/deallocation
+	BP_FP_INT_TYPE allocHandle();
+	void freeHandle(BP_FP_INT_TYPE handle);
+	
+
+	bool testOverlap2D(const Handle* pHandleA, const Handle* pHandleB,int axis0,int axis1);
+
+#ifdef DEBUG_BROADPHASE
+	void debugPrintAxis(int axis,bool checkCardinality=true);
+#endif //DEBUG_BROADPHASE
+
+	//Overlap* AddOverlap(BP_FP_INT_TYPE handleA, BP_FP_INT_TYPE handleB);
+	//void RemoveOverlap(BP_FP_INT_TYPE handleA, BP_FP_INT_TYPE handleB);
+
+	
+
+	// Move a single min/max edge down or up within the edge array of one axis.
+	void sortMinDown(int axis, BP_FP_INT_TYPE edge, btDispatcher* dispatcher, bool updateOverlaps );
+	void sortMinUp(int axis, BP_FP_INT_TYPE edge, btDispatcher* dispatcher, bool updateOverlaps );
+	void sortMaxDown(int axis, BP_FP_INT_TYPE edge, btDispatcher* dispatcher, bool updateOverlaps );
+	void sortMaxUp(int axis, BP_FP_INT_TYPE edge, btDispatcher* dispatcher, bool updateOverlaps );
+
+public:
+
+	/// maxHandles is the number of user handles; one extra slot is added internally for the sentinel handle.
+	/// When pairCache is 0 a pair cache is created and owned by this object.
+	btAxisSweep3Internal(const btVector3& worldAabbMin,const btVector3& worldAabbMax, BP_FP_INT_TYPE handleMask, BP_FP_INT_TYPE handleSentinel, BP_FP_INT_TYPE maxHandles = 16384, btOverlappingPairCache* pairCache=0,bool disableRaycastAccelerator = false);
+
+	virtual	~btAxisSweep3Internal();
+
+	BP_FP_INT_TYPE getNumHandles() const
+	{
+		return m_numHandles;
+	}
+
+	virtual void	calculateOverlappingPairs(btDispatcher* dispatcher);
+	
+	// Index-based handle management; the returned/accepted value indexes the handle pool.
+	BP_FP_INT_TYPE addHandle(const btVector3& aabbMin,const btVector3& aabbMax, void* pOwner,short int collisionFilterGroup,short int collisionFilterMask,btDispatcher* dispatcher,void* multiSapProxy);
+	void removeHandle(BP_FP_INT_TYPE handle,btDispatcher* dispatcher);
+	void updateHandle(BP_FP_INT_TYPE handle, const btVector3& aabbMin,const btVector3& aabbMax,btDispatcher* dispatcher);
+	/// Returns the pool entry at 'index'; no bounds check is performed.
+	SIMD_FORCE_INLINE Handle* getHandle(BP_FP_INT_TYPE index) const {return m_pHandles + index;}
+
+	/// Rebuilds the free list, but only when no handle is in use.
+	virtual void resetPool(btDispatcher* dispatcher);
+
+	void	processAllOverlappingPairs(btOverlapCallback* callback);
+
+	//Broadphase Interface
+	virtual btBroadphaseProxy*	createProxy(  const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask,btDispatcher* dispatcher,void* multiSapProxy);
+	virtual void	destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
+	virtual void	setAabb(btBroadphaseProxy* proxy,const btVector3& aabbMin,const btVector3& aabbMax,btDispatcher* dispatcher);
+	virtual void  getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const;
+	
+	/// Both tests delegate to m_raycastAccelerator when it exists; otherwise they visit every handle.
+	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin=btVector3(0,0,0), const btVector3& aabbMax = btVector3(0,0,0));
+	virtual void	aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback);
+
+	
+	/// Writes the three quantized coordinates of 'point' to out[0..2]; isMax (0 or 1) is OR-ed into the low bit.
+	void quantize(BP_FP_INT_TYPE* out, const btVector3& point, int isMax) const;
+	///unQuantize should be conservative: aabbMin/aabbMax should be larger than 'getAabb' result
+	void unQuantize(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const;
+	
+	bool	testAabbOverlap(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1);
+
+	btOverlappingPairCache*	getOverlappingPairCache()
+	{
+		return m_pairCache;
+	}
+	const btOverlappingPairCache*	getOverlappingPairCache() const
+	{
+		return m_pairCache;
+	}
+
+	void	setOverlappingPairUserCallback(btOverlappingPairCallback* pairCallback)
+	{
+		m_userPairCallback = pairCallback;
+	}
+	const btOverlappingPairCallback*	getOverlappingPairUserCallback() const
+	{
+		return m_userPairCallback;
+	}
+
+	///getBroadphaseAabb returns the world bounds that were passed to the constructor
+	///will add some transform later
+	virtual void getBroadphaseAabb(btVector3& aabbMin,btVector3& aabbMax) const
+	{
+		aabbMin = m_worldAabbMin;
+		aabbMax = m_worldAabbMax;
+	}
+
+	///currently a no-op: the printing code below is commented out
+	virtual void	printStats()
+	{
+/*		printf("btAxisSweep3.h\n");
+		printf("numHandles = %d, maxHandles = %d\n",m_numHandles,m_maxHandles);
+		printf("aabbMin=%f,%f,%f,aabbMax=%f,%f,%f\n",m_worldAabbMin.getX(),m_worldAabbMin.getY(),m_worldAabbMin.getZ(),
+			m_worldAabbMax.getX(),m_worldAabbMax.getY(),m_worldAabbMax.getZ());
+			*/
+
+	}
+
+};
+
+////////////////////////////////////////////////////////////////////
+
+
+
+
+#ifdef DEBUG_BROADPHASE
+#include <stdio.h>
+
+/// Debug helper, only compiled when DEBUG_BROADPHASE is defined.
+/// Prints every edge in use on 'axis': begin/end marker, owning handle index, raw
+/// quantized position and the edge index stored back in the owning handle.
+/// When checkCardinality is true, asserts that the upper sentinel sits at
+/// index m_numHandles*2+1.
+template <typename BP_FP_INT_TYPE>
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::debugPrintAxis(int axis, bool checkCardinality)
+{
+	// handle 0 is the sentinel handle; its max edge is the last edge in use on this axis
+	int numEdges = m_pHandles[0].m_maxEdges[axis];
+	printf("SAP Axis %d, numEdges=%d\n",axis,numEdges);
+
+	int i;
+	for (i=0;i<numEdges+1;i++)
+	{
+		Edge* pEdge = m_pEdges[axis] + i;
+		Handle* pHandlePrev = getHandle(pEdge->m_handle);
+		int handleIndex = pEdge->IsMax()? pHandlePrev->m_maxEdges[axis] : pHandlePrev->m_minEdges[axis];
+		char beginOrEnd;
+		beginOrEnd=pEdge->IsMax()?'E':'B';
+		// explicit casts keep the varargs matching %d / %x whatever BP_FP_INT_TYPE is
+		printf("	[%c,h=%d,p=%x,i=%d]\n",beginOrEnd,static_cast<int>(pEdge->m_handle),static_cast<unsigned int>(pEdge->m_pos),handleIndex);
+	}
+
+	if (checkCardinality)
+		btAssert(numEdges == static_cast<int>(m_numHandles*2+1));
+}
+#endif //DEBUG_BROADPHASE
+
+/// Broadphase interface entry point: registers a new AABB and returns its proxy.
+/// The proxy is the pooled Handle created by addHandle(). When the raycast accelerator
+/// is enabled, a matching proxy is also created there and stored in m_dbvtProxy.
+template <typename BP_FP_INT_TYPE>
+btBroadphaseProxy*	btAxisSweep3Internal<BP_FP_INT_TYPE>::createProxy(  const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr,short int collisionFilterGroup,short int collisionFilterMask,btDispatcher* dispatcher,void* multiSapProxy)
+{
+	const BP_FP_INT_TYPE newIndex = addHandle(aabbMin,aabbMax, userPtr,collisionFilterGroup,collisionFilterMask,dispatcher,multiSapProxy);
+	Handle* const newHandle = getHandle(newIndex);
+
+	if (!m_raycastAccelerator)
+	{
+		return newHandle;
+	}
+
+	// mirror the object into the dynamic AABB tree used for ray and AABB queries
+	newHandle->m_dbvtProxy = m_raycastAccelerator->createProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask,dispatcher,0);
+	return newHandle;
+}
+
+
+
+/// Broadphase interface entry point: removes a proxy previously returned by createProxy().
+/// The mirrored accelerator proxy (if any) is destroyed first, then the handle itself.
+template <typename BP_FP_INT_TYPE>
+void	btAxisSweep3Internal<BP_FP_INT_TYPE>::destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher)
+{
+	Handle* const doomed = static_cast<Handle*>(proxy);
+
+	if (m_raycastAccelerator)
+	{
+		m_raycastAccelerator->destroyProxy(doomed->m_dbvtProxy,dispatcher);
+	}
+
+	// m_uniqueId was set to the pool index by addHandle()
+	const BP_FP_INT_TYPE poolIndex = static_cast<BP_FP_INT_TYPE>(doomed->m_uniqueId);
+	removeHandle(poolIndex, dispatcher);
+}
+
+/// Broadphase interface entry point: stores the new AABB on the proxy, re-sorts its
+/// edges through updateHandle(), and forwards the new bounds to the raycast
+/// accelerator when one exists.
+template <typename BP_FP_INT_TYPE>
+void	btAxisSweep3Internal<BP_FP_INT_TYPE>::setAabb(btBroadphaseProxy* proxy,const btVector3& aabbMin,const btVector3& aabbMax,btDispatcher* dispatcher)
+{
+	Handle* const target = static_cast<Handle*>(proxy);
+
+	// keep the unquantized bounds on the proxy; getAabb() and aabbTest() read them back
+	target->m_aabbMin = aabbMin;
+	target->m_aabbMax = aabbMax;
+
+	const BP_FP_INT_TYPE poolIndex = static_cast<BP_FP_INT_TYPE>(target->m_uniqueId);
+	updateHandle(poolIndex, aabbMin, aabbMax,dispatcher);
+
+	if (m_raycastAccelerator)
+	{
+		m_raycastAccelerator->setAabb(target->m_dbvtProxy,aabbMin,aabbMax,dispatcher);
+	}
+}
+
+/// Ray query. Delegates to the raycast accelerator when it is enabled. Otherwise every
+/// active handle is passed to rayCallback.process() without any culling.
+template <typename BP_FP_INT_TYPE>
+void	btAxisSweep3Internal<BP_FP_INT_TYPE>::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback,const btVector3& aabbMin,const btVector3& aabbMax)
+{
+	if (m_raycastAccelerator)
+	{
+		m_raycastAccelerator->rayTest(rayFrom,rayTo,rayCallback,aabbMin,aabbMax);
+		return;
+	}
+
+	// Fallback: walk the edges of one axis (choose axis?) and report each handle once,
+	// namely when its max edge is encountered. Edge 0 is the lower sentinel and is skipped.
+	const BP_FP_INT_TYPE axis = 0;
+	for (BP_FP_INT_TYPE edgeIdx=1;edgeIdx<m_numHandles*2+1;edgeIdx++)
+	{
+		const Edge& edge = m_pEdges[axis][edgeIdx];
+		if (!edge.IsMax())
+		{
+			continue;
+		}
+		rayCallback.process(getHandle(edge.m_handle));
+	}
+}
+
+/// AABB query. Delegates to the raycast accelerator when it is enabled. Otherwise every
+/// active handle is tested against [aabbMin, aabbMax] and overlapping ones are passed
+/// to callback.process().
+template <typename BP_FP_INT_TYPE>
+void	btAxisSweep3Internal<BP_FP_INT_TYPE>::aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback)
+{
+	if (m_raycastAccelerator)
+	{
+		m_raycastAccelerator->aabbTest(aabbMin,aabbMax,callback);
+		return;
+	}
+
+	// Fallback: walk the edges of one axis (choose axis?) and visit each handle once,
+	// namely when its max edge is encountered. Edge 0 is the lower sentinel and is skipped.
+	const BP_FP_INT_TYPE axis = 0;
+	for (BP_FP_INT_TYPE edgeIdx=1;edgeIdx<m_numHandles*2+1;edgeIdx++)
+	{
+		const Edge& edge = m_pEdges[axis][edgeIdx];
+		if (!edge.IsMax())
+		{
+			continue;
+		}
+
+		Handle* candidate = getHandle(edge.m_handle);
+		const bool overlaps = TestAabbAgainstAabb2(aabbMin,aabbMax,candidate->m_aabbMin,candidate->m_aabbMax);
+		if (overlaps)
+		{
+			callback.process(candidate);
+		}
+	}
+}
+
+
+
+/// Copies the unquantized bounds stored on the proxy (as last set by setAabb) into
+/// aabbMin / aabbMax.
+template <typename BP_FP_INT_TYPE>
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const
+{
+	const Handle* const source = static_cast<const Handle*>(proxy);
+
+	aabbMin = source->m_aabbMin;
+	aabbMax = source->m_aabbMax;
+}
+
+
+/// Reconstructs a world-space AABB from the quantized edge positions of 'proxy'.
+/// proxy: a Handle owned by this broadphase.
+/// aabbMin / aabbMax: outputs. The max corner uses position + 1, so the result is
+/// meant to err on the large side relative to getAabb().
+///
+/// The positions are converted to btScalar before the + 1 is applied. Staging them in
+/// 'unsigned short' (as before) truncated positions for the 32-bit broadphase and let
+/// 0xffff + 1 wrap to 0 for the 16-bit one.
+template <typename BP_FP_INT_TYPE>
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::unQuantize(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const
+{
+	const Handle* pHandle = static_cast<const Handle*>(proxy);
+
+	btScalar quantMin[3];
+	btScalar quantMax[3];
+
+	for (int axis = 0; axis < 3; axis++)
+	{
+		const Edge* pEdges = m_pEdges[axis];
+		quantMin[axis] = btScalar(pEdges[pHandle->m_minEdges[axis]].m_pos);
+		// add one in floating point so the increment can never overflow BP_FP_INT_TYPE
+		quantMax[axis] = btScalar(pEdges[pHandle->m_maxEdges[axis]].m_pos) + btScalar(1);
+	}
+
+	// undo the scaling applied by quantize(), then shift back to world coordinates
+	aabbMin.setValue(quantMin[0] / (m_quantize.getX()),quantMin[1] / (m_quantize.getY()),quantMin[2] / (m_quantize.getZ()));
+	aabbMin += m_worldAabbMin;
+
+	aabbMax.setValue(quantMax[0] / (m_quantize.getX()),quantMax[1] / (m_quantize.getY()),quantMax[2] / (m_quantize.getZ()));
+	aabbMax += m_worldAabbMin;
+}
+
+
+
+
+/// Builds an empty broadphase covering [worldAabbMin, worldAabbMax].
+/// handleMask / handleSentinel: stored as m_bpHandleMask / m_handleSentinel; the sentinel
+///   also defines the quantization range.
+/// userMaxHandles: number of handles the caller wants; one more slot is allocated because
+///   handle 0 is reserved as the sentinel.
+/// pairCache: pair cache to use; when 0 a btHashedOverlappingPairCache is created and
+///   owned (and later destroyed) by this object.
+/// disableRaycastAccelerator: when false, a btDbvtBroadphase with its own btNullPairCache
+///   is created to serve rayTest/aabbTest.
+template <typename BP_FP_INT_TYPE>
+btAxisSweep3Internal<BP_FP_INT_TYPE>::btAxisSweep3Internal(const btVector3& worldAabbMin,const btVector3& worldAabbMax, BP_FP_INT_TYPE handleMask, BP_FP_INT_TYPE handleSentinel,BP_FP_INT_TYPE userMaxHandles, btOverlappingPairCache* pairCache , bool disableRaycastAccelerator)
+:m_bpHandleMask(handleMask),
+m_handleSentinel(handleSentinel),
+m_pairCache(pairCache),
+m_userPairCallback(0),
+m_ownsPairCache(false),
+m_invalidPair(0),
+m_raycastAccelerator(0),
+m_nullPairCache(0)	// previously left uninitialized when the accelerator is disabled
+{
+	BP_FP_INT_TYPE maxHandles = static_cast<BP_FP_INT_TYPE>(userMaxHandles+1);//need to add one sentinel handle
+
+	if (!m_pairCache)
+	{
+		void* ptr = btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16);
+		m_pairCache = new(ptr) btHashedOverlappingPairCache();
+		m_ownsPairCache = true;
+	}
+
+	if (!disableRaycastAccelerator)
+	{
+		m_nullPairCache = new (btAlignedAlloc(sizeof(btNullPairCache),16)) btNullPairCache();
+		m_raycastAccelerator = new (btAlignedAlloc(sizeof(btDbvtBroadphase),16)) btDbvtBroadphase(m_nullPairCache);//m_pairCache);
+		m_raycastAccelerator->m_deferedcollide = true;//don't add/remove pairs
+	}
+
+	//btAssert(bounds.HasVolume());
+
+	// init bounds
+	m_worldAabbMin = worldAabbMin;
+	m_worldAabbMax = worldAabbMax;
+
+	btVector3 aabbSize = m_worldAabbMax - m_worldAabbMin;
+
+	BP_FP_INT_TYPE	maxInt = m_handleSentinel;
+
+	// per-axis factor mapping a world-space offset from m_worldAabbMin into [0, maxInt]
+	m_quantize = btVector3(btScalar(maxInt),btScalar(maxInt),btScalar(maxInt)) / aabbSize;
+
+	// allocate handles buffer with new[] (Handle declares BT_DECLARE_ALIGNED_ALLOCATOR), and put all handles on free list
+	m_pHandles = new Handle[maxHandles];
+	
+	m_maxHandles = maxHandles;
+	m_numHandles = 0;
+
+	// handle 0 is reserved as the null index, and is also used as the sentinel
+	m_firstFreeHandle = 1;
+	{
+		// chain every handle to its successor; the last link is then overwritten with 0 to end the list
+		for (BP_FP_INT_TYPE i = m_firstFreeHandle; i < maxHandles; i++)
+			m_pHandles[i].SetNextFree(static_cast<BP_FP_INT_TYPE>(i + 1));
+		m_pHandles[maxHandles - 1].SetNextFree(0);
+	}
+
+	{
+		// allocate edge buffers: two edges per handle slot, sentinel handle included
+		for (int i = 0; i < 3; i++)
+		{
+			m_pEdgesRawPtr[i] = btAlignedAlloc(sizeof(Edge)*maxHandles*2,16);
+			m_pEdges[i] = new(m_pEdgesRawPtr[i]) Edge[maxHandles * 2];
+		}
+	}
+	//removed overlap management
+
+	// make boundary sentinels
+	
+	m_pHandles[0].m_clientObject = 0;
+
+	for (int axis = 0; axis < 3; axis++)
+	{
+		// the sentinel handle owns edge 0 (position 0) and edge 1 (position m_handleSentinel)
+		m_pHandles[0].m_minEdges[axis] = 0;
+		m_pHandles[0].m_maxEdges[axis] = 1;
+
+		m_pEdges[axis][0].m_pos = 0;
+		m_pEdges[axis][0].m_handle = 0;
+		m_pEdges[axis][1].m_pos = m_handleSentinel;
+		m_pEdges[axis][1].m_handle = 0;
+#ifdef DEBUG_BROADPHASE
+		debugPrintAxis(axis);
+#endif //DEBUG_BROADPHASE
+
+	}
+
+}
+
+/// Releases everything the constructor allocated: the raycast accelerator with its null
+/// pair cache (if enabled), the three edge buffers, the handle pool, and the pair cache
+/// when this object owns it.
+template <typename BP_FP_INT_TYPE>
+btAxisSweep3Internal<BP_FP_INT_TYPE>::~btAxisSweep3Internal()
+{
+	if (m_raycastAccelerator)
+	{
+		// both objects were placement-constructed, so destroy and free them by hand
+		m_nullPairCache->~btOverlappingPairCache();
+		btAlignedFree(m_nullPairCache);
+		m_raycastAccelerator->~btDbvtBroadphase();
+		btAlignedFree (m_raycastAccelerator);
+	}
+
+	// free the edge buffers, last axis first
+	int axis = 3;
+	while (axis-- > 0)
+	{
+		btAlignedFree(m_pEdgesRawPtr[axis]);
+	}
+
+	delete [] m_pHandles;
+
+	if (m_ownsPairCache)
+	{
+		m_pairCache->~btOverlappingPairCache();
+		btAlignedFree(m_pairCache);
+	}
+}
+
+/// Converts a world-space point into three quantized coordinates written to out[0..2].
+/// Each coordinate is masked with m_bpHandleMask and then tagged with isMax in the low
+/// bit. Values at or below the lower bound become just isMax; values at or above
+/// m_handleSentinel become the masked sentinel tagged with isMax.
+template <typename BP_FP_INT_TYPE>
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::quantize(BP_FP_INT_TYPE* out, const btVector3& point, int isMax) const
+{
+#ifdef OLD_CLAMPING_METHOD
+	///problem with this clamping method is that the floating point during quantization might still go outside the range [(0|isMax) .. (m_handleSentinel&m_bpHandleMask]|isMax]
+	///see http://code.google.com/p/bullet/issues/detail?id=87
+	btVector3 clampedPoint(point);
+	clampedPoint.setMax(m_worldAabbMin);
+	clampedPoint.setMin(m_worldAabbMax);
+	btVector3 v = (clampedPoint - m_worldAabbMin) * m_quantize;
+	out[0] = (BP_FP_INT_TYPE)(((BP_FP_INT_TYPE)v.getX() & m_bpHandleMask) | isMax);
+	out[1] = (BP_FP_INT_TYPE)(((BP_FP_INT_TYPE)v.getY() & m_bpHandleMask) | isMax);
+	out[2] = (BP_FP_INT_TYPE)(((BP_FP_INT_TYPE)v.getZ() & m_bpHandleMask) | isMax);
+#else
+	// clamp after scaling, so the floating point result can never leave the integer range
+	btVector3 v = (point - m_worldAabbMin) * m_quantize;
+	for (int axis = 0; axis < 3; axis++)
+	{
+		const btScalar scaled = v[axis];
+		if (scaled <= 0)
+		{
+			out[axis] = (BP_FP_INT_TYPE)isMax;
+		}
+		else if (scaled >= m_handleSentinel)
+		{
+			out[axis] = (BP_FP_INT_TYPE)((m_handleSentinel&m_bpHandleMask)|isMax);
+		}
+		else
+		{
+			out[axis] = (BP_FP_INT_TYPE)(((BP_FP_INT_TYPE)scaled&m_bpHandleMask)|isMax);
+		}
+	}
+#endif //OLD_CLAMPING_METHOD
+}
+
+
+/// Pops the first entry off the free list and returns its pool index.
+/// Asserts that the free list is not empty (index 0 marks the end of the list).
+template <typename BP_FP_INT_TYPE>
+BP_FP_INT_TYPE btAxisSweep3Internal<BP_FP_INT_TYPE>::allocHandle()
+{
+	btAssert(m_firstFreeHandle);
+
+	const BP_FP_INT_TYPE taken = m_firstFreeHandle;
+
+	// advance the list head to whatever the taken handle linked to
+	m_firstFreeHandle = getHandle(taken)->GetNextFree();
+	++m_numHandles;
+
+	return taken;
+}
+
+/// Pushes 'handle' back onto the front of the free list.
+/// Asserts that the index is a real pool slot (not the sentinel handle 0).
+template <typename BP_FP_INT_TYPE>
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::freeHandle(BP_FP_INT_TYPE handle)
+{
+	btAssert(handle > 0 && handle < m_maxHandles);
+
+	Handle* const released = getHandle(handle);
+	released->SetNextFree(m_firstFreeHandle);
+	m_firstFreeHandle = handle;
+
+	--m_numHandles;
+}
+
+
+/// Allocates a handle for the given AABB, fills in its proxy fields, appends its six
+/// edges just below the upper sentinel and sorts them down into place.
+/// Returns the pool index of the new handle.
+template <typename BP_FP_INT_TYPE>
+BP_FP_INT_TYPE btAxisSweep3Internal<BP_FP_INT_TYPE>::addHandle(const btVector3& aabbMin,const btVector3& aabbMax, void* pOwner,short int collisionFilterGroup,short int collisionFilterMask,btDispatcher* dispatcher,void* multiSapProxy)
+{
+	// quantize both corners; the last argument tags the values as min (0) or max (1) edges
+	BP_FP_INT_TYPE quantMin[3], quantMax[3];
+	quantize(quantMin, aabbMin, 0);
+	quantize(quantMax, aabbMax, 1);
+
+	const BP_FP_INT_TYPE handle = allocHandle();
+	Handle* const pHandle = getHandle(handle);
+
+	pHandle->m_uniqueId = static_cast<int>(handle);
+	pHandle->m_clientObject = pOwner;
+	pHandle->m_collisionFilterGroup = collisionFilterGroup;
+	pHandle->m_collisionFilterMask = collisionFilterMask;
+	pHandle->m_multiSapParentProxy = multiSapProxy;
+
+	// allocHandle() has already counted the new handle, so the upper sentinel currently
+	// sits at index maxSlot - 1 and has to move up by two
+	const BP_FP_INT_TYPE maxSlot = static_cast<BP_FP_INT_TYPE>(m_numHandles * 2);
+	const BP_FP_INT_TYPE minSlot = static_cast<BP_FP_INT_TYPE>(maxSlot - 1);
+
+	for (int axis = 0; axis < 3; axis++)
+	{
+		Edge* const edges = m_pEdges[axis];
+
+		m_pHandles[0].m_maxEdges[axis] += 2;
+
+		// relocate the sentinel, then place the new min/max edges in the freed slots
+		edges[maxSlot + 1] = edges[minSlot];
+
+		edges[minSlot].m_pos = quantMin[axis];
+		edges[minSlot].m_handle = handle;
+
+		edges[maxSlot].m_pos = quantMax[axis];
+		edges[maxSlot].m_handle = handle;
+
+		pHandle->m_minEdges[axis] = minSlot;
+		pHandle->m_maxEdges[axis] = maxSlot;
+	}
+
+	// sort the new edges to their correct position, one axis after another;
+	// overlaps are only updated while sorting the last axis
+	for (int axis = 0; axis < 3; axis++)
+	{
+		const bool updateOverlaps = (axis == 2);
+		sortMinDown(axis, pHandle->m_minEdges[axis], dispatcher,updateOverlaps);
+		sortMaxDown(axis, pHandle->m_maxEdges[axis], dispatcher,updateOverlaps);
+	}
+
+	return handle;
+}
+
+
+/// Removes the handle at pool index 'handle': drops its overlapping pairs (unless the
+/// pair cache defers removal), pushes its edges to the end of every axis, restores the
+/// upper sentinel two slots lower, and returns the handle to the free list.
+template <typename BP_FP_INT_TYPE>
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::removeHandle(BP_FP_INT_TYPE handle,btDispatcher* dispatcher)
+{
+
+	Handle* pHandle = getHandle(handle);
+
+	//explicitly remove the pairs containing the proxy
+	//we could do it also in the sortMinUp (passing true)
+	///@todo: compare performance
+	if (!m_pairCache->hasDeferredRemoval())
+	{
+		m_pairCache->removeOverlappingPairsContainingProxy(pHandle,dispatcher);
+	}
+
+	// compute current limit of edge arrays (the handle is still counted in m_numHandles here)
+	int limit = static_cast<int>(m_numHandles * 2);
+	
+	int axis;
+
+	// the upper sentinel will end up two slots lower on every axis
+	for (axis = 0;axis<3;axis++)
+	{
+		m_pHandles[0].m_maxEdges[axis] -= 2;
+	}
+
+	// remove the edges by sorting them up to the end of the list
+	for ( axis = 0; axis < 3; axis++)
+	{
+		Edge* pEdges = m_pEdges[axis];
+		// give the max edge the sentinel position, then sort it up
+		BP_FP_INT_TYPE max = pHandle->m_maxEdges[axis];
+		pEdges[max].m_pos = m_handleSentinel;
+
+		sortMaxUp(axis,max,dispatcher,false);
+
+
+		// same treatment for the min edge
+		BP_FP_INT_TYPE i = pHandle->m_minEdges[axis];
+		pEdges[i].m_pos = m_handleSentinel;
+
+
+		sortMinUp(axis,i,dispatcher,false);
+
+		// slot limit-1 becomes the upper sentinel again (owned by handle 0)
+		pEdges[limit-1].m_handle = 0;
+		pEdges[limit-1].m_pos = m_handleSentinel;
+		
+#ifdef DEBUG_BROADPHASE
+			debugPrintAxis(axis,false);
+#endif //DEBUG_BROADPHASE
+
+
+	}
+
+
+	// free the handle
+	freeHandle(handle);
+
+	
+}
+
+/// Rebuilds the free list in ascending index order. This only happens when no handle is
+/// in use; otherwise the call does nothing. The dispatcher argument is not used.
+template <typename BP_FP_INT_TYPE>
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::resetPool(btDispatcher* dispatcher)
+{
+	if (m_numHandles != 0)
+	{
+		return;
+	}
+
+	// handle 0 stays reserved as the sentinel, so the list starts at index 1
+	m_firstFreeHandle = 1;
+	for (BP_FP_INT_TYPE slot = m_firstFreeHandle; slot < m_maxHandles; slot++)
+	{
+		m_pHandles[slot].SetNextFree(static_cast<BP_FP_INT_TYPE>(slot + 1));
+	}
+
+	// terminate the list: the last slot must not link past the pool
+	m_pHandles[m_maxHandles - 1].SetNextFree(0);
+}
+
+
+extern int gOverlappingPairs;
+//#include <stdio.h>
+
+/// Cleans up the overlapping pair array. This only does work when the pair cache uses
+/// deferred removal: the array is sorted, then duplicates and pairs whose AABBs no
+/// longer overlap are invalidated, and the invalidated entries are trimmed from the end.
+template <typename BP_FP_INT_TYPE>
+void	btAxisSweep3Internal<BP_FP_INT_TYPE>::calculateOverlappingPairs(btDispatcher* dispatcher)
+{
+	if (!m_pairCache->hasDeferredRemoval())
+	{
+		return;
+	}
+
+	btBroadphasePairArray&	pairs = m_pairCache->getOverlappingPairArray();
+
+	//perform a sort, to find duplicates and to sort 'invalid' pairs to the end
+	pairs.quickSort(btBroadphasePairSortPredicate());
+
+	pairs.resize(pairs.size() - m_invalidPair);
+	m_invalidPair = 0;
+
+	// the array is sorted, so a duplicate always directly follows its twin
+	btBroadphasePair lastSeen;
+	lastSeen.m_pProxy0 = 0;
+	lastSeen.m_pProxy1 = 0;
+	lastSeen.m_algorithm = 0;
+
+	for (int idx=0;idx<pairs.size();idx++)
+	{
+		btBroadphasePair& current = pairs[idx];
+
+		const bool isDuplicate = (current == lastSeen);
+		lastSeen = current;
+
+		bool needsRemoval;
+		if (isDuplicate)
+		{
+			//remove duplicate, it should have no algorithm
+			needsRemoval = true;
+			btAssert(!current.m_algorithm);
+		}
+		else
+		{
+			///important to use an AABB test that is consistent with the broadphase
+			needsRemoval = !testAabbOverlap(current.m_pProxy0,current.m_pProxy1);
+		}
+
+		if (!needsRemoval)
+		{
+			continue;
+		}
+
+		m_pairCache->cleanOverlappingPair(current,dispatcher);
+
+		// mark the entry invalid in place; it is dropped by the resize below
+		current.m_pProxy0 = 0;
+		current.m_pProxy1 = 0;
+		m_invalidPair++;
+		gOverlappingPairs--;
+	}
+
+	///if you don't like to skip the invalid pairs in the array, execute following code:
+	#define CLEAN_INVALID_PAIRS 1
+	#ifdef CLEAN_INVALID_PAIRS
+
+	//perform a sort, to sort 'invalid' pairs to the end
+	pairs.quickSort(btBroadphasePairSortPredicate());
+
+	pairs.resize(pairs.size() - m_invalidPair);
+	m_invalidPair = 0;
+	#endif//CLEAN_INVALID_PAIRS
+
+	//printf("overlappingPairArray.size()=%d\n",overlappingPairArray.size());
+}
+
+
+template <typename BP_FP_INT_TYPE>
+bool btAxisSweep3Internal<BP_FP_INT_TYPE>::testAabbOverlap(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1)
+{
+	// Both proxies are treated as Handles of this broadphase.
+	const Handle* first = static_cast<Handle*>(proxy0);
+	const Handle* second = static_cast<Handle*>(proxy1);
+
+	// The stored edge indices are compared rather than the quantized m_pos values.
+	// The boxes overlap only if their [min,max] index ranges intersect on all 3 axes.
+	bool overlapping = true;
+	for (int k = 0; overlapping && k < 3; ++k)
+	{
+		const bool firstBelow = first->m_maxEdges[k] < second->m_minEdges[k];
+		const bool secondBelow = second->m_maxEdges[k] < first->m_minEdges[k];
+		overlapping = !(firstBelow || secondBelow);
+	}
+	return overlapping;
+}
+
+template <typename BP_FP_INT_TYPE>
+bool btAxisSweep3Internal<BP_FP_INT_TYPE>::testOverlap2D(const Handle* pHandleA, const Handle* pHandleB,int axis0,int axis1)
+{
+	// Edge indices are compared instead of the quantized m_pos values.
+	// Separation along axis0 rules out an overlap straight away.
+	if (pHandleA->m_maxEdges[axis0] < pHandleB->m_minEdges[axis0])
+		return false;
+	if (pHandleB->m_maxEdges[axis0] < pHandleA->m_minEdges[axis0])
+		return false;
+	// Same test along axis1.
+	if (pHandleA->m_maxEdges[axis1] < pHandleB->m_minEdges[axis1])
+		return false;
+	return !(pHandleB->m_maxEdges[axis1] < pHandleA->m_minEdges[axis1]);
+}
+
+template <typename BP_FP_INT_TYPE>
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::updateHandle(BP_FP_INT_TYPE handle, const btVector3& aabbMin,const btVector3& aabbMax,btDispatcher* dispatcher)
+{
+//	btAssert(bounds.IsFinite());
+	//btAssert(bounds.HasVolume());
+	// Moves the handle's min/max edges on all three axes to the new quantized bounds and re-sorts them with overlap updates enabled.
+	Handle* pHandle = getHandle(handle);
+
+	// quantize the new bounds
+	BP_FP_INT_TYPE min[3], max[3];
+	quantize(min, aabbMin, 0);
+	quantize(max, aabbMax, 1);
+
+	// update changed edges
+	for (int axis = 0; axis < 3; axis++)
+	{
+		BP_FP_INT_TYPE emin = pHandle->m_minEdges[axis];
+		BP_FP_INT_TYPE emax = pHandle->m_maxEdges[axis];
+		// signed movement of each edge: new quantized position minus the stored one
+		int dmin = (int)min[axis] - (int)m_pEdges[axis][emin].m_pos;
+		int dmax = (int)max[axis] - (int)m_pEdges[axis][emax].m_pos;
+		// store the new positions first; the sort calls below move the edges to their new slots
+		m_pEdges[axis][emin].m_pos = min[axis];
+		m_pEdges[axis][emax].m_pos = max[axis];
+
+		// expand (only adds overlaps)
+		if (dmin < 0)
+			sortMinDown(axis, emin,dispatcher,true);
+
+		if (dmax > 0)
+			sortMaxUp(axis, emax,dispatcher,true);
+
+		// shrink (only removes overlaps)
+		if (dmin > 0)
+			sortMinUp(axis, emin,dispatcher,true);
+
+		if (dmax < 0)
+			sortMaxDown(axis, emax,dispatcher,true);
+
+#ifdef DEBUG_BROADPHASE
+	debugPrintAxis(axis);
+#endif //DEBUG_BROADPHASE
+	}
+
+	
+}
+
+
+
+
+// sorting a min edge downwards can only ever *add* overlaps
+template <typename BP_FP_INT_TYPE>
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::sortMinDown(int axis, BP_FP_INT_TYPE edge, btDispatcher* /* dispatcher */, bool updateOverlaps)
+{
+	// 'edge' indexes m_pEdges[axis]; the edge is swapped towards the array start while its m_pos is smaller than its predecessor's.
+	Edge* pEdge = m_pEdges[axis] + edge;
+	Edge* pPrev = pEdge - 1;
+	Handle* pHandleEdge = getHandle(pEdge->m_handle);
+	// NOTE(review): no explicit lower bound here; presumably a sentinel edge at the front of the array ends the loop - confirm.
+	while (pEdge->m_pos < pPrev->m_pos)
+	{
+		Handle* pHandlePrev = getHandle(pPrev->m_handle);
+
+		if (pPrev->IsMax())
+		{
+			// if previous edge is a maximum check the bounds and add an overlap if necessary
+			const int axis1 = (1  << axis) & 3;	// maps 0->1, 1->2, 2->0
+			const int axis2 = (1  << axis1) & 3;	// the remaining third axis
+			if (updateOverlaps && testOverlap2D(pHandleEdge, pHandlePrev,axis1,axis2))
+			{
+				m_pairCache->addOverlappingPair(pHandleEdge,pHandlePrev);
+				if (m_userPairCallback)
+					m_userPairCallback->addOverlappingPair(pHandleEdge,pHandlePrev);
+
+				//AddOverlap(pEdge->m_handle, pPrev->m_handle);
+
+			}
+
+			// update edge reference in other handle
+			pHandlePrev->m_maxEdges[axis]++;
+		}
+		else
+			pHandlePrev->m_minEdges[axis]++;
+		// this handle's min edge moves one slot down
+		pHandleEdge->m_minEdges[axis]--;
+
+		// swap the edges
+		Edge swap = *pEdge;
+		*pEdge = *pPrev;
+		*pPrev = swap;
+
+		// decrement
+		pEdge--;
+		pPrev--;
+	}
+
+#ifdef DEBUG_BROADPHASE
+	debugPrintAxis(axis);
+#endif //DEBUG_BROADPHASE
+
+}
+
+// sorting a min edge upwards can only ever *remove* overlaps
+template <typename BP_FP_INT_TYPE>
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::sortMinUp(int axis, BP_FP_INT_TYPE edge, btDispatcher* dispatcher, bool updateOverlaps)
+{
+	Edge* pEdge = m_pEdges[axis] + edge;
+	Edge* pNext = pEdge + 1;
+	Handle* pHandleEdge = getHandle(pEdge->m_handle);
+	// swap upwards until the next slot has handle 0 or holds an edge with a larger m_pos
+	while (pNext->m_handle && (pEdge->m_pos >= pNext->m_pos))
+	{
+		Handle* pHandleNext = getHandle(pNext->m_handle);
+
+		if (pNext->IsMax())
+		{
+			Handle* handle0 = getHandle(pEdge->m_handle);
+			Handle* handle1 = getHandle(pNext->m_handle);
+			const int axis1 = (1  << axis) & 3;	// maps 0->1, 1->2, 2->0
+			const int axis2 = (1  << axis1) & 3;	// the remaining third axis
+			
+			// if next edge is maximum remove any overlap between the two handles
+			if (updateOverlaps 
+#ifdef USE_OVERLAP_TEST_ON_REMOVES
+				&& testOverlap2D(handle0,handle1,axis1,axis2)
+#endif //USE_OVERLAP_TEST_ON_REMOVES
+				)
+			{
+				
+				// the pair is removed from the cache and, if one is set, reported to the user callback
+				m_pairCache->removeOverlappingPair(handle0,handle1,dispatcher);	
+				if (m_userPairCallback)
+					m_userPairCallback->removeOverlappingPair(handle0,handle1,dispatcher);
+				
+			}
+
+
+			// update edge reference in other handle
+			pHandleNext->m_maxEdges[axis]--;
+		}
+		else
+			pHandleNext->m_minEdges[axis]--;
+		// this handle's min edge moves one slot up
+		pHandleEdge->m_minEdges[axis]++;
+
+		// swap the edges
+		Edge swap = *pEdge;
+		*pEdge = *pNext;
+		*pNext = swap;
+
+		// increment
+		pEdge++;
+		pNext++;
+	}
+
+
+}
+
+// sorting a max edge downwards can only ever *remove* overlaps
+template <typename BP_FP_INT_TYPE>
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::sortMaxDown(int axis, BP_FP_INT_TYPE edge, btDispatcher* dispatcher, bool updateOverlaps)
+{
+	// 'edge' indexes m_pEdges[axis]; the edge is swapped towards the array start while its m_pos is smaller than its predecessor's.
+	Edge* pEdge = m_pEdges[axis] + edge;
+	Edge* pPrev = pEdge - 1;
+	Handle* pHandleEdge = getHandle(pEdge->m_handle);
+	// NOTE(review): no explicit lower bound here; presumably a sentinel edge at the front of the array ends the loop - confirm.
+	while (pEdge->m_pos < pPrev->m_pos)
+	{
+		Handle* pHandlePrev = getHandle(pPrev->m_handle);
+
+		if (!pPrev->IsMax())
+		{
+			// if previous edge was a minimum remove any overlap between the two handles
+			Handle* handle0 = getHandle(pEdge->m_handle);
+			Handle* handle1 = getHandle(pPrev->m_handle);
+			const int axis1 = (1  << axis) & 3;	// maps 0->1, 1->2, 2->0
+			const int axis2 = (1  << axis1) & 3;	// the remaining third axis
+
+			if (updateOverlaps  
+#ifdef USE_OVERLAP_TEST_ON_REMOVES
+				&& testOverlap2D(handle0,handle1,axis1,axis2)
+#endif //USE_OVERLAP_TEST_ON_REMOVES
+				)
+			{
+				//this is done during the overlappingpairarray iteration/narrowphase collision
+
+				
+				m_pairCache->removeOverlappingPair(handle0,handle1,dispatcher);
+				if (m_userPairCallback)
+					m_userPairCallback->removeOverlappingPair(handle0,handle1,dispatcher);
+			
+
+
+			}
+
+			// update edge reference in other handle
+			pHandlePrev->m_minEdges[axis]++;;
+		}
+		else
+			pHandlePrev->m_maxEdges[axis]++;
+		// this handle's max edge moves one slot down
+		pHandleEdge->m_maxEdges[axis]--;
+
+		// swap the edges
+		Edge swap = *pEdge;
+		*pEdge = *pPrev;
+		*pPrev = swap;
+
+		// decrement
+		pEdge--;
+		pPrev--;
+	}
+
+	
+#ifdef DEBUG_BROADPHASE
+	debugPrintAxis(axis);
+#endif //DEBUG_BROADPHASE
+
+}
+
+// sorting a max edge upwards can only ever *add* overlaps
+template <typename BP_FP_INT_TYPE>
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::sortMaxUp(int axis, BP_FP_INT_TYPE edge, btDispatcher* /* dispatcher */, bool updateOverlaps)
+{
+	Edge* pEdge = m_pEdges[axis] + edge;
+	Edge* pNext = pEdge + 1;
+	Handle* pHandleEdge = getHandle(pEdge->m_handle);
+	// swap upwards until the next slot has handle 0 or holds an edge with a larger m_pos
+	while (pNext->m_handle && (pEdge->m_pos >= pNext->m_pos))
+	{
+		Handle* pHandleNext = getHandle(pNext->m_handle);
+		// the two axes other than 'axis': (1 << axis) & 3 maps 0->1, 1->2, 2->0
+		const int axis1 = (1  << axis) & 3;
+		const int axis2 = (1  << axis1) & 3;
+
+		if (!pNext->IsMax())
+		{
+			// if next edge is a minimum check the bounds and add an overlap if necessary
+			if (updateOverlaps && testOverlap2D(pHandleEdge, pHandleNext,axis1,axis2))
+			{
+				Handle* handle0 = getHandle(pEdge->m_handle);
+				Handle* handle1 = getHandle(pNext->m_handle);
+				m_pairCache->addOverlappingPair(handle0,handle1);
+				if (m_userPairCallback)
+					m_userPairCallback->addOverlappingPair(handle0,handle1);
+			}
+
+			// update edge reference in other handle
+			pHandleNext->m_minEdges[axis]--;
+		}
+		else
+			pHandleNext->m_maxEdges[axis]--;
+		// this handle's max edge moves one slot up
+		pHandleEdge->m_maxEdges[axis]++;
+
+		// swap the edges
+		Edge swap = *pEdge;
+		*pEdge = *pNext;
+		*pNext = swap;
+
+		// increment
+		pEdge++;
+		pNext++;
+	}
+	
+}
+
+
+
+////////////////////////////////////////////////////////////////////
+
+
+/// The btAxisSweep3 is an efficient implementation of the 3d axis sweep and prune broadphase.
+/// It uses arrays rather than lists for storage of the 3 axes. It also operates on 16-bit integer coordinates instead of floats.
+/// For large worlds and many objects, use bt32BitAxisSweep3 or btDbvtBroadphase instead. bt32BitAxisSweep3 has higher precision and allows more than 16384 objects at the cost of more memory and a bit of performance.
+class btAxisSweep3 : public btAxisSweep3Internal<unsigned short int>
+{
+public:
+	/// Defaults: maxHandles = 16384, pairCache = 0, disableRaycastAccelerator = false.
+	btAxisSweep3(const btVector3& worldAabbMin,const btVector3& worldAabbMax, unsigned short int maxHandles = 16384, btOverlappingPairCache* pairCache = 0, bool disableRaycastAccelerator = false);
+
+};
+
+/// The bt32BitAxisSweep3 allows higher precision quantization and more objects compared to the btAxisSweep3 sweep and prune.
+/// This comes at the cost of more memory per handle, and a bit slower performance.
+/// It uses arrays rather than lists for storage of the 3 axes.
+class bt32BitAxisSweep3 : public btAxisSweep3Internal<unsigned int>
+{
+public:
+	/// Defaults: maxHandles = 1500000, pairCache = 0, disableRaycastAccelerator = false.
+	bt32BitAxisSweep3(const btVector3& worldAabbMin,const btVector3& worldAabbMax, unsigned int maxHandles = 1500000, btOverlappingPairCache* pairCache = 0, bool disableRaycastAccelerator = false);
+
+};
+
+#endif
+
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btBroadphaseInterface.h b/src/bullet/BulletCollision/BroadphaseCollision/btBroadphaseInterface.h
new file mode 100644
index 00000000..f1bf0059
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btBroadphaseInterface.h
@@ -0,0 +1,82 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef		BT_BROADPHASE_INTERFACE_H
+#define 	BT_BROADPHASE_INTERFACE_H
+
+
+
+struct btDispatcherInfo;
+class btDispatcher;
+#include "btBroadphaseProxy.h"
+
+class btOverlappingPairCache;
+
+
+
+struct	btBroadphaseAabbCallback	// callback type accepted by btBroadphaseInterface::aabbTest()
+{
+	virtual ~btBroadphaseAabbCallback() {}
+	virtual bool	process(const btBroadphaseProxy* proxy) = 0;	// implemented by subclasses; the meaning of the bool result is not defined in this header
+};
+
+
+struct	btBroadphaseRayCallback : public btBroadphaseAabbCallback	// callback type accepted by btBroadphaseInterface::rayTest()
+{
+	///added some cached data to accelerate ray-AABB tests
+	btVector3		m_rayDirectionInverse;
+	unsigned int	m_signs[3];
+	btScalar		m_lambda_max;
+	// NOTE(review): m_signs and m_lambda_max are not initialized by this struct; presumably the user or the broadphase fills them in - confirm.
+	virtual ~btBroadphaseRayCallback() {}
+};
+
+#include "LinearMath/btVector3.h"
+
+///The btBroadphaseInterface class provides an interface to detect aabb-overlapping object pairs.
+///Some implementations for this broadphase interface include btAxisSweep3, bt32BitAxisSweep3 and btDbvtBroadphase.
+///The actual overlapping pair management, storage, adding and removing of pairs is dealt with by the btOverlappingPairCache class.
+class btBroadphaseInterface
+{
+public:
+	virtual ~btBroadphaseInterface() {}
+	// proxy lifetime and AABB access; all four are pure virtual
+	virtual btBroadphaseProxy*	createProxy(  const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr, short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy) =0;
+	virtual void	destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher)=0;
+	virtual void	setAabb(btBroadphaseProxy* proxy,const btVector3& aabbMin,const btVector3& aabbMax, btDispatcher* dispatcher)=0;
+	virtual void	getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const =0;
+	// queries taking a user callback; aabbMin/aabbMax of rayTest both default to btVector3(0,0,0)
+	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin=btVector3(0,0,0), const btVector3& aabbMax = btVector3(0,0,0)) = 0;
+
+	virtual void	aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback) = 0;
+
+	///calculateOverlappingPairs is optional: incremental algorithms (sweep and prune) might do it during the set aabb
+	virtual void	calculateOverlappingPairs(btDispatcher* dispatcher)=0;
+	// access to the pair cache, in non-const and const flavours
+	virtual	btOverlappingPairCache*	getOverlappingPairCache()=0;
+	virtual	const btOverlappingPairCache*	getOverlappingPairCache() const =0;
+
+	///getAabb returns the axis aligned bounding box in the 'global' coordinate frame
+	///will add some transform later
+	virtual void getBroadphaseAabb(btVector3& aabbMin,btVector3& aabbMax) const =0;
+
+	///reset broadphase internal structures, to ensure determinism/reproducibility
+	virtual void resetPool(btDispatcher* dispatcher) { (void) dispatcher; };
+	// resetPool above is the only member with a default body (a no-op)
+	virtual void	printStats() = 0;
+
+};
+
+#endif //BT_BROADPHASE_INTERFACE_H
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp b/src/bullet/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp
new file mode 100644
index 00000000..f4d7341f
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btBroadphaseProxy.cpp
@@ -0,0 +1,17 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btBroadphaseProxy.h"
+
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btBroadphaseProxy.h b/src/bullet/BulletCollision/BroadphaseCollision/btBroadphaseProxy.h
new file mode 100644
index 00000000..bb58b828
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btBroadphaseProxy.h
@@ -0,0 +1,270 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_BROADPHASE_PROXY_H
+#define BT_BROADPHASE_PROXY_H
+
+#include "LinearMath/btScalar.h" //for SIMD_FORCE_INLINE
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btAlignedAllocator.h"
+
+
+/// btDispatcher uses these types
+/// IMPORTANT NOTE:The types are ordered polyhedral, implicit convex and concave
+/// to facilitate type checking (the btBroadphaseProxy::is*() helpers compare against the *_START_HERE / *_END_HERE markers)
+/// CUSTOM_POLYHEDRAL_SHAPE_TYPE,CUSTOM_CONVEX_SHAPE_TYPE and CUSTOM_CONCAVE_SHAPE_TYPE can be used to extend Bullet without modifying source code
+enum BroadphaseNativeTypes
+{
+	// polyhedral convex shapes
+	BOX_SHAPE_PROXYTYPE,
+	TRIANGLE_SHAPE_PROXYTYPE,
+	TETRAHEDRAL_SHAPE_PROXYTYPE,
+	CONVEX_TRIANGLEMESH_SHAPE_PROXYTYPE,
+	CONVEX_HULL_SHAPE_PROXYTYPE,
+	CONVEX_POINT_CLOUD_SHAPE_PROXYTYPE,
+	CUSTOM_POLYHEDRAL_SHAPE_TYPE,
+//implicit convex shapes (isPolyhedral() is true only for values before the marker below)
+IMPLICIT_CONVEX_SHAPES_START_HERE,
+	SPHERE_SHAPE_PROXYTYPE,
+	MULTI_SPHERE_SHAPE_PROXYTYPE,
+	CAPSULE_SHAPE_PROXYTYPE,
+	CONE_SHAPE_PROXYTYPE,
+	CONVEX_SHAPE_PROXYTYPE,
+	CYLINDER_SHAPE_PROXYTYPE,
+	UNIFORM_SCALING_SHAPE_PROXYTYPE,
+	MINKOWSKI_SUM_SHAPE_PROXYTYPE,
+	MINKOWSKI_DIFFERENCE_SHAPE_PROXYTYPE,
+	BOX_2D_SHAPE_PROXYTYPE,
+	CONVEX_2D_SHAPE_PROXYTYPE,
+	CUSTOM_CONVEX_SHAPE_TYPE,
+//concave shapes
+CONCAVE_SHAPES_START_HERE,
+	//keep every convex shape type before CONCAVE_SHAPES_START_HERE: btBroadphaseProxy::isConvex() tests proxyType < CONCAVE_SHAPES_START_HERE
+	TRIANGLE_MESH_SHAPE_PROXYTYPE,
+	SCALED_TRIANGLE_MESH_SHAPE_PROXYTYPE,
+	///used for demo integration FAST/Swift collision library and Bullet
+	FAST_CONCAVE_MESH_PROXYTYPE,
+	//terrain
+	TERRAIN_SHAPE_PROXYTYPE,
+///Used for GIMPACT Trimesh integration
+	GIMPACT_SHAPE_PROXYTYPE,
+///Multimaterial mesh
+    MULTIMATERIAL_TRIANGLE_MESH_PROXYTYPE,
+	
+	EMPTY_SHAPE_PROXYTYPE,
+	STATIC_PLANE_PROXYTYPE,
+	CUSTOM_CONCAVE_SHAPE_TYPE,
+CONCAVE_SHAPES_END_HERE,	// isConcave() is true strictly between the two CONCAVE_SHAPES_* markers
+
+	COMPOUND_SHAPE_PROXYTYPE,
+
+	SOFTBODY_SHAPE_PROXYTYPE,
+	HFFLUID_SHAPE_PROXYTYPE,
+	HFFLUID_BUOYANT_CONVEX_SHAPE_PROXYTYPE,
+	INVALID_SHAPE_PROXYTYPE,
+
+	MAX_BROADPHASE_COLLISION_TYPES
+	
+};
+
+
+///The btBroadphaseProxy is the main class that can be used with the Bullet broadphases. 
+///It stores collision shape type information, collision filter information and a client object, typically a btCollisionObject or btRigidBody.
+ATTRIBUTE_ALIGNED16(struct) btBroadphaseProxy
+{
+
+BT_DECLARE_ALIGNED_ALLOCATOR();
+	
+	///optional filtering to cull potential collisions
+	enum CollisionFilterGroups
+	{
+	        DefaultFilter = 1,
+	        StaticFilter = 2,
+	        KinematicFilter = 4,
+	        DebrisFilter = 8,
+			SensorTrigger = 16,
+			CharacterFilter = 32,
+	        AllFilter = -1 //all bits set, so it includes every filter listed above
+	};
+
+	//Usually the client btCollisionObject or Rigidbody class
+	void*	m_clientObject;
+	short int m_collisionFilterGroup;
+	short int m_collisionFilterMask;
+	void*	m_multiSapParentProxy;		
+	int			m_uniqueId;//m_uniqueId is introduced for paircache. could get rid of this, by calculating the address offset etc.
+
+	btVector3	m_aabbMin;
+	btVector3	m_aabbMax;
+	// NOTE(review): neither constructor below assigns m_uniqueId; presumably the owning broadphase sets it before getUid() is used - confirm.
+	SIMD_FORCE_INLINE int getUid() const
+	{
+		return m_uniqueId;
+	}
+
+	//used for memory pools; only the two void* members are initialized here, the filter fields are left unset
+	btBroadphaseProxy() :m_clientObject(0),m_multiSapParentProxy(0)
+	{
+	}
+	// stores the AABB, client pointer, filter group/mask and the optional multi-SAP parent proxy
+	btBroadphaseProxy(const btVector3& aabbMin,const btVector3& aabbMax,void* userPtr,short int collisionFilterGroup, short int collisionFilterMask,void* multiSapParentProxy=0)
+		:m_clientObject(userPtr),
+		m_collisionFilterGroup(collisionFilterGroup),
+		m_collisionFilterMask(collisionFilterMask),
+		m_aabbMin(aabbMin),
+		m_aabbMax(aabbMax)
+	{
+		m_multiSapParentProxy = multiSapParentProxy;
+	}
+
+	// The static helpers below classify a BroadphaseNativeTypes value purely by its position in that enum.
+
+	static SIMD_FORCE_INLINE bool isPolyhedral(int proxyType)
+	{
+		return (proxyType  < IMPLICIT_CONVEX_SHAPES_START_HERE);
+	}
+	// true for every type before CONCAVE_SHAPES_START_HERE, which includes the polyhedral ones
+	static SIMD_FORCE_INLINE bool	isConvex(int proxyType)
+	{
+		return (proxyType < CONCAVE_SHAPES_START_HERE);
+	}
+	// concave types other than GIMPACT_SHAPE_PROXYTYPE
+	static SIMD_FORCE_INLINE bool	isNonMoving(int proxyType)
+	{
+		return (isConcave(proxyType) && !(proxyType==GIMPACT_SHAPE_PROXYTYPE));
+	}
+	// strictly between the two CONCAVE_SHAPES_* markers
+	static SIMD_FORCE_INLINE bool	isConcave(int proxyType)
+	{
+		return ((proxyType > CONCAVE_SHAPES_START_HERE) &&
+			(proxyType < CONCAVE_SHAPES_END_HERE));
+	}
+	static SIMD_FORCE_INLINE bool	isCompound(int proxyType)
+	{
+		return (proxyType == COMPOUND_SHAPE_PROXYTYPE);
+	}
+
+	static SIMD_FORCE_INLINE bool	isSoftBody(int proxyType)
+	{
+		return (proxyType == SOFTBODY_SHAPE_PROXYTYPE);
+	}
+	// only the static plane type counts as infinite
+	static SIMD_FORCE_INLINE bool isInfinite(int proxyType)
+	{
+		return (proxyType == STATIC_PLANE_PROXYTYPE);
+	}
+	// the two 2D shape types: BOX_2D and CONVEX_2D
+	static SIMD_FORCE_INLINE bool isConvex2d(int proxyType)
+	{
+		return (proxyType == BOX_2D_SHAPE_PROXYTYPE) ||	(proxyType == CONVEX_2D_SHAPE_PROXYTYPE);
+	}
+
+	
+}
+;
+
+class btCollisionAlgorithm;
+
+struct btBroadphaseProxy;
+
+
+
+///The btBroadphasePair class contains a pair of aabb-overlapping objects.
+///A btDispatcher can search a btCollisionAlgorithm that performs exact/narrowphase collision detection on the actual collision shapes.
+ATTRIBUTE_ALIGNED16(struct) btBroadphasePair
+{
+	btBroadphasePair ()
+		:
+	m_pProxy0(0),
+		m_pProxy1(0),
+		m_algorithm(0),
+		m_internalInfo1(0)
+	{
+	}
+
+BT_DECLARE_ALIGNED_ALLOCATOR();
+	// member-wise copy of both proxies, the algorithm pointer and the internal info
+	btBroadphasePair(const btBroadphasePair& other)
+		:		m_pProxy0(other.m_pProxy0),
+				m_pProxy1(other.m_pProxy1),
+				m_algorithm(other.m_algorithm),
+				m_internalInfo1(other.m_internalInfo1)
+	{
+	}
+	btBroadphasePair(btBroadphaseProxy& proxy0,btBroadphaseProxy& proxy1)
+	{
+		// m_pProxy0 receives the proxy with the smaller m_uniqueId (proxy1 when the ids are equal)
+		//keep them sorted, so the std::set operations work
+		if (proxy0.m_uniqueId < proxy1.m_uniqueId)
+        { 
+            m_pProxy0 = &proxy0; 
+            m_pProxy1 = &proxy1; 
+        }
+        else 
+        { 
+			m_pProxy0 = &proxy1; 
+            m_pProxy1 = &proxy0; 
+        }
+
+		m_algorithm = 0;
+		m_internalInfo1 = 0;
+
+	}
+	
+	btBroadphaseProxy* m_pProxy0;
+	btBroadphaseProxy* m_pProxy1;
+	// mutable, so the algorithm pointer can be changed through a const pair
+	mutable btCollisionAlgorithm* m_algorithm;
+	union { void* m_internalInfo1; int m_internalTmpValue;};//don't use this data, it will be removed in future version.
+
+};
+
+/*
+//comparison for set operation, see Solid DT_Encounter
+SIMD_FORCE_INLINE bool operator<(const btBroadphasePair& a, const btBroadphasePair& b) 
+{ 
+    return a.m_pProxy0 < b.m_pProxy0 || 
+        (a.m_pProxy0 == b.m_pProxy0 && a.m_pProxy1 < b.m_pProxy1); 
+}
+*/
+
+
+
+class btBroadphasePairSortPredicate
+{
+	public:
+		// Descending order by proxy unique id; a null proxy counts as id -1. Ties fall back to proxy1, then m_algorithm.
+		bool operator() ( const btBroadphasePair& a, const btBroadphasePair& b ) const
+		{
+			const int lhsId0 = a.m_pProxy0 ? a.m_pProxy0->m_uniqueId : -1;
+			const int rhsId0 = b.m_pProxy0 ? b.m_pProxy0->m_uniqueId : -1;
+			if (lhsId0 > rhsId0) return true;
+			if (a.m_pProxy0 != b.m_pProxy0) return false;
+			const int lhsId1 = a.m_pProxy1 ? a.m_pProxy1->m_uniqueId : -1;
+			const int rhsId1 = b.m_pProxy1 ? b.m_pProxy1->m_uniqueId : -1;
+			if (lhsId1 > rhsId1) return true;
+			return a.m_pProxy1 == b.m_pProxy1 && a.m_algorithm > b.m_algorithm;
+		}
+};
+
+
+SIMD_FORCE_INLINE bool operator==(const btBroadphasePair& a, const btBroadphasePair& b) 
+{	// Equality looks only at the two proxy pointers; m_algorithm is not compared.
+	return !(a.m_pProxy0 != b.m_pProxy0 || a.m_pProxy1 != b.m_pProxy1);
+}
+
+
+#endif //BT_BROADPHASE_PROXY_H
+
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.cpp b/src/bullet/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.cpp
new file mode 100644
index 00000000..c95d1be0
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.cpp
@@ -0,0 +1,23 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btCollisionAlgorithm.h"
+#include "btDispatcher.h"
+
+btCollisionAlgorithm::btCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci)
+	: m_dispatcher(ci.m_dispatcher1)	// keep the dispatcher supplied through the construction info
+{
+}
+
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h b/src/bullet/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h
new file mode 100644
index 00000000..36eec971
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h
@@ -0,0 +1,80 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_COLLISION_ALGORITHM_H
+#define BT_COLLISION_ALGORITHM_H
+
+#include "LinearMath/btScalar.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+struct btBroadphaseProxy;
+class btDispatcher;
+class btManifoldResult;
+class btCollisionObject;
+struct btDispatcherInfo;
+class	btPersistentManifold;
+
+typedef btAlignedObjectArray<btPersistentManifold*>	btManifoldArray;
+
+struct btCollisionAlgorithmConstructionInfo	// argument bundle taken by the btCollisionAlgorithm constructor
+{
+	// Default: no dispatcher and no manifold.
+	btCollisionAlgorithmConstructionInfo()
+		:m_dispatcher1(0), m_manifold(0)
+	{
+	}
+	// 'temp' is ignored. m_manifold is explicitly nulled so callers never read an indeterminate pointer.
+	btCollisionAlgorithmConstructionInfo(btDispatcher* dispatcher,int temp)
+		:m_dispatcher1(dispatcher), m_manifold(0)
+	{
+		(void)temp;
+	}
+
+	btDispatcher*	m_dispatcher1;	// copied into btCollisionAlgorithm::m_dispatcher by that class's constructor
+	btPersistentManifold*	m_manifold;
+
+//	int	getDispatcherId();
+};
+
+
+///btCollisionAlgorithm is a collision interface that is compatible with the Broadphase and btDispatcher.
+///It is persistent over frames
+class btCollisionAlgorithm
+{
+
+protected:
+	/// Dispatcher taken from the construction info; null when default-constructed.
+	btDispatcher*	m_dispatcher;
+
+protected:
+//	int	getDispatcherId();
+	
+public:
+	/// Default construction leaves the algorithm without a dispatcher (null instead of an indeterminate pointer).
+	btCollisionAlgorithm() : m_dispatcher(0) {}
+	/// Stores ci.m_dispatcher1 as the dispatcher.
+	btCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci);
+
+	virtual ~btCollisionAlgorithm() {}
+	/// Pure virtual; presumably implementations report contacts for body0/body1 through resultOut - confirm against the concrete algorithms.
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut) = 0;
+	/// Pure virtual; returns a btScalar whose exact meaning is defined by the implementing algorithm.
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut) = 0;
+	/// Pure virtual; presumably implementations append their manifolds to manifoldArray - confirm.
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray) = 0;
+};
+
+
+#endif //BT_COLLISION_ALGORITHM_H
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btDbvt.cpp b/src/bullet/BulletCollision/BroadphaseCollision/btDbvt.cpp
new file mode 100644
index 00000000..95443af5
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btDbvt.cpp
@@ -0,0 +1,1295 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///btDbvt implementation by Nathanael Presson
+
+#include "btDbvt.h"
+
+//
+// Array of mutable node pointers (used for leaf lists when rebuilding the tree).
+typedef btAlignedObjectArray<btDbvtNode*>			tNodeArray;
+// Array of read-only node pointers (used when enumerating nodes).
+typedef btAlignedObjectArray<const btDbvtNode*>	tConstNodeArray;
+
+//
+/// ICollide policy that records every node passed to Process, in call order.
+struct btDbvtNodeEnumerator : btDbvt::ICollide
+{
+	/// Nodes collected so far.
+	tConstNodeArray	nodes;
+
+	/// Appends the visited node to nodes.
+	void Process(const btDbvtNode* visited)
+	{
+		nodes.push_back(visited);
+	}
+};
+
+//
+/// Returns 1 when node is stored in its parent's childs[1] slot, otherwise 0.
+/// node->parent is dereferenced unconditionally, so node must have a parent.
+static DBVT_INLINE int			indexof(const btDbvtNode* node)
+{
+	const btDbvtNode*	owner=node->parent;
+	return((owner->childs[1]==node)?1:0);
+}
+
+//
+/// Returns by value the volume that Merge(a,b,...) writes for the two inputs.
+static DBVT_INLINE btDbvtVolume	merge(const btDbvtVolume& a,const btDbvtVolume& b)
+{
+#if (DBVT_MERGE_IMPL==DBVT_IMPL_SSE)
+	// SSE build: the result lives in a 16-byte aligned raw buffer viewed as a btDbvtVolume.
+	ATTRIBUTE_ALIGNED16(char storage[sizeof(btDbvtAabbMm)]);
+	btDbvtVolume&	merged=*(btDbvtVolume*)storage;
+#else
+	btDbvtVolume	merged;
+#endif
+	Merge(a,b,merged);
+	return(merged);
+}
+
+// volume+edge lengths
+/// Cost metric of a volume: the product of its three edge lengths plus their sum.
+static DBVT_INLINE btScalar		size(const btDbvtVolume& a)
+{
+	const btVector3	lengths=a.Lengths();
+	// Accumulate left to right so the summation order matches x*y*z+x+y+z.
+	btScalar	cost=lengths.x()*lengths.y()*lengths.z();
+	cost=cost+lengths.x();
+	cost=cost+lengths.y();
+	cost=cost+lengths.z();
+	return(cost);
+}
+
+//
+/// Recursively raises maxdepth to the largest depth at which a non-internal node is found.
+/// depth is the depth assigned to node itself.
+static void						getmaxdepth(const btDbvtNode* node,int depth,int& maxdepth)
+{
+	if(!node->isinternal())
+	{
+		// Reached a leaf: record its depth if it is the deepest so far.
+		maxdepth=btMax(maxdepth,depth);
+		return;
+	}
+	const int	below=depth+1;
+	getmaxdepth(node->childs[0],below,maxdepth);
+	getmaxdepth(node->childs[1],below,maxdepth);
+}
+
+//
+/// Retires node: the node currently parked in pdbvt->m_free is released with
+/// btAlignedFree and node takes its place in that slot.
+static DBVT_INLINE void			deletenode(btDbvt* pdbvt,btDbvtNode* node)
+{
+	btDbvtNode*	parked=pdbvt->m_free;
+	pdbvt->m_free=node;
+	btAlignedFree(parked);
+}
+
+//
+/// Deletes node and everything below it (children first), clearing
+/// pdbvt->m_root when the root itself is reached.
+static void						recursedeletenode(btDbvt* pdbvt,btDbvtNode* node)
+{
+	const bool	haschildren=!node->isleaf();
+	if(haschildren)
+	{
+		for(int side=0;side<2;++side)
+		{
+			recursedeletenode(pdbvt,node->childs[side]);
+		}
+	}
+	if(pdbvt->m_root==node)
+	{
+		pdbvt->m_root=0;
+	}
+	deletenode(pdbvt,node);
+}
+
+//
+/// Produces a node with the given parent and data and a null childs[1].
+/// The node parked in pdbvt->m_free is reused when there is one; otherwise a
+/// new node is placement-constructed in 16-byte aligned memory.
+static DBVT_INLINE btDbvtNode*	createnode(btDbvt* pdbvt,btDbvtNode* parent,void* data)
+{
+	btDbvtNode*	node=pdbvt->m_free;
+	if(node)
+	{
+		pdbvt->m_free=0;
+	}
+	else
+	{
+		node=new(btAlignedAlloc(sizeof(btDbvtNode),16)) btDbvtNode();
+	}
+	node->parent=parent;
+	node->data=data;
+	node->childs[1]=0;
+	return(node);
+}
+
+//
+/// Same as the three-argument createnode, additionally copying volume into the node.
+static DBVT_INLINE btDbvtNode*	createnode(btDbvt* pdbvt,btDbvtNode* parent,const btDbvtVolume& volume,void* data)
+{
+	btDbvtNode*	created=createnode(pdbvt,parent,data);
+	created->volume=volume;
+	return(created);
+}
+
+//
+/// Same as the three-argument createnode, with the node's volume set to the
+/// result of Merge(volume0,volume1,...).
+static DBVT_INLINE btDbvtNode*	createnode(btDbvt* pdbvt,btDbvtNode* parent,const btDbvtVolume& volume0,const btDbvtVolume& volume1,void* data)
+{
+	btDbvtNode*	created=createnode(pdbvt,parent,data);
+	Merge(volume0,volume1,created->volume);
+	return(created);
+}
+
+//
+/// Inserts leaf into the tree of pdbvt, starting the descent at root.
+/// An empty tree simply adopts leaf as its root. Otherwise the descent follows
+/// Select() down to a leaf, a new internal node is created in that leaf's
+/// place holding both it and the inserted leaf, and ancestor volumes are
+/// re-merged upwards until one already contains its child.
+static void						insertleaf(btDbvt* pdbvt,btDbvtNode* root,btDbvtNode* leaf)
+{
+	if(!pdbvt->m_root)
+	{
+		pdbvt->m_root=leaf;
+		leaf->parent=0;
+		return;
+	}
+	// Walk down to the leaf chosen by Select at every internal node.
+	while(!root->isleaf())
+	{
+		const int	pick=Select(leaf->volume,
+			root->childs[0]->volume,
+			root->childs[1]->volume);
+		root=root->childs[pick];
+	}
+	btDbvtNode*	above=root->parent;
+	btDbvtNode*	joint=createnode(pdbvt,above,leaf->volume,root->volume,0);
+	// Hook the new internal node in where root used to be; this has to happen
+	// before root->parent is redirected because indexof reads it.
+	if(above)
+		above->childs[indexof(root)]=joint;
+	else
+		pdbvt->m_root=joint;
+	joint->childs[0]=root;
+	root->parent=joint;
+	joint->childs[1]=leaf;
+	leaf->parent=joint;
+	// Refit ancestors until one already encloses the node below it.
+	while(above)
+	{
+		if(above->volume.Contain(joint->volume))
+			break;
+		Merge(above->childs[0]->volume,above->childs[1]->volume,above->volume);
+		joint=above;
+		above=joint->parent;
+	}
+}
+
+//
+/// Unlinks leaf from the tree of pdbvt (the leaf itself is not deleted).
+/// The leaf's parent is retired through deletenode and its sibling moves up.
+/// Returns 0 when leaf was the root; otherwise the first ancestor whose volume
+/// was unchanged by the refit, or the tree root when none was found.
+static btDbvtNode*				removeleaf(btDbvt* pdbvt,btDbvtNode* leaf)
+{
+	if(pdbvt->m_root==leaf)
+	{
+		pdbvt->m_root=0;
+		return(0);
+	}
+	btDbvtNode*	parent=leaf->parent;
+	btDbvtNode*	grand=parent->parent;
+	btDbvtNode*	sibling=parent->childs[1-indexof(leaf)];
+	if(!grand)
+	{
+		// The parent was the root: the sibling becomes the new root.
+		pdbvt->m_root=sibling;
+		sibling->parent=0;
+		deletenode(pdbvt,parent);
+		return(pdbvt->m_root);
+	}
+	grand->childs[indexof(parent)]=sibling;
+	sibling->parent=grand;
+	deletenode(pdbvt,parent);
+	// Refit upwards, stopping at the first ancestor whose volume does not change.
+	while(grand)
+	{
+		const btDbvtVolume	before=grand->volume;
+		Merge(grand->childs[0]->volume,grand->childs[1]->volume,grand->volume);
+		if(!NotEqual(before,grand->volume))
+			break;
+		grand=grand->parent;
+	}
+	return(grand?grand:pdbvt->m_root);
+}
+
+//
+/// Collects into leaves the nodes reached from root, retiring (deletenode)
+/// every internal node that is descended through. The descent stops at
+/// non-internal nodes and wherever depth has reached 0; the default depth of
+/// -1 never reaches 0 by decrementing, so the whole subtree is flattened.
+static void						fetchleaves(btDbvt* pdbvt,
+											btDbvtNode* root,
+											tNodeArray& leaves,
+											int depth=-1)
+{
+	const bool	descend=root->isinternal()&&(depth!=0);
+	if(!descend)
+	{
+		leaves.push_back(root);
+		return;
+	}
+	const int	remaining=depth-1;
+	fetchleaves(pdbvt,root->childs[0],leaves,remaining);
+	fetchleaves(pdbvt,root->childs[1],leaves,remaining);
+	deletenode(pdbvt,root);
+}
+
+//
+/// Partitions leaves into left and right by the sign of the projection of
+/// each leaf's volume center (relative to org) onto axis.
+/// left and right are emptied first. Leaves whose projection is <= 0 go to
+/// left, all others go to right.
+static void						split(	const tNodeArray& leaves,
+									  tNodeArray& left,
+									  tNodeArray& right,
+									  const btVector3& org,
+									  const btVector3& axis)
+{
+	left.resize(0);
+	right.resize(0);
+	for(int i=0,ni=leaves.size();i<ni;++i)
+	{
+		// A projection of exactly 0 must land in left: topdown() tallies
+		// "dot>0 ? 1 : 0", i.e. it counts zero projections on the left side.
+		// With the previous strict "<0" test a left set made up only of
+		// zero-projection leaves came back empty although it had been counted
+		// as non-empty, and topdown() then indexed an empty array.
+		if(btDot(axis,leaves[i]->volume.Center()-org)<=0)
+			left.push_back(leaves[i]);
+		else
+			right.push_back(leaves[i]);
+	}
+}
+
+//
+/// Returns the volume obtained by merging the volumes of all entries of leaves.
+/// leaves[0] is read unconditionally, so the array must not be empty.
+static btDbvtVolume				bounds(	const tNodeArray& leaves)
+{
+#if DBVT_MERGE_IMPL==DBVT_IMPL_SSE
+	// SSE build: accumulate into a 16-byte aligned raw buffer viewed as a volume.
+	ATTRIBUTE_ALIGNED16(char	storage[sizeof(btDbvtVolume)]);
+	btDbvtVolume&	total=*(btDbvtVolume*)storage;
+	total=leaves[0]->volume;
+#else
+	btDbvtVolume total=leaves[0]->volume;
+#endif
+	const int	count=leaves.size();
+	int			next=1;
+	while(next<count)
+	{
+		Merge(total,leaves[next]->volume,total);
+		++next;
+	}
+	return(total);
+}
+
+//
+/// Builds a tree bottom-up: repeatedly finds the pair of entries whose merged
+/// volume has the smallest size(), joins them under a new parent node and
+/// replaces the pair by that parent, until a single entry is left in leaves[0].
+static void						bottomup(btDbvt* pdbvt,tNodeArray& leaves)
+{
+	while(leaves.size()>1)
+	{
+		btScalar	bestcost=SIMD_INFINITY;
+		int			first=-1;
+		int			second=-1;
+		// Exhaustive search over all unordered pairs.
+		for(int a=0;a<leaves.size();++a)
+		{
+			for(int b=a+1;b<leaves.size();++b)
+			{
+				const btScalar	cost=size(merge(leaves[a]->volume,leaves[b]->volume));
+				if(cost<bestcost)
+				{
+					bestcost=cost;
+					first=a;
+					second=b;
+				}
+			}
+		}
+		btDbvtNode*	lhs=leaves[first];
+		btDbvtNode*	rhs=leaves[second];
+		btDbvtNode*	joined=createnode(pdbvt,0,lhs->volume,rhs->volume,0);
+		joined->childs[0]=lhs;
+		joined->childs[1]=rhs;
+		lhs->parent=joined;
+		rhs->parent=joined;
+		// The new parent takes the first slot; the second slot is swapped to
+		// the back and dropped.
+		leaves[first]=joined;
+		leaves.swap(second,leaves.size()-1);
+		leaves.pop_back();
+	}
+}
+
+//
+/// Builds a subtree from leaves and returns its root.
+/// While more than bu_treshold entries remain, the set is divided in two along
+/// one of the three coordinate axes (or alternately when no axis separates it)
+/// and each half is built recursively; smaller sets are handed to bottomup().
+/// leaves[0] is returned as-is for a single entry, so leaves must not be empty.
+static btDbvtNode*			topdown(btDbvt* pdbvt,
+									tNodeArray& leaves,
+									int bu_treshold)
+{
+	// Candidate split directions: the X, Y and Z unit vectors.
+	static const btVector3	axis[]={btVector3(1,0,0),
+		btVector3(0,1,0),
+		btVector3(0,0,1)};
+	if(leaves.size()>1)
+	{
+		if(leaves.size()>bu_treshold)
+		{
+			const btDbvtVolume	vol=bounds(leaves);
+			const btVector3			org=vol.Center();
+			tNodeArray				sets[2];
+			int						bestaxis=-1;
+			int						bestmidp=leaves.size();
+			int						splitcount[3][2]={{0,0},{0,0},{0,0}};
+			int i;
+			// For every axis, count how many centers project to a positive
+			// value (slot 1) and how many do not (slot 0).
+			for( i=0;i<leaves.size();++i)
+			{
+				const btVector3	x=leaves[i]->volume.Center()-org;
+				for(int j=0;j<3;++j)
+				{
+					++splitcount[j][btDot(x,axis[j])>0?1:0];
+				}
+			}
+			// Keep the axis with the most balanced counts among those that
+			// have entries on both sides.
+			for( i=0;i<3;++i)
+			{
+				if((splitcount[i][0]>0)&&(splitcount[i][1]>0))
+				{
+					const int	midp=(int)btFabs(btScalar(splitcount[i][0]-splitcount[i][1]));
+					if(midp<bestmidp)
+					{
+						bestaxis=i;
+						bestmidp=midp;
+					}
+				}
+			}
+			if(bestaxis>=0)
+			{
+				sets[0].reserve(splitcount[bestaxis][0]);
+				sets[1].reserve(splitcount[bestaxis][1]);
+				split(leaves,sets[0],sets[1],org,axis[bestaxis]);
+			}
+			else
+			{
+				// No axis separates the entries: deal them out alternately.
+				sets[0].reserve(leaves.size()/2+1);
+				sets[1].reserve(leaves.size()/2);
+				for(int i=0,ni=leaves.size();i<ni;++i)
+				{
+					sets[i&1].push_back(leaves[i]);
+				}
+			}
+			// The new internal node carries the bounds of the whole set.
+			btDbvtNode*	node=createnode(pdbvt,0,vol,0);
+			node->childs[0]=topdown(pdbvt,sets[0],bu_treshold);
+			node->childs[1]=topdown(pdbvt,sets[1],bu_treshold);
+			node->childs[0]->parent=node;
+			node->childs[1]->parent=node;
+			return(node);
+		}
+		else
+		{
+			// Small set: bottomup() leaves the subtree root in leaves[0].
+			bottomup(pdbvt,leaves);
+			return(leaves[0]);
+		}
+	}
+	return(leaves[0]);
+}
+
+//
+/// Exchanges the internal node n with its parent p when p's address compares
+/// greater than n's; otherwise leaves the tree untouched.
+/// r is the tree root reference and is redirected to n when p had no parent.
+/// Returns p after an exchange (p then sits below n), or n when nothing changed.
+static DBVT_INLINE btDbvtNode*	sort(btDbvtNode* n,btDbvtNode*& r)
+{
+	btDbvtNode*	p=n->parent;
+	btAssert(n->isinternal());
+	if(p>n)
+	{
+		const int		i=indexof(n);	// slot of n under p
+		const int		j=1-i;			// slot of n's sibling under p
+		btDbvtNode*	s=p->childs[j];
+		btDbvtNode*	q=p->parent;
+		btAssert(n==p->childs[i]);
+		// n takes p's place under q (or becomes the root).
+		if(q) q->childs[indexof(p)]=n; else r=n;
+		s->parent=n;
+		p->parent=n;
+		n->parent=q;
+		// p inherits n's former children ...
+		p->childs[0]=n->childs[0];
+		p->childs[1]=n->childs[1];
+		n->childs[0]->parent=p;
+		n->childs[1]->parent=p;
+		// ... and n gets p and the former sibling, in the same slots as before.
+		n->childs[i]=p;
+		n->childs[j]=s;
+		// Volumes follow the tree positions, not the node objects.
+		btSwap(p->volume,n->volume);
+		return(p);
+	}
+	return(n);
+}
+
+#if 0
+/// Follows parent links from n at most count times, stopping early at a null
+/// node, and returns the node reached. (Compiled out by the enclosing #if 0.)
+static DBVT_INLINE btDbvtNode*	walkup(btDbvtNode* n,int count)
+{
+	while(n)
+	{
+		if(!(count--)) break;
+		n=n->parent;
+	}
+	return(n);
+}
+#endif
+
+//
+// Api
+//
+
+//
+/// Constructs an empty tree: no root, no parked free node, lookahead -1,
+/// zero leaves and a zeroed optimization path.
+btDbvt::btDbvt()
+{
+	m_opath		=	0;
+	m_leaves	=	0;
+	m_lkhd		=	-1;
+	m_free		=	0;
+	m_root		=	0;
+}
+
+//
+/// Releases every node still owned by the tree by delegating to clear().
+btDbvt::~btDbvt()
+{
+	this->clear();
+}
+
+//
+/// Destroys all nodes and returns the tree to its freshly constructed state:
+/// no root, no parked free node, lookahead -1, zero leaves, empty traversal
+/// stack and a zeroed optimization path.
+void			btDbvt::clear()
+{
+	if(m_root)	
+		recursedeletenode(this,m_root);
+	// recursedeletenode parks the last retired node in m_free; release it too.
+	btAlignedFree(m_free);
+	m_free=0;
+	m_lkhd		=	-1;
+	// Every leaf has just been destroyed, so the leaf counter must not keep
+	// its old value (it sizes reserves and the default pass count later on).
+	m_leaves	=	0;
+	m_stkStack.clear();
+	m_opath		=	0;
+	
+}
+
+//
+/// Rebuilds the whole tree with bottomup(): all leaves are gathered, the
+/// internal nodes are retired, and the surviving entry becomes the new root.
+/// Does nothing on an empty tree.
+void			btDbvt::optimizeBottomUp()
+{
+	if(!m_root) return;
+	tNodeArray	collected;
+	collected.reserve(m_leaves);
+	fetchleaves(this,m_root,collected);
+	bottomup(this,collected);
+	m_root=collected[0];
+}
+
+//
+/// Rebuilds the whole tree with topdown(): all leaves are gathered, the
+/// internal nodes are retired, and topdown() returns the new root.
+/// bu_treshold is forwarded unchanged. Does nothing on an empty tree.
+void			btDbvt::optimizeTopDown(int bu_treshold)
+{
+	if(!m_root) return;
+	tNodeArray	collected;
+	collected.reserve(m_leaves);
+	fetchleaves(this,m_root,collected);
+	m_root=topdown(this,collected,bu_treshold);
+}
+
+//
+/// Runs the given number of incremental optimization passes.
+/// Each pass walks from the root to a leaf, applying sort() at every internal
+/// node and choosing the branch from successive bits of m_opath, then
+/// re-inserts that leaf via update() and advances m_opath.
+/// A negative passes value is replaced by the current leaf count.
+void			btDbvt::optimizeIncremental(int passes)
+{
+	if(passes<0) passes=m_leaves;
+	if(!m_root||(passes<=0)) return;
+	for(;passes>0;--passes)
+	{
+		btDbvtNode*	cursor=m_root;
+		unsigned	pathbit=0;
+		while(cursor->isinternal())
+		{
+			btDbvtNode*	settled=sort(cursor,m_root);
+			cursor=settled->childs[(m_opath>>pathbit)&1];
+			// Wrap the bit index so the shift stays within the width of unsigned.
+			pathbit=(pathbit+1)&(sizeof(unsigned)*8-1);
+		}
+		update(cursor);
+		++m_opath;
+	}
+}
+
+//
+/// Creates a leaf holding volume and data, inserts it starting from the root,
+/// bumps the leaf counter and returns the new leaf.
+btDbvtNode*	btDbvt::insert(const btDbvtVolume& volume,void* data)
+{
+	btDbvtNode*	const	added=createnode(this,0,volume,data);
+	insertleaf(this,m_root,added);
+	m_leaves+=1;
+	return(added);
+}
+
+//
+/// Removes leaf and re-inserts it with its current volume.
+/// With lookahead >= 0 the re-insertion starts at most lookahead levels above
+/// the node returned by removeleaf(); with a negative lookahead it starts at
+/// the tree root.
+void			btDbvt::update(btDbvtNode* leaf,int lookahead)
+{
+	btDbvtNode*	start=removeleaf(this,leaf);
+	if(start)
+	{
+		if(lookahead<0)
+		{
+			start=m_root;
+		}
+		else
+		{
+			int	climbed=0;
+			while((climbed<lookahead)&&start->parent)
+			{
+				start=start->parent;
+				++climbed;
+			}
+		}
+	}
+	insertleaf(this,start,leaf);
+}
+
+//
+/// Removes leaf, assigns it the given volume and re-inserts it.
+/// The starting point of the re-insertion is governed by m_lkhd in the same
+/// way the lookahead argument governs update(leaf,lookahead).
+void			btDbvt::update(btDbvtNode* leaf,btDbvtVolume& volume)
+{
+	btDbvtNode*	start=removeleaf(this,leaf);
+	if(start)
+	{
+		if(m_lkhd<0)
+		{
+			start=m_root;
+		}
+		else
+		{
+			int	climbed=0;
+			while((climbed<m_lkhd)&&start->parent)
+			{
+				start=start->parent;
+				++climbed;
+			}
+		}
+	}
+	leaf->volume=volume;
+	insertleaf(this,start,leaf);
+}
+
+//
+/// Updates leaf only if its stored volume no longer contains volume.
+/// In that case volume is modified in place (expanded by margin on every axis,
+/// then signed-expanded by velocity) before the leaf is re-inserted.
+/// Returns true when the leaf was updated, false when nothing was done.
+bool			btDbvt::update(btDbvtNode* leaf,btDbvtVolume& volume,const btVector3& velocity,btScalar margin)
+{
+	const bool	moved=!leaf->volume.Contain(volume);
+	if(moved)
+	{
+		volume.Expand(btVector3(margin,margin,margin));
+		volume.SignedExpand(velocity);
+		update(leaf,volume);
+	}
+	return(moved);
+}
+
+//
+/// Updates leaf only if its stored volume no longer contains volume.
+/// In that case volume is signed-expanded by velocity in place before the
+/// leaf is re-inserted. Returns true when the leaf was updated.
+bool			btDbvt::update(btDbvtNode* leaf,btDbvtVolume& volume,const btVector3& velocity)
+{
+	const bool	moved=!leaf->volume.Contain(volume);
+	if(moved)
+	{
+		volume.SignedExpand(velocity);
+		update(leaf,volume);
+	}
+	return(moved);
+}
+
+//
+/// Updates leaf only if its stored volume no longer contains volume.
+/// In that case volume is expanded by margin on every axis in place before
+/// the leaf is re-inserted. Returns true when the leaf was updated.
+bool			btDbvt::update(btDbvtNode* leaf,btDbvtVolume& volume,btScalar margin)
+{
+	const bool	moved=!leaf->volume.Contain(volume);
+	if(moved)
+	{
+		volume.Expand(btVector3(margin,margin,margin));
+		update(leaf,volume);
+	}
+	return(moved);
+}
+
+//
+/// Unlinks leaf from the tree, retires the node and decrements the leaf counter.
+void			btDbvt::remove(btDbvtNode* leaf)
+{
+	(void)removeleaf(this,leaf);	// the returned re-insertion hint is not needed
+	deletenode(this,leaf);
+	m_leaves-=1;
+}
+
+//
+/// Serializes the tree through iwriter.
+/// Prepare() is called once with the root and the node count, then every
+/// node is emitted with its own index, its parent's index (-1 for none) and,
+/// for internal nodes, the indices of both children.
+void			btDbvt::write(IWriter* iwriter) const
+{
+	// Empty tree: announce zero nodes and stop.
+	// NOTE(review): enumNodes is not visible in this file; it is presumed to
+	// dereference its root argument, so it is never handed a null root here.
+	if(!m_root)
+	{
+		iwriter->Prepare(m_root,0);
+		return;
+	}
+	btDbvtNodeEnumerator	nodes;
+	nodes.nodes.reserve(m_leaves*2);
+	enumNodes(m_root,nodes);
+	iwriter->Prepare(m_root,nodes.nodes.size());
+	for(int i=0;i<nodes.nodes.size();++i)
+	{
+		const btDbvtNode* n=nodes.nodes[i];
+		int			p=-1;
+		// Indices are positions in the enumeration order, found by linear search.
+		if(n->parent) p=nodes.nodes.findLinearSearch(n->parent);
+		if(n->isinternal())
+		{
+			const int	c0=nodes.nodes.findLinearSearch(n->childs[0]);
+			const int	c1=nodes.nodes.findLinearSearch(n->childs[1]);
+			iwriter->WriteNode(n,i,p,c0,c1);
+		}
+		else
+		{
+			iwriter->WriteLeaf(n,i,p);
+		}	
+	}
+}
+
+//
+/// Replaces the contents of dest with a structural copy of this tree.
+/// Every node is recreated with the source node's volume and data; each
+/// cloned leaf is additionally passed to iclone->CloneLeaf() when iclone is
+/// not null. dest's leaf counter is set to the number of leaves copied.
+void			btDbvt::clone(btDbvt& dest,IClone* iclone) const
+{
+	dest.clear();
+	int	cloned=0;
+	if(m_root!=0)
+	{	
+		// Explicit depth-first traversal: each entry pairs a source node with
+		// the already created destination parent.
+		btAlignedObjectArray<sStkCLN>	stack;
+		stack.reserve(m_leaves);
+		stack.push_back(sStkCLN(m_root,0));
+		do	{
+			const int		i=stack.size()-1;
+			const sStkCLN	e=stack[i];
+			btDbvtNode*			n=createnode(&dest,e.parent,e.node->volume,e.node->data);
+			stack.pop_back();
+			if(e.parent!=0)
+			{
+				// Use the slot the source node occupies under its own parent,
+				// so the copy keeps the child order. Picking the slot from the
+				// stack index parity could swap the two children.
+				e.parent->childs[indexof(e.node)]=n;
+			}
+			else
+				dest.m_root=n;
+			if(e.node->isinternal())
+			{
+				stack.push_back(sStkCLN(e.node->childs[0],n));
+				stack.push_back(sStkCLN(e.node->childs[1],n));
+			}
+			else
+			{
+				++cloned;
+				// The callback is optional; do not dereference a null pointer.
+				if(iclone) iclone->CloneLeaf(n);
+			}
+		} while(stack.size()>0);
+	}
+	// createnode() does not touch the leaf counter, so set it explicitly.
+	dest.m_leaves=cloned;
+}
+
+//
+/// Returns the depth of the deepest leaf below node, counting node itself as
+/// depth 1. A null node yields 0.
+int				btDbvt::maxdepth(const btDbvtNode* node)
+{
+	int	deepest=0;
+	if(node!=0)
+	{
+		getmaxdepth(node,1,deepest);
+	}
+	return(deepest);
+}
+
+//
+/// Returns the number of leaves in the subtree rooted at node.
+/// A null node (e.g. the root of an empty tree) counts as 0 leaves, matching
+/// the way maxdepth() treats a null node.
+int				btDbvt::countLeaves(const btDbvtNode* node)
+{
+	if(!node)
+		return(0);
+	if(node->isinternal())
+		return(countLeaves(node->childs[0])+countLeaves(node->childs[1]));
+	else
+		return(1);
+}
+
+//
+/// Appends every leaf of the subtree rooted at node to leaves, visiting
+/// childs[0] before childs[1]. A null node (e.g. the root of an empty tree)
+/// appends nothing.
+void			btDbvt::extractLeaves(const btDbvtNode* node,btAlignedObjectArray<const btDbvtNode*>& leaves)
+{
+	if(!node)
+		return;
+	if(node->isinternal())
+	{
+		extractLeaves(node->childs[0],leaves);
+		extractLeaves(node->childs[1],leaves);
+	}
+	else
+	{
+		leaves.push_back(node);
+	}	
+}
+
+//
+#if DBVT_ENABLE_BENCHMARK
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "LinearMath/btQuickProf.h"
+
+/*
+q6600,2.4ghz
+
+/Ox /Ob2 /Oi /Ot /I "." /I "..\.." /I "..\..\src" /D "NDEBUG" /D "_LIB" /D "_WINDOWS" /D "_CRT_SECURE_NO_DEPRECATE" /D "_CRT_NONSTDC_NO_DEPRECATE" /D "WIN32"
+/GF /FD /MT /GS- /Gy /arch:SSE2 /Zc:wchar_t- /Fp"..\..\out\release8\build\libbulletcollision\libbulletcollision.pch"
+/Fo"..\..\out\release8\build\libbulletcollision\\"
+/Fd"..\..\out\release8\build\libbulletcollision\bulletcollision.pdb"
+/W3 /nologo /c /Wp64 /Zi /errorReport:prompt
+
+Benchmarking dbvt...
+World scale: 100.000000
+Extents base: 1.000000
+Extents range: 4.000000
+Leaves: 8192
+sizeof(btDbvtVolume): 32 bytes
+sizeof(btDbvtNode):   44 bytes
+[1] btDbvtVolume intersections: 3499 ms (-1%)
+[2] btDbvtVolume merges: 1934 ms (0%)
+[3] btDbvt::collideTT: 5485 ms (-21%)
+[4] btDbvt::collideTT self: 2814 ms (-20%)
+[5] btDbvt::collideTT xform: 7379 ms (-1%)
+[6] btDbvt::collideTT xform,self: 7270 ms (-2%)
+[7] btDbvt::rayTest: 6314 ms (0%),(332143 r/s)
+[8] insert/remove: 2093 ms (0%),(1001983 ir/s)
+[9] updates (teleport): 1879 ms (-3%),(1116100 u/s)
+[10] updates (jitter): 1244 ms (-4%),(1685813 u/s)
+[11] optimize (incremental): 2514 ms (0%),(1668000 o/s)
+[12] btDbvtVolume notequal: 3659 ms (0%)
+[13] culling(OCL+fullsort): 2218 ms (0%),(461 t/s)
+[14] culling(OCL+qsort): 3688 ms (5%),(2221 t/s)
+[15] culling(KDOP+qsort): 1139 ms (-1%),(7192 t/s)
+[16] insert/remove batch(256): 5092 ms (0%),(823704 bir/s)
+[17] btDbvtVolume select: 3419 ms (0%)
+*/
+
+/// Helper policies and random-data generators used by btDbvt::benchmark().
+/// Only compiled when DBVT_ENABLE_BENCHMARK is set.
+struct btDbvtBenchmark
+{
+	/// Policy that only counts Process() calls; the depth overload can also
+	/// check that depths arrive in non-decreasing order.
+	struct NilPolicy : btDbvt::ICollide
+	{
+		NilPolicy() : m_pcount(0),m_depth(-SIMD_INFINITY),m_checksort(true)		{}
+		void	Process(const btDbvtNode*,const btDbvtNode*)				{ ++m_pcount; }
+		void	Process(const btDbvtNode*)									{ ++m_pcount; }
+		void	Process(const btDbvtNode*,btScalar depth)
+		{
+			++m_pcount;
+			if(m_checksort)
+			// Remember the largest depth seen; report any depth below it.
+			{ if(depth>=m_depth) m_depth=depth; else printf("wrong depth: %f (should be >= %f)\r\n",depth,m_depth); }
+		}
+		int			m_pcount;		// number of Process() calls so far
+		btScalar	m_depth;		// largest depth seen so far
+		bool		m_checksort;	// enables the ordering check above
+	};
+	/// Policy receiving (leaf,depth) pairs.
+	struct P14 : btDbvt::ICollide
+	{
+		struct Node
+		{
+			const btDbvtNode*	leaf;
+			btScalar			depth;
+		};
+		// NOTE(review): n is filled in but never appended to m_nodes, so
+		// m_nodes stays as the caller left it — confirm whether a push_back
+		// is missing here.
+		void Process(const btDbvtNode* leaf,btScalar depth)
+		{
+			Node	n;
+			n.leaf	=	leaf;
+			n.depth	=	depth;
+		}
+		/// Three-way comparison on depth: +1 when a is shallower, -1 when deeper, 0 when equal.
+		static int sortfnc(const Node& a,const Node& b)
+		{
+			if(a.depth<b.depth) return(+1);
+			if(a.depth>b.depth) return(-1);
+			return(0);
+		}
+		btAlignedObjectArray<Node>		m_nodes;
+	};
+	/// Policy receiving leaves only; depth is the projection of the leaf
+	/// center on m_axis.
+	struct P15 : btDbvt::ICollide
+	{
+		struct Node
+		{
+			const btDbvtNode*	leaf;
+			btScalar			depth;
+		};
+		// NOTE(review): as in P14, n is built but never stored in m_nodes.
+		// NOTE(review): the rest of this file calls btDot; confirm that an
+		// unqualified dot() is actually declared for btVector3 arguments.
+		void Process(const btDbvtNode* leaf)
+		{
+			Node	n;
+			n.leaf	=	leaf;
+			n.depth	=	dot(leaf->volume.Center(),m_axis);
+		}
+		/// Three-way comparison on depth: +1 when a is shallower, -1 when deeper, 0 when equal.
+		static int sortfnc(const Node& a,const Node& b)
+		{
+			if(a.depth<b.depth) return(+1);
+			if(a.depth>b.depth) return(-1);
+			return(0);
+		}
+		btAlignedObjectArray<Node>		m_nodes;
+		btVector3						m_axis;
+	};
+	/// rand() scaled into the range [0,1].
+	static btScalar			RandUnit()
+	{
+		return(rand()/(btScalar)RAND_MAX);
+	}
+	/// Vector whose three components are independent RandUnit() values.
+	static btVector3		RandVector3()
+	{
+		return(btVector3(RandUnit(),RandUnit(),RandUnit()));
+	}
+	/// Random vector with each component in [-cs/2,cs/2].
+	static btVector3		RandVector3(btScalar cs)
+	{
+		return(RandVector3()*cs-btVector3(cs,cs,cs)/2);
+	}
+	/// Random volume built with FromCE from a RandVector3(cs) center and
+	/// per-axis extents of eb plus a random amount in [0,es].
+	static btDbvtVolume	RandVolume(btScalar cs,btScalar eb,btScalar es)
+	{
+		return(btDbvtVolume::FromCE(RandVector3(cs),btVector3(eb,eb,eb)+RandVector3()*es));
+	}
+	/// Random transform: origin from RandVector3(cs), rotation from a
+	/// normalized quaternion built out of three random angles in [0,2*pi].
+	static btTransform		RandTransform(btScalar cs)
+	{
+		btTransform	t;
+		t.setOrigin(RandVector3(cs));
+		t.setRotation(btQuaternion(RandUnit()*SIMD_PI*2,RandUnit()*SIMD_PI*2,RandUnit()*SIMD_PI*2).normalized());
+		return(t);
+	}
+	/// Clears dbvt and fills it with the requested number of random leaves
+	/// (null data pointer each).
+	static void				RandTree(btScalar cs,btScalar eb,btScalar es,int leaves,btDbvt& dbvt)
+	{
+		dbvt.clear();
+		for(int i=0;i<leaves;++i)
+		{
+			dbvt.insert(RandVolume(cs,eb,es),0);
+		}
+	}
+};
+
+void			btDbvt::benchmark()
+{
+	static const btScalar	cfgVolumeCenterScale		=	100;
+	static const btScalar	cfgVolumeExentsBase			=	1;
+	static const btScalar	cfgVolumeExentsScale		=	4;
+	static const int		cfgLeaves					=	8192;
+	static const bool		cfgEnable					=	true;
+
+	//[1] btDbvtVolume intersections
+	bool					cfgBenchmark1_Enable		=	cfgEnable;
+	static const int		cfgBenchmark1_Iterations	=	8;
+	static const int		cfgBenchmark1_Reference		=	3499;
+	//[2] btDbvtVolume merges
+	bool					cfgBenchmark2_Enable		=	cfgEnable;
+	static const int		cfgBenchmark2_Iterations	=	4;
+	static const int		cfgBenchmark2_Reference		=	1945;
+	//[3] btDbvt::collideTT
+	bool					cfgBenchmark3_Enable		=	cfgEnable;
+	static const int		cfgBenchmark3_Iterations	=	512;
+	static const int		cfgBenchmark3_Reference		=	5485;
+	//[4] btDbvt::collideTT self
+	bool					cfgBenchmark4_Enable		=	cfgEnable;
+	static const int		cfgBenchmark4_Iterations	=	512;
+	static const int		cfgBenchmark4_Reference		=	2814;
+	//[5] btDbvt::collideTT xform
+	bool					cfgBenchmark5_Enable		=	cfgEnable;
+	static const int		cfgBenchmark5_Iterations	=	512;
+	static const btScalar	cfgBenchmark5_OffsetScale	=	2;
+	static const int		cfgBenchmark5_Reference		=	7379;
+	//[6] btDbvt::collideTT xform,self
+	bool					cfgBenchmark6_Enable		=	cfgEnable;
+	static const int		cfgBenchmark6_Iterations	=	512;
+	static const btScalar	cfgBenchmark6_OffsetScale	=	2;
+	static const int		cfgBenchmark6_Reference		=	7270;
+	//[7] btDbvt::rayTest
+	bool					cfgBenchmark7_Enable		=	cfgEnable;
+	static const int		cfgBenchmark7_Passes		=	32;
+	static const int		cfgBenchmark7_Iterations	=	65536;
+	static const int		cfgBenchmark7_Reference		=	6307;
+	//[8] insert/remove
+	bool					cfgBenchmark8_Enable		=	cfgEnable;
+	static const int		cfgBenchmark8_Passes		=	32;
+	static const int		cfgBenchmark8_Iterations	=	65536;
+	static const int		cfgBenchmark8_Reference		=	2105;
+	//[9] updates (teleport)
+	bool					cfgBenchmark9_Enable		=	cfgEnable;
+	static const int		cfgBenchmark9_Passes		=	32;
+	static const int		cfgBenchmark9_Iterations	=	65536;
+	static const int		cfgBenchmark9_Reference		=	1879;
+	//[10] updates (jitter)
+	bool					cfgBenchmark10_Enable		=	cfgEnable;
+	static const btScalar	cfgBenchmark10_Scale		=	cfgVolumeCenterScale/10000;
+	static const int		cfgBenchmark10_Passes		=	32;
+	static const int		cfgBenchmark10_Iterations	=	65536;
+	static const int		cfgBenchmark10_Reference	=	1244;
+	//[11] optimize (incremental)
+	bool					cfgBenchmark11_Enable		=	cfgEnable;
+	static const int		cfgBenchmark11_Passes		=	64;
+	static const int		cfgBenchmark11_Iterations	=	65536;
+	static const int		cfgBenchmark11_Reference	=	2510;
+	//[12] btDbvtVolume notequal
+	bool					cfgBenchmark12_Enable		=	cfgEnable;
+	static const int		cfgBenchmark12_Iterations	=	32;
+	static const int		cfgBenchmark12_Reference	=	3677;
+	//[13] culling(OCL+fullsort)
+	bool					cfgBenchmark13_Enable		=	cfgEnable;
+	static const int		cfgBenchmark13_Iterations	=	1024;
+	static const int		cfgBenchmark13_Reference	=	2231;
+	//[14] culling(OCL+qsort)
+	bool					cfgBenchmark14_Enable		=	cfgEnable;
+	static const int		cfgBenchmark14_Iterations	=	8192;
+	static const int		cfgBenchmark14_Reference	=	3500;
+	//[15] culling(KDOP+qsort)
+	bool					cfgBenchmark15_Enable		=	cfgEnable;
+	static const int		cfgBenchmark15_Iterations	=	8192;
+	static const int		cfgBenchmark15_Reference	=	1151;
+	//[16] insert/remove batch
+	bool					cfgBenchmark16_Enable		=	cfgEnable;
+	static const int		cfgBenchmark16_BatchCount	=	256;
+	static const int		cfgBenchmark16_Passes		=	16384;
+	static const int		cfgBenchmark16_Reference	=	5138;
+	//[17] select
+	bool					cfgBenchmark17_Enable		=	cfgEnable;
+	static const int		cfgBenchmark17_Iterations	=	4;
+	static const int		cfgBenchmark17_Reference	=	3390;
+
+	btClock					wallclock;
+	printf("Benchmarking dbvt...\r\n");
+	printf("\tWorld scale: %f\r\n",cfgVolumeCenterScale);
+	printf("\tExtents base: %f\r\n",cfgVolumeExentsBase);
+	printf("\tExtents range: %f\r\n",cfgVolumeExentsScale);
+	printf("\tLeaves: %u\r\n",cfgLeaves);
+	printf("\tsizeof(btDbvtVolume): %u bytes\r\n",sizeof(btDbvtVolume));
+	printf("\tsizeof(btDbvtNode):   %u bytes\r\n",sizeof(btDbvtNode));
+	if(cfgBenchmark1_Enable)
+	{// Benchmark 1	
+		srand(380843);
+		btAlignedObjectArray<btDbvtVolume>	volumes;
+		btAlignedObjectArray<bool>			results;
+		volumes.resize(cfgLeaves);
+		results.resize(cfgLeaves);
+		for(int i=0;i<cfgLeaves;++i)
+		{
+			volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale);
+		}
+		printf("[1] btDbvtVolume intersections: ");
+		wallclock.reset();
+		for(int i=0;i<cfgBenchmark1_Iterations;++i)
+		{
+			for(int j=0;j<cfgLeaves;++j)
+			{
+				for(int k=0;k<cfgLeaves;++k)
+				{
+					results[k]=Intersect(volumes[j],volumes[k]);
+				}
+			}
+		}
+		const int time=(int)wallclock.getTimeMilliseconds();
+		printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark1_Reference)*100/time);
+	}
+	if(cfgBenchmark2_Enable)
+	{// Benchmark 2	
+		srand(380843);
+		btAlignedObjectArray<btDbvtVolume>	volumes;
+		btAlignedObjectArray<btDbvtVolume>	results;
+		volumes.resize(cfgLeaves);
+		results.resize(cfgLeaves);
+		for(int i=0;i<cfgLeaves;++i)
+		{
+			volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale);
+		}
+		printf("[2] btDbvtVolume merges: ");
+		wallclock.reset();
+		for(int i=0;i<cfgBenchmark2_Iterations;++i)
+		{
+			for(int j=0;j<cfgLeaves;++j)
+			{
+				for(int k=0;k<cfgLeaves;++k)
+				{
+					Merge(volumes[j],volumes[k],results[k]);
+				}
+			}
+		}
+		const int time=(int)wallclock.getTimeMilliseconds();
+		printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark2_Reference)*100/time);
+	}
+	if(cfgBenchmark3_Enable)
+	{// Benchmark 3	
+		srand(380843);
+		btDbvt						dbvt[2];
+		btDbvtBenchmark::NilPolicy	policy;
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt[0]);
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt[1]);
+		dbvt[0].optimizeTopDown();
+		dbvt[1].optimizeTopDown();
+		printf("[3] btDbvt::collideTT: ");
+		wallclock.reset();
+		for(int i=0;i<cfgBenchmark3_Iterations;++i)
+		{
+			btDbvt::collideTT(dbvt[0].m_root,dbvt[1].m_root,policy);
+		}
+		const int time=(int)wallclock.getTimeMilliseconds();
+		printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark3_Reference)*100/time);
+	}
+	if(cfgBenchmark4_Enable)
+	{// Benchmark 4
+		srand(380843);
+		btDbvt						dbvt;
+		btDbvtBenchmark::NilPolicy	policy;
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt);
+		dbvt.optimizeTopDown();
+		printf("[4] btDbvt::collideTT self: ");
+		wallclock.reset();
+		for(int i=0;i<cfgBenchmark4_Iterations;++i)
+		{
+			btDbvt::collideTT(dbvt.m_root,dbvt.m_root,policy);
+		}
+		const int time=(int)wallclock.getTimeMilliseconds();
+		printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark4_Reference)*100/time);
+	}
+	if(cfgBenchmark5_Enable)
+	{// Benchmark 5	
+		srand(380843);
+		btDbvt								dbvt[2];
+		btAlignedObjectArray<btTransform>	transforms;
+		btDbvtBenchmark::NilPolicy			policy;
+		transforms.resize(cfgBenchmark5_Iterations);
+		for(int i=0;i<transforms.size();++i)
+		{
+			transforms[i]=btDbvtBenchmark::RandTransform(cfgVolumeCenterScale*cfgBenchmark5_OffsetScale);
+		}
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt[0]);
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt[1]);
+		dbvt[0].optimizeTopDown();
+		dbvt[1].optimizeTopDown();
+		printf("[5] btDbvt::collideTT xform: ");
+		wallclock.reset();
+		for(int i=0;i<cfgBenchmark5_Iterations;++i)
+		{
+			btDbvt::collideTT(dbvt[0].m_root,dbvt[1].m_root,transforms[i],policy);
+		}
+		const int time=(int)wallclock.getTimeMilliseconds();
+		printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark5_Reference)*100/time);
+	}
+	if(cfgBenchmark6_Enable)
+	{// Benchmark 6	
+		srand(380843);
+		btDbvt								dbvt;
+		btAlignedObjectArray<btTransform>	transforms;
+		btDbvtBenchmark::NilPolicy			policy;
+		transforms.resize(cfgBenchmark6_Iterations);
+		for(int i=0;i<transforms.size();++i)
+		{
+			transforms[i]=btDbvtBenchmark::RandTransform(cfgVolumeCenterScale*cfgBenchmark6_OffsetScale);
+		}
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt);
+		dbvt.optimizeTopDown();
+		printf("[6] btDbvt::collideTT xform,self: ");
+		wallclock.reset();
+		for(int i=0;i<cfgBenchmark6_Iterations;++i)
+		{
+			btDbvt::collideTT(dbvt.m_root,dbvt.m_root,transforms[i],policy);		
+		}
+		const int time=(int)wallclock.getTimeMilliseconds();
+		printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark6_Reference)*100/time);
+	}
+	if(cfgBenchmark7_Enable)
+	{// Benchmark 7	
+		srand(380843);
+		btDbvt								dbvt;
+		btAlignedObjectArray<btVector3>		rayorg;
+		btAlignedObjectArray<btVector3>		raydir;
+		btDbvtBenchmark::NilPolicy			policy;
+		rayorg.resize(cfgBenchmark7_Iterations);
+		raydir.resize(cfgBenchmark7_Iterations);
+		for(int i=0;i<rayorg.size();++i)
+		{
+			rayorg[i]=btDbvtBenchmark::RandVector3(cfgVolumeCenterScale*2);
+			raydir[i]=btDbvtBenchmark::RandVector3(cfgVolumeCenterScale*2);
+		}
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt);
+		dbvt.optimizeTopDown();
+		printf("[7] btDbvt::rayTest: ");
+		wallclock.reset();
+		for(int i=0;i<cfgBenchmark7_Passes;++i)
+		{
+			for(int j=0;j<cfgBenchmark7_Iterations;++j)
+			{
+				btDbvt::rayTest(dbvt.m_root,rayorg[j],rayorg[j]+raydir[j],policy);
+			}
+		}
+		const int	time=(int)wallclock.getTimeMilliseconds();
+		unsigned	rays=cfgBenchmark7_Passes*cfgBenchmark7_Iterations;
+		printf("%u ms (%i%%),(%u r/s)\r\n",time,(time-cfgBenchmark7_Reference)*100/time,(rays*1000)/time);
+	}
+	if(cfgBenchmark8_Enable)
+	{// Benchmark 8	
+		srand(380843);
+		btDbvt								dbvt;
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt);
+		dbvt.optimizeTopDown();
+		printf("[8] insert/remove: ");
+		wallclock.reset();
+		for(int i=0;i<cfgBenchmark8_Passes;++i)
+		{
+			for(int j=0;j<cfgBenchmark8_Iterations;++j)
+			{
+				dbvt.remove(dbvt.insert(btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale),0));
+			}
+		}
+		const int	time=(int)wallclock.getTimeMilliseconds();
+		const int	ir=cfgBenchmark8_Passes*cfgBenchmark8_Iterations;
+		printf("%u ms (%i%%),(%u ir/s)\r\n",time,(time-cfgBenchmark8_Reference)*100/time,ir*1000/time);
+	}
+	if(cfgBenchmark9_Enable)
+	{// Benchmark 9	
+		srand(380843);
+		btDbvt										dbvt;
+		btAlignedObjectArray<const btDbvtNode*>	leaves;
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt);
+		dbvt.optimizeTopDown();
+		dbvt.extractLeaves(dbvt.m_root,leaves);
+		printf("[9] updates (teleport): ");
+		wallclock.reset();
+		for(int i=0;i<cfgBenchmark9_Passes;++i)
+		{
+			for(int j=0;j<cfgBenchmark9_Iterations;++j)
+			{
+				dbvt.update(const_cast<btDbvtNode*>(leaves[rand()%cfgLeaves]),
+					btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale));
+			}
+		}
+		const int	time=(int)wallclock.getTimeMilliseconds();
+		const int	up=cfgBenchmark9_Passes*cfgBenchmark9_Iterations;
+		printf("%u ms (%i%%),(%u u/s)\r\n",time,(time-cfgBenchmark9_Reference)*100/time,up*1000/time);
+	}
+	if(cfgBenchmark10_Enable)
+	{// Benchmark 10	
+		srand(380843);
+		btDbvt										dbvt;
+		btAlignedObjectArray<const btDbvtNode*>	leaves;
+		btAlignedObjectArray<btVector3>				vectors;
+		vectors.resize(cfgBenchmark10_Iterations);
+		for(int i=0;i<vectors.size();++i)
+		{
+			vectors[i]=(btDbvtBenchmark::RandVector3()*2-btVector3(1,1,1))*cfgBenchmark10_Scale;
+		}
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt);
+		dbvt.optimizeTopDown();
+		dbvt.extractLeaves(dbvt.m_root,leaves);
+		printf("[10] updates (jitter): ");
+		wallclock.reset();
+
+		for(int i=0;i<cfgBenchmark10_Passes;++i)
+		{
+			for(int j=0;j<cfgBenchmark10_Iterations;++j)
+			{			
+				const btVector3&	d=vectors[j];
+				btDbvtNode*		l=const_cast<btDbvtNode*>(leaves[rand()%cfgLeaves]);
+				btDbvtVolume		v=btDbvtVolume::FromMM(l->volume.Mins()+d,l->volume.Maxs()+d);
+				dbvt.update(l,v);
+			}
+		}
+		const int	time=(int)wallclock.getTimeMilliseconds();
+		const int	up=cfgBenchmark10_Passes*cfgBenchmark10_Iterations;
+		printf("%u ms (%i%%),(%u u/s)\r\n",time,(time-cfgBenchmark10_Reference)*100/time,up*1000/time);
+	}
+	if(cfgBenchmark11_Enable)
+	{// Benchmark 11	
+		srand(380843);
+		btDbvt										dbvt;
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt);
+		dbvt.optimizeTopDown();
+		printf("[11] optimize (incremental): ");
+		wallclock.reset();	
+		for(int i=0;i<cfgBenchmark11_Passes;++i)
+		{
+			dbvt.optimizeIncremental(cfgBenchmark11_Iterations);
+		}
+		const int	time=(int)wallclock.getTimeMilliseconds();
+		const int	op=cfgBenchmark11_Passes*cfgBenchmark11_Iterations;
+		printf("%u ms (%i%%),(%u o/s)\r\n",time,(time-cfgBenchmark11_Reference)*100/time,op/time*1000);
+	}
+	if(cfgBenchmark12_Enable)
+	{// Benchmark 12	
+		srand(380843);
+		btAlignedObjectArray<btDbvtVolume>	volumes;
+		btAlignedObjectArray<bool>				results;
+		volumes.resize(cfgLeaves);
+		results.resize(cfgLeaves);
+		for(int i=0;i<cfgLeaves;++i)
+		{
+			volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale);
+		}
+		printf("[12] btDbvtVolume notequal: ");
+		wallclock.reset();
+		for(int i=0;i<cfgBenchmark12_Iterations;++i)
+		{
+			for(int j=0;j<cfgLeaves;++j)
+			{
+				for(int k=0;k<cfgLeaves;++k)
+				{
+					results[k]=NotEqual(volumes[j],volumes[k]);
+				}
+			}
+		}
+		const int time=(int)wallclock.getTimeMilliseconds();
+		printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark12_Reference)*100/time);
+	}
+	if(cfgBenchmark13_Enable)
+	{// Benchmark 13	
+		srand(380843);
+		btDbvt								dbvt;
+		btAlignedObjectArray<btVector3>		vectors;
+		btDbvtBenchmark::NilPolicy			policy;
+		vectors.resize(cfgBenchmark13_Iterations);
+		for(int i=0;i<vectors.size();++i)
+		{
+			vectors[i]=(btDbvtBenchmark::RandVector3()*2-btVector3(1,1,1)).normalized();
+		}
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt);
+		dbvt.optimizeTopDown();
+		printf("[13] culling(OCL+fullsort): ");
+		wallclock.reset();	
+		for(int i=0;i<cfgBenchmark13_Iterations;++i)
+		{
+			static const btScalar	offset=0;
+			policy.m_depth=-SIMD_INFINITY;
+			dbvt.collideOCL(dbvt.m_root,&vectors[i],&offset,vectors[i],1,policy);
+		}
+		const int	time=(int)wallclock.getTimeMilliseconds();
+		const int	t=cfgBenchmark13_Iterations;
+		printf("%u ms (%i%%),(%u t/s)\r\n",time,(time-cfgBenchmark13_Reference)*100/time,(t*1000)/time);
+	}
+	if(cfgBenchmark14_Enable)
+	{// Benchmark 14	
+		srand(380843);
+		btDbvt								dbvt;
+		btAlignedObjectArray<btVector3>		vectors;
+		btDbvtBenchmark::P14				policy;
+		vectors.resize(cfgBenchmark14_Iterations);
+		for(int i=0;i<vectors.size();++i)
+		{
+			vectors[i]=(btDbvtBenchmark::RandVector3()*2-btVector3(1,1,1)).normalized();
+		}
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt);
+		dbvt.optimizeTopDown();
+		policy.m_nodes.reserve(cfgLeaves);
+		printf("[14] culling(OCL+qsort): ");
+		wallclock.reset();	
+		for(int i=0;i<cfgBenchmark14_Iterations;++i)
+		{
+			static const btScalar	offset=0;
+			policy.m_nodes.resize(0);
+			dbvt.collideOCL(dbvt.m_root,&vectors[i],&offset,vectors[i],1,policy,false);
+			policy.m_nodes.quickSort(btDbvtBenchmark::P14::sortfnc);
+		}
+		const int	time=(int)wallclock.getTimeMilliseconds();
+		const int	t=cfgBenchmark14_Iterations;
+		printf("%u ms (%i%%),(%u t/s)\r\n",time,(time-cfgBenchmark14_Reference)*100/time,(t*1000)/time);
+	}
+	if(cfgBenchmark15_Enable)
+	{// Benchmark 15	
+		srand(380843);
+		btDbvt								dbvt;
+		btAlignedObjectArray<btVector3>		vectors;
+		btDbvtBenchmark::P15				policy;
+		vectors.resize(cfgBenchmark15_Iterations);
+		for(int i=0;i<vectors.size();++i)
+		{
+			vectors[i]=(btDbvtBenchmark::RandVector3()*2-btVector3(1,1,1)).normalized();
+		}
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt);
+		dbvt.optimizeTopDown();
+		policy.m_nodes.reserve(cfgLeaves);
+		printf("[15] culling(KDOP+qsort): ");
+		wallclock.reset();	
+		for(int i=0;i<cfgBenchmark15_Iterations;++i)
+		{
+			static const btScalar	offset=0;
+			policy.m_nodes.resize(0);
+			policy.m_axis=vectors[i];
+			dbvt.collideKDOP(dbvt.m_root,&vectors[i],&offset,1,policy);
+			policy.m_nodes.quickSort(btDbvtBenchmark::P15::sortfnc);
+		}
+		const int	time=(int)wallclock.getTimeMilliseconds();
+		const int	t=cfgBenchmark15_Iterations;
+		printf("%u ms (%i%%),(%u t/s)\r\n",time,(time-cfgBenchmark15_Reference)*100/time,(t*1000)/time);
+	}
+	if(cfgBenchmark16_Enable)
+	{// Benchmark 16	
+		srand(380843);
+		btDbvt								dbvt;
+		btAlignedObjectArray<btDbvtNode*>	batch;
+		btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt);
+		dbvt.optimizeTopDown();
+		batch.reserve(cfgBenchmark16_BatchCount);
+		printf("[16] insert/remove batch(%u): ",cfgBenchmark16_BatchCount);
+		wallclock.reset();
+		for(int i=0;i<cfgBenchmark16_Passes;++i)
+		{
+			for(int j=0;j<cfgBenchmark16_BatchCount;++j)
+			{
+				batch.push_back(dbvt.insert(btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale),0));
+			}
+			for(int j=0;j<cfgBenchmark16_BatchCount;++j)
+			{
+				dbvt.remove(batch[j]);
+			}
+			batch.resize(0);
+		}
+		const int	time=(int)wallclock.getTimeMilliseconds();
+		const int	ir=cfgBenchmark16_Passes*cfgBenchmark16_BatchCount;
+		printf("%u ms (%i%%),(%u bir/s)\r\n",time,(time-cfgBenchmark16_Reference)*100/time,int(ir*1000.0/time));
+	}
+	if(cfgBenchmark17_Enable)
+	{// Benchmark 17
+		srand(380843);
+		btAlignedObjectArray<btDbvtVolume>	volumes;
+		btAlignedObjectArray<int>			results;
+		btAlignedObjectArray<int>			indices;
+		volumes.resize(cfgLeaves);
+		results.resize(cfgLeaves);
+		indices.resize(cfgLeaves);
+		for(int i=0;i<cfgLeaves;++i)
+		{
+			indices[i]=i;
+			volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale);
+		}
+		for(int i=0;i<cfgLeaves;++i)
+		{
+			btSwap(indices[i],indices[rand()%cfgLeaves]);
+		}
+		printf("[17] btDbvtVolume select: ");
+		wallclock.reset();
+		for(int i=0;i<cfgBenchmark17_Iterations;++i)
+		{
+			for(int j=0;j<cfgLeaves;++j)
+			{
+				for(int k=0;k<cfgLeaves;++k)
+				{
+					const int idx=indices[k];
+					results[idx]=Select(volumes[idx],volumes[j],volumes[k]);
+				}
+			}
+		}
+		const int time=(int)wallclock.getTimeMilliseconds();
+		printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark17_Reference)*100/time);
+	}
+	printf("\r\n\r\n");
+}
+#endif
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btDbvt.h b/src/bullet/BulletCollision/BroadphaseCollision/btDbvt.h
new file mode 100644
index 00000000..409da80a
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btDbvt.h
@@ -0,0 +1,1257 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///btDbvt implementation by Nathanael Presson
+
+#ifndef BT_DYNAMIC_BOUNDING_VOLUME_TREE_H
+#define BT_DYNAMIC_BOUNDING_VOLUME_TREE_H
+
+#include "LinearMath/btAlignedObjectArray.h"
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btAabbUtil2.h"
+
+//
+// Compile time configuration
+//
+// Every switch in this section is a plain #define without an #ifndef guard, so a setting
+// is changed by editing this header.
+// Implementation profiles: the values the DBVT_*_IMPL selectors below are compared against
+#define DBVT_IMPL_GENERIC		0	// Generic implementation (plain C++)
+#define DBVT_IMPL_SSE			1	// SSE (code paths built on <emmintrin.h> intrinsics or MSVC inline asm)
+
+// Template implementation of ICollide: 1 only for _WIN32 builds with _MSC_VER >= 1400, otherwise 0
+#ifdef _WIN32
+#if (defined (_MSC_VER) && _MSC_VER >= 1400)
+#define	DBVT_USE_TEMPLATE		1
+#else
+#define	DBVT_USE_TEMPLATE		0
+#endif
+#else
+#define	DBVT_USE_TEMPLATE		0
+#endif
+
+// Use only intrinsics instead of inline asm (picks the intrinsic branch inside the SSE version of Select)
+#define DBVT_USE_INTRINSIC_SSE	1
+
+// Use memmove for collideOCL (also enables the <memory.h>/<string.h> includes further down)
+#define DBVT_USE_MEMMOVE		1
+
+// Enable benchmarking code (when 0, btDbvt::benchmark() is declared as an empty inline function)
+#define	DBVT_ENABLE_BENCHMARK	0
+
+// Inlining qualifier used by the volume helper functions
+#define DBVT_INLINE				SIMD_FORCE_INLINE
+
+// Specific methods implementation: Select (DBVT_SELECT_IMPL), Merge (DBVT_MERGE_IMPL), box/box Intersect (DBVT_INT0_IMPL)
+// The SSE profile is chosen only when both BT_USE_SSE and _WIN32 are defined.
+//SSE gives errors on a MSVC 7.1
+#if defined (BT_USE_SSE) && defined (_WIN32)
+#define DBVT_SELECT_IMPL		DBVT_IMPL_SSE
+#define DBVT_MERGE_IMPL			DBVT_IMPL_SSE
+#define DBVT_INT0_IMPL			DBVT_IMPL_SSE
+#else
+#define DBVT_SELECT_IMPL		DBVT_IMPL_GENERIC
+#define DBVT_MERGE_IMPL			DBVT_IMPL_GENERIC
+#define DBVT_INT0_IMPL			DBVT_IMPL_GENERIC
+#endif
+
+#if	(DBVT_SELECT_IMPL==DBVT_IMPL_SSE)||	\
+	(DBVT_MERGE_IMPL==DBVT_IMPL_SSE)||	\
+	(DBVT_INT0_IMPL==DBVT_IMPL_SSE)
+#include <emmintrin.h>
+#endif
+
+//
+// Auto config and checks
+//
+// Template build: policies are passed as T&, nothing is virtual, and DBVT_CHECKTYPE binds an ICollide reference to a T. Otherwise policies are ICollide& with virtual members.
+#if DBVT_USE_TEMPLATE
+#define	DBVT_VIRTUAL
+#define DBVT_VIRTUAL_DTOR(a)
+#define DBVT_PREFIX					template <typename T>
+#define DBVT_IPOLICY				T& policy
+#define DBVT_CHECKTYPE				static const ICollide&	typechecker=*(T*)1;(void)typechecker;
+#else
+#define	DBVT_VIRTUAL_DTOR(a)		virtual ~a() {}
+#define DBVT_VIRTUAL				virtual
+#define DBVT_PREFIX
+#define DBVT_IPOLICY				ICollide& policy
+#define DBVT_CHECKTYPE
+#endif
+
+#if DBVT_USE_MEMMOVE
+#if !defined( __CELLOS_LV2__) && !defined(__MWERKS__)
+#include <memory.h>
+#endif
+#include <string.h>
+#endif
+// Sanity checks: stop the build with an #error if any configuration macro is undefined at this point.
+#ifndef DBVT_USE_TEMPLATE
+#error "DBVT_USE_TEMPLATE undefined"
+#endif
+
+#ifndef DBVT_USE_MEMMOVE
+#error "DBVT_USE_MEMMOVE undefined"
+#endif
+
+#ifndef DBVT_ENABLE_BENCHMARK
+#error "DBVT_ENABLE_BENCHMARK undefined"
+#endif
+
+#ifndef DBVT_SELECT_IMPL
+#error "DBVT_SELECT_IMPL undefined"
+#endif
+
+#ifndef DBVT_MERGE_IMPL
+#error "DBVT_MERGE_IMPL undefined"
+#endif
+
+#ifndef DBVT_INT0_IMPL
+#error "DBVT_INT0_IMPL undefined"
+#endif
+
+//
+// Default volumes
+//
+
+/* btDbvtAabbMm: axis-aligned bounding box stored as its minimum corner (mi) and maximum corner (mx) */ 
+struct	btDbvtAabbMm
+{
+	DBVT_INLINE btVector3			Center() const	{ return((mi+mx)/2); }	// midpoint of the two corners
+	DBVT_INLINE btVector3			Lengths() const	{ return(mx-mi); }	// full size along each axis
+	DBVT_INLINE btVector3			Extents() const	{ return((mx-mi)/2); }	// half size along each axis
+	DBVT_INLINE const btVector3&	Mins() const	{ return(mi); }	// minimum corner
+	DBVT_INLINE const btVector3&	Maxs() const	{ return(mx); }	// maximum corner
+	static inline btDbvtAabbMm		FromCE(const btVector3& c,const btVector3& e);	// from center c and half-extents e
+	static inline btDbvtAabbMm		FromCR(const btVector3& c,btScalar r);	// from center c and the same half-extent r on every axis
+	static inline btDbvtAabbMm		FromMM(const btVector3& mi,const btVector3& mx);	// from explicit min and max corners
+	static inline btDbvtAabbMm		FromPoints(const btVector3* pts,int n);	// bounds of n points; pts[0] is read unconditionally
+	static inline btDbvtAabbMm		FromPoints(const btVector3** ppts,int n);	// same, through an array of pointers to points
+	DBVT_INLINE void				Expand(const btVector3& e);	// mi-=e, mx+=e
+	DBVT_INLINE void				SignedExpand(const btVector3& e);	// per axis: a component >0 is added to mx, any other to mi
+	DBVT_INLINE bool				Contain(const btDbvtAabbMm& a) const;	// true when a lies within this box (bounds inclusive)
+	DBVT_INLINE int					Classify(const btVector3& n,btScalar o,int s) const;	// -1/0/+1 against dot(n,p)+o; s selects the tested corners
+	DBVT_INLINE btScalar			ProjectMinimum(const btVector3& v,unsigned signs) const;	// dot(v,corner), corner picked by the low three bits of signs
+	DBVT_INLINE friend bool			Intersect(	const btDbvtAabbMm& a,
+		const btDbvtAabbMm& b);	// box/box overlap test
+	
+	DBVT_INLINE friend bool			Intersect(	const btDbvtAabbMm& a,
+		const btVector3& b);	// point-in-box test (bounds inclusive)
+
+	DBVT_INLINE friend btScalar		Proximity(	const btDbvtAabbMm& a,
+		const btDbvtAabbMm& b);	// sum of |components| of (a.mi+a.mx)-(b.mi+b.mx)
+	DBVT_INLINE friend int			Select(		const btDbvtAabbMm& o,
+		const btDbvtAabbMm& a,
+		const btDbvtAabbMm& b);	// 0 when Proximity(o,a)<Proximity(o,b), otherwise 1
+	DBVT_INLINE friend void			Merge(		const btDbvtAabbMm& a,
+		const btDbvtAabbMm& b,
+		btDbvtAabbMm& r);	// r receives the component-wise min of the mins and max of the maxs
+	DBVT_INLINE friend bool			NotEqual(	const btDbvtAabbMm& a,
+		const btDbvtAabbMm& b);	// true when any min or max component differs
+private:
+	DBVT_INLINE void				AddSpan(const btVector3& d,btScalar& smi,btScalar& smx) const;	// adds this box's low/high projection along d to smi/smx
+private:
+	btVector3	mi,mx;	// min corner, then max corner; the SSE/asm code paths load them as two consecutive 16-byte vectors
+};
+
+// Types: btDbvtVolume is the volume type stored in every btDbvtNode; it is an alias of the min/max box.
+typedef	btDbvtAabbMm	btDbvtVolume;
+
+/* btDbvtNode: one tree node; isleaf() decides whether the union holds children or a payload */ 
+struct	btDbvtNode
+{
+	btDbvtVolume	volume;	// bounding volume tested by the traversal functions
+	btDbvtNode*		parent;	// NOTE(review): presumably null for the root - the code that maintains it is not in this header section
+	DBVT_INLINE bool	isleaf() const		{ return(childs[1]==0); }	// a node whose second child slot is 0 is a leaf
+	DBVT_INLINE bool	isinternal() const	{ return(!isleaf()); }
+	union	// the three members share storage: data and dataAsInt overlay childs[0]
+	{
+		btDbvtNode*	childs[2];	// the two children visited by the traversals when isinternal()
+		void*	data;	// NOTE(review): presumably the pointer passed to btDbvt::insert - confirm in btDbvt.cpp
+		int		dataAsInt;	// the same storage viewed as an int
+	};
+};
+
+///The btDbvt class implements a fast dynamic bounding volume tree based on axis aligned bounding boxes (aabb tree).
+///This btDbvt is used for soft body collision detection and for the btDbvtBroadphase. It has a fast insert, remove and update of nodes.
+///Unlike the btQuantizedBvh, nodes can be dynamically moved around, which allows for change in topology of the underlying data structure.
+struct	btDbvt
+{
+	/* Stack elements used by the iterative traversals	*/ 
+	struct	sStkNN	// pair of nodes still to be tested against each other (used by collideTT)
+	{
+		const btDbvtNode*	a;
+		const btDbvtNode*	b;
+		sStkNN() {}	// leaves a and b uninitialized
+		sStkNN(const btDbvtNode* na,const btDbvtNode* nb) : a(na),b(nb) {}
+	};
+	struct	sStkNP	// node plus an int mask; has no default constructor
+	{
+		const btDbvtNode*	node;
+		int			mask;
+		sStkNP(const btDbvtNode* n,unsigned m) : node(n),mask(m) {}	// m is converted from unsigned to int
+	};
+	struct	sStkNPS	// node, mask and a scalar key; the key is what nearest() compares
+	{
+		const btDbvtNode*	node;
+		int			mask;
+		btScalar	value;
+		sStkNPS() {}	// leaves all members uninitialized
+		sStkNPS(const btDbvtNode* n,unsigned m,btScalar v) : node(n),mask(m),value(v) {}
+	};
+	struct	sStkCLN	// node paired with a parent pointer
+	{
+		const btDbvtNode*	node;
+		btDbvtNode*		parent;
+		sStkCLN(const btDbvtNode* n,btDbvtNode* p) : node(n),parent(p) {}
+	};
+	// Policies/Interfaces
+	// (ICollide members are virtual only when DBVT_USE_TEMPLATE is 0; otherwise DBVT_VIRTUAL expands to nothing)
+	/* ICollide: callbacks invoked by the traversal functions; each default does nothing or returns true	*/ 
+	struct	ICollide
+	{		
+		DBVT_VIRTUAL_DTOR(ICollide)
+			DBVT_VIRTUAL void	Process(const btDbvtNode*,const btDbvtNode*)		{}	// pair callback: collideTT reports intersecting leaf pairs here
+		DBVT_VIRTUAL void	Process(const btDbvtNode*)					{}	// single-node callback: enumNodes, enumLeaves, collideTV, rayTest
+		DBVT_VIRTUAL void	Process(const btDbvtNode* n,btScalar)			{ Process(n); }	// node plus scalar; the default drops the scalar
+		DBVT_VIRTUAL bool	Descent(const btDbvtNode*)					{ return(true); }	// NOTE(review): its callers are outside this part of the header
+		DBVT_VIRTUAL bool	AllLeaves(const btDbvtNode*)					{ return(true); }	// NOTE(review): its callers are outside this part of the header
+	};
+	/* IWriter: abstract sink taken by write(); every method is pure virtual	*/ 
+	struct	IWriter
+	{
+		virtual ~IWriter() {}
+		virtual void		Prepare(const btDbvtNode* root,int numnodes)=0;
+		virtual void		WriteNode(const btDbvtNode*,int index,int parent,int child0,int child1)=0;
+		virtual void		WriteLeaf(const btDbvtNode*,int index,int parent)=0;
+	};
+	/* IClone: optional hook taken by clone(); CloneLeaf does nothing by default	*/ 
+	struct	IClone
+	{
+		virtual ~IClone()	{}
+		virtual void		CloneLeaf(btDbvtNode*) {}
+	};
+
+	// Constants: initial traversal stack sizes (collideTV reserves SIMPLE, collideTT and the ray tests start at DOUBLE)
+	enum	{
+		SIMPLE_STACKSIZE	=	64,
+		DOUBLE_STACKSIZE	=	SIMPLE_STACKSIZE*2
+	};
+
+	// Fields
+	btDbvtNode*		m_root;	// root node; 0 when the tree is empty (see empty())
+	btDbvtNode*		m_free;	// NOTE(review): maintained in btDbvt.cpp, meaning not visible here
+	int				m_lkhd;	// NOTE(review): presumably the default lookahead for update() - confirm
+	int				m_leaves;	// NOTE(review): presumably the current leaf count - confirm
+	unsigned		m_opath;	// NOTE(review): maintained in btDbvt.cpp, meaning not visible here
+
+	// Scratch stacks kept between calls so the traversals below can reuse their storage
+	btAlignedObjectArray<sStkNN>	m_stkStack;	// used by collideTTpersistentStack
+	mutable btAlignedObjectArray<const btDbvtNode*>	m_rayTestStack;	// used by the const method rayTestInternal, hence mutable
+
+
+	// Methods (declared without a body here; their definitions are not in this part of the header)
+	btDbvt();
+	~btDbvt();
+	void			clear();
+	bool			empty() const { return(0==m_root); }	// true when there is no root node
+	void			optimizeBottomUp();
+	void			optimizeTopDown(int bu_treshold=128);
+	void			optimizeIncremental(int passes);
+	btDbvtNode*		insert(const btDbvtVolume& box,void* data);
+	void			update(btDbvtNode* leaf,int lookahead=-1);
+	void			update(btDbvtNode* leaf,btDbvtVolume& volume);
+	bool			update(btDbvtNode* leaf,btDbvtVolume& volume,const btVector3& velocity,btScalar margin);
+	bool			update(btDbvtNode* leaf,btDbvtVolume& volume,const btVector3& velocity);
+	bool			update(btDbvtNode* leaf,btDbvtVolume& volume,btScalar margin);	
+	void			remove(btDbvtNode* leaf);
+	void			write(IWriter* iwriter) const;
+	void			clone(btDbvt& dest,IClone* iclone=0) const;
+	static int		maxdepth(const btDbvtNode* node);
+	static int		countLeaves(const btDbvtNode* node);
+	static void		extractLeaves(const btDbvtNode* node,btAlignedObjectArray<const btDbvtNode*>& leaves);
+#if DBVT_ENABLE_BENCHMARK
+	static void		benchmark();
+#else
+	static void		benchmark(){}	// benchmarking disabled: empty stub
+#endif
+	// DBVT_IPOLICY must support ICollide policy/interface
+	DBVT_PREFIX
+		static void		enumNodes(	const btDbvtNode* root,
+		DBVT_IPOLICY);
+	DBVT_PREFIX
+		static void		enumLeaves(	const btDbvtNode* root,
+		DBVT_IPOLICY);
+	DBVT_PREFIX
+		void		collideTT(	const btDbvtNode* root0,
+		const btDbvtNode* root1,
+		DBVT_IPOLICY);
+
+	DBVT_PREFIX
+		void		collideTTpersistentStack(	const btDbvtNode* root0,
+		  const btDbvtNode* root1,
+		  DBVT_IPOLICY);
+#if 0
+	DBVT_PREFIX
+		void		collideTT(	const btDbvtNode* root0,
+		const btDbvtNode* root1,
+		const btTransform& xform,
+		DBVT_IPOLICY);
+	DBVT_PREFIX
+		void		collideTT(	const btDbvtNode* root0,
+		const btTransform& xform0,
+		const btDbvtNode* root1,
+		const btTransform& xform1,
+		DBVT_IPOLICY);
+#endif
+
+	DBVT_PREFIX
+		void		collideTV(	const btDbvtNode* root,
+		const btDbvtVolume& volume,
+		DBVT_IPOLICY);
+	///rayTest is a re-entrant ray test, and can be called in parallel as long as the btAlignedAlloc is thread-safe (uses locking etc)
+	///rayTest is slower than rayTestInternal, because it builds a local stack, using memory allocations, and it recomputes signs/rayDirectionInverses each time
+	DBVT_PREFIX
+		static void		rayTest(	const btDbvtNode* root,
+		const btVector3& rayFrom,
+		const btVector3& rayTo,
+		DBVT_IPOLICY);
+	///rayTestInternal is faster than rayTest, because it uses a persistent stack (to reduce dynamic memory allocations to a minimum) and it uses precomputed signs/rayInverseDirections
+	///rayTestInternal is used by btDbvtBroadphase to accelerate world ray casts
+	DBVT_PREFIX
+		void		rayTestInternal(	const btDbvtNode* root,
+								const btVector3& rayFrom,
+								const btVector3& rayTo,
+								const btVector3& rayDirectionInverse,
+								unsigned int signs[3],
+								btScalar lambda_max,
+								const btVector3& aabbMin,
+								const btVector3& aabbMax,
+								DBVT_IPOLICY) const;
+
+	DBVT_PREFIX
+		static void		collideKDOP(const btDbvtNode* root,
+		const btVector3* normals,
+		const btScalar* offsets,
+		int count,
+		DBVT_IPOLICY);
+	DBVT_PREFIX
+		static void		collideOCL(	const btDbvtNode* root,
+		const btVector3* normals,
+		const btScalar* offsets,
+		const btVector3& sortaxis,
+		int count,								
+		DBVT_IPOLICY,
+		bool fullsort=true);
+	DBVT_PREFIX
+		static void		collideTU(	const btDbvtNode* root,
+		DBVT_IPOLICY);
+	// Helpers: nearest() binary-searches positions [l,h) of the index array i; allocate() stores a value in the pool 'stock'
+	static DBVT_INLINE int	nearest(const int* i,const btDbvt::sStkNPS* a,btScalar v,int l,int h)
+	{
+		int	m=0;
+		while(l<h)
+		{
+			m=(l+h)>>1;
+			if(a[i[m]].value>=v) l=m+1; else h=m;	// skip entries whose key is >= v
+		}
+		return(h);	// first position whose key is < v, provided the keys referenced by i are in descending order
+	}
+	static DBVT_INLINE int	allocate(	btAlignedObjectArray<int>& ifree,
+		btAlignedObjectArray<sStkNPS>& stock,
+		const sStkNPS& value)
+	{
+		int	i;
+		if(ifree.size()>0)
+		{ i=ifree[ifree.size()-1];ifree.pop_back();stock[i]=value; }	// reuse the most recently freed slot
+		else
+		{ i=stock.size();stock.push_back(value); }	// no free slot: append
+		return(i); 	// index of the slot now holding value
+	}
+	// The copy constructor is private, so outside code cannot copy a btDbvt; its empty body copies no fields.
+private:
+	btDbvt(const btDbvt&)	{}	
+};
+
+//
+// Inline implementations: btDbvtAabbMm and its friend functions
+//
+
+// Builds the box centered on c with half-extents e, i.e. mins=c-e and maxs=c+e.
+inline btDbvtAabbMm btDbvtAabbMm::FromCE(const btVector3& c,const btVector3& e) {
+	const btVector3 lower=c-e;
+	const btVector3 upper=c+e;
+	// FromMM stores the two corners verbatim.
+	return FromMM(lower,upper);
+}
+
+// Builds the box centered on c that extends r along every axis (half-extents (r,r,r)).
+inline btDbvtAabbMm btDbvtAabbMm::FromCR(const btVector3& c,btScalar r) {
+	const btVector3 halfExtents(r,r,r);
+	return FromCE(c,halfExtents);
+}
+
+// Builds the box whose corners are exactly mi (mins) and mx (maxs); the two are not checked for ordering.
+inline btDbvtAabbMm btDbvtAabbMm::FromMM(const btVector3& mi,const btVector3& mx) {
+	btDbvtAabbMm result;
+	result.mi=mi;
+	result.mx=mx;
+	return result;
+}
+
+// Smallest box enclosing pts[0..n-1]. pts[0] is read unconditionally, so at least one point must exist.
+inline btDbvtAabbMm btDbvtAabbMm::FromPoints(const btVector3* pts,int n) {
+	btDbvtAabbMm bounds;
+	bounds.mi=bounds.mx=pts[0];	// seed both corners with the first point
+	int idx=1;
+	while(idx<n) {
+		const btVector3& pt=pts[idx++];
+		bounds.mi.setMin(pt);
+		bounds.mx.setMax(pt);
+	}
+	return bounds;
+}
+
+// Smallest box enclosing *ppts[0..n-1]. *ppts[0] is read unconditionally, so at least one point must exist.
+inline btDbvtAabbMm btDbvtAabbMm::FromPoints(const btVector3** ppts,int n) {
+	btDbvtAabbMm bounds;
+	bounds.mi=bounds.mx=*ppts[0];	// seed both corners with the first point
+	int idx=1;
+	while(idx<n) {
+		const btVector3& pt=*ppts[idx++];
+		bounds.mi.setMin(pt);
+		bounds.mx.setMax(pt);
+	}
+	return bounds;
+}
+
+// Moves the min corner down by e and the max corner up by e (a negative component shrinks that axis).
+DBVT_INLINE void btDbvtAabbMm::Expand(const btVector3& e) {
+	mi-=e;
+	mx+=e;
+}
+
+// Extends the box along e: per axis, a component >0 is added to the max corner, any other value to the min corner.
+DBVT_INLINE void btDbvtAabbMm::SignedExpand(const btVector3& e) {
+	const btScalar dx=e.x(),dy=e.y(),dz=e.z();
+	if(dx>0) mx.setX(mx.x()+dx); else mi.setX(mi.x()+dx);
+	if(dy>0) mx.setY(mx.y()+dy); else mi.setY(mi.y()+dy);
+	if(dz>0) mx.setZ(mx.z()+dz); else mi.setZ(mi.z()+dz);
+}
+
+// True when box a lies entirely inside this box; coinciding faces still count as contained.
+DBVT_INLINE bool btDbvtAabbMm::Contain(const btDbvtAabbMm& a) const {
+	if(!(mi.x()<=a.mi.x())) return false;
+	if(!(mi.y()<=a.mi.y())) return false;
+	if(!(mi.z()<=a.mi.z())) return false;
+	if(!(mx.x()>=a.mx.x())) return false;
+	if(!(mx.y()>=a.mx.y())) return false;
+	if(!(mx.z()>=a.mx.z())) return false;
+	return true;
+}
+
+// Classifies this box against the plane dot(n,p)+o=0 by testing two opposite corners.
+//   n : plane normal.
+//   o : plane offset added to dot(n,corner).
+//   s : corner selector. Bit 0, 1 and 2 decide whether the x, y and z of the first tested
+//       corner (px) come from the maxs (bit set) or from the mins (bit clear); the second
+//       corner (pi) is the diagonally opposite one. Only the low three bits are used.
+// Returns -1 when dot(n,px)+o<0, otherwise +1 when dot(n,pi)+o>=0, otherwise 0.
+// The corners are derived from the bits of s rather than from an eight-way switch that
+// had no default case, so a selector outside 0..7 no longer leaves px and pi
+// uninitialized; for s in 0..7 the corners are exactly the ones the switch produced.
+DBVT_INLINE int		btDbvtAabbMm::Classify(const btVector3& n,btScalar o,int s) const
+{
+	const bool	xmax=(s&1)!=0;
+	const bool	ymax=(s&2)!=0;
+	const bool	zmax=(s&4)!=0;
+	// Corner selected by s ...
+	const btVector3	px(	xmax?mx.x():mi.x(),
+		ymax?mx.y():mi.y(),
+		zmax?mx.z():mi.z());
+	// ... and the diagonally opposite corner.
+	const btVector3	pi(	xmax?mi.x():mx.x(),
+		ymax?mi.y():mx.y(),
+		zmax?mi.z():mx.z());
+	if((btDot(n,px)+o)<0)		return(-1);
+	if((btDot(n,pi)+o)>=0)	return(+1);
+	return(0);
+}
+
+// Projects one corner of the box onto v and returns the dot product.
+// Bit 0/1/2 of signs picks the source of the corner's x/y/z: bit clear takes it from the maxs, bit set from the mins.
+DBVT_INLINE btScalar btDbvtAabbMm::ProjectMinimum(const btVector3& v,unsigned signs) const {
+	const btScalar cx=(signs&1u)?mi.x():mx.x();
+	const btScalar cy=(signs&2u)?mi.y():mx.y();
+	const btScalar cz=(signs&4u)?mi.z():mx.z();
+	const btVector3 corner(cx,cy,cz);
+	return btDot(corner,v);
+}
+
+// Adds the projection interval of this box along d to two running totals:
+// smi receives the low end of the interval, smx the high end.
+DBVT_INLINE void btDbvtAabbMm::AddSpan(const btVector3& d,btScalar& smi,btScalar& smx) const {
+	for(int axis=0;axis<3;++axis) {
+		// A negative weight swaps which corner yields the smaller product.
+		const bool flip=d[axis]<0;
+		smi+=(flip?mx[axis]:mi[axis])*d[axis];
+		smx+=(flip?mi[axis]:mx[axis])*d[axis];
+	}
+}
+
+// Overlap test for two boxes. In the generic path, boxes that only touch still count as intersecting.
+// The SSE path rejects when any of the first three lanes shows b.mx<a.mi or a.mx<b.mi.
+// The generic path checks the x, y and z intervals one after the other.
+DBVT_INLINE bool		Intersect(	const btDbvtAabbMm& a,
+								  const btDbvtAabbMm& b)
+{
+#if	DBVT_INT0_IMPL == DBVT_IMPL_SSE
+	const __m128	rt(_mm_or_ps(	_mm_cmplt_ps(_mm_load_ps(b.mx),_mm_load_ps(a.mi)),
+		_mm_cmplt_ps(_mm_load_ps(a.mx),_mm_load_ps(b.mi))));
+	const __int32*	pu((const __int32*)&rt);
+	return((pu[0]|pu[1]|pu[2])==0);
+#else
+	const bool overlapX=(a.mi.x()<=b.mx.x())&&(a.mx.x()>=b.mi.x());
+	const bool overlapY=(a.mi.y()<=b.mx.y())&&(a.mx.y()>=b.mi.y());
+	const bool overlapZ=(a.mi.z()<=b.mx.z())&&(a.mx.z()>=b.mi.z());
+	return overlapX&&overlapY&&overlapZ;
+#endif
+}
+
+
+
+// True when point b lies inside box a or on its boundary.
+DBVT_INLINE bool Intersect(const btDbvtAabbMm& a,const btVector3& b) {
+	// Outside as soon as one coordinate is below the mins or above the maxs.
+	if(!(b.x()>=a.mi.x())) return false;
+	if(!(b.y()>=a.mi.y())) return false;
+	if(!(b.z()>=a.mi.z())) return false;
+	if(!(b.x()<=a.mx.x())) return false;
+	if(!(b.y()<=a.mx.y())) return false;
+	if(!(b.z()<=a.mx.z())) return false;
+	return true;
+}
+
+
+
+
+
+//////////////////////////////////////
+
+
+// Distance measure between two boxes: sum of the absolute components of (a.mi+a.mx)-(b.mi+b.mx).
+DBVT_INLINE btScalar Proximity(const btDbvtAabbMm& a,const btDbvtAabbMm& b) {
+	const btVector3 sumA=a.mi+a.mx;
+	const btVector3 sumB=b.mi+b.mx;
+	const btVector3 delta=sumA-sumB;
+	return btFabs(delta.x())+btFabs(delta.y())+btFabs(delta.z());
+}
+
+
+
+// Chooses which of a and b is nearer to o: returns 0 when Proximity(o,a)<Proximity(o,b), otherwise 1 (generic path). The SSE paths compute the same two sums with vector code.
+DBVT_INLINE int			Select(	const btDbvtAabbMm& o,
+							   const btDbvtAabbMm& a,
+							   const btDbvtAabbMm& b)
+{
+#if	DBVT_SELECT_IMPL == DBVT_IMPL_SSE
+	static ATTRIBUTE_ALIGNED16(const unsigned __int32)	mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff};	// ANDing a float with this clears its sign bit (absolute value)
+	///@todo: the intrinsic version is 11% slower
+#if DBVT_USE_INTRINSIC_SSE
+	// Intrinsic variant
+	union btSSEUnion ///NOTE: if we use more intrinsics, move btSSEUnion into the LinearMath directory
+	{
+	   __m128		ssereg;
+	   float		floats[4];
+	   int			ints[4];
+	};
+	// omi = o.mi+o.mx; ami/bmi = |(x.mi+x.mx)-omi| per lane for x=a and x=b
+	__m128	omi(_mm_load_ps(o.mi));
+	omi=_mm_add_ps(omi,_mm_load_ps(o.mx));
+	__m128	ami(_mm_load_ps(a.mi));
+	ami=_mm_add_ps(ami,_mm_load_ps(a.mx));
+	ami=_mm_sub_ps(ami,omi);
+	ami=_mm_and_ps(ami,_mm_load_ps((const float*)mask));
+	__m128	bmi(_mm_load_ps(b.mi));
+	bmi=_mm_add_ps(bmi,_mm_load_ps(b.mx));
+	bmi=_mm_sub_ps(bmi,omi);
+	bmi=_mm_and_ps(bmi,_mm_load_ps((const float*)mask));
+	__m128	t0(_mm_movehl_ps(ami,ami));	// horizontal add: lanes 2,3 are folded onto lanes 0,1 ...
+	ami=_mm_add_ps(ami,t0);
+	ami=_mm_add_ss(ami,_mm_shuffle_ps(ami,ami,1));	// ... then lane 1 onto lane 0, so lane 0 = sum of all four lanes
+	__m128 t1(_mm_movehl_ps(bmi,bmi));
+	bmi=_mm_add_ps(bmi,t1);
+	bmi=_mm_add_ss(bmi,_mm_shuffle_ps(bmi,bmi,1));
+	// NOTE(review): lane 3 (the fourth float of each btVector3) enters both sums; this equals the generic path only if that lane contributes 0 - confirm
+	btSSEUnion tmp;
+	tmp.ssereg = _mm_cmple_ss(bmi,ami);	// lane 0 is all ones when b's sum <= a's sum
+	return tmp.ints[0]&1;
+	// Inline-asm variant of the same computation (MSVC __asm syntax)
+#else
+	ATTRIBUTE_ALIGNED16(__int32	r[1]);
+	__asm
+	{
+		mov		eax,o
+			mov		ecx,a
+			mov		edx,b
+			movaps	xmm0,[eax]
+		movaps	xmm5,mask
+			addps	xmm0,[eax+16]	
+		movaps	xmm1,[ecx]
+		movaps	xmm2,[edx]
+		addps	xmm1,[ecx+16]
+		addps	xmm2,[edx+16]
+		subps	xmm1,xmm0
+			subps	xmm2,xmm0
+			andps	xmm1,xmm5
+			andps	xmm2,xmm5
+			movhlps	xmm3,xmm1
+			movhlps	xmm4,xmm2
+			addps	xmm1,xmm3
+			addps	xmm2,xmm4
+			pshufd	xmm3,xmm1,1
+			pshufd	xmm4,xmm2,1
+			addss	xmm1,xmm3
+			addss	xmm2,xmm4
+			cmpless	xmm2,xmm1
+			movss	r,xmm2
+	}
+	return(r[0]&1);
+#endif
+#else
+	return(Proximity(o,a)<Proximity(o,b)?0:1);
+#endif
+}
+
+// Writes into r the box enclosing both a and b: component-wise minimum of the mins, maximum of the maxs.
+DBVT_INLINE void		Merge(	const btDbvtAabbMm& a,
+							  const btDbvtAabbMm& b,
+							  btDbvtAabbMm& r)
+{
+#if DBVT_MERGE_IMPL==DBVT_IMPL_SSE
+	__m128	ami(_mm_load_ps(a.mi));
+	__m128	amx(_mm_load_ps(a.mx));
+	__m128	bmi(_mm_load_ps(b.mi));
+	__m128	bmx(_mm_load_ps(b.mx));
+	ami=_mm_min_ps(ami,bmi);
+	amx=_mm_max_ps(amx,bmx);
+	_mm_store_ps(r.mi,ami);
+	_mm_store_ps(r.mx,amx);
+#else
+	for(int axis=0;axis<3;++axis)
+	{
+		r.mi[axis]=(a.mi[axis]<b.mi[axis])?a.mi[axis]:b.mi[axis];
+		r.mx[axis]=(a.mx[axis]>b.mx[axis])?a.mx[axis]:b.mx[axis];
+	}
+#endif
+}
+
+// True when the two boxes differ in at least one min or max component (exact comparison).
+DBVT_INLINE bool NotEqual(const btDbvtAabbMm& a,const btDbvtAabbMm& b) {
+	// Any single mismatch decides the result.
+	if(a.mi.x()!=b.mi.x()) return true;
+	if(a.mi.y()!=b.mi.y()) return true;
+	if(a.mi.z()!=b.mi.z()) return true;
+	if(a.mx.x()!=b.mx.x()) return true;
+	if(a.mx.y()!=b.mx.y()) return true;
+	if(a.mx.z()!=b.mx.z()) return true;
+	return false;
+}
+
+//
+// Inline implementations: btDbvt traversal functions
+//
+
+// Recursive walk over every node: calls policy.Process(root) first, then visits childs[0] and childs[1] of internal nodes. root is dereferenced without a null check.
+DBVT_PREFIX
+inline void		btDbvt::enumNodes(	const btDbvtNode* root,
+								  DBVT_IPOLICY)
+{
+	DBVT_CHECKTYPE
+	policy.Process(root);
+	if(root->isleaf())
+		return;
+	// Internal node: child 0 first, then child 1.
+	enumNodes(root->childs[0],policy);
+	enumNodes(root->childs[1],policy);
+}
+
+// Recursive walk that calls policy.Process only on leaves, visiting childs[0] before childs[1].
+// root is dereferenced without a null check.
+DBVT_PREFIX
+inline void		btDbvt::enumLeaves(	const btDbvtNode* root,
+								   DBVT_IPOLICY)
+{
+	DBVT_CHECKTYPE
+	if(root->isleaf())
+	{
+		// Leaf reached: hand it to the policy and stop descending.
+		policy.Process(root);
+		return;
+	}
+	enumLeaves(root->childs[0],policy);
+	enumLeaves(root->childs[1],policy);
+}
+
+// Tree-vs-tree query: calls policy.Process(a,b) for every pair of leaves (a under root0, b under root1) whose volumes intersect. Does nothing when either root is null.
+DBVT_PREFIX
+inline void		btDbvt::collideTT(	const btDbvtNode* root0,
+								  const btDbvtNode* root1,
+								  DBVT_IPOLICY)
+{
+	DBVT_CHECKTYPE
+		if(root0&&root1)
+		{
+			int								depth=1;	// number of pending pairs on the stack
+			int								treshold=DOUBLE_STACKSIZE-4;	// grow once fewer than 4 free slots remain (one step pushes at most 4 pairs)
+			btAlignedObjectArray<sStkNN>	stkStack;	// local stack, allocated on every call
+			stkStack.resize(DOUBLE_STACKSIZE);
+			stkStack[0]=sStkNN(root0,root1);
+			do	{		
+				sStkNN	p=stkStack[--depth];	// pop the next pair
+				if(depth>treshold)
+				{
+					stkStack.resize(stkStack.size()*2);	// double the stack before any push can overflow it
+					treshold=stkStack.size()-4;
+				}
+				if(p.a==p.b)	// same node on both sides: no volume test is made
+				{
+					if(p.a->isinternal())	// a leaf paired with itself is dropped without a callback
+					{
+						stkStack[depth++]=sStkNN(p.a->childs[0],p.a->childs[0]);
+						stkStack[depth++]=sStkNN(p.a->childs[1],p.a->childs[1]);
+						stkStack[depth++]=sStkNN(p.a->childs[0],p.a->childs[1]);
+					}
+				}
+				else if(Intersect(p.a->volume,p.b->volume))	// non-overlapping pairs are pruned here
+				{
+					if(p.a->isinternal())
+					{
+						if(p.b->isinternal())
+						{
+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]);	// both internal: all four child combinations
+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]);
+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]);
+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]);
+						}
+						else
+						{
+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b);	// only a is internal: split a
+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b);
+						}
+					}
+					else
+					{
+						if(p.b->isinternal())
+						{
+							stkStack[depth++]=sStkNN(p.a,p.b->childs[0]);	// only b is internal: split b
+							stkStack[depth++]=sStkNN(p.a,p.b->childs[1]);
+						}
+						else
+						{
+							policy.Process(p.a,p.b);	// two intersecting leaves: report the pair
+						}
+					}
+				}
+			} while(depth);	// until no pair is pending
+		}
+}
+
+
+// Same traversal as collideTT, but the pending pairs live in the member m_stkStack instead of a local array. Does nothing when either root is null.
+DBVT_PREFIX
+inline void		btDbvt::collideTTpersistentStack(	const btDbvtNode* root0,
+								  const btDbvtNode* root1,
+								  DBVT_IPOLICY)
+{
+	DBVT_CHECKTYPE
+		if(root0&&root1)
+		{
+			int								depth=1;	// number of pending pairs on the stack
+			int								treshold=DOUBLE_STACKSIZE-4;	// grow once fewer than 4 free slots remain (one step pushes at most 4 pairs)
+			// The member stack is set back to DOUBLE_STACKSIZE entries at the start of every call.
+			m_stkStack.resize(DOUBLE_STACKSIZE);
+			m_stkStack[0]=sStkNN(root0,root1);
+			do	{		
+				sStkNN	p=m_stkStack[--depth];	// pop the next pair
+				if(depth>treshold)
+				{
+					m_stkStack.resize(m_stkStack.size()*2);	// double the stack before any push can overflow it
+					treshold=m_stkStack.size()-4;
+				}
+				if(p.a==p.b)	// same node on both sides: no volume test is made
+				{
+					if(p.a->isinternal())	// a leaf paired with itself is dropped without a callback
+					{
+						m_stkStack[depth++]=sStkNN(p.a->childs[0],p.a->childs[0]);
+						m_stkStack[depth++]=sStkNN(p.a->childs[1],p.a->childs[1]);
+						m_stkStack[depth++]=sStkNN(p.a->childs[0],p.a->childs[1]);
+					}
+				}
+				else if(Intersect(p.a->volume,p.b->volume))	// non-overlapping pairs are pruned here
+				{
+					if(p.a->isinternal())
+					{
+						if(p.b->isinternal())
+						{
+							m_stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]);	// both internal: all four child combinations
+							m_stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]);
+							m_stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]);
+							m_stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]);
+						}
+						else
+						{
+							m_stkStack[depth++]=sStkNN(p.a->childs[0],p.b);	// only a is internal: split a
+							m_stkStack[depth++]=sStkNN(p.a->childs[1],p.b);
+						}
+					}
+					else
+					{
+						if(p.b->isinternal())
+						{
+							m_stkStack[depth++]=sStkNN(p.a,p.b->childs[0]);	// only b is internal: split b
+							m_stkStack[depth++]=sStkNN(p.a,p.b->childs[1]);
+						}
+						else
+						{
+							policy.Process(p.a,p.b);	// two intersecting leaves: report the pair
+						}
+					}
+				}
+			} while(depth);	// until no pair is pending
+		}
+}
+
+#if 0
+// Disabled code: everything up to the matching #endif is compiled out. It calls a three-argument Intersect(volume,volume,xform) that is not declared in the visible part of this header.
+DBVT_PREFIX
+inline void		btDbvt::collideTT(	const btDbvtNode* root0,
+								  const btDbvtNode* root1,
+								  const btTransform& xform,
+								  DBVT_IPOLICY)
+{
+	DBVT_CHECKTYPE
+		if(root0&&root1)
+		{
+			int								depth=1;
+			int								treshold=DOUBLE_STACKSIZE-4;
+			btAlignedObjectArray<sStkNN>	stkStack;
+			stkStack.resize(DOUBLE_STACKSIZE);
+			stkStack[0]=sStkNN(root0,root1);
+			do	{
+				sStkNN	p=stkStack[--depth];
+				if(Intersect(p.a->volume,p.b->volume,xform))
+				{
+					if(depth>treshold)
+					{
+						stkStack.resize(stkStack.size()*2);
+						treshold=stkStack.size()-4;
+					}
+					if(p.a->isinternal())
+					{
+						if(p.b->isinternal())
+						{					
+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[0]);
+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[0]);
+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b->childs[1]);
+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b->childs[1]);
+						}
+						else
+						{
+							stkStack[depth++]=sStkNN(p.a->childs[0],p.b);
+							stkStack[depth++]=sStkNN(p.a->childs[1],p.b);
+						}
+					}
+					else
+					{
+						if(p.b->isinternal())
+						{
+							stkStack[depth++]=sStkNN(p.a,p.b->childs[0]);
+							stkStack[depth++]=sStkNN(p.a,p.b->childs[1]);
+						}
+						else
+						{
+							policy.Process(p.a,p.b);
+						}
+					}
+				}
+			} while(depth);
+		}
+}
+// Disabled as well: combines the two transforms as xform0.inverse()*xform1 and forwards to the overload above.
+DBVT_PREFIX
+inline void		btDbvt::collideTT(	const btDbvtNode* root0,
+								  const btTransform& xform0,
+								  const btDbvtNode* root1,
+								  const btTransform& xform1,
+								  DBVT_IPOLICY)
+{
+	const btTransform	xform=xform0.inverse()*xform1;
+	collideTT(root0,root1,xform,policy);
+}
+#endif 
+
+// Tree-vs-volume query: calls policy.Process(leaf) for every leaf under root whose volume intersects vol.
+DBVT_PREFIX
+inline void		btDbvt::collideTV(	const btDbvtNode* root,
+								  const btDbvtVolume& vol,
+								  DBVT_IPOLICY)
+{
+	DBVT_CHECKTYPE
+	if(!root)
+		return;	// empty tree: nothing to report
+	// 16-byte aligned local copy of the query volume.
+	ATTRIBUTE_ALIGNED16(btDbvtVolume)		query(vol);
+	// Explicit stack of nodes still to be visited.
+	btAlignedObjectArray<const btDbvtNode*>	pending;
+	pending.resize(0);
+	pending.reserve(SIMPLE_STACKSIZE);
+	pending.push_back(root);
+	while(pending.size()>0)
+	{
+		const btDbvtNode*	node=pending[pending.size()-1];
+		pending.pop_back();
+		if(!Intersect(node->volume,query))
+			continue;	// this subtree cannot touch the query volume
+		if(node->isleaf())
+		{
+			policy.Process(node);
+			continue;
+		}
+		// Internal node: both children remain candidates.
+		pending.push_back(node->childs[0]);
+		pending.push_back(node->childs[1]);
+	}
+}
+
+// Ray query with caller-precomputed ray data: calls policy.Process(leaf) for every leaf whose box passes btRayAabb2.
+// Each node box is tested as [Mins()-aabbMax, Maxs()-aabbMin]; rayTo is unused. Does nothing when root is null.
+// Works on the mutable member m_rayTestStack (set to DOUBLE_STACKSIZE entries on entry, doubled on demand).
+DBVT_PREFIX
+inline void		btDbvt::rayTestInternal(	const btDbvtNode* root,
+								const btVector3& rayFrom,
+								const btVector3& rayTo,
+								const btVector3& rayDirectionInverse,
+								unsigned int signs[3],
+								btScalar lambda_max,
+								const btVector3& aabbMin,
+								const btVector3& aabbMax,
+								DBVT_IPOLICY) const
+{
+        (void) rayTo;
+	DBVT_CHECKTYPE
+	if(root)
+	{
+		int								depth=1;	// number of pending nodes on the stack
+		int								treshold=DOUBLE_STACKSIZE-2;	// grow once fewer than 2 free slots remain (one step pushes 2 nodes)
+		btAlignedObjectArray<const btDbvtNode*>&	stack = m_rayTestStack;
+		stack.resize(DOUBLE_STACKSIZE);
+		stack[0]=root;
+		btVector3 bounds[2];
+		do	
+		{
+			const btDbvtNode*	node=stack[--depth];
+			bounds[0] = node->volume.Mins()-aabbMax;
+			bounds[1] = node->volume.Maxs()-aabbMin;
+			btScalar tmin=1.f,lambda_min=0.f;
+			const unsigned int result1 = btRayAabb2(rayFrom,rayDirectionInverse,signs,bounds,tmin,lambda_min,lambda_max);
+			if(result1)
+			{
+				if(node->isinternal())
+				{
+					if(depth>treshold)
+					{
+						stack.resize(stack.size()*2);
+						treshold=stack.size()-2;
+					}
+					stack[depth++]=node->childs[0];
+					stack[depth++]=node->childs[1];
+				}
+				else
+				{
+					policy.Process(node);
+				}
+			}
+		} while(depth);
+	}
+}
+
+// Ray query from rayFrom to rayTo: calls policy.Process(leaf) for every leaf whose box passes btRayAabb2.
+// Uses only local state (the traversal stack is a local array). Does nothing when root is null.
+DBVT_PREFIX
+inline void		btDbvt::rayTest(	const btDbvtNode* root,
+								const btVector3& rayFrom,
+								const btVector3& rayTo,
+								DBVT_IPOLICY)
+{
+	DBVT_CHECKTYPE
+		if(root)
+		{
+			btVector3 rayDir = (rayTo-rayFrom);
+			rayDir.normalize ();
+			// NOTE(review): for rayTo==rayFrom, rayDir is the zero vector before normalize() - confirm callers never pass a zero-length ray
+			///what about division by zero? --> just set rayDirection[i] to INF/BT_LARGE_FLOAT
+			btVector3 rayDirectionInverse;
+			rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[0];
+			rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[1];
+			rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[2];
+			unsigned int signs[3] = { rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0};
+
+			btScalar lambda_max = rayDir.dot(rayTo-rayFrom);	// unit direction dotted with the ray vector
+
+			btAlignedObjectArray<const btDbvtNode*>	stack;
+
+			int								depth=1;	// number of pending nodes on the stack
+			int								treshold=DOUBLE_STACKSIZE-2;	// grow once fewer than 2 free slots remain (one step pushes 2 nodes)
+
+			stack.resize(DOUBLE_STACKSIZE);
+			stack[0]=root;
+			btVector3 bounds[2];
+			do	{
+				const btDbvtNode*	node=stack[--depth];
+
+				bounds[0] = node->volume.Mins();
+				bounds[1] = node->volume.Maxs();
+				
+				btScalar tmin=1.f,lambda_min=0.f;
+				unsigned int result1 = btRayAabb2(rayFrom,rayDirectionInverse,signs,bounds,tmin,lambda_min,lambda_max);
+
+#ifdef COMPARE_BTRAY_AABB2
+				btVector3 resultNormal;	// only needed by the cross-check below
+				btScalar param=1.f;
+				bool result2 = btRayAabb(rayFrom,rayTo,node->volume.Mins(),node->volume.Maxs(),param,resultNormal);
+				btAssert(result1 == result2);
+#endif //COMPARE_BTRAY_AABB2
+
+				if(result1)
+				{
+					if(node->isinternal())
+					{
+						if(depth>treshold)
+						{
+							stack.resize(stack.size()*2);
+							treshold=stack.size()-2;
+						}
+						stack[depth++]=node->childs[0];
+						stack[depth++]=node->childs[1];
+					}
+					else
+					{
+						policy.Process(node);
+					}
+				}
+			} while(depth);
+
+		}
+}
+
+// Walks the tree under root against `count` planes (normals[i],offsets[i]); nodes that survive every Classify test are reported through policy.AllLeaves / enumLeaves.
+DBVT_PREFIX
+inline void		btDbvt::collideKDOP(const btDbvtNode* root,
+									const btVector3* normals,
+									const btScalar* offsets,
+									int count,
+									DBVT_IPOLICY)
+{
+	DBVT_CHECKTYPE
+		if(root)
+		{
+			const int						inside=(1<<count)-1;	// mask value with one bit set for each of the count planes
+			btAlignedObjectArray<sStkNP>	stack;
+			int								signs[sizeof(unsigned)*8];
+			btAssert(count<int (sizeof(signs)/sizeof(signs[0])));
+			for(int i=0;i<count;++i)
+			{
+				signs[i]=	((normals[i].x()>=0)?1:0)+	// 3-bit code, one bit per component that is >= 0, later passed to Classify
+					((normals[i].y()>=0)?2:0)+
+					((normals[i].z()>=0)?4:0);
+			}
+			stack.reserve(SIMPLE_STACKSIZE);
+			stack.push_back(sStkNP(root,0));
+			do	{
+				sStkNP	se=stack[stack.size()-1];
+				bool	out=false;
+				stack.pop_back();
+				for(int i=0,j=1;(!out)&&(i<count);++i,j<<=1)
+				{
+					if(0==(se.mask&j))	// plane i has not been marked in this entry's mask yet
+					{
+						const int	side=se.node->volume.Classify(normals[i],offsets[i],signs[i]);
+						switch(side)
+						{
+						case	-1:	out=true;break;	// discard this node and everything below it
+						case	+1:	se.mask|=j;break;	// mark the plane so children inheriting the mask skip it
+						}
+					}
+				}
+				if(!out)
+				{
+					if((se.mask!=inside)&&(se.node->isinternal()))
+					{
+						stack.push_back(sStkNP(se.node->childs[0],se.mask));
+						stack.push_back(sStkNP(se.node->childs[1],se.mask));
+					}
+					else
+					{
+						if(policy.AllLeaves(se.node)) enumLeaves(se.node,policy);	// every plane marked, or a leaf: stop testing planes here
+					}
+				}
+			} while(stack.size());
+		}
+}
+
+// Plane-culled traversal like collideKDOP, gated by policy.Descent; with fsort, children are inserted by their ProjectMinimum value along sortaxis and leaves reach policy.Process with that value.
+DBVT_PREFIX
+inline void		btDbvt::collideOCL(	const btDbvtNode* root,
+								   const btVector3* normals,
+								   const btScalar* offsets,
+								   const btVector3& sortaxis,
+								   int count,
+								   DBVT_IPOLICY,
+								   bool fsort)
+{
+	DBVT_CHECKTYPE
+		if(root)
+		{
+			const unsigned					srtsgns=(sortaxis[0]>=0?1:0)+	// 3-bit sign code of sortaxis, passed to ProjectMinimum
+				(sortaxis[1]>=0?2:0)+
+				(sortaxis[2]>=0?4:0);
+			const int						inside=(1<<count)-1;	// mask value with one bit set for each of the count planes
+			btAlignedObjectArray<sStkNPS>	stock;	// storage for the traversal entries
+			btAlignedObjectArray<int>		ifree;	// ids of stock entries that have already been consumed
+			btAlignedObjectArray<int>		stack;	// ids into stock, in processing order (last one is processed next)
+			int								signs[sizeof(unsigned)*8];
+			btAssert(count<int (sizeof(signs)/sizeof(signs[0])));
+			for(int i=0;i<count;++i)
+			{
+				signs[i]=	((normals[i].x()>=0)?1:0)+	// 3-bit code, one bit per component that is >= 0, later passed to Classify
+					((normals[i].y()>=0)?2:0)+
+					((normals[i].z()>=0)?4:0);
+			}
+			stock.reserve(SIMPLE_STACKSIZE);
+			stack.reserve(SIMPLE_STACKSIZE);
+			ifree.reserve(SIMPLE_STACKSIZE);
+			stack.push_back(allocate(ifree,stock,sStkNPS(root,0,root->volume.ProjectMinimum(sortaxis,srtsgns))));
+			do	{
+				const int	id=stack[stack.size()-1];
+				sStkNPS		se=stock[id];
+				stack.pop_back();ifree.push_back(id);	// the entry has been copied into se, so its id is handed back to ifree
+				if(se.mask!=inside)
+				{
+					bool	out=false;
+					for(int i=0,j=1;(!out)&&(i<count);++i,j<<=1)
+					{
+						if(0==(se.mask&j))	// plane i has not been marked in this entry's mask yet
+						{
+							const int	side=se.node->volume.Classify(normals[i],offsets[i],signs[i]);
+							switch(side)
+							{
+							case	-1:	out=true;break;	// discard this node and everything below it
+							case	+1:	se.mask|=j;break;	// mark the plane so children inheriting the mask skip it
+							}
+						}
+					}
+					if(out) continue;
+				}
+				if(policy.Descent(se.node))
+				{
+					if(se.node->isinternal())
+					{
+						const btDbvtNode* pns[]={	se.node->childs[0],se.node->childs[1]};
+						sStkNPS		nes[]={	sStkNPS(pns[0],se.mask,pns[0]->volume.ProjectMinimum(sortaxis,srtsgns)),
+							sStkNPS(pns[1],se.mask,pns[1]->volume.ProjectMinimum(sortaxis,srtsgns))};
+						const int	q=nes[0].value<nes[1].value?1:0;	// nes[q] is the child whose value is not the smaller one; it is inserted first
+						int			j=stack.size();
+						if(fsort&&(j>0))
+						{
+							/* Insert 0	*/ 
+							j=nearest(&stack[0],&stock[0],nes[q].value,0,stack.size());
+							stack.push_back(0);	// grow by one slot, then shift the tail up to open position j
+#if DBVT_USE_MEMMOVE
+							memmove(&stack[j+1],&stack[j],sizeof(int)*(stack.size()-j-1));
+#else
+							for(int k=stack.size()-1;k>j;--k) stack[k]=stack[k-1];
+#endif
+							stack[j]=allocate(ifree,stock,nes[q]);
+							/* Insert 1	*/ 
+							j=nearest(&stack[0],&stock[0],nes[1-q].value,j,stack.size());	// search starts at the position used for the first child
+							stack.push_back(0);
+#if DBVT_USE_MEMMOVE
+							memmove(&stack[j+1],&stack[j],sizeof(int)*(stack.size()-j-1));
+#else
+							for(int k=stack.size()-1;k>j;--k) stack[k]=stack[k-1];
+#endif
+							stack[j]=allocate(ifree,stock,nes[1-q]);
+						}
+						else
+						{
+							stack.push_back(allocate(ifree,stock,nes[q]));	// unsorted case: nes[1-q] ends up on top and is processed first
+							stack.push_back(allocate(ifree,stock,nes[1-q]));
+						}
+					}
+					else
+					{
+						policy.Process(se.node,se.value);
+					}
+				}
+			} while(stack.size());
+		}
+}
+
+// Unculled walk of the tree under root: policy.Descent gates every node and policy.Process receives the leaves.
+DBVT_PREFIX
+inline void		btDbvt::collideTU(	const btDbvtNode* root,
+								  DBVT_IPOLICY)
+{
+	DBVT_CHECKTYPE
+		if(!root) return;
+	btAlignedObjectArray<const btDbvtNode*>	pending;
+	pending.reserve(SIMPLE_STACKSIZE);
+	pending.push_back(root);
+	while(pending.size()>0)
+	{
+		const btDbvtNode*	node=pending[pending.size()-1];
+		pending.pop_back();
+		if(!policy.Descent(node)) continue;
+		if(!node->isinternal())
+		{
+			policy.Process(node);
+			continue;
+		}
+		pending.push_back(node->childs[0]);
+		pending.push_back(node->childs[1]);
+	}
+}
+
+//
+// PP Cleanup
+//
+
+#undef DBVT_USE_MEMMOVE
+#undef DBVT_USE_TEMPLATE
+#undef DBVT_VIRTUAL_DTOR
+#undef DBVT_VIRTUAL
+#undef DBVT_PREFIX
+#undef DBVT_IPOLICY
+#undef DBVT_CHECKTYPE
+#undef DBVT_IMPL_GENERIC
+#undef DBVT_IMPL_SSE
+#undef DBVT_USE_INTRINSIC_SSE
+#undef DBVT_SELECT_IMPL
+#undef DBVT_MERGE_IMPL
+#undef DBVT_INT0_IMPL
+
+#endif
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btDbvtBroadphase.cpp b/src/bullet/BulletCollision/BroadphaseCollision/btDbvtBroadphase.cpp
new file mode 100644
index 00000000..75cfac64
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btDbvtBroadphase.cpp
@@ -0,0 +1,796 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///btDbvtBroadphase implementation by Nathanael Presson
+
+#include "btDbvtBroadphase.h"
+
+//
+// Profiling
+//
+
+#if DBVT_BP_PROFILE||DBVT_BP_ENABLE_BENCHMARK
+#include <stdio.h>
+#endif
+
+#if DBVT_BP_PROFILE
+struct	ProfileScope	// Scoped timer: adds the microseconds elapsed between its construction and destruction to `value`
+{
+	__forceinline ProfileScope(btClock& clock,unsigned long& value) :
+	m_clock(&clock),m_value(&value),m_base(clock.getTimeMicroseconds())	// remember the clock reading at scope entry
+	{
+	}
+	__forceinline ~ProfileScope()
+	{
+		(*m_value)+=m_clock->getTimeMicroseconds()-m_base;	// accumulate the elapsed time into the caller's counter
+	}
+	btClock*		m_clock;	// clock sampled at both ends of the scope
+	unsigned long*	m_value;	// counter that receives the elapsed time
+	unsigned long	m_base;	// clock reading taken in the constructor
+};
+#define	SPC(_value_)	ProfileScope	spc_scope(m_clock,_value_)
+#else
+#define	SPC(_value_)
+#endif
+
+//
+// Helpers
+//
+
+// Pushes item onto the front of the doubly linked list headed by list (links[0] = previous, links[1] = next).
+template <typename T>
+static inline void	listappend(T* item,T*& list)
+{
+	item->links[0]=0;item->links[1]=list;
+	if(list!=0)
+		list->links[0]=item;
+	list=item;
+}
+
+// Unlinks item from the doubly linked list headed by list; the head moves to item's successor when item was first.
+template <typename T>
+static inline void	listremove(T* item,T*& list)
+{
+	if(item->links[0]==0) list=item->links[1]; else item->links[0]->links[1]=item->links[1];
+	if(item->links[1]!=0) item->links[1]->links[0]=item->links[0];
+}
+
+// Counts the nodes reachable from root by following links[1].
+template <typename T>
+static inline int	listcount(T* root)
+{
+	int	total=0;
+	for(T* cur=root;cur!=0;cur=cur->links[1]) ++total;
+	return total;
+}
+
+// Resets value by assigning it a value-initialized object derived from T (all fields zero for a plain struct).
+template <typename T>
+static inline void	clear(T& value)
+{
+	static const struct ZeroDummy : T {} zerodummy=ZeroDummy();	// explicit initializer: a const object without a user-provided default constructor may not be left default-initialized
+	value=zerodummy;
+}
+
+//
+// Colliders
+//
+
+/* Tree collider: records every reported pair of distinct leaves in the broadphase pair cache	*/ 
+struct	btDbvtTreeCollider : btDbvt::ICollide
+{
+	btDbvtBroadphase*	pbp;	// broadphase whose pair cache and new-pair counter are updated
+	btDbvtProxy*		proxy;	// proxy whose leaf is paired with each node by the single-node Process(); callers assign it before use
+	btDbvtTreeCollider(btDbvtBroadphase* p) : pbp(p),proxy(0) {}	// proxy starts out null instead of indeterminate
+	void	Process(const btDbvtNode* na,const btDbvtNode* nb)
+	{
+		if(na!=nb)	// a leaf is never paired with itself
+		{
+			btDbvtProxy*	pa=(btDbvtProxy*)na->data;
+			btDbvtProxy*	pb=(btDbvtProxy*)nb->data;
+#if DBVT_BP_SORTPAIRS
+			if(pa->m_uniqueId>pb->m_uniqueId) 
+				btSwap(pa,pb);
+#endif
+			pbp->m_paircache->addOverlappingPair(pa,pb);
+			++pbp->m_newpairs;
+		}
+	}
+	void	Process(const btDbvtNode* n)
+	{
+		Process(n,proxy->leaf);	// pairs the visited leaf with the leaf of the preset proxy
+	}
+};
+
+//
+// btDbvtBroadphase
+//
+
+// Builds the broadphase around paircache; when none is supplied a btHashedOverlappingPairCache is allocated and owned.
+btDbvtBroadphase::btDbvtBroadphase(btOverlappingPairCache* paircache)
+{
+	/* Pair cache: adopt the caller's, or create one that the destructor releases	*/ 
+	m_releasepaircache	=	(paircache==0);
+	if(m_releasepaircache)
+		paircache		=	new(btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache();
+	m_paircache			=	paircache;
+	/* Flags, tuning values and counters	*/ 
+	m_deferedcollide	=	false;
+	m_needcleanup		=	true;
+	m_prediction		=	0;
+	m_fupdates			=	1;
+	m_dupdates			=	0;
+	m_cupdates			=	10;
+	m_newpairs			=	1;
+	m_fixedleft			=	0;
+	m_updates_call		=	0;
+	m_updates_done		=	0;
+	m_updates_ratio		=	0;
+	m_gid = m_pid = m_cid = 0;
+	/* Stage lists, including the extra slot at index STAGECOUNT	*/ 
+	m_stageCurrent		=	0;
+	for(int i=STAGECOUNT;i>=0;--i) m_stageRoots[i]=0;
+#if DBVT_BP_PROFILE
+	clear(m_profiling);
+#endif
+}
+
+// Destroys and frees the pair cache, but only when m_releasepaircache says this object owns it.
+btDbvtBroadphase::~btDbvtBroadphase()
+{
+	if(!m_releasepaircache) return;
+	/* the cache lives in btAlignedAlloc memory: run its destructor by hand, then free the raw block	*/ 
+	btOverlappingPairCache* const	owned=m_paircache;
+	owned->~btOverlappingPairCache();
+	btAlignedFree(owned);
+}
+
+// Creates a proxy for the box [aabbMin,aabbMax], inserts it into m_sets[0] and the current stage list, and (unless collision is deferred) collides it against both trees.
+btBroadphaseProxy*				btDbvtBroadphase::createProxy(	const btVector3& aabbMin,
+															  const btVector3& aabbMax,
+															  int /*shapeType*/,
+															  void* userPtr,
+															  short int collisionFilterGroup,
+															  short int collisionFilterMask,
+															  btDispatcher* /*dispatcher*/,
+															  void* /*multiSapProxy*/)
+{
+	btDbvtProxy*		proxy=new(btAlignedAlloc(sizeof(btDbvtProxy),16)) btDbvtProxy(	aabbMin,aabbMax,userPtr,	// placement new into a 16-byte aligned allocation
+		collisionFilterGroup,
+		collisionFilterMask);
+
+	btDbvtAabbMm aabb = btDbvtVolume::FromMM(aabbMin,aabbMax);
+
+	//bproxy->aabb			=	btDbvtVolume::FromMM(aabbMin,aabbMax);
+	proxy->stage		=	m_stageCurrent;
+	proxy->m_uniqueId	=	++m_gid;	// ids are handed out from a running counter
+	proxy->leaf			=	m_sets[0].insert(aabb,proxy);	// the proxy itself is stored as the leaf's data
+	listappend(proxy,m_stageRoots[m_stageCurrent]);
+	if(!m_deferedcollide)
+	{
+		btDbvtTreeCollider	collider(this);
+		collider.proxy=proxy;	// every leaf found below is paired with this proxy's leaf; the collider skips the leaf itself
+		m_sets[0].collideTV(m_sets[0].m_root,aabb,collider);
+		m_sets[1].collideTV(m_sets[1].m_root,aabb,collider);
+	}
+	return(proxy);
+}
+
+// Unlinks the proxy from its tree, its stage list and the pair cache, then frees its memory.
+void							btDbvtBroadphase::destroyProxy(	btBroadphaseProxy* absproxy,
+															   btDispatcher* dispatcher)
+{
+	btDbvtProxy* const	doomed=(btDbvtProxy*)absproxy;
+	/* stage STAGECOUNT means the leaf lives in m_sets[1] (fixed set), any other stage in m_sets[0]	*/ 
+	const int			setIndex=(doomed->stage==STAGECOUNT)?1:0;
+	m_sets[setIndex].remove(doomed->leaf);
+
+	listremove(doomed,m_stageRoots[doomed->stage]);
+	m_paircache->removeOverlappingPairsContainingProxy(doomed,dispatcher);
+	m_needcleanup=true;
+	btAlignedFree(doomed);
+}
+
+// Copies the bounds stored on the proxy into aabbMin / aabbMax.
+void	btDbvtBroadphase::getAabb(btBroadphaseProxy* absproxy,btVector3& aabbMin, btVector3& aabbMax ) const
+{
+	const btDbvtProxy* const	stored=static_cast<const btDbvtProxy*>(absproxy);
+	aabbMin=stored->m_aabbMin;aabbMax=stored->m_aabbMax;
+}
+
+// Adapter that lets a btDbvt traversal feed a btBroadphaseRayCallback: each leaf's data is handed over as a proxy.
+struct	BroadphaseRayTester : btDbvt::ICollide
+{
+	btBroadphaseRayCallback& m_rayCallback;
+
+	BroadphaseRayTester(btBroadphaseRayCallback& orgCallback) : m_rayCallback(orgCallback) {}
+
+	/* leaf->data is treated as the btDbvtProxy stored for that leaf	*/ 
+	void					Process(const btDbvtNode* leaf)
+	{
+		m_rayCallback.process((btDbvtProxy*)leaf->data);
+	}
+};	
+
+// Casts the ray against both trees and forwards every leaf that
+// btDbvt::rayTestInternal reports to rayCallback. aabbMin / aabbMax are
+// passed through to rayTestInternal unchanged.
+void	btDbvtBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback,const btVector3& aabbMin,const btVector3& aabbMax)
+{
+	BroadphaseRayTester	tester(rayCallback);
+
+	/* the inverse direction, the sign table and lambda_max are read from rayCallback;	*/ 
+	/* m_sets[0] is queried before m_sets[1]											*/ 
+	for(int iset=0;iset<2;++iset)
+	{
+		btDbvt&	tree=m_sets[iset];
+
+		tree.rayTestInternal(	tree.m_root,
+			rayFrom,
+			rayTo,
+			rayCallback.m_rayDirectionInverse,
+			rayCallback.m_signs,
+			rayCallback.m_lambda_max,
+			aabbMin,
+			aabbMax,
+			tester);
+	}
+
+}
+
+
+// Adapter that lets a btDbvt traversal feed a btBroadphaseAabbCallback: each leaf's data is handed over as a proxy.
+struct	BroadphaseAabbTester : btDbvt::ICollide
+{
+	btBroadphaseAabbCallback& m_aabbCallback;
+
+	BroadphaseAabbTester(btBroadphaseAabbCallback& orgCallback) : m_aabbCallback(orgCallback) {}
+
+	/* leaf->data is treated as the btDbvtProxy stored for that leaf	*/ 
+	void					Process(const btDbvtNode* leaf)
+	{
+		m_aabbCallback.process((btDbvtProxy*)leaf->data);
+	}
+};	
+
+// Reports, through aabbCallback, the proxies that collideTV finds for the box [aabbMin,aabbMax] in both trees.
+void	btDbvtBroadphase::aabbTest(const btVector3& aabbMin,const btVector3& aabbMax,btBroadphaseAabbCallback& aabbCallback)
+{
+	const ATTRIBUTE_ALIGNED16(btDbvtVolume)	queryBox=btDbvtVolume::FromMM(aabbMin,aabbMax);
+	BroadphaseAabbTester					tester(aabbCallback);
+
+	/* m_sets[0] is queried before m_sets[1]	*/ 
+	for(int iset=0;iset<2;++iset)
+		m_sets[iset].collideTV(m_sets[iset].m_root,queryBox,tester);
+}
+
+
+
+// Updates a proxy's bounds: a fixed-set proxy is re-inserted into the dynamic set, a dynamic one is updated in place; the proxy then joins the current stage list and, unless deferred, is collided against both trees.
+void							btDbvtBroadphase::setAabb(		btBroadphaseProxy* absproxy,
+														  const btVector3& aabbMin,
+														  const btVector3& aabbMax,
+														  btDispatcher* /*dispatcher*/)
+{
+	btDbvtProxy*						proxy=(btDbvtProxy*)absproxy;
+	ATTRIBUTE_ALIGNED16(btDbvtVolume)	aabb=btDbvtVolume::FromMM(aabbMin,aabbMax);
+#if DBVT_BP_PREVENTFALSEUPDATE
+	if(NotEqual(aabb,proxy->leaf->volume))
+#endif
+	{
+		bool	docollide=false;	// set when the leaf was actually (re)inserted or updated
+		if(proxy->stage==STAGECOUNT)
+		{/* fixed -> dynamic set	*/ 
+			m_sets[1].remove(proxy->leaf);
+			proxy->leaf=m_sets[0].insert(aabb,proxy);
+			docollide=true;
+		}
+		else
+		{/* dynamic set				*/ 
+			++m_updates_call;
+			if(Intersect(proxy->leaf->volume,aabb))
+			{/* Moving				*/ 
+
+				const btVector3	delta=aabbMin-proxy->m_aabbMin;	// displacement of the min corner relative to the stored bounds
+				btVector3		velocity(((proxy->m_aabbMax-proxy->m_aabbMin)/2)*m_prediction);	// half extents of the stored box scaled by m_prediction
+				if(delta[0]<0) velocity[0]=-velocity[0];	// give each component the sign of the displacement
+				if(delta[1]<0) velocity[1]=-velocity[1];
+				if(delta[2]<0) velocity[2]=-velocity[2];
+				if	(
+#ifdef DBVT_BP_MARGIN				
+					m_sets[0].update(proxy->leaf,aabb,velocity,DBVT_BP_MARGIN)
+#else
+					m_sets[0].update(proxy->leaf,aabb,velocity)
+#endif
+					)
+				{
+					++m_updates_done;	// counted only when update() returns true
+					docollide=true;
+				}
+			}
+			else
+			{/* Teleporting			*/ 
+				m_sets[0].update(proxy->leaf,aabb);
+				++m_updates_done;
+				docollide=true;
+			}	
+		}
+		listremove(proxy,m_stageRoots[proxy->stage]);	// move the proxy from its old stage list ...
+		proxy->m_aabbMin = aabbMin;
+		proxy->m_aabbMax = aabbMax;
+		proxy->stage	=	m_stageCurrent;
+		listappend(proxy,m_stageRoots[m_stageCurrent]);	// ... to the list of the current stage
+		if(docollide)
+		{
+			m_needcleanup=true;
+			if(!m_deferedcollide)
+			{
+				btDbvtTreeCollider	collider(this);
+				m_sets[1].collideTTpersistentStack(m_sets[1].m_root,proxy->leaf,collider);
+				m_sets[0].collideTTpersistentStack(m_sets[0].m_root,proxy->leaf,collider);
+			}
+		}	
+	}
+}
+
+
+// Variant of setAabb that always re-inserts or updates the leaf with the exact new box (no intersection test, velocity or margin), then re-links and collides the proxy as setAabb does.
+void							btDbvtBroadphase::setAabbForceUpdate(		btBroadphaseProxy* absproxy,
+														  const btVector3& aabbMin,
+														  const btVector3& aabbMax,
+														  btDispatcher* /*dispatcher*/)
+{
+	btDbvtProxy*						proxy=(btDbvtProxy*)absproxy;
+	ATTRIBUTE_ALIGNED16(btDbvtVolume)	aabb=btDbvtVolume::FromMM(aabbMin,aabbMax);
+	bool	docollide=false;	// both branches below set this to true
+	if(proxy->stage==STAGECOUNT)
+	{/* fixed -> dynamic set	*/ 
+		m_sets[1].remove(proxy->leaf);
+		proxy->leaf=m_sets[0].insert(aabb,proxy);
+		docollide=true;
+	}
+	else
+	{/* dynamic set				*/ 
+		++m_updates_call;
+		/* Teleporting			*/ 
+		m_sets[0].update(proxy->leaf,aabb);
+		++m_updates_done;
+		docollide=true;
+	}
+	listremove(proxy,m_stageRoots[proxy->stage]);	// move the proxy from its old stage list ...
+	proxy->m_aabbMin = aabbMin;
+	proxy->m_aabbMax = aabbMax;
+	proxy->stage	=	m_stageCurrent;
+	listappend(proxy,m_stageRoots[m_stageCurrent]);	// ... to the list of the current stage
+	if(docollide)
+	{
+		m_needcleanup=true;
+		if(!m_deferedcollide)
+		{
+			btDbvtTreeCollider	collider(this);
+			m_sets[1].collideTTpersistentStack(m_sets[1].m_root,proxy->leaf,collider);
+			m_sets[0].collideTTpersistentStack(m_sets[0].m_root,proxy->leaf,collider);
+		}
+	}	
+}
+
+// Performs one broadphase update via collide(), prints timing figures when DBVT_BP_PROFILE is set, then runs performDeferredRemoval().
+void							btDbvtBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
+{
+	collide(dispatcher);
+#if DBVT_BP_PROFILE
+	if(0==(m_pid%DBVT_BP_PROFILING_RATE))
+	{	
+		/* The timing counters are unsigned long (see ProfileScope), so they are printed with %lu; other arguments are cast to match their specifier	*/ 
+		printf("fixed(%u) dynamics(%u) pairs(%u)\r\n",(unsigned int)m_sets[1].m_leaves,(unsigned int)m_sets[0].m_leaves,(unsigned int)m_paircache->getNumOverlappingPairs());
+		unsigned long	total=m_profiling.m_total;
+		if(total==0) total=1;	/* avoid dividing by zero below	*/ 
+		printf("ddcollide: %lu%% (%luus)\r\n",(50+m_profiling.m_ddcollide*100)/total,m_profiling.m_ddcollide/DBVT_BP_PROFILING_RATE);
+		printf("fdcollide: %lu%% (%luus)\r\n",(50+m_profiling.m_fdcollide*100)/total,m_profiling.m_fdcollide/DBVT_BP_PROFILING_RATE);
+		printf("cleanup:   %lu%% (%luus)\r\n",(50+m_profiling.m_cleanup*100)/total,m_profiling.m_cleanup/DBVT_BP_PROFILING_RATE);
+		printf("total:     %luus\r\n",total/DBVT_BP_PROFILING_RATE);
+		const unsigned long	sum=m_profiling.m_ddcollide+
+			m_profiling.m_fdcollide+
+			m_profiling.m_cleanup;
+		printf("leaked: %lu%% (%luus)\r\n",100-((50+sum*100)/total),(total-sum)/DBVT_BP_PROFILING_RATE);
+		const unsigned long	jobbase=(unsigned long)(m_sets[0].m_leaves+m_sets[1].m_leaves)*DBVT_BP_PROFILING_RATE;	/* zero when both trees are empty	*/ 
+		printf("job counts: %lu%%\r\n",jobbase?(unsigned long)(m_profiling.m_jobcount*100)/jobbase:0ul);
+		clear(m_profiling);
+		m_clock.reset();
+	}
+#endif
+	performDeferredRemoval(dispatcher);
+}
+// When the pair cache uses deferred removal: sorts the pair array, cleans and invalidates duplicates and pairs whose leaf volumes no longer intersect, then sorts again and truncates the array.
+void btDbvtBroadphase::performDeferredRemoval(btDispatcher* dispatcher)
+{
+
+	if (m_paircache->hasDeferredRemoval())
+	{
+
+		btBroadphasePairArray&	overlappingPairArray = m_paircache->getOverlappingPairArray();
+
+		//perform a sort, to find duplicates and to sort 'invalid' pairs to the end
+		overlappingPairArray.quickSort(btBroadphasePairSortPredicate());
+
+		int invalidPair = 0;	// number of pairs invalidated in the loop below
+
+		
+		int i;
+
+		btBroadphasePair previousPair;	// pair seen in the previous iteration, used to detect duplicates
+		previousPair.m_pProxy0 = 0;
+		previousPair.m_pProxy1 = 0;
+		previousPair.m_algorithm = 0;
+		
+		
+		for (i=0;i<overlappingPairArray.size();i++)
+		{
+		
+			btBroadphasePair& pair = overlappingPairArray[i];
+
+			bool isDuplicate = (pair == previousPair);
+
+			previousPair = pair;
+
+			bool needsRemoval = false;
+
+			if (!isDuplicate)
+			{
+				//important to perform AABB check that is consistent with the broadphase
+				btDbvtProxy*		pa=(btDbvtProxy*)pair.m_pProxy0;
+				btDbvtProxy*		pb=(btDbvtProxy*)pair.m_pProxy1;
+				bool hasOverlap = Intersect(pa->leaf->volume,pb->leaf->volume);
+
+				if (hasOverlap)
+				{
+					needsRemoval = false;
+				} else
+				{
+					needsRemoval = true;
+				}
+			} else
+			{
+				//remove duplicate
+				needsRemoval = true;
+				//should have no algorithm
+				btAssert(!pair.m_algorithm);
+			}
+			
+			if (needsRemoval)
+			{
+				m_paircache->cleanOverlappingPair(pair,dispatcher);
+
+				pair.m_pProxy0 = 0;	// null proxies mark the pair as invalid for the second sort
+				pair.m_pProxy1 = 0;
+				invalidPair++;
+			} 
+			
+		}
+
+		//perform a sort, to sort 'invalid' pairs to the end
+		overlappingPairArray.quickSort(btBroadphasePairSortPredicate());
+		overlappingPairArray.resize(overlappingPairArray.size() - invalidPair);	// drop the invalid tail
+	}
+}
+
+// One broadphase step: optimizes both trees incrementally, advances the stage and moves that stage's proxies to the fixed set, runs deferred tree-vs-tree collision, then prunes stale pairs.
+void							btDbvtBroadphase::collide(btDispatcher* dispatcher)
+{
+	/*printf("---------------------------------------------------------\n");
+	printf("m_sets[0].m_leaves=%d\n",m_sets[0].m_leaves);
+	printf("m_sets[1].m_leaves=%d\n",m_sets[1].m_leaves);
+	printf("numPairs = %d\n",getOverlappingPairCache()->getNumOverlappingPairs());
+	{
+		int i;
+		for (i=0;i<getOverlappingPairCache()->getNumOverlappingPairs();i++)
+		{
+			printf("pair[%d]=(%d,%d),",i,getOverlappingPairCache()->getOverlappingPairArray()[i].m_pProxy0->getUid(),
+				getOverlappingPairCache()->getOverlappingPairArray()[i].m_pProxy1->getUid());
+		}
+		printf("\n");
+	}
+*/
+
+
+
+	SPC(m_profiling.m_total);
+	/* optimize				*/ 
+	m_sets[0].optimizeIncremental(1+(m_sets[0].m_leaves*m_dupdates)/100);
+	if(m_fixedleft)
+	{
+		const int count=1+(m_sets[1].m_leaves*m_fupdates)/100;	// optimization passes for this step: 1 plus m_fupdates percent of the fixed leaves
+		m_sets[1].optimizeIncremental(count);
+		m_fixedleft=btMax<int>(0,m_fixedleft-count);
+	}
+	/* dynamic -> fixed set	*/ 
+	m_stageCurrent=(m_stageCurrent+1)%STAGECOUNT;
+	btDbvtProxy*	current=m_stageRoots[m_stageCurrent];	// proxies still linked in the list of the stage being entered
+	if(current)
+	{
+		btDbvtTreeCollider	collider(this);
+		do	{
+			btDbvtProxy*	next=current->links[1];	// saved first: the list operations below rewrite current->links
+			listremove(current,m_stageRoots[current->stage]);
+			listappend(current,m_stageRoots[STAGECOUNT]);
+			ATTRIBUTE_ALIGNED16(btDbvtVolume)	curAabb=btDbvtVolume::FromMM(current->m_aabbMin,current->m_aabbMax);	// btDbvtProxy has no aabb field; the volume is rebuilt from the stored bounds
+#if DBVT_BP_ACCURATESLEEPING
+			m_paircache->removeOverlappingPairsContainingProxy(current,dispatcher);
+			collider.proxy=current;
+			m_sets[0].collideTV(m_sets[0].m_root,curAabb,collider);
+			m_sets[1].collideTV(m_sets[1].m_root,curAabb,collider);
+#endif
+			m_sets[0].remove(current->leaf);
+			current->leaf	=	m_sets[1].insert(curAabb,current);
+			current->stage	=	STAGECOUNT;	
+			current			=	next;
+		} while(current);
+		m_fixedleft=m_sets[1].m_leaves;	// the fixed tree changed, so schedule incremental optimization for it
+		m_needcleanup=true;
+	}
+	/* collide dynamics		*/ 
+	{
+		btDbvtTreeCollider	collider(this);
+		if(m_deferedcollide)
+		{
+			SPC(m_profiling.m_fdcollide);
+			m_sets[0].collideTTpersistentStack(m_sets[0].m_root,m_sets[1].m_root,collider);
+		}
+		if(m_deferedcollide)
+		{
+			SPC(m_profiling.m_ddcollide);
+			m_sets[0].collideTTpersistentStack(m_sets[0].m_root,m_sets[0].m_root,collider);
+		}
+	}
+	/* clean up				*/ 
+	if(m_needcleanup)
+	{
+		SPC(m_profiling.m_cleanup);
+		btBroadphasePairArray&	pairs=m_paircache->getOverlappingPairArray();
+		if(pairs.size()>0)
+		{
+
+			int			ni=btMin(pairs.size(),btMax<int>(m_newpairs,(pairs.size()*m_cupdates)/100));	// pairs checked this step: at least m_newpairs, else m_cupdates percent, never more than all
+			for(int i=0;i<ni;++i)
+			{
+				btBroadphasePair&	p=pairs[(m_cid+i)%pairs.size()];	// checking resumes at m_cid, where the previous step stopped
+				btDbvtProxy*		pa=(btDbvtProxy*)p.m_pProxy0;
+				btDbvtProxy*		pb=(btDbvtProxy*)p.m_pProxy1;
+				if(!Intersect(pa->leaf->volume,pb->leaf->volume))
+				{
+#if DBVT_BP_SORTPAIRS
+					if(pa->m_uniqueId>pb->m_uniqueId) 
+						btSwap(pa,pb);
+#endif
+					m_paircache->removeOverlappingPair(pa,pb,dispatcher);
+					--ni;--i;	// NOTE(review): revisits the same index, presumably because removal refills this slot - confirm against the pair cache
+				}
+			}
+			if(pairs.size()>0) m_cid=(m_cid+ni)%pairs.size(); else m_cid=0;
+		}
+	}
+	++m_pid;
+	m_newpairs=1;
+	m_needcleanup=false;
+	if(m_updates_call>0)
+	{ m_updates_ratio=m_updates_done/(btScalar)m_updates_call; }
+	else
+	{ m_updates_ratio=0; }
+	m_updates_done/=2;	// both counters are halved every step
+	m_updates_call/=2;
+}
+
+// Runs btDbvt::optimizeTopDown on both trees, m_sets[0] first.
+void							btDbvtBroadphase::optimize()
+{
+	for(int iset=0;iset<2;++iset)
+		m_sets[iset].optimizeTopDown();
+}
+
+// Non-const accessor for the pair cache this broadphase adds pairs to.
+btOverlappingPairCache*			btDbvtBroadphase::getOverlappingPairCache()
+{
+	return m_paircache;
+}
+
+// Const accessor for the same pair cache.
+const btOverlappingPairCache*	btDbvtBroadphase::getOverlappingPairCache() const
+{
+	return m_paircache;
+}
+
+// Writes out the Merge of both root volumes, the single non-empty root volume, or FromCR(origin,0) when both trees are empty.
+void							btDbvtBroadphase::getBroadphaseAabb(btVector3& aabbMin,btVector3& aabbMax) const
+{
+	const bool	hasDynamic=!m_sets[0].empty();
+	const bool	hasFixed=!m_sets[1].empty();
+	ATTRIBUTE_ALIGNED16(btDbvtVolume)	bounds;
+	if(hasDynamic&&hasFixed)
+		Merge(m_sets[0].m_root->volume,m_sets[1].m_root->volume,bounds);
+	else if(hasDynamic)
+		bounds=m_sets[0].m_root->volume;
+	else if(hasFixed)
+		bounds=m_sets[1].m_root->volume;
+	else
+		bounds=btDbvtVolume::FromCR(btVector3(0,0,0),0);
+	aabbMin=bounds.Mins();
+	aabbMax=bounds.Maxs();
+}
+
+// Resets the trees, flags, counters and stage lists, but only when neither tree holds any leaf; otherwise does nothing.
+void btDbvtBroadphase::resetPool(btDispatcher* dispatcher)
+{
+	/* dispatcher is not used by this implementation	*/ 
+	if(m_sets[0].m_leaves+m_sets[1].m_leaves!=0)
+		return;
+
+	/* reset internal dynamic tree data structures	*/ 
+	for(int iset=0;iset<2;++iset)
+		m_sets[iset].clear();
+
+	/* flags and tuning values	*/ 
+	m_deferedcollide	=	false;
+	m_needcleanup		=	true;
+	m_fupdates			=	1;
+	m_dupdates			=	0;
+	m_cupdates			=	10;
+
+	/* counters and ids	*/ 
+	m_newpairs			=	1;
+	m_fixedleft			=	0;
+	m_updates_call		=	0;
+	m_updates_done		=	0;
+	m_updates_ratio		=	0;
+	m_gid = m_pid = m_cid = 0;
+
+	/* stage lists, including the extra slot at index STAGECOUNT	*/ 
+	m_stageCurrent		=	0;
+	for(int i=STAGECOUNT;i>=0;--i)
+		m_stageRoots[i]=0;
+}
+
+// Empty implementation: this broadphase prints no statistics.
+void							btDbvtBroadphase::printStats()
+{}
+
+//
+#if DBVT_BP_ENABLE_BENCHMARK
+
+struct	btBroadphaseBenchmark	/* Helper types and functions for btDbvtBroadphase::benchmark (only built when DBVT_BP_ENABLE_BENCHMARK is set)	*/ 
+{
+	struct	Experiment	/* Parameters of one benchmark run	*/ 
+	{
+		const char*			name;
+		int					object_count;	/* number of objects created	*/ 
+		int					update_count;	/* percentage of object_count moved per iteration	*/ 
+		int					spawn_count;	/* percentage of object_count; only printed by benchmark()	*/ 
+		int					iterations;
+		btScalar			speed;			/* added to an object's time on every update	*/ 
+		btScalar			amplitude;		/* scale of the generated positions	*/ 
+	};
+	struct	Object	/* One moving box registered with the broadphase under test	*/ 
+	{
+		btVector3			center;
+		btVector3			extents;
+		btBroadphaseProxy*	proxy;
+		btScalar			time;
+		void				update(btScalar speed,btScalar amplitude,btBroadphaseInterface* pbi)
+		{
+			time		+=	speed;
+			center[0]	=	btCos(time*(btScalar)2.17)*amplitude+
+				btSin(time)*amplitude/2;
+			center[1]	=	btCos(time*(btScalar)1.38)*amplitude+
+				btSin(time)*amplitude;
+			center[2]	=	btSin(time*(btScalar)0.777)*amplitude;
+			pbi->setAabb(proxy,center-extents,center+extents,0);	/* push the moved box to the broadphase	*/ 
+		}
+	};
+	static int		UnsignedRand(int range=RAND_MAX-1)	{ return(rand()%(range+1)); }	/* value in [0,range]	*/ 
+	static btScalar	UnitRand()							{ return(UnsignedRand(16384)/(btScalar)16384); }	/* value in [0,1]	*/ 
+	static void		OutputTime(const char* name,btClock& c,unsigned count=0)	/* prints the clock reading, plus a per-second rate when count>0	*/ 
+	{
+		const unsigned long	us=c.getTimeMicroseconds();
+		const unsigned long	ms=(us+500)/1000;	/* rounded to the nearest millisecond	*/ 
+		const btScalar		sec=us/(btScalar)(1000*1000);
+		if(count>0)
+			printf("%s : %lu us (%lu ms), %.2f/s\r\n",name,us,ms,count/sec);	/* %lu matches the unsigned long arguments	*/ 
+		else
+			printf("%s : %lu us (%lu ms)\r\n",name,us,ms);
+	}
+};
+
+void							btDbvtBroadphase::benchmark(btBroadphaseInterface* pbi)	// Runs each experiment against pbi: create objects, update all once, run timed update iterations, destroy; timings go to stdout
+{
+	static const btBroadphaseBenchmark::Experiment		experiments[]=
+	{
+		{"1024o.10%",1024,10,0,8192,(btScalar)0.005,(btScalar)100},
+		/*{"4096o.10%",4096,10,0,8192,(btScalar)0.005,(btScalar)100},
+		{"8192o.10%",8192,10,0,8192,(btScalar)0.005,(btScalar)100},*/
+	};
+	static const int										nexperiments=sizeof(experiments)/sizeof(experiments[0]);
+	btAlignedObjectArray<btBroadphaseBenchmark::Object*>	objects;
+	btClock													wallclock;
+	/* Begin			*/ 
+	for(int iexp=0;iexp<nexperiments;++iexp)
+	{
+		const btBroadphaseBenchmark::Experiment&	experiment=experiments[iexp];
+		const int									object_count=experiment.object_count;
+		const int									update_count=(object_count*experiment.update_count)/100;	// the experiment stores a percentage of object_count
+		const int									spawn_count=(object_count*experiment.spawn_count)/100;	// only printed below
+		const btScalar								speed=experiment.speed;	
+		const btScalar								amplitude=experiment.amplitude;
+		printf("Experiment #%u '%s':\r\n",iexp,experiment.name);
+		printf("\tObjects: %u\r\n",object_count);
+		printf("\tUpdate: %u\r\n",update_count);
+		printf("\tSpawn: %u\r\n",spawn_count);
+		printf("\tSpeed: %f\r\n",speed);
+		printf("\tAmplitude: %f\r\n",amplitude);
+		srand(180673);	// fixed seed, so every run generates the same objects
+		/* Create objects	*/ 
+		wallclock.reset();
+		objects.reserve(object_count);
+		for(int i=0;i<object_count;++i)
+		{
+			btBroadphaseBenchmark::Object*	po=new btBroadphaseBenchmark::Object();
+			po->center[0]=btBroadphaseBenchmark::UnitRand()*50;
+			po->center[1]=btBroadphaseBenchmark::UnitRand()*50;
+			po->center[2]=btBroadphaseBenchmark::UnitRand()*50;
+			po->extents[0]=btBroadphaseBenchmark::UnitRand()*2+2;
+			po->extents[1]=btBroadphaseBenchmark::UnitRand()*2+2;
+			po->extents[2]=btBroadphaseBenchmark::UnitRand()*2+2;
+			po->time=btBroadphaseBenchmark::UnitRand()*2000;
+			po->proxy=pbi->createProxy(po->center-po->extents,po->center+po->extents,0,po,1,1,0,0);
+			objects.push_back(po);
+		}
+		btBroadphaseBenchmark::OutputTime("\tInitialization",wallclock);
+		/* First update		*/ 
+		wallclock.reset();
+		for(int i=0;i<objects.size();++i)
+		{
+			objects[i]->update(speed,amplitude,pbi);
+		}
+		btBroadphaseBenchmark::OutputTime("\tFirst update",wallclock);
+		/* Updates			*/ 
+		wallclock.reset();
+		for(int i=0;i<experiment.iterations;++i)
+		{
+			for(int j=0;j<update_count;++j)	// only the first update_count objects move in each iteration
+			{				
+				objects[j]->update(speed,amplitude,pbi);
+			}
+			pbi->calculateOverlappingPairs(0);
+		}
+		btBroadphaseBenchmark::OutputTime("\tUpdate",wallclock,experiment.iterations);
+		/* Clean up			*/ 
+		wallclock.reset();
+		for(int i=0;i<objects.size();++i)
+		{
+			pbi->destroyProxy(objects[i]->proxy,0);
+			delete objects[i];
+		}
+		objects.resize(0);	// the array is reused by the next experiment
+		btBroadphaseBenchmark::OutputTime("\tRelease",wallclock);
+	}
+
+}
+#else
+void							btDbvtBroadphase::benchmark(btBroadphaseInterface*)	// empty stub compiled when DBVT_BP_ENABLE_BENCHMARK is 0
+{}
+#endif
+
+#if DBVT_BP_PROFILE
+#undef	SPC
+#endif
+
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btDbvtBroadphase.h b/src/bullet/BulletCollision/BroadphaseCollision/btDbvtBroadphase.h
new file mode 100644
index 00000000..18b64ad0
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btDbvtBroadphase.h
@@ -0,0 +1,146 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///btDbvtBroadphase implementation by Nathanael Presson
+#ifndef BT_DBVT_BROADPHASE_H
+#define BT_DBVT_BROADPHASE_H
+
+#include "BulletCollision/BroadphaseCollision/btDbvt.h"
+#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
+
+//
+// Compile time config
+//
+
+#define	DBVT_BP_PROFILE					0
+//#define DBVT_BP_SORTPAIRS				1
+#define DBVT_BP_PREVENTFALSEUPDATE		0
+#define DBVT_BP_ACCURATESLEEPING		0
+#define DBVT_BP_ENABLE_BENCHMARK		0
+#define DBVT_BP_MARGIN					(btScalar)0.05
+
+#if DBVT_BP_PROFILE
+#define	DBVT_BP_PROFILING_RATE	256
+#include "LinearMath/btQuickprof.h"
+#endif
+
+//
+// btDbvtProxy
+//
+struct btDbvtProxy : btBroadphaseProxy
+{
+	/* Fields		*/ 
+	//btDbvtAabbMm	aabb;
+	btDbvtNode*		leaf;		// presumably this proxy's node in one of the btDbvt sets - TODO confirm; not set by the ctor
+	btDbvtProxy*	links[2];	// both entries are nulled by the ctor; presumably links within a stage list - TODO confirm
+	int				stage;		// presumably the index of the stage holding this proxy - TODO confirm; not set by the ctor
+	/* ctor: forwards all arguments to btBroadphaseProxy and nulls both links */ 
+	btDbvtProxy(const btVector3& aabbMin,const btVector3& aabbMax,void* userPtr,short int collisionFilterGroup, short int collisionFilterMask) :
+	btBroadphaseProxy(aabbMin,aabbMax,userPtr,collisionFilterGroup,collisionFilterMask)
+	{
+		links[0]=links[1]=0;	// leaf and stage are left uninitialized here
+	}
+};
+
+typedef btAlignedObjectArray<btDbvtProxy*>	btDbvtProxyArray;
+
+///The btDbvtBroadphase implements a broadphase using two dynamic AABB bounding volume hierarchies/trees (see btDbvt).
+///One tree is used for static/non-moving objects, and another tree is used for dynamic objects. Objects can move from one tree to the other.
+///This is a very fast broadphase, especially for very dynamic worlds where many objects are moving. Its insert/add and remove of objects is generally faster than the sweep and prune broadphases btAxisSweep3 and bt32BitAxisSweep3.
+struct	btDbvtBroadphase : btBroadphaseInterface
+{
+	/* Config		*/ 
+	enum	{
+		DYNAMIC_SET			=	0,	/* Dynamic set index	*/ 
+		FIXED_SET			=	1,	/* Fixed set index		*/ 
+		STAGECOUNT			=	2	/* Number of stages		*/ 
+	};
+	/* Fields		*/ 
+	btDbvt					m_sets[2];					// Dbvt sets (see DYNAMIC_SET / FIXED_SET above)
+	btDbvtProxy*			m_stageRoots[STAGECOUNT+1];	// Stages list (STAGECOUNT+1 entries)
+	btOverlappingPairCache*	m_paircache;				// Pair cache
+	btScalar				m_prediction;				// Velocity prediction
+	int						m_stageCurrent;				// Current stage
+	int						m_fupdates;					// % of fixed updates per frame
+	int						m_dupdates;					// % of dynamic updates per frame
+	int						m_cupdates;					// % of cleanup updates per frame
+	int						m_newpairs;					// Number of pairs created
+	int						m_fixedleft;				// Fixed optimization left
+	unsigned				m_updates_call;				// Number of update calls
+	unsigned				m_updates_done;				// Number of updates done
+	btScalar				m_updates_ratio;			// m_updates_done/m_updates_call
+	int						m_pid;						// Parse id
+	int						m_cid;						// Cleanup index
+	int						m_gid;						// Gen id
+	bool					m_releasepaircache;			// Release pair cache on delete
+	bool					m_deferedcollide;			// Defer dynamic/static collision to collide call
+	bool					m_needcleanup;				// Need to run cleanup?
+#if DBVT_BP_PROFILE
+	btClock					m_clock;
+	struct	{
+		unsigned long		m_total;
+		unsigned long		m_ddcollide;
+		unsigned long		m_fdcollide;
+		unsigned long		m_cleanup;
+		unsigned long		m_jobcount;
+	}				m_profiling;
+#endif
+	/* Methods		*/ 
+	btDbvtBroadphase(btOverlappingPairCache* paircache=0);
+	~btDbvtBroadphase();
+	void							collide(btDispatcher* dispatcher);
+	void							optimize();
+	
+	/* btBroadphaseInterface Implementation	*/
+	btBroadphaseProxy*				createProxy(const btVector3& aabbMin,const btVector3& aabbMax,int shapeType,void* userPtr,short int collisionFilterGroup,short int collisionFilterMask,btDispatcher* dispatcher,void* multiSapProxy);
+	virtual void					destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
+	virtual void					setAabb(btBroadphaseProxy* proxy,const btVector3& aabbMin,const btVector3& aabbMax,btDispatcher* dispatcher);
+	virtual void					rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin=btVector3(0,0,0), const btVector3& aabbMax = btVector3(0,0,0));
+	virtual void					aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback);
+
+	virtual void					getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const;
+	virtual	void					calculateOverlappingPairs(btDispatcher* dispatcher);
+	virtual	btOverlappingPairCache*	getOverlappingPairCache();
+	virtual	const btOverlappingPairCache*	getOverlappingPairCache() const;
+	virtual	void					getBroadphaseAabb(btVector3& aabbMin,btVector3& aabbMax) const;
+	virtual	void					printStats();
+
+
+	///reset broadphase internal structures, to ensure determinism/reproducibility
+	virtual void resetPool(btDispatcher* dispatcher);
+
+	void	performDeferredRemoval(btDispatcher* dispatcher);
+	
+	void	setVelocityPrediction(btScalar prediction)
+	{
+		m_prediction = prediction;
+	}
+	btScalar getVelocityPrediction() const
+	{
+		return m_prediction;
+	}
+
+	///this setAabbForceUpdate is similar to setAabb but always forces the aabb update. 
+	///it is not part of the btBroadphaseInterface but specific to btDbvtBroadphase.
+	///it bypasses certain optimizations that prevent aabb updates (when the aabb shrinks), see
+	///http://code.google.com/p/bullet/issues/detail?id=223
+	void							setAabbForceUpdate(		btBroadphaseProxy* absproxy,const btVector3& aabbMin,const btVector3& aabbMax,btDispatcher* /*dispatcher*/);
+
+	static void						benchmark(btBroadphaseInterface*);
+
+
+};
+
+#endif
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btDispatcher.cpp b/src/bullet/BulletCollision/BroadphaseCollision/btDispatcher.cpp
new file mode 100644
index 00000000..20768225
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btDispatcher.cpp
@@ -0,0 +1,22 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btDispatcher.h"
+
+btDispatcher::~btDispatcher()
+{
+	// Empty body: this is only the out-of-line definition of the virtual destructor.
+}
+
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btDispatcher.h b/src/bullet/BulletCollision/BroadphaseCollision/btDispatcher.h
new file mode 100644
index 00000000..a79cf940
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btDispatcher.h
@@ -0,0 +1,110 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_DISPATCHER_H
+#define BT_DISPATCHER_H
+#include "LinearMath/btScalar.h"
+
+class btCollisionAlgorithm;
+struct btBroadphaseProxy;
+class btRigidBody;
+class	btCollisionObject;
+class btOverlappingPairCache;
+
+
+class btPersistentManifold;
+class btStackAlloc;
+class btPoolAllocator;
+
+struct btDispatcherInfo
+{
+	enum DispatchFunc
+	{
+		DISPATCH_DISCRETE = 1,
+		DISPATCH_CONTINUOUS	// implicitly 2
+	};
+	btDispatcherInfo()
+		:m_timeStep(btScalar(0.)),
+		m_stepCount(0),
+		m_dispatchFunc(DISPATCH_DISCRETE),
+		m_timeOfImpact(btScalar(1.)),
+		m_useContinuous(true),
+		m_debugDraw(0),
+		m_enableSatConvex(false),
+		m_enableSPU(true),
+		m_useEpa(true),
+		m_allowedCcdPenetration(btScalar(0.04)),
+		m_useConvexConservativeDistanceUtil(false),
+		m_convexConservativeDistanceThreshold(0.0f),
+		m_stackAllocator(0)
+	{
+		// Nothing to do: every member receives its default in the initializer list above.
+	}
+	btScalar	m_timeStep;	// default 0
+	int			m_stepCount;	// default 0
+	int			m_dispatchFunc;	// holds a DispatchFunc value stored as int; default DISPATCH_DISCRETE
+	mutable btScalar	m_timeOfImpact;	// mutable, so it can be written through a const btDispatcherInfo; default 1
+	bool		m_useContinuous;	// default true
+	class btIDebugDraw*	m_debugDraw;	// default null
+	bool		m_enableSatConvex;	// default false
+	bool		m_enableSPU;	// default true
+	bool		m_useEpa;	// default true
+	btScalar	m_allowedCcdPenetration;	// default 0.04
+	bool		m_useConvexConservativeDistanceUtil;	// default false
+	btScalar	m_convexConservativeDistanceThreshold;	// default 0
+	btStackAlloc*	m_stackAllocator;	// default null
+};
+
+///The btDispatcher interface class can be used in combination with broadphase to dispatch calculations for overlapping pairs.
+///For example for pairwise collision detection, calculating contact points stored in btPersistentManifold or user callbacks (game logic).
+class btDispatcher
+{
+	// Abstract interface: apart from the destructor, every member function below is pure virtual (= 0).
+
+public:
+	virtual ~btDispatcher() ;	// defined out-of-line in btDispatcher.cpp
+
+	virtual btCollisionAlgorithm* findAlgorithm(btCollisionObject* body0,btCollisionObject* body1,btPersistentManifold* sharedManifold=0) = 0;
+
+	virtual btPersistentManifold*	getNewManifold(void* body0,void* body1)=0;
+
+	virtual void releaseManifold(btPersistentManifold* manifold)=0;
+
+	virtual void clearManifold(btPersistentManifold* manifold)=0;
+
+	virtual bool	needsCollision(btCollisionObject* body0,btCollisionObject* body1) = 0;
+
+	virtual bool	needsResponse(btCollisionObject* body0,btCollisionObject* body1)=0;
+
+	virtual void	dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher)  =0;
+
+	virtual int getNumManifolds() const = 0;
+
+	virtual btPersistentManifold* getManifoldByIndexInternal(int index) = 0;
+
+	virtual	btPersistentManifold**	getInternalManifoldPointer() = 0;
+
+	virtual	btPoolAllocator*	getInternalManifoldPool() = 0;	// non-const overload
+
+	virtual	const btPoolAllocator*	getInternalManifoldPool() const = 0;	// const overload
+
+	virtual	void* allocateCollisionAlgorithm(int size)  = 0;
+
+	virtual	void freeCollisionAlgorithm(void* ptr) = 0;
+
+};
+
+
+#endif //BT_DISPATCHER_H
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.cpp b/src/bullet/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.cpp
new file mode 100644
index 00000000..81369fe9
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.cpp
@@ -0,0 +1,489 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btMultiSapBroadphase.h"
+
+#include "btSimpleBroadphase.h"
+#include "LinearMath/btAabbUtil2.h"
+#include "btQuantizedBvh.h"
+
+///	btSapBroadphaseArray	m_sapBroadphases;
+
+///	btOverlappingPairCache*	m_overlappingPairs;
+extern int gOverlappingPairs;
+
+/*
+class btMultiSapSortedOverlappingPairCache : public btSortedOverlappingPairCache
+{
+public:
+
+	virtual btBroadphasePair*	addOverlappingPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1)
+	{
+		return btSortedOverlappingPairCache::addOverlappingPair((btBroadphaseProxy*)proxy0->m_multiSapParentProxy,(btBroadphaseProxy*)proxy1->m_multiSapParentProxy);
+	}
+};
+
+*/
+
+btMultiSapBroadphase::btMultiSapBroadphase(int /*maxProxies*/,btOverlappingPairCache* pairCache)
+:m_overlappingPairs(pairCache),
+m_optimizedAabbTree(0),
+m_ownsPairCache(false),
+m_invalidPair(0)
+{
+	if (!m_overlappingPairs)
+	{
+		m_ownsPairCache = true;	// remembered so the destructor knows it must destroy the cache
+		void* mem = btAlignedAlloc(sizeof(btSortedOverlappingPairCache),16);
+		m_overlappingPairs = new (mem)btSortedOverlappingPairCache();
+	}
+	// Filter that decides on the parent multi-SAP proxies' group/mask rather than on the child proxies passed in.
+	struct btMultiSapOverlapFilterCallback : public btOverlapFilterCallback
+	{
+		virtual ~btMultiSapOverlapFilterCallback()
+		{}
+		// return true when pairs need collision
+		virtual bool	needBroadphaseCollision(btBroadphaseProxy* childProxy0,btBroadphaseProxy* childProxy1) const
+		{
+			btBroadphaseProxy* multiProxy0 = (btBroadphaseProxy*)childProxy0->m_multiSapParentProxy;
+			btBroadphaseProxy* multiProxy1 = (btBroadphaseProxy*)childProxy1->m_multiSapParentProxy;
+			// both directions must match: group of one against mask of the other
+			bool collides = (multiProxy0->m_collisionFilterGroup & multiProxy1->m_collisionFilterMask) != 0;
+			collides = collides && (multiProxy1->m_collisionFilterGroup & multiProxy0->m_collisionFilterMask);
+	
+			return collides;
+		}
+	};
+	// The callback lives in btAlignedAlloc storage and is constructed in place.
+	void* mem = btAlignedAlloc(sizeof(btMultiSapOverlapFilterCallback),16);
+	m_filterCallback = new (mem)btMultiSapOverlapFilterCallback();
+	// Installed unconditionally, i.e. also on a pair cache supplied by the caller.
+	m_overlappingPairs->setOverlapFilterCallback(m_filterCallback);
+//	mem = btAlignedAlloc(sizeof(btSimpleBroadphase),16);
+//	m_simpleBroadphase = new (mem) btSimpleBroadphase(maxProxies,m_overlappingPairs);
+}
+
+btMultiSapBroadphase::~btMultiSapBroadphase()
+{
+	if (m_ownsPairCache)	// true only when the constructor created the pair cache itself
+	{
+		m_overlappingPairs->~btOverlappingPairCache();	// constructed with placement new, so destroy explicitly...
+		btAlignedFree(m_overlappingPairs);	// ...then release the btAlignedAlloc storage
+	}
+}	// NOTE(review): m_filterCallback, m_optimizedAabbTree and the proxies in m_multiSapProxies are not released here - confirm whether that is intended
+
+
+void	btMultiSapBroadphase::buildTree(const btVector3& bvhAabbMin,const btVector3& bvhAabbMax)
+{
+	m_optimizedAabbTree = new btQuantizedBvh();	// NOTE(review): a tree from an earlier call is overwritten without being deleted
+	m_optimizedAabbTree->setQuantizationValues(bvhAabbMin,bvhAabbMax);
+	QuantizedNodeArray&	nodes = m_optimizedAabbTree->getLeafNodeArray();
+	for (int i=0;i<m_sapBroadphases.size();i++)	// one leaf node per child broadphase
+	{
+		btQuantizedBvhNode node;
+		btVector3 aabbMin,aabbMax;
+		m_sapBroadphases[i]->getBroadphaseAabb(aabbMin,aabbMax);
+		m_optimizedAabbTree->quantize(&node.m_quantizedAabbMin[0],aabbMin,0);
+		m_optimizedAabbTree->quantize(&node.m_quantizedAabbMax[0],aabbMax,1);
+		int partId = 0;
+		node.m_escapeIndexOrTriangleIndex = (partId<<(31-MAX_NUM_PARTS_IN_BITS)) | i;	// partId is 0, so this stores just the child index i
+		nodes.push_back(node);
+	}
+	m_optimizedAabbTree->buildInternal();
+}
+
+btBroadphaseProxy*	btMultiSapBroadphase::createProxy(  const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr, short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* /*ignoreMe*/)
+{
+	// The trailing void* argument is ignored; it could be used for a recursive multi-SAP if that is ever wanted.
+	// Construct the multi-SAP proxy in storage obtained from btAlignedAlloc and register it with this broadphase.
+	void* const storage = btAlignedAlloc(sizeof(btMultiSapProxy),16);
+	btMultiSapProxy* const multiProxy = new (storage) btMultiSapProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask);
+	m_multiSapProxies.push_back(multiProxy);
+
+	// setAabb() is responsible for inserting the proxy into / removing it from the child broadphases.
+	setAabb(multiProxy,aabbMin,aabbMax,dispatcher);
+	return multiProxy;
+}
+
+void	btMultiSapBroadphase::destroyProxy(btBroadphaseProxy* /*proxy*/,btDispatcher* /*dispatcher*/)
+{
+	///not yet implemented: both arguments are ignored and nothing is removed or freed
+	btAssert(0);	// any call trips this assertion (presumably only in builds where btAssert is active - TODO confirm)
+
+}
+
+
+void	btMultiSapBroadphase::addToChildBroadphase(btMultiSapProxy* parentMultiSapProxy, btBroadphaseProxy* childProxy, btBroadphaseInterface*	childBroadphase)
+{
+	// Record the (child proxy, child broadphase) pairing in a freshly allocated bridge and attach it to the parent proxy.
+	btBridgeProxy* const bridge = new (btAlignedAlloc(sizeof(btBridgeProxy),16)) btBridgeProxy;
+	bridge->m_childProxy = childProxy;
+	bridge->m_childBroadphase = childBroadphase;
+	parentMultiSapProxy->m_bridgeProxies.push_back(bridge);
+}
+
+
+bool boxIsContainedWithinBox(const btVector3& amin,const btVector3& amax,const btVector3& bmin,const btVector3& bmax);
+bool boxIsContainedWithinBox(const btVector3& amin,const btVector3& amax,const btVector3& bmin,const btVector3& bmax)
+{
+	// True when box a [amin,amax] lies within box b [bmin,bmax] on every axis; coinciding faces count as contained.
+	if (!(amin.getX() >= bmin.getX() && amax.getX() <= bmax.getX())) return false;
+	if (!(amin.getY() >= bmin.getY() && amax.getY() <= bmax.getY())) return false;
+	return amin.getZ() >= bmin.getZ() && amax.getZ() <= bmax.getZ();
+}
+
+
+
+
+
+
+void	btMultiSapBroadphase::getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const
+{
+	const btMultiSapProxy* const stored = static_cast<btMultiSapProxy*>(proxy);	// proxy is treated as a btMultiSapProxy (unchecked downcast)
+	aabbMin = stored->m_aabbMin;	// report the bounds last recorded on the multi-SAP proxy
+	aabbMax = stored->m_aabbMax;
+}
+
+void	btMultiSapBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin,const btVector3& aabbMax)
+{
+	for (int i=0;i<m_multiSapProxies.size();i++)	// rayFrom, rayTo, aabbMin and aabbMax are not used: no culling happens here
+	{
+		rayCallback.process(m_multiSapProxies[i]);	// every registered multi-SAP proxy is handed to the callback
+	}
+}
+
+
+//#include <stdio.h>
+
+void	btMultiSapBroadphase::setAabb(btBroadphaseProxy* proxy,const btVector3& aabbMin,const btVector3& aabbMax, btDispatcher* dispatcher)
+{
+	btMultiSapProxy* multiProxy = static_cast<btMultiSapProxy*>(proxy);
+	multiProxy->m_aabbMin = aabbMin;
+	multiProxy->m_aabbMax = aabbMax;
+	// proxy must be a btMultiSapProxy of this broadphase. After storing the new bounds, the steps below
+	// (1) add child proxies for newly overlapped child broadphases, (2) remove stale ones, (3) forward the bounds.
+//	bool fullyContained = false;
+//	bool alreadyInSimple = false;
+	
+
+
+	// Step 1 helper: receives a child-broadphase index and creates a child proxy there unless one already exists.
+	struct MyNodeOverlapCallback : public btNodeOverlapCallback
+	{
+		btMultiSapBroadphase*	m_multiSap;
+		btMultiSapProxy*		m_multiProxy;
+		btDispatcher*			m_dispatcher;
+
+		MyNodeOverlapCallback(btMultiSapBroadphase* multiSap,btMultiSapProxy* multiProxy,btDispatcher* dispatcher)
+			:m_multiSap(multiSap),
+			m_multiProxy(multiProxy),
+			m_dispatcher(dispatcher)
+		{
+
+		}
+
+		virtual void processNode(int /*nodeSubPart*/, int broadphaseIndex)
+		{
+			btBroadphaseInterface* childBroadphase = m_multiSap->getBroadphaseArray()[broadphaseIndex];
+
+			int containingBroadphaseIndex = -1;
+			//already found?
+			for (int i=0;i<m_multiProxy->m_bridgeProxies.size();i++)
+			{
+
+				if (m_multiProxy->m_bridgeProxies[i]->m_childBroadphase == childBroadphase)
+				{
+					containingBroadphaseIndex = i;
+					break;
+				}
+			}
+			if (containingBroadphaseIndex<0)
+			{
+				//add it
+				btBroadphaseProxy* childProxy = childBroadphase->createProxy(m_multiProxy->m_aabbMin,m_multiProxy->m_aabbMax,m_multiProxy->m_shapeType,m_multiProxy->m_clientObject,m_multiProxy->m_collisionFilterGroup,m_multiProxy->m_collisionFilterMask, m_dispatcher,m_multiProxy);
+				m_multiSap->addToChildBroadphase(m_multiProxy,childProxy,childBroadphase);
+
+			}
+		}
+	};
+
+	MyNodeOverlapCallback	myNodeCallback(this,multiProxy,dispatcher);
+
+
+
+	// m_optimizedAabbTree stays null until buildTree() has run; in that case no child proxy is created here.
+	if (m_optimizedAabbTree)
+		m_optimizedAabbTree->reportAabbOverlappingNodex(&myNodeCallback,aabbMin,aabbMax);
+
+	int i;
+	// Step 2: drop the bridges to child broadphases whose bounds no longer overlap this proxy.
+	for ( i=0;i<multiProxy->m_bridgeProxies.size();i++)
+	{
+		btVector3 worldAabbMin,worldAabbMax;
+		multiProxy->m_bridgeProxies[i]->m_childBroadphase->getBroadphaseAabb(worldAabbMin,worldAabbMax);
+		bool overlapsBroadphase = TestAabbAgainstAabb2(worldAabbMin,worldAabbMax,multiProxy->m_aabbMin,multiProxy->m_aabbMax);
+		if (!overlapsBroadphase)
+		{
+			//remove it now
+			btBridgeProxy* bridgeProxy = multiProxy->m_bridgeProxies[i];
+			bridgeProxy->m_childBroadphase->destroyProxy(bridgeProxy->m_childProxy,dispatcher);
+			multiProxy->m_bridgeProxies.swap( i,multiProxy->m_bridgeProxies.size()-1);
+			multiProxy->m_bridgeProxies.pop_back();
+			//the bridge was placement-new'd into btAlignedAlloc storage by addToChildBroadphase; release it instead of leaking it
+			bridgeProxy->~btBridgeProxy();
+			btAlignedFree(bridgeProxy);
+			--i;	//swap() moved the former last bridge into slot i: re-test this slot on the next pass instead of skipping it
+		}
+	}
+
+
+	/*
+
+	if (1)
+	{
+
+		//find broadphase that contain this multiProxy
+		int numChildBroadphases = getBroadphaseArray().size();
+		for (int i=0;i<numChildBroadphases;i++)
+		{
+			btBroadphaseInterface* childBroadphase = getBroadphaseArray()[i];
+			btVector3 worldAabbMin,worldAabbMax;
+			childBroadphase->getBroadphaseAabb(worldAabbMin,worldAabbMax);
+			bool overlapsBroadphase = TestAabbAgainstAabb2(worldAabbMin,worldAabbMax,multiProxy->m_aabbMin,multiProxy->m_aabbMax);
+			
+		//	fullyContained = fullyContained || boxIsContainedWithinBox(worldAabbMin,worldAabbMax,multiProxy->m_aabbMin,multiProxy->m_aabbMax);
+			int containingBroadphaseIndex = -1;
+			
+			//if already contains this
+			
+			for (int i=0;i<multiProxy->m_bridgeProxies.size();i++)
+			{
+				if (multiProxy->m_bridgeProxies[i]->m_childBroadphase == childBroadphase)
+				{
+					containingBroadphaseIndex = i;
+				}
+				alreadyInSimple = alreadyInSimple || (multiProxy->m_bridgeProxies[i]->m_childBroadphase == m_simpleBroadphase);
+			}
+
+			if (overlapsBroadphase)
+			{
+				if (containingBroadphaseIndex<0)
+				{
+					btBroadphaseProxy* childProxy = childBroadphase->createProxy(aabbMin,aabbMax,multiProxy->m_shapeType,multiProxy->m_clientObject,multiProxy->m_collisionFilterGroup,multiProxy->m_collisionFilterMask, dispatcher);
+					childProxy->m_multiSapParentProxy = multiProxy;
+					addToChildBroadphase(multiProxy,childProxy,childBroadphase);
+				}
+			} else
+			{
+				if (containingBroadphaseIndex>=0)
+				{
+					//remove
+					btBridgeProxy* bridgeProxy = multiProxy->m_bridgeProxies[containingBroadphaseIndex];
+
+					btBroadphaseProxy* childProxy = bridgeProxy->m_childProxy;
+					bridgeProxy->m_childBroadphase->destroyProxy(childProxy,dispatcher);
+					
+					multiProxy->m_bridgeProxies.swap( containingBroadphaseIndex,multiProxy->m_bridgeProxies.size()-1);
+					multiProxy->m_bridgeProxies.pop_back();
+				}
+			}
+		}
+
+
+		///If we are in no other child broadphase, stick the proxy in the global 'simple' broadphase (brute force)
+		///hopefully we don't end up with many entries here (can assert/provide feedback on stats)
+		if (0)//!multiProxy->m_bridgeProxies.size())
+		{
+			///we don't pass the userPtr but our multisap proxy. We need to patch this, before processing an actual collision
+			///this is needed to be able to calculate the aabb overlap
+			btBroadphaseProxy* childProxy = m_simpleBroadphase->createProxy(aabbMin,aabbMax,multiProxy->m_shapeType,multiProxy->m_clientObject,multiProxy->m_collisionFilterGroup,multiProxy->m_collisionFilterMask, dispatcher);
+			childProxy->m_multiSapParentProxy = multiProxy;
+			addToChildBroadphase(multiProxy,childProxy,m_simpleBroadphase);
+		}
+	}
+
+	if (!multiProxy->m_bridgeProxies.size())
+	{
+		///we don't pass the userPtr but our multisap proxy. We need to patch this, before processing an actual collision
+		///this is needed to be able to calculate the aabb overlap
+		btBroadphaseProxy* childProxy = m_simpleBroadphase->createProxy(aabbMin,aabbMax,multiProxy->m_shapeType,multiProxy->m_clientObject,multiProxy->m_collisionFilterGroup,multiProxy->m_collisionFilterMask, dispatcher);
+		childProxy->m_multiSapParentProxy = multiProxy;
+		addToChildBroadphase(multiProxy,childProxy,m_simpleBroadphase);
+	}
+*/
+
+
+	// Step 3: forward the new bounds to every child broadphase that still holds a proxy for this object.
+	for ( i=0;i<multiProxy->m_bridgeProxies.size();i++)
+	{
+		btBridgeProxy* bridgeProxyRef = multiProxy->m_bridgeProxies[i];
+		bridgeProxyRef->m_childBroadphase->setAabb(bridgeProxyRef->m_childProxy,aabbMin,aabbMax,dispatcher);
+	}
+
+}
+bool stopUpdating=false;	// global switch: while true, calculateOverlappingPairs() skips its pair-cache cleanup
+
+
+
+class btMultiSapBroadphasePairSortPredicate
+{
+	public:
+		// True when a1 sorts before b1: descending by parent proxy 0, then parent proxy 1, then m_algorithm.
+		bool operator() ( const btBroadphasePair& a1, const btBroadphasePair& b1 ) const
+		{
+				btMultiSapBroadphase::btMultiSapProxy* aProxy0 = a1.m_pProxy0 ? (btMultiSapBroadphase::btMultiSapProxy*)a1.m_pProxy0->m_multiSapParentProxy : 0;
+				btMultiSapBroadphase::btMultiSapProxy* aProxy1 = a1.m_pProxy1 ? (btMultiSapBroadphase::btMultiSapProxy*)a1.m_pProxy1->m_multiSapParentProxy : 0;
+				btMultiSapBroadphase::btMultiSapProxy* bProxy0 = b1.m_pProxy0 ? (btMultiSapBroadphase::btMultiSapProxy*)b1.m_pProxy0->m_multiSapParentProxy : 0;
+				btMultiSapBroadphase::btMultiSapProxy* bProxy1 = b1.m_pProxy1 ? (btMultiSapBroadphase::btMultiSapProxy*)b1.m_pProxy1->m_multiSapParentProxy : 0;
+				// a null child proxy is compared as a null parent pointer
+				 return aProxy0 > bProxy0 || 
+					(aProxy0 == bProxy0 && aProxy1 > bProxy1) ||
+					(aProxy0 == bProxy0 && aProxy1 == bProxy1 && a1.m_algorithm > b1.m_algorithm); 
+		}
+};
+
+
+        ///calculateOverlappingPairs is optional: incremental algorithms (sweep and prune) might do it during the set aabb
+void    btMultiSapBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
+{
+	// Cleans the pair cache: pairs whose parent proxies no longer overlap, and duplicate pairs, are removed.
+//	m_simpleBroadphase->calculateOverlappingPairs(dispatcher);
+	// Nothing happens when stopUpdating is set or the pair cache reports no deferred removal.
+	if (!stopUpdating && getOverlappingPairCache()->hasDeferredRemoval())
+	{
+	
+		btBroadphasePairArray&	overlappingPairArray = getOverlappingPairCache()->getOverlappingPairArray();
+
+	//	quicksort(overlappingPairArray,0,overlappingPairArray.size());
+		// sort by parent proxies so that equal pairs become neighbours
+		overlappingPairArray.quickSort(btMultiSapBroadphasePairSortPredicate());
+
+		//perform a sort, to find duplicates and to sort 'invalid' pairs to the end
+	//	overlappingPairArray.heapSort(btMultiSapBroadphasePairSortPredicate());
+		// cut off the m_invalidPair entries counted so far and restart the count
+		overlappingPairArray.resize(overlappingPairArray.size() - m_invalidPair);
+		m_invalidPair = 0;
+
+		
+		int i;
+
+		btBroadphasePair previousPair;
+		previousPair.m_pProxy0 = 0;
+		previousPair.m_pProxy1 = 0;
+		previousPair.m_algorithm = 0;
+		
+		
+		for (i=0;i<overlappingPairArray.size();i++)
+		{
+		
+			btBroadphasePair& pair = overlappingPairArray[i];
+
+			btMultiSapProxy* aProxy0 = pair.m_pProxy0 ? (btMultiSapProxy*)pair.m_pProxy0->m_multiSapParentProxy : 0;
+			btMultiSapProxy* aProxy1 = pair.m_pProxy1 ? (btMultiSapProxy*)pair.m_pProxy1->m_multiSapParentProxy : 0;
+			btMultiSapProxy* bProxy0 = previousPair.m_pProxy0 ? (btMultiSapProxy*)previousPair.m_pProxy0->m_multiSapParentProxy : 0;
+			btMultiSapProxy* bProxy1 = previousPair.m_pProxy1 ? (btMultiSapProxy*)previousPair.m_pProxy1->m_multiSapParentProxy : 0;
+			// duplicate = same two parent proxies as the pair examined just before this one
+			bool isDuplicate = (aProxy0 == bProxy0) && (aProxy1 == bProxy1);
+			
+			previousPair = pair;
+
+			bool needsRemoval = false;
+
+			if (!isDuplicate)
+			{
+				bool hasOverlap = testAabbOverlap(pair.m_pProxy0,pair.m_pProxy1);
+
+				if (hasOverlap)
+				{
+					needsRemoval = false;//callback->processOverlap(pair);
+				} else
+				{
+					needsRemoval = true;
+				}
+			} else
+			{
+				//remove duplicate
+				needsRemoval = true;
+				//should have no algorithm
+				btAssert(!pair.m_algorithm);
+			}
+			
+			if (needsRemoval)
+			{
+				getOverlappingPairCache()->cleanOverlappingPair(pair,dispatcher);
+				// the entry is invalidated in place (null proxies) and counted; it is cut off after the sort below
+		//		m_overlappingPairArray.swap(i,m_overlappingPairArray.size()-1);
+		//		m_overlappingPairArray.pop_back();
+				pair.m_pProxy0 = 0;
+				pair.m_pProxy1 = 0;
+				m_invalidPair++;
+				gOverlappingPairs--;
+			} 
+			
+		}
+
+	///if you don't like to skip the invalid pairs in the array, execute following code:
+	#define CLEAN_INVALID_PAIRS 1
+	#ifdef CLEAN_INVALID_PAIRS
+
+		//perform a sort, to sort 'invalid' pairs to the end
+		//overlappingPairArray.heapSort(btMultiSapBroadphasePairSortPredicate());
+		overlappingPairArray.quickSort(btMultiSapBroadphasePairSortPredicate());
+
+		overlappingPairArray.resize(overlappingPairArray.size() - m_invalidPair);
+		m_invalidPair = 0;
+	#endif//CLEAN_INVALID_PAIRS
+		
+		//printf("overlappingPairArray.size()=%d\n",overlappingPairArray.size());
+	}
+
+
+}
+
+
+bool	btMultiSapBroadphase::testAabbOverlap(btBroadphaseProxy* childProxy0,btBroadphaseProxy* childProxy1)
+{
+	// Overlap is decided on the bounds stored in the parent multi-SAP proxies, not on the child proxies themselves.
+	const btMultiSapProxy* const parent0 = (btMultiSapProxy*)childProxy0->m_multiSapParentProxy;
+	const btMultiSapProxy* const parent1 = (btMultiSapProxy*)childProxy1->m_multiSapParentProxy;
+	const bool overlapping = TestAabbAgainstAabb2(parent0->m_aabbMin,parent0->m_aabbMax,
+		parent1->m_aabbMin,parent1->m_aabbMax);
+	return overlapping;
+}
+
+
+void	btMultiSapBroadphase::printStats()
+{
+/*	printf("---------------------------------\n");
+	
+		printf("btMultiSapBroadphase.h\n");
+		printf("numHandles = %d\n",m_multiSapProxies.size());
+			//find broadphase that contain this multiProxy
+		int numChildBroadphases = getBroadphaseArray().size();
+		for (int i=0;i<numChildBroadphases;i++)
+		{
+
+			btBroadphaseInterface* childBroadphase = getBroadphaseArray()[i];
+			childBroadphase->printStats();
+
+		}
+		*/
+	// Currently a no-op: all of the statistics output above is commented out.
+}
+
+void btMultiSapBroadphase::resetPool(btDispatcher* dispatcher)
+{
+	// not yet implemented: the dispatcher argument is unused and no internal state is reset
+}
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.h b/src/bullet/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.h
new file mode 100644
index 00000000..7bcfe6b1
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.h
@@ -0,0 +1,151 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#ifndef BT_MULTI_SAP_BROADPHASE
+#define BT_MULTI_SAP_BROADPHASE
+
+#include "btBroadphaseInterface.h"
+#include "LinearMath/btAlignedObjectArray.h"
+#include "btOverlappingPairCache.h"
+
+
+class btBroadphaseInterface;
+class btSimpleBroadphase;
+
+
+typedef btAlignedObjectArray<btBroadphaseInterface*> btSapBroadphaseArray;	// array of broadphase pointers, type of btMultiSapBroadphase::m_sapBroadphases
+
+///The btMultiSapBroadphase is a research project, not recommended to use in production. Use btAxisSweep3 or btDbvtBroadphase instead.
+///The btMultiSapBroadphase is a broadphase that contains multiple SAP broadphases.
+///The user can add SAP broadphases that cover the world. A btBroadphaseProxy can be in multiple child broadphases at the same time.
+///A btQuantizedBvh acceleration structure finds the overlapping SAPs for each btBroadphaseProxy.
+///See http://www.continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=328
+///and http://www.continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1329
+class btMultiSapBroadphase :public btBroadphaseInterface
+{
+	btSapBroadphaseArray	m_sapBroadphases;	// returned by getBroadphaseArray()
+	
+	btSimpleBroadphase*		m_simpleBroadphase;
+
+	btOverlappingPairCache*	m_overlappingPairs;
+
+	class btQuantizedBvh*			m_optimizedAabbTree;
+
+	// presumably records whether m_overlappingPairs was allocated by this object -- confirm in the .cpp
+	bool					m_ownsPairCache;
+	
+	btOverlapFilterCallback*	m_filterCallback;
+	// incremented for each invalidated pair and reset to 0 once the invalid pairs are trimmed (see the .cpp)
+	int			m_invalidPair;
+	// couples a child proxy pointer with a child broadphase pointer
+	struct	btBridgeProxy
+	{
+		btBroadphaseProxy*		m_childProxy;
+		btBroadphaseInterface*	m_childBroadphase;
+	};
+
+
+public:
+
+	struct	btMultiSapProxy	: public btBroadphaseProxy
+	{
+
+		///array with all the entries that this proxy belongs to
+		btAlignedObjectArray<btBridgeProxy*> m_bridgeProxies;
+		btVector3	m_aabbMin;
+		btVector3	m_aabbMax;
+
+		int	m_shapeType;
+
+/*		void*	m_userPtr;
+		short int	m_collisionFilterGroup;
+		short int	m_collisionFilterMask;
+*/
+		btMultiSapProxy(const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr, short int collisionFilterGroup,short int collisionFilterMask)
+			:btBroadphaseProxy(aabbMin,aabbMax,userPtr,collisionFilterGroup,collisionFilterMask),
+			m_aabbMin(aabbMin),
+			m_aabbMax(aabbMax),
+			m_shapeType(shapeType)
+		{
+			m_multiSapParentProxy =this;	// a multi-SAP proxy is its own parent proxy
+		}
+
+		
+	};
+
+protected:
+
+	// multi-SAP proxies tracked by this broadphase -- presumably everything returned by createProxy; confirm in the .cpp
+	btAlignedObjectArray<btMultiSapProxy*> m_multiSapProxies;
+
+public:
+
+	btMultiSapBroadphase(int maxProxies = 16384,btOverlappingPairCache* pairCache=0);
+
+
+	btSapBroadphaseArray&	getBroadphaseArray()
+	{
+		return m_sapBroadphases;
+	}
+
+	const btSapBroadphaseArray&	getBroadphaseArray() const
+	{
+		return m_sapBroadphases;
+	}
+
+	virtual ~btMultiSapBroadphase();
+
+	virtual btBroadphaseProxy*	createProxy(  const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr, short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy);
+	virtual void	destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
+	virtual void	setAabb(btBroadphaseProxy* proxy,const btVector3& aabbMin,const btVector3& aabbMax, btDispatcher* dispatcher);
+	virtual void	getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const;
+
+	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback,const btVector3& aabbMin=btVector3(0,0,0),const btVector3& aabbMax=btVector3(0,0,0));
+
+	void	addToChildBroadphase(btMultiSapProxy* parentMultiSapProxy, btBroadphaseProxy* childProxy, btBroadphaseInterface*	childBroadphase);
+
+	///calculateOverlappingPairs is optional: incremental algorithms (sweep and prune) might do it during the set aabb
+	virtual void	calculateOverlappingPairs(btDispatcher* dispatcher);
+	// compares the AABBs stored in the two proxies' parent multi-SAP proxies
+	bool	testAabbOverlap(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1);
+
+	virtual	btOverlappingPairCache*	getOverlappingPairCache()
+	{
+		return m_overlappingPairs;
+	}
+	virtual	const btOverlappingPairCache*	getOverlappingPairCache() const
+	{
+		return m_overlappingPairs;
+	}
+
+	///getAabb returns the axis aligned bounding box in the 'global' coordinate frame
+	///will add some transform later
+	virtual void getBroadphaseAabb(btVector3& aabbMin,btVector3& aabbMax) const
+	{
+		aabbMin.setValue(-BT_LARGE_FLOAT,-BT_LARGE_FLOAT,-BT_LARGE_FLOAT);
+		aabbMax.setValue(BT_LARGE_FLOAT,BT_LARGE_FLOAT,BT_LARGE_FLOAT);
+	}
+
+	void	buildTree(const btVector3& bvhAabbMin,const btVector3& bvhAabbMax);
+
+	virtual void	printStats();
+	// NOTE(review): confirm that quicksort is actually defined in the .cpp
+	void quicksort (btBroadphasePairArray& a, int lo, int hi);
+
+	///reset broadphase internal structures, to ensure determinism/reproducibility
+	virtual void resetPool(btDispatcher* dispatcher);
+
+};
+
+#endif //BT_MULTI_SAP_BROADPHASE
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp b/src/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp
new file mode 100644
index 00000000..041bbe05
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp
@@ -0,0 +1,633 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#include "btOverlappingPairCache.h"
+
+#include "btDispatcher.h"
+#include "btCollisionAlgorithm.h"
+#include "LinearMath/btAabbUtil2.h"
+
+#include <stdio.h>
+
+int	gOverlappingPairs = 0;	// global debug counter, adjusted by the pair caches in this file
+
+int gRemovePairs =0;	// statistics counters updated by the pair-cache operations below
+int gAddedPairs =0;
+int gFindPairs =0;
+
+
+
+
+btHashedOverlappingPairCache::btHashedOverlappingPairCache()
+	: m_overlapFilterCallback(0)
+	, m_blockedForChanges(false)
+	, m_ghostPairCallback(0)
+{
+	// Reserve room for two pairs, then size the hash/next tables to the resulting capacity.
+	m_overlappingPairArray.reserve(2);
+	growTables();
+}
+
+
+
+
+btHashedOverlappingPairCache::~btHashedOverlappingPairCache()
+{	// intentionally empty: no explicit cleanup is performed here
+}
+
+
+
+void	btHashedOverlappingPairCache::cleanOverlappingPair(btBroadphasePair& pair,btDispatcher* dispatcher)
+{
+	// Pairs without a collision algorithm need no cleanup.
+	if (!pair.m_algorithm)
+		return;
+
+	// Destroy the algorithm in place, return its memory through the dispatcher, clear the pointer.
+	pair.m_algorithm->~btCollisionAlgorithm();
+	dispatcher->freeCollisionAlgorithm(pair.m_algorithm);
+	pair.m_algorithm = 0;
+}
+
+
+
+
+void	btHashedOverlappingPairCache::cleanProxyFromPairs(btBroadphaseProxy* proxy,btDispatcher* dispatcher)
+{
+	// Visitor that cleans the collision algorithm of every pair touching one proxy.
+	// It always answers 'false', so no pair is removed from the cache.
+	class	CleanPairCallback : public btOverlapCallback
+	{
+		btBroadphaseProxy* m_cleanProxy;
+		btOverlappingPairCache*	m_pairCache;
+		btDispatcher* m_dispatcher;
+
+	public:
+		CleanPairCallback(btBroadphaseProxy* cleanProxy,btOverlappingPairCache* pairCache,btDispatcher* dispatcher)
+			: m_cleanProxy(cleanProxy)
+			, m_pairCache(pairCache)
+			, m_dispatcher(dispatcher)
+		{
+		}
+
+		virtual	bool	processOverlap(btBroadphasePair& pair)
+		{
+			const bool touchesProxy = (pair.m_pProxy0 == m_cleanProxy) || (pair.m_pProxy1 == m_cleanProxy);
+			if (touchesProxy)
+			{
+				m_pairCache->cleanOverlappingPair(pair,m_dispatcher);
+			}
+			// keep the pair itself
+			return false;
+		}
+	};
+
+	CleanPairCallback cleanPairs(proxy,this,dispatcher);
+	processAllOverlappingPairs(&cleanPairs,dispatcher);
+}
+
+
+
+
+void	btHashedOverlappingPairCache::removeOverlappingPairsContainingProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher)
+{
+	// Visitor that flags every pair referencing the obsolete proxy.
+	class	RemovePairCallback : public btOverlapCallback
+	{
+		btBroadphaseProxy* m_obsoleteProxy;
+
+	public:
+		RemovePairCallback(btBroadphaseProxy* obsoleteProxy)
+			: m_obsoleteProxy(obsoleteProxy)
+		{
+		}
+
+		virtual	bool	processOverlap(btBroadphasePair& pair)
+		{
+			if (pair.m_pProxy0 == m_obsoleteProxy)
+				return true;
+			return pair.m_pProxy1 == m_obsoleteProxy;
+		}
+	};
+
+	// processAllOverlappingPairs removes each pair for which the visitor returns true.
+	RemovePairCallback removeCallback(proxy);
+	processAllOverlappingPairs(&removeCallback,dispatcher);
+}
+
+
+
+
+
+btBroadphasePair* btHashedOverlappingPairCache::findPair(btBroadphaseProxy* proxy0, btBroadphaseProxy* proxy1)
+{
+	gFindPairs++;
+
+	// internalAddPair orders the proxies by ascending m_uniqueId before hashing; do the same here.
+	if (proxy0->m_uniqueId > proxy1->m_uniqueId)
+	{
+		btSwap(proxy0,proxy1);
+	}
+	const int proxyId1 = proxy0->getUid();
+	const int proxyId2 = proxy1->getUid();
+
+	// Bucket index = hash masked with (capacity-1).
+	const unsigned int rawHash = getHash(static_cast<unsigned int>(proxyId1), static_cast<unsigned int>(proxyId2));
+	const int hash = static_cast<int>(rawHash & (m_overlappingPairArray.capacity()-1));
+
+	// A bucket outside the current table cannot contain the pair.
+	if (hash >= m_hashTable.size())
+		return NULL;
+
+	// Follow the bucket's chain through m_next until the ids match or the chain ends.
+	for (int index = m_hashTable[hash]; index != BT_NULL_PAIR; index = m_next[index])
+	{
+		if (equalsPair(m_overlappingPairArray[index], proxyId1, proxyId2))
+		{
+			btAssert(index < m_overlappingPairArray.size());
+			return &m_overlappingPairArray[index];
+		}
+	}
+
+	// No pair with these ids in the bucket.
+	return NULL;
+}
+
+//#include <stdio.h>
+
+void	btHashedOverlappingPairCache::growTables()
+{
+	// Resizes m_hashTable/m_next to the pair array's capacity and re-links the existing pairs.
+	int newCapacity = m_overlappingPairArray.capacity();
+
+	if (m_hashTable.size() < newCapacity)
+	{
+		//grow hashtable and next table
+		int curHashtableSize = m_hashTable.size();
+
+		m_hashTable.resize(newCapacity);
+		m_next.resize(newCapacity);
+
+		// reset every bucket head and every chain link before rehashing
+		int i;
+
+		for (i= 0; i < newCapacity; ++i)
+		{
+			m_hashTable[i] = BT_NULL_PAIR;
+		}
+		for (i = 0; i < newCapacity; ++i)
+		{
+			m_next[i] = BT_NULL_PAIR;
+		}
+		// NOTE(review): re-links array slots [0, old table size) -- presumably all of them held live pairs when the array grew; confirm
+		for(i=0;i<curHashtableSize;i++)
+		{
+	
+			const btBroadphasePair& pair = m_overlappingPairArray[i];
+			int proxyId1 = pair.m_pProxy0->getUid();
+			int proxyId2 = pair.m_pProxy1->getUid();
+			/*if (proxyId1 > proxyId2) 
+				btSwap(proxyId1, proxyId2);*/
+			int	hashValue = static_cast<int>(getHash(static_cast<unsigned int>(proxyId1),static_cast<unsigned int>(proxyId2)) & (m_overlappingPairArray.capacity()-1));	// New hash value with new mask
+			m_next[i] = m_hashTable[hashValue];
+			m_hashTable[hashValue] = i;
+		}
+
+
+	}
+}
+
+btBroadphasePair* btHashedOverlappingPairCache::internalAddPair(btBroadphaseProxy* proxy0, btBroadphaseProxy* proxy1)
+{	// Returns the stored pair for (proxy0,proxy1) if there is one, otherwise appends a new pair and links it into the hash table.
+	if(proxy0->m_uniqueId>proxy1->m_uniqueId) 
+		btSwap(proxy0,proxy1);
+	int proxyId1 = proxy0->getUid();
+	int proxyId2 = proxy1->getUid();
+
+	/*if (proxyId1 > proxyId2) 
+		btSwap(proxyId1, proxyId2);*/
+
+	int	hash = static_cast<int>(getHash(static_cast<unsigned int>(proxyId1),static_cast<unsigned int>(proxyId2)) & (m_overlappingPairArray.capacity()-1));	// New hash value with new mask
+
+	// an already stored pair is returned as-is
+	btBroadphasePair* pair = internalFindPair(proxy0, proxy1, hash);
+	if (pair != NULL)
+	{
+		return pair;
+	}
+	/*for(int i=0;i<m_overlappingPairArray.size();++i)
+		{
+		if(	(m_overlappingPairArray[i].m_pProxy0==proxy0)&&
+			(m_overlappingPairArray[i].m_pProxy1==proxy1))
+			{
+			printf("Adding duplicated %u<>%u\r\n",proxyId1,proxyId2);
+			internalFindPair(proxy0, proxy1, hash);
+			}
+		}*/
+	int count = m_overlappingPairArray.size();
+	int oldCapacity = m_overlappingPairArray.capacity();
+	void* mem = &m_overlappingPairArray.expandNonInitializing();
+
+	//this is where we add an actual pair, so also call the 'ghost'
+	if (m_ghostPairCallback)
+		m_ghostPairCallback->addOverlappingPair(proxy0,proxy1);
+
+	int newCapacity = m_overlappingPairArray.capacity();
+	// if the array grew, rebuild the tables and recompute the bucket for the new capacity
+	if (oldCapacity < newCapacity)
+	{
+		growTables();
+		//hash with new capacity
+		hash = static_cast<int>(getHash(static_cast<unsigned int>(proxyId1),static_cast<unsigned int>(proxyId2)) & (m_overlappingPairArray.capacity()-1));
+	}
+	
+	pair = new (mem) btBroadphasePair(*proxy0,*proxy1);
+//	pair->m_pProxy0 = proxy0;
+//	pair->m_pProxy1 = proxy1;
+	pair->m_algorithm = 0;
+	pair->m_internalTmpValue = 0;
+	
+	// push the new pair (array index 'count') onto the front of its bucket chain
+	m_next[count] = m_hashTable[hash];
+	m_hashTable[hash] = count;
+
+	return pair;
+}
+
+
+
+void* btHashedOverlappingPairCache::removeOverlappingPair(btBroadphaseProxy* proxy0, btBroadphaseProxy* proxy1,btDispatcher* dispatcher)
+{	// Removes the pair (proxy0,proxy1) from the array and hash table; returns its m_internalInfo1, or 0 when the pair is not stored.
+	gRemovePairs++;
+	if(proxy0->m_uniqueId>proxy1->m_uniqueId) 
+		btSwap(proxy0,proxy1);
+	int proxyId1 = proxy0->getUid();
+	int proxyId2 = proxy1->getUid();
+
+	/*if (proxyId1 > proxyId2) 
+		btSwap(proxyId1, proxyId2);*/
+
+	int	hash = static_cast<int>(getHash(static_cast<unsigned int>(proxyId1),static_cast<unsigned int>(proxyId2)) & (m_overlappingPairArray.capacity()-1));
+
+	btBroadphasePair* pair = internalFindPair(proxy0, proxy1, hash);
+	if (pair == NULL)
+	{
+		return 0;
+	}
+	// release the pair's collision algorithm before unlinking it
+	cleanOverlappingPair(*pair,dispatcher);
+
+	void* userData = pair->m_internalInfo1;
+
+	btAssert(pair->m_pProxy0->getUid() == proxyId1);
+	btAssert(pair->m_pProxy1->getUid() == proxyId2);
+	// array index of the pair, derived from its address
+	int pairIndex = int(pair - &m_overlappingPairArray[0]);
+	btAssert(pairIndex < m_overlappingPairArray.size());
+
+	// Remove the pair from the hash table.
+	int index = m_hashTable[hash];
+	btAssert(index != BT_NULL_PAIR);
+
+	int previous = BT_NULL_PAIR;
+	while (index != pairIndex)
+	{
+		previous = index;
+		index = m_next[index];
+	}
+
+	if (previous != BT_NULL_PAIR)
+	{
+		btAssert(m_next[previous] == pairIndex);
+		m_next[previous] = m_next[pairIndex];
+	}
+	else
+	{
+		m_hashTable[hash] = m_next[pairIndex];
+	}
+
+	// We now move the last pair into spot of the
+	// pair being removed. We need to fix the hash
+	// table indices to support the move.
+
+	int lastPairIndex = m_overlappingPairArray.size() - 1;
+
+	if (m_ghostPairCallback)
+		m_ghostPairCallback->removeOverlappingPair(proxy0, proxy1,dispatcher);
+
+	// If the removed pair is the last pair, we are done.
+	if (lastPairIndex == pairIndex)
+	{
+		m_overlappingPairArray.pop_back();
+		return userData;
+	}
+
+	// Remove the last pair from the hash table.
+	const btBroadphasePair* last = &m_overlappingPairArray[lastPairIndex];
+		/* missing swap here too, Nat. */ 
+	int lastHash = static_cast<int>(getHash(static_cast<unsigned int>(last->m_pProxy0->getUid()), static_cast<unsigned int>(last->m_pProxy1->getUid())) & (m_overlappingPairArray.capacity()-1));
+
+	index = m_hashTable[lastHash];
+	btAssert(index != BT_NULL_PAIR);
+
+	previous = BT_NULL_PAIR;
+	while (index != lastPairIndex)
+	{
+		previous = index;
+		index = m_next[index];
+	}
+
+	if (previous != BT_NULL_PAIR)
+	{
+		btAssert(m_next[previous] == lastPairIndex);
+		m_next[previous] = m_next[lastPairIndex];
+	}
+	else
+	{
+		m_hashTable[lastHash] = m_next[lastPairIndex];
+	}
+
+	// Copy the last pair into the remove pair's spot.
+	m_overlappingPairArray[pairIndex] = m_overlappingPairArray[lastPairIndex];
+
+	// Insert the last pair into the hash table
+	m_next[pairIndex] = m_hashTable[lastHash];
+	m_hashTable[lastHash] = pairIndex;
+
+	m_overlappingPairArray.pop_back();
+
+	return userData;
+}
+//#include <stdio.h>
+
+void	btHashedOverlappingPairCache::processAllOverlappingPairs(btOverlapCallback* callback,btDispatcher* dispatcher)
+{
+	// Visit every pair; a 'true' verdict from the callback removes that pair.
+	// removeOverlappingPair moves the last pair into the freed slot, so the
+	// cursor only advances when the current pair is kept.
+	int cursor = 0;
+	while (cursor < m_overlappingPairArray.size())
+	{
+		btBroadphasePair& current = m_overlappingPairArray[cursor];
+		const bool keepPair = !callback->processOverlap(current);
+		if (keepPair)
+		{
+			++cursor;
+			continue;
+		}
+
+		// The callback asked for deletion of this pair.
+		removeOverlappingPair(current.m_pProxy0,current.m_pProxy1,dispatcher);
+		gOverlappingPairs--;
+	}
+}
+
+void	btHashedOverlappingPairCache::sortOverlappingPairs(btDispatcher* dispatcher)
+{
+	///the hash tables refer to pairs by array index, so instead of sorting in place
+	///the cache is emptied and refilled from a sorted copy
+	btBroadphasePairArray sortedCopy;
+	for (int src = 0; src < m_overlappingPairArray.size(); ++src)
+	{
+		sortedCopy.push_back(m_overlappingPairArray[src]);
+	}
+
+	// take every pair out through the normal removal path
+	for (int k = 0; k < sortedCopy.size(); ++k)
+	{
+		removeOverlappingPair(sortedCopy[k].m_pProxy0,sortedCopy[k].m_pProxy1,dispatcher);
+	}
+
+	// clear all chain links
+	for (int link = 0; link < m_next.size(); ++link)
+	{
+		m_next[link] = BT_NULL_PAIR;
+	}
+
+	sortedCopy.quickSort(btBroadphasePairSortPredicate());
+
+	for (int k = 0; k < sortedCopy.size(); ++k)
+	{
+		addOverlappingPair(sortedCopy[k].m_pProxy0,sortedCopy[k].m_pProxy1);
+	}
+}
+
+
+void*	btSortedOverlappingPairCache::removeOverlappingPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1, btDispatcher* dispatcher )
+{
+	// Removes the pair (proxy0,proxy1) unless removal is deferred; returns its m_internalInfo1, or 0 when nothing was removed.
+	if (!hasDeferredRemoval())
+	{
+		btBroadphasePair findPair(*proxy0,*proxy1);
+		int findIndex = m_overlappingPairArray.findLinearSearch(findPair);
+		if (findIndex < m_overlappingPairArray.size())
+		{
+			gOverlappingPairs--;
+			btBroadphasePair& pair = m_overlappingPairArray[findIndex];
+			void* userData = pair.m_internalInfo1;
+			cleanOverlappingPair(pair,dispatcher);
+			if (m_ghostPairCallback)
+				m_ghostPairCallback->removeOverlappingPair(proxy0, proxy1,dispatcher);
+			// Swap with the last *live* slot (size()-1), not capacity()-1: slots past size() hold no pair,
+			// and pop_back() below must discard the pair found above (same idiom as processAllOverlappingPairs).
+			m_overlappingPairArray.swap(findIndex,m_overlappingPairArray.size()-1);
+			m_overlappingPairArray.pop_back();
+			return userData;
+		}
+	}
+	return 0;
+}
+
+
+
+
+
+
+
+
+btBroadphasePair*	btSortedOverlappingPairCache::addOverlappingPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1)
+{
+	// a proxy must not be paired with itself
+	btAssert(proxy0 != proxy1);
+
+	// pairs rejected by the collision filter are not stored
+	const bool accepted = needsBroadphaseCollision(proxy0,proxy1);
+	if (!accepted)
+		return 0;
+
+	// grow the array by one uninitialised slot and construct the pair in it
+	void* slot = &m_overlappingPairArray.expandNonInitializing();
+	btBroadphasePair* newPair = new (slot) btBroadphasePair(*proxy0,*proxy1);
+	gOverlappingPairs++;
+	gAddedPairs++;
+	if (m_ghostPairCallback)
+		m_ghostPairCallback->addOverlappingPair(proxy0, proxy1);
+	return newPair;
+}
+
+///Linear-search lookup, so it gets slow as the pair array grows. Sorting the list would
+///speed the query up; removal (its main user) could also be delayed. Other ideas: a linked list
+///per proxy with the pair stored in the proxy with the lowest memory address,
+///or a 2D bitmap, which could suit a future GPU implementation.
+btBroadphasePair*	btSortedOverlappingPairCache::findPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1)
+{
+	// addOverlappingPair never stores a pair the filter rejects, so such a pair is reported as absent
+	if (needsBroadphaseCollision(proxy0,proxy1))
+	{
+		btBroadphasePair probe(*proxy0,*proxy1);
+		const int findIndex = m_overlappingPairArray.findLinearSearch(probe);
+		// only an index inside the array refers to a stored pair
+		if (findIndex < m_overlappingPairArray.size())
+		{
+			return &m_overlappingPairArray[findIndex];
+		}
+	}
+
+	return 0;
+}
+
+
+
+
+
+
+
+
+
+
+//#include <stdio.h>
+
+void	btSortedOverlappingPairCache::processAllOverlappingPairs(btOverlapCallback* callback,btDispatcher* dispatcher)
+{
+	// Walk the array; pairs the callback marks for deletion are cleaned, swapped to the
+	// back and popped, so the cursor stays put to examine the pair that was swapped in.
+	int cursor = 0;
+	while (cursor < m_overlappingPairArray.size())
+	{
+		btBroadphasePair& current = m_overlappingPairArray[cursor];
+		if (!callback->processOverlap(current))
+		{
+			++cursor;
+			continue;
+		}
+
+		cleanOverlappingPair(current,dispatcher);
+		current.m_pProxy0 = 0;
+		current.m_pProxy1 = 0;
+		const int lastIndex = m_overlappingPairArray.size()-1;
+		m_overlappingPairArray.swap(cursor,lastIndex);
+		m_overlappingPairArray.pop_back();
+		gOverlappingPairs--;
+	}
+}
+
+
+
+
+btSortedOverlappingPairCache::btSortedOverlappingPairCache()
+	: m_blockedForChanges(false)
+	, m_hasDeferredRemoval(true)
+	, m_overlapFilterCallback(0)
+	, m_ghostPairCallback(0)
+{
+	// pre-allocate room for two pairs
+	m_overlappingPairArray.reserve(2);
+}
+
+btSortedOverlappingPairCache::~btSortedOverlappingPairCache()
+{	// intentionally empty: no explicit cleanup is performed here
+}
+
+void	btSortedOverlappingPairCache::cleanOverlappingPair(btBroadphasePair& pair,btDispatcher* dispatcher)
+{
+	// pairs without a collision algorithm need no cleanup
+	if (!pair.m_algorithm)
+		return;
+
+	// destroy the algorithm in place, return its memory through the dispatcher, clear the pointer
+	pair.m_algorithm->~btCollisionAlgorithm();
+	dispatcher->freeCollisionAlgorithm(pair.m_algorithm);
+	pair.m_algorithm = 0;
+	gRemovePairs--;	// NOTE(review): the hashed cache increments this counter on removal -- confirm the decrement is intended
+}
+
+
+void	btSortedOverlappingPairCache::cleanProxyFromPairs(btBroadphaseProxy* proxy,btDispatcher* dispatcher)
+{
+	// Visitor that cleans the collision algorithm of every pair touching one proxy.
+	// It always answers 'false', so the traversal removes no pair.
+	class	CleanPairCallback : public btOverlapCallback
+	{
+		btBroadphaseProxy* m_cleanProxy;
+		btOverlappingPairCache*	m_pairCache;
+		btDispatcher* m_dispatcher;
+
+	public:
+		CleanPairCallback(btBroadphaseProxy* cleanProxy,btOverlappingPairCache* pairCache,btDispatcher* dispatcher)
+			: m_cleanProxy(cleanProxy)
+			, m_pairCache(pairCache)
+			, m_dispatcher(dispatcher)
+		{
+		}
+
+		virtual	bool	processOverlap(btBroadphasePair& pair)
+		{
+			const bool touchesProxy = (pair.m_pProxy0 == m_cleanProxy) || (pair.m_pProxy1 == m_cleanProxy);
+			if (touchesProxy)
+			{
+				m_pairCache->cleanOverlappingPair(pair,m_dispatcher);
+			}
+			// keep the pair itself
+			return false;
+		}
+	};
+
+	CleanPairCallback cleanPairs(proxy,this,dispatcher);
+	processAllOverlappingPairs(&cleanPairs,dispatcher);
+}
+
+
+void	btSortedOverlappingPairCache::removeOverlappingPairsContainingProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher)
+{
+	// Visitor that flags every pair referencing the obsolete proxy.
+	class	RemovePairCallback : public btOverlapCallback
+	{
+		btBroadphaseProxy* m_obsoleteProxy;
+
+	public:
+		RemovePairCallback(btBroadphaseProxy* obsoleteProxy)
+			: m_obsoleteProxy(obsoleteProxy)
+		{
+		}
+		virtual	bool	processOverlap(btBroadphasePair& pair)
+		{
+			if (pair.m_pProxy0 == m_obsoleteProxy)
+				return true;
+			return pair.m_pProxy1 == m_obsoleteProxy;
+		}
+	};
+
+	// processAllOverlappingPairs drops each pair for which the visitor returns true.
+	RemovePairCallback removeCallback(proxy);
+	processAllOverlappingPairs(&removeCallback,dispatcher);
+}
+
+void	btSortedOverlappingPairCache::sortOverlappingPairs(btDispatcher* dispatcher)
+{
+	//intentionally a no-op: the pair array should already be sorted ('dispatcher' is unused)
+}
+
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h b/src/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h
new file mode 100644
index 00000000..7a3806c1
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h
@@ -0,0 +1,469 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_OVERLAPPING_PAIR_CACHE_H
+#define BT_OVERLAPPING_PAIR_CACHE_H
+
+
+#include "btBroadphaseInterface.h"
+#include "btBroadphaseProxy.h"
+#include "btOverlappingPairCallback.h"
+
+#include "LinearMath/btAlignedObjectArray.h"
+class btDispatcher;
+
+typedef btAlignedObjectArray<btBroadphasePair>	btBroadphasePairArray;
+
+struct	btOverlapCallback	// visitor interface passed to processAllOverlappingPairs
+{
+	virtual ~btOverlapCallback()
+	{}
+	//called once per stored pair; return true to have the pair cache delete the pair
+	virtual bool	processOverlap(btBroadphasePair& pair) = 0;
+
+};
+
+struct btOverlapFilterCallback	// user hook that, when installed on a pair cache, replaces its built-in group/mask filter
+{
+	virtual ~btOverlapFilterCallback()
+	{}
+	// return true when pairs need collision
+	virtual bool	needBroadphaseCollision(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1) const = 0;
+};
+
+
+
+
+
+
+
+extern int gRemovePairs;	// global statistics counters, defined in btOverlappingPairCache.cpp
+extern int gAddedPairs;
+extern int gFindPairs;
+
+const int BT_NULL_PAIR=0xffffffff;	// sentinel index: "no pair" / end of a chain in m_hashTable and m_next
+
+///The btOverlappingPairCache provides an interface for overlapping pair management (add, remove, storage), used by the btBroadphaseInterface broadphases.
+///The btHashedOverlappingPairCache and btSortedOverlappingPairCache classes are two implementations.
+class btOverlappingPairCache : public btOverlappingPairCallback
+{
+public:
+	virtual ~btOverlappingPairCache() {} // this is needed so we can get to the derived class destructor
+	// raw access to the stored pairs
+	virtual btBroadphasePair*	getOverlappingPairArrayPtr() = 0;
+	
+	virtual const btBroadphasePair*	getOverlappingPairArrayPtr() const = 0;
+
+	virtual btBroadphasePairArray&	getOverlappingPairArray() = 0;
+	// hashed/sorted caches: destroys and frees the collision algorithm attached to 'pair', if any
+	virtual	void	cleanOverlappingPair(btBroadphasePair& pair,btDispatcher* dispatcher) = 0;
+
+	virtual int getNumOverlappingPairs() const = 0;
+	// hashed/sorted caches: cleans every pair that references 'proxy'; the pairs stay in the cache
+	virtual void	cleanProxyFromPairs(btBroadphaseProxy* proxy,btDispatcher* dispatcher) = 0;
+
+	virtual	void setOverlapFilterCallback(btOverlapFilterCallback* callback) = 0;
+	// hashed/sorted caches: calls processOverlap on each pair and removes the pairs for which it returns true
+	virtual void	processAllOverlappingPairs(btOverlapCallback*,btDispatcher* dispatcher) = 0;
+	// returns the stored pair for the two proxies, or a null pointer when there is none
+	virtual btBroadphasePair* findPair(btBroadphaseProxy* proxy0, btBroadphaseProxy* proxy1) = 0;
+	// when true, btSortedOverlappingPairCache::removeOverlappingPair removes nothing and returns 0
+	virtual bool	hasDeferredRemoval() = 0;
+	// hashed/sorted caches: if set, also notified when a pair is added or removed
+	virtual	void	setInternalGhostPairCallback(btOverlappingPairCallback* ghostPairCallback)=0;
+
+	virtual void	sortOverlappingPairs(btDispatcher* dispatcher) = 0;
+
+
+};
+
+/// Hash-space based Pair Cache, thanks to Erin Catto, Box2D, http://www.box2d.org, and Pierre Terdiman, Codercorner, http://codercorner.com
+class btHashedOverlappingPairCache : public btOverlappingPairCache
+{
+	btBroadphasePairArray	m_overlappingPairArray;
+	btOverlapFilterCallback* m_overlapFilterCallback;
+	bool		m_blockedForChanges;
+
+	// (m_hashTable, m_next and m_ghostPairCallback are declared in the protected section at the end of the class)
+public:
+	btHashedOverlappingPairCache();
+	virtual ~btHashedOverlappingPairCache();
+
+	
+	void	removeOverlappingPairsContainingProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
+
+	virtual void*	removeOverlappingPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1,btDispatcher* dispatcher);
+	// a user filter callback, when set, decides alone; otherwise each proxy's group must intersect the other's mask
+	SIMD_FORCE_INLINE bool needsBroadphaseCollision(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1) const
+	{
+		if (m_overlapFilterCallback)
+			return m_overlapFilterCallback->needBroadphaseCollision(proxy0,proxy1);
+
+		bool collides = (proxy0->m_collisionFilterGroup & proxy1->m_collisionFilterMask) != 0;
+		collides = collides && (proxy1->m_collisionFilterGroup & proxy0->m_collisionFilterMask);
+		
+		return collides;
+	}
+
+	// Add a pair and return the new pair. If the pair already exists,
+	// no new pair is created and the old one is returned.
+	virtual btBroadphasePair* 	addOverlappingPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1)
+	{
+		gAddedPairs++;
+
+		if (!needsBroadphaseCollision(proxy0,proxy1))
+			return 0;
+
+		return internalAddPair(proxy0,proxy1);
+	}
+
+	
+
+	void	cleanProxyFromPairs(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
+
+	
+	virtual void	processAllOverlappingPairs(btOverlapCallback*,btDispatcher* dispatcher);
+
+	virtual btBroadphasePair*	getOverlappingPairArrayPtr()
+	{
+		return &m_overlappingPairArray[0];
+	}
+
+	const btBroadphasePair*	getOverlappingPairArrayPtr() const
+	{
+		return &m_overlappingPairArray[0];
+	}
+
+	btBroadphasePairArray&	getOverlappingPairArray()
+	{
+		return m_overlappingPairArray;
+	}
+
+	const btBroadphasePairArray&	getOverlappingPairArray() const
+	{
+		return m_overlappingPairArray;
+	}
+
+	void	cleanOverlappingPair(btBroadphasePair& pair,btDispatcher* dispatcher);
+
+
+
+	btBroadphasePair* findPair(btBroadphaseProxy* proxy0, btBroadphaseProxy* proxy1);
+
+	int GetCount() const { return m_overlappingPairArray.size(); }
+//	btBroadphasePair* GetPairs() { return m_pairs; }
+
+	btOverlapFilterCallback* getOverlapFilterCallback()
+	{
+		return m_overlapFilterCallback;
+	}
+
+	void setOverlapFilterCallback(btOverlapFilterCallback* callback)
+	{
+		m_overlapFilterCallback = callback;
+	}
+
+	int	getNumOverlappingPairs() const
+	{
+		return m_overlappingPairArray.size();
+	}
+private:
+	
+	btBroadphasePair* 	internalAddPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1);
+
+	void	growTables();
+	// true when the stored pair's proxy uids equal (proxyId1, proxyId2), in that order
+	SIMD_FORCE_INLINE bool equalsPair(const btBroadphasePair& pair, int proxyId1, int proxyId2)
+	{	
+		return pair.m_pProxy0->getUid() == proxyId1 && pair.m_pProxy1->getUid() == proxyId2;
+	}
+
+	/*
+	// Thomas Wang's hash, see: http://www.concentric.net/~Ttwang/tech/inthash.htm
+	// This assumes proxyId1 and proxyId2 are 16-bit.
+	SIMD_FORCE_INLINE int getHash(int proxyId1, int proxyId2)
+	{
+		int key = (proxyId2 << 16) | proxyId1;
+		key = ~key + (key << 15);
+		key = key ^ (key >> 12);
+		key = key + (key << 2);
+		key = key ^ (key >> 4);
+		key = key * 2057;
+		key = key ^ (key >> 16);
+		return key;
+	}
+	*/
+
+	// combines both ids into one key (proxyId2 shifted left by 16) and scrambles it; callers mask the result to a bucket index
+	
+	SIMD_FORCE_INLINE	unsigned int getHash(unsigned int proxyId1, unsigned int proxyId2)
+	{
+		int key = static_cast<int>(((unsigned int)proxyId1) | (((unsigned int)proxyId2) <<16));
+		// Thomas Wang's hash
+
+		key += ~(key << 15);
+		key ^=  (key >> 10);
+		key +=  (key << 3);
+		key ^=  (key >> 6);
+		key += ~(key << 11);
+		key ^=  (key >> 16);
+		return static_cast<unsigned int>(key);
+	}
+	
+
+
+
+	// walks the chain of bucket 'hash' and returns the pair whose uids match, or NULL when the chain has none
+	SIMD_FORCE_INLINE btBroadphasePair* internalFindPair(btBroadphaseProxy* proxy0, btBroadphaseProxy* proxy1, int hash)
+	{
+		int proxyId1 = proxy0->getUid();
+		int proxyId2 = proxy1->getUid();
+		#if 0 // wrong, 'equalsPair' use unsorted uids, copy-past devil striked again. Nat.
+		if (proxyId1 > proxyId2) 
+			btSwap(proxyId1, proxyId2);
+		#endif
+
+		int index = m_hashTable[hash];
+		
+		while( index != BT_NULL_PAIR && equalsPair(m_overlappingPairArray[index], proxyId1, proxyId2) == false)
+		{
+			index = m_next[index];
+		}
+
+		if ( index == BT_NULL_PAIR )
+		{
+			return NULL;
+		}
+
+		btAssert(index < m_overlappingPairArray.size());
+
+		return &m_overlappingPairArray[index];
+	}
+	// NOTE(review): the three overrides below sit in the private section although the base declares them public -- presumably only called through the base interface; confirm
+	virtual bool	hasDeferredRemoval()
+	{
+		return false;
+	}
+
+	virtual	void	setInternalGhostPairCallback(btOverlappingPairCallback* ghostPairCallback)
+	{
+		m_ghostPairCallback = ghostPairCallback;
+	}
+
+	virtual void	sortOverlappingPairs(btDispatcher* dispatcher);
+	
+
+protected:
+	// m_hashTable[bucket] holds the first pair index of a chain, m_next[pairIndex] the following one; BT_NULL_PAIR ends a chain
+	btAlignedObjectArray<int>	m_hashTable;
+	btAlignedObjectArray<int>	m_next;
+	btOverlappingPairCallback*	m_ghostPairCallback;
+	
+};
+
+
+
+
+///btSortedOverlappingPairCache maintains the objects with overlapping AABB
+///Typically managed by the Broadphase, Axis3Sweep or btSimpleBroadphase
+class	btSortedOverlappingPairCache : public btOverlappingPairCache
+{
+	protected:
+		//avoid brute-force finding all the time
+		btBroadphasePairArray	m_overlappingPairArray;
+
+		//during the dispatch, check that user doesn't destroy/create proxy
+		bool		m_blockedForChanges;
+
+		///by default, do the removal during the pair traversal
+		bool		m_hasDeferredRemoval;
+		
+		//if set, use the callback instead of the built in filter in needBroadphaseCollision
+		btOverlapFilterCallback* m_overlapFilterCallback;
+		//if set, also notified when a pair is added or removed
+		btOverlappingPairCallback*	m_ghostPairCallback;
+
+	public:
+			
+		btSortedOverlappingPairCache();	
+		virtual ~btSortedOverlappingPairCache();
+
+		virtual void	processAllOverlappingPairs(btOverlapCallback*,btDispatcher* dispatcher);
+		//only removes when hasDeferredRemoval() is false; otherwise returns 0 without touching the array
+		void*	removeOverlappingPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1,btDispatcher* dispatcher);
+
+		void	cleanOverlappingPair(btBroadphasePair& pair,btDispatcher* dispatcher);
+		
+		btBroadphasePair*	addOverlappingPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1);
+		//linear search; returns 0 when the pair is not stored or is rejected by the filter
+		btBroadphasePair*	findPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1);
+			
+		
+		void	cleanProxyFromPairs(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
+
+		void	removeOverlappingPairsContainingProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
+
+		//a user filter callback, when set, decides alone; otherwise each proxy's group must intersect the other's mask
+		inline bool needsBroadphaseCollision(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1) const
+		{
+			if (m_overlapFilterCallback)
+				return m_overlapFilterCallback->needBroadphaseCollision(proxy0,proxy1);
+
+			bool collides = (proxy0->m_collisionFilterGroup & proxy1->m_collisionFilterMask) != 0;
+			collides = collides && (proxy1->m_collisionFilterGroup & proxy0->m_collisionFilterMask);
+			
+			return collides;
+		}
+		
+		btBroadphasePairArray&	getOverlappingPairArray()
+		{
+			return m_overlappingPairArray;
+		}
+
+		const btBroadphasePairArray&	getOverlappingPairArray() const
+		{
+			return m_overlappingPairArray;
+		}
+
+		
+
+
+		btBroadphasePair*	getOverlappingPairArrayPtr()
+		{
+			return &m_overlappingPairArray[0];
+		}
+
+		const btBroadphasePair*	getOverlappingPairArrayPtr() const
+		{
+			return &m_overlappingPairArray[0];
+		}
+
+		int	getNumOverlappingPairs() const
+		{
+			return m_overlappingPairArray.size();
+		}
+		
+		btOverlapFilterCallback* getOverlapFilterCallback()
+		{
+			return m_overlapFilterCallback;
+		}
+
+		void setOverlapFilterCallback(btOverlapFilterCallback* callback)
+		{
+			m_overlapFilterCallback = callback;
+		}
+
+		virtual bool	hasDeferredRemoval()
+		{
+			return m_hasDeferredRemoval;
+		}
+
+		virtual	void	setInternalGhostPairCallback(btOverlappingPairCallback* ghostPairCallback)
+		{
+			m_ghostPairCallback = ghostPairCallback;
+		}
+
+		virtual void	sortOverlappingPairs(btDispatcher* dispatcher);
+		
+
+};
+
+
+
+///btNullPairCache skips add/removal of overlapping pairs. Useful for benchmarking and unit testing.
+class btNullPairCache : public btOverlappingPairCache
+{
+	// never filled by this class: add/remove/clean below are all no-ops
+	btBroadphasePairArray	m_overlappingPairArray;
+
+public:
+
+	virtual btBroadphasePair*	getOverlappingPairArrayPtr()
+	{
+		return &m_overlappingPairArray[0];
+	}
+	const btBroadphasePair*	getOverlappingPairArrayPtr() const
+	{
+		return &m_overlappingPairArray[0];
+	}
+	btBroadphasePairArray&	getOverlappingPairArray()
+	{
+		return m_overlappingPairArray;
+	}
+	
+	virtual	void	cleanOverlappingPair(btBroadphasePair& /*pair*/,btDispatcher* /*dispatcher*/)
+	{
+
+	}
+
+	virtual int getNumOverlappingPairs() const
+	{
+		return 0;
+	}
+
+	virtual void	cleanProxyFromPairs(btBroadphaseProxy* /*proxy*/,btDispatcher* /*dispatcher*/)
+	{
+
+	}
+
+	virtual	void setOverlapFilterCallback(btOverlapFilterCallback* /*callback*/)
+	{
+	}
+
+	virtual void	processAllOverlappingPairs(btOverlapCallback*,btDispatcher* /*dispatcher*/)
+	{
+	}
+
+	virtual btBroadphasePair* findPair(btBroadphaseProxy* /*proxy0*/, btBroadphaseProxy* /*proxy1*/)
+	{
+		return 0;
+	}
+
+	virtual bool	hasDeferredRemoval()
+	{
+		return true;
+	}
+
+	virtual	void	setInternalGhostPairCallback(btOverlappingPairCallback* /* ghostPairCallback */)
+	{
+
+	}
+
+	virtual btBroadphasePair*	addOverlappingPair(btBroadphaseProxy* /*proxy0*/,btBroadphaseProxy* /*proxy1*/)
+	{
+		return 0;
+	}
+
+	virtual void*	removeOverlappingPair(btBroadphaseProxy* /*proxy0*/,btBroadphaseProxy* /*proxy1*/,btDispatcher* /*dispatcher*/)
+	{
+		return 0;
+	}
+
+	virtual void	removeOverlappingPairsContainingProxy(btBroadphaseProxy* /*proxy0*/,btDispatcher* /*dispatcher*/)
+	{
+	}
+	
+	virtual void	sortOverlappingPairs(btDispatcher* dispatcher)
+	{
+        (void) dispatcher;
+	}
+
+
+};
+
+
+#endif //BT_OVERLAPPING_PAIR_CACHE_H
+
+
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCallback.h b/src/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCallback.h
new file mode 100644
index 00000000..9c7b6f81
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCallback.h
@@ -0,0 +1,40 @@
+
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef OVERLAPPING_PAIR_CALLBACK_H
+#define OVERLAPPING_PAIR_CALLBACK_H
+
+class btDispatcher;
+struct  btBroadphasePair;
+struct  btBroadphaseProxy;	//used by the signatures below; declared here so this header does not depend on include order
+///The btOverlappingPairCallback class is an additional optional broadphase user callback for adding/removing overlapping pairs, similar interface to btOverlappingPairCache.
+class btOverlappingPairCallback
+{
+public:
+	virtual ~btOverlappingPairCallback()
+	{
+		//virtual so that implementations can be destroyed through a base-class pointer
+	}
+	///Implemented by subclasses: handle the pair (proxy0,proxy1) being added; the meaning of the returned pointer is up to the implementation.
+	virtual btBroadphasePair*	addOverlappingPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1) = 0;
+	///Implemented by subclasses: handle the pair (proxy0,proxy1) being removed.
+	virtual void*	removeOverlappingPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1,btDispatcher* dispatcher) = 0;
+	///Implemented by subclasses: handle the removal of every pair that involves proxy0.
+	virtual void	removeOverlappingPairsContainingProxy(btBroadphaseProxy* proxy0,btDispatcher* dispatcher) = 0;
+
+};
+
+#endif //OVERLAPPING_PAIR_CALLBACK_H
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp b/src/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp
new file mode 100644
index 00000000..c911435a
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp
@@ -0,0 +1,1375 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btQuantizedBvh.h"
+
+#include "LinearMath/btAabbUtil2.h"
+#include "LinearMath/btIDebugDraw.h"
+#include "LinearMath/btSerializer.h"
+
+#define RAYAABB2
+
+btQuantizedBvh::btQuantizedBvh() : //constructs an empty, unquantized tree whose bounds span -infinity..+infinity
+					m_bulletVersion(BT_BULLET_VERSION),
+					m_useQuantization(false), 
+					//alternative traversal modes: TRAVERSAL_STACKLESS_CACHE_FRIENDLY, TRAVERSAL_RECURSIVE
+					m_traversalMode(TRAVERSAL_STACKLESS)
+					,m_subtreeHeaderCount(0) //PCK: add this line
+{
+	m_curNodeIndex = 0;	//no nodes yet; the traversal loops and calculateSerializeBufferSize() read this, so it must not stay indeterminate
+	m_bvhAabbMin.setValue(-SIMD_INFINITY,-SIMD_INFINITY,-SIMD_INFINITY);
+	m_bvhAabbMax.setValue(SIMD_INFINITY,SIMD_INFINITY,SIMD_INFINITY);
+}
+
+
+
+
+
+void btQuantizedBvh::buildInternal()
+{
+	///assumes that caller filled in the m_quantizedLeafNodes
+	///builds the contiguous quantized node array and the subtree headers from those leaves, then discards the leaves
+	m_useQuantization = true;
+
+	//a binary tree over n leaves has 2n-1 nodes, so 2*numLeafNodes slots are enough
+	const int numLeafNodes = m_quantizedLeafNodes.size();
+	m_quantizedContiguousNodes.resize(2*numLeafNodes);
+
+	m_curNodeIndex = 0;
+
+	//buildTree asserts numIndices>0 and does not terminate on an empty range,
+	//so an empty leaf array simply yields an empty tree (no nodes, no headers)
+	if (numLeafNodes > 0)
+	{
+		buildTree(0,numLeafNodes);
+	}
+
+	///if the entire tree is smaller than the subtree size, we need to create a header info for the tree
+	if(m_curNodeIndex > 0 && !m_SubtreeHeaders.size())
+	{
+		btBvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();
+		subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]);
+		subtree.m_rootNodeIndex = 0;
+		subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 1 : m_quantizedContiguousNodes[0].getEscapeIndex();
+	}
+
+	//PCK: update the copy of the size
+	m_subtreeHeaderCount = m_SubtreeHeaders.size();
+
+	//PCK: clear m_quantizedLeafNodes and m_leafNodes, they are temporary
+	m_quantizedLeafNodes.clear();
+	m_leafNodes.clear();
+}
+
+
+
+///just for debugging, to visualize the individual patches/subtrees
+#ifdef DEBUG_PATCH_COLORS
+btVector3 color[4]=	//four fixed colour values, compiled only when DEBUG_PATCH_COLORS is defined
+{
+	btVector3(1,0,0),
+	btVector3(0,1,0),
+	btVector3(0,0,1),
+	btVector3(0,1,1)
+};
+#endif //DEBUG_PATCH_COLORS
+
+
+
+void	btQuantizedBvh::setQuantizationValues(const btVector3& bvhAabbMin,const btVector3& bvhAabbMax,btScalar quantizationMargin)
+{
+	//Stores the tree bounds grown by quantizationMargin on every side (this guards the division below against a zero extent) and derives the per-axis scale.
+	const btVector3 margin(quantizationMargin,quantizationMargin,quantizationMargin);
+	m_bvhAabbMin = bvhAabbMin - margin;
+	m_bvhAabbMax = bvhAabbMax + margin;
+	const btVector3 quantizedRange(btScalar(65533.0),btScalar(65533.0),btScalar(65533.0));
+	m_bvhQuantization = quantizedRange / (m_bvhAabbMax - m_bvhAabbMin);
+	m_useQuantization = true;
+}
+
+
+
+
+btQuantizedBvh::~btQuantizedBvh()
+{	//empty body: no explicit cleanup is performed here
+}
+
+#ifdef DEBUG_TREE_BUILDING
+int gStackDepth = 0;	//current recursion depth of buildTree (incremented on entry, decremented on exit)
+int gMaxStackDepth = 0;	//deepest buildTree recursion seen so far
+#endif //DEBUG_TREE_BUILDING
+
+void	btQuantizedBvh::buildTree	(int startIndex,int endIndex)
+{
+#ifdef DEBUG_TREE_BUILDING
+	gStackDepth++;
+	if (gStackDepth > gMaxStackDepth)
+		gMaxStackDepth = gStackDepth;
+#endif //DEBUG_TREE_BUILDING
+	//Recursively builds the subtree over leaves [startIndex,endIndex) into the contiguous node array,
+	//writing nodes depth-first from m_curNodeIndex: the internal node, then its left subtree, then its right subtree.
+	int splitAxis, splitIndex, i;
+	int numIndices =endIndex-startIndex;
+	int curIndex = m_curNodeIndex;
+	//precondition: the range is not empty
+	btAssert(numIndices>0);
+
+	if (numIndices==1)
+	{
+#ifdef DEBUG_TREE_BUILDING
+		gStackDepth--;
+#endif //DEBUG_TREE_BUILDING
+		//single leaf: copy it into the next free node slot and stop recursing
+		assignInternalNodeFromLeafNode(m_curNodeIndex,startIndex);
+
+		m_curNodeIndex++;
+		return;	
+	}
+	//calculate Best Splitting Axis and where to split it. Sort the incoming 'leafNodes' array within range 'startIndex/endIndex'.
+	
+	splitAxis = calcSplittingAxis(startIndex,endIndex);
+
+	splitIndex = sortAndCalcSplittingIndex(startIndex,endIndex,splitAxis);
+	//remember where this internal node lives so its escape index can be stored once both subtrees exist
+	int internalNodeIndex = m_curNodeIndex;
+	
+	//set the min aabb to 'inf' or a max value, and set the max aabb to a -inf/minimum value.
+	//the aabb will be expanded during buildTree/mergeInternalNodeAabb with actual node values
+	setInternalNodeAabbMin(m_curNodeIndex,m_bvhAabbMax);//can't use btVector3(SIMD_INFINITY,SIMD_INFINITY,SIMD_INFINITY)) because of quantization
+	setInternalNodeAabbMax(m_curNodeIndex,m_bvhAabbMin);//can't use btVector3(-SIMD_INFINITY,-SIMD_INFINITY,-SIMD_INFINITY)) because of quantization
+	
+	//merge the box of every leaf in the range into this internal node's box
+	for (i=startIndex;i<endIndex;i++)
+	{
+		mergeInternalNodeAabb(m_curNodeIndex,getAabbMin(i),getAabbMax(i));
+	}
+
+	m_curNodeIndex++;
+	
+
+	//internalNode->m_escapeIndex;
+	
+	int leftChildNodexIndex = m_curNodeIndex;
+
+	//build left child tree
+	buildTree(startIndex,splitIndex);
+
+	int rightChildNodexIndex = m_curNodeIndex;
+	//build right child tree
+	buildTree(splitIndex,endIndex);
+
+#ifdef DEBUG_TREE_BUILDING
+	gStackDepth--;
+#endif //DEBUG_TREE_BUILDING
+	//number of nodes written for this subtree, this internal node included
+	int escapeIndex = m_curNodeIndex - curIndex;
+
+	if (m_useQuantization)
+	{
+		//escapeIndex is the number of nodes of this subtree
+		const int sizeQuantizedNode =sizeof(btQuantizedBvhNode);
+		const int treeSizeInBytes = escapeIndex * sizeQuantizedNode;
+		if (treeSizeInBytes > MAX_SUBTREE_SIZE_IN_BYTES)
+		{
+			updateSubtreeHeaders(leftChildNodexIndex,rightChildNodexIndex);
+		}
+	} else
+	{
+		//nothing to do for unquantized trees
+	}
+	//the stackless walks add this value to their position to skip the whole subtree
+	setInternalNodeEscapeIndex(internalNodeIndex,escapeIndex);
+
+}
+
+void	btQuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex)
+{
+	//Appends a subtree header for each of the two children whose subtree fits in MAX_SUBTREE_SIZE_IN_BYTES.
+	btAssert(m_useQuantization);
+	//the left child is handled first, then the right child
+	const int childRootIndices[2] = { leftChildNodexIndex, rightChildNodexIndex };
+
+	for (int side=0;side<2;side++)
+	{
+		const int rootIndex = childRootIndices[side];
+		btQuantizedBvhNode& childRoot = m_quantizedContiguousNodes[rootIndex];
+
+		//a leaf is a one-node subtree; otherwise the escape index holds the subtree's node count
+		const int nodeCount = childRoot.isLeafNode() ? 1 : childRoot.getEscapeIndex();
+		const int sizeInBytes = nodeCount * static_cast<int>(sizeof(btQuantizedBvhNode));
+
+		if (sizeInBytes > MAX_SUBTREE_SIZE_IN_BYTES)
+		{
+			//oversized subtrees get no header from this call
+			continue;
+		}
+
+		btBvhSubtreeInfo& header = m_SubtreeHeaders.expand();
+		header.setAabbFromQuantizeNode(childRoot);
+		header.m_rootNodeIndex = rootIndex;
+		header.m_subtreeSize = nodeCount;
+	}
+
+	//PCK: update the copy of the size
+	m_subtreeHeaderCount = m_SubtreeHeaders.size();
+}
+
+
+int	btQuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis)
+{
+	int i;
+	int splitIndex =startIndex;
+	int numIndices = endIndex - startIndex;
+	btScalar splitValue;
+	//Partitions leaves [startIndex,endIndex) around the mean box centre on splitAxis and returns the index where the second part starts.
+	btVector3 means(btScalar(0.),btScalar(0.),btScalar(0.));
+	for (i=startIndex;i<endIndex;i++)
+	{
+		btVector3 center = btScalar(0.5)*(getAabbMax(i)+getAabbMin(i));
+		means+=center;
+	}
+	means *= (btScalar(1.)/(btScalar)numIndices);
+	
+	splitValue = means[splitAxis];
+	
+	//sort leafNodes so all values larger than splitValue come first, and smaller values start from 'splitIndex'.
+	for (i=startIndex;i<endIndex;i++)
+	{
+		btVector3 center = btScalar(0.5)*(getAabbMax(i)+getAabbMin(i));
+		if (center[splitAxis] > splitValue)
+		{
+			//swap
+			swapLeafNodes(i,splitIndex);
+			splitIndex++;
+		}
+	}
+
+	//if the splitIndex causes unbalanced trees, fix this by using the center in between startIndex and endIndex
+	//otherwise the tree-building might fail due to stack-overflows in certain cases.
+	//unbalanced1 is unsafe: it can cause stack overflows
+	//bool unbalanced1 = ((splitIndex==startIndex) || (splitIndex == (endIndex-1)));
+
+	//unbalanced2 should work too: always use center (perfect balanced trees)	
+	//bool unbalanced2 = true;
+
+	//this should be safe too:
+	int rangeBalancedIndices = numIndices/3;
+	bool unbalanced = ((splitIndex<=(startIndex+rangeBalancedIndices)) || (splitIndex >=(endIndex-1-rangeBalancedIndices)));
+	//a split that falls in the first or last third of the range is replaced by the midpoint
+	if (unbalanced)
+	{
+		splitIndex = startIndex+ (numIndices>>1);
+	}
+	//debug-only sanity check: neither side of the split may be empty
+	bool unbal = (splitIndex==startIndex) || (splitIndex == (endIndex));
+	(void)unbal;
+	btAssert(!unbal);
+
+	return splitIndex;
+}
+
+
+int	btQuantizedBvh::calcSplittingAxis(int startIndex,int endIndex)
+{
+	//Picks the axis along which the box centres of leaves [startIndex,endIndex) are most spread out.
+	const int leafCount = endIndex-startIndex;
+
+	//first pass: average centre of the range
+	btVector3 centerMean(btScalar(0.),btScalar(0.),btScalar(0.));
+	for (int leaf=startIndex;leaf<endIndex;leaf++)
+	{
+		centerMean += btScalar(0.5)*(getAabbMax(leaf)+getAabbMin(leaf));
+	}
+	centerMean *= (btScalar(1.)/(btScalar)leafCount);
+
+	//second pass: per-axis sum of squared distances to that average
+	btVector3 spread(btScalar(0.),btScalar(0.),btScalar(0.));
+	for (int leaf=startIndex;leaf<endIndex;leaf++)
+	{
+		const btVector3 offset = btScalar(0.5)*(getAabbMax(leaf)+getAabbMin(leaf)) - centerMean;
+		spread += offset * offset;
+	}
+
+	//scaled by 1/(n-1); buildTree calls this only after handling the single-leaf case
+	spread *= (btScalar(1.)/	((btScalar)leafCount-1)	);
+
+	return spread.maxAxis();
+}
+
+
+
+void	btQuantizedBvh::reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
+{
+	//Runs the box query [aabbMin,aabbMax] against the tree using the walk selected by m_traversalMode; hits go to nodeCallback.
+
+	if (!m_useQuantization)
+	{
+		//unquantized trees only have the stackless walk
+		walkStacklessTree(nodeCallback,aabbMin,aabbMax);
+		return;
+	}
+
+	///quantize query AABB
+	unsigned short int queryMin[3];
+	unsigned short int queryMax[3];
+	quantizeWithClamp(queryMin,aabbMin,0);
+	quantizeWithClamp(queryMax,aabbMax,1);
+
+	switch (m_traversalMode)
+	{
+	case TRAVERSAL_STACKLESS:
+		walkStacklessQuantizedTree(nodeCallback,queryMin,queryMax,0,m_curNodeIndex);
+		break;
+	case TRAVERSAL_STACKLESS_CACHE_FRIENDLY:
+		walkStacklessQuantizedTreeCacheFriendly(nodeCallback,queryMin,queryMax);
+		break;
+	case TRAVERSAL_RECURSIVE:
+		walkRecursiveQuantizedTreeAgainstQueryAabb(&m_quantizedContiguousNodes[0],nodeCallback,queryMin,queryMax);
+		break;
+	default:
+		//unsupported
+		btAssert(0);
+		break;
+	}
+
+}
+
+
+int maxIterations = 0; //largest walkIterations count reached by any stackless walk in this file; written without synchronization
+
+
+void	btQuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
+{
+	btAssert(!m_useQuantization);
+	//Linear walk over m_contiguousNodes: overlapping leaves are reported, non-overlapping internal nodes are skipped together with their subtree.
+	const btOptimizedBvhNode* rootNode = &m_contiguousNodes[0];
+	int escapeIndex, curIndex = 0;
+	int walkIterations = 0;
+	bool isLeafNode;
+	//PCK: unsigned instead of bool
+	unsigned aabbOverlap;
+
+	while (curIndex < m_curNodeIndex)
+	{
+		//catch bugs in tree data
+		btAssert (walkIterations < m_curNodeIndex);
+
+		walkIterations++;
+		aabbOverlap = TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMinOrg,rootNode->m_aabbMaxOrg);
+		isLeafNode = rootNode->m_escapeIndex == -1;
+		//an m_escapeIndex of -1 marks a leaf
+		//PCK: unsigned instead of bool
+		if (isLeafNode && (aabbOverlap != 0))
+		{
+			nodeCallback->processNode(rootNode->m_subPart,rootNode->m_triangleIndex);
+		} 
+		//step to the next node, or jump over the subtree of an internal node that does not overlap
+		//PCK: unsigned instead of bool
+		if ((aabbOverlap != 0) || isLeafNode)
+		{
+			rootNode++;
+			curIndex++;
+		} else
+		{
+			escapeIndex = rootNode->m_escapeIndex;
+			rootNode += escapeIndex;
+			curIndex += escapeIndex;
+		}
+	}
+	if (maxIterations < walkIterations) //keep the longest walk seen so far
+		maxIterations = walkIterations;
+
+}
+
+/*
+///this was the original recursive traversal, before we optimized towards stackless traversal
+void	btQuantizedBvh::walkTree(btOptimizedBvhNode* rootNode,btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
+{
+	bool isLeafNode, aabbOverlap = TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMin,rootNode->m_aabbMax);
+	if (aabbOverlap)
+	{
+		isLeafNode = (!rootNode->m_leftChild && !rootNode->m_rightChild);
+		if (isLeafNode)
+		{
+			nodeCallback->processNode(rootNode);
+		} else
+		{
+			walkTree(rootNode->m_leftChild,nodeCallback,aabbMin,aabbMax);
+			walkTree(rootNode->m_rightChild,nodeCallback,aabbMin,aabbMax);
+		}
+	}
+
+}
+*/
+
+void btQuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantizedBvhNode* currentNode,btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
+{
+	//Recursive depth-first query: reports every leaf reached under currentNode whose quantized box overlaps the query box.
+	btAssert(m_useQuantization);
+
+	//PCK: unsigned instead of bool
+	const unsigned overlapsQuery = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,currentNode->m_quantizedAabbMin,currentNode->m_quantizedAabbMax);
+	const bool nodeIsLeaf = currentNode->isLeafNode();
+
+	if (overlapsQuery == 0)
+	{
+		//this node's box misses the query, so the walk does not descend
+		return;
+	}
+
+	if (nodeIsLeaf)
+	{
+		nodeCallback->processNode(currentNode->getPartId(),currentNode->getTriangleIndex());
+		return;
+	}
+
+	//the left child is stored directly after its parent
+	const btQuantizedBvhNode* leftChild = currentNode+1;
+	walkRecursiveQuantizedTreeAgainstQueryAabb(leftChild,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax);
+
+	//the right child follows the whole left subtree (one node if the left child is a leaf)
+	const btQuantizedBvhNode* rightChild = leftChild->isLeafNode() ? leftChild+1:leftChild+leftChild->getEscapeIndex();
+	walkRecursiveQuantizedTreeAgainstQueryAabb(rightChild,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax);
+}
+
+
+
+void	btQuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const
+{
+	btAssert(!m_useQuantization);
+	//NOTE(review): startNodeIndex and endNodeIndex are not read here; the walk always covers nodes [0,m_curNodeIndex).
+	const btOptimizedBvhNode* rootNode = &m_contiguousNodes[0];
+	int escapeIndex, curIndex = 0;
+	int walkIterations = 0;
+	bool isLeafNode;
+	//PCK: unsigned instead of bool
+	unsigned aabbOverlap=0;
+	unsigned rayBoxOverlap=0;
+	btScalar lambda_max = 1.0;
+	
+		/* Quick pruning by the box that encloses the whole ray (this tree is not quantized) */
+	btVector3 rayAabbMin = raySource;
+	btVector3 rayAabbMax = raySource;
+	rayAabbMin.setMin(rayTarget);
+	rayAabbMax.setMax(rayTarget);
+
+	/* Add box cast extents to bounding box */
+	rayAabbMin += aabbMin;
+	rayAabbMax += aabbMax;
+	//NOTE(review): when raySource equals rayTarget, rayDir below is a zero vector when normalize() is called - confirm normalize() handles that.
+#ifdef RAYAABB2
+	btVector3 rayDir = (rayTarget-raySource);
+	rayDir.normalize ();
+	lambda_max = rayDir.dot(rayTarget-raySource);
+	///a zero direction component would divide by zero, so its inverse is set to BT_LARGE_FLOAT instead
+	btVector3 rayDirectionInverse;
+	rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[0];
+	rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[1];
+	rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[2];
+	unsigned int sign[3] = { rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0};
+#endif
+
+	btVector3 bounds[2];
+
+	while (curIndex < m_curNodeIndex)
+	{
+		btScalar param = 1.0;
+		//catch bugs in tree data
+		btAssert (walkIterations < m_curNodeIndex);
+
+		walkIterations++;
+
+		bounds[0] = rootNode->m_aabbMinOrg;
+		bounds[1] = rootNode->m_aabbMaxOrg;
+		/* Add box cast extents */
+		bounds[0] -= aabbMax;
+		bounds[1] -= aabbMin;
+
+		aabbOverlap = TestAabbAgainstAabb2(rayAabbMin,rayAabbMax,rootNode->m_aabbMinOrg,rootNode->m_aabbMaxOrg);
+		//perhaps profile if it is worth doing the aabbOverlap test first
+
+#ifdef RAYAABB2
+			///careful with this check: need to check division by zero (above) and fix the unQuantize method
+			///thanks Joerg/hiker for the reproduction case!
+			///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858
+		rayBoxOverlap = aabbOverlap ? btRayAabb2 (raySource, rayDirectionInverse, sign, bounds, param, 0.0f, lambda_max) : false;
+
+#else
+		btVector3 normal;
+		rayBoxOverlap = btRayAabb(raySource, rayTarget,bounds[0],bounds[1],param, normal);
+#endif
+
+		isLeafNode = rootNode->m_escapeIndex == -1;
+		//an m_escapeIndex of -1 marks a leaf
+		//PCK: unsigned instead of bool
+		if (isLeafNode && (rayBoxOverlap != 0))
+		{
+			nodeCallback->processNode(rootNode->m_subPart,rootNode->m_triangleIndex);
+		} 
+		//step to the next node, or jump over the subtree of an internal node the ray misses
+		//PCK: unsigned instead of bool
+		if ((rayBoxOverlap != 0) || isLeafNode)
+		{
+			rootNode++;
+			curIndex++;
+		} else
+		{
+			escapeIndex = rootNode->m_escapeIndex;
+			rootNode += escapeIndex;
+			curIndex += escapeIndex;
+		}
+	}
+	if (maxIterations < walkIterations) //keep the longest walk seen so far
+		maxIterations = walkIterations;
+
+}
+
+
+
+void	btQuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const
+{
+	btAssert(m_useQuantization);
+	//Box cast from raySource to rayTarget against nodes [startNodeIndex,endNodeIndex) of the quantized tree, walked in array order.
+	int curIndex = startNodeIndex;
+	int walkIterations = 0;
+	int subTreeSize = endNodeIndex - startNodeIndex;
+	(void)subTreeSize;
+
+	const btQuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex];
+	int escapeIndex;
+	
+	bool isLeafNode;
+	//PCK: unsigned instead of bool
+	unsigned boxBoxOverlap = 0;
+	unsigned rayBoxOverlap = 0;
+
+	btScalar lambda_max = 1.0;
+
+#ifdef RAYAABB2
+	btVector3 rayDirection = (rayTarget-raySource);
+	rayDirection.normalize ();
+	lambda_max = rayDirection.dot(rayTarget-raySource);
+	///a zero direction component would divide by zero, so its inverse is set to BT_LARGE_FLOAT instead; rayDirection holds the inverse from here on
+	rayDirection[0] = rayDirection[0] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDirection[0];
+	rayDirection[1] = rayDirection[1] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDirection[1];
+	rayDirection[2] = rayDirection[2] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDirection[2];
+	unsigned int sign[3] = { rayDirection[0] < 0.0, rayDirection[1] < 0.0, rayDirection[2] < 0.0};
+#endif
+
+	/* Quick pruning by quantized box */
+	btVector3 rayAabbMin = raySource;
+	btVector3 rayAabbMax = raySource;
+	rayAabbMin.setMin(rayTarget);
+	rayAabbMax.setMax(rayTarget);
+
+	/* Add box cast extents to bounding box */
+	rayAabbMin += aabbMin;
+	rayAabbMax += aabbMax;
+
+	unsigned short int quantizedQueryAabbMin[3];
+	unsigned short int quantizedQueryAabbMax[3];
+	quantizeWithClamp(quantizedQueryAabbMin,rayAabbMin,0);
+	quantizeWithClamp(quantizedQueryAabbMax,rayAabbMax,1);
+
+	while (curIndex < endNodeIndex)
+	{
+
+//#define VISUALLY_ANALYZE_BVH 1
+#ifdef VISUALLY_ANALYZE_BVH
+		//some code snippet to debugDraw aabb, to visually analyze bvh structure
+		static int drawPatch = 0;
+		//need some global access to a debugDrawer
+		extern btIDebugDraw* debugDrawerPtr;
+		if (curIndex==drawPatch)
+		{
+			btVector3 aabbMin,aabbMax;
+			aabbMin = unQuantize(rootNode->m_quantizedAabbMin);
+			aabbMax = unQuantize(rootNode->m_quantizedAabbMax);
+			btVector3	color(1,0,0);
+			debugDrawerPtr->drawAabb(aabbMin,aabbMax,color);
+		}
+#endif//VISUALLY_ANALYZE_BVH
+
+		//catch bugs in tree data
+		btAssert (walkIterations < subTreeSize);
+
+		walkIterations++;
+		//PCK: unsigned instead of bool
+		// only interested if this is closer than any previous hit
+		btScalar param = 1.0;
+		rayBoxOverlap = 0;
+		boxBoxOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);
+		isLeafNode = rootNode->isLeafNode();
+		if (boxBoxOverlap)
+		{
+			btVector3 bounds[2];
+			bounds[0] = unQuantize(rootNode->m_quantizedAabbMin);
+			bounds[1] = unQuantize(rootNode->m_quantizedAabbMax);
+			/* Add box cast extents */
+			bounds[0] -= aabbMax;
+			bounds[1] -= aabbMin;
+			btVector3 normal; //only referenced by the disabled btRayAabb variants below
+#if 0
+			bool ra2 = btRayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0, lambda_max);
+			bool ra = btRayAabb (raySource, rayTarget, bounds[0], bounds[1], param, normal);
+			if (ra2 != ra)
+			{
+				printf("functions don't match\n");
+			}
+#endif
+#ifdef RAYAABB2
+			///careful with this check: need to check division by zero (above) and fix the unQuantize method
+			///thanks Joerg/hiker for the reproduction case!
+			///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858
+
+			//BT_PROFILE("btRayAabb2");
+			rayBoxOverlap = btRayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0f, lambda_max);
+			
+#else
+			rayBoxOverlap = true;//btRayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal);
+#endif
+		}
+		
+		if (isLeafNode && rayBoxOverlap)
+		{
+			nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex());
+		}
+		//step to the next node, or jump over the subtree of an internal node the ray misses
+		//PCK: unsigned instead of bool
+		if ((rayBoxOverlap != 0) || isLeafNode)
+		{
+			rootNode++;
+			curIndex++;
+		} else
+		{
+			escapeIndex = rootNode->getEscapeIndex();
+			rootNode += escapeIndex;
+			curIndex += escapeIndex;
+		}
+	}
+	if (maxIterations < walkIterations) //keep the longest walk seen so far
+		maxIterations = walkIterations;
+
+}
+
+void	btQuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const
+{
+	btAssert(m_useQuantization);
+	//Box query against nodes [startNodeIndex,endNodeIndex) of the quantized tree, walked in array order.
+	int curIndex = startNodeIndex;
+	int walkIterations = 0;
+	int subTreeSize = endNodeIndex - startNodeIndex;
+	(void)subTreeSize;
+
+	const btQuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex];
+	int escapeIndex;
+	
+	bool isLeafNode;
+	//PCK: unsigned instead of bool
+	unsigned aabbOverlap;
+
+	while (curIndex < endNodeIndex)
+	{
+
+//#define VISUALLY_ANALYZE_BVH 1
+#ifdef VISUALLY_ANALYZE_BVH
+		//some code snippet to debugDraw aabb, to visually analyze bvh structure
+		static int drawPatch = 0;
+		//need some global access to a debugDrawer
+		extern btIDebugDraw* debugDrawerPtr;
+		if (curIndex==drawPatch)
+		{
+			btVector3 aabbMin,aabbMax;
+			aabbMin = unQuantize(rootNode->m_quantizedAabbMin);
+			aabbMax = unQuantize(rootNode->m_quantizedAabbMax);
+			btVector3	color(1,0,0);
+			debugDrawerPtr->drawAabb(aabbMin,aabbMax,color);
+		}
+#endif//VISUALLY_ANALYZE_BVH
+
+		//catch bugs in tree data
+		btAssert (walkIterations < subTreeSize);
+
+		walkIterations++;
+		//PCK: unsigned instead of bool
+		aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax);
+		isLeafNode = rootNode->isLeafNode();
+		
+		if (isLeafNode && aabbOverlap)
+		{
+			nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex());
+		} 
+		//step to the next node, or jump over the subtree of an internal node that does not overlap
+		//PCK: unsigned instead of bool
+		if ((aabbOverlap != 0) || isLeafNode)
+		{
+			rootNode++;
+			curIndex++;
+		} else
+		{
+			escapeIndex = rootNode->getEscapeIndex();
+			rootNode += escapeIndex;
+			curIndex += escapeIndex;
+		}
+	}
+	if (maxIterations < walkIterations) //keep the longest walk seen so far
+		maxIterations = walkIterations;
+
+}
+
+//This traversal can be called from Playstation 3 SPU
+void	btQuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
+{
+	//Runs the stackless walk separately over each stored subtree whose header box overlaps the query box.
+	btAssert(m_useQuantization);
+
+	for (int headerIndex=0;headerIndex<m_SubtreeHeaders.size();headerIndex++)
+	{
+		const btBvhSubtreeInfo& header = m_SubtreeHeaders[headerIndex];
+		//PCK: unsigned instead of bool
+		const unsigned headerOverlaps = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,header.m_quantizedAabbMin,header.m_quantizedAabbMax);
+		if (headerOverlaps == 0)
+		{
+			//the query misses this header's box: skip its whole subtree
+			continue;
+		}
+
+		const int firstNode = header.m_rootNodeIndex;
+		const int onePastLastNode = firstNode+header.m_subtreeSize;
+		walkStacklessQuantizedTree(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax,firstNode,onePastLastNode);
+	}
+}
+
+
+void	btQuantizedBvh::reportRayOverlappingNodex (btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget) const
+{	//a ray query is forwarded as a box cast whose min and max extents are both zero
+	reportBoxCastOverlappingNodex(nodeCallback,raySource,rayTarget,btVector3(0,0,0),btVector3(0,0,0));
+}
+
+
+void	btQuantizedBvh::reportBoxCastOverlappingNodex(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin,const btVector3& aabbMax) const
+{
+	//Box cast from raySource to rayTarget over nodes [0,m_curNodeIndex); both tree flavours are walked stacklessly.
+	const int nodeCount = m_curNodeIndex;
+
+	if (!m_useQuantization)
+	{
+		walkStacklessTreeAgainstRay(nodeCallback, raySource, rayTarget, aabbMin, aabbMax, 0, nodeCount);
+		return;
+	}
+	walkStacklessQuantizedTreeAgainstRay(nodeCallback, raySource, rayTarget, aabbMin, aabbMax, 0, nodeCount);
+
+	/*
+	{
+		//recursive traversal
+		btVector3 qaabbMin = raySource;
+		btVector3 qaabbMax = raySource;
+		qaabbMin.setMin(rayTarget);
+		qaabbMax.setMax(rayTarget);
+		qaabbMin += aabbMin;
+		qaabbMax += aabbMax;
+		reportAabbOverlappingNodex(nodeCallback,qaabbMin,qaabbMax);
+	}
+	*/
+
+}
+
+
+void	btQuantizedBvh::swapLeafNodes(int i,int splitIndex)
+{
+	//Exchanges entries i and splitIndex of whichever leaf array is in use.
+	if (!m_useQuantization)
+	{
+		btOptimizedBvhNode held = m_leafNodes[i];
+		m_leafNodes[i] = m_leafNodes[splitIndex];
+		m_leafNodes[splitIndex] = held;
+		return;
+	}
+	btQuantizedBvhNode held = m_quantizedLeafNodes[i];
+	m_quantizedLeafNodes[i] = m_quantizedLeafNodes[splitIndex];
+	m_quantizedLeafNodes[splitIndex] = held;
+}
+
+void	btQuantizedBvh::assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex)
+{
+	//Copies leaf leafNodeIndex into slot internalNode of the matching contiguous node array.
+	if (!m_useQuantization)
+	{
+		m_contiguousNodes[internalNode] = m_leafNodes[leafNodeIndex];
+		return;
+	}
+	m_quantizedContiguousNodes[internalNode] = m_quantizedLeafNodes[leafNodeIndex];
+}
+
+//PCK: include
+#include <new>
+
+#if 0
+//PCK: consts
+static const unsigned BVH_ALIGNMENT = 16;
+static const unsigned BVH_ALIGNMENT_MASK = BVH_ALIGNMENT-1;
+
+static const unsigned BVH_ALIGNMENT_BLOCKS = 2;
+#endif
+
+
+unsigned int btQuantizedBvh::getAlignmentSerializationPadding()
+{
+	// Extra bytes that calculateSerializeBufferSize() adds on top of the object size. Changed to 0 since the extra padding is not needed or used.
+	return 0;//BVH_ALIGNMENT_BLOCKS * BVH_ALIGNMENT;
+}
+
+unsigned btQuantizedBvh::calculateSerializeBufferSize() const
+{
+	//Fixed part: the object itself, the serialization padding, and one header per subtree.
+	unsigned totalBytes = sizeof(btQuantizedBvh) + getAlignmentSerializationPadding();
+	totalBytes += sizeof(btBvhSubtreeInfo) * m_subtreeHeaderCount;
+	//Variable part: m_curNodeIndex nodes of whichever node type the tree uses.
+	if (!m_useQuantization)
+		return totalBytes + m_curNodeIndex * sizeof(btOptimizedBvhNode);
+	return totalBytes + m_curNodeIndex * sizeof(btQuantizedBvhNode);
+}
+
+bool btQuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const
+{
+	btAssert(m_subtreeHeaderCount == m_SubtreeHeaders.size());
+	m_subtreeHeaderCount = m_SubtreeHeaders.size();
+
+/*	if (i_dataBufferSize < calculateSerializeBufferSize() || o_alignedDataBuffer == NULL || (((unsigned)o_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0))
+	{
+		///check alignedment for buffer?
+		btAssert(0);
+		return false;
+	}
+*/
+
+	btQuantizedBvh *targetBvh = (btQuantizedBvh *)o_alignedDataBuffer;
+
+	// construct the class so the virtual function table, etc will be set up
+	// Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor
+	new (targetBvh) btQuantizedBvh;
+
+	if (i_swapEndian)
+	{
+		targetBvh->m_curNodeIndex = static_cast<int>(btSwapEndian(m_curNodeIndex));
+
+
+		btSwapVector3Endian(m_bvhAabbMin,targetBvh->m_bvhAabbMin);
+		btSwapVector3Endian(m_bvhAabbMax,targetBvh->m_bvhAabbMax);
+		btSwapVector3Endian(m_bvhQuantization,targetBvh->m_bvhQuantization);
+
+		targetBvh->m_traversalMode = (btTraversalMode)btSwapEndian(m_traversalMode);
+		targetBvh->m_subtreeHeaderCount = static_cast<int>(btSwapEndian(m_subtreeHeaderCount));
+	}
+	else
+	{
+		targetBvh->m_curNodeIndex = m_curNodeIndex;
+		targetBvh->m_bvhAabbMin = m_bvhAabbMin;
+		targetBvh->m_bvhAabbMax = m_bvhAabbMax;
+		targetBvh->m_bvhQuantization = m_bvhQuantization;
+		targetBvh->m_traversalMode = m_traversalMode;
+		targetBvh->m_subtreeHeaderCount = m_subtreeHeaderCount;
+	}
+
+	targetBvh->m_useQuantization = m_useQuantization;
+
+	unsigned char *nodeData = (unsigned char *)targetBvh;
+	nodeData += sizeof(btQuantizedBvh);
+	
+	unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;
+	nodeData += sizeToAdd;
+	
+	int nodeCount = m_curNodeIndex;
+
+	if (m_useQuantization)
+	{
+		targetBvh->m_quantizedContiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount);
+
+		if (i_swapEndian)
+		{
+			for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
+			{
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]);
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]);
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]);
+
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]);
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]);
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]);
+
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast<int>(btSwapEndian(m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex));
+			}
+		}
+		else
+		{
+			for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
+			{
+	
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0];
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1];
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2];
+
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0];
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1];
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2];
+
+				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex;
+
+
+			}
+		}
+		nodeData += sizeof(btQuantizedBvhNode) * nodeCount;
+
+		// this clears the pointer in the member variable it doesn't really do anything to the data
+		// it does call the destructor on the contained objects, but they are all classes with no destructor defined
+		// so the memory (which is not freed) is left alone
+		targetBvh->m_quantizedContiguousNodes.initializeFromBuffer(NULL, 0, 0);
+	}
+	else
+	{
+		targetBvh->m_contiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount);
+
+		if (i_swapEndian)
+		{
+			for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
+			{
+				btSwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMinOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg);
+				btSwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMaxOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg);
+
+				targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast<int>(btSwapEndian(m_contiguousNodes[nodeIndex].m_escapeIndex));
+				targetBvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast<int>(btSwapEndian(m_contiguousNodes[nodeIndex].m_subPart));
+				targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast<int>(btSwapEndian(m_contiguousNodes[nodeIndex].m_triangleIndex));
+			}
+		}
+		else
+		{
+			for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
+			{
+				targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg = m_contiguousNodes[nodeIndex].m_aabbMinOrg;
+				targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg = m_contiguousNodes[nodeIndex].m_aabbMaxOrg;
+
+				targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = m_contiguousNodes[nodeIndex].m_escapeIndex;
+				targetBvh->m_contiguousNodes[nodeIndex].m_subPart = m_contiguousNodes[nodeIndex].m_subPart;
+				targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = m_contiguousNodes[nodeIndex].m_triangleIndex;
+			}
+		}
+		nodeData += sizeof(btOptimizedBvhNode) * nodeCount;
+
+		// this clears the pointer in the member variable it doesn't really do anything to the data
+		// it does call the destructor on the contained objects, but they are all classes with no destructor defined
+		// so the memory (which is not freed) is left alone
+		targetBvh->m_contiguousNodes.initializeFromBuffer(NULL, 0, 0);
+	}
+
+	sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;
+	nodeData += sizeToAdd;
+
+	// Now serialize the subtree headers
+	targetBvh->m_SubtreeHeaders.initializeFromBuffer(nodeData, m_subtreeHeaderCount, m_subtreeHeaderCount);
+	if (i_swapEndian)
+	{
+		for (int i = 0; i < m_subtreeHeaderCount; i++)
+		{
+			targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[0]);
+			targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[1]);
+			targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[2]);
+
+			targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[0]);
+			targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[1]);
+			targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = btSwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[2]);
+
+			targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast<int>(btSwapEndian(m_SubtreeHeaders[i].m_rootNodeIndex));
+			targetBvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast<int>(btSwapEndian(m_SubtreeHeaders[i].m_subtreeSize));
+		}
+	}
+	else
+	{
+		for (int i = 0; i < m_subtreeHeaderCount; i++)
+		{
+			targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = (m_SubtreeHeaders[i].m_quantizedAabbMin[0]);
+			targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = (m_SubtreeHeaders[i].m_quantizedAabbMin[1]);
+			targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = (m_SubtreeHeaders[i].m_quantizedAabbMin[2]);
+
+			targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = (m_SubtreeHeaders[i].m_quantizedAabbMax[0]);
+			targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = (m_SubtreeHeaders[i].m_quantizedAabbMax[1]);
+			targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = (m_SubtreeHeaders[i].m_quantizedAabbMax[2]);
+
+			targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = (m_SubtreeHeaders[i].m_rootNodeIndex);
+			targetBvh->m_SubtreeHeaders[i].m_subtreeSize = (m_SubtreeHeaders[i].m_subtreeSize);
+
+			// need to clear padding in destination buffer
+			targetBvh->m_SubtreeHeaders[i].m_padding[0] = 0;
+			targetBvh->m_SubtreeHeaders[i].m_padding[1] = 0;
+			targetBvh->m_SubtreeHeaders[i].m_padding[2] = 0;
+		}
+	}
+	nodeData += sizeof(btBvhSubtreeInfo) * m_subtreeHeaderCount;
+
+	// this clears the pointer in the member variable it doesn't really do anything to the data
+	// it does call the destructor on the contained objects, but they are all classes with no destructor defined
+	// so the memory (which is not freed) is left alone
+	targetBvh->m_SubtreeHeaders.initializeFromBuffer(NULL, 0, 0);
+
+	// this wipes the virtual function table pointer at the start of the buffer for the class
+	*((void**)o_alignedDataBuffer) = NULL;
+
+	return true;
+}
+
+/// Re-creates a btQuantizedBvh directly inside a buffer laid out as: btQuantizedBvh header, node array, subtree header array.
+/// The node and subtree arrays are attached to the buffer through initializeFromBuffer instead of being copied.
+/// @param i_alignedDataBuffer start of the serialized tree; it is modified in place (byte swapping, placement new)
+/// @param i_dataBufferSize number of bytes available at i_alignedDataBuffer
+/// @param i_swapEndian true to byte-swap every multi-byte field while loading
+/// @return i_alignedDataBuffer viewed as a btQuantizedBvh, or NULL when the buffer is missing, too small, or its header carries negative counts
+btQuantizedBvh *btQuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian)
+{
+
+	// The alignment test is currently disabled:
+	// (((unsigned)i_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)
+	if (i_alignedDataBuffer == NULL)
+	{
+		return NULL;
+	}
+
+	// The header fields are read straight out of the buffer below, so a buffer
+	// that cannot even hold the header has to be rejected before touching it.
+	if (i_dataBufferSize < sizeof(btQuantizedBvh))
+	{
+		return NULL;
+	}
+	btQuantizedBvh *bvh = (btQuantizedBvh *)i_alignedDataBuffer;
+
+	if (i_swapEndian)
+	{
+		// NOTE: the header is swapped before it is validated, so the buffer is left modified even when NULL is returned below
+		bvh->m_curNodeIndex = static_cast<int>(btSwapEndian(bvh->m_curNodeIndex));
+
+		btUnSwapVector3Endian(bvh->m_bvhAabbMin);
+		btUnSwapVector3Endian(bvh->m_bvhAabbMax);
+		btUnSwapVector3Endian(bvh->m_bvhQuantization);
+
+		bvh->m_traversalMode = (btTraversalMode)btSwapEndian(bvh->m_traversalMode);
+		bvh->m_subtreeHeaderCount = static_cast<int>(btSwapEndian(bvh->m_subtreeHeaderCount));
+	}
+
+	// Negative counts can only come from corrupt data (or a wrong i_swapEndian);
+	// they would turn the size and pointer arithmetic below into wrap-around.
+	if (bvh->m_curNodeIndex < 0 || bvh->m_subtreeHeaderCount < 0)
+	{
+		return NULL;
+	}
+
+	unsigned int calculatedBufSize = bvh->calculateSerializeBufferSize();
+	btAssert(calculatedBufSize <= i_dataBufferSize);
+
+	if (calculatedBufSize > i_dataBufferSize)
+	{
+		return NULL;
+	}
+
+	// The node data starts right behind the header
+	unsigned char *nodeData = (unsigned char *)bvh;
+	nodeData += sizeof(btQuantizedBvh);
+	
+	unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;
+	nodeData += sizeToAdd;
+	
+	// Read the count before the placement new below re-runs the constructor on this memory
+	int nodeCount = bvh->m_curNodeIndex;
+
+	// Must call placement new to fill in virtual function table, etc, but we don't want to overwrite most data, so call a special version of the constructor
+	// Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor
+	new (bvh) btQuantizedBvh(*bvh, false);
+
+	if (bvh->m_useQuantization)
+	{
+		bvh->m_quantizedContiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount);
+
+		if (i_swapEndian)
+		{
+			for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
+			{
+				btQuantizedBvhNode &node = bvh->m_quantizedContiguousNodes[nodeIndex];
+
+				node.m_quantizedAabbMin[0] = btSwapEndian(node.m_quantizedAabbMin[0]);
+				node.m_quantizedAabbMin[1] = btSwapEndian(node.m_quantizedAabbMin[1]);
+				node.m_quantizedAabbMin[2] = btSwapEndian(node.m_quantizedAabbMin[2]);
+
+				node.m_quantizedAabbMax[0] = btSwapEndian(node.m_quantizedAabbMax[0]);
+				node.m_quantizedAabbMax[1] = btSwapEndian(node.m_quantizedAabbMax[1]);
+				node.m_quantizedAabbMax[2] = btSwapEndian(node.m_quantizedAabbMax[2]);
+
+				node.m_escapeIndexOrTriangleIndex = static_cast<int>(btSwapEndian(node.m_escapeIndexOrTriangleIndex));
+			}
+		}
+		nodeData += sizeof(btQuantizedBvhNode) * nodeCount;
+	}
+	else
+	{
+		bvh->m_contiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount);
+
+		if (i_swapEndian)
+		{
+			for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)
+			{
+				btOptimizedBvhNode &node = bvh->m_contiguousNodes[nodeIndex];
+
+				btUnSwapVector3Endian(node.m_aabbMinOrg);
+				btUnSwapVector3Endian(node.m_aabbMaxOrg);
+				
+				node.m_escapeIndex = static_cast<int>(btSwapEndian(node.m_escapeIndex));
+				node.m_subPart = static_cast<int>(btSwapEndian(node.m_subPart));
+				node.m_triangleIndex = static_cast<int>(btSwapEndian(node.m_triangleIndex));
+			}
+		}
+		nodeData += sizeof(btOptimizedBvhNode) * nodeCount;
+	}
+
+	sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;
+	nodeData += sizeToAdd;
+
+	// Now deserialize the subtree headers, which follow the node array
+	bvh->m_SubtreeHeaders.initializeFromBuffer(nodeData, bvh->m_subtreeHeaderCount, bvh->m_subtreeHeaderCount);
+	if (i_swapEndian)
+	{
+		for (int i = 0; i < bvh->m_subtreeHeaderCount; i++)
+		{
+			btBvhSubtreeInfo &header = bvh->m_SubtreeHeaders[i];
+
+			header.m_quantizedAabbMin[0] = btSwapEndian(header.m_quantizedAabbMin[0]);
+			header.m_quantizedAabbMin[1] = btSwapEndian(header.m_quantizedAabbMin[1]);
+			header.m_quantizedAabbMin[2] = btSwapEndian(header.m_quantizedAabbMin[2]);
+
+			header.m_quantizedAabbMax[0] = btSwapEndian(header.m_quantizedAabbMax[0]);
+			header.m_quantizedAabbMax[1] = btSwapEndian(header.m_quantizedAabbMax[1]);
+			header.m_quantizedAabbMax[2] = btSwapEndian(header.m_quantizedAabbMax[2]);
+
+			header.m_rootNodeIndex = static_cast<int>(btSwapEndian(header.m_rootNodeIndex));
+			header.m_subtreeSize = static_cast<int>(btSwapEndian(header.m_subtreeSize));
+		}
+	}
+
+	return bvh;
+}
+
+// Constructor that prevents btVector3's default constructor from being called: used by deSerializeInPlace through placement new, it copies the three bounds/quantization vectors from self and stamps m_bulletVersion; the ownsMemory argument is unused
+btQuantizedBvh::btQuantizedBvh(btQuantizedBvh &self, bool /* ownsMemory */) :
+m_bvhAabbMin(self.m_bvhAabbMin),
+m_bvhAabbMax(self.m_bvhAabbMax),
+m_bvhQuantization(self.m_bvhQuantization),
+m_bulletVersion(BT_BULLET_VERSION)
+{
+	// intentionally empty: no other member is assigned here
+}
+
+/// Loads this tree from a single-precision btQuantizedBvhFloatData record:
+/// header values, unquantized nodes, quantized nodes and subtree headers.
+/// Each array is resized to the element count stored in the record before it is filled.
+void btQuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedBvhFloatData)
+{
+	// plain header values
+	m_curNodeIndex = quantizedBvhFloatData.m_curNodeIndex;
+	m_useQuantization = quantizedBvhFloatData.m_useQuantization!=0;
+	m_traversalMode = btTraversalMode(quantizedBvhFloatData.m_traversalMode);
+
+	// overall bounds and quantization vector
+	m_bvhAabbMax.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMax);
+	m_bvhAabbMin.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMin);
+	m_bvhQuantization.deSerializeFloat(quantizedBvhFloatData.m_bvhQuantization);
+
+	// unquantized nodes
+	const int plainCount = quantizedBvhFloatData.m_numContiguousLeafNodes;
+	m_contiguousNodes.resize(plainCount);
+	btOptimizedBvhNodeFloatData* plainSrc = quantizedBvhFloatData.m_contiguousNodesPtr;
+	for (int n = 0; n < plainCount; ++n)
+	{
+		btOptimizedBvhNode& dst = m_contiguousNodes[n];
+		btOptimizedBvhNodeFloatData& src = plainSrc[n];
+
+		dst.m_aabbMaxOrg.deSerializeFloat(src.m_aabbMaxOrg);
+		dst.m_aabbMinOrg.deSerializeFloat(src.m_aabbMinOrg);
+		dst.m_escapeIndex = src.m_escapeIndex;
+		dst.m_subPart = src.m_subPart;
+		dst.m_triangleIndex = src.m_triangleIndex;
+	}
+
+	// quantized nodes
+	const int quantizedCount = quantizedBvhFloatData.m_numQuantizedContiguousNodes;
+	m_quantizedContiguousNodes.resize(quantizedCount);
+	btQuantizedBvhNodeData* quantizedSrc = quantizedBvhFloatData.m_quantizedContiguousNodesPtr;
+	for (int n = 0; n < quantizedCount; ++n)
+	{
+		btQuantizedBvhNode& dst = m_quantizedContiguousNodes[n];
+		btQuantizedBvhNodeData& src = quantizedSrc[n];
+
+		dst.m_escapeIndexOrTriangleIndex = src.m_escapeIndexOrTriangleIndex;
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			dst.m_quantizedAabbMax[axis] = src.m_quantizedAabbMax[axis];
+			dst.m_quantizedAabbMin[axis] = src.m_quantizedAabbMin[axis];
+		}
+	}
+
+	// subtree headers
+	const int headerCount = quantizedBvhFloatData.m_numSubtreeHeaders;
+	m_SubtreeHeaders.resize(headerCount);
+	btBvhSubtreeInfoData* headerSrc = quantizedBvhFloatData.m_subTreeInfoPtr;
+	for (int n = 0; n < headerCount; ++n)
+	{
+		btBvhSubtreeInfo& dst = m_SubtreeHeaders[n];
+		btBvhSubtreeInfoData& src = headerSrc[n];
+
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			dst.m_quantizedAabbMax[axis] = src.m_quantizedAabbMax[axis];
+			dst.m_quantizedAabbMin[axis] = src.m_quantizedAabbMin[axis];
+		}
+		dst.m_rootNodeIndex = src.m_rootNodeIndex;
+		dst.m_subtreeSize = src.m_subtreeSize;
+	}
+}
+
+/// Loads this tree from a double-precision btQuantizedBvhDoubleData record:
+/// header values, unquantized nodes, quantized nodes and subtree headers.
+/// Each array is resized to the element count stored in the record before it is filled.
+void btQuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantizedBvhDoubleData)
+{
+	// plain header values
+	m_curNodeIndex = quantizedBvhDoubleData.m_curNodeIndex;
+	m_useQuantization = quantizedBvhDoubleData.m_useQuantization!=0;
+	m_traversalMode = btTraversalMode(quantizedBvhDoubleData.m_traversalMode);
+
+	// overall bounds and quantization vector
+	m_bvhAabbMax.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMax);
+	m_bvhAabbMin.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMin);
+	m_bvhQuantization.deSerializeDouble(quantizedBvhDoubleData.m_bvhQuantization);
+
+	// unquantized nodes
+	const int plainCount = quantizedBvhDoubleData.m_numContiguousLeafNodes;
+	m_contiguousNodes.resize(plainCount);
+	btOptimizedBvhNodeDoubleData* plainSrc = quantizedBvhDoubleData.m_contiguousNodesPtr;
+	for (int n = 0; n < plainCount; ++n)
+	{
+		btOptimizedBvhNode& dst = m_contiguousNodes[n];
+		btOptimizedBvhNodeDoubleData& src = plainSrc[n];
+
+		dst.m_aabbMaxOrg.deSerializeDouble(src.m_aabbMaxOrg);
+		dst.m_aabbMinOrg.deSerializeDouble(src.m_aabbMinOrg);
+		dst.m_escapeIndex = src.m_escapeIndex;
+		dst.m_subPart = src.m_subPart;
+		dst.m_triangleIndex = src.m_triangleIndex;
+	}
+
+	// quantized nodes
+	const int quantizedCount = quantizedBvhDoubleData.m_numQuantizedContiguousNodes;
+	m_quantizedContiguousNodes.resize(quantizedCount);
+	btQuantizedBvhNodeData* quantizedSrc = quantizedBvhDoubleData.m_quantizedContiguousNodesPtr;
+	for (int n = 0; n < quantizedCount; ++n)
+	{
+		btQuantizedBvhNode& dst = m_quantizedContiguousNodes[n];
+		btQuantizedBvhNodeData& src = quantizedSrc[n];
+
+		dst.m_escapeIndexOrTriangleIndex = src.m_escapeIndexOrTriangleIndex;
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			dst.m_quantizedAabbMax[axis] = src.m_quantizedAabbMax[axis];
+			dst.m_quantizedAabbMin[axis] = src.m_quantizedAabbMin[axis];
+		}
+	}
+
+	// subtree headers
+	const int headerCount = quantizedBvhDoubleData.m_numSubtreeHeaders;
+	m_SubtreeHeaders.resize(headerCount);
+	btBvhSubtreeInfoData* headerSrc = quantizedBvhDoubleData.m_subTreeInfoPtr;
+	for (int n = 0; n < headerCount; ++n)
+	{
+		btBvhSubtreeInfo& dst = m_SubtreeHeaders[n];
+		btBvhSubtreeInfoData& src = headerSrc[n];
+
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			dst.m_quantizedAabbMax[axis] = src.m_quantizedAabbMax[axis];
+			dst.m_quantizedAabbMin[axis] = src.m_quantizedAabbMin[axis];
+		}
+		dst.m_rootNodeIndex = src.m_rootNodeIndex;
+		dst.m_subtreeSize = src.m_subtreeSize;
+	}
+}
+
+
+
+///Fills dataBuffer (treated as a btQuantizedBvhData) from this tree and hands every non-empty
+///array to the serializer as its own chunk. Every path through this function returns btQuantizedBvhDataName.
+const char*	btQuantizedBvh::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btQuantizedBvhData* out = (btQuantizedBvhData*)dataBuffer;
+
+	//plain header values
+	out->m_curNodeIndex = m_curNodeIndex;
+	out->m_useQuantization = m_useQuantization;
+	out->m_traversalMode = int(m_traversalMode);
+
+	//overall bounds and quantization vector
+	m_bvhAabbMax.serialize(out->m_bvhAabbMax);
+	m_bvhAabbMin.serialize(out->m_bvhAabbMin);
+	m_bvhQuantization.serialize(out->m_bvhQuantization);
+
+	//unquantized nodes: a chunk is only written when the serializer returned a non-null unique pointer
+	const int numPlainNodes = m_contiguousNodes.size();
+	out->m_numContiguousLeafNodes = numPlainNodes;
+	out->m_contiguousNodesPtr = 0;
+	if (numPlainNodes)
+	{
+		out->m_contiguousNodesPtr = (btOptimizedBvhNodeData*)serializer->getUniquePointer((void*)&m_contiguousNodes[0]);
+	}
+	if (out->m_contiguousNodesPtr)
+	{
+		const int elemSize = sizeof(btOptimizedBvhNodeData);
+		btChunk* chunk = serializer->allocate(elemSize,numPlainNodes);
+		btOptimizedBvhNodeData* dst = (btOptimizedBvhNodeData*)chunk->m_oldPtr;
+		for (int n=0;n<numPlainNodes;++n)
+		{
+			const btOptimizedBvhNode& src = m_contiguousNodes[n];
+			src.m_aabbMaxOrg.serialize(dst[n].m_aabbMaxOrg);
+			src.m_aabbMinOrg.serialize(dst[n].m_aabbMinOrg);
+			dst[n].m_escapeIndex = src.m_escapeIndex;
+			dst[n].m_subPart = src.m_subPart;
+			dst[n].m_triangleIndex = src.m_triangleIndex;
+		}
+		serializer->finalizeChunk(chunk,"btOptimizedBvhNodeData",BT_ARRAY_CODE,(void*)&m_contiguousNodes[0]);
+	}
+
+	//quantized nodes
+	const int numQuantizedNodes = m_quantizedContiguousNodes.size();
+	out->m_numQuantizedContiguousNodes = numQuantizedNodes;
+	out->m_quantizedContiguousNodesPtr = 0;
+	if (numQuantizedNodes)
+	{
+		out->m_quantizedContiguousNodesPtr = (btQuantizedBvhNodeData*)serializer->getUniquePointer((void*)&m_quantizedContiguousNodes[0]);
+	}
+	if (out->m_quantizedContiguousNodesPtr)
+	{
+		const int elemSize = sizeof(btQuantizedBvhNodeData);
+		btChunk* chunk = serializer->allocate(elemSize,numQuantizedNodes);
+		btQuantizedBvhNodeData* dst = (btQuantizedBvhNodeData*)chunk->m_oldPtr;
+		for (int n=0;n<numQuantizedNodes;++n)
+		{
+			const btQuantizedBvhNode& src = m_quantizedContiguousNodes[n];
+			dst[n].m_escapeIndexOrTriangleIndex = src.m_escapeIndexOrTriangleIndex;
+			for (int axis=0;axis<3;++axis)
+			{
+				dst[n].m_quantizedAabbMax[axis] = src.m_quantizedAabbMax[axis];
+				dst[n].m_quantizedAabbMin[axis] = src.m_quantizedAabbMin[axis];
+			}
+		}
+		serializer->finalizeChunk(chunk,"btQuantizedBvhNodeData",BT_ARRAY_CODE,(void*)&m_quantizedContiguousNodes[0]);
+	}
+
+	//subtree headers
+	const int numHeaders = m_SubtreeHeaders.size();
+	out->m_numSubtreeHeaders = numHeaders;
+	out->m_subTreeInfoPtr = 0;
+	if (numHeaders)
+	{
+		out->m_subTreeInfoPtr = (btBvhSubtreeInfoData*)serializer->getUniquePointer((void*)&m_SubtreeHeaders[0]);
+	}
+	if (out->m_subTreeInfoPtr)
+	{
+		const int elemSize = sizeof(btBvhSubtreeInfoData);
+		btChunk* chunk = serializer->allocate(elemSize,numHeaders);
+		btBvhSubtreeInfoData* dst = (btBvhSubtreeInfoData*)chunk->m_oldPtr;
+		for (int n=0;n<numHeaders;++n)
+		{
+			const btBvhSubtreeInfo& src = m_SubtreeHeaders[n];
+			for (int axis=0;axis<3;++axis)
+			{
+				dst[n].m_quantizedAabbMax[axis] = src.m_quantizedAabbMax[axis];
+				dst[n].m_quantizedAabbMin[axis] = src.m_quantizedAabbMin[axis];
+			}
+			dst[n].m_rootNodeIndex = src.m_rootNodeIndex;
+			dst[n].m_subtreeSize = src.m_subtreeSize;
+		}
+		serializer->finalizeChunk(chunk,"btBvhSubtreeInfoData",BT_ARRAY_CODE,(void*)&m_SubtreeHeaders[0]);
+	}
+
+	return btQuantizedBvhDataName;
+}
+
+
+
+
+
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.h b/src/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.h
new file mode 100644
index 00000000..579cc9a5
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.h
@@ -0,0 +1,579 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_QUANTIZED_BVH_H
+#define BT_QUANTIZED_BVH_H
+
+class btSerializer;
+
+//#define DEBUG_CHECK_DEQUANTIZATION 1
+#ifdef DEBUG_CHECK_DEQUANTIZATION
+#ifdef __SPU__
+#define printf spu_printf
+#endif //__SPU__
+
+#include <stdio.h>
+#include <stdlib.h>
+#endif //DEBUG_CHECK_DEQUANTIZATION
+
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btAlignedAllocator.h"
+
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btQuantizedBvhData btQuantizedBvhDoubleData
+#define btOptimizedBvhNodeData btOptimizedBvhNodeDoubleData
+#define btQuantizedBvhDataName "btQuantizedBvhDoubleData"
+#else
+#define btQuantizedBvhData btQuantizedBvhFloatData
+#define btOptimizedBvhNodeData btOptimizedBvhNodeFloatData
+#define btQuantizedBvhDataName "btQuantizedBvhFloatData"
+#endif
+
+
+
+//http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/vclrf__m128.asp
+
+
+//Note: currently we have 16 bytes per quantized node
+#define MAX_SUBTREE_SIZE_IN_BYTES  2048
+
+// 10 gives the potential for 1024 parts, with at most 2^21 (2097152) (minus one
+// actually) triangles each (since the sign bit is reserved)
+#define MAX_NUM_PARTS_IN_BITS 10
+
+///btQuantizedBvhNode is a compressed aabb node, 16 bytes.
+///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).
+///m_escapeIndexOrTriangleIndex is shared by both roles:
+/// - value >= 0: leaf node; the low (31-MAX_NUM_PARTS_IN_BITS) bits hold the triangle index, the bits above them the part id
+/// - value <  0: internal node; the negated value is the escape index
+ATTRIBUTE_ALIGNED16	(struct) btQuantizedBvhNode
+{
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	//12 bytes
+	unsigned short int	m_quantizedAabbMin[3];
+	unsigned short int	m_quantizedAabbMax[3];
+	//4 bytes
+	int	m_escapeIndexOrTriangleIndex;
+
+	///true when this node is a leaf
+	bool isLeafNode() const
+	{
+		//skipindex is negative (internal node), triangleindex >=0 (leafnode)
+		return (m_escapeIndexOrTriangleIndex >= 0);
+	}
+	///escape index of an internal node; must not be called on a leaf
+	int getEscapeIndex() const
+	{
+		btAssert(!isLeafNode());
+		return -m_escapeIndexOrTriangleIndex;
+	}
+	///triangle index of a leaf node; must not be called on an internal node
+	int	getTriangleIndex() const
+	{
+		btAssert(isLeafNode());
+		// Get only the lower bits where the triangle index is stored.
+		// The mask is built from an unsigned value: left-shifting the negative int (~0) is undefined behaviour before C++20.
+		const unsigned int partIdBits = (~0u) << (31 - MAX_NUM_PARTS_IN_BITS);
+		return static_cast<int>(static_cast<unsigned int>(m_escapeIndexOrTriangleIndex) & ~partIdBits);
+	}
+	///part id of a leaf node; must not be called on an internal node
+	int	getPartId() const
+	{
+		btAssert(isLeafNode());
+		// Get only the highest bits where the part index is stored
+		return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS));
+	}
+};
+
+/// btOptimizedBvhNode contains both internal and leaf node information.
+/// Per the annotations below the fields add up to 44 bytes (32 + 4 + 8) and m_padding extends the node to 64 bytes (sizes assume a 16-byte btVector3 - TODO confirm for double precision builds). You can use the compressed version of 16 bytes.
+ATTRIBUTE_ALIGNED16 (struct) btOptimizedBvhNode
+{
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	//32 bytes: minimum and maximum corner of the node's bounds
+	btVector3	m_aabbMinOrg;
+	btVector3	m_aabbMaxOrg;
+
+	//4 bytes: escape index, written for internal nodes by btQuantizedBvh::setInternalNodeEscapeIndex
+	int	m_escapeIndex;
+
+	//8 bytes
+	//for child nodes
+	int	m_subPart;
+	int	m_triangleIndex;
+
+//pad the size to 64 bytes
+	char	m_padding[20];
+};
+
+
+///btBvhSubtreeInfo provides info to gather a subtree of limited size:
+///the quantized bounds of the subtree, the index of its root node and its size.
+ATTRIBUTE_ALIGNED16(class) btBvhSubtreeInfo
+{
+public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	//quantized bounds, 12 bytes
+	unsigned short int	m_quantizedAabbMin[3];
+	unsigned short int	m_quantizedAabbMax[3];
+	//index of the root node of the subtree, 4 bytes
+	int			m_rootNodeIndex;
+	//4 bytes
+	int			m_subtreeSize;
+	//explicit padding
+	int			m_padding[3];
+
+	///The constructor assigns no member; clearing m_padding is disabled.
+	btBvhSubtreeInfo()
+	{
+		//memset(&m_padding[0], 0, sizeof(m_padding));
+	}
+
+	///Copies the quantized bounds of quantizedNode into this header; m_rootNodeIndex and m_subtreeSize are left untouched.
+	void	setAabbFromQuantizeNode(const btQuantizedBvhNode& quantizedNode)
+	{
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			m_quantizedAabbMin[axis] = quantizedNode.m_quantizedAabbMin[axis];
+		}
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			m_quantizedAabbMax[axis] = quantizedNode.m_quantizedAabbMax[axis];
+		}
+	}
+};
+
+
+///Callback interface handed to the btQuantizedBvh report*/walk* queries.
+///Implementations override processNode, which receives a sub part and a triangle index.
+class btNodeOverlapCallback
+{
+public:
+	virtual ~btNodeOverlapCallback()
+	{
+	}
+
+	virtual void processNode(int subPart, int triangleIndex) = 0;
+};
+
+#include "LinearMath/btAlignedAllocator.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+
+
+///for code readability:
+typedef btAlignedObjectArray<btOptimizedBvhNode>	NodeArray;
+typedef btAlignedObjectArray<btQuantizedBvhNode>	QuantizedNodeArray;
+typedef btAlignedObjectArray<btBvhSubtreeInfo>		BvhSubtreeInfoArray;
+
+
+///The btQuantizedBvh class stores an AABB tree that can be quickly traversed on CPU and Cell SPU.
+///It is used by the btBvhTriangleMeshShape as midphase, and by the btMultiSapBroadphase.
+///It is recommended to use quantization for better performance and lower memory requirements.
+ATTRIBUTE_ALIGNED16(class) btQuantizedBvh
+{
+public:
+	enum btTraversalMode
+	{
+		TRAVERSAL_STACKLESS = 0,
+		TRAVERSAL_STACKLESS_CACHE_FRIENDLY,
+		TRAVERSAL_RECURSIVE
+	};
+
+protected:
+
+
+	btVector3			m_bvhAabbMin;
+	btVector3			m_bvhAabbMax;
+	btVector3			m_bvhQuantization;
+
+	int					m_bulletVersion;	//for serialization versioning. It could also be used to detect endianess.
+
+	int					m_curNodeIndex;
+	//quantization data
+	bool				m_useQuantization;
+
+
+
+	NodeArray			m_leafNodes;
+	NodeArray			m_contiguousNodes;
+	QuantizedNodeArray	m_quantizedLeafNodes;
+	QuantizedNodeArray	m_quantizedContiguousNodes;
+	
+	btTraversalMode	m_traversalMode;
+	BvhSubtreeInfoArray		m_SubtreeHeaders;
+
+	//This is only used for serialization so we don't have to add serialization directly to btAlignedObjectArray
+	mutable int m_subtreeHeaderCount;
+
+	
+
+
+
+	///two versions, one for quantized and normal nodes. This allows code-reuse while maintaining readability (no template/macro!)
+	///this might be refactored into a virtual, it is usually not calculated at run-time
+	///Stores aabbMin as the minimum corner of contiguous node nodeIndex, quantizing it first when quantization is enabled.
+	void	setInternalNodeAabbMin(int nodeIndex, const btVector3& aabbMin)
+	{
+		if (!m_useQuantization)
+		{
+			m_contiguousNodes[nodeIndex].m_aabbMinOrg = aabbMin;
+			return;
+		}
+
+		btQuantizedBvhNode& node = m_quantizedContiguousNodes[nodeIndex];
+		quantize(node.m_quantizedAabbMin, aabbMin, 0);
+	}
+	///Stores aabbMax as the maximum corner of contiguous node nodeIndex, quantizing it first when quantization is enabled.
+	void	setInternalNodeAabbMax(int nodeIndex,const btVector3& aabbMax)
+	{
+		if (!m_useQuantization)
+		{
+			m_contiguousNodes[nodeIndex].m_aabbMaxOrg = aabbMax;
+			return;
+		}
+
+		btQuantizedBvhNode& node = m_quantizedContiguousNodes[nodeIndex];
+		quantize(node.m_quantizedAabbMax, aabbMax, 1);
+	}
+
+	///Minimum corner of leaf node nodeIndex; note that this reads the leaf arrays, not the contiguous node arrays.
+	btVector3 getAabbMin(int nodeIndex) const
+	{
+		if (!m_useQuantization)
+		{
+			//non-quantized
+			return m_leafNodes[nodeIndex].m_aabbMinOrg;
+		}
+
+		const btQuantizedBvhNode& leaf = m_quantizedLeafNodes[nodeIndex];
+		return unQuantize(&leaf.m_quantizedAabbMin[0]);
+	}
+	///Maximum corner of leaf node nodeIndex; note that this reads the leaf arrays, not the contiguous node arrays.
+	btVector3 getAabbMax(int nodeIndex) const
+	{
+		if (!m_useQuantization)
+		{
+			//non-quantized
+			return m_leafNodes[nodeIndex].m_aabbMaxOrg;
+		}
+
+		const btQuantizedBvhNode& leaf = m_quantizedLeafNodes[nodeIndex];
+		return unQuantize(&leaf.m_quantizedAabbMax[0]);
+	}
+
+	
+	///Records escapeIndex on contiguous node nodeIndex.
+	///Quantized nodes store it negated, which is what btQuantizedBvhNode::isLeafNode/getEscapeIndex expect for internal nodes.
+	void	setInternalNodeEscapeIndex(int nodeIndex, int escapeIndex)
+	{
+		if (!m_useQuantization)
+		{
+			m_contiguousNodes[nodeIndex].m_escapeIndex = escapeIndex;
+			return;
+		}
+
+		m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = -escapeIndex;
+	}
+
+	///Grows the bounds of contiguous node nodeIndex so that they also enclose [newAabbMin, newAabbMax].
+	///With quantization enabled the comparison is done per axis on the quantized values.
+	void mergeInternalNodeAabb(int nodeIndex,const btVector3& newAabbMin,const btVector3& newAabbMax) 
+	{
+		if (!m_useQuantization)
+		{
+			//non-quantized
+			btOptimizedBvhNode& plainNode = m_contiguousNodes[nodeIndex];
+			plainNode.m_aabbMinOrg.setMin(newAabbMin);
+			plainNode.m_aabbMaxOrg.setMax(newAabbMax);
+			return;
+		}
+
+		unsigned short int candidateMin[3];
+		unsigned short int candidateMax[3];
+		quantize(candidateMin,newAabbMin,0);
+		quantize(candidateMax,newAabbMax,1);
+
+		for (int axis=0;axis<3;axis++)
+		{
+			btQuantizedBvhNode& node = m_quantizedContiguousNodes[nodeIndex];
+
+			if (candidateMin[axis] < node.m_quantizedAabbMin[axis])
+				node.m_quantizedAabbMin[axis] = candidateMin[axis];
+
+			if (candidateMax[axis] > node.m_quantizedAabbMax[axis])
+				node.m_quantizedAabbMax[axis] = candidateMax[axis];
+		}
+	}
+
+	void	swapLeafNodes(int firstIndex,int secondIndex);
+
+	void	assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex);
+
+protected:
+
+	
+
+	void	buildTree	(int startIndex,int endIndex);
+
+	int	calcSplittingAxis(int startIndex,int endIndex);
+
+	int	sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis);
+	
+	void	walkStacklessTree(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const;
+
+	void	walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const;
+	void	walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const;
+	void	walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const;
+
+	///tree traversal designed for small-memory processors like PS3 SPU
+	void	walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const;
+
+	///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal
+	void	walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantizedBvhNode* currentNode,btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const;
+
+	///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal
+	void	walkRecursiveQuantizedTreeAgainstQuantizedTree(const btQuantizedBvhNode* treeNodeA,const btQuantizedBvhNode* treeNodeB,btNodeOverlapCallback* nodeCallback) const;
+	
+
+
+
+	void	updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex);
+
+public:
+	
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	btQuantizedBvh();
+
+	virtual ~btQuantizedBvh();
+
+	
+	///***************************************** expert/internal use only *************************
+	void	setQuantizationValues(const btVector3& bvhAabbMin,const btVector3& bvhAabbMax,btScalar quantizationMargin=btScalar(1.0));
+	QuantizedNodeArray&	getLeafNodeArray() {			return	m_quantizedLeafNodes;	}
+	///buildInternal is expert use only: assumes that setQuantizationValues and LeafNodeArray are initialized
+	void	buildInternal();
+	///***************************************** expert/internal use only *************************
+
+	void	reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const;
+	void	reportRayOverlappingNodex (btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget) const;
+	void	reportBoxCastOverlappingNodex(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin,const btVector3& aabbMax) const;
+
+		SIMD_FORCE_INLINE void quantize(unsigned short* out, const btVector3& point,int isMax) const
+	{
+		///Writes the 16-bit quantized form of 'point' to out[0..2]; 'point' must lie inside [m_bvhAabbMin,m_bvhAabbMax] (asserted below).
+		btAssert(m_useQuantization);
+
+		btAssert(point.getX() <= m_bvhAabbMax.getX());
+		btAssert(point.getY() <= m_bvhAabbMax.getY());
+		btAssert(point.getZ() <= m_bvhAabbMax.getZ());
+
+		btAssert(point.getX() >= m_bvhAabbMin.getX());
+		btAssert(point.getY() >= m_bvhAabbMin.getY());
+		btAssert(point.getZ() >= m_bvhAabbMin.getZ());
+
+		btVector3 v = (point - m_bvhAabbMin) * m_bvhQuantization;
+		///Make sure rounding is done in a way that unQuantize(quantizeWithClamp(...)) is conservative
+		///end-points always set the first bit, so that they are sorted properly (so that neighbouring AABBs overlap properly)
+		///@todo: double-check this
+		if (isMax)
+		{
+			out[0] = (unsigned short) (((unsigned short)(v.getX()+btScalar(1.)) | 1));
+			out[1] = (unsigned short) (((unsigned short)(v.getY()+btScalar(1.)) | 1));
+			out[2] = (unsigned short) (((unsigned short)(v.getZ()+btScalar(1.)) | 1));
+		} else
+		{
+			out[0] = (unsigned short) (((unsigned short)(v.getX()) & 0xfffe));
+			out[1] = (unsigned short) (((unsigned short)(v.getY()) & 0xfffe));
+			out[2] = (unsigned short) (((unsigned short)(v.getZ()) & 0xfffe));
+		}
+		//optional self-check: 'old' is the input point, 'new' is the value obtained by dequantizing 'out' again
+
+#ifdef DEBUG_CHECK_DEQUANTIZATION
+		btVector3 newPoint = unQuantize(out);
+		if (isMax)
+		{
+			if (newPoint.getX() < point.getX())
+			{
+				printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n",newPoint.getX()-point.getX(), point.getX(),newPoint.getX());
+			}
+			if (newPoint.getY() < point.getY())
+			{
+				printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n",newPoint.getY()-point.getY(), point.getY(),newPoint.getY());
+			}
+			if (newPoint.getZ() < point.getZ())
+			{
+
+				printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n",newPoint.getZ()-point.getZ(), point.getZ(),newPoint.getZ());
+			}
+		} else
+		{
+			if (newPoint.getX() > point.getX())
+			{
+				printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n",newPoint.getX()-point.getX(), point.getX(),newPoint.getX());
+			}
+			if (newPoint.getY() > point.getY())
+			{
+				printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n",newPoint.getY()-point.getY(), point.getY(),newPoint.getY());
+			}
+			if (newPoint.getZ() > point.getZ())
+			{
+				printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n",newPoint.getZ()-point.getZ(), point.getZ(),newPoint.getZ());
+			}
+		}
+#endif //DEBUG_CHECK_DEQUANTIZATION
+
+	}
+
+
+	///Same as quantize(), but first clamps the input into [m_bvhAabbMin,m_bvhAabbMax].
+	SIMD_FORCE_INLINE void quantizeWithClamp(unsigned short* out, const btVector3& point2,int isMax) const
+	{
+		btAssert(m_useQuantization);
+
+		//raise each component to at least the lower bound, then cap it at the upper bound
+		btVector3 insideBounds = point2;
+		insideBounds.setMax(m_bvhAabbMin);
+		insideBounds.setMin(m_bvhAabbMax);
+
+		quantize(out,insideBounds,isMax);
+	}
+	
+	///Map three 16-bit quantized coordinates back to a point: divide by m_bvhQuantization, then offset by m_bvhAabbMin.
+	SIMD_FORCE_INLINE btVector3	unQuantize(const unsigned short* vecIn) const
+	{
+		const btScalar scaledX = (btScalar)(vecIn[0]) / (m_bvhQuantization.getX());
+		const btScalar scaledY = (btScalar)(vecIn[1]) / (m_bvhQuantization.getY());
+		const btScalar scaledZ = (btScalar)(vecIn[2]) / (m_bvhQuantization.getZ());
+		btVector3	point;
+		point.setValue(scaledX,scaledY,scaledZ);
+		return point += m_bvhAabbMin;
+	}
+
+	///setTraversalMode lets you choose between stackless, recursive or stackless cache friendly tree traversal. Note this is only implemented for quantized trees.
+	void	setTraversalMode(btTraversalMode	traversalMode)
+	{	//store the requested mode in m_traversalMode
+		this->m_traversalMode = traversalMode;
+	}
+
+
+	SIMD_FORCE_INLINE QuantizedNodeArray&	getQuantizedNodeArray()
+	{	//mutable access to m_quantizedContiguousNodes
+		return	this->m_quantizedContiguousNodes;
+	}
+
+
+	SIMD_FORCE_INLINE BvhSubtreeInfoArray&	getSubtreeInfoArray()
+	{	//mutable access to m_SubtreeHeaders
+		return this->m_SubtreeHeaders;
+	}
+
+////////////////////////////////////////////////////////////////////
+
+	/////Calculate space needed to store BVH for serialization
+	unsigned calculateSerializeBufferSize() const;
+
+	/// Data buffer MUST be 16 byte aligned
+	virtual bool serialize(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const;
+
+	///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place'
+	static btQuantizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);
+
+	static unsigned int getAlignmentSerializationPadding();
+//////////////////////////////////////////////////////////////////////
+
+	
+	virtual	int	calculateSerializeBufferSizeNew() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+	virtual	void deSerializeFloat(struct btQuantizedBvhFloatData& quantizedBvhFloatData);
+
+	virtual	void deSerializeDouble(struct btQuantizedBvhDoubleData& quantizedBvhDoubleData);
+
+
+////////////////////////////////////////////////////////////////////
+
+	SIMD_FORCE_INLINE bool isQuantized()
+	{	//reports the m_useQuantization flag
+		return this->m_useQuantization;
+	}
+
+private:
+	// Special "copy" constructor that allows for in-place deserialization
+	// Prevents btVector3's default constructor from being called, but doesn't initialize much else
+	// ownsMemory should most likely be false if deserializing, and if you are not, don't call this (it also changes the function signature, which we need)
+	btQuantizedBvh(btQuantizedBvh &other, bool ownsMemory);
+
+}
+;
+
+
+struct	btBvhSubtreeInfoData	///plain-data record referenced through m_subTreeInfoPtr of the btQuantizedBvh*Data structs; field order fixes the memory layout
+{
+	int			m_rootNodeIndex;
+	int			m_subtreeSize;
+	unsigned short m_quantizedAabbMin[3];	//three 16-bit values, one per axis
+	unsigned short m_quantizedAabbMax[3];	//three 16-bit values, one per axis
+};
+
+struct btOptimizedBvhNodeFloatData	///plain-data node record with float bounds, referenced through btQuantizedBvhFloatData::m_contiguousNodesPtr; field order fixes the memory layout
+{
+	btVector3FloatData	m_aabbMinOrg;
+	btVector3FloatData	m_aabbMaxOrg;
+	int	m_escapeIndex;
+	int	m_subPart;
+	int	m_triangleIndex;
+	char m_pad[4];	//explicit padding bytes
+};
+
+struct btOptimizedBvhNodeDoubleData	///plain-data node record with double bounds, referenced through btQuantizedBvhDoubleData::m_contiguousNodesPtr; field order fixes the memory layout
+{
+	btVector3DoubleData	m_aabbMinOrg;
+	btVector3DoubleData	m_aabbMaxOrg;
+	int	m_escapeIndex;
+	int	m_subPart;
+	int	m_triangleIndex;
+	char	m_pad[4];	//explicit padding bytes
+};
+
+
+struct btQuantizedBvhNodeData	///plain-data quantized node record, referenced through m_quantizedContiguousNodesPtr of the btQuantizedBvh*Data structs; field order fixes the memory layout
+{
+	unsigned short m_quantizedAabbMin[3];	//three 16-bit values, one per axis
+	unsigned short m_quantizedAabbMax[3];	//three 16-bit values, one per axis
+	int	m_escapeIndexOrTriangleIndex;	//one int shared by both meanings; how they are told apart is not visible here
+};
+
+struct	btQuantizedBvhFloatData	///plain-data form of a btQuantizedBvh with float vectors; this is the type taken by btQuantizedBvh::deSerializeFloat
+{
+	btVector3FloatData			m_bvhAabbMin;
+	btVector3FloatData			m_bvhAabbMax;
+	btVector3FloatData			m_bvhQuantization;
+	int					m_curNodeIndex;
+	int					m_useQuantization;
+	int					m_numContiguousLeafNodes;
+	int					m_numQuantizedContiguousNodes;
+	btOptimizedBvhNodeFloatData	*m_contiguousNodesPtr;
+	btQuantizedBvhNodeData		*m_quantizedContiguousNodesPtr;
+	btBvhSubtreeInfoData	*m_subTreeInfoPtr;
+	int					m_traversalMode;
+	int					m_numSubtreeHeaders;
+	//note: m_subTreeInfoPtr sits before m_traversalMode here but after m_numSubtreeHeaders in btQuantizedBvhDoubleData; keep each order as is
+};
+
+struct	btQuantizedBvhDoubleData	///plain-data form of a btQuantizedBvh with double vectors; this is the type taken by btQuantizedBvh::deSerializeDouble
+{
+	btVector3DoubleData			m_bvhAabbMin;
+	btVector3DoubleData			m_bvhAabbMax;
+	btVector3DoubleData			m_bvhQuantization;
+	int							m_curNodeIndex;
+	int							m_useQuantization;
+	int							m_numContiguousLeafNodes;
+	int							m_numQuantizedContiguousNodes;
+	btOptimizedBvhNodeDoubleData	*m_contiguousNodesPtr;
+	btQuantizedBvhNodeData			*m_quantizedContiguousNodesPtr;
+	//the field order below differs from btQuantizedBvhFloatData (m_subTreeInfoPtr comes last here); keep it as is
+	int							m_traversalMode;
+	int							m_numSubtreeHeaders;
+	btBvhSubtreeInfoData		*m_subTreeInfoPtr;
+};
+
+
+SIMD_FORCE_INLINE	int	btQuantizedBvh::calculateSerializeBufferSizeNew() const
+{	//btQuantizedBvhData is not declared in this part of the header - presumably an alias for the Float/Double struct; confirm
+	return static_cast<int>(sizeof(btQuantizedBvhData));
+}
+
+
+
+#endif //BT_QUANTIZED_BVH_H
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp b/src/bullet/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp
new file mode 100644
index 00000000..752fcd0f
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp
@@ -0,0 +1,349 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btSimpleBroadphase.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btMatrix3x3.h"
+#include "LinearMath/btAabbUtil2.h"
+
+#include <new>
+
+extern int gOverlappingPairs;
+
+///Debug helper: asserts that no two of the first m_numHandles pool slots share an address.
+void	btSimpleBroadphase::validate()
+{
+	for (int first=0;first<m_numHandles;++first)
+	{
+		for (int second=first+1;second<m_numHandles;++second)
+		{
+			btAssert(m_pHandles+first != m_pHandles+second);
+		}
+	}
+}
+
+///Build a broadphase with a fixed pool of maxProxies handles; when no pair cache is supplied, one is created and owned here.
+btSimpleBroadphase::btSimpleBroadphase(int maxProxies, btOverlappingPairCache* overlappingPairCache)
+	:m_pairCache(overlappingPairCache),
+	m_ownsPairCache(false),
+	m_invalidPair(0)
+{
+
+	if (!overlappingPairCache)
+	{
+		void* mem = btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16);
+		m_pairCache = new (mem)btHashedOverlappingPairCache();
+		m_ownsPairCache = true;
+	}
+
+	// allocate handles buffer and put all handles on free list
+	m_pHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy)*maxProxies,16);
+	m_pHandles = new(m_pHandlesRawPtr) btSimpleBroadphaseProxy[maxProxies];
+	m_maxHandles = maxProxies;
+	m_numHandles = 0;
+	m_firstFreeHandle = 0;
+	m_LastHandleIndex = -1;
+
+	for (int i = m_firstFreeHandle; i < maxProxies; i++)
+	{
+		m_pHandles[i].SetNextFree(i + 1);
+		m_pHandles[i].m_uniqueId = i+2;//any UID will do, we just avoid too trivial values (0,1) for debugging purposes
+	}
+	//the last slot links back to slot 0; guard the write so an empty pool (maxProxies == 0) does not touch m_pHandles[-1]
+	if (maxProxies > 0)
+	{
+		m_pHandles[maxProxies - 1].SetNextFree(0);
+	}
+}
+
+///Release the handle pool and, if this broadphase created the pair cache itself, destroy and free that too.
+btSimpleBroadphase::~btSimpleBroadphase()
+{
+	btAlignedFree(m_pHandlesRawPtr);
+	if (!m_ownsPairCache)
+		return;	//the cache was supplied by the caller
+	//an owned cache was built with placement new, so run the destructor by hand before freeing the storage
+	m_pairCache->~btOverlappingPairCache();
+	btAlignedFree(m_pairCache);
+}
+
+
+///Take a free slot from the handle pool and construct a proxy for the given AABB in it; returns 0 when the pool is exhausted.
+btBroadphaseProxy*	btSimpleBroadphase::createProxy(  const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* /*dispatcher*/,void* multiSapProxy)
+{
+	const bool poolExhausted = (m_numHandles >= m_maxHandles);
+	if (poolExhausted)
+	{
+		btAssert(0);
+		return 0; //should never happen, but don't let the game crash ;-)
+	}
+	btAssert(aabbMin[0]<= aabbMax[0] && aabbMin[1]<= aabbMax[1] && aabbMin[2]<= aabbMax[2]);
+
+	btSimpleBroadphaseProxy* slot = m_pHandles + allocHandle();
+	return new (slot)btSimpleBroadphaseProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask,multiSapProxy);
+}
+
+///Placeholder overlap callback: processOverlap asserts and returns false.
+class	RemovingOverlapCallback : public btOverlapCallback
+{
+protected:
+	virtual bool	processOverlap(btBroadphasePair& /*pair*/)
+	{
+		btAssert(0);
+		return false;
+	}
+};
+
+///Helper whose processOverlap reports whether a pair involves m_targetProxy.
+///NOTE(review): nothing in btSimpleBroadphase.cpp instantiates it or sets m_targetProxy, and it does not derive from btOverlapCallback - looks like dead code, confirm.
+class RemovePairContainingProxy
+{
+	btBroadphaseProxy*	m_targetProxy;
+public:
+	virtual ~RemovePairContainingProxy()
+	{
+	}
+protected:
+	virtual bool processOverlap(btBroadphasePair& pair)
+	{
+		const bool matchesFirst = (m_targetProxy == static_cast<btSimpleBroadphaseProxy*>(pair.m_pProxy0));
+		const bool matchesSecond = (m_targetProxy == static_cast<btSimpleBroadphaseProxy*>(pair.m_pProxy1));
+		return matchesFirst || matchesSecond;
+	}
+};
+
+///Return the proxy's slot to the free list, then drop every cached pair that references it.
+void	btSimpleBroadphase::destroyProxy(btBroadphaseProxy* proxyOrg,btDispatcher* dispatcher)
+{
+	//freeHandle also clears m_clientObject, which the scan loops in this file use to skip a slot
+	freeHandle(static_cast<btSimpleBroadphaseProxy*>(proxyOrg));
+
+	m_pairCache->removeOverlappingPairsContainingProxy(proxyOrg,dispatcher);
+
+	//validate();
+
+}
+
+void	btSimpleBroadphase::getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const
+{	//copy the bounds stored on the proxy into the two output vectors
+	const btSimpleBroadphaseProxy& stored = *static_cast<const btSimpleBroadphaseProxy*>(proxy);
+	aabbMin = stored.m_aabbMin;
+	aabbMax = stored.m_aabbMax;
+}
+
+void	btSimpleBroadphase::setAabb(btBroadphaseProxy* proxy,const btVector3& aabbMin,const btVector3& aabbMax, btDispatcher* /*dispatcher*/)
+{	//only the bounds stored on the proxy are overwritten; the pair cache is not touched here
+	btSimpleBroadphaseProxy& target = *static_cast<btSimpleBroadphaseProxy*>(proxy);
+	target.m_aabbMin = aabbMin;
+	target.m_aabbMax = aabbMax;
+}
+
+///Brute-force ray query: every slot with a client object is handed to rayCallback; rayFrom, rayTo and the AABB arguments are not used for culling here.
+void	btSimpleBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin,const btVector3& aabbMax)
+{
+	for (int slot=0; slot <= m_LastHandleIndex; ++slot)
+	{
+		btSimpleBroadphaseProxy* candidate = m_pHandles + slot;
+		if (candidate->m_clientObject)
+		{
+			rayCallback.process(candidate);
+		}
+	}
+}
+
+
+///Brute-force AABB query: report every slot with a client object whose bounds pass TestAabbAgainstAabb2 against [aabbMin,aabbMax].
+void	btSimpleBroadphase::aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback)
+{
+	for (int slot=0; slot <= m_LastHandleIndex; ++slot)
+	{
+		btSimpleBroadphaseProxy* candidate = m_pHandles + slot;
+		//freeHandle() zeroes m_clientObject, so a null value is skipped
+		const bool inUse = (candidate->m_clientObject != 0);
+
+		if (inUse && TestAabbAgainstAabb2(aabbMin,aabbMax,candidate->m_aabbMin,candidate->m_aabbMax))
+		{
+			callback.process(candidate);
+		}
+	}
+}
+
+
+
+	
+
+
+
+bool	btSimpleBroadphase::aabbOverlap(btSimpleBroadphaseProxy* proxy0,btSimpleBroadphaseProxy* proxy1)
+{	//true when the two AABBs overlap (touching counts) on all three axes
+	for (int axis=0; axis<3; ++axis)
+		if (!(proxy0->m_aabbMin[axis] <= proxy1->m_aabbMax[axis] && proxy1->m_aabbMin[axis] <= proxy0->m_aabbMax[axis]))
+			return false;
+	return true;
+}
+
+
+
+//then remove non-overlapping ones
+struct CheckOverlapCallback : public btOverlapCallback
+{
+	virtual bool processOverlap(btBroadphasePair& pair)
+	{	//true when the pair's two AABBs do not overlap
+		btSimpleBroadphaseProxy* first = static_cast<btSimpleBroadphaseProxy*>(pair.m_pProxy0);
+		return !btSimpleBroadphase::aabbOverlap(first,static_cast<btSimpleBroadphaseProxy*>(pair.m_pProxy1));
+	}
+};
+
+void	btSimpleBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
+{	//brute-force pass over all handle slots that brings m_pairCache in line with the current AABBs
+	//first check for new overlapping pairs (and, when the cache has no deferred removal, drop pairs that stopped overlapping)
+	int i,j;
+	if (m_numHandles >= 0)	//NOTE(review): holds for any non-negative handle count, so it does not skip the empty case
+	{
+		int new_largest_index = -1;
+		for (i=0; i <= m_LastHandleIndex; i++)
+		{
+			btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i];
+			if(!proxy0->m_clientObject)
+			{
+				continue;
+			}
+			new_largest_index = i;	//highest slot with a client object seen so far
+			for (j=i+1; j <= m_LastHandleIndex; j++)
+			{
+				btSimpleBroadphaseProxy* proxy1 = &m_pHandles[j];
+				btAssert(proxy0 != proxy1);
+				if(!proxy1->m_clientObject)
+				{
+					continue;
+				}
+
+				btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0);
+				btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1);
+
+				if (aabbOverlap(p0,p1))
+				{
+					if ( !m_pairCache->findPair(proxy0,proxy1))
+					{
+						m_pairCache->addOverlappingPair(proxy0,proxy1);
+					}
+				} else
+				{
+					if (!m_pairCache->hasDeferredRemoval())
+					{
+						if ( m_pairCache->findPair(proxy0,proxy1))
+						{
+							m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
+						}
+					}
+				}
+			}
+		}
+
+		m_LastHandleIndex = new_largest_index;	//shrink the scan range for later calls
+		//second phase: only for a cache owned by this broadphase that reports deferred removal
+		if (m_ownsPairCache && m_pairCache->hasDeferredRemoval())
+		{
+
+			btBroadphasePairArray&	overlappingPairArray = m_pairCache->getOverlappingPairArray();
+
+			//perform a sort, to find duplicates and to sort 'invalid' pairs to the end
+			overlappingPairArray.quickSort(btBroadphasePairSortPredicate());
+			//cut off m_invalidPair trailing entries (m_invalidPair is reset to 0 at the end of the CLEAN_INVALID_PAIRS block below)
+			overlappingPairArray.resize(overlappingPairArray.size() - m_invalidPair);
+			m_invalidPair = 0;
+
+
+			btBroadphasePair previousPair;
+			previousPair.m_pProxy0 = 0;
+			previousPair.m_pProxy1 = 0;
+			previousPair.m_algorithm = 0;
+
+			//after the sort equal pairs are adjacent, so comparing with the previous entry detects duplicates
+			for (i=0;i<overlappingPairArray.size();i++)
+			{
+
+				btBroadphasePair& pair = overlappingPairArray[i];
+
+				bool isDuplicate = (pair == previousPair);
+
+				previousPair = pair;
+
+				bool needsRemoval = false;
+
+				if (!isDuplicate)
+				{
+					bool hasOverlap = testAabbOverlap(pair.m_pProxy0,pair.m_pProxy1);
+
+					if (hasOverlap)
+					{
+						needsRemoval = false;//callback->processOverlap(pair);
+					} else
+					{
+						needsRemoval = true;
+					}
+				} else
+				{
+					//remove duplicate
+					needsRemoval = true;
+					//should have no algorithm
+					btAssert(!pair.m_algorithm);
+				}
+
+				if (needsRemoval)
+				{
+					m_pairCache->cleanOverlappingPair(pair,dispatcher);
+					//the entry stays in the array for now: null its proxies and count it, the array is compacted after the loop
+					//		m_overlappingPairArray.swap(i,m_overlappingPairArray.size()-1);
+					//		m_overlappingPairArray.pop_back();
+					pair.m_pProxy0 = 0;
+					pair.m_pProxy1 = 0;
+					m_invalidPair++;
+					gOverlappingPairs--;
+				} 
+
+			}
+
+			///if you don't like to skip the invalid pairs in the array, execute following code:
+#define CLEAN_INVALID_PAIRS 1
+#ifdef CLEAN_INVALID_PAIRS
+
+			//perform a sort, to sort 'invalid' pairs to the end
+			overlappingPairArray.quickSort(btBroadphasePairSortPredicate());
+
+			overlappingPairArray.resize(overlappingPairArray.size() - m_invalidPair);
+			m_invalidPair = 0;
+#endif//CLEAN_INVALID_PAIRS
+
+		}
+	}
+}
+
+
+bool btSimpleBroadphase::testAabbOverlap(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1)
+{
+	//both arguments are downcast without any check, then compared by aabbOverlap
+	return aabbOverlap(getSimpleProxyFromProxy(proxy0),
+					   getSimpleProxyFromProxy(proxy1));
+}
+
+void	btSimpleBroadphase::resetPool(btDispatcher* dispatcher)
+{
+	(void)dispatcher;	//not yet: currently a no-op
+}
diff --git a/src/bullet/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h b/src/bullet/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h
new file mode 100644
index 00000000..7cb3c40a
--- /dev/null
+++ b/src/bullet/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h
@@ -0,0 +1,171 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SIMPLE_BROADPHASE_H
+#define BT_SIMPLE_BROADPHASE_H
+
+
+#include "btOverlappingPairCache.h"
+
+
+///Pool entry used by btSimpleBroadphase: a btBroadphaseProxy plus the index of the next slot on the free list.
+struct btSimpleBroadphaseProxy : public btBroadphaseProxy
+{
+	///index of the next free slot; read and written through GetNextFree/SetNextFree
+	int			m_nextFree;
+
+//	int			m_handleId;
+
+	///does not initialise m_nextFree
+	btSimpleBroadphaseProxy()
+	{
+	}
+
+	///shapeType is accepted but ignored; m_nextFree is not initialised here either
+	btSimpleBroadphaseProxy(const btVector3& minpt,const btVector3& maxpt,int shapeType,void* userPtr,short int collisionFilterGroup,short int collisionFilterMask,void* multiSapProxy)
+	:btBroadphaseProxy(minpt,maxpt,userPtr,collisionFilterGroup,collisionFilterMask,multiSapProxy)
+	{
+		(void)shapeType;
+	}
+
+	SIMD_FORCE_INLINE void SetNextFree(int next) {m_nextFree = next;}
+	SIMD_FORCE_INLINE int GetNextFree() const {return m_nextFree;}
+};
+
+///The SimpleBroadphase is just a unit-test for btAxisSweep3, bt32BitAxisSweep3, or btDbvtBroadphase, so use those classes instead.
+///It is a brute force aabb culling broadphase based on O(n^2) aabb checks
+class btSimpleBroadphase : public btBroadphaseInterface
+{
+
+protected:
+
+	int		m_numHandles;						// number of active handles
+	int		m_maxHandles;						// max number of handles
+	int		m_LastHandleIndex;							// upper bound (inclusive) of the slot range scanned by the query loops; -1 when empty
+	
+	btSimpleBroadphaseProxy* m_pHandles;						// handles pool
+
+	void* m_pHandlesRawPtr;	// raw allocation backing m_pHandles
+	int		m_firstFreeHandle;		// free handles list
+	///pop the head of the free list, bump the active count and widen the scanned range if needed; returns the slot index
+	int allocHandle()
+	{
+		btAssert(m_numHandles < m_maxHandles);
+		int freeHandle = m_firstFreeHandle;	// head of the free list
+		m_firstFreeHandle = m_pHandles[freeHandle].GetNextFree();
+		m_numHandles++;
+		if(freeHandle > m_LastHandleIndex)
+		{
+			m_LastHandleIndex = freeHandle;
+		}
+		return freeHandle;
+	}
+	///push the proxy's slot back onto the free list and clear its m_clientObject
+	void freeHandle(btSimpleBroadphaseProxy* proxy)
+	{
+		int handle = int(proxy-m_pHandles);	// slot index recovered from the pointer offset into the pool
+		btAssert(handle >= 0 && handle < m_maxHandles);
+		if(handle == m_LastHandleIndex)
+		{
+			m_LastHandleIndex--;
+		}
+		proxy->SetNextFree(m_firstFreeHandle);
+		m_firstFreeHandle = handle;
+
+		proxy->m_clientObject = 0;
+
+		m_numHandles--;
+	}
+
+	btOverlappingPairCache*	m_pairCache;
+	bool	m_ownsPairCache;	// true when the constructor allocated m_pairCache itself
+
+	int	m_invalidPair;	// number of pair entries invalidated but not yet removed from the pair array
+
+	
+	///unchecked downcast from the generic proxy type to this broadphase's proxy type
+	inline btSimpleBroadphaseProxy*	getSimpleProxyFromProxy(btBroadphaseProxy* proxy)
+	{
+		btSimpleBroadphaseProxy* proxy0 = static_cast<btSimpleBroadphaseProxy*>(proxy);
+		return proxy0;
+	}
+
+	inline const btSimpleBroadphaseProxy*	getSimpleProxyFromProxy(btBroadphaseProxy* proxy) const
+	{
+		const btSimpleBroadphaseProxy* proxy0 = static_cast<const btSimpleBroadphaseProxy*>(proxy);
+		return proxy0;
+	}
+
+	///reset broadphase internal structures, to ensure determinism/reproducibility
+	virtual void resetPool(btDispatcher* dispatcher);
+
+
+	void	validate();
+
+protected:
+
+
+	
+
+public:
+	btSimpleBroadphase(int maxProxies=16384,btOverlappingPairCache* overlappingPairCache=0);
+	virtual ~btSimpleBroadphase();
+
+
+		static bool	aabbOverlap(btSimpleBroadphaseProxy* proxy0,btSimpleBroadphaseProxy* proxy1);
+
+
+	virtual btBroadphaseProxy*	createProxy(  const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy);
+
+	virtual void	calculateOverlappingPairs(btDispatcher* dispatcher);
+
+	virtual void	destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
+	virtual void	setAabb(btBroadphaseProxy* proxy,const btVector3& aabbMin,const btVector3& aabbMax, btDispatcher* dispatcher);
+	virtual void	getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const;
+
+	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin=btVector3(0,0,0),const btVector3& aabbMax=btVector3(0,0,0));
+	virtual void	aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback);
+		
+	btOverlappingPairCache*	getOverlappingPairCache()
+	{
+		return m_pairCache;
+	}
+	const btOverlappingPairCache*	getOverlappingPairCache() const
+	{
+		return m_pairCache;
+	}
+
+	bool	testAabbOverlap(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1);
+
+
+	///getAabb returns the axis aligned bounding box in the 'global' coordinate frame
+	///will add some transform later
+	virtual void getBroadphaseAabb(btVector3& aabbMin,btVector3& aabbMax) const
+	{
+		aabbMin.setValue(-BT_LARGE_FLOAT,-BT_LARGE_FLOAT,-BT_LARGE_FLOAT);
+		aabbMax.setValue(BT_LARGE_FLOAT,BT_LARGE_FLOAT,BT_LARGE_FLOAT);
+	}
+
+	virtual void	printStats()
+	{
+//		printf("btSimpleBroadphase.h\n");
+//		printf("numHandles = %d, maxHandles = %d\n",m_numHandles,m_maxHandles);
+	}
+};
+
+
+
+#endif //BT_SIMPLE_BROADPHASE_H
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/SphereTriangleDetector.cpp b/src/bullet/BulletCollision/CollisionDispatch/SphereTriangleDetector.cpp
new file mode 100644
index 00000000..23a5c752
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/SphereTriangleDetector.cpp
@@ -0,0 +1,201 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "LinearMath/btScalar.h"
+#include "SphereTriangleDetector.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+
+
+///Store the two shape pointers and the contact breaking threshold for later getClosestPoints calls.
+SphereTriangleDetector::SphereTriangleDetector(btSphereShape* sphere,btTriangleShape* triangle,btScalar contactBreakingThreshold)
+	: m_sphere(sphere)
+	, m_triangle(triangle)
+	, m_contactBreakingThreshold(contactBreakingThreshold)
+{
+}
+
+///Run collide() with the sphere centre expressed in the triangle's frame and, on contact, report one contact point transformed by input.m_transformB.
+void	SphereTriangleDetector::getClosestPoints(const ClosestPointInput& input,Result& output,class btIDebugDraw* debugDraw,bool swapResults)
+{
+	(void)debugDraw;
+	const btTransform& sphereXform = input.m_transformA;
+	const btTransform& triangleXform = input.m_transformB;
+
+	btVector3 localPoint,localNormal;
+	btScalar timeOfImpact = btScalar(1.);
+	btScalar depth = btScalar(0.);
+//	output.m_distance = btScalar(BT_LARGE_FLOAT);
+	//move sphere into triangle space
+	btTransform	sphereInTr = triangleXform.inverseTimes(sphereXform);
+
+	if (!collide(sphereInTr.getOrigin(),localPoint,localNormal,depth,timeOfImpact,m_contactBreakingThreshold))
+	{
+		return;
+	}
+
+	//leave triangle space again: rotate the normal, fully transform the point
+	btVector3 normalOnB = triangleXform.getBasis()*localNormal;
+	btVector3 pointOnB = triangleXform*localPoint;
+	if (!swapResults)
+	{
+		output.addContactPoint(normalOnB,pointOnB,depth);
+		return;
+	}
+	output.addContactPoint(-normalOnB,pointOnB+normalOnB*depth,depth);
+}
+
+
+
+// See also geometrictools.com
+// Basic idea: D = |p - (lo + t0*lv)| where t0 = lv . (p - lo) / lv . lv
+btScalar SegmentSqrDistance(const btVector3& from, const btVector3& to,const btVector3 &p, btVector3 &nearest);
+
+///Squared distance from p to the segment [from,to]; the closest point on the segment is written to 'nearest'.
+btScalar SegmentSqrDistance(const btVector3& from, const btVector3& to,const btVector3 &p, btVector3 &nearest) {
+	const btVector3 segment = to - from;
+	btVector3 offset = p - from;
+	btScalar param = segment.dot(offset);
+	if (!(param > 0)) {
+		param = 0;
+	} else {
+		const btScalar segLenSqr = segment.dot(segment);
+		if (param < segLenSqr) {
+			param /= segLenSqr;
+			offset -= param*segment;
+		} else {
+			param = 1;
+			offset -= segment;
+		}
+	}
+	nearest = from + param*segment;
+	return offset.dot(offset);
+}
+
+///Forward to pointInTriangle, passing local copies of p and normal.
+bool SphereTriangleDetector::facecontains(const btVector3 &p,const btVector3* vertices,btVector3& normal)  {
+	btVector3 pointCopy = p;
+	const btVector3 normalCopy = normal;
+	return pointInTriangle(vertices, normalCopy, &pointCopy);
+}
+
+///Sphere-vs-triangle test (sphereCenter in the triangle's frame). On contact returns true and writes point, resultNormal and depth (distance minus radius); timeOfImpact is never written.
+bool SphereTriangleDetector::collide(const btVector3& sphereCenter,btVector3 &point, btVector3& resultNormal, btScalar& depth, btScalar &timeOfImpact, btScalar contactBreakingThreshold)
+{
+	const btVector3* vertices = &m_triangle->getVertexPtr(0);
+
+	btScalar radius = m_sphere->getRadius();
+	btScalar radiusWithThreshold = radius + contactBreakingThreshold;
+
+	btVector3 normal = (vertices[1]-vertices[0]).cross(vertices[2]-vertices[0]);
+	normal.normalize();
+	btVector3 p1ToCentre = sphereCenter - vertices[0];
+	btScalar distanceFromPlane = p1ToCentre.dot(normal);
+
+	if (distanceFromPlane < btScalar(0.))
+	{
+		//triangle facing the other way
+		distanceFromPlane *= btScalar(-1.);
+		normal *= btScalar(-1.);
+	}
+
+	bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;
+
+	// Check for contact / intersection
+	bool hasContact = false;
+	btVector3 contactPoint;
+	if (isInsideContactPlane) {
+		if (facecontains(sphereCenter,vertices,normal)) {
+			// Inside the contact wedge - touches a point on the shell plane
+			hasContact = true;
+			contactPoint = sphereCenter - normal*distanceFromPlane;
+		} else {
+			// Could be inside one of the contact capsules
+			btScalar contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold;
+			btScalar minDistSqr = contactCapsuleRadiusSqr;	// smallest edge distance accepted so far; starts at the capsule radius
+			btVector3 nearestOnEdge;
+			for (int i = 0; i < m_triangle->getNumEdges(); i++) {
+				btVector3 pa;
+				btVector3 pb;
+
+				m_triangle->getEdge(i,pa,pb);
+
+				btScalar distanceSqr = SegmentSqrDistance(pa,pb,sphereCenter, nearestOnEdge);
+				if (distanceSqr < minDistSqr) {
+					// Inside this edge's capsule and closer than any earlier edge: keep the nearest edge, not the last one tested
+					minDistSqr = distanceSqr;
+					hasContact = true;
+					contactPoint = nearestOnEdge;
+				}
+			}
+		}
+	}
+
+	if (hasContact) {
+		btVector3 contactToCentre = sphereCenter - contactPoint;
+		btScalar distanceSqr = contactToCentre.length2();
+
+		if (distanceSqr < radiusWithThreshold*radiusWithThreshold)
+		{
+			if (distanceSqr>SIMD_EPSILON)
+			{
+				btScalar distance = btSqrt(distanceSqr);
+				resultNormal = contactToCentre;
+				resultNormal.normalize();
+				point = contactPoint;
+				depth = -(radius-distance);
+			} else
+			{
+				// Centre (numerically) coincides with the contact point: use the face normal and full-radius depth
+				resultNormal = normal;
+				point = contactPoint;
+				depth = -radius;
+			}
+			return true;
+		}
+	}
+
+	return false;
+}
+
+
+///Tests on which side of the three edges *p lies, using for each edge the vector edge x normal.
+///Returns true when all three side values are > 0, or when all three are <= 0.
+bool SphereTriangleDetector::pointInTriangle(const btVector3 vertices[], const btVector3 &normal, btVector3 *p )
+{
+	const btVector3& a = vertices[0];
+	const btVector3& b = vertices[1];
+	const btVector3& c = vertices[2];
+
+	//vectors perpendicular to both an edge and 'normal' (edges taken in the order a->b, b->c, c->a)
+	const btVector3 perpAB = (b - a).cross(normal);
+	const btVector3 perpBC = (c - b).cross(normal);
+	const btVector3 perpCA = (a - c).cross(normal);
+
+	//side value of *p for each edge, measured from the edge's start vertex
+	const btScalar sideAB = perpAB.dot(*p - a);
+	const btScalar sideBC = perpBC.dot(*p - b);
+	const btScalar sideCA = perpCA.dot(*p - c);
+
+	//note the asymmetry: strictly positive for one orientation, zero allowed for the other
+	const bool allPositive = (sideAB > 0) && (sideBC > 0) && (sideCA > 0);
+	if (allPositive)
+	{
+		return true;
+	}
+
+	const bool allNonPositive = (sideAB <= 0) && (sideBC <= 0) && (sideCA <= 0);
+	return allNonPositive;
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/SphereTriangleDetector.h b/src/bullet/BulletCollision/CollisionDispatch/SphereTriangleDetector.h
new file mode 100644
index 00000000..22953af4
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/SphereTriangleDetector.h
@@ -0,0 +1,51 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SPHERE_TRIANGLE_DETECTOR_H
+#define BT_SPHERE_TRIANGLE_DETECTOR_H
+
+#include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h"
+
+
+
+class btSphereShape;
+class btTriangleShape;
+
+
+
+/// sphere-triangle to match the btDiscreteCollisionDetectorInterface
+struct SphereTriangleDetector : public btDiscreteCollisionDetectorInterface
+{
+	virtual void	getClosestPoints(const ClosestPointInput& input,Result& output,class btIDebugDraw* debugDraw,bool swapResults=false);
+
+	SphereTriangleDetector(btSphereShape* sphere,btTriangleShape* triangle, btScalar contactBreakingThreshold);
+
+	virtual ~SphereTriangleDetector() {};
+	/// Returns true on contact and then fills point, resultNormal and depth; returns false otherwise.
+	bool collide(const btVector3& sphereCenter,btVector3 &point, btVector3& resultNormal, btScalar& depth, btScalar &timeOfImpact, btScalar	contactBreakingThreshold);
+
+private:
+
+	/// True when *p is on the same side of all three planes spanned by a triangle edge and normal.
+	bool pointInTriangle(const btVector3 vertices[], const btVector3 &normal, btVector3 *p );
+	bool facecontains(const btVector3 &p,const btVector3* vertices,btVector3& normal);
+
+	btSphereShape* m_sphere;
+	btTriangleShape* m_triangle;
+	btScalar	m_contactBreakingThreshold;
+	
+};
+#endif //BT_SPHERE_TRIANGLE_DETECTOR_H
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.cpp b/src/bullet/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.cpp
new file mode 100644
index 00000000..7e5da6c5
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.cpp
@@ -0,0 +1,47 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2008 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btActivatingCollisionAlgorithm.h"
+#include "btCollisionDispatcher.h"
+#include "btCollisionObject.h"
+
+btActivatingCollisionAlgorithm::btActivatingCollisionAlgorithm (const btCollisionAlgorithmConstructionInfo& ci)
+:btCollisionAlgorithm(ci)	// only the base class is initialised; the member initialisers below are disabled
+//,
+//m_colObj0(0),
+//m_colObj1(0)
+{
+}
+btActivatingCollisionAlgorithm::btActivatingCollisionAlgorithm (const btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* colObj0,btCollisionObject* colObj1)
+:btCollisionAlgorithm(ci)	// colObj0 and colObj1 are accepted but currently unused
+//,
+//m_colObj0(0),
+//m_colObj1(0)
+{	// disabled work-in-progress below: would store both objects and activate them when the pair needs collision
+//	if (ci.m_dispatcher1->needsCollision(colObj0,colObj1))
+//	{
+//		m_colObj0 = colObj0;
+//		m_colObj1 = colObj1;
+//		
+//		m_colObj0->activate();
+//		m_colObj1->activate();
+//	}
+}
+
+btActivatingCollisionAlgorithm::~btActivatingCollisionAlgorithm()
+{	// currently a no-op: the activate() calls below are commented out
+//		m_colObj0->activate();
+//		m_colObj1->activate();
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h b/src/bullet/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h
new file mode 100644
index 00000000..25fe0889
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h
@@ -0,0 +1,36 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2008 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef __BT_ACTIVATING_COLLISION_ALGORITHM_H
+#define __BT_ACTIVATING_COLLISION_ALGORITHM_H
+
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+
+///This class is not enabled yet (work-in-progress) to more aggressively activate objects.
+class btActivatingCollisionAlgorithm : public btCollisionAlgorithm
+{
+//	btCollisionObject* m_colObj0;
+//	btCollisionObject* m_colObj1;
+
+public:
+	/// Forwards ci to btCollisionAlgorithm; this class adds no state of its own.
+	btActivatingCollisionAlgorithm (const btCollisionAlgorithmConstructionInfo& ci);
+	/// Same as the one-argument form; colObj0/colObj1 are currently ignored because the activation code is commented out.
+	btActivatingCollisionAlgorithm (const btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* colObj0,btCollisionObject* colObj1);
+	/// Currently does nothing: the activate() calls in the definition are commented out.
+	virtual ~btActivatingCollisionAlgorithm();
+
+};
+#endif //__BT_ACTIVATING_COLLISION_ALGORITHM_H
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.cpp b/src/bullet/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.cpp
new file mode 100644
index 00000000..2182d0d7
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.cpp
@@ -0,0 +1,435 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+* The b2CollidePolygons routines are Copyright (c) 2006-2007 Erin Catto http://www.gphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///btBox2dBox2dCollisionAlgorithm, with modified b2CollidePolygons routines from the Box2D library.
+///The modifications include: switching from b2Vec to btVector3, redefinition of b2Dot, b2Cross
+
+#include "btBox2dBox2dCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionShapes/btBoxShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionDispatch/btBoxBoxDetector.h"
+#include "BulletCollision/CollisionShapes/btBox2dShape.h"
+
+#define USE_PERSISTENT_CONTACTS 1
+
+btBox2dBox2dCollisionAlgorithm::btBox2dBox2dCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* obj0,btCollisionObject* obj1)
+	: btActivatingCollisionAlgorithm(ci, obj0, obj1)
+	, m_ownManifold(false)
+	, m_manifoldPtr(mf)
+{
+	// Only request (and own) a manifold when none was supplied and the dispatcher says the pair needs collision.
+	if (m_manifoldPtr == 0 && m_dispatcher->needsCollision(obj0, obj1)) {
+		m_manifoldPtr = m_dispatcher->getNewManifold(obj0, obj1);
+		m_ownManifold = true;
+	}
+}
+
+btBox2dBox2dCollisionAlgorithm::~btBox2dBox2dCollisionAlgorithm()
+{
+	// A manifold that was passed in from outside (m_ownManifold == false)
+	// is left alone, and so is a missing one.
+	if (!m_ownManifold || !m_manifoldPtr)
+		return;
+
+	// The manifold came from the dispatcher in the constructor: hand it back.
+	m_dispatcher->releaseManifold(m_manifoldPtr);
+}
+
+
+void b2CollidePolygons(btManifoldResult* manifold,  const btBox2dShape* polyA, const btTransform& xfA, const btBox2dShape* polyB, const btTransform& xfB);
+
+//#include <stdio.h>
+void btBox2dBox2dCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	// Without a manifold there is nowhere to store contacts.
+	if (m_manifoldPtr == 0)
+	{
+		return;
+	}
+
+	// Both bodies are assumed to carry btBox2dShape collision shapes (unchecked cast).
+	const btBox2dShape* shapeA = (const btBox2dShape*)body0->getCollisionShape();
+	const btBox2dShape* shapeB = (const btBox2dShape*)body1->getCollisionShape();
+
+	// Contacts produced below are written into this algorithm's manifold.
+	resultOut->setPersistentManifold(m_manifoldPtr);
+	b2CollidePolygons(resultOut, shapeA, body0->getWorldTransform(), shapeB, body1->getWorldTransform());
+
+	// Refreshing is only necessary when using persistent contact points;
+	// otherwise all points are newly added.
+	if (m_ownManifold)
+		resultOut->refreshContactPoints();
+}
+
+btScalar btBox2dBox2dCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* /*body0*/,btCollisionObject* /*body1*/,const btDispatcherInfo& /*dispatchInfo*/,btManifoldResult* /*resultOut*/)
+{
+	// Not implemented yet: every argument is ignored and 1 is always returned.
+	return 1.f;
+}
+
+
+struct ClipVertex
+{
+	btVector3 v;	// position of the clip point
+	int id;		// tag copied along by ClipSegmentToLine; stands in for the Box2D b2ContactID
+	//b2ContactID id;
+	ClipVertex() : id(0) {}	// give id a defined value: nothing in this file assigns it before ClipSegmentToLine copies and reads it
+};
+
+#define b2Dot(a,b) (a).dot(b)
+#define b2Mul(a,b) (a)*(b)
+#define b2MulT(a,b) (a).transpose()*(b)
+#define b2Cross(a,b) (a).cross(b)
+#define btCrossS(a,s) btVector3(s * a.getY(), -s * a.getX(),0.f)
+
+int b2_maxManifoldPoints =2;
+
+static int ClipSegmentToLine(ClipVertex vOut[2], ClipVertex vIn[2],
+					  const btVector3& normal, btScalar offset)
+{
+	// Clips the segment vIn[0]-vIn[1] against the half-space
+	// dot(normal, x) - offset <= 0 and writes the surviving points to vOut.
+	// Returns how many points were written (0, 1 or 2).
+	const btScalar signedDist0 = normal.dot(vIn[0].v) - offset;
+	const btScalar signedDist1 = normal.dot(vIn[1].v) - offset;
+
+	int written = 0;
+	// End points on or behind the line are kept unchanged.
+	if (signedDist0 <= 0.0f)
+	{
+		vOut[written] = vIn[0];
+		++written;
+	}
+	if (signedDist1 <= 0.0f)
+	{
+		vOut[written] = vIn[1];
+		++written;
+	}
+
+	// End points strictly on opposite sides: add the crossing point, tagged
+	// with the id of the end point that has the positive distance.
+	if (signedDist0 * signedDist1 < 0.0f)
+	{
+		const btScalar t = signedDist0 / (signedDist0 - signedDist1);
+		vOut[written].v = vIn[0].v + t * (vIn[1].v - vIn[0].v);
+		vOut[written].id = (signedDist0 > 0.0f) ? vIn[0].id : vIn[1].id;
+		++written;
+	}
+	return written;
+}
+
+// Find the separation between poly1 and poly2 for a given edge normal on poly1.
+static btScalar EdgeSeparation(const btBox2dShape* poly1, const btTransform& xf1, int edge1,
+							  const btBox2dShape* poly2, const btTransform& xf2)
+{
+	// Separation of poly2 from edge edge1 of poly1: the projection, onto
+	// that edge's normal, of poly2's lowest vertex along the normal.
+	btAssert(0 <= edge1 && edge1 < poly1->getVertexCount());
+
+	// Normal of the chosen edge, rotated by xf1's basis and then by the
+	// transpose of xf2's basis (i.e. expressed in poly2's frame).
+	const btVector3 worldNormal = xf1.getBasis() * poly1->getNormals()[edge1];
+	const btVector3 localNormal = xf2.getBasis().transpose() * worldNormal;
+
+	// Support vertex of poly2 for -normal: the vertex with the smallest
+	// projection onto the normal (the first one wins on ties).
+	const btVector3* candidates = poly2->getVertices();
+	const int candidateCount = poly2->getVertexCount();
+	int supportIndex = 0;
+	btScalar smallestDot = BT_LARGE_FLOAT;
+	for (int k = 0; k < candidateCount; ++k)
+	{
+		const btScalar projection = candidates[k].dot(localNormal);
+		if (projection < smallestDot)
+		{
+			smallestDot = projection;
+			supportIndex = k;
+		}
+	}
+
+	// Offset from vertex edge1 of poly1 to the support vertex, both
+	// transformed by their own transform, projected on the rotated normal.
+	const btVector3 edgePointWorld = xf1 * poly1->getVertices()[edge1];
+	const btVector3 supportPointWorld = xf2 * candidates[supportIndex];
+	return (supportPointWorld - edgePointWorld).dot(worldNormal);
+}
+
+// Find the max separation between poly1 and poly2 using edge normals from poly1.
+static btScalar FindMaxSeparation(int* edgeIndex,
+								 const btBox2dShape* poly1, const btTransform& xf1,
+								 const btBox2dShape* poly2, const btTransform& xf2)
+{	// returns a separation value; *edgeIndex is written only on the paths that return a non-positive value
+	int count1 = poly1->getVertexCount();
+	const btVector3* normals1 = poly1->getNormals();
+
+	// Vector pointing from the centroid of poly1 to the centroid of poly2.
+	btVector3 d = b2Mul(xf2, poly2->getCentroid()) - b2Mul(xf1, poly1->getCentroid());
+	btVector3 dLocal1 = b2MulT(xf1.getBasis(), d);
+
+	// Find edge normal on poly1 that has the largest projection onto d.
+	int edge = 0;
+	btScalar maxDot = -BT_LARGE_FLOAT;
+	for (int i = 0; i < count1; ++i)
+	{
+		btScalar dot = b2Dot(normals1[i], dLocal1);
+		if (dot > maxDot)
+		{
+			maxDot = dot;
+			edge = i;
+		}
+	}
+
+	// Get the separation for the edge normal.
+	btScalar s = EdgeSeparation(poly1, xf1, edge, poly2, xf2);
+	if (s > 0.0f)
+	{
+		return s;	// early out on positive separation; *edgeIndex is NOT written on this path
+	}
+
+	// Check the separation for the previous edge normal.
+	int prevEdge = edge - 1 >= 0 ? edge - 1 : count1 - 1;	// wraps from edge 0 to the last edge
+	btScalar sPrev = EdgeSeparation(poly1, xf1, prevEdge, poly2, xf2);
+	if (sPrev > 0.0f)
+	{
+		return sPrev;	// early out; *edgeIndex is NOT written on this path
+	}
+
+	// Check the separation for the next edge normal.
+	int nextEdge = edge + 1 < count1 ? edge + 1 : 0;	// wraps from the last edge to edge 0
+	btScalar sNext = EdgeSeparation(poly1, xf1, nextEdge, poly2, xf2);
+	if (sNext > 0.0f)
+	{
+		return sNext;	// early out; *edgeIndex is NOT written on this path
+	}
+
+	// Find the best edge and the search direction.
+	int bestEdge;
+	btScalar bestSeparation;
+	int increment;	// -1 walks towards lower edge indices, +1 towards higher ones (both with wrap-around)
+	if (sPrev > s && sPrev > sNext)
+	{
+		increment = -1;
+		bestEdge = prevEdge;
+		bestSeparation = sPrev;
+	}
+	else if (sNext > s)
+	{
+		increment = 1;
+		bestEdge = nextEdge;
+		bestSeparation = sNext;
+	}
+	else
+	{
+		*edgeIndex = edge;	// neither neighbour is strictly better than the initial guess
+		return s;
+	}
+
+	// Perform a local search for the best edge normal.
+	for ( ; ; )
+	{
+		if (increment == -1)
+			edge = bestEdge - 1 >= 0 ? bestEdge - 1 : count1 - 1;
+		else
+			edge = bestEdge + 1 < count1 ? bestEdge + 1 : 0;
+
+		s = EdgeSeparation(poly1, xf1, edge, poly2, xf2);
+		if (s > 0.0f)
+		{
+			return s;	// early out; *edgeIndex is NOT written on this path
+		}
+
+		if (s > bestSeparation)
+		{
+			bestEdge = edge;
+			bestSeparation = s;
+		}
+		else
+		{
+			break;	// separation stopped increasing: keep the best edge found so far
+		}
+	}
+
+	*edgeIndex = bestEdge;
+	return bestSeparation;
+}
+
+static void FindIncidentEdge(ClipVertex c[2],
+							 const btBox2dShape* poly1, const btTransform& xf1, int edge1,
+							 const btBox2dShape* poly2, const btTransform& xf2)
+{
+	// Selects the edge of poly2 whose normal has the smallest dot product
+	// with the normal of edge edge1 of poly1, and stores its two end points,
+	// transformed by xf2, in c[0].v and c[1].v.
+
+	btAssert(0 <= edge1 && edge1 < poly1->getVertexCount());
+
+	const int edgeCount = poly2->getVertexCount();
+	const btVector3* incidentVertices = poly2->getVertices();
+	const btVector3* incidentNormals = poly2->getNormals();
+
+	// Reference edge normal, rotated by xf1's basis and then by the
+	// transpose of xf2's basis (i.e. expressed in poly2's frame).
+	const btVector3 rotatedNormal = xf1.getBasis() * poly1->getNormals()[edge1];
+	const btVector3 referenceNormal = xf2.getBasis().transpose() * rotatedNormal;
+
+	// Linear scan; the first edge reaching the minimum is kept on ties.
+	int incident = 0;
+	btScalar lowestDot = BT_LARGE_FLOAT;
+	for (int k = 0; k < edgeCount; ++k)
+	{
+		const btScalar alignment = referenceNormal.dot(incidentNormals[k]);
+		if (alignment < lowestDot)
+		{
+			lowestDot = alignment;
+			incident = k;
+		}
+	}
+
+	// The edge runs from vertex `incident` to the following vertex,
+	// wrapping around to vertex 0 after the last one.
+	const int following = (incident + 1 < edgeCount) ? incident + 1 : 0;
+
+	// Only the positions are filled in. The Box2D feature ids
+	// (referenceEdge / incidentEdge / incidentVertex) are not ported,
+	// so c[0].id and c[1].id are left untouched.
+	c[0].v = xf2 * incidentVertices[incident];
+	c[1].v = xf2 * incidentVertices[following];
+}
+
+// Find edge normal of max separation on A - return if separating axis is found
+// Find edge normal of max separation on B - return if separation axis is found
+// Choose reference edge as min(minA, minB)
+// Find incident edge
+// Clip
+
+// The normal points from 1 to 2
+void b2CollidePolygons(btManifoldResult* manifold,
+					  const btBox2dShape* polyA, const btTransform& xfA,
+					  const btBox2dShape* polyB, const btTransform& xfB)
+{
+
+	int edgeA = 0;
+	btScalar separationA = FindMaxSeparation(&edgeA, polyA, xfA, polyB, xfB);
+	if (separationA > 0.0f)
+		return;	// positive separation along an edge normal of A: no contact is added
+
+	int edgeB = 0;
+	btScalar separationB = FindMaxSeparation(&edgeB, polyB, xfB, polyA, xfA);
+	if (separationB > 0.0f)
+		return;	// positive separation along an edge normal of B: no contact is added
+
+	const btBox2dShape* poly1;	// reference poly
+	const btBox2dShape* poly2;	// incident poly
+	btTransform xf1, xf2;
+	int edge1;		// reference edge
+	unsigned char flip;	// 1 when polyB supplies the reference edge, 0 when polyA does
+	const btScalar k_relativeTol = 0.98f;
+	const btScalar k_absoluteTol = 0.001f;
+
+	// TODO_ERIN use "radius" of poly for absolute tolerance.
+	if (separationB > k_relativeTol * separationA + k_absoluteTol)
+	{
+		poly1 = polyB;
+		poly2 = polyA;
+		xf1 = xfB;
+		xf2 = xfA;
+		edge1 = edgeB;
+		flip = 1;
+	}
+	else
+	{
+		poly1 = polyA;
+		poly2 = polyB;
+		xf1 = xfA;
+		xf2 = xfB;
+		edge1 = edgeA;
+		flip = 0;
+	}
+
+	ClipVertex incidentEdge[2];
+	FindIncidentEdge(incidentEdge, poly1, xf1, edge1, poly2, xf2);
+
+	int count1 = poly1->getVertexCount();
+	const btVector3* vertices1 = poly1->getVertices();
+
+	btVector3 v11 = vertices1[edge1];
+	btVector3 v12 = edge1 + 1 < count1 ? vertices1[edge1+1] : vertices1[0];	// next vertex, wrapping to vertex 0
+
+	btVector3 dv = v12 - v11;	// NOTE(review): dv is never read; the next line recomputes v12 - v11
+	btVector3 sideNormal = b2Mul(xf1.getBasis(), v12 - v11);
+	sideNormal.normalize();
+	btVector3 frontNormal = btCrossS(sideNormal, 1.0f);	// (sideNormal.y, -sideNormal.x, 0)
+	
+	
+	v11 = b2Mul(xf1, v11);
+	v12 = b2Mul(xf1, v12);
+
+	btScalar frontOffset = b2Dot(frontNormal, v11);
+	btScalar sideOffset1 = -b2Dot(sideNormal, v11);
+	btScalar sideOffset2 = b2Dot(sideNormal, v12);
+
+	// Clip incident edge against extruded edge1 side edges.
+	ClipVertex clipPoints1[2];
+	clipPoints1[0].v.setValue(0,0,0);
+	clipPoints1[1].v.setValue(0,0,0);
+
+	ClipVertex clipPoints2[2];
+	clipPoints2[0].v.setValue(0,0,0);
+	clipPoints2[1].v.setValue(0,0,0);
+
+
+	int np;	// number of points returned by the most recent ClipSegmentToLine call
+
+	// Clip to box side 1
+	np = ClipSegmentToLine(clipPoints1, incidentEdge, -sideNormal, sideOffset1);
+
+	if (np < 2)
+		return;	// fewer than two points survived the first clip: no contact is added
+
+	// Clip to negative box side 1
+	np = ClipSegmentToLine(clipPoints2, clipPoints1,  sideNormal, sideOffset2);
+
+	if (np < 2)
+	{
+		return;	// fewer than two points survived the second clip: no contact is added
+	}
+
+	// Now clipPoints2 contains the clipped points.
+	btVector3 manifoldNormal = flip ? -frontNormal : frontNormal;
+
+	int pointCount = 0;	// only incremented below; never read in this function
+	for (int i = 0; i < b2_maxManifoldPoints; ++i)	// NOTE(review): clipPoints2 holds 2 entries, so this reads out of bounds if the global b2_maxManifoldPoints is ever raised above 2
+	{
+		btScalar separation = b2Dot(frontNormal, clipPoints2[i].v) - frontOffset;
+
+		if (separation <= 0.0f)	// only points on or behind the reference edge line are reported
+		{
+			
+			//b2ManifoldPoint* cp = manifold->points + pointCount;
+			//btScalar separation = separation;
+			//cp->localPoint1 = b2MulT(xfA, clipPoints2[i].v);
+			//cp->localPoint2 = b2MulT(xfB, clipPoints2[i].v);
+
+			manifold->addContactPoint(-manifoldNormal,clipPoints2[i].v,separation);
+
+//			cp->id = clipPoints2[i].id;
+//			cp->id.features.flip = flip;
+			++pointCount;
+		}
+	}
+
+//	manifold->pointCount = pointCount;}
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.h b/src/bullet/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.h
new file mode 100644
index 00000000..97c5be77
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.h
@@ -0,0 +1,66 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_BOX_2D_BOX_2D__COLLISION_ALGORITHM_H
+#define BT_BOX_2D_BOX_2D__COLLISION_ALGORITHM_H
+
+#include "BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+
+class btPersistentManifold;
+
+///box-box collision detection
+class btBox2dBox2dCollisionAlgorithm : public btActivatingCollisionAlgorithm
+{
+	bool	m_ownManifold;	// true when m_manifoldPtr was obtained from the dispatcher by this object and must be released by it
+	btPersistentManifold*	m_manifoldPtr;	// manifold that receives the contacts; may be null
+	
+public:
+	btBox2dBox2dCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci)
+		: btActivatingCollisionAlgorithm(ci), m_ownManifold(false), m_manifoldPtr(0) {}	// initialise both members so the destructor never reads indeterminate values
+	/// Runs the 2D box-box test for the pair and reports contacts through resultOut; does nothing without a manifold.
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	/// Not implemented yet: always returns 1.
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	/// Uses mf when it is non-null; otherwise asks the dispatcher for a new manifold if the pair needs collision.
+	btBox2dBox2dCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
+	/// Releases the manifold only when this object obtained it itself.
+	virtual ~btBox2dBox2dCollisionAlgorithm();
+	/// Appends the manifold to manifoldArray, but only when it exists and is owned by this algorithm.
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		if (m_manifoldPtr && m_ownManifold)
+		{
+			manifoldArray.push_back(m_manifoldPtr);
+		}
+	}
+
+	/// Factory: placement-constructs the algorithm (with no pre-existing manifold) in memory obtained from the dispatcher.
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			int bbsize = sizeof(btBox2dBox2dCollisionAlgorithm);
+			void* ptr = ci.m_dispatcher1->allocateCollisionAlgorithm(bbsize);
+			return new(ptr) btBox2dBox2dCollisionAlgorithm(0,ci,body0,body1);
+		}
+	};
+
+};
+
+#endif //BT_BOX_2D_BOX_2D__COLLISION_ALGORITHM_H
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.cpp b/src/bullet/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.cpp
new file mode 100644
index 00000000..49628853
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.cpp
@@ -0,0 +1,85 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btBoxBoxCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionShapes/btBoxShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "btBoxBoxDetector.h"
+
+#define USE_PERSISTENT_CONTACTS 1
+
+btBoxBoxCollisionAlgorithm::btBoxBoxCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* obj0,btCollisionObject* obj1)
+	: btActivatingCollisionAlgorithm(ci, obj0, obj1)
+	, m_ownManifold(false)
+	, m_manifoldPtr(mf)
+{
+	// Only request (and own) a manifold when none was supplied and the dispatcher says the pair needs collision.
+	if (m_manifoldPtr == 0 && m_dispatcher->needsCollision(obj0, obj1)) {
+		m_manifoldPtr = m_dispatcher->getNewManifold(obj0, obj1);
+		m_ownManifold = true;
+	}
+}
+
+btBoxBoxCollisionAlgorithm::~btBoxBoxCollisionAlgorithm()
+{
+	// A manifold that was passed in from outside is not ours to release.
+	if (!m_ownManifold || !m_manifoldPtr)
+		return;
+
+	m_dispatcher->releaseManifold(m_manifoldPtr);
+}
+
+void btBoxBoxCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	// Without a manifold there is nowhere to store contacts.
+	if (m_manifoldPtr == 0)
+	{
+		return;
+	}
+
+	// Both bodies are assumed to carry btBoxShape collision shapes (unchecked cast).
+	btBoxShape* shapeA = (btBoxShape*)body0->getCollisionShape();
+	btBoxShape* shapeB = (btBoxShape*)body1->getCollisionShape();
+
+	// Report contacts into our manifold; internally it is kept persistent
+	// and contact reduction is done.
+	resultOut->setPersistentManifold(m_manifoldPtr);
+#ifndef USE_PERSISTENT_CONTACTS
+	m_manifoldPtr->clearManifold();
+#endif //USE_PERSISTENT_CONTACTS
+
+	// Detector input: both world transforms, maximum distance set to BT_LARGE_FLOAT.
+	btDiscreteCollisionDetectorInterface::ClosestPointInput query;
+	query.m_maximumDistanceSquared = BT_LARGE_FLOAT;
+	query.m_transformA = body0->getWorldTransform();
+	query.m_transformB = body1->getWorldTransform();
+
+	btBoxBoxDetector boxDetector(shapeA, shapeB);
+	boxDetector.getClosestPoints(query, *resultOut, dispatchInfo.m_debugDraw);
+
+#ifdef USE_PERSISTENT_CONTACTS
+	// Refreshing is only necessary when using persistent contact points;
+	// otherwise all points are newly added.
+	if (m_ownManifold)
+		resultOut->refreshContactPoints();
+#endif //USE_PERSISTENT_CONTACTS
+}
+
+btScalar btBoxBoxCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* /*body0*/,btCollisionObject* /*body1*/,const btDispatcherInfo& /*dispatchInfo*/,btManifoldResult* /*resultOut*/)
+{
+	// Not implemented yet: every argument is ignored and 1 is always returned.
+	return 1.f;
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.h b/src/bullet/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.h
new file mode 100644
index 00000000..f0bbae61
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.h
@@ -0,0 +1,66 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_BOX_BOX__COLLISION_ALGORITHM_H
+#define BT_BOX_BOX__COLLISION_ALGORITHM_H
+
+#include "btActivatingCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+
+class btPersistentManifold;
+
+///box-box collision detection
+class btBoxBoxCollisionAlgorithm : public btActivatingCollisionAlgorithm
+{
+	bool	m_ownManifold;	// true when m_manifoldPtr was obtained from the dispatcher by this object and must be released by it
+	btPersistentManifold*	m_manifoldPtr;	// manifold that receives the contacts; may be null
+	
+public:
+	btBoxBoxCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci)
+		: btActivatingCollisionAlgorithm(ci), m_ownManifold(false), m_manifoldPtr(0) {}	// initialise both members so the destructor never reads indeterminate values
+	/// Runs btBoxBoxDetector on the pair and reports contacts through resultOut; does nothing without a manifold.
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	/// Not implemented yet: always returns 1.
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	/// Uses mf when it is non-null; otherwise asks the dispatcher for a new manifold if the pair needs collision.
+	btBoxBoxCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
+	/// Releases the manifold only when this object obtained it itself.
+	virtual ~btBoxBoxCollisionAlgorithm();
+	/// Appends the manifold to manifoldArray, but only when it exists and is owned by this algorithm.
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		if (m_manifoldPtr && m_ownManifold)
+		{
+			manifoldArray.push_back(m_manifoldPtr);
+		}
+	}
+
+	/// Factory: placement-constructs the algorithm (with no pre-existing manifold) in memory obtained from the dispatcher.
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			int bbsize = sizeof(btBoxBoxCollisionAlgorithm);
+			void* ptr = ci.m_dispatcher1->allocateCollisionAlgorithm(bbsize);
+			return new(ptr) btBoxBoxCollisionAlgorithm(0,ci,body0,body1);
+		}
+	};
+
+};
+
+#endif //BT_BOX_BOX__COLLISION_ALGORITHM_H
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btBoxBoxDetector.cpp b/src/bullet/BulletCollision/CollisionDispatch/btBoxBoxDetector.cpp
new file mode 100644
index 00000000..a7c8cf14
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btBoxBoxDetector.cpp
@@ -0,0 +1,718 @@
+/*
+ * Box-Box collision detection re-distributed under the ZLib license with permission from Russell L. Smith
+ * Original version is from Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org
+ Bullet Continuous Collision Detection and Physics Library
+ Bullet is Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///ODE box-box collision detection is adapted to work with Bullet
+
+#include "btBoxBoxDetector.h"
+#include "BulletCollision/CollisionShapes/btBoxShape.h"
+
+#include <float.h>
+#include <string.h>
+
+btBoxBoxDetector::btBoxBoxDetector(btBoxShape* box1,btBoxShape* box2)
+	: m_box1(box1)
+	, m_box2(box2)
+{
+	// nothing else to do: the detector only remembers the two box shapes
+}
+
+
+// given two boxes (p1,R1,side1) and (p2,R2,side2), collide them together and
+// generate contact points. this returns 0 if there is no contact otherwise
+// it returns the number of contacts generated.
+// `normal' returns the contact normal.
+// `depth' returns the maximum penetration depth along that normal.
+// `return_code' returns a number indicating the type of contact that was
+// detected:
+//        1,2,3 = box 2 intersects with a face of box 1
+//        4,5,6 = box 1 intersects with a face of box 2
+//        7..15 = edge-edge contact
+// `maxc' is the maximum number of contacts allowed to be generated, i.e.
+// the size of the `contact' array.
+// `contact' and `skip' are the contact array information provided to the
+// collision functions. this function only fills in the position and depth
+// fields.
+struct dContactGeom;	// ODE contact type; in the code below it is only passed around as an unused pointer
+#define dDOTpq(a,b,p,q) ((a)[0]*(b)[0] + (a)[p]*(b)[q] + (a)[2*(p)]*(b)[2*(q)])	// 3-term dot product: a is read with stride p, b with stride q
+#define dInfinity FLT_MAX	// dBoxBox2 starts its best-separation search from -dInfinity
+
+
+/*PURE_INLINE btScalar dDOT   (const btScalar *a, const btScalar *b) { return dDOTpq(a,b,1,1); }
+PURE_INLINE btScalar dDOT13 (const btScalar *a, const btScalar *b) { return dDOTpq(a,b,1,3); }
+PURE_INLINE btScalar dDOT31 (const btScalar *a, const btScalar *b) { return dDOTpq(a,b,3,1); }
+PURE_INLINE btScalar dDOT33 (const btScalar *a, const btScalar *b) { return dDOTpq(a,b,3,3); }
+*/
+static btScalar dDOT   (const btScalar *a, const btScalar *b) { return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]; }	// both operands contiguous
+static btScalar dDOT44 (const btScalar *a, const btScalar *b) { return a[0]*b[0] + a[4]*b[4] + a[8]*b[8]; }	// both operands read with stride 4
+static btScalar dDOT41 (const btScalar *a, const btScalar *b) { return a[0]*b[0] + a[4]*b[1] + a[8]*b[2]; }	// a with stride 4, b contiguous
+static btScalar dDOT14 (const btScalar *a, const btScalar *b) { return a[0]*b[0] + a[1]*b[4] + a[2]*b[8]; }	// a contiguous, b with stride 4
+#define dMULTIPLYOP1_331(A,op,B,C) \
+{\
+  (A)[0] op dDOT41((B),(C)); \
+  (A)[1] op dDOT41(((B)+1),(C)); \
+  (A)[2] op dDOT41(((B)+2),(C)); \
+}
+// dMULTIPLYOP1_331 (above): A op (B transposed)*C, i.e. columns of B dotted with C. dMULTIPLYOP0_331 (below): A op B*C, rows of B dotted with C.
+#define dMULTIPLYOP0_331(A,op,B,C) \
+{ \
+  (A)[0] op dDOT((B),(C)); \
+  (A)[1] op dDOT(((B)+4),(C)); \
+  (A)[2] op dDOT(((B)+8),(C)); \
+} 
+// B is parenthesized before the offset is added so that any pointer expression can be passed safely as the matrix argument.
+#define dMULTIPLY1_331(A,B,C) dMULTIPLYOP1_331(A,=,B,C)
+#define dMULTIPLY0_331(A,B,C) dMULTIPLYOP0_331(A,=,B,C)
+// 3x3 matrix stored as 3 rows with a stride of 4 scalars; elements 3, 7 and 11 are neither written nor read by the code visible here.
+typedef btScalar dMatrix3[4*3];
+
+void dLineClosestApproach (const btVector3& pa, const btVector3& ua,
+			   const btVector3& pb, const btVector3& ub,
+			   btScalar *alpha, btScalar *beta);
+void dLineClosestApproach (const btVector3& pa, const btVector3& ua,
+			   const btVector3& pb, const btVector3& ub,
+			   btScalar *alpha, btScalar *beta)
+{
+  // For the lines pa + alpha*ua and pb + beta*ub, write the parameters of the
+  // closest-approach points to *alpha and *beta. The 1-(ua.ub)^2 denominator
+  // presumably relies on ua and ub being unit length - confirm with callers.
+  btVector3 offset;
+  for (int axis=0; axis<3; axis++) offset[axis] = pb[axis] - pa[axis];
+  const btScalar dirDot = dDOT(ua,ub);
+  const btScalar projA  =  dDOT(ua,offset);
+  const btScalar projB  = -dDOT(ub,offset);
+  const btScalar denom  = 1-dirDot*dirDot;
+  if (denom <= btScalar(0.0001f)) {
+    // @@@ this needs to be made more robust (tiny denominator: both parameters are reported as 0)
+    *alpha = 0;
+    *beta  = 0;
+    return;
+  }
+  const btScalar invDenom = 1.f/denom;
+  *alpha = (projA + dirDot*projB)*invDenom;
+  *beta  = (dirDot*projA + projB)*invDenom;
+}
+
+
+
+// find all the intersection points between the 2D rectangle with vertices
+// at (+/-h[0],+/-h[1]) and the 2D quadrilateral with vertices (p[0],p[1]),
+// (p[2],p[3]),(p[4],p[5]),(p[6],p[7]).
+//
+// the intersection points are returned as x,y pairs in the 'ret' array.
+// the number of intersection points is returned by the function (this will
+// be in the range 0 to 8).
+
+static int intersectRectQuad2 (btScalar h[2], btScalar p[8], btScalar ret[16])
+{
+  // clips the quad p against the four sides of the rectangle, one side per pass.
+  // q (and r) contain nq (and nr) coordinate points for the current (and chopped) polygons
+  int nq=4,nr=0;
+  btScalar buffer[16];	// scratch polygon: r alternates between ret and this buffer from pass to pass
+  btScalar *q = p;
+  btScalar *r = ret;
+  for (int dir=0; dir <= 1; dir++) {
+    // direction notation: xy[0] = x axis, xy[1] = y axis
+    for (int sign=-1; sign <= 1; sign += 2) {
+      // chop q along the line xy[dir] = sign*h[dir]
+      btScalar *pq = q;
+      btScalar *pr = r;
+      nr = 0;
+      for (int i=nq; i > 0; i--) {
+	// go through all points in q and all lines between adjacent points
+	if (sign*pq[dir] < h[dir]) {
+	  // this point is inside the chopping line
+	  pr[0] = pq[0];
+	  pr[1] = pq[1];
+	  pr += 2;
+	  nr++;
+	  if (nr & 8) {	// 8 points written: that is all ret[16] can hold, so stop clipping
+	    q = r;
+	    goto done;
+	  }
+	}
+	btScalar *nextq = (i > 1) ? pq+2 : q;	// the last point's neighbour wraps around to the first point
+	if ((sign*pq[dir] < h[dir]) ^ (sign*nextq[dir] < h[dir])) {
+	  // this line crosses the chopping line
+	  pr[1-dir] = pq[1-dir] + (nextq[1-dir]-pq[1-dir]) /
+	    (nextq[dir]-pq[dir]) * (sign*h[dir]-pq[dir]);
+	  pr[dir] = sign*h[dir];
+	  pr += 2;
+	  nr++;
+	  if (nr & 8) {	// same 8-point cap as above
+	    q = r;
+	    goto done;
+	  }
+	}
+	pq += 2;
+      }
+      q = r;
+      r = (q==ret) ? buffer : ret;	// the next pass writes into whichever array was not just filled
+      nq = nr;
+    }
+  }
+ done:
+  if (q != ret) memcpy (ret,q,nr*2*sizeof(btScalar));	// result ended up outside ret: copy the nr points over
+  return nr;
+}
+
+
+#define M__PI 3.14159265f	// single-precision pi, used by cullPoints2 for its angle arithmetic
+
+// given n points in the plane (array p, of size 2*n), generate m points that
+// best represent the whole set. the definition of 'best' here is not
+// predetermined - the idea is to select points that give good box-box
+// collision detection behavior. the chosen point indexes are returned in the
+// array iret (of size m). 'i0' is always the first entry in the array.
+// n must be in the range [1..8]. m must be in the range [1..n]. i0 must be
+// in the range [0..n-1].
+
+void cullPoints2 (int n, btScalar p[], int m, int i0, int iret[]);
+void cullPoints2 (int n, btScalar p[], int m, int i0, int iret[])
+{
+  // compute the centroid of the polygon in cx,cy (1 point: itself, 2 points: midpoint, otherwise area-weighted)
+  int i,j;
+  btScalar a,cx,cy,q;
+  if (n==1) {
+    cx = p[0];
+    cy = p[1];
+  }
+  else if (n==2) {
+    cx = btScalar(0.5)*(p[0] + p[2]);
+    cy = btScalar(0.5)*(p[1] + p[3]);
+  }
+  else {
+    a = 0;
+    cx = 0;
+    cy = 0;
+    for (i=0; i<(n-1); i++) {
+      q = p[i*2]*p[i*2+3] - p[i*2+2]*p[i*2+1];	// 2D cross product of vertex i and vertex i+1
+      a += q;
+      cx += q*(p[i*2]+p[i*2+2]);
+      cy += q*(p[i*2+1]+p[i*2+3]);
+    }
+    q = p[n*2-2]*p[1] - p[0]*p[n*2-1];	// closing edge: last vertex back to vertex 0
+	if (btFabs(a+q) > SIMD_EPSILON)
+	{
+		a = 1.f/(btScalar(3.0)*(a+q));
+	} else
+	{
+		a=BT_LARGE_FLOAT;	// summed cross products are ~0: use a large scale instead of dividing by ~0
+	}
+    cx = a*(cx + q*(p[n*2-2]+p[0]));
+    cy = a*(cy + q*(p[n*2-1]+p[1]));
+  }
+
+  // compute the angle of each point w.r.t. the centroid
+  btScalar A[8];
+  for (i=0; i<n; i++) A[i] = btAtan2(p[i*2+1]-cy,p[i*2]-cx);
+
+  // search for points that have angles closest to A[i0] + i*(2*pi/m).
+  int avail[8];
+  for (i=0; i<n; i++) avail[i] = 1;
+  avail[i0] = 0;	// i0 is output first and must not be picked again
+  iret[0] = i0;
+  iret++;
+  for (j=1; j<m; j++) {
+    a = btScalar(j)*(2*M__PI/m) + A[i0];	// target angle for the j-th output point
+    if (a > M__PI) a -= 2*M__PI;	// wrap by one full turn
+    btScalar maxdiff=1e9,diff;	// despite its name, maxdiff tracks the smallest angular difference found so far
+
+    *iret = i0;			// iret is not allowed to keep this value, but it sometimes does, when diff=#QNAN0
+
+    for (i=0; i<n; i++) {
+      if (avail[i]) {
+	diff = btFabs (A[i]-a);
+	if (diff > M__PI) diff = 2*M__PI - diff;
+	if (diff < maxdiff) {
+	  maxdiff = diff;
+	  *iret = i;
+	}
+      }
+    }
+#if defined(DEBUG) || defined (_DEBUG)
+    btAssert (*iret != i0);	// ensure iret got set
+#endif
+    avail[*iret] = 0;
+    iret++;
+  }
+}
+
+
+
+int dBoxBox2 (const btVector3& p1, const dMatrix3 R1,
+	     const btVector3& side1, const btVector3& p2,
+	     const dMatrix3 R2, const btVector3& side2,
+	     btVector3& normal, btScalar *depth, int *return_code,
+		 int maxc, dContactGeom * /*contact*/, int /*skip*/,btDiscreteCollisionDetectorInterface::Result& output);
+int dBoxBox2 (const btVector3& p1, const dMatrix3 R1,
+	     const btVector3& side1, const btVector3& p2,
+	     const dMatrix3 R2, const btVector3& side2,
+	     btVector3& normal, btScalar *depth, int *return_code,
+		 int maxc, dContactGeom * /*contact*/, int /*skip*/,btDiscreteCollisionDetectorInterface::Result& output)
+{
+  const btScalar fudge_factor = btScalar(1.05);	// an edge-edge axis only replaces the current best axis if s2*fudge_factor > s
+  btVector3 p,pp,normalC(0.f,0.f,0.f);
+  const btScalar *normalR = 0;
+  btScalar A[3],B[3],R11,R12,R13,R21,R22,R23,R31,R32,R33,
+    Q11,Q12,Q13,Q21,Q22,Q23,Q31,Q32,Q33,s,s2,l;
+  int i,j,invert_normal,code;
+  // overview: (1) test 15 candidate separating axes, (2) edge-edge axis won: emit one contact, (3) otherwise clip the incident face against the reference face.
+  // get vector from centers of box 1 to box 2, relative to box 1
+  p = p2 - p1;
+  dMULTIPLY1_331 (pp,R1,p);		// get pp = p relative to body 1
+
+  // get side lengths / 2
+  A[0] = side1[0]*btScalar(0.5);
+  A[1] = side1[1]*btScalar(0.5);
+  A[2] = side1[2]*btScalar(0.5);
+  B[0] = side2[0]*btScalar(0.5);
+  B[1] = side2[1]*btScalar(0.5);
+  B[2] = side2[2]*btScalar(0.5);
+
+  // Rij is R1'*R2, i.e. the relative rotation between R1 and R2
+  R11 = dDOT44(R1+0,R2+0); R12 = dDOT44(R1+0,R2+1); R13 = dDOT44(R1+0,R2+2);
+  R21 = dDOT44(R1+1,R2+0); R22 = dDOT44(R1+1,R2+1); R23 = dDOT44(R1+1,R2+2);
+  R31 = dDOT44(R1+2,R2+0); R32 = dDOT44(R1+2,R2+1); R33 = dDOT44(R1+2,R2+2);
+
+  Q11 = btFabs(R11); Q12 = btFabs(R12); Q13 = btFabs(R13);
+  Q21 = btFabs(R21); Q22 = btFabs(R22); Q23 = btFabs(R23);
+  Q31 = btFabs(R31); Q32 = btFabs(R32); Q33 = btFabs(R33);
+
+  // for all 15 possible separating axes:
+  //   * see if the axis separates the boxes. if so, return 0.
+  //   * find the depth of the penetration along the separating axis (s2)
+  //   * if this is the largest depth so far, record it.
+  // the normal vector will be set to the separating axis with the smallest
+  // depth. note: normalR is set to point to a column of R1 or R2 if that is
+  // the smallest depth normal so far. otherwise normalR is 0 and normalC is
+  // set to a vector relative to body 1. invert_normal is 1 if the sign of
+  // the normal should be flipped.
+
+#define TST(expr1,expr2,norm,cc) \
+  s2 = btFabs(expr1) - (expr2); \
+  if (s2 > 0) return 0; \
+  if (s2 > s) { \
+    s = s2; \
+    normalR = norm; \
+    invert_normal = ((expr1) < 0); \
+    code = (cc); \
+  }
+
+  s = -dInfinity;
+  invert_normal = 0;
+  code = 0;
+
+  // separating axis = u1,u2,u3
+  TST (pp[0],(A[0] + B[0]*Q11 + B[1]*Q12 + B[2]*Q13),R1+0,1);
+  TST (pp[1],(A[1] + B[0]*Q21 + B[1]*Q22 + B[2]*Q23),R1+1,2);
+  TST (pp[2],(A[2] + B[0]*Q31 + B[1]*Q32 + B[2]*Q33),R1+2,3);
+
+  // separating axis = v1,v2,v3
+  TST (dDOT41(R2+0,p),(A[0]*Q11 + A[1]*Q21 + A[2]*Q31 + B[0]),R2+0,4);
+  TST (dDOT41(R2+1,p),(A[0]*Q12 + A[1]*Q22 + A[2]*Q32 + B[1]),R2+1,5);
+  TST (dDOT41(R2+2,p),(A[0]*Q13 + A[1]*Q23 + A[2]*Q33 + B[2]),R2+2,6);
+
+  // note: cross product axes need to be scaled when s is computed.
+  // normal (n1,n2,n3) is relative to box 1.
+#undef TST
+#define TST(expr1,expr2,n1,n2,n3,cc) \
+  s2 = btFabs(expr1) - (expr2); \
+  if (s2 > SIMD_EPSILON) return 0; \
+  l = btSqrt((n1)*(n1) + (n2)*(n2) + (n3)*(n3)); \
+  if (l > SIMD_EPSILON) { \
+    s2 /= l; \
+    if (s2*fudge_factor > s) { \
+      s = s2; \
+      normalR = 0; \
+      normalC[0] = (n1)/l; normalC[1] = (n2)/l; normalC[2] = (n3)/l; \
+      invert_normal = ((expr1) < 0); \
+      code = (cc); \
+    } \
+  }
+
+  btScalar fudge2 (1.0e-5f);	// small epsilon added to every |Rij| before the edge-edge tests (presumably for near-parallel edges - confirm)
+
+  Q11 += fudge2;
+  Q12 += fudge2;
+  Q13 += fudge2;
+
+  Q21 += fudge2;
+  Q22 += fudge2;
+  Q23 += fudge2;
+
+  Q31 += fudge2;
+  Q32 += fudge2;
+  Q33 += fudge2;
+
+  // separating axis = u1 x (v1,v2,v3)
+  TST(pp[2]*R21-pp[1]*R31,(A[1]*Q31+A[2]*Q21+B[1]*Q13+B[2]*Q12),0,-R31,R21,7);
+  TST(pp[2]*R22-pp[1]*R32,(A[1]*Q32+A[2]*Q22+B[0]*Q13+B[2]*Q11),0,-R32,R22,8);
+  TST(pp[2]*R23-pp[1]*R33,(A[1]*Q33+A[2]*Q23+B[0]*Q12+B[1]*Q11),0,-R33,R23,9);
+
+  // separating axis = u2 x (v1,v2,v3)
+  TST(pp[0]*R31-pp[2]*R11,(A[0]*Q31+A[2]*Q11+B[1]*Q23+B[2]*Q22),R31,0,-R11,10);
+  TST(pp[0]*R32-pp[2]*R12,(A[0]*Q32+A[2]*Q12+B[0]*Q23+B[2]*Q21),R32,0,-R12,11);
+  TST(pp[0]*R33-pp[2]*R13,(A[0]*Q33+A[2]*Q13+B[0]*Q22+B[1]*Q21),R33,0,-R13,12);
+
+  // separating axis = u3 x (v1,v2,v3)
+  TST(pp[1]*R11-pp[0]*R21,(A[0]*Q21+A[1]*Q11+B[1]*Q33+B[2]*Q32),-R21,R11,0,13);
+  TST(pp[1]*R12-pp[0]*R22,(A[0]*Q22+A[1]*Q12+B[0]*Q33+B[2]*Q31),-R22,R12,0,14);
+  TST(pp[1]*R13-pp[0]*R23,(A[0]*Q23+A[1]*Q13+B[0]*Q32+B[1]*Q31),-R23,R13,0,15);
+
+#undef TST
+
+  if (!code) return 0;
+
+  // if we get to this point, the boxes interpenetrate. compute the normal
+  // in global coordinates.
+  if (normalR) {
+    normal[0] = normalR[0];
+    normal[1] = normalR[4];
+    normal[2] = normalR[8];
+  }
+  else {
+    dMULTIPLY0_331 (normal,R1,normalC);
+  }
+  if (invert_normal) {
+    normal[0] = -normal[0];
+    normal[1] = -normal[1];
+    normal[2] = -normal[2];
+  }
+  *depth = -s;	// s <= 0 for the chosen axis, so the reported depth is non-negative
+
+  // compute contact point(s)
+
+  if (code > 6) {
+    // an edge from box 1 touches an edge from box 2.
+    // find a point pa on the intersecting edge of box 1
+    btVector3 pa;
+    btScalar sign;
+    for (i=0; i<3; i++) pa[i] = p1[i];
+    for (j=0; j<3; j++) {
+      sign = (dDOT14(normal,R1+j) > 0) ? btScalar(1.0) : btScalar(-1.0);
+      for (i=0; i<3; i++) pa[i] += sign * A[j] * R1[i*4+j];
+    }
+
+    // find a point pb on the intersecting edge of box 2
+    btVector3 pb;
+    for (i=0; i<3; i++) pb[i] = p2[i];
+    for (j=0; j<3; j++) {
+      sign = (dDOT14(normal,R2+j) > 0) ? btScalar(-1.0) : btScalar(1.0);
+      for (i=0; i<3; i++) pb[i] += sign * B[j] * R2[i*4+j];
+    }
+
+    btScalar alpha,beta;
+    btVector3 ua,ub;
+    for (i=0; i<3; i++) ua[i] = R1[((code)-7)/3 + i*4];
+    for (i=0; i<3; i++) ub[i] = R2[((code)-7)%3 + i*4];
+
+    dLineClosestApproach (pa,ua,pb,ub,&alpha,&beta);
+    for (i=0; i<3; i++) pa[i] += ua[i]*alpha;
+    for (i=0; i<3; i++) pb[i] += ub[i]*beta;
+
+	{
+		
+		//contact[0].pos[i] = btScalar(0.5)*(pa[i]+pb[i]);
+		//contact[0].depth = *depth;
+		btVector3 pointInWorld;
+
+#ifdef USE_CENTER_POINT
+	    for (i=0; i<3; i++) 
+			pointInWorld[i] = (pa[i]+pb[i])*btScalar(0.5);
+		output.addContactPoint(-normal,pointInWorld,-*depth);
+#else
+		output.addContactPoint(-normal,pb,-*depth);	// default build: the point on box 2's edge is reported
+
+#endif //
+		*return_code = code;
+	}
+    return 1;
+  }
+
+  // okay, we have a face-something intersection (because the separating
+  // axis is perpendicular to a face). define face 'a' to be the reference
+  // face (i.e. the normal vector is perpendicular to this) and face 'b' to be
+  // the incident face (the closest face of the other box).
+
+  const btScalar *Ra,*Rb,*pa,*pb,*Sa,*Sb;
+  if (code <= 3) {
+    Ra = R1;
+    Rb = R2;
+    pa = p1;
+    pb = p2;
+    Sa = A;
+    Sb = B;
+  }
+  else {
+    Ra = R2;
+    Rb = R1;
+    pa = p2;
+    pb = p1;
+    Sa = B;
+    Sb = A;
+  }
+
+  // nr = normal vector of reference face dotted with axes of incident box.
+  // anr = absolute values of nr.
+  btVector3 normal2,nr,anr;
+  if (code <= 3) {
+    normal2[0] = normal[0];
+    normal2[1] = normal[1];
+    normal2[2] = normal[2];
+  }
+  else {
+    normal2[0] = -normal[0];
+    normal2[1] = -normal[1];
+    normal2[2] = -normal[2];
+  }
+  dMULTIPLY1_331 (nr,Rb,normal2);
+  anr[0] = btFabs (nr[0]);
+  anr[1] = btFabs (nr[1]);
+  anr[2] = btFabs (nr[2]);
+
+  // find the largest component of anr: this corresponds to the normal
+  // for the incident face. the other axis numbers of the incident face
+  // are stored in a1,a2.
+  int lanr,a1,a2;
+  if (anr[1] > anr[0]) {
+    if (anr[1] > anr[2]) {
+      a1 = 0;
+      lanr = 1;
+      a2 = 2;
+    }
+    else {
+      a1 = 0;
+      a2 = 1;
+      lanr = 2;
+    }
+  }
+  else {
+    if (anr[0] > anr[2]) {
+      lanr = 0;
+      a1 = 1;
+      a2 = 2;
+    }
+    else {
+      a1 = 0;
+      a2 = 1;
+      lanr = 2;
+    }
+  }
+
+  // compute center point of incident face, in reference-face coordinates
+  btVector3 center;
+  if (nr[lanr] < 0) {
+    for (i=0; i<3; i++) center[i] = pb[i] - pa[i] + Sb[lanr] * Rb[i*4+lanr];
+  }
+  else {
+    for (i=0; i<3; i++) center[i] = pb[i] - pa[i] - Sb[lanr] * Rb[i*4+lanr];
+  }
+
+  // find the normal and non-normal axis numbers of the reference box
+  int codeN,code1,code2;
+  if (code <= 3) codeN = code-1; else codeN = code-4;
+  if (codeN==0) {
+    code1 = 1;
+    code2 = 2;
+  }
+  else if (codeN==1) {
+    code1 = 0;
+    code2 = 2;
+  }
+  else {
+    code1 = 0;
+    code2 = 1;
+  }
+
+  // find the four corners of the incident face, in reference-face coordinates
+  btScalar quad[8];	// 2D coordinate of incident face (x,y pairs)
+  btScalar c1,c2,m11,m12,m21,m22;
+  c1 = dDOT14 (center,Ra+code1);
+  c2 = dDOT14 (center,Ra+code2);
+  // optimize this? - we have already computed this data above, but it is not
+  // stored in an easy-to-index format. for now it's quicker just to recompute
+  // the four dot products.
+  m11 = dDOT44 (Ra+code1,Rb+a1);
+  m12 = dDOT44 (Ra+code1,Rb+a2);
+  m21 = dDOT44 (Ra+code2,Rb+a1);
+  m22 = dDOT44 (Ra+code2,Rb+a2);
+  {
+    btScalar k1 = m11*Sb[a1];
+    btScalar k2 = m21*Sb[a1];
+    btScalar k3 = m12*Sb[a2];
+    btScalar k4 = m22*Sb[a2];
+    quad[0] = c1 - k1 - k3;
+    quad[1] = c2 - k2 - k4;
+    quad[2] = c1 - k1 + k3;
+    quad[3] = c2 - k2 + k4;
+    quad[4] = c1 + k1 + k3;
+    quad[5] = c2 + k2 + k4;
+    quad[6] = c1 + k1 - k3;
+    quad[7] = c2 + k2 - k4;
+  }
+
+  // find the size of the reference face
+  btScalar rect[2];
+  rect[0] = Sa[code1];
+  rect[1] = Sa[code2];
+
+  // intersect the incident and reference faces
+  btScalar ret[16];
+  int n = intersectRectQuad2 (rect,quad,ret);
+  if (n < 1) return 0;		// this should never happen
+
+  // convert the intersection points into reference-face coordinates,
+  // and compute the contact position and depth for each point. only keep
+  // those points that have a positive (penetrating) depth. delete points in
+  // the 'ret' array as necessary so that 'point' and 'ret' correspond.
+  btScalar point[3*8];		// penetrating contact points
+  btScalar dep[8];			// depths for those points
+  btScalar det1 = 1.f/(m11*m22 - m12*m21);	// NOTE(review): there is no guard against a zero determinant here
+  m11 *= det1;
+  m12 *= det1;
+  m21 *= det1;
+  m22 *= det1;
+  int cnum = 0;			// number of penetrating contact points found
+  for (j=0; j < n; j++) {
+    btScalar k1 =  m22*(ret[j*2]-c1) - m12*(ret[j*2+1]-c2);
+    btScalar k2 = -m21*(ret[j*2]-c1) + m11*(ret[j*2+1]-c2);
+    for (i=0; i<3; i++) point[cnum*3+i] =
+			  center[i] + k1*Rb[i*4+a1] + k2*Rb[i*4+a2];
+    dep[cnum] = Sa[codeN] - dDOT(normal2,point+cnum*3);	// reference half-extent minus the point's offset along normal2
+    if (dep[cnum] >= 0) {
+      ret[cnum*2] = ret[j*2];
+      ret[cnum*2+1] = ret[j*2+1];
+      cnum++;
+    }
+  }
+  if (cnum < 1) return 0;	// this should never happen
+
+  // we can't generate more contacts than we actually have
+  if (maxc > cnum) maxc = cnum;
+  if (maxc < 1) maxc = 1;
+
+  if (cnum <= maxc) {
+
+	  if (code<4) 
+	  {
+    // we have less contacts than we need, so we use them all
+    for (j=0; j < cnum; j++) 
+	{
+		btVector3 pointInWorld;
+		for (i=0; i<3; i++) 
+			pointInWorld[i] = point[j*3+i] + pa[i];
+		output.addContactPoint(-normal,pointInWorld,-dep[j]);
+
+    }
+	  } else
+	  {
+		  // we have less contacts than we need, so we use them all
+		for (j=0; j < cnum; j++) 
+		{
+			btVector3 pointInWorld;
+			for (i=0; i<3; i++) 
+				pointInWorld[i] = point[j*3+i] + pa[i]-normal[i]*dep[j];
+				//pointInWorld[i] = point[j*3+i] + pa[i];
+			output.addContactPoint(-normal,pointInWorld,-dep[j]);
+		}
+	  }
+  }
+  else {
+    // we have more contacts than are wanted, some of them must be culled.
+    // find the deepest point, it is always the first contact.
+    int i1 = 0;
+    btScalar maxdepth = dep[0];
+    for (i=1; i<cnum; i++) {
+      if (dep[i] > maxdepth) {
+	maxdepth = dep[i];
+	i1 = i;
+      }
+    }
+
+    int iret[8];
+    cullPoints2 (cnum,ret,maxc,i1,iret);
+
+    for (j=0; j < maxc; j++) {
+//      dContactGeom *con = CONTACT(contact,skip*j);
+  //    for (i=0; i<3; i++) con->pos[i] = point[iret[j]*3+i] + pa[i];
+    //  con->depth = dep[iret[j]];
+
+		btVector3 posInWorld;
+		for (i=0; i<3; i++) 
+			posInWorld[i] = point[iret[j]*3+i] + pa[i];
+		if (code<4) 
+	   {
+			output.addContactPoint(-normal,posInWorld,-dep[iret[j]]);
+		} else
+		{
+			output.addContactPoint(-normal,posInWorld-normal*dep[iret[j]],-dep[iret[j]]);
+		}
+    }
+    cnum = maxc;
+  }
+
+  *return_code = code;	// note: *return_code is not written on any of the 'return 0' paths above
+  return cnum;
+}
+
+void	btBoxBoxDetector::getClosestPoints(const ClosestPointInput& input,Result& output,class btIDebugDraw* /*debugDraw*/,bool /*swapResults*/)
+{
+	// Adapter between the detector interface and dBoxBox2(): repacks both
+	// rotations as dMatrix3 and lets dBoxBox2 report its contacts to 'output'.
+	const btTransform& xformA = input.m_transformA;
+	const btTransform& xformB = input.m_transformB;
+
+	// Row j of each basis goes to elements 4*j .. 4*j+2; elements 3, 7 and 11 stay unset.
+	dMatrix3 rotA;
+	dMatrix3 rotB;
+	for (int row=0; row<3; row++)
+	{
+		const btVector3& basisRowA = xformA.getBasis()[row];
+		const btVector3& basisRowB = xformB.getBasis()[row];
+		btScalar* outA = rotA + 4*row;
+		btScalar* outB = rotB + 4*row;
+
+		outA[0] = basisRowA.x();
+		outB[0] = basisRowB.x();
+
+		outA[1] = basisRowA.y();
+		outB[1] = basisRowB.y();
+
+		outA[2] = basisRowA.z();
+		outB[2] = basisRowB.z();
+	}
+
+	// dBoxBox2 halves the side vectors it receives, so it is given twice the half extents.
+	const btVector3 sidesA = 2.f*m_box1->getHalfExtentsWithMargin();
+	const btVector3 sidesB = 2.f*m_box2->getHalfExtentsWithMargin();
+
+	// Out-parameters of dBoxBox2 that this wrapper does not read afterwards;
+	// the contact pointer and skip value are unused by dBoxBox2 itself.
+	btVector3 contactNormal;
+	btScalar penetration;
+	int contactType;
+	const int maxContacts = 4;
+	dContactGeom* unusedContact = 0;
+	const int unusedSkip = 0;
+
+	dBoxBox2 (xformA.getOrigin(), rotA, sidesA,
+		xformB.getOrigin(), rotB, sidesB,
+		contactNormal, &penetration, &contactType,
+		maxContacts, unusedContact, unusedSkip,
+		output);
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btBoxBoxDetector.h b/src/bullet/BulletCollision/CollisionDispatch/btBoxBoxDetector.h
new file mode 100644
index 00000000..3c941f7d
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btBoxBoxDetector.h
@@ -0,0 +1,44 @@
+/*
+ * Box-Box collision detection re-distributed under the ZLib license with permission from Russell L. Smith
+ * Original version is from Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org
+
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#ifndef BT_BOX_BOX_DETECTOR_H
+#define BT_BOX_BOX_DETECTOR_H
+
+
+class btBoxShape;
+#include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h"
+
+
+/// btBoxBoxDetector wraps the ODE box-box collision detector
+/// re-distributed under the Zlib license with permission from Russell L. Smith
+struct btBoxBoxDetector : public btDiscreteCollisionDetectorInterface
+{
+	// The two boxes under test; kept as plain pointers and not deleted by this struct (its destructor is empty).
+	btBoxShape* m_box1;
+	btBoxShape* m_box2;
+
+	/// Remembers box1 and box2 for later getClosestPoints() calls.
+	btBoxBoxDetector(btBoxShape* box1,btBoxShape* box2);
+
+	virtual ~btBoxBoxDetector() {}
+
+	/// Runs the box-box test for input.m_transformA / m_transformB and reports contacts through output.
+	virtual void	getClosestPoints(const ClosestPointInput& input,Result& output,class btIDebugDraw* debugDraw,bool swapResults=false);
+};
+
+#endif //BT_BOX_BOX_DETECTOR_H
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btCollisionConfiguration.h b/src/bullet/BulletCollision/CollisionDispatch/btCollisionConfiguration.h
new file mode 100644
index 00000000..f63e0923
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btCollisionConfiguration.h
@@ -0,0 +1,48 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_COLLISION_CONFIGURATION
+#define BT_COLLISION_CONFIGURATION
+
+struct btCollisionAlgorithmCreateFunc;
+
+class btStackAlloc;
+class btPoolAllocator;
+
+///btCollisionConfiguration is the interface through which Bullet collision detection is configured:
+///stack allocator, default collision algorithms (per pair of proxy types) and the memory pools
+///@todo: describe the meaning of each setting in more detail
+class	btCollisionConfiguration
+{
+
+public:
+
+	virtual ~btCollisionConfiguration()
+	{
+	}
+
+	///memory pools; btCollisionDispatcher's constructor fetches both and stores the returned pointers
+	virtual btPoolAllocator* getPersistentManifoldPool() = 0;
+
+	virtual btPoolAllocator* getCollisionAlgorithmPool() = 0;
+	///stack allocator accessor
+	virtual btStackAlloc*	getStackAllocator() = 0;
+	///create-function lookup keyed by two proxy types; NOTE(review): valid ids are presumably below MAX_BROADPHASE_COLLISION_TYPES - confirm
+	virtual btCollisionAlgorithmCreateFunc* getCollisionAlgorithmCreateFunc(int proxyType0,int proxyType1) =0;
+
+};
+
+#endif //BT_COLLISION_CONFIGURATION
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btCollisionCreateFunc.h b/src/bullet/BulletCollision/CollisionDispatch/btCollisionCreateFunc.h
new file mode 100644
index 00000000..1d7e7440
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btCollisionCreateFunc.h
@@ -0,0 +1,45 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_COLLISION_CREATE_FUNC
+#define BT_COLLISION_CREATE_FUNC
+
+#include "LinearMath/btAlignedObjectArray.h"
+class btCollisionAlgorithm;
+class btCollisionObject;
+
+struct btCollisionAlgorithmConstructionInfo;
+
+///Factory interface the btCollisionDispatcher uses to register and create btCollisionAlgorithm instances
+struct btCollisionAlgorithmCreateFunc
+{
+	/// Initialised to false by the constructor; this header does not show who sets or reads it.
+	bool m_swapped;
+
+	btCollisionAlgorithmCreateFunc() : m_swapped(false) {}
+
+	virtual ~btCollisionAlgorithmCreateFunc() {}
+
+	/// Default factory: creates nothing and returns 0; derived create functions
+	/// override this to build their algorithm for the pair (body0, body1).
+	virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& , btCollisionObject* body0,btCollisionObject* body1)
+	{
+		(void)body0;
+		(void)body1;
+		return 0;
+	}
+};
+#endif //BT_COLLISION_CREATE_FUNC
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.cpp b/src/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.cpp
new file mode 100644
index 00000000..29674f3b
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.cpp
@@ -0,0 +1,310 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#include "btCollisionDispatcher.h"
+
+
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
+#include "LinearMath/btPoolAllocator.h"
+#include "BulletCollision/CollisionDispatch/btCollisionConfiguration.h"
+
+int gNumManifold = 0;	//global manifold counter: incremented by getNewManifold, decremented by releaseManifold
+
+#ifdef BT_DEBUG
+#include <stdio.h>
+#endif
+
+
+///Builds a dispatcher on top of the given collision configuration: installs the default near callback,
+///takes over the configuration's two pool allocators and fills the table of algorithm create functions.
+btCollisionDispatcher::btCollisionDispatcher(btCollisionConfiguration* collisionConfiguration)
+	: m_dispatcherFlags(btCollisionDispatcher::CD_USE_RELATIVE_CONTACT_BREAKING_THRESHOLD),
+	  m_collisionConfiguration(collisionConfiguration)
+{
+	setNearCallback(defaultNearCallback);
+
+	m_collisionAlgorithmPoolAllocator = collisionConfiguration->getCollisionAlgorithmPool();
+	m_persistentManifoldPoolAllocator = collisionConfiguration->getPersistentManifoldPool();
+
+	//ask the configuration for a create function for every ordered pair of types
+	for (int type0 = 0; type0 < MAX_BROADPHASE_COLLISION_TYPES; ++type0)
+	{
+		for (int type1 = 0; type1 < MAX_BROADPHASE_COLLISION_TYPES; ++type1)
+		{
+			btCollisionAlgorithmCreateFunc* createFunc = m_collisionConfiguration->getCollisionAlgorithmCreateFunc(type0, type1);
+			m_doubleDispatch[type0][type1] = createFunc;
+			//findAlgorithm calls through this entry without a null check
+			btAssert(createFunc);
+		}
+	}
+}
+
+///Stores createFunc as the m_doubleDispatch entry for the ordered pair (proxyType0, proxyType1); the indices are not range-checked.
+void btCollisionDispatcher::registerCollisionCreateFunc(int proxyType0, int proxyType1, btCollisionAlgorithmCreateFunc *createFunc)
+{
+	m_doubleDispatch[proxyType0][proxyType1] = createFunc;
+}
+///Empty body: nothing is released explicitly here.
+btCollisionDispatcher::~btCollisionDispatcher()
+{
+}
+///Creates a contact manifold for the pair (b0,b1), appends it to m_manifoldsPtr and returns it (0 when no memory could be obtained).
+btPersistentManifold*	btCollisionDispatcher::getNewManifold(void* b0,void* b1) 
+{ 
+	//the bodies arrive as void*; they are used as btCollisionObject below
+	btCollisionObject* body0 = static_cast<btCollisionObject*>(b0);
+	btCollisionObject* body1 = static_cast<btCollisionObject*>(b1);
+
+	//optional relative contact breaking threshold, turned on by default (use setDispatcherFlags to switch off feature for improved performance)
+	btScalar contactBreakingThreshold = gContactBreakingThreshold;
+	if (m_dispatcherFlags & btCollisionDispatcher::CD_USE_RELATIVE_CONTACT_BREAKING_THRESHOLD)
+	{
+		contactBreakingThreshold = btMin(body0->getCollisionShape()->getContactBreakingThreshold(gContactBreakingThreshold),
+			body1->getCollisionShape()->getContactBreakingThreshold(gContactBreakingThreshold));
+	}
+
+	btScalar contactProcessingThreshold = btMin(body0->getContactProcessingThreshold(),body1->getContactProcessingThreshold());
+
+	void* mem = 0;
+	if (m_persistentManifoldPoolAllocator->getFreeCount())
+	{
+		mem = m_persistentManifoldPoolAllocator->allocate(sizeof(btPersistentManifold));
+	}
+	else if ((m_dispatcherFlags&CD_DISABLE_CONTACTPOOL_DYNAMIC_ALLOCATION)==0)
+	{
+		//we got a pool memory overflow, by default we fallback to dynamically allocate memory
+		mem = btAlignedAlloc(sizeof(btPersistentManifold),16);
+	}
+	else
+	{
+		//a contiguous contact pool is required: make sure to increase the m_defaultMaxPersistentManifoldPoolSize in the btDefaultCollisionConstructionInfo/btDefaultCollisionConfiguration
+		btAssert(0);
+	}
+	if (!mem)
+		return 0; //no storage: leave gNumManifold and m_manifoldsPtr untouched and never construct into a null pointer
+
+	btPersistentManifold* manifold = new(mem) btPersistentManifold (body0,body1,0,contactBreakingThreshold,contactProcessingThreshold);
+	manifold->m_index1a = m_manifoldsPtr.size();
+	m_manifoldsPtr.push_back(manifold);
+	//count the manifold only once it exists, so the counter stays paired with releaseManifold
+	gNumManifold++;
+
+	return manifold;
+}
+///Forwards to btPersistentManifold::clearManifold() on the given manifold.
+void btCollisionDispatcher::clearManifold(btPersistentManifold* manifold)
+{
+	manifold->clearManifold();
+}
+
+///Clears the manifold, removes it from m_manifoldsPtr, destroys it and gives its memory back to the pool or to btAlignedFree.
+void btCollisionDispatcher::releaseManifold(btPersistentManifold* manifold)
+{
+	gNumManifold--;
+
+	//printf("releaseManifold: gNumManifold %d\n",gNumManifold);
+	clearManifold(manifold);
+
+	//unordered removal: swap the entry with the last one, then shrink the array
+	const int slot = manifold->m_index1a;
+	const int lastSlot = m_manifoldsPtr.size()-1;
+	btAssert(slot <= lastSlot);
+	m_manifoldsPtr.swap(slot,lastSlot);
+	m_manifoldsPtr[slot]->m_index1a = slot; //the entry now sitting in this slot records its position
+	m_manifoldsPtr.pop_back();
+
+	manifold->~btPersistentManifold();
+	//memory the pool does not recognise is released with btAlignedFree
+	if (!m_persistentManifoldPoolAllocator->validPtr(manifold))
+	{
+		btAlignedFree(manifold);
+		return;
+	}
+	m_persistentManifoldPoolAllocator->freeMemory(manifold);
+}
+
+	
+///Picks the create function registered for the two bodies' shape types and lets it build the collision algorithm.
+btCollisionAlgorithm* btCollisionDispatcher::findAlgorithm(btCollisionObject* body0,btCollisionObject* body1,btPersistentManifold* sharedManifold)
+{
+	btCollisionAlgorithmConstructionInfo ci;
+	ci.m_dispatcher1 = this;
+	ci.m_manifold = sharedManifold;
+
+	const int shapeType0 = body0->getCollisionShape()->getShapeType();
+	const int shapeType1 = body1->getCollisionShape()->getShapeType();
+	btCollisionAlgorithmCreateFunc* createFunc = m_doubleDispatch[shapeType0][shapeType1];
+	return createFunc->CreateCollisionAlgorithm(ci,body0,body1);
+}
+
+
+
+///True when both bodies have contact response and at least one of them is neither static nor kinematic.
+bool	btCollisionDispatcher::needsResponse(btCollisionObject* body0,btCollisionObject* body1)
+{
+	//here you can do filtering
+	if (!body0->hasContactResponse() || !body1->hasContactResponse())
+		return false;
+
+	//no response between two static/kinematic bodies:
+	const bool bothFixed = body0->isStaticOrKinematicObject() && body1->isStaticOrKinematicObject();
+	return !bothFixed;
+}
+///Returns false when neither body is active or when body0->checkCollideWith(body1) rejects the pair; true otherwise.
+bool	btCollisionDispatcher::needsCollision(btCollisionObject* body0,btCollisionObject* body1)
+{
+	btAssert(body0);
+	btAssert(body1);
+
+#ifdef BT_DEBUG
+	//debug builds print one warning (remembered in m_dispatcherFlags) for a static-static pair;
+	//broadphase filtering already deals with this
+	const bool alreadyReported = (m_dispatcherFlags & btCollisionDispatcher::CD_STATIC_STATIC_REPORTED) != 0;
+	if (!alreadyReported && body0->isStaticOrKinematicObject() && body1->isStaticOrKinematicObject())
+	{
+		m_dispatcherFlags |= btCollisionDispatcher::CD_STATIC_STATIC_REPORTED;
+		printf("warning btCollisionDispatcher::needsCollision: static-static collision!\n");
+	}
+#endif //BT_DEBUG
+
+	//skip pairs in which neither body is active
+	if (!body0->isActive() && !body1->isActive())
+	{
+		return false;
+	}
+
+	//otherwise body0 decides through checkCollideWith whether the pair is wanted
+	const bool pairAccepted = body0->checkCollideWith(body1);
+	return pairAccepted;
+
+}
+
+
+
+///Adapter that lets a pair cache drive the dispatcher: every overlapping pair it is given is passed on
+///to the dispatcher's near callback, no matter how those pairs are stored (array, set, map etc).
+class btCollisionPairCallback : public btOverlapCallback
+{
+	const btDispatcherInfo& m_dispatchInfo;
+	btCollisionDispatcher*	m_dispatcher;
+
+public:
+	///Keeps a reference to dispatchInfo and the dispatcher pointer; neither is copied.
+	btCollisionPairCallback(const btDispatcherInfo& dispatchInfo,btCollisionDispatcher*	dispatcher)
+		: m_dispatchInfo(dispatchInfo), m_dispatcher(dispatcher)
+	{
+	}
+
+	//assignment operator kept commented out (m_dispatchInfo is a const reference member)
+	/*btCollisionPairCallback& operator=(btCollisionPairCallback& other)
+	{
+		m_dispatchInfo = other.m_dispatchInfo;
+		m_dispatcher = other.m_dispatcher;
+		return *this;
+	}
+	*/
+
+	virtual ~btCollisionPairCallback() {}
+
+	///Invokes the dispatcher's current near callback for one pair; always returns false.
+	virtual bool	processOverlap(btBroadphasePair& pair)
+	{
+		btNearCallback nearCallback = m_dispatcher->getNearCallback();
+		(*nearCallback)(pair,*m_dispatcher,m_dispatchInfo);
+
+		return false;
+	}
+};
+
+
+///Has pairCache visit all of its overlapping pairs with a btCollisionPairCallback, which runs the near callback on each.
+void	btCollisionDispatcher::dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher) 
+{
+	//m_blockedForChanges = true;
+
+	//the callback object only lives for the duration of this call
+	btCollisionPairCallback	pairCallback(dispatchInfo,this);
+	btOverlapCallback* overlapCallback = &pairCallback;
+	pairCache->processAllOverlappingPairs(overlapCallback,dispatcher);
+
+	//m_blockedForChanges = false;
+}
+
+
+
+
+//by default, Bullet will use this near callback: it creates the pair's algorithm on first use and runs a discrete or a time-of-impact query
+void btCollisionDispatcher::defaultNearCallback(btBroadphasePair& collisionPair, btCollisionDispatcher& dispatcher, const btDispatcherInfo& dispatchInfo)
+{
+	btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
+	btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
+
+	if (!dispatcher.needsCollision(colObj0,colObj1))
+		return;
+
+	//dispatcher will keep algorithms persistent in the collision pair
+	if (!collisionPair.m_algorithm)
+		collisionPair.m_algorithm = dispatcher.findAlgorithm(colObj0,colObj1);
+
+	btCollisionAlgorithm* algorithm = collisionPair.m_algorithm;
+	if (!algorithm)
+		return; //no algorithm could be created for this pair
+
+	btManifoldResult contactPointResult(colObj0,colObj1);
+
+	if (dispatchInfo.m_dispatchFunc == btDispatcherInfo::DISPATCH_DISCRETE)
+	{
+		//discrete collision detection query
+		algorithm->processCollision(colObj0,colObj1,dispatchInfo,&contactPointResult);
+		return;
+	}
+
+	//continuous collision detection query, time of impact (toi)
+	const btScalar toi = algorithm->calculateTimeOfImpact(colObj0,colObj1,dispatchInfo,&contactPointResult);
+	if (toi < dispatchInfo.m_timeOfImpact)
+	{
+		//keep the smallest time of impact reported so far
+		dispatchInfo.m_timeOfImpact = toi;
+	}
+}
+
+///Returns size bytes for a collision algorithm: from the pool while it has free slots, otherwise 16-byte aligned from btAlignedAlloc.
+void* btCollisionDispatcher::allocateCollisionAlgorithm(int size)
+{
+	const bool poolExhausted = (m_collisionAlgorithmPoolAllocator->getFreeCount() == 0);
+	if (poolExhausted)
+	{
+		//warn user for overflow?
+		return	btAlignedAlloc(static_cast<size_t>(size), 16);
+	}
+	return m_collisionAlgorithmPoolAllocator->allocate(size);
+}
+///Releases memory handed out by allocateCollisionAlgorithm: back to the pool if the pool recognises the pointer, else via btAlignedFree.
+void btCollisionDispatcher::freeCollisionAlgorithm(void* ptr)
+{
+	if (!m_collisionAlgorithmPoolAllocator->validPtr(ptr))
+	{
+		//not pool memory
+		btAlignedFree(ptr);
+		return;
+	}
+	m_collisionAlgorithmPoolAllocator->freeMemory(ptr);
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.h b/src/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.h
new file mode 100644
index 00000000..5accad9a
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.h
@@ -0,0 +1,172 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_COLLISION__DISPATCHER_H
+#define BT_COLLISION__DISPATCHER_H
+
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+
+#include "BulletCollision/CollisionDispatch/btManifoldResult.h"
+
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+class btIDebugDraw;
+class btOverlappingPairCache;
+class btPoolAllocator;
+class btCollisionConfiguration;
+
+#include "btCollisionCreateFunc.h"
+
+#define USE_DISPATCH_REGISTRY_ARRAY 1
+//forward declaration: the callback signature below takes the dispatcher by reference
+class btCollisionDispatcher;
+///user can override this nearcallback for collision filtering and more finegrained control over collision detection
+typedef void (*btNearCallback)(btBroadphasePair& collisionPair, btCollisionDispatcher& dispatcher, const btDispatcherInfo& dispatchInfo);
+
+
+///btCollisionDispatcher supports algorithms that handle ConvexConvex and ConvexConcave collision pairs.
+///Time of Impact, Closest Points and Penetration Depth.
+class btCollisionDispatcher : public btDispatcher
+{
+
+protected:
+
+	int		m_dispatcherFlags;	///<combination of DispatcherFlags values, see get/setDispatcherFlags
+
+	btAlignedObjectArray<btPersistentManifold*>	m_manifoldsPtr;	///<manifolds reachable through getManifoldByIndexInternal
+
+	btManifoldResult	m_defaultManifoldResult;
+
+	btNearCallback		m_nearCallback;	///<see setNearCallback/getNearCallback
+	
+	btPoolAllocator*	m_collisionAlgorithmPoolAllocator;
+
+	btPoolAllocator*	m_persistentManifoldPoolAllocator;	///<returned by getInternalManifoldPool
+
+	btCollisionAlgorithmCreateFunc* m_doubleDispatch[MAX_BROADPHASE_COLLISION_TYPES][MAX_BROADPHASE_COLLISION_TYPES];	///<create function per (proxyType0, proxyType1)
+
+	btCollisionConfiguration*	m_collisionConfiguration;	///<see get/setCollisionConfiguration
+
+
+public:
+	///values for get/setDispatcherFlags; each is a distinct bit
+	enum DispatcherFlags
+	{
+		CD_STATIC_STATIC_REPORTED = 1,
+		CD_USE_RELATIVE_CONTACT_BREAKING_THRESHOLD = 2,
+		CD_DISABLE_CONTACTPOOL_DYNAMIC_ALLOCATION = 4
+	};
+
+	int	getDispatcherFlags() const
+	{
+		return m_dispatcherFlags;
+	}
+	///replaces the whole flag word (flags are not OR-ed into the previous value)
+	void	setDispatcherFlags(int flags)
+	{
+		m_dispatcherFlags = flags;
+	}
+
+	///registerCollisionCreateFunc allows registration of custom/alternative collision create functions
+	void	registerCollisionCreateFunc(int proxyType0,int proxyType1, btCollisionAlgorithmCreateFunc* createFunc);
+
+	int	getNumManifolds() const
+	{ 
+		return int( m_manifoldsPtr.size());
+	}
+	///pointer to the first stored manifold pointer, or 0 when no manifold is stored
+	btPersistentManifold**	getInternalManifoldPointer()
+	{
+		return m_manifoldsPtr.size()? &m_manifoldsPtr[0] : 0;
+	}
+	///index is passed straight to the array; no range check is done here
+	 btPersistentManifold* getManifoldByIndexInternal(int index)
+	{
+		return m_manifoldsPtr[index];
+	}
+
+	 const btPersistentManifold* getManifoldByIndexInternal(int index) const
+	{
+		return m_manifoldsPtr[index];
+	}
+
+	btCollisionDispatcher (btCollisionConfiguration* collisionConfiguration);
+
+	virtual ~btCollisionDispatcher();
+
+	virtual btPersistentManifold*	getNewManifold(void* b0,void* b1);
+	
+	virtual void releaseManifold(btPersistentManifold* manifold);
+
+
+	virtual void clearManifold(btPersistentManifold* manifold);
+
+	///sharedManifold is optional and defaults to 0
+	btCollisionAlgorithm* findAlgorithm(btCollisionObject* body0,btCollisionObject* body1,btPersistentManifold* sharedManifold = 0);
+		
+	virtual bool	needsCollision(btCollisionObject* body0,btCollisionObject* body1);
+	
+	virtual bool	needsResponse(btCollisionObject* body0,btCollisionObject* body1);
+	
+	virtual void	dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher) ;
+
+	void	setNearCallback(btNearCallback	nearCallback)
+	{
+		m_nearCallback = nearCallback; 
+	}
+
+	btNearCallback	getNearCallback() const
+	{
+		return m_nearCallback;
+	}
+
+	//by default, Bullet will use this near callback
+	static void  defaultNearCallback(btBroadphasePair& collisionPair, btCollisionDispatcher& dispatcher, const btDispatcherInfo& dispatchInfo);
+
+	virtual	void* allocateCollisionAlgorithm(int size);
+
+	virtual	void freeCollisionAlgorithm(void* ptr);
+
+	btCollisionConfiguration*	getCollisionConfiguration()
+	{
+		return m_collisionConfiguration;
+	}
+
+	const btCollisionConfiguration*	getCollisionConfiguration() const
+	{
+		return m_collisionConfiguration;
+	}
+	///only replaces the stored pointer; nothing else is updated by this setter
+	void	setCollisionConfiguration(btCollisionConfiguration* config)
+	{
+		m_collisionConfiguration = config;
+	}
+
+	virtual	btPoolAllocator*	getInternalManifoldPool()
+	{
+		return m_persistentManifoldPoolAllocator;
+	}
+
+	virtual	const btPoolAllocator*	getInternalManifoldPool() const
+	{
+		return m_persistentManifoldPoolAllocator;
+	}
+
+};
+
+#endif //BT_COLLISION__DISPATCHER_H
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btCollisionObject.cpp b/src/bullet/BulletCollision/CollisionDispatch/btCollisionObject.cpp
new file mode 100644
index 00000000..580ea345
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btCollisionObject.cpp
@@ -0,0 +1,116 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btCollisionObject.h"
+#include "LinearMath/btSerializer.h"
+///Creates an object flagged CF_STATIC_OBJECT in state ACTIVE_TAG, without shape or broadphase proxy, with identity transforms and zero interpolation velocities.
+btCollisionObject::btCollisionObject()
+	:	m_interpolationLinearVelocity(btScalar(0.),btScalar(0.),btScalar(0.)),	//serialize() reads this, so it must not stay uninitialised
+		m_interpolationAngularVelocity(btScalar(0.),btScalar(0.),btScalar(0.)),	//likewise
+		m_anisotropicFriction(1.f,1.f,1.f),
+		m_hasAnisotropicFriction(false),
+		m_contactProcessingThreshold(BT_LARGE_FLOAT),
+		m_broadphaseHandle(0),
+		m_collisionShape(0),
+		m_extensionPointer(0),
+		m_rootCollisionShape(0),
+		m_collisionFlags(btCollisionObject::CF_STATIC_OBJECT),
+		m_islandTag1(-1), m_companionId(-1),
+		m_activationState1(ACTIVE_TAG),
+		m_deactivationTime(btScalar(0.)),
+		m_friction(btScalar(0.5)), m_restitution(btScalar(0.)),
+		m_internalType(CO_COLLISION_OBJECT),
+		m_userObjectPointer(0),
+		m_hitFraction(btScalar(1.)),
+		m_ccdSweptSphereRadius(btScalar(0.)), m_ccdMotionThreshold(btScalar(0.)),
+		m_checkCollideWith(false)
+{
+	m_worldTransform.setIdentity();
+	m_interpolationWorldTransform.setIdentity();	//also read by serialize()
+}
+///Empty body: neither the collision shape nor the broadphase handle is released here.
+btCollisionObject::~btCollisionObject()
+{
+}
+///Changes the activation state, unless the current state is DISABLE_DEACTIVATION or DISABLE_SIMULATION (then nothing happens).
+void btCollisionObject::setActivationState(int newState) 
+{ 
+	const bool pinned = (m_activationState1 == DISABLE_DEACTIVATION) || (m_activationState1 == DISABLE_SIMULATION);
+	if (!pinned) m_activationState1 = newState;
+}
+///Sets the activation state unconditionally, without the checks done by setActivationState.
+void btCollisionObject::forceActivationState(int newState)
+{
+	m_activationState1 = newState;
+}
+///Requests ACTIVE_TAG through setActivationState and zeroes m_deactivationTime; static/kinematic objects only react when forceActivation is set.
+void btCollisionObject::activate(bool forceActivation)
+{
+	const bool staticOrKinematic = (m_collisionFlags & (CF_STATIC_OBJECT|CF_KINEMATIC_OBJECT)) != 0;
+	if (staticOrKinematic && !forceActivation)
+		return;
+	setActivationState(ACTIVE_TAG);
+	m_deactivationTime = btScalar(0.);
+}
+///Copies this object's state into dataBuffer, which is written as a btCollisionObjectData, and returns btCollisionObjectDataName.
+///The serializer provides the unique pointers stored for the shape and the name, and records the name string if there is one.
+const char* btCollisionObject::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btCollisionObjectData* dataOut = (btCollisionObjectData*)dataBuffer;
+
+	m_worldTransform.serialize(dataOut->m_worldTransform);
+	m_interpolationWorldTransform.serialize(dataOut->m_interpolationWorldTransform);
+	m_interpolationLinearVelocity.serialize(dataOut->m_interpolationLinearVelocity);
+	m_interpolationAngularVelocity.serialize(dataOut->m_interpolationAngularVelocity);
+	m_anisotropicFriction.serialize(dataOut->m_anisotropicFriction);
+	dataOut->m_hasAnisotropicFriction = m_hasAnisotropicFriction;
+	dataOut->m_contactProcessingThreshold = m_contactProcessingThreshold;
+	//the broadphase handle is written as null
+	dataOut->m_broadphaseHandle = 0;
+	dataOut->m_collisionShape = serializer->getUniquePointer(m_collisionShape);
+	dataOut->m_rootCollisionShape = 0;//@todo
+	dataOut->m_collisionFlags = m_collisionFlags;
+	dataOut->m_islandTag1 = m_islandTag1;
+	dataOut->m_companionId = m_companionId;
+	dataOut->m_activationState1 = m_activationState1;
+	dataOut->m_deactivationTime = m_deactivationTime;
+	dataOut->m_friction = m_friction;
+	dataOut->m_restitution = m_restitution;
+	dataOut->m_internalType = m_internalType;
+	dataOut->m_hitFraction = m_hitFraction;
+	dataOut->m_ccdSweptSphereRadius = m_ccdSweptSphereRadius;
+	dataOut->m_ccdMotionThreshold = m_ccdMotionThreshold;
+	dataOut->m_checkCollideWith = m_checkCollideWith;
+
+	//the name string is only serialized when the serializer returned a non-null unique pointer for it
+	char* name = (char*) serializer->findNameForPointer(this);
+	dataOut->m_name = (char*)serializer->getUniquePointer(name);
+	if (dataOut->m_name)
+	{
+		serializer->serializeName(name);
+	}
+
+	return btCollisionObjectDataName;
+}
+
+///Serializes this object into a chunk of its own, finalized with the code BT_COLLISIONOBJECT_CODE.
+void btCollisionObject::serializeSingleObject(class btSerializer* serializer) const
+{
+	//one element of calculateSerializeBufferSize() bytes
+	btChunk* chunk = serializer->allocate(calculateSerializeBufferSize(),1);
+	const char* structType = serialize(chunk->m_oldPtr, serializer);
+	serializer->finalizeChunk(chunk,structType,BT_COLLISIONOBJECT_CODE,(void*)this);
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btCollisionObject.h b/src/bullet/BulletCollision/CollisionDispatch/btCollisionObject.h
new file mode 100644
index 00000000..3a11c967
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btCollisionObject.h
@@ -0,0 +1,524 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_COLLISION_OBJECT_H
+#define BT_COLLISION_OBJECT_H
+
+#include "LinearMath/btTransform.h"
+
+//island management: values stored in m_activationState1 (see getActivationState/setActivationState/forceActivationState)
+#define ACTIVE_TAG 1
+#define ISLAND_SLEEPING 2 //isActive() returns false in this state
+#define WANTS_DEACTIVATION 3
+#define DISABLE_DEACTIVATION 4 //setActivationState() leaves this state unchanged
+#define DISABLE_SIMULATION 5 //isActive() returns false; setActivationState() leaves this state unchanged
+
+struct	btBroadphaseProxy;
+class	btCollisionShape;
+struct btCollisionShapeData;
+#include "LinearMath/btMotionState.h"
+#include "LinearMath/btAlignedAllocator.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+typedef btAlignedObjectArray<class btCollisionObject*> btCollisionObjectArray;
+//btCollisionObjectData and its name string select the double or the float layout, depending on BT_USE_DOUBLE_PRECISION
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btCollisionObjectData btCollisionObjectDoubleData
+#define btCollisionObjectDataName "btCollisionObjectDoubleData"
+#else
+#define btCollisionObjectData btCollisionObjectFloatData
+#define btCollisionObjectDataName "btCollisionObjectFloatData"
+#endif
+
+
+/// btCollisionObject can be used to manage collision detection objects. 
+/// btCollisionObject maintains all information that is needed for a collision detection: Shape, Transform and AABB proxy.
+/// They can be added to the btCollisionWorld.
+ATTRIBUTE_ALIGNED16(class)	btCollisionObject
+{
+
+protected:
+
+	btTransform	m_worldTransform;
+
+	///m_interpolationWorldTransform is used for CCD and interpolation
+	///it can be either previous or future (predicted) transform
+	btTransform	m_interpolationWorldTransform;
+	//those two are experimental: just added for bullet time effect, so you can still apply impulses (directly modifying velocities) 
+	//without destroying the continuous interpolated motion (which uses this interpolation velocities)
+	btVector3	m_interpolationLinearVelocity;
+	btVector3	m_interpolationAngularVelocity;
+	
+	btVector3	m_anisotropicFriction;
+	int			m_hasAnisotropicFriction;	///<non-zero once setAnisotropicFriction was given a component different from 1
+	btScalar	m_contactProcessingThreshold;	
+
+	btBroadphaseProxy*		m_broadphaseHandle;
+	btCollisionShape*		m_collisionShape;
+	///m_extensionPointer is used by some internal low-level Bullet extensions.
+	void*					m_extensionPointer;
+	
+	///m_rootCollisionShape is temporarily used to store the original collision shape
+	///The m_collisionShape might be temporarily replaced by a child collision shape during collision detection purposes
+	///If it is NULL, the m_collisionShape is not temporarily replaced.
+	btCollisionShape*		m_rootCollisionShape;
+
+	int				m_collisionFlags;	///<combination of CollisionFlags values
+
+	int				m_islandTag1;
+	int				m_companionId;
+
+	int				m_activationState1;	///<one of ACTIVE_TAG .. DISABLE_SIMULATION
+	btScalar			m_deactivationTime;
+
+	btScalar		m_friction;
+	btScalar		m_restitution;
+
+	///m_internalType is reserved to distinguish Bullet's btCollisionObject, btRigidBody, btSoftBody, btGhostObject etc.
+	///do not assign your own m_internalType unless you write a new dynamics object class.
+	int				m_internalType;
+
+	///users can point to their objects, m_userObjectPointer is not used by Bullet, see setUserPointer/getUserPointer
+	void*			m_userObjectPointer;
+
+	///time of impact calculation
+	btScalar		m_hitFraction; 
+	
+	///Swept sphere radius (0.0 by default), see btConvexConvexAlgorithm::
+	btScalar		m_ccdSweptSphereRadius;
+
+	/// Don't do continuous collision detection if the motion (in one step) is less than m_ccdMotionThreshold
+	btScalar		m_ccdMotionThreshold;
+	
+	/// If some object should have elaborate collision filtering by sub-classes
+	int			m_checkCollideWith;
+	///hook consulted by checkCollideWith when m_checkCollideWith is non-zero; this base version accepts every object
+	virtual bool	checkCollideWithOverride(btCollisionObject* /* co */)
+	{
+		return true;
+	}
+
+public:
+
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	///bit values combined in m_collisionFlags
+	enum CollisionFlags
+	{
+		CF_STATIC_OBJECT= 1,
+		CF_KINEMATIC_OBJECT= 2,
+		CF_NO_CONTACT_RESPONSE = 4,
+		CF_CUSTOM_MATERIAL_CALLBACK = 8,//this allows per-triangle material (friction/restitution)
+		CF_CHARACTER_OBJECT = 16,
+		CF_DISABLE_VISUALIZE_OBJECT = 32, //disable debug drawing
+		CF_DISABLE_SPU_COLLISION_PROCESSING = 64//disable parallel/SPU processing
+	};
+	///values for m_internalType
+	enum	CollisionObjectTypes
+	{
+		CO_COLLISION_OBJECT =1,
+		CO_RIGID_BODY=2,
+		///CO_GHOST_OBJECT keeps track of all objects overlapping its AABB and that pass its collision filter
+		///It is useful for collision sensors, explosion objects, character controller etc.
+		CO_GHOST_OBJECT=4,
+		CO_SOFT_BODY=8,
+		CO_HF_FLUID=16,
+		CO_USER_TYPE=32
+	};
+
+	SIMD_FORCE_INLINE bool mergesSimulationIslands() const
+	{
+		///static objects, kinematic and object without contact response don't merge islands
+		return  ((m_collisionFlags & (CF_STATIC_OBJECT | CF_KINEMATIC_OBJECT | CF_NO_CONTACT_RESPONSE) )==0);
+	}
+
+	const btVector3& getAnisotropicFriction() const
+	{
+		return m_anisotropicFriction;
+	}
+	void	setAnisotropicFriction(const btVector3& anisotropicFriction)
+	{
+		m_anisotropicFriction = anisotropicFriction;
+		m_hasAnisotropicFriction = (anisotropicFriction[0]!=1.f) || (anisotropicFriction[1]!=1.f) || (anisotropicFriction[2]!=1.f);
+	}
+	bool	hasAnisotropicFriction() const
+	{
+		return m_hasAnisotropicFriction!=0;
+	}
+
+	///the constraint solver can discard solving contacts, if the distance is above this threshold. BT_LARGE_FLOAT by default.
+	///Note that using contacts with positive distance can improve stability. It increases, however, the chance of colliding with degenerate contacts, such as 'interior' triangle edges
+	void	setContactProcessingThreshold( btScalar contactProcessingThreshold)
+	{
+		m_contactProcessingThreshold = contactProcessingThreshold;
+	}
+	btScalar	getContactProcessingThreshold() const
+	{
+		return m_contactProcessingThreshold;
+	}
+
+	SIMD_FORCE_INLINE bool		isStaticObject() const {
+		return (m_collisionFlags & CF_STATIC_OBJECT) != 0;
+	}
+
+	SIMD_FORCE_INLINE bool		isKinematicObject() const
+	{
+		return (m_collisionFlags & CF_KINEMATIC_OBJECT) != 0;
+	}
+
+	SIMD_FORCE_INLINE bool		isStaticOrKinematicObject() const
+	{
+		return (m_collisionFlags & (CF_KINEMATIC_OBJECT | CF_STATIC_OBJECT)) != 0 ;
+	}
+
+	SIMD_FORCE_INLINE bool		hasContactResponse() const {
+		return (m_collisionFlags & CF_NO_CONTACT_RESPONSE)==0;
+	}
+
+	
+	btCollisionObject();
+
+	virtual ~btCollisionObject();
+	///sets both the current and the root collision shape to collisionShape
+	virtual void	setCollisionShape(btCollisionShape* collisionShape)
+	{
+		m_collisionShape = collisionShape;
+		m_rootCollisionShape = collisionShape;
+	}
+
+	SIMD_FORCE_INLINE const btCollisionShape*	getCollisionShape() const
+	{
+		return m_collisionShape;
+	}
+
+	SIMD_FORCE_INLINE btCollisionShape*	getCollisionShape()
+	{
+		return m_collisionShape;
+	}
+
+	SIMD_FORCE_INLINE const btCollisionShape*	getRootCollisionShape() const
+	{
+		return m_rootCollisionShape;
+	}
+
+	SIMD_FORCE_INLINE btCollisionShape*	getRootCollisionShape()
+	{
+		return m_rootCollisionShape;
+	}
+
+	///Avoid using this internal API call
+	///internalSetTemporaryCollisionShape is used to temporary replace the actual collision shape by a child collision shape.
+	void	internalSetTemporaryCollisionShape(btCollisionShape* collisionShape)
+	{
+		m_collisionShape = collisionShape;
+	}
+
+	///Avoid using this internal API call, the extension pointer is used by some Bullet extensions. 
+	///If you need to store your own user pointer, use 'setUserPointer/getUserPointer' instead.
+	void*		internalGetExtensionPointer() const
+	{
+		return m_extensionPointer;
+	}
+	///Avoid using this internal API call, the extension pointer is used by some Bullet extensions
+	///If you need to store your own user pointer, use 'setUserPointer/getUserPointer' instead.
+	void	internalSetExtensionPointer(void* pointer)
+	{
+		m_extensionPointer = pointer;
+	}
+
+	SIMD_FORCE_INLINE	int	getActivationState() const { return m_activationState1;}
+	
+	void setActivationState(int newState);
+
+	void	setDeactivationTime(btScalar time)
+	{
+		m_deactivationTime = time;
+	}
+	btScalar	getDeactivationTime() const
+	{
+		return m_deactivationTime;
+	}
+
+	void forceActivationState(int newState);
+
+	void	activate(bool forceActivation = false);
+	///true unless the activation state is ISLAND_SLEEPING or DISABLE_SIMULATION
+	SIMD_FORCE_INLINE bool isActive() const
+	{
+		return ((getActivationState() != ISLAND_SLEEPING) && (getActivationState() != DISABLE_SIMULATION));
+	}
+
+	void	setRestitution(btScalar rest)
+	{
+		m_restitution = rest;
+	}
+	btScalar	getRestitution() const
+	{
+		return m_restitution;
+	}
+	void	setFriction(btScalar frict)
+	{
+		m_friction = frict;
+	}
+	btScalar	getFriction() const
+	{
+		return m_friction;
+	}
+
+	///reserved for Bullet internal usage
+	int	getInternalType() const
+	{
+		return m_internalType;
+	}
+
+	btTransform&	getWorldTransform()
+	{
+		return m_worldTransform;
+	}
+
+	const btTransform&	getWorldTransform() const
+	{
+		return m_worldTransform;
+	}
+
+	void	setWorldTransform(const btTransform& worldTrans)
+	{
+		m_worldTransform = worldTrans;
+	}
+
+
+	SIMD_FORCE_INLINE btBroadphaseProxy*	getBroadphaseHandle()
+	{
+		return m_broadphaseHandle;
+	}
+
+	SIMD_FORCE_INLINE const btBroadphaseProxy*	getBroadphaseHandle() const
+	{
+		return m_broadphaseHandle;
+	}
+
+	void	setBroadphaseHandle(btBroadphaseProxy* handle)
+	{
+		m_broadphaseHandle = handle;
+	}
+
+
+	const btTransform&	getInterpolationWorldTransform() const
+	{
+		return m_interpolationWorldTransform;
+	}
+
+	btTransform&	getInterpolationWorldTransform()
+	{
+		return m_interpolationWorldTransform;
+	}
+
+	void	setInterpolationWorldTransform(const btTransform&	trans)
+	{
+		m_interpolationWorldTransform = trans;
+	}
+
+	void	setInterpolationLinearVelocity(const btVector3& linvel)
+	{
+		m_interpolationLinearVelocity = linvel;
+	}
+
+	void	setInterpolationAngularVelocity(const btVector3& angvel)
+	{
+		m_interpolationAngularVelocity = angvel;
+	}
+
+	const btVector3&	getInterpolationLinearVelocity() const
+	{
+		return m_interpolationLinearVelocity;
+	}
+
+	const btVector3&	getInterpolationAngularVelocity() const
+	{
+		return m_interpolationAngularVelocity;
+	}
+
+	SIMD_FORCE_INLINE int getIslandTag() const
+	{
+		return	m_islandTag1;
+	}
+
+	void	setIslandTag(int tag)
+	{
+		m_islandTag1 = tag;
+	}
+
+	SIMD_FORCE_INLINE int getCompanionId() const
+	{
+		return	m_companionId;
+	}
+
+	void	setCompanionId(int id)
+	{
+		m_companionId = id;
+	}
+
+	SIMD_FORCE_INLINE btScalar			getHitFraction() const
+	{
+		return m_hitFraction; 
+	}
+
+	void	setHitFraction(btScalar hitFraction)
+	{
+		m_hitFraction = hitFraction;
+	}
+
+	
+	SIMD_FORCE_INLINE int	getCollisionFlags() const
+	{
+		return m_collisionFlags;
+	}
+	///replaces the whole flag word (flags are not OR-ed into the previous value)
+	void	setCollisionFlags(int flags)
+	{
+		m_collisionFlags = flags;
+	}
+	
+	///Swept sphere radius (0.0 by default), see btConvexConvexAlgorithm::
+	btScalar			getCcdSweptSphereRadius() const
+	{
+		return m_ccdSweptSphereRadius;
+	}
+
+	///Swept sphere radius (0.0 by default), see btConvexConvexAlgorithm::
+	void	setCcdSweptSphereRadius(btScalar radius)
+	{
+		m_ccdSweptSphereRadius = radius;
+	}
+
+	btScalar 	getCcdMotionThreshold() const
+	{
+		return m_ccdMotionThreshold;
+	}
+	///m_ccdMotionThreshold multiplied by itself
+	btScalar 	getCcdSquareMotionThreshold() const
+	{
+		return m_ccdMotionThreshold*m_ccdMotionThreshold;
+	}
+
+
+
+	/// Don't do continuous collision detection if the motion (in one step) is less than m_ccdMotionThreshold
+	void	setCcdMotionThreshold(btScalar ccdMotionThreshold)
+	{
+		m_ccdMotionThreshold = ccdMotionThreshold;
+	}
+
+	///users can point to their objects, userPointer is not used by Bullet
+	void*	getUserPointer() const
+	{
+		return m_userObjectPointer;
+	}
+	
+	///users can point to their objects, userPointer is not used by Bullet
+	void	setUserPointer(void* userPointer)
+	{
+		m_userObjectPointer = userPointer;
+	}
+
+	///returns checkCollideWithOverride(co) when m_checkCollideWith is non-zero, and true otherwise
+	inline bool checkCollideWith(btCollisionObject* co)
+	{
+		if (m_checkCollideWith)
+			return checkCollideWithOverride(co);
+
+		return true;
+	}
+
+	virtual	int	calculateSerializeBufferSize()	const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, class btSerializer* serializer) const;
+
+	virtual void serializeSingleObject(class btSerializer* serializer) const;
+
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btCollisionObjectDoubleData
+{
+	void					*m_broadphaseHandle;
+	void					*m_collisionShape;
+	btCollisionShapeData	*m_rootCollisionShape;
+	char					*m_name;
+	//double-precision layout: selected as btCollisionObjectData when BT_USE_DOUBLE_PRECISION is defined
+	btTransformDoubleData	m_worldTransform;
+	btTransformDoubleData	m_interpolationWorldTransform;
+	btVector3DoubleData		m_interpolationLinearVelocity;
+	btVector3DoubleData		m_interpolationAngularVelocity;
+	btVector3DoubleData		m_anisotropicFriction;
+	double					m_contactProcessingThreshold;	
+	double					m_deactivationTime;
+	double					m_friction;
+	double					m_restitution;
+	double					m_hitFraction; 
+	double					m_ccdSweptSphereRadius;
+	double					m_ccdMotionThreshold;
+
+	int						m_hasAnisotropicFriction;
+	int						m_collisionFlags;
+	int						m_islandTag1;
+	int						m_companionId;
+	int						m_activationState1;
+	int						m_internalType;
+	int						m_checkCollideWith;
+	//explicit padding bytes; the float variant of this struct has no such member
+	char	m_padding[4];
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btCollisionObjectFloatData
+{
+	void					*m_broadphaseHandle;
+	void					*m_collisionShape;
+	btCollisionShapeData	*m_rootCollisionShape;
+	char					*m_name;
+	//single-precision layout: selected as btCollisionObjectData when BT_USE_DOUBLE_PRECISION is not defined
+	btTransformFloatData	m_worldTransform;
+	btTransformFloatData	m_interpolationWorldTransform;
+	btVector3FloatData		m_interpolationLinearVelocity;
+	btVector3FloatData		m_interpolationAngularVelocity;
+	btVector3FloatData		m_anisotropicFriction;
+	float					m_contactProcessingThreshold;	
+	float					m_deactivationTime;
+	float					m_friction;
+	float					m_restitution;
+	float					m_hitFraction; 
+	float					m_ccdSweptSphereRadius;
+	float					m_ccdMotionThreshold;
+
+	int						m_hasAnisotropicFriction;
+	int						m_collisionFlags;
+	int						m_islandTag1;
+	int						m_companionId;
+	int						m_activationState1;
+	int						m_internalType;
+	int						m_checkCollideWith;
+};
+
+
+///Size in bytes of the btCollisionObjectData struct (the float or double variant, whichever the macro selects).
+SIMD_FORCE_INLINE	int	btCollisionObject::calculateSerializeBufferSize() const
+{
+	return sizeof(btCollisionObjectData);
+}
+
+
+
+#endif //BT_COLLISION_OBJECT_H
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btCollisionWorld.cpp b/src/bullet/BulletCollision/CollisionDispatch/btCollisionWorld.cpp
new file mode 100644
index 00000000..66b93b88
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btCollisionWorld.cpp
@@ -0,0 +1,1518 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btCollisionWorld.h"
+#include "btCollisionDispatcher.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h" //for raycasting
+#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h" //for raycasting
+#include "BulletCollision/NarrowPhaseCollision/btRaycastCallback.h"
+#include "BulletCollision/CollisionShapes/btCompoundShape.h"
+#include "BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h"
+#include "BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h"
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseInterface.h"
+#include "BulletCollision/BroadphaseCollision/btDbvt.h"
+#include "LinearMath/btAabbUtil2.h"
+#include "LinearMath/btQuickprof.h"
+#include "LinearMath/btStackAlloc.h"
+#include "LinearMath/btSerializer.h"
+#include "BulletCollision/CollisionShapes/btConvexPolyhedron.h"
+
+//#define DISABLE_DBVT_COMPOUNDSHAPE_RAYCAST_ACCELERATION
+
+
+//#define USE_BRUTEFORCE_RAYBROADPHASE 1
+//RECALCULATE_AABB is slower, but the benefit is that you don't need to call 'stepSimulation' or 'updateAabbs' before using a rayTest
+//#define RECALCULATE_AABB_RAYCAST 1
+
+//When the user doesn't provide dispatcher or broadphase, create basic versions (and delete them in destructor). NOTE: the constructor and destructor defined in this file do not create or delete such defaults.
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
+#include "BulletCollision/CollisionDispatch/btCollisionConfiguration.h"
+
+
+///for debug drawing
+
+//for debug rendering
+#include "BulletCollision/CollisionShapes/btBoxShape.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include "BulletCollision/CollisionShapes/btCompoundShape.h"
+#include "BulletCollision/CollisionShapes/btConeShape.h"
+#include "BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btCylinderShape.h"
+#include "BulletCollision/CollisionShapes/btMultiSphereShape.h"
+#include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleCallback.h"
+#include "BulletCollision/CollisionShapes/btTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
+
+///Stores the given dispatcher and broadphase pointers as-is; starts with no debug drawer and with m_forceUpdateAllAabbs enabled.
+///collisionConfiguration is dereferenced unconditionally (no null check); its stack allocator is also handed to m_dispatchInfo.
+btCollisionWorld::btCollisionWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache, btCollisionConfiguration* collisionConfiguration)
+:m_dispatcher1(dispatcher),
+m_broadphasePairCache(pairCache),
+m_debugDrawer(0),
+m_forceUpdateAllAabbs(true)
+{
+	m_stackAlloc = collisionConfiguration->getStackAllocator();
+	m_dispatchInfo.m_stackAllocator = m_stackAlloc;
+}
+
+
+btCollisionWorld::~btCollisionWorld()
+{
+	//Detaches every object still registered from the broadphase; the objects themselves are not deleted here.
+	//clean up remaining objects
+	int i;
+	for (i=0;i<m_collisionObjects.size();i++)
+	{
+		btCollisionObject* collisionObject= m_collisionObjects[i];
+
+		btBroadphaseProxy* bp = collisionObject->getBroadphaseHandle();
+		if (bp)
+		{
+			//
+			// clear the cached algorithms for this proxy, destroy the proxy, then reset the object's handle
+			//
+			getBroadphase()->getOverlappingPairCache()->cleanProxyFromPairs(bp,m_dispatcher1);
+			getBroadphase()->destroyProxy(bp,m_dispatcher1);
+			collisionObject->setBroadphaseHandle(0);
+		}
+	}
+
+
+}
+
+
+
+
+
+
+
+
+
+
+void	btCollisionWorld::addCollisionObject(btCollisionObject* collisionObject,short int collisionFilterGroup,short int collisionFilterMask)
+{
+	//Registers collisionObject with this world and creates its broadphase proxy using the given filter group/mask.
+	btAssert(collisionObject);
+
+	//check that the object isn't already added (linear search inside btAssert)
+	btAssert( m_collisionObjects.findLinearSearch(collisionObject)  == m_collisionObjects.size());
+
+	m_collisionObjects.push_back(collisionObject);
+
+	//calculate the AABB of the shape at the object's current world transform (no contact-threshold expansion is applied here)
+	btTransform trans = collisionObject->getWorldTransform();
+
+	btVector3	minAabb;
+	btVector3	maxAabb;
+	collisionObject->getCollisionShape()->getAabb(trans,minAabb,maxAabb);
+
+	int type = collisionObject->getCollisionShape()->getShapeType();
+	collisionObject->setBroadphaseHandle( getBroadphase()->createProxy(
+		minAabb,
+		maxAabb,
+		type,
+		collisionObject,
+		collisionFilterGroup,
+		collisionFilterMask,
+		m_dispatcher1,0
+		))	;
+
+
+
+
+
+}
+
+///Recomputes the AABB of colObj at its world transform, grown by gContactBreakingThreshold on every axis, and pushes it to the broadphase.
+///A non-static object whose AABB is implausibly large is disabled instead, with a one-time warning through the debug drawer.
+void	btCollisionWorld::updateSingleAabb(btCollisionObject* colObj)
+{
+	btVector3 minAabb,maxAabb;
+	colObj->getCollisionShape()->getAabb(colObj->getWorldTransform(), minAabb,maxAabb);
+	//need to increase the aabb for contact thresholds
+	btVector3 contactThreshold(gContactBreakingThreshold,gContactBreakingThreshold,gContactBreakingThreshold);
+	minAabb -= contactThreshold;
+	maxAabb += contactThreshold;
+	//with continuous collision enabled, non-static/non-kinematic rigid bodies also cover their AABB at the interpolation transform
+	if(getDispatchInfo().m_useContinuous && colObj->getInternalType()==btCollisionObject::CO_RIGID_BODY && !colObj->isStaticOrKinematicObject())
+	{
+		btVector3 minAabb2,maxAabb2;
+		colObj->getCollisionShape()->getAabb(colObj->getInterpolationWorldTransform(),minAabb2,maxAabb2);
+		minAabb2 -= contactThreshold;
+		maxAabb2 += contactThreshold;
+		minAabb.setMin(minAabb2);
+		maxAabb.setMax(maxAabb2);
+	}
+
+	btBroadphaseInterface* bp = (btBroadphaseInterface*)m_broadphasePairCache;
+
+	//moving objects should be moderately sized (squared AABB diagonal below 1e12), probably something wrong if not
+	if ( colObj->isStaticObject() || ((maxAabb-minAabb).length2() < btScalar(1e12)))
+	{
+		bp->setAabb(colObj->getBroadphaseHandle(),minAabb,maxAabb, m_dispatcher1);
+	} else
+	{
+		//something went wrong, investigate
+		//this assert is unwanted in 3D modelers (danger of losing work)
+		colObj->setActivationState(DISABLE_SIMULATION);
+		//function-local static flag: the warning below is emitted at most once, and only if a debug drawer is set
+		static bool reportMe = true;
+		if (reportMe && m_debugDrawer)
+		{
+			reportMe = false;
+			m_debugDrawer->reportErrorWarning("Overflow in AABB, object removed from simulation");
+			m_debugDrawer->reportErrorWarning("If you can reproduce this, please email bugs@continuousphysics.com\n");
+			m_debugDrawer->reportErrorWarning("Please include above information, your Platform, version of OS.\n");
+			m_debugDrawer->reportErrorWarning("Thanks.\n");
+		}
+	}
+}
+
+void	btCollisionWorld::updateAabbs()
+{
+	BT_PROFILE("updateAabbs");
+
+	//Refreshes the broadphase AABB of every object, or only of the active ones when m_forceUpdateAllAabbs is false.
+	for ( int i=0;i<m_collisionObjects.size();i++)
+	{
+		btCollisionObject* colObj = m_collisionObjects[i];
+
+		//inactive objects are skipped unless a full update is forced
+		if (m_forceUpdateAllAabbs || colObj->isActive())
+		{
+			updateSingleAabb(colObj);
+		}
+	}
+}
+
+
+
+void	btCollisionWorld::performDiscreteCollisionDetection()
+{
+	BT_PROFILE("performDiscreteCollisionDetection");
+	//One collision pass: refresh the AABBs, let the broadphase compute the overlapping pairs, then dispatch those pairs.
+	btDispatcherInfo& dispatchInfo = getDispatchInfo();
+
+	updateAabbs();
+
+	{
+		BT_PROFILE("calculateOverlappingPairs");
+		m_broadphasePairCache->calculateOverlappingPairs(m_dispatcher1);
+	}
+
+	//the dispatch step is skipped when getDispatcher() returns null
+	btDispatcher* dispatcher = getDispatcher();
+	{
+		BT_PROFILE("dispatchAllCollisionPairs");
+		if (dispatcher)
+			dispatcher->dispatchAllCollisionPairs(m_broadphasePairCache->getOverlappingPairCache(),dispatchInfo,m_dispatcher1);
+	}
+
+}
+
+
+
+void	btCollisionWorld::removeCollisionObject(btCollisionObject* collisionObject)
+{
+	//Detaches collisionObject from the broadphase (if it has a proxy) and removes it from m_collisionObjects; the object is not deleted.
+
+	//bool removeFromBroadphase = false;
+
+	{
+
+		btBroadphaseProxy* bp = collisionObject->getBroadphaseHandle();
+		if (bp)
+		{
+			//
+			// clear the cached algorithms for this proxy, destroy the proxy, then reset the object's handle
+			//
+			getBroadphase()->getOverlappingPairCache()->cleanProxyFromPairs(bp,m_dispatcher1);
+			getBroadphase()->destroyProxy(bp,m_dispatcher1);
+			collisionObject->setBroadphaseHandle(0);
+		}
+	}
+
+
+	//swapremove (per this original note; the order of the remaining objects is presumably not preserved - confirm against the array's remove())
+	m_collisionObjects.remove(collisionObject);
+
+}
+
+///Casts the ray rayFromTrans->rayToTrans against one shape of collisionObject placed at colObjWorldTransform and reports hits to resultCallback.
+///Dispatches on the shape kind: convex, concave (triangle based) or compound (recurses per child); any other kind is silently ignored.
+void	btCollisionWorld::rayTestSingle(const btTransform& rayFromTrans,const btTransform& rayToTrans,
+										btCollisionObject* collisionObject,
+										const btCollisionShape* collisionShape,
+										const btTransform& colObjWorldTransform,
+										RayResultCallback& resultCallback)
+{
+	btSphereShape pointShape(btScalar(0.0));
+	pointShape.setMargin(0.f);
+	const btConvexShape* castShape = &pointShape;
+	//the ray is modelled as a zero-radius, zero-margin sphere that is convex-cast from rayFromTrans to rayToTrans
+	if (collisionShape->isConvex())
+	{
+		//		BT_PROFILE("rayTestConvex");
+		btConvexCast::CastResult castResult;
+		castResult.m_fraction = resultCallback.m_closestHitFraction;
+
+		btConvexShape* convexShape = (btConvexShape*) collisionShape;
+		btVoronoiSimplexSolver	simplexSolver;
+#define USE_SUBSIMPLEX_CONVEX_CAST 1
+#ifdef USE_SUBSIMPLEX_CONVEX_CAST
+		btSubsimplexConvexCast convexCaster(castShape,convexShape,&simplexSolver);
+#else
+		//btGjkConvexCast	convexCaster(castShape,convexShape,&simplexSolver);
+		//btContinuousConvexCollision convexCaster(castShape,convexShape,&simplexSolver,0);
+#endif //#USE_SUBSIMPLEX_CONVEX_CAST
+		//NOTE: without USE_SUBSIMPLEX_CONVEX_CAST no convexCaster is declared (the alternatives above are commented out), so the code below would not compile
+		if (convexCaster.calcTimeOfImpact(rayFromTrans,rayToTrans,colObjWorldTransform,colObjWorldTransform,castResult))
+		{
+			//add hit, but only if the reported normal is not (near) zero-length and the hit is closer than the current closest hit
+			if (castResult.m_normal.length2() > btScalar(0.0001))
+			{
+				if (castResult.m_fraction < resultCallback.m_closestHitFraction)
+				{
+#ifdef USE_SUBSIMPLEX_CONVEX_CAST
+					//rotate normal into worldspace
+					castResult.m_normal = rayFromTrans.getBasis() * castResult.m_normal;
+#endif //USE_SUBSIMPLEX_CONVEX_CAST
+
+					castResult.m_normal.normalize();
+					btCollisionWorld::LocalRayResult localRayResult
+						(
+						collisionObject,
+						0,
+						castResult.m_normal,
+						castResult.m_fraction
+						);
+
+					bool normalInWorldSpace = true;
+					resultCallback.addSingleResult(localRayResult, normalInWorldSpace);
+
+				}
+			}
+		}
+	} else {
+		if (collisionShape->isConcave())
+		{
+			//			BT_PROFILE("rayTestConcave");
+			if (collisionShape->getShapeType()==TRIANGLE_MESH_SHAPE_PROXYTYPE)
+			{
+				///optimized version for btBvhTriangleMeshShape
+				btBvhTriangleMeshShape* triangleMesh = (btBvhTriangleMeshShape*)collisionShape;
+				btTransform worldTocollisionObject = colObjWorldTransform.inverse();
+				btVector3 rayFromLocal = worldTocollisionObject * rayFromTrans.getOrigin();
+				btVector3 rayToLocal = worldTocollisionObject * rayToTrans.getOrigin();
+				//the ray end points are now in the local space of the mesh; hit normals are rotated back to world space in reportHit
+				//ConvexCast::CastResult
+				struct BridgeTriangleRaycastCallback : public btTriangleRaycastCallback
+				{
+					btCollisionWorld::RayResultCallback* m_resultCallback;
+					btCollisionObject*	m_collisionObject;
+					btTriangleMeshShape*	m_triangleMesh;
+
+					btTransform m_colObjWorldTransform;
+
+					BridgeTriangleRaycastCallback( const btVector3& from,const btVector3& to,
+						btCollisionWorld::RayResultCallback* resultCallback, btCollisionObject* collisionObject,btTriangleMeshShape*	triangleMesh,const btTransform& colObjWorldTransform):
+					//@BP Mod
+					btTriangleRaycastCallback(from,to, resultCallback->m_flags),
+						m_resultCallback(resultCallback),
+						m_collisionObject(collisionObject),
+						m_triangleMesh(triangleMesh),
+						m_colObjWorldTransform(colObjWorldTransform)
+					{
+					}
+
+					//wraps a triangle hit into a LocalRayResult (part id / triangle index + world-space normal) and forwards it to the user callback
+					virtual btScalar reportHit(const btVector3& hitNormalLocal, btScalar hitFraction, int partId, int triangleIndex )
+					{
+						btCollisionWorld::LocalShapeInfo	shapeInfo;
+						shapeInfo.m_shapePart = partId;
+						shapeInfo.m_triangleIndex = triangleIndex;
+
+						btVector3 hitNormalWorld = m_colObjWorldTransform.getBasis() * hitNormalLocal;
+
+						btCollisionWorld::LocalRayResult rayResult
+							(m_collisionObject,
+							&shapeInfo,
+							hitNormalWorld,
+							hitFraction);
+
+						bool	normalInWorldSpace = true;
+						return m_resultCallback->addSingleResult(rayResult,normalInWorldSpace);
+					}
+
+				};
+
+				BridgeTriangleRaycastCallback rcb(rayFromLocal,rayToLocal,&resultCallback,collisionObject,triangleMesh,colObjWorldTransform);
+				rcb.m_hitFraction = resultCallback.m_closestHitFraction;
+				triangleMesh->performRaycast(&rcb,rayFromLocal,rayToLocal);
+			} else
+			{
+				//generic (slower) case: processAllTriangles is driven with the local-space AABB of the ray
+				btConcaveShape* concaveShape = (btConcaveShape*)collisionShape;
+
+				btTransform worldTocollisionObject = colObjWorldTransform.inverse();
+
+				btVector3 rayFromLocal = worldTocollisionObject * rayFromTrans.getOrigin();
+				btVector3 rayToLocal = worldTocollisionObject * rayToTrans.getOrigin();
+
+				//ConvexCast::CastResult
+
+				struct BridgeTriangleRaycastCallback : public btTriangleRaycastCallback
+				{
+					btCollisionWorld::RayResultCallback* m_resultCallback;
+					btCollisionObject*	m_collisionObject;
+					btConcaveShape*	m_triangleMesh;
+
+					btTransform m_colObjWorldTransform;
+
+					BridgeTriangleRaycastCallback( const btVector3& from,const btVector3& to,
+						btCollisionWorld::RayResultCallback* resultCallback, btCollisionObject* collisionObject,btConcaveShape*	triangleMesh, const btTransform& colObjWorldTransform):
+					//@BP Mod
+					btTriangleRaycastCallback(from,to, resultCallback->m_flags),
+						m_resultCallback(resultCallback),
+						m_collisionObject(collisionObject),
+						m_triangleMesh(triangleMesh),
+						m_colObjWorldTransform(colObjWorldTransform)
+					{
+					}
+
+
+					virtual btScalar reportHit(const btVector3& hitNormalLocal, btScalar hitFraction, int partId, int triangleIndex )
+					{
+						btCollisionWorld::LocalShapeInfo	shapeInfo;
+						shapeInfo.m_shapePart = partId;
+						shapeInfo.m_triangleIndex = triangleIndex;
+
+						btVector3 hitNormalWorld = m_colObjWorldTransform.getBasis() * hitNormalLocal;
+
+						btCollisionWorld::LocalRayResult rayResult
+							(m_collisionObject,
+							&shapeInfo,
+							hitNormalWorld,
+							hitFraction);
+
+						bool	normalInWorldSpace = true;
+						return m_resultCallback->addSingleResult(rayResult,normalInWorldSpace);
+					}
+
+				};
+
+
+				BridgeTriangleRaycastCallback	rcb(rayFromLocal,rayToLocal,&resultCallback,collisionObject,concaveShape, colObjWorldTransform);
+				rcb.m_hitFraction = resultCallback.m_closestHitFraction;
+				//local-space AABB enclosing the ray segment, passed to processAllTriangles below
+				btVector3 rayAabbMinLocal = rayFromLocal;
+				rayAabbMinLocal.setMin(rayToLocal);
+				btVector3 rayAabbMaxLocal = rayFromLocal;
+				rayAabbMaxLocal.setMax(rayToLocal);
+
+				concaveShape->processAllTriangles(&rcb,rayAabbMinLocal,rayAabbMaxLocal);
+			}
+		} else {
+			//			BT_PROFILE("rayTestCompound");
+			if (collisionShape->isCompound())
+			{
+				struct LocalInfoAdder2 : public RayResultCallback
+				{
+					RayResultCallback* m_userCallback;
+					int m_i;
+					//forwards hits to the user callback; when no shape info is set yet, tags the hit with the child index (m_triangleIndex=m_i, m_shapePart=-1)
+					LocalInfoAdder2 (int i, RayResultCallback *user)
+						: m_userCallback(user), m_i(i)
+					{ 
+						m_closestHitFraction = m_userCallback->m_closestHitFraction;
+					}
+					virtual bool needsCollision(btBroadphaseProxy* p) const
+					{
+						return m_userCallback->needsCollision(p);
+					}
+
+					virtual btScalar addSingleResult (btCollisionWorld::LocalRayResult &r, bool b)
+					{
+						btCollisionWorld::LocalShapeInfo shapeInfo;
+						shapeInfo.m_shapePart = -1;
+						shapeInfo.m_triangleIndex = m_i;
+						if (r.m_localShapeInfo == NULL)
+							r.m_localShapeInfo = &shapeInfo;
+						//NOTE(review): shapeInfo is a local, so r.m_localShapeInfo may dangle after this function returns; the user callback must not keep the pointer
+						const btScalar result = m_userCallback->addSingleResult(r, b);
+						m_closestHitFraction = m_userCallback->m_closestHitFraction;
+						return result;
+					}
+				};
+				//ICollide adapter: ray-tests a single child shape, addressed either by child index or by dbvt leaf
+				struct RayTester : btDbvt::ICollide
+				{
+					btCollisionObject* m_collisionObject;
+					const btCompoundShape* m_compoundShape;
+					const btTransform& m_colObjWorldTransform;
+					const btTransform& m_rayFromTrans;
+					const btTransform& m_rayToTrans;
+					RayResultCallback& m_resultCallback;
+					
+					RayTester(btCollisionObject* collisionObject,
+							const btCompoundShape* compoundShape,
+							const btTransform& colObjWorldTransform,
+							const btTransform& rayFromTrans,
+							const btTransform& rayToTrans,
+							RayResultCallback& resultCallback):
+						m_collisionObject(collisionObject),
+						m_compoundShape(compoundShape),
+						m_colObjWorldTransform(colObjWorldTransform),
+						m_rayFromTrans(rayFromTrans),
+						m_rayToTrans(rayToTrans),
+						m_resultCallback(resultCallback)
+					{
+						
+					}
+					
+					void Process(int i)
+					{
+						const btCollisionShape* childCollisionShape = m_compoundShape->getChildShape(i);
+						const btTransform& childTrans = m_compoundShape->getChildTransform(i);
+						btTransform childWorldTrans = m_colObjWorldTransform * childTrans;
+						
+						// replace collision shape so that callback can determine the triangle
+						btCollisionShape* saveCollisionShape = m_collisionObject->getCollisionShape();
+						m_collisionObject->internalSetTemporaryCollisionShape((btCollisionShape*)childCollisionShape);
+
+						LocalInfoAdder2 my_cb(i, &m_resultCallback);
+
+						rayTestSingle(
+							m_rayFromTrans,
+							m_rayToTrans,
+							m_collisionObject,
+							childCollisionShape,
+							childWorldTrans,
+							my_cb);
+						
+						// restore
+						m_collisionObject->internalSetTemporaryCollisionShape(saveCollisionShape);
+					}
+					
+					void Process(const btDbvtNode* leaf)
+					{
+						Process(leaf->dataAsInt);
+					}
+				};
+				
+				const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(collisionShape);
+				const btDbvt* dbvt = compoundShape->getDynamicAabbTree();
+				//with a dbvt, btDbvt::rayTest selects the children to process (presumably those whose nodes the local-space ray hits); otherwise every child is tested
+
+				RayTester rayCB(
+					collisionObject,
+					compoundShape,
+					colObjWorldTransform,
+					rayFromTrans,
+					rayToTrans,
+					resultCallback);
+#ifndef	DISABLE_DBVT_COMPOUNDSHAPE_RAYCAST_ACCELERATION
+				if (dbvt)
+				{
+					btVector3 localRayFrom = colObjWorldTransform.inverseTimes(rayFromTrans).getOrigin();
+					btVector3 localRayTo = colObjWorldTransform.inverseTimes(rayToTrans).getOrigin();
+					btDbvt::rayTest(dbvt->m_root, localRayFrom , localRayTo, rayCB);
+				}
+				else
+#endif //DISABLE_DBVT_COMPOUNDSHAPE_RAYCAST_ACCELERATION
+				{
+					for (int i = 0, n = compoundShape->getNumChildShapes(); i < n; ++i)
+					{
+						rayCB.Process(i);
+					}	
+				}
+			}
+		}
+	}
+}
+///Sweeps castShape from convexFromTrans to convexToTrans against one shape of collisionObject placed at colObjWorldTransform and reports hits to resultCallback; dispatches on convex / concave / compound shapes.
+void	btCollisionWorld::objectQuerySingle(const btConvexShape* castShape,const btTransform& convexFromTrans,const btTransform& convexToTrans,
+											btCollisionObject* collisionObject,
+											const btCollisionShape* collisionShape,
+											const btTransform& colObjWorldTransform,
+											ConvexResultCallback& resultCallback, btScalar allowedPenetration)
+{
+	if (collisionShape->isConvex())
+	{
+		//BT_PROFILE("convexSweepConvex");
+		btConvexCast::CastResult castResult;
+		castResult.m_allowedPenetration = allowedPenetration;
+		castResult.m_fraction = resultCallback.m_closestHitFraction;//btScalar(1.);//??
+
+		btConvexShape* convexShape = (btConvexShape*) collisionShape;
+		btVoronoiSimplexSolver	simplexSolver;
+		btGjkEpaPenetrationDepthSolver	gjkEpaPenetrationSolver;
+
+		btContinuousConvexCollision convexCaster1(castShape,convexShape,&simplexSolver,&gjkEpaPenetrationSolver);
+		//btGjkConvexCast convexCaster2(castShape,convexShape,&simplexSolver);
+		//btSubsimplexConvexCast convexCaster3(castShape,convexShape,&simplexSolver);
+
+		btConvexCast* castPtr = &convexCaster1;
+
+		//only btContinuousConvexCollision is used here; the alternative casters above are commented out
+
+		if (castPtr->calcTimeOfImpact(convexFromTrans,convexToTrans,colObjWorldTransform,colObjWorldTransform,castResult))
+		{
+			//add hit, but only if the reported normal is not (near) zero-length and the hit is closer than the current closest hit
+			if (castResult.m_normal.length2() > btScalar(0.0001))
+			{
+				if (castResult.m_fraction < resultCallback.m_closestHitFraction)
+				{
+					castResult.m_normal.normalize();
+					btCollisionWorld::LocalConvexResult localConvexResult
+						(
+						collisionObject,
+						0,
+						castResult.m_normal,
+						castResult.m_hitPoint,
+						castResult.m_fraction
+						);
+
+					bool normalInWorldSpace = true;
+					resultCallback.addSingleResult(localConvexResult, normalInWorldSpace);
+
+				}
+			}
+		}
+	} else {
+		if (collisionShape->isConcave())
+		{
+			if (collisionShape->getShapeType()==TRIANGLE_MESH_SHAPE_PROXYTYPE)
+			{
+				//BT_PROFILE("convexSweepbtBvhTriangleMesh");
+				btBvhTriangleMeshShape* triangleMesh = (btBvhTriangleMeshShape*)collisionShape;
+				btTransform worldTocollisionObject = colObjWorldTransform.inverse();
+				btVector3 convexFromLocal = worldTocollisionObject * convexFromTrans.getOrigin();
+				btVector3 convexToLocal = worldTocollisionObject * convexToTrans.getOrigin();
+				// rotation of box in local mesh space = MeshRotation^-1 * ConvexToRotation
+				btTransform rotationXform = btTransform(worldTocollisionObject.getBasis() * convexToTrans.getBasis());
+
+				//ConvexCast::CastResult
+				struct BridgeTriangleConvexcastCallback : public btTriangleConvexcastCallback
+				{
+					btCollisionWorld::ConvexResultCallback* m_resultCallback;
+					btCollisionObject*	m_collisionObject;
+					btTriangleMeshShape*	m_triangleMesh;
+
+					BridgeTriangleConvexcastCallback(const btConvexShape* castShape, const btTransform& from,const btTransform& to,
+						btCollisionWorld::ConvexResultCallback* resultCallback, btCollisionObject* collisionObject,btTriangleMeshShape*	triangleMesh, const btTransform& triangleToWorld):
+					btTriangleConvexcastCallback(castShape, from,to, triangleToWorld, triangleMesh->getMargin()),
+						m_resultCallback(resultCallback),
+						m_collisionObject(collisionObject),
+						m_triangleMesh(triangleMesh)
+					{
+					}
+
+
+					virtual btScalar reportHit(const btVector3& hitNormalLocal, const btVector3& hitPointLocal, btScalar hitFraction, int partId, int triangleIndex )
+					{
+						btCollisionWorld::LocalShapeInfo	shapeInfo;
+						shapeInfo.m_shapePart = partId;
+						shapeInfo.m_triangleIndex = triangleIndex;
+						if (hitFraction <= m_resultCallback->m_closestHitFraction)
+						{
+
+							btCollisionWorld::LocalConvexResult convexResult
+								(m_collisionObject,
+								&shapeInfo,
+								hitNormalLocal,
+								hitPointLocal,
+								hitFraction);
+
+							bool	normalInWorldSpace = true;
+							//NOTE(review): flagged as world space here but as local space (false) in the generic concave branch below, for the same kind of callback arguments - confirm which is intended
+
+							return m_resultCallback->addSingleResult(convexResult,normalInWorldSpace);
+						}
+						return hitFraction;
+					}
+
+				};
+
+				BridgeTriangleConvexcastCallback tccb(castShape, convexFromTrans,convexToTrans,&resultCallback,collisionObject,triangleMesh, colObjWorldTransform);
+				tccb.m_hitFraction = resultCallback.m_closestHitFraction;
+				tccb.m_allowedPenetration = allowedPenetration;
+				btVector3 boxMinLocal, boxMaxLocal;
+				castShape->getAabb(rotationXform, boxMinLocal, boxMaxLocal);
+				triangleMesh->performConvexcast(&tccb,convexFromLocal,convexToLocal,boxMinLocal, boxMaxLocal);
+			} else
+			{
+				if (collisionShape->getShapeType()==STATIC_PLANE_PROXYTYPE)
+				{
+					btConvexCast::CastResult castResult;
+					castResult.m_allowedPenetration = allowedPenetration;
+					castResult.m_fraction = resultCallback.m_closestHitFraction;
+					btStaticPlaneShape* planeShape = (btStaticPlaneShape*) collisionShape;
+					btContinuousConvexCollision convexCaster1(castShape,planeShape); //plane variant: no simplex or penetration-depth solver is passed
+					btConvexCast* castPtr = &convexCaster1;
+
+					if (castPtr->calcTimeOfImpact(convexFromTrans,convexToTrans,colObjWorldTransform,colObjWorldTransform,castResult))
+					{
+						//add hit
+						if (castResult.m_normal.length2() > btScalar(0.0001))
+						{
+							if (castResult.m_fraction < resultCallback.m_closestHitFraction)
+							{
+								castResult.m_normal.normalize();
+								btCollisionWorld::LocalConvexResult localConvexResult
+									(
+									collisionObject,
+									0,
+									castResult.m_normal,
+									castResult.m_hitPoint,
+									castResult.m_fraction
+									);
+
+								bool normalInWorldSpace = true;
+								resultCallback.addSingleResult(localConvexResult, normalInWorldSpace);
+							}
+						}
+					}
+
+				} else
+				{
+					//BT_PROFILE("convexSweepConcave");
+					btConcaveShape* concaveShape = (btConcaveShape*)collisionShape;
+					btTransform worldTocollisionObject = colObjWorldTransform.inverse();
+					btVector3 convexFromLocal = worldTocollisionObject * convexFromTrans.getOrigin();
+					btVector3 convexToLocal = worldTocollisionObject * convexToTrans.getOrigin();
+					// rotation of box in local mesh space = MeshRotation^-1 * ConvexToRotation
+					btTransform rotationXform = btTransform(worldTocollisionObject.getBasis() * convexToTrans.getBasis());
+
+					//ConvexCast::CastResult
+					struct BridgeTriangleConvexcastCallback : public btTriangleConvexcastCallback
+					{
+						btCollisionWorld::ConvexResultCallback* m_resultCallback;
+						btCollisionObject*	m_collisionObject;
+						btConcaveShape*	m_triangleMesh;
+
+						BridgeTriangleConvexcastCallback(const btConvexShape* castShape, const btTransform& from,const btTransform& to,
+							btCollisionWorld::ConvexResultCallback* resultCallback, btCollisionObject* collisionObject,btConcaveShape*	triangleMesh, const btTransform& triangleToWorld):
+						btTriangleConvexcastCallback(castShape, from,to, triangleToWorld, triangleMesh->getMargin()),
+							m_resultCallback(resultCallback),
+							m_collisionObject(collisionObject),
+							m_triangleMesh(triangleMesh)
+						{
+						}
+
+
+						virtual btScalar reportHit(const btVector3& hitNormalLocal, const btVector3& hitPointLocal, btScalar hitFraction, int partId, int triangleIndex )
+						{
+							btCollisionWorld::LocalShapeInfo	shapeInfo;
+							shapeInfo.m_shapePart = partId;
+							shapeInfo.m_triangleIndex = triangleIndex;
+							if (hitFraction <= m_resultCallback->m_closestHitFraction)
+							{
+
+								btCollisionWorld::LocalConvexResult convexResult
+									(m_collisionObject,
+									&shapeInfo,
+									hitNormalLocal,
+									hitPointLocal,
+									hitFraction);
+
+								bool	normalInWorldSpace = false;
+								//NOTE(review): reported as a local-space normal here, unlike the btBvhTriangleMeshShape branch above (true) - confirm which is intended
+								return m_resultCallback->addSingleResult(convexResult,normalInWorldSpace);
+							}
+							return hitFraction;
+						}
+
+					};
+
+					BridgeTriangleConvexcastCallback tccb(castShape, convexFromTrans,convexToTrans,&resultCallback,collisionObject,concaveShape, colObjWorldTransform);
+					tccb.m_hitFraction = resultCallback.m_closestHitFraction;
+					tccb.m_allowedPenetration = allowedPenetration;
+					btVector3 boxMinLocal, boxMaxLocal;
+					castShape->getAabb(rotationXform, boxMinLocal, boxMaxLocal);
+					//local-space bounds of the sweep: AABB of the from/to origins, grown by the cast shape's box
+					btVector3 rayAabbMinLocal = convexFromLocal;
+					rayAabbMinLocal.setMin(convexToLocal);
+					btVector3 rayAabbMaxLocal = convexFromLocal;
+					rayAabbMaxLocal.setMax(convexToLocal);
+					rayAabbMinLocal += boxMinLocal;
+					rayAabbMaxLocal += boxMaxLocal;
+					concaveShape->processAllTriangles(&tccb,rayAabbMinLocal,rayAabbMaxLocal);
+				}
+			}
+		} else {
+			///@todo : use AABB tree or other BVH acceleration structure!
+			if (collisionShape->isCompound())
+			{
+				BT_PROFILE("convexSweepCompound");
+				const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(collisionShape);
+				int i=0;
+				for (i=0;i<compoundShape->getNumChildShapes();i++)
+				{
+					btTransform childTrans = compoundShape->getChildTransform(i);
+					const btCollisionShape* childCollisionShape = compoundShape->getChildShape(i);
+					btTransform childWorldTrans = colObjWorldTransform * childTrans;
+					// replace collision shape so that callback can determine the triangle
+					btCollisionShape* saveCollisionShape = collisionObject->getCollisionShape();
+					collisionObject->internalSetTemporaryCollisionShape((btCollisionShape*)childCollisionShape);
+                    struct	LocalInfoAdder : public ConvexResultCallback {
+                            ConvexResultCallback* m_userCallback;
+							int m_i;
+							//forwards hits to the user callback; when no shape info is set yet, tags the hit with the child index (m_triangleIndex=m_i, m_shapePart=-1)
+                            LocalInfoAdder (int i, ConvexResultCallback *user)
+								: m_userCallback(user), m_i(i)
+							{
+								m_closestHitFraction = m_userCallback->m_closestHitFraction;
+							}
+							virtual bool needsCollision(btBroadphaseProxy* p) const
+							{
+								return m_userCallback->needsCollision(p);
+							}
+                            virtual btScalar addSingleResult (btCollisionWorld::LocalConvexResult&	r,	bool b)
+                            {
+                                    btCollisionWorld::LocalShapeInfo	shapeInfo;
+                                    shapeInfo.m_shapePart = -1;
+                                    shapeInfo.m_triangleIndex = m_i;
+                                    if (r.m_localShapeInfo == NULL)
+                                        r.m_localShapeInfo = &shapeInfo;
+									const btScalar result = m_userCallback->addSingleResult(r, b);
+									m_closestHitFraction = m_userCallback->m_closestHitFraction;
+									return result;
+                                    
+                            }
+                    };
+
+                    LocalInfoAdder my_cb(i, &resultCallback);
+					
+
+					objectQuerySingle(castShape, convexFromTrans,convexToTrans,
+						collisionObject,
+						childCollisionShape,
+						childWorldTrans,
+						my_cb, allowedPenetration);
+					// restore
+					collisionObject->internalSetTemporaryCollisionShape(saveCollisionShape);
+				}
+			}
+		}
+	}
+}
+
+///Broadphase ray callback: for every proxy handed to process(), runs an exact btCollisionWorld::rayTestSingle against the proxy's collision object.
+struct btSingleRayCallback : public btBroadphaseRayCallback
+{
+
+	btVector3	m_rayFromWorld;
+	btVector3	m_rayToWorld;
+	btTransform	m_rayFromTrans;
+	btTransform	m_rayToTrans;
+	btVector3	m_hitNormal;
+	//NOTE(review): m_hitNormal is only referenced from commented-out code in this struct
+	const btCollisionWorld*	m_world;
+	btCollisionWorld::RayResultCallback&	m_resultCallback;
+
+	btSingleRayCallback(const btVector3& rayFromWorld,const btVector3& rayToWorld,const btCollisionWorld* world,btCollisionWorld::RayResultCallback& resultCallback)
+		:m_rayFromWorld(rayFromWorld),
+		m_rayToWorld(rayToWorld),
+		m_world(world),
+		m_resultCallback(resultCallback)
+	{
+		m_rayFromTrans.setIdentity();
+		m_rayFromTrans.setOrigin(m_rayFromWorld);
+		m_rayToTrans.setIdentity();
+		m_rayToTrans.setOrigin(m_rayToWorld);
+		//m_rayDirectionInverse, m_signs and m_lambda_max assigned below are not declared in this struct; presumably inherited from btBroadphaseRayCallback
+		btVector3 rayDir = (rayToWorld-rayFromWorld);
+		//NOTE(review): a zero-length ray (rayFromWorld == rayToWorld) gives a zero rayDir; normalize() on it is presumably a division by zero - confirm
+		rayDir.normalize ();
+		///zero direction components would divide by zero --> m_rayDirectionInverse[i] is set to BT_LARGE_FLOAT instead
+		m_rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[0];
+		m_rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[1];
+		m_rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[2];
+		m_signs[0] = m_rayDirectionInverse[0] < 0.0;
+		m_signs[1] = m_rayDirectionInverse[1] < 0.0;
+		m_signs[2] = m_rayDirectionInverse[2] < 0.0;
+		//rayDir is normalized, so this dot product is the length of the ray segment
+		m_lambda_max = rayDir.dot(m_rayToWorld-m_rayFromWorld);
+
+	}
+
+	///Returns false once the closest hit fraction has reached zero (per the note below, to terminate further ray tests), true otherwise.
+
+	virtual bool	process(const btBroadphaseProxy* proxy)
+	{
+		///terminate further ray tests, once the closestHitFraction reached zero
+		if (m_resultCallback.m_closestHitFraction == btScalar(0.f))
+			return false;
+
+		btCollisionObject*	collisionObject = (btCollisionObject*)proxy->m_clientObject;
+
+		//only perform raycast if filterMask matches
+		if(m_resultCallback.needsCollision(collisionObject->getBroadphaseHandle())) 
+		{
+			//RigidcollisionObject* collisionObject = ctrl->GetRigidcollisionObject();
+			//btVector3 collisionObjectAabbMin,collisionObjectAabbMax;
+#if 0
+#ifdef RECALCULATE_AABB
+			btVector3 collisionObjectAabbMin,collisionObjectAabbMax;
+			collisionObject->getCollisionShape()->getAabb(collisionObject->getWorldTransform(),collisionObjectAabbMin,collisionObjectAabbMax);
+#else
+			//getBroadphase()->getAabb(collisionObject->getBroadphaseHandle(),collisionObjectAabbMin,collisionObjectAabbMax);
+			const btVector3& collisionObjectAabbMin = collisionObject->getBroadphaseHandle()->m_aabbMin;
+			const btVector3& collisionObjectAabbMax = collisionObject->getBroadphaseHandle()->m_aabbMax;
+#endif
+#endif
+			//btScalar hitLambda = m_resultCallback.m_closestHitFraction;
+			//culling already done by broadphase
+			//if (btRayAabb(m_rayFromWorld,m_rayToWorld,collisionObjectAabbMin,collisionObjectAabbMax,hitLambda,m_hitNormal))
+			{
+				m_world->rayTestSingle(m_rayFromTrans,m_rayToTrans,
+					collisionObject,
+					collisionObject->getCollisionShape(),
+					collisionObject->getWorldTransform(),
+					m_resultCallback);
+			}
+		}
+		return true;
+	}
+};
+
+void	btCollisionWorld::rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const
+{
+	//BT_PROFILE("rayTest");
+	/// Casts a ray from rayFromWorld to rayToWorld and reports hits through resultCallback.
+	/// btSingleRayCallback does the per-object work: collision filtering, then an exact rayTestSingle.
+	btSingleRayCallback singleRayCallback(rayFromWorld, rayToWorld, this, resultCallback);
+
+#ifdef USE_BRUTEFORCE_RAYBROADPHASE
+	// Brute force: offer every collision object in the world to the callback.
+	for (int objectIndex = 0; objectIndex < getNumCollisionObjects(); ++objectIndex)
+		singleRayCallback.process(m_collisionObjects[objectIndex]->getBroadphaseHandle());
+#else
+	// Default: the broadphase selects the candidate objects based on their AABB.
+	m_broadphasePairCache->rayTest(rayFromWorld, rayToWorld, singleRayCallback);
+#endif //USE_BRUTEFORCE_RAYBROADPHASE
+
+}
+
+
+/// Broadphase ray callback used by btCollisionWorld::convexSweepTest: for every proxy it is
+/// handed, it runs objectQuerySingle with the cast shape against the object behind that proxy.
+struct btSingleSweepCallback : public btBroadphaseRayCallback
+{
+	btTransform	m_convexFromTrans;	///< start transform of the swept shape
+	btTransform	m_convexToTrans;	///< end transform of the swept shape
+	btVector3	m_hitNormal;	///< neither written nor read by the code in this struct
+	const btCollisionWorld*	m_world;
+	btCollisionWorld::ConvexResultCallback&	m_resultCallback;
+	btScalar	m_allowedCcdPenetration;
+	const btConvexShape* m_castShape;
+
+	/// Stores the sweep parameters and derives m_rayDirectionInverse, m_signs and m_lambda_max
+	/// (not declared here; presumably btBroadphaseRayCallback members) from the two origins.
+	btSingleSweepCallback(const btConvexShape* castShape, const btTransform& convexFromTrans,const btTransform& convexToTrans,const btCollisionWorld* world,btCollisionWorld::ConvexResultCallback& resultCallback,btScalar allowedPenetration)
+		:m_convexFromTrans(convexFromTrans),
+		m_convexToTrans(convexToTrans),
+		m_world(world),
+		m_resultCallback(resultCallback),
+		m_allowedCcdPenetration(allowedPenetration),
+		m_castShape(castShape)
+	{
+		btVector3 unnormalizedRayDir = (m_convexToTrans.getOrigin()-m_convexFromTrans.getOrigin());
+		// A sweep whose origins coincide (e.g. a pure rotation) has no direction: normalizing the
+		// zero vector would divide by zero, so keep a zero direction and let the guards below apply.
+		btVector3 rayDir(btScalar(0.0),btScalar(0.0),btScalar(0.0));
+		if (unnormalizedRayDir.length2() >= SIMD_EPSILON*SIMD_EPSILON)
+			rayDir = unnormalizedRayDir.normalized();
+		///what about division by zero? --> just set rayDirection[i] to INF/BT_LARGE_FLOAT
+		m_rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[0];
+		m_rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[1];
+		m_rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDir[2];
+		m_signs[0] = m_rayDirectionInverse[0] < 0.0;
+		m_signs[1] = m_rayDirectionInverse[1] < 0.0;
+		m_signs[2] = m_rayDirectionInverse[2] < 0.0;
+		m_lambda_max = rayDir.dot(unnormalizedRayDir);
+	}
+
+	/// Invoked per candidate proxy. Returns false once m_closestHitFraction has reached zero
+	/// (which, per the comment below, terminates further sweep tests), true otherwise.
+	virtual bool	process(const btBroadphaseProxy* proxy)
+	{
+		///terminate further convex sweep tests, once the closestHitFraction reached zero
+		if (m_resultCallback.m_closestHitFraction == btScalar(0.f))
+			return false;
+
+		btCollisionObject*	collisionObject = (btCollisionObject*)proxy->m_clientObject;
+		//only run the sweep query if the result callback's filter accepts this object
+		if(m_resultCallback.needsCollision(collisionObject->getBroadphaseHandle())) {
+			m_world->objectQuerySingle(m_castShape, m_convexFromTrans,m_convexToTrans,
+				collisionObject,collisionObject->getCollisionShape(),collisionObject->getWorldTransform(),
+				m_resultCallback,m_allowedCcdPenetration);
+		}
+		return true;
+	}
+};
+
+
+
+void	btCollisionWorld::convexSweepTest(const btConvexShape* castShape, const btTransform& convexFromWorld, const btTransform& convexToWorld, ConvexResultCallback& resultCallback, btScalar allowedCcdPenetration) const
+{
+	BT_PROFILE("convexSweepTest");
+	/// Sweeps castShape from convexFromWorld to convexToWorld and reports hits through resultCallback.
+	/// use the broadphase to accelerate the search for objects, based on their aabb
+	/// and for each object with ray-aabb overlap, perform an exact ray test
+	/// unfortunately the implementation for rayTest and convexSweepTest duplicated, albeit practically identical
+
+	// Local copies of the two endpoints, used by the AABB computation and the queries below.
+	btTransform	convexFromTrans = convexFromWorld;
+	btTransform	convexToTrans = convexToWorld;
+
+	/* Compute AABB that encompasses angular movement */
+	btVector3 castShapeAabbMin, castShapeAabbMax;
+	{
+		btVector3 linearVelocity, angularVelocity;
+		btTransformUtil::calculateVelocity (convexFromTrans, convexToTrans, 1.0, linearVelocity, angularVelocity);
+		// Only the start rotation and the angular velocity are passed on; the linear velocity
+		// handed to calculateTemporalAabb is zero and the time step is 1.
+		btTransform rotationOnly;
+		rotationOnly.setIdentity ();
+		rotationOnly.setRotation (convexFromTrans.getRotation());
+		btVector3 zeroLinearVelocity;
+		zeroLinearVelocity.setValue(0,0,0);
+		castShape->calculateTemporalAabb (rotationOnly, zeroLinearVelocity, angularVelocity, 1.0, castShapeAabbMin, castShapeAabbMax);
+	}
+
+#ifdef USE_BRUTEFORCE_RAYBROADPHASE
+	/// go over all objects, and if the ray intersects their aabb + cast shape aabb,
+	// do a ray-shape query using convexCaster (CCD)
+	for (int objectIndex = 0; objectIndex < m_collisionObjects.size(); ++objectIndex)
+	{
+		btCollisionObject*	candidate = m_collisionObjects[objectIndex];
+		// skip objects rejected by the result callback's collision filter
+		if (!resultCallback.needsCollision(candidate->getBroadphaseHandle()))
+			continue;
+
+		btVector3 candidateAabbMin, candidateAabbMax;
+		candidate->getCollisionShape()->getAabb(candidate->getWorldTransform(), candidateAabbMin, candidateAabbMax);
+		AabbExpand (candidateAabbMin, candidateAabbMax, castShapeAabbMin, castShapeAabbMax);
+
+		btScalar hitLambda = btScalar(1.); //could use resultCallback.m_closestHitFraction, but needs testing
+		btVector3 hitNormal;
+		if (!btRayAabb(convexFromWorld.getOrigin(), convexToWorld.getOrigin(), candidateAabbMin, candidateAabbMax, hitLambda, hitNormal))
+			continue;
+
+		objectQuerySingle(castShape, convexFromTrans, convexToTrans,
+			candidate,
+			candidate->getCollisionShape(),
+			candidate->getWorldTransform(),
+			resultCallback,
+			allowedCcdPenetration);
+	}
+#else
+	// Default path: the broadphase ray test is given the cast shape's AABB and hands each
+	// candidate proxy to btSingleSweepCallback, which runs the exact sweep query.
+	btSingleSweepCallback	sweepCallback(castShape, convexFromWorld, convexToWorld, this, resultCallback, allowedCcdPenetration);
+	m_broadphasePairCache->rayTest(convexFromTrans.getOrigin(), convexToTrans.getOrigin(), sweepCallback, castShapeAabbMin, castShapeAabbMax);
+#endif //USE_BRUTEFORCE_RAYBROADPHASE
+}
+
+
+
+/// btManifoldResult whose addContactPoint forwards each contact to a
+/// btCollisionWorld::ContactResultCallback (used by contactTest / contactPairTest).
+struct btBridgedManifoldResult : public btManifoldResult
+{
+
+	/// receives every contact point reported through addContactPoint
+	btCollisionWorld::ContactResultCallback&	m_resultCallback;
+
+	/// obj0 and obj1 are passed on to the btManifoldResult base.
+	btBridgedManifoldResult( btCollisionObject* obj0,btCollisionObject* obj1,btCollisionWorld::ContactResultCallback& resultCallback )
+		:btManifoldResult(obj0,obj1),
+		m_resultCallback(resultCallback)
+	{
+	}
+
+	/// Builds a btManifoldPoint for the reported contact and passes it to m_resultCallback.
+	/// pointInWorld is used as the point on B; the point on A is
+	/// pointInWorld + normalOnBInWorld * depth.
+	virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
+	{
+		// swapped: the manifold's body 0 is not this result's m_body0, so the A/B roles are exchanged
+		const bool swapped = (m_manifoldPtr->getBody0() != m_body0);
+		const btVector3 worldPointOnA = pointInWorld + normalOnBInWorld * depth;
+
+		// express both world points in local coordinates, using the root transforms in manifold order
+		btVector3 localPointA;
+		btVector3 localPointB;
+		if (!swapped)
+		{
+			localPointA = m_rootTransA.invXform(worldPointOnA);
+			localPointB = m_rootTransB.invXform(pointInWorld);
+		}
+		else
+		{
+			localPointA = m_rootTransB.invXform(worldPointOnA);
+			localPointB = m_rootTransA.invXform(pointInWorld);
+		}
+
+		btManifoldPoint contact(localPointA,localPointB,normalOnBInWorld,depth);
+		contact.m_positionWorldOnA = worldPointOnA;
+		contact.m_positionWorldOnB = pointInWorld;
+
+		//BP mod, store contact triangles.
+		contact.m_partId0 = swapped ? m_partId1 : m_partId0;
+		contact.m_partId1 = swapped ? m_partId0 : m_partId1;
+		contact.m_index0  = swapped ? m_index1 : m_index0;
+		contact.m_index1  = swapped ? m_index0 : m_index1;
+
+		//experimental feature info, for per-triangle material etc.
+		btCollisionObject* firstBody  = swapped ? m_body1 : m_body0;
+		btCollisionObject* secondBody = swapped ? m_body0 : m_body1;
+		m_resultCallback.addSingleResult(contact,firstBody,contact.m_partId0,contact.m_index0,secondBody,contact.m_partId1,contact.m_index1);
+	}
+};
+
+
+
+/// Broadphase AABB callback used by btCollisionWorld::contactTest: runs a discrete collision
+/// query between one fixed collision object and each object the broadphase hands to process().
+struct btSingleContactCallback : public btBroadphaseAabbCallback
+{
+	btCollisionObject* m_collisionObject;	///< the object being tested against the others
+	btCollisionWorld*	m_world;
+	btCollisionWorld::ContactResultCallback&	m_resultCallback;
+
+	btSingleContactCallback(btCollisionObject* collisionObject, btCollisionWorld* world,btCollisionWorld::ContactResultCallback& resultCallback)
+		:m_collisionObject(collisionObject),
+		m_world(world),
+		m_resultCallback(resultCallback)
+	{
+	}
+
+	/// Tests m_collisionObject against the object behind proxy; always returns true.
+	virtual bool	process(const btBroadphaseProxy* proxy)
+	{
+		btCollisionObject*	other = (btCollisionObject*)proxy->m_clientObject;
+		// never test the object against itself, and honour the result callback's collision filter
+		if (other == m_collisionObject)
+			return true;
+		if (!m_resultCallback.needsCollision(other->getBroadphaseHandle()))
+			return true;
+
+		btCollisionAlgorithm* algorithm = m_world->getDispatcher()->findAlgorithm(m_collisionObject,other);
+		if (!algorithm)
+			return true;
+
+		//discrete collision detection query; contacts are forwarded to m_resultCallback
+		btBridgedManifoldResult contactPointResult(m_collisionObject,other, m_resultCallback);
+		algorithm->processCollision(m_collisionObject,other, m_world->getDispatchInfo(),&contactPointResult);
+		// destroy the temporary algorithm explicitly, then hand its memory back to the dispatcher
+		algorithm->~btCollisionAlgorithm();
+		m_world->getDispatcher()->freeCollisionAlgorithm(algorithm);
+		return true;
+	}
+};
+
+
+///contactTest runs a discrete collision test of colObj against the objects in the btCollisionWorld and calls the resultCallback.
+///it reports one or more contact points for every overlapping object (including the one with deepest penetration)
+void	btCollisionWorld::contactTest( btCollisionObject* colObj, ContactResultCallback& resultCallback)
+{
+	btSingleContactCallback	perObjectCallback(colObj,this,resultCallback);
+	// candidates come from a broadphase AABB query using colObj's AABB at its world transform
+	btVector3 queryAabbMin,queryAabbMax;
+	colObj->getCollisionShape()->getAabb(colObj->getWorldTransform(),queryAabbMin,queryAabbMax);
+	m_broadphasePairCache->aabbTest(queryAabbMin,queryAabbMax,perObjectCallback);
+}
+
+
+///contactPairTest performs a discrete collision test between two collision objects and calls the resultCallback if overlap is detected.
+///it reports one or more contact points (including the one with deepest penetration)
+void	btCollisionWorld::contactPairTest(btCollisionObject* colObjA, btCollisionObject* colObjB, ContactResultCallback& resultCallback)
+{
+	btCollisionAlgorithm* pairAlgorithm = getDispatcher()->findAlgorithm(colObjA,colObjB);
+	// nothing to do when the dispatcher provides no algorithm for this pair
+	if (!pairAlgorithm)
+		return;
+
+	//discrete collision detection query; contacts are forwarded to resultCallback
+	btBridgedManifoldResult bridgedResult(colObjA,colObjB, resultCallback);
+	pairAlgorithm->processCollision(colObjA,colObjB, getDispatchInfo(),&bridgedResult);
+	// destroy the temporary algorithm explicitly, then hand its memory back to the dispatcher
+	pairAlgorithm->~btCollisionAlgorithm();
+	getDispatcher()->freeCollisionAlgorithm(pairAlgorithm);
+}
+
+
+
+
+/// Triangle callback that draws every triangle it receives as three lines through a
+/// btIDebugDraw, plus the triangle normal when the drawer's DBG_DrawNormals flag is set.
+class DebugDrawcallback : public btTriangleCallback, public btInternalTriangleIndexCallback
+{
+	btIDebugDraw*	m_debugDrawer;
+	btVector3	m_color;	///< color of the triangle edges
+	btTransform	m_worldTrans;	///< applied to each incoming vertex before drawing
+public:
+	DebugDrawcallback(btIDebugDraw*	debugDrawer,const btTransform& worldTrans,const btVector3& color) :
+		m_debugDrawer(debugDrawer),
+		m_color(color),
+		m_worldTrans(worldTrans)
+	{
+	}
+
+	/// Forwards to processTriangle.
+	virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int  triangleIndex)
+	{
+		processTriangle(triangle,partId,triangleIndex);
+	}
+
+	/// Draws one triangle; partId and triangleIndex are ignored.
+	virtual void processTriangle(btVector3* triangle,int partId, int triangleIndex)
+	{
+		(void)partId;
+		(void)triangleIndex;
+		const btVector3 worldV0 = m_worldTrans*triangle[0];
+		const btVector3 worldV1 = m_worldTrans*triangle[1];
+		const btVector3 worldV2 = m_worldTrans*triangle[2];
+		if (m_debugDrawer->getDebugMode() & btIDebugDraw::DBG_DrawNormals )
+		{
+			// normalized face normal, drawn from the triangle's centroid
+			btVector3 center = (worldV0+worldV1+worldV2)*btScalar(1./3.);
+			btVector3 normal = (worldV1-worldV0).cross(worldV2-worldV0);
+			normal.normalize();
+			btVector3 normalColor(1,1,0);
+			m_debugDrawer->drawLine(center,center+normal,normalColor);
+		}
+		m_debugDrawer->drawLine(worldV0,worldV1,m_color);
+		m_debugDrawer->drawLine(worldV1,worldV2,m_color);
+		m_debugDrawer->drawLine(worldV2,worldV0,m_color);
+	}
+};
+
+
+/// Draws shape, placed at worldTransform, through getDebugDrawer() using color; compound shapes
+/// recurse into their children. Does nothing when no debug drawer is set.
+void btCollisionWorld::debugDrawObject(const btTransform& worldTransform, const btCollisionShape* shape, const btVector3& color)
+{
+	// This method is public and virtual, so it can be called without debugDrawWorld's null check.
+	if (!getDebugDrawer())
+		return;
+
+	// Draw a small simplex at the center of the object
+	getDebugDrawer()->drawTransform(worldTransform,1);
+
+	if (shape->getShapeType() == COMPOUND_SHAPE_PROXYTYPE)
+	{
+		const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(shape);
+		for (int i=compoundShape->getNumChildShapes()-1;i>=0;i--)
+		{
+			btTransform childTrans = compoundShape->getChildTransform(i);
+			const btCollisionShape* colShape = compoundShape->getChildShape(i);
+			debugDrawObject(worldTransform*childTrans,colShape,color);
+		}
+	} else
+	{
+		/// for polyhedral shapes
+		if (shape->isPolyhedral())
+		{
+			btPolyhedralConvexShape* polyshape = (btPolyhedralConvexShape*) shape;
+			int i;
+			if (polyshape->getConvexPolyhedron())
+			{
+				// draw the outline of every face, plus its normal when DBG_DrawNormals is set
+				const btConvexPolyhedron* poly = polyshape->getConvexPolyhedron();
+				for (i=0;i<poly->m_faces.size();i++)
+				{
+					int numVerts = poly->m_faces[i].m_indices.size();
+					// a face without vertices has no outline and no centroid (the division below would be by zero)
+					if (!numVerts)
+						continue;
+
+					btVector3 centroid(0,0,0);
+					int lastV = poly->m_faces[i].m_indices[numVerts-1];
+					for (int v=0;v<poly->m_faces[i].m_indices.size();v++)
+					{
+						int curVert = poly->m_faces[i].m_indices[v];
+						centroid+=poly->m_vertices[curVert];
+						getDebugDrawer()->drawLine(worldTransform*poly->m_vertices[lastV],worldTransform*poly->m_vertices[curVert],color);
+						lastV = curVert;
+					}
+					centroid*= btScalar(1.f)/btScalar(numVerts);
+					if (getDebugDrawer()->getDebugMode() & btIDebugDraw::DBG_DrawNormals)
+					{
+						btVector3 normalColor(1,1,0);
+						btVector3 faceNormal(poly->m_faces[i].m_plane[0],poly->m_faces[i].m_plane[1],poly->m_faces[i].m_plane[2]);
+						getDebugDrawer()->drawLine(worldTransform*centroid,worldTransform*(centroid+faceNormal),normalColor);
+					}
+				}
+			} else
+			{
+				// no convex polyhedron available: draw the shape edge by edge instead
+				for (i=0;i<polyshape->getNumEdges();i++)
+				{
+					btVector3 a,b;
+					polyshape->getEdge(i,a,b);
+					btVector3 wa = worldTransform * a;
+					btVector3 wb = worldTransform * b;
+					getDebugDrawer()->drawLine(wa,wb,color);
+				}
+			}
+		}
+		else
+		{
+			switch (shape->getShapeType())
+			{
+			case BOX_SHAPE_PROXYTYPE:
+				{
+					const btBoxShape* boxShape = static_cast<const btBoxShape*>(shape);
+					btVector3 halfExtents = boxShape->getHalfExtentsWithMargin();
+					getDebugDrawer()->drawBox(-halfExtents,halfExtents,worldTransform,color);
+					break;
+				}
+
+			case SPHERE_SHAPE_PROXYTYPE:
+				{
+					const btSphereShape* sphereShape = static_cast<const btSphereShape*>(shape);
+					btScalar radius = sphereShape->getMargin();//radius doesn't include the margin, so draw with margin
+
+					getDebugDrawer()->drawSphere(radius, worldTransform, color);
+					break;
+				}
+			case MULTI_SPHERE_SHAPE_PROXYTYPE:
+				{
+					const btMultiSphereShape* multiSphereShape = static_cast<const btMultiSphereShape*>(shape);
+
+					btTransform childTransform;
+					childTransform.setIdentity();
+
+					for (int i = multiSphereShape->getSphereCount()-1; i>=0;i--)
+					{
+						childTransform.setOrigin(multiSphereShape->getSpherePosition(i));
+						getDebugDrawer()->drawSphere(multiSphereShape->getSphereRadius(i), worldTransform*childTransform, color);
+					}
+
+					break;
+				}
+			case CAPSULE_SHAPE_PROXYTYPE:
+				{
+					const btCapsuleShape* capsuleShape = static_cast<const btCapsuleShape*>(shape);
+
+					btScalar radius = capsuleShape->getRadius();
+					btScalar halfHeight = capsuleShape->getHalfHeight();
+
+					int upAxis = capsuleShape->getUpAxis();
+					getDebugDrawer()->drawCapsule(radius, halfHeight, upAxis, worldTransform, color);
+					break;
+				}
+			case CONE_SHAPE_PROXYTYPE:
+				{
+					const btConeShape* coneShape = static_cast<const btConeShape*>(shape);
+					btScalar radius = coneShape->getRadius();//+coneShape->getMargin();
+					btScalar height = coneShape->getHeight();//+coneShape->getMargin();
+
+					int upAxis= coneShape->getConeUpIndex();
+					getDebugDrawer()->drawCone(radius, height, upAxis, worldTransform, color);
+					break;
+
+				}
+			case CYLINDER_SHAPE_PROXYTYPE:
+				{
+					const btCylinderShape* cylinder = static_cast<const btCylinderShape*>(shape);
+					int upAxis = cylinder->getUpAxis();
+					btScalar radius = cylinder->getRadius();
+					btScalar halfHeight = cylinder->getHalfExtentsWithMargin()[upAxis];
+					getDebugDrawer()->drawCylinder(radius, halfHeight, upAxis, worldTransform, color);
+					break;
+				}
+
+			case STATIC_PLANE_PROXYTYPE:
+				{
+					const btStaticPlaneShape* staticPlaneShape = static_cast<const btStaticPlaneShape*>(shape);
+					btScalar planeConst = staticPlaneShape->getPlaneConstant();
+					const btVector3& planeNormal = staticPlaneShape->getPlaneNormal();
+					getDebugDrawer()->drawPlane(planeNormal, planeConst,worldTransform, color);
+					break;
+
+				}
+			default:
+				{
+
+					if (shape->isConcave())
+					{
+						btConcaveShape* concaveMesh = (btConcaveShape*) shape;
+
+						///@todo pass camera, for some culling? no -> we are not a graphics lib
+						btVector3 aabbMax(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+						btVector3 aabbMin(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT));
+
+						DebugDrawcallback drawCallback(getDebugDrawer(),worldTransform,color);
+						concaveMesh->processAllTriangles(&drawCallback,aabbMin,aabbMax);
+
+					}
+
+					if (shape->getShapeType() == CONVEX_TRIANGLEMESH_SHAPE_PROXYTYPE)
+					{
+						btConvexTriangleMeshShape* convexMesh = (btConvexTriangleMeshShape*) shape;
+						//todo: pass camera for some culling			
+						btVector3 aabbMax(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+						btVector3 aabbMin(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT));
+						//DebugDrawcallback drawCallback;
+						DebugDrawcallback drawCallback(getDebugDrawer(),worldTransform,color);
+						convexMesh->getMeshInterface()->InternalProcessAllTriangles(&drawCallback,aabbMin,aabbMax);
+					}
+
+
+					
+				}
+			}
+		}
+	}
+}
+
+
+/// Draws contact points (DBG_DrawContactPoints), object wireframes (DBG_DrawWireframe) and
+/// AABBs (DBG_DrawAabb) through getDebugDrawer(); does nothing when no drawer is set.
+void	btCollisionWorld::debugDrawWorld()
+{
+	// One pointer from the virtual getter serves every pass, instead of mixing it with m_debugDrawer.
+	btIDebugDraw* drawer = getDebugDrawer();
+	if (!drawer)
+		return;
+
+	if (drawer->getDebugMode() & btIDebugDraw::DBG_DrawContactPoints)
+	{
+		int numManifolds = getDispatcher()->getNumManifolds();
+		btVector3 color(1,0.65,0);
+		for (int i=0;i<numManifolds;i++)
+		{
+			btPersistentManifold* contactManifold = getDispatcher()->getManifoldByIndexInternal(i);
+			int numContacts = contactManifold->getNumContacts();
+			for (int j=0;j<numContacts;j++)
+			{
+				btManifoldPoint& cp = contactManifold->getContactPoint(j);
+				drawer->drawContactPoint(cp.m_positionWorldOnB,cp.m_normalWorldOnB,cp.getDistance(),cp.getLifeTime(),color);
+			}
+		}
+	}
+
+	if (!(drawer->getDebugMode() & (btIDebugDraw::DBG_DrawWireframe | btIDebugDraw::DBG_DrawAabb)))
+		return;
+
+	for (int i=0;i<m_collisionObjects.size();i++)
+	{
+		btCollisionObject* colObj = m_collisionObjects[i];
+		// objects flagged CF_DISABLE_VISUALIZE_OBJECT are not drawn at all
+		if ((colObj->getCollisionFlags() & btCollisionObject::CF_DISABLE_VISUALIZE_OBJECT)!=0)
+			continue;
+
+		if (drawer->getDebugMode() & btIDebugDraw::DBG_DrawWireframe)
+		{
+			// the wireframe color depends on the object's activation state
+			btVector3 color(btScalar(1.),btScalar(1.),btScalar(1.));
+			switch(colObj->getActivationState())
+			{
+			case  ACTIVE_TAG:
+				color = btVector3(btScalar(1.),btScalar(1.),btScalar(1.)); break;
+			case ISLAND_SLEEPING:
+				color =  btVector3(btScalar(0.),btScalar(1.),btScalar(0.));break;
+			case WANTS_DEACTIVATION:
+				color = btVector3(btScalar(0.),btScalar(1.),btScalar(1.));break;
+			case DISABLE_DEACTIVATION:
+				color = btVector3(btScalar(1.),btScalar(0.),btScalar(0.));break;
+			case DISABLE_SIMULATION:
+				color = btVector3(btScalar(1.),btScalar(1.),btScalar(0.));break;
+			default:
+				{
+					color = btVector3(btScalar(1),btScalar(0.),btScalar(0.));
+				}
+			};
+			debugDrawObject(colObj->getWorldTransform(),colObj->getCollisionShape(),color);
+		}
+		if (drawer->getDebugMode() & btIDebugDraw::DBG_DrawAabb)
+		{
+			btVector3 minAabb,maxAabb;
+			btVector3 colorvec(1,0,0);
+			colObj->getCollisionShape()->getAabb(colObj->getWorldTransform(), minAabb,maxAabb);
+			btVector3 contactThreshold(gContactBreakingThreshold,gContactBreakingThreshold,gContactBreakingThreshold);
+			minAabb -= contactThreshold;
+			maxAabb += contactThreshold;
+			// with m_useContinuous set, non-static non-kinematic rigid bodies also merge in the AABB at the interpolation transform
+			if(getDispatchInfo().m_useContinuous && colObj->getInternalType()==btCollisionObject::CO_RIGID_BODY && !colObj->isStaticOrKinematicObject())
+			{
+				btVector3 minAabb2,maxAabb2;
+				colObj->getCollisionShape()->getAabb(colObj->getInterpolationWorldTransform(),minAabb2,maxAabb2);
+				minAabb2 -= contactThreshold;
+				maxAabb2 += contactThreshold;
+				minAabb.setMin(minAabb2);
+				maxAabb.setMax(maxAabb2);
+			}
+			drawer->drawAabb(minAabb,maxAabb,colorvec);
+		}
+	}
+}
+
+
+/// Serializes the collision objects whose internal type is CO_COLLISION_OBJECT, then the
+/// collision shape of every object in the world, writing each distinct shape only once.
+void	btCollisionWorld::serializeCollisionObjects(btSerializer* serializer)
+{
+	// Pass 1: the collision objects themselves. Objects reporting any other internal type
+	// are skipped here.
+	for (int objectIndex = 0; objectIndex < m_collisionObjects.size(); ++objectIndex)
+	{
+		btCollisionObject* object = m_collisionObjects[objectIndex];
+		if (object->getInternalType() != btCollisionObject::CO_COLLISION_OBJECT)
+			continue;
+
+		object->serializeSingleObject(serializer);
+	}
+
+	// Pass 2: the shapes of all objects, whatever their internal type.
+	///keep track of shapes already serialized
+	btHashMap<btHashPtr,btCollisionShape*>	seenShapes;
+	for (int objectIndex = 0; objectIndex < m_collisionObjects.size(); ++objectIndex)
+	{
+		btCollisionShape* shape = m_collisionObjects[objectIndex]->getCollisionShape();
+		if (seenShapes.find(shape))
+			continue;
+
+		seenShapes.insert(shape,shape);
+		shape->serializeSingleShape(serializer);
+	}
+
+}
+
+
+void	btCollisionWorld::serialize(btSerializer* serializer)
+{
+	// Serializes this world: the data is written between the serializer's start and finish calls.
+	serializer->startSerialization();
+	// collision objects and their collision shapes
+	serializeCollisionObjects(serializer);
+	
+	serializer->finishSerialization();
+}
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btCollisionWorld.h b/src/bullet/BulletCollision/CollisionDispatch/btCollisionWorld.h
new file mode 100644
index 00000000..0a92d2d6
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btCollisionWorld.h
@@ -0,0 +1,509 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://bulletphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+/**
+ * @mainpage Bullet Documentation
+ *
+ * @section intro_sec Introduction
+ * Bullet Collision Detection & Physics SDK
+ *
+ * Bullet is a Collision Detection and Rigid Body Dynamics Library. The Library is Open Source and free for commercial use, under the ZLib license ( http://opensource.org/licenses/zlib-license.php ).
+ *
+ * The main documentation is Bullet_User_Manual.pdf, included in the source code distribution.
+ * There is the Physics Forum for feedback and general Collision Detection and Physics discussions.
+ * Please visit http://www.bulletphysics.com
+ *
+ * @section install_sec Installation
+ *
+ * @subsection step1 Step 1: Download
+ * You can download the Bullet Physics Library from the Google Code repository: http://code.google.com/p/bullet/downloads/list
+ *
+ * @subsection step2 Step 2: Building
+ * Bullet's main build system for all platforms is cmake, which you can download from http://www.cmake.org
+ * cmake can autogenerate projectfiles for Microsoft Visual Studio, Apple Xcode, KDevelop and Unix Makefiles.
+ * The easiest is to run the CMake cmake-gui graphical user interface and choose the options and generate projectfiles.
+ * You can also use cmake in the command-line. Here are some examples for various platforms:
+ * cmake . -G "Visual Studio 9 2008"
+ * cmake . -G Xcode
+ * cmake . -G "Unix Makefiles"
+ * Although cmake is recommended, you can also use autotools for UNIX: ./autogen.sh ./configure to create a Makefile and then run make.
+ * 
+ * @subsection step3 Step 3: Testing demos
+ * Try to run and experiment with BasicDemo executable as a starting point.
+ * Bullet can be used in several ways, as Full Rigid Body simulation, as Collision Detector Library or Low Level / Snippets like the GJK Closest Point calculation.
+ * The Dependencies can be seen in this documentation under Directories
+ * 
+ * @subsection step4 Step 4: Integrating in your application, full Rigid Body and Soft Body simulation
+ * Check out BasicDemo how to create a btDynamicsWorld, btRigidBody and btCollisionShape, Stepping the simulation and synchronizing your graphics object transform.
+ * Check out SoftDemo how to use soft body dynamics, using btSoftRigidDynamicsWorld.
+ * @subsection step5 Step 5 : Integrate the Collision Detection Library (without Dynamics and other Extras)
+ * Bullet Collision Detection can also be used without the Dynamics/Extras.
+ * Check out btCollisionWorld and btCollisionObject, and the CollisionInterfaceDemo.
+ * @subsection step6 Step 6 : Use Snippets like the GJK Closest Point calculation.
+ * Bullet has been designed in a modular way keeping dependencies to a minimum. The ConvexHullDistance demo demonstrates direct use of btGjkPairDetector.
+ *
+ * @section copyright Copyright
+ * For up-to-date information, the copyright notice and the contributors list, check out Bullet_User_Manual.pdf
+ * 
+ */
+ 
+ 
+
+#ifndef BT_COLLISION_WORLD_H
+#define BT_COLLISION_WORLD_H
+
+class btStackAlloc;
+class btCollisionShape;
+class btConvexShape;
+class btBroadphaseInterface;
+class btSerializer;
+
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btTransform.h"
+#include "btCollisionObject.h"
+#include "btCollisionDispatcher.h"
+#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+///CollisionWorld is interface and container for the collision detection
+class btCollisionWorld
+{
+
+	
+protected:
+
+	btAlignedObjectArray<btCollisionObject*>	m_collisionObjects;
+	
+	btDispatcher*	m_dispatcher1;
+
+	btDispatcherInfo	m_dispatchInfo;
+
+	btStackAlloc*	m_stackAlloc;
+
+	btBroadphaseInterface*	m_broadphasePairCache;
+
+	btIDebugDraw*	m_debugDrawer;
+
+	///m_forceUpdateAllAabbs can be set to false as an optimization to only update active object AABBs
+	///it is true by default, because it is error-prone (setting the position of static objects wouldn't update their AABB)
+	bool m_forceUpdateAllAabbs;
+
+	void	serializeCollisionObjects(btSerializer* serializer);
+
+public:
+
+	//this constructor doesn't own the dispatcher and paircache/broadphase
+	btCollisionWorld(btDispatcher* dispatcher,btBroadphaseInterface* broadphasePairCache, btCollisionConfiguration* collisionConfiguration);
+
+	virtual ~btCollisionWorld();
+	///Replaces the broadphase pointer stored in m_broadphasePairCache with pairCache.
+	void	setBroadphase(btBroadphaseInterface*	pairCache)
+	{
+		m_broadphasePairCache = pairCache;
+	}
+	///Returns the stored broadphase pointer (const overload, read-only access).
+	const btBroadphaseInterface*	getBroadphase() const
+	{
+		return m_broadphasePairCache;
+	}
+	///Returns the stored broadphase pointer.
+	btBroadphaseInterface*	getBroadphase()
+	{
+		return m_broadphasePairCache;
+	}
+	///Returns the broadphase's overlapping pair cache; m_broadphasePairCache is dereferenced without a null check.
+	btOverlappingPairCache*	getPairCache()
+	{
+		return m_broadphasePairCache->getOverlappingPairCache();
+	}
+
+	///Returns the dispatcher pointer stored in m_dispatcher1.
+	btDispatcher*	getDispatcher()
+	{
+		return m_dispatcher1;
+	}
+	///Returns the dispatcher pointer stored in m_dispatcher1 (const overload, read-only access).
+	const btDispatcher*	getDispatcher() const
+	{
+		return m_dispatcher1;
+	}
+
+	void	updateSingleAabb(btCollisionObject* colObj);
+
+	virtual void	updateAabbs();
+	///Stores debugDrawer in m_debugDrawer, the pointer the default getDebugDrawer() returns.
+	virtual void	setDebugDrawer(btIDebugDraw*	debugDrawer)
+	{
+			m_debugDrawer = debugDrawer;
+	}
+	///Returns the debug drawer stored in m_debugDrawer; virtual, so subclasses may override it.
+	virtual btIDebugDraw*	getDebugDrawer()
+	{
+		return m_debugDrawer;
+	}
+
+	virtual void	debugDrawWorld();
+
+	virtual void debugDrawObject(const btTransform& worldTransform, const btCollisionShape* shape, const btVector3& color);
+
+
+	///LocalShapeInfo gives extra information for complex shapes
+	///Currently, only btTriangleMeshShape is available, so it just contains triangleIndex and subpart
+	struct	LocalShapeInfo
+	{
+		int	m_shapePart;	//the "subpart" mentioned above
+		int	m_triangleIndex;	//the "triangleIndex" mentioned above
+		//the two members below are commented out and not part of the struct
+		//const btCollisionShape*	m_shapeTemp;
+		//const btTransform*	m_shapeLocalTransform;
+	};
+	///LocalRayResult bundles the data of one ray hit as passed to RayResultCallback::addSingleResult.
+	struct	LocalRayResult
+	{
+		LocalRayResult(btCollisionObject*	collisionObject, 
+			LocalShapeInfo*	localShapeInfo,
+			const btVector3&		hitNormalLocal,
+			btScalar hitFraction)
+		:m_collisionObject(collisionObject),
+		m_localShapeInfo(localShapeInfo),
+		m_hitNormalLocal(hitNormalLocal),
+		m_hitFraction(hitFraction)
+		{
+		}
+		//the constructor above only copies its arguments into the members below
+		btCollisionObject*		m_collisionObject;
+		LocalShapeInfo*			m_localShapeInfo;
+		btVector3				m_hitNormalLocal;
+		btScalar				m_hitFraction;
+		//m_hitNormalLocal may already be in world space: addSingleResult receives a normalInWorldSpace flag
+	};
+
+	///RayResultCallback is used to report new raycast results
+	struct	RayResultCallback
+	{
+		btScalar	m_closestHitFraction;	//starts at 1; btSingleRayCallback stops ray tests once it equals 0
+		btCollisionObject*		m_collisionObject;	//starts at 0; hasHit() tests it
+		short int	m_collisionFilterGroup;
+		short int	m_collisionFilterMask;
+      //@BP Mod - Custom flags, currently used to enable backface culling on tri-meshes, see btRaycastCallback
+      unsigned int m_flags;
+
+		virtual ~RayResultCallback()
+		{
+		}
+		bool	hasHit() const
+		{
+			return (m_collisionObject != 0);
+		}
+
+		RayResultCallback()
+			:m_closestHitFraction(btScalar(1.)),
+			m_collisionObject(0),
+			m_collisionFilterGroup(btBroadphaseProxy::DefaultFilter),
+			m_collisionFilterMask(btBroadphaseProxy::AllFilter),
+         //@BP Mod
+         m_flags(0)
+		{
+		}
+		///True when proxy0's group intersects this callback's mask and this callback's group intersects proxy0's mask.
+		virtual bool needsCollision(btBroadphaseProxy* proxy0) const
+		{
+			bool collides = (proxy0->m_collisionFilterGroup & m_collisionFilterMask) != 0;
+			collides = collides && (m_collisionFilterGroup & proxy0->m_collisionFilterMask);
+			return collides;
+		}
+
+		///Receives one ray hit; pure virtual, so every concrete callback must implement it.
+		virtual	btScalar	addSingleResult(LocalRayResult& rayResult,bool normalInWorldSpace) = 0;
+	};
+	///ClosestRayResultCallback keeps the data of the most recently reported hit; per the note in
+	///addSingleResult, the caller only reports hits that are not farther than the current closest one.
+	struct	ClosestRayResultCallback : public RayResultCallback
+	{
+		ClosestRayResultCallback(const btVector3&	rayFromWorld,const btVector3&	rayToWorld)
+		:m_rayFromWorld(rayFromWorld),
+		m_rayToWorld(rayToWorld)
+		{
+		}
+
+		btVector3	m_rayFromWorld;//used to calculate hitPointWorld from hitFraction
+		btVector3	m_rayToWorld;
+
+		btVector3	m_hitNormalWorld;
+		btVector3	m_hitPointWorld;
+
+		///Records rayResult as the closest hit and returns its hit fraction.
+		virtual	btScalar	addSingleResult(LocalRayResult& rayResult,bool normalInWorldSpace)
+		{
+			//caller already does the filter on the m_closestHitFraction
+			btAssert(rayResult.m_hitFraction <= m_closestHitFraction);
+
+			const btScalar fraction = rayResult.m_hitFraction;
+			m_closestHitFraction = fraction;
+			m_collisionObject = rayResult.m_collisionObject;
+
+			//a normal that is not yet in world space is rotated by the hit object's basis
+			m_hitNormalWorld = normalInWorldSpace
+				? rayResult.m_hitNormalLocal
+				: m_collisionObject->getWorldTransform().getBasis()*rayResult.m_hitNormalLocal;
+			m_hitPointWorld.setInterpolate3(m_rayFromWorld,m_rayToWorld,fraction);
+			return fraction;
+		}
+	};
+
+	struct	AllHitsRayResultCallback : public RayResultCallback	// appends every reported hit to parallel arrays
+	{
+		AllHitsRayResultCallback(const btVector3&	rayFromWorld,const btVector3&	rayToWorld)
+		:m_rayFromWorld(rayFromWorld),
+		m_rayToWorld(rayToWorld)
+		{
+		}
+
+		btAlignedObjectArray<btCollisionObject*>		m_collisionObjects;
+
+		btVector3	m_rayFromWorld;//used to calculate hitPointWorld from hitFraction
+		btVector3	m_rayToWorld;
+		// Entry k of each array below belongs to the same hit as m_collisionObjects[k].
+		btAlignedObjectArray<btVector3>	m_hitNormalWorld;
+		btAlignedObjectArray<btVector3>	m_hitPointWorld;
+		btAlignedObjectArray<btScalar> m_hitFractions;
+			// Records one hit; m_closestHitFraction is returned unchanged (it is never lowered here).
+		virtual	btScalar	addSingleResult(LocalRayResult& rayResult,bool normalInWorldSpace)
+		{
+			m_collisionObject = rayResult.m_collisionObject;	// base-class member ends up holding the last reported object
+			m_collisionObjects.push_back(rayResult.m_collisionObject);
+			btVector3 hitNormalWorld;
+			if (normalInWorldSpace)
+			{
+				hitNormalWorld = rayResult.m_hitNormalLocal;
+			} else
+			{
+				///need to transform normal into worldspace
+				hitNormalWorld = m_collisionObject->getWorldTransform().getBasis()*rayResult.m_hitNormalLocal;
+			}
+			m_hitNormalWorld.push_back(hitNormalWorld);
+			btVector3 hitPointWorld;
+			hitPointWorld.setInterpolate3(m_rayFromWorld,m_rayToWorld,rayResult.m_hitFraction);
+			m_hitPointWorld.push_back(hitPointWorld);
+			m_hitFractions.push_back(rayResult.m_hitFraction);
+			return m_closestHitFraction;
+		}
+	};
+
+
+	struct LocalConvexResult	// plain value bundle describing one convex-cast hit; passed to ConvexResultCallback::addSingleResult()
+	{
+		LocalConvexResult(btCollisionObject*	hitCollisionObject, 
+			LocalShapeInfo*	localShapeInfo,
+			const btVector3&		hitNormalLocal,
+			const btVector3&		hitPointLocal,
+			btScalar hitFraction
+			)
+		:m_hitCollisionObject(hitCollisionObject),
+		m_localShapeInfo(localShapeInfo),
+		m_hitNormalLocal(hitNormalLocal),
+		m_hitPointLocal(hitPointLocal),
+		m_hitFraction(hitFraction)
+		{
+		}
+		// Members are copies of the constructor arguments; the two pointers are stored as given, nothing here frees them.
+		btCollisionObject*		m_hitCollisionObject;
+		LocalShapeInfo*			m_localShapeInfo;
+		btVector3				m_hitNormalLocal;	// may already be in world space, see the normalInWorldSpace flag of addSingleResult()
+		btVector3				m_hitPointLocal;
+		btScalar				m_hitFraction;
+	};
+
+	///ConvexResultCallback is used to report new convex sweep results
+	struct	ConvexResultCallback	// abstract base: subclasses implement addSingleResult()
+	{
+		btScalar	m_closestHitFraction;	// initialised to 1 by the constructor
+		short int	m_collisionFilterGroup;
+		short int	m_collisionFilterMask;
+		// Defaults: fraction 1, DefaultFilter group, AllFilter mask.
+		ConvexResultCallback()
+			:m_closestHitFraction(btScalar(1.)),
+			m_collisionFilterGroup(btBroadphaseProxy::DefaultFilter),
+			m_collisionFilterMask(btBroadphaseProxy::AllFilter)
+		{
+		}
+
+		virtual ~ConvexResultCallback()
+		{
+		}
+		// Unlike RayResultCallback::hasHit(), this is derived from the fraction: true once it dropped below 1.
+		bool	hasHit() const
+		{
+			return (m_closestHitFraction < btScalar(1.));
+		}
+
+		// Two-way filter: the proxy's group must intersect our mask AND our group must intersect the proxy's mask.
+
+		virtual bool needsCollision(btBroadphaseProxy* proxy0) const
+		{
+			bool collides = (proxy0->m_collisionFilterGroup & m_collisionFilterMask) != 0;
+			collides = collides && (m_collisionFilterGroup & proxy0->m_collisionFilterMask);
+			return collides;
+		}
+		// Receives one hit. NOTE(review): how the caller uses the returned scalar is not visible here - confirm in objectQuerySingle.
+		virtual	btScalar	addSingleResult(LocalConvexResult& convexResult,bool normalInWorldSpace) = 0;
+	};
+
+	struct	ClosestConvexResultCallback : public ConvexResultCallback	// keeps only the latest hit passed to addSingleResult()
+	{
+		ClosestConvexResultCallback(const btVector3&	convexFromWorld,const btVector3&	convexToWorld)
+		:m_convexFromWorld(convexFromWorld),
+		m_convexToWorld(convexToWorld),
+		m_hitCollisionObject(0)
+		{
+		}
+
+		btVector3	m_convexFromWorld;//used to calculate hitPointWorld from hitFraction
+		btVector3	m_convexToWorld;
+		// The two result vectors below are not set by the constructor; they are written by addSingleResult().
+		btVector3	m_hitNormalWorld;
+		btVector3	m_hitPointWorld;
+		btCollisionObject*	m_hitCollisionObject;
+		// Stores the hit (fraction, object, normal, point) and returns its fraction.
+		virtual	btScalar	addSingleResult(LocalConvexResult& convexResult,bool normalInWorldSpace)
+		{
+//caller already does the filter on the m_closestHitFraction
+			btAssert(convexResult.m_hitFraction <= m_closestHitFraction);
+						
+			m_closestHitFraction = convexResult.m_hitFraction;
+			m_hitCollisionObject = convexResult.m_hitCollisionObject;
+			if (normalInWorldSpace)
+			{
+				m_hitNormalWorld = convexResult.m_hitNormalLocal;
+			} else
+			{
+				///need to transform normal into worldspace
+				m_hitNormalWorld = m_hitCollisionObject->getWorldTransform().getBasis()*convexResult.m_hitNormalLocal;
+			}
+			m_hitPointWorld = convexResult.m_hitPointLocal;	// copied as-is, no transform applied; NOTE(review): presumably the caller supplies a world-space point - confirm
+			return convexResult.m_hitFraction;
+		}
+	};
+
+	///ContactResultCallback is used to report contact points
+	struct	ContactResultCallback	// abstract base: subclasses implement addSingleResult()
+	{
+		short int	m_collisionFilterGroup;
+		short int	m_collisionFilterMask;
+		// Defaults: DefaultFilter group, AllFilter mask.
+		ContactResultCallback()
+			:m_collisionFilterGroup(btBroadphaseProxy::DefaultFilter),
+			m_collisionFilterMask(btBroadphaseProxy::AllFilter)
+		{
+		}
+
+		virtual ~ContactResultCallback()
+		{
+		}
+		// Two-way filter: the proxy's group must intersect our mask AND our group must intersect the proxy's mask.
+		virtual bool needsCollision(btBroadphaseProxy* proxy0) const
+		{
+			bool collides = (proxy0->m_collisionFilterGroup & m_collisionFilterMask) != 0;
+			collides = collides && (m_collisionFilterGroup & proxy0->m_collisionFilterMask);
+			return collides;
+		}
+		// Receives one contact point plus the two objects and their part/index ids. NOTE(review): meaning of the returned scalar is not visible here.
+		virtual	btScalar	addSingleResult(btManifoldPoint& cp,	const btCollisionObject* colObj0,int partId0,int index0,const btCollisionObject* colObj1,int partId1,int index1) = 0;
+	};
+
+
+
+	int	getNumCollisionObjects() const	// number of entries in m_collisionObjects, converted to int
+	{
+		return int(m_collisionObjects.size());
+	}
+
+	/// rayTest performs a raycast on all objects in the btCollisionWorld, and calls the resultCallback
+	/// This allows for several queries: first hit, all hits, any hit, dependent on the value returned by the callback.
+	virtual void rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const; 
+
+	/// convexSweepTest performs a swept convex cast on all objects in the btCollisionWorld, and calls the resultCallback
+	/// This allows for several queries: first hit, all hits, any hit, dependent on the value returned by the callback.
+	void    convexSweepTest (const btConvexShape* castShape, const btTransform& from, const btTransform& to, ConvexResultCallback& resultCallback,  btScalar allowedCcdPenetration = btScalar(0.)) const;
+
+	///contactTest performs a discrete collision test between colObj against all objects in the btCollisionWorld, and calls the resultCallback.
+	///it reports one or more contact points for every overlapping object (including the one with deepest penetration)
+	void	contactTest(btCollisionObject* colObj, ContactResultCallback& resultCallback);
+
+	///contactPairTest performs a discrete collision test between two collision objects and calls the resultCallback if overlap is detected.
+	///it reports one or more contact points (including the one with deepest penetration)
+	void	contactPairTest(btCollisionObject* colObjA, btCollisionObject* colObjB, ContactResultCallback& resultCallback);
+
+
+	/// rayTestSingle performs a raycast call and calls the resultCallback. It is used internally by rayTest.
+	/// In a future implementation, we consider moving the ray test as a virtual method in btCollisionShape.
+	/// This allows more customization.
+	static void	rayTestSingle(const btTransform& rayFromTrans,const btTransform& rayToTrans,
+					  btCollisionObject* collisionObject,
+					  const btCollisionShape* collisionShape,
+					  const btTransform& colObjWorldTransform,
+					  RayResultCallback& resultCallback);
+
+	/// objectQuerySingle performs a collision detection query and calls the resultCallback. It is used internally by rayTest.
+	static void	objectQuerySingle(const btConvexShape* castShape, const btTransform& rayFromTrans,const btTransform& rayToTrans,
+					  btCollisionObject* collisionObject,
+					  const btCollisionShape* collisionShape,
+					  const btTransform& colObjWorldTransform,
+					  ConvexResultCallback& resultCallback, btScalar	allowedPenetration);
+
+	virtual void	addCollisionObject(btCollisionObject* collisionObject,short int collisionFilterGroup=btBroadphaseProxy::DefaultFilter,short int collisionFilterMask=btBroadphaseProxy::AllFilter);
+
+	btCollisionObjectArray& getCollisionObjectArray()	// mutable reference to m_collisionObjects (no copy)
+	{
+		return m_collisionObjects;
+	}
+	// const overload: read-only reference to the same array
+	const btCollisionObjectArray& getCollisionObjectArray() const
+	{
+		return m_collisionObjects;
+	}
+
+
+	virtual void	removeCollisionObject(btCollisionObject* collisionObject);
+
+	virtual void	performDiscreteCollisionDetection();
+
+	btDispatcherInfo& getDispatchInfo()	// mutable reference to m_dispatchInfo (no copy)
+	{
+		return m_dispatchInfo;
+	}
+	// const overload: read-only reference to the same object
+	const btDispatcherInfo& getDispatchInfo() const
+	{
+		return m_dispatchInfo;
+	}
+	
+	bool	getForceUpdateAllAabbs() const	// returns the m_forceUpdateAllAabbs flag; its effect is implemented outside this view
+	{
+		return m_forceUpdateAllAabbs;
+	}
+	void setForceUpdateAllAabbs( bool forceUpdateAllAabbs)	// stores the flag read back by getForceUpdateAllAabbs()
+	{
+		m_forceUpdateAllAabbs = forceUpdateAllAabbs;
+	}
+
+	///Preliminary serialization test for Bullet 2.76. Loading those files requires a separate parser (Bullet/Demos/SerializeDemo)
+	virtual	void	serialize(btSerializer* serializer);
+
+};
+
+
+#endif //BT_COLLISION_WORLD_H
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp b/src/bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp
new file mode 100644
index 00000000..54889a63
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp
@@ -0,0 +1,353 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btCompoundShape.h"
+#include "BulletCollision/BroadphaseCollision/btDbvt.h"
+#include "LinearMath/btIDebugDraw.h"
+#include "LinearMath/btAabbUtil2.h"
+#include "btManifoldResult.h"
+
+btCompoundCollisionAlgorithm::btCompoundCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped)
+:btActivatingCollisionAlgorithm(ci,body0,body1),
+m_isSwapped(isSwapped),	// true: body1 is the compound object, false: body0
+m_sharedManifold(ci.m_manifold)	// later handed to findAlgorithm() for every child
+{
+	m_ownsManifold = false;
+
+	btCollisionObject* colObj = m_isSwapped? body1 : body0;
+	btAssert (colObj->getCollisionShape()->isCompound());
+	// Remember the shape's revision so processCollision() can detect later changes to the compound.
+	btCompoundShape* compoundShape = static_cast<btCompoundShape*>(colObj->getCollisionShape());
+	m_compoundShapeRevision = compoundShape->getUpdateRevision();
+	// Size the child-algorithm array (and fill it when the shape has no AABB tree).
+	preallocateChildAlgorithms(body0,body1);
+}
+
+void	btCompoundCollisionAlgorithm::preallocateChildAlgorithms(btCollisionObject* body0,btCollisionObject* body1)
+{
+	btCollisionObject* colObj = m_isSwapped? body1 : body0;
+	btCollisionObject* otherObj = m_isSwapped? body0 : body1;
+	btAssert (colObj->getCollisionShape()->isCompound());
+	// Resizes m_childCollisionAlgorithms to one slot per child shape and assigns every slot.
+	btCompoundShape* compoundShape = static_cast<btCompoundShape*>(colObj->getCollisionShape());
+
+	int numChildren = compoundShape->getNumChildShapes();
+	int i;
+	// Existing entries are overwritten without being freed here; callers release them first via removeChildAlgorithms().
+	m_childCollisionAlgorithms.resize(numChildren);
+	for (i=0;i<numChildren;i++)
+	{
+		if (compoundShape->getDynamicAabbTree())
+		{
+			m_childCollisionAlgorithms[i] = 0;	// tree present: leave empty, btCompoundLeafCallback creates it on demand
+		} else
+		{
+			btCollisionShape* tmpShape = colObj->getCollisionShape();
+			btCollisionShape* childShape = compoundShape->getChildShape(i);
+			colObj->internalSetTemporaryCollisionShape( childShape );	// findAlgorithm() must see the child shape, not the compound
+			m_childCollisionAlgorithms[i] = m_dispatcher->findAlgorithm(colObj,otherObj,m_sharedManifold);
+			colObj->internalSetTemporaryCollisionShape( tmpShape );	// restore the compound shape
+		}
+	}
+}
+
+void	btCompoundCollisionAlgorithm::removeChildAlgorithms()	// destroys and frees every non-null child algorithm
+{
+	int numChildren = m_childCollisionAlgorithms.size();
+	int i;
+	for (i=0;i<numChildren;i++)
+	{
+		if (m_childCollisionAlgorithms[i])
+		{
+			m_childCollisionAlgorithms[i]->~btCollisionAlgorithm();	// explicit destructor call, then the memory goes back to the dispatcher
+			m_dispatcher->freeCollisionAlgorithm(m_childCollisionAlgorithms[i]);	// NOTE: the slot is left dangling (not reset to 0); callers must reassign it before reuse
+		}
+	}
+}
+
+btCompoundCollisionAlgorithm::~btCompoundCollisionAlgorithm()	// releases all child algorithms still held
+{
+	removeChildAlgorithms();
+}
+
+
+
+
+struct	btCompoundLeafCallback : btDbvt::ICollide
+{
+	// Callback that btCompoundCollisionAlgorithm::processCollision passes to btDbvt::collideTV; each call collides one child shape.
+public:
+	// Context captured by the constructor. This struct has no destructor and frees none of these pointers.
+	btCollisionObject* m_compoundColObj;
+	btCollisionObject* m_otherObj;
+	btDispatcher* m_dispatcher;
+	const btDispatcherInfo& m_dispatchInfo;
+	btManifoldResult*	m_resultOut;
+	btCollisionAlgorithm**	m_childCollisionAlgorithms;	// indexed by child index; empty slots are filled in ProcessChildShape()
+	btPersistentManifold*	m_sharedManifold;
+
+
+
+
+	btCompoundLeafCallback (btCollisionObject* compoundObj,btCollisionObject* otherObj,btDispatcher* dispatcher,const btDispatcherInfo& dispatchInfo,btManifoldResult*	resultOut,btCollisionAlgorithm**	childCollisionAlgorithms,btPersistentManifold*	sharedManifold)
+		:m_compoundColObj(compoundObj),m_otherObj(otherObj),m_dispatcher(dispatcher),m_dispatchInfo(dispatchInfo),m_resultOut(resultOut),
+		m_childCollisionAlgorithms(childCollisionAlgorithms),
+		m_sharedManifold(sharedManifold)
+	{
+
+	}
+
+	// Collides child `index` against m_otherObj if their world AABBs overlap; the compound object's shape and transforms are swapped temporarily and restored.
+	void	ProcessChildShape(btCollisionShape* childShape,int index)
+	{
+		btAssert(index>=0);
+		btCompoundShape* compoundShape = static_cast<btCompoundShape*>(m_compoundColObj->getCollisionShape());
+		btAssert(index<compoundShape->getNumChildShapes());
+
+
+		//backup
+		btTransform	orgTrans = m_compoundColObj->getWorldTransform();
+		btTransform	orgInterpolationTrans = m_compoundColObj->getInterpolationWorldTransform();
+		const btTransform& childTrans = compoundShape->getChildTransform(index);
+		btTransform	newChildWorldTrans = orgTrans*childTrans ;
+
+		//perform an AABB check first
+		btVector3 aabbMin0,aabbMax0,aabbMin1,aabbMax1;
+		childShape->getAabb(newChildWorldTrans,aabbMin0,aabbMax0);
+		m_otherObj->getCollisionShape()->getAabb(m_otherObj->getWorldTransform(),aabbMin1,aabbMax1);
+
+		if (TestAabbAgainstAabb2(aabbMin0,aabbMax0,aabbMin1,aabbMax1))
+		{
+
+			m_compoundColObj->setWorldTransform( newChildWorldTrans);
+			m_compoundColObj->setInterpolationWorldTransform(newChildWorldTrans);
+
+			//the contactpoint is still projected back using the original inverted worldtrans
+			btCollisionShape* tmpShape = m_compoundColObj->getCollisionShape();
+			m_compoundColObj->internalSetTemporaryCollisionShape( childShape );
+			// create the child's algorithm on first use (slots start at 0 when the compound has an AABB tree)
+			if (!m_childCollisionAlgorithms[index])
+				m_childCollisionAlgorithms[index] = m_dispatcher->findAlgorithm(m_compoundColObj,m_otherObj,m_sharedManifold);
+
+			///detect swapping case
+			if (m_resultOut->getBody0Internal() == m_compoundColObj)
+			{
+				m_resultOut->setShapeIdentifiersA(-1,index);
+			} else
+			{
+				m_resultOut->setShapeIdentifiersB(-1,index);
+			}
+
+			m_childCollisionAlgorithms[index]->processCollision(m_compoundColObj,m_otherObj,m_dispatchInfo,m_resultOut);
+			if (m_dispatchInfo.m_debugDraw && (m_dispatchInfo.m_debugDraw->getDebugMode() & btIDebugDraw::DBG_DrawAabb))
+			{
+				btVector3 worldAabbMin,worldAabbMax;	// NOTE(review): declared but never used in this scope
+				m_dispatchInfo.m_debugDraw->drawAabb(aabbMin0,aabbMax0,btVector3(1,1,1));
+				m_dispatchInfo.m_debugDraw->drawAabb(aabbMin1,aabbMax1,btVector3(1,1,1));
+			}
+			
+			//revert back transform
+			m_compoundColObj->internalSetTemporaryCollisionShape( tmpShape);
+			m_compoundColObj->setWorldTransform(  orgTrans );
+			m_compoundColObj->setInterpolationWorldTransform(orgInterpolationTrans);
+		}
+	}
+	void		Process(const btDbvtNode* leaf)	// tree-leaf entry point: leaf->dataAsInt is used as the child index
+	{
+		int index = leaf->dataAsInt;
+
+		btCompoundShape* compoundShape = static_cast<btCompoundShape*>(m_compoundColObj->getCollisionShape());
+		btCollisionShape* childShape = compoundShape->getChildShape(index);
+		if (m_dispatchInfo.m_debugDraw && (m_dispatchInfo.m_debugDraw->getDebugMode() & btIDebugDraw::DBG_DrawAabb))
+		{
+			btVector3 worldAabbMin,worldAabbMax;
+			btTransform	orgTrans = m_compoundColObj->getWorldTransform();
+			btTransformAabb(leaf->volume.Mins(),leaf->volume.Maxs(),0.,orgTrans,worldAabbMin,worldAabbMax);
+			m_dispatchInfo.m_debugDraw->drawAabb(worldAabbMin,worldAabbMax,btVector3(1,0,0));	// draws the leaf volume transformed by the compound's world transform
+		}
+		ProcessChildShape(childShape,index);
+
+	}
+};
+
+
+
+
+
+
+/// Collides every child shape of the compound object against the other object.
+/// m_isSwapped selects which of body0/body1 is the compound object; contacts are written to resultOut.
+/// If the compound shape's update revision changed since the child algorithms were built, they are
+/// rebuilt first. At the end of the call, child algorithms whose child AABB no longer overlaps the
+/// other object's AABB are destroyed and their slot is reset to 0.
+void btCompoundCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	btCollisionObject* colObj = m_isSwapped? body1 : body0;
+	btCollisionObject* otherObj = m_isSwapped? body0 : body1;
+
+	btAssert (colObj->getCollisionShape()->isCompound());
+	btCompoundShape* compoundShape = static_cast<btCompoundShape*>(colObj->getCollisionShape());
+
+	///btCompoundShape might have changed:
+	////make sure the internal child collision algorithm caches are still valid
+	if (compoundShape->getUpdateRevision() != m_compoundShapeRevision)
+	{
+		///clear and update all
+		removeChildAlgorithms();
+
+		preallocateChildAlgorithms(body0,body1);
+		//remember the revision the cache was rebuilt for, otherwise it would be rebuilt on every call
+		m_compoundShapeRevision = compoundShape->getUpdateRevision();
+	}
+
+	//a compound without children has nothing to collide; returning here also avoids taking &m_childCollisionAlgorithms[0] of an empty array
+	if (m_childCollisionAlgorithms.size()==0)
+		return;
+
+	btDbvt* tree = compoundShape->getDynamicAabbTree();
+	//use a dynamic aabb tree to cull potential child-overlaps
+	btCompoundLeafCallback  callback(colObj,otherObj,m_dispatcher,dispatchInfo,resultOut,&m_childCollisionAlgorithms[0],m_sharedManifold);
+
+	///we need to refresh all contact manifolds
+	///note that we should actually recursively traverse all children, btCompoundShape can be nested more than 1 level deep
+	///so we should add a 'refreshManifolds' in the btCollisionAlgorithm
+	{
+		int i;
+		btManifoldArray manifoldArray;
+		for (i=0;i<m_childCollisionAlgorithms.size();i++)
+		{
+			if (m_childCollisionAlgorithms[i])
+			{
+				m_childCollisionAlgorithms[i]->getAllContactManifolds(manifoldArray);
+				for (int m=0;m<manifoldArray.size();m++)
+				{
+					if (manifoldArray[m]->getNumContacts())
+					{
+						resultOut->setPersistentManifold(manifoldArray[m]);
+						resultOut->refreshContactPoints();
+						resultOut->setPersistentManifold(0);//??necessary?
+					}
+				}
+				manifoldArray.resize(0);
+			}
+		}
+	}
+
+	if (tree)
+	{
+		//express the other object's AABB in the compound's local space, then query the tree with it
+		btVector3 localAabbMin,localAabbMax;
+		btTransform otherInCompoundSpace;
+		otherInCompoundSpace = colObj->getWorldTransform().inverse() * otherObj->getWorldTransform();
+		otherObj->getCollisionShape()->getAabb(otherInCompoundSpace,localAabbMin,localAabbMax);
+
+		const ATTRIBUTE_ALIGNED16(btDbvtVolume)	bounds=btDbvtVolume::FromMM(localAabbMin,localAabbMax);
+		//process all children that overlap with the given AABB bounds
+		tree->collideTV(tree->m_root,bounds,callback);
+	} else
+	{
+		//iterate over all children, perform an AABB check inside ProcessChildShape
+		int numChildren = m_childCollisionAlgorithms.size();
+		int i;
+		for (i=0;i<numChildren;i++)
+		{
+			callback.ProcessChildShape(compoundShape->getChildShape(i),i);
+		}
+	}
+
+	{
+		//release the algorithm of every child whose world AABB no longer overlaps the other object's AABB
+		int numChildren = m_childCollisionAlgorithms.size();
+		int i;
+		btVector3 aabbMin0,aabbMax0,aabbMin1,aabbMax1;
+
+		for (i=0;i<numChildren;i++)
+		{
+			if (m_childCollisionAlgorithms[i])
+			{
+				btCollisionShape* childShape = compoundShape->getChildShape(i);
+				const btTransform& childTrans = compoundShape->getChildTransform(i);
+				btTransform newChildWorldTrans = colObj->getWorldTransform()*childTrans;
+
+				//compare the child's world AABB with the other object's world AABB
+				childShape->getAabb(newChildWorldTrans,aabbMin0,aabbMax0);
+				otherObj->getCollisionShape()->getAabb(otherObj->getWorldTransform(),aabbMin1,aabbMax1);
+
+				if (!TestAabbAgainstAabb2(aabbMin0,aabbMax0,aabbMin1,aabbMax1))
+				{
+					//no longer overlapping: destroy the algorithm and mark the slot empty
+					m_childCollisionAlgorithms[i]->~btCollisionAlgorithm();
+					m_dispatcher->freeCollisionAlgorithm(m_childCollisionAlgorithms[i]);
+					m_childCollisionAlgorithms[i] = 0;
+				}
+			}
+		}
+	}
+}
+
+/// Returns the smallest time-of-impact fraction reported by the existing child algorithms, or 1 if none is smaller.
+/// Each child is evaluated by temporarily giving the compound object the child's shape and world transform;
+/// both are restored before the next child is processed.
+btScalar	btCompoundCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	btCollisionObject* colObj = m_isSwapped? body1 : body0;
+	btCollisionObject* otherObj = m_isSwapped? body0 : body1;
+
+	btAssert (colObj->getCollisionShape()->isCompound());
+	btCompoundShape* compoundShape = static_cast<btCompoundShape*>(colObj->getCollisionShape());
+
+	//We will use the OptimizedBVH, AABB tree to cull potential child-overlaps
+	//If both proxies are Compound, we will deal with that directly, by performing sequential/parallel tree traversals
+	//given Proxy0 and Proxy1, if both have a tree, Tree0 and Tree1, this means:
+	//determine overlapping nodes of Proxy1 using Proxy0 AABB against Tree1
+	//then use each overlapping node AABB against Tree0
+	//and vice versa.
+
+	btScalar hitFraction = btScalar(1.);
+
+	int numChildren = m_childCollisionAlgorithms.size();
+	int i;
+	btTransform	orgTrans;
+	btScalar frac;
+	for (i=0;i<numChildren;i++)
+	{
+		//slots are 0 while a child has no algorithm (tree case before first overlap, or released by processCollision): skip them
+		if (!m_childCollisionAlgorithms[i])
+			continue;
+
+		//temporarily exchange parent btCollisionShape with childShape, and recurse
+		btCollisionShape* childShape = compoundShape->getChildShape(i);
+
+		//backup
+		orgTrans = colObj->getWorldTransform();
+		const btTransform& childTrans = compoundShape->getChildTransform(i);
+		colObj->setWorldTransform( orgTrans*childTrans );
+
+		btCollisionShape* tmpShape = colObj->getCollisionShape();
+		colObj->internalSetTemporaryCollisionShape( childShape );
+		frac = m_childCollisionAlgorithms[i]->calculateTimeOfImpact(colObj,otherObj,dispatchInfo,resultOut);
+		if (frac<hitFraction)
+			hitFraction = frac;
+		//revert back
+		colObj->internalSetTemporaryCollisionShape( tmpShape);
+		colObj->setWorldTransform( orgTrans);
+	}
+	return hitFraction;
+}
+
+
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.h b/src/bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.h
new file mode 100644
index 00000000..40457498
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.h
@@ -0,0 +1,86 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_COMPOUND_COLLISION_ALGORITHM_H
+#define BT_COMPOUND_COLLISION_ALGORITHM_H
+
+#include "btActivatingCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseInterface.h"
+
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+class btDispatcher;
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "btCollisionCreateFunc.h"
+#include "LinearMath/btAlignedObjectArray.h"
+class btDispatcher;
+class btCollisionObject;
+
+/// btCompoundCollisionAlgorithm  supports collision between CompoundCollisionShapes and other collision shapes
+class btCompoundCollisionAlgorithm  : public btActivatingCollisionAlgorithm
+{
+	btAlignedObjectArray<btCollisionAlgorithm*> m_childCollisionAlgorithms;	// one slot per child shape; a slot may be 0
+	bool m_isSwapped;	// true: body1 is the compound object, false: body0
+
+	class btPersistentManifold*	m_sharedManifold;	// taken from ci.m_manifold; passed to findAlgorithm() for the children
+	bool					m_ownsManifold;	// set to false by the constructor
+
+	int	m_compoundShapeRevision;//to keep track of changes, so that childAlgorithm array can be updated
+	// destroys and frees all non-null child algorithms
+	void	removeChildAlgorithms();
+	// resizes the child array to the compound's child count and assigns every slot
+	void	preallocateChildAlgorithms(btCollisionObject* body0,btCollisionObject* body1);
+
+public:
+	// isSwapped tells the algorithm that body1, not body0, carries the compound shape
+	btCompoundCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped);
+
+	virtual ~btCompoundCollisionAlgorithm();
+
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	btScalar	calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	// Appends the manifolds of every existing child algorithm to manifoldArray (the array is not cleared first).
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		int i;
+		for (i=0;i<m_childCollisionAlgorithms.size();i++)
+		{
+			if (m_childCollisionAlgorithms[i])
+				m_childCollisionAlgorithms[i]->getAllContactManifolds(manifoldArray);
+		}
+	}
+	// Factory for the case where body0 is the compound object (isSwapped = false).
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btCompoundCollisionAlgorithm));	// storage comes from the dispatcher
+			return new(mem) btCompoundCollisionAlgorithm(ci,body0,body1,false);	// placement new into that storage
+		}
+	};
+	// Factory for the case where body1 is the compound object (isSwapped = true).
+	struct SwappedCreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btCompoundCollisionAlgorithm));
+			return new(mem) btCompoundCollisionAlgorithm(ci,body0,body1,true);
+		}
+	};
+
+};
+
+#endif //BT_COMPOUND_COLLISION_ALGORITHM_H
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.cpp b/src/bullet/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.cpp
new file mode 100644
index 00000000..db7f884a
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.cpp
@@ -0,0 +1,247 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btConvex2dConvex2dAlgorithm.h"
+
+//#include <stdio.h>
+#include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseInterface.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+
+
+#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionShapes/btBoxShape.h"
+#include "BulletCollision/CollisionDispatch/btManifoldResult.h"
+
+#include "BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h"
+#include "BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h"
+
+
+
+#include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+
+#include "BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h"
+
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpa2.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h"
+
+
+btConvex2dConvex2dAlgorithm::CreateFunc::CreateFunc(btSimplexSolverInterface*			simplexSolver, btConvexPenetrationDepthSolver* pdSolver)
+{
+	m_numPerturbationIterations = 0;	// default: no perturbation iterations
+	m_minimumPointsPerturbationThreshold = 3;
+	m_simplexSolver = simplexSolver;	// both solver pointers are stored as given
+	m_pdSolver = pdSolver;
+}
+
+btConvex2dConvex2dAlgorithm::CreateFunc::~CreateFunc() 
+{ // intentionally empty: the stored solver pointers are not released here
+}
+
+btConvex2dConvex2dAlgorithm::btConvex2dConvex2dAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver,int numPerturbationIterations, int minimumPointsPerturbationThreshold)
+: btActivatingCollisionAlgorithm(ci,body0,body1),
+m_simplexSolver(simplexSolver),
+m_pdSolver(pdSolver),
+m_ownManifold (false),	// becomes true only if processCollision() has to create the manifold itself
+m_manifoldPtr(mf),	// may be 0; processCollision() then requests one from the dispatcher
+m_lowLevelOfDetail(false),
+ m_numPerturbationIterations(numPerturbationIterations),
+m_minimumPointsPerturbationThreshold(minimumPointsPerturbationThreshold)
+{
+	(void)body0;	// the casts to void only mark the parameters as used
+	(void)body1;
+}
+
+
+
+
+btConvex2dConvex2dAlgorithm::~btConvex2dConvex2dAlgorithm()
+{
+	if (m_ownManifold)	// only a manifold this algorithm obtained itself is handed back to the dispatcher
+	{
+		if (m_manifoldPtr)
+			m_dispatcher->releaseManifold(m_manifoldPtr);
+	}
+}
+
+void	btConvex2dConvex2dAlgorithm ::setLowLevelOfDetail(bool useLowLevel)	// stores the flag; it is not read anywhere in the visible part of this file
+{
+	m_lowLevelOfDetail = useLowLevel;
+}
+
+
+
+extern btScalar gContactBreakingThreshold;
+
+
+//
+// Convex-Convex collision algorithm
+//
+void btConvex2dConvex2dAlgorithm ::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	// Runs the GJK pair detector on the two bodies' shapes and reports closest points into resultOut.
+	if (!m_manifoldPtr)
+	{
+		//swapped?
+		m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1);
+		m_ownManifold = true;	// the destructor releases a manifold obtained here
+	}
+	resultOut->setPersistentManifold(m_manifoldPtr);
+
+	//comment-out next line to test multi-contact generation
+	//resultOut->getPersistentManifold()->clearManifold();
+
+	// The casts are unchecked: both bodies are assumed to carry btConvexShape-derived shapes.
+	btConvexShape* min0 = static_cast<btConvexShape*>(body0->getCollisionShape());
+	btConvexShape* min1 = static_cast<btConvexShape*>(body1->getCollisionShape());
+
+	btVector3  normalOnB;	// NOTE(review): never used below
+	btVector3  pointOnBWorld;	// NOTE(review): never used below
+
+	{
+
+
+		btGjkPairDetector::ClosestPointInput input;
+
+		btGjkPairDetector	gjkPairDetector(min0,min1,m_simplexSolver,m_pdSolver);
+		//TODO: if (dispatchInfo.m_useContinuous)
+		gjkPairDetector.setMinkowskiA(min0);
+		gjkPairDetector.setMinkowskiB(min1);
+		// maximum distance = both margins + the manifold's breaking threshold, stored squared
+		{
+			input.m_maximumDistanceSquared = min0->getMargin() + min1->getMargin() + m_manifoldPtr->getContactBreakingThreshold();
+			input.m_maximumDistanceSquared*= input.m_maximumDistanceSquared;
+		}
+
+		input.m_stackAlloc = dispatchInfo.m_stackAllocator;
+		input.m_transformA = body0->getWorldTransform();
+		input.m_transformB = body1->getWorldTransform();
+
+		gjkPairDetector.getClosestPoints(input,*resultOut,dispatchInfo.m_debugDraw);
+
+		btVector3 v0,v1;	// NOTE(review): never used
+		btVector3 sepNormalWorldSpace;	// NOTE(review): never used
+
+	}
+	// contact points are refreshed only when this algorithm created the manifold itself
+	if (m_ownManifold)
+	{
+		resultOut->refreshContactPoints();
+	}
+
+}
+
+/// Time-of-impact estimate: each body in turn is replaced by a sphere of its CCD swept-sphere
+/// radius and cast (btGjkConvexCast) against the other body's convex shape.
+/// Returns the smallest hit fraction found, or 1 when both motions are below their CCD thresholds or no cast hits.
+btScalar	btConvex2dConvex2dAlgorithm::calculateTimeOfImpact(btCollisionObject* col0,btCollisionObject* col1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	(void)resultOut;
+	(void)dispatchInfo;
+	///Rather than checking ALL pairs, only calculate TOI when motion exceeds threshold
+
+	///Linear motion for one of objects needs to exceed m_ccdSquareMotionThreshold
+	///col0->m_worldTransform,
+	btScalar resultFraction = btScalar(1.);
+
+	// Squared distance each origin travels between its current and its interpolation transform.
+	btScalar squareMot0 = (col0->getInterpolationWorldTransform().getOrigin() - col0->getWorldTransform().getOrigin()).length2();
+	btScalar squareMot1 = (col1->getInterpolationWorldTransform().getOrigin() - col1->getWorldTransform().getOrigin()).length2();
+
+	if (squareMot0 < col0->getCcdSquareMotionThreshold() &&
+		squareMot1 < col1->getCcdSquareMotionThreshold())
+		return resultFraction;
+
+	// At least one body moved past its threshold: run both sphere-versus-convex casts below.
+	//An adhoc way of testing the Continuous Collision Detection algorithms
+	//One object is approximated as a sphere, to simplify things
+	//Starting in penetration should report no time of impact
+	//For proper CCD, better accuracy and handling of 'allowed' penetration should be added
+	//also the mainloop of the physics should have a kind of toi queue (something like Brian Mirtich's application of Timewarp for Rigidbodies)
+
+
+	/// Convex0 against sphere for Convex1
+	{
+		btConvexShape* convex0 = static_cast<btConvexShape*>(col0->getCollisionShape());
+
+		btSphereShape	sphere1(col1->getCcdSweptSphereRadius()); //todo: allow non-zero sphere sizes, for better approximation
+		btConvexCast::CastResult result;
+		btVoronoiSimplexSolver voronoiSimplex;
+		//SubsimplexConvexCast ccd0(&sphere,min0,&voronoiSimplex);
+		///Simplification, one object is simplified as a sphere
+		btGjkConvexCast ccd1( convex0 ,&sphere1,&voronoiSimplex);
+		//ContinuousConvexCollision ccd(min0,min1,&voronoiSimplex,0);
+		if (ccd1.calcTimeOfImpact(col0->getWorldTransform(),col0->getInterpolationWorldTransform(),
+			col1->getWorldTransform(),col1->getInterpolationWorldTransform(),result))
+		{
+
+			//store result.m_fraction in both bodies
+			// (a stored hit fraction is only ever lowered here, never raised)
+			if (col0->getHitFraction()> result.m_fraction)
+				col0->setHitFraction( result.m_fraction );
+
+			if (col1->getHitFraction() > result.m_fraction)
+				col1->setHitFraction( result.m_fraction);
+
+			if (resultFraction > result.m_fraction)
+				resultFraction = result.m_fraction;
+
+		}
+
+
+
+
+	}
+
+	/// Sphere (for convex0) against Convex1
+	{
+		btConvexShape* convex1 = static_cast<btConvexShape*>(col1->getCollisionShape());
+
+		btSphereShape	sphere0(col0->getCcdSweptSphereRadius()); //todo: allow non-zero sphere sizes, for better approximation
+		btConvexCast::CastResult result;
+		btVoronoiSimplexSolver voronoiSimplex;
+		//SubsimplexConvexCast ccd0(&sphere,min0,&voronoiSimplex);
+		///Simplification, one object is simplified as a sphere
+		btGjkConvexCast ccd1(&sphere0,convex1,&voronoiSimplex);
+		//ContinuousConvexCollision ccd(min0,min1,&voronoiSimplex,0);
+		if (ccd1.calcTimeOfImpact(col0->getWorldTransform(),col0->getInterpolationWorldTransform(),
+			col1->getWorldTransform(),col1->getInterpolationWorldTransform(),result))
+		{
+
+			//store result.m_fraction in both bodies
+			// (compared against fractions the first cast above may already have lowered)
+			if (col0->getHitFraction()	> result.m_fraction)
+				col0->setHitFraction( result.m_fraction);
+
+			if (col1->getHitFraction() > result.m_fraction)
+				col1->setHitFraction( result.m_fraction);
+
+			if (resultFraction > result.m_fraction)
+				resultFraction = result.m_fraction;
+
+		}
+	}
+
+	return resultFraction;
+
+}
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.h b/src/bullet/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.h
new file mode 100644
index 00000000..53d13b87
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.h
@@ -0,0 +1,95 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONVEX_2D_CONVEX_2D_ALGORITHM_H
+#define BT_CONVEX_2D_CONVEX_2D_ALGORITHM_H
+
+#include "BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "LinearMath/btTransformUtil.h" //for btConvexSeparatingDistanceUtil
+
+class btConvexPenetrationDepthSolver;
+
+
+///The btConvex2dConvex2dAlgorithm collision algorithm supports 2d collision detection for btConvex2dShape
+///Currently it requires the btMinkowskiPenetrationDepthSolver, which has support for 2d penetration depth computation
+class btConvex2dConvex2dAlgorithm : public btActivatingCollisionAlgorithm
+{
+	btSimplexSolverInterface*		m_simplexSolver;
+	btConvexPenetrationDepthSolver* m_pdSolver;
+
+	///m_ownManifold records that the manifold came from this algorithm's own getNewManifold call
+	bool	m_ownManifold;
+	btPersistentManifold*	m_manifoldPtr;
+	bool			m_lowLevelOfDetail;
+	///perturbation settings, stored as passed to the constructor
+	int m_numPerturbationIterations;
+	int m_minimumPointsPerturbationThreshold;
+
+public:
+
+	btConvex2dConvex2dAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1, btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver, int numPerturbationIterations, int minimumPointsPerturbationThreshold);
+
+
+	virtual ~btConvex2dConvex2dAlgorithm();
+
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		///should we use m_ownManifold to avoid adding duplicates?
+		if (m_manifoldPtr && m_ownManifold)
+			manifoldArray.push_back(m_manifoldPtr);
+	}
+
+
+	void	setLowLevelOfDetail(bool useLowLevel);
+
+	///returns the manifold pointer currently held (it may be null)
+	const btPersistentManifold*	getManifold()
+	{
+		return m_manifoldPtr;
+	}
+	///factory: placement-constructs the algorithm in memory obtained from the dispatcher
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+
+		btConvexPenetrationDepthSolver*		m_pdSolver;
+		btSimplexSolverInterface*			m_simplexSolver;
+		int m_numPerturbationIterations;
+		int m_minimumPointsPerturbationThreshold;
+
+		CreateFunc(btSimplexSolverInterface*			simplexSolver, btConvexPenetrationDepthSolver* pdSolver);
+
+		virtual ~CreateFunc();
+
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btConvex2dConvex2dAlgorithm));
+			return new(mem) btConvex2dConvex2dAlgorithm(ci.m_manifold,ci,body0,body1,m_simplexSolver,m_pdSolver,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
+		}
+	};
+
+
+};
+
+#endif //BT_CONVEX_2D_CONVEX_2D_ALGORITHM_H
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.cpp b/src/bullet/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.cpp
new file mode 100644
index 00000000..d2b2c221
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.cpp
@@ -0,0 +1,312 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btConvexConcaveCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btMultiSphereShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionShapes/btConcaveShape.h"
+#include "BulletCollision/CollisionDispatch/btManifoldResult.h"
+#include "BulletCollision/NarrowPhaseCollision/btRaycastCallback.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "LinearMath/btIDebugDraw.h"
+#include "BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h"
+
+btConvexConcaveCollisionAlgorithm::btConvexConcaveCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1,bool isSwapped)
+	: btActivatingCollisionAlgorithm(ci,body0,body1)
+	, m_isSwapped(isSwapped)
+	, m_btConvexTriangleCallback(ci.m_dispatcher1,body0,body1,isSwapped)	// the callback fetches the manifold
+{
+}
+
+btConvexConcaveCollisionAlgorithm::~btConvexConcaveCollisionAlgorithm()
+{	// empty: the member callback releases its manifold in its own destructor
+}
+
+void	btConvexConcaveCollisionAlgorithm::getAllContactManifolds(btManifoldArray&	manifoldArray)
+{
+	// Append the callback's manifold, if it has one.
+	btPersistentManifold* manifold = m_btConvexTriangleCallback.m_manifoldPtr;
+	if (!manifold)
+		return;
+	manifoldArray.push_back(manifold);
+}
+
+/// Binds the callback to one convex body and one triangle body (isSwapped selects which argument is which) and fetches a manifold for the pair.
+btConvexTriangleCallback::btConvexTriangleCallback(btDispatcher*  dispatcher,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped):
+	m_resultOut(0),	// assigned for real by setTimeStepAndCounters
+	m_dispatcher(dispatcher),
+	m_dispatchInfoPtr(0),
+	m_collisionMarginTriangle(btScalar(0.)),
+	m_triangleCount(0)	// public counter: start from a defined value
+{
+	m_convexBody = isSwapped? body1:body0;
+	m_triBody = isSwapped? body0:body1;
+
+	// create the manifold from the dispatcher 'manifold pool'
+	m_manifoldPtr = m_dispatcher->getNewManifold(m_convexBody,m_triBody);
+	clearCache();
+}
+
+btConvexTriangleCallback::~btConvexTriangleCallback()
+{
+	// Clear the manifold first, then hand it back to the dispatcher.
+	m_dispatcher->clearManifold(m_manifoldPtr);
+	m_dispatcher->releaseManifold( m_manifoldPtr );
+}
+  
+
+void	btConvexTriangleCallback::clearCache()
+{
+	m_dispatcher->clearManifold(m_manifoldPtr);	// delegates to the dispatcher; the manifold is not released here
+}
+
+/// Collides the convex body against a single mesh triangle: the triangle is wrapped in a
+/// temporary btTriangleShape that stands in for the triangle body's shape during the test.
+void btConvexTriangleCallback::processTriangle(btVector3* triangle,int partId, int triangleIndex)
+{
+
+	//just for debugging purposes
+	//printf("triangle %d",m_triangleCount++);
+
+
+	//aabb filter is already applied!
+
+	btCollisionAlgorithmConstructionInfo ci;
+	ci.m_dispatcher1 = m_dispatcher;
+
+	btCollisionObject* ob = static_cast<btCollisionObject*>(m_triBody);
+
+
+#if 0
+	///debug drawing of the overlapping triangles
+	if (m_dispatchInfoPtr && m_dispatchInfoPtr->m_debugDraw && (m_dispatchInfoPtr->m_debugDraw->getDebugMode() &btIDebugDraw::DBG_DrawWireframe ))
+	{
+		btVector3 color(1,1,0);
+		btTransform& tr = ob->getWorldTransform();
+		m_dispatchInfoPtr->m_debugDraw->drawLine(tr(triangle[0]),tr(triangle[1]),color);
+		m_dispatchInfoPtr->m_debugDraw->drawLine(tr(triangle[1]),tr(triangle[2]),color);
+		m_dispatchInfoPtr->m_debugDraw->drawLine(tr(triangle[2]),tr(triangle[0]),color);
+	}
+#endif
+
+	if (m_convexBody->getCollisionShape()->isConvex())
+	{
+		btTriangleShape tm(triangle[0],triangle[1],triangle[2]);
+		tm.setMargin(m_collisionMarginTriangle);
+		// Swap the triangle in as the triangle body's shape; the original shape is restored below.
+		btCollisionShape* tmpShape = ob->getCollisionShape();
+		ob->internalSetTemporaryCollisionShape( &tm );
+
+		btCollisionAlgorithm* colAlgo = ci.m_dispatcher1->findAlgorithm(m_convexBody,m_triBody,m_manifoldPtr);
+		// Tag the result with this triangle's part/index on whichever side is the triangle body.
+		if (m_resultOut->getBody0Internal() == m_triBody)
+		{
+			m_resultOut->setShapeIdentifiersA(partId,triangleIndex);
+		}
+		else
+		{
+			m_resultOut->setShapeIdentifiersB(partId,triangleIndex);
+		}
+		// The algorithm lives only for this triangle: run it, destroy it, return its memory.
+		colAlgo->processCollision(m_convexBody,m_triBody,*m_dispatchInfoPtr,m_resultOut);
+		colAlgo->~btCollisionAlgorithm();
+		ci.m_dispatcher1->freeCollisionAlgorithm(colAlgo);
+		ob->internalSetTemporaryCollisionShape( tmpShape);
+	}
+
+
+}
+
+
+
+void	btConvexTriangleCallback::setTimeStepAndCounters(btScalar collisionMarginTriangle,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	// Remember the per-query state that processTriangle reads later.
+	m_resultOut = resultOut;
+	m_collisionMarginTriangle = collisionMarginTriangle;
+	m_dispatchInfoPtr = &dispatchInfo;
+
+	//recalc aabbs: bounds of the convex shape expressed in the triangle body's frame
+	const btTransform convexInTriangleSpace = m_triBody->getWorldTransform().inverse() * m_convexBody->getWorldTransform();
+	btCollisionShape* convexShape = m_convexBody->getCollisionShape();
+	//CollisionShape* triangleShape = static_cast<btCollisionShape*>(triBody->m_collisionShape);
+	convexShape->getAabb(convexInTriangleSpace,m_aabbMin,m_aabbMax);
+
+	// Grow the box by the triangle margin on every axis.
+	const btVector3 marginPadding(collisionMarginTriangle,collisionMarginTriangle,collisionMarginTriangle);
+	m_aabbMin -= marginPadding;
+	m_aabbMax += marginPadding;
+
+}
+
+void btConvexConcaveCollisionAlgorithm::clearCache()
+{
+	m_btConvexTriangleCallback.clearCache();
+	// (forwarded to the callback, which holds the manifold)
+}
+
+void btConvexConcaveCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	// m_isSwapped tells which argument is the convex body and which the triangle body.
+	btCollisionObject* convexBody = m_isSwapped ? body1 : body0;
+	btCollisionObject* triBody = m_isSwapped ? body0 : body1;
+
+	// Nothing is done unless the triangle side really is concave ...
+	if (!triBody->getCollisionShape()->isConcave())
+	{
+		return;
+	}
+	btConcaveShape* concaveShape = static_cast<btConcaveShape*>( triBody->getCollisionShape());
+
+	// ... and the other side really is convex.
+	if (!convexBody->getCollisionShape()->isConvex())
+	{
+		return;
+	}
+
+	// Route contacts into the callback's manifold and prime the callback for this query.
+	btPersistentManifold* manifold = m_btConvexTriangleCallback.m_manifoldPtr;
+	const btScalar collisionMarginTriangle = concaveShape->getMargin();
+	resultOut->setPersistentManifold(manifold);
+	m_btConvexTriangleCallback.setTimeStepAndCounters(collisionMarginTriangle,dispatchInfo,resultOut);
+
+	//Disable persistency. previously, some older algorithm calculated all contacts in one go, so you can clear it here.
+	//m_dispatcher->clearManifold(m_btConvexTriangleCallback.m_manifoldPtr);
+
+	manifold->setBodies(convexBody,triBody);
+
+	// Visit the mesh triangles within the callback's AABB, then refresh the contact points.
+	concaveShape->processAllTriangles( &m_btConvexTriangleCallback,m_btConvexTriangleCallback.getAabbMin(),m_btConvexTriangleCallback.getAabbMax());
+	resultOut->refreshContactPoints();
+
+}
+/// Approximate time of impact of the convex body against the triangle body: a sphere of the convex body's
+/// CCD swept-sphere radius is cast against each candidate triangle. Returns the new hit fraction, or 1 if none is lower.
+btScalar btConvexConcaveCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	(void)resultOut;
+	(void)dispatchInfo;
+	btCollisionObject* convexbody = m_isSwapped ? body1 : body0;
+	btCollisionObject* triBody = m_isSwapped ? body0 : body1;
+
+
+	//quick approximation using raycast, todo: hook up to the continuous collision detection (one of the btConvexCast)
+
+	//only perform CCD above a certain threshold, this prevents blocking on the long run
+	//because object in a blocked ccd state (hitfraction<1) get their linear velocity halved each frame...
+	btScalar squareMot0 = (convexbody->getInterpolationWorldTransform().getOrigin() - convexbody->getWorldTransform().getOrigin()).length2();
+	if (squareMot0 < convexbody->getCcdSquareMotionThreshold())
+	{
+		return btScalar(1.);
+	}
+
+	//const btVector3& from = convexbody->m_worldTransform.getOrigin();
+	//btVector3 to = convexbody->m_interpolationWorldTransform.getOrigin();
+	//todo: only do if the motion exceeds the 'radius'
+	// Express the convex body's start and end transforms in the triangle body's local frame.
+	btTransform triInv = triBody->getWorldTransform().inverse();
+	btTransform convexFromLocal = triInv * convexbody->getWorldTransform();
+	btTransform convexToLocal = triInv * convexbody->getInterpolationWorldTransform();
+	// Per-triangle callback: sweeps a sphere along the local-space motion and keeps the smallest hit fraction.
+	struct LocalTriangleSphereCastCallback	: public btTriangleCallback
+	{
+		btTransform m_ccdSphereFromTrans;
+		btTransform m_ccdSphereToTrans;
+		btTransform	m_meshTransform;
+
+		btScalar	m_ccdSphereRadius;
+		btScalar	m_hitFraction;
+		// m_meshTransform is neither set by the constructor below nor read within this function.
+
+		LocalTriangleSphereCastCallback(const btTransform& from,const btTransform& to,btScalar ccdSphereRadius,btScalar hitFraction)
+			:m_ccdSphereFromTrans(from),
+			m_ccdSphereToTrans(to),
+			m_ccdSphereRadius(ccdSphereRadius),
+			m_hitFraction(hitFraction)
+		{
+		}
+
+
+		virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex)
+		{
+			(void)partId;
+			(void)triangleIndex;
+			//do a swept sphere for now
+			btTransform ident;
+			ident.setIdentity();
+			btConvexCast::CastResult castResult;
+			castResult.m_fraction = m_hitFraction;
+			btSphereShape	pointShape(m_ccdSphereRadius);
+			btTriangleShape	triShape(triangle[0],triangle[1],triangle[2]);
+			btVoronoiSimplexSolver	simplexSolver;
+			btSubsimplexConvexCast convexCaster(&pointShape,&triShape,&simplexSolver);
+			//GjkConvexCast	convexCaster(&pointShape,convexShape,&simplexSolver);
+			//ContinuousConvexCollision convexCaster(&pointShape,convexShape,&simplexSolver,0);
+			//local space?
+			// The sphere transforms are already triangle-local, so the triangle itself uses identity.
+			if (convexCaster.calcTimeOfImpact(m_ccdSphereFromTrans,m_ccdSphereToTrans,
+				ident,ident,castResult))
+			{
+				if (m_hitFraction > castResult.m_fraction)
+					m_hitFraction = castResult.m_fraction;
+			}
+
+		}
+
+	};
+
+
+
+
+	// Only a concave triangle body is cast against; otherwise 1 is returned at the end.
+	if (triBody->getCollisionShape()->isConcave())
+	{
+		btVector3 rayAabbMin = convexFromLocal.getOrigin();
+		rayAabbMin.setMin(convexToLocal.getOrigin());
+		btVector3 rayAabbMax = convexFromLocal.getOrigin();
+		rayAabbMax.setMax(convexToLocal.getOrigin());
+		btScalar ccdRadius0 = convexbody->getCcdSweptSphereRadius();
+		rayAabbMin -= btVector3(ccdRadius0,ccdRadius0,ccdRadius0);
+		rayAabbMax += btVector3(ccdRadius0,ccdRadius0,ccdRadius0);
+		// The box above spans both end-point origins, padded by the CCD sphere radius.
+		btScalar curHitFraction = btScalar(1.); //is this available?
+		LocalTriangleSphereCastCallback raycastCallback(convexFromLocal,convexToLocal,
+			convexbody->getCcdSweptSphereRadius(),curHitFraction);
+		// Start from the body's current hit fraction; this overwrites the curHitFraction passed above.
+		raycastCallback.m_hitFraction = convexbody->getHitFraction();
+
+		btCollisionObject* concavebody = triBody;
+
+		btConcaveShape* triangleMesh = (btConcaveShape*) concavebody->getCollisionShape();
+
+		if (triangleMesh)
+		{
+			triangleMesh->processAllTriangles(&raycastCallback,rayAabbMin,rayAabbMax);
+		}
+
+		// Only a fraction lower than the body's current one is stored and returned.
+
+		if (raycastCallback.m_hitFraction < convexbody->getHitFraction())
+		{
+			convexbody->setHitFraction( raycastCallback.m_hitFraction);
+			return raycastCallback.m_hitFraction;
+		}
+	}
+
+	return btScalar(1.);
+
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.h b/src/bullet/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.h
new file mode 100644
index 00000000..f718d1de
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.h
@@ -0,0 +1,116 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONVEX_CONCAVE_COLLISION_ALGORITHM_H
+#define BT_CONVEX_CONCAVE_COLLISION_ALGORITHM_H
+
+#include "btActivatingCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseInterface.h"
+#include "BulletCollision/CollisionShapes/btTriangleCallback.h"
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+class btDispatcher;
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "btCollisionCreateFunc.h"
+
+///For each triangle in the concave mesh that overlaps with the AABB of a convex (m_convexBody), processTriangle is called.
+class btConvexTriangleCallback : public btTriangleCallback
+{
+	btCollisionObject* m_convexBody;
+	btCollisionObject* m_triBody;
+
+	btVector3	m_aabbMin;
+	btVector3	m_aabbMax ;
+
+	///m_resultOut, m_dispatchInfoPtr and m_collisionMarginTriangle are assigned by setTimeStepAndCounters
+	btManifoldResult* m_resultOut;
+	btDispatcher*	m_dispatcher;
+	const btDispatcherInfo* m_dispatchInfoPtr;
+	btScalar m_collisionMarginTriangle;
+
+public:
+int	m_triangleCount;	///<referenced only by a commented-out debug printf in processTriangle
+	///manifold obtained from the dispatcher in the constructor and released in the destructor
+	btPersistentManifold*	m_manifoldPtr;
+
+	btConvexTriangleCallback(btDispatcher* dispatcher,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped);
+
+	void	setTimeStepAndCounters(btScalar collisionMarginTriangle,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	virtual ~btConvexTriangleCallback();
+
+	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex);
+
+	void clearCache();
+	///bounds computed by setTimeStepAndCounters: the convex shape's AABB in the triangle body's frame, grown by the margin
+	SIMD_FORCE_INLINE const btVector3& getAabbMin() const
+	{
+		return m_aabbMin;
+	}
+	SIMD_FORCE_INLINE const btVector3& getAabbMax() const
+	{
+		return m_aabbMax;
+	}
+
+};
+
+
+
+
+/// btConvexConcaveCollisionAlgorithm  supports collision between convex shapes and (concave) triangle meshes.
+class btConvexConcaveCollisionAlgorithm  : public btActivatingCollisionAlgorithm
+{
+	///when true, body1 is taken as the convex body and body0 as the triangle body
+	bool	m_isSwapped;
+	///per-triangle callback; it also holds the pair's persistent manifold
+	btConvexTriangleCallback m_btConvexTriangleCallback;
+
+
+
+public:
+
+	btConvexConcaveCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped);
+
+	virtual ~btConvexConcaveCollisionAlgorithm();
+
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	btScalar	calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray);
+
+	void	clearCache();
+	///creates the algorithm with isSwapped = false (body0 is treated as the convex body)
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btConvexConcaveCollisionAlgorithm));
+			return new(mem) btConvexConcaveCollisionAlgorithm(ci,body0,body1,false);
+		}
+	};
+	///creates the algorithm with isSwapped = true (body1 is treated as the convex body)
+	struct SwappedCreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btConvexConcaveCollisionAlgorithm));
+			return new(mem) btConvexConcaveCollisionAlgorithm(ci,body0,body1,true);
+		}
+	};
+
+};
+
+#endif //BT_CONVEX_CONCAVE_COLLISION_ALGORITHM_H
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.cpp b/src/bullet/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.cpp
new file mode 100644
index 00000000..dd1f3e24
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.cpp
@@ -0,0 +1,739 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///Specialized capsule-capsule collision algorithm has been added for Bullet 2.75 release to increase ragdoll performance
+///If you experience problems with capsule-capsule collision, try to define BT_DISABLE_CAPSULE_CAPSULE_COLLIDER and report it in the Bullet forums
+///with reproduction case
+//define BT_DISABLE_CAPSULE_CAPSULE_COLLIDER 1
+//#define ZERO_MARGIN
+
+#include "btConvexConvexAlgorithm.h"
+
+//#include <stdio.h>
+#include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseInterface.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+
+
+
+#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionShapes/btBoxShape.h"
+#include "BulletCollision/CollisionDispatch/btManifoldResult.h"
+
+#include "BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h"
+#include "BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h"
+
+
+
+#include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+
+#include "BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h"
+
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpa2.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.h"
+
+
+///////////
+
+
+
+static SIMD_FORCE_INLINE void segmentsClosestPoints(
+	btVector3& ptsVector,
+	btVector3& offsetA,
+	btVector3& offsetB,
+	btScalar& tA, btScalar& tB,
+	const btVector3& translation,
+	const btVector3& dirA, btScalar hlenA,
+	const btVector3& dirB, btScalar hlenB )
+{
+	// Finds tA/tB, the parameters along dirA/dirB (measured from each segment centre and limited
+	// to +-hlenA / +-hlenB) of the mutually closest points, then the vector joining those points.
+	// NOTE(review): the 1 - dot^2 denominator presumes dirA and dirB are unit length - confirm with callers.
+
+	const btScalar cosAB = btDot(dirA,dirB);
+	const btScalar projA = btDot(dirA,translation);
+	const btScalar projB = btDot(dirB,translation);
+
+	const btScalar denom = 1.0f - cosAB * cosAB;
+
+	// A denominator of exactly zero selects the centre of A; otherwise solve for A and clamp.
+	if ( denom == 0.0f )
+	{
+		tA = 0.0f;
+	}
+	else
+	{
+		tA = ( projA - projB * cosAB ) / denom;
+		if ( tA < -hlenA )
+			tA = -hlenA;
+		else if ( tA > hlenA )
+			tA = hlenA;
+	}
+
+	tB = tA * cosAB - projB;
+
+	// If B's parameter leaves its segment, pin it to the violated end and re-solve A for that end.
+	const bool pastLowEndB = tB < -hlenB;
+	const bool pastHighEndB = !pastLowEndB && tB > hlenB;
+	if ( pastLowEndB || pastHighEndB )
+	{
+		tB = pastLowEndB ? -hlenB : hlenB;
+		tA = tB * cosAB + projA;
+
+		if ( tA < -hlenA )
+			tA = -hlenA;
+		else if ( tA > hlenA )
+			tA = hlenA;
+	}
+
+	// compute the closest points relative to segment centers, and the vector between them.
+	offsetA = dirA * tA;
+	offsetB = dirB * tB;
+	ptsVector = translation - offsetA + offsetB;
+}
+
+
+static SIMD_FORCE_INLINE btScalar capsuleCapsuleDistance(
+	btVector3& normalOnB,
+	btVector3& pointOnB,
+	btScalar capsuleLengthA,
+	btScalar	capsuleRadiusA,
+	btScalar capsuleLengthB,
+	btScalar	capsuleRadiusB,
+	int capsuleAxisA,
+	int capsuleAxisB,
+	const btTransform& transformA,
+	const btTransform& transformB,
+	btScalar distanceThreshold )
+{
+	// Capsule axis = the selected column of each basis; centre = each transform's origin.
+	btVector3 axisA = transformA.getBasis().getColumn(capsuleAxisA);
+	btVector3 centerA = transformA.getOrigin();
+	btVector3 axisB = transformB.getBasis().getColumn(capsuleAxisB);
+	btVector3 centerB = transformB.getOrigin();
+
+	// translation between centers
+	btVector3 centerAToB = centerB - centerA;
+
+	// closest points of the capsule line segments (capsuleLengthA/B are passed as the segment half-length bounds)
+	btVector3 closestDelta;         // the vector between the closest points
+	btVector3 alongA, alongB;       // offsets from segment centers to their closest points
+	btScalar paramA, paramB;        // parameters on line segment (not used afterwards)
+
+	segmentsClosestPoints( closestDelta, alongA, alongB, paramA, paramB, centerAToB,
+						   axisA, capsuleLengthA, axisB, capsuleLengthB );
+
+	// Surface-to-surface distance: segment distance minus both radii.
+	btScalar distance = closestDelta.length() - capsuleRadiusA - capsuleRadiusB;
+
+	// Beyond the threshold only the distance is reported; normalOnB and pointOnB stay unwritten.
+	if ( distance > distanceThreshold )
+		return distance;
+
+	btScalar deltaLenSqr = closestDelta.length2();
+	if (deltaLenSqr<= (SIMD_EPSILON*SIMD_EPSILON))
+	{
+		//degenerate case where 2 capsules are likely at the same location: take a vector tangential to 'axisA'
+		btVector3 unusedTangent;
+		btPlaneSpace1(axisA,normalOnB,unusedTangent);
+	}
+	else
+	{
+		// compute the contact normal
+		normalOnB = closestDelta*-btRecipSqrt(deltaLenSqr);
+	}
+	pointOnB = centerB+alongB + normalOnB * capsuleRadiusB;
+	return distance;
+}
+
+
+
+
+
+
+
+//////////
+
+
+
+
+/// Stores the solvers handed in by the caller; perturbation iterations start at 0 and the points threshold at 3.
+btConvexConvexAlgorithm::CreateFunc::CreateFunc(btSimplexSolverInterface*			simplexSolver, btConvexPenetrationDepthSolver* pdSolver)
+{
+	m_simplexSolver = simplexSolver;
+	m_pdSolver = pdSolver;
+	m_numPerturbationIterations = 0;
+	m_minimumPointsPerturbationThreshold = 3;
+}
+
+btConvexConvexAlgorithm::CreateFunc::~CreateFunc() 
+{	// empty: the stored solver pointers are not deleted here
+}
+
+btConvexConvexAlgorithm::btConvexConvexAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver,int numPerturbationIterations, int minimumPointsPerturbationThreshold)
+	: btActivatingCollisionAlgorithm(ci,body0,body1)
+	, m_simplexSolver(simplexSolver)
+	, m_pdSolver(pdSolver)
+	, m_ownManifold (false)
+	, m_manifoldPtr(mf)
+	, m_lowLevelOfDetail(false)
+#ifdef USE_SEPDISTANCE_UTIL2
+	, m_sepDistance((static_cast<btConvexShape*>(body0->getCollisionShape()))->getAngularMotionDisc(),
+			  (static_cast<btConvexShape*>(body1->getCollisionShape()))->getAngularMotionDisc())
+#endif
+	, m_numPerturbationIterations(numPerturbationIterations)
+	, m_minimumPointsPerturbationThreshold(minimumPointsPerturbationThreshold)
+{
+	(void)body0;	// within this constructor the bodies are otherwise only referenced under USE_SEPDISTANCE_UTIL2
+	(void)body1;
+}
+
+
+
+
+btConvexConvexAlgorithm::~btConvexConvexAlgorithm()
+{
+	// The manifold is handed back to the dispatcher only when m_ownManifold is set.
+	if (m_ownManifold && m_manifoldPtr)
+	{
+		m_dispatcher->releaseManifold(m_manifoldPtr);
+	}
+}
+
+void btConvexConvexAlgorithm::setLowLevelOfDetail(bool useLowLevel) //plain setter for m_lowLevelOfDetail
+{
+	this->m_lowLevelOfDetail = useLowLevel;
+}
+
+
+///Result sink for the perturbed GJK queries: every reported contact is re-expressed for the
+///unperturbed pose of the perturbed body and then forwarded to the original manifold result.
+struct btPerturbedContactResult : public btManifoldResult
+{
+	btManifoldResult* m_originalManifoldResult;	///< receives the corrected contacts
+	btTransform m_transformA;			///< transform of A used for the perturbed query
+	btTransform m_transformB;			///< transform of B used for the perturbed query
+	btTransform	m_unPerturbedTransform;		///< transform of the perturbed body before perturbation
+	bool	m_perturbA;				///< true: A was perturbed, false: B was perturbed
+	btIDebugDraw*	m_debugDrawer;			///< only dereferenced when DEBUG_CONTACTS is defined
+
+	btPerturbedContactResult(btManifoldResult* originalResult,const btTransform& transformA,const btTransform& transformB,const btTransform& unPerturbedTransform,bool perturbA,btIDebugDraw* debugDrawer)
+		:m_originalManifoldResult(originalResult),
+		m_transformA(transformA),
+		m_transformB(transformB),
+		m_unPerturbedTransform(unPerturbedTransform),
+		m_perturbA(perturbA),
+		m_debugDrawer(debugDrawer)
+	{
+	}
+	virtual ~ btPerturbedContactResult()
+	{
+	}
+
+	///Recomputes point and depth for the unperturbed pose and forwards them with the unchanged normal.
+	virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar orgDepth)
+	{
+		btVector3 offsetPt,forwardedPt;
+		btScalar correctedDepth;
+
+		if (!m_perturbA)
+		{
+			//B was perturbed: map the reported point back through B's unperturbed transform
+			offsetPt = pointInWorld + normalOnBInWorld*orgDepth;
+			forwardedPt = (m_unPerturbedTransform*m_transformB.inverse())(pointInWorld);
+			correctedDepth = (offsetPt -  forwardedPt).dot(normalOnBInWorld);
+		} else
+		{
+			//A was perturbed: map the offset point back through A's unperturbed transform
+			const btVector3 offsetPtPerturbed = pointInWorld + normalOnBInWorld*orgDepth;
+			offsetPt = (m_unPerturbedTransform*m_transformA.inverse())(offsetPtPerturbed);
+			correctedDepth = (offsetPt -  pointInWorld).dot(normalOnBInWorld);
+			forwardedPt = offsetPt+normalOnBInWorld*correctedDepth;
+		}
+
+//#define DEBUG_CONTACTS 1
+#ifdef DEBUG_CONTACTS
+		m_debugDrawer->drawLine(forwardedPt,offsetPt,btVector3(1,0,0));
+		m_debugDrawer->drawSphere(forwardedPt,0.05,btVector3(0,1,0));
+		m_debugDrawer->drawSphere(offsetPt,0.05,btVector3(0,0,1));
+#endif //DEBUG_CONTACTS
+
+		m_originalManifoldResult->addContactPoint(normalOnBInWorld,forwardedPt,correctedDepth);
+	}
+};
+
+extern btScalar gContactBreakingThreshold; //definition is not in the code visible here; divided by a shape radius below to get the perturbation angle
+
+
+// Convex-Convex collision algorithm: writes the contacts found for body0/body1 into resultOut.
+// Paths tried in order: capsule/capsule shortcut, polyhedral clipping (hull/hull, hull/triangle),
+// then a GJK closest-point query, optionally followed by perturbed GJK queries for extra contacts.
+void btConvexConvexAlgorithm ::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	//request a manifold on first use; m_ownManifold makes the destructor release it again
+	if (!m_manifoldPtr)
+	{
+		//swapped?
+		m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1);
+		m_ownManifold = true;
+	}
+	resultOut->setPersistentManifold(m_manifoldPtr);
+
+	//comment-out next line to test multi-contact generation
+	//resultOut->getPersistentManifold()->clearManifold();
+	
+
+	btConvexShape* min0 = static_cast<btConvexShape*>(body0->getCollisionShape());
+	btConvexShape* min1 = static_cast<btConvexShape*>(body1->getCollisionShape());
+
+	btVector3  normalOnB;
+		btVector3  pointOnBWorld;
+#ifndef BT_DISABLE_CAPSULE_CAPSULE_COLLIDER
+	if ((min0->getShapeType() == CAPSULE_SHAPE_PROXYTYPE) && (min1->getShapeType() == CAPSULE_SHAPE_PROXYTYPE))
+	{
+		btCapsuleShape* capsuleA = (btCapsuleShape*) min0;
+		btCapsuleShape* capsuleB = (btCapsuleShape*) min1;
+		btVector3 localScalingA = capsuleA->getLocalScaling();
+		btVector3 localScalingB = capsuleB->getLocalScaling();
+		//NOTE(review): localScalingA/localScalingB are read but not used anywhere in this branch
+		btScalar threshold = m_manifoldPtr->getContactBreakingThreshold();
+
+		btScalar dist = capsuleCapsuleDistance(normalOnB,	pointOnBWorld,capsuleA->getHalfHeight(),capsuleA->getRadius(),
+			capsuleB->getHalfHeight(),capsuleB->getRadius(),capsuleA->getUpAxis(),capsuleB->getUpAxis(),
+			body0->getWorldTransform(),body1->getWorldTransform(),threshold);
+
+		if (dist<threshold)
+		{
+			btAssert(normalOnB.length2()>=(SIMD_EPSILON*SIMD_EPSILON));
+			resultOut->addContactPoint(normalOnB,pointOnBWorld,dist);	
+		}
+		resultOut->refreshContactPoints();
+		return;
+	}
+#endif //BT_DISABLE_CAPSULE_CAPSULE_COLLIDER
+
+
+
+	//when the conservative-distance util is compiled in and enabled, the block below runs only once that distance is <= 0
+#ifdef USE_SEPDISTANCE_UTIL2
+	if (dispatchInfo.m_useConvexConservativeDistanceUtil)
+	{
+		m_sepDistance.updateSeparatingDistance(body0->getWorldTransform(),body1->getWorldTransform());
+	}
+
+	if (!dispatchInfo.m_useConvexConservativeDistanceUtil || m_sepDistance.getConservativeSeparatingDistance()<=0.f)
+#endif //USE_SEPDISTANCE_UTIL2
+
+	{
+
+	
+	btGjkPairDetector::ClosestPointInput input;
+
+	btGjkPairDetector	gjkPairDetector(min0,min1,m_simplexSolver,m_pdSolver);
+	//TODO: if (dispatchInfo.m_useContinuous)
+	gjkPairDetector.setMinkowskiA(min0);
+	gjkPairDetector.setMinkowskiB(min1);
+
+#ifdef USE_SEPDISTANCE_UTIL2
+	if (dispatchInfo.m_useConvexConservativeDistanceUtil)
+	{
+		input.m_maximumDistanceSquared = BT_LARGE_FLOAT;
+	} else
+#endif //USE_SEPDISTANCE_UTIL2
+	{
+		//if (dispatchInfo.m_convexMaxDistanceUseCPT)
+		//{
+		//	input.m_maximumDistanceSquared = min0->getMargin() + min1->getMargin() + m_manifoldPtr->getContactProcessingThreshold();
+		//} else
+		//{
+		input.m_maximumDistanceSquared = min0->getMargin() + min1->getMargin() + m_manifoldPtr->getContactBreakingThreshold();
+//		}
+		//the sum above is a plain distance; the next line squares it to match the field name
+		input.m_maximumDistanceSquared*= input.m_maximumDistanceSquared;
+	}
+
+	input.m_stackAlloc = dispatchInfo.m_stackAllocator;
+	input.m_transformA = body0->getWorldTransform();
+	input.m_transformB = body1->getWorldTransform();
+
+
+
+	
+
+#ifdef USE_SEPDISTANCE_UTIL2
+	btScalar sepDist = 0.f;
+	if (dispatchInfo.m_useConvexConservativeDistanceUtil)
+	{
+		sepDist = gjkPairDetector.getCachedSeparatingDistance();
+		if (sepDist>SIMD_EPSILON)
+		{
+			sepDist += dispatchInfo.m_convexConservativeDistanceThreshold;
+			//now perturbe directions to get multiple contact points
+			
+		}
+	}
+#endif //USE_SEPDISTANCE_UTIL2
+
+	if (min0->isPolyhedral() && min1->isPolyhedral())
+	{
+
+		//result sink that ignores everything: lets GJK run purely for its cached separating axis/distance
+		struct btDummyResult : public btDiscreteCollisionDetectorInterface::Result
+		{
+			virtual void setShapeIdentifiersA(int partId0,int index0){}
+			virtual void setShapeIdentifiersB(int partId1,int index1){}
+			virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth) 
+			{
+			}
+		};
+		
+		btDummyResult dummy;
+
+
+		btPolyhedralConvexShape* polyhedronA = (btPolyhedralConvexShape*) min0;
+		btPolyhedralConvexShape* polyhedronB = (btPolyhedralConvexShape*) min1;
+		if (polyhedronA->getConvexPolyhedron() && polyhedronB->getConvexPolyhedron())
+		{
+
+
+			
+			//both shapes carry polyhedral features: pick an axis (SAT or GJK), then clip hull against hull
+			btScalar threshold = m_manifoldPtr->getContactBreakingThreshold();
+
+			btScalar minDist = -1e30f;
+			btVector3 sepNormalWorldSpace;
+			bool foundSepAxis  = true;
+
+			if (dispatchInfo.m_enableSatConvex)
+			{
+				foundSepAxis = btPolyhedralContactClipping::findSeparatingAxis(
+					*polyhedronA->getConvexPolyhedron(), *polyhedronB->getConvexPolyhedron(),
+					body0->getWorldTransform(), 
+					body1->getWorldTransform(),
+					sepNormalWorldSpace);
+			} else
+			{
+#ifdef ZERO_MARGIN
+				gjkPairDetector.setIgnoreMargin(true);
+				gjkPairDetector.getClosestPoints(input,*resultOut,dispatchInfo.m_debugDraw);
+#else
+				//gjkPairDetector.getClosestPoints(input,*resultOut,dispatchInfo.m_debugDraw);
+				gjkPairDetector.getClosestPoints(input,dummy,dispatchInfo.m_debugDraw);
+#endif //ZERO_MARGIN
+				btScalar l2 = gjkPairDetector.getCachedSeparatingAxis().length2();
+				if (l2>SIMD_EPSILON)
+				{
+					sepNormalWorldSpace = gjkPairDetector.getCachedSeparatingAxis()*(1.f/l2);
+					//minDist = -1e30f;//gjkPairDetector.getCachedSeparatingDistance();
+					minDist = gjkPairDetector.getCachedSeparatingDistance()-min0->getMargin()-min1->getMargin();
+					//NOTE(review): the axis is scaled by 1/length2, not 1/length - confirm the clipping code does not need a unit normal
+#ifdef ZERO_MARGIN
+					foundSepAxis = true;//gjkPairDetector.getCachedSeparatingDistance()<0.f;
+#else
+					foundSepAxis = gjkPairDetector.getCachedSeparatingDistance()<(min0->getMargin()+min1->getMargin());
+#endif
+				}
+			}
+			if (foundSepAxis)
+			{
+//				printf("sepNormalWorldSpace=%f,%f,%f\n",sepNormalWorldSpace.getX(),sepNormalWorldSpace.getY(),sepNormalWorldSpace.getZ());
+
+				btPolyhedralContactClipping::clipHullAgainstHull(sepNormalWorldSpace, *polyhedronA->getConvexPolyhedron(), *polyhedronB->getConvexPolyhedron(),
+					body0->getWorldTransform(), 
+					body1->getWorldTransform(), minDist-threshold, threshold, *resultOut);
+ 				
+			}
+			if (m_ownManifold)
+			{
+				resultOut->refreshContactPoints();
+			}
+			return;
+
+		} else
+		{
+			//we can also deal with convex versus triangle (without connectivity data)
+			if (polyhedronA->getConvexPolyhedron() && polyhedronB->getShapeType()==TRIANGLE_SHAPE_PROXYTYPE)
+			{
+
+				btVertexArray vertices;
+				btTriangleShape* tri = (btTriangleShape*)polyhedronB;
+				vertices.push_back(	body1->getWorldTransform()*tri->m_vertices1[0]);
+				vertices.push_back(	body1->getWorldTransform()*tri->m_vertices1[1]);
+				vertices.push_back(	body1->getWorldTransform()*tri->m_vertices1[2]);
+				
+				//tri->initializePolyhedralFeatures();
+
+				btScalar threshold = m_manifoldPtr->getContactBreakingThreshold();
+
+				btVector3 sepNormalWorldSpace;
+				btScalar minDist =-1e30f;
+				btScalar maxDist = threshold;
+				
+				bool foundSepAxis = false;
+				if (0)
+				{
+					polyhedronB->initializePolyhedralFeatures();
+					 foundSepAxis = btPolyhedralContactClipping::findSeparatingAxis(
+					*polyhedronA->getConvexPolyhedron(), *polyhedronB->getConvexPolyhedron(),
+					body0->getWorldTransform(), 
+					body1->getWorldTransform(),
+					sepNormalWorldSpace);
+				//	 printf("sepNormalWorldSpace=%f,%f,%f\n",sepNormalWorldSpace.getX(),sepNormalWorldSpace.getY(),sepNormalWorldSpace.getZ());
+
+				} else
+				{
+#ifdef ZERO_MARGIN
+					gjkPairDetector.setIgnoreMargin(true);
+					gjkPairDetector.getClosestPoints(input,*resultOut,dispatchInfo.m_debugDraw);
+#else
+					gjkPairDetector.getClosestPoints(input,dummy,dispatchInfo.m_debugDraw);
+#endif//ZERO_MARGIN
+					
+					btScalar l2 = gjkPairDetector.getCachedSeparatingAxis().length2();
+					if (l2>SIMD_EPSILON)
+					{
+						sepNormalWorldSpace = gjkPairDetector.getCachedSeparatingAxis()*(1.f/l2);
+						//minDist = gjkPairDetector.getCachedSeparatingDistance();
+						//maxDist = threshold;
+						minDist = gjkPairDetector.getCachedSeparatingDistance()-min0->getMargin()-min1->getMargin();
+						foundSepAxis = true;
+					}
+				}
+
+				
+			if (foundSepAxis)
+			{
+				btPolyhedralContactClipping::clipFaceAgainstHull(sepNormalWorldSpace, *polyhedronA->getConvexPolyhedron(), 
+					body0->getWorldTransform(), vertices, minDist-threshold, maxDist, *resultOut);
+			}
+				
+				
+				if (m_ownManifold)
+				{
+					resultOut->refreshContactPoints();
+				}
+				
+				return;
+			}
+			
+		}
+
+
+	}
+	
+	gjkPairDetector.getClosestPoints(input,*resultOut,dispatchInfo.m_debugDraw);
+
+	//now perform 'm_numPerturbationIterations' collision queries with the perturbated collision objects
+	
+	//perform perturbation only while the manifold holds fewer than 'm_minimumPointsPerturbationThreshold' points
+	if (m_numPerturbationIterations && resultOut->getPersistentManifold()->getNumContacts() < m_minimumPointsPerturbationThreshold)
+	{
+		
+		int i;
+		btVector3 v0,v1;
+		btVector3 sepNormalWorldSpace;
+		btScalar l2 = gjkPairDetector.getCachedSeparatingAxis().length2();
+	
+		if (l2>SIMD_EPSILON)
+		{
+			sepNormalWorldSpace = gjkPairDetector.getCachedSeparatingAxis()*(1.f/l2);
+			
+			btPlaneSpace1(sepNormalWorldSpace,v0,v1);
+
+			//perturb A if its angular motion disc is smaller than B's, otherwise B; the angle is capped at angleLimit
+			bool perturbeA = true;
+			const btScalar angleLimit = 0.125f * SIMD_PI;
+			btScalar perturbeAngle;
+			btScalar radiusA = min0->getAngularMotionDisc();
+			btScalar radiusB = min1->getAngularMotionDisc();
+			if (radiusA < radiusB)
+			{
+				perturbeAngle = gContactBreakingThreshold /radiusA;
+				perturbeA = true;
+			} else
+			{
+				perturbeAngle = gContactBreakingThreshold / radiusB;
+				perturbeA = false;
+			}
+			if ( perturbeAngle > angleLimit ) 
+					perturbeAngle = angleLimit;
+
+			btTransform unPerturbedTransform;
+			if (perturbeA)
+			{
+				unPerturbedTransform = input.m_transformA;
+			} else
+			{
+				unPerturbedTransform = input.m_transformB;
+			}
+			//iteration i rotates the tilt (perturbeAngle about v0) around the separating normal by i*2pi/N
+			for ( i=0;i<m_numPerturbationIterations;i++)
+			{
+				if (v0.length2()>SIMD_EPSILON)
+				{
+				btQuaternion perturbeRot(v0,perturbeAngle);
+				btScalar iterationAngle = i*(SIMD_2_PI/btScalar(m_numPerturbationIterations));
+				btQuaternion rotq(sepNormalWorldSpace,iterationAngle);
+				
+				
+				if (perturbeA)
+				{
+					input.m_transformA.setBasis(  btMatrix3x3(rotq.inverse()*perturbeRot*rotq)*body0->getWorldTransform().getBasis());
+					input.m_transformB = body1->getWorldTransform();
+	#ifdef DEBUG_CONTACTS
+					dispatchInfo.m_debugDraw->drawTransform(input.m_transformA,10.0);
+	#endif //DEBUG_CONTACTS
+				} else
+				{
+					input.m_transformA = body0->getWorldTransform();
+					input.m_transformB.setBasis( btMatrix3x3(rotq.inverse()*perturbeRot*rotq)*body1->getWorldTransform().getBasis());
+	#ifdef DEBUG_CONTACTS
+					dispatchInfo.m_debugDraw->drawTransform(input.m_transformB,10.0);
+	#endif
+				}
+				//contacts of the perturbed query pass through btPerturbedContactResult before reaching resultOut
+				btPerturbedContactResult perturbedResultOut(resultOut,input.m_transformA,input.m_transformB,unPerturbedTransform,perturbeA,dispatchInfo.m_debugDraw);
+				gjkPairDetector.getClosestPoints(input,perturbedResultOut,dispatchInfo.m_debugDraw);
+				}
+			}
+		}
+	}
+
+	
+
+#ifdef USE_SEPDISTANCE_UTIL2
+	if (dispatchInfo.m_useConvexConservativeDistanceUtil && (sepDist>SIMD_EPSILON))
+	{
+		m_sepDistance.initSeparatingDistance(gjkPairDetector.getCachedSeparatingAxis(),sepDist,body0->getWorldTransform(),body1->getWorldTransform());
+	}
+#endif //USE_SEPDISTANCE_UTIL2
+
+
+	}
+
+	if (m_ownManifold)
+	{
+		resultOut->refreshContactPoints();
+	}
+
+}
+
+
+
+bool disableCcd = false;	//when set to true, calculateTimeOfImpact() below always returns 1
+
+
+///Helper for calculateTimeOfImpact(): lowers the hit fraction stored on each object, and the
+///running minimum 'smallest', to 'fraction' wherever 'fraction' is the smaller value.
+///\param col0,col1 objects whose stored hit fraction may be lowered
+///\param fraction  fraction reported by one convex cast
+static void btKeepSmallerHitFraction(btCollisionObject* col0,btCollisionObject* col1,btScalar fraction,btScalar& smallest)
+{
+	if (col0->getHitFraction() > fraction)
+		col0->setHitFraction(fraction);
+
+	if (col1->getHitFraction() > fraction)
+		col1->setHitFraction(fraction);
+
+	if (smallest > fraction)
+		smallest = fraction;
+}
+
+///An adhoc way of testing the Continuous Collision Detection algorithms between two convex objects
+///moving from their world transform to their interpolation world transform. Two convex casts are
+///run; in each of them one object is approximated as a sphere, to simplify things.
+///Starting in penetration should report no time of impact.
+///For proper CCD, better accuracy and handling of 'allowed' penetration should be added; the main
+///loop of the physics should also have a kind of toi queue (something like Brian Mirtich's
+///application of Timewarp for Rigidbodies).
+///\param col0         first object; its stored hit fraction may be lowered
+///\param col1         second object; its stored hit fraction may be lowered
+///\param dispatchInfo unused
+///\param resultOut    unused
+///\return the smallest fraction reported by the casts, or 1 when no cast ran or none reported a hit
+btScalar	btConvexConvexAlgorithm::calculateTimeOfImpact(btCollisionObject* col0,btCollisionObject* col1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	(void)resultOut;
+	(void)dispatchInfo;
+
+	btScalar smallestFraction = btScalar(1.);
+
+	//Rather than checking ALL pairs, only calculate TOI when the linear motion of at least one
+	//of the objects reaches its squared CCD motion threshold
+	const btScalar movedSqr0 = (col0->getInterpolationWorldTransform().getOrigin() - col0->getWorldTransform().getOrigin()).length2();
+	const btScalar movedSqr1 = (col1->getInterpolationWorldTransform().getOrigin() - col1->getWorldTransform().getOrigin()).length2();
+	const bool bothBelowThreshold = movedSqr0 < col0->getCcdSquareMotionThreshold() &&
+		movedSqr1 < col1->getCcdSquareMotionThreshold();
+
+	if (bothBelowThreshold || disableCcd)
+		return smallestFraction;
+
+	//Pass 1: the real shape of col0 against a sphere standing in for col1
+	{
+		btConvexShape* shape0 = static_cast<btConvexShape*>(col0->getCollisionShape());
+
+		//the stand-in sphere takes its radius from col1's CCD swept-sphere radius
+		btSphereShape	standIn1(col1->getCcdSweptSphereRadius()); //todo: allow non-zero sphere sizes, for better approximation
+		btConvexCast::CastResult hit;
+		btVoronoiSimplexSolver simplex;
+		//SubsimplexConvexCast ccd0(&sphere,min0,&voronoiSimplex);
+		///Simplification, one object is simplified as a sphere
+		btGjkConvexCast caster(shape0,&standIn1,&simplex);
+		//ContinuousConvexCollision ccd(min0,min1,&voronoiSimplex,0);
+
+		if (caster.calcTimeOfImpact(col0->getWorldTransform(),col0->getInterpolationWorldTransform(),
+			col1->getWorldTransform(),col1->getInterpolationWorldTransform(),hit))
+		{
+			//store hit.m_fraction in both bodies and in the return value
+			btKeepSmallerHitFraction(col0,col1,hit.m_fraction,smallestFraction);
+		}
+	}
+
+
+	//Pass 2: a sphere standing in for col0 against the real shape of col1
+	{
+		btConvexShape* shape1 = static_cast<btConvexShape*>(col1->getCollisionShape());
+
+		//the stand-in sphere takes its radius from col0's CCD swept-sphere radius
+		btSphereShape	standIn0(col0->getCcdSweptSphereRadius()); //todo: allow non-zero sphere sizes, for better approximation
+		btConvexCast::CastResult hit;
+		btVoronoiSimplexSolver simplex;
+		//SubsimplexConvexCast ccd0(&sphere,min0,&voronoiSimplex);
+		///Simplification, one object is simplified as a sphere
+		btGjkConvexCast caster(&standIn0,shape1,&simplex);
+		//ContinuousConvexCollision ccd(min0,min1,&voronoiSimplex,0);
+
+		if (caster.calcTimeOfImpact(col0->getWorldTransform(),col0->getInterpolationWorldTransform(),
+			col1->getWorldTransform(),col1->getInterpolationWorldTransform(),hit))
+		{
+			//store hit.m_fraction in both bodies and in the return value
+			btKeepSmallerHitFraction(col0,col1,hit.m_fraction,smallestFraction);
+		}
+	}
+
+	return smallestFraction;
+
+}
+
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h b/src/bullet/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h
new file mode 100644
index 00000000..4380b80e
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h
@@ -0,0 +1,109 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONVEX_CONVEX_ALGORITHM_H
+#define BT_CONVEX_CONVEX_ALGORITHM_H
+
+#include "btActivatingCollisionAlgorithm.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h"
+#include "btCollisionCreateFunc.h"
+#include "btCollisionDispatcher.h"
+#include "LinearMath/btTransformUtil.h" //for btConvexSeparatingDistanceUtil
+
+class btConvexPenetrationDepthSolver;
+
+///Enabling USE_SEPDISTANCE_UTIL2 requires 100% reliable distance computation. However, when using large size ratios GJK can be imprecise
+///so the distance is not conservative. In that case, enabling this USE_SEPDISTANCE_UTIL2 would result in failing/missing collisions.
+///Either improve GJK for large size ratios (testing a 100 units versus a 0.1 unit object) or only enable the util
+///for certain pairs that have a small size ratio
+
+//#define USE_SEPDISTANCE_UTIL2 1
+
+///The convexConvexAlgorithm collision algorithm implements time of impact, convex closest points and penetration depth calculations between two convex objects.
+///Multiple contact points are calculated by perturbing the orientation of the smallest object orthogonal to the separating normal.
+///This idea was described by Gino van den Bergen in this forum topic http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=4&t=288&p=888#p888
+class btConvexConvexAlgorithm : public btActivatingCollisionAlgorithm
+{
+#ifdef USE_SEPDISTANCE_UTIL2
+	btConvexSeparatingDistanceUtil	m_sepDistance;	///< conservative separating-distance helper (only with USE_SEPDISTANCE_UTIL2)
+#endif
+	btSimplexSolverInterface*		m_simplexSolver;	///< handed to the GJK pair detector
+	btConvexPenetrationDepthSolver* m_pdSolver;		///< handed to the GJK pair detector
+
+	bool	m_ownManifold;			///< true once this algorithm requested m_manifoldPtr itself
+	btPersistentManifold*	m_manifoldPtr;	///< manifold that receives the contacts; may be null until first use
+	bool			m_lowLevelOfDetail;	///< written by setLowLevelOfDetail()
+
+	int m_numPerturbationIterations;		///< number of extra perturbed GJK queries (0 disables them)
+	int m_minimumPointsPerturbationThreshold;	///< perturb only while the manifold has fewer contacts than this
+
+public:
+
+	///Only stores its arguments; mf may be null, in which case processCollision requests a manifold.
+	btConvexConvexAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1, btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver, int numPerturbationIterations, int minimumPointsPerturbationThreshold);
+
+	///Releases the manifold if this algorithm owns it.
+	virtual ~btConvexConvexAlgorithm();
+
+	///Computes the contacts for the pair and adds them to resultOut.
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	///Sphere-approximated convex cast; returns the smallest hit fraction found (1 means no hit).
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	///Appends the manifold to manifoldArray, but only if it exists and is owned by this algorithm.
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		///should we use m_ownManifold to avoid adding duplicates?
+		if (!m_manifoldPtr || !m_ownManifold)
+			return;
+		manifoldArray.push_back(m_manifoldPtr);
+	}
+
+	void	setLowLevelOfDetail(bool useLowLevel);
+
+	///Read-only access to the current manifold (may be null).
+	const btPersistentManifold*	getManifold()
+	{
+		const btPersistentManifold* current = m_manifoldPtr;
+		return current;
+	}
+
+	///Factory that builds btConvexConvexAlgorithm instances from the solvers and perturbation settings stored here.
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		btConvexPenetrationDepthSolver*		m_pdSolver;
+		btSimplexSolverInterface*			m_simplexSolver;
+		int m_numPerturbationIterations;
+		int m_minimumPointsPerturbationThreshold;
+
+		CreateFunc(btSimplexSolverInterface*			simplexSolver, btConvexPenetrationDepthSolver* pdSolver);
+
+		virtual ~CreateFunc();
+
+		///Placement-constructs the algorithm in memory obtained from the dispatcher.
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* storage = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btConvexConvexAlgorithm));
+			return new(storage) btConvexConvexAlgorithm(ci.m_manifold,ci,body0,body1,m_simplexSolver,m_pdSolver,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
+		}
+	};
+
+};
+
+#endif //BT_CONVEX_CONVEX_ALGORITHM_H
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.cpp b/src/bullet/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.cpp
new file mode 100644
index 00000000..b2e9bfaf
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.cpp
@@ -0,0 +1,173 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btConvexPlaneCollisionAlgorithm.h"
+
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
+
+//#include <stdio.h>
+
+///Stores the settings and, when no manifold was supplied and the dispatcher says the pair needs collision, requests one.
+btConvexPlaneCollisionAlgorithm::btConvexPlaneCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1, bool isSwapped, int numPerturbationIterations,int minimumPointsPerturbationThreshold)
+: btCollisionAlgorithm(ci),
+m_ownManifold(false),
+m_manifoldPtr(mf),
+m_isSwapped(isSwapped),
+m_numPerturbationIterations(numPerturbationIterations),
+m_minimumPointsPerturbationThreshold(minimumPointsPerturbationThreshold)
+{
+	btCollisionObject* const convexObj = m_isSwapped ? col1 : col0;
+	btCollisionObject* const planeObj = m_isSwapped ? col0 : col1;
+	if (m_manifoldPtr || !m_dispatcher->needsCollision(convexObj,planeObj))
+		return;
+	//no manifold was handed in: request one and remember to release it in the destructor
+	m_manifoldPtr = m_dispatcher->getNewManifold(convexObj,planeObj);
+	m_ownManifold = true;
+}
+
+
+///Hands the manifold back to the dispatcher, but only when this algorithm requested it itself.
+btConvexPlaneCollisionAlgorithm::~btConvexPlaneCollisionAlgorithm()
+{
+	if (m_ownManifold && m_manifoldPtr)
+	{
+		m_dispatcher->releaseManifold(m_manifoldPtr);
+	}
+}
+
+///Runs one convex/plane query with the basis of the convex world transform multiplied by perturbeRot
+///and adds a contact to resultOut when the distance is below the manifold's contact breaking threshold.
+///m_isSwapped decides which of body0/body1 is the plane; dispatchInfo is not used.
+void btConvexPlaneCollisionAlgorithm::collideSingleContact (const btQuaternion& perturbeRot, btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	btCollisionObject* convexObj = m_isSwapped ? body1 : body0;
+	btCollisionObject* planeObj = m_isSwapped ? body0 : body1;
+	btConvexShape* convexShape = (btConvexShape*) convexObj->getCollisionShape();
+	btStaticPlaneShape* planeShape = (btStaticPlaneShape*) planeObj->getCollisionShape();
+	const btVector3& planeNormal = planeShape->getPlaneNormal();
+	const btScalar& planeConstant = planeShape->getPlaneConstant();
+	const btTransform& planeWorld = planeObj->getWorldTransform();
+
+	//convex-in-plane uses the unperturbed transform; only plane-in-convex sees the perturbed basis
+	btTransform perturbedConvexWorld = convexObj->getWorldTransform();
+	const btTransform convexInPlane = planeWorld.inverse() * perturbedConvexWorld;
+	perturbedConvexWorld.getBasis() *= btMatrix3x3(perturbeRot);
+	const btTransform planeInConvex = perturbedConvexWorld.inverse() * planeWorld;
+
+	//supporting vertex of the convex shape in the direction opposite to the plane normal
+	const btVector3 supportLocal = convexShape->localGetSupportingVertex(planeInConvex.getBasis()*-planeNormal);
+	const btVector3 supportInPlane = convexInPlane(supportLocal);
+	const btScalar distance = (planeNormal.dot(supportInPlane) - planeConstant);
+
+	//project that vertex onto the plane and bring the projection into world space
+	const btVector3 projected = supportInPlane - distance*planeNormal;
+	const btVector3 projectedWorld = planeWorld * projected;
+
+	//the manifold is attached to resultOut even when no contact is added
+	const bool hasCollision = distance < m_manifoldPtr->getContactBreakingThreshold();
+	resultOut->setPersistentManifold(m_manifoldPtr);
+	if (!hasCollision)
+		return;
+
+	/// report a contact. internally this will be kept persistent, and contact reduction is done
+	const btVector3 normalOnSurfaceB = planeWorld.getBasis() * planeNormal;
+	resultOut->addContactPoint(normalOnSurfaceB,projectedWorld,distance);
+}
+
+
+///Convex-versus-static-plane contact generation. Does nothing when no manifold is available.
+///One query is made with the unperturbed transforms; for polyhedral shapes with too few contacts,
+///m_numPerturbationIterations further queries are made through collideSingleContact().
+void btConvexPlaneCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	(void)dispatchInfo;
+	if (!m_manifoldPtr)
+		return;
+
+	//m_isSwapped tells which of the two arguments is the plane
+	btCollisionObject* convexObj = m_isSwapped ? body1 : body0;
+	btCollisionObject* planeObj = m_isSwapped ? body0 : body1;
+	btConvexShape* convexShape = (btConvexShape*) convexObj->getCollisionShape();
+	btStaticPlaneShape* planeShape = (btStaticPlaneShape*) planeObj->getCollisionShape();
+
+	const btVector3& planeNormal = planeShape->getPlaneNormal();
+	const btScalar& planeConstant = planeShape->getPlaneConstant();
+	const btTransform& convexWorld = convexObj->getWorldTransform();
+	const btTransform& planeWorld = planeObj->getWorldTransform();
+
+	//relative transforms in both directions
+	const btTransform planeInConvex = convexWorld.inverse() * planeWorld;
+	const btTransform convexInPlane = planeWorld.inverse() * convexWorld;
+
+	//supporting vertex of the convex shape in the direction opposite to the plane normal
+	const btVector3 supportLocal = convexShape->localGetSupportingVertex(planeInConvex.getBasis()*-planeNormal);
+	const btVector3 supportInPlane = convexInPlane(supportLocal);
+	const btScalar distance = (planeNormal.dot(supportInPlane) - planeConstant);
+
+	//project that vertex onto the plane and bring the projection into world space
+	const btVector3 projected = supportInPlane - distance*planeNormal;
+	const btVector3 projectedWorld = planeWorld * projected;
+
+	const bool hasCollision = distance < m_manifoldPtr->getContactBreakingThreshold();
+	resultOut->setPersistentManifold(m_manifoldPtr);
+	if (hasCollision)
+	{
+		/// report a contact. internally this will be kept persistent, and contact reduction is done
+		resultOut->addContactPoint(planeWorld.getBasis() * planeNormal,projectedWorld,distance);
+	}
+
+	//the perturbation algorithm doesn't work well with implicit surfaces such as spheres, cylinder and cones:
+	//they keep on rolling forever because of the additional off-center contact points
+	//so only enable the feature for polyhedral shapes (btBoxShape, btConvexHullShape etc)
+	if (convexShape->isPolyhedral() && resultOut->getPersistentManifold()->getNumContacts()<m_minimumPointsPerturbationThreshold)
+	{
+		btVector3 v0,v1;
+		btPlaneSpace1(planeNormal,v0,v1);
+
+		//tilt angle: contact breaking threshold over the angular motion disc, capped at angleLimit
+		const btScalar angleLimit = 0.125f * SIMD_PI;
+		const btScalar discRadius = convexShape->getAngularMotionDisc();
+		btScalar tiltAngle = gContactBreakingThreshold / discRadius;
+		if ( tiltAngle > angleLimit )
+			tiltAngle = angleLimit;
+
+		//now perform 'm_numPerturbationIterations' collision queries with the perturbated collision objects
+		btQuaternion perturbeRot(v0,tiltAngle);
+		for (int i=0;i<m_numPerturbationIterations;i++)
+		{
+			btScalar iterationAngle = i*(SIMD_2_PI/btScalar(m_numPerturbationIterations));
+			btQuaternion rotq(planeNormal,iterationAngle);
+			collideSingleContact(rotq.inverse()*perturbeRot*rotq,body0,body1,dispatchInfo,resultOut);
+		}
+	}
+
+	//refresh only a manifold that this algorithm owns and that currently holds contacts
+	if (m_ownManifold && m_manifoldPtr->getNumContacts())
+		resultOut->refreshContactPoints();
+}
+
+///Time of impact is not implemented yet for convex/plane pairs: all arguments are ignored and 1 is returned.
+btScalar btConvexPlaneCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* col0,btCollisionObject* col1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	(void)col0;
+	(void)col1;
+	(void)dispatchInfo;
+	(void)resultOut;
+	const btScalar noImpact(1.);
+	return noImpact;
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.h b/src/bullet/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.h
new file mode 100644
index 00000000..b9494f5a
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.h
@@ -0,0 +1,84 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONVEX_PLANE_COLLISION_ALGORITHM_H
+#define BT_CONVEX_PLANE_COLLISION_ALGORITHM_H
+
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+class btPersistentManifold;
+#include "btCollisionDispatcher.h"
+
+#include "LinearMath/btVector3.h"
+
+/// btConvexPlaneCollisionAlgorithm provides collision detection between a convex shape and a static plane.
+/// Other features are frame-coherency (persistent data) and collision response.
+class btConvexPlaneCollisionAlgorithm : public btCollisionAlgorithm
+{
+	bool		m_ownManifold;	//when set (and m_manifoldPtr is non-null) the manifold is reported by getAllContactManifolds
+	btPersistentManifold*	m_manifoldPtr;
+	bool		m_isSwapped;
+	int			m_numPerturbationIterations;	//number of perturbed collision queries issued by processCollision
+	int			m_minimumPointsPerturbationThreshold;	//perturbed queries only run while the manifold holds fewer contacts than this
+
+public:
+	///isSwapped, numPerturbationIterations and minimumPointsPerturbationThreshold are normally supplied by CreateFunc below
+	btConvexPlaneCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1, bool isSwapped, int numPerturbationIterations,int minimumPointsPerturbationThreshold);
+
+	virtual ~btConvexPlaneCollisionAlgorithm();
+	///runs the collision query for the pair and adds the resulting contact points to resultOut
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	///single collision query for the given perturbation rotation; processCollision calls it once per perturbation iteration
+	void collideSingleContact (const btQuaternion& perturbeRot, btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	///not implemented: the definition ignores its arguments and always returns 1
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{	//appends the manifold only when it exists and is owned by this algorithm
+		if (m_manifoldPtr && m_ownManifold)
+		{
+			manifoldArray.push_back(m_manifoldPtr);
+		}
+	}
+
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{	//factory: placement-constructs the algorithm in memory obtained from the dispatcher
+		int	m_numPerturbationIterations;
+		int m_minimumPointsPerturbationThreshold;
+			//defaults: 1 iteration, threshold 0; processCollision tests numContacts<threshold, so 0 presumably disables perturbation (assumes the constructor stores the value unchanged)
+		CreateFunc() 
+			: m_numPerturbationIterations(1),
+			m_minimumPointsPerturbationThreshold(0)
+		{
+		}
+		
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btConvexPlaneCollisionAlgorithm));
+			if (!m_swapped)	//m_swapped is forwarded as the isSwapped constructor argument; no manifold is passed in (mf == 0)
+			{
+				return new(mem) btConvexPlaneCollisionAlgorithm(0,ci,body0,body1,false,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
+			} else
+			{
+				return new(mem) btConvexPlaneCollisionAlgorithm(0,ci,body0,body1,true,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
+			}
+		}
+	};
+
+};
+
+#endif //BT_CONVEX_PLANE_COLLISION_ALGORITHM_H
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.cpp b/src/bullet/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.cpp
new file mode 100644
index 00000000..7faee6fa
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.cpp
@@ -0,0 +1,309 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btDefaultCollisionConfiguration.h"
+
+#include "BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h"
+#ifdef USE_BUGGY_SPHERE_BOX_ALGORITHM
+#include "BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h"
+#endif //USE_BUGGY_SPHERE_BOX_ALGORITHM
+#include "BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h"
+
+
+
+#include "LinearMath/btStackAlloc.h"
+#include "LinearMath/btPoolAllocator.h"
+
+
+
+
+
+btDefaultCollisionConfiguration::btDefaultCollisionConfiguration(const btDefaultCollisionConstructionInfo& constructionInfo)
+//btDefaultCollisionConfiguration::btDefaultCollisionConfiguration(btStackAlloc*	stackAlloc,btPoolAllocator*	persistentManifoldPool,btPoolAllocator*	collisionAlgorithmPool)
+{
+	//all helper objects are placement-constructed into 16-byte aligned memory and released again in the destructor
+	void* mem = btAlignedAlloc(sizeof(btVoronoiSimplexSolver),16);
+	m_simplexSolver = new (mem)btVoronoiSimplexSolver();
+
+	if (constructionInfo.m_useEpaPenetrationAlgorithm)
+	{
+		mem = btAlignedAlloc(sizeof(btGjkEpaPenetrationDepthSolver),16);
+		m_pdSolver = new (mem)btGjkEpaPenetrationDepthSolver;
+	}else
+	{
+		mem = btAlignedAlloc(sizeof(btMinkowskiPenetrationDepthSolver),16);
+		m_pdSolver = new (mem)btMinkowskiPenetrationDepthSolver;
+	}
+	
+	//default CreationFunctions, filling the m_doubleDispatch table
+	mem = btAlignedAlloc(sizeof(btConvexConvexAlgorithm::CreateFunc),16);
+	m_convexConvexCreateFunc = new(mem) btConvexConvexAlgorithm::CreateFunc(m_simplexSolver,m_pdSolver);
+	mem = btAlignedAlloc(sizeof(btConvexConcaveCollisionAlgorithm::CreateFunc),16);
+	m_convexConcaveCreateFunc = new (mem)btConvexConcaveCollisionAlgorithm::CreateFunc;
+	mem = btAlignedAlloc(sizeof(btConvexConcaveCollisionAlgorithm::SwappedCreateFunc),16);	//sized with the type that is actually constructed
+	m_swappedConvexConcaveCreateFunc = new (mem)btConvexConcaveCollisionAlgorithm::SwappedCreateFunc;
+	mem = btAlignedAlloc(sizeof(btCompoundCollisionAlgorithm::CreateFunc),16);
+	m_compoundCreateFunc = new (mem)btCompoundCollisionAlgorithm::CreateFunc;
+	mem = btAlignedAlloc(sizeof(btCompoundCollisionAlgorithm::SwappedCreateFunc),16);
+	m_swappedCompoundCreateFunc = new (mem)btCompoundCollisionAlgorithm::SwappedCreateFunc;
+	mem = btAlignedAlloc(sizeof(btEmptyAlgorithm::CreateFunc),16);
+	m_emptyCreateFunc = new(mem) btEmptyAlgorithm::CreateFunc;
+	
+	mem = btAlignedAlloc(sizeof(btSphereSphereCollisionAlgorithm::CreateFunc),16);
+	m_sphereSphereCF = new(mem) btSphereSphereCollisionAlgorithm::CreateFunc;
+#ifdef USE_BUGGY_SPHERE_BOX_ALGORITHM
+	mem = btAlignedAlloc(sizeof(btSphereBoxCollisionAlgorithm::CreateFunc),16);
+	m_sphereBoxCF = new(mem) btSphereBoxCollisionAlgorithm::CreateFunc;
+	mem = btAlignedAlloc(sizeof(btSphereBoxCollisionAlgorithm::CreateFunc),16);
+	m_boxSphereCF = new (mem)btSphereBoxCollisionAlgorithm::CreateFunc;
+	m_boxSphereCF->m_swapped = true;
+#endif //USE_BUGGY_SPHERE_BOX_ALGORITHM
+
+	mem = btAlignedAlloc(sizeof(btSphereTriangleCollisionAlgorithm::CreateFunc),16);
+	m_sphereTriangleCF = new (mem)btSphereTriangleCollisionAlgorithm::CreateFunc;
+	mem = btAlignedAlloc(sizeof(btSphereTriangleCollisionAlgorithm::CreateFunc),16);
+	m_triangleSphereCF = new (mem)btSphereTriangleCollisionAlgorithm::CreateFunc;
+	m_triangleSphereCF->m_swapped = true;	//same create function type, flagged for the reversed argument order
+	
+	mem = btAlignedAlloc(sizeof(btBoxBoxCollisionAlgorithm::CreateFunc),16);
+	m_boxBoxCF = new(mem)btBoxBoxCollisionAlgorithm::CreateFunc;
+
+	//convex versus plane
+	mem = btAlignedAlloc (sizeof(btConvexPlaneCollisionAlgorithm::CreateFunc),16);
+	m_convexPlaneCF = new (mem) btConvexPlaneCollisionAlgorithm::CreateFunc;
+	mem = btAlignedAlloc (sizeof(btConvexPlaneCollisionAlgorithm::CreateFunc),16);
+	m_planeConvexCF = new (mem) btConvexPlaneCollisionAlgorithm::CreateFunc;
+	m_planeConvexCF->m_swapped = true;
+	
+	///calculate maximum element size, big enough to fit any collision algorithm in the memory pool
+	int maxSize = sizeof(btConvexConvexAlgorithm);
+	int maxSize2 = sizeof(btConvexConcaveCollisionAlgorithm);
+	int maxSize3 = sizeof(btCompoundCollisionAlgorithm);
+	//start from the caller-provided custom element size so that user-registered algorithms also fit,
+	//then widen to the largest of the built-in algorithm types measured above
+	int	collisionAlgorithmMaxElementSize = btMax(maxSize,constructionInfo.m_customCollisionAlgorithmMaxElementSize);
+	collisionAlgorithmMaxElementSize = btMax(collisionAlgorithmMaxElementSize,maxSize2);
+	collisionAlgorithmMaxElementSize = btMax(collisionAlgorithmMaxElementSize,maxSize3);
+
+	if (constructionInfo.m_stackAlloc)
+	{
+		m_ownsStackAllocator = false;	//caller-supplied allocator: used but never destroyed by this object
+		this->m_stackAlloc = constructionInfo.m_stackAlloc;
+	} else
+	{
+		m_ownsStackAllocator = true;
+		mem = btAlignedAlloc(sizeof(btStackAlloc),16);
+		m_stackAlloc = new(mem)btStackAlloc(constructionInfo.m_defaultStackAllocatorSize);
+	}
+		
+	if (constructionInfo.m_persistentManifoldPool)
+	{
+		m_ownsPersistentManifoldPool = false;
+		m_persistentManifoldPool = constructionInfo.m_persistentManifoldPool;
+	} else
+	{
+		m_ownsPersistentManifoldPool = true;
+		mem = btAlignedAlloc(sizeof(btPoolAllocator),16);
+		m_persistentManifoldPool = new (mem) btPoolAllocator(sizeof(btPersistentManifold),constructionInfo.m_defaultMaxPersistentManifoldPoolSize);
+	}
+	
+	if (constructionInfo.m_collisionAlgorithmPool)
+	{
+		m_ownsCollisionAlgorithmPool = false;
+		m_collisionAlgorithmPool = constructionInfo.m_collisionAlgorithmPool;
+	} else
+	{
+		m_ownsCollisionAlgorithmPool = true;
+		mem = btAlignedAlloc(sizeof(btPoolAllocator),16);
+		m_collisionAlgorithmPool = new(mem) btPoolAllocator(collisionAlgorithmMaxElementSize,constructionInfo.m_defaultMaxCollisionAlgorithmPoolSize);
+	}
+
+
+}
+
+btDefaultCollisionConfiguration::~btDefaultCollisionConfiguration()
+{	//releases what the constructor allocated; allocators/pools supplied by the caller (m_owns* == false) are left untouched
+	if (m_ownsStackAllocator)
+	{
+		m_stackAlloc->destroy();
+		m_stackAlloc->~btStackAlloc();
+		btAlignedFree(m_stackAlloc);
+	}
+	if (m_ownsCollisionAlgorithmPool)
+	{
+		m_collisionAlgorithmPool->~btPoolAllocator();
+		btAlignedFree(m_collisionAlgorithmPool);
+	}
+	if (m_ownsPersistentManifoldPool)
+	{
+		m_persistentManifoldPool->~btPoolAllocator();
+		btAlignedFree(m_persistentManifoldPool);
+	}
+	//the create functions were placement-constructed into btAlignedAlloc memory: call the destructor explicitly, then free
+	m_convexConvexCreateFunc->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree(	m_convexConvexCreateFunc);
+
+	m_convexConcaveCreateFunc->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_convexConcaveCreateFunc);
+	m_swappedConvexConcaveCreateFunc->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_swappedConvexConcaveCreateFunc);
+
+	m_compoundCreateFunc->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_compoundCreateFunc);
+
+	m_swappedCompoundCreateFunc->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_swappedCompoundCreateFunc);
+
+	m_emptyCreateFunc->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_emptyCreateFunc);
+
+	m_sphereSphereCF->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_sphereSphereCF);
+
+#ifdef USE_BUGGY_SPHERE_BOX_ALGORITHM
+	m_sphereBoxCF->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_sphereBoxCF);
+	m_boxSphereCF->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_boxSphereCF);
+#endif //USE_BUGGY_SPHERE_BOX_ALGORITHM
+
+	m_sphereTriangleCF->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_sphereTriangleCF);
+	m_triangleSphereCF->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_triangleSphereCF);
+	m_boxBoxCF->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_boxBoxCF);
+
+	m_convexPlaneCF->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_convexPlaneCF);
+	m_planeConvexCF->~btCollisionAlgorithmCreateFunc();
+	btAlignedFree( m_planeConvexCF);
+	//the solvers are destroyed after the create functions; the convex-convex create function was constructed with pointers to both
+	m_simplexSolver->~btVoronoiSimplexSolver();
+	btAlignedFree(m_simplexSolver);
+
+	m_pdSolver->~btConvexPenetrationDepthSolver();
+	
+	btAlignedFree(m_pdSolver);
+
+
+}
+
+
+btCollisionAlgorithmCreateFunc* btDefaultCollisionConfiguration::getCollisionAlgorithmCreateFunc(int proxyType0,int proxyType1)
+{
+	//returns the create function for a pair of shape proxy types; the checks run from the most specific
+	//pair to the most general one and the first match wins, so their order matters
+
+	if ((proxyType0 == SPHERE_SHAPE_PROXYTYPE) && (proxyType1==SPHERE_SHAPE_PROXYTYPE))
+	{
+		return	m_sphereSphereCF;
+	}
+#ifdef USE_BUGGY_SPHERE_BOX_ALGORITHM
+	if ((proxyType0 == SPHERE_SHAPE_PROXYTYPE) && (proxyType1==BOX_SHAPE_PROXYTYPE))
+	{
+		return	m_sphereBoxCF;
+	}
+
+	if ((proxyType0 == BOX_SHAPE_PROXYTYPE ) && (proxyType1==SPHERE_SHAPE_PROXYTYPE))
+	{
+		return	m_boxSphereCF;
+	}
+#endif //USE_BUGGY_SPHERE_BOX_ALGORITHM
+
+
+	if ((proxyType0 == SPHERE_SHAPE_PROXYTYPE ) && (proxyType1==TRIANGLE_SHAPE_PROXYTYPE))
+	{
+		return	m_sphereTriangleCF;
+	}
+
+	if ((proxyType0 == TRIANGLE_SHAPE_PROXYTYPE  ) && (proxyType1==SPHERE_SHAPE_PROXYTYPE))
+	{
+		return	m_triangleSphereCF;
+	} 
+
+	if ((proxyType0 == BOX_SHAPE_PROXYTYPE) && (proxyType1 == BOX_SHAPE_PROXYTYPE))
+	{
+		return m_boxBoxCF;
+	}
+	//any convex shape against a static plane; the plane-first order gets the create function flagged as swapped
+	if (btBroadphaseProxy::isConvex(proxyType0) && (proxyType1 == STATIC_PLANE_PROXYTYPE))
+	{
+		return m_convexPlaneCF;
+	}
+
+	if (btBroadphaseProxy::isConvex(proxyType1) && (proxyType0 == STATIC_PLANE_PROXYTYPE))
+	{
+		return m_planeConvexCF;
+	}
+	
+	//generic fallbacks: convex-convex, convex-concave (either order), then compound (either order)
+
+	if (btBroadphaseProxy::isConvex(proxyType0) && btBroadphaseProxy::isConvex(proxyType1))
+	{
+		return m_convexConvexCreateFunc;
+	}
+
+	if (btBroadphaseProxy::isConvex(proxyType0) && btBroadphaseProxy::isConcave(proxyType1))
+	{
+		return m_convexConcaveCreateFunc;
+	}
+
+	if (btBroadphaseProxy::isConvex(proxyType1) && btBroadphaseProxy::isConcave(proxyType0))
+	{
+		return m_swappedConvexConcaveCreateFunc;
+	}
+
+	if (btBroadphaseProxy::isCompound(proxyType0))
+	{
+		return m_compoundCreateFunc;
+	} else
+	{
+		if (btBroadphaseProxy::isCompound(proxyType1))
+		{
+			return m_swappedCompoundCreateFunc;
+		}
+	}
+
+	//failed to find an algorithm: fall back to the empty create function instead of returning null
+	return m_emptyCreateFunc;
+}
+
+void btDefaultCollisionConfiguration::setConvexConvexMultipointIterations(int numPerturbationIterations, int minimumPointsPerturbationThreshold)
+{	//stores both settings on the convex-convex create function
+	btConvexConvexAlgorithm::CreateFunc* convexConvex = (btConvexConvexAlgorithm::CreateFunc*) m_convexConvexCreateFunc;	//downcast matches the type the constructor created
+	convexConvex->m_numPerturbationIterations = numPerturbationIterations;
+	convexConvex->m_minimumPointsPerturbationThreshold = minimumPointsPerturbationThreshold;
+}
+
+void	btDefaultCollisionConfiguration::setPlaneConvexMultipointIterations(int numPerturbationIterations, int minimumPointsPerturbationThreshold)
+{	//stores both settings on the convex-plane and on the plane-convex create function, so both orderings stay in sync
+	btConvexPlaneCollisionAlgorithm::CreateFunc* cpCF = (btConvexPlaneCollisionAlgorithm::CreateFunc*)m_convexPlaneCF;	//downcast matches the type the constructor created
+	cpCF->m_numPerturbationIterations = numPerturbationIterations;
+	cpCF->m_minimumPointsPerturbationThreshold = minimumPointsPerturbationThreshold;
+	
+	btConvexPlaneCollisionAlgorithm::CreateFunc* pcCF = (btConvexPlaneCollisionAlgorithm::CreateFunc*)m_planeConvexCF;
+	pcCF->m_numPerturbationIterations = numPerturbationIterations;
+	pcCF->m_minimumPointsPerturbationThreshold = minimumPointsPerturbationThreshold;
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h b/src/bullet/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h
new file mode 100644
index 00000000..81ed424a
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h
@@ -0,0 +1,137 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_DEFAULT_COLLISION_CONFIGURATION
+#define BT_DEFAULT_COLLISION_CONFIGURATION
+
+#include "btCollisionConfiguration.h"
+class btVoronoiSimplexSolver;
+class btConvexPenetrationDepthSolver;
+
+struct	btDefaultCollisionConstructionInfo
+{	//construction parameters for btDefaultCollisionConfiguration; a null pointer makes the configuration create and own that object itself
+	btStackAlloc*		m_stackAlloc;
+	btPoolAllocator*	m_persistentManifoldPool;
+	btPoolAllocator*	m_collisionAlgorithmPool;
+	int					m_defaultMaxPersistentManifoldPoolSize;	//passed to the btPoolAllocator created when m_persistentManifoldPool is null
+	int					m_defaultMaxCollisionAlgorithmPoolSize;	//passed to the btPoolAllocator created when m_collisionAlgorithmPool is null
+	int					m_customCollisionAlgorithmMaxElementSize;	//lower bound for the element size of the self-created collision algorithm pool
+	int					m_defaultStackAllocatorSize;	//passed to the btStackAlloc created when m_stackAlloc is null
+	int					m_useEpaPenetrationAlgorithm;	//non-zero: btGjkEpaPenetrationDepthSolver, zero: btMinkowskiPenetrationDepthSolver
+
+	btDefaultCollisionConstructionInfo()
+		:m_stackAlloc(0),
+		m_persistentManifoldPool(0),
+		m_collisionAlgorithmPool(0),
+		m_defaultMaxPersistentManifoldPoolSize(4096),
+		m_defaultMaxCollisionAlgorithmPoolSize(4096),
+		m_customCollisionAlgorithmMaxElementSize(0),
+		m_defaultStackAllocatorSize(0),
+		m_useEpaPenetrationAlgorithm(true)
+	{
+	}
+};
+
+
+
+///btDefaultCollisionConfiguration is the default btCollisionConfiguration: it provides the stack allocator,
+///the pool memory allocators and the collision algorithm create functions used by Bullet collision detection.
+///@todo: describe the meaning
+class	btDefaultCollisionConfiguration : public btCollisionConfiguration
+{
+
+protected:
+
+	int	m_persistentManifoldPoolSize;	//NOTE(review): not assigned by the constructor in the accompanying .cpp - confirm whether it is still used
+	
+	btStackAlloc*	m_stackAlloc;
+	bool	m_ownsStackAllocator;	//each m_owns* flag is true when the object was created by this configuration and must be destroyed by it
+
+	btPoolAllocator*	m_persistentManifoldPool;
+	bool	m_ownsPersistentManifoldPool;
+
+
+	btPoolAllocator*	m_collisionAlgorithmPool;
+	bool	m_ownsCollisionAlgorithmPool;
+
+	//default simplex/penetration depth solvers
+	btVoronoiSimplexSolver*	m_simplexSolver;
+	btConvexPenetrationDepthSolver*	m_pdSolver;
+	
+	//default CreationFunctions, filling the m_doubleDispatch table
+	btCollisionAlgorithmCreateFunc*	m_convexConvexCreateFunc;
+	btCollisionAlgorithmCreateFunc*	m_convexConcaveCreateFunc;
+	btCollisionAlgorithmCreateFunc*	m_swappedConvexConcaveCreateFunc;
+	btCollisionAlgorithmCreateFunc*	m_compoundCreateFunc;
+	btCollisionAlgorithmCreateFunc*	m_swappedCompoundCreateFunc;
+	btCollisionAlgorithmCreateFunc* m_emptyCreateFunc;
+	btCollisionAlgorithmCreateFunc* m_sphereSphereCF;
+#ifdef USE_BUGGY_SPHERE_BOX_ALGORITHM
+	btCollisionAlgorithmCreateFunc* m_sphereBoxCF;
+	btCollisionAlgorithmCreateFunc* m_boxSphereCF;
+#endif //USE_BUGGY_SPHERE_BOX_ALGORITHM
+
+	btCollisionAlgorithmCreateFunc* m_boxBoxCF;
+	btCollisionAlgorithmCreateFunc*	m_sphereTriangleCF;
+	btCollisionAlgorithmCreateFunc*	m_triangleSphereCF;
+	btCollisionAlgorithmCreateFunc*	m_planeConvexCF;
+	btCollisionAlgorithmCreateFunc*	m_convexPlaneCF;
+	
+public:
+
+	///allocators and pools left null in constructionInfo are created and owned by the configuration
+	btDefaultCollisionConfiguration(const btDefaultCollisionConstructionInfo& constructionInfo = btDefaultCollisionConstructionInfo());
+
+	virtual ~btDefaultCollisionConfiguration();
+
+		///memory pools
+	virtual btPoolAllocator* getPersistentManifoldPool()
+	{
+		return m_persistentManifoldPool;
+	}
+
+	virtual btPoolAllocator* getCollisionAlgorithmPool()
+	{
+		return m_collisionAlgorithmPool;
+	}
+
+	virtual btStackAlloc*	getStackAllocator()
+	{
+		return m_stackAlloc;
+	}
+
+	virtual	btVoronoiSimplexSolver*	getSimplexSolver()
+	{
+		return m_simplexSolver;
+	}
+
+	///returns the create function registered for the given pair of shape proxy types; never null, unsupported pairs get the empty algorithm
+	virtual btCollisionAlgorithmCreateFunc* getCollisionAlgorithmCreateFunc(int proxyType0,int proxyType1);
+
+	///Use this method to generate multiple contact points at once between two objects using the generic convex-convex algorithm.
+	///By default, this feature is disabled for best performance.
+	///@param numPerturbationIterations controls the number of collision queries. Set it to zero to disable the feature.
+	///@param minimumPointsPerturbationThreshold is the minimum number of points in the contact cache, above which the feature is disabled
+	///3 is a good value for both params, if you want to enable the feature. This is because the default contact cache contains a maximum of 4 points, and one collision query at the unperturbed orientation is performed first.
+	///See Bullet/Demos/CollisionDemo for an example how this feature gathers multiple points.
+	///@todo we could add a per-object setting of those parameters, for level-of-detail collision detection.
+	void	setConvexConvexMultipointIterations(int numPerturbationIterations=3, int minimumPointsPerturbationThreshold = 3);
+	///Same two settings for the convex-plane algorithm; they are applied to both the convex-plane and the plane-convex create function.
+	void	setPlaneConvexMultipointIterations(int numPerturbationIterations=3, int minimumPointsPerturbationThreshold = 3);
+
+};
+
+#endif //BT_DEFAULT_COLLISION_CONFIGURATION
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.cpp b/src/bullet/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.cpp
new file mode 100644
index 00000000..93605438
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.cpp
@@ -0,0 +1,34 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btEmptyCollisionAlgorithm.h"
+
+
+
+btEmptyAlgorithm::btEmptyAlgorithm(const btCollisionAlgorithmConstructionInfo& ci)
+	: btCollisionAlgorithm(ci)
+{	//nothing beyond forwarding ci to the base class
+}
+
+void btEmptyAlgorithm::processCollision (btCollisionObject* ,btCollisionObject* ,const btDispatcherInfo& ,btManifoldResult* )
+{	//intentionally empty: all arguments are ignored and no contacts are produced
+}
+
+btScalar btEmptyAlgorithm::calculateTimeOfImpact(btCollisionObject* ,btCollisionObject* ,const btDispatcherInfo& ,btManifoldResult* )
+{	//all arguments are ignored; the result is always the constant 1
+	return btScalar(1.);
+}
+
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h b/src/bullet/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h
new file mode 100644
index 00000000..f03c9dc3
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h
@@ -0,0 +1,54 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_EMPTY_ALGORITH
+#define BT_EMPTY_ALGORITH
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+#include "btCollisionCreateFunc.h"
+#include "btCollisionDispatcher.h"
+
+#define ATTRIBUTE_ALIGNED(a)
+
+///EmptyAlgorithm is a stub for unsupported collision pairs.
+///The dispatcher can dispatch a persistent btEmptyAlgorithm to avoid a search every frame.
+class btEmptyAlgorithm : public btCollisionAlgorithm
+{
+
+public:
+	
+	btEmptyAlgorithm(const btCollisionAlgorithmConstructionInfo& ci);
+	///does nothing (see the definition in the .cpp)
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	///always returns 1 (see the definition in the .cpp)
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{	//this class holds no manifold, so manifoldArray is left unchanged
+	}
+
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{	//factory: placement-constructs a btEmptyAlgorithm in memory obtained from the dispatcher
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			(void)body0;	//the bodies are not needed to build the stub
+			(void)body1;
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btEmptyAlgorithm));
+			return new(mem) btEmptyAlgorithm(ci);
+		}
+	};
+
+} ATTRIBUTE_ALIGNED(16);	//expands to nothing here: this header #defines ATTRIBUTE_ALIGNED(a) as empty
+
+#endif //BT_EMPTY_ALGORITH
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btGhostObject.cpp b/src/bullet/BulletCollision/CollisionDispatch/btGhostObject.cpp
new file mode 100644
index 00000000..86141fa6
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btGhostObject.cpp
@@ -0,0 +1,171 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2008 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btGhostObject.h"
+#include "btCollisionWorld.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "LinearMath/btAabbUtil2.h"
+
+btGhostObject::btGhostObject()
+{
+	m_internalType = CO_GHOST_OBJECT;	//tags this collision object with the ghost object type id
+}
+
+btGhostObject::~btGhostObject()
+{
+	///btGhostObject should have been removed from the world before destruction, so no overlapping objects may remain
+	btAssert(!m_overlappingObjects.size());
+}
+
+
+void btGhostObject::addOverlappingObjectInternal(btBroadphaseProxy* otherProxy,btBroadphaseProxy* thisProxy)
+{	//records the object behind otherProxy as overlapping, at most once; thisProxy is not used here
+	btCollisionObject* otherObject = (btCollisionObject*)otherProxy->m_clientObject;
+	btAssert(otherObject);
+	///if this linearSearch becomes too slow (too many overlapping objects) we should add a more appropriate data structure
+	int index = m_overlappingObjects.findLinearSearch(otherObject);
+	if (index==m_overlappingObjects.size())
+	{
+		//not found (the search result equals size()), so the object is new and gets appended
+		m_overlappingObjects.push_back(otherObject);
+	}
+}
+
+void btGhostObject::removeOverlappingObjectInternal(btBroadphaseProxy* otherProxy,btDispatcher* dispatcher,btBroadphaseProxy* thisProxy)
+{	//drops the object behind otherProxy from the overlap list if present; dispatcher and thisProxy are not used here
+	btCollisionObject* otherObject = (btCollisionObject*)otherProxy->m_clientObject;
+	btAssert(otherObject);
+	int index = m_overlappingObjects.findLinearSearch(otherObject);
+	if (index<m_overlappingObjects.size())
+	{	//swap-with-last removal: the order of the remaining entries is not preserved
+		m_overlappingObjects[index] = m_overlappingObjects[m_overlappingObjects.size()-1];
+		m_overlappingObjects.pop_back();
+	}
+}
+
+
+btPairCachingGhostObject::btPairCachingGhostObject()
+{	//the pair cache is placement-constructed into 16-byte aligned memory; the destructor releases it
+	m_hashPairCache = new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache();
+}
+
+btPairCachingGhostObject::~btPairCachingGhostObject()
+{	//mirror of the constructor: explicit destructor call, then free the aligned memory
+	m_hashPairCache->~btHashedOverlappingPairCache();
+	btAlignedFree( m_hashPairCache );
+}
+
+void btPairCachingGhostObject::addOverlappingObjectInternal(btBroadphaseProxy* otherProxy,btBroadphaseProxy* thisProxy)
+{	//records a new overlapping object and adds the matching pair to the pair cache
+	btBroadphaseProxy*actualThisProxy = thisProxy ? thisProxy : getBroadphaseHandle();	//a null thisProxy falls back to this object's own broadphase handle
+	btAssert(actualThisProxy);
+
+	btCollisionObject* otherObject = (btCollisionObject*)otherProxy->m_clientObject;
+	btAssert(otherObject);
+	int index = m_overlappingObjects.findLinearSearch(otherObject);
+	if (index==m_overlappingObjects.size())
+	{	//object not recorded yet: the list and the pair cache are updated together
+		m_overlappingObjects.push_back(otherObject);
+		m_hashPairCache->addOverlappingPair(actualThisProxy,otherProxy);
+	}
+}
+
+void btPairCachingGhostObject::removeOverlappingObjectInternal(btBroadphaseProxy* otherProxy,btDispatcher* dispatcher,btBroadphaseProxy* thisProxy1)
+{	//drops a recorded overlapping object and removes the matching pair from the pair cache
+	btCollisionObject* otherObject = (btCollisionObject*)otherProxy->m_clientObject;
+	btBroadphaseProxy* actualThisProxy = thisProxy1 ? thisProxy1 : getBroadphaseHandle();	//a null thisProxy1 falls back to this object's own broadphase handle
+	btAssert(actualThisProxy);
+
+	btAssert(otherObject);
+	int index = m_overlappingObjects.findLinearSearch(otherObject);
+	if (index<m_overlappingObjects.size())
+	{	//swap-with-last removal: the order of the remaining entries is not preserved
+		m_overlappingObjects[index] = m_overlappingObjects[m_overlappingObjects.size()-1];
+		m_overlappingObjects.pop_back();
+		m_hashPairCache->removeOverlappingPair(actualThisProxy,otherProxy,dispatcher);
+	}
+}
+
+
+void	btGhostObject::convexSweepTest(const btConvexShape* castShape, const btTransform& convexFromWorld, const btTransform& convexToWorld, btCollisionWorld::ConvexResultCallback& resultCallback, btScalar allowedCcdPenetration) const
+{	//sweeps castShape from convexFromWorld to convexToWorld against the objects currently overlapping this ghost object
+	btTransform	convexFromTrans,convexToTrans;
+	convexFromTrans = convexFromWorld;
+	convexToTrans = convexToWorld;
+	btVector3 castShapeAabbMin, castShapeAabbMax;
+	/* Compute AABB that encompasses angular movement */
+	{
+		btVector3 linVel, angVel;
+		btTransformUtil::calculateVelocity (convexFromTrans, convexToTrans, 1.0, linVel, angVel);
+		btTransform R;
+		R.setIdentity ();
+		R.setRotation (convexFromTrans.getRotation());	//R carries only the start rotation; its origin stays at identity
+		castShape->calculateTemporalAabb (R, linVel, angVel, 1.0, castShapeAabbMin, castShapeAabbMax);
+	}
+
+	/// go over all overlapping objects; if the segment between the two sweep origins hits an object's aabb expanded by the cast shape aabb,
+	// run the convex sweep query against that object through btCollisionWorld::objectQuerySingle
+	int i;
+	for (i=0;i<m_overlappingObjects.size();i++)
+	{
+		btCollisionObject*	collisionObject= m_overlappingObjects[i];
+		//only test this object if the callback's needsCollision filter accepts its broadphase handle
+		if(resultCallback.needsCollision(collisionObject->getBroadphaseHandle())) {
+			//RigidcollisionObject* collisionObject = ctrl->GetRigidcollisionObject();
+			btVector3 collisionObjectAabbMin,collisionObjectAabbMax;
+			collisionObject->getCollisionShape()->getAabb(collisionObject->getWorldTransform(),collisionObjectAabbMin,collisionObjectAabbMax);
+			AabbExpand (collisionObjectAabbMin, collisionObjectAabbMax, castShapeAabbMin, castShapeAabbMax);
+			btScalar hitLambda = btScalar(1.); //could use resultCallback.m_closestHitFraction, but needs testing
+			btVector3 hitNormal;
+			if (btRayAabb(convexFromWorld.getOrigin(),convexToWorld.getOrigin(),collisionObjectAabbMin,collisionObjectAabbMax,hitLambda,hitNormal))
+			{
+				btCollisionWorld::objectQuerySingle(castShape, convexFromTrans,convexToTrans,
+					collisionObject,
+						collisionObject->getCollisionShape(),
+						collisionObject->getWorldTransform(),
+						resultCallback,
+						allowedCcdPenetration);
+			}
+		}
+	}
+
+}
+
+void	btGhostObject::rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, btCollisionWorld::RayResultCallback& resultCallback) const
+{	//casts the ray against the objects currently overlapping this ghost object; hits are reported through resultCallback
+	btTransform rayFromTrans;
+	rayFromTrans.setIdentity();
+	rayFromTrans.setOrigin(rayFromWorld);
+	btTransform  rayToTrans;
+	rayToTrans.setIdentity();
+	rayToTrans.setOrigin(rayToWorld);
+	//both transforms are pure translations: identity rotation with the ray end points as origins
+
+	int i;
+	for (i=0;i<m_overlappingObjects.size();i++)
+	{
+		btCollisionObject*	collisionObject= m_overlappingObjects[i];
+		//only perform the raycast if the callback's needsCollision filter accepts the object's broadphase handle
+		if(resultCallback.needsCollision(collisionObject->getBroadphaseHandle())) 
+		{
+			btCollisionWorld::rayTestSingle(rayFromTrans,rayToTrans,
+							collisionObject,
+								collisionObject->getCollisionShape(),
+								collisionObject->getWorldTransform(),
+								resultCallback);
+		}
+	}
+}
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btGhostObject.h b/src/bullet/BulletCollision/CollisionDispatch/btGhostObject.h
new file mode 100644
index 00000000..8ec86138
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btGhostObject.h
@@ -0,0 +1,175 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2008 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_GHOST_OBJECT_H
+#define BT_GHOST_OBJECT_H
+
+
+#include "btCollisionObject.h"
+#include "BulletCollision/BroadphaseCollision/btOverlappingPairCallback.h"
+#include "LinearMath/btAlignedAllocator.h"
+#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
+#include "btCollisionWorld.h"
+
+class btConvexShape;
+
+class btDispatcher;
+
+///The btGhostObject can keep track of all objects that are overlapping
+///By default, this overlap is based on the AABB
+///This is useful for creating a character controller, collision sensors/triggers, explosions etc.
+///rayTest and convexSweepTest are provided as queries on the btGhostObject
+ATTRIBUTE_ALIGNED16(class) btGhostObject : public btCollisionObject
+{
+protected:
+	///list of objects exposed by the accessors below and iterated by rayTest
+	btAlignedObjectArray<btCollisionObject*> m_overlappingObjects;
+
+public:
+
+	btGhostObject();
+
+	virtual ~btGhostObject();
+	///sweeps castShape from convexFromWorld to convexToWorld, reporting through resultCallback
+	void	convexSweepTest(const class btConvexShape* castShape, const btTransform& convexFromWorld, const btTransform& convexToWorld, btCollisionWorld::ConvexResultCallback& resultCallback, btScalar allowedCcdPenetration = 0.f) const;
+	///casts a ray from rayFromWorld to rayToWorld against the objects in m_overlappingObjects
+	void	rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, btCollisionWorld::RayResultCallback& resultCallback) const; 
+
+	///this method is mainly for expert/internal use only.
+	virtual void	addOverlappingObjectInternal(btBroadphaseProxy* otherProxy, btBroadphaseProxy* thisProxy=0);
+	///this method is mainly for expert/internal use only.
+	virtual void	removeOverlappingObjectInternal(btBroadphaseProxy* otherProxy,btDispatcher* dispatcher,btBroadphaseProxy* thisProxy=0);
+	///number of entries in m_overlappingObjects
+	int	getNumOverlappingObjects() const
+	{
+		return m_overlappingObjects.size();
+	}
+	///entry of m_overlappingObjects at index; no range check is done here
+	btCollisionObject*	getOverlappingObject(int index)
+	{
+		return m_overlappingObjects[index];
+	}
+	///const overload of getOverlappingObject; no range check is done here
+	const btCollisionObject*	getOverlappingObject(int index) const
+	{
+		return m_overlappingObjects[index];
+	}
+	///direct (mutable) reference to m_overlappingObjects
+	btAlignedObjectArray<btCollisionObject*>&	getOverlappingPairs()
+	{
+		return m_overlappingObjects;
+	}
+	///NOTE(review): unlike the non-const overload this returns by value, so every call copies the array
+	const btAlignedObjectArray<btCollisionObject*>	getOverlappingPairs() const
+	{
+		return m_overlappingObjects;
+	}
+
+	//
+	// internal cast
+	//
+	// returns colObj as a btGhostObject when its internal type is CO_GHOST_OBJECT, otherwise 0; colObj is dereferenced unconditionally
+	static const btGhostObject*	upcast(const btCollisionObject* colObj)
+	{
+		if (colObj->getInternalType()==CO_GHOST_OBJECT)
+			return (const btGhostObject*)colObj;
+		return 0;
+	}
+	static btGhostObject*			upcast(btCollisionObject* colObj)
+	{
+		if (colObj->getInternalType()==CO_GHOST_OBJECT)
+			return (btGhostObject*)colObj;
+		return 0;
+	}
+
+};
+
+class	btPairCachingGhostObject : public btGhostObject //btGhostObject that additionally holds a btHashedOverlappingPairCache pointer
+{
+	btHashedOverlappingPairCache*	m_hashPairCache; //returned by getOverlappingPairCache(); NOTE(review): presumably created/destroyed by the ctor/dtor - confirm in the .cpp
+
+public:
+
+	btPairCachingGhostObject();
+
+	virtual ~btPairCachingGhostObject();
+
+	///this method is mainly for expert/internal use only.
+	virtual void	addOverlappingObjectInternal(btBroadphaseProxy* otherProxy, btBroadphaseProxy* thisProxy=0);
+	///overrides btGhostObject::removeOverlappingObjectInternal; like the add variant, presumably for internal use - confirm
+	virtual void	removeOverlappingObjectInternal(btBroadphaseProxy* otherProxy,btDispatcher* dispatcher,btBroadphaseProxy* thisProxy=0);
+	///gives access to the pair cache pointer held by this object
+	btHashedOverlappingPairCache*	getOverlappingPairCache()
+	{
+		return m_hashPairCache;
+	}
+
+};
+
+
+
+///The btGhostPairCallback forwards the adding and removal of overlapping pairs (btOverlappingPairCallback interface) to the btGhostObjects involved.
+class btGhostPairCallback : public btOverlappingPairCallback
+{
+public:
+	btGhostPairCallback() {}
+
+	virtual ~btGhostPairCallback() {}
+
+	///For each proxy of the pair whose client object is a btGhostObject, that ghost object is told
+	///about the other proxy. No btBroadphasePair is created here, so 0 is returned.
+	virtual btBroadphasePair*	addOverlappingPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1)
+	{
+		btGhostObject* firstGhost = btGhostObject::upcast((btCollisionObject*) proxy0->m_clientObject);
+		btGhostObject* secondGhost = btGhostObject::upcast((btCollisionObject*) proxy1->m_clientObject);
+
+		if (firstGhost)
+		{
+			firstGhost->addOverlappingObjectInternal(proxy1, proxy0);
+		}
+		if (secondGhost)
+		{
+			secondGhost->addOverlappingObjectInternal(proxy0, proxy1);
+		}
+		return 0;
+	}
+
+	///Counterpart of addOverlappingPair: each ghost object of the pair drops the other proxy. Always returns 0.
+	virtual void*	removeOverlappingPair(btBroadphaseProxy* proxy0,btBroadphaseProxy* proxy1,btDispatcher* dispatcher)
+	{
+		btGhostObject* firstGhost = btGhostObject::upcast((btCollisionObject*) proxy0->m_clientObject);
+		btGhostObject* secondGhost = btGhostObject::upcast((btCollisionObject*) proxy1->m_clientObject);
+
+		if (firstGhost)
+		{
+			firstGhost->removeOverlappingObjectInternal(proxy1,dispatcher,proxy0);
+		}
+		if (secondGhost)
+		{
+			secondGhost->removeOverlappingObjectInternal(proxy0,dispatcher,proxy1);
+		}
+		return 0;
+	}
+
+	///Not supported: asserts when called.
+	virtual void	removeOverlappingPairsContainingProxy(btBroadphaseProxy* /*proxy0*/,btDispatcher* /*dispatcher*/)
+	{
+		//need to keep track of all ghost objects and call them here (m_hashPairCache->removeOverlappingPairsContainingProxy(proxy0,dispatcher))
+		btAssert(0);
+	}
+};
+
+#endif
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp b/src/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp
new file mode 100644
index 00000000..4353cdac
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp
@@ -0,0 +1,842 @@
+#include "btInternalEdgeUtility.h"
+
+#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/NarrowPhaseCollision/btManifoldPoint.h"
+#include "LinearMath/btIDebugDraw.h"
+
+
+//#define DEBUG_INTERNAL_EDGE
+
+#ifdef DEBUG_INTERNAL_EDGE
+#include <stdio.h>
+#endif //DEBUG_INTERNAL_EDGE
+
+
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+static btIDebugDraw* gDebugDrawer = 0; //drawer used by btDebugDrawLine; 0 until btSetDebugDrawer installs one
+void	btSetDebugDrawer(btIDebugDraw* debugDrawer)
+{
+	gDebugDrawer = debugDrawer;
+}
+//draws one line through gDebugDrawer; does nothing while no drawer is installed
+static void    btDebugDrawLine(const btVector3& from,const btVector3& to, const btVector3& color)
+{
+	if (!gDebugDrawer)
+		return;
+	gDebugDrawer->drawLine(from,to,color);
+}
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+
+//key for the triangle info map: partId shifted left by (31-MAX_NUM_PARTS_IN_BITS), OR-ed with triangleIndex
+static int	btGetHash(int partId, int triangleIndex)
+{
+	return (partId<<(31-MAX_NUM_PARTS_IN_BITS)) | triangleIndex;
+}
+
+
+
+//returns btAtan2 of normalB's component along edgeA over its component along normalA,
+//i.e. the angle of normalB measured from normalA towards edgeA
+static btScalar btGetAngle(const btVector3& edgeA, const btVector3& normalA,const btVector3& normalB)
+{
+	const btScalar alongEdge   = normalB.dot(edgeA);
+	const btScalar alongNormal = normalB.dot(normalA);
+	return btAtan2(alongEdge, alongNormal);
+}
+
+
+struct btConnectivityProcessor : public btTriangleCallback
+{
+	int				m_partIdA; //part id of triangle A (the triangle whose edge info is being built)
+	int				m_triangleIndexA; //index of triangle A within its part
+	btVector3*		m_triangleVerticesA; //the three vertices of triangle A
+	btTriangleInfoMap*	m_triangleInfoMap; //destination of the edge info, also the source of the thresholds used below
+	///Compares triangle A with the given triangle (B). When they share exactly two vertices, the angle and
+	///convexity of that shared edge are stored in A's btTriangleInfo entry of m_triangleInfoMap.
+	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex)
+	{
+		//skip self-collisions
+		if ((m_partIdA == partId) && (m_triangleIndexA == triangleIndex))
+			return;
+
+		//skip duplicates (disabled for now)
+		//if ((m_partIdA <= partId) && (m_triangleIndexA <= triangleIndex))
+		//	return;
+
+		//search for shared vertices and edges
+		int numshared = 0;
+		int sharedVertsA[3]={-1,-1,-1};
+		int sharedVertsB[3]={-1,-1,-1};
+
+		///skip degenerate triangles
+		btScalar crossBSqr = ((triangle[1]-triangle[0]).cross(triangle[2]-triangle[0])).length2();
+		if (crossBSqr < m_triangleInfoMap->m_equalVertexThreshold)
+			return;
+
+
+		btScalar crossASqr = ((m_triangleVerticesA[1]-m_triangleVerticesA[0]).cross(m_triangleVerticesA[2]-m_triangleVerticesA[0])).length2();
+		///skip degenerate triangles
+		if (crossASqr< m_triangleInfoMap->m_equalVertexThreshold)
+			return;
+
+#if 0
+		printf("triangle A[0]	=	(%f,%f,%f)\ntriangle A[1]	=	(%f,%f,%f)\ntriangle A[2]	=	(%f,%f,%f)\n",
+			m_triangleVerticesA[0].getX(),m_triangleVerticesA[0].getY(),m_triangleVerticesA[0].getZ(),
+			m_triangleVerticesA[1].getX(),m_triangleVerticesA[1].getY(),m_triangleVerticesA[1].getZ(),
+			m_triangleVerticesA[2].getX(),m_triangleVerticesA[2].getY(),m_triangleVerticesA[2].getZ());
+
+		printf("partId=%d, triangleIndex=%d\n",partId,triangleIndex);
+		printf("triangle B[0]	=	(%f,%f,%f)\ntriangle B[1]	=	(%f,%f,%f)\ntriangle B[2]	=	(%f,%f,%f)\n",
+			triangle[0].getX(),triangle[0].getY(),triangle[0].getZ(),
+			triangle[1].getX(),triangle[1].getY(),triangle[1].getZ(),
+			triangle[2].getX(),triangle[2].getY(),triangle[2].getZ());
+#endif
+		//a vertex of A and a vertex of B whose squared distance is below m_equalVertexThreshold count as shared
+		for (int i=0;i<3;i++)
+		{
+			for (int j=0;j<3;j++)
+			{
+				if ( (m_triangleVerticesA[i]-triangle[j]).length2() < m_triangleInfoMap->m_equalVertexThreshold)
+				{
+					sharedVertsA[numshared] = i;
+					sharedVertsB[numshared] = j;
+					numshared++;
+					///degenerate case
+					if(numshared >= 3)
+						return;
+				}
+			}
+			///degenerate case
+			if(numshared >= 3)
+				return;
+		}
+		switch (numshared)
+		{
+		case 0:
+			{
+				break;
+			}
+		case 1:
+			{
+				//shared vertex
+				break;
+			}
+		case 2:
+			{
+				//shared edge
+				//we need to make sure the edge is in the order V2V0 and not V0V2 so that the signs are correct
+				if (sharedVertsA[0] == 0 && sharedVertsA[1] == 2)
+				{
+					sharedVertsA[0] = 2;
+					sharedVertsA[1] = 0;
+					int tmp = sharedVertsB[1];
+					sharedVertsB[1] = sharedVertsB[0];
+					sharedVertsB[0] = tmp;
+				}
+				//look up triangle A's entry in the info map, inserting a default one on first use
+				int hash = btGetHash(m_partIdA,m_triangleIndexA);
+
+				btTriangleInfo* info = m_triangleInfoMap->find(hash);
+				if (!info)
+				{
+					btTriangleInfo tmp;
+					m_triangleInfoMap->insert(hash,tmp);
+					info = m_triangleInfoMap->find(hash);
+				}
+				//the vertex indices 0,1,2 sum to 3, so 3 minus the two shared indices is the index of the vertex not on the edge
+				int sumvertsA = sharedVertsA[0]+sharedVertsA[1];
+				int otherIndexA = 3-sumvertsA;
+
+				
+				btVector3 edge(m_triangleVerticesA[sharedVertsA[1]]-m_triangleVerticesA[sharedVertsA[0]]);
+
+				btTriangleShape tA(m_triangleVerticesA[0],m_triangleVerticesA[1],m_triangleVerticesA[2]);
+				int otherIndexB = 3-(sharedVertsB[0]+sharedVertsB[1]);
+
+				btTriangleShape tB(triangle[sharedVertsB[1]],triangle[sharedVertsB[0]],triangle[otherIndexB]);
+				//btTriangleShape tB(triangle[0],triangle[1],triangle[2]);
+
+				btVector3 normalA;
+				btVector3 normalB;
+				tA.calcNormal(normalA);
+				tB.calcNormal(normalB);
+				edge.normalize();
+				btVector3 edgeCrossA = edge.cross(normalA).normalize();
+				//flip edgeCrossA when needed so that it points from the shared edge towards A's remaining vertex
+				{
+					btVector3 tmp = m_triangleVerticesA[otherIndexA]-m_triangleVerticesA[sharedVertsA[0]];
+					if (edgeCrossA.dot(tmp) < 0)
+					{
+						edgeCrossA*=-1;
+					}
+				}
+
+				btVector3 edgeCrossB = edge.cross(normalB).normalize();
+				//same for B: edgeCrossB is made to point from the shared edge towards B's remaining vertex
+				{
+					btVector3 tmp = triangle[otherIndexB]-triangle[sharedVertsB[0]];
+					if (edgeCrossB.dot(tmp) < 0)
+					{
+						edgeCrossB*=-1;
+					}
+				}
+
+				btScalar	angle2 = 0;
+				btScalar	ang4 = 0.f;
+
+
+				btVector3 calculatedEdge = edgeCrossA.cross(edgeCrossB);
+				btScalar len2 = calculatedEdge.length2();
+
+				btScalar correctedAngle(0);
+				btVector3 calculatedNormalB = normalA; //assigned here and in the else branch below, but not read afterwards
+				bool isConvex = false;
+				//cross product below m_planarEpsilon: the angles stay 0 and isConvex stays false
+				if (len2<m_triangleInfoMap->m_planarEpsilon)
+				{
+					angle2 = 0.f;
+					ang4 = 0.f;
+				} else
+				{
+
+					calculatedEdge.normalize();
+					btVector3 calculatedNormalA = calculatedEdge.cross(edgeCrossA);
+					calculatedNormalA.normalize();
+					angle2 = btGetAngle(calculatedNormalA,edgeCrossA,edgeCrossB);
+					ang4 = SIMD_PI-angle2;
+					btScalar dotA = normalA.dot(edgeCrossB);
+					///@todo: check if we need some epsilon, due to floating point imprecision
+					isConvex = (dotA<0.);
+
+					correctedAngle = isConvex ? ang4 : -ang4;
+					btQuaternion orn2(calculatedEdge,-correctedAngle);
+					calculatedNormalB = btMatrix3x3(orn2)*normalA;
+
+
+				}
+
+				
+
+				
+							
+				//alternatively use 
+				//btVector3 calculatedNormalB2 = quatRotate(orn,normalA);
+
+				//sumvertsA identifies A's shared edge: 1 = V0V1, 2 = V2V0, 3 = V1V2
+				switch (sumvertsA)
+				{
+				case 1:
+					{
+						btVector3 edge = m_triangleVerticesA[0]-m_triangleVerticesA[1];
+						btQuaternion orn(edge,-correctedAngle);
+						btVector3 computedNormalB = quatRotate(orn,normalA);
+						btScalar bla = computedNormalB.dot(normalB);
+						if (bla<0)
+						{
+							computedNormalB*=-1;
+							info->m_flags |= TRI_INFO_V0V1_SWAP_NORMALB;
+						}
+#ifdef DEBUG_INTERNAL_EDGE
+						if ((computedNormalB-normalB).length()>0.0001)
+						{
+							printf("warning: normals not identical\n");
+						}
+#endif//DEBUG_INTERNAL_EDGE
+
+						info->m_edgeV0V1Angle = -correctedAngle;
+
+						if (isConvex)
+							info->m_flags |= TRI_INFO_V0V1_CONVEX;
+						break;
+					}
+				case 2:
+					{
+						btVector3 edge = m_triangleVerticesA[2]-m_triangleVerticesA[0];
+						btQuaternion orn(edge,-correctedAngle);
+						btVector3 computedNormalB = quatRotate(orn,normalA);
+						if (computedNormalB.dot(normalB)<0)
+						{
+							computedNormalB*=-1;
+							info->m_flags |= TRI_INFO_V2V0_SWAP_NORMALB;
+						}
+
+#ifdef DEBUG_INTERNAL_EDGE
+						if ((computedNormalB-normalB).length()>0.0001)
+						{
+							printf("warning: normals not identical\n");
+						}
+#endif //DEBUG_INTERNAL_EDGE
+						info->m_edgeV2V0Angle = -correctedAngle;
+						if (isConvex)
+							info->m_flags |= TRI_INFO_V2V0_CONVEX;
+						break;	
+					}
+				case 3:
+					{
+						btVector3 edge = m_triangleVerticesA[1]-m_triangleVerticesA[2];
+						btQuaternion orn(edge,-correctedAngle);
+						btVector3 computedNormalB = quatRotate(orn,normalA);
+						if (computedNormalB.dot(normalB)<0)
+						{
+							info->m_flags |= TRI_INFO_V1V2_SWAP_NORMALB;
+							computedNormalB*=-1;
+						}
+#ifdef DEBUG_INTERNAL_EDGE
+						if ((computedNormalB-normalB).length()>0.0001)
+						{
+							printf("warning: normals not identical\n");
+						}
+#endif //DEBUG_INTERNAL_EDGE
+						info->m_edgeV1V2Angle = -correctedAngle;
+
+						if (isConvex)
+							info->m_flags |= TRI_INFO_V1V2_CONVEX;
+						break;
+					}
+				}
+
+				break;
+			}
+		default:
+			{
+				//				printf("warning: duplicate triangle\n");
+			}
+
+		}
+	}
+};
+/////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////////
+
+void btGenerateInternalEdgeInfo (btBvhTriangleMeshShape*trimeshShape, btTriangleInfoMap* triangleInfoMap)
+{
+	//Builds the edge connectivity info of every triangle of trimeshShape into triangleInfoMap and attaches the map to the shape.
+	if (trimeshShape->getTriangleInfoMap())
+		return; //a shape that already carries a triangle info map is left untouched
+
+	trimeshShape->setTriangleInfoMap(triangleInfoMap);
+
+	btStridingMeshInterface* meshInterface = trimeshShape->getMeshInterface();
+	const btVector3& meshScaling = meshInterface->getScaling();
+
+	for (int partId = 0; partId< meshInterface->getNumSubParts();partId++)
+	{
+		const unsigned char *vertexbase = 0;
+		int numverts = 0;
+		PHY_ScalarType type = PHY_INTEGER;
+		int stride = 0;
+		const unsigned char *indexbase = 0;
+		int indexstride = 0;
+		int numfaces = 0;
+		PHY_ScalarType indicestype = PHY_INTEGER;
+		btVector3 triangleVerts[3];
+
+		//read-only access to this sub part; released again once all of its triangles have been visited
+		meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase,numverts,	type,stride,&indexbase,indexstride,numfaces,indicestype,partId);
+		btVector3 aabbMin,aabbMax;
+
+		for (int triangleIndex = 0 ; triangleIndex < numfaces;triangleIndex++)
+		{
+			unsigned int* gfxbase = (unsigned int*)(indexbase+triangleIndex*indexstride);
+			//fetch the three vertices of this triangle, multiplied by the mesh scaling
+			for (int j=2;j>=0;j--)
+			{
+				//indices are read as unsigned short for PHY_SHORT, otherwise as unsigned int; vertices as float for PHY_FLOAT, otherwise as double
+				int graphicsindex = indicestype==PHY_SHORT?((unsigned short*)gfxbase)[j]:gfxbase[j];
+				if (type == PHY_FLOAT)
+				{
+					float* graphicsbase = (float*)(vertexbase+graphicsindex*stride);
+					triangleVerts[j] = btVector3(
+						graphicsbase[0]*meshScaling.getX(),
+						graphicsbase[1]*meshScaling.getY(),
+						graphicsbase[2]*meshScaling.getZ());
+				}
+				else
+				{
+					double* graphicsbase = (double*)(vertexbase+graphicsindex*stride);
+					triangleVerts[j] = btVector3( btScalar(graphicsbase[0]*meshScaling.getX()), btScalar(graphicsbase[1]*meshScaling.getY()), btScalar(graphicsbase[2]*meshScaling.getZ()));
+				}
+			}
+			aabbMin.setValue(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+			aabbMax.setValue(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT)); 
+			aabbMin.setMin(triangleVerts[0]);
+			aabbMax.setMax(triangleVerts[0]);
+			aabbMin.setMin(triangleVerts[1]);
+			aabbMax.setMax(triangleVerts[1]);
+			aabbMin.setMin(triangleVerts[2]);
+			aabbMax.setMax(triangleVerts[2]);
+			//run the connectivity processor through processAllTriangles, restricted to this triangle's AABB
+			btConnectivityProcessor connectivityProcessor;
+			connectivityProcessor.m_partIdA = partId;
+			connectivityProcessor.m_triangleIndexA = triangleIndex;
+			connectivityProcessor.m_triangleVerticesA = &triangleVerts[0];
+			connectivityProcessor.m_triangleInfoMap  = triangleInfoMap;
+
+			trimeshShape->processAllTriangles(&connectivityProcessor,aabbMin,aabbMax);
+		}
+		meshInterface->unLockReadOnlyVertexBase(partId); //balance getLockedReadOnlyVertexIndexBase above (was missing)
+	}
+
+}
+
+
+
+
+// Writes to nearestPoint the point of the segment [line0,line1] that is closest to point.
+// A segment whose end points (fuzzily) coincide yields line0.
+void btNearestPointInLineSegment(const btVector3 &point, const btVector3& line0, const btVector3& line1, btVector3& nearestPoint)
+{
+	const btVector3 segment = line1 - line0;
+
+	if (segment.fuzzyZero())
+	{
+		// degenerate segment: there is no direction to project onto
+		nearestPoint = line0;
+		return;
+	}
+
+	// parameter of the projection of point onto the line through line0 and line1
+	btScalar t = (point-line0).dot(segment) / segment.dot(segment);
+
+	// restrict the parameter to [0,1] so the result stays between the end points
+	if (t > 1)
+		t = 1;
+	else if (t < 0)
+		t = 0;
+
+	nearestPoint = line0 + segment*t;
+}
+
+
+
+
+bool	btClampNormal(const btVector3& edge,const btVector3& tri_normal_org,const btVector3& localContactNormalOnB, btScalar correctedEdgeAngle, btVector3 & clampedLocalNormal)
+{
+	//Compares the angle of localContactNormalOnB (relative to the triangle normal, see btGetAngle)
+	//with correctedEdgeAngle. When the angle lies beyond that limit, the contact normal rotated
+	//about the edge by the difference is written to clampedLocalNormal and true is returned;
+	//otherwise false is returned and clampedLocalNormal is left untouched.
+	//we only have a local triangle normal, not a local contact normal -> only normal in world space...
+	//either compute the current angle all in local space, or all in world space
+	const btVector3 edgeCross = edge.cross(tri_normal_org).normalize();
+	const btScalar curAngle = btGetAngle(edgeCross,tri_normal_org,localContactNormalOnB);
+
+	bool beyondLimit = false;
+	if (correctedEdgeAngle<0)
+	{
+		//negative limit: exceeded when the current angle is below it
+		beyondLimit = (curAngle < correctedEdgeAngle);
+	}
+	else if (correctedEdgeAngle>=0)
+	{
+		//non-negative limit: exceeded when the current angle is above it
+		beyondLimit = (curAngle > correctedEdgeAngle);
+	}
+
+	if (!beyondLimit)
+		return false;
+
+	//rotate the contact normal about the edge by (correctedEdgeAngle-curAngle)
+	const btScalar diffAngle = correctedEdgeAngle-curAngle;
+	btQuaternion rotation(edge,diffAngle );
+	clampedLocalNormal = btMatrix3x3(rotation)*localContactNormalOnB;
+	return true;
+}
+
+
+
+/// Adjusts the normal of a contact point against a triangle (and reprojects its position on B)
+/// using the per-edge info stored in the btTriangleInfoMap of the root triangle mesh shape.
+/// cp: contact point, modified in place. colObj0: object whose collision shape is the triangle;
+/// nothing is done unless its shape type is TRIANGLE_SHAPE_PROXYTYPE. colObj1: not used here.
+/// partId0/index0: identify the triangle's entry in the info map; nothing is done without an entry.
+/// normalAdjustFlags: bitmask of btInternalEdgeAdjustFlags.
+void btAdjustInternalEdgeContacts(btManifoldPoint& cp, const btCollisionObject* colObj0,const btCollisionObject* colObj1, int partId0, int index0, int normalAdjustFlags)
+{
+	//btAssert(colObj0->getCollisionShape()->getShapeType() == TRIANGLE_SHAPE_PROXYTYPE);
+	if (colObj0->getCollisionShape()->getShapeType() != TRIANGLE_SHAPE_PROXYTYPE)
+		return;
+
+	btBvhTriangleMeshShape* trimesh = 0;
+	//the info map is taken from the root shape; a scaled wrapper is unwrapped to its child trimesh
+	if( colObj0->getRootCollisionShape()->getShapeType() == SCALED_TRIANGLE_MESH_SHAPE_PROXYTYPE )
+		trimesh = ((btScaledBvhTriangleMeshShape*)colObj0->getRootCollisionShape())->getChildShape();
+	else
+		trimesh = (btBvhTriangleMeshShape*)colObj0->getRootCollisionShape();
+
+	btTriangleInfoMap* triangleInfoMapPtr = (btTriangleInfoMap*) trimesh->getTriangleInfoMap();
+	if (!triangleInfoMapPtr)
+		return;
+
+	int hash = btGetHash(partId0,index0);
+
+	btTriangleInfo* info = triangleInfoMapPtr->find(hash);
+	if (!info)
+		return;
+
+	btScalar frontFacing = (normalAdjustFlags & BT_TRIANGLE_CONVEX_BACKFACE_MODE)==0? 1.f : -1.f;
+
+	const btTriangleShape* tri_shape = static_cast<const btTriangleShape*>(colObj0->getCollisionShape());
+	btVector3 v0,v1,v2;
+	tri_shape->getVertex(0,v0);
+	tri_shape->getVertex(1,v1);
+	tri_shape->getVertex(2,v2);
+
+	//the colors are only used by the BT_INTERNAL_EDGE_DEBUG_DRAW / DEBUG_INTERNAL_EDGE line drawing
+	btVector3 red(1,0,0), green(0,1,0),blue(0,0,1),white(1,1,1),black(0,0,0);
+	btVector3 tri_normal;
+	tri_shape->calcNormal(tri_normal);
+
+	//btScalar dot = tri_normal.dot(cp.m_normalWorldOnB);
+	btVector3 nearest;
+	btNearestPointInLineSegment(cp.m_localPointB,v0,v1,nearest);
+
+	btVector3 contact = cp.m_localPointB;
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+	const btTransform& tr = colObj0->getWorldTransform();
+	btDebugDrawLine(tr*nearest,tr*cp.m_localPointB,red);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+	bool isNearEdge = false;
+
+	//number of near-edge hits that are resolved with the plain triangle normal at the end of the function
+	int numConcaveEdgeHits = 0;
+
+	btVector3 localContactNormalOnB = colObj0->getWorldTransform().getBasis().transpose() * cp.m_normalWorldOnB;
+	localContactNormalOnB.normalize();//is this necessary?
+
+	// Get closest edge
+	int      bestedge=-1;
+	btScalar    disttobestedge=BT_LARGE_FLOAT;
+	// only edges whose stored angle is below m_maxEdgeAngleThreshold are candidates
+	// Edge 0 -> 1
+	if (btFabs(info->m_edgeV0V1Angle)< triangleInfoMapPtr->m_maxEdgeAngleThreshold)
+	{
+		btVector3 nearest;
+		btNearestPointInLineSegment( cp.m_localPointB, v0, v1, nearest );
+		btScalar     len=(contact-nearest).length();
+		//
+		if( len < disttobestedge )
+		{
+			bestedge=0;
+			disttobestedge=len;
+		}
+	}
+	// Edge 1 -> 2
+	if (btFabs(info->m_edgeV1V2Angle)< triangleInfoMapPtr->m_maxEdgeAngleThreshold)
+	{
+		btVector3 nearest;
+		btNearestPointInLineSegment( cp.m_localPointB, v1, v2, nearest );
+		btScalar     len=(contact-nearest).length();
+		//
+		if( len < disttobestedge )
+		{
+			bestedge=1;
+			disttobestedge=len;
+		}
+	}
+	// Edge 2 -> 0
+	if (btFabs(info->m_edgeV2V0Angle)< triangleInfoMapPtr->m_maxEdgeAngleThreshold)
+	{
+		btVector3 nearest;
+		btNearestPointInLineSegment( cp.m_localPointB, v2, v0, nearest );
+		btScalar     len=(contact-nearest).length();
+		//
+		if( len < disttobestedge )
+		{
+			bestedge=2;
+			disttobestedge=len;
+		}
+	}
+
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+	btVector3 upfix=tri_normal * btVector3(0.1f,0.1f,0.1f);
+	btDebugDrawLine(tr * v0 + upfix, tr * v1 + upfix, red );
+#endif
+	if (btFabs(info->m_edgeV0V1Angle)< triangleInfoMapPtr->m_maxEdgeAngleThreshold)
+	{
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+		btDebugDrawLine(tr*contact,tr*(contact+cp.m_normalWorldOnB*10),black);
+#endif
+		btScalar len = (contact-nearest).length();
+		if(len<triangleInfoMapPtr->m_edgeDistanceThreshold)
+		if( bestedge==0 )
+		{
+			btVector3 edge(v0-v1);
+			isNearEdge = true;
+
+			if (info->m_edgeV0V1Angle==btScalar(0))
+			{
+				numConcaveEdgeHits++;
+			} else
+			{
+
+				bool isEdgeConvex = (info->m_flags & TRI_INFO_V0V1_CONVEX)!=0;
+				btScalar swapFactor = isEdgeConvex ? btScalar(1) : btScalar(-1);
+	#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+				btDebugDrawLine(tr*nearest,tr*(nearest+swapFactor*tri_normal*10),white);
+	#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+				btVector3 nA = swapFactor * tri_normal;
+
+				btQuaternion orn(edge,info->m_edgeV0V1Angle);
+				btVector3 computedNormalB = quatRotate(orn,tri_normal);
+				if (info->m_flags & TRI_INFO_V0V1_SWAP_NORMALB)
+					computedNormalB*=-1;
+				btVector3 nB = swapFactor*computedNormalB;
+
+				btScalar	NdotA = localContactNormalOnB.dot(nA);
+				btScalar	NdotB = localContactNormalOnB.dot(nB);
+				bool backFacingNormal = (NdotA< triangleInfoMapPtr->m_convexEpsilon) && (NdotB<triangleInfoMapPtr->m_convexEpsilon);
+
+#ifdef DEBUG_INTERNAL_EDGE
+				{
+					
+					btDebugDrawLine(cp.getPositionWorldOnB(),cp.getPositionWorldOnB()+tr.getBasis()*(nB*20),red);
+				}
+#endif //DEBUG_INTERNAL_EDGE
+
+
+				if (backFacingNormal)
+				{
+					numConcaveEdgeHits++;
+				}
+				else
+				{
+					//convex edge hit: clamp the contact normal against the stored edge angle
+					btVector3 clampedLocalNormal;
+					bool isClamped = btClampNormal(edge,swapFactor*tri_normal,localContactNormalOnB, info->m_edgeV0V1Angle,clampedLocalNormal);
+					if (isClamped)
+					{
+						if (((normalAdjustFlags & BT_TRIANGLE_CONVEX_DOUBLE_SIDED)!=0) || (clampedLocalNormal.dot(frontFacing*tri_normal)>0))
+						{
+							btVector3 newNormal = colObj0->getWorldTransform().getBasis() * clampedLocalNormal;
+							//					cp.m_distance1 = cp.m_distance1 * newNormal.dot(cp.m_normalWorldOnB);
+							cp.m_normalWorldOnB = newNormal;
+							// Reproject collision point along normal. (what about cp.m_distance1?)
+							cp.m_positionWorldOnB = cp.m_positionWorldOnA - cp.m_normalWorldOnB * cp.m_distance1;
+							cp.m_localPointB = colObj0->getWorldTransform().invXform(cp.m_positionWorldOnB);
+							
+						}
+					}
+				}
+			}
+		}
+	}
+
+	btNearestPointInLineSegment(contact,v1,v2,nearest);
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+	btDebugDrawLine(tr*nearest,tr*cp.m_localPointB,green);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+	btDebugDrawLine(tr * v1 + upfix, tr * v2 + upfix , green );
+#endif
+
+	if (btFabs(info->m_edgeV1V2Angle)< triangleInfoMapPtr->m_maxEdgeAngleThreshold)
+	{
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+		btDebugDrawLine(tr*contact,tr*(contact+cp.m_normalWorldOnB*10),black);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+
+
+		btScalar len = (contact-nearest).length();
+		if(len<triangleInfoMapPtr->m_edgeDistanceThreshold)
+		if( bestedge==1 )
+		{
+			isNearEdge = true;
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+			btDebugDrawLine(tr*nearest,tr*(nearest+tri_normal*10),white);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+			btVector3 edge(v1-v2);
+
+			if (info->m_edgeV1V2Angle == btScalar(0))
+			{
+				numConcaveEdgeHits++;
+			} else
+			{
+				bool isEdgeConvex = (info->m_flags & TRI_INFO_V1V2_CONVEX)!=0;
+				btScalar swapFactor = isEdgeConvex ? btScalar(1) : btScalar(-1);
+	#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+				btDebugDrawLine(tr*nearest,tr*(nearest+swapFactor*tri_normal*10),white);
+	#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+				btVector3 nA = swapFactor * tri_normal;
+				
+				btQuaternion orn(edge,info->m_edgeV1V2Angle);
+				btVector3 computedNormalB = quatRotate(orn,tri_normal);
+				if (info->m_flags & TRI_INFO_V1V2_SWAP_NORMALB)
+					computedNormalB*=-1;
+				btVector3 nB = swapFactor*computedNormalB;
+
+#ifdef DEBUG_INTERNAL_EDGE
+				{
+					btDebugDrawLine(cp.getPositionWorldOnB(),cp.getPositionWorldOnB()+tr.getBasis()*(nB*20),red);
+				}
+#endif //DEBUG_INTERNAL_EDGE
+
+
+				btScalar	NdotA = localContactNormalOnB.dot(nA);
+				btScalar	NdotB = localContactNormalOnB.dot(nB);
+				bool backFacingNormal = (NdotA< triangleInfoMapPtr->m_convexEpsilon) && (NdotB<triangleInfoMapPtr->m_convexEpsilon);
+
+				if (backFacingNormal)
+				{
+					numConcaveEdgeHits++;
+				}
+				else
+				{
+					//convex edge hit: clamp the contact normal against the stored edge angle
+					btVector3 localContactNormalOnB = colObj0->getWorldTransform().getBasis().transpose() * cp.m_normalWorldOnB; //NOTE(review): shadows the outer variable and, unlike it, is not normalized
+					btVector3 clampedLocalNormal;
+					bool isClamped = btClampNormal(edge,swapFactor*tri_normal,localContactNormalOnB, info->m_edgeV1V2Angle,clampedLocalNormal);
+					if (isClamped)
+					{
+						if (((normalAdjustFlags & BT_TRIANGLE_CONVEX_DOUBLE_SIDED)!=0) || (clampedLocalNormal.dot(frontFacing*tri_normal)>0))
+						{
+							btVector3 newNormal = colObj0->getWorldTransform().getBasis() * clampedLocalNormal;
+							//					cp.m_distance1 = cp.m_distance1 * newNormal.dot(cp.m_normalWorldOnB);
+							cp.m_normalWorldOnB = newNormal;
+							// Reproject collision point along normal.
+							cp.m_positionWorldOnB = cp.m_positionWorldOnA - cp.m_normalWorldOnB * cp.m_distance1;
+							cp.m_localPointB = colObj0->getWorldTransform().invXform(cp.m_positionWorldOnB);
+						}
+					}
+				}
+			}
+		}
+	}
+
+	btNearestPointInLineSegment(contact,v2,v0,nearest);
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+	btDebugDrawLine(tr*nearest,tr*cp.m_localPointB,blue);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+	btDebugDrawLine(tr * v2 + upfix, tr * v0 + upfix , blue );
+#endif
+
+	if (btFabs(info->m_edgeV2V0Angle)< triangleInfoMapPtr->m_maxEdgeAngleThreshold)
+	{
+
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+		btDebugDrawLine(tr*contact,tr*(contact+cp.m_normalWorldOnB*10),black);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+		btScalar len = (contact-nearest).length();
+		if(len<triangleInfoMapPtr->m_edgeDistanceThreshold)
+		if( bestedge==2 )
+		{
+			isNearEdge = true;
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+			btDebugDrawLine(tr*nearest,tr*(nearest+tri_normal*10),white);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+			btVector3 edge(v2-v0);
+
+			if (info->m_edgeV2V0Angle==btScalar(0))
+			{
+				numConcaveEdgeHits++;
+			} else
+			{
+
+				bool isEdgeConvex = (info->m_flags & TRI_INFO_V2V0_CONVEX)!=0;
+				btScalar swapFactor = isEdgeConvex ? btScalar(1) : btScalar(-1);
+	#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+				btDebugDrawLine(tr*nearest,tr*(nearest+swapFactor*tri_normal*10),white);
+	#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+				btVector3 nA = swapFactor * tri_normal;
+				btQuaternion orn(edge,info->m_edgeV2V0Angle);
+				btVector3 computedNormalB = quatRotate(orn,tri_normal);
+				if (info->m_flags & TRI_INFO_V2V0_SWAP_NORMALB)
+					computedNormalB*=-1;
+				btVector3 nB = swapFactor*computedNormalB;
+
+#ifdef DEBUG_INTERNAL_EDGE
+				{
+					btDebugDrawLine(cp.getPositionWorldOnB(),cp.getPositionWorldOnB()+tr.getBasis()*(nB*20),red);
+				}
+#endif //DEBUG_INTERNAL_EDGE
+
+				btScalar	NdotA = localContactNormalOnB.dot(nA);
+				btScalar	NdotB = localContactNormalOnB.dot(nB);
+				bool backFacingNormal = (NdotA< triangleInfoMapPtr->m_convexEpsilon) && (NdotB<triangleInfoMapPtr->m_convexEpsilon);
+
+				if (backFacingNormal)
+				{
+					numConcaveEdgeHits++;
+				}
+				else
+				{
+					//convex edge hit: clamp the contact normal against the stored edge angle
+					//				printf("hitting convex edge\n");
+
+
+					btVector3 localContactNormalOnB = colObj0->getWorldTransform().getBasis().transpose() * cp.m_normalWorldOnB; //NOTE(review): shadows the outer variable and, unlike it, is not normalized
+					btVector3 clampedLocalNormal;
+					bool isClamped = btClampNormal(edge,swapFactor*tri_normal,localContactNormalOnB,info->m_edgeV2V0Angle,clampedLocalNormal);
+					if (isClamped)
+					{
+						if (((normalAdjustFlags & BT_TRIANGLE_CONVEX_DOUBLE_SIDED)!=0) || (clampedLocalNormal.dot(frontFacing*tri_normal)>0))
+						{
+							btVector3 newNormal = colObj0->getWorldTransform().getBasis() * clampedLocalNormal;
+							//					cp.m_distance1 = cp.m_distance1 * newNormal.dot(cp.m_normalWorldOnB);
+							cp.m_normalWorldOnB = newNormal;
+							// Reproject collision point along normal.
+							cp.m_positionWorldOnB = cp.m_positionWorldOnA - cp.m_normalWorldOnB * cp.m_distance1;
+							cp.m_localPointB = colObj0->getWorldTransform().invXform(cp.m_positionWorldOnB);
+						}
+					}
+				} 
+			}
+			
+
+		}
+	}
+
+#ifdef DEBUG_INTERNAL_EDGE
+	{
+		btVector3 color(0,1,1);
+		btDebugDrawLine(cp.getPositionWorldOnB(),cp.getPositionWorldOnB()+cp.m_normalWorldOnB*10,color);
+	}
+#endif //DEBUG_INTERNAL_EDGE
+
+	if (isNearEdge)
+	{
+
+		if (numConcaveEdgeHits>0)
+		{
+			if ((normalAdjustFlags & BT_TRIANGLE_CONCAVE_DOUBLE_SIDED)!=0)
+			{
+				//fix tri_normal so it pointing the same direction as the current local contact normal
+				if (tri_normal.dot(localContactNormalOnB) < 0)
+				{
+					tri_normal *= -1;
+				}
+				cp.m_normalWorldOnB = colObj0->getWorldTransform().getBasis()*tri_normal;
+			} else
+			{
+				btVector3 newNormal = tri_normal *frontFacing;
+				//if the tri_normal is pointing opposite direction as the current local contact normal, skip it
+				btScalar d = newNormal.dot(localContactNormalOnB) ;
+				if (d< 0)
+				{
+					return;
+				}
+				//modify the normal to be the triangle normal (or backfacing normal)
+				cp.m_normalWorldOnB = colObj0->getWorldTransform().getBasis() *newNormal;
+			}
+						
+			// Reproject collision point along normal.
+			cp.m_positionWorldOnB = cp.m_positionWorldOnA - cp.m_normalWorldOnB * cp.m_distance1;
+			cp.m_localPointB = colObj0->getWorldTransform().invXform(cp.m_positionWorldOnB);
+		}
+	}
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.h b/src/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.h
new file mode 100644
index 00000000..9efb0122
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.h
@@ -0,0 +1,46 @@
+
+#ifndef BT_INTERNAL_EDGE_UTILITY_H
+#define BT_INTERNAL_EDGE_UTILITY_H
+
+#include "LinearMath/btHashMap.h"
+#include "LinearMath/btVector3.h"
+
+#include "BulletCollision/CollisionShapes/btTriangleInfoMap.h"
+
+///The btInternalEdgeUtility helps to avoid or reduce artifacts due to wrong collision normals caused by internal edges.
+///See also http://code.google.com/p/bullet/issues/detail?id=27
+
+class btBvhTriangleMeshShape;
+class btCollisionObject;
+class btManifoldPoint;
+class btIDebugDraw;
+
+
+
+///Bit flags for the normalAdjustFlags argument of btAdjustInternalEdgeContacts (values 1,2,4, tested with a bitwise AND)
+enum btInternalEdgeAdjustFlags
+{
+	BT_TRIANGLE_CONVEX_BACKFACE_MODE = 1,
+	BT_TRIANGLE_CONCAVE_DOUBLE_SIDED = 2, //double sided options are experimental, single sided is recommended
+	BT_TRIANGLE_CONVEX_DOUBLE_SIDED = 4
+};
+
+///Call btGenerateInternalEdgeInfo to create the triangle info for trimeshShape; the info is stored in the shape 'userInfo'
+void	btGenerateInternalEdgeInfo (btBvhTriangleMeshShape*trimeshShape, btTriangleInfoMap* triangleInfoMap);
+
+
+///Call btAdjustInternalEdgeContacts to adjust the collision normal of cp, using the triangle info map (generated using btGenerateInternalEdgeInfo)
+///If this info map is missing, or the triangle is not stored in this map, nothing will be done. normalAdjustFlags combines btInternalEdgeAdjustFlags values.
+void	btAdjustInternalEdgeContacts(btManifoldPoint& cp, const btCollisionObject* trimeshColObj0,const btCollisionObject* otherColObj1, int partId0, int index0, int normalAdjustFlags = 0);
+
+///Enable the BT_INTERNAL_EDGE_DEBUG_DRAW define below and call btSetDebugDrawer, to get visual info to see if the internal edge utility works properly.
+///If the utility doesn't work properly, you might have to adjust the threshold values in btTriangleInfoMap
+//#define BT_INTERNAL_EDGE_DEBUG_DRAW
+///btSetDebugDrawer is only declared while BT_INTERNAL_EDGE_DEBUG_DRAW is defined (it is commented out above).
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+void	btSetDebugDrawer(btIDebugDraw* debugDrawer);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+
+#endif //BT_INTERNAL_EDGE_UTILITY_H
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btManifoldResult.cpp b/src/bullet/BulletCollision/CollisionDispatch/btManifoldResult.cpp
new file mode 100644
index 00000000..bf24246e
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btManifoldResult.cpp
@@ -0,0 +1,135 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btManifoldResult.h"
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+
+
+///Optional user hook (null by default) that allows MaterialCombiner/Custom Friction/Restitution values; invoked from btManifoldResult::addContactPoint
+ContactAddedCallback		gContactAddedCallback=0;
+
+///Default friction combiner: the product of both frictions, clamped to [-10,10].
+///User can override this by implementing gContactAddedCallback and setting body0->m_collisionFlags |= btCollisionObject::CF_CUSTOM_MATERIAL_CALLBACK;
+inline btScalar	calculateCombinedFriction(const btCollisionObject* body0,const btCollisionObject* body1)
+{
+	const btScalar frictionLimit = btScalar(10.);
+	const btScalar product = body0->getFriction() * body1->getFriction();
+	//clamp, testing the lower bound before the upper one
+	if (product < -frictionLimit)
+		return -frictionLimit;
+	if (product > frictionLimit)
+		return frictionLimit;
+	return product;
+}
+
+///Default restitution combiner: the plain product of both restitutions (no clamping).
+inline btScalar	calculateCombinedRestitution(const btCollisionObject* body0,const btCollisionObject* body1)
+{
+	const btScalar restitution0 = body0->getRestitution();
+	return restitution0 * body1->getRestitution();
+}
+
+///Binds the result to a body pair (no manifold yet) and snapshots both world transforms for addContactPoint/refreshContactPoints.
+btManifoldResult::btManifoldResult(btCollisionObject* body0,btCollisionObject* body1)
+		:m_manifoldPtr(0),
+		m_body0(body0),
+		m_body1(body1),
+		//the ids are always initialized (previously only under DEBUG_PART_INDEX): addContactPoint copies them into
+		//every contact point and hands them to gContactAddedCallback even if setShapeIdentifiersA/B was never called
+		m_partId0(-1),
+	m_partId1(-1),
+	m_index0(-1),
+	m_index1(-1)
+{
+	m_rootTransA = body0->getWorldTransform();
+	m_rootTransB = body1->getWorldTransform();
+}
+
+///Adds the contact to m_manifoldPtr, replacing a matching cached point if there is one; ignored when depth exceeds the contact breaking threshold.
+void btManifoldResult::addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
+{
+	btAssert(m_manifoldPtr);
+	//order in manifold needs to match
+
+	if (depth > m_manifoldPtr->getContactBreakingThreshold())
+//	if (depth > m_manifoldPtr->getContactProcessingThreshold())
+		return;
+	//true when the manifold stores the two bodies in the opposite order to this result
+	bool isSwapped = m_manifoldPtr->getBody0() != m_body0;
+	//matching point on A: the point on B moved along the normal by depth
+	btVector3 pointA = pointInWorld + normalOnBInWorld * depth;
+
+	btVector3 localA;
+	btVector3 localB;
+	//the local points follow the manifold's body order, hence the exchanged root transforms when swapped
+	if (isSwapped)
+	{
+		localA = m_rootTransB.invXform(pointA );
+		localB = m_rootTransA.invXform(pointInWorld);
+	} else
+	{
+		localA = m_rootTransA.invXform(pointA );
+		localB = m_rootTransB.invXform(pointInWorld);
+	}
+
+	btManifoldPoint newPt(localA,localB,normalOnBInWorld,depth);
+	newPt.m_positionWorldOnA = pointA;
+	newPt.m_positionWorldOnB = pointInWorld;
+	//>=0: index of the cached point this contact replaces below; <0: it is added as a new point
+	int insertIndex = m_manifoldPtr->getCacheEntry(newPt);
+
+	newPt.m_combinedFriction = calculateCombinedFriction(m_body0,m_body1);
+	newPt.m_combinedRestitution = calculateCombinedRestitution(m_body0,m_body1);
+
+   //BP mod, store contact triangles.
+	if (isSwapped)
+	{
+		newPt.m_partId0 = m_partId1;
+		newPt.m_partId1 = m_partId0;
+		newPt.m_index0  = m_index1;
+		newPt.m_index1  = m_index0;
+	} else
+	{
+		newPt.m_partId0 = m_partId0;
+		newPt.m_partId1 = m_partId1;
+		newPt.m_index0  = m_index0;
+		newPt.m_index1  = m_index1;
+	}
+	//printf("depth=%f\n",depth);
+	///@todo, check this for any side effects
+	if (insertIndex >= 0)
+	{
+		//const btManifoldPoint& oldPoint = m_manifoldPtr->getContactPoint(insertIndex);
+		m_manifoldPtr->replaceContactPoint(newPt,insertIndex);
+	} else
+	{
+		insertIndex = m_manifoldPtr->addManifoldPoint(newPt);
+	}
+	
+	//User can override friction and/or restitution
+	if (gContactAddedCallback &&
+		//and if either of the two bodies requires custom material
+		 ((m_body0->getCollisionFlags() & btCollisionObject::CF_CUSTOM_MATERIAL_CALLBACK) ||
+		   (m_body1->getCollisionFlags() & btCollisionObject::CF_CUSTOM_MATERIAL_CALLBACK)))
+	{
+		//experimental feature info, for per-triangle material etc.
+		btCollisionObject* obj0 = isSwapped? m_body1 : m_body0;
+		btCollisionObject* obj1 = isSwapped? m_body0 : m_body1;
+		(*gContactAddedCallback)(m_manifoldPtr->getContactPoint(insertIndex),obj0,newPt.m_partId0,newPt.m_index0,obj1,newPt.m_partId1,newPt.m_index1);
+	}
+
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btManifoldResult.h b/src/bullet/BulletCollision/CollisionDispatch/btManifoldResult.h
new file mode 100644
index 00000000..18199b49
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btManifoldResult.h
@@ -0,0 +1,128 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_MANIFOLD_RESULT_H
+#define BT_MANIFOLD_RESULT_H
+
+class btCollisionObject;
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+class btManifoldPoint;
+
+#include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h"
+
+#include "LinearMath/btTransform.h"
+
+///User hook called by btManifoldResult::addContactPoint with the stored contact point and, per object, its part id and index.
+typedef bool (*ContactAddedCallback)(btManifoldPoint& cp,	const btCollisionObject* colObj0,int partId0,int index0,const btCollisionObject* colObj1,int partId1,int index1);
+extern ContactAddedCallback		gContactAddedCallback;
+
+//#define DEBUG_PART_INDEX 1
+
+///btManifoldResult is a helper class to manage contact results: it feeds contact points into a btPersistentManifold.
+class btManifoldResult : public btDiscreteCollisionDetectorInterface::Result
+{
+protected:
+	//manifold that receives the contacts; assigned through setPersistentManifold
+	btPersistentManifold* m_manifoldPtr;
+
+	//we need this for compounds
+	btTransform	m_rootTransA;
+	btTransform	m_rootTransB;
+	//the two bodies, and the part/triangle ids that addContactPoint copies into each contact point
+	btCollisionObject* m_body0;
+	btCollisionObject* m_body1;
+	int	m_partId0;
+	int m_partId1;
+	int m_index0;
+	int m_index1;
+	
+
+public:
+	//every pointer and id is initialized here (previously only the ids were, and only under DEBUG_PART_INDEX)
+	btManifoldResult()
+		:m_manifoldPtr(0),
+		m_body0(0),
+		m_body1(0),
+	m_partId0(-1),
+	m_partId1(-1),
+	m_index0(-1),
+	m_index1(-1)
+	{
+	}
+	//binds the result to a body pair; defined in btManifoldResult.cpp
+	btManifoldResult(btCollisionObject* body0,btCollisionObject* body1);
+
+	virtual ~btManifoldResult() {};
+
+	void	setPersistentManifold(btPersistentManifold* manifoldPtr)
+	{
+		m_manifoldPtr = manifoldPtr;
+	}
+
+	const btPersistentManifold*	getPersistentManifold() const
+	{
+		return m_manifoldPtr;
+	}
+	btPersistentManifold*	getPersistentManifold()
+	{
+		return m_manifoldPtr;
+	}
+
+	virtual void setShapeIdentifiersA(int partId0,int index0)
+	{
+		m_partId0=partId0;
+		m_index0=index0;
+	}
+
+	virtual void setShapeIdentifiersB(	int partId1,int index1)
+	{
+		m_partId1=partId1;
+		m_index1=index1;
+	}
+
+	//adds a contact to the manifold; ignored when depth exceeds the manifold's contact breaking threshold
+	virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth);
+	//forwards to the manifold's refreshContactPoints, passing the root transforms in the manifold's body order
+	SIMD_FORCE_INLINE	void refreshContactPoints()
+	{
+		btAssert(m_manifoldPtr);
+		if (!m_manifoldPtr->getNumContacts())
+			return;
+
+		bool isSwapped = m_manifoldPtr->getBody0() != m_body0;
+
+		if (isSwapped)
+		{
+			m_manifoldPtr->refreshContactPoints(m_rootTransB,m_rootTransA);
+		} else
+		{
+			m_manifoldPtr->refreshContactPoints(m_rootTransA,m_rootTransB);
+		}
+	}
+
+	const btCollisionObject* getBody0Internal() const
+	{
+		return m_body0;
+	}
+
+	const btCollisionObject* getBody1Internal() const
+	{
+		return m_body1;
+	}
+	
+};
+
+#endif //BT_MANIFOLD_RESULT_H
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btSimulationIslandManager.cpp b/src/bullet/BulletCollision/CollisionDispatch/btSimulationIslandManager.cpp
new file mode 100644
index 00000000..871c6441
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btSimulationIslandManager.cpp
@@ -0,0 +1,450 @@
+
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "LinearMath/btScalar.h"
+#include "btSimulationIslandManager.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionDispatch/btCollisionWorld.h"
+
+//#include <stdio.h>
+#include "LinearMath/btQuickprof.h"
+
+btSimulationIslandManager::btSimulationIslandManager():
+m_splitIslands(true) //islands are split by default; setSplitIslands changes this
+{
+}
+
+///Empty on purpose: the union-find and both arrays are value members and are destroyed with the manager.
+btSimulationIslandManager::~btSimulationIslandManager()
+{
+}
+
+///Forwards n to m_unionFind.reset; updateActivationState passes the number of elements it is about to tag.
+void btSimulationIslandManager::initUnionFind(int n)
+{
+		m_unionFind.reset(n);
+}
+
+///Unites the island tags of every overlapping broadphase pair whose two objects both merge simulation islands.
+void btSimulationIslandManager::findUnions(btDispatcher* /* dispatcher */,btCollisionWorld* colWorld)
+{
+	btOverlappingPairCache* pairCache = colWorld->getPairCache();
+	const int pairCount = pairCache->getNumOverlappingPairs();
+	if (!pairCount)
+	{
+		//nothing overlaps: the pair array is not even fetched
+		return;
+	}
+
+	btBroadphasePair* pairs = pairCache->getOverlappingPairArrayPtr();
+	for (int pairIndex=0;pairIndex<pairCount;pairIndex++)
+	{
+		const btBroadphasePair& pair = pairs[pairIndex];
+		btCollisionObject* objA = (btCollisionObject*)pair.m_pProxy0->m_clientObject;
+		btCollisionObject* objB = (btCollisionObject*)pair.m_pProxy1->m_clientObject;
+
+		//both objects must exist and agree to merge islands (checked in this order)
+		if (!objA || !objA->mergesSimulationIslands())
+			continue;
+		if (!objB || !objB->mergesSimulationIslands())
+			continue;
+
+		m_unionFind.unite(objA->getIslandTag(),objB->getIslandTag());
+	}
+}
+
+#ifdef STATIC_SIMULATION_ISLAND_OPTIMIZATION
+///STATIC_SIMULATION_ISLAND_OPTIMIZATION variant: only non-static, non-kinematic objects get a
+///union-find slot. Each of them receives a consecutive island tag; the tag of static/kinematic
+///objects is left untouched here. Every object has its companion id and hit fraction reset.
+void   btSimulationIslandManager::updateActivationState(btCollisionWorld* colWorld,btDispatcher* dispatcher)
+{
+	btCollisionObjectArray& worldObjects = colWorld->getCollisionObjectArray();
+	int dynamicCount = 0;
+
+	for (int slot=0;slot<worldObjects.size(); slot++)
+	{
+		btCollisionObject* object = worldObjects[slot];
+		const bool isDynamic = !object->isStaticOrKinematicObject();
+		if (isDynamic)
+		{
+			//tags are handed out in array order, starting at 0
+			object->setIslandTag(dynamicCount);
+			dynamicCount++;
+		}
+		object->setCompanionId(-1);
+		object->setHitFraction(btScalar(1.));
+	}
+
+	//size the union-find for the tagged objects only, then do the union find
+	initUnionFind( dynamicCount );
+	findUnions(dispatcher,colWorld);
+}
+
+///STATIC_SIMULATION_ISLAND_OPTIMIZATION variant: writes the union-find 'find' value of each dynamic
+///object into its island tag; static/kinematic objects get island tag -1 and companion id -2.
+void   btSimulationIslandManager::storeIslandActivationState(btCollisionWorld* colWorld)
+{
+	btCollisionObjectArray& worldObjects = colWorld->getCollisionObjectArray();
+	int dynamicSlot = 0; //union-find index of the next non-static, non-kinematic object
+
+	for (int arrayIndex=0;arrayIndex<worldObjects.size();arrayIndex++)
+	{
+		btCollisionObject* object = worldObjects[arrayIndex];
+		if (object->isStaticOrKinematicObject())
+		{
+			object->setIslandTag(-1);
+			object->setCompanionId(-2);
+			continue;
+		}
+
+		object->setIslandTag( m_unionFind.find(dynamicSlot) );
+		//Set the correct object offset in Collision Object Array
+		m_unionFind.getElement(dynamicSlot).m_sz = arrayIndex;
+		object->setCompanionId(-1);
+		dynamicSlot++;
+	}
+}
+
+
+#else //STATIC_SIMULATION_ISLAND_OPTIMIZATION
+///Default variant (STATIC_SIMULATION_ISLAND_OPTIMIZATION not defined): every collision object,
+///static and kinematic ones included, gets a union-find element and an island tag equal to its
+///index in the world's collision object array; overlapping pairs are then merged by findUnions.
+void	btSimulationIslandManager::updateActivationState(btCollisionWorld* colWorld,btDispatcher* dispatcher)
+{
+	btCollisionObjectArray& worldObjects = colWorld->getCollisionObjectArray();
+
+	//one union-find element per collision object
+	initUnionFind( int (worldObjects.size()));
+
+	for (int objectIndex=0;objectIndex<worldObjects.size(); objectIndex++)
+	{
+		btCollisionObject* object = worldObjects[objectIndex];
+		//initial island tag == index in the collision object array
+		object->setIslandTag(objectIndex);
+		//companion id and hit fraction are reset for every object as well
+		object->setCompanionId(-1);
+		object->setHitFraction(btScalar(1.));
+	}
+
+	// do the union find
+	findUnions(dispatcher,colWorld);
+
+}
+
+///Default variant: replaces each dynamic object's island tag by its union-find 'find' value.
+///Static and kinematic objects get island tag -1 and companion id -2 instead.
+void	btSimulationIslandManager::storeIslandActivationState(btCollisionWorld* colWorld)
+{
+	btCollisionObjectArray& worldObjects = colWorld->getCollisionObjectArray();
+
+	//in this variant the union-find index of an object is its index in the collision object array
+	for (int objectIndex=0;objectIndex<worldObjects.size();objectIndex++)
+	{
+		btCollisionObject* object = worldObjects[objectIndex];
+
+		if (object->isStaticOrKinematicObject())
+		{
+			//m_unionFind.find is deliberately not called for these objects
+			object->setIslandTag(-1);
+			object->setCompanionId(-2);
+			continue;
+		}
+
+		// put the islandId ('find' value) into m_tag
+		object->setIslandTag( m_unionFind.find(objectIndex) );
+		object->setCompanionId(-1);
+	}
+}
+
+#endif //STATIC_SIMULATION_ISLAND_OPTIMIZATION
+
+///Island id of a manifold: body0's island tag when it is non-negative, otherwise body1's tag.
+inline	int	getIslandId(const btPersistentManifold* lhs)
+{
+	const btCollisionObject* firstBody = static_cast<const btCollisionObject*>(lhs->getBody0());
+	const btCollisionObject* secondBody = static_cast<const btCollisionObject*>(lhs->getBody1());
+	const int firstTag = firstBody->getIslandTag();
+	//a negative tag (storeIslandActivationState assigns -1 to static/kinematic objects) defers to the other body
+	return (firstTag >= 0) ? firstTag : secondBody->getIslandTag();
+}
+
+
+
+/// function object that orders manifolds by ascending island id (see getIslandId)
+class btPersistentManifoldSortPredicate
+{
+	public:
+		SIMD_FORCE_INLINE bool operator() ( const btPersistentManifold* lhs, const btPersistentManifold* rhs ) const
+		{
+			const int lhsIsland = getIslandId(lhs);
+			return lhsIsland < getIslandId(rhs);
+		}
+};
+
+
+///Sorts the union-find, updates the activation state of every island, then (when m_splitIslands is set) collects the manifolds needing a response into m_islandmanifold.
+void btSimulationIslandManager::buildIslands(btDispatcher* dispatcher,btCollisionWorld* collisionWorld)
+{
+
+	BT_PROFILE("islandUnionFindAndQuickSort");
+	
+	btCollisionObjectArray& collisionObjects = collisionWorld->getCollisionObjectArray();
+	//start from an empty manifold list; it is refilled at the end of this function
+	m_islandmanifold.resize(0);
+
+	//we are going to sort the unionfind array, and store the element id in the size
+	//afterwards, we clean unionfind, to make sure no-one uses it anymore
+	
+	getUnionFind().sortIslands();
+	int numElem = getUnionFind().getNumElements();
+
+	int endIslandIndex=1;
+	int startIslandIndex;
+
+	//each iteration handles one island: the run [startIslandIndex,endIslandIndex) of elements sharing the same m_id
+	//update the sleeping state for bodies, if all are sleeping
+	for ( startIslandIndex=0;startIslandIndex<numElem;startIslandIndex = endIslandIndex)
+	{
+		int islandId = getUnionFind().getElement(startIslandIndex).m_id;
+		for (endIslandIndex = startIslandIndex+1;(endIslandIndex<numElem) && (getUnionFind().getElement(endIslandIndex).m_id == islandId);endIslandIndex++)
+		{
+		}
+
+		//int numSleeping = 0;
+		//stays true unless a member tagged with this island is ACTIVE_TAG or DISABLE_DEACTIVATION
+		bool allSleeping = true;
+
+		int idx;
+		for (idx=startIslandIndex;idx<endIslandIndex;idx++)
+		{
+			int i = getUnionFind().getElement(idx).m_sz;
+
+			btCollisionObject* colObj0 = collisionObjects[i];
+			if ((colObj0->getIslandTag() != islandId) && (colObj0->getIslandTag() != -1))
+			{
+//				printf("error in island management\n");
+			}
+
+			btAssert((colObj0->getIslandTag() == islandId) || (colObj0->getIslandTag() == -1));
+			if (colObj0->getIslandTag() == islandId)
+			{
+				if (colObj0->getActivationState()== ACTIVE_TAG)
+				{
+					allSleeping = false;
+				}
+				if (colObj0->getActivationState()== DISABLE_DEACTIVATION)
+				{
+					allSleeping = false;
+				}
+			}
+		}
+			
+		//all sleeping: mark every member ISLAND_SLEEPING; otherwise move ISLAND_SLEEPING members to WANTS_DEACTIVATION
+		if (allSleeping)
+		{
+			int idx;
+			for (idx=startIslandIndex;idx<endIslandIndex;idx++)
+			{
+				int i = getUnionFind().getElement(idx).m_sz;
+				btCollisionObject* colObj0 = collisionObjects[i];
+				if ((colObj0->getIslandTag() != islandId) && (colObj0->getIslandTag() != -1))
+				{
+//					printf("error in island management\n");
+				}
+
+				btAssert((colObj0->getIslandTag() == islandId) || (colObj0->getIslandTag() == -1));
+
+				if (colObj0->getIslandTag() == islandId)
+				{
+					colObj0->setActivationState( ISLAND_SLEEPING );
+				}
+			}
+		} else
+		{
+
+			int idx;
+			for (idx=startIslandIndex;idx<endIslandIndex;idx++)
+			{
+				int i = getUnionFind().getElement(idx).m_sz;
+
+				btCollisionObject* colObj0 = collisionObjects[i];
+				if ((colObj0->getIslandTag() != islandId) && (colObj0->getIslandTag() != -1))
+				{
+//					printf("error in island management\n");
+				}
+
+				btAssert((colObj0->getIslandTag() == islandId) || (colObj0->getIslandTag() == -1));
+
+				if (colObj0->getIslandTag() == islandId)
+				{
+					if ( colObj0->getActivationState() == ISLAND_SLEEPING)
+					{
+						colObj0->setActivationState( WANTS_DEACTIVATION);
+						colObj0->setDeactivationTime(0.f);
+					}
+				}
+			}
+		}
+	}
+
+	
+	int i;
+	int maxNumManifolds = dispatcher->getNumManifolds();
+
+//#define SPLIT_ISLANDS 1
+//#ifdef SPLIT_ISLANDS
+
+	
+//#endif //SPLIT_ISLANDS
+
+	//second pass over all manifolds: let awake kinematic bodies activate what they touch, and collect manifolds
+	for (i=0;i<maxNumManifolds ;i++)
+	{
+		 btPersistentManifold* manifold = dispatcher->getManifoldByIndexInternal(i);
+		 
+		 btCollisionObject* colObj0 = static_cast<btCollisionObject*>(manifold->getBody0());
+		 btCollisionObject* colObj1 = static_cast<btCollisionObject*>(manifold->getBody1());
+		
+		 ///@todo: check sleeping conditions!
+		 if (((colObj0) && colObj0->getActivationState() != ISLAND_SLEEPING) ||
+			((colObj1) && colObj1->getActivationState() != ISLAND_SLEEPING))
+		{
+		//NOTE(review): the test above tolerates a null body, but both bodies are dereferenced below - confirm they are never null
+			//kinematic objects don't merge islands, but wake up all connected objects
+			if (colObj0->isKinematicObject() && colObj0->getActivationState() != ISLAND_SLEEPING)
+			{
+				if (colObj0->hasContactResponse())
+					colObj1->activate();
+			}
+			if (colObj1->isKinematicObject() && colObj1->getActivationState() != ISLAND_SLEEPING)
+			{
+				if (colObj1->hasContactResponse())
+					colObj0->activate();
+			}
+			if(m_splitIslands)
+			{ 
+				//filtering for response
+				if (dispatcher->needsResponse(colObj0,colObj1))
+					m_islandmanifold.push_back(manifold);
+			}
+		}
+	}
+}
+
+
+///Runs buildIslands, then calls callback->processIsland once per island with an active body (or once for everything if islands are not split). @todo: this is random access, it can be walked 'cache friendly'!
+void btSimulationIslandManager::buildAndProcessIslands(btDispatcher* dispatcher,btCollisionWorld* collisionWorld, IslandCallback* callback)
+{
+	btCollisionObjectArray& collisionObjects = collisionWorld->getCollisionObjectArray();
+
+	buildIslands(dispatcher,collisionWorld);
+
+	int endIslandIndex=1;
+	int startIslandIndex;
+	int numElem = getUnionFind().getNumElements();
+
+	BT_PROFILE("processIslands");
+
+	if(!m_splitIslands)
+	{
+		btPersistentManifold** manifold = dispatcher->getInternalManifoldPointer();
+		int maxNumManifolds = dispatcher->getNumManifolds();
+		callback->processIsland(&collisionObjects[0],collisionObjects.size(),manifold,maxNumManifolds, -1);
+	}
+	else
+	{
+		// Sort manifolds, based on islands
+		// Sort the vector using predicate and std::sort
+		//std::sort(islandmanifold.begin(), islandmanifold.end(), btPersistentManifoldSortPredicate);
+
+		int numManifolds = int (m_islandmanifold.size());
+
+		//tried a radix sort, but quicksort/heapsort seems still faster
+		//@todo rewrite island management
+		m_islandmanifold.quickSort(btPersistentManifoldSortPredicate());
+		//m_islandmanifold.heapSort(btPersistentManifoldSortPredicate());
+
+		//now process all active islands (sets of manifolds for now)
+
+		int startManifoldIndex = 0;
+		int endManifoldIndex = 1;
+
+		//int islandId;
+
+		
+
+	//	printf("Start Islands\n");
+
+		//traverse the simulation islands, and call the solver, unless all objects are sleeping/deactivated
+		for ( startIslandIndex=0;startIslandIndex<numElem;startIslandIndex = endIslandIndex)
+		{
+			int islandId = getUnionFind().getElement(startIslandIndex).m_id;
+
+			//gather this island's bodies into m_islandBodies; the island counts as sleeping unless one of them isActive()
+			   bool islandSleeping = true;
+	                
+					for (endIslandIndex = startIslandIndex;(endIslandIndex<numElem) && (getUnionFind().getElement(endIslandIndex).m_id == islandId);endIslandIndex++)
+					{
+							int i = getUnionFind().getElement(endIslandIndex).m_sz;
+							btCollisionObject* colObj0 = collisionObjects[i];
+							m_islandBodies.push_back(colObj0);
+							if (colObj0->isActive())
+									islandSleeping = false;
+					}
+	                
+
+			//find the accompanying contact manifold for this islandId
+			int numIslandManifolds = 0;
+			btPersistentManifold** startManifold = 0;
+
+			if (startManifoldIndex<numManifolds)
+			{
+				int curIslandId = getIslandId(m_islandmanifold[startManifoldIndex]);
+				if (curIslandId == islandId)
+				{
+					startManifold = &m_islandmanifold[startManifoldIndex];
+				
+					for (endManifoldIndex = startManifoldIndex+1;(endManifoldIndex<numManifolds) && (islandId == getIslandId(m_islandmanifold[endManifoldIndex]));endManifoldIndex++)
+					{
+
+					}
+					/// Process the actual simulation, only if not sleeping/deactivated
+					numIslandManifolds = endManifoldIndex-startManifoldIndex;
+				}
+
+			}
+
+			if (!islandSleeping)
+			{
+				callback->processIsland(&m_islandBodies[0],m_islandBodies.size(),startManifold,numIslandManifolds, islandId);
+	//			printf("Island callback of size:%d bodies, %d manifolds\n",islandBodies.size(),numIslandManifolds);
+			}
+			//the manifold cursor only advances past manifolds that were matched to this island
+			if (numIslandManifolds)
+			{
+				startManifoldIndex = endManifoldIndex;
+			}
+
+			m_islandBodies.resize(0);
+		}
+	} // else if(!splitIslands) 
+
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btSimulationIslandManager.h b/src/bullet/BulletCollision/CollisionDispatch/btSimulationIslandManager.h
new file mode 100644
index 00000000..e24c6afe
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btSimulationIslandManager.h
@@ -0,0 +1,81 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SIMULATION_ISLAND_MANAGER_H
+#define BT_SIMULATION_ISLAND_MANAGER_H
+
+#include "BulletCollision/CollisionDispatch/btUnionFind.h"
+#include "btCollisionCreateFunc.h"
+#include "LinearMath/btAlignedObjectArray.h"
+#include "btCollisionObject.h"
+
+class btCollisionObject;
+class btCollisionWorld;
+class btDispatcher;
+class btPersistentManifold;
+
+
+///SimulationIslandManager creates and handles simulation islands, using btUnionFind
+class btSimulationIslandManager
+{
+	btUnionFind m_unionFind;
+	//m_islandmanifold: manifolds collected by buildIslands; m_islandBodies: bodies of the island currently being processed
+	btAlignedObjectArray<btPersistentManifold*>  m_islandmanifold;
+	btAlignedObjectArray<btCollisionObject* >  m_islandBodies;
+	//true (constructor default): one callback per island; false: a single callback for all objects, with islandId -1
+	bool m_splitIslands;
+	
+public:
+	btSimulationIslandManager();
+	virtual ~btSimulationIslandManager();
+
+	//forwards n to m_unionFind.reset
+	void initUnionFind(int n);	
+	
+		
+	btUnionFind& getUnionFind() { return m_unionFind;}
+	//updateActivationState tags the objects and runs findUnions; storeIslandActivationState writes the 'find' values back
+	virtual	void	updateActivationState(btCollisionWorld* colWorld,btDispatcher* dispatcher);
+	virtual	void	storeIslandActivationState(btCollisionWorld* world);
+
+	//unites the island tags of each overlapping pair whose two objects both merge simulation islands
+	void	findUnions(btDispatcher* dispatcher,btCollisionWorld* colWorld);
+
+	
+	//receiver interface for buildAndProcessIslands
+	struct	IslandCallback
+	{
+		virtual ~IslandCallback() {};
+
+		virtual	void	processIsland(btCollisionObject** bodies,int numBodies,class btPersistentManifold**	manifolds,int numManifolds, int islandId) = 0;
+	};
+	//calls buildIslands, then hands the islands to callback->processIsland
+	void	buildAndProcessIslands(btDispatcher* dispatcher,btCollisionWorld* collisionWorld, IslandCallback* callback);
+
+	void buildIslands(btDispatcher* dispatcher,btCollisionWorld* colWorld);
+
+	bool getSplitIslands()
+	{
+		return m_splitIslands;
+	}
+	void setSplitIslands(bool doSplitIslands)
+	{
+		m_splitIslands = doSplitIslands;
+	}
+
+};
+
+#endif //BT_SIMULATION_ISLAND_MANAGER_H
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.cpp b/src/bullet/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.cpp
new file mode 100644
index 00000000..8df87692
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.cpp
@@ -0,0 +1,260 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btSphereBoxCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "BulletCollision/CollisionShapes/btBoxShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+//#include <stdio.h>
+
+/// Sphere-vs-box algorithm; requests and owns a manifold when none is passed in and the dispatcher wants this pair.
+btSphereBoxCollisionAlgorithm::btSphereBoxCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1, bool isSwapped)
+	: btActivatingCollisionAlgorithm(ci,col0,col1), m_ownManifold(false), m_manifoldPtr(mf), m_isSwapped(isSwapped)
+{
+	// When swapped, the sphere arrived as col1 and the box as col0.
+	btCollisionObject* sphereBody = col0;
+	btCollisionObject* boxBody = col1;
+	if (m_isSwapped) { sphereBody = col1; boxBody = col0; }
+
+	// Keep a caller-supplied manifold; otherwise create one only for pairs that need collision.
+	if (m_manifoldPtr || !m_dispatcher->needsCollision(sphereBody,boxBody))
+		return;
+	m_manifoldPtr = m_dispatcher->getNewManifold(sphereBody,boxBody);
+	m_ownManifold = true;
+}
+
+
+/// Returns the manifold to the dispatcher, but only if this algorithm created it.
+btSphereBoxCollisionAlgorithm::~btSphereBoxCollisionAlgorithm()
+{
+	const bool mustRelease = m_ownManifold && m_manifoldPtr;
+	if (!mustRelease)
+		return;
+	m_dispatcher->releaseManifold(m_manifoldPtr);
+}
+
+
+
+/// Sphere-vs-box narrow phase: adds at most one contact point to resultOut.
+/// body0/body1 are mapped to sphere/box according to m_isSwapped; without a manifold the call does nothing.
+void btSphereBoxCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	(void)dispatchInfo;
+	if (!m_manifoldPtr)
+		return;
+
+	btCollisionObject* sphereObj = m_isSwapped? body1 : body0;
+	btCollisionObject* boxObj = m_isSwapped? body0 : body1;
+
+	btSphereShape* sphere0 = (btSphereShape*)sphereObj->getCollisionShape();
+
+	btVector3 pOnBox,pOnSphere;
+	btVector3 sphereCenter = sphereObj->getWorldTransform().getOrigin();
+	btScalar radius = sphere0->getRadius();
+
+	// getSphereDistance writes the two closest points on its contact paths and returns the
+	// signed distance; every no-contact path returns a positive value, which fails the test below.
+	btScalar dist = getSphereDistance(boxObj,pOnBox,pOnSphere,sphereCenter,radius);
+
+	resultOut->setPersistentManifold(m_manifoldPtr);
+
+	if (dist < SIMD_EPSILON)
+	{
+		// Contact normal: direction from the point on the sphere towards the point on the box.
+		btVector3 normalOnSurfaceB = (pOnBox- pOnSphere).normalize();
+
+		/// report a contact. internally this will be kept persistent, and contact reduction is done
+		resultOut->addContactPoint(normalOnSurfaceB,pOnBox,dist);
+	}
+
+	// Only the owner of the manifold refreshes it, and only when it holds contacts.
+	if (m_ownManifold)
+	{
+		if (m_manifoldPtr->getNumContacts())
+		{
+			resultOut->refreshContactPoints();
+		}
+	}
+}
+
+/// Time of impact is not implemented for sphere-box yet; the result is always 1.
+btScalar btSphereBoxCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* col0,btCollisionObject* col1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	// Placeholder: none of the arguments is consulted.
+	(void)col0;
+	(void)col1;
+	(void)dispatchInfo;
+	(void)resultOut;
+	return btScalar(1.);
+}
+
+
+/// Closest-point query between a sphere (sphereCenter, fRadius) and the box shape of boxObj, done in the box's local frame.
+/// Returns a non-positive distance when a contact is found (both points are then in world space) and a positive value otherwise.
+btScalar btSphereBoxCollisionAlgorithm::getSphereDistance(btCollisionObject* boxObj, btVector3& pointOnBox, btVector3& v3PointOnSphere, const btVector3& sphereCenter, btScalar fRadius ) 
+{
+	btScalar margins;
+	btVector3 bounds[2];
+	btBoxShape* boxShape= (btBoxShape*)boxObj->getCollisionShape();
+	
+	bounds[0] = -boxShape->getHalfExtentsWithoutMargin();
+	bounds[1] = boxShape->getHalfExtentsWithoutMargin();
+
+	margins = boxShape->getMargin();//also add sphereShape margin?
+
+	const btTransform&	m44T = boxObj->getWorldTransform();
+
+	btVector3	boundsVec[2];
+	btScalar	fPenetration;
+
+	boundsVec[0] = bounds[0];
+	boundsVec[1] = bounds[1];
+
+	btVector3	marginsVec( margins, margins, margins );
+
+	// move every face inwards by the margin: bounds[] now describes a box smaller by `margins` on each side
+	bounds[0] += marginsVec;
+	bounds[1] -= marginsVec;
+
+	/////////////////////////////////////////////////
+	btVector3	tmp, prel, n[6], normal, v3P;
+	btScalar   fSep = btScalar(10000000.0), fSepThis;
+
+	n[0].setValue( btScalar(-1.0),  btScalar(0.0),  btScalar(0.0) );
+	n[1].setValue(  btScalar(0.0), btScalar(-1.0),  btScalar(0.0) );
+	n[2].setValue(  btScalar(0.0),  btScalar(0.0), btScalar(-1.0) );
+	n[3].setValue(  btScalar(1.0),  btScalar(0.0),  btScalar(0.0) );
+	n[4].setValue(  btScalar(0.0),  btScalar(1.0),  btScalar(0.0) );
+	n[5].setValue(  btScalar(0.0),  btScalar(0.0),  btScalar(1.0) );
+
+	// convert  point in local space
+	prel = m44T.invXform( sphereCenter);
+	
+	bool	bFound = false;
+
+	v3P = prel;
+	// clamp v3P onto the shrunk box: for every face the centre lies beyond, remove the overshoot along that face normal
+	for (int i=0;i<6;i++)
+	{
+		int j = i<3? 0:1;
+		if ( (fSepThis = ((v3P-bounds[j]) .dot(n[i]))) > btScalar(0.0) )
+		{
+			v3P = v3P - n[i]*fSepThis;		
+			bFound = true;
+		}
+	}
+	
+	// bFound: the sphere centre was outside at least one face of the shrunk box
+
+	if ( bFound )
+	{
+		bounds[0] = boundsVec[0];	// dead store: bounds[] is not read again on this path
+		bounds[1] = boundsVec[1];
+
+		normal = (prel - v3P).normalize();
+		pointOnBox = v3P + normal*margins;
+		v3PointOnSphere = prel - normal*fRadius;
+		// sphere surface point still in front of the box surface point along the normal: report separation
+		if ( ((v3PointOnSphere - pointOnBox) .dot (normal)) > btScalar(0.0) )
+		{
+			return btScalar(1.0);
+		}
+
+		// transform back in world space
+		tmp = m44T( pointOnBox);
+		pointOnBox    = tmp;
+		tmp  = m44T( v3PointOnSphere);		
+		v3PointOnSphere = tmp;
+		btScalar fSeps2 = (pointOnBox-v3PointOnSphere).length2();
+		// NOTE(review): despite the comment below, a failed test does not reach the deep-penetration code; fSep keeps its 10000000.0 initial value and is returned
+		//if this fails, fallback into deeper penetration case, below
+		if (fSeps2 > SIMD_EPSILON)
+		{
+			fSep = - btSqrt(fSeps2);
+			normal = (pointOnBox-v3PointOnSphere);
+			normal *= btScalar(1.)/fSep;	// local only; the normal is not handed back to the caller
+		}
+
+		return fSep;
+	}
+
+	//////////////////////////////////////////////////
+	// Deep penetration case
+	// reached only when no face of the shrunk box was crossed in the loop above
+	fPenetration = getSpherePenetration( boxObj,pointOnBox, v3PointOnSphere, sphereCenter, fRadius,bounds[0],bounds[1] );
+
+	bounds[0] = boundsVec[0];	// dead store: bounds[] is not read again before returning
+	bounds[1] = boundsVec[1];
+	// non-positive penetration is a contact: the margin is subtracted from it; anything else is reported as separated
+	if ( fPenetration <= btScalar(0.0) )
+		return (fPenetration-margins);
+	else
+		return btScalar(1.0);
+}
+
+/// Deep-contact helper: tests the sphere (sphereCenter, fRadius) against the six face planes of the local box [aabbMin, aabbMax] of boxObj.
+/// Returns 1.0 as soon as one face plane separates the sphere; otherwise the largest value of (signed face distance - fRadius).
+btScalar btSphereBoxCollisionAlgorithm::getSpherePenetration( btCollisionObject* boxObj,btVector3& pointOnBox, btVector3& v3PointOnSphere, const btVector3& sphereCenter, btScalar fRadius, const btVector3& aabbMin, const btVector3& aabbMax) 
+{
+	btVector3 bounds[2];
+
+	bounds[0] = aabbMin;
+	bounds[1] = aabbMax;
+
+	btVector3	p0, tmp, prel, n[6], normal;
+	btScalar   fSep = btScalar(-10000000.0), fSepThis;
+
+	// set p0 and normal to a default value to shut up GCC
+	p0.setValue(btScalar(0.), btScalar(0.), btScalar(0.));
+	normal.setValue(btScalar(0.), btScalar(0.), btScalar(0.));
+
+	n[0].setValue( btScalar(-1.0),  btScalar(0.0),  btScalar(0.0) );
+	n[1].setValue(  btScalar(0.0), btScalar(-1.0),  btScalar(0.0) );
+	n[2].setValue(  btScalar(0.0),  btScalar(0.0), btScalar(-1.0) );
+	n[3].setValue(  btScalar(1.0),  btScalar(0.0),  btScalar(0.0) );
+	n[4].setValue(  btScalar(0.0),  btScalar(1.0),  btScalar(0.0) );
+	n[5].setValue(  btScalar(0.0),  btScalar(0.0),  btScalar(1.0) );
+
+	const btTransform&	m44T = boxObj->getWorldTransform();
+
+	// convert  point in local space
+	prel = m44T.invXform( sphereCenter);
+
+	///////////
+	// per face: signed distance of the centre to the face plane minus the radius; remember the face with the largest value
+	for (int i=0;i<6;i++)
+	{
+		int j = i<3 ? 0:1;
+		if ( (fSepThis = ((prel-bounds[j]) .dot( n[i]))-fRadius) > btScalar(0.0) )	return btScalar(1.0);
+		if ( fSepThis > fSep )
+		{
+			p0 = bounds[j];	normal = (btVector3&)n[i];
+			fSep = fSepThis;
+		}
+	}
+	// pointOnBox: projection of the centre onto the chosen face plane; v3PointOnSphere: that point moved by fSep along the face normal
+	pointOnBox = prel - normal*(normal.dot((prel-p0)));
+	v3PointOnSphere = pointOnBox + normal*fSep;
+
+	// transform back in world space
+	tmp  = m44T( pointOnBox);		
+	pointOnBox    = tmp;
+	tmp  = m44T( v3PointOnSphere);		v3PointOnSphere = tmp;
+	normal = (pointOnBox-v3PointOnSphere).normalize();	// written to the local only; not used afterwards
+
+	return fSep;
+}
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h b/src/bullet/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h
new file mode 100644
index 00000000..60286ae0
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h
@@ -0,0 +1,75 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SPHERE_BOX_COLLISION_ALGORITHM_H
+#define BT_SPHERE_BOX_COLLISION_ALGORITHM_H
+
+#include "btActivatingCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+class btPersistentManifold;
+#include "btCollisionDispatcher.h"
+
+#include "LinearMath/btVector3.h"
+
+/// btSphereBoxCollisionAlgorithm  provides sphere-box collision detection.
+/// Other features are frame-coherency (persistent data) and collision response.
+class btSphereBoxCollisionAlgorithm : public btActivatingCollisionAlgorithm
+{
+	bool	m_ownManifold;	///< true when m_manifoldPtr was obtained by this object and must be released by it
+	btPersistentManifold*	m_manifoldPtr;
+	bool	m_isSwapped;	///< true when the sphere is the second body of the pair
+	
+public:
+
+	btSphereBoxCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1, bool isSwapped);
+
+	virtual ~btSphereBoxCollisionAlgorithm();
+
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	/// Appends this algorithm's manifold to manifoldArray, but only when the manifold is owned here.
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		if (!m_ownManifold || !m_manifoldPtr)
+			return;
+		manifoldArray.push_back(m_manifoldPtr);
+	}
+
+	/// Sphere/box closest-point query used by processCollision().
+	btScalar getSphereDistance( btCollisionObject* boxObj,btVector3& v3PointOnBox, btVector3& v3PointOnSphere, const btVector3& v3SphereCenter, btScalar fRadius );
+
+	/// Helper of getSphereDistance() for a sphere centre inside the bounds passed as aabbMin/aabbMax.
+	btScalar getSpherePenetration( btCollisionObject* boxObj, btVector3& v3PointOnBox, btVector3& v3PointOnSphere, const btVector3& v3SphereCenter, btScalar fRadius, const btVector3& aabbMin, const btVector3& aabbMax);
+	
+	/// Factory: placement-constructs the algorithm in memory obtained from the dispatcher.
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btSphereBoxCollisionAlgorithm));
+
+			// No manifold is passed in; the constructor obtains one itself if the pair needs it.
+			const bool sphereIsSecond = m_swapped ? true : false;
+			return new(mem) btSphereBoxCollisionAlgorithm(0,ci,body0,body1,sphereIsSecond);
+		}
+	};
+
+};
+
+#endif //BT_SPHERE_BOX_COLLISION_ALGORITHM_H
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.cpp b/src/bullet/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.cpp
new file mode 100644
index 00000000..5c4e78fe
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.cpp
@@ -0,0 +1,105 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btSphereSphereCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+
+/// Sphere-vs-sphere algorithm; uses mf when given, otherwise requests a manifold from the dispatcher and owns it.
+btSphereSphereCollisionAlgorithm::btSphereSphereCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1)
+	: btActivatingCollisionAlgorithm(ci,col0,col1), m_ownManifold(false), m_manifoldPtr(mf)
+{
+	// A manifold handed in by the caller is used as is and is not owned.
+	if (m_manifoldPtr)
+		return;
+
+	m_manifoldPtr = m_dispatcher->getNewManifold(col0,col1);
+	m_ownManifold = true;
+}
+
+/// Gives an owned manifold back to the dispatcher.
+btSphereSphereCollisionAlgorithm::~btSphereSphereCollisionAlgorithm()
+{
+	if (!m_ownManifold)
+		return;
+	if (m_manifoldPtr)
+		m_dispatcher->releaseManifold(m_manifoldPtr);
+}
+
+/// Sphere-vs-sphere narrow phase. Adds one contact when the centre distance does not exceed the
+/// sum of the radii; otherwise only refreshes the stored points (unless CLEAR_MANIFOLD is defined).
+void btSphereSphereCollisionAlgorithm::processCollision (btCollisionObject* col0,btCollisionObject* col1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	(void)dispatchInfo;
+
+	if (!m_manifoldPtr)
+		return;
+
+	resultOut->setPersistentManifold(m_manifoldPtr);
+
+	btSphereShape* shapeA = (btSphereShape*)col0->getCollisionShape();
+	btSphereShape* shapeB = (btSphereShape*)col1->getCollisionShape();
+
+	// Vector from the centre of col1 to the centre of col0, and its length.
+	btVector3 centreDelta = col0->getWorldTransform().getOrigin()-  col1->getWorldTransform().getOrigin();
+	btScalar centreDist = centreDelta.length();
+	btScalar radiusA = shapeA->getRadius();
+	btScalar radiusB = shapeB->getRadius();
+
+#ifdef CLEAR_MANIFOLD
+	m_manifoldPtr->clearManifold(); //don't do this, it disables warmstarting
+#endif
+
+	// Spheres apart: no new contact is generated.
+	if ( centreDist > (radiusA+radiusB))
+	{
+#ifndef CLEAR_MANIFOLD
+		resultOut->refreshContactPoints();
+#endif //CLEAR_MANIFOLD
+		return;
+	}
+
+	// Signed distance between the two surfaces (negative means penetration).
+	btScalar surfaceGap = centreDist - (radiusA+radiusB);
+
+	// The normal points from col1 towards col0; (1,0,0) is used when the centres (nearly) coincide.
+	btVector3 contactNormal(1,0,0);
+	if (centreDist > SIMD_EPSILON)
+	{
+		contactNormal = centreDelta / centreDist;
+	}
+
+	// Contact position on the surface of the second sphere (world space).
+	btVector3 pointOnB = col1->getWorldTransform().getOrigin() + radiusB* contactNormal;
+
+	/// report a contact. internally this will be kept persistent, and contact reduction is done
+	resultOut->addContactPoint(contactNormal,pointOnB,surfaceGap);
+
+#ifndef CLEAR_MANIFOLD
+	resultOut->refreshContactPoints();
+#endif //CLEAR_MANIFOLD
+}
+
+/// Time of impact is not implemented for sphere-sphere yet; the result is always 1.
+btScalar btSphereSphereCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* col0,btCollisionObject* col1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	// Placeholder: the arguments are intentionally ignored.
+	(void)resultOut;
+	(void)dispatchInfo;
+	(void)col1;
+	(void)col0;
+	return btScalar(1.);
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h b/src/bullet/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h
new file mode 100644
index 00000000..e55acf27
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h
@@ -0,0 +1,66 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SPHERE_SPHERE_COLLISION_ALGORITHM_H
+#define BT_SPHERE_SPHERE_COLLISION_ALGORITHM_H
+
+#include "btActivatingCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+#include "btCollisionDispatcher.h"
+
+class btPersistentManifold;
+
+/// btSphereSphereCollisionAlgorithm  provides sphere-sphere collision detection.
+/// Other features are frame-coherency (persistent data) and collision response.
+/// Also provides the most basic sample for custom/user btCollisionAlgorithm
+class btSphereSphereCollisionAlgorithm : public btActivatingCollisionAlgorithm
+{
+	bool	m_ownManifold;	///< true when m_manifoldPtr was created by this object and must be released by it
+	btPersistentManifold*	m_manifoldPtr;
+	
+public:
+	/// Uses mf when given; otherwise requests a new manifold from the dispatcher and owns it.
+	btSphereSphereCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
+	/// Manifold-less constructor. Both members are initialised explicitly so that the destructor,
+	/// processCollision() and getAllContactManifolds() never read indeterminate values.
+	btSphereSphereCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci)
+		: btActivatingCollisionAlgorithm(ci), m_ownManifold(false), m_manifoldPtr(0) {}
+
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	/// Reports the manifold only when this algorithm owns it.
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		if (m_manifoldPtr && m_ownManifold)
+			manifoldArray.push_back(m_manifoldPtr);
+	}
+	
+	virtual ~btSphereSphereCollisionAlgorithm();
+
+	/// Factory: placement-constructs the algorithm in memory obtained from the dispatcher.
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btSphereSphereCollisionAlgorithm));
+			return new(mem) btSphereSphereCollisionAlgorithm(0,ci,body0,body1);
+		}
+	};
+};
+
+#endif //BT_SPHERE_SPHERE_COLLISION_ALGORITHM_H
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.cpp b/src/bullet/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.cpp
new file mode 100644
index 00000000..c327c3ff
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.cpp
@@ -0,0 +1,84 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btSphereTriangleCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "SphereTriangleDetector.h"
+
+
+/// Sphere-vs-triangle algorithm; uses mf when given, otherwise requests a manifold from the dispatcher and owns it.
+btSphereTriangleCollisionAlgorithm::btSphereTriangleCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1,bool swapped)
+	: btActivatingCollisionAlgorithm(ci,col0,col1),
+	  m_ownManifold(false), m_manifoldPtr(mf), m_swapped(swapped)
+{
+	const bool needsOwnManifold = (m_manifoldPtr == 0);
+	if (needsOwnManifold)
+	{
+		m_manifoldPtr = m_dispatcher->getNewManifold(col0,col1);
+		m_ownManifold = true;
+	}
+}
+
+/// Releases the manifold through the dispatcher when it was created by this algorithm.
+btSphereTriangleCollisionAlgorithm::~btSphereTriangleCollisionAlgorithm()
+{
+	if (m_ownManifold && m_manifoldPtr)
+	{
+		m_dispatcher->releaseManifold(m_manifoldPtr);
+	}
+}
+
+/// Sphere-vs-triangle narrow phase; the closest-point query is delegated to SphereTriangleDetector.
+void btSphereTriangleCollisionAlgorithm::processCollision (btCollisionObject* col0,btCollisionObject* col1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	if (!m_manifoldPtr)
+		return;
+	// m_swapped tells which of the two bodies carries the sphere.
+	btCollisionObject* sphereBody = col0;
+	btCollisionObject* triangleBody = col1;
+	if (m_swapped)
+	{
+		sphereBody = col1;
+		triangleBody = col0;
+	}
+	btSphereShape* sphereShape = (btSphereShape*)sphereBody->getCollisionShape();
+	btTriangleShape* triangleShape = (btTriangleShape*)triangleBody->getCollisionShape();
+	/// report a contact. internally this will be kept persistent, and contact reduction is done
+	resultOut->setPersistentManifold(m_manifoldPtr);
+	SphereTriangleDetector detector(sphereShape,triangleShape, m_manifoldPtr->getContactBreakingThreshold());
+
+	btDiscreteCollisionDetectorInterface::ClosestPointInput query;
+	query.m_maximumDistanceSquared = btScalar(BT_LARGE_FLOAT);///@todo: tighter bounds
+	query.m_transformA = sphereBody->getWorldTransform();
+	query.m_transformB = triangleBody->getWorldTransform();
+	detector.getClosestPoints(query,*resultOut,dispatchInfo.m_debugDraw,m_swapped);
+
+	if (m_ownManifold)
+		resultOut->refreshContactPoints();
+}
+
+/// Time of impact is not implemented for sphere-triangle yet; the result is always 1.
+btScalar btSphereTriangleCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* col0,btCollisionObject* col1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	// Placeholder: every argument is unused on purpose.
+	(void)col0;
+	(void)col1;
+	(void)dispatchInfo;
+	(void)resultOut;
+	return btScalar(1.);
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.h b/src/bullet/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.h
new file mode 100644
index 00000000..7c6c4d8f
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.h
@@ -0,0 +1,69 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SPHERE_TRIANGLE_COLLISION_ALGORITHM_H
+#define BT_SPHERE_TRIANGLE_COLLISION_ALGORITHM_H
+
+#include "btActivatingCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+class btPersistentManifold;
+#include "btCollisionDispatcher.h"
+
+/// btSphereTriangleCollisionAlgorithm  provides sphere-triangle collision detection.
+/// Other features are frame-coherency (persistent data) and collision response.
+/// The closest-point query itself is delegated to SphereTriangleDetector.
+class btSphereTriangleCollisionAlgorithm : public btActivatingCollisionAlgorithm
+{
+	bool	m_ownManifold;	///< true when m_manifoldPtr was created by this object and must be released by it
+	btPersistentManifold*	m_manifoldPtr;
+	bool	m_swapped;	///< true when the sphere is the second body of the pair
+	
+public:
+	/// Uses mf when given; otherwise requests a new manifold from the dispatcher and owns it.
+	btSphereTriangleCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,bool swapped);
+
+	/// Manifold-less constructor. All members are initialised explicitly so that the destructor,
+	/// processCollision() and getAllContactManifolds() never read indeterminate values.
+	btSphereTriangleCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci)
+		: btActivatingCollisionAlgorithm(ci), m_ownManifold(false), m_manifoldPtr(0), m_swapped(false) {}
+
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	/// Reports the manifold only when this algorithm owns it.
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		if (m_manifoldPtr && m_ownManifold)
+			manifoldArray.push_back(m_manifoldPtr);
+	}
+	
+	virtual ~btSphereTriangleCollisionAlgorithm();
+
+	/// Factory: placement-constructs the algorithm in memory obtained from the dispatcher.
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btSphereTriangleCollisionAlgorithm));
+			// Forwards the manifold from the construction info and this factory's m_swapped flag.
+			return new(mem) btSphereTriangleCollisionAlgorithm(ci.m_manifold,ci,body0,body1,m_swapped);
+		}
+	};
+};
+
+#endif //BT_SPHERE_TRIANGLE_COLLISION_ALGORITHM_H
+
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btUnionFind.cpp b/src/bullet/BulletCollision/CollisionDispatch/btUnionFind.cpp
new file mode 100644
index 00000000..52229335
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btUnionFind.cpp
@@ -0,0 +1,82 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btUnionFind.h"
+
+
+
+/// Drops all elements by calling Free().
+btUnionFind::~btUnionFind()
+{
+	Free();
+}
+
+/// Nothing to set up here; elements are created later by allocate()/reset().
+btUnionFind::btUnionFind()
+{
+}
+/// Resizes the element array to N entries.
+void	btUnionFind::allocate(int N)
+{
+	m_elements.resize(N);
+}
+/// Clears the element array.
+void	btUnionFind::Free()
+{
+	m_elements.clear();
+}
+
+/// Resizes to N elements, then makes each element a singleton set: its own root (m_id == index) with size 1.
+void	btUnionFind::reset(int N)
+{
+	allocate(N);
+	for (int index = 0; index < N; ++index)
+	{
+		btElement& slot = m_elements[index];
+		slot.m_id = index; slot.m_sz = 1;
+	}
+}
+
+/// Orders elements by ascending m_id; passed to quickSort() in btUnionFind::sortIslands().
+class btUnionFindElementSortPredicate
+{
+public:
+	bool operator() ( const btElement& lhs, const btElement& rhs ) const
+	{
+		return rhs.m_id > lhs.m_id;
+	}
+};
+
+/// Destructive operation: overwrites every element's m_id with the root returned by find(), i.e. its island id,
+/// and then sorts the elements by that id so that the members of one island are stored contiguously.
+/// Unless STATIC_SIMULATION_ISLAND_OPTIMIZATION is defined, m_sz is also overwritten with the original index.
+void	btUnionFind::sortIslands()
+{
+	const int elementCount = m_elements.size();
+
+	// Replace each parent link by the root of its set, in ascending index order.
+	for (int index = 0; index < elementCount; ++index)
+	{
+		const int islandId = find(index);
+		m_elements[index].m_id = islandId;
+#ifndef STATIC_SIMULATION_ISLAND_OPTIMIZATION
+		m_elements[index].m_sz = index;
+#endif //STATIC_SIMULATION_ISLAND_OPTIMIZATION
+	}
+
+	// Order by island id with the array's own quickSort and the predicate class defined in this file.
+	//std::sort(m_elements.begin(), m_elements.end(), btUnionFindElementSortPredicate);
+	m_elements.quickSort(btUnionFindElementSortPredicate());
+}
diff --git a/src/bullet/BulletCollision/CollisionDispatch/btUnionFind.h b/src/bullet/BulletCollision/CollisionDispatch/btUnionFind.h
new file mode 100644
index 00000000..ef2a2920
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionDispatch/btUnionFind.h
@@ -0,0 +1,129 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_UNION_FIND_H
+#define BT_UNION_FIND_H
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+#define USE_PATH_COMPRESSION 1
+
+///see for discussion of static island optimizations by Vroonsh here: http://code.google.com/p/bullet/issues/detail?id=406
+#define STATIC_SIMULATION_ISLAND_OPTIMIZATION 1
+
+struct	btElement
+{
+	int	m_id;	// parent link: an element whose m_id equals its own index is a root (see btUnionFind::find/isRoot)
+	int	m_sz;	// set size: set to 1 by btUnionFind::reset and accumulated by btUnionFind::unite
+};
+
+///UnionFind calculates connected subsets
+// Implements weighted Quick Union with path compression
+// optimization: could use short ints instead of ints (halving memory, would limit the number of rigid bodies to 64k, sounds reasonable)
+class btUnionFind
+  {
+    private:
+		btAlignedObjectArray<btElement>	m_elements;
+
+    public:
+	  
+		btUnionFind();
+		~btUnionFind();
+
+	
+		//this is a special operation, destroying the content of btUnionFind.
+		//it sorts the elements, based on island id, in order to make it easy to iterate over islands
+		void	sortIslands();
+	  //resizes to N elements and makes each one its own root with size 1
+	  void	reset(int N);
+
+	  SIMD_FORCE_INLINE int	getNumElements() const
+	  {
+		  return int(m_elements.size());
+	  }
+	  SIMD_FORCE_INLINE bool  isRoot(int x) const
+	  {
+		  return (x == m_elements[x].m_id);
+	  }
+
+	  btElement&	getElement(int index)
+	  {
+		  return m_elements[index];
+	  }
+	  const btElement& getElement(int index) const
+	  {
+		  return m_elements[index];
+	  }
+   
+	  void	allocate(int N);
+	  void	Free();
+
+
+	  //returns non-zero when p and q currently have the same root.
+	  //not const: with USE_PATH_COMPRESSION each find() may rewrite parent links.
+	  int find(int p, int q)
+		{ 
+			return (find(p) == find(q)); 
+		}
+		//merges the sets containing p and q; does nothing when they already share a root
+		void unite(int p, int q)
+		{
+			int i = find(p), j = find(q);
+			if (i == j) 
+				return;
+			//NOTE: USE_PATH_COMPRESSION is defined at the top of this header, so the #else branch below is the one compiled
+#ifndef USE_PATH_COMPRESSION
+			//weighted quick union, this keeps the 'trees' balanced, and keeps performance of unite O( log(n) )
+			if (m_elements[i].m_sz < m_elements[j].m_sz)
+			{ 
+				m_elements[i].m_id = j; m_elements[j].m_sz += m_elements[i].m_sz; 
+			}
+			else 
+			{ 
+				m_elements[j].m_id = i; m_elements[i].m_sz += m_elements[j].m_sz; 
+			}
+#else
+			m_elements[i].m_id = j; m_elements[j].m_sz += m_elements[i].m_sz; 	//unweighted: root i is always attached under root j
+#endif //USE_PATH_COMPRESSION
+		}
+		//returns the root of x's set: follows m_id links until reaching an element that is its own m_id
+		int find(int x)
+		{ 
+			//btAssert(x < m_N);
+			//btAssert(x >= 0);
+
+			while (x != m_elements[x].m_id) 
+			{
+		//not really a reason not to use path compression, and it flattens the trees/improves find performance dramatically
+		//with compression, each step re-points x at its parent's parent and continues from there
+		#ifdef USE_PATH_COMPRESSION
+				const btElement* elementPtr = &m_elements[m_elements[x].m_id];
+				m_elements[x].m_id = elementPtr->m_id;
+				x = elementPtr->m_id;			
+		#else//
+				x = m_elements[x].m_id;
+		#endif		
+				//btAssert(x < m_N);
+				//btAssert(x >= 0);
+
+			}
+			return x; 
+		}
+
+
+  };
+
+
+#endif //BT_UNION_FIND_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btBox2dShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btBox2dShape.cpp
new file mode 100644
index 00000000..ecce028c
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btBox2dShape.cpp
@@ -0,0 +1,42 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btBox2dShape.h"
+
+
+//{ 
+
+
+//AABB of the shape under transform t: margin-free half extents and margin are forwarded to btTransformAabb.
+void btBox2dShape::getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	const btVector3& coreHalfExtents = getHalfExtentsWithoutMargin();
+	btTransformAabb(coreHalfExtents,getMargin(),t,aabbMin,aabbMax);
+}
+//Writes the three diagonal inertia terms for a box of the given mass into 'inertia'.
+//The edge lengths are twice the half extents including the collision margin.
+void	btBox2dShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	const btVector3 extentsWithMargin = getHalfExtentsWithMargin();
+	const btScalar sizeX = btScalar(2.)*(extentsWithMargin.x());
+	const btScalar sizeY = btScalar(2.)*(extentsWithMargin.y());
+	const btScalar sizeZ = btScalar(2.)*(extentsWithMargin.z());
+	const btScalar massOver12 = mass/(btScalar(12.0));
+	//each axis term is mass/12 * (sum of the squared edge lengths along the two other axes)
+	inertia.setValue(massOver12 * (sizeY*sizeY + sizeZ*sizeZ),
+					massOver12 * (sizeX*sizeX + sizeZ*sizeZ),
+					massOver12 * (sizeX*sizeX + sizeY*sizeY));
+}
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btBox2dShape.h b/src/bullet/BulletCollision/CollisionShapes/btBox2dShape.h
new file mode 100644
index 00000000..f4a9ca03
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btBox2dShape.h
@@ -0,0 +1,369 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_OBB_BOX_2D_SHAPE_H
+#define BT_OBB_BOX_2D_SHAPE_H
+
+#include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h"
+#include "BulletCollision/CollisionShapes/btCollisionMargin.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btMinMax.h"
+
+///The btBox2dShape is a box primitive around the origin, its sides axis aligned with length specified by half extents, in local shape coordinates. When used as part of a btCollisionObject or btRigidBody it will be an oriented box in world space.
+class btBox2dShape: public btPolyhedralConvexShape
+{
+	//NOTE(review): there is no dedicated half-extent member; per the disabled declaration below m_implicitShapeDimensions is used
+	//btVector3	m_boxHalfExtents1; //use m_implicitShapeDimensions instead
+	//outline data written once by the constructor from the unscaled boxHalfExtents; setMargin/setLocalScaling below do not update it
+	btVector3 m_centroid;
+	btVector3 m_vertices[4];
+	btVector3 m_normals[4];
+
+public:
+	///margin-free half extents grown by getMargin() on all three axes
+	btVector3 getHalfExtentsWithMargin() const
+	{
+		btVector3 halfExtents = getHalfExtentsWithoutMargin();
+		btVector3 margin(getMargin(),getMargin(),getMargin());
+		halfExtents += margin;
+		return halfExtents;
+	}
+	///the stored dimensions; the constructor and both setters below keep them at "scaled half extents minus margin"
+	const btVector3& getHalfExtentsWithoutMargin() const
+	{
+		return m_implicitShapeDimensions;//changed in Bullet 2.63: assume the scaling and margin are included
+	}
+	
+	///support vertex including the margin: per axis +/- the grown half extent, selected by btFsels on vec's component (presumably + for a component >= 0 - confirm in btScalar.h)
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec) const
+	{
+		btVector3 halfExtents = getHalfExtentsWithoutMargin();
+		btVector3 margin(getMargin(),getMargin(),getMargin());
+		halfExtents += margin;
+		
+		return btVector3(btFsels(vec.x(), halfExtents.x(), -halfExtents.x()),
+			btFsels(vec.y(), halfExtents.y(), -halfExtents.y()),
+			btFsels(vec.z(), halfExtents.z(), -halfExtents.z()));
+	}
+	///same selection as localGetSupportingVertex, but on the margin-free half extents
+	SIMD_FORCE_INLINE  btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const
+	{
+		const btVector3& halfExtents = getHalfExtentsWithoutMargin();
+		
+		return btVector3(btFsels(vec.x(), halfExtents.x(), -halfExtents.x()),
+			btFsels(vec.y(), halfExtents.y(), -halfExtents.y()),
+			btFsels(vec.z(), halfExtents.z(), -halfExtents.z()));
+	}
+	///margin-free support vertex for each of the numVectors input directions, written to supportVerticesOut[i]
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+	{
+		const btVector3& halfExtents = getHalfExtentsWithoutMargin();
+	
+		for (int i=0;i<numVectors;i++)
+		{
+			const btVector3& vec = vectors[i];
+			supportVerticesOut[i].setValue(btFsels(vec.x(), halfExtents.x(), -halfExtents.x()),
+				btFsels(vec.y(), halfExtents.y(), -halfExtents.y()),
+				btFsels(vec.z(), halfExtents.z(), -halfExtents.z())); 
+		}
+
+	}
+
+	///NOTE(review): only m_vertices gets z = 0 here; m_implicitShapeDimensions still takes its z from boxHalfExtents - confirm callers pass z = 0
+	///a btBox2dShape is a flat 2D box in the X-Y plane (Z extents are zero)
+	btBox2dShape( const btVector3& boxHalfExtents) 
+		: btPolyhedralConvexShape(),
+		m_centroid(0,0,0)
+	{
+		m_vertices[0].setValue(-boxHalfExtents.getX(),-boxHalfExtents.getY(),0);
+		m_vertices[1].setValue(boxHalfExtents.getX(),-boxHalfExtents.getY(),0);
+		m_vertices[2].setValue(boxHalfExtents.getX(),boxHalfExtents.getY(),0);
+		m_vertices[3].setValue(-boxHalfExtents.getX(),boxHalfExtents.getY(),0);
+		//one outward normal per outline edge, in the order bottom (-y), right (+x), top (+y), left (-x)
+		m_normals[0].setValue(0,-1,0);
+		m_normals[1].setValue(1,0,0);
+		m_normals[2].setValue(0,1,0);
+		m_normals[3].setValue(-1,0,0);
+		//the smaller of the X/Y half extents is handed to setSafeMargin (defined elsewhere; presumably limits the margin - confirm)
+		btScalar minDimension = boxHalfExtents.getX();
+		if (minDimension>boxHalfExtents.getY())
+			minDimension = boxHalfExtents.getY();
+		setSafeMargin(minDimension);
+
+		m_shapeType = BOX_2D_SHAPE_PROXYTYPE;
+		btVector3 margin(getMargin(),getMargin(),getMargin());
+		m_implicitShapeDimensions = (boxHalfExtents * m_localScaling) - margin;
+	};
+	///changes the margin while keeping the outer size: the old margin is added back to the stored dimensions before the new one is subtracted
+	virtual void setMargin(btScalar collisionMargin)
+	{
+		//correct the m_implicitShapeDimensions for the margin
+		btVector3 oldMargin(getMargin(),getMargin(),getMargin());
+		btVector3 implicitShapeDimensionsWithMargin = m_implicitShapeDimensions+oldMargin;
+		
+		btConvexInternalShape::setMargin(collisionMargin);
+		btVector3 newMargin(getMargin(),getMargin(),getMargin());
+		m_implicitShapeDimensions = implicitShapeDimensionsWithMargin - newMargin;
+
+	}
+	virtual void	setLocalScaling(const btVector3& scaling)
+	{
+		btVector3 oldMargin(getMargin(),getMargin(),getMargin());
+		btVector3 implicitShapeDimensionsWithMargin = m_implicitShapeDimensions+oldMargin;
+		btVector3 unScaledImplicitShapeDimensionsWithMargin = implicitShapeDimensionsWithMargin / m_localScaling;
+		//the old scaling was divided out above; the base class call changes m_localScaling, which is re-applied below (the margin itself is not scaled)
+		btConvexInternalShape::setLocalScaling(scaling);
+
+		m_implicitShapeDimensions = (unScaledImplicitShapeDimensionsWithMargin * m_localScaling) - oldMargin;
+
+	}
+	///getAabb and calculateLocalInertia are only declared here; their definitions are out of line
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+
+	
+
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+
+
+
+	///size of the m_vertices / m_normals arrays returned by getVertices() / getNormals()
+	int	getVertexCount() const
+	{
+		return 4;
+	}
+
+	virtual int getNumVertices()const
+	{
+		return 4;
+	}
+
+	const btVector3* getVertices() const
+	{
+		return &m_vertices[0];
+	}
+
+	const btVector3* getNormals() const
+	{
+		return &m_normals[0];
+	}
+
+
+
+
+
+
+	///plane i as a normal (from getPlaneEquation) plus a support point: the margin-inclusive support vertex in direction -normal
+	virtual void getPlane(btVector3& planeNormal,btVector3& planeSupport,int i ) const
+	{
+		//this plane might not be aligned...
+		btVector4 plane ;
+		getPlaneEquation(plane,i);
+		planeNormal = btVector3(plane.getX(),plane.getY(),plane.getZ());
+		planeSupport = localGetSupportingVertex(-planeNormal);
+	}
+
+
+	const btVector3& getCentroid() const
+	{
+		return m_centroid;
+	}
+	//NOTE(review): getNumPlanes()/getNumEdges() report 6 and 12 (the counts of a full 3D box) while getNumVertices() reports 4 - confirm this mix is intended
+	virtual int getNumPlanes() const
+	{
+		return 6;
+	}	
+	
+	
+
+	virtual int getNumEdges() const
+	{
+		return 12;
+	}
+
+	///corner i of the margin-free box: a set bit 0 / 1 / 2 of i selects the negative x / y / z side, a cleared bit the positive side
+	virtual void getVertex(int i,btVector3& vtx) const
+	{
+		btVector3 halfExtents = getHalfExtentsWithoutMargin();
+
+		vtx = btVector3(
+				halfExtents.x() * (1-(i&1)) - halfExtents.x() * (i&1),
+				halfExtents.y() * (1-((i&2)>>1)) - halfExtents.y() * ((i&2)>>1),
+				halfExtents.z() * (1-((i&4)>>2)) - halfExtents.z() * ((i&4)>>2));
+	}
+	
+	///plane i (0..5) with normals +x, -x, +y, -y, +z, -z and w = -(margin-free half extent on that axis); any other i hits btAssert(0)
+	virtual void	getPlaneEquation(btVector4& plane,int i) const
+	{
+		btVector3 halfExtents = getHalfExtentsWithoutMargin();
+
+		switch (i)
+		{
+		case 0:
+			plane.setValue(btScalar(1.),btScalar(0.),btScalar(0.),-halfExtents.x());
+			break;
+		case 1:
+			plane.setValue(btScalar(-1.),btScalar(0.),btScalar(0.),-halfExtents.x());
+			break;
+		case 2:
+			plane.setValue(btScalar(0.),btScalar(1.),btScalar(0.),-halfExtents.y());
+			break;
+		case 3:
+			plane.setValue(btScalar(0.),btScalar(-1.),btScalar(0.),-halfExtents.y());
+			break;
+		case 4:
+			plane.setValue(btScalar(0.),btScalar(0.),btScalar(1.),-halfExtents.z());
+			break;
+		case 5:
+			plane.setValue(btScalar(0.),btScalar(0.),btScalar(-1.),-halfExtents.z());
+			break;
+		default:
+			btAssert(0);
+		}
+	}
+
+	///edge i (0..11) as two getVertex corners; NOTE(review): cases 4..11 use corner indices 4..7, beyond what getNumVertices() reports
+	virtual void getEdge(int i,btVector3& pa,btVector3& pb) const
+	//virtual void getEdge(int i,Edge& edge) const
+	{
+		int edgeVert0 = 0;
+		int edgeVert1 = 0;
+
+		switch (i)
+		{
+		case 0:
+				edgeVert0 = 0;
+				edgeVert1 = 1;
+			break;
+		case 1:
+				edgeVert0 = 0;
+				edgeVert1 = 2;
+			break;
+		case 2:
+			edgeVert0 = 1;
+			edgeVert1 = 3;
+
+			break;
+		case 3:
+			edgeVert0 = 2;
+			edgeVert1 = 3;
+			break;
+		case 4:
+			edgeVert0 = 0;
+			edgeVert1 = 4;
+			break;
+		case 5:
+			edgeVert0 = 1;
+			edgeVert1 = 5;
+
+			break;
+		case 6:
+			edgeVert0 = 2;
+			edgeVert1 = 6;
+			break;
+		case 7:
+			edgeVert0 = 3;
+			edgeVert1 = 7;
+			break;
+		case 8:
+			edgeVert0 = 4;
+			edgeVert1 = 5;
+			break;
+		case 9:
+			edgeVert0 = 4;
+			edgeVert1 = 6;
+			break;
+		case 10:
+			edgeVert0 = 5;
+			edgeVert1 = 7;
+			break;
+		case 11:
+			edgeVert0 = 6;
+			edgeVert1 = 7;
+			break;
+		default:
+			btAssert(0);
+
+		}
+
+		getVertex(edgeVert0,pa );
+		getVertex(edgeVert1,pb );
+	}
+
+
+
+
+	///true when pt lies within the margin-free half extents widened by 'tolerance' on every axis (bounds inclusive)
+	virtual	bool isInside(const btVector3& pt,btScalar tolerance) const
+	{
+		btVector3 halfExtents = getHalfExtentsWithoutMargin();
+
+		//btScalar minDist = 2*tolerance;
+		
+		bool result =	(pt.x() <= (halfExtents.x()+tolerance)) &&
+						(pt.x() >= (-halfExtents.x()-tolerance)) &&
+						(pt.y() <= (halfExtents.y()+tolerance)) &&
+						(pt.y() >= (-halfExtents.y()-tolerance)) &&
+						(pt.z() <= (halfExtents.z()+tolerance)) &&
+						(pt.z() >= (-halfExtents.z()-tolerance));
+		
+		return result;
+	}
+
+
+	//debugging
+	virtual const char*	getName()const
+	{
+		return "Box2d";
+	}
+
+	virtual int		getNumPreferredPenetrationDirections() const
+	{
+		return 6;
+	}
+	///unit axis for index 0..5 in the order +x, -x, +y, -y, +z, -z; any other index hits btAssert(0)
+	virtual void	getPreferredPenetrationDirection(int index, btVector3& penetrationVector) const
+	{
+		switch (index)
+		{
+		case 0:
+			penetrationVector.setValue(btScalar(1.),btScalar(0.),btScalar(0.));
+			break;
+		case 1:
+			penetrationVector.setValue(btScalar(-1.),btScalar(0.),btScalar(0.));
+			break;
+		case 2:
+			penetrationVector.setValue(btScalar(0.),btScalar(1.),btScalar(0.));
+			break;
+		case 3:
+			penetrationVector.setValue(btScalar(0.),btScalar(-1.),btScalar(0.));
+			break;
+		case 4:
+			penetrationVector.setValue(btScalar(0.),btScalar(0.),btScalar(1.));
+			break;
+		case 5:
+			penetrationVector.setValue(btScalar(0.),btScalar(0.),btScalar(-1.));
+			break;
+		default:
+			btAssert(0);
+		}
+	}
+
+};
+
+#endif //BT_OBB_BOX_2D_SHAPE_H
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btBoxShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btBoxShape.cpp
new file mode 100644
index 00000000..3859138f
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btBoxShape.cpp
@@ -0,0 +1,51 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#include "btBoxShape.h"
+
+//Builds a box from its half extents; the stored dimensions are the scaled half extents minus the collision margin.
+btBoxShape::btBoxShape( const btVector3& boxHalfExtents) 
+: btPolyhedralConvexShape()
+{
+	m_shapeType = BOX_SHAPE_PROXYTYPE;
+	//the margin is settled first because getMargin() is subtracted below (setSafeMargin is defined elsewhere; presumably it limits the margin to the box size - confirm)
+	setSafeMargin(boxHalfExtents);
+	const btVector3 margin(getMargin(),getMargin(),getMargin());
+	m_implicitShapeDimensions = (boxHalfExtents * m_localScaling) - margin;
+}
+
+
+
+
+//AABB of the box under transform t: margin-free half extents and margin are forwarded to btTransformAabb.
+void btBoxShape::getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	const btVector3& coreHalfExtents = getHalfExtentsWithoutMargin();
+	btTransformAabb(coreHalfExtents,getMargin(),t,aabbMin,aabbMax);
+}
+//Writes the three diagonal inertia terms for a box of the given mass into 'inertia'.
+//The edge lengths are twice the half extents including the collision margin.
+void	btBoxShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	const btVector3 extentsWithMargin = getHalfExtentsWithMargin();
+	const btScalar sizeX = btScalar(2.)*(extentsWithMargin.x());
+	const btScalar sizeY = btScalar(2.)*(extentsWithMargin.y());
+	const btScalar sizeZ = btScalar(2.)*(extentsWithMargin.z());
+	const btScalar massOver12 = mass/(btScalar(12.0));
+	//each axis term is mass/12 * (sum of the squared edge lengths along the two other axes)
+	inertia.setValue(massOver12 * (sizeY*sizeY + sizeZ*sizeZ),
+					massOver12 * (sizeX*sizeX + sizeZ*sizeZ),
+					massOver12 * (sizeX*sizeX + sizeY*sizeY));
+}
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btBoxShape.h b/src/bullet/BulletCollision/CollisionShapes/btBoxShape.h
new file mode 100644
index 00000000..0c5857da
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btBoxShape.h
@@ -0,0 +1,312 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_OBB_BOX_MINKOWSKI_H
+#define BT_OBB_BOX_MINKOWSKI_H
+
+#include "btPolyhedralConvexShape.h"
+#include "btCollisionMargin.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btMinMax.h"
+
+///The btBoxShape is a box primitive around the origin, its sides axis aligned with length specified by half extents, in local shape coordinates. When used as part of a btCollisionObject or btRigidBody it will be an oriented box in world space.
+class btBoxShape: public btPolyhedralConvexShape
+{
+
+	//btVector3	m_boxHalfExtents1; //use m_implicitShapeDimensions instead
+
+
+public:
+	///margin-free half extents grown by getMargin() on all three axes
+	btVector3 getHalfExtentsWithMargin() const
+	{
+		btVector3 halfExtents = getHalfExtentsWithoutMargin();
+		btVector3 margin(getMargin(),getMargin(),getMargin());
+		halfExtents += margin;
+		return halfExtents;
+	}
+	///the stored dimensions; setMargin and setLocalScaling below keep them at "scaled half extents minus margin"
+	const btVector3& getHalfExtentsWithoutMargin() const
+	{
+		return m_implicitShapeDimensions;//scaling is included, margin is not
+	}
+	
+	///support vertex including the margin: per axis +/- the grown half extent, selected by btFsels on vec's component (presumably + for a component >= 0 - confirm in btScalar.h)
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec) const
+	{
+		btVector3 halfExtents = getHalfExtentsWithoutMargin();
+		btVector3 margin(getMargin(),getMargin(),getMargin());
+		halfExtents += margin;
+		
+		return btVector3(btFsels(vec.x(), halfExtents.x(), -halfExtents.x()),
+			btFsels(vec.y(), halfExtents.y(), -halfExtents.y()),
+			btFsels(vec.z(), halfExtents.z(), -halfExtents.z()));
+	}
+	///same selection as localGetSupportingVertex, but on the margin-free half extents
+	SIMD_FORCE_INLINE  btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const
+	{
+		const btVector3& halfExtents = getHalfExtentsWithoutMargin();
+		
+		return btVector3(btFsels(vec.x(), halfExtents.x(), -halfExtents.x()),
+			btFsels(vec.y(), halfExtents.y(), -halfExtents.y()),
+			btFsels(vec.z(), halfExtents.z(), -halfExtents.z()));
+	}
+	///margin-free support vertex for each of the numVectors input directions, written to supportVerticesOut[i]
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+	{
+		const btVector3& halfExtents = getHalfExtentsWithoutMargin();
+	
+		for (int i=0;i<numVectors;i++)
+		{
+			const btVector3& vec = vectors[i];
+			supportVerticesOut[i].setValue(btFsels(vec.x(), halfExtents.x(), -halfExtents.x()),
+				btFsels(vec.y(), halfExtents.y(), -halfExtents.y()),
+				btFsels(vec.z(), halfExtents.z(), -halfExtents.z())); 
+		}
+
+	}
+
+	///constructs the box from its half extents; only declared here, the definition is out of line
+	btBoxShape( const btVector3& boxHalfExtents);
+	///changes the margin while keeping the outer size: the old margin is added back to the stored dimensions before the new one is subtracted
+	virtual void setMargin(btScalar collisionMargin)
+	{
+		//correct the m_implicitShapeDimensions for the margin
+		btVector3 oldMargin(getMargin(),getMargin(),getMargin());
+		btVector3 implicitShapeDimensionsWithMargin = m_implicitShapeDimensions+oldMargin;
+		
+		btConvexInternalShape::setMargin(collisionMargin);
+		btVector3 newMargin(getMargin(),getMargin(),getMargin());
+		m_implicitShapeDimensions = implicitShapeDimensionsWithMargin - newMargin;
+
+	}
+	virtual void	setLocalScaling(const btVector3& scaling)
+	{
+		btVector3 oldMargin(getMargin(),getMargin(),getMargin());
+		btVector3 implicitShapeDimensionsWithMargin = m_implicitShapeDimensions+oldMargin;
+		btVector3 unScaledImplicitShapeDimensionsWithMargin = implicitShapeDimensionsWithMargin / m_localScaling;
+		//the old scaling was divided out above; the base class call changes m_localScaling, which is re-applied below (the margin itself is not scaled)
+		btConvexInternalShape::setLocalScaling(scaling);
+
+		m_implicitShapeDimensions = (unScaledImplicitShapeDimensionsWithMargin * m_localScaling) - oldMargin;
+
+	}
+
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+
+	
+
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+	///plane i as a normal (from getPlaneEquation) plus a support point: the margin-inclusive support vertex in direction -normal
+	virtual void getPlane(btVector3& planeNormal,btVector3& planeSupport,int i ) const
+	{
+		//this plane might not be aligned...
+		btVector4 plane ;
+		getPlaneEquation(plane,i);
+		planeNormal = btVector3(plane.getX(),plane.getY(),plane.getZ());
+		planeSupport = localGetSupportingVertex(-planeNormal);
+	}
+
+	
+	virtual int getNumPlanes() const
+	{
+		return 6;
+	}	
+	
+	virtual int	getNumVertices() const 
+	{
+		return 8;
+	}
+
+	virtual int getNumEdges() const
+	{
+		return 12;
+	}
+
+	///corner i (0..7), margin included: a set bit 0 / 1 / 2 of i selects the negative x / y / z side. NOTE(review): getPlaneEquation and isInside use the margin-free extents instead - confirm
+	virtual void getVertex(int i,btVector3& vtx) const
+	{
+		btVector3 halfExtents = getHalfExtentsWithMargin();
+
+		vtx = btVector3(
+				halfExtents.x() * (1-(i&1)) - halfExtents.x() * (i&1),
+				halfExtents.y() * (1-((i&2)>>1)) - halfExtents.y() * ((i&2)>>1),
+				halfExtents.z() * (1-((i&4)>>2)) - halfExtents.z() * ((i&4)>>2));
+	}
+	
+	///plane i (0..5) with normals +x, -x, +y, -y, +z, -z and w = -(margin-free half extent on that axis); any other i hits btAssert(0)
+	virtual void	getPlaneEquation(btVector4& plane,int i) const
+	{
+		btVector3 halfExtents = getHalfExtentsWithoutMargin();
+
+		switch (i)
+		{
+		case 0:
+			plane.setValue(btScalar(1.),btScalar(0.),btScalar(0.),-halfExtents.x());
+			break;
+		case 1:
+			plane.setValue(btScalar(-1.),btScalar(0.),btScalar(0.),-halfExtents.x());
+			break;
+		case 2:
+			plane.setValue(btScalar(0.),btScalar(1.),btScalar(0.),-halfExtents.y());
+			break;
+		case 3:
+			plane.setValue(btScalar(0.),btScalar(-1.),btScalar(0.),-halfExtents.y());
+			break;
+		case 4:
+			plane.setValue(btScalar(0.),btScalar(0.),btScalar(1.),-halfExtents.z());
+			break;
+		case 5:
+			plane.setValue(btScalar(0.),btScalar(0.),btScalar(-1.),-halfExtents.z());
+			break;
+		default:
+			btAssert(0);
+		}
+	}
+
+	///edge i (0..11) as the two getVertex corners it connects; each listed pair differs in exactly one of the three bits of the corner index
+	virtual void getEdge(int i,btVector3& pa,btVector3& pb) const
+	//virtual void getEdge(int i,Edge& edge) const
+	{
+		int edgeVert0 = 0;
+		int edgeVert1 = 0;
+
+		switch (i)
+		{
+		case 0:
+				edgeVert0 = 0;
+				edgeVert1 = 1;
+			break;
+		case 1:
+				edgeVert0 = 0;
+				edgeVert1 = 2;
+			break;
+		case 2:
+			edgeVert0 = 1;
+			edgeVert1 = 3;
+
+			break;
+		case 3:
+			edgeVert0 = 2;
+			edgeVert1 = 3;
+			break;
+		case 4:
+			edgeVert0 = 0;
+			edgeVert1 = 4;
+			break;
+		case 5:
+			edgeVert0 = 1;
+			edgeVert1 = 5;
+
+			break;
+		case 6:
+			edgeVert0 = 2;
+			edgeVert1 = 6;
+			break;
+		case 7:
+			edgeVert0 = 3;
+			edgeVert1 = 7;
+			break;
+		case 8:
+			edgeVert0 = 4;
+			edgeVert1 = 5;
+			break;
+		case 9:
+			edgeVert0 = 4;
+			edgeVert1 = 6;
+			break;
+		case 10:
+			edgeVert0 = 5;
+			edgeVert1 = 7;
+			break;
+		case 11:
+			edgeVert0 = 6;
+			edgeVert1 = 7;
+			break;
+		default:
+			btAssert(0);
+
+		}
+
+		getVertex(edgeVert0,pa );
+		getVertex(edgeVert1,pb );
+	}
+
+
+
+
+	///true when pt lies within the margin-free half extents widened by 'tolerance' on every axis (bounds inclusive)
+	virtual	bool isInside(const btVector3& pt,btScalar tolerance) const
+	{
+		btVector3 halfExtents = getHalfExtentsWithoutMargin();
+
+		//btScalar minDist = 2*tolerance;
+		
+		bool result =	(pt.x() <= (halfExtents.x()+tolerance)) &&
+						(pt.x() >= (-halfExtents.x()-tolerance)) &&
+						(pt.y() <= (halfExtents.y()+tolerance)) &&
+						(pt.y() >= (-halfExtents.y()-tolerance)) &&
+						(pt.z() <= (halfExtents.z()+tolerance)) &&
+						(pt.z() >= (-halfExtents.z()-tolerance));
+		
+		return result;
+	}
+
+
+	//debugging
+	virtual const char*	getName()const
+	{
+		return "Box";
+	}
+
+	virtual int		getNumPreferredPenetrationDirections() const
+	{
+		return 6;
+	}
+	///unit axis for index 0..5 in the order +x, -x, +y, -y, +z, -z; any other index hits btAssert(0)
+	virtual void	getPreferredPenetrationDirection(int index, btVector3& penetrationVector) const
+	{
+		switch (index)
+		{
+		case 0:
+			penetrationVector.setValue(btScalar(1.),btScalar(0.),btScalar(0.));
+			break;
+		case 1:
+			penetrationVector.setValue(btScalar(-1.),btScalar(0.),btScalar(0.));
+			break;
+		case 2:
+			penetrationVector.setValue(btScalar(0.),btScalar(1.),btScalar(0.));
+			break;
+		case 3:
+			penetrationVector.setValue(btScalar(0.),btScalar(-1.),btScalar(0.));
+			break;
+		case 4:
+			penetrationVector.setValue(btScalar(0.),btScalar(0.),btScalar(1.));
+			break;
+		case 5:
+			penetrationVector.setValue(btScalar(0.),btScalar(0.),btScalar(-1.));
+			break;
+		default:
+			btAssert(0);
+		}
+	}
+
+};
+
+
+#endif //BT_OBB_BOX_MINKOWSKI_H
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp
new file mode 100644
index 00000000..ace4cfa2
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp
@@ -0,0 +1,466 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//#define DISABLE_BVH
+
+#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btOptimizedBvh.h"
+#include "LinearMath/btSerializer.h"
+
+///Bvh Concave triangle mesh is a static-triangle mesh shape with Bounding Volume Hierarchy optimization.
+///Uses an interface to access the triangles to allow for sharing graphics/physics triangles.
+//Creates the shape for 'meshInterface' with no bvh attached (m_bvh = 0, m_ownsBvh = false).
+//Unless DISABLE_BVH is defined, buildBvh = true additionally calls buildOptimizedBvh() right away.
+//The compression flag is only recorded here, in m_useQuantizedAabbCompression.
+btBvhTriangleMeshShape::btBvhTriangleMeshShape(btStridingMeshInterface* meshInterface, bool useQuantizedAabbCompression, bool buildBvh)
+	: btTriangleMeshShape(meshInterface)
+	, m_bvh(0)
+	, m_triangleInfoMap(0)
+	, m_useQuantizedAabbCompression(useQuantizedAabbCompression)
+	, m_ownsBvh(false)
+{
+	m_shapeType = TRIANGLE_MESH_SHAPE_PROXYTYPE;
+
+#ifndef DISABLE_BVH
+	//construct bvh from meshInterface, but only on request
+	if (buildBvh)
+		buildOptimizedBvh();
+
+#endif //DISABLE_BVH
+}
+
+//Variant taking explicit bounds: when buildBvh is set (and DISABLE_BVH is not defined) a btOptimizedBvh is
+//constructed in 16-byte aligned storage, built for [bvhAabbMin,bvhAabbMax] and marked as owned by this shape.
+btBvhTriangleMeshShape::btBvhTriangleMeshShape(btStridingMeshInterface* meshInterface, bool useQuantizedAabbCompression,const btVector3& bvhAabbMin,const btVector3& bvhAabbMax,bool buildBvh)
+	: btTriangleMeshShape(meshInterface)
+	, m_bvh(0)
+	, m_triangleInfoMap(0)
+	, m_useQuantizedAabbCompression(useQuantizedAabbCompression)
+	, m_ownsBvh(false)
+{
+	m_shapeType = TRIANGLE_MESH_SHAPE_PROXYTYPE;
+
+#ifndef DISABLE_BVH
+	//construct bvh from meshInterface, but only on request
+	if (!buildBvh)
+		return;
+
+	//placement-new into btAlignedAlloc memory; the destructor undoes both steps when m_ownsBvh is set
+	m_bvh = new (btAlignedAlloc(sizeof(btOptimizedBvh),16)) btOptimizedBvh();
+	m_bvh->build(meshInterface,m_useQuantizedAabbCompression,bvhAabbMin,bvhAabbMax);
+	m_ownsBvh = true;
+
+#endif //DISABLE_BVH
+}
+
+//Asks the bvh for a partial refit limited to [aabbMin,aabbMax], then folds that box into the cached local AABB via setMin/setMax.
+void	btBvhTriangleMeshShape::partialRefitTree(const btVector3& aabbMin,const btVector3& aabbMax)
+{
+	m_bvh->refitPartial(m_meshInterface, aabbMin, aabbMax);
+	m_localAabbMin.setMin(aabbMin);
+	m_localAabbMax.setMax(aabbMax);
+}
+
+
+//Asks the bvh to refit against the mesh for the given bounds, then calls recalcLocalAabb().
+void	btBvhTriangleMeshShape::refitTree(const btVector3& aabbMin,const btVector3& aabbMax)
+{
+	m_bvh->refit(m_meshInterface, aabbMin, aabbMax);
+	recalcLocalAabb();
+}
+
+//Releases the bvh, but only when this shape owns it (m_ownsBvh).
+btBvhTriangleMeshShape::~btBvhTriangleMeshShape()
+{
+	if (!m_ownsBvh) return;
+	//mirror of the placement-new construction: run the destructor by hand, then give the aligned storage back
+	m_bvh->~btOptimizedBvh();
+	btAlignedFree(m_bvh);
+}
+
+//Reports to 'callback' each triangle held by a bvh node that m_bvh->reportRayOverlappingNodex selects for the
+//ray raySource->rayTarget. Index buffers of type PHY_INTEGER, PHY_SHORT and PHY_UCHAR are decoded.
+void	btBvhTriangleMeshShape::performRaycast (btTriangleCallback* callback, const btVector3& raySource, const btVector3& rayTarget)
+{
+	struct	MyNodeOverlapCallback : public btNodeOverlapCallback
+	{
+		btStridingMeshInterface*	m_meshInterface;
+		btTriangleCallback* m_callback;
+
+		MyNodeOverlapCallback(btTriangleCallback* callback,btStridingMeshInterface* meshInterface)
+			:m_meshInterface(meshInterface),
+			m_callback(callback)
+		{
+		}
+		//looks up triangle nodeTriangleIndex of sub part nodeSubPart, scales its vertices and passes them to m_callback
+		virtual void processNode(int nodeSubPart, int nodeTriangleIndex)
+		{
+			btVector3 m_triangle[3];
+			const unsigned char *vertexbase;
+			int numverts;
+			PHY_ScalarType type;
+			int stride;
+			const unsigned char *indexbase;
+			int indexstride;
+			int numfaces;
+			PHY_ScalarType indicestype;
+
+			m_meshInterface->getLockedReadOnlyVertexIndexBase(
+				&vertexbase,
+				numverts,
+				type,
+				stride,
+				&indexbase,
+				indexstride,
+				numfaces,
+				indicestype,
+				nodeSubPart);
+
+			unsigned int* gfxbase = (unsigned int*)(indexbase+nodeTriangleIndex*indexstride);
+			btAssert(indicestype==PHY_INTEGER||indicestype==PHY_SHORT||indicestype==PHY_UCHAR);
+
+			const btVector3& meshScaling = m_meshInterface->getScaling();
+			for (int j=2;j>=0;j--)
+			{
+				//decode index j as unsigned short, unsigned int or unsigned char, depending on indicestype
+				int graphicsindex = indicestype==PHY_SHORT?((unsigned short*)gfxbase)[j]:indicestype==PHY_INTEGER?gfxbase[j]:((unsigned char*)gfxbase)[j];
+				if (type == PHY_FLOAT)
+				{
+					float* graphicsbase = (float*)(vertexbase+graphicsindex*stride);
+					m_triangle[j] = btVector3(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ());		
+				}
+				else
+				{
+					double* graphicsbase = (double*)(vertexbase+graphicsindex*stride);
+					m_triangle[j] = btVector3(btScalar(graphicsbase[0])*meshScaling.getX(),btScalar(graphicsbase[1])*meshScaling.getY(),btScalar(graphicsbase[2])*meshScaling.getZ());		
+				}
+			}
+
+			/* Perform ray vs. triangle collision here */
+			m_callback->processTriangle(m_triangle,nodeSubPart,nodeTriangleIndex);
+			m_meshInterface->unLockReadOnlyVertexBase(nodeSubPart);
+		}
+	};
+
+	MyNodeOverlapCallback	myNodeCallback(callback,m_meshInterface);
+
+	m_bvh->reportRayOverlappingNodex(&myNodeCallback,raySource,rayTarget);
+}
+
+//Reports to 'callback' each triangle held by a bvh node that m_bvh->reportBoxCastOverlappingNodex selects for the box
+//[aabbMin,aabbMax] cast from raySource to rayTarget. PHY_INTEGER, PHY_SHORT and PHY_UCHAR index buffers are decoded.
+void	btBvhTriangleMeshShape::performConvexcast (btTriangleCallback* callback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax)
+{
+	struct	MyNodeOverlapCallback : public btNodeOverlapCallback
+	{
+		btStridingMeshInterface*	m_meshInterface;
+		btTriangleCallback* m_callback;
+
+		MyNodeOverlapCallback(btTriangleCallback* callback,btStridingMeshInterface* meshInterface)
+			:m_meshInterface(meshInterface),
+			m_callback(callback)
+		{
+		}
+		//looks up triangle nodeTriangleIndex of sub part nodeSubPart, scales its vertices and passes them to m_callback
+		virtual void processNode(int nodeSubPart, int nodeTriangleIndex)
+		{
+			btVector3 m_triangle[3];
+			const unsigned char *vertexbase;
+			int numverts;
+			PHY_ScalarType type;
+			int stride;
+			const unsigned char *indexbase;
+			int indexstride;
+			int numfaces;
+			PHY_ScalarType indicestype;
+
+			m_meshInterface->getLockedReadOnlyVertexIndexBase(
+				&vertexbase,
+				numverts,
+				type,
+				stride,
+				&indexbase,
+				indexstride,
+				numfaces,
+				indicestype,
+				nodeSubPart);
+
+			unsigned int* gfxbase = (unsigned int*)(indexbase+nodeTriangleIndex*indexstride);
+			btAssert(indicestype==PHY_INTEGER||indicestype==PHY_SHORT||indicestype==PHY_UCHAR);
+
+			const btVector3& meshScaling = m_meshInterface->getScaling();
+			for (int j=2;j>=0;j--)
+			{
+				//decode index j as unsigned short, unsigned int or unsigned char, depending on indicestype
+				int graphicsindex = indicestype==PHY_SHORT?((unsigned short*)gfxbase)[j]:indicestype==PHY_INTEGER?gfxbase[j]:((unsigned char*)gfxbase)[j];
+				if (type == PHY_FLOAT)
+				{
+					float* graphicsbase = (float*)(vertexbase+graphicsindex*stride);
+					m_triangle[j] = btVector3(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ());		
+				}
+				else
+				{
+					double* graphicsbase = (double*)(vertexbase+graphicsindex*stride);
+					m_triangle[j] = btVector3(btScalar(graphicsbase[0])*meshScaling.getX(),btScalar(graphicsbase[1])*meshScaling.getY(),btScalar(graphicsbase[2])*meshScaling.getZ());		
+				}
+			}
+
+			/* Perform convex cast vs. triangle collision here */
+			m_callback->processTriangle(m_triangle,nodeSubPart,nodeTriangleIndex);
+			m_meshInterface->unLockReadOnlyVertexBase(nodeSubPart);
+		}
+	};
+
+	MyNodeOverlapCallback	myNodeCallback(callback,m_meshInterface);
+
+	m_bvh->reportBoxCastOverlappingNodex (&myNodeCallback, raySource, rayTarget, aabbMin, aabbMax);
+}
+
+//perform bvh tree traversal for the box [aabbMin,aabbMax] and report the triangle of every node the bvh hands back to 'callback'
+void	btBvhTriangleMeshShape::processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
+{
+	//with DISABLE_BVH defined the base class implementation is used instead of the tree
+#ifdef DISABLE_BVH
+	//brute force traverse all triangles
+	btTriangleMeshShape::processAllTriangles(callback,aabbMin,aabbMax);
+#else
+
+	//first get all the nodes
+
+	//node callback handed to the bvh: turns a (sub part, triangle index) pair into three scaled vertices for m_callback
+	struct	MyNodeOverlapCallback : public btNodeOverlapCallback
+	{
+		btStridingMeshInterface*	m_meshInterface;
+		btTriangleCallback*		m_callback;
+		btVector3				m_triangle[3];
+
+
+		MyNodeOverlapCallback(btTriangleCallback* callback,btStridingMeshInterface* meshInterface)
+			:m_meshInterface(meshInterface),
+			m_callback(callback)
+		{
+		}
+				
+		virtual void processNode(int nodeSubPart, int nodeTriangleIndex)
+		{
+			const unsigned char *vertexbase;
+			int numverts;
+			PHY_ScalarType type;
+			int stride;
+			const unsigned char *indexbase;
+			int indexstride;
+			int numfaces;
+			PHY_ScalarType indicestype;
+			
+
+			m_meshInterface->getLockedReadOnlyVertexIndexBase(
+				&vertexbase,
+				numverts,
+				type,
+				stride,
+				&indexbase,
+				indexstride,
+				numfaces,
+				indicestype,
+				nodeSubPart);
+
+			unsigned int* gfxbase = (unsigned int*)(indexbase+nodeTriangleIndex*indexstride);
+			btAssert(indicestype==PHY_INTEGER||indicestype==PHY_SHORT||indicestype==PHY_UCHAR);
+	
+			const btVector3& meshScaling = m_meshInterface->getScaling();
+			for (int j=2;j>=0;j--)
+			{
+				//decode index j as unsigned short, unsigned int or unsigned char, depending on indicestype
+				int graphicsindex = indicestype==PHY_SHORT?((unsigned short*)gfxbase)[j]:indicestype==PHY_INTEGER?gfxbase[j]:((unsigned char*)gfxbase)[j];
+
+
+#ifdef DEBUG_TRIANGLE_MESH
+				printf("%d ,",graphicsindex);
+#endif //DEBUG_TRIANGLE_MESH
+				if (type == PHY_FLOAT)
+				{
+					float* graphicsbase = (float*)(vertexbase+graphicsindex*stride);
+					
+					m_triangle[j] = btVector3(
+						graphicsbase[0]*meshScaling.getX(),
+						graphicsbase[1]*meshScaling.getY(),
+						graphicsbase[2]*meshScaling.getZ());
+				}
+				else
+				{
+					double* graphicsbase = (double*)(vertexbase+graphicsindex*stride);
+
+					m_triangle[j] = btVector3(
+						btScalar(graphicsbase[0])*meshScaling.getX(),
+						btScalar(graphicsbase[1])*meshScaling.getY(),
+						btScalar(graphicsbase[2])*meshScaling.getZ());
+				}
+#ifdef DEBUG_TRIANGLE_MESH
+				printf("triangle vertices:%f,%f,%f\n",m_triangle[j].x(),m_triangle[j].y(),m_triangle[j].z());
+#endif //DEBUG_TRIANGLE_MESH
+			}
+
+			m_callback->processTriangle(m_triangle,nodeSubPart,nodeTriangleIndex);
+			m_meshInterface->unLockReadOnlyVertexBase(nodeSubPart);
+		}
+
+	};
+
+	MyNodeOverlapCallback	myNodeCallback(callback,m_meshInterface);
+
+	m_bvh->reportAabbOverlappingNodex(&myNodeCallback,aabbMin,aabbMax);
+
+
+#endif//DISABLE_BVH
+
+
+}
+
+void   btBvhTriangleMeshShape::setLocalScaling(const btVector3& scaling)
+{
+   //a (practically) unchanged scaling is ignored; a real change is stored and the bvh is rebuilt
+   const bool scalingChanged = (getLocalScaling() -scaling).length2() > SIMD_EPSILON;
+   if (!scalingChanged) return;
+   btTriangleMeshShape::setLocalScaling(scaling);
+   buildOptimizedBvh();
+}
+//Replaces m_bvh with a tree freshly built from m_meshInterface; afterwards the shape owns the tree
+void   btBvhTriangleMeshShape::buildOptimizedBvh()
+{
+	if (m_ownsBvh) //only a tree this shape owns is destroyed; one attached via setOptimizedBvh is left alone
+	{
+		m_bvh->~btOptimizedBvh();
+		btAlignedFree(m_bvh);
+	}
+	///m_localAabbMin/m_localAabbMax is already re-calculated in btTriangleMeshShape. We could just scale aabb, but this needs some more work
+	void* mem = btAlignedAlloc(sizeof(btOptimizedBvh),16); //16-byte aligned storage for the placement new below
+	m_bvh = new(mem) btOptimizedBvh();
+	//rebuild the bvh from the mesh interface, passing the shape's local aabb as its bounds
+	m_bvh->build(m_meshInterface,m_useQuantizedAabbCompression,m_localAabbMin,m_localAabbMax);
+	m_ownsBvh = true;
+}
+
+void   btBvhTriangleMeshShape::setOptimizedBvh(btOptimizedBvh* bvh, const btVector3& scaling)
+{
+   //precondition (checked by btAssert): no bvh is attached yet and none is owned
+   btAssert(!m_bvh);
+   btAssert(!m_ownsBvh);
+   //adopt the caller's tree without taking ownership of it
+   m_ownsBvh = false;
+   m_bvh = bvh;
+   // update the scaling without rebuilding the bvh
+   const bool scalingChanged = (getLocalScaling() -scaling).length2() > SIMD_EPSILON;
+   if (scalingChanged)
+      btTriangleMeshShape::setLocalScaling(scaling);
+}
+
+
+
+///fills the dataBuffer (a btTriangleMeshShapeData) and returns the struct name "btTriangleMeshShapeData"
+const char*	btBvhTriangleMeshShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btTriangleMeshShapeData* trimeshData = (btTriangleMeshShapeData*) dataBuffer;
+
+	btCollisionShape::serialize(&trimeshData->m_collisionShapeData,serializer);
+
+	m_meshInterface->serialize(&trimeshData->m_meshInterface, serializer);
+
+	trimeshData->m_collisionMargin = float(m_collisionMargin);
+	//the pad bytes are part of the struct that gets written, so give them a defined value
+	for (unsigned int i=0;i<sizeof(trimeshData->m_pad3);i++) trimeshData->m_pad3[i] = 0;
+
+	if (m_bvh && !(serializer->getSerializationFlags()&BT_SERIALIZE_NO_BVH))
+	{
+		void* existingBvh = serializer->findPointer(m_bvh);
+		if (existingBvh)
+		{
+#ifdef BT_USE_DOUBLE_PRECISION
+			trimeshData->m_quantizedDoubleBvh = (btQuantizedBvhData*)existingBvh;
+			trimeshData->m_quantizedFloatBvh = 0;
+#else
+			trimeshData->m_quantizedFloatBvh  = (btQuantizedBvhData*)existingBvh;
+			trimeshData->m_quantizedDoubleBvh= 0;
+#endif //BT_USE_DOUBLE_PRECISION
+		} else
+		{
+			//findPointer returned null for this bvh: reference it by unique pointer and write its chunk here
+#ifdef BT_USE_DOUBLE_PRECISION
+			trimeshData->m_quantizedDoubleBvh = (btQuantizedBvhData*)serializer->getUniquePointer(m_bvh);
+			trimeshData->m_quantizedFloatBvh = 0;
+#else
+			trimeshData->m_quantizedFloatBvh  = (btQuantizedBvhData*)serializer->getUniquePointer(m_bvh);
+			trimeshData->m_quantizedDoubleBvh= 0;
+#endif //BT_USE_DOUBLE_PRECISION
+	
+			int sz = m_bvh->calculateSerializeBufferSizeNew();
+			btChunk* bvhChunk = serializer->allocate(sz,1);
+			const char* structType = m_bvh->serialize(bvhChunk->m_oldPtr, serializer);
+			serializer->finalizeChunk(bvhChunk,structType,BT_QUANTIZED_BVH_CODE,m_bvh);
+		}
+	} else
+	{
+		trimeshData->m_quantizedFloatBvh = 0;
+		trimeshData->m_quantizedDoubleBvh = 0;
+	}
+
+	//the triangle info map follows the same scheme: reuse a pointer the serializer knows, else write a chunk
+
+	if (m_triangleInfoMap && !(serializer->getSerializationFlags()&BT_SERIALIZE_NO_TRIANGLEINFOMAP))
+	{
+		void* existingMap = serializer->findPointer(m_triangleInfoMap);
+		if (existingMap)
+		{
+			trimeshData->m_triangleInfoMap = (btTriangleInfoMapData*)existingMap;
+		} else
+		{
+			trimeshData->m_triangleInfoMap = (btTriangleInfoMapData*)serializer->getUniquePointer(m_triangleInfoMap);
+			int sz = m_triangleInfoMap->calculateSerializeBufferSize();
+			btChunk* mapChunk = serializer->allocate(sz,1);
+			const char* structType = m_triangleInfoMap->serialize(mapChunk->m_oldPtr, serializer);
+			serializer->finalizeChunk(mapChunk,structType,BT_TRIANLGE_INFO_MAP,m_triangleInfoMap);
+		}
+	} else
+	{
+		trimeshData->m_triangleInfoMap = 0;
+	}
+
+	return "btTriangleMeshShapeData";
+}
+
+void	btBvhTriangleMeshShape::serializeSingleBvh(btSerializer* serializer) const
+{
+	if (!m_bvh)
+		return;	//no bvh attached: nothing to write
+	//make sure not to use calculateSerializeBufferSize here because it is used for in-place
+	const int chunkBytes = m_bvh->calculateSerializeBufferSizeNew();
+	btChunk* const bvhChunk = serializer->allocate(chunkBytes,1);
+	const char* const typeName = m_bvh->serialize(bvhChunk->m_oldPtr, serializer);
+	serializer->finalizeChunk(bvhChunk,typeName,BT_QUANTIZED_BVH_CODE,(void*)m_bvh);
+}
+
+void	btBvhTriangleMeshShape::serializeSingleTriangleInfoMap(btSerializer* serializer) const
+{
+	if (!m_triangleInfoMap)
+		return;	//no triangle info map attached: nothing to write
+	//one chunk sized by the map itself, filled by the map, then finalized as a BT_TRIANLGE_INFO_MAP chunk
+	const int chunkBytes = m_triangleInfoMap->calculateSerializeBufferSize();
+	btChunk* const mapChunk = serializer->allocate(chunkBytes,1);
+	const char* const typeName = m_triangleInfoMap->serialize(mapChunk->m_oldPtr, serializer);
+	serializer->finalizeChunk(mapChunk,typeName,BT_TRIANLGE_INFO_MAP,(void*)m_triangleInfoMap);
+}
+
+
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h b/src/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h
new file mode 100644
index 00000000..d1c21629
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h
@@ -0,0 +1,139 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_BVH_TRIANGLE_MESH_SHAPE_H
+#define BT_BVH_TRIANGLE_MESH_SHAPE_H
+
+#include "btTriangleMeshShape.h"
+#include "btOptimizedBvh.h"
+#include "LinearMath/btAlignedAllocator.h"
+#include "btTriangleInfoMap.h"
+
+///The btBvhTriangleMeshShape is a static-triangle mesh shape with several optimizations, such as bounding volume hierarchy and cache friendly traversal for PlayStation 3 Cell SPU. It is recommended to enable useQuantizedAabbCompression for better memory usage.
+///It takes a triangle mesh as input, for example a btTriangleMesh or btTriangleIndexVertexArray. The btBvhTriangleMeshShape class allows for triangle mesh deformations by a refit or partialRefit method.
+///Instead of building the bounding volume hierarchy acceleration structure, it is also possible to serialize (save) and deserialize (load) the structure from disk.
+///See Demos\ConcaveDemo\ConcavePhysicsDemo.cpp for an example.
+ATTRIBUTE_ALIGNED16(class) btBvhTriangleMeshShape : public btTriangleMeshShape
+{
+	//tree used by the triangle queries; buildOptimizedBvh frees the previous tree only when m_ownsBvh is true
+	btOptimizedBvh*	m_bvh;
+	btTriangleInfoMap*	m_triangleInfoMap;
+	//m_useQuantizedAabbCompression is forwarded to btOptimizedBvh::build whenever the tree is (re)built
+	bool m_useQuantizedAabbCompression;
+	bool m_ownsBvh;
+	bool m_pad[11];////need padding due to alignment
+
+public:
+
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	//NOTE(review): buildBvh presumably lets the caller skip the build and attach a tree later - confirm in the .cpp
+	btBvhTriangleMeshShape(btStridingMeshInterface* meshInterface, bool useQuantizedAabbCompression, bool buildBvh = true);
+
+	///optionally pass in a larger bvh aabb, used for quantization. This allows for deformations within this aabb
+	btBvhTriangleMeshShape(btStridingMeshInterface* meshInterface, bool useQuantizedAabbCompression,const btVector3& bvhAabbMin,const btVector3& bvhAabbMax, bool buildBvh = true);
+	
+	virtual ~btBvhTriangleMeshShape();
+
+	bool getOwnsBvh () const
+	{
+		return m_ownsBvh;
+	}
+
+	//ray and swept-box queries; performConvexcast hands overlapping triangles to 'callback'
+	//(performRaycast presumably does the same for a ray - confirm in the .cpp)
+	void performRaycast (btTriangleCallback* callback, const btVector3& raySource, const btVector3& rayTarget);
+	void performConvexcast (btTriangleCallback* callback, const btVector3& boxSource, const btVector3& boxTarget, const btVector3& boxMin, const btVector3& boxMax);
+	//reports every triangle whose bvh node overlaps [aabbMin,aabbMax] to 'callback'
+	virtual void	processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const;
+
+	void	refitTree(const btVector3& aabbMin,const btVector3& aabbMax);
+
+	///for a fast incremental refit of parts of the tree. Note: the entire AABB of the tree will become more conservative, it never shrinks
+	void	partialRefitTree(const btVector3& aabbMin,const btVector3& aabbMax);
+
+	//debugging
+	virtual const char*	getName()const {return "BVHTRIANGLEMESH";}
+
+	//stores the new scaling and rebuilds the bvh, but only when the scaling differs by more than SIMD_EPSILON (squared length)
+	virtual void	setLocalScaling(const btVector3& scaling);
+	
+	btOptimizedBvh*	getOptimizedBvh()
+	{
+		return m_bvh;
+	}
+	//attaches an externally built bvh without taking ownership; asserts that no bvh is attached yet
+	void	setOptimizedBvh(btOptimizedBvh* bvh, const btVector3& localScaling=btVector3(1,1,1));
+	//frees an owned tree, then allocates and builds a new one that this shape owns
+	void    buildOptimizedBvh();
+
+	bool	usesQuantizedAabbCompression() const
+	{
+		return	m_useQuantizedAabbCompression;
+	}
+	//the map pointer is only stored here; serialize() writes the map when the pointer is non-null
+	void	setTriangleInfoMap(btTriangleInfoMap* triangleInfoMap)
+	{
+		m_triangleInfoMap = triangleInfoMap;
+	}
+
+	const btTriangleInfoMap*	getTriangleInfoMap() const
+	{
+		return m_triangleInfoMap;
+	}
+	
+	btTriangleInfoMap*	getTriangleInfoMap()
+	{
+		return m_triangleInfoMap;
+	}
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+	//writes only the bvh as its own chunk (no-op without a bvh)
+	virtual void	serializeSingleBvh(btSerializer* serializer) const;
+	//writes only the triangle info map as its own chunk (no-op without a map)
+	virtual void	serializeSingleTriangleInfoMap(btSerializer* serializer) const;
+
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btTriangleMeshShapeData
+{
+	btCollisionShapeData	m_collisionShapeData;
+
+	btStridingMeshInterfaceData m_meshInterface;
+	//serialize() fills at most one of the two bvh pointers (chosen by BT_USE_DOUBLE_PRECISION); the other is 0
+	btQuantizedBvhFloatData		*m_quantizedFloatBvh;
+	btQuantizedBvhDoubleData	*m_quantizedDoubleBvh;
+	//0 when the shape has no triangle info map or the serializer was told to skip it
+	btTriangleInfoMapData	*m_triangleInfoMap;
+	
+	float	m_collisionMargin;
+
+	char m_pad3[4];
+	
+};
+
+///size in bytes of the btTriangleMeshShapeData that serialize() fills
+SIMD_FORCE_INLINE	int	btBvhTriangleMeshShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btTriangleMeshShapeData);
+}
+
+
+
+#endif //BT_BVH_TRIANGLE_MESH_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btCapsuleShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btCapsuleShape.cpp
new file mode 100644
index 00000000..864df26e
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btCapsuleShape.cpp
@@ -0,0 +1,171 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btCapsuleShape.h"
+
+#include "BulletCollision/CollisionShapes/btCollisionMargin.h"
+#include "LinearMath/btQuaternion.h"
+//Y-aligned capsule: half of 'height' is stored on the y axis, 'radius' on the other two
+btCapsuleShape::btCapsuleShape(btScalar radius, btScalar height) : btConvexInternalShape ()
+{
+	m_upAxis = 1;
+	m_shapeType = CAPSULE_SHAPE_PROXYTYPE;
+	m_implicitShapeDimensions.setValue(radius,0.5f*height,radius);
+}
+
+ 
+ btVector3	btCapsuleShape::localGetSupportingVertexWithoutMargin(const btVector3& vec0)const
+{
+
+	//Support point of the capsule along vec0, measured without the collision margin:
+	//each cap centre yields one candidate (centre + dir*radius - dir*margin) and the
+	//candidate with the larger projection onto dir is returned.
+
+
+	//dir is vec0 scaled to unit length; when the squared length of vec0 is below 0.0001
+	//the x axis is used instead
+	btVector3 dir = vec0;
+	const btScalar dirLenSqr = dir.length2();
+	if (dirLenSqr < btScalar(0.0001))
+	{
+		dir.setValue(1,0,0);
+	}
+	else
+	{
+		const btScalar invLen = btScalar(1.) / btSqrt(dirLenSqr );
+		dir *= invLen;
+	}
+
+
+	const btScalar capRadius = getRadius();
+
+	//best candidate so far; stays (0,0,0) only if no candidate beats -BT_LARGE_FLOAT
+	btVector3 best(0,0,0);
+	btScalar bestDot(btScalar(-BT_LARGE_FLOAT));
+
+
+	//cap 0 is centred at +halfHeight on the up axis, cap 1 at -halfHeight;
+	//on a tie the first cap wins because the comparison is strict
+	for (int cap=0;cap<2;cap++)
+	{
+		btVector3 capCentre(0,0,0);
+		capCentre[getUpAxis()] = (cap==0) ? getHalfHeight() : -getHalfHeight();
+
+		//push the centre out by the radius along dir, then pull back by the margin
+		const btVector3 candidate = capCentre +dir*(capRadius) - dir * getMargin();
+		const btScalar candidateDot = dir.dot(candidate);
+
+		if (candidateDot > bestDot)
+		{
+			bestDot = candidateDot;
+			best = candidate;
+		}
+	}
+
+
+	return best;
+
+}
+
+ void	btCapsuleShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+
+	//Batched support query: for every vectors[j] the better of the two cap-centre
+	//candidates is written to supportVerticesOut[j].
+	//The input directions are used exactly as given (they are not normalised here).
+
+	const btScalar capRadius = getRadius();
+
+	for (int j=0;j<numVectors;j++)
+	{
+		const btVector3& dir = vectors[j];
+
+		//supportVerticesOut[j] is only written when a candidate beats the running maximum
+		btScalar bestDot(btScalar(-BT_LARGE_FLOAT));
+
+		//cap 0 is centred at +halfHeight on the up axis, cap 1 at -halfHeight;
+		//on a tie the first cap wins because the comparison is strict
+		for (int cap=0;cap<2;cap++)
+		{
+			btVector3 capCentre(0,0,0);
+			capCentre[getUpAxis()] = (cap==0) ? getHalfHeight() : -getHalfHeight();
+
+			//push the centre out by the radius along dir, then pull back by the margin
+			const btVector3 candidate = capCentre +dir*(capRadius) - dir * getMargin();
+			const btScalar candidateDot = dir.dot(candidate);
+
+			if (candidateDot > bestDot)
+			{
+				bestDot = candidateDot;
+				supportVerticesOut[j] = candidate;
+			}
+		}
+
+	}
+
+
+}
+
+
+void	btCapsuleShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	//as an approximation, take the inertia of the box that bounds the spheres
+	//(per axis: mass/12 times the sum of the squared full extents of the other two axes)
+
+	//half extents of that box: the radius on every axis, plus the half height along the up axis
+	//note: the box is grown by the default CONVEX_DISTANCE_MARGIN, not by getMargin()
+	
+	btScalar radius = getRadius();
+
+	btVector3 halfExtents(radius,radius,radius);
+	halfExtents[getUpAxis()]+=getHalfHeight();
+
+	btScalar margin = CONVEX_DISTANCE_MARGIN;
+
+	btScalar lx=btScalar(2.)*(halfExtents[0]+margin);
+	btScalar ly=btScalar(2.)*(halfExtents[1]+margin);
+	btScalar lz=btScalar(2.)*(halfExtents[2]+margin);
+	const btScalar x2 = lx*lx;
+	const btScalar y2 = ly*ly;
+	const btScalar z2 = lz*lz;
+	const btScalar scaledmass = mass * btScalar(.08333333); //.08333333 approximates 1/12
+
+	inertia[0] = scaledmass * (y2+z2);
+	inertia[1] = scaledmass * (x2+z2);
+	inertia[2] = scaledmass * (x2+y2);
+
+}
+//X-aligned capsule: half of 'height' is stored on the x axis, 'radius' on the other two
+btCapsuleShapeX::btCapsuleShapeX(btScalar radius,btScalar height)
+{
+	m_implicitShapeDimensions.setValue(0.5f*height, radius,radius);
+	m_upAxis = 0;
+}
+
+
+
+
+
+//Z-aligned capsule: half of 'height' is stored on the z axis, 'radius' on the other two
+btCapsuleShapeZ::btCapsuleShapeZ(btScalar radius,btScalar height)
+{
+	m_implicitShapeDimensions.setValue(radius,radius,0.5f*height);
+	m_upAxis = 2;
+}
+
+
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btCapsuleShape.h b/src/bullet/BulletCollision/CollisionShapes/btCapsuleShape.h
new file mode 100644
index 00000000..ab763abf
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btCapsuleShape.h
@@ -0,0 +1,173 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CAPSULE_SHAPE_H
+#define BT_CAPSULE_SHAPE_H
+
+#include "btConvexInternalShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
+
+
+///The btCapsuleShape represents a capsule around the Y axis, there is also the btCapsuleShapeX aligned around the X axis and btCapsuleShapeZ around the Z axis.
+///The total height is height+2*radius, so the height is just the height between the center of each 'sphere' of the capsule caps.
+///The btCapsuleShape is a convex hull of two spheres. The btMultiSphereShape is a more general collision shape that takes the convex hull of multiple sphere, so it can also represent a capsule when just using two spheres.
+class btCapsuleShape : public btConvexInternalShape
+{
+protected:
+	int	m_upAxis;
+	//m_upAxis is the index of the capsule axis: 0 = X, 1 = Y, 2 = Z
+protected:
+	///only used for btCapsuleShapeZ and btCapsuleShapeX subclasses.
+	btCapsuleShape() : btConvexInternalShape() {m_shapeType = CAPSULE_SHAPE_PROXYTYPE;};
+
+public:
+	btCapsuleShape(btScalar radius,btScalar height);
+
+	///CollisionShape Interface
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	/// btConvexShape Interface
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+	
+	virtual void setMargin(btScalar collisionMargin)
+	{
+		//correct the m_implicitShapeDimensions for the margin
+		btVector3 oldMargin(getMargin(),getMargin(),getMargin());
+		btVector3 implicitShapeDimensionsWithMargin = m_implicitShapeDimensions+oldMargin;
+		//dimensions + margin stays the same across the change, so the stored dimensions absorb the difference
+		btConvexInternalShape::setMargin(collisionMargin);
+		btVector3 newMargin(getMargin(),getMargin(),getMargin());
+		m_implicitShapeDimensions = implicitShapeDimensionsWithMargin - newMargin;
+
+	}
+
+	virtual void getAabb (const btTransform& t, btVector3& aabbMin, btVector3& aabbMax) const
+	{
+			btVector3 halfExtents(getRadius(),getRadius(),getRadius());
+			halfExtents[m_upAxis] = getRadius() + getHalfHeight();
+			halfExtents += btVector3(getMargin(),getMargin(),getMargin());
+			btMatrix3x3 abs_b = t.getBasis().absolute();  
+			btVector3 center = t.getOrigin();
+			btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));		  
+			//extent = |basis| * halfExtents, so the box stays axis aligned in the frame of t
+			aabbMin = center - extent;
+			aabbMax = center + extent;
+	}
+
+	virtual const char*	getName()const 
+	{
+		return "CapsuleShape";
+	}
+
+	int	getUpAxis() const
+	{
+		return m_upAxis;
+	}
+
+	btScalar	getRadius() const
+	{
+		int radiusAxis = (m_upAxis+2)%3; //an axis other than the up axis; both of those hold the radius
+		return m_implicitShapeDimensions[radiusAxis];
+	}
+
+	btScalar	getHalfHeight() const
+	{
+		return m_implicitShapeDimensions[m_upAxis];
+	}
+
+	virtual void	setLocalScaling(const btVector3& scaling)
+	{
+		btVector3 oldMargin(getMargin(),getMargin(),getMargin());
+		btVector3 implicitShapeDimensionsWithMargin = m_implicitShapeDimensions+oldMargin;
+		btVector3 unScaledImplicitShapeDimensionsWithMargin = implicitShapeDimensionsWithMargin / m_localScaling;
+		//the old scaling is divided out above; after the base call m_localScaling holds the value to apply
+		btConvexInternalShape::setLocalScaling(scaling);
+
+		m_implicitShapeDimensions = (unScaledImplicitShapeDimensionsWithMargin * m_localScaling) - oldMargin;
+
+	}
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+
+};
+
+///btCapsuleShapeX represents a capsule around the X axis
+///the total height is height+2*radius, so the height is just the height between the center of each 'sphere' of the capsule caps.
+class btCapsuleShapeX : public btCapsuleShape
+{
+public:
+	//sets the up axis to 0 (X) and stores half of 'height' on that axis
+	btCapsuleShapeX(btScalar radius,btScalar height);
+		
+	//debugging
+	virtual const char*	getName()const
+	{
+		return "CapsuleX";
+	}
+
+	
+
+};
+
+///btCapsuleShapeZ represents a capsule around the Z axis
+///the total height is height+2*radius, so the height is just the height between the center of each 'sphere' of the capsule caps.
+class btCapsuleShapeZ : public btCapsuleShape
+{
+public:
+	btCapsuleShapeZ(btScalar radius,btScalar height);
+	//the constructor sets the up axis to 2 (Z) and stores half of 'height' on that axis
+		//debugging
+	virtual const char*	getName()const
+	{
+		return "CapsuleZ";
+	}
+
+	
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btCapsuleShapeData
+{
+	btConvexInternalShapeData	m_convexInternalShapeData;
+	//copy of btCapsuleShape::m_upAxis (0 = X, 1 = Y, 2 = Z)
+	int	m_upAxis;
+
+	char	m_padding[4];
+};
+///size in bytes of the btCapsuleShapeData that serialize() fills
+SIMD_FORCE_INLINE	int	btCapsuleShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btCapsuleShapeData);
+}
+
+	///fills the dataBuffer (a btCapsuleShapeData) and returns the struct name "btCapsuleShapeData"
+SIMD_FORCE_INLINE	const char*	btCapsuleShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btCapsuleShapeData* shapeData = (btCapsuleShapeData*) dataBuffer;
+	
+	btConvexInternalShape::serialize(&shapeData->m_convexInternalShapeData,serializer);
+	//the pad bytes are part of the struct that gets written, so they get a defined value below
+	shapeData->m_upAxis = m_upAxis;
+	for (unsigned int i=0;i<sizeof(shapeData->m_padding);i++) shapeData->m_padding[i] = 0;
+	return "btCapsuleShapeData";
+}
+
+#endif //BT_CAPSULE_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btCollisionMargin.h b/src/bullet/BulletCollision/CollisionShapes/btCollisionMargin.h
new file mode 100644
index 00000000..474bf1fb
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btCollisionMargin.h
@@ -0,0 +1,27 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_COLLISION_MARGIN_H
+#define BT_COLLISION_MARGIN_H
+
+///The CONVEX_DISTANCE_MARGIN is a default collision margin for convex collision shapes derived from btConvexInternalShape.
+///This collision margin is used by GJK and some other algorithms.
+///Note that when creating small objects, you need to make sure to set a smaller collision margin, using the 'setMargin' API
+#define CONVEX_DISTANCE_MARGIN btScalar(0.04)// other values noted here: btScalar(0.1), btScalar(0.01)
+
+
+
+#endif //BT_COLLISION_MARGIN_H
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btCollisionShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btCollisionShape.cpp
new file mode 100644
index 00000000..39ee21ca
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btCollisionShape.cpp
@@ -0,0 +1,119 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "LinearMath/btSerializer.h"
+
+/*
+  Keep this empty dummy function exactly as it is: probes
+  that check whether the library is actually installed
+  rely on it never changing.
+*/
+extern "C" 
+{
+void btBulletCollisionProbe ();
+//declared above, defined here with an empty body
+void btBulletCollisionProbe () {}
+}
+
+
+
+void	btCollisionShape::getBoundingSphere(btVector3& center,btScalar& radius) const
+{
+	//sphere around the aabb taken with an identity transform: centred on the box, radius = half its diagonal
+	btTransform identity;
+	identity.setIdentity();
+	btVector3 boxMin,boxMax;
+	getAabb(identity,boxMin,boxMax);
+	const btVector3 diagonal = boxMax-boxMin;
+	radius = diagonal.length()*btScalar(0.5);
+	center = (boxMin+boxMax)*btScalar(0.5);
+}
+///scales the caller's threshold factor by this shape's angular motion disc
+btScalar	btCollisionShape::getContactBreakingThreshold(btScalar defaultContactThreshold) const
+{
+	const btScalar motionDisc = getAngularMotionDisc();
+	return motionDisc * defaultContactThreshold;
+}
+
+btScalar	btCollisionShape::getAngularMotionDisc() const
+{
+	///@todo cache this value, to improve performance
+	//bounding-sphere radius plus the distance of the sphere centre from the local origin
+	btVector3	sphereCenter;
+	btScalar sphereRadius;
+	getBoundingSphere(sphereCenter,sphereRadius);
+	return sphereRadius + sphereCenter.length();
+}
+
+void btCollisionShape::calculateTemporalAabb(const btTransform& curTrans,const btVector3& linvel,const btVector3& angvel,btScalar timeStep, btVector3& temporalAabbMin,btVector3& temporalAabbMax) const
+{
+	//Grows the aabb at curTrans so that it also covers the motion over timeStep:
+	//the linear part extends one side per axis, the angular part pads every side.
+
+	//start with static aabb
+	getAabb(curTrans,temporalAabbMin,temporalAabbMax);
+
+	//per-axis working copies of the static bounds
+	btScalar lower[3] = {temporalAabbMin.getX(),temporalAabbMin.getY(),temporalAabbMin.getZ()};
+	btScalar upper[3] = {temporalAabbMax.getX(),temporalAabbMax.getY(),temporalAabbMax.getZ()};
+
+	// add linear motion
+	//a positive component moves the upper bound, any other value is added to the lower bound
+	const btVector3 linMotion = linvel*timeStep;
+	const btScalar step[3] = {linMotion.x(),linMotion.y(),linMotion.z()};
+	///@todo: simd would have a vector max/min operation, instead of per-element access
+	for (int axis=0;axis<3;axis++)
+	{
+		if (step[axis] > btScalar(0.))
+			upper[axis] += step[axis];
+		else
+			lower[axis] += step[axis];
+	}
+
+	//add conservative angular motion
+	//the same pad (angular speed * angular motion disc * timeStep) is applied on every axis
+	const btScalar angularMotion = angvel.length() * getAngularMotionDisc() * timeStep;
+	const btVector3 angularMotion3d(angularMotion,angularMotion,angularMotion);
+
+	temporalAabbMin = btVector3(lower[0],lower[1],lower[2]);
+	temporalAabbMax = btVector3(upper[0],upper[1],upper[2]);
+
+	temporalAabbMin -= angularMotion3d;
+	temporalAabbMax += angularMotion3d;
+
+}
+
+///fills the dataBuffer (a btCollisionShapeData) and returns the struct name "btCollisionShapeData"
+const char*	btCollisionShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btCollisionShapeData* shapeData = (btCollisionShapeData*) dataBuffer;
+	char* name = (char*) serializer->findNameForPointer(this);
+	shapeData->m_name = (char*)serializer->getUniquePointer(name);
+	if (shapeData->m_name) //the name itself is only written when the serializer returned a pointer for it
+	{
+		serializer->serializeName(name);
+	}
+	shapeData->m_shapeType = m_shapeType;
+	for (unsigned int i=0;i<sizeof(shapeData->m_padding);i++) shapeData->m_padding[i] = 0; //pad bytes get written too: keep them defined
+	return "btCollisionShapeData";
+}
+
+void	btCollisionShape::serializeSingleShape(btSerializer* serializer) const
+{
+	//one chunk sized by calculateSerializeBufferSize(), filled by serialize(), finalized as BT_SHAPE_CODE
+	btChunk* const shapeChunk = serializer->allocate(calculateSerializeBufferSize(),1);
+	const char* const typeName = serialize(shapeChunk->m_oldPtr, serializer);
+	serializer->finalizeChunk(shapeChunk,typeName,BT_SHAPE_CODE,(void*)this);
+}
\ No newline at end of file
diff --git a/src/bullet/BulletCollision/CollisionShapes/btCollisionShape.h b/src/bullet/BulletCollision/CollisionShapes/btCollisionShape.h
new file mode 100644
index 00000000..865c1067
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btCollisionShape.h
@@ -0,0 +1,150 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_COLLISION_SHAPE_H
+#define BT_COLLISION_SHAPE_H
+
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btMatrix3x3.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" //for the shape types
+class btSerializer;
+
+
+///The btCollisionShape class provides an interface for collision shapes that can be shared among btCollisionObjects.
+class btCollisionShape
+{
+protected:
+	int m_shapeType;
+	void* m_userPointer;
+
+public:
+	//starts as INVALID_SHAPE_PROXYTYPE with a null user pointer; derived shapes assign their own type
+	btCollisionShape() : m_shapeType (INVALID_SHAPE_PROXYTYPE), m_userPointer(0)
+	{
+	}
+
+	virtual ~btCollisionShape()
+	{
+	}
+
+	///getAabb returns the axis aligned bounding box in the coordinate frame of the given transform t.
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const =0;
+	//default: sphere around the identity-transform aabb (centre of the box, half its diagonal)
+	virtual void	getBoundingSphere(btVector3& center,btScalar& radius) const;
+
+	///getAngularMotionDisc returns the maximum radius needed for Conservative Advancement to handle time-of-impact with rotations.
+	virtual btScalar	getAngularMotionDisc() const;
+	//default: getAngularMotionDisc() multiplied by the given factor
+	virtual btScalar	getContactBreakingThreshold(btScalar defaultContactThresholdFactor) const;
+
+
+	///calculateTemporalAabb calculates the enclosing aabb for the moving object over interval [0..timeStep)
+	///result is conservative
+	void calculateTemporalAabb(const btTransform& curTrans,const btVector3& linvel,const btVector3& angvel,btScalar timeStep, btVector3& temporalAabbMin,btVector3& temporalAabbMax) const;
+
+
+	//shape-category queries: each one asks btBroadphaseProxy about getShapeType()
+	SIMD_FORCE_INLINE bool	isPolyhedral() const
+	{
+		return btBroadphaseProxy::isPolyhedral(getShapeType());
+	}
+
+	SIMD_FORCE_INLINE bool	isConvex2d() const
+	{
+		return btBroadphaseProxy::isConvex2d(getShapeType());
+	}
+
+	SIMD_FORCE_INLINE bool	isConvex() const
+	{
+		return btBroadphaseProxy::isConvex(getShapeType());
+	}
+	SIMD_FORCE_INLINE bool	isNonMoving() const
+	{
+		return btBroadphaseProxy::isNonMoving(getShapeType());
+	}
+	SIMD_FORCE_INLINE bool	isConcave() const
+	{
+		return btBroadphaseProxy::isConcave(getShapeType());
+	}
+	SIMD_FORCE_INLINE bool	isCompound() const
+	{
+		return btBroadphaseProxy::isCompound(getShapeType());
+	}
+
+	SIMD_FORCE_INLINE bool	isSoftBody() const
+	{
+		return btBroadphaseProxy::isSoftBody(getShapeType());
+	}
+
+	///isInfinite is used to catch simulation error (aabb check)
+	SIMD_FORCE_INLINE bool isInfinite() const
+	{
+		return btBroadphaseProxy::isInfinite(getShapeType());
+	}
+	//the pure virtuals in the following block are compiled out when __SPU__ is defined
+#ifndef __SPU__
+	virtual void	setLocalScaling(const btVector3& scaling) =0;
+	virtual const btVector3& getLocalScaling() const =0;
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const = 0;
+
+
+//debugging support
+	virtual const char*	getName()const =0 ;
+#endif //__SPU__
+
+	
+	int		getShapeType() const { return m_shapeType; }
+	virtual void	setMargin(btScalar margin) = 0;
+	virtual btScalar	getMargin() const = 0;
+
+	
+	///optional user data pointer
+	void	setUserPointer(void*  userPtr)
+	{
+		m_userPointer = userPtr;
+	}
+
+	void*	getUserPointer() const
+	{
+		return m_userPointer;
+	}
+	//serialization: buffer size for serialize(), the fill routine, and a helper that writes one whole chunk
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+	virtual void	serializeSingleShape(btSerializer* serializer) const;
+
+};	
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btCollisionShapeData
+{
+	char	*m_name;	//unique pointer the serializer returned for the shape's name (may be null)
+	int		m_shapeType;	//copy of btCollisionShape::m_shapeType
+	char	m_padding[4];
+};
+///size in bytes of the btCollisionShapeData that serialize() fills
+SIMD_FORCE_INLINE	int	btCollisionShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btCollisionShapeData);
+}
+
+
+
+#endif //BT_COLLISION_SHAPE_H
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btCompoundShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btCompoundShape.cpp
new file mode 100644
index 00000000..4eb860c5
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btCompoundShape.cpp
@@ -0,0 +1,356 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btCompoundShape.h"
+#include "btCollisionShape.h"
+#include "BulletCollision/BroadphaseCollision/btDbvt.h"
+#include "LinearMath/btSerializer.h"
+
+///Creates an empty compound; the cached local aabb starts out inverted so that the first child replaces it.
+btCompoundShape::btCompoundShape(bool enableDynamicAabbTree)
+	: m_localAabbMin(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT))
+	, m_localAabbMax(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT))
+	, m_dynamicAabbTree(0)
+	, m_updateRevision(1)
+	, m_collisionMargin(btScalar(0.))
+	, m_localScaling(btScalar(1.),btScalar(1.),btScalar(1.))
+{
+	m_shapeType = COMPOUND_SHAPE_PROXYTYPE;
+	if (!enableDynamicAabbTree)
+		return;
+	//placement-new the tree into 16 byte aligned storage obtained from btAlignedAlloc
+	void* const treeStorage = btAlignedAlloc(sizeof(btDbvt),16);
+	m_dynamicAabbTree = new(treeStorage) btDbvt();
+	btAssert(treeStorage==m_dynamicAabbTree);
+}
+
+
+///destroys the dynamic aabb tree by hand (explicit destructor call, then btAlignedFree) when there is one
+btCompoundShape::~btCompoundShape()
+{
+	if (!m_dynamicAabbTree)
+		return;
+	m_dynamicAabbTree->~btDbvt();
+	btAlignedFree(m_dynamicAabbTree);
+}
+
+///Appends 'shape' as a new child placed at 'localTransform' and bumps the update revision.
+///The cached local aabb grows to cover the child; with the dynamic aabb tree enabled a leaf
+///carrying the child's index in m_children is inserted as well.
+void	btCompoundShape::addChildShape(const btTransform& localTransform,btCollisionShape* shape)
+{
+	m_updateRevision++;
+	btCompoundShapeChild child;
+	child.m_node = 0;
+	child.m_transform = localTransform;
+	child.m_childShape = shape;
+	child.m_childShapeType = shape->getShapeType();
+	child.m_childMargin = shape->getMargin();
+
+	//extend the local aabbMin/aabbMax
+	btVector3 localAabbMin,localAabbMax;
+	shape->getAabb(localTransform,localAabbMin,localAabbMax);
+	for (int i=0;i<3;i++)
+	{
+		if (m_localAabbMin[i] > localAabbMin[i])
+		{
+			m_localAabbMin[i] = localAabbMin[i];
+		}
+		if (m_localAabbMax[i] < localAabbMax[i])
+		{
+			m_localAabbMax[i] = localAabbMax[i];
+		}
+	}
+
+	if (m_dynamicAabbTree)
+	{
+		const btDbvtVolume	bounds=btDbvtVolume::FromMM(localAabbMin,localAabbMax);
+		//pointer-sized integer: casting a plain int to void* is a size-mismatched conversion on 64 bit targets
+		const size_t index = m_children.size();
+		child.m_node = m_dynamicAabbTree->insert(bounds,reinterpret_cast<void*>(index));
+	}
+
+	m_children.push_back(child);
+}
+
+///Stores a new local transform for child 'childIndex', refits that child's leaf in the
+///dynamic aabb tree (when there is one) and optionally rebuilds the cached local aabb.
+void	btCompoundShape::updateChildTransform(int childIndex, const btTransform& newChildTransform,bool shouldRecalculateLocalAabb)
+{
+	btCompoundShapeChild& child = m_children[childIndex];
+	child.m_transform = newChildTransform;
+
+	if (m_dynamicAabbTree)
+	{
+		//bounds of the child under its new transform, handed to the tree for this child's node
+		btVector3 childMin,childMax;
+		child.m_childShape->getAabb(newChildTransform,childMin,childMax);
+		ATTRIBUTE_ALIGNED16(btDbvtVolume)	bounds=btDbvtVolume::FromMM(childMin,childMax);
+		m_dynamicAabbTree->update(child.m_node,bounds);
+	}
+
+	if (shouldRecalculateLocalAabb)
+		recalculateLocalAabb();
+}
+
+///Removes child 'childShapeIndex' by swapping it with the last child and popping the array; the local aabb is not refreshed here.
+void btCompoundShape::removeChildShapeByIndex(int childShapeIndex)
+{
+	m_updateRevision++;
+	btAssert(childShapeIndex >=0 && childShapeIndex < m_children.size());
+	const int lastIndex = m_children.size()-1;
+	if (m_dynamicAabbTree)
+		m_dynamicAabbTree->remove(m_children[childShapeIndex].m_node);
+	m_children.swap(childShapeIndex,lastIndex);
+	//re-tag the moved child's leaf; if the removed child was already last, the slot still holds the node just handed to remove(), so skip the write
+	if (m_dynamicAabbTree && childShapeIndex != lastIndex)
+		m_children[childShapeIndex].m_node->dataAsInt = childShapeIndex;
+	m_children.pop_back();
+}
+
+
+
+///Removes every child that references 'shape' (several children may share one shape),
+///then rebuilds the cached local aabb from the children that remain.
+void btCompoundShape::removeChildShape(btCollisionShape* shape)
+{
+	m_updateRevision++;
+	//walk from the back: removeChildShapeByIndex moves the last child into the freed slot,
+	//and everything behind the current index has already been inspected
+	int childIndex = m_children.size();
+	while (childIndex-- > 0)
+	{
+		if (m_children[childIndex].m_childShape != shape)
+			continue;
+		removeChildShapeByIndex(childIndex);
+	}
+
+	recalculateLocalAabb();
+}
+
+///Rebuilds m_localAabbMin/m_localAabbMax from scratch by merging the aabb of every remaining
+///child, each evaluated under that child's own local transform.
+void btCompoundShape::recalculateLocalAabb()
+{
+	//start from an inverted box so that the first child replaces both corners
+	const btScalar largeValue = btScalar(BT_LARGE_FLOAT);
+	m_localAabbMin = btVector3(largeValue,largeValue,largeValue);
+	m_localAabbMax = btVector3(-largeValue,-largeValue,-largeValue);
+	for (int childIndex = 0; childIndex < m_children.size(); ++childIndex)
+	{
+		const btCompoundShapeChild& child = m_children[childIndex];
+		btVector3 childMin,childMax;
+		child.m_childShape->getAabb(child.m_transform, childMin, childMax);
+		for (int axis=0;axis<3;++axis)
+		{
+			if (childMin[axis] < m_localAabbMin[axis])
+				m_localAabbMin[axis] = childMin[axis];
+			if (childMax[axis] > m_localAabbMax[axis])
+				m_localAabbMax[axis] = childMax[axis];
+		}
+	}
+}
+
+///Computes the aabb of the whole compound under 'trans': the cached local aabb, padded by the
+///collision margin, is moved by 'trans' and boxed again along the target axes.
+void btCompoundShape::getAabb(const btTransform& trans,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	//without children the cached aabb is still inverted, so use a degenerate box at the origin
+	btVector3 localHalfExtents(0,0,0);
+	btVector3 localCenter(0,0,0);
+	if (m_children.size())
+	{
+		localHalfExtents = btScalar(0.5)*(m_localAabbMax-m_localAabbMin);
+		localCenter = btScalar(0.5)*(m_localAabbMax+m_localAabbMin);
+	}
+
+	const btScalar margin = getMargin();
+	localHalfExtents += btVector3(margin,margin,margin);
+
+	//the absolute value of the basis maps local half extents to half extents along each output axis
+	const btMatrix3x3 absBasis = trans.getBasis().absolute();
+	const btVector3 center = trans(localCenter);
+	const btVector3 extent(absBasis[0].dot(localHalfExtents),
+		absBasis[1].dot(localHalfExtents),
+		absBasis[2].dot(localHalfExtents));
+
+	aabbMin = center-extent;
+	aabbMax = center+extent;
+}
+
+///Approximates the inertia of the compound by that of a solid box of mass 'mass' filling
+///the compound's aabb in its local frame (margin included, since getAabb adds it).
+void	btCompoundShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	btTransform identity;
+	identity.setIdentity();
+	btVector3 boxMin,boxMax;
+	getAabb(identity,boxMin,boxMax);
+
+	//full edge lengths, written as twice the half extents
+	const btVector3 halfExtents = (boxMax-boxMin)*btScalar(0.5);
+	const btScalar lx=btScalar(2.)*(halfExtents.x());
+	const btScalar ly=btScalar(2.)*(halfExtents.y());
+	const btScalar lz=btScalar(2.)*(halfExtents.z());
+	const btScalar massOver12 = mass/(btScalar(12.0));
+	inertia[0] = massOver12 * (ly*ly + lz*lz);
+	inertia[1] = massOver12 * (lx*lx + lz*lz);
+	inertia[2] = massOver12 * (lx*lx + ly*ly);
+}
+
+
+
+
+void btCompoundShape::calculatePrincipalAxisTransform(btScalar* masses, btTransform& principal, btVector3& inertia) const
+{	//'masses' is read at indices 0..m_children.size()-1, one mass per child; each must be > 0 (asserted below)
+	int n = m_children.size();
+	//pass 1: 'center' becomes the mass weighted mean of the child origins and the origin of 'principal'
+	btScalar totalMass = 0;
+	btVector3 center(0, 0, 0);
+	int k;
+
+	for (k = 0; k < n; k++)
+	{
+		btAssert(masses[k]>0);
+		center += m_children[k].m_transform.getOrigin() * masses[k];
+		totalMass += masses[k];
+	}
+
+	btAssert(totalMass>0);
+
+	center /= totalMass;
+	principal.setOrigin(center);
+	//pass 2: accumulate in 'tensor' the inertia of every child about 'center', in the compound's frame
+	btMatrix3x3 tensor(0, 0, 0, 0, 0, 0, 0, 0, 0);
+	for ( k = 0; k < n; k++)
+	{
+		btVector3 i;
+		m_children[k].m_childShape->calculateLocalInertia(masses[k], i);
+		//'i' now holds what the child shape reports as its inertia for mass masses[k]
+		const btTransform& t = m_children[k].m_transform;
+		btVector3 o = t.getOrigin() - center;
+
+		//compute inertia tensor in coordinate system of compound shape
+		btMatrix3x3 j = t.getBasis().transpose();
+		j[0] *= i[0];	//rows of the transposed basis scaled by i: diag(i) * R^T
+		j[1] *= i[1];
+		j[2] *= i[2];
+		j = t.getBasis() * j;	//R * diag(i) * R^T
+
+		//add inertia tensor
+		tensor[0] += j[0];
+		tensor[1] += j[1];
+		tensor[2] += j[2];
+
+		//compute inertia tensor of pointmass at o
+		btScalar o2 = o.length2();
+		j[0].setValue(o2, 0, 0);	//start from o2 * identity ...
+		j[1].setValue(0, o2, 0);
+		j[2].setValue(0, 0, o2);
+		j[0] += o * -o.x(); 	//... and subtract the outer product of o with itself, row by row
+		j[1] += o * -o.y(); 
+		j[2] += o * -o.z();
+
+		//add inertia tensor of pointmass
+		tensor[0] += masses[k] * j[0];
+		tensor[1] += masses[k] * j[1];
+		tensor[2] += masses[k] * j[2];
+	}
+	//NOTE(review): relies on btMatrix3x3::diagonalize (not visible here) leaving the diagonalised tensor in 'tensor' and the rotation in principal's basis; confirm
+	tensor.diagonalize(principal.getBasis(), btScalar(0.00001), 20);
+	inertia.setValue(tensor[0][0], tensor[1][1], tensor[2][2]);
+}
+
+
+
+///Rescales the compound to 'scaling': each child's own scaling and the origin of its local transform are
+///multiplied by scaling / m_localScaling; then 'scaling' is stored and the local aabb rebuilt.
+void btCompoundShape::setLocalScaling(const btVector3& scaling)
+{
+	for(int i = 0; i < m_children.size(); i++)
+	{
+		btTransform childTrans = getChildTransform(i);
+		btVector3 childScale = m_children[i].m_childShape->getLocalScaling();
+		childScale = childScale * scaling / m_localScaling;
+		m_children[i].m_childShape->setLocalScaling(childScale);
+		//the offset must use the same relative factor as the child scale, otherwise
+		//calling setLocalScaling repeatedly compounds the scaling of the child positions
+		childTrans.setOrigin((childTrans.getOrigin())*scaling/m_localScaling);
+		updateChildTransform(i, childTrans,false);	//the aabb is rebuilt once, after the loop
+	}
+	m_localScaling = scaling;
+	recalculateLocalAabb();
+}
+
+
+///Builds the dynamic aabb tree for a compound that has none yet and inserts one leaf per
+///existing child (leaf data = child index); does nothing when the tree already exists.
+void btCompoundShape::createAabbTreeFromChildren()
+{
+    if ( m_dynamicAabbTree )
+        return;
+
+    void* mem = btAlignedAlloc(sizeof(btDbvt),16);
+    m_dynamicAabbTree = new(mem) btDbvt();
+    btAssert(mem==m_dynamicAabbTree);
+
+    for ( int index = 0; index < m_children.size(); index++ )
+    {
+        btCompoundShapeChild &child = m_children[index];
+        btVector3 localAabbMin,localAabbMax;
+        child.m_childShape->getAabb(child.m_transform,localAabbMin,localAabbMax);
+        const btDbvtVolume  bounds=btDbvtVolume::FromMM(localAabbMin,localAabbMax);
+        //go through a pointer-sized integer: a plain int to void* cast is size-mismatched on 64 bit targets
+        child.m_node = m_dynamicAabbTree->insert(bounds,reinterpret_cast<void*>(static_cast<size_t>(index)));
+    }
+}
+
+
+///Writes this compound as a btCompoundShapeData into dataBuffer, plus one btCompoundShapeChildData per child in a
+///separate array chunk; always returns the struct name "btCompoundShapeData" (this override has no failure path).
+const char*	btCompoundShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btCompoundShapeData* shapeData = (btCompoundShapeData*) dataBuffer;
+	btCollisionShape::serialize(&shapeData->m_collisionShapeData, serializer);
+
+	shapeData->m_collisionMargin = float(m_collisionMargin);
+	shapeData->m_numChildShapes = m_children.size();
+	shapeData->m_childShapePtr = 0;
+	if (!shapeData->m_numChildShapes)
+		return "btCompoundShapeData";
+
+	//one array chunk holds the per-child records; childData walks through it in child order
+	btChunk* arrayChunk = serializer->allocate(sizeof(btCompoundShapeChildData),shapeData->m_numChildShapes);
+	btCompoundShapeChildData* childData = (btCompoundShapeChildData*)arrayChunk->m_oldPtr;
+	shapeData->m_childShapePtr = (btCompoundShapeChildData*)serializer->getUniquePointer(childData);
+	for (int childIndex=0;childIndex<shapeData->m_numChildShapes;++childIndex,++childData)
+	{
+		const btCompoundShapeChild& child = m_children[childIndex];
+		childData->m_childMargin = float(child.m_childMargin);
+		childData->m_childShape = (btCollisionShapeData*)serializer->getUniquePointer(child.m_childShape);
+		//a shape the serializer already knows is only referenced, not written a second time
+		if (!serializer->findPointer(child.m_childShape))
+		{
+			btChunk* shapeChunk = serializer->allocate(child.m_childShape->calculateSerializeBufferSize(),1);
+			const char* structType = child.m_childShape->serialize(shapeChunk->m_oldPtr,serializer);
+			serializer->finalizeChunk(shapeChunk,structType,BT_SHAPE_CODE,child.m_childShape);
+		}
+		childData->m_childShapeType = child.m_childShapeType;
+		child.m_transform.serializeFloat(childData->m_transform);
+	}
+	serializer->finalizeChunk(arrayChunk,"btCompoundShapeChildData",BT_ARRAY_CODE,arrayChunk->m_oldPtr);
+	return "btCompoundShapeData";
+}
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btCompoundShape.h b/src/bullet/BulletCollision/CollisionShapes/btCompoundShape.h
new file mode 100644
index 00000000..141034a8
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btCompoundShape.h
@@ -0,0 +1,212 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_COMPOUND_SHAPE_H
+#define BT_COMPOUND_SHAPE_H
+
+#include "btCollisionShape.h"
+
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btMatrix3x3.h"
+#include "btCollisionMargin.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+//class btOptimizedBvh;
+struct btDbvt;
+
+ATTRIBUTE_ALIGNED16(struct) btCompoundShapeChild
+{
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	//one entry of btCompoundShape::m_children, filled in by btCompoundShape::addChildShape
+	btTransform			m_transform;	///< placement of the child in the compound's local frame
+	btCollisionShape*	m_childShape;	///< the child shape, referenced by pointer
+	int					m_childShapeType;	///< copy of m_childShape->getShapeType() taken when the child was added
+	btScalar			m_childMargin;	///< copy of m_childShape->getMargin() taken when the child was added
+	struct btDbvtNode*	m_node;	///< leaf of the compound's dynamic aabb tree, 0 when the compound has no tree
+};
+
+///two children are equal when transform, shape pointer, cached shape type and cached margin all match (m_node is not compared)
+SIMD_FORCE_INLINE bool operator==(const btCompoundShapeChild& c1, const btCompoundShapeChild& c2)
+{
+	if (!(c1.m_transform == c2.m_transform) || c1.m_childShape != c2.m_childShape)
+		return false;
+	return c1.m_childShapeType == c2.m_childShapeType && c1.m_childMargin == c2.m_childMargin;
+}
+
+/// The btCompoundShape allows to store multiple other btCollisionShapes
+/// This allows for moving concave collision objects. This is more general than the static concave btBvhTriangleMeshShape.
+/// It has an (optional) dynamic aabb tree to accelerate early rejection tests. 
+/// @todo: This aabb tree can also be used to speed up ray tests on btCompoundShape, see http://code.google.com/p/bullet/issues/detail?id=25
+/// Currently, removal of child shapes is only supported when disabling the aabb tree (pass 'false' in the constructor of btCompoundShape). NOTE(review): removeChildShapeByIndex() also removes the child's leaf from the tree, so this restriction looks outdated; confirm
+ATTRIBUTE_ALIGNED16(class) btCompoundShape	: public btCollisionShape
+{
+	btAlignedObjectArray<btCompoundShapeChild> m_children;	///< the children; removing one moves the last child into the freed index
+	btVector3						m_localAabbMin;	///< min corner of the cached aabb of all children, in the compound's local frame
+	btVector3						m_localAabbMax;	///< max corner of the same cached aabb
+	///optional acceleration structure over the children, 0 when disabled (see the constructor argument)
+	btDbvt*							m_dynamicAabbTree;
+
+	///increment m_updateRevision when adding/removing/replacing child shapes, so that some caches can be updated
+	int								m_updateRevision;
+	///margin added on every side of the local aabb in getAabb(), zero after construction
+	btScalar	m_collisionMargin;
+
+protected:
+	btVector3	m_localScaling;	///< scaling last passed to setLocalScaling(), (1,1,1) after construction
+
+public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	///with enableDynamicAabbTree the shape allocates a btDbvt that tracks its children
+	btCompoundShape(bool enableDynamicAabbTree = true);
+
+	virtual ~btCompoundShape();
+	///appends 'shape' at 'localTransform'; the shape is referenced by pointer, not copied
+	void	addChildShape(const btTransform& localTransform,btCollisionShape* shape);
+
+	/// Remove all children shapes that contain the specified shape
+	virtual void removeChildShape(btCollisionShape* shape);
+	///removes one child; the last child moves into the freed index and the local aabb is NOT recalculated
+	void removeChildShapeByIndex(int childShapeindex);
+
+
+	int		getNumChildShapes() const
+	{
+		return int (m_children.size());
+	}
+
+	btCollisionShape* getChildShape(int index)
+	{
+		return m_children[index].m_childShape;
+	}
+	const btCollisionShape* getChildShape(int index) const
+	{
+		return m_children[index].m_childShape;
+	}
+
+	btTransform&	getChildTransform(int index)
+	{
+		return m_children[index].m_transform;
+	}
+	const btTransform&	getChildTransform(int index) const
+	{
+		return m_children[index].m_transform;
+	}
+
+	///set a new transform for a child, and update internal data structures (local aabb and dynamic tree)
+	void	updateChildTransform(int childIndex, const btTransform& newChildTransform, bool shouldRecalculateLocalAabb = true);
+
+	///address of the first child record; NOTE(review): indexes element 0, so presumably only valid with at least one child
+	btCompoundShapeChild* getChildList()
+	{
+		return &m_children[0];
+	}
+
+	///aabb of the compound under transform 't', derived from the cached local aabb plus the collision margin
+	virtual	void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+
+	/** Re-calculate the local Aabb. Is called at the end of removeChildShapes. 
+	Use this yourself if you modify the children or their transforms. */
+	virtual void recalculateLocalAabb(); 
+
+	virtual void	setLocalScaling(const btVector3& scaling);
+	///scaling stored by the last setLocalScaling() call
+	virtual const btVector3& getLocalScaling() const 
+	{
+		return m_localScaling;
+	}
+	///approximation: inertia of a box filling the compound's local aabb
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	virtual void	setMargin(btScalar margin)
+	{
+		m_collisionMargin = margin;
+	}
+	virtual btScalar	getMargin() const
+	{
+		return m_collisionMargin;
+	}
+	virtual const char*	getName()const
+	{
+		return "Compound";
+	}
+
+	const btDbvt*	getDynamicAabbTree() const
+	{
+		return m_dynamicAabbTree;
+	}
+	
+	btDbvt*	getDynamicAabbTree()
+	{
+		return m_dynamicAabbTree;
+	}
+	///creates the dynamic aabb tree and inserts all current children; does nothing when the tree already exists
+	void createAabbTreeFromChildren();
+
+	///computes the exact moment of inertia and the transform from the coordinate system defined by the principal axes of the moment of inertia
+	///and the center of mass to the current coordinate system. "masses" points to an array of masses of the children. The resulting transform
+	///"principal" has to be applied inversely to all children transforms in order for the local coordinate system of the compound
+	///shape to be centered at the center of mass and to coincide with the principal axes. This also necessitates a correction of the world transform
+	///of the collision object by the principal transform.
+	void calculatePrincipalAxisTransform(btScalar* masses, btTransform& principal, btVector3& inertia) const;
+
+	int	getUpdateRevision() const
+	{
+		return m_updateRevision;
+	}
+	///returns sizeof(btCompoundShapeData), see the inline definition after the serialization structs
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct btCompoundShapeChildData
+{
+	btTransformFloatData	m_transform;	///< child transform, written by btTransform::serializeFloat
+	btCollisionShapeData	*m_childShape;	///< serializer->getUniquePointer() of the child shape
+	int						m_childShapeType;	///< copy of btCompoundShapeChild::m_childShapeType
+	float					m_childMargin;	///< btCompoundShapeChild::m_childMargin converted to float
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btCompoundShapeData
+{
+	btCollisionShapeData		m_collisionShapeData;	///< base part, filled by btCollisionShape::serialize
+
+	btCompoundShapeChildData	*m_childShapePtr;	///< serializer->getUniquePointer() of the child array chunk, 0 without children
+
+	int							m_numChildShapes;	///< number of children at the time of serialization
+
+	float	m_collisionMargin;	///< btCompoundShape::m_collisionMargin converted to float
+
+};
+
+
+///size of the buffer that serialize() fills for a compound: exactly one btCompoundShapeData
+SIMD_FORCE_INLINE	int	btCompoundShape::calculateSerializeBufferSize() const
+{
+	return static_cast<int>(sizeof(btCompoundShapeData));
+}
+
+
+
+
+
+
+#endif //BT_COMPOUND_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConcaveShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btConcaveShape.cpp
new file mode 100644
index 00000000..58ff84a5
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConcaveShape.cpp
@@ -0,0 +1,27 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btConcaveShape.h"
+
+btConcaveShape::btConcaveShape() : m_collisionMargin(btScalar(0.))
+{
+	//nothing else to set up: the collision margin starts at zero
+}
+
+btConcaveShape::~btConcaveShape()
+{
+	//intentionally empty: the only member declared by this class is a btScalar
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConcaveShape.h b/src/bullet/BulletCollision/CollisionShapes/btConcaveShape.h
new file mode 100644
index 00000000..2a03241c
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConcaveShape.h
@@ -0,0 +1,60 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONCAVE_SHAPE_H
+#define BT_CONCAVE_SHAPE_H
+
+#include "btCollisionShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
+#include "btTriangleCallback.h"
+
+/// PHY_ScalarType enumerates possible scalar types.
+/// See the btStridingMeshInterface or btHeightfieldTerrainShape for its use
+typedef enum PHY_ScalarType {
+	PHY_FLOAT,	// = 0 (enumerators are numbered implicitly, in declaration order)
+	PHY_DOUBLE,	// = 1
+	PHY_INTEGER,	// = 2
+	PHY_SHORT,	// = 3
+	PHY_FIXEDPOINT88,	// = 4; NOTE(review): presumably 8.8 fixed point, confirm at the use sites
+	PHY_UCHAR	// = 5
+} PHY_ScalarType;
+
+///Abstract interface for non-moving (static) concave shapes; it adds the collision margin storage and the triangle query.
+///It has been implemented by the btStaticPlaneShape, btBvhTriangleMeshShape and btHeightfieldTerrainShape.
+class btConcaveShape : public btCollisionShape
+{
+protected:
+	///margin handed out by getMargin(); the constructor sets it to zero
+	btScalar m_collisionMargin;
+
+public:
+	btConcaveShape();
+	virtual ~btConcaveShape();
+
+	///NOTE(review): presumably reports the triangles overlapping [aabbMin,aabbMax] to 'callback'; implemented by subclasses, confirm there
+	virtual void	processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const = 0;
+	///collision margin currently stored for this shape
+	virtual btScalar getMargin() const
+	{
+		return this->m_collisionMargin;
+	}
+	///stores a new collision margin, the value is taken as is
+	virtual void setMargin(btScalar collisionMargin)
+	{
+		this->m_collisionMargin = collisionMargin;
+	}
+};
+
+#endif //BT_CONCAVE_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConeShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btConeShape.cpp
new file mode 100644
index 00000000..5e83087b
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConeShape.cpp
@@ -0,0 +1,143 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btConeShape.h"
+
+
+
+///Builds a cone with the given base radius and height, aligned with the Y axis (up index 1).
+btConeShape::btConeShape (btScalar radius,btScalar height): btConvexInternalShape (),
+m_radius (radius),
+m_height(height)
+{
+	m_shapeType = CONE_SHAPE_PROXYTYPE;
+	setConeUpIndex(1);
+	m_sinAngle = (m_radius / btSqrt(m_radius * m_radius + m_height * m_height));	//base radius over slant length
+}
+
+///cone along the Z axis: built like the base cone, then re-aimed with up index 2
+btConeShapeZ::btConeShapeZ (btScalar radius,btScalar height)
+	: btConeShape(radius,height)
+{
+	setConeUpIndex(2);
+}
+///cone along the X axis: built like the base cone, then re-aimed with up index 0
+btConeShapeX::btConeShapeX (btScalar radius,btScalar height)
+	: btConeShape(radius,height)
+{
+	setConeUpIndex(0);
+}
+///Chooses the axis the cone points along: m_coneIndices[1] becomes 'upIndex' and
+///m_coneIndices[0]/[2] the two remaining (radial) axes.
+///Any value other than 0, 1 or 2 trips btAssert(0) and leaves the current axes untouched.
+void	btConeShape::setConeUpIndex(int upIndex)
+{
+	//one row per legal up index: { radial axis, up axis, radial axis }
+	static const int axisTable[3][3] =
+	{
+		{ 1, 0, 2 },
+		{ 0, 1, 2 },
+		{ 0, 2, 1 }
+	};
+
+	if (upIndex < 0 || upIndex > 2)
+	{
+		btAssert(0);
+		return;
+	}
+
+	const int* const axes = axisTable[upIndex];
+	m_coneIndices[0] = axes[0];
+	m_coneIndices[1] = axes[1];
+	m_coneIndices[2] = axes[2];
+}
+
+///Support point of the bare cone (no margin) for direction 'v': the apex when v's up component exceeds
+///v.length() * m_sinAngle, otherwise a point on the base (rim point under v's radial part, or the base centre).
+btVector3 btConeShape::coneLocalSupport(const btVector3& v) const
+{
+	const int up = m_coneIndices[1];
+	const int radialA = m_coneIndices[0];
+	const int radialB = m_coneIndices[2];
+	const btScalar halfHeight = m_height * btScalar(0.5);
+
+	btVector3 support;
+	if (v[up] > v.length() * m_sinAngle)
+	{
+		//the apex sits on the axis, half the height above the centre
+		support[radialA] = btScalar(0.);
+		support[up] = halfHeight;
+		support[radialB] = btScalar(0.);
+		return support;
+	}
+	//every remaining answer lies in the base plane, half the height below the centre
+	support[up] = -halfHeight;
+	const btScalar radialLength = btSqrt(v[radialA] * v[radialA] + v[radialB] * v[radialB]);
+	if (radialLength > SIMD_EPSILON)
+	{
+		const btScalar rimScale = m_radius / radialLength;
+		support[radialA] = v[radialA] * rimScale;
+		support[radialB] = v[radialB] * rimScale;
+	}
+	else
+	{
+		support[radialA] = btScalar(0.);
+		support[radialB] = btScalar(0.);
+	}
+	return support;
+}
+
+btVector3	btConeShape::localGetSupportingVertexWithoutMargin(const btVector3& vec) const
+{
+	const btVector3 bareSupport = coneLocalSupport(vec);	//support of the cone itself, margin excluded
+	return bareSupport;
+}
+///fills supportVerticesOut[i] with the margin-free support point for vectors[i], for every i in [0,numVectors)
+void	btConeShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+	for (int index=0; index<numVectors; ++index)
+	{
+		supportVerticesOut[index] = coneLocalSupport(vectors[index]);
+	}
+}
+
+
+///support point including the collision margin: the bare cone support pushed out by getMargin() along the normalised 'vec'
+btVector3	btConeShape::localGetSupportingVertex(const btVector3& vec)  const
+{
+	btVector3 supVertex = coneLocalSupport(vec);
+	const btScalar margin = getMargin();
+	if (margin == btScalar(0.))
+		return supVertex;
+	//a (near) zero direction cannot be normalised, so fall back to the (-1,-1,-1) diagonal
+	btVector3 direction = vec;
+	if (direction.length2() < (SIMD_EPSILON*SIMD_EPSILON))
+		direction.setValue(btScalar(-1.),btScalar(-1.),btScalar(-1.));
+	direction.normalize();
+	supVertex += margin * direction;
+	return supVertex;
+}
+
+
+///rescales height (up axis) and radius (mean of the two radial factors) relative to the current scaling, then refreshes the sine and the base class
+void	btConeShape::setLocalScaling(const btVector3& scaling)
+{
+	const int up = m_coneIndices[1], radialA = m_coneIndices[0], radialB = m_coneIndices[2];
+	m_height *= scaling[up] / m_localScaling[up];
+	const btScalar radiusFactor = (scaling[radialA] / m_localScaling[radialA] + scaling[radialB] / m_localScaling[radialB]) / 2;
+	m_radius *= radiusFactor;
+	m_sinAngle = (m_radius / btSqrt(m_radius * m_radius + m_height * m_height));
+	btConvexInternalShape::setLocalScaling(scaling);
+}
\ No newline at end of file
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConeShape.h b/src/bullet/BulletCollision/CollisionShapes/btConeShape.h
new file mode 100644
index 00000000..b69b5c5b
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConeShape.h
@@ -0,0 +1,103 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONE_MINKOWSKI_H
+#define BT_CONE_MINKOWSKI_H
+
+#include "btConvexInternalShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
+
+///The btConeShape implements a cone shape primitive, centered around the origin and aligned with the Y axis. The btConeShapeX is aligned around the X axis and btConeShapeZ around the Z axis.
+class btConeShape : public btConvexInternalShape
+
+{
+	//m_sinAngle = m_radius / sqrt(m_radius^2 + m_height^2), refreshed by the constructor and by setLocalScaling
+	btScalar m_sinAngle;
+	btScalar m_radius;
+	btScalar m_height;
+	int		m_coneIndices[3];	///< [1] is the up axis, [0] and [2] the two radial axes (see setConeUpIndex)
+	btVector3 coneLocalSupport(const btVector3& v) const;	///< support point of the bare cone (no margin) in direction v
+
+
+public:
+	btConeShape (btScalar radius,btScalar height);
+	
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec) const;
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec) const;
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+
+	btScalar getRadius() const { return m_radius;}
+	btScalar getHeight() const { return m_height;}
+
+	///box approximation: inertia of a solid box whose half extents are the shape's aabb half extents plus the margin
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const
+	{
+		btTransform identity;
+		identity.setIdentity();
+		btVector3 aabbMin,aabbMax;
+		getAabb(identity,aabbMin,aabbMax);
+
+		btVector3 halfExtents = (aabbMax-aabbMin)*btScalar(0.5);
+		//NOTE(review): if getAabb (not visible here) already includes the margin, it is counted twice below; confirm in btConvexInternalShape
+		btScalar margin = getMargin();
+
+		btScalar lx=btScalar(2.)*(halfExtents.x()+margin);
+		btScalar ly=btScalar(2.)*(halfExtents.y()+margin);
+		btScalar lz=btScalar(2.)*(halfExtents.z()+margin);
+		const btScalar x2 = lx*lx;
+		const btScalar y2 = ly*ly;
+		const btScalar z2 = lz*lz;
+		const btScalar scaledmass = mass * btScalar(0.08333333);	//0.08333333 ~= 1/12, the factor of the solid box inertia formula
+
+		inertia = scaledmass * (btVector3(y2+z2,x2+z2,x2+y2));
+
+//		inertia.x() = scaledmass * (y2+z2);
+//		inertia.y() = scaledmass * (x2+z2);
+//		inertia.z() = scaledmass * (x2+y2);
+	}
+
+
+		virtual const char*	getName()const 
+		{
+			return "Cone";
+		}
+		
+		///choose upAxis index
+		void	setConeUpIndex(int upIndex);
+		
+		int	getConeUpIndex() const
+		{
+			return m_coneIndices[1];
+		}
+	///rescales radius and height relative to the current scaling, then forwards to btConvexInternalShape
+	virtual void	setLocalScaling(const btVector3& scaling);
+
+};
+
+///btConeShape implements a Cone shape, around the X axis
+class btConeShapeX : public btConeShape
+{
+	public:
+		btConeShapeX(btScalar radius,btScalar height);	///< base cone construction followed by setConeUpIndex(0)
+};
+
+///btConeShapeZ implements a Cone shape, around the Z axis
+class btConeShapeZ : public btConeShape
+{
+	public:
+		btConeShapeZ(btScalar radius,btScalar height);	///< base cone construction followed by setConeUpIndex(2)
+};
+#endif //BT_CONE_MINKOWSKI_H
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvex2dShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btConvex2dShape.cpp
new file mode 100644
index 00000000..10ea3e98
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvex2dShape.cpp
@@ -0,0 +1,92 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btConvex2dShape.h"
+
+btConvex2dShape::btConvex2dShape(	btConvexShape* convexChildShape):
+btConvexShape (), m_childConvexShape(convexChildShape)	//only the pointer is stored; the child shape is not copied
+{
+	m_shapeType = CONVEX_2D_SHAPE_PROXYTYPE;	//tag this wrapper with the 2d convex proxy type
+}
+	
+btConvex2dShape::~btConvex2dShape()
+{	//nothing to release: m_childConvexShape is not deleted here
+}
+
+	
+
+btVector3	btConvex2dShape::localGetSupportingVertexWithoutMargin(const btVector3& vec)const
+{	//pure delegation: returns the child shape's margin-less support vertex for vec
+	return m_childConvexShape->localGetSupportingVertexWithoutMargin(vec);
+}
+
+void	btConvex2dShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{	//pure delegation: the child shape fills supportVerticesOut for the numVectors directions
+	m_childConvexShape->batchedUnitVectorGetSupportingVertexWithoutMargin(vectors,supportVerticesOut,numVectors);
+}
+
+
+btVector3	btConvex2dShape::localGetSupportingVertex(const btVector3& vec)const
+{	//pure delegation: returns whatever the child shape reports as support vertex for vec
+	return m_childConvexShape->localGetSupportingVertex(vec);
+}
+
+
+void	btConvex2dShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	///the inertia is taken unchanged from the child shape; no 2d-specific adjustment is applied here
+	m_childConvexShape->calculateLocalInertia(mass,inertia);
+}
+
+
+	///forwards to the child shape's getAabb; the wrapper computes no bounding box of its own
+void btConvex2dShape::getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	m_childConvexShape->getAabb(t,aabbMin,aabbMax);
+}
+
+void btConvex2dShape::getAabbSlow(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+{	//pure delegation to the child shape's getAabbSlow
+	m_childConvexShape->getAabbSlow(t,aabbMin,aabbMax);
+}
+
+void	btConvex2dShape::setLocalScaling(const btVector3& scaling) 
+{	//the scaling is applied to (and stored by) the child shape, not this wrapper
+	m_childConvexShape->setLocalScaling(scaling);
+}
+
+const btVector3& btConvex2dShape::getLocalScaling() const
+{	//returns the reference handed out by the child shape
+	return m_childConvexShape->getLocalScaling();
+}
+
+void	btConvex2dShape::setMargin(btScalar margin)
+{	//the margin is set on the child shape; this wrapper keeps no margin of its own
+	m_childConvexShape->setMargin(margin);
+}
+btScalar	btConvex2dShape::getMargin() const
+{	//reports the child shape's margin
+	return m_childConvexShape->getMargin();
+}
+
+int		btConvex2dShape::getNumPreferredPenetrationDirections() const
+{	//pure delegation to the child shape
+	return m_childConvexShape->getNumPreferredPenetrationDirections();
+}
+	
+void	btConvex2dShape::getPreferredPenetrationDirection(int index, btVector3& penetrationVector) const
+{	//pure delegation: the child shape writes penetrationVector for the given index
+	m_childConvexShape->getPreferredPenetrationDirection(index,penetrationVector);
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvex2dShape.h b/src/bullet/BulletCollision/CollisionShapes/btConvex2dShape.h
new file mode 100644
index 00000000..caf54329
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvex2dShape.h
@@ -0,0 +1,80 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONVEX_2D_SHAPE_H
+#define BT_CONVEX_2D_SHAPE_H
+
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
+
+///The btConvex2dShape allows arbitrary convex shapes to be used as 2d convex shapes, with the Z component assumed to be 0.
+///For 2d boxes, the btBox2dShape is recommended.
+class btConvex2dShape : public btConvexShape
+{
+	btConvexShape*	m_childConvexShape;	//wrapped shape; the destructor of this class does not delete it
+
+	public:
+	
+	btConvex2dShape(	btConvexShape* convexChildShape);
+	
+	virtual ~btConvex2dShape();
+	
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec)const;
+
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	btConvexShape*	getChildShape() 
+	{
+		return m_childConvexShape;	//mutable access to the wrapped shape
+	}
+
+	const btConvexShape*	getChildShape() const
+	{
+		return m_childConvexShape;	//read-only access to the wrapped shape
+	}
+
+	virtual const char*	getName()const 
+	{
+		return "Convex2dShape";
+	}
+	
+
+
+	///////////////////////////
+
+
+	///getAabb is forwarded to the child shape (see btConvex2dShape.cpp); no bounding box is computed by the wrapper itself
+	void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+
+	virtual void getAabbSlow(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+
+	virtual void	setLocalScaling(const btVector3& scaling) ;
+	virtual const btVector3& getLocalScaling() const ;
+
+	virtual void	setMargin(btScalar margin);
+	virtual btScalar	getMargin() const;
+
+	virtual int		getNumPreferredPenetrationDirections() const;
+	
+	virtual void	getPreferredPenetrationDirection(int index, btVector3& penetrationVector) const;
+
+
+};
+
+#endif //BT_CONVEX_2D_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvexHullShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btConvexHullShape.cpp
new file mode 100644
index 00000000..22624597
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvexHullShape.cpp
@@ -0,0 +1,255 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btConvexHullShape.h"
+#include "BulletCollision/CollisionShapes/btCollisionMargin.h"
+
+#include "LinearMath/btQuaternion.h"
+#include "LinearMath/btSerializer.h"
+
+///Copies numPoints points (x,y,z as consecutive btScalar, stride bytes apart) and computes the cached local aabb.
+btConvexHullShape ::btConvexHullShape (const btScalar* points,int numPoints,int stride) : btPolyhedralConvexAabbCachingShape ()
+{
+	m_shapeType = CONVEX_HULL_SHAPE_PROXYTYPE;
+	m_unscaledPoints.resize(numPoints);
+
+	//walk the input as raw bytes so that an arbitrary byte stride can be honoured
+	const unsigned char* cursor = (const unsigned char*)points;
+	for (int idx=0;idx<numPoints;idx++,cursor+=stride)
+	{
+		const btScalar* xyz = (const btScalar*)cursor;
+		m_unscaledPoints[idx] = btVector3(xyz[0], xyz[1], xyz[2]);
+	}
+
+	//the cached local aabb has to reflect the freshly copied points
+	recalcLocalAabb();
+}
+
+
+
+void btConvexHullShape::setLocalScaling(const btVector3& scaling)
+{
+	m_localScaling = scaling;	//stored exactly as given, sign included
+	recalcLocalAabb();	//the cached local aabb depends on the scaled points
+}
+
+void btConvexHullShape::addPoint(const btVector3& point)
+{
+	m_unscaledPoints.push_back(point);	//the point is stored unscaled; scaling is applied on read
+	recalcLocalAabb();	//runs on every call, so the cached aabb is refreshed once per added point
+
+}
+
+///Returns the scaled point with the largest dot product against vec (the zero vector when there are no points).
+btVector3	btConvexHullShape::localGetSupportingVertexWithoutMargin(const btVector3& vec)const
+{
+	btVector3 best(btScalar(0.),btScalar(0.),btScalar(0.));
+	btScalar bestDot = btScalar(-BT_LARGE_FLOAT);
+
+	const int count = m_unscaledPoints.size();
+	for (int idx=0;idx<count;idx++)
+	{
+		const btVector3 candidate = m_unscaledPoints[idx] * m_localScaling;
+		const btScalar candidateDot = vec.dot(candidate);
+		if (!(candidateDot > bestDot))
+			continue;	//not strictly better: the earlier point wins ties
+		bestDot = candidateDot;
+		best = candidate;
+	}
+	return best;
+}
+
+void	btConvexHullShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+	btScalar newDot;
+	//the 4th ('w') component of each output vector is used as scratch storage for the best dot product found so far
+	{
+		for (int i=0;i<numVectors;i++)
+		{
+			supportVerticesOut[i][3] = btScalar(-BT_LARGE_FLOAT);
+		}
+	}
+	for (int i=0;i<m_unscaledPoints.size();i++)
+	{
+		btVector3 vtx = getScaledPoint(i);	//each point is scaled once and then tested against every direction
+
+		for (int j=0;j<numVectors;j++)
+		{
+			const btVector3& vec = vectors[j];
+			
+			newDot = vec.dot(vtx);
+			if (newDot > supportVerticesOut[j][3])
+			{
+				//WARNING: don't swap next lines, the w component would get overwritten!
+				supportVerticesOut[j] = vtx;
+				supportVerticesOut[j][3] = newDot;
+			}
+		}
+	}
+
+	//on return each supportVerticesOut[j][3] still holds the winning dot product (or -BT_LARGE_FLOAT if there were no points)
+
+}
+	
+
+
+///Support vertex including margin: the margin-less support point moved out by getMargin() along normalized vec.
+btVector3	btConvexHullShape::localGetSupportingVertex(const btVector3& vec)const
+{
+	btVector3 result = localGetSupportingVertexWithoutMargin(vec);
+	if ( getMargin()==btScalar(0.) )
+		return result;	//no margin, nothing to add
+	//a (near) zero direction cannot be normalized, so (-1,-1,-1) is used instead
+	btVector3 dir = vec;
+	if (dir.length2() < (SIMD_EPSILON*SIMD_EPSILON))
+	{
+		dir.setValue(btScalar(-1.),btScalar(-1.),btScalar(-1.));
+	}
+	dir.normalize();
+	result += getMargin() * dir;
+	return result;
+}
+
+
+
+
+
+
+
+
+
+//currently just for debugging (drawing), perhaps future support for algebraic continuous collision detection
+//Please note that you can debug-draw btConvexHullShape with the Raytracer Demo
+int	btConvexHullShape::getNumVertices() const
+{
+	return m_unscaledPoints.size();	//one vertex per stored point
+}
+
+int btConvexHullShape::getNumEdges() const
+{
+	return m_unscaledPoints.size();	//one edge per point: getEdge(i) joins point i to point i+1, wrapping around
+}
+
+///Edge i joins scaled point i to its successor in storage order, wrapping around after the last point.
+void btConvexHullShape::getEdge(int i,btVector3& pa,btVector3& pb) const
+{
+	//note: the point count is used as modulus below, so it must be non-zero
+	const int count = m_unscaledPoints.size();
+	pa = getScaledPoint(i%count);
+	pb = getScaledPoint((i+1)%count);
+}
+
+void btConvexHullShape::getVertex(int i,btVector3& vtx) const
+{
+	vtx = getScaledPoint(i);	//vertex i is stored point i with the local scaling applied
+}
+
+int	btConvexHullShape::getNumPlanes() const
+{
+	return 0;	//no planes are exposed; getPlane asserts if it is called
+}
+
+void btConvexHullShape::getPlane(btVector3& ,btVector3& ,int ) const
+{
+	//not implemented: getNumPlanes reports 0, so any call is treated as a programming error
+	btAssert(0);
+}
+
+//not implemented yet: asserts, and returns false when the assert does not stop execution
+bool btConvexHullShape::isInside(const btVector3& ,btScalar ) const
+{
+	btAssert(0);
+	return false;
+}
+
+///fills the dataBuffer (a btConvexHullShapeData) and returns the struct name; the points go into a separate array chunk
+const char*	btConvexHullShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btConvexHullShapeData* shapeData = (btConvexHullShapeData*) dataBuffer;
+	btConvexInternalShape::serialize(&shapeData->m_convexInternalShapeData, serializer);
+
+	int numElem = m_unscaledPoints.size();
+	shapeData->m_numUnscaledPoints = numElem;
+	//only the pointer matching the build's precision is set; an empty hull stores null in both
+#ifdef BT_USE_DOUBLE_PRECISION
+	shapeData->m_unscaledPointsFloatPtr = 0;
+	shapeData->m_unscaledPointsDoublePtr = numElem ? (btVector3Data*)serializer->getUniquePointer((void*)&m_unscaledPoints[0]):  0;
+#else
+	shapeData->m_unscaledPointsFloatPtr = numElem ? (btVector3Data*)serializer->getUniquePointer((void*)&m_unscaledPoints[0]):  0;
+	shapeData->m_unscaledPointsDoublePtr = 0;
+#endif
+	//zero the explicit padding so that no uninitialized bytes end up in the serialized struct
+	for (int pad=0;pad<int(sizeof(shapeData->m_padding3));pad++) shapeData->m_padding3[pad] = 0;
+
+	if (numElem)
+	{
+		int sz = sizeof(btVector3Data);
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		btVector3Data* memPtr = (btVector3Data*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			m_unscaledPoints[i].serialize(*memPtr);
+		}
+		serializer->finalizeChunk(chunk,btVector3DataName,BT_ARRAY_CODE,(void*)&m_unscaledPoints[0]);
+	}
+
+	return "btConvexHullShapeData";
+}
+
+///Projects every scaled, transformed point onto dir and returns the smallest/largest dot product in min/max.
+///No margin is added. With no points the result is the widest interval: min=-FLT_MAX, max=FLT_MAX.
+void btConvexHullShape::project(const btTransform& trans, const btVector3& dir, btScalar& min, btScalar& max) const
+{
+#if 1
+	//start from an inverted (empty) interval so that the first point initializes both ends
+	min = FLT_MAX;
+	max = -FLT_MAX;
+
+	int numVerts = m_unscaledPoints.size();
+	for(int i=0;i<numVerts;i++)
+	{
+		btVector3 vtx = m_unscaledPoints[i] * m_localScaling;
+		btVector3 pt = trans * vtx;
+		btScalar dp = pt.dot(dir);
+		if(dp < min)	
+		{
+			min = dp;
+		}
+		if(dp > max)	
+		{
+			max = dp;
+		}
+	}
+#else
+	//disabled alternative based on the support mapping (kept for reference)
+	btVector3 localAxis = dir*trans.getBasis();
+	btVector3 vtx1 = trans(localGetSupportingVertex(localAxis));
+	btVector3 vtx2 = trans(localGetSupportingVertex(-localAxis));
+
+	min = vtx1.dot(dir);
+	max = vtx2.dot(dir);
+#endif
+
+	//still inverted only if no projection was recorded above; swapping yields the widest interval
+	if(min>max)
+	{
+		btScalar tmp = min;
+		min = max;
+		max = tmp;
+	}
+
+}
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvexHullShape.h b/src/bullet/BulletCollision/CollisionShapes/btConvexHullShape.h
new file mode 100644
index 00000000..95a2af6a
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvexHullShape.h
@@ -0,0 +1,122 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONVEX_HULL_SHAPE_H
+#define BT_CONVEX_HULL_SHAPE_H
+
+#include "btPolyhedralConvexShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
+#include "LinearMath/btAlignedObjectArray.h"
+
+
+///The btConvexHullShape implements an implicit convex hull of an array of vertices.
+///Bullet provides a general and fast collision detector for convex shapes based on GJK and EPA using localGetSupportingVertex.
+ATTRIBUTE_ALIGNED16(class) btConvexHullShape : public btPolyhedralConvexAabbCachingShape
+{
+	btAlignedObjectArray<btVector3>	m_unscaledPoints;	//internal copy of the points, stored without local scaling
+
+public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	
+	///this constructor optionally takes in a pointer to points. Each point is assumed to be 3 consecutive btScalar (x,y,z), the striding defines the number of bytes between each point, in memory.
+	///It is easier to not pass any points in the constructor, and just add one point at a time, using addPoint.
+	///btConvexHullShape make an internal copy of the points.
+	btConvexHullShape(const btScalar* points=0,int numPoints=0, int stride=sizeof(btVector3));
+
+	void addPoint(const btVector3& point);
+
+	
+	btVector3* getUnscaledPoints()
+	{
+		return &m_unscaledPoints[0];	//NOTE(review): indexes element 0, so presumably not valid on an empty hull - confirm against btAlignedObjectArray
+	}
+
+	const btVector3* getUnscaledPoints() const
+	{
+		return &m_unscaledPoints[0];	//same as above, read-only
+	}
+
+	///getPoints is obsolete, please use getUnscaledPoints
+	const btVector3* getPoints() const
+	{
+		return getUnscaledPoints();
+	}
+
+	
+
+
+	SIMD_FORCE_INLINE	btVector3 getScaledPoint(int i) const
+	{
+		return m_unscaledPoints[i] * m_localScaling;	//scaling is applied on every read; nothing scaled is cached
+	}
+
+	SIMD_FORCE_INLINE	int getNumPoints() const 
+	{
+		return m_unscaledPoints.size();
+	}
+
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec)const;
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+	
+
+	virtual void project(const btTransform& trans, const btVector3& dir, btScalar& min, btScalar& max) const;
+
+
+	//debugging
+	virtual const char*	getName()const {return "Convex";}
+
+	
+	virtual int	getNumVertices() const;
+	virtual int getNumEdges() const;
+	virtual void getEdge(int i,btVector3& pa,btVector3& pb) const;
+	virtual void getVertex(int i,btVector3& vtx) const;
+	virtual int	getNumPlanes() const;
+	virtual void getPlane(btVector3& planeNormal,btVector3& planeSupport,int i ) const;
+	virtual	bool isInside(const btVector3& pt,btScalar tolerance) const;
+
+	///in case we receive negative scaling
+	virtual void	setLocalScaling(const btVector3& scaling);
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btConvexHullShapeData
+{
+	btConvexInternalShapeData	m_convexInternalShapeData;	//written by btConvexInternalShape::serialize
+
+	btVector3FloatData	*m_unscaledPointsFloatPtr;	//set in single precision builds, null otherwise
+	btVector3DoubleData	*m_unscaledPointsDoublePtr;	//set when BT_USE_DOUBLE_PRECISION is defined, null otherwise
+
+	int		m_numUnscaledPoints;
+	char m_padding3[4];	//explicit padding bytes
+
+};
+
+
+SIMD_FORCE_INLINE	int	btConvexHullShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btConvexHullShapeData);	//size of the struct only; the point array is serialized as a separate chunk
+}
+
+
+#endif //BT_CONVEX_HULL_SHAPE_H
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvexInternalShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btConvexInternalShape.cpp
new file mode 100644
index 00000000..083d60b1
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvexInternalShape.cpp
@@ -0,0 +1,151 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btConvexInternalShape.h"
+
+
+
+btConvexInternalShape::btConvexInternalShape()
+: m_localScaling(btScalar(1.),btScalar(1.),btScalar(1.)), m_implicitShapeDimensions(btScalar(0.),btScalar(0.),btScalar(0.)),
+m_collisionMargin(CONVEX_DISTANCE_MARGIN), m_padding(btScalar(0.))
+{	//every member gets a defined value: serialize() reads m_implicitShapeDimensions even for shapes that never set it
+}
+
+
+void	btConvexInternalShape::setLocalScaling(const btVector3& scaling)
+{
+	m_localScaling = scaling.absolute();	//the result of scaling.absolute() is stored, not the raw argument
+}
+
+
+
+///Brute-force world aabb: per axis, queries localGetSupportingVertex in both directions and pads the
+///transformed result by getMargin(). Compiles to an empty body when __SPU__ is defined.
+void	btConvexInternalShape::getAabbSlow(const btTransform& trans,btVector3&minAabb,btVector3&maxAabb) const
+{
+#ifndef __SPU__
+	const btScalar pad = getMargin();
+	for (int axis=0;axis<3;axis++)
+	{
+		//unit axis, multiplied with the basis from the left to obtain the local query direction
+		btVector3 dir(btScalar(0.),btScalar(0.),btScalar(0.));
+		dir[axis] = btScalar(1.);
+		const btVector3 upper = trans(localGetSupportingVertex(dir*trans.getBasis()));
+		maxAabb[axis] = upper[axis]+pad;
+
+		dir[axis] = btScalar(-1.);
+		const btVector3 lower = trans(localGetSupportingVertex(dir*trans.getBasis()));
+		minAabb[axis] = lower[axis]-pad;
+	}
+#endif
+}
+
+
+
+///Support vertex with margin: the margin-less support point moved out by getMargin() along normalized vec.
+btVector3	btConvexInternalShape::localGetSupportingVertex(const btVector3& vec)const
+{
+#ifndef __SPU__
+	btVector3	result = localGetSupportingVertexWithoutMargin(vec);
+	if ( getMargin()==btScalar(0.) )
+		return result;	//no margin, nothing to add
+
+	//degenerate (near zero) directions are replaced by (-1,-1,-1) before normalizing
+	btVector3 dir = vec;
+	if (dir.length2() < (SIMD_EPSILON*SIMD_EPSILON))
+	{
+		dir.setValue(btScalar(-1.),btScalar(-1.),btScalar(-1.));
+	}
+	dir.normalize();
+	result += getMargin() * dir;
+	return result;
+
+#else
+	btAssert(0);
+	return btVector3(0,0,0);
+#endif //__SPU__
+
+}
+
+
+btConvexInternalAabbCachingShape::btConvexInternalAabbCachingShape()
+	:	btConvexInternalShape(),
+m_localAabbMin(1,1,1),	//min > max: an inverted placeholder box until a real one is cached
+m_localAabbMax(-1,-1,-1),
+m_isLocalAabbValid(false)	//set to true by recalcLocalAabb / setCachedLocalAabb
+{
+}
+
+
+void btConvexInternalAabbCachingShape::getAabb(const btTransform& trans,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	getNonvirtualAabb(trans,aabbMin,aabbMax,getMargin());	//transforms the cached local box; asserts that the cache is valid
+}
+
+void	btConvexInternalAabbCachingShape::setLocalScaling(const btVector3& scaling)
+{
+	btConvexInternalShape::setLocalScaling(scaling);	//base class stores scaling.absolute()
+	recalcLocalAabb();	//refresh the cached box for the new scaling
+}
+
+
+void	btConvexInternalAabbCachingShape::recalcLocalAabb()
+{
+	m_isLocalAabbValid = true;	//flagged valid up front; the box itself is filled in below
+	
+	#if 1
+	static const btVector3 _directions[] =	//+x,+y,+z followed by -x,-y,-z
+	{
+		btVector3( 1.,  0.,  0.),
+		btVector3( 0.,  1.,  0.),
+		btVector3( 0.,  0.,  1.),
+		btVector3( -1., 0.,  0.),
+		btVector3( 0., -1.,  0.),
+		btVector3( 0.,  0., -1.)
+	};
+	
+	btVector3 _supporting[] =	//receives one support vertex per direction above
+	{
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.)
+	};
+	
+	batchedUnitVectorGetSupportingVertexWithoutMargin(_directions, _supporting, 6);	//single batched query for all six directions
+	
+	for ( int i = 0; i < 3; ++i )
+	{
+		m_localAabbMax[i] = _supporting[i][i] + m_collisionMargin;	//entry i is the +axis result; m_collisionMargin is read directly, not via getMargin()
+		m_localAabbMin[i] = _supporting[i + 3][i] - m_collisionMargin;	//entry i+3 is the matching -axis result
+	}
+	
+	#else
+
+	for (int i=0;i<3;i++)
+	{
+		btVector3 vec(btScalar(0.),btScalar(0.),btScalar(0.));
+		vec[i] = btScalar(1.);
+		btVector3 tmp = localGetSupportingVertex(vec);
+		m_localAabbMax[i] = tmp[i]+m_collisionMargin;
+		vec[i] = btScalar(-1.);
+		tmp = localGetSupportingVertex(vec);
+		m_localAabbMin[i] = tmp[i]-m_collisionMargin;
+	}
+	#endif
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvexInternalShape.h b/src/bullet/BulletCollision/CollisionShapes/btConvexInternalShape.h
new file mode 100644
index 00000000..85cd9ef9
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvexInternalShape.h
@@ -0,0 +1,224 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONVEX_INTERNAL_SHAPE_H
+#define BT_CONVEX_INTERNAL_SHAPE_H
+
+#include "btConvexShape.h"
+#include "LinearMath/btAabbUtil2.h"
+
+
+///The btConvexInternalShape is an internal base class, shared by most convex shape implementations.
+///The btConvexInternalShape uses a default collision margin set to CONVEX_DISTANCE_MARGIN.
+///This collision margin is used by Gjk and some other algorithms, see also btCollisionMargin.h
+///Note that when creating small shapes (derived from btConvexInternalShape), 
+///you need to make sure to set a smaller collision margin, using the 'setMargin' API
+///There is an automatic mechanism 'setSafeMargin' used by btBoxShape and btCylinderShape
+class btConvexInternalShape : public btConvexShape
+{
+
+	protected:
+
+	//local scaling. collisionMargin is not scaled !
+	btVector3	m_localScaling;
+
+	btVector3	m_implicitShapeDimensions;
+	
+	btScalar	m_collisionMargin;
+
+	btScalar	m_padding;
+
+	btConvexInternalShape();
+
+public:
+
+	
+
+	virtual ~btConvexInternalShape()
+	{
+
+	}
+
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec)const;
+
+	const btVector3& getImplicitShapeDimensions() const
+	{
+		return m_implicitShapeDimensions;
+	}
+
+	///warning: use setImplicitShapeDimensions with care
+	///changing a collision shape while the body is in the world is not recommended,
+	///it is best to remove the body from the world, then make the change, and re-add it
+	///alternatively flush the contact points, see documentation for 'cleanProxyFromPairs'
+	void	setImplicitShapeDimensions(const btVector3& dimensions)
+	{
+		m_implicitShapeDimensions = dimensions;
+	}
+
+	void	setSafeMargin(btScalar minDimension, btScalar defaultMarginMultiplier = 0.1f)
+	{
+		btScalar safeMargin = defaultMarginMultiplier*minDimension;
+		if (safeMargin < getMargin())	//the margin is only ever lowered here, never raised
+		{
+			setMargin(safeMargin);
+		}
+	}
+	void	setSafeMargin(const btVector3& halfExtents, btScalar defaultMarginMultiplier = 0.1f)
+	{
+		//see http://code.google.com/p/bullet/issues/detail?id=349
+		//this margin check could be added to other collision shapes too,
+		//or add some assert/warning somewhere
+		btScalar minDimension=halfExtents[halfExtents.minAxis()]; 		
+		setSafeMargin(minDimension, defaultMarginMultiplier);
+	}
+
+	///getAabb's default implementation is brute force, expected derived classes to implement a fast dedicated version
+	void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+	{
+		getAabbSlow(t,aabbMin,aabbMax);
+	}
+
+
+	
+	virtual void getAabbSlow(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+
+
+	virtual void	setLocalScaling(const btVector3& scaling);
+	virtual const btVector3& getLocalScaling() const 
+	{
+		return m_localScaling;
+	}
+
+	const btVector3& getLocalScalingNV() const 
+	{
+		return m_localScaling;	//non-virtual counterpart of getLocalScaling
+	}
+
+	virtual void	setMargin(btScalar margin)
+	{
+		m_collisionMargin = margin;
+	}
+	virtual btScalar	getMargin() const
+	{
+		return m_collisionMargin;
+	}
+
+	btScalar	getMarginNV() const
+	{
+		return m_collisionMargin;	//non-virtual counterpart of getMargin
+	}
+
+	virtual int		getNumPreferredPenetrationDirections() const
+	{
+		return 0;
+	}
+	
+	virtual void	getPreferredPenetrationDirection(int index, btVector3& penetrationVector) const
+	{
+		(void)penetrationVector;
+		(void)index;
+		btAssert(0);	//never valid here: this class reports zero preferred directions
+	}
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+	
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btConvexInternalShapeData
+{
+	btCollisionShapeData	m_collisionShapeData;	//written by btCollisionShape::serialize
+
+	btVector3FloatData	m_localScaling;	//written with serializeFloat
+
+	btVector3FloatData	m_implicitShapeDimensions;	//written with serializeFloat
+	
+	float			m_collisionMargin;	//the margin converted with float()
+
+	int	m_padding;
+
+};
+
+
+
+SIMD_FORCE_INLINE	int	btConvexInternalShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btConvexInternalShapeData);	//size of the struct that serialize() fills
+}
+
+///fills the dataBuffer (a btConvexInternalShapeData) and returns the struct name
+SIMD_FORCE_INLINE	const char*	btConvexInternalShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btConvexInternalShapeData* shapeData = (btConvexInternalShapeData*) dataBuffer;
+	btCollisionShape::serialize(&shapeData->m_collisionShapeData, serializer);
+	//the convex-specific members are written into the float-typed fields of the struct
+	m_implicitShapeDimensions.serializeFloat(shapeData->m_implicitShapeDimensions);
+	m_localScaling.serializeFloat(shapeData->m_localScaling);
+	shapeData->m_collisionMargin = float(m_collisionMargin);
+	shapeData->m_padding = 0;	//zero the padding so that no uninitialized bytes end up in the serialized struct
+	return "btConvexInternalShapeData";
+}
+
+
+
+
+///btConvexInternalAabbCachingShape adds local aabb caching for convex shapes, to avoid expensive bounding box calculations
+class btConvexInternalAabbCachingShape : public btConvexInternalShape
+{
+	btVector3	m_localAabbMin;
+	btVector3	m_localAabbMax;
+	bool		m_isLocalAabbValid;	//checked by the asserts in the accessors below
+	
+protected:
+					
+	btConvexInternalAabbCachingShape();
+	
+	void setCachedLocalAabb (const btVector3& aabbMin, const btVector3& aabbMax)
+	{
+		m_isLocalAabbValid = true;
+		m_localAabbMin = aabbMin;
+		m_localAabbMax = aabbMax;
+	}
+
+	inline void getCachedLocalAabb (btVector3& aabbMin, btVector3& aabbMax) const
+	{
+		btAssert(m_isLocalAabbValid);
+		aabbMin = m_localAabbMin;
+		aabbMax = m_localAabbMax;
+	}
+
+	inline void getNonvirtualAabb(const btTransform& trans,btVector3& aabbMin,btVector3& aabbMax, btScalar margin) const
+	{
+
+		//nothing is computed lazily here: the cached local aabb must already be valid, which is only asserted
+		btAssert(m_isLocalAabbValid);
+		btTransformAabb(m_localAabbMin,m_localAabbMax,margin,trans,aabbMin,aabbMax);
+	}
+		
+public:
+		
+	virtual void	setLocalScaling(const btVector3& scaling);
+
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+
+	void	recalcLocalAabb();
+
+};
+
+#endif //BT_CONVEX_INTERNAL_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvexPointCloudShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btConvexPointCloudShape.cpp
new file mode 100644
index 00000000..c1b155ae
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvexPointCloudShape.cpp
@@ -0,0 +1,157 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btConvexPointCloudShape.h"
+#include "BulletCollision/CollisionShapes/btCollisionMargin.h"
+
+#include "LinearMath/btQuaternion.h"
+/// Stores the new local scaling and then calls recalcLocalAabb().
+void btConvexPointCloudShape::setLocalScaling(const btVector3& scaling)
+{
+	m_localScaling = scaling;
+	recalcLocalAabb();
+}
+
+#ifndef __SPU__
+btVector3	btConvexPointCloudShape::localGetSupportingVertexWithoutMargin(const btVector3& vec0)const
+{
+	// Search direction: vec0 scaled to unit length, or the x axis when vec0 is nearly zero.
+	btVector3 dir = vec0;
+	const btScalar dirLen2 = dir.length2();
+	if (dirLen2 < btScalar(0.0001))
+	{
+		dir.setValue(1,0,0);
+	}
+	else
+	{
+		dir *= btScalar(1.) / btSqrt(dirLen2);
+	}
+
+	// Keep the scaled point with the largest projection on dir; on ties the earlier point wins.
+	// With no points the result stays at (0,0,0).
+	btVector3 best(btScalar(0.),btScalar(0.),btScalar(0.));
+	btScalar bestDot = btScalar(-BT_LARGE_FLOAT);
+	for (int idx=0;idx<m_numPoints;idx++)
+	{
+		const btVector3 candidate = getScaledPoint(idx);
+		const btScalar candidateDot = dir.dot(candidate);
+		if (candidateDot > bestDot)
+		{
+			bestDot = candidateDot;
+			best = candidate;
+		}
+	}
+	return best;
+}
+
+void	btConvexPointCloudShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+	// For every direction vectors[k] this writes into supportVerticesOut[k] the scaled point
+	// with the largest dot product. The fourth ('w') component of each output entry is used
+	// as scratch space for the best dot product found so far and is left holding that value.
+	// With no points the outputs only have their w component set.
+	for (int out=0;out<numVectors;out++)
+	{
+		supportVerticesOut[out][3] = btScalar(-BT_LARGE_FLOAT);
+	}
+
+	// Points form the outer loop, so each scaled point is computed once
+	// and then tested against all directions.
+	for (int pointIndex=0;pointIndex<m_numPoints;pointIndex++)
+	{
+		const btVector3 scaledPoint = getScaledPoint(pointIndex);
+
+		for (int out=0;out<numVectors;out++)
+		{
+			const btScalar projection = vectors[out].dot(scaledPoint);
+			if (!(projection > supportVerticesOut[out][3]))
+			{
+				continue;
+			}
+
+			//WARNING: keep this order, the vector assignment overwrites the w component!
+			supportVerticesOut[out] = scaledPoint;
+			supportVerticesOut[out][3] = projection;
+		}
+	}
+}
+	
+
+
+btVector3	btConvexPointCloudShape::localGetSupportingVertex(const btVector3& vec)const
+{
+	// Start from the margin-free support point; without a margin that is already the answer.
+	btVector3 result = localGetSupportingVertexWithoutMargin(vec);
+	if ( getMargin()==btScalar(0.) )
+		return result;
+
+	// A (nearly) zero direction is replaced by (-1,-1,-1) before normalizing.
+	btVector3 unitDir = vec;
+	if (unitDir.length2() < (SIMD_EPSILON*SIMD_EPSILON))
+		unitDir.setValue(btScalar(-1.),btScalar(-1.),btScalar(-1.));
+	unitDir.normalize();
+
+	result += getMargin() * unitDir;
+	return result;
+}
+
+
+#endif
+
+
+
+
+
+
+//currently just for debugging (drawing), perhaps future support for algebraic continuous collision detection
+//Please note that you can debug-draw btConvexHullShape with the Raytracer Demo
+int	btConvexPointCloudShape::getNumVertices() const
+{
+	return m_numPoints;	// the vertex count is the stored point count
+}
+/// Always reports zero edges.
+int btConvexPointCloudShape::getNumEdges() const
+{
+	return 0;
+}
+/// Not implemented: asserts and leaves pa and pb untouched (getNumEdges() returns 0).
+void btConvexPointCloudShape::getEdge(int i,btVector3& pa,btVector3& pb) const
+{
+	btAssert (0);
+}
+/// Writes point i, multiplied component-wise by the local scaling, into vtx.
+void btConvexPointCloudShape::getVertex(int i,btVector3& vtx) const
+{
+	vtx = getScaledPoint(i);
+}
+/// Always reports zero planes.
+int	btConvexPointCloudShape::getNumPlanes() const
+{
+	return 0;
+}
+/// Not implemented: asserts and writes nothing (getNumPlanes() returns 0).
+void btConvexPointCloudShape::getPlane(btVector3& ,btVector3& ,int ) const
+{
+	// the parameters are left unnamed because none of them is used
+	btAssert(0);
+}
+/// Point-in-shape test.
+//not yet implemented: asserts, and returns false when the assert does not stop execution
+bool btConvexPointCloudShape::isInside(const btVector3& ,btScalar ) const
+{
+	btAssert(0);
+	return false;
+}
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvexPointCloudShape.h b/src/bullet/BulletCollision/CollisionShapes/btConvexPointCloudShape.h
new file mode 100644
index 00000000..54b5afac
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvexPointCloudShape.h
@@ -0,0 +1,105 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONVEX_POINT_CLOUD_SHAPE_H
+#define BT_CONVEX_POINT_CLOUD_SHAPE_H
+
+#include "btPolyhedralConvexShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
+#include "LinearMath/btAlignedObjectArray.h"
+
+///The btConvexPointCloudShape implements an implicit convex hull of an array of vertices.
+ATTRIBUTE_ALIGNED16(class) btConvexPointCloudShape : public btPolyhedralConvexAabbCachingShape
+{
+	btVector3* m_unscaledPoints;	///< caller-supplied point array; only the pointer is stored by this class
+	int m_numPoints;	///< number of entries in m_unscaledPoints
+
+public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	/// Creates an empty cloud: unit scaling, null point pointer, zero points.
+	btConvexPointCloudShape()
+	{
+		m_localScaling.setValue(1.f,1.f,1.f);
+		m_shapeType = CONVEX_POINT_CLOUD_SHAPE_PROXYTYPE;
+		m_unscaledPoints = 0;
+		m_numPoints = 0;
+	}
+	/// Stores the pointer, count and scaling (the points are not copied); recalcLocalAabb() runs only when computeAabb is true.
+	btConvexPointCloudShape(btVector3* points,int numPoints, const btVector3& localScaling,bool computeAabb = true)
+	{
+		m_localScaling = localScaling;
+		m_shapeType = CONVEX_POINT_CLOUD_SHAPE_PROXYTYPE;
+		m_unscaledPoints = points;
+		m_numPoints = numPoints;
+		// optional: the caller may skip the aabb computation here
+		if (computeAabb)
+			recalcLocalAabb();
+	}
+	/// Replaces the point pointer, count and scaling. Note the parameter order differs from the constructor: computeAabb comes before localScaling.
+	void setPoints (btVector3* points, int numPoints, bool computeAabb = true,const btVector3& localScaling=btVector3(1.f,1.f,1.f))
+	{
+		m_unscaledPoints = points;
+		m_numPoints = numPoints;
+		m_localScaling = localScaling;
+		// optional, as in the constructor
+		if (computeAabb)
+			recalcLocalAabb();
+	}
+	/// Returns the stored point pointer (mutable access).
+	SIMD_FORCE_INLINE	btVector3* getUnscaledPoints()
+	{
+		return m_unscaledPoints;
+	}
+	/// Returns the stored point pointer (read-only access).
+	SIMD_FORCE_INLINE	const btVector3* getUnscaledPoints() const
+	{
+		return m_unscaledPoints;
+	}
+	/// Returns the stored point count.
+	SIMD_FORCE_INLINE	int getNumPoints() const 
+	{
+		return m_numPoints;
+	}
+	/// Returns point 'index' multiplied component-wise by the local scaling; index is not range-checked here.
+	SIMD_FORCE_INLINE	btVector3	getScaledPoint( int index) const
+	{
+		return m_unscaledPoints[index] * m_localScaling;
+	}
+
+#ifndef __SPU__
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec)const;
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+#endif
+
+
+	//debugging
+	virtual const char*	getName()const {return "ConvexPointCloud";}
+	// vertex/edge/plane queries; declared here and defined outside the class body
+	virtual int	getNumVertices() const;
+	virtual int getNumEdges() const;
+	virtual void getEdge(int i,btVector3& pa,btVector3& pb) const;
+	virtual void getVertex(int i,btVector3& vtx) const;
+	virtual int	getNumPlanes() const;
+	virtual void getPlane(btVector3& planeNormal,btVector3& planeSupport,int i ) const;
+	virtual	bool isInside(const btVector3& pt,btScalar tolerance) const;
+
+	///in case we receive negative scaling (NOTE(review): confirm that the definition handles negative values specially)
+	virtual void	setLocalScaling(const btVector3& scaling);
+};
+
+
+#endif //BT_CONVEX_POINT_CLOUD_SHAPE_H
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvexPolyhedron.cpp b/src/bullet/BulletCollision/CollisionShapes/btConvexPolyhedron.cpp
new file mode 100644
index 00000000..1e26be53
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvexPolyhedron.cpp
@@ -0,0 +1,296 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+///This file was written by Erwin Coumans
+///Separating axis test based on work from Pierre Terdiman
+///And contact clipping based on work from Simon Hobbs
+
+#include "btConvexPolyhedron.h"
+#include "LinearMath/btHashMap.h"
+/// Empty constructor body: no member is assigned a value here.
+btConvexPolyhedron::btConvexPolyhedron()
+{
+
+}
+btConvexPolyhedron::~btConvexPolyhedron()
+{
+	// empty destructor body: no explicit cleanup is written here
+}
+/// Returns true when each of the x, y and z components of v has a magnitude of at most 1e-6.
+/// Uses btFabs so the comparison follows btScalar's precision instead of forcing float through fabsf.
+inline bool IsAlmostZero(const btVector3& v)
+{
+	if(btFabs(v.x())>1e-6 || btFabs(v.y())>1e-6 || btFabs(v.z())>1e-6)	return false;
+	return true;
+}
+/// Key type for the edge hash map: a pair of vertex indices stored with m_v0 >= m_v1.
+struct btInternalVertexPair
+{
+	btInternalVertexPair(short int v0,short int v1)
+		:m_v0(v0),
+		m_v1(v1)
+	{
+		if (m_v1>m_v0)	// order the pair so that (a,b) and (b,a) produce the same key
+			btSwap(m_v0,m_v1);
+	}
+	short int m_v0;	// the larger (or equal) index after construction
+	short int m_v1;	// the smaller (or equal) index after construction
+	int getHash() const
+	{
+		return m_v0+(m_v1<<16);	// m_v1 shifted left by 16 bits, plus m_v0
+	}
+	bool equals(const btInternalVertexPair& other) const
+	{
+		return m_v0==other.m_v0 && m_v1==other.m_v1;
+	}
+};
+/// Value type for the edge hash map: the two face indices recorded for one edge, -1 meaning "not set".
+struct btInternalEdge
+{
+	btInternalEdge()
+		:m_face0(-1),
+		m_face1(-1)
+	{
+	}
+	short int m_face0;	// first face recorded for the edge
+	short int m_face1;	// second face recorded for the edge
+};
+
+//
+
+#ifdef TEST_INTERNAL_OBJECTS
+bool btConvexPolyhedron::testContainment() const
+{
+	// Visit the 8 corners of the box m_localCenter +/- m_extents. Bits 2, 1 and 0 of
+	// 'corner' select the negated x, y and z extent respectively.
+	for(int corner=0;corner<8;corner++)
+	{
+		const btScalar ex = (corner&4) ? -m_extents[0] : m_extents[0];
+		const btScalar ey = (corner&2) ? -m_extents[1] : m_extents[1];
+		const btScalar ez = (corner&1) ? -m_extents[2] : m_extents[2];
+		const btVector3 cornerPt = m_localCenter + btVector3(ex, ey, ez);
+
+		// Fail as soon as one face gives dot(normal, corner) + plane[3] > 0.
+		const int faceCount = m_faces.size();
+		for(int f=0;f<faceCount;f++)
+		{
+			const btScalar* plane = m_faces[f].m_plane;
+			const btVector3 faceNormal(plane[0], plane[1], plane[2]);
+			const btScalar planeValue = cornerPt.dot(faceNormal) + plane[3];
+			if(planeValue>0.0f)
+				return false;
+		}
+	}
+	return true;
+}
+#endif
+/// Fills m_uniqueEdges and the area-weighted m_localCenter from m_vertices/m_faces; under TEST_INTERNAL_OBJECTS also m_radius, mC, mE and m_extents.
+void	btConvexPolyhedron::initialize()
+{
+	// edges: maps an ordered vertex-index pair to the (up to two) faces recorded for that edge
+	btHashMap<btInternalVertexPair,btInternalEdge> edges;
+
+	btScalar TotalArea = 0.0f;
+	
+	m_localCenter.setValue(0, 0, 0);
+	for(int i=0;i<m_faces.size();i++)
+	{
+		int numVertices = m_faces[i].m_indices.size();
+		int NbTris = numVertices;
+		for(int j=0;j<NbTris;j++)
+		{
+			int k = (j+1)%numVertices;
+			btInternalVertexPair vp(m_faces[i].m_indices[j],m_faces[i].m_indices[k]);	// NOTE(review): int indices are passed to short int parameters
+			btInternalEdge* edptr = edges.find(vp);
+			btVector3 edge = m_vertices[vp.m_v1]-m_vertices[vp.m_v0];
+			edge.normalize();
+			// an edge direction counts as already known when it matches a stored one up to sign
+			bool found = false;
+
+			for (int p=0;p<m_uniqueEdges.size();p++)
+			{
+				
+				if (IsAlmostZero(m_uniqueEdges[p]-edge) || 
+					IsAlmostZero(m_uniqueEdges[p]+edge))
+				{
+					found = true;
+					break;
+				}
+			}
+
+			if (!found)
+			{
+				m_uniqueEdges.push_back(edge);
+			}
+			// first visit of an edge records face0, the second visit records face1
+			if (edptr)
+			{
+				btAssert(edptr->m_face0>=0);
+				btAssert(edptr->m_face1<0);
+				edptr->m_face1 = i;
+			} else
+			{
+				btInternalEdge ed;
+				ed.m_face0 = i;
+				edges.insert(vp,ed);
+			}
+		}
+	}
+
+#ifdef USE_CONNECTED_FACES
+	for(int i=0;i<m_faces.size();i++)
+	{
+		int numVertices = m_faces[i].m_indices.size();
+		m_faces[i].m_connectedFaces.resize(numVertices);
+
+		for(int j=0;j<numVertices;j++)
+		{
+			int k = (j+1)%numVertices;
+			btInternalVertexPair vp(m_faces[i].m_indices[j],m_faces[i].m_indices[k]);
+			btInternalEdge* edptr = edges.find(vp);
+			btAssert(edptr);
+			btAssert(edptr->m_face0>=0);
+			btAssert(edptr->m_face1>=0);
+
+			int connectedFace = (edptr->m_face0==i)?edptr->m_face1:edptr->m_face0;
+			m_faces[i].m_connectedFaces[j] = connectedFace;
+		}
+	}
+#endif//USE_CONNECTED_FACES
+	// each face is split into a triangle fan around its first vertex; triangle centers are summed weighted by area
+	for(int i=0;i<m_faces.size();i++)
+	{
+		int numVertices = m_faces[i].m_indices.size();
+		int NbTris = numVertices-2;
+		
+		const btVector3& p0 = m_vertices[m_faces[i].m_indices[0]];
+		for(int j=1;j<=NbTris;j++)
+		{
+			int k = (j+1)%numVertices;
+			const btVector3& p1 = m_vertices[m_faces[i].m_indices[j]];
+			const btVector3& p2 = m_vertices[m_faces[i].m_indices[k]];
+			btScalar Area = ((p0 - p1).cross(p0 - p2)).length() * 0.5f;
+			btVector3 Center = (p0+p1+p2)/3.0f;
+			m_localCenter += Area * Center;
+			TotalArea += Area;
+		}
+	}
+	m_localCenter /= TotalArea;	// NOTE(review): TotalArea is still 0 when no triangle contributed any area
+
+
+
+
+#ifdef TEST_INTERNAL_OBJECTS
+	if(1)
+	{
+		m_radius = FLT_MAX;
+		for(int i=0;i<m_faces.size();i++)
+		{
+			const btVector3 Normal(m_faces[i].m_plane[0], m_faces[i].m_plane[1], m_faces[i].m_plane[2]);
+			const btScalar dist = btFabs(m_localCenter.dot(Normal) + m_faces[i].m_plane[3]);
+			if(dist<m_radius)
+				m_radius = dist;
+		}
+		// m_radius is now the smallest |plane value| of m_localCenter over all faces
+		// next: per-axis minimum and maximum over all vertices
+		btScalar MinX = FLT_MAX;
+		btScalar MinY = FLT_MAX;
+		btScalar MinZ = FLT_MAX;
+		btScalar MaxX = -FLT_MAX;
+		btScalar MaxY = -FLT_MAX;
+		btScalar MaxZ = -FLT_MAX;
+		for(int i=0; i<m_vertices.size(); i++)
+		{
+			const btVector3& pt = m_vertices[i];
+			if(pt.x()<MinX)	MinX = pt.x();
+			if(pt.x()>MaxX)	MaxX = pt.x();
+			if(pt.y()<MinY)	MinY = pt.y();
+			if(pt.y()>MaxY)	MaxY = pt.y();
+			if(pt.z()<MinZ)	MinZ = pt.z();
+			if(pt.z()>MaxZ)	MaxZ = pt.z();
+		}
+		mC.setValue(MaxX+MinX, MaxY+MinY, MaxZ+MinZ);
+		mE.setValue(MaxX-MinX, MaxY-MinY, MaxZ-MinZ);
+		// note: mC is max+min and mE is max-min, neither is halved here
+		// start with a cube of half-size r, stretched to half of mE along the largest axis,
+		// then shrink that axis in up to 1024 steps until testContainment() succeeds
+//		const btScalar r = m_radius / sqrtf(2.0f);
+		const btScalar r = m_radius / sqrtf(3.0f);
+		const int LargestExtent = mE.maxAxis();
+		const btScalar Step = (mE[LargestExtent]*0.5f - r)/1024.0f;
+		m_extents[0] = m_extents[1] = m_extents[2] = r;
+		m_extents[LargestExtent] = mE[LargestExtent]*0.5f;
+		bool FoundBox = false;
+		for(int j=0;j<1024;j++)
+		{
+			if(testContainment())
+			{
+				FoundBox = true;
+				break;
+			}
+
+			m_extents[LargestExtent] -= Step;
+		}
+		if(!FoundBox)
+		{
+			m_extents[0] = m_extents[1] = m_extents[2] = r;
+		}
+		else
+		{
+			// Refine the box
+			const btScalar Step = (m_radius - r)/1024.0f;
+			const int e0 = (1<<LargestExtent) & 3;
+			const int e1 = (1<<e0) & 3;
+			// e0 and e1 are the two axes other than LargestExtent: (1<<a)&3 maps 0->1, 1->2, 2->0
+			for(int j=0;j<1024;j++)
+			{
+				const btScalar Saved0 = m_extents[e0];
+				const btScalar Saved1 = m_extents[e1];
+				m_extents[e0] += Step;
+				m_extents[e1] += Step;
+				// grow both axes together; undo the last step and stop once containment fails
+				if(!testContainment())
+				{
+					m_extents[e0] = Saved0;
+					m_extents[e1] = Saved1;
+					break;
+				}
+			}
+		}
+	}
+#endif
+}
+
+
+void btConvexPolyhedron::project(const btTransform& trans, const btVector3& dir, btScalar& min, btScalar& max) const
+{
+	// Start from an inverted interval so that the first vertex sets both bounds.
+	min = FLT_MAX;
+	max = -FLT_MAX;
+	for(int v=0, count=m_vertices.size(); v<count; v++)
+	{
+		const btScalar proj = (trans * m_vertices[v]).dot(dir);
+		if(proj < min)	min = proj;
+		if(proj > max)	max = proj;
+	}
+	// Still inverted (e.g. no vertices at all): exchange the two bounds.
+	if(min>max)
+	{
+		const btScalar oldMin = min;
+		min = max;
+		max = oldMin;
+	}
+}
\ No newline at end of file
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvexPolyhedron.h b/src/bullet/BulletCollision/CollisionShapes/btConvexPolyhedron.h
new file mode 100644
index 00000000..08db39a3
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvexPolyhedron.h
@@ -0,0 +1,62 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+///This file was written by Erwin Coumans
+
+
+#ifndef _BT_POLYHEDRAL_FEATURES_H
+#define _BT_POLYHEDRAL_FEATURES_H
+
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+#define TEST_INTERNAL_OBJECTS 1
+
+/// One face of a btConvexPolyhedron: indices into the polyhedron's m_vertices plus four plane coefficients.
+struct btFace
+{
+	btAlignedObjectArray<int>	m_indices;
+//	btAlignedObjectArray<int>	m_connectedFaces;
+	btScalar	m_plane[4];	// used as dot(m_plane[0..2], point) + m_plane[3] by btConvexPolyhedron
+};
+
+/// Vertex/face description of a convex polyhedron plus data derived from it by initialize().
+class btConvexPolyhedron
+{
+	public:
+	btConvexPolyhedron();
+	virtual	~btConvexPolyhedron();
+	// input geometry (m_vertices, m_faces) and the edge directions collected by initialize()
+	btAlignedObjectArray<btVector3>	m_vertices;
+	btAlignedObjectArray<btFace>	m_faces;
+	btAlignedObjectArray<btVector3> m_uniqueEdges;
+	// values written by initialize(); all but m_localCenter only when TEST_INTERNAL_OBJECTS is defined
+	btVector3		m_localCenter;
+	btVector3		m_extents;
+	btScalar		m_radius;
+	btVector3		mC;
+	btVector3		mE;
+	// testContainment() is only defined when TEST_INTERNAL_OBJECTS is defined (see the #define above)
+	void	initialize();
+	bool testContainment() const;
+
+	void project(const btTransform& trans, const btVector3& dir, btScalar& min, btScalar& max) const;
+};
+
+	
+#endif //_BT_POLYHEDRAL_FEATURES_H
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvexShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btConvexShape.cpp
new file mode 100644
index 00000000..8c67d8eb
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvexShape.cpp
@@ -0,0 +1,446 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btConvexShape.h"
+#include "btTriangleShape.h"
+#include "btSphereShape.h"
+#include "btCylinderShape.h"
+#include "btCapsuleShape.h"
+#include "btConvexHullShape.h"
+#include "btConvexPointCloudShape.h"
+
+///not supported on IBM SDK, until we fix the alignment of btVector3
+#if defined (__CELLOS_LV2__) && defined (__SPU__)
+#include <spu_intrinsics.h>
+static inline vec_float4 vec_dot3( vec_float4 vec0, vec_float4 vec1 )	// SPU-only helper, compiled under __CELLOS_LV2__ && __SPU__
+{
+    vec_float4 result;
+    result = spu_mul( vec0, vec1 );	// element-wise product
+    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );	// add the product of both inputs rotated by 4 bytes
+    return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );	// and rotated by 8 bytes; presumably element 0 then holds the 3-component dot product - TODO confirm
+}
+#endif //__SPU__
+/// Empty constructor body: nothing is initialised here.
+btConvexShape::btConvexShape ()
+{
+}
+/// Destructor (declared virtual in the class): empty body.
+btConvexShape::~btConvexShape()
+{
+
+}
+
+
+void btConvexShape::project(const btTransform& trans, const btVector3& dir, btScalar& min, btScalar& max) const
+{
+	// Support points for +axis and -axis, queried with dir*basis and mapped back through trans.
+	const btVector3 axisInShape = dir*trans.getBasis();
+	const btVector3 supportPlus = trans(localGetSupportingVertex(axisInShape));
+	const btVector3 supportMinus = trans(localGetSupportingVertex(-axisInShape));
+	min = supportPlus.dot(dir);
+	max = supportMinus.dot(dir);
+	// Put the two projections into ascending order.
+	if(min>max)
+	{
+		const btScalar larger = min;
+		min = max;
+		max = larger;
+	}
+}
+
+/// Returns the entry of points[] with the largest dot product against localDirOrg*localScaling, multiplied component-wise by localScaling.
+static btVector3 convexHullSupport (const btVector3& localDirOrg, const btVector3* points, int numPoints, const btVector3& localScaling)
+{	
+	// the scaling is applied to the direction once instead of to every point inside the loop
+	btVector3 vec = localDirOrg * localScaling;
+
+#if defined (__CELLOS_LV2__) && defined (__SPU__)
+	// SPU path: same search written with SPU intrinsics, four points per iteration plus a scalar tail loop
+	btVector3 localDir = vec;
+
+	vec_float4 v_distMax = {-FLT_MAX,0,0,0};
+	vec_int4 v_idxMax = {-999,0,0,0};
+	int v=0;
+	int numverts = numPoints;
+
+	for(;v<(int)numverts-4;v+=4) {
+		vec_float4 p0 = vec_dot3(points[v  ].get128(),localDir.get128());
+		vec_float4 p1 = vec_dot3(points[v+1].get128(),localDir.get128());
+		vec_float4 p2 = vec_dot3(points[v+2].get128(),localDir.get128());
+		vec_float4 p3 = vec_dot3(points[v+3].get128(),localDir.get128());
+		const vec_int4 i0 = {v  ,0,0,0};
+		const vec_int4 i1 = {v+1,0,0,0};
+		const vec_int4 i2 = {v+2,0,0,0};
+		const vec_int4 i3 = {v+3,0,0,0};
+		vec_uint4  retGt01 = spu_cmpgt(p0,p1);
+		vec_float4 pmax01 = spu_sel(p1,p0,retGt01);
+		vec_int4   imax01 = spu_sel(i1,i0,retGt01);
+		vec_uint4  retGt23 = spu_cmpgt(p2,p3);
+		vec_float4 pmax23 = spu_sel(p3,p2,retGt23);
+		vec_int4   imax23 = spu_sel(i3,i2,retGt23);
+		vec_uint4  retGt0123 = spu_cmpgt(pmax01,pmax23);
+		vec_float4 pmax0123 = spu_sel(pmax23,pmax01,retGt0123);
+		vec_int4   imax0123 = spu_sel(imax23,imax01,retGt0123);
+		vec_uint4  retGtMax = spu_cmpgt(v_distMax,pmax0123);
+		v_distMax = spu_sel(pmax0123,v_distMax,retGtMax);
+		v_idxMax = spu_sel(imax0123,v_idxMax,retGtMax);
+	}
+	for(;v<(int)numverts;v++) {
+		vec_float4 p = vec_dot3(points[v].get128(),localDir.get128());
+		const vec_int4 i = {v,0,0,0};
+		vec_uint4  retGtMax = spu_cmpgt(v_distMax,p);
+		v_distMax = spu_sel(p,v_distMax,retGtMax);
+		v_idxMax = spu_sel(i,v_idxMax,retGtMax);
+	}
+	int ptIndex = spu_extract(v_idxMax,0);	// NOTE(review): v_idxMax starts at -999, so this looks unsafe when numPoints is 0 - confirm
+	const btVector3& supVec= points[ptIndex] * localScaling;
+	return supVec;
+#else
+	// generic path: linear scan; a later point only wins with a strictly larger dot product
+	btScalar newDot,maxDot = btScalar(-BT_LARGE_FLOAT);
+	int ptIndex = -1;
+
+	for (int i=0;i<numPoints;i++)
+	{
+
+		newDot = vec.dot(points[i]);
+		if (newDot > maxDot)
+		{
+			maxDot = newDot;
+			ptIndex = i;
+		}
+	}
+	btAssert(ptIndex >= 0);	// NOTE(review): if the assert does not stop execution, ptIndex can still be -1 (e.g. numPoints == 0) and points[-1] is read below
+	btVector3 supVec = points[ptIndex] * localScaling;
+	return supVec;
+#endif //__SPU__
+}
+/// Support point without margin: dispatches on m_shapeType for the shape types listed below; any other type uses the virtual call (non-SPU builds only).
+btVector3 btConvexShape::localGetSupportVertexWithoutMarginNonVirtual (const btVector3& localDir) const
+{
+	switch (m_shapeType)
+	{
+    case SPHERE_SHAPE_PROXYTYPE:
+	{
+		return btVector3(0,0,0);	// always the origin; getMarginNonVirtual() returns the radius for spheres
+    }
+	case BOX_SHAPE_PROXYTYPE:
+	{
+		btBoxShape* convexShape = (btBoxShape*)this;
+		const btVector3& halfExtents = convexShape->getImplicitShapeDimensions();
+		// per axis: +halfExtent or -halfExtent, chosen by btFsels from the direction component (presumably by its sign - confirm in btScalar.h)
+		return btVector3(btFsels(localDir.x(), halfExtents.x(), -halfExtents.x()),
+			btFsels(localDir.y(), halfExtents.y(), -halfExtents.y()),
+			btFsels(localDir.z(), halfExtents.z(), -halfExtents.z()));
+	}
+	case TRIANGLE_SHAPE_PROXYTYPE:
+	{
+		btTriangleShape* triangleShape = (btTriangleShape*)this;
+		btVector3 dir(localDir.getX(),localDir.getY(),localDir.getZ());
+		btVector3* vertices = &triangleShape->m_vertices1[0];
+		btVector3 dots(dir.dot(vertices[0]), dir.dot(vertices[1]), dir.dot(vertices[2]));	// one dot product per triangle vertex
+		btVector3 sup = vertices[dots.maxAxis()];	// the vertex selected by maxAxis() of those dot products
+		return btVector3(sup.getX(),sup.getY(),sup.getZ());
+	}
+	case CYLINDER_SHAPE_PROXYTYPE:
+	{
+		btCylinderShape* cylShape = (btCylinderShape*)this;
+		//mapping of halfextents/dimension onto radius/height depends on how cylinder local orientation is (upAxis)
+		// YY is set to the up axis in every case below; XX and ZZ are the two remaining axes
+		btVector3 halfExtents = cylShape->getImplicitShapeDimensions();
+		btVector3 v(localDir.getX(),localDir.getY(),localDir.getZ());
+		int cylinderUpAxis = cylShape->getUpAxis();
+		int XX(1),YY(0),ZZ(2);
+
+		switch (cylinderUpAxis)
+		{
+		case 0:
+		{
+			XX = 1;
+			YY = 0;
+			ZZ = 2;
+		}
+		break;
+		case 1:
+		{
+			XX = 0;
+			YY = 1;
+			ZZ = 2;	
+		}
+		break;
+		case 2:
+		{
+			XX = 0;
+			YY = 2;
+			ZZ = 1;
+			
+		}
+		break;
+		default:
+			btAssert(0);
+		break;
+		};
+
+		btScalar radius = halfExtents[XX];
+		btScalar halfHeight = halfExtents[cylinderUpAxis];
+
+		btVector3 tmp;
+		btScalar d ;
+		// s is the length of the direction's XX/ZZ part
+		btScalar s = btSqrt(v[XX] * v[XX] + v[ZZ] * v[ZZ]);
+		if (s != btScalar(0.0))
+		{
+			d = radius / s;  
+			tmp[XX] = v[XX] * d;
+			tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight;
+			tmp[ZZ] = v[ZZ] * d;
+			return btVector3(tmp.getX(),tmp.getY(),tmp.getZ());
+		} else {
+			tmp[XX] = radius;
+			tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight;
+			tmp[ZZ] = btScalar(0.0);
+			return btVector3(tmp.getX(),tmp.getY(),tmp.getZ());
+		}
+	}
+	case CAPSULE_SHAPE_PROXYTYPE:
+	{
+		btVector3 vec0(localDir.getX(),localDir.getY(),localDir.getZ());
+
+		btCapsuleShape* capsuleShape = (btCapsuleShape*)this;
+		btScalar halfHeight = capsuleShape->getHalfHeight();
+		int capsuleUpAxis = capsuleShape->getUpAxis();
+
+		btScalar radius = capsuleShape->getRadius();
+		btVector3 supVec(0,0,0);
+
+		btScalar maxDot(btScalar(-BT_LARGE_FLOAT));
+		// vec: the direction scaled to unit length, or the x axis when it is nearly zero
+		btVector3 vec = vec0;
+		btScalar lenSqr = vec.length2();
+		if (lenSqr < btScalar(0.0001))
+		{
+			vec.setValue(1,0,0);
+		} else
+		{
+			btScalar rlen = btScalar(1.) / btSqrt(lenSqr );
+			vec *= rlen;
+		}
+		btVector3 vtx;
+		btScalar newDot;
+		{
+			btVector3 pos(0,0,0);
+			pos[capsuleUpAxis] = halfHeight;
+			// candidate at the +halfHeight end: moved out by the radius and back in by the margin
+			//vtx = pos +vec*(radius);
+			vtx = pos +vec*(radius) - vec * capsuleShape->getMarginNV();
+			newDot = vec.dot(vtx);
+			
+
+			if (newDot > maxDot)
+			{
+				maxDot = newDot;
+				supVec = vtx;
+			}
+		}
+		{
+			btVector3 pos(0,0,0);
+			pos[capsuleUpAxis] = -halfHeight;
+			// candidate at the -halfHeight end, built the same way
+			//vtx = pos +vec*(radius);
+			vtx = pos +vec*(radius) - vec * capsuleShape->getMarginNV();
+			newDot = vec.dot(vtx);
+			if (newDot > maxDot)
+			{
+				maxDot = newDot;
+				supVec = vtx;
+			}
+		}
+		return btVector3(supVec.getX(),supVec.getY(),supVec.getZ());	
+	}
+	case CONVEX_POINT_CLOUD_SHAPE_PROXYTYPE:
+	{
+		btConvexPointCloudShape* convexPointCloudShape = (btConvexPointCloudShape*)this;
+		btVector3* points = convexPointCloudShape->getUnscaledPoints ();
+		int numPoints = convexPointCloudShape->getNumPoints ();
+		return convexHullSupport (localDir, points, numPoints,convexPointCloudShape->getLocalScalingNV());
+	}
+	case CONVEX_HULL_SHAPE_PROXYTYPE:
+	{
+		btConvexHullShape* convexHullShape = (btConvexHullShape*)this;
+		btVector3* points = convexHullShape->getUnscaledPoints();
+		int numPoints = convexHullShape->getNumPoints ();
+		return convexHullSupport (localDir, points, numPoints,convexHullShape->getLocalScalingNV());
+	}
+    default:
+#ifndef __SPU__
+		return this->localGetSupportingVertexWithoutMargin (localDir);
+#else
+		btAssert (0);
+#endif
+	}
+	// only the __SPU__ default case gets here; every other path has returned above
+	// should never reach here
+	btAssert (0);
+	return btVector3 (btScalar(0.0f), btScalar(0.0f), btScalar(0.0f));
+}
+
+btVector3 btConvexShape::localGetSupportVertexNonVirtual (const btVector3& localDir) const
+{
+	// Unit-length direction; a (nearly) zero input is replaced by (-1,-1,-1) before normalizing.
+	btVector3 unitDir = localDir;
+	const bool degenerate = unitDir.length2() < (SIMD_EPSILON*SIMD_EPSILON);
+	if (degenerate)
+		unitDir.setValue(btScalar(-1.),btScalar(-1.),btScalar(-1.));
+	unitDir.normalize ();
+	// Margin-free support point, moved along the direction by the shape margin.
+	return localGetSupportVertexWithoutMarginNonVirtual(unitDir)+ getMarginNonVirtual() * unitDir;
+}
+
+/* TODO: This should be bumped up to btCollisionShape () */
+btScalar btConvexShape::getMarginNonVirtual () const
+{
+	// Returns the collision margin without a virtual call for the shape types
+	// listed below: 'this' is cast to the concrete type selected by m_shapeType
+	// and its non-virtual accessor is called directly.
+	switch (m_shapeType)
+	{
+	case SPHERE_SHAPE_PROXYTYPE:
+		// the sphere reports its radius as margin
+		return ((btSphereShape*)this)->getRadius ();
+
+	case BOX_SHAPE_PROXYTYPE:
+		// stored margin of the box
+		return ((btBoxShape*)this)->getMarginNV ();
+
+	case TRIANGLE_SHAPE_PROXYTYPE:
+		// stored margin of the triangle
+		return ((btTriangleShape*)this)->getMarginNV ();
+
+	case CYLINDER_SHAPE_PROXYTYPE:
+		// stored margin of the cylinder
+		return ((btCylinderShape*)this)->getMarginNV();
+
+	case CAPSULE_SHAPE_PROXYTYPE:
+		// stored margin of the capsule
+		return ((btCapsuleShape*)this)->getMarginNV();
+
+	case CONVEX_POINT_CLOUD_SHAPE_PROXYTYPE:
+	/* fall through */
+	case CONVEX_HULL_SHAPE_PROXYTYPE:
+		// both types are accessed as btPolyhedralConvexShape
+		return ((btPolyhedralConvexShape*)this)->getMarginNV();
+
+	default:
+#ifndef __SPU__
+		// every other shape type goes through the virtual getMargin()
+		return this->getMargin ();
+#else
+		btAssert (0);
+#endif
+	}
+
+	// Only the __SPU__ default case can get here; every other path has
+	// already returned.
+	// should never reach here
+	btAssert (0);
+	return btScalar(0.0f);
+}
+#ifndef __SPU__
+void btConvexShape::getAabbNonVirtual (const btTransform& t, btVector3& aabbMin, btVector3& aabbMax) const
+{
+	// Writes the axis-aligned bounds of this shape under transform t into aabbMin/aabbMax.
+	// The shape types listed below are handled without a virtual call; any other type
+	// is forwarded to the virtual getAabb().
+	switch (m_shapeType)
+	{
+    case SPHERE_SHAPE_PROXYTYPE:
+	{
+		btSphereShape* sphereShape = (btSphereShape*)this;
+		btScalar radius = sphereShape->getImplicitShapeDimensions().getX();// * convexShape->getLocalScaling().getX();
+		btScalar margin = radius + sphereShape->getMarginNonVirtual();
+		const btVector3& center = t.getOrigin();
+		btVector3 extent(margin,margin,margin);
+		aabbMin = center - extent;
+		aabbMax = center + extent;
+    }
+	break;
+	case CYLINDER_SHAPE_PROXYTYPE:
+	/* fall through */
+	case BOX_SHAPE_PROXYTYPE:
+	{
+		btBoxShape* convexShape = (btBoxShape*)this;
+		btScalar margin=convexShape->getMarginNonVirtual();
+		btVector3 halfExtents = convexShape->getImplicitShapeDimensions();
+		halfExtents += btVector3(margin,margin,margin);
+		// the rows of |basis| dotted with the half extents give the world-axis extents
+		btMatrix3x3 abs_b = t.getBasis().absolute();  
+		btVector3 center = t.getOrigin();
+		btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
+		aabbMin = center - extent;
+		aabbMax = center + extent;
+		break;
+	}
+	case TRIANGLE_SHAPE_PROXYTYPE:
+	{
+		btTriangleShape* triangleShape = (btTriangleShape*)this;
+		btScalar margin = triangleShape->getMarginNonVirtual();
+		for (int i=0;i<3;i++)
+		{
+			// support point along +axis i gives the maximum, along -axis i the minimum
+			btVector3 vec(btScalar(0.),btScalar(0.),btScalar(0.));
+			vec[i] = btScalar(1.);
+			btVector3 sv = localGetSupportVertexWithoutMarginNonVirtual(vec*t.getBasis());
+			btVector3 tmp = t(sv);
+			aabbMax[i] = tmp[i]+margin;
+			vec[i] = btScalar(-1.);
+			tmp = t(localGetSupportVertexWithoutMarginNonVirtual(vec*t.getBasis()));
+			aabbMin[i] = tmp[i]-margin;
+		}	
+	}
+	break;
+	case CAPSULE_SHAPE_PROXYTYPE:
+	{
+		btCapsuleShape* capsuleShape = (btCapsuleShape*)this;
+		btVector3 halfExtents(capsuleShape->getRadius(),capsuleShape->getRadius(),capsuleShape->getRadius());
+		int upAxis = capsuleShape->getUpAxis();
+		halfExtents[upAxis] = capsuleShape->getRadius() + capsuleShape->getHalfHeight();
+		halfExtents += btVector3(capsuleShape->getMarginNonVirtual(),capsuleShape->getMarginNonVirtual(),capsuleShape->getMarginNonVirtual());
+		btMatrix3x3 abs_b = t.getBasis().absolute();  
+		btVector3 center = t.getOrigin();
+		btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));		  	
+		aabbMin = center - extent;
+		aabbMax = center + extent;
+	}
+	break;
+	case CONVEX_POINT_CLOUD_SHAPE_PROXYTYPE:
+	case CONVEX_HULL_SHAPE_PROXYTYPE:
+	{
+		btPolyhedralConvexAabbCachingShape* convexHullShape = (btPolyhedralConvexAabbCachingShape*)this;
+		btScalar margin = convexHullShape->getMarginNonVirtual();
+		convexHullShape->getNonvirtualAabb (t, aabbMin, aabbMax, margin);
+	}
+	break;
+    default:
+#ifndef __SPU__
+		this->getAabb (t, aabbMin, aabbMax);
+#else
+		btAssert (0);
+#endif
+	break;
+	}
+	// Every case above leaves the switch with 'break', so this point is reached on each
+	// successful call. No assert belongs here: it would fire on every call in builds
+	// where btAssert is active.
+}
+
+#endif //__SPU__
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvexShape.h b/src/bullet/BulletCollision/CollisionShapes/btConvexShape.h
new file mode 100644
index 00000000..290cd9fd
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvexShape.h
@@ -0,0 +1,84 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONVEX_SHAPE_INTERFACE1
+#define BT_CONVEX_SHAPE_INTERFACE1
+
+#include "btCollisionShape.h"
+
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btMatrix3x3.h"
+#include "btCollisionMargin.h"
+#include "LinearMath/btAlignedAllocator.h"
+
+#define MAX_PREFERRED_PENETRATION_DIRECTIONS 10
+
+/// The btConvexShape is an abstract shape interface, implemented by all convex shapes such as btBoxShape, btConvexHullShape etc.
+/// It describes general convex shapes using the localGetSupportingVertex interface, used by collision detectors such as btGjkPairDetector.
+ATTRIBUTE_ALIGNED16(class) btConvexShape : public btCollisionShape
+{
+	// Interface only: this class declares no data members of its own.
+
+public:
+
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	btConvexShape ();
+
+	virtual ~btConvexShape();
+	/// Support vertex in local space for direction vec; the btConvexTriangleMeshShape and btCylinderShape implementations add the collision margin along vec.
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec)const = 0;
+
+	////////
+	#ifndef __SPU__
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec) const=0;
+	#endif //#ifndef __SPU__
+	// The *NonVirtual members are plain (non-virtual) member functions; getAabbNonVirtual, for one, picks the concrete shape through *_PROXYTYPE case labels.
+	btVector3 localGetSupportVertexWithoutMarginNonVirtual (const btVector3& vec) const;
+	btVector3 localGetSupportVertexNonVirtual (const btVector3& vec) const;
+	btScalar getMarginNonVirtual () const;
+	void getAabbNonVirtual (const btTransform& t, btVector3& aabbMin, btVector3& aabbMax) const;
+	/// Presumably writes the interval covered by the shape (placed by trans) along dir into min and max -- inferred from the signature, confirm against the definition.
+	virtual void project(const btTransform& trans, const btVector3& dir, btScalar& min, btScalar& max) const;
+
+	
+	//notice that the vectors should be unit length
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const= 0;
+
+	///getAabb is pure virtual at this level: every concrete convex shape has to supply its own AABB computation for transform t.
+	void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const =0;
+
+	virtual void getAabbSlow(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const =0;
+
+	virtual void	setLocalScaling(const btVector3& scaling) =0;
+	virtual const btVector3& getLocalScaling() const =0;
+
+	virtual void	setMargin(btScalar margin)=0;
+
+	virtual btScalar	getMargin() const=0;
+
+	virtual int		getNumPreferredPenetrationDirections() const=0;
+	
+	virtual void	getPreferredPenetrationDirection(int index, btVector3& penetrationVector) const=0;
+
+
+	
+	
+};
+
+
+
+#endif //BT_CONVEX_SHAPE_INTERFACE1
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.cpp
new file mode 100644
index 00000000..0f9ced55
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.cpp
@@ -0,0 +1,315 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btConvexTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btCollisionMargin.h"
+
+#include "LinearMath/btQuaternion.h"
+#include "BulletCollision/CollisionShapes/btStridingMeshInterface.h"
+
+
+// Wraps meshInterface as a convex shape; the pointer is stored as-is and calcAabb=false skips the initial recalcLocalAabb().
+btConvexTriangleMeshShape::btConvexTriangleMeshShape(btStridingMeshInterface* meshInterface, bool calcAabb)
+	: btPolyhedralConvexAabbCachingShape(), m_stridingMesh(meshInterface)
+{
+	m_shapeType = CONVEX_TRIANGLEMESH_SHAPE_PROXYTYPE;
+	if (calcAabb) { recalcLocalAabb(); }
+}
+
+
+
+
+///It's not nice to have all this virtual function overhead, so perhaps we can also gather the points once
+///but then we are duplicating
+class LocalSupportVertexCallback: public btInternalTriangleIndexCallback
+{
+	// Best vertex found so far; remains (0,0,0) until a triangle has been visited.
+	btVector3 m_supportVertexLocal;
+public:
+	// Largest projection seen so far, seeded with -BT_LARGE_FLOAT.
+	btScalar m_maxDot;
+	// Direction the support vertex is searched along.
+	btVector3 m_supportVecLocal;
+
+	LocalSupportVertexCallback(const btVector3& supportVecLocal)
+		: m_supportVertexLocal(btScalar(0.),btScalar(0.),btScalar(0.)),
+		  m_maxDot(btScalar(-BT_LARGE_FLOAT)),
+		  m_supportVecLocal(supportVecLocal)
+	{
+	}
+
+	// Called once per triangle: keeps whichever corner projects furthest along the search direction.
+	virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int  triangleIndex)
+	{
+		(void)partId;
+		(void)triangleIndex;
+		const btVector3* const end = triangle + 3;
+		for (const btVector3* corner = triangle; corner != end; ++corner)
+		{
+			const btScalar projection = m_supportVecLocal.dot(*corner);
+			if (!(projection > m_maxDot)) continue;	// not an improvement
+			m_maxDot = projection;
+			m_supportVertexLocal = *corner;
+		}
+	}
+
+	// Returns a copy of the best vertex recorded so far.
+	btVector3	GetSupportVertexLocal()
+	{
+		return m_supportVertexLocal;
+	}
+};
+
+
+
+
+
+// Support vertex of the mesh along vec0, in local space and without the collision margin.
+btVector3	btConvexTriangleMeshShape::localGetSupportingVertexWithoutMargin(const btVector3& vec0)const
+{
+	// Normalise the query; directions with length2 below 0.0001 fall back to +X.
+	btVector3 dir(btScalar(1.),btScalar(0.),btScalar(0.));
+	const btScalar lenSqr = vec0.length2();
+	if (!(lenSqr < btScalar(0.0001)))
+	{
+		const btScalar invLen = btScalar(1.) / btSqrt(lenSqr);
+		dir = vec0;
+		dir *= invLen;
+	}
+
+	// The traversal is given bounds of +/-BT_LARGE_FLOAT on every axis.
+	const btVector3 boundMax(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+	const btVector3 boundMin = -boundMax;
+	LocalSupportVertexCallback	collector(dir);
+	m_stridingMesh->InternalProcessAllTriangles(&collector,boundMin,boundMax);
+
+	// If no triangle was visited the callback still holds its initial (0,0,0).
+	return collector.GetSupportVertexLocal();
+}
+
+// Batched support query: one full traversal of the mesh per direction in vectors[0..numVectors).
+void	btConvexTriangleMeshShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+	//use 'w' component of supportVerticesOut?
+	// NOTE(review): this fourth-component store appears to be replaced by the whole-vector assignment in the second pass -- confirm before removing.
+	int idx = 0;
+	while (idx < numVectors)
+	{
+		supportVerticesOut[idx][3] = btScalar(-BT_LARGE_FLOAT);
+		++idx;
+	}
+
+	///@todo: could do the batch inside the callback!
+	const btVector3 boundMax(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+	const btVector3 boundMin = -boundMax;
+	for (idx = 0; idx < numVectors; ++idx)
+	{
+		// Directions are used exactly as passed in; nothing is normalised here.
+		LocalSupportVertexCallback	collector(vectors[idx]);
+		m_stridingMesh->InternalProcessAllTriangles(&collector,boundMin,boundMax);
+		supportVerticesOut[idx] = collector.GetSupportVertexLocal();
+	}
+}
+	
+
+
+// Support vertex with the collision margin applied: the margin-free vertex moved by getMargin() along the normalised direction.
+btVector3	btConvexTriangleMeshShape::localGetSupportingVertex(const btVector3& vec)const
+{
+	btVector3 result = localGetSupportingVertexWithoutMargin(vec);
+	if (getMargin() == btScalar(0.))
+		return result;
+
+	// A near-zero direction cannot be normalised, so (-1,-1,-1) is substituted first.
+	const bool degenerate = vec.length2() < (SIMD_EPSILON*SIMD_EPSILON);
+	btVector3 unitDir = degenerate ? btVector3(btScalar(-1.),btScalar(-1.),btScalar(-1.)) : vec;
+	unitDir.normalize();
+
+	// Push the vertex outwards by the margin.
+	result += getMargin() * unitDir;
+	return result;
+}
+
+
+
+
+
+
+
+
+
+//currently just for debugging (drawing), perhaps future support for algebraic continuous collision detection
+//Please note that you can debug-draw btConvexTriangleMeshShape with the Raytracer Demo
+int	btConvexTriangleMeshShape::getNumVertices() const
+{
+	//cache this? -- no vertex list is kept, so the count reported is always 0
+	return 0;
+	
+}
+// Edge count is likewise always 0.
+int btConvexTriangleMeshShape::getNumEdges() const
+{
+	return 0;
+}
+// Not supported: asserts and leaves both output points untouched.
+void btConvexTriangleMeshShape::getEdge(int ,btVector3& ,btVector3& ) const
+{
+	btAssert(0);	
+}
+// Not supported: asserts and leaves the output vertex untouched.
+void btConvexTriangleMeshShape::getVertex(int ,btVector3& ) const
+{
+	btAssert(0);
+}
+// Plane count is always 0.
+int	btConvexTriangleMeshShape::getNumPlanes() const
+{
+	return 0;
+}
+// Not supported: asserts and leaves both outputs untouched.
+void btConvexTriangleMeshShape::getPlane(btVector3& ,btVector3& ,int  ) const
+{
+	btAssert(0);
+}
+
+//not yet implemented: asserts, then reports false
+bool btConvexTriangleMeshShape::isInside(const btVector3& ,btScalar ) const
+{
+	btAssert(0);
+	return false;
+}
+
+
+
+void	btConvexTriangleMeshShape::setLocalScaling(const btVector3& scaling)
+{
+	m_stridingMesh->setScaling(scaling);	// the scale is kept on the mesh interface, not in this shape
+	// Rebuild the local AABB right away so it reflects the new scale.
+	recalcLocalAabb();
+	
+}
+
+
+const btVector3& btConvexTriangleMeshShape::getLocalScaling() const
+{
+	return m_stridingMesh->getScaling();	// reads back the scale held by the mesh interface
+}
+
+void btConvexTriangleMeshShape::calculatePrincipalAxisTransform(btTransform& principal, btVector3& inertia, btScalar& volume) const
+{	// Two passes over the mesh triangles: first centre of mass and volume, then the inertia tensor about that centre.
+   class CenterCallback: public btInternalTriangleIndexCallback
+   {
+      bool first;
+      btVector3 ref;
+      btVector3 sum;
+      btScalar volume;
+      // ref: first vertex of the first triangle, shared apex of all tetrahedra; volume accumulates |triple products| (6x the real volume).
+   public:
+
+      CenterCallback() : first(true), ref(0, 0, 0), sum(0, 0, 0), volume(0)
+      {
+      }
+      // Adds the tetrahedron (ref, triangle[0..2]); the very first triangle only supplies ref.
+      virtual void internalProcessTriangleIndex(btVector3* triangle, int partId, int triangleIndex)
+      {
+         (void) triangleIndex;
+         (void) partId;
+         if (first)
+         {
+            ref = triangle[0];
+            first = false;
+         }
+         else
+         {
+            btScalar vol = btFabs((triangle[0] - ref).triple(triangle[1] - ref, triangle[2] - ref)); // |triple product| = 6 * tetrahedron volume
+            sum += (btScalar(0.25) * vol) * ((triangle[0] + triangle[1] + triangle[2] + ref)); // centroid = mean of the four corners, weighted by vol
+            volume += vol;
+         }
+      }
+      // Volume-weighted mean of the centroids; falls back to ref when no volume was accumulated.
+      btVector3 getCenter()
+      {
+         return (volume > 0) ? sum / volume : ref;
+      }
+      // The accumulated triple products are six times the enclosed volume.
+      btScalar getVolume()
+      {
+         return volume * btScalar(1. / 6);
+      }
+
+   };
+
+   class InertiaCallback: public btInternalTriangleIndexCallback
+   {
+      btMatrix3x3 sum;
+      btVector3 center;
+      // sum: tensor accumulated over all tetrahedra; center: apex shared by those tetrahedra.
+   public:
+
+      InertiaCallback(btVector3& center) : sum(0, 0, 0, 0, 0, 0, 0, 0, 0), center(center)
+      {
+      }
+      // Adds the contribution of the tetrahedron (center, triangle[0..2]).
+      virtual void internalProcessTriangleIndex(btVector3* triangle, int partId, int triangleIndex)
+      {
+         (void) triangleIndex;
+         (void) partId;
+         btMatrix3x3 i;
+         btVector3 a = triangle[0] - center;
+         btVector3 b = triangle[1] - center;
+         btVector3 c = triangle[2] - center;
+         btScalar volNeg = -btFabs(a.triple(b, c)) * btScalar(1. / 6); // negated tetrahedron volume
+         for (int j = 0; j < 3; j++) // i[j][k] = -(second moment x_j*x_k); looks like the closed form for a tetrahedron with its apex at the origin -- confirm
+         {
+            for (int k = 0; k <= j; k++)
+            {
+               i[j][k] = i[k][j] = volNeg * (btScalar(0.1) * (a[j] * a[k] + b[j] * b[k] + c[j] * c[k])
+                  + btScalar(0.05) * (a[j] * b[k] + a[k] * b[j] + a[j] * c[k] + a[k] * c[j] + b[j] * c[k] + b[k] * c[j]));
+            }
+         }
+         btScalar i00 = -i[0][0]; // sign flipped back: positive second moments
+         btScalar i11 = -i[1][1];
+         btScalar i22 = -i[2][2];
+         i[0][0] = i11 + i22; // each diagonal term is the sum of the other two second moments
+         i[1][1] = i22 + i00; 
+         i[2][2] = i00 + i11;
+         sum[0] += i[0];
+         sum[1] += i[1];
+         sum[2] += i[2];
+      }
+      
+      btMatrix3x3& getInertia()
+      {
+         return sum;
+      }
+
+   };
+   // Pass 1: centre of mass and volume.
+   CenterCallback centerCallback;
+   btVector3 aabbMax(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+   m_stridingMesh->InternalProcessAllTriangles(&centerCallback, -aabbMax, aabbMax);
+   btVector3 center = centerCallback.getCenter();
+   principal.setOrigin(center);
+   volume = centerCallback.getVolume();
+   // Pass 2: inertia about the centre found above.
+   InertiaCallback inertiaCallback(center);
+   m_stridingMesh->InternalProcessAllTriangles(&inertiaCallback, -aabbMax, aabbMax);
+   // The diagonal of i is read back after diagonalize(); principal's basis is passed as its first argument (presumably receiving the rotation -- see btMatrix3x3::diagonalize).
+   btMatrix3x3& i = inertiaCallback.getInertia();
+   i.diagonalize(principal.getBasis(), btScalar(0.00001), 20);
+   inertia.setValue(i[0][0], i[1][1], i[2][2]);
+   inertia /= volume; // normalise by volume. NOTE(review): volume == 0 (empty or flat mesh) divides by zero here
+}
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h b/src/bullet/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h
new file mode 100644
index 00000000..af5d0038
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h
@@ -0,0 +1,75 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#ifndef BT_CONVEX_TRIANGLEMESH_SHAPE_H
+#define BT_CONVEX_TRIANGLEMESH_SHAPE_H
+
+
+#include "btPolyhedralConvexShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
+
+
+/// The btConvexTriangleMeshShape is a convex hull of a triangle mesh, but the performance is not as good as btConvexHullShape.
+/// A small benefit of this class is that it uses the btStridingMeshInterface, so you can avoid the duplication of the triangle mesh data. Nevertheless, most users should use the much better performing btConvexHullShape instead.
+class btConvexTriangleMeshShape : public btPolyhedralConvexAabbCachingShape
+{
+	// Mesh data source; this class declares no destructor, so nothing here releases it.
+	class btStridingMeshInterface*	m_stridingMesh;
+
+public:
+	btConvexTriangleMeshShape(btStridingMeshInterface* meshInterface, bool calcAabb = true);	// calcAabb=false skips the initial recalcLocalAabb()
+
+	class btStridingMeshInterface*	getMeshInterface()
+	{
+		return m_stridingMesh;
+	}
+	const class btStridingMeshInterface* getMeshInterface() const
+	{
+		return m_stridingMesh;
+	}
+	// Support queries: each call traverses the triangles of the mesh interface (see the .cpp).
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec)const;
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+	
+	//debugging
+	virtual const char*	getName()const {return "ConvexTrimesh";}
+	// Polyhedral accessors: the counts are always 0 and the element getters assert (see the .cpp).
+	virtual int	getNumVertices() const;
+	virtual int getNumEdges() const;
+	virtual void getEdge(int i,btVector3& pa,btVector3& pb) const;
+	virtual void getVertex(int i,btVector3& vtx) const;
+	virtual int	getNumPlanes() const;
+	virtual void getPlane(btVector3& planeNormal,btVector3& planeSupport,int i ) const;
+	virtual	bool isInside(const btVector3& pt,btScalar tolerance) const;
+
+	// Scaling is forwarded to, and read back from, the striding mesh interface.
+	virtual void	setLocalScaling(const btVector3& scaling);
+	virtual const btVector3& getLocalScaling() const;
+
+	///computes the exact moment of inertia and the transform from the coordinate system defined by the principal axes of the moment of inertia
+	///and the center of mass to the current coordinate system. A mass of 1 is assumed, for other masses just multiply the computed "inertia"
+	///by the mass. The resulting transform "principal" has to be applied inversely to the mesh in order for the local coordinate system of the
+	///shape to be centered at the center of mass and to coincide with the principal axes. This also necessitates a correction of the world transform
+	///of the collision object by the principal transform. This method also computes the volume of the convex mesh.
+	void calculatePrincipalAxisTransform(btTransform& principal, btVector3& inertia, btScalar& volume) const;
+
+};
+
+
+
+#endif //BT_CONVEX_TRIANGLEMESH_SHAPE_H
+
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btCylinderShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btCylinderShape.cpp
new file mode 100644
index 00000000..6cfe43be
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btCylinderShape.cpp
@@ -0,0 +1,281 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btCylinderShape.h"
+
+btCylinderShape::btCylinderShape (const btVector3& halfExtents)
+:btConvexInternalShape(),
+m_upAxis(1)
+{
+	setSafeMargin(halfExtents);
+	// setSafeMargin runs first, so getMargin() below sees the margin it selected; the stored dimensions are the scaled half extents minus that margin.
+	btVector3 margin(getMargin(),getMargin(),getMargin());
+	m_implicitShapeDimensions = (halfExtents * m_localScaling) - margin;
+	m_shapeType = CYLINDER_SHAPE_PROXYTYPE;
+}
+
+
+btCylinderShapeX::btCylinderShapeX (const btVector3& halfExtents)
+:btCylinderShape(halfExtents)
+{
+	m_upAxis = 0;	// the base constructor selected 1 (Y); switch the axis to X
+
+}
+
+
+btCylinderShapeZ::btCylinderShapeZ (const btVector3& halfExtents)
+:btCylinderShape(halfExtents)
+{
+	m_upAxis = 2;	// the base constructor selected 1 (Y); switch the axis to Z
+
+}
+
+void btCylinderShape::getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+{	// Delegates to btTransformAabb, passing the margin-free half extents and the margin as separate arguments.
+	btTransformAabb(getHalfExtentsWithoutMargin(),getMargin(),t,aabbMin,aabbMax);
+}
+
+// Writes the diagonal of the local inertia tensor for the given mass into inertia.
+void	btCylinderShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+
+//Until Bullet 2.77 a box approximation was used, so uncomment this if you need backwards compatibility
+//#define USE_BOX_INERTIA_APPROXIMATION 1
+#ifndef USE_BOX_INERTIA_APPROXIMATION
+
+	/*
+	Inertia of a solid cylinder about its centre, from the margin-inclusive half extents:
+	*
+	* - btCylinderShape (Y): axis along y, radius read from the x half extent, z half extent unused
+	* - btCylinderShapeX: axis along x, radius read from the y half extent, z half extent unused
+	* - btCylinderShapeZ: axis along z, radius read from the x half extent, y half extent unused
+	*
+	*/
+
+	const btVector3 dims = getHalfExtentsWithMargin();
+
+	// Select the components holding the axis and the radius.
+	// Any m_upAxis other than 0 or 2 is treated as the Y-aligned case.
+	int axisIdx = 1;
+	int radialIdx = 0;
+	if (m_upAxis == 0)
+	{
+		axisIdx = 0;
+		radialIdx = 1;
+	}
+	else if (m_upAxis == 2)
+	{
+		axisIdx = 2;
+	}
+
+	// Squared radius, read from the radial component.
+	const btScalar radiusSq = dims[radialIdx] * dims[radialIdx];
+	// The full height is twice the half extent, hence the factor 4 on its square.
+	const btScalar heightSq = btScalar(4.) * dims[axisIdx] * dims[axisIdx];
+
+	// Mass fractions used by the formulas below.
+	const btScalar massOver12 = mass / 12.f;
+	const btScalar massOver4 = mass / 4.f;
+	const btScalar massOver2 = mass / 2.f;
+
+	// About either axis perpendicular to the cylinder axis: m*h^2/12 + m*r^2/4.
+	const btScalar transverse = massOver12 * heightSq + massOver4 * radiusSq;
+	// About the cylinder axis itself: m*r^2/2.
+	const btScalar axial = massOver2 * radiusSq;
+
+	// Fill all three diagonal terms with the transverse value, then overwrite the axial one.
+	inertia.setValue(transverse,transverse,transverse);
+	inertia[axisIdx] = axial;
+
+#else //USE_BOX_INERTIA_APPROXIMATION
+	//approximation of box shape
+	// Legacy path kept for compatibility with Bullet releases up to 2.77.
+	const btVector3 dims = getHalfExtentsWithMargin();
+
+	// Full edge lengths of the box.
+	const btScalar sizeX = btScalar(2.)*(dims.x());
+	const btScalar sizeY = btScalar(2.)*(dims.y());
+	const btScalar sizeZ = btScalar(2.)*(dims.z());
+
+	// Each term is m/12 times the summed squares of the two perpendicular edge lengths.
+	const btScalar massOver12 = mass/(btScalar(12.0));
+
+	inertia.setValue(massOver12 * (sizeY*sizeY + sizeZ*sizeZ),
+					massOver12 * (sizeX*sizeX + sizeZ*sizeZ),
+					massOver12 * (sizeX*sizeX + sizeY*sizeY));
+
+#endif //USE_BOX_INERTIA_APPROXIMATION
+}
+
+
+// Support point of an X-aligned cylinder for direction v (margin not included).
+// halfExtents[0] is the half height along the axis; halfExtents[1] is the radius.
+SIMD_FORCE_INLINE btVector3 CylinderLocalSupportX(const btVector3& halfExtents,const btVector3& v) 
+{
+const int cylinderUpAxis = 0;
+const int XX = 1;
+const int YY = 0;
+const int ZZ = 2;
+
+	//XX and ZZ index the two radial components, YY the component along the cylinder axis
+
+	btScalar radius = halfExtents[XX];
+	btScalar halfHeight = halfExtents[cylinderUpAxis];
+
+	// Zero-initialise so the unused fourth component is not returned indeterminate.
+	btVector3 tmp(btScalar(0.),btScalar(0.),btScalar(0.));
+
+	// Along the axis the support point lies on the cap that v points towards.
+	tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight;
+
+	btScalar s = btSqrt(v[XX] * v[XX] + v[ZZ] * v[ZZ]);
+	if (s != btScalar(0.0))
+	{
+		// Scale the radial part of v onto the rim.
+		btScalar d = radius / s;
+		tmp[XX] = v[XX] * d;
+		tmp[ZZ] = v[ZZ] * d;
+	}
+	else
+	{
+		// v has no radial part: use the rim point at +radius on XX.
+		tmp[XX] = radius;
+		tmp[ZZ] = btScalar(0.0);
+	}
+
+	return tmp;
+}
+
+
+
+
+
+
+// Support point of a Y-aligned cylinder for direction v (margin not included).
+// halfExtents[1] is the half height along the axis; halfExtents[0] is the radius.
+inline  btVector3 CylinderLocalSupportY(const btVector3& halfExtents,const btVector3& v) 
+{
+const int cylinderUpAxis = 1;
+const int XX = 0;
+const int YY = 1;
+const int ZZ = 2;
+
+	btScalar radius = halfExtents[XX];
+	btScalar halfHeight = halfExtents[cylinderUpAxis];
+
+	// Zero-initialise so the unused fourth component is not returned indeterminate.
+	btVector3 tmp(btScalar(0.),btScalar(0.),btScalar(0.));
+
+	// Along the axis the support point lies on the cap that v points towards.
+	tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight;
+
+	btScalar s = btSqrt(v[XX] * v[XX] + v[ZZ] * v[ZZ]);
+	if (s != btScalar(0.0))
+	{
+		// Scale the radial part of v onto the rim.
+		btScalar d = radius / s;
+		tmp[XX] = v[XX] * d;
+		tmp[ZZ] = v[ZZ] * d;
+	}
+	else
+	{
+		// v has no radial part: use the rim point at +radius on XX.
+		tmp[XX] = radius;
+		tmp[ZZ] = btScalar(0.0);
+	}
+	return tmp;
+}
+
+// Support point of a Z-aligned cylinder for direction v (margin not included).
+// halfExtents[2] is the half height along the axis; halfExtents[0] is the radius.
+inline btVector3 CylinderLocalSupportZ(const btVector3& halfExtents,const btVector3& v) 
+{
+const int cylinderUpAxis = 2;
+const int XX = 0;
+const int YY = 2;
+const int ZZ = 1;
+
+	//XX and ZZ index the two radial components, YY the component along the cylinder axis
+
+	btScalar radius = halfExtents[XX];
+	btScalar halfHeight = halfExtents[cylinderUpAxis];
+
+	// Zero-initialise so the unused fourth component is not returned indeterminate.
+	btVector3 tmp(btScalar(0.),btScalar(0.),btScalar(0.));
+
+	// Along the axis the support point lies on the cap that v points towards.
+	tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight;
+
+	btScalar s = btSqrt(v[XX] * v[XX] + v[ZZ] * v[ZZ]);
+	if (s != btScalar(0.0))
+	{
+		// Scale the radial part of v onto the rim.
+		btScalar d = radius / s;
+		tmp[XX] = v[XX] * d;
+		tmp[ZZ] = v[ZZ] * d;
+	}
+	else
+	{
+		// v has no radial part: use the rim point at +radius on XX.
+		tmp[XX] = radius;
+		tmp[ZZ] = btScalar(0.0);
+	}
+
+	return tmp;
+}
+
+btVector3	btCylinderShapeX::localGetSupportingVertexWithoutMargin(const btVector3& vec)const
+{	// X-aligned variant: delegates to CylinderLocalSupportX with the margin-free half extents
+	return CylinderLocalSupportX(getHalfExtentsWithoutMargin(),vec);
+}
+
+// Z-aligned variant of the same query.
+btVector3	btCylinderShapeZ::localGetSupportingVertexWithoutMargin(const btVector3& vec)const
+{
+	return CylinderLocalSupportZ(getHalfExtentsWithoutMargin(),vec);
+}
+btVector3	btCylinderShape::localGetSupportingVertexWithoutMargin(const btVector3& vec)const
+{	// Y-aligned variant, used by the base class
+	return CylinderLocalSupportY(getHalfExtentsWithoutMargin(),vec);
+}
+
+// Y-axis batch: supportVerticesOut[k] receives the margin-free support point for vectors[k].
+void	btCylinderShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+	const btVector3& halfExtents = getHalfExtentsWithoutMargin();
+	for (int k=0;k<numVectors;++k)
+		supportVerticesOut[k] = CylinderLocalSupportY(halfExtents,vectors[k]);
+}
+
+// Z-axis batch: supportVerticesOut[k] receives the margin-free support point for vectors[k].
+void	btCylinderShapeZ::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+	const btVector3& halfExtents = getHalfExtentsWithoutMargin();
+	for (int k=0;k<numVectors;++k)
+		supportVerticesOut[k] = CylinderLocalSupportZ(halfExtents,vectors[k]);
+}
+
+
+
+
+// X-axis batch: supportVerticesOut[k] receives the margin-free support point for vectors[k].
+void	btCylinderShapeX::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+	const btVector3& halfExtents = getHalfExtentsWithoutMargin();
+	for (int k=0;k<numVectors;++k)
+		supportVerticesOut[k] = CylinderLocalSupportX(halfExtents,vectors[k]);
+}
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btCylinderShape.h b/src/bullet/BulletCollision/CollisionShapes/btCylinderShape.h
new file mode 100644
index 00000000..125bfc78
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btCylinderShape.h
@@ -0,0 +1,200 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CYLINDER_MINKOWSKI_H
+#define BT_CYLINDER_MINKOWSKI_H
+
+#include "btBoxShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
+#include "LinearMath/btVector3.h"
+
+/// The btCylinderShape class implements a cylinder shape primitive, centered around the origin. Its central axis aligned with the Y axis. btCylinderShapeX is aligned with the X axis and btCylinderShapeZ around the Z axis.
+class btCylinderShape : public btConvexInternalShape
+
+{
+
+protected:
+	// Index of the cylinder axis: 0 = X, 1 = Y (the value this class's constructor sets), 2 = Z.
+	int	m_upAxis;
+
+public:
+	/// Stored dimensions with the collision margin added back on every axis.
+	btVector3 getHalfExtentsWithMargin() const
+	{
+		btVector3 halfExtents = getHalfExtentsWithoutMargin();
+		btVector3 margin(getMargin(),getMargin(),getMargin());
+		halfExtents += margin;
+		return halfExtents;
+	}
+	/// Stored dimensions; the constructor computes them as scaled half extents minus the margin.
+	const btVector3& getHalfExtentsWithoutMargin() const
+	{
+		return m_implicitShapeDimensions;//changed in Bullet 2.63: assume the scaling and margin are included
+	}
+	/// Builds a Y-aligned cylinder; the margin is subtracted from halfExtents before they are stored.
+	btCylinderShape (const btVector3& halfExtents);
+	
+	void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+	/// Changes the margin while keeping the margin-inclusive dimensions unchanged.
+	virtual void setMargin(btScalar collisionMargin)
+	{
+		//correct the m_implicitShapeDimensions for the margin
+		btVector3 oldMargin(getMargin(),getMargin(),getMargin());
+		btVector3 implicitShapeDimensionsWithMargin = m_implicitShapeDimensions+oldMargin;
+		
+		btConvexInternalShape::setMargin(collisionMargin);
+		btVector3 newMargin(getMargin(),getMargin(),getMargin());
+		m_implicitShapeDimensions = implicitShapeDimensionsWithMargin - newMargin;
+
+	}
+	/// Margin-free support vertex moved by getMargin() along the normalised direction vec.
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec) const
+	{
+
+		btVector3 supVertex;
+		supVertex = localGetSupportingVertexWithoutMargin(vec);
+		// A zero margin needs no offset; otherwise near-zero directions are replaced by (-1,-1,-1) before normalising.
+		if ( getMargin()!=btScalar(0.) )
+		{
+			btVector3 vecnorm = vec;
+			if (vecnorm .length2() < (SIMD_EPSILON*SIMD_EPSILON))
+			{
+				vecnorm.setValue(btScalar(-1.),btScalar(-1.),btScalar(-1.));
+			} 
+			vecnorm.normalize();
+			supVertex+= getMargin() * vecnorm;
+		}
+		return supVertex;
+	}
+
+
+	//use box inertia
+	//	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+
+	int	getUpAxis() const
+	{
+		return m_upAxis;
+	}
+	/// Radius including the margin; this base-class version reads the X component.
+	virtual btScalar getRadius() const
+	{
+		return getHalfExtentsWithMargin().getX();
+	}
+	/// Re-derives the stored dimensions for a new scale: undo the old scale, apply the new one, subtract the (unscaled) margin again.
+	virtual void	setLocalScaling(const btVector3& scaling)
+	{
+		btVector3 oldMargin(getMargin(),getMargin(),getMargin());
+		btVector3 implicitShapeDimensionsWithMargin = m_implicitShapeDimensions+oldMargin;
+		btVector3 unScaledImplicitShapeDimensionsWithMargin = implicitShapeDimensionsWithMargin / m_localScaling; // NOTE(review): a zero component in the current scale would divide by zero here
+
+		btConvexInternalShape::setLocalScaling(scaling);
+
+		m_implicitShapeDimensions = (unScaledImplicitShapeDimensionsWithMargin * m_localScaling) - oldMargin;
+
+	}
+
+	//debugging
+	virtual const char*	getName()const
+	{
+		return "CylinderY";
+	}
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+};
+
+class btCylinderShapeX : public btCylinderShape
+{
+public:
+	btCylinderShapeX (const btVector3& halfExtents);
+	// Support queries for the X-aligned cylinder (defined in the .cpp).
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+	
+		//debugging
+	virtual const char*	getName()const
+	{
+		return "CylinderX";
+	}
+	// For the X-aligned cylinder the radius is read from the Y component.
+	virtual btScalar getRadius() const
+	{
+		return getHalfExtentsWithMargin().getY();
+	}
+
+};
+
+class btCylinderShapeZ : public btCylinderShape
+{
+public:
+	btCylinderShapeZ (const btVector3& halfExtents);
+	// Support queries for the Z-aligned cylinder (defined in the .cpp).
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+
+		//debugging
+	virtual const char*	getName()const
+	{
+		return "CylinderZ";
+	}
+	// Same component as the base class: the radius is read from the X component.
+	virtual btScalar getRadius() const
+	{
+		return getHalfExtentsWithMargin().getX();
+	}
+
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btCylinderShapeData
+{
+	btConvexInternalShapeData	m_convexInternalShapeData;
+	// Copy of btCylinderShape::m_upAxis, written by btCylinderShape::serialize.
+	int	m_upAxis;
+	// Explicit padding after the axis index (presumably to keep the struct size a multiple of 8 -- confirm).
+	char	m_padding[4];
+};
+
+SIMD_FORCE_INLINE	int	btCylinderShape::calculateSerializeBufferSize() const
+{	// serialize() writes exactly one btCylinderShapeData into the buffer
+	return sizeof(btCylinderShapeData);
+}
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+SIMD_FORCE_INLINE	const char*	btCylinderShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btCylinderShapeData* shapeData = (btCylinderShapeData*) dataBuffer;
+	// The shared convex-internal fields are written by the base class first.
+	btConvexInternalShape::serialize(&shapeData->m_convexInternalShapeData,serializer);
+	shapeData->m_upAxis = m_upAxis;
+	// Zero the explicit padding so no indeterminate bytes end up in the serialized buffer.
+	shapeData->m_padding[0] = shapeData->m_padding[1] = shapeData->m_padding[2] = shapeData->m_padding[3] = 0;
+	return "btCylinderShapeData";
+}
+
+
+
+#endif //BT_CYLINDER_MINKOWSKI_H
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btEmptyShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btEmptyShape.cpp
new file mode 100644
index 00000000..a9e6df5c
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btEmptyShape.cpp
@@ -0,0 +1,50 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btEmptyShape.h"
+
+
+#include "btCollisionShape.h"
+
+
+btEmptyShape::btEmptyShape() : btConcaveShape ()
+{
+	m_shapeType = EMPTY_SHAPE_PROXYTYPE;
+	m_localScaling.setValue(btScalar(1.), btScalar(1.), btScalar(1.)); // unit scale, so getLocalScaling() is defined before any setLocalScaling()
+}
+
+// Destructor: the body is empty, nothing is released here.
+btEmptyShape::~btEmptyShape()
+{}
+
+
+/// Reports a box centred on the origin of t that extends by the collision
+/// margin along each axis.
+void btEmptyShape::getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	const btScalar marginValue = getMargin();
+	const btVector3 marginVec(marginValue, marginValue, marginValue);
+	const btVector3& center = t.getOrigin();
+	aabbMin = center - marginVec;
+	aabbMax = center + marginVec;
+}
+
+void	btEmptyShape::calculateLocalInertia(btScalar /*mass*/,btVector3& /*inertia*/) const
+{
+	// unsupported for the empty shape: both arguments are ignored, the body is a failing assert
+	btAssert(0);
+}
+	
+	
diff --git a/src/bullet/BulletCollision/CollisionShapes/btEmptyShape.h b/src/bullet/BulletCollision/CollisionShapes/btEmptyShape.h
new file mode 100644
index 00000000..87b7b66d
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btEmptyShape.h
@@ -0,0 +1,70 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_EMPTY_SHAPE_H
+#define BT_EMPTY_SHAPE_H
+
+#include "btConcaveShape.h"
+
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btMatrix3x3.h"
+#include "btCollisionMargin.h"
+
+
+
+
+/// The btEmptyShape is a collision shape without an actual collision detection shape, so most users should ignore this class.
+/// It can be replaced by another shape during runtime, but the inertia tensor should be recomputed.
+class btEmptyShape	: public btConcaveShape
+{
+public:
+	btEmptyShape();
+
+	virtual ~btEmptyShape();
+
+	/// Writes an axis-aligned box around the origin of t into aabbMin/aabbMax
+	/// (defined in btEmptyShape.cpp).
+	void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+
+	/// Stores the scaling vector; within this class it is only read back
+	/// through getLocalScaling().
+	virtual void setLocalScaling(const btVector3& scaling) { m_localScaling = scaling; }
+
+	/// Returns a reference to the stored scaling vector.
+	virtual const btVector3& getLocalScaling() const { return m_localScaling; }
+
+	/// Defined in btEmptyShape.cpp.
+	virtual void calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	/// Debugging aid: fixed label of this shape class.
+	virtual const char* getName() const { return "Empty"; }
+
+	/// Intentionally a no-op: the callback is never invoked and the
+	/// box arguments are ignored.
+	virtual void processAllTriangles(btTriangleCallback* /*callback*/,
+	                                 const btVector3& /*aabbMin*/,
+	                                 const btVector3& /*aabbMax*/) const
+	{
+	}
+
+protected:
+	/// Value managed through setLocalScaling()/getLocalScaling().
+	btVector3	m_localScaling;
+};
+
+
+
+#endif //BT_EMPTY_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp
new file mode 100644
index 00000000..95631c30
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp
@@ -0,0 +1,411 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btHeightfieldTerrainShape.h"
+
+#include "LinearMath/btTransformUtil.h"
+
+
+
+/// Preferred constructor: hands every argument, unchanged, to initialize().
+btHeightfieldTerrainShape::btHeightfieldTerrainShape(
+	int heightStickWidth, int heightStickLength,
+	const void* heightfieldData, btScalar heightScale,
+	btScalar minHeight, btScalar maxHeight, int upAxis,
+	PHY_ScalarType hdt, bool flipQuadEdges)
+{
+	initialize(heightStickWidth, heightStickLength, heightfieldData,
+	           heightScale, minHeight, maxHeight,
+	           upAxis, hdt, flipQuadEdges);
+}
+
+
+
+btHeightfieldTerrainShape::btHeightfieldTerrainShape(int heightStickWidth, int heightStickLength,const void* heightfieldData,btScalar maxHeight,int upAxis,bool useFloatData,bool flipQuadEdges)
+{
+	// Legacy entry point: the data is either float or unsigned char,
+	// and the minimum height is fixed at zero.
+	PHY_ScalarType legacyType = PHY_UCHAR;
+	if (useFloatData)
+	{
+		legacyType = PHY_FLOAT;
+	}
+	// Older code computed height = uchar * maxHeight / 65535, so the
+	// same scale factor is handed to initialize() to keep that behavior.
+	initialize(heightStickWidth, heightStickLength, heightfieldData,
+	           maxHeight / 65535, btScalar(0.0), maxHeight, upAxis,
+	           legacyType, flipQuadEdges);
+}
+
+
+
+/// Shared back end of both constructors: checks the arguments with btAssert,
+/// stores them, and derives the local AABB and local origin.
+void btHeightfieldTerrainShape::initialize(
+	int heightStickWidth, int heightStickLength, const void* heightfieldData,
+	btScalar heightScale, btScalar minHeight, btScalar maxHeight, int upAxis,
+	PHY_ScalarType hdt, bool flipQuadEdges)
+{
+	// validation
+	btAssert(heightStickWidth > 1 && "bad width");
+	btAssert(heightStickLength > 1 && "bad length");
+	btAssert(heightfieldData && "null heightfield data");
+	// btAssert(heightScale) -- do we care?  Trust caller here
+	btAssert(minHeight <= maxHeight && "bad min/max height");
+	btAssert(upAxis >= 0 && upAxis < 3 && "bad upAxis--should be in range [0,2]");
+	// accept only the formats getRawHeightFieldValue() decodes (a chain of "!=" joined by "||" would always be true)
+	btAssert((hdt == PHY_UCHAR || hdt == PHY_FLOAT || hdt == PHY_SHORT) &&
+	    "Bad height data type enum");
+
+	// initialize member variables
+	m_shapeType = TERRAIN_SHAPE_PROXYTYPE;
+	m_heightStickWidth = heightStickWidth;
+	m_heightStickLength = heightStickLength;
+	m_minHeight = minHeight;
+	m_maxHeight = maxHeight;
+	m_width = (btScalar) (heightStickWidth - 1);
+	m_length = (btScalar) (heightStickLength - 1);
+	m_heightScale = heightScale;
+	m_heightfieldDataUnknown = heightfieldData;
+	m_heightDataType = hdt;
+	m_flipQuadEdges = flipQuadEdges;
+	m_useDiamondSubdivision = false;
+	m_upAxis = upAxis;
+	m_localScaling.setValue(btScalar(1.), btScalar(1.), btScalar(1.));
+
+	// determine min/max axis-aligned bounding box (aabb) values
+	switch (m_upAxis)
+	{
+	case 0:
+		{
+			m_localAabbMin.setValue(m_minHeight, 0, 0);
+			m_localAabbMax.setValue(m_maxHeight, m_width, m_length);
+			break;
+		}
+	case 1:
+		{
+			m_localAabbMin.setValue(0, m_minHeight, 0);
+			m_localAabbMax.setValue(m_width, m_maxHeight, m_length);
+			break;
+		}
+	case 2:
+		{
+			m_localAabbMin.setValue(0, 0, m_minHeight);
+			m_localAabbMax.setValue(m_width, m_length, m_maxHeight);
+			break;
+		}
+	default:
+		{
+			//need to get valid m_upAxis
+			btAssert(0 && "Bad m_upAxis");
+		}
+	}
+
+	// remember origin (defined as exact middle of aabb)
+	m_localOrigin = btScalar(0.5) * (m_localAabbMin + m_localAabbMax);
+}
+
+
+
+// Empty body: nothing is freed here (the class documentation leaves the heightfield array with the caller).
+btHeightfieldTerrainShape::~btHeightfieldTerrainShape()
+{}
+
+
+
+/// Computes the world-space AABB: the scaled local half extents are pushed
+/// through the absolute value of t's basis, padded by the margin, and
+/// centred on t's origin.
+void btHeightfieldTerrainShape::getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	btVector3 halfExtents = (m_localAabbMax-m_localAabbMin)* m_localScaling * btScalar(0.5);
+
+	// the box is centred on t's origin; no additional local offset is applied
+	btMatrix3x3 abs_b = t.getBasis().absolute();
+	btVector3 center = t.getOrigin();
+	btVector3 extent = btVector3(abs_b[0].dot(halfExtents),
+		   abs_b[1].dot(halfExtents),
+		  abs_b[2].dot(halfExtents));
+	extent += btVector3(getMargin(),getMargin(),getMargin());
+
+	aabbMin = center - extent;
+	aabbMax = center + extent;
+}
+
+
+/// Looks up the "raw" (user's initial) height stored for grid point (x, y).
+/// The value is not yet relative to the center of the heightfield's AABB;
+/// getVertex() applies that shift.
+/// Float samples are returned as stored, while unsigned char and short
+/// samples are multiplied by m_heightScale.
+btScalar
+btHeightfieldTerrainShape::getRawHeightFieldValue(int x,int y) const
+{
+	// samples are addressed row by row: each row holds
+	// m_heightStickWidth entries
+	const int sampleIndex = (y*m_heightStickWidth)+x;
+
+	// floating point data: no scaling
+	if (m_heightDataType == PHY_FLOAT)
+	{
+		return m_heightfieldDataFloat[sampleIndex];
+	}
+
+	// unsigned char data
+	if (m_heightDataType == PHY_UCHAR)
+	{
+		const unsigned char rawByte = m_heightfieldDataUnsignedChar[sampleIndex];
+		return rawByte * m_heightScale;
+	}
+
+	// short integer data
+	if (m_heightDataType == PHY_SHORT)
+	{
+		const short rawShort = m_heightfieldDataShort[sampleIndex];
+		return rawShort * m_heightScale;
+	}
+
+	// any other type is a caller error: assert, then report a
+	// height of zero
+	btAssert(!"Bad m_heightDataType");
+	return btScalar(0.f);
+}
+
+
+
+
+/// Computes grid point (x, y) in bullet-local coordinates.
+/// The two in-plane coordinates are measured from the middle of the grid,
+/// the height is taken relative to m_localOrigin, and the result is
+/// multiplied component-wise by m_localScaling.
+void	btHeightfieldTerrainShape::getVertex(int x,int y,btVector3& vertex) const
+{
+	btAssert(x>=0);
+	btAssert(y>=0);
+	btAssert(x<m_heightStickWidth);
+	btAssert(y<m_heightStickLength);
+
+	const btScalar rawHeight = getRawHeightFieldValue(x,y);
+	// position inside the grid plane, with (0, 0) at the grid's middle
+	const btScalar alongWidth = (-m_width/btScalar(2.0)) + x;
+	const btScalar alongLength = (-m_length/btScalar(2.0)) + y;
+
+	// m_upAxis picks the component that receives the height; the other two get the in-plane position, width first
+	if (m_upAxis == 0)
+	{
+		vertex.setValue(
+			rawHeight - m_localOrigin.getX(),
+			alongWidth,
+			alongLength
+			);
+	}
+	else if (m_upAxis == 1)
+	{
+		vertex.setValue(
+			alongWidth,
+			rawHeight - m_localOrigin.getY(),
+			alongLength
+			);
+	}
+	else if (m_upAxis == 2)
+	{
+		vertex.setValue(
+			alongWidth,
+			alongLength,
+			rawHeight - m_localOrigin.getZ()
+			);
+	}
+	else
+	{
+		//need to get valid m_upAxis
+		btAssert(0);
+	}
+	vertex*=m_localScaling;
+}
+
+
+
+/// Rounds x to the nearest integer, with halves moving away from zero:
+/// 0.5 is added (or subtracted for negative x) before the cast truncates.
+static inline int
+getQuantized(btScalar x)
+{
+	// shift by half a unit in the direction of x's sign ...
+	const bool isNegative = (x < 0.0);
+	// ... so that the truncating cast lands on the nearest integer
+	return isNegative ? (int) (x - 0.5)
+	                  : (int) (x + 0.5);
+}
+
+
+
+/// Converts a point into the indices of the closest grid point.
+/**
+  The point is first restricted ("clamped") to the heightfield's local
+  axis-aligned bounding box, then every coordinate is rounded to the
+  nearest integer by getQuantized().
+
+  out must have room for three ints (x, y and z index, in that order).
+  The third parameter is accepted but not used.
+ */
+void btHeightfieldTerrainShape::quantizeWithClamp(int* out, const btVector3& point,int /*isMax*/) const
+{
+	// clamp: raise to the box minimum, then lower to the box maximum
+	btVector3 insideBox(point);
+	insideBox.setMax(m_localAabbMin);
+	insideBox.setMin(m_localAabbMax);
+
+	const btScalar coords[3] = { insideBox.getX(), insideBox.getY(), insideBox.getZ() };
+	for (int axis = 0; axis < 3; ++axis)
+		out[axis] = getQuantized(coords[axis]);
+}
+
+
+
+/// process all triangles within the provided axis-aligned bounding box
+/**
+  basic algorithm:
+    - convert input aabb to local coordinates (scale down and shift for local origin)
+    - convert input aabb to a range of heightfield grid points (quantize)
+    - iterate over all quads in that subset of the grid, reporting two triangles per quad to the callback
+ */
+void	btHeightfieldTerrainShape::processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
+{
+	// scale down the input aabb's so they are in local (non-scaled) coordinates
+	btVector3	localAabbMin = aabbMin*btVector3(1.f/m_localScaling[0],1.f/m_localScaling[1],1.f/m_localScaling[2]);
+	btVector3	localAabbMax = aabbMax*btVector3(1.f/m_localScaling[0],1.f/m_localScaling[1],1.f/m_localScaling[2]);
+
+	// account for local origin
+	localAabbMin += m_localOrigin;
+	localAabbMax += m_localOrigin;
+
+	//quantize the aabbMin and aabbMax, and adjust the start/end ranges
+	int	quantizedAabbMin[3];
+	int	quantizedAabbMax[3];
+	quantizeWithClamp(quantizedAabbMin, localAabbMin,0);
+	quantizeWithClamp(quantizedAabbMax, localAabbMax,1);
+	
+	// expand the min/max quantized values
+	// this is to catch the case where the input aabb falls between grid points!
+	for (int i = 0; i < 3; ++i) {
+		quantizedAabbMin[i]--;
+		quantizedAabbMax[i]++;
+	}	
+	// default range: every quad of the grid (the loops below read x+1 and j+1, hence the "-1")
+	int startX=0;
+	int endX=m_heightStickWidth-1;
+	int startJ=0;
+	int endJ=m_heightStickLength-1;
+	// narrow the range to the quantized box, using the two axes other than m_upAxis
+	switch (m_upAxis)
+	{
+	case 0:
+		{
+			if (quantizedAabbMin[1]>startX)
+				startX = quantizedAabbMin[1];
+			if (quantizedAabbMax[1]<endX)
+				endX = quantizedAabbMax[1];
+			if (quantizedAabbMin[2]>startJ)
+				startJ = quantizedAabbMin[2];
+			if (quantizedAabbMax[2]<endJ)
+				endJ = quantizedAabbMax[2];
+			break;
+		}
+	case 1:
+		{
+			if (quantizedAabbMin[0]>startX)
+				startX = quantizedAabbMin[0];
+			if (quantizedAabbMax[0]<endX)
+				endX = quantizedAabbMax[0];
+			if (quantizedAabbMin[2]>startJ)
+				startJ = quantizedAabbMin[2];
+			if (quantizedAabbMax[2]<endJ)
+				endJ = quantizedAabbMax[2];
+			break;
+		};
+	case 2:
+		{
+			if (quantizedAabbMin[0]>startX)
+				startX = quantizedAabbMin[0];
+			if (quantizedAabbMax[0]<endX)
+				endX = quantizedAabbMax[0];
+			if (quantizedAabbMin[1]>startJ)
+				startJ = quantizedAabbMin[1];
+			if (quantizedAabbMax[1]<endJ)
+				endJ = quantizedAabbMax[1];
+			break;
+		}
+	default:
+		{
+			//need to get valid m_upAxis
+			btAssert(0);
+		}
+	}
+
+	// each quad (x,j)-(x+1,j+1) is reported as two triangles; m_flipQuadEdges, or the
+	// diamond option on quads where j+x is even, selects which diagonal splits the quad
+
+	for(int j=startJ; j<endJ; j++)
+	{
+		for(int x=startX; x<endX; x++)
+		{
+			btVector3 vertices[3];
+			if (m_flipQuadEdges || (m_useDiamondSubdivision && !((j+x) & 1)))
+			{
+        //first triangle: (x,j), (x+1,j), (x+1,j+1)
+        getVertex(x,j,vertices[0]);
+        getVertex(x+1,j,vertices[1]);
+        getVertex(x+1,j+1,vertices[2]);
+        callback->processTriangle(vertices,x,j);
+        //second triangle: (x,j), (x+1,j+1), (x,j+1)
+        getVertex(x,j,vertices[0]);
+        getVertex(x+1,j+1,vertices[1]);
+        getVertex(x,j+1,vertices[2]);
+        callback->processTriangle(vertices,x,j);				
+			} else
+			{
+        //first triangle: (x,j), (x,j+1), (x+1,j)
+        getVertex(x,j,vertices[0]);
+        getVertex(x,j+1,vertices[1]);
+        getVertex(x+1,j,vertices[2]);
+        callback->processTriangle(vertices,x,j);
+        //second triangle: (x+1,j), (x,j+1), (x+1,j+1)
+        getVertex(x+1,j,vertices[0]);
+        getVertex(x,j+1,vertices[1]);
+        getVertex(x+1,j+1,vertices[2]);
+        callback->processTriangle(vertices,x,j);
+			}
+		}
+	}
+
+	
+
+}
+
+/// Moving concave objects are not supported: mass is ignored, inertia is set to zero.
+void	btHeightfieldTerrainShape::calculateLocalInertia(btScalar /*mass*/,btVector3& inertia) const
+{
+	const btScalar zero(0.);
+	inertia.setValue(zero, zero, zero);
+}
+
+/// Replaces the per-axis scale that getAabb(), getVertex() and
+/// processAllTriangles() read.
+void	btHeightfieldTerrainShape::setLocalScaling(const btVector3& scaling)
+{ m_localScaling = scaling; }
+/// Hands back a reference to the stored scale, which initialize() sets
+/// to (1, 1, 1) and setLocalScaling() overwrites.
+const btVector3& btHeightfieldTerrainShape::getLocalScaling() const
+{ return m_localScaling; }
diff --git a/src/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h b/src/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h
new file mode 100644
index 00000000..78e231e0
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h
@@ -0,0 +1,161 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_HEIGHTFIELD_TERRAIN_SHAPE_H
+#define BT_HEIGHTFIELD_TERRAIN_SHAPE_H
+
+#include "btConcaveShape.h"
+
+///btHeightfieldTerrainShape simulates a 2D heightfield terrain
+/**
+  The caller is responsible for maintaining the heightfield array; this
+  class does not make a copy.
+
+  The heightfield can be dynamic so long as the min/max height values
+  capture the extremes (heights must always be in that range).
+
+  The local origin of the heightfield is assumed to be the exact
+  center (as determined by width and length and height, with each
+  axis multiplied by the localScaling).
+
+  \b NOTE: be careful with coordinates.  If you have a heightfield with a local
+  min height of -100m, and a max height of +500m, you may be tempted to place it
+  at the origin (0,0) and expect the heights in world coordinates to be
+  -100 to +500 meters.
+  Actually, the heights will be -300 to +300m, because bullet will re-center
+  the heightfield based on its AABB (which is determined by the min/max
+  heights).  So keep in mind that once you create a btHeightfieldTerrainShape
+  object, the heights will be adjusted relative to the center of the AABB.  This
+  is different to the behavior of many rendering engines, but is useful for
+  physics engines.
+
+  Most (but not all) rendering and heightfield libraries assume upAxis = 1
+  (that is, the y-axis is "up").  This class allows any of the 3 coordinates
+  to be "up".  Make sure your choice of axis is consistent with your rendering
+  system.
+
+  The heightfield heights are determined from the data type used for the
+  heightfieldData array.  
+
+   - PHY_UCHAR: height at a point is the uchar value at the
+       grid point, multiplied by heightScale.  uchar isn't recommended
+       because of its inability to deal with negative values, and
+       low resolution (8-bit).
+
+   - PHY_SHORT: height at a point is the short int value at that grid
+       point, multiplied by heightScale.
+
+   - PHY_FLOAT: height at a point is the float value at that grid
+       point.  heightScale is ignored when using the float heightfield
+       data type.
+
+  Whatever the caller specifies as minHeight and maxHeight will be honored.
+  The class will not inspect the heightfield to discover the actual minimum
+  or maximum heights.  These values are used to determine the heightfield's
+  axis-aligned bounding box, multiplied by localScaling.
+
+  For usage and testing see the TerrainDemo.
+ */
+class btHeightfieldTerrainShape : public btConcaveShape
+{
+protected:
+	btVector3	m_localAabbMin;	///< unscaled local box minimum, set by initialize()
+	btVector3	m_localAabbMax;	///< unscaled local box maximum, set by initialize()
+	btVector3	m_localOrigin;	///< midpoint of the two box corners above
+
+	///terrain data
+	int	m_heightStickWidth;	///< number of samples in one row of the heightfield array
+	int m_heightStickLength;	///< number of rows
+	btScalar	m_minHeight;
+	btScalar	m_maxHeight;
+	btScalar m_width;	///< m_heightStickWidth - 1
+	btScalar m_length;	///< m_heightStickLength - 1
+	btScalar m_heightScale;	///< factor applied to unsigned char / short samples
+	union	// one caller-supplied pointer; m_heightDataType selects which view is read
+	{
+		const unsigned char*	m_heightfieldDataUnsignedChar;
+		const short*		m_heightfieldDataShort;
+		const btScalar*			m_heightfieldDataFloat;
+		const void*	m_heightfieldDataUnknown;
+	};
+
+	PHY_ScalarType	m_heightDataType;	
+	bool	m_flipQuadEdges;	///< when set, processAllTriangles() splits every quad along the (x,j)-(x+1,j+1) diagonal
+  bool  m_useDiamondSubdivision;	///< when set, quads with even j+x use that diagonal as well
+
+	int	m_upAxis;	///< 0, 1 or 2: index of the coordinate that carries the height
+	
+	btVector3	m_localScaling;
+
+	virtual btScalar	getRawHeightFieldValue(int x,int y) const;
+	void		quantizeWithClamp(int* out, const btVector3& point,int isMax) const;
+	void		getVertex(int x,int y,btVector3& vertex) const;
+
+
+
+	/// protected initialization
+	/**
+	  Handles the work of constructors so that public constructors can be
+	  backwards-compatible without a lot of copy/paste.
+	 */
+	void initialize(int heightStickWidth, int heightStickLength,
+	                const void* heightfieldData, btScalar heightScale,
+	                btScalar minHeight, btScalar maxHeight, int upAxis,
+	                PHY_ScalarType heightDataType, bool flipQuadEdges);
+
+public:
+	/// preferred constructor
+	/**
+	  This constructor supports a range of heightfield
+	  data types, and allows for a non-zero minimum height value.
+	  heightScale is needed for any integer-based heightfield data types.
+	 */
+	btHeightfieldTerrainShape(int heightStickWidth,int heightStickLength,
+	                          const void* heightfieldData, btScalar heightScale,
+	                          btScalar minHeight, btScalar maxHeight,
+	                          int upAxis, PHY_ScalarType heightDataType,
+	                          bool flipQuadEdges);
+
+	/// legacy constructor
+	/**
+	  The legacy constructor assumes the heightfield has a minimum height
+	  of zero.  Only unsigned char or floats are supported.  For legacy
+	  compatibility reasons, heightScale is calculated as maxHeight / 65535 
+	  (and is only used when useFloatData = false).
+	 */
+	btHeightfieldTerrainShape(int heightStickWidth,int heightStickLength,const void* heightfieldData, btScalar maxHeight,int upAxis,bool useFloatData,bool flipQuadEdges);
+
+	virtual ~btHeightfieldTerrainShape();
+
+	/// switches the alternating ("diamond") quad split on or off; initialize() starts with it off
+	void setUseDiamondSubdivision(bool useDiamondSubdivision=true) { m_useDiamondSubdivision = useDiamondSubdivision;}
+
+	/// world-space box: scaled local extents transformed by t, plus the margin
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+
+	virtual void	processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const;
+
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	virtual void	setLocalScaling(const btVector3& scaling);
+	
+	virtual const btVector3& getLocalScaling() const;
+	
+	//debugging
+	virtual const char*	getName()const {return "HEIGHTFIELD";}
+
+};
+
+#endif //BT_HEIGHTFIELD_TERRAIN_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btMaterial.h b/src/bullet/BulletCollision/CollisionShapes/btMaterial.h
new file mode 100644
index 00000000..866f9b4d
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btMaterial.h
@@ -0,0 +1,35 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/// This file was created by Alex Silverman
+
+#ifndef BT_MATERIAL_H
+#define BT_MATERIAL_H
+
+// Per-triangle material record used by btMultimaterialTriangleMeshShape.
+class btMaterial
+{
+    // members are public so that materials can change due to world events
+public:
+    btScalar m_friction;
+    btScalar m_restitution;
+    int pad[2]; // not written by either constructor
+
+    btMaterial() {} // assigns nothing
+    btMaterial(btScalar fric, btScalar rest) : m_friction(fric), m_restitution(rest) {}
+};
+
+#endif // BT_MATERIAL_H
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btMinkowskiSumShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btMinkowskiSumShape.cpp
new file mode 100644
index 00000000..06707e24
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btMinkowskiSumShape.cpp
@@ -0,0 +1,60 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btMinkowskiSumShape.h"
+
+
+/// Stores the two operand shapes (only the pointers are kept) and
+/// starts both operand transforms at identity.
+btMinkowskiSumShape::btMinkowskiSumShape(const btConvexShape* shapeA,const btConvexShape* shapeB)
+	: btConvexInternalShape(), m_shapeA(shapeA), m_shapeB(shapeB)
+{
+	m_transB.setIdentity();
+	m_transA.setIdentity();
+	m_shapeType = MINKOWSKI_DIFFERENCE_SHAPE_PROXYTYPE;
+}
+
+btVector3 btMinkowskiSumShape::localGetSupportingVertexWithoutMargin(const btVector3& vec)const
+{
+	// each operand is queried in its own frame; B is probed along the negated direction
+	const btVector3 fromA = m_transA(m_shapeA->localGetSupportingVertexWithoutMargin(vec*m_transA.getBasis()));
+	return fromA - m_transB(m_shapeB->localGetSupportingVertexWithoutMargin((-vec)*m_transB.getBasis()));
+}
+
+/// Fills supportVerticesOut[0..numVectors) by running the single-vector query on each input in turn.
+void	btMinkowskiSumShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+	///@todo: could make recursive use of batching. probably this shape is not used frequently.
+	const btVector3* direction = vectors;
+	btVector3* result = supportVerticesOut;
+	for (int remaining = numVectors; remaining > 0; --remaining)
+		*result++ = localGetSupportingVertexWithoutMargin(*direction++);
+}
+
+
+
+btScalar	btMinkowskiSumShape::getMargin() const
+{	// the combined margin is the plain sum of both operands' margins
+	const btScalar marginOfA = m_shapeA->getMargin();
+	return marginOfA + m_shapeB->getMargin();
+}
+
+/// Not implemented for this shape: asserts, then reports zero inertia; mass is ignored.
+void	btMinkowskiSumShape::calculateLocalInertia(btScalar /*mass*/,btVector3& inertia) const
+{
+	btAssert(0);
+	inertia.setValue(0,0,0);
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btMinkowskiSumShape.h b/src/bullet/BulletCollision/CollisionShapes/btMinkowskiSumShape.h
new file mode 100644
index 00000000..6c844e8c
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btMinkowskiSumShape.h
@@ -0,0 +1,60 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_MINKOWSKI_SUM_SHAPE_H
+#define BT_MINKOWSKI_SUM_SHAPE_H
+
+#include "btConvexInternalShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
+
+/// The btMinkowskiSumShape is only for advanced users. This shape represents implicit based minkowski sum of two convex implicit shapes.
+class btMinkowskiSumShape : public btConvexInternalShape
+{
+	// transforms applied to operand A / operand B when support points are queried
+	btTransform	m_transA;
+	btTransform	m_transB;
+	// operand shapes; only the pointers are stored
+	const btConvexShape*	m_shapeA;
+	const btConvexShape*	m_shapeB;
+
+public:
+	/// Keeps both pointers and resets both transforms to identity.
+	btMinkowskiSumShape(const btConvexShape* shapeA,const btConvexShape* shapeB);
+
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	void	setTransformA(const btTransform&	transA) { m_transA = transA;}
+	void	setTransformB(const btTransform&	transB) { m_transB = transB;}
+
+	const btTransform& getTransformA()const  { return m_transA;}
+	/// lower-case spelling, consistent with getTransformA()
+	const btTransform& getTransformB()const  { return m_transB;}
+	/// original spelling, kept so existing callers continue to compile
+	const btTransform& GetTransformB()const  { return m_transB;}
+
+	/// Sum of the margins of both operand shapes.
+	virtual btScalar	getMargin() const;
+
+	const btConvexShape*	getShapeA() const { return m_shapeA;}
+	const btConvexShape*	getShapeB() const { return m_shapeB;}
+
+	virtual const char*	getName()const { return "MinkowskiSum"; }
+};
+
+#endif //BT_MINKOWSKI_SUM_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btMultiSphereShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btMultiSphereShape.cpp
new file mode 100644
index 00000000..c996bfcd
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btMultiSphereShape.cpp
@@ -0,0 +1,167 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#include "btMultiSphereShape.h"
+#include "BulletCollision/CollisionShapes/btCollisionMargin.h"
+#include "LinearMath/btQuaternion.h"
+#include "LinearMath/btSerializer.h"
+
+btMultiSphereShape::btMultiSphereShape (const btVector3* positions,const btScalar* radi,int numSpheres)
+:btConvexInternalAabbCachingShape ()
+{
+	// Copies numSpheres centre/radius pairs out of the caller-supplied arrays.
+	m_shapeType = MULTI_SPHERE_SHAPE_PROXYTYPE;
+	m_localPositionArray.resize(numSpheres);
+	m_radiArray.resize(numSpheres);
+
+	int sphereIdx = 0;
+	while (sphereIdx < numSpheres)
+	{
+		m_localPositionArray[sphereIdx] = positions[sphereIdx];
+		m_radiArray[sphereIdx] = radi[sphereIdx];
+		++sphereIdx;
+	}
+
+	recalcLocalAabb();	// recompute the cached local AABB now that the spheres are stored
+}
+
+ 
+ btVector3	btMultiSphereShape::localGetSupportingVertexWithoutMargin(const btVector3& vec0)const
+{
+	// Returns the sphere point that lies furthest along vec0, with getMargin()
+	// subtracted from every sphere. A (near) zero-length vec0 is replaced by
+	// the +X axis; a shape without spheres yields the origin.
+	btVector3 supVec(0,0,0);
+	btScalar maxDot(btScalar(-BT_LARGE_FLOAT));
+
+	// Normalise the query direction before it is scaled by radius and margin.
+	btVector3 vec = vec0;
+	btScalar lenSqr = vec.length2();
+	if (lenSqr < (SIMD_EPSILON*SIMD_EPSILON))
+	{
+		vec.setValue(1,0,0);
+	} else
+	{
+		btScalar rlen = btScalar(1.) / btSqrt(lenSqr );
+		vec *= rlen;
+	}
+
+	btVector3 vtx;
+	btScalar newDot;
+
+	// Index the arrays inside the loop instead of taking &array[0] up front:
+	// element 0 does not exist when the shape holds no spheres.
+	const int numSpheres = m_localPositionArray.size();
+
+	for (int i=0;i<numSpheres;i++)
+	{
+		// NOTE(review): only the radius term is multiplied by m_localScaling, the
+		// sphere centre is used unscaled -- confirm this is intended.
+		vtx = m_localPositionArray[i] +vec*m_localScaling*m_radiArray[i] - vec * getMargin();
+		newDot = vec.dot(vtx);
+		if (newDot > maxDot)
+		{
+			maxDot = newDot;
+			supVec = vtx;
+		}
+	}
+
+	return supVec;
+}
+
+ void	btMultiSphereShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+	// For each of the numVectors directions, writes to supportVerticesOut[j] the
+	// sphere point (margin subtracted) with the largest dot product against
+	// vectors[j]. The directions are used as given, without normalisation.
+	// The arrays are indexed directly rather than via &array[0], which has no valid
+	// element when the shape holds no spheres; outputs are then left untouched.
+	const int numSpheres = m_localPositionArray.size();
+
+	for (int j=0;j<numVectors;j++)
+	{
+		btScalar maxDot(btScalar(-BT_LARGE_FLOAT));
+		const btVector3& vec = vectors[j];
+
+		btVector3 vtx;
+		btScalar newDot;
+
+		for (int i=0;i<numSpheres;i++)
+		{
+			vtx = m_localPositionArray[i] +vec*m_localScaling*m_radiArray[i] - vec * getMargin();
+			newDot = vec.dot(vtx);
+			if (newDot > maxDot)
+			{
+				maxDot = newDot;
+				supportVerticesOut[j] = vtx;
+			}
+		}
+	}
+}
+
+
+
+
+
+
+
+
+void	btMultiSphereShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	// Approximation: use the inertia of the box spanned by the cached local AABB
+	// of the spheres, i.e. per axis m/12 * (sum of the two other squared sides).
+	btVector3 boxMin,boxMax;
+	getCachedLocalAabb(boxMin,boxMax);
+	const btVector3 halfSize = (boxMax-boxMin)*btScalar(0.5);
+
+	// Full side lengths of the bounding box along each axis.
+	const btScalar sideX = btScalar(2.)*halfSize.x();
+	const btScalar sideY = btScalar(2.)*halfSize.y();
+	const btScalar sideZ = btScalar(2.)*halfSize.z();
+	const btScalar massOver12 = mass/btScalar(12.0);
+	inertia.setValue(massOver12 * (sideY*sideY + sideZ*sideZ),
+					massOver12 * (sideX*sideX + sideZ*sideZ),
+					massOver12 * (sideX*sideX + sideY*sideY));
+}
+
+
+///Writes this shape into dataBuffer (treated as a btMultiSphereShapeData) and returns the struct name
+const char*	btMultiSphereShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btMultiSphereShapeData* out = (btMultiSphereShapeData*) dataBuffer;
+	btConvexInternalShape::serialize(&out->m_convexInternalShapeData, serializer);
+	const int sphereCount = m_localPositionArray.size();
+	out->m_localPositionArraySize = sphereCount;
+	if (sphereCount == 0)
+	{
+		out->m_localPositionArrayPtr = 0;
+		return "btMultiSphereShapeData";
+	}
+	// The spheres go into their own chunk of btPositionAndRadius records, registered under the address of the first position.
+	out->m_localPositionArrayPtr = (btPositionAndRadius*)serializer->getUniquePointer((void*)&m_localPositionArray[0]);
+	btChunk* sphereChunk = serializer->allocate(sizeof(btPositionAndRadius),sphereCount);
+	btPositionAndRadius* record = (btPositionAndRadius*)sphereChunk->m_oldPtr;
+	for (int s=0;s<sphereCount;++s)
+	{
+		m_localPositionArray[s].serializeFloat(record[s].m_pos);
+		record[s].m_radius = float(m_radiArray[s]);
+	}
+	serializer->finalizeChunk(sphereChunk,"btPositionAndRadius",BT_ARRAY_CODE,(void*)&m_localPositionArray[0]);
+	return "btMultiSphereShapeData";
+}
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btMultiSphereShape.h b/src/bullet/BulletCollision/CollisionShapes/btMultiSphereShape.h
new file mode 100644
index 00000000..06c5d16d
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btMultiSphereShape.h
@@ -0,0 +1,99 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_MULTI_SPHERE_MINKOWSKI_H
+#define BT_MULTI_SPHERE_MINKOWSKI_H
+
+#include "btConvexInternalShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
+#include "LinearMath/btAlignedObjectArray.h"
+#include "LinearMath/btAabbUtil2.h"
+
+
+
+///The btMultiSphereShape represents the convex hull of a collection of spheres. You can create special capsules or other smooth volumes.
+///It is possible to animate the spheres for deformation, but call 'recalcLocalAabb' after changing any sphere position/radius
+class btMultiSphereShape : public btConvexInternalAabbCachingShape
+{
+	// Sphere centres and radii, kept as parallel arrays (same index = same sphere).
+	btAlignedObjectArray<btVector3> m_localPositionArray;
+	btAlignedObjectArray<btScalar>  m_radiArray;
+	
+public:
+	btMultiSphereShape (const btVector3* positions,const btScalar* radi,int numSpheres);	// copies numSpheres entries from each array
+
+	///CollisionShape Interface
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	/// btConvexShape Interface
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+	///Number of spheres stored in this shape
+	int	getSphereCount() const
+	{
+		return m_localPositionArray.size();
+	}
+	///Centre of sphere 'index'; the index is passed straight to the array
+	const btVector3&	getSpherePosition(int index) const
+	{
+		return m_localPositionArray[index];
+	}
+	///Radius of sphere 'index'; the index is passed straight to the array
+	btScalar	getSphereRadius(int index) const
+	{
+		return m_radiArray[index];
+	}
+
+	///Fixed name string for this shape type
+	virtual const char*	getName()const 
+	{
+		return "MultiSphere";
+	}
+	///Returns sizeof(btMultiSphereShapeData); defined inline at the end of this header
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+
+};
+
+
+struct	btPositionAndRadius	// one serialized sphere record, filled in by btMultiSphereShape::serialize
+{
+	btVector3FloatData	m_pos;	// sphere centre, written with btVector3::serializeFloat
+	float		m_radius;	// sphere radius, converted to float
+};
+
+struct	btMultiSphereShapeData	// buffer layout filled in by btMultiSphereShape::serialize
+{
+	btConvexInternalShapeData	m_convexInternalShapeData;	// filled by btConvexInternalShape::serialize
+	// The two fields below describe the separately written array of btPositionAndRadius records.
+	btPositionAndRadius	*m_localPositionArrayPtr;	// value from btSerializer::getUniquePointer, or 0 when there are no spheres
+	int				m_localPositionArraySize;	// number of spheres
+	char	m_padding[4];
+};
+
+
+
+SIMD_FORCE_INLINE	int	btMultiSphereShape::calculateSerializeBufferSize() const
+{	// Size of the btMultiSphereShapeData struct that serialize() fills in.
+	return static_cast<int>(sizeof(btMultiSphereShapeData));
+}
+
+
+
+#endif //BT_MULTI_SPHERE_MINKOWSKI_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.cpp
new file mode 100644
index 00000000..58799ac9
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.cpp
@@ -0,0 +1,45 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/// This file was created by Alex Silverman
+
+#include "BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexMaterialArray.h"
+//#include "BulletCollision/CollisionShapes/btOptimizedBvh.h"
+
+
+///Looks up the material assigned to triangle triIndex of sub-part partID
+const btMaterial * btMultimaterialTriangleMeshShape::getMaterialProperties(int partID, int triIndex)
+{
+    // Outputs of getLockedReadOnlyMaterialBase: the material table ...
+    const unsigned char * materials = 0;
+    int materialCount;
+    PHY_ScalarType materialScalarType;
+    int materialByteStride;
+    // ... and the per-triangle table of indices into it.
+    const unsigned char * triangleToMaterial = 0;
+    int triangleCount;
+    int triangleByteStride;
+    PHY_ScalarType triangleScalarType;
+    btTriangleIndexVertexMaterialArray* materialMesh = (btTriangleIndexVertexMaterialArray*)m_meshInterface;
+    materialMesh->getLockedReadOnlyMaterialBase(&materials, materialCount, materialScalarType, materialByteStride,
+        &triangleToMaterial, triangleCount, triangleByteStride, triangleScalarType, partID);
+
+    // TODO: This depends on whether it's a moving mesh or not (original note: BUG IN GIMPACT)
+    // Read the int material index stored for this triangle, then step that many
+    // material strides into the material table.
+    const int materialIndex = *(int *)(&(triangleToMaterial[(triIndex * triangleByteStride)]));
+    return (btMaterial *)(&(materials[materialIndex * materialByteStride]));
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.h b/src/bullet/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.h
new file mode 100644
index 00000000..2b92ab7d
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.h
@@ -0,0 +1,120 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/// This file was created by Alex Silverman
+
+#ifndef BT_BVH_TRIANGLE_MATERIAL_MESH_SHAPE_H
+#define BT_BVH_TRIANGLE_MATERIAL_MESH_SHAPE_H
+
+#include "btBvhTriangleMeshShape.h"
+#include "btMaterial.h"
+
+///The BvhTriangleMaterialMeshShape extends the btBvhTriangleMeshShape. Its main contribution is the interface into a material array, which allows per-triangle friction and restitution.
+ATTRIBUTE_ALIGNED16(class) btMultimaterialTriangleMeshShape : public btBvhTriangleMeshShape
+{
+    btAlignedObjectArray <btMaterial*> m_materialList;
+    int ** m_triangleMaterials;	// not used by any code in this class; initialised to 0 by both constructors
+
+public:
+
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+    btMultimaterialTriangleMeshShape(btStridingMeshInterface* meshInterface, bool useQuantizedAabbCompression, bool buildBvh = true):
+        btBvhTriangleMeshShape(meshInterface, useQuantizedAabbCompression, buildBvh), m_triangleMaterials(0)
+        {
+            m_shapeType = MULTIMATERIAL_TRIANGLE_MESH_PROXYTYPE;
+
+            const unsigned char *vertexbase;
+            int numverts;
+            PHY_ScalarType type;
+            int stride;
+            const unsigned char *indexbase;
+            int indexstride;
+            int numfaces;
+            PHY_ScalarType indicestype;
+
+            // The values queried below are currently unused; the loop only visits every sub-part.
+
+            for(int i = 0; i < meshInterface->getNumSubParts(); i++)
+            {
+                m_meshInterface->getLockedReadOnlyVertexIndexBase(
+                    &vertexbase,
+                    numverts,
+                    type,
+                    stride,
+                    &indexbase,
+                    indexstride,
+                    numfaces,
+                    indicestype,
+                    i);
+                m_meshInterface->unLockReadOnlyVertexBase(i);	// release the read lock taken just above
+            }
+        }
+
+	///optionally pass in a larger bvh aabb, used for quantization. This allows for deformations within this aabb
+	btMultimaterialTriangleMeshShape(btStridingMeshInterface* meshInterface, bool useQuantizedAabbCompression,const btVector3& bvhAabbMin,const btVector3& bvhAabbMax, bool buildBvh = true):
+        btBvhTriangleMeshShape(meshInterface, useQuantizedAabbCompression, bvhAabbMin, bvhAabbMax, buildBvh), m_triangleMaterials(0)
+        {
+            m_shapeType = MULTIMATERIAL_TRIANGLE_MESH_PROXYTYPE;
+
+            const unsigned char *vertexbase;
+            int numverts;
+            PHY_ScalarType type;
+            int stride;
+            const unsigned char *indexbase;
+            int indexstride;
+            int numfaces;
+            PHY_ScalarType indicestype;
+
+            // The values queried below are currently unused; the loop only visits every sub-part.
+
+            for(int i = 0; i < meshInterface->getNumSubParts(); i++)
+            {
+                m_meshInterface->getLockedReadOnlyVertexIndexBase(
+                    &vertexbase,
+                    numverts,
+                    type,
+                    stride,
+                    &indexbase,
+                    indexstride,
+                    numfaces,
+                    indicestype,
+                    i);
+                m_meshInterface->unLockReadOnlyVertexBase(i);	// release the read lock taken just above
+            }
+        }
+	
+    virtual ~btMultimaterialTriangleMeshShape()
+    {
+/*
+        for(int i = 0; i < m_meshInterface->getNumSubParts(); i++)
+        {
+            btAlignedFree(m_materialValues[i]);
+            m_materialLookup[i] = NULL;
+        }
+        btAlignedFree(m_materialValues);
+        m_materialLookup = NULL;
+*/
+    }
+	//debugging
+	virtual const char*	getName()const {return "MULTIMATERIALTRIANGLEMESH";}
+
+    ///Obtains the material for a specific triangle
+    const btMaterial * getMaterialProperties(int partID, int triIndex);
+
+}
+;
+
+#endif //BT_BVH_TRIANGLE_MATERIAL_MESH_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp b/src/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp
new file mode 100644
index 00000000..6f36775f
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp
@@ -0,0 +1,391 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btOptimizedBvh.h"
+#include "btStridingMeshInterface.h"
+#include "LinearMath/btAabbUtil2.h"
+#include "LinearMath/btIDebugDraw.h"
+
+
+btOptimizedBvh::btOptimizedBvh()
+{ // empty: nothing is set up here beyond base-class construction
+}
+
+btOptimizedBvh::~btOptimizedBvh()
+{ // empty: nothing is released by this class itself
+}
+
+
+void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax)
+{
+	m_useQuantization = useQuantizedAabbCompression;
+	// Builds the tree in three steps: collect one leaf node per triangle of 'triangles'
+	// (quantized or not), call buildTree() over them, then clear the temporary leaf arrays.
+	// NodeArray	triangleNodes;
+	// Callback that appends one unquantized leaf node per visited triangle.
+	struct	NodeTriangleCallback : public btInternalTriangleIndexCallback
+	{
+
+		NodeArray&	m_triangleNodes;
+
+		NodeTriangleCallback& operator=(NodeTriangleCallback& other)
+		{
+			m_triangleNodes.copyFromArray(other.m_triangleNodes);
+			return *this;
+		}
+		
+		NodeTriangleCallback(NodeArray&	triangleNodes)
+			:m_triangleNodes(triangleNodes)
+		{
+		}
+
+		virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int  triangleIndex)
+		{
+			btOptimizedBvhNode node;
+			btVector3	aabbMin,aabbMax;
+			aabbMin.setValue(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+			aabbMax.setValue(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT)); 
+			aabbMin.setMin(triangle[0]);
+			aabbMax.setMax(triangle[0]);
+			aabbMin.setMin(triangle[1]);
+			aabbMax.setMax(triangle[1]);
+			aabbMin.setMin(triangle[2]);
+			aabbMax.setMax(triangle[2]);
+
+			//with quantization?
+			node.m_aabbMinOrg = aabbMin;
+			node.m_aabbMaxOrg = aabbMax;
+
+			node.m_escapeIndex = -1;
+	
+			//for child nodes
+			node.m_subPart = partId;
+			node.m_triangleIndex = triangleIndex;
+			m_triangleNodes.push_back(node);
+		}
+	};
+	struct	QuantizedNodeTriangleCallback : public btInternalTriangleIndexCallback	// same idea, but stores quantized AABBs
+	{
+		QuantizedNodeArray&	m_triangleNodes;
+		const btQuantizedBvh* m_optimizedTree; // for quantization
+
+		QuantizedNodeTriangleCallback& operator=(QuantizedNodeTriangleCallback& other)
+		{
+			m_triangleNodes.copyFromArray(other.m_triangleNodes);
+			m_optimizedTree = other.m_optimizedTree;
+			return *this;
+		}
+
+		QuantizedNodeTriangleCallback(QuantizedNodeArray&	triangleNodes,const btQuantizedBvh* tree)
+			:m_triangleNodes(triangleNodes),m_optimizedTree(tree)
+		{
+		}
+
+		virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int  triangleIndex)
+		{
+			// The partId and triangle index must fit in the same (positive) integer
+			btAssert(partId < (1<<MAX_NUM_PARTS_IN_BITS));
+			btAssert(triangleIndex < (1<<(31-MAX_NUM_PARTS_IN_BITS)));
+			//negative indices are reserved for escapeIndex
+			btAssert(triangleIndex>=0);
+
+			btQuantizedBvhNode node;
+			btVector3	aabbMin,aabbMax;
+			aabbMin.setValue(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+			aabbMax.setValue(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT)); 
+			aabbMin.setMin(triangle[0]);
+			aabbMax.setMax(triangle[0]);
+			aabbMin.setMin(triangle[1]);
+			aabbMax.setMax(triangle[1]);
+			aabbMin.setMin(triangle[2]);
+			aabbMax.setMax(triangle[2]);
+
+			//PCK: add these checks for zero dimensions of aabb
+			const btScalar MIN_AABB_DIMENSION = btScalar(0.002);
+			const btScalar MIN_AABB_HALF_DIMENSION = btScalar(0.001);
+			if (aabbMax.x() - aabbMin.x() < MIN_AABB_DIMENSION)
+			{
+				aabbMax.setX(aabbMax.x() + MIN_AABB_HALF_DIMENSION);
+				aabbMin.setX(aabbMin.x() - MIN_AABB_HALF_DIMENSION);
+			}
+			if (aabbMax.y() - aabbMin.y() < MIN_AABB_DIMENSION)
+			{
+				aabbMax.setY(aabbMax.y() + MIN_AABB_HALF_DIMENSION);
+				aabbMin.setY(aabbMin.y() - MIN_AABB_HALF_DIMENSION);
+			}
+			if (aabbMax.z() - aabbMin.z() < MIN_AABB_DIMENSION)
+			{
+				aabbMax.setZ(aabbMax.z() + MIN_AABB_HALF_DIMENSION);
+				aabbMin.setZ(aabbMin.z() - MIN_AABB_HALF_DIMENSION);
+			}
+
+			m_optimizedTree->quantize(&node.m_quantizedAabbMin[0],aabbMin,0);
+			m_optimizedTree->quantize(&node.m_quantizedAabbMax[0],aabbMax,1);
+			// Pack the part id (shifted into the upper bits) and the triangle index into one int.
+			node.m_escapeIndexOrTriangleIndex = (partId<<(31-MAX_NUM_PARTS_IN_BITS)) | triangleIndex;
+
+			m_triangleNodes.push_back(node);
+		}
+	};
+	
+
+
+	int numLeafNodes = 0;
+
+	
+	if (m_useQuantization)
+	{
+
+		//initialize quantization values
+		setQuantizationValues(bvhAabbMin,bvhAabbMax);
+
+		QuantizedNodeTriangleCallback	callback(m_quantizedLeafNodes,this);
+
+	
+		triangles->InternalProcessAllTriangles(&callback,m_bvhAabbMin,m_bvhAabbMax);
+
+		//now we have an array of leafnodes in m_quantizedLeafNodes
+		numLeafNodes = m_quantizedLeafNodes.size();
+
+
+		m_quantizedContiguousNodes.resize(2*numLeafNodes);
+
+
+	} else
+	{
+		NodeTriangleCallback	callback(m_leafNodes);
+
+		btVector3 aabbMin(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT));
+		btVector3 aabbMax(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+
+		triangles->InternalProcessAllTriangles(&callback,aabbMin,aabbMax);
+
+		//now we have an array of leafnodes in m_leafNodes
+		numLeafNodes = m_leafNodes.size();
+
+		m_contiguousNodes.resize(2*numLeafNodes);
+	}
+
+	m_curNodeIndex = 0;
+
+	buildTree(0,numLeafNodes);
+	// NOTE(review): with zero triangles the block below appears to index element 0 of an empty array -- confirm callers never build an empty mesh.
+	///if the entire tree is smaller than the subtree size, we need to create a header info for the tree
+	if(m_useQuantization && !m_SubtreeHeaders.size())
+	{
+		btBvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();
+		subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]);
+		subtree.m_rootNodeIndex = 0;
+		subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 1 : m_quantizedContiguousNodes[0].getEscapeIndex();
+	}
+
+	//PCK: update the copy of the size
+	m_subtreeHeaderCount = m_SubtreeHeaders.size();
+
+	//PCK: clear m_quantizedLeafNodes and m_leafNodes, they are temporary
+	m_quantizedLeafNodes.clear();
+	m_leafNodes.clear();
+}
+
+
+
+
+void	btOptimizedBvh::refit(btStridingMeshInterface* meshInterface,const btVector3& aabbMin,const btVector3& aabbMax)
+{
+	// Only the quantized tree is refitted here; without quantization this
+	// function does nothing.
+	if (!m_useQuantization)
+	{
+		return;
+	}
+
+	// Reset the quantization range to the new bounds, then recompute the AABBs
+	// of nodes 0 .. m_curNodeIndex-1.
+	setQuantizationValues(aabbMin,aabbMax);
+	updateBvhNodes(meshInterface,0,m_curNodeIndex,0);
+
+	// Refresh every subtree header from the (updated) node at its root index.
+	const int numSubtrees = m_SubtreeHeaders.size();
+	for (int headerIdx=0;headerIdx<numSubtrees;headerIdx++)
+	{
+		btBvhSubtreeInfo& header = m_SubtreeHeaders[headerIdx];
+		header.setAabbFromQuantizeNode(m_quantizedContiguousNodes[header.m_rootNodeIndex]);
+	}
+
+}
+
+
+
+
+void	btOptimizedBvh::refitPartial(btStridingMeshInterface* meshInterface,const btVector3& aabbMin,const btVector3& aabbMax)
+{
+	// Refits only the subtrees whose header bounds overlap [aabbMin,aabbMax].
+	// Preconditions (checked with btAssert): the tree is quantized and the query
+	// box lies strictly inside m_bvhAabbMin / m_bvhAabbMax.
+	btAssert(m_useQuantization);
+
+	btAssert(aabbMin.getX() > m_bvhAabbMin.getX());
+	btAssert(aabbMin.getY() > m_bvhAabbMin.getY());
+	btAssert(aabbMin.getZ() > m_bvhAabbMin.getZ());
+
+	btAssert(aabbMax.getX() < m_bvhAabbMax.getX());
+	btAssert(aabbMax.getY() < m_bvhAabbMax.getY());
+	btAssert(aabbMax.getZ() < m_bvhAabbMax.getZ());
+
+	// Quantize the query box once so it can be compared against each header.
+	unsigned short	queryMin[3];
+	unsigned short	queryMax[3];
+	quantize(&queryMin[0],aabbMin,0);
+	quantize(&queryMax[0],aabbMax,1);
+
+	for (int headerIdx=0;headerIdx<this->m_SubtreeHeaders.size();headerIdx++)
+	{
+		btBvhSubtreeInfo& header = m_SubtreeHeaders[headerIdx];
+
+		//PCK: unsigned instead of bool
+		const unsigned overlaps = testQuantizedAabbAgainstQuantizedAabb(queryMin,queryMax,header.m_quantizedAabbMin,header.m_quantizedAabbMax);
+		if (overlaps == 0)
+		{
+			continue;
+		}
+
+		// Recompute the nodes of this subtree, then refresh the header from its root node.
+		updateBvhNodes(meshInterface,header.m_rootNodeIndex,header.m_rootNodeIndex+header.m_subtreeSize,headerIdx);
+		header.setAabbFromQuantizeNode(m_quantizedContiguousNodes[header.m_rootNodeIndex]);
+	}
+
+}
+
+void	btOptimizedBvh::updateBvhNodes(btStridingMeshInterface* meshInterface,int firstNode,int endNode,int index)
+{
+	(void)index;
+	// Recomputes the quantized AABBs of nodes [firstNode,endNode) from the current mesh vertices.
+	btAssert(m_useQuantization);
+
+	int curNodeSubPart=-1;	// sub-part currently locked for reading, -1 while none is
+
+	//get access info to trianglemesh data
+		const unsigned char *vertexbase = 0;
+		int numverts = 0;
+		PHY_ScalarType type = PHY_INTEGER;
+		int stride = 0;
+		const unsigned char *indexbase = 0;
+		int indexstride = 0;
+		int numfaces = 0;
+		PHY_ScalarType indicestype = PHY_INTEGER;
+
+		btVector3	triangleVerts[3];
+		btVector3	aabbMin,aabbMax;
+		const btVector3& meshScaling = meshInterface->getScaling();
+		
+		int i;
+		for (i=endNode-1;i>=firstNode;i--)
+		{
+			// Walk backwards: an internal node reads its children at i+1 and beyond,
+			// so those have already been updated by the time it is reached.
+			btQuantizedBvhNode& curNode = m_quantizedContiguousNodes[i];
+			if (curNode.isLeafNode())
+			{
+				//recalc aabb from triangle data
+				int nodeSubPart = curNode.getPartId();
+				int nodeTriangleIndex = curNode.getTriangleIndex();
+				if (nodeSubPart != curNodeSubPart)
+				{
+					if (curNodeSubPart >= 0)
+						meshInterface->unLockReadOnlyVertexBase(curNodeSubPart);
+					meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase,numverts,	type,stride,&indexbase,indexstride,numfaces,indicestype,nodeSubPart);
+
+					curNodeSubPart = nodeSubPart;
+					btAssert(indicestype==PHY_INTEGER||indicestype==PHY_SHORT);
+				}
+				//triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts,
+
+				unsigned int* gfxbase = (unsigned int*)(indexbase+nodeTriangleIndex*indexstride);
+				
+				// Fetch the three vertices (16- or 32-bit indices, float or double data) and apply the mesh scaling.
+				for (int j=2;j>=0;j--)
+				{
+					
+					int graphicsindex = indicestype==PHY_SHORT?((unsigned short*)gfxbase)[j]:gfxbase[j];
+					if (type == PHY_FLOAT)
+					{
+						float* graphicsbase = (float*)(vertexbase+graphicsindex*stride);
+						triangleVerts[j] = btVector3(
+							graphicsbase[0]*meshScaling.getX(),
+							graphicsbase[1]*meshScaling.getY(),
+							graphicsbase[2]*meshScaling.getZ());
+					}
+					else
+					{
+						double* graphicsbase = (double*)(vertexbase+graphicsindex*stride);
+						triangleVerts[j] = btVector3( btScalar(graphicsbase[0]*meshScaling.getX()), btScalar(graphicsbase[1]*meshScaling.getY()), btScalar(graphicsbase[2]*meshScaling.getZ()));
+					}
+				}
+
+				// NOTE(review): unlike the callback in build(), no minimum AABB
+				// dimension is enforced here -- confirm that is intended.
+				aabbMin.setValue(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+				aabbMax.setValue(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT)); 
+				aabbMin.setMin(triangleVerts[0]);
+				aabbMax.setMax(triangleVerts[0]);
+				aabbMin.setMin(triangleVerts[1]);
+				aabbMax.setMax(triangleVerts[1]);
+				aabbMin.setMin(triangleVerts[2]);
+				aabbMax.setMax(triangleVerts[2]);
+
+				quantize(&curNode.m_quantizedAabbMin[0],aabbMin,0);
+				quantize(&curNode.m_quantizedAabbMax[0],aabbMax,1);
+				
+			} else
+			{
+				//combine aabb from both children
+
+				btQuantizedBvhNode* leftChildNode = &m_quantizedContiguousNodes[i+1];
+				
+				btQuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? &m_quantizedContiguousNodes[i+2] :
+					&m_quantizedContiguousNodes[i+1+leftChildNode->getEscapeIndex()];
+				
+
+				{
+					for (int axis=0;axis<3;axis++)	// renamed from 'i', which shadowed the node index of the outer loop
+					{
+						curNode.m_quantizedAabbMin[axis] = leftChildNode->m_quantizedAabbMin[axis];
+						if (curNode.m_quantizedAabbMin[axis]>rightChildNode->m_quantizedAabbMin[axis])
+							curNode.m_quantizedAabbMin[axis]=rightChildNode->m_quantizedAabbMin[axis];
+
+						curNode.m_quantizedAabbMax[axis] = leftChildNode->m_quantizedAabbMax[axis];
+						if (curNode.m_quantizedAabbMax[axis] < rightChildNode->m_quantizedAabbMax[axis])
+							curNode.m_quantizedAabbMax[axis] = rightChildNode->m_quantizedAabbMax[axis];
+					}
+				}
+			}
+
+		}
+		// Release the read lock on the last sub-part that was accessed, if any.
+		if (curNodeSubPart >= 0)
+			meshInterface->unLockReadOnlyVertexBase(curNodeSubPart);
+
+		
+}
+
+///Loads and initializes a BVH 'in place' from a caller-supplied memory buffer
+btOptimizedBvh* btOptimizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian)
+{
+	// This class adds no data of its own, so the base-class result is simply
+	// cast to the derived type.
+	return static_cast<btOptimizedBvh*>(
+		btQuantizedBvh::deSerializeInPlace(i_alignedDataBuffer,i_dataBufferSize,i_swapEndian));
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.h b/src/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.h
new file mode 100644
index 00000000..715961f5
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.h
@@ -0,0 +1,65 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///Contains contributions from Disney Studio's
+
+#ifndef BT_OPTIMIZED_BVH_H
+#define BT_OPTIMIZED_BVH_H
+
+#include "BulletCollision/BroadphaseCollision/btQuantizedBvh.h"
+
+class btStridingMeshInterface;
+
+
+///The btOptimizedBvh extends the btQuantizedBvh to create AABB tree for triangle meshes, through the btStridingMeshInterface.
+ATTRIBUTE_ALIGNED16(class) btOptimizedBvh : public btQuantizedBvh
+{
+	// No data members are declared here; all state lives in btQuantizedBvh.
+public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+protected:
+
+public:
+
+	btOptimizedBvh();
+
+	virtual ~btOptimizedBvh();
+	///Builds the tree from every triangle of 'triangles'; the aabb arguments set the quantization range when quantization is enabled
+	void	build(btStridingMeshInterface* triangles,bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax);
+	///Recomputes all node bounds from the mesh; only acts when the tree is quantized
+	void	refit(btStridingMeshInterface* triangles,const btVector3& aabbMin,const btVector3& aabbMax);
+	///Recomputes only the subtrees overlapping the given aabb; asserts that the tree is quantized
+	void	refitPartial(btStridingMeshInterface* triangles,const btVector3& aabbMin, const btVector3& aabbMax);
+	///Recomputes quantized bounds for nodes [firstNode,endNode); 'index' is unused
+	void	updateBvhNodes(btStridingMeshInterface* meshInterface,int firstNode,int endNode,int index);
+
+	/// Data buffer MUST be 16 byte aligned
+	virtual bool serializeInPlace(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const
+	{
+		return btQuantizedBvh::serialize(o_alignedDataBuffer,i_dataBufferSize,i_swapEndian);
+
+	}
+
+	///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place'
+	static btOptimizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);
+
+
+};
+
+
+#endif //BT_OPTIMIZED_BVH_H
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btPolyhedralConvexShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btPolyhedralConvexShape.cpp
new file mode 100644
index 00000000..82def79c
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btPolyhedralConvexShape.cpp
@@ -0,0 +1,475 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h"
+#include "btConvexPolyhedron.h"
+#include "LinearMath/btConvexHullComputer.h"
+#include <new>
+#include "LinearMath/btGeometryUtil.h"
+#include "LinearMath/btGrahamScan2dConvexHull.h"
+
+
+///Creates the shape without polyhedral features: m_polyhedron stays null until initializePolyhedralFeatures() allocates it.
+btPolyhedralConvexShape::btPolyhedralConvexShape()
+	:btConvexInternalShape(),m_polyhedron(0)
+{
+}
+
+btPolyhedralConvexShape::~btPolyhedralConvexShape()
+{
+	//m_polyhedron is placement-new'ed into btAlignedAlloc storage (see initializePolyhedralFeatures): destroy it by hand, freeing alone leaks what it owns
+	if (m_polyhedron)
+	{
+		m_polyhedron->~btConvexPolyhedron();
+		btAlignedFree(m_polyhedron);
+	}
+}
+bool	btPolyhedralConvexShape::initializePolyhedralFeatures()
+{
+	//(Re)builds m_polyhedron from the shape's vertices: convex hull, per-face planes, then merging of coplanar faces. Always returns true.
+	if (m_polyhedron)
+	{
+		m_polyhedron->~btConvexPolyhedron();//placement-new'ed below, so destroy it by hand: freeing alone leaks what it owns
+		btAlignedFree(m_polyhedron);
+	}
+	void* mem = btAlignedAlloc(sizeof(btConvexPolyhedron),16);
+	m_polyhedron = new (mem) btConvexPolyhedron;
+	btAlignedObjectArray<btVector3> orgVertices;
+	for (int i=0;i<getNumVertices();i++)
+	{
+		btVector3& newVertex = orgVertices.expand();
+		getVertex(i,newVertex);
+	}
+	//hull of the collected vertices (the disabled #if 0 variant first shifts the planes by the margin)
+#if 0
+	btAlignedObjectArray<btVector3> planeEquations;
+	btGeometryUtil::getPlaneEquationsFromVertices(orgVertices,planeEquations);
+
+	btAlignedObjectArray<btVector3> shiftedPlaneEquations;
+	for (int p=0;p<planeEquations.size();p++)
+	{
+		   btVector3 plane = planeEquations[p];
+		   plane[3] -= getMargin();
+		   shiftedPlaneEquations.push_back(plane);
+	}
+
+	btAlignedObjectArray<btVector3> tmpVertices;
+
+	btGeometryUtil::getVerticesFromPlaneEquations(shiftedPlaneEquations,tmpVertices);
+	btConvexHullComputer conv;
+	conv.compute(&tmpVertices[0].getX(), sizeof(btVector3),tmpVertices.size(),0.f,0.f);
+
+#else
+	btConvexHullComputer conv;
+	conv.compute(&orgVertices[0].getX(), sizeof(btVector3),orgVertices.size(),0.f,0.f);
+
+#endif
+
+
+	//temporary per-face data, indexed like conv.faces
+	btAlignedObjectArray<btVector3> faceNormals;
+	int numFaces = conv.faces.size();
+	faceNormals.resize(numFaces);
+	btConvexHullComputer* convexUtil = &conv;
+
+
+	btAlignedObjectArray<btFace>	tmpFaces;
+	tmpFaces.resize(numFaces);
+
+	int numVertices = convexUtil->vertices.size();
+	m_polyhedron->m_vertices.resize(numVertices);
+	for (int p=0;p<numVertices;p++)
+	{
+		m_polyhedron->m_vertices[p] = convexUtil->vertices[p];
+	}
+
+	//per hull face: collect its vertex indices and derive the plane from the first two edges
+	for (int i=0;i<numFaces;i++)
+	{
+		int face = convexUtil->faces[i];
+		//printf("face=%d\n",face);
+		const btConvexHullComputer::Edge*  firstEdge = &convexUtil->edges[face];
+		const btConvexHullComputer::Edge*  edge = firstEdge;
+
+		btVector3 edges[3];
+		int numEdges = 0;
+		//compute face normals
+
+		//walk the face's edge loop once: record every source vertex index and keep
+		//the first two (normalized) edge directions for the normal computation
+
+		do
+		{
+
+			int src = edge->getSourceVertex();
+			tmpFaces[i].m_indices.push_back(src);
+			int targ = edge->getTargetVertex();
+			btVector3 wa = convexUtil->vertices[src];
+
+			btVector3 wb = convexUtil->vertices[targ];
+			btVector3 newEdge = wb-wa;
+			newEdge.normalize();
+			if (numEdges<2)
+				edges[numEdges++] = newEdge;
+
+			edge = edge->getNextEdgeOfFace();
+		} while (edge!=firstEdge);
+		//1e30f is the start value of the minimum search over the face's vertices further down
+		btScalar planeEq = 1e30f;
+
+
+		if (numEdges==2)
+		{
+			faceNormals[i] = edges[0].cross(edges[1]);
+			faceNormals[i].normalize();
+			tmpFaces[i].m_plane[0] = faceNormals[i].getX();
+			tmpFaces[i].m_plane[1] = faceNormals[i].getY();
+			tmpFaces[i].m_plane[2] = faceNormals[i].getZ();
+			tmpFaces[i].m_plane[3] = planeEq;
+
+		}
+		else
+		{
+			btAssert(0);//degenerate?
+			faceNormals[i].setZero();
+		}
+		//plane offset: the smallest projection of the face's vertices onto the normal, negated
+		for (int v=0;v<tmpFaces[i].m_indices.size();v++)
+		{
+			btScalar eq = m_polyhedron->m_vertices[tmpFaces[i].m_indices[v]].dot(faceNormals[i]);
+			if (planeEq>eq)
+			{
+				planeEq=eq;
+			}
+		}
+		tmpFaces[i].m_plane[3] = -planeEq;
+	}
+
+	//merge coplanar faces and copy them to m_polyhedron
+	//two faces are grouped when the dot product of their normals exceeds faceWeldThreshold
+	btScalar faceWeldThreshold= 0.999f;
+	btAlignedObjectArray<int> todoFaces;
+	for (int i=0;i<tmpFaces.size();i++)
+		todoFaces.push_back(i);
+
+	while (todoFaces.size())
+	{
+		btAlignedObjectArray<int> coplanarFaceGroup;
+		int refFace = todoFaces[todoFaces.size()-1];
+
+		coplanarFaceGroup.push_back(refFace);
+		btFace& faceA = tmpFaces[refFace];
+		todoFaces.pop_back();
+		//move every remaining face whose normal is (nearly) parallel to the reference normal into the group
+		btVector3 faceNormalA(faceA.m_plane[0],faceA.m_plane[1],faceA.m_plane[2]);
+		for (int j=todoFaces.size()-1;j>=0;j--)
+		{
+			int i = todoFaces[j];
+			btFace& faceB = tmpFaces[i];
+			btVector3 faceNormalB(faceB.m_plane[0],faceB.m_plane[1],faceB.m_plane[2]);
+			if (faceNormalA.dot(faceNormalB)>faceWeldThreshold)
+			{
+				coplanarFaceGroup.push_back(i);
+				todoFaces.remove(i);
+			}
+		}
+
+
+		bool did_merge = false;
+		if (coplanarFaceGroup.size()>1)
+		{
+			//do the merge: use Graham Scan 2d convex hull
+
+			btAlignedObjectArray<GrahamVector2> orgpoints;
+
+			for (int i=0;i<coplanarFaceGroup.size();i++)
+			{
+//				m_polyhedron->m_faces.push_back(tmpFaces[coplanarFaceGroup[i]]);
+
+				btFace& face = tmpFaces[coplanarFaceGroup[i]];
+				btVector3 faceNormal(face.m_plane[0],face.m_plane[1],face.m_plane[2]);
+				btVector3 xyPlaneNormal(0,0,1);
+				//rotate the face normal onto +Z so that the 2d hull scan can work on (x,y) only
+				btQuaternion rotationArc = shortestArcQuat(faceNormal,xyPlaneNormal);
+
+				for (int f=0;f<face.m_indices.size();f++)
+				{
+					int orgIndex = face.m_indices[f];
+					btVector3 pt = m_polyhedron->m_vertices[orgIndex];
+					btVector3 rotatedPt =  quatRotate(rotationArc,pt);
+					rotatedPt.setZ(0);
+					bool found = false;
+					//add each original vertex index only once
+					for (int p=0;p<orgpoints.size();p++)
+					{
+						//if ((orgpoints[p].m_orgIndex == orgIndex) || ((rotatedPt-orgpoints[p]).length2()<0.0001))
+						if (orgpoints[p].m_orgIndex == orgIndex)
+						{
+							found=true;
+							break;
+						}
+					}
+					if (!found)
+						orgpoints.push_back(GrahamVector2(rotatedPt,orgIndex));
+				}
+			}
+
+			btFace combinedFace;
+			for (int i=0;i<4;i++)
+				combinedFace.m_plane[i] = tmpFaces[coplanarFaceGroup[0]].m_plane[i];
+			//the 2d convex hull of the projected points gives the outline of the merged face
+			btAlignedObjectArray<GrahamVector2> hull;
+			GrahamScanConvexHull2D(orgpoints,hull);
+
+			for (int i=0;i<hull.size();i++)
+			{
+				combinedFace.m_indices.push_back(hull[i].m_orgIndex);
+				for(int k = 0; k < orgpoints.size(); k++) {
+					if(orgpoints[k].m_orgIndex == hull[i].m_orgIndex) {
+						orgpoints[k].m_orgIndex = -1; // invalidate...
+						break;
+					}
+				}
+			}
+			// are there rejected vertices?
+			bool reject_merge = false;
+			for(int i = 0; i < orgpoints.size(); i++) {
+				if(orgpoints[i].m_orgIndex == -1)
+					continue; // this is in the hull...
+				// this vertex is rejected -- is anybody else using this vertex?
+				for(int j = 0; j < tmpFaces.size(); j++) {
+					btFace& face = tmpFaces[j];
+					// is this a face of the current coplanar group?
+					bool is_in_current_group = false;
+					for(int k = 0; k < coplanarFaceGroup.size(); k++) {
+						if(coplanarFaceGroup[k] == j) {
+							is_in_current_group = true;
+							break;
+						}
+					}
+					if(is_in_current_group) // ignore this face...
+						continue;
+					// does this face use this rejected vertex?
+					for(int v = 0; v < face.m_indices.size(); v++) {
+						if(face.m_indices[v] == orgpoints[i].m_orgIndex) {
+							// this rejected vertex is used in another face -- reject merge
+							reject_merge = true;
+							break;
+						}
+					}
+					if(reject_merge)
+						break;
+				}
+				if(reject_merge)
+					break;
+			}
+			if(!reject_merge) {
+				// do this merge!
+				did_merge = true;
+				m_polyhedron->m_faces.push_back(combinedFace);
+			}
+		}
+		if(!did_merge)
+		{
+			for (int i=0;i<coplanarFaceGroup.size();i++)
+			{
+				m_polyhedron->m_faces.push_back(tmpFaces[coplanarFaceGroup[i]]);
+			}
+
+		}
+
+
+
+	}
+
+	m_polyhedron->initialize();
+
+	return true;
+}
+
+
+btVector3	btPolyhedralConvexShape::localGetSupportingVertexWithoutMargin(const btVector3& vec0)const
+{
+	//Brute-force support mapping: returns the vertex with the largest dot product against the
+	//normalized query direction. The result stays (0,0,0) if no vertex beats -BT_LARGE_FLOAT
+	//(e.g. a shape without vertices) and always on __SPU__, where the search is compiled out.
+	btVector3 bestVertex(0,0,0);
+#ifndef __SPU__
+	btVector3 dir = vec0;
+	const btScalar dirLen2 = dir.length2();
+	if (dirLen2 < btScalar(0.0001))
+	{
+		//(near) zero-length query: fall back to the +X axis
+		dir.setValue(1,0,0);
+	}
+	else
+	{
+		//normalize by scaling with the reciprocal length
+		const btScalar invLen = btScalar(1.) / btSqrt(dirLen2);
+		dir *= invLen;
+	}
+
+	btScalar bestDot(btScalar(-BT_LARGE_FLOAT));
+	btVector3 candidate;
+	for (int v=0;v<getNumVertices();v++)
+	{
+		getVertex(v,candidate);
+		const btScalar candidateDot = dir.dot(candidate);
+		if (candidateDot > bestDot)
+		{
+			//strictly greater: on ties the earliest vertex wins
+			bestDot = candidateDot;
+			bestVertex = candidate;
+		}
+	}
+#endif //__SPU__
+
+	return bestVertex;
+}
+
+
+
+void	btPolyhedralConvexShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+	//Batched brute-force support query: for each of the numVectors directions, writes the vertex with the
+	//largest dot product to supportVerticesOut. The w component ([3]) of every output holds the best dot
+	//product found (-BT_LARGE_FLOAT when there was none). Compiled to a no-op on __SPU__.
+#ifndef __SPU__
+	//seed all running maxima first
+	for (int q=0;q<numVectors;q++)
+	{
+		supportVerticesOut[q][3] = btScalar(-BT_LARGE_FLOAT);
+	}
+
+	btVector3 candidate;
+	for (int q=0;q<numVectors;q++)
+	{
+		const btVector3& dir = vectors[q];
+		btVector3& best = supportVerticesOut[q];
+
+		for (int v=0;v<getNumVertices();v++)
+		{
+			getVertex(v,candidate);
+			const btScalar candidateDot = dir.dot(candidate);
+			if (candidateDot > best[3])
+			{
+				//WARNING: don't swap next lines, the w component would get overwritten!
+				best = candidate;
+				best[3] = candidateDot;
+			}
+		}
+	}
+#endif //__SPU__
+}
+
+
+
+void	btPolyhedralConvexShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+#ifndef __SPU__
+	//Approximation: uses the inertia of the box spanned by the identity-transform AABB, grown by the
+	//margin on every side. On __SPU__ the body is compiled out and 'inertia' is left untouched.
+	const btScalar margin = getMargin();
+	btTransform identity;
+	identity.setIdentity();
+	btVector3 boxMin,boxMax;
+	getAabb(identity,boxMin,boxMax);
+	const btVector3 halfExtents = (boxMax-boxMin)*btScalar(0.5);
+
+	//full edge lengths of the margin-expanded box
+	const btScalar widthX = btScalar(2.)*(halfExtents.x()+margin);
+	const btScalar widthY = btScalar(2.)*(halfExtents.y()+margin);
+	const btScalar widthZ = btScalar(2.)*(halfExtents.z()+margin);
+	const btScalar sqX = widthX*widthX;
+	const btScalar sqY = widthY*widthY;
+	const btScalar sqZ = widthZ*widthZ;
+
+	//box inertia: mass/12 times the sum of the two other squared edge lengths; 0.08333333 ~ 1/12
+	inertia = (mass * btScalar(0.08333333)) * btVector3(sqY+sqZ,sqX+sqZ,sqX+sqY);
+#endif //__SPU__
+}
+
+
+
+///Applies the scaling through btConvexInternalShape, then recomputes the cached local AABB.
+void	btPolyhedralConvexAabbCachingShape::setLocalScaling(const btVector3& scaling)
+{
+	btConvexInternalShape::setLocalScaling(scaling);
+	recalcLocalAabb();
+}
+///Starts with an inverted local AABB (min (1,1,1), max (-1,-1,-1)) that is flagged as invalid.
+btPolyhedralConvexAabbCachingShape::btPolyhedralConvexAabbCachingShape()
+:btPolyhedralConvexShape(),
+m_localAabbMin(1,1,1),
+m_localAabbMax(-1,-1,-1),
+m_isLocalAabbValid(false)
+{
+}
+///Forwards to getNonvirtualAabb with the shape's current margin (getMargin()).
+void btPolyhedralConvexAabbCachingShape::getAabb(const btTransform& trans,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	getNonvirtualAabb(trans,aabbMin,aabbMax,getMargin());
+}
+void	btPolyhedralConvexAabbCachingShape::recalcLocalAabb()
+{
+	//Recomputes the cached local AABB from the support vertices along the six axis directions,
+	//padded by m_collisionMargin, and marks the cache as valid.
+	m_isLocalAabbValid = true;
+
+	#if 1
+	//+X,+Y,+Z followed by -X,-Y,-Z: entry k and entry k+3 bound axis k from above and below
+	static const btVector3 axisDirections[] =
+	{
+		btVector3( 1.,  0.,  0.),
+		btVector3( 0.,  1.,  0.),
+		btVector3( 0.,  0.,  1.),
+		btVector3( -1., 0.,  0.),
+		btVector3( 0., -1.,  0.),
+		btVector3( 0.,  0., -1.)
+	};
+
+	btVector3 extremes[6];
+	for (int k=0;k<6;k++)
+	{
+		extremes[k].setValue(0.,0.,0.);
+	}
+
+	batchedUnitVectorGetSupportingVertexWithoutMargin(axisDirections, extremes, 6);
+
+	for (int axis=0;axis<3;axis++)
+	{
+		const btVector3& upper = extremes[axis];
+		const btVector3& lower = extremes[axis+3];
+		m_localAabbMax[axis] = upper[axis] + m_collisionMargin;
+		m_localAabbMin[axis] = lower[axis] - m_collisionMargin;
+	}
+	#else
+
+	for (int i=0;i<3;i++)
+	{
+		btVector3 vec(btScalar(0.),btScalar(0.),btScalar(0.));
+		vec[i] = btScalar(1.);
+		btVector3 tmp = localGetSupportingVertex(vec);
+		m_localAabbMax[i] = tmp[i];
+		vec[i] = btScalar(-1.);
+		tmp = localGetSupportingVertex(vec);
+		m_localAabbMin[i] = tmp[i];
+	}
+	#endif
+}
+
+
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btPolyhedralConvexShape.h b/src/bullet/BulletCollision/CollisionShapes/btPolyhedralConvexShape.h
new file mode 100644
index 00000000..ee2e1e28
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btPolyhedralConvexShape.h
@@ -0,0 +1,112 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_POLYHEDRAL_CONVEX_SHAPE_H
+#define BT_POLYHEDRAL_CONVEX_SHAPE_H
+
+#include "LinearMath/btMatrix3x3.h"
+#include "btConvexInternalShape.h"
+class btConvexPolyhedron;
+
+
+///The btPolyhedralConvexShape is an internal interface class for polyhedral convex shapes.
+class btPolyhedralConvexShape : public btConvexInternalShape
+{
+
+
+protected:
+	///polyhedral features; null until initializePolyhedralFeatures() has been called
+	btConvexPolyhedron* m_polyhedron;
+
+public:
+
+	btPolyhedralConvexShape();
+
+	virtual ~btPolyhedralConvexShape();
+
+	///optional method mainly used to generate multiple contact points by clipping polyhedral features (faces/edges)
+	virtual bool	initializePolyhedralFeatures();
+	///returns m_polyhedron as-is, which is null if the features were never initialized
+	const btConvexPolyhedron*	getConvexPolyhedron() const
+	{
+		return m_polyhedron;
+	}
+
+	//brute force implementations
+	///(support queries: linear scans over getVertex() that pick the vertex with the largest dot product)
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+	///approximates the inertia with that of the margin-expanded bounding box
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	///pure virtual vertex/edge/plane accessors, to be implemented by the concrete shape
+	virtual int	getNumVertices() const = 0 ;
+	virtual int getNumEdges() const = 0;
+	virtual void getEdge(int i,btVector3& pa,btVector3& pb) const = 0;
+	virtual void getVertex(int i,btVector3& vtx) const = 0;
+	virtual int	getNumPlanes() const = 0;
+	virtual void getPlane(btVector3& planeNormal,btVector3& planeSupport,int i ) const = 0;
+//	virtual int getIndex(int i) const = 0 ; 
+	///NOTE(review): presumably tests whether pt lies inside the shape within 'tolerance' -- confirm in the implementations
+	virtual	bool isInside(const btVector3& pt,btScalar tolerance) const = 0;
+
+};
+
+
+///The btPolyhedralConvexAabbCachingShape adds aabb caching to the btPolyhedralConvexShape
+class btPolyhedralConvexAabbCachingShape : public btPolyhedralConvexShape
+{
+	///cached AABB in local space; the accessors below assert m_isLocalAabbValid before using it
+	btVector3	m_localAabbMin;
+	btVector3	m_localAabbMax;
+	bool		m_isLocalAabbValid;
+
+protected:
+	///stores an externally computed local AABB and marks the cache as valid
+	void setCachedLocalAabb (const btVector3& aabbMin, const btVector3& aabbMax)
+	{
+		m_isLocalAabbValid = true;
+		m_localAabbMin = aabbMin;
+		m_localAabbMax = aabbMax;
+	}
+	///copies the cached local AABB out; asserts that the cache is valid
+	inline void getCachedLocalAabb (btVector3& aabbMin, btVector3& aabbMax) const
+	{
+		btAssert(m_isLocalAabbValid);
+		aabbMin = m_localAabbMin;
+		aabbMax = m_localAabbMax;
+	}
+
+public:
+
+	btPolyhedralConvexAabbCachingShape();
+	///passes the cached local AABB, 'margin' and 'trans' to btTransformAabb, which fills aabbMin/aabbMax
+	inline void getNonvirtualAabb(const btTransform& trans,btVector3& aabbMin,btVector3& aabbMax, btScalar margin) const
+	{
+
+		//the cache is not rebuilt here: it must already be valid (see recalcLocalAabb / setCachedLocalAabb)
+		btAssert(m_isLocalAabbValid);
+		btTransformAabb(m_localAabbMin,m_localAabbMax,margin,trans,aabbMin,aabbMax);
+	}
+
+	virtual void	setLocalScaling(const btVector3& scaling);
+
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+	///recomputes the cached local AABB from the support vertices along the six axis directions
+	void	recalcLocalAabb();
+
+};
+
+#endif //BT_POLYHEDRAL_CONVEX_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.cpp
new file mode 100644
index 00000000..25d58d61
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.cpp
@@ -0,0 +1,123 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btScaledBvhTriangleMeshShape.h"
+
+///Stores the child shape pointer as-is (no copy) together with the additional local scaling, and tags the shape type.
+btScaledBvhTriangleMeshShape::btScaledBvhTriangleMeshShape(btBvhTriangleMeshShape* childShape,const btVector3& localScaling)
+:m_localScaling(localScaling),m_bvhTriMeshShape(childShape)
+{
+	m_shapeType = SCALED_TRIANGLE_MESH_SHAPE_PROXYTYPE;
+}
+///Empty: in particular the wrapped child shape is not deleted here.
+btScaledBvhTriangleMeshShape::~btScaledBvhTriangleMeshShape()
+{
+}
+
+///Adapter that scales every triangle by m_localScaling before forwarding it to the wrapped callback.
+class btScaledTriangleCallback : public btTriangleCallback
+{
+	btTriangleCallback* m_originalCallback;
+	btVector3	m_localScaling;
+
+public:
+	btScaledTriangleCallback(btTriangleCallback* originalCallback,const btVector3& localScaling)
+		:m_originalCallback(originalCallback),
+		m_localScaling(localScaling)
+	{
+	}
+	///'triangle' is only read: the wrapped callback receives a scaled copy of its three corners
+	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex)
+	{
+		btVector3 scaled[3];
+		for (int corner=0;corner<3;corner++)
+		{
+			scaled[corner] = triangle[corner]*m_localScaling;
+		}
+		m_originalCallback->processTriangle(scaled,partId,triangleIndex);
+	}
+};
+
+void	btScaledBvhTriangleMeshShape::processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
+{
+	//The query box is multiplied by the inverse local scaling before it is handed to the child; the
+	//triangles the child reports are multiplied by the scaling again before they reach 'callback'.
+	btScaledTriangleCallback scaledCallback(callback,m_localScaling);
+
+	btVector3 invLocalScaling(1.f/m_localScaling.getX(),1.f/m_localScaling.getY(),1.f/m_localScaling.getZ());
+	btVector3 childAabbMin,childAabbMax;
+	for (int axis=0;axis<3;axis++)
+	{
+		///support negative scaling: a negative factor swaps which input bound yields the min and the max
+		const btScalar fromMin = aabbMin[axis] * invLocalScaling[axis];
+		const btScalar fromMax = aabbMax[axis] * invLocalScaling[axis];
+		childAabbMin[axis] = m_localScaling[axis] >= 0. ? fromMin : fromMax;
+		childAabbMax[axis] = m_localScaling[axis] <= 0. ? fromMin : fromMax;
+	}
+	childAabbMin[3] = 0.f;
+	childAabbMax[3] = 0.f;
+
+	m_bvhTriMeshShape->processAllTriangles(&scaledCallback,childAabbMin,childAabbMax);
+}
+
+
+void	btScaledBvhTriangleMeshShape::getAabb(const btTransform& trans,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	//Scales the child's local AABB by m_localScaling, adds the child's margin and returns the
+	//world-space box that encloses the result after applying 'trans'.
+	btVector3 localAabbMin = m_bvhTriMeshShape->getLocalAabbMin();
+	btVector3 localAabbMax = m_bvhTriMeshShape->getLocalAabbMax();
+
+	const btVector3 scaledFromMin = localAabbMin * m_localScaling;
+	const btVector3 scaledFromMax = localAabbMax * m_localScaling;
+	for (int axis=0;axis<3;axis++)
+	{
+		//a negative factor swaps the roles of the two bounds on that axis
+		localAabbMin[axis] = (m_localScaling[axis] >= 0.) ? scaledFromMin[axis] : scaledFromMax[axis];
+		localAabbMax[axis] = (m_localScaling[axis] <= 0.) ? scaledFromMin[axis] : scaledFromMax[axis];
+	}
+
+	btVector3 localHalfExtents = btScalar(0.5)*(localAabbMax-localAabbMin);
+	const btScalar margin = m_bvhTriMeshShape->getMargin();
+	localHalfExtents += btVector3(margin,margin,margin);
+	const btVector3 localCenter = btScalar(0.5)*(localAabbMax+localAabbMin);
+
+	//box under a transform: the center goes through 'trans', the half extents through the absolute basis
+	const btMatrix3x3 absBasis = trans.getBasis().absolute();
+	const btVector3 worldCenter = trans(localCenter);
+	const btVector3 worldExtent(absBasis[0].dot(localHalfExtents),
+		absBasis[1].dot(localHalfExtents),
+		absBasis[2].dot(localHalfExtents));
+
+	aabbMin = worldCenter - worldExtent;
+	aabbMax = worldCenter + worldExtent;
+}
+
+///Replaces the scaling this wrapper applies on top of the child shape; nothing else is updated here.
+void	btScaledBvhTriangleMeshShape::setLocalScaling(const btVector3& scaling)
+{
+	m_localScaling = scaling;
+}
+///Returns this wrapper's own scaling (m_localScaling) by reference.
+const btVector3& btScaledBvhTriangleMeshShape::getLocalScaling() const
+{
+	return m_localScaling;
+}
+void	btScaledBvhTriangleMeshShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	///deliberately empty ('mass' is ignored and 'inertia' is left untouched): don't make this a movable object!
+//	btAssert(0);
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h b/src/bullet/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h
new file mode 100644
index 00000000..ff86ef31
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h
@@ -0,0 +1,93 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SCALED_BVH_TRIANGLE_MESH_SHAPE_H
+#define BT_SCALED_BVH_TRIANGLE_MESH_SHAPE_H
+
+#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
+
+
+///The btScaledBvhTriangleMeshShape makes it possible to instance a scaled version of an existing btBvhTriangleMeshShape.
+///Note that each btBvhTriangleMeshShape still can have its own local scaling, independent from this btScaledBvhTriangleMeshShape 'localScaling'
+ATTRIBUTE_ALIGNED16(class) btScaledBvhTriangleMeshShape : public btConcaveShape
+{
+
+	///scaling applied on top of the child shape
+	btVector3	m_localScaling;
+	///wrapped shape, stored as a plain pointer; the destructor of this class does not delete it
+	btBvhTriangleMeshShape*	m_bvhTriMeshShape;
+
+public:
+
+	///wraps childShape (pointer stored as-is) with an additional localScaling
+	btScaledBvhTriangleMeshShape(btBvhTriangleMeshShape* childShape,const btVector3& localScaling);
+
+	virtual ~btScaledBvhTriangleMeshShape();
+
+	///note: calculateLocalInertia is implemented as a no-op that leaves 'inertia' untouched
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+	virtual void	setLocalScaling(const btVector3& scaling);
+	virtual const btVector3& getLocalScaling() const;
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+	///queries the child with the inversely scaled box and passes each reported triangle, scaled, to 'callback'
+	virtual void	processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const;
+
+	btBvhTriangleMeshShape*	getChildShape()
+	{
+		return m_bvhTriMeshShape;
+	}
+
+	const btBvhTriangleMeshShape*	getChildShape() const
+	{
+		return m_bvhTriMeshShape;
+	}
+
+	//debugging
+	virtual const char*	getName()const {return "SCALEDBVHTRIANGLEMESH";}
+	///returns sizeof(btScaledTriangleMeshShapeData)
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btScaledTriangleMeshShapeData
+{
+	btTriangleMeshShapeData	m_trimeshShapeData;	//filled by the child shape's serialize(); its m_shapeType is then overwritten
+	///the wrapper's own local scaling, written by btScaledBvhTriangleMeshShape::serialize
+	btVector3FloatData	m_localScaling;
+};
+
+
+///the reported buffer size is exactly one btScaledTriangleMeshShapeData
+SIMD_FORCE_INLINE	int	btScaledBvhTriangleMeshShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btScaledTriangleMeshShapeData);
+}
+
+///fills the dataBuffer and returns the struct name (and 0 on failure)
+SIMD_FORCE_INLINE	const char*	btScaledBvhTriangleMeshShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	//the child fills in its part first (its return value is ignored), then the record is re-tagged and the scaling appended
+	btScaledTriangleMeshShapeData* out = static_cast<btScaledTriangleMeshShapeData*>(dataBuffer);
+	m_bvhTriMeshShape->serialize(&out->m_trimeshShapeData,serializer);
+	out->m_trimeshShapeData.m_collisionShapeData.m_shapeType = SCALED_TRIANGLE_MESH_SHAPE_PROXYTYPE;
+	m_localScaling.serializeFloat(out->m_localScaling);
+	return "btScaledTriangleMeshShapeData";
+}
+
+#endif //BT_SCALED_BVH_TRIANGLE_MESH_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btShapeHull.cpp b/src/bullet/BulletCollision/CollisionShapes/btShapeHull.cpp
new file mode 100644
index 00000000..3beaf865
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btShapeHull.cpp
@@ -0,0 +1,170 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//btShapeHull was implemented by John McCutchan.
+
+
+#include "btShapeHull.h"
+#include "LinearMath/btConvexHull.h"
+
+#define NUM_UNITSPHERE_POINTS 42
+
+///Only remembers 'shape' (pointer stored, not copied); the arrays are cleared and the index count is zero until buildHull() fills them.
+btShapeHull::btShapeHull (const btConvexShape* shape)
+	:m_numIndices(0),
+	m_shape(shape)
+{
+	m_vertices.clear ();
+	m_indices.clear();
+}
+///Clears the index and vertex arrays explicitly; m_shape is not deleted.
+btShapeHull::~btShapeHull ()
+{
+	m_indices.clear();
+	m_vertices.clear ();
+}
+bool
+btShapeHull::buildHull (btScalar /*margin*/)
+{
+	//Samples the shape's support vertex along the unit-sphere directions plus the shape's preferred
+	//penetration directions, then stores the convex hull of those samples as an indexed triangle list.
+	//Returns false (leaving m_vertices/m_indices untouched) if the hull computation fails. 'margin' is unused.
+	//NOTE: the preferred directions are written into the shared static table of getUnitSpherePoints().
+
+	btVector3* sampleDirections = getUnitSpherePoints();
+	int numSampleDirections = NUM_UNITSPHERE_POINTS;
+
+	//append the shape's preferred penetration directions behind the fixed unit-sphere samples
+	const int numPDA = m_shape->getNumPreferredPenetrationDirections();
+	for (int pda=0;pda<numPDA;pda++)
+	{
+		btVector3 norm;
+		m_shape->getPreferredPenetrationDirection(pda,norm);
+		sampleDirections[numSampleDirections++] = norm;
+	}
+
+	//one support vertex per sample direction
+	btVector3 supportPoints[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+	for (int s=0;s<numSampleDirections;s++)
+	{
+		supportPoints[s] = m_shape->localGetSupportingVertex(sampleDirections[s]);
+	}
+
+	//the vertex setup is the same for single and double precision builds
+	HullDesc hd;
+	hd.mFlags = QF_TRIANGLES;
+	hd.mVcount = static_cast<unsigned int>(numSampleDirections);
+	hd.mVertices = &supportPoints[0];
+	hd.mVertexStride = sizeof(btVector3);
+
+	HullLibrary hl;
+	HullResult hr;
+	if (hl.CreateConvexHull (hd, hr) == QE_FAIL)
+	{
+		return false;
+	}
+
+	//copy the vertices ...
+	const int numOutputVertices = static_cast<int>(hr.mNumOutputVertices);
+	m_vertices.resize (numOutputVertices);
+	for (int v=0;v<numOutputVertices;v++)
+	{
+		m_vertices[v] = hr.m_OutputVertices[v];
+	}
+
+	//... and the triangle indices out of the temporary result
+	m_numIndices = hr.mNumIndices;
+	const int numOutputIndices = static_cast<int>(m_numIndices);
+	m_indices.resize(numOutputIndices);
+	for (int k=0;k<numOutputIndices;k++)
+	{
+		m_indices[k] = hr.m_Indices[k];
+	}
+
+	// free temporary hull result that we just copied
+	hl.ReleaseResult (hr);
+
+	return true;
+}
+
+///Number of triangles in the hull: the stored index count divided by three.
+int
+btShapeHull::numTriangles () const
+{
+	return static_cast<int>(m_numIndices / 3);
+}
+///Number of hull vertices currently stored in m_vertices.
+int
+btShapeHull::numVertices () const
+{
+	return m_vertices.size ();
+}
+///Number of triangle indices recorded by the last successful buildHull() (zero before that).
+int
+btShapeHull::numIndices () const
+{
+	return static_cast<int>(m_numIndices);
+}
+
+///Returns the function-local static direction table: 42 listed directions, followed by spare slots that buildHull() overwrites with preferred penetration directions.
+btVector3* btShapeHull::getUnitSpherePoints()
+{
+	static btVector3 sUnitSpherePoints[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2] = 
+	{
+		btVector3(btScalar(0.000000) , btScalar(-0.000000),btScalar(-1.000000)),
+		btVector3(btScalar(0.723608) , btScalar(-0.525725),btScalar(-0.447219)),
+		btVector3(btScalar(-0.276388) , btScalar(-0.850649),btScalar(-0.447219)),
+		btVector3(btScalar(-0.894426) , btScalar(-0.000000),btScalar(-0.447216)),
+		btVector3(btScalar(-0.276388) , btScalar(0.850649),btScalar(-0.447220)),
+		btVector3(btScalar(0.723608) , btScalar(0.525725),btScalar(-0.447219)),
+		btVector3(btScalar(0.276388) , btScalar(-0.850649),btScalar(0.447220)),
+		btVector3(btScalar(-0.723608) , btScalar(-0.525725),btScalar(0.447219)),
+		btVector3(btScalar(-0.723608) , btScalar(0.525725),btScalar(0.447219)),
+		btVector3(btScalar(0.276388) , btScalar(0.850649),btScalar(0.447219)),
+		btVector3(btScalar(0.894426) , btScalar(0.000000),btScalar(0.447216)),
+		btVector3(btScalar(-0.000000) , btScalar(0.000000),btScalar(1.000000)),
+		btVector3(btScalar(0.425323) , btScalar(-0.309011),btScalar(-0.850654)),
+		btVector3(btScalar(-0.162456) , btScalar(-0.499995),btScalar(-0.850654)),
+		btVector3(btScalar(0.262869) , btScalar(-0.809012),btScalar(-0.525738)),
+		btVector3(btScalar(0.425323) , btScalar(0.309011),btScalar(-0.850654)),
+		btVector3(btScalar(0.850648) , btScalar(-0.000000),btScalar(-0.525736)),
+		btVector3(btScalar(-0.525730) , btScalar(-0.000000),btScalar(-0.850652)),
+		btVector3(btScalar(-0.688190) , btScalar(-0.499997),btScalar(-0.525736)),
+		btVector3(btScalar(-0.162456) , btScalar(0.499995),btScalar(-0.850654)),
+		btVector3(btScalar(-0.688190) , btScalar(0.499997),btScalar(-0.525736)),
+		btVector3(btScalar(0.262869) , btScalar(0.809012),btScalar(-0.525738)),
+		btVector3(btScalar(0.951058) , btScalar(0.309013),btScalar(0.000000)),
+		btVector3(btScalar(0.951058) , btScalar(-0.309013),btScalar(0.000000)),
+		btVector3(btScalar(0.587786) , btScalar(-0.809017),btScalar(0.000000)),
+		btVector3(btScalar(0.000000) , btScalar(-1.000000),btScalar(0.000000)),
+		btVector3(btScalar(-0.587786) , btScalar(-0.809017),btScalar(0.000000)),
+		btVector3(btScalar(-0.951058) , btScalar(-0.309013),btScalar(-0.000000)),
+		btVector3(btScalar(-0.951058) , btScalar(0.309013),btScalar(-0.000000)),
+		btVector3(btScalar(-0.587786) , btScalar(0.809017),btScalar(-0.000000)),
+		btVector3(btScalar(-0.000000) , btScalar(1.000000),btScalar(-0.000000)),
+		btVector3(btScalar(0.587786) , btScalar(0.809017),btScalar(-0.000000)),
+		btVector3(btScalar(0.688190) , btScalar(-0.499997),btScalar(0.525736)),
+		btVector3(btScalar(-0.262869) , btScalar(-0.809012),btScalar(0.525738)),
+		btVector3(btScalar(-0.850648) , btScalar(0.000000),btScalar(0.525736)),
+		btVector3(btScalar(-0.262869) , btScalar(0.809012),btScalar(0.525738)),
+		btVector3(btScalar(0.688190) , btScalar(0.499997),btScalar(0.525736)),
+		btVector3(btScalar(0.525730) , btScalar(0.000000),btScalar(0.850652)),
+		btVector3(btScalar(0.162456) , btScalar(-0.499995),btScalar(0.850654)),
+		btVector3(btScalar(-0.425323) , btScalar(-0.309011),btScalar(0.850654)),
+		btVector3(btScalar(-0.425323) , btScalar(0.309011),btScalar(0.850654)),
+		btVector3(btScalar(0.162456) , btScalar(0.499995),btScalar(0.850654))
+	};
+	return sUnitSpherePoints;
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btShapeHull.h b/src/bullet/BulletCollision/CollisionShapes/btShapeHull.h
new file mode 100644
index 00000000..642a2887
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btShapeHull.h
@@ -0,0 +1,59 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///btShapeHull implemented by John McCutchan.
+
+#ifndef BT_SHAPE_HULL_H
+#define BT_SHAPE_HULL_H
+
+#include "LinearMath/btAlignedObjectArray.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+
+
+///The btShapeHull class takes a btConvexShape, builds a simplified convex hull using btConvexHull and provides triangle indices and vertices.
+///It can be useful to simplify a complex convex object and for visualization of a non-polyhedral convex object.
+///It approximates the convex hull using the supporting vertex of 42 directions.
+class btShapeHull
+{
+protected:
+
+	btAlignedObjectArray<btVector3> m_vertices; ///< vertex storage returned by getVertexPointer()
+	btAlignedObjectArray<unsigned int> m_indices; ///< index storage returned by getIndexPointer()
+	unsigned int m_numIndices;
+	const btConvexShape* m_shape; ///< the shape handed to the constructor (presumably not owned - TODO confirm in btShapeHull.cpp)
+
+	static btVector3* getUnitSpherePoints(); ///< static table of sample directions (sUnitSpherePoints)
+
+public:
+	btShapeHull (const btConvexShape* shape);
+	~btShapeHull ();
+
+	bool buildHull (btScalar margin); ///< presumably fills the vertex/index arrays and returns true on success - TODO confirm in btShapeHull.cpp
+
+	int numTriangles () const;
+	int numVertices () const;
+	int numIndices () const;
+
+	const btVector3* getVertexPointer() const ///< address of the first stored vertex
+	{
+		return &m_vertices[0]; // NOTE(review): indexes element 0 unconditionally - presumably only valid once the hull holds vertices
+	}
+	const unsigned int* getIndexPointer() const ///< address of the first stored index
+	{
+		return &m_indices[0]; // NOTE(review): indexes element 0 unconditionally - presumably only valid once the hull holds indices
+	}
+};
+
+#endif //BT_SHAPE_HULL_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btSphereShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btSphereShape.cpp
new file mode 100644
index 00000000..b9a736c0
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btSphereShape.cpp
@@ -0,0 +1,71 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btSphereShape.h"
+#include "BulletCollision/CollisionShapes/btCollisionMargin.h"
+
+#include "LinearMath/btQuaternion.h"
+
+btVector3	btSphereShape::localGetSupportingVertexWithoutMargin(const btVector3& vec)const
+{
+	(void)vec; //the direction is not read: without margin the support point is always the local origin
+	return btVector3(btScalar(0.),btScalar(0.),btScalar(0.)); //localGetSupportingVertex() adds getMargin() along the direction afterwards
+}
+
+void	btSphereShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+	(void)vectors; //directions are not read: without margin every support point is the local origin
+	btVector3* out = supportVerticesOut; //walks the first numVectors output slots
+	for (int remaining=numVectors;remaining>0;--remaining,++out)
+	{
+		out->setValue(btScalar(0.),btScalar(0.),btScalar(0.));
+	}
+}
+
+
+btVector3	btSphereShape::localGetSupportingVertex(const btVector3& vec)const
+{
+	//start from the margin-free support point, then push it out along the query direction
+	btVector3 support = localGetSupportingVertexWithoutMargin(vec);
+
+	//a (near) zero-length direction cannot be normalized, so the fixed diagonal (-1,-1,-1) is substituted
+	const bool degenerate = vec.length2() < (SIMD_EPSILON*SIMD_EPSILON);
+	btVector3 direction = degenerate ? btVector3(btScalar(-1.),btScalar(-1.),btScalar(-1.)) : vec;
+	direction.normalize();
+
+	//offset by getMargin() along the unit direction
+	support += getMargin() * direction;
+	return support;
+}
+
+
+//upstream note: broken due to scaling
+void btSphereShape::getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	const btScalar halfSize = getMargin(); //half edge length of the cube-shaped box
+	const btVector3 halfExtents(halfSize,halfSize,halfSize);
+	aabbMin = t.getOrigin() - halfExtents; //only the origin of t is read here
+	aabbMax = t.getOrigin() + halfExtents;
+}
+
+
+
+void	btSphereShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	const btScalar radius = getMargin(); //getMargin() is used as the radius of the sphere
+	const btScalar diagonal = btScalar(0.4) * mass * radius*radius; //0.4*m*r^2, identical on all three axes
+	inertia.setValue(diagonal,diagonal,diagonal);
+}
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btSphereShape.h b/src/bullet/BulletCollision/CollisionShapes/btSphereShape.h
new file mode 100644
index 00000000..b192efee
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btSphereShape.h
@@ -0,0 +1,73 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#ifndef BT_SPHERE_MINKOWSKI_H
+#define BT_SPHERE_MINKOWSKI_H
+
+#include "btConvexInternalShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
+
+///The btSphereShape implements an implicit sphere, centered around a local origin with radius.
+ATTRIBUTE_ALIGNED16(class) btSphereShape : public btConvexInternalShape
+
+{
+	
+public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	btSphereShape (btScalar radius) : btConvexInternalShape () ///< stores radius both as the X dimension and as the collision margin
+	{
+		m_shapeType = SPHERE_SHAPE_PROXYTYPE;
+		m_implicitShapeDimensions.setX(radius); //only the X component carries the unscaled radius
+		m_collisionMargin = radius;
+	}
+	
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec)const;
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+	//notice that the vectors should be unit length
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+
+
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+
+
+	btScalar	getRadius() const { return m_implicitShapeDimensions.getX() * m_localScaling.getX();} ///< unscaled radius times the X component of the local scaling
+
+	void	setUnscaledRadius(btScalar	radius) ///< replaces the unscaled radius and forwards it to the base-class margin
+	{
+		m_implicitShapeDimensions.setX(radius);
+		btConvexInternalShape::setMargin(radius);
+	}
+
+	//debugging
+	virtual const char*	getName()const {return "SPHERE";}
+
+	virtual void	setMargin(btScalar margin) ///< forwards to the base class; note that getMargin() below reports getRadius() instead
+	{
+		btConvexInternalShape::setMargin(margin);
+	}
+	virtual btScalar	getMargin() const ///< always returns getRadius()
+	{
+		//to improve gjk behaviour, use radius+margin as the full margin, so never get into the penetration case
+		//this means, non-uniform scaling is not supported anymore
+		return getRadius();
+	}
+
+
+};
+
+
+#endif //BT_SPHERE_MINKOWSKI_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btStaticPlaneShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btStaticPlaneShape.cpp
new file mode 100644
index 00000000..38ef8f03
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btStaticPlaneShape.cpp
@@ -0,0 +1,107 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btStaticPlaneShape.h"
+
+#include "LinearMath/btTransformUtil.h"
+
+
+btStaticPlaneShape::btStaticPlaneShape(const btVector3& planeNormal,btScalar planeConstant) //builds the plane from a normal (any non-zero length) and a plane constant
+: btConcaveShape (), m_planeNormal(planeNormal.normalized()), //a normalized copy of the normal is stored
+m_planeConstant(planeConstant),
+m_localScaling(btScalar(1.),btScalar(1.),btScalar(1.)) //identity scale by default; an all-zero scale is degenerate for any consumer of getLocalScaling()
+{
+	m_shapeType = STATIC_PLANE_PROXYTYPE;
+	//	btAssert( btFuzzyZero(m_planeNormal.length() - btScalar(1.)) );
+}
+
+
+btStaticPlaneShape::~btStaticPlaneShape() //intentionally empty: no explicit cleanup is performed here
+{
+}
+
+
+
+void btStaticPlaneShape::getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	(void)t; //the transform is ignored: the same box is reported for every placement
+	/* disabled variant, kept for reference:
+	btVector3 infvec (btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+
+	btVector3 center = m_planeNormal*m_planeConstant;
+	aabbMin = center + infvec*m_planeNormal;
+	aabbMax = aabbMin;
+	aabbMin.setMin(center - infvec*m_planeNormal);
+	aabbMax.setMax(center - infvec*m_planeNormal); 
+	*/
+
+	aabbMin.setValue(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT)); //box spans -BT_LARGE_FLOAT..+BT_LARGE_FLOAT on every axis
+	aabbMax.setValue(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+
+}
+
+
+
+
+void	btStaticPlaneShape::processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
+{
+	//the plane stores no triangles: a quad (two triangles) is generated here from the
+	//queried AABB and the plane equation (normal/constant), then handed to the callback
+
+	//centre and half-diagonal length of the query box; they position and size the quad
+	const btVector3 boxCenter = (aabbMax + aabbMin) * btScalar(0.5);
+	const btScalar boxRadius = ((aabbMax - aabbMin) * btScalar(0.5)).length();
+
+	//two in-plane directions derived from the normal (they could be precalculated)
+	btVector3 axisU,axisV;
+	btPlaneSpace1(m_planeNormal,axisU,axisV);
+
+	//move the box centre along the normal until it lies on the plane
+	const btVector3 origin = boxCenter - (m_planeNormal.dot(boxCenter) - m_planeConstant)*m_planeNormal;
+
+	//half-edges of the quad along each in-plane direction
+	const btVector3 offsetU = axisU*boxRadius;
+	const btVector3 offsetV = axisV*boxRadius;
+
+	btVector3 triangle[3];
+	triangle[0] = origin + offsetU + offsetV;
+	triangle[1] = origin + offsetU - offsetV;
+	triangle[2] = origin - offsetU - offsetV;
+	callback->processTriangle(triangle,0,0);
+
+	triangle[0] = origin - offsetU - offsetV;
+	triangle[1] = origin - offsetU + offsetV;
+	triangle[2] = origin + offsetU + offsetV;
+	callback->processTriangle(triangle,0,1);
+
+}
+
+void	btStaticPlaneShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	//moving concave objects not supported, hence the mass is never consulted
+	(void)mass;
+	//the reported inertia is zero on all three axes
+	const btScalar zero = btScalar(0.);
+	inertia.setValue(zero,zero,zero);
+}
+
+void	btStaticPlaneShape::setLocalScaling(const btVector3& scaling) //stores the scaling as given, without validation
+{
+	m_localScaling = scaling; //getAabb() and processAllTriangles() in this file do not read this value
+}
+const btVector3& btStaticPlaneShape::getLocalScaling() const //returns the scaling stored by the constructor or by setLocalScaling()
+{
+	return m_localScaling; //reference to the member itself, not a copy
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btStaticPlaneShape.h b/src/bullet/BulletCollision/CollisionShapes/btStaticPlaneShape.h
new file mode 100644
index 00000000..b13825e6
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btStaticPlaneShape.h
@@ -0,0 +1,103 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_STATIC_PLANE_SHAPE_H
+#define BT_STATIC_PLANE_SHAPE_H
+
+#include "btConcaveShape.h"
+
+
+///The btStaticPlaneShape simulates an infinite non-moving (static) collision plane.
+ATTRIBUTE_ALIGNED16(class) btStaticPlaneShape : public btConcaveShape
+{
+protected:
+	btVector3	m_localAabbMin; ///< NOTE(review): not written or read by the btStaticPlaneShape code visible in this patch section - confirm whether still needed
+	btVector3	m_localAabbMax; ///< NOTE(review): see m_localAabbMin
+	
+	btVector3	m_planeNormal; ///< normalized copy of the normal passed to the constructor
+	btScalar      m_planeConstant; ///< plane constant; processAllTriangles() treats points with dot(normal,p) == constant as lying on the plane
+	btVector3	m_localScaling; ///< stored and serialized, but not applied by getAabb()/processAllTriangles()
+
+public:
+	btStaticPlaneShape(const btVector3& planeNormal,btScalar planeConstant);
+
+	virtual ~btStaticPlaneShape();
+
+
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+
+	virtual void	processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const;
+
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	virtual void	setLocalScaling(const btVector3& scaling);
+	virtual const btVector3& getLocalScaling() const;
+	
+	const btVector3&	getPlaneNormal() const ///< the stored (normalized) plane normal
+	{
+		return	m_planeNormal;
+	}
+
+	const btScalar&	getPlaneConstant() const ///< the plane constant as passed to the constructor
+	{
+		return	m_planeConstant;
+	}
+
+	//debugging
+	virtual const char*	getName()const {return "STATICPLANE";}
+
+	virtual	int	calculateSerializeBufferSize() const; ///< sizeof(btStaticPlaneShapeData)
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+
+};
+
+///do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btStaticPlaneShapeData
+{
+	btCollisionShapeData	m_collisionShapeData; ///< filled by btCollisionShape::serialize()
+
+	btVector3FloatData	m_localScaling;
+	btVector3FloatData	m_planeNormal;
+	float			m_planeConstant; ///< written as float regardless of the btScalar type
+	char	m_pad[4]; ///< padding after m_planeConstant
+};
+
+
+SIMD_FORCE_INLINE	int	btStaticPlaneShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btStaticPlaneShapeData); //size of the struct that serialize() writes into dataBuffer
+}
+
+///fills dataBuffer (a btStaticPlaneShapeData) and returns the struct name; this implementation never returns 0
+SIMD_FORCE_INLINE	const char*	btStaticPlaneShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btStaticPlaneShapeData* planeData = (btStaticPlaneShapeData*) dataBuffer;
+	btCollisionShape::serialize(&planeData->m_collisionShapeData,serializer); //base-class part first
+
+	m_localScaling.serializeFloat(planeData->m_localScaling);
+	m_planeNormal.serializeFloat(planeData->m_planeNormal);
+	planeData->m_planeConstant = float(m_planeConstant);
+	planeData->m_pad[0] = planeData->m_pad[1] = planeData->m_pad[2] = planeData->m_pad[3] = 0; //dataBuffer is not known to be zero-filled: keep stale bytes out of the output
+	return "btStaticPlaneShapeData";
+}
+
+
+#endif //BT_STATIC_PLANE_SHAPE_H
+
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btStridingMeshInterface.cpp b/src/bullet/BulletCollision/CollisionShapes/btStridingMeshInterface.cpp
new file mode 100644
index 00000000..dd22fc56
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btStridingMeshInterface.cpp
@@ -0,0 +1,381 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btStridingMeshInterface.h"
+#include "LinearMath/btSerializer.h"
+
+btStridingMeshInterface::~btStridingMeshInterface() //intentionally empty: no explicit cleanup is performed here
+{
+
+}
+
+
+void	btStridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleIndexCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
+{
+	(void)aabbMin; //the query box is not used: every triangle of every sub-part is passed to the callback
+	(void)aabbMax;
+	int numtotalphysicsverts = 0; //accumulated below but never read in this function
+	int part,graphicssubparts = getNumSubParts();
+	const unsigned char * vertexbase;
+	const unsigned char * indexbase;
+	int indexstride;
+	PHY_ScalarType type;
+	PHY_ScalarType gfxindextype;
+	int stride,numverts,numtriangles;
+	int gfxindex;
+	btVector3 triangle[3];
+
+	btVector3 meshScaling = getScaling(); //applied per component to every vertex before the callback sees it
+
+	///if the number of parts is big, the performance might drop due to the innerloop switch on indextype
+	for (part=0;part<graphicssubparts ;part++)
+	{
+		getLockedReadOnlyVertexIndexBase(&vertexbase,numverts,type,stride,&indexbase,indexstride,numtriangles,gfxindextype,part);
+		numtotalphysicsverts+=numtriangles*3; //upper bound
+
+		///it is unlikely that developers want to pass in double-precision meshes in a single-precision Bullet build
+		///so disable this feature by default
+		///see patch http://code.google.com/p/bullet/issues/detail?id=213
+
+		switch (type)
+		{
+		case PHY_FLOAT:
+		 {
+
+			 float* graphicsbase;
+
+			 switch (gfxindextype)
+			 {
+			 case PHY_INTEGER: //indices are read as unsigned int
+				 {
+					 for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+					 {
+						 unsigned int* tri_indices= (unsigned int*)(indexbase+gfxindex*indexstride);
+						 graphicsbase = (float*)(vertexbase+tri_indices[0]*stride);
+						 triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ());
+						 graphicsbase = (float*)(vertexbase+tri_indices[1]*stride);
+						 triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ());
+						 graphicsbase = (float*)(vertexbase+tri_indices[2]*stride);
+						 triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ());
+						 callback->internalProcessTriangleIndex(triangle,part,gfxindex);
+					 }
+					 break;
+				 }
+			 case PHY_SHORT: //indices are read as unsigned short
+				 {
+					 for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+					 {
+						 unsigned short int* tri_indices= (unsigned short int*)(indexbase+gfxindex*indexstride);
+						 graphicsbase = (float*)(vertexbase+tri_indices[0]*stride);
+						 triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ());
+						 graphicsbase = (float*)(vertexbase+tri_indices[1]*stride);
+						 triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ());
+						 graphicsbase = (float*)(vertexbase+tri_indices[2]*stride);
+						 triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ());
+						 callback->internalProcessTriangleIndex(triangle,part,gfxindex);
+					 }
+					 break;
+				 }
+			case PHY_UCHAR: //indices are read as unsigned char
+				 {
+					 for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+					 {
+						 unsigned char* tri_indices= (unsigned char*)(indexbase+gfxindex*indexstride);
+						 graphicsbase = (float*)(vertexbase+tri_indices[0]*stride);
+						 triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ());
+						 graphicsbase = (float*)(vertexbase+tri_indices[1]*stride);
+						 triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ());
+						 graphicsbase = (float*)(vertexbase+tri_indices[2]*stride);
+						 triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ());
+						 callback->internalProcessTriangleIndex(triangle,part,gfxindex);
+					 }
+					 break;
+				 }
+			 default: //unhandled index type: the condition below is necessarily false here (PHY_UCHAR is handled above although not listed)
+				 btAssert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT));
+			 }
+			 break;
+		 }
+
+		case PHY_DOUBLE: //same three index variants as above, with vertices read as double and cast to btScalar
+			{
+				double* graphicsbase;
+
+				switch (gfxindextype)
+				{
+				case PHY_INTEGER:
+					{
+						for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+						{
+							unsigned int* tri_indices= (unsigned int*)(indexbase+gfxindex*indexstride);
+							graphicsbase = (double*)(vertexbase+tri_indices[0]*stride);
+							triangle[0].setValue((btScalar)graphicsbase[0]*meshScaling.getX(),(btScalar)graphicsbase[1]*meshScaling.getY(),(btScalar)graphicsbase[2]*meshScaling.getZ());
+							graphicsbase = (double*)(vertexbase+tri_indices[1]*stride);
+							triangle[1].setValue((btScalar)graphicsbase[0]*meshScaling.getX(),(btScalar)graphicsbase[1]*meshScaling.getY(),  (btScalar)graphicsbase[2]*meshScaling.getZ());
+							graphicsbase = (double*)(vertexbase+tri_indices[2]*stride);
+							triangle[2].setValue((btScalar)graphicsbase[0]*meshScaling.getX(),(btScalar)graphicsbase[1]*meshScaling.getY(),  (btScalar)graphicsbase[2]*meshScaling.getZ());
+							callback->internalProcessTriangleIndex(triangle,part,gfxindex);
+						}
+						break;
+					}
+				case PHY_SHORT:
+					{
+						for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+						{
+							unsigned short int* tri_indices= (unsigned short int*)(indexbase+gfxindex*indexstride);
+							graphicsbase = (double*)(vertexbase+tri_indices[0]*stride);
+							triangle[0].setValue((btScalar)graphicsbase[0]*meshScaling.getX(),(btScalar)graphicsbase[1]*meshScaling.getY(),(btScalar)graphicsbase[2]*meshScaling.getZ());
+							graphicsbase = (double*)(vertexbase+tri_indices[1]*stride);
+							triangle[1].setValue((btScalar)graphicsbase[0]*meshScaling.getX(),(btScalar)graphicsbase[1]*meshScaling.getY(),  (btScalar)graphicsbase[2]*meshScaling.getZ());
+							graphicsbase = (double*)(vertexbase+tri_indices[2]*stride);
+							triangle[2].setValue((btScalar)graphicsbase[0]*meshScaling.getX(),(btScalar)graphicsbase[1]*meshScaling.getY(),  (btScalar)graphicsbase[2]*meshScaling.getZ());
+							callback->internalProcessTriangleIndex(triangle,part,gfxindex);
+						}
+						break;
+					}
+				case PHY_UCHAR:
+					{
+						for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+						{
+							unsigned char* tri_indices= (unsigned char*)(indexbase+gfxindex*indexstride);
+							graphicsbase = (double*)(vertexbase+tri_indices[0]*stride);
+							triangle[0].setValue((btScalar)graphicsbase[0]*meshScaling.getX(),(btScalar)graphicsbase[1]*meshScaling.getY(),(btScalar)graphicsbase[2]*meshScaling.getZ());
+							graphicsbase = (double*)(vertexbase+tri_indices[1]*stride);
+							triangle[1].setValue((btScalar)graphicsbase[0]*meshScaling.getX(),(btScalar)graphicsbase[1]*meshScaling.getY(),  (btScalar)graphicsbase[2]*meshScaling.getZ());
+							graphicsbase = (double*)(vertexbase+tri_indices[2]*stride);
+							triangle[2].setValue((btScalar)graphicsbase[0]*meshScaling.getX(),(btScalar)graphicsbase[1]*meshScaling.getY(),  (btScalar)graphicsbase[2]*meshScaling.getZ());
+							callback->internalProcessTriangleIndex(triangle,part,gfxindex);
+						}
+						break;
+					}
+				default: //unhandled index type: the condition below is necessarily false here
+					btAssert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT));
+				}
+				break;
+			}
+		default: //unhandled vertex type: the condition below is necessarily false here
+			btAssert((type == PHY_FLOAT) || (type == PHY_DOUBLE));
+		}
+
+		unLockReadOnlyVertexBase(part); //pairs with getLockedReadOnlyVertexIndexBase() at the top of the loop body
+	}
+}
+
+void	btStridingMeshInterface::calculateAabbBruteForce(btVector3& aabbMin,btVector3& aabbMax)
+{
+	//callback that grows a running bounding box around every triangle vertex it is handed
+	struct	AabbCalculationCallback : public btInternalTriangleIndexCallback
+	{
+		btVector3	m_aabbMin;
+		btVector3	m_aabbMax;
+
+		AabbCalculationCallback()
+		{
+			const btScalar big = btScalar(BT_LARGE_FLOAT); //start inverted (min=+big, max=-big) so the first vertex defines both corners
+			m_aabbMin.setValue(big,big,big);
+			m_aabbMax.setValue(-big,-big,-big);
+		}
+
+		virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int  triangleIndex)
+		{
+			(void)partId;
+			(void)triangleIndex;
+			for (int corner=0;corner<3;corner++)
+			{
+				m_aabbMin.setMin(triangle[corner]);
+				m_aabbMax.setMax(triangle[corner]);
+			}
+		}
+	};
+
+	//visit all triangles, passing a query box that spans -BT_LARGE_FLOAT..+BT_LARGE_FLOAT
+	AabbCalculationCallback	collector;
+	aabbMin.setValue(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT));
+	aabbMax.setValue(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+	InternalProcessAllTriangles(&collector,aabbMin,aabbMax);
+
+	//hand back the accumulated box; it stays inverted if no triangle was visited
+	aabbMin = collector.m_aabbMin;
+	aabbMax = collector.m_aabbMax;
+}
+
+
+
+///fills the dataBuffer (a btStridingMeshInterfaceData) and returns the struct name (and 0 on failure); the code below always returns the name
+const char*	btStridingMeshInterface::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btStridingMeshInterfaceData* trimeshData = (btStridingMeshInterfaceData*) dataBuffer;
+
+	trimeshData->m_numMeshParts = getNumSubParts();
+
+	//void* uniquePtr = 0;
+
+	trimeshData->m_meshPartsPtr = 0; //stays 0 when there are no sub-parts
+
+	if (trimeshData->m_numMeshParts)
+	{
+		btChunk* chunk = serializer->allocate(sizeof(btMeshPartData),trimeshData->m_numMeshParts); //one btMeshPartData per sub-part
+		btMeshPartData* memPtr = (btMeshPartData*)chunk->m_oldPtr;
+		trimeshData->m_meshPartsPtr = (btMeshPartData *)serializer->getUniquePointer(memPtr);
+
+
+	//	int numtotalphysicsverts = 0;
+		int part,graphicssubparts = getNumSubParts();
+		const unsigned char * vertexbase;
+		const unsigned char * indexbase;
+		int indexstride;
+		PHY_ScalarType type;
+		PHY_ScalarType gfxindextype;
+		int stride,numverts,numtriangles;
+		int gfxindex;
+	//	btVector3 triangle[3];
+
+		btVector3 meshScaling = getScaling(); //NOTE(review): not referenced again in this function; vertices are written unscaled
+
+		///if the number of parts is big, the performance might drop due to the innerloop switch on indextype
+		for (part=0;part<graphicssubparts ;part++,memPtr++)
+		{
+			getLockedReadOnlyVertexIndexBase(&vertexbase,numverts,type,stride,&indexbase,indexstride,numtriangles,gfxindextype,part);
+			memPtr->m_numTriangles = numtriangles;//indices = 3*numtriangles
+			memPtr->m_numVertices = numverts;
+			memPtr->m_indices16 = 0; //all array pointers start as 0; only the ones matching this part's types are set below
+			memPtr->m_indices32 = 0;
+			memPtr->m_3indices16 = 0;
+			memPtr->m_3indices8 = 0;
+			memPtr->m_vertices3f = 0;
+			memPtr->m_vertices3d = 0;
+
+
+			switch (gfxindextype)
+			{
+			case PHY_INTEGER: //written as a flat array of 3*numtriangles btIntIndexData
+				{
+					int numindices = numtriangles*3;
+				
+					if (numindices)
+					{
+						btChunk* chunk = serializer->allocate(sizeof(btIntIndexData),numindices); //shadows the outer chunk within this scope
+						btIntIndexData* tmpIndices = (btIntIndexData*)chunk->m_oldPtr;
+						memPtr->m_indices32 = (btIntIndexData*)serializer->getUniquePointer(tmpIndices);
+						for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+						{
+							unsigned int* tri_indices= (unsigned int*)(indexbase+gfxindex*indexstride);
+							tmpIndices[gfxindex*3].m_value = tri_indices[0];
+							tmpIndices[gfxindex*3+1].m_value = tri_indices[1];
+							tmpIndices[gfxindex*3+2].m_value = tri_indices[2];
+						}
+						serializer->finalizeChunk(chunk,"btIntIndexData",BT_ARRAY_CODE,(void*)chunk->m_oldPtr);
+					}
+					break;
+				}
+			case PHY_SHORT: //written as one btShortIntIndexTripletData per triangle
+				{
+					if (numtriangles)
+					{
+						btChunk* chunk = serializer->allocate(sizeof(btShortIntIndexTripletData),numtriangles); //shadows the outer chunk within this scope
+						btShortIntIndexTripletData* tmpIndices = (btShortIntIndexTripletData*)chunk->m_oldPtr;
+						memPtr->m_3indices16 = (btShortIntIndexTripletData*) serializer->getUniquePointer(tmpIndices);
+						for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+						{
+							unsigned short int* tri_indices= (unsigned short int*)(indexbase+gfxindex*indexstride);
+							tmpIndices[gfxindex].m_values[0] = tri_indices[0];
+							tmpIndices[gfxindex].m_values[1] = tri_indices[1];
+							tmpIndices[gfxindex].m_values[2] = tri_indices[2];
+						}
+						serializer->finalizeChunk(chunk,"btShortIntIndexTripletData",BT_ARRAY_CODE,(void*)chunk->m_oldPtr);
+					}
+					break;
+				}
+				case PHY_UCHAR: //written as one btCharIndexTripletData per triangle
+				{
+					if (numtriangles)
+					{
+						btChunk* chunk = serializer->allocate(sizeof(btCharIndexTripletData),numtriangles); //shadows the outer chunk within this scope
+						btCharIndexTripletData* tmpIndices = (btCharIndexTripletData*)chunk->m_oldPtr;
+						memPtr->m_3indices8 = (btCharIndexTripletData*) serializer->getUniquePointer(tmpIndices);
+						for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+						{
+							unsigned char* tri_indices= (unsigned char*)(indexbase+gfxindex*indexstride);
+							tmpIndices[gfxindex].m_values[0] = tri_indices[0];
+							tmpIndices[gfxindex].m_values[1] = tri_indices[1];
+							tmpIndices[gfxindex].m_values[2] = tri_indices[2];
+						}
+						serializer->finalizeChunk(chunk,"btCharIndexTripletData",BT_ARRAY_CODE,(void*)chunk->m_oldPtr);
+					}
+					break;
+				}
+			default:
+				{
+					btAssert(0);
+					//unknown index type
+				}
+			}
+
+			switch (type)
+			{
+			case PHY_FLOAT: //vertices are copied as three floats each, starting every stride bytes
+			 {
+				 float* graphicsbase;
+
+				 if (numverts)
+				 {
+					 btChunk* chunk = serializer->allocate(sizeof(btVector3FloatData),numverts); //shadows the outer chunk within this scope
+					 btVector3FloatData* tmpVertices = (btVector3FloatData*) chunk->m_oldPtr;
+					 memPtr->m_vertices3f = (btVector3FloatData *)serializer->getUniquePointer(tmpVertices);
+					 for (int i=0;i<numverts;i++)
+					 {
+						 graphicsbase = (float*)(vertexbase+i*stride);
+						 tmpVertices[i].m_floats[0] = graphicsbase[0];
+						 tmpVertices[i].m_floats[1] = graphicsbase[1];
+						 tmpVertices[i].m_floats[2] = graphicsbase[2];
+					 }
+					 serializer->finalizeChunk(chunk,"btVector3FloatData",BT_ARRAY_CODE,(void*)chunk->m_oldPtr);
+				 }
+				 break;
+				}
+
+			case PHY_DOUBLE:
+				{
+					if (numverts)
+					{
+						btChunk* chunk = serializer->allocate(sizeof(btVector3DoubleData),numverts); //shadows the outer chunk within this scope
+						btVector3DoubleData* tmpVertices = (btVector3DoubleData*) chunk->m_oldPtr;
+						memPtr->m_vertices3d = (btVector3DoubleData *) serializer->getUniquePointer(tmpVertices);
+						for (int i=0;i<numverts;i++)
+					 {
+						 double* graphicsbase = (double*)(vertexbase+i*stride);//NOTE(review): the old "convert to float" remark looks stale - values go into btVector3DoubleData without a cast; confirm
+						 tmpVertices[i].m_floats[0] = graphicsbase[0];
+						 tmpVertices[i].m_floats[1] = graphicsbase[1];
+						 tmpVertices[i].m_floats[2] = graphicsbase[2];
+					 }
+						serializer->finalizeChunk(chunk,"btVector3DoubleData",BT_ARRAY_CODE,(void*)chunk->m_oldPtr);
+					}
+					break;
+				}
+
+			default: //unhandled vertex type: the condition below is necessarily false here
+				btAssert((type == PHY_FLOAT) || (type == PHY_DOUBLE));
+			}
+
+			unLockReadOnlyVertexBase(part); //pairs with getLockedReadOnlyVertexIndexBase() at the top of the loop body
+		}
+
+		serializer->finalizeChunk(chunk,"btMeshPartData",BT_ARRAY_CODE,chunk->m_oldPtr); //this is the outer chunk holding the btMeshPartData array
+	}
+
+
+	m_scaling.serializeFloat(trimeshData->m_scaling); //written even when there are no sub-parts
+	return "btStridingMeshInterfaceData";
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btStridingMeshInterface.h b/src/bullet/BulletCollision/CollisionShapes/btStridingMeshInterface.h
new file mode 100644
index 00000000..f2b27ade
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btStridingMeshInterface.h
@@ -0,0 +1,162 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_STRIDING_MESHINTERFACE_H
+#define BT_STRIDING_MESHINTERFACE_H
+
+#include "LinearMath/btVector3.h"
+#include "btTriangleCallback.h"
+#include "btConcaveShape.h"
+
+
+
+
+
+///	The btStridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with btBvhTriangleMeshShape and some other collision shapes.
+/// Using index striding of 3*sizeof(integer) it can use triangle arrays, using index striding of 1*sizeof(integer) it can handle triangle strips.
+/// It allows for sharing graphics and collision meshes. Also it provides locking/unlocking of graphics meshes that are in gpu memory.
+class  btStridingMeshInterface
+{
+	protected:
+		///scaling vector read and written through getScaling()/setScaling(); the constructor sets it to (1,1,1)
+		btVector3 m_scaling;
+
+	public:
+		btStridingMeshInterface() :m_scaling(btScalar(1.),btScalar(1.),btScalar(1.))
+		{
+
+		}
+
+		virtual ~btStridingMeshInterface();
+
+
+		///passes triangles to the callback; defined outside this header (aabbMin/aabbMax presumably describe the query box - confirm in the .cpp)
+		virtual void	InternalProcessAllTriangles(btInternalTriangleIndexCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const;
+
+		///brute force method to calculate aabb
+		void	calculateAabbBruteForce(btVector3& aabbMin,btVector3& aabbMax);
+
+		/// get read and write access to a subpart of a triangle mesh
+		/// this subpart has a continuous array of vertices and indices
+		/// in this way the mesh can be handled as chunks of memory with striding
+		/// very similar to OpenGL vertexarray support
+		/// make a call to unLockVertexBase when the read and write access is finished	
+		virtual void	getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& stride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0)=0;
+		///read-only counterpart of getLockedVertexIndexBase; finish the access with unLockReadOnlyVertexBase
+		virtual void	getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& stride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0) const=0;
+	
+		/// unLockVertexBase finishes the access to a subpart of the triangle mesh
+		/// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished
+		virtual void	unLockVertexBase(int subpart)=0;
+		///finishes the read-only access started by getLockedReadOnlyVertexIndexBase
+		virtual void	unLockReadOnlyVertexBase(int subpart) const=0;
+
+
+		/// getNumSubParts returns the number of separate subparts
+		/// each subpart has a continuous array of vertices and indices
+		virtual int		getNumSubParts() const=0;
+
+		virtual void	preallocateVertices(int numverts)=0;
+		virtual void	preallocateIndices(int numindices)=0;
+		///premade AABB hooks: this base class reports that none exists and its setter/getter do nothing with their arguments; subclasses may override
+		virtual bool	hasPremadeAabb() const { return false; }
+		virtual void	setPremadeAabb(const btVector3& aabbMin, const btVector3& aabbMax ) const
+                {
+                        (void) aabbMin;
+                        (void) aabbMax;
+                }
+		virtual void	getPremadeAabb(btVector3* aabbMin, btVector3* aabbMax ) const
+        {
+            (void) aabbMin;
+            (void) aabbMax;
+        }
+
+		const btVector3&	getScaling() const {
+			return m_scaling;
+		}
+		void	setScaling(const btVector3& scaling)
+		{
+			m_scaling = scaling;
+		}
+		///size in bytes of btStridingMeshInterfaceData (see the inline definition at the end of this header)
+		virtual	int	calculateSerializeBufferSize() const;
+
+		///fills the dataBuffer and returns the struct name (and 0 on failure)
+		virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+
+};
+///Serialized index element holding one int; btMeshPartData::m_indices32 points to these.
+struct	btIntIndexData
+{
+	int	m_value;
+};
+///Serialized index element holding one short plus two pad chars; btMeshPartData::m_indices16 (kept for backwards compatibility) points to these.
+struct	btShortIntIndexData
+{
+	short m_value;
+	char m_pad[2];
+};
+///Three short indices (presumably one triangle) plus two pad chars; btMeshPartData::m_3indices16 points to these.
+struct	btShortIntIndexTripletData
+{
+	short	m_values[3];
+	char	m_pad[2];
+};
+///Three unsigned char indices (presumably one triangle) plus one pad char; btMeshPartData::m_3indices8 points to these.
+struct	btCharIndexTripletData
+{
+	unsigned char m_values[3];
+	char	m_pad;
+};
+
+///Serialized form of one mesh subpart: pointers to its vertex and index arrays plus the element counts.
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btMeshPartData
+{
+	btVector3FloatData			*m_vertices3f;//set by serialize() when the vertices are copied as floats
+	btVector3DoubleData			*m_vertices3d;//set by serialize() in its PHY_DOUBLE case
+	//index arrays, one pointer per supported element layout
+	btIntIndexData				*m_indices32;
+	btShortIntIndexTripletData	*m_3indices16;
+	btCharIndexTripletData		*m_3indices8;
+
+	btShortIntIndexData			*m_indices16;//backwards compatibility
+
+	int                     m_numTriangles;//length of m_indices = m_numTriangles
+	int                     m_numVertices;
+};
+
+///Serialized form of a btStridingMeshInterface; its name is the string returned by btStridingMeshInterface::serialize().
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btStridingMeshInterfaceData
+{
+	btMeshPartData	*m_meshPartsPtr;
+	btVector3FloatData	m_scaling;//written from the interface's m_scaling with serializeFloat
+	int	m_numMeshParts;
+	char m_padding[4];
+};
+
+
+///Returns the size in bytes of btStridingMeshInterfaceData, the struct that serialize() reports by name.
+SIMD_FORCE_INLINE	int	btStridingMeshInterface::calculateSerializeBufferSize() const
+{
+	const int structSize = static_cast<int>(sizeof(btStridingMeshInterfaceData));
+	return structSize;
+}
+
+
+
+#endif //BT_STRIDING_MESHINTERFACE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTetrahedronShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btTetrahedronShape.cpp
new file mode 100644
index 00000000..52f346bf
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTetrahedronShape.cpp
@@ -0,0 +1,218 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btTetrahedronShape.h"
+#include "LinearMath/btMatrix3x3.h"
+///Creates an empty simplex (no vertices); vertices can be added later with addVertex().
+btBU_Simplex1to4::btBU_Simplex1to4() : btPolyhedralConvexAabbCachingShape (),
+m_numVertices(0)
+{
+	m_shapeType = TETRAHEDRAL_SHAPE_PROXYTYPE;
+}
+///Creates a simplex with the single vertex pt0.
+btBU_Simplex1to4::btBU_Simplex1to4(const btVector3& pt0) : btPolyhedralConvexAabbCachingShape (),
+m_numVertices(0)
+{
+	m_shapeType = TETRAHEDRAL_SHAPE_PROXYTYPE;
+	addVertex(pt0);
+}
+///Creates a two-vertex simplex (line segment) from pt0 and pt1, added in that order.
+btBU_Simplex1to4::btBU_Simplex1to4(const btVector3& pt0,const btVector3& pt1) : btPolyhedralConvexAabbCachingShape (),
+m_numVertices(0)
+{
+	m_shapeType = TETRAHEDRAL_SHAPE_PROXYTYPE;
+	addVertex(pt0);
+	addVertex(pt1);
+}
+///Creates a three-vertex simplex (triangle) from pt0, pt1 and pt2, added in that order.
+btBU_Simplex1to4::btBU_Simplex1to4(const btVector3& pt0,const btVector3& pt1,const btVector3& pt2) : btPolyhedralConvexAabbCachingShape (),
+m_numVertices(0)
+{
+	m_shapeType = TETRAHEDRAL_SHAPE_PROXYTYPE;
+	addVertex(pt0);
+	addVertex(pt1);
+	addVertex(pt2);
+}
+///Creates a four-vertex simplex (tetrahedron) from pt0..pt3, added in that order.
+btBU_Simplex1to4::btBU_Simplex1to4(const btVector3& pt0,const btVector3& pt1,const btVector3& pt2,const btVector3& pt3) : btPolyhedralConvexAabbCachingShape (),
+m_numVertices(0)
+{
+	m_shapeType = TETRAHEDRAL_SHAPE_PROXYTYPE;
+	addVertex(pt0);
+	addVertex(pt1);
+	addVertex(pt2);
+	addVertex(pt3);
+}
+
+///Computes the AABB for transform t by delegating to btPolyhedralConvexAabbCachingShape::getAabb (the #if 1 branch).
+void btBU_Simplex1to4::getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+{
+#if 1
+	btPolyhedralConvexAabbCachingShape::getAabb(t,aabbMin,aabbMax);
+#else
+	aabbMin.setValue(BT_LARGE_FLOAT,BT_LARGE_FLOAT,BT_LARGE_FLOAT);
+	aabbMax.setValue(-BT_LARGE_FLOAT,-BT_LARGE_FLOAT,-BT_LARGE_FLOAT);
+	//disabled alternative, never compiled while the condition above is 1:
+	//just transform the vertices in worldspace, and take their AABB
+	for (int i=0;i<m_numVertices;i++)
+	{
+		btVector3 worldVertex = t(m_vertices[i]);
+		aabbMin.setMin(worldVertex);
+		aabbMax.setMax(worldVertex);
+	}
+#endif
+}
+
+///Appends pt as the next simplex vertex and calls recalcLocalAabb() to refresh the cached local AABB.
+///m_vertices has room for 4 points: a fifth vertex trips btAssert and is otherwise ignored instead of being written past the array.
+void btBU_Simplex1to4::addVertex(const btVector3& pt)
+{
+	btAssert(m_numVertices < 4);
+	if (m_numVertices >= 4) return;
+	m_vertices[m_numVertices++] = pt;
+	recalcLocalAabb();
+}
+
+///Returns how many vertices have been added so far (m_numVertices).
+int	btBU_Simplex1to4::getNumVertices() const
+{
+	return m_numVertices;
+}
+
+int btBU_Simplex1to4::getNumEdges() const
+{
+	//euler formula, F-E+V = 2, so E = F+V-2
+	//
+	//edge count indexed by vertex count:
+	//  0 vertices -> 0
+	//  1 vertex   -> 0
+	//  2 vertices -> 1
+	//  3 vertices -> 3
+	//  4 vertices -> 6
+	static const int edgeCountForVertexCount[5] = {0, 0, 1, 3, 6};
+
+	//any other vertex count reports no edges
+	const bool validCount = (m_numVertices >= 0) && (m_numVertices <= 4);
+	if (!validCount)
+		return 0;
+	return edgeCountForVertexCount[m_numVertices];
+}
+
+void btBU_Simplex1to4::getEdge(int i,btVector3& pa,btVector3& pb) const
+{
+	//Writes the end points of edge i into pa/pb; nothing is written when no case below matches.
+    switch (m_numVertices)
+	{
+	//two vertices: the single edge 0-1 is returned whatever i is
+	case 2: 
+		pa = m_vertices[0];
+		pb = m_vertices[1];
+		break;
+	case 3:  //three vertices: edges 0-1, 1-2, 2-0
+		switch (i)
+		{
+		case 0:
+			pa = m_vertices[0];
+			pb = m_vertices[1];
+			break;
+		case 1:
+			pa = m_vertices[1];
+			pb = m_vertices[2];
+			break;
+		case 2:
+			pa = m_vertices[2];
+			pb = m_vertices[0];
+			break;
+
+		}
+		break;
+	case 4: //four vertices: the same three edges, then 0-3, 1-3, 2-3
+		switch (i)
+		{
+		case 0:
+			pa = m_vertices[0];
+			pb = m_vertices[1];
+			break;
+		case 1:
+			pa = m_vertices[1];
+			pb = m_vertices[2];
+			break;
+		case 2:
+			pa = m_vertices[2];
+			pb = m_vertices[0];
+			break;
+		case 3:
+			pa = m_vertices[0];
+			pb = m_vertices[3];
+			break;
+		case 4:
+			pa = m_vertices[1];
+			pb = m_vertices[3];
+			break;
+		case 5:
+			pa = m_vertices[2];
+			pb = m_vertices[3];
+			break;
+		}
+		//last case of the outer switch, so no break follows
+	}
+
+
+
+
+}
+///Copies m_vertices[i] into vtx; i is used as an array index without a range check.
+void btBU_Simplex1to4::getVertex(int i,btVector3& vtx) const
+{
+	vtx = m_vertices[i];
+}
+
+int	btBU_Simplex1to4::getNumPlanes() const
+{
+	//plane count indexed by vertex count:
+	//  0 vertices -> 0
+	//  1 vertex   -> 0
+	//  2 vertices -> 0
+	//  3 vertices -> 2
+	//  4 vertices -> 4
+	static const int planeCountForVertexCount[5] = {0, 0, 0, 2, 4};
+	//vertex counts outside 0..4 report no planes
+	if (m_numVertices < 0)
+	{
+		return 0;
+	}
+	if (m_numVertices > 4)
+	{
+		return 0;
+	}
+	return planeCountForVertexCount[m_numVertices];
+}
+
+///Stub: all three parameters are unnamed and unused.
+void btBU_Simplex1to4::getPlane(btVector3&, btVector3& ,int ) const
+{
+	//empty body: neither output vector is written
+}
+///Stub: ignores its argument and always returns 0.
+int btBU_Simplex1to4::getIndex(int ) const
+{
+	return 0;
+}
+///Stub: ignores the point and the tolerance and always returns false.
+bool btBU_Simplex1to4::isInside(const btVector3& ,btScalar ) const
+{
+	return false;
+}
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTetrahedronShape.h b/src/bullet/BulletCollision/CollisionShapes/btTetrahedronShape.h
new file mode 100644
index 00000000..6b7128ef
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTetrahedronShape.h
@@ -0,0 +1,74 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SIMPLEX_1TO4_SHAPE
+#define BT_SIMPLEX_1TO4_SHAPE
+
+
+#include "btPolyhedralConvexShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+
+
+///The btBU_Simplex1to4 implements tetrahedron, triangle, line, vertex collision shapes. In most cases it is better to use btConvexHullShape instead.
+class btBU_Simplex1to4 : public btPolyhedralConvexAabbCachingShape
+{
+protected:
+	///m_numVertices is the number of entries of m_vertices in use; the array has room for 4 points
+	int	m_numVertices;
+	btVector3	m_vertices[4];
+
+public:
+	btBU_Simplex1to4();
+	///convenience constructors: each one hands its points to addVertex in argument order
+	btBU_Simplex1to4(const btVector3& pt0);
+	btBU_Simplex1to4(const btVector3& pt0,const btVector3& pt1);
+	btBU_Simplex1to4(const btVector3& pt0,const btVector3& pt1,const btVector3& pt2);
+	btBU_Simplex1to4(const btVector3& pt0,const btVector3& pt1,const btVector3& pt2,const btVector3& pt3);
+
+	///sets the vertex count back to 0; unlike addVertex it does not call recalcLocalAabb()
+	void	reset()
+	{
+		m_numVertices = 0;
+	}
+	
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+	///stores pt as the next vertex and recalculates the local AABB
+	void addVertex(const btVector3& pt);
+
+	//PolyhedralConvexShape interface
+
+	virtual int	getNumVertices() const;
+
+	virtual int getNumEdges() const;
+
+	virtual void getEdge(int i,btVector3& pa,btVector3& pb) const;
+	
+	virtual void getVertex(int i,btVector3& vtx) const;
+
+	virtual int	getNumPlanes() const;
+	///the next three are stubs in btTetrahedronShape.cpp: getPlane writes nothing, getIndex returns 0, isInside returns false
+	virtual void getPlane(btVector3& planeNormal,btVector3& planeSupport,int i) const;
+
+	virtual int getIndex(int i) const;
+
+	virtual	bool isInside(const btVector3& pt,btScalar tolerance) const;
+
+
+	///getName is for debugging
+	virtual const char*	getName()const { return "btBU_Simplex1to4";}
+
+};
+
+#endif //BT_SIMPLEX_1TO4_SHAPE
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleBuffer.cpp b/src/bullet/BulletCollision/CollisionShapes/btTriangleBuffer.cpp
new file mode 100644
index 00000000..3027e65b
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleBuffer.cpp
@@ -0,0 +1,35 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btTriangleBuffer.h"
+
+
+
+
+
+
+///Copies the three vertices and the part/triangle ids into a btTriangle and appends it to m_triangleBuffer.
+void btTriangleBuffer::processTriangle(btVector3* triangle,int partId,int  triangleIndex)
+{
+		btTriangle	entry;
+		//ids first, then the three corners; the fields are independent of each other
+		entry.m_triangleIndex = triangleIndex;
+		entry.m_partId = partId;
+		entry.m_vertex2 = triangle[2];
+		entry.m_vertex1 = triangle[1];
+		entry.m_vertex0 = triangle[0];
+		m_triangleBuffer.push_back(entry);
+}
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleBuffer.h b/src/bullet/BulletCollision/CollisionShapes/btTriangleBuffer.h
new file mode 100644
index 00000000..b71fc8b3
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleBuffer.h
@@ -0,0 +1,69 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_TRIANGLE_BUFFER_H
+#define BT_TRIANGLE_BUFFER_H
+
+#include "btTriangleCallback.h"
+#include "LinearMath/btAlignedObjectArray.h"
+///One buffered triangle: its three vertices plus the part id and triangle index given to btTriangleBuffer::processTriangle.
+struct	btTriangle
+{
+	btVector3	m_vertex0;
+	btVector3	m_vertex1;
+	btVector3	m_vertex2;
+	int	m_partId;
+	int	m_triangleIndex;
+};
+
+///The btTriangleBuffer callback can be useful to collect and store overlapping triangles between AABB and concave objects that support 'processAllTriangles'
+///Example usage of this class:
+///			btTriangleBuffer	triBuf;
+///			concaveShape->processAllTriangles(&triBuf,aabbMin, aabbMax);
+///			for (int i=0;i<triBuf.getNumTriangles();i++)
+///			{
+///				const btTriangle& tri = triBuf.getTriangle(i);
+///				//do something useful here with the triangle
+///			}
+class btTriangleBuffer : public btTriangleCallback
+{
+	///triangles collected so far, in the order processTriangle received them
+	btAlignedObjectArray<btTriangle>	m_triangleBuffer;
+	
+public:
+
+	///appends a copy of the given triangle and its ids to the buffer (defined in btTriangleBuffer.cpp)
+	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex);
+	
+	int	getNumTriangles() const
+	{
+		return int(m_triangleBuffer.size());
+	}
+	///returns the index-th collected triangle
+	const btTriangle&	getTriangle(int index) const
+	{
+		return m_triangleBuffer[index];
+	}
+	///discards all collected triangles
+	void	clearBuffer()
+	{
+		m_triangleBuffer.clear();
+	}
+	
+};
+
+
+#endif //BT_TRIANGLE_BUFFER_H
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleCallback.cpp b/src/bullet/BulletCollision/CollisionShapes/btTriangleCallback.cpp
new file mode 100644
index 00000000..f558bf6d
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleCallback.cpp
@@ -0,0 +1,28 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btTriangleCallback.h"
+///Out-of-line definition of the virtual destructor declared in btTriangleCallback.h; the body is empty.
+btTriangleCallback::~btTriangleCallback()
+{
+
+}
+
+///Out-of-line definition of the virtual destructor declared in btTriangleCallback.h; the body is empty.
+btInternalTriangleIndexCallback::~btInternalTriangleIndexCallback()
+{
+
+}
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleCallback.h b/src/bullet/BulletCollision/CollisionShapes/btTriangleCallback.h
new file mode 100644
index 00000000..461c57f8
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleCallback.h
@@ -0,0 +1,42 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_TRIANGLE_CALLBACK_H
+#define BT_TRIANGLE_CALLBACK_H
+
+#include "LinearMath/btVector3.h"
+
+
+///The btTriangleCallback provides a callback for each overlapping triangle when calling processAllTriangles.
+///This callback is called by processAllTriangles for all btConcaveShape derived class, such as  btBvhTriangleMeshShape, btStaticPlaneShape and btHeightfieldTerrainShape.
+class btTriangleCallback
+{
+public:
+	///processTriangle gets a pointer to the triangle's vertices (btTriangleBuffer reads triangle[0..2]) plus the part id and triangle index
+	virtual ~btTriangleCallback();
+	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex) = 0;
+};
+///Callback interface accepted by btStridingMeshInterface::InternalProcessAllTriangles.
+class btInternalTriangleIndexCallback
+{
+public:
+
+	virtual ~btInternalTriangleIndexCallback();
+	virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int  triangleIndex) = 0;
+};
+
+
+
+#endif //BT_TRIANGLE_CALLBACK_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.cpp b/src/bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.cpp
new file mode 100644
index 00000000..a665024c
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.cpp
@@ -0,0 +1,95 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btTriangleIndexVertexArray.h"
+///Backwards-compatible constructor: wraps the given index/vertex arrays in one btIndexedMesh and registers it as the first subpart.
+btTriangleIndexVertexArray::btTriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,btScalar* vertexBase,int vertexStride)
+: m_hasAabb(0)
+{
+	btIndexedMesh mesh;
+	//m_vertexType keeps the default chosen by btIndexedMesh's constructor; only counts, pointers and strides are set here
+	mesh.m_numTriangles = numTriangles;
+	mesh.m_triangleIndexBase = (const unsigned char *)triangleIndexBase;
+	mesh.m_triangleIndexStride = triangleIndexStride;
+	mesh.m_numVertices = numVertices;
+	mesh.m_vertexBase = (const unsigned char *)vertexBase;
+	mesh.m_vertexStride = vertexStride;
+	//called with the default indexType argument (PHY_INTEGER)
+	addIndexedMesh(mesh);
+
+}
+///Empty destructor body: nothing is released explicitly here.
+btTriangleIndexVertexArray::~btTriangleIndexVertexArray()
+{
+
+}
+///Describes the vertex and index arrays of subpart for read/write access; only pointers and counts are handed out.
+///The mesh stores const pointers, so constness is cast away for the writable view.
+void	btTriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart)
+{
+	btAssert(subpart< getNumSubParts() );
+
+	btIndexedMesh& part = m_indexedMeshes[subpart];
+
+	//vertex array description
+	*vertexbase = const_cast<unsigned char *>(part.m_vertexBase);
+	numverts = part.m_numVertices;
+	vertexStride = part.m_vertexStride;
+	type = part.m_vertexType;
+
+	//index array description
+	*indexbase = const_cast<unsigned char *>(part.m_triangleIndexBase);
+	numfaces = part.m_numTriangles;
+	indexstride = part.m_triangleIndexStride;
+	indicestype = part.m_indexType;
+}
+///Read-only counterpart of getLockedVertexIndexBase: describes the vertex and index arrays of subpart and asserts that subpart is below getNumSubParts().
+void	btTriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) const
+{
+	btAssert(subpart< getNumSubParts() );
+
+	const btIndexedMesh& mesh = m_indexedMeshes[subpart];
+
+	numverts = mesh.m_numVertices;
+	(*vertexbase) = (const unsigned char *)mesh.m_vertexBase;
+	type = mesh.m_vertexType;
+	vertexStride = mesh.m_vertexStride;
+
+	numfaces = mesh.m_numTriangles;
+	(*indexbase) = (const unsigned char *)mesh.m_triangleIndexBase;
+	indexstride = mesh.m_triangleIndexStride;
+	indicestype = mesh.m_indexType;
+}
+///True once setPremadeAabb has stored a box (m_hasAabb == 1); the constructors start with 0.
+bool	btTriangleIndexVertexArray::hasPremadeAabb() const
+{
+	return (m_hasAabb == 1);
+}
+
+///Stores the given box and marks it as available; const because the three members written are declared mutable.
+void	btTriangleIndexVertexArray::setPremadeAabb(const btVector3& aabbMin, const btVector3& aabbMax ) const
+{
+	m_aabbMin = aabbMin;
+	m_aabbMax = aabbMax;
+	m_hasAabb = 1; // this is intentionally an int see notes in header
+}
+///Copies the stored box into *aabbMin/*aabbMax; both pointers are dereferenced unchecked and m_hasAabb is not consulted.
+void	btTriangleIndexVertexArray::getPremadeAabb(btVector3* aabbMin, btVector3* aabbMax ) const
+{
+	*aabbMin = m_aabbMin;
+	*aabbMax = m_aabbMax;
+}
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h b/src/bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h
new file mode 100644
index 00000000..9e1544e8
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h
@@ -0,0 +1,133 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_TRIANGLE_INDEX_VERTEX_ARRAY_H
+#define BT_TRIANGLE_INDEX_VERTEX_ARRAY_H
+
+#include "btStridingMeshInterface.h"
+#include "LinearMath/btAlignedObjectArray.h"
+#include "LinearMath/btScalar.h"
+
+
+///The btIndexedMesh indexes a single vertex and index array. Multiple btIndexedMesh objects can be passed into a btTriangleIndexVertexArray using addIndexedMesh.
+///Instead of the number of indices, we pass the number of triangles.
+ATTRIBUTE_ALIGNED16( struct)	btIndexedMesh
+{
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+   // The two base pointers below reference arrays that this struct only points to.
+   int                     m_numTriangles;
+   const unsigned char *   m_triangleIndexBase;
+   // Size in bytes of the indices for one triangle (3*sizeof(index_type) if the indices are tightly packed)
+   int                     m_triangleIndexStride;
+   int                     m_numVertices;
+   const unsigned char *   m_vertexBase;
+   // Size of a vertex, in bytes
+   int                     m_vertexStride;
+
+   // The index type is set when adding an indexed mesh to the
+   // btTriangleIndexVertexArray, do not set it manually
+   PHY_ScalarType m_indexType;
+
+   // The vertex type has a default type similar to Bullet's precision mode (float or double)
+   // but can be set manually if you for example run Bullet with double precision but have
+   // mesh data in single precision..
+   PHY_ScalarType m_vertexType;
+
+   // Only the two type fields are initialized by this constructor; counts, pointers and strides are left for the caller to set.
+   btIndexedMesh()
+	   :m_indexType(PHY_INTEGER),
+#ifdef BT_USE_DOUBLE_PRECISION
+      m_vertexType(PHY_DOUBLE)
+#else // BT_USE_DOUBLE_PRECISION
+      m_vertexType(PHY_FLOAT)
+#endif // BT_USE_DOUBLE_PRECISION
+      {
+      }
+}
+;
+
+
+typedef btAlignedObjectArray<btIndexedMesh>	IndexedMeshArray;
+
+///The btTriangleIndexVertexArray allows to access multiple triangle meshes, by indexing into existing triangle/index arrays.
+///Additional meshes can be added using addIndexedMesh
+///No duplicate is made of the vertex/index data, it only indexes into external vertex/index arrays.
+///So keep those arrays around during the lifetime of this btTriangleIndexVertexArray.
+ATTRIBUTE_ALIGNED16( class) btTriangleIndexVertexArray : public btStridingMeshInterface
+{
+protected:
+	IndexedMeshArray	m_indexedMeshes;
+	int m_pad[2];
+	mutable int m_hasAabb; // using int instead of bool to maintain alignment
+	mutable btVector3 m_aabbMin;
+	mutable btVector3 m_aabbMax;
+
+public:
+
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	btTriangleIndexVertexArray() : m_hasAabb(0)
+	{
+	}
+
+	virtual ~btTriangleIndexVertexArray();
+
+	//just to be backwards compatible
+	btTriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,btScalar* vertexBase,int vertexStride);
+	///appends a copy of mesh as a new subpart; the copy's m_indexType is overwritten with indexType
+	void	addIndexedMesh(const btIndexedMesh& mesh, PHY_ScalarType indexType = PHY_INTEGER)
+	{
+		m_indexedMeshes.push_back(mesh);
+		m_indexedMeshes[m_indexedMeshes.size()-1].m_indexType = indexType;
+	}
+	
+	///both accessors below report the pointers, counts, strides and types stored in m_indexedMeshes[subpart]
+	virtual void	getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0);
+
+	virtual void	getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0) const;
+
+	/// unLockVertexBase finishes the access to a subpart of the triangle mesh
+	/// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished
+	virtual void	unLockVertexBase(int subpart) {(void)subpart;}
+
+	virtual void	unLockReadOnlyVertexBase(int subpart) const {(void)subpart;}
+
+	/// getNumSubParts returns the number of separate subparts
+	/// each subpart has a continuous array of vertices and indices
+	virtual int		getNumSubParts() const { 
+		return (int)m_indexedMeshes.size();
+	}
+
+	IndexedMeshArray&	getIndexedMeshArray()
+	{
+		return m_indexedMeshes;
+	}
+
+	const IndexedMeshArray&	getIndexedMeshArray() const
+	{
+		return m_indexedMeshes;
+	}
+	///no-ops in this class: the arguments are ignored
+	virtual void	preallocateVertices(int numverts){(void) numverts;}
+	virtual void	preallocateIndices(int numindices){(void) numindices;}
+	///premade AABB storage; the setter is const and writes the mutable members declared above
+	virtual bool	hasPremadeAabb() const;
+	virtual void	setPremadeAabb(const btVector3& aabbMin, const btVector3& aabbMax ) const;
+	virtual void	getPremadeAabb(btVector3* aabbMin, btVector3* aabbMax ) const;
+
+}
+;
+
+#endif //BT_TRIANGLE_INDEX_VERTEX_ARRAY_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexMaterialArray.cpp b/src/bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexMaterialArray.cpp
new file mode 100644
index 00000000..dc562941
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexMaterialArray.cpp
@@ -0,0 +1,86 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///This file was created by Alex Silverman
+
+#include "btTriangleIndexVertexMaterialArray.h"
+
+btTriangleIndexVertexMaterialArray::btTriangleIndexVertexMaterialArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,
+                                   int numVertices,btScalar* vertexBase,int vertexStride,
+                                   int numMaterials, unsigned char* materialBase, int materialStride,
+                                   int* triangleMaterialsBase, int materialIndexStride)
+    : btTriangleIndexVertexArray(numTriangles, triangleIndexBase, triangleIndexStride, numVertices, vertexBase, vertexStride)
+{
+    // Record the caller-supplied material buffers; only pointers, counts and strides are stored, nothing is copied.
+    btMaterialProperties props;
+    // Per-triangle material index table, described as PHY_INTEGER entries.
+    props.m_triangleType = PHY_INTEGER;
+    props.m_triangleMaterialStride = materialIndexStride;
+    props.m_triangleMaterialsBase = reinterpret_cast<unsigned char *>(triangleMaterialsBase);
+    props.m_numTriangles = numTriangles;
+    // Material table; its scalar type is chosen by the BT_USE_DOUBLE_PRECISION build switch.
+#ifdef BT_USE_DOUBLE_PRECISION
+    props.m_materialType = PHY_DOUBLE;
+#else
+    props.m_materialType = PHY_FLOAT;
+#endif
+    props.m_materialStride = materialStride;
+    props.m_materialBase = materialBase;
+    props.m_numMaterials = numMaterials;
+    addMaterialProperties(props);
+}
+
+
+void btTriangleIndexVertexMaterialArray::getLockedMaterialBase(unsigned char **materialBase, int& numMaterials, PHY_ScalarType& materialType, int& materialStride,
+                                   unsigned char ** triangleMaterialBase, int& numTriangles, int& triangleMaterialStride, PHY_ScalarType& triangleType, int subpart)
+{
+    // Exposes writable views of the material data stored for 'subpart' through the output parameters.
+    btAssert(subpart< getNumSubParts() );
+    const btMaterialProperties& entry = m_materials[subpart];
+    // Material table: the stored pointer is const, so constness is cast away; the type follows the build precision.
+    numMaterials = entry.m_numMaterials;
+    *materialBase = const_cast<unsigned char *>(entry.m_materialBase);
+#ifdef BT_USE_DOUBLE_PRECISION
+    materialType = PHY_DOUBLE;
+#else
+    materialType = PHY_FLOAT;
+#endif
+    materialStride = entry.m_materialStride;
+    // Per-triangle material index table.
+    numTriangles = entry.m_numTriangles;
+    *triangleMaterialBase = const_cast<unsigned char *>(entry.m_triangleMaterialsBase);
+    triangleMaterialStride = entry.m_triangleMaterialStride;
+    triangleType = entry.m_triangleType;
+}
+
+void btTriangleIndexVertexMaterialArray::getLockedReadOnlyMaterialBase(const unsigned char **materialBase, int& numMaterials, PHY_ScalarType& materialType, int& materialStride,
+                                           const unsigned char ** triangleMaterialBase, int& numTriangles, int& triangleMaterialStride, PHY_ScalarType& triangleType, int subpart)
+{
+    // Read-only view of the material data stored for 'subpart'; the index is checked exactly as in getLockedMaterialBase.
+    btAssert(subpart< getNumSubParts() );
+    btMaterialProperties& mats = m_materials[subpart];
+    numMaterials = mats.m_numMaterials;
+    (*materialBase) = (const unsigned char *) mats.m_materialBase;
+#ifdef BT_USE_DOUBLE_PRECISION
+    materialType = PHY_DOUBLE;
+#else
+    materialType = PHY_FLOAT;
+#endif
+    materialStride = mats.m_materialStride;
+    numTriangles = mats.m_numTriangles;
+    (*triangleMaterialBase) = (const unsigned char *)mats.m_triangleMaterialsBase;
+    triangleMaterialStride = mats.m_triangleMaterialStride;
+    triangleType = mats.m_triangleType;
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexMaterialArray.h b/src/bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexMaterialArray.h
new file mode 100644
index 00000000..ba4f7b46
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleIndexVertexMaterialArray.h
@@ -0,0 +1,84 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///This file was created by Alex Silverman
+
+#ifndef BT_MULTIMATERIAL_TRIANGLE_INDEX_VERTEX_ARRAY_H
+#define BT_MULTIMATERIAL_TRIANGLE_INDEX_VERTEX_ARRAY_H
+
+#include "btTriangleIndexVertexArray.h"
+
+
+ATTRIBUTE_ALIGNED16( struct)	btMaterialProperties
+{
+    ///m_materialBase ==========> 2 btScalar values make up one material, friction then restitution
+    int m_numMaterials;
+    const unsigned char * m_materialBase;
+    int m_materialStride;
+    PHY_ScalarType m_materialType;
+    ///m_numTriangles <=========== This exists in the btIndexedMesh object for the same subpart, but since we're
+    ///                           padding the structure, it can be reproduced at no real cost
+    ///m_triangleMaterials =====> 1 integer value makes up one entry
+    ///                           eg: m_triangleMaterials[1] = 5; // This will set triangle 2 to use material 5
+    int m_numTriangles; 
+    const unsigned char * m_triangleMaterialsBase;
+    int m_triangleMaterialStride;
+    ///m_triangleType <========== Automatically set in addMaterialProperties
+    PHY_ScalarType m_triangleType;
+};
+
+typedef btAlignedObjectArray<btMaterialProperties>	MaterialArray;
+
+///The btTriangleIndexVertexMaterialArray is built on TriangleIndexVertexArray
+///The addition of a material array allows for the utilization of the partID and
+///triangleIndex that are returned in the ContactAddedCallback.  As with
+///TriangleIndexVertexArray, no duplicate is made of the material data, so it
+///is the user's responsibility to maintain the array during the lifetime of the
+///TriangleIndexVertexMaterialArray.
+ATTRIBUTE_ALIGNED16(class) btTriangleIndexVertexMaterialArray : public btTriangleIndexVertexArray
+{
+protected:
+    /// One entry per addMaterialProperties call; the getLocked*MaterialBase accessors index it by subpart.
+    MaterialArray       m_materials;
+
+public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+    btTriangleIndexVertexMaterialArray() {}
+
+    btTriangleIndexVertexMaterialArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,
+        int numVertices,btScalar* vertexBase,int vertexStride,
+        int numMaterials, unsigned char* materialBase, int materialStride,
+        int* triangleMaterialsBase, int materialIndexStride);
+
+    virtual ~btTriangleIndexVertexMaterialArray() {}
+
+    /// Appends a copy of mat; the stored copy's m_triangleType is always overwritten with triangleType.
+    void	addMaterialProperties(const btMaterialProperties& mat, PHY_ScalarType triangleType = PHY_INTEGER)
+    {
+        m_materials.push_back(mat);
+        btMaterialProperties& stored = m_materials[m_materials.size()-1];
+        stored.m_triangleType = triangleType;
+    }
+
+    virtual void getLockedMaterialBase(unsigned char **materialBase, int& numMaterials, PHY_ScalarType& materialType, int& materialStride,
+        unsigned char ** triangleMaterialBase, int& numTriangles, int& triangleMaterialStride, PHY_ScalarType& triangleType ,int subpart = 0);
+
+    virtual void getLockedReadOnlyMaterialBase(const unsigned char **materialBase, int& numMaterials, PHY_ScalarType& materialType, int& materialStride,
+        const unsigned char ** triangleMaterialBase, int& numTriangles, int& triangleMaterialStride, PHY_ScalarType& triangleType, int subpart = 0);
+}
+;
+
+#endif //BT_MULTIMATERIAL_TRIANGLE_INDEX_VERTEX_ARRAY_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleInfoMap.h b/src/bullet/BulletCollision/CollisionShapes/btTriangleInfoMap.h
new file mode 100644
index 00000000..1cea7045
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleInfoMap.h
@@ -0,0 +1,241 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2010 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef _BT_TRIANGLE_INFO_MAP_H
+#define _BT_TRIANGLE_INFO_MAP_H
+
+
+#include "LinearMath/btHashMap.h"
+#include "LinearMath/btSerializer.h"
+
+
+///for btTriangleInfo m_flags
+#define TRI_INFO_V0V1_CONVEX 1
+#define TRI_INFO_V1V2_CONVEX 2
+#define TRI_INFO_V2V0_CONVEX 4
+
+#define TRI_INFO_V0V1_SWAP_NORMALB 8
+#define TRI_INFO_V1V2_SWAP_NORMALB 16
+#define TRI_INFO_V2V0_SWAP_NORMALB 32
+
+
+///The btTriangleInfo structure stores information to adjust collision normals to avoid collisions against internal edges
+///it can be generated using btGenerateInternalEdgeInfo
+struct	btTriangleInfo
+{
+	/// Starts with no flag bits set and every edge angle at SIMD_2_PI.
+	btTriangleInfo()
+		: m_flags(0),
+		m_edgeV0V1Angle(SIMD_2_PI),
+		m_edgeV1V2Angle(SIMD_2_PI),
+		m_edgeV2V0Angle(SIMD_2_PI)
+	{
+	}
+
+	int			m_flags;	///< combination of the TRI_INFO_* values
+
+	btScalar	m_edgeV0V1Angle;
+	btScalar	m_edgeV1V2Angle;
+	btScalar	m_edgeV2V0Angle;
+};
+
+typedef btHashMap<btHashInt,btTriangleInfo> btInternalTriangleInfoMap;
+
+
+///The btTriangleInfoMap stores edge angle information for some triangles. You can compute this information yourself or using btGenerateInternalEdgeInfo.
+struct	btTriangleInfoMap : public btInternalTriangleInfoMap
+{
+	btScalar	m_convexEpsilon;///used to determine if an edge or contact normal is convex, using the dot product
+	btScalar	m_planarEpsilon; ///used to determine if a triangle edge is planar with zero angle
+	btScalar	m_equalVertexThreshold; ///used to compute connectivity: if the distance between two vertices is smaller than m_equalVertexThreshold, they are considered to be 'shared'
+	btScalar	m_edgeDistanceThreshold; ///used to determine edge contacts: if the closest distance between a contact point and an edge is smaller than this distance threshold it is considered to "hit the edge"
+	btScalar	m_maxEdgeAngleThreshold; ///ignore edges that connect triangles at an angle larger than this m_maxEdgeAngleThreshold; not part of btTriangleInfoMapData, so serialize/deSerialize skip it
+	btScalar	m_zeroAreaThreshold; ///used to determine if a triangle is degenerate (length squared of cross product of 2 triangle edges < threshold)
+
+	///installs the default tolerances; the initializers follow the member declaration order
+	btTriangleInfoMap()
+		: m_convexEpsilon(btScalar(0.00f)),
+		m_planarEpsilon(btScalar(0.0001f)),
+		m_equalVertexThreshold(btScalar(0.0001)*btScalar(0.0001)),
+		m_edgeDistanceThreshold(btScalar(0.1)),
+		m_maxEdgeAngleThreshold(SIMD_2_PI),
+		m_zeroAreaThreshold(btScalar(0.0001)*btScalar(0.0001))
+	{
+	}
+	virtual ~btTriangleInfoMap() {}
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+	///copies the thresholds and arrays held in data back into this map
+	void	deSerialize(struct btTriangleInfoMapData& data);
+};
+
+///those fields have to be float and not btScalar for the serialization to work properly
+struct	btTriangleInfoData
+{
+	int			m_flags;
+	float	m_edgeV0V1Angle;
+	float	m_edgeV1V2Angle;
+	float	m_edgeV2V0Angle;
+};
+
+struct	btTriangleInfoMapData
+{
+	int					*m_hashTablePtr;
+	int					*m_nextPtr;
+	btTriangleInfoData	*m_valueArrayPtr;
+	int					*m_keyArrayPtr;
+
+	float	m_convexEpsilon;
+	float	m_planarEpsilon;
+	float	m_equalVertexThreshold; 
+	float	m_edgeDistanceThreshold;
+	float	m_zeroAreaThreshold;
+
+	int		m_nextSize;
+	int		m_hashTableSize;
+	int		m_numValues;
+	int		m_numKeys;
+	char	m_padding[4];
+};
+
+SIMD_FORCE_INLINE	int	btTriangleInfoMap::calculateSerializeBufferSize() const
+{
+	return static_cast<int>(sizeof(btTriangleInfoMapData));	//the struct that serialize() fills in
+}
+
+///fills the dataBuffer (written as a btTriangleInfoMapData) and returns the struct name; m_maxEdgeAngleThreshold is not stored
+SIMD_FORCE_INLINE	const char*	btTriangleInfoMap::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btTriangleInfoMapData* tmapData = (btTriangleInfoMapData*) dataBuffer;
+	tmapData->m_convexEpsilon = m_convexEpsilon;
+	tmapData->m_planarEpsilon = m_planarEpsilon;
+	tmapData->m_equalVertexThreshold = m_equalVertexThreshold;
+	tmapData->m_edgeDistanceThreshold = m_edgeDistanceThreshold;
+	tmapData->m_zeroAreaThreshold = m_zeroAreaThreshold;
+	//each array below goes into its own chunk; an array with no elements gets a null pointer and no chunk
+	tmapData->m_hashTableSize = m_hashTable.size();
+
+	tmapData->m_hashTablePtr = tmapData->m_hashTableSize ? (int*)serializer->getUniquePointer((void*)&m_hashTable[0]) : 0;
+	if (tmapData->m_hashTablePtr)
+	{ 
+		//serialize an int buffer
+		int sz = sizeof(int);
+		int numElem = tmapData->m_hashTableSize;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		int* memPtr = (int*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			*memPtr = m_hashTable[i];
+		}
+		serializer->finalizeChunk(chunk,"int",BT_ARRAY_CODE,(void*)&m_hashTable[0]);
+
+	}
+
+	tmapData->m_nextSize = m_next.size();
+	tmapData->m_nextPtr = tmapData->m_nextSize? (int*)serializer->getUniquePointer((void*)&m_next[0]): 0;
+	if (tmapData->m_nextPtr)
+	{
+		int sz = sizeof(int);
+		int numElem = tmapData->m_nextSize;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		int* memPtr = (int*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			*memPtr = m_next[i];
+		}
+		serializer->finalizeChunk(chunk,"int",BT_ARRAY_CODE,(void*)&m_next[0]);
+	}
+	
+	tmapData->m_numValues = m_valueArray.size();
+	tmapData->m_valueArrayPtr = tmapData->m_numValues ? (btTriangleInfoData*)serializer->getUniquePointer((void*)&m_valueArray[0]): 0;
+	if (tmapData->m_valueArrayPtr)
+	{
+		int sz = sizeof(btTriangleInfoData);
+		int numElem = tmapData->m_numValues;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		btTriangleInfoData* memPtr = (btTriangleInfoData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			memPtr->m_edgeV0V1Angle = m_valueArray[i].m_edgeV0V1Angle;
+			memPtr->m_edgeV1V2Angle = m_valueArray[i].m_edgeV1V2Angle;
+			memPtr->m_edgeV2V0Angle = m_valueArray[i].m_edgeV2V0Angle;
+			memPtr->m_flags = m_valueArray[i].m_flags;
+		}
+		serializer->finalizeChunk(chunk,"btTriangleInfoData",BT_ARRAY_CODE,(void*) &m_valueArray[0]);
+	}
+	
+	tmapData->m_numKeys = m_keyArray.size();
+	tmapData->m_keyArrayPtr = tmapData->m_numKeys ? (int*)serializer->getUniquePointer((void*)&m_keyArray[0]) : 0;
+	if (tmapData->m_keyArrayPtr)
+	{
+		int sz = sizeof(int);
+		//size the chunk by the key count (it was sized by m_numValues before); each key is stored as its getUid1() value
+		int numElem = tmapData->m_numKeys;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		int* memPtr = (int*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			*memPtr = m_keyArray[i].getUid1();
+		}
+		serializer->finalizeChunk(chunk,"int",BT_ARRAY_CODE,(void*) &m_keyArray[0]);
+	}
+	return "btTriangleInfoMapData";
+}
+
+
+
+///restores the thresholds and the hash table, next, value and key arrays from tmapData; m_maxEdgeAngleThreshold is left untouched
+SIMD_FORCE_INLINE	void	btTriangleInfoMap::deSerialize(btTriangleInfoMapData& tmapData )
+{
+	//scalar thresholds
+	m_convexEpsilon = tmapData.m_convexEpsilon;
+	m_planarEpsilon = tmapData.m_planarEpsilon;
+	m_equalVertexThreshold = tmapData.m_equalVertexThreshold;
+	m_edgeDistanceThreshold = tmapData.m_edgeDistanceThreshold;
+	m_zeroAreaThreshold = tmapData.m_zeroAreaThreshold;
+	//every array is first resized to the stored element count and then copied element by element
+	m_hashTable.resize(tmapData.m_hashTableSize);
+	for (int slot=0;slot<tmapData.m_hashTableSize;slot++)
+	{
+		m_hashTable[slot] = tmapData.m_hashTablePtr[slot];
+	}
+	m_next.resize(tmapData.m_nextSize);
+	for (int link=0;link<tmapData.m_nextSize;link++)
+	{
+		m_next[link] = tmapData.m_nextPtr[link];
+	}
+	m_valueArray.resize(tmapData.m_numValues);
+	for (int v=0;v<tmapData.m_numValues;v++)
+	{
+		const btTriangleInfoData& stored = tmapData.m_valueArrayPtr[v];
+		m_valueArray[v].m_edgeV0V1Angle = stored.m_edgeV0V1Angle;
+		m_valueArray[v].m_edgeV1V2Angle = stored.m_edgeV1V2Angle;
+		m_valueArray[v].m_edgeV2V0Angle = stored.m_edgeV2V0Angle;
+		m_valueArray[v].m_flags = stored.m_flags;
+	}
+	
+	m_keyArray.resize(tmapData.m_numKeys,btHashInt(0));
+	for (int k=0;k<tmapData.m_numKeys;k++)
+	{
+		m_keyArray[k].setUid1(tmapData.m_keyArrayPtr[k]);
+	}
+}
+
+
+#endif //_BT_TRIANGLE_INFO_MAP_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleMesh.cpp b/src/bullet/BulletCollision/CollisionShapes/btTriangleMesh.cpp
new file mode 100644
index 00000000..b29e0f71
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleMesh.cpp
@@ -0,0 +1,140 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btTriangleMesh.h"
+
+
+
+btTriangleMesh::btTriangleMesh (bool use32bitIndices,bool use4componentVertices)
+:m_use32bitIndices(use32bitIndices),
+m_use4componentVertices(use4componentVertices),
+m_weldingThreshold(0.0)
+{
+	//register one empty indexed mesh up front; addIndex and findOrAddVertex later update element 0 in place
+	btIndexedMesh emptyMesh;
+	emptyMesh.m_numTriangles = 0;
+	emptyMesh.m_numVertices = 0;
+	emptyMesh.m_indexType = PHY_INTEGER;
+	emptyMesh.m_triangleIndexBase = 0;
+	emptyMesh.m_triangleIndexStride = 3*sizeof(int);
+	emptyMesh.m_vertexBase = 0;
+	emptyMesh.m_vertexStride = sizeof(btVector3);
+	m_indexedMeshes.push_back(emptyMesh);
+	btIndexedMesh& mesh = m_indexedMeshes[0];
+
+	//index layout: three indices per triangle, int sized or short sized depending on m_use32bitIndices
+	mesh.m_triangleIndexBase = 0;
+	if (m_use32bitIndices)
+	{
+		mesh.m_numTriangles = m_32bitIndices.size()/3;
+		mesh.m_indexType = PHY_INTEGER;
+		mesh.m_triangleIndexStride = 3*sizeof(int);
+	} else
+	{
+		mesh.m_numTriangles = m_16bitIndices.size()/3;
+		mesh.m_indexType = PHY_SHORT;
+		mesh.m_triangleIndexStride = 3*sizeof(short int);
+	}
+
+	//vertex layout: one btVector3 per vertex, or three consecutive scalars per vertex
+	mesh.m_vertexBase = 0;
+	if (m_use4componentVertices)
+	{
+		mesh.m_numVertices = m_4componentVertices.size();
+		mesh.m_vertexStride = sizeof(btVector3);
+	} else
+	{
+		mesh.m_numVertices = m_3componentVertices.size()/3;
+		mesh.m_vertexStride = 3*sizeof(btScalar);
+	}
+}
+
+void	btTriangleMesh::addIndex(int index)
+{
+	//append to the active index array, then point the mesh at that array's current first element
+	if (!m_use32bitIndices)
+	{
+		m_16bitIndices.push_back((unsigned short int)index);
+		m_indexedMeshes[0].m_triangleIndexBase = (unsigned char*) &m_16bitIndices[0];
+		return;
+	}
+	m_32bitIndices.push_back((unsigned int)index);
+	m_indexedMeshes[0].m_triangleIndexBase = (unsigned char*) &m_32bitIndices[0];
+}
+
+
+int	btTriangleMesh::findOrAddVertex(const btVector3& vertex, bool removeDuplicateVertices)
+{
+	//with removeDuplicateVertices set, returns the index of the first stored vertex whose length2() distance
+	//to vertex is <= m_weldingThreshold; otherwise (or if none matches) appends vertex and returns its new index
+	///@todo: could use acceleration structure for this
+	btIndexedMesh& mesh = m_indexedMeshes[0];
+
+	if (!m_use4componentVertices)
+	{
+		if (removeDuplicateVertices)
+		{
+			const int numScalars = m_3componentVertices.size();
+			for (int base=0;base<numScalars;base+=3)
+			{
+				btVector3 candidate(m_3componentVertices[base],m_3componentVertices[base+1],m_3componentVertices[base+2]);
+				if ((candidate-vertex).length2() <= m_weldingThreshold)
+				{
+					return base/3;
+				}
+			}
+		}
+		m_3componentVertices.push_back((float)vertex.getX());
+		m_3componentVertices.push_back((float)vertex.getY());
+		m_3componentVertices.push_back((float)vertex.getZ());
+		mesh.m_numVertices++;
+		mesh.m_vertexBase = (unsigned char*)&m_3componentVertices[0];
+		return (m_3componentVertices.size()/3)-1;
+	}
+
+	if (removeDuplicateVertices)
+	{
+		const int numStored = m_4componentVertices.size();
+		for (int i=0;i<numStored;i++)
+		{
+			if ((m_4componentVertices[i]-vertex).length2() <= m_weldingThreshold)
+			{
+				return i;
+			}
+		}
+	}
+	mesh.m_numVertices++;
+	m_4componentVertices.push_back(vertex);
+	mesh.m_vertexBase = (unsigned char*)&m_4componentVertices[0];
+	return m_4componentVertices.size()-1;
+}
+		
+void	btTriangleMesh::addTriangle(const btVector3& vertex0,const btVector3& vertex1,const btVector3& vertex2,bool removeDuplicateVertices)
+{
+	//count the triangle first, then append one index per corner in vertex0, vertex1, vertex2 order
+	m_indexedMeshes[0].m_numTriangles++;
+	const btVector3* corners[3] = {&vertex0,&vertex1,&vertex2};
+	for (int c=0;c<3;c++) addIndex(findOrAddVertex(*corners[c],removeDuplicateVertices));
+}
+
+int btTriangleMesh::getNumTriangles() const
+{
+	//three indices per triangle in whichever index array is active
+	const int numIndices = m_use32bitIndices
+		? m_32bitIndices.size()
+		: m_16bitIndices.size();
+	return numIndices / 3;
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleMesh.h b/src/bullet/BulletCollision/CollisionShapes/btTriangleMesh.h
new file mode 100644
index 00000000..f623157f
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleMesh.h
@@ -0,0 +1,69 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_TRIANGLE_MESH_H
+#define BT_TRIANGLE_MESH_H
+
+#include "btTriangleIndexVertexArray.h"
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+///The btTriangleMesh class is a convenience class derived from btTriangleIndexVertexArray, that provides storage for a concave triangle mesh. It can be used as data for the btBvhTriangleMeshShape.
+///It allows either 32bit or 16bit indices, and 4 (x-y-z-w) or 3 (x-y-z) component vertices.
+///If you want to share triangle/index data between graphics mesh and collision mesh (btBvhTriangleMeshShape), you can directly use btTriangleIndexVertexArray or derive your own class from btStridingMeshInterface.
+///Performance of btTriangleMesh and btTriangleIndexVertexArray used in a btBvhTriangleMeshShape is the same.
+class btTriangleMesh : public btTriangleIndexVertexArray
+{
+	btAlignedObjectArray<btVector3>	m_4componentVertices;
+	///stored as btScalar so the element size agrees with the 3*sizeof(btScalar) vertex stride published by the constructor
+	btAlignedObjectArray<btScalar>		m_3componentVertices;
+	btAlignedObjectArray<unsigned int>		m_32bitIndices;
+	btAlignedObjectArray<unsigned short int>		m_16bitIndices;
+	bool	m_use32bitIndices;
+	bool	m_use4componentVertices;
+	
+	public:
+		///largest length2() distance at which findOrAddVertex treats two vertices as the same one
+		btScalar	m_weldingThreshold;
+
+		btTriangleMesh (bool use32bitIndices=true,bool use4componentVertices=true);
+
+		bool	getUse32bitIndices() const
+		{
+			return m_use32bitIndices;
+		}
+
+		bool	getUse4componentVertices() const
+		{
+			return m_use4componentVertices;
+		}
+		///By default addTriangle won't search for duplicate vertices, because the search is very slow for large triangle meshes.
+		///In general it is better to directly use btTriangleIndexVertexArray instead.
+		void	addTriangle(const btVector3& vertex0,const btVector3& vertex1,const btVector3& vertex2, bool removeDuplicateVertices=false);
+		
+		int getNumTriangles() const;
+
+		virtual void	preallocateVertices(int numverts){(void) numverts;}
+		virtual void	preallocateIndices(int numindices){(void) numindices;}
+
+		///findOrAddVertex is an internal method, use addTriangle instead
+		int		findOrAddVertex(const btVector3& vertex, bool removeDuplicateVertices);
+		///addIndex is an internal method, use addTriangle instead
+		void	addIndex(int index);
+		
+};
+
+#endif //BT_TRIANGLE_MESH_H
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleMeshShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btTriangleMeshShape.cpp
new file mode 100644
index 00000000..683684da
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleMeshShape.cpp
@@ -0,0 +1,211 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btTriangleMeshShape.h"
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btQuaternion.h"
+#include "btStridingMeshInterface.h"
+#include "LinearMath/btAabbUtil2.h"
+#include "BulletCollision/CollisionShapes/btCollisionMargin.h"
+
+
+btTriangleMeshShape::btTriangleMeshShape(btStridingMeshInterface* meshInterface)
+: btConcaveShape (), m_meshInterface(meshInterface)
+{
+	m_shapeType = TRIANGLE_MESH_SHAPE_PROXYTYPE;
+
+	//without a premade AABB on the mesh interface, derive the local AABB via recalcLocalAabb
+	if (!m_meshInterface->hasPremadeAabb())
+	{
+		recalcLocalAabb();
+		return;
+	}
+	m_meshInterface->getPremadeAabb(&m_localAabbMin, &m_localAabbMax);
+}
+
+
+btTriangleMeshShape::~btTriangleMeshShape()
+{
+		
+}
+
+
+
+
+void btTriangleMeshShape::getAabb(const btTransform& trans,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	//express the local box as centre + half extents and add getMargin() on every axis
+	//(recalcLocalAabb, when it was used, has already padded the local AABB by m_collisionMargin)
+	btVector3 halfExtents = btScalar(0.5)*(m_localAabbMax-m_localAabbMin);
+	halfExtents += btVector3(getMargin(),getMargin(),getMargin());
+	const btVector3 localCenter = btScalar(0.5)*(m_localAabbMax+m_localAabbMin);
+
+	//dot each row of the absolute basis with the half extents to get the extent along each world axis
+	btMatrix3x3 absBasis = trans.getBasis().absolute();
+	const btVector3 worldCenter = trans(localCenter);
+	const btVector3 worldExtent = btVector3(absBasis[0].dot(halfExtents),
+		absBasis[1].dot(halfExtents),
+		absBasis[2].dot(halfExtents));
+
+	//the output box is centred on the transformed local centre
+	aabbMin = worldCenter - worldExtent;
+	aabbMax = worldCenter + worldExtent;
+}
+
+void	btTriangleMeshShape::recalcLocalAabb()
+{
+	//query the supporting vertex along +/- each coordinate axis and pad the result by m_collisionMargin
+	for (int axis=0;axis<3;axis++)
+	{
+		btVector3 dir(btScalar(0.),btScalar(0.),btScalar(0.));
+		dir[axis] = btScalar(1.);
+		m_localAabbMax[axis] = localGetSupportingVertex(dir)[axis]+m_collisionMargin;
+
+		dir[axis] = btScalar(-1.);
+		m_localAabbMin[axis] = localGetSupportingVertex(dir)[axis]-m_collisionMargin;
+	}
+}
+
+
+
+class SupportVertexCallback : public btTriangleCallback
+{
+	//best vertex seen so far; stays at (0,0,0) until a triangle corner beats m_maxDot
+	btVector3 m_supportVertexLocal;
+public:
+	btTransform	m_worldTrans;
+	btScalar m_maxDot;
+	btVector3 m_supportVecLocal;
+
+	//the search direction is multiplied by the basis of trans once, at construction
+	SupportVertexCallback(const btVector3& supportVecWorld,const btTransform& trans)
+		: m_supportVertexLocal(btScalar(0.),btScalar(0.),btScalar(0.)),
+		m_worldTrans(trans),
+		m_maxDot(btScalar(-BT_LARGE_FLOAT))
+	{
+		m_supportVecLocal = supportVecWorld * m_worldTrans.getBasis();
+	}
+
+	//keeps the triangle corner with the largest dot product against m_supportVecLocal
+	virtual void processTriangle( btVector3* triangle,int partId, int triangleIndex)
+	{
+		(void)partId;
+		(void)triangleIndex;
+		for (int corner=0;corner<3;corner++)
+		{
+			const btScalar projection = m_supportVecLocal.dot(triangle[corner]);
+			if (!(projection > m_maxDot))
+				continue;
+			m_maxDot = projection;
+			m_supportVertexLocal = triangle[corner];
+		}
+	}
+
+	btVector3 GetSupportVertexWorldSpace()
+	{
+		return m_worldTrans(m_supportVertexLocal);
+	}
+
+	btVector3	GetSupportVertexLocal()
+	{
+		return m_supportVertexLocal;
+	}
+};
+
+	
+void btTriangleMeshShape::setLocalScaling(const btVector3& scaling)
+{
+	m_meshInterface->setScaling(scaling);	//the scale is stored on the mesh interface, not on this shape
+	recalcLocalAabb();	//refresh m_localAabbMin/Max (a premade AABB is not consulted on this path)
+}
+
+const btVector3& btTriangleMeshShape::getLocalScaling() const
+{
+	return m_meshInterface->getScaling();
+}
+
+
+
+
+
+
+//#define DEBUG_TRIANGLE_MESH
+
+
+
+void	btTriangleMeshShape::processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
+{
+	//adapter handed to the mesh interface: a triangle reaches the user callback only if
+	//TestTriangleAgainstAabb2 accepts it for the query box
+	struct AabbFilter : public btInternalTriangleIndexCallback
+	{
+		btTriangleCallback* m_target;
+		btVector3 m_boxMin;
+		btVector3 m_boxMax;
+
+		AabbFilter(btTriangleCallback* target,const btVector3& boxMin,const btVector3& boxMax)
+			: m_target(target), m_boxMin(boxMin), m_boxMax(boxMax)
+		{
+		}
+
+		virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int triangleIndex)
+		{
+			//check aabb in triangle-space, before forwarding
+			if (!TestTriangleAgainstAabb2(&triangle[0],m_boxMin,m_boxMax))
+			{
+				return;
+			}
+			m_target->processTriangle(triangle,partId,triangleIndex);
+		}
+	};
+
+	AabbFilter filter(callback,aabbMin,aabbMax);
+
+	//the mesh interface receives the same query box together with the filtering adapter
+	m_meshInterface->InternalProcessAllTriangles(&filter,aabbMin,aabbMax);
+}
+
+
+
+
+
+void	btTriangleMeshShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	(void)mass;
+	//moving concave objects not supported: trips btAssert(0), then reports a zero inertia vector
+	btAssert(0);
+	inertia.setValue(btScalar(0.),btScalar(0.),btScalar(0.));
+}
+
+
+btVector3 btTriangleMeshShape::localGetSupportingVertex(const btVector3& vec) const
+{
+	//an identity transform is passed, so the callback's direction and result stay in the shape's local frame
+	btTransform identity;
+	identity.setIdentity();
+	SupportVertexCallback collector(vec,identity);
+
+	//query with a box reaching +/-BT_LARGE_FLOAT on every axis
+	//so that the AABB filter in processAllTriangles is as wide as possible
+	btVector3 hugeExtent(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+	processAllTriangles(&collector,-hugeExtent,hugeExtent);
+
+	//if no triangle corner was accepted, the callback still holds
+	//its initial (0,0,0) vertex and that is what gets returned
+	return collector.GetSupportVertexLocal();
+}
+
+
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleMeshShape.h b/src/bullet/BulletCollision/CollisionShapes/btTriangleMeshShape.h
new file mode 100644
index 00000000..c8caf8fe
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleMeshShape.h
@@ -0,0 +1,89 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_TRIANGLE_MESH_SHAPE_H
+#define BT_TRIANGLE_MESH_SHAPE_H
+
+#include "btConcaveShape.h"
+#include "btStridingMeshInterface.h"
+
+
+///The btTriangleMeshShape is an internal concave triangle mesh interface. Don't use this class directly, use btBvhTriangleMeshShape instead.
+class btTriangleMeshShape : public btConcaveShape
+{
+protected:
+	btVector3	m_localAabbMin;
+	btVector3	m_localAabbMax;
+	btStridingMeshInterface* m_meshInterface;	// triangle source; NOTE(review): ownership is not visible in this header - confirm who deletes it
+
+	///btTriangleMeshShape constructor has been disabled/protected, so that users will not mistakenly use this class.
+	///Don't use btTriangleMeshShape but use btBvhTriangleMeshShape instead!
+	btTriangleMeshShape(btStridingMeshInterface* meshInterface);
+
+public:
+
+	virtual ~btTriangleMeshShape();
+	///Visits the triangles with a SupportVertexCallback (query box of +/-BT_LARGE_FLOAT) and returns its local support vertex.
+	virtual btVector3 localGetSupportingVertex(const btVector3& vec) const;
+	///Not expected to be called: trips btAssert(0), then falls back to localGetSupportingVertex().
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const
+	{
+		btAssert(0);
+		return localGetSupportingVertex(vec);
+	}
+
+	void	recalcLocalAabb();
+
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+	///Forwards to the mesh interface; a triangle reaches callback only if TestTriangleAgainstAabb2 accepts it for [aabbMin,aabbMax].
+	virtual void	processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const;
+	///Moving concave objects are not supported: the implementation asserts and writes a zero inertia.
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	virtual void	setLocalScaling(const btVector3& scaling);
+	virtual const btVector3& getLocalScaling() const;	// returns m_meshInterface->getScaling()
+	
+	btStridingMeshInterface* getMeshInterface()
+	{
+		return m_meshInterface;
+	}
+
+	const btStridingMeshInterface* getMeshInterface() const
+	{
+		return m_meshInterface;
+	}
+	///Read-only access to the stored local-space bounds m_localAabbMin / m_localAabbMax.
+	const btVector3& getLocalAabbMin() const
+	{
+		return m_localAabbMin;
+	}
+	const btVector3& getLocalAabbMax() const
+	{
+		return m_localAabbMax;
+	}
+
+
+
+	//debugging
+	virtual const char*	getName()const {return "TRIANGLEMESH";}
+
+	
+
+};
+
+
+
+
+#endif //BT_TRIANGLE_MESH_SHAPE_H
diff --git a/src/bullet/BulletCollision/CollisionShapes/btTriangleShape.h b/src/bullet/BulletCollision/CollisionShapes/btTriangleShape.h
new file mode 100644
index 00000000..71b05573
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btTriangleShape.h
@@ -0,0 +1,182 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_OBB_TRIANGLE_MINKOWSKI_H
+#define BT_OBB_TRIANGLE_MINKOWSKI_H
+
+#include "btConvexShape.h"
+#include "btBoxShape.h"
+
+ATTRIBUTE_ALIGNED16(class) btTriangleShape : public btPolyhedralConvexShape
+{
+	// A single triangle exposed through the btPolyhedralConvexShape interface.
+	// Vertex and edge indices are expected in 0..2; none of the accessors range-check them.
+public:
+
+	btVector3	m_vertices1[3];
+
+	virtual int getNumVertices() const
+	{
+		return 3;
+	}
+
+	btVector3& getVertexPtr(int index)
+	{
+		return m_vertices1[index];
+	}
+
+	const btVector3& getVertexPtr(int index) const
+	{
+		return m_vertices1[index];
+	}
+	virtual void getVertex(int index,btVector3& vert) const
+	{
+		vert = m_vertices1[index];
+	}
+
+	virtual int getNumEdges() const
+	{
+		return 3;
+	}
+	// Edge i joins vertex i to vertex (i+1)%3; both ends are read through the virtual getVertex().
+	virtual void getEdge(int i,btVector3& pa,btVector3& pb) const
+	{
+		const int nextIndex = (i+1)%3;
+		getVertex(i,pa);
+		getVertex(nextIndex,pb);
+	}
+
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax)const
+	{
+		// No dedicated box computation here; defer to getAabbSlow().
+		getAabbSlow(t,aabbMin,aabbMax);
+	}
+
+	btVector3 localGetSupportingVertexWithoutMargin(const btVector3& dir)const
+	{
+		// Project the three vertices on dir and return the one with the largest projection.
+		const btVector3 projections(dir.dot(m_vertices1[0]), dir.dot(m_vertices1[1]), dir.dot(m_vertices1[2]));
+		return m_vertices1[projections.maxAxis()];
+	}
+
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+	{
+		// Same selection as localGetSupportingVertexWithoutMargin, once per input direction.
+		for (int k=0;k<numVectors;k++)
+		{
+			const btVector3& dir = vectors[k];
+			const btVector3 projections(dir.dot(m_vertices1[0]), dir.dot(m_vertices1[1]), dir.dot(m_vertices1[2]));
+			supportVerticesOut[k] = m_vertices1[projections.maxAxis()];
+		}
+	}
+
+	btTriangleShape() : btPolyhedralConvexShape ()
+	{
+		m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;	// m_vertices1 is not assigned by this constructor
+	}
+
+	btTriangleShape(const btVector3& p0,const btVector3& p1,const btVector3& p2) : btPolyhedralConvexShape ()
+	{
+		m_shapeType = TRIANGLE_SHAPE_PROXYTYPE;
+		m_vertices1[0] = p0;
+		m_vertices1[1] = p1;
+		m_vertices1[2] = p2;
+	}
+
+	// The index is passed on to getPlaneEquation(), which ignores it: a triangle has one plane.
+	virtual void getPlane(btVector3& planeNormal,btVector3& planeSupport,int i) const
+	{
+		getPlaneEquation(i,planeNormal,planeSupport);
+	}
+
+	virtual int	getNumPlanes() const
+	{
+		return 1;
+	}
+	// Normalized (v1-v0) x (v2-v0). NOTE(review): a degenerate triangle gives a zero-length cross product before normalize() - confirm callers avoid it.
+	void calcNormal(btVector3& normal) const
+	{
+		const btVector3& v0 = m_vertices1[0];
+		normal = (m_vertices1[1]-v0).cross(m_vertices1[2]-v0);
+		normal.normalize();
+	}
+	virtual void getPlaneEquation(int i, btVector3& planeNormal,btVector3& planeSupport) const
+	{
+		(void)i;	// single plane: the index is ignored
+		calcNormal(planeNormal);
+		planeSupport = m_vertices1[0];
+	}
+
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const
+	{
+		(void)mass;
+		btAssert(0);	// not expected to be called for a triangle; a zero inertia is still written
+		inertia.setValue(btScalar(0.),btScalar(0.),btScalar(0.));
+	}
+	// True when pt lies within tolerance of the triangle's plane and is not beyond any edge by more than tolerance.
+	virtual	bool isInside(const btVector3& pt,btScalar tolerance) const
+	{
+		btVector3 normal;
+		calcNormal(normal);
+
+		// Signed distance from pt to the supporting plane.
+		const btScalar planeDist = pt.dot(normal) - m_vertices1[0].dot(normal);
+		// Written as a negated range test so that a NaN distance is still rejected.
+		if (!(planeDist >= -tolerance && planeDist <= tolerance))
+		{
+			return false;
+		}
+
+		// edge.cross(normal) points away from the triangle interior for either winding,
+		// so a positive distance means pt is outside that edge. The earlier test
+		// (dist < -tolerance) rejected interior points instead.
+		for (int e=0;e<3;e++)
+		{
+			btVector3 pa,pb;
+			getEdge(e,pa,pb);
+			btVector3 edgeNormal = (pb-pa).cross(normal);
+			edgeNormal.normalize();
+			const btScalar edgeDist = pt.dot(edgeNormal) - pa.dot(edgeNormal);
+			if (edgeDist > tolerance)
+				return false;
+		}
+
+		return true;
+	}
+
+
+	//debugging
+	virtual const char*	getName()const
+	{
+		return "Triangle";
+	}
+
+	virtual int		getNumPreferredPenetrationDirections() const
+	{
+		return 2;
+	}
+	// Index 0 yields the triangle normal, any non-zero index yields its negation.
+	virtual void	getPreferredPenetrationDirection(int index, btVector3& penetrationVector) const
+	{
+		calcNormal(penetrationVector);
+		if (index)
+			penetrationVector *= btScalar(-1.);
+	}
+
+
+};
+
+#endif //BT_OBB_TRIANGLE_MINKOWSKI_H
+
diff --git a/src/bullet/BulletCollision/CollisionShapes/btUniformScalingShape.cpp b/src/bullet/BulletCollision/CollisionShapes/btUniformScalingShape.cpp
new file mode 100644
index 00000000..b148bbd9
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btUniformScalingShape.cpp
@@ -0,0 +1,160 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btUniformScalingShape.h"
+
+btUniformScalingShape::btUniformScalingShape(btConvexShape* convexChildShape, btScalar uniformScalingFactor)
+	: btConvexShape(), m_childConvexShape(convexChildShape), m_uniformScalingFactor(uniformScalingFactor)
+{
+	// Only the child's pointer is stored; the child shape itself is not copied here.
+	m_shapeType = UNIFORM_SCALING_SHAPE_PROXYTYPE;
+}
+	
+btUniformScalingShape::~btUniformScalingShape()
+{
+	// Empty body: this destructor releases nothing itself, m_childConvexShape included.
+}
+
+btVector3	btUniformScalingShape::localGetSupportingVertexWithoutMargin(const btVector3& vec)const
+{
+	// Ask the wrapped shape for its margin-free support point, then scale the answer.
+	const btVector3 childSupport = m_childConvexShape->localGetSupportingVertexWithoutMargin(vec);
+	return childSupport*m_uniformScalingFactor;
+}
+
+void	btUniformScalingShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+{
+	// Let the child fill the output array first, then scale every entry in place.
+	m_childConvexShape->batchedUnitVectorGetSupportingVertexWithoutMargin(vectors,supportVerticesOut,numVectors);
+	for (int k = 0; k < numVectors; ++k)
+	{
+		btVector3& entry = supportVerticesOut[k];
+		entry = entry * m_uniformScalingFactor;
+	}
+}
+
+
+btVector3	btUniformScalingShape::localGetSupportingVertex(const btVector3& vec)const
+{
+	// The child's support point is taken as returned by the child and then scaled as a whole.
+	const btVector3 childSupport = m_childConvexShape->localGetSupportingVertex(vec);
+	return childSupport*m_uniformScalingFactor;
+}
+
+
+void	btUniformScalingShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	// Deliberate approximation: the child's inertia is scaled linearly by the factor,
+	// which is not realistic but acceptable as long as mass ratios stay moderate.
+	btVector3 childInertia;
+	m_childConvexShape->calculateLocalInertia(mass,childInertia);
+	inertia = childInertia * m_uniformScalingFactor;
+}
+
+
+	///getAabb's default implementation is brute force, expected derived classes to implement a fast dedicated version
+void btUniformScalingShape::getAabb(const btTransform& trans,btVector3& aabbMin,btVector3& aabbMax) const
+{
+	// No dedicated fast path for this wrapper: the support-vertex based routine does the work.
+	getAabbSlow(trans,aabbMin,aabbMax);
+}
+
+void btUniformScalingShape::getAabbSlow(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+{
+#if 1
+	// Six probe directions along the coordinate axes: +X, +Y, +Z, then -X, -Y, -Z.
+	btVector3 probeDirs[] =
+	{
+		btVector3( 1.,  0.,  0.),
+		btVector3( 0.,  1.,  0.),
+		btVector3( 0.,  0.,  1.),
+		btVector3( -1., 0.,  0.),
+		btVector3( 0., -1.,  0.),
+		btVector3( 0.,  0., -1.)
+	};
+	// Output slots for the support points, one per probe direction.
+	btVector3 supportPts[] =
+	{
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.)
+	};
+	// Each direction is multiplied as (vector * basis); presumably this rotates it into the shape's local frame.
+	for (int dirIdx = 0; dirIdx < 6; ++dirIdx)
+	{
+		probeDirs[dirIdx] = probeDirs[dirIdx]*t.getBasis();
+	}
+	batchedUnitVectorGetSupportingVertexWithoutMargin(probeDirs, supportPts, 6);
+
+	btVector3 lowCorner(0,0,0),highCorner(0,0,0);
+	// On axis k only component k of the transformed support point is kept.
+	for ( int axis = 0; axis < 3; ++axis )
+	{
+		highCorner[axis] = t(supportPts[axis])[axis];
+		lowCorner[axis] = t(supportPts[axis + 3])[axis];
+	}
+	btVector3 marginVec(getMargin(),getMargin(),getMargin());
+	aabbMin = lowCorner-marginVec;
+	aabbMax = highCorner+marginVec;
+	
+#else
+
+	btScalar margin = getMargin();
+	for (int i=0;i<3;i++)
+	{
+		btVector3 vec(btScalar(0.),btScalar(0.),btScalar(0.));
+		vec[i] = btScalar(1.);
+		btVector3 sv = localGetSupportingVertex(vec*t.getBasis());
+		btVector3 tmp = t(sv);
+		aabbMax[i] = tmp[i]+margin;
+		vec[i] = btScalar(-1.);
+		sv = localGetSupportingVertex(vec*t.getBasis());
+		tmp = t(sv);
+		aabbMin[i] = tmp[i]-margin;
+	}
+
+#endif
+}
+
+void	btUniformScalingShape::setLocalScaling(const btVector3& scaling) 
+{
+	// Forwarded unchanged to the wrapped child shape; m_uniformScalingFactor is not touched.
+	m_childConvexShape->setLocalScaling(scaling);
+}
+const btVector3& btUniformScalingShape::getLocalScaling() const
+{
+	// Returns the child's scaling vector as-is; the uniform factor is not folded in.
+	return m_childConvexShape->getLocalScaling();
+}
+void	btUniformScalingShape::setMargin(btScalar margin)
+{
+	m_childConvexShape->setMargin(margin);	// handed to the child unscaled; getMargin() multiplies the child's margin by the factor
+}
+btScalar	btUniformScalingShape::getMargin() const
+{
+	const btScalar childMargin = m_childConvexShape->getMargin();	// margin as the child reports it
+	return childMargin * m_uniformScalingFactor;
+}
+int		btUniformScalingShape::getNumPreferredPenetrationDirections() const
+{
+	// Pure forwarder to the child shape.
+	return m_childConvexShape->getNumPreferredPenetrationDirections();
+}
+void	btUniformScalingShape::getPreferredPenetrationDirection(int index, btVector3& penetrationVector) const
+{
+	m_childConvexShape->getPreferredPenetrationDirection(index,penetrationVector);	// the child's direction is passed through without scaling
+}
diff --git a/src/bullet/BulletCollision/CollisionShapes/btUniformScalingShape.h b/src/bullet/BulletCollision/CollisionShapes/btUniformScalingShape.h
new file mode 100644
index 00000000..cbf7e6fd
--- /dev/null
+++ b/src/bullet/BulletCollision/CollisionShapes/btUniformScalingShape.h
@@ -0,0 +1,87 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_UNIFORM_SCALING_SHAPE_H
+#define BT_UNIFORM_SCALING_SHAPE_H
+
+#include "btConvexShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
+
+///The btUniformScalingShape allows to re-use uniform scaled instances of btConvexShape in a memory efficient way.
+///Instead of using btUniformScalingShape, it is better to use the non-uniform setLocalScaling method on convex shapes that implement it.
+class btUniformScalingShape : public btConvexShape
+{
+	btConvexShape*	m_childConvexShape;
+
+	btScalar	m_uniformScalingFactor;
+	
+	public:
+	///Stores the child pointer and the factor, and sets the shape type to UNIFORM_SCALING_SHAPE_PROXYTYPE.
+	btUniformScalingShape(	btConvexShape* convexChildShape, btScalar uniformScalingFactor);
+	
+	virtual ~btUniformScalingShape();
+	///Child's margin-free support vertex multiplied by the uniform factor.
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
+	///Child's support vertex (as returned by the child) multiplied by the uniform factor.
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec)const;
+	///Batched form: the child fills supportVerticesOut, then every entry is multiplied by the factor.
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
+	///Child's inertia multiplied linearly by the factor (noted in the implementation as not realistic).
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	btScalar	getUniformScalingFactor() const
+	{
+		return m_uniformScalingFactor;
+	}
+
+	btConvexShape*	getChildShape() 
+	{
+		return m_childConvexShape;
+	}
+
+	const btConvexShape*	getChildShape() const
+	{
+		return m_childConvexShape;
+	}
+
+	virtual const char*	getName()const 
+	{
+		return "UniformScalingShape";
+	}
+	
+
+
+	///////////////////////////
+
+
+	///getAabb's default implementation is brute force, expected derived classes to implement a fast dedicated version
+	void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+	///Builds the box from six axis-aligned support queries and pads it with getMargin().
+	virtual void getAabbSlow(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
+	///Both scaling accessors forward to the child shape.
+	virtual void	setLocalScaling(const btVector3& scaling) ;
+	virtual const btVector3& getLocalScaling() const ;
+	///setMargin() forwards the value to the child; getMargin() returns the child's margin times the factor.
+	virtual void	setMargin(btScalar margin);
+	virtual btScalar	getMargin() const;
+	///Both penetration-direction queries forward to the child shape.
+	virtual int		getNumPreferredPenetrationDirections() const;
+	
+	virtual void	getPreferredPenetrationDirection(int index, btVector3& penetrationVector) const;
+
+
+};
+
+#endif //BT_UNIFORM_SCALING_SHAPE_H
diff --git a/src/bullet/BulletCollision/Gimpact/btBoxCollision.h b/src/bullet/BulletCollision/Gimpact/btBoxCollision.h
new file mode 100644
index 00000000..d5676aaa
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btBoxCollision.h
@@ -0,0 +1,647 @@
+#ifndef BT_BOX_COLLISION_H_INCLUDED
+#define BT_BOX_COLLISION_H_INCLUDED
+
+/*! \file btBoxCollision.h
+\author Francisco Leon Najera
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "LinearMath/btTransform.h"
+
+
+///Swap numbers through a temporary (btSwap). The former add/subtract trick rounded
+///for floating-point operands: 1e20f and 1.0f came back as 1e20f and 0.
+#define BT_SWAP_NUMBERS(a,b){ \
+    btSwap(a,b); \
+}
+
+///NOTE: BT_MAX / BT_MIN evaluate the selected argument twice; do not pass expressions with side effects.
+#define BT_MAX(a,b) ((a)<(b)?(b):(a))
+#define BT_MIN(a,b) ((a)>(b)?(b):(a))
+///True when |x| exceeds y. Parenthesised as a whole so it can be negated or combined safely.
+#define BT_GREATER(x, y)	(btFabs(x) > (y))
+
+///Three-way variants built from the two-way macros above.
+#define BT_MAX3(a,b,c) BT_MAX(a,BT_MAX(b,c))
+#define BT_MIN3(a,b,c) BT_MIN(a,BT_MIN(b,c))
+
+
+
+
+
+
+enum eBT_PLANE_INTERSECTION_TYPE
+{
+	BT_CONST_BACK_PLANE = 0,	// btAABB::plane_classify: plane offset lies above the box's projected interval
+	BT_CONST_COLLIDE_PLANE,	// plane offset falls inside the projected interval (within BOX_PLANE_EPSILON)
+	BT_CONST_FRONT_PLANE	// plane offset lies below the box's projected interval
+};
+
+//SIMD_FORCE_INLINE bool test_cross_edge_box(
+//	const btVector3 & edge,
+//	const btVector3 & absolute_edge,
+//	const btVector3 & pointa,
+//	const btVector3 & pointb, const btVector3 & extend,
+//	int dir_index0,
+//	int dir_index1
+//	int component_index0,
+//	int component_index1)
+//{
+//	// dir coords are -z and y
+//
+//	const btScalar dir0 = -edge[dir_index0];
+//	const btScalar dir1 = edge[dir_index1];
+//	btScalar pmin = pointa[component_index0]*dir0 + pointa[component_index1]*dir1;
+//	btScalar pmax = pointb[component_index0]*dir0 + pointb[component_index1]*dir1;
+//	//find minmax
+//	if(pmin>pmax)
+//	{
+//		BT_SWAP_NUMBERS(pmin,pmax);
+//	}
+//	//find extends
+//	const btScalar rad = extend[component_index0] * absolute_edge[dir_index0] +
+//					extend[component_index1] * absolute_edge[dir_index1];
+//
+//	if(pmin>rad || -rad>pmax) return false;
+//	return true;
+//}
+//
+//SIMD_FORCE_INLINE bool test_cross_edge_box_X_axis(
+//	const btVector3 & edge,
+//	const btVector3 & absolute_edge,
+//	const btVector3 & pointa,
+//	const btVector3 & pointb, btVector3 & extend)
+//{
+//
+//	return test_cross_edge_box(edge,absolute_edge,pointa,pointb,extend,2,1,1,2);
+//}
+//
+//
+//SIMD_FORCE_INLINE bool test_cross_edge_box_Y_axis(
+//	const btVector3 & edge,
+//	const btVector3 & absolute_edge,
+//	const btVector3 & pointa,
+//	const btVector3 & pointb, btVector3 & extend)
+//{
+//
+//	return test_cross_edge_box(edge,absolute_edge,pointa,pointb,extend,0,2,2,0);
+//}
+//
+//SIMD_FORCE_INLINE bool test_cross_edge_box_Z_axis(
+//	const btVector3 & edge,
+//	const btVector3 & absolute_edge,
+//	const btVector3 & pointa,
+//	const btVector3 & pointb, btVector3 & extend)
+//{
+//
+//	return test_cross_edge_box(edge,absolute_edge,pointa,pointb,extend,1,0,0,1);
+//}
+
+// Edge-vs-box overlap test on one axis (see btAABB::collide_triangle_exact). Expands to `return false;` on separation, so it only works inside a function returning bool.
+#define TEST_CROSS_EDGE_BOX_MCR(edge,absolute_edge,pointa,pointb,_extend,i_dir_0,i_dir_1,i_comp_0,i_comp_1)\
+{\
+	const btScalar dir0 = -edge[i_dir_0];\
+	const btScalar dir1 = edge[i_dir_1];\
+	btScalar pmin = pointa[i_comp_0]*dir0 + pointa[i_comp_1]*dir1;\
+	btScalar pmax = pointb[i_comp_0]*dir0 + pointb[i_comp_1]*dir1;\
+	if(pmin>pmax)\
+	{\
+		BT_SWAP_NUMBERS(pmin,pmax); \
+	}\
+	const btScalar abs_dir0 = absolute_edge[i_dir_0];\
+	const btScalar abs_dir1 = absolute_edge[i_dir_1];\
+	const btScalar rad = _extend[i_comp_0] * abs_dir0 + _extend[i_comp_1] * abs_dir1;\
+	if(pmin>rad || -rad>pmax) return false;\
+}\
+
+// Per-axis wrappers: each fixes the four component indices passed to TEST_CROSS_EDGE_BOX_MCR for the X, Y or Z box axis.
+#define TEST_CROSS_EDGE_BOX_X_AXIS_MCR(edge,absolute_edge,pointa,pointb,_extend)\
+{\
+	TEST_CROSS_EDGE_BOX_MCR(edge,absolute_edge,pointa,pointb,_extend,2,1,1,2);\
+}\
+
+#define TEST_CROSS_EDGE_BOX_Y_AXIS_MCR(edge,absolute_edge,pointa,pointb,_extend)\
+{\
+	TEST_CROSS_EDGE_BOX_MCR(edge,absolute_edge,pointa,pointb,_extend,0,2,2,0);\
+}\
+
+#define TEST_CROSS_EDGE_BOX_Z_AXIS_MCR(edge,absolute_edge,pointa,pointb,_extend)\
+{\
+	TEST_CROSS_EDGE_BOX_MCR(edge,absolute_edge,pointa,pointb,_extend,1,0,0,1);\
+}\
+
+
+//! Returns the dot product between a vec3f and the col of a matrix
+SIMD_FORCE_INLINE btScalar bt_mat3_dot_col(const btMatrix3x3 & mat, const btVector3 & vec3, int colindex)
+{
+	// Pair component r of vec3 with entry (r, colindex) of mat and sum in row order 0,1,2.
+	return vec3[0]*mat[0][colindex] + vec3[1]*mat[1][colindex] + vec3[2]*mat[2][colindex];
+}
+
+
+//!  Class for transforming a model1 to the space of model0
+ATTRIBUTE_ALIGNED16	(class) BT_BOX_BOX_TRANSFORM_CACHE
+{
+public:
+    btVector3  m_T1to0;//!< Transforms translation of model1 to model 0
+	btMatrix3x3 m_R1to0;//!< Transforms Rotation of model1 to model 0, equal  to R0' * R1
+	btMatrix3x3 m_AR;//!< Absolute value of m_R1to0
+
+	//! Rebuilds m_AR from m_R1to0: every entry becomes |entry of m_R1to0| plus a 1e-6f bias.
+	//! NOTE(review): the bias presumably keeps the axis tests stable for near-zero entries - confirm.
+	SIMD_FORCE_INLINE void calc_absolute_matrix()
+	{
+		for(int row = 0; row < 3; ++row)
+		{
+			for(int col = 0; col < 3; ++col)
+			{
+				m_AR[row][col] = 1e-6f + btFabs(m_R1to0[row][col]);
+			}
+		}
+	}
+
+	//! Empty constructor body: nothing is assigned here, use one of the calc_from_* methods to fill the cache.
+	BT_BOX_BOX_TRANSFORM_CACHE()
+	{
+	}
+
+	//! Calc the transformation relative 1 to 0 as trans0.inverse() * trans1
+	//! (per the original note, this inverts matrices by transposing).
+	SIMD_FORCE_INLINE void calc_from_homogenic(const btTransform & trans0,const btTransform & trans1)
+	{
+		const btTransform relative = trans0.inverse() * trans1;
+
+		m_T1to0 = relative.getOrigin();
+		m_R1to0 = relative.getBasis();
+
+		calc_absolute_matrix();
+	}
+
+	//! Calcs the relative transformation from the full inversion of trans0's basis
+	//! (btMatrix3x3::inverse()). Useful for scaling matrices.
+	SIMD_FORCE_INLINE void calc_from_full_invert(const btTransform & trans0,const btTransform & trans1)
+	{
+		const btMatrix3x3 invBasis0 = trans0.getBasis().inverse();
+
+		// translation: inv(R0) * (-T0), then inv(R0) * T1 is added
+		m_T1to0 = invBasis0 * (-trans0.getOrigin());
+		m_T1to0 += invBasis0*trans1.getOrigin();
+
+		// rotation: inv(R0) * R1
+		m_R1to0 = invBasis0;
+		m_R1to0 *= trans1.getBasis();
+
+		calc_absolute_matrix();
+	}
+
+	//! Applies the cached rotation rows and translation to point and
+	//! returns the result as a new vector.
+	SIMD_FORCE_INLINE btVector3 transform(const btVector3 & point) const
+	{
+		const btScalar x0 = m_R1to0[0].dot(point) + m_T1to0.x();
+		const btScalar y0 = m_R1to0[1].dot(point) + m_T1to0.y();
+		const btScalar z0 = m_R1to0[2].dot(point) + m_T1to0.z();
+		return btVector3(x0, y0, z0);
+	}
+
+};
+
+
+#define BOX_PLANE_EPSILON 0.000001f	// tolerance applied to the plane offset comparisons in btAABB::plane_classify
+
+//! Axis aligned box
+ATTRIBUTE_ALIGNED16	(class) btAABB
+{
+public:
+	btVector3 m_min;
+	btVector3 m_max;
+	//! Default constructor: the body is empty, so nothing is assigned to m_min / m_max here.
+	btAABB()
+	{}
+
+	//! Tight box around the triangle V1,V2,V3.
+	btAABB(const btVector3 & V1,
+			 const btVector3 & V2,
+			 const btVector3 & V3)
+	{
+		m_min[0] = BT_MIN3(V1[0],V2[0],V3[0]);
+		m_min[1] = BT_MIN3(V1[1],V2[1],V3[1]);
+		m_min[2] = BT_MIN3(V1[2],V2[2],V3[2]);
+
+		m_max[0] = BT_MAX3(V1[0],V2[0],V3[0]);
+		m_max[1] = BT_MAX3(V1[1],V2[1],V3[1]);
+		m_max[2] = BT_MAX3(V1[2],V2[2],V3[2]);
+	}
+	//! Box around the triangle V1,V2,V3, grown by margin on every side.
+	btAABB(const btVector3 & V1,
+			 const btVector3 & V2,
+			 const btVector3 & V3,
+			 btScalar margin)
+	{
+		m_min[0] = BT_MIN3(V1[0],V2[0],V3[0]);
+		m_min[1] = BT_MIN3(V1[1],V2[1],V3[1]);
+		m_min[2] = BT_MIN3(V1[2],V2[2],V3[2]);
+
+		m_max[0] = BT_MAX3(V1[0],V2[0],V3[0]);
+		m_max[1] = BT_MAX3(V1[1],V2[1],V3[1]);
+		m_max[2] = BT_MAX3(V1[2],V2[2],V3[2]);
+
+		m_min[0] -= margin;
+		m_min[1] -= margin;
+		m_min[2] -= margin;
+		m_max[0] += margin;
+		m_max[1] += margin;
+		m_max[2] += margin;
+	}
+
+	btAABB(const btAABB &other):
+		m_min(other.m_min),m_max(other.m_max)
+	{
+	}
+	//! Copy of other, grown by margin on every side.
+	btAABB(const btAABB &other,btScalar margin ):
+		m_min(other.m_min),m_max(other.m_max)
+	{
+		m_min[0] -= margin;
+		m_min[1] -= margin;
+		m_min[2] -= margin;
+		m_max[0] += margin;
+		m_max[1] += margin;
+		m_max[2] += margin;
+	}
+	//! Resets to an inverted box (min = +infinity, max = -infinity) so the first merge replaces both corners.
+	SIMD_FORCE_INLINE void invalidate()
+	{
+		m_min[0] = SIMD_INFINITY;
+		m_min[1] = SIMD_INFINITY;
+		m_min[2] = SIMD_INFINITY;
+		m_max[0] = -SIMD_INFINITY;
+		m_max[1] = -SIMD_INFINITY;
+		m_max[2] = -SIMD_INFINITY;
+	}
+	//! Grows the box by margin on every side, in place.
+	SIMD_FORCE_INLINE void increment_margin(btScalar margin)
+	{
+		m_min[0] -= margin;
+		m_min[1] -= margin;
+		m_min[2] -= margin;
+		m_max[0] += margin;
+		m_max[1] += margin;
+		m_max[2] += margin;
+	}
+	//! Overwrites this box with other grown by margin on every side.
+	SIMD_FORCE_INLINE void copy_with_margin(const btAABB &other, btScalar margin)
+	{
+		m_min[0] = other.m_min[0] - margin;
+		m_min[1] = other.m_min[1] - margin;
+		m_min[2] = other.m_min[2] - margin;
+
+		m_max[0] = other.m_max[0] + margin;
+		m_max[1] = other.m_max[1] + margin;
+		m_max[2] = other.m_max[2] + margin;
+	}
+	//! Sets this box to the tight bounds of a triangle; CLASS_POINT only needs operator[] for indices 0..2.
+	template<typename CLASS_POINT>
+	SIMD_FORCE_INLINE void calc_from_triangle(
+							const CLASS_POINT & V1,
+							const CLASS_POINT & V2,
+							const CLASS_POINT & V3)
+	{
+		m_min[0] = BT_MIN3(V1[0],V2[0],V3[0]);
+		m_min[1] = BT_MIN3(V1[1],V2[1],V3[1]);
+		m_min[2] = BT_MIN3(V1[2],V2[2],V3[2]);
+
+		m_max[0] = BT_MAX3(V1[0],V2[0],V3[0]);
+		m_max[1] = BT_MAX3(V1[1],V2[1],V3[1]);
+		m_max[2] = BT_MAX3(V1[2],V2[2],V3[2]);
+	}
+	//! Same bounds as calc_from_triangle, then grown by margin on every side.
+	template<typename CLASS_POINT>
+	SIMD_FORCE_INLINE void calc_from_triangle_margin(
+							const CLASS_POINT & V1,
+							const CLASS_POINT & V2,
+							const CLASS_POINT & V3, btScalar margin)
+	{
+		m_min[0] = BT_MIN3(V1[0],V2[0],V3[0]);
+		m_min[1] = BT_MIN3(V1[1],V2[1],V3[1]);
+		m_min[2] = BT_MIN3(V1[2],V2[2],V3[2]);
+
+		m_max[0] = BT_MAX3(V1[0],V2[0],V3[0]);
+		m_max[1] = BT_MAX3(V1[1],V2[1],V3[1]);
+		m_max[2] = BT_MAX3(V1[2],V2[2],V3[2]);
+
+		m_min[0] -= margin;
+		m_min[1] -= margin;
+		m_min[2] -= margin;
+		m_max[0] += margin;
+		m_max[1] += margin;
+		m_max[2] += margin;
+	}
+
+	//! Apply a transform to an AABB (the result is the axis-aligned box enclosing the transformed box)
+	SIMD_FORCE_INLINE void appy_transform(const btTransform & trans)
+	{
+		btVector3 center = (m_max+m_min)*0.5f;
+		btVector3 extends = m_max - center;
+		// Compute new center
+		center = trans(center);
+
+		btVector3 textends(extends.dot(trans.getBasis().getRow(0).absolute()),
+ 				 extends.dot(trans.getBasis().getRow(1).absolute()),
+				 extends.dot(trans.getBasis().getRow(2).absolute()));
+
+		m_min = center - textends;
+		m_max = center + textends;
+	}
+
+
+	//! Apply a cached box-to-box transform to an AABB
+	SIMD_FORCE_INLINE void appy_transform_trans_cache(const BT_BOX_BOX_TRANSFORM_CACHE & trans)
+	{
+		btVector3 center = (m_max+m_min)*0.5f;
+		btVector3 extends = m_max - center;
+		// Compute new center
+		center = trans.transform(center);
+
+		btVector3 textends(extends.dot(trans.m_R1to0.getRow(0).absolute()),
+ 				 extends.dot(trans.m_R1to0.getRow(1).absolute()),
+				 extends.dot(trans.m_R1to0.getRow(2).absolute()));
+
+		m_min = center - textends;
+		m_max = center + textends;
+	}
+
+	//! Merges a Box
+	SIMD_FORCE_INLINE void merge(const btAABB & box)
+	{
+		m_min[0] = BT_MIN(m_min[0],box.m_min[0]);
+		m_min[1] = BT_MIN(m_min[1],box.m_min[1]);
+		m_min[2] = BT_MIN(m_min[2],box.m_min[2]);
+
+		m_max[0] = BT_MAX(m_max[0],box.m_max[0]);
+		m_max[1] = BT_MAX(m_max[1],box.m_max[1]);
+		m_max[2] = BT_MAX(m_max[2],box.m_max[2]);
+	}
+
+	//! Merges a point
+	template<typename CLASS_POINT>
+	SIMD_FORCE_INLINE void merge_point(const CLASS_POINT & point)
+	{
+		m_min[0] = BT_MIN(m_min[0],point[0]);
+		m_min[1] = BT_MIN(m_min[1],point[1]);
+		m_min[2] = BT_MIN(m_min[2],point[2]);
+
+		m_max[0] = BT_MAX(m_max[0],point[0]);
+		m_max[1] = BT_MAX(m_max[1],point[1]);
+		m_max[2] = BT_MAX(m_max[2],point[2]);
+	}
+
+	//! Gets the extend and center
+	SIMD_FORCE_INLINE void get_center_extend(btVector3 & center,btVector3 & extend)  const
+	{
+		center = (m_max+m_min)*0.5f;
+		extend = m_max - center;
+	}
+
+	//! Finds the intersecting box between this box and the other. No overlap check is made: the result is inverted (min > max) when they are disjoint.
+	SIMD_FORCE_INLINE void find_intersection(const btAABB & other, btAABB & intersection)  const
+	{
+		intersection.m_min[0] = BT_MAX(other.m_min[0],m_min[0]);
+		intersection.m_min[1] = BT_MAX(other.m_min[1],m_min[1]);
+		intersection.m_min[2] = BT_MAX(other.m_min[2],m_min[2]);
+
+		intersection.m_max[0] = BT_MIN(other.m_max[0],m_max[0]);
+		intersection.m_max[1] = BT_MIN(other.m_max[1],m_max[1]);
+		intersection.m_max[2] = BT_MIN(other.m_max[2],m_max[2]);
+	}
+
+	//! True when the two boxes overlap or touch on all three axes.
+	SIMD_FORCE_INLINE bool has_collision(const btAABB & other) const
+	{
+		if(m_min[0] > other.m_max[0] ||
+		   m_max[0] < other.m_min[0] ||
+		   m_min[1] > other.m_max[1] ||
+		   m_max[1] < other.m_min[1] ||
+		   m_min[2] > other.m_max[2] ||
+		   m_max[2] < other.m_min[2])
+		{
+			return false;
+		}
+		return true;
+	}
+
+	/*! \brief Tests whether a ray hits this box (only a yes/no answer is produced, no hit parameter).
+	\param vorigin origin of the ray
+	\param vdir direction of the ray
+	\return false as soon as one of the slab or cross-product axis tests separates ray and box
+	*/
+	SIMD_FORCE_INLINE bool collide_ray(const btVector3 & vorigin,const btVector3 & vdir)  const
+	{
+		btVector3 extents,center;
+		this->get_center_extend(center,extents);
+
+		btScalar Dx = vorigin[0] - center[0];
+		if(BT_GREATER(Dx, extents[0]) && Dx*vdir[0]>=0.0f)	return false;
+		btScalar Dy = vorigin[1] - center[1];
+		if(BT_GREATER(Dy, extents[1]) && Dy*vdir[1]>=0.0f)	return false;
+		btScalar Dz = vorigin[2] - center[2];
+		if(BT_GREATER(Dz, extents[2]) && Dz*vdir[2]>=0.0f)	return false;
+
+
+		btScalar f = vdir[1] * Dz - vdir[2] * Dy;
+		if(btFabs(f) > extents[1]*btFabs(vdir[2]) + extents[2]*btFabs(vdir[1])) return false;
+		f = vdir[2] * Dx - vdir[0] * Dz;
+		if(btFabs(f) > extents[0]*btFabs(vdir[2]) + extents[2]*btFabs(vdir[0]))return false;
+		f = vdir[0] * Dy - vdir[1] * Dx;
+		if(btFabs(f) > extents[0]*btFabs(vdir[1]) + extents[1]*btFabs(vdir[0]))return false;
+		return true;
+	}
+
+	//! Projects the box on direction and returns the covered interval [vmin,vmax].
+	SIMD_FORCE_INLINE void projection_interval(const btVector3 & direction, btScalar &vmin, btScalar &vmax) const
+	{
+		btVector3 center = (m_max+m_min)*0.5f;
+		btVector3 extend = m_max-center;
+
+		btScalar _fOrigin =  direction.dot(center);
+		btScalar _fMaximumExtent = extend.dot(direction.absolute());
+		vmin = _fOrigin - _fMaximumExtent;
+		vmax = _fOrigin + _fMaximumExtent;
+	}
+	//! Classifies the box against plane (plane[3] is the offset) with BOX_PLANE_EPSILON as tolerance.
+	SIMD_FORCE_INLINE eBT_PLANE_INTERSECTION_TYPE plane_classify(const btVector4 &plane) const
+	{
+		btScalar _fmin,_fmax;
+		this->projection_interval(plane,_fmin,_fmax);
+
+		if(plane[3] > _fmax + BOX_PLANE_EPSILON)
+		{
+			return BT_CONST_BACK_PLANE; // 0
+		}
+
+		if(plane[3]+BOX_PLANE_EPSILON >=_fmin)
+		{
+			return BT_CONST_COLLIDE_PLANE; //1
+		}
+		return BT_CONST_FRONT_PLANE;//2
+	}
+	//! Conservative overlap test: box is replaced by the AABB enclosing it after trans1_to_0 (read-only), then compared with this box.
+	SIMD_FORCE_INLINE bool overlapping_trans_conservative(const btAABB & box, const btTransform & trans1_to_0) const
+	{
+		btAABB tbox = box;
+		tbox.appy_transform(trans1_to_0);
+		return has_collision(tbox);
+	}
+	//! Same conservative test, taking the transform from a BT_BOX_BOX_TRANSFORM_CACHE.
+	SIMD_FORCE_INLINE bool overlapping_trans_conservative2(const btAABB & box,
+		const BT_BOX_BOX_TRANSFORM_CACHE & trans1_to_0) const
+	{
+		btAABB tbox = box;
+		tbox.appy_transform_trans_cache(trans1_to_0);
+		return has_collision(tbox);
+	}
+
+	//! transcache is the transformation cache from box to this AABB
+	SIMD_FORCE_INLINE bool overlapping_trans_cache(
+		const btAABB & box,const BT_BOX_BOX_TRANSFORM_CACHE & transcache, bool fulltest) const
+	{
+
+		//Taken from OPCODE
+		btVector3 ea,eb;//extends
+		btVector3 ca,cb;//centers
+		get_center_extend(ca,ea);
+		box.get_center_extend(cb,eb);
+
+
+		btVector3 T;
+		btScalar t,t2;
+		int i;
+
+		// Class I : A's basis vectors
+		for(i=0;i<3;i++)
+		{
+			T[i] =  transcache.m_R1to0[i].dot(cb) + transcache.m_T1to0[i] - ca[i];
+			t = transcache.m_AR[i].dot(eb) + ea[i];
+			if(BT_GREATER(T[i], t))	return false;
+		}
+		// Class II : B's basis vectors
+		for(i=0;i<3;i++)
+		{
+			t = bt_mat3_dot_col(transcache.m_R1to0,T,i);
+			t2 = bt_mat3_dot_col(transcache.m_AR,ea,i) + eb[i];
+			if(BT_GREATER(t,t2))	return false;
+		}
+		// Class III : 9 cross products
+		if(fulltest)
+		{
+			int j,m,n,o,p,q,r;
+			for(i=0;i<3;i++)
+			{
+				m = (i+1)%3;
+				n = (i+2)%3;
+				o = i==0?1:0;
+				p = i==2?1:2;
+				for(j=0;j<3;j++)
+				{
+					q = j==2?1:2;
+					r = j==0?1:0;
+					t = T[n]*transcache.m_R1to0[m][j] - T[m]*transcache.m_R1to0[n][j];
+					t2 = ea[o]*transcache.m_AR[p][j] + ea[p]*transcache.m_AR[o][j] +
+						eb[r]*transcache.m_AR[i][q] + eb[q]*transcache.m_AR[i][r];
+					if(BT_GREATER(t,t2))	return false;
+				}
+			}
+		}
+		return true;
+	}
+
+	//! Simple test for planes.
+	SIMD_FORCE_INLINE bool collide_plane(
+		const btVector4 & plane) const
+	{
+		eBT_PLANE_INTERSECTION_TYPE classify = plane_classify(plane);
+		return (classify == BT_CONST_COLLIDE_PLANE);
+	}
+
+	//! test for a triangle, with edges
+	SIMD_FORCE_INLINE bool collide_triangle_exact(
+		const btVector3 & p1,
+		const btVector3 & p2,
+		const btVector3 & p3,
+		const btVector4 & triangle_plane) const
+	{
+		if(!collide_plane(triangle_plane)) return false;
+
+		btVector3 center,extends;
+		this->get_center_extend(center,extends);
+
+		const btVector3 v1(p1 - center);
+		const btVector3 v2(p2 - center);
+		const btVector3 v3(p3 - center);
+
+		//First axis
+		btVector3 diff(v2 - v1);
+		btVector3 abs_diff = diff.absolute();
+		//Test With X axis
+		TEST_CROSS_EDGE_BOX_X_AXIS_MCR(diff,abs_diff,v1,v3,extends);
+		//Test With Y axis
+		TEST_CROSS_EDGE_BOX_Y_AXIS_MCR(diff,abs_diff,v1,v3,extends);
+		//Test With Z axis
+		TEST_CROSS_EDGE_BOX_Z_AXIS_MCR(diff,abs_diff,v1,v3,extends);
+
+
+		diff = v3 - v2;
+		abs_diff = diff.absolute();
+		//Test With X axis
+		TEST_CROSS_EDGE_BOX_X_AXIS_MCR(diff,abs_diff,v2,v1,extends);
+		//Test With Y axis
+		TEST_CROSS_EDGE_BOX_Y_AXIS_MCR(diff,abs_diff,v2,v1,extends);
+		//Test With Z axis
+		TEST_CROSS_EDGE_BOX_Z_AXIS_MCR(diff,abs_diff,v2,v1,extends);
+
+		diff = v1 - v3;
+		abs_diff = diff.absolute();
+		//Test With X axis
+		TEST_CROSS_EDGE_BOX_X_AXIS_MCR(diff,abs_diff,v3,v2,extends);
+		//Test With Y axis
+		TEST_CROSS_EDGE_BOX_Y_AXIS_MCR(diff,abs_diff,v3,v2,extends);
+		//Test With Z axis
+		TEST_CROSS_EDGE_BOX_Z_AXIS_MCR(diff,abs_diff,v3,v2,extends);
+
+		return true;
+	}
+};
+
+
+//! Comparison of transformation objects
+SIMD_FORCE_INLINE bool btCompareTransformsEqual(const btTransform & t1,const btTransform & t2)
+{
+	if(!(t1.getOrigin() == t2.getOrigin()) ) return false;	// translations first, via btVector3's operator==
+	const btMatrix3x3 & basis1 = t1.getBasis();
+	const btMatrix3x3 & basis2 = t2.getBasis();
+	for(int row = 0; row < 3; ++row)
+		if(!(basis1.getRow(row) == basis2.getRow(row)) ) return false;
+	return true;
+}
+
+
+
+#endif // BT_BOX_COLLISION_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/btClipPolygon.h b/src/bullet/BulletCollision/Gimpact/btClipPolygon.h
new file mode 100644
index 00000000..de0a5231
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btClipPolygon.h
@@ -0,0 +1,182 @@
+#ifndef BT_CLIP_POLYGON_H_INCLUDED
+#define BT_CLIP_POLYGON_H_INCLUDED
+
+/*! \file btClipPolygon.h
+\author Francisco Leon Najera
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btGeometryUtil.h"
+
+
+//! Signed distance from a point to a plane: dot(point, plane) minus the plane constant plane[3].
+SIMD_FORCE_INLINE btScalar bt_distance_point_plane(const btVector4 & plane,const btVector3 &point)
+{
+	return -plane[3] + point.dot(plane);
+}
+/*! Linear blend of two vectors.
+Writes (1-blend_factor)*va + blend_factor*vb into vr, so va is weighted by (1-blend_factor) and vb by blend_factor.*/
+SIMD_FORCE_INLINE void bt_vec_blend(btVector3 &vr, const btVector3 &va,const btVector3 &vb, btScalar blend_factor)
+{
+	const btScalar keep_factor = 1-blend_factor;
+	vr = keep_factor*va + blend_factor*vb;
+}
+//! Processes one polygon edge for plane clipping.
+/*!
+Appends to \a clipped the crossing point of edge point0->point1 when its end points classify
+differently against SIMD_EPSILON, then point1 itself when dist1 is not greater than SIMD_EPSILON.
+\a clipped_count is advanced for every point written.
+*/
+SIMD_FORCE_INLINE void bt_plane_clip_polygon_collect(
+						const btVector3 & point0,
+						const btVector3 & point1,
+						btScalar dist0,
+						btScalar dist1,
+						btVector3 * clipped,
+						int & clipped_count)
+{
+	const bool start_in_front = (dist0>SIMD_EPSILON);
+	const bool end_in_front = (dist1>SIMD_EPSILON);
+	if(start_in_front != end_in_front)
+	{
+		// the edge crosses the plane: interpolate to where the distance reaches zero
+		const btScalar crossing = -dist0/(dist1-dist0);
+		bt_vec_blend(clipped[clipped_count++],point0,point1,crossing);
+	}
+	if(!end_in_front) clipped[clipped_count++] = point1;
+}
+
+//! Clips a polygon by a plane
+/*!
+Keeps the part of the polygon whose distance to \a plane is not greater than SIMD_EPSILON.
+\param plane clipping plane, evaluated through bt_distance_point_plane
+\param polygon_points vertices of the polygon, in order
+\param polygon_point_count number of vertices; zero or less yields an empty result
+\param clipped output array; every edge can add up to two points, so the caller must size it accordingly
+\return The number of points written to \a clipped
+*/
+SIMD_FORCE_INLINE int bt_plane_clip_polygon(
+						const btVector4 & plane,
+						const btVector3 * polygon_points,
+						int polygon_point_count,
+						btVector3 * clipped)
+{
+	// nothing to clip: avoid reading polygon_points[0] of an empty polygon
+	if(polygon_point_count <= 0) return 0;
+
+	int clipped_count = 0;
+
+	// the first vertex is kept as-is when it is not in front of the plane
+	const btScalar firstdist = bt_distance_point_plane(plane,polygon_points[0]);
+	if(!(firstdist>SIMD_EPSILON))
+	{
+		clipped[clipped_count] = polygon_points[0];
+		clipped_count++;
+	}
+
+	// walk the edges (i-1 -> i), carrying the distance of the previous vertex along
+	btScalar olddist = firstdist;
+	for(int i=1;i<polygon_point_count;i++)
+	{
+		const btScalar dist = bt_distance_point_plane(plane,polygon_points[i]);
+		bt_plane_clip_polygon_collect(
+						polygon_points[i-1],polygon_points[i],
+						olddist,dist,
+						clipped,clipped_count);
+
+		olddist = dist;
+	}
+
+	// closing edge: from the last vertex back to the first one
+	bt_plane_clip_polygon_collect(
+					polygon_points[polygon_point_count-1],polygon_points[0],
+					olddist,firstdist,
+					clipped,clipped_count);
+	return clipped_count;
+}
+
+//! Clips a triangle by a plane
+/*!
+Same procedure as bt_plane_clip_polygon, written out for the three edges of a triangle:
+vertices whose plane distance is not greater than SIMD_EPSILON are kept, and a crossing
+point is inserted on every edge whose end points classify differently.
+Note: when point0 is kept it is written twice (first and last output point), because the
+closing edge emits its end point again.
+\param plane clipping plane
+\param point0 first triangle vertex
+\param point1 second triangle vertex
+\param point2 third triangle vertex
+\param clipped must be an array of 16 points.
+\return The number of points written to \a clipped
+*/
+SIMD_FORCE_INLINE int bt_plane_clip_triangle(
+						const btVector4 & plane,
+						const btVector3 & point0,
+						const btVector3 & point1,
+						const btVector3& point2,
+						btVector3 * clipped // an allocated array of 16 points at least
+						)
+{
+	int written = 0;
+
+	// vertex 0 is emitted directly when it is not in front of the plane
+	const btScalar dist_p0 = bt_distance_point_plane(plane,point0);
+	if(!(dist_p0>SIMD_EPSILON))
+	{
+		clipped[written] = point0;
+		written++;
+	}
+
+	// edge point0 -> point1
+	const btScalar dist_p1 = bt_distance_point_plane(plane,point1);
+	bt_plane_clip_polygon_collect(
+					point0,point1,
+					dist_p0,
+					dist_p1,
+					clipped,
+					written);
+
+	// edge point1 -> point2
+	const btScalar dist_p2 = bt_distance_point_plane(plane,point2);
+	bt_plane_clip_polygon_collect(
+					point1,point2,
+					dist_p1,
+					dist_p2,
+					clipped,
+					written);
+
+	// closing edge point2 -> point0 (reuses the distance of vertex 0)
+	bt_plane_clip_polygon_collect(
+					point2,point0,
+					dist_p2,
+					dist_p0,
+					clipped,
+					written);
+
+	return written;
+}
+
+
+
+
+
+#endif // BT_CLIP_POLYGON_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/btContactProcessing.cpp b/src/bullet/BulletCollision/Gimpact/btContactProcessing.cpp
new file mode 100644
index 00000000..eed31d83
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btContactProcessing.cpp
@@ -0,0 +1,181 @@
+
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#include "btContactProcessing.h"
+
+//! Upper bound on the coincident normals gathered per merged contact in merge_contacts
+#define MAX_COINCIDENT 8
+
+//! Pairs the hash key of a contact with the index of that contact in the source array.
+struct CONTACT_KEY_TOKEN
+{
+	unsigned int m_key;   //!< sort key; merge_contacts stores GIM_CONTACT::calc_key_contact here
+	int m_value;          //!< payload; merge_contacts stores the index of the contact here
+	CONTACT_KEY_TOKEN()
+	{
+	}
+
+	CONTACT_KEY_TOKEN(unsigned int key,int token)
+		: m_key(key), m_value(token)
+	{
+	}
+
+	CONTACT_KEY_TOKEN(const CONTACT_KEY_TOKEN& rtoken)
+		: m_key(rtoken.m_key), m_value(rtoken.m_value)
+	{
+	}
+
+	//! Tokens are ordered by key only; m_value does not take part in comparisons.
+	inline bool operator <(const CONTACT_KEY_TOKEN& other) const
+	{
+		return (other.m_key > m_key);
+	}
+
+	inline bool operator >(const CONTACT_KEY_TOKEN& other) const
+	{
+		return (other.m_key < m_key);
+	}
+};
+
+//! Less-than functor over CONTACT_KEY_TOKEN keys, handed to quickSort in merge_contacts.
+class CONTACT_KEY_TOKEN_COMP
+{
+	public:
+
+		bool operator() ( const CONTACT_KEY_TOKEN& a, const CONTACT_KEY_TOKEN& b ) const
+		{
+			return ( b > a );
+		}
+};
+
+//! Rebuilds this array from \a contacts, keeping one contact per position hash key.
+/*!
+Contacts are sorted by their calc_key_contact hash. Within a run of equal keys the stored contact is
+replaced by one whose depth is smaller by more than CONTACT_DIFF_EPSILON; contacts of nearly equal
+depth add their normal to an average (at most MAX_COINCIDENT normals) if \a normal_contact_average is set.
+\param contacts source contacts; this array is cleared before \a contacts is read
+*/
+void btContactArray::merge_contacts(
+	const btContactArray & contacts, bool normal_contact_average)
+{
+	clear();
+	int i;
+	if(contacts.size()==0) return;
+
+	if(contacts.size()==1)
+	{
+		push_back(contacts[0]);
+		return;
+	}
+
+	//fill key contacts: (hash key, index into contacts)
+	btAlignedObjectArray<CONTACT_KEY_TOKEN> keycontacts;
+	keycontacts.reserve(contacts.size());
+	for ( i = 0;i<contacts.size() ;i++ )
+	{
+		keycontacts.push_back(CONTACT_KEY_TOKEN(contacts[i].calc_key_contact(),i));
+	}
+
+	//sort keys so that contacts sharing a key become adjacent
+	keycontacts.quickSort(CONTACT_KEY_TOKEN_COMP());
+
+	// Merge contacts
+	int coincident_count=0;
+	btVector3 coincident_normals[MAX_COINCIDENT];
+
+	unsigned int last_key = keycontacts[0].m_key;
+	unsigned int key = 0;
+	push_back(contacts[keycontacts[0].m_value]);
+	// contact currently being merged; refreshed after every push_back
+	GIM_CONTACT * pcontact = &(*this)[0];
+	for( i=1;i<keycontacts.size();i++)
+	{
+		key = keycontacts[i].m_key;
+		const GIM_CONTACT * scontact = &contacts[keycontacts[i].m_value];
+
+		if(last_key ==  key)//same points
+		{
+			if(pcontact->m_depth - CONTACT_DIFF_EPSILON > scontact->m_depth)
+			{
+				// clearly different depth: take the new contact over and restart the average
+				*pcontact = *scontact;
+				coincident_count = 0;
+			}
+			else if(normal_contact_average
+				&& btFabs(pcontact->m_depth - scontact->m_depth)<CONTACT_DIFF_EPSILON
+				&& coincident_count<MAX_COINCIDENT)
+			{
+				coincident_normals[coincident_count] = scontact->m_normal;
+				coincident_count++;
+			}
+		}
+		else
+		{//add new contact
+			if(normal_contact_average && coincident_count>0)
+			{
+				pcontact->interpolate_normals(coincident_normals,coincident_count);
+				coincident_count = 0;
+			}
+			push_back(*scontact);
+			pcontact = &(*this)[this->size()-1];
+		}
+		last_key = key;
+	}
+
+	// flush the last key group: the loop only averages when a new key starts
+	if(normal_contact_average && coincident_count>0)
+	{
+		pcontact->interpolate_normals(coincident_normals,coincident_count);
+	}
+}
+
+//! Replaces this array by one contact averaging all of \a contacts: the mean point, and the
+//! depth-weighted mean normal, whose length becomes the depth before it is normalized.
+void btContactArray::merge_contacts_unique(const btContactArray & contacts)
+{
+	clear();
+	if(contacts.size()==0) return;
+
+	if(contacts.size()==1)
+	{
+		push_back(contacts[0]);
+		return;
+	}
+
+	GIM_CONTACT average_contact = contacts[0];
+	average_contact.m_normal *= contacts[0].m_depth; // weight the first normal like the others
+	for (int i=1;i<contacts.size() ;i++ )
+	{
+		average_contact.m_point += contacts[i].m_point;
+		average_contact.m_normal += contacts[i].m_normal * contacts[i].m_depth;
+	}
+
+	const btScalar divide_average = 1.0f/((btScalar)contacts.size());
+	average_contact.m_point *= divide_average;
+	average_contact.m_normal *= divide_average;
+
+	average_contact.m_depth = average_contact.m_normal.length();
+	// keep the first contact's normal when the weighted normals cancel out (no division by ~0)
+	if(average_contact.m_depth > SIMD_EPSILON) average_contact.m_normal /= average_contact.m_depth;
+	else average_contact.m_normal = contacts[0].m_normal;
+
+	push_back(average_contact); // previously the result was computed but never stored
+}
+
diff --git a/src/bullet/BulletCollision/Gimpact/btContactProcessing.h b/src/bullet/BulletCollision/Gimpact/btContactProcessing.h
new file mode 100644
index 00000000..0c66f8e1
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btContactProcessing.h
@@ -0,0 +1,145 @@
+#ifndef BT_CONTACT_H_INCLUDED
+#define BT_CONTACT_H_INCLUDED
+
+/*! \file btContactProcessing.h
+\author Francisco Leon Najera
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btAlignedObjectArray.h"
+#include "btTriangleShapeEx.h"
+
+
+
+/**
+Configuration var for applying interpolation of  contact normals
+*/
+#define NORMAL_CONTACT_AVERAGE 1
+
+#define CONTACT_DIFF_EPSILON 0.00001f
+
+///The GIM_CONTACT is an internal GIMPACT structure, similar to btManifoldPoint.
+///@todo: remove and replace GIM_CONTACT by btManifoldPoint.
+class GIM_CONTACT
+{
+public:
+    btVector3 m_point;
+    btVector3 m_normal;
+    btScalar m_depth;//Positive value indicates interpenetration
+    btScalar m_distance;//Padding not for use
+    int m_feature1;//Face number
+    int m_feature2;//Face number
+public:
+    //! Leaves every member uninitialized.
+    GIM_CONTACT()
+    {
+    }
+
+    //! Copies every member, m_distance included, so that no field of the copy is left indeterminate.
+    GIM_CONTACT(const GIM_CONTACT & contact):
+				m_point(contact.m_point),
+				m_normal(contact.m_normal),
+				m_depth(contact.m_depth),
+				m_distance(contact.m_distance),
+				m_feature1(contact.m_feature1),
+				m_feature2(contact.m_feature2)
+    {
+    }
+
+    //! Builds a contact from its parts; the unused m_distance padding is set to zero.
+    GIM_CONTACT(const btVector3 &point,const btVector3 & normal,
+    	 			btScalar depth, int feature1, int feature2):
+				m_point(point),
+				m_normal(normal),
+				m_depth(depth),
+				m_distance(btScalar(0.)),
+				m_feature1(feature1),
+				m_feature2(feature2)
+    {
+    }
+
+	//! Calcs key for coord classification
+    SIMD_FORCE_INLINE unsigned int calc_key_contact() const
+    {
+    	int _coords[] = {
+    		(int)(m_point[0]*1000.0f+1.0f),
+    		(int)(m_point[1]*1333.0f),
+    		(int)(m_point[2]*2133.0f+3.0f)};
+		unsigned int _hash=0;
+		unsigned int *_uitmp = (unsigned int *)(&_coords[0]);
+		_hash = *_uitmp;
+		_uitmp++;
+		_hash += (*_uitmp)<<4;
+		_uitmp++;
+		_hash += (*_uitmp)<<8;
+		return _hash;
+    }
+
+    //! Adds the given normals to m_normal and renormalizes; m_normal is kept when the squared length of the sum is below CONTACT_DIFF_EPSILON.
+    SIMD_FORCE_INLINE void interpolate_normals( btVector3 * normals,int normal_count)
+    {
+    	btVector3 vec_sum(m_normal);
+		for(int i=0;i<normal_count;i++)
+			vec_sum += normals[i];
+
+		const btScalar vec_sum_len = vec_sum.length2(); // squared length of the sum
+		if(vec_sum_len <CONTACT_DIFF_EPSILON) return;
+		m_normal = vec_sum/btSqrt(vec_sum_len);
+    }
+};
+
+
+//! Array of GIM_CONTACT with helpers for appending and merging contacts.
+class btContactArray:public btAlignedObjectArray<GIM_CONTACT>
+{
+public:
+	btContactArray()
+	{
+		reserve(64); // initial reservation for 64 contacts
+	}
+
+	//! Appends one contact built from the given parts.
+	SIMD_FORCE_INLINE void push_contact(
+		const btVector3 &point,const btVector3 & normal,
+		btScalar depth, int feature1, int feature2)
+	{
+		const GIM_CONTACT contact(point,normal,depth,feature1,feature2);
+		push_back(contact);
+	}
+
+	//! Appends one contact per point of \a tricontact, all sharing its separating normal and penetration depth.
+	SIMD_FORCE_INLINE void push_triangle_contacts(
+		const GIM_TRIANGLE_CONTACT & tricontact,
+		int feature1,int feature2)
+	{
+		for(int point_index = 0;point_index<tricontact.m_point_count ;point_index++ )
+			push_contact(tricontact.m_points[point_index],
+				tricontact.m_separating_normal,
+				tricontact.m_penetration_depth,feature1,feature2);
+	}
+
+	void merge_contacts(const btContactArray & contacts, bool normal_contact_average = true);
+	void merge_contacts_unique(const btContactArray & contacts);
+};
+
+
+#endif // BT_CONTACT_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/btGImpactBvh.cpp b/src/bullet/BulletCollision/Gimpact/btGImpactBvh.cpp
new file mode 100644
index 00000000..86323316
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btGImpactBvh.cpp
@@ -0,0 +1,498 @@
+/*! \file btGImpactBvh.cpp
+\author Francisco Leon Najera
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#include "btGImpactBvh.h"
+#include "LinearMath/btQuickprof.h"
+
+#ifdef TRI_COLLISION_PROFILING
+
+btClock g_tree_clock; // clock reset by bt_begin_gim02_tree_time and read by bt_end_gim02_tree_time
+
+float g_accum_tree_collision_time = 0; // sum of the getTimeMicroseconds() readings since the last average
+int g_count_traversing = 0; // number of readings contained in that sum
+
+// Resets the profiling clock; paired with bt_end_gim02_tree_time around a tree traversal.
+void bt_begin_gim02_tree_time()
+{
+	g_tree_clock.reset();
+}
+// Adds the current clock reading (presumably the time since the reset above) to the sum and counts one traversal.
+void bt_end_gim02_tree_time()
+{
+	g_accum_tree_collision_time += g_tree_clock.getTimeMicroseconds();
+	g_count_traversing++;
+}
+
+//! Gets the average time of tree collisions and resets the statistics
+/*!
+The value is the sum of the g_tree_clock.getTimeMicroseconds() readings divided by the number
+of traversals, so the unit is presumably microseconds (not milliseconds) - TODO confirm.
+\return 0 when no traversal has been recorded since the last call
+*/
+float btGImpactBvh::getAverageTreeCollisionTime()
+{
+	if(g_count_traversing == 0) return 0;
+
+	const float average = g_accum_tree_collision_time/(float)g_count_traversing;
+
+	// start a fresh measurement window
+	g_accum_tree_collision_time = 0;
+	g_count_traversing = 0;
+	return average;
+}
+
+#endif //TRI_COLLISION_PROFILING
+
+/////////////////////// btBvhTree /////////////////////////////////
+
+//! Chooses the split axis for a range of primitive boxes.
+/*! \return index of the axis along which the centers of the boxes in [startIndex,endIndex)
+have the largest variance, as reported by btVector3::maxAxis */
+int btBvhTree::_calc_splitting_axis(
+	GIM_BVH_DATA_ARRAY & primitive_boxes, int startIndex,  int endIndex)
+{
+	const int numIndices = endIndex-startIndex;
+
+	// first pass: mean of the box centers
+	btVector3 means(btScalar(0.),btScalar(0.),btScalar(0.));
+	for (int box=startIndex;box<endIndex;box++)
+	{
+		const btAABB & bound = primitive_boxes[box].m_bound;
+		means += btScalar(0.5)*(bound.m_max + bound.m_min);
+	}
+	means *= (btScalar(1.)/(btScalar)numIndices);
+
+	// second pass: per-axis sum of squared deviations from the mean
+	btVector3 variance(btScalar(0.),btScalar(0.),btScalar(0.));
+	for (int box=startIndex;box<endIndex;box++)
+	{
+		const btAABB & bound = primitive_boxes[box].m_bound;
+		const btVector3 deviation = btScalar(0.5)*(bound.m_max + bound.m_min) - means;
+		variance += deviation * deviation;
+	}
+	// divide by n-1; _build_sub_tree turns single-box ranges into leaves before calling this
+	variance *= (btScalar(1.)/	((btScalar)numIndices-1)	);
+
+	return variance.maxAxis();
+}
+
+
+//! Partitions the boxes of [startIndex,endIndex) around their mean center along \a splitAxis.
+/*!
+Boxes whose center lies above the mean are swapped to the front of the range.
+\param primitive_boxes boxes to partition; reordered in place
+\param startIndex first box of the range
+\param endIndex one past the last box of the range
+\param splitAxis axis index used to read the center coordinate
+\return index where the second group starts, replaced by the middle of the range when it
+falls within numIndices/3 of either end
+*/
+int btBvhTree::_sort_and_calc_splitting_index(
+	GIM_BVH_DATA_ARRAY & primitive_boxes, int startIndex,
+	int endIndex, int splitAxis)
+{
+	const int numIndices = endIndex - startIndex;
+
+	// average of centers
+	btVector3 means(btScalar(0.),btScalar(0.),btScalar(0.));
+	for (int box=startIndex;box<endIndex;box++)
+	{
+		const btAABB & bound = primitive_boxes[box].m_bound;
+		means += btScalar(0.5)*(bound.m_max + bound.m_min);
+	}
+	means *= (btScalar(1.)/(btScalar)numIndices);
+	const btScalar splitValue = means[splitAxis];
+
+	//sort leafNodes so all values larger then splitValue comes first, and smaller values start from 'splitIndex'.
+	int splitIndex = startIndex;
+	for (int box=startIndex;box<endIndex;box++)
+	{
+		const btAABB & bound = primitive_boxes[box].m_bound;
+		const btVector3 center = btScalar(0.5)*(bound.m_max + bound.m_min);
+		if (!(center[splitAxis] > splitValue)) continue;
+
+		primitive_boxes.swap(box,splitIndex);
+		splitIndex++;
+	}
+
+	//if the splitIndex causes unbalanced trees, fix this by using the center in between startIndex and endIndex
+	//otherwise the tree-building might fail due to stack-overflows in certain cases.
+	//unbalanced1 is unsafe: it can cause stack overflows
+	//bool unbalanced1 = ((splitIndex==startIndex) || (splitIndex == (endIndex-1)));
+
+	//unbalanced2 should work too: always use center (perfect balanced trees)
+	//bool unbalanced2 = true;
+
+	//this should be safe too:
+	const int rangeBalancedIndices = numIndices/3;
+	const bool too_close_to_start = (splitIndex<=(startIndex+rangeBalancedIndices));
+	const bool too_close_to_end = (splitIndex >=(endIndex-1-rangeBalancedIndices));
+	if (too_close_to_start || too_close_to_end)
+	{
+		splitIndex = startIndex+ (numIndices>>1);
+	}
+
+	btAssert(!((splitIndex==startIndex) || (splitIndex == (endIndex))));
+
+	return splitIndex;
+}
+
+
+//! Recursively builds the subtree for the boxes in [startIndex,endIndex).
+/*!
+The node takes index m_num_nodes and is followed first by its left subtree, then by its right
+subtree. A single box becomes a leaf holding the box data; otherwise the range is partitioned
+and the node stores the union of its boxes plus the node count of its subtree as escape index.
+\param primitive_boxes boxes to organize; reordered by the partition step
+\param startIndex first box of the range
+\param endIndex one past the last box of the range; must be greater than \a startIndex
+*/
+void btBvhTree::_build_sub_tree(GIM_BVH_DATA_ARRAY & primitive_boxes, int startIndex,  int endIndex)
+{
+	const int curIndex = m_num_nodes++;
+	const int boxCount = endIndex-startIndex;
+
+	btAssert(boxCount>0);
+
+	if (boxCount==1)
+	{
+		//We have a leaf node
+		const GIM_BVH_DATA & leaf = primitive_boxes[startIndex];
+		setNodeBound(curIndex,leaf.m_bound);
+		m_node_array[curIndex].setDataIndex(leaf.m_data);
+		return;
+	}
+
+	//calculate Best Splitting Axis and where to split it. Sort the incoming 'leafNodes' array within range 'startIndex/endIndex'.
+	const int splitAxis = _calc_splitting_axis(primitive_boxes,startIndex,endIndex);
+	const int splitIndex = _sort_and_calc_splitting_index(
+			primitive_boxes,startIndex,endIndex,splitAxis);
+
+	//calc this node bounding box
+	btAABB node_bound;
+	node_bound.invalidate();
+
+	for (int box=startIndex;box<endIndex;box++)
+	{
+		node_bound.merge(primitive_boxes[box].m_bound);
+	}
+
+	setNodeBound(curIndex,node_bound);
+
+	//build left branch
+	_build_sub_tree(primitive_boxes, startIndex, splitIndex );
+
+	//build right branch
+	_build_sub_tree(primitive_boxes, splitIndex ,endIndex);
+
+	// every node created since curIndex belongs to this subtree
+	m_node_array[curIndex].setEscapeIndex(m_num_nodes - curIndex);
+}
+
+//! Builds the node array from the primitive boxes (recursively; the boxes get reordered).
+//! An empty primitive list yields an empty tree instead of recursing without end.
+void btBvhTree::build_tree(
+	GIM_BVH_DATA_ARRAY & primitive_boxes)
+{
+	m_num_nodes = 0;
+	// room for two nodes per primitive; _build_sub_tree creates one node per call
+	m_node_array.resize(primitive_boxes.size()*2);
+	if(primitive_boxes.size() == 0) return;
+	_build_sub_tree(primitive_boxes, 0, primitive_boxes.size());
+}
+
+////////////////////////////////////class btGImpactBvh
+
+//! Recomputes every node bound, walking the node array from the last node to the first.
+//! Leaves ask the primitive manager for their box; other nodes merge the bounds of their
+//! left and right nodes, which are looked up through getLeftNode/getRightNode.
+void btGImpactBvh::refit()
+{
+	for(int node = getNodeCount()-1; node >= 0; node--)
+	{
+		if(!isLeafNode(node))
+		{
+			// internal node: merge the bounds of both children
+			btAABB bound;
+			bound.invalidate();
+
+			btAABB child_box;
+			const int left_child = getLeftNode(node);
+			if(left_child)
+			{
+				getNodeBound(left_child,child_box);
+				bound.merge(child_box);
+			}
+
+			const int right_child = getRightNode(node);
+			if(right_child)
+			{
+				getNodeBound(right_child,child_box);
+				bound.merge(child_box);
+			}
+
+			setNodeBound(node,bound);
+			continue;
+		}
+
+		// leaf node: fetch the current box of its primitive
+		btAABB leafbox;
+		m_primitive_manager->get_primitive_box(getNodeData(node),leafbox);
+		setNodeBound(node,leafbox);
+	}
+}
+
+//! Rebuilds the whole tree from the boxes currently reported by the primitive manager.
+void btGImpactBvh::buildSet()
+{
+	const int primitive_count = m_primitive_manager->get_primitive_count();
+	// one entry per primitive: its box plus its own index as payload
+	GIM_BVH_DATA_ARRAY primitive_boxes;
+	primitive_boxes.resize(primitive_count);
+	for (int prim = 0;prim<primitive_boxes.size() ;prim++ )
+	{
+		GIM_BVH_DATA & entry = primitive_boxes[prim];
+		m_primitive_manager->get_primitive_box(prim,entry.m_bound);
+		entry.m_data = prim;
+	}
+	m_box_tree.build_tree(primitive_boxes);
+}
+
+//! Collects the primitives whose leaf box overlaps \a box.
+/*!
+Walks the node array front to back, jumping over the subtree of any internal node whose
+bound misses \a box.
+\param box query volume
+\param collided_results receives the data index of every overlapping leaf (appended)
+\return true when \a collided_results is not empty afterwards
+*/
+bool btGImpactBvh::boxQuery(const btAABB & box, btAlignedObjectArray<int> & collided_results) const
+{
+	const int numNodes = getNodeCount();
+	for (int curIndex = 0; curIndex < numNodes; )
+	{
+		btAABB bound;
+		getNodeBound(curIndex,bound);
+		const bool aabbOverlap = bound.has_collision(box);
+
+		if (isLeafNode(curIndex))
+		{
+			if (aabbOverlap) collided_results.push_back(getNodeData(curIndex));
+			curIndex++;
+		}
+		else if (aabbOverlap)
+		{
+			//next subnode
+			curIndex++;
+		}
+		else
+		{
+			//skip node together with its subtree
+			curIndex+= getEscapeNodeIndex(curIndex);
+		}
+	}
+	return collided_results.size()>0;
+}
+
+
+
+//! Collects the primitives whose leaf box is hit by a ray.
+/*!
+Same traversal as boxQuery, testing each node bound with btAABB::collide_ray.
+\param ray_dir direction of the ray
+\param ray_origin start point of the ray
+\param collided_results receives the data index of every leaf whose box the ray hits (appended)
+\return true when \a collided_results is not empty afterwards
+*/
+bool btGImpactBvh::rayQuery(
+	const btVector3 & ray_dir,const btVector3 & ray_origin ,
+	btAlignedObjectArray<int> & collided_results) const
+{
+	const int numNodes = getNodeCount();
+	for (int curIndex = 0; curIndex < numNodes; )
+	{
+		btAABB bound;
+		getNodeBound(curIndex,bound);
+		const bool aabbOverlap = bound.collide_ray(ray_origin,ray_dir);
+
+		if (isLeafNode(curIndex))
+		{
+			if (aabbOverlap) collided_results.push_back(getNodeData( curIndex));
+			curIndex++;
+		}
+		else if (aabbOverlap)
+		{
+			//next subnode
+			curIndex++;
+		}
+		else
+		{
+			//skip node together with its subtree
+			curIndex+= getEscapeNodeIndex(curIndex);
+		}
+	}
+	return collided_results.size()>0;
+}
+
+
+//! Tests the bound of node0 in boxset0 against the bound of node1 in boxset1.
+//! \a trans_cache_1to0 and \a complete_primitive_tests are handed to btAABB::overlapping_trans_cache.
+SIMD_FORCE_INLINE bool _node_collision(
+	btGImpactBvh * boxset0, btGImpactBvh * boxset1,
+	const BT_BOX_BOX_TRANSFORM_CACHE & trans_cache_1to0,
+	int node0 ,int node1, bool complete_primitive_tests)
+{
+	btAABB bound_of_0, bound_of_1;
+	boxset0->getNodeBound(node0,bound_of_0);
+	boxset1->getNodeBound(node1,bound_of_1);
+
+	const bool overlapping = bound_of_0.overlapping_trans_cache(
+		bound_of_1,trans_cache_1to0,complete_primitive_tests );
+	return overlapping;
+}
+
+
+//! Recursive collision routine between two box trees (it recurses on the call stack, despite the former "stackless" label)
+static void _find_collision_pairs_recursive(
+	btGImpactBvh * boxset0, btGImpactBvh * boxset1,
+	btPairSet * collision_pairs,
+	const BT_BOX_BOX_TRANSFORM_CACHE & trans_cache_1to0,
+	int node0, int node1, bool complete_primitive_tests)
+{
+	// node0 indexes boxset0 and node1 indexes boxset1; overlapping leaf pairs are appended to
+	// collision_pairs as (data of node0, data of node1). Every recursive call below passes false
+	// for complete_primitive_tests. Prune first: non-overlapping node bounds end this branch.
+	if( _node_collision(
+		boxset0,boxset1,trans_cache_1to0,
+		node0,node1,complete_primitive_tests) ==false) return;//avoid colliding internal nodes
+
+	if(boxset0->isLeafNode(node0))
+	{
+		if(boxset1->isLeafNode(node1))
+		{
+			// collision result: both nodes are leaves, record their data indices
+			collision_pairs->push_pair(
+				boxset0->getNodeData(node0),boxset1->getNodeData(node1));
+			return;
+		}
+		else
+		{
+			// node0 is a leaf, node1 is not: test node0 against both children of node1
+			//collide left recursive
+
+			_find_collision_pairs_recursive(
+								boxset0,boxset1,
+								collision_pairs,trans_cache_1to0,
+								node0,boxset1->getLeftNode(node1),false);
+
+			//collide right recursive
+			_find_collision_pairs_recursive(
+								boxset0,boxset1,
+								collision_pairs,trans_cache_1to0,
+								node0,boxset1->getRightNode(node1),false);
+
+
+		}
+	}
+	else
+	{
+		if(boxset1->isLeafNode(node1))
+		{
+			// node1 is a leaf, node0 is not: test both children of node0 against node1
+			//collide left recursive
+			_find_collision_pairs_recursive(
+								boxset0,boxset1,
+								collision_pairs,trans_cache_1to0,
+								boxset0->getLeftNode(node0),node1,false);
+
+
+			//collide right recursive
+
+			_find_collision_pairs_recursive(
+								boxset0,boxset1,
+								collision_pairs,trans_cache_1to0,
+								boxset0->getRightNode(node0),node1,false);
+
+
+		}
+		else
+		{
+			// neither node is a leaf: test all four child pairs, starting with left0 left1
+
+
+
+			_find_collision_pairs_recursive(
+				boxset0,boxset1,
+				collision_pairs,trans_cache_1to0,
+				boxset0->getLeftNode(node0),boxset1->getLeftNode(node1),false);
+
+			//collide left0 right1
+
+			_find_collision_pairs_recursive(
+				boxset0,boxset1,
+				collision_pairs,trans_cache_1to0,
+				boxset0->getLeftNode(node0),boxset1->getRightNode(node1),false);
+
+
+			//collide right0 left1
+
+			_find_collision_pairs_recursive(
+				boxset0,boxset1,
+				collision_pairs,trans_cache_1to0,
+				boxset0->getRightNode(node0),boxset1->getLeftNode(node1),false);
+
+			//collide right0 right1
+
+			_find_collision_pairs_recursive(
+				boxset0,boxset1,
+				collision_pairs,trans_cache_1to0,
+				boxset0->getRightNode(node0),boxset1->getRightNode(node1),false);
+
+		}// else if node1 is not a leaf
+	}// else if node0 is not a leaf
+}
+
+
+//! Finds the pairs of primitives whose leaf boxes overlap between two box sets.
+//! \a collision_pairs receives (data index in boxset0, data index in boxset1) pairs; nothing is added when either set has no nodes.
+void btGImpactBvh::find_collision(btGImpactBvh * boxset0, const btTransform & trans0,
+		btGImpactBvh * boxset1, const btTransform & trans1,
+		btPairSet & collision_pairs)
+{
+	if(boxset0->getNodeCount()==0) return;
+	if(boxset1->getNodeCount()==0) return;
+
+	// cached relative transform built from trans0 and trans1 (presumably mapping set 1 into the frame of set 0)
+	BT_BOX_BOX_TRANSFORM_CACHE trans_cache_1to0;
+	trans_cache_1to0.calc_from_homogenic(trans0,trans1);
+#ifdef TRI_COLLISION_PROFILING
+	bt_begin_gim02_tree_time();
+#endif //TRI_COLLISION_PROFILING
+	// start at both roots; only this first test asks for the complete primitive tests
+	_find_collision_pairs_recursive(
+		boxset0,boxset1,
+		&collision_pairs,trans_cache_1to0,0,0,true);
+#ifdef TRI_COLLISION_PROFILING
+	bt_end_gim02_tree_time();
+#endif //TRI_COLLISION_PROFILING
+}
+
diff --git a/src/bullet/BulletCollision/Gimpact/btGImpactBvh.h b/src/bullet/BulletCollision/Gimpact/btGImpactBvh.h
new file mode 100644
index 00000000..6174ae97
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btGImpactBvh.h
@@ -0,0 +1,396 @@
+#ifndef GIM_BOX_SET_H_INCLUDED
+#define GIM_BOX_SET_H_INCLUDED
+
+/*! \file btGImpactBvh.h
+\author Francisco Leon Najera
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+#include "btBoxCollision.h"
+#include "btTriangleShapeEx.h"
+
+
+
+
+
+//! Overlapping pair
+/*! Holds two indices; what each index refers to is decided by the code that fills the pair. */
+struct GIM_PAIR
+{
+	int m_index1;
+	int m_index2;
+
+	//! Leaves both indices uninitialized.
+	GIM_PAIR()
+	{}
+
+	GIM_PAIR(const GIM_PAIR & p)
+		: m_index1(p.m_index1), m_index2(p.m_index2)
+	{}
+
+	//! Stores the indices in the given order.
+	GIM_PAIR(int index1, int index2)
+		: m_index1(index1), m_index2(index2)
+	{}
+};
+
+//! A pairset array
+class btPairSet: public btAlignedObjectArray<GIM_PAIR>
+{
+public:
+	btPairSet()
+	{
+		reserve(32); // initial reservation for 32 pairs
+	}
+	inline void push_pair(int index1,int index2) //!< appends the pair (index1,index2)
+	{
+		const GIM_PAIR entry(index1,index2);
+		push_back(entry);
+	}
+	inline void push_pair_inv(int index1,int index2) //!< appends the swapped pair (index2,index1)
+	{
+		push_pair(index2,index1);
+	}
+};
+
+
+///GIM_BVH_DATA is an internal GIMPACT collision structure to contain axis aligned bounding box
+struct GIM_BVH_DATA
+{
+	btAABB m_bound; // axis aligned bounding box of one primitive
+	int m_data; // payload; btGImpactBvh::buildSet stores the primitive index here
+};
+
+//! Node Structure for trees
+//! One integer stores either the data index of a leaf (value >= 0) or the negated
+//! escape index of an internal node (value < 0).
+class GIM_BVH_TREE_NODE
+{
+public:
+	btAABB m_bound;
+protected:
+	int	m_escapeIndexOrDataIndex;
+public:
+	GIM_BVH_TREE_NODE()
+		: m_escapeIndexOrDataIndex(0)
+	{
+	}
+
+	SIMD_FORCE_INLINE bool isLeafNode() const
+	{
+		//skipindex is negative (internal node), triangleindex >=0 (leafnode)
+		return !(m_escapeIndexOrDataIndex<0);
+	}
+
+	//! Escape index of an internal node (the stored value with its sign flipped).
+	SIMD_FORCE_INLINE int getEscapeIndex() const
+	{
+		return -m_escapeIndexOrDataIndex;
+	}
+
+	SIMD_FORCE_INLINE void setEscapeIndex(int index)
+	{
+		m_escapeIndexOrDataIndex = -index;
+	}
+
+	//! Data index of a leaf (the stored value as is).
+	SIMD_FORCE_INLINE int getDataIndex() const
+	{
+		return m_escapeIndexOrDataIndex;
+	}
+
+	SIMD_FORCE_INLINE void setDataIndex(int index)
+	{
+		m_escapeIndexOrDataIndex = index;
+	}
+};
+
+
+class GIM_BVH_DATA_ARRAY:public btAlignedObjectArray<GIM_BVH_DATA> // array of GIM_BVH_DATA entries, adds no members
+{
+};
+
+
+class GIM_BVH_TREE_NODE_ARRAY:public btAlignedObjectArray<GIM_BVH_TREE_NODE> // array of GIM_BVH_TREE_NODE entries, adds no members
+{
+};
+
+
+
+
+//! Basic Box tree structure
+class btBvhTree
+{
+protected:
+	int m_num_nodes;
+	GIM_BVH_TREE_NODE_ARRAY m_node_array;
+protected:
+	int _sort_and_calc_splitting_index(
+		GIM_BVH_DATA_ARRAY & primitive_boxes,
+		 int startIndex,  int endIndex, int splitAxis);
+
+	int _calc_splitting_axis(GIM_BVH_DATA_ARRAY & primitive_boxes, int startIndex,  int endIndex);
+
+	void _build_sub_tree(GIM_BVH_DATA_ARRAY & primitive_boxes, int startIndex,  int endIndex);
+public:
+	btBvhTree()
+		: m_num_nodes(0)
+	{
+	}
+
+	//! prototype functions for box tree management
+	//!@{
+	void build_tree(GIM_BVH_DATA_ARRAY & primitive_boxes);
+
+	SIMD_FORCE_INLINE void clearNodes()
+	{
+		m_num_nodes = 0;
+		m_node_array.clear();
+	}
+
+	//! node count
+	SIMD_FORCE_INLINE int getNodeCount() const
+	{
+		return m_num_nodes;
+	}
+
+	//! tells if the node is a leaf
+	SIMD_FORCE_INLINE bool isLeafNode(int nodeindex) const
+	{
+		return get_node_pointer(nodeindex)->isLeafNode();
+	}
+	//! data index stored in a leaf node
+	SIMD_FORCE_INLINE int getNodeData(int nodeindex) const
+	{
+		return get_node_pointer(nodeindex)->getDataIndex();
+	}
+	//! copies the bound of a node into \a bound
+	SIMD_FORCE_INLINE void getNodeBound(int nodeindex, btAABB & bound) const
+	{
+		bound = get_node_pointer(nodeindex)->m_bound;
+	}
+	//! overwrites the bound of a node
+	SIMD_FORCE_INLINE void setNodeBound(int nodeindex, const btAABB & bound)
+	{
+		m_node_array[nodeindex].m_bound = bound;
+	}
+	//! the left child directly follows its parent in the node array
+	SIMD_FORCE_INLINE int getLeftNode(int nodeindex) const
+	{
+		return nodeindex+1;
+	}
+	//! the right child follows the whole left subtree (a single node when the left child is a leaf)
+	SIMD_FORCE_INLINE int getRightNode(int nodeindex) const
+	{
+		const GIM_BVH_TREE_NODE & left_child = m_node_array[nodeindex+1];
+		return left_child.isLeafNode() ? nodeindex+2 : nodeindex+1 + left_child.getEscapeIndex();
+	}
+	//! escape index of an internal node: the number of nodes to advance to get past its subtree
+	SIMD_FORCE_INLINE int getEscapeNodeIndex(int nodeindex) const
+	{
+		return get_node_pointer(nodeindex)->getEscapeIndex();
+	}
+	//! pointer to the node at \a index
+	SIMD_FORCE_INLINE const GIM_BVH_TREE_NODE * get_node_pointer(int index = 0) const
+	{
+		return &m_node_array[index];
+	}
+
+	//!@}
+};
+
+
+//! Prototype Base class for primitive classification
+/*!
+This class is a wrapper for primitive collections.
+This tells relevant info for the Bounding Box set classes, which take care of space classification.
+This class can manage Compound shapes and trimeshes, and if it is managing trimesh then the  Hierarchy Bounding Box classes will take advantage of primitive Vs Box overlapping tests for getting optimal results and less Per Box comparisons.
+*/
+class btPrimitiveManagerBase
+{
+public:
+
+	virtual ~btPrimitiveManagerBase() {}
+
+	//! determines if this manager consist on only triangles, which special case will be optimized
+	virtual bool is_trimesh() const = 0;
+	virtual int get_primitive_count() const = 0; //!< number of primitives; btGImpactBvh::buildSet requests one box per primitive
+	virtual void get_primitive_box(int prim_index ,btAABB & primbox) const = 0; //!< writes the box of primitive \a prim_index into \a primbox
+	//! retrieves only the points of the triangle, and the collision margin
+	virtual void get_primitive_triangle(int prim_index,btPrimitiveTriangle & triangle) const= 0;
+};
+
+
+//! Structure for containing Boxes
+/*!
+This class offers a structure for managing a box tree of primitives.
+Requires a Primitive prototype (like btPrimitiveManagerBase )
+*/
+class btGImpactBvh
+{
+protected:
+	btBvhTree m_box_tree; // node tree that the accessors below forward to
+	btPrimitiveManagerBase * m_primitive_manager; // primitive source; NULL after default construction
+
+protected:
+	//stackless refit
+	void refit();
+public:
+
+	//! this constructor doesn't build the tree. you must call buildSet
+	btGImpactBvh()
+		: m_primitive_manager(NULL)
+	{
+	}
+
+	//! this constructor doesn't build the tree. you must call buildSet
+	btGImpactBvh(btPrimitiveManagerBase * primitive_manager)
+		: m_primitive_manager(primitive_manager)
+	{
+	}
+
+	SIMD_FORCE_INLINE btAABB getGlobalBox()  const // copy of the bound stored in node 0
+	{
+		btAABB rootbound;
+		m_box_tree.getNodeBound(0, rootbound);
+		return rootbound;
+	}
+
+	SIMD_FORCE_INLINE void setPrimitiveManager(btPrimitiveManagerBase * primitive_manager)
+	{
+		this->m_primitive_manager = primitive_manager; // only the pointer is stored
+	}
+
+	SIMD_FORCE_INLINE btPrimitiveManagerBase * getPrimitiveManager() const
+	{
+		return this->m_primitive_manager;
+	}
+
+
+//! node manager prototype functions
+///@{
+
+	//! this attempts to refit the box set.
+	SIMD_FORCE_INLINE void update()
+	{
+		this->refit();
+	}
+
+	//! this rebuilds the entire set
+	void buildSet();
+
+	//! returns the indices of the primitives in the m_primitive_manager
+	bool boxQuery(const btAABB & box, btAlignedObjectArray<int> & collided_results) const;
+
+	//! same as boxQuery, but a copy of the box is first transformed by the given transform
+	SIMD_FORCE_INLINE bool boxQueryTrans(const btAABB & box,
+		 const btTransform & transform, btAlignedObjectArray<int> & collided_results) const
+	{
+		btAABB movedbox(box);
+		movedbox.appy_transform(transform);
+		return boxQuery(movedbox,collided_results);
+	}
+
+	//! returns the indices of the primitives in the m_primitive_manager
+	bool rayQuery(
+		const btVector3 & ray_dir,const btVector3 & ray_origin ,
+		btAlignedObjectArray<int> & collided_results) const;
+
+	//! tells if this set has hierarchy (always true for this class)
+	SIMD_FORCE_INLINE bool hasHierarchy() const
+	{
+		return true;
+	}
+
+	//! tells if this set is a trimesh; dereferences m_primitive_manager without a NULL check
+	SIMD_FORCE_INLINE bool isTrimesh()  const
+	{
+		return m_primitive_manager->is_trimesh();
+	}
+
+	//! node count of the underlying tree
+	SIMD_FORCE_INLINE int getNodeCount() const
+	{
+		return m_box_tree.getNodeCount();
+	}
+
+	//! tells if the node is a leaf
+	SIMD_FORCE_INLINE bool isLeafNode(int nodeindex) const
+	{
+		return m_box_tree.isLeafNode(nodeindex);
+	}
+	//! data index stored in the node
+	SIMD_FORCE_INLINE int getNodeData(int nodeindex) const
+	{
+		return m_box_tree.getNodeData(nodeindex);
+	}
+	//! copies the node's box into bound
+	SIMD_FORCE_INLINE void getNodeBound(int nodeindex, btAABB & bound)  const
+	{
+		m_box_tree.getNodeBound(nodeindex, bound);
+	}
+	//! overwrites the node's box with bound
+	SIMD_FORCE_INLINE void setNodeBound(int nodeindex, const btAABB & bound)
+	{
+		m_box_tree.setNodeBound(nodeindex, bound);
+	}
+
+	//! index of the left child, as computed by the tree
+	SIMD_FORCE_INLINE int getLeftNode(int nodeindex) const
+	{
+		return m_box_tree.getLeftNode(nodeindex);
+	}
+	//! index of the right child, as computed by the tree
+	SIMD_FORCE_INLINE int getRightNode(int nodeindex) const
+	{
+		return m_box_tree.getRightNode(nodeindex);
+	}
+	//! escape index stored in the node
+	SIMD_FORCE_INLINE int getEscapeNodeIndex(int nodeindex) const
+	{
+		return m_box_tree.getEscapeNodeIndex(nodeindex);
+	}
+	//! asks the primitive manager for the triangle referenced by the node's data index
+	SIMD_FORCE_INLINE void getNodeTriangle(int nodeindex,btPrimitiveTriangle & triangle) const
+	{
+		m_primitive_manager->get_primitive_triangle(m_box_tree.getNodeData(nodeindex),triangle);
+	}
+
+	//! pointer to the node at index (first node by default)
+	SIMD_FORCE_INLINE const GIM_BVH_TREE_NODE * get_node_pointer(int index = 0) const
+	{
+		return m_box_tree.get_node_pointer(index);
+	}
+///@}
+#ifdef TRI_COLLISION_PROFILING
+	static float getAverageTreeCollisionTime();
+#endif //TRI_COLLISION_PROFILING
+
+	static void find_collision(btGImpactBvh * boxset1, const btTransform & trans1,
+		btGImpactBvh * boxset2, const btTransform & trans2,
+		btPairSet & collision_pairs);
+};
+
+
+#endif // GIM_BOXPRUNING_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.cpp b/src/bullet/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.cpp
new file mode 100644
index 00000000..2f2c09ff
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.cpp
@@ -0,0 +1,904 @@
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+/*
+Author: Francisco Leon Najera
+Concave-Concave Collision
+
+*/
+
+#include "BulletCollision/CollisionDispatch/btManifoldResult.h"
+#include "LinearMath/btIDebugDraw.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btBoxShape.h"
+#include "btGImpactCollisionAlgorithm.h"
+#include "btContactProcessing.h"
+#include "LinearMath/btQuickprof.h"
+
+
+//! Thin subclass of btStaticPlaneShape that exposes its plane equation (normal + constant)
+class btPlaneShape : public btStaticPlaneShape
+{
+public:
+	//! forwards the plane normal v and the plane constant f to btStaticPlaneShape
+	btPlaneShape(const btVector3& v, float f)
+		:btStaticPlaneShape(v,f)
+	{
+	}
+
+	//! writes (normal[0], normal[1], normal[2], constant) into equation, untransformed
+	void get_plane_equation(btVector4 &equation)
+	{
+		for(int axis = 0; axis < 3; ++axis)
+			equation[axis] = m_planeNormal[axis];
+		equation[3] = m_planeConstant;
+	}
+
+	//! writes the plane equation with the normal multiplied by the basis of trans
+	void get_plane_equation_transformed(const btTransform & trans,btVector4 &equation)
+	{
+		const btMatrix3x3 & basis = trans.getBasis();
+		for(int row = 0; row < 3; ++row)
+			equation[row] = basis.getRow(row).dot(m_planeNormal);
+		equation[3] = trans.getOrigin().dot(m_planeNormal) + m_planeConstant; // uses the untransformed normal
+	}
+};
+
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////
+#ifdef TRI_COLLISION_PROFILING
+
+btClock g_triangle_clock; // stopwatch shared by the two helpers below
+// running total of the measured times, and the number of samples in that total
+float g_accum_triangle_collision_time = 0;
+int g_count_triangle_collision = 0;
+
+void bt_begin_gim02_tri_time() // restarts the stopwatch
+{
+	g_triangle_clock.reset();
+}
+
+void bt_end_gim02_tri_time() // adds the elapsed microseconds to the total and counts one sample
+{
+	++g_count_triangle_collision;
+	g_accum_triangle_collision_time += g_triangle_clock.getTimeMicroseconds();
+}
+#endif //TRI_COLLISION_PROFILING
+//! Retrieving child shapes
+/*!
+Declared here due to insufficient space on Pool allocators
+*/
+//!@{
+class GIM_ShapeRetriever
+{
+public:
+	btGImpactShapeInterface * m_gim_shape; // shape whose children are handed out
+	btTriangleShapeEx m_trishape; // scratch triangle, overwritten by each triangle retrieval
+	btTetrahedronShapeEx m_tetrashape; // scratch tetrahedron, overwritten by each tetrahedron retrieval
+
+public:
+	class ChildShapeRetriever
+	{
+	public:
+		GIM_ShapeRetriever * m_parent; // the GIM_ShapeRetriever constructor sets this on the selected retriever only
+		virtual btCollisionShape * getChildShape(int index) // generic case: ask the shape itself
+		{
+			return (m_parent->m_gim_shape)->getChildShape(index);
+		}
+		virtual ~ChildShapeRetriever() {}
+	};
+
+	class TriangleShapeRetriever:public ChildShapeRetriever
+	{
+	public:
+		virtual btCollisionShape * getChildShape(int index) // fills and returns the parent's scratch triangle
+		{
+			GIM_ShapeRetriever * owner = m_parent;
+			owner->m_gim_shape->getBulletTriangle(index,owner->m_trishape);
+			return &owner->m_trishape;
+		}
+		virtual ~TriangleShapeRetriever() {}
+	};
+
+	class TetraShapeRetriever:public ChildShapeRetriever
+	{
+	public:
+		virtual btCollisionShape * getChildShape(int index) // fills and returns the parent's scratch tetrahedron
+		{
+			GIM_ShapeRetriever * owner = m_parent;
+			owner->m_gim_shape->getBulletTetrahedron(index,owner->m_tetrashape);
+			return &owner->m_tetrashape;
+		}
+	};
+public:
+	ChildShapeRetriever m_child_retriever;
+	TriangleShapeRetriever m_tri_retriever;
+	TetraShapeRetriever  m_tetra_retriever;
+	ChildShapeRetriever * m_current_retriever; // points at one of the three retrievers above
+
+	GIM_ShapeRetriever(btGImpactShapeInterface * gim_shape)
+		: m_gim_shape(gim_shape)
+	{
+		// Default to the generic retriever; switch to a specialised one when the
+		// shape asks for it. Triangles are asked first, tetrahedrons only afterwards.
+		ChildShapeRetriever * chosen = &m_child_retriever;
+		if(gim_shape->needsRetrieveTriangles())
+		{
+			chosen = &m_tri_retriever;
+		}
+		else if(gim_shape->needsRetrieveTetrahedrons())
+		{
+			chosen = &m_tetra_retriever;
+		}
+
+		// only the retriever that was picked receives the back-pointer
+		chosen->m_parent = this;
+		m_current_retriever = chosen;
+	}
+
+	btCollisionShape * getChildShape(int index) // dispatches to the retriever picked in the constructor
+	{
+		return m_current_retriever->getChildShape(index);
+	}
+
+
+};
+
+
+
+//!@}
+
+
+#ifdef TRI_COLLISION_PROFILING
+
+//! Gets the average time of tree collisions as reported by btGImpactBoxSet (documented as milliseconds; TODO confirm the unit)
+float btGImpactCollisionAlgorithm::getAverageTreeCollisionTime()
+{
+	const float avgtime = btGImpactBoxSet::getAverageTreeCollisionTime();
+	return avgtime;
+}
+
+//! Gets the average time of triangle collisions since the previous call and resets the statistics
+float btGImpactCollisionAlgorithm::getAverageTriangleCollisionTime()
+{
+	// no samples recorded: report zero and leave the accumulators untouched
+	if(g_count_triangle_collision == 0) return 0;
+	// samples are accumulated from btClock::getTimeMicroseconds(), so this mean is in microseconds
+	const float avgtime = g_accum_triangle_collision_time / (float)g_count_triangle_collision;
+
+	g_count_triangle_collision = 0;
+	g_accum_triangle_collision_time = 0;
+
+	return avgtime;
+}
+
+#endif //TRI_COLLISION_PROFILING
+
+
+
+btGImpactCollisionAlgorithm::btGImpactCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+: btActivatingCollisionAlgorithm(ci,body0,body1)
+{
+	m_convex_algorithm = NULL; // no convex algorithm has been obtained yet
+	m_manifoldPtr = NULL; // no manifold has been obtained yet
+}
+
+btGImpactCollisionAlgorithm::~btGImpactCollisionAlgorithm()
+{
+	this->clearCache(); // the same cleanup processCollision() runs at the start of every pass
+}
+
+
+
+
+
+void btGImpactCollisionAlgorithm::addContactPoint(btCollisionObject * body0,
+				btCollisionObject * body1,
+				const btVector3 & point,
+				const btVector3 & normal,
+				btScalar distance)
+{ // Adds one contact to m_resultOut, tagged with the part/face ids currently stored on the algorithm.
+	m_resultOut->setShapeIdentifiersA(m_part0,m_triface0); // ids for side A
+	m_resultOut->setShapeIdentifiersB(m_part1,m_triface1); // ids for side B
+	checkManifold(body0,body1); // the bodies are used only for this call
+	m_resultOut->addContactPoint(normal,point,distance); // note the argument order: normal first, then point
+}
+
+
+void btGImpactCollisionAlgorithm::shape_vs_shape_collision(
+					  btCollisionObject * body0,
+					  btCollisionObject * body1,
+					  btCollisionShape * shape0,
+					  btCollisionShape * shape1)
+{
+	// Remember the shapes the bodies currently hold so they can be put back afterwards.
+	btCollisionShape * const savedShape0 = body0->getCollisionShape();
+	btCollisionShape * const savedShape1 = body1->getCollisionShape();
+
+	body0->internalSetTemporaryCollisionShape(shape0);
+	body1->internalSetTemporaryCollisionShape(shape1);
+
+	// A throw-away algorithm is created for exactly this pair of temporary shapes.
+	btCollisionAlgorithm * const pairAlgorithm = newAlgorithm(body0,body1);
+	// post (per the original note): checkManifold has been called by newAlgorithm
+
+	m_resultOut->setShapeIdentifiersA(m_part0,m_triface0);
+	m_resultOut->setShapeIdentifiersB(m_part1,m_triface1);
+
+	pairAlgorithm->processCollision(body0,body1,*m_dispatchInfo,m_resultOut);
+
+	// explicit destructor call, then the storage is handed back to the dispatcher
+	pairAlgorithm->~btCollisionAlgorithm();
+	m_dispatcher->freeCollisionAlgorithm(pairAlgorithm);
+
+	body0->internalSetTemporaryCollisionShape(savedShape0);
+	body1->internalSetTemporaryCollisionShape(savedShape1);
+}
+
+void btGImpactCollisionAlgorithm::convex_vs_convex_collision(
+					  btCollisionObject * body0,
+					  btCollisionObject * body1,
+					  btCollisionShape * shape0,
+					  btCollisionShape * shape1)
+{
+	// Swap the given child shapes in for the duration of the call and put the
+	// shapes the bodies held before back at the end.
+	btCollisionShape * const savedShape0 = body0->getCollisionShape();
+	btCollisionShape * const savedShape1 = body1->getCollisionShape();
+
+	body0->internalSetTemporaryCollisionShape(shape0);
+	body1->internalSetTemporaryCollisionShape(shape1);
+
+	m_resultOut->setShapeIdentifiersA(m_part0,m_triface0);
+	m_resultOut->setShapeIdentifiersB(m_part1,m_triface1);
+
+	// unlike shape_vs_shape_collision, the algorithm held in m_convex_algorithm is used here
+	checkConvexAlgorithm(body0,body1);
+	m_convex_algorithm->processCollision(body0,body1,*m_dispatchInfo,m_resultOut);
+
+	body0->internalSetTemporaryCollisionShape(savedShape0);
+	body1->internalSetTemporaryCollisionShape(savedShape1);
+}
+
+
+
+
+void btGImpactCollisionAlgorithm::gimpact_vs_gimpact_find_pairs(
+					  const btTransform & trans0,
+					  const btTransform & trans1,
+					  btGImpactShapeInterface * shape0,
+					  btGImpactShapeInterface * shape1,btPairSet & pairset)
+{
+	// Collects into pairset the (child of shape0, child of shape1) index pairs whose
+	// boxes overlap; trans0/trans1 are the transforms applied to shape0/shape1.
+	if(shape0->hasBoxSet() && shape1->hasBoxSet())
+	{
+		btGImpactBoxSet::find_collision(shape0->getBoxSet(),trans0,shape1->getBoxSet(),trans1,pairset);
+	}
+	else
+	{
+		btAABB boxshape0;
+		btAABB boxshape1;
+		int i = shape0->getNumChildShapes();
+		// brute force: the box of every child of shape0 against the box of every child of shape1
+		while(i--)
+		{
+			shape0->getChildAabb(i,trans0,boxshape0.m_min,boxshape0.m_max);
+
+			int j = shape1->getNumChildShapes();
+			while(j--)
+			{
+				shape1->getChildAabb(j,trans1,boxshape1.m_min,boxshape1.m_max);
+				// children of shape1 are indexed by j; using i here tested the wrong box and could run past shape1's children
+				if(boxshape1.has_collision(boxshape0))
+				{
+					pairset.push_pair(i,j);
+				}
+			}
+		}
+	}
+}
+
+
+void btGImpactCollisionAlgorithm::gimpact_vs_shape_find_pairs(
+					  const btTransform & trans0,
+					  const btTransform & trans1,
+					  btGImpactShapeInterface * shape0,
+					  btCollisionShape * shape1,
+					  btAlignedObjectArray<int> & collided_primitives)
+{
+	// Appends to collided_primitives the indices of the children of shape0 whose
+	// box overlaps the box of shape1. trans0/trans1 are the transforms applied to
+	// shape0/shape1.
+	// Box of shape1; the transform used to compute it depends on the branch below.
+	btAABB boxshape;
+
+	if(shape0->hasBoxSet())
+	{
+		// The box set of shape0 is queried directly, so the box of shape1 is
+		// computed with inverse(trans0) * trans1.
+		btTransform trans1to0 = trans0.inverse();
+		trans1to0 *= trans1;
+
+		shape1->getAabb(trans1to0,boxshape.m_min,boxshape.m_max);
+		shape0->getBoxSet()->boxQuery(boxshape, collided_primitives);
+		return;
+	}
+
+	// No box set: test the box of every child of shape0 against the box of
+	// shape1, each computed with the transform passed in for its shape.
+	shape1->getAabb(trans1,boxshape.m_min,boxshape.m_max);
+
+	btAABB childbox;
+	for(int child = shape0->getNumChildShapes(); child--; )
+	{
+		shape0->getChildAabb(child,trans0,childbox.m_min,childbox.m_max);
+
+		if(boxshape.has_collision(childbox))
+		{
+			collided_primitives.push_back(child);
+		}
+	}
+}
+
+
+void btGImpactCollisionAlgorithm::collide_gjk_triangles(btCollisionObject * body0,
+				  btCollisionObject * body1,
+				  btGImpactMeshShapePart * shape0,
+				  btGImpactMeshShapePart * shape1,
+				  const int * pairs, int pair_count)
+{
+	// Triangle path for two mesh parts that is selected in gimpact_vs_gimpact
+	// when BULLET_TRIANGLE_COLLISION is defined.
+	// pairs is read as pair_count consecutive (face of shape0, face of shape1)
+	// index pairs; both triangle shapes below are reused for every pair.
+	btTriangleShapeEx tri0;
+	btTriangleShapeEx tri1;
+
+	// the triangles are fetched between these lock/unlock calls
+	shape0->lockChildShapes();
+	shape1->lockChildShapes();
+
+	for(const int * cursor = pairs; pair_count--; cursor += 2)
+	{
+		// the face ids are kept on the algorithm; convex_vs_convex_collision
+		// passes them on as shape identifiers
+		m_triface0 = cursor[0];
+		m_triface1 = cursor[1];
+
+		shape0->getBulletTriangle(m_triface0,tri0);
+		shape1->getBulletTriangle(m_triface1,tri1);
+
+		// conservative overlap test first; only triangles that pass it are
+		// collided as two convex shapes
+		if(tri0.overlap_test_conservative(tri1))
+		{
+			convex_vs_convex_collision(body0,body1,&tri0,&tri1);
+		}
+	}
+
+	shape0->unlockChildShapes();
+	shape1->unlockChildShapes();
+}
+
+void btGImpactCollisionAlgorithm::collide_sat_triangles(btCollisionObject * body0,
+					  btCollisionObject * body1,
+					  btGImpactMeshShapePart * shape0,
+					  btGImpactMeshShapePart * shape1,
+					  const int * pairs, int pair_count)
+{
+	// Triangle-vs-triangle path for two mesh parts; selected in gimpact_vs_gimpact
+	// when BULLET_TRIANGLE_COLLISION is not defined.
+	// pairs is read as pair_count consecutive (face of shape0, face of shape1)
+	// index pairs. Each pair is fetched, transformed with its body's world
+	// transform, tested, and any resulting points are added as contacts.
+	const btTransform orgtrans0 = body0->getWorldTransform();
+	const btTransform orgtrans1 = body1->getWorldTransform();
+
+	// scratch objects reused for every pair
+	btPrimitiveTriangle ptri0;
+	btPrimitiveTriangle ptri1;
+	GIM_TRIANGLE_CONTACT contact_data;
+
+	// the triangles are fetched between these lock/unlock calls
+	shape0->lockChildShapes();
+	shape1->lockChildShapes();
+
+	for(const int * cursor = pairs; pair_count--; cursor += 2)
+	{
+		// the face ids stay on the algorithm; addContactPoint() reads them
+		m_triface0 = cursor[0];
+		m_triface1 = cursor[1];
+
+		shape0->getPrimitiveTriangle(m_triface0,ptri0);
+		shape1->getPrimitiveTriangle(m_triface1,ptri1);
+
+		// optional profiling brackets the transform, plane build and test
+		#ifdef TRI_COLLISION_PROFILING
+		bt_begin_gim02_tri_time();
+		#endif
+
+		// apply each body's world transform to its triangle
+		ptri0.applyTransform(orgtrans0);
+		ptri1.applyTransform(orgtrans1);
+
+		//build planes
+		ptri0.buildTriPlane();
+		ptri1.buildTriPlane();
+
+		// Conservative overlap test first; the clipping routine only runs (and
+		// fills contact_data) when that test passes.
+		if(ptri0.overlap_test_conservative(ptri1) &&
+			ptri0.find_triangle_collision_clip_method(ptri1,contact_data))
+		{
+			// one contact per returned point, all sharing the same normal;
+			// the penetration depth is passed negated as the contact distance
+			for(int j = contact_data.m_point_count; j--; )
+			{
+				addContactPoint(body0, body1,
+							contact_data.m_points[j],
+							contact_data.m_separating_normal,
+							-contact_data.m_penetration_depth);
+			}
+		}
+
+		#ifdef TRI_COLLISION_PROFILING
+		bt_end_gim02_tri_time();
+		#endif
+
+	}
+
+	shape0->unlockChildShapes();
+	shape1->unlockChildShapes();
+
+}
+
+
+void btGImpactCollisionAlgorithm::gimpact_vs_gimpact(
+						btCollisionObject * body0,
+						btCollisionObject * body1,
+						btGImpactShapeInterface * shape0,
+						btGImpactShapeInterface * shape1)
+{
+	// Entry point for two GImpact shapes. Whole trimeshes are first split into
+	// their parts (one recursive call per part); the remaining shapes are then
+	// collided child against child.
+	if(shape0->getGImpactShapeType()==CONST_GIMPACT_TRIMESH_SHAPE)
+	{
+		btGImpactMeshShape * const mesh0 = static_cast<btGImpactMeshShape *>(shape0);
+		m_part0 = mesh0->getMeshPartCount();
+		// m_part0 doubles as the loop counter, so it always names the part being processed
+		while(m_part0--)
+		{
+			gimpact_vs_gimpact(body0,body1,mesh0->getMeshPart(m_part0),shape1);
+		}
+
+		return;
+	}
+
+	if(shape1->getGImpactShapeType()==CONST_GIMPACT_TRIMESH_SHAPE)
+	{
+		btGImpactMeshShape * const mesh1 = static_cast<btGImpactMeshShape *>(shape1);
+		m_part1 = mesh1->getMeshPartCount();
+		// same scheme for side 1, with m_part1 as the loop counter
+		while(m_part1--)
+		{
+			gimpact_vs_gimpact(body0,body1,shape0,mesh1->getMeshPart(m_part1));
+		}
+
+		return;
+	}
+
+	// From here on neither shape is a whole trimesh.
+	const btTransform orgtrans0 = body0->getWorldTransform();
+	const btTransform orgtrans1 = body1->getWorldTransform();
+
+	// candidate (child of shape0, child of shape1) index pairs
+	btPairSet pairset;
+	gimpact_vs_gimpact_find_pairs(orgtrans0,orgtrans1,shape0,shape1,pairset);
+
+	if(!pairset.size()) return;
+
+	const bool bothAreMeshParts = shape0->getGImpactShapeType() == CONST_GIMPACT_TRIMESH_SHAPE_PART &&
+		shape1->getGImpactShapeType() == CONST_GIMPACT_TRIMESH_SHAPE_PART;
+	if(bothAreMeshParts)
+	{
+		btGImpactMeshShapePart * const meshpart0 = static_cast<btGImpactMeshShapePart * >(shape0);
+		btGImpactMeshShapePart * const meshpart1 = static_cast<btGImpactMeshShapePart * >(shape1);
+		//specialized function; the pair set is handed over as a flat int array starting at the first m_index1
+		#ifdef BULLET_TRIANGLE_COLLISION
+		collide_gjk_triangles(body0,body1,meshpart0,meshpart1,&pairset[0].m_index1,pairset.size());
+		#else
+		collide_sat_triangles(body0,body1,meshpart0,meshpart1,&pairset[0].m_index1,pairset.size());
+		#endif
+
+		return;
+	}
+
+	//general function
+
+	shape0->lockChildShapes();
+	shape1->lockChildShapes();
+
+	GIM_ShapeRetriever retriever0(shape0);
+	GIM_ShapeRetriever retriever1(shape1);
+
+	const bool child_has_transform0 = shape0->childrenHasTransform();
+	const bool child_has_transform1 = shape1->childrenHasTransform();
+
+	for(int i = pairset.size(); i--; )
+	{
+		const GIM_PAIR & pair = pairset[i];
+		m_triface0 = pair.m_index1;
+		m_triface1 = pair.m_index2;
+		btCollisionShape * colshape0 = retriever0.getChildShape(m_triface0);
+		btCollisionShape * colshape1 = retriever1.getChildShape(m_triface1);
+
+		// children with their own transform: the body temporarily takes body transform * child transform
+		if(child_has_transform0)
+		{
+			body0->setWorldTransform(orgtrans0*shape0->getChildTransform(m_triface0));
+		}
+
+		if(child_has_transform1)
+		{
+			body1->setWorldTransform(orgtrans1*shape1->getChildTransform(m_triface1));
+		}
+
+		//collide two convex shapes
+		convex_vs_convex_collision(body0,body1,colshape0,colshape1);
+
+		// put the bodies' own transforms back
+		if(child_has_transform0)
+		{
+			body0->setWorldTransform(orgtrans0);
+		}
+
+		if(child_has_transform1)
+		{
+			body1->setWorldTransform(orgtrans1);
+		}
+	}
+
+	shape0->unlockChildShapes();
+	shape1->unlockChildShapes();
+}
+
+void btGImpactCollisionAlgorithm::gimpact_vs_shape(btCollisionObject * body0,
+				  btCollisionObject * body1,
+				  btGImpactShapeInterface * shape0,
+				  btCollisionShape * shape1,bool swapped)
+{
+	// Collides a GImpact shape (shape0, carried by body0) against any other shape.
+	// 'swapped' is true when the caller handed its bodies over in reversed order
+	// (see processCollision); it decides which of the m_part*/m_triface* members
+	// belong to the GImpact side and in which order the bodies are passed on to
+	// shape_vs_shape_collision.
+
+	if(shape0->getGImpactShapeType()==CONST_GIMPACT_TRIMESH_SHAPE)
+	{
+		btGImpactMeshShape * const mesh0 = static_cast<btGImpactMeshShape *>(shape0);
+		int& part = swapped ? m_part1 : m_part0;
+		part = mesh0->getMeshPartCount();
+		// 'part' aliases the member of the GImpact side, so that member always names the part being processed
+		while(part--)
+		{
+			gimpact_vs_shape(body0,
+				  body1,
+				  mesh0->getMeshPart(part),
+				  shape1,swapped);
+		}
+
+		return;
+	}
+
+	#ifdef GIMPACT_VS_PLANE_COLLISION
+	// dedicated path: one mesh part against a static plane
+	if(shape0->getGImpactShapeType() == CONST_GIMPACT_TRIMESH_SHAPE_PART &&
+		shape1->getShapeType() == STATIC_PLANE_PROXYTYPE)
+	{
+		btGImpactMeshShapePart * shapepart = static_cast<btGImpactMeshShapePart *>(shape0);
+		btStaticPlaneShape * planeshape = static_cast<btStaticPlaneShape * >(shape1);
+		gimpacttrimeshpart_vs_plane_collision(body0,body1,shapepart,planeshape,swapped);
+		return;
+	}
+
+	#endif
+
+	// Compound and concave partners are broken down by dedicated helpers,
+	// which call back into this function with simpler shapes.
+	if(shape1->isCompound())
+	{
+		btCompoundShape * compoundshape = static_cast<btCompoundShape *>(shape1);
+		gimpact_vs_compoundshape(body0,body1,shape0,compoundshape,swapped);
+		return;
+	}
+
+	if(shape1->isConcave())
+	{
+		btConcaveShape * concaveshape = static_cast<btConcaveShape *>(shape1);
+		gimpact_vs_concave(body0,body1,shape0,concaveshape,swapped);
+		return;
+	}
+
+	// Remaining case: shape1 is neither compound nor concave and is collided
+	// against the individual children of shape0.
+	const btTransform orgtrans0 = body0->getWorldTransform();
+	const btTransform orgtrans1 = body1->getWorldTransform();
+
+	// indices of the children of shape0 whose box overlaps the box of shape1
+	btAlignedObjectArray<int> collided_results;
+	gimpact_vs_shape_find_pairs(orgtrans0,orgtrans1,shape0,shape1,collided_results);
+
+	// nothing overlaps: return before taking the lock
+	if(collided_results.size() == 0) return;
+
+	shape0->lockChildShapes();
+
+	GIM_ShapeRetriever retriever0(shape0);
+
+	const bool child_has_transform0 = shape0->childrenHasTransform();
+
+	for(int i = collided_results.size(); i--; )
+	{
+		const int child_index = collided_results[i];
+		// the child index is stored on the side the GImpact shape occupies:
+		// side 1 when swapped, side 0 otherwise
+		(swapped ? m_triface1 : m_triface0) = child_index;
+
+		btCollisionShape * colshape0 = retriever0.getChildShape(child_index);
+
+		// a child with its own transform: body0 temporarily takes body transform * child transform
+		if(child_has_transform0)
+		{
+			body0->setWorldTransform(orgtrans0*shape0->getChildTransform(child_index));
+		}
+
+		//collide two shapes
+		if(swapped)
+		{
+			shape_vs_shape_collision(body1,body0,shape1,colshape0);
+		}
+		else
+		{
+			shape_vs_shape_collision(body0,body1,colshape0,shape1);
+		}
+
+		//restore transforms
+		if(child_has_transform0)
+		{
+			body0->setWorldTransform(orgtrans0);
+		}
+	}
+
+	shape0->unlockChildShapes();
+
+}
+
+void btGImpactCollisionAlgorithm::gimpact_vs_compoundshape(btCollisionObject * body0,
+				  btCollisionObject * body1,
+				  btGImpactShapeInterface * shape0,
+				  btCompoundShape * shape1,bool swapped)
+{
+	// Collides shape0 against every child of the compound, one child at a time.
+	// body1's transform is changed for each child and put back right after it.
+	const btTransform orgtrans1 = body1->getWorldTransform();
+
+	for(int i = shape1->getNumChildShapes(); i--; )
+	{
+		btCollisionShape * colshape1 = shape1->getChildShape(i);
+
+		// body1 temporarily takes body transform * child transform
+		body1->setWorldTransform(orgtrans1*shape1->getChildTransform(i));
+
+		//collide child shape
+		gimpact_vs_shape(body0, body1,
+					  shape0,colshape1,swapped);
+
+		//restore transforms
+		body1->setWorldTransform(orgtrans1);
+
+	}
+}
+
+void btGImpactCollisionAlgorithm::gimpacttrimeshpart_vs_plane_collision(
+					  btCollisionObject * body0,
+					  btCollisionObject * body1,
+					  btGImpactMeshShapePart * shape0,
+					  btStaticPlaneShape * shape1,bool swapped)
+{
+	// Collides the vertices of one mesh part against a static plane.
+	// (Called from gimpact_vs_shape only when GIMPACT_VS_PLANE_COLLISION is defined.)
+	const btTransform orgtrans0 = body0->getWorldTransform();
+	const btTransform orgtrans1 = body1->getWorldTransform();
+
+	// the cast to btPlaneShape is only there to reach the plane-equation accessor
+	btPlaneShape * planeshape = static_cast<btPlaneShape *>(shape1);
+	btVector4 plane;
+	planeshape->get_plane_equation_transformed(orgtrans1,plane);
+
+	//test box against plane
+	btAABB tribox;
+	shape0->getAabb(orgtrans0,tribox.m_min,tribox.m_max);
+	tribox.increment_margin(planeshape->getMargin());
+
+	// early out unless the margin-inflated box is classified as colliding with the plane
+	if( tribox.plane_classify(plane)!= BT_CONST_COLLIDE_PLANE) return;
+
+	// the vertices are read between these lock/unlock calls
+	shape0->lockChildShapes();
+
+	const btScalar margin = shape0->getMargin() + planeshape->getMargin();
+
+	btVector3 vertex;
+	for(int vi = shape0->getVertexCount(); vi--; )
+	{
+		shape0->getVertex(vi,vertex);
+		vertex = orgtrans0(vertex);
+
+		const btScalar distance = vertex.dot(plane) - plane[3] - margin;
+		if(!(distance<0.0)) continue; // not below the plane once both margins are subtracted
+
+		//add contact; when swapped the bodies trade places and the normal is negated
+		if(swapped)
+		{
+			addContactPoint(body1, body0,
+				vertex,
+				-plane,
+				distance);
+		}
+		else
+		{
+			addContactPoint(body0, body1,
+				vertex,
+				plane,
+				distance);
+		}
+	}
+
+	shape0->unlockChildShapes();
+}
+
+
+
+
+class btGImpactTriangleCallback: public btTriangleCallback
+{
+public:
+	btGImpactCollisionAlgorithm * algorithm; // receives the part/face ids and runs the collision
+	btCollisionObject * body0; // passed through unchanged to gimpact_vs_shape
+	btCollisionObject * body1; // passed through unchanged to gimpact_vs_shape
+	btGImpactShapeInterface * gimpactshape0; // the GImpact side of the test
+	bool swapped; // forwarded as-is; also selects which side the triangle ids are stored on
+	btScalar margin; // applied to every temporary triangle
+	//! wraps the reported triangle in a temporary shape and collides gimpactshape0 against it
+	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex)
+	{
+		btTriangleShapeEx reported(triangle[0],triangle[1],triangle[2]);
+		reported.setMargin(margin);
+		if(!swapped)
+		{
+			algorithm->setPart1(partId);
+			algorithm->setFace1(triangleIndex);
+		}
+		else
+		{
+			algorithm->setPart0(partId);
+			algorithm->setFace0(triangleIndex);
+		}
+		algorithm->gimpact_vs_shape(
+							body0,body1,gimpactshape0,&reported,swapped);
+	}
+};
+
+
+
+
+void btGImpactCollisionAlgorithm::gimpact_vs_concave(
+				  btCollisionObject * body0,
+				  btCollisionObject * body1,
+				  btGImpactShapeInterface * shape0,
+				  btConcaveShape * shape1,bool swapped)
+{
+	//create the callback
+	btGImpactTriangleCallback tricallback;
+	tricallback.margin = shape1->getMargin();
+	tricallback.swapped = swapped;
+	tricallback.gimpactshape0 = shape0;
+	tricallback.body0 = body0;
+	tricallback.body1 = body1;
+	tricallback.algorithm = this;
+
+	// box of the GImpact shape computed with the transform
+	// inverse(body1 transform) * body0 transform
+	const btTransform gimpactInConcaveSpace =
+		body1->getWorldTransform().inverse() * body0->getWorldTransform();
+
+	btVector3 minAABB,maxAABB;
+	shape0->getAabb(gimpactInConcaveSpace,minAABB,maxAABB);
+
+	// shape1 reports its triangles for that box to the callback, one call per triangle
+	shape1->processAllTriangles(&tricallback,minAABB,maxAABB);
+}
+
+
+
+void btGImpactCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	// Dispatches on which of the two bodies carries a GImpact shape.
+	// The cache is cleared first, then the output and dispatch info are
+	// remembered for the helper functions.
+	clearCache();
+	m_resultOut = resultOut;
+	m_dispatchInfo = &dispatchInfo;
+
+	btCollisionShape * const colshape0 = body0->getCollisionShape();
+	btCollisionShape * const colshape1 = body1->getCollisionShape();
+	const bool isGimpact0 = colshape0->getShapeType()==GIMPACT_SHAPE_PROXYTYPE;
+	const bool isGimpact1 = colshape1->getShapeType()==GIMPACT_SHAPE_PROXYTYPE;
+
+	if(isGimpact0 && isGimpact1)
+	{
+		gimpact_vs_gimpact(body0,body1,
+			static_cast<btGImpactShapeInterface *>(colshape0),
+			static_cast<btGImpactShapeInterface *>(colshape1));
+	}
+	else if(isGimpact0)
+	{
+		gimpact_vs_shape(body0,body1,static_cast<btGImpactShapeInterface *>(colshape0),colshape1,false);
+	}
+	else if(isGimpact1)
+	{
+		// the GImpact shape has to come first, so the bodies are passed in
+		// reversed order and 'swapped' tells the helpers about it
+		gimpact_vs_shape(body1,body0,static_cast<btGImpactShapeInterface *>(colshape1),colshape0,true);
+	}
+	// neither shape is a GImpact shape: nothing is done
+}
+
+
+btScalar btGImpactCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	// no time of impact is computed here; the constant 1 is returned and every argument is ignored
+	return btScalar(1.f);
+}
+
+///////////////////////////////////// REGISTERING ALGORITHM //////////////////////////////////////////////
+
+
+
+//! Use this function to register the algorithm externally
+void btGImpactCollisionAlgorithm::registerAlgorithm(btCollisionDispatcher * dispatcher)
+{
+	// a single function-local static create-function object is shared by every registration
+	static btGImpactCollisionAlgorithm::CreateFunc s_gimpact_cf;
+
+	// GImpact shape as the first proxy type, paired with every proxy type
+	for(int other = 0; other < MAX_BROADPHASE_COLLISION_TYPES; ++other)
+	{
+		dispatcher->registerCollisionCreateFunc(GIMPACT_SHAPE_PROXYTYPE,other ,&s_gimpact_cf);
+	}
+
+	// and as the second proxy type, again paired with every proxy type
+	for(int other = 0; other < MAX_BROADPHASE_COLLISION_TYPES; ++other)
+	{
+		dispatcher->registerCollisionCreateFunc(other,GIMPACT_SHAPE_PROXYTYPE ,&s_gimpact_cf);
+	}
+
+}
diff --git a/src/bullet/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.h b/src/bullet/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.h
new file mode 100644
index 00000000..6b6e07c9
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.h
@@ -0,0 +1,306 @@
+/*! \file btGImpactCollisionAlgorithm.h
+\author Francisco Leon Najera
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_GIMPACT_BVH_CONCAVE_COLLISION_ALGORITHM_H
+#define BT_GIMPACT_BVH_CONCAVE_COLLISION_ALGORITHM_H
+
+#include "BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseInterface.h"
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+class btDispatcher;
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+#include "btGImpactShape.h"
+#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
+#include "BulletCollision/CollisionShapes/btCompoundShape.h"
+#include "BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h"
+#include "LinearMath/btIDebugDraw.h"
+
+
+
+//! Collision Algorithm for GImpact Shapes
+/*!
+To register this algorithm in Bullet, proceed as follows:
+ \code
+btCollisionDispatcher * dispatcher = static_cast<btCollisionDispatcher *>(m_dynamicsWorld ->getDispatcher());
+btGImpactCollisionAlgorithm::registerAlgorithm(dispatcher);
+ \endcode
+*/
+class btGImpactCollisionAlgorithm : public btActivatingCollisionAlgorithm
+{
+protected:
+	btCollisionAlgorithm * m_convex_algorithm; //!< created on demand by checkConvexAlgorithm(), destroyed by destroyConvexAlgorithm()
+    btPersistentManifold * m_manifoldPtr; //!< manifold obtained from the dispatcher; NULL while none is held
+	btManifoldResult* m_resultOut;
+	const btDispatcherInfo * m_dispatchInfo;
+	int m_triface0; //!< the four face/part ids below are reset to -1 by clearCache()
+	int m_part0;
+	int m_triface1;
+	int m_part1;
+
+
+	//! Asks the dispatcher for a new manifold for the two bodies and caches it in m_manifoldPtr
+	SIMD_FORCE_INLINE btPersistentManifold* newContactManifold(btCollisionObject* body0,btCollisionObject* body1)
+	{
+		m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1);
+		return m_manifoldPtr;
+	}
+	//! Runs the destructor of the cached convex algorithm (if any) and passes it to the dispatcher's freeCollisionAlgorithm()
+	SIMD_FORCE_INLINE void destroyConvexAlgorithm()
+	{
+		if(m_convex_algorithm)
+		{
+			m_convex_algorithm->~btCollisionAlgorithm();
+			m_dispatcher->freeCollisionAlgorithm( m_convex_algorithm);
+			m_convex_algorithm = NULL;
+		}
+	}
+	//! Releases the cached manifold (if any) through the dispatcher and forgets it
+	SIMD_FORCE_INLINE void destroyContactManifolds()
+	{
+		if(m_manifoldPtr == NULL) return;
+		m_dispatcher->releaseManifold(m_manifoldPtr);
+		m_manifoldPtr = NULL;
+	}
+	//! Drops the cached manifold and convex algorithm and resets the face/part ids to -1
+	SIMD_FORCE_INLINE void clearCache()
+	{
+		destroyContactManifolds();
+		destroyConvexAlgorithm();
+
+		m_triface0 = -1;
+		m_part0 = -1;
+		m_triface1 = -1;
+		m_part1 = -1;
+	}
+	//! Returns the cached manifold, which is NULL when none is currently held
+	SIMD_FORCE_INLINE btPersistentManifold* getLastManifold()
+	{
+		return m_manifoldPtr;
+	}
+
+
+	// Call before process collision: makes sure a manifold is cached and attaches it to m_resultOut
+	SIMD_FORCE_INLINE void checkManifold(btCollisionObject* body0,btCollisionObject* body1)
+	{
+		if(getLastManifold() == 0)
+		{
+			newContactManifold(body0,body1);
+		}
+
+		m_resultOut->setPersistentManifold(getLastManifold());
+	}
+
+	// Call before process collision: ensures a manifold, then asks the dispatcher for an algorithm for the pair (the result is not cached here)
+	SIMD_FORCE_INLINE btCollisionAlgorithm * newAlgorithm(btCollisionObject* body0,btCollisionObject* body1)
+	{
+		checkManifold(body0,body1);
+
+		btCollisionAlgorithm * convex_algorithm = m_dispatcher->findAlgorithm(
+				body0,body1,getLastManifold());
+		return convex_algorithm ;
+	}
+
+	// Call before process collision: creates m_convex_algorithm unless one is already cached
+	SIMD_FORCE_INLINE void checkConvexAlgorithm(btCollisionObject* body0,btCollisionObject* body1)
+	{
+		if(m_convex_algorithm) return;
+		m_convex_algorithm = newAlgorithm(body0,body1);
+	}
+
+
+
+
+	void addContactPoint(btCollisionObject * body0,
+					btCollisionObject * body1,
+					const btVector3 & point,
+					const btVector3 & normal,
+					btScalar distance);
+
+//! Collision routines
+//!@{
+
+	void collide_gjk_triangles(btCollisionObject * body0,
+				  btCollisionObject * body1,
+				  btGImpactMeshShapePart * shape0,
+				  btGImpactMeshShapePart * shape1,
+				  const int * pairs, int pair_count);
+
+	void collide_sat_triangles(btCollisionObject * body0,
+					  btCollisionObject * body1,
+					  btGImpactMeshShapePart * shape0,
+					  btGImpactMeshShapePart * shape1,
+					  const int * pairs, int pair_count);
+
+
+
+
+	void shape_vs_shape_collision(
+					  btCollisionObject * body0,
+					  btCollisionObject * body1,
+					  btCollisionShape * shape0,
+					  btCollisionShape * shape1);
+
+	void convex_vs_convex_collision(btCollisionObject * body0,
+					  btCollisionObject * body1,
+					  btCollisionShape * shape0,
+					  btCollisionShape * shape1);
+
+
+
+	void gimpact_vs_gimpact_find_pairs(
+					  const btTransform & trans0,
+					  const btTransform & trans1,
+					  btGImpactShapeInterface * shape0,
+					  btGImpactShapeInterface * shape1,btPairSet & pairset);
+
+	void gimpact_vs_shape_find_pairs(
+					  const btTransform & trans0,
+					  const btTransform & trans1,
+					  btGImpactShapeInterface * shape0,
+					  btCollisionShape * shape1,
+					  btAlignedObjectArray<int> & collided_primitives);
+
+
+	void gimpacttrimeshpart_vs_plane_collision(
+					  btCollisionObject * body0,
+					  btCollisionObject * body1,
+					  btGImpactMeshShapePart * shape0,
+					  btStaticPlaneShape * shape1,bool swapped);
+
+
+public:
+
+	btGImpactCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
+
+	virtual ~btGImpactCollisionAlgorithm();
+
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	btScalar	calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	//! Appends the cached manifold, when there is one, to manifoldArray
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		if (m_manifoldPtr)
+			manifoldArray.push_back(m_manifoldPtr);
+	}
+
+	//! Factory: placement-constructs the algorithm in memory obtained from the dispatcher named in the construction info
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btGImpactCollisionAlgorithm));
+			return new(mem) btGImpactCollisionAlgorithm(ci,body0,body1);
+		}
+	};
+
+	//! Use this function to register the algorithm externally
+	static void registerAlgorithm(btCollisionDispatcher * dispatcher);
+#ifdef TRI_COLLISION_PROFILING
+	//! Gets the average time in miliseconds of tree collisions
+	static float getAverageTreeCollisionTime();
+
+	//! Gets the average time in miliseconds of triangle collisions
+	static float getAverageTriangleCollisionTime();
+#endif //TRI_COLLISION_PROFILING
+
+	//! Collides two gimpact shapes
+	/*!
+	\pre shape0 and shape1 must not be btGImpactMeshShape objects
+	*/
+
+
+	void gimpact_vs_gimpact(btCollisionObject * body0,
+					  btCollisionObject * body1,
+					  btGImpactShapeInterface * shape0,
+					  btGImpactShapeInterface * shape1);
+
+	void gimpact_vs_shape(btCollisionObject * body0,
+					  btCollisionObject * body1,
+					  btGImpactShapeInterface * shape0,
+					  btCollisionShape * shape1,bool swapped);
+
+	void gimpact_vs_compoundshape(btCollisionObject * body0,
+					  btCollisionObject * body1,
+					  btGImpactShapeInterface * shape0,
+					  btCompoundShape * shape1,bool swapped);
+
+	void gimpact_vs_concave(
+					  btCollisionObject * body0,
+					  btCollisionObject * body1,
+					  btGImpactShapeInterface * shape0,
+					  btConcaveShape * shape1,bool swapped);
+
+
+
+
+		/// Accessor/Mutator pairs for Part and triangleID
+    void 	setFace0(int value) 
+    { 
+    	m_triface0 = value; 
+    }
+    int getFace0() 
+    { 
+    	return m_triface0; 
+    }
+    void setFace1(int value) 
+    { 
+    	m_triface1 = value; 
+    }
+    int getFace1() 
+    { 
+    	return m_triface1; 
+    }
+    void setPart0(int value) 
+    { 
+    	m_part0 = value; 
+    }
+    int getPart0() 
+    { 
+    	return m_part0; 
+    }
+    void setPart1(int value) 
+    { 
+    	m_part1 = value; 
+		}
+    int getPart1() 
+    { 
+    	return m_part1; 
+    }
+
+};
+
+
+//algorithm details
+//#define BULLET_TRIANGLE_COLLISION 1
+#define GIMPACT_VS_PLANE_COLLISION 1
+
+
+
+#endif //BT_GIMPACT_BVH_CONCAVE_COLLISION_ALGORITHM_H
diff --git a/src/bullet/BulletCollision/Gimpact/btGImpactMassUtil.h b/src/bullet/BulletCollision/Gimpact/btGImpactMassUtil.h
new file mode 100644
index 00000000..2543aefc
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btGImpactMassUtil.h
@@ -0,0 +1,60 @@
+/*! \file btGImpactMassUtil.h
+\author Francisco Leon Najera
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef GIMPACT_MASS_UTIL_H
+#define GIMPACT_MASS_UTIL_H
+
+#include "LinearMath/btTransform.h"
+
+
+
+SIMD_FORCE_INLINE btVector3 gim_inertia_add_transformed(
+	const btVector3 & source_inertia, const btVector3 & added_inertia, const btTransform & transform)
+{
+	// Returns source_inertia plus a per-axis term built from added_inertia rotated
+	// by the transform's basis and weighted by the squared origin coordinates.
+	// NOTE(review): the term is a product (rotated diagonal * squared offsets), so a
+	// transform with a zero origin adds nothing - confirm this is the intent.
+	const btMatrix3x3 & basis = transform.getBasis();
+	const btVector3 & origin = transform.getOrigin();
+	btMatrix3x3 rotatedTensor = basis.scaled(added_inertia) * basis.transpose();
+	const btScalar x2 = origin[0]*origin[0];
+	const btScalar y2 = origin[1]*origin[1];
+	const btScalar z2 = origin[2]*origin[2];
+	return btVector3(
+		source_inertia[0] + rotatedTensor[0][0]*(y2+z2),
+		source_inertia[1] + rotatedTensor[1][1]*(x2+z2),
+		source_inertia[2] + rotatedTensor[2][2]*(x2+y2));
+}
+
+SIMD_FORCE_INLINE btVector3 gim_get_point_inertia(const btVector3 & point, btScalar mass)
+{
+	// Per-axis inertia of a point mass: mass times the sum of the squares of
+	// the two coordinates that do not belong to that axis.
+	const btScalar sq[3] = { point[0]*point[0], point[1]*point[1], point[2]*point[2] };
+	return btVector3(mass*(sq[1]+sq[2]),mass*(sq[0]+sq[2]),mass*(sq[0]+sq[1]));
+}
+
+
+#endif //GIMPACT_MASS_UTIL_H
diff --git a/src/bullet/BulletCollision/Gimpact/btGImpactQuantizedBvh.cpp b/src/bullet/BulletCollision/Gimpact/btGImpactQuantizedBvh.cpp
new file mode 100644
index 00000000..cd4dfdb6
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btGImpactQuantizedBvh.cpp
@@ -0,0 +1,528 @@
+/*! \file btGImpactQuantizedBvh.cpp
+\author Francisco Leon Najera
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btGImpactQuantizedBvh.h"
+#include "LinearMath/btQuickprof.h"
+
+#ifdef TRI_COLLISION_PROFILING
+btClock g_q_tree_clock;
+
+// Running total of the timed tree-collision intervals, and how many intervals were recorded.
+float g_q_accum_tree_collision_time = 0;
+int g_q_count_traversing = 0;
+
+
+void bt_begin_gim02_q_tree_time()
+{
+	g_q_tree_clock.reset(); // restart the shared clock; paired with bt_end_gim02_q_tree_time()
+}
+
+void bt_end_gim02_q_tree_time()
+{
+	g_q_accum_tree_collision_time += g_q_tree_clock.getTimeMicroseconds(); // add the clock's microsecond reading (presumably time since the last reset() - confirm)
+	g_q_count_traversing++; // one more timed traversal
+}
+
+
+//! Gets the average duration of the tree collisions timed since the previous call (clock microseconds)
+float btGImpactQuantizedBvh::getAverageTreeCollisionTime()
+{
+	// Nothing was timed since the last call: report zero rather than divide by zero.
+	if(g_q_count_traversing == 0) return 0;
+
+	const float average = g_q_accum_tree_collision_time / (float)g_q_count_traversing;
+	// Reading the average also clears both accumulators.
+	g_q_accum_tree_collision_time = 0;
+	g_q_count_traversing = 0;
+	return average;
+
+//	float avgtime = g_q_count_traversing;
+//	g_q_count_traversing = 0;
+//	return avgtime;
+
+}
+
+#endif //TRI_COLLISION_PROFILING
+
+/////////////////////// btQuantizedBvhTree /////////////////////////////////
+
+void btQuantizedBvhTree::calc_quantization(
+	GIM_BVH_DATA_ARRAY & primitive_boxes, btScalar boundMargin)
+{
+	// Merge every primitive box into one box, starting from an invalidated btAABB.
+	btAABB enclosing;
+	enclosing.invalidate();
+
+	const int boxCount = primitive_boxes.size();
+	for (int boxIndex = 0; boxIndex < boxCount; ++boxIndex)
+		enclosing.merge(primitive_boxes[boxIndex].m_bound);
+
+	// Derive the quantization from the merged box and boundMargin; the first three
+	// arguments (m_global_bound, m_bvhQuantization) appear to be the outputs.
+	bt_calc_quantization_parameters(
+		m_global_bound.m_min,m_global_bound.m_max,m_bvhQuantization,enclosing.m_min,enclosing.m_max,boundMargin);
+}
+
+
+
+int btQuantizedBvhTree::_calc_splitting_axis(
+	GIM_BVH_DATA_ARRAY & primitive_boxes, int startIndex,  int endIndex)
+{
+	// Returns maxAxis() of the per-axis variance of the box centers found in
+	// [startIndex, endIndex). The range needs at least two boxes, because the
+	// variance is scaled by 1/(count - 1).
+	const int count = endIndex-startIndex;
+
+	// First pass: mean of the box centers.
+	btVector3 centerMean(btScalar(0.),btScalar(0.),btScalar(0.));
+	for (int box=startIndex; box<endIndex; ++box)
+	{
+		const btAABB & bound = primitive_boxes[box].m_bound;
+		centerMean += btScalar(0.5)*(bound.m_max + bound.m_min);
+	}
+	centerMean *= (btScalar(1.)/(btScalar)count);
+
+	// Second pass: accumulate the squared offsets of the centers from that mean.
+	btVector3 spread(btScalar(0.),btScalar(0.),btScalar(0.));
+	for (int box=startIndex; box<endIndex; ++box)
+	{
+		const btAABB & bound = primitive_boxes[box].m_bound;
+		const btVector3 offset = btScalar(0.5)*(bound.m_max + bound.m_min) - centerMean;
+		spread += offset * offset;
+	}
+	spread *= (btScalar(1.)/	((btScalar)count-1)	);
+
+	// maxAxis() of the accumulated spread is used as the split axis.
+	return spread.maxAxis();
+}
+
+
+int btQuantizedBvhTree::_sort_and_calc_splitting_index(
+	GIM_BVH_DATA_ARRAY & primitive_boxes, int startIndex,
+	int endIndex, int splitAxis)
+{
+	int i;
+	int splitIndex =startIndex;
+	int numIndices = endIndex - startIndex;
+	// Partitions [startIndex,endIndex) around the mean box center along splitAxis and returns the split position.
+	// splitValue is that mean (average of centers); it is computed just below
+	btScalar splitValue = 0.0f;
+
+	btVector3 means(btScalar(0.),btScalar(0.),btScalar(0.));
+	for (i=startIndex;i<endIndex;i++)
+	{
+		btVector3 center = btScalar(0.5)*(primitive_boxes[i].m_bound.m_max +
+					 primitive_boxes[i].m_bound.m_min);
+		means+=center;
+	}
+	means *= (btScalar(1.)/(btScalar)numIndices);
+
+	splitValue = means[splitAxis];
+
+
+	//move every box whose center lies above splitValue to the front of the range; splitIndex ends one past the last such box.
+	for (i=startIndex;i<endIndex;i++)
+	{
+		btVector3 center = btScalar(0.5)*(primitive_boxes[i].m_bound.m_max +
+					 primitive_boxes[i].m_bound.m_min);
+		if (center[splitAxis] > splitValue)
+		{
+			//swap
+			primitive_boxes.swap(i,splitIndex);
+			//swapLeafNodes(i,splitIndex);
+			splitIndex++;
+		}
+	}
+
+	//if the splitIndex causes unbalanced trees, fix this by using the center in between startIndex and endIndex
+	//otherwise the tree-building might fail due to stack-overflows in certain cases.
+	//unbalanced1 is unsafe: it can cause stack overflows
+	//bool unbalanced1 = ((splitIndex==startIndex) || (splitIndex == (endIndex-1)));
+
+	//unbalanced2 should work too: always use center (perfect balanced trees)
+	//bool unbalanced2 = true;
+
+	//this should be safe too: a split within numIndices/3 of either end of the range counts as unbalanced
+	int rangeBalancedIndices = numIndices/3;
+	bool unbalanced = ((splitIndex<=(startIndex+rangeBalancedIndices)) || (splitIndex >=(endIndex-1-rangeBalancedIndices)));
+
+	if (unbalanced)
+	{
+		splitIndex = startIndex+ (numIndices>>1); // fall back to the midpoint of the range
+	}
+
+	btAssert(!((splitIndex==startIndex) || (splitIndex == (endIndex))));
+
+	return splitIndex;
+
+}
+
+
+void btQuantizedBvhTree::_build_sub_tree(GIM_BVH_DATA_ARRAY & primitive_boxes, int startIndex,  int endIndex)
+{
+	// Builds the subtree over primitive_boxes[startIndex, endIndex). Nodes are
+	// numbered depth-first: this node takes the next free slot, then comes the
+	// whole left branch, then the whole right branch.
+	const int nodeIndex = m_num_nodes++;
+	const int primitiveCount = endIndex-startIndex;
+
+	btAssert(primitiveCount>0);
+
+	if (primitiveCount==1)
+	{
+		// Leaf: keep the primitive's own box and its data index.
+		setNodeBound(nodeIndex,primitive_boxes[startIndex].m_bound);
+		m_node_array[nodeIndex].setDataIndex(primitive_boxes[startIndex].m_data);
+
+		return;
+	}
+
+	// Internal node. First choose the axis to split along...
+	const int splitAxis = _calc_splitting_axis(primitive_boxes,startIndex,endIndex);
+
+	// ...then reorder the boxes of the range around the split position.
+	const int splitIndex = _sort_and_calc_splitting_index(
+			primitive_boxes,startIndex,endIndex,
+			splitAxis
+			);
+
+	// The box of this node is the merge of every primitive box in the range.
+	btAABB mergedBound;
+	mergedBound.invalidate();
+
+	for (int box=startIndex; box<endIndex; ++box)
+	{
+		mergedBound.merge(primitive_boxes[box].m_bound);
+	}
+
+	setNodeBound(nodeIndex,mergedBound);
+
+	// Left branch: [startIndex, splitIndex).
+	_build_sub_tree(primitive_boxes, startIndex, splitIndex);
+
+	// Right branch: [splitIndex, endIndex).
+	_build_sub_tree(primitive_boxes, splitIndex, endIndex);
+
+	// Both branches are numbered now, so m_num_nodes - nodeIndex is the size
+	// of this subtree (this node included). Traversals add it to the node's
+	// index to step over the subtree.
+	m_node_array[nodeIndex].setEscapeIndex(m_num_nodes - nodeIndex);
+}
+
+//! Builds the whole tree recursively; an empty primitive set leaves the tree with no nodes
+void btQuantizedBvhTree::build_tree(
+	GIM_BVH_DATA_ARRAY & primitive_boxes)
+{
+	calc_quantization(primitive_boxes);
+	// restart node numbering and allocate two node slots per primitive
+	m_num_nodes = 0;
+	m_node_array.resize(primitive_boxes.size()*2);
+	// an empty range never reaches the one-primitive leaf case, so recursing on it would not terminate
+	if (primitive_boxes.size() == 0) return;
+	_build_sub_tree(primitive_boxes, 0, primitive_boxes.size());
+}
+
+////////////////////////////////////class btGImpactQuantizedBvh
+
+void btGImpactQuantizedBvh::refit()
+{
+	// Walk the nodes from last to first. Children are stored after their
+	// parent, so a parent is reached only after both children were updated.
+	for (int node = getNodeCount()-1; node >= 0; --node)
+	{
+		if(!isLeafNode(node))
+		{
+			// Internal node: rebuild its box from the boxes of its children.
+			btAABB merged;
+			merged.invalidate();
+
+			btAABB childBox;
+			const int leftChild = getLeftNode(node);
+			if(leftChild)
+			{
+				getNodeBound(leftChild,childBox);
+				merged.merge(childBox);
+			}
+
+			const int rightChild = getRightNode(node);
+			if(rightChild)
+			{
+				getNodeBound(rightChild,childBox);
+				merged.merge(childBox);
+			}
+
+			setNodeBound(node,merged);
+		}
+		else
+		{
+			// Leaf: ask the primitive manager for the primitive's current box.
+			btAABB leafBox;
+			m_primitive_manager->get_primitive_box(getNodeData(node),leafBox);
+			setNodeBound(node,leafBox);
+		}
+	}
+}
+
+//! Rebuilds the whole tree from the boxes currently reported by the primitive manager
+void btGImpactQuantizedBvh::buildSet()
+{
+	// One entry per primitive: its box, with the primitive's own index as the data.
+	const int primitiveCount = m_primitive_manager->get_primitive_count();
+	GIM_BVH_DATA_ARRAY boxes;
+	boxes.resize(primitiveCount);
+
+	for (int primitive = 0; primitive < primitiveCount; ++primitive)
+	{
+		m_primitive_manager->get_primitive_box(primitive,boxes[primitive].m_bound);
+		boxes[primitive].m_data = primitive;
+	}
+	m_box_tree.build_tree(boxes);
+}
+
+//! Appends to collided_results the primitive index of every leaf whose quantized box
+//! overlaps \a box; returns true when collided_results is non-empty afterwards
+bool btGImpactQuantizedBvh::boxQuery(const btAABB & box, btAlignedObjectArray<int> & collided_results) const
+{
+	// Quantize the query box once; every node test below then compares the
+	// quantized corners directly.
+	unsigned short queryMin[3];
+	unsigned short queryMax[3];
+
+	m_box_tree.quantizePoint(queryMin,box.m_min);
+	m_box_tree.quantizePoint(queryMax,box.m_max);
+
+	// Nodes are visited in array order. An internal node that misses the box
+	// is skipped together with its subtree by adding its escape index.
+	const int nodeCount = getNodeCount();
+	int node = 0;
+
+	while (node < nodeCount)
+	{
+		const bool overlaps = m_box_tree.testQuantizedBoxOverlapp(node, queryMin,queryMax);
+		const bool leaf = isLeafNode(node);
+
+		if (!leaf && !overlaps)
+		{
+			// internal node outside the box: jump past its subtree
+			node += getEscapeNodeIndex(node);
+			continue;
+		}
+
+		if (leaf && overlaps)
+		{
+			collided_results.push_back(getNodeData(node));
+		}
+
+		// leaf, or internal node touching the box: continue with the next stored node
+		++node;
+	}
+
+	// The result reflects the final size of the array, so entries that were
+	// already present on entry count as well.
+	return collided_results.size()>0;
+}
+
+
+
+//! Appends to collided_results the primitive index of every leaf whose box passes collide_ray() for the given ray
+bool btGImpactQuantizedBvh::rayQuery(
+	const btVector3 & ray_dir,const btVector3 & ray_origin ,
+	btAlignedObjectArray<int> & collided_results) const
+{
+	// Nodes are visited in array order. An internal node the ray misses is
+	// skipped together with its subtree by adding its escape index.
+	const int nodeCount = getNodeCount();
+	int node = 0;
+
+	while (node < nodeCount)
+	{
+		// the ray is tested against the unquantized box of the node
+		btAABB nodeBox;
+		getNodeBound(node,nodeBox);
+
+		const bool hit = nodeBox.collide_ray(ray_origin,ray_dir);
+		const bool leaf = isLeafNode(node);
+
+		if (!leaf && !hit)
+		{
+			// internal node missed by the ray: jump past its subtree
+			node += getEscapeNodeIndex(node);
+			continue;
+		}
+
+		if (leaf && hit)
+		{
+			collided_results.push_back(getNodeData(node));
+		}
+
+		// leaf, or internal node that was hit: continue with the next stored node
+		++node;
+	}
+
+	return collided_results.size()>0; // also true for entries already present on entry
+}
+
+
+SIMD_FORCE_INLINE bool _quantized_node_collision(
+	btGImpactQuantizedBvh * boxset0, btGImpactQuantizedBvh * boxset1,
+	const BT_BOX_BOX_TRANSFORM_CACHE & trans_cache_1to0,
+	int node0 ,int node1, bool complete_primitive_tests)
+{
+	// Box test between node0 of boxset0 and node1 of boxset1; trans_cache_1to0
+	// is what lets the two boxes be compared (presumably it maps set 1 into
+	// the frame of set 0 - confirm against BT_BOX_BOX_TRANSFORM_CACHE).
+	btAABB bound0, bound1;
+	boxset0->getNodeBound(node0,bound0);
+	boxset1->getNodeBound(node1,bound1);
+
+	const bool overlapping = bound0.overlapping_trans_cache(bound1,trans_cache_1to0,complete_primitive_tests);
+	return overlapping;
+}
+
+
+//recursive collision routine (it calls itself, so it is not stackless): collects the leaf pairs of boxset0/boxset1 whose boxes overlap
+static void _find_quantized_collision_pairs_recursive(
+	btGImpactQuantizedBvh * boxset0, btGImpactQuantizedBvh * boxset1,
+	btPairSet * collision_pairs,
+	const BT_BOX_BOX_TRANSFORM_CACHE & trans_cache_1to0,
+	int node0, int node1, bool complete_primitive_tests)
+{
+
+
+	// prune: when the boxes of the two nodes do not overlap, nothing below them is visited
+	if( _quantized_node_collision(
+		boxset0,boxset1,trans_cache_1to0,
+		node0,node1,complete_primitive_tests) ==false) return;//avoid colliding internal nodes
+
+	if(boxset0->isLeafNode(node0))
+	{
+		if(boxset1->isLeafNode(node1))
+		{
+			// collision result
+			collision_pairs->push_pair(
+				boxset0->getNodeData(node0),boxset1->getNodeData(node1));
+			return;
+		}
+		else
+		{
+			// node0 is a leaf and node1 is internal: descend only into the children of node1
+			//collide left recursive
+
+			_find_quantized_collision_pairs_recursive(
+								boxset0,boxset1,
+								collision_pairs,trans_cache_1to0,
+								node0,boxset1->getLeftNode(node1),false);
+
+			//collide right recursive
+			_find_quantized_collision_pairs_recursive(
+								boxset0,boxset1,
+								collision_pairs,trans_cache_1to0,
+								node0,boxset1->getRightNode(node1),false);
+
+
+		}
+	}
+	else
+	{
+		if(boxset1->isLeafNode(node1))
+		{
+			// node0 is internal and node1 is a leaf: descend only into the children of node0
+			//collide left recursive
+			_find_quantized_collision_pairs_recursive(
+								boxset0,boxset1,
+								collision_pairs,trans_cache_1to0,
+								boxset0->getLeftNode(node0),node1,false);
+
+
+			//collide right recursive
+
+			_find_quantized_collision_pairs_recursive(
+								boxset0,boxset1,
+								collision_pairs,trans_cache_1to0,
+								boxset0->getRightNode(node0),node1,false);
+
+
+		}
+		else
+		{
+			//both nodes are internal: test the four child combinations, starting with left0 left1
+
+
+
+			_find_quantized_collision_pairs_recursive(
+				boxset0,boxset1,
+				collision_pairs,trans_cache_1to0,
+				boxset0->getLeftNode(node0),boxset1->getLeftNode(node1),false);
+
+			//collide left0 right1
+
+			_find_quantized_collision_pairs_recursive(
+				boxset0,boxset1,
+				collision_pairs,trans_cache_1to0,
+				boxset0->getLeftNode(node0),boxset1->getRightNode(node1),false);
+
+
+			//collide right0 left1
+
+			_find_quantized_collision_pairs_recursive(
+				boxset0,boxset1,
+				collision_pairs,trans_cache_1to0,
+				boxset0->getRightNode(node0),boxset1->getLeftNode(node1),false);
+
+			//collide right0 right1
+
+			_find_quantized_collision_pairs_recursive(
+				boxset0,boxset1,
+				collision_pairs,trans_cache_1to0,
+				boxset0->getRightNode(node0),boxset1->getRightNode(node1),false);
+
+		}// else if node1 is not a leaf
+	}// else if node0 is not a leaf
+}
+
+
+void btGImpactQuantizedBvh::find_collision(btGImpactQuantizedBvh * boxset0, const btTransform & trans0,
+		btGImpactQuantizedBvh * boxset1, const btTransform & trans1,
+		btPairSet & collision_pairs)
+{
+	// A tree without nodes has no root to test, so no pair can be produced.
+	const bool anyEmpty = (boxset0->getNodeCount()==0) || (boxset1->getNodeCount()==0);
+	if(anyEmpty) return;
+	// Built once from both transforms and shared by every box test below.
+	BT_BOX_BOX_TRANSFORM_CACHE trans_cache_1to0;
+	trans_cache_1to0.calc_from_homogenic(trans0,trans1);
+#ifdef TRI_COLLISION_PROFILING
+	bt_begin_gim02_q_tree_time();
+#endif //TRI_COLLISION_PROFILING
+	// Start from both roots (node 0). Only this first call asks for the
+	// complete box test; the recursion passes false from there on.
+	const int rootNode = 0;
+	_find_quantized_collision_pairs_recursive(
+		boxset0,boxset1,
+		&collision_pairs,trans_cache_1to0,rootNode,rootNode,true);
+#ifdef TRI_COLLISION_PROFILING
+	bt_end_gim02_q_tree_time();
+#endif //TRI_COLLISION_PROFILING
+}
+
+
diff --git a/src/bullet/BulletCollision/Gimpact/btGImpactQuantizedBvh.h b/src/bullet/BulletCollision/Gimpact/btGImpactQuantizedBvh.h
new file mode 100644
index 00000000..9c990774
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btGImpactQuantizedBvh.h
@@ -0,0 +1,372 @@
+#ifndef GIM_QUANTIZED_SET_H_INCLUDED
+#define GIM_QUANTIZED_SET_H_INCLUDED
+
+/*! \file btGImpactQuantizedBvh.h
+\author Francisco Leon Najera
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btGImpactBvh.h"
+#include "btQuantization.h"
+
+
+
+
+
+///btQuantizedBvhNode is a compressed aabb node, 16 bytes.
+///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).
+ATTRIBUTE_ALIGNED16	(struct) BT_QUANTIZED_BVH_NODE
+{
+	// quantized box corners: 2 x 3 unsigned shorts, 12 bytes
+	unsigned short int	m_quantizedAabbMin[3];
+	unsigned short int	m_quantizedAabbMax[3];
+	// shared 4-byte slot: >= 0 is a leaf's data index, < 0 is a negated escape index
+	int	m_escapeIndexOrDataIndex;
+
+	// the shared slot starts at 0, which reads as a leaf with data index 0
+	BT_QUANTIZED_BVH_NODE()
+		: m_escapeIndexOrDataIndex(0)
+	{
+	}
+
+	// internal nodes are the ones holding a negative value in the shared slot
+	SIMD_FORCE_INLINE bool isLeafNode() const
+	{
+		return !(m_escapeIndexOrDataIndex<0);
+	}
+
+	// undoes the negation applied by setEscapeIndex(); meant for internal nodes
+	SIMD_FORCE_INLINE int getEscapeIndex() const
+	{
+		return -m_escapeIndexOrDataIndex;
+	}
+
+	// stores the escape index negated; any positive index marks the node as internal
+	SIMD_FORCE_INLINE void setEscapeIndex(int index)
+	{
+		m_escapeIndexOrDataIndex = -index;
+	}
+
+	// raw value of the shared slot; meant for leaf nodes
+	SIMD_FORCE_INLINE int getDataIndex() const
+	{
+		return m_escapeIndexOrDataIndex;
+	}
+
+	// stores the data index as is; a non-negative index marks the node as a leaf
+	SIMD_FORCE_INLINE void setDataIndex(int index)
+	{
+		m_escapeIndexOrDataIndex = index;
+	}
+
+	// true unless the two quantized boxes are separated along at least one axis
+	SIMD_FORCE_INLINE bool testQuantizedBoxOverlapp(
+		unsigned short * quantizedMin,unsigned short * quantizedMax) const
+	{
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			const bool separated = m_quantizedAabbMin[axis] > quantizedMax[axis] ||
+								   m_quantizedAabbMax[axis] < quantizedMin[axis];
+			if (separated) return false;
+		}
+		return true;
+	}
+
+};
+
+
+
+class GIM_QUANTIZED_BVH_NODE_ARRAY:public btAlignedObjectArray<BT_QUANTIZED_BVH_NODE>
+{	// declares no members of its own: it only gives the aligned array of quantized nodes a name
+};
+
+
+
+
+//! Basic Box tree structure
+class btQuantizedBvhTree
+{
+protected:
+	int m_num_nodes; //!< nodes in use; build_tree() sizes m_node_array to twice the primitive count, so the array may be larger
+	GIM_QUANTIZED_BVH_NODE_ARRAY m_node_array;
+	btAABB m_global_bound; //!< bounds handed to the quantize/unquantize helpers
+	btVector3 m_bvhQuantization;
+protected:
+	void calc_quantization(GIM_BVH_DATA_ARRAY & primitive_boxes, btScalar boundMargin = btScalar(1.0) );
+	//! reorders primitive_boxes within [startIndex,endIndex) along splitAxis and returns the split position
+	int _sort_and_calc_splitting_index(
+		GIM_BVH_DATA_ARRAY & primitive_boxes,
+		 int startIndex,  int endIndex, int splitAxis);
+	//! picks the axis used to split the boxes in [startIndex,endIndex)
+	int _calc_splitting_axis(GIM_BVH_DATA_ARRAY & primitive_boxes, int startIndex,  int endIndex);
+	//! recursively builds the nodes for the boxes in [startIndex,endIndex)
+	void _build_sub_tree(GIM_BVH_DATA_ARRAY & primitive_boxes, int startIndex,  int endIndex);
+public:
+	btQuantizedBvhTree()
+	{
+		m_num_nodes = 0;
+	}
+
+	//! prototype functions for box tree management
+	//!@{
+	void build_tree(GIM_BVH_DATA_ARRAY & primitive_boxes);
+	//! quantizes \a point into three unsigned shorts through bt_quantize_clamp(), using m_global_bound
+	SIMD_FORCE_INLINE void quantizePoint(
+		unsigned short * quantizedpoint, const btVector3 & point) const
+	{
+		bt_quantize_clamp(quantizedpoint,point,m_global_bound.m_min,m_global_bound.m_max,m_bvhQuantization);
+	}
+
+	//! overlap test between the stored quantized box of node_index and an already quantized query box
+	SIMD_FORCE_INLINE bool testQuantizedBoxOverlapp(
+		int node_index,
+		unsigned short * quantizedMin,unsigned short * quantizedMax) const
+	{
+		return m_node_array[node_index].testQuantizedBoxOverlapp(quantizedMin,quantizedMax);
+	}
+	//! empties the node array and resets the node count
+	SIMD_FORCE_INLINE void clearNodes()
+	{
+		m_node_array.clear();
+		m_num_nodes = 0;
+	}
+
+	//! node count
+	SIMD_FORCE_INLINE int getNodeCount() const
+	{
+		return m_num_nodes;
+	}
+
+	//! tells if the node is a leaf
+	SIMD_FORCE_INLINE bool isLeafNode(int nodeindex) const
+	{
+		return m_node_array[nodeindex].isLeafNode();
+	}
+	//! data index stored in the node; meaningful for leaf nodes
+	SIMD_FORCE_INLINE int getNodeData(int nodeindex) const
+	{
+		return m_node_array[nodeindex].getDataIndex();
+	}
+	//! writes the unquantized box of the node into \a bound
+	SIMD_FORCE_INLINE void getNodeBound(int nodeindex, btAABB & bound) const
+	{
+		bound.m_min = bt_unquantize(
+			m_node_array[nodeindex].m_quantizedAabbMin,
+			m_global_bound.m_min,m_bvhQuantization);
+
+		bound.m_max = bt_unquantize(
+			m_node_array[nodeindex].m_quantizedAabbMax,
+			m_global_bound.m_min,m_bvhQuantization);
+	}
+	//! quantizes \a bound and stores it as the box of the node
+	SIMD_FORCE_INLINE void setNodeBound(int nodeindex, const btAABB & bound)
+	{
+		bt_quantize_clamp(	m_node_array[nodeindex].m_quantizedAabbMin,
+							bound.m_min,
+							m_global_bound.m_min,
+							m_global_bound.m_max,
+							m_bvhQuantization);
+
+		bt_quantize_clamp(	m_node_array[nodeindex].m_quantizedAabbMax,
+							bound.m_max,
+							m_global_bound.m_min,
+							m_global_bound.m_max,
+							m_bvhQuantization);
+	}
+	//! the left child is stored directly after its parent
+	SIMD_FORCE_INLINE int getLeftNode(int nodeindex) const
+	{
+		return nodeindex+1;
+	}
+	//! the right child follows the left subtree: one node if the left child is a leaf, otherwise the left child's escape index
+	SIMD_FORCE_INLINE int getRightNode(int nodeindex) const
+	{
+		if(m_node_array[nodeindex+1].isLeafNode()) return nodeindex+2;
+		return nodeindex+1 + m_node_array[nodeindex+1].getEscapeIndex();
+	}
+	//! escape index of the node (the offset that steps over its subtree); meaningful for internal nodes
+	SIMD_FORCE_INLINE int getEscapeNodeIndex(int nodeindex) const
+	{
+		return m_node_array[nodeindex].getEscapeIndex();
+	}
+	//! pointer to the node at \a index inside the node array
+	SIMD_FORCE_INLINE const BT_QUANTIZED_BVH_NODE * get_node_pointer(int index = 0) const
+	{
+		return &m_node_array[index];
+	}
+
+	//!@}
+};
+
+
+
+//! Structure for containing Boxes
+/*!
+This class offers a structure for managing a box tree of primitives.
+Requires a Primitive prototype (like btPrimitiveManagerBase )
+*/
+class btGImpactQuantizedBvh
+{
+protected:
+	btQuantizedBvhTree m_box_tree;
+	btPrimitiveManagerBase * m_primitive_manager; //!< source of the primitive boxes and triangles; NULL after default construction
+
+protected:
+	//stackless refit
+	void refit();
+public:
+
+	//! this constructor doesn't build the tree. you must call	buildSet
+	btGImpactQuantizedBvh()
+	{
+		m_primitive_manager = NULL;
+	}
+
+	//! this constructor doesn't build the tree. you must call	buildSet
+	btGImpactQuantizedBvh(btPrimitiveManagerBase * primitive_manager)
+	{
+		m_primitive_manager = primitive_manager;
+	}
+	//! box of the root node (node 0); reads node 0, so the tree needs at least one node
+	SIMD_FORCE_INLINE btAABB getGlobalBox()  const
+	{
+		btAABB totalbox;
+		getNodeBound(0, totalbox);
+		return totalbox;
+	}
+	//! replaces the primitive manager; the tree itself is not rebuilt here
+	SIMD_FORCE_INLINE void setPrimitiveManager(btPrimitiveManagerBase * primitive_manager)
+	{
+		m_primitive_manager = primitive_manager;
+	}
+
+	SIMD_FORCE_INLINE btPrimitiveManagerBase * getPrimitiveManager() const
+	{
+		return m_primitive_manager;
+	}
+
+
+//! node manager prototype functions
+///@{
+
+	//! refits the node boxes to the current primitive boxes (calls refit())
+	SIMD_FORCE_INLINE void update()
+	{
+		refit();
+	}
+
+	//! this rebuild the entire set
+	void buildSet();
+
+	//! returns the indices of the primitives in the m_primitive_manager
+	bool boxQuery(const btAABB & box, btAlignedObjectArray<int> & collided_results) const;
+
+	//! same as boxQuery(), after applying \a transform to a copy of \a box
+	SIMD_FORCE_INLINE bool boxQueryTrans(const btAABB & box,
+		 const btTransform & transform, btAlignedObjectArray<int> & collided_results) const
+	{
+		btAABB transbox=box;
+		transbox.appy_transform(transform);
+		return boxQuery(transbox,collided_results);
+	}
+
+	//! returns the indices of the primitives in the m_primitive_manager
+	bool rayQuery(
+		const btVector3 & ray_dir,const btVector3 & ray_origin ,
+		btAlignedObjectArray<int> & collided_results) const;
+
+	//! tells if this set has a hierarchy (always true for this class)
+	SIMD_FORCE_INLINE bool hasHierarchy() const
+	{
+		return true;
+	}
+
+	//! tells if this set is a trimesh (asks the primitive manager, which must be set)
+	SIMD_FORCE_INLINE bool isTrimesh()  const
+	{
+		return m_primitive_manager->is_trimesh();
+	}
+
+	//! node count
+	SIMD_FORCE_INLINE int getNodeCount() const
+	{
+		return m_box_tree.getNodeCount();
+	}
+
+	//! tells if the node is a leaf
+	SIMD_FORCE_INLINE bool isLeafNode(int nodeindex) const
+	{
+		return m_box_tree.isLeafNode(nodeindex);
+	}
+	//! forwards to the box tree: data index stored in the node
+	SIMD_FORCE_INLINE int getNodeData(int nodeindex) const
+	{
+		return m_box_tree.getNodeData(nodeindex);
+	}
+	//! forwards to the box tree: writes the box of the node into \a bound
+	SIMD_FORCE_INLINE void getNodeBound(int nodeindex, btAABB & bound)  const
+	{
+		m_box_tree.getNodeBound(nodeindex, bound);
+	}
+	//! forwards to the box tree: stores \a bound as the box of the node
+	SIMD_FORCE_INLINE void setNodeBound(int nodeindex, const btAABB & bound)
+	{
+		m_box_tree.setNodeBound(nodeindex, bound);
+	}
+
+	//! forwards to the box tree: index of the left child
+	SIMD_FORCE_INLINE int getLeftNode(int nodeindex) const
+	{
+		return m_box_tree.getLeftNode(nodeindex);
+	}
+	//! forwards to the box tree: index of the right child
+	SIMD_FORCE_INLINE int getRightNode(int nodeindex) const
+	{
+		return m_box_tree.getRightNode(nodeindex);
+	}
+	//! forwards to the box tree: escape index of the node
+	SIMD_FORCE_INLINE int getEscapeNodeIndex(int nodeindex) const
+	{
+		return m_box_tree.getEscapeNodeIndex(nodeindex);
+	}
+	//! asks the primitive manager for the triangle identified by the node's data index
+	SIMD_FORCE_INLINE void getNodeTriangle(int nodeindex,btPrimitiveTriangle & triangle) const
+	{
+		m_primitive_manager->get_primitive_triangle(getNodeData(nodeindex),triangle);
+	}
+
+	//! forwards to the box tree: pointer to the node at \a index
+	SIMD_FORCE_INLINE const BT_QUANTIZED_BVH_NODE * get_node_pointer(int index = 0) const
+	{
+		return m_box_tree.get_node_pointer(index);
+	}
+
+#ifdef TRI_COLLISION_PROFILING
+	static float getAverageTreeCollisionTime();
+#endif //TRI_COLLISION_PROFILING
+	//! collects into collision_pairs the overlapping leaf pairs of two sets (numbered 1 and 2 here, 0 and 1 in the definition)
+	static void find_collision(btGImpactQuantizedBvh * boxset1, const btTransform & trans1,
+		btGImpactQuantizedBvh * boxset2, const btTransform & trans2,
+		btPairSet & collision_pairs);
+};
+
+
+#endif // GIM_QUANTIZED_SET_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/btGImpactShape.cpp b/src/bullet/BulletCollision/Gimpact/btGImpactShape.cpp
new file mode 100644
index 00000000..cceace55
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btGImpactShape.cpp
@@ -0,0 +1,203 @@
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btGImpactShape.h"
+#include "btGImpactMassUtil.h"
+
+
+#define CALC_EXACT_INERTIA 1
+
+//! Sums the inertia of all child shapes; every child receives an equal share of \a mass.
+//! \param inertia receives the accumulated result (zero when there are no children).
+void btGImpactCompoundShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	// Child shapes are read between lockChildShapes() and unlockChildShapes().
+	lockChildShapes();
+#ifdef CALC_EXACT_INERTIA
+	inertia.setValue(0.f,0.f,0.f);
+
+	int i = this->getNumChildShapes();
+	// Guard the division: with no children there is nothing to accumulate.
+	const btScalar shapemass = (i > 0) ? mass/btScalar(i) : btScalar(0.);
+	// Cannot change while iterating, so query it once instead of once per child.
+	const bool has_transforms = childrenHasTransform();
+
+	while(i--)
+	{
+		btVector3 temp_inertia;
+		m_childShapes[i]->calculateLocalInertia(shapemass,temp_inertia);
+		// Children without stored transforms are accumulated with the identity.
+		inertia = gim_inertia_add_transformed( inertia,temp_inertia,
+			has_transforms ? m_childTransforms[i] : btTransform::getIdentity());
+	}
+
+#else
+
+	// Box approximation: inertia of a box with the extents of the local AABB.
+
+	btScalar lx= m_localAABB.m_max[0] - m_localAABB.m_min[0];
+	btScalar ly= m_localAABB.m_max[1] - m_localAABB.m_min[1];
+	btScalar lz= m_localAABB.m_max[2] - m_localAABB.m_min[2];
+	const btScalar x2 = lx*lx;
+	const btScalar y2 = ly*ly;
+	const btScalar z2 = lz*lz;
+	const btScalar scaledmass = mass * btScalar(0.08333333);
+
+	inertia = scaledmass * (btVector3(y2+z2,x2+z2,x2+y2));
+
+#endif
+	unlockChildShapes();
+}
+
+
+
+void btGImpactMeshShapePart::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+	// The vertices are read between lockChildShapes() and unlockChildShapes().
+	lockChildShapes();
+
+#ifdef CALC_EXACT_INERTIA
+	// Every vertex is treated as a point mass holding an equal share of the total mass.
+	const int vertex_count = this->getVertexCount();
+	const btScalar pointmass = mass/btScalar(vertex_count);
+
+	inertia.setValue(0.f,0.f,0.f);
+	// Accumulate from the last vertex down to the first.
+	for(int v = vertex_count-1; v >= 0; --v)
+	{
+		btVector3 vertex;
+		this->getVertex(v,vertex);
+		inertia += gim_get_point_inertia(vertex,pointmass);
+	}
+
+#else
+
+	// Box approximation: inertia of a box with the extents of the local AABB.
+
+	btScalar lx= m_localAABB.m_max[0] - m_localAABB.m_min[0];
+	btScalar ly= m_localAABB.m_max[1] - m_localAABB.m_min[1];
+	btScalar lz= m_localAABB.m_max[2] - m_localAABB.m_min[2];
+	const btScalar x2 = lx*lx;
+	const btScalar y2 = ly*ly;
+	const btScalar z2 = lz*lz;
+	const btScalar scaledmass = mass * btScalar(0.08333333);
+
+	inertia = scaledmass * (btVector3(y2+z2,x2+z2,x2+y2));
+
+#endif
+
+	unlockChildShapes();
+}
+
+void btGImpactMeshShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
+{
+#ifdef CALC_EXACT_INERTIA
+	// The mass is split evenly between the mesh parts; each part reports its own inertia.
+	const int part_count = this->getMeshPartCount();
+	const btScalar partmass = mass/btScalar(part_count);
+
+	inertia.setValue(0.f,0.f,0.f);
+	// Accumulate from the last part down to the first.
+	for(int p = part_count-1; p >= 0; --p)
+	{
+		btVector3 partinertia;
+		getMeshPart(p)->calculateLocalInertia(partmass,partinertia);
+		inertia += partinertia;
+	}
+
+#else
+
+	// Box approximation: inertia of a box with the extents of the local AABB.
+
+	btScalar lx= m_localAABB.m_max[0] - m_localAABB.m_min[0];
+	btScalar ly= m_localAABB.m_max[1] - m_localAABB.m_min[1];
+	btScalar lz= m_localAABB.m_max[2] - m_localAABB.m_min[2];
+	const btScalar x2 = lx*lx;
+	const btScalar y2 = ly*ly;
+	const btScalar z2 = lz*lz;
+	const btScalar scaledmass = mass * btScalar(0.08333333);
+
+	inertia = scaledmass * (btVector3(y2+z2,x2+z2,x2+y2));
+
+#endif
+}
+
+void btGImpactMeshShape::rayTest(const btVector3& rayFrom, const btVector3& rayTo, btCollisionWorld::RayResultCallback& resultCallback) const
+{
+	(void) rayFrom; (void) rayTo; (void) resultCallback; // intentionally empty: this override performs no ray test; the casts silence unused-parameter warnings
+}
+
+void btGImpactMeshShapePart::processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
+{
+	// The triangles are read between lockChildShapes() and unlockChildShapes().
+	lockChildShapes();
+
+	// Ask the box set which primitives it selects for the query box.
+	btAABB query_box;
+	query_box.m_min = aabbMin;
+	query_box.m_max = aabbMax;
+	btAlignedObjectArray<int> hits;
+	m_box_set.boxQuery(query_box,hits);
+
+	if(hits.size() != 0)
+	{
+		const int part = (int)getPart();
+		btPrimitiveTriangle triangle;
+		// Report the hits from the last one down to the first.
+		for(int h = hits.size()-1; h >= 0; --h)
+		{
+			const int prim_index = hits[h];
+			this->getPrimitiveTriangle(prim_index,triangle);
+			callback->processTriangle(triangle.m_vertices,part,prim_index);
+		}
+	}
+
+	unlockChildShapes();
+}
+
+void btGImpactMeshShape::processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
+{
+	// Forward the query to every mesh part, visiting the parts from last to first.
+	for(int part_index = m_mesh_parts.size()-1; part_index >= 0; --part_index)
+	{
+		m_mesh_parts[part_index]->processAllTriangles(callback,aabbMin,aabbMax);
+	}
+}
+
+
+///Fills dataBuffer (interpreted as a btGImpactMeshShapeData) and returns the struct name; this implementation has no failure path.
+const char*	btGImpactMeshShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btGImpactMeshShapeData* trimeshData = (btGImpactMeshShapeData*) dataBuffer;
+	// NOTE(review): dataBuffer is presumably at least calculateSerializeBufferSize() bytes - confirm with the caller
+	btCollisionShape::serialize(&trimeshData->m_collisionShapeData,serializer);
+
+	m_meshInterface->serialize(&trimeshData->m_meshInterface, serializer);
+
+	trimeshData->m_collisionMargin = float(m_collisionMargin);
+
+	localScaling.serializeFloat(trimeshData->m_localScaling);
+	// sub type as reported by getGImpactShapeType() (an eGIMPACT_SHAPE_TYPE value)
+	trimeshData->m_gimpactSubType = int(getGImpactShapeType());
+
+	return "btGImpactMeshShapeData";
+}
+
diff --git a/src/bullet/BulletCollision/Gimpact/btGImpactShape.h b/src/bullet/BulletCollision/Gimpact/btGImpactShape.h
new file mode 100644
index 00000000..90015bb9
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btGImpactShape.h
@@ -0,0 +1,1171 @@
+/*! \file btGImpactShape.h
+\author Francisco Len N�jera
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef GIMPACT_SHAPE_H
+#define GIMPACT_SHAPE_H
+
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+#include "BulletCollision/CollisionShapes/btStridingMeshInterface.h"
+#include "BulletCollision/CollisionShapes/btCollisionMargin.h"
+#include "BulletCollision/CollisionDispatch/btCollisionWorld.h"
+#include "BulletCollision/CollisionShapes/btConcaveShape.h"
+#include "BulletCollision/CollisionShapes/btTetrahedronShape.h"
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btMatrix3x3.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+#include "btGImpactQuantizedBvh.h" // box tree class
+
+
+//! declare Quantized trees, (you can change to float based trees)
+typedef btGImpactQuantizedBvh btGImpactBoxSet;
+
+enum eGIMPACT_SHAPE_TYPE
+{
+	CONST_GIMPACT_COMPOUND_SHAPE = 0,
+	CONST_GIMPACT_TRIMESH_SHAPE_PART,
+	CONST_GIMPACT_TRIMESH_SHAPE
+};
+
+
+//! Helper class for tetrahedrons: a btBU_Simplex1to4 whose vertex count is fixed to four
+class btTetrahedronShapeEx:public btBU_Simplex1to4
+{
+public:
+	btTetrahedronShapeEx()
+	{
+		m_numVertices = 4;
+	}
+
+	//! Stores the four vertices and then calls recalcLocalAabb().
+	SIMD_FORCE_INLINE void setVertices(
+		const btVector3 & v0,const btVector3 & v1,
+		const btVector3 & v2,const btVector3 & v3)
+	{
+		m_vertices[0] = v0;
+		m_vertices[1] = v1;
+		m_vertices[2] = v2;
+		m_vertices[3] = v3;
+		recalcLocalAabb();
+	}
+};
+
+
+//! Base class for gimpact shapes
+class btGImpactShapeInterface : public btConcaveShape
+{
+protected:
+    btAABB m_localAABB;
+    bool m_needs_update;
+    btVector3  localScaling;
+    btGImpactBoxSet m_box_set;// optionally boxset
+
+	//! Refits the bounding boxes: calls buildSet() while the box set has no nodes yet, update() otherwise,
+	//! and then stores the global box of the box set as the local AABB of this shape.
+    virtual void calcLocalAABB()
+    {
+		lockChildShapes();
+    	if(m_box_set.getNodeCount() == 0)
+    	{
+    		m_box_set.buildSet();
+    	}
+    	else
+    	{
+    		m_box_set.update();
+    	}
+    	unlockChildShapes();
+
+    	m_localAABB = m_box_set.getGlobalBox();
+    }
+
+
+public:
+	btGImpactShapeInterface()
+	{
+		m_shapeType=GIMPACT_SHAPE_PROXYTYPE;
+		m_localAABB.invalidate();
+		m_needs_update = true;
+		localScaling.setValue(1.f,1.f,1.f);
+	}
+
+
+	//! Performs the refit operation.
+	/*!
+	Updates the entire box set of this shape.
+	\pre The shape must be flagged for an update (m_needs_update == true, as set by the constructor
+		and by postUpdate()); otherwise this function does nothing.
+	\post If m_needs_update was true, calcLocalAABB() has been called and the flag has been cleared.
+	*/
+    SIMD_FORCE_INLINE void updateBound()
+    {
+    	if(!m_needs_update) return;
+    	calcLocalAABB();
+    	m_needs_update  = false;
+    }
+
+    //! Returns the stored local AABB of this shape transformed by \a t.
+    /*!
+    \note This const method only transforms a copy of m_localAABB; it does not call updateBound() itself.
+    */
+    void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+    {
+        btAABB transformedbox = m_localAABB;
+        transformedbox.appy_transform(t);
+        aabbMin = transformedbox.m_min;
+        aabbMax = transformedbox.m_max;
+    }
+
+    //! Tells to this object that is needed to refit the box set
+    virtual void postUpdate()
+    {
+    	m_needs_update = true;
+    }
+
+	//! Returns the stored local box (the global box computed over all subshapes); const so it can also be read through const shapes
+	SIMD_FORCE_INLINE const btAABB & getLocalBox() const
+	{
+		return m_localAABB;
+	}
+
+
+    virtual int	getShapeType() const
+    {
+        return GIMPACT_SHAPE_PROXYTYPE;
+    }
+
+    /*!
+	\post You must call updateBound() for update the box set.
+	*/
+	virtual void	setLocalScaling(const btVector3& scaling)
+	{
+		localScaling = scaling;
+		postUpdate();
+	}
+
+	virtual const btVector3& getLocalScaling() const
+	{
+		return localScaling;
+	}
+
+
+	//! Stores the margin, passes it on to every child shape and flags the shape for a refit.
+	virtual void setMargin(btScalar margin)
+	{
+		m_collisionMargin = margin;
+
+		// Walk the children from the last index down to the first.
+		for(int child_index = getNumChildShapes()-1; child_index >= 0; --child_index)
+		{
+			getChildShape(child_index)->setMargin(margin);
+		}
+		m_needs_update = true;
+	}
+
+
+	//! Subshape member functions
+	//!@{
+
+	//! Base method for determining which kind of GIMPACT shape we get
+	virtual eGIMPACT_SHAPE_TYPE getGImpactShapeType() const = 0 ;
+
+	//! gets boxset
+	SIMD_FORCE_INLINE btGImpactBoxSet * getBoxSet()
+	{
+		return &m_box_set;
+	}
+
+	//! Tells whether this shape has a box hierarchy for sorting its primitives,
+	//! i.e. whether the box set contains at least one node.
+	SIMD_FORCE_INLINE bool hasBoxSet()  const
+	{
+		return m_box_set.getNodeCount() != 0;
+	}
+
+	//! Obtains the primitive manager
+	virtual const btPrimitiveManagerBase * getPrimitiveManager()  const = 0;
+
+
+	//! Gets the number of children
+	virtual int	getNumChildShapes() const  = 0;
+
+	//! if true, then its children must get transforms.
+	virtual bool childrenHasTransform() const = 0;
+
+	//! Determines if this shape has triangles
+	virtual bool needsRetrieveTriangles() const = 0;
+
+	//! Determines if this shape has tetrahedrons
+	virtual bool needsRetrieveTetrahedrons() const = 0;
+
+	virtual void getBulletTriangle(int prim_index,btTriangleShapeEx & triangle) const = 0;
+
+	virtual void getBulletTetrahedron(int prim_index,btTetrahedronShapeEx & tetrahedron) const = 0;
+
+
+
+	//! call when reading child shapes
+	virtual void lockChildShapes() const
+	{
+	}
+
+	virtual void unlockChildShapes() const
+	{
+	}
+
+	//! if this trimesh
+	SIMD_FORCE_INLINE void getPrimitiveTriangle(int index,btPrimitiveTriangle & triangle) const
+	{
+		getPrimitiveManager()->get_primitive_triangle(index,triangle);
+	}
+
+
+	//! Retrieves the bound from a child
+    /*!
+    */
+    virtual void getChildAabb(int child_index,const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+    {
+        btAABB child_aabb;
+        getPrimitiveManager()->get_primitive_box(child_index,child_aabb);
+        child_aabb.appy_transform(t);
+        aabbMin = child_aabb.m_min;
+        aabbMax = child_aabb.m_max;
+    }
+
+	//! Gets the children
+	virtual btCollisionShape* getChildShape(int index) = 0;
+
+
+	//! Gets the child
+	virtual const btCollisionShape* getChildShape(int index) const = 0;
+
+	//! Gets the children transform
+	virtual btTransform	getChildTransform(int index) const = 0;
+
+	//! Sets the children transform
+	/*!
+	\post You must call updateBound() for update the box set.
+	*/
+	virtual void setChildTransform(int index, const btTransform & transform) = 0;
+
+	//!@}
+
+
+	//! virtual method for ray collision
+	virtual void rayTest(const btVector3& rayFrom, const btVector3& rayTo, btCollisionWorld::RayResultCallback& resultCallback)  const
+	{
+        (void) rayFrom; (void) rayTo; (void) resultCallback;
+	}
+
+	//! Function for retrieve triangles.
+	/*!
+	It gives the triangles in local space
+	*/
+	virtual void	processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
+	{
+        (void) callback; (void) aabbMin; (void) aabbMax;
+	}
+
+	//!@}
+
+};
+
+
+//! btGImpactCompoundShape allows to handle multiple btCollisionShape objects at once
+/*!
+This class only can manage Convex subshapes
+*/
+class btGImpactCompoundShape	: public btGImpactShapeInterface
+{
+public:
+	//! compound primitive manager
+	class CompoundPrimitiveManager:public btPrimitiveManagerBase
+	{
+	public:
+		virtual ~CompoundPrimitiveManager() {}
+		btGImpactCompoundShape * m_compoundShape;
+
+
+		CompoundPrimitiveManager(const CompoundPrimitiveManager& compound)
+            : btPrimitiveManagerBase()
+		{
+			m_compoundShape = compound.m_compoundShape;
+		}
+
+		CompoundPrimitiveManager(btGImpactCompoundShape * compoundShape)
+		{
+			m_compoundShape = compoundShape;
+		}
+
+		CompoundPrimitiveManager()
+		{
+			m_compoundShape = NULL;
+		}
+
+		virtual bool is_trimesh() const
+		{
+			return false;
+		}
+
+		virtual int get_primitive_count() const
+		{
+			return (int )m_compoundShape->getNumChildShapes();
+		}
+
+		//! Computes the AABB of child \a prim_index: the child is placed with its
+		//! child transform when the compound's children carry transforms, and
+		//! with the identity transform otherwise.
+		virtual void get_primitive_box(int prim_index ,btAABB & primbox) const
+		{
+			btTransform child_frame;
+			child_frame.setIdentity();
+			if(m_compoundShape->childrenHasTransform())
+			{
+				child_frame = m_compoundShape->getChildTransform(prim_index);
+			}
+			const btCollisionShape* child = m_compoundShape->getChildShape(prim_index);
+			child->getAabb(child_frame,primbox.m_min,primbox.m_max);
+		}
+
+		virtual void get_primitive_triangle(int prim_index,btPrimitiveTriangle & triangle) const
+		{
+			btAssert(0);
+            (void) prim_index; (void) triangle;
+		}
+
+	};
+
+
+
+protected:
+	CompoundPrimitiveManager m_primitive_manager;
+	btAlignedObjectArray<btTransform>		m_childTransforms;
+	btAlignedObjectArray<btCollisionShape*>	m_childShapes;
+
+
+public:
+
+	btGImpactCompoundShape(bool children_has_transform = true)
+	{
+        (void) children_has_transform; // unused: childrenHasTransform() looks at the stored child transforms instead
+		m_primitive_manager.m_compoundShape = this; // the primitive manager reads the children back through this pointer
+		m_box_set.setPrimitiveManager(&m_primitive_manager); // the box set obtains its primitives from that manager
+	}
+
+	virtual ~btGImpactCompoundShape()
+	{
+	}
+
+
+	//! Children are considered to carry transforms as soon as at least one
+	//! child transform has been stored in m_childTransforms.
+	virtual bool childrenHasTransform() const
+	{
+		return m_childTransforms.size() != 0;
+	}
+
+
+	//! Obtains the primitive manager
+	virtual const btPrimitiveManagerBase * getPrimitiveManager()  const
+	{
+		return &m_primitive_manager;
+	}
+
+	//! Obtains the compound primitive manager
+	SIMD_FORCE_INLINE CompoundPrimitiveManager * getCompoundPrimitiveManager()
+	{
+		return &m_primitive_manager;
+	}
+
+	//! Gets the number of children
+	virtual int	getNumChildShapes() const
+	{
+		return m_childShapes.size();
+	}
+
+
+	//! Use this method for adding children. Only Convex shapes are allowed.
+	void addChildShape(const btTransform& localTransform,btCollisionShape* shape)
+	{
+		btAssert(shape->isConvex());
+		m_childTransforms.push_back(localTransform);
+		m_childShapes.push_back(shape);
+	}
+
+	//! Adds a child without a transform. Only Convex shapes are allowed (asserted below).
+	void addChildShape(btCollisionShape* shape)
+	{
+		btAssert(shape->isConvex());
+		m_childShapes.push_back(shape); // NOTE(review): m_childTransforms is not extended here; getChildTransform()/setChildTransform() assert equal array sizes, so mixing both overloads looks unsupported - confirm
+	}
+
+	//! Gets the children
+	virtual btCollisionShape* getChildShape(int index)
+	{
+		return m_childShapes[index];
+	}
+
+	//! Gets the children
+	virtual const btCollisionShape* getChildShape(int index) const
+	{
+		return m_childShapes[index];
+	}
+
+	//! Retrieves the bound from a child
+    /*!
+    */
+    virtual void getChildAabb(int child_index,const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+    {
+
+    	if(childrenHasTransform())
+    	{
+    		m_childShapes[child_index]->getAabb(t*m_childTransforms[child_index],aabbMin,aabbMax);
+    	}
+    	else
+    	{
+    		m_childShapes[child_index]->getAabb(t,aabbMin,aabbMax);
+    	}
+    }
+
+
+	//! Gets the children transform
+	virtual btTransform	getChildTransform(int index) const
+	{
+		btAssert(m_childTransforms.size() == m_childShapes.size());
+		return m_childTransforms[index];
+	}
+
+	//! Sets the children transform
+	/*!
+	\post You must call updateBound() for update the box set.
+	*/
+	virtual void setChildTransform(int index, const btTransform & transform)
+	{
+		btAssert(m_childTransforms.size() == m_childShapes.size());
+		m_childTransforms[index] = transform;
+		postUpdate();
+	}
+
+	//! Determines if this shape has triangles
+	virtual bool needsRetrieveTriangles() const
+	{
+		return false;
+	}
+
+	//! Determines if this shape has tetrahedrons
+	virtual bool needsRetrieveTetrahedrons() const
+	{
+		return false;
+	}
+
+
+	virtual void getBulletTriangle(int prim_index,btTriangleShapeEx & triangle) const
+	{
+        (void) prim_index; (void) triangle;
+		btAssert(0);
+	}
+
+	virtual void getBulletTetrahedron(int prim_index,btTetrahedronShapeEx & tetrahedron) const
+	{
+        (void) prim_index; (void) tetrahedron;
+		btAssert(0);
+	}
+
+
+	//! Calculates the exact inertia tensor for this shape
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+	virtual const char*	getName()const
+	{
+		return "GImpactCompound";
+	}
+
+	virtual eGIMPACT_SHAPE_TYPE getGImpactShapeType() const
+	{
+		return CONST_GIMPACT_COMPOUND_SHAPE;
+	}
+
+};
+
+
+
+//! This class manages a sub part of a mesh supplied by the btStridingMeshInterface interface.
+/*!
+- Simply create this shape by passing the btStridingMeshInterface to the constructor btGImpactMeshShapePart, then you must call updateBound() after creating the mesh
+- When making operations with this shape, you must call <b>lock</b> before accessing to the trimesh primitives, and then call <b>unlock</b>
+- You can handle deformable meshes with this shape, by calling postUpdate() every time when changing the mesh vertices.
+
+*/
+class btGImpactMeshShapePart : public btGImpactShapeInterface
+{
+public:
+	//! Trimesh primitive manager
+	/*!
+	Manages the info from btStridingMeshInterface object and controls the Lock/Unlock mechanism
+	*/
+	class TrimeshPrimitiveManager:public btPrimitiveManagerBase
+	{
+	public:
+		btScalar m_margin;
+		btStridingMeshInterface * m_meshInterface;
+		btVector3 m_scale;
+		int m_part;
+		int m_lock_count;
+		const unsigned char *vertexbase;
+		int numverts;
+		PHY_ScalarType type;
+		int stride;
+		const unsigned char *indexbase;
+		int indexstride;
+		int  numfaces;
+		PHY_ScalarType indicestype;
+
+		TrimeshPrimitiveManager()
+		{
+			m_meshInterface = NULL;
+			m_part = 0;
+			m_margin = 0.01f;
+			m_scale = btVector3(1.f,1.f,1.f);
+			m_lock_count = 0;
+			vertexbase = 0;
+			numverts = 0;
+			stride = 0;
+			indexbase = 0;
+			indexstride = 0;
+			numfaces = 0;
+		}
+
+ 		TrimeshPrimitiveManager(const TrimeshPrimitiveManager & manager)
+            : btPrimitiveManagerBase()
+		{
+			m_meshInterface = manager.m_meshInterface;
+			m_part = manager.m_part;
+			m_margin = manager.m_margin;
+			m_scale = manager.m_scale;
+			m_lock_count = 0;
+			vertexbase = 0;
+			numverts = 0;
+			stride = 0;
+			indexbase = 0;
+			indexstride = 0;
+			numfaces = 0;
+
+		}
+
+		TrimeshPrimitiveManager(
+			btStridingMeshInterface * meshInterface,	int part)
+		{
+			m_meshInterface = meshInterface; // dereferenced just below, so it must not be NULL here
+			m_part = part;
+			m_scale = m_meshInterface->getScaling(); // start from the scaling reported by the mesh interface
+			m_margin = 0.1f; // NOTE(review): the default constructor uses 0.01f - confirm which default is intended
+			m_lock_count = 0; // created unlocked
+			vertexbase = 0;
+			numverts = 0;
+			stride = 0;
+			indexbase = 0;
+			indexstride = 0;
+			numfaces = 0;
+			// type and indicestype are not assigned here; they are only handed to getLockedReadOnlyVertexIndexBase() in lock()
+		}
+
+		virtual ~TrimeshPrimitiveManager() {}
+
+		void lock()
+		{
+			if(m_lock_count>0) // already locked: only count the nested lock
+			{
+				m_lock_count++;
+				return;
+			}
+			m_meshInterface->getLockedReadOnlyVertexIndexBase( // first lock: query the mesh interface for part m_part
+				&vertexbase,numverts,
+				type, stride,&indexbase, indexstride, numfaces,indicestype,m_part);
+			// this is now the outermost lock; unlock() releases the mesh interface once the count drops back to zero
+			m_lock_count = 1;
+		}
+
+		void unlock()
+		{
+			if(m_lock_count == 0) return;
+			if(m_lock_count>1)
+			{
+				--m_lock_count;
+				return;
+			}
+			m_meshInterface->unLockReadOnlyVertexBase(m_part);
+			vertexbase = NULL;
+			m_lock_count = 0;
+		}
+
+		virtual bool is_trimesh() const
+		{
+			return true;
+		}
+
+		virtual int get_primitive_count() const
+		{
+			return (int )numfaces;
+		}
+
+		SIMD_FORCE_INLINE int get_vertex_count() const
+		{
+			return (int )numverts;
+		}
+
+		//! Reads the three vertex indices of face \a face_index into i0,i1,i2; PHY_SHORT index data is read as unsigned 16-bit values, any other index type as int.
+		SIMD_FORCE_INLINE void get_indices(int face_index,int &i0,int &i1,int &i2) const
+		{
+			if(indicestype == PHY_SHORT)
+			{
+				// must be unsigned: read as signed short, every index above 32767 came out negative
+				const unsigned short * s_indices = (const unsigned short *)(indexbase + face_index*indexstride);
+				i0 = s_indices[0];
+				i1 = s_indices[1];
+				i2 = s_indices[2];
+			}
+			else
+			{
+				const int * i_indices = (const int *)(indexbase + face_index*indexstride);
+				i0 = i_indices[0];
+				i1 = i_indices[1];
+				i2 = i_indices[2];
+			}
+		}
+		SIMD_FORCE_INLINE void get_vertex(int vertex_index, btVector3 & vertex) const
+		{
+			if(type == PHY_DOUBLE)
+			{
+				double * dvertices = (double *)(vertexbase + vertex_index*stride);
+				vertex[0] = btScalar(dvertices[0]*m_scale[0]);
+				vertex[1] = btScalar(dvertices[1]*m_scale[1]);
+				vertex[2] = btScalar(dvertices[2]*m_scale[2]);
+			}
+			else
+			{
+				float * svertices = (float *)(vertexbase + vertex_index*stride);
+				vertex[0] = svertices[0]*m_scale[0];
+				vertex[1] = svertices[1]*m_scale[1];
+				vertex[2] = svertices[2]*m_scale[2];
+			}
+		}
+
+		virtual void get_primitive_box(int prim_index ,btAABB & primbox) const
+		{
+			btPrimitiveTriangle  triangle;
+			get_primitive_triangle(prim_index,triangle);
+			primbox.calc_from_triangle_margin(
+				triangle.m_vertices[0],
+				triangle.m_vertices[1],triangle.m_vertices[2],triangle.m_margin);
+		}
+
+		virtual void get_primitive_triangle(int prim_index,btPrimitiveTriangle & triangle) const
+		{
+			int indices[3];
+			get_indices(prim_index,indices[0],indices[1],indices[2]);
+			get_vertex(indices[0],triangle.m_vertices[0]);
+			get_vertex(indices[1],triangle.m_vertices[1]);
+			get_vertex(indices[2],triangle.m_vertices[2]);
+			triangle.m_margin = m_margin;
+		}
+
+		SIMD_FORCE_INLINE void get_bullet_triangle(int prim_index,btTriangleShapeEx & triangle) const
+		{
+			int indices[3];
+			get_indices(prim_index,indices[0],indices[1],indices[2]);
+			get_vertex(indices[0],triangle.m_vertices1[0]);
+			get_vertex(indices[1],triangle.m_vertices1[1]);
+			get_vertex(indices[2],triangle.m_vertices1[2]);
+			triangle.setMargin(m_margin);
+		}
+
+	};
+
+
+protected:
+	TrimeshPrimitiveManager m_primitive_manager;
+public:
+
+	btGImpactMeshShapePart()
+	{
+		m_box_set.setPrimitiveManager(&m_primitive_manager);
+	}
+
+
+	btGImpactMeshShapePart(btStridingMeshInterface * meshInterface,	int part)
+	{
+		m_primitive_manager.m_meshInterface = meshInterface;
+		m_primitive_manager.m_part = part;
+		m_box_set.setPrimitiveManager(&m_primitive_manager);
+	}
+
+	virtual ~btGImpactMeshShapePart()
+	{
+	}
+
+	//! if true, then its children must get transforms.
+	virtual bool childrenHasTransform() const
+	{
+		return false;
+	}
+
+
+	//! Call before reading the mesh primitives: locks the primitive manager registered in the box set (nested calls are counted by lock()).
+	virtual void lockChildShapes() const
+	{
+		void * dummy = (void*)(m_box_set.getPrimitiveManager()); // presumably routed through void* to shed constness in this const method - confirm
+		TrimeshPrimitiveManager * dummymanager = static_cast<TrimeshPrimitiveManager *>(dummy); // the constructors register &m_primitive_manager, a TrimeshPrimitiveManager
+		dummymanager->lock();
+	}
+
+	virtual void unlockChildShapes()  const // counterpart of lockChildShapes(): releases one lock level on the primitive manager
+	{
+		void * dummy = (void*)(m_box_set.getPrimitiveManager()); // presumably routed through void* to shed constness in this const method - confirm
+		TrimeshPrimitiveManager * dummymanager = static_cast<TrimeshPrimitiveManager *>(dummy); // the constructors register &m_primitive_manager, a TrimeshPrimitiveManager
+		dummymanager->unlock(); // a no-op when the manager is not locked
+	}
+
+	//! Gets the number of children
+	virtual int	getNumChildShapes() const
+	{
+		return m_primitive_manager.get_primitive_count();
+	}
+
+
+	//! Gets the children
+	virtual btCollisionShape* getChildShape(int index)
+	{
+        (void) index;
+		btAssert(0);
+		return NULL;
+	}
+
+
+
+	//! Gets the child
+	virtual const btCollisionShape* getChildShape(int index) const
+	{
+        (void) index;
+		btAssert(0);
+		return NULL;
+	}
+
+	//! Gets the children transform
+	virtual btTransform	getChildTransform(int index) const
+	{
+        (void) index;
+		btAssert(0);
+		return btTransform();
+	}
+
+	//! Sets the children transform
+	/*!
+	\post You must call updateBound() for update the box set.
+	*/
+	virtual void setChildTransform(int index, const btTransform & transform)
+	{
+        (void) index;
+        (void) transform;
+		btAssert(0);
+	}
+
+
+	//! Obtains the primitive manager
+	virtual const btPrimitiveManagerBase * getPrimitiveManager()  const
+	{
+		return &m_primitive_manager;
+	}
+
+	SIMD_FORCE_INLINE TrimeshPrimitiveManager * getTrimeshPrimitiveManager()
+	{
+		return &m_primitive_manager;
+	}
+
+
+
+
+
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+
+
+
+	virtual const char*	getName()const
+	{
+		return "GImpactMeshShapePart";
+	}
+
+	virtual eGIMPACT_SHAPE_TYPE getGImpactShapeType() const
+	{
+		return CONST_GIMPACT_TRIMESH_SHAPE_PART;
+	}
+
+	//! Determines if this shape has triangles
+	virtual bool needsRetrieveTriangles() const
+	{
+		return true;
+	}
+
+	//! Determines if this shape has tetrahedrons
+	virtual bool needsRetrieveTetrahedrons() const
+	{
+		return false;
+	}
+
+	virtual void getBulletTriangle(int prim_index,btTriangleShapeEx & triangle) const
+	{
+		m_primitive_manager.get_bullet_triangle(prim_index,triangle);
+	}
+
+	virtual void getBulletTetrahedron(int prim_index,btTetrahedronShapeEx & tetrahedron) const
+	{
+        (void) prim_index;
+        (void) tetrahedron;
+		btAssert(0);
+	}
+
+
+
+	SIMD_FORCE_INLINE int getVertexCount() const
+	{
+		return m_primitive_manager.get_vertex_count();
+	}
+
+	SIMD_FORCE_INLINE void getVertex(int vertex_index, btVector3 & vertex) const
+	{
+		m_primitive_manager.get_vertex(vertex_index,vertex);
+	}
+
+	SIMD_FORCE_INLINE void setMargin(btScalar margin)
+    {
+    	m_primitive_manager.m_margin = margin;
+    	postUpdate();
+    }
+
+    SIMD_FORCE_INLINE btScalar getMargin() const
+    {
+    	return m_primitive_manager.m_margin;
+    }
+
+    virtual void	setLocalScaling(const btVector3& scaling)
+    {
+    	m_primitive_manager.m_scale = scaling;
+    	postUpdate();
+    }
+
+    virtual const btVector3& getLocalScaling() const
+    {
+    	return m_primitive_manager.m_scale;
+    }
+
+    SIMD_FORCE_INLINE int getPart() const
+    {
+    	return (int)m_primitive_manager.m_part;
+    }
+
+	virtual void	processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const;
+};
+
+
+//! This class manages a mesh supplied by the btStridingMeshInterface interface.
+/*!
+Set of btGImpactMeshShapePart parts
+- Simply create this shape by passing the btStridingMeshInterface to the constructor btGImpactMeshShape, then you must call updateBound() after creating the mesh
+
+- You can handle deformable meshes with this shape, by calling postUpdate() every time when changing the mesh vertices.
+
+*/
+class btGImpactMeshShape : public btGImpactShapeInterface
+{
+	btStridingMeshInterface* m_meshInterface;
+
+protected:
+	btAlignedObjectArray<btGImpactMeshShapePart*> m_mesh_parts;
+	void buildMeshParts(btStridingMeshInterface * meshInterface)
+	{
+		for (int i=0;i<meshInterface->getNumSubParts() ;++i )
+		{
+			btGImpactMeshShapePart * newpart = new btGImpactMeshShapePart(meshInterface,i);
+			m_mesh_parts.push_back(newpart);
+		}
+	}
+
+	//! Updates the bound of every mesh part and merges the part boxes into the local AABB of the whole mesh.
+	virtual void calcLocalAABB()
+	{
+		m_localAABB.invalidate();
+		for(int part_index = m_mesh_parts.size()-1; part_index >= 0; --part_index)
+		{
+			btGImpactMeshShapePart * part = m_mesh_parts[part_index];
+			part->updateBound();
+			m_localAABB.merge(part->getLocalBox());
+		}
+	}
+
+public:
+	btGImpactMeshShape(btStridingMeshInterface * meshInterface)
+	{
+		m_meshInterface = meshInterface;
+		buildMeshParts(meshInterface);
+	}
+
+	//! Deletes the mesh parts allocated by buildMeshParts(); m_meshInterface is not deleted here.
+	virtual ~btGImpactMeshShape()
+	{
+		// Release the parts from the last one down to the first, then empty the array.
+		for(int part_index = m_mesh_parts.size()-1; part_index >= 0; --part_index)
+		{
+			delete m_mesh_parts[part_index];
+		}
+		m_mesh_parts.clear();
+	}
+
+
+	btStridingMeshInterface* getMeshInterface()
+	{
+		return m_meshInterface;
+	}
+
+	const btStridingMeshInterface* getMeshInterface() const
+	{
+		return m_meshInterface;
+	}
+
+	int getMeshPartCount() const
+	{
+		return m_mesh_parts.size();
+	}
+
+	btGImpactMeshShapePart * getMeshPart(int index)
+	{
+		return m_mesh_parts[index];
+	}
+
+
+
+	const btGImpactMeshShapePart * getMeshPart(int index) const
+	{
+		return m_mesh_parts[index];
+	}
+
+
+	virtual void	setLocalScaling(const btVector3& scaling)
+	{
+		localScaling = scaling;
+
+		int i = m_mesh_parts.size();
+    	while(i--)
+    	{
+			btGImpactMeshShapePart * part = m_mesh_parts[i];
+			part->setLocalScaling(scaling);
+    	}
+
+		m_needs_update = true;
+	}
+
+	virtual void setMargin(btScalar margin)
+    {
+    	m_collisionMargin = margin;
+
+		int i = m_mesh_parts.size();
+    	while(i--)
+    	{
+			btGImpactMeshShapePart * part = m_mesh_parts[i];
+			part->setMargin(margin);
+    	}
+
+		m_needs_update = true;
+    }
+
+	//! Flags every mesh part, and then this shape itself, as needing a refit.
+	virtual void postUpdate()
+	{
+		// Visit the parts from the last one down to the first.
+		for(int part_index = m_mesh_parts.size()-1; part_index >= 0; --part_index)
+		{
+			m_mesh_parts[part_index]->postUpdate();
+		}
+
+		// The merged box of the whole mesh has to be recomputed as well.
+		m_needs_update = true;
+	}
+
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const;
+
+
+	//! Obtains the primitive manager
+	virtual const btPrimitiveManagerBase * getPrimitiveManager()  const
+	{
+		btAssert(0);
+		return NULL;
+	}
+
+
+	//! Gets the number of children
+	virtual int	getNumChildShapes() const
+	{
+		btAssert(0);
+		return 0;
+	}
+
+
+	//! if true, then its children must get transforms.
+	virtual bool childrenHasTransform() const
+	{
+		btAssert(0);
+		return false;
+	}
+
+	//! Determines if this shape has triangles
+	virtual bool needsRetrieveTriangles() const
+	{
+		btAssert(0);
+		return false;
+	}
+
+	//! Determines if this shape has tetrahedrons
+	virtual bool needsRetrieveTetrahedrons() const
+	{
+		btAssert(0);
+		return false;
+	}
+
+	virtual void getBulletTriangle(int prim_index,btTriangleShapeEx & triangle) const
+	{
+        (void) prim_index; (void) triangle;
+		btAssert(0);
+	}
+
+	virtual void getBulletTetrahedron(int prim_index,btTetrahedronShapeEx & tetrahedron) const
+	{
+        (void) prim_index; (void) tetrahedron;
+		btAssert(0);
+	}
+
+	//! call when reading child shapes
+	virtual void lockChildShapes() const
+	{
+		btAssert(0);
+	}
+
+	virtual void unlockChildShapes() const
+	{
+		btAssert(0);
+	}
+
+
+
+
+	//! Retrieves the bound from a child
+    /*!
+    */
+    virtual void getChildAabb(int child_index,const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+    {
+        (void) child_index; (void) t; (void) aabbMin; (void) aabbMax;
+        btAssert(0);
+    }
+
+	//! Gets the children
+	virtual btCollisionShape* getChildShape(int index)
+	{
+        (void) index;
+		btAssert(0);
+		return NULL;
+	}
+
+
+	//! Gets the child
+	virtual const btCollisionShape* getChildShape(int index) const
+	{
+        (void) index;
+		btAssert(0);
+		return NULL;
+	}
+
+	//! Gets the children transform
+	virtual btTransform	getChildTransform(int index) const
+	{
+        (void) index;
+		btAssert(0);
+		return btTransform();
+	}
+
+	//! Sets the children transform
+	/*!
+	\post You must call updateBound() for update the box set.
+	*/
+	virtual void setChildTransform(int index, const btTransform & transform)
+	{
+        (void) index; (void) transform;
+		btAssert(0);
+	}
+
+
+	virtual eGIMPACT_SHAPE_TYPE getGImpactShapeType() const
+	{
+		return CONST_GIMPACT_TRIMESH_SHAPE;
+	}
+
+
+	virtual const char*	getName()const
+	{
+		return "GImpactMesh";
+	}
+
+	virtual void rayTest(const btVector3& rayFrom, const btVector3& rayTo, btCollisionWorld::RayResultCallback& resultCallback)  const;
+
+	//! Function for retrieve triangles.
+	/*!
+	It gives the triangles in local space
+	*/
+	virtual void	processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const;
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btGImpactMeshShapeData
+{
+	btCollisionShapeData	m_collisionShapeData;
+
+	btStridingMeshInterfaceData m_meshInterface;
+
+	btVector3FloatData	m_localScaling;
+
+	float	m_collisionMargin;
+
+	int		m_gimpactSubType;
+};
+
+SIMD_FORCE_INLINE	int	btGImpactMeshShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btGImpactMeshShapeData);
+}
+
+
+#endif //GIMPACT_SHAPE_H
diff --git a/src/bullet/BulletCollision/Gimpact/btGenericPoolAllocator.cpp b/src/bullet/BulletCollision/Gimpact/btGenericPoolAllocator.cpp
new file mode 100644
index 00000000..5d07d1ad
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btGenericPoolAllocator.cpp
@@ -0,0 +1,283 @@
+/*! \file btGenericPoolAllocator.cpp
+\author Francisco Leon Najera. email projectileman@yahoo.com
+
+General purpose allocator class
+*/
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btGenericPoolAllocator.h"
+
+
+
+/// *************** btGenericMemoryPool ******************///////////
+
+//! Tries to satisfy a request from the list of previously freed nodes.
+/*!
+The free list is scanned from its last entry towards the first and the first
+node found whose recorded size is at least num_elements is used. If the node is
+larger than requested, its tail stays on the free list as a smaller node;
+otherwise the node is removed from the list by overwriting it with the last entry.
+\param num_elements number of pool elements requested
+\return index of the first element of the block, or BT_UINT_MAX if no free node fits
+*/
+size_t btGenericMemoryPool::allocate_from_free_nodes(size_t num_elements)
+{
+	// walk the free list backwards until a node that is large enough shows up
+	size_t slot = m_free_nodes_count;
+	bool found = false;
+	while(slot > 0)
+	{
+		--slot;
+		if(m_allocated_sizes[m_free_nodes[slot]]>=num_elements)
+		{
+			found = true;
+			break;
+		}
+	}
+	if(!found) return BT_UINT_MAX; // empty list, or nothing big enough
+
+	// slot is the position inside m_free_nodes, node the element index it refers to
+	const size_t node = m_free_nodes[slot];
+	const size_t leftover = m_allocated_sizes[node] - num_elements;
+
+	m_allocated_sizes[node] = num_elements;
+
+	if(leftover>0)
+	{
+		// keep the unused tail as a (smaller) free node in the same slot
+		const size_t tail = node + num_elements;
+		m_free_nodes[slot] = tail;
+		m_allocated_sizes[tail] = leftover;
+	}
+	else
+	{
+		// node consumed completely: fill its slot with the last entry and shrink the list
+		m_free_nodes[slot] = m_free_nodes[m_free_nodes_count-1];
+		m_free_nodes_count--;
+	}
+
+	return node;
+}
+
+//! Hands out the next num_elements elements that have never been allocated.
+/*!
+\param num_elements number of pool elements requested
+\return index of the first element of the block, or BT_UINT_MAX when fewer than
+num_elements untouched elements remain
+*/
+size_t btGenericMemoryPool::allocate_from_pool(size_t num_elements)
+{
+	const size_t first_element = m_allocated_count;
+	if(first_element+num_elements>m_max_element_count)
+	{
+		return BT_UINT_MAX; // request does not fit in the untouched part of the pool
+	}
+
+	// remember the block size at its first element, then advance the high-water mark
+	m_allocated_sizes[first_element] = num_elements;
+	m_allocated_count = first_element+num_elements;
+
+	return first_element;
+}
+
+
+//! Sets up an empty pool of element_count elements of element_size bytes each.
+/*!
+Allocates the element storage and the two bookkeeping tables (free-node list and
+per-node sizes) with 16-byte alignment and zeroes the size table. The results of
+the allocations are not checked here.
+*/
+void btGenericMemoryPool::init_pool(size_t element_size, size_t element_count)
+{
+	m_element_size = element_size;
+	m_max_element_count = element_count;
+	m_allocated_count = 0;
+	m_free_nodes_count = 0;
+
+	// both bookkeeping tables hold one size_t per pool element
+	const size_t table_bytes = sizeof(size_t)*m_max_element_count;
+
+	m_pool = (unsigned char *) btAlignedAlloc(m_element_size*m_max_element_count,16);
+	m_free_nodes = (size_t *) btAlignedAlloc(table_bytes,16);
+	m_allocated_sizes = (size_t *) btAlignedAlloc(table_bytes,16);
+
+	// no node has a recorded size yet
+	size_t remaining = m_max_element_count;
+	while(remaining > 0)
+	{
+		--remaining;
+		m_allocated_sizes[remaining] = 0;
+	}
+}
+
+//! Releases the buffers owned by the pool and resets its counters.
+/*!
+Each pointer is cleared once its buffer has been freed, so calling end_pool()
+more than once (for example explicitly and then again through the destructor,
+which also calls end_pool()) does not free the same buffer twice.
+*/
+void btGenericMemoryPool::end_pool()
+{
+	if(m_pool != NULL)
+	{
+		btAlignedFree(m_pool);
+		m_pool = NULL;
+	}
+	if(m_free_nodes != NULL)
+	{
+		btAlignedFree(m_free_nodes);
+		m_free_nodes = NULL;
+	}
+	if(m_allocated_sizes != NULL)
+	{
+		btAlignedFree(m_allocated_sizes);
+		m_allocated_sizes = NULL;
+	}
+	m_allocated_count = 0;
+	m_free_nodes_count = 0;
+}
+
+
+//! Allocates memory in pool
+/*!
+The request is rounded up to a whole number of elements. Freed nodes are tried
+first, then the untouched part of the pool.
+
+A zero-byte request is served with one element. With zero elements the pool
+bookkeeping breaks: on a full pool the size would be recorded one entry past the
+end of m_allocated_sizes and the returned pointer would lie outside the pool,
+and on the free list the returned block would remain registered as free.
+\param size_bytes size in bytes of the buffer
+\return pointer to the block inside the pool, or NULL if the pool has no room
+*/
+void * btGenericMemoryPool::allocate(size_t size_bytes)
+{
+	// number of elements needed to hold size_bytes, rounded up
+	size_t element_count = size_bytes/m_element_size;
+	if(size_bytes%m_element_size>0) element_count++;
+
+	// never hand out an empty block (see above)
+	if(element_count == 0) element_count = 1;
+
+	size_t alloc_pos = allocate_from_free_nodes(element_count);
+	// a free node is found
+	if(alloc_pos != BT_UINT_MAX)
+	{
+		return get_element_data(alloc_pos);
+	}
+	// allocate directly on pool
+	alloc_pos = allocate_from_pool(element_count);
+
+	if(alloc_pos == BT_UINT_MAX) return NULL; // no space left
+	return get_element_data(alloc_pos);
+}
+
+//! Returns a block to this pool.
+/*!
+\param pointer address previously handed out by allocate()
+\return false when the address lies outside this pool's storage (nothing is
+changed in that case); true once the block has been appended to the free list
+*/
+bool btGenericMemoryPool::freeMemory(void * pointer)
+{
+	unsigned char * const block = (unsigned char *)pointer;
+	unsigned char * const pool_begin = (unsigned char *)m_pool;
+
+	// below the start of the storage: belongs to another pool
+	if(block<pool_begin) return false;
+
+	// at or beyond the end of the storage: also not ours
+	const size_t byte_offset = size_t(block - pool_begin);
+	if(byte_offset>=get_pool_capacity()) return false;
+
+	// register the element index of the block as a new free node
+	m_free_nodes[m_free_nodes_count++] = byte_offset/m_element_size;
+	return true;
+}
+
+
+/// *******************! btGenericPoolAllocator *******************!///
+
+
+//! Tears down every pool created by push_new_pool().
+btGenericPoolAllocator::~btGenericPoolAllocator()
+{
+	for (size_t pool_index = 0; pool_index < m_pool_count; ++pool_index)
+	{
+		btGenericMemoryPool * pool = m_pools[pool_index];
+		// the pool object lives in raw btAlignedAlloc memory: release its buffers
+		// explicitly, then give the object's own storage back
+		pool->end_pool();
+		btAlignedFree(pool);
+	}
+}
+
+
+// creates a pool
+/*!
+Allocates raw storage for a new btGenericMemoryPool, initialises it with the
+allocator's element size/count and appends it to m_pools.
+\return the new pool, or NULL when BT_DEFAULT_MAX_POOLS pools already exist or
+the storage for the pool object could not be allocated. In both cases the
+allocator is left unchanged.
+*/
+btGenericMemoryPool * btGenericPoolAllocator::push_new_pool()
+{
+	if(m_pool_count >= BT_DEFAULT_MAX_POOLS) return NULL;
+
+	btGenericMemoryPool * newptr = (btGenericMemoryPool *)btAlignedAlloc(sizeof(btGenericMemoryPool),16);
+
+	// out of memory: report failure instead of initialising through a null pointer
+	if(newptr == NULL) return NULL;
+
+	// no constructor runs on the raw storage, so init_pool() does the set-up
+	newptr->init_pool(m_pool_element_size,m_pool_element_count);
+
+	m_pools[m_pool_count] = newptr;
+	m_pool_count++;
+	return newptr;
+}
+
+//! Allocation path used when none of the existing pools could serve the request.
+/*!
+If the request fits in a pool, a new pool is created and asked for the block.
+Requests larger than a pool, or arriving when no further pool can be created,
+are served directly by btAlignedAlloc with 16-byte alignment.
+*/
+void * btGenericPoolAllocator::failback_alloc(size_t size_bytes)
+{
+	if(size_bytes<=get_pool_capacity())
+	{
+		btGenericMemoryPool * fresh_pool = push_new_pool();
+		if(fresh_pool!=NULL)
+		{
+			return fresh_pool->allocate(size_bytes);
+		}
+	}
+
+	// too big for a pool, or no pool available: plain aligned allocation
+	return btAlignedAlloc(size_bytes,16);
+}
+
+//! Releases a block that no pool recognised as its own.
+/*!
+The pointer is passed straight to btAlignedFree, matching the btAlignedAlloc
+fallback in failback_alloc(). Always returns true.
+*/
+bool btGenericPoolAllocator::failback_free(void * pointer)
+{
+	btAlignedFree(pointer);
+	return true;
+}
+
+
+//! Allocates memory, preferring the existing pools.
+/*!
+The pools are asked in creation order and the first block obtained is returned.
+When every pool refuses, the request goes to failback_alloc().
+\param size_bytes size in bytes of the buffer
+*/
+void * btGenericPoolAllocator::allocate(size_t size_bytes)
+{
+	for(size_t pool_index = 0; pool_index<m_pool_count; ++pool_index)
+	{
+		void * block = m_pools[pool_index]->allocate(size_bytes);
+		if(block != NULL) return block;
+	}
+
+	return failback_alloc(size_bytes);
+}
+
+//! Frees a block obtained from allocate().
+/*!
+Each pool is offered the pointer in creation order; the first pool that
+recognises it takes it back. A pointer that no pool recognises is handed to
+failback_free().
+*/
+bool btGenericPoolAllocator::freeMemory(void * pointer)
+{
+	for(size_t pool_index = 0; pool_index<m_pool_count; ++pool_index)
+	{
+		if(m_pools[pool_index]->freeMemory(pointer)) return true;
+	}
+
+	return failback_free(pointer);
+}
+
+/// ************** STANDARD ALLOCATOR ***************************///
+
+
+#define BT_DEFAULT_POOL_SIZE 32768
+#define BT_DEFAULT_POOL_ELEMENT_SIZE 8
+
+// main allocator
+// A btGenericPoolAllocator preconfigured with pools of BT_DEFAULT_POOL_SIZE
+// elements of BT_DEFAULT_POOL_ELEMENT_SIZE bytes each.
+class GIM_STANDARD_ALLOCATOR: public btGenericPoolAllocator
+{
+public:
+	// forwards the default element size and element count to the base class
+	GIM_STANDARD_ALLOCATOR():btGenericPoolAllocator(BT_DEFAULT_POOL_ELEMENT_SIZE,BT_DEFAULT_POOL_SIZE)
+	{
+	}
+};
+
+// global allocator
+GIM_STANDARD_ALLOCATOR g_main_allocator;
+
+
+//! Allocates size bytes from the global allocator g_main_allocator.
+void * btPoolAlloc(size_t size)
+{
+	return g_main_allocator.allocate(size);
+}
+
+//! Moves a block of the global allocator to a new block of newsize bytes.
+/*!
+A new block is always allocated; the first min(oldsize,newsize) bytes of the old
+block are copied into it and the old block is freed.
+\param ptr block to resize; may be NULL, in which case nothing is copied or freed
+\param oldsize size in bytes of the block at ptr
+\param newsize requested size in bytes
+\return the new block, or NULL if the allocation failed. On failure the old
+block is left untouched and still belongs to the caller.
+*/
+void * btPoolRealloc(void *ptr, size_t oldsize, size_t newsize)
+{
+	void * newptr = btPoolAlloc(newsize);
+	if(newptr == NULL) return NULL; // keep the old block alive, as realloc() does
+
+	if(ptr != NULL)
+	{
+		size_t copysize = oldsize<newsize?oldsize:newsize;
+		if(copysize>0) memcpy(newptr,ptr,copysize);
+		btPoolFree(ptr);
+	}
+	return newptr;
+}
+
+//! Returns a block to the global allocator g_main_allocator; the result of freeMemory() is ignored.
+void btPoolFree(void *ptr)
+{
+	g_main_allocator.freeMemory(ptr);
+}
diff --git a/src/bullet/BulletCollision/Gimpact/btGenericPoolAllocator.h b/src/bullet/BulletCollision/Gimpact/btGenericPoolAllocator.h
new file mode 100644
index 00000000..b46d8516
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btGenericPoolAllocator.h
@@ -0,0 +1,163 @@
+/*! \file btGenericPoolAllocator.h
+\author Francisco Leon Najera. email projectileman@yahoo.com
+
+General purpose allocator class
+*/
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_GENERIC_POOL_ALLOCATOR_H
+#define BT_GENERIC_POOL_ALLOCATOR_H
+
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include "LinearMath/btAlignedAllocator.h"
+
+#define BT_UINT_MAX UINT_MAX
+#define BT_DEFAULT_MAX_POOLS 16
+
+
+//! Generic Pool class
+/*!
+Pool of m_max_element_count elements of m_element_size bytes each, created by
+init_pool() and released by end_pool(). Blocks are runs of consecutive elements;
+the size of a block is recorded in m_allocated_sizes at the index of its first
+element, and freed blocks are remembered in m_free_nodes.
+
+The read-only accessors are const-qualified. gem_element_size() is a misspelled
+name kept for existing callers; new code should use get_element_size().
+*/
+class btGenericMemoryPool
+{
+public:
+	unsigned char * m_pool; //[m_element_size*m_max_element_count];
+	size_t * m_free_nodes; //[m_max_element_count];//! free nodes
+	size_t * m_allocated_sizes;//[m_max_element_count];//! Number of elements allocated per node
+	size_t m_allocated_count; //!< number of elements handed out from the untouched part of the pool
+	size_t m_free_nodes_count; //!< number of valid entries in m_free_nodes
+protected:
+	size_t m_element_size;
+	size_t m_max_element_count;
+
+	//! Serves a request from the free list; returns an element index or BT_UINT_MAX
+	size_t allocate_from_free_nodes(size_t num_elements);
+	//! Serves a request from the untouched part of the pool; returns an element index or BT_UINT_MAX
+	size_t allocate_from_pool(size_t num_elements);
+
+public:
+
+	//! Allocates the storage and bookkeeping tables of an empty pool
+	void init_pool(size_t element_size, size_t element_count);
+
+	//! Frees the storage and bookkeeping tables
+	void end_pool();
+
+
+	btGenericMemoryPool(size_t element_size, size_t element_count)
+	{
+		init_pool(element_size, element_count);
+	}
+
+	~btGenericMemoryPool()
+	{
+		end_pool();
+	}
+
+
+	//! Total size of the element storage in bytes
+	inline size_t get_pool_capacity() const
+	{
+		return m_element_size*m_max_element_count;
+	}
+
+	//! Size of one element in bytes
+	inline size_t get_element_size() const
+	{
+		return m_element_size;
+	}
+
+	//! Same as get_element_size(); misspelled name kept for backward compatibility
+	inline size_t gem_element_size() const
+	{
+		return m_element_size;
+	}
+
+	inline size_t get_max_element_count() const
+	{
+		return m_max_element_count;
+	}
+
+	inline size_t get_allocated_count() const
+	{
+		return m_allocated_count;
+	}
+
+	inline size_t get_free_positions_count() const
+	{
+		return m_free_nodes_count;
+	}
+
+	//! Address of the element with the given index inside the storage
+	inline void * get_element_data(size_t element_index)
+	{
+		return &m_pool[element_index*m_element_size];
+	}
+
+	//! Allocates memory in pool
+	/*!
+	\param size_bytes size in bytes of the buffer
+	*/
+	void * allocate(size_t size_bytes);
+
+	//! Gives a block back to the pool; returns false if the pointer is not inside this pool
+	bool freeMemory(void * pointer);
+};
+
+
+
+
+//! Generic Allocator with pools
+/*!
+General purpose allocator which creates memory pools dynamically as needed, up
+to BT_DEFAULT_MAX_POOLS of them. Every pool has m_pool_element_count elements
+of m_pool_element_size bytes.
+*/
+class btGenericPoolAllocator
+{
+protected:
+	size_t m_pool_element_size;
+	size_t m_pool_element_count;
+public:
+	btGenericMemoryPool * m_pools[BT_DEFAULT_MAX_POOLS]; //!< only the first m_pool_count entries are valid
+	size_t m_pool_count;
+
+
+	//! Size in bytes of the element storage of one pool
+	inline size_t get_pool_capacity()
+	{
+		return m_pool_element_size*m_pool_element_count;
+	}
+
+
+protected:
+	//! Creates a pool and appends it to m_pools
+	btGenericMemoryPool * push_new_pool();
+
+	//! Allocation path taken when no existing pool can serve a request
+	void * failback_alloc(size_t size_bytes);
+
+	//! Release path taken when no pool recognises a pointer
+	bool failback_free(void * pointer);
+public:
+
+	//! Stores the pool geometry; no pool is created until it is needed
+	btGenericPoolAllocator(size_t pool_element_size, size_t pool_element_count)
+		:m_pool_element_size(pool_element_size),
+		m_pool_element_count(pool_element_count),
+		m_pool_count(0)
+	{
+	}
+
+	virtual ~btGenericPoolAllocator();
+
+	//! Allocates memory in pool
+	/*!
+	\param size_bytes size in bytes of the buffer
+	*/
+	void * allocate(size_t size_bytes);
+
+	//! Frees memory obtained from allocate()
+	bool freeMemory(void * pointer);
+};
+
+
+
+void * btPoolAlloc(size_t size);
+void * btPoolRealloc(void *ptr, size_t oldsize, size_t newsize);
+void btPoolFree(void *ptr);
+
+
+#endif
diff --git a/src/bullet/BulletCollision/Gimpact/btGeometryOperations.h b/src/bullet/BulletCollision/Gimpact/btGeometryOperations.h
new file mode 100644
index 00000000..60f06510
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btGeometryOperations.h
@@ -0,0 +1,212 @@
+#ifndef BT_BASIC_GEOMETRY_OPERATIONS_H_INCLUDED
+#define BT_BASIC_GEOMETRY_OPERATIONS_H_INCLUDED
+
+/*! \file btGeometryOperations.h
+*\author Francisco Leon Najera
+
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btBoxCollision.h"
+
+
+
+
+
+#define PLANEDIREPSILON 0.0000001f
+#define PARALELENORMALS 0.000001f
+
+
+#define BT_CLAMP(number,minval,maxval) (number<minval?minval:(number>maxval?maxval:number))
+
+/// Calculates a plane from a triangle edge (e1,e2) and a normal; plane is a vec4f.
+/// The plane normal is the normalized cross product of the edge direction and
+/// normal, and the plane passes through e2.
+SIMD_FORCE_INLINE void bt_edge_plane(const btVector3 & e1,const btVector3 &  e2, const btVector3 & normal,btVector4 & plane)
+{
+	const btVector3 edge = e2-e1;
+	btVector3 planenormal = edge.cross(normal);
+	planenormal.normalize();
+
+	// fourth component: offset of the plane along its normal
+	const btScalar offset = e2.dot(planenormal);
+	plane.setValue(planenormal[0],planenormal[1],planenormal[2],offset);
+}
+
+
+
+//***************** SEGMENT and LINE FUNCTIONS **********************************///
+
+/*! Finds the closest point (cp) to (v) on a segment (e1,e2).
+
+v is projected onto the line through e1 and e2 and the projection parameter is
+clamped to the segment, so the result is e1, e2 or a point in between.
+
+A degenerate segment (e1 equal to e2, squared length zero) yields e1. Without
+that guard the projection parameter would be 0/0 and cp would come out as NaN.
+\param cp output: closest point on the segment
+\param v point to project
+\param e1 first end of the segment
+\param e2 second end of the segment
+ */
+SIMD_FORCE_INLINE void bt_closest_point_on_segment(
+	btVector3 & cp, const btVector3 & v,
+	const btVector3  &e1,const btVector3 &e2)
+{
+    btVector3 n = e2-e1;
+    btScalar _len2 = n.dot(n);
+	if(_len2 <= btScalar(0.0))
+	{
+	    // the segment is a single point: that point is the answer
+	    cp = e1;
+	    return;
+	}
+    cp = v - e1;
+	// parameter of the projection along the segment: 0 at e1, 1 at e2
+	btScalar _scalar = cp.dot(n)/_len2;
+	if(_scalar <0.0f)
+	{
+	    cp = e1;
+	}
+	else if(_scalar >1.0f)
+	{
+	    cp = e2;
+	}
+	else
+	{
+		cp = _scalar*n + e1;
+	}
+}
+
+
+//! Line / plane collision.
+/*!
+Intersects the line vPoint + t*vDir with the plane and clamps t to [tmin,tmax].
+\param plane plane to test against
+\param vDir direction of the line
+\param vPoint origin of the line
+\param pout output: point at the (clamped) parameter; not written when the line
+is considered parallel to the plane
+\param tparam output: line parameter, clamped to [tmin,tmax]; set to tmax when
+the line is considered parallel to the plane
+\param tmin lower bound for the parameter
+\param tmax upper bound for the parameter
+*\return
+	-0 if the line is parallel to the plane or the hit had to be clamped
+	-1 if the line origin is on the front side of the plane (distance >= 0)
+	-2 if the line origin is on the back side of the plane (distance < 0)
+*/
+
+SIMD_FORCE_INLINE int bt_line_plane_collision(
+	const btVector4 & plane,
+	const btVector3 & vDir,
+	const btVector3 & vPoint,
+	btVector3 & pout,
+	btScalar &tparam,
+	btScalar tmin, btScalar tmax)
+{
+	// projection of the direction on the plane normal; near zero means parallel
+	const btScalar dir_dot_normal = vDir.dot(plane);
+	if(btFabs(dir_dot_normal)<PLANEDIREPSILON)
+	{
+		tparam = tmax;
+		return 0;
+	}
+
+	const btScalar origin_distance = bt_distance_point_plane(plane,vPoint);
+	int side = 1;
+	if(origin_distance<0.0f)
+	{
+		side = 2;
+	}
+
+	tparam = -origin_distance/dir_dot_normal;
+
+	// a hit outside [tmin,tmax] is clamped and reported as a miss
+	if(tparam<tmin)
+	{
+		tparam = tmin;
+		side = 0;
+	}
+	else if(tparam>tmax)
+	{
+		tparam = tmax;
+		side = 0;
+	}
+
+	pout = tparam*vDir + vPoint;
+	return side;
+}
+
+
+//! Find closest points on segments
+/*!
+Computes a pair of points, vPointA on segment (vA1,vA2) and vPointB on segment
+(vB1,vB2), intended to be the closest points between the two segments.
+
+Two cases are handled:
+ - segments considered parallel (squared length of the cross product of their
+   directions below SIMD_EPSILON): both segments are projected on the direction
+   of A and the pair is chosen from how the projected intervals are arranged;
+ - otherwise: line A is intersected with the plane that contains line B and is
+   perpendicular to the plane spanned by both directions, the parameter clamped
+   to segment A, and the result is then projected on segment B.
+
+_M, N and tp are reused as scratch storage with a different meaning in each case.
+\param vA1,vA2 ends of segment A
+\param vB1,vB2 ends of segment B
+\param vPointA output: point on segment A
+\param vPointB output: point on segment B
+*/
+SIMD_FORCE_INLINE void bt_segment_collision(
+	const btVector3 & vA1,
+	const btVector3 & vA2,
+	const btVector3 & vB1,
+	const btVector3 & vB2,
+	btVector3 & vPointA,
+	btVector3 & vPointB)
+{
+    btVector3 AD = vA2 - vA1; // direction of segment A
+    btVector3 BD = vB2 - vB1; // direction of segment B
+    btVector3 N = AD.cross(BD);
+    btScalar tp = N.length2();
+
+    btVector4 _M;//plane
+
+    if(tp<SIMD_EPSILON)//segments are parallel
+    {
+    	//project B over A
+    	// here _M is not a plane: _M[0],_M[1] hold the projections of the ends of B
+    	// on AD and _M[2],_M[3] the projections of the ends of A
+    	bool invert_b_order = false;
+    	_M[0] = vB1.dot(AD);
+    	_M[1] = vB2.dot(AD);
+
+    	// keep _M[0] <= _M[1], remembering whether the ends of B were exchanged
+    	// (BT_SWAP_NUMBERS is defined outside this chunk; presumably it exchanges its two arguments)
+    	if(_M[0]>_M[1])
+    	{
+    		invert_b_order  = true;
+    		BT_SWAP_NUMBERS(_M[0],_M[1]);
+    	}
+    	_M[2] = vA1.dot(AD);
+    	_M[3] = vA2.dot(AD);
+    	//mid points
+    	// N is reused as scratch: N[0] = middle of the B interval, N[1] = middle of the A interval
+    	N[0] = (_M[0]+_M[1])*0.5f;
+    	N[1] = (_M[2]+_M[3])*0.5f;
+
+    	if(N[0]<N[1])
+    	{
+    		// the middle of B projects below the middle of A
+    		if(_M[1]<_M[2])
+    		{
+    			// upper end of the B interval is below the projection of vA1
+    			vPointB = invert_b_order?vB1:vB2;
+    			vPointA = vA1;
+    		}
+    		else if(_M[1]<_M[3])
+    		{
+    			// upper end of the B interval falls between the projections of vA1 and vA2
+    			vPointB = invert_b_order?vB1:vB2;
+    			bt_closest_point_on_segment(vPointA,vPointB,vA1,vA2);
+    		}
+    		else
+    		{
+    			// upper end of the B interval reaches the projection of vA2 or beyond
+    			vPointA = vA2;
+    			bt_closest_point_on_segment(vPointB,vPointA,vB1,vB2);
+    		}
+    	}
+    	else
+    	{
+    		// the middle of B projects at or above the middle of A
+    		if(_M[3]<_M[0])
+    		{
+    			// projection of vA2 is below the lower end of the B interval
+    			vPointB = invert_b_order?vB2:vB1;
+    			vPointA = vA2;
+    		}
+    		else if(_M[3]<_M[1])
+    		{
+    			// projection of vA2 falls inside the B interval
+    			vPointA = vA2;
+    			bt_closest_point_on_segment(vPointB,vPointA,vB1,vB2);
+    		}
+    		else
+    		{
+    			// projection of vA2 reaches the upper end of the B interval or beyond
+    			vPointB = invert_b_order?vB1:vB2;
+    			bt_closest_point_on_segment(vPointA,vPointB,vA1,vA2);
+    		}
+    	}
+    	return;
+    }
+
+    // general case: plane through vB1 whose normal is (AD x BD) x BD, so it contains line B
+    N = N.cross(BD);
+    _M.setValue(N[0],N[1],N[2],vB1.dot(N));
+
+	// get point A as the plane collision point
+	// (parameter clamped to [0,1], i.e. to segment A; the return value is not used)
+    bt_line_plane_collision(_M,AD,vA1,vPointA,tp,btScalar(0), btScalar(1));
+
+    /*Closest point on segment*/
+    // project vPointA on segment B; tp now holds the parameter along BD
+    vPointB = vPointA - vB1;
+	tp = vPointB.dot(BD);
+	tp/= BD.dot(BD);
+	tp = BT_CLAMP(tp,0.0f,1.0f);
+
+	vPointB = tp*BD + vB1;
+}
+
+
+
+
+
+#endif // BT_BASIC_GEOMETRY_OPERATIONS_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/btQuantization.h b/src/bullet/BulletCollision/Gimpact/btQuantization.h
new file mode 100644
index 00000000..bd2633cf
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btQuantization.h
@@ -0,0 +1,88 @@
+#ifndef BT_GIMPACT_QUANTIZATION_H_INCLUDED
+#define BT_GIMPACT_QUANTIZATION_H_INCLUDED
+
+/*! \file btQuantization.h
+*\author Francisco Leon Najera
+
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "LinearMath/btTransform.h"
+
+
+
+
+
+
+//! Computes the bounds and the per-axis scale used to quantize points to 16 bits.
+/*!
+\param outMinBound output: srcMinBound lowered by quantizationMargin on every axis
+\param outMaxBound output: srcMaxBound raised by quantizationMargin on every axis
+\param bvhQuantization output: 65535 divided by the size of the enlarged box, per axis
+\param srcMinBound minimum corner of the box to quantize
+\param srcMaxBound maximum corner of the box to quantize
+\param quantizationMargin amount by which the box is enlarged on each side
+*/
+SIMD_FORCE_INLINE void bt_calc_quantization_parameters(
+	btVector3 & outMinBound,
+	btVector3 & outMaxBound,
+	btVector3 & bvhQuantization,
+	const btVector3& srcMinBound,const btVector3& srcMaxBound,
+	btScalar quantizationMargin)
+{
+	// enlarging the AABB avoids a division by zero for boxes with no extent on some axis
+	const btVector3 margin(quantizationMargin,quantizationMargin,quantizationMargin);
+	outMinBound = srcMinBound - margin;
+	outMaxBound = srcMaxBound + margin;
+
+	const btVector3 fullRange(btScalar(65535.0),
+								btScalar(65535.0),
+								btScalar(65535.0));
+	const btVector3 enlargedSize = outMaxBound - outMinBound;
+	bvhQuantization = fullRange / enlargedSize;
+}
+
+
+//! Quantizes a point to three unsigned 16-bit values.
+/*!
+The point is first clamped to the box [min_bound,max_bound], then expressed
+relative to min_bound, scaled by bvhQuantization and rounded by adding 0.5
+before the conversion to unsigned short.
+\param out output: array of at least three values (x, y, z)
+*/
+SIMD_FORCE_INLINE void bt_quantize_clamp(
+	unsigned short* out,
+	const btVector3& point,
+	const btVector3 & min_bound,
+	const btVector3 & max_bound,
+	const btVector3 & bvhQuantization)
+{
+	// clamp into the box: raise to the minimum corner, then lower to the maximum corner
+	btVector3 inside(point);
+	inside.setMax(min_bound);
+	inside.setMin(max_bound);
+
+	const btVector3 offsetFromMin = inside - min_bound;
+	const btVector3 scaled = offsetFromMin * bvhQuantization;
+
+	out[0] = (unsigned short)(scaled.getX()+0.5f);
+	out[1] = (unsigned short)(scaled.getY()+0.5f);
+	out[2] = (unsigned short)(scaled.getZ()+0.5f);
+}
+
+
+//! Converts three quantized 16-bit values back to a point.
+/*!
+Each value is divided by the matching component of bvhQuantization and offset
+is added to the result.
+\param vecIn array of three quantized values (x, y, z)
+\param offset vector added after the division
+\param bvhQuantization per-axis quantization scale
+*/
+SIMD_FORCE_INLINE btVector3 bt_unquantize(
+	const unsigned short* vecIn,
+	const btVector3 & offset,
+	const btVector3 & bvhQuantization)
+{
+	const btScalar x = (btScalar)(vecIn[0]) / (bvhQuantization.getX());
+	const btScalar y = (btScalar)(vecIn[1]) / (bvhQuantization.getY());
+	const btScalar z = (btScalar)(vecIn[2]) / (bvhQuantization.getZ());
+
+	btVector3	point;
+	point.setValue(x,y,z);
+	point += offset;
+	return point;
+}
+
+
+
+#endif // BT_GIMPACT_QUANTIZATION_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/btTriangleShapeEx.cpp b/src/bullet/BulletCollision/Gimpact/btTriangleShapeEx.cpp
new file mode 100644
index 00000000..ca76cc54
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btTriangleShapeEx.cpp
@@ -0,0 +1,218 @@
+/*! \file btTriangleShapeEx.cpp
+\author Francisco Leon Najera
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btTriangleShapeEx.h"
+
+
+
+//! Keeps the points that penetrate deepest behind a plane.
+/*!
+The depth of a point is margin minus its distance to the plane; points with a
+negative depth are ignored. m_penetration_depth ends up as the largest depth
+found (or -1000 if no point qualified) and m_points / m_point_count hold the
+points that set, or came within SIMD_EPSILON of, the running maximum.
+\param plane plane the points are measured against
+\param margin collision margin added to the depth
+\param points candidate points
+\param point_count number of candidate points
+*/
+void GIM_TRIANGLE_CONTACT::merge_points(const btVector4 & plane,
+                                       btScalar margin, const btVector3 * points, int point_count)
+{
+    m_penetration_depth= -1000.0f;
+    m_point_count = 0;
+
+    // indices (into points) of the candidates kept so far
+    int kept[MAX_TRI_CLIPPING];
+
+    for (int candidate=0;candidate<point_count;++candidate)
+    {
+        const btScalar depth = - bt_distance_point_plane(plane,points[candidate]) + margin;
+
+        if (!(depth>=0.0f)) continue; // in front of the plane by more than the margin
+
+        if (depth>m_penetration_depth)
+        {
+            // new deepest point: restart the list with it
+            m_penetration_depth = depth;
+            kept[0] = candidate;
+            m_point_count=1;
+        }
+        else if ((depth+SIMD_EPSILON)>=m_penetration_depth)
+        {
+            // as deep as the current maximum, within tolerance: keep it too
+            kept[m_point_count] = candidate;
+            m_point_count++;
+        }
+    }
+
+    for (int slot=0;slot<m_point_count;++slot)
+    {
+        m_points[slot] = points[kept[slot]];
+    }
+}
+
+///class btPrimitiveTriangle
+//! Conservative test telling whether two triangles could collide.
+/*!
+Returns false only when all three vertices of one triangle are farther than the
+sum of both margins in front of the other triangle's plane; true otherwise.
+Both triangles must have m_plane calculated.
+*/
+bool btPrimitiveTriangle::overlap_test_conservative(const btPrimitiveTriangle& other)
+{
+    const btScalar total_margin = m_margin + other.m_margin;
+    int vertex;
+    int in_front;
+
+    // vertices of the other triangle against this plane
+    in_front = 0;
+    for (vertex=0;vertex<3;++vertex)
+    {
+        const btScalar dis = bt_distance_point_plane(m_plane,other.m_vertices[vertex]) - total_margin;
+        if (dis>0.0f) ++in_front;
+    }
+    if (in_front == 3) return false;
+
+    // vertices of this triangle against the other plane
+    in_front = 0;
+    for (vertex=0;vertex<3;++vertex)
+    {
+        const btScalar dis = bt_distance_point_plane(other.m_plane,m_vertices[vertex]) - total_margin;
+        if (dis>0.0f) ++in_front;
+    }
+    if (in_front == 3) return false;
+
+    return true;
+}
+
+//! Clips the other triangle against the three edge planes of this triangle.
+/*!
+The triangle is clipped by the plane of edge 0, the resulting polygon by the
+plane of edge 1 and that result by the plane of edge 2. Clipping stops as soon
+as nothing is left.
+\param other triangle to clip
+\param clipped_points output: final polygon; must have room for MAX_TRI_CLIPPING points
+\return number of points written to clipped_points, 0 if the triangle was clipped away
+*/
+int btPrimitiveTriangle::clip_triangle(btPrimitiveTriangle & other, btVector3 * clipped_points )
+{
+    btVector4 edge_plane;
+
+    // edge 0: clip the triangle itself
+    btVector3 after_edge0[MAX_TRI_CLIPPING];
+    get_edge_plane(0,edge_plane);
+    int remaining = bt_plane_clip_triangle(
+                        edge_plane,other.m_vertices[0],other.m_vertices[1],other.m_vertices[2],after_edge0);
+    if (remaining == 0) return 0;
+
+    // edge 1: clip the polygon left by edge 0
+    btVector3 after_edge1[MAX_TRI_CLIPPING];
+    get_edge_plane(1,edge_plane);
+    remaining = bt_plane_clip_polygon(edge_plane,after_edge0,remaining,after_edge1);
+    if (remaining == 0) return 0;
+
+    // edge 2: the last pass writes straight into the caller's buffer
+    get_edge_plane(2,edge_plane);
+    remaining = bt_plane_clip_polygon(
+                    edge_plane,after_edge1,remaining,clipped_points);
+
+    return remaining;
+}
+
+//! Finds the contact between two triangles using the clipping method.
+/*!
+Two candidate contacts are built, one per triangle plane: the opposite triangle
+is clipped against the edge planes and the deepest clipped points behind the
+plane are kept. The candidate with the smaller penetration depth is copied to
+contacts. The normal of the candidate built from this triangle's plane is
+negated.
+\param other triangle to test against
+\param contacts output: written only when true is returned
+\return false if either clipping leaves no point or no clipped point is within
+the combined margin; true otherwise
+*/
+bool btPrimitiveTriangle::find_triangle_collision_clip_method(btPrimitiveTriangle & other, GIM_TRIANGLE_CONTACT & contacts)
+{
+    const btScalar total_margin = m_margin + other.m_margin;
+
+    btVector3 clip_buffer[MAX_TRI_CLIPPING];
+    int clip_size;
+
+    // candidate 1: other triangle clipped by this triangle, measured against this plane
+    GIM_TRIANGLE_CONTACT against_this;
+    against_this.m_separating_normal = m_plane;
+
+    clip_size = clip_triangle(other,clip_buffer);
+    if (clip_size == 0 ) return false;//Reject
+
+    against_this.merge_points(against_this.m_separating_normal,total_margin,clip_buffer,clip_size);
+    if (against_this.m_point_count == 0) return false; // too far
+
+    //Normal pointing to this triangle
+    against_this.m_separating_normal *= -1.f;
+
+    // candidate 2: this triangle clipped by the other triangle, measured against the other plane
+    GIM_TRIANGLE_CONTACT against_other;
+    against_other.m_separating_normal = other.m_plane;
+
+    clip_size = other.clip_triangle(*this,clip_buffer);
+    if (clip_size == 0 ) return false;//Reject
+
+    against_other.merge_points(against_other.m_separating_normal,total_margin,clip_buffer,clip_size);
+    if (against_other.m_point_count == 0) return false; // too far
+
+    // keep the candidate with the smaller penetration depth (ties go to candidate 1)
+    const bool other_is_shallower = against_other.m_penetration_depth<against_this.m_penetration_depth;
+    contacts.copy_from(other_is_shallower ? against_other : against_this);
+    return true;
+}
+
+
+
+///class btTriangleShapeEx: public btTriangleShape
+
+//! Conservative test telling whether two triangle shapes could collide.
+/*!
+Builds the plane of each triangle and returns false only when all three
+vertices of one triangle are farther than the sum of both margins in front of
+the other triangle's plane; true otherwise.
+*/
+bool btTriangleShapeEx::overlap_test_conservative(const btTriangleShapeEx& other)
+{
+    const btScalar total_margin = getMargin() + other.getMargin();
+
+    btVector4 this_plane;
+    buildTriPlane(this_plane);
+    btVector4 other_plane;
+    other.buildTriPlane(other_plane);
+
+    int vertex;
+    int in_front;
+
+    // vertices of the other triangle against this plane
+    in_front = 0;
+    for (vertex=0;vertex<3;++vertex)
+    {
+        const btScalar dis = bt_distance_point_plane(this_plane,other.m_vertices1[vertex]) - total_margin;
+        if (dis>0.0f) ++in_front;
+    }
+    if (in_front == 3) return false;
+
+    // vertices of this triangle against the other plane
+    in_front = 0;
+    for (vertex=0;vertex<3;++vertex)
+    {
+        const btScalar dis = bt_distance_point_plane(other_plane,m_vertices1[vertex]) - total_margin;
+        if (dis>0.0f) ++in_front;
+    }
+    if (in_front == 3) return false;
+
+    return true;
+}
+
+
diff --git a/src/bullet/BulletCollision/Gimpact/btTriangleShapeEx.h b/src/bullet/BulletCollision/Gimpact/btTriangleShapeEx.h
new file mode 100644
index 00000000..973c2ed1
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/btTriangleShapeEx.h
@@ -0,0 +1,180 @@
+/*! \file btTriangleShapeEx.h
+\author Francisco Leon Najera
+*/
+/*
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2007 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef GIMPACT_TRIANGLE_SHAPE_EX_H
+#define GIMPACT_TRIANGLE_SHAPE_EX_H
+
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+#include "btBoxCollision.h"
+#include "btClipPolygon.h"
+#include "btGeometryOperations.h"
+
+
+#define MAX_TRI_CLIPPING 16
+
+//! Structure for collision
+/*!
+Result of a triangle/triangle test: a penetration depth, a separating normal
+and up to MAX_TRI_CLIPPING contact points, of which the first m_point_count are
+valid.
+*/
+struct GIM_TRIANGLE_CONTACT
+{
+    btScalar m_penetration_depth;
+    int m_point_count;
+    btVector4 m_separating_normal;
+    btVector3 m_points[MAX_TRI_CLIPPING];
+
+	//! Copies depth, normal and the first other.m_point_count points from other
+	SIMD_FORCE_INLINE void copy_from(const GIM_TRIANGLE_CONTACT& other)
+	{
+		m_penetration_depth = other.m_penetration_depth;
+		m_separating_normal = other.m_separating_normal;
+		m_point_count = other.m_point_count;
+		// a counted loop copies nothing when the count is zero or negative
+		for(int i = 0; i < m_point_count; ++i)
+		{
+			m_points[i] = other.m_points[i];
+		}
+	}
+
+	//! Creates an empty contact: no points and zero depth.
+	/*!
+	The count must start at a defined value because copy_from() and the copy
+	constructor loop on it; an indeterminate count would make them read and
+	write outside m_points.
+	*/
+	GIM_TRIANGLE_CONTACT()
+		:m_penetration_depth(btScalar(0.0)),
+		m_point_count(0)
+	{
+	}
+
+	GIM_TRIANGLE_CONTACT(const GIM_TRIANGLE_CONTACT& other)
+	{
+		copy_from(other);
+	}
+
+    //! classify points that are closer
+    void merge_points(const btVector4 & plane,
+    				btScalar margin, const btVector3 * points, int point_count);
+
+};
+
+
+
+//! Triangle described by three vertices, its plane and a collision margin.
+class btPrimitiveTriangle
+{
+public:
+	btVector3 m_vertices[3];
+	btVector4 m_plane; //!< written by buildTriPlane()
+	btScalar m_margin;
+	btScalar m_dummy;
+
+	//! Only the margin is given a value here (0.01)
+	btPrimitiveTriangle():m_margin(0.01f)
+	{
+
+	}
+
+
+	//! Computes m_plane from the current vertices: unit normal plus offset of vertex 0 along it
+	SIMD_FORCE_INLINE void buildTriPlane()
+	{
+		const btVector3 edge01 = m_vertices[1]-m_vertices[0];
+		const btVector3 edge02 = m_vertices[2]-m_vertices[0];
+		btVector3 normal = edge01.cross(edge02);
+		normal.normalize();
+		m_plane.setValue(normal[0],normal[1],normal[2],m_vertices[0].dot(normal));
+	}
+
+	//! Test if triangles could collide
+	bool overlap_test_conservative(const btPrimitiveTriangle& other);
+
+	//! Calculates the plane which is parallel to the edge and perpendicular to the triangle plane
+	/*!
+	\param edge_index 0, 1 or 2; the edge runs from that vertex to the next one (wrapping around)
+	\param plane output plane
+	\pre this triangle must have its plane calculated.
+	*/
+	SIMD_FORCE_INLINE void get_edge_plane(int edge_index, btVector4 &plane)  const
+    {
+		const int next_index = (edge_index+1)%3;
+		bt_edge_plane(m_vertices[edge_index],m_vertices[next_index],m_plane,plane);
+    }
+
+	//! Transforms the three vertices in place; m_plane is not updated
+    void applyTransform(const btTransform& t)
+	{
+		for(int i = 0; i < 3; ++i)
+		{
+			m_vertices[i] = t(m_vertices[i]);
+		}
+	}
+
+	//! Clips the triangle against this
+	/*!
+	\pre clipped_points must have MAX_TRI_CLIPPING size, and this triangle must have its plane calculated.
+	\return the number of clipped points
+	*/
+    int clip_triangle(btPrimitiveTriangle & other, btVector3 * clipped_points );
+
+	//! Find collision using the clipping method
+	/*!
+	\pre this triangle and other must have their planes calculated
+	*/
+    bool find_triangle_collision_clip_method(btPrimitiveTriangle & other, GIM_TRIANGLE_CONTACT & contacts);
+};
+
+
+
+//! Helper class for colliding Bullet Triangle Shapes
+/*!
+This class implements a better getAabb method than the previous btTriangleShape class
+*/
+class btTriangleShapeEx: public btTriangleShape
+{
+public:
+
+	//! Degenerate triangle with all three vertices at the origin
+	btTriangleShapeEx():btTriangleShape(btVector3(0,0,0),btVector3(0,0,0),btVector3(0,0,0))
+	{
+	}
+
+	btTriangleShapeEx(const btVector3& p0,const btVector3& p1,const btVector3& p2):	btTriangleShape(p0,p1,p2)
+	{
+	}
+
+	//! Builds a new shape from the three vertices of other; only the vertices are passed on
+	btTriangleShapeEx(const btTriangleShapeEx & other):	btTriangleShape(other.m_vertices1[0],other.m_vertices1[1],other.m_vertices1[2])
+	{
+	}
+
+	//! Box around the transformed vertices, built by btAABB with the collision margin
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax)const
+	{
+		const btVector3 world0 = t(m_vertices1[0]);
+		const btVector3 world1 = t(m_vertices1[1]);
+		const btVector3 world2 = t(m_vertices1[2]);
+
+		const btAABB bounds(world0,world1,world2,m_collisionMargin);
+		aabbMin = bounds.m_min;
+		aabbMax = bounds.m_max;
+	}
+
+	//! Transforms the three vertices in place
+	void applyTransform(const btTransform& t)
+	{
+		for(int i = 0; i < 3; ++i)
+		{
+			m_vertices1[i] = t(m_vertices1[i]);
+		}
+	}
+
+	//! Writes the triangle plane to plane: unit normal plus offset of vertex 0 along it
+	SIMD_FORCE_INLINE void buildTriPlane(btVector4 & plane) const
+	{
+		const btVector3 edge01 = m_vertices1[1]-m_vertices1[0];
+		const btVector3 edge02 = m_vertices1[2]-m_vertices1[0];
+		btVector3 normal = edge01.cross(edge02);
+		normal.normalize();
+		plane.setValue(normal[0],normal[1],normal[2],m_vertices1[0].dot(normal));
+	}
+
+	//! Test if triangles could collide
+	bool overlap_test_conservative(const btTriangleShapeEx& other);
+};
+
+
+#endif //GIMPACT_TRIANGLE_SHAPE_EX_H
diff --git a/src/bullet/BulletCollision/Gimpact/gim_array.h b/src/bullet/BulletCollision/Gimpact/gim_array.h
new file mode 100644
index 00000000..cfd5da8f
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_array.h
@@ -0,0 +1,326 @@
+#ifndef GIM_ARRAY_H_INCLUDED
+#define GIM_ARRAY_H_INCLUDED
+/*! \file gim_array.h
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+#include "gim_memory.h"
+
+
+#define GIM_ARRAY_GROW_INCREMENT 2
+#define GIM_ARRAY_GROW_FACTOR 2
+
+//!	Very simple array container with fast access and simd memory. No copy constructor/assignment is declared, so a copy would share m_data.
+template<typename T>
+class gim_array
+{
+public:
+//! properties
+//!@{
+    T *m_data; //!< Element buffer obtained from gim_alloc/gim_realloc, NULL when nothing is allocated
+    GUINT m_size; //!< Number of elements currently in use
+    GUINT m_allocated_size; //!< Number of elements the buffer can hold
+//!@}
+//! protected operations
+//!@{
+    //! Releases the buffer (if any) and resets m_allocated_size; m_size is not modified
+    inline void destroyData()
+	{
+	    m_allocated_size = 0;
+		if(m_data==NULL) return;
+		gim_free(m_data);
+		m_data = NULL;
+	}
+	//! Changes the capacity to newsize elements, keeping the first m_size ones (newsize 0 frees the buffer). Always returns true
+	inline bool resizeData(GUINT newsize)
+	{
+		if(newsize==0)
+		{
+			destroyData();
+			return true;
+		}
+		if(m_size>0)
+		{
+            m_data = (T*)gim_realloc(m_data,m_size*sizeof(T),newsize*sizeof(T));
+		}
+		else
+		{
+		    if(m_data!=NULL) gim_free(m_data); // an empty array may still own a buffer (e.g. reserve() called twice): release it instead of leaking it
+		    m_data = (T*)gim_alloc(newsize*sizeof(T));
+		}
+		m_allocated_size = newsize;
+		return true;
+	}
+	//! When the buffer is full, grows it to (m_size+GIM_ARRAY_GROW_INCREMENT)*GIM_ARRAY_GROW_FACTOR elements
+	inline bool growingCheck()
+	{
+		if(m_allocated_size<=m_size)
+		{
+		    GUINT requestsize = m_size;
+		    m_size = m_allocated_size; // never let resizeData copy more elements than the buffer holds
+			if(resizeData((requestsize+GIM_ARRAY_GROW_INCREMENT)*GIM_ARRAY_GROW_FACTOR)==false) return false;
+		}
+		return true;
+	}
+
+//!@}
+//! public operations
+//!@{
+    inline  bool reserve(GUINT size)
+    {
+        if(m_allocated_size>=size) return false; // capacity already sufficient: nothing is done and false is reported
+        return resizeData(size);
+    }
+    //! Destroys the elements from the last one down to index start_range, which becomes the new size
+    inline void clear_range(GUINT start_range)
+    {
+        while(m_size>start_range)
+        {
+            m_data[--m_size].~T();
+        }
+    }
+    //! Destroys every element; the buffer is kept
+    inline void clear()
+    {
+        if(m_size==0)return;
+        clear_range(0);
+    }
+    //! Destroys every element and frees the buffer
+    inline void clear_memory()
+    {
+        clear();
+        destroyData();
+    }
+    //! Creates an empty array without allocating
+    gim_array()
+    {
+        m_data = 0;
+        m_size = 0;
+        m_allocated_size = 0;
+    }
+    //! Creates an empty array with room for reservesize elements
+    gim_array(GUINT reservesize)
+    {
+        m_data = 0;
+        m_size = 0;
+
+        m_allocated_size = 0;
+        reserve(reservesize);
+    }
+    //! Destroys the elements and frees the buffer
+    ~gim_array()
+    {
+        clear_memory();
+    }
+
+    inline GUINT size() const
+    {
+        return m_size;
+    }
+    //! Current capacity in elements (m_allocated_size), not a theoretical maximum
+    inline GUINT max_size() const
+    {
+        return m_allocated_size;
+    }
+    //! Element access; no bounds checking is performed
+    inline T & operator[](size_t i)
+	{
+		return m_data[i];
+	}
+	inline  const T & operator[](size_t i) const
+	{
+		return m_data[i];
+	}
+
+    inline T * pointer(){ return m_data;}
+    inline const T * pointer() const
+    { return m_data;}
+
+
+    inline T * get_pointer_at(GUINT i)
+	{
+		return m_data + i;
+	}
+
+	inline const T * get_pointer_at(GUINT i) const
+	{
+		return m_data + i;
+	}
+	//! Same as operator[]: no bounds checking is performed
+	inline T & at(GUINT i)
+	{
+		return m_data[i];
+	}
+
+	inline const T & at(GUINT i) const
+	{
+		return m_data[i];
+	}
+	//! First element; dereferences m_data without checking that the array is not empty
+	inline T & front()
+	{
+		return *m_data;
+	}
+
+	inline const T & front() const
+	{
+		return *m_data;
+	}
+	//! Last element (index m_size-1); the array is not checked for emptiness
+	inline T & back()
+	{
+		return m_data[m_size-1];
+	}
+
+	inline const T & back() const
+	{
+		return m_data[m_size-1];
+	}
+
+	//! Exchanges the elements at indices i and j through gim_swap_elements
+	inline void swap(GUINT i, GUINT j)
+	{
+	    gim_swap_elements(m_data,i,j);
+	}
+	//! Appends obj by assigning it onto the next slot, growing the buffer first if it is full
+	inline void push_back(const T & obj)
+	{
+	    this->growingCheck();
+	    m_data[m_size] = obj;
+	    m_size++;
+	}
+
+	//!Simply increase the m_size, doesn't call the new element constructor
+	inline void push_back_mem()
+	{
+	    this->growingCheck();
+	    m_size++;
+	}
+	//! Appends obj with a raw memory copy; neither a constructor nor operator= is called
+	inline void push_back_memcpy(const T & obj)
+	{
+	    this->growingCheck();
+	    gim_simd_memcpy(&m_data[m_size],&obj,sizeof(T)); // was irr_simd_memcpy, a name that exists nowhere else in this class
+	    m_size++;
+	}
+	//! Removes the last element and calls its destructor; the array is not checked for emptiness
+	inline void pop_back()
+	{
+	    m_size--;
+        m_data[m_size].~T();
+	}
+
+	//!Simply decrease the m_size, doesn't call the deleted element destructor
+	inline void pop_back_mem()
+	{
+	    m_size--;
+	}
+
+    //! fast erase: swaps the element at index with the last one, then pops it (element order is not preserved)
+	inline void erase(GUINT index)
+	{
+	    if(index<m_size-1)
+	    {
+	        swap(index,m_size-1);
+	    }
+	    pop_back();
+	}
+	//! Removes index keeping the order: the tail is shifted down with raw copies and no destructor is called
+	inline void erase_sorted_mem(GUINT index)
+	{
+	    m_size--;
+	    for(GUINT i = index;i<m_size;i++)
+	    {
+	        gim_simd_memcpy(m_data+i,m_data+i+1,sizeof(T));
+	    }
+	}
+	//! Destroys the element at index, then closes the gap with erase_sorted_mem
+	inline void erase_sorted(GUINT index)
+	{
+	    m_data[index].~T();
+	    erase_sorted_mem(index);
+	}
+	//! Opens a slot at index by shifting the tail up with raw copies; the slot still holds the old bytes
+	inline void insert_mem(GUINT index)
+	{
+	    this->growingCheck();
+	    for(GUINT i = m_size;i>index;i--)
+	    {
+	        gim_simd_memcpy(m_data+i,m_data+i-1,sizeof(T));
+	    }
+	    m_size++;
+	}
+	//! Inserts obj at index: insert_mem opens the slot, then obj is assigned onto it
+	inline void insert(const T & obj,GUINT index)
+	{
+	    insert_mem(index);
+	    m_data[index] = obj;
+	}
+	//! Sets the element count to size; new slots are assigned a default T and removed ones destroyed only when call_constructor is true
+	inline void resize(GUINT size, bool call_constructor = true)
+	{
+
+	    if(size>m_size)
+	    {
+            reserve(size);
+            if(call_constructor)
+            {
+            	T obj;
+                while(m_size<size)
+                {
+                    m_data[m_size] = obj;
+                    m_size++;
+                }
+            }
+            else
+            {
+            	m_size = size;
+            }
+	    }
+	    else if(size<m_size)
+	    {
+	        if(call_constructor) clear_range(size);
+	        m_size = size;
+	    }
+	}
+	//! Shrinks (or frees, when empty) the buffer so that its capacity equals m_size
+	inline void refit()
+	{
+	    resizeData(m_size);
+	}
+
+};
+
+
+
+
+
+#endif // GIM_ARRAY_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_basic_geometry_operations.h b/src/bullet/BulletCollision/Gimpact/gim_basic_geometry_operations.h
new file mode 100644
index 00000000..91527740
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_basic_geometry_operations.h
@@ -0,0 +1,543 @@
+#ifndef GIM_BASIC_GEOMETRY_OPERATIONS_H_INCLUDED
+#define GIM_BASIC_GEOMETRY_OPERATIONS_H_INCLUDED
+
+/*! \file gim_basic_geometry_operations.h
+*\author Francisco Leon Najera
+Type-independent geometry routines
+
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+
+#include "gim_linear_math.h"
+
+
+
+
+
+#define PLANEDIREPSILON 0.0000001f
+#define PARALELENORMALS 0.000001f
+//! PLANEDIREPSILON is the threshold that RAY_PLANE_COLLISION and LINE_PLANE_COLLISION apply to VEC_DOT(plane,direction)
+//! TRIANGLE_NORMAL: n receives the VEC_CROSS of the two edge differences taken from v1, then VEC_NORMALIZE is applied
+#define TRIANGLE_NORMAL(v1,v2,v3,n)\
+{\
+	vec3f _dif1,_dif2;\
+    VEC_DIFF(_dif1,v2,v1);\
+    VEC_DIFF(_dif2,v3,v1);\
+    VEC_CROSS(n,_dif1,_dif2);\
+    VEC_NORMALIZE(n);\
+}\
+
+#define TRIANGLE_NORMAL_FAST(v1,v2,v3,n){ /* same steps as TRIANGLE_NORMAL, without the final VEC_NORMALIZE */\
+    vec3f _dif1,_dif2; \
+    VEC_DIFF(_dif1,v2,v1); \
+    VEC_DIFF(_dif2,v3,v1); \
+    VEC_CROSS(n,_dif1,_dif2); \
+}\
+
+/// plane is a vec4f: components 0..2 receive the TRIANGLE_NORMAL result, component 3 receives VEC_DOT(v1,plane)
+#define TRIANGLE_PLANE(v1,v2,v3,plane) {\
+    TRIANGLE_NORMAL(v1,v2,v3,plane);\
+    plane[3] = VEC_DOT(v1,plane);\
+}\
+
+/// plane is a vec4f; same as TRIANGLE_PLANE but built on TRIANGLE_NORMAL_FAST (normal not normalized)
+#define TRIANGLE_PLANE_FAST(v1,v2,v3,plane) {\
+    TRIANGLE_NORMAL_FAST(v1,v2,v3,plane);\
+    plane[3] = VEC_DOT(v1,plane);\
+}\
+
+/// Calc a plane from an edge and a normal (normalized VEC_CROSS of the edge difference and n). plane is a vec4f
+#define EDGE_PLANE(e1,e2,n,plane) {\
+    vec3f _dif; \
+    VEC_DIFF(_dif,e2,e1); \
+    VEC_CROSS(plane,_dif,n); \
+    VEC_NORMALIZE(plane); \
+    plane[3] = VEC_DOT(e1,plane);\
+}\
+
+#define DISTANCE_PLANE_POINT(plane,point) (VEC_DOT(plane,point) - plane[3])
+//! PROJECT_POINT_PLANE: projected = plane scaled by -DISTANCE_PLANE_POINT(plane,point), then summed with point
+#define PROJECT_POINT_PLANE(point,plane,projected) {\
+	GREAL _dis;\
+	_dis = DISTANCE_PLANE_POINT(plane,point);\
+	VEC_SCALE(projected,-_dis,plane);\
+	VEC_SUM(projected,projected,point);	\
+}\
+
+//! Tells whether point is inside the hull made of plane_count planes, i.e. not in front of any of them
+template<typename CLASS_POINT,typename CLASS_PLANE>
+SIMD_FORCE_INLINE bool POINT_IN_HULL(
+	const CLASS_POINT& point,const CLASS_PLANE * planes,GUINT plane_count)
+{
+	for (GUINT plane_index = 0;plane_index < plane_count;++plane_index)
+	{
+		const GREAL signed_distance = DISTANCE_PLANE_POINT(planes[plane_index],point);
+		// a strictly positive distance puts the point in front of this plane, hence outside
+		if(signed_distance>0.0f) return false;
+	}
+	return true;
+}
+
+template<typename CLASS_POINT,typename CLASS_PLANE>
+SIMD_FORCE_INLINE void PLANE_CLIP_SEGMENT( // writes to clipped the point s1 + t*(s2-s1) whose distance to plane is zero
+	const CLASS_POINT& s1,
+	const CLASS_POINT &s2,const CLASS_PLANE &plane,CLASS_POINT &clipped)
+{
+	GREAL _dis1,_dis2;
+	_dis1 = DISTANCE_PLANE_POINT(plane,s1); // signed distance of s1 to the plane
+	VEC_DIFF(clipped,s2,s1); // clipped first serves as scratch for the s1->s2 difference
+	_dis2 = VEC_DOT(clipped,plane); // NOTE(review): this is zero for a segment parallel to the plane and is not checked before the division below
+	VEC_SCALE(clipped,-_dis1/_dis2,clipped);
+	VEC_SUM(clipped,clipped,s1);
+}
+
+enum ePLANE_INTERSECTION_TYPE
+{
+	G_BACK_PLANE = 0,
+	G_COLLIDE_PLANE,
+	G_FRONT_PLANE
+};
+//! Segment/plane classification returned by PLANE_CLIP_SEGMENT2 and PLANE_CLIP_SEGMENT_CLOSEST
+enum eLINE_PLANE_INTERSECTION_TYPE
+{
+	G_FRONT_PLANE_S1 = 0, //!< 0 : Segment in front of plane, s1 closest
+	G_FRONT_PLANE_S2, //!< 1 : Segment in front of plane, s2 closest
+	G_BACK_PLANE_S1, //!< 2 : Segment in back of plane, s1 closest
+	G_BACK_PLANE_S2, //!< 3 : Segment in back of plane, s2 closest
+	G_COLLIDE_PLANE_S1, //!< 4 : Segment collides plane, s1 in back
+	G_COLLIDE_PLANE_S2 //!< 5 : Segment collides plane, s2 in back
+};
+
+//! Confirms if the plane intersects the edge or not
+/*!
+The returned intersection type has one of the following values (clipped is written only for 4 and 5)
+<ul>
+<li> 0 : Segment in front of plane, s1 closest
+<li> 1 : Segment in front of plane, s2 closest
+<li> 2 : Segment in back of plane, s1 closest
+<li> 3 : Segment in back of plane, s2 closest
+<li> 4 : Segment collides plane, s1 in back
+<li> 5 : Segment collides plane, s2 in back
+</ul>
+*/
+
+template<typename CLASS_POINT,typename CLASS_PLANE>
+SIMD_FORCE_INLINE eLINE_PLANE_INTERSECTION_TYPE PLANE_CLIP_SEGMENT2(
+	const CLASS_POINT& s1,
+	const CLASS_POINT &s2,
+	const CLASS_PLANE &plane,CLASS_POINT &clipped)
+{
+	GREAL _dis1 = DISTANCE_PLANE_POINT(plane,s1);
+	GREAL _dis2 = DISTANCE_PLANE_POINT(plane,s2);
+	if(_dis1 >-G_EPSILON && _dis2 >-G_EPSILON) // both ends in front of (or within G_EPSILON of) the plane
+	{
+	    if(_dis1<_dis2) return G_FRONT_PLANE_S1;
+	    return G_FRONT_PLANE_S2;
+	}
+	else if(_dis1 <G_EPSILON && _dis2 <G_EPSILON) // both ends behind (or within G_EPSILON of) the plane
+	{
+	    if(_dis1>_dis2) return G_BACK_PLANE_S1;
+	    return G_BACK_PLANE_S2;
+	}
+	// The ends lie on opposite sides: compute the crossing point into clipped
+	VEC_DIFF(clipped,s2,s1);
+	_dis2 = VEC_DOT(clipped,plane); // _dis2 is reused: it now holds the s1->s2 difference projected on the plane normal
+	VEC_SCALE(clipped,-_dis1/_dis2,clipped);
+	VEC_SUM(clipped,clipped,s1);
+	if(_dis1<_dis2) return G_COLLIDE_PLANE_S1; // compares against the reused _dis2, not the distance of s2
+	return G_COLLIDE_PLANE_S2;
+}
+
+//! Confirms if the plane intersect the edge or not
+/*!
+clipped1 and clipped2 are the vertices behind the plane.
+clipped1 is the closest. When the segment collides, clipped1 keeps the point computed by PLANE_CLIP_SEGMENT2.
+
+intersection_type must have the following values
+<ul>
+<li> 0 : Segment in front of plane, s1 closest
+<li> 1 : Segment in front of plane, s2 closest
+<li> 2 : Segment in back of plane, s1 closest
+<li> 3 : Segment in back of plane, s2 closest
+<li> 4 : Segment collides plane, s1 in back
+<li> 5 : Segment collides plane, s2 in back
+</ul>
+*/
+template<typename CLASS_POINT,typename CLASS_PLANE>
+SIMD_FORCE_INLINE eLINE_PLANE_INTERSECTION_TYPE PLANE_CLIP_SEGMENT_CLOSEST(
+	const CLASS_POINT& s1,
+	const CLASS_POINT &s2,
+	const CLASS_PLANE &plane,
+	CLASS_POINT &clipped1,CLASS_POINT &clipped2)
+{
+	eLINE_PLANE_INTERSECTION_TYPE intersection_type = PLANE_CLIP_SEGMENT2(s1,s2,plane,clipped1);
+	switch(intersection_type)
+	{
+	case G_FRONT_PLANE_S1: // no crossing: both ends are copied out, the closest one first
+		VEC_COPY(clipped1,s1);
+	    VEC_COPY(clipped2,s2);
+		break;
+	case G_FRONT_PLANE_S2:
+		VEC_COPY(clipped1,s2);
+	    VEC_COPY(clipped2,s1);
+		break;
+	case G_BACK_PLANE_S1:
+		VEC_COPY(clipped1,s1);
+	    VEC_COPY(clipped2,s2);
+		break;
+	case G_BACK_PLANE_S2:
+		VEC_COPY(clipped1,s2);
+	    VEC_COPY(clipped2,s1);
+		break;
+	case G_COLLIDE_PLANE_S1: // crossing: clipped1 already holds the crossing point, clipped2 gets the end reported in back
+		VEC_COPY(clipped2,s1);
+		break;
+	case G_COLLIDE_PLANE_S2:
+		VEC_COPY(clipped2,s2);
+		break;
+	}
+	return intersection_type;
+}
+
+
+//! Finds the 2 smallest cartesian coordinates of a plane normal
+#define PLANE_MINOR_AXES(plane, i0, i1) VEC_MINOR_AXES(plane, i0, i1)
+
+//! Ray plane collision in one way
+/*!
+Intersects plane in one way only. The ray must face the plane (normals must be in opposite directions).<br/>
+It uses the PLANEDIREPSILON constant: false is returned when VEC_DOT(plane,vDir) is below it. NOTE(review): that rejects negative dot products, which looks at odds with "opposite directions" - confirm the plane orientation convention.
+*/
+template<typename T,typename CLASS_POINT,typename CLASS_PLANE>
+SIMD_FORCE_INLINE bool RAY_PLANE_COLLISION(
+	const CLASS_PLANE & plane,
+	const CLASS_POINT & vDir,
+	const CLASS_POINT & vPoint,
+	CLASS_POINT & pout,T &tparam)
+{
+	GREAL _dis,_dotdir;
+	_dotdir = VEC_DOT(plane,vDir);
+	if(_dotdir<PLANEDIREPSILON)
+	{
+	    return false; // pout and tparam are left untouched
+	}
+	_dis = DISTANCE_PLANE_POINT(plane,vPoint);
+	tparam = -_dis/_dotdir; // ray parameter of the hit
+	VEC_SCALE(pout,tparam,vDir); // pout = vDir scaled by tparam ...
+	VEC_SUM(pout,vPoint,pout); // ... summed with vPoint
+	return true;
+}
+
+//! line collision
+/*!
+*\return
+	-0  if the ray never intersects (direction parallel to the plane, or tparam outside [tmin,tmax] and clamped)
+	-1 if the ray collides in front (vPoint has a non-negative distance to the plane)
+	-2 if the ray collides in back (vPoint has a negative distance to the plane)
+*/
+template<typename T,typename CLASS_POINT,typename CLASS_PLANE>
+SIMD_FORCE_INLINE GUINT LINE_PLANE_COLLISION(
+	const CLASS_PLANE & plane,
+	const CLASS_POINT & vDir,
+	const CLASS_POINT & vPoint,
+	CLASS_POINT & pout,
+	T &tparam,
+	T tmin, T tmax)
+{
+	GREAL _dis,_dotdir;
+	_dotdir = VEC_DOT(plane,vDir);
+	if(btFabs(_dotdir)<PLANEDIREPSILON)
+	{
+		tparam = tmax; // parallel: tparam is set to tmax and pout is not written
+	    return 0;
+	}
+	_dis = DISTANCE_PLANE_POINT(plane,vPoint);
+	char returnvalue = _dis<0.0f?2:1;
+	tparam = -_dis/_dotdir;
+	// Clamp tparam to [tmin,tmax]; a clamped value is reported as "no intersection"
+	if(tparam<tmin)
+	{
+		returnvalue = 0;
+		tparam = tmin;
+	}
+	else if(tparam>tmax)
+	{
+		returnvalue = 0;
+		tparam = tmax;
+	}
+	// pout is computed from the (possibly clamped) tparam even when 0 is returned
+	VEC_SCALE(pout,tparam,vDir);
+	VEC_SUM(pout,vPoint,pout);
+	return returnvalue;
+}
+
+/*! \brief Returns the Ray on which 2 planes intersect if they do.
+    Written by Rodrigo Hernandez on ODE convex collision
+
+  \param p1 Plane 1
+  \param p2 Plane 2
+  \param p Contains the origin of the ray upon returning if planes intersect
+  \param d Contains the direction of the ray upon returning if planes intersect (it is overwritten in every case)
+  \return true if the planes intersect, false if parallel.
+
+*/
+template<typename CLASS_POINT,typename CLASS_PLANE>
+SIMD_FORCE_INLINE bool INTERSECT_PLANES(
+		const CLASS_PLANE &p1,
+		const CLASS_PLANE &p2,
+		CLASS_POINT &p,
+		CLASS_POINT &d)
+{
+	VEC_CROSS(d,p1,p2);
+  	GREAL denom = VEC_DOT(d, d);
+  	if(GIM_IS_ZERO(denom)) return false; // null cross product of the normals: parallel planes, p is not written
+	vec3f _n;
+	_n[0]=p1[3]*p2[0] - p2[3]*p1[0];
+	_n[1]=p1[3]*p2[1] - p2[3]*p1[1];
+	_n[2]=p1[3]*p2[2] - p2[3]*p1[2];
+	VEC_CROSS(p,_n,d);
+	p[0]/=denom;
+	p[1]/=denom;
+	p[2]/=denom;
+	return true;
+}
+
+//***************** SEGMENT and LINE FUNCTIONS **********************************///
+
+/*! Finds the closest point(cp) to (v) on a segment (e1,e2)
+ NOTE(review): no guard for e1 equal to e2, where VEC_DOT(_n,_n) is zero and the division below is by zero */
+template<typename CLASS_POINT>
+SIMD_FORCE_INLINE void CLOSEST_POINT_ON_SEGMENT(
+	CLASS_POINT & cp, const CLASS_POINT & v,
+	const CLASS_POINT &e1,const CLASS_POINT &e2)
+{
+    vec3f _n;
+    VEC_DIFF(_n,e2,e1);
+    VEC_DIFF(cp,v,e1); // cp is used as scratch for the e1->v difference
+	GREAL _scalar = VEC_DOT(cp, _n);
+	_scalar/= VEC_DOT(_n, _n); // position of the projection of v along the segment: 0 at e1, 1 at e2
+	if(_scalar <0.0f)
+	{
+	    VEC_COPY(cp,e1);
+	}
+	else if(_scalar >1.0f)
+	{
+	    VEC_COPY(cp,e2);
+	}
+	else
+	{
+        VEC_SCALE(cp,_scalar,_n);
+        VEC_SUM(cp,cp,e1);
+	}
+}
+
+
+/*! \brief Finds the line params where these lines intersect.
+
+\param dir1 Direction of line 1
+\param point1 Point of line 1
+\param dir2 Direction of line 2
+\param point2 Point of line 2
+\param t1 Result Parameter for line 1
+\param t2 Result Parameter for line 2
+\return false if GIM_IS_ZERO reports a null determinant (t1 and t2 are then left untouched), else true
+
+*/
+template<typename T,typename CLASS_POINT>
+SIMD_FORCE_INLINE bool LINE_INTERSECTION_PARAMS(
+	const CLASS_POINT & dir1,
+	CLASS_POINT & point1,
+	const CLASS_POINT & dir2,
+	CLASS_POINT &  point2,
+	T& t1,T& t2)
+{
+    GREAL det;
+	GREAL e1e1 = VEC_DOT(dir1,dir1);
+	GREAL e1e2 = VEC_DOT(dir1,dir2);
+	GREAL e2e2 = VEC_DOT(dir2,dir2);
+	vec3f p1p2;
+    VEC_DIFF(p1p2,point1,point2);
+    GREAL p1p2e1 = VEC_DOT(p1p2,dir1);
+	GREAL p1p2e2 = VEC_DOT(p1p2,dir2);
+	det = e1e2*e1e2 - e1e1*e2e2; // determinant of the 2x2 system in t1 and t2
+	if(GIM_IS_ZERO(det)) return false;
+	t1 = (e1e2*p1p2e2 - e2e2*p1p2e1)/det;
+	t2 = (e1e1*p1p2e2 - e1e2*p1p2e1)/det;
+	return true;
+}
+
+//! Find closest points on segments: vPointA on segment (vA1,vA2) and vPointB on segment (vB1,vB2)
+template<typename CLASS_POINT>
+SIMD_FORCE_INLINE void SEGMENT_COLLISION(
+	const CLASS_POINT & vA1,
+	const CLASS_POINT & vA2,
+	const CLASS_POINT & vB1,
+	const CLASS_POINT & vB2,
+	CLASS_POINT & vPointA,
+	CLASS_POINT & vPointB)
+{
+    CLASS_POINT _AD,_BD,_N;
+    vec4f _M;//plane; in the parallel case its four components are reused as scratch scalars
+    VEC_DIFF(_AD,vA2,vA1);
+    VEC_DIFF(_BD,vB2,vB1);
+    VEC_CROSS(_N,_AD,_BD);
+    GREAL _tp = VEC_DOT(_N,_N);
+    if(_tp<G_EPSILON)//segments are parallel (the cross product of their directions is nearly null)
+    {
+    	//project B over A
+    	bool invert_b_order = false;
+    	_M[0] = VEC_DOT(vB1,_AD);
+    	_M[1] = VEC_DOT(vB2,_AD);
+    	if(_M[0]>_M[1])
+    	{
+    		invert_b_order  = true; // vB1 has the larger projection: _M[0]/_M[1] are swapped to stay ordered
+    		GIM_SWAP_NUMBERS(_M[0],_M[1]);
+    	}
+    	_M[2] = VEC_DOT(vA1,_AD);
+    	_M[3] = VEC_DOT(vA2,_AD);
+    	//mid points
+    	_N[0] = (_M[0]+_M[1])*0.5f;
+    	_N[1] = (_M[2]+_M[3])*0.5f;
+    	// _N[0] is the mid point of B and _N[1] the mid point of A, both measured along _AD
+    	if(_N[0]<_N[1])
+    	{
+    		if(_M[1]<_M[2])
+    		{
+    			vPointB = invert_b_order?vB1:vB2;
+    			vPointA = vA1;
+    		}
+    		else if(_M[1]<_M[3])
+    		{
+    			vPointB = invert_b_order?vB1:vB2;
+    			CLOSEST_POINT_ON_SEGMENT(vPointA,vPointB,vA1,vA2);
+    		}
+    		else
+    		{
+    			vPointA = vA2;
+    			CLOSEST_POINT_ON_SEGMENT(vPointB,vPointA,vB1,vB2);
+    		}
+    	}
+    	else
+    	{
+    		if(_M[3]<_M[0])
+    		{
+    			vPointB = invert_b_order?vB2:vB1;
+    			vPointA = vA2;
+    		}
+    		else if(_M[3]<_M[1])
+    		{
+    			vPointA = vA2;
+    			CLOSEST_POINT_ON_SEGMENT(vPointB,vPointA,vB1,vB2);
+    		}
+    		else
+    		{
+    			vPointB = invert_b_order?vB1:vB2;
+    			CLOSEST_POINT_ON_SEGMENT(vPointA,vPointB,vA1,vA2);
+    		}
+    	}
+    	return;
+    }
+
+    // Non parallel case: build in _M the plane that contains segment B and the common perpendicular _N
+    VEC_CROSS(_M,_N,_BD);
+    _M[3] = VEC_DOT(_M,vB1);
+    // vPointA is where line A meets that plane, with the parameter clamped to the segment range [0,1]
+    LINE_PLANE_COLLISION(_M,_AD,vA1,vPointA,_tp,btScalar(0), btScalar(1));
+    /*Closest point on segment*/
+    VEC_DIFF(vPointB,vPointA,vB1);
+	_tp = VEC_DOT(vPointB, _BD);
+	_tp/= VEC_DOT(_BD, _BD);
+	_tp = GIM_CLAMP(_tp,0.0f,1.0f);
+    VEC_SCALE(vPointB,_tp,_BD);
+    VEC_SUM(vPointB,vPointB,vB1);
+}
+
+
+
+
+//! Clips a ray against the slab [bmin,bmax] of a box along one axis
+/*!
+Call it once per axis, sharing tfirst and tlast between the calls.
+*\param pos Coordinate of the ray origin on this axis
+*\param dir Coordinate of the ray direction on this axis
+*\param bmin Lower bound of the box on this axis
+*\param bmax Upper bound of the box on this axis
+*\param tfirst Running entry parameter, raised by this call. Assign to 0 at first.
+*\param tlast Running exit parameter, lowered by this call. Assign to INFINITY at first.
+*\return true if the interval [tfirst,tlast] is still not empty.
+*/
+template<typename T>
+SIMD_FORCE_INLINE bool BOX_AXIS_INTERSECT(T pos, T dir,T bmin, T bmax, T & tfirst, T & tlast)
+{
+	// A ray parallel to the slab never enters or leaves it: only its position matters
+	if(GIM_IS_ZERO(dir)) return !(pos < bmin || pos > bmax);
+
+	// Parameters at which the ray crosses the two slab planes, ordered as near/far
+	GREAL t_near = (bmin - pos) / dir;
+	GREAL t_far = (bmax - pos) / dir;
+	if(t_near > t_far)   GIM_SWAP_NUMBERS(t_near, t_far);
+	// Narrow the running interval; an empty interval means that the box is missed
+	tfirst = GIM_MAX(t_near, tfirst);
+	tlast = GIM_MIN(t_far, tlast);
+	return !(tlast < tfirst);
+}
+
+
+//! Writes to order_indices the indices (0, 1, 2) of the three values, ordered from the smallest value to the largest
+template<typename T>
+SIMD_FORCE_INLINE void SORT_3_INDICES(
+		const T * values,
+		GUINT * order_indices)
+{
+	// Index of the smallest value; on ties the later index wins, as only strict comparisons are used
+	GUINT smallest;
+	if(values[0] < values[1])
+		smallest = (values[0] < values[2]) ? 0 : 2;
+	else
+		smallest = (values[1] < values[2]) ? 1 : 2;
+	order_indices[0] = smallest;
+
+	// The two remaining indices, taken in cyclic order after the smallest one
+	const GUINT next = (smallest + 1)%3;
+	const GUINT last = (next + 1)%3;
+
+	// Put the lower of the two remaining values first
+	const bool next_is_lower = values[next] < values[last];
+	order_indices[1] = next_is_lower ? next : last;
+	order_indices[2] = next_is_lower ? last : next;
+
+}
+
+
+
+
+
+#endif // GIM_BASIC_GEOMETRY_OPERATIONS_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_bitset.h b/src/bullet/BulletCollision/Gimpact/gim_bitset.h
new file mode 100644
index 00000000..7dee48a4
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_bitset.h
@@ -0,0 +1,123 @@
+#ifndef GIM_BITSET_H_INCLUDED
+#define GIM_BITSET_H_INCLUDED
+/*! \file gim_bitset.h
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+#include "gim_array.h"
+
+
+#define GUINT_BIT_COUNT 32
+#define GUINT_EXPONENT 5
+
+class gim_bitset
+{
+public:
+    gim_array<GUINT> m_container; //!< Bit storage, GUINT_BIT_COUNT bits per word
+
+    gim_bitset()
+    {
+
+    }
+
+    gim_bitset(GUINT bits_count)
+    {
+        resize(bits_count);
+    }
+
+    ~gim_bitset()
+    {
+    }
+	//! Resizes the storage to newsize/GUINT_BIT_COUNT + 1 words; words added by a growth are zeroed. Always returns true
+	inline bool resize(GUINT newsize)
+	{
+		GUINT oldsize = m_container.size();
+		m_container.resize(newsize/GUINT_BIT_COUNT + 1,false);
+		while(oldsize<m_container.size())
+		{
+			m_container[oldsize++] = 0; // the index must advance: without the increment this loop never ended
+		}
+		return true;
+	}
+	//! Number of bits that the current storage can address
+	inline GUINT size() const
+	{
+		return m_container.size()*GUINT_BIT_COUNT;
+	}
+
+	inline void set_all()
+	{
+		for(GUINT i = 0;i<m_container.size();++i)
+		{
+			m_container[i] = 0xffffffff;
+		}
+	}
+
+	inline void clear_all()
+	{
+	    for(GUINT i = 0;i<m_container.size();++i)
+		{
+			m_container[i] = 0;
+		}
+	}
+	//! Sets the bit at bit_index to 1, growing the storage first when the index is past the end
+	inline void set(GUINT bit_index)
+	{
+		if(bit_index>=size())
+		{
+			resize(bit_index);
+		}
+		m_container[bit_index >> GUINT_EXPONENT] |= (1u << (bit_index & (GUINT_BIT_COUNT-1)));
+	}
+
+	///Return 0 or 1 (0 for any index past the end)
+	inline char get(GUINT bit_index) const
+	{
+		if(bit_index>=size())
+		{
+			return 0;
+		}
+		char value = (m_container[bit_index >> GUINT_EXPONENT] &
+					 (1u << (bit_index & (GUINT_BIT_COUNT-1)))) != 0; // compare before narrowing: a raw mask above bit 7 truncated to 0 in a char
+		return value;
+	}
+	//! Sets the bit at bit_index to 0; an index past the end is ignored, such a bit already reads as 0
+	inline void clear(GUINT bit_index)
+	{
+	    if(bit_index<size()) m_container[bit_index >> GUINT_EXPONENT] &= ~(1u << (bit_index & (GUINT_BIT_COUNT-1)));
+	}
+};
+
+
+
+
+
+#endif // GIM_BITSET_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_box_collision.h b/src/bullet/BulletCollision/Gimpact/gim_box_collision.h
new file mode 100644
index 00000000..b360dd47
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_box_collision.h
@@ -0,0 +1,590 @@
+#ifndef GIM_BOX_COLLISION_H_INCLUDED
+#define GIM_BOX_COLLISION_H_INCLUDED
+
+/*! \file gim_box_collision.h
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+#include "gim_basic_geometry_operations.h"
+#include "LinearMath/btTransform.h"
+
+
+
+//SIMD_FORCE_INLINE bool test_cross_edge_box(
+//	const btVector3 & edge,
+//	const btVector3 & absolute_edge,
+//	const btVector3 & pointa,
+//	const btVector3 & pointb, const btVector3 & extend,
+//	int dir_index0,
+//	int dir_index1
+//	int component_index0,
+//	int component_index1)
+//{
+//	// dir coords are -z and y
+//
+//	const btScalar dir0 = -edge[dir_index0];
+//	const btScalar dir1 = edge[dir_index1];
+//	btScalar pmin = pointa[component_index0]*dir0 + pointa[component_index1]*dir1;
+//	btScalar pmax = pointb[component_index0]*dir0 + pointb[component_index1]*dir1;
+//	//find minmax
+//	if(pmin>pmax)
+//	{
+//		GIM_SWAP_NUMBERS(pmin,pmax);
+//	}
+//	//find extends
+//	const btScalar rad = extend[component_index0] * absolute_edge[dir_index0] +
+//					extend[component_index1] * absolute_edge[dir_index1];
+//
+//	if(pmin>rad || -rad>pmax) return false;
+//	return true;
+//}
+//
+//SIMD_FORCE_INLINE bool test_cross_edge_box_X_axis(
+//	const btVector3 & edge,
+//	const btVector3 & absolute_edge,
+//	const btVector3 & pointa,
+//	const btVector3 & pointb, btVector3 & extend)
+//{
+//
+//	return test_cross_edge_box(edge,absolute_edge,pointa,pointb,extend,2,1,1,2);
+//}
+//
+//
+//SIMD_FORCE_INLINE bool test_cross_edge_box_Y_axis(
+//	const btVector3 & edge,
+//	const btVector3 & absolute_edge,
+//	const btVector3 & pointa,
+//	const btVector3 & pointb, btVector3 & extend)
+//{
+//
+//	return test_cross_edge_box(edge,absolute_edge,pointa,pointb,extend,0,2,2,0);
+//}
+//
+//SIMD_FORCE_INLINE bool test_cross_edge_box_Z_axis(
+//	const btVector3 & edge,
+//	const btVector3 & absolute_edge,
+//	const btVector3 & pointa,
+//	const btVector3 & pointb, btVector3 & extend)
+//{
+//
+//	return test_cross_edge_box(edge,absolute_edge,pointa,pointb,extend,1,0,0,1);
+//}
+
+#define TEST_CROSS_EDGE_BOX_MCR(edge,absolute_edge,pointa,pointb,_extend,i_dir_0,i_dir_1,i_comp_0,i_comp_1)\
+{ /* Interval overlap test on one axis; it makes the ENCLOSING function return false when the intervals are disjoint */\
+	const btScalar dir0 = -edge[i_dir_0];\
+	const btScalar dir1 = edge[i_dir_1];\
+	btScalar pmin = pointa[i_comp_0]*dir0 + pointa[i_comp_1]*dir1;\
+	btScalar pmax = pointb[i_comp_0]*dir0 + pointb[i_comp_1]*dir1;\
+	if(pmin>pmax)\
+	{\
+		GIM_SWAP_NUMBERS(pmin,pmax); \
+	}\
+	const btScalar abs_dir0 = absolute_edge[i_dir_0];\
+	const btScalar abs_dir1 = absolute_edge[i_dir_1];\
+	const btScalar rad = _extend[i_comp_0] * abs_dir0 + _extend[i_comp_1] * abs_dir1; /* half width of the box interval */\
+	if(pmin>rad || -rad>pmax) return false;\
+}\
+
+//! The three macros below call TEST_CROSS_EDGE_BOX_MCR with the index sets (2,1,1,2), (0,2,2,0) and (1,0,0,1)
+#define TEST_CROSS_EDGE_BOX_X_AXIS_MCR(edge,absolute_edge,pointa,pointb,_extend)\
+{\
+	TEST_CROSS_EDGE_BOX_MCR(edge,absolute_edge,pointa,pointb,_extend,2,1,1,2);\
+}\
+
+#define TEST_CROSS_EDGE_BOX_Y_AXIS_MCR(edge,absolute_edge,pointa,pointb,_extend)\
+{\
+	TEST_CROSS_EDGE_BOX_MCR(edge,absolute_edge,pointa,pointb,_extend,0,2,2,0);\
+}\
+
+#define TEST_CROSS_EDGE_BOX_Z_AXIS_MCR(edge,absolute_edge,pointa,pointb,_extend)\
+{\
+	TEST_CROSS_EDGE_BOX_MCR(edge,absolute_edge,pointa,pointb,_extend,1,0,0,1);\
+}\
+
+
+
+//!  Class for transforming a model1 to the space of model0
+class GIM_BOX_BOX_TRANSFORM_CACHE
+{
+public:
+    btVector3  m_T1to0;//!< Transforms translation of model1 to model 0
+	btMatrix3x3 m_R1to0;//!< Transforms Rotation of model1 to model 0, equal  to R0' * R1
+	btMatrix3x3 m_AR;//!< Absolute value of m_R1to0
+	//! Rebuilds m_AR: each row is the absolute value of the matching row of m_R1to0, plus 1e-6 on every component
+	SIMD_FORCE_INLINE void calc_absolute_matrix()
+	{
+		static const btVector3 vepsi(1e-6f,1e-6f,1e-6f);
+		m_AR[0] = vepsi + m_R1to0[0].absolute();
+		m_AR[1] = vepsi + m_R1to0[1].absolute();
+		m_AR[2] = vepsi + m_R1to0[2].absolute();
+	}
+	//! Default constructor: assigns nothing to the members
+	GIM_BOX_BOX_TRANSFORM_CACHE()
+	{
+	}
+
+	//! Builds the cache from a mat4f: rotation via COPY_MATRIX_3X3, translation via MAT_GET_TRANSLATION
+	GIM_BOX_BOX_TRANSFORM_CACHE(mat4f  trans1_to_0)
+	{
+		COPY_MATRIX_3X3(m_R1to0,trans1_to_0)
+        MAT_GET_TRANSLATION(trans1_to_0,m_T1to0)
+		calc_absolute_matrix();
+	}
+
+	//! Calc the transformation relative  1 to 0. Inverts matrics by transposing
+	SIMD_FORCE_INLINE void calc_from_homogenic(const btTransform & trans0,const btTransform & trans1)
+	{
+		// The basis of trans0 is inverted by transposition
+		m_R1to0 = trans0.getBasis().transpose();
+		m_T1to0 = m_R1to0 * (-trans0.getOrigin());
+
+		m_T1to0 += m_R1to0*trans1.getOrigin();
+		m_R1to0 *= trans1.getBasis();
+
+		calc_absolute_matrix();
+	}
+
+	//! Calcs the full invertion of the matrices. Useful for scaling matrices
+	SIMD_FORCE_INLINE void calc_from_full_invert(const btTransform & trans0,const btTransform & trans1)
+	{
+		m_R1to0 = trans0.getBasis().inverse();
+		m_T1to0 = m_R1to0 * (-trans0.getOrigin());
+
+		m_T1to0 += m_R1to0*trans1.getOrigin();
+		m_R1to0 *= trans1.getBasis();
+
+		calc_absolute_matrix();
+	}
+	//! Returns m_R1to0 * point + m_T1to0, computed row by row
+	SIMD_FORCE_INLINE btVector3 transform(const btVector3 & point)
+	{
+		return btVector3(m_R1to0[0].dot(point) + m_T1to0.x(),
+			m_R1to0[1].dot(point) + m_T1to0.y(),
+			m_R1to0[2].dot(point) + m_T1to0.z());
+	}
+};
+
+
+#define BOX_PLANE_EPSILON 0.000001f
+
+//! Axis aligned box, stored as its minimum corner (m_min) and maximum corner (m_max)
+class GIM_AABB
+{
+public:
+	btVector3 m_min; // per-axis lower bounds
+	btVector3 m_max; // per-axis upper bounds
+
+	GIM_AABB() // does not set m_min/m_max itself; use invalidate() or one of the calc_* methods before querying
+	{}
+
+
+	GIM_AABB(const btVector3 & V1, // builds the bounding box of the triangle (V1,V2,V3)
+			 const btVector3 & V2,
+			 const btVector3 & V3)
+	{
+		m_min[0] = GIM_MIN3(V1[0],V2[0],V3[0]);
+		m_min[1] = GIM_MIN3(V1[1],V2[1],V3[1]);
+		m_min[2] = GIM_MIN3(V1[2],V2[2],V3[2]);
+
+		m_max[0] = GIM_MAX3(V1[0],V2[0],V3[0]);
+		m_max[1] = GIM_MAX3(V1[1],V2[1],V3[1]);
+		m_max[2] = GIM_MAX3(V1[2],V2[2],V3[2]);
+	}
+
+	GIM_AABB(const btVector3 & V1, // bounding box of the triangle (V1,V2,V3), grown by margin on every side
+			 const btVector3 & V2,
+			 const btVector3 & V3,
+			 GREAL margin)
+	{
+		m_min[0] = GIM_MIN3(V1[0],V2[0],V3[0]);
+		m_min[1] = GIM_MIN3(V1[1],V2[1],V3[1]);
+		m_min[2] = GIM_MIN3(V1[2],V2[2],V3[2]);
+
+		m_max[0] = GIM_MAX3(V1[0],V2[0],V3[0]);
+		m_max[1] = GIM_MAX3(V1[1],V2[1],V3[1]);
+		m_max[2] = GIM_MAX3(V1[2],V2[2],V3[2]);
+
+		m_min[0] -= margin;
+		m_min[1] -= margin;
+		m_min[2] -= margin;
+		m_max[0] += margin;
+		m_max[1] += margin;
+		m_max[2] += margin;
+	}
+
+	GIM_AABB(const GIM_AABB &other): // plain copy of both corners
+		m_min(other.m_min),m_max(other.m_max)
+	{
+	}
+
+	GIM_AABB(const GIM_AABB &other,btScalar margin ): // copy of other, grown by margin on every side
+		m_min(other.m_min),m_max(other.m_max)
+	{
+		m_min[0] -= margin;
+		m_min[1] -= margin;
+		m_min[2] -= margin;
+		m_max[0] += margin;
+		m_max[1] += margin;
+		m_max[2] += margin;
+	}
+
+	SIMD_FORCE_INLINE void invalidate() // sets min=+infinity and max=-infinity so that the first merge()/merge_point() defines the box
+	{
+		m_min[0] = G_REAL_INFINITY;
+		m_min[1] = G_REAL_INFINITY;
+		m_min[2] = G_REAL_INFINITY;
+		m_max[0] = -G_REAL_INFINITY;
+		m_max[1] = -G_REAL_INFINITY;
+		m_max[2] = -G_REAL_INFINITY;
+	}
+
+	SIMD_FORCE_INLINE void increment_margin(btScalar margin) // grows the box by margin on every side (a negative margin shrinks it)
+	{
+		m_min[0] -= margin;
+		m_min[1] -= margin;
+		m_min[2] -= margin;
+		m_max[0] += margin;
+		m_max[1] += margin;
+		m_max[2] += margin;
+	}
+
+	SIMD_FORCE_INLINE void copy_with_margin(const GIM_AABB &other, btScalar margin) // this = other grown by margin on every side
+	{
+		m_min[0] = other.m_min[0] - margin;
+		m_min[1] = other.m_min[1] - margin;
+		m_min[2] = other.m_min[2] - margin;
+
+		m_max[0] = other.m_max[0] + margin;
+		m_max[1] = other.m_max[1] + margin;
+		m_max[2] = other.m_max[2] + margin;
+	}
+
+	template<typename CLASS_POINT> // CLASS_POINT only needs operator[] for indices 0..2
+	SIMD_FORCE_INLINE void calc_from_triangle( // sets this box to the bounds of triangle (V1,V2,V3)
+							const CLASS_POINT & V1,
+							const CLASS_POINT & V2,
+							const CLASS_POINT & V3)
+	{
+		m_min[0] = GIM_MIN3(V1[0],V2[0],V3[0]);
+		m_min[1] = GIM_MIN3(V1[1],V2[1],V3[1]);
+		m_min[2] = GIM_MIN3(V1[2],V2[2],V3[2]);
+
+		m_max[0] = GIM_MAX3(V1[0],V2[0],V3[0]);
+		m_max[1] = GIM_MAX3(V1[1],V2[1],V3[1]);
+		m_max[2] = GIM_MAX3(V1[2],V2[2],V3[2]);
+	}
+
+	template<typename CLASS_POINT> // CLASS_POINT only needs operator[] for indices 0..2
+	SIMD_FORCE_INLINE void calc_from_triangle_margin( // bounds of triangle (V1,V2,V3), grown by margin on every side
+							const CLASS_POINT & V1,
+							const CLASS_POINT & V2,
+							const CLASS_POINT & V3, btScalar margin)
+	{
+		m_min[0] = GIM_MIN3(V1[0],V2[0],V3[0]);
+		m_min[1] = GIM_MIN3(V1[1],V2[1],V3[1]);
+		m_min[2] = GIM_MIN3(V1[2],V2[2],V3[2]);
+
+		m_max[0] = GIM_MAX3(V1[0],V2[0],V3[0]);
+		m_max[1] = GIM_MAX3(V1[1],V2[1],V3[1]);
+		m_max[2] = GIM_MAX3(V1[2],V2[2],V3[2]);
+
+		m_min[0] -= margin;
+		m_min[1] -= margin;
+		m_min[2] -= margin;
+		m_max[0] += margin;
+		m_max[1] += margin;
+		m_max[2] += margin;
+	}
+
+	//! Apply a transform to an AABB; the result is the axis aligned box enclosing the transformed box (name is spelled "appy" in the API)
+	SIMD_FORCE_INLINE void appy_transform(const btTransform & trans)
+	{
+		btVector3 center = (m_max+m_min)*0.5f;
+		btVector3 extends = m_max - center; // half sizes
+		// Compute new center
+		center = trans(center);
+
+		btVector3 textends(extends.dot(trans.getBasis().getRow(0).absolute()), // new half sizes: half sizes dotted with the absolute basis rows
+ 				 extends.dot(trans.getBasis().getRow(1).absolute()),
+				 extends.dot(trans.getBasis().getRow(2).absolute()));
+
+		m_min = center - textends;
+		m_max = center + textends;
+	}
+
+	//! Merges a Box: grows this box so that it also encloses box
+	SIMD_FORCE_INLINE void merge(const GIM_AABB & box)
+	{
+		m_min[0] = GIM_MIN(m_min[0],box.m_min[0]);
+		m_min[1] = GIM_MIN(m_min[1],box.m_min[1]);
+		m_min[2] = GIM_MIN(m_min[2],box.m_min[2]);
+
+		m_max[0] = GIM_MAX(m_max[0],box.m_max[0]);
+		m_max[1] = GIM_MAX(m_max[1],box.m_max[1]);
+		m_max[2] = GIM_MAX(m_max[2],box.m_max[2]);
+	}
+
+	//! Merges a point: grows this box so that it also encloses point
+	template<typename CLASS_POINT>
+	SIMD_FORCE_INLINE void merge_point(const CLASS_POINT & point)
+	{
+		m_min[0] = GIM_MIN(m_min[0],point[0]);
+		m_min[1] = GIM_MIN(m_min[1],point[1]);
+		m_min[2] = GIM_MIN(m_min[2],point[2]);
+
+		m_max[0] = GIM_MAX(m_max[0],point[0]);
+		m_max[1] = GIM_MAX(m_max[1],point[1]);
+		m_max[2] = GIM_MAX(m_max[2],point[2]);
+	}
+
+	//! Gets the center and the extend (half sizes) of the box
+	SIMD_FORCE_INLINE void get_center_extend(btVector3 & center,btVector3 & extend)  const
+	{
+		center = (m_max+m_min)*0.5f;
+		extend = m_max - center;
+	}
+
+	//! Finds the intersecting box between this box and the other (max of the mins, min of the maxs; not checked for emptiness).
+	SIMD_FORCE_INLINE void find_intersection(const GIM_AABB & other, GIM_AABB & intersection)  const
+	{
+		intersection.m_min[0] = GIM_MAX(other.m_min[0],m_min[0]);
+		intersection.m_min[1] = GIM_MAX(other.m_min[1],m_min[1]);
+		intersection.m_min[2] = GIM_MAX(other.m_min[2],m_min[2]);
+
+		intersection.m_max[0] = GIM_MIN(other.m_max[0],m_max[0]);
+		intersection.m_max[1] = GIM_MIN(other.m_max[1],m_max[1]);
+		intersection.m_max[2] = GIM_MIN(other.m_max[2],m_max[2]);
+	}
+
+
+	SIMD_FORCE_INLINE bool has_collision(const GIM_AABB & other) const // true unless the boxes are strictly separated on some axis (touching counts as overlap)
+	{
+		if(m_min[0] > other.m_max[0] ||
+		   m_max[0] < other.m_min[0] ||
+		   m_min[1] > other.m_max[1] ||
+		   m_max[1] < other.m_min[1] ||
+		   m_min[2] > other.m_max[2] ||
+		   m_max[2] < other.m_min[2])
+		{
+			return false;
+		}
+		return true;
+	}
+
+	/*! \brief Ray versus box overlap test; only a boolean is produced, no intersection parameter.
+	\param vorigin origin of the ray
+	\param vdir direction of the ray
+	\return false as soon as one of the tested axes separates the ray from the box, true otherwise
+	*/
+	SIMD_FORCE_INLINE bool collide_ray(const btVector3 & vorigin,const btVector3 & vdir)
+	{
+		btVector3 extents,center;
+		this->get_center_extend(center,extents);;
+
+		btScalar Dx = vorigin[0] - center[0]; // origin relative to the box center
+		if(GIM_GREATER(Dx, extents[0]) && Dx*vdir[0]>=0.0f)	return false; // GIM_GREATER is defined elsewhere (presumably |a| > b): origin outside the x slab and not heading toward it
+		btScalar Dy = vorigin[1] - center[1];
+		if(GIM_GREATER(Dy, extents[1]) && Dy*vdir[1]>=0.0f)	return false;
+		btScalar Dz = vorigin[2] - center[2];
+		if(GIM_GREATER(Dz, extents[2]) && Dz*vdir[2]>=0.0f)	return false;
+
+
+		btScalar f = vdir[1] * Dz - vdir[2] * Dy; // x component of vdir cross D
+		if(btFabs(f) > extents[1]*btFabs(vdir[2]) + extents[2]*btFabs(vdir[1])) return false;
+		f = vdir[2] * Dx - vdir[0] * Dz; // y component of vdir cross D
+		if(btFabs(f) > extents[0]*btFabs(vdir[2]) + extents[2]*btFabs(vdir[0]))return false;
+		f = vdir[0] * Dy - vdir[1] * Dx; // z component of vdir cross D
+		if(btFabs(f) > extents[0]*btFabs(vdir[1]) + extents[1]*btFabs(vdir[0]))return false;
+		return true;
+	}
+
+
+	SIMD_FORCE_INLINE void projection_interval(const btVector3 & direction, btScalar &vmin, btScalar &vmax) const // projects the box onto direction and returns the covered interval [vmin,vmax]
+	{
+		btVector3 center = (m_max+m_min)*0.5f;
+		btVector3 extend = m_max-center;
+
+		btScalar _fOrigin =  direction.dot(center); // projection of the center
+		btScalar _fMaximumExtent = extend.dot(direction.absolute()); // projected half width
+		vmin = _fOrigin - _fMaximumExtent;
+		vmax = _fOrigin + _fMaximumExtent;
+	}
+
+	SIMD_FORCE_INLINE ePLANE_INTERSECTION_TYPE plane_classify(const btVector4 &plane) const // classifies the box against plane; plane[3] is compared with the box projection along the plane vector
+	{
+		btScalar _fmin,_fmax;
+		this->projection_interval(plane,_fmin,_fmax); // plane is passed as the projection direction
+
+		if(plane[3] > _fmax + BOX_PLANE_EPSILON) // whole projection lies below the plane offset
+		{
+			return G_BACK_PLANE; // 0
+		}
+
+		if(plane[3]+BOX_PLANE_EPSILON >=_fmin) // plane offset falls inside the projected interval (within tolerance)
+		{
+			return G_COLLIDE_PLANE; //1
+		}
+		return G_FRONT_PLANE;//2
+	}
+
+	SIMD_FORCE_INLINE bool overlapping_trans_conservative(const GIM_AABB & box, btTransform & trans1_to_0) // transforms a copy of box with trans1_to_0 and tests its enclosing AABB against this box
+	{
+		GIM_AABB tbox = box;
+		tbox.appy_transform(trans1_to_0);
+		return has_collision(tbox);
+	}
+
+	//! transcache is the transformation cache from box to this AABB; fulltest additionally runs the 9 cross product axes
+	SIMD_FORCE_INLINE bool overlapping_trans_cache(
+		const GIM_AABB & box,const GIM_BOX_BOX_TRANSFORM_CACHE & transcache, bool fulltest)
+	{
+
+		//Taken from OPCODE
+		btVector3 ea,eb;//extends
+		btVector3 ca,cb;//centers
+		get_center_extend(ca,ea);
+		box.get_center_extend(cb,eb);
+
+
+		btVector3 T; // center of box brought into this box's frame, relative to this box's center
+		btScalar t,t2;
+		int i;
+
+		// Class I : A's basis vectors
+		for(i=0;i<3;i++)
+		{
+			T[i] =  transcache.m_R1to0[i].dot(cb) + transcache.m_T1to0[i] - ca[i];
+			t = transcache.m_AR[i].dot(eb) + ea[i]; // m_AR is declared outside this view; presumably the absolute value of m_R1to0 -- confirm
+			if(GIM_GREATER(T[i], t))	return false;
+		}
+		// Class II : B's basis vectors
+		for(i=0;i<3;i++)
+		{
+			t = MAT_DOT_COL(transcache.m_R1to0,T,i); // MAT_DOT_COL is defined elsewhere; presumably column i of the matrix dotted with the vector
+			t2 = MAT_DOT_COL(transcache.m_AR,ea,i) + eb[i];
+			if(GIM_GREATER(t,t2))	return false;
+		}
+		// Class III : 9 cross products
+		if(fulltest)
+		{
+			int j,m,n,o,p,q,r;
+			for(i=0;i<3;i++)
+			{
+				m = (i+1)%3; // m,n: the two axes following i cyclically
+				n = (i+2)%3;
+				o = i==0?1:0; // o,p: the two axes other than i, in increasing order
+				p = i==2?1:2;
+				for(j=0;j<3;j++)
+				{
+					q = j==2?1:2; // q,r: the two axes other than j
+					r = j==0?1:0;
+					t = T[n]*transcache.m_R1to0[m][j] - T[m]*transcache.m_R1to0[n][j];
+					t2 = ea[o]*transcache.m_AR[p][j] + ea[p]*transcache.m_AR[o][j] +
+						eb[r]*transcache.m_AR[i][q] + eb[q]*transcache.m_AR[i][r];
+					if(GIM_GREATER(t,t2))	return false;
+				}
+			}
+		}
+		return true; // no separating axis among the tested ones
+	}
+
+	//! Simple test for planes: true only when plane_classify reports G_COLLIDE_PLANE.
+	SIMD_FORCE_INLINE bool collide_plane(
+		const btVector4 & plane)
+	{
+		ePLANE_INTERSECTION_TYPE classify = plane_classify(plane);
+		return (classify == G_COLLIDE_PLANE);
+	}
+
+	//! test for a triangle (p1,p2,p3) with plane triangle_plane: plane test first, then the edge tests
+	SIMD_FORCE_INLINE bool collide_triangle_exact(
+		const btVector3 & p1,
+		const btVector3 & p2,
+		const btVector3 & p3,
+		const btVector4 & triangle_plane)
+	{
+		if(!collide_plane(triangle_plane)) return false;
+
+		btVector3 center,extends;
+		this->get_center_extend(center,extends);
+
+		const btVector3 v1(p1 - center); // triangle vertices relative to the box center
+		const btVector3 v2(p2 - center);
+		const btVector3 v3(p3 - center);
+
+		//First axis
+		btVector3 diff(v2 - v1); // edge v1->v2
+		btVector3 abs_diff = diff.absolute();
+		//Test With X axis
+		TEST_CROSS_EDGE_BOX_X_AXIS_MCR(diff,abs_diff,v1,v3,extends); // macro defined elsewhere; presumably returns false from this function when the axis separates -- confirm
+		//Test With Y axis
+		TEST_CROSS_EDGE_BOX_Y_AXIS_MCR(diff,abs_diff,v1,v3,extends);
+		//Test With Z axis
+		TEST_CROSS_EDGE_BOX_Z_AXIS_MCR(diff,abs_diff,v1,v3,extends);
+
+
+		diff = v3 - v2; // edge v2->v3
+		abs_diff = diff.absolute();
+		//Test With X axis
+		TEST_CROSS_EDGE_BOX_X_AXIS_MCR(diff,abs_diff,v2,v1,extends);
+		//Test With Y axis
+		TEST_CROSS_EDGE_BOX_Y_AXIS_MCR(diff,abs_diff,v2,v1,extends);
+		//Test With Z axis
+		TEST_CROSS_EDGE_BOX_Z_AXIS_MCR(diff,abs_diff,v2,v1,extends);
+
+		diff = v1 - v3; // edge v3->v1
+		abs_diff = diff.absolute();
+		//Test With X axis
+		TEST_CROSS_EDGE_BOX_X_AXIS_MCR(diff,abs_diff,v3,v2,extends);
+		//Test With Y axis
+		TEST_CROSS_EDGE_BOX_Y_AXIS_MCR(diff,abs_diff,v3,v2,extends);
+		//Test With Z axis
+		TEST_CROSS_EDGE_BOX_Z_AXIS_MCR(diff,abs_diff,v3,v2,extends);
+
+		return true;
+	}
+};
+
+
+//! Comparison of transformation objects: true only when the origins and all three basis rows compare equal with operator==
+SIMD_FORCE_INLINE bool btCompareTransformsEqual(const btTransform & t1,const btTransform & t2)
+{
+	if(!(t1.getOrigin() == t2.getOrigin()) ) return false;
+
+	if(!(t1.getBasis().getRow(0) == t2.getBasis().getRow(0)) ) return false;
+	if(!(t1.getBasis().getRow(1) == t2.getBasis().getRow(1)) ) return false;
+	if(!(t1.getBasis().getRow(2) == t2.getBasis().getRow(2)) ) return false;
+	return true;
+}
+
+
+
+#endif // GIM_BOX_COLLISION_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_box_set.cpp b/src/bullet/BulletCollision/Gimpact/gim_box_set.cpp
new file mode 100644
index 00000000..0c3d7ba8
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_box_set.cpp
@@ -0,0 +1,182 @@
+
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+
+#include "gim_box_set.h"
+
+
+GUINT GIM_BOX_TREE::_calc_splitting_axis( // returns variance.maxAxis() for the box centers in [startIndex,endIndex)
+	gim_array<GIM_AABB_DATA> & primitive_boxes, GUINT startIndex,  GUINT endIndex)
+{
+	GUINT i;
+
+	btVector3 means(btScalar(0.),btScalar(0.),btScalar(0.));
+	btVector3 variance(btScalar(0.),btScalar(0.),btScalar(0.));
+	GUINT numIndices = endIndex-startIndex;
+
+	for (i=startIndex;i<endIndex;i++) // first pass: sum of the box centers
+	{
+		btVector3 center = btScalar(0.5)*(primitive_boxes[i].m_bound.m_max +
+					 primitive_boxes[i].m_bound.m_min);
+		means+=center;
+	}
+	means *= (btScalar(1.)/(btScalar)numIndices); // mean center
+
+	for (i=startIndex;i<endIndex;i++) // second pass: accumulate squared deviations from the mean
+	{
+		btVector3 center = btScalar(0.5)*(primitive_boxes[i].m_bound.m_max +
+					 primitive_boxes[i].m_bound.m_min);
+		btVector3 diff2 = center-means;
+		diff2 = diff2 * diff2; // btVector3 * btVector3, presumably component-wise -- confirm
+		variance += diff2;
+	}
+	variance *= (btScalar(1.)/	((btScalar)numIndices-1)	); // divides by numIndices-1, so the range needs at least 2 boxes
+
+	return variance.maxAxis(); // presumably the index (0..2) of the largest component -- confirm
+}
+
+
+GUINT GIM_BOX_TREE::_sort_and_calc_splitting_index( // partitions [startIndex,endIndex) around the mean center on splitAxis and returns the split position
+	gim_array<GIM_AABB_DATA> & primitive_boxes, GUINT startIndex,
+	GUINT endIndex, GUINT splitAxis)
+{
+	GUINT i;
+	GUINT splitIndex =startIndex;
+	GUINT numIndices = endIndex - startIndex;
+
+	// average of centers
+	btScalar splitValue = 0.0f;
+	for (i=startIndex;i<endIndex;i++)
+	{
+		splitValue+= 0.5f*(primitive_boxes[i].m_bound.m_max[splitAxis] +
+					 primitive_boxes[i].m_bound.m_min[splitAxis]);
+	}
+	splitValue /= (btScalar)numIndices;
+
+	//partition the boxes so all centers larger than splitValue come first; the remaining ones start from 'splitIndex'.
+	for (i=startIndex;i<endIndex;i++)
+	{
+		btScalar center = 0.5f*(primitive_boxes[i].m_bound.m_max[splitAxis] +
+					 primitive_boxes[i].m_bound.m_min[splitAxis]);
+		if (center > splitValue)
+		{
+			//swap
+			primitive_boxes.swap(i,splitIndex);
+			splitIndex++;
+		}
+	}
+
+	//if the splitIndex causes unbalanced trees, fix this by using the center in between startIndex and endIndex
+	//otherwise the tree-building might fail due to stack-overflows in certain cases.
+	//unbalanced1 is unsafe: it can cause stack overflows
+	//bool unbalanced1 = ((splitIndex==startIndex) || (splitIndex == (endIndex-1)));
+
+	//unbalanced2 should work too: always use center (perfect balanced trees)
+	//bool unbalanced2 = true;
+
+	//this should be safe too: treat a split that falls in the first or the last third of the range as unbalanced
+	GUINT rangeBalancedIndices = numIndices/3;
+	bool unbalanced = ((splitIndex<=(startIndex+rangeBalancedIndices)) || (splitIndex >=(endIndex-1-rangeBalancedIndices)));
+
+	if (unbalanced)
+	{
+		splitIndex = startIndex+ (numIndices>>1); // fall back to the middle of the range
+	}
+
+	btAssert(!((splitIndex==startIndex) || (splitIndex == (endIndex)))); // both halves must be non-empty
+
+	return splitIndex;
+}
+
+
+void GIM_BOX_TREE::_build_sub_tree(gim_array<GIM_AABB_DATA> & primitive_boxes, GUINT startIndex,  GUINT endIndex) // recursively builds the subtree for boxes [startIndex,endIndex); nodes are appended to m_node_array in depth-first order
+{
+	GUINT current_index = m_num_nodes++; // this node takes the next free slot, so every child gets a larger index than its parent
+
+	btAssert((endIndex-startIndex)>0);
+
+	if((endIndex-startIndex) == 1) //we got a leaf
+	{		// leaves are marked by m_left == m_right == 0
+		m_node_array[current_index].m_left = 0;
+		m_node_array[current_index].m_right = 0;
+		m_node_array[current_index].m_escapeIndex = 0;
+
+		m_node_array[current_index].m_bound = primitive_boxes[startIndex].m_bound;
+		m_node_array[current_index].m_data = primitive_boxes[startIndex].m_data;
+		return;
+	}
+
+	//configure inner node
+
+	GUINT splitIndex; // reused below: loop counter, then split axis, then split position
+
+	//calc this node bounding box
+	m_node_array[current_index].m_bound.invalidate();	// start from an inverted box so the merges below define it
+	for (splitIndex=startIndex;splitIndex<endIndex;splitIndex++)
+	{
+		m_node_array[current_index].m_bound.merge(primitive_boxes[splitIndex].m_bound);
+	}
+
+	//calculate Best Splitting Axis and where to split it. Sort the incoming 'leafNodes' array within range 'startIndex/endIndex'.
+
+	//split axis
+	splitIndex = _calc_splitting_axis(primitive_boxes,startIndex,endIndex);
+
+	splitIndex = _sort_and_calc_splitting_index( // the axis held in splitIndex goes in, the split position comes out
+			primitive_boxes,startIndex,endIndex,splitIndex);
+
+	//configure this inner node : the left node index
+	m_node_array[current_index].m_left = m_num_nodes;
+	//build left child tree
+	_build_sub_tree(primitive_boxes, startIndex, splitIndex );
+
+	//configure this inner node : the right node index
+	m_node_array[current_index].m_right = m_num_nodes;
+
+	//build right child tree
+	_build_sub_tree(primitive_boxes, splitIndex ,endIndex);
+
+	//configure this inner node : the escape index (number of nodes in this subtree, i.e. the offset that skips it)
+	m_node_array[current_index].m_escapeIndex  = m_num_nodes - current_index;
+}
+
+//! Builds the tree from primitive_boxes (recursive, via _build_sub_tree; primitive_boxes gets reordered). An empty input yields an empty tree.
+void GIM_BOX_TREE::build_tree(
+	gim_array<GIM_AABB_DATA> & primitive_boxes)
+{
+	// initialize node count to 0
+	m_num_nodes = 0;
+	// allocate nodes
+	m_node_array.resize(primitive_boxes.size()*2);
+	if(primitive_boxes.size() == 0) return; // nothing to build: _build_sub_tree(...,0,0) never reaches the leaf case and would recurse without bound
+	_build_sub_tree(primitive_boxes, 0, primitive_boxes.size());
+}
+
+
diff --git a/src/bullet/BulletCollision/Gimpact/gim_box_set.h b/src/bullet/BulletCollision/Gimpact/gim_box_set.h
new file mode 100644
index 00000000..61d190a7
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_box_set.h
@@ -0,0 +1,674 @@
+#ifndef GIM_BOX_SET_H_INCLUDED
+#define GIM_BOX_SET_H_INCLUDED
+
+/*! \file gim_box_set.h
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+
+#include "gim_array.h"
+#include "gim_radixsort.h"
+#include "gim_box_collision.h"
+#include "gim_tri_collision.h"
+
+
+
+//! Overlapping pair: two indices, one per collided element
+struct GIM_PAIR
+{
+    GUINT m_index1;
+    GUINT m_index2;
+    GIM_PAIR() // leaves both indices unset
+    {}
+
+    GIM_PAIR(const GIM_PAIR & p) // copies both indices
+    {
+    	m_index1 = p.m_index1;
+    	m_index2 = p.m_index2;
+	}
+
+	GIM_PAIR(GUINT index1, GUINT index2) // pair (index1,index2)
+    {
+    	m_index1 = index1;
+    	m_index2 = index2;
+	}
+};
+
+//! A pairset array: a gim_array of GIM_PAIR constructed with the argument 32 (presumably the initial capacity -- confirm against gim_array)
+class gim_pair_set: public gim_array<GIM_PAIR>
+{
+public:
+	gim_pair_set():gim_array<GIM_PAIR>(32)
+	{
+	}
+	inline void push_pair(GUINT index1,GUINT index2) // appends (index1,index2)
+	{
+		push_back(GIM_PAIR(index1,index2));
+	}
+
+	inline void push_pair_inv(GUINT index1,GUINT index2) // appends the swapped pair (index2,index1)
+	{
+		push_back(GIM_PAIR(index2,index1));
+	}
+};
+
+
+//! Prototype Base class for primitive classification
+/*!
+This class is a wrapper for primitive collections.
+This tells relevant info for the Bounding Box set classes, which take care of space classification.
+This class can manage Compound shapes and trimeshes, and if it is managing a trimesh then the Hierarchy Bounding Box classes will take advantage of primitive Vs Box overlapping tests for getting optimal results and fewer Per Box comparisons.
+*/
+class GIM_PRIMITIVE_MANAGER_PROTOTYPE
+{
+public:
+
+	virtual ~GIM_PRIMITIVE_MANAGER_PROTOTYPE() {}
+	//! determines if this manager consists only of triangles, a special case that will be optimized
+	virtual bool is_trimesh() = 0;
+	virtual GUINT get_primitive_count() = 0; // number of primitives managed
+	virtual void get_primitive_box(GUINT prim_index ,GIM_AABB & primbox) = 0; // writes the bounding box of primitive prim_index into primbox
+	virtual void get_primitive_triangle(GUINT prim_index,GIM_TRIANGLE & triangle) = 0; // writes the triangle of primitive prim_index into triangle
+};
+
+
+struct GIM_AABB_DATA // a primitive's bounding box plus its payload, as consumed by GIM_BOX_TREE::build_tree
+{
+	GIM_AABB m_bound; // bounding box of the primitive
+	GUINT m_data; // copied into the leaf node's m_data
+};
+
+//! Node Structure for trees
+struct GIM_BOX_TREE_NODE
+{
+	GIM_AABB m_bound; // bounding box of the node
+	GUINT m_left;//!< Left subtree
+	GUINT m_right;//!< Right subtree
+	GUINT m_escapeIndex;//!< Escape index for traversing: offset added to a node index to skip its subtree
+	GUINT m_data;//!< primitive index, if applicable
+
+	GIM_BOX_TREE_NODE() // starts as a leaf-shaped node with all indices zero; m_bound is not set here
+	{
+	    m_left = 0;
+	    m_right = 0;
+	    m_escapeIndex = 0;
+	    m_data = 0;
+	}
+
+	SIMD_FORCE_INLINE bool is_leaf_node() const // a node is a leaf when both child indices are 0
+	{
+	    return  (!m_left && !m_right);
+	}
+};
+
+//! Basic Box tree structure: nodes are kept in a flat array and addressed by index
+class GIM_BOX_TREE
+{
+protected:
+	GUINT m_num_nodes; // number of nodes in use in m_node_array
+	gim_array<GIM_BOX_TREE_NODE> m_node_array;
+protected:
+	GUINT _sort_and_calc_splitting_index( // partitions the range around the mean center on splitAxis and returns the split position
+		gim_array<GIM_AABB_DATA> & primitive_boxes,
+		 GUINT startIndex,  GUINT endIndex, GUINT splitAxis);
+
+	GUINT _calc_splitting_axis(gim_array<GIM_AABB_DATA> & primitive_boxes, GUINT startIndex,  GUINT endIndex); // picks the split axis for the range
+
+	void _build_sub_tree(gim_array<GIM_AABB_DATA> & primitive_boxes, GUINT startIndex,  GUINT endIndex); // builds the subtree for [startIndex,endIndex)
+public:
+	GIM_BOX_TREE()
+	{
+		m_num_nodes = 0;
+	}
+
+	//! prototype functions for box tree management
+	//!@{
+	void build_tree(gim_array<GIM_AABB_DATA> & primitive_boxes); // rebuilds the whole tree from primitive_boxes
+
+	SIMD_FORCE_INLINE void clearNodes() // drops all nodes
+	{
+		m_node_array.clear();
+		m_num_nodes = 0;
+	}
+
+	//! node count
+	SIMD_FORCE_INLINE GUINT getNodeCount() const
+	{
+		return m_num_nodes;
+	}
+
+	//! tells if the node is a leaf
+	SIMD_FORCE_INLINE bool isLeafNode(GUINT nodeindex) const
+	{
+		return m_node_array[nodeindex].is_leaf_node();
+	}
+
+	SIMD_FORCE_INLINE GUINT getNodeData(GUINT nodeindex) const // m_data stored in the node
+	{
+		return m_node_array[nodeindex].m_data;
+	}
+
+	SIMD_FORCE_INLINE void getNodeBound(GUINT nodeindex, GIM_AABB & bound) const // copies the node's box into bound
+	{
+		bound = m_node_array[nodeindex].m_bound;
+	}
+
+	SIMD_FORCE_INLINE void setNodeBound(GUINT nodeindex, const GIM_AABB & bound) // overwrites the node's box
+	{
+		m_node_array[nodeindex].m_bound = bound;
+	}
+
+	SIMD_FORCE_INLINE GUINT getLeftNodeIndex(GUINT nodeindex)  const // index of the left child (0 for a leaf)
+	{
+		return m_node_array[nodeindex].m_left;
+	}
+
+	SIMD_FORCE_INLINE GUINT getRightNodeIndex(GUINT nodeindex)  const // index of the right child (0 for a leaf)
+	{
+		return m_node_array[nodeindex].m_right;
+	}
+
+	SIMD_FORCE_INLINE GUINT getScapeNodeIndex(GUINT nodeindex) const // the node's escape offset (m_escapeIndex)
+	{
+		return m_node_array[nodeindex].m_escapeIndex;
+	}
+
+	//!@}
+};
+
+
+//! Generic Box Tree Template
+/*!
+This class offers a structure for managing a box tree of primitives.
+Requires a Primitive prototype (like GIM_PRIMITIVE_MANAGER_PROTOTYPE ) and
+a Box tree structure ( like GIM_BOX_TREE).
+*/
+template<typename _GIM_PRIMITIVE_MANAGER_PROTOTYPE, typename _GIM_BOX_TREE_PROTOTYPE>
+class GIM_BOX_TREE_TEMPLATE_SET
+{
+protected:
+	_GIM_PRIMITIVE_MANAGER_PROTOTYPE m_primitive_manager; // held by value; source of primitive boxes and triangles
+	_GIM_BOX_TREE_PROTOTYPE m_box_tree; // the node hierarchy
+protected:
+	//stackless refit: visits the nodes from the last index down to 0, so children (which get larger indices than their parent at build time) are refit first
+	SIMD_FORCE_INLINE void refit()
+	{
+		GUINT nodecount = getNodeCount();
+		while(nodecount--)
+		{
+			if(isLeafNode(nodecount))
+			{
+				GIM_AABB leafbox;
+				m_primitive_manager.get_primitive_box(getNodeData(nodecount),leafbox); // leaf: fetch the current primitive box
+				setNodeBound(nodecount,leafbox);
+			}
+			else
+			{
+				//get left bound
+				GUINT childindex = getLeftNodeIndex(nodecount);
+				GIM_AABB bound;
+				getNodeBound(childindex,bound);
+				//get right bound
+				childindex = getRightNodeIndex(nodecount);
+				GIM_AABB bound2;
+				getNodeBound(childindex,bound2);
+				bound.merge(bound2); // inner node: union of both child boxes
+
+				setNodeBound(nodecount,bound);
+			}
+		}
+	}
+public:
+
+	GIM_BOX_TREE_TEMPLATE_SET()
+	{
+	}
+
+	SIMD_FORCE_INLINE GIM_AABB getGlobalBox()  const // box of node 0 (the root); reads node 0 unconditionally
+	{
+		GIM_AABB totalbox;
+		getNodeBound(0, totalbox);
+		return totalbox;
+	}
+
+	SIMD_FORCE_INLINE void setPrimitiveManager(const _GIM_PRIMITIVE_MANAGER_PROTOTYPE & primitive_manager) // stores a copy of primitive_manager
+	{
+		m_primitive_manager = primitive_manager;
+	}
+
+	const _GIM_PRIMITIVE_MANAGER_PROTOTYPE & getPrimitiveManager() const
+	{
+		return m_primitive_manager;
+	}
+
+	_GIM_PRIMITIVE_MANAGER_PROTOTYPE & getPrimitiveManager()
+	{
+		return m_primitive_manager;
+	}
+
+//! node manager prototype functions
+///@{
+
+	//! this attempts to refit the box set (node boxes are recomputed, the tree layout is kept).
+	SIMD_FORCE_INLINE void update()
+	{
+		refit();
+	}
+
+	//! this rebuilds the entire set from the boxes reported by the primitive manager
+	SIMD_FORCE_INLINE void buildSet()
+	{
+		//obtain primitive boxes
+		gim_array<GIM_AABB_DATA> primitive_boxes;
+		primitive_boxes.resize(m_primitive_manager.get_primitive_count(),false);
+
+		for (GUINT i = 0;i<primitive_boxes.size() ;i++ )
+		{
+			 m_primitive_manager.get_primitive_box(i,primitive_boxes[i].m_bound);
+			 primitive_boxes[i].m_data = i; // payload is the primitive index
+		}
+
+		m_box_tree.build_tree(primitive_boxes);
+	}
+
+	//! appends to collided_results the node data (primitive indices) of the leaves overlapping box; returns true when collided_results is non-empty afterwards
+	SIMD_FORCE_INLINE bool boxQuery(const GIM_AABB & box, gim_array<GUINT> & collided_results) const
+	{
+		GUINT curIndex = 0;
+		GUINT numNodes = getNodeCount();
+
+		while (curIndex < numNodes)
+		{
+			GIM_AABB bound;
+			getNodeBound(curIndex,bound);
+
+			//catch bugs in tree data
+
+			bool aabbOverlap = bound.has_collision(box);
+			bool isleafnode = isLeafNode(curIndex);
+
+			if (isleafnode && aabbOverlap)
+			{
+				collided_results.push_back(getNodeData(curIndex));
+			}
+
+			if (aabbOverlap || isleafnode)
+			{
+				//next subnode
+				curIndex++;
+			}
+			else
+			{
+				//skip node: jump over the whole subtree of this inner node
+				curIndex+= getScapeNodeIndex(curIndex);
+			}
+		}
+		if(collided_results.size()>0) return true; // also true when the array already held entries on entry
+		return false;
+	}
+
+	//! same as boxQuery, but box is first transformed by transform (its enclosing AABB is used)
+	SIMD_FORCE_INLINE bool boxQueryTrans(const GIM_AABB & box,
+		 const btTransform & transform, gim_array<GUINT> & collided_results) const
+	{
+		GIM_AABB transbox=box;
+		transbox.appy_transform(transform);
+		return boxQuery(transbox,collided_results);
+	}
+
+	//! appends to collided_results the node data of the leaves whose box passes collide_ray; note the parameter order (direction first, then origin)
+	SIMD_FORCE_INLINE bool rayQuery(
+		const btVector3 & ray_dir,const btVector3 & ray_origin ,
+		gim_array<GUINT> & collided_results) const
+	{
+		GUINT curIndex = 0;
+		GUINT numNodes = getNodeCount();
+
+		while (curIndex < numNodes)
+		{
+			GIM_AABB bound;
+			getNodeBound(curIndex,bound);
+
+			//catch bugs in tree data
+
+			bool aabbOverlap = bound.collide_ray(ray_origin,ray_dir);
+			bool isleafnode = isLeafNode(curIndex);
+
+			if (isleafnode && aabbOverlap)
+			{
+				collided_results.push_back(getNodeData( curIndex));
+			}
+
+			if (aabbOverlap || isleafnode)
+			{
+				//next subnode
+				curIndex++;
+			}
+			else
+			{
+				//skip node: jump over the whole subtree of this inner node
+				curIndex+= getScapeNodeIndex(curIndex);
+			}
+		}
+		if(collided_results.size()>0) return true; // also true when the array already held entries on entry
+		return false;
+	}
+
+	//! tells if this set has hierarchy (always true for this class)
+	SIMD_FORCE_INLINE bool hasHierarchy() const
+	{
+		return true;
+	}
+
+	//! tells if this set is a trimesh (forwards to the primitive manager)
+	SIMD_FORCE_INLINE bool isTrimesh()  const
+	{
+		return m_primitive_manager.is_trimesh();
+	}
+
+	//! node count
+	SIMD_FORCE_INLINE GUINT getNodeCount() const
+	{
+		return m_box_tree.getNodeCount();
+	}
+
+	//! tells if the node is a leaf
+	SIMD_FORCE_INLINE bool isLeafNode(GUINT nodeindex) const
+	{
+		return m_box_tree.isLeafNode(nodeindex);
+	}
+
+	SIMD_FORCE_INLINE GUINT getNodeData(GUINT nodeindex) const // forwards to m_box_tree
+	{
+		return m_box_tree.getNodeData(nodeindex);
+	}
+
+	SIMD_FORCE_INLINE void getNodeBound(GUINT nodeindex, GIM_AABB & bound)  const // forwards to m_box_tree
+	{
+		m_box_tree.getNodeBound(nodeindex, bound);
+	}
+
+	SIMD_FORCE_INLINE void setNodeBound(GUINT nodeindex, const GIM_AABB & bound) // forwards to m_box_tree
+	{
+		m_box_tree.setNodeBound(nodeindex, bound);
+	}
+
+	SIMD_FORCE_INLINE GUINT getLeftNodeIndex(GUINT nodeindex) const // forwards to m_box_tree
+	{
+		return m_box_tree.getLeftNodeIndex(nodeindex);
+	}
+
+	SIMD_FORCE_INLINE GUINT getRightNodeIndex(GUINT nodeindex) const // forwards to m_box_tree
+	{
+		return m_box_tree.getRightNodeIndex(nodeindex);
+	}
+
+	SIMD_FORCE_INLINE GUINT getScapeNodeIndex(GUINT nodeindex) const // forwards to m_box_tree
+	{
+		return m_box_tree.getScapeNodeIndex(nodeindex);
+	}
+
+	SIMD_FORCE_INLINE void getNodeTriangle(GUINT nodeindex,GIM_TRIANGLE & triangle) const // fetches the triangle of the primitive stored in node nodeindex
+	{
+		m_primitive_manager.get_primitive_triangle(getNodeData(nodeindex),triangle);
+	}
+
+};
+
+//! Class for Box Tree Sets
+/*!
+This is GIM_BOX_TREE_TEMPLATE_SET with GIM_BOX_TREE fixed as the tree implementation; it adds no members of its own.
+*/
+template<typename _GIM_PRIMITIVE_MANAGER_PROTOTYPE>
+class GIM_BOX_TREE_SET: public GIM_BOX_TREE_TEMPLATE_SET< _GIM_PRIMITIVE_MANAGER_PROTOTYPE, GIM_BOX_TREE>
+{
+public:
+
+};
+
+
+
+
+
+/// GIM_BOX_SET collision methods: traverses two box trees and collects the pairs of overlapping leaves
+template<typename BOX_SET_CLASS0,typename BOX_SET_CLASS1>
+class GIM_TREE_TREE_COLLIDER
+{
+public:
+	gim_pair_set * m_collision_pairs; // output array, set by find_collision (not owned)
+	BOX_SET_CLASS0 * m_boxset0; // first box set (not owned)
+	BOX_SET_CLASS1 * m_boxset1; // second box set (not owned)
+	GUINT current_node0; // node of set 0 whose info is cached below; G_UINT_INFINITY when nothing is cached
+	GUINT current_node1; // node of set 1 whose info is cached below; G_UINT_INFINITY when nothing is cached
+	bool node0_is_leaf;
+	bool node1_is_leaf;
+	bool t0_is_trimesh; // whether set 0 gets the triangle vs box test on its leaves
+	bool t1_is_trimesh; // whether set 1 gets the triangle vs box test on its leaves
+	bool node0_has_triangle; // m_tri0/m_tri0_plane are valid for current_node0
+	bool node1_has_triangle; // m_tri1/m_tri1_plane are valid for current_node1
+	GIM_AABB m_box0; // cached box of current_node0
+	GIM_AABB m_box1; // cached box of current_node1
+	GIM_BOX_BOX_TRANSFORM_CACHE trans_cache_1to0; // set 1 -> set 0
+	btTransform trans_cache_0to1; // set 0 -> set 1
+	GIM_TRIANGLE m_tri0; // triangle of current_node0, transformed by trans_cache_0to1
+	btVector4 m_tri0_plane;
+	GIM_TRIANGLE m_tri1; // triangle of current_node1, transformed by trans_cache_1to0
+	btVector4 m_tri1_plane;
+
+
+public:
+	GIM_TREE_TREE_COLLIDER() // only the cache keys are initialized; everything else is set by find_collision
+	{
+		current_node0 = G_UINT_INFINITY;
+		current_node1 = G_UINT_INFINITY;
+	}
+protected:
+	SIMD_FORCE_INLINE void retrieve_node0_triangle(GUINT node0) // loads (once per cached node) the triangle of node0 into the frame of set 1
+	{
+		if(node0_has_triangle) return;
+		m_boxset0->getNodeTriangle(node0,m_tri0);
+		//transform triangle
+		m_tri0.m_vertices[0] = trans_cache_0to1(m_tri0.m_vertices[0]);
+		m_tri0.m_vertices[1] = trans_cache_0to1(m_tri0.m_vertices[1]);
+		m_tri0.m_vertices[2] = trans_cache_0to1(m_tri0.m_vertices[2]);
+		m_tri0.get_plane(m_tri0_plane);
+
+		node0_has_triangle = true;
+	}
+
+	SIMD_FORCE_INLINE void retrieve_node1_triangle(GUINT node1) // loads (once per cached node) the triangle of node1 into the frame of set 0
+	{
+		if(node1_has_triangle) return;
+		m_boxset1->getNodeTriangle(node1,m_tri1);
+		//transform triangle
+		m_tri1.m_vertices[0] = trans_cache_1to0.transform(m_tri1.m_vertices[0]);
+		m_tri1.m_vertices[1] = trans_cache_1to0.transform(m_tri1.m_vertices[1]);
+		m_tri1.m_vertices[2] = trans_cache_1to0.transform(m_tri1.m_vertices[2]);
+		m_tri1.get_plane(m_tri1_plane);
+
+		node1_has_triangle = true;
+	}
+
+	SIMD_FORCE_INLINE void retrieve_node0_info(GUINT node0) // caches box and leaf flag of node0; no-op when node0 is already cached
+	{
+		if(node0 == current_node0) return;
+		m_boxset0->getNodeBound(node0,m_box0);
+		node0_is_leaf = m_boxset0->isLeafNode(node0);
+		node0_has_triangle = false;
+		current_node0 = node0;
+	}
+
+	SIMD_FORCE_INLINE void retrieve_node1_info(GUINT node1) // caches box and leaf flag of node1; no-op when node1 is already cached
+	{
+		if(node1 == current_node1) return;
+		m_boxset1->getNodeBound(node1,m_box1);
+		node1_is_leaf = m_boxset1->isLeafNode(node1);
+		node1_has_triangle = false;
+		current_node1 = node1;
+	}
+
+	SIMD_FORCE_INLINE bool node_collision(GUINT node0 ,GUINT node1) // box vs box test, refined by one triangle vs box test when a trimesh leaf is involved
+	{
+		retrieve_node0_info(node0);
+		retrieve_node1_info(node1);
+		bool result = m_box0.overlapping_trans_cache(m_box1,trans_cache_1to0,true);
+		if(!result) return false;
+
+		if(t0_is_trimesh && node0_is_leaf)
+		{
+			//perform primitive vs box collision
+			retrieve_node0_triangle(node0);
+			//do triangle vs box collision
+			m_box1.increment_margin(m_tri0.m_margin); // temporarily inflate the cached box by the triangle margin
+
+			result = m_box1.collide_triangle_exact(
+				m_tri0.m_vertices[0],m_tri0.m_vertices[1],m_tri0.m_vertices[2],m_tri0_plane);
+
+			m_box1.increment_margin(-m_tri0.m_margin); // undo the inflation
+
+			if(!result) return false;
+			return true;
+		}
+		else if(t1_is_trimesh && node1_is_leaf)
+		{
+			//perform primitive vs box collision
+			retrieve_node1_triangle(node1);
+			//do triangle vs box collision
+			m_box0.increment_margin(m_tri1.m_margin); // temporarily inflate the cached box by the triangle margin
+
+			result = m_box0.collide_triangle_exact(
+				m_tri1.m_vertices[0],m_tri1.m_vertices[1],m_tri1.m_vertices[2],m_tri1_plane);
+
+			m_box0.increment_margin(-m_tri1.m_margin); // undo the inflation
+
+			if(!result) return false;
+			return true;
+		}
+		return true;
+	}
+
+	//stackless collision routine: no recursion, the pending node pairs are kept in an explicit array used as a stack
+	void find_collision_pairs()
+	{
+		gim_pair_set stack_collisions;
+		stack_collisions.reserve(32);
+
+		//add the first pair
+		stack_collisions.push_pair(0,0);
+
+
+		while(stack_collisions.size())
+		{
+			//retrieve the last pair and pop
+			GUINT node0 = stack_collisions.back().m_index1;
+			GUINT node1 = stack_collisions.back().m_index2;
+			stack_collisions.pop_back();
+			if(node_collision(node0,node1)) // a collision is found
+			{
+				if(node0_is_leaf)
+				{
+					if(node1_is_leaf)
+					{
+						m_collision_pairs->push_pair(m_boxset0->getNodeData(node0),m_boxset1->getNodeData(node1)); // leaf vs leaf: report the primitive pair
+					}
+					else
+					{
+						//collide left
+						stack_collisions.push_pair(node0,m_boxset1->getLeftNodeIndex(node1));
+
+						//collide right
+						stack_collisions.push_pair(node0,m_boxset1->getRightNodeIndex(node1));
+					}
+				}
+				else
+				{
+					if(node1_is_leaf)
+					{
+						//collide left
+						stack_collisions.push_pair(m_boxset0->getLeftNodeIndex(node0),node1);
+						//collide right
+						stack_collisions.push_pair(m_boxset0->getRightNodeIndex(node0),node1);
+					}
+					else
+					{
+						GUINT left0 = m_boxset0->getLeftNodeIndex(node0);
+						GUINT right0 = m_boxset0->getRightNodeIndex(node0);
+						GUINT left1 = m_boxset1->getLeftNodeIndex(node1);
+						GUINT right1 = m_boxset1->getRightNodeIndex(node1);
+						//collide left
+						stack_collisions.push_pair(left0,left1);
+						//collide right
+						stack_collisions.push_pair(left0,right1);
+						//collide left
+						stack_collisions.push_pair(right0,left1);
+						//collide right
+						stack_collisions.push_pair(right0,right1);
+
+					}// else if node1 is not a leaf
+				}// else if node0 is not a leaf
+
+			}// if(node_collision(node0,node1))
+		}//while(stack_collisions.size())
+	}
+public:
+	void find_collision(BOX_SET_CLASS0 * boxset1, const btTransform & trans1, // appends to collision_pairs the primitive pairs of boxset1 (placed by trans1) and boxset2 (placed by trans2) that overlap
+		BOX_SET_CLASS1 * boxset2, const btTransform & trans2,
+		gim_pair_set & collision_pairs, bool complete_primitive_tests = true)
+	{
+		m_collision_pairs = &collision_pairs;
+		m_boxset0 = boxset1;
+		m_boxset1 = boxset2;
+
+		trans_cache_1to0.calc_from_homogenic(trans1,trans2);
+
+		trans_cache_0to1 =  trans2.inverse();
+		trans_cache_0to1 *= trans1;
+		current_node0 = G_UINT_INFINITY; // forget node info cached by an earlier call: boxes, leaf flags and transformed triangles
+		current_node1 = G_UINT_INFINITY; // would otherwise be reused for the new box sets/transforms
+		if(complete_primitive_tests)
+		{
+			t0_is_trimesh = boxset1->getPrimitiveManager().is_trimesh();
+			t1_is_trimesh = boxset2->getPrimitiveManager().is_trimesh();
+		}
+		else
+		{
+			t0_is_trimesh = false;
+			t1_is_trimesh = false;
+		}
+
+		find_collision_pairs();
+	}
+};
+
+
+#endif // GIM_BOX_SET_H_INCLUDED
+
+
diff --git a/src/bullet/BulletCollision/Gimpact/gim_clip_polygon.h b/src/bullet/BulletCollision/Gimpact/gim_clip_polygon.h
new file mode 100644
index 00000000..e342459c
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_clip_polygon.h
@@ -0,0 +1,210 @@
+#ifndef GIM_CLIP_POLYGON_H_INCLUDED
+#define GIM_CLIP_POLYGON_H_INCLUDED
+
+/*! \file gim_clip_polygon.h
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+
+//! Functor that measures the distance from a point to a 3D plane
+class DISTANCE_PLANE_3D_FUNC
+{
+public:
+	//! Returns DISTANCE_PLANE_POINT(plane, point) converted to GREAL
+	template<typename CLASS_POINT,typename CLASS_PLANE>
+	inline GREAL operator()(const CLASS_PLANE & plane, const CLASS_POINT & point)
+	{
+		const GREAL plane_distance = DISTANCE_PLANE_POINT(plane, point);
+		return plane_distance;
+	}
+};
+
+//! Appends to clipped the points produced by the edge point0 -> point1.
+/*! A blended point is emitted when both ends classify differently against
+G_EPSILON; point1 itself is emitted when dist1 is not greater than G_EPSILON. */
+template<typename CLASS_POINT>
+SIMD_FORCE_INLINE void PLANE_CLIP_POLYGON_COLLECT(
+						const CLASS_POINT & point0,
+						const CLASS_POINT & point1,
+						GREAL dist0,
+						GREAL dist1,
+						CLASS_POINT * clipped,
+						GUINT & clipped_count)
+{
+	const bool prev_positive = (dist0>G_EPSILON);
+	const bool curr_positive = (dist1>G_EPSILON);
+	if(prev_positive != curr_positive)
+	{//blend at the factor where the linearly interpolated distance is zero
+		const GREAL crossing_factor = -dist0/(dist1-dist0);
+		VEC_BLEND(clipped[clipped_count],point0,point1,crossing_factor);
+		clipped_count++;
+	}
+	if(curr_positive) return;
+	VEC_COPY(clipped[clipped_count],point1);
+	clipped_count++;
+}
+
+//! Clips a polygon by a plane
+/*!
+Keeps the points whose distance_func value is not greater than G_EPSILON and adds a
+blended point on every edge crossing that threshold. The closing edge returns to the
+first point, so a kept first point is written again at the end of clipped.
+\param polygon_point_count Number of input points; 0 yields an empty result.
+\return The number of points written to clipped
+*/
+template<typename CLASS_POINT,typename CLASS_PLANE, typename DISTANCE_PLANE_FUNC>
+SIMD_FORCE_INLINE GUINT PLANE_CLIP_POLYGON_GENERIC(
+						const CLASS_PLANE & plane,
+						const CLASS_POINT * polygon_points,
+						GUINT polygon_point_count,
+						CLASS_POINT * clipped,DISTANCE_PLANE_FUNC distance_func)
+{
+	//empty polygon: polygon_points[0] and [count-1] would be read out of bounds
+	if(polygon_point_count==0) return 0;
+
+	GUINT clipped_count = 0;
+	//clip first point
+	GREAL firstdist = distance_func(plane,polygon_points[0]);
+	if(!(firstdist>G_EPSILON))
+	{
+		VEC_COPY(clipped[clipped_count],polygon_points[0]);
+		clipped_count++;
+	}
+
+	GREAL olddist = firstdist;
+	for(GUINT _i=1;_i<polygon_point_count;_i++)
+	{
+		GREAL dist = distance_func(plane,polygon_points[_i]);
+		PLANE_CLIP_POLYGON_COLLECT(
+						polygon_points[_i-1],polygon_points[_i],
+						olddist,
+						dist,
+						clipped,
+						clipped_count);
+		olddist = dist;
+	}
+
+	//closing edge: from the last point back to the first one
+	PLANE_CLIP_POLYGON_COLLECT(
+					polygon_points[polygon_point_count-1],polygon_points[0],
+					olddist,
+					firstdist,
+					clipped,
+					clipped_count);
+	return clipped_count;
+}
+
+//! Clips a triangle by a plane
+/*!
+Same walk as PLANE_CLIP_POLYGON_GENERIC, unrolled for the edges point0->point1,
+point1->point2 and point2->point0, so a kept point0 is written twice.
+\param plane Plane handed to distance_func together with each point
+\param point0,point1,point2 Triangle vertices, visited in this order
+\param clipped Output buffer for the resulting points
+\param distance_func Callable as distance_func(plane,point); the result is stored as GREAL
+\return The number of points written to clipped
+*/
+template<typename CLASS_POINT,typename CLASS_PLANE, typename DISTANCE_PLANE_FUNC>
+SIMD_FORCE_INLINE GUINT PLANE_CLIP_TRIANGLE_GENERIC(
+						const CLASS_PLANE & plane,
+						const CLASS_POINT & point0,
+						const CLASS_POINT & point1,
+						const CLASS_POINT & point2,
+						CLASS_POINT * clipped,DISTANCE_PLANE_FUNC distance_func)
+{
+	GUINT clipped_count = 0;
+
+	//first vertex: kept unless its distance is greater than G_EPSILON
+	const GREAL dist_p0 = distance_func(plane,point0);
+	if(!(dist_p0>G_EPSILON))
+	{
+		VEC_COPY(clipped[clipped_count],point0);
+		clipped_count++;
+	}
+
+	//each call below appends the blended crossing point (if the edge crosses),
+	//then the end point of the edge (if that point is kept)
+
+	//edge point0 -> point1
+	const GREAL dist_p1 = distance_func(plane,point1);
+	PLANE_CLIP_POLYGON_COLLECT(
+					point0,point1,
+					dist_p0,
+					dist_p1,
+					clipped,
+					clipped_count);
+
+	//edge point1 -> point2
+	const GREAL dist_p2 = distance_func(plane,point2);
+	PLANE_CLIP_POLYGON_COLLECT(
+					point1,point2,
+					dist_p1,
+					dist_p2,
+					clipped,
+					clipped_count);
+
+	//closing edge point2 -> point0
+	PLANE_CLIP_POLYGON_COLLECT(
+					point2,point0,
+					dist_p2,
+					dist_p0,
+					clipped,
+					clipped_count);
+
+	return clipped_count;
+}
+
+
+//! Clips a polygon by a plane, measuring distances with DISTANCE_PLANE_3D_FUNC
+template<typename CLASS_POINT,typename CLASS_PLANE>
+SIMD_FORCE_INLINE GUINT PLANE_CLIP_POLYGON3D(
+						const CLASS_PLANE & plane,
+						const CLASS_POINT * polygon_points,
+						GUINT polygon_point_count,
+						CLASS_POINT * clipped)
+{
+	return PLANE_CLIP_POLYGON_GENERIC<CLASS_POINT,CLASS_PLANE,DISTANCE_PLANE_3D_FUNC>(plane,polygon_points,polygon_point_count,clipped,DISTANCE_PLANE_3D_FUNC());
+}
+
+//! Clips a triangle by a plane, measuring distances with DISTANCE_PLANE_3D_FUNC
+template<typename CLASS_POINT,typename CLASS_PLANE>
+SIMD_FORCE_INLINE GUINT PLANE_CLIP_TRIANGLE3D(
+						const CLASS_PLANE & plane,
+						const CLASS_POINT & point0,
+						const CLASS_POINT & point1,
+						const CLASS_POINT & point2,
+						CLASS_POINT * clipped)
+{
+	return PLANE_CLIP_TRIANGLE_GENERIC<CLASS_POINT,CLASS_PLANE,DISTANCE_PLANE_3D_FUNC>(plane,point0,point1,point2,clipped,DISTANCE_PLANE_3D_FUNC());
+}
+
+
+#endif // GIM_CLIP_POLYGON_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_contact.cpp b/src/bullet/BulletCollision/Gimpact/gim_contact.cpp
new file mode 100644
index 00000000..20e41de0
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_contact.cpp
@@ -0,0 +1,146 @@
+
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+#include "gim_contact.h"
+
+#define MAX_COINCIDENT 8
+
+//! Rebuilds this array from contacts, keeping one contact per position key.
+/*!
+Within a run of equal calc_key_contact() keys, a contact whose m_depth is smaller by
+more than CONTACT_DIFF_EPSILON replaces the kept one. If normal_contact_average is set,
+normals of up to MAX_COINCIDENT contacts of (nearly) equal depth are blended into it.
+*/
+void gim_contact_array::merge_contacts(
+	const gim_contact_array & contacts, bool normal_contact_average)
+{
+	clear();
+
+	//nothing to merge: keycontacts[0] below must not be read on an empty input
+	if(contacts.size()==0) return;
+
+	if(contacts.size()==1)
+	{
+		push_back(contacts.back());
+		return;
+	}
+
+	gim_array<GIM_RSORT_TOKEN> keycontacts(contacts.size());
+	keycontacts.resize(contacts.size(),false);
+
+	//fill key contacts: m_key is the position key, m_value the source index
+	GUINT i;
+	for (i = 0;i<contacts.size() ;i++ )
+	{
+		keycontacts[i].m_key = contacts[i].calc_key_contact();
+		keycontacts[i].m_value = i;
+	}
+
+	//sort keys
+	gim_heap_sort(keycontacts.pointer(),keycontacts.size(),GIM_RSORT_TOKEN_COMPARATOR());
+
+	// Merge contacts
+	GUINT coincident_count=0;
+	btVector3 coincident_normals[MAX_COINCIDENT];
+
+	GUINT last_key = keycontacts[0].m_key;
+	GUINT key = 0;
+
+	push_back(contacts[keycontacts[0].m_value]);
+	GIM_CONTACT * pcontact = &back();
+
+	for( i=1;i<keycontacts.size();i++)
+	{
+		key = keycontacts[i].m_key;
+		const GIM_CONTACT * scontact = &contacts[keycontacts[i].m_value];
+
+		if(last_key ==  key)//same points
+		{
+			if(pcontact->m_depth - CONTACT_DIFF_EPSILON > scontact->m_depth)
+			{//replace the kept contact and restart the normal accumulation
+				*pcontact = *scontact;
+				coincident_count = 0;
+			}
+			else if(normal_contact_average
+				&& btFabs(pcontact->m_depth - scontact->m_depth)<CONTACT_DIFF_EPSILON
+				&& coincident_count<MAX_COINCIDENT)
+			{
+				coincident_normals[coincident_count] = scontact->m_normal;
+				coincident_count++;
+			}
+		}
+		else
+		{//add new contact
+			if(normal_contact_average && coincident_count>0)
+			{
+				pcontact->interpolate_normals(coincident_normals,coincident_count);
+				coincident_count = 0;
+			}
+			push_back(*scontact);
+			pcontact = &back();
+		}
+		last_key = key;
+	}
+
+	//the last run is not followed by a key change, so its pending normals are flushed here
+	if(normal_contact_average && coincident_count>0) pcontact->interpolate_normals(coincident_normals,coincident_count);
+}
+
+//! Replaces this array with one contact averaged from contacts (none if contacts is empty).
+//! Point: mean of the points. Normal: mean of the depth-weighted normals; its length is the depth.
+void gim_contact_array::merge_contacts_unique(const gim_contact_array & contacts)
+{
+	clear();
+	if(contacts.size()==0) return;
+
+	if(contacts.size()==1)
+	{
+		push_back(contacts.back());
+		return;
+	}
+
+	//seed with contact 0 (not back(), which the loop below would count twice)
+	GIM_CONTACT average_contact = contacts[0];
+	average_contact.m_normal *= average_contact.m_depth;
+	for (GUINT i=1;i<contacts.size() ;i++ )
+	{
+		average_contact.m_point += contacts[i].m_point;
+		average_contact.m_normal += contacts[i].m_normal * contacts[i].m_depth;
+	}
+	//divide
+	GREAL divide_average = 1.0f/((GREAL)contacts.size());
+	average_contact.m_point *= divide_average;
+	average_contact.m_normal *= divide_average;
+	average_contact.m_depth = average_contact.m_normal.length();
+	average_contact.m_normal /= average_contact.m_depth;//NOTE(review): not guarded against a zero-length sum
+
+	push_back(average_contact);//store the merged contact as the only element
+}
+
diff --git a/src/bullet/BulletCollision/Gimpact/gim_contact.h b/src/bullet/BulletCollision/Gimpact/gim_contact.h
new file mode 100644
index 00000000..5d9f8ef8
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_contact.h
@@ -0,0 +1,164 @@
+#ifndef GIM_CONTACT_H_INCLUDED
+#define GIM_CONTACT_H_INCLUDED
+
+/*! \file gim_contact.h
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+#include "gim_geometry.h"
+#include "gim_radixsort.h"
+#include "gim_array.h"
+
+
+/**
+Configuration var for applying interpolation of  contact normals
+*/
+#define NORMAL_CONTACT_AVERAGE 1
+#define CONTACT_DIFF_EPSILON 0.00001f
+
+/// Contact point resulting from a collision query.
+/// Stores the contact position and normal, the penetration depth and the two
+/// feature (face) indices. calc_key_contact() gives the position key used to
+/// group contacts and interpolate_normals() blends extra normals into m_normal.
+class GIM_CONTACT
+{
+public:
+    btVector3 m_point;
+    btVector3 m_normal;
+    GREAL m_depth;//Positive value indicates interpenetration
+    GREAL m_distance;//Padding not for use
+    GUINT m_feature1;//Face number
+    GUINT m_feature2;//Face number
+public:
+    //! Empty body: no member is assigned here.
+    GIM_CONTACT()
+    {
+    }
+
+    //! Copies every member except m_distance, which is not assigned.
+    GIM_CONTACT(const GIM_CONTACT & contact):
+				m_point(contact.m_point),
+				m_normal(contact.m_normal),
+				m_depth(contact.m_depth),
+				m_feature1(contact.m_feature1),
+				m_feature2(contact.m_feature2)
+    {
+    }
+
+    //! Builds a contact from its parts; m_distance is not assigned.
+    GIM_CONTACT(const btVector3 &point,const btVector3 & normal,
+    	 			GREAL depth, GUINT feature1, GUINT feature2):
+				m_point(point),
+				m_normal(normal),
+				m_depth(depth),
+				m_feature1(feature1),
+				m_feature2(feature2)
+    {
+    }
+
+    //! Calcs key for coord classification
+    /*!
+    Each coordinate of m_point is scaled (x*1000+1, y*1333, z*2133+3) and truncated
+    to GINT; the key is x + (y<<4) + (z<<8) computed in GUINT arithmetic.
+    NOTE(review): assumes GINT and GUINT have the same width - confirm in gim_math.h.
+    */
+    SIMD_FORCE_INLINE GUINT calc_key_contact() const
+    {
+        const GINT scaled_x = (GINT)(m_point[0]*1000.0f+1.0f);
+        const GINT scaled_y = (GINT)(m_point[1]*1333.0f);
+        const GINT scaled_z = (GINT)(m_point[2]*2133.0f+3.0f);
+
+        GUINT position_key = (GUINT)scaled_x;
+        position_key += ((GUINT)scaled_y)<<4;
+        position_key += ((GUINT)scaled_z)<<8;
+        return position_key;
+    }
+
+    //! Replaces m_normal by the sum of m_normal and the given normals, rescaled with GIM_INV_SQRT.
+    /*!
+    Nothing changes when the squared length of the sum is below CONTACT_DIFF_EPSILON.
+    \param normals Array of at least normal_count normals to add
+    \param normal_count Number of entries read from normals
+    */
+    SIMD_FORCE_INLINE void interpolate_normals( btVector3 * normals,GUINT normal_count)
+    {
+        btVector3 normal_sum(m_normal);
+        for(GUINT n=0;n<normal_count;n++) normal_sum += normals[n];
+
+        GREAL inv_length = normal_sum.length2();//squared length until GIM_INV_SQRT runs
+        if(inv_length<CONTACT_DIFF_EPSILON) return;
+
+        GIM_INV_SQRT(inv_length,inv_length); // now 1/sqrt(squared length)
+        m_normal = normal_sum*inv_length;
+    }
+
+};
+
+
+//! Array of GIM_CONTACT, built on gim_array constructed with the argument 64
+class gim_contact_array:public gim_array<GIM_CONTACT>
+{
+public:
+	gim_contact_array():gim_array<GIM_CONTACT>(64)
+	{
+	}
+
+	//! Appends one contact filled with the given values; m_distance is not assigned.
+	SIMD_FORCE_INLINE void push_contact(const btVector3 &point,const btVector3 & normal,
+    	 			GREAL depth, GUINT feature1, GUINT feature2)
+	{
+		push_back_mem();
+		GIM_CONTACT & slot = back();
+		slot.m_point = point;
+		slot.m_normal = normal;
+		slot.m_depth = depth;
+		slot.m_feature1 = feature1;
+		slot.m_feature2 = feature2;
+	}
+
+	//! Appends one contact per point of tricontact; all share its normal, its depth and the features.
+	SIMD_FORCE_INLINE void push_triangle_contacts(
+		const GIM_TRIANGLE_CONTACT_DATA & tricontact,
+		GUINT feature1,GUINT feature2)
+	{
+		for(GUINT p = 0;p<tricontact.m_point_count ;p++ )
+		{
+			push_contact(tricontact.m_points[p],tricontact.m_separating_normal,
+				tricontact.m_penetration_depth,feature1,feature2);
+		}
+	}
+
+	//! Rebuilds this array from contacts, merging the ones that share a position key.
+	void merge_contacts(const gim_contact_array & contacts, bool normal_contact_average = true);
+	//! Clears this array and processes contacts as a single group; defined in gim_contact.cpp.
+	void merge_contacts_unique(const gim_contact_array & contacts);
+};
+
+#endif // GIM_CONTACT_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_geom_types.h b/src/bullet/BulletCollision/Gimpact/gim_geom_types.h
new file mode 100644
index 00000000..6b8f9ea6
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_geom_types.h
@@ -0,0 +1,97 @@
+#ifndef GIM_GEOM_TYPES_H_INCLUDED
+#define GIM_GEOM_TYPES_H_INCLUDED
+
+/*! \file gim_geom_types.h
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+
+#include "gim_math.h"
+
+
+
+//! Short Integer vector 2D
+typedef GSHORT vec2s[2];
+//! Short Integer vector 3D
+typedef GSHORT vec3s[3];
+//! Short Integer vector 4D
+typedef GSHORT vec4s[4];
+
+//! Unsigned Short Integer vector 2D
+typedef GUSHORT vec2us[2];
+//! Unsigned Short Integer vector 3D
+typedef GUSHORT vec3us[3];
+//! Unsigned Short Integer vector 4D
+typedef GUSHORT vec4us[4];
+
+//! Integer vector 2D
+typedef GINT vec2i[2];
+//! Integer vector 3D
+typedef GINT vec3i[3];
+//! Integer vector 4D
+typedef GINT vec4i[4];
+
+//! Unsigned Integer vector 2D
+typedef GUINT vec2ui[2];
+//! Unsigned Integer vector 3D
+typedef GUINT vec3ui[3];
+//! Unsigned Integer vector 4D
+typedef GUINT vec4ui[4];
+
+//! Float vector 2D
+typedef GREAL vec2f[2];
+//! Float vector 3D
+typedef GREAL vec3f[3];
+//! Float vector 4D
+typedef GREAL vec4f[4];
+
+//! Double vector 2D
+typedef GREAL2 vec2d[2];
+//! Double vector 3D
+typedef GREAL2 vec3d[3];
+//! Double vector 4D
+typedef GREAL2 vec4d[4];
+
+//! Matrix 2D, row ordered
+typedef GREAL mat2f[2][2];
+//! Matrix 3D, row ordered
+typedef GREAL mat3f[3][3];
+//! Matrix 4D, row ordered
+typedef GREAL mat4f[4][4];
+
+//! Quaternion
+typedef GREAL quatf[4];
+
+//typedef struct _aabb3f aabb3f;
+
+
+
+#endif // GIM_GEOM_TYPES_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_geometry.h b/src/bullet/BulletCollision/Gimpact/gim_geometry.h
new file mode 100644
index 00000000..c67a6991
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_geometry.h
@@ -0,0 +1,42 @@
+#ifndef GIM_GEOMETRY_H_INCLUDED
+#define GIM_GEOMETRY_H_INCLUDED
+
+/*! \file gim_geometry.h
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+///Additional Headers for Collision
+#include "gim_basic_geometry_operations.h"
+#include "gim_clip_polygon.h"
+#include "gim_box_collision.h"
+#include "gim_tri_collision.h"
+
+#endif // GIM_GEOMETRY_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_hash_table.h b/src/bullet/BulletCollision/Gimpact/gim_hash_table.h
new file mode 100644
index 00000000..e4237c2c
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_hash_table.h
@@ -0,0 +1,902 @@
+#ifndef GIM_HASH_TABLE_H_INCLUDED
+#define GIM_HASH_TABLE_H_INCLUDED
+/*! \file gim_hash_table.h
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+#include "gim_radixsort.h"
+
+
+#define GIM_INVALID_HASH 0xffffffff //!< A very very high value
+#define GIM_DEFAULT_HASH_TABLE_SIZE 380
+#define GIM_DEFAULT_HASH_TABLE_NODE_SIZE 4
+#define GIM_HASH_TABLE_GROW_FACTOR 2
+
+#define GIM_MIN_RADIX_SORT_SIZE 860 //!< calibrated on a PIII
+
+//! Key/value entry stored by the hash table; ordering and equality use m_key only.
+template<typename T>
+struct GIM_HASH_TABLE_NODE
+{
+    GUINT m_key;
+    T m_data;
+    //! Empty body: neither m_key nor m_data is assigned here
+    GIM_HASH_TABLE_NODE()
+    {
+    }
+
+    //! Copies key and data by assignment
+    GIM_HASH_TABLE_NODE(const GIM_HASH_TABLE_NODE & value)
+    {
+        m_key = value.m_key;
+        m_data = value.m_data;
+    }
+
+    GIM_HASH_TABLE_NODE(GUINT key, const T & data)
+    {
+        m_key = key;
+        m_data = data;
+    }
+
+    //! True when this key is smaller than the other key
+    bool operator <(const GIM_HASH_TABLE_NODE<T> & other) const
+    {
+        return m_key < other.m_key;
+    }
+
+    //! True when this key is greater than the other key
+    bool operator >(const GIM_HASH_TABLE_NODE<T> & other) const
+    {
+        return m_key > other.m_key;
+    }
+
+    //! True when both keys are equal; m_data is not compared
+    bool operator ==(const GIM_HASH_TABLE_NODE<T> & other) const
+    {
+        return m_key == other.m_key;
+    }
+};
+
+///Functor returning the m_key member of an element
+class GIM_HASH_NODE_GET_KEY
+{
+public:
+	template<class T>
+	inline GUINT operator()( const T& node)
+	{
+		return node.m_key;
+	}
+};
+
+
+
+///Functor comparing an element key against a key: returns -1, 0 or 1
+class GIM_HASH_NODE_CMP_KEY_MACRO
+{
+public:
+	template<class T>
+	inline int operator() ( const T& a, GUINT key)
+	{//compare instead of subtracting: (int)(a.m_key - key) flips sign for distant keys
+		return (a.m_key > key) - (a.m_key < key);
+	}
+};
+
+///Functor comparing the keys of two hash nodes: returns -1, 0 or 1
+class GIM_HASH_NODE_CMP_MACRO
+{
+public:
+	template<class T>
+	inline int operator() ( const T& a, const T& b )
+	{//compare instead of subtracting: (int)(a.m_key - b.m_key) flips sign for distant keys
+		return (a.m_key > b.m_key) - (a.m_key < b.m_key);
+	}
+};
+
+
+
+
+
+//! Sorting for hash table
+/*!
+Uses gim_heap_sort below GIM_MIN_RADIX_SORT_SIZE elements, gim_radix_sort otherwise
+*/
+template<typename T>
+void gim_sort_hash_node_array(T * array, GUINT array_count)
+{
+    if(array_count>=GIM_MIN_RADIX_SORT_SIZE)
+    {
+        memcopy_elements_func element_copier;
+        gim_radix_sort(array,array_count,GIM_HASH_NODE_GET_KEY(),element_copier);
+        return;
+    }
+
+    //small array: comparison sort on the node keys
+    gim_heap_sort(array,array_count,GIM_HASH_NODE_CMP_MACRO());
+}
+
+
+
+
+
+
+// Ascending primes used as hash table sizes; the largest literals need GUINT to hold 32 bits.
+#define GIM_NUM_PRIME 28
+
+static const GUINT gim_prime_list[GIM_NUM_PRIME] =
+{
+  53ul,         97ul,         193ul,       389ul,       769ul,
+  1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
+  49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
+  1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
+  50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul,
+  1610612741ul, 3221225473ul, 4294967291ul
+};
+
+//! Looks number up in gim_prime_list (search range: indices 0..GIM_NUM_PRIME-2)
+inline GUINT gim_next_prime(GUINT number)
+{
+    GUINT prime_index = 0;
+    gim_binary_search(gim_prime_list,0,(GIM_NUM_PRIME-2),number,prime_index);
+
+    //NOTE(review): presumably the slot of the nearest upper prime - confirm gim_binary_search
+    return gim_prime_list[prime_index];
+}
+
+
+
+//! A compact hash table implementation
+/*!
+A memory aligned compact hash table that could be treated as an array.
+It could be a simple sorted array without the overhead of the hash key buckets, or could
+be a formal hash table with an array of keys.
+You can use switch_to_hashtable() and switch_to_sorted_array() to save space or increase speed.
+<br/>
+
+<ul>
+<li> if node_size = 0, then this container becomes a simple sorted array allocator. reserve_size is used for reserve memory in m_nodes.
+When the array size reaches the size equivalent to 'min_hash_table_size', then it becomes a hash table by calling check_for_switching_to_hashtable.
+<li> If node_size != 0, then this container becomes a hash table for ever
+</ul>
+
+*/
+template<class T>
+class gim_hash_table
+{
+protected:
+    typedef GIM_HASH_TABLE_NODE<T> _node_type;
+
+    //!The nodes
+    //array< _node_type, SuperAllocator<_node_type> > m_nodes;
+    gim_array< _node_type > m_nodes;
+    //SuperBufferedArray< _node_type > m_nodes;
+    bool m_sorted;
+
+    ///Hash table data management. The hash table has the indices to the corresponding m_nodes array
+    GUINT * m_hash_table;//!<
+    GUINT m_table_size;//!<
+    GUINT m_node_size;//!<
+    GUINT m_min_hash_table_size;
+
+
+
+    //! Returns the cell index
+    /*!
+    \return the cell of hashkey's bucket whose node has that key, or GIM_INVALID_HASH
+    */
+    inline GUINT _find_cell(GUINT hashkey)
+    {
+        _node_type * nodesptr = m_nodes.pointer();
+        //only the m_node_size consecutive cells of the key's bucket are examined
+        const GUINT bucket_begin = (hashkey%m_table_size)*m_node_size;
+        const GUINT bucket_end = bucket_begin + m_node_size;
+        for(GUINT cell=bucket_begin;cell<bucket_end;cell++)
+        {
+            const GUINT node_index = m_hash_table[cell];
+            if(node_index == GIM_INVALID_HASH) continue;//empty cell
+            if(nodesptr[node_index].m_key == hashkey) return cell;
+        }
+        return GIM_INVALID_HASH;
+    }
+
+    //! Find the avaliable cell for the hashkey, and return an existing cell if it has the same hash key
+    /*!
+    \return the cell already holding hashkey if any; otherwise the first empty cell of
+    its bucket, or GIM_INVALID_HASH when the bucket is full.
+    */
+    inline GUINT _find_avaliable_cell(GUINT hashkey)
+    {
+        _node_type * nodesptr = m_nodes.pointer();
+        const GUINT bucket_begin = (hashkey%m_table_size)*m_node_size;
+        const GUINT bucket_end = bucket_begin + m_node_size;
+        GUINT first_free_cell = GIM_INVALID_HASH;
+
+        for(GUINT cell=bucket_begin;cell<bucket_end;cell++)
+        {
+            const GUINT node_index = m_hash_table[cell];
+            if(node_index != GIM_INVALID_HASH)
+            {
+                if(nodesptr[node_index].m_key == hashkey) return cell;
+            }
+            else if(first_free_cell==GIM_INVALID_HASH)
+            {
+                first_free_cell = cell;//remember the first hole, keep scanning for the key
+            }
+        }
+        return first_free_cell;
+    }
+
+
+
+    //! reserves the memory for the hash table.
+    /*!
+    \pre hash table must be empty
+    \post m_table_size is the prime chosen by gim_next_prime and m_hash_table has
+    m_table_size*m_node_size cells. The cells are NOT initialized here (see _invalidate_keys).
+    Does nothing when newtablesize or m_node_size is 0.
+    */
+    inline void _reserve_table_memory(GUINT newtablesize)
+    {
+        if(newtablesize==0 || m_node_size==0) return;
+
+        //Get a Prime size
+        m_table_size = gim_next_prime(newtablesize);
+
+        //Alloc the data buffer
+        const GUINT cell_count = m_table_size*m_node_size;
+        m_hash_table =  (GUINT *)gim_alloc(cell_count*sizeof(GUINT));
+    }
+
+    //! Marks every cell as empty by storing GIM_INVALID_HASH in it.
+    //! Covers m_table_size*m_node_size cells; does not touch m_nodes.
+    inline void _invalidate_keys()
+    {
+        const GUINT cell_count = m_table_size*m_node_size;
+        GUINT cell = 0;
+        while(cell<cell_count) m_hash_table[cell++] = GIM_INVALID_HASH;
+    }
+
+    //! Frees the hash table buffer, if any, and resets m_hash_table and m_table_size
+    inline void _clear_table_memory()
+    {
+        if(!m_hash_table) return;//nothing allocated
+        gim_free(m_hash_table);
+        m_table_size = 0;
+        m_hash_table = NULL;
+    }
+
+    //! Invalidates all cells and indexes again every node that has a valid key.
+    //! Repeated keys are discarded; a full bucket grows the table (which rehashes again).
+    inline void _rehash()
+    {
+        _invalidate_keys();
+        _node_type * nodesptr = m_nodes.pointer();
+        for(GUINT i=0;i<(GUINT)m_nodes.size();i++)
+        {
+            GUINT nodekey = nodesptr[i].m_key;
+            if(nodekey == GIM_INVALID_HASH) continue;
+            //Search for the avaliable cell in buffer
+            GUINT index = _find_avaliable_cell(nodekey);
+            if(index==GIM_INVALID_HASH)
+            {//bucket full: indexing m_hash_table with it would be out of bounds
+                _resize_table(m_table_size+1);//rebuilds the whole table from scratch
+                return;
+            }
+            if(m_hash_table[index]!=GIM_INVALID_HASH)
+            {//The cell is already used... discard this new incoming object, repeated key
+                btAssert(nodesptr[m_hash_table[index]].m_key==nodekey);
+                nodesptr[i].m_key = GIM_INVALID_HASH;
+            }
+            else
+            {
+                //the cell stores the index of the node in m_nodes
+                m_hash_table[index] = i;
+            }
+        }
+    }
+
+    //! Resize hash table indices
+    inline void _resize_table(GUINT newsize)
+    {
+        //1) release the current cell buffer
+        this->_clear_table_memory();
+        //2) allocate the cells for the requested size
+        this->_reserve_table_memory(newsize);
+        //3) invalidate the cells and index the nodes again
+        this->_rehash();
+    }
+
+    //! Destroy hash table memory
+    inline void _destroy()
+    {
+        //_clear_table_memory() is only reached when a table is allocated
+        if(m_hash_table!=NULL) _clear_table_memory();
+    }
+
+    //! Finds an avaliable hash table cell, and resizes the table if there isn't space
+    inline GUINT _assign_hash_table_cell(GUINT hashkey)
+    {
+        GUINT cell_index = _find_avaliable_cell(hashkey);
+        if(cell_index==GIM_INVALID_HASH)
+        {
+            //rehashing
+            _resize_table(m_table_size+1);
+            //assign the outer variable: a new local here would leave the result invalid
+            cell_index = _find_avaliable_cell(hashkey);
+            btAssert(cell_index!=GIM_INVALID_HASH);
+        }
+        return cell_index;
+    }
+
+    //! erase by index in hash table
+    /*! \return false if index is out of range, else the result of _erase_unsorted */
+    inline bool _erase_by_index_hash_table(GUINT index)
+    {
+        if(index >= m_nodes.size()) return false;
+
+        const GUINT hashkey = m_nodes[index].m_key;
+        if(hashkey != GIM_INVALID_HASH)
+        {
+            //release the cell that points at this node
+            const GUINT cell_index = _find_cell(hashkey);
+            btAssert(cell_index!=GIM_INVALID_HASH);
+            btAssert(m_hash_table[cell_index]==index);
+            m_hash_table[cell_index] = GIM_INVALID_HASH;
+        }
+        return this->_erase_unsorted(index);
+    }
+
+    //! erase by key in hash table
+    /*! \return false if hashkey is GIM_INVALID_HASH or has no cell, else the result of _erase_unsorted */
+    inline bool _erase_hash_table(GUINT hashkey)
+    {
+        if(hashkey == GIM_INVALID_HASH) return false;
+        //Search for the cell holding the key
+        const GUINT cell_index = _find_cell(hashkey);
+        if(cell_index == GIM_INVALID_HASH) return false;
+
+        //free the cell, then drop the node it pointed to
+        const GUINT node_index = m_hash_table[cell_index];
+        m_hash_table[cell_index] = GIM_INVALID_HASH;
+        return this->_erase_unsorted(node_index);
+    }
+
+
+
+    //! insert an element in hash table
+    /*!
+    If the key is already present, nothing is inserted.
+    A key equal to GIM_INVALID_HASH is always appended and never gets a cell.
+    \param hashkey Key of the element
+    \param value Data stored with the key
+    \return the index in the array of the existing element,or GIM_INVALID_HASH if the element has been inserted
+    If so, the element has been inserted at the last position of the array.
+    */
+    inline GUINT _insert_hash_table(GUINT hashkey, const T & value)
+    {
+        if(hashkey!=GIM_INVALID_HASH)
+        {
+            const GUINT cell_index = _assign_hash_table_cell(hashkey);
+            const GUINT existing_index = m_hash_table[cell_index];
+            if(existing_index != GIM_INVALID_HASH)
+            {
+                return existing_index;// Not overwritten
+            }
+            //the cell records the current size of m_nodes as the index of the new node
+            m_hash_table[cell_index] = m_nodes.size();
+        }
+
+        _insert_unsorted(hashkey,value);
+        return GIM_INVALID_HASH;
+    }
+
+    //! insert an element in hash table, replacing an existing one.
+    /*!
+    If the key exists, the whole node (key and value) is overwritten in place.
+    An element whose key is GIM_INVALID_HASH is always appended and gets no cell.
+    \return the index in the array of the replaced element, or GIM_INVALID_HASH
+    if the element has been inserted at the last position of the array.
+    */
+    inline GUINT _insert_hash_table_replace(GUINT hashkey, const T & value)
+    {
+        if(hashkey!=GIM_INVALID_HASH)
+        {
+            // Obtain the table cell for this key from _assign_hash_table_cell()
+            const GUINT slot = _assign_hash_table_cell(hashkey);
+            const GUINT stored_index = m_hash_table[slot];
+
+            if(stored_index != GIM_INVALID_HASH)
+            {
+                // Key already present: overwrite the node in place
+                m_nodes[stored_index] = _node_type(hashkey,value);
+                return stored_index;
+            }
+
+            // The element is appended right below, so its index is the current size
+            m_hash_table[slot] = m_nodes.size();
+        }
+
+        // Reached for elements without a valid key and for keys not stored yet
+        _insert_unsorted(hashkey,value);
+        return GIM_INVALID_HASH;
+
+    }
+
+    
+    ///Sorted array data management (the hash table stores indices into m_nodes).
+    //! Removes node `index` via m_nodes.erase_sorted(); \return false if index is out of range.
+    inline bool _erase_sorted(GUINT index) {
+        if(!(index<(GUINT)m_nodes.size())) return false;
+        m_nodes.erase_sorted(index);
+        if(m_nodes.size()<2) m_sorted = false; // flag is cleared once fewer than two nodes remain
+        return true;
+    }
+
+    //! Fast removal of node `index`; the sorted flag is cleared afterwards.
+    /*! In hash-table mode the cell of the last node is redirected to `index`
+    before m_nodes.erase() runs. \return false if index is out of range. */
+    inline bool _erase_unsorted(GUINT index)
+    {
+        if(!(index<m_nodes.size())) return false;
+        const GUINT tail = m_nodes.size()-1;
+        if(m_hash_table!=0 && index<tail)
+        {
+            const GUINT tail_key = m_nodes[tail].m_key;
+            if(tail_key!=GIM_INVALID_HASH)
+            {
+                // the last element is the one that gets swapped into `index`,
+                // so its table cell has to point at the new position
+                const GUINT slot = _find_cell(tail_key);
+                btAssert(slot!=GIM_INVALID_HASH);
+                m_hash_table[slot] = index;
+            }
+        }
+        m_nodes.erase(index);
+        m_sorted = false;
+        return true;
+    }
+
+    //! Inserts a node at array position `pos`.
+    /*! Afterwards check_for_switching_to_hashtable() decides whether the
+    container has to be transformed into a hash table. */
+    inline void _insert_in_pos(GUINT hashkey, const T & value, GUINT pos)
+    {
+        const _node_type new_node(hashkey,value);
+        m_nodes.insert(new_node,pos);
+        this->check_for_switching_to_hashtable();
+    }
+
+    //! Insert an element in an ordered array unless its key already exists
+    /*!
+    An element with key GIM_INVALID_HASH, or any element added to an empty
+    container, is simply appended. Otherwise gim_binary_search_ex() supplies
+    the position.
+    \return the index of the element already holding hashkey, or
+    GIM_INVALID_HASH when the element has been stored.
+    */
+    inline GUINT _insert_sorted(GUINT hashkey, const T & value)
+    {
+        const bool append_only = (size()==0) || (hashkey==GIM_INVALID_HASH);
+        if(append_only)
+        {
+            m_nodes.push_back(_node_type(hashkey,value));
+            return GIM_INVALID_HASH;
+        }
+
+        // Search between index 0 and the last index of the array
+        GUINT slot = 0;
+        GUINT upper = m_nodes.size()-1;
+        _node_type * nodes = m_nodes.pointer();
+        const bool key_present = gim_binary_search_ex(
+            nodes,0,upper,slot,hashkey,GIM_HASH_NODE_CMP_KEY_MACRO());
+
+        // An existing key is reported and left untouched
+        if(key_present) return slot;
+
+        // Otherwise the search result is used as the insertion position
+        _insert_in_pos(hashkey, value, slot);
+        return GIM_INVALID_HASH;
+    }
+
+    //! Inserts into the ordered array, overwriting the node of an existing key.
+    /*! \return the index of the overwritten node, or GIM_INVALID_HASH when the element
+    was stored as a new node (the contract documented for insert_override). */
+    inline GUINT _insert_sorted_replace(GUINT hashkey, const T & value)
+    {
+        if(hashkey==GIM_INVALID_HASH || size()==0)
+        {
+            m_nodes.push_back(_node_type(hashkey,value));
+            return GIM_INVALID_HASH;
+        }
+
+        GUINT result_ind = 0; // initialised as in _insert_sorted
+        GUINT last_index = m_nodes.size()-1;
+        _node_type * ptr = m_nodes.pointer();
+        bool found = gim_binary_search_ex(
+        	ptr,0,last_index,result_ind,hashkey,GIM_HASH_NODE_CMP_KEY_MACRO());
+
+        if(found)
+        {
+            //Replace the existing node and report its position
+            m_nodes[result_ind] = _node_type(hashkey,value);
+            return result_ind;
+        }
+        //New key: insert at the position given by the search
+        _insert_in_pos(hashkey, value, result_ind);
+        return GIM_INVALID_HASH;
+    }
+
+    //! Fast insertion: appends a node at the end of m_nodes without any lookup.
+    /*! Clears the sorted flag. \return always GIM_INVALID_HASH. */
+    inline GUINT  _insert_unsorted(GUINT hashkey, const T & value) {
+        m_nodes.push_back(_node_type(hashkey,value));
+        m_sorted = false;
+        return GIM_INVALID_HASH;
+    }
+
+    
+
+public:
+
+    //! Builds the container either as a plain array or directly as a hash table.
+    /*!
+        <ul>
+        <li> if node_size = 0, then this container becomes a simple sorted array allocator. reserve_size is used for reserve memory in m_nodes.
+        When the array size reaches the size equivalent to 'min_hash_table_size', then it becomes a hash table by calling check_for_switching_to_hashtable.
+        <li> If node_size != 0, then this container becomes a hash table for ever; a reserve_size of 0 is replaced by GIM_DEFAULT_HASH_TABLE_SIZE.
+        </ul>
+    */
+    gim_hash_table(GUINT reserve_size = GIM_DEFAULT_HASH_TABLE_SIZE,
+                     GUINT node_size = GIM_DEFAULT_HASH_TABLE_NODE_SIZE,
+                     GUINT min_hash_table_size = GIM_INVALID_HASH)
+    {
+        m_hash_table = NULL;
+        m_table_size = 0;
+        m_sorted = false;
+        m_node_size = node_size;
+        m_min_hash_table_size = min_hash_table_size;
+
+        if(m_node_size==0)
+        {
+            // Array mode: reserving node storage is optional
+            if(reserve_size!=0) m_nodes.reserve(reserve_size);
+            return;
+        }
+
+        // Hash-table mode: never start from an empty reservation
+        GUINT initial_size = reserve_size;
+        if(initial_size==0)
+        {
+            initial_size = GIM_DEFAULT_HASH_TABLE_SIZE;
+        }
+
+        m_nodes.reserve(initial_size);
+        _reserve_table_memory(initial_size);
+        _invalidate_keys();
+
+    }
+
+    //! Destructor; all cleanup is delegated to _destroy().
+    ~gim_hash_table() {
+        _destroy();
+    }
+
+    //! \return true while m_hash_table is non-null, i.e. the container works as a hash table.
+    inline bool is_hash_table()
+    {
+        return m_hash_table!=NULL;
+    }
+
+    //! \return true for fewer than two nodes, otherwise the stored sorted flag.
+    inline bool is_sorted()
+    {
+        return size()<2 || m_sorted;
+    }
+
+    //! Sorts the node array with gim_sort_hash_node_array() and sets the sorted flag.
+    /*!
+    Nothing is done when is_sorted() already holds.
+    In hash-table mode _rehash() is called after sorting.
+    \return always true.
+    */
+    bool sort()
+    {
+        // is_sorted() is also true for fewer than two nodes, so no extra size check is needed
+        if(is_sorted()) return true;
+
+        _node_type * ptr = m_nodes.pointer();
+        GUINT siz = m_nodes.size();
+        gim_sort_hash_node_array(ptr,siz);
+        m_sorted=true;
+
+        if(m_hash_table) _rehash();
+        return true;
+    }
+
+    //! Turns the container into a hash table.
+    /*! The table is resized to GIM_DEFAULT_HASH_TABLE_SIZE, or to size()+1 when at least
+    that many nodes are stored. \return false if it already is a hash table. */
+    bool switch_to_hashtable()
+    {
+        if(m_hash_table) return false;
+        if(m_node_size==0) m_node_size = GIM_DEFAULT_HASH_TABLE_NODE_SIZE;
+
+        GUINT table_size = GIM_DEFAULT_HASH_TABLE_SIZE;
+        if(!(m_nodes.size()<GIM_DEFAULT_HASH_TABLE_SIZE)) table_size = m_nodes.size()+1;
+        _resize_table(table_size);
+
+        return true;
+    }
+
+    //! Calls _clear_table_memory() and then sorts the nodes.
+    /*! \return true if there is no hash table, otherwise the result of sort(). */
+    bool switch_to_sorted_array() {
+        if(m_hash_table!=NULL) { _clear_table_memory(); return sort(); }
+        return true;
+    }
+
+    //!If the container reaches m_min_hash_table_size nodes, it is turned into a hash table
+    /*! \return true if the container is (or has just become) a hash table. */
+    bool check_for_switching_to_hashtable()
+    {
+        if(this->m_hash_table) return true;
+
+        // Still below the threshold: stay an array
+        if(m_nodes.size()< m_min_hash_table_size) return false;
+
+        if(m_node_size == 0)
+        {
+            m_node_size = GIM_DEFAULT_HASH_TABLE_NODE_SIZE;
+        }
+        // one cell more than the nodes currently stored
+        _resize_table(m_nodes.size()+1);
+        return true;
+    }
+
+    //! Overrides the sorted flag; this method does not inspect the node order.
+    inline void set_sorted(bool value) {
+        m_sorted = value;
+    }
+
+    //! \return how many nodes m_nodes currently holds.
+    inline GUINT size() const {
+        const GUINT node_count = m_nodes.size();
+        return node_count;
+    }
+
+    //! \return the hash key of the node at `index` (no range check in this method).
+    inline GUINT get_key(GUINT index) const {
+        const GUINT node_key = m_nodes[index].m_key;
+        return node_key;
+    }
+
+    //! Gives pointer access to the value stored at array position `index`.
+    /*! The index is handed to m_nodes without a range check in this method. */
+    inline T * get_value_by_index(GUINT index)
+    {
+        T * value_ptr = &m_nodes[index].m_data;
+        return value_ptr;
+    }
+
+    //! Read-only access to the value at array position `index`.
+    inline const T& operator[](GUINT index) const {
+        return m_nodes[index].m_data;
+    }
+
+    //! Writable access to the value at array position `index`.
+    inline T& operator[](GUINT index) {
+        return m_nodes[index].m_data;
+    }
+
+    //! Finds the index of the element with the key
+    /*!
+    Hash-table mode asks _find_cell(); a sorted array is searched with gim_binary_search_ex().
+    An unsorted array holding two or more nodes is not searched at all.
+    \return the index in the array of the element, or GIM_INVALID_HASH if it is not found
+    */
+    inline GUINT find(GUINT hashkey)
+    {
+        if(m_hash_table)
+        {
+            const GUINT slot = _find_cell(hashkey);
+            if(slot!=GIM_INVALID_HASH) return m_hash_table[slot];
+            return GIM_INVALID_HASH;
+        }
+
+        const GUINT node_count = m_nodes.size();
+        if(node_count==0) return GIM_INVALID_HASH;
+        if(node_count==1)
+        {
+            // a single node is compared directly, whatever the sorted flag says
+            if(m_nodes[0].m_key == hashkey) return 0;
+            return GIM_INVALID_HASH;
+        }
+        if(!m_sorted) return GIM_INVALID_HASH;
+
+        //Binary search between index 0 and the last index
+        GUINT hit = 0;
+        GUINT last_slot = node_count-1;
+        _node_type * nodes = m_nodes.pointer();
+        const bool found = gim_binary_search_ex(
+            nodes,0,last_slot,hit,hashkey,GIM_HASH_NODE_CMP_KEY_MACRO());
+        if(found) return hit;
+        return GIM_INVALID_HASH;
+    }
+
+    //! Retrieves the value associated with the given hash key
+    /*!
+    \return pointer to the value of the element located by find(), or NULL
+    */
+    inline T * get_value(GUINT hashkey)
+    {
+        const GUINT node_index = find(hashkey);
+        if(node_index != GIM_INVALID_HASH) return &m_nodes[node_index].m_data;
+        return NULL;
+    }
+
+
+    //! Erases the node at array position `index`.
+    /*!
+    Dispatches on the current mode: hash table, sorted array or unsorted array.
+    \return true if a node was removed, false if index is out of range.
+    */
+    inline bool erase_by_index(GUINT index)
+    {
+        // size() itself is already past the end, hence >= (the helpers below
+        // apply the same test)
+        if(index >= m_nodes.size()) return false;
+
+        if(m_hash_table != NULL)
+        {
+            return this->_erase_by_index_hash_table(index);
+        }
+
+        if(is_sorted())
+        {
+            return this->_erase_sorted(index);
+        }
+
+        return this->_erase_unsorted(index);
+    }
+
+
+
+    //! Erases the node at `index` through the unsorted removal paths.
+    /*! Hash-table mode goes through _erase_by_index_hash_table(), array mode
+    through _erase_unsorted(). \return false if index is out of range. */
+    inline bool erase_by_index_unsorted(GUINT index)
+    {
+        // size() itself is already past the end, hence >=
+        if(index >= m_nodes.size()) return false;
+
+        if(m_hash_table == NULL)
+        {
+            return this->_erase_unsorted(index);
+        }
+        return this->_erase_by_index_hash_table(index);
+    }
+
+
+
+    //! Erases the node stored under hashkey.
+    /*!
+    Works in hash-table mode and on sorted arrays; an unsorted array is left
+    untouched and false is returned.
+    \return true if a node was found and removed.
+    */
+    inline bool erase_by_key(GUINT hashkey)
+    {
+        if(size()==0) return false;
+
+        if(m_hash_table) return this->_erase_hash_table(hashkey);
+
+        // Array mode: only attempted while is_sorted() holds
+        if(!is_sorted()) return false;
+
+        const GUINT node_index = find(hashkey);
+        if(node_index == GIM_INVALID_HASH)
+        {
+            return false;
+        }
+        return this->_erase_sorted(node_index);
+    }
+
+    //! Removes every node; in hash-table mode every table cell is invalidated too.
+    void clear()
+    {
+        m_nodes.clear();
+
+        if(m_hash_table!=NULL)
+        {
+            // the table spans m_table_size*m_node_size cells
+            const GUINT cell_count = m_table_size*m_node_size;
+            for(GUINT cell=0; cell<cell_count; ++cell)
+                m_hash_table[cell] = GIM_INVALID_HASH;
+            m_sorted = false; // the flag is only reset in hash-table mode
+        }
+    }
+
+    //! Insert an element into the container without overwriting an existing key
+    /*!
+    Hash-table mode and sorted arrays refuse a key that is already present; an
+    unsorted array simply appends the element.
+    \return If GIM_INVALID_HASH, the object has been inserted successfully. Else it returns the position
+    of the existing element.
+    */
+    inline GUINT insert(GUINT hashkey, const T & element)
+    {
+        if(!m_hash_table)
+        {
+            // array mode: ordered insertion only while is_sorted() holds
+            if(this->is_sorted()) return this->_insert_sorted(hashkey,element);
+            return this->_insert_unsorted(hashkey,element);
+        }
+        return this->_insert_hash_table(hashkey,element);
+    }
+
+    //! Insert an element into the hash, and could overwrite an existing object with the same hash.
+    /*!
+    \return If GIM_INVALID_HASH, the object has been inserted successfully. Else it returns the position
+    of the replaced element. An unsorted array is not searched: the element is appended.
+    */
+    inline GUINT insert_override(GUINT hashkey, const T & element)
+    {
+        if(m_hash_table)
+        {
+            return this->_insert_hash_table_replace(hashkey,element);
+        }
+        if(this->is_sorted())
+        {
+            return this->_insert_sorted_replace(hashkey,element);
+        }
+        // Appended as a new node, nothing was replaced: forward GIM_INVALID_HASH
+        return this->_insert_unsorted(hashkey,element);
+    }
+
+
+
+    //! Insert an element into the hash; in array mode it is appended without keeping the order
+    /*! \return the result of _insert_hash_table() in hash-table mode, otherwise
+    GIM_INVALID_HASH from _insert_unsorted(). */
+    inline GUINT insert_unsorted(GUINT hashkey,const T & element)
+    {
+        if(m_hash_table==NULL)
+        {
+            return this->_insert_unsorted(hashkey,element);
+        }
+        return this->_insert_hash_table(hashkey,element);
+    }
+
+
+};
+
+
+
+#endif // GIM_CONTAINERS_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_linear_math.h b/src/bullet/BulletCollision/Gimpact/gim_linear_math.h
new file mode 100644
index 00000000..64f11b49
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_linear_math.h
@@ -0,0 +1,1573 @@
+#ifndef GIM_LINEAR_H_INCLUDED
+#define GIM_LINEAR_H_INCLUDED
+
+/*! \file gim_linear_math.h
+*\author Francisco Leon Najera
+Type Independent Vector and matrix operations.
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+
+#include "gim_math.h"
+#include "gim_geom_types.h"
+
+
+
+
+//! Zero out a 2D vector: sets a[0] and a[1] to 0.0f
+#define VEC_ZERO_2(a)				\
+{						\
+   (a)[0] = (a)[1] = 0.0f;			\
+}\
+
+
+//! Zero out a 3D vector: sets a[0], a[1] and a[2] to 0.0f
+#define VEC_ZERO(a)				\
+{						\
+   (a)[0] = (a)[1] = (a)[2] = 0.0f;		\
+}\
+
+
+/// Zero out a 4D vector: sets a[0] through a[3] to 0.0f
+#define VEC_ZERO_4(a)				\
+{						\
+   (a)[0] = (a)[1] = (a)[2] = (a)[3] = 0.0f;	\
+}\
+
+
+/// Copy 2D vector: b = a (destination first, source second)
+#define VEC_COPY_2(b,a)				\
+{						\
+   (b)[0] = (a)[0];				\
+   (b)[1] = (a)[1];				\
+}\
+
+
+/// Copy 3D vector: b = a (destination first, source second)
+#define VEC_COPY(b,a)				\
+{						\
+   (b)[0] = (a)[0];				\
+   (b)[1] = (a)[1];				\
+   (b)[2] = (a)[2];				\
+}\
+
+
+/// Copy 4D vector: b = a (destination first, source second)
+#define VEC_COPY_4(b,a)				\
+{						\
+   (b)[0] = (a)[0];				\
+   (b)[1] = (a)[1];				\
+   (b)[2] = (a)[2];				\
+   (b)[3] = (a)[3];				\
+}\
+
+/// VECTOR SWAP: exchanges the first three components of b and a through GIM_SWAP_NUMBERS
+#define VEC_SWAP(b,a)				\
+{  \
+    GIM_SWAP_NUMBERS((b)[0],(a)[0]);\
+    GIM_SWAP_NUMBERS((b)[1],(a)[1]);\
+    GIM_SWAP_NUMBERS((b)[2],(a)[2]);\
+}\
+
+/// 2D vector difference: v21 = v2 - v1
+#define VEC_DIFF_2(v21,v2,v1)			\
+{						\
+   (v21)[0] = (v2)[0] - (v1)[0];		\
+   (v21)[1] = (v2)[1] - (v1)[1];		\
+}\
+
+
+/// 3D vector difference: v21 = v2 - v1
+#define VEC_DIFF(v21,v2,v1)			\
+{						\
+   (v21)[0] = (v2)[0] - (v1)[0];		\
+   (v21)[1] = (v2)[1] - (v1)[1];		\
+   (v21)[2] = (v2)[2] - (v1)[2];		\
+}\
+
+
+/// 4D vector difference: v21 = v2 - v1
+#define VEC_DIFF_4(v21,v2,v1)			\
+{						\
+   (v21)[0] = (v2)[0] - (v1)[0];		\
+   (v21)[1] = (v2)[1] - (v1)[1];		\
+   (v21)[2] = (v2)[2] - (v1)[2];		\
+   (v21)[3] = (v2)[3] - (v1)[3];		\
+}\
+
+
+/// 2D vector sum: v21 = v2 + v1
+#define VEC_SUM_2(v21,v2,v1)			\
+{						\
+   (v21)[0] = (v2)[0] + (v1)[0];		\
+   (v21)[1] = (v2)[1] + (v1)[1];		\
+}\
+
+
+/// 3D vector sum: v21 = v2 + v1
+#define VEC_SUM(v21,v2,v1)			\
+{						\
+   (v21)[0] = (v2)[0] + (v1)[0];		\
+   (v21)[1] = (v2)[1] + (v1)[1];		\
+   (v21)[2] = (v2)[2] + (v1)[2];		\
+}\
+
+
+/// 4D vector sum: v21 = v2 + v1
+#define VEC_SUM_4(v21,v2,v1)			\
+{						\
+   (v21)[0] = (v2)[0] + (v1)[0];		\
+   (v21)[1] = (v2)[1] + (v1)[1];		\
+   (v21)[2] = (v2)[2] + (v1)[2];		\
+   (v21)[3] = (v2)[3] + (v1)[3];		\
+}\
+
+
+/// scalar times 2D vector: c = a*b, where a is the scalar and b the vector
+#define VEC_SCALE_2(c,a,b)			\
+{						\
+   (c)[0] = (a)*(b)[0];				\
+   (c)[1] = (a)*(b)[1];				\
+}\
+
+
+/// scalar times 3D vector: c = a*b, where a is the scalar and b the vector
+#define VEC_SCALE(c,a,b)			\
+{						\
+   (c)[0] = (a)*(b)[0];				\
+   (c)[1] = (a)*(b)[1];				\
+   (c)[2] = (a)*(b)[2];				\
+}\
+
+
+/// scalar times 4D vector: c = a*b, where a is the scalar and b the vector
+#define VEC_SCALE_4(c,a,b)			\
+{						\
+   (c)[0] = (a)*(b)[0];				\
+   (c)[1] = (a)*(b)[1];				\
+   (c)[2] = (a)*(b)[2];				\
+   (c)[3] = (a)*(b)[3];				\
+}\
+
+
+/// accumulate scaled 2D vector: c += a*b, where a is the scalar and b the vector
+#define VEC_ACCUM_2(c,a,b)			\
+{						\
+   (c)[0] += (a)*(b)[0];			\
+   (c)[1] += (a)*(b)[1];			\
+}\
+
+
+/// accumulate scaled 3D vector: c += a*b, where a is the scalar and b the vector
+#define VEC_ACCUM(c,a,b)			\
+{						\
+   (c)[0] += (a)*(b)[0];			\
+   (c)[1] += (a)*(b)[1];			\
+   (c)[2] += (a)*(b)[2];			\
+}\
+
+
+/// accumulate scaled 4D vector: c += a*b, where a is the scalar and b the vector
+#define VEC_ACCUM_4(c,a,b)			\
+{						\
+   (c)[0] += (a)*(b)[0];			\
+   (c)[1] += (a)*(b)[1];			\
+   (c)[2] += (a)*(b)[2];			\
+   (c)[3] += (a)*(b)[3];			\
+}\
+
+
+/// 2D vector dot product; expands to an expression, each argument is evaluated twice
+#define VEC_DOT_2(a,b) ((a)[0]*(b)[0] + (a)[1]*(b)[1])
+
+
+/// 3D vector dot product; expands to an expression, each argument is evaluated three times
+#define VEC_DOT(a,b) ((a)[0]*(b)[0] + (a)[1]*(b)[1] + (a)[2]*(b)[2])
+
+/// 4D vector dot product; expands to an expression, each argument is evaluated four times
+#define VEC_DOT_4(a,b)	((a)[0]*(b)[0] + (a)[1]*(b)[1] + (a)[2]*(b)[2] + (a)[3]*(b)[3])
+
+/// vector impact parameter (squared): bsq = position.position - (direction.position)^2
+#define VEC_IMPACT_SQ(bsq,direction,position) {\
+   GREAL _llel_ = VEC_DOT(direction, position);\
+   bsq = VEC_DOT(position, position) - _llel_*_llel_;\
+}\
+
+
+/// vector impact parameter: VEC_IMPACT_SQ followed by GIM_SQRT(bsq,bsq)
+#define VEC_IMPACT(bsq,direction,position)	{\
+   VEC_IMPACT_SQ(bsq,direction,position);		\
+   GIM_SQRT(bsq,bsq);					\
+}\
+
+/// 2D vector length: hands the dot product a.a to GIM_SQRT together with the output l
+#define VEC_LENGTH_2(a,l)\
+{\
+    GREAL _pp = VEC_DOT_2(a,a);\
+    GIM_SQRT(_pp,l);\
+}\
+
+
+/// 3D vector length: hands the dot product a.a to GIM_SQRT together with the output l
+#define VEC_LENGTH(a,l)\
+{\
+    GREAL _pp = VEC_DOT(a,a);\
+    GIM_SQRT(_pp,l);\
+}\
+
+
+/// 4D vector length: hands the dot product a.a to GIM_SQRT together with the output l
+#define VEC_LENGTH_4(a,l)\
+{\
+    GREAL _pp = VEC_DOT_4(a,a);\
+    GIM_SQRT(_pp,l);\
+}\
+
+/// 2D vector inv length: hands the dot product a.a to GIM_INV_SQRT together with the output l
+#define VEC_INV_LENGTH_2(a,l)\
+{\
+    GREAL _pp = VEC_DOT_2(a,a);\
+    GIM_INV_SQRT(_pp,l);\
+}\
+
+
+/// 3D vector inv length: hands the dot product a.a to GIM_INV_SQRT together with the output l
+#define VEC_INV_LENGTH(a,l)\
+{\
+    GREAL _pp = VEC_DOT(a,a);\
+    GIM_INV_SQRT(_pp,l);\
+}\
+
+
+/// 4D vector inv length: hands the dot product a.a to GIM_INV_SQRT together with the output l
+#define VEC_INV_LENGTH_4(a,l)\
+{\
+    GREAL _pp = VEC_DOT_4(a,a);\
+    GIM_INV_SQRT(_pp,l);\
+}\
+
+
+
+/// distance between two points: VEC_LENGTH of (_vb - _va), computed in a vec3f temporary, goes to _len
+#define VEC_DISTANCE(_len,_va,_vb) {\
+    vec3f _tmp_;				\
+    VEC_DIFF(_tmp_, _vb, _va);			\
+    VEC_LENGTH(_tmp_,_len);			\
+}\
+
+
+/// Conjugate length: hands 1 - a[0]^2 - a[1]^2 - a[2]^2 to GIM_SQRT together with the output l
+#define VEC_CONJUGATE_LENGTH(a,l)\
+{\
+    GREAL _pp = 1.0 - a[0]*a[0] - a[1]*a[1] - a[2]*a[2];\
+    GIM_SQRT(_pp,l);\
+}\
+
+
+/// Normalize a 3D vector in place; a is only scaled when its inverse length is below G_REAL_INFINITY
+#define VEC_NORMALIZE(a) {	\
+    GREAL len;\
+    VEC_INV_LENGTH(a,len); \
+    if(len<G_REAL_INFINITY)\
+    {\
+        a[0] *= len;				\
+        a[1] *= len;				\
+        a[2] *= len;				\
+    }						\
+}\
+
+/// Set Vector size: scales a in place by newlen times its inverse length (skipped unless that inverse length is below G_REAL_INFINITY)
+#define VEC_RENORMALIZE(a,newlen) {	\
+    GREAL len;\
+    VEC_INV_LENGTH(a,len); \
+    if(len<G_REAL_INFINITY)\
+    {\
+        len *= newlen;\
+        a[0] *= len;				\
+        a[1] *= len;				\
+        a[2] *= len;				\
+    }						\
+}\
+
+/// Vector cross product: c = a x b; c is written while a and b are still read, so c must not alias them
+#define VEC_CROSS(c,a,b)		\
+{						\
+   c[0] = (a)[1] * (b)[2] - (a)[2] * (b)[1];	\
+   c[1] = (a)[2] * (b)[0] - (a)[0] * (b)[2];	\
+   c[2] = (a)[0] * (b)[1] - (a)[1] * (b)[0];	\
+}\
+
+
+/*! Vector perp -- assumes that n is of unit length
+ * accepts vector v, subtracts out any component parallel to n: vp = v - (v.n)*n */
+#define VEC_PERPENDICULAR(vp,v,n)			\
+{						\
+   GREAL dot = VEC_DOT(v, n);			\
+   vp[0] = (v)[0] - dot*(n)[0];		\
+   vp[1] = (v)[1] - dot*(n)[1];		\
+   vp[2] = (v)[2] - dot*(n)[2];		\
+}\
+
+
+/*! Vector parallel -- assumes that n is of unit length: vp = (v.n)*n */
+#define VEC_PARALLEL(vp,v,n)			\
+{						\
+   GREAL dot = VEC_DOT(v, n);			\
+   vp[0] = (dot) * (n)[0];			\
+   vp[1] = (dot) * (n)[1];			\
+   vp[2] = (dot) * (n)[2];			\
+}\
+
+/*! Same as Vector parallel --  n can have any length (n.n is not checked against zero)
+ * accepts vector v, subtracts out any component perpendicular to n: vp = n*(v.n)/(n.n) */
+#define VEC_PROJECT(vp,v,n)			\
+{ \
+	GREAL scalar = VEC_DOT(v, n);			\
+	scalar/= VEC_DOT(n, n); \
+	vp[0] = (scalar) * (n)[0];			\
+    vp[1] = (scalar) * (n)[1];			\
+    vp[2] = (scalar) * (n)[2];			\
+}\
+
+
+/*! accepts vector v and computes vp = n*(n.n)/(v.n); v.n is not checked against zero */
+#define VEC_UNPROJECT(vp,v,n)			\
+{ \
+	GREAL scalar = VEC_DOT(v, n);			\
+	scalar = VEC_DOT(n, n)/scalar; \
+	vp[0] = (scalar) * (n)[0];			\
+    vp[1] = (scalar) * (n)[1];			\
+    vp[2] = (scalar) * (n)[2];			\
+}\
+
+
+/*! Vector reflection -- assumes n is of unit length
+ Takes vector v, reflects it against reflector n, and returns vr = v - 2*(v.n)*n */
+#define VEC_REFLECT(vr,v,n)			\
+{						\
+   GREAL dot = VEC_DOT(v, n);			\
+   vr[0] = (v)[0] - 2.0 * (dot) * (n)[0];	\
+   vr[1] = (v)[1] - 2.0 * (dot) * (n)[1];	\
+   vr[2] = (v)[2] - 2.0 * (dot) * (n)[2];	\
+}\
+
+
+/*! Vector blending
+Takes two vectors a, b, blends them together with two scalars: vr = sa*a + sb*b */
+#define VEC_BLEND_AB(vr,sa,a,sb,b)			\
+{						\
+   vr[0] = (sa) * (a)[0] + (sb) * (b)[0];	\
+   vr[1] = (sa) * (a)[1] + (sb) * (b)[1];	\
+   vr[2] = (sa) * (a)[2] + (sb) * (b)[2];	\
+}\
+
+/*! Vector blending
+Takes two vectors a, b, blends them together with s <=1: vr = (1-s)*a + s*b (s may be any expression) */
+#define VEC_BLEND(vr,a,b,s) VEC_BLEND_AB(vr,(1-(s)),a,(s),b)
+/// Component-wise a = b op c on three elements; expands to three bare statements without braces, so do not use it as an unbraced if/for body
+#define VEC_SET3(a,b,op,c) a[0]=b[0] op c[0]; a[1]=b[1] op c[1]; a[2]=b[2] op c[2];
+
+//! Finds the bigger cartesian coordinate from a vector: maxc gets the index (0, 1 or 2) of the component with the largest fabs value
+#define VEC_MAYOR_COORD(vec, maxc)\
+{\
+	GREAL A[] = {fabs(vec[0]),fabs(vec[1]),fabs(vec[2])};\
+    maxc =  A[0]>A[1]?(A[0]>A[2]?0:2):(A[1]>A[2]?1:2);\
+}\
+
+//! Finds the two axes other than the one picked by VEC_MAYOR_COORD: i0 = (major+1)%3, i1 = (i0+1)%3
+#define VEC_MINOR_AXES(vec, i0, i1)\
+{\
+	VEC_MAYOR_COORD(vec,i0);\
+	i0 = (i0+1)%3;\
+	i1 = (i0+1)%3;\
+}\
+
+
+
+/// Exact (operator==) comparison of the first three components; expands to a boolean expression
+#define VEC_EQUAL(v1,v2) (v1[0]==v2[0]&&v1[1]==v2[1]&&v1[2]==v2[2])
+/// Comparison of the first three components through GIM_NEAR_EQUAL; expands to a boolean expression
+#define VEC_NEAR_EQUAL(v1,v2) (GIM_NEAR_EQUAL(v1[0],v2[0])&&GIM_NEAR_EQUAL(v1[1],v2[1])&&GIM_NEAR_EQUAL(v1[2],v2[2]))
+
+
+/// Vector cross with a unit axis: dst = X x src here; Y_AXIS_CROSS_VEC and Z_AXIS_CROSS_VEC below do the same for the Y and Z axes
+#define X_AXIS_CROSS_VEC(dst,src)\
+{					   \
+	dst[0] = 0.0f;     \
+	dst[1] = -src[2];  \
+	dst[2] = src[1];  \
+}\
+
+#define Y_AXIS_CROSS_VEC(dst,src)\
+{					   \
+	dst[0] = src[2];     \
+	dst[1] = 0.0f;  \
+	dst[2] = -src[0];  \
+}\
+
+#define Z_AXIS_CROSS_VEC(dst,src)\
+{					   \
+	dst[0] = -src[1];     \
+	dst[1] = src[0];  \
+	dst[2] = 0.0f;  \
+}\
+
+
+
+
+
+
+/// initialize matrix: sets the 3x3 matrix m to the identity
+#define IDENTIFY_MATRIX_3X3(m)			\
+{						\
+   m[0][0] = 1.0;				\
+   m[0][1] = 0.0;				\
+   m[0][2] = 0.0;				\
+						\
+   m[1][0] = 0.0;				\
+   m[1][1] = 1.0;				\
+   m[1][2] = 0.0;				\
+						\
+   m[2][0] = 0.0;				\
+   m[2][1] = 0.0;				\
+   m[2][2] = 1.0;				\
+}\
+
+/*! initialize matrix: sets the 4x4 matrix m to the identity */
+#define IDENTIFY_MATRIX_4X4(m)			\
+{						\
+   m[0][0] = 1.0;				\
+   m[0][1] = 0.0;				\
+   m[0][2] = 0.0;				\
+   m[0][3] = 0.0;				\
+						\
+   m[1][0] = 0.0;				\
+   m[1][1] = 1.0;				\
+   m[1][2] = 0.0;				\
+   m[1][3] = 0.0;				\
+						\
+   m[2][0] = 0.0;				\
+   m[2][1] = 0.0;				\
+   m[2][2] = 1.0;				\
+   m[2][3] = 0.0;				\
+						\
+   m[3][0] = 0.0;				\
+   m[3][1] = 0.0;				\
+   m[3][2] = 0.0;				\
+   m[3][3] = 1.0;				\
+}\
+
+/*! initialize matrix: sets every element of the 4x4 matrix m to 0.0 */
+#define ZERO_MATRIX_4X4(m)			\
+{						\
+   m[0][0] = 0.0;				\
+   m[0][1] = 0.0;				\
+   m[0][2] = 0.0;				\
+   m[0][3] = 0.0;				\
+						\
+   m[1][0] = 0.0;				\
+   m[1][1] = 0.0;				\
+   m[1][2] = 0.0;				\
+   m[1][3] = 0.0;				\
+						\
+   m[2][0] = 0.0;				\
+   m[2][1] = 0.0;				\
+   m[2][2] = 0.0;				\
+   m[2][3] = 0.0;				\
+						\
+   m[3][0] = 0.0;				\
+   m[3][1] = 0.0;				\
+   m[3][2] = 0.0;				\
+   m[3][3] = 0.0;				\
+}\
+
+/*! matrix rotation  X: fills the 4x4 matrix m from the given cosine and sine */
+#define ROTX_CS(m,cosine,sine)		\
+{					\
+   /* rotation about the x-axis */	\
+					\
+   m[0][0] = 1.0;			\
+   m[0][1] = 0.0;			\
+   m[0][2] = 0.0;			\
+   m[0][3] = 0.0;			\
+					\
+   m[1][0] = 0.0;			\
+   m[1][1] = (cosine);			\
+   m[1][2] = (sine);			\
+   m[1][3] = 0.0;			\
+					\
+   m[2][0] = 0.0;			\
+   m[2][1] = -(sine);			\
+   m[2][2] = (cosine);			\
+   m[2][3] = 0.0;			\
+					\
+   m[3][0] = 0.0;			\
+   m[3][1] = 0.0;			\
+   m[3][2] = 0.0;			\
+   m[3][3] = 1.0;			\
+}\
+
+/*! matrix rotation  Y: fills the 4x4 matrix m from the given cosine and sine */
+#define ROTY_CS(m,cosine,sine)		\
+{					\
+   /* rotation about the y-axis */	\
+					\
+   m[0][0] = (cosine);			\
+   m[0][1] = 0.0;			\
+   m[0][2] = -(sine);			\
+   m[0][3] = 0.0;			\
+					\
+   m[1][0] = 0.0;			\
+   m[1][1] = 1.0;			\
+   m[1][2] = 0.0;			\
+   m[1][3] = 0.0;			\
+					\
+   m[2][0] = (sine);			\
+   m[2][1] = 0.0;			\
+   m[2][2] = (cosine);			\
+   m[2][3] = 0.0;			\
+					\
+   m[3][0] = 0.0;			\
+   m[3][1] = 0.0;			\
+   m[3][2] = 0.0;			\
+   m[3][3] = 1.0;			\
+}\
+
+/*! matrix rotation  Z: fills the 4x4 matrix m from the given cosine and sine */
+#define ROTZ_CS(m,cosine,sine)		\
+{					\
+   /* rotation about the z-axis */	\
+					\
+   m[0][0] = (cosine);			\
+   m[0][1] = (sine);			\
+   m[0][2] = 0.0;			\
+   m[0][3] = 0.0;			\
+					\
+   m[1][0] = -(sine);			\
+   m[1][1] = (cosine);			\
+   m[1][2] = 0.0;			\
+   m[1][3] = 0.0;			\
+					\
+   m[2][0] = 0.0;			\
+   m[2][1] = 0.0;			\
+   m[2][2] = 1.0;			\
+   m[2][3] = 0.0;			\
+					\
+   m[3][0] = 0.0;			\
+   m[3][1] = 0.0;			\
+   m[3][2] = 0.0;			\
+   m[3][3] = 1.0;			\
+}\
+
+/*! matrix copy: b = a for 2x2 matrices (destination first, source second) */
+#define COPY_MATRIX_2X2(b,a)	\
+{				\
+   b[0][0] = a[0][0];		\
+   b[0][1] = a[0][1];		\
+				\
+   b[1][0] = a[1][0];		\
+   b[1][1] = a[1][1];		\
+				\
+}\
+
+
+/*! matrix copy: b = a for matrices with 2 rows and 3 columns (destination first, source second) */
+#define COPY_MATRIX_2X3(b,a)	\
+{				\
+   b[0][0] = a[0][0];		\
+   b[0][1] = a[0][1];		\
+   b[0][2] = a[0][2];		\
+				\
+   b[1][0] = a[1][0];		\
+   b[1][1] = a[1][1];		\
+   b[1][2] = a[1][2];		\
+}\
+
+
+/*! matrix copy: b = a for 3x3 matrices (destination first, source second) */
+#define COPY_MATRIX_3X3(b,a)	\
+{				\
+   b[0][0] = a[0][0];		\
+   b[0][1] = a[0][1];		\
+   b[0][2] = a[0][2];		\
+				\
+   b[1][0] = a[1][0];		\
+   b[1][1] = a[1][1];		\
+   b[1][2] = a[1][2];		\
+				\
+   b[2][0] = a[2][0];		\
+   b[2][1] = a[2][1];		\
+   b[2][2] = a[2][2];		\
+}\
+
+
+/*! matrix copy: b = a for 4x4 matrices (destination first, source second) */
+#define COPY_MATRIX_4X4(b,a)	\
+{				\
+   b[0][0] = a[0][0];		\
+   b[0][1] = a[0][1];		\
+   b[0][2] = a[0][2];		\
+   b[0][3] = a[0][3];		\
+				\
+   b[1][0] = a[1][0];		\
+   b[1][1] = a[1][1];		\
+   b[1][2] = a[1][2];		\
+   b[1][3] = a[1][3];		\
+				\
+   b[2][0] = a[2][0];		\
+   b[2][1] = a[2][1];		\
+   b[2][2] = a[2][2];		\
+   b[2][3] = a[2][3];		\
+				\
+   b[3][0] = a[3][0];		\
+   b[3][1] = a[3][1];		\
+   b[3][2] = a[3][2];		\
+   b[3][3] = a[3][3];		\
+}\
+
+
+/*! matrix transpose: b = transpose of the 2x2 matrix a; b must not be the same matrix as a */
+#define TRANSPOSE_MATRIX_2X2(b,a)	\
+{				\
+   b[0][0] = a[0][0];		\
+   b[0][1] = a[1][0];		\
+				\
+   b[1][0] = a[0][1];		\
+   b[1][1] = a[1][1];		\
+}\
+
+
+/*! matrix transpose: b = transpose of the 3x3 matrix a; b must not be the same matrix as a */
+#define TRANSPOSE_MATRIX_3X3(b,a)	\
+{				\
+   b[0][0] = a[0][0];		\
+   b[0][1] = a[1][0];		\
+   b[0][2] = a[2][0];		\
+				\
+   b[1][0] = a[0][1];		\
+   b[1][1] = a[1][1];		\
+   b[1][2] = a[2][1];		\
+				\
+   b[2][0] = a[0][2];		\
+   b[2][1] = a[1][2];		\
+   b[2][2] = a[2][2];		\
+}\
+
+
+/*! matrix transpose: b = transpose of the 4x4 matrix a; b must not be the same matrix as a */
+#define TRANSPOSE_MATRIX_4X4(b,a)	\
+{				\
+   b[0][0] = a[0][0];		\
+   b[0][1] = a[1][0];		\
+   b[0][2] = a[2][0];		\
+   b[0][3] = a[3][0];		\
+				\
+   b[1][0] = a[0][1];		\
+   b[1][1] = a[1][1];		\
+   b[1][2] = a[2][1];		\
+   b[1][3] = a[3][1];		\
+				\
+   b[2][0] = a[0][2];		\
+   b[2][1] = a[1][2];		\
+   b[2][2] = a[2][2];		\
+   b[2][3] = a[3][2];		\
+				\
+   b[3][0] = a[0][3];		\
+   b[3][1] = a[1][3];		\
+   b[3][2] = a[2][3];		\
+   b[3][3] = a[3][3];		\
+}\
+
+
+/*! multiply matrix by scalar: b = s*a for 2x2 matrices */
+#define SCALE_MATRIX_2X2(b,s,a)		\
+{					\
+   b[0][0] = (s) * a[0][0];		\
+   b[0][1] = (s) * a[0][1];		\
+					\
+   b[1][0] = (s) * a[1][0];		\
+   b[1][1] = (s) * a[1][1];		\
+}\
+
+
+/*! multiply matrix by scalar: b = s*a for 3x3 matrices */
+#define SCALE_MATRIX_3X3(b,s,a)		\
+{					\
+   b[0][0] = (s) * a[0][0];		\
+   b[0][1] = (s) * a[0][1];		\
+   b[0][2] = (s) * a[0][2];		\
+					\
+   b[1][0] = (s) * a[1][0];		\
+   b[1][1] = (s) * a[1][1];		\
+   b[1][2] = (s) * a[1][2];		\
+					\
+   b[2][0] = (s) * a[2][0];		\
+   b[2][1] = (s) * a[2][1];		\
+   b[2][2] = (s) * a[2][2];		\
+}\
+
+
+/*! multiply matrix by scalar: b = s*a for 4x4 matrices; s is parenthesised in every row, so it may be any expression */
+#define SCALE_MATRIX_4X4(b,s,a)		\
+{					\
+   b[0][0] = (s) * a[0][0];		\
+   b[0][1] = (s) * a[0][1];		\
+   b[0][2] = (s) * a[0][2];		\
+   b[0][3] = (s) * a[0][3];		\
+					\
+   b[1][0] = (s) * a[1][0];		\
+   b[1][1] = (s) * a[1][1];		\
+   b[1][2] = (s) * a[1][2];		\
+   b[1][3] = (s) * a[1][3];		\
+					\
+   b[2][0] = (s) * a[2][0];		\
+   b[2][1] = (s) * a[2][1];		\
+   b[2][2] = (s) * a[2][2];		\
+   b[2][3] = (s) * a[2][3];		\
+					\
+   b[3][0] = (s) * a[3][0];		\
+   b[3][1] = (s) * a[3][1];		\
+   b[3][2] = (s) * a[3][2];		\
+   b[3][3] = (s) * a[3][3];		\
+}\
+
+
+/*! scales each column of a 2x2 matrix by the matching component of svec: b[i][j] = svec[j]*a[i][j] */
+#define SCALE_VEC_MATRIX_2X2(b,svec,a)		\
+{					\
+   b[0][0] = svec[0] * a[0][0];		\
+   b[1][0] = svec[0] * a[1][0];		\
+					\
+   b[0][1] = svec[1] * a[0][1];		\
+   b[1][1] = svec[1] * a[1][1];		\
+}\
+
+
+/*! multiply matrix by scalar. Each column is scaled by the matching scalar vector component: b[i][j] = svec[j]*a[i][j] */
+#define SCALE_VEC_MATRIX_3X3(b,svec,a)		\
+{					\
+   b[0][0] = svec[0] * a[0][0];		\
+   b[1][0] = svec[0] * a[1][0];		\
+   b[2][0] = svec[0] * a[2][0];		\
+					\
+   b[0][1] = svec[1] * a[0][1];		\
+   b[1][1] = svec[1] * a[1][1];		\
+   b[2][1] = svec[1] * a[2][1];		\
+					\
+   b[0][2] = svec[2] * a[0][2];		\
+   b[1][2] = svec[2] * a[1][2];		\
+   b[2][2] = svec[2] * a[2][2];		\
+}\
+
+
+/*! scales each column of a 4x4 matrix by the matching component of svec: b[i][j] = svec[j]*a[i][j] */
+#define SCALE_VEC_MATRIX_4X4(b,svec,a)		\
+{					\
+   b[0][0] = svec[0] * a[0][0];		\
+   b[1][0] = svec[0] * a[1][0];		\
+   b[2][0] = svec[0] * a[2][0];		\
+   b[3][0] = svec[0] * a[3][0];		\
+					\
+   b[0][1] = svec[1] * a[0][1];		\
+   b[1][1] = svec[1] * a[1][1];		\
+   b[2][1] = svec[1] * a[2][1];		\
+   b[3][1] = svec[1] * a[3][1];		\
+					\
+   b[0][2] = svec[2] * a[0][2];		\
+   b[1][2] = svec[2] * a[1][2];		\
+   b[2][2] = svec[2] * a[2][2];		\
+   b[3][2] = svec[2] * a[3][2];		\
+   \
+   b[0][3] = svec[3] * a[0][3];		\
+   b[1][3] = svec[3] * a[1][3];		\
+   b[2][3] = svec[3] * a[2][3];		\
+   b[3][3] = svec[3] * a[3][3];		\
+}\
+
+
+/*! accumulate scaled matrix: b += s*a for 2x2 matrices */
+#define ACCUM_SCALE_MATRIX_2X2(b,s,a)		\
+{					\
+   b[0][0] += (s) * a[0][0];		\
+   b[0][1] += (s) * a[0][1];		\
+					\
+   b[1][0] += (s) * a[1][0];		\
+   b[1][1] += (s) * a[1][1];		\
+}\
+
+
+/*! accumulate scaled matrix: b += s*a for 3x3 matrices */
+#define ACCUM_SCALE_MATRIX_3X3(b,s,a)		\
+{					\
+   b[0][0] += (s) * a[0][0];		\
+   b[0][1] += (s) * a[0][1];		\
+   b[0][2] += (s) * a[0][2];		\
+					\
+   b[1][0] += (s) * a[1][0];		\
+   b[1][1] += (s) * a[1][1];		\
+   b[1][2] += (s) * a[1][2];		\
+					\
+   b[2][0] += (s) * a[2][0];		\
+   b[2][1] += (s) * a[2][1];		\
+   b[2][2] += (s) * a[2][2];		\
+}\
+
+
+/*! accumulate scaled matrix: b += s*a for 4x4 matrices */
+#define ACCUM_SCALE_MATRIX_4X4(b,s,a)		\
+{					\
+   b[0][0] += (s) * a[0][0];		\
+   b[0][1] += (s) * a[0][1];		\
+   b[0][2] += (s) * a[0][2];		\
+   b[0][3] += (s) * a[0][3];		\
+					\
+   b[1][0] += (s) * a[1][0];		\
+   b[1][1] += (s) * a[1][1];		\
+   b[1][2] += (s) * a[1][2];		\
+   b[1][3] += (s) * a[1][3];		\
+					\
+   b[2][0] += (s) * a[2][0];		\
+   b[2][1] += (s) * a[2][1];		\
+   b[2][2] += (s) * a[2][2];		\
+   b[2][3] += (s) * a[2][3];		\
+					\
+   b[3][0] += (s) * a[3][0];		\
+   b[3][1] += (s) * a[3][1];		\
+   b[3][2] += (s) * a[3][2];		\
+   b[3][3] += (s) * a[3][3];		\
+}\
+
+/*! matrix product */
+/*! c[x][y] = a[x][0]*b[0][y]+a[x][1]*b[1][y]; (c must not alias a or b) */
+#define MATRIX_PRODUCT_2X2(c,a,b)		\
+{						\
+   c[0][0] = a[0][0]*b[0][0]+a[0][1]*b[1][0];	\
+   c[0][1] = a[0][0]*b[0][1]+a[0][1]*b[1][1];	\
+						\
+   c[1][0] = a[1][0]*b[0][0]+a[1][1]*b[1][0];	\
+   c[1][1] = a[1][0]*b[0][1]+a[1][1]*b[1][1];	\
+						\
+}\
+
+/*! matrix product */
+/*! c[x][y] = a[x][0]*b[0][y]+a[x][1]*b[1][y]+a[x][2]*b[2][y]; (c must not alias a or b) */
+#define MATRIX_PRODUCT_3X3(c,a,b)				\
+{								\
+   c[0][0] = a[0][0]*b[0][0]+a[0][1]*b[1][0]+a[0][2]*b[2][0];	\
+   c[0][1] = a[0][0]*b[0][1]+a[0][1]*b[1][1]+a[0][2]*b[2][1];	\
+   c[0][2] = a[0][0]*b[0][2]+a[0][1]*b[1][2]+a[0][2]*b[2][2];	\
+								\
+   c[1][0] = a[1][0]*b[0][0]+a[1][1]*b[1][0]+a[1][2]*b[2][0];	\
+   c[1][1] = a[1][0]*b[0][1]+a[1][1]*b[1][1]+a[1][2]*b[2][1];	\
+   c[1][2] = a[1][0]*b[0][2]+a[1][1]*b[1][2]+a[1][2]*b[2][2];	\
+								\
+   c[2][0] = a[2][0]*b[0][0]+a[2][1]*b[1][0]+a[2][2]*b[2][0];	\
+   c[2][1] = a[2][0]*b[0][1]+a[2][1]*b[1][1]+a[2][2]*b[2][1];	\
+   c[2][2] = a[2][0]*b[0][2]+a[2][1]*b[1][2]+a[2][2]*b[2][2];	\
+}\
+
+
+/*! matrix product */
+/*! c[x][y] = a[x][0]*b[0][y]+a[x][1]*b[1][y]+a[x][2]*b[2][y]+a[x][3]*b[3][y];*/
+#define MATRIX_PRODUCT_4X4(c,a,b)		\
+{						\
+   c[0][0] = a[0][0]*b[0][0]+a[0][1]*b[1][0]+a[0][2]*b[2][0]+a[0][3]*b[3][0];\
+   c[0][1] = a[0][0]*b[0][1]+a[0][1]*b[1][1]+a[0][2]*b[2][1]+a[0][3]*b[3][1];\
+   c[0][2] = a[0][0]*b[0][2]+a[0][1]*b[1][2]+a[0][2]*b[2][2]+a[0][3]*b[3][2];\
+   c[0][3] = a[0][0]*b[0][3]+a[0][1]*b[1][3]+a[0][2]*b[2][3]+a[0][3]*b[3][3];\
+						\
+   c[1][0] = a[1][0]*b[0][0]+a[1][1]*b[1][0]+a[1][2]*b[2][0]+a[1][3]*b[3][0];\
+   c[1][1] = a[1][0]*b[0][1]+a[1][1]*b[1][1]+a[1][2]*b[2][1]+a[1][3]*b[3][1];\
+   c[1][2] = a[1][0]*b[0][2]+a[1][1]*b[1][2]+a[1][2]*b[2][2]+a[1][3]*b[3][2];\
+   c[1][3] = a[1][0]*b[0][3]+a[1][1]*b[1][3]+a[1][2]*b[2][3]+a[1][3]*b[3][3];\
+						\
+   c[2][0] = a[2][0]*b[0][0]+a[2][1]*b[1][0]+a[2][2]*b[2][0]+a[2][3]*b[3][0];\
+   c[2][1] = a[2][0]*b[0][1]+a[2][1]*b[1][1]+a[2][2]*b[2][1]+a[2][3]*b[3][1];\
+   c[2][2] = a[2][0]*b[0][2]+a[2][1]*b[1][2]+a[2][2]*b[2][2]+a[2][3]*b[3][2];\
+   c[2][3] = a[2][0]*b[0][3]+a[2][1]*b[1][3]+a[2][2]*b[2][3]+a[2][3]*b[3][3];\
+						\
+   c[3][0] = a[3][0]*b[0][0]+a[3][1]*b[1][0]+a[3][2]*b[2][0]+a[3][3]*b[3][0];\
+   c[3][1] = a[3][0]*b[0][1]+a[3][1]*b[1][1]+a[3][2]*b[2][1]+a[3][3]*b[3][1];\
+   c[3][2] = a[3][0]*b[0][2]+a[3][1]*b[1][2]+a[3][2]*b[2][2]+a[3][3]*b[3][2];\
+   c[3][3] = a[3][0]*b[0][3]+a[3][1]*b[1][3]+a[3][2]*b[2][3]+a[3][3]*b[3][3];\
+}\
+
+
+/*! matrix times vector */
+#define MAT_DOT_VEC_2X2(p,m,v)					\
+{								\
+   p[0] = m[0][0]*v[0] + m[0][1]*v[1];				\
+   p[1] = m[1][0]*v[0] + m[1][1]*v[1];				\
+}\
+
+
+/*! matrix times vector */
+#define MAT_DOT_VEC_3X3(p,m,v)					\
+{								\
+   p[0] = m[0][0]*v[0] + m[0][1]*v[1] + m[0][2]*v[2];		\
+   p[1] = m[1][0]*v[0] + m[1][1]*v[1] + m[1][2]*v[2];		\
+   p[2] = m[2][0]*v[0] + m[2][1]*v[1] + m[2][2]*v[2];		\
+}\
+
+
+/*! matrix times vector
+v is a vec4f
+*/
+#define MAT_DOT_VEC_4X4(p,m,v)					\
+{								\
+   p[0] = m[0][0]*v[0] + m[0][1]*v[1] + m[0][2]*v[2] + m[0][3]*v[3];	\
+   p[1] = m[1][0]*v[0] + m[1][1]*v[1] + m[1][2]*v[2] + m[1][3]*v[3];	\
+   p[2] = m[2][0]*v[0] + m[2][1]*v[1] + m[2][2]*v[2] + m[2][3]*v[3];	\
+   p[3] = m[3][0]*v[0] + m[3][1]*v[1] + m[3][2]*v[2] + m[3][3]*v[3];	\
+}\
+
+/*! matrix times vector
+v is a vec3f
+and m is a mat4f<br>
+Last column is added as the position
+*/
+#define MAT_DOT_VEC_3X4(p,m,v)					\
+{								\
+   p[0] = m[0][0]*v[0] + m[0][1]*v[1] + m[0][2]*v[2] + m[0][3];	\
+   p[1] = m[1][0]*v[0] + m[1][1]*v[1] + m[1][2]*v[2] + m[1][3];	\
+   p[2] = m[2][0]*v[0] + m[2][1]*v[1] + m[2][2]*v[2] + m[2][3];	\
+}\
+
+
+/*! vector transpose times matrix */
+/*! p[j] = v[0]*m[0][j] + v[1]*m[1][j] + v[2]*m[2][j]; */
+#define VEC_DOT_MAT_3X3(p,v,m)					\
+{								\
+   p[0] = v[0]*m[0][0] + v[1]*m[1][0] + v[2]*m[2][0];		\
+   p[1] = v[0]*m[0][1] + v[1]*m[1][1] + v[2]*m[2][1];		\
+   p[2] = v[0]*m[0][2] + v[1]*m[1][2] + v[2]*m[2][2];		\
+}\
+
+
+/*! affine matrix times vector */
+/** The matrix is assumed to be a 2x3 affine matrix whose last column
+ * (m[0][2], m[1][2]) holds the translation that is added to the result */
+#define MAT_DOT_VEC_2X3(p,m,v)					\
+{								\
+   p[0] = m[0][0]*v[0] + m[0][1]*v[1] + m[0][2];		\
+   p[1] = m[1][0]*v[0] + m[1][1]*v[1] + m[1][2];		\
+}\
+
+//! Transform a plane
+#define MAT_TRANSFORM_PLANE_4X4(pout,m,plane)\
+{								\
+   pout[0] = m[0][0]*plane[0] + m[0][1]*plane[1]  + m[0][2]*plane[2];\
+   pout[1] = m[1][0]*plane[0] + m[1][1]*plane[1]  + m[1][2]*plane[2];\
+   pout[2] = m[2][0]*plane[0] + m[2][1]*plane[1]  + m[2][2]*plane[2];\
+   pout[3] = m[0][3]*pout[0] + m[1][3]*pout[1]  + m[2][3]*pout[2] + plane[3];\
+}\
+
+
+
+/** inverse transpose of matrix times vector
+ *
+ * This macro computes inverse transpose of matrix m,
+ * and multiplies vector v into it, to yield vector p
+ *
+ * DANGER !!! Do Not use this on normal vectors!!!
+ * It will leave normals the wrong length !!!
+ * See macro below for use on normals.
+ */
+#define INV_TRANSP_MAT_DOT_VEC_2X2(p,m,v)			\
+{								\
+   GREAL det;						\
+								\
+   det = m[0][0]*m[1][1] - m[0][1]*m[1][0];			\
+   p[0] = m[1][1]*v[0] - m[1][0]*v[1];				\
+   p[1] = - m[0][1]*v[0] + m[0][0]*v[1];			\
+								\
+   /* if matrix not singular, and not orthonormal, then renormalize */ \
+   if ((det!=1.0f) && (det != 0.0f)) {				\
+      det = 1.0f / det;						\
+      p[0] *= det;						\
+      p[1] *= det;						\
+   }								\
+}\
+
+
+/** transform normal vector by inverse transpose of matrix
+ * and then renormalize the vector
+ *
+ * This macro computes inverse transpose of matrix m,
+ * and multiplies vector v into it, to yield vector p
+ * Vector p is then normalized.
+ */
+#define NORM_XFORM_2X2(p,m,v)					\
+{								\
+   GREAL len;							\
+								\
+   /* do nothing if off-diagonals are zero and diagonals are 	\
+    * equal */							\
+   if ((m[0][1] != 0.0) || (m[1][0] != 0.0) || (m[0][0] != m[1][1])) { \
+      p[0] = m[1][1]*v[0] - m[1][0]*v[1];			\
+      p[1] = - m[0][1]*v[0] + m[0][0]*v[1];			\
+								\
+      len = p[0]*p[0] + p[1]*p[1];				\
+      GIM_INV_SQRT(len,len);					\
+      p[0] *= len;						\
+      p[1] *= len;						\
+   } else {							\
+      VEC_COPY_2 (p, v);					\
+   }								\
+}\
+
+
+/** outer product of vector times vector transpose
+ *
+ * The outer product of vector v and vector transpose t yields
+ * dyadic matrix m.
+ */
+#define OUTER_PRODUCT_2X2(m,v,t)				\
+{								\
+   m[0][0] = v[0] * t[0];					\
+   m[0][1] = v[0] * t[1];					\
+								\
+   m[1][0] = v[1] * t[0];					\
+   m[1][1] = v[1] * t[1];					\
+}\
+
+
+/** outer product of vector times vector transpose
+ *
+ * The outer product of vector v and vector transpose t yields
+ * dyadic matrix m.
+ */
+#define OUTER_PRODUCT_3X3(m,v,t)				\
+{								\
+   m[0][0] = v[0] * t[0];					\
+   m[0][1] = v[0] * t[1];					\
+   m[0][2] = v[0] * t[2];					\
+								\
+   m[1][0] = v[1] * t[0];					\
+   m[1][1] = v[1] * t[1];					\
+   m[1][2] = v[1] * t[2];					\
+								\
+   m[2][0] = v[2] * t[0];					\
+   m[2][1] = v[2] * t[1];					\
+   m[2][2] = v[2] * t[2];					\
+}\
+
+
+/** outer product of vector times vector transpose
+ *
+ * The outer product of vector v and vector transpose t yields
+ * dyadic matrix m.
+ */
+#define OUTER_PRODUCT_4X4(m,v,t)				\
+{								\
+   m[0][0] = v[0] * t[0];					\
+   m[0][1] = v[0] * t[1];					\
+   m[0][2] = v[0] * t[2];					\
+   m[0][3] = v[0] * t[3];					\
+								\
+   m[1][0] = v[1] * t[0];					\
+   m[1][1] = v[1] * t[1];					\
+   m[1][2] = v[1] * t[2];					\
+   m[1][3] = v[1] * t[3];					\
+								\
+   m[2][0] = v[2] * t[0];					\
+   m[2][1] = v[2] * t[1];					\
+   m[2][2] = v[2] * t[2];					\
+   m[2][3] = v[2] * t[3];					\
+								\
+   m[3][0] = v[3] * t[0];					\
+   m[3][1] = v[3] * t[1];					\
+   m[3][2] = v[3] * t[2];					\
+   m[3][3] = v[3] * t[3];					\
+}\
+
+
+/** accumulate outer product of vector times vector transpose
+ *
+ * The outer product of the 2-vector v and vector transpose t is
+ * added to the dyadic matrix m (m += v * transpose(t)).
+ */
+#define ACCUM_OUTER_PRODUCT_2X2(m,v,t)				\
+{								\
+   m[0][0] += v[0] * t[0];					\
+   m[0][1] += v[0] * t[1];					\
+								\
+   m[1][0] += v[1] * t[0];					\
+   m[1][1] += v[1] * t[1];					\
+}\
+
+
+/** accumulate outer product of vector times vector transpose
+ *
+ * The outer product of the 3-vector v and vector transpose t is
+ * added to the dyadic matrix m (m += v * transpose(t)).
+ */
+#define ACCUM_OUTER_PRODUCT_3X3(m,v,t)				\
+{								\
+   m[0][0] += v[0] * t[0];					\
+   m[0][1] += v[0] * t[1];					\
+   m[0][2] += v[0] * t[2];					\
+								\
+   m[1][0] += v[1] * t[0];					\
+   m[1][1] += v[1] * t[1];					\
+   m[1][2] += v[1] * t[2];					\
+								\
+   m[2][0] += v[2] * t[0];					\
+   m[2][1] += v[2] * t[1];					\
+   m[2][2] += v[2] * t[2];					\
+}\
+
+
+/** accumulate outer product of vector times vector transpose
+ *
+ * The outer product of the 4-vector v and vector transpose t is
+ * added to the dyadic matrix m (m += v * transpose(t)).
+ */
+#define ACCUM_OUTER_PRODUCT_4X4(m,v,t)				\
+{								\
+   m[0][0] += v[0] * t[0];					\
+   m[0][1] += v[0] * t[1];					\
+   m[0][2] += v[0] * t[2];					\
+   m[0][3] += v[0] * t[3];					\
+								\
+   m[1][0] += v[1] * t[0];					\
+   m[1][1] += v[1] * t[1];					\
+   m[1][2] += v[1] * t[2];					\
+   m[1][3] += v[1] * t[3];					\
+								\
+   m[2][0] += v[2] * t[0];					\
+   m[2][1] += v[2] * t[1];					\
+   m[2][2] += v[2] * t[2];					\
+   m[2][3] += v[2] * t[3];					\
+								\
+   m[3][0] += v[3] * t[0];					\
+   m[3][1] += v[3] * t[1];					\
+   m[3][2] += v[3] * t[2];					\
+   m[3][3] += v[3] * t[3];					\
+}\
+
+
+/** determinant of matrix
+ *
+ * Computes determinant of matrix m, returning d
+ */
+#define DETERMINANT_2X2(d,m)					\
+{								\
+   d = m[0][0] * m[1][1] - m[0][1] * m[1][0];			\
+}\
+
+
+/** determinant of matrix
+ *
+ * Computes determinant of matrix m, returning d
+ */
+#define DETERMINANT_3X3(d,m)					\
+{								\
+   d = m[0][0] * (m[1][1]*m[2][2] - m[1][2] * m[2][1]);		\
+   d -= m[0][1] * (m[1][0]*m[2][2] - m[1][2] * m[2][0]);	\
+   d += m[0][2] * (m[1][0]*m[2][1] - m[1][1] * m[2][0]);	\
+}\
+
+
+/** i,j,th cofactor of a 4x4 matrix
+ *
+ */
+#define COFACTOR_4X4_IJ(fac,m,i,j) 				\
+{								\
+   GUINT __ii[4], __jj[4], __k;						\
+								\
+   for (__k=0; __k<i; __k++) __ii[__k] = __k;				\
+   for (__k=i; __k<3; __k++) __ii[__k] = __k+1;				\
+   for (__k=0; __k<j; __k++) __jj[__k] = __k;				\
+   for (__k=j; __k<3; __k++) __jj[__k] = __k+1;				\
+								\
+   (fac) = m[__ii[0]][__jj[0]] * (m[__ii[1]][__jj[1]]*m[__ii[2]][__jj[2]] 	\
+                            - m[__ii[1]][__jj[2]]*m[__ii[2]][__jj[1]]); \
+   (fac) -= m[__ii[0]][__jj[1]] * (m[__ii[1]][__jj[0]]*m[__ii[2]][__jj[2]]	\
+                             - m[__ii[1]][__jj[2]]*m[__ii[2]][__jj[0]]);\
+   (fac) += m[__ii[0]][__jj[2]] * (m[__ii[1]][__jj[0]]*m[__ii[2]][__jj[1]]	\
+                             - m[__ii[1]][__jj[1]]*m[__ii[2]][__jj[0]]);\
+								\
+   __k = i+j;							\
+   if ( __k != (__k/2)*2) {						\
+      (fac) = -(fac);						\
+   }								\
+}\
+
+
+/** determinant of matrix
+ *
+ * Computes determinant of matrix m, returning d
+ */
+#define DETERMINANT_4X4(d,m)					\
+{								\
+   GREAL cofac;						\
+   COFACTOR_4X4_IJ (cofac, m, 0, 0);				\
+   d = m[0][0] * cofac;						\
+   COFACTOR_4X4_IJ (cofac, m, 0, 1);				\
+   d += m[0][1] * cofac;					\
+   COFACTOR_4X4_IJ (cofac, m, 0, 2);				\
+   d += m[0][2] * cofac;					\
+   COFACTOR_4X4_IJ (cofac, m, 0, 3);				\
+   d += m[0][3] * cofac;					\
+}\
+
+
+/** cofactor of matrix
+ *
+ * Computes cofactor of matrix m, returning a
+ */
+#define COFACTOR_2X2(a,m)					\
+{								\
+   a[0][0] = (m)[1][1];						\
+   a[0][1] = - (m)[1][0];						\
+   a[1][0] = - (m)[0][1];						\
+   a[1][1] = (m)[0][0];						\
+}\
+
+
+/** cofactor of matrix
+ *
+ * Computes the cofactor matrix of the 3x3 matrix m, returning it in a (a must not alias m)
+ */
+#define COFACTOR_3X3(a,m) \
+{ \
+   a[0][0] = m[1][1]*m[2][2] - m[1][2]*m[2][1]; \
+   a[0][1] = - (m[1][0]*m[2][2] - m[2][0]*m[1][2]); \
+   a[0][2] = m[1][0]*m[2][1] - m[1][1]*m[2][0]; \
+   a[1][0] = - (m[0][1]*m[2][2] - m[0][2]*m[2][1]); \
+   a[1][1] = m[0][0]*m[2][2] - m[0][2]*m[2][0]; \
+   a[1][2] = - (m[0][0]*m[2][1] - m[0][1]*m[2][0]); \
+   a[2][0] = m[0][1]*m[1][2] - m[0][2]*m[1][1]; \
+   a[2][1] = - (m[0][0]*m[1][2] - m[0][2]*m[1][0]); \
+   a[2][2] = m[0][0]*m[1][1] - m[0][1]*m[1][0]; \
+}
+
+
+/** cofactor of matrix
+ *
+ * Computes cofactor of the 4x4 matrix m, returning a (one COFACTOR_4X4_IJ expansion per entry)
+ */
+#define COFACTOR_4X4(a,m) \
+{ \
+   /* _i_/_j_ (as in ADJOINT_4X4) so that variables named i or j inside a or m are not captured */ \
+   int _i_,_j_; \
+   for (_i_=0; _i_<4; _i_++) { \
+      for (_j_=0; _j_<4; _j_++) { \
+         COFACTOR_4X4_IJ (a[_i_][_j_], m, _i_, _j_); \
+      } \
+   } \
+}
+
+
+/** adjoint of matrix
+ *
+ * Computes adjoint of matrix m, returning a
+ * (Note that adjoint is just the transpose of the cofactor matrix)
+ */
+#define ADJOINT_2X2(a,m)					\
+{								\
+   a[0][0] = (m)[1][1];						\
+   a[1][0] = - (m)[1][0];						\
+   a[0][1] = - (m)[0][1];						\
+   a[1][1] = (m)[0][0];						\
+}\
+
+
+/** adjoint of matrix
+ *
+ * Computes adjoint of the 3x3 matrix m, returning it in a (a must not alias m)
+ * (Note that adjoint is just the transpose of the cofactor matrix)
+ */
+#define ADJOINT_3X3(a,m) \
+{ \
+   a[0][0] = m[1][1]*m[2][2] - m[1][2]*m[2][1]; \
+   a[1][0] = - (m[1][0]*m[2][2] - m[2][0]*m[1][2]); \
+   a[2][0] = m[1][0]*m[2][1] - m[1][1]*m[2][0]; \
+   a[0][1] = - (m[0][1]*m[2][2] - m[0][2]*m[2][1]); \
+   a[1][1] = m[0][0]*m[2][2] - m[0][2]*m[2][0]; \
+   a[2][1] = - (m[0][0]*m[2][1] - m[0][1]*m[2][0]); \
+   a[0][2] = m[0][1]*m[1][2] - m[0][2]*m[1][1]; \
+   a[1][2] = - (m[0][0]*m[1][2] - m[0][2]*m[1][0]); \
+   a[2][2] = m[0][0]*m[1][1] - m[0][1]*m[1][0]; \
+}
+
+
+/** adjoint of matrix
+ *
+ * Computes adjoint of matrix m, returning a
+ * (Note that adjoint is just the transpose of the cofactor matrix)
+ */
+#define ADJOINT_4X4(a,m)					\
+{								\
+   char _i_,_j_;							\
+								\
+   for (_i_=0; _i_<4; _i_++) {					\
+      for (_j_=0; _j_<4; _j_++) {					\
+         COFACTOR_4X4_IJ (a[_j_][_i_], m, _i_, _j_);			\
+      }								\
+   }								\
+}\
+
+
+/** compute adjoint of matrix and scale
+ *
+ * Computes adjoint of matrix m, scales it by s, returning a
+ */
+#define SCALE_ADJOINT_2X2(a,s,m)				\
+{								\
+   a[0][0] = (s) * m[1][1];					\
+   a[1][0] = - (s) * m[1][0];					\
+   a[0][1] = - (s) * m[0][1];					\
+   a[1][1] = (s) * m[0][0];					\
+}\
+
+
+/** compute adjoint of matrix and scale
+ *
+ * Computes adjoint of matrix m, scales it by s, returning a
+ */
+#define SCALE_ADJOINT_3X3(a,s,m)				\
+{								\
+   a[0][0] = (s) * (m[1][1] * m[2][2] - m[1][2] * m[2][1]);	\
+   a[1][0] = (s) * (m[1][2] * m[2][0] - m[1][0] * m[2][2]);	\
+   a[2][0] = (s) * (m[1][0] * m[2][1] - m[1][1] * m[2][0]);	\
+								\
+   a[0][1] = (s) * (m[0][2] * m[2][1] - m[0][1] * m[2][2]);	\
+   a[1][1] = (s) * (m[0][0] * m[2][2] - m[0][2] * m[2][0]);	\
+   a[2][1] = (s) * (m[0][1] * m[2][0] - m[0][0] * m[2][1]);	\
+								\
+   a[0][2] = (s) * (m[0][1] * m[1][2] - m[0][2] * m[1][1]);	\
+   a[1][2] = (s) * (m[0][2] * m[1][0] - m[0][0] * m[1][2]);	\
+   a[2][2] = (s) * (m[0][0] * m[1][1] - m[0][1] * m[1][0]);	\
+}\
+
+
+/** compute adjoint of matrix and scale
+ *
+ * Computes adjoint of matrix m, scales it by s, returning a
+ */
+#define SCALE_ADJOINT_4X4(a,s,m)				\
+{								\
+   char _i_,_j_; \
+   for (_i_=0; _i_<4; _i_++) {					\
+      for (_j_=0; _j_<4; _j_++) {					\
+         COFACTOR_4X4_IJ (a[_j_][_i_], m, _i_, _j_);			\
+         a[_j_][_i_] *= s;						\
+      }								\
+   }								\
+}\
+
+/** inverse of matrix
+ *
+ * Compute inverse of matrix a, returning determinant det and
+ * inverse b (no singularity check is made: det == 0 divides by zero)
+ */
+#define INVERT_2X2(b,det,a)			\
+{						\
+   GREAL _tmp_;					\
+   DETERMINANT_2X2 (det, a);			\
+   _tmp_ = 1.0 / (det);				\
+   SCALE_ADJOINT_2X2 (b, _tmp_, a);		\
+}\
+
+
+/** inverse of matrix
+ *
+ * Compute inverse of matrix a, returning determinant det and
+ * inverse b (no singularity check is made: det == 0 divides by zero)
+ */
+#define INVERT_3X3(b,det,a)			\
+{						\
+   GREAL _tmp_;					\
+   DETERMINANT_3X3 (det, a);			\
+   _tmp_ = 1.0 / (det);				\
+   SCALE_ADJOINT_3X3 (b, _tmp_, a);		\
+}\
+
+
+/** inverse of matrix
+ *
+ * Compute inverse of matrix a, returning determinant det and
+ * inverse b (no singularity check is made: det == 0 divides by zero)
+ */
+#define INVERT_4X4(b,det,a)			\
+{						\
+   GREAL _tmp_;					\
+   DETERMINANT_4X4 (det, a);			\
+   _tmp_ = 1.0 / (det);				\
+   SCALE_ADJOINT_4X4 (b, _tmp_, a);		\
+}\
+
+//! Get the triple(3) row of a transform matrix
+#define MAT_GET_ROW(mat,vec3,rowindex)\
+{\
+    vec3[0] = mat[rowindex][0];\
+    vec3[1] = mat[rowindex][1];\
+    vec3[2] = mat[rowindex][2]; \
+}\
+
+//! Get the tuple(2) row of a transform matrix
+#define MAT_GET_ROW2(mat,vec2,rowindex)\
+{\
+    vec2[0] = mat[rowindex][0];\
+    vec2[1] = mat[rowindex][1];\
+}\
+
+
+//! Get the quad (4) row of a transform matrix
+#define MAT_GET_ROW4(mat,vec4,rowindex)\
+{\
+    vec4[0] = mat[rowindex][0];\
+    vec4[1] = mat[rowindex][1];\
+    vec4[2] = mat[rowindex][2];\
+    vec4[3] = mat[rowindex][3];\
+}\
+
+//! Get the triple(3) col of a transform matrix
+#define MAT_GET_COL(mat,vec3,colindex)\
+{\
+    vec3[0] = mat[0][colindex];\
+    vec3[1] = mat[1][colindex];\
+    vec3[2] = mat[2][colindex]; \
+}\
+
+//! Get the tuple(2) col of a transform matrix
+#define MAT_GET_COL2(mat,vec2,colindex)\
+{\
+    vec2[0] = mat[0][colindex];\
+    vec2[1] = mat[1][colindex];\
+}\
+
+
+//! Get the quad (4) col of a transform matrix
+#define MAT_GET_COL4(mat,vec4,colindex)\
+{\
+    vec4[0] = mat[0][colindex];\
+    vec4[1] = mat[1][colindex];\
+    vec4[2] = mat[2][colindex];\
+    vec4[3] = mat[3][colindex];\
+}\
+
+//! Get the triple(3) col of a transform matrix
+#define MAT_GET_X(mat,vec3)\
+{\
+    MAT_GET_COL(mat,vec3,0);\
+}\
+
+//! Get the triple(3) col of a transform matrix
+#define MAT_GET_Y(mat,vec3)\
+{\
+    MAT_GET_COL(mat,vec3,1);\
+}\
+
+//! Get the triple(3) col of a transform matrix
+#define MAT_GET_Z(mat,vec3)\
+{\
+    MAT_GET_COL(mat,vec3,2);\
+}\
+
+
+//! Set the X column (column 0) of a transform matrix from a triple(3)
+#define MAT_SET_X(mat,vec3)\
+{\
+    mat[0][0] = vec3[0];\
+    mat[1][0] = vec3[1];\
+    mat[2][0] = vec3[2];\
+}\
+
+//! Set the Y column (column 1) of a transform matrix from a triple(3)
+#define MAT_SET_Y(mat,vec3)\
+{\
+    mat[0][1] = vec3[0];\
+    mat[1][1] = vec3[1];\
+    mat[2][1] = vec3[2];\
+}\
+
+//! Set the Z column (column 2) of a transform matrix from a triple(3)
+#define MAT_SET_Z(mat,vec3)\
+{\
+    mat[0][2] = vec3[0];\
+    mat[1][2] = vec3[1];\
+    mat[2][2] = vec3[2];\
+}\
+
+
+//! Get the triple(3) col of a transform matrix
+#define MAT_GET_TRANSLATION(mat,vec3)\
+{\
+    vec3[0] = mat[0][3];\
+    vec3[1] = mat[1][3];\
+    vec3[2] = mat[2][3]; \
+}\
+
+//! Set the triple(3) col of a transform matrix
+#define MAT_SET_TRANSLATION(mat,vec3)\
+{\
+    mat[0][3] = vec3[0];\
+    mat[1][3] = vec3[1];\
+    mat[2][3] = vec3[2]; \
+}\
+
+
+
+//! Returns the dot product between a vec3f and the row of a matrix
+#define MAT_DOT_ROW(mat,vec3,rowindex) (vec3[0]*mat[rowindex][0] + vec3[1]*mat[rowindex][1] + vec3[2]*mat[rowindex][2])
+
+//! Returns the dot product between a vec2f and the row of a matrix
+#define MAT_DOT_ROW2(mat,vec2,rowindex) (vec2[0]*mat[rowindex][0] + vec2[1]*mat[rowindex][1])
+
+//! Returns the dot product between a vec4f and the row of a matrix
+#define MAT_DOT_ROW4(mat,vec4,rowindex) (vec4[0]*mat[rowindex][0] + vec4[1]*mat[rowindex][1] + vec4[2]*mat[rowindex][2] + vec4[3]*mat[rowindex][3])
+
+
+//! Returns the dot product between a vec3f and the col of a matrix
+#define MAT_DOT_COL(mat,vec3,colindex) (vec3[0]*mat[0][colindex] + vec3[1]*mat[1][colindex] + vec3[2]*mat[2][colindex])
+
+//! Returns the dot product between a vec2f and the col of a matrix
+#define MAT_DOT_COL2(mat,vec2,colindex) (vec2[0]*mat[0][colindex] + vec2[1]*mat[1][colindex])
+
+//! Returns the dot product between a vec4f and the col of a matrix
+#define MAT_DOT_COL4(mat,vec4,colindex) (vec4[0]*mat[0][colindex] + vec4[1]*mat[1][colindex] + vec4[2]*mat[2][colindex] + vec4[3]*mat[3][colindex])
+
+/*! Transpose matrix times vector: p[j] = v[0]*m[0][j] + v[1]*m[1][j] + v[2]*m[2][j]
+v is a vec3f
+and only the upper-left 3x3 block of m is read<br>
+*/
+#define INV_MAT_DOT_VEC_3X3(p,m,v)					\
+{								\
+   p[0] = MAT_DOT_COL(m,v,0); \
+   p[1] = MAT_DOT_COL(m,v,1);	\
+   p[2] = MAT_DOT_COL(m,v,2);	\
+}\
+
+
+
+#endif // GIM_VECTOR_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_math.h b/src/bullet/BulletCollision/Gimpact/gim_math.h
new file mode 100644
index 00000000..939079e1
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_math.h
@@ -0,0 +1,157 @@
+#ifndef GIM_MATH_H_INCLUDED
+#define GIM_MATH_H_INCLUDED
+/*! \file gim_math.h
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+#include "LinearMath/btScalar.h"
+
+
+
+#define GREAL btScalar
+#define GREAL2 double
+#define GINT int
+#define GUINT unsigned int
+#define GSHORT short
+#define GUSHORT unsigned short
+#define GINT64 long long
+#define GUINT64 unsigned long long
+
+
+
+#define G_PI 3.14159265358979f
+#define G_HALF_PI 1.5707963f
+//267948966
+#define G_TWO_PI 6.28318530f
+//71795864
+#define G_ROOT3 1.73205f
+#define G_ROOT2 1.41421f
+#define G_UINT_INFINITY 0xffffffff //!< A very very high value
+#define G_REAL_INFINITY FLT_MAX
+#define	G_SIGN_BITMASK			0x80000000
+#define G_EPSILON SIMD_EPSILON
+
+
+
+enum GIM_SCALAR_TYPES
+{
+	G_STYPE_REAL =0,
+	G_STYPE_REAL2,
+	G_STYPE_SHORT,
+	G_STYPE_USHORT,
+	G_STYPE_INT,
+	G_STYPE_UINT,
+	G_STYPE_INT64,
+	G_STYPE_UINT64
+};
+
+
+
+#define G_DEGTORAD(X) ((X)*3.1415926f/180.0f)
+#define G_RADTODEG(X) ((X)*180.0f/3.1415926f)
+
+//! Integer representation of a floating-point value.
+#define GIM_IR(x)					((GUINT&)(x))
+
+//! Signed integer representation of a floating-point value.
+#define GIM_SIR(x)					((GINT&)(x))
+
+//! Absolute integer representation of a floating-point value
+#define GIM_AIR(x)					(GIM_IR(x)&0x7fffffff)
+
+//! Floating-point representation of an integer value.
+#define GIM_FR(x)					((GREAL&)(x))
+
+//! Max / min of two or three values; every argument is parenthesized but may be evaluated more than once
+#define GIM_MAX(a,b) ((a)<(b)?(b):(a))
+#define GIM_MIN(a,b) ((a)>(b)?(b):(a))
+#define GIM_MAX3(a,b,c) GIM_MAX(a,GIM_MAX(b,c))
+#define GIM_MIN3(a,b,c) GIM_MIN(a,GIM_MIN(b,c))
+
+//! Epsilon comparisons against G_EPSILON (arguments are parenthesized in the expansion)
+#define GIM_IS_ZERO(value) ((value) < G_EPSILON && (value) > -G_EPSILON)
+#define GIM_IS_NEGATIVE(value) ((value) <= -G_EPSILON)
+//! NOTE: the historical (misspelled) macro name is kept for source compatibility
+#define GIM_IS_POSISITVE(value) ((value) >= G_EPSILON)
+//! True when v1 and v2 differ by less than G_EPSILON
+#define GIM_NEAR_EQUAL(v1,v2) GIM_IS_ZERO((v1)-(v2))
+
+///returns a clamped number
+#define GIM_CLAMP(number,minval,maxval) ((number)<(minval)?(minval):((number)>(maxval)?(maxval):(number)))
+
+#define GIM_GREATER(x, y)	(btFabs(x) > (y)) //!< true when the absolute value of x exceeds y
+
+///Swap numbers through a temporary: exact for floating point, free of integer overflow,
+///and harmless when both arguments name the same variable.
+template<class GIM_NUM_A, class GIM_NUM_B>
+inline void gim_swap_numbers(GIM_NUM_A & a, GIM_NUM_B & b)
+{ GIM_NUM_A _tmp_ = a; a = (GIM_NUM_A)b; b = (GIM_NUM_B)_tmp_; }
+#define GIM_SWAP_NUMBERS(a,b) { gim_swap_numbers((a),(b)); }
+
+#define GIM_INV_SQRT(va,isva)\
+{\
+    if(va<=0.0000001f)\
+    {\
+        isva = G_REAL_INFINITY;\
+    }\
+    else\
+    {\
+        GREAL _x = va * 0.5f;\
+        GUINT _y = 0x5f3759df - ( GIM_IR(va) >> 1);\
+        isva = GIM_FR(_y);\
+        isva  = isva * ( 1.5f - ( _x * isva * isva ) );\
+    }\
+}\
+
+#define GIM_SQRT(va,sva)\
+{\
+    GIM_INV_SQRT(va,sva);\
+    sva = 1.0f/sva;\
+}\
+
+//! Fast estimate of 1.0f / sqrtf(f) produced by GIM_INV_SQRT (Quake3 bit trick). See http://www.magic-software.com/3DGEDInvSqrt.html
+inline GREAL gim_inv_sqrt(GREAL f)
+{
+    GREAL inv_root;
+    GIM_INV_SQRT(f,inv_root);
+    return inv_root;
+}
+
+//! Square root estimate produced by GIM_SQRT, i.e. the reciprocal of the GIM_INV_SQRT result.
+inline GREAL gim_sqrt(GREAL f)
+{
+    GREAL root;
+    GIM_SQRT(f,root);
+    return root;
+}
+
+
+#endif // GIM_MATH_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_memory.cpp b/src/bullet/BulletCollision/Gimpact/gim_memory.cpp
new file mode 100644
index 00000000..1636eb78
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_memory.cpp
@@ -0,0 +1,135 @@
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+
+#include "gim_memory.h"
+#include "stdlib.h"
+
+#ifdef GIM_SIMD_MEMORY
+#include "LinearMath/btAlignedAllocator.h"
+#endif
+
+static gim_alloc_function *g_allocfn = 0; ///< handler consulted by gim_alloc(); 0 = built-in allocator
+static gim_alloca_function *g_allocafn = 0; ///< handler consulted by gim_alloca(); 0 = fall back to gim_alloc()
+static gim_realloc_function *g_reallocfn = 0; ///< handler registered through gim_set_realloc_handler()
+static gim_free_function *g_freefn = 0; ///< handler consulted by gim_free(); 0 = built-in deallocator
+/// Handler setters: each stores fn in the matching slot above; passing 0 clears the slot.
+void gim_set_alloc_handler (gim_alloc_function *fn)
+{
+  g_allocfn = fn;
+}
+
+void gim_set_alloca_handler (gim_alloca_function *fn)
+{
+  g_allocafn = fn;
+}
+
+void gim_set_realloc_handler (gim_realloc_function *fn)
+{
+  g_reallocfn = fn;
+}
+
+void gim_set_free_handler (gim_free_function *fn)
+{
+  g_freefn = fn;
+}
+/// Handler getters: each returns the currently registered handler, or 0 when none is set.
+gim_alloc_function *gim_get_alloc_handler()
+{
+  return g_allocfn;
+}
+
+gim_alloca_function *gim_get_alloca_handler()
+{
+  return g_allocafn;
+}
+
+
+gim_realloc_function *gim_get_realloc_handler ()
+{
+  return g_reallocfn;
+}
+
+
+gim_free_function  *gim_get_free_handler ()
+{
+  return g_freefn;
+}
+
+
+/// Allocates size bytes through the registered alloc handler when one is set,
+/// otherwise through the default allocator selected at compile time.
+void * gim_alloc(size_t size)
+{
+	// A user-supplied allocator always takes precedence.
+	if (g_allocfn)
+	{
+		return g_allocfn(size);
+	}
+
+#ifdef GIM_SIMD_MEMORY
+	void * block = btAlignedAlloc(size,16);
+#else
+	void * block = malloc(size);
+#endif
+	return block;
+}
+
+void * gim_alloca(size_t size)
+{   // Use the registered alloca handler when there is one; otherwise defer to gim_alloc().
+    return g_allocafn ? g_allocafn(size) : gim_alloc(size);
+}
+
+/// Resizes a block: a handler set with gim_set_realloc_handler() takes over, else alloc + copy + free.
+void * gim_realloc(void *ptr, size_t oldsize, size_t newsize)
+{
+    if (g_reallocfn) return g_reallocfn(ptr, oldsize, newsize);
+    void * newptr = gim_alloc(newsize);
+    if (!newptr) return 0; // out of memory: ptr is not freed and stays usable
+    gim_simd_memcpy(newptr,ptr,oldsize<newsize?oldsize:newsize); // keep what fits
+    gim_free(ptr);
+    return newptr;
+}
+/// Releases a block through the registered free handler or the default deallocator; null is ignored.
+void gim_free(void *ptr)
+{
+	if (ptr == 0) return;
+	// A registered free handler takes precedence over the built-in deallocator.
+	if (g_freefn)
+	{
+		g_freefn(ptr);
+		return;
+	}
+#ifdef GIM_SIMD_MEMORY
+	btAlignedFree(ptr);
+#else
+	free(ptr);
+#endif
+}
+
diff --git a/src/bullet/BulletCollision/Gimpact/gim_memory.h b/src/bullet/BulletCollision/Gimpact/gim_memory.h
new file mode 100644
index 00000000..e203888a
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_memory.h
@@ -0,0 +1,190 @@
+#ifndef GIM_MEMORY_H_INCLUDED
+#define GIM_MEMORY_H_INCLUDED
+/*! \file gim_memory.h
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+
+#include "gim_math.h"
+#include <string.h>
+
+#ifdef PREFETCH
+#include <xmmintrin.h>	// for prefetch
+#define pfval	64
+#define pfval2	128
+//! Prefetch 64
+#define pf(_x,_i)	_mm_prefetch((void *)(_x + _i + pfval), 0)
+//! Prefetch 128
+#define pf2(_x,_i)	_mm_prefetch((void *)(_x + _i + pfval2), 0)
+#else
+//! Prefetch 64
+#define pf(_x,_i)
+//! Prefetch 128
+#define pf2(_x,_i)
+#endif
+
+
+///Helpers for manipulating packed arrays of numbers
+//! Copies element_count entries from source_array to dest_array, one assignment per entry.
+//! The arguments may be expressions such as (base + offset); they are parenthesized here.
+#define GIM_COPY_ARRAYS(dest_array,source_array,element_count)\
+{\
+    for (GUINT _i_=0;_i_<(element_count);++_i_)\
+        (dest_array)[_i_] = (source_array)[_i_];\
+}
+
+//! Applies copy_macro(dest_array[k], source_array[k]) for k = 0 .. element_count-1.
+//! The array and count arguments may be expressions; they are parenthesized here.
+#define GIM_COPY_ARRAYS_1(dest_array,source_array,element_count,copy_macro)\
+{\
+    for (GUINT _i_=0;_i_<(element_count);++_i_)\
+        copy_macro((dest_array)[_i_],(source_array)[_i_]);\
+}
+
+
+//! Assigns 0 to the first element_count entries of array.
+//! The arguments may be expressions such as (base + offset); they are parenthesized here.
+#define GIM_ZERO_ARRAY(array,element_count)\
+{\
+    for (GUINT _i_=0;_i_<(element_count);++_i_)\
+        (array)[_i_] = 0;\
+}
+
+//! Assigns constant to the first element_count entries of array.
+//! All three arguments may be expressions; they are parenthesized here.
+#define GIM_CONSTANT_ARRAY(array,element_count,constant)\
+{\
+    for (GUINT _i_=0;_i_<(element_count);++_i_)\
+        (array)[_i_] = (constant);\
+}
+
+
+///Function prototypes to allocate and free memory.
+typedef void * gim_alloc_function (size_t size);
+typedef void * gim_alloca_function (size_t size);//Allocs on the heap
+typedef void * gim_realloc_function (void *ptr, size_t oldsize, size_t newsize);
+typedef void gim_free_function (void *ptr);
+
+
+///Memory Function Handlers
+///set new memory management functions. if fn is 0, the default handlers are used.
+void gim_set_alloc_handler (gim_alloc_function *fn);
+void gim_set_alloca_handler (gim_alloca_function *fn);
+void gim_set_realloc_handler (gim_realloc_function *fn);
+void gim_set_free_handler (gim_free_function *fn);
+
+
+///get current memory management functions.
+gim_alloc_function *gim_get_alloc_handler (void);
+gim_alloca_function *gim_get_alloca_handler(void);
+gim_realloc_function *gim_get_realloc_handler (void);
+gim_free_function  *gim_get_free_handler (void);
+
+
+///Standard memory functions
+void * gim_alloc(size_t size);
+void * gim_alloca(size_t size);
+void * gim_realloc(void *ptr, size_t oldsize, size_t newsize);
+void gim_free(void *ptr);
+
+
+
+#if defined (_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+    #define GIM_SIMD_MEMORY 1
+#endif
+
+//! SIMD POINTER INTEGER
+#define SIMD_T GUINT64
+//! SIMD INTEGER SIZE
+#define SIMD_T_SIZE sizeof(SIMD_T)
+
+
+//! Copies copysize bytes from src to dst: a plain byte loop when GIM_SIMD_MEMORY is defined,
+//! a call to memcpy() otherwise. The word-sized variant is kept below, commented out.
+inline void gim_simd_memcpy(void * dst, const void * src, size_t copysize)
+{
+#ifdef GIM_SIMD_MEMORY
+/*
+//'long long int' is incompatible with visual studio 6...
+    //copy words
+    SIMD_T * ui_src_ptr = (SIMD_T *)src;
+    SIMD_T * ui_dst_ptr = (SIMD_T *)dst;
+    while(copysize>=SIMD_T_SIZE)
+    {
+        *(ui_dst_ptr++) = *(ui_src_ptr++);
+        copysize-=SIMD_T_SIZE;
+    }
+    if(copysize==0) return;
+*/
+
+    const char * from = (const char *)src;
+    char * to = (char *)dst;
+    for (size_t left = copysize; left > 0; --left)
+    {
+        *to++ = *from++;
+    }
+#else
+    memcpy(dst,src,copysize);
+#endif
+}
+
+
+
+//! Exchanges the elements at positions _i and _j of _array through a temporary copy.
+template<class T>
+inline void gim_swap_elements(T* _array,size_t _i,size_t _j)
+{
+	T saved = _array[_i];
+	_array[_i] = _array[_j];
+	_array[_j] = saved;
+}
+
+template<class T>
+inline void gim_swap_elements_memcpy(T* _array,size_t _i,size_t _j)
+{
+	char _e_tmp_[sizeof(T)];
+	gim_simd_memcpy(_e_tmp_,&_array[_i],sizeof(T));
+	gim_simd_memcpy(&_array[_i],&_array[_j],sizeof(T));
+	gim_simd_memcpy(&_array[_j],_e_tmp_,sizeof(T));
+}
+
+//! Swaps the SIZE-byte records at indices _i and _j of a packed byte array.
+template <int SIZE>
+inline void gim_swap_elements_ptr(char * _array,size_t _i,size_t _j)
+{
+	char * const first = _array + _i*SIZE, * const second = _array + _j*SIZE;
+	char scratch[SIZE];
+	gim_simd_memcpy(scratch,first,SIZE);
+	gim_simd_memcpy(first,second,SIZE);
+	gim_simd_memcpy(second,scratch,SIZE);
+}
+
+#endif // GIM_MEMORY_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_radixsort.h b/src/bullet/BulletCollision/Gimpact/gim_radixsort.h
new file mode 100644
index 00000000..c246ef12
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_radixsort.h
@@ -0,0 +1,406 @@
+#ifndef GIM_RADIXSORT_H_INCLUDED
+#define GIM_RADIXSORT_H_INCLUDED
+/*! \file gim_radixsort.h
+\author Francisco Leon Najera.
+Based on the work of Michael Herf : "fast floating-point radix sort"
+Available at http://www.stereopsis.com/radix.html
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+#include "gim_memory.h"
+
+///Functor prototypes for sorting.
+//! Three-way comparator: returns -1 if a<b, 1 if a>b and 0 otherwise, using only operator< and operator>
+class less_comparator
+{
+	public:
+
+	template<class T,class Z>
+	inline int operator() ( const T& a, const Z& b )
+	{
+		return ( a<b?-1:(a>b?1:0));
+	}
+};
+
+//! Comparator returning (int)(a-b). NOTE(review): for unsigned or wider-than-int T the difference can wrap or truncate - confirm value ranges
+class integer_comparator
+{
+	public:
+
+	template<class T>
+	inline int operator() ( const T& a, const T& b )
+	{
+		return (int)(a-b);
+	}
+};
+
+//!Prototype key extractor: obtains the integer sort key of an object by casting it to GUINT
+class uint_key_func
+{
+public:
+	template<class T>
+	inline GUINT operator()( const T& a)
+	{
+		return (GUINT)a;
+	}
+};
+
+
+//!Prototype for copying elements: assigns b to a with T's assignment operator (a is the destination)
+class copy_elements_func
+{
+public:
+	template<class T>
+	inline void operator()(T& a,T& b)
+	{
+		a = b;
+	}
+};
+
+//! Prototype for copying elements: copies sizeof(T) raw bytes from b into a with gim_simd_memcpy (a is the destination)
+class memcopy_elements_func
+{
+public:
+	template<class T>
+	inline void operator()(T& a,T& b)
+	{
+		gim_simd_memcpy(&a,&b,sizeof(T));
+	}
+};
+
+
+//! @{
+struct GIM_RSORT_TOKEN
+{
+    GUINT m_key; // sort key; the only member looked at by operator< and operator>
+    GUINT m_value; // payload; gim_radix_sort_array_tokens stores the element's original index here
+    GIM_RSORT_TOKEN() // performs no member initialization
+    {
+    }
+    GIM_RSORT_TOKEN(const GIM_RSORT_TOKEN& rtoken)
+    {
+    	m_key = rtoken.m_key;
+    	m_value = rtoken.m_value;
+    }
+
+    inline bool operator <(const GIM_RSORT_TOKEN& other) const
+	{
+		return (m_key < other.m_key);
+	}
+
+	inline bool operator >(const GIM_RSORT_TOKEN& other) const
+	{
+		return (m_key > other.m_key);
+	}
+};
+
+//! Token comparator returning (int)(a.m_key - b.m_key). NOTE(review): if GUINT is unsigned, keys far apart can give the wrong sign - confirm
+class GIM_RSORT_TOKEN_COMPARATOR
+{
+	public:
+
+	inline int operator()( const GIM_RSORT_TOKEN& a, const GIM_RSORT_TOKEN& b )
+	{
+		return (int)((a.m_key) - (b.m_key));
+	}
+};
+
+
+// kHist: number of buckets in each radix histogram (2048 = 2^11, one bucket per 11-bit digit value)
+#define kHist 2048
+// ---- extract the low (bits 0-10), middle (bits 11-21) and high (bits 22 and up) digit; x is not parenthesized, pass a simple expression
+#define D11_0(x)	(x & 0x7FF)
+#define D11_1(x)	(x >> 11 & 0x7FF)
+#define D11_2(x)	(x >> 22 )
+
+
+
+///Radix sort of tokens by m_key in three 11-bit passes; the result ends up in `sorted`, `array` is overwritten as scratch space
+inline void gim_radix_sort_rtokens(
+				GIM_RSORT_TOKEN * array,
+				GIM_RSORT_TOKEN * sorted, GUINT element_count)
+{
+	GUINT i;
+	GUINT b0[kHist * 3]; // three kHist-sized histograms in one local array: b0, then b1, then b2
+	GUINT *b1 = b0 + kHist;
+	GUINT *b2 = b1 + kHist;
+	for (i = 0; i < kHist * 3; ++i)
+	{
+		b0[i] = 0;
+	}
+	GUINT fi;
+	GUINT pos;
+	for (i = 0; i < element_count; ++i) // count the occurrences of every digit value, all three digits at once
+	{
+	    fi = array[i].m_key;
+		b0[D11_0(fi)] ++;
+		b1[D11_1(fi)] ++;
+		b2[D11_2(fi)] ++;
+	}
+	{
+		GUINT sum0 = 0, sum1 = 0, sum2 = 0;
+		GUINT tsum;
+		for (i = 0; i < kHist; ++i) // replace each count by (running total of the previous buckets) - 1
+		{
+			tsum = b0[i] + sum0;
+			b0[i] = sum0 - 1; // one below the bucket's first slot, because the scatter loops pre-increment before use
+			sum0 = tsum;
+			tsum = b1[i] + sum1;
+			b1[i] = sum1 - 1;
+			sum1 = tsum;
+			tsum = b2[i] + sum2;
+			b2[i] = sum2 - 1;
+			sum2 = tsum;
+		}
+	}
+	for (i = 0; i < element_count; ++i) // scatter by low digit: array -> sorted
+	{
+        fi = array[i].m_key;
+		pos = D11_0(fi);
+		pos = ++b0[pos];
+		sorted[pos].m_key = array[i].m_key;
+		sorted[pos].m_value = array[i].m_value;
+	}
+	for (i = 0; i < element_count; ++i) // scatter by middle digit: sorted -> array
+	{
+        fi = sorted[i].m_key;
+		pos = D11_1(fi);
+		pos = ++b1[pos];
+		array[pos].m_key = sorted[i].m_key;
+		array[pos].m_value = sorted[i].m_value;
+	}
+	for (i = 0; i < element_count; ++i) // scatter by high digit: array -> sorted, which then holds the final order
+	{
+        fi = array[i].m_key;
+		pos = D11_2(fi);
+		pos = ++b2[pos];
+		sorted[pos].m_key = array[i].m_key;
+		sorted[pos].m_value = array[i].m_value;
+	}
+}
+
+
+
+
+/// Get the sorted tokens from an array. For generic use. Tokens are GIM_RSORT_TOKEN
+/*!
+*\param array Array of elements to sort; it is only read, never reordered
+*\param sorted_tokens Output, room for element_count tokens: sorted by key, m_value is the element's index in array
+*\param element_count element count
+*\param uintkey_macro Functor which retrieves the integer representation of an array element
+*/
+template<typename T, class GETKEY_CLASS>
+void gim_radix_sort_array_tokens(
+			T* array ,
+			GIM_RSORT_TOKEN * sorted_tokens,
+			GUINT element_count,GETKEY_CLASS uintkey_macro)
+{
+	GIM_RSORT_TOKEN * _unsorted = (GIM_RSORT_TOKEN *) gim_alloc(sizeof(GIM_RSORT_TOKEN)*element_count);
+    for (GUINT _i=0;_i<element_count;++_i)
+    {
+        _unsorted[_i].m_key = uintkey_macro(array[_i]);
+        _unsorted[_i].m_value = _i;
+    }
+    gim_radix_sort_rtokens(_unsorted,sorted_tokens,element_count);
+    // _unsorted is scratch owned by this function: release it exactly once (a second gim_free is a double free).
+    gim_free(_unsorted);
+}
+
+/// Sorts array in place by the integer key of each element. For generic use
+/*!
+\tparam T Type of the array elements
+\param array Elements to sort; overwritten with the sorted sequence
+\param element_count Number of elements in array
+\param get_uintkey_macro Functor that extracts the integer key of an element (for example uint_key_func)
+\param copy_elements_macro Functor called as copy(destination,source) for every element (for example copy_elements_func)
+*/
+template<typename T, class GETKEY_CLASS, class COPY_CLASS>
+void gim_radix_sort(
+	T * array, GUINT element_count,
+	GETKEY_CLASS get_uintkey_macro, COPY_CLASS copy_elements_macro)
+{
+	GIM_RSORT_TOKEN * _sorted = (GIM_RSORT_TOKEN  *) gim_alloc(sizeof(GIM_RSORT_TOKEN)*element_count);
+    gim_radix_sort_array_tokens(array,_sorted,element_count,get_uintkey_macro);
+    T * _original_array = (T *) gim_alloc(sizeof(T)*element_count); // raw storage; no T constructor is run on it
+    gim_simd_memcpy(_original_array,array,sizeof(T)*element_count); // byte-wise snapshot of the unsorted input
+    for (GUINT _i=0;_i<element_count;++_i)
+    {
+        copy_elements_macro(array[_i],_original_array[_sorted[_i].m_value]); // m_value is the element's original index
+    }
+    gim_free(_original_array);
+    gim_free(_sorted);
+}
+
+//! Failsafe iterative binary search with a caller-supplied comparator,
+/*!
+If the element is not found, _result_index receives the position of the nearest larger element, which may be one past _end_i.
+\param _array Array to search; the search is only meaningful if it is ordered consistently with _comp_macro
+\param _start_i first index of the searched range
+\param _end_i last index of the searched range (inclusive)
+\param _search_key Value to find
+\param _comp_macro functor called as comp(element,key): negative if element is before key, zero on a match, positive otherwise
+\return true if an element comparing equal to _search_key was found, else false
+\param _result_index the index of the found element, or if not found then it will get the index of the closest bigger value
+*/
+template<class T, typename KEYCLASS, typename COMP_CLASS>
+bool  gim_binary_search_ex(
+		const T* _array, GUINT _start_i,
+		GUINT _end_i,GUINT & _result_index,
+		const KEYCLASS & _search_key,
+		COMP_CLASS _comp_macro)
+{
+	GUINT _k;
+	int _comp_result;
+	GUINT _i = _start_i;
+	GUINT _j = _end_i+1; // exclusive upper bound of the remaining range
+	while (_i < _j)
+	{
+		_k = (_j+_i-1)/2; // middle index of [_i,_j), rounded down
+		_comp_result = _comp_macro(_array[_k], _search_key);
+		if (_comp_result == 0)
+		{
+			_result_index = _k;
+			return true;
+		}
+		else if (_comp_result < 0)
+		{
+			_i = _k+1;
+		}
+		else
+		{
+			_j = _k;
+		}
+	}
+	_result_index = _i; // insertion position: first index whose element compared greater than the key
+	return false;
+}
+
+
+
+//! Failsafe iterative binary search, template version that compares with operator== and operator<
+/*!
+If the element is not found, _result_index receives the position of the nearest larger element, which may be one past _end_i.
+\param _array Array to search; the search is only meaningful if it is in ascending operator< order
+\param _start_i first index of the searched range
+\param _end_i last index of the searched range (inclusive)
+\param _search_key Value to find
+\param _result_index the index of the found element, or if not found then it will get the index of the closest bigger value
+\return true if found, else false
+*/
+template<class T>
+bool gim_binary_search(
+	const T*_array,GUINT _start_i,
+	GUINT _end_i,const T & _search_key,
+	GUINT & _result_index)
+{
+	GUINT _i = _start_i;
+	GUINT _j = _end_i+1; // exclusive upper bound of the remaining range
+	GUINT _k;
+	while(_i < _j)
+	{
+		_k = (_j+_i-1)/2; // middle index of [_i,_j), rounded down
+		if(_array[_k]==_search_key)
+		{
+			_result_index = _k;
+			return true;
+		}
+		else if (_array[_k]<_search_key)
+		{
+			_i = _k+1;
+		}
+		else
+		{
+			_j = _k;
+		}
+	}
+	_result_index = _i; // insertion position: first index whose element is not less than and not equal to the key
+	return false;
+}
+
+
+
+///Sift-down step of the heap sort from http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Sort/Heap/ ; k and n are 1-based positions, hence the -1 when indexing pArr
+template <typename T, typename COMP_CLASS>
+void gim_down_heap(T *pArr, GUINT k, GUINT n,COMP_CLASS CompareFunc)
+{
+	/*  PRE: a[k+1..N] is a heap */
+	/* POST:  a[k..N]  is a heap */
+
+	T temp = pArr[k - 1]; // element being sifted down; written back once its final slot is known
+	/* k has child(s) */
+	while (k <= n/2)
+	{
+		int child = 2*k; // left child, 1-based
+
+		if ((child < (int)n) && CompareFunc(pArr[child - 1] , pArr[child])<0)
+		{
+			child++;
+		}
+		/* pick larger child */
+		if (CompareFunc(temp , pArr[child - 1])<0)
+		{
+			/* move child up */
+			pArr[k - 1] = pArr[child - 1];
+			k = child;
+		}
+		else
+		{
+			break;
+		}
+	}
+	pArr[k - 1] = temp;
+} /*downHeap*/
+
+//! Heap sort: sorts pArr[0..element_count-1] in place so that CompareFunc(earlier,later) is never positive (ascending order).
+template <typename T, typename COMP_CLASS>
+void gim_heap_sort(T *pArr, GUINT element_count, COMP_CLASS CompareFunc)
+{
+	/* sort a[0..N-1],  N.B. 0 to N-1 */
+	GUINT k;
+	GUINT n = element_count;
+	for (k = n/2; k > 0; k--) // build the heap bottom-up, starting at the last position that has a child
+	{
+		gim_down_heap(pArr, k, n, CompareFunc);
+	}
+
+	/* a[1..N] is now a heap */
+	while ( n>=2 )
+	{
+		gim_swap_elements(pArr,0,n-1); /* largest of a[0..n-1] */
+		--n; // the element just moved to the end is in its final place
+		/* restore a[1..i-1] heap */
+		gim_down_heap(pArr, 1, n, CompareFunc);
+	}
+}
+
+
+
+
+#endif // GIM_RADIXSORT_H_INCLUDED
diff --git a/src/bullet/BulletCollision/Gimpact/gim_tri_collision.cpp b/src/bullet/BulletCollision/Gimpact/gim_tri_collision.cpp
new file mode 100644
index 00000000..f9727e1d
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_tri_collision.cpp
@@ -0,0 +1,640 @@
+
+/*! \file gim_tri_collision.cpp
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+#include "gim_tri_collision.h"
+
+// TRI_LOCAL_EPSILON is not referenced in this file; MIN_EDGE_EDGE_DIS is the slack used by cross_line_intersection_test when comparing intervals
+#define TRI_LOCAL_EPSILON 0.000001f
+#define MIN_EDGE_EDGE_DIS 0.00001f
+
+//! Scratch data and helper routines for one triangle-vs-triangle test; triangle_collision() is what collide_triangle_hard_test calls.
+class GIM_TRIANGLE_CALCULATION_CACHE
+{
+public:
+	GREAL margin;	// sum of both triangles' margins, set by triangle_collision
+	btVector3 tu_vertices[3]; // triangle U; vertices 0 and 1 may get swapped by triangle_collision
+	btVector3 tv_vertices[3]; // triangle V; vertices 0 and 1 may get swapped by triangle_collision
+	btVector4 tu_plane;
+	btVector4 tv_plane;
+	btVector3 closest_point_u;
+	btVector3 closest_point_v;
+	btVector3 edge_edge_dir;
+	btVector3 distances; // [0]: U's vertices against V's plane, [1]: V's vertices against U's plane, [2]: edge-edge value
+	GREAL du[4]; // distances of U's vertices to tv_plane; only indices 0..2 are written in this file
+	GREAL du0du1;
+	GREAL du0du2;
+	GREAL dv[4]; // distances of V's vertices to tu_plane; only indices 0..2 are written in this file
+	GREAL dv0dv1;
+	GREAL dv0dv2;	
+	btVector3 temp_points[MAX_TRI_CLIPPING];
+	btVector3 temp_points1[MAX_TRI_CLIPPING];
+	btVector3 contact_points[MAX_TRI_CLIPPING];
+	
+
+
+	//! Outputs two edge indices and their scale factors from the plane distances; if it returns false, the faces are treated as parallel
+	SIMD_FORCE_INLINE bool compute_intervals(
+					const GREAL &D0,
+					const GREAL &D1,
+					const GREAL &D2,
+					const GREAL &D0D1,
+					const GREAL &D0D2,
+					GREAL & scale_edge0,
+					GREAL & scale_edge1,
+					GUINT &edge_index0,
+					GUINT &edge_index1)
+	{
+		if(D0D1>0.0f)
+		{
+			/* here we know that D0D2<=0.0 */
+			/* that is D0, D1 are on the same side, D2 on the other or on the plane */
+			scale_edge0 = -D2/(D0-D2);
+			scale_edge1 = -D1/(D2-D1);
+			edge_index0 = 2;edge_index1 = 1;
+		}
+		else if(D0D2>0.0f)
+		{
+			/* here we know that d0d1<=0.0 */
+			scale_edge0 = -D0/(D1-D0);
+			scale_edge1 = -D1/(D2-D1);
+			edge_index0 = 0;edge_index1 = 1;
+		}
+		else if(D1*D2>0.0f || D0!=0.0f)
+		{
+			/* here we know that d0d1<=0.0 or that D0!=0.0 */
+			scale_edge0 = -D0/(D1-D0);
+			scale_edge1 = -D2/(D0-D2);
+			edge_index0 = 0 ;edge_index1 = 2;
+		}
+		else
+		{
+			return false;
+		}
+		return true;
+	}
+
+
+	//! Clips triangle srcpoints successively against the three edge planes of triangle tripoints
+	/*! Returns the point count reported by the last clipping stage, or 0 as soon as a stage leaves no points; uses temp_points and temp_points1.
+	*/
+	SIMD_FORCE_INLINE GUINT clip_triangle(
+		const btVector4 & tri_plane,
+		const btVector3 * tripoints,
+		const btVector3 * srcpoints,
+		btVector3 * clip_points)
+	{
+		// edge 0
+
+		btVector4 edgeplane;
+
+		EDGE_PLANE(tripoints[0],tripoints[1],tri_plane,edgeplane);
+
+		GUINT clipped_count = PLANE_CLIP_TRIANGLE3D(
+			edgeplane,srcpoints[0],srcpoints[1],srcpoints[2],temp_points);
+
+		if(clipped_count == 0) return 0;
+
+		// edge 1
+
+		EDGE_PLANE(tripoints[1],tripoints[2],tri_plane,edgeplane);
+
+		clipped_count = PLANE_CLIP_POLYGON3D(
+			edgeplane,temp_points,clipped_count,temp_points1);
+
+		if(clipped_count == 0) return 0;
+
+		// edge 2
+
+		EDGE_PLANE(tripoints[2],tripoints[0],tri_plane,edgeplane);
+
+		clipped_count = PLANE_CLIP_POLYGON3D(
+			edgeplane,temp_points1,clipped_count,clip_points);
+
+		return clipped_count;
+
+
+		/*GUINT i0 = (tri_plane.closestAxis()+1)%3;
+		GUINT i1 = (i0+1)%3;
+		// edge 0
+		btVector3 temp_points[MAX_TRI_CLIPPING];
+		btVector3 temp_points1[MAX_TRI_CLIPPING];
+
+		GUINT clipped_count= PLANE_CLIP_TRIANGLE_GENERIC(
+			0,srcpoints[0],srcpoints[1],srcpoints[2],temp_points,
+			DISTANCE_EDGE(tripoints[0],tripoints[1],i0,i1));
+		
+		
+		if(clipped_count == 0) return 0;
+
+		// edge 1
+		clipped_count = PLANE_CLIP_POLYGON_GENERIC(
+			0,temp_points,clipped_count,temp_points1,
+			DISTANCE_EDGE(tripoints[1],tripoints[2],i0,i1));
+
+		if(clipped_count == 0) return 0;
+
+		// edge 2
+		clipped_count = PLANE_CLIP_POLYGON_GENERIC(
+			0,temp_points1,clipped_count,clipped_points,
+			DISTANCE_EDGE(tripoints[2],tripoints[0],i0,i1));
+
+		return clipped_count;*/
+	}
+	//! Orders the pair so that isect0<=isect1, swapping the edge indices and the points together with the values
+	SIMD_FORCE_INLINE void sort_isect(
+		GREAL & isect0,GREAL & isect1,GUINT &e0,GUINT &e1,btVector3 & vec0,btVector3 & vec1)
+	{
+		if(isect1<isect0)
+		{
+			//swap
+			GIM_SWAP_NUMBERS(isect0,isect1);
+			GIM_SWAP_NUMBERS(e0,e1);
+			btVector3 tmp = vec0;
+			vec0 = vec1;
+			vec1 = tmp;
+		}
+	}
+
+	//! Test verifying interval intersection with the direction between planes
+	/*!
+	\pre tv_plane, tu_plane, du[0..2], dv[0..2] and the du0du1/du0du2/dv0dv1/dv0dv2 products must be set
+	\post
+	when 3 is returned, distances[2] is set with the distance and
+	closest_point_u, closest_point_v, edge_edge_dir are set too
+	\return
+	- 0: faces are parallel
+	- 1: face U casts face V
+	- 2: face V casts face U
+	- 3: nearest edges
+	*/
+	SIMD_FORCE_INLINE GUINT cross_line_intersection_test()
+	{
+		// Compute direction of intersection line
+		edge_edge_dir = tu_plane.cross(tv_plane);
+		GREAL Dlen;
+		VEC_LENGTH(edge_edge_dir,Dlen);
+
+		if(Dlen<0.0001)
+		{
+			return 0; //faces nearly parallel
+		}
+
+		edge_edge_dir*= 1/Dlen;//normalize
+
+
+		// Compute interval for triangle 1
+		GUINT tu_e0,tu_e1;//edge indices
+		GREAL tu_scale_e0,tu_scale_e1;//edge scale
+		if(!compute_intervals(du[0],du[1],du[2],
+			du0du1,du0du2,tu_scale_e0,tu_scale_e1,tu_e0,tu_e1)) return 0;
+
+		// Compute interval for triangle 2
+		GUINT tv_e0,tv_e1;//edge indices
+		GREAL tv_scale_e0,tv_scale_e1;//edge scale
+
+		if(!compute_intervals(dv[0],dv[1],dv[2],
+			dv0dv1,dv0dv2,tv_scale_e0,tv_scale_e1,tv_e0,tv_e1)) return 0;
+
+		//projected vertices
+		btVector3 up_e0 = tu_vertices[tu_e0].lerp(tu_vertices[(tu_e0+1)%3],tu_scale_e0);
+		btVector3 up_e1 = tu_vertices[tu_e1].lerp(tu_vertices[(tu_e1+1)%3],tu_scale_e1);
+
+		btVector3 vp_e0 = tv_vertices[tv_e0].lerp(tv_vertices[(tv_e0+1)%3],tv_scale_e0);
+		btVector3 vp_e1 = tv_vertices[tv_e1].lerp(tv_vertices[(tv_e1+1)%3],tv_scale_e1);
+
+		//projected intervals
+		GREAL isect_u[] = {up_e0.dot(edge_edge_dir),up_e1.dot(edge_edge_dir)};
+		GREAL isect_v[] = {vp_e0.dot(edge_edge_dir),vp_e1.dot(edge_edge_dir)};
+
+		sort_isect(isect_u[0],isect_u[1],tu_e0,tu_e1,up_e0,up_e1);
+		sort_isect(isect_v[0],isect_v[1],tv_e0,tv_e1,vp_e0,vp_e1);
+
+		const GREAL midpoint_u = 0.5f*(isect_u[0]+isect_u[1]); // midpoint
+		const GREAL midpoint_v = 0.5f*(isect_v[0]+isect_v[1]); // midpoint
+
+		if(midpoint_u<midpoint_v)
+		{
+			if(isect_u[1]>=isect_v[1]) // face U casts face V
+			{
+				return 1;
+			}
+			else if(isect_v[0]<=isect_u[0]) // face V casts face U
+			{
+				return 2;
+			}
+			// closest points
+			closest_point_u = up_e1;
+			closest_point_v = vp_e0;
+			// calc edges and separation
+
+			if(isect_u[1]+ MIN_EDGE_EDGE_DIS<isect_v[0]) //calc distance between two lines instead
+			{
+				SEGMENT_COLLISION(
+					tu_vertices[tu_e1],tu_vertices[(tu_e1+1)%3],
+					tv_vertices[tv_e0],tv_vertices[(tv_e0+1)%3],
+					closest_point_u,
+					closest_point_v);
+
+				edge_edge_dir = closest_point_u-closest_point_v;
+				VEC_LENGTH(edge_edge_dir,distances[2]);
+				edge_edge_dir *= 1.0f/distances[2];// normalize
+			}
+			else
+			{
+				distances[2] = isect_v[0]-isect_u[1];//distance negative
+				//edge_edge_dir *= -1.0f; //normal pointing from V to U
+			}
+
+		}
+		else
+		{
+			if(isect_v[1]>=isect_u[1]) // face V casts face U
+			{
+				return 2;
+			}
+			else if(isect_u[0]<=isect_v[0]) // face U casts face V
+			{
+				return 1;
+			}
+			// closest points
+			closest_point_u = up_e0;
+			closest_point_v = vp_e1;
+			// calc edges and separation
+
+			if(isect_v[1]+MIN_EDGE_EDGE_DIS<isect_u[0]) //calc distance between two lines instead
+			{
+				SEGMENT_COLLISION(
+					tu_vertices[tu_e0],tu_vertices[(tu_e0+1)%3],
+					tv_vertices[tv_e1],tv_vertices[(tv_e1+1)%3],
+					closest_point_u,
+					closest_point_v);
+
+				edge_edge_dir = closest_point_u-closest_point_v;
+				VEC_LENGTH(edge_edge_dir,distances[2]);
+				edge_edge_dir *= 1.0f/distances[2];// normalize
+			}
+			else
+			{
+				distances[2] = isect_u[0]-isect_v[1];//distance negative
+				//edge_edge_dir *= -1.0f; //normal pointing from V to U
+			}
+		}
+		return 3;
+	}
+
+
+	//! Two-sided triangle test: checks U against V's plane and V against U's plane, then clips one face by the other to build contacts
+	SIMD_FORCE_INLINE bool triangle_collision(
+					const btVector3 & u0,
+					const btVector3 & u1,
+					const btVector3 & u2,
+					GREAL margin_u,
+					const btVector3 & v0,
+					const btVector3 & v1,
+					const btVector3 & v2,
+					GREAL margin_v,
+					GIM_TRIANGLE_CONTACT_DATA & contacts)
+	{
+
+		margin = margin_u + margin_v;
+
+		tu_vertices[0] = u0;
+		tu_vertices[1] = u1;
+		tu_vertices[2] = u2;
+
+		tv_vertices[0] = v0;
+		tv_vertices[1] = v1;
+		tv_vertices[2] = v2;
+
+		//create planes
+		// plane v vs U points
+
+		TRIANGLE_PLANE(tv_vertices[0],tv_vertices[1],tv_vertices[2],tv_plane);
+
+		du[0] = DISTANCE_PLANE_POINT(tv_plane,tu_vertices[0]);
+		du[1] = DISTANCE_PLANE_POINT(tv_plane,tu_vertices[1]);
+		du[2] = DISTANCE_PLANE_POINT(tv_plane,tu_vertices[2]);
+
+
+		du0du1 = du[0] * du[1];
+		du0du2 = du[0] * du[2];
+
+
+		if(du0du1>0.0f && du0du2>0.0f)	// same sign on all of them + not equal 0 ?
+		{
+			if(du[0]<0) //we need test behind the triangle plane
+			{
+				distances[0] = GIM_MAX3(du[0],du[1],du[2]);
+				distances[0] = -distances[0];
+				if(distances[0]>margin) return false; //never intersect
+
+				//reorder triangle v
+				VEC_SWAP(tv_vertices[0],tv_vertices[1]);
+				VEC_SCALE_4(tv_plane,-1.0f,tv_plane);
+			}
+			else
+			{
+				distances[0] = GIM_MIN3(du[0],du[1],du[2]);
+				if(distances[0]>margin) return false; //never intersect
+			}
+		}
+		else
+		{
+			//Look if we need to invert the triangle
+			distances[0] = (du[0]+du[1]+du[2])/3.0f; //centroid
+
+			if(distances[0]<0.0f)
+			{
+				//reorder triangle v
+				VEC_SWAP(tv_vertices[0],tv_vertices[1]);
+				VEC_SCALE_4(tv_plane,-1.0f,tv_plane);
+
+				distances[0] = GIM_MAX3(du[0],du[1],du[2]);
+				distances[0] = -distances[0];
+			}
+			else
+			{
+				distances[0] = GIM_MIN3(du[0],du[1],du[2]);
+			}
+		}
+
+
+		// plane U vs V points
+
+		TRIANGLE_PLANE(tu_vertices[0],tu_vertices[1],tu_vertices[2],tu_plane);
+
+		dv[0] = DISTANCE_PLANE_POINT(tu_plane,tv_vertices[0]);
+		dv[1] = DISTANCE_PLANE_POINT(tu_plane,tv_vertices[1]);
+		dv[2] = DISTANCE_PLANE_POINT(tu_plane,tv_vertices[2]);
+
+		dv0dv1 = dv[0] * dv[1];
+		dv0dv2 = dv[0] * dv[2];
+
+
+		if(dv0dv1>0.0f && dv0dv2>0.0f)	// same sign on all of them + not equal 0 ?
+		{
+			if(dv[0]<0) //we need test behind the triangle plane
+			{
+				distances[1] = GIM_MAX3(dv[0],dv[1],dv[2]);
+				distances[1] = -distances[1];
+				if(distances[1]>margin) return false; //never intersect
+
+				//reorder triangle u
+				VEC_SWAP(tu_vertices[0],tu_vertices[1]);
+				VEC_SCALE_4(tu_plane,-1.0f,tu_plane);
+			}
+			else
+			{
+				distances[1] = GIM_MIN3(dv[0],dv[1],dv[2]);
+				if(distances[1]>margin) return false; //never intersect
+			}
+		}
+		else
+		{
+			//Look if we need to invert the triangle
+			distances[1] = (dv[0]+dv[1]+dv[2])/3.0f; //centroid
+
+			if(distances[1]<0.0f)
+			{
+				//reorder triangle u
+				VEC_SWAP(tu_vertices[0],tu_vertices[1]);
+				VEC_SCALE_4(tu_plane,-1.0f,tu_plane);
+
+				distances[1] = GIM_MAX3(dv[0],dv[1],dv[2]);
+				distances[1] = -distances[1];
+			}
+			else
+			{
+				distances[1] = GIM_MIN3(dv[0],dv[1],dv[2]);
+			}
+		}
+
+		GUINT bl;
+		/* bl = cross_line_intersection_test();
+		if(bl==3)
+		{
+			//take edge direction too
+			bl = distances.maxAxis();
+		}
+		else
+		{*/
+			bl = 0; // 0: clip U against V below
+			if(distances[0]<distances[1]) bl = 1; // 1: clip V against U below (the face test with the larger distance wins)
+		//}
+
+		if(bl==2) //edge edge separation; never taken while the cross_line_intersection_test call above is commented out (bl is 0 or 1)
+		{
+			if(distances[2]>margin) return false;
+
+			contacts.m_penetration_depth = -distances[2] + margin;
+			contacts.m_points[0] = closest_point_v;
+			contacts.m_point_count = 1;
+			VEC_COPY(contacts.m_separating_normal,edge_edge_dir);
+
+			return true;
+		}
+
+		//clip face against other
+
+		
+		GUINT point_count;
+		//TODO
+		if(bl == 0) //clip U points against V
+		{
+			point_count = clip_triangle(tv_plane,tv_vertices,tu_vertices,contact_points);
+			if(point_count == 0) return false;						
+			contacts.merge_points(tv_plane,margin,contact_points,point_count);			
+		}
+		else //clip V points against U
+		{
+			point_count = clip_triangle(tu_plane,tu_vertices,tv_vertices,contact_points);
+			if(point_count == 0) return false;			
+			contacts.merge_points(tu_plane,margin,contact_points,point_count);
+			contacts.m_separating_normal *= -1.f;
+		}
+		if(contacts.m_point_count == 0) return false;
+		return true;
+	}
+
+};
+
+
+/*class GIM_TRIANGLE_CALCULATION_CACHE
+{
+public:
+	GREAL margin;
+	GUINT clipped_count;
+	btVector3 tu_vertices[3];
+	btVector3 tv_vertices[3];
+	btVector3 temp_points[MAX_TRI_CLIPPING];
+	btVector3 temp_points1[MAX_TRI_CLIPPING];
+	btVector3 clipped_points[MAX_TRI_CLIPPING];
+	GIM_TRIANGLE_CONTACT_DATA contacts1;
+	GIM_TRIANGLE_CONTACT_DATA contacts2;
+
+
+	//! clip triangle
+	GUINT clip_triangle(
+		const btVector4 & tri_plane,
+		const btVector3 * tripoints,
+		const btVector3 * srcpoints,
+		btVector3 * clipped_points)
+	{
+		// edge 0
+
+		btVector4 edgeplane;
+
+		EDGE_PLANE(tripoints[0],tripoints[1],tri_plane,edgeplane);
+
+		GUINT clipped_count = PLANE_CLIP_TRIANGLE3D(
+			edgeplane,srcpoints[0],srcpoints[1],srcpoints[2],temp_points);
+
+		if(clipped_count == 0) return 0;
+
+		// edge 1
+
+		EDGE_PLANE(tripoints[1],tripoints[2],tri_plane,edgeplane);
+
+		clipped_count = PLANE_CLIP_POLYGON3D(
+			edgeplane,temp_points,clipped_count,temp_points1);
+
+		if(clipped_count == 0) return 0;
+
+		// edge 2
+
+		EDGE_PLANE(tripoints[2],tripoints[0],tri_plane,edgeplane);
+
+		clipped_count = PLANE_CLIP_POLYGON3D(
+			edgeplane,temp_points1,clipped_count,clipped_points);
+
+		return clipped_count;
+	}
+
+
+
+
+	//! collides only on one side
+	bool triangle_collision(
+					const btVector3 & u0,
+					const btVector3 & u1,
+					const btVector3 & u2,
+					GREAL margin_u,
+					const btVector3 & v0,
+					const btVector3 & v1,
+					const btVector3 & v2,
+					GREAL margin_v,
+					GIM_TRIANGLE_CONTACT_DATA & contacts)
+	{
+
+		margin = margin_u + margin_v;
+
+		
+		tu_vertices[0] = u0;
+		tu_vertices[1] = u1;
+		tu_vertices[2] = u2;
+
+		tv_vertices[0] = v0;
+		tv_vertices[1] = v1;
+		tv_vertices[2] = v2;
+
+		//create planes
+		// plane v vs U points
+
+
+		TRIANGLE_PLANE(tv_vertices[0],tv_vertices[1],tv_vertices[2],contacts1.m_separating_normal);
+
+		clipped_count = clip_triangle(
+			contacts1.m_separating_normal,tv_vertices,tu_vertices,clipped_points);
+
+		if(clipped_count == 0 )
+		{
+			 return false;//Reject
+		}
+
+		//find most deep interval face1
+		contacts1.merge_points(contacts1.m_separating_normal,margin,clipped_points,clipped_count);
+		if(contacts1.m_point_count == 0) return false; // too far
+
+		//Normal pointing to triangle1
+		//contacts1.m_separating_normal *= -1.f;
+
+		//Clip tri1 by tri2 edges
+
+		TRIANGLE_PLANE(tu_vertices[0],tu_vertices[1],tu_vertices[2],contacts2.m_separating_normal);
+
+		clipped_count = clip_triangle(
+			contacts2.m_separating_normal,tu_vertices,tv_vertices,clipped_points);
+
+		if(clipped_count == 0 )
+		{
+			 return false;//Reject
+		}
+
+		//find most deep interval face1
+		contacts2.merge_points(contacts2.m_separating_normal,margin,clipped_points,clipped_count);
+		if(contacts2.m_point_count == 0) return false; // too far
+
+		contacts2.m_separating_normal *= -1.f;
+
+		////check most dir for contacts
+		if(contacts2.m_penetration_depth<contacts1.m_penetration_depth)
+		{
+			contacts.copy_from(contacts2);
+		}
+		else
+		{
+			contacts.copy_from(contacts1);
+		}
+		return true;
+	}
+
+
+};*/
+
+//! Triangle-vs-triangle test: passes this triangle as U and `other` as V (with their margins) to a local
+//! GIM_TRIANGLE_CALCULATION_CACHE and returns its triangle_collision() result; contact output goes to contact_data.
+bool GIM_TRIANGLE::collide_triangle_hard_test(
+		const GIM_TRIANGLE & other,
+		GIM_TRIANGLE_CONTACT_DATA & contact_data) const
+{
+	GIM_TRIANGLE_CALCULATION_CACHE calc_cache;	// scratch object, local to this call
+	return calc_cache.triangle_collision(
+					m_vertices[0],m_vertices[1],m_vertices[2],m_margin,
+					other.m_vertices[0],other.m_vertices[1],other.m_vertices[2],other.m_margin,
+					contact_data);
+
+}
+
+
+
+
diff --git a/src/bullet/BulletCollision/Gimpact/gim_tri_collision.h b/src/bullet/BulletCollision/Gimpact/gim_tri_collision.h
new file mode 100644
index 00000000..5b552a1e
--- /dev/null
+++ b/src/bullet/BulletCollision/Gimpact/gim_tri_collision.h
@@ -0,0 +1,379 @@
+#ifndef GIM_TRI_COLLISION_H_INCLUDED
+#define GIM_TRI_COLLISION_H_INCLUDED
+
+/*! \file gim_tri_collision.h
+\author Francisco Leon Najera
+*/
+/*
+-----------------------------------------------------------------------------
+This source file is part of GIMPACT Library.
+
+For the latest info, see http://gimpact.sourceforge.net/
+
+Copyright (c) 2006 Francisco Leon Najera. C.C. 80087371.
+email: projectileman@yahoo.com
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of EITHER:
+   (1) The GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 2.1 of the License, or (at
+       your option) any later version. The text of the GNU Lesser
+       General Public License is included with this library in the
+       file GIMPACT-LICENSE-LGPL.TXT.
+   (2) The BSD-style license that is included with this library in
+       the file GIMPACT-LICENSE-BSD.TXT.
+   (3) The zlib/libpng license that is included with this library in
+       the file GIMPACT-LICENSE-ZLIB.TXT.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files
+ GIMPACT-LICENSE-LGPL.TXT, GIMPACT-LICENSE-ZLIB.TXT and GIMPACT-LICENSE-BSD.TXT for more details.
+
+-----------------------------------------------------------------------------
+*/
+
+#include "gim_box_collision.h"
+#include "gim_clip_polygon.h"
+
+
+
+
+#define MAX_TRI_CLIPPING 16 // array size used for m_points and point_indices in GIM_TRIANGLE_CONTACT_DATA
+
+//! Structure for collision: penetration depth, separating plane and the selected contact points
+struct GIM_TRIANGLE_CONTACT_DATA
+{
+    GREAL m_penetration_depth; //!< largest (margin - distance) value found by mergepoints_generic
+    GUINT m_point_count; //!< number of valid entries in m_points
+    btVector4 m_separating_normal; //!< plane given to merge_points, stored as a btVector4
+    btVector3 m_points[MAX_TRI_CLIPPING]; //!< selected points; only the first m_point_count entries are written
+
+	SIMD_FORCE_INLINE void copy_from(const GIM_TRIANGLE_CONTACT_DATA& other) // copies depth, normal, count and the first m_point_count points
+	{
+		m_penetration_depth = other.m_penetration_depth;
+		m_separating_normal = other.m_separating_normal;
+		m_point_count = other.m_point_count;
+		GUINT i = m_point_count;
+		while(i--) // visits indices m_point_count-1 down to 0; entries past the count are not copied
+		{
+			m_points[i] = other.m_points[i];
+		}
+	}
+
+	GIM_TRIANGLE_CONTACT_DATA() // empty body and no initializer list: no member is assigned here
+	{
+	}
+
+	GIM_TRIANGLE_CONTACT_DATA(const GIM_TRIANGLE_CONTACT_DATA& other) // copy constructor, delegates to copy_from
+	{
+		copy_from(other);
+	}
+
+	//! Select the deepest candidate points and store them in m_points.
+	// A point's penetration is margin - distance_func(plane, point); points with a negative value are skipped.
+	// A strictly larger penetration restarts the selection; later points within G_EPSILON of that maximum are appended.
+    // NOTE(review): no bound check against MAX_TRI_CLIPPING is visible; presumably callers keep point_count <= MAX_TRI_CLIPPING - confirm.
+    template<typename DISTANCE_FUNC,typename CLASS_PLANE>
+    SIMD_FORCE_INLINE void mergepoints_generic(const CLASS_PLANE & plane,
+    				GREAL margin, const btVector3 * points, GUINT point_count, DISTANCE_FUNC distance_func)
+    {	
+    	m_point_count = 0;
+    	m_penetration_depth= -1000.0f; // below every accepted value (accepted values are >= 0), so the first accepted point becomes the maximum
+
+		GUINT point_indices[MAX_TRI_CLIPPING]; // indices into points[] of the currently selected candidates
+
+		GUINT _k;
+
+		for(_k=0;_k<point_count;_k++)
+		{
+			GREAL _dist = -distance_func(plane,points[_k]) + margin; // penetration value of this point
+
+			if(_dist>=0.0f) // a negative value means distance_func returned more than margin: skip the point
+			{
+				if(_dist>m_penetration_depth) // strictly deeper: restart the selection with this point
+				{
+					m_penetration_depth = _dist;
+					point_indices[0] = _k;
+					m_point_count=1;
+				}
+				else if((_dist+G_EPSILON)>=m_penetration_depth) // within G_EPSILON of the current maximum: keep as an extra point
+				{
+					point_indices[m_point_count] = _k;
+					m_point_count++;
+				}
+			}
+		}
+
+		for( _k=0;_k<m_point_count;_k++) // copy the selected points into m_points
+		{
+			m_points[_k] = points[point_indices[_k]];
+		}
+	}
+
+	//! Stores plane in m_separating_normal, then runs mergepoints_generic with a DISTANCE_PLANE_3D_FUNC functor
+	SIMD_FORCE_INLINE void merge_points(const btVector4 & plane, GREAL margin,
+										 const btVector3 * points, GUINT point_count)
+	{
+		m_separating_normal = plane;
+		mergepoints_generic(plane, margin, points, point_count, DISTANCE_PLANE_3D_FUNC());
+	}
+};
+
+
+//! Class for colliding triangles: three vertices plus a margin, with box, plane, point-containment and ray queries
+class GIM_TRIANGLE
+{
+public:
+	btScalar m_margin; //!< margin passed to the box and collision routines; set to 0.1f by the constructor
+    btVector3 m_vertices[3]; //!< the three triangle vertices
+
+    GIM_TRIANGLE():m_margin(0.1f) // only m_margin is set; the vertices are not assigned here
+    {
+    }
+
+    SIMD_FORCE_INLINE GIM_AABB get_box()  const // returns a GIM_AABB built from the three vertices and m_margin
+    {
+    	return GIM_AABB(m_vertices[0],m_vertices[1],m_vertices[2],m_margin);
+    }
+
+    SIMD_FORCE_INLINE void get_normal(btVector3 &normal)  const // fills normal through the TRIANGLE_NORMAL macro
+    {
+    	TRIANGLE_NORMAL(m_vertices[0],m_vertices[1],m_vertices[2],normal);
+    }
+
+    SIMD_FORCE_INLINE void get_plane(btVector4 &plane)  const // fills plane through the TRIANGLE_PLANE macro
+    {
+    	TRIANGLE_PLANE(m_vertices[0],m_vertices[1],m_vertices[2],plane);;
+    }
+
+    SIMD_FORCE_INLINE void apply_transform(const btTransform & trans) // replaces each vertex, in place, by trans applied to it
+    {
+    	m_vertices[0] = trans(m_vertices[0]);
+    	m_vertices[1] = trans(m_vertices[1]);
+    	m_vertices[2] = trans(m_vertices[2]);
+    }
+
+    SIMD_FORCE_INLINE void get_edge_plane(GUINT edge_index,const btVector3 &triangle_normal,btVector4 &plane)  const // fills plane via EDGE_PLANE for the edge starting at vertex edge_index
+    {
+		const btVector3 & e0 = m_vertices[edge_index];
+		const btVector3 & e1 = m_vertices[(edge_index+1)%3]; // next vertex, wrapping from 2 back to 0
+		EDGE_PLANE(e0,e1,triangle_normal,plane);
+    }
+
+    //! Gets the relative transformation of this triangle
+    /*!
+    The transformation is oriented to the triangle normal, and aligned to the 1st edge of this triangle. The position corresponds to vertex 0:
+    - triangle normal corresponds to Z axis.
+    - 1st normalized edge corresponds to X axis,
+    - Y axis is set to the cross product of the Z axis with the X axis.
+    */
+    SIMD_FORCE_INLINE void get_triangle_transform(btTransform & triangle_transform)  const
+    {
+    	btMatrix3x3 & matrix = triangle_transform.getBasis(); // the basis is edited in place through this reference
+
+    	btVector3 zaxis;
+    	get_normal(zaxis);
+    	MAT_SET_Z(matrix,zaxis);
+
+    	btVector3 xaxis = m_vertices[1] - m_vertices[0];
+    	VEC_NORMALIZE(xaxis);
+    	MAT_SET_X(matrix,xaxis);
+
+    	//y axis (the xaxis variable is reused to hold it)
+    	xaxis = zaxis.cross(xaxis);
+    	MAT_SET_Y(matrix,xaxis);
+
+    	triangle_transform.setOrigin(m_vertices[0]);
+    }
+
+
+	//! Test triangles by finding separating axis
+	/*!
+	\param other Triangle to collide with
+	\param contact_data Structure for holding contact points, normal and penetration depth; The normal is pointing toward this triangle from the other triangle
+	*/
+	bool collide_triangle_hard_test(
+		const GIM_TRIANGLE & other,
+		GIM_TRIANGLE_CONTACT_DATA & contact_data) const;
+
+	//! Test boxes before doing hard test
+	/*!
+	\param other Triangle to collide with
+	\param contact_data Structure for holding contact points, normal and penetration depth; The normal is pointing toward this triangle from the other triangle
+	\return false when the boxes of the two triangles do not collide; otherwise whatever collide_triangle_hard_test returns
+	*/
+	SIMD_FORCE_INLINE bool collide_triangle(
+		const GIM_TRIANGLE & other,
+		GIM_TRIANGLE_CONTACT_DATA & contact_data) const
+	{
+		//test box collision
+		GIM_AABB boxu(m_vertices[0],m_vertices[1],m_vertices[2],m_margin);
+		GIM_AABB boxv(other.m_vertices[0],other.m_vertices[1],other.m_vertices[2],other.m_margin);
+		if(!boxu.has_collision(boxv)) return false;
+
+		//do hard test
+		return collide_triangle_hard_test(other,contact_data);
+	}
+
+	/*!
+
+	Solve the System for u,v parameters:
+
+	u*axe1[i1] + v*axe2[i1] = vecproj[i1]
+	u*axe1[i2] + v*axe2[i2] = vecproj[i2]
+
+	substitute:
+	v = (vecproj[i2] - u*axe1[i2])/axe2[i2]
+
+	then the first equation in terms of 'u':
+
+	--> u*axe1[i1] + ((vecproj[i2] - u*axe1[i2])/axe2[i2])*axe2[i1] = vecproj[i1]
+
+	--> u*axe1[i1] + vecproj[i2]*axe2[i1]/axe2[i2] - u*axe1[i2]*axe2[i1]/axe2[i2] = vecproj[i1]
+
+	--> u*(axe1[i1]  - axe1[i2]*axe2[i1]/axe2[i2]) = vecproj[i1] - vecproj[i2]*axe2[i1]/axe2[i2]
+
+	--> u*((axe1[i1]*axe2[i2]  - axe1[i2]*axe2[i1])/axe2[i2]) = (vecproj[i1]*axe2[i2] - vecproj[i2]*axe2[i1])/axe2[i2]
+
+	--> u*(axe1[i1]*axe2[i2]  - axe1[i2]*axe2[i1]) = vecproj[i1]*axe2[i2] - vecproj[i2]*axe2[i1]
+
+	--> u = (vecproj[i1]*axe2[i2] - vecproj[i2]*axe2[i1]) /(axe1[i1]*axe2[i2]  - axe1[i2]*axe2[i1])
+
+if 0.0<= u+v <=1.0 then they are inside of triangle
+
+	\return false if the point is outside of the triangle. This function doesn't take the margin into account; comparisons use a G_EPSILON tolerance
+	*/
+	SIMD_FORCE_INLINE bool get_uv_parameters(
+			const btVector3 & point,
+			const btVector3 & tri_plane,
+			GREAL & u, GREAL & v) const
+	{
+		btVector3 _axe1 = m_vertices[1]-m_vertices[0]; // edge from vertex 0 to vertex 1
+		btVector3 _axe2 = m_vertices[2]-m_vertices[0]; // edge from vertex 0 to vertex 2
+		btVector3 _vecproj = point - m_vertices[0]; // point relative to vertex 0
+		GUINT _i1 = (tri_plane.closestAxis()+1)%3; // _i1 and _i2 are the two coordinate indices other than tri_plane.closestAxis()
+		GUINT _i2 = (_i1+1)%3;
+		if(btFabs(_axe2[_i2])<G_EPSILON) // _axe2[_i2] is too small to divide by: same formulas with _i1 and _i2 swapped
+		{
+			u = (_vecproj[_i2]*_axe2[_i1] - _vecproj[_i1]*_axe2[_i2]) /(_axe1[_i2]*_axe2[_i1]  - _axe1[_i1]*_axe2[_i2]);
+			v = (_vecproj[_i1] - u*_axe1[_i1])/_axe2[_i1];
+		}
+		else
+		{
+			u = (_vecproj[_i1]*_axe2[_i2] - _vecproj[_i2]*_axe2[_i1]) /(_axe1[_i1]*_axe2[_i2]  - _axe1[_i2]*_axe2[_i1]);
+			v = (_vecproj[_i2] - u*_axe1[_i2])/_axe2[_i2];
+		}
+
+		if(u<-G_EPSILON) // u below zero beyond the tolerance
+		{
+			return false;
+		}
+		else if(v<-G_EPSILON) // v below zero beyond the tolerance
+		{
+			return false;
+		}
+		else
+		{
+			btScalar sumuv;
+			sumuv = u+v;
+			if(sumuv<-G_EPSILON)
+			{
+				return false;
+			}
+			else if(sumuv-1.0f>G_EPSILON) // u+v above one beyond the tolerance
+			{
+				return false;
+			}
+		}
+		return true;
+	}
+
+	//! is point in triangle beam?
+	/*!
+	Test if point is in triangle, with m_margin tolerance: returns false as soon as the distance to one of the three edge planes exceeds m_margin
+	*/
+	SIMD_FORCE_INLINE bool is_point_inside(const btVector3 & point, const btVector3 & tri_normal) const
+	{
+		//Test with edge 0
+		btVector4 edge_plane;
+		this->get_edge_plane(0,tri_normal,edge_plane);
+		GREAL dist = DISTANCE_PLANE_POINT(edge_plane,point);
+		if(dist-m_margin>0.0f) return false; // outside plane
+
+		this->get_edge_plane(1,tri_normal,edge_plane); // same test with edge 1
+		dist = DISTANCE_PLANE_POINT(edge_plane,point);
+		if(dist-m_margin>0.0f) return false; // outside plane
+
+		this->get_edge_plane(2,tri_normal,edge_plane); // same test with edge 2
+		dist = DISTANCE_PLANE_POINT(edge_plane,point);
+		if(dist-m_margin>0.0f) return false; // outside plane
+		return true;
+	}
+
+
+	//! Bidirectional ray collision: accepts both non-zero LINE_PLANE_COLLISION results and negates the normal when the result is 2
+	SIMD_FORCE_INLINE bool ray_collision(
+		const btVector3 & vPoint,
+		const btVector3 & vDir, btVector3 & pout, btVector3 & triangle_normal,
+		GREAL & tparam, GREAL tmax = G_REAL_INFINITY)
+	{
+		btVector4 faceplane;
+		{
+			btVector3 dif1 = m_vertices[1] - m_vertices[0];
+			btVector3 dif2 = m_vertices[2] - m_vertices[0];
+    		VEC_CROSS(faceplane,dif1,dif2); // presumably faceplane = dif1 x dif2 (macro defined elsewhere) - confirm
+    		faceplane[3] = m_vertices[0].dot(faceplane);
+		}
+
+		GUINT res = LINE_PLANE_COLLISION(faceplane,vDir,vPoint,pout,tparam, btScalar(0), tmax);
+		if(res == 0) return false; // result 0 is treated as no hit
+		if(! is_point_inside(pout,faceplane)) return false; // pout must also pass the edge-plane test
+
+		if(res==2) //invert normal
+		{
+			triangle_normal.setValue(-faceplane[0],-faceplane[1],-faceplane[2]);
+		}
+		else
+		{
+			triangle_normal.setValue(faceplane[0],faceplane[1],faceplane[2]);
+		}
+
+		VEC_NORMALIZE(triangle_normal); // triangle_normal is only written on this success path
+
+		return true;
+	}
+
+
+	//! one direction ray collision: only LINE_PLANE_COLLISION result 1 counts as a hit
+	SIMD_FORCE_INLINE bool ray_collision_front_side(
+		const btVector3 & vPoint,
+		const btVector3 & vDir, btVector3 & pout, btVector3 & triangle_normal,
+		GREAL & tparam, GREAL tmax = G_REAL_INFINITY)
+	{
+		btVector4 faceplane;
+		{
+			btVector3 dif1 = m_vertices[1] - m_vertices[0];
+			btVector3 dif2 = m_vertices[2] - m_vertices[0];
+    		VEC_CROSS(faceplane,dif1,dif2); // presumably faceplane = dif1 x dif2 (macro defined elsewhere) - confirm
+    		faceplane[3] = m_vertices[0].dot(faceplane);
+		}
+
+		GUINT res = LINE_PLANE_COLLISION(faceplane,vDir,vPoint,pout,tparam, btScalar(0), tmax);
+		if(res != 1) return false; // unlike ray_collision, result 2 is rejected here
+
+		if(!is_point_inside(pout,faceplane)) return false;
+
+		triangle_normal.setValue(faceplane[0],faceplane[1],faceplane[2]); // normal is never negated in this variant
+
+		VEC_NORMALIZE(triangle_normal);
+
+		return true;
+	}
+
+};
+
+
+
+
+#endif // GIM_TRI_COLLISION_H_INCLUDED
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.cpp b/src/bullet/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.cpp
new file mode 100644
index 00000000..91fcea57
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.cpp
@@ -0,0 +1,243 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btContinuousConvexCollision.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h"
+#include "LinearMath/btTransformUtil.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+
+#include "btGjkPairDetector.h"
+#include "btPointCollector.h"
+#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
+
+
+
+btContinuousConvexCollision::btContinuousConvexCollision ( const btConvexShape*	convexA,const btConvexShape*	convexB,btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* penetrationDepthSolver) // convex-versus-convex setup
+:m_simplexSolver(simplexSolver), // pointers are stored as given
+m_penetrationDepthSolver(penetrationDepthSolver),
+m_convexA(convexA),m_convexB1(convexB),m_planeShape(0) // no plane; computeClosestPoints takes its GJK branch when m_convexB1 is non-null
+{
+}
+
+
+btContinuousConvexCollision::btContinuousConvexCollision( const btConvexShape*	convexA,const btStaticPlaneShape*	plane) // convex-versus-plane setup
+:m_simplexSolver(0), // no simplex solver in the plane case
+m_penetrationDepthSolver(0), // no penetration depth solver in the plane case
+m_convexA(convexA),m_convexB1(0),m_planeShape(plane) // null m_convexB1 makes computeClosestPoints take its plane branch
+{
+}
+
+
+/// This maximum should not be necessary. It allows for untested/degenerate cases in production code.
+/// You don't want your game ever to lock up.
+#define MAX_ITERATIONS 64 // copied into maxIter by calcTimeOfImpact in this file
+
+void btContinuousConvexCollision::computeClosestPoints( const btTransform& transA, const btTransform& transB,btPointCollector& pointCollector) // feeds one closest-point result for A versus B (convex or plane) into pointCollector
+{
+	if (m_convexB1) // second object is a convex shape: run the GJK pair detector
+	{
+		m_simplexSolver->reset();
+		btGjkPairDetector gjk(m_convexA,m_convexB1,m_convexA->getShapeType(),m_convexB1->getShapeType(),m_convexA->getMargin(),m_convexB1->getMargin(),m_simplexSolver,m_penetrationDepthSolver);		
+		btGjkPairDetector::ClosestPointInput input;
+		input.m_transformA = transA;
+		input.m_transformB = transB;
+		gjk.getClosestPoints(input,pointCollector,0); // third argument 0: presumably no debug drawer - confirm against btGjkPairDetector
+	} else
+	{
+		//convex versus plane
+		const btConvexShape* convexShape = m_convexA;
+		const btStaticPlaneShape* planeShape = m_planeShape;
+		
+		bool hasCollision = false; // NOTE(review): never read in this function
+		const btVector3& planeNormal = planeShape->getPlaneNormal();
+		const btScalar& planeConstant = planeShape->getPlaneConstant();
+		
+		btTransform convexWorldTransform = transA;
+		btTransform convexInPlaneTrans;
+		convexInPlaneTrans= transB.inverse() * convexWorldTransform; // maps convex-local points into the plane's frame
+		btTransform planeInConvex;
+		planeInConvex= convexWorldTransform.inverse() * transB; // maps the plane's frame into the convex's frame
+		
+		btVector3 vtx = convexShape->localGetSupportingVertex(planeInConvex.getBasis()*-planeNormal); // supporting vertex for the negated plane normal, expressed in the convex's frame
+
+		btVector3 vtxInPlane = convexInPlaneTrans(vtx);
+		btScalar distance = (planeNormal.dot(vtxInPlane) - planeConstant); // offset of that vertex from the plane along planeNormal
+
+		btVector3 vtxInPlaneProjected = vtxInPlane - distance*planeNormal; // vertex moved back onto the plane
+		btVector3 vtxInPlaneWorld = transB * vtxInPlaneProjected;
+		btVector3 normalOnSurfaceB = transB.getBasis() * planeNormal; // plane normal rotated by transB's basis
+
+		pointCollector.addContactPoint( // arguments: normal, point, distance
+			normalOnSurfaceB,
+			vtxInPlaneWorld,
+			distance);
+	}
+}
+
+bool	btContinuousConvexCollision::calcTimeOfImpact( // advances lambda step by step; returns true and fills result when the shapes come within the stop distance
+				const btTransform& fromA,
+				const btTransform& toA,
+				const btTransform& fromB,
+				const btTransform& toB,
+				CastResult& result) // reads m_allowedPenetration and m_debugDrawer; writes m_fraction, m_normal, m_hitPoint on success
+{
+
+
+	/// compute linear and angular velocity for this interval, to interpolate
+	btVector3 linVelA,angVelA,linVelB,angVelB;
+	btTransformUtil::calculateVelocity(fromA,toA,btScalar(1.),linVelA,angVelA); // third argument is presumably the time step (1) - confirm
+	btTransformUtil::calculateVelocity(fromB,toB,btScalar(1.),linVelB,angVelB);
+
+
+	btScalar boundingRadiusA = m_convexA->getAngularMotionDisc();
+	btScalar boundingRadiusB = m_convexB1?m_convexB1->getAngularMotionDisc():0.f; // the plane case contributes no radius
+
+	btScalar maxAngularProjectedVelocity = angVelA.length() * boundingRadiusA + angVelB.length() * boundingRadiusB; // angular speed times radius, summed over both bodies
+	btVector3 relLinVel = (linVelB-linVelA);
+
+	btScalar relLinVelocLength = (linVelB-linVelA).length(); // NOTE(review): same difference as relLinVel, computed a second time
+	
+	if ((relLinVelocLength+maxAngularProjectedVelocity) == 0.f) // no relative linear motion and no angular term: report no hit
+		return false;
+
+
+
+	btScalar lambda = btScalar(0.); // current fraction along the motion
+	btVector3 v(1,0,0); // NOTE(review): not used anywhere below
+
+	int maxIter = MAX_ITERATIONS;
+
+	btVector3 n;
+	n.setValue(btScalar(0.),btScalar(0.),btScalar(0.));
+	bool hasResult = false;
+	btVector3 c;
+
+	btScalar lastLambda = lambda;
+	//btScalar epsilon = btScalar(0.001);
+
+	int numIter = 0;
+	//first solution, using GJK
+
+
+	btScalar radius = 0.001f; // the loop below stops once dist is no larger than this
+//	result.drawCoordSystem(sphereTr);
+
+	btPointCollector	pointCollector1;
+
+	{
+	
+		computeClosestPoints(fromA,fromB,pointCollector1); // closest points at the start transforms
+
+		hasResult = pointCollector1.m_hasResult;
+		c = pointCollector1.m_pointInWorld;
+	}
+
+	if (hasResult)
+	{
+		btScalar dist;
+		dist = pointCollector1.m_distance + result.m_allowedPenetration;
+		n = pointCollector1.m_normalOnBInWorld;
+		btScalar projectedLinearVelocity = relLinVel.dot(n);
+		if ((projectedLinearVelocity+ maxAngularProjectedVelocity)<=SIMD_EPSILON) // same rejection test as inside the loop, applied before the first step
+			return false;
+
+		//not close enough
+		while (dist > radius)
+		{
+			if (result.m_debugDrawer)
+			{
+				result.m_debugDrawer->drawSphere(c,0.2f,btVector3(1,1,1));
+			}
+			btScalar dLambda = btScalar(0.);
+
+			projectedLinearVelocity = relLinVel.dot(n);
+
+			
+			//don't report time of impact for motion away from the contact normal (or causes minor penetration)
+			if ((projectedLinearVelocity+ maxAngularProjectedVelocity)<=SIMD_EPSILON)
+				return false;
+			
+			dLambda = dist / (projectedLinearVelocity+ maxAngularProjectedVelocity); // step = remaining distance divided by the combined velocity term
+
+			
+			
+			lambda = lambda + dLambda;
+
+			if (lambda > btScalar(1.)) // stepped past the end of the interval: no hit
+				return false;
+
+			if (lambda < btScalar(0.))
+				return false;
+
+
+			//todo: next check with relative epsilon
+			if (lambda <= lastLambda) // lambda did not advance
+			{
+				return false;
+				//n.setValue(0,0,0);
+				break; // NOTE(review): unreachable, follows the return above
+			}
+			lastLambda = lambda;
+
+			
+
+			//interpolate to next lambda
+			btTransform interpolatedTransA,interpolatedTransB,relativeTrans;
+
+			btTransformUtil::integrateTransform(fromA,linVelA,angVelA,lambda,interpolatedTransA);
+			btTransformUtil::integrateTransform(fromB,linVelB,angVelB,lambda,interpolatedTransB);
+			relativeTrans = interpolatedTransB.inverseTimes(interpolatedTransA); // NOTE(review): relativeTrans is not read afterwards
+
+			if (result.m_debugDrawer)
+			{
+				result.m_debugDrawer->drawSphere(interpolatedTransA.getOrigin(),0.2f,btVector3(1,0,0));
+			}
+
+			result.DebugDraw( lambda );
+
+			btPointCollector	pointCollector;
+			computeClosestPoints(interpolatedTransA,interpolatedTransB,pointCollector); // re-query at the interpolated transforms
+
+			if (pointCollector.m_hasResult)
+			{
+				dist = pointCollector.m_distance+result.m_allowedPenetration;
+				c = pointCollector.m_pointInWorld;		
+				n = pointCollector.m_normalOnBInWorld;
+			} else
+			{
+				result.reportFailure(-1, numIter); // -1: the closest-point query produced no result
+				return false;
+			}
+
+			numIter++;
+			if (numIter > maxIter)
+			{
+				result.reportFailure(-2, numIter); // -2: iteration cap exceeded
+				return false;
+			}
+		}
+	
+		result.m_fraction = lambda; // hit: report fraction, normal and point from the last query
+		result.m_normal = n;
+		result.m_hitPoint = c;
+		return true;
+	}
+
+	return false; // the initial closest-point query produced no result
+
+}
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h
new file mode 100644
index 00000000..bdc0572f
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h
@@ -0,0 +1,59 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_CONTINUOUS_COLLISION_CONVEX_CAST_H
+#define BT_CONTINUOUS_COLLISION_CONVEX_CAST_H
+
+#include "btConvexCast.h"
+#include "btSimplexSolverInterface.h"
+class btConvexPenetrationDepthSolver;
+class btConvexShape;
+class btStaticPlaneShape;
+
+/// btContinuousConvexCollision implements angular and linear time of impact for convex objects.
+/// Based on Brian Mirtich's Conservative Advancement idea (PhD thesis).
+/// Algorithm operates in worldspace, in order to keep in-between motion globally consistent.
+/// It uses GJK at the moment. Future improvement would use Minkowski sum / supporting vertex, merging inner loops
+class btContinuousConvexCollision : public btConvexCast
+{
+	btSimplexSolverInterface* m_simplexSolver; // given to the convex/convex constructor; the plane constructor takes none
+	btConvexPenetrationDepthSolver*	m_penetrationDepthSolver; // given to the convex/convex constructor; the plane constructor takes none
+	const btConvexShape*	m_convexA; // first object, always a convex shape
+	//second object is either a convex or a plane (code sharing)
+	const btConvexShape*	m_convexB1; // second object when it is a convex shape
+	const btStaticPlaneShape*	m_planeShape; // second object when it is a static plane
+
+	void computeClosestPoints( const btTransform& transA, const btTransform& transB,struct btPointCollector& pointCollector); // private helper; results are written to pointCollector
+
+public:
+
+	btContinuousConvexCollision (const btConvexShape*	shapeA,const btConvexShape*	shapeB ,btSimplexSolverInterface* simplexSolver,btConvexPenetrationDepthSolver* penetrationDepthSolver); // convex versus convex
+
+	btContinuousConvexCollision(const btConvexShape*	shapeA,const btStaticPlaneShape*	plane ); // convex versus static plane
+
+	virtual bool	calcTimeOfImpact( // implements the pure virtual declared in btConvexCast
+				const btTransform& fromA,
+				const btTransform& toA,
+				const btTransform& fromB,
+				const btTransform& toB,
+				CastResult& result);
+
+
+};
+
+
+#endif //BT_CONTINUOUS_COLLISION_CONVEX_CAST_H
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btConvexCast.cpp b/src/bullet/BulletCollision/NarrowPhaseCollision/btConvexCast.cpp
new file mode 100644
index 00000000..d2a1310b
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btConvexCast.cpp
@@ -0,0 +1,20 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btConvexCast.h"
+
+btConvexCast::~btConvexCast() // empty out-of-line definition of the destructor
+{
+}
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btConvexCast.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btConvexCast.h
new file mode 100644
index 00000000..bfd79d03
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btConvexCast.h
@@ -0,0 +1,73 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_CONVEX_CAST_H
+#define BT_CONVEX_CAST_H
+
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btScalar.h"
+class btMinkowskiSumShape;
+#include "LinearMath/btIDebugDraw.h"
+
+/// btConvexCast is an interface for Casting: subclasses implement calcTimeOfImpact and report through CastResult
+class btConvexCast
+{
+public:
+
+
+	virtual ~btConvexCast();
+
+	///CastResult stores the closest result
+	/// alternatively, add a callback method to decide about closest/all results
+	struct	CastResult
+	{
+		//virtual bool	addRayResult(const btVector3& normal,btScalar	fraction) = 0;
+				
+		virtual void	DebugDraw(btScalar	fraction) {(void)fraction;} // default does nothing; overridable hook
+		virtual void	drawCoordSystem(const btTransform& trans) {(void)trans;} // default does nothing; overridable hook
+		virtual void	reportFailure(int errNo, int numIterations) {(void)errNo;(void)numIterations;} // default does nothing; overridable hook
+		CastResult()
+			:m_fraction(btScalar(BT_LARGE_FLOAT)), // starts at BT_LARGE_FLOAT
+			m_debugDrawer(0), // no debug drawer by default
+			m_allowedPenetration(btScalar(0))
+		{
+		}
+
+
+		virtual ~CastResult() {};
+
+		btTransform	m_hitTransformA; // not initialized by the constructor above
+		btTransform	m_hitTransformB; // not initialized by the constructor above
+		btVector3	m_normal;
+		btVector3   m_hitPoint;
+		btScalar	m_fraction; //input and output
+		btIDebugDraw* m_debugDrawer; // optional; null unless the caller sets it
+		btScalar	m_allowedPenetration; // defaults to 0
+
+	};
+
+
+	/// cast a convex against another convex object
+	virtual bool	calcTimeOfImpact( // pure virtual: from/to transforms for A and B in, result out
+					const btTransform& fromA,
+					const btTransform& toA,
+					const btTransform& fromB,
+					const btTransform& toB,
+					CastResult& result) = 0;
+};
+
+#endif //BT_CONVEX_CAST_H
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h
new file mode 100644
index 00000000..72eb5aec
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h
@@ -0,0 +1,42 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_CONVEX_PENETRATION_DEPTH_H
+#define BT_CONVEX_PENETRATION_DEPTH_H
+
+class btStackAlloc;
+class btVector3;
+#include "btSimplexSolverInterface.h"
+class btConvexShape;
+class btTransform;
+
+///ConvexPenetrationDepthSolver provides an interface for penetration depth calculation.
+class btConvexPenetrationDepthSolver
+{
+public:	
+	
+	virtual ~btConvexPenetrationDepthSolver() {}; // virtual so that deleting through this interface type is well-defined
+	virtual bool calcPenDepth( btSimplexSolverInterface& simplexSolver, // pure virtual; concrete solvers supply the computation
+		const btConvexShape* convexA,const btConvexShape* convexB,
+					const btTransform& transA,const btTransform& transB,
+				btVector3& v, btVector3& pa, btVector3& pb, // non-const references; presumably outputs (direction and points on A/B) - confirm with implementations
+				class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc
+				) = 0;
+
+
+};
+#endif //BT_CONVEX_PENETRATION_DEPTH_H
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h
new file mode 100644
index 00000000..f958cc52
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h
@@ -0,0 +1,91 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_DISCRETE_COLLISION_DETECTOR1_INTERFACE_H
+#define BT_DISCRETE_COLLISION_DETECTOR1_INTERFACE_H
+
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btVector3.h"
+class btStackAlloc;
+
+/// This interface is made to be used by an iterative approach to do TimeOfImpact calculations
+/// This interface allows to query for closest points and penetration depth between two (convex) objects
+/// the closest point is on the second object (B), and the normal points from the surface on B towards A.
+/// distance is between closest points on B and closest point on A. So you can calculate closest point on A
+/// by taking closestPointInA = closestPointInB + m_distance * m_normalOnSurfaceB
+struct btDiscreteCollisionDetectorInterface
+{
+	
+	struct Result // abstract sink that receives the detector's output
+	{
+	
+		virtual ~Result(){}	
+
+		///setShapeIdentifiersA/B provides experimental support for per-triangle material / custom material combiner
+		virtual void setShapeIdentifiersA(int partId0,int index0)=0;
+		virtual void setShapeIdentifiersB(int partId1,int index1)=0;
+		virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)=0; // receives a normal, a point and a depth value
+	};
+
+	struct ClosestPointInput // query parameters for getClosestPoints
+	{
+		ClosestPointInput()
+			:m_maximumDistanceSquared(btScalar(BT_LARGE_FLOAT)), // defaults to BT_LARGE_FLOAT
+			m_stackAlloc(0) // no stack allocator by default
+		{
+		}
+
+		btTransform m_transformA; // not set by the constructor; callers assign it
+		btTransform m_transformB; // not set by the constructor; callers assign it
+		btScalar	m_maximumDistanceSquared;
+		btStackAlloc* m_stackAlloc;
+	};
+
+	virtual ~btDiscreteCollisionDetectorInterface() {};
+
+	//
+	// give either closest points (distance > 0) or penetration (distance < 0)
+	// the normal always points from B towards A
+	//
+	virtual void	getClosestPoints(const ClosestPointInput& input,Result& output,class btIDebugDraw* debugDraw,bool swapResults=false) = 0;
+
+};
+
+struct btStorageResult : public btDiscreteCollisionDetectorInterface::Result // keeps only the contact with the smallest depth; setShapeIdentifiersA/B are not overridden here
+{
+		btVector3	m_normalOnSurfaceB; // normal of the stored contact
+		btVector3	m_closestPointInB; // point of the stored contact
+		btScalar	m_distance; //negative means penetration !
+
+		btStorageResult() : m_distance(btScalar(BT_LARGE_FLOAT)) // large start value so the first reported depth below it is stored
+		{
+
+		}
+		virtual ~btStorageResult() {};
+
+		virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
+		{
+			if (depth < m_distance) // strictly smaller depth replaces the stored contact; otherwise the call is ignored
+			{
+				m_normalOnSurfaceB = normalOnBInWorld;
+				m_closestPointInB = pointInWorld;
+				m_distance = depth;
+			}
+		}
+};
+
+#endif //BT_DISCRETE_COLLISION_DETECTOR1_INTERFACE_H
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.cpp b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.cpp
new file mode 100644
index 00000000..bef697a0
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.cpp
@@ -0,0 +1,176 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#include "btGjkConvexCast.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "btGjkPairDetector.h"
+#include "btPointCollector.h"
+#include "LinearMath/btTransformUtil.h"
+
+#ifdef BT_USE_DOUBLE_PRECISION
+#define MAX_ITERATIONS 64
+#else
+#define MAX_ITERATIONS 32
+#endif
+
+btGjkConvexCast::btGjkConvexCast(const btConvexShape* convexA,const btConvexShape* convexB,btSimplexSolverInterface* simplexSolver)	// only stores the three pointers
+:m_simplexSolver(simplexSolver),	// reset and reused by calcTimeOfImpact
+m_convexA(convexA),
+m_convexB(convexB)
+{
+}
+
+bool	btGjkConvexCast::calcTimeOfImpact(	// sweeps A and B along their linear motion; returns true and fills result when an impact fraction is found
+					const btTransform& fromA,
+					const btTransform& toA,
+					const btTransform& fromB,
+					const btTransform& toB,
+					CastResult& result)	// receives m_fraction, m_normal and m_hitPoint on success; m_allowedPenetration is read
+{
+
+
+	m_simplexSolver->reset();
+
+	/// compute linear velocity for this interval, to interpolate
+	//assume no rotation/angular velocity, assert here?
+	btVector3 linVelA,linVelB;
+	linVelA = toA.getOrigin()-fromA.getOrigin();	// translation of A over the whole interval
+	linVelB = toB.getOrigin()-fromB.getOrigin();	// translation of B over the whole interval
+
+	btScalar radius = btScalar(0.001);	// the advance loop stops once the GJK distance is at or below this value
+	btScalar lambda = btScalar(0.);	// current fraction along the sweep
+	btVector3 v(1,0,0);
+
+	int maxIter = MAX_ITERATIONS;
+
+	btVector3 n;
+	n.setValue(btScalar(0.),btScalar(0.),btScalar(0.));
+	bool hasResult = false;
+	btVector3 c;
+	btVector3 r = (linVelA-linVelB);	// relative translation of A with respect to B
+
+	btScalar lastLambda = lambda;
+	//btScalar epsilon = btScalar(0.001);
+
+	int numIter = 0;
+	//first solution, using GJK
+
+
+	btTransform identityTrans;	// NOTE(review): not referenced again in this function
+	identityTrans.setIdentity();
+
+
+//	result.drawCoordSystem(sphereTr);
+
+	btPointCollector	pointCollector;
+
+		
+	btGjkPairDetector gjk(m_convexA,m_convexB,m_simplexSolver,0);//m_penetrationDepthSolver);		
+	btGjkPairDetector::ClosestPointInput input;
+
+	//we don't use margins during CCD
+	//	gjk.setIgnoreMargin(true);
+
+	input.m_transformA = fromA;	// first query is made at the start poses
+	input.m_transformB = fromB;
+	gjk.getClosestPoints(input,pointCollector,0);
+
+	hasResult = pointCollector.m_hasResult;
+	c = pointCollector.m_pointInWorld;
+
+	if (hasResult)
+	{
+		btScalar dist;
+		dist = pointCollector.m_distance;
+		n = pointCollector.m_normalOnBInWorld;
+
+	
+
+		//not close enough
+		while (dist > radius)
+		{
+			numIter++;
+			if (numIter > maxIter)
+			{
+				return false; //todo: report a failure
+			}
+			btScalar dLambda = btScalar(0.);
+
+			btScalar projectedLinearVelocity = r.dot(n);	// relative motion projected on the current normal
+			
+			dLambda = dist / (projectedLinearVelocity);	// NOTE(review): no guard for a zero projection; the range checks below reject an infinite lambda
+
+			lambda = lambda - dLambda;	// advances only when dLambda is negative
+
+			if (lambda > btScalar(1.))	// impact would lie beyond the end of the interval
+				return false;
+
+			if (lambda < btScalar(0.))	// impact would lie before the start of the interval
+				return false;
+
+			//todo: next check with relative epsilon
+			if (lambda <= lastLambda)	// no forward progress: give up
+			{
+				return false;
+				//n.setValue(0,0,0);
+				break;	// unreachable: follows the return above
+			}
+			lastLambda = lambda;
+
+			//interpolate to next lambda
+			result.DebugDraw( lambda );
+			input.m_transformA.getOrigin().setInterpolate3(fromA.getOrigin(),toA.getOrigin(),lambda);	// only the origins move; the bases stay as copied from fromA/fromB
+			input.m_transformB.getOrigin().setInterpolate3(fromB.getOrigin(),toB.getOrigin(),lambda);
+			
+			gjk.getClosestPoints(input,pointCollector,0);
+			if (pointCollector.m_hasResult)
+			{
+				if (pointCollector.m_distance < btScalar(0.))	// already penetrating at this fraction: report it immediately
+				{
+					result.m_fraction = lastLambda;	// equals lambda here, lastLambda was updated just above
+					n = pointCollector.m_normalOnBInWorld;
+					result.m_normal=n;
+					result.m_hitPoint = pointCollector.m_pointInWorld;
+					return true;
+				}
+				c = pointCollector.m_pointInWorld;		
+				n = pointCollector.m_normalOnBInWorld;
+				dist = pointCollector.m_distance;
+			} else
+			{
+				//??
+				return false;
+			}
+
+		}
+
+		//is n normalized?
+		//don't report time of impact for motion away from the contact normal (or causes minor penetration)
+		if (n.dot(r)>=-result.m_allowedPenetration)
+			return false;
+
+		result.m_fraction = lambda;	// stays 0 when the shapes were already within radius at the start
+		result.m_normal = n;
+		result.m_hitPoint = c;
+		return true;
+	}
+
+	return false;	// the initial GJK query produced no result
+
+
+}
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h
new file mode 100644
index 00000000..6a42ee63
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h
@@ -0,0 +1,50 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_GJK_CONVEX_CAST_H
+#define BT_GJK_CONVEX_CAST_H
+
+#include "BulletCollision/CollisionShapes/btCollisionMargin.h"
+
+#include "LinearMath/btVector3.h"
+#include "btConvexCast.h"
+class btConvexShape;
+class btMinkowskiSumShape;
+#include "btSimplexSolverInterface.h"
+
+///GjkConvexCast performs a raycast on a convex object using support mapping.
+class btGjkConvexCast : public btConvexCast	// convex cast that repeatedly queries a GJK pair detector along a linear sweep
+{
+	btSimplexSolverInterface*	m_simplexSolver;	// reset at the start of every calcTimeOfImpact call
+	const btConvexShape*	m_convexA;
+	const btConvexShape*	m_convexB;
+
+public:
+
+	btGjkConvexCast(const btConvexShape*	convexA,const btConvexShape* convexB,btSimplexSolverInterface* simplexSolver);	// stores the three pointers
+
+	/// cast a convex against another convex object
+	virtual bool	calcTimeOfImpact(	// true when an impact was found and result was filled
+					const btTransform& fromA,
+					const btTransform& toA,
+					const btTransform& fromB,
+					const btTransform& toB,
+					CastResult& result);
+
+};
+
+#endif //BT_GJK_CONVEX_CAST_H
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp
new file mode 100644
index 00000000..f74261d4
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp
@@ -0,0 +1,989 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2008 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the
+use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+claim that you wrote the original software. If you use this software in a
+product, an acknowledgment in the product documentation would be appreciated
+but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/*
+GJK-EPA collision solver by Nathanael Presson, 2008
+*/
+#include "BulletCollision/CollisionShapes/btConvexInternalShape.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "btGjkEpa2.h"
+
+#if defined(DEBUG) || defined (_DEBUG)
+#include <stdio.h> //for debug printf
+#ifdef __SPU__
+#include <spu_printf.h>
+#define printf spu_printf
+#endif //__SPU__
+#endif
+
+namespace gjkepa2_impl
+{
+
+	// Config
+
+	/* GJK	*/ 
+#define GJK_MAX_ITERATIONS	128
+#define GJK_ACCURARY		((btScalar)0.0001)
+#define GJK_MIN_DISTANCE	((btScalar)0.0001)
+#define GJK_DUPLICATED_EPS	((btScalar)0.0001)
+#define GJK_SIMPLEX2_EPS	((btScalar)0.0)
+#define GJK_SIMPLEX3_EPS	((btScalar)0.0)
+#define GJK_SIMPLEX4_EPS	((btScalar)0.0)
+
+	/* EPA	*/ 
+#define EPA_MAX_VERTICES	64
+#define EPA_MAX_FACES		(EPA_MAX_VERTICES*2)
+#define EPA_MAX_ITERATIONS	255
+#define EPA_ACCURACY		((btScalar)0.0001)
+#define EPA_FALLBACK		(10*EPA_ACCURACY)
+#define EPA_PLANE_EPS		((btScalar)0.00001)
+#define EPA_INSIDE_EPS		((btScalar)0.01)
+
+
+	// Shorthands
+	typedef unsigned int	U;
+	typedef unsigned char	U1;
+
+	// MinkowskiDiff
+	struct	MinkowskiDiff	// support mapping of shape0 minus shape1, evaluated in the local frame of shape0
+	{
+		const btConvexShape*	m_shapes[2];
+		btMatrix3x3				m_toshape1;	// applied to the query direction before asking shape 1 (see Support1)
+		btTransform				m_toshape0;	// applied to the support point returned by shape 1 (see Support1)
+#ifdef __SPU__
+		bool					m_enableMargin;
+#else
+		btVector3				(btConvexShape::*Ls)(const btVector3&) const;	// selected support function: with or without margin
+#endif//__SPU__
+		
+
+		MinkowskiDiff()	// leaves every member unset; the namespace-level Initialize() fills them
+		{
+
+		}
+#ifdef __SPU__
+			void					EnableMargin(bool enable)
+		{
+			m_enableMargin = enable;
+		}	
+		inline btVector3		Support0(const btVector3& d) const
+		{
+			if (m_enableMargin)
+			{
+				return m_shapes[0]->localGetSupportVertexNonVirtual(d);
+			} else
+			{
+				return m_shapes[0]->localGetSupportVertexWithoutMarginNonVirtual(d);
+			}
+		}
+		inline btVector3		Support1(const btVector3& d) const
+		{
+			if (m_enableMargin)
+			{
+				return m_toshape0*(m_shapes[1]->localGetSupportVertexNonVirtual(m_toshape1*d));
+			} else
+			{
+				return m_toshape0*(m_shapes[1]->localGetSupportVertexWithoutMarginNonVirtual(m_toshape1*d));
+			}
+		}
+#else
+		void					EnableMargin(bool enable)	// picks which btConvexShape support member Ls points at
+		{
+			if(enable)
+				Ls=&btConvexShape::localGetSupportVertexNonVirtual;
+			else
+				Ls=&btConvexShape::localGetSupportVertexWithoutMarginNonVirtual;
+		}	
+		inline btVector3		Support0(const btVector3& d) const	// support point of shape 0 in direction d
+		{
+			return(((m_shapes[0])->*(Ls))(d));
+		}
+		inline btVector3		Support1(const btVector3& d) const	// support point of shape 1, direction and result both converted via the stored transforms
+		{
+			return(m_toshape0*((m_shapes[1])->*(Ls))(m_toshape1*d));
+		}
+#endif //__SPU__
+
+		inline btVector3		Support(const btVector3& d) const	// support of the difference: Support0(d) - Support1(-d)
+		{
+			return(Support0(d)-Support1(-d));
+		}
+		btVector3				Support(const btVector3& d,U index) const	// per-shape support: index 0 selects shape 0, any non-zero index selects shape 1
+		{
+			if(index)
+				return(Support1(d));
+			else
+				return(Support0(d));
+		}
+	};
+
+	typedef	MinkowskiDiff	tShape;
+
+
+	// GJK
+	struct	GJK
+	{
+		/* Types		*/ 
+		struct	sSV	// one simplex vertex as filled by getsupport()
+		{
+			btVector3	d,w;	// d: normalized search direction, w: support point of the shape difference along d
+		};
+		struct	sSimplex	// simplex of up to four support vertices
+		{
+			sSV*		c[4];	// vertices in use, taken from the owner free list
+			btScalar	p[4];	// weight attached to each vertex
+			U			rank;	// number of valid entries in c and p
+		};
+		struct	eStatus	{ enum _ {	// enum wrapped in a struct so the values are scoped as eStatus::X
+			Valid,	// set when Evaluate starts and kept while iterating
+			Inside,	// ray shorter than GJK_MIN_DISTANCE, or the full tetrahedron was kept (mask 15)
+			Failed		};};	// initial state, and set when the iteration cap is reached
+			/* Fields		*/ 
+			tShape			m_shape;
+			btVector3		m_ray;
+			btScalar		m_distance;
+			sSimplex		m_simplices[2];
+			sSV				m_store[4];
+			sSV*			m_free[4];
+			U				m_nfree;
+			U				m_current;
+			sSimplex*		m_simplex;
+			eStatus::_		m_status;
+			/* Methods		*/ 
+			GJK()	// delegates to Initialize()
+			{
+				Initialize();
+			}
+			void				Initialize()	// resets the scalar state; m_simplex, m_store and m_free are left untouched
+			{
+				m_ray		=	btVector3(0,0,0);
+				m_nfree		=	0;
+				m_status	=	eStatus::Failed;	// stays Failed until Evaluate runs
+				m_current	=	0;
+				m_distance	=	0;
+			}
+			eStatus::_			Evaluate(const tShape& shapearg,const btVector3& guess)	// runs the GJK iteration on shapearg starting from guess; returns the final status
+			{
+				U			iterations=0;
+				btScalar	sqdist=0;
+				btScalar	alpha=0;
+				btVector3	lastw[4];	// ring buffer of the most recent support points, used for duplicate detection
+				U			clastw=0;
+				/* Initialize solver		*/ 
+				m_free[0]			=	&m_store[0];
+				m_free[1]			=	&m_store[1];
+				m_free[2]			=	&m_store[2];
+				m_free[3]			=	&m_store[3];
+				m_nfree				=	4;
+				m_current			=	0;
+				m_status			=	eStatus::Valid;
+				m_shape				=	shapearg;
+				m_distance			=	0;
+				/* Initialize simplex		*/ 
+				m_simplices[0].rank	=	0;
+				m_ray				=	guess;
+				const btScalar	sqrl=	m_ray.length2();
+				appendvertice(m_simplices[0],sqrl>0?-m_ray:btVector3(1,0,0));	// seed along -guess, or along +x when guess has zero length
+				m_simplices[0].p[0]	=	1;
+				m_ray				=	m_simplices[0].c[0]->w;	
+				sqdist				=	sqrl;
+				lastw[0]			=
+					lastw[1]			=
+					lastw[2]			=
+					lastw[3]			=	m_ray;
+				/* Loop						*/ 
+				do	{
+					const U		next=1-m_current;	// the two simplex buffers are used alternately
+					sSimplex&	cs=m_simplices[m_current];
+					sSimplex&	ns=m_simplices[next];
+					/* Check zero							*/ 
+					const btScalar	rl=m_ray.length();
+					if(rl<GJK_MIN_DISTANCE)
+					{/* Touching or inside				*/ 
+						m_status=eStatus::Inside;
+						break;
+					}
+					/* Append new vertice in -'v' direction	*/ 
+					appendvertice(cs,-m_ray);
+					const btVector3&	w=cs.c[cs.rank-1]->w;
+					bool				found=false;
+					for(U i=0;i<4;++i)
+					{
+						if((w-lastw[i]).length2()<GJK_DUPLICATED_EPS)	// squared distance is compared against the epsilon
+						{ found=true;break; }
+					}
+					if(found)
+					{/* Return old simplex				*/ 
+						removevertice(m_simplices[m_current]);
+						break;
+					}
+					else
+					{/* Update lastw					*/ 
+						lastw[clastw=(clastw+1)&3]=w;	// advance the ring index modulo 4, then store
+					}
+					/* Check for termination				*/ 
+					const btScalar	omega=btDot(m_ray,w)/rl;	// new support point projected on the current ray direction
+					alpha=btMax(omega,alpha);	// largest projection seen so far
+					if(((rl-alpha)-(GJK_ACCURARY*rl))<=0)	// stop when rl - alpha is within GJK_ACCURARY relative to rl
+					{/* Return old simplex				*/ 
+						removevertice(m_simplices[m_current]);
+						break;
+					}		
+					/* Reduce simplex						*/ 
+					btScalar	weights[4];
+					U			mask=0;	// bit i set means vertex i is kept
+					switch(cs.rank)	// rank is at least 2 here because a vertex was just appended
+					{
+					case	2:	sqdist=projectorigin(	cs.c[0]->w,
+									cs.c[1]->w,
+									weights,mask);break;
+					case	3:	sqdist=projectorigin(	cs.c[0]->w,
+									cs.c[1]->w,
+									cs.c[2]->w,
+									weights,mask);break;
+					case	4:	sqdist=projectorigin(	cs.c[0]->w,
+									cs.c[1]->w,
+									cs.c[2]->w,
+									cs.c[3]->w,
+									weights,mask);break;
+					}
+					if(sqdist>=0)	// projectorigin returns -1 for a degenerate simplex
+					{/* Valid	*/ 
+						ns.rank		=	0;
+						m_ray		=	btVector3(0,0,0);
+						m_current	=	next;
+						for(U i=0,ni=cs.rank;i<ni;++i)
+						{
+							if(mask&(1<<i))
+							{
+								ns.c[ns.rank]		=	cs.c[i];
+								ns.p[ns.rank++]		=	weights[i];
+								m_ray				+=	cs.c[i]->w*weights[i];	// new ray is the weighted sum of the kept vertices
+							}
+							else
+							{
+								m_free[m_nfree++]	=	cs.c[i];	// dropped vertex goes back to the free list
+							}
+						}
+						if(mask==15) m_status=eStatus::Inside;	// all four vertices kept
+					}
+					else
+					{/* Return old simplex				*/ 
+						removevertice(m_simplices[m_current]);
+						break;
+					}
+					m_status=((++iterations)<GJK_MAX_ITERATIONS)?m_status:eStatus::Failed;	// iteration cap
+				} while(m_status==eStatus::Valid);
+				m_simplex=&m_simplices[m_current];	// publish the simplex that is current on exit
+				switch(m_status)
+				{
+				case	eStatus::Valid:		m_distance=m_ray.length();break;
+				case	eStatus::Inside:	m_distance=0;break;
+				default:
+					{
+					}
+				}	
+				return(m_status);
+			}
+			bool					EncloseOrigin()	// recursively appends support vertices until *m_simplex is a rank-4 simplex with non-zero determinant; false if no attempt succeeds
+			{
+				switch(m_simplex->rank)
+				{
+				case	1:	// single vertex: try plus and minus each coordinate axis
+					{
+						for(U i=0;i<3;++i)
+						{
+							btVector3		axis=btVector3(0,0,0);
+							axis[i]=1;
+							appendvertice(*m_simplex, axis);
+							if(EncloseOrigin())	return(true);
+							removevertice(*m_simplex);	// undo the failed attempt
+							appendvertice(*m_simplex,-axis);
+							if(EncloseOrigin())	return(true);
+							removevertice(*m_simplex);
+						}
+					}
+					break;
+				case	2:	// segment: try directions perpendicular to it, built from each coordinate axis
+					{
+						const btVector3	d=m_simplex->c[1]->w-m_simplex->c[0]->w;
+						for(U i=0;i<3;++i)
+						{
+							btVector3		axis=btVector3(0,0,0);
+							axis[i]=1;
+							const btVector3	p=btCross(d,axis);
+							if(p.length2()>0)	// skip an axis parallel to the segment
+							{
+								appendvertice(*m_simplex, p);
+								if(EncloseOrigin())	return(true);
+								removevertice(*m_simplex);
+								appendvertice(*m_simplex,-p);
+								if(EncloseOrigin())	return(true);
+								removevertice(*m_simplex);
+							}
+						}
+					}
+					break;
+				case	3:	// triangle: try both directions of its normal
+					{
+						const btVector3	n=btCross(m_simplex->c[1]->w-m_simplex->c[0]->w,
+							m_simplex->c[2]->w-m_simplex->c[0]->w);
+						if(n.length2()>0)	// skip a degenerate triangle
+						{
+							appendvertice(*m_simplex,n);
+							if(EncloseOrigin())	return(true);
+							removevertice(*m_simplex);
+							appendvertice(*m_simplex,-n);
+							if(EncloseOrigin())	return(true);
+							removevertice(*m_simplex);
+						}
+					}
+					break;
+				case	4:	// tetrahedron: accepted when the determinant of its edge vectors is non-zero
+					{
+						if(btFabs(det(	m_simplex->c[0]->w-m_simplex->c[3]->w,
+							m_simplex->c[1]->w-m_simplex->c[3]->w,
+							m_simplex->c[2]->w-m_simplex->c[3]->w))>0)
+							return(true);
+					}
+					break;
+				}
+				return(false);
+			}
+			/* Internals	*/ 
+			void				getsupport(const btVector3& d,sSV& sv) const	// fills sv with the normalized direction and the matching support point
+			{
+				sv.d	=	d/d.length();	// no zero-length guard here: callers must pass a non-zero d
+				sv.w	=	m_shape.Support(sv.d);
+			}
+			void				removevertice(sSimplex& simplex)	// pops the last vertex of simplex and returns its slot to the free list
+			{
+				m_free[m_nfree++]=simplex.c[--simplex.rank];
+			}
+			void				appendvertice(sSimplex& simplex,const btVector3& v)	// takes a slot from the free list and fills it with the support point along v
+			{
+				simplex.p[simplex.rank]=0;	// weight starts at zero
+				simplex.c[simplex.rank]=m_free[--m_nfree];	// no check that a free slot remains
+				getsupport(v,*simplex.c[simplex.rank++]);
+			}
+			static btScalar		det(const btVector3& a,const btVector3& b,const btVector3& c)	// determinant of the 3x3 matrix formed by a, b and c, written out term by term
+			{
+				return(	a.y()*b.z()*c.x()+a.z()*b.x()*c.y()-
+					a.x()*b.z()*c.y()-a.y()*b.x()*c.z()+
+					a.x()*b.y()*c.z()-a.z()*b.y()*c.x());
+			}
+			static btScalar		projectorigin(	const btVector3& a,	// segment case: squared distance from the origin to segment ab, or -1 when degenerate
+				const btVector3& b,
+				btScalar* w,U& m)	// w receives two weights, m a bitmask of the vertices that carry weight
+			{
+				const btVector3	d=b-a;
+				const btScalar	l=d.length2();
+				if(l>GJK_SIMPLEX2_EPS)
+				{
+					const btScalar	t(l>0?-btDot(a,d)/l:0);	// parameter of the closest point on the line through a and b
+					if(t>=1)		{ w[0]=0;w[1]=1;m=2;return(b.length2()); }	// closest point is b
+					else if(t<=0)	{ w[0]=1;w[1]=0;m=1;return(a.length2()); }	// closest point is a
+					else			{ w[0]=1-(w[1]=t);m=3;return((a+d*t).length2()); }	// closest point lies strictly between a and b
+				}
+				return(-1);
+			}
+			static btScalar		projectorigin(	const btVector3& a,	// triangle case: squared distance from the origin to triangle abc, or -1 when degenerate
+				const btVector3& b,
+				const btVector3& c,
+				btScalar* w,U& m)	// w receives three weights, m a bitmask of the vertices that carry weight
+			{
+				static const U		imd3[]={1,2,0};	// next index modulo 3
+				const btVector3*	vt[]={&a,&b,&c};
+				const btVector3		dl[]={a-b,b-c,c-a};
+				const btVector3		n=btCross(dl[0],dl[1]);	// triangle normal, not normalized
+				const btScalar		l=n.length2();
+				if(l>GJK_SIMPLEX3_EPS)
+				{
+					btScalar	mindist=-1;	// -1 means no edge projection was taken yet
+					btScalar	subw[2]={0.f,0.f};
+					U			subm(0);
+					for(U i=0;i<3;++i)
+					{
+						if(btDot(*vt[i],btCross(dl[i],n))>0)	// sign test selecting edge i for a segment projection
+						{
+							const U			j=imd3[i];
+							const btScalar	subd(projectorigin(*vt[i],*vt[j],subw,subm));
+							if((mindist<0)||(subd<mindist))	// keep the closest edge projection
+							{
+								mindist		=	subd;
+								m			=	static_cast<U>(((subm&1)?1<<i:0)+((subm&2)?1<<j:0));	// remap the segment mask to triangle vertex bits
+								w[i]		=	subw[0];
+								w[j]		=	subw[1];
+								w[imd3[j]]	=	0;				
+							}
+						}
+					}
+					if(mindist<0)	// no edge selected: project onto the triangle plane
+					{
+						const btScalar	d=btDot(a,n);	
+						const btScalar	s=btSqrt(l);
+						const btVector3	p=n*(d/l);	// projection of the origin onto the plane
+						mindist	=	p.length2();
+						m		=	7;
+						w[0]	=	(btCross(dl[1],b-p)).length()/s;	// weights from cross-product lengths divided by the normal length
+						w[1]	=	(btCross(dl[2],c-p)).length()/s;
+						w[2]	=	1-(w[0]+w[1]);
+					}
+					return(mindist);
+				}
+				return(-1);
+			}
+			static btScalar		projectorigin(	const btVector3& a,	// tetrahedron case: squared distance from the origin to tetrahedron abcd, or -1 when rejected
+				const btVector3& b,
+				const btVector3& c,
+				const btVector3& d,
+				btScalar* w,U& m)	// w receives four weights, m a bitmask of the vertices that carry weight
+			{
+				static const U		imd3[]={1,2,0};	// next index modulo 3
+				const btVector3*	vt[]={&a,&b,&c,&d};
+				const btVector3		dl[]={a-d,b-d,c-d};
+				const btScalar		vl=det(dl[0],dl[1],dl[2]);
+				const bool			ng=(vl*btDot(a,btCross(b-c,a-b)))<=0;
+				if(ng&&(btFabs(vl)>GJK_SIMPLEX4_EPS))	// otherwise the function falls through to return -1
+				{
+					btScalar	mindist=-1;	// -1 means no face projection was taken yet
+					btScalar	subw[3]={0.f,0.f,0.f};
+					U			subm(0);
+					for(U i=0;i<3;++i)
+					{
+						const U			j=imd3[i];
+						const btScalar	s=vl*btDot(d,btCross(dl[i],dl[j]));
+						if(s>0)	// sign test selecting the face (vt[i], vt[j], d) for a triangle projection
+						{
+							const btScalar	subd=projectorigin(*vt[i],*vt[j],d,subw,subm);
+							if((mindist<0)||(subd<mindist))	// keep the closest face projection
+							{
+								mindist		=	subd;
+								m			=	static_cast<U>((subm&1?1<<i:0)+	// remap the triangle mask to tetrahedron vertex bits
+									(subm&2?1<<j:0)+
+									(subm&4?8:0));
+								w[i]		=	subw[0];
+								w[j]		=	subw[1];
+								w[imd3[j]]	=	0;
+								w[3]		=	subw[2];
+							}
+						}
+					}
+					if(mindist<0)	// no face selected: distance is zero and all four vertices are kept
+					{
+						mindist	=	0;
+						m		=	15;
+						w[0]	=	det(c,b,d)/vl;	// weights as ratios of determinants
+						w[1]	=	det(a,c,d)/vl;
+						w[2]	=	det(b,a,d)/vl;
+						w[3]	=	1-(w[0]+w[1]+w[2]);
+					}
+					return(mindist);
+				}
+				return(-1);
+			}
+	};
+
+	// EPA
+	struct	EPA
+	{
+		/* Types		*/ 
+		typedef	GJK::sSV	sSV;
+		struct	sFace	// one triangular face of the expanding hull
+		{
+			btVector3	n;	// face normal, normalized in newface() for non-degenerate faces
+			btScalar	d;	// dot of the first vertex with the unit normal (set in newface)
+			btScalar	p;	// edge-test value computed in newface and compared in findbest
+			sSV*		c[3];	// the three vertices
+			sFace*		f[3];	// adjacent face across each edge (set by bind)
+			sFace*		l[2];	// list links: l[0] previous, l[1] next
+			U1			e[3];	// edge index on the adjacent face for each edge (set by bind)
+			U1			pass;	// stamp compared against the current pass in expand
+		};
+		struct	sList	// doubly linked list of faces, linked through sFace::l
+		{
+			sFace*		root;	// head of the list, null when empty
+			U			count;	// number of faces currently linked
+			sList() : root(0),count(0)	{}
+		};
+		struct	sHorizon	// faces created along the horizon during one expansion
+		{
+			sFace*		cf;	// most recently created face
+			sFace*		ff;	// first created face
+			U			nf;	// number of faces created
+			sHorizon() : cf(0),ff(0),nf(0)	{}
+		};
+		struct	eStatus { enum _ {	// enum wrapped in a struct so the values are scoped as eStatus::X
+			Valid,	// set while the hull is being built and expanded
+			Touching,	// NOTE(review): not assigned anywhere in the visible code
+			Degenerated,	// newface: face normal not longer than EPA_ACCURACY
+			NonConvex,	// newface: non-forced face with d below -EPA_PLANE_EPS
+			InvalidHull,		
+			OutOfFaces,	// newface: the stock list is empty
+			OutOfVertices,	// Evaluate: m_sv_store is exhausted
+			AccuraryReached,	// Evaluate: new support point no further than EPA_ACCURACY beyond the best face
+			FallBack,	// Evaluate: initial hull could not be built
+			Failed		};};	// initial state set by Initialize
+			/* Fields		*/ 
+			eStatus::_		m_status;
+			GJK::sSimplex	m_result;
+			btVector3		m_normal;
+			btScalar		m_depth;
+			sSV				m_sv_store[EPA_MAX_VERTICES];
+			sFace			m_fc_store[EPA_MAX_FACES];
+			U				m_nextsv;
+			sList			m_hull;
+			sList			m_stock;
+			/* Methods		*/ 
+			EPA()	// delegates to Initialize()
+			{
+				Initialize();	
+			}
+
+
+			static inline void		bind(sFace* fa,U ea,sFace* fb,U eb)	// records that edge ea of fa and edge eb of fb are adjacent, in both directions
+			{
+				fa->e[ea]=(U1)eb;fa->f[ea]=fb;
+				fb->e[eb]=(U1)ea;fb->f[eb]=fa;
+			}
+			static inline void		append(sList& list,sFace* face)	// pushes face at the head of list
+			{
+				face->l[0]	=	0;	// new head has no predecessor
+				face->l[1]	=	list.root;
+				if(list.root) list.root->l[0]=face;
+				list.root	=	face;
+				++list.count;
+			}
+			static inline void		remove(sList& list,sFace* face)	// unlinks face from list; face is assumed to be a member
+			{
+				if(face->l[1]) face->l[1]->l[0]=face->l[0];
+				if(face->l[0]) face->l[0]->l[1]=face->l[1];
+				if(face==list.root) list.root=face->l[1];	// removing the head promotes its successor
+				--list.count;
+			}
+
+
+			void				Initialize()	// resets the result fields and puts every face of m_fc_store on the stock list
+			{
+				m_status	=	eStatus::Failed;
+				m_normal	=	btVector3(0,0,0);
+				m_depth		=	0;
+				m_nextsv	=	0;
+				for(U i=0;i<EPA_MAX_FACES;++i)
+				{
+					append(m_stock,&m_fc_store[EPA_MAX_FACES-i-1]);	// appended in reverse so m_fc_store[0] ends up at the head
+				}
+			}
+			eStatus::_			Evaluate(GJK& gjk,const btVector3& guess)	// expands the GJK simplex into a hull and stores normal, depth and a rank-3 result; uses a fallback when that is not possible
+			{
+				GJK::sSimplex&	simplex=*gjk.m_simplex;
+				if((simplex.rank>1)&&gjk.EncloseOrigin())	// EncloseOrigin returns true only from its rank-4 case
+				{
+
+					/* Clean up				*/ 
+					while(m_hull.root)	// move every face left in the hull back to the stock list
+					{
+						sFace*	f = m_hull.root;
+						remove(m_hull,f);
+						append(m_stock,f);
+					}
+					m_status	=	eStatus::Valid;
+					m_nextsv	=	0;
+					/* Orient simplex		*/ 
+					if(gjk.det(	simplex.c[0]->w-simplex.c[3]->w,
+						simplex.c[1]->w-simplex.c[3]->w,
+						simplex.c[2]->w-simplex.c[3]->w)<0)
+					{
+						btSwap(simplex.c[0],simplex.c[1]);	// swapping two vertices flips the sign of the determinant
+						btSwap(simplex.p[0],simplex.p[1]);
+					}
+					/* Build initial hull	*/ 
+					sFace*	tetra[]={newface(simplex.c[0],simplex.c[1],simplex.c[2],true),
+						newface(simplex.c[1],simplex.c[0],simplex.c[3],true),
+						newface(simplex.c[2],simplex.c[1],simplex.c[3],true),
+						newface(simplex.c[0],simplex.c[2],simplex.c[3],true)};
+					if(m_hull.count==4)	// any failed newface leaves fewer than four faces in the hull
+					{
+						sFace*		best=findbest();
+						sFace		outer=*best;	// copy of the face used for the final result
+						U			pass=0;
+						U			iterations=0;
+						bind(tetra[0],0,tetra[1],0);	// wire up adjacency between the four initial faces
+						bind(tetra[0],1,tetra[2],0);
+						bind(tetra[0],2,tetra[3],0);
+						bind(tetra[1],1,tetra[3],2);
+						bind(tetra[1],2,tetra[2],1);
+						bind(tetra[2],2,tetra[3],1);
+						m_status=eStatus::Valid;
+						for(;iterations<EPA_MAX_ITERATIONS;++iterations)
+						{
+							if(m_nextsv<EPA_MAX_VERTICES)
+							{	
+								sHorizon		horizon;
+								sSV*			w=&m_sv_store[m_nextsv++];
+								bool			valid=true;					
+								best->pass	=	(U1)(++pass);	// stamp the best face so expand does not revisit it
+								gjk.getsupport(best->n,*w);	// new support point along the normal of the best face
+								const btScalar	wdist=btDot(best->n,w->w)-best->d;	// how far the new point lies beyond the best face
+								if(wdist>EPA_ACCURACY)
+								{
+									for(U j=0;(j<3)&&valid;++j)
+									{
+										valid&=expand(	pass,w,
+											best->f[j],best->e[j],
+											horizon);
+									}
+									if(valid&&(horizon.nf>=3))
+									{
+										bind(horizon.cf,1,horizon.ff,2);	// close the ring of new faces
+										remove(m_hull,best);
+										append(m_stock,best);
+										best=findbest();
+										if(best->p>=outer.p) outer=*best;
+									} else { m_status=eStatus::InvalidHull;break; }
+								} else { m_status=eStatus::AccuraryReached;break; }
+							} else { m_status=eStatus::OutOfVertices;break; }
+						}
+						const btVector3	projection=outer.n*outer.d;	// point on the outer face plane along its normal
+						m_normal	=	outer.n;
+						m_depth		=	outer.d;
+						m_result.rank	=	3;
+						m_result.c[0]	=	outer.c[0];
+						m_result.c[1]	=	outer.c[1];
+						m_result.c[2]	=	outer.c[2];
+						m_result.p[0]	=	btCross(	outer.c[1]->w-projection,	// weights from cross-product lengths, normalized by their sum below
+							outer.c[2]->w-projection).length();
+						m_result.p[1]	=	btCross(	outer.c[2]->w-projection,
+							outer.c[0]->w-projection).length();
+						m_result.p[2]	=	btCross(	outer.c[0]->w-projection,
+							outer.c[1]->w-projection).length();
+						const btScalar	sum=m_result.p[0]+m_result.p[1]+m_result.p[2];
+						m_result.p[0]	/=	sum;	// NOTE(review): no guard for sum == 0
+						m_result.p[1]	/=	sum;
+						m_result.p[2]	/=	sum;
+						return(m_status);
+					}
+				}
+				/* Fallback		*/ 
+				m_status	=	eStatus::FallBack;
+				m_normal	=	-guess;
+				const btScalar	nl=m_normal.length();
+				if(nl>0)
+					m_normal	=	m_normal/nl;
+				else
+					m_normal	=	btVector3(1,0,0);	// zero-length guess: use +x
+				m_depth	=	0;
+				m_result.rank=1;
+				m_result.c[0]=simplex.c[0];
+				m_result.p[0]=1;	
+				return(m_status);
+			}
+			sFace*				newface(sSV* a,sSV* b,sSV* c,bool forced)	// takes a face from the stock list, builds it from a, b, c and links it into the hull; returns 0 and sets m_status on failure
+			{
+				if(m_stock.root)
+				{
+					sFace*	face=m_stock.root;
+					remove(m_stock,face);
+					append(m_hull,face);
+					face->pass	=	0;
+					face->c[0]	=	a;
+					face->c[1]	=	b;
+					face->c[2]	=	c;
+					face->n		=	btCross(b->w-a->w,c->w-a->w);	// normal, not yet normalized
+					const btScalar	l=face->n.length();
+					const bool		v=l>EPA_ACCURACY;	// false for a degenerate face
+					face->p		=	btMin(btMin(	// smallest of the three per-edge tests, divided by l when the face is valid
+						btDot(a->w,btCross(face->n,a->w-b->w)),
+						btDot(b->w,btCross(face->n,b->w-c->w))),
+						btDot(c->w,btCross(face->n,c->w-a->w)))	/
+						(v?l:1);
+					face->p		=	face->p>=-EPA_INSIDE_EPS?0:face->p;	// values at or above -EPA_INSIDE_EPS are clamped to 0
+					if(v)
+					{
+						face->d		=	btDot(a->w,face->n)/l;
+						face->n		/=	l;
+						if(forced||(face->d>=-EPA_PLANE_EPS))	// forced faces skip the plane-distance check
+						{
+							return(face);
+						} else m_status=eStatus::NonConvex;
+					} else m_status=eStatus::Degenerated;
+					remove(m_hull,face);	// rejected: hand the face back to the stock list
+					append(m_stock,face);
+					return(0);
+				}
+				m_status=eStatus::OutOfFaces;	// only reached with an empty stock list, so the former ternary on m_stock.root always chose this value
+				return(0);
+			}
+			sFace*				findbest()	// scans the hull list and returns the selected face; the hull must not be empty
+			{
+				sFace*		minf=m_hull.root;	// dereferenced below without a null check
+				btScalar	mind=minf->d*minf->d;
+				btScalar	maxp=minf->p;
+				for(sFace* f=minf->l[1];f;f=f->l[1])
+				{
+					const btScalar	sqd=f->d*f->d;
+					if((f->p>=maxp)&&(sqd<mind))	// candidate must have p no smaller and squared d strictly smaller than the current pick
+					{
+						minf=f;
+						mind=sqd;
+						maxp=f->p;
+					}
+				}
+				return(minf);
+			}
+			bool				expand(U pass,sSV* w,sFace* f,U e,sHorizon& horizon)	// recursive step of the hull expansion towards w, entered through edge e of face f; false on failure or on an already stamped face
+			{
+				static const U	i1m3[]={1,2,0};	// next index modulo 3
+				static const U	i2m3[]={2,0,1};	// previous index modulo 3
+				if(f->pass!=pass)	// faces stamped with the current pass are not processed again
+				{
+					const U	e1=i1m3[e];
+					if((btDot(f->n,w->w)-f->d)<-EPA_PLANE_EPS)	// w lies behind the plane of f: f is kept and a new face is built on the shared edge
+					{
+						sFace*	nf=newface(f->c[e1],f->c[e],w,false);
+						if(nf)
+						{
+							bind(nf,0,f,e);
+							if(horizon.cf) bind(horizon.cf,1,nf,2); else horizon.ff=nf;	// chain to the previous new face, or remember the first one
+							horizon.cf=nf;
+							++horizon.nf;
+							return(true);
+						}
+					}
+					else
+					{
+						const U	e2=i2m3[e];
+						f->pass		=	(U1)pass;	// stamp before recursing into the two other neighbours
+						if(	expand(pass,w,f->f[e1],f->e[e1],horizon)&&
+							expand(pass,w,f->f[e2],f->e[e2],horizon))
+						{
+							remove(m_hull,f);	// f is replaced by the new faces: recycle it
+							append(m_stock,f);
+							return(true);
+						}
+					}
+				}
+				return(false);
+			}
+
+	};
+
+	//
+	static void	Initialize(	const btConvexShape* shape0,const btTransform& wtrs0,	// resets results and fills shape for the pair (shape0, shape1)
+		const btConvexShape* shape1,const btTransform& wtrs1,
+		btGjkEpaSolver2::sResults& results,
+		tShape& shape,
+		bool withmargins)	// forwarded to shape.EnableMargin
+	{
+		/* Results		*/ 
+		results.witnesses[0]	=
+			results.witnesses[1]	=	btVector3(0,0,0);
+		results.status			=	btGjkEpaSolver2::sResults::Separated;	// results.normal and results.distance are not touched here
+		/* Shape		*/ 
+		shape.m_shapes[0]		=	shape0;
+		shape.m_shapes[1]		=	shape1;
+		shape.m_toshape1		=	wtrs1.getBasis().transposeTimes(wtrs0.getBasis());	// rotation applied to query directions before they reach shape 1
+		shape.m_toshape0		=	wtrs0.inverseTimes(wtrs1);	// transform applied to shape 1 support points in Support1
+		shape.EnableMargin(withmargins);
+	}
+
+}
+
+//
+// Api
+//
+
+using namespace	gjkepa2_impl;
+
+//
+int			btGjkEpaSolver2::StackSizeRequirement()	// combined size in bytes of one GJK and one EPA solver object
+{
+	return(sizeof(GJK)+sizeof(EPA));
+}
+
+//
+bool		btGjkEpaSolver2::Distance(	const btConvexShape*	shape0,	// runs GJK without margins; true with witnesses, normal and distance filled when GJK ends Valid
+									  const btTransform&		wtrs0,
+									  const btConvexShape*	shape1,
+									  const btTransform&		wtrs1,
+									  const btVector3&		guess,
+									  sResults&				results)
+{
+	tShape			shape;
+	Initialize(shape0,wtrs0,shape1,wtrs1,results,shape,false);	// margins disabled
+	GJK				gjk;
+	GJK::eStatus::_	gjk_status=gjk.Evaluate(shape,guess);
+	if(gjk_status==GJK::eStatus::Valid)
+	{
+		btVector3	w0=btVector3(0,0,0);
+		btVector3	w1=btVector3(0,0,0);
+		for(U i=0;i<gjk.m_simplex->rank;++i)	// weighted sum of the per-shape support points of each simplex vertex
+		{
+			const btScalar	p=gjk.m_simplex->p[i];
+			w0+=shape.Support( gjk.m_simplex->c[i]->d,0)*p;
+			w1+=shape.Support(-gjk.m_simplex->c[i]->d,1)*p;
+		}
+		results.witnesses[0]	=	wtrs0*w0;
+		results.witnesses[1]	=	wtrs0*w1;	// wtrs0 for both: Support1 already applied m_toshape0
+		results.normal			=	w0-w1;
+		results.distance		=	results.normal.length();
+		results.normal			/=	results.distance>GJK_MIN_DISTANCE?results.distance:1;	// left unnormalized when the distance is not above GJK_MIN_DISTANCE
+		return(true);
+	}
+	else
+	{
+		results.status	=	gjk_status==GJK::eStatus::Inside?	// Inside maps to Penetrating, anything else to GJK_Failed
+			sResults::Penetrating	:
+		sResults::GJK_Failed	;
+		return(false);
+	}
+}
+
+//
+bool	btGjkEpaSolver2::Penetration(	const btConvexShape*	shape0,	// runs GJK then EPA; true with a Penetrating result filled when GJK ends Inside and EPA does not fail
+									 const btTransform&		wtrs0,
+									 const btConvexShape*	shape1,
+									 const btTransform&		wtrs1,
+									 const btVector3&		guess,
+									 sResults&				results,
+									 bool					usemargins)	// forwarded to the shape margin switch
+{
+	tShape			shape;
+	Initialize(shape0,wtrs0,shape1,wtrs1,results,shape,usemargins);
+	GJK				gjk;	
+	GJK::eStatus::_	gjk_status=gjk.Evaluate(shape,-guess);	// the guess is negated for both solvers
+	switch(gjk_status)
+	{
+	case	GJK::eStatus::Inside:
+		{
+			EPA				epa;
+			EPA::eStatus::_	epa_status=epa.Evaluate(gjk,-guess);
+			if(epa_status!=EPA::eStatus::Failed)	// every other EPA status, including FallBack, is accepted
+			{
+				btVector3	w0=btVector3(0,0,0);
+				for(U i=0;i<epa.m_result.rank;++i)	// weighted sum of shape 0 support points over the result simplex
+				{
+					w0+=shape.Support(epa.m_result.c[i]->d,0)*epa.m_result.p[i];
+				}
+				results.status			=	sResults::Penetrating;
+				results.witnesses[0]	=	wtrs0*w0;
+				results.witnesses[1]	=	wtrs0*(w0-epa.m_normal*epa.m_depth);	// second witness is the first one moved by depth against the EPA normal
+				results.normal			=	-epa.m_normal;
+				results.distance		=	-epa.m_depth;	// reported with a negative sign
+				return(true);
+			} else results.status=sResults::EPA_Failed;
+		}
+		break;
+	case	GJK::eStatus::Failed:
+		results.status=sResults::GJK_Failed;
+		break;
+		default:	// GJK ended Valid: status stays Separated as set by Initialize
+					{
+					}
+	}
+	return(false);
+}
+
+#ifndef __SPU__
+//
+btScalar	btGjkEpaSolver2::SignedDistance(const btVector3& position,	// signed distance between shape0 and a sphere of radius margin centred at position; SIMD_INFINITY when no answer is found
+											btScalar margin,
+											const btConvexShape* shape0,
+											const btTransform& wtrs0,
+											sResults& results)	// receives witnesses and normal
+{
+	tShape			shape;
+	btSphereShape	shape1(margin);	// the query point is modelled as a sphere built from the margin parameter
+	btTransform		wtrs1(btQuaternion(0,0,0,1),position);
+	Initialize(shape0,wtrs0,&shape1,wtrs1,results,shape,false);	// GJK runs without margins
+	GJK				gjk;	
+	GJK::eStatus::_	gjk_status=gjk.Evaluate(shape,btVector3(1,1,1));	// fixed initial guess
+	if(gjk_status==GJK::eStatus::Valid)
+	{
+		btVector3	w0=btVector3(0,0,0);
+		btVector3	w1=btVector3(0,0,0);
+		for(U i=0;i<gjk.m_simplex->rank;++i)	// weighted sum of the per-shape support points of each simplex vertex
+		{
+			const btScalar	p=gjk.m_simplex->p[i];
+			w0+=shape.Support( gjk.m_simplex->c[i]->d,0)*p;
+			w1+=shape.Support(-gjk.m_simplex->c[i]->d,1)*p;
+		}
+		results.witnesses[0]	=	wtrs0*w0;
+		results.witnesses[1]	=	wtrs0*w1;
+		const btVector3	delta=	results.witnesses[1]-
+			results.witnesses[0];
+		const btScalar	margins=	shape0->getMarginNonVirtual()+	// renamed from margin: the local used to shadow the function parameter
+			shape1.getMarginNonVirtual();
+		const btScalar	length=	delta.length();	
+		results.normal			=	delta/length;	// NOTE(review): no guard for length == 0 on this path
+		results.witnesses[0]	+=	results.normal*margins;
+		return(length-margins);	// margins are subtracted after the margin-free GJK query
+	}
+	else
+	{
+		if(gjk_status==GJK::eStatus::Inside)
+		{
+			if(Penetration(shape0,wtrs0,&shape1,wtrs1,gjk.m_ray,results))	// overlapping: fall back to the penetration query
+			{
+				const btVector3	delta=	results.witnesses[0]-
+					results.witnesses[1];
+				const btScalar	length=	delta.length();
+				if (length >= SIMD_EPSILON)	// keep the normal from Penetration when the witnesses nearly coincide
+					results.normal	=	delta/length;			
+				return(-length);
+			}
+		}	
+	}
+	return(SIMD_INFINITY);
+}
+
+//
+bool	btGjkEpaSolver2::SignedDistance(const btConvexShape*	shape0,	// tries Distance first and falls back to Penetration when it returns false
+										const btTransform&		wtrs0,
+										const btConvexShape*	shape1,
+										const btTransform&		wtrs1,
+										const btVector3&		guess,
+										sResults&				results)
+{
+	if(!Distance(shape0,wtrs0,shape1,wtrs1,guess,results))
+		return(Penetration(shape0,wtrs0,shape1,wtrs1,guess,results,false));	// margins disabled, matching Distance
+	else
+		return(true);
+}
+#endif //__SPU__
+
+/* Symbols cleanup		*/ 
+
+#undef GJK_MAX_ITERATIONS
+#undef GJK_ACCURARY
+#undef GJK_MIN_DISTANCE
+#undef GJK_DUPLICATED_EPS
+#undef GJK_SIMPLEX2_EPS
+#undef GJK_SIMPLEX3_EPS
+#undef GJK_SIMPLEX4_EPS
+
+#undef EPA_MAX_VERTICES
+#undef EPA_MAX_FACES
+#undef EPA_MAX_ITERATIONS
+#undef EPA_ACCURACY
+#undef EPA_FALLBACK
+#undef EPA_PLANE_EPS
+#undef EPA_INSIDE_EPS
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.h
new file mode 100644
index 00000000..ac501d5e
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.h
@@ -0,0 +1,75 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2008 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the
+use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+claim that you wrote the original software. If you use this software in a
+product, an acknowledgment in the product documentation would be appreciated
+but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/*
+GJK-EPA collision solver by Nathanael Presson, 2008
+*/
+#ifndef BT_GJK_EPA2_H
+#define BT_GJK_EPA2_H
+
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+
+///btGjkEpaSolver2: static GJK/EPA query entry points, contributed under zlib by Nathanael Presson
+struct	btGjkEpaSolver2
+{
+struct	sResults
+	{
+	enum eStatus
+		{
+		Separated,		/* Shapes do not penetrate												*/
+		Penetrating,	/* Shapes are penetrating												*/
+		GJK_Failed,		/* GJK phase failed, no big issue, shapes are probably just 'touching'	*/
+		EPA_Failed		/* EPA phase failed, bigger problem, need to save parameters, and debug	*/
+		}		status;
+	btVector3	witnesses[2];	/* btGjkEpaPenetrationDepthSolver reads [0] as the point on A and [1] as the point on B */
+	btVector3	normal;			/* copied into the caller's separating vector by btGjkEpaPenetrationDepthSolver */
+	btScalar	distance;
+	};
+/* NOTE(review): presumably the amount of scratch memory the solver needs - confirm against btGjkEpa2.cpp */
+static int		StackSizeRequirement();
+/* Distance query; callers in this patch read results.witnesses/normal after a true return */
+static bool		Distance(	const btConvexShape* shape0,const btTransform& wtrs0,
+							const btConvexShape* shape1,const btTransform& wtrs1,
+							const btVector3& guess,
+							sResults& results);
+/* Penetration query; usemargins defaults to true, the shape/shape SignedDistance() passes false */
+static bool		Penetration(const btConvexShape* shape0,const btTransform& wtrs0,
+							const btConvexShape* shape1,const btTransform& wtrs1,
+							const btVector3& guess,
+							sResults& results,
+							bool usemargins=true);
+#ifndef __SPU__
+static btScalar	SignedDistance(	const btVector3& position,
+								btScalar margin,
+								const btConvexShape* shape,
+								const btTransform& wtrs,
+								sResults& results);
+/* Shape/shape overload: tries Distance() first and falls back to Penetration(...,false) when that fails */
+static bool		SignedDistance(	const btConvexShape* shape0,const btTransform& wtrs0,
+								const btConvexShape* shape1,const btTransform& wtrs1,
+								const btVector3& guess,
+								sResults& results);
+#endif //__SPU__
+
+};
+
+#endif //BT_GJK_EPA2_H
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.cpp b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.cpp
new file mode 100644
index 00000000..c6dc3f3a
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.cpp
@@ -0,0 +1,66 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+EPA Copyright (c) Ricardo Padrela 2006
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "btGjkEpaPenetrationDepthSolver.h"
+
+
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpa2.h"
+
+bool btGjkEpaPenetrationDepthSolver::calcPenDepth( btSimplexSolverInterface& simplexSolver,
+											  const btConvexShape* pConvexA, const btConvexShape* pConvexB,
+											  const btTransform& transformA, const btTransform& transformB,
+											  btVector3& v, btVector3& wWitnessOnA, btVector3& wWitnessOnB,
+											  class btIDebugDraw* debugDraw, btStackAlloc* stackAlloc )
+{
+	// Penetration query between two convex shapes, delegated to btGjkEpaSolver2.
+	// Outputs, written only when one of the two solver queries below succeeds:
+	//   v = results.normal, wWitnessOnA = results.witnesses[0],
+	//   wWitnessOnB = results.witnesses[1].
+	// Returns true only when Penetration() succeeded; a false return may still
+	// have filled the outputs from the Distance() fallback.
+	// simplexSolver, debugDraw and stackAlloc are unused here (casts silence warnings).
+	(void)simplexSolver;
+	(void)debugDraw;
+	(void)stackAlloc;
+
+	// Guess vector handed to both queries: origin of A minus origin of B.
+	const btVector3	guessVector(transformA.getOrigin()-transformB.getOrigin());
+	btGjkEpaSolver2::sResults	results;
+
+	if(btGjkEpaSolver2::Penetration(pConvexA,transformA,
+								pConvexB,transformB,
+								guessVector,results))
+	{
+		wWitnessOnA = results.witnesses[0];
+		wWitnessOnB = results.witnesses[1];
+		v = results.normal;
+		return true;
+	}
+
+	// Penetration() failed: still hand back the Distance() witnesses/normal if that query succeeds.
+	if(btGjkEpaSolver2::Distance(pConvexA,transformA,pConvexB,transformB,guessVector,results))
+	{
+		wWitnessOnA = results.witnesses[0];
+		wWitnessOnB = results.witnesses[1];
+		v = results.normal;
+	}
+
+	return false;
+}
+
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h
new file mode 100644
index 00000000..a49689a1
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h
@@ -0,0 +1,43 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+EPA Copyright (c) Ricardo Padrela 2006 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#ifndef BT_GJP_EPA_PENETRATION_DEPTH_H
+#define BT_GJP_EPA_PENETRATION_DEPTH_H
+
+#include "btConvexPenetrationDepthSolver.h"
+
+///btGjkEpaPenetrationDepthSolver uses the Expanding Polytope Algorithm to
+///calculate the penetration depth between two convex shapes.
+class btGjkEpaPenetrationDepthSolver : public btConvexPenetrationDepthSolver
+{
+	public :
+		///The class declares no data members of its own, so there is nothing to set up.
+		btGjkEpaPenetrationDepthSolver()
+		{
+		}
+		///Writes the normal to v and the witness points to wWitnessOnA/wWitnessOnB; returns true only when btGjkEpaSolver2::Penetration() succeeds.
+		bool			calcPenDepth( btSimplexSolverInterface& simplexSolver,
+									  const btConvexShape* pConvexA, const btConvexShape* pConvexB,
+									  const btTransform& transformA, const btTransform& transformB,
+									  btVector3& v, btVector3& wWitnessOnA, btVector3& wWitnessOnB,
+									  class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc );
+
+	private :
+
+};
+
+#endif	// BT_GJP_EPA_PENETRATION_DEPTH_H
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp
new file mode 100644
index 00000000..8af16b9c
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp
@@ -0,0 +1,457 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btGjkPairDetector.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h"
+#include "BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h"
+
+
+
+#if defined(DEBUG) || defined (_DEBUG)
+//#define TEST_NON_VIRTUAL 1
+#include <stdio.h> //for debug printf
+#ifdef __SPU__
+#include <spu_printf.h>
+#define printf spu_printf
+//#define DEBUG_SPU_COLLISION_DETECTION 1
+#endif //__SPU__
+#endif
+
+//must be above the machine epsilon
+#define REL_ERROR2 btScalar(1.0e-6)
+
+//temp global counters (only ever incremented in this file): penetration-depth solver calls / getClosestPointsNonVirtual calls
+int gNumDeepPenetrationChecks = 0;
+int gNumGjkChecks = 0;
+
+
+//Detector for the pair (objectA, objectB); shape types and margins are queried from the shapes, which must be non-null.
+btGjkPairDetector::btGjkPairDetector(const btConvexShape* objectA,const btConvexShape* objectB,btSimplexSolverInterface* simplexSolver,btConvexPenetrationDepthSolver*	penetrationDepthSolver)
+:m_cachedSeparatingAxis(btScalar(0.),btScalar(1.),btScalar(0.)),
+m_penetrationDepthSolver(penetrationDepthSolver),
+m_simplexSolver(simplexSolver),
+m_minkowskiA(objectA), m_minkowskiB(objectB),
+m_shapeTypeA(objectA->getShapeType()), m_shapeTypeB(objectB->getShapeType()),
+m_marginA(objectA->getMargin()), m_marginB(objectB->getMargin()),
+m_ignoreMargin(false),
+m_cachedSeparatingDistance(btScalar(0.)),	//was indeterminate until the first query ran
+m_lastUsedMethod(-1),
+m_curIter(0), m_degenerateSimplex(0),		//public debug fields, were indeterminate until the first query ran
+m_catchDegeneracies(1)
+{
+}
+//Detector for the pair (objectA, objectB) with caller-supplied shape types and margins; the shapes are stored, not queried.
+btGjkPairDetector::btGjkPairDetector(const btConvexShape* objectA,const btConvexShape* objectB,int shapeTypeA,int shapeTypeB,btScalar marginA, btScalar marginB, btSimplexSolverInterface* simplexSolver,btConvexPenetrationDepthSolver*	penetrationDepthSolver)
+:m_cachedSeparatingAxis(btScalar(0.),btScalar(1.),btScalar(0.)),
+m_penetrationDepthSolver(penetrationDepthSolver),
+m_simplexSolver(simplexSolver),
+m_minkowskiA(objectA), m_minkowskiB(objectB),
+m_shapeTypeA(shapeTypeA), m_shapeTypeB(shapeTypeB),
+m_marginA(marginA), m_marginB(marginB),
+m_ignoreMargin(false),
+m_cachedSeparatingDistance(btScalar(0.)),	//was indeterminate until the first query ran
+m_lastUsedMethod(-1),
+m_curIter(0), m_degenerateSimplex(0),		//public debug fields, were indeterminate until the first query ran
+m_catchDegeneracies(1)
+{
+}
+
+void	btGjkPairDetector::getClosestPoints(const ClosestPointInput& input,Result& output,class btIDebugDraw* debugDraw,bool swapResults)
+{
+	//virtual entry point: forwards to the non-virtual worker; swapResults is ignored by this detector
+	getClosestPointsNonVirtual(input,output,debugDraw);
+	(void)swapResults;
+}
+
+#ifdef __SPU__ //NOTE: both branches of this conditional declare the identical signature
+void btGjkPairDetector::getClosestPointsNonVirtual(const ClosestPointInput& input,Result& output,class btIDebugDraw* debugDraw)
+#else
+void btGjkPairDetector::getClosestPointsNonVirtual(const ClosestPointInput& input,Result& output,class btIDebugDraw* debugDraw)
+#endif
+{
+	m_cachedSeparatingDistance = 0.f;
+	//GJK closest-point query between m_minkowskiA and m_minkowskiB; reports at most one contact through output.addContactPoint()
+	btScalar distance=btScalar(0.);
+	btVector3	normalInB(btScalar(0.),btScalar(0.),btScalar(0.));
+	btVector3 pointOnA,pointOnB;
+	btTransform	localTransA = input.m_transformA;
+	btTransform localTransB = input.m_transformB;
+	btVector3 positionOffset = (localTransA.getOrigin() + localTransB.getOrigin()) * btScalar(0.5); //midpoint of both origins; added back to pointOnB when the contact is reported
+	localTransA.getOrigin() -= positionOffset;
+	localTransB.getOrigin() -= positionOffset;
+	//2d handling is enabled only when both shapes report isConvex2d(); the z component of the support points is then zeroed below
+	bool check2d = m_minkowskiA->isConvex2d() && m_minkowskiB->isConvex2d();
+
+	btScalar marginA = m_marginA;
+	btScalar marginB = m_marginB;
+
+	gNumGjkChecks++;
+
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+	spu_printf("inside gjk\n");
+#endif
+	//for CCD we don't use margins
+	if (m_ignoreMargin)
+	{
+		marginA = btScalar(0.);
+		marginB = btScalar(0.);
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+		spu_printf("ignoring margin\n");
+#endif
+	}
+
+	m_curIter = 0;
+	int gGjkMaxIter = 1000;//this is to catch invalid input, perhaps check for #NaN?
+	m_cachedSeparatingAxis.setValue(0,1,0);
+	//every query restarts from the axis (0,1,0): whatever the cached axis held before this call is overwritten here
+	bool isValid = false;
+	bool checkSimplex = false;
+	bool checkPenetration = true;
+	m_degenerateSimplex = 0;
+	//m_degenerateSimplex codes set below: 10 early out on delta, 1 w already in simplex, 2/11 no progress (f0<=f1), 3 closest() failed, 6 tiny new axis, 12 distance stopped shrinking, 13 full simplex, 5 lenSqr<0.0001
+	m_lastUsedMethod = -1;
+	//m_lastUsedMethod codes set below: 1 GJK result, 2 GJK axis too short, 3 penetration solver result adopted, 8 solver result not deeper, 9 solver normal too short, 6 solver's separated result adopted, 5 that result not adopted
+	{
+		btScalar squaredDistance = BT_LARGE_FLOAT;
+		btScalar delta = btScalar(0.);
+
+		btScalar margin = marginA + marginB;
+
+
+		//iteration: each pass adds w = pWorld - qWorld (support points of A and B) to the simplex and asks the simplex solver for a new axis
+		m_simplexSolver->reset();
+
+		for ( ; ; )
+		//while (true)
+		{
+			//support directions for A and B derived from the cached separating axis (negated for A)
+			btVector3 seperatingAxisInA = (-m_cachedSeparatingAxis)* input.m_transformA.getBasis();
+			btVector3 seperatingAxisInB = m_cachedSeparatingAxis* input.m_transformB.getBasis();
+
+#if 1
+
+			btVector3 pInA = m_minkowskiA->localGetSupportVertexWithoutMarginNonVirtual(seperatingAxisInA);
+			btVector3 qInB = m_minkowskiB->localGetSupportVertexWithoutMarginNonVirtual(seperatingAxisInB);
+
+//			btVector3 pInA  = localGetSupportingVertexWithoutMargin(m_shapeTypeA, m_minkowskiA, seperatingAxisInA,input.m_convexVertexData[0]);//, &featureIndexA);
+//			btVector3 qInB  = localGetSupportingVertexWithoutMargin(m_shapeTypeB, m_minkowskiB, seperatingAxisInB,input.m_convexVertexData[1]);//, &featureIndexB);
+
+#else
+#ifdef __SPU__
+			btVector3 pInA = m_minkowskiA->localGetSupportVertexWithoutMarginNonVirtual(seperatingAxisInA);
+			btVector3 qInB = m_minkowskiB->localGetSupportVertexWithoutMarginNonVirtual(seperatingAxisInB);
+#else
+			btVector3 pInA = m_minkowskiA->localGetSupportingVertexWithoutMargin(seperatingAxisInA);
+			btVector3 qInB = m_minkowskiB->localGetSupportingVertexWithoutMargin(seperatingAxisInB);
+#ifdef TEST_NON_VIRTUAL
+			btVector3 pInAv = m_minkowskiA->localGetSupportingVertexWithoutMargin(seperatingAxisInA);
+			btVector3 qInBv = m_minkowskiB->localGetSupportingVertexWithoutMargin(seperatingAxisInB);
+			btAssert((pInAv-pInA).length() < 0.0001);
+			btAssert((qInBv-qInB).length() < 0.0001);
+#endif //
+#endif //__SPU__
+#endif
+
+			//the support points are transformed with the midpoint-shifted copies of the input transforms
+			btVector3  pWorld = localTransA(pInA);
+			btVector3  qWorld = localTransB(qInB);
+
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+		spu_printf("got local supporting vertices\n");
+#endif
+
+			if (check2d)
+			{
+				pWorld[2] = 0.f;
+				qWorld[2] = 0.f;
+			}
+
+			btVector3 w	= pWorld - qWorld;
+			delta = m_cachedSeparatingAxis.dot(w);
+
+			// potential exit, they don't overlap
+			if ((delta > btScalar(0.0)) && (delta * delta > squaredDistance * input.m_maximumDistanceSquared))
+			{
+				m_degenerateSimplex = 10;
+				checkSimplex=true;
+				//checkPenetration = false;
+				break;
+			}
+
+			//exit 0: the new point is already in the simplex, or we didn't come any closer
+			if (m_simplexSolver->inSimplex(w))
+			{
+				m_degenerateSimplex = 1;
+				checkSimplex = true;
+				break;
+			}
+			// are we getting any closer ?
+			btScalar f0 = squaredDistance - delta;
+			btScalar f1 = squaredDistance * REL_ERROR2;
+
+			if (f0 <= f1)
+			{
+				if (f0 <= btScalar(0.))
+				{
+					m_degenerateSimplex = 2;
+				} else
+				{
+					m_degenerateSimplex = 11;
+				}
+				checkSimplex = true;
+				break;
+			}
+
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+		spu_printf("addVertex 1\n");
+#endif
+			//add current vertex to simplex
+			m_simplexSolver->addVertex(w, pWorld, qWorld);
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+		spu_printf("addVertex 2\n");
+#endif
+			btVector3 newCachedSeparatingAxis;
+
+			//calculate the closest point to the origin (update vector v)
+			if (!m_simplexSolver->closest(newCachedSeparatingAxis))
+			{
+				m_degenerateSimplex = 3;
+				checkSimplex = true;
+				break;
+			}
+			//a new axis shorter than REL_ERROR2 (squared) is adopted as the cached axis before leaving the loop
+			if(newCachedSeparatingAxis.length2()<REL_ERROR2)
+            {
+				m_cachedSeparatingAxis = newCachedSeparatingAxis;
+                m_degenerateSimplex = 6;
+                checkSimplex = true;
+                break;
+            }
+
+			btScalar previousSquaredDistance = squaredDistance;
+			squaredDistance = newCachedSeparatingAxis.length2();
+#if 0
+///warning: this termination condition leads to some problems in 2d test case see Bullet/Demos/Box2dDemo
+			if (squaredDistance>previousSquaredDistance)
+			{
+				m_degenerateSimplex = 7;
+				squaredDistance = previousSquaredDistance;
+                checkSimplex = false;
+                break;
+			}
+#endif //
+
+
+			//redundant m_simplexSolver->compute_points(pointOnA, pointOnB);
+
+			//are we getting any closer ?
+			if (previousSquaredDistance - squaredDistance <= SIMD_EPSILON * previousSquaredDistance)
+			{
+//				m_simplexSolver->backup_closest(m_cachedSeparatingAxis);
+				checkSimplex = true;
+				m_degenerateSimplex = 12;
+
+				break;
+			}
+
+			m_cachedSeparatingAxis = newCachedSeparatingAxis;
+
+			  //degeneracy, this is typically due to invalid/uninitialized worldtransforms for a btCollisionObject   
+              if (m_curIter++ > gGjkMaxIter)   
+              {   
+                      #if defined(DEBUG) || defined (_DEBUG) || defined (DEBUG_SPU_COLLISION_DETECTION)
+
+                              printf("btGjkPairDetector maxIter exceeded:%i\n",m_curIter);   
+                              printf("sepAxis=(%f,%f,%f), squaredDistance = %f, shapeTypeA=%i,shapeTypeB=%i\n",   
+                              m_cachedSeparatingAxis.getX(),   
+                              m_cachedSeparatingAxis.getY(),   
+                              m_cachedSeparatingAxis.getZ(),   
+                              squaredDistance,   
+                              m_minkowskiA->getShapeType(),   
+                              m_minkowskiB->getShapeType());   
+
+                      #endif   
+                      break;   
+
+              } 
+
+			//a full simplex ends the iteration without setting checkSimplex, so only the penetration path below can produce a result
+			bool check = (!m_simplexSolver->fullSimplex());
+			//bool check = (!m_simplexSolver->fullSimplex() && squaredDistance > SIMD_EPSILON * m_simplexSolver->maxVertex());
+
+			if (!check)
+			{
+				//do we need this backup_closest here ?
+//				m_simplexSolver->backup_closest(m_cachedSeparatingAxis);
+				m_degenerateSimplex = 13;
+				break;
+			}
+		}
+		//post-process the GJK result: closest points from the simplex solver, normal from the cached axis
+		if (checkSimplex)
+		{
+			m_simplexSolver->compute_points(pointOnA, pointOnB);
+			normalInB = m_cachedSeparatingAxis;
+			btScalar lenSqr =m_cachedSeparatingAxis.length2();
+
+			//valid normal
+			if (lenSqr < 0.0001)
+			{
+				m_degenerateSimplex = 5;
+			}
+			if (lenSqr > SIMD_EPSILON*SIMD_EPSILON)
+			{
+				btScalar rlen = btScalar(1.) / btSqrt(lenSqr );
+				normalInB *= rlen; //normalize
+				btScalar s = btSqrt(squaredDistance);
+
+				btAssert(s > btScalar(0.0));
+				pointOnA -= m_cachedSeparatingAxis * (marginA / s);
+				pointOnB += m_cachedSeparatingAxis * (marginB / s);
+				distance = ((btScalar(1.)/rlen) - margin); //1/rlen is the length of the cached axis
+				isValid = true;
+
+				m_lastUsedMethod = 1;
+			} else
+			{
+				m_lastUsedMethod = 2;
+			}
+		}
+		//the penetration solver is also consulted when GJK flagged any degenerate exit and distance+margin is below 0.01
+		bool catchDegeneratePenetrationCase = 
+			(m_catchDegeneracies && m_penetrationDepthSolver && m_degenerateSimplex && ((distance+margin) < 0.01));
+
+		//if (checkPenetration && !isValid)
+		if (checkPenetration && (!isValid || catchDegeneratePenetrationCase ))
+		{
+			//penetration case
+
+			//if there is no way to handle penetrations, bail out
+			if (m_penetrationDepthSolver)
+			{
+				// Penetration depth case.
+				btVector3 tmpPointOnA,tmpPointOnB;
+
+				gNumDeepPenetrationChecks++;
+				m_cachedSeparatingAxis.setZero(); //zeroed so that a non-zero axis after the call means the solver wrote it (tested in the else branch)
+
+				bool isValid2 = m_penetrationDepthSolver->calcPenDepth( 
+					*m_simplexSolver, 
+					m_minkowskiA,m_minkowskiB,
+					localTransA,localTransB,
+					m_cachedSeparatingAxis, tmpPointOnA, tmpPointOnB,
+					debugDraw,input.m_stackAlloc
+					);
+
+				//solver reported penetration: the normal is taken from the witness points, or from the solver's axis when they coincide
+				if (isValid2)
+				{
+					btVector3 tmpNormalInB = tmpPointOnB-tmpPointOnA;
+					btScalar lenSqr = tmpNormalInB.length2();
+					if (lenSqr <= (SIMD_EPSILON*SIMD_EPSILON))
+					{
+						tmpNormalInB = m_cachedSeparatingAxis;
+						lenSqr = m_cachedSeparatingAxis.length2();
+					}
+
+					if (lenSqr > (SIMD_EPSILON*SIMD_EPSILON))
+					{
+						tmpNormalInB /= btSqrt(lenSqr);
+						btScalar distance2 = -(tmpPointOnA-tmpPointOnB).length();
+						//only replace valid penetrations when the result is deeper (check)
+						if (!isValid || (distance2 < distance))
+						{
+							distance = distance2;
+							pointOnA = tmpPointOnA;
+							pointOnB = tmpPointOnB;
+							normalInB = tmpNormalInB;
+							isValid = true;
+							m_lastUsedMethod = 3;
+						} else
+						{
+							m_lastUsedMethod = 8;
+						}
+					} else
+					{
+						m_lastUsedMethod = 9;
+					}
+				} else
+
+				{
+					///this is another degenerate case, where the initial GJK calculation reports a degenerate case
+					///EPA reports no penetration, and the second GJK (using the supporting vector without margin)
+					///reports a valid positive distance. Use the results of the second GJK instead of failing.
+					///thanks to Jacob.Langford for the reproduction case
+					///http://code.google.com/p/bullet/issues/detail?id=250
+
+
+					if (m_cachedSeparatingAxis.length2() > btScalar(0.))
+					{
+						btScalar distance2 = (tmpPointOnA-tmpPointOnB).length()-margin;
+						//only replace valid distances when the distance is less
+						if (!isValid || (distance2 < distance))
+						{
+							distance = distance2;
+							pointOnA = tmpPointOnA;
+							pointOnB = tmpPointOnB;
+							pointOnA -= m_cachedSeparatingAxis * marginA ;
+							pointOnB += m_cachedSeparatingAxis * marginB ;
+							normalInB = m_cachedSeparatingAxis;
+							normalInB.normalize();
+							isValid = true;
+							m_lastUsedMethod = 6;
+						} else
+						{
+							m_lastUsedMethod = 5;
+						}
+					}
+				}
+
+			}
+
+		}
+	}
+
+
+	//a contact is reported only for a valid result that is either penetrating or closer than the maximum distance
+	if (isValid && ((distance < 0) || (distance*distance < input.m_maximumDistanceSquared)))
+	{
+#if 0
+///some debugging
+//		if (check2d)
+		{
+			printf("n = %2.3f,%2.3f,%2.3f. ",normalInB[0],normalInB[1],normalInB[2]);
+			printf("distance = %2.3f exit=%d deg=%d\n",distance,m_lastUsedMethod,m_degenerateSimplex);
+		}
+#endif 
+
+		m_cachedSeparatingAxis = normalInB;
+		m_cachedSeparatingDistance = distance;
+
+		output.addContactPoint(
+			normalInB,
+			pointOnB+positionOffset, //undoes the midpoint shift applied to the local transforms
+			distance);
+
+	}
+
+
+}
+
+
+
+
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h
new file mode 100644
index 00000000..2277a19d
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h
@@ -0,0 +1,103 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+
+#ifndef BT_GJK_PAIR_DETECTOR_H
+#define BT_GJK_PAIR_DETECTOR_H
+
+#include "btDiscreteCollisionDetectorInterface.h"
+#include "BulletCollision/CollisionShapes/btCollisionMargin.h"
+
+class btConvexShape;
+#include "btSimplexSolverInterface.h"
+class btConvexPenetrationDepthSolver;
+
+/// btGjkPairDetector uses GJK to implement the btDiscreteCollisionDetectorInterface
+class btGjkPairDetector : public btDiscreteCollisionDetectorInterface
+{
+	//private state; the destructor is empty, so none of the pointers below is released by this class
+
+	btVector3	m_cachedSeparatingAxis;	//reset to (0,1,0) at the start of every query; holds the reported normal after a contact was added
+	btConvexPenetrationDepthSolver*	m_penetrationDepthSolver;	//may be null: the penetration path is then skipped
+	btSimplexSolverInterface* m_simplexSolver;
+	const btConvexShape* m_minkowskiA;
+	const btConvexShape* m_minkowskiB;
+	int	m_shapeTypeA;	//stored by the constructors; the .cpp only references it from commented-out code
+	int m_shapeTypeB;
+	btScalar	m_marginA;
+	btScalar	m_marginB;
+
+	bool		m_ignoreMargin;	//when true, both margins are treated as zero during a query
+	btScalar	m_cachedSeparatingDistance;	//set to 0 at the start of every query; the reported distance after a contact was added
+
+
+public:
+
+	//some debugging to fix degeneracy problems
+	int			m_lastUsedMethod;	//code of the path that produced (or rejected) the last result; -1 when none
+	int			m_curIter;			//iteration counter of the last query
+	int			m_degenerateSimplex;	//exit code of the last GJK iteration; 0 when no degenerate exit was flagged
+	int			m_catchDegeneracies;	//non-zero enables the extra penetration-solver check for degenerate GJK exits
+
+	//first form queries shape types and margins from the shapes; second form takes them from the caller
+	btGjkPairDetector(const btConvexShape* objectA,const btConvexShape* objectB,btSimplexSolverInterface* simplexSolver,btConvexPenetrationDepthSolver*	penetrationDepthSolver);
+	btGjkPairDetector(const btConvexShape* objectA,const btConvexShape* objectB,int shapeTypeA,int shapeTypeB,btScalar marginA, btScalar marginB, btSimplexSolverInterface* simplexSolver,btConvexPenetrationDepthSolver*	penetrationDepthSolver);
+	virtual ~btGjkPairDetector() {};
+	//forwards to getClosestPointsNonVirtual(); swapResults is ignored
+	virtual void	getClosestPoints(const ClosestPointInput& input,Result& output,class btIDebugDraw* debugDraw,bool swapResults=false);
+	//runs the query and reports at most one contact through output.addContactPoint()
+	void	getClosestPointsNonVirtual(const ClosestPointInput& input,Result& output,class btIDebugDraw* debugDraw);
+
+	//NOTE: the setters replace the shape pointers only; m_shapeTypeA/B and m_marginA/B keep their constructor values
+	void setMinkowskiA(btConvexShape* minkA)
+	{
+		m_minkowskiA = minkA;
+	}
+
+	void setMinkowskiB(btConvexShape* minkB)
+	{
+		m_minkowskiB = minkB;
+	}
+	void setCachedSeperatingAxis(const btVector3& seperatingAxis)
+	{
+		m_cachedSeparatingAxis = seperatingAxis;
+	}
+	//NOTE: getClosestPointsNonVirtual() resets the cached axis to (0,1,0) on entry, so a value set above is not used as its start axis
+	const btVector3& getCachedSeparatingAxis() const
+	{
+		return m_cachedSeparatingAxis;
+	}
+	btScalar	getCachedSeparatingDistance() const
+	{
+		return m_cachedSeparatingDistance;
+	}
+
+	void	setPenetrationDepthSolver(btConvexPenetrationDepthSolver*	penetrationDepthSolver)
+	{
+		m_penetrationDepthSolver = penetrationDepthSolver;
+	}
+
+	///don't use setIgnoreMargin, it's for Bullet's internal use
+	void	setIgnoreMargin(bool ignoreMargin)
+	{
+		m_ignoreMargin = ignoreMargin;
+	}
+
+
+};
+
+#endif //BT_GJK_PAIR_DETECTOR_H
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btManifoldPoint.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btManifoldPoint.h
new file mode 100644
index 00000000..0ce9dd25
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btManifoldPoint.h
@@ -0,0 +1,158 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_MANIFOLD_CONTACT_POINT_H
+#define BT_MANIFOLD_CONTACT_POINT_H
+
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btTransformUtil.h"
+
+#ifdef PFX_USE_FREE_VECTORMATH
+	#include "physics_effects/base_level/solver/pfx_constraint_row.h"
+typedef sce::PhysicsEffects::PfxConstraintRow btConstraintRow;
+#else
+	// Don't change the order of the following members. NOTE(review): presumably the layout must match sce::PhysicsEffects::PfxConstraintRow used in the other branch - confirm
+	ATTRIBUTE_ALIGNED16(struct) btConstraintRow {
+		btScalar m_normal[3];
+		btScalar m_rhs;
+		btScalar m_jacDiagInv;
+		btScalar m_lowerLimit;
+		btScalar m_upperLimit;
+		btScalar m_accumImpulse;
+	};
+	typedef btConstraintRow PfxConstraintRow;
+#endif //PFX_USE_FREE_VECTORMATH
+
+
+
+/// btManifoldPoint stores the data of one persistent contact point;
+/// used to improve stability and performance of rigidbody dynamics response.
+class btManifoldPoint
+	{
+		public:
+			btManifoldPoint()
+				:m_userPersistentData(0),
+				m_appliedImpulse(0.f),
+				m_lateralFrictionInitialized(false),
+				m_appliedImpulseLateral1(0.f),
+				m_appliedImpulseLateral2(0.f),
+				m_contactMotion1(0.f),
+				m_contactMotion2(0.f),
+				m_contactCFM1(0.f),
+				m_contactCFM2(0.f),
+				m_lifeTime(0)
+			{ //NOTE(review): m_distance1, m_combinedFriction, m_combinedRestitution, m_partId0/1, m_index0/1 and mConstraintRow get no initializer here
+			}
+			//stores the two given points as m_localPointA/B; NOTE(review): m_partId0/1, m_index0/1 and the world positions get no initializer here
+			btManifoldPoint( const btVector3 &pointA, const btVector3 &pointB, 
+					const btVector3 &normal, 
+					btScalar distance ) :
+					m_localPointA( pointA ), 
+					m_localPointB( pointB ), 
+					m_normalWorldOnB( normal ), 
+					m_distance1( distance ),
+					m_combinedFriction(btScalar(0.)),
+					m_combinedRestitution(btScalar(0.)),
+					m_userPersistentData(0),
+					m_appliedImpulse(0.f),
+					m_lateralFrictionInitialized(false),
+					m_appliedImpulseLateral1(0.f),
+					m_appliedImpulseLateral2(0.f),
+					m_contactMotion1(0.f),
+					m_contactMotion2(0.f),
+					m_contactCFM1(0.f),
+					m_contactCFM2(0.f),
+					m_lifeTime(0)
+			{
+				mConstraintRow[0].m_accumImpulse = 0.f;
+				mConstraintRow[1].m_accumImpulse = 0.f;
+				mConstraintRow[2].m_accumImpulse = 0.f;
+			}
+
+			//all data members are public; their declaration order is also the order in which the constructors initialise them
+
+			btVector3 m_localPointA;			
+			btVector3 m_localPointB;			
+			btVector3	m_positionWorldOnB;
+			///m_positionWorldOnA is redundant information, see getPositionWorldOnA(), but for clarity
+			btVector3	m_positionWorldOnA;
+			btVector3 m_normalWorldOnB;
+
+			btScalar	m_distance1;	//read and written through getDistance()/setDistance()
+			btScalar	m_combinedFriction;
+			btScalar	m_combinedRestitution;
+
+         //BP mod, store contact triangles.
+         int	   m_partId0;
+         int      m_partId1;
+         int      m_index0;
+         int      m_index1;
+
+			mutable void*	m_userPersistentData;	//mutable: may be assigned through a const btManifoldPoint
+			btScalar		m_appliedImpulse;
+
+			bool			m_lateralFrictionInitialized;
+			btScalar		m_appliedImpulseLateral1;
+			btScalar		m_appliedImpulseLateral2;
+			btScalar		m_contactMotion1;
+			btScalar		m_contactMotion2;
+			btScalar		m_contactCFM1;
+			btScalar		m_contactCFM2;
+
+			int				m_lifeTime;//lifetime of the contactpoint in frames
+
+			btVector3		m_lateralFrictionDir1;
+			btVector3		m_lateralFrictionDir2;
+
+
+			//three constraint rows; only the 4-argument constructor zeroes their m_accumImpulse
+			btConstraintRow mConstraintRow[3];
+
+
+			btScalar getDistance() const
+			{
+				return m_distance1;
+			}
+			int	getLifeTime() const
+			{
+				return m_lifeTime;
+			}
+			//returns the stored m_positionWorldOnA; the formula deriving it from the B-side data is kept commented out
+			const btVector3& getPositionWorldOnA() const {
+				return m_positionWorldOnA;
+//				return m_positionWorldOnB + m_normalWorldOnB * m_distance1;
+			}
+
+			const btVector3& getPositionWorldOnB() const
+			{
+				return m_positionWorldOnB;
+			}
+
+			void	setDistance(btScalar dist)
+			{
+				m_distance1 = dist;
+			}
+
+			///this returns the most recent applied impulse, to satisfy contact constraints by the constraint solver
+			btScalar	getAppliedImpulse() const
+			{
+				return m_appliedImpulse;
+			}
+
+
+
+	};
+
+#endif //BT_MANIFOLD_CONTACT_POINT_H
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.cpp b/src/bullet/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.cpp
new file mode 100644
index 00000000..fe31f08d
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.cpp
@@ -0,0 +1,362 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btMinkowskiPenetrationDepthSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h"
+#include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+
+#define NUM_UNITSPHERE_POINTS 42 //number of initialized sample directions at the start of the table returned by getPenetrationDirections()
+///Brute-force penetration depth estimation for two convex shapes: the depth is sampled along the fixed unit sphere directions and the preferred
+///directions of both shapes, then GJK is run with A displaced along the chosen direction. On success v, pa and pb are written and true is returned.
+bool btMinkowskiPenetrationDepthSolver::calcPenDepth(btSimplexSolverInterface& simplexSolver,
+												   const btConvexShape* convexA,const btConvexShape* convexB,
+												   const btTransform& transA,const btTransform& transB,
+												   btVector3& v, btVector3& pa, btVector3& pb,
+												   class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc
+												   )
+{
+
+	(void)stackAlloc;
+	(void)v;
+	
+	bool check2d= convexA->isConvex2d() && convexB->isConvex2d();
+
+	struct btIntermediateResult : public btDiscreteCollisionDetectorInterface::Result
+	{
+		//m_depth starts at zero so that it always holds a defined value, even when GJK reports no contact point
+		btIntermediateResult():m_depth(btScalar(0.)),m_hasResult(false)
+		{
+		}
+		
+		btVector3 m_normalOnBInWorld;
+		btVector3 m_pointInWorld;
+		btScalar m_depth;
+		bool	m_hasResult;
+
+		virtual void setShapeIdentifiersA(int partId0,int index0)
+		{
+			(void)partId0;
+			(void)index0;
+		}
+		virtual void setShapeIdentifiersB(int partId1,int index1)
+		{
+			(void)partId1;
+			(void)index1;
+		}
+		void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
+		{
+			m_normalOnBInWorld = normalOnBInWorld;
+			m_pointInWorld = pointInWorld;
+			m_depth = depth;
+			m_hasResult = true;
+		}
+	};
+
+	//just take fixed number of orientation, and sample the penetration depth in that direction
+	btScalar minProj = btScalar(BT_LARGE_FLOAT);
+	btVector3 minNorm(btScalar(0.), btScalar(0.), btScalar(0.));
+	btVector3 minA,minB;
+	btVector3 seperatingAxisInA,seperatingAxisInB;
+	btVector3 pInA,qInB,pWorld,qWorld,w;
+
+#ifndef __SPU__
+#define USE_BATCHED_SUPPORT 1
+#endif
+#ifdef USE_BATCHED_SUPPORT
+	//batched path: collect the sample axes of all directions first, then query each shape once for all of its support vertices
+	btVector3	supportVerticesABatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+	btVector3	supportVerticesBBatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+	btVector3	seperatingAxisInABatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+	btVector3	seperatingAxisInBBatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+	int i;
+
+	int numSampleDirections = NUM_UNITSPHERE_POINTS;
+
+	for (i=0;i<numSampleDirections;i++)
+	{
+		btVector3 norm = getPenetrationDirections()[i];
+		seperatingAxisInABatch[i] =  (-norm) * transA.getBasis() ;
+		seperatingAxisInBBatch[i] =  norm   * transB.getBasis() ;
+	}
+	//append the preferred directions of shape A; note that they are written into the static table returned by getPenetrationDirections()
+	{
+		int numPDA = convexA->getNumPreferredPenetrationDirections();
+		if (numPDA)
+		{
+			for (int i=0;i<numPDA;i++)
+			{
+				btVector3 norm;
+				convexA->getPreferredPenetrationDirection(i,norm);
+				norm  = transA.getBasis() * norm;
+				getPenetrationDirections()[numSampleDirections] = norm;
+				seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis();
+				seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis();
+				numSampleDirections++;
+			}
+		}
+	}
+	//append the preferred directions of shape B in the same way
+	{
+		int numPDB = convexB->getNumPreferredPenetrationDirections();
+		if (numPDB)
+		{
+			for (int i=0;i<numPDB;i++)
+			{
+				btVector3 norm;
+				convexB->getPreferredPenetrationDirection(i,norm);
+				norm  = transB.getBasis() * norm;
+				getPenetrationDirections()[numSampleDirections] = norm;
+				seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis();
+				seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis();
+				numSampleDirections++;
+			}
+		}
+	}
+
+
+
+	//fetch the support vertices of both shapes for every sample axis
+	convexA->batchedUnitVectorGetSupportingVertexWithoutMargin(seperatingAxisInABatch,supportVerticesABatch,numSampleDirections);
+	convexB->batchedUnitVectorGetSupportingVertexWithoutMargin(seperatingAxisInBBatch,supportVerticesBBatch,numSampleDirections);
+
+	for (i=0;i<numSampleDirections;i++)
+	{
+		btVector3 norm = getPenetrationDirections()[i];
+		if (check2d)
+		{
+			norm[2] = 0.f;
+		}
+		if (norm.length2()>0.01)
+		{
+			//only directions that are still long enough (e.g. after the 2d projection above) are sampled
+			seperatingAxisInA = seperatingAxisInABatch[i];
+			seperatingAxisInB = seperatingAxisInBBatch[i];
+
+			pInA = supportVerticesABatch[i];
+			qInB = supportVerticesBBatch[i];
+
+			pWorld = transA(pInA);	
+			qWorld = transB(qInB);
+			if (check2d)
+			{
+				pWorld[2] = 0.f;
+				qWorld[2] = 0.f;
+			}
+
+			w	= qWorld - pWorld;
+			btScalar delta = norm.dot(w);
+			//find smallest delta
+			if (delta < minProj)
+			{
+				minProj = delta;
+				minNorm = norm;
+				minA = pWorld;
+				minB = qWorld;
+			}
+		}
+	}	
+#else
+	//non-batched path: one support query per shape and direction
+	int numSampleDirections = NUM_UNITSPHERE_POINTS;
+
+#ifndef __SPU__
+	{
+		int numPDA = convexA->getNumPreferredPenetrationDirections();
+		if (numPDA)
+		{
+			for (int i=0;i<numPDA;i++)
+			{
+				btVector3 norm;
+				convexA->getPreferredPenetrationDirection(i,norm);
+				norm  = transA.getBasis() * norm;
+				getPenetrationDirections()[numSampleDirections] = norm;
+				numSampleDirections++;
+			}
+		}
+	}
+
+	{
+		int numPDB = convexB->getNumPreferredPenetrationDirections();
+		if (numPDB)
+		{
+			for (int i=0;i<numPDB;i++)
+			{
+				btVector3 norm;
+				convexB->getPreferredPenetrationDirection(i,norm);
+				norm  = transB.getBasis() * norm;
+				getPenetrationDirections()[numSampleDirections] = norm;
+				numSampleDirections++;
+			}
+		}
+	}
+#endif // __SPU__
+
+	for (int i=0;i<numSampleDirections;i++)
+	{
+		const btVector3& norm = getPenetrationDirections()[i];
+		seperatingAxisInA = (-norm)* transA.getBasis();
+		seperatingAxisInB = norm* transB.getBasis();
+		pInA = convexA->localGetSupportVertexWithoutMarginNonVirtual(seperatingAxisInA);
+		qInB = convexB->localGetSupportVertexWithoutMarginNonVirtual(seperatingAxisInB);
+		pWorld = transA(pInA);	
+		qWorld = transB(qInB);
+		w	= qWorld - pWorld;
+		btScalar delta = norm.dot(w);
+		//find smallest delta
+		if (delta < minProj)
+		{
+			minProj = delta;
+			minNorm = norm;
+			minA = pWorld;
+			minB = qWorld;
+		}
+	}
+#endif //USE_BATCHED_SUPPORT
+
+	//add the margins
+
+	minA += minNorm*convexA->getMarginNonVirtual();
+	minB -= minNorm*convexB->getMarginNonVirtual();
+	//no penetration
+	if (minProj < btScalar(0.))
+		return false;
+
+	btScalar extraSeparation = 0.5f;///scale dependent
+	minProj += extraSeparation+(convexA->getMarginNonVirtual() + convexB->getMarginNonVirtual());
+
+
+
+
+
+//#define DEBUG_DRAW 1
+#ifdef DEBUG_DRAW
+	if (debugDraw)
+	{
+		btVector3 color(0,1,0);
+		debugDraw->drawLine(minA,minB,color);
+		color = btVector3 (1,1,1);
+		btVector3 vec = minB-minA;
+		btScalar prj2 = minNorm.dot(vec);
+		debugDraw->drawLine(minA,minA+(minNorm*minProj),color);
+
+	}
+#endif //DEBUG_DRAW
+
+
+	//run GJK with shape A moved along minNorm by the padded sampled depth
+	btGjkPairDetector gjkdet(convexA,convexB,&simplexSolver,0);
+
+	btScalar offsetDist = minProj;
+	btVector3 offset = minNorm * offsetDist;
+
+
+
+	btGjkPairDetector::ClosestPointInput input;
+
+	btVector3 newOrg = transA.getOrigin() + offset;
+
+	btTransform displacedTrans = transA;
+	displacedTrans.setOrigin(newOrg);
+
+	input.m_transformA = displacedTrans;
+	input.m_transformB = transB;
+	input.m_maximumDistanceSquared = btScalar(BT_LARGE_FLOAT);//minProj;
+
+	btIntermediateResult res;
+	gjkdet.setCachedSeperatingAxis(-minNorm);
+	gjkdet.getClosestPoints(input,res,debugDraw);
+	//res.m_depth is only meaningful when res.m_hasResult is set; the value below is only used in that case
+	btScalar correctedMinNorm = minProj - res.m_depth;
+
+
+	//the penetration depth is over-estimated, relax it
+	btScalar penetration_relaxation= btScalar(1.);
+	minNorm*=penetration_relaxation;
+
+
+	if (res.m_hasResult)
+	{
+
+		pa = res.m_pointInWorld - minNorm * correctedMinNorm;
+		pb = res.m_pointInWorld;
+		v = minNorm;
+
+#ifdef DEBUG_DRAW
+		if (debugDraw)
+		{
+			btVector3 color(1,0,0);
+			debugDraw->drawLine(pa,pb,color);
+		}
+#endif//DEBUG_DRAW
+
+
+	}
+	return res.m_hasResult;
+}
+///Returns a function-local static table: NUM_UNITSPHERE_POINTS initialized sample directions, followed by MAX_PREFERRED_PENETRATION_DIRECTIONS*2 slots that calcPenDepth writes to.
+btVector3*	btMinkowskiPenetrationDepthSolver::getPenetrationDirections()
+{
+	static btVector3	sPenetrationDirections[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2] = 
+	{
+	btVector3(btScalar(0.000000) , btScalar(-0.000000),btScalar(-1.000000)),
+	btVector3(btScalar(0.723608) , btScalar(-0.525725),btScalar(-0.447219)),
+	btVector3(btScalar(-0.276388) , btScalar(-0.850649),btScalar(-0.447219)),
+	btVector3(btScalar(-0.894426) , btScalar(-0.000000),btScalar(-0.447216)),
+	btVector3(btScalar(-0.276388) , btScalar(0.850649),btScalar(-0.447220)),
+	btVector3(btScalar(0.723608) , btScalar(0.525725),btScalar(-0.447219)),
+	btVector3(btScalar(0.276388) , btScalar(-0.850649),btScalar(0.447220)),
+	btVector3(btScalar(-0.723608) , btScalar(-0.525725),btScalar(0.447219)),
+	btVector3(btScalar(-0.723608) , btScalar(0.525725),btScalar(0.447219)),
+	btVector3(btScalar(0.276388) , btScalar(0.850649),btScalar(0.447219)),
+	btVector3(btScalar(0.894426) , btScalar(0.000000),btScalar(0.447216)),
+	btVector3(btScalar(-0.000000) , btScalar(0.000000),btScalar(1.000000)),
+	btVector3(btScalar(0.425323) , btScalar(-0.309011),btScalar(-0.850654)),
+	btVector3(btScalar(-0.162456) , btScalar(-0.499995),btScalar(-0.850654)),
+	btVector3(btScalar(0.262869) , btScalar(-0.809012),btScalar(-0.525738)),
+	btVector3(btScalar(0.425323) , btScalar(0.309011),btScalar(-0.850654)),
+	btVector3(btScalar(0.850648) , btScalar(-0.000000),btScalar(-0.525736)),
+	btVector3(btScalar(-0.525730) , btScalar(-0.000000),btScalar(-0.850652)),
+	btVector3(btScalar(-0.688190) , btScalar(-0.499997),btScalar(-0.525736)),
+	btVector3(btScalar(-0.162456) , btScalar(0.499995),btScalar(-0.850654)),
+	btVector3(btScalar(-0.688190) , btScalar(0.499997),btScalar(-0.525736)),
+	btVector3(btScalar(0.262869) , btScalar(0.809012),btScalar(-0.525738)),
+	btVector3(btScalar(0.951058) , btScalar(0.309013),btScalar(0.000000)),
+	btVector3(btScalar(0.951058) , btScalar(-0.309013),btScalar(0.000000)),
+	btVector3(btScalar(0.587786) , btScalar(-0.809017),btScalar(0.000000)),
+	btVector3(btScalar(0.000000) , btScalar(-1.000000),btScalar(0.000000)),
+	btVector3(btScalar(-0.587786) , btScalar(-0.809017),btScalar(0.000000)),
+	btVector3(btScalar(-0.951058) , btScalar(-0.309013),btScalar(-0.000000)),
+	btVector3(btScalar(-0.951058) , btScalar(0.309013),btScalar(-0.000000)),
+	btVector3(btScalar(-0.587786) , btScalar(0.809017),btScalar(-0.000000)),
+	btVector3(btScalar(-0.000000) , btScalar(1.000000),btScalar(-0.000000)),
+	btVector3(btScalar(0.587786) , btScalar(0.809017),btScalar(-0.000000)),
+	btVector3(btScalar(0.688190) , btScalar(-0.499997),btScalar(0.525736)),
+	btVector3(btScalar(-0.262869) , btScalar(-0.809012),btScalar(0.525738)),
+	btVector3(btScalar(-0.850648) , btScalar(0.000000),btScalar(0.525736)),
+	btVector3(btScalar(-0.262869) , btScalar(0.809012),btScalar(0.525738)),
+	btVector3(btScalar(0.688190) , btScalar(0.499997),btScalar(0.525736)),
+	btVector3(btScalar(0.525730) , btScalar(0.000000),btScalar(0.850652)),
+	btVector3(btScalar(0.162456) , btScalar(-0.499995),btScalar(0.850654)),
+	btVector3(btScalar(-0.425323) , btScalar(-0.309011),btScalar(0.850654)),
+	btVector3(btScalar(-0.425323) , btScalar(0.309011),btScalar(0.850654)),
+	btVector3(btScalar(0.162456) , btScalar(0.499995),btScalar(0.850654))
+	};
+
+	return sPenetrationDirections;
+}
+
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h
new file mode 100644
index 00000000..6a8fe52f
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h
@@ -0,0 +1,40 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
+#define BT_MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
+
+#include "btConvexPenetrationDepthSolver.h"
+
+///btMinkowskiPenetrationDepthSolver implements brute-force penetration depth estimation.
+///The implementation samples the depth using support mapping, and uses a GJK step to get the witness points.
+class btMinkowskiPenetrationDepthSolver : public btConvexPenetrationDepthSolver
+{
+protected:
+	///returns the table of sample directions that calcPenDepth iterates over
+	static btVector3*	getPenetrationDirections();
+
+public:
+	///estimates the penetration of convexA and convexB; when it returns true, v holds the chosen direction and pa/pb the witness points
+	virtual bool calcPenDepth( btSimplexSolverInterface& simplexSolver,
+	const btConvexShape* convexA,const btConvexShape* convexB,
+				const btTransform& transA,const btTransform& transB,
+			btVector3& v, btVector3& pa, btVector3& pb,
+			class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc
+			);
+};
+
+#endif //BT_MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btPersistentManifold.cpp b/src/bullet/BulletCollision/NarrowPhaseCollision/btPersistentManifold.cpp
new file mode 100644
index 00000000..954b8395
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btPersistentManifold.cpp
@@ -0,0 +1,302 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btPersistentManifold.h"
+#include "LinearMath/btTransform.h"
+
+
+btScalar					gContactBreakingThreshold = btScalar(0.02);
+ContactDestroyedCallback	gContactDestroyedCallback = 0;//when set, clearUserCache passes the user persistent data of a contact point to it
+ContactProcessedCallback	gContactProcessedCallback = 0;//when set, refreshContactPoints calls it for every contact point that is kept
+///gContactCalcArea3Points will approximate the convex hull area using 3 points
+///when setting it to false, it will use 4 points to compute the area: it is more accurate but slower
+bool						gContactCalcArea3Points = true;
+///Default constructor: creates an empty manifold (no cached points) that is not attached to any bodies.
+///Every scalar member gets a defined value; the thresholds and companion ids are zero instead of being left indeterminate.
+btPersistentManifold::btPersistentManifold()
+:btTypedObject(BT_PERSISTENT_MANIFOLD_TYPE),
+m_body0(0),m_body1(0),
+m_cachedPoints (0),
+m_contactBreakingThreshold(btScalar(0.)),
+m_contactProcessingThreshold(btScalar(0.)),
+m_companionIdA(0),m_companionIdB(0),
+m_index1a(0)
+{
+}
+//Debug-only helper (compiled when DEBUG_PERSISTENCY is defined): prints the number of cached points
+//and the user persistent data pointer stored in each of them.
+#ifdef DEBUG_PERSISTENCY
+#include <stdio.h>
+void	btPersistentManifold::DebugPersistency()
+{
+	printf("DebugPersistency : numPoints %d\n",m_cachedPoints);
+	for (int i=0;i<m_cachedPoints;i++)
+	{
+		//%p is the printf conversion for pointers; %x expects an unsigned int and is wrong for a void*
+		printf("m_pointCache[%d].m_userPersistentData = %p\n",i,m_pointCache[i].m_userPersistentData);
+	}
+}
+#endif //DEBUG_PERSISTENCY
+///If pt carries user persistent data and gContactDestroyedCallback is installed, hands the data to the callback and resets the pointer.
+void btPersistentManifold::clearUserCache(btManifoldPoint& pt)
+{
+	void* const userData = pt.m_userPersistentData;
+	if (!userData)
+	{
+		//nothing is attached to this point
+		return;
+	}
+
+#ifdef DEBUG_PERSISTENCY
+	//count the cached points that refer to the same user data
+	int numMatches = 0;
+	for (int idx=0;idx<m_cachedPoints;idx++)
+	{
+		if (m_pointCache[idx].m_userPersistentData != userData)
+			continue;
+		numMatches++;
+		if (numMatches>1)
+			printf("error in clearUserCache\n");
+	}
+	btAssert(numMatches<=0);
+#endif //DEBUG_PERSISTENCY
+
+	//the pointer is only reset when a destroy callback is there to release the data
+	if (gContactDestroyedCallback)
+	{
+		(*gContactDestroyedCallback)(userData);
+		pt.m_userPersistentData = 0;
+	}
+
+#ifdef DEBUG_PERSISTENCY
+	DebugPersistency();
+#endif
+}
+///Forms the three possible pairings of the four points, crosses the two difference vectors of every pairing
+///and returns the largest squared length of those three cross products.
+static inline btScalar calcArea4Points(const btVector3 &p0,const btVector3 &p1,const btVector3 &p2,const btVector3 &p3)
+{
+	//pairing (p0,p1) with (p2,p3)
+	const btVector3 cross01_23 = (p0 - p1).cross(p2 - p3);
+	const btScalar area01_23 = cross01_23.length2();
+
+	//pairing (p0,p2) with (p1,p3)
+	const btVector3 cross02_13 = (p0 - p2).cross(p1 - p3);
+	const btScalar area02_13 = cross02_13.length2();
+
+	//pairing (p0,p3) with (p1,p2)
+	const btVector3 cross03_12 = (p0 - p3).cross(p1 - p2);
+	const btScalar area03_12 = cross03_12.length2();
+
+	//todo: the 3 cross products above can be easily optimized by SIMD.
+	const btScalar largestOfFirstTwo = btMax(area01_23,area02_13);
+	return btMax(largestOfFirstTwo,area03_12);
+}
+///Chooses the cache slot that pt should take over; reads all four slots of m_pointCache.
+int btPersistentManifold::sortCachedPoints(const btManifoldPoint& pt) 
+{
+	//calculate 4 possible cases areas, and take biggest area
+	//also need to keep 'deepest'
+
+	//slot of the cached point with the smallest distance, provided it is smaller than the distance of pt; -1 otherwise
+	int deepestSlot = -1;
+#define KEEP_DEEPEST_POINT 1
+#ifdef KEEP_DEEPEST_POINT
+	btScalar deepestDistance = pt.getDistance();
+	for (int slot=0;slot<4;slot++)
+	{
+		const btScalar cachedDistance = m_pointCache[slot].getDistance();
+		if (cachedDistance < deepestDistance)
+		{
+			deepestSlot = slot;
+			deepestDistance = cachedDistance;
+		}
+	}
+#endif //KEEP_DEEPEST_POINT
+
+	//shorthands for the local points (on A) of the new point and of the four cached points
+	const btVector3& newPt = pt.m_localPointA;
+	const btVector3& cached0 = m_pointCache[0].m_localPointA;
+	const btVector3& cached1 = m_pointCache[1].m_localPointA;
+	const btVector3& cached2 = m_pointCache[2].m_localPointA;
+	const btVector3& cached3 = m_pointCache[3].m_localPointA;
+
+	//areaWithout[k] is the area measure of the point set in which pt replaces cached point k;
+	//the entry of the deepest slot is left at zero
+	btScalar areaWithout[4] = { btScalar(0.), btScalar(0.), btScalar(0.), btScalar(0.) };
+
+	if (gContactCalcArea3Points)
+	{
+		//approximation: a single cross product per candidate
+		if (deepestSlot != 0)
+		{
+			areaWithout[0] = (newPt-cached1).cross(cached3-cached2).length2();
+		}
+		if (deepestSlot != 1)
+		{
+			areaWithout[1] = (newPt-cached0).cross(cached3-cached2).length2();
+		}
+		if (deepestSlot != 2)
+		{
+			areaWithout[2] = (newPt-cached0).cross(cached3-cached1).length2();
+		}
+		if (deepestSlot != 3)
+		{
+			areaWithout[3] = (newPt-cached0).cross(cached2-cached1).length2();
+		}
+	}
+	else
+	{
+		//more accurate but slower: calcArea4Points per candidate
+		if (deepestSlot != 0)
+		{
+			areaWithout[0] = calcArea4Points(newPt,cached1,cached2,cached3);
+		}
+		if (deepestSlot != 1)
+		{
+			areaWithout[1] = calcArea4Points(newPt,cached0,cached2,cached3);
+		}
+		if (deepestSlot != 2)
+		{
+			areaWithout[2] = calcArea4Points(newPt,cached0,cached1,cached3);
+		}
+		if (deepestSlot != 3)
+		{
+			areaWithout[3] = calcArea4Points(newPt,cached0,cached1,cached2);
+		}
+	}
+
+	//the candidate with the biggest area decides which slot is returned
+	btVector4 areas(areaWithout[0],areaWithout[1],areaWithout[2],areaWithout[3]);
+	return areas.closestAxis4();
+}
+///Searches the cached points for the one whose m_localPointA is nearest to newPoint.m_localPointA.
+///Returns its index, or -1 when no cached point is closer than the contact breaking threshold.
+int btPersistentManifold::getCacheEntry(const btManifoldPoint& newPoint) const
+{
+	const btScalar breakingThreshold = getContactBreakingThreshold();
+	//squared distances are compared; a candidate has to beat the squared threshold first
+	btScalar bestDistSq = breakingThreshold * breakingThreshold;
+	int bestIndex = -1;
+
+	const int numContacts = getNumContacts();
+	for (int idx = 0; idx < numContacts; ++idx)
+	{
+		const btVector3 delta = m_pointCache[idx].m_localPointA - newPoint.m_localPointA;
+		const btScalar distSq = delta.dot(delta);
+		if (!(distSq < bestDistSq))
+			continue;
+		bestDistSq = distSq;
+		bestIndex = idx;
+	}
+	return bestIndex;
+}
+///Stores newPoint in the cache and returns the index it was written to.
+///While the cache has room the point is appended; once it is full the slot picked by sortCachedPoints is overwritten.
+int btPersistentManifold::addManifoldPoint(const btManifoldPoint& newPoint)
+{
+	btAssert(validContactDistance(newPoint));
+
+	int insertIndex = getNumContacts();
+	if (insertIndex == MANIFOLD_CACHE_SIZE)
+	{
+#if MANIFOLD_CACHE_SIZE >= 4
+		//sort cache so best points come first, based on area
+		insertIndex = sortCachedPoints(newPoint);
+#else
+		insertIndex = 0;
+#endif
+		//guard against a negative index from sortCachedPoints before the slot is accessed,
+		//otherwise clearUserCache below would be handed an element outside of m_pointCache
+		if (insertIndex<0)
+			insertIndex=0;
+		clearUserCache(m_pointCache[insertIndex]);
+	} else
+	{
+		m_cachedPoints++;
+	}
+
+	btAssert(m_pointCache[insertIndex].m_userPersistentData==0);
+	m_pointCache[insertIndex] = newPoint;
+	return insertIndex;
+}
+///Returns the contact breaking threshold stored in this manifold (m_contactBreakingThreshold).
+btScalar	btPersistentManifold::getContactBreakingThreshold() const
+{
+	return m_contactBreakingThreshold;
+}
+
+///Recomputes the world space positions and the distance of every cached point from trA/trB, then removes
+///the points that are no longer valid and passes the remaining ones to gContactProcessedCallback (when set).
+void btPersistentManifold::refreshContactPoints(const btTransform& trA,const btTransform& trB)
+{
+#ifdef DEBUG_PERSISTENCY
+	printf("refreshContactPoints posA = (%f,%f,%f) posB = (%f,%f,%f)\n",
+		trA.getOrigin().getX(),
+		trA.getOrigin().getY(),
+		trA.getOrigin().getZ(),
+		trB.getOrigin().getX(),
+		trB.getOrigin().getY(),
+		trB.getOrigin().getZ());
+#endif //DEBUG_PERSISTENCY
+
+	/// first refresh worldspace positions and distance
+	for (int idx=getNumContacts()-1;idx>=0;idx--)
+	{
+		btManifoldPoint &cp = m_pointCache[idx];
+		cp.m_positionWorldOnA = trA( cp.m_localPointA );
+		cp.m_positionWorldOnB = trB( cp.m_localPointB );
+		const btVector3 separation = cp.m_positionWorldOnA -  cp.m_positionWorldOnB;
+		cp.m_distance1 = separation.dot(cp.m_normalWorldOnB);
+		cp.m_lifeTime++;
+	}
+
+	/// then drop the points that became invalid and report the ones that are kept
+	for (int idx=getNumContacts()-1;idx>=0;idx--)
+	{
+		btManifoldPoint &cp = m_pointCache[idx];
+		//contact becomes invalid when signed distance exceeds margin (projected on contactnormal direction)
+		if (!validContactDistance(cp))
+		{
+			removeContactPoint(idx);
+			continue;
+		}
+
+		//contact also becomes invalid when relative movement orthogonal to normal exceeds margin
+		const btVector3 projectedPoint = cp.m_positionWorldOnA - cp.m_normalWorldOnB * cp.m_distance1;
+		const btVector3 projectedDifference = cp.m_positionWorldOnB - projectedPoint;
+		const btScalar distance2d = projectedDifference.dot(projectedDifference);
+		if (distance2d  > getContactBreakingThreshold()*getContactBreakingThreshold() )
+		{
+			removeContactPoint(idx);
+			continue;
+		}
+
+		//contact point processed callback
+		if (gContactProcessedCallback)
+		{
+			(*gContactProcessedCallback)(cp,m_body0,m_body1);
+		}
+	}
+#ifdef DEBUG_PERSISTENCY
+	DebugPersistency();
+#endif //
+}
+
+
+
+
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btPersistentManifold.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btPersistentManifold.h
new file mode 100644
index 00000000..d877f099
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btPersistentManifold.h
@@ -0,0 +1,228 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_PERSISTENT_MANIFOLD_H
+#define BT_PERSISTENT_MANIFOLD_H
+
+
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btTransform.h"
+#include "btManifoldPoint.h"
+#include "LinearMath/btAlignedAllocator.h"
+
+struct btCollisionResult;
+
+///maximum contact breaking and merging threshold
+extern btScalar gContactBreakingThreshold;
+///optional global callbacks; both are null pointers unless user code assigns them
+typedef bool (*ContactDestroyedCallback)(void* userPersistentData);
+typedef bool (*ContactProcessedCallback)(btManifoldPoint& cp,void* body0,void* body1);
+extern ContactDestroyedCallback	gContactDestroyedCallback;
+extern ContactProcessedCallback gContactProcessedCallback;
+
+//the enum starts at 1024 to avoid type conflicts with btTypedConstraint
+enum btContactManifoldTypes
+{
+	MIN_CONTACT_MANIFOLD_TYPE = 1024,
+	BT_PERSISTENT_MANIFOLD_TYPE
+};
+///number of contact points a btPersistentManifold can cache (size of its m_pointCache array)
+#define MANIFOLD_CACHE_SIZE 4
+
+///btPersistentManifold is a contact point cache, it stays persistent as long as objects are overlapping in the broadphase.
+///Those contact points are created by the collision narrow phase.
+///The cache can be empty, or hold 1,2,3 or 4 points. Some collision algorithms (GJK) might only add one point at a time.
+///It updates/refreshes old contact points, and throws them away if necessary (distance becomes too large).
+///It reduces the cache to 4 points when more than 4 points are added, using the following rules:
+///the contact point with the deepest penetration is always kept, and it tries to maximize the area covered by the points.
+///Note that some pairs of objects might have more than one contact manifold.
+
+
+ATTRIBUTE_ALIGNED128( class) btPersistentManifold : public btTypedObject
+//ATTRIBUTE_ALIGNED16( class) btPersistentManifold : public btTypedObject
+{
+	///storage for the cached contact points; getNumContacts() tells how many of them are in use
+	btManifoldPoint m_pointCache[MANIFOLD_CACHE_SIZE];
+
+	/// this two body pointers can point to the physics rigidbody class.
+	/// void* will allow any rigidbody class
+	void* m_body0;
+	void* m_body1;
+	///number of contact points currently stored in m_pointCache
+	int	m_cachedPoints;
+
+	btScalar	m_contactBreakingThreshold;
+	btScalar	m_contactProcessingThreshold;
+
+
+	/// returns the index of the cached point that addManifoldPoint overwrites when the cache is full
+	int	sortCachedPoints(const btManifoldPoint& pt);
+
+	int		findContactPoint(const btManifoldPoint* unUsed, int numUnused,const btManifoldPoint& pt);
+
+public:
+
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	int	m_companionIdA;
+	int	m_companionIdB;
+
+	int m_index1a;
+	///creates an empty manifold that is not attached to any bodies
+	btPersistentManifold();
+	///creates an empty manifold for the given bodies and thresholds; the unnamed int argument is ignored
+	btPersistentManifold(void* body0,void* body1,int , btScalar contactBreakingThreshold,btScalar contactProcessingThreshold)
+		: btTypedObject(BT_PERSISTENT_MANIFOLD_TYPE),
+	m_body0(body0),m_body1(body1),m_cachedPoints(0),
+		m_contactBreakingThreshold(contactBreakingThreshold),
+		m_contactProcessingThreshold(contactProcessingThreshold),
+		m_companionIdA(0),m_companionIdB(0),m_index1a(0)//give the public bookkeeping members defined values as well
+	{ }
+
+	SIMD_FORCE_INLINE void* getBody0() { return m_body0;}
+	SIMD_FORCE_INLINE void* getBody1() { return m_body1;}
+
+	SIMD_FORCE_INLINE const void* getBody0() const { return m_body0;}
+	SIMD_FORCE_INLINE const void* getBody1() const { return m_body1;}
+
+	void	setBodies(void* body0,void* body1)
+	{
+		m_body0 = body0;
+		m_body1 = body1;
+	}
+	///passes the user persistent data of pt to gContactDestroyedCallback (when both are set) and resets the pointer
+	void clearUserCache(btManifoldPoint& pt);
+
+#ifdef DEBUG_PERSISTENCY
+	void	DebugPersistency();
+#endif //
+	///number of contact points currently held in the cache
+	SIMD_FORCE_INLINE int	getNumContacts() const { return m_cachedPoints;}
+
+	SIMD_FORCE_INLINE const btManifoldPoint& getContactPoint(int index) const
+	{
+		btAssert(index < m_cachedPoints);
+		return m_pointCache[index];
+	}
+
+	SIMD_FORCE_INLINE btManifoldPoint& getContactPoint(int index)
+	{
+		btAssert(index < m_cachedPoints);
+		return m_pointCache[index];
+	}
+
+	///@todo: get this margin from the current physics / collision environment
+	btScalar	getContactBreakingThreshold() const;
+
+	btScalar	getContactProcessingThreshold() const
+	{
+		return m_contactProcessingThreshold;
+	}
+	///returns the index of the cached point nearest to newPoint (compared via m_localPointA), or -1 if none is closer than the breaking threshold
+	int getCacheEntry(const btManifoldPoint& newPoint) const;
+	///stores newPoint (replacing a cached point when the cache is full) and returns the index that was used
+	int addManifoldPoint( const btManifoldPoint& newPoint);
+	///removes the point at index: the last used point is moved into its place and the persistent state of the vacated last slot is reset
+	void removeContactPoint (int index)
+	{
+		clearUserCache(m_pointCache[index]);
+
+		int lastUsedIndex = getNumContacts() - 1;
+//		m_pointCache[index] = m_pointCache[lastUsedIndex];
+		if(index != lastUsedIndex) 
+		{
+			m_pointCache[index] = m_pointCache[lastUsedIndex]; 
+			//get rid of duplicated userPersistentData pointer
+			m_pointCache[lastUsedIndex].m_userPersistentData = 0;
+			m_pointCache[lastUsedIndex].mConstraintRow[0].m_accumImpulse = 0.f;
+			m_pointCache[lastUsedIndex].mConstraintRow[1].m_accumImpulse = 0.f;
+			m_pointCache[lastUsedIndex].mConstraintRow[2].m_accumImpulse = 0.f;
+
+			m_pointCache[lastUsedIndex].m_appliedImpulse = 0.f;
+			m_pointCache[lastUsedIndex].m_lateralFrictionInitialized = false;
+			m_pointCache[lastUsedIndex].m_appliedImpulseLateral1 = 0.f;
+			m_pointCache[lastUsedIndex].m_appliedImpulseLateral2 = 0.f;
+			m_pointCache[lastUsedIndex].m_lifeTime = 0;
+		}
+
+		btAssert(m_pointCache[lastUsedIndex].m_userPersistentData==0);
+		m_cachedPoints--;
+	}
+	void replaceContactPoint(const btManifoldPoint& newPoint,int insertIndex)
+	{
+		btAssert(validContactDistance(newPoint));
+		//with MAINTAIN_PERSISTENCY the lifetime, accumulated impulses and user data of the old point are carried over to the new one
+#define MAINTAIN_PERSISTENCY 1
+#ifdef MAINTAIN_PERSISTENCY
+		int	lifeTime = m_pointCache[insertIndex].getLifeTime();
+		btScalar	appliedImpulse = m_pointCache[insertIndex].mConstraintRow[0].m_accumImpulse;
+		btScalar	appliedLateralImpulse1 = m_pointCache[insertIndex].mConstraintRow[1].m_accumImpulse;
+		btScalar	appliedLateralImpulse2 = m_pointCache[insertIndex].mConstraintRow[2].m_accumImpulse;
+//		bool isLateralFrictionInitialized = m_pointCache[insertIndex].m_lateralFrictionInitialized;
+
+
+
+		btAssert(lifeTime>=0);
+		void* cache = m_pointCache[insertIndex].m_userPersistentData;
+
+		m_pointCache[insertIndex] = newPoint;
+
+		m_pointCache[insertIndex].m_userPersistentData = cache;
+		m_pointCache[insertIndex].m_appliedImpulse = appliedImpulse;
+		m_pointCache[insertIndex].m_appliedImpulseLateral1 = appliedLateralImpulse1;
+		m_pointCache[insertIndex].m_appliedImpulseLateral2 = appliedLateralImpulse2;
+
+		m_pointCache[insertIndex].mConstraintRow[0].m_accumImpulse =  appliedImpulse;
+		m_pointCache[insertIndex].mConstraintRow[1].m_accumImpulse = appliedLateralImpulse1;
+		m_pointCache[insertIndex].mConstraintRow[2].m_accumImpulse = appliedLateralImpulse2;
+
+
+		m_pointCache[insertIndex].m_lifeTime = lifeTime;
+#else
+		clearUserCache(m_pointCache[insertIndex]);
+		m_pointCache[insertIndex] = newPoint;
+
+#endif
+	}
+
+	///a contact point is valid while its distance does not exceed the contact breaking threshold
+	bool validContactDistance(const btManifoldPoint& pt) const
+	{
+		return pt.m_distance1 <= getContactBreakingThreshold();
+	}
+	/// calculated new worldspace coordinates and depth, and reject points that exceed the collision margin
+	void	refreshContactPoints(  const btTransform& trA,const btTransform& trB);
+
+	///clears the user cache of every cached point and empties the manifold
+	SIMD_FORCE_INLINE	void	clearManifold()
+	{
+		int i;
+		for (i=0;i<m_cachedPoints;i++)
+		{
+			clearUserCache(m_pointCache[i]);
+		}
+		m_cachedPoints = 0;
+	}
+
+
+
+}
+;
+
+
+
+
+
+#endif //BT_PERSISTENT_MANIFOLD_H
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btPointCollector.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btPointCollector.h
new file mode 100644
index 00000000..18da1710
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btPointCollector.h
@@ -0,0 +1,64 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_POINT_COLLECTOR_H
+#define BT_POINT_COLLECTOR_H
+
+#include "btDiscreteCollisionDetectorInterface.h"
+
+
+
+/// Result sink that keeps only the contact with the smallest depth passed to addContactPoint.
+struct btPointCollector : public btDiscreteCollisionDetectorInterface::Result
+{
+	btVector3 m_normalOnBInWorld;
+	btVector3 m_pointInWorld;
+	btScalar	m_distance;//negative means penetration
+
+	bool	m_hasResult;	// true once a contact point has been stored
+
+	/// Starts with no result and m_distance set to BT_LARGE_FLOAT.
+	btPointCollector ()
+		: m_distance(btScalar(BT_LARGE_FLOAT))
+		, m_hasResult(false)
+	{
+	}
+
+	/// The shape identifiers (A and B) are ignored by this collector.
+	virtual void setShapeIdentifiersA(int partId0,int index0)
+	{
+		(void)index0;
+		(void)partId0;
+	}
+	virtual void setShapeIdentifiersB(int partId1,int index1)
+	{
+		(void)index1;
+		(void)partId1;
+	}
+
+	/// Stores the candidate only when its depth is below the currently stored distance.
+	virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
+	{
+		if (!(depth < m_distance))
+			return;
+		m_distance = depth;	//negative means penetration
+		m_pointInWorld = pointInWorld;
+		m_normalOnBInWorld = normalOnBInWorld;
+		m_hasResult = true;
+	}
+};
+
+#endif //BT_POINT_COLLECTOR_H
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.cpp b/src/bullet/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.cpp
new file mode 100644
index 00000000..db190911
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.cpp
@@ -0,0 +1,440 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+///This file was written by Erwin Coumans
+///Separating axis test based on work from Pierre Terdiman
+///And contact clipping based on work from Simon Hobbs
+
+
+#include "btPolyhedralContactClipping.h"
+#include "BulletCollision/CollisionShapes/btConvexPolyhedron.h"
+
+#include <float.h> //for FLT_MAX
+
+int gExpectedNbTests=0;	// incremented for each candidate axis when TEST_INTERNAL_OBJECTS is defined
+int gActualNbTests = 0;	// incremented for each axis not rejected by the TestInternalObjects check (TEST_INTERNAL_OBJECTS only)
+bool gUseInternalObject = true;	// when false, the TestInternalObjects early-out is bypassed
+
+// Clips a face to the back of a plane
+void btPolyhedralContactClipping::clipFace(const btVertexArray& pVtxIn, btVertexArray& ppVtxOut, const btVector3& planeNormalWS,btScalar planeEqWS)
+{
+	// Walks the closed polygon pVtxIn edge by edge and appends to ppVtxOut the part of it
+	// that lies on the negative side of the plane, i.e. where
+	//     planeNormalWS.dot(p) + planeEqWS < 0.
+	// ppVtxOut is only appended to, never cleared here.
+	// Polygons with fewer than two vertices produce no output.
+	//
+	// Per edge (start -> end):
+	//   both behind the plane : output end
+	//   crossing the plane    : output the intersection, plus end when end is behind
+	//   both in front         : output nothing
+	const int vertexCount = pVtxIn.size();
+	if (vertexCount < 2)
+		return;
+
+	// The first edge runs from the last vertex to vertex 0, which closes the polygon.
+	btVector3 edgeStart = pVtxIn[vertexCount-1];
+	btScalar startDist = planeNormalWS.dot(edgeStart)+planeEqWS;
+
+	for (int v = 0; v < vertexCount; v++)
+	{
+		const btVector3 edgeEnd = pVtxIn[v];
+		const btScalar endDist = planeNormalWS.dot(edgeEnd)+planeEqWS;
+
+		const bool startBehind = startDist<0;
+		const bool endBehind = endDist<0;
+
+		if (startBehind != endBehind)
+		{
+			// The edge crosses the plane: output the intersection point first.
+			const btScalar t = btScalar(startDist * 1.f/(startDist - endDist));
+			ppVtxOut.push_back(edgeStart.lerp(edgeEnd,t));
+		}
+
+		if (endBehind)
+		{
+			// The end point lies behind the plane and is kept.
+			ppVtxOut.push_back(edgeEnd);
+		}
+
+		// The end of this edge is the start of the next one.
+		edgeStart = edgeEnd;
+		startDist = endDist;
+	}
+}
+
+
+static bool TestSepAxis(const btConvexPolyhedron& hullA, const btConvexPolyhedron& hullB, const btTransform& transA,const btTransform& transB, const btVector3& sep_axis, btScalar& depth)
+{
+	// Projects both hulls onto sep_axis. Returns false when the two intervals are disjoint;
+	// otherwise writes the smaller of the two overlap extents to depth and returns true.
+	btScalar minA,maxA;
+	hullA.project(transA,sep_axis, minA, maxA);
+	btScalar minB,maxB;
+	hullB.project(transB, sep_axis, minB, maxB);
+	if((maxA<minB) || (maxB<minA))
+		return false;
+	const btScalar overlapAB = maxA - minB;
+	assert(overlapAB>=0.0f);
+	const btScalar overlapBA = maxB - minA;
+	assert(overlapBA>=0.0f);
+	depth = (overlapAB<overlapBA) ? overlapAB : overlapBA;
+	return true;
+}
+
+
+
+static int gActualSATPairTests=0;	// incremented once per findSeparatingAxis call
+
+inline bool IsAlmostZero(const btVector3& v)
+{	// true when no component magnitude exceeds 1e-6; btFabs (not fabsf) keeps full btScalar precision in double builds
+	if(btFabs(v.x())>1e-6 || btFabs(v.y())>1e-6 || btFabs(v.z())>1e-6)	return false;
+	return true;
+}
+
+#ifdef TEST_INTERNAL_OBJECTS
+
+inline void BoxSupport(const btScalar extents[3], const btScalar sv[3], btScalar p[3])
+{
+	// Per axis, p receives -extent when sv is negative and +extent otherwise.
+	// A commented-out bit-mask variant (IR / SIGN_BITMASK) used to sit here with the note:
+	// "This version is ~11.000 cycles (4%) faster overall in one of the tests."
+	for (int axis = 0; axis < 3; axis++)
+	{
+		p[axis] = (sv[axis] < 0.0f) ? -extents[axis] : extents[axis];
+	}
+}
+
+void InverseTransformPoint3x3(btVector3& out, const btVector3& in, const btTransform& tr)
+{
+	// Multiplies 'in' by the transpose of tr's basis (the translation is not used) and stores it in 'out'.
+	// All three components are evaluated before 'out' is written, so 'out' may alias 'in'.
+	const btMatrix3x3& basis = tr.getBasis();
+	const btScalar inX = in.x();
+	const btScalar inY = in.y();
+	const btScalar inZ = in.z();
+
+	out.setValue(basis[0].x()*inX + basis[1].x()*inY + basis[2].x()*inZ,
+				 basis[0].y()*inX + basis[1].y()*inY + basis[2].y()*inZ,
+				 basis[0].z()*inX + basis[1].z()*inY + basis[2].z()*inZ);
+}
+
+ bool TestInternalObjects( const btTransform& trans0, const btTransform& trans1, const btVector3& delta_c, const btVector3& axis, const btConvexPolyhedron& convex0, const btConvexPolyhedron& convex1, btScalar dmin)
+{
+	// Cheap pre-test: bounds each hull along 'axis' by the larger of its box support (m_extents)
+	// and its m_radius, and returns false when the resulting depth estimate exceeds dmin.
+	const btScalar centerOffset = delta_c.dot(axis);
+
+	// express the axis in the local frame of each hull
+	btVector3 axisInA;
+	InverseTransformPoint3x3(axisInA, axis,trans0);
+	btVector3 axisInB;
+	InverseTransformPoint3x3(axisInB, axis,trans1);
+
+	btScalar supportA[3];
+	BoxSupport(convex0.m_extents, axisInA, supportA);
+	btScalar supportB[3];
+	BoxSupport(convex1.m_extents, axisInB, supportB);
+
+	const btScalar boxRadiusA = supportA[0]*axisInA.x() + supportA[1]*axisInA.y() + supportA[2]*axisInA.z();
+	const btScalar boxRadiusB = supportB[0]*axisInB.x() + supportB[1]*axisInB.y() + supportB[2]*axisInB.z();
+
+	const btScalar radiusA = boxRadiusA>convex0.m_radius ? boxRadiusA : convex0.m_radius;
+	const btScalar radiusB = boxRadiusB>convex1.m_radius ? boxRadiusB : convex1.m_radius;
+
+	const btScalar radiusSum = radiusB + radiusA;
+	const btScalar depthPlus = radiusSum + centerOffset;
+	const btScalar depthMinus = radiusSum - centerOffset;
+	const btScalar depth = depthPlus<depthMinus ? depthPlus : depthMinus;
+	return !(depth>dmin);
+}
+#endif //TEST_INTERNAL_OBJECTS
+
+
+bool btPolyhedralContactClipping::findSeparatingAxis(	const btConvexPolyhedron& hullA, const btConvexPolyhedron& hullB, const btTransform& transA,const btTransform& transB, btVector3& sep)
+{
+	// Separating-axis test between two convex polyhedra. Candidate axes are the world-space
+	// face normals of hullA and hullB and the normalized cross products of all unique edge
+	// pairs. Returns false as soon as one axis separates the hulls; otherwise returns true
+	// and stores the tested axis with the smallest overlap depth in sep.
+	// An axis pointing against DeltaC2 is flipped instead of skipped: TestSepAxis gives the same
+	// answer for an axis and its negation, so skipping would leave that direction untested.
+	gActualSATPairTests++;
+
+//#ifdef TEST_INTERNAL_OBJECTS
+	const btVector3 c0 = transA * hullA.m_localCenter;
+	const btVector3 c1 = transB * hullB.m_localCenter;
+	const btVector3 DeltaC2 = c0 - c1;
+//#endif
+
+	// smallest overlap depth found so far
+	btScalar dmin = FLT_MAX;
+
+	int numFacesA = hullA.m_faces.size();
+	// Test normals from hullA
+	for(int i=0;i<numFacesA;i++)
+	{
+		const btVector3 Normal(hullA.m_faces[i].m_plane[0], hullA.m_faces[i].m_plane[1], hullA.m_faces[i].m_plane[2]);
+		btVector3 faceANormalWS = transA.getBasis() * Normal;
+		if (DeltaC2.dot(faceANormalWS)<0)
+			faceANormalWS = -faceANormalWS;
+
+#ifdef TEST_INTERNAL_OBJECTS
+		gExpectedNbTests++;
+		if(gUseInternalObject && !TestInternalObjects(transA,transB, DeltaC2, faceANormalWS, hullA, hullB, dmin))
+			continue;
+		gActualNbTests++;
+#endif
+
+		btScalar d;
+		if(!TestSepAxis( hullA, hullB, transA,transB, faceANormalWS, d))
+			return false;
+
+		if(d<dmin)
+		{
+			dmin = d;
+			sep = faceANormalWS;
+		}
+	}
+
+	int numFacesB = hullB.m_faces.size();
+	// Test normals from hullB
+	for(int i=0;i<numFacesB;i++)
+	{
+		const btVector3 Normal(hullB.m_faces[i].m_plane[0], hullB.m_faces[i].m_plane[1], hullB.m_faces[i].m_plane[2]);
+		btVector3 WorldNormal = transB.getBasis() * Normal;
+		if (DeltaC2.dot(WorldNormal)<0)
+			WorldNormal = -WorldNormal;
+
+#ifdef TEST_INTERNAL_OBJECTS
+		gExpectedNbTests++;
+		if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, WorldNormal, hullA, hullB, dmin))
+			continue;
+		gActualNbTests++;
+#endif
+
+		btScalar d;
+		if(!TestSepAxis(hullA, hullB,transA,transB, WorldNormal,d))
+			return false;
+
+		if(d<dmin)
+		{
+			dmin = d;
+			sep = WorldNormal;
+		}
+	}
+
+	// Test edges
+	for(int e0=0;e0<hullA.m_uniqueEdges.size();e0++)
+	{
+		const btVector3 edge0 = hullA.m_uniqueEdges[e0];
+		const btVector3 WorldEdge0 = transA.getBasis() * edge0;
+		for(int e1=0;e1<hullB.m_uniqueEdges.size();e1++)
+		{
+			const btVector3 edge1 = hullB.m_uniqueEdges[e1];
+			const btVector3 WorldEdge1 = transB.getBasis() * edge1;
+
+			btVector3 Cross = WorldEdge0.cross(WorldEdge1);
+			// (nearly) parallel edges give a (nearly) zero cross product and no usable axis
+			if(!IsAlmostZero(Cross))
+			{
+				Cross = Cross.normalize();
+				if (DeltaC2.dot(Cross)<0)
+					Cross = -Cross;
+
+#ifdef TEST_INTERNAL_OBJECTS
+				gExpectedNbTests++;
+				if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, Cross, hullA, hullB, dmin))
+					continue;
+				gActualNbTests++;
+#endif
+
+				btScalar dist;
+				if(!TestSepAxis( hullA, hullB, transA,transB, Cross, dist))
+					return false;
+
+				if(dist<dmin)
+				{
+					dmin = dist;
+					sep = Cross;
+				}
+			}
+		}
+	}
+
+	// orient the result so that it does not point from transA's origin towards transB's origin
+	const btVector3 deltaC = transB.getOrigin() - transA.getOrigin();
+	if((deltaC.dot(sep))>0.0f)
+		sep = -sep;
+
+	return true;
+}
+
+void	btPolyhedralContactClipping::clipFaceAgainstHull(const btVector3& separatingNormal, const btConvexPolyhedron& hullA,  const btTransform& transA, btVertexArray& worldVertsB1, const btScalar minDist, btScalar maxDist,btDiscreteCollisionDetectorInterface::Result& resultOut)
+{
+	// Clips the world-space polygon worldVertsB1 against planes through the edges of the hullA face whose
+	// normal is most opposed to separatingNormal, then reports to resultOut the remaining vertices whose
+	// depth does not exceed maxDist. worldVertsB1 doubles as a scratch buffer and is modified.
+	btVertexArray worldVertsB2;
+	btVertexArray* pVtxIn = &worldVertsB1;
+	btVertexArray* pVtxOut = &worldVertsB2;
+	pVtxOut->reserve(pVtxIn->size());
+	// pick the face of hullA with the smallest dot(world-space normal, separatingNormal)
+	int closestFaceA=-1;
+	{
+		btScalar dmin = FLT_MAX;
+		for(int face=0;face<hullA.m_faces.size();face++)
+		{
+			const btVector3 Normal(hullA.m_faces[face].m_plane[0], hullA.m_faces[face].m_plane[1], hullA.m_faces[face].m_plane[2]);
+			const btVector3 faceANormalWS = transA.getBasis() * Normal;
+		
+			btScalar d = faceANormalWS.dot(separatingNormal);
+			if (d < dmin)
+			{
+				dmin = d;
+				closestFaceA = face;
+			}
+		}
+	}
+	if (closestFaceA<0)
+		return;
+
+	const btFace& polyA = hullA.m_faces[closestFaceA];
+
+		// clip polygon to back of planes of all faces of hull A that are adjacent to witness face
+	int numContacts = pVtxIn->size();	// NOTE(review): never read in this function
+	int numVerticesA = polyA.m_indices.size();
+	for(int e0=0;e0<numVerticesA;e0++)
+	{
+		const btVector3& a = hullA.m_vertices[polyA.m_indices[e0]];
+		const btVector3& b = hullA.m_vertices[polyA.m_indices[(e0+1)%numVerticesA]];
+		const btVector3 edge0 = a - b;
+		const btVector3 WorldEdge0 = transA.getBasis() * edge0;
+		btVector3 worldPlaneAnormal1 = transA.getBasis()* btVector3(polyA.m_plane[0],polyA.m_plane[1],polyA.m_plane[2]);
+		// clip plane through vertex a; its normal is perpendicular to both the edge and the face normal
+		btVector3 planeNormalWS1 = -WorldEdge0.cross(worldPlaneAnormal1);//.cross(WorldEdge0);
+		btVector3 worldA1 = transA*a;
+		btScalar planeEqWS1 = -worldA1.dot(planeNormalWS1);
+		
+//int otherFace=0;
+#ifdef BLA1
+		int otherFace = polyA.m_connectedFaces[e0];
+		btVector3 localPlaneNormal (hullA.m_faces[otherFace].m_plane[0],hullA.m_faces[otherFace].m_plane[1],hullA.m_faces[otherFace].m_plane[2]);
+		btScalar localPlaneEq = hullA.m_faces[otherFace].m_plane[3];
+
+		btVector3 planeNormalWS = transA.getBasis()*localPlaneNormal;
+		btScalar planeEqWS=localPlaneEq-planeNormalWS.dot(transA.getOrigin());
+#else 
+		btVector3 planeNormalWS = planeNormalWS1;
+		btScalar planeEqWS=planeEqWS1;
+		
+#endif
+		//clip face
+		// the clipped polygon lands in *pVtxOut; the swap makes it the input for the next edge plane
+		clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);
+		btSwap(pVtxIn,pVtxOut);
+		pVtxOut->resize(0);
+	}
+
+//#define ONLY_REPORT_DEEPEST_POINT
+
+	btVector3 point;
+	// NOTE(review): shadowed by the 'point' declared in the loop below; only the disabled ONLY_REPORT_DEEPEST_POINT path reads this one
+	// only keep points that are behind the witness face
+	{
+		btVector3 localPlaneNormal (polyA.m_plane[0],polyA.m_plane[1],polyA.m_plane[2]);
+		btScalar localPlaneEq = polyA.m_plane[3];
+		btVector3 planeNormalWS = transA.getBasis()*localPlaneNormal;
+		btScalar planeEqWS=localPlaneEq-planeNormalWS.dot(transA.getOrigin());
+		for (int i=0;i<pVtxIn->size();i++)
+		{
+			// plane-equation value of the clipped vertex against the witness face (a distance if the plane normal has unit length)
+			btScalar depth = planeNormalWS.dot(pVtxIn->at(i))+planeEqWS;
+			if (depth <=minDist)
+			{
+//				printf("clamped: depth=%f to minDist=%f\n",depth,minDist);
+				depth = minDist;
+			}
+			// depths at or below minDist were clamped above; vertices with depth above maxDist are not reported
+			if (depth <=maxDist)
+			{
+				btVector3 point = pVtxIn->at(i);
+#ifdef ONLY_REPORT_DEEPEST_POINT
+				curMaxDist = depth;
+#else
+#if 0
+				if (depth<-3)
+				{
+					printf("error in btPolyhedralContactClipping depth = %f\n", depth);
+					printf("likely wrong separatingNormal passed in\n");
+				} 
+#endif				
+				resultOut.addContactPoint(separatingNormal,point,depth);
+#endif
+			}
+		}
+	}
+#ifdef ONLY_REPORT_DEEPEST_POINT
+	if (curMaxDist<maxDist)
+	{
+		resultOut.addContactPoint(separatingNormal,point,curMaxDist);
+	}
+#endif //ONLY_REPORT_DEEPEST_POINT
+	// NOTE(review): curMaxDist does not appear to be declared for this function, so the ONLY_REPORT_DEEPEST_POINT path likely cannot compile as written
+}
+
+
+void	btPolyhedralContactClipping::clipHullAgainstHull(const btVector3& separatingNormal1, const btConvexPolyhedron& hullA, const btConvexPolyhedron& hullB, const btTransform& transA,const btTransform& transB, const btScalar minDist, btScalar maxDist,btDiscreteCollisionDetectorInterface::Result& resultOut)
+{
+	// Selects the face of hullB whose world-space normal has the largest dot product with the
+	// normalized separating normal, transforms that face's vertices into world space and lets
+	// clipFaceAgainstHull clip them against hullA and report the contacts to resultOut.
+	// Nothing is reported when no face of hullB can be selected.
+
+	const btVector3 separatingNormal = separatingNormal1.normalized();
+
+	int closestFaceB=-1;
+	btScalar dmax = -FLT_MAX;
+	for(int face=0;face<hullB.m_faces.size();face++)
+	{
+		const btVector3 Normal(hullB.m_faces[face].m_plane[0], hullB.m_faces[face].m_plane[1], hullB.m_faces[face].m_plane[2]);
+		const btVector3 WorldNormal = transB.getBasis() * Normal;
+		const btScalar d = WorldNormal.dot(separatingNormal);
+		if (d > dmax)
+		{
+			dmax = d;
+			closestFaceB = face;
+		}
+	}
+
+	// Guard before indexing: closestFaceB is still -1 when the loop selected nothing
+	// (for example when hullB has no faces), and m_faces[-1] would be out of bounds.
+	if (closestFaceB<0)
+		return;
+
+	const btFace& polyB = hullB.m_faces[closestFaceB];
+	const int numVertices = polyB.m_indices.size();
+
+	btVertexArray worldVertsB1;
+	worldVertsB1.reserve(numVertices);
+	for(int e0=0;e0<numVertices;e0++)
+	{
+		const btVector3& b = hullB.m_vertices[polyB.m_indices[e0]];
+		worldVertsB1.push_back(transB*b);
+	}
+
+	clipFaceAgainstHull(separatingNormal, hullA, transA,worldVertsB1, minDist, maxDist,resultOut);
+}
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.h
new file mode 100644
index 00000000..99103df2
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.h
@@ -0,0 +1,46 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+///This file was written by Erwin Coumans
+
+
+#ifndef BT_POLYHEDRAL_CONTACT_CLIPPING_H
+#define BT_POLYHEDRAL_CONTACT_CLIPPING_H
+
+
+#include "LinearMath/btAlignedObjectArray.h"
+#include "LinearMath/btTransform.h"
+#include "btDiscreteCollisionDetectorInterface.h"
+
+class btConvexPolyhedron;
+
+typedef btAlignedObjectArray<btVector3> btVertexArray;
+
+// Static helpers for separating-axis testing and contact clipping between convex polyhedra
+struct btPolyhedralContactClipping
+{
+	///selects a face of hullB from the separating normal and forwards it to clipFaceAgainstHull
+	static void clipHullAgainstHull(const btVector3& separatingNormal, const btConvexPolyhedron& hullA, const btConvexPolyhedron& hullB, const btTransform& transA,const btTransform& transB, const btScalar minDist, btScalar maxDist, btDiscreteCollisionDetectorInterface::Result& resultOut);
+	///clips the polygon worldVertsB1 against a face of hullA and adds the resulting points to resultOut
+	static void	clipFaceAgainstHull(const btVector3& separatingNormal, const btConvexPolyhedron& hullA,  const btTransform& transA, btVertexArray& worldVertsB1, const btScalar minDist, btScalar maxDist,btDiscreteCollisionDetectorInterface::Result& resultOut);
+	///returns false when an axis separating the hulls is found; otherwise true, with the chosen axis in sep
+	static bool findSeparatingAxis(	const btConvexPolyhedron& hullA, const btConvexPolyhedron& hullB, const btTransform& transA,const btTransform& transB, btVector3& sep);
+	///the clipFace method is used internally
+	static void clipFace(const btVertexArray& pVtxIn, btVertexArray& ppVtxOut, const btVector3& planeNormalWS,btScalar planeEqWS);
+};
+
+#endif // BT_POLYHEDRAL_CONTACT_CLIPPING_H
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btRaycastCallback.cpp b/src/bullet/BulletCollision/NarrowPhaseCollision/btRaycastCallback.cpp
new file mode 100644
index 00000000..fbe579ce
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btRaycastCallback.cpp
@@ -0,0 +1,177 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//#include <stdio.h>
+
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+#include "BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h"
+#include "BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h"
+#include "btRaycastCallback.h"
+
+btTriangleRaycastCallback::btTriangleRaycastCallback(const btVector3& from,const btVector3& to, unsigned int flags)
+	// keeps copies of the ray end points and the flags; the hit fraction starts at 1
+	: m_from(from)
+	, m_to(to)
+	//@BP Mod
+	, m_flags(flags)
+	, m_hitFraction(btScalar(1.))
+{
+	// nothing else to set up
+}
+
+
+
+void btTriangleRaycastCallback::processTriangle(btVector3* triangle,int partId, int triangleIndex)
+{
+	// Intersects the segment m_from -> m_to with one triangle. A hit that lies inside the
+	// triangle and is closer than m_hitFraction is passed to reportHit, whose return value
+	// becomes the new m_hitFraction.
+	//
+	// A hit is "back-facing" when the ray starts on the side the triangle normal points away
+	// from (dist_a <= 0). By default the reported normal is flipped for such hits:
+	//  - kF_FilterBackfaces skips back-facing hits entirely;
+	//  - kF_KeepUnflippedNormal reports the triangle normal unflipped for every hit.
+	const btVector3 &vert0=triangle[0];
+	const btVector3 &vert1=triangle[1];
+	const btVector3 &vert2=triangle[2];
+
+	btVector3 v10; v10 = vert1 - vert0 ;
+	btVector3 v20; v20 = vert2 - vert0 ;
+
+	btVector3 triangleNormal; triangleNormal = v10.cross( v20 );
+
+	// plane-equation values of both ray end points (scaled by the unnormalized normal's length)
+	const btScalar dist = vert0.dot(triangleNormal);
+	btScalar dist_a = triangleNormal.dot(m_from) ;
+	dist_a-= dist;
+	btScalar dist_b = triangleNormal.dot(m_to);
+	dist_b -= dist;
+
+	if ( dist_a * dist_b >= btScalar(0.0) )
+	{
+		return ; // same sign
+	}
+
+	// the ray starts on the side the normal points away from
+	const bool hitsBackface = (dist_a <= btScalar(0.0));
+
+	//@BP Mod - Backface filtering
+	if (((m_flags & kF_FilterBackfaces) != 0) && hitsBackface)
+	{
+		// Backface, skip check
+		return;
+	}
+
+	const btScalar proj_length=dist_a-dist_b;
+	const btScalar distance = (dist_a)/(proj_length);
+	if (!(distance < m_hitFraction))
+	{
+		return; // not closer than the best hit so far
+	}
+
+	// Now we have the intersection point on the plane, we'll see if it's inside the triangle
+	// Add an epsilon as a tolerance for the raycast,
+	// in case the ray hits exactly on the edge of the triangle.
+	// It must be scaled for the triangle size.
+	btScalar edge_tolerance =triangleNormal.length2();
+	edge_tolerance *= btScalar(-0.0001);
+	btVector3 point; point.setInterpolate3( m_from, m_to, distance);
+
+	const btVector3 v0p = vert0 - point;
+	const btVector3 v1p = vert1 - point;
+	const btVector3 v2p = vert2 - point;
+
+	// the point is accepted only when all three edge tests pass (within the tolerance)
+	const btVector3 cp0 = v0p.cross( v1p );
+	if ( !((btScalar)(cp0.dot(triangleNormal)) >=edge_tolerance) )
+		return;
+	const btVector3 cp1 = v1p.cross( v2p );
+	if ( !((btScalar)(cp1.dot(triangleNormal)) >=edge_tolerance) )
+		return;
+	const btVector3 cp2 = v2p.cross( v0p );
+	if ( !((btScalar)(cp2.dot(triangleNormal)) >=edge_tolerance) )
+		return;
+
+	//@BP Mod
+	// Triangle normal isn't normalized
+	triangleNormal.normalize();
+
+	//@BP Mod - Allow for unflipped normal when raycasting against backfaces
+	// flip only for back-facing hits, and never when kF_KeepUnflippedNormal is set
+	const bool flipNormal = hitsBackface && ((m_flags & kF_KeepUnflippedNormal) == 0);
+	m_hitFraction = reportHit(flipNormal ? -triangleNormal : triangleNormal,distance,partId,triangleIndex);
+}
+
+
+btTriangleConvexcastCallback::btTriangleConvexcastCallback (const btConvexShape* convexShape, const btTransform& convexShapeFrom, const btTransform& convexShapeTo, const btTransform& triangleToWorld, const btScalar triangleCollisionMargin)
+	: m_convexShape(convexShape)
+	, m_convexShapeFrom(convexShapeFrom)
+	, m_convexShapeTo(convexShapeTo)
+	, m_triangleToWorld(triangleToWorld)
+	, m_hitFraction(1.0f)
+	, m_triangleCollisionMargin(triangleCollisionMargin)
+	, m_allowedPenetration(0.f)
+{	// arguments are stored as given; the hit fraction starts at 1 and the allowed penetration at 0
+}
+
+void
+btTriangleConvexcastCallback::processTriangle (btVector3* triangle, int partId, int triangleIndex)
+{
+	// Sweeps m_convexShape from m_convexShapeFrom to m_convexShapeTo against one triangle and
+	// passes the hit to reportHit when its fraction is below m_hitFraction.
+	btTriangleShape triangleShape (triangle[0], triangle[1], triangle[2]);
+	triangleShape.setMargin(m_triangleCollisionMargin);
+
+	btVoronoiSimplexSolver	simplexSolver;
+	btGjkEpaPenetrationDepthSolver	gjkEpaPenetrationSolver;
+
+//#define  USE_SUBSIMPLEX_CONVEX_CAST 1
+//if you reenable USE_SUBSIMPLEX_CONVEX_CAST see commented out code below
+#ifdef USE_SUBSIMPLEX_CONVEX_CAST
+	btSubsimplexConvexCast convexCaster(m_convexShape, &triangleShape, &simplexSolver);
+#else
+	//btGjkConvexCast	convexCaster(m_convexShape,&triangleShape,&simplexSolver);
+	btContinuousConvexCollision convexCaster(m_convexShape,&triangleShape,&simplexSolver,&gjkEpaPenetrationSolver);
+#endif //#USE_SUBSIMPLEX_CONVEX_CAST
+
+	btConvexCast::CastResult castResult;
+	castResult.m_fraction = btScalar(1.);
+	castResult.m_allowedPenetration = m_allowedPenetration;
+
+	// the triangle uses m_triangleToWorld as both its start and its end transform
+	if (!convexCaster.calcTimeOfImpact(m_convexShapeFrom,m_convexShapeTo,m_triangleToWorld, m_triangleToWorld, castResult))
+		return;
+
+	// drop hits whose normal is (nearly) zero and hits that are not closer than m_hitFraction
+	if (!(castResult.m_normal.length2() > btScalar(0.0001)))
+		return;
+	if (!(castResult.m_fraction < m_hitFraction))
+		return;
+
+/* btContinuousConvexCast's normal is already in world space */
+/*
+#ifdef USE_SUBSIMPLEX_CONVEX_CAST
+	//rotate normal into worldspace
+	castResult.m_normal = m_convexShapeFrom.getBasis() * castResult.m_normal;
+#endif //USE_SUBSIMPLEX_CONVEX_CAST
+*/
+	castResult.m_normal.normalize();
+
+	reportHit (castResult.m_normal, castResult.m_hitPoint,
+				castResult.m_fraction, partId, triangleIndex);
+}
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btRaycastCallback.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btRaycastCallback.h
new file mode 100644
index 00000000..f012889a
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btRaycastCallback.h
@@ -0,0 +1,72 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_RAYCAST_TRI_CALLBACK_H
+#define BT_RAYCAST_TRI_CALLBACK_H
+
+#include "BulletCollision/CollisionShapes/btTriangleCallback.h"
+#include "LinearMath/btTransform.h"
+struct btBroadphaseProxy;
+class btConvexShape;
+
+/// Triangle callback that intersects the ray segment m_from -> m_to with each processed triangle.
+class  btTriangleRaycastCallback: public btTriangleCallback
+{
+public:
+	//input
+	btVector3 m_from;
+	btVector3 m_to;
+
+   //@BP Mod - allow backface filtering and unflipped normals
+   enum EFlags
+   {
+      kF_None                 = 0,
+      kF_FilterBackfaces      = 1 << 0,
+      kF_KeepUnflippedNormal  = 1 << 1,   // Prevents returned face normal getting flipped when a ray hits a back-facing triangle
+
+      kF_Terminator        = 0xFFFFFFFF
+   };
+   unsigned int m_flags;	// bitwise combination of EFlags values, tested with '&' in processTriangle
+
+	btScalar	m_hitFraction;	// initialised to 1 by the constructor; replaced by reportHit's return value
+
+	btTriangleRaycastCallback(const btVector3& from,const btVector3& to, unsigned int flags=0);
+	///tests one triangle against the ray and calls reportHit for a hit closer than m_hitFraction
+	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex);
+
+	///called by processTriangle for each accepted hit; the returned value becomes the new m_hitFraction
+	virtual btScalar reportHit(const btVector3& hitNormalLocal, btScalar hitFraction, int partId, int triangleIndex ) = 0;
+};
+
+/// Triangle callback that sweeps a convex shape between two transforms against each processed triangle.
+class btTriangleConvexcastCallback : public btTriangleCallback
+{
+public:
+	const btConvexShape* m_convexShape;	// shape being swept; stored as a pointer, not copied
+	btTransform m_convexShapeFrom;	// start transform of the sweep
+	btTransform m_convexShapeTo;	// end transform of the sweep
+	btTransform m_triangleToWorld;	// passed as both start and end transform of the triangle
+	btScalar m_hitFraction;	// initialised to 1; only hits with a smaller fraction are reported
+	btScalar m_triangleCollisionMargin;	// margin applied to the temporary triangle shape
+	btScalar m_allowedPenetration;	// initialised to 0; copied into the cast result before casting
+	btTriangleConvexcastCallback (const btConvexShape* convexShape, const btTransform& convexShapeFrom, const btTransform& convexShapeTo, const btTransform& triangleToWorld, const btScalar triangleCollisionMargin);
+	///casts the shape against one triangle and calls reportHit for a hit closer than m_hitFraction
+	virtual void processTriangle (btVector3* triangle, int partId, int triangleIndex);
+	///NOTE(review): processTriangle ignores the return value, so m_hitFraction changes only if the subclass updates it
+	virtual btScalar reportHit (const btVector3& hitNormalLocal, const btVector3& hitPointLocal, btScalar hitFraction, int partId, int triangleIndex) = 0;
+};
+
+#endif //BT_RAYCAST_TRI_CALLBACK_H
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h
new file mode 100644
index 00000000..da8a1391
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h
@@ -0,0 +1,63 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_SIMPLEX_SOLVER_INTERFACE_H
+#define BT_SIMPLEX_SOLVER_INTERFACE_H
+
+#include "LinearMath/btVector3.h"
+
+#define NO_VIRTUAL_INTERFACE 1
+#ifdef NO_VIRTUAL_INTERFACE
+#include "btVoronoiSimplexSolver.h"
+#define btSimplexSolverInterface btVoronoiSimplexSolver
+#else
+
+/// btSimplexSolverInterface can incrementally calculate distance between origin and up to 4 vertices
+/// Used by GJK or Linear Casting. Can be implemented by the Johnson-algorithm or alternative approaches based on
+/// voronoi regions or barycentric coordinates
+/// NOTE: NO_VIRTUAL_INTERFACE is defined unconditionally just above, so this abstract class is
+/// compiled out and btSimplexSolverInterface is a macro alias for btVoronoiSimplexSolver.
+class btSimplexSolverInterface
+{
+	public:
+		virtual ~btSimplexSolverInterface() {};
+
+	virtual void reset() = 0;
+
+	virtual void addVertex(const btVector3& w, const btVector3& p, const btVector3& q) = 0;
+	
+	virtual bool closest(btVector3& v) = 0;
+
+	virtual btScalar maxVertex() = 0;
+
+	virtual bool fullSimplex() const = 0;
+
+	virtual int getSimplex(btVector3 *pBuf, btVector3 *qBuf, btVector3 *yBuf) const = 0;
+
+	virtual bool inSimplex(const btVector3& w) = 0;
+	
+	virtual void backup_closest(btVector3& v) = 0;
+
+	virtual bool emptySimplex() const = 0;
+
+	virtual void compute_points(btVector3& p1, btVector3& p2) = 0;
+
+	virtual int numVertices() const =0;
+};
+#endif
+#endif //BT_SIMPLEX_SOLVER_INTERFACE_H
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.cpp b/src/bullet/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.cpp
new file mode 100644
index 00000000..18eb662d
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.cpp
@@ -0,0 +1,160 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btSubSimplexConvexCast.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+
+#include "BulletCollision/CollisionShapes/btMinkowskiSumShape.h"
+#include "BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h"
+#include "btPointCollector.h"
+#include "LinearMath/btTransformUtil.h"
+
+btSubsimplexConvexCast::btSubsimplexConvexCast (const btConvexShape* convexA,const btConvexShape* convexB,btSimplexSolverInterface* simplexSolver)
+	: m_simplexSolver(simplexSolver),	// solver driven by calcTimeOfImpact()
+	  m_convexA(convexA), m_convexB(convexB)	// the two shapes being swept
+{	// The constructor only records the three pointers; nothing is copied or allocated.
+}
+
+///Typically the conservative advancement reaches a solution in a few iterations; clip it to MAX_ITERATIONS (32, or 64 with BT_USE_DOUBLE_PRECISION) for degenerate cases.
+///See discussion about this here http://continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=565
+#ifdef BT_USE_DOUBLE_PRECISION
+#define MAX_ITERATIONS 64
+#else
+#define MAX_ITERATIONS 32
+#endif
+bool	btSubsimplexConvexCast::calcTimeOfImpact(
+		const btTransform& fromA,
+		const btTransform& toA,
+		const btTransform& fromB,
+		const btTransform& toB,
+		CastResult& result)
+{
+	// Sweeps m_convexA (fromA->toA) against m_convexB (fromB->toB). On a hit, fills result.m_fraction, m_normal and m_hitPoint and returns true; otherwise returns false.
+	m_simplexSolver->reset();
+	// Only the origins of toA/toB are read below: the interpolated transforms keep the bases of fromA/fromB.
+	btVector3 linVelA,linVelB;
+	linVelA = toA.getOrigin()-fromA.getOrigin();
+	linVelB = toB.getOrigin()-fromB.getOrigin();
+
+	btScalar lambda = btScalar(0.);	// current sweep parameter; 0 = 'from' pose, 1 = 'to' pose
+
+	btTransform interpolatedTransA = fromA;
+	btTransform interpolatedTransB = fromB;
+
+	///take relative motion
+	btVector3 r = (linVelA-linVelB);
+	btVector3 v;
+	// initial v: difference of the support points of A and B taken along the relative motion
+	btVector3 supVertexA = fromA(m_convexA->localGetSupportingVertex(-r*fromA.getBasis()));
+	btVector3 supVertexB = fromB(m_convexB->localGetSupportingVertex(r*fromB.getBasis()));
+	v = supVertexA-supVertexB;
+	int maxIter = MAX_ITERATIONS;
+
+	btVector3 n;	// hit normal candidate; stays zero until lambda has been advanced at least once
+	n.setValue(btScalar(0.),btScalar(0.),btScalar(0.));
+	bool hasResult = false;	// NOTE(review): written below but never read in this function
+	btVector3 c;	// NOTE(review): never used in this function
+
+	btScalar lastLambda = lambda;	// NOTE(review): written below but never read in this function
+
+
+	btScalar dist2 = v.length2();
+#ifdef BT_USE_DOUBLE_PRECISION
+	btScalar epsilon = btScalar(0.0001);
+#else
+	btScalar epsilon = btScalar(0.0001);
+#endif //BT_USE_DOUBLE_PRECISION
+	btVector3	w,p;	// NOTE(review): p is never used in this function
+	btScalar VdotR;
+	// iterate until the squared length of v drops to epsilon or the iteration budget is spent
+	while ( (dist2 > epsilon) && maxIter--)
+	{
+		supVertexA = interpolatedTransA(m_convexA->localGetSupportingVertex(-v*interpolatedTransA.getBasis()));
+		supVertexB = interpolatedTransB(m_convexB->localGetSupportingVertex(v*interpolatedTransB.getBasis()));
+		w = supVertexA-supVertexB;
+
+		btScalar VdotW = v.dot(w);
+		// lambda past 1 means the sweep interval ended without a hit
+		if (lambda > btScalar(1.0))
+		{
+			return false;
+		}
+
+		if ( VdotW > btScalar(0.))
+		{
+			VdotR = v.dot(r);
+			// no hit is reported unless v.r is negative beyond the SIMD_EPSILON^2 tolerance
+			if (VdotR >= -(SIMD_EPSILON*SIMD_EPSILON))
+				return false;
+			else
+			{
+				lambda = lambda - VdotW / VdotR;
+				//interpolate to next lambda
+				//	x = s + lambda * r;
+				interpolatedTransA.getOrigin().setInterpolate3(fromA.getOrigin(),toA.getOrigin(),lambda);
+				interpolatedTransB.getOrigin().setInterpolate3(fromB.getOrigin(),toB.getOrigin(),lambda);
+				//m_simplexSolver->reset();
+				//check next line: supVertexA/supVertexB are unchanged since w was last computed, so this re-assigns the same value
+				 w = supVertexA-supVertexB;
+				lastLambda = lambda;
+				n = v;
+				hasResult = true;
+			}
+		} 
+		///Just like regular GJK only add the vertex if it isn't already (close) to current vertex, it would lead to divisions by zero and NaN etc.
+		if (!m_simplexSolver->inSimplex(w))
+			m_simplexSolver->addVertex( w, supVertexA , supVertexB);
+
+		if (m_simplexSolver->closest(v))
+		{
+			dist2 = v.length2();
+			hasResult = true;
+			//todo: check this normal for validity
+			//n=v;
+			//printf("V=%f , %f, %f\n",v[0],v[1],v[2]);
+			//printf("DIST2=%f\n",dist2);
+			//printf("numverts = %i\n",m_simplexSolver->numVertices());
+		} else
+		{
+			dist2 = btScalar(0.);	// solver reported an invalid result: zero dist2 ends the loop
+		} 
+	}
+
+	//int numiter = MAX_ITERATIONS - maxIter;
+//	printf("number of iterations: %d", numiter);
+	
+	//don't report a time of impact when moving 'away' from the hitnormal
+	
+	// result.m_fraction and result.m_normal are written even when the function goes on to return false below
+	result.m_fraction = lambda;
+	if (n.length2() >= (SIMD_EPSILON*SIMD_EPSILON))
+		result.m_normal = n.normalized();
+	else
+		result.m_normal = btVector3(btScalar(0.0), btScalar(0.0), btScalar(0.0));
+
+	//don't report time of impact for motion away from the contact normal (or causes minor penetration)
+	if (result.m_normal.dot(r)>=-result.m_allowedPenetration)
+		return false;
+	// the hit point is the second point returned by compute_points(), i.e. the one built from the B-side support points
+	btVector3 hitA,hitB;
+	m_simplexSolver->compute_points(hitA,hitB);
+	result.m_hitPoint=hitB;
+	return true;
+}
+
+
+
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h
new file mode 100644
index 00000000..6c812798
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h
@@ -0,0 +1,50 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_SUBSIMPLEX_CONVEX_CAST_H
+#define BT_SUBSIMPLEX_CONVEX_CAST_H
+
+#include "btConvexCast.h"
+#include "btSimplexSolverInterface.h"
+class btConvexShape;
+
+/// btSubsimplexConvexCast implements Gino van den Bergen's paper
+///"Ray Casting against General Convex Objects with Application to Continuous Collision Detection"
+/// GJK based Ray Cast, optimized version
+/// Objects should not start in overlap, otherwise results are not defined.
+class btSubsimplexConvexCast : public btConvexCast
+{
+	btSimplexSolverInterface* m_simplexSolver;	///< simplex solver that calcTimeOfImpact() resets and drives
+	const btConvexShape*	m_convexA;	///< first convex shape of the cast
+	const btConvexShape*	m_convexB;	///< second convex shape of the cast
+
+public:
+	/// Stores the two shapes and the simplex solver as raw pointers.
+	btSubsimplexConvexCast (const btConvexShape*	shapeA,const btConvexShape*	shapeB,btSimplexSolverInterface* simplexSolver);
+
+	//virtual ~btSubsimplexConvexCast();
+	///btSubsimplexConvexCast::calcTimeOfImpact calculates the time of impact+normal for the linear cast (sweep) between two moving objects.
+	///Precondition is that the objects should not penetrate/overlap at the start of the interval. Overlap can be tested using btGjkPairDetector.
+	virtual bool	calcTimeOfImpact(
+			const btTransform& fromA,
+			const btTransform& toA,
+			const btTransform& fromB,
+			const btTransform& toB,
+			CastResult& result);
+
+};
+
+#endif //BT_SUBSIMPLEX_CONVEX_CAST_H
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.cpp b/src/bullet/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.cpp
new file mode 100644
index 00000000..a775198a
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.cpp
@@ -0,0 +1,609 @@
+
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+	
+	Elsevier CDROM license agreements grants nonexclusive license to use the software
+	for any purpose, commercial or non-commercial as long as the following credit is included
+	identifying the original source of the software:
+
+	Parts of the source are "from the book Real-Time Collision Detection by
+	Christer Ericson, published by Morgan Kaufmann Publishers,
+	(c) 2005 Elsevier Inc."
+		
+*/
+
+
+#include "btVoronoiSimplexSolver.h"
+
+#define VERTA  0
+#define VERTB  1
+#define VERTC  2
+#define VERTD  3
+
+#define CATCH_DEGENERATE_TETRAHEDRON 1
+void	btVoronoiSimplexSolver::removeVertex(int index)
+{
+	// Swap-remove: shrink the simplex by one and move the former last vertex into slot 'index'.
+	btAssert(m_numVertices>0);
+	const int lastSlot = --m_numVertices;
+	m_simplexPointsQ[index] = m_simplexPointsQ[lastSlot];
+	m_simplexPointsP[index] = m_simplexPointsP[lastSlot];
+	m_simplexVectorW[index] = m_simplexVectorW[lastSlot];
+}
+
+void	btVoronoiSimplexSolver::reduceVertices (const btUsageBitfield& usedVerts)
+{
+	// Slots are visited from D down to A, so the swap-remove performed by
+	// removeVertex() only ever pulls in a vertex from a slot already handled.
+	const bool dropD = !usedVerts.usedVertexD;
+	const bool dropC = !usedVerts.usedVertexC;
+	const bool dropB = !usedVerts.usedVertexB;
+	const bool dropA = !usedVerts.usedVertexA;
+
+	if (dropD && (numVertices() > 3)) removeVertex(3);
+	if (dropC && (numVertices() > 2)) removeVertex(2);
+	if (dropB && (numVertices() > 1)) removeVertex(1);
+	if (dropA && (numVertices() > 0)) removeVertex(0);
+
+}
+
+
+
+
+
+//clear the simplex, remove all the vertices
+void btVoronoiSimplexSolver::reset()
+{
+	m_numVertices = 0;	// the simplex holds no vertices
+	m_cachedBC.reset();	// clears barycentric coordinates, usage bits and the degenerate flag
+	m_needsUpdate = true;	// force a recompute on the next query
+	m_cachedValidClosest = false;	// nothing valid is cached yet
+	m_lastW = btVector3(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));	// presumably a sentinel no real support point equals
+}
+
+
+
+	//add a vertex
+void btVoronoiSimplexSolver::addVertex(const btVector3& w, const btVector3& p, const btVector3& q)
+{
+	// Local hardening of the vendored code: guard the fixed-size arrays, mirroring the btAssert in removeVertex().
+	btAssert(m_numVertices < VORONOI_SIMPLEX_MAX_VERTS);
+	m_lastW = w;	// remembered so inSimplex() still recognises w after the simplex has been reduced
+	m_needsUpdate = true;	// the cached closest-point data is stale from here on
+	m_simplexVectorW[m_numVertices] = w;	// simplex point (calcTimeOfImpact passes w = p - q)
+	m_simplexPointsP[m_numVertices] = p;	// point w was built from on the first object
+	m_simplexPointsQ[m_numVertices] = q;	// point w was built from on the second object
+	m_numVertices++;
+}
+
+bool	btVoronoiSimplexSolver::updateClosestVectorAndPoints()
+{
+	// Recomputes the cached closest data (m_cachedP1, m_cachedP2, m_cachedV, m_cachedBC) only when m_needsUpdate is set; returns m_cachedValidClosest.
+	if (m_needsUpdate)
+	{
+		m_cachedBC.reset();
+
+		m_needsUpdate = false;
+
+		switch (numVertices())
+		{
+		case 0:
+				m_cachedValidClosest = false;	// empty simplex: there is nothing to measure
+				break;
+		case 1:
+			{
+				m_cachedP1 = m_simplexPointsP[0];
+				m_cachedP2 = m_simplexPointsQ[0];
+				m_cachedV = m_cachedP1-m_cachedP2; //== m_simplexVectorW[0]
+				m_cachedBC.reset();
+				m_cachedBC.setBarycentricCoordinates(btScalar(1.),btScalar(0.),btScalar(0.),btScalar(0.));
+				m_cachedValidClosest = m_cachedBC.isValid();
+				break;
+			};
+		case 2:
+			{
+			//closest point to the origin on the line segment from..to
+					const btVector3& from = m_simplexVectorW[0];
+					const btVector3& to = m_simplexVectorW[1];
+					btVector3 nearest;
+
+					btVector3 p (btScalar(0.),btScalar(0.),btScalar(0.));
+					btVector3 diff = p - from;
+					btVector3 v = to - from;
+					btScalar t = v.dot(diff);	// un-normalised projection of the origin onto the segment direction
+					
+					if (t > 0) {
+						btScalar dotVV = v.dot(v);
+						if (t < dotVV) {
+							t /= dotVV;
+							diff -= t*v;
+							m_cachedBC.m_usedVertices.usedVertexA = true;
+							m_cachedBC.m_usedVertices.usedVertexB = true;
+						} else {
+							t = 1;
+							diff -= v;
+							//reduce to 1 point
+							m_cachedBC.m_usedVertices.usedVertexB = true;
+						}
+					} else
+					{
+						t = 0;
+						//reduce to 1 point
+						m_cachedBC.m_usedVertices.usedVertexA = true;
+					}
+					m_cachedBC.setBarycentricCoordinates(1-t,t);
+					nearest = from + t*v;	// NOTE(review): 'nearest' is not read after this assignment
+
+					m_cachedP1 = m_simplexPointsP[0] + t * (m_simplexPointsP[1] - m_simplexPointsP[0]);
+					m_cachedP2 = m_simplexPointsQ[0] + t * (m_simplexPointsQ[1] - m_simplexPointsQ[0]);
+					m_cachedV = m_cachedP1 - m_cachedP2;
+					
+					reduceVertices(m_cachedBC.m_usedVertices);
+
+					m_cachedValidClosest = m_cachedBC.isValid();
+					break;
+			}
+		case 3: 
+			{ 
+				//closest point to the origin on the triangle a,b,c
+				btVector3 p (btScalar(0.),btScalar(0.),btScalar(0.)); 
+
+				const btVector3& a = m_simplexVectorW[0]; 
+				const btVector3& b = m_simplexVectorW[1]; 
+				const btVector3& c = m_simplexVectorW[2]; 
+
+				closestPtPointTriangle(p,a,b,c,m_cachedBC); 
+				m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] + 
+				m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] + 
+				m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2]; 
+
+				m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] + 
+				m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] + 
+				m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2]; 
+
+				m_cachedV = m_cachedP1-m_cachedP2; 
+
+				reduceVertices (m_cachedBC.m_usedVertices); 
+				m_cachedValidClosest = m_cachedBC.isValid(); 
+
+				break; 
+			}
+		case 4:
+			{
+
+				//closest point to the origin on the tetrahedron a,b,c,d
+				btVector3 p (btScalar(0.),btScalar(0.),btScalar(0.));
+				
+				const btVector3& a = m_simplexVectorW[0];
+				const btVector3& b = m_simplexVectorW[1];
+				const btVector3& c = m_simplexVectorW[2];
+				const btVector3& d = m_simplexVectorW[3];
+
+				bool hasSeperation = closestPtPointTetrahedron(p,a,b,c,d,m_cachedBC);
+
+				if (hasSeperation)
+				{
+
+					m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] +
+						m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] +
+						m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2] +
+						m_simplexPointsP[3] * m_cachedBC.m_barycentricCoords[3];
+
+					m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] +
+						m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] +
+						m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2] +
+						m_simplexPointsQ[3] * m_cachedBC.m_barycentricCoords[3];
+
+					m_cachedV = m_cachedP1-m_cachedP2;
+					reduceVertices (m_cachedBC.m_usedVertices);
+				} else
+				{
+//					printf("sub distance got penetration\n");
+					// no separation: either a degenerate tetrahedron or the origin lies inside it; m_cachedP1/m_cachedP2 are left untouched
+					if (m_cachedBC.m_degenerate)
+					{
+						m_cachedValidClosest = false;
+					} else
+					{
+						m_cachedValidClosest = true;
+						//degenerate case == false, penetration = true + zero
+						m_cachedV.setValue(btScalar(0.),btScalar(0.),btScalar(0.));
+					}
+					break;
+				}
+
+				m_cachedValidClosest = m_cachedBC.isValid();
+
+				//closest point origin from tetrahedron
+				break;
+			}
+		default:
+			{
+				m_cachedValidClosest = false;	// more than four vertices is not handled
+			}
+		};
+	}
+
+	return m_cachedValidClosest;
+
+}
+
+//update the cache if needed, output the closest vector (m_cachedV) and return whether that cached result is valid
+bool btVoronoiSimplexSolver::closest(btVector3& v)
+{
+	const bool cacheIsValid = updateClosestVectorAndPoints();	// refresh before m_cachedV is read
+	v = m_cachedV;	// written even when the result is reported as invalid
+	return cacheIsValid;
+}
+
+
+
+btScalar btVoronoiSimplexSolver::maxVertex()
+{
+	// Largest squared length among the stored simplex points (0 when the simplex is empty).
+	btScalar largestLen2 = btScalar(0.);
+	const int count = numVertices();
+	for (int slot = 0; slot < count; ++slot)
+	{
+		const btScalar len2 = m_simplexVectorW[slot].length2();
+		largestLen2 = (largestLen2 < len2) ? len2 : largestLen2;
+	}
+	return largestLen2;
+}
+
+
+
+	//return the current simplex
+int btVoronoiSimplexSolver::getSimplex(btVector3 *pBuf, btVector3 *qBuf, btVector3 *yBuf) const
+{
+	const int count = numVertices();	// each output buffer must have room for this many entries
+	for (int slot = 0; slot < count; ++slot)
+	{
+		yBuf[slot] = m_simplexVectorW[slot];	// simplex points
+		pBuf[slot] = m_simplexPointsP[slot];	// matching p points
+		qBuf[slot] = m_simplexPointsQ[slot];	// matching q points
+	}
+	return count;
+}
+
+
+
+
+bool btVoronoiSimplexSolver::inSimplex(const btVector3& w)
+{
+	// w counts as "in the simplex" when it equals the most recently added
+	// vertex (which may already have been reduced away) or matches one of
+	// the vertices currently stored.
+	if (w == m_lastW)
+		return true;
+
+	const int count = numVertices();
+	for (int slot = 0; slot < count; ++slot)
+	{
+		const btVector3& candidate = m_simplexVectorW[slot];
+#ifdef BT_USE_EQUAL_VERTEX_THRESHOLD
+		// the squared distance is compared against m_equalVertexThreshold
+		const bool matches = (candidate.distance2(w) <= m_equalVertexThreshold);
+#else
+		const bool matches = (candidate == w);
+#endif
+		if (matches)
+			return true;
+	}
+	return false;
+}
+
+void btVoronoiSimplexSolver::backup_closest(btVector3& v) 
+{	// Outputs the cached closest vector as-is; unlike closest(), no update is triggered.
+	v = m_cachedV;
+}
+
+
+bool btVoronoiSimplexSolver::emptySimplex() const 
+{
+	// An empty simplex is one that currently stores no vertices.
+	const bool noVertices = (0 == numVertices());
+	return noVertices;
+}
+
+void btVoronoiSimplexSolver::compute_points(btVector3& p1, btVector3& p2) 
+{
+	updateClosestVectorAndPoints();	// refresh the cache; the returned validity flag is not inspected here
+	p1 = m_cachedP1;	// closest point built from the stored p points
+	p2 = m_cachedP2;	// closest point built from the stored q points
+
+}
+
+
+
+
+bool	btVoronoiSimplexSolver::closestPtPointTriangle(const btVector3& p, const btVector3& a, const btVector3& b, const btVector3& c,btSubSimplexClosestResult& result)
+{
+	result.m_usedVertices.reset();
+	// Finds the point of triangle abc closest to p by Voronoi-region tests; fills result (point, used vertices, barycentric coordinates). Every path returns true.
+    // Check if P in vertex region outside A
+    btVector3 ab = b - a;
+    btVector3 ac = c - a;
+    btVector3 ap = p - a;
+    btScalar d1 = ab.dot(ap);
+    btScalar d2 = ac.dot(ap);
+    if (d1 <= btScalar(0.0) && d2 <= btScalar(0.0)) 
+	{
+		result.m_closestPointOnSimplex = a;
+		result.m_usedVertices.usedVertexA = true;
+		result.setBarycentricCoordinates(1,0,0);
+		return true;// a; // barycentric coordinates (1,0,0)
+	}
+
+    // Check if P in vertex region outside B
+    btVector3 bp = p - b;
+    btScalar d3 = ab.dot(bp);
+    btScalar d4 = ac.dot(bp);
+    if (d3 >= btScalar(0.0) && d4 <= d3) 
+	{
+		result.m_closestPointOnSimplex = b;
+		result.m_usedVertices.usedVertexB = true;
+		result.setBarycentricCoordinates(0,1,0);
+
+		return true; // b; // barycentric coordinates (0,1,0)
+	}
+    // Check if P in edge region of AB, if so return projection of P onto AB
+    btScalar vc = d1*d4 - d3*d2;
+    if (vc <= btScalar(0.0) && d1 >= btScalar(0.0) && d3 <= btScalar(0.0)) {
+        btScalar v = d1 / (d1 - d3);
+		result.m_closestPointOnSimplex = a + v * ab;
+		result.m_usedVertices.usedVertexA = true;
+		result.m_usedVertices.usedVertexB = true;
+		result.setBarycentricCoordinates(1-v,v,0);
+		return true;
+        //return a + v * ab; // barycentric coordinates (1-v,v,0)
+    }
+
+    // Check if P in vertex region outside C
+    btVector3 cp = p - c;
+    btScalar d5 = ab.dot(cp);
+    btScalar d6 = ac.dot(cp);
+    if (d6 >= btScalar(0.0) && d5 <= d6) 
+	{
+		result.m_closestPointOnSimplex = c;
+		result.m_usedVertices.usedVertexC = true;
+		result.setBarycentricCoordinates(0,0,1);
+		return true;//c; // barycentric coordinates (0,0,1)
+	}
+
+    // Check if P in edge region of AC, if so return projection of P onto AC
+    btScalar vb = d5*d2 - d1*d6;
+    if (vb <= btScalar(0.0) && d2 >= btScalar(0.0) && d6 <= btScalar(0.0)) {
+        btScalar w = d2 / (d2 - d6);
+		result.m_closestPointOnSimplex = a + w * ac;
+		result.m_usedVertices.usedVertexA = true;
+		result.m_usedVertices.usedVertexC = true;
+		result.setBarycentricCoordinates(1-w,0,w);
+		return true;
+        //return a + w * ac; // barycentric coordinates (1-w,0,w)
+    }
+
+    // Check if P in edge region of BC, if so return projection of P onto BC
+    btScalar va = d3*d6 - d5*d4;
+    if (va <= btScalar(0.0) && (d4 - d3) >= btScalar(0.0) && (d5 - d6) >= btScalar(0.0)) {
+        btScalar w = (d4 - d3) / ((d4 - d3) + (d5 - d6));
+		
+		result.m_closestPointOnSimplex = b + w * (c - b);
+		result.m_usedVertices.usedVertexB = true;
+		result.m_usedVertices.usedVertexC = true;
+		result.setBarycentricCoordinates(0,1-w,w);
+		return true;		
+       // return b + w * (c - b); // barycentric coordinates (0,1-w,w)
+    }
+
+    // P inside face region. Compute Q through its barycentric coordinates (u,v,w)
+    btScalar denom = btScalar(1.0) / (va + vb + vc);	// NOTE(review): no guard here against va + vb + vc being zero
+    btScalar v = vb * denom;
+    btScalar w = vc * denom;
+    
+	result.m_closestPointOnSimplex = a + ab * v + ac * w;
+	result.m_usedVertices.usedVertexA = true;
+	result.m_usedVertices.usedVertexB = true;
+	result.m_usedVertices.usedVertexC = true;
+	result.setBarycentricCoordinates(1-v-w,v,w);
+	
+	return true;
+//	return a + ab * v + ac * w; // = u*a + v*b + w*c, u = va * denom = btScalar(1.0) - v - w
+
+}
+
+
+
+
+
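+/// Test if point p and d lie on opposite sides of plane through abc: returns 1 if they do, 0 if not, and -1 (with CATCH_DEGENERATE_TETRAHEDRON) when d is too close to that plane.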
+int btVoronoiSimplexSolver::pointOutsideOfPlane(const btVector3& p, const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d)
+{
+	btVector3 normal = (b-a).cross(c-a);
+	// signp / signd: projections of p and d (relative to a) onto the normal of plane abc; only their signs and magnitudes relative to zero are used
+    btScalar signp = (p - a).dot(normal); // [AP AB AC]
+    btScalar signd = (d - a).dot( normal); // [AD AB AC]
+	// when enabled, a fourth vertex whose squared projection is below the threshold is reported as degenerate (-1)
+#ifdef CATCH_DEGENERATE_TETRAHEDRON
+#ifdef BT_USE_DOUBLE_PRECISION
+if (signd * signd < (btScalar(1e-8) * btScalar(1e-8)))
+	{
+		return -1;
+	}
+#else
+	if (signd * signd < (btScalar(1e-4) * btScalar(1e-4)))
+	{
+//		printf("affine dependent/degenerate\n");//
+		return -1;
+	}
+#endif
+
+#endif
+	// Points on opposite sides if expression signs are opposite
+    return signp * signd < btScalar(0.);
+}
+
+
+bool	btVoronoiSimplexSolver::closestPtPointTetrahedron(const btVector3& p, const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d, btSubSimplexClosestResult& finalResult)
+{
+	btSubSimplexClosestResult tempResult;
+	// Returns false when the tetrahedron is degenerate (m_degenerate is set) or when p is not outside any face; otherwise fills finalResult from the nearest face and returns true.
+    // Start out assuming point inside all halfspaces, so closest to itself
+	finalResult.m_closestPointOnSimplex = p;
+	finalResult.m_usedVertices.reset();
+    finalResult.m_usedVertices.usedVertexA = true;
+	finalResult.m_usedVertices.usedVertexB = true;
+	finalResult.m_usedVertices.usedVertexC = true;
+	finalResult.m_usedVertices.usedVertexD = true;
+	// one test per face; the last argument is the vertex opposite that face
+    int pointOutsideABC = pointOutsideOfPlane(p, a, b, c, d);
+	int pointOutsideACD = pointOutsideOfPlane(p, a, c, d, b);
+  	int	pointOutsideADB = pointOutsideOfPlane(p, a, d, b, c);
+	int	pointOutsideBDC = pointOutsideOfPlane(p, b, d, c, a);
+	// a negative value is the degenerate marker returned by pointOutsideOfPlane()
+   if (pointOutsideABC < 0 || pointOutsideACD < 0 || pointOutsideADB < 0 || pointOutsideBDC < 0)
+   {
+	   finalResult.m_degenerate = true;
+	   return false;
+   }
+	// p is outside none of the four faces: no separation to report
+   if (!pointOutsideABC  && !pointOutsideACD && !pointOutsideADB && !pointOutsideBDC)
+	 {
+		 return false;
+	 }
+
+	// keep the closest result over every face that p lies outside of
+    btScalar bestSqDist = FLT_MAX;
+    // If point outside face abc then compute closest point on abc
+	if (pointOutsideABC) 
+	{
+        closestPtPointTriangle(p, a, b, c,tempResult);
+		btVector3 q = tempResult.m_closestPointOnSimplex;
+		
+        btScalar sqDist = (q - p).dot( q - p);
+        // Update best closest point if (squared) distance is less than current best
+        if (sqDist < bestSqDist) {
+			bestSqDist = sqDist;
+			finalResult.m_closestPointOnSimplex = q;
+			//convert result bitmask! (triangle A,B,C -> tetrahedron A,B,C)
+			finalResult.m_usedVertices.reset();
+			finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA;
+			finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexB;
+			finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC;
+			finalResult.setBarycentricCoordinates(
+					tempResult.m_barycentricCoords[VERTA],
+					tempResult.m_barycentricCoords[VERTB],
+					tempResult.m_barycentricCoords[VERTC],
+					0
+			);
+
+		}
+    }
+  
+
+	// Repeat test for face acd
+	if (pointOutsideACD) 
+	{
+        closestPtPointTriangle(p, a, c, d,tempResult);
+		btVector3 q = tempResult.m_closestPointOnSimplex;
+		//convert result bitmask! (triangle A,B,C -> tetrahedron A,C,D)
+
+        btScalar sqDist = (q - p).dot( q - p);
+        if (sqDist < bestSqDist) 
+		{
+			bestSqDist = sqDist;
+			finalResult.m_closestPointOnSimplex = q;
+			finalResult.m_usedVertices.reset();
+			finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA;
+
+			finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexB;
+			finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexC;
+			finalResult.setBarycentricCoordinates(
+					tempResult.m_barycentricCoords[VERTA],
+					0,
+					tempResult.m_barycentricCoords[VERTB],
+					tempResult.m_barycentricCoords[VERTC]
+			);
+
+		}
+    }
+    // Repeat test for face adb
+
+	
+	if (pointOutsideADB)
+	{
+		closestPtPointTriangle(p, a, d, b,tempResult);
+		btVector3 q = tempResult.m_closestPointOnSimplex;
+		//convert result bitmask! (triangle A,B,C -> tetrahedron A,D,B)
+
+        btScalar sqDist = (q - p).dot( q - p);
+        if (sqDist < bestSqDist) 
+		{
+			bestSqDist = sqDist;
+			finalResult.m_closestPointOnSimplex = q;
+			finalResult.m_usedVertices.reset();
+			finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA;
+			finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexC;
+			
+			finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB;
+			finalResult.setBarycentricCoordinates(
+					tempResult.m_barycentricCoords[VERTA],
+					tempResult.m_barycentricCoords[VERTC],
+					0,
+					tempResult.m_barycentricCoords[VERTB]
+			);
+
+		}
+    }
+    // Repeat test for face bdc
+    
+
+	if (pointOutsideBDC)
+	{
+        closestPtPointTriangle(p, b, d, c,tempResult);
+		btVector3 q = tempResult.m_closestPointOnSimplex;
+		//convert result bitmask! (triangle A,B,C -> tetrahedron B,D,C)
+        btScalar sqDist = (q - p).dot( q - p);
+        if (sqDist < bestSqDist) 
+		{
+			bestSqDist = sqDist;
+			finalResult.m_closestPointOnSimplex = q;
+			finalResult.m_usedVertices.reset();
+			//
+			finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexA;
+			finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC;
+			finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB;
+
+			finalResult.setBarycentricCoordinates(
+					0,
+					tempResult.m_barycentricCoords[VERTA],
+					tempResult.m_barycentricCoords[VERTC],
+					tempResult.m_barycentricCoords[VERTB]
+			);
+
+		}
+    }
+
+	//help! we ended up full ! NOTE(review): this check has no effect, both paths below return true
+	
+	if (finalResult.m_usedVertices.usedVertexA &&
+		finalResult.m_usedVertices.usedVertexB &&
+		finalResult.m_usedVertices.usedVertexC &&
+		finalResult.m_usedVertices.usedVertexD) 
+	{
+		return true;
+	}
+
+    return true;
+}
+
diff --git a/src/bullet/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h b/src/bullet/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h
new file mode 100644
index 00000000..f1c7613e
--- /dev/null
+++ b/src/bullet/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h
@@ -0,0 +1,179 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_VORONOI_SIMPLEX_SOLVER_H
+#define BT_VORONOI_SIMPLEX_SOLVER_H
+
+#include "btSimplexSolverInterface.h"
+
+
+
+#define VORONOI_SIMPLEX_MAX_VERTS 5
+
+///disable next define, or use defaultCollisionConfiguration->getSimplexSolver()->setEqualVertexThreshold(0.f) to disable/configure
+#define BT_USE_EQUAL_VERTEX_THRESHOLD
+#define VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD 0.0001f
+
+
+struct btUsageBitfield{	// one flag per simplex vertex (A..D) recording whether it is used by the closest-point result
+	btUsageBitfield()
+	{
+		reset();
+	}
+	/// Marks all four vertices as unused; the unused1..unused4 padding bits are not touched.
+	void reset()
+	{
+		usedVertexA = false;
+		usedVertexB = false;
+		usedVertexC = false;
+		usedVertexD = false;
+	}
+	unsigned short usedVertexA	: 1;
+	unsigned short usedVertexB	: 1;
+	unsigned short usedVertexC	: 1;
+	unsigned short usedVertexD	: 1;
+	unsigned short unused1		: 1;	// padding bits; never written by this struct
+	unsigned short unused2		: 1;
+	unsigned short unused3		: 1;
+	unsigned short unused4		: 1;
+};
+
+
+struct	btSubSimplexClosestResult
+{
+	btVector3	m_closestPointOnSimplex;
+	//m_usedVertices stores the simplex vertex-usage:
+	//one bit-field flag per vertex (usedVertexA..usedVertexD);
+	// a set flag means the related vertex is used
+	btUsageBitfield	m_usedVertices;
+	btScalar	m_barycentricCoords[4];	// weights of vertices A..D
+	bool m_degenerate;	// set by closestPtPointTetrahedron() for a degenerate tetrahedron; cleared by reset()
+	/// Clears the degenerate flag, zeroes all four barycentric coordinates and marks every vertex unused. There is no constructor, so members are indeterminate until this is called.
+	void	reset()
+	{
+		m_degenerate = false;
+		setBarycentricCoordinates();
+		m_usedVertices.reset();
+	}
+	bool	isValid()
+	{
+		bool valid = (m_barycentricCoords[0] >= btScalar(0.)) &&
+			(m_barycentricCoords[1] >= btScalar(0.)) &&
+			(m_barycentricCoords[2] >= btScalar(0.)) &&
+			(m_barycentricCoords[3] >= btScalar(0.));
+
+		// valid means: no barycentric coordinate is negative
+		return valid;
+	}
+	void	setBarycentricCoordinates(btScalar a=btScalar(0.),btScalar b=btScalar(0.),btScalar c=btScalar(0.),btScalar d=btScalar(0.))
+	{
+		m_barycentricCoords[0] = a;
+		m_barycentricCoords[1] = b;
+		m_barycentricCoords[2] = c;
+		m_barycentricCoords[3] = d;
+	}
+
+};
+
+/// btVoronoiSimplexSolver is an implementation of the closest point distance algorithm from a 1-4 points simplex to the origin.
+/// Can be used with GJK, as an alternative to Johnson distance algorithm.
+#ifdef NO_VIRTUAL_INTERFACE
+class btVoronoiSimplexSolver
+#else
+class btVoronoiSimplexSolver : public btSimplexSolverInterface
+#endif
+{
+public:
+
+	int	m_numVertices;	///< number of vertices currently stored in the three arrays below
+
+	btVector3	m_simplexVectorW[VORONOI_SIMPLEX_MAX_VERTS];	///< simplex points (the 'w' passed to addVertex)
+	btVector3	m_simplexPointsP[VORONOI_SIMPLEX_MAX_VERTS];	///< matching 'p' points
+	btVector3	m_simplexPointsQ[VORONOI_SIMPLEX_MAX_VERTS];	///< matching 'q' points
+
+	
+
+	btVector3	m_cachedP1;	///< cached closest point built from the p points
+	btVector3	m_cachedP2;	///< cached closest point built from the q points
+	btVector3	m_cachedV;	///< cached closest vector (m_cachedP1 - m_cachedP2, or zero on penetration)
+	btVector3	m_lastW;	///< last point handed to addVertex(); checked by inSimplex()
+	
+	btScalar	m_equalVertexThreshold;	///< squared-distance threshold used by inSimplex() when BT_USE_EQUAL_VERTEX_THRESHOLD is defined
+	bool		m_cachedValidClosest;	///< validity of the cached closest data
+
+	btSubSimplexClosestResult m_cachedBC;	///< barycentric coordinates / used vertices of the cached result
+
+	bool	m_needsUpdate;	///< set by reset()/addVertex(); cleared by updateClosestVectorAndPoints()
+	
+	void	removeVertex(int index);
+	void	reduceVertices (const btUsageBitfield& usedVerts);
+	bool	updateClosestVectorAndPoints();
+
+	bool	closestPtPointTetrahedron(const btVector3& p, const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d, btSubSimplexClosestResult& finalResult);
+	int		pointOutsideOfPlane(const btVector3& p, const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d);
+	bool	closestPtPointTriangle(const btVector3& p, const btVector3& a, const btVector3& b, const btVector3& c,btSubSimplexClosestResult& result);
+
+public:
+	/// Local hardening of the vendored code: the constructor calls reset() so that m_numVertices, m_needsUpdate, m_cachedValidClosest, m_lastW and m_cachedBC are never read uninitialised when a query precedes the first explicit reset().
+	btVoronoiSimplexSolver()
+		:  m_equalVertexThreshold(VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD)
+	{
+		reset();
+	}
+	 void reset();
+
+	 void addVertex(const btVector3& w, const btVector3& p, const btVector3& q);
+
+	 void	setEqualVertexThreshold(btScalar threshold)
+	 {
+		 m_equalVertexThreshold = threshold;
+	 }
+
+	 btScalar	getEqualVertexThreshold() const
+	 {
+		 return m_equalVertexThreshold;
+	 }
+
+	 bool closest(btVector3& v);
+
+	 btScalar maxVertex();
+
+	 bool fullSimplex() const
+	 {
+		 return (m_numVertices == 4);
+	 }
+
+	 int getSimplex(btVector3 *pBuf, btVector3 *qBuf, btVector3 *yBuf) const;
+
+	 bool inSimplex(const btVector3& w);
+	
+	 void backup_closest(btVector3& v) ;
+
+	 bool emptySimplex() const ;
+
+	 void compute_points(btVector3& p1, btVector3& p2) ;
+
+	 int numVertices() const 
+	 {
+		 return m_numVertices;
+	 }
+
+
+};
+
+#endif //BT_VORONOI_SIMPLEX_SOLVER_H
+
diff --git a/src/bullet/BulletDynamics/Character/btCharacterControllerInterface.h b/src/bullet/BulletDynamics/Character/btCharacterControllerInterface.h
new file mode 100644
index 00000000..c81813c9
--- /dev/null
+++ b/src/bullet/BulletDynamics/Character/btCharacterControllerInterface.h
@@ -0,0 +1,46 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2008 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CHARACTER_CONTROLLER_INTERFACE_H
+#define BT_CHARACTER_CONTROLLER_INTERFACE_H
+
+#include "LinearMath/btVector3.h"
+#include "BulletDynamics/Dynamics/btActionInterface.h"
+
+class btCollisionShape;
+class btRigidBody;
+class btCollisionWorld;
+
+class btCharacterControllerInterface : public btActionInterface	// abstract character-controller API: every operation below is pure virtual
+{
+public:
+	btCharacterControllerInterface () {};
+	virtual ~btCharacterControllerInterface () {};
+	// --- movement input and repositioning ---
+	virtual void	setWalkDirection(const btVector3& walkDirection) = 0;
+	virtual void	setVelocityForTimeInterval(const btVector3& velocity, btScalar timeInterval) = 0;
+	virtual void	reset () = 0;
+	virtual void	warp (const btVector3& origin) = 0;
+	// --- per-step update, given the collision world to act on (and the time step dt) ---
+	virtual void	preStep ( btCollisionWorld* collisionWorld) = 0;
+	virtual void	playerStep (btCollisionWorld* collisionWorld, btScalar dt) = 0;
+	virtual bool	canJump () const = 0;
+	virtual void	jump () = 0;
+	// --- state query ---
+	virtual bool	onGround () const = 0;
+};
+
+#endif //BT_CHARACTER_CONTROLLER_INTERFACE_H
+
diff --git a/src/bullet/BulletDynamics/Character/btKinematicCharacterController.cpp b/src/bullet/BulletDynamics/Character/btKinematicCharacterController.cpp
new file mode 100644
index 00000000..f733dc0c
--- /dev/null
+++ b/src/bullet/BulletDynamics/Character/btKinematicCharacterController.cpp
@@ -0,0 +1,641 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2008 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "LinearMath/btIDebugDraw.h"
+#include "BulletCollision/CollisionDispatch/btGhostObject.h"
+#include "BulletCollision/CollisionShapes/btMultiSphereShape.h"
+#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionWorld.h"
+#include "LinearMath/btDefaultMotionState.h"
+#include "btKinematicCharacterController.h"
+
+
+// static helper: returns v scaled to unit length, or the zero vector when the length of v does not exceed SIMD_EPSILON
+static btVector3
+getNormalizedVector(const btVector3& v)
+{
+	btVector3 n(0, 0, 0);
+	if (v.length() > SIMD_EPSILON) {	// check BEFORE dividing: normalizing a zero vector gives NaN components, and NaN < epsilon is false, so a check made afterwards never fires
+		n = v.normalized();
+	}
+	return n;
+}
+
+
+///@todo Interact with dynamic objects,
+///Ride kinematically animated platforms properly
+///More realistic (or maybe just a config option) falling
+/// -> Should integrate falling velocity manually and use that in stepDown()
+///Support jumping
+///Support ducking
+class btKinematicClosestNotMeRayResultCallback : public btCollisionWorld::ClosestRayResultCallback
+{
+public:
+	/// me: the object whose own hits are discarded. The base callback is constructed with two zero vectors.
+	btKinematicClosestNotMeRayResultCallback (btCollisionObject* me)
+	: btCollisionWorld::ClosestRayResultCallback(btVector3(0.0, 0.0, 0.0), btVector3(0.0, 0.0, 0.0))
+	, m_me(me)
+	{
+	}
+	/// A hit on m_me returns 1.0 without being recorded; any other hit is forwarded to the base class.
+	virtual btScalar addSingleResult(btCollisionWorld::LocalRayResult& rayResult,bool normalInWorldSpace)
+	{
+		const bool hitSelf = (rayResult.m_collisionObject == m_me);
+		return hitSelf ? btScalar(1.0) : ClosestRayResultCallback::addSingleResult (rayResult, normalInWorldSpace);
+	}
+protected:
+	btCollisionObject* m_me;	///< object excluded from the results
+};
+
+class btKinematicClosestNotMeConvexResultCallback : public btCollisionWorld::ClosestConvexResultCallback
+{
+public:
+	/// me: object whose own hits are ignored; up: direction the hit normals are compared against;
+	/// minSlopeDot: smallest accepted value of up.dot(hit normal).
+	btKinematicClosestNotMeConvexResultCallback (btCollisionObject* me, const btVector3& up, btScalar minSlopeDot)
+	: btCollisionWorld::ClosestConvexResultCallback(btVector3(0.0, 0.0, 0.0), btVector3(0.0, 0.0, 0.0))
+	, m_me(me)
+	, m_up(up)
+	, m_minSlopeDot(minSlopeDot)
+	{
+	}
+
+	/// Filters one sweep hit: a hit on m_me, or a hit whose world-space normal
+	/// has a dot product with m_up below m_minSlopeDot, returns 1.0 without
+	/// being recorded; every other hit is passed on to the base class.
+	virtual btScalar addSingleResult(btCollisionWorld::LocalConvexResult& convexResult,bool normalInWorldSpace)
+	{
+		const btScalar ignoreHit = btScalar(1.0);
+		if (convexResult.m_hitCollisionObject == m_me)
+			return ignoreHit;
+
+		// The reported normal is either already in world space or has to be
+		// rotated by the basis of the hit object world transform.
+		const btVector3 hitNormalWorld = normalInWorldSpace
+			? convexResult.m_hitNormalLocal
+			: convexResult.m_hitCollisionObject->getWorldTransform().getBasis()*convexResult.m_hitNormalLocal;
+
+		if (m_up.dot(hitNormalWorld) < m_minSlopeDot)
+			return ignoreHit;
+
+		return ClosestConvexResultCallback::addSingleResult (convexResult, normalInWorldSpace);
+	}
+protected:
+	btCollisionObject* m_me;	///< object excluded from the results
+	const btVector3 m_up;		///< reference direction for the slope test
+	btScalar m_minSlopeDot;		///< threshold compared with m_up.dot(normal)
+};
+
+/*
+ * Reflects 'direction' about a surface with normal 'normal': r = d - 2 (d . n) n
+ * from: http://www-cs-students.stanford.edu/~adityagp/final/node3.html
+ */
+btVector3 btKinematicCharacterController::computeReflectionDirection (const btVector3& direction, const btVector3& normal)
+{
+	const btScalar twiceProjection = btScalar(2.0) * direction.dot(normal);
+	return direction - twiceProjection * normal;
+}
+
+/*
+ * Returns 'normal' scaled by the dot product of 'direction' and 'normal'. That is the
+ * part of 'direction' parallel to 'normal' provided 'normal' has unit length.
+ */
+btVector3 btKinematicCharacterController::parallelComponent (const btVector3& direction, const btVector3& normal)
+{
+	return normal * direction.dot(normal);
+}
+
+/* Returns what is left of 'direction' after subtracting parallelComponent(direction, normal).
+ * The function name keeps its historical spelling because callers use it. */
+btVector3 btKinematicCharacterController::perpindicularComponent (const btVector3& direction, const btVector3& normal)
+{
+	const btVector3 alongNormal = parallelComponent(direction, normal);
+	return direction - alongNormal;
+}
+
+btKinematicCharacterController::btKinematicCharacterController (btPairCachingGhostObject* ghostObject,btConvexShape* convexShape,btScalar stepHeight, int upAxis)
+{
+	// Stores the caller-supplied ghost object, shape, step height and up-axis index (not range-checked here, unlike setUpAxis), then sets defaults.
+	m_upAxis = upAxis;
+	m_ghostObject = ghostObject;
+	m_convexShape=convexShape;
+	m_stepHeight = stepHeight;
+	m_addedMargin = 0.02;
+	m_useGhostObjectSweepTest = m_useWalkDirection = true;	// use walk direction by default, legacy behavior
+	m_turnAngle = m_halfHeight = m_maxJumpHeight = btScalar(0.0);	// the last two were previously left uninitialized
+	m_velocityTimeInterval = 0.0;
+	m_gravity = 9.8 * 3 ; // 3G acceleration.
+	m_fallSpeed = 55.0; // Terminal velocity of a sky diver in m/s.
+	m_jumpSpeed = 10.0; // ?
+	setMaxSlope(btRadians(45.0));
+	m_walkDirection.setValue(0,0,0);
+	m_normalizedDirection.setValue(0,0,0);	// read by stepForwardAndStrafe() even when no setter was ever called
+	m_touchingNormal.setValue(0,0,0);
+	m_verticalVelocity = m_verticalOffset = m_currentStepOffset = btScalar(0.0);	// stepDown() reads m_currentStepOffset, which stepUp() leaves unwritten on a wall/ceiling hit
+	m_wasOnGround = m_wasJumping = m_touchingContact = false;	// m_touchingContact is read by playerStep() callers that skip preStep()
+}
+
+btKinematicCharacterController::~btKinematicCharacterController ()	// empty body: m_ghostObject and m_convexShape are not deleted here
+{
+}
+
+btPairCachingGhostObject* btKinematicCharacterController::getGhostObject()	// returns the ghost object pointer passed to the constructor
+{
+	return m_ghostObject;
+}
+
+bool btKinematicCharacterController::recoverFromPenetration ( btCollisionWorld* collisionWorld)
+{
+	// One recovery pass: moves the ghost object against every penetrating contact. Returns true if any contact had a negative distance.
+	bool penetration = false;
+	// Dispatch collision processing for the pairs in the ghost object pair cache (presumably refreshing their contact manifolds - confirm against the dispatcher).
+	collisionWorld->getDispatcher()->dispatchAllCollisionPairs(m_ghostObject->getOverlappingPairCache(), collisionWorld->getDispatchInfo(), collisionWorld->getDispatcher());
+
+	m_currentPosition = m_ghostObject->getWorldTransform().getOrigin();
+	// Most negative contact distance seen so far in this pass; the matching normal is kept in m_touchingNormal.
+	btScalar maxPen = btScalar(0.0);
+	for (int i = 0; i < m_ghostObject->getOverlappingPairCache()->getNumOverlappingPairs(); i++)
+	{
+		m_manifoldArray.resize(0);	// the member array is emptied and refilled for each pair
+
+		btBroadphasePair* collisionPair = &m_ghostObject->getOverlappingPairCache()->getOverlappingPairArray()[i];
+		// A pair without a collision algorithm contributes no manifolds.
+		if (collisionPair->m_algorithm)
+			collisionPair->m_algorithm->getAllContactManifolds(m_manifoldArray);
+
+		
+		for (int j=0;j<m_manifoldArray.size();j++)
+		{
+			btPersistentManifold* manifold = m_manifoldArray[j];
+			btScalar directionSign = manifold->getBody0() == m_ghostObject ? btScalar(-1.0) : btScalar(1.0);	// the normal is negated when the ghost object is body 0
+			for (int p=0;p<manifold->getNumContacts();p++)
+			{
+				const btManifoldPoint&pt = manifold->getContactPoint(p);
+
+				btScalar dist = pt.getDistance();
+				// Only negative distances are treated as penetration.
+				if (dist < 0.0)
+				{
+					if (dist < maxPen)
+					{
+						maxPen = dist;
+						m_touchingNormal = pt.m_normalWorldOnB * directionSign;//??
+
+					}
+					m_currentPosition += pt.m_normalWorldOnB * directionSign * dist * btScalar(0.2);	// shift by 20% of this contact distance along its signed normal
+					penetration = true;
+				} else {
+					//printf("touching %f\n", dist);
+				}
+			}
+			
+			//manifold->clearManifold();
+		}
+	}
+	btTransform newTrans = m_ghostObject->getWorldTransform();	// copy of the current transform; only its origin is replaced below
+	newTrans.setOrigin(m_currentPosition);
+	m_ghostObject->setWorldTransform(newTrans);
+//	printf("m_touchingNormal = %f,%f,%f\n",m_touchingNormal[0],m_touchingNormal[1],m_touchingNormal[2]);
+	return penetration;
+}
+
+void btKinematicCharacterController::stepUp ( btCollisionWorld* world)
+{
+	// phase 1: up
+	btTransform start, end;
+	m_targetPosition = m_currentPosition + getUpAxisDirections()[m_upAxis] * (m_stepHeight + (m_verticalOffset > 0.f?m_verticalOffset:0.f));
+	// ^ target = current position raised along the up axis by the step height plus any positive vertical offset
+	start.setIdentity ();
+	end.setIdentity ();
+
+	/* FIXME: Handle penetration properly */
+	start.setOrigin (m_currentPosition + getUpAxisDirections()[m_upAxis] * (m_convexShape->getMargin() + m_addedMargin));
+	end.setOrigin (m_targetPosition);
+	// The callback skips the ghost object itself and any hit whose normal has a dot product below 0.7071 with the negated up axis.
+	btKinematicClosestNotMeConvexResultCallback callback (m_ghostObject, -getUpAxisDirections()[m_upAxis], btScalar(0.7071));
+	callback.m_collisionFilterGroup = getGhostObject()->getBroadphaseHandle()->m_collisionFilterGroup;
+	callback.m_collisionFilterMask = getGhostObject()->getBroadphaseHandle()->m_collisionFilterMask;
+	
+	if (m_useGhostObjectSweepTest)
+	{
+		m_ghostObject->convexSweepTest (m_convexShape, start, end, callback, world->getDispatchInfo().m_allowedCcdPenetration);
+	}
+	else
+	{
+		world->convexSweepTest (m_convexShape, start, end, callback);	// NOTE(review): unlike the sweeps in stepForwardAndStrafe()/stepDown(), no allowedCcdPenetration argument is passed on this path
+	}
+	
+	if (callback.hasHit())
+	{
+		// Only modify the position if the hit was a slope and not a wall or ceiling.
+		if(callback.m_hitNormalWorld.dot(getUpAxisDirections()[m_upAxis]) > 0.0)
+		{
+			// we moved up only a fraction of the step height
+			m_currentStepOffset = m_stepHeight * callback.m_closestHitFraction;
+			m_currentPosition.setInterpolate3 (m_currentPosition, m_targetPosition, callback.m_closestHitFraction);
+		}
+		m_verticalVelocity = 0.0;	// any hit cancels the vertical motion; note m_currentStepOffset keeps its previous value when the inner test fails
+		m_verticalOffset = 0.0;
+	} else {
+		m_currentStepOffset = m_stepHeight;	// nothing in the way: the full step height was climbed
+		m_currentPosition = m_targetPosition;
+	}
+}
+
+void btKinematicCharacterController::updateTargetPositionBasedOnCollision (const btVector3& hitNormal, btScalar tangentMag, btScalar normalMag)
+{	// Recomputes m_targetPosition after a hit with normal hitNormal; does nothing when the remaining move is not longer than SIMD_EPSILON.
+	btVector3 movementDirection = m_targetPosition - m_currentPosition;
+	btScalar movementLength = movementDirection.length();
+	if (movementLength>SIMD_EPSILON)
+	{
+		movementDirection.normalize();
+		// Reflect the unit move direction about the hit normal, then split the reflection into its parts along and across that normal.
+		btVector3 reflectDir = computeReflectionDirection (movementDirection, hitNormal);
+		reflectDir.normalize();
+
+		btVector3 parallelDir, perpindicularDir;
+
+		parallelDir = parallelComponent (reflectDir, hitNormal);
+		perpindicularDir = perpindicularComponent (reflectDir, hitNormal);
+		// The target is rebuilt starting from the current position; only the enabled components below are added back.
+		m_targetPosition = m_currentPosition;
+		if (0)//tangentMag != 0.0)
+		{
+			btVector3 parComponent = parallelDir * btScalar (tangentMag*movementLength);	// disabled by the constant condition above, so tangentMag currently has no effect
+//			printf("parComponent=%f,%f,%f\n",parComponent[0],parComponent[1],parComponent[2]);
+			m_targetPosition +=  parComponent;
+		}
+		// normalMag scales the part of the reflection that lies across the hit normal, times the original move length.
+		if (normalMag != 0.0)
+		{
+			btVector3 perpComponent = perpindicularDir * btScalar (normalMag*movementLength);
+//			printf("perpComponent=%f,%f,%f\n",perpComponent[0],perpComponent[1],perpComponent[2]);
+			m_targetPosition += perpComponent;
+		}
+	} else
+	{
+//		printf("movementLength don't normalize a zero vector\n");
+	}
+}
+
+void btKinematicCharacterController::stepForwardAndStrafe ( btCollisionWorld* collisionWorld, const btVector3& walkMove)
+{
+	// phase 2: forward and strafe
+	// Sweeps the convex shape from m_currentPosition towards m_currentPosition + walkMove. On a hit the target is
+	// redirected by updateTargetPositionBasedOnCollision() and the sweep is repeated, at most 10 times per call.
+	btTransform start, end;
+	m_targetPosition = m_currentPosition + walkMove;
+
+	start.setIdentity ();
+	end.setIdentity ();
+	
+	btScalar fraction = 1.0;
+	btScalar distance2 = (m_currentPosition-m_targetPosition).length2();
+//	printf("distance2=%f\n",distance2);
+
+	// If preStep() found a penetrating contact and the move direction has a positive dot product with its normal, redirect the target first.
+	if (m_touchingContact)
+	{
+		if (m_normalizedDirection.dot(m_touchingNormal) > btScalar(0.0))
+		{
+			updateTargetPositionBasedOnCollision (m_touchingNormal);
+		}
+	}
+
+	int maxIter = 10;
+
+	while (fraction > btScalar(0.01) && maxIter-- > 0)
+	{
+		start.setOrigin (m_currentPosition);
+		end.setOrigin (m_targetPosition);
+		btVector3 sweepDirNegative(m_currentPosition - m_targetPosition);
+
+		// With a threshold of 0 the callback drops hits whose normal has a negative dot product with the reversed sweep direction.
+		btKinematicClosestNotMeConvexResultCallback callback (m_ghostObject, sweepDirNegative, btScalar(0.0));
+		callback.m_collisionFilterGroup = getGhostObject()->getBroadphaseHandle()->m_collisionFilterGroup;
+		callback.m_collisionFilterMask = getGhostObject()->getBroadphaseHandle()->m_collisionFilterMask;
+
+		// The shape margin is enlarged by m_addedMargin for the duration of the sweep only and restored right after it.
+		btScalar margin = m_convexShape->getMargin();
+		m_convexShape->setMargin(margin + m_addedMargin);
+
+
+		if (m_useGhostObjectSweepTest)
+		{
+			m_ghostObject->convexSweepTest (m_convexShape, start, end, callback, collisionWorld->getDispatchInfo().m_allowedCcdPenetration);
+		} else
+		{
+			collisionWorld->convexSweepTest (m_convexShape, start, end, callback, collisionWorld->getDispatchInfo().m_allowedCcdPenetration);
+		}
+		
+		m_convexShape->setMargin(margin);
+
+		
+		fraction -= callback.m_closestHitFraction;
+
+		if (callback.hasHit())
+		{	
+			// we moved only a fraction
+			// (the distance to the hit point used to be computed here but was never read, so it is no longer calculated;
+			// m_currentPosition is not advanced to the hit point because the interpolation below is disabled)
+
+//			m_currentPosition.setInterpolate3 (m_currentPosition, m_targetPosition, callback.m_closestHitFraction);
+
+			updateTargetPositionBasedOnCollision (callback.m_hitNormalWorld);
+			btVector3 currentDir = m_targetPosition - m_currentPosition;
+			distance2 = currentDir.length2();
+			if (distance2 > SIMD_EPSILON)
+			{
+				currentDir.normalize();
+				/* See Quake2: "If velocity is against original velocity, stop dead to avoid tiny oscillations in sloping corners." */
+				if (currentDir.dot(m_normalizedDirection) <= btScalar(0.0))
+				{
+					break;
+				}
+			} else
+			{
+//				printf("currentDir: don't normalize a zero vector\n");
+				break;
+			}
+
+		} else {
+			// we moved whole way
+			m_currentPosition = m_targetPosition;
+		}
+
+	//	if (callback.m_closestHitFraction == 0.f)
+	//		break;
+
+	}
+}
+
+void btKinematicCharacterController::stepDown ( btCollisionWorld* collisionWorld, btScalar dt)
+{
+	btTransform start, end;
+
+	// phase 3: down
+	/*btScalar additionalDownStep = (m_wasOnGround && !onGround()) ? m_stepHeight : 0.0;
+	btVector3 step_drop = getUpAxisDirections()[m_upAxis] * (m_currentStepOffset + additionalDownStep);
+	btScalar downVelocity = (additionalDownStep == 0.0 && m_verticalVelocity<0.0?-m_verticalVelocity:0.0) * dt;
+	btVector3 gravity_drop = getUpAxisDirections()[m_upAxis] * downVelocity; 
+	m_targetPosition -= (step_drop + gravity_drop);*/
+	// Despite its name, downVelocity is a distance: the downward speed (if any) multiplied by dt.
+	btScalar downVelocity = (m_verticalVelocity<0.f?-m_verticalVelocity:0.f) * dt;
+	if(downVelocity > 0.0 && downVelocity < m_stepHeight
+		&& (m_wasOnGround || !m_wasJumping))
+	{
+		downVelocity = m_stepHeight;	// a small drop is raised to a full step height unless the character is in a jump that did not start this step on the ground
+	}
+	// Undo the climb recorded by stepUp() (m_currentStepOffset) and add the drop computed above.
+	btVector3 step_drop = getUpAxisDirections()[m_upAxis] * (m_currentStepOffset + downVelocity);
+	m_targetPosition -= step_drop;
+
+	start.setIdentity ();
+	end.setIdentity ();
+
+	start.setOrigin (m_currentPosition);
+	end.setOrigin (m_targetPosition);
+	// The callback skips the ghost object and any hit whose normal has a dot product with the up axis below m_maxSlopeCosine.
+	btKinematicClosestNotMeConvexResultCallback callback (m_ghostObject, getUpAxisDirections()[m_upAxis], m_maxSlopeCosine);
+	callback.m_collisionFilterGroup = getGhostObject()->getBroadphaseHandle()->m_collisionFilterGroup;
+	callback.m_collisionFilterMask = getGhostObject()->getBroadphaseHandle()->m_collisionFilterMask;
+	
+	if (m_useGhostObjectSweepTest)
+	{
+		m_ghostObject->convexSweepTest (m_convexShape, start, end, callback, collisionWorld->getDispatchInfo().m_allowedCcdPenetration);
+	} else
+	{
+		collisionWorld->convexSweepTest (m_convexShape, start, end, callback, collisionWorld->getDispatchInfo().m_allowedCcdPenetration);
+	}
+
+	if (callback.hasHit())
+	{
+		// we dropped a fraction of the height -> hit floor
+		m_currentPosition.setInterpolate3 (m_currentPosition, m_targetPosition, callback.m_closestHitFraction);
+		m_verticalVelocity = 0.0;	// with both values at exactly 0.0, onGround() reports true
+		m_verticalOffset = 0.0;
+		m_wasJumping = false;
+	} else {
+		// we dropped the full height
+		
+		m_currentPosition = m_targetPosition;
+	}
+}
+
+
+
+// Switches the controller to walk-direction mode and stores the new
+// direction; while this mode is active playerStep() hands m_walkDirection
+// unscaled to stepForwardAndStrafe().
+void btKinematicCharacterController::setWalkDirection(const btVector3& walkDirection)
+{
+	m_walkDirection = walkDirection;
+	m_normalizedDirection = getNormalizedVector(walkDirection);
+	m_useWalkDirection = true;
+}
+
+
+
+// Switches the controller to velocity mode: while m_velocityTimeInterval is
+// positive, playerStep() moves the character by velocity * min(dt, remaining
+// interval) and then subtracts dt from the interval.
+//
+//   velocity      stored in m_walkDirection (its normalized form goes to
+//                 m_normalizedDirection)
+//   timeInterval  stored in m_velocityTimeInterval; playerStep() returns
+//                 without moving once it is <= 0
+void btKinematicCharacterController::setVelocityForTimeInterval(const btVector3& velocity,
+				btScalar timeInterval)
+{
+	m_walkDirection = velocity;
+	m_normalizedDirection = getNormalizedVector(velocity);
+	m_velocityTimeInterval = timeInterval;
+	m_useWalkDirection = false;
+}
+
+
+
+void btKinematicCharacterController::reset ()	// currently a no-op: no controller state is changed
+{
+}
+
+void btKinematicCharacterController::warp (const btVector3& origin)
+{
+	btTransform placement;	// identity transform whose origin is then set to the requested position
+	placement.setIdentity();
+	placement.setOrigin (origin);
+	m_ghostObject->setWorldTransform (placement);	// only the ghost object is moved here; m_currentPosition is not touched
+}
+
+
+// Runs before playerStep() (updateAction() in the header calls them in that
+// order): pushes the ghost object out of overlapping geometry and seeds the
+// positions the step phases work with.
+//
+// recoverFromPenetration() is invoked until it reports no penetration, but at
+// most five times. m_touchingContact ends up true when at least one of those
+// calls reported a penetration.
+void btKinematicCharacterController::preStep (  btCollisionWorld* collisionWorld)
+{
+	const int maxRecoveryPasses = 5;
+
+	m_touchingContact = false;
+	for (int pass = 0; pass < maxRecoveryPasses && recoverFromPenetration (collisionWorld); ++pass)
+	{
+		m_touchingContact = true;
+	}
+
+	// Both positions start from wherever the recovery passes left the ghost object.
+	const btVector3& ghostOrigin = m_ghostObject->getWorldTransform().getOrigin();
+	m_currentPosition = ghostOrigin;
+	m_targetPosition = ghostOrigin;
+}
+
+#include <stdio.h>
+
+void btKinematicCharacterController::playerStep (  btCollisionWorld* collisionWorld, btScalar dt)
+{
+	// Advances the character by one step of length dt: updates the vertical velocity, then runs the
+	// stepUp / stepForwardAndStrafe / stepDown phases and writes the resulting position to the ghost object.
+
+	// quick check...
+	if (!m_useWalkDirection && m_velocityTimeInterval <= 0.0) {
+//		printf("\n");
+		return;		// no motion
+	}
+	// Remember the ground state from before this step; stepDown() consults it.
+	m_wasOnGround = onGround();
+
+	// Update fall velocity.
+	m_verticalVelocity -= m_gravity * dt;
+	if(m_verticalVelocity > 0.0 && m_verticalVelocity > m_jumpSpeed)
+	{
+		m_verticalVelocity = m_jumpSpeed;	// upward speed is capped at the jump speed
+	}
+	if(m_verticalVelocity < 0.0 && btFabs(m_verticalVelocity) > btFabs(m_fallSpeed))
+	{
+		m_verticalVelocity = -btFabs(m_fallSpeed);	// downward speed is capped at the magnitude of m_fallSpeed
+	}
+	m_verticalOffset = m_verticalVelocity * dt;
+
+
+	btTransform xform;
+	xform = m_ghostObject->getWorldTransform ();
+
+//	printf("walkDirection(%f,%f,%f)\n",walkDirection[0],walkDirection[1],walkDirection[2]);
+//	printf("walkSpeed=%f\n",walkSpeed);
+
+	stepUp (collisionWorld);
+	if (m_useWalkDirection) {
+		stepForwardAndStrafe (collisionWorld, m_walkDirection);	// walk mode: m_walkDirection is used as the displacement, not scaled by dt
+	} else {
+		//printf("  time: %f", m_velocityTimeInterval);
+		// still have some time left for moving!
+		btScalar dtMoving =
+			(dt < m_velocityTimeInterval) ? dt : m_velocityTimeInterval;
+		m_velocityTimeInterval -= dt;
+
+		// how far will we move while we are moving?
+		btVector3 move = m_walkDirection * dtMoving;
+
+		//printf("  dtMoving: %f", dtMoving);
+
+		// okay, step
+		stepForwardAndStrafe(collisionWorld, move);
+	}
+	stepDown (collisionWorld, dt);
+
+	// printf("\n");
+
+	xform.setOrigin (m_currentPosition);	// the transform read above is kept except for its origin
+	m_ghostObject->setWorldTransform (xform);
+}
+
+void btKinematicCharacterController::setFallSpeed (btScalar fallSpeed)	// playerStep() caps the downward speed at the absolute value of this
+{
+	m_fallSpeed = fallSpeed;
+}
+
+void btKinematicCharacterController::setJumpSpeed (btScalar jumpSpeed)	// velocity that jump() assigns; playerStep() also caps the upward speed at it
+{
+	m_jumpSpeed = jumpSpeed;
+}
+
+void btKinematicCharacterController::setMaxJumpHeight (btScalar maxJumpHeight)	// stored only: nothing in this file reads m_maxJumpHeight
+{
+	m_maxJumpHeight = maxJumpHeight;
+}
+
+bool btKinematicCharacterController::canJump () const	// jumping is allowed exactly when onGround() is true
+{
+	return onGround();
+}
+
+void btKinematicCharacterController::jump ()	// starts a jump by setting the vertical velocity to m_jumpSpeed; does nothing unless canJump()
+{
+	if (!canJump())
+		return;
+
+	m_verticalVelocity = m_jumpSpeed;
+	m_wasJumping = true;	// cleared again by stepDown() when its sweep hits something
+	// The region below is compiled out (#if 0); it refers to an m_rigidBody member that this class does not declare.
+#if 0
+	currently no jumping.
+	btTransform xform;
+	m_rigidBody->getMotionState()->getWorldTransform (xform);
+	btVector3 up = xform.getBasis()[1];
+	up.normalize ();
+	btScalar magnitude = (btScalar(1.0)/m_rigidBody->getInvMass()) * btScalar(8.0);
+	m_rigidBody->applyCentralImpulse (up * magnitude);
+#endif
+}
+
+void btKinematicCharacterController::setGravity(btScalar gravity)	// playerStep() subtracts gravity * dt from the vertical velocity every step
+{
+	m_gravity = gravity;
+}
+
+btScalar btKinematicCharacterController::getGravity() const	// returns the value last stored in m_gravity (constructor default: 9.8 * 3)
+{
+	return m_gravity;
+}
+
+void btKinematicCharacterController::setMaxSlope(btScalar slopeRadians)	// stores the angle and caches its cosine
+{
+	m_maxSlopeRadians = slopeRadians;
+	m_maxSlopeCosine = btCos(slopeRadians);	// stepDown() passes this as the minimum slope dot product to its sweep callback
+}
+
+btScalar btKinematicCharacterController::getMaxSlope() const	// returns the angle exactly as given to setMaxSlope(), in radians
+{
+	return m_maxSlopeRadians;
+}
+
+bool btKinematicCharacterController::onGround () const	// exact comparison: true only while both values are exactly 0.0, which stepUp()/stepDown() assign on a hit
+{
+	return m_verticalVelocity == 0.0 && m_verticalOffset == 0.0;
+}
+
+
+btVector3* btKinematicCharacterController::getUpAxisDirections()
+{
+	static btVector3 sUpAxisDirection[3] = { btVector3(1.0f, 0.0f, 0.0f), btVector3(0.0f, 1.0f, 0.0f), btVector3(0.0f, 0.0f, 1.0f) };
+	// Unit X, Y and Z axes; callers index the array with m_upAxis. It is a function-local static, so all callers share the same non-const storage.
+	return sUpAxisDirection;
+}
+
+void btKinematicCharacterController::debugDraw(btIDebugDraw* debugDrawer)	// empty body: nothing is drawn and debugDrawer is unused
+{
+}
diff --git a/src/bullet/BulletDynamics/Character/btKinematicCharacterController.h b/src/bullet/BulletDynamics/Character/btKinematicCharacterController.h
new file mode 100644
index 00000000..ef01f8a3
--- /dev/null
+++ b/src/bullet/BulletDynamics/Character/btKinematicCharacterController.h
@@ -0,0 +1,163 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2008 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_KINEMATIC_CHARACTER_CONTROLLER_H
+#define BT_KINEMATIC_CHARACTER_CONTROLLER_H
+
+#include "LinearMath/btVector3.h"
+
+#include "btCharacterControllerInterface.h"
+
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+
+
+class btCollisionShape;
+class btConvexShape;
+class btRigidBody;
+class btCollisionWorld;
+class btCollisionDispatcher;
+class btPairCachingGhostObject;
+
+///btKinematicCharacterController is an object that supports a sliding motion in a world.
+///It uses a ghost object and convex sweep test to test for upcoming collisions. This is combined with discrete collision detection to recover from penetrations.
+///Interaction between btKinematicCharacterController and dynamic rigid bodies needs to be explicitly implemented by the user.
+class btKinematicCharacterController : public btCharacterControllerInterface
+{
+protected:
+	// --- collision proxy and vertical-motion state ---
+	btScalar m_halfHeight;	// not referenced by btKinematicCharacterController.cpp
+	
+	btPairCachingGhostObject* m_ghostObject;	// its world transform origin is read and written as the character position
+	btConvexShape*	m_convexShape;//is also in m_ghostObject, but it needs to be convex, so we store it here to avoid upcast
+	
+	btScalar m_verticalVelocity;	// signed speed along the up axis; playerStep() subtracts m_gravity * dt from it
+	btScalar m_verticalOffset;	// m_verticalVelocity * dt for the current step
+	btScalar m_fallSpeed;	// cap on the downward speed (its absolute value is used)
+	btScalar m_jumpSpeed;	// velocity assigned by jump(); also the cap on the upward speed
+	btScalar m_maxJumpHeight;	// stored by setMaxJumpHeight(); not read by the .cpp
+	btScalar m_maxSlopeRadians; // Slope angle that is set (used for returning the exact value)
+	btScalar m_maxSlopeCosine;  // Cosine equivalent of m_maxSlopeRadians (calculated once when set, for optimization)
+	btScalar m_gravity;
+
+	btScalar m_turnAngle;	// set to 0 by the constructor; not otherwise used by the .cpp
+	
+	btScalar m_stepHeight;	// distance stepUp() tries to climb each step
+
+	btScalar	m_addedMargin;//@todo: remove this and fix the code
+
+	///this is the desired walk direction, set by the user
+	btVector3	m_walkDirection;
+	btVector3	m_normalizedDirection;	// getNormalizedVector(m_walkDirection), refreshed by both setters
+
+	//some internal variables
+	btVector3 m_currentPosition;
+	btScalar  m_currentStepOffset;	// height gained in stepUp(), taken back in stepDown()
+	btVector3 m_targetPosition;
+
+	///keep track of the contact manifolds
+	btManifoldArray	m_manifoldArray;
+
+	bool m_touchingContact;	// set by preStep(): true when a recovery pass reported penetration
+	btVector3 m_touchingNormal;	// signed normal of the deepest contact seen by recoverFromPenetration()
+
+	bool  m_wasOnGround;	// onGround() as sampled at the start of playerStep()
+	bool  m_wasJumping;	// set by jump(), cleared when stepDown() hits something
+	bool	m_useGhostObjectSweepTest;	// true: sweep through the ghost object; false: sweep through the collision world
+	bool	m_useWalkDirection;	// true: walk-direction mode; false: velocity-for-time-interval mode
+	btScalar	m_velocityTimeInterval;	// time left in velocity mode; decreased by dt each playerStep()
+	int m_upAxis;	// index into getUpAxisDirections(): 0 = X, 1 = Y, 2 = Z
+
+	static btVector3* getUpAxisDirections();
+	// --- vector helpers used when sliding along a hit surface ---
+	btVector3 computeReflectionDirection (const btVector3& direction, const btVector3& normal);
+	btVector3 parallelComponent (const btVector3& direction, const btVector3& normal);
+	btVector3 perpindicularComponent (const btVector3& direction, const btVector3& normal);
+	// --- the phases run by preStep() and playerStep() ---
+	bool recoverFromPenetration ( btCollisionWorld* collisionWorld);
+	void stepUp (btCollisionWorld* collisionWorld);
+	void updateTargetPositionBasedOnCollision (const btVector3& hit_normal, btScalar tangentMag = btScalar(0.0), btScalar normalMag = btScalar(1.0));
+	void stepForwardAndStrafe (btCollisionWorld* collisionWorld, const btVector3& walkMove);
+	void stepDown (btCollisionWorld* collisionWorld, btScalar dt);
+public:
+	btKinematicCharacterController (btPairCachingGhostObject* ghostObject,btConvexShape* convexShape,btScalar stepHeight, int upAxis = 1);
+	~btKinematicCharacterController ();
+	
+
+	///btActionInterface interface
+	virtual void updateAction( btCollisionWorld* collisionWorld,btScalar deltaTime)
+	{
+		preStep ( collisionWorld);
+		playerStep (collisionWorld, deltaTime);
+	}
+	
+	///btActionInterface interface
+	void	debugDraw(btIDebugDraw* debugDrawer);
+	/// Selects the up axis; values outside 0..2 are clamped into that range.
+	void setUpAxis (int axis)
+	{
+		if (axis < 0)
+			axis = 0;
+		if (axis > 2)
+			axis = 2;
+		m_upAxis = axis;
+	}
+
+	/// This should probably be called setPositionIncrementPerSimulatorStep.
+	/// This is neither a direction nor a velocity, but the amount to
+	///	increment the position each simulation iteration, regardless
+	///	of dt.
+	/// This call will reset any velocity set by setVelocityForTimeInterval().
+	virtual void	setWalkDirection(const btVector3& walkDirection);
+
+	/// Caller provides a velocity with which the character should move for
+	///	the given time period.  After the time period, velocity is reset
+	///	to zero.
+	/// This call will reset any walk direction set by setWalkDirection().
+	/// Negative time intervals will result in no motion.
+	virtual void setVelocityForTimeInterval(const btVector3& velocity,
+				btScalar timeInterval);
+	/// reset() currently does nothing; warp() places the ghost object at origin with an identity rotation.
+	void reset ();
+	void warp (const btVector3& origin);
+	/// updateAction() calls these two in this order once per step.
+	void preStep (  btCollisionWorld* collisionWorld);
+	void playerStep ( btCollisionWorld* collisionWorld, btScalar dt);
+	/// Tuning of the vertical motion; see the matching members above.
+	void setFallSpeed (btScalar fallSpeed);
+	void setJumpSpeed (btScalar jumpSpeed);
+	void setMaxJumpHeight (btScalar maxJumpHeight);
+	bool canJump () const;
+	/// Starts a jump when canJump() is true, otherwise does nothing.
+	void jump ();
+
+	void setGravity(btScalar gravity);
+	btScalar getGravity() const;
+
+	/// The max slope determines the maximum angle that the controller can walk up.
+	/// The slope angle is measured in radians.
+	void setMaxSlope(btScalar slopeRadians);
+	btScalar getMaxSlope() const;
+
+	btPairCachingGhostObject* getGhostObject();
+	void	setUseGhostSweepTest(bool useGhostObjectSweepTest)
+	{
+		m_useGhostObjectSweepTest = useGhostObjectSweepTest;
+	}
+	/// True while the vertical velocity and the vertical offset are both exactly zero.
+	bool onGround () const;
+};
+
+#endif // BT_KINEMATIC_CHARACTER_CONTROLLER_H
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btConeTwistConstraint.cpp b/src/bullet/BulletDynamics/ConstraintSolver/btConeTwistConstraint.cpp
new file mode 100644
index 00000000..755544f0
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btConeTwistConstraint.cpp
@@ -0,0 +1,1132 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+btConeTwistConstraint is Copyright (c) 2007 Starbreeze Studios
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+Written by: Marcus Hennix
+*/
+
+
+#include "btConeTwistConstraint.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btTransformUtil.h"
+#include "LinearMath/btMinMax.h"
+#include <new>
+
+
+
+//#define CONETWIST_USE_OBSOLETE_SOLVER true
+#define CONETWIST_USE_OBSOLETE_SOLVER false
+#define CONETWIST_DEF_FIX_THRESH btScalar(.05f)
+
+
+SIMD_FORCE_INLINE btScalar computeAngularImpulseDenominator(const btVector3& axis, const btMatrix3x3& invInertiaWorld)
+{
+	// axis . (axis * invInertiaWorld); calcAngleInfo2() sums this over both bodies and inverts it to get m_kSwing / m_kTwist
+	return axis.dot(axis * invInertiaWorld);
+}
+
+
+
+// Two-body constructor: stores each body's local constraint frame, then init() applies the default limits and tuning.
+btConeTwistConstraint::btConeTwistConstraint(btRigidBody& rbA,btRigidBody& rbB, 
+											 const btTransform& rbAFrame,const btTransform& rbBFrame)
+											 :btTypedConstraint(CONETWIST_CONSTRAINT_TYPE, rbA,rbB),m_rbAFrame(rbAFrame),m_rbBFrame(rbBFrame),
+											 m_angularOnly(false),
+											 m_useSolveConstraintObsolete(CONETWIST_USE_OBSOLETE_SOLVER)
+{
+	init();
+}
+// Single-body constructor: only rbA and its frame are supplied; m_rbBFrame is set to a copy of m_rbAFrame.
+btConeTwistConstraint::btConeTwistConstraint(btRigidBody& rbA,const btTransform& rbAFrame)
+											:btTypedConstraint(CONETWIST_CONSTRAINT_TYPE,rbA),m_rbAFrame(rbAFrame),
+											 m_angularOnly(false),
+											 m_useSolveConstraintObsolete(CONETWIST_USE_OBSOLETE_SOLVER)
+{
+	m_rbBFrame = m_rbAFrame;
+	init();	
+}
+
+// Resets the solve flags, motor state, limits and tuning parameters to their defaults; called by both constructors.
+void btConeTwistConstraint::init()
+{
+	m_angularOnly = false;
+	m_solveTwistLimit = false;
+	m_solveSwingLimit = false;
+	m_bMotorEnabled = false;
+	m_maxMotorImpulse = btScalar(-1);	// negative: solveConstraintObsolete() only clamps the motor impulse when this is >= 0
+	// Initial limits: every setLimit() argument is BT_LARGE_FLOAT.
+	setLimit(btScalar(BT_LARGE_FLOAT), btScalar(BT_LARGE_FLOAT), btScalar(BT_LARGE_FLOAT));
+	m_damping = btScalar(0.01);
+	m_fixThresh = CONETWIST_DEF_FIX_THRESH;	// swing spans below this are handled as locked (see getInfo1 / calcAngleInfo2)
+	m_flags = 0;	// no BT_CONETWIST_FLAGS_* bit set: getInfo2NonVirtual() then uses info->erp and does not write info->cfm
+	m_linCFM = btScalar(0.f);
+	m_linERP = btScalar(0.7f);
+	m_angCFM = btScalar(0.f);
+}
+
+
+void btConeTwistConstraint::getInfo1 (btConstraintInfo1* info)
+{
+	// The obsolete solver path reports no rows at all.
+	if (m_useSolveConstraintObsolete)
+	{
+		info->m_numConstraintRows = 0;
+		info->nub = 0;
+		return;
+	}
+
+	// Three rows are always reported for the linear part.
+	info->m_numConstraintRows = 3;
+	info->nub = 3;
+	// Refresh m_solveSwingLimit / m_solveTwistLimit for the bodies' current transforms.
+	calcAngleInfo2(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform(),m_rbA.getInvInertiaTensorWorld(),m_rbB.getInvInertiaTensorWorld());
+
+	// Every active angular limit row adds one to the row count and takes one off nub.
+	int extraRows = 0;
+	if (m_solveSwingLimit)
+	{
+		// Both swing spans under the fix threshold: the swing limit needs two rows instead of one.
+		const bool swingLocked = (m_swingSpan1 < m_fixThresh) && (m_swingSpan2 < m_fixThresh);
+		extraRows += swingLocked ? 2 : 1;
+	}
+	if (m_solveTwistLimit)
+		++extraRows;
+	info->m_numConstraintRows += extraRows;
+	info->nub -= extraRows;
+}
+
+void btConeTwistConstraint::getInfo1NonVirtual (btConstraintInfo1* info)
+{
+	//always reserve 6 rows: object transform is not available on SPU
+	info->m_numConstraintRows = 6;
+	info->nub = 0;
+	// (6 is the most getInfo1() can report: 3 linear rows + 2 swing rows + 1 twist row)
+}
+	
+// Forwards to getInfo2NonVirtual() with both bodies' current center-of-mass transforms and world inverse inertia tensors.
+void btConeTwistConstraint::getInfo2 (btConstraintInfo2* info)
+{
+	getInfo2NonVirtual(info,m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform(),m_rbA.getInvInertiaTensorWorld(),m_rbB.getInvInertiaTensorWorld());
+}
+// Fills the solver rows: rows 0-2 tie the two pivots together, followed by one or two swing rows and one twist row when those limits are active.
+void btConeTwistConstraint::getInfo2NonVirtual (btConstraintInfo2* info,const btTransform& transA,const btTransform& transB,const btMatrix3x3& invInertiaWorldA,const btMatrix3x3& invInertiaWorldB)
+{
+	calcAngleInfo2(transA,transB,invInertiaWorldA,invInertiaWorldB);
+	// calcAngleInfo2() has just refreshed m_solveSwingLimit / m_solveTwistLimit and the axes and corrections read below
+	btAssert(!m_useSolveConstraintObsolete);
+    // set jacobian: rows 0-2 put 1 on the diagonal of body A's linear part
+    info->m_J1linearAxis[0] = 1;
+    info->m_J1linearAxis[info->rowskip+1] = 1;
+    info->m_J1linearAxis[2*info->rowskip+2] = 1;
+	btVector3 a1 = transA.getBasis() * m_rbAFrame.getOrigin();	// pivot offset of A, rotated by A's basis
+	{
+		// angular part for body A: skew-symmetric matrix of -a1, one vector per row
+		btVector3* angular0 = (btVector3*)(info->m_J1angularAxis);
+		btVector3* angular1 = (btVector3*)(info->m_J1angularAxis+info->rowskip);
+		btVector3* angular2 = (btVector3*)(info->m_J1angularAxis+2*info->rowskip);
+		btVector3 a1neg = -a1;
+		a1neg.getSkewSymmetricMatrix(angular0,angular1,angular2);
+	}
+	btVector3 a2 = transB.getBasis() * m_rbBFrame.getOrigin();	// pivot offset of B, rotated by B's basis
+	{
+		// angular part for body B: skew-symmetric matrix of +a2
+		btVector3* angular0 = (btVector3*)(info->m_J2angularAxis);
+		btVector3* angular1 = (btVector3*)(info->m_J2angularAxis+info->rowskip);
+		btVector3* angular2 = (btVector3*)(info->m_J2angularAxis+2*info->rowskip);
+		a2.getSkewSymmetricMatrix(angular0,angular1,angular2);
+	}
+    // set right hand side: m_linERP is used only when BT_CONETWIST_FLAGS_LIN_ERP is set, otherwise the solver's erp
+	btScalar linERP = (m_flags & BT_CONETWIST_FLAGS_LIN_ERP) ? m_linERP : info->erp;
+    btScalar k = info->fps * linERP;
+    int j;
+	for (j=0; j<3; j++)
+    {
+        info->m_constraintError[j*info->rowskip] = k * (a2[j] + transB.getOrigin()[j] - a1[j] - transA.getOrigin()[j]);	// k * (pivot B - pivot A), component j
+		info->m_lowerLimit[j*info->rowskip] = -SIMD_INFINITY;
+		info->m_upperLimit[j*info->rowskip] = SIMD_INFINITY;
+		if(m_flags & BT_CONETWIST_FLAGS_LIN_CFM)
+		{
+			info->cfm[j*info->rowskip] = m_linCFM;
+		}
+    }
+	int row = 3;
+    int srow = row * info->rowskip;	// offset of the next free row; advanced by rowskip for each row written below
+	btVector3 ax1;
+	// angular limits
+	if(m_solveSwingLimit)
+	{
+		btScalar *J1 = info->m_J1angularAxis;
+		btScalar *J2 = info->m_J2angularAxis;
+		if((m_swingSpan1 < m_fixThresh) && (m_swingSpan2 < m_fixThresh))
+		{
+			// both swing spans below the fix threshold: write two rows, along columns 1 and 2 of frame A in world space
+			btTransform trA = transA*m_rbAFrame;
+			btVector3 p = trA.getBasis().getColumn(1);
+			btVector3 q = trA.getBasis().getColumn(2);
+			int srow1 = srow + info->rowskip;
+			J1[srow+0] = p[0];
+			J1[srow+1] = p[1];
+			J1[srow+2] = p[2];
+			J1[srow1+0] = q[0];
+			J1[srow1+1] = q[1];
+			J1[srow1+2] = q[2];
+			J2[srow+0] = -p[0];
+			J2[srow+1] = -p[1];
+			J2[srow+2] = -p[2];
+			J2[srow1+0] = -q[0];
+			J2[srow1+1] = -q[1];
+			J2[srow1+2] = -q[2];
+			btScalar fact = info->fps * m_relaxationFactor;
+			info->m_constraintError[srow] =   fact * m_swingAxis.dot(p);
+			info->m_constraintError[srow1] =  fact * m_swingAxis.dot(q);
+			info->m_lowerLimit[srow] = -SIMD_INFINITY;
+			info->m_upperLimit[srow] = SIMD_INFINITY;
+			info->m_lowerLimit[srow1] = -SIMD_INFINITY;
+			info->m_upperLimit[srow1] = SIMD_INFINITY;
+			srow = srow1 + info->rowskip;
+		}
+		else
+		{
+			// single swing row along m_swingAxis, scaled by the relaxation factor squared
+			ax1 = m_swingAxis * m_relaxationFactor * m_relaxationFactor;
+			J1[srow+0] = ax1[0];
+			J1[srow+1] = ax1[1];
+			J1[srow+2] = ax1[2];
+			J2[srow+0] = -ax1[0];
+			J2[srow+1] = -ax1[1];
+			J2[srow+2] = -ax1[2];
+			btScalar k = info->fps * m_biasFactor;	// shadows the linear k declared above
+			// right hand side for the swing row
+			info->m_constraintError[srow] = k * m_swingCorrection;
+			if(m_flags & BT_CONETWIST_FLAGS_ANG_CFM)
+			{
+				info->cfm[srow] = m_angCFM;
+			}
+			// m_swingCorrection is always positive or 0
+			info->m_lowerLimit[srow] = 0;
+			info->m_upperLimit[srow] = SIMD_INFINITY;
+			srow += info->rowskip;
+		}
+	}
+	if(m_solveTwistLimit)
+	{
+		// twist row along m_twistAxis, scaled by the relaxation factor squared
+		ax1 = m_twistAxis * m_relaxationFactor * m_relaxationFactor;
+		btScalar *J1 = info->m_J1angularAxis;
+		btScalar *J2 = info->m_J2angularAxis;
+		J1[srow+0] = ax1[0];
+		J1[srow+1] = ax1[1];
+		J1[srow+2] = ax1[2];
+		J2[srow+0] = -ax1[0];
+		J2[srow+1] = -ax1[1];
+		J2[srow+2] = -ax1[2];
+		btScalar k = info->fps * m_biasFactor;
+		info->m_constraintError[srow] = k * m_twistCorrection;
+		if(m_flags & BT_CONETWIST_FLAGS_ANG_CFM)
+		{
+			info->cfm[srow] = m_angCFM;
+		}
+		if(m_twistSpan > 0.0f)
+		{
+			// positive twist span: one-sided row whose open side follows the sign of m_twistCorrection
+			if(m_twistCorrection > 0.0f)
+			{
+				info->m_lowerLimit[srow] = 0;
+				info->m_upperLimit[srow] = SIMD_INFINITY;
+			} 
+			else
+			{
+				info->m_lowerLimit[srow] = -SIMD_INFINITY;
+				info->m_upperLimit[srow] = 0;
+			} 
+		}
+		else
+		{
+			// twist span is not positive: the row is unbounded in both directions
+			info->m_lowerLimit[srow] = -SIMD_INFINITY;
+			info->m_upperLimit[srow] = SIMD_INFINITY;
+		}
+		srow += info->rowskip;
+	}
+}
+	
+
+
+void	btConeTwistConstraint::buildJacobian()
+{
+	// Only the obsolete solver path uses this; otherwise there is nothing to prepare.
+	if (!m_useSolveConstraintObsolete)
+		return;
+	// Clear the impulses accumulated by solveConstraintObsolete().
+	m_appliedImpulse = btScalar(0.);
+	m_accTwistLimitImpulse = btScalar(0.);
+	m_accSwingLimitImpulse = btScalar(0.);
+	m_accMotorImpulse = btVector3(0.,0.,0.);
+
+	if (!m_angularOnly)
+	{
+		// World-space pivot of each body and the offset between the two.
+		const btVector3 worldPivotA = m_rbA.getCenterOfMassTransform()*m_rbAFrame.getOrigin();
+		const btVector3 worldPivotB = m_rbB.getCenterOfMassTransform()*m_rbBFrame.getOrigin();
+		const btVector3 pivotDelta = worldPivotB - worldPivotA;
+
+		// First axis follows the pivot offset, or falls back to (1,0,0) when the offset is negligible.
+		btVector3 axes[3];
+		if (pivotDelta.length2() > SIMD_EPSILON)
+			axes[0] = pivotDelta.normalized();
+		else
+			axes[0].setValue(btScalar(1.0),0,0);
+		// btPlaneSpace1 fills axes[1] and axes[2] from axes[0].
+		btPlaneSpace1(axes[0], axes[1], axes[2]);
+		// Re-construct each of the three jacobian entries in place, one per axis.
+		for (int axisIdx=0;axisIdx<3;axisIdx++)
+		{
+			new (&m_jac[axisIdx]) btJacobianEntry(
+			m_rbA.getCenterOfMassTransform().getBasis().transpose(),
+			m_rbB.getCenterOfMassTransform().getBasis().transpose(),
+			worldPivotA - m_rbA.getCenterOfMassPosition(),
+			worldPivotB - m_rbB.getCenterOfMassPosition(),
+			axes[axisIdx],
+			m_rbA.getInvInertiaDiagLocal(),
+			m_rbA.getInvMass(),
+			m_rbB.getInvInertiaDiagLocal(),
+			m_rbB.getInvMass());
+		}
+	}
+
+	// Refresh the swing/twist limit state for the bodies' current transforms.
+	calcAngleInfo2(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform(),m_rbA.getInvInertiaTensorWorld(),m_rbB.getInvInertiaTensorWorld());
+}
+
+
+// Obsolete solver step: pivot (linear) impulses, then motor or damping, then swing and twist limit impulses. No-op unless m_useSolveConstraintObsolete.
+void	btConeTwistConstraint::solveConstraintObsolete(btRigidBody& bodyA,btRigidBody& bodyB,btScalar	timeStep)
+{
+	#ifndef __SPU__
+	if (m_useSolveConstraintObsolete)
+	{
+		btVector3 pivotAInW = m_rbA.getCenterOfMassTransform()*m_rbAFrame.getOrigin();
+		btVector3 pivotBInW = m_rbB.getCenterOfMassTransform()*m_rbBFrame.getOrigin();
+
+		btScalar tau = btScalar(0.3);
+
+		//linear part
+		if (!m_angularOnly)
+		{
+			btVector3 rel_pos1 = pivotAInW - m_rbA.getCenterOfMassPosition();
+			btVector3 rel_pos2 = pivotBInW - m_rbB.getCenterOfMassPosition();
+
+			btVector3 vel1;
+			bodyA.internalGetVelocityInLocalPointObsolete(rel_pos1,vel1);
+			btVector3 vel2;
+			bodyB.internalGetVelocityInLocalPointObsolete(rel_pos2,vel2);
+			btVector3 vel = vel1 - vel2;
+
+			for (int i=0;i<3;i++)
+			{
+				const btVector3& normal = m_jac[i].m_linearJointAxis;
+				btScalar jacDiagABInv = btScalar(1.) / m_jac[i].getDiagonal();
+
+				btScalar rel_vel;
+				rel_vel = normal.dot(vel);
+				//positional error (zeroth order error)
+				btScalar depth = -(pivotAInW - pivotBInW).dot(normal); //this is the error projected on the normal
+				btScalar impulse = depth*tau/timeStep  * jacDiagABInv -  rel_vel * jacDiagABInv;
+				m_appliedImpulse += impulse;
+				// apply equal and opposite impulses at the two pivots
+				btVector3 ftorqueAxis1 = rel_pos1.cross(normal);
+				btVector3 ftorqueAxis2 = rel_pos2.cross(normal);
+				bodyA.internalApplyImpulse(normal*m_rbA.getInvMass(), m_rbA.getInvInertiaTensorWorld()*ftorqueAxis1,impulse);
+				bodyB.internalApplyImpulse(normal*m_rbB.getInvMass(), m_rbB.getInvInertiaTensorWorld()*ftorqueAxis2,-impulse);
+
+			}
+		}
+
+		// apply motor
+		if (m_bMotorEnabled)
+		{
+			// compute current and predicted transforms
+			btTransform trACur = m_rbA.getCenterOfMassTransform();
+			btTransform trBCur = m_rbB.getCenterOfMassTransform();
+			btVector3 omegaA; bodyA.internalGetAngularVelocity(omegaA);
+			btVector3 omegaB; bodyB.internalGetAngularVelocity(omegaB);
+			btTransform trAPred; trAPred.setIdentity();
+			btVector3 zerovec(0,0,0);
+			btTransformUtil::integrateTransform(
+				trACur, zerovec, omegaA, timeStep, trAPred);
+			btTransform trBPred; trBPred.setIdentity();
+			btTransformUtil::integrateTransform(
+				trBCur, zerovec, omegaB, timeStep, trBPred);
+
+			// compute desired transforms in world
+			btTransform trPose(m_qTarget);
+			btTransform trABDes = m_rbBFrame * trPose * m_rbAFrame.inverse();
+			btTransform trADes = trBPred * trABDes;
+			btTransform trBDes = trAPred * trABDes.inverse();
+
+			// compute desired omegas in world
+			btVector3 omegaADes, omegaBDes;
+
+			btTransformUtil::calculateVelocity(trACur, trADes, timeStep, zerovec, omegaADes);
+			btTransformUtil::calculateVelocity(trBCur, trBDes, timeStep, zerovec, omegaBDes);
+
+			// compute delta omegas
+			btVector3 dOmegaA = omegaADes - omegaA;
+			btVector3 dOmegaB = omegaBDes - omegaB;
+
+			// compute weighted avg axis of dOmega (weighting based on inertias)
+			btVector3 axisA(0,0,0), axisB(0,0,0); // zero-initialised: an axis skipped below must not feed garbage into avgAxis
+			btScalar kAxisAInv = 0, kAxisBInv = 0;
+
+			if (dOmegaA.length2() > SIMD_EPSILON)
+			{
+				axisA = dOmegaA.normalized();
+				kAxisAInv = getRigidBodyA().computeAngularImpulseDenominator(axisA);
+			}
+
+			if (dOmegaB.length2() > SIMD_EPSILON)
+			{
+				axisB = dOmegaB.normalized();
+				kAxisBInv = getRigidBodyB().computeAngularImpulseDenominator(axisB);
+			}
+
+			btVector3 avgAxis = kAxisAInv * axisA + kAxisBInv * axisB;
+
+			static bool bDoTorque = true;
+			if (bDoTorque && avgAxis.length2() > SIMD_EPSILON)
+			{
+				avgAxis.normalize();
+				kAxisAInv = getRigidBodyA().computeAngularImpulseDenominator(avgAxis);
+				kAxisBInv = getRigidBodyB().computeAngularImpulseDenominator(avgAxis);
+				btScalar kInvCombined = kAxisAInv + kAxisBInv;
+
+				btVector3 impulse = (kAxisAInv * dOmegaA - kAxisBInv * dOmegaB) /
+									(kInvCombined * kInvCombined);
+
+				if (m_maxMotorImpulse >= 0)
+				{
+					btScalar fMaxImpulse = m_maxMotorImpulse;
+					if (m_bNormalizedMotorStrength)
+						fMaxImpulse = fMaxImpulse/kAxisAInv;
+
+					btVector3 newUnclampedAccImpulse = m_accMotorImpulse + impulse;
+					btScalar  newUnclampedMag = newUnclampedAccImpulse.length();
+					if (newUnclampedMag > fMaxImpulse)
+					{
+						newUnclampedAccImpulse.normalize();
+						newUnclampedAccImpulse *= fMaxImpulse;
+						impulse = newUnclampedAccImpulse - m_accMotorImpulse;
+					}
+					m_accMotorImpulse += impulse;
+				}
+
+				btScalar  impulseMag  = impulse.length();
+				btVector3 impulseAxis = (impulseMag > btScalar(0.)) ? impulse / impulseMag : zerovec; // a zero impulse has no axis; 0/0 would send NaN into both bodies
+
+				bodyA.internalApplyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*impulseAxis, impulseMag);
+				bodyB.internalApplyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*impulseAxis, -impulseMag);
+
+			}
+		}
+		else if (m_damping > SIMD_EPSILON) // no motor: do a little damping
+		{
+			btVector3 angVelA; bodyA.internalGetAngularVelocity(angVelA);
+			btVector3 angVelB; bodyB.internalGetAngularVelocity(angVelB);
+			btVector3 relVel = angVelB - angVelA;
+			if (relVel.length2() > SIMD_EPSILON)
+			{
+				btVector3 relVelAxis = relVel.normalized();
+				btScalar m_kDamping =  btScalar(1.) /
+					(getRigidBodyA().computeAngularImpulseDenominator(relVelAxis) +
+					 getRigidBodyB().computeAngularImpulseDenominator(relVelAxis));
+				btVector3 impulse = m_damping * m_kDamping * relVel;
+
+				btScalar  impulseMag  = impulse.length();
+				btVector3 impulseAxis = impulse / impulseMag;
+				bodyA.internalApplyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*impulseAxis, impulseMag);
+				bodyB.internalApplyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*impulseAxis, -impulseMag);
+			}
+		}
+
+		// joint limits
+		{
+			///solve angular part
+			btVector3 angVelA;
+			bodyA.internalGetAngularVelocity(angVelA);
+			btVector3 angVelB;
+			bodyB.internalGetAngularVelocity(angVelB);
+
+			// solve swing limit
+			if (m_solveSwingLimit)
+			{
+				btScalar amplitude = m_swingLimitRatio * m_swingCorrection*m_biasFactor/timeStep;
+				btScalar relSwingVel = (angVelB - angVelA).dot(m_swingAxis);
+				if (relSwingVel > 0)
+					amplitude += m_swingLimitRatio * relSwingVel * m_relaxationFactor;
+				btScalar impulseMag = amplitude * m_kSwing;
+
+				// Clamp the accumulated impulse
+				btScalar temp = m_accSwingLimitImpulse;
+				m_accSwingLimitImpulse = btMax(m_accSwingLimitImpulse + impulseMag, btScalar(0.0) );
+				impulseMag = m_accSwingLimitImpulse - temp;
+
+				btVector3 impulse = m_swingAxis * impulseMag;
+
+				// don't let cone response affect twist
+				// (this can happen since body A's twist doesn't match body B's AND we use an elliptical cone limit)
+				{
+					btVector3 impulseTwistCouple = impulse.dot(m_twistAxisA) * m_twistAxisA;
+					btVector3 impulseNoTwistCouple = impulse - impulseTwistCouple;
+					impulse = impulseNoTwistCouple;
+				}
+				// the clamp or the twist removal can leave a zero impulse: use a zero axis then instead of dividing 0 by 0
+				impulseMag = impulse.length();
+				btVector3 noTwistSwingAxis = (impulseMag > btScalar(0.)) ? impulse / impulseMag : btVector3(0,0,0);
+
+				bodyA.internalApplyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*noTwistSwingAxis, impulseMag);
+				bodyB.internalApplyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*noTwistSwingAxis, -impulseMag);
+			}
+
+
+			// solve twist limit
+			if (m_solveTwistLimit)
+			{
+				btScalar amplitude = m_twistLimitRatio * m_twistCorrection*m_biasFactor/timeStep;
+				btScalar relTwistVel = (angVelB - angVelA).dot( m_twistAxis );
+				if (relTwistVel > 0) // only damp when moving towards limit (m_twistAxis flipping is important)
+					amplitude += m_twistLimitRatio * relTwistVel * m_relaxationFactor;
+				btScalar impulseMag = amplitude * m_kTwist;
+
+				// Clamp the accumulated impulse
+				btScalar temp = m_accTwistLimitImpulse;
+				m_accTwistLimitImpulse = btMax(m_accTwistLimitImpulse + impulseMag, btScalar(0.0) );
+				impulseMag = m_accTwistLimitImpulse - temp;
+
+				// the impulse acts along m_twistAxis with magnitude impulseMag, equal and opposite on the two bodies
+
+				bodyA.internalApplyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*m_twistAxis,impulseMag);
+				bodyB.internalApplyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*m_twistAxis,-impulseMag);
+			}
+		}
+	}
+#else
+btAssert(0);
+#endif //__SPU__
+}
+
+
+
+
+void	btConeTwistConstraint::updateRHS(btScalar	timeStep)
+{
+	(void)timeStep;
+	// No-op: the body only marks timeStep as unused.
+}
+
+// Computes swing/twist limit state from the rigid bodies' current transforms (compiled out for SPU). NOTE(review): no caller is visible in this part of the file.
+#ifndef __SPU__
+void btConeTwistConstraint::calcAngleInfo()
+{
+	m_swingCorrection = btScalar(0.);
+	m_twistLimitSign = btScalar(0.);
+	m_solveTwistLimit = false;
+	m_solveSwingLimit = false;
+
+	// World-space axes of frame A (all three) and the X axis of frame B. b1Axis2 and b1Axis3 are read by the
+	// swing-axis and twist code below even when a swing span is under 0.05, so they are always computed.
+	const btVector3 b1Axis1 = getRigidBodyA().getCenterOfMassTransform().getBasis() * this->m_rbAFrame.getBasis().getColumn(0);
+	const btVector3 b1Axis2 = getRigidBodyA().getCenterOfMassTransform().getBasis() * this->m_rbAFrame.getBasis().getColumn(1);
+	const btVector3 b1Axis3 = getRigidBodyA().getCenterOfMassTransform().getBasis() * this->m_rbAFrame.getBasis().getColumn(2);
+	const btVector3 b2Axis1 = getRigidBodyB().getCenterOfMassTransform().getBasis() * this->m_rbBFrame.getBasis().getColumn(0);
+	btScalar swing1=btScalar(0.),swing2 = btScalar(0.);
+
+	btScalar swx=btScalar(0.),swy = btScalar(0.);
+	btScalar thresh = btScalar(10.);
+	btScalar fact;
+
+	// Swing angles are only measured for spans of at least 0.05; otherwise they stay 0
+	if (m_swingSpan1 >= btScalar(0.05f))
+	{
+		// swing1: angle of B's X axis measured in A's X/Y plane, scaled down as its projection onto that plane shrinks
+		swx = b2Axis1.dot(b1Axis1);
+		swy = b2Axis1.dot(b1Axis2);
+		swing1  = btAtan2Fast(swy, swx);
+		fact = (swy*swy + swx*swx) * thresh * thresh;
+		fact = fact / (fact + btScalar(1.0));
+		swing1 *= fact;
+	}
+
+	if (m_swingSpan2 >= btScalar(0.05f))
+	{
+		// swing2: the same measurement in A's X/Z plane
+		swx = b2Axis1.dot(b1Axis1);
+		swy = b2Axis1.dot(b1Axis3);
+		swing2  = btAtan2Fast(swy, swx);
+		fact = (swy*swy + swx*swx) * thresh * thresh;
+		fact = fact / (fact + btScalar(1.0));
+		swing2 *= fact;
+	}
+
+	btScalar RMaxAngle1Sq = 1.0f / (m_swingSpan1*m_swingSpan1);
+	btScalar RMaxAngle2Sq = 1.0f / (m_swingSpan2*m_swingSpan2);
+	btScalar EllipseAngle = btFabs(swing1*swing1)* RMaxAngle1Sq + btFabs(swing2*swing2) * RMaxAngle2Sq;
+
+	if (EllipseAngle > 1.0f)
+	{
+		m_swingCorrection = EllipseAngle-1.0f;
+		m_solveSwingLimit = true;
+		// Calculate necessary axis & factors
+		m_swingAxis = b2Axis1.cross(b1Axis2* b2Axis1.dot(b1Axis2) + b1Axis3* b2Axis1.dot(b1Axis3));
+		m_swingAxis.normalize();
+		btScalar swingAxisSign = (b2Axis1.dot(b1Axis1) >= 0.0f) ? 1.0f : -1.0f;
+		m_swingAxis *= swingAxisSign;
+	}
+
+	// Twist limits
+	if (m_twistSpan >= btScalar(0.))
+	{
+		const btVector3 b2Axis2 = getRigidBodyB().getCenterOfMassTransform().getBasis() * this->m_rbBFrame.getBasis().getColumn(1);
+		btQuaternion rotationArc = shortestArcQuat(b2Axis1,b1Axis1);
+		btVector3 TwistRef = quatRotate(rotationArc,b2Axis2);
+		btScalar twist = btAtan2Fast( TwistRef.dot(b1Axis3), TwistRef.dot(b1Axis2) );
+		m_twistAngle = twist;
+
+//		btScalar lockedFreeFactor = (m_twistSpan > btScalar(0.05f)) ? m_limitSoftness : btScalar(0.);
+		btScalar lockedFreeFactor = (m_twistSpan > btScalar(0.05f)) ? btScalar(1.0f) : btScalar(0.);
+		if (twist <= -m_twistSpan*lockedFreeFactor)
+		{
+			m_twistCorrection = -(twist + m_twistSpan);
+			m_solveTwistLimit = true;
+			m_twistAxis = (b2Axis1 + b1Axis1) * 0.5f;
+			m_twistAxis.normalize();
+			m_twistAxis *= -1.0f;
+		}
+		else if (twist >  m_twistSpan*lockedFreeFactor)
+		{
+			m_twistCorrection = (twist - m_twistSpan);
+			m_solveTwistLimit = true;
+			m_twistAxis = (b2Axis1 + b1Axis1) * 0.5f;
+			m_twistAxis.normalize();
+		}
+	}
+}
+#endif //__SPU__
+
+static btVector3 vTwist(1,0,0); // twist axis in constraint's space: the X axis (1,0,0) of the constraint frames
+
+
+// Recomputes the swing/twist limit state (solve flags, axes, corrections, limit ratios, impulse scales) for the given world transforms and inverse inertias.
+void btConeTwistConstraint::calcAngleInfo2(const btTransform& transA, const btTransform& transB, const btMatrix3x3& invInertiaWorldA,const btMatrix3x3& invInertiaWorldB)
+{
+	m_swingCorrection = btScalar(0.);
+	m_twistLimitSign = btScalar(0.);
+	m_solveTwistLimit = false;
+	m_solveSwingLimit = false;
+	// compute rotation of A wrt B (in constraint space)
+	if (m_bMotorEnabled && (!m_useSolveConstraintObsolete))
+	{	// it is assumed that setMotorTarget() was already called
+		// and motor target m_qTarget is within constraint limits
+		// TODO : split rotation to pure swing and pure twist
+		// compute desired transforms in world
+		btTransform trPose(m_qTarget);
+		btTransform trA = transA * m_rbAFrame;
+		btTransform trB = transB * m_rbBFrame;
+		btTransform trDeltaAB = trB * trPose * trA.inverse();
+		btQuaternion qDeltaAB = trDeltaAB.getRotation();
+		btVector3 swingAxis = 	btVector3(qDeltaAB.x(), qDeltaAB.y(), qDeltaAB.z());
+		btScalar swingAxisLen2 = swingAxis.length2(); // btScalar (not float) so double-precision builds are not truncated
+		if(btFuzzyZero(swingAxisLen2))
+		{
+		   return;
+		}
+		m_swingAxis = swingAxis;
+		m_swingAxis.normalize();
+		m_swingCorrection = qDeltaAB.getAngle();
+		if(!btFuzzyZero(m_swingCorrection))
+		{
+			m_solveSwingLimit = true;
+		}
+		return;
+	}
+
+	// Motor disabled, or obsolete solver in use: evaluate the actual swing and twist limits.
+	{
+		// compute rotation of A wrt B (in constraint space)
+		btQuaternion qA = transA.getRotation() * m_rbAFrame.getRotation();
+		btQuaternion qB = transB.getRotation() * m_rbBFrame.getRotation();
+		btQuaternion qAB = qB.inverse() * qA;
+		// split rotation into cone and twist
+		// (all this is done from B's perspective. Maybe I should be averaging axes...)
+		btVector3 vConeNoTwist = quatRotate(qAB, vTwist); vConeNoTwist.normalize();
+		btQuaternion qABCone  = shortestArcQuat(vTwist, vConeNoTwist); qABCone.normalize();
+		btQuaternion qABTwist = qABCone.inverse() * qAB; qABTwist.normalize();
+
+		if (m_swingSpan1 >= m_fixThresh && m_swingSpan2 >= m_fixThresh)
+		{
+			btScalar swingAngle, swingLimit = 0; btVector3 swingAxis;
+			computeConeLimitInfo(qABCone, swingAngle, swingAxis, swingLimit);
+
+			if (swingAngle > swingLimit * m_limitSoftness)
+			{
+				m_solveSwingLimit = true;
+
+				// compute limit ratio: 0->1, where
+				// 0 == beginning of soft limit
+				// 1 == hard/real limit
+				m_swingLimitRatio = 1.f;
+				if (swingAngle < swingLimit && m_limitSoftness < 1.f - SIMD_EPSILON)
+				{
+					m_swingLimitRatio = (swingAngle - swingLimit * m_limitSoftness)/
+										(swingLimit - swingLimit * m_limitSoftness);
+				}
+
+				// swing correction tries to get back to soft limit
+				m_swingCorrection = swingAngle - (swingLimit * m_limitSoftness);
+
+				// adjustment of swing axis (based on ellipse normal)
+				adjustSwingAxisToUseEllipseNormal(swingAxis);
+
+				// Calculate necessary axis & factors
+				m_swingAxis = quatRotate(qB, -swingAxis);
+
+				m_twistAxisA.setValue(0,0,0);
+
+				m_kSwing =  btScalar(1.) /
+					(computeAngularImpulseDenominator(m_swingAxis,invInertiaWorldA) +
+					 computeAngularImpulseDenominator(m_swingAxis,invInertiaWorldB));
+			}
+		}
+		else
+		{
+			// you haven't set any limits;
+			// or you're trying to set at least one of the swing limits too small. (if so, do you really want a conetwist constraint?)
+			// anyway, we have either hinge or fixed joint
+			btVector3 ivA = transA.getBasis() * m_rbAFrame.getBasis().getColumn(0);
+			btVector3 jvA = transA.getBasis() * m_rbAFrame.getBasis().getColumn(1);
+			btVector3 kvA = transA.getBasis() * m_rbAFrame.getBasis().getColumn(2);
+			btVector3 ivB = transB.getBasis() * m_rbBFrame.getBasis().getColumn(0);
+			btVector3 target;
+			btScalar x = ivB.dot(ivA); // components of B's X axis along A's world-space frame axes
+			btScalar y = ivB.dot(jvA);
+			btScalar z = ivB.dot(kvA);
+			if((m_swingSpan1 < m_fixThresh) && (m_swingSpan2 < m_fixThresh))
+			{ // fixed. We'll need to add one more row to constraint
+				if((!btFuzzyZero(y)) || (!(btFuzzyZero(z))))
+				{
+					m_solveSwingLimit = true;
+					m_swingAxis = -ivB.cross(ivA);
+				}
+			}
+			else
+			{
+				if(m_swingSpan1 < m_fixThresh)
+				{ // hinge around Y axis
+					if(!(btFuzzyZero(y)))
+					{
+						m_solveSwingLimit = true;
+						if(m_swingSpan2 >= m_fixThresh)
+						{
+							y = btScalar(0.f);
+							btScalar span2 = btAtan2(z, x);
+							if(span2 > m_swingSpan2)
+							{
+								x = btCos(m_swingSpan2);
+								z = btSin(m_swingSpan2);
+							}
+							else if(span2 < -m_swingSpan2)
+							{
+								x =  btCos(m_swingSpan2);
+								z = -btSin(m_swingSpan2);
+							}
+						}
+					}
+				}
+				else
+				{ // hinge around Z axis
+					if(!btFuzzyZero(z))
+					{
+						m_solveSwingLimit = true;
+						if(m_swingSpan1 >= m_fixThresh)
+						{
+							z = btScalar(0.f);
+							btScalar span1 = btAtan2(y, x);
+							if(span1 > m_swingSpan1)
+							{
+								x = btCos(m_swingSpan1);
+								y = btSin(m_swingSpan1);
+							}
+							else if(span1 < -m_swingSpan1)
+							{
+								x =  btCos(m_swingSpan1);
+								y = -btSin(m_swingSpan1);
+							}
+						}
+					}
+				}
+				target[0] = x * ivA[0] + y * jvA[0] + z * kvA[0];
+				target[1] = x * ivA[1] + y * jvA[1] + z * kvA[1];
+				target[2] = x * ivA[2] + y * jvA[2] + z * kvA[2];
+				target.normalize();
+				m_swingAxis = -ivB.cross(target);
+				m_swingCorrection = m_swingAxis.length();
+				m_swingAxis.normalize();
+			}
+		}
+		// Twist limit: only evaluated for a non-negative twist span.
+		if (m_twistSpan >= btScalar(0.f))
+		{
+			btVector3 twistAxis;
+			computeTwistLimitInfo(qABTwist, m_twistAngle, twistAxis);
+
+			if (m_twistAngle > m_twistSpan*m_limitSoftness)
+			{
+				m_solveTwistLimit = true;
+				// limit ratio, computed the same way as m_swingLimitRatio above
+				m_twistLimitRatio = 1.f;
+				if (m_twistAngle < m_twistSpan && m_limitSoftness < 1.f - SIMD_EPSILON)
+				{
+					m_twistLimitRatio = (m_twistAngle - m_twistSpan * m_limitSoftness)/
+										(m_twistSpan  - m_twistSpan * m_limitSoftness);
+				}
+
+				// twist correction tries to get back to soft limit
+				m_twistCorrection = m_twistAngle - (m_twistSpan * m_limitSoftness);
+
+				m_twistAxis = quatRotate(qB, -twistAxis);
+
+				m_kTwist = btScalar(1.) /
+					(computeAngularImpulseDenominator(m_twistAxis,invInertiaWorldA) +
+					 computeAngularImpulseDenominator(m_twistAxis,invInertiaWorldB));
+			}
+			// m_twistAxisA is read by the swing branch of solveConstraintObsolete() to strip twist from the swing impulse
+			if (m_solveSwingLimit)
+				m_twistAxisA = quatRotate(qA, -twistAxis);
+		}
+		else
+		{
+			m_twistAngle = btScalar(0.f);
+		}
+	}
+}
+
+
+
+// Given a cone rotation in constraint space (precondition: twist already removed),
+// computes the swing angle, the swing axis, and the cone/swing limit angle for that pose.
+// swingAngle is always written; vSwingAxis and swingLimit are written only when the
+// angle exceeds SIMD_EPSILON, otherwise they keep the values passed in by the caller.
+void btConeTwistConstraint::computeConeLimitInfo(const btQuaternion& qCone,
+												 btScalar& swingAngle, // out
+												 btVector3& vSwingAxis, // out
+												 btScalar& swingLimit) // out
+{
+	swingAngle = qCone.getAngle();
+	if (!(swingAngle > SIMD_EPSILON))
+	{
+		// Vanishing rotation: nothing else is computed.
+		// (A negative angle is not expected here and is not handled either.)
+		return;
+	}
+
+	// The normalised vector part of the quaternion is the swing axis.
+	vSwingAxis = btVector3(qCone.x(), qCone.y(), qCone.z());
+	vSwingAxis.normalize();
+	// With the twist removed its x component should be zero; a non-zero value is not handled.
+
+	// The limit for this swing is where the swing direction meets the bounding cone ellipse.
+	// (These are angles, so the ellipse really lives on the surface of a sphere.)
+
+	// Direction from the ellipse centre towards its boundary: the swing axis rotated
+	// by PI/2 in 2D, i.e. the perpendicular of the axis' (z,y) part, relabelled to
+	// (x,y) ellipse coordinates.
+	const btScalar dirX =  vSwingAxis.y();
+	const btScalar dirY = -vSwingAxis.z();
+
+	// Intersect the line with slope dirY/dirX with the ellipse
+	//  x^2   y^2
+	//  --- + --- = 1, where a and b are semi-major axes 2 and 1 respectively (ie. the limits)
+	//  a^2   b^2
+	// and take the length of the segment from the centre to the intersection.
+
+	// Default covers dirX == 0, a pure vSwingAxis.z rotation: the limit is just
+	// m_swingSpan1.
+	swingLimit = m_swingSpan1;
+	if (fabs(dirX) > SIMD_EPSILON)
+	{
+		const btScalar slopeSq = (dirY*dirY)/(dirX*dirX);
+		btScalar invLimitSq = 1 / (m_swingSpan2 * m_swingSpan2);
+		invLimitSq += slopeSq / (m_swingSpan1 * m_swingSpan1);
+		const btScalar limitSq = (1 + slopeSq) / invLimitSq;
+		swingLimit = sqrt(limitSq);
+	}
+
+	// test!
+	/*swingLimit = m_swingSpan2;
+	if (fabs(vSwingAxis.z()) > SIMD_EPSILON)
+	{
+	btScalar mag_2 = m_swingSpan1*m_swingSpan1 + m_swingSpan2*m_swingSpan2;
+	btScalar sinphi = m_swingSpan2 / sqrt(mag_2);
+	btScalar phi = asin(sinphi);
+	btScalar theta = atan2(fabs(vSwingAxis.y()),fabs(vSwingAxis.z()));
+	btScalar alpha = 3.14159f - theta - phi;
+	btScalar sinalpha = sin(alpha);
+	swingLimit = m_swingSpan1 * sinphi/sinalpha;
+	}*/
+	// (the block comment above is an alternative limit computation that is
+	// disabled; only the ellipse intersection is active)
+
+}
+// Returns, in constraint space, the point at distance fLength along the twist (x) axis after rotating it to the cone limit for the given angle around the cone.
+btVector3 btConeTwistConstraint::GetPointForAngle(btScalar fAngleInRadians, btScalar fLength) const
+{
+	// compute x/y in ellipse using cone angle (0 -> 2*PI along surface of cone)
+	btScalar xEllipse = btCos(fAngleInRadians);
+	btScalar yEllipse = btSin(fAngleInRadians);
+
+	// Use the slope of the vector (using x/yEllipse) and find the length
+	// of the line that intersects the ellipse:
+	//  x^2   y^2
+	//  --- + --- = 1, where a and b are semi-major axes 2 and 1 respectively (ie. the limits)
+	//  a^2   b^2
+	// Do the math and it should be clear.
+
+	btScalar swingLimit = m_swingSpan1; // if xEllipse == 0, just use axis b (1); btScalar (not float) keeps double-precision builds exact
+	if (fabs(xEllipse) > SIMD_EPSILON)
+	{
+		btScalar surfaceSlope2 = (yEllipse*yEllipse)/(xEllipse*xEllipse);
+		btScalar norm = 1 / (m_swingSpan2 * m_swingSpan2);
+		norm += surfaceSlope2 / (m_swingSpan1 * m_swingSpan1);
+		btScalar swingLimit2 = (1 + surfaceSlope2) / norm;
+		swingLimit = sqrt(swingLimit2);
+	}
+
+	// convert into point in constraint space:
+	// note: twist is x-axis, swing 1 and 2 are along the z and y axes respectively
+	btVector3 vSwingAxis(0, xEllipse, -yEllipse);
+	btQuaternion qSwing(vSwingAxis, swingLimit);
+	btVector3 vPointInConstraintSpace(fLength,0,0);
+	return quatRotate(qSwing, vPointInConstraintSpace);
+}
+
+// Given a twist rotation in constraint space (precondition: cone already removed),
+// computes the corresponding twist angle and axis, preferring the short way around.
+void btConeTwistConstraint::computeTwistLimitInfo(const btQuaternion& qTwist,
+												  btScalar& twistAngle, // out
+												  btVector3& vTwistAxis) // out
+{
+	// Start from the angle of the quaternion exactly as given.
+	twistAngle = qTwist.getAngle();
+
+	// An angle beyond PI is the long way around: use the negated quaternion and
+	// re-read the angle from it.
+	const bool tookLongWay = twistAngle > SIMD_PI;
+	const btQuaternion qShortest = tookLongWay ? -(qTwist) : qTwist;
+	if (tookLongWay)
+		twistAngle = qShortest.getAngle();
+
+	// A negative angle is not expected at this point and is not handled.
+
+	// The axis is the vector part of the chosen quaternion; it is normalised only
+	// when the angle is large enough.
+	vTwistAxis = btVector3(qShortest.x(), qShortest.y(), qShortest.z());
+	if (twistAngle > SIMD_EPSILON)
+		vTwistAxis.normalize();
+}
+
+
+void btConeTwistConstraint::adjustSwingAxisToUseEllipseNormal(btVector3& vSwingAxis) const
+{
+	// The incoming swing axis is the "twist-free" cone rotation, but the cone limit is
+	// elliptical rather than circular whenever swingspan1 != swingspan2. Outside the
+	// limits, the closest way back inside is therefore not along the line to the
+	// centre; using the ellipse normal instead is better (and more stable).
+
+	// Rotate the (y,z) part of the axis by PI/2 to get the direction from the centre
+	// to the surface of the ellipse.
+	const btScalar dirY = -vSwingAxis.z();
+	const btScalar dirZ =  vSwingAxis.y();
+
+	// No update is needed when dirZ vanishes, and dividing by it would not be safe.
+	if (!(fabs(dirZ) > SIMD_EPSILON))
+		return;
+
+	// Gradient/normal of the ellipse surface at the current "point".
+	btScalar grad = dirY/dirZ;
+	grad *= m_swingSpan2 / m_swingSpan1;
+
+	// Replace the y component so (y,z) represents the normal at the point rather
+	// than the vector to the point; the sign follows the original y component.
+	const btScalar magnitude = fabs(grad * dirZ);
+	const btScalar normalY = (dirY > 0) ? magnitude : -magnitude;
+
+	// Convert the ellipse direction back into a swing axis and re-normalise it.
+	// The x component of the axis is left untouched.
+	vSwingAxis.setZ(-normalY);
+	vSwingAxis.setY( dirZ);
+	vSwingAxis.normalize();
+}
+
+
+
+// Set the motor target from q, the desired rotation of body A with respect
+// to body B. q is converted into constraint space using the rotations of the
+// two constraint frames and forwarded to setMotorTargetInConstraintSpace(),
+// which clamps it to the joint limits.
+void btConeTwistConstraint::setMotorTarget(const btQuaternion &q)
+{
+	// Only the constraint frame rotations enter the conversion; the current
+	// body transforms do not affect the result, so none are fetched here.
+	const btQuaternion qConstraint =
+		m_rbBFrame.getRotation().inverse() * q * m_rbAFrame.getRotation();
+	setMotorTargetInConstraintSpace(qConstraint);
+}
+
+
+void btConeTwistConstraint::setMotorTargetInConstraintSpace(const btQuaternion &q)
+{
+	m_qTarget = q; // start from the requested target; it is overwritten with the clamped one below
+
+	// clamp motor target to within limits
+	{
+		btScalar softness = 1.f;//m_limitSoftness; (fixed at 1 here, so the limits are used unscaled)
+
+		// split into twist and cone (cone = shortest arc from vTwist to its rotated image, twist = the remainder)
+		btVector3 vTwisted = quatRotate(m_qTarget, vTwist); // vTwist is defined elsewhere in this file (presumably the twist axis - confirm)
+		btQuaternion qTargetCone  = shortestArcQuat(vTwist, vTwisted); qTargetCone.normalize();
+		btQuaternion qTargetTwist = qTargetCone.inverse() * m_qTarget; qTargetTwist.normalize();
+
+		// clamp cone (only when both swing spans are at least 0.05)
+		if (m_swingSpan1 >= btScalar(0.05f) && m_swingSpan2 >= btScalar(0.05f))
+		{
+			btScalar swingAngle, swingLimit; btVector3 swingAxis;
+			computeConeLimitInfo(qTargetCone, swingAngle, swingAxis, swingLimit);
+
+			if (fabs(swingAngle) > SIMD_EPSILON) // a negligible swing is left untouched
+			{
+				if (swingAngle > swingLimit*softness)
+					swingAngle = swingLimit*softness;
+				else if (swingAngle < -swingLimit*softness)
+					swingAngle = -swingLimit*softness;
+				qTargetCone = btQuaternion(swingAxis, swingAngle); // rebuild the cone part from the (possibly clamped) angle
+			}
+		}
+
+		// clamp twist (only when the twist span is at least 0.05)
+		if (m_twistSpan >= btScalar(0.05f))
+		{
+			btScalar twistAngle; btVector3 twistAxis;
+			computeTwistLimitInfo(qTargetTwist, twistAngle, twistAxis);
+
+			if (fabs(twistAngle) > SIMD_EPSILON) // a negligible twist is left untouched
+			{
+				// eddy todo: limitSoftness used here???
+				if (twistAngle > m_twistSpan*softness)
+					twistAngle = m_twistSpan*softness;
+				else if (twistAngle < -m_twistSpan*softness)
+					twistAngle = -m_twistSpan*softness;
+				qTargetTwist = btQuaternion(twistAxis, twistAngle); // rebuild the twist part from the (possibly clamped) angle
+			}
+		}
+
+		m_qTarget = qTargetCone * qTargetTwist; // store the recombined target
+	}
+}
+
+///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+///If no axis is provided, it uses the default axis for this constraint.
+void btConeTwistConstraint::setParam(int num, btScalar value, int axis)
+{
+	// axes 0..2 are linear; every other value (including the default -1) is handled as angular
+	const bool linearAxis = (axis >= 0) && (axis < 3);
+
+	switch(num)
+	{
+		case BT_CONSTRAINT_ERP :
+		case BT_CONSTRAINT_STOP_ERP :
+			if(!linearAxis)
+			{
+				// the angular ERP is kept in the bias factor; no flag is set for it
+				m_biasFactor = value;
+				break;
+			}
+			m_linERP = value;
+			m_flags |= BT_CONETWIST_FLAGS_LIN_ERP;
+			break;
+		case BT_CONSTRAINT_CFM :
+		case BT_CONSTRAINT_STOP_CFM :
+			if(linearAxis)
+			{
+				m_linCFM = value;
+				m_flags |= BT_CONETWIST_FLAGS_LIN_CFM;
+				break;
+			}
+			m_angCFM = value;
+			m_flags |= BT_CONETWIST_FLAGS_ANG_CFM;
+			break;
+		default:
+			btAssertConstrParams(0);
+			break;
+	}
+}
+
+///return the local value of parameter
+btScalar btConeTwistConstraint::getParam(int num, int axis) const
+{
+	// Axes 0..2 select the linear value and 3..5 the angular one. Anything
+	// else, or an unknown parameter id, trips btAssertConstrParams(0) and
+	// yields 0.
+	const bool linearAxis  = (axis >= 0) && (axis < 3);
+	const bool angularAxis = (axis >= 3) && (axis < 6);
+
+	switch(num)
+	{
+		case BT_CONSTRAINT_ERP :
+		case BT_CONSTRAINT_STOP_ERP :
+			if(linearAxis)
+			{
+				btAssertConstrParams(m_flags & BT_CONETWIST_FLAGS_LIN_ERP);
+				return m_linERP;
+			}
+			if(angularAxis)
+			{
+				return m_biasFactor;
+			}
+			break;
+		case BT_CONSTRAINT_CFM :
+		case BT_CONSTRAINT_STOP_CFM :
+			if(linearAxis)
+			{
+				btAssertConstrParams(m_flags & BT_CONETWIST_FLAGS_LIN_CFM);
+				return m_linCFM;
+			}
+			if(angularAxis)
+			{
+				btAssertConstrParams(m_flags & BT_CONETWIST_FLAGS_ANG_CFM);
+				return m_angCFM;
+			}
+			break;
+		default :
+			break;
+	}
+
+	// reached only for an unsupported parameter id or axis
+	btAssertConstrParams(0);
+	return 0;
+}
+
+
+void btConeTwistConstraint::setFrames(const btTransform & frameA, const btTransform & frameB)
+{
+	m_rbAFrame = frameA; // replaces the stored frame offset A
+	m_rbBFrame = frameB; // replaces the stored frame offset B
+	buildJacobian(); // re-run after both frames have been replaced
+	//calculateTransforms();
+}
+
+ 
+
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btConeTwistConstraint.h b/src/bullet/BulletDynamics/ConstraintSolver/btConeTwistConstraint.h
new file mode 100644
index 00000000..868e62f0
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btConeTwistConstraint.h
@@ -0,0 +1,346 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+btConeTwistConstraint is Copyright (c) 2007 Starbreeze Studios
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+Written by: Marcus Hennix
+*/
+
+
+
+/*
+Overview:
+
+btConeTwistConstraint can be used to simulate ragdoll joints (upper arm, leg etc).
+It is a fixed translation, 3 degree-of-freedom (DOF) rotational "joint".
+It divides the 3 rotational DOFs into swing (movement within a cone) and twist.
+Swing is divided into swing1 and swing2 which can have different limits, giving an elliptical shape.
+(Note: the cone's base isn't flat, so this ellipse is "embedded" on the surface of a sphere.)
+
+In the constraint's frame of reference:
+twist is along the x-axis,
+and swing 1 and 2 are along the z and y axes respectively.
+*/
+
+
+
+#ifndef BT_CONETWISTCONSTRAINT_H
+#define BT_CONETWISTCONSTRAINT_H
+
+#include "LinearMath/btVector3.h"
+#include "btJacobianEntry.h"
+#include "btTypedConstraint.h"
+
+class btRigidBody;
+
+enum btConeTwistFlags
+{
+	BT_CONETWIST_FLAGS_LIN_CFM = 1, // set by setParam() when m_linCFM is overridden
+	BT_CONETWIST_FLAGS_LIN_ERP = 2, // set by setParam() when m_linERP is overridden
+	BT_CONETWIST_FLAGS_ANG_CFM = 4  // set by setParam() when m_angCFM is overridden
+};
+
+///btConeTwistConstraint can be used to simulate ragdoll joints (upper arm, leg etc)
+class btConeTwistConstraint : public btTypedConstraint
+{
+#ifdef IN_PARALLELL_SOLVER
+public:
+#endif
+	btJacobianEntry	m_jac[3]; //3 orthogonal linear constraints
+
+	btTransform m_rbAFrame; 
+	btTransform m_rbBFrame;
+
+	btScalar	m_limitSoftness;
+	btScalar	m_biasFactor;
+	btScalar	m_relaxationFactor;
+
+	btScalar	m_damping;
+
+	btScalar	m_swingSpan1;
+	btScalar	m_swingSpan2;
+	btScalar	m_twistSpan;
+
+	btScalar	m_fixThresh;
+
+	btVector3   m_swingAxis;
+	btVector3	m_twistAxis;
+
+	btScalar	m_kSwing;
+	btScalar	m_kTwist;
+
+	btScalar	m_twistLimitSign;
+	btScalar	m_swingCorrection;
+	btScalar	m_twistCorrection;
+
+	btScalar	m_twistAngle;
+
+	btScalar	m_accSwingLimitImpulse;
+	btScalar	m_accTwistLimitImpulse;
+
+	bool		m_angularOnly;
+	bool		m_solveTwistLimit;
+	bool		m_solveSwingLimit;
+
+	bool	m_useSolveConstraintObsolete;
+
+	// not yet used...
+	btScalar	m_swingLimitRatio;
+	btScalar	m_twistLimitRatio;
+	btVector3   m_twistAxisA;
+
+	// motor
+	bool		 m_bMotorEnabled;
+	bool		 m_bNormalizedMotorStrength;
+	btQuaternion m_qTarget;
+	btScalar	 m_maxMotorImpulse;
+	btVector3	 m_accMotorImpulse;
+	
+	// parameters
+	int			m_flags;
+	btScalar	m_linCFM;
+	btScalar	m_linERP;
+	btScalar	m_angCFM;
+	
+protected:
+
+	void init();
+
+	void computeConeLimitInfo(const btQuaternion& qCone, // in
+		btScalar& swingAngle, btVector3& vSwingAxis, btScalar& swingLimit); // all outs
+
+	void computeTwistLimitInfo(const btQuaternion& qTwist, // in
+		btScalar& twistAngle, btVector3& vTwistAxis); // all outs
+
+	void adjustSwingAxisToUseEllipseNormal(btVector3& vSwingAxis) const;
+
+
+public:
+
+	btConeTwistConstraint(btRigidBody& rbA,btRigidBody& rbB,const btTransform& rbAFrame, const btTransform& rbBFrame);
+	
+	btConeTwistConstraint(btRigidBody& rbA,const btTransform& rbAFrame);
+
+	virtual void	buildJacobian();
+
+	virtual void getInfo1 (btConstraintInfo1* info);
+
+	void	getInfo1NonVirtual(btConstraintInfo1* info);
+	
+	virtual void getInfo2 (btConstraintInfo2* info);
+	
+	void	getInfo2NonVirtual(btConstraintInfo2* info,const btTransform& transA,const btTransform& transB,const btMatrix3x3& invInertiaWorldA,const btMatrix3x3& invInertiaWorldB);
+
+	virtual	void	solveConstraintObsolete(btRigidBody& bodyA,btRigidBody& bodyB,btScalar	timeStep);
+
+	void	updateRHS(btScalar	timeStep);
+
+
+	const btRigidBody& getRigidBodyA() const
+	{
+		return m_rbA;
+	}
+	const btRigidBody& getRigidBodyB() const
+	{
+		return m_rbB;
+	}
+
+	void	setAngularOnly(bool angularOnly)
+	{
+		m_angularOnly = angularOnly;
+	}
+
+	void	setLimit(int limitIndex,btScalar limitValue)
+	{
+		// Sets a single angular limit selected by index:
+		//   3 -> twist span
+		//   4 -> swing span 2
+		//   5 -> swing span 1
+		// Every other index is ignored without any error.
+
+		switch (limitIndex)
+		{
+		case 3:
+			m_twistSpan = limitValue;
+			return;
+		case 4:
+			m_swingSpan2 = limitValue;
+			return;
+		case 5:
+			m_swingSpan1 = limitValue;
+			return;
+		default:
+			// not one of the three indices handled above
+			return;
+		}
+	}
+
+	// setLimit(), a few notes:
+	// _softness:
+	//		0->1, recommend ~0.8->1.
+	//		describes % of limits where movement is free.
+	//		beyond this softness %, the limit is gradually enforced until the "hard" (1.0) limit is reached.
+	// _biasFactor:
+	//		0->1?, recommend 0.3 +/-0.3 or so.
+	//		strength with which constraint resists zeroth order (angular, not angular velocity) limit violation.
+	// _relaxationFactor:
+	//		0->1, recommend to stay near 1.
+	//		the lower the value, the less the constraint will fight velocities which violate the angular limits.
+	void	setLimit(btScalar _swingSpan1,btScalar _swingSpan2,btScalar _twistSpan, btScalar _softness = 1.f, btScalar _biasFactor = 0.3f, btScalar _relaxationFactor = 1.0f)
+	{
+		m_swingSpan1 = _swingSpan1;
+		m_swingSpan2 = _swingSpan2;
+		m_twistSpan  = _twistSpan;
+
+		m_limitSoftness =  _softness;
+		m_biasFactor = _biasFactor;
+		m_relaxationFactor = _relaxationFactor;
+	}
+
+	const btTransform& getAFrame() { return m_rbAFrame; };	
+	const btTransform& getBFrame() { return m_rbBFrame; };
+
+	inline int getSolveTwistLimit()
+	{
+		return m_solveTwistLimit;
+	}
+
+	inline int getSolveSwingLimit() // returns the m_solveSwingLimit flag (also exposed by isPastSwingLimit())
+	{
+		return m_solveSwingLimit; // the swing flag; the twist flag has its own getter, getSolveTwistLimit()
+	}
+
+	inline btScalar getTwistLimitSign()
+	{
+		return m_twistLimitSign;
+	}
+
+	void calcAngleInfo();
+	void calcAngleInfo2(const btTransform& transA, const btTransform& transB,const btMatrix3x3& invInertiaWorldA,const btMatrix3x3& invInertiaWorldB);
+
+	inline btScalar getSwingSpan1()
+	{
+		return m_swingSpan1;
+	}
+	inline btScalar getSwingSpan2()
+	{
+		return m_swingSpan2;
+	}
+	inline btScalar getTwistSpan()
+	{
+		return m_twistSpan;
+	}
+	inline btScalar getTwistAngle()
+	{
+		return m_twistAngle;
+	}
+	bool isPastSwingLimit() { return m_solveSwingLimit; }
+
+	void setDamping(btScalar damping) { m_damping = damping; }
+
+	void enableMotor(bool b) { m_bMotorEnabled = b; }
+	void setMaxMotorImpulse(btScalar maxMotorImpulse) { m_maxMotorImpulse = maxMotorImpulse; m_bNormalizedMotorStrength = false; }
+	void setMaxMotorImpulseNormalized(btScalar maxMotorImpulse) { m_maxMotorImpulse = maxMotorImpulse; m_bNormalizedMotorStrength = true; }
+
+	btScalar getFixThresh() { return m_fixThresh; }
+	void setFixThresh(btScalar fixThresh) { m_fixThresh = fixThresh; }
+
+	// setMotorTarget:
+	// q: the desired rotation of bodyA wrt bodyB.
+	// note: if q violates the joint limits, the internal target is clamped to avoid conflicting impulses (very bad for stability)
+	// note: don't forget to enableMotor()
+	void setMotorTarget(const btQuaternion &q);
+
+	// same as above, but q is the desired rotation of frameA wrt frameB in constraint space
+	void setMotorTargetInConstraintSpace(const btQuaternion &q);
+
+	btVector3 GetPointForAngle(btScalar fAngleInRadians, btScalar fLength) const;
+
+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+	///If no axis is provided, it uses the default axis for this constraint.
+	virtual	void setParam(int num, btScalar value, int axis = -1);
+
+	virtual void setFrames(const btTransform& frameA, const btTransform& frameB);
+
+	const btTransform& getFrameOffsetA() const
+	{
+		return m_rbAFrame;
+	}
+
+	const btTransform& getFrameOffsetB() const
+	{
+		return m_rbBFrame;
+	}
+
+
+	///return the local value of parameter
+	virtual	btScalar getParam(int num, int axis = -1) const;
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btConeTwistConstraintData
+{
+	btTypedConstraintData	m_typeConstraintData; // filled by btTypedConstraint::serialize()
+	btTransformFloatData m_rbAFrame; // written via m_rbAFrame.serializeFloat()
+	btTransformFloatData m_rbBFrame; // written via m_rbBFrame.serializeFloat()
+
+	//limits
+	float	m_swingSpan1;
+	float	m_swingSpan2;
+	float	m_twistSpan;
+	float	m_limitSoftness;
+	float	m_biasFactor;
+	float	m_relaxationFactor;
+
+	float	m_damping;
+		
+	char m_pad[4]; // not written by btConeTwistConstraint::serialize(); presumably alignment padding - confirm
+
+};
+	
+
+
+SIMD_FORCE_INLINE int	btConeTwistConstraint::calculateSerializeBufferSize() const
+{
+	// size of the flat struct that serialize() fills in
+	return sizeof(btConeTwistConstraintData);
+}
+
+
+	///fills the dataBuffer and returns the struct name (0 means failure; this implementation always returns the name)
+SIMD_FORCE_INLINE const char*	btConeTwistConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btConeTwistConstraintData* cone = (btConeTwistConstraintData*) dataBuffer; // the caller's buffer is treated as the flat data struct
+	btTypedConstraint::serialize(&cone->m_typeConstraintData,serializer); // base-class part first
+
+	m_rbAFrame.serializeFloat(cone->m_rbAFrame);
+	m_rbBFrame.serializeFloat(cone->m_rbBFrame);
+	
+	cone->m_swingSpan1 = float(m_swingSpan1); // every scalar below is explicitly converted to float
+	cone->m_swingSpan2 = float(m_swingSpan2);
+	cone->m_twistSpan = float(m_twistSpan);
+	cone->m_limitSoftness = float(m_limitSoftness);
+	cone->m_biasFactor = float(m_biasFactor);
+	cone->m_relaxationFactor = float(m_relaxationFactor);
+	cone->m_damping = float(m_damping);
+
+	return "btConeTwistConstraintData"; // name of the struct that was filled in
+}
+
+
+#endif //BT_CONETWISTCONSTRAINT_H
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btConstraintSolver.h b/src/bullet/BulletDynamics/ConstraintSolver/btConstraintSolver.h
new file mode 100644
index 00000000..6f673102
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btConstraintSolver.h
@@ -0,0 +1,52 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONSTRAINT_SOLVER_H
+#define BT_CONSTRAINT_SOLVER_H
+
+#include "LinearMath/btScalar.h"
+
+class btPersistentManifold;
+class btRigidBody;
+class btCollisionObject;
+class btTypedConstraint;
+struct btContactSolverInfo;
+struct btBroadphaseProxy;
+class btIDebugDraw;
+class btStackAlloc;
+class	btDispatcher;
+/// btConstraintSolver provides solver interface
+class btConstraintSolver
+{
+
+public:
+
+	virtual ~btConstraintSolver() {}
+	///optional hook with an empty default body (presumably invoked before solving - confirm with callers)
+	virtual void prepareSolve (int /* numBodies */, int /* numManifolds */) {;}
+
+	///solve a group of constraints
+	virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints, const btContactSolverInfo& info,class btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher) = 0;
+	///optional hook with an empty default body (presumably invoked once solving is finished - confirm with callers)
+	virtual void allSolved (const btContactSolverInfo& /* info */,class btIDebugDraw* /* debugDrawer */, btStackAlloc* /* stackAlloc */) {;}
+
+	///clear internal cached data and reset random seed
+	virtual	void	reset() = 0;
+};
+
+
+
+
+#endif //BT_CONSTRAINT_SOLVER_H
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btContactConstraint.cpp b/src/bullet/BulletDynamics/ConstraintSolver/btContactConstraint.cpp
new file mode 100644
index 00000000..88859182
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btContactConstraint.cpp
@@ -0,0 +1,178 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btContactConstraint.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btVector3.h"
+#include "btJacobianEntry.h"
+#include "btContactSolverInfo.h"
+#include "LinearMath/btMinMax.h"
+#include "BulletCollision/NarrowPhaseCollision/btManifoldPoint.h"
+
+
+
+btContactConstraint::btContactConstraint(btPersistentManifold* contactManifold,btRigidBody& rbA,btRigidBody& rbB)
+:btTypedConstraint(CONTACT_CONSTRAINT_TYPE,rbA,rbB),
+	m_contactManifold(*contactManifold)
+{
+	// contactManifold is dereferenced and copied into the member above, so it must not be null
+}
+
+btContactConstraint::~btContactConstraint()
+{
+	// empty: no explicit cleanup is performed here
+}
+
+void	btContactConstraint::setContactManifold(btPersistentManifold* contactManifold)
+{
+	m_contactManifold = *contactManifold; // copies the pointed-to manifold; the pointer is dereferenced unchecked
+}
+
+void btContactConstraint::getInfo1 (btConstraintInfo1* info)
+{
+	// no-op: nothing is written to info
+}
+
+void btContactConstraint::getInfo2 (btConstraintInfo2* info)
+{
+	// no-op: nothing is written to info
+}
+
+void	btContactConstraint::buildJacobian()
+{
+	// no-op: this override does nothing
+}
+
+
+
+
+
+#include "btContactConstraint.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btVector3.h"
+#include "btJacobianEntry.h"
+#include "btContactSolverInfo.h"
+#include "LinearMath/btMinMax.h"
+#include "BulletCollision/NarrowPhaseCollision/btManifoldPoint.h"
+
+
+
+//response  between two dynamic objects without friction, assuming 0 penetration depth
+btScalar resolveSingleCollision(
+        btRigidBody* body1,
+        btCollisionObject* colObj2,
+		const btVector3& contactPositionWorld,
+		const btVector3& contactNormalOnB,
+        const btContactSolverInfo& solverInfo,
+		btScalar distance)
+{
+	btRigidBody* body2 = btRigidBody::upcast(colObj2);
+	// body2 may be null (it is checked wherever it is used below); in that case
+	// colObj2 contributes zero velocity, a zero denominator and gets no impulse.
+    const btVector3& normal = contactNormalOnB;
+
+    btVector3 rel_pos1 = contactPositionWorld - body1->getWorldTransform().getOrigin(); 
+    btVector3 rel_pos2 = contactPositionWorld - colObj2->getWorldTransform().getOrigin();
+	// velocity of body1 relative to body2 at the contact, then its component along the normal
+    btVector3 vel1 = body1->getVelocityInLocalPoint(rel_pos1);
+	btVector3 vel2 = body2? body2->getVelocityInLocalPoint(rel_pos2) : btVector3(0,0,0);
+    btVector3 vel = vel1 - vel2;
+    btScalar rel_vel;
+    rel_vel = normal.dot(vel);
+    
+    btScalar combinedRestitution = body1->getRestitution() * colObj2->getRestitution();
+    btScalar restitution = combinedRestitution* -rel_vel;
+	// position term scaled by ERP and 1/timeStep, and a velocity term; both share the same inverse denominator
+    btScalar positionalError = solverInfo.m_erp *-distance /solverInfo.m_timeStep ;
+    btScalar velocityError = -(1.0f + restitution) * rel_vel;// * damping;
+	btScalar denom0 = body1->computeImpulseDenominator(contactPositionWorld,normal);
+	btScalar denom1 = body2? body2->computeImpulseDenominator(contactPositionWorld,normal) : 0.f;
+	btScalar relaxation = 1.f;
+	btScalar jacDiagABInv = relaxation/(denom0+denom1);
+
+    btScalar penetrationImpulse = positionalError * jacDiagABInv;
+    btScalar velocityImpulse = velocityError * jacDiagABInv;
+
+    btScalar normalImpulse = penetrationImpulse+velocityImpulse;
+    normalImpulse = 0.f > normalImpulse ? 0.f: normalImpulse; // clamp: the applied impulse is never negative
+
+	body1->applyImpulse(normal*(normalImpulse), rel_pos1);
+    if (body2)
+		body2->applyImpulse(-normal*(normalImpulse), rel_pos2);
+    
+    return normalImpulse; // the (clamped) impulse magnitude that was applied
+}
+
+
+//bilateral constraint between two dynamic objects
+//
+// Computes a damping impulse along `normal` from the relative velocity of the
+// two anchor points (pos1 on body1, pos2 on body2) and stores it in `impulse`.
+// No impulse is applied to the bodies by this function. `distance` and
+// `timeStep` are accepted but not used. When length2(normal) exceeds 1.1 the
+// impulse is set to 0 and the function returns early.
+void resolveSingleBilateral(btRigidBody& body1, const btVector3& pos1,
+                      btRigidBody& body2, const btVector3& pos2,
+                      btScalar distance, const btVector3& normal,btScalar& impulse ,btScalar timeStep)
+{
+	(void)timeStep;
+	(void)distance;
+
+	// guard against a constraint axis that is clearly longer than unit length
+	btScalar normalLenSqr = normal.length2();
+	btAssert(btFabs(normalLenSqr) < btScalar(1.1));
+	if (normalLenSqr > btScalar(1.1))
+	{
+		impulse = btScalar(0.);
+		return;
+	}
+
+	// anchor points relative to each body's center of mass
+	btVector3 rel_pos1 = pos1 - body1.getCenterOfMassPosition();
+	btVector3 rel_pos2 = pos2 - body2.getCenterOfMassPosition();
+
+	// relative velocity of the anchor points, projected onto the axis
+	// (this dot product is the only relative-velocity value the impulse uses)
+	btVector3 vel1 = body1.getVelocityInLocalPoint(rel_pos1);
+	btVector3 vel2 = body2.getVelocityInLocalPoint(rel_pos2);
+	btVector3 vel = vel1 - vel2;
+	btScalar rel_vel = normal.dot(vel);
+
+	//this jacobian entry could be re-used for all iterations
+	btJacobianEntry jac(body1.getCenterOfMassTransform().getBasis().transpose(),
+		body2.getCenterOfMassTransform().getBasis().transpose(),
+		rel_pos1,rel_pos2,normal,body1.getInvInertiaDiagLocal(),body1.getInvMass(),
+		body2.getInvInertiaDiagLocal(),body2.getInvMass());
+
+	btScalar jacDiagAB = jac.getDiagonal();
+	btScalar jacDiagABInv = btScalar(1.) / jacDiagAB;
+
+	//todo: move this into proper structure
+	btScalar contactDamping = btScalar(0.2);
+
+#ifdef ONLY_USE_LINEAR_MASS
+	(void)jacDiagABInv; // the jacobian diagonal is not used in this variant
+	btScalar massTerm = btScalar(1.) / (body1.getInvMass() + body2.getInvMass());
+	impulse = - contactDamping * rel_vel * massTerm;
+#else
+	btScalar velocityImpulse = -contactDamping * rel_vel * jacDiagABInv;
+	impulse = velocityImpulse;
+#endif
+}
+
+
+
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btContactConstraint.h b/src/bullet/BulletDynamics/ConstraintSolver/btContactConstraint.h
new file mode 100644
index 00000000..477c79d1
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btContactConstraint.h
@@ -0,0 +1,71 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONTACT_CONSTRAINT_H
+#define BT_CONTACT_CONSTRAINT_H
+
+#include "LinearMath/btVector3.h"
+#include "btJacobianEntry.h"
+#include "btTypedConstraint.h"
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+
+///btContactConstraint can be automatically created to solve contact constraints using the unified btTypedConstraint interface
+ATTRIBUTE_ALIGNED16(class) btContactConstraint : public btTypedConstraint
+{
+protected:
+	///manifold held by value: the constructor and setContactManifold() copy the one they are given
+	btPersistentManifold m_contactManifold;
+
+public:
+
+	///copies *contactManifold into this constraint (the pointer is dereferenced unchecked)
+	btContactConstraint(btPersistentManifold* contactManifold,btRigidBody& rbA,btRigidBody& rbB);
+	///replaces the stored copy with *contactManifold
+	void	setContactManifold(btPersistentManifold* contactManifold);
+	///access to the stored copy (mutable and const flavours)
+	btPersistentManifold* getContactManifold()
+	{
+		return &m_contactManifold;
+	}
+
+	const btPersistentManifold* getContactManifold() const
+	{
+		return &m_contactManifold;
+	}
+
+	virtual ~btContactConstraint();
+	///getInfo1/getInfo2 are implemented as empty functions in btContactConstraint.cpp
+	virtual void getInfo1 (btConstraintInfo1* info);
+
+	virtual void getInfo2 (btConstraintInfo2* info);
+
+	///obsolete methods
+	virtual void	buildJacobian();
+
+
+};
+
+///very basic collision resolution without friction
+btScalar resolveSingleCollision(btRigidBody* body1, class btCollisionObject* colObj2, const btVector3& contactPositionWorld,const btVector3& contactNormalOnB, const struct btContactSolverInfo& solverInfo,btScalar distance);
+
+
+///resolveSingleBilateral is an obsolete method used for vehicle friction between two dynamic objects
+void resolveSingleBilateral(btRigidBody& body1, const btVector3& pos1,
+                      btRigidBody& body2, const btVector3& pos2,
+                      btScalar distance, const btVector3& normal,btScalar& impulse ,btScalar timeStep);
+
+
+
+#endif //BT_CONTACT_CONSTRAINT_H
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h b/src/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h
new file mode 100644
index 00000000..6204cb3d
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h
@@ -0,0 +1,87 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONTACT_SOLVER_INFO
+#define BT_CONTACT_SOLVER_INFO
+
+enum	btSolverMode
+{ // distinct bit values; btContactSolverInfo combines them with | into m_solverMode
+	SOLVER_RANDMIZE_ORDER = 1, // (sic) the identifier is spelled this way; renaming it would break users
+	SOLVER_FRICTION_SEPARATE = 2,
+	SOLVER_USE_WARMSTARTING = 4,
+	SOLVER_USE_FRICTION_WARMSTARTING = 8,
+	SOLVER_USE_2_FRICTION_DIRECTIONS = 16,
+	SOLVER_ENABLE_FRICTION_DIRECTION_CACHING = 32,
+	SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION = 64,
+	SOLVER_CACHE_FRIENDLY = 128,
+	SOLVER_SIMD = 256,	//enabled for Windows, the solver innerloop is branchless SIMD, 40% faster than FPU/scalar version
+	SOLVER_CUDA = 512	//will be open sourced during Game Developers Conference 2009. Much faster.
+};
+
+struct btContactSolverInfoData
+{
+	// Plain parameter block for the constraint solver; it declares no constructor
+	// of its own. Default values are assigned by the derived btContactSolverInfo.
+	btScalar	m_tau;
+	btScalar	m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
+	btScalar	m_friction;
+	btScalar	m_timeStep;
+	btScalar	m_restitution;
+	int		m_numIterations;
+	btScalar	m_maxErrorReduction;
+	btScalar	m_sor;
+	btScalar	m_erp;//used as Baumgarte factor
+	btScalar	m_erp2;//used in Split Impulse
+	btScalar	m_globalCfm;//constraint force mixing
+	int			m_splitImpulse;
+	btScalar	m_splitImpulsePenetrationThreshold;
+	btScalar	m_linearSlop;
+	btScalar	m_warmstartingFactor;
+
+	int			m_solverMode;
+	int	m_restingContactRestitutionThreshold;
+	int			m_minimumSolverBatchSize;
+
+
+};
+
+struct btContactSolverInfo : public btContactSolverInfoData
+{
+	///Assigns a default to every field of btContactSolverInfoData, including
+	///m_timeStep (1/60), so that no field is left indeterminate after construction.
+	inline btContactSolverInfo()
+	{
+		m_tau = btScalar(0.6);
+		m_damping = btScalar(1.0);
+		m_friction = btScalar(0.3);
+		m_timeStep = btScalar(1.f/60.f);
+		m_restitution = btScalar(0.);
+		m_maxErrorReduction = btScalar(20.);
+		m_numIterations = 10;
+		m_erp = btScalar(0.2);
+		m_erp2 = btScalar(0.1);
+		m_globalCfm = btScalar(0.);
+		m_sor = btScalar(1.);
+		m_splitImpulse = false;
+		m_splitImpulsePenetrationThreshold = -0.02f;
+		m_linearSlop = btScalar(0.0);
+		m_warmstartingFactor=btScalar(0.85);
+		m_solverMode = SOLVER_USE_WARMSTARTING | SOLVER_SIMD;// | SOLVER_RANDMIZE_ORDER;
+		m_restingContactRestitutionThreshold = 2;//resting contact lifetime threshold to disable restitution
+		m_minimumSolverBatchSize = 128; //try to combine islands until the amount of constraints reaches this limit
+	}
+};
+
+#endif //BT_CONTACT_SOLVER_INFO
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.cpp b/src/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.cpp
new file mode 100644
index 00000000..8ff9940b
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.cpp
@@ -0,0 +1,1078 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+/*
+2007-09-09
+Refactored by Francisco León
+email: projectileman@yahoo.com
+http://gimpact.sf.net
+*/
+
+#include "btGeneric6DofConstraint.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btTransformUtil.h"
+#include "LinearMath/btTransformUtil.h"
+#include <new>
+
+
+
+#define D6_USE_OBSOLETE_METHOD false	// initial m_useSolveConstraintObsolete: when true, buildJacobian() does the work and getInfo1() reports zero rows
+#define D6_USE_FRAME_OFFSET true	// initial m_useOffsetForConstraintFrame: when true, getInfo2() emits angular rows before linear rows
+
+
+
+
+
+
+/// Two-body constructor: stores both constraint frames and computes the initial world-space transforms.
+btGeneric6DofConstraint::btGeneric6DofConstraint(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB, bool useLinearReferenceFrameA)
+	: btTypedConstraint(D6_CONSTRAINT_TYPE, rbA, rbB),
+	  m_frameInA(frameInA), m_frameInB(frameInB),
+	  m_useLinearReferenceFrameA(useLinearReferenceFrameA),
+	  m_useOffsetForConstraintFrame(D6_USE_FRAME_OFFSET),
+	  m_flags(0),
+	  m_useSolveConstraintObsolete(D6_USE_OBSOLETE_METHOD)
+{
+	calculateTransforms();
+}
+
+
+
+/// Single-body constructor: body A is the fixed body from getFixedBody(); frame A is derived from rbB's current pose.
+btGeneric6DofConstraint::btGeneric6DofConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameB)
+	: btTypedConstraint(D6_CONSTRAINT_TYPE, getFixedBody(), rbB),
+	  m_frameInB(frameInB), m_useLinearReferenceFrameA(useLinearReferenceFrameB),
+	  m_useOffsetForConstraintFrame(D6_USE_FRAME_OFFSET),
+	  m_flags(0),
+	  // same switch as the two-body constructor, so both honour D6_USE_OBSOLETE_METHOD
+	  m_useSolveConstraintObsolete(D6_USE_OBSOLETE_METHOD)
+{
+	///not providing rigidbody A means implicitly using worldspace for body A
+	m_frameInA = rbB.getCenterOfMassTransform() * m_frameInB; calculateTransforms();
+}
+
+
+
+
+#define GENERIC_D6_DISABLE_WARMSTARTING 1
+
+
+
+btScalar btGetMatrixElem(const btMatrix3x3& mat, int index);
+/// Maps a flat index 0..8 onto the matrix: returns mat[index % 3][index / 3].
+btScalar btGetMatrixElem(const btMatrix3x3& mat, int index)
+{
+	const int first = index % 3, second = index / 3;
+	return mat[first][second];
+}
+
+
+
+///MatrixToEulerXYZ from http://www.geometrictools.com/LibFoundation/Mathematics/Wm4Matrix3.inl.html
+bool	matrixToEulerXYZ(const btMatrix3x3& mat,btVector3& xyz);
+bool	matrixToEulerXYZ(const btMatrix3x3& mat,btVector3& xyz)
+{
+	// Extracts three Euler angles from 'mat' into 'xyz', assuming the factorisation below.
+	// Returns true for the regular case and false for the two degenerate cases, in which
+	// only a combination of the first and third angle is determined and xyz[2] is set to 0.
+	//
+	//	// rot =  cy*cz          -cy*sz           sy
+	//	//        cz*sx*sy+cx*sz  cx*cz-sx*sy*sz -cy*sx
+	//	//       -cx*cz*sy+sx*sz  cz*sx+cx*sy*sz  cx*cy
+	//
+
+	const btScalar pivot = btGetMatrixElem(mat,2);
+
+	if (!(pivot < btScalar(1.0f)))	// negated form: a NaN pivot also lands here
+	{
+		// WARNING.  Not unique.  XAngle + ZAngle = atan2(r10,r11)
+		xyz[0] = btAtan2(btGetMatrixElem(mat,3),btGetMatrixElem(mat,4));
+		xyz[1] = SIMD_HALF_PI;
+		xyz[2] = 0.0;
+		return false;
+	}
+	if (!(pivot > btScalar(-1.0f)))
+	{
+		// WARNING.  Not unique.  XA - ZA = -atan2(r10,r11)
+		xyz[0] = -btAtan2(btGetMatrixElem(mat,3),btGetMatrixElem(mat,4));
+		xyz[1] = -SIMD_HALF_PI;
+		xyz[2] = btScalar(0.0);
+		return false;
+	}
+
+	xyz[0] = btAtan2(-btGetMatrixElem(mat,5),btGetMatrixElem(mat,8));
+	xyz[1] = btAsin(pivot);
+	xyz[2] = btAtan2(-btGetMatrixElem(mat,1),btGetMatrixElem(mat,0));
+	return true;
+}
+
+//////////////////////////// btRotationalLimitMotor ////////////////////////////////////
+
+int btRotationalLimitMotor::testLimitValue(btScalar test_value)
+{
+	// Classifies test_value against m_loLimit/m_hiLimit and stores the result in m_currentLimit:
+	// 0 = no violation (also when lo > hi), 1 = below the low limit, 2 = above the high limit.
+	// On a violation m_currentLimitError holds the signed overshoot, shifted once by SIMD_2_PI if beyond +-SIMD_PI.
+	if (m_loLimit > m_hiLimit)
+	{
+		m_currentLimit = 0;//Free from violation
+		return 0;
+	}
+
+	int violation = 0;//Free from violation
+	if (test_value < m_loLimit)
+	{
+		violation = 1;//low limit violation
+		m_currentLimitError = test_value - m_loLimit;
+	}
+	else if (test_value > m_hiLimit)
+	{
+		violation = 2;//High limit violation
+		m_currentLimitError = test_value - m_hiLimit;
+	}
+
+	m_currentLimit = violation;
+	if (violation != 0)
+	{
+		// bring the error back towards the principal range with a single 2*pi step
+		if (m_currentLimitError > SIMD_PI) m_currentLimitError -= SIMD_2_PI;
+		else if (m_currentLimitError < -SIMD_PI) m_currentLimitError += SIMD_2_PI;
+	}
+	return violation;
+}
+
+
+
+btScalar btRotationalLimitMotor::solveAngularLimits(
+	btScalar timeStep,btVector3& axis,btScalar jacDiagABInv,
+	btRigidBody * body0, btRigidBody * body1 )
+{
+	// Applies one motor/limit correction impulse about 'axis' to both bodies (opposite signs)
+	// and returns the impulse actually applied; returns 0 when no correction is needed.
+	if (needApplyTorques()==false) return 0.0f;
+
+	btScalar target_velocity = m_targetVelocity;
+	btScalar maxMotorForce = m_maxMotorForce;
+
+	//current error correction
+	if (m_currentLimit!=0)
+	{
+		target_velocity = -m_stopERP*m_currentLimitError/(timeStep);
+		maxMotorForce = m_maxLimitForce;
+	}
+
+	maxMotorForce *= timeStep;
+
+	// current velocity difference
+
+	btVector3 angVelA;
+	body0->internalGetAngularVelocity(angVelA);
+	btVector3 angVelB;
+	body1->internalGetAngularVelocity(angVelB);
+
+	btVector3 vel_diff;
+	vel_diff = angVelA-angVelB;
+
+
+
+	btScalar rel_vel = axis.dot(vel_diff);
+
+	// correction velocity
+	btScalar motor_relvel = m_limitSoftness*(target_velocity  - m_damping*rel_vel);
+
+
+	if ( motor_relvel < SIMD_EPSILON && motor_relvel > -SIMD_EPSILON  )
+	{
+		return 0.0f;//no need for applying force
+	}
+
+
+	// correction impulse
+	btScalar unclippedMotorImpulse = (1+m_bounce)*motor_relvel*jacDiagABInv;
+
+	// clip correction impulse
+	btScalar clippedMotorImpulse;
+
+	///@todo: should clip against accumulated impulse
+	if (unclippedMotorImpulse>0.0f)
+	{
+		clippedMotorImpulse =  unclippedMotorImpulse > maxMotorForce? maxMotorForce: unclippedMotorImpulse;
+	}
+	else
+	{
+		clippedMotorImpulse =  unclippedMotorImpulse < -maxMotorForce ? -maxMotorForce: unclippedMotorImpulse;
+	}
+
+
+	// sort with accumulated impulses
+	btScalar	lo = btScalar(-BT_LARGE_FLOAT);
+	btScalar	hi = btScalar(BT_LARGE_FLOAT);
+
+	btScalar oldaccumImpulse = m_accumulatedImpulse;
+	btScalar sum = oldaccumImpulse + clippedMotorImpulse;
+	m_accumulatedImpulse = sum > hi ? btScalar(0.) : sum < lo ? btScalar(0.) : sum;
+
+	clippedMotorImpulse = m_accumulatedImpulse - oldaccumImpulse;
+
+	// The impulse goes through internalApplyImpulse with a zero first vector and the
+	// inertia-scaled axis; the unused 'motorImp' vector that used to be built here was dropped.
+
+	body0->internalApplyImpulse(btVector3(0,0,0), body0->getInvInertiaTensorWorld()*axis,clippedMotorImpulse);
+	body1->internalApplyImpulse(btVector3(0,0,0), body1->getInvInertiaTensorWorld()*axis,-clippedMotorImpulse);
+
+
+	return clippedMotorImpulse;
+
+
+}
+
+//////////////////////////// End btRotationalLimitMotor ////////////////////////////////////
+
+
+
+
+//////////////////////////// btTranslationalLimitMotor ////////////////////////////////////
+
+
+int btTranslationalLimitMotor::testLimitValue(int limitIndex, btScalar test_value)
+{
+	// Classifies test_value against the limits of one axis and records both the state
+	// and the signed overshoot for that axis. Note the codes: 2 = below the lower limit,
+	// 1 = above the upper limit, 0 = no violation (also when lower > upper).
+	const btScalar lower = m_lowerLimit[limitIndex];
+	const btScalar upper = m_upperLimit[limitIndex];
+
+	int state = 0;//Free from violation
+	btScalar overshoot = btScalar(0.f);
+	if (!(lower > upper))
+	{
+		if (test_value < lower)
+		{
+			state = 2;//low limit violation
+			overshoot = test_value - lower;
+		}
+		else if (test_value > upper)
+		{
+			state = 1;//High limit violation
+			overshoot = test_value - upper;
+		}
+	}
+
+	m_currentLimit[limitIndex] = state;
+	m_currentLimitError[limitIndex] = overshoot;
+	return state;
+}
+
+
+
+btScalar btTranslationalLimitMotor::solveLinearAxis(
+	btScalar timeStep,
+	btScalar jacDiagABInv,
+	btRigidBody& body1,const btVector3 &pointInA,
+	btRigidBody& body2,const btVector3 &pointInB,
+	int limit_index,
+	const btVector3 & axis_normal_on_a,
+	const btVector3 & anchorPos)
+{
+	// Applies one corrective impulse along axis_normal_on_a for axis 'limit_index' and returns it (0 if within limits).
+	///find relative velocity
+	//    btVector3 rel_pos1 = pointInA - body1.getCenterOfMassPosition();
+	//    btVector3 rel_pos2 = pointInB - body2.getCenterOfMassPosition();
+	btVector3 rel_pos1 = anchorPos - body1.getCenterOfMassPosition();
+	btVector3 rel_pos2 = anchorPos - body2.getCenterOfMassPosition();
+
+	btVector3 vel1;
+	body1.internalGetVelocityInLocalPointObsolete(rel_pos1,vel1);
+	btVector3 vel2;
+	body2.internalGetVelocityInLocalPointObsolete(rel_pos2,vel2);
+	btVector3 vel = vel1 - vel2;
+
+	btScalar rel_vel = axis_normal_on_a.dot(vel);
+
+
+
+	/// apply displacement correction
+
+	//positional error (zeroth order error)
+	btScalar depth = -(pointInA - pointInB).dot(axis_normal_on_a);
+	btScalar	lo = btScalar(-BT_LARGE_FLOAT);
+	btScalar	hi = btScalar(BT_LARGE_FLOAT);
+
+	btScalar minLimit = m_lowerLimit[limit_index];
+	btScalar maxLimit = m_upperLimit[limit_index];
+
+	//handle the limits
+	if (minLimit < maxLimit)
+	{
+		{
+			if (depth > maxLimit)
+			{
+				depth -= maxLimit;
+				lo = btScalar(0.);
+
+			}
+			else
+			{
+				if (depth < minLimit)
+				{
+					depth -= minLimit;
+					hi = btScalar(0.);
+				}
+				else
+				{
+					return 0.0f;
+				}
+			}
+		}
+	}
+
+	btScalar normalImpulse= m_limitSoftness*(m_restitution*depth/timeStep - m_damping*rel_vel) * jacDiagABInv;
+
+
+	// An accumulated impulse that would leave [lo, hi] is reset to zero (not clamped to
+	// the bound); the impulse applied this call is the change in the accumulated value.
+
+	btScalar oldNormalImpulse = m_accumulatedImpulse[limit_index];
+	btScalar sum = oldNormalImpulse + normalImpulse;
+	m_accumulatedImpulse[limit_index] = sum > hi ? btScalar(0.) : sum < lo ? btScalar(0.) : sum;
+	normalImpulse = m_accumulatedImpulse[limit_index] - oldNormalImpulse;
+
+	// The impulse is applied through internalApplyImpulse below using the axis scaled by
+	// inverse mass and the inertia-scaled torque axes; the unused 'impulse_vector'
+	// temporary that used to be computed here was dropped.
+
+	btVector3 ftorqueAxis1 = rel_pos1.cross(axis_normal_on_a);
+	btVector3 ftorqueAxis2 = rel_pos2.cross(axis_normal_on_a);
+	body1.internalApplyImpulse(axis_normal_on_a*body1.getInvMass(), body1.getInvInertiaTensorWorld()*ftorqueAxis1,normalImpulse);
+	body2.internalApplyImpulse(axis_normal_on_a*body2.getInvMass(), body2.getInvInertiaTensorWorld()*ftorqueAxis2,-normalImpulse);
+
+
+
+
+	return normalImpulse;
+}
+
+//////////////////////////// End btTranslationalLimitMotor ////////////////////////////////////
+
+void btGeneric6DofConstraint::calculateAngleInfo()
+{
+	// Euler angles of frame B relative to frame A go to m_calculatedAxisAngleDiff; the three constraint axes are rebuilt below.
+	const btMatrix3x3& basisA = m_calculatedTransformA.getBasis();
+	const btMatrix3x3& basisB = m_calculatedTransformB.getBasis();
+	matrixToEulerXYZ(basisA.inverse()*basisB,m_calculatedAxisAngleDiff);
+	// in euler angle mode we do not actually constrain the angular velocity
+	// along the axes axis[0] and axis[2] (although we do use axis[1]) :
+	//
+	//    to get			constrain w2-w1 along		...not
+	//    ------			---------------------		------
+	//    d(angle[0])/dt = 0	ax[1] x ax[2]			ax[0]
+	//    d(angle[1])/dt = 0	ax[1]
+	//    d(angle[2])/dt = 0	ax[0] x ax[1]			ax[2]
+	//
+	// constraining w2-w1 along an axis 'a' means that a'*(w2-w1)=0.
+	// to prove the result for angle[0], write the expression for angle[0] from
+	// GetInfo1 then take the derivative. to prove this for angle[2] it is
+	// easier to take the euler rate expression for d(angle[2])/dt with respect
+	// to the components of w and set that to 0.
+	const btVector3 axis0 = basisB.getColumn(0);
+	const btVector3 axis2 = basisA.getColumn(2);
+
+	m_calculatedAxis[1] = axis2.cross(axis0);
+	m_calculatedAxis[0] = m_calculatedAxis[1].cross(axis2);
+	m_calculatedAxis[2] = axis0.cross(m_calculatedAxis[1]);
+
+	// all three axes are normalised only after the cross products above
+	for (int k = 0; k < 3; k++) m_calculatedAxis[k].normalize();
+}
+
+void btGeneric6DofConstraint::calculateTransforms()
+{	// Convenience overload: forwards both bodies' current center-of-mass transforms to the two-argument version.
+	calculateTransforms(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform());
+}
+
+void btGeneric6DofConstraint::calculateTransforms(const btTransform& transA,const btTransform& transB)
+{
+	// Recomputes the world-space constraint frames and the linear/angular info derived from them.
+	m_calculatedTransformA = transA * m_frameInA;
+	m_calculatedTransformB = transB * m_frameInB;
+	calculateLinearInfo();
+	calculateAngleInfo();
+
+	if(!m_useOffsetForConstraintFrame)
+	{
+		return;
+	}
+
+	//  get weight factors depending on masses
+	const btScalar invMassA = getRigidBodyA().getInvMass();
+	const btScalar invMassB = getRigidBodyB().getInvMass();
+	const btScalar invMassSum = invMassA + invMassB;
+
+	m_hasStaticBody = (invMassA < SIMD_EPSILON) || (invMassB < SIMD_EPSILON);
+	// even split when the inverse masses do not sum to a positive value
+	m_factA = (invMassSum > btScalar(0.f)) ? (invMassB / invMassSum) : btScalar(0.5f);
+	m_factB = btScalar(1.0f) - m_factA;
+}
+
+
+
+void btGeneric6DofConstraint::buildLinearJacobian(
+	btJacobianEntry & jacLinear,const btVector3 & normalWorld,
+	const btVector3 & pivotAInW,const btVector3 & pivotBInW)
+{
+	// Re-initialises jacLinear in place (placement new) for the given world-space direction,
+	// using each body's transposed basis and the pivot expressed relative to its center of mass.
+	const btMatrix3x3 world2A = m_rbA.getCenterOfMassTransform().getBasis().transpose();
+	const btMatrix3x3 world2B = m_rbB.getCenterOfMassTransform().getBasis().transpose();
+	const btVector3 relPosA = pivotAInW - m_rbA.getCenterOfMassPosition();
+	const btVector3 relPosB = pivotBInW - m_rbB.getCenterOfMassPosition();
+
+	new (&jacLinear) btJacobianEntry(world2A, world2B, relPosA, relPosB, normalWorld,
+		m_rbA.getInvInertiaDiagLocal(), m_rbA.getInvMass(),
+		m_rbB.getInvInertiaDiagLocal(), m_rbB.getInvMass());
+}
+
+
+
+void btGeneric6DofConstraint::buildAngularJacobian(
+	btJacobianEntry & jacAngular,const btVector3 & jointAxisW)
+{
+	// Re-initialises jacAngular in place (placement new) for the world-space joint axis.
+	const btMatrix3x3 world2A = m_rbA.getCenterOfMassTransform().getBasis().transpose();
+	const btMatrix3x3 world2B = m_rbB.getCenterOfMassTransform().getBasis().transpose();
+	new (&jacAngular) btJacobianEntry(jointAxisW, world2A, world2B,
+		m_rbA.getInvInertiaDiagLocal(),
+		m_rbB.getInvInertiaDiagLocal());
+}
+
+
+
+bool btGeneric6DofConstraint::testAngularLimitMotor(int axis_index)
+{
+	// Updates the motor's current position and limit state from the last computed angle; true if the axis needs torque.
+	btRotationalLimitMotor& motor = m_angularLimits[axis_index];
+	const btScalar angle = btAdjustAngleToLimits(m_calculatedAxisAngleDiff[axis_index], motor.m_loLimit, motor.m_hiLimit);
+	motor.m_currentPosition = angle;
+	motor.testLimitValue(angle);	//test limits
+	return motor.needApplyTorques();
+}
+
+
+
+void btGeneric6DofConstraint::buildJacobian()
+{
+#ifndef __SPU__
+	// Jacobians are only built here on the obsolete solving path; with the
+	// getInfo1/getInfo2 path (m_useSolveConstraintObsolete == false) this is a no-op.
+	if (!m_useSolveConstraintObsolete)
+	{
+		return;
+	}
+
+	// Clear accumulated impulses for the next simulation step
+	m_linearLimits.m_accumulatedImpulse.setValue(btScalar(0.), btScalar(0.), btScalar(0.));
+	for(int k = 0; k < 3; k++)
+	{
+		m_angularLimits[k].m_accumulatedImpulse = btScalar(0.);
+	}
+
+	//calculates transform
+	calculateTransforms(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform());
+
+	// Both pivots are the single inverse-mass-weighted anchor computed by calcAnchorPos(),
+	// not the individual frame origins.
+	calcAnchorPos();
+	const btVector3 pivotAInW = m_AnchorPos;
+	const btVector3 pivotBInW = m_AnchorPos;
+
+	//linear part
+	for (int axis = 0; axis < 3; axis++)
+	{
+		if (!m_linearLimits.isLimited(axis))
+		{
+			continue;
+		}
+
+		// the limit axis is a basis column of frame A or frame B, depending on the reference flag
+		const btTransform& referenceFrame = m_useLinearReferenceFrameA
+			? m_calculatedTransformA
+			: m_calculatedTransformB;
+		const btVector3 normalWorld = referenceFrame.getBasis().getColumn(axis);
+
+		buildLinearJacobian(
+			m_jacLinear[axis],normalWorld ,
+			pivotAInW,pivotBInW);
+	}
+
+	// angular part
+	for (int axis = 0; axis < 3; axis++)
+	{
+		//calculates error angle
+		if (!testAngularLimitMotor(axis))
+		{
+			continue;
+		}
+
+		// Create angular atom
+		const btVector3 jointAxisWorld = this->getAxis(axis);
+		buildAngularJacobian(m_jacAng[axis],jointAxisWorld);
+	}
+#endif //__SPU__
+}
+
+
+void btGeneric6DofConstraint::getInfo1 (btConstraintInfo1* info)
+{
+	// Reports how many constraint rows are needed this step (one per active linear or
+	// angular axis) and sets nub to 6 minus that count. On the obsolete path both are 0.
+	if (m_useSolveConstraintObsolete)
+	{
+		info->m_numConstraintRows = 0;
+		info->nub = 0;
+		return;
+	}
+
+	//prepare constraint
+	calculateTransforms(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform());
+	int activeRows = 0;
+	for(int axis = 0; axis < 3; axis++)
+	{
+		//test linear limits
+		if(m_linearLimits.needApplyForce(axis))
+		{
+			activeRows++;
+		}
+	}
+	for(int axis = 0; axis < 3; axis++)
+	{
+		//test angular limits
+		if(testAngularLimitMotor(axis))
+		{
+			activeRows++;
+		}
+	}
+	info->m_numConstraintRows = activeRows;
+	info->nub = 6 - activeRows;
+}
+
+void btGeneric6DofConstraint::getInfo1NonVirtual (btConstraintInfo1* info)
+{
+	// Unlike getInfo1(), this does not inspect the limits: it reserves a fixed number
+	// of rows (none on the obsolete path) and always reports nub = 0.
+	int rows = 6;//pre-allocate all 6
+	if (m_useSolveConstraintObsolete)
+	{
+		rows = 0;
+	}
+
+	info->m_numConstraintRows = rows;
+	info->nub = 0;
+}
+
+
+void btGeneric6DofConstraint::getInfo2 (btConstraintInfo2* info)
+{
+	btAssert(!m_useSolveConstraintObsolete);
+
+	// Fill the solver rows from the bodies' current transforms and velocities.
+	const btTransform& transA = m_rbA.getCenterOfMassTransform();
+	const btTransform& transB = m_rbB.getCenterOfMassTransform();
+	const btVector3& linVelA = m_rbA.getLinearVelocity();
+	const btVector3& linVelB = m_rbB.getLinearVelocity();
+	const btVector3& angVelA = m_rbA.getAngularVelocity();
+	const btVector3& angVelB = m_rbB.getAngularVelocity();
+
+	if(!m_useOffsetForConstraintFrame)
+	{ // leave old version for compatibility
+		const int nextRow = setLinearLimits(info, 0, transA,transB,linVelA,linVelB,angVelA,angVelB);
+		setAngularLimits(info, nextRow,transA,transB,linVelA,linVelB,angVelA,angVelB);
+		return;
+	}
+
+	// for stability better to solve angular limits first
+	const int nextRow = setAngularLimits(info, 0,transA,transB,linVelA,linVelB,angVelA,angVelB);
+	setLinearLimits(info, nextRow, transA,transB,linVelA,linVelB,angVelA,angVelB);
+}
+
+
+void btGeneric6DofConstraint::getInfo2NonVirtual (btConstraintInfo2* info, const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB)
+{
+	// Same row generation as getInfo2(), but driven by caller-supplied transforms and
+	// velocities; the cached transforms and angular limit state are refreshed first.
+	btAssert(!m_useSolveConstraintObsolete);
+
+	//prepare constraint
+	calculateTransforms(transA,transB);
+	for (int axis = 0; axis < 3; axis++)
+	{
+		testAngularLimitMotor(axis);
+	}
+
+	if(!m_useOffsetForConstraintFrame)
+	{ // leave old version for compatibility
+		const int nextRow = setLinearLimits(info, 0, transA,transB,linVelA,linVelB,angVelA,angVelB);
+		setAngularLimits(info, nextRow,transA,transB,linVelA,linVelB,angVelA,angVelB);
+		return;
+	}
+
+	// for stability better to solve angular limits first
+	const int nextRow = setAngularLimits(info, 0,transA,transB,linVelA,linVelB,angVelA,angVelB);
+	setLinearLimits(info, nextRow, transA,transB,linVelA,linVelB,angVelA,angVelB);
+}
+
+
+
+int btGeneric6DofConstraint::setLinearLimits(btConstraintInfo2* info, int row, const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB)
+{
+	// Emits one solver row per linear axis that needs force, starting at 'row', and returns
+	// the next free row. Each axis is described through a temporary rotational motor record.
+	//solve linear limits
+	btRotationalLimitMotor limot;
+	for (int i=0;i<3 ;i++ )
+	{
+		if(!m_linearLimits.needApplyForce(i))
+		{
+			continue;
+		}
+
+		// re-use rotational motor code
+		limot.m_bounce = btScalar(0.f);
+		limot.m_currentLimit = m_linearLimits.m_currentLimit[i];
+		limot.m_currentPosition = m_linearLimits.m_currentLinearDiff[i];
+		limot.m_currentLimitError  = m_linearLimits.m_currentLimitError[i];
+		limot.m_damping  = m_linearLimits.m_damping;
+		limot.m_enableMotor  = m_linearLimits.m_enableMotor[i];
+		limot.m_hiLimit  = m_linearLimits.m_upperLimit[i];
+		limot.m_limitSoftness  = m_linearLimits.m_limitSoftness;
+		limot.m_loLimit  = m_linearLimits.m_lowerLimit[i];
+		limot.m_maxLimitForce  = btScalar(0.f);
+		limot.m_maxMotorForce  = m_linearLimits.m_maxMotorForce[i];
+		limot.m_targetVelocity  = m_linearLimits.m_targetVelocity[i];
+		const int flags = m_flags >> (i * BT_6DOF_FLAGS_AXIS_SHIFT);
+		limot.m_normalCFM	= (flags & BT_6DOF_FLAGS_CFM_NORM) ? m_linearLimits.m_normalCFM[i] : info->cfm[0];
+		limot.m_stopCFM		= (flags & BT_6DOF_FLAGS_CFM_STOP) ? m_linearLimits.m_stopCFM[i] : info->cfm[0];
+		limot.m_stopERP		= (flags & BT_6DOF_FLAGS_ERP_STOP) ? m_linearLimits.m_stopERP[i] : info->erp;
+
+		btVector3 axis = m_calculatedTransformA.getBasis().getColumn(i);
+		if(!m_useOffsetForConstraintFrame)
+		{
+			row += get_limit_motor_info2(&limot, transA,transB,linVelA,linVelB,angVelA,angVelB, info, row, axis, 0);
+			continue;
+		}
+
+		// rotations around orthos to current axis: disallowed only when both other angular axes are at a limit
+		const bool bothOrthoLimited = m_angularLimits[(i + 1) % 3].m_currentLimit && m_angularLimits[(i + 2) % 3].m_currentLimit;
+		const int rotAllowed = bothOrthoLimited ? 0 : 1;
+		row += get_limit_motor_info2(&limot, transA,transB,linVelA,linVelB,angVelA,angVelB, info, row, axis, 0, rotAllowed);
+	}
+	return row;
+}
+
+
+
+int btGeneric6DofConstraint::setAngularLimits(btConstraintInfo2 *info, int row_offset, const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB)
+{
+	// Emits one solver row per angular axis that needs torque, starting at row_offset,
+	// and returns the index of the next free row.
+	int row = row_offset;
+
+	//solve angular limits
+	for (int i=0;i<3 ;i++ )
+	{
+		if(!getRotationalLimitMotor(i)->needApplyTorques())
+		{
+			continue;
+		}
+
+		// parameters without a per-axis override (flag bit clear) are refreshed
+		// from the solver-wide values carried by 'info'
+		const int flags = m_flags >> ((i + 3) * BT_6DOF_FLAGS_AXIS_SHIFT);
+		if(!(flags & BT_6DOF_FLAGS_CFM_NORM))
+			m_angularLimits[i].m_normalCFM = info->cfm[0];
+		if(!(flags & BT_6DOF_FLAGS_CFM_STOP))
+			m_angularLimits[i].m_stopCFM = info->cfm[0];
+		if(!(flags & BT_6DOF_FLAGS_ERP_STOP))
+			m_angularLimits[i].m_stopERP = info->erp;
+
+		btVector3 axis = getAxis(i);
+		row += get_limit_motor_info2(getRotationalLimitMotor(i),
+											transA,transB,linVelA,linVelB,angVelA,angVelB, info,row,axis,1);
+	}
+	return row;
+}
+
+
+
+
+void	btGeneric6DofConstraint::updateRHS(btScalar	timeStep)
+{
+	(void)timeStep;
+	// intentionally a no-op: the parameter is only referenced, nothing is updated
+}
+
+
+void btGeneric6DofConstraint::setFrames(const btTransform& frameA, const btTransform& frameB)
+{	// Replaces both stored constraint frames and refreshes everything derived from them.
+	m_frameInA = frameA;
+	m_frameInB = frameB;
+	buildJacobian();	// only does work when m_useSolveConstraintObsolete is set
+	calculateTransforms();
+}
+
+
+
+btVector3 btGeneric6DofConstraint::getAxis(int axis_index) const
+{	// Returns the cached constraint axis (by value) as last written by calculateAngleInfo(); axis_index is not range-checked.
+	return m_calculatedAxis[axis_index];
+}
+
+
+btScalar	btGeneric6DofConstraint::getRelativePivotPosition(int axisIndex) const
+{	// Returns one component of the cached linear offset as last written by calculateLinearInfo(); axisIndex is not range-checked.
+	return m_calculatedLinearDiff[axisIndex];
+}
+
+
+btScalar btGeneric6DofConstraint::getAngle(int axisIndex) const
+{	// Returns one cached Euler angle as last written by calculateAngleInfo(); axisIndex is not range-checked.
+	return m_calculatedAxisAngleDiff[axisIndex];
+}
+
+
+
+void btGeneric6DofConstraint::calcAnchorPos(void)
+{
+	// Places m_AnchorPos between the origins of the two calculated constraint frames,
+	// weighted by the bodies' inverse masses.
+	const btScalar invMassA = m_rbA.getInvMass();
+	const btScalar invMassB = m_rbB.getInvMass();
+
+	// weight given to frame A's origin; exactly 1 when body B has zero inverse mass,
+	// which also avoids dividing by zero when both inverse masses are zero
+	const btScalar weightA = (invMassB == btScalar(0.0))
+		? btScalar(1.0)
+		: invMassA / (invMassA + invMassB);
+
+	const btVector3& originA = m_calculatedTransformA.getOrigin();
+	const btVector3& originB = m_calculatedTransformB.getOrigin();
+
+	m_AnchorPos = originA * weightA + originB * (btScalar(1.0) - weightA);
+}
+
+
+
+void btGeneric6DofConstraint::calculateLinearInfo()
+{	// Expresses the offset between the two frame origins in frame A's basis and tests each component against its limit.
+	const btVector3 worldOffset = m_calculatedTransformB.getOrigin() - m_calculatedTransformA.getOrigin();
+	m_calculatedLinearDiff = m_calculatedTransformA.getBasis().inverse() * worldOffset;
+	for(int axis = 0; axis < 3; axis++)
+	{
+		m_linearLimits.m_currentLinearDiff[axis] = m_calculatedLinearDiff[axis];
+		m_linearLimits.testLimitValue(axis, m_calculatedLinearDiff[axis]);
+	}
+}
+
+
+
+int btGeneric6DofConstraint::get_limit_motor_info2(
+	btRotationalLimitMotor * limot,
+	const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB,
+	btConstraintInfo2 *info, int row, btVector3& ax1, int rotational,int rotAllowed)
+{	// Fills solver row 'row' for one limit/motor axis 'ax1'; returns 1 if a row was written, 0 if the axis is neither powered nor at a limit.
+    int srow = row * info->rowskip; // start offset of this row inside the info arrays
+    int powered = limot->m_enableMotor;
+    int limit = limot->m_currentLimit;
+    if (powered || limit)
+    {   // if the joint is powered, or has joint limits, add in the extra row
+        btScalar *J1 = rotational ? info->m_J1angularAxis : info->m_J1linearAxis;
+        btScalar *J2 = rotational ? info->m_J2angularAxis : 0; // J2 is only dereferenced for rotational rows
+        J1[srow+0] = ax1[0];
+        J1[srow+1] = ax1[1];
+        J1[srow+2] = ax1[2];
+        if(rotational)
+        {
+            J2[srow+0] = -ax1[0];
+            J2[srow+1] = -ax1[1];
+            J2[srow+2] = -ax1[2];
+        }
+        if((!rotational))
+        {
+			if (m_useOffsetForConstraintFrame)
+			{
+				btVector3 tmpA, tmpB, relA, relB;
+				// get vector from bodyB to frameB in WCS
+				relB = m_calculatedTransformB.getOrigin() - transB.getOrigin();
+				// get its projection to constraint axis
+				btVector3 projB = ax1 * relB.dot(ax1);
+				// get vector directed from bodyB to constraint axis (and orthogonal to it)
+				btVector3 orthoB = relB - projB;
+				// same for bodyA
+				relA = m_calculatedTransformA.getOrigin() - transA.getOrigin();
+				btVector3 projA = ax1 * relA.dot(ax1);
+				btVector3 orthoA = relA - projA;
+				// get desired offset between frames A and B along constraint axis
+				btScalar desiredOffs = limot->m_currentPosition - limot->m_currentLimitError;
+				// desired vector from projection of center of bodyA to projection of center of bodyB to constraint axis
+				btVector3 totalDist = projA + ax1 * desiredOffs - projB;
+				// get offset vectors relA and relB
+				relA = orthoA + totalDist * m_factA;
+				relB = orthoB - totalDist * m_factB;
+				tmpA = relA.cross(ax1);
+				tmpB = relB.cross(ax1);
+				if(m_hasStaticBody && (!rotAllowed)) // NOTE(review): scaling is applied only with a static body and rotation disallowed - rationale not visible here
+				{
+					tmpA *= m_factA;
+					tmpB *= m_factB;
+				}
+				int i;
+				for (i=0; i<3; i++) info->m_J1angularAxis[srow+i] = tmpA[i];
+				for (i=0; i<3; i++) info->m_J2angularAxis[srow+i] = -tmpB[i];
+			} else
+			{
+				btVector3 ltd;	// Linear Torque Decoupling vector
+				btVector3 c = m_calculatedTransformB.getOrigin() - transA.getOrigin();
+				ltd = c.cross(ax1);
+				info->m_J1angularAxis[srow+0] = ltd[0];
+				info->m_J1angularAxis[srow+1] = ltd[1];
+				info->m_J1angularAxis[srow+2] = ltd[2];
+
+				c = m_calculatedTransformB.getOrigin() - transB.getOrigin();
+				ltd = -c.cross(ax1);
+				info->m_J2angularAxis[srow+0] = ltd[0];
+				info->m_J2angularAxis[srow+1] = ltd[1];
+				info->m_J2angularAxis[srow+2] = ltd[2];
+			}
+        }
+        // if we're limited low and high simultaneously, the joint motor is
+        // ineffective
+        if (limit && (limot->m_loLimit == limot->m_hiLimit)) powered = 0;
+        info->m_constraintError[srow] = btScalar(0.f);
+        if (powered)
+        {
+			info->cfm[srow] = limot->m_normalCFM;
+            if(!limit)
+            {
+				btScalar tag_vel = rotational ? limot->m_targetVelocity : -limot->m_targetVelocity;
+
+				btScalar mot_fact = getMotorFactor(	limot->m_currentPosition, 
+													limot->m_loLimit,
+													limot->m_hiLimit, 
+													tag_vel, 
+													info->fps * limot->m_stopERP);
+				info->m_constraintError[srow] += mot_fact * limot->m_targetVelocity; // NOTE(review): tag_vel is negated for linear rows above, but the un-negated target velocity is used here - confirm intended
+                info->m_lowerLimit[srow] = -limot->m_maxMotorForce;
+                info->m_upperLimit[srow] = limot->m_maxMotorForce;
+            }
+        }
+        if(limit)
+        {
+            btScalar k = info->fps * limot->m_stopERP;
+			if(!rotational)
+			{
+				info->m_constraintError[srow] += k * limot->m_currentLimitError;
+			}
+			else
+			{
+				info->m_constraintError[srow] += -k * limot->m_currentLimitError;
+			}
+			info->cfm[srow] = limot->m_stopCFM; // replaces the normalCFM written above when the axis is both powered and at a limit
+            if (limot->m_loLimit == limot->m_hiLimit)
+            {   // limited low and high simultaneously
+                info->m_lowerLimit[srow] = -SIMD_INFINITY;
+                info->m_upperLimit[srow] = SIMD_INFINITY;
+            }
+            else
+            {
+                if (limit == 1) // limit code 1: row value restricted to [0, +inf); any other code: (-inf, 0]
+                {
+                    info->m_lowerLimit[srow] = 0;
+                    info->m_upperLimit[srow] = SIMD_INFINITY;
+                }
+                else
+                {
+                    info->m_lowerLimit[srow] = -SIMD_INFINITY;
+                    info->m_upperLimit[srow] = 0;
+                }
+                // deal with bounce
+                if (limot->m_bounce > 0)
+                {
+                    // calculate joint velocity
+                    btScalar vel;
+                    if (rotational)
+                    {
+                        vel = angVelA.dot(ax1);
+//make sure that if no body -> angVelB == zero vec
+//                        if (body1)
+                            vel -= angVelB.dot(ax1);
+                    }
+                    else
+                    {
+                        vel = linVelA.dot(ax1);
+//make sure that if no body -> angVelB == zero vec
+//                        if (body1)
+                            vel -= linVelB.dot(ax1);
+                    }
+                    // only apply bounce if the velocity is incoming, and if the
+                    // resulting c[] exceeds what we already have.
+                    if (limit == 1)
+                    {
+                        if (vel < 0)
+                        {
+                            btScalar newc = -limot->m_bounce* vel;
+                            if (newc > info->m_constraintError[srow]) 
+								info->m_constraintError[srow] = newc;
+                        }
+                    }
+                    else
+                    {
+                        if (vel > 0)
+                        {
+                            btScalar newc = -limot->m_bounce * vel;
+                            if (newc < info->m_constraintError[srow]) 
+								info->m_constraintError[srow] = newc;
+                        }
+                    }
+                }
+            }
+        }
+        return 1;
+    }
+    else return 0;
+}
+
+
+
+
+
+
+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+	///If no axis is provided, it uses the default axis for this constraint.
+void btGeneric6DofConstraint::setParam(int num, btScalar value, int axis)
+{
+	// Axes 0..2 address the linear limits, axes 3..5 the angular limit motors. Besides
+	// storing the value, the matching "overridden" bit for that axis is set in m_flags.
+	// An unknown parameter or an axis outside 0..5 only triggers btAssertConstrParams.
+
+	if((axis < 0) || (axis >= 6))
+	{
+		btAssertConstrParams(0);
+		return;
+	}
+
+	const bool isLinear = (axis < 3);
+	const int angularIndex = axis - 3;	// only used when !isLinear
+	// each axis owns its own group of flag bits inside m_flags
+	const int flagShift = axis * BT_6DOF_FLAGS_AXIS_SHIFT;
+
+	switch(num)
+	{
+		case BT_CONSTRAINT_STOP_ERP : 
+			if(isLinear)
+				m_linearLimits.m_stopERP[axis] = value;
+			else
+				m_angularLimits[angularIndex].m_stopERP = value;
+			m_flags |= BT_6DOF_FLAGS_ERP_STOP << flagShift;
+			break;
+
+		case BT_CONSTRAINT_STOP_CFM : 
+			if(isLinear)
+				m_linearLimits.m_stopCFM[axis] = value;
+			else
+				m_angularLimits[angularIndex].m_stopCFM = value;
+			m_flags |= BT_6DOF_FLAGS_CFM_STOP << flagShift;
+			break;
+
+		case BT_CONSTRAINT_CFM : 
+			if(isLinear)
+				m_linearLimits.m_normalCFM[axis] = value;
+			else
+				m_angularLimits[angularIndex].m_normalCFM = value;
+			m_flags |= BT_6DOF_FLAGS_CFM_NORM << flagShift;
+			break;
+
+		default : 
+			btAssertConstrParams(0);
+	}
+}
+
+	///return the local value of parameter
+btScalar btGeneric6DofConstraint::getParam(int num, int axis) const 
+{
+	// Counterpart of setParam(): axes 0..2 read the linear limits, axes 3..5 the angular
+	// limit motors. btAssertConstrParams checks that the value was overridden for that axis.
+	// An unknown parameter or an axis outside 0..5 yields 0.
+	btScalar retVal = 0;
+	if((axis < 0) || (axis >= 6))
+	{
+		btAssertConstrParams(0);
+		return retVal;
+	}
+
+	const bool isLinear = (axis < 3);
+	const int angularIndex = axis - 3;	// only used when !isLinear
+
+	switch(num)
+	{
+		case BT_CONSTRAINT_STOP_ERP : 
+			btAssertConstrParams(m_flags & (BT_6DOF_FLAGS_ERP_STOP << (axis * BT_6DOF_FLAGS_AXIS_SHIFT)));
+			if(isLinear)
+				retVal = m_linearLimits.m_stopERP[axis];
+			else
+				retVal = m_angularLimits[angularIndex].m_stopERP;
+			break;
+
+		case BT_CONSTRAINT_STOP_CFM : 
+			btAssertConstrParams(m_flags & (BT_6DOF_FLAGS_CFM_STOP << (axis * BT_6DOF_FLAGS_AXIS_SHIFT)));
+			if(isLinear)
+				retVal = m_linearLimits.m_stopCFM[axis];
+			else
+				retVal = m_angularLimits[angularIndex].m_stopCFM;
+			break;
+
+		case BT_CONSTRAINT_CFM : 
+			btAssertConstrParams(m_flags & (BT_6DOF_FLAGS_CFM_NORM << (axis * BT_6DOF_FLAGS_AXIS_SHIFT)));
+			if(isLinear)
+				retVal = m_linearLimits.m_normalCFM[axis];
+			else
+				retVal = m_angularLimits[angularIndex].m_normalCFM;
+			break;
+
+		default : 
+			btAssertConstrParams(0);
+	}
+
+	// still 0 if 'num' matched no case above
+
+	return retVal;
+}
+
+ 
+
+void btGeneric6DofConstraint::setAxis(const btVector3& axis1,const btVector3& axis2)
+{
+	// Builds a world-space frame whose basis columns are (x, y, z) with z = axis1, y = axis2, then re-derives both local frames.
+	const btVector3 zDir = axis1.normalized();
+	const btVector3 yDir = axis2.normalized();
+	const btVector3 xDir = yDir.cross(zDir); // we want right coordinate system
+
+	btTransform frameInW;
+	frameInW.setIdentity();
+	frameInW.getBasis().setValue(	xDir[0], yDir[0], zDir[0],
+	                                xDir[1], yDir[1], zDir[1],
+	                                xDir[2], yDir[2], zDir[2]);
+
+	// now get constraint frame in local coordinate systems
+	m_frameInA = m_rbA.getCenterOfMassTransform().inverse() * frameInW;
+	m_frameInB = m_rbB.getCenterOfMassTransform().inverse() * frameInW;
+	calculateTransforms();
+}
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h b/src/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h
new file mode 100644
index 00000000..b4410811
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h
@@ -0,0 +1,614 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/// 2009 March: btGeneric6DofConstraint refactored by Roman Ponomarev
+/// Added support for generic constraint solver through getInfo1/getInfo2 methods
+
+/*
+2007-09-09
+btGeneric6DofConstraint Refactored by Francisco León
+email: projectileman@yahoo.com
+http://gimpact.sf.net
+*/
+
+
+#ifndef BT_GENERIC_6DOF_CONSTRAINT_H
+#define BT_GENERIC_6DOF_CONSTRAINT_H
+
+#include "LinearMath/btVector3.h"
+#include "btJacobianEntry.h"
+#include "btTypedConstraint.h"
+
+class btRigidBody;
+
+
+
+
+//! Rotation Limit structure for generic joints
+class btRotationalLimitMotor
+{
+public:
+    //! limit_parameters
+    //!@{
+    btScalar m_loLimit;//!< joint limit
+    btScalar m_hiLimit;//!< joint limit
+    btScalar m_targetVelocity;//!< target motor velocity
+    btScalar m_maxMotorForce;//!< max force on motor
+    btScalar m_maxLimitForce;//!< max force on limit
+    btScalar m_damping;//!< Damping.
+    btScalar m_limitSoftness;//!< Relaxation factor
+    btScalar m_normalCFM;//!< Constraint force mixing factor
+    btScalar m_stopERP;//!< Error tolerance factor when joint is at limit
+    btScalar m_stopCFM;//!< Constraint force mixing factor when joint is at limit
+    btScalar m_bounce;//!< restitution factor
+    bool m_enableMotor;//!< motor on/off switch, read by needApplyTorques()
+    //!@}
+
+    //! temp_variables
+    //!@{
+    btScalar m_currentLimitError;//!<  How much is violated this limit
+    btScalar m_currentPosition;     //!<  current value of angle
+    int m_currentLimit;//!< 0=free, 1=at lo limit, 2=at hi limit
+    btScalar m_accumulatedImpulse;
+    //!@}
+
+    //! Default state: motor off, no limit hit, and lo > hi so that isLimited() reports false.
+    btRotationalLimitMotor()
+    {
+        m_loLimit = 1.0f;
+        m_hiLimit = -1.0f;
+        m_targetVelocity = 0;
+        m_maxMotorForce = 0.1f;
+        m_maxLimitForce = 300.0f;
+        m_damping = 1.0f;
+        m_limitSoftness = 0.5f;
+        m_normalCFM = 0.f;
+        m_stopERP = 0.2f;
+        m_stopCFM = 0.f;
+        m_bounce = 0.0f;
+        m_enableMotor = false;
+        m_currentLimitError = 0;
+        m_currentPosition = 0; // was left indeterminate before
+        m_currentLimit = 0;
+        m_accumulatedImpulse = 0.f;
+    }
+
+    //! Copies every member; previously four of them were skipped and stayed indeterminate in the copy.
+    btRotationalLimitMotor(const btRotationalLimitMotor & limot)
+    {
+        m_loLimit = limot.m_loLimit;
+        m_hiLimit = limot.m_hiLimit;
+        m_targetVelocity = limot.m_targetVelocity;
+        m_maxMotorForce = limot.m_maxMotorForce;
+        m_maxLimitForce = limot.m_maxLimitForce;
+        m_damping = limot.m_damping;
+        m_limitSoftness = limot.m_limitSoftness;
+        m_normalCFM = limot.m_normalCFM;
+        m_stopERP = limot.m_stopERP;
+        m_stopCFM = limot.m_stopCFM;
+        m_bounce = limot.m_bounce;
+        m_enableMotor = limot.m_enableMotor;
+        m_currentLimitError = limot.m_currentLimitError;
+        m_currentPosition = limot.m_currentPosition;
+        m_currentLimit = limot.m_currentLimit;
+        m_accumulatedImpulse = limot.m_accumulatedImpulse;
+    }
+
+    //! Is limited: returns false only when m_loLimit > m_hiLimit
+    bool isLimited() const
+    {
+    	if(m_loLimit > m_hiLimit) return false;
+    	return true;
+    }
+
+    //! Need apply correction: returns false only when no limit is hit and the motor is off
+    bool needApplyTorques() const
+    {
+    	if(m_currentLimit == 0 && m_enableMotor == false) return false;
+    	return true;
+    }
+
+    //! calculates error: calculates m_currentLimit and m_currentLimitError for test_value
+    int testLimitValue(btScalar test_value);
+
+    //! apply the correction impulses for two bodies
+    btScalar solveAngularLimits(btScalar timeStep,btVector3& axis, btScalar jacDiagABInv,btRigidBody * body0, btRigidBody * body1);
+};
+
+
+
+class btTranslationalLimitMotor
+{
+public:
+	btVector3 m_lowerLimit;//!< the constraint lower limits
+    btVector3 m_upperLimit;//!< the constraint upper limits
+    btVector3 m_accumulatedImpulse;
+    //! Linear_Limit_parameters
+    //!@{
+    btScalar	m_limitSoftness;//!< Softness for linear limit
+    btScalar	m_damping;//!< Damping for linear limit
+    btScalar	m_restitution;//!< Bounce parameter for linear limit
+	btVector3	m_normalCFM;//!< Constraint force mixing factor
+    btVector3	m_stopERP;//!< Error tolerance factor when joint is at limit
+	btVector3	m_stopCFM;//!< Constraint force mixing factor when joint is at limit
+    //!@}
+	bool		m_enableMotor[3];
+    btVector3	m_targetVelocity;//!< target motor velocity
+    btVector3	m_maxMotorForce;//!< max force on motor
+    btVector3	m_currentLimitError;//!<  How much is violated this limit
+    btVector3	m_currentLinearDiff;//!<  Current relative offset of constraint frames
+    int			m_currentLimit[3];//!< 0=free, 1=at lower limit, 2=at upper limit
+
+    btTranslationalLimitMotor() // all axes locked at 0 (lower == upper), motors off, no limit hit
+    {
+    	m_lowerLimit.setValue(0.f,0.f,0.f);
+    	m_upperLimit.setValue(0.f,0.f,0.f);
+    	m_accumulatedImpulse.setValue(0.f,0.f,0.f);
+		m_normalCFM.setValue(0.f, 0.f, 0.f);
+		m_stopERP.setValue(0.2f, 0.2f, 0.2f);
+		m_stopCFM.setValue(0.f, 0.f, 0.f);
+		m_currentLimitError.setValue(0.f, 0.f, 0.f); // was left indeterminate before
+		m_currentLinearDiff.setValue(0.f, 0.f, 0.f); // was left indeterminate before
+    	m_limitSoftness = 0.7f;
+    	m_damping = btScalar(1.0f);
+    	m_restitution = btScalar(0.5f);
+		for(int i=0; i < 3; i++) 
+		{
+			m_enableMotor[i] = false;
+			m_targetVelocity[i] = btScalar(0.f);
+			m_maxMotorForce[i] = btScalar(0.f);
+			m_currentLimit[i] = 0; // 0=free; needApplyForce() reads this, so it must not be garbage
+		}
+    }
+
+    btTranslationalLimitMotor(const btTranslationalLimitMotor & other ) // copies every member
+    {
+    	m_lowerLimit = other.m_lowerLimit;
+    	m_upperLimit = other.m_upperLimit;
+    	m_accumulatedImpulse = other.m_accumulatedImpulse;
+    	m_limitSoftness = other.m_limitSoftness ;
+    	m_damping = other.m_damping;
+    	m_restitution = other.m_restitution;
+		m_normalCFM = other.m_normalCFM;
+		m_stopERP = other.m_stopERP;
+		m_stopCFM = other.m_stopCFM;
+		m_currentLimitError = other.m_currentLimitError; // previously skipped
+		m_currentLinearDiff = other.m_currentLinearDiff; // previously skipped
+		for(int i=0; i < 3; i++) 
+		{
+			m_enableMotor[i] = other.m_enableMotor[i];
+			m_targetVelocity[i] = other.m_targetVelocity[i];
+			m_maxMotorForce[i] = other.m_maxMotorForce[i];
+			m_currentLimit[i] = other.m_currentLimit[i]; // previously skipped
+		}
+    }
+
+    //! Test limit
+	/*!
+    - free means upper < lower: isLimited() returns false
+    - locked means upper == lower: isLimited() returns true
+    - limited means upper > lower: isLimited() returns true
+    - limitIndex: 0..2, the linear axis to test
+    */
+    inline bool	isLimited(int limitIndex) const
+    {
+       return (m_upperLimit[limitIndex] >= m_lowerLimit[limitIndex]);
+    }
+    inline bool needApplyForce(int limitIndex) const // false only when the axis is free and its motor is off
+    {
+    	if(m_currentLimit[limitIndex] == 0 && m_enableMotor[limitIndex] == false) return false;
+    	return true;
+    }
+	int testLimitValue(int limitIndex, btScalar test_value);
+
+    btScalar solveLinearAxis(
+    	btScalar timeStep,
+        btScalar jacDiagABInv,
+        btRigidBody& body1,const btVector3 &pointInA,
+        btRigidBody& body2,const btVector3 &pointInB,
+        int limit_index,
+        const btVector3 & axis_normal_on_a,
+		const btVector3 & anchorPos);
+};
+
+enum bt6DofFlags
+{
+	BT_6DOF_FLAGS_CFM_NORM = 1, // asserted by getParam() before it returns an axis' m_normalCFM
+	BT_6DOF_FLAGS_CFM_STOP = 2, // asserted by getParam() before it returns an axis' m_stopCFM
+	BT_6DOF_FLAGS_ERP_STOP = 4 // asserted by getParam() before it returns an axis' m_stopERP
+};
+#define BT_6DOF_FLAGS_AXIS_SHIFT 3 // bits per axis: the flag for axis N is tested as (flag << (N * BT_6DOF_FLAGS_AXIS_SHIFT)) in m_flags
+
+
+/// btGeneric6DofConstraint between two rigidbodies each with a pivotpoint that describes the axis location in local space
+/*!
+btGeneric6DofConstraint can leave any of the 6 degree of freedom 'free' or 'locked'.
+currently this limit supports rotational motors<br>
+<ul>
+<li> For Linear limits, use btGeneric6DofConstraint.setLinearUpperLimit, btGeneric6DofConstraint.setLinearLowerLimit. You can set the parameters with the btTranslationalLimitMotor structure accessible through the btGeneric6DofConstraint.getTranslationalLimitMotor method.
+At this moment translational motors are not supported. Maybe in the future. </li>
+
+<li> For Angular limits, use the btRotationalLimitMotor structure for configuring the limit.
+This is accessible through btGeneric6DofConstraint.getLimitMotor method,
+This brings support for limit parameters and motors. </li>
+
+<li> Angular limits have these possible ranges:
+<table border=1 >
+<tr>
+	<td><b>AXIS</b></td>
+	<td><b>MIN ANGLE</b></td>
+	<td><b>MAX ANGLE</b></td>
+</tr><tr>
+	<td>X</td>
+	<td>-PI</td>
+	<td>PI</td>
+</tr><tr>
+	<td>Y</td>
+	<td>-PI/2</td>
+	<td>PI/2</td>
+</tr><tr>
+	<td>Z</td>
+	<td>-PI</td>
+	<td>PI</td>
+</tr>
+</table>
+</li>
+</ul>
+
+*/
+class btGeneric6DofConstraint : public btTypedConstraint
+{
+protected:
+
+	//! relative_frames
+    //!@{
+	btTransform	m_frameInA;//!< the constraint space w.r.t body A
+    btTransform	m_frameInB;//!< the constraint space w.r.t body B
+    //!@}
+
+    //! Jacobians
+    //!@{
+    btJacobianEntry	m_jacLinear[3];//!< 3 orthogonal linear constraints
+    btJacobianEntry	m_jacAng[3];//!< 3 orthogonal angular constraints
+    //!@}
+
+	//! Linear_Limit_parameters
+    //!@{
+    btTranslationalLimitMotor m_linearLimits;
+    //!@}
+
+
+    //! hinge_parameters
+    //!@{
+    btRotationalLimitMotor m_angularLimits[3];
+	//!@}
+
+
+protected:
+    //! temporary variables
+    //!@{
+    btScalar m_timeStep;
+    btTransform m_calculatedTransformA;
+    btTransform m_calculatedTransformB;
+    btVector3 m_calculatedAxisAngleDiff;
+    btVector3 m_calculatedAxis[3];
+    btVector3 m_calculatedLinearDiff;
+	btScalar	m_factA;
+	btScalar	m_factB;
+	bool		m_hasStaticBody;
+    
+	btVector3 m_AnchorPos; // point between pivots of bodies A and B to solve linear axes
+
+    bool	m_useLinearReferenceFrameA;
+	bool	m_useOffsetForConstraintFrame;
+    
+	int		m_flags; // per-axis bt6DofFlags bits, each flag shifted left by (axis * BT_6DOF_FLAGS_AXIS_SHIFT)
+
+    //!@}
+
+    btGeneric6DofConstraint&	operator=(btGeneric6DofConstraint&	other) // assignment is unsupported: asserts and leaves *this untouched
+    {
+        btAssert(0);
+        (void) other;
+        return *this;
+    }
+
+
+	int setAngularLimits(btConstraintInfo2 *info, int row_offset,const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB);
+
+	int setLinearLimits(btConstraintInfo2 *info, int row, const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB);
+
+    void buildLinearJacobian(
+        btJacobianEntry & jacLinear,const btVector3 & normalWorld,
+        const btVector3 & pivotAInW,const btVector3 & pivotBInW);
+
+    void buildAngularJacobian(btJacobianEntry & jacAngular,const btVector3 & jointAxisW);
+
+	// tests linear limits
+	void calculateLinearInfo();
+
+	//! calcs the euler angles between the two bodies.
+    void calculateAngleInfo();
+
+
+
+public:
+
+	///for backwards compatibility during the transition to 'getInfo/getInfo2'
+	bool		m_useSolveConstraintObsolete;
+
+    btGeneric6DofConstraint(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB ,bool useLinearReferenceFrameA);
+    btGeneric6DofConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameB);
+    
+	//! Calcs global transform of the offsets
+	/*!
+	Calcs the global transform for the joint offset for body A and B, and also calcs the angle differences between the bodies.
+	\sa btGeneric6DofConstraint.getCalculatedTransformA , btGeneric6DofConstraint.getCalculatedTransformB, btGeneric6DofConstraint.calculateAngleInfo
+	*/
+    void calculateTransforms(const btTransform& transA,const btTransform& transB);
+
+	void calculateTransforms();
+
+	//! Gets the global transform of the offset for body A
+    /*!
+    \sa btGeneric6DofConstraint.getFrameOffsetA, btGeneric6DofConstraint.getFrameOffsetB, btGeneric6DofConstraint.calculateAngleInfo.
+    */
+    const btTransform & getCalculatedTransformA() const
+    {
+    	return m_calculatedTransformA;
+    }
+
+    //! Gets the global transform of the offset for body B
+    /*!
+    \sa btGeneric6DofConstraint.getFrameOffsetA, btGeneric6DofConstraint.getFrameOffsetB, btGeneric6DofConstraint.calculateAngleInfo.
+    */
+    const btTransform & getCalculatedTransformB() const
+    {
+    	return m_calculatedTransformB;
+    }
+
+    const btTransform & getFrameOffsetA() const
+    {
+    	return m_frameInA;
+    }
+
+    const btTransform & getFrameOffsetB() const
+    {
+    	return m_frameInB;
+    }
+
+
+    btTransform & getFrameOffsetA()
+    {
+    	return m_frameInA;
+    }
+
+    btTransform & getFrameOffsetB()
+    {
+    	return m_frameInB;
+    }
+
+
+	//! performs Jacobian calculation, and also calculates angle differences and axis
+    virtual void	buildJacobian();
+
+	virtual void getInfo1 (btConstraintInfo1* info);
+
+	void getInfo1NonVirtual (btConstraintInfo1* info);
+
+	virtual void getInfo2 (btConstraintInfo2* info);
+
+	void getInfo2NonVirtual (btConstraintInfo2* info,const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB);
+
+
+    void	updateRHS(btScalar	timeStep);
+
+	//! Get the rotation axis in global coordinates
+	/*!
+	\pre btGeneric6DofConstraint.buildJacobian must be called previously.
+	*/
+    btVector3 getAxis(int axis_index) const;
+
+    //! Get the relative Euler angle
+    /*!
+	\pre btGeneric6DofConstraint::calculateTransforms() must be called previously.
+	*/
+    btScalar getAngle(int axis_index) const;
+
+	//! Get the relative position of the constraint pivot
+    /*!
+	\pre btGeneric6DofConstraint::calculateTransforms() must be called previously.
+	*/
+	btScalar getRelativePivotPosition(int axis_index) const;
+
+	void setFrames(const btTransform & frameA, const btTransform & frameB);
+
+	//! Test angular limit.
+	/*!
+	Calculates angular correction and returns true if limit needs to be corrected.
+	\pre btGeneric6DofConstraint::calculateTransforms() must be called previously.
+	*/
+    bool testAngularLimitMotor(int axis_index);
+
+    void	setLinearLowerLimit(const btVector3& linearLower)
+    {
+    	m_linearLimits.m_lowerLimit = linearLower;
+    }
+
+	void	getLinearLowerLimit(btVector3& linearLower) const
+	{
+		linearLower = m_linearLimits.m_lowerLimit;
+	}
+
+	void	setLinearUpperLimit(const btVector3& linearUpper)
+	{
+		m_linearLimits.m_upperLimit = linearUpper;
+	}
+
+	void	getLinearUpperLimit(btVector3& linearUpper) const
+	{
+		linearUpper = m_linearLimits.m_upperLimit;
+	}
+
+    void	setAngularLowerLimit(const btVector3& angularLower)
+    {
+		for(int i = 0; i < 3; i++) 
+			m_angularLimits[i].m_loLimit = btNormalizeAngle(angularLower[i]);
+    }
+
+	void	getAngularLowerLimit(btVector3& angularLower) const
+	{
+		for(int i = 0; i < 3; i++) 
+			angularLower[i] = m_angularLimits[i].m_loLimit;
+	}
+
+    void	setAngularUpperLimit(const btVector3& angularUpper)
+    {
+		for(int i = 0; i < 3; i++)
+			m_angularLimits[i].m_hiLimit = btNormalizeAngle(angularUpper[i]);
+    }
+
+	void	getAngularUpperLimit(btVector3& angularUpper) const
+	{
+		for(int i = 0; i < 3; i++)
+			angularUpper[i] = m_angularLimits[i].m_hiLimit;
+	}
+
+	//! Retrieves the angular limit information
+    btRotationalLimitMotor * getRotationalLimitMotor(int index)
+    {
+    	return &m_angularLimits[index];
+    }
+
+    //! Retrieves the linear limit information
+    btTranslationalLimitMotor * getTranslationalLimitMotor()
+    {
+    	return &m_linearLimits;
+    }
+
+    //first 3 are linear, next 3 are angular (angular values are passed through btNormalizeAngle)
+    void setLimit(int axis, btScalar lo, btScalar hi)
+    {
+    	if(axis<3)
+    	{
+    		m_linearLimits.m_lowerLimit[axis] = lo;
+    		m_linearLimits.m_upperLimit[axis] = hi;
+    	}
+    	else
+    	{
+			lo = btNormalizeAngle(lo);
+			hi = btNormalizeAngle(hi);
+    		m_angularLimits[axis-3].m_loLimit = lo;
+    		m_angularLimits[axis-3].m_hiLimit = hi;
+    	}
+    }
+
+	//! Test limit
+	/*!
+    - free means upper < lower: returns false
+    - locked means upper == lower: returns true
+    - limited means upper > lower: returns true
+    - limitIndex: first 3 are linear, next 3 are angular
+    */
+    bool	isLimited(int limitIndex)
+    {
+    	if(limitIndex<3)
+    	{
+			return m_linearLimits.isLimited(limitIndex);
+
+    	}
+        return m_angularLimits[limitIndex-3].isLimited();
+    }
+
+	virtual void calcAnchorPos(void); // overridable
+
+	int get_limit_motor_info2(	btRotationalLimitMotor * limot,
+								const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB,
+								btConstraintInfo2 *info, int row, btVector3& ax1, int rotational, int rotAllowed = false);
+
+	// access for UseFrameOffset
+	bool getUseFrameOffset() const { return m_useOffsetForConstraintFrame; }
+	void setUseFrameOffset(bool frameOffsetOnOff) { m_useOffsetForConstraintFrame = frameOffsetOnOff; }
+
+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+	///If no axis is provided, it uses the default axis for this constraint.
+	virtual	void setParam(int num, btScalar value, int axis = -1);
+	///return the local value of parameter
+	virtual	btScalar getParam(int num, int axis = -1) const;
+
+	void setAxis( const btVector3& axis1, const btVector3& axis2);
+
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+	
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct btGeneric6DofConstraintData
+{
+	btTypedConstraintData	m_typeConstraintData; // filled by btTypedConstraint::serialize()
+	btTransformFloatData m_rbAFrame; // constraint axes (written from m_frameInA). Assumes z is hinge axis.
+	btTransformFloatData m_rbBFrame; // written from m_frameInB
+	
+	btVector3FloatData	m_linearUpperLimit; // m_linearLimits.m_upperLimit, converted to float
+	btVector3FloatData	m_linearLowerLimit; // m_linearLimits.m_lowerLimit, converted to float
+
+	btVector3FloatData	m_angularUpperLimit; // m_angularLimits[0..2].m_hiLimit, converted to float
+	btVector3FloatData	m_angularLowerLimit; // m_angularLimits[0..2].m_loLimit, converted to float
+	
+	int	m_useLinearReferenceFrameA; // bool flag stored as 1 or 0
+	int m_useOffsetForConstraintFrame; // bool flag stored as 1 or 0
+};
+
+SIMD_FORCE_INLINE	int	btGeneric6DofConstraint::calculateSerializeBufferSize() const
+{	// byte size of the flat struct that serialize() fills in
+	return int(sizeof(btGeneric6DofConstraintData));
+}
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+SIMD_FORCE_INLINE	const char*	btGeneric6DofConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	// dataBuffer is treated as a btGeneric6DofConstraintData; the base class fills its leading member first.
+	btGeneric6DofConstraintData* const out = static_cast<btGeneric6DofConstraintData*>(dataBuffer);
+	btTypedConstraint::serialize(&out->m_typeConstraintData,serializer);
+
+	// Constraint frames relative to body A and body B.
+	m_frameInA.serializeFloat(out->m_rbAFrame);
+	m_frameInB.serializeFloat(out->m_rbBFrame);
+
+	// Per-axis limits, converted to float.
+	for (int axis = 0; axis != 3; ++axis)
+	{
+		out->m_linearLowerLimit.m_floats[axis] = float(m_linearLimits.m_lowerLimit[axis]);
+		out->m_linearUpperLimit.m_floats[axis] = float(m_linearLimits.m_upperLimit[axis]);
+		out->m_angularLowerLimit.m_floats[axis] = float(m_angularLimits[axis].m_loLimit);
+		out->m_angularUpperLimit.m_floats[axis] = float(m_angularLimits[axis].m_hiLimit);
+	}
+
+	// The two bool options are stored as 0/1 ints.
+	out->m_useLinearReferenceFrameA = m_useLinearReferenceFrameA ? 1 : 0;
+	out->m_useOffsetForConstraintFrame = m_useOffsetForConstraintFrame ? 1 : 0;
+	return "btGeneric6DofConstraintData";
+}
+
+
+
+
+
+#endif //BT_GENERIC_6DOF_CONSTRAINT_H
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.cpp b/src/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.cpp
new file mode 100644
index 00000000..2b387149
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.cpp
@@ -0,0 +1,185 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btGeneric6DofSpringConstraint.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btTransformUtil.h"
+
+
+btGeneric6DofSpringConstraint::btGeneric6DofSpringConstraint(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB ,bool useLinearReferenceFrameA)
+	: btGeneric6DofConstraint(rbA, rbB, frameInA, frameInB, useLinearReferenceFrameA)
+{
+	this->init(); // two-body form: base class is constructed first, then the spring state is reset
+}
+
+
+btGeneric6DofSpringConstraint::btGeneric6DofSpringConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameB)
+	: btGeneric6DofConstraint(rbB, frameInB, useLinearReferenceFrameB)
+{
+	this->init(); // single-body form: base class is constructed first, then the spring state is reset
+}
+
+
+void btGeneric6DofSpringConstraint::init()
+{
+	// Tag the object as the spring flavour of the 6-DOF constraint.
+	m_objectType = D6_SPRING_CONSTRAINT_TYPE;
+	// Every DOF starts with its spring off, equilibrium 0, stiffness 0 and damping factor 1.
+	for(int dof = 0; dof != 6; ++dof)
+	{
+		m_springEnabled[dof] = false;
+		m_equilibriumPoint[dof] = m_springStiffness[dof] = btScalar(0.f);
+		m_springDamping[dof] = btScalar(1.f);
+	}
+}
+
+
+void btGeneric6DofSpringConstraint::enableSpring(int index, bool onOff)
+{
+	// Switches the spring of DOF `index` (0..2 linear, 3..5 angular) on or off
+	// and copies the same state into that axis' motor-enable flag.
+	btAssert((index >= 0) && (index < 6));
+	m_springEnabled[index] = onOff;
+
+	const bool isLinearDof = (index < 3);
+	bool& motorFlag = isLinearDof
+		? m_linearLimits.m_enableMotor[index]
+		: m_angularLimits[index - 3].m_enableMotor;
+	motorFlag = onOff;
+}
+
+
+
+void btGeneric6DofSpringConstraint::setStiffness(int index, btScalar stiffness)
+{
+	btAssert(index >= 0 && index < 6); // DOF index: 0..2 linear, 3..5 angular
+	m_springStiffness[index] = stiffness; // multiplies the offset from equilibrium in internalUpdateSprings()
+}
+
+
+void btGeneric6DofSpringConstraint::setDamping(int index, btScalar damping)
+{
+	btAssert(index >= 0 && index < 6); // DOF index: 0..2 linear, 3..5 angular
+	m_springDamping[index] = damping; // scales the motor target velocity in internalUpdateSprings()
+}
+
+
+void btGeneric6DofSpringConstraint::setEquilibriumPoint()
+{
+	// Takes the current relative position/orientation of the constraint
+	// as the equilibrium point of all six DOF.
+	calculateTransforms();
+
+	for(int axis = 0; axis != 3; ++axis)
+	{
+		// slots 0..2: linear offsets
+		m_equilibriumPoint[axis] = m_calculatedLinearDiff[axis];
+		// slots 3..5: angular offsets
+		m_equilibriumPoint[axis + 3] = m_calculatedAxisAngleDiff[axis];
+	}
+}
+
+
+
+void btGeneric6DofSpringConstraint::setEquilibriumPoint(int index)
+{
+	// Takes the current relative position/orientation of the constraint
+	// as the equilibrium point of the single DOF `index`.
+	btAssert((index >= 0) && (index < 6));
+	calculateTransforms();
+
+	const bool isLinearDof = (index < 3);
+	// linear DOF read the linear diff directly, angular DOF use the diff of axis (index - 3)
+	m_equilibriumPoint[index] = isLinearDof
+		? m_calculatedLinearDiff[index]
+		: m_calculatedAxisAngleDiff[index - 3];
+}
+
+void btGeneric6DofSpringConstraint::setEquilibriumPoint(int index, btScalar val)
+{
+	btAssert(index >= 0 && index < 6); // DOF index: 0..2 linear, 3..5 angular
+	m_equilibriumPoint[index] = val; // explicit value; calculateTransforms() is not called here
+}
+
+
+void btGeneric6DofSpringConstraint::internalUpdateSprings(btConstraintInfo2* info)
+{
+	// it is assumed that calculateTransforms() have been called before this call
+	// Each enabled spring is converted into motor settings (target velocity and max force) for its axis.
+	int i;
+	for(i = 0; i < 3; i++)
+	{
+		if(m_springEnabled[i])
+		{
+			// get current position of constraint
+			btScalar currPos = m_calculatedLinearDiff[i];
+			// calculate difference from the stored equilibrium point
+			btScalar delta = currPos - m_equilibriumPoint[i];
+			// spring force is (delta * m_stiffness) according to Hooke's Law
+			btScalar force = delta * m_springStiffness[i];
+			btScalar velFactor = info->fps * m_springDamping[i] / btScalar(info->m_numIterations);
+			m_linearLimits.m_targetVelocity[i] =  velFactor * force;
+			m_linearLimits.m_maxMotorForce[i] =  btFabs(force) / info->fps;
+		}
+	}
+	for(i = 0; i < 3; i++)
+	{
+		if(m_springEnabled[i + 3])
+		{
+			// get current position of constraint
+			btScalar currPos = m_calculatedAxisAngleDiff[i];
+			// calculate difference from the stored equilibrium point
+			btScalar delta = currPos - m_equilibriumPoint[i+3];
+			// spring force is (-delta * m_stiffness) according to Hooke's Law; note the sign differs from the linear case
+			btScalar force = -delta * m_springStiffness[i+3];
+			btScalar velFactor = info->fps * m_springDamping[i+3] / btScalar(info->m_numIterations);
+			m_angularLimits[i].m_targetVelocity = velFactor * force;
+			m_angularLimits[i].m_maxMotorForce = btFabs(force) / info->fps;
+		}
+	}
+}
+
+
+void btGeneric6DofSpringConstraint::getInfo2(btConstraintInfo2* info)
+{
+	// Called by the constraint solver at the constraint setup stage.
+	// Step 1: refresh the motor parameters from the current spring state.
+	this->internalUpdateSprings(info);
+	// Step 2: let the plain 6-DOF constraint do the rest of the setup.
+	btGeneric6DofConstraint::getInfo2(info);
+}
+
+
+void btGeneric6DofSpringConstraint::setAxis(const btVector3& axis1,const btVector3& axis2)
+{
+	// Re-orients the constraint frame from two world-space axes.
+	//
+	// axis1 - world-space direction used as the frame's Z axis
+	// axis2 - world-space direction used as the frame's Y axis
+	//
+	// btGeneric6DofConstraint::setAxis() already does all of the work:
+	// it normalizes both axes, takes X = Y cross Z, stores the
+	// resulting world frame relative to each body's centre-of-mass
+	// transform in m_frameInA / m_frameInB and finally calls
+	// calculateTransforms(). The spring constraint adds nothing to
+	// that procedure, so delegate to the base class rather than keep
+	// a second copy of the frame construction that could drift from
+	// it.
+
+	btGeneric6DofConstraint::setAxis(axis1, axis2);
+}
+
+
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h b/src/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h
new file mode 100644
index 00000000..31e0cd53
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h
@@ -0,0 +1,99 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_GENERIC_6DOF_SPRING_CONSTRAINT_H
+#define BT_GENERIC_6DOF_SPRING_CONSTRAINT_H
+
+
+#include "LinearMath/btVector3.h"
+#include "btTypedConstraint.h"
+#include "btGeneric6DofConstraint.h"
+
+
+/// Generic 6 DOF constraint that allows to set spring motors to any translational and rotational DOF
+
+/// DOF index used in enableSpring() and setStiffness() means:
+/// 0 : translation X
+/// 1 : translation Y
+/// 2 : translation Z
+/// 3 : rotation X (3rd Euler rotational around new position of X axis, range [-PI+epsilon, PI-epsilon] )
+/// 4 : rotation Y (2nd Euler rotational around new position of Y axis, range [-PI/2+epsilon, PI/2-epsilon] )
+/// 5 : rotation Z (1st Euler rotational around Z axis, range [-PI+epsilon, PI-epsilon] )
+
+class btGeneric6DofSpringConstraint : public btGeneric6DofConstraint
+{
+protected:
+	bool		m_springEnabled[6]; // per-DOF spring on/off flag (index 0..2 translation, 3..5 rotation)
+	btScalar	m_equilibriumPoint[6]; // per-DOF rest value subtracted from the current position/angle in internalUpdateSprings()
+	btScalar	m_springStiffness[6]; // per-DOF factor applied to the offset from the equilibrium point
+	btScalar	m_springDamping[6]; // between 0 and 1 (1 == no damping)
+	void init(); // sets m_objectType and resets the four per-DOF arrays above
+	void internalUpdateSprings(btConstraintInfo2* info); // writes motor target velocity / max force for every enabled spring
+public: 
+    btGeneric6DofSpringConstraint(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB ,bool useLinearReferenceFrameA);
+    btGeneric6DofSpringConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameB);
+	void enableSpring(int index, bool onOff); // also sets the motor-enable flag of the matching linear/angular axis
+	void setStiffness(int index, btScalar stiffness); // index must be in 0..5 (asserted)
+	void setDamping(int index, btScalar damping); // index must be in 0..5 (asserted)
+	void setEquilibriumPoint(); // set the current constraint position/orientation as an equilibrium point for all DOF
+	void setEquilibriumPoint(int index);  // set the current constraint position/orientation as an equilibrium point for given DOF
+	void setEquilibriumPoint(int index, btScalar val); // set an explicit equilibrium value for the given DOF
+
+	virtual void setAxis( const btVector3& axis1, const btVector3& axis2); // axis1 is used as the frame's Z axis, axis2 as its Y axis
+
+	virtual void getInfo2 (btConstraintInfo2* info); // calls internalUpdateSprings(), then btGeneric6DofConstraint::getInfo2()
+
+	virtual	int	calculateSerializeBufferSize() const; // sizeof(btGeneric6DofSpringConstraintData)
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+};
+
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct btGeneric6DofSpringConstraintData
+{
+	btGeneric6DofConstraintData	m_6dofData; // filled by btGeneric6DofConstraint::serialize()
+	
+	int			m_springEnabled[6]; // per-DOF bool stored as 1 or 0
+	float		m_equilibriumPoint[6]; // per-DOF copy of m_equilibriumPoint
+	float		m_springStiffness[6]; // per-DOF copy of m_springStiffness
+	float		m_springDamping[6]; // per-DOF copy of m_springDamping
+};
+
+SIMD_FORCE_INLINE	int	btGeneric6DofSpringConstraint::calculateSerializeBufferSize() const
+{	// byte size of the flat struct that serialize() fills in
+	return int(sizeof(btGeneric6DofSpringConstraintData));
+}
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+SIMD_FORCE_INLINE	const char*	btGeneric6DofSpringConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	// dataBuffer is treated as a btGeneric6DofSpringConstraintData; the base class fills the embedded m_6dofData first.
+	btGeneric6DofSpringConstraintData* dof = (btGeneric6DofSpringConstraintData*)dataBuffer;
+	btGeneric6DofConstraint::serialize(&dof->m_6dofData,serializer);
+	for (int i=0;i<6;i++)
+	{
+		dof->m_equilibriumPoint[i] = float(m_equilibriumPoint[i]);
+		dof->m_springDamping[i] = float(m_springDamping[i]);
+		dof->m_springEnabled[i] = m_springEnabled[i]? 1 : 0;
+		dof->m_springStiffness[i] = float(m_springStiffness[i]);
+	}
+	// Report the struct actually written (was the base struct's name). NOTE(review): confirm sBulletDNAstr/sBulletDNAstr64 list this struct.
+	return "btGeneric6DofSpringConstraintData";
+}
+
+#endif // BT_GENERIC_6DOF_SPRING_CONSTRAINT_H
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btHinge2Constraint.cpp b/src/bullet/BulletDynamics/ConstraintSolver/btHinge2Constraint.cpp
new file mode 100644
index 00000000..29123d52
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btHinge2Constraint.cpp
@@ -0,0 +1,66 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#include "btHinge2Constraint.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btTransformUtil.h"
+
+
+
+// constructor
+// anchor, axis1 and axis2 are in world coordinate system
+// axis1 must be orthogonal to axis2
+btHinge2Constraint::btHinge2Constraint(btRigidBody& rbA, btRigidBody& rbB, btVector3& anchor, btVector3& axis1, btVector3& axis2)
+: btGeneric6DofSpringConstraint(rbA, rbB, btTransform::getIdentity(), btTransform::getIdentity(), true),
+ m_anchor(anchor),
+ m_axis1(axis1),
+ m_axis2(axis2)
+{
+	// build frame basis
+	// 6DOF constraint uses Euler angles to define limits
+	// it is assumed that rotational order is :
+	// Z - first, allowed limits are (-PI,PI);
+	// new position of Y - second (allowed limits are (-PI/2 + epsilon, PI/2 - epsilon), where epsilon is a small positive number 
+	// used to prevent constraint from instability on poles;
+	// new position of X, allowed limits are (-PI,PI);
+	// So to simulate ODE Hinge2 joint we use axis1 as Z, axis2 as X and limit all other DOFs
+	// Build the frame in world coordinate system first
+	btVector3 zAxis = axis1.normalize(); // NOTE(review): normalize() appears to work in place, which would also modify the caller's axis1 (m_axis1 was copied before this) - confirm
+	btVector3 xAxis = axis2.normalize(); // NOTE(review): same for the caller's axis2 - confirm
+	btVector3 yAxis = zAxis.cross(xAxis); // we want a right-handed coordinate system: Y = Z x X
+	btTransform frameInW;
+	frameInW.setIdentity();
+	frameInW.getBasis().setValue(	xAxis[0], yAxis[0], zAxis[0],	
+									xAxis[1], yAxis[1], zAxis[1],
+									xAxis[2], yAxis[2], zAxis[2]);
+	frameInW.setOrigin(anchor);
+	// now get constraint frame in local coordinate systems
+	m_frameInA = rbA.getCenterOfMassTransform().inverse() * frameInW;
+	m_frameInB = rbB.getCenterOfMassTransform().inverse() * frameInW;
+	// set limits: linear X and Y get equal lower/upper values (0), linear Z gets the range [-1, 1]
+	setLinearLowerLimit(btVector3(0.f, 0.f, -1.f));
+	setLinearUpperLimit(btVector3(0.f, 0.f,  1.f));
+	// like front wheels of a car (angular X gets lower > upper; presumably that leaves it free - confirm against btGeneric6DofConstraint)
+	setAngularLowerLimit(btVector3(1.f,  0.f, -SIMD_HALF_PI * 0.5f)); 
+	setAngularUpperLimit(btVector3(-1.f, 0.f,  SIMD_HALF_PI * 0.5f));
+	// enable suspension: spring on DOF index 2
+	enableSpring(2, true);
+	setStiffness(2, SIMD_PI * SIMD_PI * 4.f); // period 1 sec for 1 kilogram wheel :-)
+	setDamping(2, 0.01f);
+	setEquilibriumPoint();
+}
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btHinge2Constraint.h b/src/bullet/BulletDynamics/ConstraintSolver/btHinge2Constraint.h
new file mode 100644
index 00000000..a76452dd
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btHinge2Constraint.h
@@ -0,0 +1,58 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_HINGE2_CONSTRAINT_H
+#define BT_HINGE2_CONSTRAINT_H
+
+
+
+#include "LinearMath/btVector3.h"
+#include "btTypedConstraint.h"
+#include "btGeneric6DofSpringConstraint.h"
+
+
+
+// Constraint similar to ODE Hinge2 Joint
+// has 3 degrees of freedom:
+// 2 rotational degrees of freedom, similar to Euler rotations around Z (axis 1) and X (axis 2)
+// 1 translational (along axis Z) with suspension spring
+
+class btHinge2Constraint : public btGeneric6DofSpringConstraint
+{
+protected:
+	btVector3	m_anchor;	// copy of the anchor passed to the constructor
+	btVector3	m_axis1;	// copy of axis1 passed to the constructor
+	btVector3	m_axis2;	// copy of axis2 passed to the constructor
+public:
+	// constructor
+	// anchor, axis1 and axis2 are in world coordinate system
+	// axis1 must be orthogonal to axis2
+    btHinge2Constraint(btRigidBody& rbA, btRigidBody& rbB, btVector3& anchor, btVector3& axis1, btVector3& axis2);
+	// access
+	const btVector3& getAnchor() { return m_calculatedTransformA.getOrigin(); }	// origin of m_calculatedTransformA, not the stored m_anchor
+	const btVector3& getAnchor2() { return m_calculatedTransformB.getOrigin(); }	// origin of m_calculatedTransformB
+	const btVector3& getAxis1() { return m_axis1; }
+	const btVector3& getAxis2() { return m_axis2; }
+	btScalar getAngle1() { return getAngle(2); }	// angle for axis index 2 (Z, i.e. axis 1)
+	btScalar getAngle2() { return getAngle(0); }	// angle for axis index 0 (X, i.e. axis 2)
+	// limits: only the Z component is taken from the argument, X and Y are fixed to the values used by the constructor
+	void setUpperLimit(btScalar ang1max) { setAngularUpperLimit(btVector3(-1.f, 0.f, ang1max)); }
+	void setLowerLimit(btScalar ang1min) { setAngularLowerLimit(btVector3( 1.f, 0.f, ang1min)); }
+};
+
+
+
+#endif // BT_HINGE2_CONSTRAINT_H
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btHingeConstraint.cpp b/src/bullet/BulletDynamics/ConstraintSolver/btHingeConstraint.cpp
new file mode 100644
index 00000000..9e3a2bae
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btHingeConstraint.cpp
@@ -0,0 +1,1034 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btHingeConstraint.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btTransformUtil.h"
+#include "LinearMath/btMinMax.h"
+#include <new>
+#include "btSolverBody.h"
+
+
+
+// Initial value of m_useSolveConstraintObsolete in every btHingeConstraint constructor of this file
+#define HINGE_USE_OBSOLETE_SOLVER false
+// Initial value of m_useOffsetForConstraintFrame in every btHingeConstraint constructor of this file
+#define HINGE_USE_FRAME_OFFSET true
+
+#ifndef __SPU__
+
+
+
+
+
+btHingeConstraint::btHingeConstraint(btRigidBody& rbA,btRigidBody& rbB, const btVector3& pivotInA,const btVector3& pivotInB,
+									 const btVector3& axisInA,const btVector3& axisInB, bool useReferenceFrameA)
+									 :btTypedConstraint(HINGE_CONSTRAINT_TYPE, rbA,rbB),
+#ifdef _BT_USE_CENTER_LIMIT_
+									 m_limit(),
+#endif
+									 m_angularOnly(false),
+									 m_enableAngularMotor(false),
+									 m_useSolveConstraintObsolete(HINGE_USE_OBSOLETE_SOLVER),
+									 m_useOffsetForConstraintFrame(HINGE_USE_FRAME_OFFSET),
+									 m_useReferenceFrameA(useReferenceFrameA),
+									 m_flags(0)
+{
+	m_rbAFrame.getOrigin() = pivotInA;
+	// frame A: columns 0 and 1 receive rbAxisA1 and rbAxisA2, column 2 (Z) receives the hinge axis axisInA
+	// since no frame is given, assume this to be zero angle and just pick rb transform axis
+	btVector3 rbAxisA1 = rbA.getCenterOfMassTransform().getBasis().getColumn(0);
+
+	btVector3 rbAxisA2;
+	btScalar projection = axisInA.dot(rbAxisA1);
+	if (projection >= 1.0f - SIMD_EPSILON) { // axisInA (nearly) parallel to rbAxisA1, assuming unit vectors: take basis columns instead of cross products
+		rbAxisA1 = -rbA.getCenterOfMassTransform().getBasis().getColumn(2);
+		rbAxisA2 = rbA.getCenterOfMassTransform().getBasis().getColumn(1);
+	} else if (projection <= -1.0f + SIMD_EPSILON) { // (nearly) anti-parallel case
+		rbAxisA1 = rbA.getCenterOfMassTransform().getBasis().getColumn(2);
+		rbAxisA2 = rbA.getCenterOfMassTransform().getBasis().getColumn(1);      
+	} else { // general case: build two axes perpendicular to axisInA from cross products
+		rbAxisA2 = axisInA.cross(rbAxisA1);
+		rbAxisA1 = rbAxisA2.cross(axisInA);
+	}
+
+	m_rbAFrame.getBasis().setValue( rbAxisA1.getX(),rbAxisA2.getX(),axisInA.getX(),
+									rbAxisA1.getY(),rbAxisA2.getY(),axisInA.getY(),
+									rbAxisA1.getZ(),rbAxisA2.getZ(),axisInA.getZ() );
+	// frame B: rotate rbAxisA1 by the shortest arc from axisInA to axisInB, then complete the basis around axisInB
+	btQuaternion rotationArc = shortestArcQuat(axisInA,axisInB);
+	btVector3 rbAxisB1 =  quatRotate(rotationArc,rbAxisA1);
+	btVector3 rbAxisB2 =  axisInB.cross(rbAxisB1);	
+	
+	m_rbBFrame.getOrigin() = pivotInB;
+	m_rbBFrame.getBasis().setValue( rbAxisB1.getX(),rbAxisB2.getX(),axisInB.getX(),
+									rbAxisB1.getY(),rbAxisB2.getY(),axisInB.getY(),
+									rbAxisB1.getZ(),rbAxisB2.getZ(),axisInB.getZ() );
+	
+#ifndef	_BT_USE_CENTER_LIMIT_
+	//start with free (testLimit only evaluates limits when m_lowerLimit <= m_upperLimit)
+	m_lowerLimit = btScalar(1.0f);
+	m_upperLimit = btScalar(-1.0f);
+	m_biasFactor = 0.3f;
+	m_relaxationFactor = 1.0f;
+	m_limitSoftness = 0.9f;
+	m_solveLimit = false;
+#endif
+	m_referenceSign = m_useReferenceFrameA ? btScalar(-1.f) : btScalar(1.f); // sign applied to the hinge angle, limit error and motor velocity
+}
+
+
+
+btHingeConstraint::btHingeConstraint(btRigidBody& rbA,const btVector3& pivotInA,const btVector3& axisInA, bool useReferenceFrameA)
+:btTypedConstraint(HINGE_CONSTRAINT_TYPE, rbA),
+#ifdef _BT_USE_CENTER_LIMIT_
+m_limit(),
+#endif
+m_angularOnly(false), m_enableAngularMotor(false), 
+m_useSolveConstraintObsolete(HINGE_USE_OBSOLETE_SOLVER),
+m_useOffsetForConstraintFrame(HINGE_USE_FRAME_OFFSET),
+m_useReferenceFrameA(useReferenceFrameA),
+m_flags(0)
+{
+	// single-body variant: frame B is derived from body A's current center-of-mass transform
+	// since no frame is given, assume this to be zero angle and just pick rb transform axis
+	// fixed axis in worldspace
+	btVector3 rbAxisA1, rbAxisA2;
+	btPlaneSpace1(axisInA, rbAxisA1, rbAxisA2);
+	// frame A: columns 0 and 1 receive rbAxisA1 and rbAxisA2, column 2 (Z) receives axisInA
+	m_rbAFrame.getOrigin() = pivotInA;
+	m_rbAFrame.getBasis().setValue( rbAxisA1.getX(),rbAxisA2.getX(),axisInA.getX(),
+									rbAxisA1.getY(),rbAxisA2.getY(),axisInA.getY(),
+									rbAxisA1.getZ(),rbAxisA2.getZ(),axisInA.getZ() );
+	// hinge axis rotated by body A's basis
+	btVector3 axisInB = rbA.getCenterOfMassTransform().getBasis() * axisInA;
+
+	btQuaternion rotationArc = shortestArcQuat(axisInA,axisInB);
+	btVector3 rbAxisB1 =  quatRotate(rotationArc,rbAxisA1);
+	btVector3 rbAxisB2 = axisInB.cross(rbAxisB1);
+
+	// pivot transformed by body A's center-of-mass transform becomes the origin of frame B
+	m_rbBFrame.getOrigin() = rbA.getCenterOfMassTransform()(pivotInA);
+	m_rbBFrame.getBasis().setValue( rbAxisB1.getX(),rbAxisB2.getX(),axisInB.getX(),
+									rbAxisB1.getY(),rbAxisB2.getY(),axisInB.getY(),
+									rbAxisB1.getZ(),rbAxisB2.getZ(),axisInB.getZ() );
+	
+#ifndef	_BT_USE_CENTER_LIMIT_
+	//start with free (testLimit only evaluates limits when m_lowerLimit <= m_upperLimit)
+	m_lowerLimit = btScalar(1.0f);
+	m_upperLimit = btScalar(-1.0f);
+	m_biasFactor = 0.3f;
+	m_relaxationFactor = 1.0f;
+	m_limitSoftness = 0.9f;
+	m_solveLimit = false;
+#endif
+	m_referenceSign = m_useReferenceFrameA ? btScalar(-1.f) : btScalar(1.f); // sign applied to the hinge angle, limit error and motor velocity
+}
+
+
+
+btHingeConstraint::btHingeConstraint(btRigidBody& rbA,btRigidBody& rbB, 
+								     const btTransform& rbAFrame, const btTransform& rbBFrame, bool useReferenceFrameA)
+:btTypedConstraint(HINGE_CONSTRAINT_TYPE, rbA,rbB),m_rbAFrame(rbAFrame),m_rbBFrame(rbBFrame),
+#ifdef _BT_USE_CENTER_LIMIT_
+m_limit(),
+#endif
+m_angularOnly(false),
+m_enableAngularMotor(false),
+m_useSolveConstraintObsolete(HINGE_USE_OBSOLETE_SOLVER),
+m_useOffsetForConstraintFrame(HINGE_USE_FRAME_OFFSET),
+m_useReferenceFrameA(useReferenceFrameA),
+m_flags(0)
+{	// both constraint frames are supplied by the caller and copied as given; only the defaults below remain to be set
+#ifndef	_BT_USE_CENTER_LIMIT_
+	//start with free (testLimit only evaluates limits when m_lowerLimit <= m_upperLimit)
+	m_lowerLimit = btScalar(1.0f);
+	m_upperLimit = btScalar(-1.0f);
+	m_biasFactor = 0.3f;
+	m_relaxationFactor = 1.0f;
+	m_limitSoftness = 0.9f;
+	m_solveLimit = false;
+#endif
+	m_referenceSign = m_useReferenceFrameA ? btScalar(-1.f) : btScalar(1.f); // sign applied to the hinge angle, limit error and motor velocity
+}			
+
+
+
+btHingeConstraint::btHingeConstraint(btRigidBody& rbA, const btTransform& rbAFrame, bool useReferenceFrameA)
+:btTypedConstraint(HINGE_CONSTRAINT_TYPE, rbA),m_rbAFrame(rbAFrame),m_rbBFrame(rbAFrame),
+#ifdef _BT_USE_CENTER_LIMIT_
+m_limit(),
+#endif
+m_angularOnly(false),
+m_enableAngularMotor(false),
+m_useSolveConstraintObsolete(HINGE_USE_OBSOLETE_SOLVER),
+m_useOffsetForConstraintFrame(HINGE_USE_FRAME_OFFSET),
+m_useReferenceFrameA(useReferenceFrameA),
+m_flags(0)
+{
+	///not providing rigidbody B means implicitly using worldspace for body B
+	// frame B starts as a copy of rbAFrame; only its origin is replaced here, its basis stays as copied
+	m_rbBFrame.getOrigin() = m_rbA.getCenterOfMassTransform()(m_rbAFrame.getOrigin());
+#ifndef	_BT_USE_CENTER_LIMIT_
+	//start with free (testLimit only evaluates limits when m_lowerLimit <= m_upperLimit)
+	m_lowerLimit = btScalar(1.0f);
+	m_upperLimit = btScalar(-1.0f);
+	m_biasFactor = 0.3f;
+	m_relaxationFactor = 1.0f;
+	m_limitSoftness = 0.9f;
+	m_solveLimit = false;
+#endif
+	m_referenceSign = m_useReferenceFrameA ? btScalar(-1.f) : btScalar(1.f); // sign applied to the hinge angle, limit error and motor velocity
+}
+
+
+
+void	btHingeConstraint::buildJacobian()
+{	// Rebuilds m_jac, m_jacAng, the limit state and m_kHinge; does nothing unless m_useSolveConstraintObsolete is set
+	if (m_useSolveConstraintObsolete)
+	{
+		m_appliedImpulse = btScalar(0.);
+		m_accMotorImpulse = btScalar(0.);
+
+		if (!m_angularOnly)
+		{
+			btVector3 pivotAInW = m_rbA.getCenterOfMassTransform()*m_rbAFrame.getOrigin();
+			btVector3 pivotBInW = m_rbB.getCenterOfMassTransform()*m_rbBFrame.getOrigin();
+			btVector3 relPos = pivotBInW - pivotAInW;
+
+			btVector3 normal[3];
+			if (relPos.length2() > SIMD_EPSILON)
+			{
+				normal[0] = relPos.normalized();
+			}
+			else
+			{	// pivots (almost) coincide: fall back to the X axis as first direction
+				normal[0].setValue(btScalar(1.0),0,0);
+			}
+			// normal[1] and normal[2] are derived from normal[0]
+			btPlaneSpace1(normal[0], normal[1], normal[2]);
+
+			for (int i=0;i<3;i++)
+			{	// construct one linear jacobian entry per direction in the existing m_jac storage (placement new)
+				new (&m_jac[i]) btJacobianEntry(
+				m_rbA.getCenterOfMassTransform().getBasis().transpose(),
+				m_rbB.getCenterOfMassTransform().getBasis().transpose(),
+				pivotAInW - m_rbA.getCenterOfMassPosition(),
+				pivotBInW - m_rbB.getCenterOfMassPosition(),
+				normal[i],
+				m_rbA.getInvInertiaDiagLocal(),
+				m_rbA.getInvMass(),
+				m_rbB.getInvInertiaDiagLocal(),
+				m_rbB.getInvMass());
+			}
+		}
+
+		//calculate two perpendicular jointAxis, orthogonal to hingeAxis
+		//these two jointAxis require equal angular velocities for both bodies
+
+		//this is unused for now, it's a todo
+		btVector3 jointAxis0local;
+		btVector3 jointAxis1local;
+		
+		btPlaneSpace1(m_rbAFrame.getBasis().getColumn(2),jointAxis0local,jointAxis1local);
+		// rotate the local joint axes and the hinge axis (column 2 of frame A) by body A's basis
+		btVector3 jointAxis0 = getRigidBodyA().getCenterOfMassTransform().getBasis() * jointAxis0local;
+		btVector3 jointAxis1 = getRigidBodyA().getCenterOfMassTransform().getBasis() * jointAxis1local;
+		btVector3 hingeAxisWorld = getRigidBodyA().getCenterOfMassTransform().getBasis() * m_rbAFrame.getBasis().getColumn(2);
+			
+		new (&m_jacAng[0])	btJacobianEntry(jointAxis0,
+			m_rbA.getCenterOfMassTransform().getBasis().transpose(),
+			m_rbB.getCenterOfMassTransform().getBasis().transpose(),
+			m_rbA.getInvInertiaDiagLocal(),
+			m_rbB.getInvInertiaDiagLocal());
+
+		new (&m_jacAng[1])	btJacobianEntry(jointAxis1,
+			m_rbA.getCenterOfMassTransform().getBasis().transpose(),
+			m_rbB.getCenterOfMassTransform().getBasis().transpose(),
+			m_rbA.getInvInertiaDiagLocal(),
+			m_rbB.getInvInertiaDiagLocal());
+
+		new (&m_jacAng[2])	btJacobianEntry(hingeAxisWorld,
+			m_rbA.getCenterOfMassTransform().getBasis().transpose(),
+			m_rbB.getCenterOfMassTransform().getBasis().transpose(),
+			m_rbA.getInvInertiaDiagLocal(),
+			m_rbB.getInvInertiaDiagLocal());
+
+			// clear accumulator
+			m_accLimitImpulse = btScalar(0.);
+
+			// test angular limit
+			testLimit(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform());
+
+		//Compute K = J*W*J' for hinge axis
+		btVector3 axisA =  getRigidBodyA().getCenterOfMassTransform().getBasis() *  m_rbAFrame.getBasis().getColumn(2);
+		m_kHinge =   1.0f / (getRigidBodyA().computeAngularImpulseDenominator(axisA) +
+							 getRigidBodyB().computeAngularImpulseDenominator(axisA));
+
+	}
+}
+
+
+#endif //__SPU__
+
+
+void btHingeConstraint::getInfo1(btConstraintInfo1* info)
+{
+	// The obsolete solver path reports zero rows.
+	if (m_useSolveConstraintObsolete)
+	{
+		info->m_numConstraintRows = 0;
+		info->nub = 0;
+		return;
+	}
+
+	// Refresh m_hingeAngle and the limit state before the rows are counted.
+	testLimit(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform());
+
+	// A sixth row (third angular one) is requested only while a limit is being
+	// solved or the angular motor is enabled.
+	const bool wantsSixthRow = getSolveLimit() || getEnableAngularMotor();
+
+	// 3 linear + 2 angular rows are always requested; nub is 1 with five rows
+	// and 0 with six.
+	info->m_numConstraintRows = wantsSixthRow ? 6 : 5;
+	info->nub = wantsSixthRow ? 0 : 1;
+}
+
+void btHingeConstraint::getInfo1NonVirtual(btConstraintInfo1* info)
+{
+	// Non-virtual variant of getInfo1(): testLimit() is not called here, so the
+	// row for the hinge axis is always reserved (data is not available yet).
+	//
+	// obsolete solver  -> 0 rows
+	// otherwise        -> 6 rows (3 linear + 2 angular + 1 limit/motor row)
+	const int rowCount = m_useSolveConstraintObsolete ? 0 : 6;
+
+	info->m_numConstraintRows = rowCount;
+
+	// nub is 0 on both paths
+	info->nub = 0;
+}
+
+void btHingeConstraint::getInfo2 (btConstraintInfo2* info)
+{
+	// Gather the center-of-mass transforms and angular velocities of both bodies
+	const btTransform& xformA = m_rbA.getCenterOfMassTransform();
+	const btTransform& xformB = m_rbB.getCenterOfMassTransform();
+	const btVector3& omegaA = m_rbA.getAngularVelocity();
+	const btVector3& omegaB = m_rbB.getAngularVelocity();
+	// and hand them to the row builder selected by m_useOffsetForConstraintFrame.
+	if(!m_useOffsetForConstraintFrame) { getInfo2Internal(info, xformA,xformB,omegaA,omegaB); return; }
+	getInfo2InternalUsingFrameOffset(info, xformA,xformB,omegaA,omegaB);
+}
+
+
+void	btHingeConstraint::getInfo2NonVirtual (btConstraintInfo2* info,const btTransform& transA,const btTransform& transB,const btVector3& angVelA,const btVector3& angVelB)
+{
+	///the regular (virtual) path runs 'testLimit' in getInfo1; getInfo1NonVirtual does not, so it has to be done here before the rows are filled
+	testLimit(transA,transB);
+	// always uses the formulation without frame offset, regardless of m_useOffsetForConstraintFrame
+	getInfo2Internal(info,transA,transB,angVelA,angVelB);
+}
+
+
+void btHingeConstraint::getInfo2Internal(btConstraintInfo2* info, const btTransform& transA,const btTransform& transB,const btVector3& angVelA,const btVector3& angVelB)
+{
+	// Fills the solver rows without the frame-offset formulation: rows 0-2 linear, rows 3-4 angular, optional row 5 for limit/motor
+	btAssert(!m_useSolveConstraintObsolete);
+	int i, skip = info->rowskip;
+	// transforms in world space
+	btTransform trA = transA*m_rbAFrame;
+	btTransform trB = transB*m_rbBFrame;
+	// pivot point
+	btVector3 pivotAInW = trA.getOrigin();
+	btVector3 pivotBInW = trB.getOrigin();
+#if 0
+	if (0)
+	{
+		for (i=0;i<6;i++)
+		{
+			info->m_J1linearAxis[i*skip]=0;
+			info->m_J1linearAxis[i*skip+1]=0;
+			info->m_J1linearAxis[i*skip+2]=0;
+
+			info->m_J1angularAxis[i*skip]=0;
+			info->m_J1angularAxis[i*skip+1]=0;
+			info->m_J1angularAxis[i*skip+2]=0;
+
+			info->m_J2angularAxis[i*skip]=0;
+			info->m_J2angularAxis[i*skip+1]=0;
+			info->m_J2angularAxis[i*skip+2]=0;
+
+			info->m_constraintError[i*skip]=0.f;
+		}
+	}
+#endif //#if 0
+	// linear (all fixed)
+
+	if (!m_angularOnly)
+	{	// 1 on the diagonal of the first three rows (no zeroing is done here; presumably the caller cleared the rows - confirm)
+		info->m_J1linearAxis[0] = 1;
+		info->m_J1linearAxis[skip + 1] = 1;
+		info->m_J1linearAxis[2 * skip + 2] = 1;
+	}	
+
+
+
+	// angular parts of rows 0-2: skew-symmetric matrices of the pivot offsets (negated for body A)
+	btVector3 a1 = pivotAInW - transA.getOrigin();
+	{
+		btVector3* angular0 = (btVector3*)(info->m_J1angularAxis);
+		btVector3* angular1 = (btVector3*)(info->m_J1angularAxis + skip);
+		btVector3* angular2 = (btVector3*)(info->m_J1angularAxis + 2 * skip);
+		btVector3 a1neg = -a1;
+		a1neg.getSkewSymmetricMatrix(angular0,angular1,angular2);
+	}
+	btVector3 a2 = pivotBInW - transB.getOrigin();
+	{
+		btVector3* angular0 = (btVector3*)(info->m_J2angularAxis);
+		btVector3* angular1 = (btVector3*)(info->m_J2angularAxis + skip);
+		btVector3* angular2 = (btVector3*)(info->m_J2angularAxis + 2 * skip);
+		a2.getSkewSymmetricMatrix(angular0,angular1,angular2);
+	}
+	// linear RHS
+    btScalar k = info->fps * info->erp;
+	if (!m_angularOnly)
+	{
+		for(i = 0; i < 3; i++)
+		{
+			info->m_constraintError[i * skip] = k * (pivotBInW[i] - pivotAInW[i]);
+		}
+	}
+	// make rotations around X and Y equal
+	// the hinge axis should be the only unconstrained
+	// rotational axis, the angular velocity of the two bodies perpendicular to
+	// the hinge axis should be equal. thus the constraint equations are
+	//    p*w1 - p*w2 = 0
+	//    q*w1 - q*w2 = 0
+	// where p and q are unit vectors normal to the hinge axis, and w1 and w2
+	// are the angular velocity vectors of the two bodies.
+	// get hinge axis (Z)
+	btVector3 ax1 = trA.getBasis().getColumn(2);
+	// get 2 orthos to hinge axis (X, Y)
+	btVector3 p = trA.getBasis().getColumn(0);
+	btVector3 q = trA.getBasis().getColumn(1);
+	// set the two hinge angular rows 
+    int s3 = 3 * info->rowskip;
+    int s4 = 4 * info->rowskip;
+
+	info->m_J1angularAxis[s3 + 0] = p[0];
+	info->m_J1angularAxis[s3 + 1] = p[1];
+	info->m_J1angularAxis[s3 + 2] = p[2];
+	info->m_J1angularAxis[s4 + 0] = q[0];
+	info->m_J1angularAxis[s4 + 1] = q[1];
+	info->m_J1angularAxis[s4 + 2] = q[2];
+
+	info->m_J2angularAxis[s3 + 0] = -p[0];
+	info->m_J2angularAxis[s3 + 1] = -p[1];
+	info->m_J2angularAxis[s3 + 2] = -p[2];
+	info->m_J2angularAxis[s4 + 0] = -q[0];
+	info->m_J2angularAxis[s4 + 1] = -q[1];
+	info->m_J2angularAxis[s4 + 2] = -q[2];
+    // compute the right hand side of the constraint equation. set relative
+    // body velocities along p and q to bring the hinge back into alignment.
+    // if ax1,ax2 are the unit length hinge axes as computed from body1 and
+    // body2, we need to rotate both bodies along the axis u = (ax1 x ax2).
+    // if `theta' is the angle between ax1 and ax2, we need an angular velocity
+    // along u to cover angle erp*theta in one step :
+    //   |angular_velocity| = angle/time = erp*theta / stepsize
+    //                      = (erp*fps) * theta
+    //    angular_velocity  = |angular_velocity| * (ax1 x ax2) / |ax1 x ax2|
+    //                      = (erp*fps) * theta * (ax1 x ax2) / sin(theta)
+    // ...as ax1 and ax2 are unit length. if theta is smallish,
+    // theta ~= sin(theta), so
+    //    angular_velocity  = (erp*fps) * (ax1 x ax2)
+    // ax1 x ax2 is in the plane space of ax1, so we project the angular
+    // velocity to p and q to find the right hand side.
+    btVector3 ax2 = trB.getBasis().getColumn(2);
+	btVector3 u = ax1.cross(ax2);
+	info->m_constraintError[s3] = k * u.dot(p);
+	info->m_constraintError[s4] = k * u.dot(q);
+	// check angular limits
+	int nrow = 4; // last filled row
+	int srow;
+	btScalar limit_err = btScalar(0.0);
+	int limit = 0; // 0: no limit to solve, 1: low limit, 2: high limit
+	if(getSolveLimit())
+	{
+#ifdef	_BT_USE_CENTER_LIMIT_
+	limit_err = m_limit.getCorrection() * m_referenceSign;
+#else
+	limit_err = m_correction * m_referenceSign;
+#endif
+	limit = (limit_err > btScalar(0.0)) ? 1 : 2;
+
+	}
+	// if the hinge has joint limits or motor, add in the extra row
+	int powered = 0;
+	if(getEnableAngularMotor())
+	{
+		powered = 1;
+	}
+	if(limit || powered) 
+	{
+		nrow++;
+		srow = nrow * info->rowskip;
+		info->m_J1angularAxis[srow+0] = ax1[0];
+		info->m_J1angularAxis[srow+1] = ax1[1];
+		info->m_J1angularAxis[srow+2] = ax1[2];
+
+		info->m_J2angularAxis[srow+0] = -ax1[0];
+		info->m_J2angularAxis[srow+1] = -ax1[1];
+		info->m_J2angularAxis[srow+2] = -ax1[2];
+
+		btScalar lostop = getLowerLimit();
+		btScalar histop = getUpperLimit();
+		if(limit && (lostop == histop))
+		{  // the joint motor is ineffective
+			powered = 0;
+		}
+		info->m_constraintError[srow] = btScalar(0.0f);
+		btScalar currERP = (m_flags & BT_HINGE_FLAGS_ERP_STOP) ? m_stopERP : info->erp;
+		if(powered)
+		{
+			if(m_flags & BT_HINGE_FLAGS_CFM_NORM)
+			{
+				info->cfm[srow] = m_normalCFM;
+			}
+			btScalar mot_fact = getMotorFactor(m_hingeAngle, lostop, histop, m_motorTargetVelocity, info->fps * currERP);
+			info->m_constraintError[srow] += mot_fact * m_motorTargetVelocity * m_referenceSign;
+			info->m_lowerLimit[srow] = - m_maxMotorImpulse;
+			info->m_upperLimit[srow] =   m_maxMotorImpulse;
+		}
+		if(limit)
+		{
+			k = info->fps * currERP;
+			info->m_constraintError[srow] += k * limit_err;
+			if(m_flags & BT_HINGE_FLAGS_CFM_STOP)
+			{
+				info->cfm[srow] = m_stopCFM;
+			}
+			if(lostop == histop) 
+			{
+				// limited low and high simultaneously
+				info->m_lowerLimit[srow] = -SIMD_INFINITY;
+				info->m_upperLimit[srow] = SIMD_INFINITY;
+			}
+			else if(limit == 1) 
+			{ // low limit
+				info->m_lowerLimit[srow] = 0;
+				info->m_upperLimit[srow] = SIMD_INFINITY;
+			}
+			else 
+			{ // high limit
+				info->m_lowerLimit[srow] = -SIMD_INFINITY;
+				info->m_upperLimit[srow] = 0;
+			}
+			// bounce (the relaxation factor is used for that: m_limit.getRelaxationFactor() or m_relaxationFactor)
+#ifdef	_BT_USE_CENTER_LIMIT_
+			btScalar bounce = m_limit.getRelaxationFactor();
+#else
+			btScalar bounce = m_relaxationFactor;
+#endif
+			if(bounce > btScalar(0.0))
+			{
+				btScalar vel = angVelA.dot(ax1);
+				vel -= angVelB.dot(ax1);
+				// only apply bounce if the velocity is incoming, and if the
+				// resulting c[] exceeds what we already have.
+				if(limit == 1)
+				{	// low limit
+					if(vel < 0)
+					{
+						btScalar newc = -bounce * vel;
+						if(newc > info->m_constraintError[srow])
+						{
+							info->m_constraintError[srow] = newc;
+						}
+					}
+				}
+				else
+				{	// high limit - all those computations are reversed
+					if(vel > 0)
+					{
+						btScalar newc = -bounce * vel;
+						if(newc < info->m_constraintError[srow])
+						{
+							info->m_constraintError[srow] = newc;
+						}
+					}
+				}
+			}
+#ifdef	_BT_USE_CENTER_LIMIT_
+			info->m_constraintError[srow] *= m_limit.getBiasFactor();
+#else
+			info->m_constraintError[srow] *= m_biasFactor;
+#endif
+		} // if(limit)
+	} // if angular limit or powered
+}
+
+
+void btHingeConstraint::setFrames(const btTransform & frameA, const btTransform & frameB)
+{	// replaces both constraint frames
+	m_rbAFrame = frameA;
+	m_rbBFrame = frameB;
+	buildJacobian();	// only has an effect when m_useSolveConstraintObsolete is set
+}
+
+
+void	btHingeConstraint::updateRHS(btScalar	timeStep)
+{
+	(void)timeStep;	// intentionally unused
+	// nothing to update: this implementation is empty
+}
+
+
+btScalar btHingeConstraint::getHingeAngle()
+{	// hinge angle evaluated with the current center-of-mass transforms of both bodies
+	return getHingeAngle(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform());
+}
+
+btScalar btHingeConstraint::getHingeAngle(const btTransform& transA,const btTransform& transB)
+{
+	// X and Y axes of constraint frame A and Y axis of constraint frame B, each rotated by the given body basis
+	const btVector3 frameAX = transA.getBasis() * m_rbAFrame.getBasis().getColumn(0);
+	const btVector3 frameAY = transA.getBasis() * m_rbAFrame.getBasis().getColumn(1);
+	const btVector3 frameBY = transB.getBasis() * m_rbBFrame.getBasis().getColumn(1);
+	const btScalar alongAX = frameBY.dot(frameAX), alongAY = frameBY.dot(frameAY);
+	return m_referenceSign * btAtan2(alongAX, alongAY); // sign follows the chosen reference frame
+}
+
+
+
+void btHingeConstraint::testLimit(const btTransform& transA,const btTransform& transB)
+{
+	// Compute limit information
+	m_hingeAngle = getHingeAngle(transA,transB);
+#ifdef	_BT_USE_CENTER_LIMIT_
+	m_limit.test(m_hingeAngle);
+#else
+	m_correction = btScalar(0.);
+	m_limitSign = btScalar(0.);
+	m_solveLimit = false;
+	if (m_lowerLimit <= m_upperLimit) // lower > upper means no limit is evaluated
+	{
+		m_hingeAngle = btAdjustAngleToLimits(m_hingeAngle, m_lowerLimit, m_upperLimit);
+		if (m_hingeAngle <= m_lowerLimit)
+		{	// at or below the lower limit: correction is >= 0
+			m_correction = (m_lowerLimit - m_hingeAngle);
+			m_limitSign = 1.0f;
+			m_solveLimit = true;
+		} 
+		else if (m_hingeAngle >= m_upperLimit)
+		{	// at or above the upper limit: correction is <= 0
+			m_correction = m_upperLimit - m_hingeAngle;
+			m_limitSign = -1.0f;
+			m_solveLimit = true;
+		}
+	}
+#endif
+	return;
+}
+
+
+static btVector3 vHinge(0, 0, btScalar(1)); // Z axis in constraint space; the hinge axis is column 2 (Z) of the constraint frames
+// Reduces a target rotation to an angle about the hinge axis and forwards it to setMotorTarget(btScalar, btScalar)
+void btHingeConstraint::setMotorTarget(const btQuaternion& qAinB, btScalar dt)
+{
+	// convert target from body to constraint space
+	btQuaternion qConstraint = m_rbBFrame.getRotation().inverse() * qAinB * m_rbAFrame.getRotation();
+	qConstraint.normalize();
+
+	// extract "pure" hinge component
+	btVector3 vNoHinge = quatRotate(qConstraint, vHinge); vNoHinge.normalize();
+	btQuaternion qNoHinge = shortestArcQuat(vHinge, vNoHinge);
+	btQuaternion qHinge = qNoHinge.inverse() * qConstraint;
+	qHinge.normalize();
+
+	// compute angular target (the clamping to limits happens in the scalar overload called below)
+	btScalar targetAngle = qHinge.getAngle();
+	if (targetAngle > SIMD_PI) // long way around. flip quat and recalculate.
+	{
+		qHinge = -(qHinge);
+		targetAngle = qHinge.getAngle();
+	}
+	if (qHinge.getZ() < 0) // negative Z component: the angle is taken as negative
+		targetAngle = -targetAngle;
+
+	setMotorTarget(targetAngle, dt);
+}
+
+void btHingeConstraint::setMotorTarget(btScalar targetAngle, btScalar dt)
+{
+	// Step 1: restrict the requested angle (without center limit: clamp to [lower, upper] when lower < upper).
+#ifdef	_BT_USE_CENTER_LIMIT_
+	m_limit.fit(targetAngle);
+#else
+	const bool hasRange = m_lowerLimit < m_upperLimit;
+	if (hasRange && targetAngle < m_lowerLimit)
+		targetAngle = m_lowerLimit;
+	else if (hasRange && targetAngle > m_upperLimit)
+		targetAngle = m_upperLimit;
+#endif
+	// Step 2: the motor target velocity is the remaining angle divided by dt
+	// (dt is used as a divisor and is not checked here).
+	const btScalar angleNow = getHingeAngle(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform());
+	const btScalar remaining = targetAngle - angleNow;
+	m_motorTargetVelocity = remaining / dt;
+}
+
+
+
+void btHingeConstraint::getInfo2InternalUsingFrameOffset(btConstraintInfo2* info, const btTransform& transA,const btTransform& transB,const btVector3& angVelA,const btVector3& angVelB)
+{
+	btAssert(!m_useSolveConstraintObsolete);
+	int i, s = info->rowskip;
+	// transforms in world space
+	btTransform trA = transA*m_rbAFrame;
+	btTransform trB = transB*m_rbBFrame;
+	// pivot point
+	btVector3 pivotAInW = trA.getOrigin();
+	btVector3 pivotBInW = trB.getOrigin();
+#if 1
+	// difference between frames in WCS
+	btVector3 ofs = trB.getOrigin() - trA.getOrigin();
+	// now get weight factors depending on masses
+	btScalar miA = getRigidBodyA().getInvMass();
+	btScalar miB = getRigidBodyB().getInvMass();
+	bool hasStaticBody = (miA < SIMD_EPSILON) || (miB < SIMD_EPSILON);
+	btScalar miS = miA + miB;
+	btScalar factA, factB;
+	if(miS > btScalar(0.f))
+	{
+		factA = miB / miS;
+	}
+	else 
+	{
+		factA = btScalar(0.5f);
+	}
+	factB = btScalar(1.0f) - factA;
+	// get the desired direction of hinge axis
+	// as weighted sum of Z-orthos of frameA and frameB in WCS
+	btVector3 ax1A = trA.getBasis().getColumn(2);
+	btVector3 ax1B = trB.getBasis().getColumn(2);
+	btVector3 ax1 = ax1A * factA + ax1B * factB;
+	ax1.normalize();
+	// fill first 3 rows 
+	// we want: velA + wA x relA == velB + wB x relB
+	btTransform bodyA_trans = transA;
+	btTransform bodyB_trans = transB;
+	int s0 = 0;
+	int s1 = s;
+	int s2 = s * 2;
+	int nrow = 2; // last filled row
+	btVector3 tmpA, tmpB, relA, relB, p, q;
+	// get vector from bodyB to frameB in WCS
+	relB = trB.getOrigin() - bodyB_trans.getOrigin();
+	// get its projection to hinge axis
+	btVector3 projB = ax1 * relB.dot(ax1);
+	// get vector directed from bodyB to hinge axis (and orthogonal to it)
+	btVector3 orthoB = relB - projB;
+	// same for bodyA
+	relA = trA.getOrigin() - bodyA_trans.getOrigin();
+	btVector3 projA = ax1 * relA.dot(ax1);
+	btVector3 orthoA = relA - projA;
+	btVector3 totalDist = projA - projB;
+	// get offset vectors relA and relB
+	relA = orthoA + totalDist * factA;
+	relB = orthoB - totalDist * factB;
+	// now choose average ortho to hinge axis
+	p = orthoB * factA + orthoA * factB;
+	btScalar len2 = p.length2();
+	if(len2 > SIMD_EPSILON)
+	{
+		p /= btSqrt(len2);
+	}
+	else
+	{
+		p = trA.getBasis().getColumn(1);
+	}
+	// make one more ortho
+	q = ax1.cross(p);
+	// fill three rows
+	tmpA = relA.cross(p);
+	tmpB = relB.cross(p);
+    for (i=0; i<3; i++) info->m_J1angularAxis[s0+i] = tmpA[i];
+    for (i=0; i<3; i++) info->m_J2angularAxis[s0+i] = -tmpB[i];
+	tmpA = relA.cross(q);
+	tmpB = relB.cross(q);
+	if(hasStaticBody && getSolveLimit())
+	{ // to make constraint between static and dynamic objects more rigid
+		// remove wA (or wB) from equation if angular limit is hit
+		tmpB *= factB;
+		tmpA *= factA;
+	}
+	for (i=0; i<3; i++) info->m_J1angularAxis[s1+i] = tmpA[i];
+    for (i=0; i<3; i++) info->m_J2angularAxis[s1+i] = -tmpB[i];
+	tmpA = relA.cross(ax1);
+	tmpB = relB.cross(ax1);
+	if(hasStaticBody)
+	{ // to make constraint between static and dynamic objects more rigid
+		// remove wA (or wB) from equation
+		tmpB *= factB;
+		tmpA *= factA;
+	}
+	for (i=0; i<3; i++) info->m_J1angularAxis[s2+i] = tmpA[i];
+    for (i=0; i<3; i++) info->m_J2angularAxis[s2+i] = -tmpB[i];
+
+	btScalar k = info->fps * info->erp;
+
+	if (!m_angularOnly)
+	{
+		for (i=0; i<3; i++) info->m_J1linearAxis[s0+i] = p[i];
+		for (i=0; i<3; i++) info->m_J1linearAxis[s1+i] = q[i];
+		for (i=0; i<3; i++) info->m_J1linearAxis[s2+i] = ax1[i];
+	
+	// compute three elements of right hand side
+	
+		btScalar rhs = k * p.dot(ofs);
+		info->m_constraintError[s0] = rhs;
+		rhs = k * q.dot(ofs);
+		info->m_constraintError[s1] = rhs;
+		rhs = k * ax1.dot(ofs);
+		info->m_constraintError[s2] = rhs;
+	}
+	// the hinge axis should be the only unconstrained
+	// rotational axis, the angular velocity of the two bodies perpendicular to
+	// the hinge axis should be equal. thus the constraint equations are
+	//    p*w1 - p*w2 = 0
+	//    q*w1 - q*w2 = 0
+	// where p and q are unit vectors normal to the hinge axis, and w1 and w2
+	// are the angular velocity vectors of the two bodies.
+	int s3 = 3 * s;
+	int s4 = 4 * s;
+	info->m_J1angularAxis[s3 + 0] = p[0];
+	info->m_J1angularAxis[s3 + 1] = p[1];
+	info->m_J1angularAxis[s3 + 2] = p[2];
+	info->m_J1angularAxis[s4 + 0] = q[0];
+	info->m_J1angularAxis[s4 + 1] = q[1];
+	info->m_J1angularAxis[s4 + 2] = q[2];
+
+	info->m_J2angularAxis[s3 + 0] = -p[0];
+	info->m_J2angularAxis[s3 + 1] = -p[1];
+	info->m_J2angularAxis[s3 + 2] = -p[2];
+	info->m_J2angularAxis[s4 + 0] = -q[0];
+	info->m_J2angularAxis[s4 + 1] = -q[1];
+	info->m_J2angularAxis[s4 + 2] = -q[2];
+	// compute the right hand side of the constraint equation. set relative
+	// body velocities along p and q to bring the hinge back into alignment.
+	// if ax1A,ax1B are the unit length hinge axes as computed from bodyA and
+	// bodyB, we need to rotate both bodies along the axis u = (ax1 x ax2).
+	// if "theta" is the angle between ax1 and ax2, we need an angular velocity
+	// along u to cover angle erp*theta in one step :
+	//   |angular_velocity| = angle/time = erp*theta / stepsize
+	//                      = (erp*fps) * theta
+	//    angular_velocity  = |angular_velocity| * (ax1 x ax2) / |ax1 x ax2|
+	//                      = (erp*fps) * theta * (ax1 x ax2) / sin(theta)
+	// ...as ax1 and ax2 are unit length. if theta is smallish,
+	// theta ~= sin(theta), so
+	//    angular_velocity  = (erp*fps) * (ax1 x ax2)
+	// ax1 x ax2 is in the plane space of ax1, so we project the angular
+	// velocity to p and q to find the right hand side.
+	k = info->fps * info->erp;
+	btVector3 u = ax1A.cross(ax1B);
+	info->m_constraintError[s3] = k * u.dot(p);
+	info->m_constraintError[s4] = k * u.dot(q);
+#endif
+	// check angular limits
+	nrow = 4; // last filled row
+	int srow;
+	btScalar limit_err = btScalar(0.0);
+	int limit = 0;
+	if(getSolveLimit())
+	{
+#ifdef	_BT_USE_CENTER_LIMIT_
+	limit_err = m_limit.getCorrection() * m_referenceSign;
+#else
+	limit_err = m_correction * m_referenceSign;
+#endif
+	limit = (limit_err > btScalar(0.0)) ? 1 : 2;
+
+	}
+	// if the hinge has joint limits or motor, add in the extra row
+	int powered = 0;
+	if(getEnableAngularMotor())
+	{
+		powered = 1;
+	}
+	if(limit || powered) 
+	{
+		nrow++;
+		srow = nrow * info->rowskip;
+		info->m_J1angularAxis[srow+0] = ax1[0];
+		info->m_J1angularAxis[srow+1] = ax1[1];
+		info->m_J1angularAxis[srow+2] = ax1[2];
+
+		info->m_J2angularAxis[srow+0] = -ax1[0];
+		info->m_J2angularAxis[srow+1] = -ax1[1];
+		info->m_J2angularAxis[srow+2] = -ax1[2];
+
+		btScalar lostop = getLowerLimit();
+		btScalar histop = getUpperLimit();
+		if(limit && (lostop == histop))
+		{  // the joint motor is ineffective
+			powered = 0;
+		}
+		info->m_constraintError[srow] = btScalar(0.0f);
+		btScalar currERP = (m_flags & BT_HINGE_FLAGS_ERP_STOP) ? m_stopERP : info->erp;
+		if(powered)
+		{
+			if(m_flags & BT_HINGE_FLAGS_CFM_NORM)
+			{
+				info->cfm[srow] = m_normalCFM;
+			}
+			btScalar mot_fact = getMotorFactor(m_hingeAngle, lostop, histop, m_motorTargetVelocity, info->fps * currERP);
+			info->m_constraintError[srow] += mot_fact * m_motorTargetVelocity * m_referenceSign;
+			info->m_lowerLimit[srow] = - m_maxMotorImpulse;
+			info->m_upperLimit[srow] =   m_maxMotorImpulse;
+		}
+		if(limit)
+		{
+			k = info->fps * currERP;
+			info->m_constraintError[srow] += k * limit_err;
+			if(m_flags & BT_HINGE_FLAGS_CFM_STOP)
+			{
+				info->cfm[srow] = m_stopCFM;
+			}
+			if(lostop == histop) 
+			{
+				// limited low and high simultaneously
+				info->m_lowerLimit[srow] = -SIMD_INFINITY;
+				info->m_upperLimit[srow] = SIMD_INFINITY;
+			}
+			else if(limit == 1) 
+			{ // low limit
+				info->m_lowerLimit[srow] = 0;
+				info->m_upperLimit[srow] = SIMD_INFINITY;
+			}
+			else 
+			{ // high limit
+				info->m_lowerLimit[srow] = -SIMD_INFINITY;
+				info->m_upperLimit[srow] = 0;
+			}
+			// bounce (we'll use slider parameter abs(1.0 - m_dampingLimAng) for that)
+#ifdef	_BT_USE_CENTER_LIMIT_
+			btScalar bounce = m_limit.getRelaxationFactor();
+#else
+			btScalar bounce = m_relaxationFactor;
+#endif
+			if(bounce > btScalar(0.0))
+			{
+				btScalar vel = angVelA.dot(ax1);
+				vel -= angVelB.dot(ax1);
+				// only apply bounce if the velocity is incoming, and if the
+				// resulting c[] exceeds what we already have.
+				if(limit == 1)
+				{	// low limit
+					if(vel < 0)
+					{
+						btScalar newc = -bounce * vel;
+						if(newc > info->m_constraintError[srow])
+						{
+							info->m_constraintError[srow] = newc;
+						}
+					}
+				}
+				else
+				{	// high limit - all those computations are reversed
+					if(vel > 0)
+					{
+						btScalar newc = -bounce * vel;
+						if(newc < info->m_constraintError[srow])
+						{
+							info->m_constraintError[srow] = newc;
+						}
+					}
+				}
+			}
+#ifdef	_BT_USE_CENTER_LIMIT_
+			info->m_constraintError[srow] *= m_limit.getBiasFactor();
+#else
+			info->m_constraintError[srow] *= m_biasFactor;
+#endif
+		} // if(limit)
+	} // if angular limit or powered
+}
+
+
+///Override the default global value of a parameter for this hinge: BT_CONSTRAINT_STOP_ERP, BT_CONSTRAINT_STOP_CFM or BT_CONSTRAINT_CFM.
+///Only axis -1 (the default) or 5 is accepted; each accepted parameter also sets its BT_HINGE_FLAGS_* bit in m_flags.
+void btHingeConstraint::setParam(int num, btScalar value, int axis)
+{
+	if((axis == -1) || (axis == 5))
+	{
+		switch(num)
+		{	
+			case BT_CONSTRAINT_STOP_ERP :
+				m_stopERP = value;
+				m_flags |= BT_HINGE_FLAGS_ERP_STOP;
+				break;
+			case BT_CONSTRAINT_STOP_CFM :
+				m_stopCFM = value;
+				m_flags |= BT_HINGE_FLAGS_CFM_STOP;
+				break;
+			case BT_CONSTRAINT_CFM :
+				m_normalCFM = value;
+				m_flags |= BT_HINGE_FLAGS_CFM_NORM;
+				break;
+			default : // any other parameter id is not handled by the hinge
+				btAssertConstrParams(0);
+		}
+	}
+	else
+	{
+		btAssertConstrParams(0); // axis other than -1 or 5
+	}
+}
+
+///Return the locally stored value of a parameter (stop ERP, stop CFM or normal CFM) for axis -1 or 5; the result stays 0 if nothing matched.
+btScalar btHingeConstraint::getParam(int num, int axis) const 
+{
+	btScalar retVal = 0;
+	if((axis == -1) || (axis == 5))
+	{
+		switch(num)
+		{	
+			case BT_CONSTRAINT_STOP_ERP :
+				btAssertConstrParams(m_flags & BT_HINGE_FLAGS_ERP_STOP); // expects the value to have been set through setParam
+				retVal = m_stopERP;
+				break;
+			case BT_CONSTRAINT_STOP_CFM :
+				btAssertConstrParams(m_flags & BT_HINGE_FLAGS_CFM_STOP);
+				retVal = m_stopCFM;
+				break;
+			case BT_CONSTRAINT_CFM :
+				btAssertConstrParams(m_flags & BT_HINGE_FLAGS_CFM_NORM);
+				retVal = m_normalCFM;
+				break;
+			default : // any other parameter id is not handled by the hinge
+				btAssertConstrParams(0);
+		}
+	}
+	else
+	{
+		btAssertConstrParams(0); // axis other than -1 or 5
+	}
+	return retVal;
+}
+
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btHingeConstraint.h b/src/bullet/BulletDynamics/ConstraintSolver/btHingeConstraint.h
new file mode 100644
index 00000000..cb2973e1
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btHingeConstraint.h
@@ -0,0 +1,381 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/* Hinge Constraint by Dirk Gregorius. Limits added by Marcus Hennix at Starbreeze Studios */
+
+#ifndef BT_HINGECONSTRAINT_H
+#define BT_HINGECONSTRAINT_H
+
+#define _BT_USE_CENTER_LIMIT_ 1 // when defined, the #ifdef branches in this header use the btAngularLimit member m_limit
+
+
+#include "LinearMath/btVector3.h"
+#include "btJacobianEntry.h"
+#include "btTypedConstraint.h"
+
+class btRigidBody;
+
+#ifdef BT_USE_DOUBLE_PRECISION // btHingeConstraintData aliases the Double or the Float serialization struct
+#define btHingeConstraintData	btHingeConstraintDoubleData
+#define btHingeConstraintDataName	"btHingeConstraintDoubleData"
+#else
+#define btHingeConstraintData	btHingeConstraintFloatData
+#define btHingeConstraintDataName	"btHingeConstraintFloatData"
+#endif //BT_USE_DOUBLE_PRECISION
+
+
+
+enum btHingeFlags // bits stored in btHingeConstraint::m_flags by setParam
+{
+	BT_HINGE_FLAGS_CFM_STOP = 1, // m_stopCFM was set (BT_CONSTRAINT_STOP_CFM)
+	BT_HINGE_FLAGS_ERP_STOP = 2, // m_stopERP was set (BT_CONSTRAINT_STOP_ERP)
+	BT_HINGE_FLAGS_CFM_NORM = 4 // m_normalCFM was set (BT_CONSTRAINT_CFM)
+};
+
+
+/// hinge constraint between two rigidbodies each with a pivotpoint that describes the axis location in local space
+/// axis defines the orientation of the hinge axis
+ATTRIBUTE_ALIGNED16(class) btHingeConstraint : public btTypedConstraint
+{
+#ifdef IN_PARALLELL_SOLVER
+public:
+#endif
+	btJacobianEntry	m_jac[3]; //3 orthogonal linear constraints
+	btJacobianEntry	m_jacAng[3]; //2 orthogonal angular constraints + 1 for limit/motor
+
+	btTransform m_rbAFrame; // constraint axes. Assumes z is hinge axis.
+	btTransform m_rbBFrame;
+
+	btScalar	m_motorTargetVelocity;
+	btScalar	m_maxMotorImpulse;
+
+
+#ifdef	_BT_USE_CENTER_LIMIT_
+	btAngularLimit	m_limit;
+#else
+	btScalar	m_lowerLimit;	
+	btScalar	m_upperLimit;	
+	btScalar	m_limitSign;
+	btScalar	m_correction;
+
+	btScalar	m_limitSoftness; 
+	btScalar	m_biasFactor; 
+	btScalar	m_relaxationFactor; 
+
+	bool		m_solveLimit;
+#endif
+
+	btScalar	m_kHinge;
+
+
+	btScalar	m_accLimitImpulse;
+	btScalar	m_hingeAngle;
+	btScalar	m_referenceSign;
+
+	bool		m_angularOnly;
+	bool		m_enableAngularMotor;
+	bool		m_useSolveConstraintObsolete;
+	bool		m_useOffsetForConstraintFrame;
+	bool		m_useReferenceFrameA;
+
+	btScalar	m_accMotorImpulse;
+
+	int			m_flags; // bitwise OR of btHingeFlags values, set by setParam
+	btScalar	m_normalCFM; // valid when BT_HINGE_FLAGS_CFM_NORM is set in m_flags
+	btScalar	m_stopCFM; // valid when BT_HINGE_FLAGS_CFM_STOP is set in m_flags
+	btScalar	m_stopERP; // valid when BT_HINGE_FLAGS_ERP_STOP is set in m_flags
+
+	
+public:
+
+	btHingeConstraint(btRigidBody& rbA,btRigidBody& rbB, const btVector3& pivotInA,const btVector3& pivotInB, const btVector3& axisInA,const btVector3& axisInB, bool useReferenceFrameA = false);
+
+	btHingeConstraint(btRigidBody& rbA,const btVector3& pivotInA,const btVector3& axisInA, bool useReferenceFrameA = false);
+	
+	btHingeConstraint(btRigidBody& rbA,btRigidBody& rbB, const btTransform& rbAFrame, const btTransform& rbBFrame, bool useReferenceFrameA = false);
+
+	btHingeConstraint(btRigidBody& rbA,const btTransform& rbAFrame, bool useReferenceFrameA = false);
+
+
+	virtual void	buildJacobian();
+
+	virtual void getInfo1 (btConstraintInfo1* info);
+
+	void getInfo1NonVirtual(btConstraintInfo1* info);
+
+	virtual void getInfo2 (btConstraintInfo2* info);
+
+	void	getInfo2NonVirtual(btConstraintInfo2* info,const btTransform& transA,const btTransform& transB,const btVector3& angVelA,const btVector3& angVelB);
+
+	void	getInfo2Internal(btConstraintInfo2* info,const btTransform& transA,const btTransform& transB,const btVector3& angVelA,const btVector3& angVelB);
+	void	getInfo2InternalUsingFrameOffset(btConstraintInfo2* info,const btTransform& transA,const btTransform& transB,const btVector3& angVelA,const btVector3& angVelB);
+		
+
+	void	updateRHS(btScalar	timeStep);
+
+	const btRigidBody& getRigidBodyA() const
+	{
+		return m_rbA;
+	}
+	const btRigidBody& getRigidBodyB() const
+	{
+		return m_rbB;
+	}
+
+	btRigidBody& getRigidBodyA()	
+	{		
+		return m_rbA;	
+	}	
+
+	btRigidBody& getRigidBodyB()	
+	{		
+		return m_rbB;	
+	}
+
+	btTransform& getFrameOffsetA() // mutable access to m_rbAFrame (same object getAFrame() returns)
+	{
+	return m_rbAFrame;
+	}
+
+	btTransform& getFrameOffsetB() // mutable access to m_rbBFrame (same object getBFrame() returns)
+	{
+		return m_rbBFrame;
+	}
+
+	void setFrames(const btTransform& frameA, const btTransform& frameB);
+	
+	void	setAngularOnly(bool angularOnly)
+	{
+		m_angularOnly = angularOnly;
+	}
+
+	void	enableAngularMotor(bool enableMotor,btScalar targetVelocity,btScalar maxMotorImpulse) // sets all three motor fields at once
+	{
+		m_enableAngularMotor  = enableMotor;
+		m_motorTargetVelocity = targetVelocity;
+		m_maxMotorImpulse = maxMotorImpulse;
+	}
+
+	// extra motor API, including ability to set a target rotation (as opposed to angular velocity)
+	// note: setMotorTarget sets angular velocity under the hood, so you must call it every tick to
+	//       maintain a given angular target.
+	void enableMotor(bool enableMotor) 	{ m_enableAngularMotor = enableMotor; }
+	void setMaxMotorImpulse(btScalar maxMotorImpulse) { m_maxMotorImpulse = maxMotorImpulse; }
+	void setMotorTarget(const btQuaternion& qAinB, btScalar dt); // qAinB is rotation of body A wrt body B.
+	void setMotorTarget(btScalar targetAngle, btScalar dt);
+
+
+	void	setLimit(btScalar low,btScalar high,btScalar _softness = 0.9f, btScalar _biasFactor = 0.3f, btScalar _relaxationFactor = 1.0f) // forwards to m_limit.set or stores the normalized angles, depending on _BT_USE_CENTER_LIMIT_
+	{
+#ifdef	_BT_USE_CENTER_LIMIT_
+		m_limit.set(low, high, _softness, _biasFactor, _relaxationFactor);
+#else
+		m_lowerLimit = btNormalizeAngle(low);
+		m_upperLimit = btNormalizeAngle(high);
+		m_limitSoftness =  _softness;
+		m_biasFactor = _biasFactor;
+		m_relaxationFactor = _relaxationFactor;
+#endif
+	}
+
+	void	setAxis(btVector3& axisInA) // rebuilds both frame bases from axisInA; m_rbAFrame's origin is kept, m_rbBFrame's origin is recomputed
+	{
+		btVector3 rbAxisA1, rbAxisA2;
+		btPlaneSpace1(axisInA, rbAxisA1, rbAxisA2); // presumably fills rbAxisA1/rbAxisA2 with two vectors orthogonal to axisInA - confirm in btVector3.h
+		btVector3 pivotInA = m_rbAFrame.getOrigin();
+//		m_rbAFrame.getOrigin() = pivotInA;
+		m_rbAFrame.getBasis().setValue( rbAxisA1.getX(),rbAxisA2.getX(),axisInA.getX(),
+										rbAxisA1.getY(),rbAxisA2.getY(),axisInA.getY(),
+										rbAxisA1.getZ(),rbAxisA2.getZ(),axisInA.getZ() );
+
+		btVector3 axisInB = m_rbA.getCenterOfMassTransform().getBasis() * axisInA; // axisInA rotated by body A's basis
+
+		btQuaternion rotationArc = shortestArcQuat(axisInA,axisInB);
+		btVector3 rbAxisB1 =  quatRotate(rotationArc,rbAxisA1);
+		btVector3 rbAxisB2 = axisInB.cross(rbAxisB1);
+
+		m_rbBFrame.getOrigin() = m_rbB.getCenterOfMassTransform().inverse()(m_rbA.getCenterOfMassTransform()(pivotInA)); // pivotInA mapped through body A's transform, then body B's inverse transform
+
+		m_rbBFrame.getBasis().setValue( rbAxisB1.getX(),rbAxisB2.getX(),axisInB.getX(),
+										rbAxisB1.getY(),rbAxisB2.getY(),axisInB.getY(),
+										rbAxisB1.getZ(),rbAxisB2.getZ(),axisInB.getZ() );
+		m_rbBFrame.getBasis() = m_rbB.getCenterOfMassTransform().getBasis().inverse() * m_rbBFrame.getBasis(); // re-express the basis just built relative to body B
+
+	}
+
+	btScalar	getLowerLimit() const
+	{
+#ifdef	_BT_USE_CENTER_LIMIT_
+	return m_limit.getLow();
+#else
+	return m_lowerLimit;
+#endif
+	}
+
+	btScalar	getUpperLimit() const
+	{
+#ifdef	_BT_USE_CENTER_LIMIT_
+	return m_limit.getHigh();
+#else		
+	return m_upperLimit;
+#endif
+	}
+
+
+	btScalar getHingeAngle();
+
+	btScalar getHingeAngle(const btTransform& transA,const btTransform& transB);
+
+	void testLimit(const btTransform& transA,const btTransform& transB);
+
+
+	const btTransform& getAFrame() const { return m_rbAFrame; };	
+	const btTransform& getBFrame() const { return m_rbBFrame; };
+
+	btTransform& getAFrame() { return m_rbAFrame; };	
+	btTransform& getBFrame() { return m_rbBFrame; };
+
+	inline int getSolveLimit() // non-zero when the limit row has to be solved
+	{
+#ifdef	_BT_USE_CENTER_LIMIT_
+	return m_limit.isLimit();
+#else
+	return m_solveLimit;
+#endif
+	}
+
+	inline btScalar getLimitSign()
+	{
+#ifdef	_BT_USE_CENTER_LIMIT_
+	return m_limit.getSign();
+#else
+		return m_limitSign;
+#endif
+	}
+
+	inline bool getAngularOnly() 
+	{ 
+		return m_angularOnly; 
+	}
+	inline bool getEnableAngularMotor() 
+	{ 
+		return m_enableAngularMotor; 
+	}
+	inline btScalar getMotorTargetVelosity() // NOTE: "Velosity" is misspelled in the public name; kept as-is for callers
+	{ 
+		return m_motorTargetVelocity; 
+	}
+	inline btScalar getMaxMotorImpulse() 
+	{ 
+		return m_maxMotorImpulse; 
+	}
+	// access for UseFrameOffset
+	bool getUseFrameOffset() { return m_useOffsetForConstraintFrame; }
+	void setUseFrameOffset(bool frameOffsetOnOff) { m_useOffsetForConstraintFrame = frameOffsetOnOff; }
+
+
+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+	///If no axis is provided, it uses the default axis for this constraint. NOTE: btHingeConstraint::setParam accepts only axis -1 or 5.
+	virtual	void	setParam(int num, btScalar value, int axis = -1);
+	///return the local value of parameter
+	virtual	btScalar getParam(int num, int axis = -1) const;
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (the inline definition in this header always returns btHingeConstraintDataName)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+
+};
+
+///Do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64.
+struct	btHingeConstraintDoubleData
+{
+	btTypedConstraintData	m_typeConstraintData;
+	btTransformDoubleData m_rbAFrame; // constraint axes. Assumes z is hinge axis.
+	btTransformDoubleData m_rbBFrame;
+	int			m_useReferenceFrameA;
+	int			m_angularOnly;
+	int			m_enableAngularMotor;
+	float	m_motorTargetVelocity; // scalars are declared float even in this double-precision struct
+	float	m_maxMotorImpulse;
+
+	float	m_lowerLimit;
+	float	m_upperLimit;
+	float	m_limitSoftness;
+	float	m_biasFactor;
+	float	m_relaxationFactor;
+
+};
+///Do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64.
+struct	btHingeConstraintFloatData
+{
+	btTypedConstraintData	m_typeConstraintData;
+	btTransformFloatData m_rbAFrame; // constraint axes. Assumes z is hinge axis.
+	btTransformFloatData m_rbBFrame;
+	int			m_useReferenceFrameA;
+	int			m_angularOnly;
+	
+	int			m_enableAngularMotor; // the bool members of btHingeConstraint are stored as int here
+	float	m_motorTargetVelocity;
+	float	m_maxMotorImpulse;
+
+	float	m_lowerLimit;
+	float	m_upperLimit;
+	float	m_limitSoftness;
+	float	m_biasFactor;
+	float	m_relaxationFactor;
+
+};
+
+
+
+SIMD_FORCE_INLINE	int	btHingeConstraint::calculateSerializeBufferSize() const
+{
+	return sizeof(btHingeConstraintData); // macro: the Double or Float struct, chosen by BT_USE_DOUBLE_PRECISION
+}
+
+	///Fills dataBuffer (treated as a btHingeConstraintData) and returns the struct name; this implementation has no failure path and never returns 0.
+SIMD_FORCE_INLINE	const char*	btHingeConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btHingeConstraintData* hingeData = (btHingeConstraintData*)dataBuffer;
+	btTypedConstraint::serialize(&hingeData->m_typeConstraintData,serializer); // base-class part first
+
+	m_rbAFrame.serialize(hingeData->m_rbAFrame);
+	m_rbBFrame.serialize(hingeData->m_rbBFrame);
+
+	hingeData->m_angularOnly = m_angularOnly;
+	hingeData->m_enableAngularMotor = m_enableAngularMotor;
+	hingeData->m_maxMotorImpulse = float(m_maxMotorImpulse);
+	hingeData->m_motorTargetVelocity = float(m_motorTargetVelocity);
+	hingeData->m_useReferenceFrameA = m_useReferenceFrameA;
+#ifdef	_BT_USE_CENTER_LIMIT_
+	hingeData->m_lowerLimit = float(m_limit.getLow());
+	hingeData->m_upperLimit = float(m_limit.getHigh());
+	hingeData->m_limitSoftness = float(m_limit.getSoftness());
+	hingeData->m_biasFactor = float(m_limit.getBiasFactor());
+	hingeData->m_relaxationFactor = float(m_limit.getRelaxationFactor());
+#else
+	hingeData->m_lowerLimit = float(m_lowerLimit);
+	hingeData->m_upperLimit = float(m_upperLimit);
+	hingeData->m_limitSoftness = float(m_limitSoftness);
+	hingeData->m_biasFactor = float(m_biasFactor);
+	hingeData->m_relaxationFactor = float(m_relaxationFactor);
+#endif
+
+	return btHingeConstraintDataName;
+}
+
+#endif //BT_HINGECONSTRAINT_H
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btJacobianEntry.h b/src/bullet/BulletDynamics/ConstraintSolver/btJacobianEntry.h
new file mode 100644
index 00000000..f1994a2d
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btJacobianEntry.h
@@ -0,0 +1,156 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_JACOBIAN_ENTRY_H
+#define BT_JACOBIAN_ENTRY_H
+
+#include "LinearMath/btVector3.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+
+
+//notes:
+// Another memory optimization would be to store m_1MinvJt in the remaining 3 w components
+// which makes the btJacobianEntry memory layout 16 bytes
+// if you only are interested in angular part, just feed massInvA and massInvB zero
+
+/// Jacobian entry is an abstraction that allows describing constraints.
+/// It can be used in combination with a constraint solver.
+/// It can be used to relate the effect of an impulse to the constraint error.
+ATTRIBUTE_ALIGNED16(class) btJacobianEntry
+{
+public:
+	btJacobianEntry() {};
+	//constraint between two different rigidbodies (linear axis plus the angular terms derived from rel_pos1/rel_pos2)
+	btJacobianEntry(
+		const btMatrix3x3& world2A,
+		const btMatrix3x3& world2B,
+		const btVector3& rel_pos1,const btVector3& rel_pos2,
+		const btVector3& jointAxis,
+		const btVector3& inertiaInvA, 
+		const btScalar massInvA,
+		const btVector3& inertiaInvB,
+		const btScalar massInvB)
+		:m_linearJointAxis(jointAxis)
+	{
+		m_aJ = world2A*(rel_pos1.cross(m_linearJointAxis));
+		m_bJ = world2B*(rel_pos2.cross(-m_linearJointAxis));
+		m_0MinvJt	= inertiaInvA * m_aJ;
+		m_1MinvJt = inertiaInvB * m_bJ;
+		m_Adiag = massInvA + m_0MinvJt.dot(m_aJ) + massInvB + m_1MinvJt.dot(m_bJ);
+
+		btAssert(m_Adiag > btScalar(0.0));
+	}
+
+	//angular constraint between two different rigidbodies (jointAxis is transformed by world2A / world2B; linear axis is zero)
+	btJacobianEntry(const btVector3& jointAxis,
+		const btMatrix3x3& world2A,
+		const btMatrix3x3& world2B,
+		const btVector3& inertiaInvA,
+		const btVector3& inertiaInvB)
+		:m_linearJointAxis(btVector3(btScalar(0.),btScalar(0.),btScalar(0.)))
+	{
+		m_aJ= world2A*jointAxis;
+		m_bJ = world2B*-jointAxis;
+		m_0MinvJt	= inertiaInvA * m_aJ;
+		m_1MinvJt = inertiaInvB * m_bJ;
+		m_Adiag =  m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ);
+
+		btAssert(m_Adiag > btScalar(0.0));
+	}
+
+	//angular constraint between two different rigidbodies (axisInA / axisInB are stored as given, axisInB negated; linear axis is zero)
+	btJacobianEntry(const btVector3& axisInA,
+		const btVector3& axisInB,
+		const btVector3& inertiaInvA,
+		const btVector3& inertiaInvB)
+		: m_linearJointAxis(btVector3(btScalar(0.),btScalar(0.),btScalar(0.)))
+		, m_aJ(axisInA)
+		, m_bJ(-axisInB)
+	{
+		m_0MinvJt	= inertiaInvA * m_aJ;
+		m_1MinvJt = inertiaInvB * m_bJ;
+		m_Adiag =  m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ);
+
+		btAssert(m_Adiag > btScalar(0.0));
+	}
+
+	//constraint on one rigidbody (m_1MinvJt is zero; m_bJ is also built with world2A)
+	btJacobianEntry(
+		const btMatrix3x3& world2A,
+		const btVector3& rel_pos1,const btVector3& rel_pos2,
+		const btVector3& jointAxis,
+		const btVector3& inertiaInvA, 
+		const btScalar massInvA)
+		:m_linearJointAxis(jointAxis)
+	{
+		m_aJ= world2A*(rel_pos1.cross(jointAxis));
+		m_bJ = world2A*(rel_pos2.cross(-jointAxis));
+		m_0MinvJt	= inertiaInvA * m_aJ;
+		m_1MinvJt = btVector3(btScalar(0.),btScalar(0.),btScalar(0.));
+		m_Adiag = massInvA + m_0MinvJt.dot(m_aJ);
+
+		btAssert(m_Adiag > btScalar(0.0));
+	}
+
+	btScalar	getDiagonal() const { return m_Adiag; }
+
+	// for two constraints on the same rigidbody (for example vehicle friction)
+	btScalar	getNonDiagonal(const btJacobianEntry& jacB, const btScalar massInvA) const
+	{
+		const btJacobianEntry& jacA = *this;
+		btScalar lin = massInvA * jacA.m_linearJointAxis.dot(jacB.m_linearJointAxis);
+		btScalar ang = jacA.m_0MinvJt.dot(jacB.m_aJ);
+		return lin + ang;
+	}
+
+	
+
+	// for two constraints sharing the same two rigidbodies (for example two contact points between two rigidbodies)
+	btScalar	getNonDiagonal(const btJacobianEntry& jacB,const btScalar massInvA,const btScalar massInvB) const
+	{
+		const btJacobianEntry& jacA = *this;
+		btVector3 lin = jacA.m_linearJointAxis * jacB.m_linearJointAxis; // presumably a component-wise product (components are summed at the end) - confirm in btVector3.h
+		btVector3 ang0 = jacA.m_0MinvJt * jacB.m_aJ;
+		btVector3 ang1 = jacA.m_1MinvJt * jacB.m_bJ;
+		btVector3 lin0 = massInvA * lin ;
+		btVector3 lin1 = massInvB * lin;
+		btVector3 sum = ang0+ang1+lin0+lin1;
+		return sum[0]+sum[1]+sum[2];
+	}
+
+	btScalar getRelativeVelocity(const btVector3& linvelA,const btVector3& angvelA,const btVector3& linvelB,const btVector3& angvelB)
+	{
+		btVector3 linrel = linvelA - linvelB;
+		btVector3 angvela  = angvelA * m_aJ;
+		btVector3 angvelb  = angvelB * m_bJ;
+		linrel *= m_linearJointAxis;
+		angvela += angvelb;
+		angvela += linrel;
+		btScalar rel_vel2 = angvela[0]+angvela[1]+angvela[2];
+		return rel_vel2 + SIMD_EPSILON; // NOTE(review): SIMD_EPSILON is added to the result; the reason is not visible here
+	}
+//private:
+
+	btVector3	m_linearJointAxis;
+	btVector3	m_aJ;
+	btVector3	m_bJ;
+	btVector3	m_0MinvJt; // inertiaInvA * m_aJ
+	btVector3	m_1MinvJt; // inertiaInvB * m_bJ, or zero for the one-body constructor
+	//Optimization: can be stored in the w/last component of one of the vectors
+	btScalar	m_Adiag;
+
+};
+
+#endif //BT_JACOBIAN_ENTRY_H
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.cpp b/src/bullet/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.cpp
new file mode 100644
index 00000000..7e0d93b9
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.cpp
@@ -0,0 +1,230 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btPoint2PointConstraint.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include <new>
+
+
+
+
+
+btPoint2PointConstraint::btPoint2PointConstraint(btRigidBody& rbA,btRigidBody& rbB, const btVector3& pivotInA,const btVector3& pivotInB)
+:btTypedConstraint(POINT2POINT_CONSTRAINT_TYPE,rbA,rbB),m_pivotInA(pivotInA),m_pivotInB(pivotInB),
+m_flags(0), // no local ERP/CFM override yet
+m_useSolveConstraintObsolete(false)
+{
+	// two-body form: both pivots are stored exactly as passed in; m_erp and m_cfm are not set here
+}
+
+
+btPoint2PointConstraint::btPoint2PointConstraint(btRigidBody& rbA,const btVector3& pivotInA)
+:btTypedConstraint(POINT2POINT_CONSTRAINT_TYPE,rbA),m_pivotInA(pivotInA),m_pivotInB(rbA.getCenterOfMassTransform()(pivotInA)),
+m_flags(0), // no local ERP/CFM override yet
+m_useSolveConstraintObsolete(false)
+{
+	// single-body form: m_pivotInB is pivotInA mapped through rbA's center-of-mass transform at construction time
+}
+
+void	btPoint2PointConstraint::buildJacobian()
+{
+	// resets m_appliedImpulse and rebuilds m_jac[0..2], one entry per coordinate axis
+	///we need it for both methods
+	{
+		m_appliedImpulse = btScalar(0.);
+
+		btVector3	normal(0,0,0);
+
+		for (int i=0;i<3;i++)
+		{
+			normal[i] = 1; // normal is now the unit vector along axis i
+			new (&m_jac[i]) btJacobianEntry( // placement new: constructs the entry in the existing m_jac[i] storage
+			m_rbA.getCenterOfMassTransform().getBasis().transpose(),
+			m_rbB.getCenterOfMassTransform().getBasis().transpose(),
+			m_rbA.getCenterOfMassTransform()*m_pivotInA - m_rbA.getCenterOfMassPosition(),
+			m_rbB.getCenterOfMassTransform()*m_pivotInB - m_rbB.getCenterOfMassPosition(),
+			normal,
+			m_rbA.getInvInertiaDiagLocal(),
+			m_rbA.getInvMass(),
+			m_rbB.getInvInertiaDiagLocal(),
+			m_rbB.getInvMass());
+		normal[i] = 0; // back to the zero vector for the next axis
+		}
+	}
+
+
+}
+
+void btPoint2PointConstraint::getInfo1 (btConstraintInfo1* info)
+{
+	getInfo1NonVirtual(info); // virtual entry point simply forwards to the non-virtual implementation
+}
+
+void btPoint2PointConstraint::getInfo1NonVirtual (btConstraintInfo1* info)
+{
+	if (m_useSolveConstraintObsolete) // obsolete path: report no rows
+	{
+		info->m_numConstraintRows = 0;
+		info->nub = 0;
+	} else
+	{
+		info->m_numConstraintRows = 3; // the three rows filled by getInfo2NonVirtual
+		info->nub = 3;
+	}
+}
+
+
+
+
+void btPoint2PointConstraint::getInfo2 (btConstraintInfo2* info)
+{
+	getInfo2NonVirtual(info, m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform()); // evaluate at the bodies' current center-of-mass transforms
+}
+
+void btPoint2PointConstraint::getInfo2NonVirtual (btConstraintInfo2* info, const btTransform& body0_trans, const btTransform& body1_trans)
+{
+	btAssert(!m_useSolveConstraintObsolete);
+	// Fills three constraint rows (row stride info->rowskip): Jacobian entries, error term, optional CFM and impulse limits.
+	 // body0_trans / body1_trans are the transforms the rows are evaluated at (getInfo2 passes the center-of-mass transforms).
+
+	// anchor points in global coordinates with respect to body PORs.
+   
+    // set jacobian: 1 on the diagonal of the J1 linear part
+    info->m_J1linearAxis[0] = 1;
+	info->m_J1linearAxis[info->rowskip+1] = 1;
+	info->m_J1linearAxis[2*info->rowskip+2] = 1;
+
+	btVector3 a1 = body0_trans.getBasis()*getPivotInA(); // pivot A rotated by body 0's basis (no translation applied)
+	{
+		btVector3* angular0 = (btVector3*)(info->m_J1angularAxis);
+		btVector3* angular1 = (btVector3*)(info->m_J1angularAxis+info->rowskip);
+		btVector3* angular2 = (btVector3*)(info->m_J1angularAxis+2*info->rowskip);
+		btVector3 a1neg = -a1;
+		a1neg.getSkewSymmetricMatrix(angular0,angular1,angular2); // J1 angular rows come from -a1
+	}
+    
+	/*info->m_J2linearAxis[0] = -1;
+    info->m_J2linearAxis[s+1] = -1;
+    info->m_J2linearAxis[2*s+2] = -1;
+	*/
+	
+	btVector3 a2 = body1_trans.getBasis()*getPivotInB(); // pivot B rotated by body 1's basis (no translation applied)
+   
+	{
+		// (the unused negated copy of a2 was removed; the J2 angular rows are built from a2 itself)
+		btVector3* angular0 = (btVector3*)(info->m_J2angularAxis);
+		btVector3* angular1 = (btVector3*)(info->m_J2angularAxis+info->rowskip);
+		btVector3* angular2 = (btVector3*)(info->m_J2angularAxis+2*info->rowskip);
+		a2.getSkewSymmetricMatrix(angular0,angular1,angular2);
+	}
+    
+
+
+    // set right hand side: k * (pivot B position - pivot A position) per axis, using the local ERP if one was set
+	btScalar currERP = (m_flags & BT_P2P_FLAGS_ERP) ? m_erp : info->erp;
+    btScalar k = info->fps * currERP;
+    int j;
+	for (j=0; j<3; j++)
+    {
+        info->m_constraintError[j*info->rowskip] = k * (a2[j] + body1_trans.getOrigin()[j] - a1[j] - body0_trans.getOrigin()[j]);
+		//printf("info->m_constraintError[%d]=%f\n",j,info->m_constraintError[j]);
+    }
+	if(m_flags & BT_P2P_FLAGS_CFM) // local CFM override set through setParam
+	{
+		for (j=0; j<3; j++)
+		{
+			info->cfm[j*info->rowskip] = m_cfm;
+		}
+	}
+
+	btScalar impulseClamp = m_setting.m_impulseClamp;//
+	for (j=0; j<3; j++)
+    {
+		if (m_setting.m_impulseClamp > 0) // limits are left untouched when no clamp is configured
+		{
+			info->m_lowerLimit[j*info->rowskip] = -impulseClamp;
+			info->m_upperLimit[j*info->rowskip] = impulseClamp;
+		}
+	}
+	info->m_damping = m_setting.m_damping;
+	
+}
+
+
+
+void	btPoint2PointConstraint::updateRHS(btScalar	timeStep)
+{
+	(void)timeStep; // intentionally a no-op; the cast only marks the parameter as used
+
+}
+
+///Override the default global value of a parameter (ERP or CFM) for this constraint and set the matching BT_P2P_FLAGS_* bit in m_flags.
+///Only axis -1 (no axis) is accepted; the STOP_ERP / STOP_CFM ids are treated the same as ERP / CFM.
+void btPoint2PointConstraint::setParam(int num, btScalar value, int axis)
+{
+	if(axis != -1)
+	{
+		btAssertConstrParams(0); // per-axis parameters are not handled by this constraint
+	}
+	else
+	{
+		switch(num)
+		{
+			case BT_CONSTRAINT_ERP :
+			case BT_CONSTRAINT_STOP_ERP :
+				m_erp = value; 
+				m_flags |= BT_P2P_FLAGS_ERP;
+				break;
+			case BT_CONSTRAINT_CFM :
+			case BT_CONSTRAINT_STOP_CFM :
+				m_cfm = value; 
+				m_flags |= BT_P2P_FLAGS_CFM;
+				break;
+			default: // any other parameter id is not handled
+				btAssertConstrParams(0);
+		}
+	}
+}
+
+///Return the locally stored ERP or CFM for axis -1; the result stays SIMD_INFINITY if nothing matched.
+btScalar btPoint2PointConstraint::getParam(int num, int axis) const 
+{
+	btScalar retVal(SIMD_INFINITY);
+	if(axis != -1)
+	{
+		btAssertConstrParams(0); // per-axis parameters are not handled by this constraint
+	}
+	else
+	{
+		switch(num)
+		{
+			case BT_CONSTRAINT_ERP :
+			case BT_CONSTRAINT_STOP_ERP :
+				btAssertConstrParams(m_flags & BT_P2P_FLAGS_ERP); // expects the value to have been set through setParam
+				retVal = m_erp; 
+				break;
+			case BT_CONSTRAINT_CFM :
+			case BT_CONSTRAINT_STOP_CFM :
+				btAssertConstrParams(m_flags & BT_P2P_FLAGS_CFM);
+				retVal = m_cfm; 
+				break;
+			default: // any other parameter id is not handled
+				btAssertConstrParams(0);
+		}
+	}
+	return retVal;
+}
+	
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h b/src/bullet/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h
new file mode 100644
index 00000000..b3bda03e
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h
@@ -0,0 +1,161 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_POINT2POINTCONSTRAINT_H
+#define BT_POINT2POINTCONSTRAINT_H
+
+#include "LinearMath/btVector3.h"
+#include "btJacobianEntry.h"
+#include "btTypedConstraint.h"
+
+class btRigidBody;
+
+
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btPoint2PointConstraintData	btPoint2PointConstraintDoubleData
+#define btPoint2PointConstraintDataName	"btPoint2PointConstraintDoubleData"
+#else
+#define btPoint2PointConstraintData	btPoint2PointConstraintFloatData
+#define btPoint2PointConstraintDataName	"btPoint2PointConstraintFloatData"
+#endif //BT_USE_DOUBLE_PRECISION
+
+struct	btConstraintSetting // tuning values held by btPoint2PointConstraint as m_setting
+{
+	btConstraintSetting()	: // defaults: tau 0.3, damping 1, impulse clamp 0
+		m_tau(btScalar(0.3)),
+		m_damping(btScalar(1.)),
+		m_impulseClamp(btScalar(0.))
+	{
+	}
+	btScalar		m_tau; // NOTE(review): no use of m_tau is visible in the part of the .cpp reviewed here - confirm where it is read
+	btScalar		m_damping; // copied to info->m_damping when the constraint rows are filled in (see the .cpp)
+	btScalar		m_impulseClamp; // when > 0, the three rows are limited to [-m_impulseClamp, +m_impulseClamp] (see the .cpp)
+};
+
+enum btPoint2PointFlags // bits stored in btPoint2PointConstraint::m_flags
+{
+	BT_P2P_FLAGS_ERP = 1, // set by setParam when m_erp has been overridden
+	BT_P2P_FLAGS_CFM = 2 // set by setParam when m_cfm has been overridden
+};
+
+/// point to point constraint between two rigidbodies each with a pivotpoint that describes the 'ballsocket' location in local space
+ATTRIBUTE_ALIGNED16(class) btPoint2PointConstraint : public btTypedConstraint
+{
+#ifdef IN_PARALLELL_SOLVER
+public:
+#endif
+	btJacobianEntry	m_jac[3]; //3 orthogonal linear constraints
+	// pivot of the joint, one per body, each given in that body's local space
+	btVector3	m_pivotInA;
+	btVector3	m_pivotInB;
+	// constraint-local ERP/CFM overrides; m_flags holds btPoint2PointFlags bits telling which of them were set via setParam
+	int			m_flags;
+	btScalar	m_erp;
+	btScalar	m_cfm;
+	
+public:
+
+	///for backwards compatibility during the transition to 'getInfo/getInfo2'
+	bool		m_useSolveConstraintObsolete;
+
+	btConstraintSetting	m_setting;
+
+	btPoint2PointConstraint(btRigidBody& rbA,btRigidBody& rbB, const btVector3& pivotInA,const btVector3& pivotInB);
+
+	btPoint2PointConstraint(btRigidBody& rbA,const btVector3& pivotInA);
+
+
+	virtual void	buildJacobian();
+
+	virtual void getInfo1 (btConstraintInfo1* info);
+
+	void getInfo1NonVirtual (btConstraintInfo1* info);
+
+	virtual void getInfo2 (btConstraintInfo2* info);
+
+	void getInfo2NonVirtual (btConstraintInfo2* info, const btTransform& body0_trans, const btTransform& body1_trans);
+
+	void	updateRHS(btScalar	timeStep);
+
+	void	setPivotA(const btVector3& pivotA)
+	{
+		m_pivotInA = pivotA;
+	}
+
+	void	setPivotB(const btVector3& pivotB)
+	{
+		m_pivotInB = pivotB;
+	}
+
+	const btVector3& getPivotInA() const
+	{
+		return m_pivotInA;
+	}
+
+	const btVector3& getPivotInB() const
+	{
+		return m_pivotInB;
+	}
+
+	///override the default global value of a parameter (such as ERP or CFM).
+	///Only the default axis (axis == -1) is supported by this constraint: the implementation asserts on any other axis value.
+	virtual	void	setParam(int num, btScalar value, int axis = -1);
+	///return the local value of parameter
+	virtual	btScalar getParam(int num, int axis = -1) const;
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+
+};
+
+///Serialized layout used as btPoint2PointConstraintData when BT_USE_DOUBLE_PRECISION is not defined. Do not change these serialization structures: a change requires an updated sBulletDNAstr/sBulletDNAstr64.
+struct	btPoint2PointConstraintFloatData
+{
+	btTypedConstraintData	m_typeConstraintData; // written by btTypedConstraint::serialize
+	btVector3FloatData	m_pivotInA; // written from btPoint2PointConstraint::m_pivotInA
+	btVector3FloatData	m_pivotInB; // written from btPoint2PointConstraint::m_pivotInB
+};
+
+///Serialized layout used as btPoint2PointConstraintData when BT_USE_DOUBLE_PRECISION is defined. Do not change these serialization structures: a change requires an updated sBulletDNAstr/sBulletDNAstr64.
+struct	btPoint2PointConstraintDoubleData
+{
+	btTypedConstraintData	m_typeConstraintData; // written by btTypedConstraint::serialize
+	btVector3DoubleData	m_pivotInA; // written from btPoint2PointConstraint::m_pivotInA
+	btVector3DoubleData	m_pivotInB; // written from btPoint2PointConstraint::m_pivotInB
+};
+
+
+SIMD_FORCE_INLINE	int	btPoint2PointConstraint::calculateSerializeBufferSize() const
+{
+	// btPoint2PointConstraintData is the float or the double struct, selected by BT_USE_DOUBLE_PRECISION
+	const int bufferSize = static_cast<int>(sizeof(btPoint2PointConstraintData));
+	return bufferSize;
+}
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+SIMD_FORCE_INLINE	const char*	btPoint2PointConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	// presumably dataBuffer holds at least calculateSerializeBufferSize() bytes - TODO confirm against the serializer
+	btPoint2PointConstraintData* const out = static_cast<btPoint2PointConstraintData*>(dataBuffer);
+
+	btTypedConstraint::serialize(&out->m_typeConstraintData, serializer); // base-class part first
+	m_pivotInA.serialize(out->m_pivotInA);
+	m_pivotInB.serialize(out->m_pivotInB);
+	return btPoint2PointConstraintDataName; // name of the struct that was just filled
+}
+
+#endif //BT_POINT2POINTCONSTRAINT_H
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp b/src/bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp
new file mode 100644
index 00000000..ab074224
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp
@@ -0,0 +1,1269 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//#define COMPUTE_IMPULSE_DENOM 1
+//It is not necessary (redundant) to refresh contact manifolds, this refresh has been moved to the collision algorithms.
+
+#include "btSequentialImpulseConstraintSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "btContactConstraint.h"
+#include "btSolve2LinearConstraint.h"
+#include "btContactSolverInfo.h"
+#include "LinearMath/btIDebugDraw.h"
+#include "btJacobianEntry.h"
+#include "LinearMath/btMinMax.h"
+#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
+#include <new>
+#include "LinearMath/btStackAlloc.h"
+#include "LinearMath/btQuickprof.h"
+#include "btSolverBody.h"
+#include "btSolverConstraint.h"
+#include "LinearMath/btAlignedObjectArray.h"
+#include <string.h> //for memset
+
+int		gNumSplitImpulseRecoveries = 0; // incremented by the resolveSplitPenetration* functions for every row whose m_rhsPenetration is non-zero
+
+btSequentialImpulseConstraintSolver::btSequentialImpulseConstraintSolver()
+:m_btSeed2(0)
+{
+	// the only member initialised explicitly here is m_btSeed2, the state of the btRand2() sequence
+}
+
+btSequentialImpulseConstraintSolver::~btSequentialImpulseConstraintSolver()
+{ // empty body: no explicit cleanup is performed here
+}
+
+#ifdef USE_SIMD
+#include <emmintrin.h>
+#define btVecSplat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e))
+static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
+{ // dot product of lanes 0..2 of vec0 and vec1 (lane 3 is ignored), replicated into all four lanes of the result
+	__m128 result = _mm_mul_ps( vec0, vec1); // lane-wise products
+	return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) ); // p0 + (p1 + p2), each product broadcast by btVecSplat
+}
+#endif//USE_SIMD
+
+// Projected Gauss Seidel or the equivalent Sequential Impulse: SSE variant of resolveSingleConstraintRowGeneric, which it simply calls when USE_SIMD is not defined
+void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGenericSIMD(btRigidBody& body1,btRigidBody& body2,const btSolverConstraint& c)
+{
+#ifdef USE_SIMD
+	__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedImpulse);
+	__m128	lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
+	__m128	upperLimit1 = _mm_set1_ps(c.m_upperLimit);
+	__m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhs), _mm_mul_ps(_mm_set1_ps(c.m_appliedImpulse),_mm_set1_ps(c.m_cfm)));
+	__m128 deltaVel1Dotn	=	_mm_add_ps(btSimdDot3(c.m_contactNormal.mVec128,body1.internalGetDeltaLinearVelocity().mVec128), btSimdDot3(c.m_relpos1CrossNormal.mVec128,body1.internalGetDeltaAngularVelocity().mVec128));
+	__m128 deltaVel2Dotn	=	_mm_sub_ps(btSimdDot3(c.m_relpos2CrossNormal.mVec128,body2.internalGetDeltaAngularVelocity().mVec128),btSimdDot3((c.m_contactNormal).mVec128,body2.internalGetDeltaLinearVelocity().mVec128));
+	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel1Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
+	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel2Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
+	btSimdScalar sum = _mm_add_ps(cpAppliedImp,deltaImpulse);
+	btSimdScalar resultLowerLess,resultUpperLess;
+	resultLowerLess = _mm_cmplt_ps(sum,lowerLimit1); // lane mask: sum < lower limit
+	resultUpperLess = _mm_cmplt_ps(sum,upperLimit1); // lane mask: sum < upper limit
+	__m128 lowMinApplied = _mm_sub_ps(lowerLimit1,cpAppliedImp);
+	deltaImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse) ); // sum < lower ? lower - applied : deltaImpulse
+	c.m_appliedImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum) ); // sum < lower ? lower : sum
+	__m128 upperMinApplied = _mm_sub_ps(upperLimit1,cpAppliedImp);
+	deltaImpulse = _mm_or_ps( _mm_and_ps(resultUpperLess, deltaImpulse), _mm_andnot_ps(resultUpperLess, upperMinApplied) ); // sum < upper ? value from above : upper - applied
+	c.m_appliedImpulse = _mm_or_ps( _mm_and_ps(resultUpperLess, c.m_appliedImpulse), _mm_andnot_ps(resultUpperLess, upperLimit1) ); // sum < upper ? value from above : upper
+	__m128	linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128,body1.internalGetInvMass().mVec128);
+	__m128	linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128,body2.internalGetInvMass().mVec128);
+	__m128 impulseMagnitude = deltaImpulse;
+	body1.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
+	body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
+	body2.internalGetDeltaLinearVelocity().mVec128 = _mm_sub_ps(body2.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude)); // body2 receives the opposite linear change
+	body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
+#else
+	resolveSingleConstraintRowGeneric(body1,body2,c);
+#endif
+}
+
+// Projected Gauss Seidel or the equivalent Sequential Impulse: one solver step for row c, keeping the accumulated impulse within [m_lowerLimit, m_upperLimit]
+ void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGeneric(btRigidBody& body1,btRigidBody& body2,const btSolverConstraint& c)
+{
+	btScalar deltaImpulse = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
+	const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(body1.internalGetDeltaLinearVelocity()) 	+ c.m_relpos1CrossNormal.dot(body1.internalGetDeltaAngularVelocity());
+	const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(body2.internalGetDeltaLinearVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetDeltaAngularVelocity());
+	// remove the part already accounted for by the bodies' current delta velocities, scaled by m_jacDiagABInv
+//	const btScalar delta_rel_vel	=	deltaVel1Dotn-deltaVel2Dotn;
+	deltaImpulse	-=	deltaVel1Dotn*c.m_jacDiagABInv;
+	deltaImpulse	-=	deltaVel2Dotn*c.m_jacDiagABInv;
+	// clamp the accumulated impulse; when a limit is hit, deltaImpulse becomes the step that lands exactly on that limit
+	const btScalar sum = btScalar(c.m_appliedImpulse) + deltaImpulse;
+	if (sum < c.m_lowerLimit)
+	{
+		deltaImpulse = c.m_lowerLimit-c.m_appliedImpulse;
+		c.m_appliedImpulse = c.m_lowerLimit;
+	}
+	else if (sum > c.m_upperLimit) 
+	{
+		deltaImpulse = c.m_upperLimit-c.m_appliedImpulse;
+		c.m_appliedImpulse = c.m_upperLimit;
+	}
+	else
+	{
+		c.m_appliedImpulse = sum;
+	}
+		body1.internalApplyImpulse(c.m_contactNormal*body1.internalGetInvMass(),c.m_angularComponentA,deltaImpulse);
+		body2.internalApplyImpulse(-c.m_contactNormal*body2.internalGetInvMass(),c.m_angularComponentB,deltaImpulse); // note the negated normal for body2
+}
+
+ void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowLowerLimitSIMD(btRigidBody& body1,btRigidBody& body2,const btSolverConstraint& c)
+{ // SSE variant of resolveSingleConstraintRowLowerLimit (called directly when USE_SIMD is not defined): only the lower limit is enforced
+#ifdef USE_SIMD
+	__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedImpulse);
+	__m128	lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
+	__m128	upperLimit1 = _mm_set1_ps(c.m_upperLimit);
+	__m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhs), _mm_mul_ps(_mm_set1_ps(c.m_appliedImpulse),_mm_set1_ps(c.m_cfm)));
+	__m128 deltaVel1Dotn	=	_mm_add_ps(btSimdDot3(c.m_contactNormal.mVec128,body1.internalGetDeltaLinearVelocity().mVec128), btSimdDot3(c.m_relpos1CrossNormal.mVec128,body1.internalGetDeltaAngularVelocity().mVec128));
+	__m128 deltaVel2Dotn	=	_mm_sub_ps(btSimdDot3(c.m_relpos2CrossNormal.mVec128,body2.internalGetDeltaAngularVelocity().mVec128),btSimdDot3((c.m_contactNormal).mVec128,body2.internalGetDeltaLinearVelocity().mVec128));
+	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel1Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
+	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel2Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
+	btSimdScalar sum = _mm_add_ps(cpAppliedImp,deltaImpulse);
+	btSimdScalar resultLowerLess,resultUpperLess;
+	resultLowerLess = _mm_cmplt_ps(sum,lowerLimit1); // lane mask: sum < lower limit
+	resultUpperLess = _mm_cmplt_ps(sum,upperLimit1); // NOTE(review): computed but not read again in this function
+	__m128 lowMinApplied = _mm_sub_ps(lowerLimit1,cpAppliedImp);
+	deltaImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse) ); // sum < lower ? lower - applied : deltaImpulse
+	c.m_appliedImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum) ); // sum < lower ? lower : sum
+	__m128	linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128,body1.internalGetInvMass().mVec128);
+	__m128	linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128,body2.internalGetInvMass().mVec128);
+	__m128 impulseMagnitude = deltaImpulse;
+	body1.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
+	body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
+	body2.internalGetDeltaLinearVelocity().mVec128 = _mm_sub_ps(body2.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude)); // body2 receives the opposite linear change
+	body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
+#else
+	resolveSingleConstraintRowLowerLimit(body1,body2,c);
+#endif
+}
+
+// Projected Gauss Seidel or the equivalent Sequential Impulse: same step as resolveSingleConstraintRowGeneric, but only m_lowerLimit is enforced
+ void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowLowerLimit(btRigidBody& body1,btRigidBody& body2,const btSolverConstraint& c)
+{
+	btScalar deltaImpulse = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
+	const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(body1.internalGetDeltaLinearVelocity()) 	+ c.m_relpos1CrossNormal.dot(body1.internalGetDeltaAngularVelocity());
+	const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(body2.internalGetDeltaLinearVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetDeltaAngularVelocity());
+	// remove the part already accounted for by the bodies' current delta velocities, scaled by m_jacDiagABInv
+	deltaImpulse	-=	deltaVel1Dotn*c.m_jacDiagABInv;
+	deltaImpulse	-=	deltaVel2Dotn*c.m_jacDiagABInv;
+	const btScalar sum = btScalar(c.m_appliedImpulse) + deltaImpulse;
+	if (sum < c.m_lowerLimit) // clamp from below; m_upperLimit is not consulted here
+	{
+		deltaImpulse = c.m_lowerLimit-c.m_appliedImpulse;
+		c.m_appliedImpulse = c.m_lowerLimit;
+	}
+	else
+	{
+		c.m_appliedImpulse = sum;
+	}
+	body1.internalApplyImpulse(c.m_contactNormal*body1.internalGetInvMass(),c.m_angularComponentA,deltaImpulse);
+	body2.internalApplyImpulse(-c.m_contactNormal*body2.internalGetInvMass(),c.m_angularComponentB,deltaImpulse); // note the negated normal for body2
+}
+
+
+void	btSequentialImpulseConstraintSolver::resolveSplitPenetrationImpulseCacheFriendly(
+        btRigidBody& body1,
+        btRigidBody& body2,
+        const btSolverConstraint& c)
+{ // lower-limit-only solver step on the push/turn velocities, driven by m_rhsPenetration and accumulated in m_appliedPushImpulse
+		if (c.m_rhsPenetration) // rows without a penetration right-hand side are left untouched
+        {
+			gNumSplitImpulseRecoveries++;
+			btScalar deltaImpulse = c.m_rhsPenetration-btScalar(c.m_appliedPushImpulse)*c.m_cfm;
+			const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(body1.internalGetPushVelocity()) 	+ c.m_relpos1CrossNormal.dot(body1.internalGetTurnVelocity());
+			const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(body2.internalGetPushVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetTurnVelocity());
+			// remove the part already accounted for by the bodies' current push/turn velocities, scaled by m_jacDiagABInv
+			deltaImpulse	-=	deltaVel1Dotn*c.m_jacDiagABInv;
+			deltaImpulse	-=	deltaVel2Dotn*c.m_jacDiagABInv;
+			const btScalar sum = btScalar(c.m_appliedPushImpulse) + deltaImpulse;
+			if (sum < c.m_lowerLimit) // clamp from below only
+			{
+				deltaImpulse = c.m_lowerLimit-c.m_appliedPushImpulse;
+				c.m_appliedPushImpulse = c.m_lowerLimit;
+			}
+			else
+			{
+				c.m_appliedPushImpulse = sum;
+			}
+			body1.internalApplyPushImpulse(c.m_contactNormal*body1.internalGetInvMass(),c.m_angularComponentA,deltaImpulse);
+			body2.internalApplyPushImpulse(-c.m_contactNormal*body2.internalGetInvMass(),c.m_angularComponentB,deltaImpulse);
+        }
+}
+
+ void btSequentialImpulseConstraintSolver::resolveSplitPenetrationSIMD(btRigidBody& body1,btRigidBody& body2,const btSolverConstraint& c)
+{ // SSE variant of resolveSplitPenetrationImpulseCacheFriendly, which it simply calls when USE_SIMD is not defined
+#ifdef USE_SIMD
+	if (!c.m_rhsPenetration)
+		return;
+	// only rows that carry a penetration right-hand side are counted and processed
+	gNumSplitImpulseRecoveries++;
+
+	__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedPushImpulse);
+	__m128	lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
+	__m128	upperLimit1 = _mm_set1_ps(c.m_upperLimit);
+	__m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhsPenetration), _mm_mul_ps(_mm_set1_ps(c.m_appliedPushImpulse),_mm_set1_ps(c.m_cfm)));
+	__m128 deltaVel1Dotn	=	_mm_add_ps(btSimdDot3(c.m_contactNormal.mVec128,body1.internalGetPushVelocity().mVec128), btSimdDot3(c.m_relpos1CrossNormal.mVec128,body1.internalGetTurnVelocity().mVec128));
+	__m128 deltaVel2Dotn	=	_mm_sub_ps(btSimdDot3(c.m_relpos2CrossNormal.mVec128,body2.internalGetTurnVelocity().mVec128),btSimdDot3((c.m_contactNormal).mVec128,body2.internalGetPushVelocity().mVec128));
+	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel1Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
+	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel2Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
+	btSimdScalar sum = _mm_add_ps(cpAppliedImp,deltaImpulse);
+	btSimdScalar resultLowerLess,resultUpperLess;
+	resultLowerLess = _mm_cmplt_ps(sum,lowerLimit1); // lane mask: sum < lower limit
+	resultUpperLess = _mm_cmplt_ps(sum,upperLimit1); // NOTE(review): computed but not read again in this function
+	__m128 lowMinApplied = _mm_sub_ps(lowerLimit1,cpAppliedImp);
+	deltaImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse) ); // sum < lower ? lower - applied : deltaImpulse
+	c.m_appliedPushImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum) ); // sum < lower ? lower : sum
+	__m128	linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128,body1.internalGetInvMass().mVec128);
+	__m128	linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128,body2.internalGetInvMass().mVec128);
+	__m128 impulseMagnitude = deltaImpulse;
+	body1.internalGetPushVelocity().mVec128 = _mm_add_ps(body1.internalGetPushVelocity().mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
+	body1.internalGetTurnVelocity().mVec128 = _mm_add_ps(body1.internalGetTurnVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
+	body2.internalGetPushVelocity().mVec128 = _mm_sub_ps(body2.internalGetPushVelocity().mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude)); // body2 receives the opposite linear change
+	body2.internalGetTurnVelocity().mVec128 = _mm_add_ps(body2.internalGetTurnVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
+#else
+	resolveSplitPenetrationImpulseCacheFriendly(body1,body2,c);
+#endif
+}
+
+
+
+unsigned long btSequentialImpulseConstraintSolver::btRand2()
+{
+	// one multiply-add step on the stored seed, masked to the low 32 bits; the result becomes the new seed
+	const unsigned long nextSeed = (1664525L*m_btSeed2 + 1013904223L) & 0xffffffff;
+	return m_btSeed2 = nextSeed;
+}
+
+
+
+//See ODE: adam's all-int straightforward(?) dRandInt. Returns a pseudo-random int in 0..n-1; for n <= 0 (empty range) it returns 0.
+int btSequentialImpulseConstraintSolver::btRandInt2 (int n)
+{
+	// seems good; xor-fold and modulus
+	const unsigned long un = static_cast<unsigned long>(n);
+	unsigned long r = btRand2();
+	if (n <= 0) return 0; // guard: r % 0 is undefined behaviour and a negative n has no valid range; the generator was still advanced above
+	// note: probably more aggressive than it needs to be -- might be
+	//       able to get away without one or two of the innermost branches.
+	if (un <= 0x00010000UL) {
+		r ^= (r >> 16);
+		if (un <= 0x00000100UL) {
+			r ^= (r >> 8);
+			if (un <= 0x00000010UL) {
+				r ^= (r >> 4);
+				if (un <= 0x00000004UL) {
+					r ^= (r >> 2);
+					if (un <= 0x00000002UL) {
+						r ^= (r >> 1);
+					}
+				}
+			}
+		}
+	}
+
+	return (int) (r % un); // un >= 1 here, so the result lies in 0..n-1
+}
+
+
+#if 0 // compiled out: everything up to the matching #endif is kept for reference only
+void	btSequentialImpulseConstraintSolver::initSolverBody(btSolverBody* solverBody, btCollisionObject* collisionObject)
+{
+	btRigidBody* rb = collisionObject? btRigidBody::upcast(collisionObject) : 0;
+
+	solverBody->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
+	solverBody->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
+	solverBody->internalGetPushVelocity().setValue(0.f,0.f,0.f);
+	solverBody->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
+
+	if (rb)
+	{
+		solverBody->internalGetInvMass() = btVector3(rb->getInvMass(),rb->getInvMass(),rb->getInvMass())*rb->getLinearFactor();
+		solverBody->m_originalBody = rb;
+		solverBody->m_angularFactor = rb->getAngularFactor();
+	} else
+	{
+		solverBody->internalGetInvMass().setValue(0,0,0);
+		solverBody->m_originalBody = 0;
+		solverBody->m_angularFactor.setValue(1,1,1);
+	}
+}
+#endif
+
+
+
+
+
+btScalar btSequentialImpulseConstraintSolver::restitutionCurve(btScalar rel_vel, btScalar restitution)
+{
+	// the restitution coefficient applied to the negated relative velocity
+	return restitution * -rel_vel;
+}
+
+
+
+void	applyAnisotropicFriction(btCollisionObject* colObj,btVector3& frictionDirection);
+/// Scales frictionDirection per axis, in colObj's local frame, by the object's anisotropic friction; leaves it untouched for a null object or one without anisotropic friction.
+void	applyAnisotropicFriction(btCollisionObject* colObj,btVector3& frictionDirection)
+{
+	if (!colObj || !colObj->hasAnisotropicFriction())
+		return;
+
+	// into local coordinates: vector * basis
+	btVector3 localDir = frictionDirection * colObj->getWorldTransform().getBasis();
+	// per-axis scaling in the local frame
+	localDir *= colObj->getAnisotropicFriction();
+	// back to global coordinates: basis * vector
+	frictionDirection = colObj->getWorldTransform().getBasis() * localDir;
+}
+
+
+void btSequentialImpulseConstraintSolver::setupFrictionConstraint(btSolverConstraint& solverConstraint, const btVector3& normalAxis,btRigidBody* solverBodyA,btRigidBody* solverBodyB,btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, btScalar desiredVelocity, btScalar cfmSlip)
+{
+	// Fills solverConstraint as one friction row along normalAxis for contact point cp.
+	// NOTE(review): solverBodyA and solverBodyB are not referenced in this body; the bodies are obtained from colObj0/colObj1 instead.
+	btRigidBody* body0=btRigidBody::upcast(colObj0);
+	btRigidBody* body1=btRigidBody::upcast(colObj1);
+
+	solverConstraint.m_contactNormal = normalAxis;
+	// when the upcast yields null, the row refers to getFixedBody() instead
+	solverConstraint.m_solverBodyA = body0 ? body0 : &getFixedBody();
+	solverConstraint.m_solverBodyB = body1 ? body1 : &getFixedBody();
+
+	solverConstraint.m_friction = cp.m_combinedFriction;
+	solverConstraint.m_originalContactPoint = 0;
+	// the row starts without any accumulated impulse
+	solverConstraint.m_appliedImpulse = 0.f;
+	solverConstraint.m_appliedPushImpulse = 0.f;
+
+	{
+		btVector3 ftorqueAxis1 = rel_pos1.cross(solverConstraint.m_contactNormal);
+		solverConstraint.m_relpos1CrossNormal = ftorqueAxis1;
+		solverConstraint.m_angularComponentA = body0 ? body0->getInvInertiaTensorWorld()*ftorqueAxis1*body0->getAngularFactor() : btVector3(0,0,0);
+	}
+	{
+		btVector3 ftorqueAxis1 = rel_pos2.cross(-solverConstraint.m_contactNormal);
+		solverConstraint.m_relpos2CrossNormal = ftorqueAxis1;
+		solverConstraint.m_angularComponentB = body1 ? body1->getInvInertiaTensorWorld()*ftorqueAxis1*body1->getAngularFactor() : btVector3(0,0,0);
+	}
+	// NOTE(review): the COMPUTE_IMPULSE_DENOM branch below names rb0, rb1, pos1 and pos2, none of which is declared in this function
+#ifdef COMPUTE_IMPULSE_DENOM
+	btScalar denom0 = rb0->computeImpulseDenominator(pos1,solverConstraint.m_contactNormal);
+	btScalar denom1 = rb1->computeImpulseDenominator(pos2,solverConstraint.m_contactNormal);
+#else
+	btVector3 vec;
+	btScalar denom0 = 0.f;
+	btScalar denom1 = 0.f;
+	if (body0)
+	{
+		vec = ( solverConstraint.m_angularComponentA).cross(rel_pos1);
+		denom0 = body0->getInvMass() + normalAxis.dot(vec);
+	}
+	if (body1)
+	{
+		vec = ( -solverConstraint.m_angularComponentB).cross(rel_pos2);
+		denom1 = body1->getInvMass() + normalAxis.dot(vec);
+	}
+	// NOTE(review): if both denominators are 0 (e.g. neither object upcasts to a rigid body) the division below divides by zero;
+	// presumably callers never pass such a pair - confirm
+#endif //COMPUTE_IMPULSE_DENOM
+	btScalar denom = relaxation/(denom0+denom1);
+	solverConstraint.m_jacDiagABInv = denom;
+
+#ifdef _USE_JACOBIAN
+	solverConstraint.m_jac =  btJacobianEntry (
+		rel_pos1,rel_pos2,solverConstraint.m_contactNormal,
+		body0->getInvInertiaDiagLocal(),
+		body0->getInvMass(),
+		body1->getInvInertiaDiagLocal(),
+		body1->getInvMass());
+#endif //_USE_JACOBIAN
+	// NOTE(review): the _USE_JACOBIAN branch above dereferences body0/body1 without the null checks used everywhere else in this function
+
+	{
+		btScalar rel_vel;
+		btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(body0?body0->getLinearVelocity():btVector3(0,0,0)) 
+			+ solverConstraint.m_relpos1CrossNormal.dot(body0?body0->getAngularVelocity():btVector3(0,0,0));
+		btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(body1?body1->getLinearVelocity():btVector3(0,0,0)) 
+			+ solverConstraint.m_relpos2CrossNormal.dot(body1?body1->getAngularVelocity():btVector3(0,0,0));
+
+		rel_vel = vel1Dotn+vel2Dotn;
+		// right-hand side: the impulse that takes the current velocity along the row to desiredVelocity
+//		btScalar positionalError = 0.f;
+
+		btSimdScalar velocityError =  desiredVelocity - rel_vel;
+		btSimdScalar	velocityImpulse = velocityError * btSimdScalar(solverConstraint.m_jacDiagABInv);
+		solverConstraint.m_rhs = velocityImpulse;
+		solverConstraint.m_cfm = cfmSlip;
+		solverConstraint.m_lowerLimit = 0; // placeholder limits as written here: 0 and 1e10
+		solverConstraint.m_upperLimit = 1e10f;
+	}
+}
+
+
+
+btSolverConstraint&	btSequentialImpulseConstraintSolver::addFrictionConstraint(const btVector3& normalAxis,btRigidBody* solverBodyA,btRigidBody* solverBodyB,int frictionIndex,btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, btScalar desiredVelocity, btScalar cfmSlip)
+{
+	// obtain a new, not-yet-initialised entry from the friction constraint pool and fill it in place
+	btSolverConstraint& frictionRow = m_tmpSolverContactFrictionConstraintPool.expandNonInitializing();
+	frictionRow.m_frictionIndex = frictionIndex;
+	setupFrictionConstraint(frictionRow, normalAxis, solverBodyA, solverBodyB, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, desiredVelocity, cfmSlip);
+	return frictionRow; // reference into the pool
+}
+
+int	btSequentialImpulseConstraintSolver::getOrInitSolverBody(btCollisionObject& body)
+{ // as compiled, this always returns 0 and never reads 'body': the lookup/creation logic below is disabled with #if 0
+#if 0
+	int solverBodyIdA = -1;
+
+	if (body.getCompanionId() >= 0)
+	{
+		//body has already been converted
+		solverBodyIdA = body.getCompanionId();
+	} else
+	{
+		btRigidBody* rb = btRigidBody::upcast(&body);
+		if (rb && rb->getInvMass())
+		{
+			solverBodyIdA = m_tmpSolverBodyPool.size();
+			btSolverBody& solverBody = m_tmpSolverBodyPool.expand();
+			initSolverBody(&solverBody,&body);
+			body.setCompanionId(solverBodyIdA);
+		} else
+		{
+			return 0;//assume first one is a fixed solver body
+		}
+	}
+	return solverBodyIdA;
+#endif
+	return 0;
+}
+#include <stdio.h>
+
+
+void btSequentialImpulseConstraintSolver::setupContactConstraint(btSolverConstraint& solverConstraint, 
+																 btCollisionObject* colObj0, btCollisionObject* colObj1,
+																 btManifoldPoint& cp, const btContactSolverInfo& infoGlobal,
+																 btVector3& vel, btScalar& rel_vel, btScalar& relaxation,
+																 btVector3& rel_pos1, btVector3& rel_pos2)
+{
+			btRigidBody* rb0 = btRigidBody::upcast(colObj0);
+			btRigidBody* rb1 = btRigidBody::upcast(colObj1);
+			// Fills solverConstraint as the row along cp.m_normalWorldOnB; also writes rel_pos1, rel_pos2, relaxation, vel and rel_vel for the caller.
+			const btVector3& pos1 = cp.getPositionWorldOnA();
+			const btVector3& pos2 = cp.getPositionWorldOnB();
+
+//			btVector3 rel_pos1 = pos1 - colObj0->getWorldTransform().getOrigin(); 
+//			btVector3 rel_pos2 = pos2 - colObj1->getWorldTransform().getOrigin();
+			rel_pos1 = pos1 - colObj0->getWorldTransform().getOrigin(); 
+			rel_pos2 = pos2 - colObj1->getWorldTransform().getOrigin();
+			// relaxation is always reported as 1
+			relaxation = 1.f;
+
+			btVector3 torqueAxis0 = rel_pos1.cross(cp.m_normalWorldOnB);
+			solverConstraint.m_angularComponentA = rb0 ? rb0->getInvInertiaTensorWorld()*torqueAxis0*rb0->getAngularFactor() : btVector3(0,0,0);
+			btVector3 torqueAxis1 = rel_pos2.cross(cp.m_normalWorldOnB);		
+			solverConstraint.m_angularComponentB = rb1 ? rb1->getInvInertiaTensorWorld()*-torqueAxis1*rb1->getAngularFactor() : btVector3(0,0,0);
+
+				{
+#ifdef COMPUTE_IMPULSE_DENOM
+					btScalar denom0 = rb0->computeImpulseDenominator(pos1,cp.m_normalWorldOnB);
+					btScalar denom1 = rb1->computeImpulseDenominator(pos2,cp.m_normalWorldOnB);
+#else							
+					btVector3 vec;
+					btScalar denom0 = 0.f;
+					btScalar denom1 = 0.f;
+					if (rb0)
+					{
+						vec = ( solverConstraint.m_angularComponentA).cross(rel_pos1);
+						denom0 = rb0->getInvMass() + cp.m_normalWorldOnB.dot(vec);
+					}
+					if (rb1)
+					{
+						vec = ( -solverConstraint.m_angularComponentB).cross(rel_pos2);
+						denom1 = rb1->getInvMass() + cp.m_normalWorldOnB.dot(vec);
+					}
+#endif //COMPUTE_IMPULSE_DENOM		
+					// NOTE(review): denom0+denom1 == 0 would make the next line divide by zero; presumably callers exclude such pairs - confirm
+					btScalar denom = relaxation/(denom0+denom1);
+					solverConstraint.m_jacDiagABInv = denom;
+				}
+
+				solverConstraint.m_contactNormal = cp.m_normalWorldOnB;
+				solverConstraint.m_relpos1CrossNormal = rel_pos1.cross(cp.m_normalWorldOnB);
+				solverConstraint.m_relpos2CrossNormal = rel_pos2.cross(-cp.m_normalWorldOnB);
+
+
+
+			// relative velocity of the two contact points; its component along the normal is handed back through rel_vel
+			btVector3 vel1 = rb0 ? rb0->getVelocityInLocalPoint(rel_pos1) : btVector3(0,0,0);
+			btVector3 vel2 = rb1 ? rb1->getVelocityInLocalPoint(rel_pos2) : btVector3(0,0,0);
+			vel  = vel1 - vel2;
+			rel_vel = cp.m_normalWorldOnB.dot(vel);
+
+				btScalar penetration = cp.getDistance()+infoGlobal.m_linearSlop;
+
+
+				solverConstraint.m_friction = cp.m_combinedFriction;
+
+				btScalar restitution = 0.f;
+				// restitution is zero once cp.m_lifeTime exceeds the resting-contact threshold, and is never negative
+				if (cp.m_lifeTime>infoGlobal.m_restingContactRestitutionThreshold)
+				{
+					restitution = 0.f;
+				} else
+				{
+					restitution =  restitutionCurve(rel_vel, cp.m_combinedRestitution);
+					if (restitution <= btScalar(0.))
+					{
+						restitution = 0.f;
+					};
+				}
+
+
+				///warm starting (or zero if disabled)
+				if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
+				{
+					solverConstraint.m_appliedImpulse = cp.m_appliedImpulse * infoGlobal.m_warmstartingFactor;
+					if (rb0)
+						rb0->internalApplyImpulse(solverConstraint.m_contactNormal*rb0->getInvMass()*rb0->getLinearFactor(),solverConstraint.m_angularComponentA,solverConstraint.m_appliedImpulse);
+					if (rb1)
+						rb1->internalApplyImpulse(solverConstraint.m_contactNormal*rb1->getInvMass()*rb1->getLinearFactor(),-solverConstraint.m_angularComponentB,-(btScalar)solverConstraint.m_appliedImpulse);
+				} else
+				{
+					solverConstraint.m_appliedImpulse = 0.f;
+				}
+
+				solverConstraint.m_appliedPushImpulse = 0.f;
+				// NOTE(review): the rel_vel declared inside the scope below shadows the rel_vel output parameter; the parameter keeps the value assigned above
+				{
+					btScalar rel_vel;
+					btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(rb0?rb0->getLinearVelocity():btVector3(0,0,0)) 
+						+ solverConstraint.m_relpos1CrossNormal.dot(rb0?rb0->getAngularVelocity():btVector3(0,0,0));
+					btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rb1?rb1->getLinearVelocity():btVector3(0,0,0)) 
+						+ solverConstraint.m_relpos2CrossNormal.dot(rb1?rb1->getAngularVelocity():btVector3(0,0,0));
+
+					rel_vel = vel1Dotn+vel2Dotn;
+					// penetration > 0: the value is folded into the velocity error; otherwise it becomes a positional error scaled by ERP
+					btScalar positionalError = 0.f;
+					btScalar	velocityError = restitution - rel_vel;// * damping;
+
+					if (penetration>0)
+					{
+						positionalError = 0;
+						velocityError -= penetration / infoGlobal.m_timeStep;
+					} else
+					{
+						positionalError = -penetration * infoGlobal.m_erp/infoGlobal.m_timeStep;
+					}
+
+					btScalar  penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
+					btScalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
+					if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
+					{
+						//combine position and velocity into rhs
+						solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
+						solverConstraint.m_rhsPenetration = 0.f;
+					} else
+					{
+						//split position and velocity into rhs and m_rhsPenetration
+						solverConstraint.m_rhs = velocityImpulse;
+						solverConstraint.m_rhsPenetration = penetrationImpulse;
+					}
+					solverConstraint.m_cfm = 0.f;
+					solverConstraint.m_lowerLimit = 0; // the row's accumulated impulse is limited to [0, 1e10]
+					solverConstraint.m_upperLimit = 1e10f;
+				}
+
+
+
+
+}
+
+
+
+/// Initialises the applied impulse of the friction row(s) that belong to one contact row.
+///
+/// When both SOLVER_USE_FRICTION_WARMSTARTING and SOLVER_USE_WARMSTARTING are set, the
+/// lateral impulses cached on the manifold point are scaled by m_warmstartingFactor,
+/// stored in the friction rows and applied to the bodies. Otherwise the rows start at zero.
+///
+/// @param solverConstraint contact row; its m_frictionIndex addresses the first friction row
+/// @param rb0              first rigid body, or null (a null body receives no impulse)
+/// @param rb1              second rigid body, or null
+/// @param cp               manifold point holding m_appliedImpulseLateral1 / m_appliedImpulseLateral2
+/// @param infoGlobal       solver mode flags and warm-starting factor
+/// Nothing is returned; the friction rows (and, when warm starting, the bodies) are updated in place.
+///
+/// NOTE: altered from the upstream Bullet source. The second friction direction now
+/// multiplies its linear component by getLinearFactor(), as the first direction already did.
+void btSequentialImpulseConstraintSolver::setFrictionConstraintImpulse( btSolverConstraint& solverConstraint,
+																		btRigidBody* rb0, btRigidBody* rb1,
+																 btManifoldPoint& cp, const btContactSolverInfo& infoGlobal)
+{
+	// Cached impulses are replayed only when both warm-starting flags are enabled.
+	const bool replayCachedImpulses = (infoGlobal.m_solverMode & SOLVER_USE_FRICTION_WARMSTARTING)
+		&& (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING);
+	const int numFrictionRows = (infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS) ? 2 : 1;
+
+	for (int row=0;row<numFrictionRows;row++)
+	{
+		btSolverConstraint& frictionConstraint = m_tmpSolverContactFrictionConstraintPool[solverConstraint.m_frictionIndex+row];
+
+		if (!replayCachedImpulses)
+		{
+			// No warm starting: the row accumulates its impulse from scratch.
+			frictionConstraint.m_appliedImpulse = 0.f;
+			continue;
+		}
+
+		// Row 0 is paired with the first cached lateral impulse, row 1 with the second.
+		const btScalar cachedImpulse = (row == 0) ? cp.m_appliedImpulseLateral1 : cp.m_appliedImpulseLateral2;
+		frictionConstraint.m_appliedImpulse = cachedImpulse * infoGlobal.m_warmstartingFactor;
+
+		// Both rows scale the linear part by the body's linear factor. Body B receives
+		// the opposite impulse, hence the negated angular component and magnitude.
+		if (rb0)
+			rb0->internalApplyImpulse(frictionConstraint.m_contactNormal*rb0->getInvMass()*rb0->getLinearFactor(),frictionConstraint.m_angularComponentA,frictionConstraint.m_appliedImpulse);
+		if (rb1)
+			rb1->internalApplyImpulse(frictionConstraint.m_contactNormal*rb1->getInvMass()*rb1->getLinearFactor(),-frictionConstraint.m_angularComponentB,-(btScalar)frictionConstraint.m_appliedImpulse);
+	}
+}
+
+
+
+
+/// Turns every contact point of a manifold that lies within the contact processing
+/// threshold into one contact row followed by one or two friction rows, then seeds the
+/// friction rows through setFrictionConstraintImpulse().
+///
+/// Nothing is generated when neither object is a rigid body with non-zero inverse mass.
+/// Friction directions are rebuilt from the lateral relative velocity (or btPlaneSpace1())
+/// unless SOLVER_ENABLE_FRICTION_DIRECTION_CACHING is set and the point already carries them.
+///
+/// NOTE: altered from the upstream Bullet source. The row for m_lateralFrictionDir1 is now
+/// always appended before the row for m_lateralFrictionDir2. The warm-starting code pairs
+/// m_frictionIndex with m_appliedImpulseLateral1 and m_frictionIndex+1 with
+/// m_appliedImpulseLateral2, and the cached-direction path already used that order, so the
+/// freshly computed path (which appended direction 2 first) swapped the two impulses.
+void	btSequentialImpulseConstraintSolver::convertContact(btPersistentManifold* manifold,const btContactSolverInfo& infoGlobal)
+{
+	btCollisionObject* colObj0 = (btCollisionObject*)manifold->getBody0();
+	btCollisionObject* colObj1 = (btCollisionObject*)manifold->getBody1();
+
+	btRigidBody* solverBodyA = btRigidBody::upcast(colObj0);
+	btRigidBody* solverBodyB = btRigidBody::upcast(colObj1);
+
+	///avoid collision response between two static objects
+	if ((!solverBodyA || !solverBodyA->getInvMass()) && (!solverBodyB || !solverBodyB->getInvMass()))
+		return;
+
+	const bool useTwoFrictionDirections = (infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS) != 0;
+
+	for (int j=0;j<manifold->getNumContacts();j++)
+	{
+		btManifoldPoint& cp = manifold->getContactPoint(j);
+
+		// Points farther apart than the processing threshold produce no rows.
+		if (!(cp.getDistance() <= manifold->getContactProcessingThreshold()))
+			continue;
+
+		btVector3 rel_pos1;
+		btVector3 rel_pos2;
+		btScalar relaxation;
+		btScalar rel_vel;
+		btVector3 vel;
+
+		int frictionIndex = m_tmpSolverContactConstraintPool.size();
+		btSolverConstraint& solverConstraint = m_tmpSolverContactConstraintPool.expandNonInitializing();
+		solverConstraint.m_solverBodyA = solverBodyA? solverBodyA : &getFixedBody();
+		solverConstraint.m_solverBodyB = solverBodyB? solverBodyB : &getFixedBody();
+		solverConstraint.m_originalContactPoint = &cp;
+
+		setupContactConstraint(solverConstraint, colObj0, colObj1, cp, infoGlobal, vel, rel_vel, relaxation, rel_pos1, rel_pos2);
+
+		/////setup the friction constraints
+		solverConstraint.m_frictionIndex = m_tmpSolverContactFrictionConstraintPool.size();
+
+		if (!(infoGlobal.m_solverMode & SOLVER_ENABLE_FRICTION_DIRECTION_CACHING) || !cp.m_lateralFrictionInitialized)
+		{
+			cp.m_lateralFrictionDir1 = vel - cp.m_normalWorldOnB * rel_vel;
+			btScalar lat_rel_vel = cp.m_lateralFrictionDir1.length2();
+			if (!(infoGlobal.m_solverMode & SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION) && lat_rel_vel > SIMD_EPSILON)
+			{
+				cp.m_lateralFrictionDir1 /= btSqrt(lat_rel_vel);
+				if (useTwoFrictionDirections)
+				{
+					// Derived before direction 1 is passed to applyAnisotropicFriction(), as before.
+					cp.m_lateralFrictionDir2 = cp.m_lateralFrictionDir1.cross(cp.m_normalWorldOnB);
+					cp.m_lateralFrictionDir2.normalize();//??
+				}
+			} else
+			{
+				//re-calculate friction direction every frame, todo: check if this is really needed
+				btPlaneSpace1(cp.m_normalWorldOnB,cp.m_lateralFrictionDir1,cp.m_lateralFrictionDir2);
+			}
+
+			// Direction 1 goes first so that its row sits at m_frictionIndex.
+			applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir1);
+			applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir1);
+			addFrictionConstraint(cp.m_lateralFrictionDir1,solverBodyA,solverBodyB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+
+			if (useTwoFrictionDirections)
+			{
+				applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir2);
+				applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir2);
+				addFrictionConstraint(cp.m_lateralFrictionDir2,solverBodyA,solverBodyB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+			}
+
+			cp.m_lateralFrictionInitialized = true;
+		} else
+		{
+			// Cached directions are reused together with the per-direction motion and CFM values.
+			addFrictionConstraint(cp.m_lateralFrictionDir1,solverBodyA,solverBodyB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation,cp.m_contactMotion1, cp.m_contactCFM1);
+			if (useTwoFrictionDirections)
+				addFrictionConstraint(cp.m_lateralFrictionDir2,solverBodyA,solverBodyB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation, cp.m_contactMotion2, cp.m_contactCFM2);
+		}
+
+		setFrictionConstraintImpulse( solverConstraint, solverBodyA, solverBodyB, cp, infoGlobal);
+	}
+}
+
+/// Builds all temporary solver rows for one solveGroup() call before the iterations start.
+///
+/// Steps, in order:
+///  1. reset m_maxOverrideNumSolverIterations and return early when there is neither a
+///     constraint nor a manifold
+///  2. zero the delta (and, with split impulse, push/turn) velocities of every rigid body
+///  3. call buildJacobian() on every typed constraint and clear its applied impulse
+///  4. ask every enabled constraint for its row count (getInfo1), size
+///     m_tmpSolverNonContactConstraintPool accordingly and let getInfo2() fill the rows
+///  5. derive the angular components, m_jacDiagABInv and the final m_rhs of every row
+///  6. convert every contact manifold through convertContact()
+///  7. size the three order arrays and initialise them to the identity permutation
+///
+/// @param bodies,numBodies           collision objects whose solver velocities are reset
+/// @param manifoldPtr,numManifolds   contact manifolds to convert
+/// @param constraints,numConstraints typed (non-contact) constraints to convert
+/// @param infoGlobal                 time step, ERP/CFM, damping, iteration count and mode flags
+/// @param debugDrawer,stackAlloc     unused
+/// @return always 0
+///
+/// NOTE: altered from the upstream Bullet source. A row whose effective-mass denominator is
+/// exactly zero now gets m_jacDiagABInv = 0 instead of the result of a division by zero,
+/// and an unused local copy of infoGlobal was dropped.
+btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
+{
+	BT_PROFILE("solveGroupCacheFriendlySetup");
+	(void)stackAlloc;
+	(void)debugDrawer;
+
+	m_maxOverrideNumSolverIterations = 0;
+
+	if (!(numConstraints + numManifolds))
+	{
+		//		printf("empty\n");
+		return 0.f;
+	}
+
+	// Clear the per-body solver accumulators; the push/turn velocities are only
+	// cleared when split impulse is enabled.
+	for (int i = 0; i < numBodies; i++)
+	{
+		btRigidBody* body = btRigidBody::upcast(bodies[i]);
+		if (body)
+		{
+			body->internalGetDeltaLinearVelocity().setZero();
+			body->internalGetDeltaAngularVelocity().setZero();
+			if (infoGlobal.m_splitImpulse)
+			{
+				body->internalGetPushVelocity().setZero();
+				body->internalGetTurnVelocity().setZero();
+			}
+		}
+	}
+
+	// Let every constraint build its Jacobian and start from a zero applied impulse.
+	if (1)
+	{
+		int j;
+		for (j=0;j<numConstraints;j++)
+		{
+			btTypedConstraint* constraint = constraints[j];
+			constraint->buildJacobian();
+			constraint->internalSetAppliedImpulse(0.0f);
+		}
+	}
+
+	{
+		int totalNumRows = 0;
+		int i;
+
+		m_tmpConstraintSizesPool.resize(numConstraints);
+		//calculate the total number of contraint rows
+		for (i=0;i<numConstraints;i++)
+		{
+			btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
+			if (constraints[i]->isEnabled())
+			{
+				constraints[i]->getInfo1(&info1);
+			} else
+			{
+				info1.m_numConstraintRows = 0;
+				info1.nub = 0;
+			}
+			totalNumRows += info1.m_numConstraintRows;
+		}
+		m_tmpSolverNonContactConstraintPool.resize(totalNumRows);
+
+		///setup the btSolverConstraints
+		int currentRow = 0;
+
+		for (i=0;i<numConstraints;i++)
+		{
+			const btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
+
+			if (info1.m_numConstraintRows)
+			{
+				btAssert(currentRow<totalNumRows);
+
+				btSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
+				btTypedConstraint* constraint = constraints[i];
+				btRigidBody& rbA = constraint->getRigidBodyA();
+				btRigidBody& rbB = constraint->getRigidBodyB();
+
+				int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
+				if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)
+					m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
+
+				// Start every row of this constraint from a zeroed, unbounded state.
+				int j;
+				for ( j=0;j<info1.m_numConstraintRows;j++)
+				{
+					memset(&currentConstraintRow[j],0,sizeof(btSolverConstraint));
+					currentConstraintRow[j].m_lowerLimit = -SIMD_INFINITY;
+					currentConstraintRow[j].m_upperLimit = SIMD_INFINITY;
+					currentConstraintRow[j].m_appliedImpulse = 0.f;
+					currentConstraintRow[j].m_appliedPushImpulse = 0.f;
+					currentConstraintRow[j].m_solverBodyA = &rbA;
+					currentConstraintRow[j].m_solverBodyB = &rbB;
+					currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
+				}
+
+				// Reset the solver velocity accumulators of both constrained bodies.
+				rbA.internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
+				rbA.internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
+				rbB.internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
+				rbB.internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
+
+				// info2 points getInfo2() at the first row; rowskip is the size of one row in btScalar units.
+				btTypedConstraint::btConstraintInfo2 info2;
+				info2.fps = 1.f/infoGlobal.m_timeStep;
+				info2.erp = infoGlobal.m_erp;
+				info2.m_J1linearAxis = currentConstraintRow->m_contactNormal;
+				info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
+				info2.m_J2linearAxis = 0;
+				info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
+				info2.rowskip = sizeof(btSolverConstraint)/sizeof(btScalar);//check this
+				///the size of btSolverConstraint needs be a multiple of btScalar
+				btAssert(info2.rowskip*sizeof(btScalar)== sizeof(btSolverConstraint));
+				info2.m_constraintError = &currentConstraintRow->m_rhs;
+				currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
+				info2.m_damping = infoGlobal.m_damping;
+				info2.cfm = &currentConstraintRow->m_cfm;
+				info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
+				info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
+				info2.m_numIterations = infoGlobal.m_numIterations;
+				constraints[i]->getInfo2(&info2);
+
+				///finalize the constraint setup
+				for ( j=0;j<info1.m_numConstraintRows;j++)
+				{
+					btSolverConstraint& solverConstraint = currentConstraintRow[j];
+
+					if (solverConstraint.m_upperLimit>=constraints[i]->getBreakingImpulseThreshold())
+					{
+						solverConstraint.m_upperLimit = constraints[i]->getBreakingImpulseThreshold();
+					}
+
+					if (solverConstraint.m_lowerLimit<=-constraints[i]->getBreakingImpulseThreshold())
+					{
+						solverConstraint.m_lowerLimit = -constraints[i]->getBreakingImpulseThreshold();
+					}
+
+					solverConstraint.m_originalContactPoint = constraint;
+
+					{
+						const btVector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
+						solverConstraint.m_angularComponentA = constraint->getRigidBodyA().getInvInertiaTensorWorld()*ftorqueAxis1*constraint->getRigidBodyA().getAngularFactor();
+					}
+					{
+						const btVector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
+						solverConstraint.m_angularComponentB = constraint->getRigidBodyB().getInvInertiaTensorWorld()*ftorqueAxis2*constraint->getRigidBodyB().getAngularFactor();
+					}
+
+					{
+						btVector3 iMJlA = solverConstraint.m_contactNormal*rbA.getInvMass();
+						btVector3 iMJaA = rbA.getInvInertiaTensorWorld()*solverConstraint.m_relpos1CrossNormal;
+						btVector3 iMJlB = solverConstraint.m_contactNormal*rbB.getInvMass();//sign of normal?
+						btVector3 iMJaB = rbB.getInvInertiaTensorWorld()*solverConstraint.m_relpos2CrossNormal;
+
+						btScalar sum = iMJlA.dot(solverConstraint.m_contactNormal);
+						sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
+						sum += iMJlB.dot(solverConstraint.m_contactNormal);
+						sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
+
+						// sum is exactly zero when the row's Jacobian is all zero or neither body
+						// contributes inverse mass/inertia. Such a row cannot be driven, so give it
+						// a zero multiplier instead of dividing by zero.
+						solverConstraint.m_jacDiagABInv = (sum != btScalar(0.)) ? btScalar(1.)/sum : btScalar(0.);
+					}
+
+					///fix rhs
+					///todo: add force/torque accelerators
+					{
+						btScalar rel_vel;
+						btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.getLinearVelocity()) + solverConstraint.m_relpos1CrossNormal.dot(rbA.getAngularVelocity());
+						btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.getLinearVelocity()) + solverConstraint.m_relpos2CrossNormal.dot(rbB.getAngularVelocity());
+
+						rel_vel = vel1Dotn+vel2Dotn;
+
+						// Constraints have no restitution; damping scales the velocity term.
+						btScalar restitution = 0.f;
+						btScalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
+						btScalar	velocityError = restitution - rel_vel * info2.m_damping;
+						btScalar	penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
+						btScalar	velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
+						solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
+						solverConstraint.m_appliedImpulse = 0.f;
+					}
+				}
+			}
+			// Advance past this constraint's rows (zero for disabled constraints).
+			currentRow+=m_tmpConstraintSizesPool[i].m_numConstraintRows;
+		}
+
+		// Contact manifolds are converted after all non-contact rows have been set up.
+		for (i=0;i<numManifolds;i++)
+		{
+			convertContact(manifoldPtr[i],infoGlobal);
+		}
+	}
+
+	int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
+	int numConstraintPool = m_tmpSolverContactConstraintPool.size();
+	int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
+
+	///@todo: use stack allocator for such temporarily memory, same for solver bodies/constraints
+	m_orderNonContactConstraintPool.resize(numNonContactPool);
+	m_orderTmpConstraintPool.resize(numConstraintPool);
+	m_orderFrictionConstraintPool.resize(numFrictionPool);
+	// Identity order; solveSingleIteration() may shuffle it later.
+	{
+		int i;
+		for (i=0;i<numNonContactPool;i++)
+		{
+			m_orderNonContactConstraintPool[i] = i;
+		}
+		for (i=0;i<numConstraintPool;i++)
+		{
+			m_orderTmpConstraintPool[i] = i;
+		}
+		for (i=0;i<numFrictionPool;i++)
+		{
+			m_orderFrictionConstraintPool[i] = i;
+		}
+	}
+
+	return 0.f;
+}
+
+/// Runs one solver pass over the rows: joint rows first, then (while iteration is below
+/// m_numIterations) the obsolete per-constraint solve, the contact rows and the friction rows.
+/// With SOLVER_RANDMIZE_ORDER the solve order is reshuffled when (iteration & 7) == 0. Returns 0.
+btScalar btSequentialImpulseConstraintSolver::solveSingleIteration(int iteration, btCollisionObject** /*bodies */,int /*numBodies*/,btPersistentManifold** /*manifoldPtr*/, int /*numManifolds*/,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* /*debugDrawer*/,btStackAlloc* /*stackAlloc*/)
+{
+	const int nonContactRowCount = m_tmpSolverNonContactConstraintPool.size();
+	const int contactRowCount = m_tmpSolverContactConstraintPool.size();
+	const int frictionRowCount = m_tmpSolverContactFrictionConstraintPool.size();
+	//contact/friction constraints are not solved more than infoGlobal.m_numIterations times
+	const bool solveContacts = iteration < infoGlobal.m_numIterations;
+	int k;
+
+	// Reshuffle the order arrays; each position k swaps with the index returned by btRandInt2(k+1).
+	if ((infoGlobal.m_solverMode & SOLVER_RANDMIZE_ORDER) && ((iteration & 7) == 0))
+	{
+		for (k=0;k<nonContactRowCount;++k)
+		{
+			const int held = m_orderNonContactConstraintPool[k];
+			const int pick = btRandInt2(k+1);
+			m_orderNonContactConstraintPool[k] = m_orderNonContactConstraintPool[pick];
+			m_orderNonContactConstraintPool[pick] = held;
+		}
+
+		if (solveContacts)
+		{
+			for (k=0;k<contactRowCount;++k)
+			{
+				const int held = m_orderTmpConstraintPool[k];
+				const int pick = btRandInt2(k+1);
+				m_orderTmpConstraintPool[k] = m_orderTmpConstraintPool[pick];
+				m_orderTmpConstraintPool[pick] = held;
+			}
+
+			for (k=0;k<frictionRowCount;++k)
+			{
+				const int held = m_orderFrictionConstraintPool[k];
+				const int pick = btRandInt2(k+1);
+				m_orderFrictionConstraintPool[k] = m_orderFrictionConstraintPool[pick];
+				m_orderFrictionConstraintPool[pick] = held;
+			}
+		}
+	}
+
+	if (infoGlobal.m_solverMode & SOLVER_SIMD)
+	{
+		///solve all joint constraints, using SIMD, if available
+		for (k=0;k<nonContactRowCount;k++)
+		{
+			btSolverConstraint& jointRow = m_tmpSolverNonContactConstraintPool[m_orderNonContactConstraintPool[k]];
+			if (iteration < jointRow.m_overrideNumSolverIterations)
+				resolveSingleConstraintRowGenericSIMD(*jointRow.m_solverBodyA,*jointRow.m_solverBodyB,jointRow);
+		}
+
+		if (solveContacts)
+		{
+			for (k=0;k<numConstraints;k++)
+			{
+				btTypedConstraint* joint = constraints[k];
+				joint->solveConstraintObsolete(joint->getRigidBodyA(),joint->getRigidBodyB(),infoGlobal.m_timeStep);
+			}
+
+			///solve all contact constraints using SIMD, if available
+			for (k=0;k<contactRowCount;k++)
+			{
+				const btSolverConstraint& contactRow = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[k]];
+				resolveSingleConstraintRowLowerLimitSIMD(*contactRow.m_solverBodyA,*contactRow.m_solverBodyB,contactRow);
+			}
+
+			///solve all friction constraints, using SIMD, if available
+			for (k=0;k<frictionRowCount;k++)
+			{
+				btSolverConstraint& frictionRow = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[k]];
+				const btScalar normalImpulse = m_tmpSolverContactConstraintPool[frictionRow.m_frictionIndex].m_appliedImpulse;
+				if (!(normalImpulse>btScalar(0)))
+					continue;
+
+				// The friction impulse is limited to +/- (m_friction * normal impulse).
+				frictionRow.m_lowerLimit = -(frictionRow.m_friction*normalImpulse);
+				frictionRow.m_upperLimit = frictionRow.m_friction*normalImpulse;
+				resolveSingleConstraintRowGenericSIMD(*frictionRow.m_solverBodyA,	*frictionRow.m_solverBodyB,frictionRow);
+			}
+		}
+	} else
+	{
+		///solve all joint constraints
+		for (k=0;k<nonContactRowCount;k++)
+		{
+			btSolverConstraint& jointRow = m_tmpSolverNonContactConstraintPool[m_orderNonContactConstraintPool[k]];
+			if (iteration < jointRow.m_overrideNumSolverIterations)
+				resolveSingleConstraintRowGeneric(*jointRow.m_solverBodyA,*jointRow.m_solverBodyB,jointRow);
+		}
+
+		if (solveContacts)
+		{
+			for (k=0;k<numConstraints;k++)
+			{
+				btTypedConstraint* joint = constraints[k];
+				joint->solveConstraintObsolete(joint->getRigidBodyA(),joint->getRigidBodyB(),infoGlobal.m_timeStep);
+			}
+
+			///solve all contact constraints
+			for (k=0;k<contactRowCount;k++)
+			{
+				const btSolverConstraint& contactRow = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[k]];
+				resolveSingleConstraintRowLowerLimit(*contactRow.m_solverBodyA,*contactRow.m_solverBodyB,contactRow);
+			}
+
+			///solve all friction constraints
+			for (k=0;k<frictionRowCount;k++)
+			{
+				btSolverConstraint& frictionRow = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[k]];
+				const btScalar normalImpulse = m_tmpSolverContactConstraintPool[frictionRow.m_frictionIndex].m_appliedImpulse;
+				if (!(normalImpulse>btScalar(0)))
+					continue;
+
+				// The friction impulse is limited to +/- (m_friction * normal impulse).
+				frictionRow.m_lowerLimit = -(frictionRow.m_friction*normalImpulse);
+				frictionRow.m_upperLimit = frictionRow.m_friction*normalImpulse;
+				resolveSingleConstraintRowGeneric(*frictionRow.m_solverBodyA,*frictionRow.m_solverBodyB,frictionRow);
+			}
+		}
+	}
+	return 0.f;
+}
+
+
+/// Split-impulse penetration recovery pass (contacts only).
+///
+/// Does nothing unless infoGlobal.m_splitImpulse is set. Otherwise it walks the contact
+/// rows m_numIterations times, in the order stored in m_orderTmpConstraintPool, and hands
+/// each row to resolveSplitPenetrationSIMD() (SOLVER_SIMD set) or
+/// resolveSplitPenetrationImpulseCacheFriendly(). All parameters other than infoGlobal
+/// are unused.
+void btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
+{
+	int pass, k;
+
+	// Nothing to do unless split impulse is enabled.
+	if (!infoGlobal.m_splitImpulse)
+		return;
+
+	if (infoGlobal.m_solverMode & SOLVER_SIMD)
+	{
+		for (pass = 0;pass<infoGlobal.m_numIterations;pass++)
+		{
+			const int contactRowCount = m_tmpSolverContactConstraintPool.size();
+			for (k=0;k<contactRowCount;k++)
+			{
+				const btSolverConstraint& contactRow = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[k]];
+				resolveSplitPenetrationSIMD(*contactRow.m_solverBodyA,*contactRow.m_solverBodyB,contactRow);
+			}
+		}
+		return;
+	}
+
+	for (pass = 0;pass<infoGlobal.m_numIterations;pass++)
+	{
+		const int contactRowCount = m_tmpSolverContactConstraintPool.size();
+		for (k=0;k<contactRowCount;k++)
+		{
+			const btSolverConstraint& contactRow = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[k]];
+			resolveSplitPenetrationImpulseCacheFriendly(*contactRow.m_solverBodyA,*contactRow.m_solverBodyB,contactRow);
+		}
+	}
+}
+
+/// Runs the split-impulse pass, then solveSingleIteration() once per iteration, for the larger
+/// of m_numIterations and m_maxOverrideNumSolverIterations iterations. Always returns 0.
+btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyIterations(btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
+{
+	BT_PROFILE("solveGroupCacheFriendlyIterations");
+
+	///this is a special step to resolve penetrations (just for contacts)
+	solveGroupCacheFriendlySplitImpulseIterations(bodies ,numBodies,manifoldPtr, numManifolds,constraints,numConstraints,infoGlobal,debugDrawer,stackAlloc);
+
+	// A constraint may request more iterations than the global setting.
+	int iterationBudget = infoGlobal.m_numIterations;
+	if (m_maxOverrideNumSolverIterations > iterationBudget)
+		iterationBudget = m_maxOverrideNumSolverIterations;
+
+	for ( int iteration = 0 ; iteration< iterationBudget ; iteration++)
+		solveSingleIteration(iteration, bodies ,numBodies,manifoldPtr, numManifolds,constraints,numConstraints,infoGlobal,debugDrawer,stackAlloc);
+
+	return 0.f;
+}
+
+/// Writes the solved impulses back to their owners and releases the temporary rows.
+/// Contact rows update their btManifoldPoint, non-contact rows report to their
+/// btTypedConstraint (disabling it once the breaking threshold is reached), and every
+/// rigid body in bodies has internalWritebackVelocity() called. Always returns 0.
+/// NOTE: altered from the upstream Bullet source. m_appliedImpulseLateral2 is only written
+/// when SOLVER_USE_2_FRICTION_DIRECTIONS is set: otherwise a contact owns one friction row
+/// and m_frictionIndex+1 addresses another contact's row or lies past the end of the pool.
+btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCollisionObject** bodies ,int numBodies,btPersistentManifold** /*manifoldPtr*/, int /*numManifolds*/,btTypedConstraint** /*constraints*/,int /* numConstraints*/,const btContactSolverInfo& infoGlobal,btIDebugDraw* /*debugDrawer*/,btStackAlloc* /*stackAlloc*/)
+{
+	const bool storeFrictionImpulses = (infoGlobal.m_solverMode & SOLVER_USE_FRICTION_WARMSTARTING) != 0;
+	const bool twoFrictionRows = (infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS) != 0;
+	int i,j;
+
+	int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
+	for (j=0;j<numPoolConstraints;j++)
+	{
+		const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[j];
+		btManifoldPoint* pt = (btManifoldPoint*) solveManifold.m_originalContactPoint;
+		btAssert(pt);
+		pt->m_appliedImpulse = solveManifold.m_appliedImpulse;
+		if (storeFrictionImpulses)
+		{
+			pt->m_appliedImpulseLateral1 = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
+			if (twoFrictionRows)
+				pt->m_appliedImpulseLateral2 = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex+1].m_appliedImpulse;
+		}
+		//do a callback here?
+	}
+
+	numPoolConstraints = m_tmpSolverNonContactConstraintPool.size();
+	for (j=0;j<numPoolConstraints;j++)
+	{
+		const btSolverConstraint& solverConstr = m_tmpSolverNonContactConstraintPool[j];
+		btTypedConstraint* constr = (btTypedConstraint*)solverConstr.m_originalContactPoint;
+		constr->internalSetAppliedImpulse(solverConstr.m_appliedImpulse);
+		if (btFabs(solverConstr.m_appliedImpulse)>=constr->getBreakingImpulseThreshold())
+		{
+			constr->setEnabled(false);
+		}
+	}
+
+	// With split impulse the time step is passed along to the velocity write-back.
+	for ( i=0;i<numBodies;i++)
+	{
+		btRigidBody* body = btRigidBody::upcast(bodies[i]);
+		if (!body)
+			continue;
+		if (infoGlobal.m_splitImpulse)
+			body->internalWritebackVelocity(infoGlobal.m_timeStep);
+		else
+			body->internalWritebackVelocity();
+	}
+
+	m_tmpSolverContactConstraintPool.resize(0);
+	m_tmpSolverNonContactConstraintPool.resize(0);
+	m_tmpSolverContactFrictionConstraintPool.resize(0);
+
+	return 0.f;
+}
+
+
+
+/// btSequentialImpulseConstraintSolver Sequentially applies impulses
+/// Solves one group of bodies, manifolds and constraints by running the setup, iteration
+/// and finish phases in that order. The dispatcher argument is unused. Always returns 0.
+btScalar btSequentialImpulseConstraintSolver::solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc,btDispatcher* /*dispatcher*/)
+{
+	BT_PROFILE("solveGroup");
+
+	//you need to provide at least some bodies
+	btAssert(bodies);
+	btAssert(numBodies);
+
+	// Phase order matters: rows are built, solved, then written back and released.
+	solveGroupCacheFriendlySetup( bodies, numBodies, manifoldPtr,  numManifolds,constraints, numConstraints,infoGlobal,debugDrawer, stackAlloc);
+	solveGroupCacheFriendlyIterations(bodies, numBodies, manifoldPtr,  numManifolds,constraints, numConstraints,infoGlobal,debugDrawer, stackAlloc);
+	solveGroupCacheFriendlyFinish(bodies, numBodies, manifoldPtr,  numManifolds,constraints, numConstraints,infoGlobal,debugDrawer, stackAlloc);
+	return 0.f;
+}
+
+void	btSequentialImpulseConstraintSolver::reset()
+{
+	setRandSeed(0);	// puts m_btSeed2, the seed used for shuffling the row order, back to 0
+}
+
+btRigidBody& btSequentialImpulseConstraintSolver::getFixedBody()
+{
+	static btRigidBody sharedFixedBody(0, 0,0);	// function-local static: one instance shared by all callers
+	sharedFixedBody.setMassProps(btScalar(0.),btVector3(0,0,0));	// zero mass properties, re-applied on every call
+	return sharedFixedBody;
+}
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h b/src/bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h
new file mode 100644
index 00000000..bb377db8
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h
@@ -0,0 +1,130 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_H
+#define BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_H
+
+#include "btConstraintSolver.h"
+class btIDebugDraw;
+#include "btContactConstraint.h"
+#include "btSolverBody.h"
+#include "btSolverConstraint.h"
+#include "btTypedConstraint.h"
+#include "BulletCollision/NarrowPhaseCollision/btManifoldPoint.h"
+
+///The btSequentialImpulseConstraintSolver is a fast SIMD implementation of the Projected Gauss Seidel (iterative LCP) method.
+class btSequentialImpulseConstraintSolver : public btConstraintSolver
+{
+protected:
+	///temporary rows used during one solveGroup call; solveGroupCacheFriendlyFinish resizes all three pools back to 0
+	btConstraintArray			m_tmpSolverContactConstraintPool;
+	btConstraintArray			m_tmpSolverNonContactConstraintPool;
+	btConstraintArray			m_tmpSolverContactFrictionConstraintPool;
+	btAlignedObjectArray<int>	m_orderTmpConstraintPool;	//order in which the contact rows are solved
+	btAlignedObjectArray<int>	m_orderNonContactConstraintPool;	//order in which the non-contact rows are solved
+	btAlignedObjectArray<int>	m_orderFrictionConstraintPool;	//order in which the friction rows are solved
+	btAlignedObjectArray<btTypedConstraint::btConstraintInfo1> m_tmpConstraintSizesPool;	//per-constraint getInfo1 results
+	int							m_maxOverrideNumSolverIterations;	//largest per-constraint iteration count found during setup
+
+	void setupFrictionConstraint(	btSolverConstraint& solverConstraint, const btVector3& normalAxis,btRigidBody* solverBodyA,btRigidBody* solverBodyIdB,
+									btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,
+									btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, 
+									btScalar desiredVelocity=0., btScalar cfmSlip=0.);
+
+	btSolverConstraint&	addFrictionConstraint(const btVector3& normalAxis,btRigidBody* solverBodyA,btRigidBody* solverBodyB,int frictionIndex,btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, btScalar desiredVelocity=0., btScalar cfmSlip=0.);
+	
+	void setupContactConstraint(btSolverConstraint& solverConstraint, btCollisionObject* colObj0, btCollisionObject* colObj1, btManifoldPoint& cp, 
+								const btContactSolverInfo& infoGlobal, btVector3& vel, btScalar& rel_vel, btScalar& relaxation, 
+								btVector3& rel_pos1, btVector3& rel_pos2);
+	///sets the initial applied impulse of a contact's friction row(s): warm start from the impulses cached on cp, or zero
+	void setFrictionConstraintImpulse( btSolverConstraint& solverConstraint, btRigidBody* rb0, btRigidBody* rb1, 
+										 btManifoldPoint& cp, const btContactSolverInfo& infoGlobal);
+
+	///m_btSeed2 is used for re-arranging the constraint rows. improves convergence/quality of friction
+	unsigned long	m_btSeed2;
+
+//	void	initSolverBody(btSolverBody* solverBody, btCollisionObject* collisionObject);
+	btScalar restitutionCurve(btScalar rel_vel, btScalar restitution);
+	///creates the contact row and friction row(s) for each point of the manifold within the contact processing threshold
+	void	convertContact(btPersistentManifold* manifold,const btContactSolverInfo& infoGlobal);
+
+
+	void	resolveSplitPenetrationSIMD(
+        btRigidBody& body1,
+        btRigidBody& body2,
+        const btSolverConstraint& contactConstraint);
+
+	void	resolveSplitPenetrationImpulseCacheFriendly(
+        btRigidBody& body1,
+        btRigidBody& body2,
+        const btSolverConstraint& contactConstraint);
+
+	//internal method
+	int	getOrInitSolverBody(btCollisionObject& body);
+
+	void	resolveSingleConstraintRowGeneric(btRigidBody& body1,btRigidBody& body2,const btSolverConstraint& contactConstraint);
+
+	void	resolveSingleConstraintRowGenericSIMD(btRigidBody& body1,btRigidBody& body2,const btSolverConstraint& contactConstraint);
+	
+	void	resolveSingleConstraintRowLowerLimit(btRigidBody& body1,btRigidBody& body2,const btSolverConstraint& contactConstraint);
+	
+	void	resolveSingleConstraintRowLowerLimitSIMD(btRigidBody& body1,btRigidBody& body2,const btSolverConstraint& contactConstraint);
+		
+protected:
+	static btRigidBody& getFixedBody();	//shared static body, used as the solver body when a contact object is not a btRigidBody
+	///split-impulse penetration pass over the contact rows; does nothing unless infoGlobal.m_splitImpulse is set
+	virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
+	virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
+	btScalar solveSingleIteration(int iteration, btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
+	///builds the temporary rows (typed constraints first, then contact manifolds) and the solve-order arrays
+	virtual btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
+	virtual btScalar solveGroupCacheFriendlyIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
+
+
+public:
+
+	
+	btSequentialImpulseConstraintSolver();
+	virtual ~btSequentialImpulseConstraintSolver();
+	///runs the setup, iteration and finish phases in that order; the implementation always returns 0
+	virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher);
+	
+
+	
+	///clear internal cached data and reset random seed (this implementation only sets m_btSeed2 to 0)
+	virtual	void	reset();
+	
+	unsigned long btRand2();
+
+	int btRandInt2 (int n);
+	///direct access to m_btSeed2, the seed used for re-arranging the constraint rows
+	void	setRandSeed(unsigned long seed)
+	{
+		m_btSeed2 = seed;
+	}
+	unsigned long	getRandSeed() const
+	{
+		return m_btSeed2;
+	}
+
+};
+
+#ifndef BT_PREFER_SIMD
+typedef btSequentialImpulseConstraintSolver btSequentialImpulseConstraintSolverPrefered;	//alias declared only when BT_PREFER_SIMD is not defined
+#endif
+
+
+#endif //BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_H
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btSliderConstraint.cpp b/src/bullet/BulletDynamics/ConstraintSolver/btSliderConstraint.cpp
new file mode 100755
index 00000000..b69f46da
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btSliderConstraint.cpp
@@ -0,0 +1,857 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/*
+Added by Roman Ponomarev (rponom@gmail.com)
+April 04, 2008
+*/
+
+
+
+#include "btSliderConstraint.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btTransformUtil.h"
+#include <new>
+
+#define USE_OFFSET_FOR_CONSTANT_FRAME true
+
+void btSliderConstraint::initParams()
+{	// Reset every tunable parameter to its default value, then refresh the cached world-space frames.
+	m_lowerLinLimit = btScalar(1.0);	// lower > upper: testLinLimits() leaves the linear travel unlimited
+	m_upperLinLimit = btScalar(-1.0);
+	m_lowerAngLimit = btScalar(0.);	// lower == upper: the angular limit test in testAngLimits() stays enabled
+	m_upperAngLimit = btScalar(0.);
+	m_softnessDirLin = SLIDER_CONSTRAINT_DEF_SOFTNESS;
+	m_restitutionDirLin = SLIDER_CONSTRAINT_DEF_RESTITUTION;
+	m_dampingDirLin = btScalar(0.);	// the two "Dir" dampings start at zero instead of the shared default
+	m_cfmDirLin = SLIDER_CONSTRAINT_DEF_CFM;
+	m_softnessDirAng = SLIDER_CONSTRAINT_DEF_SOFTNESS;
+	m_restitutionDirAng = SLIDER_CONSTRAINT_DEF_RESTITUTION;
+	m_dampingDirAng = btScalar(0.);
+	m_cfmDirAng = SLIDER_CONSTRAINT_DEF_CFM;
+	m_softnessOrthoLin = SLIDER_CONSTRAINT_DEF_SOFTNESS;
+	m_restitutionOrthoLin = SLIDER_CONSTRAINT_DEF_RESTITUTION;
+	m_dampingOrthoLin = SLIDER_CONSTRAINT_DEF_DAMPING;
+	m_cfmOrthoLin = SLIDER_CONSTRAINT_DEF_CFM;
+	m_softnessOrthoAng = SLIDER_CONSTRAINT_DEF_SOFTNESS;
+	m_restitutionOrthoAng = SLIDER_CONSTRAINT_DEF_RESTITUTION;
+	m_dampingOrthoAng = SLIDER_CONSTRAINT_DEF_DAMPING;
+	m_cfmOrthoAng = SLIDER_CONSTRAINT_DEF_CFM;
+	m_softnessLimLin = SLIDER_CONSTRAINT_DEF_SOFTNESS;
+	m_restitutionLimLin = SLIDER_CONSTRAINT_DEF_RESTITUTION;
+	m_dampingLimLin = SLIDER_CONSTRAINT_DEF_DAMPING;
+	m_cfmLimLin = SLIDER_CONSTRAINT_DEF_CFM;
+	m_softnessLimAng = SLIDER_CONSTRAINT_DEF_SOFTNESS;
+	m_restitutionLimAng = SLIDER_CONSTRAINT_DEF_RESTITUTION;
+	m_dampingLimAng = SLIDER_CONSTRAINT_DEF_DAMPING;
+	m_cfmLimAng = SLIDER_CONSTRAINT_DEF_CFM;
+	// both motors start switched off, with zero target velocity, zero force budget and no accumulated impulse
+	m_poweredLinMotor = false;
+	m_targetLinMotorVelocity = btScalar(0.);
+	m_maxLinMotorForce = btScalar(0.);
+	m_accumulatedLinMotorImpulse = btScalar(0.0);
+
+	m_poweredAngMotor = false;
+	m_targetAngMotorVelocity = btScalar(0.);
+	m_maxAngMotorForce = btScalar(0.);
+	m_accumulatedAngMotorImpulse = btScalar(0.0);
+
+	// no per-constraint ERP/CFM override is active yet; setParam() raises the individual bits
+	m_flags = 0;
+
+	m_useOffsetForConstraintFrame = USE_OFFSET_FOR_CONSTANT_FRAME;
+	// reads m_frameInA, m_frameInB and the two reference-frame flags, which the constructors set before calling initParams()
+	calculateTransforms(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform());
+}
+
+
+
+
+
+btSliderConstraint::btSliderConstraint(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB, bool useLinearReferenceFrameA)
+	: btTypedConstraint(SLIDER_CONSTRAINT_TYPE, rbA, rbB)
+	, m_useSolveConstraintObsolete(false)
+	, m_frameInA(frameInA)	// constraint frame relative to rbA (calculateTransforms() composes it with A's transform)
+	, m_frameInB(frameInB)	// constraint frame relative to rbB
+	, m_useLinearReferenceFrameA(useLinearReferenceFrameA)
+{
+	initParams();	// loads the default parameters and computes the cached world-space frames
+}
+
+
+
+btSliderConstraint::btSliderConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameA)
+	: btTypedConstraint(SLIDER_CONSTRAINT_TYPE, getFixedBody(), rbB)
+	, m_useSolveConstraintObsolete(false)
+	// No body A was supplied: getFixedBody() stands in for it, and frame A is
+	// frame B carried through rbB's current centre-of-mass transform.
+	, m_frameInA(rbB.getCenterOfMassTransform() * frameInB)
+	, m_frameInB(frameInB)
+	, m_useLinearReferenceFrameA(useLinearReferenceFrameA)
+{
+	// load the default parameters and compute the cached world-space frames
+	initParams();
+}
+
+
+
+
+
+
+void btSliderConstraint::getInfo1(btConstraintInfo1* info)
+{
+	if (m_useSolveConstraintObsolete)
+	{	// the obsolete solver path contributes no rows at all
+		info->m_numConstraintRows = 0;
+		info->nub = 0;
+		return;
+	}
+	// rows that are always present: 2 linear + 2 angular
+	info->m_numConstraintRows = 4;
+	info->nub = 2;
+	// bring the cached frames and the limit state up to date before querying them
+	calculateTransforms(m_rbA.getCenterOfMassTransform(), m_rbB.getCenterOfMassTransform());
+	testAngLimits();
+	testLinLimits();
+	// an active limit or a powered motor adds one more row each: first the linear one, then the angular one
+	if (getSolveLinLimit() || getPoweredLinMotor())
+	{
+		info->m_numConstraintRows += 1;
+		info->nub -= 1;
+	}
+	if (getSolveAngLimit() || getPoweredAngMotor())
+	{
+		info->m_numConstraintRows += 1;
+		info->nub -= 1;
+	}
+}
+
+void btSliderConstraint::getInfo1NonVirtual(btConstraintInfo1* info)
+{
+	// Always report the maximum: 2 linear + 2 angular + 2 limit/motor rows (even if not used).
+	info->nub = 0;
+	info->m_numConstraintRows = 6;
+}
+
+void btSliderConstraint::getInfo2(btConstraintInfo2* info)
+{
+	// Gather the current state of both bodies and hand it to the non-virtual worker.
+	const btTransform& worldA = m_rbA.getCenterOfMassTransform();
+	const btTransform& worldB = m_rbB.getCenterOfMassTransform();
+	getInfo2NonVirtual(info, worldA, worldB,
+		m_rbA.getLinearVelocity(), m_rbB.getLinearVelocity(),
+		m_rbA.getInvMass(), m_rbB.getInvMass());
+}
+
+
+void btSliderConstraint::calculateTransforms(const btTransform& transA,const btTransform& transB)
+{
+	// World-space constraint frames. They are stored swapped only on the
+	// obsolete solver path when frame B is the linear reference.
+	const bool swapFrames = m_useSolveConstraintObsolete && !m_useLinearReferenceFrameA;
+	if (swapFrames)
+	{
+		m_calculatedTransformA = transB * m_frameInB;
+		m_calculatedTransformB = transA * m_frameInA;
+	}
+	else
+	{
+		m_calculatedTransformA = transA * m_frameInA;
+		m_calculatedTransformB = transB * m_frameInB;
+	}
+
+	m_realPivotAInW = m_calculatedTransformA.getOrigin();
+	m_realPivotBInW = m_calculatedTransformB.getOrigin();
+	// the slider runs along the X axis of (calculated) frame A
+	m_sliderAxis = m_calculatedTransformA.getBasis().getColumn(0);
+
+	// Pivot separation is B - A, except on the current solver path with
+	// frame B as the linear reference, where the sign is flipped.
+	const bool fromAToB = m_useLinearReferenceFrameA || m_useSolveConstraintObsolete;
+	m_delta = fromAToB ? (m_realPivotBInW - m_realPivotAInW) : (m_realPivotAInW - m_realPivotBInW);
+	m_projPivotInW = m_realPivotAInW + m_sliderAxis.dot(m_delta) * m_sliderAxis;
+
+	// express the separation along the three axes of frame A; component 0 lies along the slider
+	for (int axis = 0; axis < 3; axis++)
+	{
+		m_depth[axis] = m_delta.dot(m_calculatedTransformA.getBasis().getColumn(axis));
+	}
+}
+ 
+
+
+void btSliderConstraint::testLinLimits(void)
+{
+	// Remember the raw position along the slider axis before m_depth[0] is
+	// overwritten with the limit violation below.
+	m_linPos = m_depth[0];
+	m_solveLinLim = false;
+
+	// lower > upper means the travel is unlimited: there is nothing to correct
+	if (!(m_lowerLinLimit <= m_upperLinLimit))
+	{
+		m_depth[0] = btScalar(0.);
+		return;
+	}
+
+	if (m_depth[0] > m_upperLinLimit)
+	{	// beyond the upper stop: keep the signed overshoot
+		m_depth[0] -= m_upperLinLimit;
+		m_solveLinLim = true;
+	}
+	else if (m_depth[0] < m_lowerLinLimit)
+	{	// beyond the lower stop: keep the signed (negative) overshoot
+		m_depth[0] -= m_lowerLinLimit;
+		m_solveLinLim = true;
+	}
+	else m_depth[0] = btScalar(0.);	// inside the limits
+}
+
+
+
+void btSliderConstraint::testAngLimits(void)
+{
+	m_solveAngLim = false;
+	m_angDepth = btScalar(0.);
+	// lower > upper switches the angular limit off; m_angPos is left untouched in that case
+	if (!(m_lowerAngLimit <= m_upperAngLimit))
+		return;
+
+	// Angle of frame B's Y axis measured in the Y/Z plane of frame A,
+	// i.e. the rotation about the slider (X) axis.
+	const btVector3 yAxisA = m_calculatedTransformA.getBasis().getColumn(1);
+	const btVector3 zAxisA = m_calculatedTransformA.getBasis().getColumn(2);
+	const btVector3 yAxisB = m_calculatedTransformB.getBasis().getColumn(1);
+	const btScalar rawAngle = btAtan2(yAxisB.dot(zAxisA), yAxisB.dot(yAxisA));
+	const btScalar angle = btAdjustAngleToLimits(rawAngle, m_lowerAngLimit, m_upperAngLimit);
+	m_angPos = angle;
+
+	// outside the limits: record the signed violation and flag the limit as active
+	const bool belowLower = angle < m_lowerAngLimit;
+	if (belowLower || angle > m_upperAngLimit)
+	{
+		m_angDepth = angle - (belowLower ? m_lowerAngLimit : m_upperAngLimit);
+		m_solveAngLim = true;
+	}
+}
+
+btVector3 btSliderConstraint::getAncorInA(void)
+{
+	// world-space point on the slider axis, offset from pivot A by the midpoint of the linear limit range ...
+	const btVector3 anchorInW = m_realPivotAInW + (m_lowerLinLimit + m_upperLinLimit) * btScalar(0.5) * m_sliderAxis;
+	// ... expressed in body A's local frame
+	return m_rbA.getCenterOfMassTransform().inverse() * anchorInW;
+}
+
+
+
+btVector3 btSliderConstraint::getAncorInB(void)
+{
+	// The anchor on body B is simply the origin of the constraint frame
+	// stored relative to B (m_frameInB).
+	return m_frameInB.getOrigin();
+}
+
+
+void btSliderConstraint::getInfo2NonVirtual(btConstraintInfo2* info, const btTransform& transA,const btTransform& transB, const btVector3& linVelA,const btVector3& linVelB, btScalar rbAinvMass,btScalar rbBinvMass  )
+{ // fills the constraint rows in info: rows 0-1 angular (orthogonal to the slider axis), rows 2-3 linear (orthogonal), then an optional linear and an optional angular limit/motor row
+	const btTransform& trA = getCalculatedTransformA();
+	const btTransform& trB = getCalculatedTransformB();
+	// trA/trB are the frames cached by the last calculateTransforms() call; transA/transB only supply the body origins further down
+	btAssert(!m_useSolveConstraintObsolete);
+	int i, s = info->rowskip;
+	// signFact is applied below to the linear limit error, the linear motor target and the linear bounce velocity
+	btScalar signFact = m_useLinearReferenceFrameA ? btScalar(1.0f) : btScalar(-1.0f);
+	
+	// difference between frames in WCS
+	btVector3 ofs = trB.getOrigin() - trA.getOrigin();
+	// now get weight factors depending on masses
+	btScalar miA = rbAinvMass;
+	btScalar miB = rbBinvMass;
+	bool hasStaticBody = (miA < SIMD_EPSILON) || (miB < SIMD_EPSILON);
+	btScalar miS = miA + miB;
+	btScalar factA, factB;
+	if(miS > btScalar(0.f))
+	{
+		factA = miB / miS;
+	}
+	else 
+	{
+		factA = btScalar(0.5f);
+	}
+	factB = btScalar(1.0f) - factA;
+	btVector3 ax1, p, q;
+	btVector3 ax1A = trA.getBasis().getColumn(0);
+	btVector3 ax1B = trB.getBasis().getColumn(0);
+	if(m_useOffsetForConstraintFrame)
+	{
+		// get the desired direction of slider axis
+		// as weighted sum of X-orthos of frameA and frameB in WCS
+		ax1 = ax1A * factA + ax1B * factB;
+		ax1.normalize();
+		// construct two orthos to slider axis
+		btPlaneSpace1 (ax1, p, q);
+	}
+	else
+	{ // old way - use frameA
+		ax1 = trA.getBasis().getColumn(0);
+		// get 2 orthos to slider axis (Y, Z)
+		p = trA.getBasis().getColumn(1);
+		q = trA.getBasis().getColumn(2);
+	}
+	// make rotations around these orthos equal
+	// the slider axis should be the only unconstrained
+	// rotational axis, the angular velocity of the two bodies perpendicular to
+	// the slider axis should be equal. thus the constraint equations are
+	//    p*w1 - p*w2 = 0
+	//    q*w1 - q*w2 = 0
+	// where p and q are unit vectors normal to the slider axis, and w1 and w2
+	// are the angular velocity vectors of the two bodies.
+	info->m_J1angularAxis[0] = p[0];
+	info->m_J1angularAxis[1] = p[1];
+	info->m_J1angularAxis[2] = p[2];
+	info->m_J1angularAxis[s+0] = q[0];
+	info->m_J1angularAxis[s+1] = q[1];
+	info->m_J1angularAxis[s+2] = q[2];
+
+	info->m_J2angularAxis[0] = -p[0];
+	info->m_J2angularAxis[1] = -p[1];
+	info->m_J2angularAxis[2] = -p[2];
+	info->m_J2angularAxis[s+0] = -q[0];
+	info->m_J2angularAxis[s+1] = -q[1];
+	info->m_J2angularAxis[s+2] = -q[2];
+	// compute the right hand side of the constraint equation. set relative
+	// body velocities along p and q to bring the slider back into alignment.
+	// if ax1A,ax1B are the unit length slider axes as computed from bodyA and
+	// bodyB, we need to rotate both bodies along the axis u = (ax1 x ax2).
+	// if "theta" is the angle between ax1 and ax2, we need an angular velocity
+	// along u to cover angle erp*theta in one step :
+	//   |angular_velocity| = angle/time = erp*theta / stepsize
+	//                      = (erp*fps) * theta
+	//    angular_velocity  = |angular_velocity| * (ax1 x ax2) / |ax1 x ax2|
+	//                      = (erp*fps) * theta * (ax1 x ax2) / sin(theta)
+	// ...as ax1 and ax2 are unit length. if theta is smallish,
+	// theta ~= sin(theta), so
+	//    angular_velocity  = (erp*fps) * (ax1 x ax2)
+	// ax1 x ax2 is in the plane space of ax1, so we project the angular
+	// velocity to p and q to find the right hand side.
+//	btScalar k = info->fps * info->erp * getSoftnessOrthoAng();
+	btScalar currERP = (m_flags & BT_SLIDER_FLAGS_ERP_ORTANG) ? m_softnessOrthoAng : m_softnessOrthoAng * info->erp;
+	btScalar k = info->fps * currERP;
+
+	btVector3 u = ax1A.cross(ax1B);
+	info->m_constraintError[0] = k * u.dot(p);
+	info->m_constraintError[s] = k * u.dot(q);
+	if(m_flags & BT_SLIDER_FLAGS_CFM_ORTANG)
+	{
+		info->cfm[0] = m_cfmOrthoAng;
+		info->cfm[s] = m_cfmOrthoAng;
+	}
+
+	int nrow = 1; // last filled row
+	int srow;
+	btScalar limit_err;
+	int limit;
+	int powered;
+
+	// next two rows. 
+	// we want: velA + wA x relA == velB + wB x relB ... but this would
+	// result in three equations, so we project along two orthos to the slider axis
+
+	btTransform bodyA_trans = transA;
+	btTransform bodyB_trans = transB;
+	nrow++;
+	int s2 = nrow * s;
+	nrow++;
+	int s3 = nrow * s;
+	btVector3 tmpA(0,0,0), tmpB(0,0,0), relA(0,0,0), relB(0,0,0), c(0,0,0);
+	if(m_useOffsetForConstraintFrame)
+	{
+		// get vector from bodyB to frameB in WCS
+		relB = trB.getOrigin() - bodyB_trans.getOrigin();
+		// get its projection to slider axis
+		btVector3 projB = ax1 * relB.dot(ax1);
+		// get vector directed from bodyB to slider axis (and orthogonal to it)
+		btVector3 orthoB = relB - projB;
+		// same for bodyA
+		relA = trA.getOrigin() - bodyA_trans.getOrigin();
+		btVector3 projA = ax1 * relA.dot(ax1);
+		btVector3 orthoA = relA - projA;
+		// get desired offset between frames A and B along slider axis
+		btScalar sliderOffs = m_linPos - m_depth[0];
+		// desired vector from projection of center of bodyA to projection of center of bodyB to slider axis
+		btVector3 totalDist = projA + ax1 * sliderOffs - projB;
+		// get offset vectors relA and relB
+		relA = orthoA + totalDist * factA;
+		relB = orthoB - totalDist * factB;
+		// now choose average ortho to slider axis
+		p = orthoB * factA + orthoA * factB;
+		btScalar len2 = p.length2();
+		if(len2 > SIMD_EPSILON)
+		{
+			p /= btSqrt(len2);
+		}
+		else
+		{
+			p = trA.getBasis().getColumn(1);
+		}
+		// make one more ortho
+		q = ax1.cross(p);
+		// fill two rows
+		tmpA = relA.cross(p);
+		tmpB = relB.cross(p);
+		for (i=0; i<3; i++) info->m_J1angularAxis[s2+i] = tmpA[i];
+		for (i=0; i<3; i++) info->m_J2angularAxis[s2+i] = -tmpB[i];
+		tmpA = relA.cross(q);
+		tmpB = relB.cross(q);
+		if(hasStaticBody && getSolveAngLimit())
+		{ // to make constraint between static and dynamic objects more rigid
+			// remove wA (or wB) from equation if angular limit is hit
+			tmpB *= factB;
+			tmpA *= factA;
+		}
+		for (i=0; i<3; i++) info->m_J1angularAxis[s3+i] = tmpA[i];
+		for (i=0; i<3; i++) info->m_J2angularAxis[s3+i] = -tmpB[i];
+		for (i=0; i<3; i++) info->m_J1linearAxis[s2+i] = p[i];
+		for (i=0; i<3; i++) info->m_J1linearAxis[s3+i] = q[i];
+	}
+	else
+	{	// old way - maybe incorrect if bodies are not on the slider axis
+		// see discussion "Bug in slider constraint" http://bulletphysics.org/Bullet/phpBB3/viewtopic.php?f=9&t=4024&start=0
+		c = bodyB_trans.getOrigin() - bodyA_trans.getOrigin();
+		btVector3 tmp = c.cross(p);
+		for (i=0; i<3; i++) info->m_J1angularAxis[s2+i] = factA*tmp[i];
+		for (i=0; i<3; i++) info->m_J2angularAxis[s2+i] = factB*tmp[i];
+		tmp = c.cross(q);
+		for (i=0; i<3; i++) info->m_J1angularAxis[s3+i] = factA*tmp[i];
+		for (i=0; i<3; i++) info->m_J2angularAxis[s3+i] = factB*tmp[i];
+
+		for (i=0; i<3; i++) info->m_J1linearAxis[s2+i] = p[i];
+		for (i=0; i<3; i++) info->m_J1linearAxis[s3+i] = q[i];
+	}
+	// compute two elements of right hand side
+
+	//	k = info->fps * info->erp * getSoftnessOrthoLin();
+	currERP = (m_flags & BT_SLIDER_FLAGS_ERP_ORTLIN) ? m_softnessOrthoLin : m_softnessOrthoLin * info->erp;
+	k = info->fps * currERP;
+
+	btScalar rhs = k * p.dot(ofs);
+	info->m_constraintError[s2] = rhs;
+	rhs = k * q.dot(ofs);
+	info->m_constraintError[s3] = rhs;
+	if(m_flags & BT_SLIDER_FLAGS_CFM_ORTLIN)
+	{
+		info->cfm[s2] = m_cfmOrthoLin;
+		info->cfm[s3] = m_cfmOrthoLin;
+	}
+
+
+	// check linear limits
+	limit_err = btScalar(0.0);
+	limit = 0;
+	if(getSolveLinLimit())
+	{
+		limit_err = getLinDepth() *  signFact;
+		limit = (limit_err > btScalar(0.0)) ? 2 : 1; // 1 selects the (-inf, 0] bound below, 2 the [0, +inf) bound
+	}
+	powered = 0;
+	if(getPoweredLinMotor())
+	{
+		powered = 1;
+	}
+	// if the slider has joint limits or motor, add in the extra row
+	if (limit || powered) 
+	{
+		nrow++;
+		srow = nrow * info->rowskip;
+		info->m_J1linearAxis[srow+0] = ax1[0];
+		info->m_J1linearAxis[srow+1] = ax1[1];
+		info->m_J1linearAxis[srow+2] = ax1[2];
+		// linear torque decoupling step:
+		//
+		// we have to be careful that the linear constraint forces (+/- ax1) applied to the two bodies
+		// do not create a torque couple. in other words, the points that the
+		// constraint force is applied at must lie along the same ax1 axis.
+		// a torque couple will result in limited slider-jointed free
+		// bodies from gaining angular momentum.
+		if(m_useOffsetForConstraintFrame)
+		{
+			// this is needed only when bodyA and bodyB are both dynamic.
+			if(!hasStaticBody)
+			{
+				tmpA = relA.cross(ax1);
+				tmpB = relB.cross(ax1);
+				info->m_J1angularAxis[srow+0] = tmpA[0];
+				info->m_J1angularAxis[srow+1] = tmpA[1];
+				info->m_J1angularAxis[srow+2] = tmpA[2];
+				info->m_J2angularAxis[srow+0] = -tmpB[0];
+				info->m_J2angularAxis[srow+1] = -tmpB[1];
+				info->m_J2angularAxis[srow+2] = -tmpB[2];
+			}
+		}
+		else
+		{ // The old way. May be incorrect if bodies are not on the slider axis
+			btVector3 ltd;	// Linear Torque Decoupling vector (a torque)
+			ltd = c.cross(ax1);
+			info->m_J1angularAxis[srow+0] = factA*ltd[0];
+			info->m_J1angularAxis[srow+1] = factA*ltd[1];
+			info->m_J1angularAxis[srow+2] = factA*ltd[2];
+			info->m_J2angularAxis[srow+0] = factB*ltd[0];
+			info->m_J2angularAxis[srow+1] = factB*ltd[1];
+			info->m_J2angularAxis[srow+2] = factB*ltd[2];
+		}
+		// right-hand part
+		btScalar lostop = getLowerLinLimit();
+		btScalar histop = getUpperLinLimit();
+		if(limit && (lostop == histop))
+		{  // the joint motor is ineffective
+			powered = 0;
+		}
+		info->m_constraintError[srow] = 0.;
+		info->m_lowerLimit[srow] = 0.;
+		info->m_upperLimit[srow] = 0.;
+		currERP = (m_flags & BT_SLIDER_FLAGS_ERP_LIMLIN) ? m_softnessLimLin : info->erp;
+		if(powered)
+		{
+			if(m_flags & BT_SLIDER_FLAGS_CFM_DIRLIN)
+			{
+				info->cfm[srow] = m_cfmDirLin;
+			}
+			btScalar tag_vel = getTargetLinMotorVelocity();
+			btScalar mot_fact = getMotorFactor(m_linPos, m_lowerLinLimit, m_upperLinLimit, tag_vel, info->fps * currERP);
+			info->m_constraintError[srow] -= signFact * mot_fact * getTargetLinMotorVelocity();
+			info->m_lowerLimit[srow] += -getMaxLinMotorForce() * info->fps;
+			info->m_upperLimit[srow] += getMaxLinMotorForce() * info->fps;
+		}
+		if(limit)
+		{
+			k = info->fps * currERP;
+			info->m_constraintError[srow] += k * limit_err;
+			if(m_flags & BT_SLIDER_FLAGS_CFM_LIMLIN)
+			{
+				info->cfm[srow] = m_cfmLimLin;
+			}
+			if(lostop == histop) 
+			{	// limited low and high simultaneously
+				info->m_lowerLimit[srow] = -SIMD_INFINITY;
+				info->m_upperLimit[srow] = SIMD_INFINITY;
+			}
+			else if(limit == 1) 
+			{ // low limit
+				info->m_lowerLimit[srow] = -SIMD_INFINITY;
+				info->m_upperLimit[srow] = 0;
+			}
+			else 
+			{ // high limit
+				info->m_lowerLimit[srow] = 0;
+				info->m_upperLimit[srow] = SIMD_INFINITY;
+			}
+			// bounce (we'll use slider parameter abs(1.0 - m_dampingLimLin) for that)
+			btScalar bounce = btFabs(btScalar(1.0) - getDampingLimLin());
+			if(bounce > btScalar(0.0))
+			{
+				btScalar vel = linVelA.dot(ax1);
+				vel -= linVelB.dot(ax1);
+				vel *= signFact;
+				// only apply bounce if the velocity is incoming, and if the
+				// resulting c[] exceeds what we already have.
+				if(limit == 1)
+				{	// low limit
+					if(vel < 0)
+					{
+						btScalar newc = -bounce * vel;
+						if (newc > info->m_constraintError[srow])
+						{
+							info->m_constraintError[srow] = newc;
+						}
+					}
+				}
+				else
+				{ // high limit - all those computations are reversed
+					if(vel > 0)
+					{
+						btScalar newc = -bounce * vel;
+						if(newc < info->m_constraintError[srow]) 
+						{
+							info->m_constraintError[srow] = newc;
+						}
+					}
+				}
+			}
+			info->m_constraintError[srow] *= getSoftnessLimLin();
+		} // if(limit)
+	} // if linear limit
+	// check angular limits
+	limit_err = btScalar(0.0);
+	limit = 0;
+	if(getSolveAngLimit())
+	{
+		limit_err = getAngDepth();
+		limit = (limit_err > btScalar(0.0)) ? 1 : 2; // here 1 selects the [0, +inf) bound below and 2 the (-inf, 0] bound
+	}
+	// if the slider has joint limits, add in the extra row
+	powered = 0;
+	if(getPoweredAngMotor())
+	{
+		powered = 1;
+	}
+	if(limit || powered) 
+	{
+		nrow++;
+		srow = nrow * info->rowskip;
+		info->m_J1angularAxis[srow+0] = ax1[0];
+		info->m_J1angularAxis[srow+1] = ax1[1];
+		info->m_J1angularAxis[srow+2] = ax1[2];
+
+		info->m_J2angularAxis[srow+0] = -ax1[0];
+		info->m_J2angularAxis[srow+1] = -ax1[1];
+		info->m_J2angularAxis[srow+2] = -ax1[2];
+
+		btScalar lostop = getLowerAngLimit();
+		btScalar histop = getUpperAngLimit();
+		if(limit && (lostop == histop))
+		{  // the joint motor is ineffective
+			powered = 0;
+		}
+		currERP = (m_flags & BT_SLIDER_FLAGS_ERP_LIMANG) ? m_softnessLimAng : info->erp;
+		if(powered)
+		{
+			if(m_flags & BT_SLIDER_FLAGS_CFM_DIRANG)
+			{
+				info->cfm[srow] = m_cfmDirAng;
+			}
+			btScalar mot_fact = getMotorFactor(m_angPos, m_lowerAngLimit, m_upperAngLimit, getTargetAngMotorVelocity(), info->fps * currERP);
+			info->m_constraintError[srow] = mot_fact * getTargetAngMotorVelocity();
+			info->m_lowerLimit[srow] = -getMaxAngMotorForce() * info->fps;
+			info->m_upperLimit[srow] = getMaxAngMotorForce() * info->fps;
+		}
+		if(limit)
+		{
+			k = info->fps * currERP;
+			info->m_constraintError[srow] += k * limit_err; // NOTE(review): unlike the linear row, this entry is not reset first when no motor is powered - presumably the caller hands in zeroed rows; confirm
+			if(m_flags & BT_SLIDER_FLAGS_CFM_LIMANG)
+			{
+				info->cfm[srow] = m_cfmLimAng;
+			}
+			if(lostop == histop) 
+			{
+				// limited low and high simultaneously
+				info->m_lowerLimit[srow] = -SIMD_INFINITY;
+				info->m_upperLimit[srow] = SIMD_INFINITY;
+			}
+			else if(limit == 1) 
+			{ // low limit
+				info->m_lowerLimit[srow] = 0;
+				info->m_upperLimit[srow] = SIMD_INFINITY;
+			}
+			else 
+			{ // high limit
+				info->m_lowerLimit[srow] = -SIMD_INFINITY;
+				info->m_upperLimit[srow] = 0;
+			}
+			// bounce (we'll use slider parameter abs(1.0 - m_dampingLimAng) for that)
+			btScalar bounce = btFabs(btScalar(1.0) - getDampingLimAng());
+			if(bounce > btScalar(0.0))
+			{
+				btScalar vel = m_rbA.getAngularVelocity().dot(ax1); // NOTE(review): read from the stored bodies, whereas the linear row uses the linVelA/linVelB arguments
+				vel -= m_rbB.getAngularVelocity().dot(ax1);
+				// only apply bounce if the velocity is incoming, and if the
+				// resulting c[] exceeds what we already have.
+				if(limit == 1)
+				{	// low limit
+					if(vel < 0)
+					{
+						btScalar newc = -bounce * vel;
+						if(newc > info->m_constraintError[srow])
+						{
+							info->m_constraintError[srow] = newc;
+						}
+					}
+				}
+				else
+				{	// high limit - all those computations are reversed
+					if(vel > 0)
+					{
+						btScalar newc = -bounce * vel;
+						if(newc < info->m_constraintError[srow])
+						{
+							info->m_constraintError[srow] = newc;
+						}
+					}
+				}
+			}
+			info->m_constraintError[srow] *= getSoftnessLimAng();
+		} // if(limit)
+	} // if angular limit or powered
+}
+
+
+///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+///If no axis is provided, it uses the default axis for this constraint.
+void btSliderConstraint::setParam(int num, btScalar value, int axis)
+{
+	// fold axis into a parameter group: <1 slider-axis linear, 1..2 orthogonal linear, 3 slider-axis angular, 4..5 orthogonal angular
+	enum { GRP_AXIS_LIN, GRP_ORTHO_LIN, GRP_AXIS_ANG, GRP_ORTHO_ANG, GRP_INVALID };
+	int group = GRP_INVALID;
+	if(axis < 1) group = GRP_AXIS_LIN;
+	else if(axis < 3) group = GRP_ORTHO_LIN;
+	else if(axis == 3) group = GRP_AXIS_ANG;
+	else if(axis < 6) group = GRP_ORTHO_ANG;
+	switch(num)
+	{
+	case BT_CONSTRAINT_STOP_ERP :
+		switch(group)
+		{
+		case GRP_AXIS_LIN :
+			m_softnessLimLin = value;
+			m_flags |= BT_SLIDER_FLAGS_ERP_LIMLIN;
+			break;
+		case GRP_ORTHO_LIN :
+			m_softnessOrthoLin = value;
+			m_flags |= BT_SLIDER_FLAGS_ERP_ORTLIN;
+			break;
+		case GRP_AXIS_ANG :
+			m_softnessLimAng = value;
+			m_flags |= BT_SLIDER_FLAGS_ERP_LIMANG;
+			break;
+		case GRP_ORTHO_ANG :
+			m_softnessOrthoAng = value;
+			m_flags |= BT_SLIDER_FLAGS_ERP_ORTANG;
+			break;
+		default :
+			btAssertConstrParams(0);
+		}
+		break;
+	case BT_CONSTRAINT_CFM :
+		switch(group)
+		{
+		case GRP_AXIS_LIN :
+			m_cfmDirLin = value;
+			m_flags |= BT_SLIDER_FLAGS_CFM_DIRLIN;
+			break;
+		case GRP_AXIS_ANG :
+			m_cfmDirAng = value;
+			m_flags |= BT_SLIDER_FLAGS_CFM_DIRANG;
+			break;
+		default :
+			btAssertConstrParams(0);
+		}
+		break;
+	case BT_CONSTRAINT_STOP_CFM :
+		switch(group)
+		{
+		case GRP_AXIS_LIN :
+			m_cfmLimLin = value;
+			m_flags |= BT_SLIDER_FLAGS_CFM_LIMLIN;
+			break;
+		case GRP_ORTHO_LIN :
+			m_cfmOrthoLin = value;
+			m_flags |= BT_SLIDER_FLAGS_CFM_ORTLIN;
+			break;
+		case GRP_AXIS_ANG :
+			m_cfmLimAng = value;
+			m_flags |= BT_SLIDER_FLAGS_CFM_LIMANG;
+			break;
+		case GRP_ORTHO_ANG :
+			m_cfmOrthoAng = value;
+			m_flags |= BT_SLIDER_FLAGS_CFM_ORTANG;
+			break;
+		default :
+			btAssertConstrParams(0);
+		}
+		break;
+	}
+}
+
+///return the local value of parameter
+btScalar btSliderConstraint::getParam(int num, int axis) const
+{
+	// fold axis into a parameter group: <1 slider-axis linear, 1..2 orthogonal linear, 3 slider-axis angular, 4..5 orthogonal angular
+	enum { GRP_AXIS_LIN, GRP_ORTHO_LIN, GRP_AXIS_ANG, GRP_ORTHO_ANG, GRP_INVALID };
+	int group = GRP_INVALID;
+	if(axis < 1) group = GRP_AXIS_LIN;
+	else if(axis < 3) group = GRP_ORTHO_LIN;
+	else if(axis == 3) group = GRP_AXIS_ANG;
+	else if(axis < 6) group = GRP_ORTHO_ANG;
+	btScalar retVal(SIMD_INFINITY);	// returned unchanged when num or axis matches nothing below
+	switch(num)
+	{
+	case BT_CONSTRAINT_STOP_ERP :
+		switch(group)
+		{
+		case GRP_AXIS_LIN :
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_ERP_LIMLIN);
+			retVal = m_softnessLimLin;
+			break;
+		case GRP_ORTHO_LIN :
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_ERP_ORTLIN);
+			retVal = m_softnessOrthoLin;
+			break;
+		case GRP_AXIS_ANG :
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_ERP_LIMANG);
+			retVal = m_softnessLimAng;
+			break;
+		case GRP_ORTHO_ANG :
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_ERP_ORTANG);
+			retVal = m_softnessOrthoAng;
+			break;
+		default :
+			btAssertConstrParams(0);
+		}
+		break;
+	case BT_CONSTRAINT_CFM :
+		switch(group)
+		{
+		case GRP_AXIS_LIN :
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_CFM_DIRLIN);
+			retVal = m_cfmDirLin;
+			break;
+		case GRP_AXIS_ANG :
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_CFM_DIRANG);
+			retVal = m_cfmDirAng;
+			break;
+		default :
+			btAssertConstrParams(0);
+		}
+		break;
+	case BT_CONSTRAINT_STOP_CFM :
+		switch(group)
+		{
+		case GRP_AXIS_LIN :
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_CFM_LIMLIN);
+			retVal = m_cfmLimLin;
+			break;
+		case GRP_ORTHO_LIN :
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_CFM_ORTLIN);
+			retVal = m_cfmOrthoLin;
+			break;
+		case GRP_AXIS_ANG :
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_CFM_LIMANG);
+			retVal = m_cfmLimAng;
+			break;
+		case GRP_ORTHO_ANG :
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_CFM_ORTANG);
+			retVal = m_cfmOrthoAng;
+			break;
+		default :
+			btAssertConstrParams(0);
+		}
+		break;
+	}
+	return retVal;
+}
+
+
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btSliderConstraint.h b/src/bullet/BulletDynamics/ConstraintSolver/btSliderConstraint.h
new file mode 100755
index 00000000..2edc8d2b
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btSliderConstraint.h
@@ -0,0 +1,333 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/*
+Added by Roman Ponomarev (rponom@gmail.com)
+April 04, 2008
+
+TODO:
+ - add clamping of accumulated impulse to improve stability
+ - add conversion for ODE constraint solver
+*/
+
+#ifndef BT_SLIDER_CONSTRAINT_H
+#define BT_SLIDER_CONSTRAINT_H
+
+
+
+#include "LinearMath/btVector3.h"
+#include "btJacobianEntry.h"
+#include "btTypedConstraint.h"
+
+
+
+class btRigidBody;
+
+
+
+#define SLIDER_CONSTRAINT_DEF_SOFTNESS		(btScalar(1.0))	// initial value of every m_softness* member (see initParams())
+#define SLIDER_CONSTRAINT_DEF_DAMPING		(btScalar(1.0))	// initial value of the Ortho*/Lim* dampings; the Dir* dampings start at 0 instead
+#define SLIDER_CONSTRAINT_DEF_RESTITUTION	(btScalar(0.7))	// initial value of every m_restitution* member
+#define SLIDER_CONSTRAINT_DEF_CFM			(btScalar(0.f))	// initial value of every m_cfm* member
+
+
+enum btSliderFlags
+{	// bits of btSliderConstraint::m_flags; each one marks a per-constraint CFM/ERP value as overridden
+	BT_SLIDER_FLAGS_CFM_DIRLIN = 0x001,	// m_cfmDirLin is used for the linear motor row
+	BT_SLIDER_FLAGS_ERP_DIRLIN = 0x002,	// not referenced in btSliderConstraint.cpp
+	BT_SLIDER_FLAGS_CFM_DIRANG = 0x004,	// m_cfmDirAng is used for the angular motor row
+	BT_SLIDER_FLAGS_ERP_DIRANG = 0x008,	// not referenced in btSliderConstraint.cpp
+	BT_SLIDER_FLAGS_CFM_ORTLIN = 0x010,	// m_cfmOrthoLin is used for the two orthogonal linear rows
+	BT_SLIDER_FLAGS_ERP_ORTLIN = 0x020,	// m_softnessOrthoLin is used as the ERP of those rows, unscaled
+	BT_SLIDER_FLAGS_CFM_ORTANG = 0x040,	// m_cfmOrthoAng is used for the two orthogonal angular rows
+	BT_SLIDER_FLAGS_ERP_ORTANG = 0x080,	// m_softnessOrthoAng is used as the ERP of those rows, unscaled
+	BT_SLIDER_FLAGS_CFM_LIMLIN = 0x100,	// m_cfmLimLin is used when the linear limit is hit
+	BT_SLIDER_FLAGS_ERP_LIMLIN = 0x200,	// m_softnessLimLin replaces info->erp for the linear limit/motor row
+	BT_SLIDER_FLAGS_CFM_LIMANG = 0x400,	// m_cfmLimAng is used when the angular limit is hit
+	BT_SLIDER_FLAGS_ERP_LIMANG = 0x800	// m_softnessLimAng replaces info->erp for the angular limit/motor row
+};
+
+
+class btSliderConstraint : public btTypedConstraint
+{
+protected:
+	///for backwards compatibility during the transition to 'getInfo/getInfo2'
+	bool		m_useSolveConstraintObsolete;
+	bool		m_useOffsetForConstraintFrame;
+	btTransform	m_frameInA;
+    btTransform	m_frameInB;
+	// use frameA fo define limits, if true
+	bool m_useLinearReferenceFrameA;
+	// linear limits
+	btScalar m_lowerLinLimit;
+	btScalar m_upperLinLimit;
+	// angular limits
+	btScalar m_lowerAngLimit;
+	btScalar m_upperAngLimit;
+	// softness, restitution and damping for different cases
+	// DirLin - moving inside linear limits
+	// LimLin - hitting linear limit
+	// DirAng - moving inside angular limits
+	// LimAng - hitting angular limit
+	// OrthoLin, OrthoAng - against constraint axis
+	btScalar m_softnessDirLin;
+	btScalar m_restitutionDirLin;
+	btScalar m_dampingDirLin;
+	btScalar m_cfmDirLin;
+
+	btScalar m_softnessDirAng;
+	btScalar m_restitutionDirAng;
+	btScalar m_dampingDirAng;
+	btScalar m_cfmDirAng;
+
+	btScalar m_softnessLimLin;
+	btScalar m_restitutionLimLin;
+	btScalar m_dampingLimLin;
+	btScalar m_cfmLimLin;
+
+	btScalar m_softnessLimAng;
+	btScalar m_restitutionLimAng;
+	btScalar m_dampingLimAng;
+	btScalar m_cfmLimAng;
+
+	btScalar m_softnessOrthoLin;
+	btScalar m_restitutionOrthoLin;
+	btScalar m_dampingOrthoLin;
+	btScalar m_cfmOrthoLin;
+
+	btScalar m_softnessOrthoAng;
+	btScalar m_restitutionOrthoAng;
+	btScalar m_dampingOrthoAng;
+	btScalar m_cfmOrthoAng;
+	
+	// for interlal use
+	bool m_solveLinLim;
+	bool m_solveAngLim;
+
+	int m_flags;
+
+	btJacobianEntry	m_jacLin[3];
+	btScalar		m_jacLinDiagABInv[3];
+
+    btJacobianEntry	m_jacAng[3];
+
+	btScalar m_timeStep;
+    btTransform m_calculatedTransformA;
+    btTransform m_calculatedTransformB;
+
+	btVector3 m_sliderAxis;
+	btVector3 m_realPivotAInW;
+	btVector3 m_realPivotBInW;
+	btVector3 m_projPivotInW;
+	btVector3 m_delta;
+	btVector3 m_depth;
+	btVector3 m_relPosA;
+	btVector3 m_relPosB;
+
+	btScalar m_linPos;
+	btScalar m_angPos;
+
+	btScalar m_angDepth;
+	btScalar m_kAngle;
+
+	bool	 m_poweredLinMotor;
+    btScalar m_targetLinMotorVelocity;
+    btScalar m_maxLinMotorForce;
+    btScalar m_accumulatedLinMotorImpulse;
+	
+	bool	 m_poweredAngMotor;
+    btScalar m_targetAngMotorVelocity;
+    btScalar m_maxAngMotorForce;
+    btScalar m_accumulatedAngMotorImpulse;
+
+	//------------------------    
+	void initParams();
+public:
+	// constructors
+    btSliderConstraint(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB ,bool useLinearReferenceFrameA);
+    btSliderConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameA);
+
+	// overrides
+
+    virtual void getInfo1 (btConstraintInfo1* info);
+
+	void getInfo1NonVirtual(btConstraintInfo1* info);
+	
+	virtual void getInfo2 (btConstraintInfo2* info);
+
+	void getInfo2NonVirtual(btConstraintInfo2* info, const btTransform& transA, const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB, btScalar rbAinvMass,btScalar rbBinvMass);
+
+
+	// access
+    const btRigidBody& getRigidBodyA() const { return m_rbA; }
+    const btRigidBody& getRigidBodyB() const { return m_rbB; }
+    const btTransform & getCalculatedTransformA() const { return m_calculatedTransformA; }
+    const btTransform & getCalculatedTransformB() const { return m_calculatedTransformB; }
+    const btTransform & getFrameOffsetA() const { return m_frameInA; }
+    const btTransform & getFrameOffsetB() const { return m_frameInB; }
+    btTransform & getFrameOffsetA() { return m_frameInA; }
+    btTransform & getFrameOffsetB() { return m_frameInB; }
+    btScalar getLowerLinLimit() { return m_lowerLinLimit; }
+    void setLowerLinLimit(btScalar lowerLimit) { m_lowerLinLimit = lowerLimit; }
+    btScalar getUpperLinLimit() { return m_upperLinLimit; }
+    void setUpperLinLimit(btScalar upperLimit) { m_upperLinLimit = upperLimit; }
+    btScalar getLowerAngLimit() { return m_lowerAngLimit; }
+    void setLowerAngLimit(btScalar lowerLimit) { m_lowerAngLimit = btNormalizeAngle(lowerLimit); } // stores the btNormalizeAngle() result, so getLowerAngLimit() returns the normalized value rather than the raw argument
+    btScalar getUpperAngLimit() { return m_upperAngLimit; }
+    void setUpperAngLimit(btScalar upperLimit) { m_upperAngLimit = btNormalizeAngle(upperLimit); } // stores the btNormalizeAngle() result, so getUpperAngLimit() returns the normalized value rather than the raw argument
+	bool getUseLinearReferenceFrameA() { return m_useLinearReferenceFrameA; }
+	btScalar getSoftnessDirLin() { return m_softnessDirLin; }
+	btScalar getRestitutionDirLin() { return m_restitutionDirLin; }
+	btScalar getDampingDirLin() { return m_dampingDirLin ; }
+	btScalar getSoftnessDirAng() { return m_softnessDirAng; }
+	btScalar getRestitutionDirAng() { return m_restitutionDirAng; }
+	btScalar getDampingDirAng() { return m_dampingDirAng; }
+	btScalar getSoftnessLimLin() { return m_softnessLimLin; }
+	btScalar getRestitutionLimLin() { return m_restitutionLimLin; }
+	btScalar getDampingLimLin() { return m_dampingLimLin; }
+	btScalar getSoftnessLimAng() { return m_softnessLimAng; }
+	btScalar getRestitutionLimAng() { return m_restitutionLimAng; }
+	btScalar getDampingLimAng() { return m_dampingLimAng; }
+	btScalar getSoftnessOrthoLin() { return m_softnessOrthoLin; }
+	btScalar getRestitutionOrthoLin() { return m_restitutionOrthoLin; }
+	btScalar getDampingOrthoLin() { return m_dampingOrthoLin; }
+	btScalar getSoftnessOrthoAng() { return m_softnessOrthoAng; }
+	btScalar getRestitutionOrthoAng() { return m_restitutionOrthoAng; }
+	btScalar getDampingOrthoAng() { return m_dampingOrthoAng; }
+	void setSoftnessDirLin(btScalar softnessDirLin) { m_softnessDirLin = softnessDirLin; }
+	void setRestitutionDirLin(btScalar restitutionDirLin) { m_restitutionDirLin = restitutionDirLin; }
+	void setDampingDirLin(btScalar dampingDirLin) { m_dampingDirLin = dampingDirLin; }
+	void setSoftnessDirAng(btScalar softnessDirAng) { m_softnessDirAng = softnessDirAng; }
+	void setRestitutionDirAng(btScalar restitutionDirAng) { m_restitutionDirAng = restitutionDirAng; }
+	void setDampingDirAng(btScalar dampingDirAng) { m_dampingDirAng = dampingDirAng; }
+	void setSoftnessLimLin(btScalar softnessLimLin) { m_softnessLimLin = softnessLimLin; }
+	void setRestitutionLimLin(btScalar restitutionLimLin) { m_restitutionLimLin = restitutionLimLin; }
+	void setDampingLimLin(btScalar dampingLimLin) { m_dampingLimLin = dampingLimLin; }
+	void setSoftnessLimAng(btScalar softnessLimAng) { m_softnessLimAng = softnessLimAng; }
+	void setRestitutionLimAng(btScalar restitutionLimAng) { m_restitutionLimAng = restitutionLimAng; }
+	void setDampingLimAng(btScalar dampingLimAng) { m_dampingLimAng = dampingLimAng; }
+	void setSoftnessOrthoLin(btScalar softnessOrthoLin) { m_softnessOrthoLin = softnessOrthoLin; }
+	void setRestitutionOrthoLin(btScalar restitutionOrthoLin) { m_restitutionOrthoLin = restitutionOrthoLin; }
+	void setDampingOrthoLin(btScalar dampingOrthoLin) { m_dampingOrthoLin = dampingOrthoLin; }
+	void setSoftnessOrthoAng(btScalar softnessOrthoAng) { m_softnessOrthoAng = softnessOrthoAng; }
+	void setRestitutionOrthoAng(btScalar restitutionOrthoAng) { m_restitutionOrthoAng = restitutionOrthoAng; }
+	void setDampingOrthoAng(btScalar dampingOrthoAng) { m_dampingOrthoAng = dampingOrthoAng; }
+	void setPoweredLinMotor(bool onOff) { m_poweredLinMotor = onOff; }
+	bool getPoweredLinMotor() { return m_poweredLinMotor; }
+	void setTargetLinMotorVelocity(btScalar targetLinMotorVelocity) { m_targetLinMotorVelocity = targetLinMotorVelocity; }
+	btScalar getTargetLinMotorVelocity() { return m_targetLinMotorVelocity; }
+	void setMaxLinMotorForce(btScalar maxLinMotorForce) { m_maxLinMotorForce = maxLinMotorForce; }
+	btScalar getMaxLinMotorForce() { return m_maxLinMotorForce; }
+	void setPoweredAngMotor(bool onOff) { m_poweredAngMotor = onOff; }
+	bool getPoweredAngMotor() { return m_poweredAngMotor; }
+	void setTargetAngMotorVelocity(btScalar targetAngMotorVelocity) { m_targetAngMotorVelocity = targetAngMotorVelocity; }
+	btScalar getTargetAngMotorVelocity() { return m_targetAngMotorVelocity; }
+	void setMaxAngMotorForce(btScalar maxAngMotorForce) { m_maxAngMotorForce = maxAngMotorForce; }
+	btScalar getMaxAngMotorForce() { return m_maxAngMotorForce; }
+
+	btScalar getLinearPos() const { return m_linPos; }
+	btScalar getAngularPos() const { return m_angPos; }
+	
+	
+
+	// access for ODE solver
+	bool getSolveLinLimit() { return m_solveLinLim; }
+	btScalar getLinDepth() { return m_depth[0]; }
+	bool getSolveAngLimit() { return m_solveAngLim; }
+	btScalar getAngDepth() { return m_angDepth; }
+	// shared code used by ODE solver
+	void	calculateTransforms(const btTransform& transA,const btTransform& transB);
+	void	testLinLimits();
+	void	testAngLimits();
+	// access for PE Solver
+	btVector3 getAncorInA();
+	btVector3 getAncorInB();
+	// access for UseFrameOffset
+	bool getUseFrameOffset() { return m_useOffsetForConstraintFrame; }
+	void setUseFrameOffset(bool frameOffsetOnOff) { m_useOffsetForConstraintFrame = frameOffsetOnOff; }
+
+	void setFrames(const btTransform& frameA, const btTransform& frameB) // replaces both stored constraint frames, then refreshes the derived state
+	{ 
+		m_frameInA=frameA; 
+		m_frameInB=frameB;
+		calculateTransforms(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform()); // recomputed from the two bodies' current center-of-mass transforms
+		buildJacobian(); // runs after the transforms have been recalculated; its definition is outside this chunk
+	} 
+
+
+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+	///If no axis is provided, it uses the default axis for this constraint.
+	virtual	void	setParam(int num, btScalar value, int axis = -1);
+	///return the local value of parameter
+	virtual	btScalar getParam(int num, int axis = -1) const;
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+
+};
+
+///Record filled by btSliderConstraint::serialize(). Do not change these serialization structures: doing so requires an updated sBulletDNAstr/sBulletDNAstr64
+struct btSliderConstraintData
+{
+	btTypedConstraintData	m_typeConstraintData; // filled by btTypedConstraint::serialize()
+	btTransformFloatData m_rbAFrame; // m_frameInA as floats. NOTE(review): upstream annotated this "constraint axes, assumes z is hinge axis", which looks copied from a hinge constraint - confirm which axis the slider uses
+	btTransformFloatData m_rbBFrame; // m_frameInB as floats
+	
+	float	m_linearUpperLimit; // from m_upperLinLimit
+	float	m_linearLowerLimit; // from m_lowerLinLimit
+
+	float	m_angularUpperLimit; // from m_upperAngLimit
+	float	m_angularLowerLimit; // from m_lowerAngLimit
+
+	int	m_useLinearReferenceFrameA; // both flags are stored in int fields
+	int m_useOffsetForConstraintFrame;
+
+};
+
+
+SIMD_FORCE_INLINE		int	btSliderConstraint::calculateSerializeBufferSize() const
+{
+	return sizeof(btSliderConstraintData); // byte size of the record that serialize() writes into
+}
+
+	///Writes this constraint into dataBuffer as a btSliderConstraintData record and returns that struct's name.
+SIMD_FORCE_INLINE	const char*	btSliderConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	// dataBuffer is treated as a btSliderConstraintData; the caller must supply at least sizeof(btSliderConstraintData) bytes.
+	btSliderConstraintData* const record = static_cast<btSliderConstraintData*>(dataBuffer);
+	// Base-class part first, then the two constraint frames.
+	btTypedConstraint::serialize(&record->m_typeConstraintData, serializer);
+	m_frameInA.serializeFloat(record->m_rbAFrame);
+	m_frameInB.serializeFloat(record->m_rbBFrame);
+
+	// Limits are narrowed to float fields whatever btScalar's width is.
+	record->m_linearLowerLimit = static_cast<float>(m_lowerLinLimit);
+	record->m_linearUpperLimit = static_cast<float>(m_upperLinLimit);
+	record->m_angularLowerLimit = static_cast<float>(m_lowerAngLimit);
+	record->m_angularUpperLimit = static_cast<float>(m_upperAngLimit);
+
+	// Flags go into int fields.
+	record->m_useLinearReferenceFrameA = m_useLinearReferenceFrameA;
+	record->m_useOffsetForConstraintFrame = m_useOffsetForConstraintFrame;
+	return "btSliderConstraintData";
+}
+
+
+
+#endif //BT_SLIDER_CONSTRAINT_H
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.cpp b/src/bullet/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.cpp
new file mode 100644
index 00000000..0c7dbd66
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.cpp
@@ -0,0 +1,255 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#include "btSolve2LinearConstraint.h"
+
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btVector3.h"
+#include "btJacobianEntry.h"
+// Direct 2x2 solve for two coupled constraint rows (A and B) between body1 and body2; the resulting impulses are written to imp0/imp1 without clamping.
+// Both outputs stay zero when normalA is not unit length. linvelA/angvelA/linvelB/angvelB are ignored: velocities are read from the bodies instead.
+void btSolve2LinearConstraint::resolveUnilateralPairConstraint(
+												   btRigidBody* body1,
+		btRigidBody* body2,
+
+						const btMatrix3x3& world2A,
+						const btMatrix3x3& world2B,
+						
+						const btVector3& invInertiaADiag,
+						const btScalar invMassA,
+						const btVector3& linvelA,const btVector3& angvelA,
+						const btVector3& rel_posA1,
+						const btVector3& invInertiaBDiag,
+						const btScalar invMassB,
+						const btVector3& linvelB,const btVector3& angvelB,
+						const btVector3& rel_posA2,
+
+					  btScalar depthA, const btVector3& normalA, 
+					  const btVector3& rel_posB1,const btVector3& rel_posB2,
+					  btScalar depthB, const btVector3& normalB, 
+					  btScalar& imp0,btScalar& imp1)
+{
+	(void)linvelA;
+	(void)linvelB;
+	(void)angvelB;
+	(void)angvelA;
+	// Default result: zero impulses (this is also what the early-out below returns).
+
+
+	imp0 = btScalar(0.);
+	imp1 = btScalar(0.);
+	// Early-out unless |normalA| is within SIMD_EPSILON of 1; normalB is not checked.
+	btScalar len = btFabs(normalA.length()) - btScalar(1.);
+	if (btFabs(len) >= SIMD_EPSILON)
+		return;
+	// NOTE(review): after the early return above, this assert cannot fire.
+	btAssert(len < SIMD_EPSILON);
+
+
+	//this jacobian entry could be re-used for all iterations
+	btJacobianEntry jacA(world2A,world2B,rel_posA1,rel_posA2,normalA,invInertiaADiag,invMassA,
+		invInertiaBDiag,invMassB);
+	btJacobianEntry jacB(world2A,world2B,rel_posB1,rel_posB2,normalB,invInertiaADiag,invMassA,
+		invInertiaBDiag,invMassB);
+	
+	//const btScalar vel0 = jacA.getRelativeVelocity(linvelA,angvelA,linvelB,angvelB);
+	//const btScalar vel1 = jacB.getRelativeVelocity(linvelA,angvelA,linvelB,angvelB);
+	// Relative point velocity projected on each normal. NOTE(review): both bodies are sampled at rel_posA1 / rel_posB1; rel_posA2 / rel_posB2 only reach the jacobians - confirm intended.
+	const btScalar vel0 = normalA.dot(body1->getVelocityInLocalPoint(rel_posA1)-body2->getVelocityInLocalPoint(rel_posA1));
+	const btScalar vel1 = normalB.dot(body1->getVelocityInLocalPoint(rel_posB1)-body2->getVelocityInLocalPoint(rel_posB1));
+
+//	btScalar penetrationImpulse = (depth*contactTau*timeCorrection)  * massTerm;//jacDiagABInv
+	btScalar massTerm = btScalar(1.) / (invMassA + invMassB); // NOTE(review): divides by zero when both inverse masses are 0
+
+
+	// calculate rhs (or error) terms
+	const btScalar dv0 = depthA  * m_tau * massTerm - vel0 * m_damping;
+	const btScalar dv1 = depthB  * m_tau * massTerm - vel1 * m_damping;
+
+
+	// dC/dv * dv = -C
+	
+	// jacobian * impulse = -error
+	//
+
+	//impulse = jacobianInverse * -error
+
+	// inverting 2x2 symmetric system (offdiagonal are equal!)
+	// 
+
+
+	btScalar nonDiag = jacA.getNonDiagonal(jacB,invMassA,invMassB);
+	btScalar	invDet = btScalar(1.0) / (jacA.getDiagonal() * jacB.getDiagonal() - nonDiag * nonDiag ); // NOTE(review): no guard against a zero determinant
+	
+	//imp0 = dv0 * jacA.getDiagonal() * invDet + dv1 * -nonDiag * invDet;
+	//imp1 = dv1 * jacB.getDiagonal() * invDet + dv0 * - nonDiag * invDet;
+	// NOTE(review): for the system sketched at the end of this function, the inverse would pair dv0 with jacB.getDiagonal() and dv1 with jacA.getDiagonal(); the code uses the opposite pairing - check against getDiagonal()'s definition.
+	imp0 = dv0 * jacA.getDiagonal() * invDet + dv1 * -nonDiag * invDet;
+	imp1 = dv1 * jacB.getDiagonal() * invDet + dv0 * - nonDiag * invDet;
+
+	//[a b]								  [d -b]
+	//[c d] inverse = (1 / determinant) * [-c a] where determinant is (ad - bc)
+
+	//[jA nD] * [imp0] = [dv0]
+	//[nD jB]   [imp1]   [dv1]
+
+}
+
+// Solves the same coupled 2x2 system as resolveUnilateralPairConstraint (without the mass term in the rhs), then clamps so that neither imp0 nor imp1 ends up negative.
+// Both outputs stay zero when normalA is not unit length. linvelA/angvelA/linvelB/angvelB are ignored: velocities are read from the bodies instead.
+void btSolve2LinearConstraint::resolveBilateralPairConstraint(
+						btRigidBody* body1,
+						btRigidBody* body2,
+						const btMatrix3x3& world2A,
+						const btMatrix3x3& world2B,
+						
+						const btVector3& invInertiaADiag,
+						const btScalar invMassA,
+						const btVector3& linvelA,const btVector3& angvelA,
+						const btVector3& rel_posA1,
+						const btVector3& invInertiaBDiag,
+						const btScalar invMassB,
+						const btVector3& linvelB,const btVector3& angvelB,
+						const btVector3& rel_posA2,
+
+					  btScalar depthA, const btVector3& normalA, 
+					  const btVector3& rel_posB1,const btVector3& rel_posB2,
+					  btScalar depthB, const btVector3& normalB, 
+					  btScalar& imp0,btScalar& imp1)
+{
+
+	(void)linvelA;
+	(void)linvelB;
+	(void)angvelA;
+	(void)angvelB;
+	// Default result: zero impulses (this is also what the early-out below returns).
+
+
+	imp0 = btScalar(0.);
+	imp1 = btScalar(0.);
+	// Early-out unless |normalA| is within SIMD_EPSILON of 1; normalB is not checked.
+	btScalar len = btFabs(normalA.length()) - btScalar(1.);
+	if (btFabs(len) >= SIMD_EPSILON)
+		return;
+	// NOTE(review): after the early return above, this assert cannot fire.
+	btAssert(len < SIMD_EPSILON);
+
+
+	//this jacobian entry could be re-used for all iterations
+	btJacobianEntry jacA(world2A,world2B,rel_posA1,rel_posA2,normalA,invInertiaADiag,invMassA,
+		invInertiaBDiag,invMassB);
+	btJacobianEntry jacB(world2A,world2B,rel_posB1,rel_posB2,normalB,invInertiaADiag,invMassA,
+		invInertiaBDiag,invMassB);
+	
+	//const btScalar vel0 = jacA.getRelativeVelocity(linvelA,angvelA,linvelB,angvelB);
+	//const btScalar vel1 = jacB.getRelativeVelocity(linvelA,angvelA,linvelB,angvelB);
+	// Relative point velocity projected on each normal. NOTE(review): both bodies are sampled at rel_posA1 / rel_posB1; rel_posA2 / rel_posB2 only reach the jacobians - confirm intended.
+	const btScalar vel0 = normalA.dot(body1->getVelocityInLocalPoint(rel_posA1)-body2->getVelocityInLocalPoint(rel_posA1));
+	const btScalar vel1 = normalB.dot(body1->getVelocityInLocalPoint(rel_posB1)-body2->getVelocityInLocalPoint(rel_posB1));
+
+	// calculate rhs (or error) terms
+	const btScalar dv0 = depthA  * m_tau - vel0 * m_damping;
+	const btScalar dv1 = depthB  * m_tau - vel1 * m_damping;
+
+	// dC/dv * dv = -C
+	
+	// jacobian * impulse = -error
+	//
+
+	//impulse = jacobianInverse * -error
+
+	// inverting 2x2 symmetric system (offdiagonal are equal!)
+	// 
+
+
+	btScalar nonDiag = jacA.getNonDiagonal(jacB,invMassA,invMassB);
+	btScalar	invDet = btScalar(1.0) / (jacA.getDiagonal() * jacB.getDiagonal() - nonDiag * nonDiag ); // NOTE(review): no guard against a zero determinant
+	
+	//imp0 = dv0 * jacA.getDiagonal() * invDet + dv1 * -nonDiag * invDet;
+	//imp1 = dv1 * jacB.getDiagonal() * invDet + dv0 * - nonDiag * invDet;
+	// NOTE(review): for the system sketched below, the inverse would pair dv0 with jacB.getDiagonal() and dv1 with jacA.getDiagonal(); the code uses the opposite pairing - check against getDiagonal()'s definition.
+	imp0 = dv0 * jacA.getDiagonal() * invDet + dv1 * -nonDiag * invDet;
+	imp1 = dv1 * jacB.getDiagonal() * invDet + dv0 * - nonDiag * invDet;
+
+	//[a b]								  [d -b]
+	//[c d] inverse = (1 / determinant) * [-c a] where determinant is (ad - bc)
+
+	//[jA nD] * [imp0] = [dv0]
+	//[nD jB]   [imp1]   [dv1]
+	// Clamp: any impulse that is not strictly positive is zeroed and the other row is re-solved on its own.
+	if ( imp0 > btScalar(0.0))
+	{
+		if ( imp1 > btScalar(0.0) )
+		{
+			//both positive: keep the coupled solution
+		}
+		else
+		{
+			imp1 = btScalar(0.);
+
+			// coupled solve gave imp0>0, imp1<=0: drop row 1 and solve row 0 alone
+			imp0 = dv0 / jacA.getDiagonal();
+			if ( imp0 > btScalar(0.0) )
+			{
+			} else
+			{
+				imp0 = btScalar(0.);
+			}
+		}
+	}
+	else
+	{
+		imp0 = btScalar(0.);
+
+		imp1 = dv1 / jacB.getDiagonal();
+		if ( imp1 <= btScalar(0.0) )
+		{
+			imp1 = btScalar(0.);
+			// coupled imp0<=0 and row 1 alone is also <=0: fall back to row 0 alone
+			imp0 = dv0 / jacA.getDiagonal();
+			if ( imp0 > btScalar(0.0) )
+			{
+			} else
+			{
+				imp0 = btScalar(0.);
+			}
+		} else
+		{
+		}
+	}
+}
+
+
+/*
+void btSolve2LinearConstraint::resolveAngularConstraint(	const btMatrix3x3& invInertiaAWS,
+											const btScalar invMassA,
+											const btVector3& linvelA,const btVector3& angvelA,
+											const btVector3& rel_posA1,
+											const btMatrix3x3& invInertiaBWS,
+											const btScalar invMassB,
+											const btVector3& linvelB,const btVector3& angvelB,
+											const btVector3& rel_posA2,
+
+											btScalar depthA, const btVector3& normalA, 
+											const btVector3& rel_posB1,const btVector3& rel_posB2,
+											btScalar depthB, const btVector3& normalB, 
+											btScalar& imp0,btScalar& imp1)
+{
+
+}
+*/
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.h b/src/bullet/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.h
new file mode 100644
index 00000000..e8bfabf8
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.h
@@ -0,0 +1,107 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOLVE_2LINEAR_CONSTRAINT_H
+#define BT_SOLVE_2LINEAR_CONSTRAINT_H
+
+#include "LinearMath/btMatrix3x3.h"
+#include "LinearMath/btVector3.h"
+
+
+class btRigidBody;
+
+
+
+/// constraint class used for lateral tyre friction.
+class	btSolve2LinearConstraint
+{
+	btScalar	m_tau; // multiplies the depth term of each right-hand side in the resolve* functions
+	btScalar	m_damping; // multiplies the relative-velocity term of each right-hand side
+
+public:
+
+	btSolve2LinearConstraint(btScalar tau,btScalar damping)
+	{
+		m_tau = tau;
+		m_damping = damping;
+	}
+	// Solves both rows as one 2x2 linear system; imp0/imp1 receive the unclamped impulses.
+	// solve unilateral constraint (equality, direct method)
+	// The definition returns zero impulses when normalA is not unit length and ignores the linvel/angvel arguments.
+	void resolveUnilateralPairConstraint(		
+														   btRigidBody* body0,
+		btRigidBody* body1,
+		// (the two bodies are named body1/body2 in the .cpp definition)
+		const btMatrix3x3& world2A,
+						const btMatrix3x3& world2B,
+						
+						const btVector3& invInertiaADiag,
+						const btScalar invMassA,
+						const btVector3& linvelA,const btVector3& angvelA,
+						const btVector3& rel_posA1,
+						const btVector3& invInertiaBDiag,
+						const btScalar invMassB,
+						const btVector3& linvelB,const btVector3& angvelB,
+						const btVector3& rel_posA2,
+
+					  btScalar depthA, const btVector3& normalA, 
+					  const btVector3& rel_posB1,const btVector3& rel_posB2,
+					  btScalar depthB, const btVector3& normalB, 
+					  btScalar& imp0,btScalar& imp1);
+
+
+	// Same 2x2 solve, followed by clamping so that neither imp0 nor imp1 is negative.
+	// solving 2x2 lcp problem (inequality, direct solution )
+	// The definition returns zero impulses when normalA is not unit length and ignores the linvel/angvel arguments.
+	void resolveBilateralPairConstraint(
+			btRigidBody* body0,
+						btRigidBody* body1,
+		const btMatrix3x3& world2A,
+						const btMatrix3x3& world2B,
+						
+						const btVector3& invInertiaADiag,
+						const btScalar invMassA,
+						const btVector3& linvelA,const btVector3& angvelA,
+						const btVector3& rel_posA1,
+						const btVector3& invInertiaBDiag,
+						const btScalar invMassB,
+						const btVector3& linvelB,const btVector3& angvelB,
+						const btVector3& rel_posA2,
+
+					  btScalar depthA, const btVector3& normalA, 
+					  const btVector3& rel_posB1,const btVector3& rel_posB2,
+					  btScalar depthB, const btVector3& normalB, 
+					  btScalar& imp0,btScalar& imp1);
+
+/*
+	void resolveAngularConstraint(	const btMatrix3x3& invInertiaAWS,
+						const btScalar invMassA,
+						const btVector3& linvelA,const btVector3& angvelA,
+						const btVector3& rel_posA1,
+						const btMatrix3x3& invInertiaBWS,
+						const btScalar invMassB,
+						const btVector3& linvelB,const btVector3& angvelB,
+						const btVector3& rel_posA2,
+
+					  btScalar depthA, const btVector3& normalA, 
+					  const btVector3& rel_posB1,const btVector3& rel_posB2,
+					  btScalar depthB, const btVector3& normalB, 
+					  btScalar& imp0,btScalar& imp1);
+
+*/
+
+};
+
+#endif //BT_SOLVE_2LINEAR_CONSTRAINT_H
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btSolverBody.h b/src/bullet/BulletDynamics/ConstraintSolver/btSolverBody.h
new file mode 100644
index 00000000..8de51581
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btSolverBody.h
@@ -0,0 +1,191 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOLVER_BODY_H
+#define BT_SOLVER_BODY_H
+
+class	btRigidBody;
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btMatrix3x3.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btAlignedAllocator.h"
+#include "LinearMath/btTransformUtil.h"
+
+///Until we get other contributions, only use SIMD on Windows, when using Visual Studio 2008 or later, and not double precision
+#ifdef BT_USE_SSE
+#define USE_SIMD 1
+#endif //
+
+
+#ifdef USE_SIMD
+
+struct	btSimdScalar // wrapper around one __m128; only compiled when USE_SIMD is defined
+{
+	SIMD_FORCE_INLINE	btSimdScalar()
+	{
+		// empty body: the union below is left uninitialized
+	}
+
+	SIMD_FORCE_INLINE	btSimdScalar(float	fl) // not explicit: a float converts implicitly
+	:m_vec128 (_mm_set1_ps(fl))
+	{
+	}
+
+	SIMD_FORCE_INLINE	btSimdScalar(__m128 v128) // not explicit: a __m128 converts implicitly
+		:m_vec128(v128)
+	{
+	}
+	union // alternative views of the same storage
+	{
+		__m128		m_vec128;
+		float		m_floats[4];
+		int			m_ints[4];
+		btScalar	m_unusedPadding;
+	};
+	SIMD_FORCE_INLINE	__m128	get128()
+	{
+		return m_vec128;
+	}
+
+	SIMD_FORCE_INLINE	const __m128	get128() const
+	{
+		return m_vec128;
+	}
+
+	SIMD_FORCE_INLINE	void	set128(__m128 v128)
+	{
+		m_vec128 = v128;
+	}
+
+	SIMD_FORCE_INLINE	operator       __m128()       
+	{ 
+		return m_vec128; 
+	}
+	SIMD_FORCE_INLINE	operator const __m128() const 
+	{ 
+		return m_vec128; 
+	}
+	
+	SIMD_FORCE_INLINE	operator float() const // yields only element 0 of m_floats
+	{ 
+		return m_floats[0]; 
+	}
+
+};
+
+///@brief Return the elementwise product of two btSimdScalar (computed with _mm_mul_ps on the wrapped __m128 values)
+SIMD_FORCE_INLINE btSimdScalar 
+operator*(const btSimdScalar& v1, const btSimdScalar& v2) 
+{
+	return btSimdScalar(_mm_mul_ps(v1.get128(),v2.get128()));
+}
+
+///@brief Return the elementwise sum of two btSimdScalar (computed with _mm_add_ps on the wrapped __m128 values)
+SIMD_FORCE_INLINE btSimdScalar 
+operator+(const btSimdScalar& v1, const btSimdScalar& v2) 
+{
+	return btSimdScalar(_mm_add_ps(v1.get128(),v2.get128()));
+}
+
+
+#else
+#define btSimdScalar btScalar
+#endif
+
+///The btSolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance. (This struct is the variant named btSolverBodyObsolete.)
+ATTRIBUTE_ALIGNED64 (struct)	btSolverBodyObsolete
+{
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	btVector3		m_deltaLinearVelocity; // accumulated by applyImpulse(), added to the body in writebackVelocity()
+	btVector3		m_deltaAngularVelocity; // accumulated by applyImpulse(), added to the body in writebackVelocity()
+	btVector3		m_angularFactor; // scales the impulse magnitude applied to the angular components
+	btVector3		m_invMass;
+	btRigidBody*	m_originalBody; // may be null; most methods below check it before dereferencing
+	btVector3		m_pushVelocity; // accumulated by internalApplyPushImpulse()
+	btVector3		m_turnVelocity; // accumulated by internalApplyPushImpulse()
+
+	// Outputs the body's velocity plus the accumulated deltas at rel_pos: linear + angular.cross(rel_pos); zero when there is no original body.
+	SIMD_FORCE_INLINE void	getVelocityInLocalPointObsolete(const btVector3& rel_pos, btVector3& velocity ) const
+	{
+		if (m_originalBody)
+			velocity = m_originalBody->getLinearVelocity()+m_deltaLinearVelocity + (m_originalBody->getAngularVelocity()+m_deltaAngularVelocity).cross(rel_pos);
+		else
+			velocity.setValue(0,0,0);
+	}
+	// Outputs the body's angular velocity plus the accumulated angular delta; zero when there is no original body.
+	SIMD_FORCE_INLINE void	getAngularVelocity(btVector3& angVel) const
+	{
+		if (m_originalBody)
+			angVel = m_originalBody->getAngularVelocity()+m_deltaAngularVelocity;
+		else
+			angVel.setValue(0,0,0);
+	}
+
+	// Accumulates into the delta velocities only; unlike internalApplyPushImpulse() there is no m_originalBody check here.
+	//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
+	SIMD_FORCE_INLINE void applyImpulse(const btVector3& linearComponent, const btVector3& angularComponent,const btScalar impulseMagnitude)
+	{
+		//if (m_invMass)
+		{
+			m_deltaLinearVelocity += linearComponent*impulseMagnitude;
+			m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
+		}
+	}
+	// Same accumulation as applyImpulse(), but into the push/turn velocities and only when an original body is attached.
+	SIMD_FORCE_INLINE void internalApplyPushImpulse(const btVector3& linearComponent, const btVector3& angularComponent,btScalar impulseMagnitude)
+	{
+		if (m_originalBody)
+		{
+			m_pushVelocity += linearComponent*impulseMagnitude;
+			m_turnVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
+		}
+	}
+	// Adds the accumulated deltas to the original body's velocities; the deltas themselves are not reset here.
+	void	writebackVelocity()
+	{
+		if (m_originalBody)
+		{
+			m_originalBody->setLinearVelocity(m_originalBody->getLinearVelocity()+ m_deltaLinearVelocity);
+			m_originalBody->setAngularVelocity(m_originalBody->getAngularVelocity()+m_deltaAngularVelocity);
+			
+			//m_originalBody->setCompanionId(-1);
+		}
+	}
+
+	// As writebackVelocity(), and additionally integrates the push/turn velocities over timeStep into the body's world transform.
+	void	writebackVelocity(btScalar timeStep)
+	{
+        (void) timeStep; // redundant: timeStep is used below
+		if (m_originalBody)
+		{
+			m_originalBody->setLinearVelocity(m_originalBody->getLinearVelocity()+ m_deltaLinearVelocity);
+			m_originalBody->setAngularVelocity(m_originalBody->getAngularVelocity()+m_deltaAngularVelocity);
+			
+			//correct the position/orientation based on push/turn recovery
+			btTransform newTransform;
+			btTransformUtil::integrateTransform(m_originalBody->getWorldTransform(),m_pushVelocity,m_turnVelocity,timeStep,newTransform);
+			m_originalBody->setWorldTransform(newTransform);
+			
+			//m_originalBody->setCompanionId(-1);
+		}
+	}
+	
+
+
+};
+
+#endif //BT_SOLVER_BODY_H
+
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btSolverConstraint.h b/src/bullet/BulletDynamics/ConstraintSolver/btSolverConstraint.h
new file mode 100644
index 00000000..179e79d7
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btSolverConstraint.h
@@ -0,0 +1,98 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOLVER_CONSTRAINT_H
+#define BT_SOLVER_CONSTRAINT_H
+
+class	btRigidBody;
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btMatrix3x3.h"
+#include "btJacobianEntry.h"
+
+//#define NO_FRICTION_TANGENTIALS 1
+#include "btSolverBody.h"
+
+
+///1D constraint along a normal axis between bodyA and bodyB. It can be combined to solve contact and friction constraints.
+ATTRIBUTE_ALIGNED64 (struct)	btSolverConstraint
+{
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	btVector3		m_relpos1CrossNormal;
+	btVector3		m_contactNormal;
+
+	btVector3		m_relpos2CrossNormal;
+	//btVector3		m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal
+
+	btVector3		m_angularComponentA;
+	btVector3		m_angularComponentB;
+	
+	mutable btSimdScalar	m_appliedPushImpulse; // mutable: can be updated through a const btSolverConstraint
+	mutable btSimdScalar	m_appliedImpulse; // mutable: can be updated through a const btSolverConstraint
+	
+	
+	btScalar	m_friction;
+	btScalar	m_jacDiagABInv;
+	union // the int shares storage with a btScalar-sized padding member
+	{
+		int	m_numConsecutiveRowsPerKernel;
+		btScalar	m_unusedPadding0;
+	};
+
+	int	m_overrideNumSolverIterations;
+
+	union // the int shares storage with a btScalar-sized padding member
+	{
+		int			m_frictionIndex;
+		btScalar	m_unusedPadding1;
+	};
+	union // pointer and companion id share storage: only one of the two is meaningful at a time
+	{
+		btRigidBody*	m_solverBodyA;
+		int				m_companionIdA;
+	};
+	union // pointer and companion id share storage: only one of the two is meaningful at a time
+	{
+		btRigidBody*	m_solverBodyB;
+		int				m_companionIdB;
+	};
+	
+	union // the pointer shares storage with a btScalar-sized padding member
+	{
+		void*		m_originalContactPoint;
+		btScalar	m_unusedPadding4;
+	};
+
+	btScalar		m_rhs;
+	btScalar		m_cfm;
+	btScalar		m_lowerLimit;
+	btScalar		m_upperLimit;
+
+	btScalar		m_rhsPenetration;
+
+	enum		btSolverConstraintType // declared here, but no member of this struct has this type
+	{
+		BT_SOLVER_CONTACT_1D = 0,
+		BT_SOLVER_FRICTION_1D
+	};
+};
+
+typedef btAlignedObjectArray<btSolverConstraint>	btConstraintArray;
+
+
+#endif //BT_SOLVER_CONSTRAINT_H
+
+
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btTypedConstraint.cpp b/src/bullet/BulletDynamics/ConstraintSolver/btTypedConstraint.cpp
new file mode 100644
index 00000000..06bde5e7
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btTypedConstraint.cpp
@@ -0,0 +1,220 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btTypedConstraint.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btSerializer.h"
+
+
+#define DEFAULT_DEBUGDRAW_SIZE btScalar(0.3f)	// initial m_dbgDrawSize assigned by both btTypedConstraint constructors
+
+/// Single-body constructor: m_rbB is bound to the shared static body returned by getFixedBody(); every other field gets its default.
+btTypedConstraint::btTypedConstraint(btTypedConstraintType type, btRigidBody& rbA)
+	: btTypedObject(type)
+	, m_userConstraintType(-1)
+	, m_userConstraintId(-1)
+	, m_breakingImpulseThreshold(SIMD_INFINITY)
+	, m_isEnabled(true)
+	, m_needsFeedback(false)
+	, m_overrideNumSolverIterations(-1)
+	, m_rbA(rbA)
+	, m_rbB(getFixedBody())
+	, m_appliedImpulse(btScalar(0.))
+	, m_dbgDrawSize(DEFAULT_DEBUGDRAW_SIZE)
+{
+}
+
+/// Two-body constructor: stores references to rbA and rbB; every other field gets the same default as in the single-body constructor.
+btTypedConstraint::btTypedConstraint(btTypedConstraintType type, btRigidBody& rbA,btRigidBody& rbB)
+	: btTypedObject(type)
+	, m_userConstraintType(-1)
+	, m_userConstraintId(-1)
+	, m_breakingImpulseThreshold(SIMD_INFINITY)
+	, m_isEnabled(true)
+	, m_needsFeedback(false)
+	, m_overrideNumSolverIterations(-1)
+	, m_rbA(rbA)
+	, m_rbB(rbB)
+	, m_appliedImpulse(btScalar(0.))
+	, m_dbgDrawSize(DEFAULT_DEBUGDRAW_SIZE)
+{
+}
+
+
+
+/// Computes a scale factor for a motor acting near the [lowLim, uppLim] limits.
+///  - lowLim >  uppLim : returns 1 (presumably the "no limit" convention - confirm with callers).
+///  - lowLim == uppLim : returns 0.
+///  - otherwise the sign of vel / timeFact selects which limit is checked against pos.
+/// NOTE(review): timeFact == 0 is not guarded; the division then follows floating-point rules.
+btScalar btTypedConstraint::getMotorFactor(btScalar pos, btScalar lowLim, btScalar uppLim, btScalar vel, btScalar timeFact)
+{
+	const btScalar blocked = btScalar(0.0f);
+	const btScalar unrestricted = btScalar(1.0f);
+
+	if (lowLim > uppLim)
+	{
+		return unrestricted;
+	}
+	if (lowLim == uppLim)
+	{
+		return blocked;
+	}
+
+	// Signed step derived from the requested velocity and the time factor.
+	const btScalar travel = vel / timeFact;
+
+	if (travel < btScalar(0.0f))
+	{
+		// Negative step: only the lower limit is considered.
+		if ((pos >= lowLim) && (pos < (lowLim - travel)))
+		{
+			// Within one step of the lower limit: ratio of remaining distance to the step.
+			return (lowLim - pos) / travel;
+		}
+		// Below the lower limit -> blocked; anywhere else -> unrestricted.
+		return (pos < lowLim) ? blocked : unrestricted;
+	}
+
+	if (travel > btScalar(0.0f))
+	{
+		// Positive step: only the upper limit is considered.
+		if ((pos <= uppLim) && (pos > (uppLim - travel)))
+		{
+			// Within one step of the upper limit: ratio of remaining distance to the step.
+			return (uppLim - pos) / travel;
+		}
+		return (pos > uppLim) ? blocked : unrestricted;
+	}
+
+	// The step is neither negative nor positive (zero or NaN).
+	return blocked;
+}
+
+/// Fills dataBuffer (treated as a btTypedConstraintData) with the fields common to all constraints
+/// and returns the struct name; this implementation always returns "btTypedConstraintData".
+const char*	btTypedConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btTypedConstraintData* out = (btTypedConstraintData*) dataBuffer;
+
+	// Both bodies are stored as whatever getUniquePointer() returns for them.
+	out->m_rbA = (btRigidBodyData*)serializer->getUniquePointer(&m_rbA);
+	out->m_rbB = (btRigidBodyData*)serializer->getUniquePointer(&m_rbB);
+
+	// The name is serialized only when a non-null unique pointer comes back for it.
+	char* constraintName = (char*) serializer->findNameForPointer(this);
+	out->m_name = (char*)serializer->getUniquePointer(constraintName);
+	if (out->m_name)
+		serializer->serializeName(constraintName);
+
+	out->m_objectType = m_objectType;
+	out->m_userConstraintType = m_userConstraintType;
+	out->m_userConstraintId = m_userConstraintId;
+	out->m_needsFeedback = m_needsFeedback;
+	out->m_overrideNumSolverIterations = m_overrideNumSolverIterations;
+	out->m_isEnabled = m_isEnabled ? 1 : 0;
+	out->m_breakingImpulseThreshold = float(m_breakingImpulseThreshold);
+	out->m_appliedImpulse = float(m_appliedImpulse);
+	out->m_dbgDrawSize = float(m_dbgDrawSize);
+
+	// The flag becomes true when either body lists this constraint among its constraint refs.
+	out->m_disableCollisionsBetweenLinkedBodies = false;
+	for (int refA = 0; refA < m_rbA.getNumConstraintRefs(); refA++)
+		if (m_rbA.getConstraintRef(refA) == this)
+			out->m_disableCollisionsBetweenLinkedBodies = true;
+
+	for (int refB = 0; refB < m_rbB.getNumConstraintRefs(); refB++)
+		if (m_rbB.getConstraintRef(refB) == this)
+			out->m_disableCollisionsBetweenLinkedBodies = true;
+
+	return "btTypedConstraintData";
+}
+
+/// Returns the function-local static body that the single-body constructor binds to m_rbB; setMassProps() is re-applied with zeros on every call.
+btRigidBody& btTypedConstraint::getFixedBody()
+{
+	static btRigidBody s_sharedFixedBody(0, 0,0);
+	s_sharedFixedBody.setMassProps(btScalar(0.),btVector3(btScalar(0.),btScalar(0.),btScalar(0.)));
+	return s_sharedFixedBody;
+}
+
+void btAngularLimit::set(btScalar low, btScalar high, btScalar _softness, btScalar _biasFactor, btScalar _relaxationFactor)
+{
+	m_softness = _softness;	// the three tuning values are stored exactly as given
+	m_biasFactor = _biasFactor;
+	m_relaxationFactor = _relaxationFactor;
+	m_halfRange = (high - low) / 2.0f;	// negative when low > high; test() and fit() then leave the angle alone
+	m_center = btNormalizeAngle(low + m_halfRange);	// midpoint of [low, high], passed through btNormalizeAngle
+}
+
+/// Clears the cached result, then records correction and sign when `angle` lies outside centre +/- half range.
+void btAngularLimit::test(const btScalar angle)
+{
+	m_solveLimit = false;
+	m_sign = 0.0f;
+	m_correction = 0.0f;
+	// A negative half range skips the check, so the cleared state above is the result.
+	if (!(m_halfRange >= 0.0f))
+		return;
+
+	const btScalar offset = btNormalizeAngle(angle - m_center);
+	if (offset < -m_halfRange)
+	{
+		m_sign = +1.0f;
+		m_correction = - (offset + m_halfRange);
+		m_solveLimit = true;
+	}
+	else if (offset > m_halfRange)
+	{
+		m_sign = -1.0f;
+		m_correction = m_halfRange - offset;
+		m_solveLimit = true;
+	}
+}
+
+btScalar btAngularLimit::getError() const	// product of the correction and sign cached by the last test() call
+{
+	return m_correction * m_sign;
+}
+
+/// Replaces `angle` with getHigh() or getLow() when the limit is active (half range > 0) and the
+/// btEqual() check fails. btEqual is declared elsewhere; presumably it reports |offset| <= m_halfRange,
+/// i.e. "already inside the limit" - confirm against its definition.
+void btAngularLimit::fit(btScalar& angle) const
+{
+	if (!(m_halfRange > 0.0f))
+		return;
+
+	const btScalar offset = btNormalizeAngle(angle - m_center);
+	if (btEqual(offset, m_halfRange))
+		return;
+
+	// Positive offsets take the upper limit value, all others the lower one.
+	if (offset > 0.0f)
+		angle = getHigh();
+	else
+		angle = getLow();
+}
+
+btScalar btAngularLimit::getLow() const	// lower limit angle: centre minus half range, passed through btNormalizeAngle
+{
+	return btNormalizeAngle(m_center - m_halfRange);
+}
+
+btScalar btAngularLimit::getHigh() const	// upper limit angle: centre plus half range, passed through btNormalizeAngle
+{
+	return btNormalizeAngle(m_center + m_halfRange);
+}
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btTypedConstraint.h b/src/bullet/BulletDynamics/ConstraintSolver/btTypedConstraint.h
new file mode 100644
index 00000000..a16e869a
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btTypedConstraint.h
@@ -0,0 +1,452 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2010 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_TYPED_CONSTRAINT_H
+#define BT_TYPED_CONSTRAINT_H
+
+class btRigidBody;
+#include "LinearMath/btScalar.h"
+#include "btSolverConstraint.h"
+
+class btSerializer;
+
+//Don't change any of the existing enum values, so add enum types at the end for serialization compatibility
+enum btTypedConstraintType
+{
+	POINT2POINT_CONSTRAINT_TYPE=3,	// only explicit value; each following enumerator is one greater
+	HINGE_CONSTRAINT_TYPE,	// 4
+	CONETWIST_CONSTRAINT_TYPE,	// 5
+	D6_CONSTRAINT_TYPE,	// 6
+	SLIDER_CONSTRAINT_TYPE,	// 7
+	CONTACT_CONSTRAINT_TYPE,	// 8
+	D6_SPRING_CONSTRAINT_TYPE,	// 9
+	MAX_CONSTRAINT_TYPE	// 10; NOTE(review): presumably an end marker rather than a real constraint kind - confirm
+};
+
+
+enum btConstraintParams	// NOTE(review): presumably the `num` argument of setParam()/getParam() - confirm
+{
+	BT_CONSTRAINT_ERP=1,	// only explicit value; each following enumerator is one greater
+	BT_CONSTRAINT_STOP_ERP,	// 2
+	BT_CONSTRAINT_CFM,	// 3
+	BT_CONSTRAINT_STOP_CFM	// 4
+};
+
+#if 1	// always taken: btAssertConstrParams forwards its argument to btAssert
+	#define btAssertConstrParams(_par) btAssert(_par) 
+#else	// changing the #if above to 0 selects this empty expansion instead
+	#define btAssertConstrParams(_par)
+#endif
+
+
+///TypedConstraint is the baseclass for Bullet constraints and vehicles
+class btTypedConstraint : public btTypedObject
+{
+	int	m_userConstraintType;
+	// the user id and user pointer below share storage: writing one overwrites the other
+	union
+	{
+		int	m_userConstraintId;
+		void* m_userConstraintPtr;
+	};
+
+	btScalar	m_breakingImpulseThreshold;
+	bool		m_isEnabled;
+	bool		m_needsFeedback;
+	int			m_overrideNumSolverIterations;
+
+	// private and asserting: assigning one constraint to another is treated as a programming error
+	btTypedConstraint&	operator=(btTypedConstraint&	other)
+	{
+		btAssert(0);
+		(void) other;
+		return *this;
+	}
+
+protected:
+	btRigidBody&	m_rbA;
+	btRigidBody&	m_rbB;
+	btScalar	m_appliedImpulse;
+	btScalar	m_dbgDrawSize;
+
+	///internal method used by the constraint solver, don't use them directly
+	btScalar getMotorFactor(btScalar pos, btScalar lowLim, btScalar uppLim, btScalar vel, btScalar timeFact);
+	
+
+public:
+
+	virtual ~btTypedConstraint() {};
+	btTypedConstraint(btTypedConstraintType type, btRigidBody& rbA);
+	btTypedConstraint(btTypedConstraintType type, btRigidBody& rbA,btRigidBody& rbB);
+
+	struct btConstraintInfo1 {
+		int m_numConstraintRows,nub;
+	};
+
+	static btRigidBody& getFixedBody();
+
+	struct btConstraintInfo2 {
+		// integrator parameters: frames per second (1/stepsize), default error
+		// reduction parameter (0..1).
+		btScalar fps,erp;
+
+		// for the first and second body, pointers to two (linear and angular)
+		// n*3 jacobian sub matrices, stored by rows. these matrices will have
+		// been initialized to 0 on entry. if the second body is zero then the
+		// J2xx pointers may be 0.
+		btScalar *m_J1linearAxis,*m_J1angularAxis,*m_J2linearAxis,*m_J2angularAxis;
+
+		// elements to jump from one row to the next in J's
+		int rowskip;
+
+		// right hand sides of the equation J*v = c + cfm * lambda. cfm is the
+		// "constraint force mixing" vector. c is set to zero on entry, cfm is
+		// set to a constant value (typically very small or zero) value on entry.
+		btScalar *m_constraintError,*cfm;
+
+		// lo and hi limits for variables (set to -/+ infinity on entry).
+		btScalar *m_lowerLimit,*m_upperLimit;
+
+		// findex vector for variables. see the LCP solver interface for a
+		// description of what this does. this is set to -1 on entry.
+		// note that the returned indexes are relative to the first index of
+		// the constraint.
+		int *findex;
+		// number of solver iterations
+		int m_numIterations;
+
+		//damping of the velocity
+		btScalar	m_damping;
+	};
+
+	int	getOverrideNumSolverIterations() const
+	{
+		return m_overrideNumSolverIterations;
+	}
+
+	///override the number of constraint solver iterations used to solve this constraint
+	///-1 will use the default number of iterations, as specified in SolverInfo.m_numIterations
+	void setOverrideNumSolverIterations(int overideNumIterations)
+	{
+		m_overrideNumSolverIterations = overideNumIterations;
+	}
+
+	///internal method used by the constraint solver, don't use them directly
+	virtual void	buildJacobian() {};
+
+	///internal method used by the constraint solver, don't use them directly
+	virtual	void	setupSolverConstraint(btConstraintArray& ca, int solverBodyA,int solverBodyB, btScalar timeStep)
+	{
+        (void)ca;
+        (void)solverBodyA;
+        (void)solverBodyB;
+        (void)timeStep;
+	}
+	
+	///internal method used by the constraint solver, don't use them directly
+	virtual void getInfo1 (btConstraintInfo1* info)=0;
+
+	///internal method used by the constraint solver, don't use them directly
+	virtual void getInfo2 (btConstraintInfo2* info)=0;
+
+	///internal method used by the constraint solver, don't use them directly
+	void	internalSetAppliedImpulse(btScalar appliedImpulse)
+	{
+		m_appliedImpulse = appliedImpulse;
+	}
+	///internal method used by the constraint solver, don't use them directly
+	btScalar	internalGetAppliedImpulse() const	// const: pure read; unlike getAppliedImpulse() it does not assert m_needsFeedback
+	{
+		return m_appliedImpulse;
+	}
+
+
+	btScalar	getBreakingImpulseThreshold() const
+	{
+		return 	m_breakingImpulseThreshold;
+	}
+
+	void	setBreakingImpulseThreshold(btScalar threshold)
+	{
+		m_breakingImpulseThreshold = threshold;
+	}
+
+	bool	isEnabled() const
+	{
+		return m_isEnabled;
+	}
+
+	void	setEnabled(bool enabled)
+	{
+		m_isEnabled=enabled;
+	}
+
+
+	///internal method used by the constraint solver, don't use them directly
+	virtual	void	solveConstraintObsolete(btRigidBody& /*bodyA*/,btRigidBody& /*bodyB*/,btScalar	/*timeStep*/) {};
+
+	
+	const btRigidBody& getRigidBodyA() const
+	{
+		return m_rbA;
+	}
+	const btRigidBody& getRigidBodyB() const
+	{
+		return m_rbB;
+	}
+
+	btRigidBody& getRigidBodyA() 
+	{
+		return m_rbA;
+	}
+	btRigidBody& getRigidBodyB()
+	{
+		return m_rbB;
+	}
+
+	int getUserConstraintType() const
+	{
+		return m_userConstraintType ;
+	}
+
+	void	setUserConstraintType(int userConstraintType)
+	{
+		m_userConstraintType = userConstraintType;
+	};
+
+	void	setUserConstraintId(int uid)
+	{
+		m_userConstraintId = uid;
+	}
+
+	int getUserConstraintId() const
+	{
+		return m_userConstraintId;
+	}
+
+	void	setUserConstraintPtr(void* ptr)	// overwrites the user constraint id (shared storage)
+	{
+		m_userConstraintPtr = ptr;
+	}
+
+	void*	getUserConstraintPtr() const	// const: returns a copy of the stored pointer, the constraint is not modified
+	{
+		return m_userConstraintPtr;
+	}
+
+	int getUid() const	// returns the same value as getUserConstraintId()
+	{
+		return m_userConstraintId;   
+	} 
+
+	bool	needsFeedback() const
+	{
+		return m_needsFeedback;
+	}
+
+	///enableFeedback will allow to read the applied linear and angular impulse
+	///use getAppliedImpulse, getAppliedLinearImpulse and getAppliedAngularImpulse to read feedback information
+	void	enableFeedback(bool needsFeedback)
+	{
+		m_needsFeedback = needsFeedback;
+	}
+
+	///getAppliedImpulse is an estimated total applied impulse. 
+	///This feedback could be used to determine breaking constraints or playing sounds.
+	btScalar	getAppliedImpulse() const
+	{
+		btAssert(m_needsFeedback);
+		return m_appliedImpulse;
+	}
+
+	btTypedConstraintType getConstraintType () const
+	{
+		return btTypedConstraintType(m_objectType);
+	}
+	
+	void setDbgDrawSize(btScalar dbgDrawSize)
+	{
+		m_dbgDrawSize = dbgDrawSize;
+	}
+	btScalar getDbgDrawSize() const	// const: pure read of m_dbgDrawSize
+	{
+		return m_dbgDrawSize;
+	}
+
+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+	///If no axis is provided, it uses the default axis for this constraint.
+	virtual	void	setParam(int num, btScalar value, int axis = -1) = 0;
+
+	///return the local value of parameter
+	virtual	btScalar getParam(int num, int axis = -1) const = 0;
+	
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+};
+
+// Returns an angle in range [-SIMD_2_PI, SIMD_2_PI], closest to one of the limits: either the input itself
+// or the input shifted by one full turn. All arguments should be normalized angles (i.e. in range [-SIMD_PI, SIMD_PI]).
+SIMD_FORCE_INLINE btScalar btAdjustAngleToLimits(btScalar angleInRadians, btScalar angleLowerLimitInRadians, btScalar angleUpperLimitInRadians)
+{
+	// Lower limit not below the upper limit: the angle is returned untouched.
+	if(angleLowerLimitInRadians >= angleUpperLimitInRadians)
+		return angleInRadians;
+	if(angleInRadians < angleLowerLimitInRadians)
+	{
+		const btScalar toLower = btFabs(btNormalizeAngle(angleLowerLimitInRadians - angleInRadians));
+		const btScalar toUpper = btFabs(btNormalizeAngle(angleUpperLimitInRadians - angleInRadians));
+		if (toLower < toUpper)
+			return angleInRadians;
+		return angleInRadians + SIMD_2_PI;
+	}
+	if(angleInRadians > angleUpperLimitInRadians)
+	{
+		const btScalar toUpper = btFabs(btNormalizeAngle(angleInRadians - angleUpperLimitInRadians));
+		const btScalar toLower = btFabs(btNormalizeAngle(angleInRadians - angleLowerLimitInRadians));
+		if (toLower < toUpper)
+			return angleInRadians - SIMD_2_PI;
+		return angleInRadians;
+	}
+	return angleInRadians;
+}
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btTypedConstraintData
+{
+	btRigidBodyData		*m_rbA;	// serialize() stores the serializer's unique pointer for body A
+	btRigidBodyData		*m_rbB;	// serialize() stores the serializer's unique pointer for body B
+	char	*m_name;	// unique pointer for the constraint name; serialize() tests it for null
+
+	int	m_objectType;
+	int	m_userConstraintType;
+	int	m_userConstraintId;
+	int	m_needsFeedback;	// bool in btTypedConstraint, stored as int
+
+	float	m_appliedImpulse;	// always float: serialize() converts from btScalar
+	float	m_dbgDrawSize;	// always float: serialize() converts from btScalar
+
+	int	m_disableCollisionsBetweenLinkedBodies;	// set by serialize() when either body references the constraint
+	int	m_overrideNumSolverIterations;
+
+	float	m_breakingImpulseThreshold;	// always float: serialize() converts from btScalar
+	int		m_isEnabled;	// written as 1 or 0 by serialize()
+	
+};
+
+SIMD_FORCE_INLINE	int	btTypedConstraint::calculateSerializeBufferSize() const	// size of the struct that serialize() fills
+{
+	return sizeof(btTypedConstraintData);
+}
+
+
+
+class btAngularLimit	// angular limit stored as centre + half range, with the result of the last test() cached
+{
+private:
+	btScalar 
+		m_center,	// midpoint of the limit interval, normalized by set()
+		m_halfRange,	// half of (high - low); negative while the limit is inactive
+		m_softness,
+		m_biasFactor,
+		m_relaxationFactor,
+		m_correction,	// written by test()
+		m_sign;	// written by test(): +1, -1 or 0
+
+	bool
+		m_solveLimit;	// written by test(): true when the tested angle violated the limit
+
+public:
+	/// Default constructor initializes limit as inactive, allowing free constraint movement
+	btAngularLimit()
+		:m_center(0.0f),
+		m_halfRange(-1.0f),
+		m_softness(0.9f),
+		m_biasFactor(0.3f),
+		m_relaxationFactor(1.0f),
+		m_correction(0.0f),
+		m_sign(0.0f),
+		m_solveLimit(false)
+	{}
+
+	/// Sets all limit's parameters.
+	/// When low > high limit becomes inactive.
+	/// When high - low > 2PI limit is ineffective too because no angle can exceed the limit
+	void set(btScalar low, btScalar high, btScalar _softness = 0.9f, btScalar _biasFactor = 0.3f, btScalar _relaxationFactor = 1.0f);
+
+	/// Checks constraint angle against limit. If limit is active and the angle violates the limit
+	/// correction is calculated.
+	void test(const btScalar angle);
+
+	/// Returns limit's softness
+	inline btScalar getSoftness() const
+	{
+		return m_softness;
+	}
+
+	/// Returns limit's bias factor
+	inline btScalar getBiasFactor() const
+	{
+		return m_biasFactor;
+	}
+
+	/// Returns limit's relaxation factor
+	inline btScalar getRelaxationFactor() const
+	{
+		return m_relaxationFactor;
+	}
+
+	/// Returns correction value evaluated when test() was invoked 
+	inline btScalar getCorrection() const
+	{
+		return m_correction;
+	}
+
+	/// Returns sign value evaluated when test() was invoked 
+	inline btScalar getSign() const
+	{
+		return m_sign;
+	}
+
+	/// Gives half of the distance between min and max limit angle
+	inline btScalar getHalfRange() const
+	{
+		return m_halfRange;
+	}
+
+	/// Returns true when the last test() invocation recognized limit violation
+	inline bool isLimit() const
+	{
+		return m_solveLimit;
+	}
+
+	/// Checks given angle against limit. If limit is active and angle doesn't fit it, the angle
+	/// returned is modified so it equals to the limit closest to given angle.
+	void fit(btScalar& angle) const;
+
+	/// Returns correction value multiplied by sign value
+	btScalar getError() const;
+	/// Returns the lower limit angle (centre minus half range, normalized)
+	btScalar getLow() const;
+	/// Returns the upper limit angle (centre plus half range, normalized)
+	btScalar getHigh() const;
+
+};
+
+
+
+#endif //BT_TYPED_CONSTRAINT_H
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btUniversalConstraint.cpp b/src/bullet/BulletDynamics/ConstraintSolver/btUniversalConstraint.cpp
new file mode 100644
index 00000000..b009f41a
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btUniversalConstraint.cpp
@@ -0,0 +1,87 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#include "btUniversalConstraint.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btTransformUtil.h"
+
+
+
+#define UNIV_EPS btScalar(0.01f)	// margin by which the constructor pulls the default angular limits inside +/-PI/2 and +/-PI
+
+
+// Builds a universal joint between rbA and rbB.
+// anchor, axis1 and axis2 are given in the world coordinate system;
+// axis1 must be orthogonal to axis2.
+btUniversalConstraint::btUniversalConstraint(btRigidBody& rbA, btRigidBody& rbB, const btVector3& anchor, const btVector3& axis1, const btVector3& axis2)
+	: btGeneric6DofConstraint(rbA, rbB, btTransform::getIdentity(), btTransform::getIdentity(), true)
+	, m_anchor(anchor)
+	, m_axis1(axis1)
+	, m_axis2(axis2)
+{
+	// Frame basis. The 6DOF constraint works with Euler angles, and its limits assume this rotation order:
+	//   1. Z, allowed limits are (-PI,PI);
+	//   2. the new position of Y, allowed limits are (-PI/2 + epsilon, PI/2 - epsilon), where epsilon is a
+	//      small positive number used to keep the constraint away from instability at the poles;
+	//   3. the new position of X, allowed limits are (-PI,PI).
+	// To simulate the ODE Universal joint the parent axis is used as Z, the child axis as Y,
+	// and all other DOFs are limited.
+	// The frame is built in the world coordinate system first.
+	// NOTE(review): normalize() is called on the members themselves, presumably normalizing them in place - confirm.
+	btVector3 unitZ = m_axis1.normalize();
+	btVector3 unitY = m_axis2.normalize();
+	btVector3 unitX = unitY.cross(unitZ); // we want right coordinate system
+	btTransform worldFrame;
+	worldFrame.setIdentity();
+	worldFrame.getBasis().setValue(	unitX[0], unitY[0], unitZ[0],
+									unitX[1], unitY[1], unitZ[1],
+									unitX[2], unitY[2], unitZ[2]);
+	worldFrame.setOrigin(anchor);
+	// Re-express the world frame relative to each body's centre-of-mass transform.
+	m_frameInA = rbA.getCenterOfMassTransform().inverse() * worldFrame;
+	m_frameInB = rbB.getCenterOfMassTransform().inverse() * worldFrame;
+	// set limits: no linear travel, X rotation locked, Y and Z kept UNIV_EPS inside +/-PI/2 and +/-PI
+	setLinearLowerLimit(btVector3(0., 0., 0.));
+	setLinearUpperLimit(btVector3(0., 0., 0.));
+	setAngularLowerLimit(btVector3(0.f, -SIMD_HALF_PI + UNIV_EPS, -SIMD_PI + UNIV_EPS));
+	setAngularUpperLimit(btVector3(0.f,  SIMD_HALF_PI - UNIV_EPS,  SIMD_PI - UNIV_EPS));
+}
+
+/// Replaces both joint axes and rebuilds the body-local frames around the stored anchor.
+/// NOTE(review): m_axis1/m_axis2 keep the values exactly as passed; only the local copies are normalized.
+void btUniversalConstraint::setAxis(const btVector3& axis1,const btVector3& axis2)
+{
+	m_axis1 = axis1;
+	m_axis2 = axis2;
+
+	btVector3 unitZ = axis1.normalized();
+	btVector3 unitY = axis2.normalized();
+	btVector3 unitX = unitY.cross(unitZ); // we want right coordinate system
+	// Each setValue row below takes one component of X, Y and Z.
+	btTransform worldFrame;
+	worldFrame.setIdentity();
+	worldFrame.getBasis().setValue(	unitX[0], unitY[0], unitZ[0],
+									unitX[1], unitY[1], unitZ[1],
+									unitX[2], unitY[2], unitZ[2]);
+	worldFrame.setOrigin(m_anchor);
+	// Re-express the world frame relative to each body's centre-of-mass transform.
+	m_frameInA = m_rbA.getCenterOfMassTransform().inverse() * worldFrame;
+	m_frameInB = m_rbB.getCenterOfMassTransform().inverse() * worldFrame;
+	calculateTransforms();
+}
+
+
diff --git a/src/bullet/BulletDynamics/ConstraintSolver/btUniversalConstraint.h b/src/bullet/BulletDynamics/ConstraintSolver/btUniversalConstraint.h
new file mode 100644
index 00000000..a8693916
--- /dev/null
+++ b/src/bullet/BulletDynamics/ConstraintSolver/btUniversalConstraint.h
@@ -0,0 +1,62 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_UNIVERSAL_CONSTRAINT_H
+#define BT_UNIVERSAL_CONSTRAINT_H
+
+
+
+#include "LinearMath/btVector3.h"
+#include "btTypedConstraint.h"
+#include "btGeneric6DofConstraint.h"
+
+
+
+/// Constraint similar to ODE Universal Joint
+/// has 2 rotational degrees of freedom, similar to Euler rotations around Z (axis 1)
+/// and Y (axis 2)
+/// Description from ODE manual : 
+/// "Given axis 1 on body 1, and axis 2 on body 2 that is perpendicular to axis 1, it keeps them perpendicular. 
+/// In other words, rotation of the two bodies about the direction perpendicular to the two axes will be equal."
+
+class btUniversalConstraint : public btGeneric6DofConstraint
+{
+protected:
+	btVector3	m_anchor;	// world-space anchor given to the constructor; reused by setAxis()
+	btVector3	m_axis1;
+	btVector3	m_axis2;
+public:
+	// constructor
+	// anchor, axis1 and axis2 are in world coordinate system
+	// axis1 must be orthogonal to axis2
+    btUniversalConstraint(btRigidBody& rbA, btRigidBody& rbB, const btVector3& anchor, const btVector3& axis1, const btVector3& axis2);
+	// access (read-only, so callable on a const constraint); the anchors are the origins of the calculated transforms
+	const btVector3& getAnchor() const { return m_calculatedTransformA.getOrigin(); }
+	const btVector3& getAnchor2() const { return m_calculatedTransformB.getOrigin(); }
+	const btVector3& getAxis1() const { return m_axis1; }
+	const btVector3& getAxis2() const { return m_axis2; }
+	btScalar getAngle1() { return getAngle(2); }
+	btScalar getAngle2() { return getAngle(1); }
+	// limits; NOTE(review): ang1 is placed in the Y slot and ang2 in the Z slot, while getAngle1() reads index 2 - confirm the intended pairing
+	void setUpperLimit(btScalar ang1max, btScalar ang2max) { setAngularUpperLimit(btVector3(0.f, ang1max, ang2max)); }
+	void setLowerLimit(btScalar ang1min, btScalar ang2min) { setAngularLowerLimit(btVector3(0.f, ang1min, ang2min)); }
+
+	void setAxis( const btVector3& axis1, const btVector3& axis2);
+};
+
+
+
+#endif // BT_UNIVERSAL_CONSTRAINT_H
+
diff --git a/src/bullet/BulletDynamics/Dynamics/Bullet-C-API.cpp b/src/bullet/BulletDynamics/Dynamics/Bullet-C-API.cpp
new file mode 100644
index 00000000..bd8e2748
--- /dev/null
+++ b/src/bullet/BulletDynamics/Dynamics/Bullet-C-API.cpp
@@ -0,0 +1,405 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/*
+	Draft high-level generic physics C-API. For low-level access, use the physics SDK native API's.
+	Work in progress, functionality will be added on demand.
+
+	If possible, use the richer Bullet C++ API, by including <src/btBulletDynamicsCommon.h>
+*/
+
+#include "Bullet-C-Api.h"
+#include "btBulletDynamicsCommon.h"
+#include "LinearMath/btAlignedAllocator.h"
+
+
+
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btScalar.h"	
+#include "LinearMath/btMatrix3x3.h"
+#include "LinearMath/btTransform.h"
+#include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+
+#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
+#include "BulletCollision/NarrowPhaseCollision/btPointCollector.h"
+#include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpa2.h"
+#include "BulletCollision/CollisionShapes/btMinkowskiSumShape.h"
+#include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h"
+#include "BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h"
+#include "BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h"
+
+
+/*
+	Create and Delete a Physics SDK	
+*/
+
+struct	btPhysicsSdk
+{
+	// Object behind plPhysicsSdkHandle; it only stores the world bounds that plCreateDynamicsWorld passes to btAxisSweep3.
+//	btDispatcher*				m_dispatcher;
+//	btOverlappingPairCache*		m_pairCache;
+//	btConstraintSolver*			m_constraintSolver
+
+	btVector3	m_worldAabbMin; // lower corner of the world bounds
+	btVector3	m_worldAabbMax; // upper corner of the world bounds
+
+	// The default bounds are a cube from -1000 to 1000 on every axis.
+	//todo: version, hardware/optimization settings etc?
+	btPhysicsSdk()
+		:m_worldAabbMin(-1000,-1000,-1000),
+		m_worldAabbMax(1000,1000,1000)
+	{
+
+	}
+
+	
+};
+
+plPhysicsSdkHandle	plNewBulletSdk()
+{
+	btPhysicsSdk* sdk = new (btAlignedAlloc(sizeof(btPhysicsSdk),16)) btPhysicsSdk; // placement-construct in 16-byte aligned storage
+	return (plPhysicsSdkHandle)sdk; // release with plDeletePhysicsSdk
+}
+
+void		plDeletePhysicsSdk(plPhysicsSdkHandle	physicsSdk)
+{
+	// Hands the storage back to the aligned allocator; no destructor call is made on the btPhysicsSdk object.
+	btAlignedFree(reinterpret_cast<btPhysicsSdk*>(physicsSdk));
+}
+
+
+/* Dynamics World */
+plDynamicsWorldHandle plCreateDynamicsWorld(plPhysicsSdkHandle physicsSdkHandle)
+{
+	// Every component is placement-constructed in its own 16-byte aligned allocation.
+	btPhysicsSdk* sdk = reinterpret_cast<btPhysicsSdk*>(physicsSdkHandle);
+	void* configMem = btAlignedAlloc(sizeof(btDefaultCollisionConfiguration),16);
+	btDefaultCollisionConfiguration* config = new (configMem) btDefaultCollisionConfiguration();
+	void* dispatcherMem = btAlignedAlloc(sizeof(btCollisionDispatcher),16);
+	btDispatcher* dispatcher = new (dispatcherMem) btCollisionDispatcher(config);
+	void* broadphaseMem = btAlignedAlloc(sizeof(btAxisSweep3),16);
+	btBroadphaseInterface* broadphase = new (broadphaseMem) btAxisSweep3(sdk->m_worldAabbMin,sdk->m_worldAabbMax);
+	void* solverMem = btAlignedAlloc(sizeof(btSequentialImpulseConstraintSolver),16);
+	btConstraintSolver* solver = new (solverMem) btSequentialImpulseConstraintSolver();
+	void* worldMem = btAlignedAlloc(sizeof(btDiscreteDynamicsWorld),16);
+	return (plDynamicsWorldHandle) new (worldMem) btDiscreteDynamicsWorld(dispatcher,broadphase,solver,config);
+}
+void           plDeleteDynamicsWorld(plDynamicsWorldHandle world)
+{
+	//todo: also clean up the other allocations, axisSweep, pairCache,dispatcher,constraintSolver,collisionConfiguration
+	btDynamicsWorld* dynamicsWorld = reinterpret_cast< btDynamicsWorld* >(world);
+	if (dynamicsWorld) { dynamicsWorld->~btDynamicsWorld(); btAlignedFree(dynamicsWorld); } // the world was placement-new'd: run its destructor so world-owned allocations are released; remove all bodies from the world before calling this
+}
+
+void	plStepSimulation(plDynamicsWorldHandle world,	plReal	timeStep)
+{
+	btDynamicsWorld* const simWorld = reinterpret_cast< btDynamicsWorld* >(world);
+	btAssert(simWorld);
+	simWorld->stepSimulation(timeStep); // single-argument call: the remaining stepSimulation parameters keep their defaults
+}
+
+void plAddRigidBody(plDynamicsWorldHandle world, plRigidBodyHandle object)
+{
+	// Unwrap both opaque handles, then register the body with the world.
+	btRigidBody* const rigidBody = reinterpret_cast< btRigidBody* >(object);
+	btDynamicsWorld* const targetWorld = reinterpret_cast< btDynamicsWorld* >(world);
+	btAssert(targetWorld);
+	btAssert(rigidBody);
+	targetWorld->addRigidBody(rigidBody);
+}
+
+void plRemoveRigidBody(plDynamicsWorldHandle world, plRigidBodyHandle object)
+{
+	// Unwrap both opaque handles, then take the body out of the world.
+	btRigidBody* const rigidBody = reinterpret_cast< btRigidBody* >(object);
+	btDynamicsWorld* const sourceWorld = reinterpret_cast< btDynamicsWorld* >(world);
+	btAssert(sourceWorld);
+	btAssert(rigidBody);
+	sourceWorld->removeRigidBody(rigidBody);
+}
+
+/* Rigid Body  */
+
+plRigidBodyHandle plCreateRigidBody(	void* user_data,  float mass, plCollisionShapeHandle cshape )
+{
+	btCollisionShape* const collisionShape = reinterpret_cast<btCollisionShape*>( cshape);
+	btAssert(collisionShape);
+	// A zero mass skips the inertia computation, leaving the inertia vector at (0,0,0).
+	btVector3 inertia(0,0,0);
+	if (mass)
+		collisionShape->calculateLocalInertia(mass,inertia);
+
+	// The second construction argument (the motion state) is passed as 0.
+	btRigidBody::btRigidBodyConstructionInfo info(mass, 0,collisionShape,inertia);
+	btRigidBody* rigidBody = new (btAlignedAlloc(sizeof(btRigidBody),16)) btRigidBody(info);
+	btTransform startPose;
+	startPose.setIdentity();
+	rigidBody->setWorldTransform(startPose);
+	rigidBody->setUserPointer(user_data);
+	return (plRigidBodyHandle) rigidBody;
+}
+
+void plDeleteRigidBody(plRigidBodyHandle cbody)
+{
+	btRigidBody* body = reinterpret_cast< btRigidBody* >(cbody);
+	btAssert(body);
+	if (body) { body->~btRigidBody(); btAlignedFree( body); } // plCreateRigidBody placement-constructs the body, so end its lifetime with an explicit destructor call before the storage is returned
+}
+
+
+/* Collision Shape definition */
+
+plCollisionShapeHandle plNewSphereShape(plReal radius)
+{
+	// Sphere of the given radius, placement-constructed in 16-byte aligned storage.
+	btSphereShape* sphere = new (btAlignedAlloc(sizeof(btSphereShape),16)) btSphereShape(radius);
+	return (plCollisionShapeHandle) sphere;
+}
+	
+plCollisionShapeHandle plNewBoxShape(plReal x, plReal y, plReal z)
+{
+	// x, y and z are packed into one btVector3 and forwarded to the btBoxShape constructor.
+	return (plCollisionShapeHandle) new (btAlignedAlloc(sizeof(btBoxShape),16)) btBoxShape(btVector3(x,y,z));
+}
+
+plCollisionShapeHandle plNewCapsuleShape(plReal radius, plReal height)
+{
+	// Built as a btMultiSphereShape: the convex hull of two equal spheres
+	// whose centres sit at (0,+height,0) and (0,-height,0).
+	const int sphereCount = 2;
+	btVector3 centres[sphereCount] = {btVector3(0,height,0),btVector3(0,-height,0)};
+	btScalar radii[sphereCount] = {radius,radius};
+	btMultiSphereShape* capsule = new (btAlignedAlloc(sizeof(btMultiSphereShape),16)) btMultiSphereShape(centres,radii,sphereCount);
+	return (plCollisionShapeHandle) capsule;
+}
+plCollisionShapeHandle plNewConeShape(plReal radius, plReal height)
+{
+	// radius and height are forwarded unchanged to the btConeShape constructor.
+	return (plCollisionShapeHandle) new (btAlignedAlloc(sizeof(btConeShape),16)) btConeShape(radius,height);
+}
+
+plCollisionShapeHandle plNewCylinderShape(plReal radius, plReal height)
+{
+	// The btCylinderShape constructor receives the vector (radius, height, radius).
+	return (plCollisionShapeHandle) new (btAlignedAlloc(sizeof(btCylinderShape),16)) btCylinderShape(btVector3(radius,height,radius));
+}
+
+/* Convex Meshes */
+plCollisionShapeHandle plNewConvexHullShape()
+{
+	// Starts out default-constructed; points are supplied afterwards through plAddVertex.
+	return (plCollisionShapeHandle) new (btAlignedAlloc(sizeof(btConvexHullShape),16)) btConvexHullShape();
+}
+
+
+/* Concave static triangle meshes */
+plMeshInterfaceHandle		   plNewMeshInterface()
+{
+	return 0; // stub: no mesh interface is created, the handle returned is always null
+}
+
+plCollisionShapeHandle plNewCompoundShape()
+{
+	// Starts out default-constructed; children are attached afterwards through plAddChildShape.
+	return (plCollisionShapeHandle) new (btAlignedAlloc(sizeof(btCompoundShape),16)) btCompoundShape();
+}
+
+void	plAddChildShape(plCollisionShapeHandle compoundShapeHandle,plCollisionShapeHandle childShapeHandle, plVector3 childPos,plQuaternion childOrn)
+{
+	// The parent handle must refer to a compound shape (only checked via btAssert).
+	btCollisionShape* parent = reinterpret_cast<btCollisionShape*>(compoundShapeHandle);
+	btAssert(parent->getShapeType() == COMPOUND_SHAPE_PROXYTYPE);
+	btCompoundShape* compound = reinterpret_cast<btCompoundShape*>(parent);
+	btCollisionShape* child = reinterpret_cast<btCollisionShape*>(childShapeHandle);
+
+	// Child pose relative to the compound: rotation from childOrn[0..3], origin from childPos[0..2].
+	const btTransform childPose(btQuaternion(childOrn[0],childOrn[1],childOrn[2],childOrn[3]), btVector3(childPos[0],childPos[1],childPos[2]));
+	compound->addChildShape(childPose,child);
+}
+
+void plSetEuler(plReal yaw,plReal pitch,plReal roll, plQuaternion orient)
+{
+	// Convert the angles with btQuaternion::setEuler, then write the
+	// components out in x, y, z, w order.
+	btQuaternion q;
+	q.setEuler(yaw,pitch,roll);
+	const btScalar xyzw[4] = { q.getX(), q.getY(), q.getZ(), q.getW() };
+	for (int k = 0; k < 4; ++k)
+		orient[k] = xyzw[k];
+}
+
+
+//	extern  void		plAddTriangle(plMeshInterfaceHandle meshHandle, plVector3 v0,plVector3 v1,plVector3 v2);
+//	extern  plCollisionShapeHandle plNewStaticTriangleMeshShape(plMeshInterfaceHandle);
+
+
+void		plAddVertex(plCollisionShapeHandle cshape, plReal x,plReal y,plReal z)
+{
+	// The handle must refer to a convex hull shape; this is verified only through btAssert.
+	btCollisionShape* asBase = reinterpret_cast<btCollisionShape*>( cshape);
+	(void)asBase; // presumably keeps the variable "used" when btAssert expands to nothing
+	btAssert(asBase->getShapeType()==CONVEX_HULL_SHAPE_PROXYTYPE);
+
+	reinterpret_cast<btConvexHullShape*>( cshape)->addPoint(btVector3(x,y,z));
+}
+
+void plDeleteShape(plCollisionShapeHandle cshape)
+{
+	btCollisionShape* shape = reinterpret_cast<btCollisionShape*>( cshape);
+	btAssert(shape);
+	if (shape) { shape->~btCollisionShape(); btAlignedFree(shape); } // shapes are placement-new'd by the plNew*Shape functions: run the destructor first so memory held by the concrete shape is released, then return the storage
+}
+void plSetScaling(plCollisionShapeHandle cshape, plVector3 cscaling)
+{
+	// cscaling[0..2] become the x, y and z components of the local scaling vector.
+	btCollisionShape* target = reinterpret_cast<btCollisionShape*>( cshape);
+	btAssert(target);
+	target->setLocalScaling(btVector3(cscaling[0],cscaling[1],cscaling[2]));
+}
+
+
+
+void plSetPosition(plRigidBodyHandle object, const plVector3 position)
+{
+	btRigidBody* rigidBody = reinterpret_cast< btRigidBody* >(object);
+	btAssert(rigidBody);
+	// Work on a copy of the current transform so the rotation part is carried over untouched.
+	btTransform pose = rigidBody->getWorldTransform();
+	pose.setOrigin(btVector3(position[0],position[1],position[2]));
+	rigidBody->setWorldTransform(pose);
+}
+
+void plSetOrientation(plRigidBodyHandle object, const plQuaternion orientation)
+{
+	btRigidBody* rigidBody = reinterpret_cast< btRigidBody* >(object);
+	btAssert(rigidBody);
+	// Work on a copy of the current transform so the origin is carried over untouched.
+	btTransform pose = rigidBody->getWorldTransform();
+	pose.setRotation(btQuaternion(orientation[0],orientation[1],orientation[2],orientation[3]));
+	rigidBody->setWorldTransform(pose);
+}
+
+void	plSetOpenGLMatrix(plRigidBodyHandle object, plReal* matrix)
+{
+	btRigidBody* rigidBody = reinterpret_cast< btRigidBody* >(object);
+	btAssert(rigidBody);
+	// Written in place through the reference returned by getWorldTransform().
+	rigidBody->getWorldTransform().setFromOpenGLMatrix(matrix);
+}
+
+void	plGetOpenGLMatrix(plRigidBodyHandle object, plReal* matrix)
+{
+	btRigidBody* rigidBody = reinterpret_cast< btRigidBody* >(object);
+	btAssert(rigidBody);
+	// The body's world transform is written into the caller-provided array.
+	btTransform& pose = rigidBody->getWorldTransform();
+	pose.getOpenGLMatrix(matrix);
+}
+
+void	plGetPosition(plRigidBodyHandle object,plVector3 position)
+{
+	btRigidBody* rigidBody = reinterpret_cast< btRigidBody* >(object);
+	btAssert(rigidBody);
+	const btVector3& origin = rigidBody->getWorldTransform().getOrigin(); // origin of the body's world transform
+	position[0] = origin.getX();
+	position[1] = origin.getY();
+	position[2] = origin.getZ();
+}
+
+void plGetOrientation(plRigidBodyHandle object,plQuaternion orientation)
+{
+	btRigidBody* rigidBody = reinterpret_cast< btRigidBody* >(object);
+	btAssert(rigidBody);
+	// Output order is x, y, z, w.
+	const btQuaternion rotation = rigidBody->getWorldTransform().getRotation();
+	const btScalar xyzw[4] = { rotation.getX(), rotation.getY(), rotation.getZ(), rotation.getW() };
+	for (int k = 0; k < 4; ++k)
+		orientation[k] = xyzw[k];
+}
+
+
+
+//plRigidBodyHandle plRayCast(plDynamicsWorldHandle world, const plVector3 rayStart, const plVector3 rayEnd, plVector3 hitpoint, plVector3 normal);
+
+//	extern  plRigidBodyHandle plObjectCast(plDynamicsWorldHandle world, const plVector3 rayStart, const plVector3 rayEnd, plVector3 hitpoint, plVector3 normal);
+
+double plNearestPoints(float p1[3], float p2[3], float p3[3], float q1[3], float q2[3], float q3[3], float *pa, float *pb, float normal[3])
+{
+	btVector3 vp(p1[0], p1[1], p1[2]);
+	btTriangleShape trishapeA(vp, 
+				  btVector3(p2[0], p2[1], p2[2]), 
+				  btVector3(p3[0], p3[1], p3[2]));
+	trishapeA.setMargin(0.000001f);
+	btVector3 vq(q1[0], q1[1], q1[2]);
+	btTriangleShape trishapeB(vq, 
+				  btVector3(q2[0], q2[1], q2[2]), 
+				  btVector3(q3[0], q3[1], q3[2]));
+	trishapeB.setMargin(0.000001f);
+	// Triangle A is (p1,p2,p3) and triangle B is (q1,q2,q3); both get a margin of 0.000001.
+	// btVoronoiSimplexSolver sGjkSimplexSolver;
+	// btGjkEpaPenetrationDepthSolver penSolverPtr;	
+	// Function-local statics: the same solver objects are reused by every call.
+	static btSimplexSolverInterface sGjkSimplexSolver;
+	sGjkSimplexSolver.reset();
+	
+	static btGjkEpaPenetrationDepthSolver Solver0;
+	static btMinkowskiPenetrationDepthSolver Solver1;
+	// Solver0 is declared but never selected; only Solver1 is handed to the pair detector.
+	btConvexPenetrationDepthSolver* Solver = NULL;
+	
+	Solver = &Solver1;	
+		
+	btGjkPairDetector convexConvex(&trishapeA ,&trishapeB,&sGjkSimplexSolver,Solver);
+	
+	convexConvex.m_catchDegeneracies = 1;
+	
+	// btGjkPairDetector convexConvex(&trishapeA ,&trishapeB,&sGjkSimplexSolver,0);
+	
+	btPointCollector gjkOutput;
+	btGjkPairDetector::ClosestPointInput input;
+	// Both shapes are queried with identity transforms, so the vertices are used as given.
+		
+	btTransform tr;
+	tr.setIdentity();
+	
+	input.m_transformA = tr;
+	input.m_transformB = tr;
+	
+	convexConvex.getClosestPoints(input, gjkOutput, 0);
+	// With a result: pa receives m_pointInWorld, pb is that point offset by m_normalOnBInWorld * m_distance,
+	// normal receives m_normalOnBInWorld and m_distance is returned. Without a result the return value is -1.
+	if (gjkOutput.m_hasResult)
+	{
+		
+		pb[0] = pa[0] = gjkOutput.m_pointInWorld[0];
+		pb[1] = pa[1] = gjkOutput.m_pointInWorld[1];
+		pb[2] = pa[2] = gjkOutput.m_pointInWorld[2];
+
+		pb[0]+= gjkOutput.m_normalOnBInWorld[0] * gjkOutput.m_distance;
+		pb[1]+= gjkOutput.m_normalOnBInWorld[1] * gjkOutput.m_distance;
+		pb[2]+= gjkOutput.m_normalOnBInWorld[2] * gjkOutput.m_distance;
+		
+		normal[0] = gjkOutput.m_normalOnBInWorld[0];
+		normal[1] = gjkOutput.m_normalOnBInWorld[1];
+		normal[2] = gjkOutput.m_normalOnBInWorld[2];
+
+		return gjkOutput.m_distance;
+	}
+	return -1.0f;	
+}
+
diff --git a/src/bullet/BulletDynamics/Dynamics/btActionInterface.h b/src/bullet/BulletDynamics/Dynamics/btActionInterface.h
new file mode 100644
index 00000000..e1fea3a4
--- /dev/null
+++ b/src/bullet/BulletDynamics/Dynamics/btActionInterface.h
@@ -0,0 +1,46 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef _BT_ACTION_INTERFACE_H
+#define _BT_ACTION_INTERFACE_H
+
+class btIDebugDraw;
+class btCollisionWorld;
+
+#include "LinearMath/btScalar.h"
+#include "btRigidBody.h"
+
+///Basic interface to allow actions such as vehicles and characters to be updated inside a btDynamicsWorld
+class btActionInterface
+{
+protected:
+	/// Static accessor returning a rigid body by reference; defined outside this header (presumably a shared fixed body - confirm in the .cpp).
+	static btRigidBody& getFixedBody();
+	
+	
+public:
+
+	virtual ~btActionInterface()
+	{
+	}
+	/// Receives the collision world and the step duration; btDiscreteDynamicsWorld::updateActions calls it for each registered action.
+	virtual void updateAction( btCollisionWorld* collisionWorld, btScalar deltaTimeStep)=0;
+	/// Debug-drawing hook; btDiscreteDynamicsWorld::debugDrawWorld passes the world's debug drawer.
+	virtual void debugDraw(btIDebugDraw* debugDrawer) = 0;
+
+};
+
+#endif //_BT_ACTION_INTERFACE_H
+
diff --git a/src/bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp b/src/bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp
new file mode 100644
index 00000000..954ef241
--- /dev/null
+++ b/src/bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp
@@ -0,0 +1,1257 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btDiscreteDynamicsWorld.h"
+
+//collision detection
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "BulletCollision/CollisionDispatch/btSimulationIslandManager.h"
+#include "LinearMath/btTransformUtil.h"
+#include "LinearMath/btQuickprof.h"
+
+//rigidbody & constraints
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
+#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
+#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btHingeConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btConeTwistConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btSliderConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btContactConstraint.h"
+
+
+#include "LinearMath/btIDebugDraw.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+
+
+#include "BulletDynamics/Dynamics/btActionInterface.h"
+#include "LinearMath/btQuickprof.h"
+#include "LinearMath/btMotionState.h"
+
+#include "LinearMath/btSerializer.h"
+
+#if 0
+btAlignedObjectArray<btVector3> debugContacts;
+btAlignedObjectArray<btVector3> debugNormals;
+int startHit=2;
+int firstHit=startHit;
+#endif
+
+SIMD_FORCE_INLINE	int	btGetConstraintIslandId(const btTypedConstraint* lhs)
+{
+	// Island id of a constraint: body A's island tag when it is non-negative,
+	// otherwise whatever tag body B carries.
+	const btCollisionObject& bodyA = lhs->getRigidBodyA();
+	const btCollisionObject& bodyB = lhs->getRigidBodyB();
+	if (bodyA.getIslandTag() >= 0)
+		return bodyA.getIslandTag();
+	return bodyB.getIslandTag();
+}
+
+
+/// Comparison functor that orders constraints by ascending island id
+/// (as computed by btGetConstraintIslandId).
+class btSortConstraintOnIslandPredicate
+{
+	public:
+		bool operator() ( const btTypedConstraint* lhs, const btTypedConstraint* rhs ) const
+		{
+			const int islandOfRhs = btGetConstraintIslandId(rhs);
+			const int islandOfLhs = btGetConstraintIslandId(lhs);
+			return islandOfLhs < islandOfRhs;
+		}
+};
+
+struct InplaceSolverIslandCallback : public btSimulationIslandManager::IslandCallback
+{
+	btContactSolverInfo*	m_solverInfo;
+	btConstraintSolver*		m_solver;
+	btTypedConstraint**		m_sortedConstraints;
+	int						m_numConstraints;
+	btIDebugDraw*			m_debugDrawer;
+	btStackAlloc*			m_stackAlloc;
+	btDispatcher*			m_dispatcher;
+	// Work accumulated across islands when m_minimumSolverBatchSize > 1 (see processIsland).
+	btAlignedObjectArray<btCollisionObject*> m_bodies;
+	btAlignedObjectArray<btPersistentManifold*> m_manifolds;
+	btAlignedObjectArray<btTypedConstraint*> m_constraints;
+
+	/// Stores the solver, stack allocator and dispatcher; everything else is supplied later through setup().
+	InplaceSolverIslandCallback(
+		btConstraintSolver*	solver,
+		btStackAlloc* stackAlloc,
+		btDispatcher* dispatcher)
+		:m_solverInfo(NULL),
+		m_solver(solver),
+		m_sortedConstraints(NULL),
+		m_numConstraints(0),
+		m_debugDrawer(NULL),
+		m_stackAlloc(stackAlloc),
+		m_dispatcher(dispatcher)
+	{
+
+	}
+	/// Assignment is not supported: it asserts and leaves *this unchanged.
+	InplaceSolverIslandCallback& operator=(InplaceSolverIslandCallback& other)
+	{
+		btAssert(0);
+		(void)other;
+		return *this;
+	}
+	/// Installs the solver info, constraint array and debug drawer, and empties the three batch arrays.
+	SIMD_FORCE_INLINE void setup ( btContactSolverInfo* solverInfo, btTypedConstraint** sortedConstraints,	int	numConstraints,	btIDebugDraw* debugDrawer)
+	{
+		btAssert(solverInfo);
+		m_solverInfo = solverInfo;
+		m_sortedConstraints = sortedConstraints;
+		m_numConstraints = numConstraints;
+		m_debugDrawer = debugDrawer;
+		m_bodies.resize (0);
+		m_manifolds.resize (0);
+		m_constraints.resize (0);
+	}
+	/// Island callback. A negative islandId solves everything in one solveGroup call; otherwise the
+	/// island's constraints are located in m_sortedConstraints and are either solved at once or batched.
+	virtual	void	processIsland(btCollisionObject** bodies,int numBodies,btPersistentManifold**	manifolds,int numManifolds, int islandId)
+	{
+		if (islandId<0)
+		{
+			if (numManifolds + m_numConstraints)
+			{
+				///we don't split islands, so all constraints/contact manifolds/bodies are passed into the solver regardless the island id
+				m_solver->solveGroup( bodies,numBodies,manifolds, numManifolds,&m_sortedConstraints[0],m_numConstraints,*m_solverInfo,m_debugDrawer,m_stackAlloc,m_dispatcher);
+			}
+		} else
+		{
+				//also add all non-contact constraints/joints for this island
+			btTypedConstraint** startConstraint = 0;
+			int numCurConstraints = 0;
+			int i;
+			
+			//find the first constraint for this island
+			for (i=0;i<m_numConstraints;i++)
+			{
+				if (btGetConstraintIslandId(m_sortedConstraints[i]) == islandId)
+				{
+					startConstraint = &m_sortedConstraints[i];
+					break;
+				}
+			}
+			//count the number of constraints in this island (the scan resumes at the first match; using the count as a span from startConstraint presumes the array is sorted by island id)
+			for (;i<m_numConstraints;i++)
+			{
+				if (btGetConstraintIslandId(m_sortedConstraints[i]) == islandId)
+				{
+					numCurConstraints++;
+				}
+			}
+			// A minimum batch size <= 1 solves each island immediately; larger values defer until enough work has accumulated.
+			if (m_solverInfo->m_minimumSolverBatchSize<=1)
+			{
+				///only call solveGroup if there is some work: avoid virtual function call, its overhead can be excessive
+				if (numManifolds + numCurConstraints)
+				{
+					m_solver->solveGroup( bodies,numBodies,manifolds, numManifolds,startConstraint,numCurConstraints,*m_solverInfo,m_debugDrawer,m_stackAlloc,m_dispatcher);
+				}
+			} else
+			{
+				// Append this island's bodies, manifolds and constraints to the pending batch.
+				for (i=0;i<numBodies;i++)
+					m_bodies.push_back(bodies[i]);
+				for (i=0;i<numManifolds;i++)
+					m_manifolds.push_back(manifolds[i]);
+				for (i=0;i<numCurConstraints;i++)
+					m_constraints.push_back(startConstraint[i]);
+				if ((m_constraints.size()+m_manifolds.size())>m_solverInfo->m_minimumSolverBatchSize)
+				{
+					processConstraints();
+				} else
+				{
+					//printf("deferred\n");
+				}
+			}
+		}
+	}
+	void	processConstraints() // solves whatever has been batched (if anything), then empties the batch arrays
+	{
+		if (m_manifolds.size() + m_constraints.size()>0)
+		{
+			// Empty arrays are passed as null pointers instead of taking the address of element 0.
+			btCollisionObject** bodies = m_bodies.size()? &m_bodies[0]:0;
+			btPersistentManifold** manifold = m_manifolds.size()?&m_manifolds[0]:0;
+			btTypedConstraint** constraints = m_constraints.size()?&m_constraints[0]:0;
+			
+			m_solver->solveGroup( bodies,m_bodies.size(),manifold, m_manifolds.size(),constraints, m_constraints.size() ,*m_solverInfo,m_debugDrawer,m_stackAlloc,m_dispatcher);
+		}
+		m_bodies.resize(0);
+		m_manifolds.resize(0);
+		m_constraints.resize(0);
+
+	}
+
+};
+
+
+
+btDiscreteDynamicsWorld::btDiscreteDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver, btCollisionConfiguration* collisionConfiguration)
+:btDynamicsWorld(dispatcher,pairCache,collisionConfiguration),
+m_constraintSolver(constraintSolver),
+m_gravity(0,-10,0),
+m_localTime(0),
+m_synchronizeAllMotionStates(false),
+m_profileTimings(0),
+m_sortedConstraints	(),
+m_solverIslandCallback ( NULL )
+{
+	if (!m_constraintSolver)
+	{
+		void* mem = btAlignedAlloc(sizeof(btSequentialImpulseConstraintSolver),16);
+		m_constraintSolver = new (mem) btSequentialImpulseConstraintSolver;
+		m_ownsConstraintSolver = true;
+	} else
+	{
+		m_ownsConstraintSolver = false;
+	}
+	// A null constraintSolver makes the world create and own a sequential impulse solver (above); the island manager is always created and owned here.
+	{
+		void* mem = btAlignedAlloc(sizeof(btSimulationIslandManager),16);
+		m_islandManager = new (mem) btSimulationIslandManager();
+	}
+
+	m_ownsIslandManager = true;
+	// Give the callback m_constraintSolver rather than the raw argument: when the caller passed null, the solver created above is the one that must be used.
+	{
+		void* mem = btAlignedAlloc(sizeof(InplaceSolverIslandCallback),16);
+		m_solverIslandCallback = new (mem) InplaceSolverIslandCallback (m_constraintSolver, m_stackAlloc, dispatcher);
+	}
+}
+
+
+btDiscreteDynamicsWorld::~btDiscreteDynamicsWorld()
+{
+	//only delete it when we created it (explicit destructor call, then btAlignedFree on the storage)
+	if (m_ownsIslandManager)
+	{
+		m_islandManager->~btSimulationIslandManager();
+		btAlignedFree( m_islandManager);
+	}
+	if (m_solverIslandCallback)
+	{
+		m_solverIslandCallback->~InplaceSolverIslandCallback();
+		btAlignedFree(m_solverIslandCallback);
+	}
+	if (m_ownsConstraintSolver)
+	{
+		//a solver supplied by the caller is left untouched; only the one created in the constructor is released
+		m_constraintSolver->~btConstraintSolver();
+		btAlignedFree(m_constraintSolver);
+	}
+}
+
+void	btDiscreteDynamicsWorld::saveKinematicState(btScalar timeStep)
+{
+///would like to iterate over m_nonStaticRigidBodies, but unfortunately old API allows
+///to switch status _after_ adding kinematic objects to the world
+///fix it for Bullet 3.x release
+	for (int idx=0;idx<m_collisionObjects.size();++idx)
+	{
+		btRigidBody* rigid = btRigidBody::upcast(m_collisionObjects[idx]);
+
+		//skip anything that is not a rigid body, is asleep, or is not kinematic
+		if (!rigid)
+			continue;
+		if (rigid->getActivationState() == ISLAND_SLEEPING)
+			continue;
+		if (!rigid->isKinematicObject())
+			continue;
+		//to calculate velocities next frame
+		rigid->saveKinematicState(timeStep);
+	}
+}
+
+void	btDiscreteDynamicsWorld::debugDrawWorld()
+{
+	BT_PROFILE("debugDrawWorld");
+	// Base class drawing first, then constraints, then actions.
+	btCollisionWorld::debugDrawWorld();
+	// Constraints are drawn only when a drawer is installed and one of the two constraint flags is set.
+	bool drawConstraints = false;
+	if (getDebugDrawer())
+	{
+		int mode = getDebugDrawer()->getDebugMode();
+		if(mode  & (btIDebugDraw::DBG_DrawConstraints | btIDebugDraw::DBG_DrawConstraintLimits))
+		{
+			drawConstraints = true;
+		}
+	}
+	if(drawConstraints)
+	{
+		for(int i = getNumConstraints()-1; i>=0 ;i--)
+		{
+			btTypedConstraint* constraint = getConstraint(i);
+			debugDrawConstraint(constraint);
+		}
+	}
+	// Actions are drawn when any of the wireframe / AABB / normals flags is set.
+	// NOTE(review): the inner drawer/mode test repeats what the outer condition has already established.
+
+    if (getDebugDrawer() && (getDebugDrawer()->getDebugMode() & (btIDebugDraw::DBG_DrawWireframe | btIDebugDraw::DBG_DrawAabb | btIDebugDraw::DBG_DrawNormals)))
+	{
+		int i;
+
+		if (getDebugDrawer() && getDebugDrawer()->getDebugMode())
+		{
+			for (i=0;i<m_actions.size();i++)
+			{
+				m_actions[i]->debugDraw(m_debugDrawer);
+			}
+		}
+	}
+}
+
+void	btDiscreteDynamicsWorld::clearForces()
+{
+	///@todo: iterate over awake simulation islands!
+	//need to check if clearing every body unconditionally is ok
+	//it might break backward compatibility (people applying forces on sleeping objects get never cleared and accumulate on wake-up
+	const int bodyCount = m_nonStaticRigidBodies.size();
+	for (int idx=0;idx<bodyCount;++idx)
+	{
+		m_nonStaticRigidBodies[idx]->clearForces();
+	}
+}	
+
+///apply gravity, call this once per timestep
+void	btDiscreteDynamicsWorld::applyGravity()
+{
+	///@todo: iterate over awake simulation islands!
+	for (int idx=0;idx<m_nonStaticRigidBodies.size();++idx)
+	{
+		btRigidBody* rigid = m_nonStaticRigidBodies[idx];
+		//inactive bodies are left alone
+		if (!rigid->isActive())
+			continue;
+		rigid->applyGravity();
+	}
+}
+
+
+void	btDiscreteDynamicsWorld::synchronizeSingleMotionState(btRigidBody* body)
+{
+	btAssert(body);
+
+	//nothing to do without a motion state, or for static/kinematic bodies
+	if (!body->getMotionState() || body->isStaticOrKinematicObject())
+		return;
+
+	//we need to call the update at least once, even for sleeping objects
+	//otherwise the 'graphics' transform never updates properly
+	///@todo: add 'dirty' flag
+	//if (body->getActivationState() != ISLAND_SLEEPING)
+	const btScalar elapsed = m_localTime*body->getHitFraction();
+	btTransform predicted;
+	btTransformUtil::integrateTransform(body->getInterpolationWorldTransform(),
+		body->getInterpolationLinearVelocity(),body->getInterpolationAngularVelocity(),elapsed,predicted);
+	body->getMotionState()->setWorldTransform(predicted);
+}
+
+
+void	btDiscreteDynamicsWorld::synchronizeMotionStates()
+{
+	BT_PROFILE("synchronizeMotionStates");
+
+	if (!m_synchronizeAllMotionStates)
+	{
+		//default path: only the active entries of m_nonStaticRigidBodies
+		for (int idx=0;idx<m_nonStaticRigidBodies.size();++idx)
+		{
+			btRigidBody* rigid = m_nonStaticRigidBodies[idx];
+			if (rigid->isActive())
+				synchronizeSingleMotionState(rigid);
+		}
+		return;
+	}
+
+	//otherwise every collision object that is a rigid body, active or not
+	for (int idx=0;idx<m_collisionObjects.size();++idx)
+	{
+		btRigidBody* rigid = btRigidBody::upcast(m_collisionObjects[idx]);
+		if (rigid)
+			synchronizeSingleMotionState(rigid);
+	}
+}
+
+
+int	btDiscreteDynamicsWorld::stepSimulation( btScalar timeStep,int maxSubSteps, btScalar fixedTimeStep)
+{
+	startProfiling(timeStep);
+	// Advances the world by timeStep; the return value is the sub-step count computed before clamping to maxSubSteps.
+	BT_PROFILE("stepSimulation");
+
+	int numSimulationSubSteps = 0;
+	// A non-zero maxSubSteps selects fixed-size sub-steps; the time not consumed by whole sub-steps stays in m_localTime.
+	if (maxSubSteps)
+	{
+		//fixed timestep with interpolation
+		m_localTime += timeStep;
+		if (m_localTime >= fixedTimeStep)
+		{
+			numSimulationSubSteps = int( m_localTime / fixedTimeStep);
+			m_localTime -= numSimulationSubSteps * fixedTimeStep;
+		}
+	} else
+	{
+		//variable timestep
+		fixedTimeStep = timeStep;
+		m_localTime = timeStep;
+		if (btFuzzyZero(timeStep))
+		{
+			numSimulationSubSteps = 0;
+			maxSubSteps = 0;
+		} else
+		{
+			numSimulationSubSteps = 1;
+			maxSubSteps = 1;
+		}
+	}
+
+	//process some debugging flags
+	if (getDebugDrawer())
+	{
+		btIDebugDraw* debugDrawer = getDebugDrawer ();
+		gDisableDeactivation = (debugDrawer->getDebugMode() & btIDebugDraw::DBG_NoDeactivation) != 0;
+	}
+	if (numSimulationSubSteps)
+	{
+
+		//clamp the number of substeps, to prevent simulation grinding spiralling down to a halt
+		int clampedSimulationSteps = (numSimulationSubSteps > maxSubSteps)? maxSubSteps : numSimulationSubSteps;
+
+		saveKinematicState(fixedTimeStep*clampedSimulationSteps);
+
+		applyGravity();
+
+		// Gravity is applied once above, ahead of the sub-step loop.
+
+		for (int i=0;i<clampedSimulationSteps;i++)
+		{
+			internalSingleStepSimulation(fixedTimeStep);
+			synchronizeMotionStates();
+		}
+
+	} else
+	{
+		synchronizeMotionStates();
+	}
+	// Forces are cleared on every call, whether or not a sub-step ran.
+	clearForces();
+
+#ifndef BT_NO_PROFILE
+	CProfileManager::Increment_Frame_Counter();
+#endif //BT_NO_PROFILE
+	
+	return numSimulationSubSteps;
+}
+
+void	btDiscreteDynamicsWorld::internalSingleStepSimulation(btScalar timeStep)
+{
+	// Runs one step of length timeStep; the stages below execute in the order written.
+	BT_PROFILE("internalSingleStepSimulation");
+	// Optional user callback, invoked before anything else in the step.
+	if(0 != m_internalPreTickCallback) {
+		(*m_internalPreTickCallback)(this, timeStep);
+	}	
+
+	///apply gravity, predict motion
+	predictUnconstraintMotion(timeStep);
+
+	btDispatcherInfo& dispatchInfo = getDispatchInfo();
+
+	dispatchInfo.m_timeStep = timeStep;
+	dispatchInfo.m_stepCount = 0;
+	dispatchInfo.m_debugDraw = getDebugDrawer();
+
+
+	///perform collision detection
+	performDiscreteCollisionDetection();
+
+
+	calculateSimulationIslands();
+
+	// The solver info carries the step length into solveConstraints.
+	getSolverInfo().m_timeStep = timeStep;
+	
+
+
+	///solve contact and other joint constraints
+	solveConstraints(getSolverInfo());
+	
+	///CallbackTriggers();
+
+	///integrate transforms
+	integrateTransforms(timeStep);
+
+	///update vehicle simulation
+	updateActions(timeStep);
+	
+	updateActivationState( timeStep );
+	// Optional user callback, invoked once the step is complete.
+	if(0 != m_internalTickCallback) {
+		(*m_internalTickCallback)(this, timeStep);
+	}	
+}
+
+void	btDiscreteDynamicsWorld::setGravity(const btVector3& gravity)
+{
+	m_gravity = gravity;
+	//push the new value to bodies that are active and have not opted out of world gravity
+	for (int idx=0;idx<m_nonStaticRigidBodies.size();++idx)
+	{
+		btRigidBody* rigid = m_nonStaticRigidBodies[idx];
+		if (!rigid->isActive() || (rigid->getFlags() &BT_DISABLE_WORLD_GRAVITY))
+			continue;
+		rigid->setGravity(gravity);
+	}
+}
+
+btVector3 btDiscreteDynamicsWorld::getGravity () const
+{
+	return m_gravity; // returned by value: the vector last stored by setGravity, or the constructor's (0,-10,0)
+}
+
+void	btDiscreteDynamicsWorld::addCollisionObject(btCollisionObject* collisionObject,short int collisionFilterGroup,short int collisionFilterMask)
+{
+	btCollisionWorld::addCollisionObject(collisionObject,collisionFilterGroup,collisionFilterMask); // plain forward to the base class; nothing else is recorded here
+}
+
+void	btDiscreteDynamicsWorld::removeCollisionObject(btCollisionObject* collisionObject)
+{
+	btRigidBody* rigid = btRigidBody::upcast(collisionObject); //rigid bodies take the removeRigidBody path, anything else goes to the base class
+	if (!rigid)
+		btCollisionWorld::removeCollisionObject(collisionObject);
+	else
+		removeRigidBody(rigid);
+}
+
+///Takes the body out of m_nonStaticRigidBodies and out of the underlying collision world.
+void	btDiscreteDynamicsWorld::removeRigidBody(btRigidBody* body) {
+	m_nonStaticRigidBodies.remove(body);
+	//qualified call: this class's own removeCollisionObject() would route a rigid body straight back here
+	btCollisionWorld::removeCollisionObject(body);
+}
+
+
+///Adds a rigid body with default collision filtering: a body that is neither
+///static nor kinematic uses DefaultFilter/AllFilter, any other body uses
+///StaticFilter/(AllFilter ^ StaticFilter). A body without a collision shape is not inserted.
+void	btDiscreteDynamicsWorld::addRigidBody(btRigidBody* body) {
+	const bool takesWorldGravity = !body->isStaticOrKinematicObject() && !(body->getFlags() & BT_DISABLE_WORLD_GRAVITY);
+	if (takesWorldGravity)
+		body->setGravity(m_gravity);
+
+	if (!body->getCollisionShape())
+		return;
+
+	if (body->isStaticObject())
+		body->setActivationState(ISLAND_SLEEPING);
+	else
+		m_nonStaticRigidBodies.push_back(body);
+	//start from the static/kinematic filter pair and switch to the dynamic pair when applicable
+	short collisionFilterGroup = short(btBroadphaseProxy::StaticFilter);
+	short collisionFilterMask = short(btBroadphaseProxy::AllFilter ^ btBroadphaseProxy::StaticFilter);
+	if (!body->isStaticObject() && !body->isKinematicObject()) {
+		collisionFilterGroup = short(btBroadphaseProxy::DefaultFilter);
+		collisionFilterMask = short(btBroadphaseProxy::AllFilter);
+	}
+	addCollisionObject(body, collisionFilterGroup, collisionFilterMask);
+}
+
+///Adds a rigid body with a caller-supplied collision filter group and mask.
+///A body that has no collision shape is not inserted into the world.
+///Static bodies are put to sleep; every other body joins m_nonStaticRigidBodies.
+void	btDiscreteDynamicsWorld::addRigidBody(btRigidBody* body, short group, short mask) {
+	//world gravity is only handed to bodies that are neither static nor kinematic and have not opted out
+	const bool takesWorldGravity = !body->isStaticOrKinematicObject() && !(body->getFlags() & BT_DISABLE_WORLD_GRAVITY);
+	if (takesWorldGravity)
+		body->setGravity(m_gravity);
+
+	if (!body->getCollisionShape())
+		return;
+
+	if (body->isStaticObject())
+		body->setActivationState(ISLAND_SLEEPING);
+	else
+		m_nonStaticRigidBodies.push_back(body);
+
+	addCollisionObject(body, group, mask);
+}
+
+
+
+///Invokes updateAction() on each entry of m_actions, in array order.
+void	btDiscreteDynamicsWorld::updateActions(btScalar timeStep) {
+	BT_PROFILE("updateActions");
+
+	for (int actionIdx = 0; actionIdx < m_actions.size(); ++actionIdx) {
+		btActionInterface* action = m_actions[actionIdx];
+		action->updateAction(this, timeStep);
+	}
+}
+	
+	
+///Updates the deactivation state of every body in m_nonStaticRigidBodies: bodies that want to sleep
+///are moved towards sleeping, all others are tagged ACTIVE_TAG unless deactivation is disabled for them.
+void	btDiscreteDynamicsWorld::updateActivationState(btScalar timeStep) {
+	BT_PROFILE("updateActivationState");
+
+	for (int idx = 0; idx < m_nonStaticRigidBodies.size(); ++idx)
+	{
+		btRigidBody* rb = m_nonStaticRigidBodies[idx];
+		if (!rb)
+			continue;
+		rb->updateDeactivation(timeStep);
+		if (!rb->wantsSleeping())
+		{
+			//the body does not want to sleep: tag it active unless deactivation is disabled
+			if (rb->getActivationState() != DISABLE_DEACTIVATION)
+				rb->setActivationState(ACTIVE_TAG);
+			continue;
+		}
+
+		if (rb->isStaticOrKinematicObject())
+		{
+			rb->setActivationState(ISLAND_SLEEPING);
+			continue;
+		}
+
+		//remaining bodies: an active one only requests deactivation, a sleeping one has its velocities cleared
+		if (rb->getActivationState() == ACTIVE_TAG)
+			rb->setActivationState(WANTS_DEACTIVATION);
+		if (rb->getActivationState() == ISLAND_SLEEPING)
+		{
+			rb->setAngularVelocity(btVector3(0,0,0));
+			rb->setLinearVelocity(btVector3(0,0,0));
+		}
+	}
+}
+
+///Registers a constraint with the world. When disableCollisionsBetweenLinkedBodies
+///is set, the constraint is additionally referenced on both of its rigid bodies.
+void	btDiscreteDynamicsWorld::addConstraint(btTypedConstraint* constraint,bool disableCollisionsBetweenLinkedBodies) {
+	m_constraints.push_back(constraint);
+	if (!disableCollisionsBetweenLinkedBodies)
+		return;
+	constraint->getRigidBodyA().addConstraintRef(constraint);
+	constraint->getRigidBodyB().addConstraintRef(constraint);
+}
+
+///Unregisters a constraint and drops its reference from both attached rigid bodies.
+void	btDiscreteDynamicsWorld::removeConstraint(btTypedConstraint* constraint) {
+	m_constraints.remove(constraint);
+	constraint->getRigidBodyA().removeConstraintRef(constraint);
+	constraint->getRigidBodyB().removeConstraintRef(constraint);
+}
+
+///Appends an action to m_actions; updateActions() calls it on every internal step.
+void	btDiscreteDynamicsWorld::addAction(btActionInterface* action) {
+	m_actions.push_back(action);
+}
+
+///Takes an action out of m_actions so updateActions() no longer calls it.
+void	btDiscreteDynamicsWorld::removeAction(btActionInterface* action) {
+	m_actions.remove(action);
+}
+
+
+///Obsolete alias: forwards to addAction().
+void	btDiscreteDynamicsWorld::addVehicle(btActionInterface* vehicle) {
+	addAction(vehicle);
+}
+
+///Obsolete alias: forwards to removeAction().
+void	btDiscreteDynamicsWorld::removeVehicle(btActionInterface* vehicle) {
+	removeAction(vehicle);
+}
+
+///Obsolete alias: forwards to addAction().
+void	btDiscreteDynamicsWorld::addCharacter(btActionInterface* character) {
+	addAction(character);
+}
+
+///Obsolete alias: forwards to removeAction().
+void	btDiscreteDynamicsWorld::removeCharacter(btActionInterface* character) {
+	removeAction(character);
+}
+
+
+
+
+///Sorts a copy of the world's constraints with btSortConstraintOnIslandPredicate, hands it to the
+///island callback and lets the island manager drive the solve through that callback.
+void	btDiscreteDynamicsWorld::solveConstraints(btContactSolverInfo& solverInfo) {
+	BT_PROFILE("solveConstraints");
+
+	//sort a scratch copy so that m_constraints itself keeps its order
+	m_sortedConstraints.resize( m_constraints.size());
+	for (int c = 0; c < getNumConstraints(); ++c)
+	{
+		m_sortedConstraints[c] = m_constraints[c];
+	}
+	m_sortedConstraints.quickSort(btSortConstraintOnIslandPredicate());
+
+	//without constraints there is no element 0 to take the address of
+	btTypedConstraint** constraintsPtr = 0;
+	if (getNumConstraints())
+	{
+		constraintsPtr = &m_sortedConstraints[0];
+	}
+
+	m_solverIslandCallback->setup(&solverInfo,constraintsPtr,m_sortedConstraints.size(),getDebugDrawer());
+	m_constraintSolver->prepareSolve(getCollisionWorld()->getNumCollisionObjects(), getCollisionWorld()->getDispatcher()->getNumManifolds());
+
+	//solve all the constraints, island by island
+	m_islandManager->buildAndProcessIslands(getCollisionWorld()->getDispatcher(),getCollisionWorld(),m_solverIslandCallback);
+	m_solverIslandCallback->processConstraints();
+
+	m_constraintSolver->allSolved(solverInfo, m_debugDrawer, m_stackAlloc);
+}
+
+
+///Prepares the simulation islands for this step. The island manager first
+///updates its activation state; then the island tags of every pair of bodies
+///joined by an enabled constraint are united, provided neither body is static
+///or kinematic and at least one of them is active; finally the island state is
+///stored back on the bodies.
+void	btDiscreteDynamicsWorld::calculateSimulationIslands()
+{
+	BT_PROFILE("calculateSimulationIslands");
+
+	getSimulationIslandManager()->updateActivationState(getCollisionWorld(),getCollisionWorld()->getDispatcher());
+
+	//merge the islands of bodies that are linked by an enabled constraint
+	const int numConstraints = int(m_constraints.size());
+	for (int i = 0; i < numConstraints; i++)
+	{
+		btTypedConstraint* constraint = m_constraints[i];
+		if (!constraint->isEnabled())
+			continue;
+
+		//getRigidBodyA/B yield references, so there is nothing to null-check
+		const btRigidBody& bodyA = constraint->getRigidBodyA();
+		const btRigidBody& bodyB = constraint->getRigidBodyB();
+
+		//a constraint that touches a static or kinematic body never merges islands
+		if (bodyA.isStaticOrKinematicObject() || bodyB.isStaticOrKinematicObject())
+			continue;
+
+		if (bodyA.isActive() || bodyB.isActive())
+		{
+			getSimulationIslandManager()->getUnionFind().unite(bodyA.getIslandTag(), bodyB.getIslandTag());
+		}
+	}
+
+	//Store the island id in each body
+	getSimulationIslandManager()->storeIslandActivationState(getCollisionWorld());
+}
+
+
+
+
+class btClosestNotMeConvexResultCallback : public btCollisionWorld::ClosestConvexResultCallback
+{
+public:
+	///closest-hit convex sweep callback; integrateTransforms uses it for CCD motion clamping
+	btCollisionObject* m_me;	///< the object being swept; never reported as a hit
+	btScalar m_allowedPenetration;	///< a hit only counts when hitNormal.dot(sweep) is below -m_allowedPenetration
+	btOverlappingPairCache* m_pairCache;	///< only referenced by the disabled (#if 0) manifold check in needsCollision
+	btDispatcher* m_dispatcher;	///< asked through needsResponse() whether m_me and the other object respond to each other
+
+public:
+	btClosestNotMeConvexResultCallback (btCollisionObject* me,const btVector3& fromA,const btVector3& toA,btOverlappingPairCache* pairCache,btDispatcher* dispatcher) : 
+	  btCollisionWorld::ClosestConvexResultCallback(fromA,toA),
+		m_me(me),
+		m_allowedPenetration(0.0f),
+		m_pairCache(pairCache),
+		m_dispatcher(dispatcher)
+	{
+	}
+	///returns 1.0 for rejected hits (m_me itself, objects without contact response, sweeps not moving into the surface); other hits go to the base class
+	virtual btScalar addSingleResult(btCollisionWorld::LocalConvexResult& convexResult,bool normalInWorldSpace)
+	{
+		if (convexResult.m_hitCollisionObject == m_me)
+			return 1.0f;
+
+		//ignore result if there is no contact response
+		if(!convexResult.m_hitCollisionObject->hasContactResponse())
+			return 1.0f;
+
+		btVector3 linVelA,linVelB;
+		linVelA = m_convexToWorld-m_convexFromWorld;
+		linVelB = btVector3(0,0,0);//toB.getOrigin()-fromB.getOrigin();
+		//the hit object is treated as stationary, so the relative velocity is just the sweep vector
+		btVector3 relativeVelocity = (linVelA-linVelB);
+		//don't report time of impact for motion away from the contact normal (or causes minor penetration)
+		if (convexResult.m_hitNormalLocal.dot(relativeVelocity)>=-m_allowedPenetration)
+			return 1.f;
+
+		return ClosestConvexResultCallback::addSingleResult (convexResult, normalInWorldSpace);
+	}
+	///broadphase filter: rejects m_me itself, proxies refused by the base-class filter, and pairs for which the dispatcher reports no response
+	virtual bool needsCollision(btBroadphaseProxy* proxy0) const
+	{
+		//don't collide with itself
+		if (proxy0->m_clientObject == m_me)
+			return false;
+
+		///don't do CCD when the collision filters are not matching
+		if (!ClosestConvexResultCallback::needsCollision(proxy0))
+			return false;
+
+		btCollisionObject* otherObj = (btCollisionObject*) proxy0->m_clientObject;
+
+		//call needsResponse, see http://code.google.com/p/bullet/issues/detail?id=179
+		if (m_dispatcher->needsResponse(m_me,otherObj))
+		{
+#if 0
+			///don't do CCD when there are already contact points (touching contact/penetration)
+			btAlignedObjectArray<btPersistentManifold*> manifoldArray;
+			btBroadphasePair* collisionPair = m_pairCache->findPair(m_me->getBroadphaseHandle(),proxy0);
+			if (collisionPair)
+			{
+				if (collisionPair->m_algorithm)
+				{
+					manifoldArray.resize(0);
+					collisionPair->m_algorithm->getAllContactManifolds(manifoldArray);
+					for (int j=0;j<manifoldArray.size();j++)
+					{
+						btPersistentManifold* manifold = manifoldArray[j];
+						if (manifold->getNumContacts()>0)
+							return false;
+					}
+				}
+			}
+#endif
+			return true;
+		}
+
+		return false;
+	}
+
+
+};
+
+///internal debugging counter: integrateTransforms increments it each time a convex body enters the CCD motion clamping path. this value shouldn't be too high
+int gNumClampedCcdMotions=0;
+
+void	btDiscreteDynamicsWorld::integrateTransforms(btScalar timeStep)
+{	///moves every active, non-static, non-kinematic body to its predicted transform; a convex body whose motion exceeds its CCD threshold is swept first and clamped to the first hit
+	BT_PROFILE("integrateTransforms");
+	btTransform predictedTrans;
+	for ( int i=0;i<m_nonStaticRigidBodies.size();i++)
+	{
+		btRigidBody* body = m_nonStaticRigidBodies[i];
+		body->setHitFraction(1.f);
+
+		if (body->isActive() && (!body->isStaticOrKinematicObject()))
+		{
+
+			body->predictIntegratedTransform(timeStep, predictedTrans);
+			
+			btScalar squareMotion = (predictedTrans.getOrigin()-body->getWorldTransform().getOrigin()).length2();
+
+			
+			//CCD only runs when continuous dispatch is enabled and the body has a non-zero threshold that this step's squared motion exceeds
+			if (getDispatchInfo().m_useContinuous && body->getCcdSquareMotionThreshold() && body->getCcdSquareMotionThreshold() < squareMotion)
+			{
+				BT_PROFILE("CCD motion clamping");
+				if (body->getCollisionShape()->isConvex())
+				{
+					gNumClampedCcdMotions++;
+#ifdef USE_STATIC_ONLY
+					class StaticOnlyCallback : public btClosestNotMeConvexResultCallback
+					{
+					public:
+
+						StaticOnlyCallback (btCollisionObject* me,const btVector3& fromA,const btVector3& toA,btOverlappingPairCache* pairCache,btDispatcher* dispatcher) : 
+						  btClosestNotMeConvexResultCallback(me,fromA,toA,pairCache,dispatcher)
+						{
+						}
+
+					  	virtual bool needsCollision(btBroadphaseProxy* proxy0) const
+						{
+							btCollisionObject* otherObj = (btCollisionObject*) proxy0->m_clientObject;
+							if (!otherObj->isStaticOrKinematicObject())
+								return false;
+							return btClosestNotMeConvexResultCallback::needsCollision(proxy0);
+						}
+					};
+
+					StaticOnlyCallback sweepResults(body,body->getWorldTransform().getOrigin(),predictedTrans.getOrigin(),getBroadphase()->getOverlappingPairCache(),getDispatcher());
+#else
+					btClosestNotMeConvexResultCallback sweepResults(body,body->getWorldTransform().getOrigin(),predictedTrans.getOrigin(),getBroadphase()->getOverlappingPairCache(),getDispatcher());
+#endif
+					//btConvexShape* convexShape = static_cast<btConvexShape*>(body->getCollisionShape());
+					btSphereShape tmpSphere(body->getCcdSweptSphereRadius());//btConvexShape* convexShape = static_cast<btConvexShape*>(body->getCollisionShape());
+					sweepResults.m_allowedPenetration=getDispatchInfo().m_allowedCcdPenetration;
+					//the sweep uses the body's own broadphase filter group and mask
+					sweepResults.m_collisionFilterGroup = body->getBroadphaseProxy()->m_collisionFilterGroup;
+					sweepResults.m_collisionFilterMask  = body->getBroadphaseProxy()->m_collisionFilterMask;
+					btTransform modifiedPredictedTrans = predictedTrans;
+					modifiedPredictedTrans.setBasis(body->getWorldTransform().getBasis());
+					//the sweep target keeps the current orientation, so only the translation is tested
+					convexSweepTest(&tmpSphere,body->getWorldTransform(),modifiedPredictedTrans,sweepResults);
+					if (sweepResults.hasHit() && (sweepResults.m_closestHitFraction < 1.f))
+					{
+						//re-predict over the fraction of the step before the hit and move the body there
+						//printf("clamped integration to hit fraction = %f\n",fraction);
+						body->setHitFraction(sweepResults.m_closestHitFraction);
+						body->predictIntegratedTransform(timeStep*body->getHitFraction(), predictedTrans);
+						body->setHitFraction(0.f);
+						body->proceedToTransform( predictedTrans);
+
+#if 0
+						btVector3 linVel = body->getLinearVelocity();
+
+						btScalar maxSpeed = body->getCcdMotionThreshold()/getSolverInfo().m_timeStep;
+						btScalar maxSpeedSqr = maxSpeed*maxSpeed;
+						if (linVel.length2()>maxSpeedSqr)
+						{
+							linVel.normalize();
+							linVel*= maxSpeed;
+							body->setLinearVelocity(linVel);
+							btScalar ms2 = body->getLinearVelocity().length2();
+							body->predictIntegratedTransform(timeStep, predictedTrans);
+
+							btScalar sm2 = (predictedTrans.getOrigin()-body->getWorldTransform().getOrigin()).length2();
+							btScalar smt = body->getCcdSquareMotionThreshold();
+							printf("sm2=%f\n",sm2);
+						}
+#else
+						//response  between two dynamic objects without friction, assuming 0 penetration depth
+						btScalar appliedImpulse = 0.f;
+						btScalar depth = 0.f;
+						appliedImpulse = resolveSingleCollision(body,sweepResults.m_hitCollisionObject,sweepResults.m_hitPointWorld,sweepResults.m_hitNormalWorld,getSolverInfo(), depth);
+						
+
+#endif
+						//this body has already been moved; skip the unclamped proceedToTransform below
+        				continue;
+					}
+				}
+			}
+			
+
+			body->proceedToTransform( predictedTrans);
+		}
+	}
+}
+
+
+
+
+
+
+///Integrates velocities, applies damping and writes the predicted transform into the
+///interpolation world transform of every body that is neither static nor kinematic.
+void	btDiscreteDynamicsWorld::predictUnconstraintMotion(btScalar timeStep) {
+	BT_PROFILE("predictUnconstraintMotion");
+	for (int idx = 0; idx < m_nonStaticRigidBodies.size(); ++idx)
+	{
+		btRigidBody* rb = m_nonStaticRigidBodies[idx];
+		if (rb->isStaticOrKinematicObject())
+			continue;	//kinematic bodies are kept in the list but are not integrated here
+
+		rb->integrateVelocities(timeStep);
+		rb->applyDamping(timeStep);
+
+		rb->predictIntegratedTransform(timeStep, rb->getInterpolationWorldTransform());
+	}
+}
+
+
+///Resets the profile manager; compiles to nothing when BT_NO_PROFILE is defined.
+///timeStep is not used.
+void	btDiscreteDynamicsWorld::startProfiling(btScalar timeStep) {
+#ifndef BT_NO_PROFILE
+	CProfileManager::Reset();
+#endif //BT_NO_PROFILE
+
+	(void)timeStep;	//keeps the unused parameter from being reported
+}
+
+
+
+
+	
+
+void btDiscreteDynamicsWorld::debugDrawConstraint(btTypedConstraint* constraint)
+{	///draws the frames and/or limit geometry of one constraint through getDebugDrawer(), as selected by the drawer's debug mode flags
+	bool drawFrames = (getDebugDrawer()->getDebugMode() & btIDebugDraw::DBG_DrawConstraints) != 0;
+	bool drawLimits = (getDebugDrawer()->getDebugMode() & btIDebugDraw::DBG_DrawConstraintLimits) != 0;
+	btScalar dbgDrawSize = constraint->getDbgDrawSize();
+	if(dbgDrawSize <= btScalar(0.f))
+	{
+		return;
+	}
+	//a non-positive draw size returned above; constraint types not listed below draw nothing
+	switch(constraint->getConstraintType())
+	{
+		case POINT2POINT_CONSTRAINT_TYPE:
+			{
+				btPoint2PointConstraint* p2pC = (btPoint2PointConstraint*)constraint;
+				btTransform tr;
+				tr.setIdentity();
+				btVector3 pivot = p2pC->getPivotInA();
+				pivot = p2pC->getRigidBodyA().getCenterOfMassTransform() * pivot; 
+				tr.setOrigin(pivot);
+				getDebugDrawer()->drawTransform(tr, dbgDrawSize);	//NOTE(review): drawn even when drawFrames is false, unlike the pivot of body B below - confirm this is intended
+				// that ideally should draw the same frame	
+				pivot = p2pC->getPivotInB();
+				pivot = p2pC->getRigidBodyB().getCenterOfMassTransform() * pivot; 
+				tr.setOrigin(pivot);
+				if(drawFrames) getDebugDrawer()->drawTransform(tr, dbgDrawSize);
+			}
+			break;
+		case HINGE_CONSTRAINT_TYPE:
+			{
+				btHingeConstraint* pHinge = (btHingeConstraint*)constraint;
+				btTransform tr = pHinge->getRigidBodyA().getCenterOfMassTransform() * pHinge->getAFrame();
+				if(drawFrames) getDebugDrawer()->drawTransform(tr, dbgDrawSize);
+				tr = pHinge->getRigidBodyB().getCenterOfMassTransform() * pHinge->getBFrame();
+				if(drawFrames) getDebugDrawer()->drawTransform(tr, dbgDrawSize);
+				btScalar minAng = pHinge->getLowerLimit();
+				btScalar maxAng = pHinge->getUpperLimit();
+				if(minAng == maxAng)	//equal limits: no limit arc is drawn
+				{
+					break;
+				}
+				bool drawSect = true;
+				if(minAng > maxAng)	//lower above upper: the arc spans 0..2*pi and drawSect is cleared
+				{
+					minAng = btScalar(0.f);
+					maxAng = SIMD_2_PI;
+					drawSect = false;
+				}
+				if(drawLimits) 
+				{
+					btVector3& center = tr.getOrigin();
+					btVector3 normal = tr.getBasis().getColumn(2);
+					btVector3 axis = tr.getBasis().getColumn(0);
+					getDebugDrawer()->drawArc(center, normal, axis, dbgDrawSize, dbgDrawSize, minAng, maxAng, btVector3(0,0,0), drawSect);
+				}
+			}
+			break;
+		case CONETWIST_CONSTRAINT_TYPE:
+			{
+				btConeTwistConstraint* pCT = (btConeTwistConstraint*)constraint;
+				btTransform tr = pCT->getRigidBodyA().getCenterOfMassTransform() * pCT->getAFrame();
+				if(drawFrames) getDebugDrawer()->drawTransform(tr, dbgDrawSize);
+				tr = pCT->getRigidBodyB().getCenterOfMassTransform() * pCT->getBFrame();
+				if(drawFrames) getDebugDrawer()->drawTransform(tr, dbgDrawSize);
+				if(drawLimits)
+				{
+					//const btScalar length = btScalar(5);
+					const btScalar length = dbgDrawSize;
+					static int nSegments = 8*4;
+					btScalar fAngleInRadians = btScalar(2.*3.1415926) * (btScalar)(nSegments-1)/btScalar(nSegments);
+					btVector3 pPrev = pCT->GetPointForAngle(fAngleInRadians, length);
+					pPrev = tr * pPrev;
+					for (int i=0; i<nSegments; i++)
+					{
+						fAngleInRadians = btScalar(2.*3.1415926) * (btScalar)i/btScalar(nSegments);
+						btVector3 pCur = pCT->GetPointForAngle(fAngleInRadians, length);
+						pCur = tr * pCur;
+						getDebugDrawer()->drawLine(pPrev, pCur, btVector3(0,0,0));
+						//every (nSegments/8)-th point also gets a line from the frame origin
+						if (i%(nSegments/8) == 0)
+							getDebugDrawer()->drawLine(tr.getOrigin(), pCur, btVector3(0,0,0));
+
+						pPrev = pCur;
+					}						
+					btScalar tws = pCT->getTwistSpan();
+					btScalar twa = pCT->getTwistAngle();
+					bool useFrameB = (pCT->getRigidBodyB().getInvMass() > btScalar(0.f));
+					if(useFrameB)
+					{
+						tr = pCT->getRigidBodyB().getCenterOfMassTransform() * pCT->getBFrame();
+					}
+					else
+					{
+						tr = pCT->getRigidBodyA().getCenterOfMassTransform() * pCT->getAFrame();
+					}
+					btVector3 pivot = tr.getOrigin();
+					btVector3 normal = tr.getBasis().getColumn(0);
+					btVector3 axis1 = tr.getBasis().getColumn(1);
+					getDebugDrawer()->drawArc(pivot, normal, axis1, dbgDrawSize, dbgDrawSize, -twa-tws, -twa+tws, btVector3(0,0,0), true);
+
+				}
+			}
+			break;
+		case D6_SPRING_CONSTRAINT_TYPE:
+		case D6_CONSTRAINT_TYPE:
+			{
+				btGeneric6DofConstraint* p6DOF = (btGeneric6DofConstraint*)constraint;
+				btTransform tr = p6DOF->getCalculatedTransformA();
+				if(drawFrames) getDebugDrawer()->drawTransform(tr, dbgDrawSize);
+				tr = p6DOF->getCalculatedTransformB();
+				if(drawFrames) getDebugDrawer()->drawTransform(tr, dbgDrawSize);
+				if(drawLimits) 
+				{
+					tr = p6DOF->getCalculatedTransformA();
+					const btVector3& center = p6DOF->getCalculatedTransformB().getOrigin();
+					btVector3 up = tr.getBasis().getColumn(2);
+					btVector3 axis = tr.getBasis().getColumn(0);
+					btScalar minTh = p6DOF->getRotationalLimitMotor(1)->m_loLimit;
+					btScalar maxTh = p6DOF->getRotationalLimitMotor(1)->m_hiLimit;
+					btScalar minPs = p6DOF->getRotationalLimitMotor(2)->m_loLimit;
+					btScalar maxPs = p6DOF->getRotationalLimitMotor(2)->m_hiLimit;
+					getDebugDrawer()->drawSpherePatch(center, up, axis, dbgDrawSize * btScalar(.9f), minTh, maxTh, minPs, maxPs, btVector3(0,0,0));
+					axis = tr.getBasis().getColumn(1);
+					btScalar ay = p6DOF->getAngle(1);
+					btScalar az = p6DOF->getAngle(2);
+					btScalar cy = btCos(ay);
+					btScalar sy = btSin(ay);
+					btScalar cz = btCos(az);
+					btScalar sz = btSin(az);
+					btVector3 ref;
+					ref[0] = cy*cz*axis[0] + cy*sz*axis[1] - sy*axis[2];
+					ref[1] = -sz*axis[0] + cz*axis[1];
+					ref[2] = cz*sy*axis[0] + sz*sy*axis[1] + cy*axis[2];
+					tr = p6DOF->getCalculatedTransformB();
+					btVector3 normal = -tr.getBasis().getColumn(0);
+					btScalar minFi = p6DOF->getRotationalLimitMotor(0)->m_loLimit;
+					btScalar maxFi = p6DOF->getRotationalLimitMotor(0)->m_hiLimit;
+					if(minFi > maxFi)
+					{
+						getDebugDrawer()->drawArc(center, normal, ref, dbgDrawSize, dbgDrawSize, -SIMD_PI, SIMD_PI, btVector3(0,0,0), false);
+					}
+					else if(minFi < maxFi)
+					{
+						getDebugDrawer()->drawArc(center, normal, ref, dbgDrawSize, dbgDrawSize, minFi, maxFi, btVector3(0,0,0), true);
+					}
+					tr = p6DOF->getCalculatedTransformA();
+					btVector3 bbMin = p6DOF->getTranslationalLimitMotor()->m_lowerLimit;
+					btVector3 bbMax = p6DOF->getTranslationalLimitMotor()->m_upperLimit;
+					getDebugDrawer()->drawBox(bbMin, bbMax, tr, btVector3(0,0,0));
+				}
+			}
+			break;
+		case SLIDER_CONSTRAINT_TYPE:
+			{
+				btSliderConstraint* pSlider = (btSliderConstraint*)constraint;
+				btTransform tr = pSlider->getCalculatedTransformA();
+				if(drawFrames) getDebugDrawer()->drawTransform(tr, dbgDrawSize);
+				tr = pSlider->getCalculatedTransformB();
+				if(drawFrames) getDebugDrawer()->drawTransform(tr, dbgDrawSize);
+				if(drawLimits)
+				{
+					btTransform tr = pSlider->getUseLinearReferenceFrameA() ? pSlider->getCalculatedTransformA() : pSlider->getCalculatedTransformB();	//shadows the outer tr on purpose or by accident - the outer value is not used again
+					btVector3 li_min = tr * btVector3(pSlider->getLowerLinLimit(), 0.f, 0.f);
+					btVector3 li_max = tr * btVector3(pSlider->getUpperLinLimit(), 0.f, 0.f);
+					getDebugDrawer()->drawLine(li_min, li_max, btVector3(0, 0, 0));
+					btVector3 normal = tr.getBasis().getColumn(0);
+					btVector3 axis = tr.getBasis().getColumn(1);
+					btScalar a_min = pSlider->getLowerAngLimit();
+					btScalar a_max = pSlider->getUpperAngLimit();
+					const btVector3& center = pSlider->getCalculatedTransformB().getOrigin();
+					getDebugDrawer()->drawArc(center, normal, axis, dbgDrawSize, dbgDrawSize, a_min, a_max, btVector3(0,0,0), true);
+				}
+			}
+			break;
+		default : 
+			break;
+	}
+	return;
+}
+
+
+
+
+
+///Installs a caller-owned solver. A solver the world still owns is destroyed and released first; afterwards the world owns no solver.
+void	btDiscreteDynamicsWorld::setConstraintSolver(btConstraintSolver* solver) {
+	if (m_ownsConstraintSolver) {
+		m_constraintSolver->~btConstraintSolver();	//btAlignedFree only returns the raw block, so run the destructor first; assumes the owned solver was placement-constructed in btAlignedAlloc storage - TODO confirm against the constructor
+		btAlignedFree( m_constraintSolver);
+	}
+	m_ownsConstraintSolver = false;
+	m_constraintSolver = solver;	//NOTE(review): m_solverIslandCallback is not told about the new solver here - confirm it does not cache the old pointer
+}
+
+///Returns the constraint solver currently used by the world.
+btConstraintSolver* btDiscreteDynamicsWorld::getConstraintSolver() {
+	return m_constraintSolver;
+}
+
+
+///Returns the number of constraints registered through addConstraint().
+int		btDiscreteDynamicsWorld::getNumConstraints() const {
+	return static_cast<int>(m_constraints.size());
+}
+///Returns the constraint stored at position index of m_constraints; index is not range-checked here.
+btTypedConstraint* btDiscreteDynamicsWorld::getConstraint(int index) {
+	return m_constraints[index];
+}
+///Const overload: returns the constraint stored at position index of m_constraints; index is not range-checked here.
+const btTypedConstraint* btDiscreteDynamicsWorld::getConstraint(int index) const {
+	return m_constraints[index];
+}
+
+
+
+///Writes one chunk per rigid body (tagged BT_RIGIDBODY_CODE) followed by one
+///chunk per constraint (tagged BT_CONSTRAINT_CODE) to the serializer.
+void	btDiscreteDynamicsWorld::serializeRigidBodies(btSerializer* serializer) {
+	//rigid bodies first; collision objects of any other internal type are skipped here
+	for (int objIdx = 0; objIdx < m_collisionObjects.size(); ++objIdx)
+	{
+		btCollisionObject* colObj = m_collisionObjects[objIdx];
+		if (!(colObj->getInternalType() & btCollisionObject::CO_RIGID_BODY))
+			continue;
+
+		int bufferSize = colObj->calculateSerializeBufferSize();
+		btChunk* bodyChunk = serializer->allocate(bufferSize,1);
+		const char* bodyStructType = colObj->serialize(bodyChunk->m_oldPtr, serializer);
+		serializer->finalizeChunk(bodyChunk,bodyStructType,BT_RIGIDBODY_CODE,colObj);
+	}
+
+	for (int conIdx = 0; conIdx < m_constraints.size(); ++conIdx)
+	{
+		btTypedConstraint* constraint = m_constraints[conIdx];
+		int bufferSize = constraint->calculateSerializeBufferSize();
+		btChunk* constraintChunk = serializer->allocate(bufferSize,1);
+		const char* constraintStructType = constraint->serialize(constraintChunk->m_oldPtr,serializer);
+		serializer->finalizeChunk(constraintChunk,constraintStructType,BT_CONSTRAINT_CODE,constraint);
+	}
+}
+
+
+///Serializes the world: serializeRigidBodies() first, then serializeCollisionObjects(),
+///bracketed by the serializer's startSerialization()/finishSerialization() calls.
+void	btDiscreteDynamicsWorld::serialize(btSerializer* serializer) {
+	serializer->startSerialization();
+
+	//rigid bodies and constraints are written before the collision objects
+	serializeRigidBodies(serializer);
+	serializeCollisionObjects(serializer);
+
+	serializer->finishSerialization();
+}
+
diff --git a/src/bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h b/src/bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h
new file mode 100644
index 00000000..23a38dd2
--- /dev/null
+++ b/src/bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h
@@ -0,0 +1,203 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_DISCRETE_DYNAMICS_WORLD_H
+#define BT_DISCRETE_DYNAMICS_WORLD_H
+
+#include "btDynamicsWorld.h"
+
+class btDispatcher;
+class btOverlappingPairCache;
+class btConstraintSolver;
+class btSimulationIslandManager;
+class btTypedConstraint;
+class btActionInterface;
+
+class btIDebugDraw;
+struct InplaceSolverIslandCallback;
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+
+///btDiscreteDynamicsWorld provides discrete rigid body simulation
+///those classes replace the obsolete CcdPhysicsEnvironment/CcdPhysicsController
+class btDiscreteDynamicsWorld : public btDynamicsWorld
+{
+protected:
+	///scratch copy of m_constraints that solveConstraints() refills and sorts on every call
+    btAlignedObjectArray<btTypedConstraint*>	m_sortedConstraints;
+	InplaceSolverIslandCallback* 	m_solverIslandCallback;
+	///solver in use; setConstraintSolver() releases it with btAlignedFree when m_ownsConstraintSolver is set
+	btConstraintSolver*	m_constraintSolver;
+
+	btSimulationIslandManager*	m_islandManager;
+	///every constraint registered through addConstraint()
+	btAlignedObjectArray<btTypedConstraint*> m_constraints;
+	///bodies added through addRigidBody() that are not static objects (kinematic bodies are included)
+	btAlignedObjectArray<btRigidBody*> m_nonStaticRigidBodies;
+	///world gravity, see setGravity()
+	btVector3	m_gravity;
+
+	//for variable timesteps
+	btScalar	m_localTime;
+	//for variable timesteps
+
+	bool	m_ownsIslandManager;
+	bool	m_ownsConstraintSolver;
+	bool	m_synchronizeAllMotionStates;
+	///actions registered through addAction(); updateActions() calls each of them
+	btAlignedObjectArray<btActionInterface*>	m_actions;
+	
+	int	m_profileTimings;
+	///integrates velocities, applies damping and predicts the transform of each body that is neither static nor kinematic
+	virtual void	predictUnconstraintMotion(btScalar timeStep);
+	///moves active bodies to their predicted transform, with optional CCD motion clamping
+	virtual void	integrateTransforms(btScalar timeStep);
+		
+	virtual void	calculateSimulationIslands();
+
+	virtual void	solveConstraints(btContactSolverInfo& solverInfo);
+	
+	void	updateActivationState(btScalar timeStep);
+
+	void	updateActions(btScalar timeStep);
+
+	void	startProfiling(btScalar timeStep);
+	///runs one fixed sub-step: callbacks, prediction, collision detection, islands, solving, integration, actions, activation
+	virtual void	internalSingleStepSimulation( btScalar timeStep);
+
+
+	virtual void	saveKinematicState(btScalar timeStep);
+	///writes one chunk per rigid body and one per constraint to the serializer
+	void	serializeRigidBodies(btSerializer* serializer);
+
+public:
+
+
+	///this btDiscreteDynamicsWorld constructor gets created objects from the user, and will not delete those
+	btDiscreteDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration);
+
+	virtual ~btDiscreteDynamicsWorld();
+
+	///if maxSubSteps > 0, it will interpolate motion between fixedTimeStep's
+	virtual int	stepSimulation( btScalar timeStep,int maxSubSteps=1, btScalar fixedTimeStep=btScalar(1.)/btScalar(60.));
+
+
+	virtual void	synchronizeMotionStates();
+
+	///this can be useful to synchronize a single rigid body -> graphics object
+	void	synchronizeSingleMotionState(btRigidBody* body);
+	///registers a constraint; when disableCollisionsBetweenLinkedBodies is true it is also referenced on both attached bodies
+	virtual void	addConstraint(btTypedConstraint* constraint, bool disableCollisionsBetweenLinkedBodies=false);
+
+	virtual void	removeConstraint(btTypedConstraint* constraint);
+
+	virtual void	addAction(btActionInterface*);
+
+	virtual void	removeAction(btActionInterface*);
+	
+	btSimulationIslandManager*	getSimulationIslandManager()
+	{
+		return m_islandManager;
+	}
+
+	const btSimulationIslandManager*	getSimulationIslandManager() const 
+	{
+		return m_islandManager;
+	}
+
+	btCollisionWorld*	getCollisionWorld()
+	{
+		return this;
+	}
+	///stores the gravity and applies it to active non-static bodies that do not set BT_DISABLE_WORLD_GRAVITY
+	virtual void	setGravity(const btVector3& gravity);
+
+	virtual btVector3 getGravity () const;
+
+	virtual void	addCollisionObject(btCollisionObject* collisionObject,short int collisionFilterGroup=btBroadphaseProxy::StaticFilter,short int collisionFilterMask=btBroadphaseProxy::AllFilter ^ btBroadphaseProxy::StaticFilter);
+	///adds the body with default filtering: DefaultFilter/AllFilter when it is neither static nor kinematic, otherwise StaticFilter/(AllFilter ^ StaticFilter)
+	virtual void	addRigidBody(btRigidBody* body);
+	///adds the body with an explicit collision filter group and mask
+	virtual void	addRigidBody(btRigidBody* body, short group, short mask);
+
+	virtual void	removeRigidBody(btRigidBody* body);
+
+	///removeCollisionObject will first check if it is a rigid body, if so call removeRigidBody otherwise call btCollisionWorld::removeCollisionObject
+	virtual void	removeCollisionObject(btCollisionObject* collisionObject);
+
+	///draws the frames and/or limits of one constraint through the debug drawer; does nothing when the constraint's debug draw size is not positive
+	void	debugDrawConstraint(btTypedConstraint* constraint);
+
+	virtual void	debugDrawWorld();
+	///installs a caller-supplied solver; the world does not own its solver afterwards
+	virtual void	setConstraintSolver(btConstraintSolver* solver);
+
+	virtual btConstraintSolver* getConstraintSolver();
+	
+	virtual	int		getNumConstraints() const;
+
+	virtual btTypedConstraint* getConstraint(int index)	;
+
+	virtual const btTypedConstraint* getConstraint(int index) const;
+
+	
+	virtual btDynamicsWorldType	getWorldType() const
+	{
+		return BT_DISCRETE_DYNAMICS_WORLD;
+	}
+	
+	///the forces on each rigid body accumulate together with gravity; clear them after each timestep.
+	virtual void	clearForces();
+
+	///apply gravity, call this once per timestep
+	virtual void	applyGravity();
+
+	virtual void	setNumTasks(int numTasks)
+	{
+        (void) numTasks;
+	}
+
+	///obsolete, use updateActions instead
+	virtual void updateVehicles(btScalar timeStep)
+	{
+		updateActions(timeStep);
+	}
+
+	///obsolete, use addAction instead
+	virtual void	addVehicle(btActionInterface* vehicle);
+	///obsolete, use removeAction instead
+	virtual void	removeVehicle(btActionInterface* vehicle);
+	///obsolete, use addAction instead
+	virtual void	addCharacter(btActionInterface* character);
+	///obsolete, use removeAction instead
+	virtual void	removeCharacter(btActionInterface* character);
+
+	void	setSynchronizeAllMotionStates(bool synchronizeAll)
+	{
+		m_synchronizeAllMotionStates = synchronizeAll;
+	}
+	bool getSynchronizeAllMotionStates() const
+	{
+		return m_synchronizeAllMotionStates;
+	}
+
+	///Preliminary serialization test for Bullet 2.76. Loading those files requires a separate parser (see Bullet/Demos/SerializeDemo)
+	virtual	void	serialize(btSerializer* serializer);
+
+};
+
+#endif //BT_DISCRETE_DYNAMICS_WORLD_H
diff --git a/src/bullet/BulletDynamics/Dynamics/btDynamicsWorld.h b/src/bullet/BulletDynamics/Dynamics/btDynamicsWorld.h
new file mode 100644
index 00000000..6b009337
--- /dev/null
+++ b/src/bullet/BulletDynamics/Dynamics/btDynamicsWorld.h
@@ -0,0 +1,151 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_DYNAMICS_WORLD_H
+#define BT_DYNAMICS_WORLD_H
+
+#include "BulletCollision/CollisionDispatch/btCollisionWorld.h"
+#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
+
+class btTypedConstraint;
+class btActionInterface;
+class btConstraintSolver;
+class btDynamicsWorld;
+
+
+/// Type for the callback for each tick
+typedef void (*btInternalTickCallback)(btDynamicsWorld *world, btScalar timeStep);
+
+enum btDynamicsWorldType
+{
+	BT_SIMPLE_DYNAMICS_WORLD=1,
+	BT_DISCRETE_DYNAMICS_WORLD=2,
+	BT_CONTINUOUS_DYNAMICS_WORLD=3,
+	BT_SOFT_RIGID_DYNAMICS_WORLD=4
+};
+
+///The btDynamicsWorld is the interface class for several dynamics implementations: basic, discrete, parallel, continuous, etc.
+class btDynamicsWorld : public btCollisionWorld
+{
+	//Abstract interface: stepping, debug drawing, actions, gravity, rigid bodies, the solver and clearForces are pure virtual here.
+protected:
+		btInternalTickCallback m_internalTickCallback;	//set by setInternalTickCallback when isPreTick is false
+		btInternalTickCallback m_internalPreTickCallback;	//set by setInternalTickCallback when isPreTick is true
+		void*	m_worldUserInfo;	//opaque user pointer; only stored and returned by this class
+
+		btContactSolverInfo	m_solverInfo;	//handed out by reference through getSolverInfo()
+
+public:
+		//The constructor forwards its three arguments to btCollisionWorld and starts with both callbacks and the user info set to 0.
+
+		btDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* broadphase,btCollisionConfiguration* collisionConfiguration)
+		:btCollisionWorld(dispatcher,broadphase,collisionConfiguration), m_internalTickCallback(0),m_internalPreTickCallback(0), m_worldUserInfo(0)
+		{
+		}
+
+		virtual ~btDynamicsWorld()
+		{
+		}
+		
+		///stepSimulation advances the simulation over 'timeStep', units preferably in seconds.
+		///By default, Bullet will subdivide the timestep in constant substeps of each 'fixedTimeStep'.
+		///In order to keep the simulation real-time, the maximum number of substeps can be clamped to 'maxSubSteps'.
+		///You can disable subdividing the timestep/substepping by passing maxSubSteps=0 as second argument to stepSimulation, but in that case you have to keep the timeStep constant.
+		virtual int		stepSimulation( btScalar timeStep,int maxSubSteps=1, btScalar fixedTimeStep=btScalar(1.)/btScalar(60.))=0;
+			
+		virtual void	debugDrawWorld() = 0;
+		//The two constraint hooks below have empty default bodies: they ignore their arguments.
+		virtual void	addConstraint(btTypedConstraint* constraint, bool disableCollisionsBetweenLinkedBodies=false) 
+		{ 
+			(void)constraint; (void)disableCollisionsBetweenLinkedBodies;
+		}
+
+		virtual void	removeConstraint(btTypedConstraint* constraint) {(void)constraint;}
+
+		virtual void	addAction(btActionInterface* action) = 0;
+
+		virtual void	removeAction(btActionInterface* action) = 0;
+
+		//once a rigidbody is added to the dynamics world, it will get this gravity assigned
+		//existing rigidbodies in the world get gravity assigned too, during this method
+		virtual void	setGravity(const btVector3& gravity) = 0;
+		virtual btVector3 getGravity () const = 0;
+
+		virtual void	synchronizeMotionStates() = 0;
+
+		virtual void	addRigidBody(btRigidBody* body) = 0;
+
+		virtual void	addRigidBody(btRigidBody* body, short group, short mask) = 0;
+
+		virtual void	removeRigidBody(btRigidBody* body) = 0;
+
+		virtual void	setConstraintSolver(btConstraintSolver* solver) = 0;
+
+		virtual btConstraintSolver* getConstraintSolver() = 0;
+		//Default constraint queries: this base class reports zero constraints and a null constraint for any index.
+		virtual	int		getNumConstraints() const {	return 0;		}
+		
+		virtual btTypedConstraint* getConstraint(int index)		{	(void)index;		return 0;		}
+		
+		virtual const btTypedConstraint* getConstraint(int index) const	{	(void)index;	return 0;	}
+
+		virtual btDynamicsWorldType	getWorldType() const=0;
+
+		virtual void	clearForces() = 0;
+
+		/// Set the callback for when an internal tick (simulation substep) happens, optional user info
+		void setInternalTickCallback(btInternalTickCallback cb,	void* worldUserInfo=0,bool isPreTick=false) 
+		{ 
+			if (isPreTick)
+			{
+				m_internalPreTickCallback = cb;
+			} else
+			{
+				m_internalTickCallback = cb; 
+			}
+			m_worldUserInfo = worldUserInfo;	//always overwritten, so a call that relies on the default 0 also resets the user info
+		}
+
+		void	setWorldUserInfo(void* worldUserInfo)	//replaces the user pointer without touching either callback
+		{
+			m_worldUserInfo = worldUserInfo;
+		}
+
+		void*	getWorldUserInfo() const
+		{
+			return m_worldUserInfo;
+		}
+
+		btContactSolverInfo& getSolverInfo()	//mutable reference: callers edit the solver settings in place
+		{
+			return m_solverInfo;
+		}
+
+		//The four obsolete hooks below have empty default bodies in this base class.
+		///obsolete, use addAction instead.
+		virtual void	addVehicle(btActionInterface* vehicle) {(void)vehicle;}
+		///obsolete, use removeAction instead
+		virtual void	removeVehicle(btActionInterface* vehicle) {(void)vehicle;}
+		///obsolete, use addAction instead.
+		virtual void	addCharacter(btActionInterface* character) {(void)character;}
+		///obsolete, use removeAction instead
+		virtual void	removeCharacter(btActionInterface* character) {(void)character;}
+
+
+};
+
+#endif //BT_DYNAMICS_WORLD_H
+
+
diff --git a/src/bullet/BulletDynamics/Dynamics/btRigidBody.cpp b/src/bullet/BulletDynamics/Dynamics/btRigidBody.cpp
new file mode 100644
index 00000000..911b5072
--- /dev/null
+++ b/src/bullet/BulletDynamics/Dynamics/btRigidBody.cpp
@@ -0,0 +1,403 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btRigidBody.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "LinearMath/btMinMax.h"
+#include "LinearMath/btTransformUtil.h"
+#include "LinearMath/btMotionState.h"
+#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
+#include "LinearMath/btSerializer.h"
+
+//'temporarily' global variables
+btScalar	gDeactivationTime = btScalar(2.);	//limit compared with m_deactivationTime in btRigidBody::wantsSleeping; a value of 0 makes wantsSleeping return false
+bool	gDisableDeactivation = false;	//when true, btRigidBody::wantsSleeping returns false for every body
+static int uniqueId = 0;	//next m_debugBodyId; post-incremented once per setupRigidBody call
+
+
+btRigidBody::btRigidBody(const btRigidBody::btRigidBodyConstructionInfo& constructionInfo)	//all initialisation is delegated
+{
+	setupRigidBody(constructionInfo);	//copies the settings and zeroes the dynamic state
+}
+
+btRigidBody::btRigidBody(btScalar mass, btMotionState *motionState, btCollisionShape *collisionShape, const btVector3 &localInertia)
+{
+	btRigidBodyConstructionInfo cinfo(mass,motionState,collisionShape,localInertia);	//every other setting keeps the construction-info default
+	setupRigidBody(cinfo);	//same path as the construction-info constructor
+}
+
+void	btRigidBody::setupRigidBody(const btRigidBody::btRigidBodyConstructionInfo& constructionInfo)
+{
+	//Shared constructor body: copies the tunables from constructionInfo and zeroes all velocities and accumulators.
+	m_internalType=CO_RIGID_BODY;
+
+	m_linearVelocity.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
+	m_angularVelocity.setValue(btScalar(0.),btScalar(0.),btScalar(0.));
+	m_angularFactor.setValue(1,1,1);
+	m_linearFactor.setValue(1,1,1);
+	m_gravity.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
+	m_gravity_acceleration.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
+	m_totalForce.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
+	m_totalTorque.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
+	setDamping(constructionInfo.m_linearDamping, constructionInfo.m_angularDamping);
+
+	m_linearSleepingThreshold = constructionInfo.m_linearSleepingThreshold;
+	m_angularSleepingThreshold = constructionInfo.m_angularSleepingThreshold;
+	m_optionalMotionState = constructionInfo.m_motionState;
+	m_contactSolverType = 0;
+	m_frictionSolverType = 0;
+	m_additionalDamping = constructionInfo.m_additionalDamping;
+	m_additionalDampingFactor = constructionInfo.m_additionalDampingFactor;
+	m_additionalLinearDampingThresholdSqr = constructionInfo.m_additionalLinearDampingThresholdSqr;
+	m_additionalAngularDampingThresholdSqr = constructionInfo.m_additionalAngularDampingThresholdSqr;
+	m_additionalAngularDampingFactor = constructionInfo.m_additionalAngularDampingFactor;
+	//a motion state, when supplied, takes precedence over m_startWorldTransform for the initial pose
+	if (m_optionalMotionState)
+	{
+		m_optionalMotionState->getWorldTransform(m_worldTransform);
+	} else
+	{
+		m_worldTransform = constructionInfo.m_startWorldTransform;
+	}
+
+	m_interpolationWorldTransform = m_worldTransform;
+	m_interpolationLinearVelocity.setValue(0,0,0);
+	m_interpolationAngularVelocity.setValue(0,0,0);
+	
+	//moved to btCollisionObject
+	m_friction = constructionInfo.m_friction;
+	m_restitution = constructionInfo.m_restitution;
+
+	setCollisionShape( constructionInfo.m_collisionShape );
+	m_debugBodyId = uniqueId++;
+	//setMassProps reads m_gravity_acceleration and m_linearFactor, both initialised above
+	setMassProps(constructionInfo.m_mass, constructionInfo.m_localInertia);
+	updateInertiaTensor();
+
+	m_rigidbodyFlags = 0;
+
+
+	m_deltaLinearVelocity.setZero();
+	m_deltaAngularVelocity.setZero();
+	m_invMass = m_inverseMass*m_linearFactor;
+	m_pushVelocity.setZero();
+	m_turnVelocity.setZero();
+
+	
+
+}
+
+
+void btRigidBody::predictIntegratedTransform(btScalar timeStep,btTransform& predictedTransform) 	//writes the result to predictedTransform; no member is assigned
+{
+	btTransformUtil::integrateTransform(m_worldTransform,m_linearVelocity,m_angularVelocity,timeStep,predictedTransform);	//integrates from the current pose and velocities
+}
+
+void			btRigidBody::saveKinematicState(btScalar timeStep)
+{
+	//todo: clamp to some (user definable) safe minimum timestep, to limit maximum angular/linear velocities
+	if (timeStep != btScalar(0.))
+	{
+		//if we use motionstate to synchronize world transforms, get the new kinematic/animated world transform
+		if (getMotionState())
+			getMotionState()->getWorldTransform(m_worldTransform);
+		//velocities are computed from the previous (interpolation) pose and the new pose over timeStep
+		//and written straight into the members, so no temporaries are needed
+		btTransformUtil::calculateVelocity(m_interpolationWorldTransform,m_worldTransform,timeStep,m_linearVelocity,m_angularVelocity);
+		m_interpolationLinearVelocity = m_linearVelocity;
+		m_interpolationAngularVelocity = m_angularVelocity;
+		m_interpolationWorldTransform = m_worldTransform;
+		//printf("angular = %f %f %f\n",m_angularVelocity.getX(),m_angularVelocity.getY(),m_angularVelocity.getZ());
+	}
+}
+	
+void	btRigidBody::getAabb(btVector3& aabbMin,btVector3& aabbMax) const	//both bounds are output parameters
+{
+	getCollisionShape()->getAabb(m_worldTransform,aabbMin,aabbMax);	//delegates to the shape, passing the current world transform
+}
+
+
+
+
+void btRigidBody::setGravity(const btVector3& acceleration) 	//takes an acceleration; the matching force is derived below
+{
+	if (m_inverseMass != btScalar(0.0))	//with zero inverse mass m_gravity keeps its previous value
+	{
+		m_gravity = acceleration * (btScalar(1.0) / m_inverseMass);	//force = acceleration * mass, with mass recovered as 1/m_inverseMass
+	}
+	m_gravity_acceleration = acceleration;	//stored unconditionally; this is what getGravity() returns
+}
+
+
+
+
+
+
+void btRigidBody::setDamping(btScalar lin_damping, btScalar ang_damping)
+{
+	m_angularDamping = btClamped(ang_damping, btScalar(0.0), btScalar(1.0));	//each coefficient is confined to [0,1]
+	m_linearDamping = btClamped(lin_damping, btScalar(0.0), btScalar(1.0));
+}
+
+
+
+
+///applyDamping damps the velocity, using the given m_linearDamping and m_angularDamping
+void			btRigidBody::applyDamping(btScalar timeStep)
+{
+	//On new damping: see discussion/issue report here: http://code.google.com/p/bullet/issues/detail?id=74
+	//todo: do some performance comparisons (but other parts of the engine are probably the bottleneck anyway)
+
+//#define USE_OLD_DAMPING_METHOD 1
+#ifdef USE_OLD_DAMPING_METHOD
+	m_linearVelocity *= btClamped((btScalar(1.) - timeStep * m_linearDamping), (btScalar)btScalar(0.0), (btScalar)btScalar(1.0));
+	m_angularVelocity *= btClamped((btScalar(1.) - timeStep * m_angularDamping), (btScalar)btScalar(0.0), (btScalar)btScalar(1.0));
+#else
+	m_linearVelocity *= btPow(btScalar(1)-m_linearDamping, timeStep);
+	m_angularVelocity *= btPow(btScalar(1)-m_angularDamping, timeStep);
+#endif
+
+	if (m_additionalDamping)
+	{
+		//Additional damping can help avoiding lowpass jitter motion, help stability for ragdolls etc.
+		//Such damping is undesirable, so once the overall simulation quality of the rigid body dynamics system has improved, this should become obsolete
+		if ((m_angularVelocity.length2() < m_additionalAngularDampingThresholdSqr) &&
+			(m_linearVelocity.length2() < m_additionalLinearDampingThresholdSqr))
+		{
+			m_angularVelocity *= m_additionalDampingFactor;	//NOTE(review): m_additionalAngularDampingFactor is not read in this function - confirm that is intended
+			m_linearVelocity *= m_additionalDampingFactor;
+		}
+		//below the damping coefficient, a fixed 0.005 is taken off the speed per call, or the velocity is zeroed
+
+		btScalar speed = m_linearVelocity.length();
+		if (speed < m_linearDamping)
+		{
+			btScalar dampVel = btScalar(0.005);
+			if (speed > dampVel)
+			{
+				btVector3 dir = m_linearVelocity.normalized();
+				m_linearVelocity -=  dir * dampVel;
+			} else
+			{
+				m_linearVelocity.setValue(btScalar(0.),btScalar(0.),btScalar(0.));
+			}
+		}
+
+		btScalar angSpeed = m_angularVelocity.length();
+		if (angSpeed < m_angularDamping)
+		{
+			btScalar angDampVel = btScalar(0.005);
+			if (angSpeed > angDampVel)
+			{
+				btVector3 dir = m_angularVelocity.normalized();
+				m_angularVelocity -=  dir * angDampVel;
+			} else
+			{
+				m_angularVelocity.setValue(btScalar(0.),btScalar(0.),btScalar(0.));
+			}
+		}
+	}
+}
+
+
+void btRigidBody::applyGravity()
+{
+	//static and kinematic bodies never accumulate the gravity force
+	if (!isStaticOrKinematicObject())
+	{
+		applyCentralForce(m_gravity);	//m_gravity holds the force (mass * acceleration), not the acceleration
+	}
+}
+
+void btRigidBody::proceedToTransform(const btTransform& newTrans)	//thin wrapper with no logic of its own
+{
+	setCenterOfMassTransform( newTrans );	//updates the pose, the interpolation state and the world inertia tensor
+}
+	
+
+void btRigidBody::setMassProps(btScalar mass, const btVector3& inertia)
+{
+	//a mass of exactly zero flags the body static; any other value makes it dynamic
+	if (mass != btScalar(0.))
+	{
+		m_collisionFlags &= (~btCollisionObject::CF_STATIC_OBJECT);
+		m_inverseMass = btScalar(1.0) / mass;
+	} else
+	{
+		m_collisionFlags |= btCollisionObject::CF_STATIC_OBJECT;
+		m_inverseMass = btScalar(0.);
+	}
+
+	//Fg = m * a
+	m_gravity = mass * m_gravity_acceleration;
+	//invert the diagonal inertia per axis; a zero entry maps to zero instead of being divided
+	m_invInertiaLocal.setValue(inertia.x() == btScalar(0.0) ? btScalar(0.0) : btScalar(1.0) / inertia.x(),
+				   inertia.y() == btScalar(0.0) ? btScalar(0.0) : btScalar(1.0) / inertia.y(),
+				   inertia.z() == btScalar(0.0) ? btScalar(0.0) : btScalar(1.0) / inertia.z());
+	m_invMass = m_linearFactor*m_inverseMass;
+}
+
+	
+
+void btRigidBody::updateInertiaTensor() 	//rebuilds the world-space inverse inertia from the current orientation
+{
+	m_invInertiaTensorWorld = m_worldTransform.getBasis().scaled(m_invInertiaLocal) * m_worldTransform.getBasis().transpose();	//basis scaled by the local inverse inertia, times the basis transpose
+}
+
+
+void btRigidBody::integrateVelocities(btScalar step) 
+{
+	if (isStaticOrKinematicObject())
+		return;
+
+	m_linearVelocity += m_totalForce * (m_inverseMass * step);
+	m_angularVelocity += m_invInertiaTensorWorld * m_totalTorque * step;
+
+	const btScalar maxAngVel = SIMD_HALF_PI;	//cap on angvel*step; a local constant keeps the name out of the rest of the file
+	/// clamp angular velocity. collision calculations will fail on higher angular velocities	
+	btScalar angvel = m_angularVelocity.length();
+	if (angvel*step > maxAngVel)
+	{
+		m_angularVelocity *= (maxAngVel/step) /angvel;
+	}
+
+}
+
+btQuaternion btRigidBody::getOrientation() const
+{
+		btQuaternion rotation;	//filled from the basis of the current world transform
+		m_worldTransform.getBasis().getRotation(rotation);
+		return rotation;
+}
+	
+	
+void btRigidBody::setCenterOfMassTransform(const btTransform& xform)
+{
+	//kinematic bodies keep their previous pose for interpolation; all others interpolate from the new pose
+	if (!isKinematicObject())
+	{
+		m_interpolationWorldTransform = xform;
+	} else
+	{
+		m_interpolationWorldTransform = m_worldTransform;
+	}
+	m_interpolationLinearVelocity = getLinearVelocity();
+	m_interpolationAngularVelocity = getAngularVelocity();
+	m_worldTransform = xform;
+	updateInertiaTensor();
+}
+
+
+bool btRigidBody::checkCollideWithOverride(btCollisionObject* co)
+{
+	//collision is refused only against another rigid body that shares an enabled constraint with this one;
+	//an object that is not a rigid body always collides
+	btRigidBody* const other = btRigidBody::upcast(co);
+	if (other)
+	{
+		for (int refIdx = 0; refIdx < m_constraintRefs.size(); ++refIdx)
+		{
+			btTypedConstraint* const ref = m_constraintRefs[refIdx];
+			if (ref->isEnabled() && (&ref->getRigidBodyA() == other || &ref->getRigidBodyB() == other))
+				return false;
+		}
+	}
+	return true;
+}
+
+void	btRigidBody::internalWritebackVelocity(btScalar timeStep)
+{
+	//folds the solver deltas into the velocities; bodies with zero inverse mass are left untouched
+	if (m_inverseMass != btScalar(0.))
+	{
+		setLinearVelocity(getLinearVelocity()+ m_deltaLinearVelocity);
+		setAngularVelocity(getAngularVelocity()+m_deltaAngularVelocity);
+		
+		//correct the position/orientation based on push/turn recovery
+		btTransform newTransform;
+		btTransformUtil::integrateTransform(getWorldTransform(),m_pushVelocity,m_turnVelocity,timeStep,newTransform);
+		setWorldTransform(newTransform);
+		//m_originalBody->setCompanionId(-1);
+	}
+//	m_deltaLinearVelocity.setZero();
+//	m_deltaAngularVelocity .setZero();
+//	m_pushVelocity.setZero();
+//	m_turnVelocity.setZero();
+}
+
+
+
+void btRigidBody::addConstraintRef(btTypedConstraint* c)
+{
+	//an index equal to size() is treated as "not found", so each constraint is stored at most once
+	const bool alreadyTracked = (m_constraintRefs.findLinearSearch(c) != m_constraintRefs.size());
+	if (!alreadyTracked)
+		m_constraintRefs.push_back(c); 
+	m_checkCollideWith = true;
+}
+
+void btRigidBody::removeConstraintRef(btTypedConstraint* c)	//counterpart of addConstraintRef
+{
+	m_constraintRefs.remove(c);
+	m_checkCollideWith = m_constraintRefs.size() > 0;	//stays set only while at least one constraint reference remains
+}
+
+int	btRigidBody::calculateSerializeBufferSize()	const
+{
+	//size of one btRigidBodyData record (float or double layout, selected by BT_USE_DOUBLE_PRECISION)
+	return int(sizeof(btRigidBodyData));
+}
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+const char*	btRigidBody::serialize(void* dataBuffer, class btSerializer* serializer) const
+{
+	btRigidBodyData* rbd = (btRigidBodyData*) dataBuffer;	//the caller's buffer is treated as one btRigidBodyData record
+
+	btCollisionObject::serialize(&rbd->m_collisionObjectData, serializer);	//base-class part goes into the embedded collision-object record
+
+	m_invInertiaTensorWorld.serialize(rbd->m_invInertiaTensorWorld);
+	m_linearVelocity.serialize(rbd->m_linearVelocity);
+	m_angularVelocity.serialize(rbd->m_angularVelocity);
+	rbd->m_inverseMass = m_inverseMass;
+	m_angularFactor.serialize(rbd->m_angularFactor);
+	m_linearFactor.serialize(rbd->m_linearFactor);
+	m_gravity.serialize(rbd->m_gravity);
+	m_gravity_acceleration.serialize(rbd->m_gravity_acceleration);
+	m_invInertiaLocal.serialize(rbd->m_invInertiaLocal);
+	m_totalForce.serialize(rbd->m_totalForce);
+	m_totalTorque.serialize(rbd->m_totalTorque);
+	rbd->m_linearDamping = m_linearDamping;
+	rbd->m_angularDamping = m_angularDamping;
+	rbd->m_additionalDamping = m_additionalDamping;
+	rbd->m_additionalDampingFactor = m_additionalDampingFactor;
+	rbd->m_additionalLinearDampingThresholdSqr = m_additionalLinearDampingThresholdSqr;
+	rbd->m_additionalAngularDampingThresholdSqr = m_additionalAngularDampingThresholdSqr;
+	rbd->m_additionalAngularDampingFactor = m_additionalAngularDampingFactor;
+	rbd->m_linearSleepingThreshold=m_linearSleepingThreshold;
+	rbd->m_angularSleepingThreshold = m_angularSleepingThreshold;
+
+	return btRigidBodyDataName;	//struct name matching the precision-dependent record type
+}
+
+
+
+void btRigidBody::serializeSingleObject(class btSerializer* serializer) const	//writes this body as one chunk
+{
+	btChunk* chunk = serializer->allocate(calculateSerializeBufferSize(),1);	//room for exactly one record
+	const char* structType = serialize(chunk->m_oldPtr, serializer);	//fills the chunk buffer and yields the struct name
+	serializer->finalizeChunk(chunk,structType,BT_RIGIDBODY_CODE,(void*)this);	//tagged with the rigid-body code and this object's address
+}
+
+
diff --git a/src/bullet/BulletDynamics/Dynamics/btRigidBody.h b/src/bullet/BulletDynamics/Dynamics/btRigidBody.h
new file mode 100644
index 00000000..7c121e6d
--- /dev/null
+++ b/src/bullet/BulletDynamics/Dynamics/btRigidBody.h
@@ -0,0 +1,691 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_RIGIDBODY_H
+#define BT_RIGIDBODY_H
+
+#include "LinearMath/btAlignedObjectArray.h"
+#include "LinearMath/btTransform.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+
+class btCollisionShape;
+class btMotionState;
+class btTypedConstraint;
+
+
+extern btScalar gDeactivationTime;
+extern bool gDisableDeactivation;
+
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btRigidBodyData	btRigidBodyDoubleData
+#define btRigidBodyDataName	"btRigidBodyDoubleData"
+#else
+#define btRigidBodyData	btRigidBodyFloatData
+#define btRigidBodyDataName	"btRigidBodyFloatData"
+#endif //BT_USE_DOUBLE_PRECISION
+
+
+enum	btRigidBodyFlags	//presumably the bit values kept in btRigidBody::m_rigidbodyFlags - confirm against the flag accessors
+{
+	BT_DISABLE_WORLD_GRAVITY = 1	//NOTE(review): the name suggests the world's gravity is not applied to the body; the consumer is not visible here
+};
+
+
+///The btRigidBody is the main class for rigid body objects. It is derived from btCollisionObject, so it keeps a pointer to a btCollisionShape.
+///It is recommended for performance and memory use to share btCollisionShape objects whenever possible.
+///There are 3 types of rigid bodies: 
+///- A) Dynamic rigid bodies, with positive mass. Motion is controlled by rigid body dynamics.
+///- B) Fixed objects with zero mass. They are not moving (basically collision objects)
+///- C) Kinematic objects, which are objects without mass, but the user can move them. There is one-way interaction, and Bullet calculates a velocity based on the timestep and previous and current world transform.
+///Bullet automatically deactivates dynamic rigid bodies, when the velocity is below a threshold for a given time.
+///Deactivated (sleeping) rigid bodies don't take any processing time, except a minor broadphase collision detection impact (to allow active objects to activate/wake up sleeping objects)
+class btRigidBody  : public btCollisionObject
+{
+
+	btMatrix3x3	m_invInertiaTensorWorld;
+	btVector3		m_linearVelocity;
+	btVector3		m_angularVelocity;
+	btScalar		m_inverseMass;
+	btVector3		m_linearFactor;
+
+	btVector3		m_gravity;	
+	btVector3		m_gravity_acceleration;
+	btVector3		m_invInertiaLocal;
+	btVector3		m_totalForce;
+	btVector3		m_totalTorque;
+	
+	btScalar		m_linearDamping;
+	btScalar		m_angularDamping;
+
+	bool			m_additionalDamping;
+	btScalar		m_additionalDampingFactor;
+	btScalar		m_additionalLinearDampingThresholdSqr;
+	btScalar		m_additionalAngularDampingThresholdSqr;
+	btScalar		m_additionalAngularDampingFactor;
+
+
+	btScalar		m_linearSleepingThreshold;
+	btScalar		m_angularSleepingThreshold;
+
+	//m_optionalMotionState allows automatic synchronization of the world transform for active objects
+	btMotionState*	m_optionalMotionState;
+
+	//keep track of typed constraints referencing this rigid body
+	btAlignedObjectArray<btTypedConstraint*> m_constraintRefs;
+
+	int				m_rigidbodyFlags;
+	
+	int				m_debugBodyId;
+	
+
+protected:
+
+	ATTRIBUTE_ALIGNED64(btVector3		m_deltaLinearVelocity);
+	btVector3		m_deltaAngularVelocity;
+	btVector3		m_angularFactor;
+	btVector3		m_invMass;
+	btVector3		m_pushVelocity;
+	btVector3		m_turnVelocity;
+
+
+public:
+
+
+	///The btRigidBodyConstructionInfo structure provides information to create a rigid body. Setting mass to zero creates a fixed (non-dynamic) rigid body.
+	///For dynamic objects, you can use the collision shape to approximate the local inertia tensor, otherwise use the zero vector (default argument)
+	///You can use the motion state to synchronize the world transform between physics and graphics objects. 
+	///And if the motion state is provided, the rigid body will initialize its initial world transform from the motion state,
+	///m_startWorldTransform is only used when you don't provide a motion state.
+	struct	btRigidBodyConstructionInfo
+	{
+		btScalar			m_mass;	//exactly zero makes the body static (see btRigidBody::setMassProps)
+
+		///When a motionState is provided, the rigid body will initialize its world transform from the motion state
+		///In this case, m_startWorldTransform is ignored.
+		btMotionState*		m_motionState;
+		btTransform	m_startWorldTransform;	//set to identity by the constructor
+
+		btCollisionShape*	m_collisionShape;
+		btVector3			m_localInertia;
+		btScalar			m_linearDamping;	//clamped to [0,1] by btRigidBody::setDamping when the body is set up
+		btScalar			m_angularDamping;	//clamped to [0,1] by btRigidBody::setDamping when the body is set up
+
+		///best simulation results when friction is non-zero
+		btScalar			m_friction;
+		///best simulation results using zero restitution.
+		btScalar			m_restitution;
+
+		btScalar			m_linearSleepingThreshold;
+		btScalar			m_angularSleepingThreshold;
+
+		//Additional damping can help avoiding lowpass jitter motion, help stability for ragdolls etc.
+		//Such damping is undesirable, so once the overall simulation quality of the rigid body dynamics system has improved, this should become obsolete
+		bool				m_additionalDamping;
+		btScalar			m_additionalDampingFactor;
+		btScalar			m_additionalLinearDampingThresholdSqr;
+		btScalar			m_additionalAngularDampingThresholdSqr;
+		btScalar			m_additionalAngularDampingFactor;
+		//Only mass, motion state, shape and inertia are constructor arguments; every other field gets the default listed below.
+		btRigidBodyConstructionInfo(	btScalar mass, btMotionState* motionState, btCollisionShape* collisionShape, const btVector3& localInertia=btVector3(0,0,0)):
+		m_mass(mass),
+			m_motionState(motionState),
+			m_collisionShape(collisionShape),
+			m_localInertia(localInertia),
+			m_linearDamping(btScalar(0.)),
+			m_angularDamping(btScalar(0.)),
+			m_friction(btScalar(0.5)),
+			m_restitution(btScalar(0.)),
+			m_linearSleepingThreshold(btScalar(0.8)),
+			m_angularSleepingThreshold(btScalar(1.f)),
+			m_additionalDamping(false),
+			m_additionalDampingFactor(btScalar(0.005)),
+			m_additionalLinearDampingThresholdSqr(btScalar(0.01)),
+			m_additionalAngularDampingThresholdSqr(btScalar(0.01)),
+			m_additionalAngularDampingFactor(btScalar(0.01))
+		{
+			m_startWorldTransform.setIdentity();
+		}
+	};
+
+	///btRigidBody constructor using construction info
+	btRigidBody(	const btRigidBodyConstructionInfo& constructionInfo);
+
+	///btRigidBody constructor for backwards compatibility. 
+	///To specify friction (etc) during rigid body construction, please use the other constructor (using btRigidBodyConstructionInfo)
+	btRigidBody(	btScalar mass, btMotionState* motionState, btCollisionShape* collisionShape, const btVector3& localInertia=btVector3(0,0,0));
+
+
+	virtual ~btRigidBody()
+        { 
+                //No constraints should point to this rigidbody
+		//Remove constraints from the dynamics world before you delete the related rigidbodies. 
+                btAssert(m_constraintRefs.size()==0); 	//check only: the destructor itself detaches nothing
+        }
+
+protected:
+
+	///setupRigidBody is only used internally by the constructor
+	void	setupRigidBody(const btRigidBodyConstructionInfo& constructionInfo);
+
+public:
+
+	void			proceedToTransform(const btTransform& newTrans); 
+	
+	///to keep collision detection and dynamics separate we don't store a rigidbody pointer
+	///but a rigidbody is derived from btCollisionObject, so we can safely perform an upcast
+	static const btRigidBody*	upcast(const btCollisionObject* colObj)
+	{
+		//the internal-type bitmask decides whether the object may be viewed as a btRigidBody
+		const bool isRigidBody = (colObj->getInternalType()&btCollisionObject::CO_RIGID_BODY) != 0;
+		return isRigidBody ? (const btRigidBody*)colObj : 0;
+	}
+	static btRigidBody*	upcast(btCollisionObject* colObj)
+	{
+		//non-const twin: a null result means the CO_RIGID_BODY bit is not set
+		const bool isRigidBody = (colObj->getInternalType()&btCollisionObject::CO_RIGID_BODY) != 0;
+		return isRigidBody ? (btRigidBody*)colObj : 0;
+	}
+	
+	/// continuous collision detection needs prediction
+	void			predictIntegratedTransform(btScalar step, btTransform& predictedTransform) ;
+	
+	void			saveKinematicState(btScalar step);
+	
+	void			applyGravity();
+	
+	void			setGravity(const btVector3& acceleration);  
+
+	const btVector3&	getGravity() const	//returns the acceleration, not the force kept in m_gravity
+	{
+		return m_gravity_acceleration;
+	}
+
+	void			setDamping(btScalar lin_damping, btScalar ang_damping);
+
+	btScalar getLinearDamping() const	//value stored by setDamping, which clamps it to [0,1]
+	{
+		return m_linearDamping;
+	}
+
+	btScalar getAngularDamping() const	//value stored by setDamping, which clamps it to [0,1]
+	{
+		return m_angularDamping;
+	}
+
+	btScalar getLinearSleepingThreshold() const	//unsquared speed limit; updateDeactivation compares its square with length2()
+	{
+		return m_linearSleepingThreshold;
+	}
+
+	btScalar getAngularSleepingThreshold() const	//unsquared speed limit; updateDeactivation compares its square with length2()
+	{
+		return m_angularSleepingThreshold;
+	}
+
+	void			applyDamping(btScalar timeStep);
+
+	SIMD_FORCE_INLINE const btCollisionShape*	getCollisionShape() const {	//read-only view of m_collisionShape
+		return m_collisionShape;
+	}
+
+	SIMD_FORCE_INLINE btCollisionShape*	getCollisionShape() {	//mutable view of the same m_collisionShape pointer
+			return m_collisionShape;
+	}
+	
+	void			setMassProps(btScalar mass, const btVector3& inertia);
+	
+	const btVector3& getLinearFactor() const	//per-axis multiplier applied to forces and impulses
+	{
+		return m_linearFactor;
+	}
+	void setLinearFactor(const btVector3& linearFactor)
+	{
+		m_linearFactor = linearFactor;
+		m_invMass = m_linearFactor*m_inverseMass;	//keeps the cached per-axis inverse mass consistent with the new factor
+	}
+	btScalar		getInvMass() const { return m_inverseMass; }	//scalar inverse mass; setMassProps stores 0 for a zero mass
+	const btMatrix3x3& getInvInertiaTensorWorld() const { 	//the matrix last computed by updateInertiaTensor
+		return m_invInertiaTensorWorld; 
+	}
+		
+	void			integrateVelocities(btScalar step);
+
+	void			setCenterOfMassTransform(const btTransform& xform);
+
+	void			applyCentralForce(const btVector3& force)	//accumulates only; velocities change later in integrateVelocities
+	{
+		m_totalForce += force*m_linearFactor;	//the force is scaled per axis by the linear factor before being added
+	}
+
+	const btVector3& getTotalForce() const	//force accumulated since the last clearForces()
+	{
+		return m_totalForce;
+	}
+
+	const btVector3& getTotalTorque() const	//torque accumulated since the last clearForces()
+	{
+		return m_totalTorque;
+	}
+    
+	const btVector3& getInvInertiaDiagLocal() const	//per-axis inverse of the local inertia diagonal
+	{
+		return m_invInertiaLocal;
+	}
+
+	void	setInvInertiaDiagLocal(const btVector3& diagInvInertia)	//expects the already-inverted diagonal
+	{
+		m_invInertiaLocal = diagInvInertia;	//m_invInertiaTensorWorld is not refreshed here; updateInertiaTensor() does that
+	}
+
+	void	setSleepingThresholds(btScalar linear,btScalar angular)	//unsquared values; updateDeactivation squares them
+	{
+		m_linearSleepingThreshold = linear;
+		m_angularSleepingThreshold = angular;
+	}
+
+	void	applyTorque(const btVector3& torque)	//accumulates only; integrateVelocities turns it into angular velocity
+	{
+		m_totalTorque += torque*m_angularFactor;	//scaled per axis by the angular factor before being added
+	}
+	
+	void	applyForce(const btVector3& force, const btVector3& rel_pos) 	//force applied at offset rel_pos
+	{
+		applyCentralForce(force);	//linear part
+		applyTorque(rel_pos.cross(force*m_linearFactor));	//angular part: rel_pos x (force scaled by the linear factor)
+	}
+	
+	void applyCentralImpulse(const btVector3& impulse)	//changes the linear velocity immediately
+	{
+		m_linearVelocity += impulse *m_linearFactor * m_inverseMass;	//impulse scaled by the linear factor and the inverse mass
+	}
+	
+  	void applyTorqueImpulse(const btVector3& torque)	//changes the angular velocity immediately
+	{
+			m_angularVelocity += m_invInertiaTensorWorld * torque * m_angularFactor;	//world inverse inertia times torque, scaled by the angular factor
+	}
+	
+	void applyImpulse(const btVector3& impulse, const btVector3& rel_pos) 	//impulse applied at offset rel_pos
+	{
+		if (m_inverseMass != btScalar(0.))	//bodies with zero inverse mass ignore impulses entirely
+		{
+			applyCentralImpulse(impulse);
+			if (m_angularFactor)	//NOTE(review): m_angularFactor is a btVector3, so this test relies on an implicit conversion (presumably to a pointer, which would always be true) - confirm against btVector3
+			{
+				applyTorqueImpulse(rel_pos.cross(impulse*m_linearFactor));	//rel_pos x (impulse scaled by the linear factor)
+			}
+		}
+	}
+
+	void clearForces() 
+	{
+		m_totalTorque.setZero();	//both accumulators are reset; velocities are not touched
+		m_totalForce.setZero();
+	}
+	
+	void updateInertiaTensor();    
+	
+	const btVector3&     getCenterOfMassPosition() const { 	//origin of the current world transform
+		return m_worldTransform.getOrigin(); 
+	}
+	btQuaternion getOrientation() const;
+	
+	const btTransform&  getCenterOfMassTransform() const { 	//the current world transform itself
+		return m_worldTransform; 
+	}
+	const btVector3&   getLinearVelocity() const { 	//reference to the member, not a copy
+		return m_linearVelocity; 
+	}
+	const btVector3&    getAngularVelocity() const { 	//reference to the member, not a copy
+		return m_angularVelocity; 
+	}
+	
+
+	inline void setLinearVelocity(const btVector3& lin_vel)	//plain assignment; no clamping or activation happens here
+	{ 
+		m_linearVelocity = lin_vel; 
+	}
+
+	inline void setAngularVelocity(const btVector3& ang_vel) 	//plain assignment; no clamping or activation happens here
+	{ 
+		m_angularVelocity = ang_vel; 
+	}
+
+	btVector3 getVelocityInLocalPoint(const btVector3& rel_pos) const	//velocity of the point at offset rel_pos
+	{
+		//we also calculate lin/ang velocity for kinematic objects
+		return m_linearVelocity + m_angularVelocity.cross(rel_pos);	//linear velocity plus angular velocity x rel_pos
+
+		//for kinematic objects, we could also use:
+		//		return 	(m_worldTransform(rel_pos) - m_interpolationWorldTransform(rel_pos)) / m_kinematicTimeStep;
+	}
+
+	void translate(const btVector3& v) 	//shifts only the origin of m_worldTransform
+	{
+		m_worldTransform.getOrigin() += v; 	//the interpolation transform is not updated here
+	}
+
+	
+	void	getAabb(btVector3& aabbMin,btVector3& aabbMax) const;
+
+
+
+
+	
+	SIMD_FORCE_INLINE btScalar computeImpulseDenominator(const btVector3& pos, const btVector3& normal) const
+	{
+		//offset from the centre of mass to pos
+		const btVector3 arm = pos - getCenterOfMassPosition();
+
+		//angular term: ((arm x normal) * inverse world inertia) x arm
+		const btVector3 angular = (arm.cross(normal) * getInvInertiaTensorWorld()).cross(arm);
+
+		//the linear term is the scalar inverse mass
+		return m_inverseMass + normal.dot(angular);
+	}
+
+	///angular-only denominator: axis . (axis * getInvInertiaTensorWorld())
+	SIMD_FORCE_INLINE btScalar computeAngularImpulseDenominator(const btVector3& axis) const
+	{
+		return axis.dot(axis * getInvInertiaTensorWorld());
+	}
+
+	///m_deactivationTime grows by timeStep while both velocities are below their sleeping thresholds; otherwise it is reset and setActivationState(0) is called
+	SIMD_FORCE_INLINE void	updateDeactivation(btScalar timeStep)
+	{
+		if ( (getActivationState() == ISLAND_SLEEPING) || (getActivationState() == DISABLE_DEACTIVATION))
+			return;	//these two states are never tracked
+		const bool belowThresholds =
+			(getLinearVelocity().length2() < m_linearSleepingThreshold*m_linearSleepingThreshold) &&
+			(getAngularVelocity().length2() < m_angularSleepingThreshold*m_angularSleepingThreshold);
+		if (belowThresholds)
+			m_deactivationTime += timeStep;
+		else
+		{
+			m_deactivationTime=btScalar(0.);
+			setActivationState(0);
+		}
+	}
+
+	///true when the body is ISLAND_SLEEPING or WANTS_DEACTIVATION, or when m_deactivationTime exceeds gDeactivationTime;
+	///always false when deactivation is disabled for this body or globally
+	SIMD_FORCE_INLINE bool	wantsSleeping()
+	{
+		//per-body opt-out
+		if (getActivationState() == DISABLE_DEACTIVATION)
+			return false;
+
+		//global opt-out: the disable flag is set or the global deactivation time is zero
+		if (gDisableDeactivation || (gDeactivationTime == btScalar(0.)))
+			return false;
+
+		//already asleep, or already flagged for deactivation
+		if ( (getActivationState() == ISLAND_SLEEPING) || (getActivationState() == WANTS_DEACTIVATION))
+			return true;
+
+		//otherwise the accumulated time decides
+		return m_deactivationTime> gDeactivationTime;
+	}
+
+
+	
+	///broadphase handle of this body (m_broadphaseHandle), const overload
+	const btBroadphaseProxy*	getBroadphaseProxy() const {
+		return m_broadphaseHandle;
+	}
+	///broadphase handle of this body (m_broadphaseHandle), mutable overload
+	btBroadphaseProxy*	getBroadphaseProxy() {
+		return m_broadphaseHandle;
+	}
+	///stores broadphaseProxy as the body's broadphase handle; the previous handle is simply overwritten
+	void	setNewBroadphaseProxy(btBroadphaseProxy* broadphaseProxy) {
+		m_broadphaseHandle = broadphaseProxy;
+	}
+
+	//btMotionState allows the world transform of active objects to be synchronized automatically
+	///optional motion state of this body (may be null), mutable overload
+	btMotionState*	getMotionState() {
+		return m_optionalMotionState;
+	}
+	///optional motion state of this body (may be null), const overload
+	const btMotionState*	getMotionState() const {
+		return m_optionalMotionState;
+	}
+	///stores the motion state; a non-null one is asked right away to fill in m_worldTransform
+	void	setMotionState(btMotionState* motionState) {
+		m_optionalMotionState = motionState;
+		if (motionState != 0)
+			m_optionalMotionState->getWorldTransform(m_worldTransform);
+	}
+
+	//for experimental overriding of friction/contact solver func
+	int	m_contactSolverType;
+	int	m_frictionSolverType;
+
+	///sets the angular factor from a vector
+	void	setAngularFactor(const btVector3& angFac) {
+		m_angularFactor = angFac;
+	}
+
+	///sets all three components of the angular factor to the same scalar
+	void	setAngularFactor(btScalar angFac) {
+		m_angularFactor.setValue(angFac, angFac, angFac);
+	}
+	///read-only reference to the angular factor
+	const btVector3&	getAngularFactor() const {
+		return m_angularFactor;
+	}
+
+	///true once the body has a broadphase proxy, i.e. it has been added to a btCollisionWorld/btDynamicsWorld/btBroadphase
+	bool	isInWorld() const
+	{
+		return 0 != getBroadphaseProxy();
+	}
+
+	virtual bool checkCollideWithOverride(btCollisionObject* co);
+
+	void addConstraintRef(btTypedConstraint* c);
+	void removeConstraintRef(btTypedConstraint* c);
+
+	///constraint reference stored at index; the index is not range-checked here
+	btTypedConstraint* getConstraintRef(int index) {
+		return m_constraintRefs[index];
+	}
+
+	///number of entries in m_constraintRefs
+	int getNumConstraintRefs() const {
+		return m_constraintRefs.size();
+	}
+
+	///replaces the whole rigid body flag word
+	void	setFlags(int flags) {
+		m_rigidbodyFlags = flags;
+	}
+
+	///current rigid body flag word
+	int getFlags() const {
+		return m_rigidbodyFlags;
+	}
+
+	///read-only reference to m_deltaLinearVelocity
+	const btVector3& getDeltaLinearVelocity() const {
+		return m_deltaLinearVelocity;
+	}
+
+	///read-only reference to m_deltaAngularVelocity
+	const btVector3& getDeltaAngularVelocity() const {
+		return m_deltaAngularVelocity;
+	}
+
+	///read-only reference to m_pushVelocity
+	const btVector3& getPushVelocity() const {
+		return m_pushVelocity;
+	}
+
+	///read-only reference to m_turnVelocity
+	const btVector3& getTurnVelocity() const {
+		return m_turnVelocity;
+	}
+
+
+	////////////////////////////////////////////////
+	///some internal methods, don't use them
+		
+	///internal: writable reference to m_deltaLinearVelocity
+	btVector3& internalGetDeltaLinearVelocity() {
+		return m_deltaLinearVelocity;
+	}
+
+	///internal: writable reference to m_deltaAngularVelocity
+	btVector3& internalGetDeltaAngularVelocity() {
+		return m_deltaAngularVelocity;
+	}
+
+	///internal: read-only reference to m_angularFactor
+	const btVector3& internalGetAngularFactor() const {
+		return m_angularFactor;
+	}
+
+	///internal: read-only reference to the vector member m_invMass (not the scalar m_inverseMass)
+	const btVector3& internalGetInvMass() const {
+		return m_invMass;
+	}
+	
+	///internal: writable reference to m_pushVelocity
+	btVector3& internalGetPushVelocity() {
+		return m_pushVelocity;
+	}
+
+	///internal: writable reference to m_turnVelocity
+	btVector3& internalGetTurnVelocity() {
+		return m_turnVelocity;
+	}
+
+	///internal: writes the point velocity at rel_pos into velocity, with the pending delta velocities included
+	SIMD_FORCE_INLINE void	internalGetVelocityInLocalPointObsolete(const btVector3& rel_pos, btVector3& velocity ) const {
+		velocity = getLinearVelocity()+m_deltaLinearVelocity + (getAngularVelocity()+m_deltaAngularVelocity).cross(rel_pos);
+	}
+
+	///internal: writes the angular velocity plus the pending angular delta into angVel
+	SIMD_FORCE_INLINE void	internalGetAngularVelocity(btVector3& angVel) const {
+		angVel = getAngularVelocity()+m_deltaAngularVelocity;
+	}
+
+
+	///Optimization for the iterative solver: the constant terms involving inertia, normal and relative position are not recalculated here.
+	///The given components are only scaled by impulseMagnitude and accumulated into the delta velocities; skipped when m_inverseMass is zero.
+	SIMD_FORCE_INLINE void internalApplyImpulse(const btVector3& linearComponent, const btVector3& angularComponent,const btScalar impulseMagnitude)
+	{
+		if (!m_inverseMass)
+			return;
+		m_deltaLinearVelocity += linearComponent*impulseMagnitude;
+		m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
+	}
+
+	///scales the given components by impulseMagnitude and accumulates them into m_pushVelocity / m_turnVelocity; skipped when m_inverseMass is zero
+	SIMD_FORCE_INLINE void internalApplyPushImpulse(const btVector3& linearComponent, const btVector3& angularComponent,btScalar impulseMagnitude)
+	{
+		if (!m_inverseMass)
+			return;
+		m_pushVelocity += linearComponent*impulseMagnitude;
+		m_turnVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
+	}
+	
+	///adds the pending delta velocities to the linear/angular velocity (skipped when m_inverseMass is zero); the deltas themselves are not reset here
+	void	internalWritebackVelocity()
+	{
+		if (!m_inverseMass)
+			return;
+		setLinearVelocity(getLinearVelocity()+ m_deltaLinearVelocity);
+		setAngularVelocity(getAngularVelocity()+m_deltaAngularVelocity);
+		//m_deltaLinearVelocity.setZero();
+		//m_deltaAngularVelocity .setZero();
+		//m_originalBody->setCompanionId(-1);
+	}
+
+
+	void	internalWritebackVelocity(btScalar timeStep);
+
+	
+
+	///////////////////////////////////////////////
+
+	virtual	int	calculateSerializeBufferSize()	const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer,  class btSerializer* serializer) const;
+
+	virtual void serializeSingleObject(class btSerializer* serializer) const;
+
+};
+
+//@todo add m_optionalMotionState and m_constraintRefs to btRigidBodyData
+///Serialized form of a rigid body using float fields. Do not change these serialization structures: a change requires an updated sBulletDNAstr/sBulletDNAstr64.
+struct	btRigidBodyFloatData
+{
+	btCollisionObjectFloatData	m_collisionObjectData;	//first field; presumably the serialized btCollisionObject part, judging by its type
+	btMatrix3x3FloatData		m_invInertiaTensorWorld;
+	btVector3FloatData		m_linearVelocity;
+	btVector3FloatData		m_angularVelocity;
+	btVector3FloatData		m_angularFactor;
+	btVector3FloatData		m_linearFactor;
+	btVector3FloatData		m_gravity;	
+	btVector3FloatData		m_gravity_acceleration;
+	btVector3FloatData		m_invInertiaLocal;
+	btVector3FloatData		m_totalForce;
+	btVector3FloatData		m_totalTorque;
+	float					m_inverseMass;
+	float					m_linearDamping;
+	float					m_angularDamping;
+	float					m_additionalDampingFactor;
+	float					m_additionalLinearDampingThresholdSqr;
+	float					m_additionalAngularDampingThresholdSqr;
+	float					m_additionalAngularDampingFactor;
+	float					m_linearSleepingThreshold;
+	float					m_angularSleepingThreshold;
+	int						m_additionalDamping;	//stored as an int; this float variant has no explicit padding member
+};
+
+///Serialized form of a rigid body using double fields. Do not change these serialization structures: a change requires an updated sBulletDNAstr/sBulletDNAstr64.
+struct	btRigidBodyDoubleData
+{
+	btCollisionObjectDoubleData	m_collisionObjectData;	//first field; presumably the serialized btCollisionObject part, judging by its type
+	btMatrix3x3DoubleData		m_invInertiaTensorWorld;
+	btVector3DoubleData		m_linearVelocity;
+	btVector3DoubleData		m_angularVelocity;
+	btVector3DoubleData		m_angularFactor;
+	btVector3DoubleData		m_linearFactor;
+	btVector3DoubleData		m_gravity;	
+	btVector3DoubleData		m_gravity_acceleration;
+	btVector3DoubleData		m_invInertiaLocal;
+	btVector3DoubleData		m_totalForce;
+	btVector3DoubleData		m_totalTorque;
+	double					m_inverseMass;
+	double					m_linearDamping;
+	double					m_angularDamping;
+	double					m_additionalDampingFactor;
+	double					m_additionalLinearDampingThresholdSqr;
+	double					m_additionalAngularDampingThresholdSqr;
+	double					m_additionalAngularDampingFactor;
+	double					m_linearSleepingThreshold;
+	double					m_angularSleepingThreshold;
+	int						m_additionalDamping;
+	char	m_padding[4];	//explicit 4 bytes after the int; presumably keeps the struct size a multiple of 8 - TODO confirm
+};
+
+
+
+#endif //BT_RIGIDBODY_H
+
diff --git a/src/bullet/BulletDynamics/Dynamics/btSimpleDynamicsWorld.cpp b/src/bullet/BulletDynamics/Dynamics/btSimpleDynamicsWorld.cpp
new file mode 100644
index 00000000..5fc2f3cf
--- /dev/null
+++ b/src/bullet/BulletDynamics/Dynamics/btSimpleDynamicsWorld.cpp
@@ -0,0 +1,280 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btSimpleDynamicsWorld.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
+#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
+
+
+/*
+  Dummy function with C linkage and an empty body. Make sure its name
+  and signature never change: probes that check whether the library is
+  actually installed rely on it.
+*/
+extern "C" 
+{
+	void btBulletDynamicsProbe ();	//prototype first, then the empty definition
+	void btBulletDynamicsProbe () {}
+}
+
+
+
+
+///Stores the caller-supplied constraint solver without taking ownership; gravity starts as (0,0,-10).
+btSimpleDynamicsWorld::btSimpleDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration)
+	: btDynamicsWorld(dispatcher, pairCache, collisionConfiguration)
+	, m_constraintSolver(constraintSolver)
+	, m_ownsConstraintSolver(false)
+	, m_gravity(0, 0, -10)
+{
+}
+
+
+///Frees the constraint solver only when this world owns it. NOTE(review): only btAlignedFree is called; confirm that skipping the solver's destructor is intended.
+btSimpleDynamicsWorld::~btSimpleDynamicsWorld()
+{
+	if (m_ownsConstraintSolver) btAlignedFree( m_constraintSolver);
+}
+
+///Advances the simulation by timeStep in one single step; maxSubSteps and fixedTimeStep are ignored and the return value is always 1.
+int		btSimpleDynamicsWorld::stepSimulation( btScalar timeStep,int maxSubSteps, btScalar fixedTimeStep)
+{
+	(void)maxSubSteps;
+	(void)fixedTimeStep;
+
+	///apply gravity, predict motion
+	predictUnconstraintMotion(timeStep);
+
+	///perform collision detection, after refreshing the dispatch info for this step
+	btDispatcherInfo&	info = getDispatchInfo();
+	info.m_timeStep = timeStep;
+	info.m_stepCount = 0;
+	info.m_debugDraw = getDebugDrawer();
+	performDiscreteCollisionDetection();
+
+	///solve contact constraints (skipped when the dispatcher reports no manifolds)
+	const int manifoldCount = m_dispatcher1->getNumManifolds();
+	if (manifoldCount)
+	{
+		//the cast assumes that m_dispatcher1 really is a btCollisionDispatcher
+		btPersistentManifold** manifolds = ((btCollisionDispatcher*)m_dispatcher1)->getInternalManifoldPointer();
+
+		btContactSolverInfo solverInfo;
+		solverInfo.m_timeStep = timeStep;
+
+		m_constraintSolver->prepareSolve(0,manifoldCount);
+		m_constraintSolver->solveGroup(&getCollisionObjectArray()[0],getNumCollisionObjects(),manifolds, manifoldCount,0,0,solverInfo,m_debugDrawer, m_stackAlloc,m_dispatcher1);
+		m_constraintSolver->allSolved(solverInfo,m_debugDrawer, m_stackAlloc);
+	}
+
+	///integrate transforms, then refresh the broadphase AABBs
+	integrateTransforms(timeStep);
+	updateAabbs();
+
+	///hand the new transforms to the motion states
+	synchronizeMotionStates();
+
+	///accumulated forces are cleared at the end of every step
+	clearForces();
+	return 1;
+}
+
+///Calls clearForces() on every rigid body in the world; collision objects that are not rigid bodies are skipped.
+void	btSimpleDynamicsWorld::clearForces()
+{
+	///@todo: iterate over awake simulation islands!
+	for (int index = 0; index < m_collisionObjects.size(); ++index)
+	{
+		btRigidBody* rigid = btRigidBody::upcast(m_collisionObjects[index]);
+		if (!rigid)
+		{
+			continue;
+		}
+		rigid->clearForces();
+	}
+}	
+
+
+///Stores the new gravity and forwards it to every rigid body currently in the world.
+void	btSimpleDynamicsWorld::setGravity(const btVector3& gravity)
+{
+	m_gravity = gravity;
+	for (int index = 0; index < m_collisionObjects.size(); ++index)
+	{
+		btRigidBody* rigid = btRigidBody::upcast(m_collisionObjects[index]);
+		if (rigid)
+		{
+			rigid->setGravity(gravity);
+		}
+	}
+}
+
+///Returns a copy of the gravity vector stored in this world.
+btVector3 btSimpleDynamicsWorld::getGravity () const {
+	return m_gravity;
+}
+
+///Removes the body by delegating directly to btCollisionWorld::removeCollisionObject.
+void	btSimpleDynamicsWorld::removeRigidBody(btRigidBody* body) {
+	btCollisionWorld::removeCollisionObject(body);
+}
+
+///Rigid bodies are routed through removeRigidBody(); any other object goes straight to btCollisionWorld.
+void	btSimpleDynamicsWorld::removeCollisionObject(btCollisionObject* collisionObject)
+{
+	if (btRigidBody* rigid = btRigidBody::upcast(collisionObject))
+		removeRigidBody(rigid);
+	else
+		btCollisionWorld::removeCollisionObject(collisionObject);
+}
+
+
+///Gives the body the world's gravity; it is added as a collision object only if it has a collision shape.
+void	btSimpleDynamicsWorld::addRigidBody(btRigidBody* body)
+{
+	body->setGravity(m_gravity);
+
+	if (!body->getCollisionShape())
+		return;
+	addCollisionObject(body);
+}
+
+///Same as the one-argument overload, but forwards the given collision group and mask to addCollisionObject.
+void	btSimpleDynamicsWorld::addRigidBody(btRigidBody* body, short group, short mask)
+{
+	body->setGravity(m_gravity);
+
+	if (!body->getCollisionShape())
+		return;
+	addCollisionObject(body,group,mask);
+}
+
+
+///No-op: the body is empty, so nothing is drawn.
+void	btSimpleDynamicsWorld::debugDrawWorld()
+{
+}
+///No-op: the action is neither stored nor used by this function.
+void	btSimpleDynamicsWorld::addAction(btActionInterface* action)
+{
+	(void)action;	//explicitly unused, same idiom as in stepSimulation; avoids unused-parameter warnings
+}
+///No-op: nothing is removed, because addAction stores nothing.
+void	btSimpleDynamicsWorld::removeAction(btActionInterface* action)
+{
+	(void)action;	//explicitly unused, same idiom as in stepSimulation; avoids unused-parameter warnings
+}
+
+
+///Recomputes the AABB of every active, non-static rigid body from its collision shape and world transform
+///and hands it to the broadphase. Collision objects that are not rigid bodies are left alone.
+void	btSimpleDynamicsWorld::updateAabbs()
+{
+	for ( int i=0;i<m_collisionObjects.size();i++)
+	{
+		btCollisionObject* colObj = m_collisionObjects[i];
+		btRigidBody* body = btRigidBody::upcast(colObj);
+
+		//only active, non-static rigid bodies get a broadphase refresh
+		if (!body || !body->isActive() || body->isStaticObject())
+			continue;
+
+		btVector3 minAabb,maxAabb;
+		colObj->getCollisionShape()->getAabb(colObj->getWorldTransform(), minAabb,maxAabb);
+		btBroadphaseInterface* bp = getBroadphase();
+		bp->setAabb(body->getBroadphaseHandle(),minAabb,maxAabb, m_dispatcher1);
+	}
+}
+
+///Moves every active, non-static rigid body to the transform predicted for timeStep.
+void	btSimpleDynamicsWorld::integrateTransforms(btScalar timeStep)
+{
+	btTransform nextTransform;
+	for (int index = 0; index < m_collisionObjects.size(); ++index)
+	{
+		btRigidBody* rigid = btRigidBody::upcast(m_collisionObjects[index]);
+		if (!rigid)
+			continue;
+		if (!rigid->isActive() || rigid->isStaticObject())
+			continue;
+
+		//predict first, then commit the predicted transform to the body
+		rigid->predictIntegratedTransform(timeStep, nextTransform);
+		rigid->proceedToTransform( nextTransform);
+	}
+}
+
+
+
+///For every active, non-static rigid body: applies gravity, integrates velocities, applies damping and
+///writes the predicted transform into the body's interpolation world transform.
+void	btSimpleDynamicsWorld::predictUnconstraintMotion(btScalar timeStep)
+{
+	for (int index = 0; index < m_collisionObjects.size(); ++index)
+	{
+		btRigidBody* rigid = btRigidBody::upcast(m_collisionObjects[index]);
+		if (!rigid)
+			continue;
+		//static bodies are rejected before the activity test
+		if (rigid->isStaticObject())
+			continue;
+		if (!rigid->isActive())
+			continue;
+
+		rigid->applyGravity();
+		rigid->integrateVelocities( timeStep);
+		rigid->applyDamping(timeStep);
+		rigid->predictIntegratedTransform(timeStep,rigid->getInterpolationWorldTransform());
+	}
+}
+
+
+///Writes the world transform of every rigid body that has a motion state and is not ISLAND_SLEEPING into that motion state.
+void	btSimpleDynamicsWorld::synchronizeMotionStates()
+{
+	///@todo: iterate over awake simulation islands!
+	for (int index = 0; index < m_collisionObjects.size(); ++index)
+	{
+		btRigidBody* rigid = btRigidBody::upcast(m_collisionObjects[index]);
+		if (!rigid || !rigid->getMotionState())
+			continue;
+
+		//motion states of sleeping bodies are not touched
+		if (rigid->getActivationState() == ISLAND_SLEEPING)
+			continue;
+
+		rigid->getMotionState()->setWorldTransform(rigid->getWorldTransform());
+	}
+}
+
+
+///Installs a caller-owned solver; a solver that was owned by this world is freed first, and ownership is cleared.
+void	btSimpleDynamicsWorld::setConstraintSolver(btConstraintSolver* solver)
+{
+	if (m_ownsConstraintSolver)
+		btAlignedFree(m_constraintSolver);
+
+	m_constraintSolver = solver;
+	m_ownsConstraintSolver = false;
+}
+
+///Returns the solver currently in use; it may be null if a null solver was supplied.
+btConstraintSolver* btSimpleDynamicsWorld::getConstraintSolver() {
+	return m_constraintSolver;
+}
diff --git a/src/bullet/BulletDynamics/Dynamics/btSimpleDynamicsWorld.h b/src/bullet/BulletDynamics/Dynamics/btSimpleDynamicsWorld.h
new file mode 100644
index 00000000..d48d2e39
--- /dev/null
+++ b/src/bullet/BulletDynamics/Dynamics/btSimpleDynamicsWorld.h
@@ -0,0 +1,89 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SIMPLE_DYNAMICS_WORLD_H
+#define BT_SIMPLE_DYNAMICS_WORLD_H
+
+#include "btDynamicsWorld.h"
+
+class btDispatcher;
+class btOverlappingPairCache;
+class btConstraintSolver;
+
+///The btSimpleDynamicsWorld serves as a unit-test and as a reference to verify more complicated and optimized dynamics worlds.
+///Please use btDiscreteDynamicsWorld instead.
+class btSimpleDynamicsWorld : public btDynamicsWorld
+{
+protected:
+	///solver used to resolve contact constraints; supplied by the caller
+	btConstraintSolver*	m_constraintSolver;
+	///when true, the destructor and setConstraintSolver free m_constraintSolver
+	bool	m_ownsConstraintSolver;
+	///first phase of a step (see stepSimulation in the .cpp)
+	void	predictUnconstraintMotion(btScalar timeStep);
+	///applies the transforms predicted for timeStep
+	void	integrateTransforms(btScalar timeStep);
+	///gravity handed to rigid bodies in addRigidBody and setGravity
+	btVector3	m_gravity;
+	
+public:
+
+
+	///the dispatcher, broadphase pairCache, constraintSolver and collisionConfiguration are all supplied by the caller;
+	///this constructor creates none of them and does not take ownership of the solver
+	btSimpleDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration);
+
+	virtual ~btSimpleDynamicsWorld();
+		
+	///maxSubSteps/fixedTimeStep for interpolation is currently ignored for btSimpleDynamicsWorld, use btDiscreteDynamicsWorld instead
+	virtual int	stepSimulation( btScalar timeStep,int maxSubSteps=1, btScalar fixedTimeStep=btScalar(1.)/btScalar(60.));
+	///stores the gravity and forwards it to the rigid bodies already in the world
+	virtual void	setGravity(const btVector3& gravity);
+
+	virtual btVector3 getGravity () const;
+	///the body receives the world's gravity; it is only added if it has a collision shape
+	virtual void	addRigidBody(btRigidBody* body);
+
+	virtual void	addRigidBody(btRigidBody* body, short group, short mask);
+
+	virtual void	removeRigidBody(btRigidBody* body);
+	///the three functions below have empty implementations in this world
+	virtual void	debugDrawWorld();
+				
+	virtual void	addAction(btActionInterface* action);
+
+	virtual void	removeAction(btActionInterface* action);
+
+	///removeCollisionObject will first check if it is a rigid body, if so call removeRigidBody otherwise call btCollisionWorld::removeCollisionObject
+	virtual void	removeCollisionObject(btCollisionObject* collisionObject);
+	
+	virtual void	updateAabbs();
+
+	virtual void	synchronizeMotionStates();
+	///the new solver is treated as caller-owned
+	virtual void	setConstraintSolver(btConstraintSolver* solver);
+
+	virtual btConstraintSolver* getConstraintSolver();
+
+	virtual btDynamicsWorldType	getWorldType() const
+	{
+		return BT_SIMPLE_DYNAMICS_WORLD;
+	}
+
+	virtual void	clearForces();
+
+};
+
+#endif //BT_SIMPLE_DYNAMICS_WORLD_H
diff --git a/src/bullet/BulletDynamics/Vehicle/btRaycastVehicle.cpp b/src/bullet/BulletDynamics/Vehicle/btRaycastVehicle.cpp
new file mode 100644
index 00000000..5b467883
--- /dev/null
+++ b/src/bullet/BulletDynamics/Vehicle/btRaycastVehicle.cpp
@@ -0,0 +1,771 @@
+/*
+ * Copyright (c) 2005 Erwin Coumans http://continuousphysics.com/Bullet/
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies.
+ * Erwin Coumans makes no representations about the suitability 
+ * of this software for any purpose.  
+ * It is provided "as is" without express or implied warranty.
+*/
+
+#include "LinearMath/btVector3.h"
+#include "btRaycastVehicle.h"
+
+#include "BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btJacobianEntry.h"
+#include "LinearMath/btQuaternion.h"
+#include "BulletDynamics/Dynamics/btDynamicsWorld.h"
+#include "btVehicleRaycaster.h"
+#include "btWheelInfo.h"
+#include "LinearMath/btMinMax.h"
+#include "LinearMath/btIDebugDraw.h"
+#include "BulletDynamics/ConstraintSolver/btContactConstraint.h"
+
+#define ROLLING_INFLUENCE_FIX
+
+///Returns one shared function-local static body; its mass properties are set to zero mass / zero inertia on every call.
+btRigidBody& btActionInterface::getFixedBody() {
+	static btRigidBody s_fixed(0, 0,0);
+	const btScalar zero(0.);
+	s_fixed.setMassProps(zero, btVector3(zero,zero,zero));
+	return s_fixed;
+}
+
+///Stores the raycaster and the chassis, selects the axis indices right=0 / forward=1 / up=2 and then runs defaultInit().
+btRaycastVehicle::btRaycastVehicle(const btVehicleTuning& tuning,btRigidBody* chassis,	btVehicleRaycaster* raycaster )
+	: m_vehicleRaycaster(raycaster), m_pitchControl(btScalar(0.))
+{
+	m_chassisBody = chassis;
+	m_indexRightAxis = 0;
+	m_indexForwardAxis = 1;
+	m_indexUpAxis = 2;
+	defaultInit(tuning);
+}
+
+
+///Zeroes the steering value and the cached speed; the tuning argument is not used.
+void btRaycastVehicle::defaultInit(const btVehicleTuning& tuning)
+{
+	(void)tuning;
+	m_steeringValue = btScalar(0.);
+	m_currentVehicleSpeedKmHour = btScalar(0.);
+}
+
+	
+
+///Empty destructor body: nothing is released here.
+btRaycastVehicle::~btRaycastVehicle()
+{}
+
+
+//
+// Most of this code is generic for 2 or 4 wheel vehicles, but parts of it still need to be reviewed.
+// addWheel appends a wheel built from the given geometry and tuning and returns a reference to the stored btWheelInfo.
+btWheelInfo&	btRaycastVehicle::addWheel( const btVector3& connectionPointCS, const btVector3& wheelDirectionCS0,const btVector3& wheelAxleCS, btScalar suspensionRestLength, btScalar wheelRadius,const btVehicleTuning& tuning, bool isFrontWheel)
+{
+	//wheel geometry comes from the arguments, suspension and friction settings from tuning
+	btWheelInfoConstructionInfo info;
+	info.m_chassisConnectionCS = connectionPointCS;
+	info.m_wheelDirectionCS = wheelDirectionCS0;
+	info.m_wheelAxleCS = wheelAxleCS;
+	info.m_suspensionRestLength = suspensionRestLength;
+	info.m_wheelRadius = wheelRadius;
+	info.m_bIsFrontWheel = isFrontWheel;
+
+	info.m_suspensionStiffness = tuning.m_suspensionStiffness;
+	info.m_wheelsDampingCompression = tuning.m_suspensionCompression;
+	info.m_wheelsDampingRelaxation = tuning.m_suspensionDamping;
+	info.m_frictionSlip = tuning.m_frictionSlip;
+	info.m_maxSuspensionTravelCm = tuning.m_maxSuspensionTravelCm;
+	info.m_maxSuspensionForce = tuning.m_maxSuspensionForce;
+
+	m_wheelInfo.push_back( btWheelInfo(info));
+	//bring the world-space data of the freshly appended wheel up to date before handing it out
+	const int newIndex = getNumWheels()-1;
+	btWheelInfo& wheel = m_wheelInfo[newIndex];
+	updateWheelTransformsWS( wheel , false );
+	updateWheelTransform(newIndex,false);
+	return wheel;
+}
+
+
+
+
+///World transform cached for the given wheel; wheelIndex must lie in [0, getNumWheels()).
+const btTransform&	btRaycastVehicle::getWheelTransformWS( int wheelIndex ) const
+{
+	//check both bounds, as getWheelInfo() and setBrake() do
+	btAssert((wheelIndex >= 0) && (wheelIndex < getNumWheels()));
+	return m_wheelInfo[wheelIndex].m_worldTransform;
+}
+///Rebuilds wheel.m_worldTransform from the wheel's world-space direction and axle, its steering and rotation values and the current suspension length.
+void	btRaycastVehicle::updateWheelTransform( int wheelIndex , bool interpolatedTransform)
+{
+	//NOTE(review): wheelIndex is not assert-checked here, unlike in getWheelInfo()
+	btWheelInfo& wheel = m_wheelInfo[ wheelIndex ];
+	updateWheelTransformsWS(wheel,interpolatedTransform);
+	btVector3 up = -wheel.m_raycastInfo.m_wheelDirectionWS;
+	const btVector3& right = wheel.m_raycastInfo.m_wheelAxleWS;
+	btVector3 fwd = up.cross(right);
+	fwd = fwd.normalize();
+//	up = right.cross(fwd);
+//	up.normalize();
+
+	//steering: quaternion built from the 'up' vector and the wheel's steering value
+	btScalar steering = wheel.m_steering;
+	
+	btQuaternion steeringOrn(up,steering);//wheel.m_steering);
+	btMatrix3x3 steeringMat(steeringOrn);
+	//spin: quaternion built from the 'right' (axle) vector and the negated wheel rotation
+	btQuaternion rotatingOrn(right,-wheel.m_rotation);
+	btMatrix3x3 rotatingMat(rotatingOrn);
+	//basis2 holds right, fwd and up as its columns (assuming the constructor takes the elements row by row - TODO confirm)
+	btMatrix3x3 basis2(
+		right[0],fwd[0],up[0],
+		right[1],fwd[1],up[1],
+		right[2],fwd[2],up[2]
+	);
+	//orientation = steering * spin * basis2; origin = hard point moved along the wheel direction by the suspension length
+	wheel.m_worldTransform.setBasis(steeringMat * rotatingMat * basis2);
+	wheel.m_worldTransform.setOrigin(
+		wheel.m_raycastInfo.m_hardPointWS + wheel.m_raycastInfo.m_wheelDirectionWS * wheel.m_raycastInfo.m_suspensionLength
+	);
+}
+
+///Puts every wheel back at rest: rest-length suspension, zero suspension relative velocity,
+///contact normal opposite to the wheel direction and m_clippedInvContactDotSuspension set to 1.
+void btRaycastVehicle::resetSuspension()
+{
+	for (int wheelIndex = 0; wheelIndex < m_wheelInfo.size(); ++wheelIndex)
+	{
+		btWheelInfo& wheel = m_wheelInfo[wheelIndex];
+
+		wheel.m_suspensionRelativeVelocity = btScalar(0.0);
+		wheel.m_clippedInvContactDotSuspension = btScalar(1.0);
+		//wheel_info.setContactFriction(btScalar(0.0));
+		wheel.m_raycastInfo.m_suspensionLength = wheel.getSuspensionRestLength();
+		wheel.m_raycastInfo.m_contactNormalWS = - wheel.m_raycastInfo.m_wheelDirectionWS;
+	}
+}
+
+///Clears the wheel's contact flag and recomputes its hard point, direction and axle in world space from the chassis transform.
+void	btRaycastVehicle::updateWheelTransformsWS(btWheelInfo& wheel , bool interpolatedTransform)
+{
+	wheel.m_raycastInfo.m_isInContact = false;
+
+	//start from the chassis transform; on request, and if a motion state exists, let the motion state overwrite it
+	btTransform chassisTrans = getChassisWorldTransform();
+	if (interpolatedTransform && (getRigidBody()->getMotionState()))
+		getRigidBody()->getMotionState()->getWorldTransform(chassisTrans);
+	const btMatrix3x3& chassisBasis = chassisTrans.getBasis();
+	wheel.m_raycastInfo.m_hardPointWS = chassisTrans( wheel.m_chassisConnectionPointCS );
+	wheel.m_raycastInfo.m_wheelDirectionWS = chassisBasis * wheel.m_wheelDirectionCS;
+	wheel.m_raycastInfo.m_wheelAxleWS = chassisBasis * wheel.m_wheelAxleCS;
+}
+///Casts the suspension ray of one wheel and updates its raycast/suspension state. Returns the hit distance along the ray, or -1 when nothing was hit.
+btScalar btRaycastVehicle::rayCast(btWheelInfo& wheel)
+{
+	updateWheelTransformsWS( wheel,false);
+	//the call above refreshes the world-space hard point / direction / axle and clears m_isInContact
+	
+	btScalar depth = -1;
+	//ray length: suspension rest length plus wheel radius
+	btScalar raylen = wheel.getSuspensionRestLength()+wheel.m_wheelsRadius;
+
+	btVector3 rayvector = wheel.m_raycastInfo.m_wheelDirectionWS * (raylen);
+	const btVector3& source = wheel.m_raycastInfo.m_hardPointWS;
+	wheel.m_raycastInfo.m_contactPointWS = source + rayvector;
+	const btVector3& target = wheel.m_raycastInfo.m_contactPointWS;
+
+	btScalar param = btScalar(0.);
+	
+	btVehicleRaycaster::btVehicleRaycasterResult	rayResults;
+
+	btAssert(m_vehicleRaycaster);
+
+	void* object = m_vehicleRaycaster->castRay(source,target,rayResults);
+
+	wheel.m_raycastInfo.m_groundObject = 0;
+
+	if (object)
+	{
+		param = rayResults.m_distFraction;
+		depth = raylen * rayResults.m_distFraction;
+		wheel.m_raycastInfo.m_contactNormalWS  = rayResults.m_hitNormalInWorld;
+		wheel.m_raycastInfo.m_isInContact = true;
+		//the hit object itself is not stored: the ground is always represented by the shared fixed body
+		wheel.m_raycastInfo.m_groundObject = &getFixedBody();///@todo for driving on dynamic/movable objects!;
+		//wheel.m_raycastInfo.m_groundObject = object;
+
+		//suspension length = distance to the hit minus the wheel radius
+		btScalar hitDistance = param*raylen;
+		wheel.m_raycastInfo.m_suspensionLength = hitDistance - wheel.m_wheelsRadius;
+		//clamp on max suspension travel
+		//m_maxSuspensionTravelCm is multiplied by 0.01 before it is compared with the lengths
+		btScalar  minSuspensionLength = wheel.getSuspensionRestLength() - wheel.m_maxSuspensionTravelCm*btScalar(0.01);
+		btScalar maxSuspensionLength = wheel.getSuspensionRestLength()+ wheel.m_maxSuspensionTravelCm*btScalar(0.01);
+		if (wheel.m_raycastInfo.m_suspensionLength < minSuspensionLength)
+		{
+			wheel.m_raycastInfo.m_suspensionLength = minSuspensionLength;
+		}
+		if (wheel.m_raycastInfo.m_suspensionLength > maxSuspensionLength)
+		{
+			wheel.m_raycastInfo.m_suspensionLength = maxSuspensionLength;
+		}
+
+		wheel.m_raycastInfo.m_contactPointWS = rayResults.m_hitPointInWorld;
+		//denominator: contact normal projected onto the wheel direction
+		btScalar denominator= wheel.m_raycastInfo.m_contactNormalWS.dot( wheel.m_raycastInfo.m_wheelDirectionWS );
+
+		btVector3 chassis_velocity_at_contactPoint;
+		btVector3 relpos = wheel.m_raycastInfo.m_contactPointWS-getRigidBody()->getCenterOfMassPosition();
+
+		chassis_velocity_at_contactPoint = getRigidBody()->getVelocityInLocalPoint(relpos);
+
+		btScalar projVel = wheel.m_raycastInfo.m_contactNormalWS.dot( chassis_velocity_at_contactPoint );
+		//a denominator of -0.1 or above is treated as degenerate: zero relative velocity and a fixed factor of 1/0.1
+		if ( denominator >= btScalar(-0.1))
+		{
+			wheel.m_suspensionRelativeVelocity = btScalar(0.0);
+			wheel.m_clippedInvContactDotSuspension = btScalar(1.0) / btScalar(0.1);
+		}
+		else
+		{
+			btScalar inv = btScalar(-1.) / denominator;
+			wheel.m_suspensionRelativeVelocity = projVel * inv;
+			wheel.m_clippedInvContactDotSuspension = inv;
+		}
+			
+	} else
+	{
+		//put wheel info as in rest position
+		wheel.m_raycastInfo.m_suspensionLength = wheel.getSuspensionRestLength();
+		wheel.m_suspensionRelativeVelocity = btScalar(0.0);
+		wheel.m_raycastInfo.m_contactNormalWS = - wheel.m_raycastInfo.m_wheelDirectionWS;
+		wheel.m_clippedInvContactDotSuspension = btScalar(1.0);
+	}
+	//depth is still -1 here when the ray hit nothing
+	return depth;
+}
+
+
+///Chassis transform as stored in the rigid body (its center of mass transform); the motion state is not consulted.
+const btTransform& btRaycastVehicle::getChassisWorldTransform() const
+{
+	//disabled variant that read the transform from the motion state instead:
+	/*if (getRigidBody()->getMotionState())
+	{
+		btTransform chassisWorldTrans;
+		getRigidBody()->getMotionState()->getWorldTransform(chassisWorldTrans);
+		return chassisWorldTrans;
+	}
+	*/
+	return getRigidBody()->getCenterOfMassTransform();
+}
+
+
+///Per-step update: refreshes the wheel transforms, recomputes the signed speed, casts the wheel rays,
+///applies suspension impulses and friction to the chassis and finally advances each wheel's rotation.
+void btRaycastVehicle::updateVehicle( btScalar step )
+{
+	for (int w=0;w<getNumWheels();w++)
+	{
+		updateWheelTransform(w,false);
+	}
+
+	//speed = 3.6 * |linear velocity| (km/h if velocities are in m/s - TODO confirm); negated below when moving against the forward axis
+	m_currentVehicleSpeedKmHour = btScalar(3.6) * getRigidBody()->getLinearVelocity().length();
+	
+	const btTransform& chassisTrans = getChassisWorldTransform();
+
+	btVector3 forwardW (
+		chassisTrans.getBasis()[0][m_indexForwardAxis],
+		chassisTrans.getBasis()[1][m_indexForwardAxis],
+		chassisTrans.getBasis()[2][m_indexForwardAxis]);
+
+	if (forwardW.dot(getRigidBody()->getLinearVelocity()) < btScalar(0.))
+	{
+		m_currentVehicleSpeedKmHour *= btScalar(-1.);
+	}
+
+	//
+	// simulate suspension
+	//
+	
+	int i=0;
+	for (i=0;i<m_wheelInfo.size();i++)
+	{
+		//only the updates rayCast makes to the wheel are needed here; the returned depth is not used
+		rayCast( m_wheelInfo[i]);
+	}
+
+	updateSuspension(step);
+
+	//turn each wheel's suspension force (capped at m_maxSuspensionForce) into an impulse on the chassis
+	for (i=0;i<m_wheelInfo.size();i++)
+	{
+		//apply suspension force
+		btWheelInfo& wheel = m_wheelInfo[i];
+		
+		btScalar suspensionForce = wheel.m_wheelsSuspensionForce;
+		
+		if (suspensionForce > wheel.m_maxSuspensionForce)
+		{
+			suspensionForce = wheel.m_maxSuspensionForce;
+		}
+		btVector3 impulse = wheel.m_raycastInfo.m_contactNormalWS * suspensionForce * step;
+		btVector3 relpos = wheel.m_raycastInfo.m_contactPointWS - getRigidBody()->getCenterOfMassPosition();
+		
+		getRigidBody()->applyImpulse(impulse, relpos);
+	
+	}
+	
+
+	
+	updateFriction( step);
+
+	//advance the rotation of every wheel
+	for (i=0;i<m_wheelInfo.size();i++)
+	{
+		btWheelInfo& wheel = m_wheelInfo[i];
+		btVector3 relpos = wheel.m_raycastInfo.m_hardPointWS - getRigidBody()->getCenterOfMassPosition();
+		btVector3 vel = getRigidBody()->getVelocityInLocalPoint( relpos );
+
+		if (wheel.m_raycastInfo.m_isInContact)
+		{
+			const btTransform&	chassisWorldTransform = getChassisWorldTransform();
+
+			btVector3 fwd (
+				chassisWorldTransform.getBasis()[0][m_indexForwardAxis],
+				chassisWorldTransform.getBasis()[1][m_indexForwardAxis],
+				chassisWorldTransform.getBasis()[2][m_indexForwardAxis]);
+
+			btScalar proj = fwd.dot(wheel.m_raycastInfo.m_contactNormalWS);
+			fwd -= wheel.m_raycastInfo.m_contactNormalWS * proj;
+
+			btScalar proj2 = fwd.dot(vel);
+			
+			wheel.m_deltaRotation = (proj2 * step) / (wheel.m_wheelsRadius);
+			wheel.m_rotation += wheel.m_deltaRotation;
+
+		} else
+		{
+			wheel.m_rotation += wheel.m_deltaRotation;
+		}
+		
+		wheel.m_deltaRotation *= btScalar(0.99);//damping of the rotation delta; note it is applied whether or not the wheel is in contact
+
+	}
+
+
+
+}
+
+
+///Stores the steering value of one wheel; the wheel index is assert-checked.
+void	btRaycastVehicle::setSteeringValue(btScalar steering,int wheel)
+{
+	btAssert(wheel>=0 && wheel < getNumWheels());
+
+	getWheelInfo(wheel).m_steering = steering;
+}
+
+
+
+///Steering value currently stored for the given wheel; the index check happens inside getWheelInfo().
+btScalar	btRaycastVehicle::getSteeringValue(int wheel) const {
+	return getWheelInfo(wheel).m_steering;
+}
+
+
+///Stores the engine force of one wheel; the wheel index is assert-checked. The value is only recorded here.
+void	btRaycastVehicle::applyEngineForce(btScalar force, int wheel)
+{
+	btAssert(wheel>=0 && wheel < getNumWheels());
+	getWheelInfo(wheel).m_engineForce = force;
+}
+
+
+///Read-only access to a wheel; index must lie in [0, getNumWheels()) (assert-checked).
+const btWheelInfo&	btRaycastVehicle::getWheelInfo(int index) const
+{
+	btAssert((index >= 0) && (index < 	getNumWheels()));
+	return m_wheelInfo[index];
+}
+
+///Writable access to a wheel; index must lie in [0, getNumWheels()) (assert-checked).
+btWheelInfo&	btRaycastVehicle::getWheelInfo(int index) 
+{
+	btAssert((index >= 0) && (index < 	getNumWheels()));
+	return m_wheelInfo[index];
+}
+///Stores the brake value of one wheel; the wheel index is assert-checked.
+void btRaycastVehicle::setBrake(btScalar brake,int wheelIndex) {
+	btAssert((wheelIndex >= 0) && (wheelIndex < 	getNumWheels()));
+	btWheelInfo& wheelInfo = getWheelInfo(wheelIndex);
+	wheelInfo.m_brake = brake;
+}
+
+///Computes m_wheelsSuspensionForce for every wheel: spring term minus damper term, scaled by the chassis mass and clamped to >= 0; wheels without contact get 0.
+void	btRaycastVehicle::updateSuspension(btScalar deltaTime)
+{
+	(void)deltaTime;
+	//NOTE(review): divides by the chassis' inverse mass; a zero inverse mass is not handled here
+	btScalar chassisMass = btScalar(1.) / m_chassisBody->getInvMass();
+	
+	for (int w_it=0; w_it<getNumWheels(); w_it++)
+	{
+		btWheelInfo &wheel_info = m_wheelInfo[w_it];
+		
+		if ( wheel_info.m_raycastInfo.m_isInContact )
+		{
+			btScalar force;
+			//	Spring: stiffness * (rest length - current length) * m_clippedInvContactDotSuspension
+			{
+				btScalar	susp_length			= wheel_info.getSuspensionRestLength();
+				btScalar	current_length = wheel_info.m_raycastInfo.m_suspensionLength;
+
+				btScalar length_diff = (susp_length - current_length);
+
+				force = wheel_info.m_suspensionStiffness
+					* length_diff * wheel_info.m_clippedInvContactDotSuspension;
+			}
+		
+			// Damper: compression coefficient for a negative relative velocity, relaxation coefficient otherwise
+			{
+				btScalar projected_rel_vel = wheel_info.m_suspensionRelativeVelocity;
+				{
+					btScalar	susp_damping;
+					if ( projected_rel_vel < btScalar(0.0) )
+					{
+						susp_damping = wheel_info.m_wheelsDampingCompression;
+					}
+					else
+					{
+						susp_damping = wheel_info.m_wheelsDampingRelaxation;
+					}
+					force -= susp_damping * projected_rel_vel;
+				}
+			}
+
+			// RESULT: scaled by the chassis mass; negative results are clamped to zero
+			wheel_info.m_wheelsSuspensionForce = force * chassisMass;
+			if (wheel_info.m_wheelsSuspensionForce < btScalar(0.))
+			{
+				wheel_info.m_wheelsSuspensionForce = btScalar(0.);
+			}
+		}
+		else
+		{
+			wheel_info.m_wheelsSuspensionForce = btScalar(0.0);
+		}
+	}
+
+}
+
+// Friction contact between two rigid bodies at one world-space position and
+// along one world-space direction, together with an impulse limit.
+struct btWheelContactPoint
+{
+	btRigidBody* m_body0;
+	btRigidBody* m_body1;
+	btVector3	m_frictionPositionWorld;
+	btVector3	m_frictionDirectionWorld;
+	btScalar	m_jacDiagABInv;	// relaxation / (sum of both impulse denominators)
+	btScalar	m_maxImpulse;
+
+	// Copies the arguments into the members and precomputes m_jacDiagABInv
+	// from both bodies' impulse denominators at the friction position/direction.
+	btWheelContactPoint(btRigidBody* body0,btRigidBody* body1,const btVector3& frictionPosWorld,const btVector3& frictionDirectionWorld, btScalar maxImpulse)
+		: m_body0(body0)
+		, m_body1(body1)
+		, m_frictionPositionWorld(frictionPosWorld)
+		, m_frictionDirectionWorld(frictionDirectionWorld)
+		, m_maxImpulse(maxImpulse)
+	{
+		const btScalar relaxation = 1.f;
+		const btScalar denominatorBody0 = body0->computeImpulseDenominator(frictionPosWorld,frictionDirectionWorld);
+		const btScalar denominatorBody1 = body1->computeImpulseDenominator(frictionPosWorld,frictionDirectionWorld);
+		const btScalar denominatorSum = denominatorBody0+denominatorBody1;
+		m_jacDiagABInv = relaxation/denominatorSum;
+	}
+};
+
+// Computes the impulse along the contact's friction direction that would
+// cancel the relative velocity of the two bodies at the contact position,
+// limited through btSetMin/btSetMax to +/- m_maxImpulse.
+btScalar calcRollingFriction(btWheelContactPoint& contactPoint);
+btScalar calcRollingFriction(btWheelContactPoint& contactPoint)
+{
+	const btVector3& worldPoint = contactPoint.m_frictionPositionWorld;
+	const btScalar impulseLimit = contactPoint.m_maxImpulse;
+
+	// Velocity of each body at the contact position (arm taken from its centre of mass).
+	const btVector3 arm0 = worldPoint - contactPoint.m_body0->getCenterOfMassPosition();
+	const btVector3 arm1 = worldPoint - contactPoint.m_body1->getCenterOfMassPosition();
+	const btVector3 pointVelocity0 = contactPoint.m_body0->getVelocityInLocalPoint(arm0);
+	const btVector3 pointVelocity1 = contactPoint.m_body1->getVelocityInLocalPoint(arm1);
+
+	// Relative velocity projected onto the friction direction.
+	const btVector3 relativeVelocity = pointVelocity0 - pointVelocity1;
+	const btScalar projectedSpeed = contactPoint.m_frictionDirectionWorld.dot(relativeVelocity);
+
+	// calculate the impulse that moves us to zero relative velocity, then limit it
+	btScalar impulse = -projectedSpeed * contactPoint.m_jacDiagABInv;
+	btSetMin(impulse, impulseLimit);
+	btSetMax(impulse, -impulseLimit);
+
+	return impulse;
+}
+
+
+// Multiplier applied to each wheel's side impulse in
+// btRaycastVehicle::updateFriction() after the impulse has been resolved.
+btScalar sideFrictionStiffness2 = btScalar(1.0);
+void	btRaycastVehicle::updateFriction(btScalar	timeStep)
+{
+	// Per-wheel friction step: resolve a side impulse, derive a forward impulse, scale both down when the friction limit is exceeded, then apply them.
+		//calculate the impulse, so that the wheels don't move sidewards
+		int numWheel = getNumWheels();
+		if (!numWheel)
+			return;
+
+		m_forwardWS.resize(numWheel);
+		m_axle.resize(numWheel);
+		m_forwardImpulse.resize(numWheel);
+		m_sideImpulse.resize(numWheel);
+		// NOTE(review): numWheelsOnGround is only incremented below and never read in this function.
+		int numWheelsOnGround = 0;
+	
+		// Pass 1: clear both impulse arrays and count the wheels that have a ground object.
+		//collapse all those loops into one!
+		for (int i=0;i<getNumWheels();i++)
+		{
+			btWheelInfo& wheelInfo = m_wheelInfo[i];
+			class btRigidBody* groundObject = (class btRigidBody*) wheelInfo.m_raycastInfo.m_groundObject;
+			if (groundObject)
+				numWheelsOnGround++;
+			m_sideImpulse[i] = btScalar(0.);
+			m_forwardImpulse[i] = btScalar(0.);
+
+		}
+	
+		{
+			// Pass 2: for each wheel with a ground object, build the axle and forward directions and resolve the side impulse.
+			for (int i=0;i<getNumWheels();i++)
+			{
+
+				btWheelInfo& wheelInfo = m_wheelInfo[i];
+					
+				class btRigidBody* groundObject = (class btRigidBody*) wheelInfo.m_raycastInfo.m_groundObject;
+
+				if (groundObject)
+				{
+					// Axle: column m_indexRightAxis of the wheel's world-space basis.
+					const btTransform& wheelTrans = getWheelTransformWS( i );
+
+					btMatrix3x3 wheelBasis0 = wheelTrans.getBasis();
+					m_axle[i] = btVector3(	
+						wheelBasis0[0][m_indexRightAxis],
+						wheelBasis0[1][m_indexRightAxis],
+						wheelBasis0[2][m_indexRightAxis]);
+					// Remove the axle's component along the contact normal, then normalise it.
+					const btVector3& surfNormalWS = wheelInfo.m_raycastInfo.m_contactNormalWS;
+					btScalar proj = m_axle[i].dot(surfNormalWS);
+					m_axle[i] -= surfNormalWS * proj;
+					m_axle[i] = m_axle[i].normalize();
+					// Forward direction: cross product of the contact normal and the axle.
+					m_forwardWS[i] = surfNormalWS.cross(m_axle[i]);
+					m_forwardWS[i].normalize();
+
+					// NOTE(review): m_sideImpulse[i] is presumably filled in by resolveSingleBilateral (defined elsewhere) - confirm.
+					resolveSingleBilateral(*m_chassisBody, wheelInfo.m_raycastInfo.m_contactPointWS,
+							  *groundObject, wheelInfo.m_raycastInfo.m_contactPointWS,
+							  btScalar(0.), m_axle[i],m_sideImpulse[i],timeStep);
+
+					m_sideImpulse[i] *= sideFrictionStiffness2;
+						
+				}
+				
+
+			}
+		}
+	// Weights applied to the forward and side impulses in the friction-limit test below.
+	btScalar sideFactor = btScalar(1.);
+	btScalar fwdFactor = 0.5;
+
+	bool sliding = false;
+	{
+		for (int wheel =0;wheel <getNumWheels();wheel++)
+		{
+			btWheelInfo& wheelInfo = m_wheelInfo[wheel];
+			class btRigidBody* groundObject = (class btRigidBody*) wheelInfo.m_raycastInfo.m_groundObject;
+			// Forward impulse: engine force * time step when an engine force is set; otherwise the result of calcRollingFriction() with m_brake as impulse limit.
+			btScalar	rollingFriction = 0.f;
+
+			if (groundObject)
+			{
+				if (wheelInfo.m_engineForce != 0.f)
+				{
+					rollingFriction = wheelInfo.m_engineForce* timeStep;
+				} else
+				{
+					btScalar defaultRollingFrictionImpulse = 0.f;
+					btScalar maxImpulse = wheelInfo.m_brake ? wheelInfo.m_brake : defaultRollingFrictionImpulse;
+					btWheelContactPoint contactPt(m_chassisBody,groundObject,wheelInfo.m_raycastInfo.m_contactPointWS,m_forwardWS[wheel],maxImpulse);
+					rollingFriction = calcRollingFriction(contactPt);
+				}
+			}
+
+			//switch between active rolling (throttle), braking and non-active rolling friction (no throttle/brake)
+			
+
+
+
+			m_forwardImpulse[wheel] = btScalar(0.);
+			m_wheelInfo[wheel].m_skidInfo= btScalar(1.);
+
+			if (groundObject)
+			{
+				m_wheelInfo[wheel].m_skidInfo= btScalar(1.);
+				// Impulse limit: suspension force * time step * friction slip.
+				btScalar maximp = wheelInfo.m_wheelsSuspensionForce * timeStep * wheelInfo.m_frictionSlip;
+				btScalar maximpSide = maximp;
+
+				btScalar maximpSquared = maximp * maximpSide;
+			
+
+				m_forwardImpulse[wheel] = rollingFriction;//wheelInfo.m_engineForce* timeStep;
+
+				btScalar x = (m_forwardImpulse[wheel] ) * fwdFactor;
+				btScalar y = (m_sideImpulse[wheel] ) * sideFactor;
+				// If the weighted impulse exceeds the limit, flag sliding and scale m_skidInfo by limit / magnitude.
+				btScalar impulseSquared = (x*x + y*y);
+
+				if (impulseSquared > maximpSquared)
+				{
+					sliding = true;
+					
+					btScalar factor = maximp / btSqrt(impulseSquared);
+					
+					m_wheelInfo[wheel].m_skidInfo *= factor;
+				}
+			} 
+
+		}
+	}
+
+	
+		// When any wheel slid, scale both impulses of each wheel that has a side impulse and m_skidInfo < 1.
+
+		if (sliding)
+		{
+			for (int wheel = 0;wheel < getNumWheels(); wheel++)
+			{
+				if (m_sideImpulse[wheel] != btScalar(0.))
+				{
+					if (m_wheelInfo[wheel].m_skidInfo< btScalar(1.))
+					{
+						m_forwardImpulse[wheel] *=	m_wheelInfo[wheel].m_skidInfo;
+						m_sideImpulse[wheel] *= m_wheelInfo[wheel].m_skidInfo;
+					}
+				}
+			}
+		}
+
+		// apply the impulses
+		{
+			for (int wheel = 0;wheel<getNumWheels() ; wheel++)
+			{
+				btWheelInfo& wheelInfo = m_wheelInfo[wheel];
+
+				btVector3 rel_pos = wheelInfo.m_raycastInfo.m_contactPointWS - 
+						m_chassisBody->getCenterOfMassPosition();
+
+				if (m_forwardImpulse[wheel] != btScalar(0.))
+				{
+					m_chassisBody->applyImpulse(m_forwardWS[wheel]*(m_forwardImpulse[wheel]),rel_pos);
+				}
+				if (m_sideImpulse[wheel] != btScalar(0.))
+				{
+					class btRigidBody* groundObject = (class btRigidBody*) m_wheelInfo[wheel].m_raycastInfo.m_groundObject;
+
+					btVector3 rel_pos2 = wheelInfo.m_raycastInfo.m_contactPointWS - 
+						groundObject->getCenterOfMassPosition();
+
+					
+					btVector3 sideImp = m_axle[wheel] * m_sideImpulse[wheel];
+					// The up-axis part of the chassis lever arm is scaled by m_rollInfluence before the side impulse is applied.
+#if defined ROLLING_INFLUENCE_FIX // fix. It only worked if car's up was along Y - VT.
+					btVector3 vChassisWorldUp = getRigidBody()->getCenterOfMassTransform().getBasis().getColumn(m_indexUpAxis);
+					rel_pos -= vChassisWorldUp * (vChassisWorldUp.dot(rel_pos) * (1.f-wheelInfo.m_rollInfluence));
+#else
+					rel_pos[m_indexUpAxis] *= wheelInfo.m_rollInfluence;
+#endif
+					m_chassisBody->applyImpulse(sideImp,rel_pos);
+
+					//apply friction impulse on the ground
+					groundObject->applyImpulse(-sideImp,rel_pos2);
+				}
+			}
+		}
+
+	
+}
+
+// Draws two debug lines per wheel: one from the wheel origin along its axle
+// and one from the wheel origin to the raycast contact point. The colour is
+// (0,0,1) for wheels in contact and (1,0,1) otherwise.
+void	btRaycastVehicle::debugDraw(btIDebugDraw* debugDrawer)
+{
+	int wheelIdx = 0;
+	while (wheelIdx < this->getNumWheels())
+	{
+		btWheelInfo& wheel = getWheelInfo(wheelIdx);
+		const bool touchingGround = wheel.m_raycastInfo.m_isInContact;
+		const btVector3 lineColor = touchingGround ? btVector3(0,0,1) : btVector3(1,0,1);
+
+		const btVector3 wheelOrigin = wheel.m_worldTransform.getOrigin();
+		const btMatrix3x3& wheelBasis = wheel.m_worldTransform.getBasis();
+		const int rightAxis = getRightAxis();
+
+		// Column `rightAxis` of the wheel's world basis is used as the axle direction.
+		const btVector3 axleDirection(
+			wheelBasis[0][rightAxis],
+			wheelBasis[1][rightAxis],
+			wheelBasis[2][rightAxis]);
+
+		//debug wheels (cylinders)
+		debugDrawer->drawLine(wheelOrigin,wheelOrigin+axleDirection,lineColor);
+		debugDrawer->drawLine(wheelOrigin,wheel.m_raycastInfo.m_contactPointWS,lineColor);
+
+		++wheelIdx;
+	}
+}
+
+// Casts a ray from `from` to `to` through the dynamics world and looks at the
+// closest hit. Returns the hit rigid body (and fills `result`) only when the
+// hit object upcasts to a btRigidBody that has contact response; otherwise
+// returns 0 and leaves `result` untouched.
+void* btDefaultVehicleRaycaster::castRay(const btVector3& from,const btVector3& to, btVehicleRaycasterResult& result)
+{
+	btCollisionWorld::ClosestRayResultCallback closestHit(from,to);
+	m_dynamicsWorld->rayTest(from, to, closestHit);
+
+	if (!closestHit.hasHit())
+		return 0;
+
+	btRigidBody* hitBody = btRigidBody::upcast(closestHit.m_collisionObject);
+	if (!hitBody || !hitBody->hasContactResponse())
+		return 0;
+
+	// Copy the hit data out; the normal is normalised after being copied.
+	result.m_hitPointInWorld = closestHit.m_hitPointWorld;
+	result.m_hitNormalInWorld = closestHit.m_hitNormalWorld;
+	result.m_hitNormalInWorld.normalize();
+	result.m_distFraction = closestHit.m_closestHitFraction;
+
+	return hitBody;
+}
+
diff --git a/src/bullet/BulletDynamics/Vehicle/btRaycastVehicle.h b/src/bullet/BulletDynamics/Vehicle/btRaycastVehicle.h
new file mode 100644
index 00000000..f59555f9
--- /dev/null
+++ b/src/bullet/BulletDynamics/Vehicle/btRaycastVehicle.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2005 Erwin Coumans http://continuousphysics.com/Bullet/
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies.
+ * Erwin Coumans makes no representations about the suitability 
+ * of this software for any purpose.  
+ * It is provided "as is" without express or implied warranty.
+*/
+#ifndef BT_RAYCASTVEHICLE_H
+#define BT_RAYCASTVEHICLE_H
+
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
+#include "btVehicleRaycaster.h"
+class btDynamicsWorld;
+#include "LinearMath/btAlignedObjectArray.h"
+#include "btWheelInfo.h"
+#include "BulletDynamics/Dynamics/btActionInterface.h"
+
+class btVehicleTuning;
+
+///Raycast vehicle: a very special constraint that turns a rigidbody into a vehicle.
+class btRaycastVehicle : public btActionInterface
+{
+		///per-wheel scratch arrays that updateFriction() resizes to the wheel count and fills
+		btAlignedObjectArray<btVector3>	m_forwardWS;
+		btAlignedObjectArray<btVector3>	m_axle;
+		btAlignedObjectArray<btScalar>	m_forwardImpulse;
+		btAlignedObjectArray<btScalar>	m_sideImpulse;
+	
+		///backwards compatibility
+		int	m_userConstraintType;
+		int	m_userConstraintId;
+
+public:
+	class btVehicleTuning
+		{
+			public:
+			///gives every tuning field the default value listed in the initialiser list
+			btVehicleTuning()
+				:m_suspensionStiffness(btScalar(5.88)),
+				m_suspensionCompression(btScalar(0.83)),
+				m_suspensionDamping(btScalar(0.88)),
+				m_maxSuspensionTravelCm(btScalar(500.)),
+				m_frictionSlip(btScalar(10.5)),
+				m_maxSuspensionForce(btScalar(6000.))
+			{
+			}
+			btScalar	m_suspensionStiffness;
+			btScalar	m_suspensionCompression;
+			btScalar	m_suspensionDamping;
+			btScalar	m_maxSuspensionTravelCm;
+			btScalar	m_frictionSlip;
+			btScalar	m_maxSuspensionForce;
+
+		};
+private:
+
+	btScalar	m_tau;
+	btScalar	m_damping;
+	btVehicleRaycaster*	m_vehicleRaycaster;
+	btScalar		m_pitchControl;
+	btScalar	m_steeringValue; 
+	btScalar m_currentVehicleSpeedKmHour;
+
+	btRigidBody* m_chassisBody;
+	///indices (basis-matrix column / vector component) used for the right, up and forward axes
+	int m_indexRightAxis;
+	int m_indexUpAxis;
+	int	m_indexForwardAxis;
+
+	void defaultInit(const btVehicleTuning& tuning);
+
+public:
+
+	//constructor to create a car from an existing rigidbody
+	btRaycastVehicle(const btVehicleTuning& tuning,btRigidBody* chassis,	btVehicleRaycaster* raycaster );
+
+	virtual ~btRaycastVehicle() ;
+
+
+	///btActionInterface interface: forwards to updateVehicle(step); collisionWorld is unused
+	virtual void updateAction( btCollisionWorld* collisionWorld, btScalar step)
+	{
+        (void) collisionWorld;
+		updateVehicle(step);
+	}
+	
+
+	///btActionInterface interface
+	void	debugDraw(btIDebugDraw* debugDrawer);
+			
+	const btTransform& getChassisWorldTransform() const;
+	
+	btScalar rayCast(btWheelInfo& wheel);
+
+	virtual void updateVehicle(btScalar step);
+	
+	
+	void resetSuspension();
+	///returns the steering value stored on the given wheel
+	btScalar	getSteeringValue(int wheel) const;
+	///stores the steering value on the given wheel (index checked with btAssert)
+	void	setSteeringValue(btScalar steering,int wheel);
+
+	///stores the engine force on the given wheel (index checked with btAssert)
+	void	applyEngineForce(btScalar force, int wheel);
+
+	const btTransform&	getWheelTransformWS( int wheelIndex ) const;
+
+	void	updateWheelTransform( int wheelIndex, bool interpolatedTransform = true );
+	
+//	void	setRaycastWheelInfo( int wheelIndex , bool isInContact, const btVector3& hitPoint, const btVector3& hitNormal,btScalar depth);
+
+	btWheelInfo&	addWheel( const btVector3& connectionPointCS0, const btVector3& wheelDirectionCS0,const btVector3& wheelAxleCS,btScalar suspensionRestLength,btScalar wheelRadius,const btVehicleTuning& tuning, bool isFrontWheel);
+	///number of wheels currently stored in m_wheelInfo
+	inline int		getNumWheels() const {
+		return int (m_wheelInfo.size());
+	}
+	
+	btAlignedObjectArray<btWheelInfo>	m_wheelInfo;
+
+	///wheel access by index; the index is checked with btAssert
+	const btWheelInfo&	getWheelInfo(int index) const;
+
+	btWheelInfo&	getWheelInfo(int index);
+
+	void	updateWheelTransformsWS(btWheelInfo& wheel , bool interpolatedTransform = true);
+
+	///stores the brake value on the given wheel (index checked with btAssert)
+	void setBrake(btScalar brake,int wheelIndex);
+
+	void	setPitchControl(btScalar pitch)
+	{
+		m_pitchControl = pitch;
+	}
+	///recomputes m_wheelsSuspensionForce of every wheel; deltaTime is unused
+	void	updateSuspension(btScalar deltaTime);
+	///computes and applies the per-wheel side and forward friction impulses
+	virtual void	updateFriction(btScalar	timeStep);
+
+
+	///the chassis rigid body (m_chassisBody)
+	inline btRigidBody* getRigidBody()
+	{
+		return m_chassisBody;
+	}
+
+	const btRigidBody* getRigidBody() const
+	{
+		return m_chassisBody;
+	}
+
+	inline int	getRightAxis() const
+	{
+		return m_indexRightAxis;
+	}
+	inline int getUpAxis() const
+	{
+		return m_indexUpAxis;
+	}
+
+	inline int getForwardAxis() const
+	{
+		return m_indexForwardAxis;
+	}
+
+	
+	///Worldspace forward vector
+	btVector3 getForwardVector() const
+	{
+		const btTransform& chassisTrans = getChassisWorldTransform(); 
+
+		btVector3 forwardW ( 
+			  chassisTrans.getBasis()[0][m_indexForwardAxis], 
+			  chassisTrans.getBasis()[1][m_indexForwardAxis], 
+			  chassisTrans.getBasis()[2][m_indexForwardAxis]); 
+
+		return forwardW;
+	}
+
+	///Velocity of vehicle (positive if velocity vector has same direction as forward vector)
+	btScalar	getCurrentSpeedKmHour() const
+	{
+		return m_currentVehicleSpeedKmHour;
+	}
+	///selects which axis index is used as right, up and forward
+	virtual void	setCoordinateSystem(int rightIndex,int upIndex,int forwardIndex)
+	{
+		m_indexRightAxis = rightIndex;
+		m_indexUpAxis = upIndex;
+		m_indexForwardAxis = forwardIndex;
+	}
+
+
+	///backwards compatibility
+	int getUserConstraintType() const
+	{
+		return m_userConstraintType ;
+	}
+
+	void	setUserConstraintType(int userConstraintType)
+	{
+		m_userConstraintType = userConstraintType;
+	};
+
+	void	setUserConstraintId(int uid)
+	{
+		m_userConstraintId = uid;
+	}
+
+	int getUserConstraintId() const
+	{
+		return m_userConstraintId;
+	}
+
+};
+/// Vehicle raycaster whose castRay() runs its ray test on a btDynamicsWorld.
+class btDefaultVehicleRaycaster : public btVehicleRaycaster
+{
+private:
+	/// World the rays are tested against; stored as given and never deleted by this class.
+	btDynamicsWorld*	m_dynamicsWorld;
+
+public:
+	btDefaultVehicleRaycaster(btDynamicsWorld* world) : m_dynamicsWorld(world) {}
+
+	/// Implements btVehicleRaycaster::castRay.
+	virtual void* castRay(const btVector3& from,const btVector3& to, btVehicleRaycasterResult& result);
+};
+
+
+#endif //BT_RAYCASTVEHICLE_H
+
diff --git a/src/bullet/BulletDynamics/Vehicle/btVehicleRaycaster.h b/src/bullet/BulletDynamics/Vehicle/btVehicleRaycaster.h
new file mode 100644
index 00000000..3cc909c6
--- /dev/null
+++ b/src/bullet/BulletDynamics/Vehicle/btVehicleRaycaster.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2005 Erwin Coumans http://bulletphysics.org
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies.
+ * Erwin Coumans makes no representations about the suitability 
+ * of this software for any purpose.  
+ * It is provided "as is" without express or implied warranty.
+*/
+#ifndef BT_VEHICLE_RAYCASTER_H
+#define BT_VEHICLE_RAYCASTER_H
+
+#include "LinearMath/btVector3.h"
+
+/// btVehicleRaycaster provides the interface between vehicle simulation and raycasting.
+struct btVehicleRaycaster
+{
+	virtual ~btVehicleRaycaster() {}
+
+	/// Hit data written by castRay(); m_distFraction starts at -1.
+	struct btVehicleRaycasterResult
+	{
+		btVehicleRaycasterResult() : m_distFraction(btScalar(-1.)) {}
+		btVector3	m_hitPointInWorld;
+		btVector3	m_hitNormalInWorld;
+		btScalar	m_distFraction;
+	};
+
+	/// Casts a ray from `from` to `to`; the meaning of the returned pointer is defined by the implementation.
+	virtual void* castRay(const btVector3& from,const btVector3& to, btVehicleRaycasterResult& result) = 0;
+};
+
+#endif //BT_VEHICLE_RAYCASTER_H
+
diff --git a/src/bullet/BulletDynamics/Vehicle/btWheelInfo.cpp b/src/bullet/BulletDynamics/Vehicle/btWheelInfo.cpp
new file mode 100644
index 00000000..ef93c16f
--- /dev/null
+++ b/src/bullet/BulletDynamics/Vehicle/btWheelInfo.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2005 Erwin Coumans http://continuousphysics.com/Bullet/
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies.
+ * Erwin Coumans makes no representations about the suitability 
+ * of this software for any purpose.  
+ * It is provided "as is" without express or implied warranty.
+*/
+#include "btWheelInfo.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h" // for pointvelocity
+
+// Returns the suspension rest length of this wheel, i.e. the value stored
+// in m_suspensionRestLength1.
+btScalar btWheelInfo::getSuspensionRestLength() const
+{
+	const btScalar restLength = m_suspensionRestLength1;
+	return restLength;
+}
+
+// Refreshes the suspension velocity/projection factors from m_raycastInfo, or
+// rests the wheel when it has no contact. The `raycastInfo` argument is unused.
+void	btWheelInfo::updateWheel(const btRigidBody& chassis,RaycastInfo& raycastInfo)
+{
+	(void)raycastInfo;
+	if (!m_raycastInfo.m_isInContact)
+	{
+		// Not in contact : position wheel in a nice (rest length) position
+		m_raycastInfo.m_suspensionLength = this->getSuspensionRestLength();
+		m_suspensionRelativeVelocity = btScalar(0.0);
+		m_raycastInfo.m_contactNormalWS = -m_raycastInfo.m_wheelDirectionWS;
+		m_clippedInvContactDotSuspension = btScalar(1.0);
+		return;
+	}
+
+	const btVector3& contactNormal = m_raycastInfo.m_contactNormalWS;
+	const btScalar normalDotSuspension = contactNormal.dot( m_raycastInfo.m_wheelDirectionWS );
+
+	// Chassis velocity at the contact point, projected onto the contact normal.
+	const btVector3 contactArm = m_raycastInfo.m_contactPointWS - chassis.getCenterOfMassPosition();
+	const btVector3 contactVelocity = chassis.getVelocityInLocalPoint( contactArm );
+	const btScalar normalSpeed = contactNormal.dot( contactVelocity );
+
+	if ( normalDotSuspension >= btScalar(-0.1))
+	{
+		// At or above the -0.1 threshold the factor is pinned to 1/0.1.
+		m_suspensionRelativeVelocity = btScalar(0.0);
+		m_clippedInvContactDotSuspension = btScalar(1.0) / btScalar(0.1);
+		return;
+	}
+
+	const btScalar invProjection = btScalar(-1.) / normalDotSuspension;
+	m_suspensionRelativeVelocity = normalSpeed * invProjection;
+	m_clippedInvContactDotSuspension = invProjection;
+}
diff --git a/src/bullet/BulletDynamics/Vehicle/btWheelInfo.h b/src/bullet/BulletDynamics/Vehicle/btWheelInfo.h
new file mode 100644
index 00000000..f916053e
--- /dev/null
+++ b/src/bullet/BulletDynamics/Vehicle/btWheelInfo.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2005 Erwin Coumans http://continuousphysics.com/Bullet/
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies.
+ * Erwin Coumans makes no representations about the suitability 
+ * of this software for any purpose.  
+ * It is provided "as is" without express or implied warranty.
+*/
+#ifndef BT_WHEEL_INFO_H
+#define BT_WHEEL_INFO_H
+
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btTransform.h"
+
+class btRigidBody;
+/// Parameter bundle read by the btWheelInfo constructor, which copies every field below into a btWheelInfo member.
+struct btWheelInfoConstructionInfo
+{
+	btVector3	m_chassisConnectionCS;	// copied to btWheelInfo::m_chassisConnectionPointCS
+	btVector3	m_wheelDirectionCS;	// copied to btWheelInfo::m_wheelDirectionCS
+	btVector3	m_wheelAxleCS;	// copied to btWheelInfo::m_wheelAxleCS
+	btScalar	m_suspensionRestLength;	// copied to btWheelInfo::m_suspensionRestLength1
+	btScalar	m_maxSuspensionTravelCm;	// presumably centimetres, going by the name - confirm
+	btScalar	m_wheelRadius;	// copied to btWheelInfo::m_wheelsRadius
+	// Suspension and friction tuning; each is copied to the btWheelInfo member of the same name.
+	btScalar		m_suspensionStiffness;
+	btScalar		m_wheelsDampingCompression;
+	btScalar		m_wheelsDampingRelaxation;
+	btScalar		m_frictionSlip;
+	btScalar		m_maxSuspensionForce;
+	bool m_bIsFrontWheel;
+	// No constructor is declared: this struct does not initialise its fields itself.
+};
+
+/// btWheelInfo contains information per wheel about friction and suspension.
+struct btWheelInfo
+{
+	struct RaycastInfo
+	{
+		//set by raycaster
+		btVector3	m_contactNormalWS;//contactnormal
+		btVector3	m_contactPointWS;//raycast hitpoint
+		btScalar	m_suspensionLength;
+		btVector3	m_hardPointWS;//raycast starting point
+		btVector3	m_wheelDirectionWS; //direction in worldspace
+		btVector3	m_wheelAxleWS; // axle in worldspace
+		bool		m_isInContact;
+		void*		m_groundObject; //could be general void* ptr
+	};
+
+	RaycastInfo	m_raycastInfo;
+	btTransform	m_worldTransform;
+	btVector3	m_chassisConnectionPointCS; //const
+	btVector3	m_wheelDirectionCS;//const
+	btVector3	m_wheelAxleCS; // const or modified by steering
+	btScalar	m_suspensionRestLength1;//const
+	btScalar	m_maxSuspensionTravelCm;
+	btScalar getSuspensionRestLength() const;
+	btScalar	m_wheelsRadius;//const
+	btScalar	m_suspensionStiffness;//const
+	btScalar	m_wheelsDampingCompression;//const
+	btScalar	m_wheelsDampingRelaxation;//const
+	btScalar	m_frictionSlip;
+	btScalar	m_steering;
+	btScalar	m_rotation;
+	btScalar	m_deltaRotation;
+	btScalar	m_rollInfluence;
+	btScalar	m_maxSuspensionForce;
+	btScalar	m_engineForce;
+	btScalar	m_brake;
+	bool m_bIsFrontWheel;
+	void*		m_clientInfo;//can be used to store pointer to sync transforms...
+
+	/// Copies the tuning values from `ci`, zeroes the driving inputs and gives all other state a defined start value.
+	btWheelInfo(btWheelInfoConstructionInfo& ci)
+	{
+		m_suspensionRestLength1 = ci.m_suspensionRestLength;
+		m_maxSuspensionTravelCm = ci.m_maxSuspensionTravelCm;
+		m_wheelsRadius = ci.m_wheelRadius;
+		m_suspensionStiffness = ci.m_suspensionStiffness;
+		m_wheelsDampingCompression = ci.m_wheelsDampingCompression;
+		m_wheelsDampingRelaxation = ci.m_wheelsDampingRelaxation;
+		m_chassisConnectionPointCS = ci.m_chassisConnectionCS;
+		m_wheelDirectionCS = ci.m_wheelDirectionCS;
+		m_wheelAxleCS = ci.m_wheelAxleCS;
+		m_frictionSlip = ci.m_frictionSlip;
+		m_steering = btScalar(0.);
+		m_engineForce = btScalar(0.);
+		m_rotation = btScalar(0.);
+		m_deltaRotation = btScalar(0.);
+		m_brake = btScalar(0.);
+		m_rollInfluence = btScalar(0.1);
+		m_bIsFrontWheel = ci.m_bIsFrontWheel;
+		m_maxSuspensionForce = ci.m_maxSuspensionForce;
+
+		// The members below used to stay indeterminate although vehicle code reads them; start in a "no contact" state.
+		const btVector3 zeroVector(btScalar(0.),btScalar(0.),btScalar(0.));
+		m_raycastInfo.m_contactNormalWS = m_raycastInfo.m_contactPointWS = m_raycastInfo.m_hardPointWS = zeroVector;
+		m_raycastInfo.m_wheelDirectionWS = m_raycastInfo.m_wheelAxleWS = zeroVector;
+		m_raycastInfo.m_suspensionLength = ci.m_suspensionRestLength;	// rest length, as updateWheel() uses without contact
+		m_raycastInfo.m_isInContact = false;
+		m_raycastInfo.m_groundObject = m_clientInfo = 0;
+		m_worldTransform.setIdentity();
+		m_clippedInvContactDotSuspension = m_skidInfo = btScalar(1.);	// same values the no-contact / no-skid paths assign
+		m_suspensionRelativeVelocity = m_wheelsSuspensionForce = btScalar(0.);
+	}
+
+	void	updateWheel(const btRigidBody& chassis,RaycastInfo& raycastInfo);
+	btScalar	m_clippedInvContactDotSuspension;
+	btScalar	m_suspensionRelativeVelocity;
+	//calculated by suspension
+	btScalar	m_wheelsSuspensionForce;
+	btScalar	m_skidInfo;
+};
+
+#endif //BT_WHEEL_INFO_H
+
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverBuffer_DX11.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverBuffer_DX11.h
new file mode 100644
index 00000000..b6a99cc1
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverBuffer_DX11.h
@@ -0,0 +1,323 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#ifndef BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
+#define BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
+
+// DX11 support
+#include <windows.h>
+#include <crtdbg.h>
+#include <d3d11.h>
+#include <d3dx11.h>
+#include <d3dcompiler.h>
+
+#ifndef SAFE_RELEASE
+#define SAFE_RELEASE(p)      { if(p) { (p)->Release(); (p)=NULL; } }
+#endif
+
+/**
+ * DX11 Buffer that tracks a host buffer on use to ensure size-correctness.
+ *
+ * Holds a structured ID3D11Buffer, its shader resource view and, for buffers
+ * that are writable on the GPU, an unordered access view plus a staging
+ * read-back buffer. The CPU-side btAlignedObjectArray is referenced through a
+ * pointer and never copied. The buffer, both views and the read-back buffer
+ * are released in the destructor; the device and its context are not.
+ */
+template <typename ElementType> class btDX11Buffer
+{
+protected:
+	// Device and context used for every D3D call made by this class.
+	ID3D11Device*				m_d3dDevice;
+	ID3D11DeviceContext*		m_d3dDeviceContext;
+
+	// GPU-side resources; each one stays 0 until it has been created.
+	ID3D11Buffer*               m_Buffer;
+	ID3D11ShaderResourceView*   m_SRV;
+	ID3D11UnorderedAccessView*  m_UAV;
+	btAlignedObjectArray< ElementType >*	m_CPUBuffer;
+
+	// TODO: Separate this from the main class
+	// as read back buffers can be shared between buffers
+	ID3D11Buffer*               m_readBackBuffer;
+
+	// Element count the GPU resources were last created for.
+	int m_gpuSize;
+	// True once the CPU data has been uploaded and not invalidated since.
+	bool m_onGPU;
+
+	// When set, no UAV and no read-back buffer are created.
+	bool m_readOnlyOnGPU;
+	
+	/**
+	 * Creates the GPU resources for the current size of the CPU array.
+	 * When `preexistingBuffer` is given it is adopted as the main buffer instead
+	 * of creating one; it is then released by this object like any other.
+	 * Returns false as soon as one of the D3D creation calls fails.
+	 */
+	bool createBuffer( ID3D11Buffer *preexistingBuffer = 0)
+	{
+		HRESULT hr = S_OK;
+
+		// At a minimum the buffer must exist, so every description below uses
+		// at least one element even when the CPU array is empty.
+		UINT elementCount = m_CPUBuffer->size();
+		if( elementCount == 0 )
+			elementCount = 1;
+		const UINT byteWidth = elementCount * sizeof(ElementType);
+
+		// Create all CS buffers
+		if( preexistingBuffer )
+		{
+			// Adopted as-is (no AddRef here); released together with the views.
+			m_Buffer = preexistingBuffer;
+		} else {
+			D3D11_BUFFER_DESC buffer_desc;
+			ZeroMemory(&buffer_desc, sizeof(buffer_desc));		
+			buffer_desc.Usage = D3D11_USAGE_DEFAULT;
+			if( m_readOnlyOnGPU )
+				buffer_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+			else
+				buffer_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
+			buffer_desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
+			
+			buffer_desc.ByteWidth = byteWidth;
+			buffer_desc.StructureByteStride = sizeof(ElementType);
+			hr = m_d3dDevice->CreateBuffer(&buffer_desc, NULL, &m_Buffer);
+			if( FAILED( hr ) )
+				return false;
+		} 
+
+		// Create SRV (needed for read-only and writable buffers alike)
+		D3D11_SHADER_RESOURCE_VIEW_DESC srvbuffer_desc;
+		ZeroMemory(&srvbuffer_desc, sizeof(srvbuffer_desc));
+		srvbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
+		srvbuffer_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
+
+		srvbuffer_desc.Buffer.ElementWidth = elementCount;
+		hr = m_d3dDevice->CreateShaderResourceView(m_Buffer, &srvbuffer_desc, &m_SRV);
+		if( FAILED( hr ) )
+			return false;
+
+		if( !m_readOnlyOnGPU )
+		{
+			// Create UAV
+			D3D11_UNORDERED_ACCESS_VIEW_DESC uavbuffer_desc;
+			ZeroMemory(&uavbuffer_desc, sizeof(uavbuffer_desc));
+			uavbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
+			uavbuffer_desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
+
+			uavbuffer_desc.Buffer.NumElements = elementCount;
+			hr = m_d3dDevice->CreateUnorderedAccessView(m_Buffer, &uavbuffer_desc, &m_UAV);
+			if( FAILED( hr ) )
+				return false;
+
+			// Create read back buffer. It now shares the one-element minimum of
+			// the main buffer instead of requesting a ByteWidth of zero when the
+			// CPU array is empty.
+			D3D11_BUFFER_DESC readback_buffer_desc;
+			ZeroMemory(&readback_buffer_desc, sizeof(readback_buffer_desc));
+
+			readback_buffer_desc.ByteWidth = byteWidth;
+			readback_buffer_desc.Usage = D3D11_USAGE_STAGING;
+			readback_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+			readback_buffer_desc.StructureByteStride = sizeof(ElementType);
+			hr = m_d3dDevice->CreateBuffer(&readback_buffer_desc, NULL, &m_readBackBuffer);
+			if( FAILED( hr ) )
+				return false;
+		}
+
+		m_gpuSize = m_CPUBuffer->size();
+		return true;
+	}
+
+
+
+public:
+	/**
+	 * Stores the device, the device context and the CPU array pointer.
+	 * No D3D resource is created here; that happens in moveToGPU().
+	 * `readOnly` selects a buffer without UAV and read-back support.
+	 */
+	btDX11Buffer( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext, btAlignedObjectArray< ElementType > *CPUBuffer, bool readOnly )
+	{
+		m_d3dDevice = d3dDevice;
+		m_d3dDeviceContext = d3dDeviceContext;
+		m_Buffer = 0;
+		m_SRV = 0;
+		m_UAV = 0;
+		m_readBackBuffer = 0;
+
+		m_CPUBuffer = CPUBuffer;
+
+		m_gpuSize = 0;
+		m_onGPU = false;
+
+		m_readOnlyOnGPU = readOnly;
+	}
+
+	/** Releases the buffer, both views and the read-back buffer. */
+	virtual ~btDX11Buffer()
+	{
+		SAFE_RELEASE(m_Buffer);
+		SAFE_RELEASE(m_SRV);
+		SAFE_RELEASE(m_UAV);
+		SAFE_RELEASE(m_readBackBuffer);
+	}
+
+	/** Shader resource view (returned by reference); 0 until created. */
+	ID3D11ShaderResourceView* &getSRV()
+	{
+		return m_SRV;
+	}
+
+	/** Unordered access view (returned by reference); stays 0 for read-only buffers. */
+	ID3D11UnorderedAccessView* &getUAV()
+	{
+		return m_UAV;
+	}
+
+	/** Main GPU buffer (returned by reference); 0 until created. */
+	ID3D11Buffer* &getBuffer()
+	{
+		return m_Buffer;
+	}
+
+	/**
+	 * Move the data to the GPU if it is not there already.
+	 * The GPU resources are (re)created when none exist or when the CPU array
+	 * size differs from the size they were created for; the whole array is
+	 * then uploaded. Returns false only when resource creation fails.
+	 */
+	bool moveToGPU()
+	{
+		// Reallocate if GPU size is too small
+		if( (m_CPUBuffer->size() > m_gpuSize ) )
+			m_onGPU = false;
+		if( !m_onGPU && m_CPUBuffer->size() > 0 )
+		{
+			// If the buffer doesn't exist or the CPU-side buffer has changed size, create
+			// a new set of resources; the old ones are released first.
+			if( !m_Buffer || (m_CPUBuffer->size() != m_gpuSize) )
+			{
+				SAFE_RELEASE(m_Buffer);
+				SAFE_RELEASE(m_SRV);
+				SAFE_RELEASE(m_UAV);
+				SAFE_RELEASE(m_readBackBuffer);
+				if( !createBuffer() )
+				{
+					// A bare string literal is always true and would never fire;
+					// "0 &&" makes the assertion fail while keeping the message.
+					btAssert(0 && "Buffer creation failed.");
+					return false;
+				}
+			}
+
+			if( m_gpuSize > 0 )
+			{
+				// Upload the whole CPU array into the GPU buffer.
+				D3D11_BOX destRegion;
+				destRegion.left = 0;
+				destRegion.front = 0;
+				destRegion.top = 0;
+				destRegion.bottom = 1;
+				destRegion.back = 1;
+				destRegion.right = (m_CPUBuffer->size())*sizeof(ElementType);
+				m_d3dDeviceContext->UpdateSubresource(m_Buffer, 0, &destRegion, &((*m_CPUBuffer)[0]), 0, 0);
+
+				m_onGPU = true;
+			}
+
+		}
+
+		return true;
+	}
+
+	/**
+	 * Move the data back from the GPU if it is on there and isn't read only.
+	 * On success the buffer is marked as CPU-side. Returns false, with the
+	 * state left unchanged, when the read-back buffer could not be mapped.
+	 */
+	bool moveFromGPU()
+	{
+		const bool copyRequired = ( m_CPUBuffer->size() > 0 ) && m_onGPU && !m_readOnlyOnGPU;
+		if( !copyRequired )
+			return true;
+
+		// Same transfer as copyFromGPU(); only the state change differs.
+		if( !copyFromGPU() )
+			return false;
+
+		m_onGPU = false;
+		return true;
+	}
+
+
+	/**
+	 * Copy the data back from the GPU without changing its state to be CPU-side.
+	 * Useful if we just want to view it on the host for visualization.
+	 * Does nothing and returns true for empty, CPU-side or read-only buffers;
+	 * returns false when the read-back buffer could not be mapped.
+	 */
+	bool copyFromGPU()
+	{
+		if( !( m_CPUBuffer->size() > 0 ) || !m_onGPU || m_readOnlyOnGPU )
+			return true;
+
+		const UINT byteCount = (m_CPUBuffer->size())*sizeof(ElementType);
+
+		// Copy back: first into the CPU-readable staging buffer...
+		D3D11_BOX sourceRegion;	
+		sourceRegion.left = 0;
+		sourceRegion.front = 0;
+		sourceRegion.top = 0;
+		sourceRegion.bottom = 1;
+		sourceRegion.back = 1;
+		sourceRegion.right = byteCount;
+		m_d3dDeviceContext->CopySubresourceRegion(
+			m_readBackBuffer,
+			0,
+			0,
+			0,
+			0 ,
+			m_Buffer,
+			0,
+			&sourceRegion
+			);
+
+		// ...then map it. The HRESULT of Map() used to be ignored, so a failed
+		// map ended in a memcpy from a null pData.
+		D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; 
+		const HRESULT hr = m_d3dDeviceContext->Map(m_readBackBuffer, 0, D3D11_MAP_READ, 0, &MappedResource);   
+		if( FAILED( hr ) )
+			return false;
+
+		memcpy(&((*m_CPUBuffer)[0]), MappedResource.pData, byteCount);		
+		m_d3dDeviceContext->Unmap(m_readBackBuffer, 0);
+		return true;
+	}
+
+	/**
+	 * Call if data has changed on the CPU.
+	 * Can then trigger a move to the GPU as necessary.
+	 */
+	virtual void changedOnCPU()
+	{
+		m_onGPU = false;
+	}
+}; // class btDX11Buffer
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
\ No newline at end of file
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11.h
new file mode 100644
index 00000000..454c3c8c
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11.h
@@ -0,0 +1,103 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_DX11.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
+#define BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
+
+struct ID3D11Device;
+struct ID3D11DeviceContext;
+
+
+/**
+ * DX11 flavour of the soft body link data.
+ * Adds one btDX11Buffer per link array on top of btSoftBodyLinkData, plus the
+ * addressing and batching tables produced by generateBatches().
+ */
+class btSoftBodyLinkDataDX11 : public btSoftBodyLinkData
+{
+public:
+	/** Value reported by onAccelerator(); updated by moveToAccelerator() / moveFromAccelerator() when they succeed. */
+	bool				m_onGPU;
+	/** Device and context handed to the constructor; stored as raw pointers. */
+	ID3D11Device		*m_d3dDevice;
+	ID3D11DeviceContext *m_d3dDeviceContext;
+
+
+	// One DX11 buffer per link array. The constructor builds each of these from
+	// the address of the matching CPU-side array (m_links, m_linkStrength, ...).
+	btDX11Buffer<LinkNodePair>				m_dx11Links;
+	btDX11Buffer<float>											m_dx11LinkStrength;
+	btDX11Buffer<float>											m_dx11LinksMassLSC;
+	btDX11Buffer<float>											m_dx11LinksRestLengthSquared;
+	btDX11Buffer<Vectormath::Aos::Vector3>						m_dx11LinksCLength;
+	btDX11Buffer<float>											m_dx11LinksLengthRatio;
+	btDX11Buffer<float>											m_dx11LinksRestLength;
+	btDX11Buffer<float>											m_dx11LinksMaterialLinearStiffnessCoefficient;
+
+	/** Start index and element count of one computation batch. */
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		/** Empty batch starting at index 0. */
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		/** Batch of l elements starting at index s. */
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Link addressing information for each cloth.
+	 * Allows link locations to be computed independently of data batching.
+	 * Entry i holds the current array location of link i: setLinkAt() starts it
+	 * at i and generateBatches() rewrites it when the link data is re-sorted.
+	 */
+	btAlignedObjectArray< int >							m_linkAddresses;
+
+	/**
+	 * Start and length values for computation batches over link data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_batchStartLengths;
+
+
+	//ID3D11Buffer*               readBackBuffer;
+	
+	/** Stores the device and context and constructs every DX11 buffer over its CPU array. */
+	btSoftBodyLinkDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
+
+	virtual ~btSoftBodyLinkDataDX11();
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks );
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( const LinkDescription &link, int linkIndex );
+
+	/** Returns m_onGPU. */
+	virtual bool onAccelerator();
+
+	/** Calls moveToGPU() on every link buffer; returns true only if all of them succeed. */
+	virtual bool moveToAccelerator();
+
+	/** Calls moveFromGPU() on every link buffer; returns true only if all of them succeed. */
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire link set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+};
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h
new file mode 100644
index 00000000..6eb26c68
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h
@@ -0,0 +1,173 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_DX11.h"
+
+#ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
+#define BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
+
+struct ID3D11Device;
+struct ID3D11DeviceContext;
+
+
+/**
+ * Wavefront-oriented DX11 link data.
+ * Extends btSoftBodyLinkData with per-wavefront bookkeeping arrays and the
+ * DX11 buffers for them. The read-only accessors are const so they can be
+ * called through a const reference or pointer.
+ */
+class btSoftBodyLinkDataDX11SIMDAware : public btSoftBodyLinkData
+{
+public:
+	/** Whether the link data is flagged as being on the GPU. */
+	bool				m_onGPU;
+	/** Device and context supplied to the constructor. */
+	ID3D11Device		*m_d3dDevice;
+	ID3D11DeviceContext *m_d3dDeviceContext;
+
+	// Fixed sizing parameters; being const they can only be set in the
+	// constructor's initialiser list.
+	const int m_wavefrontSize;
+	const int m_linksPerWorkItem;
+	const int m_maxLinksPerWavefront;
+	// Values exposed through the getters below.
+	int m_maxBatchesWithinWave;
+	int m_maxVerticesWithinWave;
+	int m_numWavefronts;
+
+	int m_maxVertex;
+
+	/** Batch count and vertex count recorded for one wavefront. */
+	struct NumBatchesVerticesPair
+	{
+		int numBatches;
+		int numVertices;
+	};
+
+	// Array storing number of links in each wavefront
+	btAlignedObjectArray<int>									m_linksPerWavefront;
+	btAlignedObjectArray<NumBatchesVerticesPair>				m_numBatchesAndVerticesWithinWaves;
+	btDX11Buffer< NumBatchesVerticesPair >						m_dx11NumBatchesAndVerticesWithinWaves;
+
+	// All arrays here will contain batches of m_maxLinksPerWavefront links
+	// ordered by wavefront.
+	// with either global vertex pairs or local vertex pairs
+	btAlignedObjectArray< int >									m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront
+	btDX11Buffer<int>											m_dx11WavefrontVerticesGlobalAddresses;
+	btAlignedObjectArray< LinkNodePair >						m_linkVerticesLocalAddresses; // Vertex pair for the link
+	btDX11Buffer<LinkNodePair>									m_dx11LinkVerticesLocalAddresses;
+	btDX11Buffer<float>											m_dx11LinkStrength;
+	btDX11Buffer<float>											m_dx11LinksMassLSC;
+	btDX11Buffer<float>											m_dx11LinksRestLengthSquared;
+	btDX11Buffer<float>											m_dx11LinksRestLength;
+	btDX11Buffer<float>											m_dx11LinksMaterialLinearStiffnessCoefficient;
+
+	/** Start index and element count of one computation batch. */
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) :
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Link addressing information for each cloth.
+	 * Allows link locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_linkAddresses;
+
+	/**
+	 * Start and length values for computation batches over link data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_wavefrontBatchStartLengths;
+
+
+	//ID3D11Buffer*               readBackBuffer;
+
+	btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
+
+	virtual ~btSoftBodyLinkDataDX11SIMDAware();
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks );
+
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( const LinkDescription &link, int linkIndex );
+
+	virtual bool onAccelerator();
+
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire link set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+
+	/** Returns m_maxVerticesWithinWave. */
+	int getMaxVerticesPerWavefront() const
+	{
+		return m_maxVerticesWithinWave;
+	}
+
+	/** Returns m_wavefrontSize. */
+	int getWavefrontSize() const
+	{
+		return m_wavefrontSize;
+	}
+
+	/** Returns m_linksPerWorkItem. */
+	int getLinksPerWorkItem() const
+	{
+		return m_linksPerWorkItem;
+	}
+
+	/** Returns m_maxLinksPerWavefront. */
+	int getMaxLinksPerWavefront() const
+	{
+		return m_maxLinksPerWavefront;
+	}
+
+	/** Returns m_maxBatchesWithinWave. */
+	int getMaxBatchesPerWavefront() const
+	{
+		return m_maxBatchesWithinWave;
+	}
+
+	/** Returns m_numWavefronts. */
+	int getNumWavefronts() const
+	{
+		return m_numWavefronts;
+	}
+
+	/**
+	 * Returns a copy of the batch/vertex counts stored for the given wavefront.
+	 * The index is passed to the array unchecked by this function.
+	 */
+	NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront ) const
+	{
+		return m_numBatchesAndVerticesWithinWaves[wavefront];
+	}
+
+	/**
+	 * Returns entry vertexIndex of m_wavefrontVerticesGlobalAddresses.
+	 * The index is passed to the array unchecked by this function.
+	 */
+	int getVertexGlobalAddresses( int vertexIndex ) const
+	{
+		return m_wavefrontVerticesGlobalAddresses[vertexIndex];
+	}
+
+	/**
+	 * Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally.
+	 */
+	LinkNodePair getVertexPairLocalAddresses( int linkIndex ) const
+	{
+		return m_linkVerticesLocalAddresses[linkIndex];
+	}
+
+};
+
+
+#endif // #ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverTriangleData_DX11.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverTriangleData_DX11.h
new file mode 100644
index 00000000..7012fabd
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverTriangleData_DX11.h
@@ -0,0 +1,96 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_DX11.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
+#define BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
+
+struct ID3D11Device;
+struct ID3D11DeviceContext;
+
+/**
+ * DX11 flavour of the soft body triangle data.
+ * Adds DX11 buffers for the triangle arrays on top of btSoftBodyTriangleData,
+ * plus addressing and batching tables.
+ */
+class btSoftBodyTriangleDataDX11 : public btSoftBodyTriangleData
+{
+public:
+	/** Whether the triangle data is flagged as being on the GPU. */
+	bool				m_onGPU;
+	/** Device and context supplied to the constructor. */
+	ID3D11Device		*m_d3dDevice;
+	ID3D11DeviceContext *m_d3dDeviceContext;
+
+	// DX11 buffers for the per-triangle arrays.
+	btDX11Buffer<btSoftBodyTriangleData::TriangleNodeSet>							m_dx11VertexIndices;
+	btDX11Buffer<float>									m_dx11Area;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11Normal;
+
+	/** Start index and element count of one computation batch. */
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+
+	/**
+	 * Triangle addressing information for each cloth.
+	 * Allows triangle locations to be computed independently of data batching.
+	 * NOTE(review): the previous wording spoke of links (copied from the link
+	 * data header); presumably this is the triangle analogue -- confirm
+	 * against the implementation.
+	 */
+	btAlignedObjectArray< int >							m_triangleAddresses;
+
+	/**
+	 * Start and length values for computation batches over triangle data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_batchStartLengths;
+
+	//ID3D11Buffer*               readBackBuffer;
+
+public:
+	btSoftBodyTriangleDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
+
+	virtual ~btSoftBodyTriangleDataDX11();
+
+
+	/**
+	 * Allocate enough space in all triangle-related arrays to fit numTriangles triangles.
+	 * (Wording adapted from the link data header; confirm against the implementation.)
+	 */
+	virtual void createTriangles( int numTriangles );
+	
+	/**
+	 * Insert the triangle described into the correct data structures, presumably
+	 * assuming space has already been allocated by a call to createTriangles.
+	 */
+	virtual void setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex );
+
+	virtual bool onAccelerator();
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator();
+	/**
+	 * Generate (and later update) the batching for the entire triangle set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexBuffer_DX11.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexBuffer_DX11.h
new file mode 100644
index 00000000..66bd90fa
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexBuffer_DX11.h
@@ -0,0 +1,107 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H
+#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H 
+
+
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+
+#include <windows.h>
+#include <crtdbg.h>
+#include <d3d11.h>
+#include <d3dx11.h>
+#include <d3dcompiler.h>
+
+/**
+ * Vertex buffer descriptor that records a DX11 buffer, its unordered access
+ * view and the device context they belong to, together with the
+ * offset/stride layout inherited from btVertexBufferDescriptor.
+ * The descriptor only stores the pointers it is given.
+ */
+class btDX11VertexBufferDescriptor : public btVertexBufferDescriptor
+{
+protected:
+	/** Context of the DX11 device on which the vertex buffer is stored. */
+	ID3D11DeviceContext* m_context;
+	/** DX11 vertex buffer */
+	ID3D11Buffer* m_vertexBuffer;
+	/** UAV for DX11 buffer */
+	ID3D11UnorderedAccessView*  m_vertexBufferUAV;
+
+
+public:
+	/**
+	 * Describe a buffer that holds vertex positions only.
+	 *
+	 * context is the device context that owns the buffer.
+	 * buffer is a pointer to the DX11 buffer to place the vertex data in.
+	 * UAV is a pointer to the UAV representation of the buffer laid out in floats.
+	 * vertexOffset is the offset in floats to the first vertex.
+	 * vertexStride is the stride in floats between vertices.
+	 */
+	btDX11VertexBufferDescriptor( ID3D11DeviceContext* context, ID3D11Buffer* buffer, ID3D11UnorderedAccessView *UAV, int vertexOffset, int vertexStride ) :
+		m_context( context ),
+		m_vertexBuffer( buffer ),
+		m_vertexBufferUAV( UAV )
+	{
+		// Position layout lives in the base class, so it is assigned here
+		// rather than in the initialiser list.
+		m_hasVertexPositions = true;
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+	}
+
+	/**
+	 * Describe a buffer that holds both vertex positions and normals.
+	 *
+	 * context is the device context that owns the buffer.
+	 * buffer is a pointer to the DX11 buffer to place the vertex data in.
+	 * UAV is a pointer to the UAV representation of the buffer laid out in floats.
+	 * vertexOffset is the offset in floats to the first vertex.
+	 * vertexStride is the stride in floats between vertices.
+	 * normalOffset is the offset in floats to the first normal.
+	 * normalStride is the stride in floats between normals.
+	 */
+	btDX11VertexBufferDescriptor( ID3D11DeviceContext* context, ID3D11Buffer* buffer, ID3D11UnorderedAccessView *UAV, int vertexOffset, int vertexStride, int normalOffset, int normalStride ) :
+		m_context( context ),
+		m_vertexBuffer( buffer ),
+		m_vertexBufferUAV( UAV )
+	{
+		m_hasVertexPositions = true;
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+
+		m_hasNormals = true;
+		m_normalOffset = normalOffset;
+		m_normalStride = normalStride;
+	}
+
+	/** Nothing is released here; the stored pointers are left untouched. */
+	virtual ~btDX11VertexBufferDescriptor()
+	{
+	}
+
+	/**
+	 * Return the type of the vertex buffer descriptor.
+	 */
+	virtual BufferTypes getBufferType() const
+	{
+		return DX11_BUFFER;
+	}
+
+	/** Return the device context given at construction. */
+	virtual ID3D11DeviceContext* getContext() const
+	{
+		return m_context;
+	}
+
+	/** Return the DX11 buffer given at construction. */
+	virtual ID3D11Buffer* getbtDX11Buffer() const
+	{
+		return m_vertexBuffer;
+	}
+
+	/** Return the unordered access view given at construction. */
+	virtual ID3D11UnorderedAccessView* getDX11UAV() const
+	{
+		return m_vertexBufferUAV;
+	}
+};
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H
\ No newline at end of file
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h
new file mode 100644
index 00000000..dd7cc84c
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h
@@ -0,0 +1,63 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_DX11.h"
+
+
+#ifndef BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
+#define BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
+
+// A nested class cannot be forward-declared with a qualified name
+// ("class Outer::Inner;" is ill-formed in standard C++), and nothing in this
+// header names LinkDescription, so only the enclosing class is declared here.
+class btSoftBodyLinkData;
+
+struct ID3D11Device;
+struct ID3D11DeviceContext;
+
+/**
+ * DX11 flavour of the soft body vertex data.
+ * Adds one btDX11Buffer per vertex array on top of btSoftBodyVertexData.
+ */
+class btSoftBodyVertexDataDX11 : public btSoftBodyVertexData
+{
+protected:
+	/** Value reported by onAccelerator(). */
+	bool				m_onGPU;
+	/** Device and context handed to the constructor; stored as raw pointers. */
+	ID3D11Device		*m_d3dDevice;
+	ID3D11DeviceContext *m_d3dDeviceContext;
+
+public:
+	// One DX11 buffer per vertex array. The constructor builds each of these
+	// from the address of the matching CPU-side array (m_clothIdentifier,
+	// m_vertexPosition, ...).
+	btDX11Buffer<int>										m_dx11ClothIdentifier;
+	btDX11Buffer<Vectormath::Aos::Point3>					m_dx11VertexPosition;
+	btDX11Buffer<Vectormath::Aos::Point3>					m_dx11VertexPreviousPosition;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11VertexVelocity;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11VertexForceAccumulator;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11VertexNormal;
+	btDX11Buffer<float>									m_dx11VertexInverseMass;
+	btDX11Buffer<float>									m_dx11VertexArea;
+	btDX11Buffer<int>										m_dx11VertexTriangleCount;
+
+
+	//ID3D11Buffer*               readBackBuffer;
+
+public:
+	/** Stores the device and context and constructs every DX11 buffer over its CPU array. */
+	btSoftBodyVertexDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
+	virtual ~btSoftBodyVertexDataDX11();
+
+	/** Returns m_onGPU. */
+	virtual bool onAccelerator();
+	/** Calls moveToGPU() on every vertex buffer; returns true only if all of them succeed. */
+	virtual bool moveToAccelerator();
+
+	/**
+	 * Bring vertex data back from the GPU.
+	 * With bCopy false every buffer is moved with moveFromGPU().
+	 * With bCopy true the buffers are read with copyFromGPU() instead: only
+	 * position and normal when bCopyMinimum is true, all buffers otherwise.
+	 * Returns true only if every call made succeeds.
+	 */
+	virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
+};
+
+
+#endif // #ifndef BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
+
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp
new file mode 100644
index 00000000..1f71425c
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp
@@ -0,0 +1,2236 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "vectormath/vmInclude.h"
+
+#include "btSoftBodySolver_DX11.h"
+#include "btSoftBodySolverVertexBuffer_DX11.h"
+#include "BulletSoftBody/btSoftBody.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include <stdio.h> //printf
+// Embedded HLSL sources.
+// MSTRINGIFY turns its argument into a string literal. Each "HLSL/*.hlsl"
+// file is textually included as the initialiser of the pointer declared just
+// before it, so it must expand to a complete initialiser expression followed
+// by ';' -- presumably by wrapping its shader text in MSTRINGIFY( ... ); the
+// .hlsl files are not visible here, confirm there.
+// NOTE(review): these are non-const char* pointers to string data; switching to
+// const char* would need checking against the users later in this file.
+#define MSTRINGIFY(A) #A
+static char* PrepareLinksHLSLString = 
+#include "HLSL/PrepareLinks.hlsl"
+static char* UpdatePositionsFromVelocitiesHLSLString = 
+#include "HLSL/UpdatePositionsFromVelocities.hlsl"
+static char* SolvePositionsHLSLString = 
+#include "HLSL/SolvePositions.hlsl"
+static char* UpdateNodesHLSLString = 
+#include "HLSL/UpdateNodes.hlsl"
+static char* UpdatePositionsHLSLString = 
+#include "HLSL/UpdatePositions.hlsl"
+static char* UpdateConstantsHLSLString = 
+#include "HLSL/UpdateConstants.hlsl"
+static char* IntegrateHLSLString = 
+#include "HLSL/Integrate.hlsl"
+static char* ApplyForcesHLSLString = 
+#include "HLSL/ApplyForces.hlsl"
+static char* UpdateNormalsHLSLString = 
+#include "HLSL/UpdateNormals.hlsl"
+static char* OutputToVertexArrayHLSLString = 
+#include "HLSL/OutputToVertexArray.hlsl"
+static char* VSolveLinksHLSLString = 
+#include "HLSL/VSolveLinks.hlsl"
+static char* ComputeBoundsHLSLString = 
+#include "HLSL/ComputeBounds.hlsl"
+static char* SolveCollisionsAndUpdateVelocitiesHLSLString =
+#include "HLSL/SolveCollisionsAndUpdateVelocities.hlsl"
+
+
+/**
+ * Build the DX11 link data.
+ * Stores the device and context and constructs each DX11 buffer over the
+ * matching CPU-side link array inherited from btSoftBodyLinkData.
+ *
+ * m_onGPU starts out false: onAccelerator() returns it directly, so leaving it
+ * uninitialised would make that call read an indeterminate value until the
+ * first successful moveToAccelerator() / moveFromAccelerator().
+ *
+ * The initialisers are listed in member declaration order.
+ */
+btSoftBodyLinkDataDX11::btSoftBodyLinkDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) : 
+		m_onGPU( false ),
+		m_d3dDevice( d3dDevice ),
+		m_d3dDeviceContext( d3dDeviceContext ),
+		m_dx11Links( d3dDevice, d3dDeviceContext, &m_links, false ),
+		m_dx11LinkStrength( d3dDevice, d3dDeviceContext, &m_linkStrength, false ),
+		m_dx11LinksMassLSC( d3dDevice, d3dDeviceContext, &m_linksMassLSC, false ),
+		m_dx11LinksRestLengthSquared( d3dDevice, d3dDeviceContext, &m_linksRestLengthSquared, false ),
+		m_dx11LinksCLength( d3dDevice, d3dDeviceContext, &m_linksCLength, false ),
+		m_dx11LinksLengthRatio( d3dDevice, d3dDeviceContext, &m_linksLengthRatio, false ),
+		m_dx11LinksRestLength( d3dDevice, d3dDeviceContext, &m_linksRestLength, false ),
+		m_dx11LinksMaterialLinearStiffnessCoefficient( d3dDevice, d3dDeviceContext, &m_linksMaterialLinearStiffnessCoefficient, false )
+{
+}
+
+// Nothing is released explicitly: the buffer members are destroyed by their
+// own destructors and the stored device/context pointers are left untouched.
+btSoftBodyLinkDataDX11::~btSoftBodyLinkDataDX11()
+{
+}
+
+/** Build a Vectormath vector from the x, y and z components of a btVector3. */
+static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
+{
+	return Vectormath::Aos::Vector3( vec.getX(), vec.getY(), vec.getZ() );
+}
+
+/**
+ * Make room for numLinks additional links.
+ * Delegates to the base class and then grows m_linkAddresses to the same
+ * total link count.
+ */
+void btSoftBodyLinkDataDX11::createLinks( int numLinks )
+{
+	// Sample the current link count before the base call: the target size is
+	// derived from the count as it was on entry.
+	const int totalAfterCreate = m_links.size() + numLinks;
+
+	btSoftBodyLinkData::createLinks( numLinks );
+
+	// Keep the address table the same length as the link arrays
+	m_linkAddresses.resize( totalAfterCreate );
+}
+
+/**
+ * Store the given link at linkIndex via the base class and record that the
+ * link currently sits at its own index in the address table.
+ * m_linkAddresses must already be large enough (see createLinks); the index
+ * is not range-checked here.
+ */
+void btSoftBodyLinkDataDX11::setLinkAt( const btSoftBodyLinkData::LinkDescription &link, int linkIndex )
+{
+	btSoftBodyLinkData::setLinkAt( link, linkIndex );
+
+	// Set the link index correctly for initialisation
+	// (generateBatches() later rewrites this entry when it re-sorts the data)
+	m_linkAddresses[linkIndex] = linkIndex;
+}
+
+/** Report the m_onGPU flag maintained by moveToAccelerator() / moveFromAccelerator(). */
+bool btSoftBodyLinkDataDX11::onAccelerator()
+{
+	return m_onGPU;
+}
+
+/**
+ * Call moveToGPU() on every link buffer, in declaration order.
+ * The sequence stops at the first buffer that reports failure.
+ * m_onGPU is set to true only when every call succeeded.
+ *
+ * @return true if all buffers reported success, false otherwise.
+ */
+bool btSoftBodyLinkDataDX11::moveToAccelerator()
+{
+	// && short-circuits, so nothing after a failing buffer is attempted
+	const bool allMoved =
+		m_dx11Links.moveToGPU() &&
+		m_dx11LinkStrength.moveToGPU() &&
+		m_dx11LinksMassLSC.moveToGPU() &&
+		m_dx11LinksRestLengthSquared.moveToGPU() &&
+		m_dx11LinksCLength.moveToGPU() &&
+		m_dx11LinksLengthRatio.moveToGPU() &&
+		m_dx11LinksRestLength.moveToGPU() &&
+		m_dx11LinksMaterialLinearStiffnessCoefficient.moveToGPU();
+
+	if( !allMoved )
+		return false;
+
+	m_onGPU = true;
+	return true;
+}
+
+/**
+ * Call moveFromGPU() on every link buffer, in declaration order.
+ * The sequence stops at the first buffer that reports failure.
+ * m_onGPU is cleared only when every call succeeded.
+ *
+ * @return true if all buffers reported success, false otherwise.
+ */
+bool btSoftBodyLinkDataDX11::moveFromAccelerator()
+{
+	// && short-circuits, so nothing after a failing buffer is attempted
+	const bool allMoved =
+		m_dx11Links.moveFromGPU() &&
+		m_dx11LinkStrength.moveFromGPU() &&
+		m_dx11LinksMassLSC.moveFromGPU() &&
+		m_dx11LinksRestLengthSquared.moveFromGPU() &&
+		m_dx11LinksCLength.moveFromGPU() &&
+		m_dx11LinksLengthRatio.moveFromGPU() &&
+		m_dx11LinksRestLength.moveFromGPU() &&
+		m_dx11LinksMaterialLinearStiffnessCoefficient.moveFromGPU();
+
+	if( !allMoved )
+		return false;
+
+	m_onGPU = false;
+	return true;
+}
+
+/**
+ * Generate (and later update) the batching for the entire link set.
+ *
+ * Links are coloured greedily so that no two links touching the same vertex
+ * share a colour; each colour is one batch. The per-link arrays are then
+ * reordered so that every batch is contiguous, m_batchStartLengths records
+ * the start and length of each batch, and m_linkAddresses maps every original
+ * link index to the link's new location.
+ *
+ * Every per-link array, including m_linksCLength, is permuted with the same
+ * mapping so that index i refers to the same link in all of them.
+ */
+void btSoftBodyLinkDataDX11::generateBatches()
+{
+	int numLinks = getNumLinks();
+
+	// Do the graph colouring here temporarily
+	btAlignedObjectArray< int > batchValues;
+	batchValues.resize( numLinks, 0 );
+
+	// Find the maximum vertex value internally for now
+	int maxVertex = 0;
+	for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
+	{
+		int vertex0 = getVertexPair(linkIndex).vertex0;
+		int vertex1 = getVertexPair(linkIndex).vertex1;
+		if( vertex0 > maxVertex )
+			maxVertex = vertex0;
+		if( vertex1 > maxVertex )
+			maxVertex = vertex1;
+	}
+	int numVertices = maxVertex + 1;
+
+	// Set of lists, one for each node, specifying which colours are connected
+	// to that node.
+	// No two edges into a node can share a colour.
+	btAlignedObjectArray< btAlignedObjectArray< int > > vertexConnectedColourLists;
+	vertexConnectedColourLists.resize(numVertices);
+
+
+
+	// Simple algorithm that chooses the lowest batch number
+	// that none of the links attached to either of the connected 
+	// nodes is in
+	for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
+	{
+		int linkLocation = m_linkAddresses[linkIndex];
+
+		int vertex0 = getVertexPair(linkLocation).vertex0;
+		int vertex1 = getVertexPair(linkLocation).vertex1;
+
+		// Get the two node colour lists
+		btAlignedObjectArray< int > &colourListVertex0( vertexConnectedColourLists[vertex0] );
+		btAlignedObjectArray< int > &colourListVertex1( vertexConnectedColourLists[vertex1] );
+
+		// Choose the minimum colour that is in neither list
+		// (findLinearSearch returns size() when the value is absent)
+		int colour = 0;
+		while( colourListVertex0.findLinearSearch(colour) != colourListVertex0.size() || colourListVertex1.findLinearSearch(colour) != colourListVertex1.size()  )
+			++colour;
+		// colour should now be the minimum colour in neither list
+		// Add to the two lists so that future edges don't share
+		// And store the colour against this edge
+
+		colourListVertex0.push_back(colour);
+		colourListVertex1.push_back(colour);
+		batchValues[linkIndex] = colour;
+	}
+
+	// Check the colour counts
+	// A colour k is only ever chosen after colours 0..k-1 have been used, so
+	// a batch number not seen before is always exactly batchCounts.size().
+	btAlignedObjectArray< int > batchCounts;
+	for( int i = 0; i < numLinks; ++i )
+	{
+		int batch = batchValues[i];
+		if( batch >= batchCounts.size() )
+			batchCounts.push_back(1);
+		else
+			++(batchCounts[batch]);
+	}
+
+	// Turn the per-batch counts into start offsets (prefix sum)
+	m_batchStartLengths.resize(batchCounts.size());
+	if( m_batchStartLengths.size() > 0 )
+	{
+		m_batchStartLengths[0] = BatchPair( 0, 0 );
+
+		int sum = 0;
+		for( int batchIndex = 0; batchIndex < batchCounts.size(); ++batchIndex )
+		{
+			m_batchStartLengths[batchIndex].start = sum;
+			m_batchStartLengths[batchIndex].length = batchCounts[batchIndex];
+			sum += batchCounts[batchIndex];
+		}
+	}
+
+	/////////////////////////////
+	// Sort data based on batches
+
+	// Create source arrays by copying originals
+	btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair>				m_links_Backup(m_links);
+	btAlignedObjectArray<float>											m_linkStrength_Backup(m_linkStrength);
+	btAlignedObjectArray<float>											m_linksMassLSC_Backup(m_linksMassLSC);
+	btAlignedObjectArray<float>											m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
+	btAlignedObjectArray<Vectormath::Aos::Vector3>						m_linksCLength_Backup(m_linksCLength);
+	btAlignedObjectArray<float>											m_linksLengthRatio_Backup(m_linksLengthRatio);
+	btAlignedObjectArray<float>											m_linksRestLength_Backup(m_linksRestLength);
+	btAlignedObjectArray<float>											m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
+
+
+	// Reuse batchCounts as the running fill count of each batch
+	for( int batch = 0; batch < batchCounts.size(); ++batch )
+		batchCounts[batch] = 0;
+
+	// Do sort as single pass into destination arrays
+	for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
+	{
+		// To maintain locations run off the original link locations rather than the current position.
+		// It's not cache efficient, but as we run this rarely that should not matter.
+		// It's faster than searching the link location array for the current location and then updating it.
+		// The other alternative would be to unsort before resorting, but this is equivalent to doing that.
+		int linkLocation = m_linkAddresses[linkIndex];
+
+		// Obtain batch and calculate target location for the
+		// next element in that batch, incrementing the batch counter
+		// afterwards
+		int batch = batchValues[linkIndex];
+		int newLocation = m_batchStartLengths[batch].start + batchCounts[batch];
+
+		batchCounts[batch] = batchCounts[batch] + 1;
+		m_links[newLocation] = m_links_Backup[linkLocation];
+		m_linkStrength[newLocation] = m_linkStrength_Backup[linkLocation];
+		m_linksMassLSC[newLocation] = m_linksMassLSC_Backup[linkLocation];
+		m_linksRestLengthSquared[newLocation] = m_linksRestLengthSquared_Backup[linkLocation];
+		// Previously the backup of m_linksCLength was taken but never applied,
+		// leaving this one array in the old order.
+		m_linksCLength[newLocation] = m_linksCLength_Backup[linkLocation];
+		m_linksLengthRatio[newLocation] = m_linksLengthRatio_Backup[linkLocation];
+		m_linksRestLength[newLocation] = m_linksRestLength_Backup[linkLocation];
+		m_linksMaterialLinearStiffnessCoefficient[newLocation] = m_linksMaterialLinearStiffnessCoefficient_Backup[linkLocation];
+
+		// Update the locations array to account for the moved entry
+		m_linkAddresses[linkIndex] = newLocation;
+	}
+} // void btSoftBodyLinkDataDX11::generateBatches()
+
+
+
+/**
+ * Build the DX11 vertex data.
+ * Stores the device and context and constructs each DX11 buffer over the
+ * matching CPU-side vertex array inherited from btSoftBodyVertexData.
+ *
+ * m_onGPU starts out false: onAccelerator() returns it directly, so leaving it
+ * uninitialised would make that call read an indeterminate value until the
+ * first successful move.
+ *
+ * The initialisers are listed in member declaration order.
+ */
+btSoftBodyVertexDataDX11::btSoftBodyVertexDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) : 
+	m_onGPU( false ),
+	m_d3dDevice( d3dDevice ),
+	m_d3dDeviceContext( d3dDeviceContext ),
+	m_dx11ClothIdentifier( d3dDevice, d3dDeviceContext, &m_clothIdentifier, false ),
+	m_dx11VertexPosition( d3dDevice, d3dDeviceContext, &m_vertexPosition, false ),
+	m_dx11VertexPreviousPosition( d3dDevice, d3dDeviceContext, &m_vertexPreviousPosition, false ),
+	m_dx11VertexVelocity( d3dDevice, d3dDeviceContext, &m_vertexVelocity, false ),
+	m_dx11VertexForceAccumulator( d3dDevice, d3dDeviceContext, &m_vertexForceAccumulator, false ),
+	m_dx11VertexNormal( d3dDevice, d3dDeviceContext, &m_vertexNormal, false ),
+	m_dx11VertexInverseMass( d3dDevice, d3dDeviceContext, &m_vertexInverseMass, false ),
+	m_dx11VertexArea( d3dDevice, d3dDeviceContext, &m_vertexArea, false ),
+	m_dx11VertexTriangleCount( d3dDevice, d3dDeviceContext, &m_vertexTriangleCount, false )
+{
+}
+
+// Nothing is released explicitly: the buffer members are destroyed by their
+// own destructors and the stored device/context pointers are left untouched.
+btSoftBodyVertexDataDX11::~btSoftBodyVertexDataDX11()
+{
+
+}
+
+/** Report the m_onGPU flag maintained by moveToAccelerator() / moveFromAccelerator(). */
+bool btSoftBodyVertexDataDX11::onAccelerator()
+{
+	return m_onGPU;
+}
+
+/**
+ * Call moveToGPU() on every vertex buffer, in declaration order.
+ * The sequence stops at the first buffer that reports failure.
+ * m_onGPU is set to true only when every call succeeded.
+ *
+ * @return true if all buffers reported success, false otherwise.
+ */
+bool btSoftBodyVertexDataDX11::moveToAccelerator()
+{
+	// && short-circuits, so nothing after a failing buffer is attempted
+	const bool allMoved =
+		m_dx11ClothIdentifier.moveToGPU() &&
+		m_dx11VertexPosition.moveToGPU() &&
+		m_dx11VertexPreviousPosition.moveToGPU() &&
+		m_dx11VertexVelocity.moveToGPU() &&
+		m_dx11VertexForceAccumulator.moveToGPU() &&
+		m_dx11VertexNormal.moveToGPU() &&
+		m_dx11VertexInverseMass.moveToGPU() &&
+		m_dx11VertexArea.moveToGPU() &&
+		m_dx11VertexTriangleCount.moveToGPU();
+
+	if( !allMoved )
+		return false;
+
+	m_onGPU = true;
+	return true;
+}
+
+/**
+ * Bring vertex data back from the accelerator.
+ *
+ * @param bCopy        false: every vertex buffer is moved back with moveFromGPU().
+ *                     true: buffers are read back with copyFromGPU() instead.
+ * @param bCopyMinimum only consulted when bCopy is true; if set, just the position and normal
+ *                     buffers are copied, otherwise all nine buffers are.
+ * @return true if every transfer that was attempted reported success. The first failure
+ *         stops the remaining transfers.
+ */
+bool btSoftBodyVertexDataDX11::moveFromAccelerator(bool bCopy, bool bCopyMinimum)
+{
+	bool success = true;
+
+	if (!bCopy)
+	{
+		success = success && m_dx11ClothIdentifier.moveFromGPU();
+		success = success && m_dx11VertexPosition.moveFromGPU();
+		success = success && m_dx11VertexPreviousPosition.moveFromGPU();
+		success = success && m_dx11VertexVelocity.moveFromGPU();
+		success = success && m_dx11VertexForceAccumulator.moveFromGPU();
+		success = success && m_dx11VertexNormal.moveFromGPU();
+		success = success && m_dx11VertexInverseMass.moveFromGPU();
+		success = success && m_dx11VertexArea.moveFromGPU();
+		success = success && m_dx11VertexTriangleCount.moveFromGPU();
+	}
+	else
+	{
+		if (bCopyMinimum)
+		{
+			success = success && m_dx11VertexPosition.copyFromGPU();
+			success = success && m_dx11VertexNormal.copyFromGPU();
+		}
+		else
+		{
+			success = success && m_dx11ClothIdentifier.copyFromGPU();
+			success = success && m_dx11VertexPosition.copyFromGPU();
+			success = success && m_dx11VertexPreviousPosition.copyFromGPU();
+			success = success && m_dx11VertexVelocity.copyFromGPU();
+			success = success && m_dx11VertexForceAccumulator.copyFromGPU();
+			success = success && m_dx11VertexNormal.copyFromGPU();
+			success = success && m_dx11VertexInverseMass.copyFromGPU();
+			success = success && m_dx11VertexArea.copyFromGPU();
+			success = success && m_dx11VertexTriangleCount.copyFromGPU();
+		}
+	}
+
+	// After a copy the flag keeps reporting true, exactly as before. After a successful
+	// move the data has been handed back to the host, so the flag must no longer claim
+	// that the set is on the GPU.
+	// NOTE(review): assumes moveFromGPU() gives up GPU residency - confirm in the buffer wrapper.
+	if( success )
+		m_onGPU = bCopy;
+
+	return success;
+}
+
+
+/**
+ * Construct the DX11 triangle data set.
+ * Each DX11 buffer wrapper receives the device, the device context and the address of the
+ * host-side array it is paired with. The trailing 'false' is passed through to the wrapper
+ * unchanged (presumably a read-only flag - confirm against the buffer wrapper's constructor).
+ */
+btSoftBodyTriangleDataDX11::btSoftBodyTriangleDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) : 
+	m_dx11VertexIndices( d3dDevice, d3dDeviceContext, &m_vertexIndices, false ),
+	m_dx11Area( d3dDevice, d3dDeviceContext, &m_area, false ),
+	m_dx11Normal( d3dDevice, d3dDeviceContext, &m_normal, false )
+{
+	m_d3dDevice = d3dDevice;
+	m_d3dDeviceContext = d3dDeviceContext;
+
+	// No transfer has been requested yet; give the flag a defined value so that
+	// onAccelerator() does not read an uninitialised member.
+	m_onGPU = false;
+}
+
+/** Destructor. The body is empty; no explicit clean-up is performed here. */
+btSoftBodyTriangleDataDX11::~btSoftBodyTriangleDataDX11()
+{
+}
+
+
+/**
+ * Allocate space for numTriangles additional triangles.
+ * The base class grows its own arrays; the triangle address array is then resized to the
+ * triangle count seen before the call plus numTriangles.
+ */
+void btSoftBodyTriangleDataDX11::createTriangles( int numTriangles )
+{
+	// Sample the count before the base class changes it
+	const int grownTriangleCount = getNumTriangles() + numTriangles;
+
+	btSoftBodyTriangleData::createTriangles( numTriangles );
+
+	// Keep the triangle address array the same length
+	m_triangleAddresses.resize( grownTriangleCount );
+}
+
+/**
+ * Store the described triangle at triangleIndex, assuming space has already been allocated
+ * by a call to createTriangles. The triangle's address entry is set to its own index;
+ * generateBatches() rewrites these entries when it reorders the data.
+ */
+void btSoftBodyTriangleDataDX11::setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex )
+{
+	// Let the base class record the triangle itself
+	btSoftBodyTriangleData::setTriangleAt( triangle, triangleIndex );
+
+	// Identity mapping until the data is batched
+	m_triangleAddresses[ triangleIndex ] = triangleIndex;
+}
+
+/** @return the current value of the m_onGPU flag maintained by the move functions. */
+bool btSoftBodyTriangleDataDX11::onAccelerator()
+{
+	const bool residentOnGPU = m_onGPU;
+	return residentOnGPU;
+}
+
+/**
+ * Move the vertex index, area and normal buffers to the GPU, in that order.
+ * The first failure stops the sequence.
+ * @return true only if all three moves reported success, in which case m_onGPU is set.
+ */
+bool btSoftBodyTriangleDataDX11::moveToAccelerator()
+{
+	const bool allMoved =
+		m_dx11VertexIndices.moveToGPU() &&
+		m_dx11Area.moveToGPU() &&
+		m_dx11Normal.moveToGPU();
+
+	if( allMoved )
+	{
+		m_onGPU = true;
+	}
+
+	return allMoved;
+}
+
+/**
+ * Move the vertex index, area and normal buffers back from the GPU, in that order.
+ * The first failure stops the sequence.
+ * @return true only if all three moves reported success.
+ */
+bool btSoftBodyTriangleDataDX11::moveFromAccelerator()
+{
+	bool success = true;
+	success = success && m_dx11VertexIndices.moveFromGPU();
+	success = success && m_dx11Area.moveFromGPU();
+	success = success && m_dx11Normal.moveFromGPU();
+
+	// The data has been handed back to the host, so stop reporting it as being on the GPU.
+	// NOTE(review): assumes moveFromGPU() gives up GPU residency - confirm in the buffer wrapper.
+	if( success )
+		m_onGPU = false;
+
+	return success;
+}
+
+/**
+ * Generate (and later update) the batching for the entire triangle set.
+ * Triangles are greedily coloured so that no two triangles sharing a vertex get the same
+ * colour; each colour becomes one batch, and the triangle arrays are then reordered so that
+ * every batch is contiguous. m_batchStartLengths receives the start/length of each batch and
+ * m_triangleAddresses is updated to the new location of every triangle.
+ *
+ * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+ * In theory we could delay it until just before we need the cloth.
+ * It's a one-off overhead, though, so that is a later optimisation.
+ */
+void btSoftBodyTriangleDataDX11::generateBatches()
+{
+	int numTriangles = getNumTriangles();
+	if( numTriangles == 0 )
+		return;
+
+	// Do the graph colouring here temporarily
+	// batchValues[i] holds the colour (batch number) chosen for original triangle index i
+	btAlignedObjectArray< int > batchValues;
+	batchValues.resize( numTriangles );
+
+	// Find the maximum vertex value internally for now
+	int maxVertex = 0;
+	for( int triangleIndex = 0; triangleIndex < numTriangles; ++triangleIndex )
+	{
+		int vertex0 = getVertexSet(triangleIndex).vertex0;
+		int vertex1 = getVertexSet(triangleIndex).vertex1;
+		int vertex2 = getVertexSet(triangleIndex).vertex2;
+		
+		if( vertex0 > maxVertex )
+			maxVertex = vertex0;
+		if( vertex1 > maxVertex )
+			maxVertex = vertex1;
+		if( vertex2 > maxVertex )
+			maxVertex = vertex2;
+	}
+	int numVertices = maxVertex + 1;
+
+	// Set of lists, one for each vertex, specifying which colours are already used by
+	// triangles touching that vertex.
+	// No two triangles at a vertex can share a colour.
+	btAlignedObjectArray< btAlignedObjectArray< int > > vertexConnectedColourLists;
+	vertexConnectedColourLists.resize(numVertices);
+
+
+	//std::cout << "\n";
+	// Simple algorithm that chooses the lowest batch number
+	// that none of the triangles attached to any of the three
+	// vertices is in
+	for( int triangleIndex = 0; triangleIndex < numTriangles; ++triangleIndex )
+	{
+		// To maintain locations run off the original triangle locations rather than the current position.
+		// It's not cache efficient, but as we run this rarely that should not matter.
+		// It's faster than searching the triangle address array for the current location and then updating it.
+		// The other alternative would be to unsort before resorting, but this is equivalent to doing that.
+		int triangleLocation = m_triangleAddresses[triangleIndex];
+
+		int vertex0 = getVertexSet(triangleLocation).vertex0;
+		int vertex1 = getVertexSet(triangleLocation).vertex1;
+		int vertex2 = getVertexSet(triangleLocation).vertex2;
+
+		// Get the three vertex colour lists
+		btAlignedObjectArray< int > &colourListVertex0( vertexConnectedColourLists[vertex0] );
+		btAlignedObjectArray< int > &colourListVertex1( vertexConnectedColourLists[vertex1] );
+		btAlignedObjectArray< int > &colourListVertex2( vertexConnectedColourLists[vertex2] );
+
+		// Choose the minimum colour that is in none of the lists
+		// (findLinearSearch returning size() is treated as "not found")
+		int colour = 0;
+		while( 
+			colourListVertex0.findLinearSearch(colour) != colourListVertex0.size() || 
+			colourListVertex1.findLinearSearch(colour) != colourListVertex1.size() ||
+			colourListVertex2.findLinearSearch(colour) != colourListVertex2.size() )
+		{
+			++colour;
+		}
+		// colour should now be the minimum colour in none of the three lists
+		// Add to the three lists so that future triangles don't share
+		// And store the colour against this triangle
+		colourListVertex0.push_back(colour);
+		colourListVertex1.push_back(colour);
+		colourListVertex2.push_back(colour);
+
+		batchValues[triangleIndex] = colour;
+	}
+
+
+	// Check the colour counts
+	// push_back(1) is enough for a colour seen for the first time: the greedy choice above only
+	// issues colour c after every colour below c has been given to an earlier triangle, so new
+	// colours turn up in increasing order.
+	btAlignedObjectArray< int > batchCounts;
+	for( int i = 0; i < numTriangles; ++i )
+	{
+		int batch = batchValues[i];
+		if( batch >= batchCounts.size() )
+			batchCounts.push_back(1);
+		else
+			++(batchCounts[batch]);
+	}
+
+
+	m_batchStartLengths.resize(batchCounts.size());
+	m_batchStartLengths[0] = BatchPair( 0, 0 );
+
+
+	// Prefix sum of the batch sizes gives the start offset of each batch
+	int sum = 0;
+	for( int batchIndex = 0; batchIndex < batchCounts.size(); ++batchIndex )
+	{
+		m_batchStartLengths[batchIndex].start = sum;
+		m_batchStartLengths[batchIndex].length = batchCounts[batchIndex];
+		sum += batchCounts[batchIndex];
+	}
+	
+	/////////////////////////////
+	// Sort data based on batches
+	
+	// Create source arrays by copying originals
+	btAlignedObjectArray<btSoftBodyTriangleData::TriangleNodeSet>							m_vertexIndices_Backup(m_vertexIndices);
+	btAlignedObjectArray<float>										m_area_Backup(m_area);
+	btAlignedObjectArray<Vectormath::Aos::Vector3>					m_normal_Backup(m_normal);
+
+
+	// Reuse batchCounts as the per-batch fill counter for the scatter pass below
+	for( int batch = 0; batch < batchCounts.size(); ++batch )
+		batchCounts[batch] = 0;
+
+	// Do sort as single pass into destination arrays
+	for( int triangleIndex = 0; triangleIndex < numTriangles; ++triangleIndex )
+	{
+		// To maintain locations run off the original triangle locations rather than the current position.
+		// It's not cache efficient, but as we run this rarely that should not matter.
+		// It's faster than searching the triangle address array for the current location and then updating it.
+		// The other alternative would be to unsort before resorting, but this is equivalent to doing that.
+		int triangleLocation = m_triangleAddresses[triangleIndex];
+
+		// Obtain batch and calculate target location for the
+		// next element in that batch, incrementing the batch counter
+		// afterwards
+		int batch = batchValues[triangleIndex];
+		int newLocation = m_batchStartLengths[batch].start + batchCounts[batch];
+
+		batchCounts[batch] = batchCounts[batch] + 1;
+		m_vertexIndices[newLocation] = m_vertexIndices_Backup[triangleLocation];
+		m_area[newLocation] = m_area_Backup[triangleLocation];
+		m_normal[newLocation] = m_normal_Backup[triangleLocation];
+
+		// Update the locations array to account for the moved entry
+		m_triangleAddresses[triangleIndex] = newLocation;
+	}
+} // btSoftBodyTriangleDataDX11::generateBatches
+
+
+
+
+
+
+
+
+
+
+
+
+/**
+ * Construct a DX11 soft body solver bound to the given device and context.
+ * The link, vertex and triangle data sets and every per-cloth DX11 buffer are created against
+ * the same device/context pair; each buffer is paired with its host-side array. The final
+ * boolean handed to each buffer is passed through unchanged (presumably a read-only flag -
+ * confirm against the buffer wrapper's constructor).
+ */
+btDX11SoftBodySolver::btDX11SoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory) :
+	m_dx11Device( dx11Device ),
+	m_dx11Context( dx11Context ),
+	dxFunctions( m_dx11Device, m_dx11Context, dx11CompileFromMemory ),
+	m_linkData(m_dx11Device, m_dx11Context),
+	m_vertexData(m_dx11Device, m_dx11Context),
+	m_triangleData(m_dx11Device, m_dx11Context),
+	m_dx11PerClothAcceleration( m_dx11Device, m_dx11Context, &m_perClothAcceleration, true ),
+	m_dx11PerClothWindVelocity( m_dx11Device, m_dx11Context, &m_perClothWindVelocity, true ),
+	m_dx11PerClothDampingFactor( m_dx11Device, m_dx11Context, &m_perClothDampingFactor, true ),
+	m_dx11PerClothVelocityCorrectionCoefficient( m_dx11Device, m_dx11Context, &m_perClothVelocityCorrectionCoefficient, true ),
+	m_dx11PerClothLiftFactor( m_dx11Device, m_dx11Context, &m_perClothLiftFactor, true ),
+	m_dx11PerClothDragFactor( m_dx11Device, m_dx11Context, &m_perClothDragFactor, true ),
+	m_dx11PerClothMediumDensity( m_dx11Device, m_dx11Context, &m_perClothMediumDensity, true ),
+	m_dx11PerClothCollisionObjects( m_dx11Device, m_dx11Context, &m_perClothCollisionObjects, true ),
+	m_dx11CollisionObjectDetails( m_dx11Device, m_dx11Context, &m_collisionObjectDetails, true ),
+	m_dx11PerClothMinBounds( m_dx11Device, m_dx11Context, &m_perClothMinBounds, false ),
+	m_dx11PerClothMaxBounds( m_dx11Device, m_dx11Context, &m_perClothMaxBounds, false ),
+	m_dx11PerClothFriction( m_dx11Device, m_dx11Context, &m_perClothFriction, false ),
+	m_enableUpdateBounds(false)
+{
+	// Shaders are compiled lazily; see checkInitialized()
+	m_shadersInitialized = false;
+
+	// The solver constants have never been computed, so flag them as needing an update.
+	// The flag is global to all cloths linked with this solver for now; making it body
+	// specific is a possible future optimisation.
+	m_updateSolverConstants = true;
+}
+
+/** Destructor: releases the kernels and their constant buffers via releaseKernels(). */
+btDX11SoftBodySolver::~btDX11SoftBodySolver()
+{
+	this->releaseKernels();
+}
+
+/**
+ * Release the compute kernel and constant buffer of every kernel descriptor held by the
+ * solver, then mark the shaders as uninitialised so that checkInitialized() builds them again.
+ */
+void btDX11SoftBodySolver::releaseKernels()
+{
+	// One kernel/constBuffer pair per descriptor; each is released exactly once
+	SAFE_RELEASE( prepareLinksKernel.kernel );
+	SAFE_RELEASE( prepareLinksKernel.constBuffer );
+	SAFE_RELEASE( integrateKernel.kernel );
+	SAFE_RELEASE( integrateKernel.constBuffer );
+	SAFE_RELEASE( solvePositionsFromLinksKernel.kernel );
+	SAFE_RELEASE( solvePositionsFromLinksKernel.constBuffer );
+	SAFE_RELEASE( updatePositionsFromVelocitiesKernel.kernel );
+	SAFE_RELEASE( updatePositionsFromVelocitiesKernel.constBuffer );
+	SAFE_RELEASE( updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel );
+	SAFE_RELEASE( updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer );
+	SAFE_RELEASE( updateVelocitiesFromPositionsWithVelocitiesKernel.kernel );
+	SAFE_RELEASE( updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer );
+	SAFE_RELEASE( resetNormalsAndAreasKernel.kernel );
+	SAFE_RELEASE( resetNormalsAndAreasKernel.constBuffer );
+	SAFE_RELEASE( normalizeNormalsAndAreasKernel.kernel );
+	SAFE_RELEASE( normalizeNormalsAndAreasKernel.constBuffer );
+	SAFE_RELEASE( updateSoftBodiesKernel.kernel );
+	SAFE_RELEASE( updateSoftBodiesKernel.constBuffer );
+	SAFE_RELEASE( solveCollisionsAndUpdateVelocitiesKernel.kernel );
+	SAFE_RELEASE( solveCollisionsAndUpdateVelocitiesKernel.constBuffer );
+	SAFE_RELEASE( computeBoundsKernel.kernel );
+	SAFE_RELEASE( computeBoundsKernel.constBuffer );
+	SAFE_RELEASE( vSolveLinksKernel.kernel );
+	SAFE_RELEASE( vSolveLinksKernel.constBuffer );
+	SAFE_RELEASE( addVelocityKernel.kernel );
+	SAFE_RELEASE( addVelocityKernel.constBuffer );
+	SAFE_RELEASE( applyForcesKernel.kernel );
+	SAFE_RELEASE( applyForcesKernel.constBuffer );
+
+	m_shadersInitialized = false;
+}
+
+
+/**
+ * Write the solver's vertex positions and normals back into the nodes of every soft body.
+ *
+ * @param bMove passed inverted as the bCopy argument of the vertex data's
+ *              moveFromAccelerator(): true moves the data off the accelerator, false copies it.
+ */
+void btDX11SoftBodySolver::copyBackToSoftBodies(bool bMove)
+{
+	// Move the vertex data back to the host first
+	m_vertexData.moveFromAccelerator(!bMove);
+
+	// Loop over soft bodies, copying all the vertex positions back for each body in turn
+	for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
+	{
+		btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[ softBodyIndex ];
+		btSoftBody *softBody = softBodyInterface->getSoftBody();
+
+		// Solver arrays hold all bodies back to back; this body's vertices start at firstVertex
+		int firstVertex = softBodyInterface->getFirstVertex();
+		int numVertices = softBodyInterface->getNumVertices();
+
+		// Copy vertices from solver back into the softbody
+		for( int vertex = 0; vertex < numVertices; ++vertex )
+		{
+			using Vectormath::Aos::Point3;
+			using Vectormath::Aos::Vector3;
+			Point3 vertexPosition( getVertexData().getVertexPositions()[firstVertex + vertex] );
+			// The node normal must come from the solver's normal array, not from the position
+			Vector3 vertexNormal( getVertexData().getNormal( firstVertex + vertex ) );
+
+			softBody->m_nodes[vertex].m_x.setX( vertexPosition.getX() );
+			softBody->m_nodes[vertex].m_x.setY( vertexPosition.getY() );
+			softBody->m_nodes[vertex].m_x.setZ( vertexPosition.getZ() );
+
+			softBody->m_nodes[vertex].m_n.setX( vertexNormal.getX() );
+			softBody->m_nodes[vertex].m_n.setY( vertexNormal.getY() );
+			softBody->m_nodes[vertex].m_n.setZ( vertexNormal.getZ() );
+		}
+	}
+} // btDX11SoftBodySolver::copyBackToSoftBodies
+
+
+/**
+ * Rebuild the solver-side representation of the given soft bodies.
+ * Nothing happens unless forceUpdate is set or the number of soft bodies differs from the
+ * number currently held. On a rebuild all vertex, triangle and link data is cleared and
+ * re-read from the soft bodies, the per-cloth parameter arrays are refilled, the solver
+ * constants are updated and the link and triangle batches are regenerated.
+ *
+ * @param softBodies  the soft bodies to load into the solver
+ * @param forceUpdate rebuild even when the number of soft bodies is unchanged
+ */
+void btDX11SoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies, bool forceUpdate )
+{
+	if( forceUpdate || m_softBodySet.size() != softBodies.size() )
+	{
+		// Have a change in the soft body set so update, reloading all the data
+		getVertexData().clear();
+		getTriangleData().clear();
+		getLinkData().clear();
+		// NOTE(review): the interfaces held in m_softBodySet are allocated with new below and
+		// are not deleted before the set is emptied - confirm ownership.
+		m_softBodySet.resize(0);
+
+		// The per-cloth arrays below are refilled with push_back and are indexed by soft body
+		// index elsewhere, so they have to start empty on every rebuild. Otherwise entries from
+		// the previous set would stay at the front of each array.
+		m_perClothAcceleration.resize(0);
+		m_perClothDampingFactor.resize(0);
+		m_perClothVelocityCorrectionCoefficient.resize(0);
+		m_perClothLiftFactor.resize(0);
+		m_perClothDragFactor.resize(0);
+		m_perClothMediumDensity.resize(0);
+		m_perClothMinBounds.resize(0);
+		m_perClothMaxBounds.resize(0);
+		m_perClothFriction.resize(0);
+		m_perClothCollisionObjects.resize(0);
+
+
+		for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
+		{
+			btSoftBody *softBody = softBodies[ softBodyIndex ];
+			using Vectormath::Aos::Matrix3;
+			using Vectormath::Aos::Point3;
+
+			// Create SoftBody that will store the information within the solver
+			btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody );
+			m_softBodySet.push_back( newSoftBody );
+
+			m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
+			m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
+			m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
+			m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
+			m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
+			m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
+			// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
+			m_perClothMinBounds.push_back( UIntVector3( 0, 0, 0 ) );
+			m_perClothMaxBounds.push_back( UIntVector3( UINT_MAX, UINT_MAX, UINT_MAX ) );
+			m_perClothFriction.push_back( softBody->getFriction() );
+			m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
+
+			// Add space for new vertices and triangles in the default solver for now
+			// TODO: Include space here for tearing too later
+			int firstVertex = getVertexData().getNumVertices();
+			int numVertices = softBody->m_nodes.size();
+			int maxVertices = numVertices;
+			// Allocate space for new vertices in all the vertex arrays
+			getVertexData().createVertices( maxVertices, softBodyIndex );
+
+			int firstTriangle = getTriangleData().getNumTriangles();
+			int numTriangles = softBody->m_faces.size();
+			int maxTriangles = numTriangles;
+			getTriangleData().createTriangles( maxTriangles );
+
+			// Copy vertices from softbody into the solver
+			for( int vertex = 0; vertex < numVertices; ++vertex )
+			{
+				Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
+				btSoftBodyVertexData::VertexDescription desc;
+
+				// TODO: Position in the softbody might be pre-transformed
+				// or we may need to adapt for the pose.
+				//desc.setPosition( cloth.getMeshTransform()*multPoint );
+				desc.setPosition( multPoint );
+
+				float vertexInverseMass = softBody->m_nodes[vertex].m_im;
+				desc.setInverseMass(vertexInverseMass);
+				getVertexData().setVertexAt( desc, firstVertex + vertex );
+			}
+
+			// Copy triangles similarly
+			// We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
+			for( int triangle = 0; triangle < numTriangles; ++triangle )
+			{
+				// Note that large array storage is relative to the array not to the cloth
+				// So we need to add firstVertex to each value
+				// (pointer difference against the first node gives the body-local vertex index)
+				int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
+				int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
+				int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
+				btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
+				getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
+				
+				// Increase vertex triangle counts for this triangle		
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
+			}
+
+			int firstLink = getLinkData().getNumLinks();
+			int numLinks = softBody->m_links.size();
+			
+			// Allocate space for the links
+			getLinkData().createLinks( numLinks );
+
+			// Add the links
+			for( int link = 0; link < numLinks; ++link )
+			{
+				int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
+				int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
+
+				btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
+				newLink.setLinkStrength(1.f);
+				getLinkData().setLinkAt(newLink, firstLink + link);
+			}
+			
+			// Record where this body's data lives inside the shared solver arrays
+			newSoftBody->setFirstVertex( firstVertex );
+			newSoftBody->setFirstTriangle( firstTriangle );
+			newSoftBody->setNumVertices( numVertices );
+			newSoftBody->setMaxVertices( maxVertices );
+			newSoftBody->setNumTriangles( numTriangles );
+			newSoftBody->setMaxTriangles( maxTriangles );
+			newSoftBody->setFirstLink( firstLink );
+			newSoftBody->setNumLinks( numLinks );
+		}
+
+
+
+		updateConstants(0.f);
+
+
+		m_linkData.generateBatches();		
+		m_triangleData.generateBatches();
+	}
+}
+
+
+/** @return the solver's link data set, as a reference to its base-class interface. */
+btSoftBodyLinkData &btDX11SoftBodySolver::getLinkData()
+{
+	// TODO: Consider setting link data to "changed" here
+	btSoftBodyLinkData &linkData = m_linkData;
+	return linkData;
+}
+
+/** @return the solver's vertex data set, as a reference to its base-class interface. */
+btSoftBodyVertexData &btDX11SoftBodySolver::getVertexData()
+{
+	// TODO: Consider setting vertex data to "changed" here
+	btSoftBodyVertexData &vertexData = m_vertexData;
+	return vertexData;
+}
+
+/** @return the solver's triangle data set, as a reference to its base-class interface. */
+btSoftBodyTriangleData &btDX11SoftBodySolver::getTriangleData()
+{
+	// TODO: Consider setting triangle data to "changed" here
+	btSoftBodyTriangleData &triangleData = m_triangleData;
+	return triangleData;
+}
+
+/**
+ * Make sure the shaders have been built.
+ * buildShaders() is only called while the shaders are not yet marked as initialised;
+ * a failed build leaves the flag false so the next call tries again.
+ * @return true if the shaders are initialised after this call.
+ */
+bool btDX11SoftBodySolver::checkInitialized()
+{
+	// The right-hand side is only evaluated when the flag is still false
+	m_shadersInitialized = m_shadersInitialized || buildShaders();
+
+	return m_shadersInitialized;
+}
+
+/**
+ * Dispatch resetNormalsAndAreasKernel over numVertices vertices, with the vertex normal and
+ * vertex area buffers bound as UAVs 0 and 1. One thread group is launched per 128 vertices.
+ *
+ * NOTE(review): the parameters are uploaded through integrateKernel's constant buffer rather
+ * than one belonging to this kernel - confirm that this is intended and that the buffer is
+ * large enough for UpdateSoftBodiesCB.
+ */
+void btDX11SoftBodySolver::resetNormalsAndAreas( int numVertices )
+{
+	// The kernel is entirely parallel, so no batching is needed.
+	// Only the fields set here are given values before the upload.
+	UpdateSoftBodiesCB kernelParams;
+	kernelParams.numNodes = numVertices;
+	kernelParams.epsilon = FLT_EPSILON;
+
+	// Upload the parameters and bind them to constant buffer slot 0
+	// Todo: factor this out, the values could be uploaded just once on setup
+	D3D11_MAPPED_SUBRESOURCE mapping = {0};
+	m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapping );
+	memcpy( mapping.pData, &kernelParams, sizeof(kernelParams) );
+	m_dx11Context->Unmap( integrateKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer );
+
+	// Outputs
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexNormal.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexArea.getUAV()), NULL );
+
+	// Select the kernel and launch enough groups to cover every vertex
+	m_dx11Context->CSSetShader( resetNormalsAndAreasKernel.kernel, NULL, 0 );
+
+	const int groupSize = 128;
+	int groupCount = (kernelParams.numNodes + (groupSize-1)) / groupSize;
+	m_dx11Context->Dispatch(groupCount, 1, 1 );
+
+	// Unbind everything that was bound above
+	ID3D11UnorderedAccessView* nullUAV = NULL;
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &nullUAV, NULL );
+
+	ID3D11Buffer *nullBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &nullBuffer );
+} // btDX11SoftBodySolver::resetNormalsAndAreas
+
+/**
+ * Dispatch normalizeNormalsAndAreasKernel over numVertices vertices. The per-vertex triangle
+ * count is bound as SRV 2 and the vertex normal and area buffers as UAVs 0 and 1. One thread
+ * group is launched per 128 vertices.
+ *
+ * NOTE(review): the parameters are uploaded through integrateKernel's constant buffer rather
+ * than one belonging to this kernel - confirm that this is intended and that the buffer is
+ * large enough for UpdateSoftBodiesCB.
+ */
+void btDX11SoftBodySolver::normalizeNormalsAndAreas( int numVertices )
+{
+	// The kernel is entirely parallel, so no batching is needed.
+	// Only the fields set here are given values before the upload.
+	UpdateSoftBodiesCB kernelParams;
+	kernelParams.numNodes = numVertices;
+	kernelParams.epsilon = FLT_EPSILON;
+
+	// Upload the parameters and bind them to constant buffer slot 0
+	// Todo: factor this out, the values could be uploaded just once on setup
+	D3D11_MAPPED_SUBRESOURCE mapping = {0};
+	m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapping );
+	memcpy( mapping.pData, &kernelParams, sizeof(kernelParams) );
+	m_dx11Context->Unmap( integrateKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer );
+
+	// Input
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexTriangleCount.getSRV()) );
+
+	// Outputs
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexNormal.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexArea.getUAV()), NULL );
+
+	// Select the kernel and launch enough groups to cover every vertex
+	m_dx11Context->CSSetShader( normalizeNormalsAndAreasKernel.kernel, NULL, 0 );
+
+	const int groupSize = 128;
+	int groupCount = (kernelParams.numNodes + (groupSize-1)) / groupSize;
+	m_dx11Context->Dispatch(groupCount, 1, 1 );
+
+	// Unbind everything that was bound above
+	ID3D11ShaderResourceView* nullSRV = NULL;
+	m_dx11Context->CSSetShaderResources( 2, 1, &nullSRV );
+
+	ID3D11UnorderedAccessView* nullUAV = NULL;
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &nullUAV, NULL );
+
+	ID3D11Buffer *nullBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &nullBuffer );
+} // btDX11SoftBodySolver::normalizeNormalsAndAreas
+
+/**
+ * Dispatch updateSoftBodiesKernel for one contiguous range of triangles.
+ * The triangle vertex indices and vertex positions are bound as SRVs 0 and 1; the vertex
+ * normal/area and triangle normal/area buffers are bound as UAVs 0 to 3. One thread group is
+ * launched per 128 triangles.
+ *
+ * @param firstTriangle index of the first triangle of the range
+ * @param numTriangles  number of triangles in the range
+ */
+void btDX11SoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int numTriangles )
+{
+	// No need to batch link solver, it is entirely parallel
+	// Copy kernel parameters to GPU
+	UpdateSoftBodiesCB constBuffer;
+	
+	constBuffer.startFace = firstTriangle;
+	constBuffer.numFaces = numTriangles;
+	
+	// Todo: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup
+	D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
+	m_dx11Context->Map( updateSoftBodiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
+	memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateSoftBodiesCB) );	
+	m_dx11Context->Unmap( updateSoftBodiesKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &updateSoftBodiesKernel.constBuffer );
+
+	// Set resources and dispatch	
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_triangleData.m_dx11VertexIndices.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) );
+
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexNormal.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexArea.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &(m_triangleData.m_dx11Normal.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &(m_triangleData.m_dx11Area.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( updateSoftBodiesKernel.kernel, NULL, 0 );
+
+	int	numBlocks = (numTriangles + (128-1)) / 128;
+	m_dx11Context->Dispatch(numBlocks, 1, 1 );
+
+	{
+		// Tidy up: unbind exactly the slots bound above, so that none of these buffers
+		// stays attached to the compute stage when a later pass binds it differently
+		ID3D11ShaderResourceView* pViewNULL = NULL;
+		m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL );
+		m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL );
+
+		ID3D11UnorderedAccessView* pUAViewNULL = NULL;
+		m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &pUAViewNULL, NULL );
+
+		ID3D11Buffer *pBufferNull = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
+	}	
+} // btDX11SoftBodySolver::executeUpdateSoftBodies
+
+/**
+ * Recompute normals and areas on the accelerator.
+ * The vertex and triangle data are moved to the accelerator, resetNormalsAndAreas() is run,
+ * executeUpdateSoftBodies() is run once per triangle batch, and normalizeNormalsAndAreas()
+ * is run last.
+ */
+void btDX11SoftBodySolver::updateSoftBodies()
+{
+	int numVertices = m_vertexData.getNumVertices();
+
+	// Ensure data is on accelerator
+	m_vertexData.moveToAccelerator();
+	m_triangleData.moveToAccelerator();
+
+	resetNormalsAndAreas( numVertices );
+
+
+	// Go through triangle batches so updates occur correctly
+	for( int batchIndex = 0; batchIndex < m_triangleData.m_batchStartLengths.size(); ++batchIndex )
+	{
+
+		int startTriangle = m_triangleData.m_batchStartLengths[batchIndex].start;
+		int numTriangles = m_triangleData.m_batchStartLengths[batchIndex].length;
+
+		executeUpdateSoftBodies( startTriangle, numTriangles );
+	}
+
+
+	normalizeNormalsAndAreas( numVertices );
+	
+
+} // btDX11SoftBodySolver::updateSoftBodies
+
+
+/**
+ * @return a scaled by dot(v, a). This is the projection of v onto a when a has unit length;
+ *         a is not normalised here.
+ */
+Vectormath::Aos::Vector3 btDX11SoftBodySolver::ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a )
+{
+	using Vectormath::Aos::dot;
+
+	return a*dot(v, a);
+}
+
+/**
+ * Accumulate a force into vertexForce, with clamping.
+ * If force*solverdt*inverseMass is longer than vertexVelocity, the component of the velocity
+ * along the force direction, divided by solverdt*inverseMass, is subtracted from vertexForce.
+ * Otherwise the force is added unchanged.
+ *
+ * @param solverdt       solver time step
+ * @param force          force to apply
+ * @param vertexVelocity current velocity of the vertex
+ * @param inverseMass    inverse mass of the vertex
+ * @param vertexForce    force accumulator, updated in place
+ */
+void btDX11SoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce )
+{
+	float dtInverseMass = solverdt*inverseMass;
+
+	// Common case first: the force is small enough to be applied as it is
+	if( !( Vectormath::Aos::lengthSqr(force * dtInverseMass) > Vectormath::Aos::lengthSqr(vertexVelocity) ) )
+	{
+		vertexForce += force;
+		return;
+	}
+
+	vertexForce -= ProjectOnAxis( vertexVelocity, normalize( force ) )/dtInverseMass;
+}
+
+/**
+ * Dispatch applyForcesKernel over every vertex.
+ * The vertex data and the per-cloth acceleration, lift, drag, medium density and wind
+ * velocity buffers are moved to the GPU first. Nine SRVs (slots 0 to 8) and two UAVs (force
+ * accumulator and velocity) are bound for the dispatch and unbound afterwards. One thread
+ * group is launched per 128 vertices.
+ *
+ * NOTE(review): the parameters are uploaded through integrateKernel's constant buffer rather
+ * than one belonging to this kernel - confirm that this is intended and that the buffer is
+ * large enough for ApplyForcesCB.
+ *
+ * @param solverdt solver time step handed to the kernel
+ */
+void btDX11SoftBodySolver::applyForces( float solverdt )
+{		
+	using namespace Vectormath::Aos;
+
+	// Everything the kernel reads or writes has to be resident on the GPU
+	m_vertexData.moveToAccelerator();
+	m_dx11PerClothAcceleration.moveToGPU();
+	m_dx11PerClothLiftFactor.moveToGPU();
+	m_dx11PerClothDragFactor.moveToGPU();
+	m_dx11PerClothMediumDensity.moveToGPU();
+	m_dx11PerClothWindVelocity.moveToGPU();
+
+	// The kernel is entirely parallel, so no batching is needed
+	ApplyForcesCB kernelParams;
+	kernelParams.numNodes = m_vertexData.getNumVertices();
+	kernelParams.solverdt = solverdt;
+	kernelParams.epsilon = FLT_EPSILON;
+
+	// Upload the parameters and bind them to constant buffer slot 0
+	// Todo: factor this out, the values could be uploaded just once on setup
+	D3D11_MAPPED_SUBRESOURCE mapping = {0};
+	m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapping );
+	memcpy( mapping.pData, &kernelParams, sizeof(kernelParams) );
+	m_dx11Context->Unmap( integrateKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer );
+
+	// Inputs: per-vertex data in slots 0-3, per-cloth data in slots 4-8
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexNormal.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexArea.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 4, 1, &(m_dx11PerClothLiftFactor.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 5, 1, &(m_dx11PerClothDragFactor.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 6, 1, &(m_dx11PerClothWindVelocity.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 7, 1, &(m_dx11PerClothAcceleration.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 8, 1, &(m_dx11PerClothMediumDensity.getSRV()) );
+
+	// Outputs
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL );
+
+	// Select the kernel and launch enough groups to cover every vertex
+	m_dx11Context->CSSetShader( applyForcesKernel.kernel, NULL, 0 );
+
+	const int groupSize = 128;
+	int groupCount = (kernelParams.numNodes + (groupSize-1)) / groupSize;
+	m_dx11Context->Dispatch(groupCount, 1, 1 );
+
+	// Unbind everything that was bound above, slot by slot
+	ID3D11ShaderResourceView* nullSRV = NULL;
+	for( int slot = 0; slot <= 8; ++slot )
+	{
+		m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+	}
+
+	ID3D11UnorderedAccessView* nullUAV = NULL;
+	for( int slot = 0; slot <= 1; ++slot )
+	{
+		m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+	}
+
+	ID3D11Buffer *nullBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &nullBuffer );
+} // btDX11SoftBodySolver::applyForces
+
+/**
+ * Integrate motion on the solver.
+ * Dispatches integrateKernel over every vertex, with the inverse mass buffer bound as SRV 0
+ * and the position, velocity, previous position and force accumulator buffers bound as UAVs
+ * 0 to 3. One thread group is launched per 128 vertices.
+ *
+ * @param solverdt solver time step handed to the kernel
+ */
+void btDX11SoftBodySolver::integrate( float solverdt )
+{
+	// Make sure the vertex data is resident on the GPU
+	m_vertexData.moveToAccelerator();
+
+	// The kernel is entirely parallel, so no batching is needed
+	IntegrateCB kernelParams;
+	kernelParams.numNodes = m_vertexData.getNumVertices();
+	kernelParams.solverdt = solverdt;
+
+	// Upload the parameters and bind them to constant buffer slot 0
+	// Todo: factor this out, the values could be uploaded just once on setup
+	D3D11_MAPPED_SUBRESOURCE mapping = {0};
+	m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapping );
+	memcpy( mapping.pData, &kernelParams, sizeof(kernelParams) );
+	m_dx11Context->Unmap( integrateKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer );
+
+	// Input
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
+
+	// Outputs
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL );
+
+	// Select the kernel and launch enough groups to cover every vertex
+	m_dx11Context->CSSetShader( integrateKernel.kernel, NULL, 0 );
+
+	const int groupSize = 128;
+	int groupCount = (kernelParams.numNodes + (groupSize-1)) / groupSize;
+	m_dx11Context->Dispatch(groupCount, 1, 1 );
+
+	// Unbind everything that was bound above
+	ID3D11ShaderResourceView* nullSRV = NULL;
+	m_dx11Context->CSSetShaderResources( 0, 1, &nullSRV );
+
+	ID3D11UnorderedAccessView* nullUAV = NULL;
+	for( int slot = 0; slot <= 3; ++slot )
+	{
+		m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+	}
+
+	ID3D11Buffer *nullBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &nullBuffer );
+} // btDX11SoftBodySolver::integrate
+
+/**
+ * Returns the length of the cross product of the two triangle edges that
+ * start at vertex0.
+ *
+ * NOTE(review): the cross-product length is returned unscaled (no 0.5 factor
+ * is applied here) - confirm that callers expect this value.
+ */
+float btDX11SoftBodySolver::computeTriangleArea( 
+	const Vectormath::Aos::Point3 &vertex0,
+	const Vectormath::Aos::Point3 &vertex1,
+	const Vectormath::Aos::Point3 &vertex2 )
+{
+	const Vectormath::Aos::Vector3 edge01 = vertex1 - vertex0;
+	const Vectormath::Aos::Vector3 edge02 = vertex2 - vertex0;
+	return length( cross( edge01, edge02 ) );
+} // btDX11SoftBodySolver::computeTriangleArea
+
+
+/**
+ * Convert one unsigned-integer bound component read back from the GPU into a float.
+ *
+ * When the top bit of the input is set only that bit is flipped; otherwise
+ * all 32 bits are flipped. The resulting bit pattern is then reinterpreted
+ * as a float. memcpy is used for the reinterpretation instead of reading an
+ * inactive union member.
+ * Assumes float and unsigned int have the same size, as the previous
+ * union-based code did.
+ */
+static inline float btDX11DecodeBoundComponent( unsigned int encoded )
+{
+	encoded ^= (((encoded >> 31) - 1) | 0x80000000);
+	float decoded;
+	memcpy( &decoded, &encoded, sizeof(decoded) );
+	return decoded;
+}
+
+/**
+ * Recompute the bounds of every soft body in m_softBodySet.
+ *
+ * Resets the per-cloth min/max arrays to UINT_MAX / 0, uploads them, runs
+ * computeBounds(), reads the arrays back, decodes each component to a float
+ * and passes the resulting min/max vectors to each soft body's updateBounds().
+ */
+void btDX11SoftBodySolver::updateBounds()
+{	
+	const int numSoftBodies = m_softBodySet.size();
+
+	// Update bounds array to min and max int values to allow easy atomics
+	for( int softBodyIndex = 0; softBodyIndex < numSoftBodies; ++softBodyIndex )
+	{
+		m_perClothMinBounds[softBodyIndex] = UIntVector3( UINT_MAX, UINT_MAX, UINT_MAX );
+		m_perClothMaxBounds[softBodyIndex] = UIntVector3( 0, 0, 0 );
+	}
+	
+	m_dx11PerClothMinBounds.moveToGPU();
+	m_dx11PerClothMaxBounds.moveToGPU();
+
+	computeBounds( );
+
+	m_dx11PerClothMinBounds.moveFromGPU();
+	m_dx11PerClothMaxBounds.moveFromGPU();
+
+	for( int softBodyIndex = 0; softBodyIndex < numSoftBodies; ++softBodyIndex )
+	{
+		const UIntVector3 minBoundUInt = m_perClothMinBounds[softBodyIndex];
+		const UIntVector3 maxBoundUInt = m_perClothMaxBounds[softBodyIndex];
+
+		// Convert back to float
+		btVector3 minBound;
+		minBound.setX( btDX11DecodeBoundComponent( minBoundUInt.x ) );
+		minBound.setY( btDX11DecodeBoundComponent( minBoundUInt.y ) );
+		minBound.setZ( btDX11DecodeBoundComponent( minBoundUInt.z ) );
+
+		btVector3 maxBound;
+		maxBound.setX( btDX11DecodeBoundComponent( maxBoundUInt.x ) );
+		maxBound.setY( btDX11DecodeBoundComponent( maxBoundUInt.y ) );
+		maxBound.setZ( btDX11DecodeBoundComponent( maxBoundUInt.z ) );
+		
+		// And finally assign to the soft body
+		m_softBodySet[softBodyIndex]->updateBounds( minBound, maxBound );
+	}
+}
+
+/**
+ * Rebuild the cached per-link constants if m_updateSolverConstants is set.
+ *
+ * For every link this stores the rest length (current distance between its
+ * two vertices), the rest length squared, and massLSC, which is the sum of
+ * the two vertices' inverse masses divided by the link's linear stiffness
+ * coefficient. The flag is cleared before the rebuild.
+ *
+ * @param timeStep Not read by this function.
+ */
+void btDX11SoftBodySolver::updateConstants( float timeStep )
+{
+	using namespace Vectormath::Aos;
+
+	// Nothing to do unless a rebuild has been requested.
+	if( !m_updateSolverConstants )
+		return;
+
+	m_updateSolverConstants = false;
+
+	// Will have to redo this if we change the structure (tear, maybe) or various other possible changes
+
+	// Initialise link constants
+	const int linkCount = m_linkData.getNumLinks();
+	for( int link = 0; link < linkCount; ++link )
+	{
+		btSoftBodyLinkData::LinkNodePair &endpoints( m_linkData.getVertexPair(link) );
+
+		const float restLength = length((m_vertexData.getPosition( endpoints.vertex0 ) - m_vertexData.getPosition( endpoints.vertex1 )));
+		m_linkData.getRestLength(link) = restLength;
+
+		const float inverseMassSum = m_vertexData.getInverseMass(endpoints.vertex0) + m_vertexData.getInverseMass(endpoints.vertex1);
+		m_linkData.getMassLSC(link) = inverseMassSum / m_linkData.getLinearStiffnessCoefficient(link);
+
+		m_linkData.getRestLengthSquared(link) = restLength*restLength;
+	}
+} // btDX11SoftBodySolver::updateConstants
+
+/**
+ * Sort the collision object details array and generate indexing into it for the per-cloth collision object array.
+ *
+ * After sorting by softBodyIdentifier, each cloth's entry in
+ * m_perClothCollisionObjects receives the half-open range
+ * [firstObject, endObject) of its objects in the sorted array. Entries that
+ * are never visited keep the "no collision object" value of -1 / -1.
+ */
+void btDX11SoftBodySolver::prepareCollisionConstraints()
+{
+	// First do a simple sort on the collision objects
+	class QuickSortCompare
+	{
+		public:
+
+		bool operator() ( const CollisionShapeDescription& a, const CollisionShapeDescription& b ) const
+		{
+			return ( a.softBodyIdentifier < b.softBodyIdentifier );
+		}
+	};
+
+	QuickSortCompare comparator;
+	m_collisionObjectDetails.quickSort( comparator );
+
+	// Generating indexing for perClothCollisionObjects
+	// First clear the previous values with the "no collision object for cloth" constant
+	const int numCloths = m_perClothCollisionObjects.size();
+	for( int clothIndex = 0; clothIndex < numCloths; ++clothIndex )
+	{
+		m_perClothCollisionObjects[clothIndex].firstObject = -1;
+		m_perClothCollisionObjects[clothIndex].endObject = -1;
+	}
+
+	// With no per-cloth entries there is nothing to index; the final
+	// "update last cloth" write below would otherwise touch element 0 of an
+	// empty array.
+	if( numCloths == 0 )
+		return;
+
+	int currentCloth = 0;
+	int startIndex = 0;
+	const int numCollisionObjects = m_collisionObjectDetails.size();
+	for( int collisionObject = 0; collisionObject < numCollisionObjects; ++collisionObject )
+	{
+		const int nextCloth = m_collisionObjectDetails[collisionObject].softBodyIdentifier;
+		if( nextCloth != currentCloth )
+		{	
+			// Changed cloth in the array
+			// Set the end index and the range is what we need for currentCloth
+			m_perClothCollisionObjects[currentCloth].firstObject = startIndex;
+			m_perClothCollisionObjects[currentCloth].endObject = collisionObject;
+			currentCloth = nextCloth;
+			startIndex = collisionObject;
+		}
+	}
+
+	// And update last cloth	
+	m_perClothCollisionObjects[currentCloth].firstObject = startIndex;
+	m_perClothCollisionObjects[currentCloth].endObject = numCollisionObjects;
+	
+} // btDX11SoftBodySolver::prepareCollisionConstraints
+
+
+/**
+ * Run one constraint-solving pass on the GPU.
+ *
+ * Sequence: upload per-cloth damping and velocity-correction data, make sure
+ * link and vertex data are on the accelerator, prepare the links, run
+ * m_numberOfVelocityIterations velocity passes over every link batch,
+ * prepare the collision constraints, update velocities from positions, run
+ * m_numberOfPositionIterations position passes over every link batch, and
+ * finally solve collisions and update velocities.
+ *
+ * @param solverdt Solver time step; its reciprocal is what gets passed to the
+ *                 velocity-update and collision kernels.
+ */
+void btDX11SoftBodySolver::solveConstraints( float solverdt )
+{
+	const float kst = 1.f;
+	const float ti = 0.f;
+	const float isolverdt = 1.f/solverdt;
+
+	m_dx11PerClothDampingFactor.moveToGPU();
+	m_dx11PerClothVelocityCorrectionCoefficient.moveToGPU();
+
+	// Ensure data is on accelerator
+	m_linkData.moveToAccelerator();
+	m_vertexData.moveToAccelerator();
+
+	prepareLinks();	
+
+	for( int iteration = 0; iteration < m_numberOfVelocityIterations ; ++iteration )
+	{
+		for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i )
+		{
+			const int startLink = m_linkData.m_batchStartLengths[i].start;
+			const int numLinks = m_linkData.m_batchStartLengths[i].length;
+
+			solveLinksForVelocity( startLink, numLinks, kst );
+		}
+	}
+
+	prepareCollisionConstraints();
+
+	// Compute new positions from velocity
+	// Also update the previous position so that our position computation is now based on the new position from the velocity solution
+	// rather than based directly on the original positions
+	if( m_numberOfVelocityIterations > 0 )
+	{
+		updateVelocitiesFromPositionsWithVelocities( isolverdt );
+	} else {
+		updateVelocitiesFromPositionsWithoutVelocities( isolverdt );
+	}
+
+	// Solve drift
+	for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+	{
+		for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i )
+		{
+			const int startLink = m_linkData.m_batchStartLengths[i].start;
+			const int numLinks = m_linkData.m_batchStartLengths[i].length;
+
+			solveLinksForPosition( startLink, numLinks, kst, ti );
+		}
+	} // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+
+	// At this point assume that the force array is blank - we will overwrite it
+	solveCollisionsAndUpdateVelocities( isolverdt );
+} // btDX11SoftBodySolver::solveConstraints
+
+
+
+
+//////////////////////////////////////
+// Kernel dispatches
+/**
+ * Dispatch the prepare-links kernel over all links.
+ *
+ * Inputs (SRV slots 0-2): links, links massLSC, vertex previous positions.
+ * Outputs (UAV slots 0-1): links length ratio, links C length.
+ * (numLinks + 127) / 128 thread groups are dispatched, after which every
+ * bound slot is reset to NULL.
+ */
+void btDX11SoftBodySolver::prepareLinks()
+{
+	// No need to batch link solver, it is entirely parallel.
+	// Build the CPU-side copy of the kernel parameters.
+	PrepareLinksCB kernelParams;
+	kernelParams.numLinks = m_linkData.getNumLinks();
+
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( prepareLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(PrepareLinksCB) );
+	m_dx11Context->Unmap( prepareLinksKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &prepareLinksKernel.constBuffer );
+
+	// Read-only inputs
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11Links.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11LinksMassLSC.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getSRV()) );
+
+	// Read/write outputs
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_linkData.m_dx11LinksLengthRatio.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_linkData.m_dx11LinksCLength.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( prepareLinksKernel.kernel, NULL, 0 );
+
+	const int groupCount = (kernelParams.numLinks + (128-1)) / 128;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	// Tidy up: unbind every slot that was bound above
+	ID3D11ShaderResourceView* nullSRV = NULL;
+	for( unsigned int slot = 0; slot < 3; ++slot )
+		m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+	ID3D11UnorderedAccessView* nullUAV = NULL;
+	for( unsigned int slot = 0; slot < 2; ++slot )
+		m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+
+	ID3D11Buffer* nullConstBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &nullConstBuffer );
+} // btDX11SoftBodySolver::prepareLinks
+
+
+/**
+ * Dispatch the update-positions-from-velocities kernel over all vertices.
+ *
+ * Input (SRV slot 0): vertex velocities.
+ * Outputs (UAV slots 0-1): vertex previous positions, vertex positions.
+ *
+ * @param solverdt Value copied into UpdatePositionsFromVelocitiesCB::solverSDT.
+ */
+void btDX11SoftBodySolver::updatePositionsFromVelocities( float solverdt )
+{
+	// No need to batch link solver, it is entirely parallel.
+	// Build the CPU-side copy of the kernel parameters.
+	UpdatePositionsFromVelocitiesCB kernelParams;
+	kernelParams.numNodes = m_vertexData.getNumVertices();
+	kernelParams.solverSDT = solverdt;
+
+	// Todo: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( updatePositionsFromVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(UpdatePositionsFromVelocitiesCB) );
+	m_dx11Context->Unmap( updatePositionsFromVelocitiesKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &updatePositionsFromVelocitiesKernel.constBuffer );
+
+	// Read-only input
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getSRV()) );
+
+	// Read/write outputs
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( updatePositionsFromVelocitiesKernel.kernel, NULL, 0 );
+
+	const int groupCount = (kernelParams.numNodes + (128-1)) / 128;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	// Tidy up: unbind every slot that was bound above
+	ID3D11ShaderResourceView* nullSRV = NULL;
+	m_dx11Context->CSSetShaderResources( 0, 1, &nullSRV );
+
+	ID3D11UnorderedAccessView* nullUAV = NULL;
+	for( unsigned int slot = 0; slot < 2; ++slot )
+		m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+
+	ID3D11Buffer* nullConstBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &nullConstBuffer );
+} // btDX11SoftBodySolver::updatePositionsFromVelocities
+
+/**
+ * Dispatch the solve-positions-from-links kernel for one batch of links.
+ *
+ * Inputs (SRV slots 0-3): links, links massLSC, links rest length squared,
+ * vertex inverse masses.
+ * Output (UAV slot 0): vertex positions.
+ *
+ * @param startLink Index of the first link of the batch.
+ * @param numLinks  Number of links in the batch; (numLinks + 127) / 128
+ *                  thread groups are dispatched.
+ * @param kst       Copied into the constant buffer's kst field.
+ * @param ti        Copied into the constant buffer's ti field.
+ */
+void btDX11SoftBodySolver::solveLinksForPosition( int startLink, int numLinks, float kst, float ti )
+{
+	// Build the CPU-side copy of the kernel parameters:
+	// the first link of the batch, the batch size and the solver scalars.
+	SolvePositionsFromLinksKernelCB kernelParams;
+	kernelParams.startLink = startLink;
+	kernelParams.numLinks = numLinks;
+	kernelParams.kst = kst;
+	kernelParams.ti = ti;
+
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( solvePositionsFromLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(SolvePositionsFromLinksKernelCB) );
+	m_dx11Context->Unmap( solvePositionsFromLinksKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &solvePositionsFromLinksKernel.constBuffer );
+
+	// Read-only inputs
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11Links.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11LinksMassLSC.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_linkData.m_dx11LinksRestLengthSquared.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
+
+	// Read/write output
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( solvePositionsFromLinksKernel.kernel, NULL, 0 );
+
+	const int groupCount = (kernelParams.numLinks + (128-1)) / 128;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	// Tidy up: unbind every slot that was bound above
+	ID3D11ShaderResourceView* nullSRV = NULL;
+	for( unsigned int slot = 0; slot < 4; ++slot )
+		m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+	ID3D11UnorderedAccessView* nullUAV = NULL;
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, NULL );
+
+	ID3D11Buffer* nullConstBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &nullConstBuffer );
+} // btDX11SoftBodySolver::solveLinksForPosition
+
+/**
+ * Dispatch the velocity link-solve kernel for one batch of links.
+ *
+ * Inputs (SRV slots 0-3): links, links length ratio, links C length,
+ * vertex inverse masses.
+ * Output (UAV slot 0): vertex velocities.
+ *
+ * @param startLink Index of the first link of the batch.
+ * @param numLinks  Number of links in the batch; (numLinks + 127) / 128
+ *                  thread groups are dispatched.
+ * @param kst       Copied into the constant buffer's kst field.
+ */
+void btDX11SoftBodySolver::solveLinksForVelocity( int startLink, int numLinks, float kst )
+{
+	// Build the CPU-side copy of the kernel parameters:
+	// the first link of the batch, the batch size and kst.
+	VSolveLinksCB kernelParams;
+	kernelParams.startLink = startLink;
+	kernelParams.numLinks = numLinks;
+	kernelParams.kst = kst;
+
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( vSolveLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(VSolveLinksCB) );
+	m_dx11Context->Unmap( vSolveLinksKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &vSolveLinksKernel.constBuffer );
+
+	// Read-only inputs
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11Links.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11LinksLengthRatio.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_linkData.m_dx11LinksCLength.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
+
+	// Read/write output
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( vSolveLinksKernel.kernel, NULL, 0 );
+
+	const int groupCount = (kernelParams.numLinks + (128-1)) / 128;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	// Tidy up: unbind every slot that was bound above
+	ID3D11ShaderResourceView* nullSRV = NULL;
+	for( unsigned int slot = 0; slot < 4; ++slot )
+		m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+	ID3D11UnorderedAccessView* nullUAV = NULL;
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, NULL );
+
+	ID3D11Buffer* nullConstBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &nullConstBuffer );
+} // btDX11SoftBodySolver::solveLinksForVelocity
+
+
+/**
+ * Dispatch the update-velocities-from-positions kernel (variant that takes
+ * the per-cloth velocity correction coefficient) over all vertices.
+ *
+ * Inputs (SRV slots 0-4): vertex positions, vertex previous positions, cloth
+ * identifiers, per-cloth velocity correction coefficients, per-cloth damping
+ * factors.
+ * Outputs (UAV slots 0-1): vertex velocities, vertex force accumulators.
+ *
+ * @param isolverdt Value copied into the constant buffer's isolverdt field.
+ */
+void btDX11SoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt )
+{
+	// Build the CPU-side copy of the kernel parameters:
+	// the vertex count and isolverdt.
+	UpdateVelocitiesFromPositionsWithVelocitiesCB kernelParams;
+	kernelParams.numNodes = m_vertexData.getNumVertices();
+	kernelParams.isolverdt = isolverdt;
+
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) );
+	m_dx11Context->Unmap( updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer );
+
+	// Read-only inputs
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_dx11PerClothVelocityCorrectionCoefficient.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 4, 1, &(m_dx11PerClothDampingFactor.getSRV()) );
+
+	// Read/write outputs
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( updateVelocitiesFromPositionsWithVelocitiesKernel.kernel, NULL, 0 );
+
+	const int groupCount = (kernelParams.numNodes + (128-1)) / 128;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	// Tidy up: unbind every slot that was bound above
+	ID3D11ShaderResourceView* nullSRV = NULL;
+	for( unsigned int slot = 0; slot < 5; ++slot )
+		m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+	ID3D11UnorderedAccessView* nullUAV = NULL;
+	for( unsigned int slot = 0; slot < 2; ++slot )
+		m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+
+	ID3D11Buffer* nullConstBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &nullConstBuffer );
+} // btDX11SoftBodySolver::updateVelocitiesFromPositionsWithVelocities
+
+/**
+ * Dispatch the update-velocities-from-positions kernel (variant without the
+ * per-cloth velocity correction coefficient) over all vertices.
+ *
+ * Inputs (SRV slots 0-3): vertex positions, vertex previous positions, cloth
+ * identifiers, per-cloth damping factors.
+ * Outputs (UAV slots 0-1): vertex velocities, vertex force accumulators.
+ *
+ * @param isolverdt Value copied into the constant buffer's isolverdt field.
+ */
+void btDX11SoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt )
+{
+	// Build the CPU-side copy of the kernel parameters:
+	// the vertex count and isolverdt.
+	UpdateVelocitiesFromPositionsWithoutVelocitiesCB kernelParams;
+	kernelParams.numNodes = m_vertexData.getNumVertices();
+	kernelParams.isolverdt = isolverdt;
+
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB) );
+	m_dx11Context->Unmap( updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer );
+
+	// Read-only inputs
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_dx11PerClothDampingFactor.getSRV()) );
+
+	// Read/write outputs
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel, NULL, 0 );
+
+	const int groupCount = (kernelParams.numNodes + (128-1)) / 128;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	// Tidy up: unbind every slot that was bound above
+	ID3D11ShaderResourceView* nullSRV = NULL;
+	for( unsigned int slot = 0; slot < 4; ++slot )
+		m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+	ID3D11UnorderedAccessView* nullUAV = NULL;
+	for( unsigned int slot = 0; slot < 2; ++slot )
+		m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+
+	ID3D11Buffer* nullConstBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &nullConstBuffer );
+} // btDX11SoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities
+
+
+/**
+ * Dispatch the compute-bounds kernel over all vertices.
+ *
+ * Inputs (SRV slots 0-1): cloth identifiers, vertex positions.
+ * Outputs (UAV slots 0-1): per-cloth min bounds, per-cloth max bounds.
+ * The constant buffer carries the vertex count and the number of soft bodies.
+ */
+void btDX11SoftBodySolver::computeBounds( )
+{
+	ComputeBoundsCB kernelParams;
+	m_vertexData.moveToAccelerator();
+
+	// Build the CPU-side copy of the kernel parameters.
+	kernelParams.numNodes = m_vertexData.getNumVertices();
+	kernelParams.numSoftBodies = m_softBodySet.size();
+
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( computeBoundsKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(ComputeBoundsCB) );
+	m_dx11Context->Unmap( computeBoundsKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &computeBoundsKernel.constBuffer );
+
+	// Read-only inputs
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) );
+
+	// Read/write outputs
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_dx11PerClothMinBounds.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_dx11PerClothMaxBounds.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( computeBoundsKernel.kernel, NULL, 0 );
+
+	const int groupCount = (kernelParams.numNodes + (128-1)) / 128;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	// Tidy up: unbind every slot that was bound above
+	ID3D11ShaderResourceView* nullSRV = NULL;
+	for( unsigned int slot = 0; slot < 2; ++slot )
+		m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+	ID3D11UnorderedAccessView* nullUAV = NULL;
+	for( unsigned int slot = 0; slot < 2; ++slot )
+		m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+
+	ID3D11Buffer* nullConstBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &nullConstBuffer );
+}
+
+/**
+ * Dispatch the solve-collisions-and-update-velocities kernel over all vertices.
+ *
+ * Vertex data, per-cloth friction, per-cloth damping, per-cloth collision
+ * object ranges and the collision object details are moved to the GPU first.
+ *
+ * Inputs (SRV slots 0-5): cloth identifiers, vertex previous positions,
+ * per-cloth friction, per-cloth damping factors, per-cloth collision objects,
+ * collision object details.
+ * Outputs (UAV slots 0-2): vertex force accumulators, vertex velocities,
+ * vertex positions.
+ *
+ * @param isolverdt Value copied into the constant buffer's isolverdt field.
+ */
+void btDX11SoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt )
+{
+	// Make sure everything the kernel reads is resident on the GPU
+	m_vertexData.moveToAccelerator();
+	m_dx11PerClothFriction.moveToGPU();
+	m_dx11PerClothDampingFactor.moveToGPU();
+	m_dx11PerClothCollisionObjects.moveToGPU();
+	m_dx11CollisionObjectDetails.moveToGPU();
+
+	// Build the CPU-side copy of the kernel parameters:
+	// the vertex count and isolverdt.
+	SolveCollisionsAndUpdateVelocitiesCB kernelParams;
+	kernelParams.numNodes = m_vertexData.getNumVertices();
+	kernelParams.isolverdt = isolverdt;
+
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( solveCollisionsAndUpdateVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(SolveCollisionsAndUpdateVelocitiesCB) );
+	m_dx11Context->Unmap( solveCollisionsAndUpdateVelocitiesKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &solveCollisionsAndUpdateVelocitiesKernel.constBuffer );
+
+	// Read-only inputs
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_dx11PerClothFriction.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_dx11PerClothDampingFactor.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 4, 1, &(m_dx11PerClothCollisionObjects.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 5, 1, &(m_dx11CollisionObjectDetails.getSRV()) );
+
+	// Read/write outputs
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( solveCollisionsAndUpdateVelocitiesKernel.kernel, NULL, 0 );
+
+	const int groupCount = (kernelParams.numNodes + (128-1)) / 128;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	// Tidy up: unbind every slot that was bound above
+	ID3D11ShaderResourceView* nullSRV = NULL;
+	for( unsigned int slot = 0; slot < 6; ++slot )
+		m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+	ID3D11UnorderedAccessView* nullUAV = NULL;
+	for( unsigned int slot = 0; slot < 3; ++slot )
+		m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+
+	ID3D11Buffer* nullConstBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &nullConstBuffer );
+} // btDX11SoftBodySolver::solveCollisionsAndUpdateVelocities
+
+// End kernel dispatches
+/////////////////////////////////////
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/**
+ * Look up the accelerated interface registered for a soft body.
+ *
+ * @param softBody Soft body to search for (compared by pointer).
+ * @return The first entry of m_softBodySet whose getSoftBody() equals
+ *         softBody, or 0 when there is none.
+ */
+btDX11SoftBodySolver::btAcceleratedSoftBodyInterface *btDX11SoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody )
+{
+	int index = 0;
+	while( index < m_softBodySet.size() )
+	{
+		btAcceleratedSoftBodyInterface *candidate = m_softBodySet[index];
+		if( candidate->getSoftBody() == softBody )
+		{
+			return candidate;
+		}
+		++index;
+	}
+
+	// No registered interface wraps this soft body
+	return 0;
+}
+
+/**
+ * Const overload: look up the accelerated interface registered for a soft body.
+ *
+ * @param softBody Soft body to search for (compared by pointer).
+ * @return The first entry of m_softBodySet whose getSoftBody() equals
+ *         softBody, or 0 when there is none.
+ */
+const btDX11SoftBodySolver::btAcceleratedSoftBodyInterface * const btDX11SoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) const
+{
+	int index = 0;
+	while( index < m_softBodySet.size() )
+	{
+		btAcceleratedSoftBodyInterface *candidate = m_softBodySet[index];
+		if( candidate->getSoftBody() == softBody )
+		{
+			return candidate;
+		}
+		++index;
+	}
+
+	// No registered interface wraps this soft body
+	return 0;
+}
+
+/**
+ * Find the position of a soft body within m_softBodySet.
+ *
+ * @param softBody Soft body to search for (compared by pointer).
+ * @return Index of the first entry whose getSoftBody() equals softBody, or
+ *         -1 when the soft body is not in the set. A negative value is used
+ *         so that "not found" can never be mistaken for a valid index.
+ */
+int btDX11SoftBodySolver::findSoftBodyIndex( const btSoftBody* const softBody )
+{
+	for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
+	{
+		btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex];
+		if( softBodyInterface->getSoftBody() == softBody )
+			return softBodyIndex;
+	}
+	return -1;
+}
+
+
+/**
+ * Copy a soft body's vertex positions (and normals, if requested) from the
+ * DX11 solver into a CPU vertex buffer.
+ *
+ * Only descriptors of type CPU_BUFFER are handled; any other buffer type is
+ * left untouched. Position and normal data are first copied back from the
+ * GPU, then written as three consecutive floats per vertex, advancing by the
+ * descriptor's stride (in floats) between vertices.
+ *
+ * @param softBody     Soft body whose vertices are written out; its solver
+ *                     is asserted to be a DX solver.
+ * @param vertexBuffer Descriptor of the destination buffer.
+ */
+void btSoftBodySolverOutputDXtoCPU::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
+{
+	btSoftBodySolver *solver = softBody->getSoftBodySolver();
+	btAssert( solver->getSolverType() == btSoftBodySolver::DX_SOLVER || solver->getSolverType() == btSoftBodySolver::DX_SIMD_SOLVER );
+	btDX11SoftBodySolver *dxSolver = static_cast< btDX11SoftBodySolver * >( solver );
+
+	btDX11SoftBodySolver::btAcceleratedSoftBodyInterface * currentCloth = dxSolver->findSoftBodyInterface( softBody );
+	// findSoftBodyInterface returns 0 for a soft body the solver does not know about
+	if( !currentCloth )
+		return;
+
+	if( vertexBuffer->getBufferType() != btVertexBufferDescriptor::CPU_BUFFER )
+		return;
+
+	btSoftBodyVertexDataDX11 &vertexData( dxSolver->m_vertexData );
+
+	// If we're doing a CPU-buffer copy must copy the data back to the host first
+	vertexData.m_dx11VertexPosition.copyFromGPU();
+	vertexData.m_dx11VertexNormal.copyFromGPU();
+
+	const int firstVertex = currentCloth->getFirstVertex();
+	const int lastVertex = firstVertex + currentCloth->getNumVertices();
+	const btCPUVertexBufferDescriptor *cpuVertexBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer);
+	float *basePointer = cpuVertexBuffer->getBasePointer();
+
+	if( vertexBuffer->hasVertexPositions() )
+	{
+		const int vertexOffset = cpuVertexBuffer->getVertexOffset();
+		const int vertexStride = cpuVertexBuffer->getVertexStride();
+		float *vertexPointer = basePointer + vertexOffset;
+
+		for( int vertexIndex = firstVertex; vertexIndex < lastVertex; ++vertexIndex )
+		{
+			Vectormath::Aos::Point3 position = vertexData.getPosition(vertexIndex);
+			*(vertexPointer + 0) = position.getX();
+			*(vertexPointer + 1) = position.getY();
+			*(vertexPointer + 2) = position.getZ();
+			vertexPointer += vertexStride;
+		}
+	}
+	if( vertexBuffer->hasNormals() )
+	{
+		const int normalOffset = cpuVertexBuffer->getNormalOffset();
+		const int normalStride = cpuVertexBuffer->getNormalStride();
+		float *normalPointer = basePointer + normalOffset;
+
+		for( int vertexIndex = firstVertex; vertexIndex < lastVertex; ++vertexIndex )
+		{
+			Vectormath::Aos::Vector3 normal = vertexData.getNormal(vertexIndex);
+			*(normalPointer + 0) = normal.getX();
+			*(normalPointer + 1) = normal.getY();
+			*(normalPointer + 2) = normal.getZ();
+			normalPointer += normalStride;
+		}
+	}
+} // btSoftBodySolverOutputDXtoCPU::copySoftBodyToVertexBuffer
+
+
+
+/**
+ * Build the output shaders on first use.
+ *
+ * @return true when the shaders are (now) initialized, false when
+ *         buildShaders() failed.
+ */
+bool btSoftBodySolverOutputDXtoDX::checkInitialized()
+{
+	// buildShaders() is only attempted while the flag is still clear
+	if( !m_shadersInitialized && buildShaders() )
+	{
+		m_shadersInitialized = true;
+	}
+
+	return m_shadersInitialized;
+}
+
+/**
+ * Release the constant buffer and compute shader of both output kernels and
+ * mark the shaders as not initialized.
+ */
+void btSoftBodySolverOutputDXtoDX::releaseKernels()
+{
+	// The kernels are about to go away, so the shaders no longer count as built
+	m_shadersInitialized = false;
+
+	// Variant that writes positions and normals
+	SAFE_RELEASE( outputToVertexArrayWithNormalsKernel.constBuffer );
+	SAFE_RELEASE( outputToVertexArrayWithNormalsKernel.kernel );
+
+	// Variant that writes positions only
+	SAFE_RELEASE( outputToVertexArrayWithoutNormalsKernel.constBuffer );
+	SAFE_RELEASE( outputToVertexArrayWithoutNormalsKernel.kernel );
+}
+
+
+/**
+ * (Re)compile the two output-to-vertex-array compute kernels.
+ *
+ * Any existing kernels are released first, which also clears
+ * m_shadersInitialized, so the kernels are always rebuilt. Both kernels are
+ * compiled from OutputToVertexArrayHLSLString; the build counts as successful
+ * only if both compiled kernels have a constant buffer.
+ *
+ * @return true when both kernels were built (m_shadersInitialized is then
+ *         set), false otherwise.
+ */
+bool btSoftBodySolverOutputDXtoDX::buildShaders()
+{
+	// Ensure current kernels are released first
+	releaseKernels();
+
+	outputToVertexArrayWithNormalsKernel = dxFunctions.compileComputeShaderFromString( OutputToVertexArrayHLSLString, "OutputToVertexArrayWithNormalsKernel", sizeof(OutputToVertexArrayCB) );
+	outputToVertexArrayWithoutNormalsKernel = dxFunctions.compileComputeShaderFromString( OutputToVertexArrayHLSLString, "OutputToVertexArrayWithoutNormalsKernel", sizeof(OutputToVertexArrayCB) );
+
+	bool returnVal = true;
+	if( !outputToVertexArrayWithNormalsKernel.constBuffer )
+		returnVal = false;
+	if( !outputToVertexArrayWithoutNormalsKernel.constBuffer )
+		returnVal = false;
+
+	if( returnVal )
+		m_shadersInitialized = true;
+
+	return returnVal;
+}
+
+
+void btSoftBodySolverOutputDXtoDX::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
+{
+	// Copies the solver's vertex data for softBody into vertexBuffer. CPU_BUFFER
+	// targets are handed to the DX-to-CPU base class, DX11_BUFFER targets are
+	// written by a compute shader dispatch, and any other buffer type is ignored.
+	btSoftBodySolver *solver = softBody->getSoftBodySolver();
+	btAssert( solver->getSolverType() == btSoftBodySolver::DX_SOLVER || solver->getSolverType() == btSoftBodySolver::DX_SIMD_SOLVER );
+	btDX11SoftBodySolver *dxSolver = static_cast< btDX11SoftBodySolver * >( solver );
+	const bool kernelsReady = checkInitialized();
+	btDX11SoftBodySolver::btAcceleratedSoftBodyInterface * currentCloth = dxSolver->findSoftBodyInterface( softBody );
+	btSoftBodyVertexDataDX11 &vertexData( dxSolver->m_vertexData );
+
+	if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::CPU_BUFFER )
+	{		
+		// Name the base class explicitly: this branch used to call this very
+		// function again, which recursed until the stack overflowed.
+		btSoftBodySolverOutputDXtoCPU::copySoftBodyToVertexBuffer( softBody, vertexBuffer );
+	} else 	if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::DX11_BUFFER )
+	{
+		// If the kernels failed to build, at least one shader / constant buffer
+		// pointer is null, so there is nothing safe to map or dispatch.
+		if( !kernelsReady )
+			return;
+
+		const btDX11VertexBufferDescriptor *dx11VertexBuffer = static_cast< btDX11VertexBufferDescriptor* >(vertexBuffer);	
+
+		// Kernel parameters. Zero-filled so the normal and padding fields hold
+		// defined values even when the buffer has no normals.
+		OutputToVertexArrayCB constBuffer = {0};
+		ID3D11ComputeShader* outputToVertexArrayShader = outputToVertexArrayWithoutNormalsKernel.kernel;
+		ID3D11Buffer* outputToVertexArrayConstBuffer = outputToVertexArrayWithoutNormalsKernel.constBuffer;
+
+		constBuffer.startNode = currentCloth->getFirstVertex();
+		constBuffer.numNodes = currentCloth->getNumVertices();
+		constBuffer.positionOffset = vertexBuffer->getVertexOffset();
+		constBuffer.positionStride = vertexBuffer->getVertexStride();
+		if( vertexBuffer->hasNormals() )
+		{
+			constBuffer.normalOffset = vertexBuffer->getNormalOffset();
+			constBuffer.normalStride = vertexBuffer->getNormalStride();
+			outputToVertexArrayShader = outputToVertexArrayWithNormalsKernel.kernel;
+			outputToVertexArrayConstBuffer = outputToVertexArrayWithNormalsKernel.constBuffer;
+		}	
+
+		// TODO: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup
+		D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
+		if( FAILED( dxFunctions.m_dx11Context->Map( outputToVertexArrayConstBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource ) ) )
+			return;	// nothing was mapped, so there is no memory to copy the parameters into
+		memcpy( MappedResource.pData, &constBuffer, sizeof(OutputToVertexArrayCB) );	
+		dxFunctions.m_dx11Context->Unmap( outputToVertexArrayConstBuffer, 0 );
+		dxFunctions.m_dx11Context->CSSetConstantBuffers( 0, 1, &outputToVertexArrayConstBuffer );
+
+		// Set resources and dispatch
+		dxFunctions.m_dx11Context->CSSetShaderResources( 0, 1, &(vertexData.m_dx11VertexPosition.getSRV()) );
+		dxFunctions.m_dx11Context->CSSetShaderResources( 1, 1, &(vertexData.m_dx11VertexNormal.getSRV()) );
+
+		ID3D11UnorderedAccessView* dx11UAV = dx11VertexBuffer->getDX11UAV();
+		dxFunctions.m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(dx11UAV), NULL );
+
+		// Execute the kernel, one thread group per 128 nodes (rounded up)
+		dxFunctions.m_dx11Context->CSSetShader( outputToVertexArrayShader, NULL, 0 );
+		int	numBlocks = (constBuffer.numNodes + (128-1)) / 128;
+		dxFunctions.m_dx11Context->Dispatch(numBlocks, 1, 1 );
+
+		{
+			// Tidy up: unbind everything that was bound above
+			ID3D11ShaderResourceView* pViewNULL = NULL;
+			dxFunctions.m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL );
+			dxFunctions.m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL );
+			ID3D11UnorderedAccessView* pUAViewNULL = NULL;
+			dxFunctions.m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
+			ID3D11Buffer *pBufferNull = NULL;
+			dxFunctions.m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
+		}	
+	}
+} // btSoftBodySolverOutputDXtoDX::copySoftBodyToVertexBuffer
+
+
+
+
+DXFunctions::KernelDesc DXFunctions::compileComputeShaderFromString( const char* shaderString, const char* shaderName, int constBufferSize, D3D10_SHADER_MACRO *compileMacros )
+{
+	// Compiles entry point shaderName from the HLSL text in shaderString as a
+	// cs_5_0 compute shader and, when constBufferSize > 0, creates a dynamic,
+	// CPU-writable constant buffer of that many bytes to go with it.
+	// Returns a KernelDesc holding the shader and buffer. On any failure both
+	// members are null and everything created along the way has been released.
+	const char *cs5String = "cs_5_0";
+
+	// A default-constructed KernelDesc has a null kernel and constBuffer; this
+	// is what every failure path below hands back.
+	DXFunctions::KernelDesc descriptor;
+
+	HRESULT hr = S_OK;
+	ID3DBlob* pErrorBlob = NULL;
+	ID3DBlob* pBlob = NULL;
+	ID3D11ComputeShader*		kernelPointer = 0;
+
+	hr = m_dx11CompileFromMemory( 
+		shaderString,
+		strlen(shaderString),
+		shaderName,
+		compileMacros,
+		NULL,
+		shaderName,
+		cs5String,
+		D3D10_SHADER_ENABLE_STRICTNESS,
+		NULL,
+		NULL,
+		&pBlob,
+		&pErrorBlob,
+		NULL
+		);
+
+	if( FAILED(hr) )
+	{
+		if( pErrorBlob ) {
+			// NOTE(review): a string literal is never null, so this assert cannot fire.
+			btAssert( "Compilation of compute shader failed\n" );
+			char *debugString = (char*)pErrorBlob->GetBufferPointer();
+			OutputDebugStringA( debugString );
+		}
+
+		SAFE_RELEASE( pErrorBlob );
+		SAFE_RELEASE( pBlob );
+		return descriptor;
+	}
+
+	// Create the Compute Shader. The blobs are not used after this call, so
+	// release them right away; they used to leak when shader creation failed.
+	hr = m_dx11Device->CreateComputeShader( pBlob->GetBufferPointer(), pBlob->GetBufferSize(), NULL, &kernelPointer );
+	SAFE_RELEASE( pErrorBlob );
+	SAFE_RELEASE( pBlob );
+	if( FAILED( hr ) )
+		return descriptor;
+
+	ID3D11Buffer* constBuffer = 0;
+	if( constBufferSize > 0 )
+	{
+		// Create the constant buffer
+		D3D11_BUFFER_DESC constant_buffer_desc;
+		ZeroMemory(&constant_buffer_desc, sizeof(constant_buffer_desc));
+		constant_buffer_desc.ByteWidth = constBufferSize;
+		constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
+		constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+		constant_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+		// Capture the result: the check below used to test the stale HRESULT
+		// left over from CreateComputeShader and so could never see a failure.
+		hr = m_dx11Device->CreateBuffer(&constant_buffer_desc, NULL, &constBuffer);
+		if( FAILED( hr ) )
+		{
+			SAFE_RELEASE( kernelPointer );	// do not leak the shader created above
+			return descriptor;
+		}
+	}
+
+	descriptor.kernel = kernelPointer;
+	descriptor.constBuffer = constBuffer;
+	return descriptor;
+} // compileComputeShader
+
+
+
+bool btDX11SoftBodySolver::buildShaders()
+{
+	// (Re)compiles every compute kernel of the solver. Returns true only if each compile call produced a constant buffer.
+	releaseKernels();	// ensure current kernels are released first
+
+	bool returnVal = true;
+
+	if( m_shadersInitialized )
+		return true;
+
+	prepareLinksKernel = dxFunctions.compileComputeShaderFromString( PrepareLinksHLSLString, "PrepareLinksKernel", sizeof(PrepareLinksCB) );
+	if( !prepareLinksKernel.constBuffer )
+		returnVal = false;
+	updatePositionsFromVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsFromVelocitiesHLSLString, "UpdatePositionsFromVelocitiesKernel", sizeof(UpdatePositionsFromVelocitiesCB) );
+	if( !updatePositionsFromVelocitiesKernel.constBuffer )
+		returnVal = false;
+	solvePositionsFromLinksKernel = dxFunctions.compileComputeShaderFromString( SolvePositionsHLSLString, "SolvePositionsFromLinksKernel", sizeof(SolvePositionsFromLinksKernelCB) );
+	if( !solvePositionsFromLinksKernel.constBuffer )	// fixed: this used to re-test updatePositionsFromVelocitiesKernel
+		returnVal = false;
+	vSolveLinksKernel = dxFunctions.compileComputeShaderFromString( VSolveLinksHLSLString, "VSolveLinksKernel", sizeof(VSolveLinksCB) );
+	if( !vSolveLinksKernel.constBuffer )
+		returnVal = false;
+	updateVelocitiesFromPositionsWithVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNodesHLSLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) );
+	if( !updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer )
+		returnVal = false;
+	updateVelocitiesFromPositionsWithoutVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsHLSLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB) );
+	if( !updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer )
+		returnVal = false;
+	integrateKernel = dxFunctions.compileComputeShaderFromString( IntegrateHLSLString, "IntegrateKernel", sizeof(IntegrateCB) );
+	if( !integrateKernel.constBuffer )
+		returnVal = false;
+	applyForcesKernel = dxFunctions.compileComputeShaderFromString( ApplyForcesHLSLString, "ApplyForcesKernel", sizeof(ApplyForcesCB) );
+	if( !applyForcesKernel.constBuffer )
+		returnVal = false;
+	solveCollisionsAndUpdateVelocitiesKernel = dxFunctions.compileComputeShaderFromString( SolveCollisionsAndUpdateVelocitiesHLSLString, "SolveCollisionsAndUpdateVelocitiesKernel", sizeof(SolveCollisionsAndUpdateVelocitiesCB) );
+	if( !solveCollisionsAndUpdateVelocitiesKernel.constBuffer )
+		returnVal = false;
+
+	// TODO: Rename to UpdateSoftBodies
+	resetNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "ResetNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
+	if( !resetNormalsAndAreasKernel.constBuffer )
+		returnVal = false;
+	normalizeNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "NormalizeNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
+	if( !normalizeNormalsAndAreasKernel.constBuffer )
+		returnVal = false;
+	updateSoftBodiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "UpdateSoftBodiesKernel", sizeof(UpdateSoftBodiesCB) );
+	if( !updateSoftBodiesKernel.constBuffer )
+		returnVal = false;
+
+	computeBoundsKernel = dxFunctions.compileComputeShaderFromString( ComputeBoundsHLSLString, "ComputeBoundsKernel", sizeof(ComputeBoundsCB) );
+	if( !computeBoundsKernel.constBuffer )
+		returnVal = false;
+
+
+	// Only latch the initialized flag when every kernel above was built
+	if( returnVal )
+		m_shadersInitialized = true;
+
+	return returnVal;
+}
+
+
+static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
+{
+	// Columns 0-2 come from the basis matrix, column 3 from the origin.
+	Vectormath::Aos::Transform3 converted;
+	for( int column = 0; column < 3; ++column )
+		converted.setCol( column, toVector3( transform.getBasis().getColumn( column ) ) );
+	converted.setCol( 3, toVector3( transform.getOrigin() ) );
+	return converted;
+}
+
+
+void btDX11SoftBodySolver::btAcceleratedSoftBodyInterface::updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound )
+{
+	const float margin = getSoftBody()->getCollisionShape()->getMargin();	// margin of the soft body's collision shape
+	const btVector3 padding( margin, margin, margin );
+	m_softBody->m_bounds[0] = lowerBound - padding;	// lower corner, minus the margin on each axis
+	m_softBody->m_bounds[1] = upperBound + padding;	// upper corner, plus the margin on each axis
+}
+
+void btDX11SoftBodySolver::processCollision( btSoftBody*, btSoftBody* )
+{
+	// No-op: this overload ignores both soft bodies and does nothing.
+}
+
+// Add the collision object to the set to deal with for a particular soft body
+void btDX11SoftBodySolver::processCollision( btSoftBody *softBody, btCollisionObject* collisionObject )
+{
+	// Appends a description of a capsule collision object to m_collisionObjectDetails; other shape types are skipped.
+	const int clothIndex = findSoftBodyIndex( softBody );
+	if( clothIndex < 0 )
+	{
+		btAssert("Unknown soft body");
+		return;
+	}
+
+	btCollisionShape *shape = collisionObject->getCollisionShape();
+	const float friction = collisionObject->getFriction();
+	const int shapeType = shape->getShapeType();
+	if( shapeType != CAPSULE_SHAPE_PROXYTYPE )
+	{
+#ifdef _DEBUG
+		printf("Unsupported collision shape type\n");
+#endif
+		return;
+	}
+
+	// Add to the list of expected collision objects
+	btCapsuleShape *capsule = static_cast<btCapsuleShape*>( shape );
+	btRigidBody *rigidBody = static_cast< btRigidBody* >( collisionObject );
+	CollisionShapeDescription description;
+	description.softBodyIdentifier = clothIndex;
+	description.collisionShapeType = shapeType;
+	description.shapeTransform = toTransform3(collisionObject->getWorldTransform());	// TODO: May need to transpose this matrix either here or in HLSL
+	description.radius = capsule->getRadius();
+	description.halfHeight = capsule->getHalfHeight();
+	description.margin = capsule->getMargin();
+	description.friction = friction;
+	description.linearVelocity = toVector3(rigidBody->getLinearVelocity());
+	description.angularVelocity = toVector3(rigidBody->getAngularVelocity());
+	m_collisionObjectDetails.push_back( description );
+} // btDX11SoftBodySolver::processCollision
+
+
+
+void btDX11SoftBodySolver::predictMotion( float timeStep )
+{
+	// Per-step preparation: reset the collision data, refresh the wind, then
+	// apply forces, integrate and (optionally) update the bounds.
+
+	// Clear the collision shape array for the next frame and mark the DX11
+	// copies as changed on the CPU so they will be updated correctly.
+	m_dx11CollisionObjectDetails.changedOnCPU();
+	m_dx11PerClothCollisionObjects.changedOnCPU();
+	m_collisionObjectDetails.clear();
+
+	// Refresh the per-cloth wind velocity from each soft body
+	const int clothCount = m_softBodySet.size();
+	m_perClothWindVelocity.resize( clothCount );
+	for( int cloth = 0; cloth < clothCount; ++cloth )
+		m_perClothWindVelocity[cloth] = toVector3( m_softBodySet[cloth]->getSoftBody()->getWindVelocity() );
+	m_dx11PerClothWindVelocity.changedOnCPU();
+
+	// Apply forces that we know about to the cloths
+	applyForces(  timeStep * getTimeScale() );
+
+	// Integrate motion for all soft bodies dealt with by the solver
+	integrate( timeStep * getTimeScale() );
+
+	// Optional bounds update: covers all soft bodies dealt with by the solver
+	// and sets the values in the btSoftBody objects.
+	if (m_enableUpdateBounds)
+	{
+		updateBounds();
+	}
+	// End prediction work for solvers
+}
+
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h
new file mode 100644
index 00000000..939eabaf
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h
@@ -0,0 +1,691 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
+#define BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
+
+
+#include "vectormath/vmInclude.h"
+#include "BulletSoftBody/btSoftBodySolvers.h"
+#include "btSoftBodySolverVertexBuffer_DX11.h"
+#include "btSoftBodySolverLinkData_DX11.h"
+#include "btSoftBodySolverVertexData_DX11.h"
+#include "btSoftBodySolverTriangleData_DX11.h"
+
+
+
+class DXFunctions
+{
+public:
+	/** Signature of the shader compile entry point; D3DX11CompileFromMemory is what the solver classes pass by default. */
+	typedef HRESULT (WINAPI * CompileFromMemoryFunc)(LPCSTR,SIZE_T,LPCSTR,const D3D10_SHADER_MACRO*,LPD3D10INCLUDE,LPCSTR,LPCSTR,UINT,UINT,ID3DX11ThreadPump*,ID3D10Blob**,ID3D10Blob**,HRESULT*);
+
+	// Stored exactly as handed to the constructor (no AddRef is done here).
+	ID3D11Device *		 m_dx11Device;
+	ID3D11DeviceContext* m_dx11Context;
+	CompileFromMemoryFunc m_dx11CompileFromMemory;
+
+	DXFunctions(ID3D11Device *dx11Device, ID3D11DeviceContext* dx11Context, CompileFromMemoryFunc dx11CompileFromMemory) :
+		m_dx11Device( dx11Device ),
+		m_dx11Context( dx11Context ),
+		m_dx11CompileFromMemory( dx11CompileFromMemory )
+	{
+	}
+
+	/** A compute shader paired with its constant buffer; either pointer may be null. */
+	class KernelDesc
+	{
+	public:
+		ID3D11ComputeShader* kernel;
+		ID3D11Buffer* constBuffer;
+
+		/** Starts out with no shader and no constant buffer. */
+		KernelDesc() :
+			kernel( 0 ),
+			constBuffer( 0 )
+		{
+		}
+
+		/** Releases nothing; kernel and constBuffer are left untouched. */
+		virtual ~KernelDesc()
+		{
+			// TODO: this should probably destroy its kernel but we need to be careful
+			// in case KernelDescs are copied
+		}
+	}; 
+
+	/**
+	 * Compile entry point shaderName of the HLSL source shaderString as a compute shader and
+	 * return it as a KernelDesc. constBufferSize is the byte size of its constant buffer.
+	 */
+	KernelDesc compileComputeShaderFromString( const char* shaderString, const char* shaderName, int constBufferSize, D3D10_SHADER_MACRO *compileMacros = 0 );
+};
+
+class btDX11SoftBodySolver : public btSoftBodySolver
+{	// Soft body solver whose kernels are DirectX 11 compute shaders; getSolverType() reports DX_SOLVER.
+protected:
+	/**
+	 * Entry in the collision shape array.
+	 * Specifies the shape type, the transform matrix and the necessary details of the collisionShape.
+	 */
+	struct CollisionShapeDescription
+	{
+		Vectormath::Aos::Transform3 shapeTransform;
+		Vectormath::Aos::Vector3 linearVelocity;
+		Vectormath::Aos::Vector3 angularVelocity;
+
+		int softBodyIdentifier;
+		int collisionShapeType;
+	
+		// Both needed for capsule
+		float radius;
+		float halfHeight;
+		
+		float margin;
+		float friction;
+		// Every scalar field starts at zero so that no indeterminate value is ever stored.
+		CollisionShapeDescription()
+		{
+			softBodyIdentifier = collisionShapeType = 0;
+			radius = halfHeight = margin = 0;
+			friction = 0;
+		}
+	};
+	/** Three unsigned components plus one padding word, all zeroed unless given. */
+	struct UIntVector3
+	{
+		UIntVector3()
+		{
+			x = 0;
+			y = 0;
+			z = 0;
+			_padding = 0;
+		}
+		
+		UIntVector3( unsigned int x_, unsigned int y_, unsigned int z_ )
+		{
+			x = x_;
+			y = y_;
+			z = z_;
+			_padding = 0;
+		}
+			
+		unsigned int x;
+		unsigned int y;
+		unsigned int z;
+		unsigned int _padding;
+	};
+
+
+
+public:
+	/**
+	 * SoftBody class to maintain information about a soft body instance
+	 * within a solver.
+	 * This data addresses the main solver arrays.
+	 */
+	class btAcceleratedSoftBodyInterface
+	{
+	protected:
+		/** Current number of vertices that are part of this cloth */
+		int m_numVertices;
+		/** Maximum number of vertices allocated to be part of this cloth */
+		int m_maxVertices;
+		/** Current number of triangles that are part of this cloth */
+		int m_numTriangles;
+		/** Maximum number of triangles allocated to be part of this cloth */
+		int m_maxTriangles;
+		/** Index of first vertex in the world allocated to this cloth */
+		int m_firstVertex;
+		/** Index of first triangle in the world allocated to this cloth */
+		int m_firstTriangle;
+		/** Index of first link in the world allocated to this cloth */
+		int m_firstLink;
+		/** Maximum number of links allocated to this cloth */
+		int m_maxLinks;
+		/** Current number of links allocated to this cloth */
+		int m_numLinks;
+
+		/** The actual soft body this data represents */
+		btSoftBody *m_softBody;
+
+		/** Wraps softBody with every count and offset set to zero. */
+	public:
+		btAcceleratedSoftBodyInterface( btSoftBody *softBody ) :
+		  m_softBody( softBody )
+		{
+			m_numVertices = 0;
+			m_maxVertices = 0;
+			m_numTriangles = 0;
+			m_maxTriangles = 0;
+			m_firstVertex = 0;
+			m_firstTriangle = 0;
+			m_firstLink = 0;
+			m_maxLinks = 0;
+			m_numLinks = 0;
+		}
+		int getNumVertices() const
+		{
+			return m_numVertices;
+		}
+
+		int getNumTriangles() const
+		{
+			return m_numTriangles;
+		}
+
+		int getMaxVertices() const
+		{
+			return m_maxVertices;
+		}
+
+		int getMaxTriangles() const
+		{
+			return m_maxTriangles;
+		}
+
+		int getFirstVertex() const
+		{
+			return m_firstVertex;
+		}
+
+		int getFirstTriangle() const
+		{
+			return m_firstTriangle;
+		}
+
+
+		/**
+		 * Update the bounds in the btSoftBody object
+		 */
+		void updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound );
+
+		
+		// TODO: All of these set functions will have to do checks and
+		// update the world because restructuring of the arrays will be necessary
+		// Reasonable use of "friend"?
+		void setNumVertices( int numVertices )
+		{
+			m_numVertices = numVertices;
+		}	
+	
+		void setNumTriangles( int numTriangles )
+		{
+			m_numTriangles = numTriangles;
+		}
+
+		void setMaxVertices( int maxVertices )
+		{
+			m_maxVertices = maxVertices;
+		}
+
+		void setMaxTriangles( int maxTriangles )
+		{
+			m_maxTriangles = maxTriangles;
+		}
+
+		void setFirstVertex( int firstVertex )
+		{
+			m_firstVertex = firstVertex;
+		}
+
+		void setFirstTriangle( int firstTriangle )
+		{
+			m_firstTriangle = firstTriangle;
+		}
+
+		void setMaxLinks( int maxLinks )
+		{
+			m_maxLinks = maxLinks;
+		}
+
+		void setNumLinks( int numLinks )
+		{
+			m_numLinks = numLinks;
+		}
+
+		void setFirstLink( int firstLink )
+		{
+			m_firstLink = firstLink;
+		}
+		// Read-only accessors below are const-qualified, like the vertex and triangle getters above.
+		int getMaxLinks() const
+		{
+			return m_maxLinks;
+		}
+
+		int getNumLinks() const
+		{
+			return m_numLinks;
+		}
+
+		int getFirstLink() const
+		{
+			return m_firstLink;
+		}
+
+		btSoftBody* getSoftBody() const
+		{
+			return m_softBody;
+		}
+
+	};
+
+	/** Pair of collision object indices (firstObject, endObject); one entry is kept per cloth. */
+	struct CollisionObjectIndices
+	{
+		CollisionObjectIndices( int f, int e )
+		{
+			firstObject = f;
+			endObject = e;
+		}
+
+		int firstObject;
+		int endObject;
+	};
+
+
+
+
+	// CPU-side constant buffer layouts. buildShaders() passes sizeof() of these when compiling kernels, e.g. sizeof(PrepareLinksCB).
+	struct PrepareLinksCB
+	{		
+		int numLinks;
+		int padding0;
+		int padding1;
+		int padding2;
+	};
+
+	struct SolvePositionsFromLinksKernelCB
+	{		
+		int startLink;
+		int numLinks;
+		float kst;
+		float ti;
+	};
+
+	struct IntegrateCB
+	{
+		int numNodes;
+		float solverdt;
+		int padding1;
+		int padding2;
+	};
+
+	struct UpdatePositionsFromVelocitiesCB
+	{
+		int numNodes;
+		float solverSDT;
+		int padding1;
+		int padding2;
+	};
+
+	struct UpdateVelocitiesFromPositionsWithoutVelocitiesCB
+	{
+		int numNodes;
+		float isolverdt;
+		int padding1;
+		int padding2;
+	};
+
+	struct UpdateVelocitiesFromPositionsWithVelocitiesCB
+	{
+		int numNodes;
+		float isolverdt;
+		int padding1;
+		int padding2;
+	};
+
+	struct UpdateSoftBodiesCB
+	{
+		int numNodes;
+		int startFace;
+		int numFaces;
+		float epsilon;
+	};
+
+
+	struct ApplyForcesCB
+	{
+		unsigned int numNodes;
+		float solverdt;
+		float epsilon;
+		int padding3;
+	};
+
+	struct AddVelocityCB
+	{
+		int startNode;
+		int lastNode;
+		float velocityX;
+		float velocityY;
+		float velocityZ;
+		int padding1;
+		int padding2;
+		int padding3;
+	};
+
+	struct VSolveLinksCB
+	{
+		int startLink;
+		int numLinks;
+		float kst;
+		int padding;
+	};
+
+	struct ComputeBoundsCB
+	{
+		int numNodes;
+		int numSoftBodies;
+		int padding1;
+		int padding2;
+	};
+
+	struct SolveCollisionsAndUpdateVelocitiesCB
+	{
+		unsigned int numNodes;
+		float isolverdt;
+		int padding0;
+		int padding1;
+	};
+
+	
+
+
+protected:
+	ID3D11Device *		 m_dx11Device;
+	ID3D11DeviceContext* m_dx11Context;
+	
+	DXFunctions dxFunctions;
+public:
+	/** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */
+	btSoftBodyLinkDataDX11 m_linkData;
+	btSoftBodyVertexDataDX11 m_vertexData;
+	btSoftBodyTriangleDataDX11 m_triangleData;
+
+protected:
+
+	/** Variable to define whether we need to update solver constants on the next iteration */
+	bool m_updateSolverConstants;
+
+	bool m_shadersInitialized;
+
+	/** 
+	 * Cloths owned by this solver.
+	 * Only our cloths are in this array.
+	 */
+	btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet;
+
+	/** Acceleration value to be applied to all non-static vertices in the solver. 
+	 * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+	 */
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothAcceleration;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11PerClothAcceleration;
+
+	/** Wind velocity to be applied normal to all non-static vertices in the solver. 
+	 * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+	 */
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothWindVelocity;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11PerClothWindVelocity;
+
+	/** Velocity damping factor */
+	btAlignedObjectArray< float >						m_perClothDampingFactor;
+	btDX11Buffer<float>									m_dx11PerClothDampingFactor;
+
+	/** Velocity correction coefficient */
+	btAlignedObjectArray< float >						m_perClothVelocityCorrectionCoefficient;
+	btDX11Buffer<float>									m_dx11PerClothVelocityCorrectionCoefficient;
+
+	/** Lift parameter for wind effect on cloth. */
+	btAlignedObjectArray< float >						m_perClothLiftFactor;
+	btDX11Buffer<float>									m_dx11PerClothLiftFactor;
+	
+	/** Drag parameter for wind effect on cloth. */
+	btAlignedObjectArray< float >						m_perClothDragFactor;
+	btDX11Buffer<float>									m_dx11PerClothDragFactor;
+
+	/** Density of the medium in which each cloth sits */
+	btAlignedObjectArray< float >						m_perClothMediumDensity;
+	btDX11Buffer<float>									m_dx11PerClothMediumDensity;
+
+	
+	/** 
+	 * Collision shape details: pair of index of first collision shape for the cloth and number of collision objects.
+	 */
+	btAlignedObjectArray< CollisionObjectIndices >		m_perClothCollisionObjects;
+	btDX11Buffer<CollisionObjectIndices>				m_dx11PerClothCollisionObjects;
+
+	/** 
+	 * Collision shapes being passed across to the cloths in this solver.
+	 */
+	btAlignedObjectArray< CollisionShapeDescription >	m_collisionObjectDetails;
+	btDX11Buffer< CollisionShapeDescription >			m_dx11CollisionObjectDetails;
+
+	/** 
+	 * Minimum bounds for each cloth.
+	 * Updated by GPU and returned for use by broad phase.
+	 * These are int vectors as a reminder that they store the int representation of a float, not a float.
+	 * Bit 31 is inverted, i.e. floats are stored with int-sortable values.
+	 */
+	btAlignedObjectArray< UIntVector3 >	m_perClothMinBounds;
+	btDX11Buffer< UIntVector3 >			m_dx11PerClothMinBounds;
+
+	/** 
+	 * Maximum bounds for each cloth.
+	 * Updated by GPU and returned for use by broad phase.
+	 * These are int vectors as a reminder that they store the int representation of a float, not a float.
+	 * Bit 31 is inverted, i.e. floats are stored with int-sortable values.
+	 */
+	btAlignedObjectArray< UIntVector3 >	m_perClothMaxBounds;
+	btDX11Buffer< UIntVector3 >			m_dx11PerClothMaxBounds;
+
+	
+	/** 
+	 * Friction coefficient for each cloth
+	 */
+	btAlignedObjectArray< float >	m_perClothFriction;
+	btDX11Buffer< float >			m_dx11PerClothFriction;
+
+	DXFunctions::KernelDesc		prepareLinksKernel;
+	DXFunctions::KernelDesc		solvePositionsFromLinksKernel;
+	DXFunctions::KernelDesc		vSolveLinksKernel;
+	DXFunctions::KernelDesc		integrateKernel;
+	DXFunctions::KernelDesc		addVelocityKernel;
+	DXFunctions::KernelDesc		updatePositionsFromVelocitiesKernel;
+	DXFunctions::KernelDesc		updateVelocitiesFromPositionsWithoutVelocitiesKernel;
+	DXFunctions::KernelDesc		updateVelocitiesFromPositionsWithVelocitiesKernel;
+	DXFunctions::KernelDesc		solveCollisionsAndUpdateVelocitiesKernel;
+	DXFunctions::KernelDesc		resetNormalsAndAreasKernel;
+	DXFunctions::KernelDesc		normalizeNormalsAndAreasKernel;
+	DXFunctions::KernelDesc		computeBoundsKernel;
+	DXFunctions::KernelDesc		updateSoftBodiesKernel;
+
+	DXFunctions::KernelDesc		applyForcesKernel;
+
+	bool	m_enableUpdateBounds;	// when true, predictMotion() finishes by calling updateBounds()
+
+	/**
+	 * Integrate motion on the solver.
+	 */
+	virtual void integrate( float solverdt );
+	float computeTriangleArea( 
+		const Vectormath::Aos::Point3 &vertex0,
+		const Vectormath::Aos::Point3 &vertex1,
+		const Vectormath::Aos::Point3 &vertex2 );
+
+	/** Releases the current kernels, then compiles the solver's compute kernels; true when all of them were built. */
+	virtual bool buildShaders();
+
+	void resetNormalsAndAreas( int numVertices );
+
+	void normalizeNormalsAndAreas( int numVertices );
+
+	void executeUpdateSoftBodies( int firstTriangle, int numTriangles );
+
+	void prepareCollisionConstraints();
+
+	Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a );
+
+	void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );
+
+	virtual void applyForces( float solverdt );
+	
+	virtual void updateConstants( float timeStep );
+	int findSoftBodyIndex( const btSoftBody* const softBody );	// processCollision() treats a negative result as "unknown soft body"
+
+	//////////////////////////////////////
+	// Kernel dispatches
+	virtual void prepareLinks();
+
+	void updatePositionsFromVelocities( float solverdt );
+	void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
+	void solveLinksForVelocity( int startLink, int numLinks, float kst );
+	
+	void updateVelocitiesFromPositionsWithVelocities( float isolverdt );
+	void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt );
+	void computeBounds( );
+	void solveCollisionsAndUpdateVelocities( float isolverdt );
+
+	// End kernel dispatches
+	/////////////////////////////////////
+
+	void updateBounds();
+
+	
+	void releaseKernels();
+
+public:
+	btDX11SoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory);
+
+	virtual ~btDX11SoftBodySolver();
+	
+	
+	virtual SolverTypes getSolverType() const
+	{
+		return DX_SOLVER;
+	}
+
+	void	setEnableUpdateBounds(bool enableBounds)
+	{
+		m_enableUpdateBounds = enableBounds;
+	}
+	bool getEnableUpdateBounds() const
+	{
+		return  m_enableUpdateBounds;
+	}
+
+
+
+	virtual btSoftBodyLinkData &getLinkData();
+
+	virtual btSoftBodyVertexData &getVertexData();
+
+	virtual btSoftBodyTriangleData &getTriangleData();
+
+
+
+	
+
+	btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
+	const btAcceleratedSoftBodyInterface * const findSoftBodyInterface( const btSoftBody* const softBody ) const;
+
+	virtual bool checkInitialized();
+
+	virtual void updateSoftBodies( );
+
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
+
+	virtual void copyBackToSoftBodies(bool bMove = true);
+
+	virtual void solveConstraints( float solverdt );
+
+	virtual void predictMotion( float solverdt );
+
+	/** Records a capsule collision object for a soft body known to this solver; other shape types are skipped. */
+	virtual void processCollision( btSoftBody *, btCollisionObject* );
+	/** Soft body against soft body: the implementation is empty. */
+	virtual void processCollision( btSoftBody*, btSoftBody* );
+
+};
+
+
+
+/** 
+ * Class to manage movement of data from a solver to a given target.
+ * This version is the DX to CPU version.
+ */
+class btSoftBodySolverOutputDXtoCPU : public btSoftBodySolverOutput
+{
+public:
+	/** Nothing to set up: this class declares no data members of its own. */
+	btSoftBodySolverOutputDXtoCPU() {}
+
+	/**
+	 * Output current computed vertex data to the vertex buffers for all cloths in the solver.
+	 * Only declared here; the definition is not part of this header.
+	 */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
+};
+
+/** 
+ * Class to manage movement of data from a solver to a given target.
+ * This version is the DX to DX version and subclasses DX to CPU so that it works for that too.
+ */
+class btSoftBodySolverOutputDXtoDX : public btSoftBodySolverOutputDXtoCPU
+{
+protected:
+	/** CPU-side image of the constant buffer handed to the output kernels (eight ints). */
+	struct OutputToVertexArrayCB
+	{
+		int startNode;
+		int numNodes;
+		int positionOffset;
+		int positionStride;
+
+		int normalOffset;
+		int normalStride;
+		int padding1;
+		int padding2;
+	};
+
+	DXFunctions dxFunctions;
+	DXFunctions::KernelDesc outputToVertexArrayWithNormalsKernel;	// used when the target buffer has normals
+	DXFunctions::KernelDesc outputToVertexArrayWithoutNormalsKernel;	// used when it has none
+
+	bool m_shadersInitialized;	// true once both kernels above have been built
+
+	bool checkInitialized();
+	bool buildShaders();
+	void releaseKernels();
+
+public:
+	btSoftBodySolverOutputDXtoDX(ID3D11Device *dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory) :
+	  dxFunctions( dx11Device, dx11Context, dx11CompileFromMemory ),
+	  m_shadersInitialized( false )
+	{
+	}
+
+	~btSoftBodySolverOutputDXtoDX()	// gives back both output kernels
+	{
+		releaseKernels();
+	}
+
+	/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
+};
+
+#endif // #ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
+
+
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp
new file mode 100644
index 00000000..5c73ee5d
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp
@@ -0,0 +1,1051 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <cstdio>
+
+
+#define WAVEFRONT_SIZE 32 // initial value of btSoftBodyLinkDataDX11SIMDAware::m_wavefrontSize
+#define WAVEFRONT_BLOCK_MULTIPLIER 2 // wavefronts per dispatched block; see solveLinksForPosition() and buildShaders()
+#define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER) // optimize() rounds per-cloth vertex capacity to a multiple of this
+#define LINKS_PER_SIMD_LANE 16 // initial value of btSoftBodyLinkDataDX11SIMDAware::m_linksPerWorkItem
+
+#define STRINGIFY( S ) STRINGIFY2( S ) // expands S first, then turns the result into a string literal
+#define STRINGIFY2( S ) #S
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "vectormath/vmInclude.h"
+
+#include "btSoftBodySolverLinkData_DX11SIMDAware.h"
+#include "btSoftBodySolver_DX11SIMDAware.h"
+#include "btSoftBodySolverVertexBuffer_DX11.h"
+#include "BulletSoftBody/btSoftBody.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+
+#define MSTRINGIFY(A) #A // NOTE(review): presumably each included .hlsl file wraps its text in MSTRINGIFY(...) so the #include below yields a string literal - confirm
+static char* UpdatePositionsFromVelocitiesHLSLString = // NOTE(review): literal bound to non-const char*; const char* would be safer if the callee accepts it
+#include "HLSL/UpdatePositionsFromVelocities.hlsl"
+static char* SolvePositionsSIMDBatchedHLSLString = 
+#include "HLSL/SolvePositionsSIMDBatched.hlsl"
+static char* UpdateNodesHLSLString = 
+#include "HLSL/UpdateNodes.hlsl"
+static char* UpdatePositionsHLSLString = 
+#include "HLSL/UpdatePositions.hlsl"
+static char* UpdateConstantsHLSLString = 
+#include "HLSL/UpdateConstants.hlsl"
+static char* IntegrateHLSLString = 
+#include "HLSL/Integrate.hlsl"
+static char* ApplyForcesHLSLString = 
+#include "HLSL/ApplyForces.hlsl"
+static char* UpdateNormalsHLSLString = 
+#include "HLSL/UpdateNormals.hlsl"
+static char* OutputToVertexArrayHLSLString = 
+#include "HLSL/OutputToVertexArray.hlsl"
+static char* VSolveLinksHLSLString = 
+#include "HLSL/VSolveLinks.hlsl"
+static char* ComputeBoundsHLSLString = 
+#include "HLSL/ComputeBounds.hlsl"
+static char* SolveCollisionsAndUpdateVelocitiesHLSLString =
+#include "HLSL/solveCollisionsAndUpdateVelocitiesSIMDBatched.hlsl"
+
+
+
+btSoftBodyLinkDataDX11SIMDAware::btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) : 
+		m_d3dDevice( d3dDevice ),
+		m_d3dDeviceContext( d3dDeviceContext ),
+		m_wavefrontSize( WAVEFRONT_SIZE ),
+		m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),
+		m_maxBatchesWithinWave( 0 ),
+		m_maxLinksPerWavefront( WAVEFRONT_SIZE * LINKS_PER_SIMD_LANE ), // taken from the macros so it cannot depend on member declaration order
+		m_numWavefronts( 0 ),
+		m_maxVertex( 0 ),
+		m_dx11NumBatchesAndVerticesWithinWaves( d3dDevice, d3dDeviceContext, &m_numBatchesAndVerticesWithinWaves, true ),
+		m_dx11WavefrontVerticesGlobalAddresses( d3dDevice, d3dDeviceContext, &m_wavefrontVerticesGlobalAddresses, true ),
+		m_dx11LinkVerticesLocalAddresses( d3dDevice, d3dDeviceContext, &m_linkVerticesLocalAddresses, true ),
+		m_dx11LinkStrength( d3dDevice, d3dDeviceContext, &m_linkStrength, true ),
+		m_dx11LinksMassLSC( d3dDevice, d3dDeviceContext, &m_linksMassLSC, true ),
+		m_dx11LinksRestLengthSquared( d3dDevice, d3dDeviceContext, &m_linksRestLengthSquared, true ),
+		m_dx11LinksRestLength( d3dDevice, d3dDeviceContext, &m_linksRestLength, true ),
+		m_dx11LinksMaterialLinearStiffnessCoefficient( d3dDevice, d3dDeviceContext, &m_linksMaterialLinearStiffnessCoefficient, true )
+{
+	// Device and context are already stored by the initialiser list; m_onGPU is read by onAccelerator(), so give it a defined start value.
+	m_onGPU = false;
+}
+
+btSoftBodyLinkDataDX11SIMDAware::~btSoftBodyLinkDataDX11SIMDAware()
+{ // empty body: no explicit cleanup is performed here
+}
+
+// Convert a btVector3 into a Vectormath vector by copying its x, y and z components.
+static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
+{
+	return Vectormath::Aos::Vector3( vec.getX(), vec.getY(), vec.getZ() );
+}
+
+void btSoftBodyLinkDataDX11SIMDAware::createLinks( int numLinks )
+{
+	// Work out the grown size up front, before the base class call below
+	// gets a chance to change m_links.
+	const int totalLinks = m_links.size() + numLinks;
+
+	btSoftBodyLinkData::createLinks( numLinks );
+
+	m_linkAddresses.resize( totalLinks ); // keep the address table the same length
+}
+
+void btSoftBodyLinkDataDX11SIMDAware::setLinkAt( const btSoftBodyLinkData::LinkDescription &link, int linkIndex )
+{
+	// Let the base class store the link itself.
+	btSoftBodyLinkData::setLinkAt( link, linkIndex );
+
+	// Each link starts out addressed by its own index.
+	m_linkAddresses[linkIndex] = linkIndex;
+
+	// Keep m_maxVertex at the largest vertex index seen on any link so far.
+	m_maxVertex = ( link.getVertex0() > m_maxVertex ) ? link.getVertex0() : m_maxVertex;
+	m_maxVertex = ( link.getVertex1() > m_maxVertex ) ? link.getVertex1() : m_maxVertex;
+}
+
+bool btSoftBodyLinkDataDX11SIMDAware::onAccelerator()
+{
+	return m_onGPU; // set by a successful moveToAccelerator(), cleared by a successful moveFromAccelerator()
+}
+
+bool btSoftBodyLinkDataDX11SIMDAware::moveToAccelerator()
+{
+	// Move each buffer to the GPU in turn. && short-circuits, so after the
+	// first failure none of the remaining buffers is attempted.
+	const bool allMoved =
+		m_dx11NumBatchesAndVerticesWithinWaves.moveToGPU() &&
+		m_dx11WavefrontVerticesGlobalAddresses.moveToGPU() &&
+		m_dx11LinkVerticesLocalAddresses.moveToGPU() &&
+		m_dx11LinkStrength.moveToGPU() &&
+		m_dx11LinksMassLSC.moveToGPU() &&
+		m_dx11LinksRestLengthSquared.moveToGPU() &&
+		m_dx11LinksRestLength.moveToGPU() &&
+		m_dx11LinksMaterialLinearStiffnessCoefficient.moveToGPU();
+
+	if( allMoved )
+		m_onGPU = true;
+	return allMoved;
+}
+
+bool btSoftBodyLinkDataDX11SIMDAware::moveFromAccelerator()
+{
+	// Same order as moveToAccelerator(); stops at the first buffer that fails.
+	const bool allMoved =
+		m_dx11NumBatchesAndVerticesWithinWaves.moveFromGPU() &&
+		m_dx11WavefrontVerticesGlobalAddresses.moveFromGPU() &&
+		m_dx11LinkVerticesLocalAddresses.moveFromGPU() &&
+		m_dx11LinkStrength.moveFromGPU() &&
+		m_dx11LinksMassLSC.moveFromGPU() &&
+		m_dx11LinksRestLengthSquared.moveFromGPU() &&
+		m_dx11LinksRestLength.moveFromGPU() &&
+		m_dx11LinksMaterialLinearStiffnessCoefficient.moveFromGPU();
+
+	if( allMoved )
+		m_onGPU = false;
+	return allMoved;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+btDX11SIMDAwareSoftBodySolver::btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory) :
+	btDX11SoftBodySolver( dx11Device, dx11Context, dx11CompileFromMemory ),
+	m_linkData(m_dx11Device, m_dx11Context)
+{
+	// Initially we will clearly need to update the solver constants.
+	// For now this flag is global for the cloths linked with this solver - we should probably make it body specific
+	// for performance in future, once we understand more clearly when constants need to be updated.
+	m_updateSolverConstants = true;
+
+	m_shadersInitialized = false; // buildShaders() sets this to true once every kernel it checks was created
+}
+
+btDX11SIMDAwareSoftBodySolver::~btDX11SIMDAwareSoftBodySolver()
+{
+	releaseKernels(); // presumably frees the kernels created by buildShaders() - confirm in releaseKernels()
+}
+
+
+btSoftBodyLinkData &btDX11SIMDAwareSoftBodySolver::getLinkData()
+{
+	// TODO: Consider setting link data to "changed" here
+	return m_linkData; // the solver's own link data, handed out as a btSoftBodyLinkData reference
+}
+
+
+
+void btDX11SIMDAwareSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate)
+{ // Rebuilds the solver-side vertex, triangle and link data from softBodies when forceUpdate is set or the number of bodies changed.
+	if(forceUpdate || m_softBodySet.size() != softBodies.size() )
+	{
+		// Have a change in the soft body set so update, reloading all the data
+		getVertexData().clear();
+		getTriangleData().clear();
+		getLinkData().clear();
+		m_softBodySet.resize(0); // NOTE(review): entries are allocated with new below and are not deleted here - confirm who owns them
+		// NOTE(review): the m_perCloth* arrays are only appended to below and are not reset here - confirm this is intended on a re-run
+
+		for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
+		{
+			btSoftBody *softBody = softBodies[ softBodyIndex ];
+			using Vectormath::Aos::Matrix3;
+			using Vectormath::Aos::Point3;
+
+			// Create SoftBody that will store the information within the solver
+			btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody );
+			m_softBodySet.push_back( newSoftBody );
+
+			m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
+			m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
+			m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
+			m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
+			m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
+			m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
+			// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
+			m_perClothMinBounds.push_back( UIntVector3( 0, 0, 0 ) );
+			m_perClothMaxBounds.push_back( UIntVector3( UINT_MAX, UINT_MAX, UINT_MAX ) );
+			m_perClothFriction.push_back( softBody->getFriction() );
+			m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
+
+			// Add space for new vertices and triangles in the default solver for now
+			// TODO: Include space here for tearing too later
+			int firstVertex = getVertexData().getNumVertices();
+			int numVertices = softBody->m_nodes.size();
+			// Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group
+			// maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed
+			int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE); // NOTE: when numVertices is already a multiple of GROUP_SIZE this still adds one whole extra group
+			// Allocate space for new vertices in all the vertex arrays
+			getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
+
+			int firstTriangle = getTriangleData().getNumTriangles();
+			int numTriangles = softBody->m_faces.size();
+			int maxTriangles = numTriangles;
+			getTriangleData().createTriangles( maxTriangles );
+
+			// Copy vertices from softbody into the solver
+			for( int vertex = 0; vertex < numVertices; ++vertex )
+			{
+				Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
+				btSoftBodyVertexData::VertexDescription desc;
+
+				// TODO: Position in the softbody might be pre-transformed
+				// or we may need to adapt for the pose.
+				//desc.setPosition( cloth.getMeshTransform()*multPoint );
+				desc.setPosition( multPoint );
+
+				float vertexInverseMass = softBody->m_nodes[vertex].m_im;
+				desc.setInverseMass(vertexInverseMass);
+				getVertexData().setVertexAt( desc, firstVertex + vertex );
+			}
+
+			// Copy triangles similarly
+			// We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
+			for( int triangle = 0; triangle < numTriangles; ++triangle )
+			{
+				// Note that large array storage is relative to the array not to the cloth
+				// So we need to add firstVertex to each value
+				int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0])); // node pointer minus array base = node index within this soft body
+				int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
+				int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
+				btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
+				getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
+				
+				// Increase vertex triangle counts for this triangle
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
+			}
+
+			int firstLink = getLinkData().getNumLinks();
+			int numLinks = softBody->m_links.size();
+			int maxLinks = numLinks; // not used further in this function
+			
+			// Allocate space for the links
+			getLinkData().createLinks( numLinks );
+
+			// Add the links
+			for( int link = 0; link < numLinks; ++link )
+			{
+				int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
+				int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
+
+				btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
+				newLink.setLinkStrength(1.f);
+				getLinkData().setLinkAt(newLink, firstLink + link);
+			}
+			
+			// Record where this body's data lives inside the shared solver arrays
+			newSoftBody->setFirstVertex( firstVertex );
+			newSoftBody->setFirstTriangle( firstTriangle );
+			newSoftBody->setNumVertices( numVertices );
+			newSoftBody->setMaxVertices( maxVertices );
+			newSoftBody->setNumTriangles( numTriangles );
+			newSoftBody->setMaxTriangles( maxTriangles );
+			newSoftBody->setFirstLink( firstLink );
+			newSoftBody->setNumLinks( numLinks );
+		}
+
+
+		// Recomputes the per-link constants, but only while m_updateSolverConstants is set; the time step argument is not used there
+		updateConstants(0.f);
+
+
+		m_linkData.generateBatches();		
+		m_triangleData.generateBatches();
+
+		
+		// Build the shaders to match the batching parameters
+		buildShaders();
+	}
+
+}
+
+
+
+void btDX11SIMDAwareSoftBodySolver::solveConstraints( float solverdt )
+{
+	// Runs one constraint-solving pass: moves the required data to the
+	// accelerator, prepares the collision constraints, solves the links for
+	// position m_numberOfPositionIterations times, then resolves collisions
+	// and updates velocities.
+	//
+	// solverdt: solver time step. Its reciprocal is handed to
+	//           solveCollisionsAndUpdateVelocities(), so it is expected to
+	//           be non-zero.
+
+	// Both values are forwarded unchanged to every solveLinksForPosition() call
+	const float kst = 1.f;
+	const float ti = 0.f;
+
+
+	m_dx11PerClothDampingFactor.moveToGPU();
+	m_dx11PerClothVelocityCorrectionCoefficient.moveToGPU();
+
+
+	// Ensure data is on accelerator
+	m_linkData.moveToAccelerator();
+	m_vertexData.moveToAccelerator();
+
+
+	prepareCollisionConstraints();
+
+
+	// Solve drift
+	for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+	{
+
+		// Each entry describes one batch as a range of wavefronts (start, length)
+		for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )
+		{
+			int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;
+			int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;
+
+			solveLinksForPosition( startWave, numWaves, kst, ti );
+		}
+
+	} // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+
+
+
+
+	// At this point assume that the force array is blank - we will overwrite it
+	solveCollisionsAndUpdateVelocities( 1.f/solverdt );
+
+
+
+} // btDX11SIMDAwareSoftBodySolver::solveConstraints
+
+
+void btDX11SIMDAwareSoftBodySolver::updateConstants( float timeStep )
+{
+	using namespace Vectormath::Aos;
+
+	// Nothing to do unless the constants have been flagged as stale.
+	if( !m_updateSolverConstants )
+		return;
+	m_updateSolverConstants = false;
+
+	// Refresh the per-link constants; this must be redone if the structure changes (tearing, maybe).
+	const int linkCount = m_linkData.getNumLinks();
+	for( int l = 0; l < linkCount; ++l )
+	{
+		btSoftBodyLinkData::LinkNodePair &ends( m_linkData.getVertexPair(l) );
+
+		// Rest length is the current distance between the two end vertices.
+		m_linkData.getRestLength(l) = length((m_vertexData.getPosition( ends.vertex0 ) - m_vertexData.getPosition( ends.vertex1 )));
+		const float rest = m_linkData.getRestLength(l);
+		m_linkData.getRestLengthSquared(l) = rest*rest;
+
+		// Summed inverse mass of both ends divided by the link's linear stiffness.
+		const float invMassA = m_vertexData.getInverseMass(ends.vertex0);
+		const float invMassB = m_vertexData.getInverseMass(ends.vertex1);
+		const float stiffness = m_linkData.getLinearStiffnessCoefficient(l);
+		m_linkData.getMassLSC(l) = (invMassA + invMassB)/stiffness;
+	}
+} // btDX11SIMDAwareSoftBodySolver::updateConstants
+
+//////////////////////////////////////
+// Kernel dispatches
+
+
+void btDX11SIMDAwareSoftBodySolver::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )
+{
+	// Dispatches the link position solve for numWaves wavefronts, beginning
+	// at wavefront startWave. kst and ti are passed to the kernel unchanged
+	// through its constant buffer.
+	//
+	// All compute-shader bindings made here are cleared again before returning.
+
+	m_vertexData.moveToAccelerator();
+	m_linkData.moveToAccelerator();
+
+	// Fill in the kernel parameters on the CPU side...
+	SolvePositionsFromLinksKernelCB kernelParams;
+	kernelParams.startWave = startWave;
+	kernelParams.numWaves = numWaves;
+	kernelParams.kst = kst;
+	kernelParams.ti = ti;
+
+	// ...and copy them into the kernel's constant buffer
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( solvePositionsFromLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(SolvePositionsFromLinksKernelCB) );
+	m_dx11Context->Unmap( solvePositionsFromLinksKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &solvePositionsFromLinksKernel.constBuffer );
+
+	// Read-only inputs occupy shader resource slots 0 to 5
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11NumBatchesAndVerticesWithinWaves.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11WavefrontVerticesGlobalAddresses.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_linkData.m_dx11LinkVerticesLocalAddresses.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 4, 1, &(m_linkData.m_dx11LinksMassLSC.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 5, 1, &(m_linkData.m_dx11LinksRestLengthSquared.getSRV()) );
+
+	// The vertex positions are the only unordered-access (writable) view
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
+
+	// Execute the kernel: one dispatched group per WAVEFRONT_BLOCK_MULTIPLIER
+	// wavefronts, rounded up
+	m_dx11Context->CSSetShader( solvePositionsFromLinksKernel.kernel, NULL, 0 );
+	const int groupCount = ( numWaves + WAVEFRONT_BLOCK_MULTIPLIER - 1 ) / WAVEFRONT_BLOCK_MULTIPLIER;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	// Tidy up: unbind every slot that was set above
+	ID3D11ShaderResourceView* noSRV = NULL;
+	for( unsigned int slot = 0; slot < 6; ++slot )
+		m_dx11Context->CSSetShaderResources( slot, 1, &noSRV );
+
+	ID3D11UnorderedAccessView* noUAV = NULL;
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &noUAV, NULL );
+
+	ID3D11Buffer *noBuffer = NULL;
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &noBuffer );
+
+
+
+} // btDX11SIMDAwareSoftBodySolver::solveLinksForPosition
+
+
+
+// End kernel dispatches
+/////////////////////////////////////
+
+
+
+
+
+
+
+
+
+bool btDX11SIMDAwareSoftBodySolver::buildShaders()
+{ // Compiles every compute kernel used by this solver; returns true only if each compiled kernel has a constant buffer.
+	// Ensure current kernels are released first
+	releaseKernels();
+
+	bool returnVal = true;
+
+	// NOTE(review): this check runs after releaseKernels() above. If the flag can still be set here, the function
+	if( m_shadersInitialized ) // returns true with the kernels already released - confirm releaseKernels() resets it
+		return true;
+
+	
+	updatePositionsFromVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsFromVelocitiesHLSLString, "UpdatePositionsFromVelocitiesKernel", sizeof(UpdatePositionsFromVelocitiesCB) );
+	if( !updatePositionsFromVelocitiesKernel.constBuffer ) // only constBuffer is tested; the .kernel member is not checked here
+		returnVal = false;
+	
+	char maxVerticesPerWavefront[20]; // these buffers hold the decimal text of int values written by sprintf below
+	char maxBatchesPerWavefront[20];
+	char waveFrontSize[20];
+	char waveFrontBlockMultiplier[20];
+	char blockSize[20];
+
+	sprintf(maxVerticesPerWavefront, "%d", m_linkData.getMaxVerticesPerWavefront());
+	sprintf(maxBatchesPerWavefront, "%d", m_linkData.getMaxBatchesPerWavefront());
+	sprintf(waveFrontSize, "%d", m_linkData.getWavefrontSize());	
+	sprintf(waveFrontBlockMultiplier, "%d", WAVEFRONT_BLOCK_MULTIPLIER);
+	sprintf(blockSize, "%d", WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
+	// Five name/value macro pairs followed by a terminating pair of zeros
+	D3D10_SHADER_MACRO solvePositionsMacros[6] = { "MAX_NUM_VERTICES_PER_WAVE", maxVerticesPerWavefront, "MAX_BATCHES_PER_WAVE", maxBatchesPerWavefront, "WAVEFRONT_SIZE", waveFrontSize, "WAVEFRONT_BLOCK_MULTIPLIER", waveFrontBlockMultiplier, "BLOCK_SIZE", blockSize, 0, 0 };
+
+	solvePositionsFromLinksKernel = dxFunctions.compileComputeShaderFromString( SolvePositionsSIMDBatchedHLSLString, "SolvePositionsFromLinksKernel", sizeof(SolvePositionsFromLinksKernelCB), solvePositionsMacros );
+	if( !solvePositionsFromLinksKernel.constBuffer )
+		returnVal = false;
+
+	updateVelocitiesFromPositionsWithVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNodesHLSLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) );
+	if( !updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer )
+		returnVal = false;
+	updateVelocitiesFromPositionsWithoutVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsHLSLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB));
+	if( !updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer )
+		returnVal = false;
+	integrateKernel = dxFunctions.compileComputeShaderFromString( IntegrateHLSLString, "IntegrateKernel", sizeof(IntegrateCB) );
+	if( !integrateKernel.constBuffer )
+		returnVal = false;
+	applyForcesKernel = dxFunctions.compileComputeShaderFromString( ApplyForcesHLSLString, "ApplyForcesKernel", sizeof(ApplyForcesCB) );
+	if( !applyForcesKernel.constBuffer )
+		returnVal = false;
+	solveCollisionsAndUpdateVelocitiesKernel = dxFunctions.compileComputeShaderFromString( SolveCollisionsAndUpdateVelocitiesHLSLString, "SolveCollisionsAndUpdateVelocitiesKernel", sizeof(SolveCollisionsAndUpdateVelocitiesCB) );
+	if( !solveCollisionsAndUpdateVelocitiesKernel.constBuffer )
+		returnVal = false;
+	resetNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "ResetNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
+	if( !resetNormalsAndAreasKernel.constBuffer )
+		returnVal = false;
+	normalizeNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "NormalizeNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
+	if( !normalizeNormalsAndAreasKernel.constBuffer )
+		returnVal = false;
+	updateSoftBodiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "UpdateSoftBodiesKernel", sizeof(UpdateSoftBodiesCB) );
+	if( !updateSoftBodiesKernel.constBuffer )
+		returnVal = false;
+	
+	computeBoundsKernel = dxFunctions.compileComputeShaderFromString( ComputeBoundsHLSLString, "ComputeBoundsKernel", sizeof(ComputeBoundsCB) );
+	if( !computeBoundsKernel.constBuffer )
+		returnVal = false;
+
+	if( returnVal ) // a failure above does not stop the remaining compiles; it only leaves the flag unset
+		m_shadersInitialized = true;
+
+	return returnVal;
+} // btDX11SIMDAwareSoftBodySolver::buildShaders
+
+static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
+{
+	// Columns 0-2 are copied from the basis, column 3 from the origin.
+	Vectormath::Aos::Transform3 result;
+	for( int col = 0; col < 3; ++col )
+		result.setCol( col, toVector3( transform.getBasis().getColumn( col ) ) );
+	result.setCol( 3, toVector3( transform.getOrigin() ) );
+	return result;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )
+{
+	// Greedily assigns each wavefront to the first batch in which none of its
+	// vertices is already used, opening a new batch when no existing one fits.
+	//
+	// linksForWavefronts: for each wavefront, the indices of its links
+	// linkData:           source of the vertex pair for each link index
+	// numVertices:        sizes the per-batch vertex maps (numVertices+1 entries)
+	// wavefrontBatches:   output; for each batch, the wavefront indices placed in it.
+	//                     Expected to be empty on entry, because the vertex maps
+	//                     below only exist for batches created by this call.
+
+	// A per-batch map of truth values stating whether a given vertex is in that batch
+	// This allows us to significantly optimize the batching
+	btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;
+
+	for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
+	{
+		btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );
+
+		// Find the first existing batch that shares no vertex with this wavefront
+		int batch = 0;
+		for( ; batch < wavefrontBatches.size(); ++batch )
+		{
+			bool foundSharedVertex = false;
+			// The scan stops at the first clash: later links cannot change the outcome
+			for( int link = 0; link < wavefront.size() && !foundSharedVertex; ++link )
+			{
+				btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
+				foundSharedVertex = (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1];
+			}
+
+			if( !foundSharedVertex )
+				break;
+		}
+
+		// No existing batch was free of clashes: open a new one
+		if( batch == wavefrontBatches.size() )
+		{
+			wavefrontBatches.resize( batch + 1 );
+			mapOfVerticesInBatches.resize( batch + 1 );
+
+			// Resize maps with total number of vertices
+			mapOfVerticesInBatches[batch].resize( numVertices+1, false );
+		}
+
+		// Record the wavefront in the chosen batch and mark all of its
+		// vertices as used there
+		wavefrontBatches[batch].push_back( waveIndex );
+		for( int link = 0; link < wavefront.size(); ++link )
+		{
+			btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
+			(mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
+			(mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
+		}
+	}
+
+	// The vertex maps are scratch data only
+	mapOfVerticesInBatches.clear();
+
+}
+
+// Erase the element at indexToRemove, keeping the remaining elements in order.
+// NOTE: a non-empty array always shrinks by one, even if indexToRemove is past the end.
+template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )
+{
+	const int lastIndex = vectorToUpdate.size() - 1;
+	// Slide every later element one slot towards the front
+	for( int dst = indexToRemove; dst < lastIndex; ++dst )
+		vectorToUpdate[dst] = vectorToUpdate[dst + 1];
+	if( lastIndex >= 0 )
+		vectorToUpdate.resize( lastIndex );
+}
+
+/**
+ * Insert element into vectorToUpdate so that it ends up at position index;
+ * the elements from index onwards move one slot towards the back.
+ */
+template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )
+{
+	const int oldSize = vectorToUpdate.size();
+	vectorToUpdate.resize( oldSize + 1 );
+	for( int dst = oldSize; dst > index; --dst )
+		vectorToUpdate[dst] = vectorToUpdate[dst - 1];
+	vectorToUpdate[index] = element;
+}
+
+/**
+ * Treat vectorToUpdate as an ordered set: add element at its sorted position
+ * unless an equal element is already stored there. The array is assumed to be ordered.
+ */
+template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )
+{
+	int pos = 0;
+	for( ; pos < vectorToUpdate.size(); ++pos )
+		if( !( vectorToUpdate[pos] < element ) ) break;
+	// pos is now the first entry that is not less than element, or the end of the array
+	const bool pastEnd = ( pos == vectorToUpdate.size() );
+	if( pastEnd || vectorToUpdate[pos] != element )
+		insertAtIndex( vectorToUpdate, pos, element );
+}
+
+static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )
+{
+	// Builds a fixed-stride table of the links touching each vertex.
+	//
+	// numVertices:          number of vertices covered by the table
+	// linkData:             source of the vertex pair for each link
+	// listOfLinksPerVertex: output; resized to maxLinks * numVertices, row v holds the links of vertex v
+	// numLinksPerVertex:    in/out; incremented once per link end, so the caller supplies it pre-sized
+	// maxLinks:             output; largest per-vertex count, which is also the row stride
+
+	const int linkCount = linkData.getNumLinks();
+
+	// Pass 1: count how many link ends land on each vertex
+	for( int l = 0; l < linkCount; ++l )
+	{
+		btSoftBodyLinkData::LinkNodePair ends( linkData.getVertexPair(l) );
+		++numLinksPerVertex[ends.vertex0];
+		++numLinksPerVertex[ends.vertex1];
+	}
+
+	// The busiest vertex determines the row stride
+	int stride = 0;
+	for( int v = 0; v < numVertices; ++v )
+	{
+		if( numLinksPerVertex[v] > stride )
+			stride = numLinksPerVertex[v];
+	}
+	maxLinks = stride;
+	listOfLinksPerVertex.resize( stride * numVertices );
+
+	// Pass 2: put each link into the next free slot of both of its end vertices
+	btAlignedObjectArray< int > nextFreeSlot;
+	nextFreeSlot.resize( numVertices, 0 );
+	for( int l = 0; l < linkCount; ++l )
+	{
+		btSoftBodyLinkData::LinkNodePair ends( linkData.getVertexPair(l) );
+		const int endVertex[2] = { ends.vertex0, ends.vertex1 };
+		for( int e = 0; e < 2; ++e )
+		{
+			const int v = endVertex[e];
+			listOfLinksPerVertex[v * stride + nextFreeSlot[v]] = l;
+			++nextFreeSlot[v];
+		}
+	}
+}
+
+static void computeBatchingIntoWavefronts( 
+	btSoftBodyLinkData &linkData, 
+	int wavefrontSize, 
+	int linksPerWorkItem, // not referenced anywhere in the body
+	int maxLinksPerWavefront, 
+	btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, 
+	btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */
+	btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */
+	)
+{
+	// Groups the links into wavefronts of at most maxLinksPerWavefront links, visiting vertices in first-in-first-out order,
+	// then splits each wavefront into batches of at most wavefrontSize links in which no two links share a vertex.
+
+	// Attempt generation of larger batches of links.
+	btAlignedObjectArray< bool > processedLink;
+	processedLink.resize( linkData.getNumLinks() ); // NOTE(review): presumably resize() fills the new entries with false - confirm against btAlignedObjectArray
+	btAlignedObjectArray< int > listOfLinksPerVertex;
+	int maxLinksPerVertex = 0;
+
+	// Count num vertices
+	int numVertices = 0;
+	for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
+	{
+		btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
+		numVertices = btMax( numVertices, nodes.vertex0 + 1 );
+		numVertices = btMax( numVertices, nodes.vertex1 + 1 );
+	}
+
+	// Need list of links per vertex
+	// Compute valence of each vertex
+	btAlignedObjectArray <int> numLinksPerVertex;
+	numLinksPerVertex.resize(0);
+	numLinksPerVertex.resize( numVertices, 0 );
+
+	generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
+
+
+	// At this point we know what links we have for each vertex so we can start batching
+	
+	// We want a vertex to start with, let's go with 0
+	int currentVertex = 0;
+	int linksProcessed = 0;
+
+	btAlignedObjectArray <int> verticesToProcess; // used as a queue: push_back to add, removeFromVector( ..., 0 ) to take from the front
+
+	while( linksProcessed < linkData.getNumLinks() )
+	{
+		// Next wavefront
+		int nextWavefront = linksForWavefronts.size();
+		linksForWavefronts.resize( nextWavefront + 1 );
+		btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);
+		verticesForWavefronts.resize( nextWavefront + 1 );
+		btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );
+
+		linksForWavefront.resize(0);
+
+		// Loop to find enough links to fill the wavefront
+		// Stopping if we either run out of links, or fill it
+		while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )
+		{
+			// Go through the links for the current vertex
+			for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )
+			{
+				int linkAddress = currentVertex * maxLinksPerVertex + link;
+				int linkIndex = listOfLinksPerVertex[linkAddress];
+				
+				// If we have not already processed this link, add it to the wavefront
+				// Claim it as another processed link
+				// Add the vertex at the far end to the list of vertices to process.
+				if( !processedLink[linkIndex] )
+				{
+					linksForWavefront.push_back( linkIndex );
+					linksProcessed++;
+					processedLink[linkIndex] = true;
+					int v0 = linkData.getVertexPair(linkIndex).vertex0;
+					int v1 = linkData.getVertexPair(linkIndex).vertex1;
+					if( v0 == currentVertex )
+						verticesToProcess.push_back( v1 );
+					else
+						verticesToProcess.push_back( v0 );
+				}
+			}
+			if( verticesToProcess.size() > 0 )
+			{
+				// Get the element on the front of the queue and remove it
+				currentVertex = verticesToProcess[0];
+				removeFromVector( verticesToProcess, 0 );
+			} else {		
+				// If we've not yet processed all the links, find the first unprocessed one
+				// and select one of its vertices as the current vertex
+				if( linksProcessed < linkData.getNumLinks() )
+				{
+					int searchLink = 0; // the search restarts from link 0 every time it is needed
+					while( processedLink[searchLink] )
+						searchLink++;
+					currentVertex = linkData.getVertexPair(searchLink).vertex0;
+				}	
+			}
+		}
+
+		// We have either finished or filled a wavefront
+		for( int link = 0; link < linksForWavefront.size(); ++link )
+		{
+			int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;
+			int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;
+			insertUniqueAndOrderedIntoVector( vertexSet, v0 );
+			insertUniqueAndOrderedIntoVector( vertexSet, v1 );
+		}
+		// Iterate over links mapped to the wave and batch those
+		// We can run a batch on each cycle trivially
+		
+		batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
+		btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );
+		
+
+		for( int link = 0; link < linksForWavefront.size(); ++link )
+		{
+			int linkIndex = linksForWavefront[link];
+			btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );
+			
+			int batch = 0;
+			bool placed = false;
+			while( batch < batchesWithinWave.size() && !placed )
+			{
+				bool foundSharedVertex = false;
+				if( batchesWithinWave[batch].size() >= wavefrontSize )
+				{
+					// If we have already filled this batch, move on to another
+					foundSharedVertex = true;
+				} else {
+					for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
+					{
+						btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );
+
+						if( vertices.vertex0 == vertices2.vertex0 ||
+							vertices.vertex1 == vertices2.vertex0 ||
+							vertices.vertex0 == vertices2.vertex1 ||
+							vertices.vertex1 == vertices2.vertex1 )
+						{
+							foundSharedVertex = true;
+							break;
+						}
+					}
+				}
+				if( !foundSharedVertex )
+				{
+					batchesWithinWave[batch].push_back( linkIndex );
+					placed = true;
+				} else {
+					++batch;
+				}
+			}
+			if( batch == batchesWithinWave.size() && !placed ) // no existing batch could take the link: open a new one
+			{
+				batchesWithinWave.resize( batch + 1 );
+				batchesWithinWave[batch].push_back( linkIndex );
+			}
+		}
+		
+	}
+
+}
+// Regroups the link arrays into padded, wavefront-sized batches and fills the per-wavefront vertex tables.
+void btSoftBodyLinkDataDX11SIMDAware::generateBatches()
+{
+	btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;
+	btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;
+	btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;
+	btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set
+
+	// Group the links into wavefronts
+	computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );
+
+
+	// Batch the wavefronts
+	generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );
+
+	m_numWavefronts = linksForWavefronts.size();
+
+	// At this point we have a description of which links we need to process in each wavefront
+
+	// First correctly fill the batch ranges vector
+	int numBatches = wavefrontBatches.size();
+	m_wavefrontBatchStartLengths.resize(0);
+	int prefixSum = 0;
+	for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
+	{
+		int wavesInBatch = wavefrontBatches[batchIndex].size();
+		int nextPrefixSum = prefixSum + wavesInBatch;
+		m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) ); // (first wavefront, wavefront count) of this batch
+
+		prefixSum += wavesInBatch;
+	}
+	
+	// Also find max number of batches within a wave
+	m_maxBatchesWithinWave = 0;
+	m_maxVerticesWithinWave = 0;
+	m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );
+	for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )
+	{
+		// See if the number of batches in this wave is greater than the current maximum
+		int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();
+		int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();
+		m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );
+		m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
+	}
+	
+	// Add padding values both for alignment and as dud addresses within LDS to compute junk rather than branch around
+	m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2); // i.e. rounded down to a multiple of 16, plus 32
+
+	// Now we know the maximum number of vertices per-wave we can resize the global vertices array
+	m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
+
+	// Grab backup copies of all the link data arrays for the sorting process
+	btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair>				m_links_Backup(m_links);
+	btAlignedObjectArray<float>											m_linkStrength_Backup(m_linkStrength);
+	btAlignedObjectArray<float>											m_linksMassLSC_Backup(m_linksMassLSC);
+	btAlignedObjectArray<float>											m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
+	//btAlignedObjectArray<Vectormath::Aos::Vector3>						m_linksCLength_Backup(m_linksCLength);
+	//btAlignedObjectArray<float>											m_linksLengthRatio_Backup(m_linksLengthRatio);
+	btAlignedObjectArray<float>											m_linksRestLength_Backup(m_linksRestLength);
+	btAlignedObjectArray<float>											m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
+
+	// Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.
+	m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );	
+		
+	// Then re-order links into wavefront blocks
+
+	// Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.
+	int wavefrontCount = 0;
+
+	// Iterate over batches of wavefronts, then wavefronts in the batch
+	for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
+	{
+		btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );
+		int wavefrontsInBatch = batch.size();
+
+		
+		for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
+		{	
+			// batch[] holds pre-sort wavefront indices; wavefrontCount is the wavefront's slot in the sorted output
+			int originalWavefrontIndex = batch[wavefrontIndex];
+			btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );
+			int verticesUsedByWavefront = wavefrontVertices.size();
+
+			// Copy the set of vertices into the correctly structured array for use on the device
+			// Fill the non-vertices with -1s
+			// so we can mask out those reads
+			for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
+			{
+				m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
+			}
+			for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
+			{
+				m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
+			}
+
+			// Obtain the set of batches within the current wavefront
+			btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );
+			// Set the size of the batches for use in the solver, correctly ordered
+			NumBatchesVerticesPair batchesAndVertices;
+			batchesAndVertices.numBatches = batchesWithinWavefront.size();
+			batchesAndVertices.numVertices = verticesUsedByWavefront;
+			m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
+			
+
+			// Now iterate over batches within the wavefront to structure the links correctly
+			for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )
+			{
+				btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );
+				int wavefrontBatchSize = linksInBatch.size();
+
+				int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;
+
+				for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
+				{
+					int originalLinkAddress = linksInBatch[linkIndex];
+					// Reorder simple arrays trivially
+					m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
+					m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
+					m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
+					m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
+					m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
+					m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
+
+					// The local address is more complicated. We need to work out where a given vertex will end up
+					// by searching the set of vertices for this link and using the index as the local address
+					btSoftBodyLinkData::LinkNodePair localPair;
+					btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];
+					localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );
+					localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );
+					m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
+				}
+				for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )
+				{
+					// Put 0s into these arrays for padding for cleanliness
+					m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);
+					m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
+
+
+					// For local addresses of junk data choose a set of addresses just above the range of valid ones 
+					// and cycling through % 16 so that we don't have bank conflicts between all dud addresses
+					// The valid addresses will do scatter and gather in the valid range, the junk ones should happily work
+					// off the end of that range so we need no control
+					btSoftBodyLinkData::LinkNodePair localPair;
+					localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);
+					localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);
+					m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
+				}
+
+			}
+
+			
+			wavefrontCount++;
+		}
+
+	
+	}
+
+} // void btSoftBodyLinkDataDX11SIMDAware::generateBatches()
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h
new file mode 100644
index 00000000..34881973
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h
@@ -0,0 +1,81 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "vectormath/vmInclude.h"
+#include "btSoftBodySolver_DX11.h"
+#include "btSoftBodySolverVertexBuffer_DX11.h"
+#include "btSoftBodySolverLinkData_DX11SIMDAware.h"
+#include "btSoftBodySolverVertexData_DX11.h"
+#include "btSoftBodySolverTriangleData_DX11.h"
+
+
+#ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
+#define BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
+/** btDX11SoftBodySolver variant that keeps its links in a btSoftBodyLinkDataDX11SIMDAware and reports DX_SIMD_SOLVER as its type. */
+class btDX11SIMDAwareSoftBodySolver : public btDX11SoftBodySolver
+{
+protected:
+	struct SolvePositionsFromLinksKernelCB // NOTE(review): presumably the constant-buffer layout of the matching shader — confirm against the HLSL
+	{		
+		int startWave;
+		int numWaves;
+		float kst;
+		float ti;
+	};
+
+
+	/** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */
+	btSoftBodyLinkDataDX11SIMDAware m_linkData;
+		
+	/** Variable to define whether we need to update solver constants on the next iteration */
+	bool m_updateSolverConstants;
+
+	
+	virtual bool buildShaders();
+
+	void updateConstants( float timeStep );
+
+
+	//////////////////////////////////////
+	// Kernel dispatches
+	
+	// NOTE(review): parameters are named startLink/numLinks while the constant buffer above uses startWave/numWaves — confirm which unit callers pass
+	void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
+
+	// End kernel dispatches
+	/////////////////////////////////////
+
+
+
+public:
+	btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory);
+
+	virtual ~btDX11SIMDAwareSoftBodySolver();
+
+	virtual btSoftBodyLinkData &getLinkData();
+
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
+
+	virtual void solveConstraints( float solverdt );
+	/** Identifies this solver as the DX_SIMD_SOLVER type. */
+	virtual SolverTypes getSolverType() const
+	{
+		return DX_SIMD_SOLVER;
+	}
+	
+};
+
+#endif // #ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
+
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp
new file mode 100644
index 00000000..dfa60e66
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp
@@ -0,0 +1,249 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <MiniCL/cl_MiniCL_Defs.h>
+
+#define MSTRINGIFY(A) A
+#include "../OpenCLC10/ApplyForces.cl"
+#include "../OpenCLC10/Integrate.cl"
+#include "../OpenCLC10/PrepareLinks.cl"
+#include "../OpenCLC10/SolvePositions.cl"
+#include "../OpenCLC10/UpdateNodes.cl"
+#include "../OpenCLC10/UpdateNormals.cl"
+#include "../OpenCLC10/UpdatePositions.cl"
+#include "../OpenCLC10/UpdatePositionsFromVelocities.cl"
+#include "../OpenCLC10/VSolveLinks.cl"
+#include "../OpenCLC10/UpdateFixedVertexPositions.cl"
+//#include "../OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"
+
+// NOTE(review): MINICL_REGISTER is defined outside this file; presumably it exposes each kernel included above to MiniCL by name — confirm.
+MINICL_REGISTER(PrepareLinksKernel)
+MINICL_REGISTER(VSolveLinksKernel)
+MINICL_REGISTER(UpdatePositionsFromVelocitiesKernel)
+MINICL_REGISTER(SolvePositionsFromLinksKernel)
+MINICL_REGISTER(updateVelocitiesFromPositionsWithVelocitiesKernel)
+MINICL_REGISTER(updateVelocitiesFromPositionsWithoutVelocitiesKernel)
+MINICL_REGISTER(IntegrateKernel)
+MINICL_REGISTER(ApplyForcesKernel)
+MINICL_REGISTER(ResetNormalsAndAreasKernel)
+MINICL_REGISTER(NormalizeNormalsAndAreasKernel)
+MINICL_REGISTER(UpdateSoftBodiesKernel)
+MINICL_REGISTER(UpdateFixedVertexPositions)
+// Dot product of the x, y and z components only; the w components are ignored.
+float mydot3a(float4 a, float4 b)
+{
+   return a.x*b.x + a.y*b.y + a.z*b.z;
+}
+
+// Half-open index range [firstObject, endObject) into the collision-object array for one cloth.
+typedef struct 
+{
+	int firstObject;
+	int endObject;
+} CollisionObjectIndices;
+// Description of one collision object: transform, velocities, shape type and shape dimensions.
+typedef struct 
+{
+	float4 shapeTransform[4]; // column major 4x4 matrix
+	float4 linearVelocity;
+	float4 angularVelocity;
+
+	int softBodyIdentifier;
+	int collisionShapeType; // compared against CAPSULE_SHAPE_PROXYTYPE by the collision kernel in this file
+	
+
+	// Shape information
+	// Compressed from the union
+	float radius;
+	float halfHeight;
+	int upAxis;
+		
+	float margin;
+	float friction;
+	// NOTE(review): presumably pads the struct to match the host-side layout — confirm
+	int padding0;
+	
+} CollisionShapeDescription;
+
+// From btBroadphaseProxy.h. NOTE(review): hard-coded copy of that value — confirm it still matches the header.
+__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
+
+// Returns matrix * vector for a 4x4 matrix stored as four columns (matrix[c] is column c)
+float4 matrixVectorMul( float4 matrix[4], float4 vector )
+{
+	// Gather component r of every column to form row r
+	float4 xRow = float4(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
+	float4 yRow = float4(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
+	float4 zRow = float4(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
+	float4 wRow = float4(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
+
+	// Each component of the product is one row dotted with the input vector
+	return float4(
+		dot(xRow, vector), dot(yRow, vector),
+		dot(zRow, vector), dot(wRow, vector) );
+}
+// Per-vertex kernel: pushes cloth vertices out of capsule colliders, then writes back velocity, friction force and position.
+__kernel void 
+SolveCollisionsAndUpdateVelocitiesKernel( 
+	const int numNodes,
+	const float isolverdt,
+	__global int *g_vertexClothIdentifier,
+	__global float4 *g_vertexPreviousPositions,
+	__global float * g_perClothFriction,
+	__global float * g_clothDampingFactor,
+	__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
+	__global CollisionShapeDescription * g_collisionObjectDetails,
+	__global float4 * g_vertexForces,
+	__global float4 *g_vertexVelocities,
+	__global float4 *g_vertexPositions GUID_ARG)
+{
+	int nodeID = get_global_id(0);
+	float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
+	
+	if( get_global_id(0) < numNodes )
+	{	
+		int clothIdentifier = g_vertexClothIdentifier[nodeID];
+		
+		// Abort if this is not a valid cloth
+		if( clothIdentifier < 0 )
+			return;
+
+
+		float4 position (g_vertexPositions[nodeID].xyz, 1.f);
+		float4 previousPosition (g_vertexPreviousPositions[nodeID].xyz, 1.f);
+			
+		float clothFriction = g_perClothFriction[clothIdentifier];
+		float dampingFactor = g_clothDampingFactor[clothIdentifier];
+		float velocityCoefficient = (1.f - dampingFactor);		
+		float4 difference = position - previousPosition;
+		float4 velocity = difference*velocityCoefficient*isolverdt;
+		
+		CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+	
+		int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
+		
+		if( numObjects > 0 )
+		{
+			// We have some possible collisions to deal with
+			for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
+			{
+				CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
+				float colliderFriction = shapeDescription.friction;
+
+				if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+				{
+					// Colliding with a capsule
+
+					float capsuleHalfHeight = shapeDescription.halfHeight;
+					float capsuleRadius = shapeDescription.radius;
+					float capsuleMargin = shapeDescription.margin;
+					int capsuleupAxis = shapeDescription.upAxis;
+
+					// Four columns of worldTransform matrix
+					float4 worldTransform[4];
+					worldTransform[0] = shapeDescription.shapeTransform[0];
+					worldTransform[1] = shapeDescription.shapeTransform[1];
+					worldTransform[2] = shapeDescription.shapeTransform[2];
+					worldTransform[3] = shapeDescription.shapeTransform[3];
+
+					// Correctly define capsule centerline vector 
+					float4 c1 (0.f, 0.f, 0.f, 1.f); 
+					float4 c2 (0.f, 0.f, 0.f, 1.f);
+					c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
+					c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
+					c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
+					c2.x = -c1.x;
+					c2.y = -c1.y;
+					c2.z = -c1.z;
+
+
+					float4 worldC1 = matrixVectorMul(worldTransform, c1);
+					float4 worldC2 = matrixVectorMul(worldTransform, c2);
+					float4 segment = (worldC2 - worldC1);
+
+					// compute distance of tangent to vertex along line segment in capsule
+					float distanceAlongSegment = -( mydot3a( (worldC1 - position), segment ) / mydot3a(segment, segment) );
+
+					float4 closestPoint = (worldC1 + (segment * distanceAlongSegment));
+					float distanceFromLine = length(position - closestPoint);
+					float distanceFromC1 = length(worldC1 - position);
+					float distanceFromC2 = length(worldC2 - position);
+					
+					// Final distance from collision, point to push from, direction to push in
+					// for impulse force
+					float dist;
+					float4 normalVector;
+					if( distanceAlongSegment < 0 )
+					{
+						dist = distanceFromC1;
+						normalVector = float4(normalize(position - worldC1).xyz, 0.f);
+					} else if( distanceAlongSegment > 1.f ) {
+						dist = distanceFromC2;
+						normalVector = float4(normalize(position - worldC2).xyz, 0.f);	
+					} else {
+						dist = distanceFromLine;
+						normalVector = float4(normalize(position - closestPoint).xyz, 0.f);
+					}
+						
+					float4 colliderLinearVelocity = shapeDescription.linearVelocity;
+					float4 colliderAngularVelocity = shapeDescription.angularVelocity;
+					float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - float4(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f));
+					// NOTE(review): matrixVectorMul treats worldTransform[3] as the translation column, yet the line above takes the rotation centre from .w of columns 0-2 — confirm which is intended
+					float minDistance = capsuleRadius + capsuleMargin;
+					
+					// In case of no collision, this is the value of velocity
+					velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+					
+					
+					// Check for a collision
+					if( dist < minDistance )
+					{
+						// Project back to surface along normal
+						position = position + float4(normalVector*(minDistance - dist)*0.9f);
+						velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+						float4 relativeVelocity = velocity - velocityOfSurfacePoint;
+
+						float4 p1 = normalize(cross(normalVector, segment));
+						float4 p2 = normalize(cross(p1, normalVector));
+						// Full friction is sum of velocities in each direction of plane
+						float4 frictionVector = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);
+
+						// Real friction is peak friction corrected by friction coefficients
+						frictionVector = frictionVector * (colliderFriction*clothFriction);
+
+						float approachSpeed = dot(relativeVelocity, normalVector);
+
+						if( approachSpeed <= 0.0f )
+							forceOnVertex -= frictionVector;
+					}
+				}
+			}
+		}
+
+		g_vertexVelocities[nodeID] = float4(velocity.xyz, 0.f);	
+
+		// Update external force
+		g_vertexForces[nodeID] = float4(forceOnVertex.xyz, 0.f);
+
+		g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
+	}
+}
+
+// Same MINICL_REGISTER treatment as the kernels listed near the top of this file.
+MINICL_REGISTER(SolveCollisionsAndUpdateVelocitiesKernel);
+
+
+
+
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h
new file mode 100644
index 00000000..f824f281
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h
@@ -0,0 +1,209 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
+
+// OpenCL support
+
+#ifdef USE_MINICL
+	#include "MiniCL/cl.h"
+#else //USE_MINICL
+	#ifdef __APPLE__
+		#include <OpenCL/OpenCL.h>
+	#else
+		#include <CL/cl.h>
+	#endif //__APPLE__
+#endif//USE_MINICL
+
+#ifndef SAFE_RELEASE
+#define SAFE_RELEASE(p)      { if(p) { (p)->Release(); (p)=NULL; } }
+#endif
+
+template <typename ElementType> class btOpenCLBuffer
+{
+public:
+	// Mirrors a CPU-side btAlignedObjectArray in an OpenCL buffer object and tracks which copy is current.
+	cl_command_queue	m_cqCommandQue;
+	cl_context			m_clContext;
+	cl_mem				m_buffer;
+
+	// CPU-side array mirrored by m_buffer; only the pointer is stored.
+	btAlignedObjectArray< ElementType > * m_CPUBuffer;
+
+	int  m_gpuSize;			// element count of *m_CPUBuffer when createBuffer() last succeeded
+	bool m_onGPU;			// true while the device copy is considered current
+	bool m_readOnlyOnGPU;	// selects CL_MEM_READ_ONLY and disables read-back
+	bool m_allocated;		// set by moveToGPU() once createBuffer() has succeeded
+
+	/** Adopt preexistingBuffer, or allocate a device buffer sized to the CPU array (at least one element).
+	 *  A buffer already held is released before a new one is allocated, so a resize does not leak it.
+	 *  Returns false, leaving m_gpuSize untouched, when clCreateBuffer reports an error. */
+	bool createBuffer( cl_mem* preexistingBuffer = 0)
+	{
+		if( preexistingBuffer )
+		{
+			m_buffer = *preexistingBuffer;
+		}
+		else {
+			// moveToGPU() re-enters here whenever the CPU array changes size: drop the old buffer first
+			if( m_buffer )
+				clReleaseMemObject(m_buffer);
+			m_buffer = 0;
+
+			cl_mem_flags flags= m_readOnlyOnGPU ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
+			size_t size = m_CPUBuffer->size() * sizeof(ElementType);
+			// At a minimum the buffer must exist
+			if( size == 0 )
+				size = sizeof(ElementType);
+
+			cl_int err = CL_SUCCESS;
+			m_buffer = clCreateBuffer(m_clContext, flags, size, 0, &err);
+			if( err != CL_SUCCESS )
+			{
+				btAssert( 0 && "Buffer::Buffer(m_buffer)");
+				return false;
+			}
+		}
+
+		m_gpuSize = m_CPUBuffer->size();
+		return true;
+	}
+
+public:
+	btOpenCLBuffer( cl_command_queue	commandQue,cl_context ctx, btAlignedObjectArray< ElementType >* CPUBuffer, bool readOnly)
+		:m_cqCommandQue(commandQue),
+		m_clContext(ctx),
+		m_buffer(0),
+		m_CPUBuffer(CPUBuffer),
+		m_gpuSize(0),
+		m_onGPU(false),
+		m_readOnlyOnGPU(readOnly),
+		m_allocated(false)
+	{
+	}
+
+	~btOpenCLBuffer()
+	{
+		if( m_buffer )
+			clReleaseMemObject(m_buffer);
+	}
+
+	/** Make sure a device buffer exists and, if the CPU copy is newer or was resized, enqueue an upload of it.
+	 *  Returns false if the buffer could not be created or the write could not be enqueued. */
+	bool moveToGPU()
+	{
+		if( (m_CPUBuffer->size() != m_gpuSize) )
+		{
+			m_onGPU = false;
+		}
+
+		if( !m_allocated && m_CPUBuffer->size() == 0  )
+		{
+			// If it isn't on the GPU and yet there is no data on the CPU side this may cause a problem with some kernels.
+			// We should create *something* on the device side
+			if (!createBuffer()) {
+				return false;
+			}
+			m_allocated = true;
+		}
+
+		if( !m_onGPU && m_CPUBuffer->size() > 0 )
+		{
+			if (!m_allocated || (m_CPUBuffer->size() != m_gpuSize)) {
+				if (!createBuffer()) {
+					return false;
+				}
+				m_allocated = true;
+			}
+
+			size_t size = m_CPUBuffer->size() * sizeof(ElementType);
+			// CL_FALSE requests a non-blocking write, so the CPU array is still being read after this call returns
+			cl_int err = clEnqueueWriteBuffer(m_cqCommandQue,m_buffer,
+				CL_FALSE,
+				0,
+				size, 
+				&((*m_CPUBuffer)[0]),0,0,0);
+			if( err != CL_SUCCESS )
+			{
+				btAssert( 0 && "CommandQueue::enqueueWriteBuffer(m_buffer)" );
+				return false;
+			}
+
+			m_onGPU = true;
+		}
+
+		return true;
+	}
+
+	/** Blocking read-back into the CPU array for writable buffers that are on the device; afterwards the
+	 *  CPU copy is treated as the current one. Returns false if the read fails. */
+	bool moveFromGPU()
+	{
+		if (m_CPUBuffer->size() > 0) {
+			if (m_onGPU && !m_readOnlyOnGPU) {
+				size_t size = m_CPUBuffer->size() * sizeof(ElementType);
+				cl_int err = clEnqueueReadBuffer(m_cqCommandQue,
+					m_buffer,
+					CL_TRUE,
+					0,
+					size,
+					&((*m_CPUBuffer)[0]),0,0,0);
+
+				if( err != CL_SUCCESS )
+				{
+					btAssert( 0 && "CommandQueue::enqueueReadBuffer(m_buffer)" );
+					return false;
+				}
+
+				m_onGPU = false;
+			}
+		}
+
+		return true;
+	}
+
+	/** Same read-back as moveFromGPU(), but m_onGPU stays set so the device copy is still treated as current. */
+	bool copyFromGPU()
+	{
+		if (m_CPUBuffer->size() > 0) {
+			if (m_onGPU && !m_readOnlyOnGPU) {
+				size_t size = m_CPUBuffer->size() * sizeof(ElementType);
+				cl_int err = clEnqueueReadBuffer(m_cqCommandQue,
+					m_buffer,
+					CL_TRUE,
+					0,size, 
+					&((*m_CPUBuffer)[0]),0,0,0);
+
+				if( err != CL_SUCCESS )
+				{
+					btAssert( 0 && "CommandQueue::enqueueReadBuffer(m_buffer)");
+					return false;
+				}
+			}
+		}
+
+		return true;
+	}
+
+	/** Mark the CPU copy as newer so that the next moveToGPU() uploads it again. */
+	virtual void changedOnCPU()
+	{
+		m_onGPU = false;
+	}
+}; // class btOpenCLBuffer
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
\ No newline at end of file
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h
new file mode 100644
index 00000000..6921f7da
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h
@@ -0,0 +1,99 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
+
+/** btSoftBodyLinkData subclass that additionally holds one btOpenCLBuffer per link attribute and the batch ranges over the links. */
+class btSoftBodyLinkDataOpenCL : public btSoftBodyLinkData
+{
+public:
+	bool				m_onGPU;
+
+	cl_command_queue	m_cqCommandQue;
+
+	// OpenCL-side buffers, one per link attribute
+	btOpenCLBuffer<LinkNodePair> m_clLinks;
+	btOpenCLBuffer<float>							      m_clLinkStrength;
+	btOpenCLBuffer<float>								  m_clLinksMassLSC;
+	btOpenCLBuffer<float>								  m_clLinksRestLengthSquared;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>			  m_clLinksCLength;
+	btOpenCLBuffer<float>								  m_clLinksLengthRatio;
+	btOpenCLBuffer<float>								  m_clLinksRestLength;
+	btOpenCLBuffer<float>								  m_clLinksMaterialLinearStiffnessCoefficient;
+	/** Start offset and length of one batch; both default to 0. */
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Link addressing information for each cloth.
+	 * Allows link locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_linkAddresses;
+
+	/**
+	 * Start and length values for computation batches over link data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_batchStartLengths;
+
+	btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx);
+
+	virtual ~btSoftBodyLinkDataOpenCL();
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks );
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( 
+		const LinkDescription &link, 
+		int linkIndex );
+
+	virtual bool onAccelerator();
+
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire link set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCLSIMDAware.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCLSIMDAware.h
new file mode 100644
index 00000000..b20e8055
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCLSIMDAware.h
@@ -0,0 +1,169 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
+#define BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
+
+
+/** Extends btSoftBodyLinkData with OpenCL buffers and per-wavefront link batching data. */
+class btSoftBodyLinkDataOpenCLSIMDAware : public btSoftBodyLinkData
+{
+public:
+	bool				m_onGPU;
+	cl_command_queue	m_cqCommandQue;
+
+	const int m_wavefrontSize;
+	const int m_linksPerWorkItem;
+	const int m_maxLinksPerWavefront;
+	int m_maxBatchesWithinWave;
+	int m_maxVerticesWithinWave;
+	int m_numWavefronts;
+
+	int m_maxVertex;
+
+	struct NumBatchesVerticesPair
+	{
+		int numBatches;
+		int numVertices;
+	};
+
+	btAlignedObjectArray<int>							  m_linksPerWavefront;
+	btAlignedObjectArray<NumBatchesVerticesPair>		  m_numBatchesAndVerticesWithinWaves;
+	btOpenCLBuffer< NumBatchesVerticesPair >			  m_clNumBatchesAndVerticesWithinWaves;
+
+	// All arrays here will contain batches of m_maxLinksPerWavefront links
+	// ordered by wavefront.
+	// with either global vertex pairs or local vertex pairs
+	btAlignedObjectArray< int >							  m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront
+	btOpenCLBuffer<int>									  m_clWavefrontVerticesGlobalAddresses;
+	btAlignedObjectArray< LinkNodePair >				  m_linkVerticesLocalAddresses; // Vertex pair for the link
+	btOpenCLBuffer<LinkNodePair>						  m_clLinkVerticesLocalAddresses;
+	btOpenCLBuffer<float>							      m_clLinkStrength;
+	btOpenCLBuffer<float>								  m_clLinksMassLSC;
+	btOpenCLBuffer<float>								  m_clLinksRestLengthSquared;
+	btOpenCLBuffer<float>								  m_clLinksRestLength;
+	btOpenCLBuffer<float>								  m_clLinksMaterialLinearStiffnessCoefficient;
+
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Link addressing information for each cloth.
+	 * Allows link locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_linkAddresses;
+	
+	/**
+	 * Start and length values for computation batches over link data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_wavefrontBatchStartLengths;
+
+	btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue, cl_context ctx);
+
+	virtual ~btSoftBodyLinkDataOpenCLSIMDAware();
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks );
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( 
+		const LinkDescription &link, 
+		int linkIndex );
+
+	virtual bool onAccelerator();
+
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire link set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+
+	/** Returns m_maxVerticesWithinWave. Read-only, hence const. */
+	int getMaxVerticesPerWavefront() const
+	{
+		return m_maxVerticesWithinWave;
+	}
+	/** Returns m_wavefrontSize. */
+	int getWavefrontSize() const
+	{
+		return m_wavefrontSize;
+	}
+	/** Returns m_linksPerWorkItem. */
+	int getLinksPerWorkItem() const
+	{
+		return m_linksPerWorkItem;
+	}
+	/** Returns m_maxLinksPerWavefront. */
+	int getMaxLinksPerWavefront() const
+	{
+		return m_maxLinksPerWavefront;
+	}
+	/** Returns m_maxBatchesWithinWave. */
+	int getMaxBatchesPerWavefront() const
+	{
+		return m_maxBatchesWithinWave;
+	}
+	/** Returns m_numWavefronts. */
+	int getNumWavefronts() const
+	{
+		return m_numWavefronts;
+	}
+	/** Returns a copy of the entry of m_numBatchesAndVerticesWithinWaves at index wavefront. */
+	NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront ) const
+	{
+		return m_numBatchesAndVerticesWithinWaves[wavefront];
+	}
+	/** Returns the entry of m_wavefrontVerticesGlobalAddresses at index vertexIndex. */
+	int getVertexGlobalAddresses( int vertexIndex ) const
+	{
+		return m_wavefrontVerticesGlobalAddresses[vertexIndex];
+	}
+	/**
+	 * Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally.
+	 */
+	LinkNodePair getVertexPairLocalAddresses( int linkIndex ) const
+	{
+		return m_linkVerticesLocalAddresses[linkIndex];
+	}
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.cpp b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.cpp
new file mode 100644
index 00000000..1000440b
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.cpp
@@ -0,0 +1,126 @@
+#include "btSoftBodySolverOutputCLtoGL.h"
+#include <stdio.h> //@todo: remove the debugging printf at some stage
+#include "btSoftBodySolver_OpenCL.h"
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+#include "btSoftBodySolverVertexBuffer_OpenGL.h"
+#include "BulletSoftBody/btSoftBody.h"
+
+////OpenCL 1.0 kernels don't use float3 (the kernel source below is taken from the OpenCLC10 directory)
+#define MSTRINGIFY(A) #A // stringizes its argument
+static char* OutputToVertexArrayCLString =
+#include "OpenCLC10/OutputToVertexArray.cl"
+// NOTE(review): the .cl file presumably wraps its text in MSTRINGIFY(...) and supplies the closing ';' - confirm.
+// Releases a kernel handle when it is non-zero and resets the variable to 0.
+#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
+// Local work-group size passed to clEnqueueNDRangeKernel for the output kernel.
+static const size_t workGroupSize = 128;
+
+// Copies the solver's current vertex data for softBody into vertexBuffer. Only OPENGL_BUFFER descriptors
+// are handled: the matching output kernel is run on m_cqCommandQue; clFinish is always called at the end.
+void btSoftBodySolverOutputCLtoGL::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
+{
+	btSoftBodySolver *solver = softBody->getSoftBodySolver();
+	btAssert( solver->getSolverType() == btSoftBodySolver::CL_SOLVER || solver->getSolverType() == btSoftBodySolver::CL_SIMD_SOLVER );
+	btOpenCLSoftBodySolver *clSolver = static_cast< btOpenCLSoftBodySolver * >( solver );
+	checkInitialized();
+	btOpenCLAcceleratedSoftBodyInterface* currentCloth = clSolver->findSoftBodyInterface( softBody );
+	btSoftBodyVertexDataOpenCL &vertexData( clSolver->m_vertexData );	
+
+	const int firstVertex = currentCloth->getFirstVertex();
+
+	if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::OPENGL_BUFFER ) {		
+
+		const btOpenGLInteropVertexBufferDescriptor *openGLVertexBuffer = static_cast< btOpenGLInteropVertexBufferDescriptor* >(vertexBuffer);						
+		cl_int ciErrNum = CL_SUCCESS;    
+
+		cl_mem clBuffer = openGLVertexBuffer->getBuffer();		
+		cl_kernel outputKernel = outputToVertexArrayWithNormalsKernel;
+		if( !vertexBuffer->hasNormals() )
+			outputKernel = outputToVertexArrayWithoutNormalsKernel;
+
+		ciErrNum = clEnqueueAcquireGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
+		if( ciErrNum != CL_SUCCESS )
+		{
+			btAssert( 0 &&  "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
+		}
+
+		int numVertices = currentCloth->getNumVertices();
+		// NOTE: the clSetKernelArg results below overwrite ciErrNum one after another and are not checked.
+		ciErrNum = clSetKernelArg(outputKernel, 0, sizeof(int), &firstVertex );
+		ciErrNum = clSetKernelArg(outputKernel, 1, sizeof(int), &numVertices );
+		ciErrNum = clSetKernelArg(outputKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
+		if( vertexBuffer->hasVertexPositions() )
+		{
+			int vertexOffset = vertexBuffer->getVertexOffset();
+			int vertexStride = vertexBuffer->getVertexStride();
+			ciErrNum = clSetKernelArg(outputKernel, 3, sizeof(int), &vertexOffset );
+			ciErrNum = clSetKernelArg(outputKernel, 4, sizeof(int), &vertexStride );
+			ciErrNum = clSetKernelArg(outputKernel, 5, sizeof(cl_mem), (void*)&vertexData.m_clVertexPosition.m_buffer );
+
+		}
+		if( vertexBuffer->hasNormals() )
+		{
+			int normalOffset = vertexBuffer->getNormalOffset();
+			int normalStride = vertexBuffer->getNormalStride();
+			ciErrNum = clSetKernelArg(outputKernel, 6, sizeof(int), &normalOffset );
+			ciErrNum = clSetKernelArg(outputKernel, 7, sizeof(int), &normalStride );
+			ciErrNum = clSetKernelArg(outputKernel, 8, sizeof(cl_mem), (void*)&vertexData.m_clVertexNormal.m_buffer );
+
+		}
+		size_t	numWorkItems = workGroupSize*((vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
+		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, outputKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
+		if( ciErrNum != CL_SUCCESS ) 
+		{
+			btAssert( 0 &&  "enqueueNDRangeKernel(copySoftBodyToVertexBuffer)");
+		}
+
+		ciErrNum = clEnqueueReleaseGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, 0);
+		if( ciErrNum != CL_SUCCESS )
+		{
+			btAssert( 0 &&  "clEnqueueReleaseGLObjects(copySoftBodyToVertexBuffer)");
+		}
+	} else {
+		btAssert( 0 && "Undefined output for this solver output" );
+	}
+
+	// clFinish in here may not be the best thing. It's possible that we should have a waitForFrameComplete function.
+	clFinish(m_cqCommandQue);
+
+} // btSoftBodySolverOutputCLtoGL::copySoftBodyToVertexBuffer
+
+// Releases and recompiles both output kernels from the embedded OutputToVertexArray.cl source.
+// Returns true (and sets m_shadersInitialized) only if both kernel handles are non-null.
+bool btSoftBodySolverOutputCLtoGL::buildShaders()
+{
+	// releaseKernels() also clears m_shadersInitialized, so an "already built" early-out
+	// placed after it could never trigger; none is attempted here.
+	releaseKernels();
+
+	outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
+	outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
+
+	// NOTE(review): assumes compileCLKernelFromString yields a null handle on failure - confirm.
+	const bool returnVal = ( outputToVertexArrayWithNormalsKernel != 0 ) &&
+		( outputToVertexArrayWithoutNormalsKernel != 0 );
+
+	if( returnVal )
+		m_shadersInitialized = true;
+	return returnVal;
+} // btSoftBodySolverOutputCLtoGL::buildShaders
+
+// Drops both output kernels (when present) and flags the shaders as not built.
+void btSoftBodySolverOutputCLtoGL::releaseKernels()
+{
+	m_shadersInitialized = false;
+	RELEASE_CL_KERNEL( outputToVertexArrayWithNormalsKernel );
+	RELEASE_CL_KERNEL( outputToVertexArrayWithoutNormalsKernel );
+} // btSoftBodySolverOutputCLtoGL::releaseKernels
+
+// Builds the kernels if they are not built yet; returns whether they are marked as built.
+bool btSoftBodySolverOutputCLtoGL::checkInitialized()
+{
+	if( !m_shadersInitialized && buildShaders() )
+		m_shadersInitialized = true;
+
+	return m_shadersInitialized;
+}
\ No newline at end of file
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.h
new file mode 100644
index 00000000..ab3ea264
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.h
@@ -0,0 +1,62 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
+#define BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
+
+#include "btSoftBodySolver_OpenCL.h"
+
+/** 
+ * Class to manage movement of data from a solver to a given target.
+ * This version is the CL to GL interop version.
+ */
+class btSoftBodySolverOutputCLtoGL : public btSoftBodySolverOutput
+{
+protected:
+	cl_command_queue	m_cqCommandQue;
+	cl_context			m_cxMainContext;
+	CLFunctions			clFunctions;
+
+	cl_kernel		outputToVertexArrayWithNormalsKernel;
+	cl_kernel		outputToVertexArrayWithoutNormalsKernel;
+
+	bool m_shadersInitialized;
+
+	/** Builds the kernels if they are not built yet; returns whether they are marked as built. */
+	virtual bool checkInitialized();	
+	/** Releases any existing kernels and compiles both output kernels. */
+	virtual bool buildShaders();
+	/** Releases both kernels and clears m_shadersInitialized. */
+	void releaseKernels();
+public:
+	/** Stores the queue and context; no kernel is built until it is first needed. */
+	btSoftBodySolverOutputCLtoGL(cl_command_queue queue, cl_context context) :
+		m_cqCommandQue( queue ), m_cxMainContext( context ),
+		clFunctions(queue, context),
+		outputToVertexArrayWithNormalsKernel( 0 ), outputToVertexArrayWithoutNormalsKernel( 0 ),
+		m_shadersInitialized( false )
+	{
+	}
+
+	/** Releases any kernels that were built. */
+	virtual ~btSoftBodySolverOutputCLtoGL() { releaseKernels(); }
+
+	/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
\ No newline at end of file
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h
new file mode 100644
index 00000000..7e376785
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h
@@ -0,0 +1,84 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
+
+
+class btSoftBodyTriangleDataOpenCL : public btSoftBodyTriangleData
+{
+public:
+	bool				m_onGPU;
+	cl_command_queue    m_queue;
+
+	btOpenCLBuffer<btSoftBodyTriangleData::TriangleNodeSet>					m_clVertexIndices;
+	btOpenCLBuffer<float>								m_clArea;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>			m_clNormal;
+
+	/**
+	 * Triangle addressing information for each cloth.
+	 * Allows triangle locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_triangleAddresses;
+
+	/**
+	 * Start and length values for computation batches over triangle data (presumably first = start, second = length - confirm).
+	 */
+	struct btSomePair
+	{
+		btSomePair() : first(0), second(0) {} // zero-initialised so a default-constructed pair never holds indeterminate values
+		btSomePair(int f,int s)
+			:first(f),second(s)
+		{
+		}
+		int first;
+		int second;
+	};
+	btAlignedObjectArray< btSomePair >		m_batchStartLengths;
+
+public:
+	btSoftBodyTriangleDataOpenCL( cl_command_queue queue, cl_context ctx );
+
+	virtual ~btSoftBodyTriangleDataOpenCL();
+
+	/** Allocate enough space in all triangle-related arrays to fit numTriangles triangles */
+	virtual void createTriangles( int numTriangles );
+	
+	/** Insert the triangle described into the correct data structures assuming space has already been allocated by a call to createTriangles */
+	virtual void setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex );
+
+	virtual bool onAccelerator();
+
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire triangle set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+}; // class btSoftBodyTriangleDataOpenCL
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
+
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h
new file mode 100644
index 00000000..7c223ecc
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h
@@ -0,0 +1,166 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
+#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H 
+
+
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+#ifdef USE_MINICL
+	#include "MiniCL/cl.h"
+#else //USE_MINICL
+	#ifdef __APPLE__
+		#include <OpenCL/OpenCL.h>
+	#else
+		#include <CL/cl.h>
+		#include <CL/cl_gl.h>
+	#endif //__APPLE__
+#endif//USE_MINICL
+
+
+#ifdef _WIN32//for glut.h
+#include <windows.h>
+#endif
+
+//think different
+#if defined(__APPLE__) && !defined (VMDMESA)
+#include <OpenGL/OpenGL.h>
+#include <OpenGL/gl.h>
+#include <OpenGL/glu.h>
+#include <GLUT/glut.h>
+#else
+
+
+#ifdef _WINDOWS
+#include <windows.h>
+#include <GL/gl.h>
+#include <GL/glu.h>
+#else
+#include <GL/glut.h>
+#endif //_WINDOWS
+#endif //APPLE
+
+
+
+/** Vertex buffer descriptor backed by an OpenGL VBO that is exposed to OpenCL through an interop buffer. */
+class btOpenGLInteropVertexBufferDescriptor : public btVertexBufferDescriptor
+{
+protected:
+	/** OpenCL context */
+	cl_context			m_context;
+
+	/** OpenCL command queue */
+	cl_command_queue	m_commandQueue;
+	
+	/** OpenCL interop buffer */
+	cl_mem m_buffer;
+
+	/** VBO in GL that is the basis of the interop buffer */
+	GLuint m_openGLVBO;
+
+public:
+	/**
+	 * cqCommandQue is the command queue that kernels and data movement will be enqueued into.
+	 * context is the OpenCL context this interop buffer will work in.
+	 * openGLVBO is the OpenGL vertex buffer data will be copied into.
+	 * vertexOffset is the offset in floats to the first vertex.
+	 * vertexStride is the stride in floats between vertices.
+	 */
+	btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride ) :
+		m_context( context ), m_commandQueue( cqCommandQue ), m_buffer( 0 ), m_openGLVBO( openGLVBO )
+	{
+		// The handles are set in the initializer list in every build (including USE_MINICL),
+		// so the destructor never sees an indeterminate m_buffer.
+#ifndef USE_MINICL
+		cl_int ciErrNum = CL_SUCCESS;
+
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+
+		// Wrap the GL VBO in a write-only CL memory object.
+		m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
+		if( ciErrNum != CL_SUCCESS )
+		{
+			btAssert( 0 &&  "clCreateFromGLBuffer(btOpenGLInteropVertexBufferDescriptor)");
+		}
+
+		m_hasVertexPositions = true;
+#else
+		btAssert(0);//MiniCL shouldn't get here
+#endif
+	}
+
+	/**
+	 * cqCommandQue is the command queue that kernels and data movement will be enqueued into.
+	 * context is the OpenCL context this interop buffer will work in.
+	 * openGLVBO is the OpenGL vertex buffer data will be copied into.
+	 * vertexOffset is the offset in floats to the first vertex.
+	 * vertexStride is the stride in floats between vertices.
+	 * normalOffset is the offset in floats to the first normal.
+	 * normalStride is the stride in floats between normals.
+	 */
+	btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride, int normalOffset, int normalStride ) :
+		m_context( context ), m_commandQueue( cqCommandQue ), m_buffer( 0 ), m_openGLVBO( openGLVBO )
+	{
+		// As in the positions-only constructor, the handles are initialised above for every build.
+#ifndef USE_MINICL
+		cl_int ciErrNum = CL_SUCCESS;
+
+		// Wrap the GL VBO in a write-only CL memory object.
+		m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
+		if( ciErrNum != CL_SUCCESS )
+		{
+			btAssert( 0 &&  "clCreateFromGLBuffer(btOpenGLInteropVertexBufferDescriptor)");
+		}
+
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+		m_hasVertexPositions = true;
+
+		m_normalOffset = normalOffset;
+		m_normalStride = normalStride;
+		m_hasNormals = true;
+#else
+		btAssert(0);//MiniCL shouldn't get here
+#endif //USE_MINICL
+	}
+
+	/** Releases the CL interop buffer if one was created. */
+	virtual ~btOpenGLInteropVertexBufferDescriptor()
+	{
+		if( m_buffer )
+			clReleaseMemObject( m_buffer );
+	}
+
+	/**
+	 * Return the type of the vertex buffer descriptor.
+	 */
+	virtual BufferTypes getBufferType() const
+	{
+		return OPENGL_BUFFER;
+	}
+
+	virtual cl_context getContext() const
+	{
+		return m_context;
+	}
+
+	virtual cl_mem getBuffer() const
+	{
+		return m_buffer;
+	}	
+};
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h
new file mode 100644
index 00000000..531c3427
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h
@@ -0,0 +1,52 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+
+#ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
+
+/** btSoftBodyVertexData subclass that adds one btOpenCLBuffer member per vertex attribute. */
+class btSoftBodyVertexDataOpenCL : public btSoftBodyVertexData
+{
+protected:
+	bool		m_onGPU; // NOTE(review): presumably true while the data resides on the device - confirm in the .cpp
+	cl_command_queue	m_queue;
+
+public:
+	btOpenCLBuffer<int>									m_clClothIdentifier;
+	btOpenCLBuffer<Vectormath::Aos::Point3>				m_clVertexPosition;
+	btOpenCLBuffer<Vectormath::Aos::Point3>				m_clVertexPreviousPosition;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>				m_clVertexVelocity;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>				m_clVertexForceAccumulator;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>				m_clVertexNormal;
+	btOpenCLBuffer<float>									m_clVertexInverseMass;
+	btOpenCLBuffer<float>									m_clVertexArea;
+	btOpenCLBuffer<int>									m_clVertexTriangleCount;
+public:
+	btSoftBodyVertexDataOpenCL( cl_command_queue queue,  cl_context ctx);
+
+	virtual ~btSoftBodyVertexDataOpenCL();
+
+	virtual bool onAccelerator();
+
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true); // NOTE(review): flag semantics are defined in the .cpp - confirm before relying on the defaults
+};
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp
new file mode 100644
index 00000000..f84448a6
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp
@@ -0,0 +1,1820 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "vectormath/vmInclude.h"
+#include <stdio.h> //@todo: remove the debugging printf at some stage
+#include "btSoftBodySolver_OpenCL.h"
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+#include "BulletSoftBody/btSoftBody.h"
+#include "BulletSoftBody/btSoftBodyInternals.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "LinearMath/btQuickprof.h"
+#include <limits.h>
+
+
+#define BT_SUPPRESS_OPENCL_ASSERTS
+
+#ifdef USE_MINICL
+	#include "MiniCL/cl.h"
+#else //USE_MINICL
+	#ifdef __APPLE__
+		#include <OpenCL/OpenCL.h>
+	#else
+		#include <CL/cl.h>
+	#endif //__APPLE__
+#endif//USE_MINICL
+
+// Work group size used to initialise btOpenCLSoftBodySolver::m_defaultWorkGroupSize.
+#define BT_DEFAULT_WORKGROUPSIZE 64
+
+
+// Release an OpenCL kernel handle if it is non-zero and reset the handle to 0,
+// so that releasing the same handle twice is harmless.
+// NOTE: expands to a bare { } block rather than do { } while(0); avoid using it as
+// the unbraced body of an if that is followed by an else.
+#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
+
+
+// CL_VERSION_1_1 seems broken on the NVidia SDK, so it is simply disabled.
+
+// The OpenCL 1.0 kernels (OpenCLC10 directory) don't use float3.
+// MSTRINGIFY turns its argument into a string literal.
+// Each string below is initialised from the .cl file included on the following line.
+// NOTE(review): presumably every .cl file wraps its kernel source in MSTRINGIFY( ... )
+// and ends with a semicolon so that the #include completes the initialiser - confirm.
+#define MSTRINGIFY(A) #A
+static const char* PrepareLinksCLString = 
+#include "OpenCLC10/PrepareLinks.cl"
+static const char* UpdatePositionsFromVelocitiesCLString = 
+#include "OpenCLC10/UpdatePositionsFromVelocities.cl"
+static const char* SolvePositionsCLString = 
+#include "OpenCLC10/SolvePositions.cl"
+static const char* UpdateNodesCLString = 
+#include "OpenCLC10/UpdateNodes.cl"
+static const char* UpdatePositionsCLString = 
+#include "OpenCLC10/UpdatePositions.cl"
+static const char* UpdateConstantsCLString = 
+#include "OpenCLC10/UpdateConstants.cl"
+static const char* IntegrateCLString = 
+#include "OpenCLC10/Integrate.cl"
+static const char* ApplyForcesCLString = 
+#include "OpenCLC10/ApplyForces.cl"
+static const char* UpdateFixedVertexPositionsCLString = 
+#include "OpenCLC10/UpdateFixedVertexPositions.cl"
+static const char* UpdateNormalsCLString = 
+#include "OpenCLC10/UpdateNormals.cl"
+static const char* VSolveLinksCLString = 
+#include "OpenCLC10/VSolveLinks.cl"
+static const char* SolveCollisionsAndUpdateVelocitiesCLString =
+#include "OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"
+
+
+/**
+ * Construct the OpenCL vertex data.
+ * Every OpenCL buffer is created for the given queue and context and bound to the
+ * address of the corresponding array inherited from btSoftBodyVertexData.
+ * m_onGPU starts out false so that onAccelerator() never reads an uninitialised flag.
+ *
+ * @param queue OpenCL command queue, stored in m_queue and passed to every buffer.
+ * @param ctx   OpenCL context passed to every buffer.
+ */
+btSoftBodyVertexDataOpenCL::btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx) :
+	m_onGPU(false),
+	m_queue(queue),
+	m_clClothIdentifier( queue, ctx, &m_clothIdentifier, false ),
+	m_clVertexPosition( queue, ctx, &m_vertexPosition, false ),
+	m_clVertexPreviousPosition( queue, ctx, &m_vertexPreviousPosition, false ),
+	m_clVertexVelocity( queue, ctx, &m_vertexVelocity, false ),
+	m_clVertexForceAccumulator( queue, ctx, &m_vertexForceAccumulator, false ),
+	m_clVertexNormal( queue, ctx, &m_vertexNormal, false ),
+	m_clVertexInverseMass( queue, ctx, &m_vertexInverseMass, false ),
+	m_clVertexArea( queue, ctx, &m_vertexArea, false ),
+	m_clVertexTriangleCount( queue, ctx, &m_vertexTriangleCount, false )
+{
+}
+
+/** Destructor; performs no explicit work of its own. */
+btSoftBodyVertexDataOpenCL::~btSoftBodyVertexDataOpenCL()
+{
+
+}
+
+/** @return the current value of the m_onGPU flag. */
+bool btSoftBodyVertexDataOpenCL::onAccelerator()
+{
+	return m_onGPU;
+}
+
+/**
+ * Move every vertex attribute buffer to the GPU.
+ * The buffers are processed in a fixed order and the sequence stops at the first
+ * buffer that reports failure; later buffers are then left untouched.
+ *
+ * @return true if every buffer reported success. m_onGPU is set to true only then.
+ */
+bool btSoftBodyVertexDataOpenCL::moveToAccelerator()
+{
+	// && short-circuits, so a failing buffer prevents the remaining calls.
+	const bool allMoved =
+		m_clClothIdentifier.moveToGPU() &&
+		m_clVertexPosition.moveToGPU() &&
+		m_clVertexPreviousPosition.moveToGPU() &&
+		m_clVertexVelocity.moveToGPU() &&
+		m_clVertexForceAccumulator.moveToGPU() &&
+		m_clVertexNormal.moveToGPU() &&
+		m_clVertexInverseMass.moveToGPU() &&
+		m_clVertexArea.moveToGPU() &&
+		m_clVertexTriangleCount.moveToGPU();
+
+	if( allMoved )
+	{
+		m_onGPU = true;
+	}
+
+	return allMoved;
+}
+
+/**
+ * Bring vertex data back from the GPU.
+ *
+ * @param bCopy        false: move every buffer off the GPU with moveFromGPU();
+ *                     true: read data back with copyFromGPU() instead.
+ * @param bCopyMinimum only used when bCopy is true: if true just the position and
+ *                     normal buffers are copied, otherwise every buffer is copied.
+ * @return true if every call made reported success. Processing stops at the first
+ *         failing buffer.
+ */
+bool btSoftBodyVertexDataOpenCL::moveFromAccelerator(bool bCopy, bool bCopyMinimum)
+{
+	bool success = true;
+
+	if (!bCopy)
+	{
+		success = success && m_clClothIdentifier.moveFromGPU();
+		success = success && m_clVertexPosition.moveFromGPU();
+		success = success && m_clVertexPreviousPosition.moveFromGPU();
+		success = success && m_clVertexVelocity.moveFromGPU();
+		success = success && m_clVertexForceAccumulator.moveFromGPU();
+		success = success && m_clVertexNormal.moveFromGPU();
+		success = success && m_clVertexInverseMass.moveFromGPU();
+		success = success && m_clVertexArea.moveFromGPU();
+		success = success && m_clVertexTriangleCount.moveFromGPU();
+	}
+	else if (bCopyMinimum)
+	{
+		success = success && m_clVertexPosition.copyFromGPU();
+		success = success && m_clVertexNormal.copyFromGPU();
+	}
+	else
+	{
+		success = success && m_clClothIdentifier.copyFromGPU();
+		success = success && m_clVertexPosition.copyFromGPU();
+		success = success && m_clVertexPreviousPosition.copyFromGPU();
+		success = success && m_clVertexVelocity.copyFromGPU();
+		success = success && m_clVertexForceAccumulator.copyFromGPU();
+		success = success && m_clVertexNormal.copyFromGPU();
+		success = success && m_clVertexInverseMass.copyFromGPU();
+		success = success && m_clVertexArea.copyFromGPU();
+		success = success && m_clVertexTriangleCount.copyFromGPU();
+	}
+
+	// A successful move takes the data off the GPU, so the flag must become false
+	// (it used to be set to true here, unlike btSoftBodyLinkDataOpenCL).
+	// A successful copy keeps the previous behaviour of marking the data as on the GPU.
+	if( success )
+		m_onGPU = bCopy;
+
+	return success;
+}
+
+/**
+ * Construct the OpenCL link data.
+ * Every OpenCL buffer is created for the given queue and context and bound to the
+ * address of the corresponding host-side link array.
+ *
+ * @param queue OpenCL command queue, stored in m_cqCommandQue and passed to every buffer.
+ * @param ctx   OpenCL context passed to every buffer.
+ */
+btSoftBodyLinkDataOpenCL::btSoftBodyLinkDataOpenCL(cl_command_queue queue,  cl_context ctx) 
+:m_cqCommandQue(queue),
+	m_clLinks( queue, ctx, &m_links, false ),
+	m_clLinkStrength( queue, ctx, &m_linkStrength, false ),
+	m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ),
+	m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ),
+	m_clLinksCLength( queue, ctx, &m_linksCLength, false ),
+	m_clLinksLengthRatio( queue, ctx, &m_linksLengthRatio, false ),
+	m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ),
+	m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false )
+{
+	// Nothing has been moved to the GPU yet; give onAccelerator() a defined answer.
+	// Assigned in the body because the declaration of m_onGPU is not part of this file.
+	m_onGPU = false;
+}
+
+/** Destructor; performs no explicit work of its own. */
+btSoftBodyLinkDataOpenCL::~btSoftBodyLinkDataOpenCL()
+{
+}
+
+/** Convert a btVector3 into a Vectormath::Aos::Vector3 holding the same x, y and z. */
+static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
+{
+	return Vectormath::Aos::Vector3( vec.getX(), vec.getY(), vec.getZ() );
+}
+
+/**
+ * Grow all link-related arrays so that numLinks additional links fit.
+ * The base class grows its own arrays; m_linkAddresses is resized to the same total.
+ */
+void btSoftBodyLinkDataOpenCL::createLinks( int numLinks )
+{
+	// Work out the new total before the base class changes the size of m_links.
+	const int totalLinks = m_links.size() + numLinks;
+
+	btSoftBodyLinkData::createLinks( numLinks );
+
+	// Keep the link address table the same length as the link arrays.
+	m_linkAddresses.resize( totalLinks );
+}
+
+/**
+ * Store the described link at linkIndex, assuming space has already been allocated
+ * by a call to createLinks.
+ * The base class stores the link data; the address table entry for linkIndex is
+ * then set to linkIndex itself (generateBatches() later rewrites these entries).
+ */
+void btSoftBodyLinkDataOpenCL::setLinkAt( 
+	const LinkDescription &link, 
+	int linkIndex )
+{
+	btSoftBodyLinkData::setLinkAt( link, linkIndex );
+
+	// Set the link index correctly for initialisation
+	m_linkAddresses[linkIndex] = linkIndex;
+}
+
+/** @return the current value of the m_onGPU flag. */
+bool btSoftBodyLinkDataOpenCL::onAccelerator()
+{
+	return m_onGPU;
+}
+
+/**
+ * Move every link buffer to the GPU.
+ * The buffers are processed in a fixed order and the sequence stops at the first
+ * buffer that reports failure.
+ *
+ * @return true if every buffer reported success. m_onGPU is set to true only then.
+ */
+bool btSoftBodyLinkDataOpenCL::moveToAccelerator()
+{
+	// && short-circuits, so a failing buffer prevents the remaining calls.
+	const bool allMoved =
+		m_clLinks.moveToGPU() &&
+		m_clLinkStrength.moveToGPU() &&
+		m_clLinksMassLSC.moveToGPU() &&
+		m_clLinksRestLengthSquared.moveToGPU() &&
+		m_clLinksCLength.moveToGPU() &&
+		m_clLinksLengthRatio.moveToGPU() &&
+		m_clLinksRestLength.moveToGPU() &&
+		m_clLinksMaterialLinearStiffnessCoefficient.moveToGPU();
+
+	if( allMoved )
+		m_onGPU = true;
+
+	return allMoved;
+}
+
+/**
+ * Move every link buffer back from the GPU.
+ * The buffers are processed in a fixed order and the sequence stops at the first
+ * buffer that reports failure.
+ *
+ * @return true if every buffer reported success. m_onGPU is cleared only then.
+ */
+bool btSoftBodyLinkDataOpenCL::moveFromAccelerator()
+{
+	// && short-circuits, so a failing buffer prevents the remaining calls.
+	const bool allMoved =
+		m_clLinks.moveFromGPU() &&
+		m_clLinkStrength.moveFromGPU() &&
+		m_clLinksMassLSC.moveFromGPU() &&
+		m_clLinksRestLengthSquared.moveFromGPU() &&
+		m_clLinksCLength.moveFromGPU() &&
+		m_clLinksLengthRatio.moveFromGPU() &&
+		m_clLinksRestLength.moveFromGPU() &&
+		m_clLinksMaterialLinearStiffnessCoefficient.moveFromGPU();
+
+	if( allMoved )
+		m_onGPU = false;
+
+	return allMoved;
+}
+
+/**
+ * Generate (and later update) the batching for the entire link set.
+ *
+ * Every link is given a colour (batch number) such that no two links attached to
+ * the same vertex share a colour. The per-link arrays are then reordered so that
+ * each batch occupies one contiguous range, recorded in m_batchStartLengths, and
+ * m_linkAddresses is updated to map each link index to its new storage location.
+ *
+ * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+ * In theory we could delay it until just before we need the cloth.
+ * It's a one-off overhead, though, so that is a later optimisation.
+ */
+void btSoftBodyLinkDataOpenCL::generateBatches()
+{
+	int numLinks = getNumLinks();
+
+	// Do the graph colouring here temporarily.
+	// batchValues is indexed by link index, not by storage location.
+	btAlignedObjectArray< int > batchValues;
+	batchValues.resize( numLinks, 0 );
+
+	// Find the maximum vertex value internally for now
+	int maxVertex = 0;
+	for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
+	{
+		int vertex0 = getVertexPair(linkIndex).vertex0;
+		int vertex1 = getVertexPair(linkIndex).vertex1;
+		if( vertex0 > maxVertex )
+			maxVertex = vertex0;
+		if( vertex1 > maxVertex )
+			maxVertex = vertex1;
+	}
+	int numVertices = maxVertex + 1;
+
+	// Set of lists, one for each node, specifying which colours are connected
+	// to that node.
+	// No two edges into a node can share a colour.
+	btAlignedObjectArray< btAlignedObjectArray< int > > vertexConnectedColourLists;
+	vertexConnectedColourLists.resize(numVertices);
+
+	// Simple algorithm that chooses the lowest batch number
+	// that none of the links attached to either of the connected 
+	// nodes is in
+	for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
+	{
+		int linkLocation = m_linkAddresses[linkIndex];
+
+		int vertex0 = getVertexPair(linkLocation).vertex0;
+		int vertex1 = getVertexPair(linkLocation).vertex1;
+
+		// Get the two node colour lists
+		btAlignedObjectArray< int > &colourListVertex0( vertexConnectedColourLists[vertex0] );
+		btAlignedObjectArray< int > &colourListVertex1( vertexConnectedColourLists[vertex1] );
+
+		// Choose the minimum colour that is in neither list
+		// (findLinearSearch returns size() when the value is absent)
+		int colour = 0;
+		while( colourListVertex0.findLinearSearch(colour) != colourListVertex0.size() || colourListVertex1.findLinearSearch(colour) != colourListVertex1.size()  )
+			++colour;
+		// colour is now the minimum colour in neither list.
+		// Add it to the two lists so that future edges don't share it
+		// and store the colour against this edge.
+
+		colourListVertex0.push_back(colour);
+		colourListVertex1.push_back(colour);
+		batchValues[linkIndex] = colour;
+	}
+
+	// Count the links in each batch. A batch not seen before gets a new counter.
+	btAlignedObjectArray< int > batchCounts;
+	for( int i = 0; i < numLinks; ++i )
+	{
+		int batch = batchValues[i];
+		if( batch >= batchCounts.size() )
+			batchCounts.push_back(1);
+		else
+			++(batchCounts[batch]);
+	}
+
+	// Turn the counts into (start, length) ranges via a running sum.
+	// With no batches the array is simply emptied and the loop does not run.
+	m_batchStartLengths.resize(batchCounts.size());
+	int sum = 0;
+	for( int batchIndex = 0; batchIndex < batchCounts.size(); ++batchIndex )
+	{
+		m_batchStartLengths[batchIndex].start = sum;
+		m_batchStartLengths[batchIndex].length = batchCounts[batchIndex];
+		sum += batchCounts[batchIndex];
+	}
+
+	/////////////////////////////
+	// Sort data based on batches
+
+	// Create source arrays by copying originals.
+	// m_linksCLength is not reordered below (unchanged behaviour), so it needs no copy.
+	// NOTE(review): presumably m_linksCLength is recomputed before it is used - confirm.
+	btAlignedObjectArray<LinkNodePair>									m_links_Backup(m_links);
+	btAlignedObjectArray<float>											m_linkStrength_Backup(m_linkStrength);
+	btAlignedObjectArray<float>											m_linksMassLSC_Backup(m_linksMassLSC);
+	btAlignedObjectArray<float>											m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
+	btAlignedObjectArray<float>											m_linksLengthRatio_Backup(m_linksLengthRatio);
+	btAlignedObjectArray<float>											m_linksRestLength_Backup(m_linksRestLength);
+	btAlignedObjectArray<float>											m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
+
+
+	// Reuse batchCounts as the per-batch fill cursor
+	for( int batch = 0; batch < batchCounts.size(); ++batch )
+		batchCounts[batch] = 0;
+
+	// Do sort as single pass into destination arrays
+	for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
+	{
+		// To maintain locations run off the original link locations rather than the current position.
+		// It's not cache efficient, but as we run this rarely that should not matter.
+		// It's faster than searching the link location array for the current location and then updating it.
+		// The other alternative would be to unsort before resorting, but this is equivalent to doing that.
+		int linkLocation = m_linkAddresses[linkIndex];
+
+		// Obtain batch and calculate target location for the
+		// next element in that batch, incrementing the batch counter
+		// afterwards
+		int batch = batchValues[linkIndex];
+		int newLocation = m_batchStartLengths[batch].start + batchCounts[batch];
+
+		batchCounts[batch] = batchCounts[batch] + 1;
+		m_links[newLocation] = m_links_Backup[linkLocation];
+		m_linkStrength[newLocation] = m_linkStrength_Backup[linkLocation];
+		m_linksMassLSC[newLocation] = m_linksMassLSC_Backup[linkLocation];
+		m_linksRestLengthSquared[newLocation] = m_linksRestLengthSquared_Backup[linkLocation];
+		m_linksLengthRatio[newLocation] = m_linksLengthRatio_Backup[linkLocation];
+		m_linksRestLength[newLocation] = m_linksRestLength_Backup[linkLocation];
+		m_linksMaterialLinearStiffnessCoefficient[newLocation] = m_linksMaterialLinearStiffnessCoefficient_Backup[linkLocation];
+
+		// Update the locations array to account for the moved entry
+		m_linkAddresses[linkIndex] = newLocation;
+	}
+
+
+} // void generateBatches()
+
+
+
+
+
+/**
+ * Construct the OpenCL triangle data.
+ * Every OpenCL buffer is created for the given queue and context and bound to the
+ * address of the corresponding host-side triangle array.
+ *
+ * @param queue OpenCL command queue, stored in m_queue and passed to every buffer.
+ * @param ctx   OpenCL context passed to every buffer.
+ */
+btSoftBodyTriangleDataOpenCL::btSoftBodyTriangleDataOpenCL( cl_command_queue queue , cl_context ctx) : 
+    m_queue( queue ),
+	m_clVertexIndices( queue, ctx, &m_vertexIndices, false ),
+	m_clArea( queue, ctx, &m_area, false ),
+	m_clNormal( queue, ctx, &m_normal, false )
+{
+	// Nothing has been moved to the GPU yet; give onAccelerator() a defined answer.
+	// Assigned in the body because the declaration of m_onGPU is not part of this file.
+	m_onGPU = false;
+}
+
+/** Destructor; performs no explicit work of its own. */
+btSoftBodyTriangleDataOpenCL::~btSoftBodyTriangleDataOpenCL()
+{
+}
+
+/**
+ * Grow all triangle-related arrays so that numTriangles additional triangles fit.
+ * The base class grows its own arrays; m_triangleAddresses is resized to the same total.
+ */
+void btSoftBodyTriangleDataOpenCL::createTriangles( int numTriangles )
+{
+	// Work out the new total before the base class changes the triangle count.
+	const int totalTriangles = getNumTriangles() + numTriangles;
+
+	btSoftBodyTriangleData::createTriangles( numTriangles );
+
+	// Keep the triangle address table the same length as the triangle arrays.
+	m_triangleAddresses.resize( totalTriangles );
+}
+
+/**
+ * Store the described triangle at triangleIndex, assuming space has already been
+ * allocated by a call to createTriangles.
+ * The base class stores the triangle data; the address table entry for
+ * triangleIndex is then set to triangleIndex itself (generateBatches() later
+ * rewrites these entries).
+ */
+void btSoftBodyTriangleDataOpenCL::setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex )
+{
+	btSoftBodyTriangleData::setTriangleAt( triangle, triangleIndex );
+
+	m_triangleAddresses[triangleIndex] = triangleIndex;
+}
+
+/** @return the current value of the m_onGPU flag. */
+bool btSoftBodyTriangleDataOpenCL::onAccelerator()
+{
+	return m_onGPU;
+}
+
+/**
+ * Move the vertex index, area and normal buffers to the GPU, in that order.
+ * The sequence stops at the first buffer that reports failure.
+ *
+ * @return true if every buffer reported success. m_onGPU is set to true only then.
+ */
+bool btSoftBodyTriangleDataOpenCL::moveToAccelerator()
+{
+	// && short-circuits, so a failing buffer prevents the remaining calls.
+	const bool allMoved =
+		m_clVertexIndices.moveToGPU() &&
+		m_clArea.moveToGPU() &&
+		m_clNormal.moveToGPU();
+
+	if( allMoved )
+	{
+		m_onGPU = true;
+	}
+
+	return allMoved;
+}
+
+/**
+ * Move the vertex index, area and normal buffers back from the GPU, in that order.
+ * The sequence stops at the first buffer that reports failure.
+ *
+ * @return true if every buffer reported success. m_onGPU is cleared only then.
+ */
+bool btSoftBodyTriangleDataOpenCL::moveFromAccelerator()
+{
+	bool success = true;
+	success = success && m_clVertexIndices.moveFromGPU();
+	success = success && m_clArea.moveFromGPU();
+	success = success && m_clNormal.moveFromGPU();
+
+	// The data has left the GPU, so the flag must be cleared
+	// (it used to be set to true here, unlike btSoftBodyLinkDataOpenCL).
+	if( success )
+		m_onGPU = false;
+
+	return success;
+}
+
+/**
+ * Generate (and later update) the batching for the entire triangle set.
+ *
+ * Every triangle is given a colour (batch number) such that no two triangles that
+ * share a vertex have the same colour. The per-triangle arrays are then reordered so
+ * that each batch occupies one contiguous range, recorded in m_batchStartLengths as
+ * (first = start, second = length), and m_triangleAddresses is updated to map each
+ * triangle index to its new storage location. Does nothing when there are no triangles.
+ *
+ * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+ * In theory we could delay it until just before we need the cloth.
+ * It's a one-off overhead, though, so that is a later optimisation.
+ */
+void btSoftBodyTriangleDataOpenCL::generateBatches()
+{
+	int numTriangles = getNumTriangles();
+	if( numTriangles == 0 )
+		return;
+
+	// Do the graph colouring here temporarily.
+	// batchValues is indexed by triangle index, not by storage location.
+	btAlignedObjectArray< int > batchValues;
+	batchValues.resize( numTriangles );
+
+	// Find the maximum vertex value internally for now
+	int maxVertex = 0;
+	for( int triangleIndex = 0; triangleIndex < numTriangles; ++triangleIndex )
+	{
+		int vertex0 = getVertexSet(triangleIndex).vertex0;
+		int vertex1 = getVertexSet(triangleIndex).vertex1;
+		int vertex2 = getVertexSet(triangleIndex).vertex2;
+		
+		if( vertex0 > maxVertex )
+			maxVertex = vertex0;
+		if( vertex1 > maxVertex )
+			maxVertex = vertex1;
+		if( vertex2 > maxVertex )
+			maxVertex = vertex2;
+	}
+	int numVertices = maxVertex + 1;
+
+	// Set of lists, one for each node, specifying which colours are connected
+	// to that node.
+	// No two triangles sharing a node can share a colour.
+	btAlignedObjectArray< btAlignedObjectArray< int > > vertexConnectedColourLists;
+	vertexConnectedColourLists.resize(numVertices);
+
+
+	// Simple algorithm that chooses the lowest batch number
+	// that none of the faces attached to any of the connected 
+	// nodes is in
+	for( int triangleIndex = 0; triangleIndex < numTriangles; ++triangleIndex )
+	{
+		// To maintain locations run off the original triangle locations rather than the current position.
+		// It's not cache efficient, but as we run this rarely that should not matter.
+		// It's faster than searching the triangle location array for the current location and then updating it.
+		// The other alternative would be to unsort before resorting, but this is equivalent to doing that.
+		int triangleLocation = m_triangleAddresses[triangleIndex];
+
+		int vertex0 = getVertexSet(triangleLocation).vertex0;
+		int vertex1 = getVertexSet(triangleLocation).vertex1;
+		int vertex2 = getVertexSet(triangleLocation).vertex2;
+
+		// Get the three node colour lists
+		btAlignedObjectArray< int > &colourListVertex0( vertexConnectedColourLists[vertex0] );
+		btAlignedObjectArray< int > &colourListVertex1( vertexConnectedColourLists[vertex1] );
+		btAlignedObjectArray< int > &colourListVertex2( vertexConnectedColourLists[vertex2] );
+
+		// Choose the minimum colour that is in none of the lists
+		// (findLinearSearch returns size() when the value is absent)
+		int colour = 0;
+		while( 
+			colourListVertex0.findLinearSearch(colour) != colourListVertex0.size() || 
+			colourListVertex1.findLinearSearch(colour) != colourListVertex1.size() ||
+			colourListVertex2.findLinearSearch(colour) != colourListVertex2.size() )
+		{
+			++colour;
+		}
+		// colour is now the minimum colour in none of the three lists.
+		// Add it to the three lists so that future faces don't share it
+		// and store the colour against this face.
+		colourListVertex0.push_back(colour);
+		colourListVertex1.push_back(colour);
+		colourListVertex2.push_back(colour);
+
+		batchValues[triangleIndex] = colour;
+	}
+
+
+	// Count the triangles in each batch. A batch not seen before gets a new counter.
+	btAlignedObjectArray< int > batchCounts;
+	for( int i = 0; i < numTriangles; ++i )
+	{
+		int batch = batchValues[i];
+		if( batch >= batchCounts.size() )
+			batchCounts.push_back(1);
+		else
+			++(batchCounts[batch]);
+	}
+
+
+	// numTriangles > 0 here, so at least one batch exists and element 0 is valid.
+	// The value written to element 0 is overwritten by the first iteration of the loop below.
+	m_batchStartLengths.resize(batchCounts.size());
+	m_batchStartLengths[0] = btSomePair(0,0);
+
+
+	// Turn the counts into (start, length) pairs via a running sum
+	int sum = 0;
+	for( int batchIndex = 0; batchIndex < batchCounts.size(); ++batchIndex )
+	{
+		m_batchStartLengths[batchIndex].first = sum;
+		m_batchStartLengths[batchIndex].second = batchCounts[batchIndex];
+		sum += batchCounts[batchIndex];
+	}
+	
+	/////////////////////////////
+	// Sort data based on batches
+	
+	// Create source arrays by copying originals
+	btAlignedObjectArray<btSoftBodyTriangleData::TriangleNodeSet>							m_vertexIndices_Backup(m_vertexIndices);
+	btAlignedObjectArray<float>										m_area_Backup(m_area);
+	btAlignedObjectArray<Vectormath::Aos::Vector3>					m_normal_Backup(m_normal);
+
+
+	// Reuse batchCounts as the per-batch fill cursor
+	for( int batch = 0; batch < batchCounts.size(); ++batch )
+		batchCounts[batch] = 0;
+
+	// Do sort as single pass into destination arrays
+	for( int triangleIndex = 0; triangleIndex < numTriangles; ++triangleIndex )
+	{
+		// To maintain locations run off the original triangle locations rather than the current position.
+		// It's not cache efficient, but as we run this rarely that should not matter.
+		// It's faster than searching the triangle location array for the current location and then updating it.
+		// The other alternative would be to unsort before resorting, but this is equivalent to doing that.
+		int triangleLocation = m_triangleAddresses[triangleIndex];
+
+		// Obtain batch and calculate target location for the
+		// next element in that batch, incrementing the batch counter
+		// afterwards
+		int batch = batchValues[triangleIndex];
+		int newLocation = m_batchStartLengths[batch].first + batchCounts[batch];
+
+		batchCounts[batch] = batchCounts[batch] + 1;
+		m_vertexIndices[newLocation] = m_vertexIndices_Backup[triangleLocation];
+		m_area[newLocation] = m_area_Backup[triangleLocation];
+		m_normal[newLocation] = m_normal_Backup[triangleLocation];
+
+		// Update the locations array to account for the moved entry
+		m_triangleAddresses[triangleIndex] = newLocation;
+	}
+} // btSoftBodyTriangleDataOpenCL::generateBatches
+
+
+
+
+
+
+
+/**
+ * Construct the OpenCL soft body solver.
+ * Creates the link, vertex and triangle data and the default CL function wrapper for
+ * the given queue and context, binds one OpenCL buffer to each per-cloth, collision
+ * and anchor host array, and sets every kernel handle to 0 so that releaseKernels()
+ * can be called safely before any kernel exists.
+ *
+ * @param queue                   OpenCL command queue used by the solver and its buffers.
+ * @param ctx                     OpenCL context used by the solver and its buffers.
+ * @param bUpdateAchchoredNodePos stored in m_bUpdateAnchoredNodePos.
+ */
+btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) :
+	m_linkData(queue, ctx),
+	m_vertexData(queue, ctx),
+	m_triangleData(queue, ctx),
+	m_defaultCLFunctions(queue, ctx),
+	m_currentCLFunctions(&m_defaultCLFunctions),
+	m_clPerClothAcceleration(queue, ctx, &m_perClothAcceleration, true ),
+	m_clPerClothWindVelocity(queue, ctx, &m_perClothWindVelocity, true ),
+	m_clPerClothDampingFactor(queue,ctx, &m_perClothDampingFactor, true ),
+	m_clPerClothVelocityCorrectionCoefficient(queue, ctx,&m_perClothVelocityCorrectionCoefficient, true ),
+	m_clPerClothLiftFactor(queue, ctx,&m_perClothLiftFactor, true ),
+	m_clPerClothDragFactor(queue, ctx,&m_perClothDragFactor, true ),
+	m_clPerClothMediumDensity(queue, ctx,&m_perClothMediumDensity, true ),
+	m_clPerClothCollisionObjects( queue, ctx, &m_perClothCollisionObjects, true ),
+	m_clCollisionObjectDetails( queue, ctx, &m_collisionObjectDetails, true ),
+	m_clPerClothFriction( queue, ctx, &m_perClothFriction, false ),
+	m_clAnchorPosition( queue, ctx, &m_anchorPosition, true ),
+	m_clAnchorIndex( queue, ctx, &m_anchorIndex, true),
+	m_cqCommandQue( queue ),
+	m_cxMainContext(ctx),
+	m_defaultWorkGroupSize(BT_DEFAULT_WORKGROUPSIZE),
+	m_bUpdateAnchoredNodePos(bUpdateAchchoredNodePos)
+{
+
+	// Initially we will clearly need to update solver constants.
+	// For now this is global for the cloths linked with this solver - we should probably make this body specific 
+	// for performance in future once we understand more clearly when constants need to be updated
+	m_updateSolverConstants = true;
+
+	// No kernels have been built yet
+	m_shadersInitialized = false;
+
+	// Start with every kernel handle cleared
+	m_prepareLinksKernel = 0;
+	m_solvePositionsFromLinksKernel = 0;
+	m_updateConstantsKernel = 0;
+	m_integrateKernel = 0;
+	m_addVelocityKernel = 0;
+	m_updatePositionsFromVelocitiesKernel = 0;
+	m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = 0;
+	m_updateVelocitiesFromPositionsWithVelocitiesKernel = 0;
+	m_vSolveLinksKernel = 0;
+	m_solveCollisionsAndUpdateVelocitiesKernel = 0;
+	m_resetNormalsAndAreasKernel = 0;
+	m_updateSoftBodiesKernel = 0;
+	m_normalizeNormalsAndAreasKernel = 0;
+	m_outputToVertexArrayKernel = 0;
+	m_applyForcesKernel = 0;
+	m_updateFixedVertexPositionsKernel = 0;
+}
+
+/**
+ * Destructor; releases the OpenCL kernels held by the solver.
+ * NOTE(review): the btOpenCLAcceleratedSoftBodyInterface objects allocated with new in
+ * optimize() are not deleted here - confirm whether they are freed elsewhere.
+ */
+btOpenCLSoftBodySolver::~btOpenCLSoftBodySolver()
+{
+	releaseKernels();
+}
+
+/**
+ * Release every OpenCL kernel handle owned by the solver and mark the shaders as
+ * uninitialised. Each handle is released only if non-zero and is reset to 0
+ * afterwards, so calling this more than once is safe.
+ */
+void btOpenCLSoftBodySolver::releaseKernels()
+{
+	RELEASE_CL_KERNEL( m_prepareLinksKernel );
+	RELEASE_CL_KERNEL( m_solvePositionsFromLinksKernel );
+	RELEASE_CL_KERNEL( m_updateConstantsKernel );
+	RELEASE_CL_KERNEL( m_integrateKernel );
+	RELEASE_CL_KERNEL( m_addVelocityKernel );
+	RELEASE_CL_KERNEL( m_updatePositionsFromVelocitiesKernel );
+	RELEASE_CL_KERNEL( m_updateVelocitiesFromPositionsWithoutVelocitiesKernel );
+	RELEASE_CL_KERNEL( m_updateVelocitiesFromPositionsWithVelocitiesKernel );
+	RELEASE_CL_KERNEL( m_vSolveLinksKernel );
+	RELEASE_CL_KERNEL( m_solveCollisionsAndUpdateVelocitiesKernel );
+	RELEASE_CL_KERNEL( m_resetNormalsAndAreasKernel );
+	// This handle is cleared in the constructor like the others but used to be
+	// missing from this list, so a kernel stored in it was never released.
+	RELEASE_CL_KERNEL( m_updateSoftBodiesKernel );
+	RELEASE_CL_KERNEL( m_normalizeNormalsAndAreasKernel );
+	RELEASE_CL_KERNEL( m_outputToVertexArrayKernel );
+	RELEASE_CL_KERNEL( m_applyForcesKernel );
+	RELEASE_CL_KERNEL( m_updateFixedVertexPositionsKernel );
+
+	m_shadersInitialized = false;
+}
+
+/**
+ * Write the solver's vertex positions and normals back into the btSoftBody nodes.
+ *
+ * @param bMove true: the vertex data is moved off the accelerator first;
+ *              false: it is copied instead (moveFromAccelerator is called with
+ *              bCopy = !bMove and its default bCopyMinimum).
+ */
+void btOpenCLSoftBodySolver::copyBackToSoftBodies(bool bMove)
+{
+	using Vectormath::Aos::Point3;
+
+	// Bring the vertex data back to the host before reading it
+	m_vertexData.moveFromAccelerator(!bMove);
+
+	// Visit each soft body in turn and refresh all of its nodes
+	const int softBodyCount = m_softBodySet.size();
+	for( int bodyIdx = 0; bodyIdx < softBodyCount; ++bodyIdx )
+	{
+		btOpenCLAcceleratedSoftBodyInterface *bodyInterface = m_softBodySet[ bodyIdx ];
+		btSoftBody *body = bodyInterface->getSoftBody();
+
+		const int baseVertex = bodyInterface->getFirstVertex();
+		const int vertexCount = bodyInterface->getNumVertices();
+
+		for( int localIdx = 0; localIdx < vertexCount; ++localIdx )
+		{
+			// Solver arrays are shared by all bodies; offset by this body's first vertex
+			const int solverIdx = baseVertex + localIdx;
+			Point3 position( m_vertexData.getVertexPositions()[solverIdx] );
+			Point3 normal( m_vertexData.getNormal(solverIdx) );
+
+			btSoftBody::Node &node = body->m_nodes[localIdx];
+
+			node.m_x.setX( position.getX() );
+			node.m_x.setY( position.getY() );
+			node.m_x.setZ( position.getZ() );
+
+			node.m_n.setX( normal.getX() );
+			node.m_n.setY( normal.getY() );
+			node.m_n.setZ( normal.getZ() );
+		}
+	}
+} // btOpenCLSoftBodySolver::copyBackToSoftBodies
+
+void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies, bool forceUpdate )
+{
+	if( forceUpdate || m_softBodySet.size() != softBodies.size() )
+	{
+		// Have a change in the soft body set so update, reloading all the data
+		getVertexData().clear();
+		getTriangleData().clear();
+		getLinkData().clear();
+		m_softBodySet.resize(0);
+		m_anchorIndex.clear();
+
+		int maxPiterations = 0;
+		int maxViterations = 0;
+
+		for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
+		{
+			btSoftBody *softBody = softBodies[ softBodyIndex ];
+			using Vectormath::Aos::Matrix3;
+			using Vectormath::Aos::Point3;
+
+			// Create SoftBody that will store the information within the solver
+			btOpenCLAcceleratedSoftBodyInterface *newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody );
+			m_softBodySet.push_back( newSoftBody );
+
+			m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
+			m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
+			m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
+			m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
+			m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
+			m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
+			// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
+			m_perClothFriction.push_back(softBody->m_cfg.kDF);
+			m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
+
+			// Add space for new vertices and triangles in the default solver for now
+			// TODO: Include space here for tearing too later
+			int firstVertex = getVertexData().getNumVertices();
+			int numVertices = softBody->m_nodes.size();
+			int maxVertices = numVertices;
+			// Allocate space for new vertices in all the vertex arrays
+			getVertexData().createVertices( maxVertices, softBodyIndex );
+
+			int firstTriangle = getTriangleData().getNumTriangles();
+			int numTriangles = softBody->m_faces.size();
+			int maxTriangles = numTriangles;
+			getTriangleData().createTriangles( maxTriangles );
+
+			// Copy vertices from softbody into the solver
+			for( int vertex = 0; vertex < numVertices; ++vertex )
+			{
+				Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
+				btSoftBodyVertexData::VertexDescription desc;
+
+				// TODO: Position in the softbody might be pre-transformed
+				// or we may need to adapt for the pose.
+				//desc.setPosition( cloth.getMeshTransform()*multPoint );
+				desc.setPosition( multPoint );
+
+				float vertexInverseMass = softBody->m_nodes[vertex].m_im;
+				desc.setInverseMass(vertexInverseMass);
+				getVertexData().setVertexAt( desc, firstVertex + vertex );
+
+				m_anchorIndex.push_back(-1.0);
+			}
+
+			// Copy triangles similarly
+			// We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
+			for( int triangle = 0; triangle < numTriangles; ++triangle )
+			{
+				// Note that large array storage is relative to the array not to the cloth
+				// So we need to add firstVertex to each value
+				int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
+				int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
+				int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
+				btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
+				getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
+				
+				// Increase vertex triangle counts for this triangle		
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
+			}
+
+			int firstLink = getLinkData().getNumLinks();
+			int numLinks = softBody->m_links.size();
+			int maxLinks = numLinks;
+			
+			// Allocate space for the links
+			getLinkData().createLinks( numLinks );
+
+			// Add the links
+			for( int link = 0; link < numLinks; ++link )
+			{
+				int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
+				int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
+
+				btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
+				newLink.setLinkStrength(1.f);
+				getLinkData().setLinkAt(newLink, firstLink + link);
+			}
+			
+			newSoftBody->setFirstVertex( firstVertex );
+			newSoftBody->setFirstTriangle( firstTriangle );
+			newSoftBody->setNumVertices( numVertices );
+			newSoftBody->setMaxVertices( maxVertices );
+			newSoftBody->setNumTriangles( numTriangles );
+			newSoftBody->setMaxTriangles( maxTriangles );
+			newSoftBody->setFirstLink( firstLink );
+			newSoftBody->setNumLinks( numLinks );
+
+			// Find maximum piterations and viterations
+			int piterations = softBody->m_cfg.piterations;
+
+            if ( piterations > maxPiterations )
+                  maxPiterations = piterations;
+
+            int viterations = softBody->m_cfg.viterations;
+
+			if ( viterations > maxViterations )
+                  maxViterations = viterations;
+
+			// zero mass
+			for( int vertex = 0; vertex < numVertices; ++vertex )
+			{
+				if ( softBody->m_nodes[vertex].m_im == 0 )
+				{
+					AnchorNodeInfoCL nodeInfo;
+					nodeInfo.clVertexIndex = firstVertex + vertex;
+					nodeInfo.pNode = &softBody->m_nodes[vertex];
+
+					m_anchorNodeInfoArray.push_back(nodeInfo);
+				}
+			}			
+
+			// anchor position
+			if ( numVertices > 0 )
+			{
+				for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ )
+				{
+					btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node;
+					btSoftBody::Node* firstNode = &softBody->m_nodes[0];
+
+					AnchorNodeInfoCL nodeInfo;
+					nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
+					nodeInfo.pNode = anchorNode;
+
+					m_anchorNodeInfoArray.push_back(nodeInfo);
+				}
+			}			
+		}
+
+		
+		m_anchorPosition.clear();		
+		m_anchorPosition.resize(m_anchorNodeInfoArray.size());
+
+		for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ )
+		{
+			const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode];
+			m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode;
+			getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f;
+		}
+		
+		updateConstants(0.f);
+
+		// set position and velocity iterations
+		setNumberOfPositionIterations(maxPiterations);
+		setNumberOfVelocityIterations(maxViterations);
+
+		// set wind velocity
+		m_perClothWindVelocity.resize( m_softBodySet.size() );
+		for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
+		{
+			btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();			
+			m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
+		}
+
+		m_clPerClothWindVelocity.changedOnCPU();
+
+		// generate batches
+		m_linkData.generateBatches();		
+		m_triangleData.generateBatches();
+
+		// Build the shaders to match the batching parameters
+		buildShaders();
+	}
+}
+
+
+btSoftBodyLinkData &btOpenCLSoftBodySolver::getLinkData()
+{
+	// Hands out a mutable reference to m_linkData. TODO: Consider setting link data to "changed" here
+	return m_linkData;
+}
+
+btSoftBodyVertexData &btOpenCLSoftBodySolver::getVertexData()
+{
+	// Hands out a mutable reference to m_vertexData. TODO: Consider setting vertex data to "changed" here
+	return m_vertexData;
+}
+
+btSoftBodyTriangleData &btOpenCLSoftBodySolver::getTriangleData()
+{
+	// Hands out a mutable reference to m_triangleData. TODO: Consider setting triangle data to "changed" here
+	return m_triangleData;
+}
+
+void btOpenCLSoftBodySolver::resetNormalsAndAreas( int numVertices )
+{
+	// Enqueue m_resetNormalsAndAreasKernel; it is given the vertex count plus the vertex normal and area buffers.
+	cl_int status = clSetKernelArg( m_resetNormalsAndAreasKernel, 0, sizeof(numVertices), &numVertices );
+	status = clSetKernelArg( m_resetNormalsAndAreasKernel, 1, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer );
+	status = clSetKernelArg( m_resetNormalsAndAreasKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer );
+
+	// Global size: numVertices rounded up to a whole number of work groups.
+	const size_t groupCount = (numVertices + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize;
+	size_t globalSize = m_defaultWorkGroupSize*groupCount;
+	if( globalSize == 0 )
+		return;	// nothing to launch
+
+	status = clEnqueueNDRangeKernel( m_cqCommandQue, m_resetNormalsAndAreasKernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize, 0, 0, 0 );
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 && "enqueueNDRangeKernel(m_resetNormalsAndAreasKernel)" );
+	}
+}
+
+void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices )
+{
+	// Enqueue m_normalizeNormalsAndAreasKernel; it is given the vertex count plus the triangle-count, normal and area buffers.
+	cl_int status = clSetKernelArg( m_normalizeNormalsAndAreasKernel, 0, sizeof(int), &numVertices );
+	status = clSetKernelArg( m_normalizeNormalsAndAreasKernel, 1, sizeof(cl_mem), &m_vertexData.m_clVertexTriangleCount.m_buffer );
+	status = clSetKernelArg( m_normalizeNormalsAndAreasKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer );
+	status = clSetKernelArg( m_normalizeNormalsAndAreasKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer );
+
+	// Global size: numVertices rounded up to a whole number of work groups.
+	const size_t groupCount = (numVertices + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize;
+	size_t globalSize = m_defaultWorkGroupSize*groupCount;
+	if( globalSize == 0 )
+		return;	// nothing to launch
+	status = clEnqueueNDRangeKernel( m_cqCommandQue, m_normalizeNormalsAndAreasKernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize, 0, 0, 0 );
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 && "enqueueNDRangeKernel(m_normalizeNormalsAndAreasKernel)");
+	}
+}
+
+void btOpenCLSoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int numTriangles )
+{
+	// Enqueue m_updateSoftBodiesKernel for one triangle batch: numTriangles triangles starting at firstTriangle.
+	cl_int ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 0, sizeof(int), &firstTriangle);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 1, sizeof(int), &numTriangles);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 2, sizeof(cl_mem), &m_triangleData.m_clVertexIndices.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 6, sizeof(cl_mem), &m_triangleData.m_clNormal.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 7, sizeof(cl_mem), &m_triangleData.m_clArea.m_buffer);
+	size_t numWorkItems = m_defaultWorkGroupSize*((numTriangles + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if( numWorkItems == 0 )
+		return; // empty batch: clEnqueueNDRangeKernel rejects a zero global work size before OpenCL 2.1
+	ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, m_updateSoftBodiesKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
+	if( ciErrNum != CL_SUCCESS ) 
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_updateSoftBodiesKernel)");
+	}
+}
+
+void btOpenCLSoftBodySolver::updateSoftBodies()
+{
+	// Drives the normal/area kernels in three stages:
+	//   1. resetNormalsAndAreas over all vertices,
+	//   2. executeUpdateSoftBodies once per triangle batch,
+	//   3. normalizeNormalsAndAreas over all vertices.
+
+	const int numVertices = m_vertexData.getNumVertices();
+
+	// Ensure data is on accelerator
+	m_vertexData.moveToAccelerator();
+	m_triangleData.moveToAccelerator();
+
+	resetNormalsAndAreas( numVertices );
+
+	// Go through triangle batches so updates occur correctly.
+	// Each m_batchStartLengths entry is a (first triangle, triangle count) pair.
+	for( int batchIndex = 0; batchIndex < m_triangleData.m_batchStartLengths.size(); ++batchIndex )
+	{
+		const int startTriangle = m_triangleData.m_batchStartLengths[batchIndex].first;
+		const int batchTriangles = m_triangleData.m_batchStartLengths[batchIndex].second;
+
+		executeUpdateSoftBodies( startTriangle, batchTriangles );
+	}
+
+	// All batches are enqueued; finish with the per-vertex normalisation pass.
+	normalizeNormalsAndAreas( numVertices );
+} // updateSoftBodies
+
+
+Vectormath::Aos::Vector3 btOpenCLSoftBodySolver::ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a )
+{
+	return a*Vectormath::Aos::dot(v, a); // a scaled by dot(v, a): the projection of v onto a when a has unit length
+}
+
+void btOpenCLSoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce )
+{
+	// When |force*solverdt*inverseMass| exceeds |vertexVelocity|, subtract the velocity's projection on the force direction (scaled by 1/(solverdt*inverseMass)) instead of adding force.
+	const float velocityScale = solverdt*inverseMass;
+	const bool clampNeeded = Vectormath::Aos::lengthSqr(force * velocityScale) > Vectormath::Aos::lengthSqr(vertexVelocity);
+	if( !clampNeeded )
+		vertexForce += force;
+	else
+		vertexForce -= ProjectOnAxis( vertexVelocity, normalize( force ) )/velocityScale;
+}
+
+void btOpenCLSoftBodySolver::updateFixedVertexPositions()
+{
+	// Enqueue m_updateFixedVertexPositionsKernel with the anchor index/position buffers and the vertex position buffer.
+	// Ensure data is on accelerator
+	m_vertexData.moveToAccelerator();
+	m_clAnchorPosition.moveToGPU();
+	m_clAnchorIndex.moveToGPU();
+
+	int vertexCount = m_vertexData.getNumVertices();
+	cl_int status = clSetKernelArg( m_updateFixedVertexPositionsKernel, 0, sizeof(int), &vertexCount );
+	status = clSetKernelArg( m_updateFixedVertexPositionsKernel, 1, sizeof(cl_mem), &m_clAnchorIndex.m_buffer );
+	status = clSetKernelArg( m_updateFixedVertexPositionsKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer );
+	status = clSetKernelArg( m_updateFixedVertexPositionsKernel, 3, sizeof(cl_mem), &m_clAnchorPosition.m_buffer );
+
+	// Global size: vertex count rounded up to a whole number of work groups.
+	const size_t groupCount = (m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize;
+	size_t globalSize = m_defaultWorkGroupSize*groupCount;
+	if( globalSize == 0 )
+		return;	// nothing to launch
+	status = clEnqueueNDRangeKernel( m_cqCommandQue, m_updateFixedVertexPositionsKernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize, 0, 0, 0 );
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_updateFixedVertexPositionsKernel)");
+	}
+}
+
+void btOpenCLSoftBodySolver::applyForces( float solverdt )
+{
+	// Enqueue m_applyForcesKernel over all vertices, passing solverdt, FLT_EPSILON and the per-vertex / per-cloth buffers.
+	// Ensure data is on accelerator
+	m_vertexData.moveToAccelerator();
+	m_clPerClothAcceleration.moveToGPU();
+	m_clPerClothLiftFactor.moveToGPU();
+	m_clPerClothDragFactor.moveToGPU();
+	m_clPerClothMediumDensity.moveToGPU();
+	m_clPerClothWindVelocity.moveToGPU();
+
+	int vertexCount = m_vertexData.getNumVertices();
+	float epsilon = FLT_EPSILON;
+	cl_int status = clSetKernelArg( m_applyForcesKernel, 0, sizeof(int), &vertexCount );
+	status = clSetKernelArg( m_applyForcesKernel, 1, sizeof(float), &solverdt );
+	status = clSetKernelArg( m_applyForcesKernel, 2, sizeof(float), &epsilon );
+	status = clSetKernelArg( m_applyForcesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clClothIdentifier.m_buffer );
+	status = clSetKernelArg( m_applyForcesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer );
+	status = clSetKernelArg( m_applyForcesKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer );
+	status = clSetKernelArg( m_applyForcesKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer );
+	status = clSetKernelArg( m_applyForcesKernel, 7, sizeof(cl_mem), &m_clPerClothLiftFactor.m_buffer );
+	status = clSetKernelArg( m_applyForcesKernel, 8, sizeof(cl_mem), &m_clPerClothDragFactor.m_buffer );
+	status = clSetKernelArg( m_applyForcesKernel, 9, sizeof(cl_mem), &m_clPerClothWindVelocity.m_buffer );
+	status = clSetKernelArg( m_applyForcesKernel, 10, sizeof(cl_mem), &m_clPerClothAcceleration.m_buffer );
+	status = clSetKernelArg( m_applyForcesKernel, 11, sizeof(cl_mem), &m_clPerClothMediumDensity.m_buffer );
+	status = clSetKernelArg( m_applyForcesKernel, 12, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer );
+	status = clSetKernelArg( m_applyForcesKernel, 13, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer );
+
+	// Global size: vertex count rounded up to a whole number of work groups.
+	const size_t groupCount = (m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize;
+	size_t globalSize = m_defaultWorkGroupSize*groupCount;
+	if( globalSize == 0 )
+		return;	// nothing to launch
+	status = clEnqueueNDRangeKernel( m_cqCommandQue, m_applyForcesKernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize, 0, 0, 0 );
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_applyForcesKernel)");
+	}
+}
+
+/**
+ * Integrate motion on the solver: enqueues m_integrateKernel over all vertices with time step solverdt.
+ */
+void btOpenCLSoftBodySolver::integrate( float solverdt )
+{
+	// Ensure data is on accelerator
+	m_vertexData.moveToAccelerator();
+
+	int vertexCount = m_vertexData.getNumVertices();
+	cl_int status = clSetKernelArg( m_integrateKernel, 0, sizeof(int), &vertexCount );
+	status = clSetKernelArg( m_integrateKernel, 1, sizeof(float), &solverdt );
+	status = clSetKernelArg( m_integrateKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer );
+	status = clSetKernelArg( m_integrateKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer );
+	status = clSetKernelArg( m_integrateKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer );
+	status = clSetKernelArg( m_integrateKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer );
+	status = clSetKernelArg( m_integrateKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer );
+
+	// Global size: vertex count rounded up to a whole number of work groups.
+	const size_t groupCount = (m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize;
+	size_t globalSize = m_defaultWorkGroupSize*groupCount;
+	if( globalSize == 0 )
+		return;	// nothing to launch
+
+	status = clEnqueueNDRangeKernel( m_cqCommandQue, m_integrateKernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize, 0, 0, 0 );
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_integrateKernel)");
+	}
+}
+
+float btOpenCLSoftBodySolver::computeTriangleArea( 
+	const Vectormath::Aos::Point3 &vertex0,
+	const Vectormath::Aos::Point3 &vertex1,
+	const Vectormath::Aos::Point3 &vertex2 )
+{
+	// Returns |(vertex1 - vertex0) x (vertex2 - vertex0)|.
+	// NOTE(review): that is the parallelogram area, i.e. twice the triangle's area - confirm callers expect this.
+	const Vectormath::Aos::Vector3 edge01 = vertex1 - vertex0;
+	const Vectormath::Aos::Vector3 edge02 = vertex2 - vertex0;
+	return length( cross(edge01, edge02) );
+}
+
+
+void btOpenCLSoftBodySolver::updateBounds()
+{
+	// Every soft body in the set is handed the same fixed box spanning -1e30..1e30 on each axis.
+	for( int body = 0; body < m_softBodySet.size(); ++body )
+	{
+		btVector3 lowerCorner(-1e30,-1e30,-1e30), upperCorner(1e30,1e30,1e30);
+		m_softBodySet[body]->updateBounds( lowerCorner, upperCorner );
+	}
+} // btOpenCLSoftBodySolver::updateBounds
+
+
+void btOpenCLSoftBodySolver::updateConstants( float timeStep )
+{
+	// Recomputes the per-link constants (rest length, rest length squared, massLSC)
+	// when m_updateSolverConstants is set, then clears the flag. timeStep is not used.
+	using namespace Vectormath::Aos;
+
+	if( !m_updateSolverConstants )
+		return;
+
+	m_updateSolverConstants = false;
+
+	// Will have to redo this if we change the structure (tear, maybe) or various other possible changes
+
+	// Initialise link constants
+	const int linkCount = m_linkData.getNumLinks();
+	for( int link = 0; link < linkCount; ++link )
+	{
+		btSoftBodyLinkData::LinkNodePair &ends( m_linkData.getVertexPair(link) );
+
+		// Rest length is the current distance between the link's two vertices.
+		const float restLength = length( m_vertexData.getPosition( ends.vertex0 ) - m_vertexData.getPosition( ends.vertex1 ) );
+		m_linkData.getRestLength(link) = restLength;
+		m_linkData.getRestLengthSquared(link) = restLength*restLength;
+
+		// massLSC = (sum of the two end vertices' inverse masses) / linear stiffness coefficient
+		const float invMassSum = m_vertexData.getInverseMass(ends.vertex0) + m_vertexData.getInverseMass(ends.vertex1);
+		m_linkData.getMassLSC(link) = invMassSum/m_linkData.getLinearStiffnessCoefficient(link);
+	}
+}
+
+class QuickSortCompare
+{
+	public:
+	// Ordering used with quickSort(): lhs sorts before rhs when its softBodyIdentifier is strictly smaller.
+	bool operator() ( const CollisionShapeDescription& lhs, const CollisionShapeDescription& rhs ) const
+	{
+		return ( lhs.softBodyIdentifier < rhs.softBodyIdentifier );
+	}
+};
+
+
+/**
+ * Sort the collision object details array and generate indexing into it for the per-cloth collision object array.
+ *
+ * After the sort, objects belonging to the same cloth are contiguous; each cloth's entry in
+ * m_perClothCollisionObjects receives the half-open range [firstObject, endObject) of its objects,
+ * or (-1, -1) when no object refers to it.
+ * NOTE: cloth 0 is the walk's starting cloth, so if no object refers to it (including when the
+ * array is empty) it ends up with the empty range (0, 0) instead of (-1, -1).
+ */
+void btOpenCLSoftBodySolver::prepareCollisionConstraints()
+{
+	// First do a simple sort on the collision objects (by softBodyIdentifier)
+	m_collisionObjectDetails.quickSort( QuickSortCompare() );
+
+	if (!m_perClothCollisionObjects.size())
+		return;
+
+	// Generating indexing for perClothCollisionObjects
+	// First clear the previous values with the "no collision object for cloth" constant
+	for( int clothIndex = 0; clothIndex < m_perClothCollisionObjects.size(); ++clothIndex )
+	{
+		m_perClothCollisionObjects[clothIndex].firstObject = -1;
+		m_perClothCollisionObjects[clothIndex].endObject = -1;
+	}
+
+	// Walk the sorted array; each change of softBodyIdentifier closes the range of the previous cloth
+	int currentCloth = 0;
+	int startIndex = 0;
+	for( int collisionObject = 0; collisionObject < m_collisionObjectDetails.size(); ++collisionObject )
+	{
+		int nextCloth = m_collisionObjectDetails[collisionObject].softBodyIdentifier;
+		if( nextCloth != currentCloth )
+		{
+			// Changed cloth in the array
+			// Set the end index and the range is what we need for currentCloth
+			m_perClothCollisionObjects[currentCloth].firstObject = startIndex;
+			m_perClothCollisionObjects[currentCloth].endObject = collisionObject;
+			currentCloth = nextCloth;
+			startIndex = collisionObject;
+		}
+	}
+
+	// And update last cloth
+	m_perClothCollisionObjects[currentCloth].firstObject = startIndex;
+	m_perClothCollisionObjects[currentCloth].endObject =  m_collisionObjectDetails.size();
+} // btOpenCLSoftBodySolver::prepareCollisionConstraints
+
+
+
+void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
+{
+	// Runs one constraint-solving step on the accelerator:
+	//   1. prepareLinks()
+	//   2. m_numberOfVelocityIterations passes of solveLinksForVelocity over every link batch
+	//   3. prepareCollisionConstraints()
+	//   4. updateVelocitiesFromPositionsWithVelocities, or ...WithoutVelocities when there are no velocity iterations
+	//   5. m_numberOfPositionIterations passes of solveLinksForPosition over every link batch
+	//   6. solveCollisionsAndUpdateVelocities
+	// solverdt is only used as 1/solverdt; a zero solverdt yields an infinite inverse time step.
+
+	// kst is forwarded to both link kernels
+	const float kst = 1.f;
+	// ti is forwarded to the position link kernel only
+	const float ti = 0.f;
+	// Inverse time step handed to the velocity update and collision kernels
+	const float isolverdt = 1.f/solverdt;
+
+	// Per-cloth parameters read by the velocity update kernels
+	m_clPerClothDampingFactor.moveToGPU();
+	m_clPerClothVelocityCorrectionCoefficient.moveToGPU();
+
+	// Ensure data is on accelerator
+	m_linkData.moveToAccelerator();
+	m_vertexData.moveToAccelerator();
+
+	// Per-link setup kernel
+	prepareLinks();
+
+	// Solve velocity
+	for( int iteration = 0; iteration < m_numberOfVelocityIterations ; ++iteration )
+	{
+		for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i )
+		{
+			int startLink = m_linkData.m_batchStartLengths[i].start;
+			int numLinks = m_linkData.m_batchStartLengths[i].length;
+
+			solveLinksForVelocity( startLink, numLinks, kst );
+		}
+	}
+
+	// Sort the collision objects and rebuild the per-cloth ranges
+	prepareCollisionConstraints();
+
+	// Compute new positions from velocity
+	// Also update the previous position so that our position computation is now based on the new position from the velocity solution
+	// rather than based directly on the original positions
+	if( m_numberOfVelocityIterations > 0 )
+	{
+		updateVelocitiesFromPositionsWithVelocities( isolverdt );
+	} else {
+		updateVelocitiesFromPositionsWithoutVelocities( isolverdt );
+	}
+
+	// Solve position
+	for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+	{
+		for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i )
+		{
+			int startLink = m_linkData.m_batchStartLengths[i].start;
+			int numLinks = m_linkData.m_batchStartLengths[i].length;
+
+			solveLinksForPosition( startLink, numLinks, kst, ti );
+		}
+	} // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+
+	// At this point assume that the force array is blank - we will overwrite it
+	solveCollisionsAndUpdateVelocities( isolverdt );
+
+}
+
+
+//////////////////////////////////////
+// Kernel dispatches
+void btOpenCLSoftBodySolver::prepareLinks()
+{
+	// Enqueue m_prepareLinksKernel over all links (link count, link/massLSC buffers, previous positions, length-ratio and CLength buffers).
+	int numLinks = m_linkData.getNumLinks();
+	cl_int ciErrNum = clSetKernelArg(m_prepareLinksKernel,0, sizeof(int), &numLinks);
+	ciErrNum = clSetKernelArg(m_prepareLinksKernel,1, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer);
+	ciErrNum = clSetKernelArg(m_prepareLinksKernel,2, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
+	ciErrNum = clSetKernelArg(m_prepareLinksKernel,3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_prepareLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clLinksLengthRatio.m_buffer);
+	ciErrNum = clSetKernelArg(m_prepareLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clLinksCLength.m_buffer);
+	size_t	numWorkItems = m_defaultWorkGroupSize*((m_linkData.getNumLinks() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if( numWorkItems == 0 )
+		return; // no links: clEnqueueNDRangeKernel rejects a zero global work size before OpenCL 2.1
+	ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_prepareLinksKernel, 1 , NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
+	if( ciErrNum != CL_SUCCESS ) 
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_prepareLinksKernel)");
+	}
+}
+
+void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt )
+{
+	// Enqueue m_updatePositionsFromVelocitiesKernel over all vertices (vertex count, solverdt, velocity, previous position, position).
+	int numVerts = m_vertexData.getNumVertices();
+	cl_int ciErrNum = clSetKernelArg(m_updatePositionsFromVelocitiesKernel,0, sizeof(int), &numVerts);
+	ciErrNum = clSetKernelArg(m_updatePositionsFromVelocitiesKernel,1, sizeof(float), &solverdt);
+	ciErrNum = clSetKernelArg(m_updatePositionsFromVelocitiesKernel,2, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
+	ciErrNum = clSetKernelArg(m_updatePositionsFromVelocitiesKernel,3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_updatePositionsFromVelocitiesKernel,4, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
+	size_t	numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if( numWorkItems == 0 )
+		return; // no vertices: clEnqueueNDRangeKernel rejects a zero global work size before OpenCL 2.1
+	ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_updatePositionsFromVelocitiesKernel, 1, NULL, &numWorkItems,&m_defaultWorkGroupSize,0,0,0);
+	if( ciErrNum != CL_SUCCESS ) 
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_updatePositionsFromVelocitiesKernel)");
+	}
+}
+
+void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks, float kst, float ti )
+{
+	// Enqueue m_solvePositionsFromLinksKernel for one link batch: numLinks links starting at startLink; kst and ti are forwarded unchanged.
+	cl_int ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,0, sizeof(int), &startLink);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,1, sizeof(int), &numLinks);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,2, sizeof(float), &kst);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,3, sizeof(float), &ti);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,6, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,7, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,8, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
+	size_t	numWorkItems = m_defaultWorkGroupSize*((numLinks + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if( numWorkItems == 0 )
+		return; // empty batch: clEnqueueNDRangeKernel rejects a zero global work size before OpenCL 2.1
+	ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solvePositionsFromLinksKernel,1,NULL,&numWorkItems,&m_defaultWorkGroupSize,0,0,0);
+	if( ciErrNum!= CL_SUCCESS ) 
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_solvePositionsFromLinksKernel)");
+	}
+} // solveLinksForPosition
+
+
+void btOpenCLSoftBodySolver::solveLinksForVelocity( int startLink, int numLinks, float kst )
+{
+	// Enqueue m_vSolveLinksKernel for one link batch: numLinks links starting at startLink; kst is forwarded unchanged.
+	cl_int ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 0, sizeof(int), &startLink);
+	ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 1, sizeof(int), &numLinks);
+	ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 2, sizeof(float), &kst);
+	ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 3, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer);
+	ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 4, sizeof(cl_mem), &m_linkData.m_clLinksLengthRatio.m_buffer);
+	ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 5, sizeof(cl_mem), &m_linkData.m_clLinksCLength.m_buffer);
+	ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
+	ciErrNum = clSetKernelArg(m_vSolveLinksKernel, 7, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
+	size_t	numWorkItems = m_defaultWorkGroupSize*((numLinks + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if( numWorkItems == 0 )
+		return; // empty batch: clEnqueueNDRangeKernel rejects a zero global work size before OpenCL 2.1
+	ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_vSolveLinksKernel,1,NULL,&numWorkItems, &m_defaultWorkGroupSize,0,0,0);
+	if( ciErrNum != CL_SUCCESS ) 
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_vSolveLinksKernel)");
+	}
+}
+
+void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt )
+{
+	// Enqueue m_updateVelocitiesFromPositionsWithVelocitiesKernel over all vertices; isolverdt is forwarded as kernel argument 1.
+	int numVerts = m_vertexData.getNumVertices();
+	cl_int ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel,0, sizeof(int), &numVerts);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 1, sizeof(float), &isolverdt);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clClothIdentifier.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 5, sizeof(cl_mem), &m_clPerClothVelocityCorrectionCoefficient.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 6, sizeof(cl_mem), &m_clPerClothDampingFactor.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 7, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 8, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer);
+
+	size_t	numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if( numWorkItems == 0 )
+		return; // no vertices: clEnqueueNDRangeKernel rejects a zero global work size before OpenCL 2.1
+	ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_updateVelocitiesFromPositionsWithVelocitiesKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
+	if( ciErrNum != CL_SUCCESS ) 
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_updateVelocitiesFromPositionsWithVelocitiesKernel)");
+	}
+} // updateVelocitiesFromPositionsWithVelocities
+
+void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt )
+{
+	// Enqueue m_updateVelocitiesFromPositionsWithoutVelocitiesKernel over all vertices; isolverdt is forwarded as kernel argument 1.
+	int numVerts = m_vertexData.getNumVertices();
+	cl_int ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 0, sizeof(int), &numVerts);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 1, sizeof(float), &isolverdt);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 4, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 6, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 7, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
+	size_t	numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if( numWorkItems == 0 )
+		return; // no vertices: clEnqueueNDRangeKernel rejects a zero global work size before OpenCL 2.1
+	ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
+	if( ciErrNum != CL_SUCCESS ) 
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel)");
+	}
+} // updateVelocitiesFromPositionsWithoutVelocities
+
+
+
+void btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt )
+{
+	// Enqueue m_solveCollisionsAndUpdateVelocitiesKernel over all vertices; isolverdt is forwarded as kernel argument 1.
+	// Copy kernel parameters to GPU
+	m_vertexData.moveToAccelerator();
+	m_clPerClothFriction.moveToGPU();
+	m_clPerClothDampingFactor.moveToGPU();
+	m_clPerClothCollisionObjects.moveToGPU();
+	m_clCollisionObjectDetails.moveToGPU();
+
+	cl_int ciErrNum;
+	int numVerts = m_vertexData.getNumVertices();
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 1, sizeof(float), &isolverdt); // isolverdt is a float: size the argument by its real type
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 4, sizeof(cl_mem),&m_clPerClothFriction.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 6, sizeof(cl_mem),&m_clPerClothCollisionObjects.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 7, sizeof(cl_mem),&m_clCollisionObjectDetails.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 8, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 9, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 10, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
+
+	size_t	numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if (numWorkItems)
+	{
+		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solveCollisionsAndUpdateVelocitiesKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
+		if( ciErrNum != CL_SUCCESS ) 
+		{
+			btAssert( 0 &&  "enqueueNDRangeKernel(m_solveCollisionsAndUpdateVelocitiesKernel)");
+		}
+	}
+} // btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities
+
+
+
+// End kernel dispatches
+/////////////////////////////////////
+
+
+void btSoftBodySolverOutputCLtoCPU::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
+{
+	// Copies softBody's vertex positions and/or normals from the OpenCL solver's vertex data
+	// into a CPU vertex buffer. Descriptors whose type is not CPU_BUFFER are left untouched.
+	btSoftBodySolver *solver = softBody->getSoftBodySolver();
+	btAssert( solver->getSolverType() == btSoftBodySolver::CL_SOLVER || solver->getSolverType() == btSoftBodySolver::CL_SIMD_SOLVER );
+	btOpenCLSoftBodySolver *clSolver = static_cast< btOpenCLSoftBodySolver * >( solver );
+
+	btOpenCLAcceleratedSoftBodyInterface* cloth = clSolver->findSoftBodyInterface( softBody );
+	btSoftBodyVertexDataOpenCL &vertexData( clSolver->m_vertexData );
+
+	// This cloth's vertices occupy [firstVertex, endVertex) in the solver's vertex data.
+	const int firstVertex = cloth->getFirstVertex();
+	const int endVertex = firstVertex + cloth->getNumVertices();
+
+	if( vertexBuffer->getBufferType() != btVertexBufferDescriptor::CPU_BUFFER )
+		return;
+
+	const btCPUVertexBufferDescriptor *cpuVertexBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer);
+	float *basePointer = cpuVertexBuffer->getBasePointer();
+
+	// Both arrays are copied back from the GPU before getPosition/getNormal are read below.
+	vertexData.m_clVertexPosition.copyFromGPU();
+	vertexData.m_clVertexNormal.copyFromGPU();
+
+	if( vertexBuffer->hasVertexPositions() )
+	{
+		const int vertexOffset = cpuVertexBuffer->getVertexOffset(); // applied to a float*, so counted in floats
+		const int vertexStride = cpuVertexBuffer->getVertexStride();
+		float *out = basePointer + vertexOffset;
+
+		for( int v = firstVertex; v < endVertex; ++v, out += vertexStride )
+		{
+			Vectormath::Aos::Point3 position = vertexData.getPosition(v);
+			out[0] = position.getX();
+			out[1] = position.getY();
+			out[2] = position.getZ();
+		}
+	}
+
+	if( vertexBuffer->hasNormals() )
+	{
+		const int normalOffset = cpuVertexBuffer->getNormalOffset(); // applied to a float*, so counted in floats
+		const int normalStride = cpuVertexBuffer->getNormalStride();
+		float *out = basePointer + normalOffset;
+
+		for( int v = firstVertex; v < endVertex; ++v, out += normalStride )
+		{
+			Vectormath::Aos::Vector3 normal = vertexData.getNormal(v);
+			out[0] = normal.getX();
+			out[1] = normal.getY();
+			out[2] = normal.getZ();
+		}
+	}
+} // btSoftBodySolverOutputCLtoCPU::copySoftBodyToVertexBuffer
+
+
+
+/**
+ * Build an OpenCL program from source text and create one kernel from it.
+ *
+ * Progress and error messages (including the per-device build log) are written to stdout.
+ *
+ * @param kernelSource             Null-terminated OpenCL C source of the program.
+ * @param kernelName               Name of the __kernel function to instantiate.
+ * @param additionalMacros         Extra compiler options appended after the default flags; NULL is treated as "".
+ * @param orgSrcFileNameForCaching Name of the original source file; unused by this implementation.
+ * @return The new kernel, or 0 on failure, in which case m_kernelCompilationFailures is incremented.
+ */
+cl_kernel CLFunctions::compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros ,const char* orgSrcFileNameForCaching)
+{
+	(void)orgSrcFileNameForCaching; // no caching is done here
+
+	printf("compiling kernelName: %s ",kernelName);
+	cl_kernel kernel=0;
+	cl_int ciErrNum = CL_SUCCESS;
+	size_t program_length = strlen(kernelSource);
+
+	cl_program program = clCreateProgramWithSource(m_cxMainContext, 1, (const char**)&kernelSource, &program_length, &ciErrNum);
+	if (ciErrNum != CL_SUCCESS || !program)
+	{
+		printf("Error in clCreateProgramWithSource for kernel '%s', Line %u in file %s !!!\n\n", kernelName, __LINE__, __FILE__);
+#ifndef BT_SUPPRESS_OPENCL_ASSERTS
+		btAssert(0);
+#endif //BT_SUPPRESS_OPENCL_ASSERTS
+		m_kernelCompilationFailures++;
+		return 0;
+	}
+
+	// Build the program with 'mad' Optimization option
+#ifdef MAC
+	const char* flags = "-cl-mad-enable -DMAC -DGUID_ARG";
+#else
+	//const char* flags = "-DGUID_ARG= -fno-alias";
+	const char* flags = "-DGUID_ARG= ";
+#endif
+
+	const char* extraMacros = additionalMacros ? additionalMacros : "";
+	char* compileFlags = new char[strlen(extraMacros) + strlen(flags) + 5];
+	sprintf(compileFlags, "%s %s", flags, extraMacros);
+	// With no notification callback the build call is blocking, so the
+	// option string can be freed as soon as it returns.
+	ciErrNum = clBuildProgram(program, 0, NULL, compileFlags, NULL, NULL);
+	delete [] compileFlags;
+
+	if (ciErrNum != CL_SUCCESS)
+	{
+		bool logPrinted = false;
+
+		// CL_PROGRAM_DEVICES reports its size in bytes, not in devices
+		size_t devicesBytes = 0;
+		clGetProgramInfo( program, CL_PROGRAM_DEVICES, 0, 0, &devicesBytes );
+		const size_t numDevices = devicesBytes / sizeof(cl_device_id);
+		if( numDevices > 0 )
+		{
+			cl_device_id *devices = new cl_device_id[numDevices];
+			if( clGetProgramInfo( program, CL_PROGRAM_DEVICES, devicesBytes, devices, NULL ) == CL_SUCCESS )
+			{
+				for( size_t i = 0; i < numDevices; ++i )
+				{
+					size_t ret_val_size = 0;
+					clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
+					char *build_log = new char[ret_val_size+1];
+					if( clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL) != CL_SUCCESS )
+						build_log[0] = '\0';
+
+					// to be careful, terminate with \0
+					// there's no information in the reference whether the string is 0 terminated or not
+					build_log[ret_val_size] = '\0';
+
+					printf("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log);
+					delete[] build_log;
+					logPrinted = true;
+				}
+			}
+			delete[] devices;
+		}
+		if( !logPrinted )
+			printf("Error in clBuildProgram, Line %u in file %s, no build log available !!!\n\n", __LINE__, __FILE__);
+
+		clReleaseProgram(program);
+#ifndef BT_SUPPRESS_OPENCL_ASSERTS
+		btAssert(0);
+#endif //BT_SUPPRESS_OPENCL_ASSERTS
+		m_kernelCompilationFailures++;
+		return 0;
+	}
+
+	// Create the kernel
+	kernel = clCreateKernel(program, kernelName, &ciErrNum);
+	if (ciErrNum != CL_SUCCESS)
+	{
+		const char* msg = "";
+		switch(ciErrNum)
+		{
+		case CL_INVALID_PROGRAM:
+			msg = "Program is not a valid program object.";
+			break;
+		case CL_INVALID_PROGRAM_EXECUTABLE:
+			msg = "There is no successfully built executable for program.";
+			break;
+		case CL_INVALID_KERNEL_NAME:
+			msg = "kernel_name is not found in program.";
+			break;
+		case CL_INVALID_KERNEL_DEFINITION:
+			msg = "the function definition for __kernel function given by kernel_name such as the number of arguments, the argument types are not the same for all devices for which the program executable has been built.";
+			break;
+		case CL_INVALID_VALUE:
+			msg = "kernel_name is NULL.";
+			break;
+		case CL_OUT_OF_HOST_MEMORY:
+			msg = "Failure to allocate resources required by the OpenCL implementation on the host.";
+			break;
+		default:
+			{
+			}
+		}
+
+		printf("Error in clCreateKernel for kernel '%s', error is \"%s\", Line %u in file %s !!!\n\n", kernelName, msg, __LINE__, __FILE__);
+
+		clReleaseProgram(program);
+#ifndef BT_SUPPRESS_OPENCL_ASSERTS
+		btAssert(0);
+#endif //BT_SUPPRESS_OPENCL_ASSERTS
+		m_kernelCompilationFailures++;
+		return 0;
+	}
+
+	printf("ready. \n");
+	// Drop our reference to the program; the kernel created from it stays valid
+	clReleaseProgram(program);
+	if (!kernel)
+		m_kernelCompilationFailures++;
+	return kernel;
+
+}
+
+/**
+ * Prediction stage of a simulation step: clears the per-frame collision
+ * shape list, optionally refreshes anchor positions, then applies forces,
+ * integrates and updates the bounds.
+ *
+ * @param timeStep Step length; multiplied by getTimeScale() before it is used.
+ */
+void btOpenCLSoftBodySolver::predictMotion( float timeStep )
+{
+	// Start the frame with an empty collision shape list and call
+	// changedOnCPU() on both collision buffers.
+	m_clCollisionObjectDetails.changedOnCPU();
+	m_clPerClothCollisionObjects.changedOnCPU();
+	m_collisionObjectDetails.clear();
+
+	if ( m_bUpdateAnchoredNodePos )
+	{
+		using Vectormath::Aos::Point3;
+
+		// Nodes with zero inverse mass or an attached anchor may be animated
+		// externally, so their current positions are copied over every step.
+		// Without such nodes this CPU-to-GPU transfer is skipped.
+		const int anchorCount = m_anchorNodeInfoArray.size();
+		for ( int anchor = 0; anchor < anchorCount; ++anchor )
+		{
+			const btSoftBody::Node* node = m_anchorNodeInfoArray[anchor].pNode;
+			m_anchorPosition[anchor] = Point3( (float)node->m_x.getX(),
+			                                   (float)node->m_x.getY(),
+			                                   (float)node->m_x.getZ() );
+		}
+
+		if ( anchorCount > 0 )
+		{
+			m_clAnchorPosition.changedOnCPU();
+		}
+
+		updateFixedVertexPositions();
+	}
+
+	{
+		BT_PROFILE("applyForces");
+		// Apply the known forces to the cloths
+		const float scaledStep = timeStep * getTimeScale();
+		applyForces( scaledStep );
+	}
+
+	{
+		BT_PROFILE("integrate");
+		// Integrate motion for every soft body handled by the solver
+		const float scaledStep = timeStep * getTimeScale();
+		integrate( scaledStep );
+	}
+
+	{
+		BT_PROFILE("updateBounds");
+		updateBounds();
+	}
+	// End prediction work for solvers
+}
+
+/**
+ * Convert a btTransform into a Vectormath Transform3: columns 0-2 receive
+ * the basis columns and column 3 receives the origin.
+ */
+static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
+{
+	const btMatrix3x3 &basis = transform.getBasis();
+
+	Vectormath::Aos::Transform3 result;
+	for( int column = 0; column < 3; ++column )
+	{
+		result.setCol( column, toVector3( basis.getColumn( column ) ) );
+	}
+	result.setCol( 3, toVector3( transform.getOrigin() ) );
+	return result;
+}
+
+/**
+ * Store new bounds in the wrapped btSoftBody, widened on every axis by the
+ * margin of the soft body's collision shape.
+ */
+void btOpenCLAcceleratedSoftBodyInterface::updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound )
+{
+	const float margin = (float)getSoftBody()->getCollisionShape()->getMargin();
+	const btVector3 padding( margin, margin, margin );
+
+	m_softBody->m_bounds[0] = lowerBound - padding;
+	m_softBody->m_bounds[1] = upperBound + padding;
+}  // btOpenCLAcceleratedSoftBodyInterface::updateBounds
+
+// Soft body against soft body: this overload has an empty body, so such
+// pairs are ignored by this solver.
+void btOpenCLSoftBodySolver::processCollision( btSoftBody*, btSoftBody* )
+{
+
+}
+
+/**
+ * Add the collision object to the set to deal with for a particular soft body.
+ *
+ * Only capsule shapes are recorded; any other shape type is ignored (with a
+ * message in _DEBUG builds). The description appended to
+ * m_collisionObjectDetails carries the capsule dimensions, the object's world
+ * transform and friction, and its velocities.
+ *
+ * @param softBody        Soft body the object collides with; it must be known to this solver.
+ * @param collisionObject Object colliding with the soft body.
+ */
+void btOpenCLSoftBodySolver::processCollision( btSoftBody *softBody, btCollisionObject* collisionObject )
+{
+ 	int softBodyIndex = findSoftBodyIndex( softBody );
+
+	if( softBodyIndex >= 0 )
+	{
+		btCollisionShape *collisionShape = collisionObject->getCollisionShape();
+		float friction = collisionObject->getFriction();
+		int shapeType = collisionShape->getShapeType();
+		if( shapeType == CAPSULE_SHAPE_PROXYTYPE )
+		{
+			// Add to the list of expected collision objects
+			CollisionShapeDescription newCollisionShapeDescription;
+			newCollisionShapeDescription.softBodyIdentifier = softBodyIndex;
+			newCollisionShapeDescription.collisionShapeType = shapeType;
+			// TODO: May need to transpose this matrix either here or in HLSL
+			newCollisionShapeDescription.shapeTransform = toTransform3(collisionObject->getWorldTransform());
+			btCapsuleShape *capsule = static_cast<btCapsuleShape*>( collisionShape );
+			newCollisionShapeDescription.radius = capsule->getRadius();
+			newCollisionShapeDescription.halfHeight = capsule->getHalfHeight();
+			newCollisionShapeDescription.margin = capsule->getMargin();
+			newCollisionShapeDescription.upAxis = capsule->getUpAxis();
+			newCollisionShapeDescription.friction = friction;
+
+			// Not every collision object is a rigid body. upcast() yields 0 for
+			// the others, which are then treated as having no velocity instead
+			// of being read through an invalid cast.
+			const btRigidBody* body = btRigidBody::upcast( collisionObject );
+			if( body )
+			{
+				newCollisionShapeDescription.linearVelocity = toVector3(body->getLinearVelocity());
+				newCollisionShapeDescription.angularVelocity = toVector3(body->getAngularVelocity());
+			}
+			else
+			{
+				newCollisionShapeDescription.linearVelocity = Vectormath::Aos::Vector3( 0.0f );
+				newCollisionShapeDescription.angularVelocity = Vectormath::Aos::Vector3( 0.0f );
+			}
+			m_collisionObjectDetails.push_back( newCollisionShapeDescription );
+
+		}
+		else {
+#ifdef _DEBUG
+			printf("Unsupported collision shape type\n");
+#endif
+			//btAssert(0 && "Unsupported collision shape type\n");
+		}
+	} else {
+		btAssert(0 && "Unknown soft body");
+	}
+} // btOpenCLSoftBodySolver::processCollision
+
+
+
+
+
+/**
+ * Look up the interface object that wraps the given soft body.
+ *
+ * @return The first entry of m_softBodySet whose soft body is softBody, or 0 if there is none.
+ */
+btOpenCLAcceleratedSoftBodyInterface* btOpenCLSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody )
+{
+	btOpenCLAcceleratedSoftBodyInterface* match = 0;
+
+	for( int i = 0; match == 0 && i < m_softBodySet.size(); ++i )
+	{
+		btOpenCLAcceleratedSoftBodyInterface* const candidate = m_softBodySet[i];
+		if( candidate->getSoftBody() == softBody )
+		{
+			match = candidate;
+		}
+	}
+
+	return match;
+}
+
+
+/**
+ * Find the position of a soft body in m_softBodySet.
+ *
+ * @param softBody Soft body to look for.
+ * @return Index of the first entry wrapping softBody, or -1 when the soft
+ *         body is not part of this solver. Callers test the result with
+ *         ">= 0", so the not-found value has to be negative.
+ */
+int btOpenCLSoftBodySolver::findSoftBodyIndex( const btSoftBody* const softBody )
+{
+	for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
+	{
+		btOpenCLAcceleratedSoftBodyInterface* softBodyInterface = m_softBodySet[softBodyIndex];
+		if( softBodyInterface->getSoftBody() == softBody )
+			return softBodyIndex;
+	}
+	return -1;
+}
+
+/**
+ * Make sure the kernels are built, calling buildShaders() if they are not.
+ *
+ * @return true once the kernels are initialized, false if building them failed.
+ */
+bool btOpenCLSoftBodySolver::checkInitialized()
+{
+	if( !m_shadersInitialized && buildShaders() )
+	{
+		m_shadersInitialized = true;
+	}
+
+	return m_shadersInitialized;
+}
+
+/**
+ * Compile every kernel used by the solver through m_currentCLFunctions.
+ *
+ * Existing kernels are released first. The solver counts as initialized only
+ * when no compilation failure was recorded.
+ *
+ * @return The resulting value of m_shadersInitialized.
+ */
+bool btOpenCLSoftBodySolver::buildShaders()
+{
+	// Already built: nothing to do
+	if( m_shadersInitialized )
+	{
+		return true;
+	}
+
+	const char* const noMacros = "";
+
+	// Ensure current kernels are released first
+	releaseKernels();
+
+	CLFunctions& compiler = *m_currentCLFunctions;
+	compiler.clearKernelCompilationFailures();
+
+	m_prepareLinksKernel = compiler.compileCLKernelFromString(
+		PrepareLinksCLString, "PrepareLinksKernel", noMacros, "OpenCLC10/PrepareLinks.cl" );
+	m_updatePositionsFromVelocitiesKernel = compiler.compileCLKernelFromString(
+		UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", noMacros, "OpenCLC10/UpdatePositionsFromVelocities.cl" );
+	m_solvePositionsFromLinksKernel = compiler.compileCLKernelFromString(
+		SolvePositionsCLString, "SolvePositionsFromLinksKernel", noMacros, "OpenCLC10/SolvePositions.cl" );
+	m_vSolveLinksKernel = compiler.compileCLKernelFromString(
+		VSolveLinksCLString, "VSolveLinksKernel", noMacros, "OpenCLC10/VSolveLinks.cl" );
+	m_updateVelocitiesFromPositionsWithVelocitiesKernel = compiler.compileCLKernelFromString(
+		UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", noMacros, "OpenCLC10/UpdateNodes.cl" );
+	m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = compiler.compileCLKernelFromString(
+		UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", noMacros, "OpenCLC10/UpdatePositions.cl" );
+	m_solveCollisionsAndUpdateVelocitiesKernel = compiler.compileCLKernelFromString(
+		SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", noMacros, "OpenCLC10/SolveCollisionsAndUpdateVelocities.cl" );
+	m_integrateKernel = compiler.compileCLKernelFromString(
+		IntegrateCLString, "IntegrateKernel", noMacros, "OpenCLC10/Integrate.cl" );
+	m_applyForcesKernel = compiler.compileCLKernelFromString(
+		ApplyForcesCLString, "ApplyForcesKernel", noMacros, "OpenCLC10/ApplyForces.cl" );
+	m_updateFixedVertexPositionsKernel = compiler.compileCLKernelFromString(
+		UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions", noMacros, "OpenCLC10/UpdateFixedVertexPositions.cl" );
+
+	// TODO: Rename to UpdateSoftBodies
+	// The next three kernels all come from the same source string.
+	m_resetNormalsAndAreasKernel = compiler.compileCLKernelFromString(
+		UpdateNormalsCLString, "ResetNormalsAndAreasKernel", noMacros, "OpenCLC10/UpdateNormals.cl" );
+	m_normalizeNormalsAndAreasKernel = compiler.compileCLKernelFromString(
+		UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", noMacros, "OpenCLC10/UpdateNormals.cl" );
+	m_updateSoftBodiesKernel = compiler.compileCLKernelFromString(
+		UpdateNormalsCLString, "UpdateSoftBodiesKernel", noMacros, "OpenCLC10/UpdateNormals.cl" );
+
+	const bool allCompiled = ( compiler.getKernelCompilationFailures() == 0 );
+	if( allCompiled )
+	{
+		m_shadersInitialized = true;
+	}
+
+	return m_shadersInitialized;
+}
+
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h
new file mode 100644
index 00000000..cc8db089
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h
@@ -0,0 +1,527 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_OPENCL_H
+
+#include "stddef.h" //for size_t
+#include "vectormath/vmInclude.h"
+
+#include "BulletSoftBody/btSoftBodySolvers.h"
+#include "BulletSoftBody/btSoftBody.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+#include "btSoftBodySolverLinkData_OpenCL.h"
+#include "btSoftBodySolverVertexData_OpenCL.h"
+#include "btSoftBodySolverTriangleData_OpenCL.h"
+
+/**
+ * Compiles OpenCL kernels for one context and counts failed compilations.
+ *
+ * compileCLKernelFromString() is virtual, so the class can be subclassed to
+ * change how kernels are compiled.
+ */
+class CLFunctions
+{
+protected:
+	cl_command_queue	m_cqCommandQue;
+	cl_context			m_cxMainContext;
+
+	/** Number of failed compilations since construction or the last clearKernelCompilationFailures() */
+	int	m_kernelCompilationFailures;
+
+
+public:
+	CLFunctions(cl_command_queue cqCommandQue, cl_context cxMainContext) :
+		m_cqCommandQue( cqCommandQue ),
+		m_cxMainContext( cxMainContext ),
+		m_kernelCompilationFailures(0)
+	{
+	}
+
+	/** Virtual so that a subclass can safely be destroyed through a CLFunctions pointer. */
+	virtual ~CLFunctions()
+	{
+	}
+
+	/** @return Number of kernel compilations that have failed so far. */
+	int getKernelCompilationFailures() const
+	{
+		return m_kernelCompilationFailures;
+	}
+
+	/**
+	 * Compile a compute shader kernel from a string and return the appropriate cl_kernel object.
+	 */	
+	virtual cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros, const char* srcFileNameForCaching);
+
+	/** Reset the failure counter to zero. */
+	void	clearKernelCompilationFailures()
+	{
+		m_kernelCompilationFailures=0;
+	}
+};
+
+/**
+ * Entry in the collision shape array.
+ * Specifies the shape type, the transform matrix and the necessary details of the collisionShape.
+ */
+struct CollisionShapeDescription
+{
+	Vectormath::Aos::Transform3 shapeTransform;
+	Vectormath::Aos::Vector3 linearVelocity;
+	Vectormath::Aos::Vector3 angularVelocity;
+
+	int softBodyIdentifier;
+	int collisionShapeType;
+
+	// Both needed for capsule
+	float radius;
+	float halfHeight;
+	int upAxis;
+	
+	float margin;
+	float friction;
+
+	/**
+	 * Zero every scalar member so that no field is ever read or copied
+	 * while uninitialized. The transform and velocity members are left to
+	 * their own default construction.
+	 */
+	CollisionShapeDescription() :
+		softBodyIdentifier( 0 ),
+		collisionShapeType( 0 ),
+		radius( 0.0f ),
+		halfHeight( 0.0f ),
+		upAxis( 0 ),
+		margin( 0.0f ),
+		friction( 0.0f )
+	{
+	}
+};
+
+/**
+ * Bookkeeping for one soft body (cloth) inside a solver.
+ *
+ * Records which ranges of the solver-wide vertex, triangle and link arrays
+ * belong to the cloth, together with the btSoftBody it represents.
+ */
+class btOpenCLAcceleratedSoftBodyInterface
+{
+protected:
+	int m_numVertices;		/**< Current number of vertices that are part of this cloth */
+	int m_maxVertices;		/**< Maximum number of vertices allocated to be part of this cloth */
+	int m_numTriangles;		/**< Current number of triangles that are part of this cloth */
+	int m_maxTriangles;		/**< Maximum number of triangles allocated to be part of this cloth */
+	int m_firstVertex;		/**< Index of first vertex in the world allocated to this cloth */
+	int m_firstTriangle;	/**< Index of first triangle in the world allocated to this cloth */
+	int m_firstLink;		/**< Index of first link in the world allocated to this cloth */
+	int m_maxLinks;			/**< Maximum number of links allocated to this cloth */
+	int m_numLinks;			/**< Current number of links allocated to this cloth */
+
+	btSoftBody *m_softBody;	/**< The actual soft body this data represents */
+
+public:
+	/** Wrap softBody; every count and offset starts at zero. */
+	btOpenCLAcceleratedSoftBodyInterface( btSoftBody *softBody ) :
+		m_numVertices( 0 ),
+		m_maxVertices( 0 ),
+		m_numTriangles( 0 ),
+		m_maxTriangles( 0 ),
+		m_firstVertex( 0 ),
+		m_firstTriangle( 0 ),
+		m_firstLink( 0 ),
+		m_maxLinks( 0 ),
+		m_numLinks( 0 ),
+		m_softBody( softBody )
+	{
+	}
+
+	/**
+	 * Update the bounds in the btSoftBody object
+	 */
+	void updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound );
+
+	btSoftBody* getSoftBody()	{ return m_softBody; }
+
+	// TODO: All of these set functions will have to do checks and
+	// update the world because restructuring of the arrays will be necessary
+	// Reasonable use of "friend"?
+
+	// --- vertices ---
+	int  getNumVertices()					{ return m_numVertices; }
+	void setNumVertices( int numVertices )	{ m_numVertices = numVertices; }
+	int  getMaxVertices()					{ return m_maxVertices; }
+	void setMaxVertices( int maxVertices )	{ m_maxVertices = maxVertices; }
+	int  getFirstVertex()					{ return m_firstVertex; }
+	void setFirstVertex( int firstVertex )	{ m_firstVertex = firstVertex; }
+
+	// --- triangles ---
+	int  getNumTriangles()						{ return m_numTriangles; }
+	void setNumTriangles( int numTriangles )	{ m_numTriangles = numTriangles; }
+	int  getMaxTriangles()						{ return m_maxTriangles; }
+	void setMaxTriangles( int maxTriangles )	{ m_maxTriangles = maxTriangles; }
+	int  getFirstTriangle()						{ return m_firstTriangle; }
+	void setFirstTriangle( int firstTriangle )	{ m_firstTriangle = firstTriangle; }
+
+	// --- links ---
+	int  getNumLinks()					{ return m_numLinks; }
+	void setNumLinks( int numLinks )	{ m_numLinks = numLinks; }
+	int  getMaxLinks()					{ return m_maxLinks; }
+	void setMaxLinks( int maxLinks )	{ m_maxLinks = maxLinks; }
+	int  getFirstLink()					{ return m_firstLink; }
+	void setFirstLink( int firstLink )	{ m_firstLink = firstLink; }
+};
+
+
+
+/**
+ * btSoftBodySolver implementation whose getSolverType() reports CL_SOLVER.
+ *
+ * It keeps CPU-side arrays of per-cloth parameters next to matching
+ * btOpenCLBuffer objects, the cl_kernel handles used for the individual
+ * solver stages, and the command queue and context it was created with.
+ */
+class btOpenCLSoftBodySolver : public btSoftBodySolver
+{
+public:
+	
+
+	/** Three unsigned ints followed by a padding word that is always set to zero. */
+	struct UIntVector3
+	{
+		UIntVector3()
+		{
+			x = 0;
+			y = 0;
+			z = 0;
+			_padding = 0;
+		}
+		
+		UIntVector3( unsigned int x_, unsigned int y_, unsigned int z_ )
+		{
+			x = x_;
+			y = y_;
+			z = z_;
+			_padding = 0;
+		}
+			
+		unsigned int x;
+		unsigned int y;
+		unsigned int z;
+		unsigned int _padding;
+	};
+
+	/**
+	 * Pair of collision object indices stored per cloth.
+	 * NOTE(review): the second field is named endObject, while the comment on
+	 * m_perClothCollisionObjects below speaks of a "number of collision
+	 * objects" - confirm which meaning the kernels expect.
+	 */
+	struct CollisionObjectIndices
+	{
+		CollisionObjectIndices( int f, int e )
+		{
+			firstObject = f;
+			endObject = e;
+		}
+
+		int firstObject;
+		int endObject;
+	};
+
+	// Link, vertex and triangle data of the solver (publicly accessible)
+	btSoftBodyLinkDataOpenCL m_linkData;
+	btSoftBodyVertexDataOpenCL m_vertexData;
+	btSoftBodyTriangleDataOpenCL m_triangleData;
+
+protected:
+
+	/** Built-in kernel compiler, used whenever setCLFunctions() is given a null pointer */
+	CLFunctions m_defaultCLFunctions;
+	/** Kernel compiler currently in use; buildShaders() compiles through it */
+	CLFunctions* m_currentCLFunctions;
+
+	/** Variable to define whether we need to update solver constants on the next iteration */
+	bool m_updateSolverConstants;
+
+	/** True once buildShaders() has compiled all kernels without a recorded failure */
+	bool m_shadersInitialized;
+
+	/** 
+	 * Cloths owned by this solver.
+	 * Only our cloths are in this array.
+	 */
+	btAlignedObjectArray< btOpenCLAcceleratedSoftBodyInterface * > m_softBodySet;
+
+	/** Acceleration value to be applied to all non-static vertices in the solver. 
+	 * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+	 */
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothAcceleration;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>			m_clPerClothAcceleration;
+
+	/** Wind velocity to be applied normal to all non-static vertices in the solver. 
+	 * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+	 */
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothWindVelocity;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>			m_clPerClothWindVelocity;
+
+	/** Velocity damping factor */
+	btAlignedObjectArray< float >						m_perClothDampingFactor;
+	btOpenCLBuffer<float>								m_clPerClothDampingFactor;
+
+	/** Velocity correction coefficient */
+	btAlignedObjectArray< float >						m_perClothVelocityCorrectionCoefficient;
+	btOpenCLBuffer<float>								m_clPerClothVelocityCorrectionCoefficient;
+
+	/** Lift parameter for wind effect on cloth. */
+	btAlignedObjectArray< float >						m_perClothLiftFactor;
+	btOpenCLBuffer<float>								m_clPerClothLiftFactor;
+	
+	/** Drag parameter for wind effect on cloth. */
+	btAlignedObjectArray< float >						m_perClothDragFactor;
+	btOpenCLBuffer<float>								m_clPerClothDragFactor;
+
+	/** Density of the medium in which each cloth sits */
+	btAlignedObjectArray< float >						m_perClothMediumDensity;
+	btOpenCLBuffer<float>								m_clPerClothMediumDensity;
+
+	/** 
+	 * Collision shape details: pair of index of first collision shape for the cloth and number of collision objects.
+	 */
+	btAlignedObjectArray< CollisionObjectIndices >		m_perClothCollisionObjects;
+	btOpenCLBuffer<CollisionObjectIndices>				m_clPerClothCollisionObjects;
+
+	/** 
+	 * Collision shapes being passed across to the cloths in this solver.
+	 * The array is cleared at the start of predictMotion() and filled by processCollision().
+	 */
+	btAlignedObjectArray< CollisionShapeDescription >	m_collisionObjectDetails;
+	btOpenCLBuffer< CollisionShapeDescription >			m_clCollisionObjectDetails;
+
+
+	
+	/** 
+	 * Friction coefficient for each cloth
+	 */
+	btAlignedObjectArray< float >	m_perClothFriction;
+	btOpenCLBuffer< float >			m_clPerClothFriction;
+
+	// anchor node info
+	struct AnchorNodeInfoCL
+	{
+		int clVertexIndex;
+		btSoftBody::Node* pNode;
+	};
+
+	// Anchor bookkeeping: predictMotion() copies the position of
+	// m_anchorNodeInfoArray[i].pNode into m_anchorPosition[i].
+	btAlignedObjectArray<AnchorNodeInfoCL> m_anchorNodeInfoArray;
+	btAlignedObjectArray<Vectormath::Aos::Point3> m_anchorPosition;
+	btOpenCLBuffer<Vectormath::Aos::Point3>		  m_clAnchorPosition;
+	btAlignedObjectArray<int> m_anchorIndex;
+	btOpenCLBuffer<int>		  m_clAnchorIndex;
+
+	/** When true, predictMotion() refreshes the anchor positions and calls updateFixedVertexPositions() */
+	bool m_bUpdateAnchoredNodePos;
+
+	// OpenCL kernel handles; buildShaders() creates most of them
+	cl_kernel		m_prepareLinksKernel;
+	cl_kernel		m_solvePositionsFromLinksKernel;
+	cl_kernel		m_updateConstantsKernel;
+	cl_kernel		m_integrateKernel;
+	cl_kernel		m_addVelocityKernel;
+	cl_kernel		m_updatePositionsFromVelocitiesKernel;
+	cl_kernel		m_updateVelocitiesFromPositionsWithoutVelocitiesKernel;
+	cl_kernel		m_updateVelocitiesFromPositionsWithVelocitiesKernel;
+	cl_kernel		m_vSolveLinksKernel;
+	cl_kernel		m_solveCollisionsAndUpdateVelocitiesKernel;
+	cl_kernel		m_resetNormalsAndAreasKernel;
+	cl_kernel		m_normalizeNormalsAndAreasKernel;
+	cl_kernel		m_updateSoftBodiesKernel;
+
+	cl_kernel		m_outputToVertexArrayKernel;
+	cl_kernel		m_applyForcesKernel;
+	cl_kernel       m_updateFixedVertexPositionsKernel;	
+
+	cl_command_queue	m_cqCommandQue;
+	cl_context			m_cxMainContext;
+	
+	/** Value handed out by getDefaultWorkGroupSize() and changed by setDefaultWorkgroupSize() */
+	size_t				m_defaultWorkGroupSize;
+
+
+	/**
+	 * Release the current kernels and compile all of them again through m_currentCLFunctions.
+	 * Returns m_shadersInitialized, which is set when no compilation failure was recorded.
+	 */
+	virtual bool buildShaders();
+
+	void resetNormalsAndAreas( int numVertices );
+
+	void normalizeNormalsAndAreas( int numVertices );
+
+	void executeUpdateSoftBodies( int firstTriangle, int numTriangles );
+
+	void prepareCollisionConstraints();
+	
+	Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a );
+
+	void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );
+	
+
+	/** Find the index of the m_softBodySet entry that wraps softBody. */
+	int findSoftBodyIndex( const btSoftBody* const softBody );
+
+	/** Called from predictMotion() with the time step scaled by getTimeScale(). */
+	virtual void applyForces( float solverdt );
+
+	void updateFixedVertexPositions();
+
+	/**
+	 * Integrate motion on the solver.
+	 */
+	virtual void integrate( float solverdt );
+
+	virtual void updateConstants( float timeStep );
+
+	float computeTriangleArea( 
+		const Vectormath::Aos::Point3 &vertex0,
+		const Vectormath::Aos::Point3 &vertex1,
+		const Vectormath::Aos::Point3 &vertex2 );
+
+
+	//////////////////////////////////////
+	// Kernel dispatches
+	void prepareLinks();
+
+	void solveLinksForVelocity( int startLink, int numLinks, float kst );
+
+	void updatePositionsFromVelocities( float solverdt );
+
+	virtual void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
+	
+	void updateVelocitiesFromPositionsWithVelocities( float isolverdt );
+
+	void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt );
+	virtual void solveCollisionsAndUpdateVelocities( float isolverdt );
+
+	// End kernel dispatches
+	/////////////////////////////////////
+	
+	void updateBounds();
+
+	void releaseKernels();
+
+public:
+	/**
+	 * @param queue                   OpenCL command queue to use.
+	 * @param ctx                     OpenCL context to use.
+	 * @param bUpdateAchchoredNodePos NOTE(review): presumably the initial value of m_bUpdateAnchoredNodePos - the constructor body is not visible here, confirm.
+	 */
+	btOpenCLSoftBodySolver(cl_command_queue queue,cl_context	ctx, bool bUpdateAchchoredNodePos = false);
+
+	virtual ~btOpenCLSoftBodySolver();
+
+
+	
+	/** Return the m_softBodySet entry that wraps softBody, or 0 if there is none. */
+	btOpenCLAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
+
+	virtual btSoftBodyLinkData &getLinkData();
+
+	virtual btSoftBodyVertexData &getVertexData();
+
+	virtual btSoftBodyTriangleData &getTriangleData();
+
+	virtual SolverTypes getSolverType() const
+	{
+		return CL_SOLVER;
+	}
+
+
+	/** Build the kernels with buildShaders() if that has not happened yet; returns m_shadersInitialized. */
+	virtual bool checkInitialized();
+
+	virtual void updateSoftBodies( );
+
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
+
+	virtual void copyBackToSoftBodies(bool bMove = true);
+
+	virtual void solveConstraints( float solverdt );
+
+	/** Clears the collision shape list, then applies forces, integrates and updates the bounds. */
+	virtual void predictMotion( float solverdt );
+
+	/** Record a capsule-shaped collision object for the given soft body; other shape types are not recorded. */
+	virtual void processCollision( btSoftBody *, btCollisionObject* );
+
+	/** Soft body against soft body: the implementation is empty. */
+	virtual void processCollision( btSoftBody*, btSoftBody* );
+
+	virtual void	setDefaultWorkgroupSize(size_t workGroupSize)
+	{
+		m_defaultWorkGroupSize = workGroupSize;
+	}
+	virtual size_t	getDefaultWorkGroupSize() const
+	{
+		return m_defaultWorkGroupSize;
+	}
+
+	/**
+	 * Select the object used to compile kernels.
+	 * A null pointer switches back to the built-in m_defaultCLFunctions.
+	 */
+	void	setCLFunctions(CLFunctions* funcs)
+	{
+		if (funcs)
+			m_currentCLFunctions = funcs;
+		else
+			m_currentCLFunctions  = &m_defaultCLFunctions;
+	}
+
+}; // btOpenCLSoftBodySolver
+
+
+/**
+ * Solver output that moves data from a solver to a given target.
+ * This is the CL to CPU variant.
+ */
+class btSoftBodySolverOutputCLtoCPU : public btSoftBodySolverOutput
+{
+public:
+	btSoftBodySolverOutputCLtoCPU() {}
+
+	/** Write the currently computed vertex data of softBody into vertexBuffer. */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp
new file mode 100644
index 00000000..f97af57f
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp
@@ -0,0 +1,1101 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "vectormath/vmInclude.h"
+#include <stdio.h> //@todo: remove the debugging printf at some stage
+#include "btSoftBodySolver_OpenCLSIMDAware.h"
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+#include "BulletSoftBody/btSoftBody.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include <limits.h>
+// Compile-time batching parameters for the SIMD-aware solver in this file.
+#define WAVEFRONT_SIZE 32 // seeds m_wavefrontSize and scales the work-item count per wave
+#define WAVEFRONT_BLOCK_MULTIPLIER 2 // multiplies WAVEFRONT_SIZE to form GROUP_SIZE
+#define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER) // also the rounding unit for per-cloth vertex capacity
+#define LINKS_PER_SIMD_LANE 16 // seeds m_linksPerWorkItem
+// Local work size passed to the kernel dispatches below.
+static const size_t workGroupSize = GROUP_SIZE;
+
+
+//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
+
+////OpenCL 1.0 kernels don't use float3
+#define MSTRINGIFY(A) #A // presumably each .cl file below wraps its source in MSTRINGIFY(...) so the #include yields a string literal — TODO confirm against the .cl files
+static const char* UpdatePositionsFromVelocitiesCLString = 
+#include "OpenCLC10/UpdatePositionsFromVelocities.cl"
+static const char* SolvePositionsCLString = 
+#include "OpenCLC10/SolvePositionsSIMDBatched.cl"
+static const char* UpdateNodesCLString = 
+#include "OpenCLC10/UpdateNodes.cl"
+static const char* UpdatePositionsCLString = 
+#include "OpenCLC10/UpdatePositions.cl"
+static const char* UpdateConstantsCLString = 
+#include "OpenCLC10/UpdateConstants.cl"
+static const char* IntegrateCLString = 
+#include "OpenCLC10/Integrate.cl"
+static const char* ApplyForcesCLString = 
+#include "OpenCLC10/ApplyForces.cl"
+static const char* UpdateFixedVertexPositionsCLString = 
+#include "OpenCLC10/UpdateFixedVertexPositions.cl"
+static const char* UpdateNormalsCLString = 
+#include "OpenCLC10/UpdateNormals.cl"
+static const char* VSolveLinksCLString = 
+#include "OpenCLC10/VSolveLinks.cl"
+static const char* SolveCollisionsAndUpdateVelocitiesCLString =
+#include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
+static const char* OutputToVertexArrayCLString =
+#include "OpenCLC10/OutputToVertexArray.cl"
+
+
+/** Construct the SIMD-batched link store: seed the wavefront sizing from the file-level macros and bind each CL buffer wrapper to its CPU-side array. */
+btSoftBodyLinkDataOpenCLSIMDAware::btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue,  cl_context ctx) :
+	m_cqCommandQue(queue),
+	m_wavefrontSize( WAVEFRONT_SIZE ),
+	m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),
+	m_maxBatchesWithinWave( 0 ),
+	m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ), // NOTE(review): relies on both operands being declared before this member — confirm in the header
+	m_numWavefronts( 0 ),
+	m_maxVertex( 0 ),
+	m_clNumBatchesAndVerticesWithinWaves( queue, ctx, &m_numBatchesAndVerticesWithinWaves, true ), // last argument is true only for these three tables; presumably a read-only-on-device flag — confirm against the buffer wrapper
+	m_clWavefrontVerticesGlobalAddresses( queue, ctx, &m_wavefrontVerticesGlobalAddresses, true ),
+	m_clLinkVerticesLocalAddresses( queue, ctx, &m_linkVerticesLocalAddresses, true ),
+	m_clLinkStrength( queue, ctx, &m_linkStrength, false ),
+	m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ),
+	m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ),
+	m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ),
+	m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false )
+{ // NOTE(review): m_onGPU is read by onAccelerator() but is not initialised in this list — confirm it is set elsewhere
+}
+/** Empty destructor body: no explicit cleanup is performed here. */
+btSoftBodyLinkDataOpenCLSIMDAware::~btSoftBodyLinkDataOpenCLSIMDAware()
+{
+}
+/** Copy the x, y and z components of a btVector3 into a Vectormath::Aos::Vector3. */
+static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
+{
+	// Build the result directly from the three components.
+	return Vectormath::Aos::Vector3( vec.getX(), vec.getY(), vec.getZ() );
+}
+
+/** Make room for numLinks more links: the base class allocates first, then the link address table is sized to the previous link count plus numLinks. */
+void btSoftBodyLinkDataOpenCLSIMDAware::createLinks( int numLinks )
+{
+	// Capture the target size before the base-class call touches m_links.
+	const int grownLinkCount = m_links.size() + numLinks;
+
+	// Let the base class do its own allocation.
+	btSoftBodyLinkData::createLinks( numLinks );
+
+	m_linkAddresses.resize( grownLinkCount );
+}
+
+/** Store a link in slot linkIndex (space must already exist via createLinks), then update m_maxVertex and the link address table. */
+void btSoftBodyLinkDataOpenCLSIMDAware::setLinkAt( 
+	const LinkDescription &link, 
+	int linkIndex )
+{
+	btSoftBodyLinkData::setLinkAt( link, linkIndex );
+	// Track the largest vertex index referenced by any link set so far.
+	if( m_maxVertex < link.getVertex0() )
+		m_maxVertex = link.getVertex0();
+	if( m_maxVertex < link.getVertex1() )
+		m_maxVertex = link.getVertex1();
+
+	// Initial identity mapping: each link's address starts out as its own index.
+	m_linkAddresses[linkIndex] = linkIndex;
+}
+/** Returns the m_onGPU flag, which moveToAccelerator() sets and moveFromAccelerator() clears on success. */
+bool btSoftBodyLinkDataOpenCLSIMDAware::onAccelerator()
+{
+	return m_onGPU;
+}
+/** Call moveToGPU() on every link buffer, stopping at the first failure; m_onGPU is set only when all calls succeed. */
+bool btSoftBodyLinkDataOpenCLSIMDAware::moveToAccelerator()
+{
+	// && short-circuits, so a failed move skips the remaining ones.
+	const bool allMoved =
+		m_clNumBatchesAndVerticesWithinWaves.moveToGPU() &&
+		m_clWavefrontVerticesGlobalAddresses.moveToGPU() &&
+		m_clLinkVerticesLocalAddresses.moveToGPU() &&
+		m_clLinkStrength.moveToGPU() &&
+		m_clLinksMassLSC.moveToGPU() &&
+		m_clLinksRestLengthSquared.moveToGPU() &&
+		m_clLinksRestLength.moveToGPU() &&
+		m_clLinksMaterialLinearStiffnessCoefficient.moveToGPU();
+
+	if( allMoved )
+		m_onGPU = true;
+
+	return allMoved;
+}
+/** Bring the link buffers back from the device, stopping at the first failure; m_onGPU is cleared only when every step succeeds. */
+bool btSoftBodyLinkDataOpenCLSIMDAware::moveFromAccelerator()
+{
+	// NOTE(review): the first three buffers call moveToGPU(), not moveFromGPU(); this may be a copy-paste slip or deliberate for buffers constructed with the `true` flag — confirm before changing.
+	const bool allMoved =
+		m_clNumBatchesAndVerticesWithinWaves.moveToGPU() &&
+		m_clWavefrontVerticesGlobalAddresses.moveToGPU() &&
+		m_clLinkVerticesLocalAddresses.moveToGPU() &&
+		m_clLinkStrength.moveFromGPU() &&
+		m_clLinksMassLSC.moveFromGPU() &&
+		m_clLinksRestLengthSquared.moveFromGPU() &&
+		m_clLinksRestLength.moveFromGPU() &&
+		m_clLinksMaterialLinearStiffnessCoefficient.moveFromGPU();
+
+	if( allMoved )
+		m_onGPU = false;
+
+	return allMoved;
+}
+
+
+
+
+
+
+
+/** Build the SIMD-aware solver on top of the base OpenCL solver and construct the link data with the same queue and context. */
+btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) :
+	btOpenCLSoftBodySolver( queue, ctx, bUpdateAchchoredNodePos ),
+	m_linkData(queue, ctx)
+{
+	// Mark the kernels as not built; optimize() calls buildShaders().
+	m_shadersInitialized = false;
+
+	// Solver constants start out stale so the first updateConstants() call computes them.
+	// The flag is shared by all cloths linked with this solver; making it body specific is a possible future optimisation.
+	m_updateSolverConstants = true;
+}
+/** Destructor: its only explicit action is a call to releaseKernels(). */
+btOpenCLSoftBodySolverSIMDAware::~btOpenCLSoftBodySolverSIMDAware()
+{
+	releaseKernels();
+}
+/** Rebuild the solver-side copy of every soft body (vertices, triangles, links, anchors) when forceUpdate is set or the number of bodies changed, then regenerate batches and kernels. */
+void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ,bool forceUpdate)
+{
+	if( forceUpdate || m_softBodySet.size() != softBodies.size() )
+	{
+		// Have a change in the soft body set so update, reloading all the data
+		getVertexData().clear();
+		getTriangleData().clear();
+		getLinkData().clear();
+		m_softBodySet.resize(0); // NOTE(review): entries are created with new below; resize(0) by itself does not appear to delete them — confirm ownership
+		m_anchorIndex.clear();
+		// NOTE(review): the m_perCloth* arrays and m_anchorNodeInfoArray are appended to below but not cleared here — confirm a repeated optimize cannot accumulate stale entries
+		int maxPiterations = 0;
+		int maxViterations = 0;
+
+		for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
+		{
+			btSoftBody *softBody = softBodies[ softBodyIndex ];
+			using Vectormath::Aos::Matrix3;
+			using Vectormath::Aos::Point3;
+
+			// Create SoftBody that will store the information within the solver
+			btOpenCLAcceleratedSoftBodyInterface* newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody );
+			m_softBodySet.push_back( newSoftBody );
+
+			m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
+			m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
+			m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
+			m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
+			m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
+			m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
+			// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
+			m_perClothFriction.push_back(softBody->m_cfg.kDF);
+			m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
+
+			// Add space for new vertices and triangles in the default solver for now
+			// TODO: Include space here for tearing too later
+			int firstVertex = getVertexData().getNumVertices();
+			int numVertices = softBody->m_nodes.size();
+			// Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group
+			// maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed
+			int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE); // always leaves at least one spare slot; a whole extra group when numVertices is already a multiple of GROUP_SIZE
+			// Allocate space for new vertices in all the vertex arrays
+			getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
+
+
+			int firstTriangle = getTriangleData().getNumTriangles();
+			int numTriangles = softBody->m_faces.size();
+			int maxTriangles = numTriangles;
+			getTriangleData().createTriangles( maxTriangles );
+
+			// Copy vertices from softbody into the solver
+			for( int vertex = 0; vertex < numVertices; ++vertex )
+			{
+				Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
+				btSoftBodyVertexData::VertexDescription desc;
+
+				// TODO: Position in the softbody might be pre-transformed
+				// or we may need to adapt for the pose.
+				//desc.setPosition( cloth.getMeshTransform()*multPoint );
+				desc.setPosition( multPoint );
+
+				float vertexInverseMass = softBody->m_nodes[vertex].m_im;
+				desc.setInverseMass(vertexInverseMass);
+				getVertexData().setVertexAt( desc, firstVertex + vertex );
+
+				m_anchorIndex.push_back(-1.0); // placeholder; anchored vertices get their anchor slot written after the body loop
+			}
+			for( int vertex = numVertices; vertex < maxVertices; ++vertex )
+			{
+				m_anchorIndex.push_back(-1.0);
+			}
+
+			// Copy triangles similarly
+			// We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
+			for( int triangle = 0; triangle < numTriangles; ++triangle )
+			{
+				// Note that large array storage is relative to the array not to the cloth
+				// So we need to add firstVertex to each value
+				int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
+				int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
+				int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
+				btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
+				getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
+				
+				// Increase vertex triangle counts for this triangle		
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
+			}
+
+			int firstLink = getLinkData().getNumLinks();
+			int numLinks = softBody->m_links.size();
+			int maxLinks = numLinks; // not referenced again in this function
+			
+			// Allocate space for the links
+			getLinkData().createLinks( numLinks );
+
+			// Add the links
+			for( int link = 0; link < numLinks; ++link )
+			{
+				int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
+				int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
+
+				btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
+				newLink.setLinkStrength(1.f);
+				getLinkData().setLinkAt(newLink, firstLink + link);
+			}
+			
+			newSoftBody->setFirstVertex( firstVertex );
+			newSoftBody->setFirstTriangle( firstTriangle );
+			newSoftBody->setNumVertices( numVertices );
+			newSoftBody->setMaxVertices( maxVertices );
+			newSoftBody->setNumTriangles( numTriangles );
+			newSoftBody->setMaxTriangles( maxTriangles );
+			newSoftBody->setFirstLink( firstLink );
+			newSoftBody->setNumLinks( numLinks );
+
+			// Find maximum piterations and viterations
+			int piterations = softBody->m_cfg.piterations;
+
+            if ( piterations > maxPiterations )
+                  maxPiterations = piterations;
+
+            int viterations = softBody->m_cfg.viterations;
+
+			if ( viterations > maxViterations )
+                  maxViterations = viterations;
+
+			// zero mass
+			for( int vertex = 0; vertex < numVertices; ++vertex )
+			{
+				if ( softBody->m_nodes[vertex].m_im == 0 )
+				{
+					AnchorNodeInfoCL nodeInfo;
+					nodeInfo.clVertexIndex = firstVertex + vertex;
+					nodeInfo.pNode = &softBody->m_nodes[vertex];
+
+					m_anchorNodeInfoArray.push_back(nodeInfo);
+				}
+			}			
+
+			// anchor position
+			if ( numVertices > 0 )
+			{
+				for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ )
+				{
+					btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node;
+					btSoftBody::Node* firstNode = &softBody->m_nodes[0];
+
+					AnchorNodeInfoCL nodeInfo;
+					nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
+					nodeInfo.pNode = anchorNode;
+
+					m_anchorNodeInfoArray.push_back(nodeInfo);
+				}
+			}			
+		}
+		// Size the anchor position array to one slot per entry in m_anchorNodeInfoArray.
+		m_anchorPosition.clear();		
+		m_anchorPosition.resize(m_anchorNodeInfoArray.size());
+		// Record each anchored vertex's slot in m_anchorIndex and zero its inverse mass.
+		for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ )
+		{
+			const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode];
+			m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode;
+			getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f;
+		}
+		// This class's updateConstants() does not read its timeStep argument.
+		updateConstants(0.f);
+
+		// set position and velocity iterations
+		setNumberOfPositionIterations(maxPiterations);
+		setNumberOfVelocityIterations(maxViterations);
+
+		// set wind velocity
+		m_perClothWindVelocity.resize( m_softBodySet.size() );
+		for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
+		{
+			btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();			
+			m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
+		}
+
+		m_clPerClothWindVelocity.changedOnCPU();
+
+		// generate batches
+		m_linkData.generateBatches();		
+		m_triangleData.generateBatches();
+
+		// Build the shaders to match the batching parameters
+		buildShaders();
+	}
+}
+
+/** Returns the solver's m_linkData member through a btSoftBodyLinkData reference. */
+btSoftBodyLinkData &btOpenCLSoftBodySolverSIMDAware::getLinkData()
+{
+	// TODO: Consider setting link data to "changed" here
+	return m_linkData;
+}
+
+
+
+/** When the solver constants are flagged stale, recompute each link's rest length, squared rest length and mass/stiffness term; timeStep is not read. */
+void btOpenCLSoftBodySolverSIMDAware::updateConstants( float timeStep )
+{
+	using namespace Vectormath::Aos;
+
+	// Nothing to do unless the constants have been flagged as stale.
+	if( !m_updateSolverConstants )
+		return;
+
+	m_updateSolverConstants = false;
+
+	// Will have to redo this if we change the structure (tear, maybe) or various other possible changes
+
+	// Initialise link constants
+	const int linkCount = m_linkData.getNumLinks();
+	for( int link = 0; link < linkCount; ++link )
+	{
+		btSoftBodyLinkData::LinkNodePair &ends( m_linkData.getVertexPair(link) );
+
+		// Rest length is the current distance between the two end vertices.
+		const float rest = length( m_vertexData.getPosition( ends.vertex0 ) - m_vertexData.getPosition( ends.vertex1 ) );
+		m_linkData.getRestLength(link) = rest;
+		m_linkData.getRestLengthSquared(link) = rest*rest;
+
+		// Sum of both inverse masses divided by the link's linear stiffness coefficient.
+		const float invMass0 = m_vertexData.getInverseMass(ends.vertex0);
+		const float invMass1 = m_vertexData.getInverseMass(ends.vertex1);
+		m_linkData.getMassLSC(link) = (invMass0 + invMass1)/m_linkData.getLinearStiffnessCoefficient(link);
+	}
+}
+
+
+/**
+ * Run m_numberOfPositionIterations position-solve passes over every wavefront batch,
+ * then resolve collisions and update velocities.
+ *
+ * @param solverdt  Solver time step; its reciprocal is passed to solveCollisionsAndUpdateVelocities().
+ *                  NOTE(review): a zero step would make that reciprocal infinite — callers
+ *                  are assumed to pass a positive value.
+ */
+void btOpenCLSoftBodySolverSIMDAware::solveConstraints( float solverdt )
+{
+	// Constant inputs forwarded unchanged to every solveLinksForPosition() dispatch.
+	const float kst = 1.f;
+	const float ti = 0.f;
+
+
+	// Move the per-cloth damping and velocity-correction parameters to the GPU.
+	// The moveToGPU() results are not checked here.
+	m_clPerClothDampingFactor.moveToGPU();
+	m_clPerClothVelocityCorrectionCoefficient.moveToGPU();
+
+
+	// Ensure data is on accelerator
+	m_linkData.moveToAccelerator();
+	m_vertexData.moveToAccelerator();
+
+
+	//prepareLinks();	
+
+	prepareCollisionConstraints();
+
+	// Solve drift
+	for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+	{
+		// Batches are dispatched one after another, in stored order.
+		for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )
+		{
+			int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;
+			int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;
+			solveLinksForPosition( startWave, numWaves, kst, ti );
+		}
+	} // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+
+
+	// At this point assume that the force array is blank - we will overwrite it
+	solveCollisionsAndUpdateVelocities( 1.f/solverdt );
+}
+
+
+//////////////////////////////////////
+// Kernel dispatches
+
+/** Dispatch the batched position-solve kernel for numWaves wavefronts starting at startWave; kst and ti are forwarded to the kernel unchanged. Does nothing when there is no work. */
+void btOpenCLSoftBodySolverSIMDAware::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )
+{
+	cl_int ciErrNum;
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,0, sizeof(int), &startWave);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,1, sizeof(int), &numWaves);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,2, sizeof(float), &kst);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,3, sizeof(float), &ti);
+	// Arguments 4-10: cl_mem buffers held by the link data and the vertex data.
+	// NOTE: each clSetKernelArg status is overwritten by the next call; only the enqueue status is checked.
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clNumBatchesAndVerticesWithinWaves.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clWavefrontVerticesGlobalAddresses.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,6, sizeof(cl_mem), &m_linkData.m_clLinkVerticesLocalAddresses.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,7, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
+
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,8, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,9, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,10, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
+	// Arguments 11-13 pass a size with a null value pointer, the clSetKernelArg form for local-memory arguments.
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,11, WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_int2), 0);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,12, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float4), 0);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,13, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float), 0);
+
+	// One work item per SIMD lane of each wave, rounded up to whole work-groups.
+	size_t	numWorkItems = workGroupSize*((numWaves*WAVEFRONT_SIZE + (workGroupSize-1)) / workGroupSize);
+	if( numWorkItems == 0 )
+		return; // nothing to dispatch; mirrors the zero-size guard in solveCollisionsAndUpdateVelocities()
+	ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solvePositionsFromLinksKernel,1,NULL,&numWorkItems,&workGroupSize,0,0,0);
+	if( ciErrNum!= CL_SUCCESS ) 
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_solvePositionsFromLinksKernel)");
+	}
+} // solveLinksForPosition
+/** Move the collision inputs to the GPU and dispatch the collision-solve/velocity-update kernel over all vertices; isolverdt is the inverse solver time step supplied by the caller. */
+void btOpenCLSoftBodySolverSIMDAware::solveCollisionsAndUpdateVelocities( float isolverdt )
+{
+	// Copy kernel parameters to GPU
+	m_vertexData.moveToAccelerator();
+	m_clPerClothFriction.moveToGPU();
+	m_clPerClothDampingFactor.moveToGPU();
+	m_clPerClothCollisionObjects.moveToGPU();
+	m_clCollisionObjectDetails.moveToGPU();
+	// NOTE: each clSetKernelArg status is overwritten by the next call; only the enqueue status is checked.
+	cl_int ciErrNum;
+	int numVerts = m_vertexData.getNumVertices();
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 1, sizeof(float), &isolverdt); // isolverdt is a float, so size it as one
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 4, sizeof(cl_mem),&m_clPerClothFriction.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 6, sizeof(cl_mem),&m_clPerClothCollisionObjects.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 7, sizeof(cl_mem),&m_clCollisionObjectDetails.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 8, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 9, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 10, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 11, sizeof(CollisionShapeDescription)*16,0);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 12, sizeof(cl_mem),&m_vertexData.m_clVertexInverseMass.m_buffer);
+	size_t	numWorkItems = workGroupSize*((m_vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
+	// Skip the dispatch entirely when there are no vertices to process.
+	if (numWorkItems)
+	{
+		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solveCollisionsAndUpdateVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0);
+		
+		if( ciErrNum != CL_SUCCESS ) 
+		{
+			btAssert( 0 &&  "enqueueNDRangeKernel(m_solveCollisionsAndUpdateVelocitiesKernel)");
+		}
+	}
+
+} // btOpenCLSoftBodySolverSIMDAware::solveCollisionsAndUpdateVelocities
+
+// End kernel dispatches
+/////////////////////////////////////
+
+
+/** Release and recompile the OpenCL kernels used by this solver, passing the current batching parameters as macros to the SIMD position-solve kernel; returns true when no compilation failed. */
+bool btOpenCLSoftBodySolverSIMDAware::buildShaders()
+{
+	releaseKernels();
+	// NOTE(review): this early-out runs after releaseKernels(); confirm releaseKernels() resets m_shadersInitialized, otherwise released kernels would be reported as built.
+	if( m_shadersInitialized )
+		return true;
+
+	const char* additionalMacros="";
+
+	m_currentCLFunctions->clearKernelCompilationFailures();
+	// Stack buffer instead of new[]/delete[]: the five %d fields plus the fixed text stay well under 256 characters.
+	char wavefrontMacros[256];
+
+	sprintf(
+		wavefrontMacros, 
+		"-DMAX_NUM_VERTICES_PER_WAVE=%d -DMAX_BATCHES_PER_WAVE=%d -DWAVEFRONT_SIZE=%d -DWAVEFRONT_BLOCK_MULTIPLIER=%d -DBLOCK_SIZE=%d", 
+		m_linkData.getMaxVerticesPerWavefront(),
+		m_linkData.getMaxBatchesPerWavefront(),
+		m_linkData.getWavefrontSize(),
+		WAVEFRONT_BLOCK_MULTIPLIER,
+		WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
+	
+	m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositionsFromVelocities.cl");
+	m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", wavefrontMacros ,"OpenCLC10/SolvePositionsSIMDBatched.cl");
+	m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", additionalMacros ,"OpenCLC10/UpdateNodes.cl");
+	m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", additionalMacros,"OpenCLC10/UpdatePositions.cl");
+	m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", additionalMacros ,"OpenCLC10/Integrate.cl");
+	m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", additionalMacros,"OpenCLC10/ApplyForces.cl" );
+	m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions" ,additionalMacros,"OpenCLC10/UpdateFixedVertexPositions.cl");
+	m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", additionalMacros ,"OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl");
+
+	// TODO: Rename to UpdateSoftBodies
+	m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
+	m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
+	m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", additionalMacros ,"OpenCLC10/UpdateNormals.cl");
+
+	// wavefrontMacros lives on the stack, so there is nothing to free here.
+
+	if( m_currentCLFunctions->getKernelCompilationFailures()==0)
+	{
+		m_shadersInitialized = true;
+	}
+
+	return m_shadersInitialized;
+}
+
+
+
+/** Convert a btTransform to a Vectormath::Aos::Transform3: the three basis columns become columns 0-2 and the origin becomes column 3. */
+static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
+{
+	Vectormath::Aos::Transform3 converted;
+	// Basis columns first, in index order.
+	for( int column = 0; column < 3; ++column )
+		converted.setCol( column, toVector3( transform.getBasis().getColumn( column ) ) );
+	converted.setCol( 3, toVector3( transform.getOrigin() ) );
+	return converted;
+}
+
+/**
+ * Greedily assign each wavefront to the first batch in which none of its links
+ * touches a vertex already used by that batch; open a new batch when none fits.
+ * @param linksForWavefronts  Per wavefront, the link indices it processes.
+ * @param linkData            Source of each link's vertex pair.
+ * @param numVertices         Sizes the per-batch vertex occupancy maps.
+ * @param wavefrontBatches    Output: per batch, the wavefront indices placed in it.
+ *                            NOTE(review): the occupancy maps start empty, so this
+ *                            appears to require wavefrontBatches to be empty on entry.
+ */
+static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )
+{
+	// A per-batch map of truth values stating whether a given vertex is in that batch
+	// This allows us to significantly optimize the batching
+	btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;
+
+	for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
+	{
+		btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );
+		const int numLinksInWave = wavefront.size();
+
+		// Look for the first existing batch that shares no vertex with this wave.
+		int batch = 0;
+		bool placed = false;
+		while( batch < wavefrontBatches.size() && !placed )
+		{
+			btAlignedObjectArray<bool> &verticesInBatch( mapOfVerticesInBatches[batch] );
+			bool foundSharedVertex = false;
+			for( int link = 0; link < numLinksInWave; ++link )
+			{
+				btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
+				if( verticesInBatch[vertices.vertex0] || verticesInBatch[vertices.vertex1] )
+				{
+					// One clash is enough to rule this batch out; stop scanning.
+					foundSharedVertex = true;
+					break;
+				}
+			}
+			if( foundSharedVertex )
+				++batch;
+			else
+				placed = true;
+		}
+		if( !placed )
+		{
+			// No batch fits: open a new one at index batch (== old batch count).
+			wavefrontBatches.resize( batch + 1 );
+			mapOfVerticesInBatches.resize( batch + 1 );
+			// One flag per vertex (plus one spare), all initially clear.
+			mapOfVerticesInBatches[batch].resize( numVertices+1, false );
+		}
+
+		// Record the wave in its batch and mark all of its vertices as occupied there.
+		wavefrontBatches[batch].push_back( waveIndex );
+		for( int link = 0; link < numLinksInWave; ++link )
+		{
+			btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
+			mapOfVerticesInBatches[batch][vertices.vertex0] = true;
+			mapOfVerticesInBatches[batch][vertices.vertex1] = true;
+		}
+	}
+}
+
+// Erase the element at indexToRemove by moving every later element down one slot, so the remaining order is kept.
+template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )
+{
+	const int lastIndex = vectorToUpdate.size() - 1;
+	// Slide each later element one slot towards the front.
+	for( int slot = indexToRemove + 1; slot <= lastIndex; ++slot )
+		vectorToUpdate[slot-1] = vectorToUpdate[slot];
+	// An empty array stays untouched; otherwise drop the final slot.
+	if( lastIndex >= 0 )
+		vectorToUpdate.resize( lastIndex );
+}
+
+/**
+ * Open a gap at position index by moving later elements up one slot, then store element there.
+ */
+template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )
+{
+	const int oldSize = vectorToUpdate.size();
+	vectorToUpdate.resize( oldSize + 1 );
+	// Walk from the new last slot down to just above index.
+	for( int dst = oldSize; dst > index; --dst )
+		vectorToUpdate[dst] = vectorToUpdate[dst-1];
+	vectorToUpdate[index] = element;
+}
+
+/** 
+ * Treat vectorToUpdate as an ascending ordered set: insert element at its sorted position
+ * unless an equal element is already stored there.
+ */
+template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )
+{
+	const int count = vectorToUpdate.size();
+	int position = 0;
+	// Linear scan for the first entry that is not less than element.
+	for( ; position < count && vectorToUpdate[position] < element; ++position ) {}
+	const bool alreadyPresent = ( position < count ) && !( vectorToUpdate[position] != element );
+	if( !alreadyPresent )
+		insertAtIndex( vectorToUpdate, position, element );
+}
+/** Build a flat table of link indices per vertex (maxLinks slots per vertex, filled in link order); also updates the per-vertex link counts and outputs their maximum in maxLinks. */
+static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )
+{
+	const int totalLinks = linkData.getNumLinks();
+
+	// Pass 1: count the links at every vertex. The counters are incremented in place,
+	// so the caller is expected to have sized and zeroed numLinksPerVertex.
+	for( int link = 0; link < totalLinks; ++link )
+	{
+		btSoftBodyLinkData::LinkNodePair ends( linkData.getVertexPair(link) );
+		++numLinksPerVertex[ends.vertex0];
+		++numLinksPerVertex[ends.vertex1];
+	}
+
+	// The largest per-vertex count fixes the row stride of the flat table.
+	int rowStride = 0;
+	for( int vertex = 0; vertex < numVertices; ++vertex )
+	{
+		if( rowStride < numLinksPerVertex[vertex] )
+			rowStride = numLinksPerVertex[vertex];
+	}
+	maxLinks = rowStride;
+
+	// Next free column in each vertex's row.
+	btAlignedObjectArray< int > nextFreeColumn;
+	nextFreeColumn.resize( numVertices, 0 );
+
+	listOfLinksPerVertex.resize( rowStride * numVertices );
+
+	// Pass 2: append each link to the rows of both of its end vertices (vertex0 first).
+	for( int link = 0; link < totalLinks; ++link )
+	{
+		btSoftBodyLinkData::LinkNodePair ends( linkData.getVertexPair(link) );
+		const int endVertices[2] = { ends.vertex0, ends.vertex1 };
+
+		for( int side = 0; side < 2; ++side )
+		{
+			const int vertex = endVertices[side];
+			const int column = nextFreeColumn[vertex];
+
+			// Row-major slot: vertex row, next unused column.
+			listOfLinksPerVertex[vertex * rowStride + column] = link;
+			nextFreeColumn[vertex] = column + 1;
+		}
+	}
+}
+
+static void computeBatchingIntoWavefronts( 
+	btSoftBodyLinkData &linkData, /* links (vertex pairs) to distribute */
+	int wavefrontSize, /* maximum number of links placed in one batch within a wave */
+	int linksPerWorkItem, /* not referenced in this function */
+	int maxLinksPerWavefront, /* maximum number of links assigned to one wavefront */
+	btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, /* wavefront, links in wavefront */
+	btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */
+	btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */
+	)
+{
+	// Assigns every link to a wavefront by walking outwards from vertex 0 through a queue of
+	// neighbouring vertices, then splits each wavefront's links into batches sharing no vertex.
+	// Attempt generation of larger batches of links.
+	btAlignedObjectArray< bool > processedLink;
+	processedLink.resize( linkData.getNumLinks() );
+	btAlignedObjectArray< int > listOfLinksPerVertex;
+	int maxLinksPerVertex = 0;
+
+	// Count num vertices
+	int numVertices = 0;
+	for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
+	{
+		btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
+		numVertices = btMax( numVertices, nodes.vertex0 + 1 );
+		numVertices = btMax( numVertices, nodes.vertex1 + 1 );
+	}
+
+	// Need list of links per vertex
+	// Compute valence of each vertex
+	btAlignedObjectArray <int> numLinksPerVertex;
+	numLinksPerVertex.resize(0);
+	numLinksPerVertex.resize( numVertices, 0 );
+
+	generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
+
+	if (!numVertices)
+		return;
+
+	// NOTE: a leftover debug loop used to sit here. It walked the first 10 vertices
+	// unconditionally, reading numLinksPerVertex[] out of bounds for meshes with
+	// fewer than 10 vertices, and its only result was an unused local. It has been
+	// removed; no output of this function depended on it.
+	//
+	// listOfLinksPerVertex is addressed as [vertex * maxLinksPerVertex + slot], with
+	// numLinksPerVertex[vertex] valid slots in each row.
+
+
+	// At this point we know what links we have for each vertex so we can start batching
+	
+	// We want a vertex to start with, let's go with 0
+	int currentVertex = 0;
+	int linksProcessed = 0;
+
+	btAlignedObjectArray <int> verticesToProcess;
+
+	while( linksProcessed < linkData.getNumLinks() )
+	{
+		// Next wavefront
+		int nextWavefront = linksForWavefronts.size();
+		linksForWavefronts.resize( nextWavefront + 1 );
+		btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);
+		verticesForWavefronts.resize( nextWavefront + 1 );
+		btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );
+
+		linksForWavefront.resize(0);
+
+		// Loop to find enough links to fill the wavefront
+		// Stopping if we either run out of links, or fill it
+		while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )
+		{
+			// Go through the links for the current vertex
+			for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )
+			{
+				int linkAddress = currentVertex * maxLinksPerVertex + link;
+				int linkIndex = listOfLinksPerVertex[linkAddress];
+				
+				// If we have not already processed this link, add it to the wavefront
+				// Claim it as another processed link
+				// Add the vertex at the far end to the list of vertices to process.
+				if( !processedLink[linkIndex] )
+				{
+					linksForWavefront.push_back( linkIndex );
+					linksProcessed++;
+					processedLink[linkIndex] = true;
+					int v0 = linkData.getVertexPair(linkIndex).vertex0;
+					int v1 = linkData.getVertexPair(linkIndex).vertex1;
+					if( v0 == currentVertex )
+						verticesToProcess.push_back( v1 );
+					else
+						verticesToProcess.push_back( v0 );
+				}
+			}
+			if( verticesToProcess.size() > 0 )
+			{
+				// Get the element on the front of the queue and remove it
+				currentVertex = verticesToProcess[0];
+				removeFromVector( verticesToProcess, 0 );
+			} else {		
+				// If we've not yet processed all the links, find the first unprocessed one
+				// and select one of its vertices as the current vertex
+				if( linksProcessed < linkData.getNumLinks() )
+				{
+					int searchLink = 0;
+					while( processedLink[searchLink] )
+						searchLink++;
+					currentVertex = linkData.getVertexPair(searchLink).vertex0;
+				}	
+			}
+		}
+
+		// We have either finished or filled a wavefront
+		for( int link = 0; link < linksForWavefront.size(); ++link )
+		{
+			int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;
+			int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;
+			insertUniqueAndOrderedIntoVector( vertexSet, v0 );
+			insertUniqueAndOrderedIntoVector( vertexSet, v1 );
+		}
+		// Iterate over links mapped to the wave and batch those
+		// We can run a batch on each cycle trivially
+		
+		batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
+		btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );
+		
+
+		for( int link = 0; link < linksForWavefront.size(); ++link )
+		{
+			int linkIndex = linksForWavefront[link];
+			btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );
+			
+			int batch = 0;
+			bool placed = false;
+			while( batch < batchesWithinWave.size() && !placed )
+			{
+				bool foundSharedVertex = false;
+				if( batchesWithinWave[batch].size() >= wavefrontSize )
+				{
+					// If we have already filled this batch, move on to another
+					foundSharedVertex = true;
+				} else {
+					for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
+					{
+						btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );
+
+						if( vertices.vertex0 == vertices2.vertex0 ||
+							vertices.vertex1 == vertices2.vertex0 ||
+							vertices.vertex0 == vertices2.vertex1 ||
+							vertices.vertex1 == vertices2.vertex1 )
+						{
+							foundSharedVertex = true;
+							break;
+						}
+					}
+				}
+				if( !foundSharedVertex )
+				{
+					batchesWithinWave[batch].push_back( linkIndex );
+					placed = true;
+				} else {
+					++batch;
+				}
+			}
+			if( batch == batchesWithinWave.size() && !placed )
+			{
+				batchesWithinWave.resize( batch + 1 );
+				batchesWithinWave[batch].push_back( linkIndex );
+			}
+		}
+		
+	}
+
+}
+
+void btSoftBodyLinkDataOpenCLSIMDAware::generateBatches()
+{
+	btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;
+	btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;
+	btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;
+	btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set
+	// Overview: group links into wavefronts, batch the wavefronts, then rewrite the link arrays in that order.
+	// Group the links into wavefronts
+	computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );
+
+
+	// Batch the wavefronts
+	generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );
+
+	m_numWavefronts = linksForWavefronts.size();
+
+	// At this point we have a description of which links we need to process in each wavefront
+
+	// First correctly fill the batch ranges vector: one (start, length) pair per batch of wavefronts
+	int numBatches = wavefrontBatches.size();
+	m_wavefrontBatchStartLengths.resize(0);
+	int prefixSum = 0;
+	for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
+	{
+		int wavesInBatch = wavefrontBatches[batchIndex].size();
+		int nextPrefixSum = prefixSum + wavesInBatch;
+		m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
+
+		prefixSum += wavesInBatch;
+	}
+	
+	// Also find max number of batches within a wave
+	m_maxBatchesWithinWave = 0;
+	m_maxVerticesWithinWave = 0;
+	m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );
+	for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )
+	{
+		// See if the number of batches in this wave is greater than the current maximum
+		int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();
+		int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();
+		m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );
+		m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
+	}
+	// The result below is a multiple of 16 that exceeds the true maximum by 17..32 entries.
+	// Add padding values both for alignment and as dud addresses within LDS to compute junk rather than branch around
+	m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
+
+	// Now we know the maximum number of vertices per-wave we can resize the global vertices array
+	m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
+
+	// Grab backup copies of all the link data arrays for the sorting process
+	btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair>				m_links_Backup(m_links);
+	btAlignedObjectArray<float>											m_linkStrength_Backup(m_linkStrength);
+	btAlignedObjectArray<float>											m_linksMassLSC_Backup(m_linksMassLSC);
+	btAlignedObjectArray<float>											m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
+	//btAlignedObjectArray<Vectormath::Aos::Vector3>						m_linksCLength_Backup(m_linksCLength);
+	//btAlignedObjectArray<float>											m_linksLengthRatio_Backup(m_linksLengthRatio);
+	btAlignedObjectArray<float>											m_linksRestLength_Backup(m_linksRestLength);
+	btAlignedObjectArray<float>											m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
+
+	// Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.
+	m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );	
+		
+	// Then re-order links into wavefront blocks
+
+	// Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.
+	int wavefrontCount = 0;
+
+	// Iterate over batches of wavefronts, then wavefronts in the batch
+	for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
+	{
+		btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );
+		int wavefrontsInBatch = batch.size();
+
+		
+		for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
+		{	
+			// batch[] holds indices into the per-wavefront arrays produced by computeBatchingIntoWavefronts
+			int originalWavefrontIndex = batch[wavefrontIndex];
+			btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );
+			int verticesUsedByWavefront = wavefrontVertices.size();
+
+			// Copy the set of vertices into the correctly structured array for use on the device
+			// Fill the non-vertices with -1s
+			// so we can mask out those reads
+			for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
+			{
+				m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
+			}
+			for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
+			{
+				m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
+			}
+
+			// Obtain the set of batches within the current wavefront
+			btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );
+			// Set the size of the batches for use in the solver, correctly ordered
+			NumBatchesVerticesPair batchesAndVertices;
+			batchesAndVertices.numBatches = batchesWithinWavefront.size();
+			batchesAndVertices.numVertices = verticesUsedByWavefront;
+			m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
+			
+
+			// Now iterate over batches within the wavefront to structure the links correctly
+			for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )
+			{
+				btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );
+				int wavefrontBatchSize = linksInBatch.size();
+				// Each wavefront owns m_maxBatchesWithinWave slots of m_wavefrontSize links in the target arrays
+				int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;
+
+				for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
+				{
+					int originalLinkAddress = linksInBatch[linkIndex];
+					// Reorder simple arrays trivially
+					m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
+					m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
+					m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
+					m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
+					m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
+					m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
+
+					// The local address is more complicated. We need to work out where a given vertex will end up
+					// by searching the set of vertices for this link and using the index as the local address
+					btSoftBodyLinkData::LinkNodePair localPair;
+					btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];
+					localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );
+					localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );
+					m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
+				}
+				for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )
+				{
+					// Put 0s into these arrays for padding for cleanliness
+					m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);
+					m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
+
+
+					// For local addresses of junk data choose a set of addresses just above the range of valid ones 
+					// and cycling through % 16 so that we don't have bank conflicts between all dud addresses
+					// The valid addresses will do scatter and gather in the valid range, the junk ones should happily work
+					// off the end of that range so we need no control
+					btSoftBodyLinkData::LinkNodePair localPair;
+					localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);
+					localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);
+					m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
+				}
+
+			}
+
+			
+			wavefrontCount++;
+		}
+
+	
+	}
+
+} // void btSoftBodyLinkDataOpenCLSIMDAware::generateBatches()
+
+
+
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h
new file mode 100644
index 00000000..8cd838ad
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h
@@ -0,0 +1,81 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
+#define BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
+
+#include "stddef.h" //for size_t
+#include "vectormath/vmInclude.h"
+
+#include "btSoftBodySolver_OpenCL.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+#include "btSoftBodySolverLinkData_OpenCLSIMDAware.h"
+#include "btSoftBodySolverVertexData_OpenCL.h"
+#include "btSoftBodySolverTriangleData_OpenCL.h"
+
+
+
+
+
+class btOpenCLSoftBodySolverSIMDAware : public btOpenCLSoftBodySolver
+{
+protected:
+	// Link data owned by this solver, stored in the btSoftBodyLinkDataOpenCLSIMDAware container
+	// (NOTE(review): presumably what getLinkData() returns -- the definition is not in this header).
+	btSoftBodyLinkDataOpenCLSIMDAware m_linkData;
+
+
+
+	// NOTE(review): name suggests this builds the OpenCL kernels for the solver; confirm in the .cpp.
+	virtual bool buildShaders();
+
+
+	void updateConstants( float timeStep );
+
+	float computeTriangleArea( 
+		const Vectormath::Aos::Point3 &vertex0,
+		const Vectormath::Aos::Point3 &vertex1,
+		const Vectormath::Aos::Point3 &vertex2 );
+
+
+	//////////////////////////////////////
+	// Kernel dispatches
+	void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
+	
+	void solveCollisionsAndUpdateVelocities( float isolverdt );
+	// End kernel dispatches
+	/////////////////////////////////////
+
+public:
+	btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context	ctx, bool bUpdateAchchoredNodePos = false);
+	// NOTE(review): "bUpdateAchchoredNodePos" looks like a misspelling of "bUpdateAnchoredNodePos".
+	virtual ~btOpenCLSoftBodySolverSIMDAware();
+	// Reports this solver as the CL_SIMD_SOLVER type.
+	virtual SolverTypes getSolverType() const
+	{
+		return CL_SIMD_SOLVER;
+	}
+
+
+	virtual btSoftBodyLinkData &getLinkData();
+
+
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
+
+	virtual void solveConstraints( float solverdt );
+
+}; // btOpenCLSoftBodySolverSIMDAware
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
diff --git a/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h
new file mode 100644
index 00000000..ab6721fb
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h
@@ -0,0 +1,748 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_DATA_H
+#define BT_SOFT_BODY_SOLVER_DATA_H
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "vectormath/vmInclude.h"
+
+
+/**
+ * Host-side storage for soft body link data, held as parallel arrays indexed by link.
+ */
+class btSoftBodyLinkData
+{
+public:
+	/**
+	 * Class representing a link as a pair of indices into the vertex array.
+	 */
+	class LinkNodePair
+	{
+	public:
+		int vertex0;
+		int vertex1;
+
+		LinkNodePair()
+		{
+			vertex0 = 0;
+			vertex1 = 0;
+		}
+
+		LinkNodePair( int v0, int v1 )
+		{
+			vertex0 = v0;
+			vertex1 = v1;
+		}
+	};
+
+	/**
+	 * Class describing a link for input into the system.
+	 */
+	class LinkDescription
+	{
+	protected:
+		int m_vertex0;
+		int m_vertex1;
+		float m_linkLinearStiffness;
+		float m_linkStrength;
+
+	public:
+
+		LinkDescription()
+		{
+			m_vertex0 = 0;
+			m_vertex1 = 0;
+			m_linkLinearStiffness = 1.0;
+			m_linkStrength = 1.0;
+		}
+
+		LinkDescription( int newVertex0, int newVertex1, float linkLinearStiffness )
+		{
+			m_vertex0 = newVertex0;
+			m_vertex1 = newVertex1;
+			m_linkLinearStiffness = linkLinearStiffness;
+			m_linkStrength = 1.0;
+		}
+
+		LinkNodePair getVertexPair() const
+		{
+			LinkNodePair nodes;
+			nodes.vertex0 = m_vertex0;
+			nodes.vertex1 = m_vertex1;
+			return nodes;
+		}
+
+		void setVertex0( int vertex )
+		{
+			m_vertex0 = vertex;
+		}
+
+		void setVertex1( int vertex )
+		{
+			m_vertex1 = vertex;
+		}
+
+		void setLinkLinearStiffness( float linearStiffness )
+		{
+			m_linkLinearStiffness = linearStiffness;
+		}
+
+		void setLinkStrength( float strength )
+		{
+			m_linkStrength = strength;
+		}
+
+		int getVertex0() const
+		{
+			return m_vertex0;
+		}
+
+		int getVertex1() const
+		{
+			return m_vertex1;
+		}
+
+		float getLinkStrength() const
+		{
+			return m_linkStrength;
+		}
+
+		float getLinkLinearStiffness() const
+		{
+			return m_linkLinearStiffness;
+		}
+	};
+
+protected:
+	// NOTE:
+	// Vertex reference data is stored relative to global array, not relative to individual cloth.
+	// Values must be correct if being passed into single-cloth VBOs or when migrating from one solver
+	// to another.
+
+	btAlignedObjectArray< LinkNodePair > m_links; // Vertex pair for the link
+	btAlignedObjectArray< float >								m_linkStrength; // Strength of each link
+	// (inverseMassA + inverseMassB)/ linear stiffness coefficient
+	btAlignedObjectArray< float >								m_linksMassLSC; 
+	btAlignedObjectArray< float >								m_linksRestLengthSquared; 
+	// Current vector length of link
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >			m_linksCLength;
+	// 1/(current length * current length * massLSC)
+	btAlignedObjectArray< float >								m_linksLengthRatio; 
+	btAlignedObjectArray< float >								m_linksRestLength;
+	btAlignedObjectArray< float >								m_linksMaterialLinearStiffnessCoefficient;
+
+public:
+	btSoftBodyLinkData()
+	{
+	}
+
+	virtual ~btSoftBodyLinkData()
+	{
+	}
+
+	virtual void clear()
+	{
+		m_links.resize(0);
+		m_linkStrength.resize(0);
+		m_linksMassLSC.resize(0);
+		m_linksRestLengthSquared.resize(0);
+		m_linksCLength.resize(0); // keep in step with createLinks(): every array it grows is emptied here
+		m_linksLengthRatio.resize(0);
+		m_linksRestLength.resize(0);
+		m_linksMaterialLinearStiffnessCoefficient.resize(0);
+	}
+
+	int getNumLinks()
+	{
+		return m_links.size();
+	}
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks )
+	{
+		int previousSize = m_links.size();
+		int newSize = previousSize + numLinks;
+
+		// Resize all the arrays that store link data
+		m_links.resize( newSize );
+		m_linkStrength.resize( newSize );
+		m_linksMassLSC.resize( newSize );
+		m_linksRestLengthSquared.resize( newSize );
+		m_linksCLength.resize( newSize );
+		m_linksLengthRatio.resize( newSize );
+		m_linksRestLength.resize( newSize );
+		m_linksMaterialLinearStiffnessCoefficient.resize( newSize );
+	}
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( const LinkDescription &link, int linkIndex )
+	{
+		m_links[linkIndex] = link.getVertexPair();
+		m_linkStrength[linkIndex] = link.getLinkStrength();
+		m_linksMassLSC[linkIndex] = 0.f;
+		m_linksRestLengthSquared[linkIndex] = 0.f;
+		m_linksCLength[linkIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
+		m_linksLengthRatio[linkIndex] = 0.f;
+		m_linksRestLength[linkIndex] = 0.f;
+		m_linksMaterialLinearStiffnessCoefficient[linkIndex] = link.getLinkLinearStiffness();
+	}
+
+	/**
+	 * Return true if data is on the accelerator.
+	 * The CPU version of this class will return true here because
+	 * the CPU is the same as the accelerator.
+	 */
+	virtual bool onAccelerator()
+	{
+		return true;
+	}
+	
+	/**
+	 * Move data from host memory to the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveToAccelerator()
+	{
+		return true;
+	}
+
+	/**
+	 * Move data to host memory from the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveFromAccelerator()
+	{
+		return true;
+	}
+
+	/**
+	 * Return reference to the vertex index pair for link linkIndex as stored on the host.
+	 */
+	LinkNodePair &getVertexPair( int linkIndex )
+	{
+		return m_links[linkIndex];
+	}
+
+	/** 
+	 * Return reference to strength of link linkIndex as stored on the host.
+	 */
+	float &getStrength( int linkIndex )
+	{
+		return m_linkStrength[linkIndex];
+	}
+
+	/**
+	 * Return a reference to the strength of the link corrected for link sorting.
+	 * This is important if we are using data on an accelerator which has the data sorted in some fashion.
+	 */
+	virtual float &getStrengthCorrected( int linkIndex )
+	{
+		return getStrength( linkIndex );
+	}
+
+	/**
+	 * Return reference to the rest length of link linkIndex as stored on the host.
+	 */
+	float &getRestLength( int linkIndex )
+	{
+		return m_linksRestLength[linkIndex];
+	}
+
+	/**
+	 * Return reference to linear stiffness coefficient for link linkIndex as stored on the host.
+	 */
+	float &getLinearStiffnessCoefficient( int linkIndex )
+	{
+		return m_linksMaterialLinearStiffnessCoefficient[linkIndex];
+	}
+
+	/**
+	 * Return reference to the MassLSC value for link linkIndex as stored on the host.
+	 */
+	float &getMassLSC( int linkIndex )
+	{
+		return m_linksMassLSC[linkIndex];
+	}
+
+	/**
+	 * Return reference to rest length squared for link linkIndex as stored on the host.
+	 */
+	float &getRestLengthSquared( int linkIndex )
+	{
+		return m_linksRestLengthSquared[linkIndex];
+	}
+
+	/**
+	 * Return reference to current length of link linkIndex as stored on the host.
+	 */
+	Vectormath::Aos::Vector3 &getCurrentLength( int linkIndex )
+	{
+		return m_linksCLength[linkIndex];
+	}
+
+	 /**
+	  * Return the link length ratio from for link linkIndex as stored on the host.
+	  */
+	 float &getLinkLengthRatio( int linkIndex )
+	 {
+		 return m_linksLengthRatio[linkIndex];
+	 }
+};
+
+
+
+/**
+ * Wrapper for vertex data information.
+ * By wrapping it like this we stand a good chance of being able to optimise for storage format easily.
+ * It should also help us make sure all the data structures remain consistent.
+ */
+class btSoftBodyVertexData
+{
+public:
+	/**
+	 * Class describing a vertex for input into the system.
+	 */
+	class VertexDescription
+	{
+	private:
+		Vectormath::Aos::Point3 m_position;
+		/** Inverse mass. If this is 0f then the mass was 0 because that simplifies calculations. */
+		float m_inverseMass;
+
+	public:
+		VertexDescription()
+		{	
+			m_position = Vectormath::Aos::Point3( 0.f, 0.f, 0.f );
+			m_inverseMass = 0.f;
+		}
+
+		VertexDescription( const Vectormath::Aos::Point3 &position, float mass )
+		{
+			m_position = position;
+			if( mass > 0.f )
+				m_inverseMass = 1.0f/mass;
+			else
+				m_inverseMass = 0.f;
+		}
+
+		void setPosition( const Vectormath::Aos::Point3 &position )
+		{
+			m_position = position;
+		}
+
+		void setInverseMass( float inverseMass )
+		{
+			m_inverseMass = inverseMass;
+		}
+
+		void setMass( float mass )
+		{
+			if( mass > 0.f )
+				m_inverseMass = 1.0f/mass;
+			else
+				m_inverseMass = 0.f;
+		}
+
+		Vectormath::Aos::Point3 getPosition() const
+		{
+			return m_position;
+		}
+
+		float getInverseMass() const
+		{
+			return m_inverseMass;
+		}
+
+		float getMass() const
+		{
+			if( m_inverseMass == 0.f )
+				return 0.f;
+			else
+				return 1.0f/m_inverseMass;
+		}
+	};
+protected:
+
+	// identifier for the individual cloth
+	// For the CPU we don't really need this as we can grab the cloths and iterate over only their vertices
+	// For a parallel accelerator knowing on a per-vertex basis which cloth we're part of will help for obtaining
+	// per-cloth data
+	// For sorting etc it might also be helpful to be able to use in-array data such as this.
+	btAlignedObjectArray< int >							m_clothIdentifier;
+	btAlignedObjectArray< Vectormath::Aos::Point3 >		m_vertexPosition;			// vertex positions
+	btAlignedObjectArray< Vectormath::Aos::Point3 >		m_vertexPreviousPosition;	// vertex positions
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_vertexVelocity;			// Velocity
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_vertexForceAccumulator;	// Force accumulator
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_vertexNormal;				// Normals
+	btAlignedObjectArray< float >						m_vertexInverseMass;		// Inverse mass
+	btAlignedObjectArray< float >						m_vertexArea;				// Area controlled by the vertex
+	btAlignedObjectArray< int >							m_vertexTriangleCount;		// Number of triangles touching this vertex
+
+public:
+	btSoftBodyVertexData()
+	{
+	}
+
+	virtual ~btSoftBodyVertexData()
+	{
+	}
+
+	virtual void clear()
+	{
+		m_clothIdentifier.resize(0);
+		m_vertexPosition.resize(0);
+		m_vertexPreviousPosition.resize(0);
+		m_vertexVelocity.resize(0);
+		m_vertexForceAccumulator.resize(0);
+		m_vertexNormal.resize(0);
+		m_vertexInverseMass.resize(0);
+		m_vertexArea.resize(0);
+		m_vertexTriangleCount.resize(0);
+	}
+
+	int getNumVertices()
+	{
+		return m_vertexPosition.size();
+	}
+
+	int getClothIdentifier( int vertexIndex )
+	{
+		return m_clothIdentifier[vertexIndex];
+	}
+
+	void setVertexAt( const VertexDescription &vertex, int vertexIndex )
+	{
+		m_vertexPosition[vertexIndex] = vertex.getPosition();
+		m_vertexPreviousPosition[vertexIndex] = vertex.getPosition();
+		m_vertexVelocity[vertexIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
+		m_vertexForceAccumulator[vertexIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
+		m_vertexNormal[vertexIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
+		m_vertexInverseMass[vertexIndex] = vertex.getInverseMass();
+		m_vertexArea[vertexIndex] = 0.f;
+		m_vertexTriangleCount[vertexIndex] = 0;
+	}
+
+	/** 
+	 * Create numVertices new vertices for cloth clothIdentifier 
+	 * maxVertices allows a buffer zone of extra vertices for alignment or tearing reasons.
+	 */
+	void createVertices( int numVertices, int clothIdentifier, int maxVertices = 0 )
+	{
+		int previousSize = m_vertexPosition.size();
+		if( maxVertices == 0 )
+			maxVertices = numVertices;
+		int newSize = previousSize + maxVertices;
+
+		// Resize all the arrays that store vertex data
+		m_clothIdentifier.resize( newSize );
+		m_vertexPosition.resize( newSize );
+		m_vertexPreviousPosition.resize( newSize );
+		m_vertexVelocity.resize( newSize );
+		m_vertexForceAccumulator.resize( newSize );
+		m_vertexNormal.resize( newSize );
+		m_vertexInverseMass.resize( newSize );
+		m_vertexArea.resize( newSize );
+		m_vertexTriangleCount.resize( newSize );
+
+		for( int vertexIndex = previousSize; vertexIndex < newSize; ++vertexIndex )
+			m_clothIdentifier[vertexIndex] = clothIdentifier;
+		for( int vertexIndex = (previousSize + numVertices); vertexIndex < newSize; ++vertexIndex )
+			m_clothIdentifier[vertexIndex] = -1;
+	}
+
+	// Get and set methods in header so they can be inlined
+
+	/**
+	 * Return a reference to the position of vertex vertexIndex as stored on the host.
+	 */
+	Vectormath::Aos::Point3 &getPosition( int vertexIndex )
+	{
+		return m_vertexPosition[vertexIndex];
+	}
+
+	Vectormath::Aos::Point3 getPosition( int vertexIndex ) const
+	{
+		return m_vertexPosition[vertexIndex];
+	}
+
+	/**
+	 * Return a reference to the previous position of vertex vertexIndex as stored on the host.
+	 */
+	Vectormath::Aos::Point3 &getPreviousPosition( int vertexIndex )
+	{
+		return m_vertexPreviousPosition[vertexIndex];
+	}
+
+	/**
+	 * Return a reference to the velocity of vertex vertexIndex as stored on the host.
+	 */
+	Vectormath::Aos::Vector3 &getVelocity( int vertexIndex )
+	{
+		return m_vertexVelocity[vertexIndex];
+	}
+
+	/**
+	 * Return a reference to the force accumulator of vertex vertexIndex as stored on the host.
+	 */
+	Vectormath::Aos::Vector3 &getForceAccumulator( int vertexIndex )
+	{
+		return m_vertexForceAccumulator[vertexIndex];
+	}
+
+	/**
+	 * Return a reference to the normal of vertex vertexIndex as stored on the host.
+	 */
+	Vectormath::Aos::Vector3 &getNormal( int vertexIndex ) // non-const overload: writable reference into m_vertexNormal
+	{
+		return m_vertexNormal[vertexIndex]; // index is forwarded unchanged; no range check is added here
+	}
+
+	Vectormath::Aos::Vector3 getNormal( int vertexIndex ) const // const overload: returns the normal by value (a copy)
+	{
+		return m_vertexNormal[vertexIndex]; // index is forwarded unchanged; no range check is added here
+	}
+
+	/**
+	 * Return a reference to the inverse mass of vertex vertexIndex as stored on the host.
+	 */
+	float &getInverseMass( int vertexIndex ) // writable reference into m_vertexInverseMass
+	{
+		return m_vertexInverseMass[vertexIndex]; // index is forwarded unchanged; no range check is added here
+	}
+
+	/**
+	 * Get access to the area controlled by this vertex.
+	 */
+	float &getArea( int vertexIndex ) // writable reference into m_vertexArea
+	{
+		return m_vertexArea[vertexIndex]; // index is forwarded unchanged; no range check is added here
+	}
+
+	/**
+	 * Get access to the array of how many triangles touch each vertex.
+	 */
+	int &getTriangleCount( int vertexIndex ) // writable reference to one vertex's entry in m_vertexTriangleCount
+	{
+		return m_vertexTriangleCount[vertexIndex]; // index is forwarded unchanged; no range check is added here
+	}
+
+
+
+	/**
+	 * Return true if data is on the accelerator.
+	 * The CPU version of this class will return true here because
+	 * the CPU is the same as the accelerator.
+	 */
+	virtual bool onAccelerator() // virtual so that other implementations can report their own state
+	{
+		return true; // unconditional in this implementation
+	}
+	
+	/**
+	 * Move data from host memory to the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveToAccelerator() // no data is copied by this implementation
+	{
+		return true; // always reports success
+	}
+
+	/**
+	 * Move data to host memory from the accelerator if bCopy is false.
+	 * If bCopy is true, copy data to host memory from the accelerator so that data 
+	 * won't be moved to accelerator when moveToAccelerator() is called next time. 
+	 * If bCopyMinimum is true, only vertex position and normal are copied.
+	 * bCopyMinimum will be meaningful only if bCopy is true.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true) // both flags are ignored by this implementation
+	{
+		return true; // always reports success; no data is copied here
+	}
+
+	btAlignedObjectArray< Vectormath::Aos::Point3 >	&getVertexPositions() // exposes the whole position array by non-const reference
+	{
+		return m_vertexPosition; // callers can read, modify or resize the array through this reference
+	}
+};
+
+
+class btSoftBodyTriangleData // Host-side per-triangle storage: vertex index triples, areas and normals.
+{
+public:
+	/**
+	 * Class representing a triangle as a set of three indices into the
+	 * vertex array.
+	 */
+	class TriangleNodeSet
+	{
+	public:
+		int vertex0;
+		int vertex1;
+		int vertex2;
+		int _padding; // fourth int after the three indices; every constructor sets it to -1
+
+		TriangleNodeSet( )
+		{
+			vertex0 = 0;
+			vertex1 = 0;
+			vertex2 = 0;
+			_padding = -1;
+		}
+
+		TriangleNodeSet( int newVertex0, int newVertex1, int newVertex2 )
+		{
+			vertex0 = newVertex0;
+			vertex1 = newVertex1;
+			vertex2 = newVertex2;
+			_padding = -1; // match the default constructor rather than leaving the member indeterminate
+		}
+	};
+
+	class TriangleDescription // caller-facing description of one triangle; converted to a TriangleNodeSet for storage
+	{
+	protected:
+		int m_vertex0;
+		int m_vertex1;
+		int m_vertex2;
+
+	public:
+		TriangleDescription()
+		{
+			m_vertex0 = 0;
+			m_vertex1 = 0;
+			m_vertex2 = 0;
+		}
+
+		TriangleDescription( int newVertex0, int newVertex1, int newVertex2 )
+		{
+			m_vertex0 = newVertex0;
+			m_vertex1 = newVertex1;
+			m_vertex2 = newVertex2;
+		}
+
+		TriangleNodeSet getVertexSet() const // returns the three indices as a TriangleNodeSet (by value)
+		{
+			btSoftBodyTriangleData::TriangleNodeSet nodes; // default-constructed, so _padding is -1
+			nodes.vertex0 = m_vertex0;
+			nodes.vertex1 = m_vertex1;
+			nodes.vertex2 = m_vertex2;
+			return nodes;
+		}
+	};
+
+protected:
+	// NOTE:
+	// Vertex reference data is stored relative to global array, not relative to individual cloth.
+	// Values must be correct if being passed into single-cloth VBOs or when migrating from one solver
+	// to another.
+	btAlignedObjectArray< TriangleNodeSet > m_vertexIndices; // one entry per triangle
+	btAlignedObjectArray< float > m_area; // resized together with m_vertexIndices
+	btAlignedObjectArray< Vectormath::Aos::Vector3 > m_normal; // resized together with m_vertexIndices
+
+public:
+	btSoftBodyTriangleData()
+	{
+	}
+
+	virtual ~btSoftBodyTriangleData() // virtual: the class has virtual members and is meant to be derived from
+	{
+	}
+
+	virtual void clear() // drops all triangles by resizing the three arrays to zero
+	{
+		m_vertexIndices.resize(0);
+		m_area.resize(0);
+		m_normal.resize(0);
+	}
+
+	int getNumTriangles() // number of triangles currently stored
+	{
+		return m_vertexIndices.size();
+	}
+
+	virtual void setTriangleAt( const TriangleDescription &triangle, int triangleIndex ) // overwrites the index triple of an existing slot
+	{
+		m_vertexIndices[triangleIndex] = triangle.getVertexSet(); // the slot must already exist (see createTriangles); area and normal are not touched
+	}
+
+	virtual void createTriangles( int numTriangles ) // appends numTriangles slots to the end of every per-triangle array
+	{
+		int previousSize = m_vertexIndices.size();
+		int newSize = previousSize + numTriangles;
+
+		// Resize all the arrays that store triangle data
+		m_vertexIndices.resize( newSize );
+		m_area.resize( newSize );
+		m_normal.resize( newSize );
+	}
+
+	/**
+	 * Return the vertex index set for triangle triangleIndex as stored on the host.
+	 */
+	const TriangleNodeSet &getVertexSet( int triangleIndex )
+	{
+		return m_vertexIndices[triangleIndex];
+	}
+
+	/**
+	 * Get access to the triangle area.
+	 */
+	float &getTriangleArea( int triangleIndex )
+	{
+		return m_area[triangleIndex];
+	}
+
+	/**
+	 * Get access to the normal vector for this triangle.
+	 */
+	Vectormath::Aos::Vector3 &getNormal( int triangleIndex )
+	{
+		return m_normal[triangleIndex];
+	}
+
+	/**
+	 * Return true if data is on the accelerator.
+	 * The CPU version of this class will return true here because
+	 * the CPU is the same as the accelerator.
+	 */
+	virtual bool onAccelerator()
+	{
+		return true;
+	}
+	
+	/**
+	 * Move data from host memory to the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveToAccelerator()
+	{
+		return true;
+	}
+
+	/**
+	 * Move data to host memory from the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveFromAccelerator()
+	{
+		return true;
+	}
+};
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_DATA_H
+
diff --git a/src/bullet/BulletMultiThreaded/HeapManager.h b/src/bullet/BulletMultiThreaded/HeapManager.h
new file mode 100644
index 00000000..b2da4ef5
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/HeapManager.h
@@ -0,0 +1,117 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef BT_HEAP_MANAGER_H__
+#define BT_HEAP_MANAGER_H__
+
+#ifdef __SPU__
+	#define HEAP_STACK_SIZE 32 // number of entries in HeapManager::mPoolStack when __SPU__ is defined
+#else
+	#define HEAP_STACK_SIZE 64 // number of entries in HeapManager::mPoolStack otherwise
+#endif
+
+#define MIN_ALLOC_SIZE 16 // request size substituted by HeapManager::allocate() when asked for zero bytes
+
+
+class HeapManager // Stack-style allocator that carves blocks out of a caller-supplied buffer.
+{
+private:
+	ATTRIBUTE_ALIGNED16(unsigned char *mHeap); // start of the caller-supplied buffer; never freed by this class
+	ATTRIBUTE_ALIGNED16(unsigned int mHeapBytes); // size of that buffer in bytes
+	ATTRIBUTE_ALIGNED16(unsigned char *mPoolStack[HEAP_STACK_SIZE]); // mPoolStack[i] = first free address after i allocations
+	ATTRIBUTE_ALIGNED16(unsigned int mCurStack); // index of the current top entry in mPoolStack
+	
+public:
+	enum {ALIGN16,ALIGN128}; // values accepted by the 'alignment' argument of allocate()
+
+	HeapManager(unsigned char *buf,int bytes) // buf/bytes describe the memory to manage; the buffer is only referenced, not copied
+	{
+		mHeap = buf;
+		mHeapBytes = bytes;
+		clear();
+	}
+	
+	~HeapManager() // nothing to release: the buffer belongs to the caller
+	{
+	}
+	
+	int getAllocated() // bytes between the buffer start and the current top, including any alignment gaps
+	{
+		return (int)(mPoolStack[mCurStack]-mHeap);
+	}
+	
+	int getRest() // bytes of the buffer that lie beyond the current top
+	{
+		return mHeapBytes-getAllocated();
+	}
+
+	void *allocate(size_t bytes,int alignment = ALIGN16) // returns the start of a new block taken from the current top
+	{
+		if(bytes <= 0) bytes = MIN_ALLOC_SIZE; // size_t is unsigned, so this only triggers for a zero-byte request
+		btAssert(mCurStack < (HEAP_STACK_SIZE-1)); // the push below needs a free mPoolStack entry
+
+		
+#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64__)
+		unsigned long long p = (unsigned long long )mPoolStack[mCurStack]; // 64-bit build: do the address arithmetic in 64 bits
+		if(alignment == ALIGN128) {
+			p = ((p+127) & 0xffffffffffffff80); // round the start address up to a multiple of 128
+			bytes = (bytes+127) & 0xffffffffffffff80; // round the size up to a multiple of 128
+		}
+		else {
+			bytes = (bytes+15) & 0xfffffffffffffff0; // round the size up to a multiple of 16; the start address is used as-is
+		}
+
+		btAssert(bytes <=(mHeapBytes-(p-(unsigned long long )mHeap)) ); // block must fit in what is left; only checked when btAssert is active
+		
+#else
+		unsigned long p = (unsigned long )mPoolStack[mCurStack]; // same logic as above using unsigned long and 32-bit masks
+		if(alignment == ALIGN128) {
+			p = ((p+127) & 0xffffff80);
+			bytes = (bytes+127) & 0xffffff80;
+		}
+		else {
+			bytes = (bytes+15) & 0xfffffff0;
+		}
+		btAssert(bytes <=(mHeapBytes-(p-(unsigned long)mHeap)) );
+#endif
+		unsigned char * bla = (unsigned char *)(p + bytes); // first address after the new block
+		mPoolStack[++mCurStack] = bla; // push it as the new top
+		return (void*)p;
+	}
+
+	void deallocate(void *p) // pops the most recent allocation, whatever pointer is passed in
+	{
+		(void) p; // the argument is deliberately unused
+		mCurStack--; // NOTE(review): no guard against popping when mCurStack is already 0 (unsigned wrap-around)
+	}
+	
+	void clear() // releases every allocation at once by resetting the stack to the buffer start
+	{
+		mPoolStack[0] = mHeap;
+		mCurStack = 0;
+	}
+
+//	void printStack()
+//	{
+//		for(unsigned int i=0;i<=mCurStack;i++) {
+//			PRINTF("memStack %2d 0x%x\n",i,(uint32_t)mPoolStack[i]);
+//		}
+//	}
+
+};
+
+#endif //BT_HEAP_MANAGER_H__
+
diff --git a/src/bullet/BulletMultiThreaded/PlatformDefinitions.h b/src/bullet/BulletMultiThreaded/PlatformDefinitions.h
new file mode 100644
index 00000000..142103a0
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/PlatformDefinitions.h
@@ -0,0 +1,99 @@
+#ifndef BT_TYPE_DEFINITIONS_H
+#define BT_TYPE_DEFINITIONS_H
+
+///This file provides some platform/compiler checks for common definitions
+#include "LinearMath/btScalar.h"
+#include "LinearMath/btMinMax.h"
+
+#ifdef PFX_USE_FREE_VECTORMATH
+#include "physics_effects/base_level/base/pfx_vectormath_include.win32.h"
+typedef Vectormath::Aos::Vector3    vmVector3;
+typedef Vectormath::Aos::Quat       vmQuat;
+typedef Vectormath::Aos::Matrix3    vmMatrix3;
+typedef Vectormath::Aos::Transform3 vmTransform3;
+typedef Vectormath::Aos::Point3     vmPoint3;
+#else
+#include "vectormath/vmInclude.h"
+#endif//PFX_USE_FREE_VECTORMATH
+
+
+
+
+
+#ifdef _WIN32
+
+typedef union // _WIN32 variant of addr64: overlays an unsigned int with a pointer
+{
+  unsigned int u; // integer view of the storage
+  void *p; // pointer view of the same storage
+} addr64;
+
+#define USE_WIN32_THREADING 1
+
+		#if defined(__MINGW32__) || defined(__CYGWIN__) || (defined (_MSC_VER) && _MSC_VER < 1300)
+		#else
+		#endif //__MINGW32__
+
+		typedef unsigned char     uint8_t; // fixed-width aliases declared by hand in the _WIN32 branch (the other branch includes <stdint.h>)
+#ifndef __PHYSICS_COMMON_H__
+#ifndef PFX_USE_FREE_VECTORMATH
+#ifndef __BT_SKIP_UINT64_H
+		typedef unsigned long int uint64_t; // NOTE(review): 'unsigned long' is 32 bits wide on LLP64 Windows compilers, so this alias is presumably not 64-bit there — confirm
+#endif //__BT_SKIP_UINT64_H
+#endif //PFX_USE_FREE_VECTORMATH
+		typedef unsigned int      uint32_t; // skipped when __PHYSICS_COMMON_H__ is defined
+#endif //__PHYSICS_COMMON_H__
+		typedef unsigned short    uint16_t;
+
+		#include <malloc.h>
+		#define memalign(alignment, size) malloc(size) // the alignment argument is discarded: this is a plain malloc() expression, with no trailing ';' so it can sit inside larger expressions
+			
+#include <string.h> //memcpy
+
+		
+
+		#include <stdio.h>		
+		#define spu_printf printf
+		
+#else
+		#include <stdint.h>
+		#include <stdlib.h>
+		#include <string.h> //for memcpy
+
+#if defined	(__CELLOS_LV2__)
+	// Playstation 3 Cell SDK
+#include <spu_printf.h>
+		
+#else
+	// posix system
+
+#define USE_PTHREADS    (1)
+
+#ifdef USE_LIBSPE2
+#include <stdio.h>		
+#define spu_printf printf	
+#define DWORD unsigned int
+			typedef union // USE_LIBSPE2 variant of addr64: one storage area with three views
+			{
+			  unsigned long long ull; // whole value as one unsigned long long
+			  unsigned int ui[2]; // the same storage as two unsigned ints
+			  void *p; // the same storage as a pointer
+			} addr64;
+#endif // USE_LIBSPE2
+
+#endif	//__CELLOS_LV2__
+	
+#endif
+
+#ifdef __SPU__
+#include <stdio.h>		
+#define printf spu_printf
+#endif
+
+/* Included here because we need uint*_t typedefs */
+#include "PpuAddressSpace.h"
+
+#endif //BT_TYPE_DEFINITIONS_H
+
+
+
diff --git a/src/bullet/BulletMultiThreaded/PosixThreadSupport.cpp b/src/bullet/BulletMultiThreaded/PosixThreadSupport.cpp
new file mode 100644
index 00000000..c8b49ee3
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/PosixThreadSupport.cpp
@@ -0,0 +1,399 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <stdio.h>
+#include "PosixThreadSupport.h"
+#ifdef USE_PTHREADS
+#include <errno.h>
+#include <unistd.h>
+
+#include "SpuCollisionTaskProcess.h"
+#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
+
+#define checkPThreadFunction(returnValue) \
+    if(0 != returnValue) { \
+        printf("PThread problem at line %i in file %s: %i %d\n", __LINE__, __FILE__, returnValue, errno); \
+    }
+
+// The number of threads should be equal to the number of available cores
+// Todo: each worker should be linked to a single core, using SetThreadIdealProcessor.
+
+// PosixThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
+// Setup and initialize SPU/CELL/Libspe2
+PosixThreadSupport::PosixThreadSupport(ThreadConstructionInfo& threadConstructionInfo) // worker threads are created immediately, during construction
+{
+	startThreads(threadConstructionInfo); // creates the semaphores and one pthread per requested thread
+}
+
+// cleanup/shutdown Libspe2
+PosixThreadSupport::~PosixThreadSupport() // shuts the workers down before the object goes away
+{
+	stopSPU(); // asks every worker to exit, joins it and destroys the semaphores
+}
+
+#if (defined (__APPLE__))
+#define NAMED_SEMAPHORES // selects the sem_open()/sem_close() code paths in createSem()/destroySem()
+#endif
+
+// this semaphore will signal, if and how many threads are finished with their work
+static sem_t* mainSemaphore=0; // file-scope, so it is shared by every PosixThreadSupport object in this translation unit
+
+static sem_t* createSem(const char* baseName) // returns a new semaphore with initial count 0; baseName is only used for named semaphores
+{
+	static int semCount = 0; // running counter that keeps generated names distinct within the process
+#ifdef NAMED_SEMAPHORES
+        /// Named semaphore begin
+        char name[32];
+        snprintf(name, 32, "/%s-%d-%4.4d", baseName, getpid(), semCount++); // name = base name + process id + counter
+        sem_t* tempSem = sem_open(name, O_CREAT, 0600, 0);
+
+        if (tempSem != reinterpret_cast<sem_t *>(SEM_FAILED))
+        {
+//        printf("Created \"%s\" Semaphore %p\n", name, tempSem);
+        }
+        else
+	{
+		//printf("Error creating Semaphore %d\n", errno);
+		exit(-1); // creation failure terminates the whole process
+	}
+        /// Named semaphore end
+#else
+	sem_t* tempSem = new sem_t; // released again by destroySem()
+	checkPThreadFunction(sem_init(tempSem, 0, 0)); // failure is only reported, the semaphore is still returned
+#endif
+	return tempSem;
+}
+
+static void destroySem(sem_t* semaphore) // counterpart of createSem()
+{
+#ifdef NAMED_SEMAPHORES
+	checkPThreadFunction(sem_close(semaphore)); // only closes the handle; no sem_unlink() call is made here
+#else
+	checkPThreadFunction(sem_destroy(semaphore));
+	delete semaphore; // matches the 'new sem_t' in createSem()
+#endif	
+}
+
+static void *threadFunction(void *argument) // pthread entry point; argument is the worker's PosixThreadSupport::btSpuStatus slot
+{
+
+	PosixThreadSupport::btSpuStatus* status = (PosixThreadSupport::btSpuStatus*)argument;
+
+	
+	while (1)
+	{
+            checkPThreadFunction(sem_wait(status->startSemaphore)); // sleep until this worker's start semaphore is posted
+		
+		void* userPtr = status->m_userPtr;
+
+		if (userPtr)
+		{
+			btAssert(status->m_status);
+			status->m_userThreadFunc(userPtr,status->m_lsMemory);
+			status->m_status = 2; // 2 = task finished; waitForResponse() scans for this value
+	                status->threadUsed++; // bump the counter before signalling so the woken thread never reads it mid-update
+			checkPThreadFunction(sem_post(mainSemaphore));
+		} else {
+			//exit Thread
+			status->m_status = 3; // 3 = worker is leaving its loop (a null user pointer is the exit request)
+			checkPThreadFunction(sem_post(mainSemaphore));
+			printf("Thread with taskId %i exiting\n",status->m_taskId);
+			break;
+		}
+		
+	}
+
+	printf("Thread TERMINATED\n");
+	return 0;
+
+}
+
+///send messages to SPUs
+void PosixThreadSupport::sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t taskId)
+{
+	// Hands a work item to the worker thread that owns slot taskId.
+	//  uiCommand   - only CMD_GATHER_AND_PROCESS_PAIRLIST is handled; anything else asserts.
+	//  uiArgument0 - stored as the slot's user pointer and later passed to the user thread function.
+	//  taskId      - index into m_activeSpuStatus.
+	// The call only posts the worker's start semaphore; it does not wait for completion
+	// (waitForResponse does that).
+	switch (uiCommand)
+	{
+	case 	CMD_GATHER_AND_PROCESS_PAIRLIST:
+		{
+			// validate before indexing; taskId is unsigned, so only the upper bound needs checking
+			btAssert(taskId < uint32_t(m_activeSpuStatus.size()));
+			btSpuStatus&	spuStatus = m_activeSpuStatus[taskId];
+
+			spuStatus.m_commandId = uiCommand;
+			spuStatus.m_status = 1;
+			spuStatus.m_userPtr = (void*)uiArgument0;
+
+			// fire event to start new task
+			checkPThreadFunction(sem_post(spuStatus.startSemaphore));
+			break;
+		}
+	default:
+		{
+			///not implemented
+			btAssert(0);
+		}
+
+	};
+
+
+}
+
+
+///check for messages from SPUs
+void PosixThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
+{
+	// Blocks until a worker posts mainSemaphore, then reports the first slot found with
+	// status 2 (finished). *puiArgument0 receives that slot's task id; *puiArgument1
+	// receives its status, which has just been reset to 0 and is therefore always 0.
+
+
+	btAssert(m_activeSpuStatus.size());
+
+	// wait for any of the threads to finish
+	checkPThreadFunction(sem_wait(mainSemaphore));
+
+	// get at least one thread which has finished
+	const size_t notFound = size_t(-1);
+	size_t last = notFound;
+	for(size_t t=0; t < size_t(m_activeSpuStatus.size()); ++t) {
+		if(2 == m_activeSpuStatus[t].m_status) {
+			last = t;
+			break;
+		}
+	}
+
+	// need to find a finished task: 'last' is unsigned, so it has to be compared with the
+	// sentinel, and the check has to come before the array access below
+	btAssert(last != notFound);
+	btSpuStatus& spuStatus = m_activeSpuStatus[last];
+
+	btAssert(spuStatus.m_status > 1);
+	spuStatus.m_status = 0;
+
+	*puiArgument0 = spuStatus.m_taskId;
+	*puiArgument1 = spuStatus.m_status;
+}
+
+
+
+void PosixThreadSupport::startThreads(ThreadConstructionInfo& threadConstructionInfo) // creates mainSemaphore plus one semaphore/pthread pair per requested thread
+{
+        printf("%s creating %i threads.\n", __FUNCTION__, threadConstructionInfo.m_numThreads);
+	m_activeSpuStatus.resize(threadConstructionInfo.m_numThreads); // sized once up front; each worker is handed the address of its element
+        
+	mainSemaphore = createSem("main");                
+	//checkPThreadFunction(sem_wait(mainSemaphore));
+   
+	for (int i=0;i < threadConstructionInfo.m_numThreads;i++)
+	{
+		printf("starting thread %d\n",i);
+
+		btSpuStatus&	spuStatus = m_activeSpuStatus[i];
+
+		spuStatus.startSemaphore = createSem("threadLocal");                
+                
+                checkPThreadFunction(pthread_create(&spuStatus.thread, NULL, &threadFunction, (void*)&spuStatus)); // default attributes (NULL): m_threadStackSize is not applied here
+
+		spuStatus.m_userPtr=0; // the fields below are filled in after the thread exists; threadFunction waits on startSemaphore before reading them
+
+		spuStatus.m_taskId = i;
+		spuStatus.m_commandId = 0;
+		spuStatus.m_status = 0;
+		spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc(); // one call per worker
+		spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
+        spuStatus.threadUsed = 0; // counts how many tasks this worker has completed
+
+		printf("started thread %d \n",i);
+		
+	}
+
+}
+
+void PosixThreadSupport::startSPU() // intentionally empty: the threads are already created by startThreads()
+{
+}
+
+
+///tell the task scheduler we are done with the SPU tasks
+void PosixThreadSupport::stopSPU() // asks every worker to exit, waits for it, frees its resources; safe to call more than once
+{
+	for(size_t t=0; t < size_t(m_activeSpuStatus.size()); ++t) 
+	{
+		btSpuStatus&	spuStatus = m_activeSpuStatus[t];
+		printf("%s: Thread %i used: %ld\n", __FUNCTION__, int(t), spuStatus.threadUsed);
+		spuStatus.m_userPtr = 0;       // a null user pointer is the worker's exit request
+		checkPThreadFunction(sem_post(spuStatus.startSemaphore));
+		checkPThreadFunction(sem_wait(mainSemaphore)); // wait for the worker's acknowledgement
+		printf("destroy semaphore\n"); 
+		destroySem(spuStatus.startSemaphore);
+		printf("semaphore destroyed\n");
+		checkPThreadFunction(pthread_join(spuStatus.thread,0));
+	}
+	if (mainSemaphore) { // null when an earlier stopSPU() (e.g. an explicit call before the destructor) already released it
+		printf("destroy main semaphore\n");
+		destroySem(mainSemaphore); mainSemaphore = 0;
+		printf("main semaphore destroyed\n");
+	}
+	m_activeSpuStatus.clear();
+}
+
+class PosixCriticalSection : public btCriticalSection // btCriticalSection implemented with a pthread mutex
+{
+	pthread_mutex_t m_mutex; // initialised in the constructor, destroyed in the destructor
+	
+public:
+	PosixCriticalSection() 
+	{
+		pthread_mutex_init(&m_mutex, NULL); // default mutex attributes; the return value is not checked
+	}
+	virtual ~PosixCriticalSection() 
+	{
+		pthread_mutex_destroy(&m_mutex);
+	}
+	
+	ATTRIBUTE_ALIGNED16(unsigned int mCommonBuff[32]); // storage behind getSharedParam()/setSharedParam()
+	
+	virtual unsigned int getSharedParam(int i) // reads slot i; no range check and no locking in this method
+	{
+		return mCommonBuff[i];
+	}
+	virtual void setSharedParam(int i,unsigned int p) // writes slot i; no range check and no locking in this method
+	{
+		mCommonBuff[i] = p;
+	}
+	
+	virtual void lock() // blocks until the mutex is acquired
+	{
+		pthread_mutex_lock(&m_mutex);
+	}
+	virtual void unlock()
+	{
+		pthread_mutex_unlock(&m_mutex);
+	}
+};
+
+
+#if defined(_POSIX_BARRIERS) && (_POSIX_BARRIERS - 20012L) >= 0
+/* OK to use barriers on this platform */
+class PosixBarrier : public btBarrier // variant built on pthread_barrier_t
+{
+	pthread_barrier_t m_barr; // initialised by setMaxCount(), not by the constructor
+	int m_numThreads; // participant count last passed to setMaxCount()
+public:
+	PosixBarrier()
+	:m_numThreads(0)	{	}
+	virtual ~PosixBarrier()	{
+		pthread_barrier_destroy(&m_barr); // NOTE(review): runs even if setMaxCount() was never called, i.e. on an uninitialised barrier
+	}
+	
+	virtual void sync() // blocks until the configured number of threads have called sync()
+	{
+		int rc = pthread_barrier_wait(&m_barr);
+		if(rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD) // both 0 and PTHREAD_BARRIER_SERIAL_THREAD are treated as success
+		{
+			printf("Could not wait on barrier\n");
+			exit(-1); // any other result terminates the process
+		}
+	}
+	virtual void setMaxCount(int numThreads) // (re)initialises the barrier for numThreads participants
+	{
+		int result = pthread_barrier_init(&m_barr, NULL, numThreads); // NOTE(review): a previously initialised barrier is not destroyed first
+		m_numThreads = numThreads;
+		btAssert(result==0);
+	}
+	virtual int  getMaxCount()
+	{
+		return m_numThreads;
+	}
+};
+#else
+/* Not OK to use barriers on this platform - insert alternate code here */
+class PosixBarrier : public btBarrier // fallback barrier built from a mutex and a condition variable
+{
+	pthread_mutex_t m_mutex; // guards m_called and m_generation; valid only after setMaxCount() with a positive count
+	pthread_cond_t m_cond; // signalled when the last participant of a round arrives
+	
+	int m_numThreads; // number of participants per round
+	int	m_called; // participants that have arrived in the current round
+	unsigned int m_generation; // round counter; lets waiters tell a real release from a spurious wakeup
+public:
+	PosixBarrier()
+	:m_numThreads(0), m_called(0), m_generation(0)
+	{
+	}
+	virtual ~PosixBarrier() 
+	{
+		if (m_numThreads>0)
+		{
+			pthread_mutex_destroy(&m_mutex);
+			pthread_cond_destroy(&m_cond);
+		}
+	}
+	
+	virtual void sync() // blocks until m_numThreads threads have called sync() for the current round
+	{		
+		pthread_mutex_lock(&m_mutex);
+		const unsigned int generation = m_generation; // the round this thread is taking part in
+		m_called++;
+		if (m_called == m_numThreads) {
+			m_called = 0; m_generation++; // last arrival: reset the count and open the next round
+			pthread_cond_broadcast(&m_cond);
+		} else {
+			while (generation == m_generation) pthread_cond_wait(&m_cond,&m_mutex); // loop: the wait may return without a broadcast
+		}
+		pthread_mutex_unlock(&m_mutex);
+	}
+	virtual void setMaxCount(int numThreads) // (re)creates the mutex/condition pair for numThreads participants
+	{
+		if (m_numThreads>0)
+		{
+			pthread_mutex_destroy(&m_mutex);
+			pthread_cond_destroy(&m_cond);
+		}
+		m_called = 0;
+		pthread_mutex_init(&m_mutex,NULL);
+		pthread_cond_init(&m_cond,NULL);
+		m_numThreads = numThreads;
+	}
+	virtual int  getMaxCount()
+	{
+		return m_numThreads;
+	}
+};
+
+#endif//_POSIX_BARRIERS
+
+
+
+btBarrier* PosixThreadSupport::createBarrier() // returns a heap-allocated barrier; nothing in this function keeps track of it for later deletion
+{
+	PosixBarrier* barrier = new PosixBarrier();
+	barrier->setMaxCount(getNumTasks()); // one participant per entry in m_activeSpuStatus
+	return barrier;
+}
+
+btCriticalSection* PosixThreadSupport::createCriticalSection() // returns a heap-allocated critical section; nothing in this function keeps track of it for later deletion
+{
+	return new PosixCriticalSection();
+}
+
+#endif // USE_PTHREADS
+
diff --git a/src/bullet/BulletMultiThreaded/PosixThreadSupport.h b/src/bullet/BulletMultiThreaded/PosixThreadSupport.h
new file mode 100644
index 00000000..ca47e450
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/PosixThreadSupport.h
@@ -0,0 +1,142 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_POSIX_THREAD_SUPPORT_H
+#define BT_POSIX_THREAD_SUPPORT_H
+
+
+#include "LinearMath/btScalar.h"
+#include "PlatformDefinitions.h"
+
+#ifdef USE_PTHREADS //platform specifc defines are defined in PlatformDefinitions.h
+
+#ifndef _XOPEN_SOURCE
+#define _XOPEN_SOURCE 600 //for definition of pthread_barrier_t, see http://pages.cs.wisc.edu/~travitch/pthreads_primer.html
+#endif //_XOPEN_SOURCE
+#include <pthread.h>
+#include <semaphore.h>
+
+
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+#include "btThreadSupportInterface.h"
+
+
+typedef void (*PosixThreadFunc)(void* userPtr,void* lsMemory); // task callback: receives the request's user pointer and the worker's local-store memory
+typedef void* (*PosixlsMemorySetupFunc)(); // returns the memory block stored as a worker's m_lsMemory
+
+// PosixThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
+class PosixThreadSupport : public btThreadSupportInterface 
+{
+public:
+    typedef enum sStatus { // NOTE(review): PosixThreadSupport.cpp stores the literals 0-3 in btSpuStatus::m_status rather than these enumerators — confirm whether this enum is used anywhere
+        STATUS_BUSY,
+        STATUS_READY,
+        STATUS_FINISHED
+    } Status;
+
+	// placeholder, until libspe2 support is there
+	struct	btSpuStatus // per-worker bookkeeping; one element per thread in m_activeSpuStatus
+	{
+		uint32_t	m_taskId; // index of this worker
+		uint32_t	m_commandId; // command of the most recent request
+		uint32_t	m_status; // numeric state written by sendRequest(), the worker thread and waitForResponse()
+
+		PosixThreadFunc	m_userThreadFunc; // callback the worker runs for each request
+		void*	m_userPtr; //for taskDesc etc
+		void*	m_lsMemory; //initialized using PosixLocalStoreMemorySetupFunc
+
+                pthread_t thread; // handle of the worker thread
+                sem_t* startSemaphore; // posted to wake this worker
+
+        unsigned long threadUsed; // number of requests this worker has completed
+	};
+private:
+
+	btAlignedObjectArray<btSpuStatus>	m_activeSpuStatus; // one entry per worker thread
+public:
+	///Setup and initialize SPU/CELL/Libspe2
+
+	
+
+	struct	ThreadConstructionInfo // parameters consumed by the constructor / startThreads()
+	{
+		ThreadConstructionInfo(const char* uniqueName,
+									PosixThreadFunc userThreadFunc,
+									PosixlsMemorySetupFunc	lsMemoryFunc,
+									int numThreads=1,
+									int threadStackSize=65535
+									)
+									:m_uniqueName(uniqueName),
+									m_userThreadFunc(userThreadFunc),
+									m_lsMemoryFunc(lsMemoryFunc),
+									m_numThreads(numThreads),
+									m_threadStackSize(threadStackSize)
+		{
+
+		}
+
+		const char*					m_uniqueName; // pointer is stored as-is; the string is not copied
+		PosixThreadFunc			m_userThreadFunc; // copied into every worker's btSpuStatus
+		PosixlsMemorySetupFunc	m_lsMemoryFunc; // called once per worker to obtain its m_lsMemory
+		int						m_numThreads; // number of worker threads to create
+		int						m_threadStackSize; // NOTE(review): not read by startThreads() in PosixThreadSupport.cpp, which passes NULL attributes to pthread_create
+
+	};
+
+	PosixThreadSupport(ThreadConstructionInfo& threadConstructionInfo); // starts the worker threads
+
+///cleanup/shutdown Libspe2
+	virtual	~PosixThreadSupport(); // calls stopSPU()
+
+	void	startThreads(ThreadConstructionInfo&	threadInfo);
+
+
+///send messages to SPUs
+	virtual	void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1); // the definition names the third parameter taskId and uses it as the worker index
+
+///check for messages from SPUs
+	virtual	void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1); // blocks until a worker reports completion
+
+///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
+	virtual	void startSPU();
+
+///tell the task scheduler we are done with the SPU tasks
+	virtual	void stopSPU();
+
+	virtual void setNumTasks(int numTasks) {} // no-op: the thread count is fixed by startThreads()
+
+	virtual int getNumTasks() const // number of worker slots currently held
+	{
+		return m_activeSpuStatus.size();
+	}
+
+	virtual btBarrier* createBarrier();
+
+	virtual btCriticalSection* createCriticalSection();
+	
+	virtual void*	getThreadLocalMemory(int taskId) // returns the m_lsMemory pointer of worker taskId; no range check is added here
+	{
+		return m_activeSpuStatus[taskId].m_lsMemory;
+	}
+
+};
+
+#endif // USE_PTHREADS
+
+#endif // BT_POSIX_THREAD_SUPPORT_H
+
+
diff --git a/src/bullet/BulletMultiThreaded/PpuAddressSpace.h b/src/bullet/BulletMultiThreaded/PpuAddressSpace.h
new file mode 100644
index 00000000..6f228274
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/PpuAddressSpace.h
@@ -0,0 +1,37 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2010 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_PPU_ADDRESS_SPACE_H
+#define BT_PPU_ADDRESS_SPACE_H
+
+
+#ifdef _WIN32
+//stop those casting warnings until we have a better solution for ppu_address_t / void* / uint64 conversions
+#pragma warning (disable: 4311)
+#pragma warning (disable: 4312)
+#endif //_WIN32
+
+
+#if defined(_WIN64)
+	typedef unsigned __int64 ppu_address_t; // 64-bit Windows: compiler-specific 64-bit integer
+#elif defined(__LP64__) || defined(__x86_64__)
+	typedef uint64_t ppu_address_t; // other 64-bit targets; uint64_t must already be declared by the including file
+#else
+	typedef uint32_t ppu_address_t; // everything else: 32-bit integer; uint32_t must already be declared by the including file
+#endif //defined(_WIN64)
+
+#endif //BT_PPU_ADDRESS_SPACE_H
+
diff --git a/src/bullet/BulletMultiThreaded/SequentialThreadSupport.cpp b/src/bullet/BulletMultiThreaded/SequentialThreadSupport.cpp
new file mode 100644
index 00000000..8cc72418
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SequentialThreadSupport.cpp
@@ -0,0 +1,169 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SequentialThreadSupport.h"
+
+
+#include "SpuCollisionTaskProcess.h"
+#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
+
+SequentialThreadSupport::SequentialThreadSupport(SequentialThreadConstructionInfo& threadConstructionInfo) // sets up the single task slot during construction
+{
+	startThreads(threadConstructionInfo); // despite the name, no thread is created
+}
+
+///cleanup/shutdown Libspe2
+SequentialThreadSupport::~SequentialThreadSupport() // releases the task slot
+{
+	stopSPU(); // clears m_activeSpuStatus
+}
+
+#include <stdio.h>
+
+///send messages to SPUs
+void SequentialThreadSupport::sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t taskId)
+{
+	// Runs the request synchronously on the calling thread; taskId is not consulted
+	// because slot 0 is the only one this function ever touches.
+	const bool isGatherAndProcess = (uiCommand == CMD_GATHER_AND_PROCESS_PAIRLIST);
+
+	if (!isGatherAndProcess)
+	{
+		// Any other command is unsupported.
+		btAssert(0 && "Not implemented");
+		return;
+	}
+
+	btSpuStatus& slot = m_activeSpuStatus[0];
+
+	// Remember the argument as the user pointer, then hand it to the
+	// user function together with the slot's local-store memory.
+	void* const userPtr = (void*)uiArgument0;
+	slot.m_userPtr = userPtr;
+	slot.m_userThreadFunc(userPtr, slot.m_lsMemory);
+}
+
+///check for messages from SPUs
+void SequentialThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1) // never blocks: sendRequest() has already run the task
+{
+	btAssert(m_activeSpuStatus.size());
+	btSpuStatus& spuStatus = m_activeSpuStatus[0]; // slot 0 is the only slot
+	*puiArgument0 = spuStatus.m_taskId; // out: task id of slot 0
+	*puiArgument1 = spuStatus.m_status; // out: status of slot 0
+}
+
+void SequentialThreadSupport::startThreads(SequentialThreadConstructionInfo& threadConstructionInfo) // initialises the single task slot; creates no thread
+{
+	m_activeSpuStatus.resize(1); // exactly one slot, whatever the construction info asks for
+	printf("STS: Not starting any threads\n");
+	btSpuStatus& spuStatus = m_activeSpuStatus[0];
+	spuStatus.m_userPtr = 0;
+	spuStatus.m_taskId = 0;
+	spuStatus.m_commandId = 0;
+	spuStatus.m_status = 0;
+	spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc(); // obtain the local-store block for the slot
+	spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc; // callback later run by sendRequest()
+	printf("STS: Created local store at %p for task %s\n", spuStatus.m_lsMemory, threadConstructionInfo.m_uniqueName);
+}
+
+void SequentialThreadSupport::startSPU() // intentionally empty
+{
+}
+
+void SequentialThreadSupport::stopSPU() // drops the task slot; the memory behind m_lsMemory is not freed here
+{
+	m_activeSpuStatus.clear();
+}
+
+void SequentialThreadSupport::setNumTasks(int numTasks) // only prints a notice; the number of slots is not changed
+{
+	printf("SequentialThreadSupport::setNumTasks(%d) is not implemented and has no effect\n",numTasks);
+}
+
+
+
+
+class btDummyBarrier : public btBarrier // barrier that never blocks, for the sequential thread support
+{
+private:
+		
+public:
+	btDummyBarrier()
+	{
+	}
+	
+	virtual ~btDummyBarrier()
+	{
+	}
+	
+	void sync() // returns immediately
+	{
+	}
+	
+	virtual void setMaxCount(int n) {} // the requested count is ignored
+	virtual int  getMaxCount() {return 1;} // always reports a single participant
+};
+
+class btDummyCriticalSection : public btCriticalSection // critical section without real locking, for the sequential thread support
+{
+	// mCommonBuff is not declared in this class — presumably inherited from btCriticalSection; TODO confirm.
+public:
+	btDummyCriticalSection()
+	{
+	}
+	
+	virtual ~btDummyCriticalSection()
+	{
+	}
+	
+	unsigned int getSharedParam(int i) // reads shared parameter i (valid range 0..30 per the assert)
+	{
+		btAssert(i>=0&&i<31);
+		return mCommonBuff[i+1]; // parameters live at offset i+1; slot 0 is used by lock()/unlock()
+	}
+	
+	void setSharedParam(int i,unsigned int p) // writes shared parameter i (valid range 0..30 per the assert)
+	{
+		btAssert(i>=0&&i<31);
+		mCommonBuff[i+1] = p; // parameters live at offset i+1; slot 0 is used by lock()/unlock()
+	}
+	
+	void lock() // only records a flag; nothing blocks
+	{
+		mCommonBuff[0] = 1;
+	}
+	
+	void unlock() // clears the flag set by lock()
+	{
+		mCommonBuff[0] = 0;
+	}
+};
+
+
+
+
+btBarrier*	SequentialThreadSupport::createBarrier() // returns a heap-allocated non-blocking barrier; nothing in this function keeps track of it for later deletion
+{
+	return new btDummyBarrier();
+}
+
+btCriticalSection* SequentialThreadSupport::createCriticalSection() // returns a heap-allocated critical section; nothing in this function keeps track of it for later deletion
+{
+	return new btDummyCriticalSection(); // flag-only implementation with no real locking
+	
+}
+
+
+
diff --git a/src/bullet/BulletMultiThreaded/SequentialThreadSupport.h b/src/bullet/BulletMultiThreaded/SequentialThreadSupport.h
new file mode 100644
index 00000000..2b9ade82
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SequentialThreadSupport.h
@@ -0,0 +1,96 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "LinearMath/btScalar.h"
+#include "PlatformDefinitions.h"
+
+
+#ifndef BT_SEQUENTIAL_THREAD_SUPPORT_H
+#define BT_SEQUENTIAL_THREAD_SUPPORT_H
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+#include "btThreadSupportInterface.h"
+///Callback types: the task entry point taking (userPtr, lsMemory), and the parameterless factory returning the lsMemory pointer.
+typedef void (*SequentialThreadFunc)(void* userPtr,void* lsMemory);
+typedef void* (*SequentiallsMemorySetupFunc)();
+
+
+
+///The SequentialThreadSupport is a portable non-parallel implementation of the btThreadSupportInterface
+///This is useful for debugging and porting SPU Tasks to other platforms.
+class SequentialThreadSupport : public btThreadSupportInterface 
+{
+public:
+	struct	btSpuStatus
+	{
+		uint32_t	m_taskId;
+		uint32_t	m_commandId;
+		uint32_t	m_status;
+		///task entry point, copied from the construction info
+		SequentialThreadFunc	m_userThreadFunc;
+
+		void*	m_userPtr; //for taskDesc etc
+		void*	m_lsMemory; //initialized using SequentiallsMemorySetupFunc
+	};
+private:
+	btAlignedObjectArray<btSpuStatus>	m_activeSpuStatus;
+	btAlignedObjectArray<void*>			m_completeHandles;	
+public:
+	struct	SequentialThreadConstructionInfo
+	{
+		SequentialThreadConstructionInfo (const char* uniqueName,
+									SequentialThreadFunc userThreadFunc,
+									SequentiallsMemorySetupFunc	lsMemoryFunc
+									)
+									:m_uniqueName(uniqueName),
+									m_userThreadFunc(userThreadFunc),
+									m_lsMemoryFunc(lsMemoryFunc)
+		{
+			///nothing else to do: the initializer list stores all three arguments
+		}
+
+		const char*						m_uniqueName;
+		SequentialThreadFunc		m_userThreadFunc;
+		SequentiallsMemorySetupFunc	m_lsMemoryFunc;
+	};
+
+	SequentialThreadSupport(SequentialThreadConstructionInfo& threadConstructionInfo);
+	virtual	~SequentialThreadSupport();
+	void	startThreads(SequentialThreadConstructionInfo&	threadInfo);
+///send messages to SPUs
+	virtual	void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1);
+///check for messages from SPUs
+	virtual	void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
+///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
+	virtual	void startSPU();
+///tell the task scheduler we are done with the SPU tasks
+	virtual	void stopSPU();
+	///not implemented for this class: the definition only prints a notice
+	virtual void setNumTasks(int numTasks);
+	///always reports a single task
+	virtual int getNumTasks() const
+	{
+		return 1;
+	}
+	virtual btBarrier*	createBarrier();
+
+	virtual btCriticalSection* createCriticalSection();
+	
+
+};
+
+#endif //BT_SEQUENTIAL_THREAD_SUPPORT_H
+
diff --git a/src/bullet/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp b/src/bullet/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp
new file mode 100644
index 00000000..182aa269
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuCollisionObjectWrapper.cpp
@@ -0,0 +1,48 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuCollisionObjectWrapper.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+///Default constructor: empty body and no initializer list, so it assigns none of the members.
+SpuCollisionObjectWrapper::SpuCollisionObjectWrapper ()
+{
+}
+///Compiled only when __SPU__ is not defined: caches the shape type, the shape margin and the object's address.
+#ifndef __SPU__
+SpuCollisionObjectWrapper::SpuCollisionObjectWrapper (const btCollisionObject* collisionObject)
+{
+	m_shapeType = collisionObject->getCollisionShape()->getShapeType ();
+	m_collisionObjectPtr = (ppu_address_t)collisionObject; //pointer kept as an integer-style address
+	m_margin = collisionObject->getCollisionShape()->getMargin ();
+}
+#endif
+///Returns the stored m_shapeType unchanged.
+int
+SpuCollisionObjectWrapper::getShapeType () const
+{
+	return m_shapeType;
+}
+///Returns the stored m_margin unchanged.
+float
+SpuCollisionObjectWrapper::getCollisionMargin () const
+{
+	return m_margin;
+}
+///Returns the stored m_collisionObjectPtr (the address recorded by the btCollisionObject constructor).
+ppu_address_t
+SpuCollisionObjectWrapper::getCollisionObjectPtr () const
+{
+	return m_collisionObjectPtr;
+}
diff --git a/src/bullet/BulletMultiThreaded/SpuCollisionObjectWrapper.h b/src/bullet/BulletMultiThreaded/SpuCollisionObjectWrapper.h
new file mode 100644
index 00000000..f90da277
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuCollisionObjectWrapper.h
@@ -0,0 +1,40 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SPU_COLLISION_OBJECT_WRAPPER_H
+#define BT_SPU_COLLISION_OBJECT_WRAPPER_H
+
+#include "PlatformDefinitions.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+///SpuCollisionObjectWrapper stores a collision object's shape type, margin and address behind read-only accessors.
+ATTRIBUTE_ALIGNED16(class) SpuCollisionObjectWrapper
+{
+protected:
+	int m_shapeType;
+	float m_margin;
+	ppu_address_t m_collisionObjectPtr;
+
+public:
+	SpuCollisionObjectWrapper ();
+	///declared unconditionally here; the .cpp defines this overload only when __SPU__ is not defined
+	SpuCollisionObjectWrapper (const btCollisionObject* collisionObject);
+
+	int           getShapeType () const;
+	float         getCollisionMargin () const;
+	ppu_address_t getCollisionObjectPtr () const;
+};
+
+
+#endif //BT_SPU_COLLISION_OBJECT_WRAPPER_H
diff --git a/src/bullet/BulletMultiThreaded/SpuCollisionTaskProcess.cpp b/src/bullet/BulletMultiThreaded/SpuCollisionTaskProcess.cpp
new file mode 100644
index 00000000..f606d136
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuCollisionTaskProcess.cpp
@@ -0,0 +1,317 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+//#define DEBUG_SPU_TASK_SCHEDULING 1
+
+
+//class OptimizedBvhNode;
+
+#include "SpuCollisionTaskProcess.h"
+
+
+
+
+void	SpuCollisionTaskProcess::setNumTasks(int maxNumTasks)
+{
+	///same count as before: leave the arrays and the work-unit buffer untouched
+	if (int(m_maxNumOutstandingTasks) == maxNumTasks)
+	{
+		return;
+	}
+
+	m_maxNumOutstandingTasks = maxNumTasks;
+	m_taskBusy.resize(m_maxNumOutstandingTasks);
+	m_spuGatherTaskDesc.resize(m_maxNumOutstandingTasks);
+
+	///every slot, old or new, is marked idle
+	const int numSlots = m_taskBusy.size();
+	for (int slot = 0; slot < numSlots; ++slot)
+		m_taskBusy[slot] = false;
+
+	///re-allocate task memory buffers: MIDPHASE_WORKUNIT_TASK_SIZE bytes per task, 128-byte aligned
+	if (m_workUnitTaskBuffers)
+		btAlignedFree(m_workUnitTaskBuffers);
+
+	m_workUnitTaskBuffers = (unsigned char *)btAlignedAlloc(MIDPHASE_WORKUNIT_TASK_SIZE*m_maxNumOutstandingTasks, 128);
+}
+
+
+///Stores the thread interface, sizes the task arrays/buffer via setNumTasks(), zeroes the scheduling cursors and calls startSPU().
+SpuCollisionTaskProcess::SpuCollisionTaskProcess(class	btThreadSupportInterface*	threadInterface, unsigned int	maxNumOutstandingTasks)
+:m_threadInterface(threadInterface),
+m_maxNumOutstandingTasks(0)
+{
+	m_workUnitTaskBuffers = (unsigned char *)0;
+	setNumTasks(maxNumOutstandingTasks); //count starts at 0 and the buffer at null, so a non-zero request allocates here
+	m_numBusyTasks = 0;
+	m_currentTask = 0;
+	m_currentPage = 0;
+	m_currentPageEntry = 0;
+	m_useEpa = false; //previously indeterminate until initialize2(); issueTask2() copies it into the task descriptor
+#ifdef DEBUG_SpuCollisionTaskProcess
+	m_initialized = false;
+#endif
+
+	m_threadInterface->startSPU();
+
+	//printf("sizeof vec_float4: %d\n", sizeof(vec_float4));
+	printf("sizeof SpuGatherAndProcessWorkUnitInput: %d\n", int(sizeof(SpuGatherAndProcessWorkUnitInput)));
+
+}
+///Frees the work-unit buffer (if allocated) and then calls stopSPU() on the thread interface.
+SpuCollisionTaskProcess::~SpuCollisionTaskProcess()
+{
+	
+	if (m_workUnitTaskBuffers != 0)
+	{
+		btAlignedFree(m_workUnitTaskBuffers);
+		m_workUnitTaskBuffers = 0;
+	}
+	
+
+	///m_threadInterface is only stopped here; this destructor does not delete it
+	m_threadInterface->stopSPU();
+	
+}
+
+
+///Resets the scheduling state: marks every task idle, zeroes the busy count and the task/page/entry cursors, and records useEpa.
+void SpuCollisionTaskProcess::initialize2(bool useEpa)
+{
+
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("SpuCollisionTaskProcess::initialize()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+	
+	for (int i = 0; i < int (m_maxNumOutstandingTasks); i++)
+	{
+		m_taskBusy[i] = false;
+	}
+	m_numBusyTasks = 0;
+	m_currentTask = 0;
+	m_currentPage = 0;
+	m_currentPageEntry = 0;
+	m_useEpa = useEpa;
+	///with DEBUG_SpuCollisionTaskProcess: flag the object initialized and check that one task region can hold all of its work units
+#ifdef DEBUG_SpuCollisionTaskProcess
+	m_initialized = true;
+	btAssert(MIDPHASE_NUM_WORKUNITS_PER_TASK*sizeof(SpuGatherAndProcessWorkUnitInput) <= MIDPHASE_WORKUNIT_TASK_SIZE);
+#endif
+}
+
+///Marks m_currentTask busy, fills in its descriptor and sends it; if that makes every task busy, waits for one response and frees that task.
+void SpuCollisionTaskProcess::issueTask2()
+{
+
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("SpuCollisionTaskProcess::issueTask (m_currentTask= %u)\n", m_currentTask);
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+	m_taskBusy[m_currentTask] = true;
+	m_numBusyTasks++;
+
+
+	SpuGatherAndProcessPairsTaskDesc& taskDesc = m_spuGatherTaskDesc[m_currentTask];
+	taskDesc.m_useEpa = m_useEpa;
+
+	{
+		// send task description in event message
+		// no error checking here...
+		// but, currently, event queue can be no larger than NUM_WORKUNIT_TASKS.
+	
+		taskDesc.m_inPairPtr = reinterpret_cast<uint64_t>(MIDPHASE_TASK_PTR(m_currentTask));
+	
+		taskDesc.taskId = m_currentTask;
+		taskDesc.numPages = m_currentPage+1;
+		taskDesc.numOnLastPage = m_currentPageEntry;
+	}
+
+
+	// the descriptor's address and the task index travel with the command
+	m_threadInterface->sendRequest(CMD_GATHER_AND_PROCESS_PAIRLIST, (ppu_address_t) &taskDesc,m_currentTask);
+
+	// if all tasks busy, wait for spu event to clear the task.
+	
+
+	if (m_numBusyTasks >= m_maxNumOutstandingTasks)
+	{
+		unsigned int taskId = (unsigned int)-1; // "none found" sentinel, same convention as flush2()
+		unsigned int outputSize = 0;
+
+		// seed taskId with the first busy slot; waitForResponse() receives &taskId and may overwrite it
+		for (int i=0;i<int (m_maxNumOutstandingTasks);i++)
+		  {
+			  if (m_taskBusy[i])
+			  {
+				  taskId = i;
+				  break;
+			  }
+		  }
+
+	  btAssert(taskId < m_maxNumOutstandingTasks); // the old "taskId>=0" could never fail on an unsigned value
+
+	  
+		m_threadInterface->waitForResponse(&taskId, &outputSize);
+
+//		printf("issueTask taskId %d completed, numBusy=%d\n",taskId,m_numBusyTasks);
+
+		//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
+
+		//postProcess(taskId, outputSize);
+
+		m_taskBusy[taskId] = false;
+
+		m_numBusyTasks--;
+	}
+	
+}
+///Appends one work unit (pairArrayPtr, startIndex, endIndex) to the current task, issuing the task first when its last page is full.
+void SpuCollisionTaskProcess::addWorkToTask(void* pairArrayPtr,int startIndex,int endIndex)
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("#");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+	
+#ifdef DEBUG_SpuCollisionTaskProcess
+	btAssert(m_initialized);
+	btAssert(m_workUnitTaskBuffers);
+
+#endif
+	// batch is hard-wired to true, so the "!batch" branch further down never executes
+	bool batch = true;
+
+	if (batch)
+	{
+		if (m_currentPageEntry == MIDPHASE_NUM_WORKUNITS_PER_PAGE)
+		{
+			if (m_currentPage == MIDPHASE_NUM_WORKUNIT_PAGES-1)
+			{
+				// task buffer is full, issue current task.
+				// if all task buffers busy, this waits until SPU is done.
+				issueTask2();
+
+				// find new task buffer
+				for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++)
+				{
+					if (!m_taskBusy[i])
+					{
+						m_currentTask = i;
+						//init the task data
+
+						break;
+					}
+				}
+
+				m_currentPage = 0;
+			}
+			else
+			{
+				m_currentPage++;
+			}
+
+			m_currentPageEntry = 0;
+		}
+	}
+
+	{
+
+		// address the slot selected by (task, page, entry) inside the work-unit buffer and fill it in
+
+		SpuGatherAndProcessWorkUnitInput &wuInput = 
+			*(reinterpret_cast<SpuGatherAndProcessWorkUnitInput*>
+			(MIDPHASE_ENTRY_PTR(m_currentTask, m_currentPage, m_currentPageEntry)));
+		
+		wuInput.m_pairArrayPtr = reinterpret_cast<uint64_t>(pairArrayPtr);
+		wuInput.m_startIndex = startIndex;
+		wuInput.m_endIndex = endIndex;
+
+		
+	
+		m_currentPageEntry++;
+
+		if (!batch)
+		{
+			issueTask2();
+
+			// find new task buffer
+			for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++)
+			{
+				if (!m_taskBusy[i])
+				{
+					m_currentTask = i;
+					//init the task data
+
+					break;
+				}
+			}
+
+			m_currentPage = 0;
+			m_currentPageEntry =0;
+		}
+	}
+}
+
+///Issues the partially filled task (if any), then waits for one response per outstanding task until none are busy.
+void 
+SpuCollisionTaskProcess::flush2()
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("\nSpuCollisionTaskProcess::flush()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+	
+	// if there's a partially filled task buffer, submit that task
+	if (m_currentPage > 0 || m_currentPageEntry > 0)
+	{
+		issueTask2();
+	}
+
+
+	// all tasks are issued, wait for all tasks to be complete
+	while(m_numBusyTasks > 0)
+	{
+	  // Consolidating SPU code
+	  unsigned int taskId = (unsigned int)-1; // "none found" sentinel
+	  unsigned int outputSize = 0;
+	  
+	  for (int i=0;i<int (m_maxNumOutstandingTasks);i++)
+	  {
+		  if (m_taskBusy[i])
+		  {
+			  taskId = i;
+			  break;
+		  }
+	  }
+
+	  btAssert(taskId < m_maxNumOutstandingTasks); // the old "taskId>=0" could never fail on an unsigned value
+
+	
+	  {
+			
+		// SPURS support.
+		  m_threadInterface->waitForResponse(&taskId, &outputSize);
+	  }
+//		 printf("flush2 taskId %d completed, numBusy =%d \n",taskId,m_numBusyTasks);
+		//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
+
+		//postProcess(taskId, outputSize);
+
+		m_taskBusy[taskId] = false;
+
+		m_numBusyTasks--;
+	}
+
+
+}
diff --git a/src/bullet/BulletMultiThreaded/SpuCollisionTaskProcess.h b/src/bullet/BulletMultiThreaded/SpuCollisionTaskProcess.h
new file mode 100644
index 00000000..23b5b05a
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuCollisionTaskProcess.h
@@ -0,0 +1,163 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SPU_COLLISION_TASK_PROCESS_H
+#define BT_SPU_COLLISION_TASK_PROCESS_H
+
+#include <assert.h>
+
+#include "LinearMath/btScalar.h"
+
+#include "PlatformDefinitions.h"
+#include "LinearMath/btAlignedObjectArray.h"
+#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h" // for definitions processCollisionTask and createCollisionLocalStoreMemory
+
+#include "btThreadSupportInterface.h"
+
+
+//#include "SPUAssert.h"
+#include <string.h>
+
+
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+
+#include "LinearMath/btAlignedAllocator.h"
+
+#include <stdio.h>
+
+
+#define DEBUG_SpuCollisionTaskProcess 1
+
+
+#define CMD_GATHER_AND_PROCESS_PAIRLIST	1
+
+class btCollisionObject;
+class btPersistentManifold;
+class btDispatcher;
+
+
+/////Task Description for SPU collision detection
+//struct SpuGatherAndProcessPairsTaskDesc
+//{
+//	uint64_t	inPtr;//m_pairArrayPtr;
+//	//mutex variable
+//	uint32_t	m_someMutexVariableInMainMemory;
+//
+//	uint64_t	m_dispatcher;
+//
+//	uint32_t	numOnLastPage;
+//
+//	uint16_t numPages;
+//	uint16_t taskId;
+//
+//	struct	CollisionTask_LocalStoreMemory*	m_lsMemory; 
+//}
+//
+//#if  defined(__CELLOS_LV2__) || defined(USE_LIBSPE2)
+//__attribute__ ((aligned (16)))
+//#endif
+//;
+
+
+///SpuGatherAndProcessWorkUnitInput describes one work unit: a pair-array address plus the start/end indices handed to addWorkToTask().
+ATTRIBUTE_ALIGNED16(struct) SpuGatherAndProcessWorkUnitInput
+{
+	uint64_t m_pairArrayPtr; //pointer value stored as a 64-bit integer
+	int		m_startIndex;
+	int		m_endIndex;
+};
+
+
+
+
+/// SpuCollisionTaskProcess handles SPU processing of collision pairs.
+/// Maintains a set of task buffers.
+/// When the task is full, the task is issued for SPUs to process.  Contact output goes into btPersistentManifold
+/// associated with each task.
+/// When PPU issues a task, it will look for completed task buffers
+/// PPU will do postprocessing, dependent on workunit output (not likely)
+class SpuCollisionTaskProcess
+{
+	// single allocation made in setNumTasks(): MIDPHASE_WORKUNIT_TASK_SIZE bytes for each task
+  unsigned char  *m_workUnitTaskBuffers;
+
+
+	// track task buffers that are being used, and total busy tasks
+	btAlignedObjectArray<bool>	m_taskBusy;
+	btAlignedObjectArray<SpuGatherAndProcessPairsTaskDesc>	m_spuGatherTaskDesc;
+	// the destructor calls stopSPU() on this interface but does not delete it
+	class	btThreadSupportInterface*	m_threadInterface;
+
+	unsigned int	m_maxNumOutstandingTasks;
+
+	unsigned int   m_numBusyTasks;
+
+	// the current task and the current entry to insert a new work unit
+	unsigned int   m_currentTask;
+	unsigned int   m_currentPage;
+	unsigned int   m_currentPageEntry;
+	// assigned by initialize2() and copied into each task descriptor by issueTask2()
+	bool m_useEpa;
+
+#ifdef DEBUG_SpuCollisionTaskProcess
+	bool m_initialized;
+#endif
+	void issueTask2();
+	//void postProcess(unsigned int taskId, int outputSize);
+
+public:
+	SpuCollisionTaskProcess(btThreadSupportInterface*	threadInterface, unsigned int maxNumOutstandingTasks);
+	
+	~SpuCollisionTaskProcess();
+	
+	///call initialize2 at the beginning of the frame, before addWorkToTask
+	void initialize2(bool useEpa = false);
+
+	///batch up additional work to a current task for SPU processing. When batch is full, it issues the task.
+	void addWorkToTask(void* pairArrayPtr,int startIndex,int endIndex);
+
+	///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished
+	void flush2();
+
+	/// set the maximum number of SPU tasks allocated
+	void	setNumTasks(int maxNumTasks);
+	/// current maximum number of outstanding tasks, as set by setNumTasks
+	int		getNumTasks() const
+	{
+		return m_maxNumOutstandingTasks;
+	}
+};
+
+
+//Address helpers for the work-unit buffer; macro arguments are parenthesized so expression arguments (e.g. a+1) expand correctly.
+#define MIDPHASE_TASK_PTR(task) (&m_workUnitTaskBuffers[0] + MIDPHASE_WORKUNIT_TASK_SIZE*(task))
+#define MIDPHASE_ENTRY_PTR(task,page,entry) (MIDPHASE_TASK_PTR(task) + MIDPHASE_WORKUNIT_PAGE_SIZE*(page) + sizeof(SpuGatherAndProcessWorkUnitInput)*(entry))
+#define MIDPHASE_OUTPUT_PTR(task) (&m_contactOutputBuffers[0] + MIDPHASE_MAX_CONTACT_BUFFER_SIZE*(task))
+#define MIDPHASE_TREENODES_PTR(task) (&m_complexShapeBuffers[0] + MIDPHASE_COMPLEX_SHAPE_BUFFER_SIZE*(task))
+
+//Page/task sizes are byte counts within the unsigned char work-unit buffer.
+#define MIDPHASE_WORKUNIT_PAGE_SIZE (16)
+//#define MIDPHASE_WORKUNIT_PAGE_SIZE (128)
+
+#define MIDPHASE_NUM_WORKUNIT_PAGES 1
+#define MIDPHASE_WORKUNIT_TASK_SIZE (MIDPHASE_WORKUNIT_PAGE_SIZE*MIDPHASE_NUM_WORKUNIT_PAGES)
+#define MIDPHASE_NUM_WORKUNITS_PER_PAGE (MIDPHASE_WORKUNIT_PAGE_SIZE / sizeof(SpuGatherAndProcessWorkUnitInput))
+#define MIDPHASE_NUM_WORKUNITS_PER_TASK (MIDPHASE_NUM_WORKUNITS_PER_PAGE*MIDPHASE_NUM_WORKUNIT_PAGES)
+
+
+#endif // BT_SPU_COLLISION_TASK_PROCESS_H
+
diff --git a/src/bullet/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp b/src/bullet/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp
new file mode 100644
index 00000000..286b6319
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp
@@ -0,0 +1,69 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuContactManifoldCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h"
+
+
+
+///Body is only btAssert(0): no collision processing happens here and all arguments are unused.
+void SpuContactManifoldCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	btAssert(0);
+}
+///Body is btAssert(0) followed by a fixed return value of 1.f; all arguments are unused.
+btScalar SpuContactManifoldCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	btAssert(0);
+	return 1.f;
+}
+///Non-SPU builds only: obtains a manifold from the dispatcher and caches shape type, margin and object pointer of both bodies.
+#ifndef __SPU__
+SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1)
+:btCollisionAlgorithm(ci)
+#ifdef USE_SEPDISTANCE_UTIL
+,m_sepDistance(body0->getCollisionShape()->getAngularMotionDisc(),body1->getCollisionShape()->getAngularMotionDisc())
+#endif //USE_SEPDISTANCE_UTIL
+{
+	m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1);
+	m_shapeType0 = body0->getCollisionShape()->getShapeType();
+	m_shapeType1 = body1->getCollisionShape()->getShapeType();
+	m_collisionMargin0 = body0->getCollisionShape()->getMargin();
+	m_collisionMargin1 = body1->getCollisionShape()->getMargin();
+	m_collisionObject0 = body0;
+	m_collisionObject1 = body1;
+	//NOTE(review): m_shapeDimensions0/1 are assigned only for shapes reporting isPolyhedral(); nothing here sets them otherwise
+	if (body0->getCollisionShape()->isPolyhedral())
+	{
+		btPolyhedralConvexShape* convex0 = (btPolyhedralConvexShape*)body0->getCollisionShape();
+		m_shapeDimensions0 = convex0->getImplicitShapeDimensions();
+	}
+	if (body1->getCollisionShape()->isPolyhedral())
+	{
+		btPolyhedralConvexShape* convex1 = (btPolyhedralConvexShape*)body1->getCollisionShape();
+		m_shapeDimensions1 = convex1->getImplicitShapeDimensions();
+	}
+}
+#endif //__SPU__
+
+///Hands the manifold back to the dispatcher when one is held.
+SpuContactManifoldCollisionAlgorithm::~SpuContactManifoldCollisionAlgorithm()
+{
+	if (m_manifoldPtr)
+			m_dispatcher->releaseManifold(m_manifoldPtr);
+}
diff --git a/src/bullet/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h b/src/bullet/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h
new file mode 100644
index 00000000..d28d4db3
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h
@@ -0,0 +1,120 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
+#define BT_SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
+
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "LinearMath/btTransformUtil.h"
+
+class btPersistentManifold;
+
+//#define USE_SEPDISTANCE_UTIL 1
+
+/// SpuContactManifoldCollisionAlgorithm  provides contact manifold and should be processed on SPU.
+ATTRIBUTE_ALIGNED16(class) SpuContactManifoldCollisionAlgorithm : public btCollisionAlgorithm
+{
+	btVector3	m_shapeDimensions0;
+	btVector3	m_shapeDimensions1;
+	btPersistentManifold*	m_manifoldPtr;
+	int		m_shapeType0;
+	int		m_shapeType1;
+	float	m_collisionMargin0;
+	float	m_collisionMargin1;
+
+	btCollisionObject*	m_collisionObject0;
+	btCollisionObject*	m_collisionObject1;
+	
+	
+
+	
+public:
+	///both overrides below are defined in the .cpp as btAssert(0) stubs
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	///declared unconditionally; the .cpp defines it only when __SPU__ is not defined
+	SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
+#ifdef USE_SEPDISTANCE_UTIL
+	btConvexSeparatingDistanceUtil	m_sepDistance;
+#endif //USE_SEPDISTANCE_UTIL
+	///releases the manifold through the dispatcher if one is held
+	virtual ~SpuContactManifoldCollisionAlgorithm();
+	///appends the single manifold (when non-null) to manifoldArray
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		if (m_manifoldPtr)
+			manifoldArray.push_back(m_manifoldPtr);
+	}
+
+	btPersistentManifold*	getContactManifoldPtr()
+	{
+		return m_manifoldPtr;
+	}
+
+	btCollisionObject*	getCollisionObject0()
+	{
+		return m_collisionObject0;
+	}
+	
+	btCollisionObject*	getCollisionObject1()
+	{
+		return m_collisionObject1;
+	}
+
+	int		getShapeType0() const
+	{
+		return m_shapeType0;
+	}
+
+	int		getShapeType1() const
+	{
+		return m_shapeType1;
+	}
+	float	getCollisionMargin0() const
+	{
+		return m_collisionMargin0;
+	}
+	float	getCollisionMargin1() const
+	{
+		return m_collisionMargin1;
+	}
+	///NOTE(review): the constructor assigns the dimensions only for shapes reporting isPolyhedral()
+	const btVector3&	getShapeDimensions0() const
+	{
+		return m_shapeDimensions0;
+	}
+
+	const btVector3&	getShapeDimensions1() const
+	{
+		return m_shapeDimensions1;
+	}
+	///factory: placement-constructs the algorithm in memory obtained from ci.m_dispatcher1
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(SpuContactManifoldCollisionAlgorithm));
+			return new(mem) SpuContactManifoldCollisionAlgorithm(ci,body0,body1);
+		}
+	};
+
+};
+
+#endif //BT_SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
diff --git a/src/bullet/BulletMultiThreaded/SpuDoubleBuffer.h b/src/bullet/BulletMultiThreaded/SpuDoubleBuffer.h
new file mode 100644
index 00000000..558d6152
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuDoubleBuffer.h
@@ -0,0 +1,126 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_DOUBLE_BUFFER_H
+#define BT_DOUBLE_BUFFER_H
+
+#include "SpuFakeDma.h"
+#include "LinearMath/btScalar.h"
+
+
+///DoubleBuffer holds two arrays of <size> elements of T plus front/back pointers that swapBuffers() exchanges.
+template<class T, int size>
+class DoubleBuffer
+{
+#if defined(__SPU__) || defined(USE_LIBSPE2)
+	ATTRIBUTE_ALIGNED128( T m_buffer0[size] ) ;
+	ATTRIBUTE_ALIGNED128( T m_buffer1[size] ) ;
+#else
+	T m_buffer0[size];
+	T m_buffer1[size];
+#endif
+	
+	T *m_frontBuffer;
+	T *m_backBuffer;
+	// tag of the most recent get/put, and whether swapBuffers() still has to wait on it
+	unsigned int m_dmaTag;
+	bool m_dmaPending;
+public:
+	bool	isPending() const { return m_dmaPending;}
+	DoubleBuffer();
+	// clears the pending flag and points front at m_buffer0, back at m_buffer1
+	void init ();
+
+	// dma get and put commands
+	void backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag);
+	void backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag);
+
+	// gets pointer to a buffer
+	T *getFront();
+	T *getBack();
+
+	// if back buffer dma was started, wait for it to complete
+	// then move back to front and vice versa
+	T *swapBuffers();
+};
+///Constructor: delegates all setup to init().
+template<class T, int size>
+DoubleBuffer<T,size>::DoubleBuffer()
+{
+	init ();
+}
+///Clears the pending flag and points front/back at m_buffer0/m_buffer1; m_dmaTag is not assigned here.
+template<class T, int size>
+void DoubleBuffer<T,size>::init()
+{
+	this->m_dmaPending = false;
+	this->m_frontBuffer = &this->m_buffer0[0];
+	this->m_backBuffer = &this->m_buffer1[0];
+}
+///Marks a transfer pending under tag; when numBytes is non-zero, m_backBuffer becomes the pointer returned by cellDmaLargeGetReadOnly().
+template<class T, int size>
+void
+DoubleBuffer<T,size>::backBufferDmaGet(uint64_t ea, unsigned int numBytes, unsigned int tag)
+{
+	m_dmaPending = true;
+	m_dmaTag = tag;
+	if (numBytes)
+	{
+		m_backBuffer = (T*)cellDmaLargeGetReadOnly(m_backBuffer, ea, numBytes, tag, 0, 0);
+	}
+}
+///Marks a transfer pending under tag and passes the back buffer, ea and numBytes to cellDmaLargePut() (no numBytes check here).
+template<class T, int size>
+void
+DoubleBuffer<T,size>::backBufferDmaPut(uint64_t ea, unsigned int numBytes, unsigned int tag)
+{
+	m_dmaPending = true;
+	m_dmaTag = tag;
+	cellDmaLargePut(m_backBuffer, ea, numBytes, tag, 0, 0);
+}
+///Returns the current front-buffer pointer.
+template<class T, int size>
+T *
+DoubleBuffer<T,size>::getFront()
+{
+	return m_frontBuffer;
+}
+///Returns the current back-buffer pointer.
+template<class T, int size>
+T *
+DoubleBuffer<T,size>::getBack()
+{
+	return m_backBuffer;
+}
+///Waits on the recorded tag if a transfer is pending, then exchanges front and back and returns the new front pointer.
+template<class T, int size>
+T *
+DoubleBuffer<T,size>::swapBuffers()
+{
+	if (m_dmaPending)
+	{
+		cellDmaWaitTagStatusAll(1<<m_dmaTag);
+		m_dmaPending = false;
+	}
+	// exchange the two pointers; the underlying arrays are not copied
+	T *tmp = m_backBuffer;
+	m_backBuffer = m_frontBuffer;
+	m_frontBuffer = tmp;
+
+	return m_frontBuffer;
+}
+
+#endif
diff --git a/src/bullet/BulletMultiThreaded/SpuFakeDma.cpp b/src/bullet/BulletMultiThreaded/SpuFakeDma.cpp
new file mode 100644
index 00000000..b776a120
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuFakeDma.cpp
@@ -0,0 +1,215 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuFakeDma.h"
+#include <LinearMath/btScalar.h> //for btAssert
+//Disabling memcpy sometimes helps debugging DMA
+
+#define USE_MEMCPY 1
+#ifdef USE_MEMCPY
+
+#endif
+
+
+void*	cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)	// returns ls (SPU/libspe2 build) or ea reinterpreted as a pointer (other builds)
+{
+
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+	cellDmaLargeGet(ls,ea,size,tag,tid,rid);	// issues the get into ls; this function does not wait on the tag
+	return ls;
+#else
+	return (void*)(ppu_address_t)ea;	// nothing is copied in this configuration; ls is not written
+#endif
+}
+
+void*	cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)	// returns ls (SPU/libspe2 build) or ea reinterpreted as a pointer (other builds)
+{
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+	mfc_get(ls,ea,size,tag,0,0);	// note: tid and rid are not forwarded, 0 is passed for both
+	return ls;
+#else
+	return (void*)(ppu_address_t)ea;	// nothing is copied in this configuration; ls is not written
+#endif
+}
+
+
+
+
+void*	cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)	// returns ls (SPU/libspe2 build) or ea reinterpreted as a pointer (other builds)
+{
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+	cellDmaGet(ls,ea,size,tag,tid,rid);	// issues the get into ls; this function does not wait on the tag
+	return ls;
+#else
+	return (void*)(ppu_address_t)ea;	// nothing is copied in this configuration; ls is not written
+#endif
+}
+
+
+///Blocking fetch of a small, arbitrarily aligned block; not meant for frequent use. It stages the data in an aligned scratch buffer and asserts size < 32 bytes.
+int stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size)
+{
+	///copies size bytes from effective address ea into ls; the only value returned is 0
+	btAssert(size<32);
+	///staging starts up to 15 bytes into the scratch buffer and receives up to 31 bytes (46 in total), so 32 bytes could overflow; 48 covers the worst case
+	ATTRIBUTE_ALIGNED16(char	tmpBuffer[48]);
+
+
+	char* localStore = (char*)ls;
+	uint32_t i;
+	
+
+	///make sure last 4 bits are the same, for cellDmaSmallGet
+	uint32_t last4BitsOffset = ea & 0x0f;
+	char* tmpTarget = tmpBuffer + last4BitsOffset;
+	
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+	
+	int remainingSize = size;
+
+//#define FORCE_cellDmaUnalignedGet 1
+#ifdef FORCE_cellDmaUnalignedGet
+	cellDmaUnalignedGet(tmpTarget,ea,size,DMA_TAG(1),0,0);
+#else
+	char* remainingTmpTarget = tmpTarget;
+	uint64_t remainingEa = ea;
+
+	while (remainingSize)
+	{
+		switch (remainingSize)
+		{
+		case 1:
+		case 2:
+		case 4:
+		case 8:
+		case 16:
+			{
+				///power-of-two remainder: fetch it in one transfer and finish
+				mfc_get(remainingTmpTarget,remainingEa,remainingSize,DMA_TAG(1),0,0);
+				remainingSize=0;
+				break;
+			}
+		default:
+			{
+				///otherwise fetch the largest power-of-two chunk (16, 8, 4 or 2 bytes) below the remainder and loop
+				int actualSize = 0;
+
+				if (remainingSize > 16)
+					actualSize = 16;
+				else
+					if (remainingSize >8)
+						actualSize=8;
+					else
+						if (remainingSize >4)
+							actualSize=4;
+						else
+							if (remainingSize >2)
+								actualSize=2;
+				mfc_get(remainingTmpTarget,remainingEa,actualSize,DMA_TAG(1),0,0);
+				remainingSize-=actualSize;
+				remainingTmpTarget+=actualSize;
+				remainingEa += actualSize;
+			}
+		}
+	}
+#endif//FORCE_cellDmaUnalignedGet
+
+#else
+	char* mainMem = (char*)ea;
+	//no DMA in this configuration: ea is read directly as a pointer
+#ifdef USE_MEMCPY
+		
+		memcpy(tmpTarget,mainMem,size);
+#else
+		for ( i=0;i<size;i++)
+		{
+			tmpTarget[i] = mainMem[i];
+		}
+#endif //USE_MEMCPY
+
+#endif
+
+	cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+	//this is slowish, perhaps memcpy on SPU is smarter?
+	for (i=0; btLikely( i<size );i++)
+	{
+		localStore[i] = tmpTarget[i];
+	}
+
+	return 0;
+}
+
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+#else
+
+int	cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
+{
+	// Host emulation of a large DMA get: copies size bytes from the address held in ea into ls.
+	char* const dst = (char*)ls;
+	const char* const src = (const char*)ea;
+#ifdef USE_MEMCPY
+	memcpy(dst,src,size);
+#else
+	for (uint32_t n=0; n<size; ++n)
+	{
+		dst[n] = src[n];
+	}
+#endif //USE_MEMCPY
+	return 0;	// tag, tid and rid are unused; 0 is the only value returned
+}
+
+int	cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
+{
+	// Host emulation of a DMA get: copies size bytes from the address held in ea into ls.
+	// tag, tid and rid are accepted for signature compatibility and are not used.
+	char* const dst = (char*)ls;
+	const char* const src = (const char*)ea;
+
+#ifdef USE_MEMCPY
+	memcpy(dst,src,size);
+#else
+	for (uint32_t n=0; n<size; ++n)
+	{
+		dst[n] = src[n];
+	}
+#endif //USE_MEMCPY
+
+	return 0;	// the copy is complete on return; 0 is the only value returned
+}
+
+int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
+{
+	// Host emulation of a large DMA put: copies size bytes from ls to the address held in ea.
+	const char* const src = (const char*)ls;
+	char* const dst = (char*)ea;
+#ifdef USE_MEMCPY
+	memcpy(dst,src,size);
+#else
+	for (uint32_t n=0; n<size; ++n)
+	{
+		dst[n] = src[n];
+	}
+#endif //USE_MEMCPY
+	return 0;	// tag, tid and rid are unused; 0 is the only value returned
+}
+
+
+
+void	cellDmaWaitTagStatusAll(int ignore)	// host stand-in for the DMA tag wait; the argument is not used
+{
+	// intentionally empty: the emulated get/put functions in this file finish their copy before returning
+}
+
+#endif
diff --git a/src/bullet/BulletMultiThreaded/SpuFakeDma.h b/src/bullet/BulletMultiThreaded/SpuFakeDma.h
new file mode 100644
index 00000000..40e20393
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuFakeDma.h
@@ -0,0 +1,135 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_FAKE_DMA_H
+#define BT_FAKE_DMA_H
+
+
+#include "PlatformDefinitions.h"
+#include "LinearMath/btScalar.h"
+
+
+#ifdef __SPU__
+
+#ifndef USE_LIBSPE2
+
+#include <cell/dma.h>
+#include <stdint.h>
+
+#define DMA_TAG(xfer) (xfer + 1)
+#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
+
+#else // USE_LIBSPE2 is defined
+
+#define DMA_TAG(xfer) (xfer + 1)
+#define DMA_MASK(xfer) (1 << DMA_TAG(xfer))
+		
+#include <spu_mfcio.h>		
+		
+#define DEBUG_DMA		
+#ifdef DEBUG_DMA
+#define dUASSERT(a,b) if (!(a)) { printf(b);}
+#define uintsize ppu_address_t
+		
+#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
+															dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
+															dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
+															dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
+															dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
+															dUASSERT(size < 16384, "size too big: "); \
+															dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
+	    													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
+															printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
+															} \
+															mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaGet(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
+														dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
+														dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
+														dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
+														dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
+    													dUASSERT(size < 16384, "size too big: "); \
+														dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
+    													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
+    													printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
+														} \
+														mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaLargePut(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
+															dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
+															dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
+															dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
+															dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
+        													dUASSERT(size < 16384, "size too big: "); \
+															dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
+        													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
+    														printf("PUT %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ls,(unsigned int)ea,(unsigned int)size); \
+															} \
+															mfc_put(ls, ea, size, tag, tid, rid)
+#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) if (  (((uintsize)ls%16) != ((uintsize)ea%16)) || ((((uintsize)ea%16) || ((uintsize)ls%16)) && (( ((uintsize)ls%16) != ((uintsize)size%16) ) || ( ((uintsize)ea%16) != ((uintsize)size%16) ) ) ) || ( ((uintsize)size%16) && ((uintsize)size!=1) && ((uintsize)size!=2) && ((uintsize)size!=4) && ((uintsize)size!=8) ) || (size >= 16384) || !(uintsize)ls || !(uintsize)ea) { \
+																dUASSERT( (((uintsize)ea % 16) == 0) || (size < 16), "XDR Address not aligned: "); \
+																dUASSERT( (((uintsize)ls % 16) == 0) || (size < 16), "LS Address not aligned: "); \
+																dUASSERT( ((((uintsize)ls % size) == 0) && (((uintsize)ea % size) == 0))  || (size > 16), "Not naturally aligned: "); \
+    															dUASSERT((size == 1) || (size == 2) || (size == 4) || (size == 8) || ((size % 16) == 0), "size not a multiple of 16byte: "); \
+    															dUASSERT(size < 16384, "size too big: "); \
+    															dUASSERT( ((uintsize)ea%16)==((uintsize)ls%16), "wrong Quadword alignment of LS and EA: "); \
+    	    													dUASSERT(ea != 0, "Nullpointer EA: "); dUASSERT(ls != 0, "Nullpointer LS: ");\
+    															printf("GET %s:%d from: 0x%x, to: 0x%x - %d bytes\n", __FILE__, __LINE__, (unsigned int)ea,(unsigned int)ls,(unsigned int)size);\
+																} \
+																mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all() // NOTE: expands to two statements; under an unbraced if/else only the first one would be guarded
+
+#else
+#define cellDmaLargeGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaLargePut(ls, ea, size, tag, tid, rid) mfc_put(ls, ea, size, tag, tid, rid)
+#define cellDmaSmallGet(ls, ea, size, tag, tid, rid) mfc_get(ls, ea, size, tag, tid, rid)
+#define cellDmaWaitTagStatusAll(ignore) mfc_write_tag_mask(ignore) ; mfc_read_tag_status_all() // NOTE: expands to two statements; under an unbraced if/else only the first one would be guarded
+#endif // DEBUG_DMA
+
+		
+		
+		
+		
+		
+		
+		
+#endif // USE_LIBSPE2
+#else // !__SPU__
+//Simulate DMA using memcpy or direct access on non-CELL platforms that don't have DMAs and SPUs (Win32, Mac, Linux etc)
+//Potential to add networked simulation using this interface
+
+#define DMA_TAG(a) (a)
+#define DMA_MASK(a) (a)
+
+		/// cellDmaLargeGet Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
+		int	cellDmaLargeGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+		int	cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+		/// cellDmaLargePut Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
+		int cellDmaLargePut(const void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+		/// cellDmaWaitTagStatusAll Win32 replacements for Cell DMA to allow simulating most of the SPU code (just memcpy)
+		void	cellDmaWaitTagStatusAll(int ignore);
+
+
+#endif //__SPU__
+
+///stallingUnalignedDmaSmallGet internally uses DMA_TAG(1)
+int	stallingUnalignedDmaSmallGet(void *ls, uint64_t ea, uint32_t size);
+
+
+void*	cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+void*	cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+void*	cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
+
+
+#endif //BT_FAKE_DMA_H
diff --git a/src/bullet/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp b/src/bullet/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp
new file mode 100644
index 00000000..1a76be08
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp
@@ -0,0 +1,276 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuGatheringCollisionDispatcher.h"
+#include "SpuCollisionTaskProcess.h"
+
+
+#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
+#include "BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h"
+#include "SpuContactManifoldCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+#include "LinearMath/btQuickprof.h"
+#include "BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h"
+
+
+
+
+
+SpuGatheringCollisionDispatcher::SpuGatheringCollisionDispatcher(class	btThreadSupportInterface*	threadInterface, unsigned int	maxNumOutstandingTasks,btCollisionConfiguration* collisionConfiguration)
+:btCollisionDispatcher(collisionConfiguration),
+m_spuCollisionTaskProcess(0),	// created on the first SPU-enabled call to dispatchAllCollisionPairs()
+m_threadInterface(threadInterface),	// pointer is stored as given
+m_maxNumOutstandingTasks(maxNumOutstandingTasks)	// overwritten from m_threadInterface->getNumTasks() on every SPU-enabled dispatch
+{
+	// nothing else to do: members are only initialised here
+}
+
+
+bool	SpuGatheringCollisionDispatcher::supportsDispatchPairOnSpu(int proxyType0,int proxyType1)
+{
+	// A pair qualifies for SPU dispatch only when BOTH shape types are in the supported set below.
+	// The same set applies to either side, so it is written once and checked per shape.
+	// Cone shapes are left out (the case label is commented out).
+	const int shapeTypes[2] = { proxyType0, proxyType1 };
+
+	for (int side = 0; side < 2; ++side)
+	{
+		switch (shapeTypes[side])
+		{
+		case BOX_SHAPE_PROXYTYPE:
+		case TRIANGLE_SHAPE_PROXYTYPE:
+		case SPHERE_SHAPE_PROXYTYPE:
+		case CAPSULE_SHAPE_PROXYTYPE:
+		case CYLINDER_SHAPE_PROXYTYPE:
+//		case CONE_SHAPE_PROXYTYPE:
+		case TRIANGLE_MESH_SHAPE_PROXYTYPE:
+		case CONVEX_HULL_SHAPE_PROXYTYPE:
+		case STATIC_PLANE_PROXYTYPE:
+		case COMPOUND_SHAPE_PROXYTYPE:
+			break;	// supported, go on to the other shape
+
+		default:
+			return false;	// one unsupported shape rules the pair out
+		}
+	}
+
+	// neither shape was rejected
+	return true;
+}
+
+
+
+SpuGatheringCollisionDispatcher::~SpuGatheringCollisionDispatcher()
+{
+	if (m_spuCollisionTaskProcess)	// still null if no SPU-enabled dispatch ever created it
+		delete m_spuCollisionTaskProcess;
+	// m_threadInterface is not released here
+}
+
+#include "stdio.h"
+
+
+
+///Overlap callback used by SpuGatheringCollisionDispatcher::dispatchAllCollisionPairs. For each overlapping pair without an
+///algorithm it creates one and tags the pair through m_internalTmpValue: 2 = SPU algorithm, 3 = PPU fallback algorithm.
+class btSpuCollisionPairCallback : public btOverlapCallback
+{
+	const btDispatcherInfo& m_dispatchInfo;	// only read by the btAssert in processOverlap
+	SpuGatheringCollisionDispatcher*	m_dispatcher;	// used for needsCollision, algorithm allocation and findAlgorithm
+
+public:
+
+	btSpuCollisionPairCallback(const btDispatcherInfo& dispatchInfo, SpuGatheringCollisionDispatcher*	dispatcher)
+	:m_dispatchInfo(dispatchInfo),
+	m_dispatcher(dispatcher)
+	{
+	}
+
+	virtual bool	processOverlap(btBroadphasePair& collisionPair)
+	{
+		// Classifies one pair. A pair that already has an algorithm is left as it is. Always returns false.
+
+		//PPU version
+		//(*m_dispatcher->getNearCallback())(collisionPair,*m_dispatcher,m_dispatchInfo);
+
+		//only support discrete collision detection for now, we could fallback on PPU/unoptimized version for TOI/CCD
+		btAssert(m_dispatchInfo.m_dispatchFunc == btDispatcherInfo::DISPATCH_DISCRETE);
+
+		//by default, Bullet will use this near callback
+		{
+			///m_internalTmpValue tags the pair: a zero value becomes 1 here; 2 or 3 is written below when an algorithm is created
+			if (!collisionPair.m_internalTmpValue)
+			{
+				collisionPair.m_internalTmpValue = 1;
+			}
+			if (!collisionPair.m_algorithm)
+			{
+				btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
+				btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
+
+				btCollisionAlgorithmConstructionInfo ci;	// only consumed by the SPU algorithm constructor below
+				ci.m_dispatcher1 = m_dispatcher;
+				ci.m_manifold = 0;
+
+				if (m_dispatcher->needsCollision(colObj0,colObj1))
+				{
+					int	proxyType0 = colObj0->getCollisionShape()->getShapeType();
+					int	proxyType1 = colObj1->getCollisionShape()->getShapeType();
+					bool supportsSpuDispatch = m_dispatcher->supportsDispatchPairOnSpu(proxyType0,proxyType1) 
+						&& ((colObj0->getCollisionFlags() & btCollisionObject::CF_DISABLE_SPU_COLLISION_PROCESSING) == 0)
+						&& ((colObj1->getCollisionFlags() & btCollisionObject::CF_DISABLE_SPU_COLLISION_PROCESSING) == 0);
+
+					if (proxyType0 == COMPOUND_SHAPE_PROXYTYPE)	// compounds with more than MAX_SPU_COMPOUND_SUBSHAPES children go to the PPU
+					{
+						btCompoundShape* compound = (btCompoundShape*)colObj0->getCollisionShape();
+						if (compound->getNumChildShapes()>MAX_SPU_COMPOUND_SUBSHAPES)
+						{
+							//printf("PPU fallback, compound->getNumChildShapes(%d)>%d\n",compound->getNumChildShapes(),MAX_SPU_COMPOUND_SUBSHAPES);
+							supportsSpuDispatch = false;
+						}
+					}
+
+					if (proxyType1 == COMPOUND_SHAPE_PROXYTYPE)	// same limit for the second object
+					{
+						btCompoundShape* compound = (btCompoundShape*)colObj1->getCollisionShape();
+						if (compound->getNumChildShapes()>MAX_SPU_COMPOUND_SUBSHAPES)
+						{
+							//printf("PPU fallback, compound->getNumChildShapes(%d)>%d\n",compound->getNumChildShapes(),MAX_SPU_COMPOUND_SUBSHAPES);
+							supportsSpuDispatch = false;
+						}
+					}
+
+					if (supportsSpuDispatch)
+					{
+						// SPU path: placement-new the SPU algorithm into memory obtained below
+						int so = sizeof(SpuContactManifoldCollisionAlgorithm);
+#ifdef ALLOCATE_SEPARATELY
+						void* mem = btAlignedAlloc(so,16);//m_dispatcher->allocateCollisionAlgorithm(so);
+#else
+						void* mem = m_dispatcher->allocateCollisionAlgorithm(so);
+#endif
+						collisionPair.m_algorithm = new(mem) SpuContactManifoldCollisionAlgorithm(ci,colObj0,colObj1);
+						collisionPair.m_internalTmpValue =  2;	// 2 = SPU algorithm attached
+					} else
+					{
+						collisionPair.m_algorithm = m_dispatcher->findAlgorithm(colObj0,colObj1);
+						collisionPair.m_internalTmpValue = 3;	// 3 = processed in the "PPU fallback" loop of dispatchAllCollisionPairs
+					}
+				} 
+			}
+		}
+		return false;	// NOTE(review): meaning of the return value is defined by btOverlapCallback, which is not visible here
+	}
+};
+
+void	SpuGatheringCollisionDispatcher::dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo, btDispatcher* dispatcher) 
+{
+	// SPU path when dispatchInfo.m_enableSPU is set, otherwise the base-class implementation is used.
+	if (dispatchInfo.m_enableSPU)
+	{
+		m_maxNumOutstandingTasks = m_threadInterface->getNumTasks();	// replaces the value given to the constructor
+
+		{
+			BT_PROFILE("processAllOverlappingPairs");
+
+			if (!m_spuCollisionTaskProcess)	// created on first use, deleted in the destructor
+				m_spuCollisionTaskProcess = new SpuCollisionTaskProcess(m_threadInterface,m_maxNumOutstandingTasks);
+		
+			m_spuCollisionTaskProcess->setNumTasks(m_maxNumOutstandingTasks);
+	//		printf("m_maxNumOutstandingTasks =%d\n",m_maxNumOutstandingTasks);
+
+			m_spuCollisionTaskProcess->initialize2(dispatchInfo.m_useEpa);
+			
+		
+			///modified version of btCollisionDispatcher::dispatchAllCollisionPairs:
+			{
+				btSpuCollisionPairCallback	collisionCallback(dispatchInfo,this);	// tags each pair via m_internalTmpValue and attaches algorithms
+
+				pairCache->processAllOverlappingPairs(&collisionCallback,dispatcher);
+			}
+		}
+
+		//hand the whole pair array to the task process, in index ranges of pairRange pairs
+		int numTotalPairs = pairCache->getNumOverlappingPairs();
+		if (numTotalPairs)
+		{
+			btBroadphasePair* pairPtr = pairCache->getOverlappingPairArrayPtr();
+			int i;
+			{
+				int pairRange =	SPU_BATCHSIZE_BROADPHASE_PAIRS;
+				if (numTotalPairs < (m_spuCollisionTaskProcess->getNumTasks()*SPU_BATCHSIZE_BROADPHASE_PAIRS))	// too few pairs to fill one full batch per task
+				{
+					pairRange = (numTotalPairs/m_spuCollisionTaskProcess->getNumTasks())+1;	// spread the pairs over the tasks instead
+				}
+	
+				BT_PROFILE("addWorkToTask");
+				for (i=0;i<numTotalPairs;)
+				{
+					//Performance Hint: tweak this number during benchmarking
+					
+					int endIndex = (i+pairRange) < numTotalPairs ? i+pairRange : numTotalPairs;	// clamp the last range to the array size
+					m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex);
+					i = endIndex;
+				}
+			}
+			{
+				BT_PROFILE("PPU fallback");
+				//handle PPU fallback pairs
+				for (i=0;i<numTotalPairs;i++)
+				{
+					btBroadphasePair& collisionPair = pairPtr[i];
+					if (collisionPair.m_internalTmpValue == 3)	// 3 is the tag btSpuCollisionPairCallback writes for PPU fallback pairs
+					{
+						if (collisionPair.m_algorithm)
+						{
+							btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
+							btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
+	
+							if (dispatcher->needsCollision(colObj0,colObj1))
+							{
+								btManifoldResult contactPointResult(colObj0,colObj1);
+								
+								if (dispatchInfo.m_dispatchFunc == 		btDispatcherInfo::DISPATCH_DISCRETE)
+								{
+									//discrete collision detection query
+									collisionPair.m_algorithm->processCollision(colObj0,colObj1,dispatchInfo,&contactPointResult);
+								} else
+								{
+									//continuous collision detection query, time of impact (toi)
+									btScalar toi = collisionPair.m_algorithm->calculateTimeOfImpact(colObj0,colObj1,dispatchInfo,&contactPointResult);
+									if (dispatchInfo.m_timeOfImpact > toi)	// keep the smallest time of impact seen
+										dispatchInfo.m_timeOfImpact = toi;
+	
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+		{
+			BT_PROFILE("flush2");
+			//make sure all SPU work is done
+			m_spuCollisionTaskProcess->flush2();	// called even when there were no pairs
+		}
+
+	} else
+	{
+		///PPU fallback
+		///!Need to make sure to clear all 'algorithms' when switching between SPU and PPU
+		btCollisionDispatcher::dispatchAllCollisionPairs(pairCache,dispatchInfo,dispatcher);
+	}
+}
diff --git a/src/bullet/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h b/src/bullet/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h
new file mode 100644
index 00000000..f8bc7da6
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h
@@ -0,0 +1,72 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#ifndef BT_SPU_GATHERING_COLLISION__DISPATCHER_H
+#define BT_SPU_GATHERING_COLLISION__DISPATCHER_H
+
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+
+
+///Tuning value to optimize SPU utilization
+///Too small a value means task overhead is large compared to computation (too fine granularity)
+///Too big a value might leave some SPUs idle while a few other SPUs do all the work.
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 8
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 16
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 64
+#define SPU_BATCHSIZE_BROADPHASE_PAIRS 128
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 256
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 512
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 1024
+
+
+
+class SpuCollisionTaskProcess;
+
+///SpuGatheringCollisionDispatcher is a btCollisionDispatcher whose dispatchAllCollisionPairs can hand pair processing
+///to a SpuCollisionTaskProcess; when dispatchInfo.m_enableSPU is not set it behaves like the base class.
+class SpuGatheringCollisionDispatcher : public btCollisionDispatcher
+{
+	// created on the first SPU-enabled dispatch and deleted in the destructor
+	SpuCollisionTaskProcess*	m_spuCollisionTaskProcess;
+	
+protected:
+	// thread interface passed to the constructor; not deleted by this class
+	class	btThreadSupportInterface*	m_threadInterface;
+	// refreshed from m_threadInterface->getNumTasks() on every SPU-enabled dispatch
+	unsigned int	m_maxNumOutstandingTasks;
+	
+
+public:
+
+	//can be used by SPU collision algorithms; null until the first SPU-enabled dispatch
+	SpuCollisionTaskProcess*	getSpuCollisionTaskProcess()
+	{
+			return m_spuCollisionTaskProcess;
+	}
+	// stores the arguments; collisionConfiguration is forwarded to btCollisionDispatcher
+	SpuGatheringCollisionDispatcher (class	btThreadSupportInterface*	threadInterface, unsigned int	maxNumOutstandingTasks,btCollisionConfiguration* collisionConfiguration);
+	
+	virtual ~SpuGatheringCollisionDispatcher();
+	// true when both shape types are in the set handled on the SPU
+	bool	supportsDispatchPairOnSpu(int proxyType0,int proxyType1);
+	// SPU path if dispatchInfo.m_enableSPU, base-class dispatch otherwise
+	virtual void	dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher) ;
+
+};
+
+
+
+#endif //BT_SPU_GATHERING_COLLISION__DISPATCHER_H
+
+
diff --git a/src/bullet/BulletMultiThreaded/SpuLibspe2Support.cpp b/src/bullet/BulletMultiThreaded/SpuLibspe2Support.cpp
new file mode 100644
index 00000000..a312450e
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuLibspe2Support.cpp
@@ -0,0 +1,257 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifdef USE_LIBSPE2
+
+#include "SpuLibspe2Support.h"
+
+
+
+
+//SpuLibspe2Support helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
+///Setup and initialize SPU/CELL/Libspe2; the requested thread count is clamped to [0, min(physical SPEs, MAX_SPUS)] because data[] has only MAX_SPUS slots
+SpuLibspe2Support::SpuLibspe2Support(spe_program_handle_t *speprog, int numThreads) : program(speprog)
+{
+	const int numSpes = spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1), limit = (numSpes < MAX_SPUS) ? numSpes : MAX_SPUS; //numSpes is -1 when the query fails
+	this->numThreads = (limit <= 0 || numThreads <= 0) ? 0 : ((numThreads <= limit) ? numThreads : limit);
+	for (int i = 0; i < MAX_SPUS; i++) data[i].context = NULL; //internal_startSPU and stopSPU treat a NULL context as "slot not started"
+}
+///cleanup/shutdown Libspe2
+SpuLibspe2Support::~SpuLibspe2Support()
+{
+	//all teardown (shutdown mailbox message, thread join, context destruction) is delegated to stopSPU()
+	stopSPU();
+}
+
+
+
+///send messages to SPUs
+///uiCommand selects the task type and uiArgument0 carries the address of the matching task description
+///(SpuSampleTaskDesc or SpuGatherAndProcessPairsTaskDesc); uiArgument1 is not read by this function.
+///NOTE(review): the address is passed through a uint32_t, which presumably assumes 32-bit PPU pointers -- confirm.
+void SpuLibspe2Support::sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1)
+{
+	spe_context_ptr_t context = NULL; //stays NULL for an unknown command so nothing indeterminate is ever used
+
+	switch (uiCommand)
+	{
+	case CMD_SAMPLE_TASK_COMMAND:
+	{
+		//get taskdescription
+		SpuSampleTaskDesc* taskDesc = (SpuSampleTaskDesc*) uiArgument0;
+
+		btAssert(taskDesc->m_taskId<m_activeSpuStatus.size());
+
+		//get status of SPU on which task should run
+		btSpuStatus&	spuStatus = m_activeSpuStatus[taskDesc->m_taskId];
+
+		//set data for spuStatus
+		spuStatus.m_commandId = uiCommand;
+		spuStatus.m_status = Spu_Status_Occupied; //set SPU as "occupied"
+		spuStatus.m_taskDesc.p = taskDesc; 
+
+		//get context
+		context = data[taskDesc->m_taskId].context;
+
+		taskDesc->m_mainMemoryPtr = reinterpret_cast<uint64_t> (spuStatus.m_lsMemory.p);
+
+		break;
+	}
+	case CMD_GATHER_AND_PROCESS_PAIRLIST:
+		{
+			//get taskdescription
+			SpuGatherAndProcessPairsTaskDesc* taskDesc = (SpuGatherAndProcessPairsTaskDesc*) uiArgument0;
+
+			btAssert(taskDesc->taskId<m_activeSpuStatus.size());
+
+			//get status of SPU on which task should run
+			btSpuStatus&	spuStatus = m_activeSpuStatus[taskDesc->taskId];
+
+			//set data for spuStatus
+			spuStatus.m_commandId = uiCommand;
+			spuStatus.m_status = Spu_Status_Occupied; //set SPU as "occupied"
+			spuStatus.m_taskDesc.p = taskDesc; 
+
+			//get context
+			context = data[taskDesc->taskId].context;
+
+			taskDesc->m_lsMemory = (CollisionTask_LocalStoreMemory*)spuStatus.m_lsMemory.p;
+
+			break;
+		}
+	default:
+		{
+			///not implemented: return so that a build without asserts never posts to a NULL context
+			btAssert(0);
+			return;
+		}
+
+	};
+
+	//write taskdescription in mailbox
+	//NOTE(review): non-blocking write whose result is not checked; presumably a full mailbox would drop the event -- confirm
+	unsigned int event = Spu_Mailbox_Event_Task;
+	spe_in_mbox_write(context, &event, 1, SPE_MBOX_ANY_NONBLOCKING);
+}
+
+///check for messages from SPUs
+///Blocks (yielding the CPU between scans) until some slot in m_activeSpuStatus reports Spu_Status_Free,
+///then writes that slot's task id to *puiArgument0 and its status to *puiArgument1.
+void SpuLibspe2Support::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
+{
+	///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
+
+	///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
+
+	btAssert(m_activeSpuStatus.size());
+
+	int freeSlot = -1;
+
+	//every scan starts at slot 0, so the lowest-numbered free slot is the one reported
+	for (;;)
+	{
+		int slot = 0;
+		while (slot < m_activeSpuStatus.size() && m_activeSpuStatus[slot].m_status != Spu_Status_Free)
+		{
+			++slot;
+		}
+
+		if (slot < m_activeSpuStatus.size())
+		{
+			freeSlot = slot;
+			break;
+		}
+
+		//no slot is free yet: let other threads run before scanning again
+		sched_yield();
+	}
+
+	///the loop above only exits once a free slot was found
+	btAssert(freeSlot>=0);
+
+	btSpuStatus& finished = m_activeSpuStatus[freeSlot];
+
+	//report which slot answered and the status it holds now
+	*puiArgument0 = finished.m_taskId;
+	*puiArgument1 = finished.m_status;
+}
+
+
+void SpuLibspe2Support::startSPU()
+{
+	this->internal_startSPU(); //public virtual entry point; all the work is done by the private helper
+}
+
+
+
+///start the spus group (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
+void SpuLibspe2Support::internal_startSPU()
+{
+	m_activeSpuStatus.resize(numThreads);
+	//first pass: create a context, load the program and spawn a PPU thread for every slot that has no context yet
+	
+	for (int i=0; i < numThreads; i++)
+	{
+		//a slot whose context is already non-NULL is skipped and left untouched
+		if(data[i].context == NULL) 
+		{
+					
+			 /* Create context */
+			if ((data[i].context = spe_context_create(0, NULL)) == NULL)
+			{
+			      perror ("Failed creating context");
+		          exit(1);
+			}
+	
+			/* Load program into context */
+			if(spe_program_load(data[i].context, this->program))
+			{
+			      perror ("Failed loading program");
+		          exit(1);
+			}
+			//mark the slot as starting up before its thread exists; the second loop below waits for this value to change
+			m_activeSpuStatus[i].m_status = Spu_Status_Startup; 
+			m_activeSpuStatus[i].m_taskId = i; 
+			m_activeSpuStatus[i].m_commandId = 0; 
+			m_activeSpuStatus[i].m_lsMemory.p = NULL; 
+			
+			//arguments that ppu_pthread_function forwards to spe_context_run; argp is this slot's status block
+			data[i].entry = SPE_DEFAULT_ENTRY;
+			data[i].flags = 0;
+			data[i].argp.p = &m_activeSpuStatus[i];
+			data[i].envp.p = NULL;
+			
+		    /* Create thread for each SPE context */
+			if (pthread_create(&data[i].pthread, NULL, &ppu_pthread_function, &(data[i]) ))
+			{
+			      perror ("Failed creating thread");
+		          exit(1);
+			}
+			/*
+			else
+			{
+				printf("started thread %d\n",i);
+			}*/
+		}		
+	}
+	//second pass: busy-wait (yielding) until no started slot reports Spu_Status_Startup any more
+	//NOTE(review): presumably the SPU program overwrites m_status through argp -- nothing on the PPU side of this file does
+	for (int i=0; i < numThreads; i++)
+	{
+		if(data[i].context != NULL) 
+		{
+			while( m_activeSpuStatus[i].m_status == Spu_Status_Startup)
+			{
+				// wait for spu to set up
+				sched_yield();
+			}
+			printf("Spu %d is ready\n", i);
+		}
+	}
+}
+
+///tell the task scheduler we are done with the SPU tasks
+///Safe to call more than once (the destructor calls it again after an explicit stopSPU()):
+///slots whose context is NULL are skipped and every destroyed context is reset to NULL.
+void SpuLibspe2Support::stopSPU()
+{
+	bool stoppedAny = false;
+	for ( int i = 0; i < this->numThreads; i++ ) 
+	{ 
+		spe_context_ptr_t context = data[i].context;
+		if (context == NULL)
+			continue; //never started or already stopped: no mailbox to write to and no thread to join
+
+		// ask the SPU program to shut down, then wait for its PPU-side thread to finish
+		unsigned int event = Spu_Mailbox_Event_Shutdown;
+		spe_in_mbox_write(context, &event, 1, SPE_MBOX_ALL_BLOCKING);
+		pthread_join (data[i].pthread, NULL); 
+
+		// destroy the SPE context and forget it, so a later call cannot touch it again
+		spe_context_destroy (context);
+		data[i].context = NULL;
+		stoppedAny = true;
+	} 
+
+	// close SPE program, but only when this call actually shut something down (prevents a second close from the destructor)
+	if (stoppedAny)
+		spe_image_close(program); 
+	m_activeSpuStatus.clear();
+}
+
+
+
+#endif //USE_LIBSPE2
+
diff --git a/src/bullet/BulletMultiThreaded/SpuLibspe2Support.h b/src/bullet/BulletMultiThreaded/SpuLibspe2Support.h
new file mode 100644
index 00000000..37a5e79f
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuLibspe2Support.h
@@ -0,0 +1,180 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_SPU_LIBSPE2_SUPPORT_H
+#define BT_SPU_LIBSPE2_SUPPORT_H
+
+#include <LinearMath/btScalar.h> //for uint32_t etc.
+
+#ifdef USE_LIBSPE2
+
+#include <stdlib.h>
+#include <stdio.h>
+//#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
+#include "PlatformDefinitions.h"
+
+
+//extern struct SpuGatherAndProcessPairsTaskDesc;
+
+enum
+{
+	Spu_Mailbox_Event_Nothing = 0,
+	Spu_Mailbox_Event_Task = 1,	//written to an SPU's inbound mailbox by SpuLibspe2Support::sendRequest
+	Spu_Mailbox_Event_Shutdown = 2,	//written to an SPU's inbound mailbox by SpuLibspe2Support::stopSPU
+	//NOTE(review): presumably forces the enum to be 32 bits wide -- confirm
+	Spu_Mailbox_Event_ForceDword = 0xFFFFFFFF
+	
+};
+
+enum
+{
+	Spu_Status_Free = 0,	//the value SpuLibspe2Support::waitForResponse waits for
+	Spu_Status_Occupied = 1,	//set by SpuLibspe2Support::sendRequest when a task is handed to the slot
+	Spu_Status_Startup = 2,	//set by SpuLibspe2Support::internal_startSPU before the slot's thread is created
+	//NOTE(review): presumably forces the enum to be 32 bits wide -- confirm
+	Spu_Status_ForceDword = 0xFFFFFFFF
+	
+};
+
+
+struct btSpuStatus
+{
+	uint32_t	m_taskId;	//slot index, assigned in SpuLibspe2Support::internal_startSPU
+	uint32_t	m_commandId;	//last command passed to SpuLibspe2Support::sendRequest (0 after startup)
+	uint32_t	m_status;	//one of the Spu_Status_* values
+	//addr64 is declared elsewhere; the code in this header's .cpp only uses its '.p' pointer member
+	addr64 m_taskDesc;	//task description recorded by sendRequest
+	addr64 m_lsMemory;	//address handed to task descriptions by sendRequest; reset to NULL in internal_startSPU
+	
+}
+__attribute__ ((aligned (128)))
+;
+
+
+
+#ifndef __SPU__
+
+#include "LinearMath/btAlignedObjectArray.h"
+#include "SpuCollisionTaskProcess.h"
+#include "SpuSampleTaskProcess.h"
+#include "btThreadSupportInterface.h"
+#include <libspe2.h>
+#include <pthread.h>
+#include <sched.h>
+
+#define MAX_SPUS 4 
+
+typedef struct ppu_pthread_data 
+{
+	spe_context_ptr_t context;	//SPE context of this slot; internal_startSPU treats NULL as "not started"
+	pthread_t pthread;	//PPU thread that runs ppu_pthread_function for this context
+	unsigned int entry;	//entry point passed to spe_context_run (internal_startSPU sets SPE_DEFAULT_ENTRY)
+	unsigned int flags;	//run flags passed to spe_context_run (internal_startSPU sets 0)
+	addr64 argp;	//argument pointer for the SPE program (internal_startSPU stores the slot's btSpuStatus)
+	addr64 envp;	//environment pointer for the SPE program (internal_startSPU stores NULL)
+	spe_stop_info_t stopinfo;	//passed to spe_context_run and inspected afterwards by ppu_pthread_function
+} ppu_pthread_data_t;
+
+
+///Thread entry point: runs one SPE context to completion on the calling PPU thread.
+///arg points to the ppu_pthread_data_t of the slot. A failed run, a non-zero SPE exit code or any
+///stop reason other than SPE_EXIT terminates the whole process with exit(1).
+static void *ppu_pthread_function(void *arg)
+{
+    ppu_pthread_data_t * datap = (ppu_pthread_data_t *)arg;
+
+    //returns when the SPE program stops; a negative result means the run itself failed and errno describes why
+    if (spe_context_run(datap->context, &datap->entry, datap->flags, datap->argp.p, datap->envp.p, &datap->stopinfo) < 0)
+    {
+        perror("FAILED: spe_context_run");
+        exit(1);
+    }
+    //the remaining failures are not errno errors, so they are reported with fprintf rather than perror
+    if (datap->stopinfo.stop_reason != SPE_EXIT)
+    {
+        fprintf(stderr, "FAILED: SPE abnormally terminated (stop reason %u)\n", (unsigned int)datap->stopinfo.stop_reason);
+        exit(1);
+    }
+    if (datap->stopinfo.result.spe_exit_code != 0)
+    {
+        fprintf(stderr, "FAILED: SPE returned a non-zero exit status: %d\n", datap->stopinfo.result.spe_exit_code);
+        exit(1);
+    }
+    pthread_exit(NULL);
+}
+
+
+
+
+
+
+///SpuLibspe2Support helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
+class SpuLibspe2Support : public btThreadSupportInterface
+{
+	//one status block per SPU thread; resized in internal_startSPU and cleared in stopSPU
+	btAlignedObjectArray<btSpuStatus>	m_activeSpuStatus;
+	
+public:
+	//Setup and initialize SPU/CELL/Libspe2
+	SpuLibspe2Support(spe_program_handle_t *speprog,int numThreads);
+	
+	// SPE program handle ptr.
+	spe_program_handle_t *program;
+	
+	// SPE program data: one slot per SPU thread (context, pthread and the arguments for spe_context_run)
+	ppu_pthread_data_t data[MAX_SPUS];
+	
+	//cleanup/shutdown Libspe2
+	~SpuLibspe2Support();
+
+	///send messages to SPUs
+	void sendRequest(uint32_t uiCommand, uint32_t uiArgument0, uint32_t uiArgument1=0);
+
+	//check for messages from SPUs
+	void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
+
+	//start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
+	virtual void startSPU();
+
+	//tell the task scheduler we are done with the SPU tasks
+	virtual void stopSPU();
+	//accepts the call but deliberately does nothing (see the comment in the body)
+	virtual void setNumTasks(int numTasks)
+	{
+		//changing the number of tasks after initialization is not implemented (yet)
+	}
+
+private:
+	
+	///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
+	void internal_startSPU();
+
+
+	
+	//number of SPU threads managed by this object; assigned in the constructor
+	int numThreads;
+
+};
+
+#endif // NOT __SPU__
+
+#endif //USE_LIBSPE2
+
+#endif //BT_SPU_LIBSPE2_SUPPORT_H
+
+
+
+
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h
new file mode 100644
index 00000000..e5179611
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h
@@ -0,0 +1,167 @@
+/*
+   Copyright (C) 2006, 2008 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef __BOX_H__
+#define __BOX_H__
+
+
+#ifndef PE_REF
+#define PE_REF(a) a&
+#endif
+
+#include <math.h>
+
+
+#include "../PlatformDefinitions.h"
+
+
+
+
+enum FeatureType { F, E, V };	// F = face, E = edge, V = vertex (as assigned by BoxPoint::setFaceFeature / setEdgeFeature / setVertexFeature)
+
+//----------------------------------------------------------------------------
+// Box
+//----------------------------------------------------------------------------
+///The Box is an internal class used by the boxBoxDistance calculation.
+// Box only stores a vector of half sizes. It carries no position or orientation of its own:
+// GetAABB, for instance, receives the rotation as an argument.
+class Box
+{
+public:
+	/// The values handed to the constructors / Set(): (hx, hy, hz) or a copy of half_.
+	vmVector3 mHalf;
+
+	/// Leaves mHalf in whatever state vmVector3's default constructor produces.
+	inline Box()
+	{}
+
+	/// Builds a box from a vector of half sizes (forwards to Set).
+	inline Box(PE_REF(vmVector3) half_)
+	{
+		Set(half_);
+	}
+
+	/// Builds a box from three scalar half sizes (forwards to Set).
+	inline Box(float hx, float hy, float hz)
+	{
+		Set(hx, hy, hz);
+	}
+
+	/// Replaces mHalf with a copy of half_.
+	inline void Set(PE_REF(vmVector3) half_)
+	{
+		mHalf = half_;
+	}
+
+	/// Replaces mHalf with the vector (hx, hy, hz).
+	inline void Set(float hx, float hy, float hz)
+	{
+		mHalf = vmVector3(hx, hy, hz);
+	}
+
+	/// Returns absPerElem(rotation) * mHalf.
+	/// NOTE(review): presumably the half sizes of the axis-aligned box that encloses
+	/// this box once it has been rotated by 'rotation' -- confirm against the callers.
+	inline vmVector3 GetAABB(const vmMatrix3& rotation) const
+	{
+		return absPerElem(rotation) * mHalf;
+	}
+
+	// All members above are defined in-class and are therefore implicitly inline;
+	// the explicit 'inline' keywords are kept from the original declarations.
+
+};
+
+//-------------------------------------------------------------------------------------------------
+// BoxPoint
+//-------------------------------------------------------------------------------------------------
+
+///The BoxPoint class is an internally used class to contain feature information for boxBoxDistance calculation.
+class BoxPoint
+{
+public:
+	// featureType/featureIdx used to be left indeterminate; they now start as "face 0" so a read before any set*Feature() call is defined
+	BoxPoint() : localPoint(0.0f), featureType(F), featureIdx(0) {}
+	vmPoint3      localPoint;
+	FeatureType featureType;	// F, E or V; written by the set*Feature functions
+	int         featureIdx;	// bit-packed feature description; the layout depends on featureType
+
+	inline void setVertexFeature(int plusX, int plusY, int plusZ);
+	inline void setEdgeFeature(int dim0, int plus0, int dim1, int plus1);
+	inline void setFaceFeature(int dim, int plus);
+
+	inline void getVertexFeature(int & plusX, int & plusY, int & plusZ) const;
+	inline void getEdgeFeature(int & dim0, int & plus0, int & dim1, int & plus1) const;
+	inline void getFaceFeature(int & dim, int & plus) const;
+};
+// Vertex packing: bit 2 = plusX, bit 1 = plusY, bit 0 = plusZ (the flags are not masked, so they should be 0 or 1).
+inline
+void
+BoxPoint::setVertexFeature(int plusX, int plusY, int plusZ)
+{
+	featureType = V;
+	featureIdx = (plusX << 2) | (plusY << 1) | plusZ;
+}
+// Edge packing: bit 5 / bits 3-4 = plus / dim of the smaller dimension, bit 2 / bits 0-1 = plus / dim of the larger one.
+inline
+void
+BoxPoint::setEdgeFeature(int dim0, int plus0, int dim1, int plus1)
+{
+	featureType = E;
+
+	if (dim0 > dim1) {
+		featureIdx = (plus1 << 5) | (dim1 << 3) | (plus0 << 2) | dim0;
+	} else {
+		featureIdx = (plus0 << 5) | (dim0 << 3) | (plus1 << 2) | dim1;
+	}
+}
+// Face packing: bit 2 = plus, bits 0-1 = dim.
+inline
+void
+BoxPoint::setFaceFeature(int dim, int plus)
+{
+	featureType = F;
+	featureIdx = (plus << 2) | dim;
+}
+// Unpacks the layout written by setVertexFeature; plusX takes every bit from bit 2 upwards.
+inline
+void
+BoxPoint::getVertexFeature(int & plusX, int & plusY, int & plusZ) const
+{
+	plusX = featureIdx >> 2;
+	plusY = (featureIdx >> 1) & 1;
+	plusZ = featureIdx & 1;
+}
+// Unpacks the layout written by setEdgeFeature; dim0 is therefore always the smaller of the two stored dimensions.
+inline
+void
+BoxPoint::getEdgeFeature(int & dim0, int & plus0, int & dim1, int & plus1) const
+{
+	plus0 = featureIdx >> 5;
+	dim0 = (featureIdx >> 3) & 3;
+	plus1 = (featureIdx >> 2) & 1;
+	dim1 = featureIdx & 3;
+}
+// Unpacks the layout written by setFaceFeature.
+inline
+void
+BoxPoint::getFaceFeature(int & dim, int & plus) const
+{
+	plus = featureIdx >> 2;
+	dim = featureIdx & 3;
+}
+
+#endif /* __BOX_H__ */
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
new file mode 100644
index 00000000..dfcd8426
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
@@ -0,0 +1,302 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "SpuCollisionShapes.h"
+
+///not supported on IBM SDK, until we fix the alignment of btVector3
+#if defined (__CELLOS_LV2__) && defined (__SPU__)
+#include <spu_intrinsics.h>
+static inline vec_float4 vec_dot3( vec_float4 vec0, vec_float4 vec1 )	//only compiled for __CELLOS_LV2__ SPU builds (see the enclosing #if)
+{
+    vec_float4 result;
+    result = spu_mul( vec0, vec1 );	//element-wise product
+    result = spu_madd( spu_rlqwbyte( vec0, 4 ), spu_rlqwbyte( vec1, 4 ), result );	//NOTE(review): presumably rotates by one float so the y terms are accumulated -- confirm
+    return spu_madd( spu_rlqwbyte( vec0, 8 ), spu_rlqwbyte( vec1, 8 ), result );	//NOTE(review): presumably adds the z terms, leaving the 3-component dot product in the first element -- confirm
+}
+#endif //__SPU__
+
+
+///Writes center -/+ extent to aabbMin/aabbMax, where center is the origin of xform and extent is halfExtents
+///projected through the absolute value of xform's basis (the bound of a rotated box).
+static void spuOrientedBoxAabb (const btVector3& halfExtents, const btTransform& xform, btVector3& aabbMin, btVector3& aabbMax)
+{
+	btMatrix3x3 absBasis = xform.getBasis().absolute();
+	btVector3 center = xform.getOrigin();
+	btVector3 extent = btVector3(absBasis[0].dot(halfExtents),absBasis[1].dot(halfExtents),absBasis[2].dot(halfExtents));
+
+	aabbMin = center - extent;
+	aabbMax = center + extent;
+}
+
+///Computes the AABB of a convex shape under xform for the shape types listed in the switch; for any other
+///shapeType aabbMin/aabbMax are left untouched. convexShapePtr is only read for convex hulls, whose full
+///btConvexHullShape is fetched with a DMA from that address.
+void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, const btTransform& xform)
+{
+	switch (shapeType)
+	{
+	case CYLINDER_SHAPE_PROXYTYPE:
+		/* fall through */
+	case BOX_SHAPE_PROXYTYPE:
+	{
+		//cylinders are bounded exactly like boxes: implicit dimensions grown by the margin
+		const btScalar margin = convexShape->getMarginNV();
+		btVector3 halfExtents = convexShape->getImplicitShapeDimensions();
+		halfExtents += btVector3(margin,margin,margin);
+		spuOrientedBoxAabb(halfExtents, xform, aabbMin, aabbMax);
+		break;
+	}
+	case CAPSULE_SHAPE_PROXYTYPE:
+	{
+		const btScalar margin = convexShape->getMarginNV();
+		btVector3 halfExtents = convexShape->getImplicitShapeDimensions();
+
+		//add the radius to y-axis to get full height
+		//(an up-axis aware variant existed here but was compiled out with #if 0)
+		const btScalar radius = halfExtents[0];
+		halfExtents[1] += radius;
+		halfExtents += btVector3(margin,margin,margin);
+
+		spuOrientedBoxAabb(halfExtents, xform, aabbMin, aabbMax);
+		break;
+	}
+	case SPHERE_SHAPE_PROXYTYPE:
+	{
+		//the radius is read from the x component of the implicit dimensions
+		const btScalar radius = convexShape->getImplicitShapeDimensions().getX();// * convexShape->getLocalScaling().getX();
+		const btScalar paddedRadius = radius + convexShape->getMarginNV();
+		const btVector3& center = xform.getOrigin();
+		const btVector3 extent(paddedRadius,paddedRadius,paddedRadius);
+
+		aabbMin = center - extent;
+		aabbMax = center + extent;
+		break;
+	}
+	case CONVEX_HULL_SHAPE_PROXYTYPE:
+	{
+		//pull the complete hull object into a local, 16-byte aligned buffer and wait on its DMA tag
+		ATTRIBUTE_ALIGNED16(char hullStorage[sizeof(btConvexHullShape)]);
+		cellDmaGet(&hullStorage, convexShapePtr  , sizeof(btConvexHullShape), DMA_TAG(1), 0, 0);
+		cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+		btConvexHullShape* localHull = (btConvexHullShape*)&hullStorage;
+		const btScalar margin = convexShape->getMarginNV();
+		localHull->getNonvirtualAabb(xform,aabbMin,aabbMax,margin);
+		break;
+	}
+	default:
+		{
+	//	spu_printf("SPU: unsupported shapetype %d in AABB calculation\n");
+		}
+	};
+}
+
+///Fetches the mesh interface (DMA tag 1) and the optimized BVH (DMA tag 2) of triMeshShape into bvhMeshShape
+///and finishes by waiting on both tags.
+void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape)
+{
+	//mesh interface: on the SPU it is copied into gTriangleMeshInterfaceStorage, otherwise the pointer
+	//returned by cellDmaGetReadOnly is used
+	const ppu_address_t meshInterfaceAddress = reinterpret_cast<ppu_address_t>(triMeshShape->getMeshInterface());
+	const int meshInterfaceSize = sizeof(btTriangleIndexVertexArray);
+	//	spu_printf("trimeshShape->getMeshInterface() == %llx\n",meshInterfaceAddress);
+#ifdef __SPU__
+	cellDmaGet(&bvhMeshShape->gTriangleMeshInterfaceStorage, meshInterfaceAddress  , meshInterfaceSize, DMA_TAG(1), 0, 0);
+	bvhMeshShape->gTriangleMeshInterfacePtr = &bvhMeshShape->gTriangleMeshInterfaceStorage;
+#else
+	bvhMeshShape->gTriangleMeshInterfacePtr = (btTriangleIndexVertexArray*)cellDmaGetReadOnly(&bvhMeshShape->gTriangleMeshInterfaceStorage, meshInterfaceAddress  , meshInterfaceSize, DMA_TAG(1), 0, 0);
+#endif
+
+	///now DMA over the BVH
+	const ppu_address_t bvhAddress = reinterpret_cast<ppu_address_t>(triMeshShape->getOptimizedBvh());
+	const int bvhSize = sizeof(btOptimizedBvh);
+	//spu_printf("trimeshShape->getOptimizedBvh() == %llx\n",bvhAddress);
+	cellDmaGet(&bvhMeshShape->gOptimizedBvh, bvhAddress  , bvhSize, DMA_TAG(2), 0, 0);
+
+	//a single wait covers both tags
+	cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+
+}
+
+void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag)
+{	//starts a DMA on dmaTag that copies indexArray[index] into *IndexMesh; no wait is issued here
+	const ppu_address_t meshAddress = (ppu_address_t)&indexArray[index];
+	cellDmaGet(IndexMesh, meshAddress  , sizeof(btIndexedMesh), DMA_TAG(dmaTag), 0, 0);
+}
+
+void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag)
+{	//starts a DMA on dmaTag that copies batchSize btBvhSubtreeInfo records from subTreePtr into subTreeHeaders; no wait is issued here
+	cellDmaGet(subTreeHeaders, subTreePtr, batchSize * sizeof(btBvhSubtreeInfo), DMA_TAG(dmaTag), 0, 0);
+}
+
+void dmaBvhSubTreeNodes (btQuantizedBvhNode* nodes, const btBvhSubtreeInfo& subtree, QuantizedNodeArray&	nodeArray, int dmaTag)
+{	//starts a DMA of the subtree's m_subtreeSize nodes on the caller's tag (it used to be hard-wired to tag 2, silently ignoring dmaTag); no wait is issued here
+	cellDmaGet(nodes, reinterpret_cast<ppu_address_t>(&nodeArray[subtree.m_rootNodeIndex]) , subtree.m_subtreeSize* sizeof(btQuantizedBvhNode), DMA_TAG(dmaTag), 0, 0);
+}
+
+///getShapeTypeSize could easily be optimized, but it is not likely a bottleneck
+///Maps a broadphase proxy type to sizeof() of the matching shape class; this is the byte
+///count dmaCollisionShape transfers. Every supported size is asserted to be below
+///MAX_SHAPE_SIZE. An unsupported type asserts and yields 0.
+int		getShapeTypeSize(int shapeType)
+{
+	int shapeSize = 0;
+
+	switch (shapeType)
+	{
+	case CYLINDER_SHAPE_PROXYTYPE:
+		{
+			shapeSize = sizeof(btCylinderShape);
+			break;
+		}
+	case BOX_SHAPE_PROXYTYPE:
+		{
+			shapeSize = sizeof(btBoxShape);
+			break;
+		}
+	case SPHERE_SHAPE_PROXYTYPE:
+		{
+			shapeSize = sizeof(btSphereShape);
+			break;
+		}
+	case TRIANGLE_MESH_SHAPE_PROXYTYPE:
+		{
+			shapeSize = sizeof(btBvhTriangleMeshShape);
+			break;
+		}
+	case CAPSULE_SHAPE_PROXYTYPE:
+		{
+			shapeSize = sizeof(btCapsuleShape);
+			break;
+		}
+
+	case CONVEX_HULL_SHAPE_PROXYTYPE:
+		{
+			shapeSize = sizeof(btConvexHullShape);
+			break;
+		}
+
+	case COMPOUND_SHAPE_PROXYTYPE:
+		{
+			shapeSize = sizeof(btCompoundShape);
+			break;
+		}
+	case STATIC_PLANE_PROXYTYPE:
+		{
+			shapeSize = sizeof(btStaticPlaneShape);
+			break;
+		}
+
+	default:
+		btAssert(0);
+		//unsupported shapetype, please add here
+		return 0;
+	}
+
+	//one shared check instead of one per case: the size must stay below MAX_SHAPE_SIZE
+	btAssert(shapeSize < MAX_SHAPE_SIZE);
+
+	return shapeSize;
+}
+
+///Records the hull's point count in convexVertexData and starts a DMA (tag 2) of its unscaled points into g_convexPointBuffer.
+void dmaConvexVertexData (SpuConvexPolyhedronVertexData* convexVertexData, btConvexHullShape* convexShapeSPU)
+{
+	const int numPoints = convexShapeSPU->getNumPoints();
+	convexVertexData->gNumConvexPoints = numPoints;
+	if (numPoints > MAX_NUM_SPU_CONVEX_POINTS)
+	{
+		btAssert(0); //more points than g_convexPointBuffer can hold: nothing is transferred
+		return;
+	}
+	const ppu_address_t pointsPPU = (ppu_address_t) convexShapeSPU->getUnscaledPoints();
+	const int dmaSize = numPoints*sizeof(btVector3);
+	cellDmaGet(&convexVertexData->g_convexPointBuffer[0], pointsPPU  , dmaSize, DMA_TAG(2), 0, 0);
+}
+
+///Starts a DMA on dmaTag that copies getShapeTypeSize(shapeType) bytes from collisionShapePtr to collisionShapeLocation.
+void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionShapePtr, uint32_t dmaTag, int shapeType)
+{
+	const int shapeBytes = getShapeTypeSize(shapeType); //the transfer length depends on the concrete shape class
+	cellDmaGet(collisionShapeLocation, collisionShapePtr  , shapeBytes, DMA_TAG(dmaTag), 0, 0);
+	//no wait is issued here (the cellDmaWaitTagStatusAll call that used to follow was already commented out)
+}
+
+///Starts a DMA on dmaTag that copies the compound's child list (one btCompoundShapeChild per child) into gSubshapes; no wait is issued here.
+void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag)
+{
+	const int childShapeCount = spuCompoundShape->getNumChildShapes();
+	btAssert(childShapeCount <= MAX_SPU_COMPOUND_SUBSHAPES); //gSubshapes has exactly MAX_SPU_COMPOUND_SUBSHAPES entries; more children would overrun it
+	const int dmaSize = childShapeCount * sizeof(btCompoundShapeChild);
+	const ppu_address_t dmaPpuAddress2 = (ppu_address_t)spuCompoundShape->getChildList();
+	cellDmaGet(&compoundShapeLocation->gSubshapes[0], dmaPpuAddress2, dmaSize, DMA_TAG(dmaTag), 0, 0);
+}
+
+///Starts one dmaCollisionShape transfer per child (all on dmaTag) into gSubshapeShape[i]; the child pointers and types are read from gSubshapes.
+void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag)
+{
+	const int numChildren = spuCompoundShape->getNumChildShapes();
+	for (int child = 0; child < numChildren; ++child)
+	{
+		btCompoundShapeChild& info = compoundShapeLocation->gSubshapes[child];
+		void* destination = &compoundShapeLocation->gSubshapeShape[child];
+		dmaCollisionShape (destination,(ppu_address_t)info.m_childShape, dmaTag, info.m_childShapeType);
+	}
+}
+
+
+///Stackless walk over the quantized BVH nodes with indices [startNodeIndex, endNodeIndex); rootNode points at the first of them.
+///Calls nodeCallback->processNode(0, triangleIndex) for every leaf whose quantized AABB overlaps the query AABB.
+///Internal nodes that do not overlap are skipped together with their subtree by advancing by their escape index.
+void	spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex)
+{
+
+#ifdef BT_DEBUG
+	const int subTreeSize = endNodeIndex - startNodeIndex;
+#endif
+	int visitedNodes = 0;
+	const btQuantizedBvhNode* node = rootNode;
+
+	for (int nodeIndex = startNodeIndex; nodeIndex < endNodeIndex; )
+	{
+		//catch bugs in tree data
+		btAssert (visitedNodes < subTreeSize);
+		visitedNodes++;
+
+		const unsigned int overlaps = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,node->m_quantizedAabbMin,node->m_quantizedAabbMax);
+		const unsigned int isLeaf = node->isLeafNode();
+
+		if (!overlaps && !isLeaf)
+		{
+			//internal node outside the query box: jump over its whole subtree
+			const int escapeIndex = node->getEscapeIndex();
+			node += escapeIndex;
+			nodeIndex += escapeIndex;
+			continue;
+		}
+
+		if (overlaps && isLeaf)
+		{
+			//overlapping leaf: report its triangle
+			nodeCallback->processNode(0,node->getTriangleIndex());
+		}
+
+		//a leaf, or an overlapping internal node: the next array element is visited next
+		node++;
+		nodeIndex++;
+	}
+
+}
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
new file mode 100644
index 00000000..aa8a2910
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
@@ -0,0 +1,128 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#ifndef __SPU_COLLISION_SHAPES_H
+#define __SPU_COLLISION_SHAPES_H
+
+#include "../SpuDoubleBuffer.h"
+
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionShapes/btConvexInternalShape.h"
+#include "BulletCollision/CollisionShapes/btCylinderShape.h"
+#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
+
+#include "BulletCollision/CollisionShapes/btOptimizedBvh.h"
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btConvexHullShape.h"
+#include "BulletCollision/CollisionShapes/btCompoundShape.h"
+
+#define MAX_NUM_SPU_CONVEX_POINTS 128 //@fallback to PPU if a btConvexHullShape has more than MAX_NUM_SPU_CONVEX_POINTS points
+#define MAX_SPU_COMPOUND_SUBSHAPES 16 //@fallback on PPU if compound has more than MAX_SPU_COMPOUND_SUBSHAPES child shapes
+#define MAX_SHAPE_SIZE 256 //@todo: assert on this
+
+ATTRIBUTE_ALIGNED16(struct)	SpuConvexPolyhedronVertexData
+{
+	void*	gSpuConvexShapePtr;	//not written by dmaConvexVertexData; NOTE(review): presumably maintained by the caller -- confirm
+	btVector3* gConvexPoints;	//not written by dmaConvexVertexData either
+	int gNumConvexPoints;	//point count recorded by dmaConvexVertexData
+	int unused;
+	ATTRIBUTE_ALIGNED16(btVector3 g_convexPointBuffer[MAX_NUM_SPU_CONVEX_POINTS]);	//DMA destination for the hull's unscaled points
+};
+
+
+
+ATTRIBUTE_ALIGNED16(struct) CollisionShape_LocalStoreMemory
+{	//raw, 16-byte aligned storage of MAX_SHAPE_SIZE bytes; getShapeTypeSize asserts that every supported shape class is smaller than that
+	ATTRIBUTE_ALIGNED16(char collisionShape[MAX_SHAPE_SIZE]);
+};
+
+ATTRIBUTE_ALIGNED16(struct) CompoundShape_LocalStoreMemory
+{
+	// Compound data
+	// gSubshapes receives the child list (dmaCompoundShapeInfo); gSubshapeShape[i] receives the shape object of child i (dmaCompoundSubShapes)
+	ATTRIBUTE_ALIGNED16(btCompoundShapeChild gSubshapes[MAX_SPU_COMPOUND_SUBSHAPES]);
+	ATTRIBUTE_ALIGNED16(char gSubshapeShape[MAX_SPU_COMPOUND_SUBSHAPES][MAX_SHAPE_SIZE]);
+};
+
+ATTRIBUTE_ALIGNED16(struct) bvhMeshShape_LocalStoreMemory
+{
+	//ATTRIBUTE_ALIGNED16(btOptimizedBvh	gOptimizedBvh);
+	ATTRIBUTE_ALIGNED16(char gOptimizedBvh[sizeof(btOptimizedBvh)+16]);	//raw bytes instead of an object; DMA destination in dmaBvhShapeData (tag 2)
+	btOptimizedBvh*	getOptimizedBvh()	//reinterprets the raw buffer above as a btOptimizedBvh
+	{
+		return (btOptimizedBvh*) gOptimizedBvh;
+	}
+	//dmaBvhShapeData fills the storage on the SPU and always sets gTriangleMeshInterfacePtr
+	ATTRIBUTE_ALIGNED16(btTriangleIndexVertexArray	gTriangleMeshInterfaceStorage);
+	btTriangleIndexVertexArray*	gTriangleMeshInterfacePtr;
+	///only a single mesh part for now, we can add support for multiple parts, but quantized trees don't support this at the moment 
+	ATTRIBUTE_ALIGNED16(btIndexedMesh	gIndexMesh);
+	#define MAX_SPU_SUBTREE_HEADERS 32
+	//1024
+	ATTRIBUTE_ALIGNED16(btBvhSubtreeInfo	gSubtreeHeaders[MAX_SPU_SUBTREE_HEADERS]);	//capacity for dmaBvhSubTreeHeaders batches -- NOTE(review): presumably its destination, confirm at the call site
+	ATTRIBUTE_ALIGNED16(btQuantizedBvhNode	gSubtreeNodes[MAX_SUBTREE_SIZE_IN_BYTES/sizeof(btQuantizedBvhNode)]);	//sized for one subtree of at most MAX_SUBTREE_SIZE_IN_BYTES bytes
+};
+
+
+void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, const btTransform& xform);
+void dmaBvhShapeData (bvhMeshShape_LocalStoreMemory* bvhMeshShape, btBvhTriangleMeshShape* triMeshShape);
+void dmaBvhIndexedMesh (btIndexedMesh* IndexMesh, IndexedMeshArray& indexArray, int index, uint32_t dmaTag);
+void dmaBvhSubTreeHeaders (btBvhSubtreeInfo* subTreeHeaders, ppu_address_t subTreePtr, int batchSize, uint32_t dmaTag);
+void dmaBvhSubTreeNodes (btQuantizedBvhNode* nodes, const btBvhSubtreeInfo& subtree, QuantizedNodeArray&	nodeArray, int dmaTag);
+
+int  getShapeTypeSize(int shapeType);
+void dmaConvexVertexData (SpuConvexPolyhedronVertexData* convexVertexData, btConvexHullShape* convexShapeSPU);
+void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionShapePtr, uint32_t dmaTag, int shapeType);
+void dmaCompoundShapeInfo (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag);
+void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation, btCompoundShape* spuCompoundShape, uint32_t dmaTag);
+
+
+#define USE_BRANCHFREE_TEST 1
+#ifdef USE_BRANCHFREE_TEST
+///Returns 1 when the quantized AABBs [aabbMin1,aabbMax1] and [aabbMin2,aabbMax2] overlap on all three axes (touching counts as overlap), else 0. Inputs are only read, hence const like the fallback variant below.
+SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
+{		
+#if defined(__CELLOS_LV2__) && defined (__SPU__)
+	vec_ushort8 vecMin = {aabbMin1[0],aabbMin2[0],aabbMin1[2],aabbMin2[2],aabbMin1[1],aabbMin2[1],0,0};
+	vec_ushort8 vecMax = {aabbMax2[0],aabbMax1[0],aabbMax2[2],aabbMax1[2],aabbMax2[1],aabbMax1[1],0,0};
+	vec_ushort8 isGt = spu_cmpgt(vecMin,vecMax);
+	return spu_extract(spu_gather(isGt),0)==0;	//NOTE(review): presumably "no min lane exceeds its paired max lane" -- confirm against the SPU intrinsics reference
+#else
+	return btSelect((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0])
+		& (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2])
+		& (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])),
+		1, 0);
+#endif
+}
+#else
+///Branching variant of the same overlap test; only compiled when USE_BRANCHFREE_TEST is not defined.
+SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int*  aabbMax2)
+{
+	unsigned int overlap = 1;
+	overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? 0 : overlap;
+	overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? 0 : overlap;
+	overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? 0 : overlap;
+	return overlap;
+}
+#endif
+
+void	spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex);
+
+#endif
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
new file mode 100644
index 00000000..8584e74c
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
@@ -0,0 +1,248 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuContactResult.h"
+
+//#define DEBUG_SPU_COLLISION_DETECTION 1
+
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+#ifndef __SPU__
+#include <stdio.h>
+#define spu_printf printf
+#endif
+#endif //DEBUG_SPU_COLLISION_DETECTION
+///Starts without a bound manifold; every scalar member gets a defined value (setContactInfo supplies the real ones later).
+SpuContactResult::SpuContactResult()
+	:	m_manifoldAddress(0),
+	m_spuManifold(NULL), m_RequiresWriteBack(false),
+	m_combinedFriction(btScalar(0.)), m_combinedRestitution(btScalar(0.)), m_isSwapped(false)
+{
+}
+///Swaps the export buffers one last time; a note in writeDoubleBufferedManifold states that swapBuffers performs the wait for a pending transfer.
+ SpuContactResult::~SpuContactResult()
+{
+	g_manifoldDmaExport.swapBuffers();
+}
+
+///Default material combiner: the product of both friction values, limited to the range [-10, 10].
+///User can override this material combiner by implementing gContactAddedCallback and setting body0->m_collisionFlags |= btCollisionObject::customMaterialCallback;
+inline btScalar	calculateCombinedFriction(btScalar friction0,btScalar friction1)
+{
+	const btScalar frictionLimit = btScalar(10.);
+	const btScalar product = friction0*friction1;
+
+	//clamp symmetrically; a product inside the limits passes through untouched
+	if (product < -frictionLimit)
+		return -frictionLimit;
+	if (product > frictionLimit)
+		return frictionLimit;
+	return product;
+}
+///Combined restitution of a contact pair: the plain product of both coefficients.
+inline btScalar	calculateCombinedRestitution(btScalar restitution0,btScalar restitution1)
+{
+	const btScalar combined = restitution0*restitution1;
+	return combined;
+}
+///Binds this result to a manifold: stores the local manifold pointer, its main-memory address, both root transforms,
+///the swap flag and the friction/restitution combined from the coefficients of the two bodies.
+ void	SpuContactResult::setContactInfo(btPersistentManifold* spuManifold, ppu_address_t	manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction1, bool isSwapped)
+ {
+	//spu_printf("SpuContactResult::setContactInfo ManifoldAddress: %lu\n", manifoldAddress);
+	m_spuManifold = spuManifold;
+	m_manifoldAddress = manifoldAddress;
+	m_isSwapped = isSwapped;
+
+	m_rootWorldTransform0 = worldTrans0;
+	m_rootWorldTransform1 = worldTrans1;
+	m_combinedRestitution = calculateCombinedRestitution(restitution0,restitution1);
+	m_combinedFriction = calculateCombinedFriction(friction0,friction1);
+ }
+
+ void SpuContactResult::setShapeIdentifiersA(int partId0,int index0)
+ {
+	//no-op: partId0 and index0 are not recorded by this implementation
+ }
+
+ void SpuContactResult::setShapeIdentifiersB(int partId1,int index1)
+ {
+	//no-op: partId1 and index1 are not recorded by this implementation
+ }
+
+
+
+///Converts a world-space contact into a btManifoldPoint and stores it in manifoldPtr.
+///@param normalOnBInWorld contact normal in world space (negated here when isSwapped is set)
+///@param pointInWorld contact position on B in world space
+///@param depth distance along the normal; a contact whose depth exceeds the manifold's contact breaking threshold is dropped
+///@param manifoldPtr manifold that receives the point (must not be NULL, it is dereferenced unconditionally)
+///@param transA transform whose inverse maps the position on A into the local frame stored in the point
+///@param transB transform whose inverse maps the position on B into the local frame stored in the point
+///@param combinedFriction copied into the new point unchanged
+///@param combinedRestitution copied into the new point unchanged
+///@param isSwapped true when the normal has to be flipped before it is used
+///return true if it requires a dma transfer back (a point was added or replaced), false if the contact was dropped
+bool ManifoldResultAddContactPoint(const btVector3& normalOnBInWorld,
+								   const btVector3& pointInWorld,
+								   float depth,
+								   btPersistentManifold* manifoldPtr,
+								   btTransform& transA,
+								   btTransform& transB,
+									btScalar	combinedFriction,
+									btScalar	combinedRestitution,
+								   bool isSwapped)
+{
+	//read once: used by the debug output and by the rejection test
+	const btScalar contactThreshold = manifoldPtr->getContactBreakingThreshold();
+
+	//spu_printf("SPU: add contactpoint, depth:%f, contactTreshold %f, manifoldPtr %llx\n",depth,contactTreshold,manifoldPtr);
+
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+	spu_printf("SPU: contactTreshold %f\n",contactThreshold);
+#endif //DEBUG_SPU_COLLISION_DETECTION
+	if (depth > contactThreshold)
+		return false;
+
+	//if (depth > manifoldPtr->getContactProcessingThreshold())
+	//	return false;
+
+
+	//the normal is flipped when isSwapped is set
+	btVector3 normal = normalOnBInWorld;
+	if (isSwapped)
+		normal = normalOnBInWorld * -1;
+
+
+	//both orderings share this construction: the position on A is the point on B moved along the normal by depth
+	const btVector3 pointA = pointInWorld + normal * depth;
+	const btVector3 localA = transA.invXform(pointA);
+	const btVector3 localB = transB.invXform(pointInWorld);
+
+
+	//the point keeps the local positions (constructor arguments) as well as the world-space ones
+	btManifoldPoint newPt(localA,localB,normal,depth);
+	newPt.m_positionWorldOnA = pointA;
+	newPt.m_positionWorldOnB = pointInWorld;
+
+	newPt.m_combinedFriction = combinedFriction;
+	newPt.m_combinedRestitution = combinedRestitution;
+
+
+	//a non-negative result is treated as the slot of an already cached point for this contact
+	const int insertIndex = manifoldPtr->getCacheEntry(newPt);
+	if (insertIndex >= 0)
+	{
+		// we need to replace the current contact point, otherwise small errors will accumulate (spheres start rolling etc)
+		manifoldPtr->replaceContactPoint(newPt,insertIndex);
+	} else
+	{
+		/*
+		///@todo: SPU callbacks, either immediate (local on the SPU), or deferred
+		//User can override friction and/or restitution
+		if (gContactAddedCallback &&
+			//and if either of the two bodies requires custom material
+			 ((m_body0->m_collisionFlags & btCollisionObject::customMaterialCallback) ||
+			   (m_body1->m_collisionFlags & btCollisionObject::customMaterialCallback)))
+		{
+			//experimental feature info, for per-triangle material etc.
+			(*gContactAddedCallback)(newPt,m_body0,m_partId0,m_index0,m_body1,m_partId1,m_index1);
+		}
+		*/
+
+		manifoldPtr->addManifoldPoint(newPt);
+	}
+
+
+	//either way the manifold was modified, so the caller has to write it back
+	return true;
+
+}
+
+///Copies lsManifold into the front export buffer, swaps the buffers and hands mmManifold's address to backBufferDmaPut; the body is empty unless __SPU__ or USE_LIBSPE2 is defined.
+void SpuContactResult::writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold)
+{
+	///only write back the contact information on SPU. Other platforms avoid copying, and use the data in-place
+	///see SpuFakeDma.cpp 'cellDmaLargeGetReadOnly'
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+    memcpy(g_manifoldDmaExport.getFront(),lsManifold,sizeof(btPersistentManifold));
+	//after the swap the copy made above sits in the back buffer, which is what gets transferred
+    g_manifoldDmaExport.swapBuffers();
+    ppu_address_t mmAddr = (ppu_address_t)mmManifold;
+    g_manifoldDmaExport.backBufferDmaPut(mmAddr, sizeof(btPersistentManifold), DMA_TAG(9));
+	// Should there be any kind of wait here?  What if somebody tries to use this tag again?  What if we call this function again really soon?
+	//no, the swapBuffers does the wait
+#endif
+}
+///Result callback: turns the reported contact into a manifold point on the bound manifold and remembers whether a write-back became necessary.
+void SpuContactResult::addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
+{
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+	spu_printf("*** SpuContactResult::addContactPoint: depth = %f\n",depth);
+	spu_printf("*** normal = %f,%f,%f\n",normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ());
+	spu_printf("*** position = %f,%f,%f\n",pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ());
+#endif //DEBUG_SPU_COLLISION_DETECTION
+
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+ //   int sman = sizeof(rage::phManifold);
+//	spu_printf("sizeof_manifold = %i\n",sman);
+#endif //DEBUG_SPU_COLLISION_DETECTION
+
+	//the contact is re-assembled from its x,y,z components before it is handed on
+	btVector3	worldNormal(normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ());
+	btVector3	worldPoint(pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ());
+
+	//process the contact point on the manifold bound by setContactInfo
+	const bool manifoldChanged = ManifoldResultAddContactPoint(worldNormal,
+		worldPoint,
+		depth,
+		m_spuManifold,
+		m_rootWorldTransform0,
+		m_rootWorldTransform1,
+		m_combinedFriction,
+		m_combinedRestitution,
+		m_isSwapped);
+	//the flag is sticky: once set it stays set until flush() resets it
+	if (manifoldChanged)
+		m_RequiresWriteBack = true;
+}
+///Refreshes the contact points of the bound manifold (if it holds any), writes the manifold back when required and unbinds it.
+void SpuContactResult::flush()
+{
+	btPersistentManifold* const boundManifold = m_spuManifold;
+	//a manifold that holds contacts is refreshed and then always written back
+	const bool hasContacts = boundManifold && boundManifold->getNumContacts();
+	if (hasContacts)
+	{
+		boundManifold->refreshContactPoints(m_rootWorldTransform0,m_rootWorldTransform1);
+		m_RequiresWriteBack = true;
+	}
+
+	if (m_RequiresWriteBack)
+	{
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+		spu_printf("SPU: Start SpuContactResult::flush (Put) DMA\n");
+		spu_printf("Num contacts:%d\n", m_spuManifold->getNumContacts());
+		spu_printf("Manifold address: %llu\n", m_manifoldAddress);
+#endif //DEBUG_SPU_COLLISION_DETECTION
+		writeDoubleBufferedManifold(boundManifold, (btPersistentManifold*)m_manifoldAddress);
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+		spu_printf("SPU: Finished (Put) DMA\n");
+#endif //DEBUG_SPU_COLLISION_DETECTION
+	}
+	m_spuManifold = NULL;
+	m_RequiresWriteBack = false;
+}
+
+
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h
new file mode 100644
index 00000000..394f56dc
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h
@@ -0,0 +1,106 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef SPU_CONTACT_RESULT2_H
+#define SPU_CONTACT_RESULT2_H
+
+
+#ifndef _WIN32
+#include <stdint.h>
+#endif
+
+
+
+#include "../SpuDoubleBuffer.h"
+
+
+#include "LinearMath/btTransform.h"
+
+
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+#include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h"
+
+class btCollisionShape;
+
+///Work description for one collision pair: shape addresses/pointers, shape types, margins, world transforms and flags.
+struct SpuCollisionPairInput
+{
+	ppu_address_t m_collisionShapes[2];	//addresses of the two shapes; used as DMA source for convex hull shapes
+	btCollisionShape*	m_spuCollisionShapes[2];	//directly usable shape pointers; callers cast them to the concrete shape type
+	//address of the pair's persistent manifold (cast to btPersistentManifold* by the collision code)
+	ppu_address_t m_persistentManifoldPtr;
+	btVector3	m_primitiveDimensions0;
+	btVector3	m_primitiveDimensions1;
+	int		m_shapeType0;	//compared against the *_SHAPE_PROXYTYPE constants
+	int		m_shapeType1;	
+	float	m_collisionMargin0;
+	float	m_collisionMargin1;
+
+	btTransform	m_worldTransform0;
+	btTransform m_worldTransform1;
+	
+	bool	m_isSwapped;
+	bool    m_useEpa;
+};
+
+///Closest-point query input extended with one vertex-data pointer per shape.
+struct SpuClosestPointInput : public btDiscreteCollisionDetectorInterface::ClosestPointInput
+{
+	struct SpuConvexPolyhedronVertexData* m_convexVertexData[2];
+};
+
+///SpuContactResult exports the contact points using double-buffered DMA transfers, only when needed:
+///flush() writes the manifold back only if it holds contacts or a point was added/replaced since the last flush.
+class SpuContactResult : public btDiscreteCollisionDetectorInterface::Result
+{
+	//world transforms handed to setContactInfo; used to bring contacts into the local frames of both objects
+	btTransform		m_rootWorldTransform0;
+	btTransform		m_rootWorldTransform1;
+	ppu_address_t	m_manifoldAddress;	//main-memory address the manifold is written back to
+
+	btPersistentManifold* m_spuManifold;	//manifold currently bound, NULL after flush()
+	bool m_RequiresWriteBack;	//set once a contact point was added or replaced
+	btScalar	m_combinedFriction;
+	btScalar	m_combinedRestitution;
+	bool m_isSwapped;
+
+	//export buffers for the write-back (a member, despite the g_ prefix)
+	DoubleBuffer<btPersistentManifold, 1> g_manifoldDmaExport;
+
+	public:
+		SpuContactResult();
+		virtual ~SpuContactResult();
+
+		///manifold currently bound via setContactInfo (NULL when none)
+		btPersistentManifold*	GetSpuManifold() const
+		{
+			return m_spuManifold;
+		}
+
+		virtual void setShapeIdentifiersA(int partId0,int index0);
+		virtual void setShapeIdentifiersB(int partId1,int index1);
+
+		///binds a manifold and stores the transforms, the swap flag and the combined friction/restitution of both bodies
+		void	setContactInfo(btPersistentManifold* spuManifold, ppu_address_t	manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction1, bool isSwapped);
+
+        void writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold);
+        virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth);
+		void flush();
+};
+
+
+
+#endif //SPU_CONTACT_RESULT2_H
+
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
new file mode 100644
index 00000000..449f1928
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
@@ -0,0 +1,51 @@
+
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef SPU_CONVEX_PENETRATION_DEPTH_H
+#define SPU_CONVEX_PENETRATION_DEPTH_H
+
+
+
+class btStackAlloc;
+class btIDebugDraw;
+//directory spelled as in the other Bullet includes (NarrowPhaseCollision); the lowercase 'p' spelling only resolves on case-insensitive file systems
+#include "BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h"
+
+#include "LinearMath/btTransform.h"
+
+///ConvexPenetrationDepthSolver provides an interface for penetration depth calculation.
+class SpuConvexPenetrationDepthSolver : public btConvexPenetrationDepthSolver
+{
+public:	
+	//NOTE(review): SpuVoronoiSimplexSolver is neither declared nor included in this header - confirm where it is expected to come from
+	virtual ~SpuConvexPenetrationDepthSolver() {};
+	virtual bool calcPenDepth( SpuVoronoiSimplexSolver& simplexSolver,
+	        void* convexA,void* convexB,int shapeTypeA, int shapeTypeB, float marginA, float marginB,
+            btTransform& transA,const btTransform& transB,
+			btVector3& v, btVector3& pa, btVector3& pb,
+			class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc,
+			struct SpuConvexPolyhedronVertexData* convexVertexDataA,
+			struct SpuConvexPolyhedronVertexData* convexVertexDataB
+			) const = 0;
+	//pure virtual: derived solvers supply the computation; the exact meaning of v/pa/pb is not visible from this header
+
+};
+
+
+
+#endif //SPU_CONVEX_PENETRATION_DEPTH_H
+
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
new file mode 100644
index 00000000..42f5f45c
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
@@ -0,0 +1,1415 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuGatheringCollisionTask.h"
+
+//#define DEBUG_SPU_COLLISION_DETECTION 1
+#include "../SpuDoubleBuffer.h"
+
+#include "../SpuCollisionTaskProcess.h"
+#include "../SpuGatheringCollisionDispatcher.h" //for SPU_BATCHSIZE_BROADPHASE_PAIRS
+
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "../SpuContactManifoldCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "SpuContactResult.h"
+#include "BulletCollision/CollisionShapes/btOptimizedBvh.h"
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "BulletCollision/CollisionShapes/btConvexPointCloudShape.h"
+
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+
+#include "BulletCollision/CollisionShapes/btConvexShape.h"
+#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btConvexHullShape.h"
+#include "BulletCollision/CollisionShapes/btCompoundShape.h"
+
+#include "SpuMinkowskiPenetrationDepthSolver.h"
+//#include "SpuEpaPenetrationDepthSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
+
+
+#include "boxBoxDistance.h"
+#include "BulletMultiThreaded/vectormath2bullet.h"
+#include "SpuCollisionShapes.h" //definition of SpuConvexPolyhedronVertexData
+#include "BulletCollision/CollisionDispatch/btBoxBoxDetector.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+
+#ifdef __SPU__
+///Software caching from the IBM Cell SDK, it reduces 25% SPU time for our test cases
+#ifndef USE_LIBSPE2
+//#define USE_SOFTWARE_CACHE 1
+#endif
+#endif //__SPU__
+
+int gSkippedCol = 0;
+int gProcessedCol = 0;
+
+////////////////////////////////////////////////
+/// software caching
+#if USE_SOFTWARE_CACHE
+#include <spu_intrinsics.h>
+#include <sys/spu_thread.h>
+#include <sys/spu_event.h>
+#include <stdint.h>
+#define SPE_CACHE_NWAY   		4
+//#define SPE_CACHE_NSETS 		32, 16
+#define SPE_CACHE_NSETS 		8
+//#define SPE_CACHELINE_SIZE 		512
+#define SPE_CACHELINE_SIZE 		128
+#define SPE_CACHE_SET_TAGID(set) 	15
+///make sure that spe_cache.h is below those defines!
+#include "../Extras/software_cache/cache/include/spe_cache.h"
+
+
+int g_CacheMisses=0;
+int g_CacheHits=0;
+
+#if 0 // Added to allow cache misses and hits to be tracked, change this to 1 to restore unmodified version
+#define spe_cache_read(ea)		_spe_cache_lookup_xfer_wait_(ea, 0, 1)
+#else
+#define spe_cache_read(ea)		\
+({								\
+    int set, idx, line, byte;					\
+    _spe_cache_nway_lookup_(ea, set, idx);			\
+								\
+    if (btUnlikely(idx < 0)) {					\
+        ++g_CacheMisses;                        \
+	    idx = _spe_cache_miss_(ea, set, -1);			\
+        spu_writech(22, SPE_CACHE_SET_TAGMASK(set));		\
+        spu_mfcstat(MFC_TAG_UPDATE_ALL);			\
+    } 								\
+    else                            \
+    {                               \
+        ++g_CacheHits;              \
+    }                               \
+    line = _spe_cacheline_num_(set, idx);			\
+    byte = _spe_cacheline_byte_offset_(ea);			\
+    (void *) &spe_cache_mem[line + byte];			\
+})
+
+#endif
+
+#endif // USE_SOFTWARE_CACHE
+
+bool gUseEpa = false;
+
+#ifdef USE_SN_TUNER
+#include <LibSN_SPU.h>
+#endif //USE_SN_TUNER
+
+#if defined (__SPU__) && !defined (USE_LIBSPE2)
+#include <spu_printf.h>
+#elif defined (USE_LIBSPE2)
+#define spu_printf(a)
+#else
+#define IGNORE_ALIGNMENT 1
+#include <stdio.h>
+#include <stdlib.h>
+#define spu_printf printf
+
+#endif
+
+//int gNumConvexPoints0=0;
+
+///Make sure no destructors are called on this memory
+struct	CollisionTask_LocalStoreMemory
+{
+	///This CollisionTask_LocalStoreMemory is mainly used for the SPU version, using explicit DMA
+	///Other platforms can use other memory programming models.
+
+	ATTRIBUTE_ALIGNED16(btBroadphasePair	gBroadphasePairsBuffer[SPU_BATCHSIZE_BROADPHASE_PAIRS]);
+	DoubleBuffer<unsigned char, MIDPHASE_WORKUNIT_PAGE_SIZE> g_workUnitTaskBuffers;
+	ATTRIBUTE_ALIGNED16(char gSpuContactManifoldAlgoBuffer [sizeof(SpuContactManifoldCollisionAlgorithm)+16]);
+	ATTRIBUTE_ALIGNED16(char gColObj0Buffer [sizeof(btCollisionObject)+16]);
+	ATTRIBUTE_ALIGNED16(char gColObj1Buffer [sizeof(btCollisionObject)+16]);
+	///we reserve 32bit integer indices, even though they might be 16bit
+	ATTRIBUTE_ALIGNED16(int	spuIndices[16]);	//entries 0..2 receive the triangle indices in spuNodeCallback::processNode
+	btPersistentManifold	gPersistentManifoldBuffer;
+	CollisionShape_LocalStoreMemory gCollisionShapes[2];
+	bvhMeshShape_LocalStoreMemory bvhShapeData;	//index mesh and mesh interface read by spuNodeCallback::processNode
+	SpuConvexPolyhedronVertexData convexVertexData[2];	//one vertex-data slot per shape of the pair
+	CompoundShape_LocalStoreMemory compoundShapeData[2];
+	//none of the pointers below is initialized by this struct itself; it declares no constructor
+	///The following pointers might either point into this local store memory, or to the original/other memory locations.
+	///See SpuFakeDma for implementation of cellDmaSmallGetReadOnly.
+	btCollisionObject*	m_lsColObj0Ptr;
+	btCollisionObject*	m_lsColObj1Ptr;
+	btBroadphasePair* m_pairsPointer;
+	btPersistentManifold*	m_lsManifoldPtr;
+	SpuContactManifoldCollisionAlgorithm*	m_lsCollisionAlgorithmPtr;
+
+	bool	needsDmaPutContactManifoldAlgo;
+	//plain accessors: each one returns the corresponding pointer member unchanged
+	btCollisionObject* getColObj0()
+	{
+		return m_lsColObj0Ptr;
+	}
+	btCollisionObject* getColObj1()
+	{
+		return m_lsColObj1Ptr;
+	}
+
+
+	btBroadphasePair* getBroadphasePairPtr()
+	{
+		return m_pairsPointer;
+	}
+
+	SpuContactManifoldCollisionAlgorithm*	getlocalCollisionAlgorithm()
+	{
+		return m_lsCollisionAlgorithmPtr;
+	}
+	
+	btPersistentManifold*	getContactManifoldPtr()
+	{
+		return m_lsManifoldPtr;
+	}
+};
+
+///Returns the memory used as CollisionTask_LocalStoreMemory: the address of a static instance on __CELLOS_LV2__/USE_LIBSPE2 builds, a fresh heap allocation otherwise.
+#if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2) 
+
+ATTRIBUTE_ALIGNED16(CollisionTask_LocalStoreMemory	gLocalStoreMemory);
+
+void* createCollisionLocalStoreMemory()
+{
+	return &gLocalStoreMemory;
+}
+#else
+void* createCollisionLocalStoreMemory()
+{
+        return new CollisionTask_LocalStoreMemory;
+}
+//NOTE(review): no matching delete is visible in this part of the file - confirm who releases the allocation
+#endif
+
+void	ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts);
+
+///Reads 'size' bytes from address 'ea' into 'buffer': through the software cache when USE_SOFTWARE_CACHE is set, otherwise via stallingUnalignedDmaSmallGet.
+SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size)
+{
+#if USE_SOFTWARE_CACHE
+	// Check for alignment requirements. We need to make sure the entire request fits within one cache line,
+	// so the first and last bytes should fall on the same cache line
+	btAssert((ea & ~SPE_CACHELINE_MASK) == ((ea + size - 1) & ~SPE_CACHELINE_MASK));
+	//spe_cache_read yields a pointer to the cached bytes for 'ea'; they are then copied out
+	void* ls = spe_cache_read(ea);
+	memcpy(buffer, ls, size);
+#else
+	stallingUnalignedDmaSmallGet(buffer,ea,size);
+#endif
+}
+///Fetches 'size' bytes (size must be below 16) from each of the addresses ea0/ea1/ea2 into ls0/ls1/ls2; the three transfers share DMA tag 1 and are waited for together.
+SIMD_FORCE_INLINE void small_cache_read_triple(	void* ls0, ppu_address_t ea0,
+												void* ls1, ppu_address_t ea1,
+												void* ls2, ppu_address_t ea2,
+												size_t size)
+{
+		btAssert(size<16);
+		ATTRIBUTE_ALIGNED16(char	tmpBuffer0[32]);
+		ATTRIBUTE_ALIGNED16(char	tmpBuffer1[32]);
+		ATTRIBUTE_ALIGNED16(char	tmpBuffer2[32]);
+		//32 bytes per buffer leave room for an offset of up to 15 plus a payload of up to 15 bytes
+		uint32_t i;
+		
+
+		///make sure last 4 bits are the same, for cellDmaSmallGet
+		char* localStore0 = (char*)ls0;
+		uint32_t last4BitsOffset = ea0 & 0x0f;
+		char* tmpTarget0 = tmpBuffer0 + last4BitsOffset;
+#ifdef __SPU__
+		cellDmaSmallGet(tmpTarget0,ea0,size,DMA_TAG(1),0,0);
+#else
+		tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0);
+#endif
+		//outside __SPU__ builds the pointer returned by cellDmaSmallGetReadOnly replaces the temporary target
+
+		char* localStore1 = (char*)ls1;
+		last4BitsOffset = ea1 & 0x0f;
+		char* tmpTarget1 = tmpBuffer1 + last4BitsOffset;
+#ifdef __SPU__
+		cellDmaSmallGet(tmpTarget1,ea1,size,DMA_TAG(1),0,0);
+#else
+		tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0);
+#endif
+		
+		char* localStore2 = (char*)ls2;
+		last4BitsOffset = ea2 & 0x0f;
+		char* tmpTarget2 = tmpBuffer2 + last4BitsOffset;
+#ifdef __SPU__
+		cellDmaSmallGet(tmpTarget2,ea2,size,DMA_TAG(1),0,0);
+#else
+		tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0);
+#endif
+		
+		//one wait covers all three transfers, since they were all issued on tag 1
+		cellDmaWaitTagStatusAll( DMA_MASK(1) );
+
+		//this is slowish, perhaps memcpy on SPU is smarter?
+		for (i=0; btLikely( i<size );i++)
+		{
+			localStore0[i] = tmpTarget0[i];
+			localStore1[i] = tmpTarget1[i];
+			localStore2[i] = tmpTarget2[i];
+		}
+
+		
+}
+
+
+
+///Node callback: for every triangle passed to processNode it fetches the indices and vertices, fills a temporary triangle shape and collides shape 0 of the work unit against it.
+class spuNodeCallback : public btNodeOverlapCallback
+{
+	SpuCollisionPairInput* m_wuInput;
+	SpuContactResult&		m_spuContacts;
+	CollisionTask_LocalStoreMemory*	m_lsMemPtr;
+	ATTRIBUTE_ALIGNED16(btTriangleShape)	m_tmpTriangleShape;
+
+	ATTRIBUTE_ALIGNED16(btVector3	spuTriangleVertices[3]);
+	ATTRIBUTE_ALIGNED16(btScalar	spuUnscaledVertex[4]);
+	//spuTriangleVertices is only mentioned in commented-out code within this class
+
+
+public:
+	spuNodeCallback(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory*	lsMemPtr,SpuContactResult& spuContacts)
+		:	m_wuInput(wuInput),
+		m_spuContacts(spuContacts),
+		m_lsMemPtr(lsMemPtr)
+	{
+	}
+
+	virtual void processNode(int subPart, int triangleIndex)
+	{
+		///Create a triangle on the stack, call process collision, with GJK
+		///DMA the vertices, can benefit from software caching
+
+		//		spu_printf("processNode with triangleIndex %d\n",triangleIndex);
+		//step 1: fetch the three vertex indices of the triangle (16 bit or 32 bit, depending on m_indexType)
+		if (m_lsMemPtr->bvhShapeData.gIndexMesh.m_indexType == PHY_SHORT)
+		{
+			unsigned short int* indexBasePtr = (unsigned short int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
+			ATTRIBUTE_ALIGNED16(unsigned short int tmpIndices[3]);
+
+			small_cache_read_triple(&tmpIndices[0],(ppu_address_t)&indexBasePtr[0],
+									&tmpIndices[1],(ppu_address_t)&indexBasePtr[1],
+									&tmpIndices[2],(ppu_address_t)&indexBasePtr[2],
+									sizeof(unsigned short int));
+			//widen the 16 bit indices into the int sized slots
+			m_lsMemPtr->spuIndices[0] = int(tmpIndices[0]);
+			m_lsMemPtr->spuIndices[1] = int(tmpIndices[1]);
+			m_lsMemPtr->spuIndices[2] = int(tmpIndices[2]);
+		} else
+		{
+			unsigned int* indexBasePtr = (unsigned int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride);
+
+			small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0],
+								&m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1],
+								&m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2],
+								sizeof(int));
+		}
+		
+		//		spu_printf("SPU index0=%d ,",spuIndices[0]);
+		//		spu_printf("SPU index1=%d ,",spuIndices[1]);
+		//		spu_printf("SPU index2=%d ,",spuIndices[2]);
+		//		spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr);
+		//step 2: fetch each vertex, apply the mesh scaling and store it in the temporary triangle
+		const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling();
+		for (int j=2;btLikely( j>=0 );j--)
+		{
+			int graphicsindex = m_lsMemPtr->spuIndices[j];
+
+			//			spu_printf("SPU index=%d ,",graphicsindex);
+			btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride);
+			//			spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr);
+
+
+			///handle un-aligned vertices...
+
+			//another DMA for each vertex
+			small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0],
+									&spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1],
+									&spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2],
+									sizeof(btScalar));
+			
+			m_tmpTriangleShape.getVertexPtr(j).setValue(spuUnscaledVertex[0]*meshScaling.getX(),
+				spuUnscaledVertex[1]*meshScaling.getY(),
+				spuUnscaledVertex[2]*meshScaling.getZ());
+
+			//			spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z());
+		}
+
+		//step 3: collide against a copy of the work unit whose second shape is the temporary triangle
+		SpuCollisionPairInput triangleConcaveInput(*m_wuInput);
+//		triangleConcaveInput.m_spuCollisionShapes[1] = &spuTriangleVertices[0];
+		triangleConcaveInput.m_spuCollisionShapes[1] = &m_tmpTriangleShape;
+		triangleConcaveInput.m_shapeType1 = TRIANGLE_SHAPE_PROXYTYPE;
+
+		m_spuContacts.setShapeIdentifiersB(subPart,triangleIndex);
+
+		//		m_spuContacts.flush();
+
+		ProcessSpuConvexConvexCollision(&triangleConcaveInput, m_lsMemPtr,m_spuContacts);
+		///this flush should be automatic
+		//	m_spuContacts.flush();
+	}
+
+};
+
+
+
+///Single-contact test of a convex shape (shape 0 of the work unit) against a static plane (shape 1).
+///When the convex shape's support vertex towards the plane is closer than the manifold's contact breaking threshold, one contact is reported to spuContacts.
+void btConvexPlaneCollideSingleContact (SpuCollisionPairInput* wuInput,CollisionTask_LocalStoreMemory* lsMemPtr,SpuContactResult&  spuContacts)
+{
+	//shape 0 is treated as the convex shape, shape 1 as the plane
+	btConvexShape* convex = (btConvexShape*) wuInput->m_spuCollisionShapes[0];
+	btStaticPlaneShape* plane = (btStaticPlaneShape*) wuInput->m_spuCollisionShapes[1];
+	const btTransform& convexWorld = wuInput->m_worldTransform0;
+	const btTransform& planeWorld = wuInput->m_worldTransform1;
+
+	const btVector3& normal = plane->getPlaneNormal();
+	const btScalar& constant = plane->getPlaneConstant();
+
+	//relative transforms in both directions
+	const btTransform convexToPlane = planeWorld.inverse() * convexWorld;
+	const btTransform planeToConvex = convexWorld.inverse() * planeWorld;
+
+	//support vertex of the convex shape against the plane normal, then expressed in the plane's frame
+	//btVector3 vtx = convexShape->localGetSupportVertexWithoutMarginNonVirtual(planeInConvex.getBasis()*-planeNormal);
+	const btVector3 support = convex->localGetSupportVertexNonVirtual(planeToConvex.getBasis()*-normal);
+	const btVector3 supportInPlane = convexToPlane(support);
+
+	//signed distance of that vertex from the plane
+	const btScalar distance = (normal.dot(supportInPlane) - constant);
+
+	//resultOut->setPersistentManifold(m_manifoldPtr);
+	if (!(distance < lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold()))
+		return;
+
+	/// report a contact. internally this will be kept persistent, and contact reduction is done
+	//project the vertex onto the plane and bring it into world space
+	const btVector3 projected = supportInPlane - distance*normal;
+	const btVector3 pOnB = planeWorld * projected;
+	const btVector3 normalOnSurfaceB = planeWorld.getBasis() * normal;
+	spuContacts.addContactPoint(normalOnSurfaceB,pOnB,distance);
+}
+
+void	ProcessConvexPlaneSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts)
+{
+
+		register	int dmaSize = 0;
+		register ppu_address_t	dmaPpuAddress2;
+		btPersistentManifold* manifold = (btPersistentManifold*)wuInput->m_persistentManifoldPtr;
+
+		///DMA in the vertices for convex shapes
+		ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]);
+		ATTRIBUTE_ALIGNED16(char convexHullShape1[sizeof(btConvexHullShape)]);
+
+		if ( btLikely( wuInput->m_shapeType0== CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			//	spu_printf("SPU: DMA btConvexHullShape\n");
+			
+			dmaSize = sizeof(btConvexHullShape);
+			dmaPpuAddress2 = wuInput->m_collisionShapes[0];
+
+			cellDmaGet(&convexHullShape0, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
+			//cellDmaWaitTagStatusAll(DMA_MASK(1));
+		}
+
+		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			//	spu_printf("SPU: DMA btConvexHullShape\n");
+			dmaSize = sizeof(btConvexHullShape);
+			dmaPpuAddress2 = wuInput->m_collisionShapes[1];
+			cellDmaGet(&convexHullShape1, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
+			//cellDmaWaitTagStatusAll(DMA_MASK(1));
+		}
+		
+		if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{		
+			cellDmaWaitTagStatusAll(DMA_MASK(1));
+			dmaConvexVertexData (&lsMemPtr->convexVertexData[0], (btConvexHullShape*)&convexHullShape0);
+			lsMemPtr->convexVertexData[0].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[0];
+		}
+
+			
+		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			cellDmaWaitTagStatusAll(DMA_MASK(1));
+			dmaConvexVertexData (&lsMemPtr->convexVertexData[1], (btConvexHullShape*)&convexHullShape1);
+			lsMemPtr->convexVertexData[1].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[1];
+		}
+
+		
+		btConvexPointCloudShape cpc0,cpc1;
+
+		if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			cellDmaWaitTagStatusAll(DMA_MASK(2));
+			lsMemPtr->convexVertexData[0].gConvexPoints = &lsMemPtr->convexVertexData[0].g_convexPointBuffer[0];
+			btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[0];
+			const btVector3& localScaling = ch->getLocalScalingNV();
+			cpc0.setPoints(lsMemPtr->convexVertexData[0].gConvexPoints,lsMemPtr->convexVertexData[0].gNumConvexPoints,false,localScaling);
+			wuInput->m_spuCollisionShapes[0] = &cpc0;
+		}
+
+		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			cellDmaWaitTagStatusAll(DMA_MASK(2));		
+			lsMemPtr->convexVertexData[1].gConvexPoints = &lsMemPtr->convexVertexData[1].g_convexPointBuffer[0];
+			btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[1];
+			const btVector3& localScaling = ch->getLocalScalingNV();
+			cpc1.setPoints(lsMemPtr->convexVertexData[1].gConvexPoints,lsMemPtr->convexVertexData[1].gNumConvexPoints,false,localScaling);
+			wuInput->m_spuCollisionShapes[1] = &cpc1;
+
+		}
+
+
+//		const btConvexShape* shape0Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[0];
+//		const btConvexShape* shape1Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[1];
+//		int shapeType0 = wuInput->m_shapeType0;
+//		int shapeType1 = wuInput->m_shapeType1;
+		float marginA = wuInput->m_collisionMargin0;
+		float marginB = wuInput->m_collisionMargin1;
+
+		SpuClosestPointInput	cpInput;
+		cpInput.m_convexVertexData[0] = &lsMemPtr->convexVertexData[0];
+		cpInput.m_convexVertexData[1] = &lsMemPtr->convexVertexData[1];
+		cpInput.m_transformA = wuInput->m_worldTransform0;
+		cpInput.m_transformB = wuInput->m_worldTransform1;
+		float sumMargin = (marginA+marginB+lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold());
+		cpInput.m_maximumDistanceSquared = sumMargin * sumMargin;
+
+		ppu_address_t manifoldAddress = (ppu_address_t)manifold;
+
+		btPersistentManifold* spuManifold=lsMemPtr->getContactManifoldPtr();
+		//spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped);
+		spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMemPtr->getColObj0()->getWorldTransform(),
+			lsMemPtr->getColObj1()->getWorldTransform(),
+			lsMemPtr->getColObj0()->getRestitution(),lsMemPtr->getColObj1()->getRestitution(),
+			lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(),
+			wuInput->m_isSwapped);
+
+
+		btConvexPlaneCollideSingleContact(wuInput,lsMemPtr,spuContacts);
+
+
+		
+	
+}
+
+
+
+
+////////////////////////
+/// Convex versus Concave triangle mesh collision detection (handles concave triangle mesh versus sphere, box, cylinder, triangle, cone, convex polyhedron etc)
+/// Subtrees are only walked when the mesh has at least one BVH subtree and exactly one indexed mesh part; otherwise no subtree is visited.
+void	ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts)
+{
+	//order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite
+	//shape slot 1 is reinterpreted as the BVH triangle mesh, slot 0 (further down) as the convex shape
+	btBvhTriangleMeshShape*	trimeshShape = (btBvhTriangleMeshShape*)wuInput->m_spuCollisionShapes[1];
+	//need the mesh interface, for access to triangle vertices
+	dmaBvhShapeData (&lsMemPtr->bvhShapeData, trimeshShape);
+
+	btVector3 aabbMin(-1,-400,-1);
+	btVector3 aabbMax(1,400,1);
+	//NOTE(review): these extents look like placeholders; presumably computeAabb below overwrites them -- confirm
+
+	//recalc aabbs
+	btTransform convexInTriangleSpace;
+	convexInTriangleSpace = wuInput->m_worldTransform1.inverse() * wuInput->m_worldTransform0;
+	btConvexInternalShape* convexShape = (btConvexInternalShape*)wuInput->m_spuCollisionShapes[0];
+
+	computeAabb (aabbMin, aabbMax, convexShape, wuInput->m_collisionShapes[0], wuInput->m_shapeType0, convexInTriangleSpace);
+
+
+	//CollisionShape* triangleShape = static_cast<btCollisionShape*>(triBody->m_collisionShape);
+	//convexShape->getAabb(convexInTriangleSpace,m_aabbMin,m_aabbMax);
+
+	//	btScalar extraMargin = collisionMarginTriangle;
+	//	btVector3 extra(extraMargin,extraMargin,extraMargin);
+	//	aabbMax += extra;
+	//	aabbMin -= extra;
+
+	///quantize query AABB
+	unsigned short int quantizedQueryAabbMin[3];
+	unsigned short int quantizedQueryAabbMax[3];
+	lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin,aabbMin,0);
+	lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax,aabbMax,1);
+
+	QuantizedNodeArray&	nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray();
+	//spu_printf("SPU: numNodes = %d\n",nodeArray.size());
+
+	BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray();
+
+
+	spuNodeCallback	nodeCallback(wuInput,lsMemPtr,spuContacts);
+	IndexedMeshArray&	indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray();
+	//spu_printf("SPU:indexArray.size() = %d\n",indexArray.size());
+	//the walk below only runs when there is at least one subtree and exactly one indexed mesh; there is no fallback branch
+	//	spu_printf("SPU: numSubTrees = %d\n",subTrees.size());
+	//not likely to happen
+	if (subTrees.size() && indexArray.size() == 1)
+	{
+		///DMA in the index info
+		dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */);
+		cellDmaWaitTagStatusAll(DMA_MASK(1));
+		
+		//process the subtree headers in batches of at most MAX_SPU_SUBTREE_HEADERS
+		int numBatch = subTrees.size();
+		for (int i=0;i<numBatch;)
+		{
+			//@todo- can reorder DMA transfers for less stall
+			int remaining = subTrees.size() - i;
+			int nextBatch = remaining < MAX_SPU_SUBTREE_HEADERS ? remaining : MAX_SPU_SUBTREE_HEADERS;
+			//fetch this batch of headers (starting at subTrees[i]) into gSubtreeHeaders and wait for it on tag 1
+			dmaBvhSubTreeHeaders (&lsMemPtr->bvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1);
+			cellDmaWaitTagStatusAll(DMA_MASK(1));
+			
+
+			//			spu_printf("nextBatch = %d\n",nextBatch);
+
+			for (int j=0;j<nextBatch;j++)
+			{
+				const btBvhSubtreeInfo& subtree = lsMemPtr->bvhShapeData.gSubtreeHeaders[j];
+				//only subtrees whose quantized AABB overlaps the query AABB have their nodes fetched and walked
+				unsigned int overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);
+				if (overlap)
+				{
+					btAssert(subtree.m_subtreeSize);
+
+					//dma the actual nodes of this subtree
+					dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2);
+					cellDmaWaitTagStatusAll(DMA_MASK(2));
+
+					/* Walk this subtree */
+					spuWalkStacklessQuantizedTree(&nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax,
+						&lsMemPtr->bvhShapeData.gSubtreeNodes[0],
+						0,
+						subtree.m_subtreeSize);
+				}
+				//				spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize);
+			}
+
+			//	unsigned short int	m_quantizedAabbMin[3];
+			//	unsigned short int	m_quantizedAabbMax[3];
+			//	int			m_rootNodeIndex;
+			//	int			m_subtreeSize;
+			i+=nextBatch;
+		}
+
+		//pre-fetch first tree, then loop and double buffer
+	}
+
+}
+
+/// Global hit counters updated by ProcessSpuConvexConvexCollision, indexed by the GJK detector's m_lastUsedMethod and m_degenerateSimplex respectively.
+#define MAX_DEGENERATE_STATS 15
+int stats[MAX_DEGENERATE_STATS]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+int degenerateStats[MAX_DEGENERATE_STATS]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+
+////////////////////////
+/// Convex versus Convex collision detection (handles collision between sphere, box, cylinder, triangle, cone, convex polyhedron etc)
+/// Always runs the generic GJK pair detector with the EPA penetration solver and updates the global stats/degenerateStats counters.
+void	ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts)
+{
+	register int dmaSize;
+	register ppu_address_t	dmaPpuAddress2;
+	
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+	//spu_printf("SPU: ProcessSpuConvexConvexCollision\n");
+#endif //DEBUG_SPU_COLLISION_DETECTION
+	//CollisionShape* shape0 = (CollisionShape*)wuInput->m_collisionShapes[0];
+	//CollisionShape* shape1 = (CollisionShape*)wuInput->m_collisionShapes[1];
+	btPersistentManifold* manifold = (btPersistentManifold*)wuInput->m_persistentManifoldPtr;
+	//manifold holds the PPU-side manifold address; below it is only converted back to a ppu_address_t for setContactInfo
+	bool genericGjk = true;
+	//always true here, so the block below is unconditionally executed
+	if (genericGjk)
+	{
+		//try generic GJK
+
+		
+		
+		//SpuConvexPenetrationDepthSolver* penetrationSolver=0;
+		btVoronoiSimplexSolver simplexSolver;
+		btGjkEpaPenetrationDepthSolver	epaPenetrationSolver2;
+		//the EPA solver is the only one ever assigned: the ENABLE_EPA branch re-assigns the same object and the alternative is commented out
+		btConvexPenetrationDepthSolver* penetrationSolver = &epaPenetrationSolver2;
+		
+		//SpuMinkowskiPenetrationDepthSolver	minkowskiPenetrationSolver;
+#ifdef ENABLE_EPA
+		if (gUseEpa)
+		{
+			penetrationSolver = &epaPenetrationSolver2;
+		} else
+#endif
+		{
+			//penetrationSolver = &minkowskiPenetrationSolver;
+		}
+
+
+		///DMA in the vertices for convex shapes
+		ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]);
+		ATTRIBUTE_ALIGNED16(char convexHullShape1[sizeof(btConvexHullShape)]);
+		//both hull headers are requested on tag 1 before either transfer is waited on
+		if ( btLikely( wuInput->m_shapeType0== CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			//	spu_printf("SPU: DMA btConvexHullShape\n");
+			
+			dmaSize = sizeof(btConvexHullShape);
+			dmaPpuAddress2 = wuInput->m_collisionShapes[0];
+
+			cellDmaGet(&convexHullShape0, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
+			//cellDmaWaitTagStatusAll(DMA_MASK(1));
+		}
+
+		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			//	spu_printf("SPU: DMA btConvexHullShape\n");
+			dmaSize = sizeof(btConvexHullShape);
+			dmaPpuAddress2 = wuInput->m_collisionShapes[1];
+			cellDmaGet(&convexHullShape1, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
+			//cellDmaWaitTagStatusAll(DMA_MASK(1));
+		}
+		//NOTE(review): dmaConvexVertexData presumably starts the vertex transfer that is waited on via DMA_MASK(2) further down -- confirm
+		if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{		
+			cellDmaWaitTagStatusAll(DMA_MASK(1));
+			dmaConvexVertexData (&lsMemPtr->convexVertexData[0], (btConvexHullShape*)&convexHullShape0);
+			lsMemPtr->convexVertexData[0].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[0];
+		}
+
+			
+		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			cellDmaWaitTagStatusAll(DMA_MASK(1));
+			dmaConvexVertexData (&lsMemPtr->convexVertexData[1], (btConvexHullShape*)&convexHullShape1);
+			lsMemPtr->convexVertexData[1].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[1];
+		}
+
+		//hull shapes are swapped for these stack-local point clouds; wuInput->m_spuCollisionShapes[] then points at locals of this function
+		btConvexPointCloudShape cpc0,cpc1;
+
+		if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			cellDmaWaitTagStatusAll(DMA_MASK(2));
+			lsMemPtr->convexVertexData[0].gConvexPoints = &lsMemPtr->convexVertexData[0].g_convexPointBuffer[0];
+			btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[0];
+			const btVector3& localScaling = ch->getLocalScalingNV();
+			cpc0.setPoints(lsMemPtr->convexVertexData[0].gConvexPoints,lsMemPtr->convexVertexData[0].gNumConvexPoints,false,localScaling);
+			wuInput->m_spuCollisionShapes[0] = &cpc0;
+		}
+
+		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			cellDmaWaitTagStatusAll(DMA_MASK(2));		
+			lsMemPtr->convexVertexData[1].gConvexPoints = &lsMemPtr->convexVertexData[1].g_convexPointBuffer[0];
+			btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[1];
+			const btVector3& localScaling = ch->getLocalScalingNV();
+			cpc1.setPoints(lsMemPtr->convexVertexData[1].gConvexPoints,lsMemPtr->convexVertexData[1].gNumConvexPoints,false,localScaling);
+			wuInput->m_spuCollisionShapes[1] = &cpc1;
+
+		}
+
+
+		const btConvexShape* shape0Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[0];
+		const btConvexShape* shape1Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[1];
+		int shapeType0 = wuInput->m_shapeType0;
+		int shapeType1 = wuInput->m_shapeType1;
+		float marginA = wuInput->m_collisionMargin0;
+		float marginB = wuInput->m_collisionMargin1;
+		//distance cutoff: (both margins + the manifold's contact breaking threshold), squared
+		SpuClosestPointInput	cpInput;
+		cpInput.m_convexVertexData[0] = &lsMemPtr->convexVertexData[0];
+		cpInput.m_convexVertexData[1] = &lsMemPtr->convexVertexData[1];
+		cpInput.m_transformA = wuInput->m_worldTransform0;
+		cpInput.m_transformB = wuInput->m_worldTransform1;
+		float sumMargin = (marginA+marginB+lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold());
+		cpInput.m_maximumDistanceSquared = sumMargin * sumMargin;
+
+		ppu_address_t manifoldAddress = (ppu_address_t)manifold;
+
+		btPersistentManifold* spuManifold=lsMemPtr->getContactManifoldPtr();
+		//spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped);
+		spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMemPtr->getColObj0()->getWorldTransform(),
+			lsMemPtr->getColObj1()->getWorldTransform(),
+			lsMemPtr->getColObj0()->getRestitution(),lsMemPtr->getColObj1()->getRestitution(),
+			lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(),
+			wuInput->m_isSwapped);
+
+		{
+			btGjkPairDetector gjk(shape0Ptr,shape1Ptr,shapeType0,shapeType1,marginA,marginB,&simplexSolver,penetrationSolver);//&vsSolver,penetrationSolver);
+			gjk.getClosestPoints(cpInput,spuContacts,0);//,debugDraw);
+			//tally the method / degenerate case reported by the detector; the indices are guarded by btAssert only
+			btAssert(gjk.m_lastUsedMethod <MAX_DEGENERATE_STATS);
+			stats[gjk.m_lastUsedMethod]++;
+			btAssert(gjk.m_degenerateSimplex <MAX_DEGENERATE_STATS);
+			degenerateStats[gjk.m_degenerateSimplex]++;
+
+#ifdef USE_SEPDISTANCE_UTIL			
+			btScalar sepDist = gjk.getCachedSeparatingDistance()+spuManifold->getContactBreakingThreshold();
+			lsMemPtr->getlocalCollisionAlgorithm()->m_sepDistance.initSeparatingDistance(gjk.getCachedSeparatingAxis(),sepDist,wuInput->m_worldTransform0,wuInput->m_worldTransform1);
+			lsMemPtr->needsDmaPutContactManifoldAlgo = true;
+#endif //USE_SEPDISTANCE_UTIL
+
+		}
+
+	}
+
+
+}
+
+/// Exchanges the object representations of a and b by raw memcpy through a byte buffer; no constructors or assignment operators are invoked.
+template<typename T> void DoSwap(T& a, T& b)
+{
+	char tmp[sizeof(T)];
+	memcpy(tmp, &a, sizeof(T));
+	memcpy(&a, &b, sizeof(T));
+	memcpy(&b, tmp, sizeof(T));
+}
+/// Fetches both collision objects of the current pair into lsMem (pointers stored in m_lsColObj0Ptr/m_lsColObj1Ptr) and copies their world transforms into collisionPairInput.
+SIMD_FORCE_INLINE void	dmaAndSetupCollisionObjects(SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem)
+{
+	register int dmaSize;
+	register ppu_address_t	dmaPpuAddress2;
+	//object 0 is requested on tag 1 and object 1 on tag 2; both transfers are waited on together below
+	dmaSize = sizeof(btCollisionObject);//btTransform);
+	dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr1->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject0();
+	lsMem.m_lsColObj0Ptr = (btCollisionObject*)cellDmaGetReadOnly(&lsMem.gColObj0Buffer, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);		
+
+	dmaSize = sizeof(btCollisionObject);//btTransform);
+	dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr0->m_clientObject :*/ (ppu_address_t)lsMem.getlocalCollisionAlgorithm()->getCollisionObject1();
+	lsMem.m_lsColObj1Ptr = (btCollisionObject*)cellDmaGetReadOnly(&lsMem.gColObj1Buffer, dmaPpuAddress2  , dmaSize, DMA_TAG(2), 0, 0);		
+	
+	cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+	//the pair input receives a copy of each object's world transform
+	btCollisionObject* ob0 = lsMem.getColObj0();
+	btCollisionObject* ob1 = lsMem.getColObj1();
+
+	collisionPairInput.m_worldTransform0 = ob0->getWorldTransform();
+	collisionPairInput.m_worldTransform1 = ob1->getWorldTransform();
+}
+/// Dispatches one collision pair by shape category: both convex, both compound, one compound, or convex versus concave.
+/// Compound shapes are expanded into children and this function is re-entered per child (pair) with dmaShapes=false.
+/// Pairs matching none of these categories are not processed; spuContacts.flush() is called on every invocation, recursive ones included.
+void	handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTask_LocalStoreMemory& lsMem,
+							SpuContactResult &spuContacts,
+							ppu_address_t collisionShape0Ptr, void* collisionShape0Loc,
+							ppu_address_t collisionShape1Ptr, void* collisionShape1Loc, bool dmaShapes = true)
+{
+	//case 1: both shapes convex
+	if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0) 
+		&& btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1))
+	{
+		if (dmaShapes)
+		{
+			dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
+			dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
+			cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+		}
+		//when dmaShapes is false the shapes are read from collisionShape0Loc/collisionShape1Loc as they are
+		btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
+		btConvexInternalShape* spuConvexShape1 = (btConvexInternalShape*)collisionShape1Loc;
+
+		btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
+		btVector3 dim1 = spuConvexShape1->getImplicitShapeDimensions();
+
+		collisionPairInput.m_primitiveDimensions0 = dim0;
+		collisionPairInput.m_primitiveDimensions1 = dim1;
+		collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
+		collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
+		collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
+		collisionPairInput.m_spuCollisionShapes[1] = spuConvexShape1;
+		ProcessSpuConvexConvexCollision(&collisionPairInput,&lsMem,spuContacts);
+	} 
+	else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType0) && 
+			btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType1))
+	{
+		//snPause();
+		//note: this branch DMAs both shapes regardless of dmaShapes
+		dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
+		dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
+		cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+
+		// Both are compounds, do N^2 CD for now
+		///@todo: add some AABB-based pruning (probably not -> slower)
+	
+		btCompoundShape* spuCompoundShape0 = (btCompoundShape*)collisionShape0Loc;
+		btCompoundShape* spuCompoundShape1 = (btCompoundShape*)collisionShape1Loc;
+
+		dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape0, 1);
+		dmaCompoundShapeInfo (&lsMem.compoundShapeData[1], spuCompoundShape1, 2);
+		cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+		//sub-shapes are fetched one compound at a time, each transfer on tag 1 followed by its own wait
+
+		dmaCompoundSubShapes (&lsMem.compoundShapeData[0], spuCompoundShape0, 1);
+		cellDmaWaitTagStatusAll(DMA_MASK(1));
+		dmaCompoundSubShapes (&lsMem.compoundShapeData[1], spuCompoundShape1, 1);
+		cellDmaWaitTagStatusAll(DMA_MASK(1));
+		//NOTE(review): the child counts are only checked by btAssert, and only after the DMA helpers above have run
+		int childShapeCount0 = spuCompoundShape0->getNumChildShapes();
+		btAssert(childShapeCount0< MAX_SPU_COMPOUND_SUBSHAPES);
+		int childShapeCount1 = spuCompoundShape1->getNumChildShapes();
+		btAssert(childShapeCount1< MAX_SPU_COMPOUND_SUBSHAPES);
+
+		// Start the N^2
+		for (int i = 0; i < childShapeCount0; ++i)
+		{
+			btCompoundShapeChild& childShape0 = lsMem.compoundShapeData[0].gSubshapes[i];
+			btAssert(!btBroadphaseProxy::isCompound(childShape0.m_childShapeType));
+
+			for (int j = 0; j < childShapeCount1; ++j)
+			{
+				btCompoundShapeChild& childShape1 = lsMem.compoundShapeData[1].gSubshapes[j];
+				btAssert(!btBroadphaseProxy::isCompound(childShape1.m_childShapeType));
+
+
+				/* Create a new collision pair input struct using the two child shapes */
+				SpuCollisionPairInput cinput (collisionPairInput);
+
+				cinput.m_worldTransform0 = collisionPairInput.m_worldTransform0 * childShape0.m_transform;
+				cinput.m_shapeType0 = childShape0.m_childShapeType;
+				cinput.m_collisionMargin0 = childShape0.m_childMargin;
+
+				cinput.m_worldTransform1 = collisionPairInput.m_worldTransform1 * childShape1.m_transform;
+				cinput.m_shapeType1 = childShape1.m_childShapeType;
+				cinput.m_collisionMargin1 = childShape1.m_childMargin;
+				/* Recursively call handleCollisionPair () with new collision pair input */
+				
+				handleCollisionPair(cinput, lsMem, spuContacts,			
+					(ppu_address_t)childShape0.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], 
+					(ppu_address_t)childShape1.m_childShape, lsMem.compoundShapeData[1].gSubshapeShape[j], false);
+			}
+		}
+	}
+	else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType0) )
+	{
+		//snPause();
+		//note: this branch DMAs both shapes regardless of dmaShapes
+		dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
+		dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
+		cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+
+		// object 0 compound, object 1 non-compound
+		btCompoundShape* spuCompoundShape = (btCompoundShape*)collisionShape0Loc;
+		dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape, 1);
+		cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+		int childShapeCount = spuCompoundShape->getNumChildShapes();
+		btAssert(childShapeCount< MAX_SPU_COMPOUND_SUBSHAPES);
+		//each child is DMA'd into its own gSubshapeShape slot and tested against the unchanged shape 1
+		for (int i = 0; i < childShapeCount; ++i)
+		{
+			btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i];
+			btAssert(!btBroadphaseProxy::isCompound(childShape.m_childShapeType));
+			// Dma the child shape
+			dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType);
+			cellDmaWaitTagStatusAll(DMA_MASK(1));
+			//copy the parent input, then override slot 0's transform, shape type and margin with the child's
+			SpuCollisionPairInput cinput (collisionPairInput);
+			cinput.m_worldTransform0 = collisionPairInput.m_worldTransform0 * childShape.m_transform;
+			cinput.m_shapeType0 = childShape.m_childShapeType;
+			cinput.m_collisionMargin0 = childShape.m_childMargin;
+
+			handleCollisionPair(cinput, lsMem, spuContacts,			
+				(ppu_address_t)childShape.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], 
+				collisionShape1Ptr, collisionShape1Loc, false);
+		}
+	}
+	else if (btBroadphaseProxy::isCompound(collisionPairInput.m_shapeType1) )
+	{
+		//snPause();
+		//mirror of the previous branch; compoundShapeData[0] is used as the scratch area here as well
+		dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
+		dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
+		cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+		// object 0 non-compound, object 1 compound
+		btCompoundShape* spuCompoundShape = (btCompoundShape*)collisionShape1Loc;
+		dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape, 1);
+		cellDmaWaitTagStatusAll(DMA_MASK(1));
+		
+		int childShapeCount = spuCompoundShape->getNumChildShapes();
+		btAssert(childShapeCount< MAX_SPU_COMPOUND_SUBSHAPES);
+
+
+		for (int i = 0; i < childShapeCount; ++i)
+		{
+			btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i];
+			btAssert(!btBroadphaseProxy::isCompound(childShape.m_childShapeType));
+			// Dma the child shape
+			dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType);
+			cellDmaWaitTagStatusAll(DMA_MASK(1));
+			//copy the parent input, then override slot 1's transform, shape type and margin with the child's
+			SpuCollisionPairInput cinput (collisionPairInput);
+			cinput.m_worldTransform1 = collisionPairInput.m_worldTransform1 * childShape.m_transform;
+			cinput.m_shapeType1 = childShape.m_childShapeType;
+			cinput.m_collisionMargin1 = childShape.m_childMargin;
+			handleCollisionPair(cinput, lsMem, spuContacts,
+				collisionShape0Ptr, collisionShape0Loc, 
+				(ppu_address_t)childShape.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], false);
+		}
+		
+	}
+	else
+	{
+		//a non-convex shape is involved									
+		bool handleConvexConcave = false;
+
+		//snPause();
+		//a (concave, convex) pair is reordered to (convex, concave); collisionPairInput is modified in place
+		if (btBroadphaseProxy::isConcave(collisionPairInput.m_shapeType0) &&
+			btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1))
+		{
+			// Swap stuff
+			DoSwap(collisionShape0Ptr, collisionShape1Ptr);
+			DoSwap(collisionShape0Loc, collisionShape1Loc);
+			DoSwap(collisionPairInput.m_shapeType0, collisionPairInput.m_shapeType1);
+			DoSwap(collisionPairInput.m_worldTransform0, collisionPairInput.m_worldTransform1);
+			DoSwap(collisionPairInput.m_collisionMargin0, collisionPairInput.m_collisionMargin1);
+			
+			collisionPairInput.m_isSwapped = true;
+		}
+		
+		if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0)&&
+			btBroadphaseProxy::isConcave(collisionPairInput.m_shapeType1))
+		{
+			handleConvexConcave = true;
+		}
+		if (handleConvexConcave)
+		{
+			if (dmaShapes)
+			{
+				dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0);
+				dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1);
+				cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
+			}
+			//static planes have a dedicated routine; any other type reaching here is cast to a BVH triangle mesh
+			if (collisionPairInput.m_shapeType1 == STATIC_PLANE_PROXYTYPE)
+			{
+				btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
+				btStaticPlaneShape* planeShape= (btStaticPlaneShape*)collisionShape1Loc;
+
+				btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
+				collisionPairInput.m_primitiveDimensions0 = dim0;
+				collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
+				collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
+				collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
+				collisionPairInput.m_spuCollisionShapes[1] = planeShape;
+
+				ProcessConvexPlaneSpuCollision(&collisionPairInput,&lsMem,spuContacts);
+			} else
+			{
+				btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
+				btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)collisionShape1Loc;
+
+				btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
+				collisionPairInput.m_primitiveDimensions0 = dim0;
+				collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
+				collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
+				collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
+				collisionPairInput.m_spuCollisionShapes[1] = trimeshShape;
+
+				ProcessConvexConcaveSpuCollision(&collisionPairInput,&lsMem,spuContacts);
+			}
+		}
+
+	}
+	//reached on every path, including pairs that matched no category
+	spuContacts.flush();
+
+}
+
+
+void	processCollisionTask(void* userPtr, void* lsMemPtr)
+{
+
+	SpuGatherAndProcessPairsTaskDesc* taskDescPtr = (SpuGatherAndProcessPairsTaskDesc*)userPtr;
+	SpuGatherAndProcessPairsTaskDesc& taskDesc = *taskDescPtr;
+	CollisionTask_LocalStoreMemory*	colMemPtr = (CollisionTask_LocalStoreMemory*)lsMemPtr;
+	CollisionTask_LocalStoreMemory& lsMem = *(colMemPtr);
+
+	gUseEpa = taskDesc.m_useEpa;
+
+	//	spu_printf("taskDescPtr=%llx\n",taskDescPtr);
+
+	SpuContactResult spuContacts;
+
+	////////////////////
+
+	ppu_address_t dmaInPtr = taskDesc.m_inPairPtr;
+	unsigned int numPages = taskDesc.numPages;
+	unsigned int numOnLastPage = taskDesc.numOnLastPage;
+
+	// prefetch first set of inputs and wait
+	lsMem.g_workUnitTaskBuffers.init();
+
+	unsigned int nextNumOnPage = (numPages > 1)? MIDPHASE_NUM_WORKUNITS_PER_PAGE : numOnLastPage;
+	lsMem.g_workUnitTaskBuffers.backBufferDmaGet(dmaInPtr, nextNumOnPage*sizeof(SpuGatherAndProcessWorkUnitInput), DMA_TAG(3));
+	dmaInPtr += MIDPHASE_WORKUNIT_PAGE_SIZE;
+
+	
+	register unsigned char *inputPtr;
+	register unsigned int numOnPage;
+	register unsigned int j;
+	SpuGatherAndProcessWorkUnitInput* wuInputs;	
+	register int dmaSize;
+	register ppu_address_t	dmaPpuAddress;
+	register ppu_address_t	dmaPpuAddress2;
+
+	int numPairs;
+	register int p;
+	SpuCollisionPairInput collisionPairInput;
+	
+	for (unsigned int i = 0; btLikely(i < numPages); i++)
+	{
+
+		// wait for back buffer dma and swap buffers
+		inputPtr = lsMem.g_workUnitTaskBuffers.swapBuffers();
+
+		// number on current page is number prefetched last iteration
+		numOnPage = nextNumOnPage;
+
+
+		// prefetch next set of inputs
+#if MIDPHASE_NUM_WORKUNIT_PAGES > 2
+		if ( btLikely( i < numPages-1 ) )
+#else
+		if ( btUnlikely( i < numPages-1 ) )
+#endif
+		{
+			nextNumOnPage = (i == numPages-2)? numOnLastPage : MIDPHASE_NUM_WORKUNITS_PER_PAGE;
+			lsMem.g_workUnitTaskBuffers.backBufferDmaGet(dmaInPtr, nextNumOnPage*sizeof(SpuGatherAndProcessWorkUnitInput), DMA_TAG(3));
+			dmaInPtr += MIDPHASE_WORKUNIT_PAGE_SIZE;
+		}
+
+		wuInputs = reinterpret_cast<SpuGatherAndProcessWorkUnitInput *>(inputPtr);
+		
+		
+		for (j = 0; btLikely( j < numOnPage ); j++)
+		{
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+		//	printMidphaseInput(&wuInputs[j]);
+#endif //DEBUG_SPU_COLLISION_DETECTION
+
+
+			numPairs = wuInputs[j].m_endIndex - wuInputs[j].m_startIndex;
+			
+			if ( btLikely( numPairs ) )
+			{
+					dmaSize = numPairs*sizeof(btBroadphasePair);
+					dmaPpuAddress = wuInputs[j].m_pairArrayPtr+wuInputs[j].m_startIndex * sizeof(btBroadphasePair);
+					lsMem.m_pairsPointer = (btBroadphasePair*)cellDmaGetReadOnly(&lsMem.gBroadphasePairsBuffer, dmaPpuAddress  , dmaSize, DMA_TAG(1), 0, 0);
+					cellDmaWaitTagStatusAll(DMA_MASK(1));
+				
+
+				for (p=0;p<numPairs;p++)
+				{
+
+					//for each broadphase pair, do something
+
+					btBroadphasePair& pair = lsMem.getBroadphasePairPtr()[p];
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+					spu_printf("pair->m_userInfo = %d\n",pair.m_userInfo);
+					spu_printf("pair->m_algorithm = %d\n",pair.m_algorithm);
+					spu_printf("pair->m_pProxy0 = %d\n",pair.m_pProxy0);
+					spu_printf("pair->m_pProxy1 = %d\n",pair.m_pProxy1);
+#endif //DEBUG_SPU_COLLISION_DETECTION
+
+					if (pair.m_internalTmpValue == 2 && pair.m_algorithm && pair.m_pProxy0 && pair.m_pProxy1)
+					{
+						dmaSize = sizeof(SpuContactManifoldCollisionAlgorithm);
+						dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm;
+						lsMem.m_lsCollisionAlgorithmPtr = (SpuContactManifoldCollisionAlgorithm*)cellDmaGetReadOnly(&lsMem.gSpuContactManifoldAlgoBuffer, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
+
+						cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+						lsMem.needsDmaPutContactManifoldAlgo = false;
+
+						collisionPairInput.m_persistentManifoldPtr = (ppu_address_t) lsMem.getlocalCollisionAlgorithm()->getContactManifoldPtr();
+						collisionPairInput.m_isSwapped = false;
+
+						if (1)
+						{
+
+							///can wait on the combined DMA_MASK, or dma on the same tag
+
+
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+					//		spu_printf("SPU collisionPairInput->m_shapeType0 = %d\n",collisionPairInput->m_shapeType0);
+					//		spu_printf("SPU collisionPairInput->m_shapeType1 = %d\n",collisionPairInput->m_shapeType1);
+#endif //DEBUG_SPU_COLLISION_DETECTION
+
+							
+							dmaSize = sizeof(btPersistentManifold);
+
+							dmaPpuAddress2 = collisionPairInput.m_persistentManifoldPtr;
+							lsMem.m_lsManifoldPtr = (btPersistentManifold*)cellDmaGetReadOnly(&lsMem.gPersistentManifoldBuffer, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
+
+							collisionPairInput.m_shapeType0 = lsMem.getlocalCollisionAlgorithm()->getShapeType0();
+							collisionPairInput.m_shapeType1 = lsMem.getlocalCollisionAlgorithm()->getShapeType1();
+							collisionPairInput.m_collisionMargin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0();
+							collisionPairInput.m_collisionMargin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
+							
+							
+							
+							//??cellDmaWaitTagStatusAll(DMA_MASK(1));
+							
+
+							if (1)
+							{
+								//snPause();
+
+								// Get the collision objects
+								dmaAndSetupCollisionObjects(collisionPairInput, lsMem);
+
+								if (lsMem.getColObj0()->isActive() || lsMem.getColObj1()->isActive())
+								{
+
+									lsMem.needsDmaPutContactManifoldAlgo = true;
+#ifdef USE_SEPDISTANCE_UTIL
+									lsMem.getlocalCollisionAlgorithm()->m_sepDistance.updateSeparatingDistance(collisionPairInput.m_worldTransform0,collisionPairInput.m_worldTransform1);
+#endif //USE_SEPDISTANCE_UTIL
+							
+#define USE_DEDICATED_BOX_BOX 1
+#ifdef USE_DEDICATED_BOX_BOX
+									bool boxbox = ((lsMem.getlocalCollisionAlgorithm()->getShapeType0()==BOX_SHAPE_PROXYTYPE)&&
+										(lsMem.getlocalCollisionAlgorithm()->getShapeType1()==BOX_SHAPE_PROXYTYPE));
+									if (boxbox)
+									{
+										//spu_printf("boxbox dist = %f\n",distance);
+										btPersistentManifold* spuManifold=lsMem.getContactManifoldPtr();
+										btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr;
+										ppu_address_t manifoldAddress = (ppu_address_t)manifold;
+
+										spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(),
+											lsMem.getColObj1()->getWorldTransform(),
+											lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(),
+											lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(),
+											collisionPairInput.m_isSwapped);
+
+						
+									//float distance=0.f;
+									btVector3 normalInB;
+
+
+									if (//!gUseEpa &&
+#ifdef USE_SEPDISTANCE_UTIL
+										lsMem.getlocalCollisionAlgorithm()->m_sepDistance.getConservativeSeparatingDistance()<=0.f
+#else
+										1
+#endif											
+										)
+										{
+//#define USE_PE_BOX_BOX 1
+#ifdef USE_PE_BOX_BOX
+											{
+
+												//getCollisionMargin0
+												btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0();
+												btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
+												btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0);
+												btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1);
+/*
+												//Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ());
+												vmVector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin());
+												vmVector3 vmPos1 = getVmVector3(collisionPairInput.m_worldTransform1.getOrigin());
+												vmMatrix3 vmMatrix0 = getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis());
+												vmMatrix3 vmMatrix1 = getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis());
+
+												vmTransform3 transformA(vmMatrix0,vmPos0);
+												Box boxB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ());
+												vmTransform3 transformB(vmMatrix1,vmPos1);
+												BoxPoint resultClosestBoxPointA;
+												BoxPoint resultClosestBoxPointB;
+												vmVector3 resultNormal;
+												*/
+
+#ifdef USE_SEPDISTANCE_UTIL
+												float distanceThreshold = FLT_MAX
+#else
+												//float distanceThreshold = 0.f;
+#endif
+
+
+												vmVector3 n;
+												Box boxA;
+												vmVector3 hA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ());
+												vmVector3 hB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ());
+												boxA.mHalf= hA;
+												vmTransform3 trA;
+												trA.setTranslation(getVmVector3(collisionPairInput.m_worldTransform0.getOrigin()));
+												trA.setUpper3x3(getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis()));
+												Box boxB;
+												boxB.mHalf = hB;
+												vmTransform3 trB;
+												trB.setTranslation(getVmVector3(collisionPairInput.m_worldTransform1.getOrigin()));
+												trB.setUpper3x3(getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis()));
+												
+												float distanceThreshold = spuManifold->getContactBreakingThreshold();//0.001f;
+
+
+												BoxPoint ptA,ptB;
+												float dist = boxBoxDistance(n, ptA, ptB,
+														   boxA, trA, boxB,	   trB,
+															distanceThreshold );
+
+
+//												float distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB,  boxA, transformA, boxB,transformB,distanceThreshold);
+												
+												normalInB = -getBtVector3(n);//resultNormal);
+
+												//if(dist < distanceThreshold)//spuManifold->getContactBreakingThreshold())
+												if(dist < spuManifold->getContactBreakingThreshold())
+												{
+													btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(ptB.localPoint));
+
+													spuContacts.addContactPoint(
+														normalInB,
+														pointOnB,
+														dist);
+												}
+											} 
+#else									
+											{
+
+												btScalar margin0 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin0();
+												btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
+												btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0);
+												btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1);
+
+
+												btBoxShape box0(shapeDim0);
+												btBoxShape box1(shapeDim1);
+
+												struct SpuBridgeContactCollector : public btDiscreteCollisionDetectorInterface::Result
+												{
+													SpuContactResult&	m_spuContacts;
+
+													virtual void setShapeIdentifiersA(int partId0,int index0)
+													{
+														m_spuContacts.setShapeIdentifiersA(partId0,index0);
+													}
+													virtual void setShapeIdentifiersB(int partId1,int index1)
+													{
+														m_spuContacts.setShapeIdentifiersB(partId1,index1);
+													}
+													virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
+													{
+														m_spuContacts.addContactPoint(normalOnBInWorld,pointInWorld,depth);
+													}
+
+													SpuBridgeContactCollector(SpuContactResult& spuContacts)
+														:m_spuContacts(spuContacts)
+													{
+
+													}
+												};
+												
+												SpuBridgeContactCollector  bridgeOutput(spuContacts);
+
+												btDiscreteCollisionDetectorInterface::ClosestPointInput input;
+												input.m_maximumDistanceSquared = BT_LARGE_FLOAT;
+												input.m_transformA = collisionPairInput.m_worldTransform0;
+												input.m_transformB = collisionPairInput.m_worldTransform1;
+
+												btBoxBoxDetector detector(&box0,&box1);
+												
+												detector.getClosestPoints(input,bridgeOutput,0);
+
+											}
+#endif //USE_PE_BOX_BOX
+											
+											lsMem.needsDmaPutContactManifoldAlgo = true;
+#ifdef USE_SEPDISTANCE_UTIL
+											btScalar sepDist2 = distance+spuManifold->getContactBreakingThreshold();
+											lsMem.getlocalCollisionAlgorithm()->m_sepDistance.initSeparatingDistance(normalInB,sepDist2,collisionPairInput.m_worldTransform0,collisionPairInput.m_worldTransform1);
+#endif //USE_SEPDISTANCE_UTIL
+											gProcessedCol++;
+										} else
+										{
+											gSkippedCol++;
+										}
+
+										spuContacts.flush();
+											
+
+									} else
+#endif //USE_DEDICATED_BOX_BOX
+									{
+										if (
+#ifdef USE_SEPDISTANCE_UTIL
+											lsMem.getlocalCollisionAlgorithm()->m_sepDistance.getConservativeSeparatingDistance()<=0.f
+#else
+											1
+#endif //USE_SEPDISTANCE_UTIL
+											)
+										{
+											handleCollisionPair(collisionPairInput, lsMem, spuContacts,
+												(ppu_address_t)lsMem.getColObj0()->getRootCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape,
+												(ppu_address_t)lsMem.getColObj1()->getRootCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape);
+										} else
+										{
+												//spu_printf("boxbox dist = %f\n",distance);
+											btPersistentManifold* spuManifold=lsMem.getContactManifoldPtr();
+											btPersistentManifold* manifold = (btPersistentManifold*)collisionPairInput.m_persistentManifoldPtr;
+											ppu_address_t manifoldAddress = (ppu_address_t)manifold;
+
+											spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMem.getColObj0()->getWorldTransform(),
+												lsMem.getColObj1()->getWorldTransform(),
+												lsMem.getColObj0()->getRestitution(),lsMem.getColObj1()->getRestitution(),
+												lsMem.getColObj0()->getFriction(),lsMem.getColObj1()->getFriction(),
+												collisionPairInput.m_isSwapped);
+
+											spuContacts.flush();
+										}
+									}
+								
+								}
+
+							}
+						}
+
+#ifdef USE_SEPDISTANCE_UTIL
+#if defined (__SPU__) || defined (USE_LIBSPE2)
+						if (lsMem.needsDmaPutContactManifoldAlgo)
+						{
+							dmaSize = sizeof(SpuContactManifoldCollisionAlgorithm);
+							dmaPpuAddress2 = (ppu_address_t)pair.m_algorithm;
+							cellDmaLargePut(&lsMem.gSpuContactManifoldAlgoBuffer, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
+							cellDmaWaitTagStatusAll(DMA_MASK(1));
+						}
+#endif
+#endif //#ifdef USE_SEPDISTANCE_UTIL
+
+					}
+				}
+			}
+		} //end for (j = 0; j < numOnPage; j++)
+
+	}//	for 
+
+
+
+	return;
+}
+
+
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
new file mode 100644
index 00000000..bbaa555e
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
@@ -0,0 +1,140 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef SPU_GATHERING_COLLISION_TASK_H
+#define SPU_GATHERING_COLLISION_TASK_H
+
+#include "../PlatformDefinitions.h"
+//#define DEBUG_SPU_COLLISION_DETECTION 1
+
+
+///Task Description for SPU collision detection (the libspe2 SPU main further down DMA-copies one of these by sizeof)
+struct SpuGatherAndProcessPairsTaskDesc 
+{
+	ppu_address_t	m_inPairPtr;//m_pairArrayPtr;
+	//mutex variable
+	uint32_t	m_someMutexVariableInMainMemory;
+	// NOTE(review): presumably the main-memory address of the dispatcher; it is not dereferenced in this header
+	ppu_address_t	m_dispatcher;
+	// NOTE(review): the names suggest the pairs are handed over in pages and this is the count on the final page; confirm in processCollisionTask
+	uint32_t	numOnLastPage;
+
+	uint16_t numPages;
+	uint16_t taskId;
+	bool m_useEpa;
+	// handed to processCollisionTask() as its lsMemory argument by the SPU main loop further down
+	struct	CollisionTask_LocalStoreMemory*	m_lsMemory; 
+}
+// 128-byte alignment is requested only for the builds selected by the #if below
+#if  defined(__CELLOS_LV2__) || defined(USE_LIBSPE2)
+__attribute__ ((aligned (128)))
+#endif
+;
+
+
+void	processCollisionTask(void* userPtr, void* lsMemory);	// the SPU main below passes a SpuGatherAndProcessPairsTaskDesc* as userPtr and that descriptor's m_lsMemory as lsMemory
+// the SPU main below stores the returned pointer in btSpuStatus::m_lsMemory.p
+void*	createCollisionLocalStoreMemory();
+
+
+#if defined(USE_LIBSPE2) && defined(__SPU__)
+#include "../SpuLibspe2Support.h"
+#include <spu_intrinsics.h>
+#include <spu_mfcio.h>
+#include <SpuFakeDma.h>
+
+//#define DEBUG_LIBSPE2_SPU_TASK
+
+
+
+int main(unsigned long long speid, addr64 argp, addr64 envp)	// SPU program entry point (compiled only when USE_LIBSPE2 and __SPU__ are both defined)
+{
+	printf("SPU: hello \n");
+	// status: local copy of the btSpuStatus record found at argp.ull; taskDesc: local copy of the current task descriptor
+	ATTRIBUTE_ALIGNED128(btSpuStatus status);
+	ATTRIBUTE_ALIGNED16( SpuGatherAndProcessPairsTaskDesc taskDesc ) ;
+	unsigned int received_message = Spu_Mailbox_Event_Nothing;
+    bool shutdown = false;
+	// initial handshake: pull the status record, mark it free, attach the new local-store work area, push it back
+	cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+	cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+	status.m_status = Spu_Status_Free;
+	status.m_lsMemory.p = createCollisionLocalStoreMemory();
+
+	cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+	cellDmaWaitTagStatusAll(DMA_MASK(3));
+	
+	// message loop: handle one mailbox word per iteration until Spu_Mailbox_Event_Shutdown is seen
+	while ( btLikely( !shutdown ) )
+	{
+		
+		received_message = spu_read_in_mbox();
+		// task message: reload the status record, fetch the descriptor it points at, then run the task
+		if( btLikely( received_message == Spu_Mailbox_Event_Task ))
+		{
+#ifdef DEBUG_LIBSPE2_SPU_TASK
+			printf("SPU: received Spu_Mailbox_Event_Task\n");
+#endif //DEBUG_LIBSPE2_SPU_TASK
+
+			// refresh the status
+			cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+			cellDmaWaitTagStatusAll(DMA_MASK(3));
+		
+			btAssert(status.m_status==Spu_Status_Occupied);
+			
+			cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuGatherAndProcessPairsTaskDesc), DMA_TAG(3), 0, 0);
+			cellDmaWaitTagStatusAll(DMA_MASK(3));
+#ifdef DEBUG_LIBSPE2_SPU_TASK		
+			printf("SPU:processCollisionTask\n");	
+#endif //DEBUG_LIBSPE2_SPU_TASK
+			processCollisionTask((void*)&taskDesc, taskDesc.m_lsMemory);
+			// note: the work area passed above is taskDesc.m_lsMemory (from the descriptor), not status.m_lsMemory.p
+#ifdef DEBUG_LIBSPE2_SPU_TASK
+			printf("SPU:finished processCollisionTask\n");
+#endif //DEBUG_LIBSPE2_SPU_TASK
+		}
+		else	// every non-task message lands here; the debug print below fires even when the message is not a shutdown
+		{
+#ifdef DEBUG_LIBSPE2_SPU_TASK
+			printf("SPU: received ShutDown\n");
+#endif //DEBUG_LIBSPE2_SPU_TASK
+			if( btLikely( received_message == Spu_Mailbox_Event_Shutdown ) )
+			{
+				shutdown = true;
+			}
+			else
+			{
+				//printf("SPU - Sth. recieved\n");
+			}
+		}
+		// the write-back below runs after every message, including the shutdown one
+		// set to status free and wait for next task
+		status.m_status = Spu_Status_Free;
+		cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+		cellDmaWaitTagStatusAll(DMA_MASK(3));		
+				
+		
+  	}
+
+	printf("SPU: shutdown\n");
+  	return 0;
+}
+#endif // USE_LIBSPE2
+
+
+#endif //SPU_GATHERING_COLLISION_TASK_H
+
+
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h
new file mode 100644
index 00000000..8b89de03
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h
@@ -0,0 +1,19 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+
+
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
new file mode 100644
index 00000000..9f7e64dd
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
@@ -0,0 +1,348 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "SpuMinkowskiPenetrationDepthSolver.h"
+#include "SpuContactResult.h"
+#include "SpuPreferredPenetrationDirections.h"
+#include "BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
+#include "SpuCollisionShapes.h"
+
+#define NUM_UNITSPHERE_POINTS 42	// number of fixed sample directions listed in sPenetrationDirections below
+static btVector3	sPenetrationDirections[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2] = 	// only the first NUM_UNITSPHERE_POINTS entries are listed; the tail slots are written by the (currently #if 0'd) calcPenDepth body
+{
+btVector3(btScalar(0.000000) , btScalar(-0.000000),btScalar(-1.000000)),
+btVector3(btScalar(0.723608) , btScalar(-0.525725),btScalar(-0.447219)),
+btVector3(btScalar(-0.276388) , btScalar(-0.850649),btScalar(-0.447219)),
+btVector3(btScalar(-0.894426) , btScalar(-0.000000),btScalar(-0.447216)),
+btVector3(btScalar(-0.276388) , btScalar(0.850649),btScalar(-0.447220)),
+btVector3(btScalar(0.723608) , btScalar(0.525725),btScalar(-0.447219)),
+btVector3(btScalar(0.276388) , btScalar(-0.850649),btScalar(0.447220)),
+btVector3(btScalar(-0.723608) , btScalar(-0.525725),btScalar(0.447219)),
+btVector3(btScalar(-0.723608) , btScalar(0.525725),btScalar(0.447219)),
+btVector3(btScalar(0.276388) , btScalar(0.850649),btScalar(0.447219)),
+btVector3(btScalar(0.894426) , btScalar(0.000000),btScalar(0.447216)),
+btVector3(btScalar(-0.000000) , btScalar(0.000000),btScalar(1.000000)),
+btVector3(btScalar(0.425323) , btScalar(-0.309011),btScalar(-0.850654)),
+btVector3(btScalar(-0.162456) , btScalar(-0.499995),btScalar(-0.850654)),
+btVector3(btScalar(0.262869) , btScalar(-0.809012),btScalar(-0.525738)),
+btVector3(btScalar(0.425323) , btScalar(0.309011),btScalar(-0.850654)),
+btVector3(btScalar(0.850648) , btScalar(-0.000000),btScalar(-0.525736)),
+btVector3(btScalar(-0.525730) , btScalar(-0.000000),btScalar(-0.850652)),
+btVector3(btScalar(-0.688190) , btScalar(-0.499997),btScalar(-0.525736)),
+btVector3(btScalar(-0.162456) , btScalar(0.499995),btScalar(-0.850654)),
+btVector3(btScalar(-0.688190) , btScalar(0.499997),btScalar(-0.525736)),
+btVector3(btScalar(0.262869) , btScalar(0.809012),btScalar(-0.525738)),
+btVector3(btScalar(0.951058) , btScalar(0.309013),btScalar(0.000000)),
+btVector3(btScalar(0.951058) , btScalar(-0.309013),btScalar(0.000000)),
+btVector3(btScalar(0.587786) , btScalar(-0.809017),btScalar(0.000000)),
+btVector3(btScalar(0.000000) , btScalar(-1.000000),btScalar(0.000000)),
+btVector3(btScalar(-0.587786) , btScalar(-0.809017),btScalar(0.000000)),
+btVector3(btScalar(-0.951058) , btScalar(-0.309013),btScalar(-0.000000)),
+btVector3(btScalar(-0.951058) , btScalar(0.309013),btScalar(-0.000000)),
+btVector3(btScalar(-0.587786) , btScalar(0.809017),btScalar(-0.000000)),
+btVector3(btScalar(-0.000000) , btScalar(1.000000),btScalar(-0.000000)),
+btVector3(btScalar(0.587786) , btScalar(0.809017),btScalar(-0.000000)),
+btVector3(btScalar(0.688190) , btScalar(-0.499997),btScalar(0.525736)),
+btVector3(btScalar(-0.262869) , btScalar(-0.809012),btScalar(0.525738)),
+btVector3(btScalar(-0.850648) , btScalar(0.000000),btScalar(0.525736)),
+btVector3(btScalar(-0.262869) , btScalar(0.809012),btScalar(0.525738)),
+btVector3(btScalar(0.688190) , btScalar(0.499997),btScalar(0.525736)),
+btVector3(btScalar(0.525730) , btScalar(0.000000),btScalar(0.850652)),
+btVector3(btScalar(0.162456) , btScalar(-0.499995),btScalar(0.850654)),
+btVector3(btScalar(-0.425323) , btScalar(-0.309011),btScalar(0.850654)),
+btVector3(btScalar(-0.425323) , btScalar(0.309011),btScalar(0.850654)),
+btVector3(btScalar(0.162456) , btScalar(0.499995),btScalar(0.850654))
+};
+/// Penetration-depth query declared by SpuMinkowskiPenetrationDepthSolver. NOTE: the whole sampling implementation
+/// below sits inside "#if 0", so as compiled this function ignores every argument and always returns false.
+bool SpuMinkowskiPenetrationDepthSolver::calcPenDepth( btSimplexSolverInterface& simplexSolver,
+		const btConvexShape* convexA,const btConvexShape* convexB,
+					const btTransform& transA,const btTransform& transB,
+				btVector3& v, btVector3& pa, btVector3& pb,
+				class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc)
+{
+#if 0	// disabled code: it names shapeTypeA/B, marginA/B and convexVertexDataA/B, none of which are declared in this function
+	(void)stackAlloc;
+	(void)v;
+	
+	// result sink: remembers only the most recent contact point reported to it
+	struct btIntermediateResult : public SpuContactResult
+	{
+
+		btIntermediateResult():m_hasResult(false)
+		{
+		}
+		
+		btVector3 m_normalOnBInWorld;
+		btVector3 m_pointInWorld;
+		btScalar m_depth;
+		bool	m_hasResult;
+
+		virtual void setShapeIdentifiersA(int partId0,int index0)
+		{
+			(void)partId0;
+			(void)index0;
+		}
+
+		virtual void setShapeIdentifiersB(int partId1,int index1)
+		{
+			(void)partId1;
+			(void)index1;
+		}
+		void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
+		{
+			m_normalOnBInWorld = normalOnBInWorld;
+			m_pointInWorld = pointInWorld;
+			m_depth = depth;
+			m_hasResult = true;
+		}
+	};
+
+	//just take fixed number of orientation, and sample the penetration depth in that direction
+	btScalar minProj = btScalar(BT_LARGE_FLOAT);
+	btVector3 minNorm(0.f,0.f,0.f);
+	btVector3 minVertex;
+	btVector3 minA,minB;
+	btVector3 seperatingAxisInA,seperatingAxisInB;
+	btVector3 pInA,qInB,pWorld,qWorld,w;
+
+//#define USE_BATCHED_SUPPORT 1
+#ifdef USE_BATCHED_SUPPORT
+
+	btVector3	supportVerticesABatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+	btVector3	supportVerticesBBatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+	btVector3	seperatingAxisInABatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+	btVector3	seperatingAxisInBBatch[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2];
+	int i;
+
+	int numSampleDirections = NUM_UNITSPHERE_POINTS;
+
+	for (i=0;i<numSampleDirections;i++)
+	{
+		const btVector3& norm = sPenetrationDirections[i];
+		seperatingAxisInABatch[i] =  (-norm) * transA.getBasis() ;
+		seperatingAxisInBBatch[i] =  norm   * transB.getBasis() ;
+	}
+
+	{
+		int numPDA = convexA->getNumPreferredPenetrationDirections();
+		if (numPDA)
+		{
+			for (int i=0;i<numPDA;i++)
+			{
+				btVector3 norm;
+				convexA->getPreferredPenetrationDirection(i,norm);
+				norm  = transA.getBasis() * norm;
+				sPenetrationDirections[numSampleDirections] = norm;
+				seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis();
+				seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis();
+				numSampleDirections++;
+			}
+		}
+	}
+
+	{
+		int numPDB = convexB->getNumPreferredPenetrationDirections();
+		if (numPDB)
+		{
+			for (int i=0;i<numPDB;i++)
+			{
+				btVector3 norm;
+				convexB->getPreferredPenetrationDirection(i,norm);
+				norm  = transB.getBasis() * norm;
+				sPenetrationDirections[numSampleDirections] = norm;
+				seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis();
+				seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis();
+				numSampleDirections++;
+			}
+		}
+	}
+
+
+
+	convexA->batchedUnitVectorGetSupportingVertexWithoutMargin(seperatingAxisInABatch,supportVerticesABatch,numSampleDirections);
+	convexB->batchedUnitVectorGetSupportingVertexWithoutMargin(seperatingAxisInBBatch,supportVerticesBBatch,numSampleDirections);
+
+	for (i=0;i<numSampleDirections;i++)
+	{
+		const btVector3& norm = sPenetrationDirections[i];
+		seperatingAxisInA = seperatingAxisInABatch[i];
+		seperatingAxisInB = seperatingAxisInBBatch[i];
+
+		pInA = supportVerticesABatch[i];
+		qInB = supportVerticesBBatch[i];
+
+		pWorld = transA(pInA);	
+		qWorld = transB(qInB);
+		w	= qWorld - pWorld;
+		btScalar delta = norm.dot(w);
+		//find smallest delta
+		if (delta < minProj)
+		{
+			minProj = delta;
+			minNorm = norm;
+			minA = pWorld;
+			minB = qWorld;
+		}
+	}	
+#else
+
+	int numSampleDirections = NUM_UNITSPHERE_POINTS;
+
+///this is necessary, otherwise the normal is not correct, and sphere will rotate forever on a sloped triangle mesh
+#define DO_PREFERRED_DIRECTIONS 1
+#ifdef DO_PREFERRED_DIRECTIONS
+	{
+		int numPDA = spuGetNumPreferredPenetrationDirections(shapeTypeA,convexA);
+		if (numPDA)
+		{
+			for (int i=0;i<numPDA;i++)
+			{
+				btVector3 norm;
+				spuGetPreferredPenetrationDirection(shapeTypeA,convexA,i,norm);
+				norm  = transA.getBasis() * norm;
+				sPenetrationDirections[numSampleDirections] = norm;
+				numSampleDirections++;
+			}
+		}
+	}
+
+	{
+		int numPDB = spuGetNumPreferredPenetrationDirections(shapeTypeB,convexB);
+		if (numPDB)
+		{
+			for (int i=0;i<numPDB;i++)
+			{
+				btVector3 norm;
+				spuGetPreferredPenetrationDirection(shapeTypeB,convexB,i,norm);
+				norm  = transB.getBasis() * norm;
+				sPenetrationDirections[numSampleDirections] = norm;
+				numSampleDirections++;
+			}
+		}
+	}
+#endif //DO_PREFERRED_DIRECTIONS
+
+	for (int i=0;i<numSampleDirections;i++)
+	{
+		const btVector3& norm = sPenetrationDirections[i];
+		seperatingAxisInA = (-norm)* transA.getBasis();
+		seperatingAxisInB = norm* transB.getBasis();
+
+		pInA = convexA->localGetSupportVertexWithoutMarginNonVirtual( seperatingAxisInA);//, NULL);
+		qInB = convexB->localGetSupportVertexWithoutMarginNonVirtual(seperatingAxisInB);//, NULL);
+
+	//	pInA = convexA->localGetSupportingVertexWithoutMargin(seperatingAxisInA);
+	//	qInB = convexB->localGetSupportingVertexWithoutMargin(seperatingAxisInB);
+
+		pWorld = transA(pInA);	
+		qWorld = transB(qInB);
+		w	= qWorld - pWorld;
+		btScalar delta = norm.dot(w);
+		//find smallest delta
+		if (delta < minProj)
+		{
+			minProj = delta;
+			minNorm = norm;
+			minA = pWorld;
+			minB = qWorld;
+		}
+	}
+#endif //USE_BATCHED_SUPPORT
+
+	//add the margins
+
+	minA += minNorm*marginA;
+	minB -= minNorm*marginB;
+	//no penetration
+	if (minProj < btScalar(0.))
+		return false;
+	// NOTE(review): the extra btScalar(1.00) on top of the margins is presumably padding so the displaced A clears B; confirm
+	minProj += (marginA + marginB) + btScalar(1.00);
+
+
+
+
+
+//#define DEBUG_DRAW 1
+#ifdef DEBUG_DRAW
+	if (debugDraw)
+	{
+		btVector3 color(0,1,0);
+		debugDraw->drawLine(minA,minB,color);
+		color = btVector3 (1,1,1);
+		btVector3 vec = minB-minA;
+		btScalar prj2 = minNorm.dot(vec);
+		debugDraw->drawLine(minA,minA+(minNorm*minProj),color);
+
+	}
+#endif //DEBUG_DRAW
+
+	// closest-point query with A's origin moved by minNorm*minProj; res captures the reported point
+	btGjkPairDetector gjkdet(convexA,convexB,&simplexSolver,0);
+
+	btScalar offsetDist = minProj;
+	btVector3 offset = minNorm * offsetDist;
+	
+
+	SpuClosestPointInput input;
+	input.m_convexVertexData[0] = convexVertexDataA;
+	input.m_convexVertexData[1] = convexVertexDataB;
+	btVector3 newOrg = transA.getOrigin() + offset;
+
+	btTransform displacedTrans = transA;
+	displacedTrans.setOrigin(newOrg);
+
+	input.m_transformA = displacedTrans;
+	input.m_transformB = transB;
+	input.m_maximumDistanceSquared = btScalar(BT_LARGE_FLOAT);//minProj;
+	
+	btIntermediateResult res;
+	gjkdet.getClosestPoints(input,res,0);
+	// NOTE(review): res.m_depth is assigned only in addContactPoint, yet it is read on the next line before m_hasResult is checked
+	btScalar correctedMinNorm = minProj - res.m_depth;
+
+
+	//the penetration depth is over-estimated, relax it
+	btScalar penetration_relaxation= btScalar(1.);
+	minNorm*=penetration_relaxation;
+
+	if (res.m_hasResult)
+	{
+
+		pa = res.m_pointInWorld - minNorm * correctedMinNorm;
+		pb = res.m_pointInWorld;
+		
+#ifdef DEBUG_DRAW
+		if (debugDraw)
+		{
+			btVector3 color(1,0,0);
+			debugDraw->drawLine(pa,pb,color);
+		}
+#endif//DEBUG_DRAW
+
+
+	} else {
+		// could not seperate shapes
+		//btAssert (false);
+	}
+	return res.m_hasResult;
+#endif	// end of the "#if 0" opened at the top of the body
+	return false;	// the only statement compiled while the block above stays disabled
+}
+
+
+
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
new file mode 100644
index 00000000..18ad223e
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
@@ -0,0 +1,48 @@
+
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
+#define MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
+
+
+#include "BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h"
+
+class btStackAlloc;
+class btIDebugDraw;
+class btVoronoiSimplexSolver;
+class btConvexShape;
+
+/// SPU-side penetration depth solver exposed through the
+/// btConvexPenetrationDepthSolver interface. The intended approach is brute
+/// force: sample the depth along a set of directions via support mapping, then
+/// use a GJK step to recover the witness points.
+class SpuMinkowskiPenetrationDepthSolver : public btConvexPenetrationDepthSolver
+{
+public:
+	SpuMinkowskiPenetrationDepthSolver() {}
+	virtual ~SpuMinkowskiPenetrationDepthSolver() {}
+
+	/// See the .cpp: the sampling body is currently compiled out and the call returns false.
+	virtual bool calcPenDepth(btSimplexSolverInterface& simplexSolver,
+		const btConvexShape* convexA, const btConvexShape* convexB,
+		const btTransform& transA, const btTransform& transB,
+		btVector3& v, btVector3& pa, btVector3& pb,
+		btIDebugDraw* debugDraw, btStackAlloc* stackAlloc);
+};
+
+
+#endif //MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
+
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
new file mode 100644
index 00000000..774a0cb2
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
@@ -0,0 +1,70 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef _SPU_PREFERRED_PENETRATION_DIRECTIONS_H
+#define _SPU_PREFERRED_PENETRATION_DIRECTIONS_H
+
+
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+
+/// Number of preferred penetration directions for a shape type: 2 for
+/// TRIANGLE_SHAPE_PROXYTYPE, 0 for every other type. `shape` is not inspected.
+/// Declared inline: the definition lives in a header, so without it every
+/// including translation unit would emit a clashing external definition.
+inline int		spuGetNumPreferredPenetrationDirections(int shapeType, void* shape)
+{
+	(void)shape;
+	switch (shapeType)
+	{
+		case TRIANGLE_SHAPE_PROXYTYPE:
+			return 2;
+		default:
+			{
+#if __ASSERT
+        spu_printf("spuGetNumPreferredPenetrationDirections() - Unsupported bound type: %d.\n", shapeType);
+#endif // __ASSERT
+			}
+	}
+	return 0;	
+}	
+
+/// Writes a preferred penetration direction into penetrationVector. For
+/// TRIANGLE_SHAPE_PROXYTYPE, `shape` is read as three btVector3 vertices: index 0
+/// gives the unit normal (v1-v0)x(v2-v0), any other index its negation. Other
+/// types leave the output untouched. Inline because it is defined in a header.
+inline void	spuGetPreferredPenetrationDirection(int shapeType, void* shape, int index, btVector3& penetrationVector)
+{
+	switch (shapeType)
+    {
+		case TRIANGLE_SHAPE_PROXYTYPE:
+		{
+			btVector3* vertices = (btVector3*)shape;
+			// triangle normal from the two edges that share vertex 0
+			penetrationVector = (vertices[1]-vertices[0]).cross(vertices[2]-vertices[0]);
+			penetrationVector.normalize();
+			if (index)
+				penetrationVector *= btScalar(-1.);
+			break;
+		}
+		default:
+			{
+#if __ASSERT
+        spu_printf("spuGetPreferredPenetrationDirection() - Unsupported bound type: %d.\n", shapeType);
+#endif // __ASSERT
+			}
+	}
+}
+
+#endif //_SPU_PREFERRED_PENETRATION_DIRECTIONS_H
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
new file mode 100644
index 00000000..5e1202c0
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
@@ -0,0 +1,1160 @@
+/*
+   Copyright (C) 2006, 2008 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+//#include "PfxContactBoxBox.h"
+
+#include <math.h>
+#include "../PlatformDefinitions.h"
+#include "boxBoxDistance.h"
+
+/// Returns the square of a.
+static inline float sqr( float a ) {
+	const float squared = a * a;
+	return squared;
+}
+enum BoxSepAxisType {	// family of the separating axis selected by the axis-test macros
+	A_AXIS     = 0,	// an axis of box A's local frame
+	B_AXIS     = 1,	// an axis of box B's local frame
+	CROSS_AXIS = 2	// normalized cross product of an A axis and a B axis
+};
+//-------------------------------------------------------------------------------------------------
+// voronoiTol: bevels Voronoi planes slightly which helps when features are parallel.
+// Used as the factor in the "p[i] >= voronoiTol * p[j]" checks that set inVoronoi below.
+//-------------------------------------------------------------------------------------------------
+static const float voronoiTol = -1.0e-5f;
+
+//-------------------------------------------------------------------------------------------------
+// separating axis tests: gaps along each axis are computed, and the axis with the maximum
+// gap is stored.  cross product axes are normalized.  the macros use locals of the expanding
+// function and return gap from it once a gap exceeds distanceThreshold; "first" seeds maxGap.
+//-------------------------------------------------------------------------------------------------
+#define AaxisTest( dim, letter, first )                                                         \
+{                                                                                               \
+   if ( first )                                                                                 \
+   {                                                                                            \
+      maxGap = gap = gapsA.get##letter();                                                      \
+      if ( gap > distanceThreshold ) return gap;                                                \
+      axisType = A_AXIS;                                                                        \
+      faceDimA = dim;                                                                           \
+      axisA = identity.getCol##dim();                                                          \
+   }                                                                                            \
+   else                                                                                         \
+   {                                                                                            \
+      gap = gapsA.get##letter();                                                               \
+      if ( gap > distanceThreshold ) return gap;                                                \
+      else if ( gap > maxGap )                                                                  \
+      {                                                                                         \
+         maxGap = gap;                                                                          \
+         axisType = A_AXIS;                                                                     \
+         faceDimA = dim;                                                                        \
+         axisA = identity.getCol##dim();                                                       \
+      }                                                                                         \
+   }                                                                                            \
+}
+// BaxisTest( dim, letter ): reads component "letter" of gapsB; returns it from the expanding
+// function if it exceeds distanceThreshold, else records B_AXIS/faceDimB/axisB on a new maxGap.
+#define BaxisTest( dim, letter )                                                                \
+{                                                                                               \
+   gap = gapsB.get##letter();                                                                  \
+   if ( gap > distanceThreshold ) return gap;                                                   \
+   else if ( gap > maxGap )                                                                     \
+   {                                                                                            \
+      maxGap = gap;                                                                             \
+      axisType = B_AXIS;                                                                        \
+      faceDimB = dim;                                                                           \
+      axisB = identity.getCol##dim();                                                          \
+   }                                                                                            \
+}
+// CrossAxisTest( dima, dimb, letterb ): tests axis cross(identity col dima, matrixAB col dimb); skipped when its squared length (from lsqrs) is <= 1e-30.
+#define CrossAxisTest( dima, dimb, letterb )                                                    \
+{                                                                                               \
+   const float lsqr_tolerance = 1.0e-30f;                                                       \
+   float lsqr;                                                                                  \
+                                                                                                \
+   lsqr = lsqrs.getCol##dima().get##letterb();                                                \
+                                                                                                \
+   if ( lsqr > lsqr_tolerance )                                                                 \
+   {                                                                                            \
+      float l_recip = 1.0f / sqrtf( lsqr );                                                     \
+      gap = float(gapsAxB.getCol##dima().get##letterb()) * l_recip;                           \
+                                                                                                \
+      if ( gap > distanceThreshold )                                                            \
+      {                                                                                         \
+         return gap;                                                                            \
+      }                                                                                         \
+                                                                                                \
+      if ( gap > maxGap )                                                                       \
+      {                                                                                         \
+         maxGap = gap;                                                                          \
+         axisType = CROSS_AXIS;                                                                 \
+         edgeDimA = dima;                                                                       \
+         edgeDimB = dimb;                                                                       \
+         axisA = cross(identity.getCol##dima(),matrixAB.getCol##dimb()) * l_recip;            \
+      }                                                                                         \
+   }                                                                                            \
+}
+
+//-------------------------------------------------------------------------------------------------
+// tests whether a vertex of box B and a face of box A are the closest features.  outputs t0,t1
+// (corner x,y clamped to +/-hA[0], +/-hA[1]) and inVoronoi; returns the squared distance from the corner to (t0,t1,0).
+//-------------------------------------------------------------------------------------------------
+inline
+float
+VertexBFaceATest(
+	bool & inVoronoi,
+	float & t0,
+	float & t1,
+	const vmVector3 & hA,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesB )
+{
+	// compute a corner of box B in A's coordinate system
+
+	vmVector3 corner =
+		vmVector3( faceOffsetAB + matrixAB.getCol0() * scalesB.getX() + matrixAB.getCol1() * scalesB.getY() );
+
+	// compute the parameters of the point on A, closest to this corner
+
+	t0 = corner[0];
+	t1 = corner[1];
+
+	if ( t0 > hA[0] )
+		t0 = hA[0];
+	else if ( t0 < -hA[0] )
+		t0 = -hA[0];
+	if ( t1 > hA[1] )
+		t1 = hA[1];
+	else if ( t1 < -hA[1] )
+		t1 = -hA[1];
+
+	// do the Voronoi test: already know the point on B is in the Voronoi region of the
+	// point on A, check the reverse.
+
+	vmVector3 facePointB =
+		vmVector3( mulPerElem( faceOffsetBA + matrixBA.getCol0() * t0 + matrixBA.getCol1() * t1 - scalesB, signsB ) );
+
+	inVoronoi = ( ( facePointB[0] >= voronoiTol * facePointB[2] ) &&
+				  ( facePointB[1] >= voronoiTol * facePointB[0] ) &&
+				  ( facePointB[2] >= voronoiTol * facePointB[1] ) );
+	// squared x/y offsets from the clamped point plus the squared z component of the corner
+	return (sqr( corner[0] - t0 ) + sqr( corner[1] - t1 ) + sqr( corner[2] ));
+}
+// records the current candidate (distSqr, t0, t1, x/y of scalesB) as the new minimum with features F on A and V on B; uses locals of the expanding function.
+#define VertexBFaceA_SetNewMin()                \
+{                                               \
+   minDistSqr = distSqr;                        \
+   localPointA.setX(t0);                        \
+   localPointA.setY(t1);                        \
+   localPointB.setX( scalesB.getX() );          \
+   localPointB.setY( scalesB.getY() );          \
+   featureA = F;                                \
+   featureB = V;                                \
+}
+// runs VertexBFaceATest on up to four sign combinations of B's x/y, keeping the smallest distSqr; stops as soon as a test sets done.
+void
+VertexBFaceATests(
+	bool & done,
+	float & minDistSqr,
+	vmPoint3 & localPointA,
+	vmPoint3 & localPointB,
+	FeatureType & featureA,
+	FeatureType & featureB,
+	const vmVector3 & hA,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesB,
+	bool first )
+{
+	// NOTE(review): signsB/scalesB are modified below; whether the caller sees that depends on PE_REF, which is defined elsewhere.
+	float t0, t1;
+	float distSqr;
+
+	distSqr = VertexBFaceATest( done, t0, t1, hA, faceOffsetAB, faceOffsetBA,
+								matrixAB, matrixBA, signsB, scalesB );
+
+	if ( first ) {
+		VertexBFaceA_SetNewMin();
+	} else {
+		if ( distSqr < minDistSqr ) {
+			VertexBFaceA_SetNewMin();
+		}
+	}
+
+	if ( done )
+		return;
+	// second candidate: negate the x component
+	signsB.setX( -signsB.getX() );
+	scalesB.setX( -scalesB.getX() );
+
+	distSqr = VertexBFaceATest( done, t0, t1, hA, faceOffsetAB, faceOffsetBA,
+								matrixAB, matrixBA, signsB, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		VertexBFaceA_SetNewMin();
+	}
+
+	if ( done )
+		return;
+	// third candidate: also negate the y component
+	signsB.setY( -signsB.getY() );
+	scalesB.setY( -scalesB.getY() );
+
+	distSqr = VertexBFaceATest( done, t0, t1, hA, faceOffsetAB, faceOffsetBA,
+								matrixAB, matrixBA, signsB, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		VertexBFaceA_SetNewMin();
+	}
+
+	if ( done )
+		return;
+	// fourth candidate: restore the x component, y stays negated
+	signsB.setX( -signsB.getX() );
+	scalesB.setX( -scalesB.getX() );
+
+	distSqr = VertexBFaceATest( done, t0, t1, hA, faceOffsetAB, faceOffsetBA,
+								matrixAB, matrixBA, signsB, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		VertexBFaceA_SetNewMin();
+	}
+}
+
+//-------------------------------------------------------------------------------------------------
+// VertexAFaceBTest: tests whether a vertex of box A and a face of box B are the closest features.
+// outputs t0,t1 (corner x,y clamped to +/-hB[0], +/-hB[1]) and inVoronoi; returns the squared distance from the corner to (t0,t1,0).
+//-------------------------------------------------------------------------------------------------
+inline
+float
+VertexAFaceBTest(
+	bool & inVoronoi,
+	float & t0,
+	float & t1,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) scalesA )
+{
+	vmVector3 corner =
+		vmVector3( faceOffsetBA + matrixBA.getCol0() * scalesA.getX() + matrixBA.getCol1() * scalesA.getY() );
+	// start from the corner's x,y and clamp them to [-hB[0],hB[0]] and [-hB[1],hB[1]]
+	t0 = corner[0];
+	t1 = corner[1];
+
+	if ( t0 > hB[0] )
+		t0 = hB[0];
+	else if ( t0 < -hB[0] )
+		t0 = -hB[0];
+	if ( t1 > hB[1] )
+		t1 = hB[1];
+	else if ( t1 < -hB[1] )
+		t1 = -hB[1];
+	// map the clamped point back through faceOffsetAB/matrixAB, relative to scalesA, with signsA applied
+	vmVector3 facePointA =
+		vmVector3( mulPerElem( faceOffsetAB + matrixAB.getCol0() * t0 + matrixAB.getCol1() * t1 - scalesA, signsA ) );
+	// Voronoi check: each component must be >= voronoiTol times another component (cyclic)
+	inVoronoi = ( ( facePointA[0] >= voronoiTol * facePointA[2] ) &&
+				  ( facePointA[1] >= voronoiTol * facePointA[0] ) &&
+				  ( facePointA[2] >= voronoiTol * facePointA[1] ) );
+
+	return (sqr( corner[0] - t0 ) + sqr( corner[1] - t1 ) + sqr( corner[2] ));
+}
+// records the current candidate (distSqr, t0, t1, x/y of scalesA) as the new minimum with features V on A and F on B; uses locals of the expanding function.
+#define VertexAFaceB_SetNewMin()                \
+{                                               \
+   minDistSqr = distSqr;                        \
+   localPointB.setX(t0);                        \
+   localPointB.setY(t1);                        \
+   localPointA.setX( scalesA.getX() );          \
+   localPointA.setY( scalesA.getY() );          \
+   featureA = V;                                \
+   featureB = F;                                \
+}
+// runs VertexAFaceBTest on up to four sign combinations of A's x/y, keeping the smallest distSqr; stops as soon as a test sets done.
+void
+VertexAFaceBTests(
+	bool & done,
+	float & minDistSqr,
+	vmPoint3 & localPointA,
+	vmPoint3 & localPointB,
+	FeatureType & featureA,
+	FeatureType & featureB,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) scalesA,
+	bool first )
+{
+	float t0, t1;
+	float distSqr;
+	// NOTE(review): signsA/scalesA are modified below; whether the caller sees that depends on PE_REF, which is defined elsewhere.
+	distSqr = VertexAFaceBTest( done, t0, t1, hB, faceOffsetAB, faceOffsetBA,
+								matrixAB, matrixBA, signsA, scalesA );
+
+	if ( first ) {
+		VertexAFaceB_SetNewMin();
+	} else {
+		if ( distSqr < minDistSqr ) {
+			VertexAFaceB_SetNewMin();
+		}
+	}
+
+	if ( done )
+		return;
+	// second candidate: negate the x component
+	signsA.setX( -signsA.getX() );
+	scalesA.setX( -scalesA.getX() );
+
+	distSqr = VertexAFaceBTest( done, t0, t1, hB, faceOffsetAB, faceOffsetBA,
+								matrixAB, matrixBA, signsA, scalesA );
+
+	if ( distSqr < minDistSqr ) {
+		VertexAFaceB_SetNewMin();
+	}
+
+	if ( done )
+		return;
+	// third candidate: also negate the y component
+	signsA.setY( -signsA.getY() );
+	scalesA.setY( -scalesA.getY() );
+
+	distSqr = VertexAFaceBTest( done, t0, t1, hB, faceOffsetAB, faceOffsetBA,
+								matrixAB, matrixBA, signsA, scalesA );
+
+	if ( distSqr < minDistSqr ) {
+		VertexAFaceB_SetNewMin();
+	}
+
+	if ( done )
+		return;
+	// fourth candidate: restore the x component, y stays negated
+	signsA.setX( -signsA.getX() );
+	scalesA.setX( -scalesA.getX() );
+
+	distSqr = VertexAFaceBTest( done, t0, t1, hB, faceOffsetAB, faceOffsetBA,
+								matrixAB, matrixBA, signsA, scalesA );
+
+	if ( distSqr < minDistSqr ) {
+		VertexAFaceB_SetNewMin();
+	}
+}
+
+//-------------------------------------------------------------------------------------------------
+// CustomEdgeEdgeTest:
+//
+// tests whether a pair of edges are the closest features.  expands to a complete function body:
+// it writes inVoronoi, tA and tB and ends with "return dot(edgeOffAB,edgeOffAB)".
+//
+// note on the shorthand:
+// 'a' & 'b' refer to the edges.
+// 'c' is the dimension of the axis that points from the face center to the edge Center
+// 'd' is the dimension of the edge Direction
+// the dimension of the face normal is 2
+//-------------------------------------------------------------------------------------------------
+#define CustomEdgeEdgeTest( ac, ac_letter, ad, ad_letter, bc, bc_letter, bd, bd_letter )              \
+{                                                                                               \
+   vmVector3 edgeOffsetAB;                                                                          \
+   vmVector3 edgeOffsetBA;                                                                          \
+                                                                                                \
+   edgeOffsetAB = faceOffsetAB + matrixAB.getCol##bc() * scalesB.get##bc_letter();            \
+   edgeOffsetAB.set##ac_letter( edgeOffsetAB.get##ac_letter() - scalesA.get##ac_letter() );  \
+                                                                                                \
+   edgeOffsetBA = faceOffsetBA + matrixBA.getCol##ac() * scalesA.get##ac_letter();            \
+   edgeOffsetBA.set##bc_letter( edgeOffsetBA.get##bc_letter() - scalesB.get##bc_letter() );  \
+                                                                                                \
+   float dirDot = matrixAB.getCol##bd().get##ad_letter();                                     \
+   float denom = 1.0f - dirDot*dirDot;                                                          \
+   float edgeOffsetAB_ad = edgeOffsetAB.get##ad_letter();                                      \
+   float edgeOffsetBA_bd = edgeOffsetBA.get##bd_letter();                                      \
+                                                                                                \
+   if ( denom == 0.0f )   /* dirDot*dirDot == 1: skip the division below */                     \
+   {                                                                                            \
+      tA = 0.0f;                                                                                \
+   }                                                                                            \
+   else                                                                                         \
+   {                                                                                            \
+      tA = ( edgeOffsetAB_ad + edgeOffsetBA_bd * dirDot ) / denom;                              \
+   }                                                                                            \
+                                                                                                \
+   if ( tA < -hA[ad] ) tA = -hA[ad];                                                            \
+   else if ( tA > hA[ad] ) tA = hA[ad];                                                         \
+                                                                                                \
+   tB = tA * dirDot + edgeOffsetBA_bd;                                                          \
+                                                                                                \
+   if ( tB < -hB[bd] )                                                                          \
+   {                                                                                            \
+      tB = -hB[bd];                                                                             \
+      tA = tB * dirDot + edgeOffsetAB_ad;                                                       \
+                                                                                                \
+      if ( tA < -hA[ad] ) tA = -hA[ad];                                                         \
+      else if ( tA > hA[ad] ) tA = hA[ad];                                                      \
+   }                                                                                            \
+   else if ( tB > hB[bd] )                                                                      \
+   {                                                                                            \
+      tB = hB[bd];                                                                              \
+      tA = tB * dirDot + edgeOffsetAB_ad;                                                       \
+                                                                                                \
+      if ( tA < -hA[ad] ) tA = -hA[ad];                                                         \
+      else if ( tA > hA[ad] ) tA = hA[ad];                                                      \
+   }                                                                                            \
+                                                                                                \
+   vmVector3 edgeOffAB = vmVector3( mulPerElem( edgeOffsetAB + matrixAB.getCol##bd() * tB, signsA ) );\
+   vmVector3 edgeOffBA = vmVector3( mulPerElem( edgeOffsetBA + matrixBA.getCol##ad() * tA, signsB ) );\
+                                                                                                \
+   inVoronoi = ( edgeOffAB[ac] >= voronoiTol * edgeOffAB[2] ) &&                                \
+               ( edgeOffAB[2] >= voronoiTol * edgeOffAB[ac] ) &&                                \
+               ( edgeOffBA[bc] >= voronoiTol * edgeOffBA[2] ) &&                                \
+               ( edgeOffBA[2] >= voronoiTol * edgeOffBA[bc] );                                  \
+                                                                                                \
+   edgeOffAB[ad] -= tA;                                                                         \
+   edgeOffBA[bd] -= tB;                                                                         \
+                                                                                                \
+   return dot(edgeOffAB,edgeOffAB);                                                             \
+}
+// edge-edge test instance: A center axis 0 (X), A direction 1 (Y); B center axis 0 (X), B direction 1 (Y).
+float
+CustomEdgeEdgeTest_0101(
+	bool & inVoronoi,
+	float & tA,
+	float & tB,
+	const vmVector3 & hA,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesA,
+	PE_REF(vmVector3) scalesB )
+{
+	CustomEdgeEdgeTest( 0, X, 1, Y, 0, X, 1, Y );
+}
+// edge-edge test instance: A center axis 0 (X), A direction 1 (Y); B center axis 1 (Y), B direction 0 (X).
+float
+CustomEdgeEdgeTest_0110(
+	bool & inVoronoi,
+	float & tA,
+	float & tB,
+	const vmVector3 & hA,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesA,
+	PE_REF(vmVector3) scalesB )
+{
+	CustomEdgeEdgeTest( 0, X, 1, Y, 1, Y, 0, X );
+}
+// edge-edge test instance: A center axis 1 (Y), A direction 0 (X); B center axis 0 (X), B direction 1 (Y).
+float
+CustomEdgeEdgeTest_1001(
+	bool & inVoronoi,
+	float & tA,
+	float & tB,
+	const vmVector3 & hA,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesA,
+	PE_REF(vmVector3) scalesB )
+{
+	CustomEdgeEdgeTest( 1, Y, 0, X, 0, X, 1, Y );
+}
+// edge-edge test instance: A center axis 1 (Y), A direction 0 (X); B center axis 1 (Y), B direction 0 (X).
+float
+CustomEdgeEdgeTest_1010(
+	bool & inVoronoi,
+	float & tA,
+	float & tB,
+	const vmVector3 & hA,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesA,
+	PE_REF(vmVector3) scalesB )
+{
+	CustomEdgeEdgeTest( 1, Y, 0, X, 1, Y, 0, X );
+}
+// records the current edge pair (distSqr, tA, tB, scales, testOtherFaceDim*) as the new minimum with features E/E; uses locals of the expanding function.
+#define EdgeEdge_SetNewMin( ac_letter, ad_letter, bc_letter, bd_letter )   \
+{                                                                          \
+   minDistSqr = distSqr;                                                   \
+   localPointA.set##ac_letter(scalesA.get##ac_letter());                 \
+   localPointA.set##ad_letter(tA);                                        \
+   localPointB.set##bc_letter(scalesB.get##bc_letter());                 \
+   localPointB.set##bd_letter(tB);                                        \
+   otherFaceDimA = testOtherFaceDimA;                                      \
+   otherFaceDimB = testOtherFaceDimB;                                      \
+   featureA = E;                                                           \
+   featureB = E;                                                           \
+}
+// runs up to 16 edge-edge tests (four sign combinations for each of the four CustomEdgeEdgeTest_* variants), keeping the smallest distSqr; stops as soon as a test sets done.
+void
+EdgeEdgeTests(
+	bool & done,
+	float & minDistSqr,
+	vmPoint3 & localPointA,
+	vmPoint3 & localPointB,
+	int & otherFaceDimA,
+	int & otherFaceDimB,
+	FeatureType & featureA,
+	FeatureType & featureB,
+	const vmVector3 & hA,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesA,
+	PE_REF(vmVector3) scalesB,
+	bool first )
+{
+	// NOTE(review): signs*/scales* are modified below; whether the caller sees that depends on PE_REF, which is defined elsewhere.
+	float distSqr;
+	float tA, tB;
+
+	int testOtherFaceDimA, testOtherFaceDimB;
+	// group 1: CustomEdgeEdgeTest_0101, flipping the x signs of A and B between tests
+	testOtherFaceDimA = 0;
+	testOtherFaceDimB = 0;
+
+	distSqr = CustomEdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( first ) {
+		EdgeEdge_SetNewMin( X, Y, X, Y );
+	} else {
+		if ( distSqr < minDistSqr ) {
+			EdgeEdge_SetNewMin( X, Y, X, Y );
+		}
+	}
+
+	if ( done )
+		return;
+
+	signsA.setX( -signsA.getX() );
+	scalesA.setX( -scalesA.getX() );
+
+	distSqr = CustomEdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( X, Y, X, Y );
+	}
+
+	if ( done )
+		return;
+
+	signsB.setX( -signsB.getX() );
+	scalesB.setX( -scalesB.getX() );
+
+	distSqr = CustomEdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( X, Y, X, Y );
+	}
+
+	if ( done )
+		return;
+
+	signsA.setX( -signsA.getX() );
+	scalesA.setX( -scalesA.getX() );
+
+	distSqr = CustomEdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( X, Y, X, Y );
+	}
+
+	if ( done )
+		return;
+	// group 2: CustomEdgeEdgeTest_1001, flipping the y sign of A and the x sign of B between tests
+	testOtherFaceDimA = 1;
+	testOtherFaceDimB = 0;
+	signsB.setX( -signsB.getX() );
+	scalesB.setX( -scalesB.getX() );
+
+	distSqr = CustomEdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( Y, X, X, Y );
+	}
+
+	if ( done )
+		return;
+
+	signsA.setY( -signsA.getY() );
+	scalesA.setY( -scalesA.getY() );
+
+	distSqr = CustomEdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( Y, X, X, Y );
+	}
+
+	if ( done )
+		return;
+
+	signsB.setX( -signsB.getX() );
+	scalesB.setX( -scalesB.getX() );
+
+	distSqr = CustomEdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( Y, X, X, Y );
+	}
+
+	if ( done )
+		return;
+
+	signsA.setY( -signsA.getY() );
+	scalesA.setY( -scalesA.getY() );
+
+	distSqr = CustomEdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( Y, X, X, Y );
+	}
+
+	if ( done )
+		return;
+	// group 3: CustomEdgeEdgeTest_0110, flipping the x sign of A and the y sign of B between tests
+	testOtherFaceDimA = 0;
+	testOtherFaceDimB = 1;
+	signsB.setX( -signsB.getX() );
+	scalesB.setX( -scalesB.getX() );
+
+	distSqr = CustomEdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( X, Y, Y, X );
+	}
+
+	if ( done )
+		return;
+
+	signsA.setX( -signsA.getX() );
+	scalesA.setX( -scalesA.getX() );
+
+	distSqr = CustomEdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( X, Y, Y, X );
+	}
+
+	if ( done )
+		return;
+
+	signsB.setY( -signsB.getY() );
+	scalesB.setY( -scalesB.getY() );
+
+	distSqr = CustomEdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( X, Y, Y, X );
+	}
+
+	if ( done )
+		return;
+
+	signsA.setX( -signsA.getX() );
+	scalesA.setX( -scalesA.getX() );
+
+	distSqr = CustomEdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( X, Y, Y, X );
+	}
+
+	if ( done )
+		return;
+	// group 4: CustomEdgeEdgeTest_1010, flipping the y signs of A and B between tests
+	testOtherFaceDimA = 1;
+	testOtherFaceDimB = 1;
+	signsB.setY( -signsB.getY() );
+	scalesB.setY( -scalesB.getY() );
+
+	distSqr = CustomEdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( Y, X, Y, X );
+	}
+
+	if ( done )
+		return;
+
+	signsA.setY( -signsA.getY() );
+	scalesA.setY( -scalesA.getY() );
+
+	distSqr = CustomEdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( Y, X, Y, X );
+	}
+
+	if ( done )
+		return;
+
+	signsB.setY( -signsB.getY() );
+	scalesB.setY( -scalesB.getY() );
+
+	distSqr = CustomEdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( Y, X, Y, X );
+	}
+
+	if ( done )
+		return;
+
+	signsA.setY( -signsA.getY() );
+	scalesA.setY( -scalesA.getY() );
+
+	distSqr = CustomEdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
+
+	if ( distSqr < minDistSqr ) {
+		EdgeEdge_SetNewMin( Y, X, Y, X );
+	}
+}
+
+
+float
+boxBoxDistance(vmVector3& normal, BoxPoint& boxPointA, BoxPoint& boxPointB,
+			   PE_REF(Box) boxA, const vmTransform3 & transformA, PE_REF(Box) boxB,
+			   const vmTransform3 & transformB,
+			   float distanceThreshold)
+{
+	vmMatrix3 identity;
+	identity = vmMatrix3::identity();
+	vmVector3 ident[3];
+	ident[0] = identity.getCol0();
+	ident[1] = identity.getCol1();
+	ident[2] = identity.getCol2();
+
+	// get relative transformations
+
+	vmTransform3 transformAB, transformBA;
+	vmMatrix3 matrixAB, matrixBA;
+	vmVector3 offsetAB, offsetBA;
+
+	transformAB = orthoInverse(transformA) * transformB;
+	transformBA = orthoInverse(transformAB);
+
+	matrixAB = transformAB.getUpper3x3();
+	offsetAB = transformAB.getTranslation();
+	matrixBA = transformBA.getUpper3x3();
+	offsetBA = transformBA.getTranslation();
+
+	vmMatrix3 absMatrixAB = absPerElem(matrixAB);
+	vmMatrix3 absMatrixBA = absPerElem(matrixBA);
+
+	// find separating axis with largest gap between projections
+
+	BoxSepAxisType axisType;
+	vmVector3 axisA(0.0f), axisB(0.0f);
+	float gap, maxGap;
+	int faceDimA = 0, faceDimB = 0, edgeDimA = 0, edgeDimB = 0;
+
+	// face axes
+
+	vmVector3  gapsA   = absPerElem(offsetAB) - boxA.mHalf - absMatrixAB * boxB.mHalf;
+
+	AaxisTest(0,X,true);
+	AaxisTest(1,Y,false);
+	AaxisTest(2,Z,false);
+
+	vmVector3  gapsB   = absPerElem(offsetBA) - boxB.mHalf - absMatrixBA * boxA.mHalf;
+
+	BaxisTest(0,X);
+	BaxisTest(1,Y);
+	BaxisTest(2,Z);
+
+	// cross product axes
+
+	// countermeasure for the case where a cross product is zero
+	absMatrixAB += vmMatrix3(1.0e-5f);
+	absMatrixBA += vmMatrix3(1.0e-5f);
+
+	vmMatrix3 lsqrs, projOffset, projAhalf, projBhalf;
+
+	lsqrs.setCol0( mulPerElem( matrixBA.getCol2(), matrixBA.getCol2() ) +
+				   mulPerElem( matrixBA.getCol1(), matrixBA.getCol1() ) );
+	lsqrs.setCol1( mulPerElem( matrixBA.getCol2(), matrixBA.getCol2() ) +
+				   mulPerElem( matrixBA.getCol0(), matrixBA.getCol0() ) );
+	lsqrs.setCol2( mulPerElem( matrixBA.getCol1(), matrixBA.getCol1() ) +
+				   mulPerElem( matrixBA.getCol0(), matrixBA.getCol0() ) );
+
+	projOffset.setCol0(matrixBA.getCol1() * offsetAB.getZ() - matrixBA.getCol2() * offsetAB.getY());
+	projOffset.setCol1(matrixBA.getCol2() * offsetAB.getX() - matrixBA.getCol0() * offsetAB.getZ());
+	projOffset.setCol2(matrixBA.getCol0() * offsetAB.getY() - matrixBA.getCol1() * offsetAB.getX());
+
+	projAhalf.setCol0(absMatrixBA.getCol1() * boxA.mHalf.getZ() + absMatrixBA.getCol2() * boxA.mHalf.getY());
+	projAhalf.setCol1(absMatrixBA.getCol2() * boxA.mHalf.getX() + absMatrixBA.getCol0() * boxA.mHalf.getZ());
+	projAhalf.setCol2(absMatrixBA.getCol0() * boxA.mHalf.getY() + absMatrixBA.getCol1() * boxA.mHalf.getX());
+
+	projBhalf.setCol0(absMatrixAB.getCol1() * boxB.mHalf.getZ() + absMatrixAB.getCol2() * boxB.mHalf.getY());
+	projBhalf.setCol1(absMatrixAB.getCol2() * boxB.mHalf.getX() + absMatrixAB.getCol0() * boxB.mHalf.getZ());
+	projBhalf.setCol2(absMatrixAB.getCol0() * boxB.mHalf.getY() + absMatrixAB.getCol1() * boxB.mHalf.getX());
+
+	vmMatrix3 gapsAxB = absPerElem(projOffset) - projAhalf - transpose(projBhalf);
+
+	CrossAxisTest(0,0,X);
+	CrossAxisTest(0,1,Y);
+	CrossAxisTest(0,2,Z);
+	CrossAxisTest(1,0,X);
+	CrossAxisTest(1,1,Y);
+	CrossAxisTest(1,2,Z);
+	CrossAxisTest(2,0,X);
+	CrossAxisTest(2,1,Y);
+	CrossAxisTest(2,2,Z);
+
+	// need to pick the face on each box whose normal best matches the separating axis.
+	// will transform vectors to be in the coordinate system of this face to simplify things later.
+	// for this, a permutation matrix can be used, which the next section computes.
+
+	int dimA[3], dimB[3];
+
+	if ( axisType == A_AXIS ) {
+		if ( dot(axisA,offsetAB) < 0.0f )
+			axisA = -axisA;
+		axisB = matrixBA * -axisA;
+
+		vmVector3 absAxisB = vmVector3(absPerElem(axisB));
+
+		if ( ( absAxisB[0] > absAxisB[1] ) && ( absAxisB[0] > absAxisB[2] ) )
+			faceDimB = 0;
+		else if ( absAxisB[1] > absAxisB[2] )
+			faceDimB = 1;
+		else
+			faceDimB = 2;
+	} else if ( axisType == B_AXIS ) {
+		if ( dot(axisB,offsetBA) < 0.0f )
+			axisB = -axisB;
+		axisA = matrixAB * -axisB;
+
+		vmVector3 absAxisA = vmVector3(absPerElem(axisA));
+
+		if ( ( absAxisA[0] > absAxisA[1] ) && ( absAxisA[0] > absAxisA[2] ) )
+			faceDimA = 0;
+		else if ( absAxisA[1] > absAxisA[2] )
+			faceDimA = 1;
+		else
+			faceDimA = 2;
+	}
+
+	if ( axisType == CROSS_AXIS ) {
+		if ( dot(axisA,offsetAB) < 0.0f )
+			axisA = -axisA;
+		axisB = matrixBA * -axisA;
+
+		vmVector3 absAxisA = vmVector3(absPerElem(axisA));
+		vmVector3 absAxisB = vmVector3(absPerElem(axisB));
+
+		dimA[1] = edgeDimA;
+		dimB[1] = edgeDimB;
+
+		if ( edgeDimA == 0 ) {
+			if ( absAxisA[1] > absAxisA[2] ) {
+				dimA[0] = 2;
+				dimA[2] = 1;
+			} else                             {
+				dimA[0] = 1;
+				dimA[2] = 2;
+			}
+		} else if ( edgeDimA == 1 ) {
+			if ( absAxisA[2] > absAxisA[0] ) {
+				dimA[0] = 0;
+				dimA[2] = 2;
+			} else                             {
+				dimA[0] = 2;
+				dimA[2] = 0;
+			}
+		} else {
+			if ( absAxisA[0] > absAxisA[1] ) {
+				dimA[0] = 1;
+				dimA[2] = 0;
+			} else                             {
+				dimA[0] = 0;
+				dimA[2] = 1;
+			}
+		}
+
+		if ( edgeDimB == 0 ) {
+			if ( absAxisB[1] > absAxisB[2] ) {
+				dimB[0] = 2;
+				dimB[2] = 1;
+			} else                             {
+				dimB[0] = 1;
+				dimB[2] = 2;
+			}
+		} else if ( edgeDimB == 1 ) {
+			if ( absAxisB[2] > absAxisB[0] ) {
+				dimB[0] = 0;
+				dimB[2] = 2;
+			} else                             {
+				dimB[0] = 2;
+				dimB[2] = 0;
+			}
+		} else {
+			if ( absAxisB[0] > absAxisB[1] ) {
+				dimB[0] = 1;
+				dimB[2] = 0;
+			} else                             {
+				dimB[0] = 0;
+				dimB[2] = 1;
+			}
+		}
+	} else {
+		dimA[2] = faceDimA;
+		dimA[0] = (faceDimA+1)%3;
+		dimA[1] = (faceDimA+2)%3;
+		dimB[2] = faceDimB;
+		dimB[0] = (faceDimB+1)%3;
+		dimB[1] = (faceDimB+2)%3;
+	}
+
+	vmMatrix3 aperm_col, bperm_col;
+
+	aperm_col.setCol0(ident[dimA[0]]);
+	aperm_col.setCol1(ident[dimA[1]]);
+	aperm_col.setCol2(ident[dimA[2]]);
+
+	bperm_col.setCol0(ident[dimB[0]]);
+	bperm_col.setCol1(ident[dimB[1]]);
+	bperm_col.setCol2(ident[dimB[2]]);
+
+	vmMatrix3 aperm_row, bperm_row;
+
+	aperm_row = transpose(aperm_col);
+	bperm_row = transpose(bperm_col);
+
+	// permute all box parameters to be in the face coordinate systems
+
+	vmMatrix3 matrixAB_perm = aperm_row * matrixAB * bperm_col;
+	vmMatrix3 matrixBA_perm = transpose(matrixAB_perm);
+
+	vmVector3 offsetAB_perm, offsetBA_perm;
+
+	offsetAB_perm = aperm_row * offsetAB;
+	offsetBA_perm = bperm_row * offsetBA;
+
+	vmVector3 halfA_perm, halfB_perm;
+
+	halfA_perm = aperm_row * boxA.mHalf;
+	halfB_perm = bperm_row * boxB.mHalf;
+
+	// compute the vector between the centers of each face, in each face's coordinate frame
+
+	vmVector3 signsA_perm, signsB_perm, scalesA_perm, scalesB_perm, faceOffsetAB_perm, faceOffsetBA_perm;
+
+	signsA_perm = copySignPerElem(vmVector3(1.0f),aperm_row * axisA);
+	signsB_perm = copySignPerElem(vmVector3(1.0f),bperm_row * axisB);
+	scalesA_perm = mulPerElem( signsA_perm, halfA_perm );
+	scalesB_perm = mulPerElem( signsB_perm, halfB_perm );
+
+	faceOffsetAB_perm = offsetAB_perm + matrixAB_perm.getCol2() * scalesB_perm.getZ();
+	faceOffsetAB_perm.setZ( faceOffsetAB_perm.getZ() - scalesA_perm.getZ() );
+
+	faceOffsetBA_perm = offsetBA_perm + matrixBA_perm.getCol2() * scalesA_perm.getZ();
+	faceOffsetBA_perm.setZ( faceOffsetBA_perm.getZ() - scalesB_perm.getZ() );
+
+	if ( maxGap < 0.0f ) {
+		// if boxes overlap, this will separate the faces for finding points of penetration.
+
+		faceOffsetAB_perm -= aperm_row * axisA * maxGap * 1.01f;
+		faceOffsetBA_perm -= bperm_row * axisB * maxGap * 1.01f;
+	}
+
+	// for each vertex/face or edge/edge pair of the two faces, find the closest points.
+	//
+	// these points each have an associated box feature (vertex, edge, or face).  if each
+	// point is in the external Voronoi region of the other's feature, they are the
+	// closest points of the boxes, and the algorithm can exit.
+	//
+	// the feature pairs are arranged so that in the general case, the first test will
+	// succeed.  degenerate cases (parallel faces) may require up to all tests in the
+	// worst case.
+	//
+	// if for some reason no case passes the Voronoi test, the features with the minimum
+	// distance are returned.
+
+	vmPoint3 localPointA_perm, localPointB_perm;
+	float minDistSqr;
+	bool done;
+
+	vmVector3 hA_perm( halfA_perm ), hB_perm( halfB_perm );
+
+	localPointA_perm.setZ( scalesA_perm.getZ() );
+	localPointB_perm.setZ( scalesB_perm.getZ() );
+	scalesA_perm.setZ(0.0f);
+	scalesB_perm.setZ(0.0f);
+
+	int otherFaceDimA, otherFaceDimB;
+	FeatureType featureA, featureB;
+
+	if ( axisType == CROSS_AXIS ) {
+		EdgeEdgeTests( done, minDistSqr, localPointA_perm, localPointB_perm,
+					   otherFaceDimA, otherFaceDimB, featureA, featureB,
+					   hA_perm, hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+					   matrixAB_perm, matrixBA_perm, signsA_perm, signsB_perm,
+					   scalesA_perm, scalesB_perm, true );
+
+		if ( !done ) {
+			VertexBFaceATests( done, minDistSqr, localPointA_perm, localPointB_perm,
+							   featureA, featureB,
+							   hA_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+							   matrixAB_perm, matrixBA_perm, signsB_perm, scalesB_perm, false );
+
+			if ( !done ) {
+				VertexAFaceBTests( done, minDistSqr, localPointA_perm, localPointB_perm,
+								   featureA, featureB,
+								   hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+								   matrixAB_perm, matrixBA_perm, signsA_perm, scalesA_perm, false );
+			}
+		}
+	} else if ( axisType == B_AXIS ) {
+		VertexAFaceBTests( done, minDistSqr, localPointA_perm, localPointB_perm,
+						   featureA, featureB,
+						   hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+						   matrixAB_perm, matrixBA_perm, signsA_perm, scalesA_perm, true );
+
+		if ( !done ) {
+			VertexBFaceATests( done, minDistSqr, localPointA_perm, localPointB_perm,
+							   featureA, featureB,
+							   hA_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+							   matrixAB_perm, matrixBA_perm, signsB_perm, scalesB_perm, false );
+
+			if ( !done ) {
+				EdgeEdgeTests( done, minDistSqr, localPointA_perm, localPointB_perm,
+							   otherFaceDimA, otherFaceDimB, featureA, featureB,
+							   hA_perm, hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+							   matrixAB_perm, matrixBA_perm, signsA_perm, signsB_perm,
+							   scalesA_perm, scalesB_perm, false );
+			}
+		}
+	} else {
+		VertexBFaceATests( done, minDistSqr, localPointA_perm, localPointB_perm,
+						   featureA, featureB,
+						   hA_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+						   matrixAB_perm, matrixBA_perm, signsB_perm, scalesB_perm, true );
+
+		if ( !done ) {
+			VertexAFaceBTests( done, minDistSqr, localPointA_perm, localPointB_perm,
+							   featureA, featureB,
+							   hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+							   matrixAB_perm, matrixBA_perm, signsA_perm, scalesA_perm, false );
+
+			if ( !done ) {
+				EdgeEdgeTests( done, minDistSqr, localPointA_perm, localPointB_perm,
+							   otherFaceDimA, otherFaceDimB, featureA, featureB,
+							   hA_perm, hB_perm, faceOffsetAB_perm, faceOffsetBA_perm,
+							   matrixAB_perm, matrixBA_perm, signsA_perm, signsB_perm,
+							   scalesA_perm, scalesB_perm, false );
+			}
+		}
+	}
+
+	// convert local points from face-local to box-local coordinate system
+
+	
+	boxPointA.localPoint = vmPoint3( aperm_col * vmVector3( localPointA_perm )) ;
+	boxPointB.localPoint = vmPoint3( bperm_col * vmVector3( localPointB_perm )) ;
+
+#if 0
+	// find which features of the boxes are involved.
+	// the only feature pairs which occur in this function are VF, FV, and EE, even though the
+	// closest points might actually lie on sub-features, as in a VF contact might be used for
+	// what's actually a VV contact.  this means some feature pairs could possibly seem distinct
+	// from others, although their contact positions are the same.  don't know yet whether this
+	// matters.
+
+	int sA[3], sB[3];
+
+	sA[0] = boxPointA.localPoint.getX() > 0.0f;
+	sA[1] = boxPointA.localPoint.getY() > 0.0f;
+	sA[2] = boxPointA.localPoint.getZ() > 0.0f;
+
+	sB[0] = boxPointB.localPoint.getX() > 0.0f;
+	sB[1] = boxPointB.localPoint.getY() > 0.0f;
+	sB[2] = boxPointB.localPoint.getZ() > 0.0f;
+
+	if ( featureA == F ) {
+		boxPointA.setFaceFeature( dimA[2], sA[dimA[2]] );
+	} else if ( featureA == E ) {
+		boxPointA.setEdgeFeature( dimA[2], sA[dimA[2]], dimA[otherFaceDimA], sA[dimA[otherFaceDimA]] );
+	} else {
+		boxPointA.setVertexFeature( sA[0], sA[1], sA[2] );
+	}
+
+	if ( featureB == F ) {
+		boxPointB.setFaceFeature( dimB[2], sB[dimB[2]] );
+	} else if ( featureB == E ) {
+		boxPointB.setEdgeFeature( dimB[2], sB[dimB[2]], dimB[otherFaceDimB], sB[dimB[otherFaceDimB]] );
+	} else {
+		boxPointB.setVertexFeature( sB[0], sB[1], sB[2] );
+	}
+#endif
+
+	normal = transformA * axisA;
+
+	if ( maxGap < 0.0f ) {
+		return (maxGap);
+	} else {
+		return (sqrtf( minDistSqr ));
+	}
+}
diff --git a/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h
new file mode 100644
index 00000000..0d4957de
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h
@@ -0,0 +1,65 @@
+/*
+   Copyright (C) 2006, 2008 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#ifndef __BOXBOXDISTANCE_H__
+#define __BOXBOXDISTANCE_H__
+
+
+#include "Box.h"
+
+
+//---------------------------------------------------------------------------
+// boxBoxDistance:
+//
+// description:
+//    this computes info that can be used for the collision response of two boxes.  when the boxes
+//    do not overlap, the points are set to the closest points of the boxes, and a positive
+//    distance between them is returned.  if the boxes do overlap, a negative distance is returned
+//    and the points are set to two points that would touch after the boxes are translated apart.
+//    the contact normal gives the direction to repel or separate the boxes when they touch or
+//    overlap (it's being approximated here as one of the 15 "separating axis" directions).
+//
+// returns:
+//    positive or negative distance between two boxes.
+//
+// args:
+//    vmVector3& normal: set to a unit contact normal pointing from box A to box B.
+//
+//    BoxPoint& boxPointA, BoxPoint& boxPointB:
+//       set to a closest point or point of penetration on each box.
+//
+//    PE_REF(Box) boxA, PE_REF(Box) boxB:
+//       boxes, represented as 3 half-widths
+//
+//    const vmTransform3& transformA, const vmTransform3& transformB:
+//       box transformations, in world coordinates
+//
+//    float distanceThreshold:
+//       the algorithm will exit early if it finds that the boxes are more distant than this
+//       threshold, and not compute a contact normal or points.  if the distance returned
+//       exceeds the threshold, all the other output data may not have been computed.  by
+//       default, this is set to FLT_MAX so it will have no effect.
+//
+//---------------------------------------------------------------------------
+
+float
+boxBoxDistance(vmVector3& normal, BoxPoint& boxPointA, BoxPoint& boxPointB,
+			   PE_REF(Box) boxA, const vmTransform3 & transformA, PE_REF(Box) boxB,
+			   const vmTransform3 & transformB,
+			   float distanceThreshold = FLT_MAX );
+
+#endif /* __BOXBOXDISTANCE_H__ */
diff --git a/src/bullet/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp b/src/bullet/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp
new file mode 100644
index 00000000..fe619555
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp
@@ -0,0 +1,214 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#include "SpuSampleTask.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "../PlatformDefinitions.h"
+#include "../SpuFakeDma.h"
+#include "LinearMath/btMinMax.h"
+
+#ifdef __SPU__
+#include <spu_printf.h>
+#else
+#include <stdio.h>
+#define spu_printf printf
+#endif
+
+#define MAX_NUM_BODIES 8192
+// Scratch memory for processSampleTask, which casts its lsMemory argument to this type; MAX_NUM_BODIES is the capacity of gPointerArray.
+struct SampleTask_LocalStoreMemory
+{
+	ATTRIBUTE_ALIGNED16(char gLocalRigidBody [sizeof(btRigidBody)+16]); // staging buffer that holds one btRigidBody at a time
+	ATTRIBUTE_ALIGNED16(void* gPointerArray[MAX_NUM_BODIES]); // local copy of the batch's array of body addresses
+
+};
+
+
+
+
+// Runs one sample task: userPtr is cast to SpuSampleTaskDesc*, lsMemory to SampleTask_LocalStoreMemory*.
+void processSampleTask(void* userPtr, void* lsMemory)
+{
+	//	BT_PROFILE("processSampleTask");
+
+	SampleTask_LocalStoreMemory* localMemory = (SampleTask_LocalStoreMemory*)lsMemory;
+
+	SpuSampleTaskDesc* taskDescPtr = (SpuSampleTaskDesc*)userPtr;
+	SpuSampleTaskDesc& taskDesc = *taskDescPtr;
+	// dispatch on the command stored in the descriptor; unrecognised commands reach the empty default case
+	switch (taskDesc.m_sampleCommand)
+	{
+	case CMD_SAMPLE_INTEGRATE_BODIES:
+		{
+			btTransform predictedTrans;
+			btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr;
+			// m_sampleValue is the number of body pointers in the array at m_mainMemoryPtr
+			int batchSize = taskDesc.m_sampleValue;
+			if (batchSize>MAX_NUM_BODIES)
+			{
+				spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n");
+				break;
+			}
+			int dmaArraySize = batchSize*sizeof(void*);
+
+			uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr);
+
+			//			spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize);
+			// copy the pointer array into gPointerArray; transfers under 16 bytes use the small unaligned get
+			if (dmaArraySize>=16)
+			{
+				cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress  , dmaArraySize, DMA_TAG(1), 0, 0);	
+				cellDmaWaitTagStatusAll(DMA_MASK(1));
+			} else
+			{
+				stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress  , dmaArraySize);
+			}
+
+			// bodies are processed one at a time through the single gLocalRigidBody buffer
+			for ( int i=0;i<batchSize;i++)
+			{
+				///DMA rigid body
+
+				void* localPtr = &localMemory->gLocalRigidBody[0];
+				void* shortAdd = localMemory->gPointerArray[i];
+				uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(shortAdd);
+
+				//	spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr);
+
+				int dmaBodySize = sizeof(btRigidBody);
+				// fetch sizeof(btRigidBody) bytes for this body into the local buffer and wait for the transfer
+				cellDmaGet((void*)localPtr, ppuRigidBodyAddress  , dmaBodySize, DMA_TAG(1), 0, 0);	
+				cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+				// the time step is hard-coded to 1/60; the task descriptor does not carry one
+				float timeStep = 1.f/60.f;
+				// localPtr is the address of gLocalRigidBody, so the null check below is not expected to fail
+				btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj);
+				if (body)
+				{
+					if (body->isActive() && (!body->isStaticOrKinematicObject()))
+					{
+						body->predictIntegratedTransform(timeStep, predictedTrans);
+						body->proceedToTransform( predictedTrans);
+						void* ptr = (void*)localPtr;
+						//	spu_printf("cellDmaLargePut from %llx to LS %llx\n",ptr,ppuRigidBodyAddress);
+						// send the modified body back to the address it was fetched from
+						cellDmaLargePut(ptr, ppuRigidBodyAddress  , dmaBodySize, DMA_TAG(1), 0, 0);
+						cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+					}
+				}
+
+			}
+			break;
+		}
+
+
+	case CMD_SAMPLE_PREDICT_MOTION_BODIES:
+		{
+			btTransform predictedTrans;
+			btCollisionObject** eaPtr = (btCollisionObject**)taskDesc.m_mainMemoryPtr;
+			// predictedTrans is not used in this case
+			int batchSize = taskDesc.m_sampleValue;
+			int dmaArraySize = batchSize*sizeof(void*);
+			// dmaArraySize is computed before the size check but only used after it
+			if (batchSize>MAX_NUM_BODIES)
+			{
+				spu_printf("SPU Error: exceed number of bodies, see MAX_NUM_BODIES in SpuSampleTask.cpp\n");
+				break;
+			}
+
+			uint64_t ppuArrayAddress = reinterpret_cast<uint64_t>(eaPtr);
+
+			//			spu_printf("array location is at %llx, batchSize = %d, DMA size = %d\n",ppuArrayAddress,batchSize,dmaArraySize);
+			// same pointer-array fetch as in the CMD_SAMPLE_INTEGRATE_BODIES case
+			if (dmaArraySize>=16)
+			{
+				cellDmaLargeGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress  , dmaArraySize, DMA_TAG(1), 0, 0);	
+				cellDmaWaitTagStatusAll(DMA_MASK(1));
+			} else
+			{
+				stallingUnalignedDmaSmallGet((void*)&localMemory->gPointerArray[0], ppuArrayAddress  , dmaArraySize);
+			}
+
+
+			for ( int i=0;i<batchSize;i++)
+			{
+				///DMA rigid body
+
+				void* localPtr = &localMemory->gLocalRigidBody[0];
+				void* shortAdd = localMemory->gPointerArray[i];
+				uint64_t ppuRigidBodyAddress = reinterpret_cast<uint64_t>(shortAdd);
+
+				//	spu_printf("cellDmaGet at CMD_SAMPLE_INTEGRATE_BODIES from %llx to %llx\n",ppuRigidBodyAddress,localPtr);
+
+				int dmaBodySize = sizeof(btRigidBody);
+
+				cellDmaGet((void*)localPtr, ppuRigidBodyAddress  , dmaBodySize, DMA_TAG(1), 0, 0);	
+				cellDmaWaitTagStatusAll(DMA_MASK(1));
+
+
+				float timeStep = 1.f/60.f;
+
+				btRigidBody* body = (btRigidBody*) localPtr;//btRigidBody::upcast(colObj);
+				if (body)
+				{
+					if (!body->isStaticOrKinematicObject())
+					{
+						if (body->isActive())
+						{
+							body->integrateVelocities( timeStep);
+							//damping
+							body->applyDamping(timeStep);
+							// the body's own interpolation world transform is passed as the second argument here
+							body->predictIntegratedTransform(timeStep,body->getInterpolationWorldTransform());
+							// send the modified body back to the address it was fetched from
+							void* ptr = (void*)localPtr;
+							cellDmaLargePut(ptr, ppuRigidBodyAddress  , dmaBodySize, DMA_TAG(1), 0, 0);
+							cellDmaWaitTagStatusAll(DMA_MASK(1));
+						}
+					}
+				}
+
+			}
+			break;
+		}
+	
+
+
+	default:
+		{
+			// any other command is ignored
+		}
+	};
+}
+
+
+// Static scratch instance (declared through ATTRIBUTE_ALIGNED16) used on Cell / libspe2 builds.
+#if defined(__CELLOS_LV2__) || defined (LIBSPE2)
+ATTRIBUTE_ALIGNED16(SampleTask_LocalStoreMemory	gLocalStoreMemory);
+#endif
+
+// Returns the scratch memory for the sample task: the address of the static instance
+// on Cell / libspe2 builds, otherwise a newly heap-allocated instance on every call.
+void* createSampleLocalStoreMemory()
+{
+#if defined(__CELLOS_LV2__) || defined (LIBSPE2)
+	return &gLocalStoreMemory;
+#else
+	return new SampleTask_LocalStoreMemory;
+#endif
+}
diff --git a/src/bullet/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h b/src/bullet/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h
new file mode 100644
index 00000000..c8ebdfd6
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuSampleTask/SpuSampleTask.h
@@ -0,0 +1,54 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef SPU_SAMPLE_TASK_H
+#define SPU_SAMPLE_TASK_H
+
+#include "../PlatformDefinitions.h"
+#include "LinearMath/btScalar.h"
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btMatrix3x3.h"
+
+#include "LinearMath/btAlignedAllocator.h"
+
+// Commands handled by processSampleTask; the value travels in SpuSampleTaskDesc::m_sampleCommand.
+enum
+{
+	CMD_SAMPLE_INTEGRATE_BODIES = 1, // predict the transform of each active, non-static/kinematic body and apply it
+	CMD_SAMPLE_PREDICT_MOTION_BODIES // integrate velocities, apply damping and predict the interpolation transform
+};
+
+
+// Description of one sample task: filled in by SpuSampleTaskProcess::issueTask, read by processSampleTask.
+ATTRIBUTE_ALIGNED16(struct) SpuSampleTaskDesc
+{
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
+	uint32_t						m_sampleCommand; // selects the work to do (one of the CMD_SAMPLE_* values)
+	uint32_t						m_taskId; // index of the issuing task slot, kept for bookkeeping
+
+	uint64_t 	m_mainMemoryPtr; // address of the task input stored as an integer (an array of body pointers for the CMD_SAMPLE_* commands)
+	int			m_sampleValue; // command-specific value (the number of bodies for the CMD_SAMPLE_* commands)
+	
+
+};
+// processSampleTask runs one task: userPtr is a SpuSampleTaskDesc*, lsMemory a SampleTask_LocalStoreMemory*.
+// createSampleLocalStoreMemory returns a pointer to a SampleTask_LocalStoreMemory instance.
+void	processSampleTask(void* userPtr, void* lsMemory);
+void*	createSampleLocalStoreMemory();
+
+
+#endif //SPU_SAMPLE_TASK_H
+
diff --git a/src/bullet/BulletMultiThreaded/SpuSampleTaskProcess.cpp b/src/bullet/BulletMultiThreaded/SpuSampleTaskProcess.cpp
new file mode 100644
index 00000000..11cb9e7c
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuSampleTaskProcess.cpp
@@ -0,0 +1,222 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//#define __CELLOS_LV2__ 1
+
+#define USE_SAMPLE_PROCESS 1
+#ifdef USE_SAMPLE_PROCESS
+
+
+#include "SpuSampleTaskProcess.h"
+#include <stdio.h>
+
+#ifdef __SPU__
+
+
+
+void SampleThreadFunc(void* userPtr, void* lsMemory)
+{
+	(void)userPtr; (void)lsMemory; // placeholder task body: neither argument is used
+	printf("hello world\n");
+}
+
+
+void* SamplelsMemoryFunc()
+{
+	// No local store memory is created for the sample task; a null pointer is handed back.
+	return static_cast<void*>(0);
+}
+
+
+#else
+
+
+#include "btThreadSupportInterface.h"
+
+//#	include "SPUAssert.h"
+#include <string.h>
+
+
+
+extern "C" {
+	extern char SPU_SAMPLE_ELF_SYMBOL[];
+}
+
+
+
+
+
+SpuSampleTaskProcess::SpuSampleTaskProcess(btThreadSupportInterface* threadInterface, int maxNumOutstandingTasks)
+	: m_threadInterface(threadInterface)
+	, m_maxNumOutstandingTasks(maxNumOutstandingTasks)
+{
+	// One busy flag and one task descriptor per outstanding-task slot.
+	m_taskBusy.resize(m_maxNumOutstandingTasks);
+	m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);
+
+	// Every slot starts out idle.
+	for (int slot = 0; slot < m_maxNumOutstandingTasks; ++slot)
+	{
+		m_taskBusy[slot] = false;
+	}
+
+	m_initialized = false;
+	m_currentTask = 0;
+	m_numBusyTasks = 0;
+
+	// Local state is set up; now ask the thread interface to start.
+	m_threadInterface->startSPU();
+}
+// Calls stopSPU() on the thread interface, mirroring startSPU() in the constructor; the interface itself is not deleted here.
+SpuSampleTaskProcess::~SpuSampleTaskProcess()
+{
+	m_threadInterface->stopSPU();
+	
+}
+
+
+
+void SpuSampleTaskProcess::initialize()
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("SpuSampleTaskProcess::initialize()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+	// Reset the bookkeeping: nothing outstanding, slot 0 is next, every slot idle.
+	m_initialized = true;
+	m_currentTask = 0;
+	m_numBusyTasks = 0;
+	for (int slot = 0; slot < m_maxNumOutstandingTasks; ++slot)
+	{
+		m_taskBusy[slot] = false;
+	}
+}
+
+
+// Fills the descriptor of the current task slot and sends it to the thread interface.
+//   sampleMainMemPtr, sampleValue and sampleCommand are stored in the slot's SpuSampleTaskDesc.
+// If every slot is busy afterwards, waits for one response and frees that slot, then
+// selects the first free slot as m_currentTask for the next call.
+void SpuSampleTaskProcess::issueTask(void* sampleMainMemPtr,int sampleValue,int sampleCommand)
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("SpuSampleTaskProcess::issueTask (m_currentTask= %d)\n", m_currentTask);
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+	m_taskBusy[m_currentTask] = true;
+	m_numBusyTasks++;
+
+	SpuSampleTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask];
+	{
+		// send task description in event message
+		// no error checking here...
+		// but, currently, event queue can be no larger than NUM_WORKUNIT_TASKS.
+
+		taskDesc.m_mainMemoryPtr = reinterpret_cast<uint64_t>(sampleMainMemPtr);
+		taskDesc.m_sampleValue = sampleValue;
+		taskDesc.m_sampleCommand = sampleCommand;
+
+		//some bookkeeping to recognize finished tasks
+		taskDesc.m_taskId = m_currentTask;
+	}
+
+	m_threadInterface->sendRequest(1, (ppu_address_t) &taskDesc, m_currentTask);
+
+	// if all tasks busy, wait for spu event to clear the task.
+	if (m_numBusyTasks >= m_maxNumOutstandingTasks)
+	{
+		// initialised so that neither value is ever read while indeterminate
+		unsigned int taskId = 0;
+		unsigned int outputSize = 0;
+		// seed taskId with the first busy slot before waiting
+		for (int i = 0; i < m_maxNumOutstandingTasks; i++)
+		{
+			if (m_taskBusy[i])
+			{
+				taskId = i;
+				break;
+			}
+		}
+		m_threadInterface->waitForResponse(&taskId, &outputSize);
+
+		//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
+
+		postProcess(taskId, outputSize);
+		m_taskBusy[taskId] = false;
+		m_numBusyTasks--;
+	}
+
+	// find new task buffer
+	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
+	{
+		if (!m_taskBusy[i])
+		{
+			m_currentTask = i;
+			break;
+		}
+	}
+}
+
+
+///Optional PPU-side post processing for each task
+void SpuSampleTaskProcess::postProcess(int taskId, int outputSize)
+{
+	// nothing is done with taskId or outputSize at present
+}
+
+
+// Loops until m_numBusyTasks reaches zero, waiting for one response per iteration and freeing its slot.
+void SpuSampleTaskProcess::flush()
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("\nSpuSampleTaskProcess::flush()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+	// all tasks are issued, wait for all tasks to be complete
+	while (m_numBusyTasks > 0)
+	{
+		// initialised so that neither value is ever read while indeterminate
+		unsigned int taskId = 0;
+		unsigned int outputSize = 0;
+
+		// seed taskId with the first slot still marked busy
+		for (int i = 0; i < m_maxNumOutstandingTasks; i++)
+		{
+			if (m_taskBusy[i])
+			{
+				taskId = i;
+				break;
+			}
+		}
+
+		// both values are passed by pointer to waitForResponse
+		m_threadInterface->waitForResponse(&taskId, &outputSize);
+
+		//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
+
+		postProcess(taskId, outputSize);
+
+		// mark the reported slot as free again
+		m_taskBusy[taskId] = false;
+
+		m_numBusyTasks--;
+	}
+
+}
+
+#endif
+
+
+#endif //USE_SAMPLE_PROCESS
diff --git a/src/bullet/BulletMultiThreaded/SpuSampleTaskProcess.h b/src/bullet/BulletMultiThreaded/SpuSampleTaskProcess.h
new file mode 100644
index 00000000..6173225a
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuSampleTaskProcess.h
@@ -0,0 +1,153 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SPU_SAMPLE_TASK_PROCESS_H
+#define BT_SPU_SAMPLE_TASK_PROCESS_H
+
+#include <assert.h>
+
+
+#include "PlatformDefinitions.h"
+
+#include <stdlib.h>
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+
+#include "SpuSampleTask/SpuSampleTask.h"
+
+
+//just add your commands here, try to keep them globally unique for debugging purposes
+#define CMD_SAMPLE_TASK_COMMAND 10
+
+
+
+/// SpuSampleTaskProcess issues sample tasks (SpuSampleTaskDesc) through a btThreadSupportInterface.
+/// When PPU issues a task, it will look for completed task buffers
+/// PPU will do postprocessing, dependent on workunit output (not likely)
+class SpuSampleTaskProcess
+{
+	// track task buffers that are being used, and total busy tasks
+	btAlignedObjectArray<bool>	m_taskBusy;
+	btAlignedObjectArray<SpuSampleTaskDesc>m_spuSampleTaskDesc;
+	
+	int   m_numBusyTasks;
+
+	// the current task and the current entry to insert a new work unit
+	int   m_currentTask;
+	// false after construction, true once initialize() has run
+	bool m_initialized;
+	// hook invoked after each response is received; the current implementation is empty
+	void postProcess(int taskId, int outputSize);
+	// started in the constructor and stopped in the destructor; not deleted by this class
+	class	btThreadSupportInterface*	m_threadInterface;
+	// number of task slots; issueTask waits for a response once this many tasks are busy
+	int	m_maxNumOutstandingTasks;
+
+
+
+public:
+	SpuSampleTaskProcess(btThreadSupportInterface*	threadInterface, int maxNumOutstandingTasks);
+	
+	~SpuSampleTaskProcess();
+	
+	///call initialize in the beginning of the frame, before issueTask
+	void initialize();
+	///fill in a task descriptor (main-memory pointer, value, command) and send it to the thread interface
+	void issueTask(void* sampleMainMemPtr,int sampleValue,int sampleCommand);
+
+	///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished
+	void flush();
+};
+
+
+#if defined(USE_LIBSPE2) && defined(__SPU__)
+////////////////////MAIN/////////////////////////////
+#include "../SpuLibspe2Support.h"
+#include <spu_intrinsics.h>
+#include <spu_mfcio.h>
+#include <SpuFakeDma.h>
+// Declared here, defined elsewhere: local-store setup and the per-task work function used by main below.
+void * SamplelsMemoryFunc();
+void SampleThreadFunc(void* userPtr,void* lsMemory);
+
+//#define DEBUG_LIBSPE2_MAINLOOP
+// SPU-side entry point (compiled only with USE_LIBSPE2 and __SPU__): services mailbox messages until a shutdown message arrives.
+int main(unsigned long long speid, addr64 argp, addr64 envp)
+{
+	printf("SPU is up \n");
+	
+	ATTRIBUTE_ALIGNED128(btSpuStatus status);
+	ATTRIBUTE_ALIGNED16( SpuSampleTaskDesc taskDesc ) ;
+	unsigned int received_message = Spu_Mailbox_Event_Nothing;
+        bool shutdown = false;
+	// read the status block at argp, mark it free, store the local-store pointer in it and write it back
+	cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+	cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+	status.m_status = Spu_Status_Free;
+	status.m_lsMemory.p = SamplelsMemoryFunc();
+
+	cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+	cellDmaWaitTagStatusAll(DMA_MASK(3));
+	
+	
+	while (!shutdown)
+	{
+		received_message = spu_read_in_mbox();
+		
+
+		
+		switch(received_message)
+		{
+		case Spu_Mailbox_Event_Shutdown:
+			shutdown = true;
+			break; 
+		case Spu_Mailbox_Event_Task:
+			// refresh the status
+#ifdef DEBUG_LIBSPE2_MAINLOOP
+			printf("SPU recieved Task \n");
+#endif //DEBUG_LIBSPE2_MAINLOOP
+			cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+			cellDmaWaitTagStatusAll(DMA_MASK(3));
+			// the refreshed status is asserted to be occupied when a task message arrives
+			btAssert(status.m_status==Spu_Status_Occupied);
+			// fetch the task descriptor that the status block points to
+			cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuSampleTaskDesc), DMA_TAG(3), 0, 0);
+			cellDmaWaitTagStatusAll(DMA_MASK(3));
+			// NOTE(review): the lsMemory argument is the task's m_mainMemoryPtr, not status.m_lsMemory - confirm this is intended
+			SampleThreadFunc((void*)&taskDesc, reinterpret_cast<void*> (taskDesc.m_mainMemoryPtr) );
+			break;
+		case Spu_Mailbox_Event_Nothing:
+		default:
+			break;
+		}
+
+		// set to status free and wait for next task
+		status.m_status = Spu_Status_Free;
+		cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+		cellDmaWaitTagStatusAll(DMA_MASK(3));		
+		// this write-back runs after every message, including the shutdown message
+		
+  	}
+  	return 0;
+}
+//////////////////////////////////////////////////////
+#endif
+
+
+
+#endif // BT_SPU_SAMPLE_TASK_PROCESS_H
+
diff --git a/src/bullet/BulletMultiThreaded/SpuSync.h b/src/bullet/BulletMultiThreaded/SpuSync.h
new file mode 100644
index 00000000..4157b8f0
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/SpuSync.h
@@ -0,0 +1,149 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2007 Starbreeze Studios
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+Written by: Marten Svanfeldt
+*/
+
+#ifndef BT_SPU_SYNC_H
+#define	BT_SPU_SYNC_H
+
+
+#include "PlatformDefinitions.h"
+
+
+#if defined(WIN32)
+
+#define WIN32_LEAN_AND_MEAN
+#ifdef _XBOX
+#include <Xtl.h>
+#else
+#include <Windows.h>
+#endif
+
+///The btSpinlock is a structure to allow multi-platform synchronization. This allows to port the SPU tasks to other platforms.
+class btSpinlock
+{
+public:
+	//typedef volatile LONG SpinVariable;
+	typedef CRITICAL_SECTION SpinVariable;
+	// Win32 variant: wraps a caller-supplied CRITICAL_SECTION; only the pointer is stored.
+	btSpinlock (SpinVariable* var)
+		: spinVariable (var)
+	{}
+	// Initializes the wrapped critical section; this class never calls DeleteCriticalSection on it.
+	void Init ()
+	{
+		//*spinVariable = 0;
+		InitializeCriticalSection(spinVariable);
+	}
+	// Enters the wrapped critical section.
+	void Lock ()
+	{
+		EnterCriticalSection(spinVariable);
+	}
+	// Leaves the wrapped critical section.
+	void Unlock ()
+	{
+		LeaveCriticalSection(spinVariable);
+	}
+
+private:
+	SpinVariable* spinVariable;
+};
+
+
+#elif defined (__CELLOS_LV2__)
+
+//#include <cell/atomic.h>
+#include <cell/sync/mutex.h>
+
+///The btSpinlock is a structure to allow multi-platform synchronization. This allows to port the SPU tasks to other platforms.
+class btSpinlock
+{
+public:
+	typedef CellSyncMutex SpinVariable;
+	// Cell variant: wraps a caller-supplied CellSyncMutex; only the pointer is stored.
+	btSpinlock (SpinVariable* var)
+		: spinVariable (var)
+	{}
+	// Initializes the mutex when compiled without __SPU__; compiles to a no-op when __SPU__ is defined.
+	void Init ()
+	{
+#ifndef __SPU__
+		//*spinVariable = 1;
+		cellSyncMutexInitialize(spinVariable);
+#endif
+	}
+
+
+	// Locks the mutex when compiled with __SPU__; compiles to a no-op otherwise.
+	void Lock ()
+	{
+#ifdef __SPU__
+		// lock semaphore
+		/*while (cellAtomicTestAndDecr32(atomic_buf, (uint64_t)spinVariable) == 0) 
+		{
+
+		};*/
+		cellSyncMutexLock((uint64_t)spinVariable);
+#endif
+	}
+	// Unlocks the mutex when compiled with __SPU__; compiles to a no-op otherwise.
+	void Unlock ()
+	{
+#ifdef __SPU__
+		//cellAtomicIncr32(atomic_buf, (uint64_t)spinVariable);
+		cellSyncMutexUnlock((uint64_t)spinVariable);
+#endif 
+	}
+
+	// atomic_buf is only referenced by the commented-out cellAtomic* code above.
+private:
+	SpinVariable*	spinVariable;
+	ATTRIBUTE_ALIGNED128(uint32_t		atomic_buf[32]);
+};
+
+#else
+//create a dummy implementation (without any locking) useful for serial processing
+class btSpinlock
+{
+public:
+	typedef int  SpinVariable;
+	// Fallback variant: stores the pointer but performs no locking at all.
+	btSpinlock (SpinVariable* var)
+		: spinVariable (var)
+	{}
+	// No-op.
+	void Init ()
+	{
+	}
+	// No-op.
+	void Lock ()
+	{
+	}
+	// No-op.
+	void Unlock ()
+	{
+	}
+
+private:
+	SpinVariable* spinVariable;
+};
+
+
+#endif
+
+
+#endif //BT_SPU_SYNC_H
+
diff --git a/src/bullet/BulletMultiThreaded/TrbDynBody.h b/src/bullet/BulletMultiThreaded/TrbDynBody.h
new file mode 100644
index 00000000..a7f4bf1b
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/TrbDynBody.h
@@ -0,0 +1,79 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef BT_RB_DYN_BODY_H__
+#define BT_RB_DYN_BODY_H__
+
+#include "vectormath/vmInclude.h"
+using namespace Vectormath::Aos;
+
+#include "TrbStateVec.h"
+
+class CollObject;
+
+class TrbDynBody // Constant per-body properties: mass, body-space inertia, restitution, friction, collision object.
+{
+public:
+	// Zero mass and zero inverse mass, no collision object. The inertia matrices are NOT initialised here.
+	TrbDynBody()
+	{
+		fMass   = 0.0f;
+		fMassInv = 0.0f; // previously left uninitialised although getMassInv() exposes it; matches setMass(0.0f)
+		fCollObject = NULL;
+		fElasticity = 0.2f;
+		fFriction = 0.8f;
+	}
+	// Get methods
+	float          getMass() const {return fMass;}
+	float          getElasticity() const {return fElasticity;}
+	float          getFriction() const {return fFriction;}
+	CollObject*    getCollObject() const {return fCollObject;}
+	const Matrix3 &getBodyInertia() const {return fIBody;}
+	const Matrix3 &getBodyInertiaInv() const {return fIBodyInv;}
+	float          getMassInv() const {return fMassInv;}
+
+	// Set methods (the mass and inertia setters store both the value and its inverse; a non-positive mass maps to inverse 0)
+	void           setMass(float mass) {fMass=mass;fMassInv=mass>0.0f?1.0f/mass:0.0f;}
+	void           setBodyInertia(const Matrix3 bodyInertia) {fIBody = bodyInertia;fIBodyInv = inverse(bodyInertia);}
+	void           setElasticity(float elasticity) {fElasticity = elasticity;}
+	void           setFriction(float friction) {fFriction = friction;}
+	void           setCollObject(CollObject *collObj) {fCollObject = collObj;}
+	// Same as the setters above, but starting from the inverse quantity.
+	void           setBodyInertiaInv(const Matrix3 bodyInertiaInv) 
+	{
+		fIBody = inverse(bodyInertiaInv);
+		fIBodyInv = bodyInertiaInv;
+	}
+	void           setMassInv(float invMass) {
+		fMass= invMass>0.0f ? 1.0f/invMass :0.0f;
+		fMassInv=invMass;
+	}
+
+private:
+	// Rigid Body constants
+	float          fMass;        // Rigid Body mass
+	float          fMassInv;     // Inverse of mass
+	Matrix3        fIBody;       // Inertia matrix in body's coords
+	Matrix3        fIBodyInv;    // Inertia matrix inverse in body's coords
+	float          fElasticity;  // Coefficient of restitution
+	float          fFriction;    // Coefficient of friction
+
+public:
+	CollObject*    fCollObject;  // Collision object corresponding the RB
+} __attribute__ ((aligned(16)));
+
+#endif //BT_RB_DYN_BODY_H__
+
diff --git a/src/bullet/BulletMultiThreaded/TrbStateVec.h b/src/bullet/BulletMultiThreaded/TrbStateVec.h
new file mode 100644
index 00000000..b6d895e1
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/TrbStateVec.h
@@ -0,0 +1,339 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef BT_TRBSTATEVEC_H__
+#define BT_TRBSTATEVEC_H__
+
+#include <stdlib.h>
+#ifdef PFX_USE_FREE_VECTORMATH
+#include "vecmath/vmInclude.h"
+#else
+#include "vectormath/vmInclude.h"
+#endif //PFX_USE_FREE_VECTORMATH
+
+
+#include "PlatformDefinitions.h"
+
+
+static inline vmVector3 read_Vector3(const float* p) // returns the vmVector3 that loadXYZ fills from p
+{
+	vmVector3 v;
+	loadXYZ(v, p);
+	return v;
+}
+// Returns the vmQuat that loadXYZW fills from p.
+static inline vmQuat read_Quat(const float* p)
+{
+	vmQuat vq;
+	loadXYZW(vq, p);
+	return vq;
+}
+// Writes src to p via storeXYZ; src is copied first (NOTE(review): presumably storeXYZ needs a non-const argument - confirm).
+static inline void store_Vector3(const vmVector3 &src, float* p)
+{
+	vmVector3 v = src;
+	storeXYZ(v, p);
+}
+// Writes src to p via storeXYZW, going through a local copy like store_Vector3.
+static inline void store_Quat(const vmQuat &src, float* p)
+{
+	vmQuat vq = src;
+	storeXYZW(vq, p);
+}
+
+// Motion Type: TrbState::getMotionMask() turns each value into the mask bit (1<<type)
+enum {
+	PfxMotionTypeFixed = 0,
+	PfxMotionTypeActive,
+	PfxMotionTypeKeyframe,
+	PfxMotionTypeOneWay,
+	PfxMotionTypeTrigger,
+	PfxMotionTypeCount
+};
+// Masks over the bits produced by TrbState::getMotionMask(); bit 7 (0x80) is the sleeping flag.
+#define PFX_MOTION_MASK_DYNAMIC 0x0a // Active,OneWay
+#define PFX_MOTION_MASK_STATIC  0x95 // Fixed,Keyframe,Trigger,Sleeping
+#define PFX_MOTION_MASK_SLEEP   0x0e // Can sleep: Active,Keyframe,OneWay
+#define PFX_MOTION_MASK_TYPE    0x7f // every bit except the sleeping flag
+
+//
+// Rigid Body state
+//
+
+#ifdef __CELLOS_LV2__
+ATTRIBUTE_ALIGNED128(class) TrbState
+#else
+ATTRIBUTE_ALIGNED16(class) TrbState
+#endif
+// Per-body state record: flags, contact filters, world AABB, damping, and pose/velocities kept as plain float arrays.
+{
+public:
+	TrbState() // Active motion type, all filter bits set; pose, velocities and AABB are left uninitialised
+	{
+		setMotionType(PfxMotionTypeActive);
+		contactFilterSelf=contactFilterTarget=0xffffffff;
+		deleted = 0;
+		mSleeping = 0;
+		useSleep = 1;
+		trbBodyIdx=0;
+		mSleepCount=0;
+		useCcd = 0;
+		useContactCallback = 0;
+		useSleepCallback = 0;
+		linearDamping = 1.0f;
+		angularDamping = 0.99f;
+	}
+	// Initialises motion type, pose and velocities from the arguments (defined below the class).
+	TrbState(const uint8_t m, const vmVector3& x, const vmQuat& q, const vmVector3& v, const vmVector3& omega );
+	
+	uint16_t	mSleepCount;
+	uint8_t		mMotionType;
+	uint8_t		deleted            : 1;
+	uint8_t		mSleeping           : 1;
+	uint8_t		useSleep           : 1;
+	uint8_t		useCcd		       : 1;
+	uint8_t		useContactCallback : 1;
+	uint8_t		useSleepCallback   : 1;
+
+	uint16_t	trbBodyIdx;
+	uint32_t	contactFilterSelf;
+	uint32_t	contactFilterTarget;
+
+	float		center[3];		// AABB center(World)
+	float		half[3];		// AABB half(World)
+
+	float		linearDamping;
+	float		angularDamping;
+	
+	float		deltaLinearVelocity[3];
+	float		deltaAngularVelocity[3];
+
+	float     fX[3];				// position
+	float     fQ[4];				// orientation
+	float     fV[3];				// velocity
+	float     fOmega[3];			// angular velocity
+
+	inline void setZero();      // Zeroes out the elements
+	inline void setIdentity();  // Sets the rotation to identity and zeroes out the other elements
+
+	bool		isDeleted() const {return deleted==1;}
+
+	uint16_t	getRigidBodyId() const {return trbBodyIdx;}
+	void		setRigidBodyId(uint16_t i) {trbBodyIdx = i;}
+
+
+	uint32_t	getContactFilterSelf() const {return contactFilterSelf;}
+	void		setContactFilterSelf(uint32_t filter) {contactFilterSelf = filter;}
+
+	uint32_t	getContactFilterTarget() const {return contactFilterTarget;}
+	void		setContactFilterTarget(uint32_t filter) {contactFilterTarget = filter;}
+
+	float getLinearDamping() const {return linearDamping;}
+	float getAngularDamping() const {return angularDamping;}
+
+	void setLinearDamping(float damping) {linearDamping=damping;}
+	void setAngularDamping(float damping) {angularDamping=damping;}
+
+	// Changing the motion type also wakes the body and clears its sleep counter.
+	uint8_t		getMotionType() const {return mMotionType;}
+	void		setMotionType(uint8_t t) {mMotionType = t;mSleeping=0;mSleepCount=0;}
+	// One bit per motion type (1<<type) plus bit 7 when the body is asleep.
+	uint8_t		getMotionMask() const {return (1<<mMotionType)|(mSleeping<<7);}
+
+	bool		isAsleep() const {return mSleeping==1;}
+	bool		isAwake() const {return mSleeping==0;}
+	// sleep() only takes effect when useSleep is set; both calls clear the sleep counter when they act.
+	void		wakeup() {mSleeping=0;mSleepCount=0;}
+	void		sleep() {if(useSleep) {mSleeping=1;mSleepCount=0;}}
+	// The use* flags are 1-bit fields, so only the lowest bit of the argument is kept.
+	uint8_t		getUseSleep() const {return useSleep;}
+	void		setUseSleep(uint8_t b) {useSleep=b;}
+
+	uint8_t		getUseCcd() const {return useCcd;}
+	void		setUseCcd(uint8_t b) {useCcd=b;}
+
+	uint8_t		getUseContactCallback() const {return useContactCallback;}
+	void		setUseContactCallback(uint8_t b) {useContactCallback=b;}
+
+	uint8_t		getUseSleepCallback() const {return useSleepCallback;}
+	void		setUseSleepCallback(uint8_t b) {useSleepCallback=b;}
+
+	void	 	incrementSleepCount() {mSleepCount++;}
+	void		resetSleepCount() {mSleepCount=0;}
+	uint16_t	getSleepCount() const {return mSleepCount;}
+	// Vector/quaternion views of the float arrays, converted through read_*/store_*.
+	vmVector3 getPosition() const {return read_Vector3(fX);}
+	vmQuat    getOrientation() const {return read_Quat(fQ);}
+	vmVector3 getLinearVelocity() const {return read_Vector3(fV);}
+	vmVector3 getAngularVelocity() const {return read_Vector3(fOmega);}
+	vmVector3 getDeltaLinearVelocity() const {return read_Vector3(deltaLinearVelocity);}
+	vmVector3 getDeltaAngularVelocity() const {return read_Vector3(deltaAngularVelocity);}
+
+	void setPosition(const vmVector3 &pos) {store_Vector3(pos, fX);}
+	void setLinearVelocity(const vmVector3 &vel) {store_Vector3(vel, fV);}
+	void setAngularVelocity(const vmVector3 &vel) {store_Vector3(vel, fOmega);}
+	void setDeltaLinearVelocity(const vmVector3 &vel) {store_Vector3(vel, deltaLinearVelocity);}
+	void setDeltaAngularVelocity(const vmVector3 &vel) {store_Vector3(vel, deltaAngularVelocity);}
+	void setOrientation(const vmQuat &rot) {store_Quat(rot, fQ);}
+	// setAuxils*/reset are defined below the class: the first two recompute center/half, reset restores the defaults.
+	inline void setAuxils(const vmVector3 &centerLocal,const vmVector3 &halfLocal);
+	inline void	setAuxilsCcd(const vmVector3 &centerLocal,const vmVector3 &halfLocal,float timeStep);
+	inline	void reset();
+};
+
+// Builds a state with motion type m, position x, orientation q, linear velocity v and
+// angular velocity omega. Flags, body index, damping and contact filters receive the same
+// defaults as the default constructor; center/half and the delta velocities are not
+// initialised here.
+inline
+TrbState::TrbState(const uint8_t m, const vmVector3& x, const vmQuat& q, const vmVector3& v, const vmVector3& omega)
+{
+	setMotionType(m);
+	// pose and velocities, copied component by component
+	fX[0] = x[0]; fX[1] = x[1]; fX[2] = x[2];
+	fQ[0] = q[0]; fQ[1] = q[1]; fQ[2] = q[2]; fQ[3] = q[3];
+	fV[0] = v[0]; fV[1] = v[1]; fV[2] = v[2];
+	fOmega[0] = omega[0]; fOmega[1] = omega[1]; fOmega[2] = omega[2];
+	// All 32 filter bits set, as in TrbState() and reset(). This constructor used 0xffff,
+	// which cleared the upper 16 filter bits only for bodies created through it.
+	// NOTE(review): confirm no caller relied on the narrower mask.
+	contactFilterSelf=contactFilterTarget=0xffffffff;
+	// remaining defaults mirror TrbState()
+	trbBodyIdx=0;
+	mSleeping = 0;
+	deleted = 0;
+	useSleep = 1;
+	useCcd = 0;
+	useContactCallback = 0;
+	useSleepCallback = 0;
+	mSleepCount=0;
+	linearDamping = 1.0f;
+	angularDamping = 0.99f;
+}
+
+// Zeroes position, linear velocity and angular velocity and sets the orientation
+// array to (0,0,0,1). Flags, filters, damping, AABB and delta velocities are not
+// touched.
+inline void
+TrbState::setIdentity()
+{
+	// clear the three-component arrays together with the first three quaternion slots
+	for (int axis = 0; axis < 3; ++axis)
+	{
+		fX[axis] = 0.0f;
+		fQ[axis] = 0.0f;
+		fV[axis] = 0.0f;
+		fOmega[axis] = 0.0f;
+	}
+	// last quaternion slot
+	fQ[3] = 1.0f;
+}
+
+// Zeroes position, orientation (all four components), linear velocity and angular
+// velocity. Flags, filters, damping, AABB and delta velocities are not touched.
+inline void
+TrbState::setZero()
+{
+	for (int component = 0; component < 4; ++component)
+	{
+		fQ[component] = 0.0f;
+	}
+	// the remaining arrays all have three components
+	for (int axis = 0; axis < 3; ++axis)
+	{
+		fX[axis] = 0.0f;
+		fV[axis] = 0.0f;
+		fOmega[axis] = 0.0f;
+	}
+}
+
+// Recomputes the stored world AABB (center/half) from a box given in local space by centerLocal/halfLocal.
+inline void TrbState::setAuxils(const vmVector3 &centerLocal,const vmVector3 &halfLocal)
+{
+	const vmQuat orientation = getOrientation();
+	const vmVector3 worldCenter = getPosition() + rotate(orientation,centerLocal);
+	const vmVector3 worldHalf = absPerElem(vmMatrix3(orientation)) * halfLocal;
+	for (int axis = 0; axis < 3; ++axis)
+	{
+		center[axis] = worldCenter[axis];
+		half[axis] = worldHalf[axis];
+	}
+}
+
+// CCD variant of setAuxils(): the stored AABB encloses the body's box both at its current
+// position and at the position reached after moving with the linear velocity for timeStep.
+inline void TrbState::setAuxilsCcd(const vmVector3 &centerLocal,const vmVector3 &halfLocal,float timeStep)
+{
+	const vmQuat orientation = getOrientation();
+	const vmVector3 startCenter = getPosition() + rotate(orientation,centerLocal);
+	const vmVector3 extent = absPerElem(vmMatrix3(orientation)) * halfLocal;
+
+	// Box center after moving with the current linear velocity for timeStep.
+	const vmVector3 endCenter = startCenter + getLinearVelocity()*timeStep;
+
+	// Component-wise bounds of the start and end boxes, then back to center/half form.
+	const vmVector3 sweptMin = minPerElem(endCenter - extent,startCenter - extent);
+	const vmVector3 sweptMax = maxPerElem(endCenter + extent,startCenter + extent);
+	const vmVector3 sweptCenter = 0.5f * (sweptMin + sweptMax);
+	const vmVector3 sweptHalf = 0.5f * (sweptMax - sweptMin);
+
+	for (int axis = 0; axis < 3; ++axis)
+	{
+		center[axis] = sweptCenter[axis];
+		half[axis] = sweptHalf[axis];
+	}
+}
+
+inline
+void TrbState::reset() // restores flags, filters, body index and damping to the default-constructor values
+{
+#if 0 // disabled reference code; it uses member names (mDeleted, mPosition, ...) that this class does not declare
+	mSleepCount = 0;
+	mMotionType = PfxMotionTypeActive;
+	mDeleted = 0;
+	mSleeping = 0;
+	mUseSleep = 1;
+	mUseCcd = 0;
+	mUseContactCallback = 0;
+	mUseSleepCallback = 0;
+	mRigidBodyId = 0;
+	mContactFilterSelf = 0xffffffff;
+	mContactFilterTarget = 0xffffffff;
+	mLinearDamping = 1.0f;
+	mAngularDamping = 0.99f;
+	mPosition = vmVector3(0.0f);
+	mOrientation = vmQuat::identity();
+	mLinearVelocity = vmVector3(0.0f);
+	mAngularVelocity = vmVector3(0.0f);
+#endif
+	// Unlike the disabled code above, the active code leaves pose, velocities and AABB untouched.
+	setMotionType(PfxMotionTypeActive);
+	contactFilterSelf=contactFilterTarget=0xffffffff;
+	deleted = 0;
+	mSleeping = 0;
+	useSleep = 1;
+	trbBodyIdx=0;
+	mSleepCount=0;
+	useCcd = 0;
+	useContactCallback = 0;
+	useSleepCallback = 0;
+	linearDamping = 1.0f;
+	angularDamping = 0.99f;
+}
+
+#endif //BT_TRBSTATEVEC_H__
+
+
diff --git a/src/bullet/BulletMultiThreaded/Win32ThreadSupport.cpp b/src/bullet/BulletMultiThreaded/Win32ThreadSupport.cpp
new file mode 100644
index 00000000..1197bbe0
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/Win32ThreadSupport.cpp
@@ -0,0 +1,446 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "Win32ThreadSupport.h"
+
+#ifdef USE_WIN32_THREADING
+
+#include <windows.h>
+
+#include "SpuCollisionTaskProcess.h"
+
+#include "SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h"
+
+
+
+///The number of threads should be equal to the number of available cores
+///@todo: each worker should be linked to a single core, using SetThreadIdealProcessor.
+
+///Win32ThreadSupport implements btThreadSupportInterface with Win32 worker threads instead of SPU tasks.
+///Records the thread count and starts the worker threads described by threadConstructionInfo.
+Win32ThreadSupport::Win32ThreadSupport(const Win32ThreadConstructionInfo & threadConstructionInfo)
+{
+	m_maxNumTasks = threadConstructionInfo.m_numThreads; // startThreads() assigns the same value again
+	startThreads(threadConstructionInfo);
+}
+
+///cleanup/shutdown Libspe2
+Win32ThreadSupport::~Win32ThreadSupport()
+{
+	stopSPU(); // asks every worker thread to exit and closes its handles
+}
+
+
+
+
+#include <stdio.h>
+
+DWORD WINAPI Thread_no_1( LPVOID lpParam ) 
+{
+	// Worker entry point; lpParam is the btSpuStatus slot passed to CreateThread by startThreads().
+	Win32ThreadSupport::btSpuStatus* status = (Win32ThreadSupport::btSpuStatus*)lpParam;
+
+	// One iteration per start-event signal: run the task, or exit when m_userPtr is null.
+	while (1)
+	{
+		WaitForSingleObject(status->m_eventStartHandle,INFINITE);
+		
+		void* userPtr = status->m_userPtr;
+
+		if (userPtr)
+		{
+			btAssert(status->m_status);
+			status->m_userThreadFunc(userPtr,status->m_lsMemory);
+			status->m_status = 2;
+			SetEvent(status->m_eventCompletetHandle);
+		} else
+		{
+			//exit Thread
+			status->m_status = 3;
+			printf("Thread with taskId %i with handle %p exiting\n",status->m_taskId, status->m_threadHandle);
+			SetEvent(status->m_eventCompletetHandle);
+			break;
+		}
+		// status values written here: 2 = task finished, 3 = thread exiting
+	}
+
+	printf("Thread TERMINATED\n");
+	return 0;
+
+}
+
+///send messages to SPUs
+void Win32ThreadSupport::sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t taskId)
+{
+	///	gMidphaseSPU.sendRequest(CMD_GATHER_AND_PROCESS_PAIRLIST, (ppu_address_t) &taskDesc);
+	
+	///we should spawn an SPU task here, and in 'waitForResponse' it should wait for response of the (one of) the first tasks that finished
+	
+	///Only CMD_GATHER_AND_PROCESS_PAIRLIST is handled; uiArgument0 becomes the worker's m_userPtr.
+
+	switch (uiCommand)
+	{
+	case 	CMD_GATHER_AND_PROCESS_PAIRLIST:
+		{
+
+
+//#define SINGLE_THREADED 1
+#ifdef SINGLE_THREADED
+			///single-threaded build: run the task synchronously on the calling thread, always using slot 0
+			btSpuStatus&	spuStatus = m_activeSpuStatus[0];
+			spuStatus.m_userPtr=(void*)uiArgument0;
+			spuStatus.m_userThreadFunc(spuStatus.m_userPtr,spuStatus.m_lsMemory);
+			HANDLE handle =0;
+#else
+
+			///taskId is unsigned, so the first assert below can never fire
+			btSpuStatus&	spuStatus = m_activeSpuStatus[taskId];
+			btAssert(taskId>=0);
+			btAssert(int(taskId)<m_activeSpuStatus.size());
+			///status 1 marks the slot as busy until the worker sets it to 2
+			spuStatus.m_commandId = uiCommand;
+			spuStatus.m_status = 1;
+			spuStatus.m_userPtr = (void*)uiArgument0;
+
+			///fire event to start new task
+			SetEvent(spuStatus.m_eventStartHandle);
+
+#endif //SINGLE_THREADED
+
+			
+
+			break;
+		}
+	default:
+		{
+			///not implemented
+			btAssert(0);
+		}
+
+	};
+
+
+}
+
+
+///check for messages from SPUs
+void Win32ThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1)
+{
+	///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
+	
+	///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
+
+	///Blocks until any completion event is signalled, then reports that slot's task id and status.
+	btAssert(m_activeSpuStatus.size());
+
+	int last = -1;
+#ifndef SINGLE_THREADED
+	DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, INFINITE);
+	btAssert(res != WAIT_FAILED);
+	last = res - WAIT_OBJECT_0;
+	///the index of the signalled handle is also the index of the status slot
+	btSpuStatus& spuStatus = m_activeSpuStatus[last];
+	btAssert(spuStatus.m_threadHandle);
+	btAssert(spuStatus.m_eventCompletetHandle);
+
+	//WaitForSingleObject(spuStatus.m_eventCompletetHandle, INFINITE);
+	btAssert(spuStatus.m_status > 1);
+	spuStatus.m_status = 0;
+
+	///need to find an active spu
+	btAssert(last>=0);
+
+#else
+	last=0;
+	btSpuStatus& spuStatus = m_activeSpuStatus[last];
+#endif //SINGLE_THREADED
+
+	///NOTE: in the threaded build m_status was reset to 0 above, so *puiArgument1 always receives 0.
+
+	*puiArgument0 = spuStatus.m_taskId;
+	*puiArgument1 = spuStatus.m_status;
+
+
+}
+
+
+///check for messages from SPUs
+///Polls for a finished task, waiting at most timeOutInMilliseconds.
+///Returns true and stores the task id in *puiArgument0 and the (already reset) status in
+///*puiArgument1 when a task has completed; returns false on timeout or when the wait fails.
+bool Win32ThreadSupport::isTaskCompleted(unsigned int *puiArgument0, unsigned int *puiArgument1, int timeOutInMilliseconds)
+{
+	btAssert(m_activeSpuStatus.size());
+
+	int last = -1;
+#ifndef SINGLE_THREADED
+	DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, timeOutInMilliseconds);
+
+	//WAIT_TIMEOUT is the documented timeout result of WaitForMultipleObjects (same value as the
+	//NTSTATUS code STATUS_TIMEOUT that was tested before).
+	if ((res == WAIT_TIMEOUT) || (res == WAIT_FAILED))
+	{
+		return false;
+	}
+
+	//the index of the signalled handle is also the index into m_activeSpuStatus
+	last = res - WAIT_OBJECT_0;
+	btAssert(last>=0);
+
+	btSpuStatus& spuStatus = m_activeSpuStatus[last];
+	btAssert(spuStatus.m_threadHandle);
+	btAssert(spuStatus.m_eventCompletetHandle);
+
+	//the worker sets the status to a value above 1 before signalling completion
+	btAssert(spuStatus.m_status > 1);
+	spuStatus.m_status = 0;
+#else
+	//single-threaded build: the work already ran inside sendRequest, so slot 0 is always complete.
+	//The early return above lives entirely inside the #ifndef branch so that braces balance in
+	//both configurations (previously the closing brace of the 'if' was compiled unconditionally).
+	(void)timeOutInMilliseconds;
+	last=0;
+	btSpuStatus& spuStatus = m_activeSpuStatus[last];
+#endif //SINGLE_THREADED
+
+	*puiArgument0 = spuStatus.m_taskId;
+	*puiArgument1 = spuStatus.m_status;
+
+	return true;
+}
+
+
+
+
+///Formats "<prefix><uniqueName><index>" into a 32-byte event-name buffer without overrunning it.
+///Returns the buffer, or 0 when the name does not fit; the caller then creates an anonymous event.
+static const char* btWin32FormatEventName(char (&buffer)[32], const char* prefix, const char* uniqueName, int index)
+{
+	int len = _snprintf(buffer, sizeof(buffer), "%s%s%d", prefix, uniqueName, index);
+	if (len < 0 || len >= int(sizeof(buffer)))
+	{
+		buffer[0] = 0;
+		return 0;
+	}
+	return buffer;
+}
+
+///Creates m_numThreads worker threads, each with its own start/complete event pair and status slot.
+void Win32ThreadSupport::startThreads(const Win32ThreadConstructionInfo& threadConstructionInfo)
+{
+	m_activeSpuStatus.resize(threadConstructionInfo.m_numThreads);
+	m_completeHandles.resize(threadConstructionInfo.m_numThreads);
+	m_maxNumTasks = threadConstructionInfo.m_numThreads;
+	for (int i=0;i<threadConstructionInfo.m_numThreads;i++)
+	{
+		printf("starting thread %d\n",i);
+		btSpuStatus&	spuStatus = m_activeSpuStatus[i];
+		LPSECURITY_ATTRIBUTES lpThreadAttributes=NULL;
+		SIZE_T dwStackSize=threadConstructionInfo.m_threadStackSize;
+		LPTHREAD_START_ROUTINE lpStartAddress=&Thread_no_1;
+		LPVOID lpParameter=&spuStatus;
+		DWORD dwCreationFlags=0;
+		LPDWORD lpThreadId=0;
+		spuStatus.m_userPtr=0;
+
+		//auto-reset events, initially unsignalled; the names used to be written with an unbounded sprintf
+		spuStatus.m_eventStartHandle = CreateEventA (0,false,false,btWin32FormatEventName(spuStatus.m_eventStartHandleName,"eventStart",threadConstructionInfo.m_uniqueName,i));
+		spuStatus.m_eventCompletetHandle = CreateEventA (0,false,false,btWin32FormatEventName(spuStatus.m_eventCompletetHandleName,"eventComplete",threadConstructionInfo.m_uniqueName,i));
+		m_completeHandles[i] = spuStatus.m_eventCompletetHandle;
+
+		HANDLE handle = CreateThread(lpThreadAttributes,dwStackSize,lpStartAddress,lpParameter,	dwCreationFlags,lpThreadId);
+		SetThreadPriority(handle,THREAD_PRIORITY_HIGHEST);
+		//SetThreadPriority(handle,THREAD_PRIORITY_TIME_CRITICAL);
+		SetThreadAffinityMask(handle, 1<<i);
+		spuStatus.m_taskId = i;
+		spuStatus.m_commandId = 0;
+		spuStatus.m_status = 0;
+		spuStatus.m_threadHandle = handle;
+		spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc();
+		spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc;
+		printf("started thread %d with threadHandle %p\n",i,handle);
+	}
+}
+
+void Win32ThreadSupport::startSPU() // no-op here: the worker threads are created by startThreads() from the constructor
+{
+}
+
+
+///tell the task scheduler we are done with the SPU tasks
+void Win32ThreadSupport::stopSPU()
+{
+	int i;
+	for (i=0;i<m_activeSpuStatus.size();i++)
+	{
+		btSpuStatus& spuStatus = m_activeSpuStatus[i];
+		if (spuStatus.m_status>0)
+		{
+			WaitForSingleObject(spuStatus.m_eventCompletetHandle, INFINITE);
+		}
+		
+		// A null m_userPtr is the exit request: the worker signals completion once more and leaves its loop.
+		spuStatus.m_userPtr = 0;
+		SetEvent(spuStatus.m_eventStartHandle);
+		WaitForSingleObject(spuStatus.m_eventCompletetHandle, INFINITE);
+		// NOTE(review): the thread handle is closed without waiting on the thread itself.
+		CloseHandle(spuStatus.m_eventCompletetHandle);
+		CloseHandle(spuStatus.m_eventStartHandle);
+		CloseHandle(spuStatus.m_threadHandle);
+	}
+
+	m_activeSpuStatus.clear();
+	m_completeHandles.clear();
+
+}
+
+
+
+class btWin32Barrier : public btBarrier
+{
+private:
+	CRITICAL_SECTION mExternalCriticalSection;
+	CRITICAL_SECTION mLocalCriticalSection;
+	HANDLE mRunEvent,mNotifyEvent;
+	int mCounter,mEnableCounter;
+	int mMaxCount;
+	// mCounter: arrivals in the current round; mEnableCounter: waiters of the finished round that have not left sync() yet.
+public:
+	btWin32Barrier()
+	{
+		mCounter = 0;
+		mMaxCount = 1;
+		mEnableCounter = 0;
+		InitializeCriticalSection(&mExternalCriticalSection);
+		InitializeCriticalSection(&mLocalCriticalSection);
+		mRunEvent = CreateEvent(NULL,TRUE,FALSE,NULL);
+		mNotifyEvent = CreateEvent(NULL,TRUE,FALSE,NULL);
+	}
+	// Both events are manual-reset and start unsignalled; mLocalCriticalSection is created and deleted but never entered in this class.
+	virtual ~btWin32Barrier()
+	{
+		DeleteCriticalSection(&mExternalCriticalSection);
+		DeleteCriticalSection(&mLocalCriticalSection);
+		CloseHandle(mRunEvent);
+		CloseHandle(mNotifyEvent);
+	}
+	// Blocks the caller until mMaxCount threads have called sync(); the last arrival releases the others.
+	void sync()
+	{
+		int eventId;
+
+		EnterCriticalSection(&mExternalCriticalSection);
+
+		//PFX_PRINTF("enter taskId %d count %d stage %d phase %d mEnableCounter %d\n",taskId,mCounter,debug&0xff,debug>>16,mEnableCounter);
+		// If waiters of the previous round are still leaving, wait for mNotifyEvent before joining the new round.
+		if(mEnableCounter > 0) {
+			ResetEvent(mNotifyEvent);
+			LeaveCriticalSection(&mExternalCriticalSection);
+			WaitForSingleObject(mNotifyEvent,INFINITE); 
+			EnterCriticalSection(&mExternalCriticalSection);
+		}
+
+		eventId = mCounter;
+		mCounter++;
+		// Last arrival: release the waiters and remember how many of them still have to wake up.
+		if(eventId == mMaxCount-1) {
+			SetEvent(mRunEvent);
+
+			mEnableCounter = mCounter-1;
+			mCounter = 0;
+		}
+		else {
+			ResetEvent(mRunEvent);
+			LeaveCriticalSection(&mExternalCriticalSection);
+			WaitForSingleObject(mRunEvent,INFINITE); 
+			EnterCriticalSection(&mExternalCriticalSection);
+			mEnableCounter--;
+		}
+		// Once every waiter has woken up, let threads blocked at the top of sync() proceed.
+		if(mEnableCounter == 0) {
+			SetEvent(mNotifyEvent);
+		}
+
+		//PFX_PRINTF("leave taskId %d count %d stage %d phase %d mEnableCounter %d\n",taskId,mCounter,debug&0xff,debug>>16,mEnableCounter);
+
+		LeaveCriticalSection(&mExternalCriticalSection);
+	}
+
+	virtual void setMaxCount(int n) {mMaxCount = n;}
+	virtual int  getMaxCount() {return mMaxCount;}
+};
+
+class btWin32CriticalSection : public btCriticalSection
+{
+private:
+	CRITICAL_SECTION mCriticalSection;
+	// mCommonBuff is not declared in this class (presumably inherited from btCriticalSection - confirm).
+public:
+	btWin32CriticalSection()
+	{
+		InitializeCriticalSection(&mCriticalSection);
+	}
+
+	~btWin32CriticalSection()
+	{
+		DeleteCriticalSection(&mCriticalSection);
+	}
+	// Shared parameters live in mCommonBuff[1..31]; slot 0 is the lock flag written by lock()/unlock().
+	unsigned int getSharedParam(int i)
+	{
+		btAssert(i>=0&&i<31);
+		return mCommonBuff[i+1];
+	}
+
+	void setSharedParam(int i,unsigned int p)
+	{
+		btAssert(i>=0&&i<31);
+		mCommonBuff[i+1] = p;
+	}
+	// Enters the critical section, then sets the lock flag.
+	void lock()
+	{
+		EnterCriticalSection(&mCriticalSection);
+		mCommonBuff[0] = 1;
+	}
+	// Clears the lock flag, then leaves the critical section.
+	void unlock()
+	{
+		mCommonBuff[0] = 0;
+		LeaveCriticalSection(&mCriticalSection);
+	}
+};
+
+
+///Constructs a btWin32Barrier in 16-byte aligned btAlignedAlloc storage and sets its count to getNumTasks().
+btBarrier*	Win32ThreadSupport::createBarrier()
+{
+	btWin32Barrier* result = new(btAlignedAlloc(sizeof(btWin32Barrier),16)) btWin32Barrier();
+	result->setMaxCount(getNumTasks());
+	return result;
+}
+
+///Constructs a btWin32CriticalSection in 16-byte aligned btAlignedAlloc storage and returns it.
+btCriticalSection* Win32ThreadSupport::createCriticalSection()
+{
+	void* storage = btAlignedAlloc(sizeof(btWin32CriticalSection),16);
+	return new(storage) btWin32CriticalSection();
+}
+
+
+
+#endif //USE_WIN32_THREADING
+
+
diff --git a/src/bullet/BulletMultiThreaded/Win32ThreadSupport.h b/src/bullet/BulletMultiThreaded/Win32ThreadSupport.h
new file mode 100644
index 00000000..abf5d21e
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/Win32ThreadSupport.h
@@ -0,0 +1,138 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "LinearMath/btScalar.h"
+#include "PlatformDefinitions.h"
+
+#ifdef USE_WIN32_THREADING  //platform specific defines are defined in PlatformDefinitions.h
+
+#ifndef BT_WIN32_THREAD_SUPPORT_H
+#define BT_WIN32_THREAD_SUPPORT_H
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+#include "btThreadSupportInterface.h"
+
+
+typedef void (*Win32ThreadFunc)(void* userPtr,void* lsMemory);
+typedef void* (*Win32lsMemorySetupFunc)();
+
+
+///Win32ThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
+class Win32ThreadSupport : public btThreadSupportInterface 
+{
+public:
+	///placeholder, until libspe2 support is there
+	struct	btSpuStatus
+	{
+		uint32_t	m_taskId;
+		uint32_t	m_commandId;
+		uint32_t	m_status;
+
+		Win32ThreadFunc	m_userThreadFunc;
+		void*	m_userPtr; //for taskDesc etc
+		void*	m_lsMemory; //initialized using Win32LocalStoreMemorySetupFunc
+
+		void*	m_threadHandle; //this one is calling 'Win32ThreadFunc'
+
+		void*	m_eventStartHandle;
+		char	m_eventStartHandleName[32];
+
+		void*	m_eventCompletetHandle;
+		char	m_eventCompletetHandleName[32];
+		
+
+	};
+private:
+	///one status slot and one completion handle per worker, stored at the same index
+	btAlignedObjectArray<btSpuStatus>	m_activeSpuStatus;
+	btAlignedObjectArray<void*>			m_completeHandles;
+	
+	int m_maxNumTasks;
+public:
+	///Setup and initialize SPU/CELL/Libspe2
+
+	struct	Win32ThreadConstructionInfo
+	{
+		Win32ThreadConstructionInfo(const char* uniqueName,
+									Win32ThreadFunc userThreadFunc,
+									Win32lsMemorySetupFunc	lsMemoryFunc,
+									int numThreads=1,
+									int threadStackSize=65535
+									)
+									:m_uniqueName(uniqueName),
+									m_userThreadFunc(userThreadFunc),
+									m_lsMemoryFunc(lsMemoryFunc),
+									m_numThreads(numThreads),
+									m_threadStackSize(threadStackSize)
+		{
+
+		}
+		///m_uniqueName is stored as a pointer only; the string itself is not copied
+		const char*				m_uniqueName;
+		Win32ThreadFunc			m_userThreadFunc;
+		Win32lsMemorySetupFunc	m_lsMemoryFunc;
+		int						m_numThreads;
+		int						m_threadStackSize;
+
+	};
+
+
+
+	Win32ThreadSupport(const Win32ThreadConstructionInfo& threadConstructionInfo);
+
+///cleanup/shutdown Libspe2
+	virtual	~Win32ThreadSupport();
+
+	void	startThreads(const Win32ThreadConstructionInfo&	threadInfo);
+
+
+///send messages to SPUs
+	virtual	void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1);
+
+///check for messages from SPUs
+	virtual	void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
+
+	virtual bool isTaskCompleted(unsigned int *puiArgument0, unsigned int *puiArgument1, int timeOutInMilliseconds);
+
+///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
+	virtual	void startSPU();
+
+///tell the task scheduler we are done with the SPU tasks
+	virtual	void stopSPU();
+
+	virtual	void	setNumTasks(int numTasks)
+	{
+		m_maxNumTasks = numTasks;
+	}
+
+	virtual int getNumTasks() const
+	{
+		return m_maxNumTasks;
+	}
+	///returns the m_lsMemory pointer of slot taskId
+	virtual void*	getThreadLocalMemory(int taskId)
+	{
+		return m_activeSpuStatus[taskId].m_lsMemory;
+	}
+	virtual btBarrier*	createBarrier();
+
+	virtual btCriticalSection* createCriticalSection();
+
+};
+
+#endif //BT_WIN32_THREAD_SUPPORT_H
+
+#endif //USE_WIN32_THREADING
diff --git a/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphase.cpp b/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphase.cpp
new file mode 100644
index 00000000..84a5e59f
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphase.cpp
@@ -0,0 +1,590 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///The 3 following lines include the CPU implementation of the kernels, keep them in this order.
+#include "BulletMultiThreaded/btGpuDefines.h"
+#include "BulletMultiThreaded/btGpuUtilsSharedDefs.h"
+#include "BulletMultiThreaded/btGpuUtilsSharedCode.h"
+
+
+
+#include "LinearMath/btAlignedAllocator.h"
+#include "LinearMath/btQuickprof.h"
+#include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
+
+
+
+#include "btGpuDefines.h"
+#include "btGpuUtilsSharedDefs.h"
+
+#include "btGpu3DGridBroadphaseSharedDefs.h"
+
+#include "btGpu3DGridBroadphase.h"
+#include <string.h> //for memset
+
+
+#include <stdio.h>
+
+
+
+static bt3DGridBroadphaseParams s3DGridBroadphaseParams;
+
+
+///construct the broadphase with an internally created pair cache, then set up the grid through _initialize()
+btGpu3DGridBroadphase::btGpu3DGridBroadphase(	const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
+										int gridSizeX, int gridSizeY, int gridSizeZ, 
+										int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+										int maxBodiesPerCell,
+										btScalar cellFactorAABB) :
+	btSimpleBroadphase(maxSmallProxies,
+//				     a btHashedOverlappingPairCache is placement-constructed in 16-byte aligned storage (btSortedOverlappingPairCache would be the alternative)
+				     new (btAlignedAlloc(sizeof(btHashedOverlappingPairCache),16)) btHashedOverlappingPairCache),
+	m_bInitialized(false),
+    m_numBodies(0)
+{
+	_initialize(worldAabbMin, worldAabbMax, gridSizeX, gridSizeY, gridSizeZ, 
+				maxSmallProxies, maxLargeProxies, maxPairsPerBody,
+				maxBodiesPerCell, cellFactorAABB);
+}
+
+
+///construct the broadphase around a caller-supplied pair cache; NOTE(review): _initialize() still sets m_ownsPairCache = true - confirm ownership transfer is intended
+btGpu3DGridBroadphase::btGpu3DGridBroadphase(	btOverlappingPairCache* overlappingPairCache,
+										const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
+										int gridSizeX, int gridSizeY, int gridSizeZ, 
+										int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+										int maxBodiesPerCell,
+										btScalar cellFactorAABB) :
+	btSimpleBroadphase(maxSmallProxies, overlappingPairCache),
+	m_bInitialized(false),
+    m_numBodies(0)
+{
+	_initialize(worldAabbMin, worldAabbMax, gridSizeX, gridSizeY, gridSizeZ, 
+				maxSmallProxies, maxLargeProxies, maxPairsPerBody,
+				maxBodiesPerCell, cellFactorAABB);
+}
+
+
+///tear down the host buffers through _finalize(); asserts that _initialize() has completed
+btGpu3DGridBroadphase::~btGpu3DGridBroadphase()
+{
+	//the pair cache is not released here: btSimpleBroadphase is expected to free it, because _initialize() sets m_ownsPairCache
+	assert(m_bInitialized);
+	_finalize();
+}
+
+
+///set up the grid parameters and allocate all host-side buffers; expected to run once per object (asserts !m_bInitialized)
+void btGpu3DGridBroadphase::_initialize(	const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
+										int gridSizeX, int gridSizeY, int gridSizeZ, 
+										int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+										int maxBodiesPerCell,
+										btScalar cellFactorAABB)
+{
+	// set various parameters
+	m_ownsPairCache = true;
+	m_params.m_gridSizeX = gridSizeX;
+	m_params.m_gridSizeY = gridSizeY;
+	m_params.m_gridSizeZ = gridSizeZ;
+	m_params.m_numCells = m_params.m_gridSizeX * m_params.m_gridSizeY * m_params.m_gridSizeZ;
+	btVector3 w_org = worldAabbMin;
+	m_params.m_worldOriginX = w_org.getX();
+	m_params.m_worldOriginY = w_org.getY();
+	m_params.m_worldOriginZ = w_org.getZ();
+	btVector3 w_size = worldAabbMax - worldAabbMin;
+	m_params.m_cellSizeX = w_size.getX() / m_params.m_gridSizeX;
+	m_params.m_cellSizeY = w_size.getY() / m_params.m_gridSizeY;
+	m_params.m_cellSizeZ = w_size.getZ() / m_params.m_gridSizeZ;
+	// half of the smallest cell edge: isLargeProxy() compares the scaled proxy radius against this value
+	m_maxRadius = btMin(btMin(m_params.m_cellSizeX, m_params.m_cellSizeY), m_params.m_cellSizeZ);
+	m_maxRadius *= btScalar(0.5f);
+	m_params.m_numBodies = m_numBodies;
+	m_params.m_maxBodiesPerCell = maxBodiesPerCell;
+
+	m_numLargeHandles = 0;
+	m_maxLargeHandles = maxLargeProxies;
+	m_maxPairsPerBody = maxPairsPerBody;
+	m_cellFactorAABB = cellFactorAABB;
+	m_LastLargeHandleIndex = -1;
+
+	assert(!m_bInitialized);
+	// allocate host storage
+	m_hBodiesHash = new unsigned int[m_maxHandles * 2];
+	memset(m_hBodiesHash, 0x00, m_maxHandles*2*sizeof(unsigned int));
+
+	m_hCellStart = new unsigned int[m_params.m_numCells];
+	memset(m_hCellStart, 0x00, m_params.m_numCells * sizeof(unsigned int));
+
+	m_hPairBuffStartCurr = new unsigned int[m_maxHandles * 2 + 2];
+	// --------------- for now, init with m_maxPairsPerBody for each body
+	m_hPairBuffStartCurr[0] = 0;
+	m_hPairBuffStartCurr[1] = 0;
+	for(int i = 1; i <= m_maxHandles; i++) 
+	{
+		m_hPairBuffStartCurr[i * 2] = m_hPairBuffStartCurr[(i-1) * 2] + m_maxPairsPerBody;
+		m_hPairBuffStartCurr[i * 2 + 1] = 0;
+	}
+	//----------------
+	unsigned int numAABB = m_maxHandles + m_maxLargeHandles;
+	m_hAABB = new bt3DGrid3F1U[numAABB * 2]; // AABB Min & Max
+
+	m_hPairBuff = new unsigned int[m_maxHandles * m_maxPairsPerBody];
+	memset(m_hPairBuff, 0x00, m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)); // needed?
+
+	m_hPairScan = new unsigned int[m_maxHandles + 1];
+
+	m_hPairOut = new unsigned int[m_maxHandles * m_maxPairsPerBody];
+
+	// large proxies
+	// allocate handles buffer and put all handles on free list
+	m_pLargeHandlesRawPtr = btAlignedAlloc(sizeof(btSimpleBroadphaseProxy) * m_maxLargeHandles, 16);
+	m_pLargeHandles = new(m_pLargeHandlesRawPtr) btSimpleBroadphaseProxy[m_maxLargeHandles];
+	m_firstFreeLargeHandle = 0;
+	for (int i = m_firstFreeLargeHandle; i < m_maxLargeHandles; i++)
+	{
+		m_pLargeHandles[i].SetNextFree(i + 1);
+		m_pLargeHandles[i].m_uniqueId = m_maxHandles+2+i;
+	}
+	// close the free list; with maxLargeProxies == 0 there is no last element and index -1 must not be written
+	if (m_maxLargeHandles > 0)
+	{
+		m_pLargeHandles[m_maxLargeHandles - 1].SetNextFree(0);
+	}
+
+	// debug data
+	m_numPairsAdded = 0;
+	m_numPairsRemoved = 0;
+	m_numOverflows = 0;
+
+	m_bInitialized = true;
+}
+
+
+///release every buffer allocated by _initialize() and clear m_bInitialized
+void btGpu3DGridBroadphase::_finalize()
+{
+    assert(m_bInitialized);
+    delete [] m_hBodiesHash;
+    delete [] m_hCellStart;
+    delete [] m_hPairBuffStartCurr;
+    delete [] m_hAABB;
+	delete [] m_hPairBuff;
+	delete [] m_hPairScan;
+	delete [] m_hPairOut;
+	btAlignedFree(m_pLargeHandlesRawPtr); // frees the raw block only; no proxy destructors are invoked here
+	m_bInitialized = false;
+}
+
+
+
+void btGpu3DGridBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
+{
+	// Refreshes the pair cache for the current set of proxies. The grid
+	// pipeline only runs while at least one small proxy exists; the
+	// large/large pass is executed on both paths.
+	if(m_numHandles > 0)
+	{
+		setParameters(&m_params);	// update constants
+		prepareAABB();				// prepare AABB array
+		calcHashAABB();				// calculate hash
+		sortHash();					// sort bodies based on hash
+		findCellStart();			// find start of each cell
+		findOverlappingPairs();		// small/small
+		findPairsLarge();			// small/large
+
+		// transfer the detected changes to the CPU pair cache
+		computePairCacheChanges();
+		scanOverlappingPairBuff();
+		squeezeOverlappingPairBuff();
+		addPairsToCache(dispatcher);
+
+		// large/large pairs are found and cached separately
+		addLarge2LargePairsToCache(dispatcher);
+	}
+	else
+	{
+		// no small proxies at all: only the large/large pass is needed
+		BT_PROFILE("addLarge2LargePairsToCache");
+		addLarge2LargePairsToCache(dispatcher);
+	}
+}
+
+
+
+void btGpu3DGridBroadphase::addPairsToCache(btDispatcher* dispatcher)
+{
+	// m_hPairScan[b]..m_hPairScan[b+1] delimits the entries of body b in m_hPairOut; each entry is added to or removed from the pair cache
+	m_numPairsAdded = 0;
+	m_numPairsRemoved = 0;
+	for(int body = 0; body < m_numHandles; body++) 
+	{
+		const unsigned int first = m_hPairScan[body];
+		const unsigned int count = m_hPairScan[body+1] - first;
+		if(count == 0) continue;
+		const unsigned int* entries = m_hPairOut + first;
+		btSimpleBroadphaseProxy* proxy0 = &m_pHandles[m_hAABB[body * 2].uw];
+		for(unsigned int e = 0; e < count; e++)
+		{
+			const unsigned int entry = entries[e];
+			// strip the flag bits to recover the handle index
+			unsigned int index1 = entry & (~BT_3DGRID_PAIR_ANY_FLG);
+			btSimpleBroadphaseProxy* proxy1 = 0;
+			if(index1 >= (unsigned int)m_maxHandles)
+			{
+				// indices at or above m_maxHandles address the large-proxy pool
+				index1 -= m_maxHandles;
+				btAssert((index1 >= 0) && (index1 < (unsigned int)m_maxLargeHandles));
+				proxy1 = &m_pLargeHandles[index1];
+			}
+			else
+			{
+				proxy1 = &m_pHandles[index1];
+			}
+			if((entry & BT_3DGRID_PAIR_NEW_FLG) == 0)
+			{
+				m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
+				m_numPairsRemoved++;
+			}
+			else
+			{
+				m_pairCache->addOverlappingPair(proxy0,proxy1);
+				m_numPairsAdded++;
+			}
+		}
+	}
+}
+
+
+
+btBroadphaseProxy* btGpu3DGridBroadphase::createProxy(  const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy)
+{
+	// Small proxies are delegated to btSimpleBroadphase. Proxies that isLargeProxy() classifies
+	// as large are constructed in place inside the separate m_pLargeHandles pool instead.
+	if(!isLargeProxy(aabbMin, aabbMax))
+	{
+		return btSimpleBroadphase::createProxy(aabbMin, aabbMax, shapeType, userPtr, collisionFilterGroup, collisionFilterMask, dispatcher, multiSapProxy);
+	}
+
+	if (m_numLargeHandles >= m_maxLargeHandles)
+	{
+		// Large pool exhausted: a bigger cell size would turn 'large' proxies into 'small'
+		// ones (fitting a cell). btAssert fires, then a null proxy is returned.
+		btAssert(0);
+		return 0;
+	}
+
+	btAssert((aabbMin[0]<= aabbMax[0]) && (aabbMin[1]<= aabbMax[1]) && (aabbMin[2]<= aabbMax[2]));
+	// placement-new into the pool slot that was just taken from the free list
+	const int slot = allocLargeHandle();
+	return new (&m_pLargeHandles[slot])btSimpleBroadphaseProxy(aabbMin,aabbMax,shapeType,userPtr,collisionFilterGroup,collisionFilterMask,multiSapProxy);
+}
+
+
+
+void btGpu3DGridBroadphase::destroyProxy(btBroadphaseProxy* proxy, btDispatcher* dispatcher)
+{
+	// Proxies from the small pool are handed back to the base class.
+	// Large ones return to the large free list, after which every
+	// cached pair that mentions them is removed.
+	if(!isLargeProxy(proxy))
+	{
+		btSimpleBroadphase::destroyProxy(proxy, dispatcher);
+		return;
+	}
+
+	// release the slot first, then purge the pair cache
+	btSimpleBroadphaseProxy* largeProxy = static_cast<btSimpleBroadphaseProxy*>(proxy);
+	freeLargeHandle(largeProxy);
+	m_pairCache->removeOverlappingPairsContainingProxy(proxy,dispatcher);
+}
+
+///restore every (start, current) entry of m_hPairBuffStartCurr: start = body * m_maxPairsPerBody, current = 0; 'dispatcher' is not used
+void btGpu3DGridBroadphase::resetPool(btDispatcher* dispatcher)
+{
+	unsigned int slotStart = 0;
+	int body = 0;
+	do
+	{
+		m_hPairBuffStartCurr[body * 2] = slotStart;
+		m_hPairBuffStartCurr[body * 2 + 1] = 0;
+		slotStart += m_maxPairsPerBody;
+	} while(++body <= m_maxHandles);
+}
+
+
+
+bool btGpu3DGridBroadphase::isLargeProxy(const btVector3& aabbMin,  const btVector3& aabbMax)
+{
+	// A proxy is 'large' when half of its AABB diagonal (the bounding-sphere radius, which
+	// covers any rotation), scaled by the user factor m_cellFactorAABB, exceeds m_maxRadius.
+
+	const btVector3 extent = aabbMax - aabbMin;
+	const btScalar boundingRadius = extent.length() * btScalar(0.5f);
+	const btScalar scaledRadius = boundingRadius * m_cellFactorAABB;
+	return (scaledRadius > m_maxRadius);
+}
+
+
+///a proxy counts as large when its unique id is >= m_maxHandles+2, the first id _initialize() assigns to the large pool
+bool btGpu3DGridBroadphase::isLargeProxy(btBroadphaseProxy* proxy)
+{
+	return (proxy->getUid() >= (m_maxHandles+2));
+}
+
+
+
+void btGpu3DGridBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher)
+{
+	// Tests every pair of live large proxies against each other: overlapping pairs missing
+	// from the cache are added, cached pairs that no longer overlap are removed. The scan
+	// also lowers m_LastLargeHandleIndex to the highest slot that still holds a client object.
+	if (m_numLargeHandles <= 0)
+	{
+		return;
+	}
+
+	int highestLive = -1;
+	for(int a = 0; a <= m_LastLargeHandleIndex; a++)
+	{
+		btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[a];
+		if(proxy0->m_clientObject)
+		{
+			highestLive = a;
+			for(int b = a + 1; b <= m_LastLargeHandleIndex; b++)
+			{
+				btSimpleBroadphaseProxy* proxy1 = &m_pLargeHandles[b];
+				if(!proxy1->m_clientObject)
+				{
+					continue;
+				}
+				btAssert(proxy0 != proxy1);
+				btSimpleBroadphaseProxy* p0 = getSimpleProxyFromProxy(proxy0);
+				btSimpleBroadphaseProxy* p1 = getSimpleProxyFromProxy(proxy1);
+				// both queries run exactly once for every live pair
+				const bool overlapping = aabbOverlap(p0,p1);
+				const bool cached = (m_pairCache->findPair(proxy0,proxy1) != 0);
+				if(overlapping && !cached)
+				{
+					m_pairCache->addOverlappingPair(proxy0,proxy1);
+				}
+				else if(!overlapping && cached)
+				{
+					m_pairCache->removeOverlappingPair(proxy0,proxy1,dispatcher);
+				}
+			}
+		}
+	}
+
+	// later scans stop at the highest slot found in use
+	m_LastLargeHandleIndex = highestLive;
+}
+
+
+
+void btGpu3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback)
+{
+	// after the base-class pass, every live large proxy is handed to the callback without a ray/AABB test at this level
+	btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback);
+	for (int slot = 0; slot <= m_LastLargeHandleIndex; slot++)
+	{
+		btSimpleBroadphaseProxy* largeProxy = &m_pLargeHandles[slot];
+		if(largeProxy->m_clientObject)
+		{
+			rayCallback.process(largeProxy);
+		}
+	}
+}
+
+
+
+//
+// overrides for CPU version
+//
+
+
+///fill m_hAABB with (min,max) entry pairs: all live small proxies first, then all live large proxies; also shrinks both "last index" markers
+void btGpu3DGridBroadphase::prepareAABB()
+{
+	BT_PROFILE("prepareAABB");
+	bt3DGrid3F1U* pBB = m_hAABB;
+	int i;
+	int new_largest_index = -1;
+	unsigned int num_small = 0;
+	for(i = 0; i <= m_LastHandleIndex; i++) 
+	{
+		btSimpleBroadphaseProxy* proxy0 = &m_pHandles[i];
+		if(!proxy0->m_clientObject)
+		{
+			continue; // slot without a client object: skipped
+		}
+		new_largest_index = i;
+		pBB->fx = proxy0->m_aabbMin.getX();
+		pBB->fy = proxy0->m_aabbMin.getY();
+		pBB->fz = proxy0->m_aabbMin.getZ();
+		pBB->uw = i; // the min entry carries the handle index
+		pBB++;
+		pBB->fx = proxy0->m_aabbMax.getX();
+		pBB->fy = proxy0->m_aabbMax.getY();
+		pBB->fz = proxy0->m_aabbMax.getZ();
+		pBB->uw = num_small; // the max entry carries the dense position inside m_hAABB
+		pBB++;
+		num_small++;
+	}
+	m_LastHandleIndex = new_largest_index;
+	new_largest_index = -1;
+	unsigned int num_large = 0;
+	for(i = 0; i <= m_LastLargeHandleIndex; i++) 
+	{
+		btSimpleBroadphaseProxy* proxy0 = &m_pLargeHandles[i];
+		if(!proxy0->m_clientObject)
+		{
+			continue; // slot without a client object: skipped
+		}
+		new_largest_index = i;
+		pBB->fx = proxy0->m_aabbMin.getX();
+		pBB->fy = proxy0->m_aabbMin.getY();
+		pBB->fz = proxy0->m_aabbMin.getZ();
+		pBB->uw = i + m_maxHandles; // large handles are offset by m_maxHandles; addPairsToCache() undoes this
+		pBB++;
+		pBB->fx = proxy0->m_aabbMax.getX();
+		pBB->fy = proxy0->m_aabbMax.getY();
+		pBB->fz = proxy0->m_aabbMax.getZ();
+		pBB->uw = num_large + m_maxHandles;
+		pBB++;
+		num_large++;
+	}
+	m_LastLargeHandleIndex = new_largest_index;
+	// paranoid checks
+	btAssert(num_small == m_numHandles);
+	btAssert(num_large == m_numLargeHandles);
+	return;
+}
+
+
+///copy *hostParams into the file-static s3DGridBroadphaseParams (presumably what the shared kernel code reads as BT_GPU_params - confirm in btGpuDefines.h)
+void btGpu3DGridBroadphase::setParameters(bt3DGridBroadphaseParams* hostParams)
+{
+	s3DGridBroadphaseParams = *hostParams;
+	return;
+}
+
+
+///hand m_hAABB and m_hBodiesHash to btGpu_calcHashAABB for the m_numHandles small bodies
+void btGpu3DGridBroadphase::calcHashAABB()
+{
+	BT_PROFILE("bt3DGrid_calcHashAABB");
+	btGpu_calcHashAABB(m_hAABB, m_hBodiesHash, m_numHandles);
+	return;
+}
+
+
+///sort the (hash, index) records stored in m_hBodiesHash by hash, in descending order
+void btGpu3DGridBroadphase::sortHash()
+{
+	class bt3DGridHashKey
+	{
+	public:
+	   unsigned int hash;
+	   unsigned int index;
+	   void quickSort(bt3DGridHashKey* pData, int lo, int hi) // recursive in-place sort of pData[lo..hi], bounds inclusive; 'this' is not used
+	   {
+			int i=lo, j=hi;
+			bt3DGridHashKey x = pData[(lo+hi)/2]; // pivot: middle element, copied
+			do
+			{    
+				while(pData[i].hash > x.hash) i++; // larger hashes stay on the left, hence descending order
+				while(x.hash > pData[j].hash) j--;
+				if(i <= j)
+				{
+					bt3DGridHashKey t = pData[i];
+					pData[i] = pData[j];
+					pData[j] = t;
+					i++; j--;
+				}
+			} while(i <= j);
+			if(lo < j) pData->quickSort(pData, lo, j);
+			if(i < hi) pData->quickSort(pData, i, hi);
+	   }
+	};
+	BT_PROFILE("bt3DGrid_sortHash");
+	bt3DGridHashKey* pHash = (bt3DGridHashKey*)m_hBodiesHash; // the uint buffer is reinterpreted as two-uint records
+	pHash->quickSort(pHash, 0, m_numHandles - 1); // NOTE(review): relies on m_numHandles >= 1; calculateOverlappingPairs() returns early otherwise
+	return;
+}
+
+
+///hand the hash buffer, m_hCellStart, the body count and the cell count to btGpu_findCellStart
+void btGpu3DGridBroadphase::findCellStart()
+{
+	BT_PROFILE("bt3DGrid_findCellStart");
+	btGpu_findCellStart(m_hBodiesHash, m_hCellStart, m_numHandles, m_params.m_numCells);
+	return;
+}
+
+
+///run btGpu_findOverlappingPairs over the m_numHandles small bodies, using the AABB, hash, cell-start and pair buffers
+void btGpu3DGridBroadphase::findOverlappingPairs()
+{
+	BT_PROFILE("bt3DGrid_findOverlappingPairs");
+	btGpu_findOverlappingPairs(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr, m_numHandles);
+	return;
+}
+
+
+///run btGpu_findPairsLarge with the same buffers plus the small (m_numHandles) and large (m_numLargeHandles) body counts
+void btGpu3DGridBroadphase::findPairsLarge()
+{
+	BT_PROFILE("bt3DGrid_findPairsLarge");
+	btGpu_findPairsLarge(m_hAABB, m_hBodiesHash, m_hCellStart, m_hPairBuff, m_hPairBuffStartCurr,	m_numHandles, m_numLargeHandles);
+	return;
+}
+
+
+///hand the pair buffers, m_hPairScan and m_hAABB to btGpu_computePairCacheChanges; scanOverlappingPairBuff() sums m_hPairScan afterwards
+void btGpu3DGridBroadphase::computePairCacheChanges()
+{
+	BT_PROFILE("bt3DGrid_computePairCacheChanges");
+	btGpu_computePairCacheChanges(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hAABB, m_numHandles);
+	return;
+}
+
+
+
+void btGpu3DGridBroadphase::scanOverlappingPairBuff()
+{
+	BT_PROFILE("bt3DGrid_scanOverlappingPairBuff");
+	// In-place running sum: slot 0 is forced to zero, after which every slot in
+	// 1..m_numHandles becomes its own previous value plus the already summed slot before it.
+	m_hPairScan[0] = 0;
+	for(int slot = 1; slot <= m_numHandles; slot++) 
+	{
+		m_hPairScan[slot] += m_hPairScan[slot-1];
+	}
+}
+
+
+///hand the pair buffers, the summed m_hPairScan and m_hPairOut to btGpu_squeezeOverlappingPairBuff; addPairsToCache() reads m_hPairOut afterwards
+void btGpu3DGridBroadphase::squeezeOverlappingPairBuff()
+{
+	BT_PROFILE("bt3DGrid_squeezeOverlappingPairBuff");
+	btGpu_squeezeOverlappingPairBuff(m_hPairBuff, m_hPairBuffStartCurr, m_hPairScan, m_hPairOut, m_hAABB, m_numHandles);
+	return;
+}
+
+
+
+#include "btGpu3DGridBroadphaseSharedCode.h"
+
+
diff --git a/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphase.h b/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphase.h
new file mode 100644
index 00000000..1d49a055
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphase.h
@@ -0,0 +1,138 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+#ifndef BTGPU3DGRIDBROADPHASE_H
+#define BTGPU3DGRIDBROADPHASE_H
+
+//----------------------------------------------------------------------------------------
+
+#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
+
+#include "btGpu3DGridBroadphaseSharedTypes.h"
+
+//----------------------------------------------------------------------------------------
+
+///The btGpu3DGridBroadphase uses GPU-style code compiled for CPU to compute overlapping pairs
+///Proxies classified as 'large' by isLargeProxy() are kept in a separate pool (m_pLargeHandles)
+class btGpu3DGridBroadphase : public btSimpleBroadphase
+{
+protected:
+	bool			m_bInitialized;			// set by _initialize(), cleared by _finalize()
+	unsigned int	m_numBodies;
+	unsigned int	m_numCells;
+	unsigned int	m_maxPairsPerBody;		// number of m_hPairBuff slots reserved per body
+	btScalar		m_cellFactorAABB;		// user factor applied to the proxy radius in isLargeProxy()
+	unsigned int	m_maxBodiesPerCell;
+	bt3DGridBroadphaseParams m_params;
+	btScalar		m_maxRadius;			// radius threshold used by isLargeProxy()
+	// CPU data
+	unsigned int*	m_hBodiesHash;			// two uints (hash, index) per body
+	unsigned int*	m_hCellStart;			// one entry per grid cell
+	unsigned int*	m_hPairBuffStartCurr;	// two uints (start, current count) per body, plus one trailing pair
+	bt3DGrid3F1U*		m_hAABB;			// min & max entry for every small and large proxy
+	unsigned int*	m_hPairBuff;			// m_maxPairsPerBody slots per body
+	unsigned int*	m_hPairScan;			// m_maxHandles + 1 entries, summed by scanOverlappingPairBuff()
+	unsigned int*	m_hPairOut;				// change list read by addPairsToCache()
+// large proxies
+	int		m_numLargeHandles;				// large proxies currently allocated
+	int		m_maxLargeHandles;				// capacity of m_pLargeHandles
+	int		m_LastLargeHandleIndex;			// highest large slot that may be in use, -1 if none
+	btSimpleBroadphaseProxy* m_pLargeHandles;
+	void* m_pLargeHandlesRawPtr;
+	int		m_firstFreeLargeHandle;			// head of the large free list
+	int allocLargeHandle()	// take the head of the large free list and return its slot index
+	{
+		btAssert(m_numLargeHandles < m_maxLargeHandles);
+		int freeLargeHandle = m_firstFreeLargeHandle;
+		m_firstFreeLargeHandle = m_pLargeHandles[freeLargeHandle].GetNextFree();
+		m_numLargeHandles++;
+		if(freeLargeHandle > m_LastLargeHandleIndex)
+		{
+			m_LastLargeHandleIndex = freeLargeHandle;
+		}
+		return freeLargeHandle;
+	}
+	void freeLargeHandle(btSimpleBroadphaseProxy* proxy)	// push a large proxy back onto the free list and clear its client object
+	{
+		int handle = int(proxy - m_pLargeHandles);
+		btAssert((handle >= 0) && (handle < m_maxLargeHandles));	// the bound is the large pool size, not m_maxHandles
+		if(handle == m_LastLargeHandleIndex)
+		{
+			m_LastLargeHandleIndex--;
+		}
+		proxy->SetNextFree(m_firstFreeLargeHandle);
+		m_firstFreeLargeHandle = handle;
+		proxy->m_clientObject = 0;
+		m_numLargeHandles--;
+	}
+	bool isLargeProxy(const btVector3& aabbMin,  const btVector3& aabbMax);
+	bool isLargeProxy(btBroadphaseProxy* proxy);
+// debug
+	unsigned int	m_numPairsAdded;
+	unsigned int	m_numPairsRemoved;
+	unsigned int	m_numOverflows;
+// public interface
+public:
+	btGpu3DGridBroadphase(const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
+					   int gridSizeX, int gridSizeY, int gridSizeZ, 
+					   int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+					   int maxBodiesPerCell = 8,
+					   btScalar cellFactorAABB = btScalar(1.0f));
+	btGpu3DGridBroadphase(	btOverlappingPairCache* overlappingPairCache,
+						const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
+						int gridSizeX, int gridSizeY, int gridSizeZ, 
+						int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+						int maxBodiesPerCell = 8,
+						btScalar cellFactorAABB = btScalar(1.0f));
+	virtual ~btGpu3DGridBroadphase();
+	virtual void	calculateOverlappingPairs(btDispatcher* dispatcher);
+
+	virtual btBroadphaseProxy*	createProxy(const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy);
+	virtual void	destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
+	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback);
+	virtual void	resetPool(btDispatcher* dispatcher);
+
+protected:
+	void _initialize(	const btVector3& worldAabbMin,const btVector3& worldAabbMax, 
+						int gridSizeX, int gridSizeY, int gridSizeZ, 
+						int maxSmallProxies, int maxLargeProxies, int maxPairsPerBody,
+						int maxBodiesPerCell = 8,
+						btScalar cellFactorAABB = btScalar(1.0f));
+	void _finalize();
+	void addPairsToCache(btDispatcher* dispatcher);
+	void addLarge2LargePairsToCache(btDispatcher* dispatcher);
+
+// overrides for CPU version
+	virtual void setParameters(bt3DGridBroadphaseParams* hostParams);
+	virtual void prepareAABB();
+	virtual void calcHashAABB();
+	virtual void sortHash();	
+	virtual void findCellStart();
+	virtual void findOverlappingPairs();
+	virtual void findPairsLarge();
+	virtual void computePairCacheChanges();
+	virtual void scanOverlappingPairBuff();
+	virtual void squeezeOverlappingPairBuff();
+};
+
+//----------------------------------------------------------------------------------------
+
+#endif //BTGPU3DGRIDBROADPHASE_H
+
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
diff --git a/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h b/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h
new file mode 100644
index 00000000..e0afb87b
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphaseSharedCode.h
@@ -0,0 +1,430 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//               K E R N E L    F U N C T I O N S 
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+// calculate position in uniform grid
+// per axis: floor((p - world origin) / cell size); the result is not clamped to the grid here
+BT_GPU___device__ int3 bt3DGrid_calcGridPos(float4 p)
+{
+    int3 gridPos;
+    gridPos.x = (int)floor((p.x - BT_GPU_params.m_worldOriginX) / BT_GPU_params.m_cellSizeX);
+    gridPos.y = (int)floor((p.y - BT_GPU_params.m_worldOriginY) / BT_GPU_params.m_cellSizeY);
+    gridPos.z = (int)floor((p.z - BT_GPU_params.m_worldOriginZ) / BT_GPU_params.m_cellSizeZ);
+    return gridPos;
+} // bt3DGrid_calcGridPos()
+
+//----------------------------------------------------------------------------------------
+// calculate address in grid from position (clamping to edges)
+// result: (z * gridSizeY + y) * gridSizeX + x, assuming BT_GPU___mul24 acts as a plain multiply here
+BT_GPU___device__ uint bt3DGrid_calcGridHash(int3 gridPos)
+{
+    gridPos.x = BT_GPU_max(0, BT_GPU_min(gridPos.x, (int)BT_GPU_params.m_gridSizeX - 1));
+    gridPos.y = BT_GPU_max(0, BT_GPU_min(gridPos.y, (int)BT_GPU_params.m_gridSizeY - 1));
+    gridPos.z = BT_GPU_max(0, BT_GPU_min(gridPos.z, (int)BT_GPU_params.m_gridSizeZ - 1));
+    return BT_GPU___mul24(BT_GPU___mul24(gridPos.z, BT_GPU_params.m_gridSizeY), BT_GPU_params.m_gridSizeX) + BT_GPU___mul24(gridPos.y, BT_GPU_params.m_gridSizeX) + gridPos.x;
+} // bt3DGrid_calcGridHash()
+
+//----------------------------------------------------------------------------------------
+// calculate grid hash value for each body using its AABB
+// (the hash is taken at the AABB centre; pHash[index] receives the pair (gridHash, index))
+BT_GPU___global__ void calcHashAABBD(bt3DGrid3F1U* pAABB, uint2* pHash, uint numBodies)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index >= (int)numBodies)
+	{
+		return;
+	}
+	bt3DGrid3F1U bbMin = pAABB[index*2];
+	bt3DGrid3F1U bbMax = pAABB[index*2 + 1];
+	float4 pos; // only x, y and z are assigned below
+	pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
+	pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
+	pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
+    // get address in grid
+    int3 gridPos = bt3DGrid_calcGridPos(pos);
+    uint gridHash = bt3DGrid_calcGridHash(gridPos);
+    // store grid hash and body index
+    pHash[index] = BT_GPU_make_uint2(gridHash, index);
+} // calcHashAABBD()
+
+//----------------------------------------------------------------------------------------
+// store in cellStart[hash] the index of the first pHash entry with that hash (expects equal hashes to be adjacent); other cells are not written here
+BT_GPU___global__ void findCellStartD(uint2* pHash, uint* cellStart, uint numBodies)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index >= (int)numBodies)
+	{
+		return;
+	}
+    uint2 sortedData = pHash[index];
+	// Load hash data into shared memory so that we can look 
+	// at neighboring body's hash value without loading
+	// two hash values per thread
+	BT_GPU___shared__ uint sharedHash[257]; // slot 0 holds the predecessor; NOTE(review): presumably sized for blocks of up to 256 threads - confirm
+	sharedHash[BT_GPU_threadIdx.x+1] = sortedData.x;
+	if((index > 0) && (BT_GPU_threadIdx.x == 0))
+	{
+		// first thread in block must load neighbor body hash
+		volatile uint2 prevData = pHash[index-1];
+		sharedHash[0] = prevData.x;
+	}
+	BT_GPU___syncthreads();
+	if((index == 0) || (sortedData.x != sharedHash[BT_GPU_threadIdx.x])) // first entry overall, or hash differs from the previous entry
+	{
+		cellStart[sortedData.x] = index;
+	}
+} // findCellStartD()
+
+//----------------------------------------------------------------------------------------
+// non-zero when the two boxes overlap on all three axes; boxes that merely touch count as overlapping (<=)
+BT_GPU___device__ uint cudaTestAABBOverlap(bt3DGrid3F1U min0, bt3DGrid3F1U max0, bt3DGrid3F1U min1, bt3DGrid3F1U max1)
+{
+	return	(min0.fx <= max1.fx)&& (min1.fx <= max0.fx) && 
+			(min0.fy <= max1.fy)&& (min1.fy <= max0.fy) && 
+			(min0.fz <= max1.fz)&& (min1.fz <= max0.fz); 
+} // cudaTestAABBOverlap()
+ 
+//----------------------------------------------------------------------------------------
+// test the body at sorted position 'index' against the bodies hashed into cell gridPos; found pairs are flagged or appended in that body's slice of pPairBuff
+BT_GPU___device__ void findPairsInCell(	int3	gridPos,
+										uint    index,
+										uint2*  pHash,
+										uint*   pCellStart,
+										bt3DGrid3F1U* pAABB, 
+										uint*   pPairBuff,
+										uint2*	pPairBuffStartCurr,
+										uint	numBodies)
+{
+    if (	(gridPos.x < 0) || (gridPos.x > (int)BT_GPU_params.m_gridSizeX - 1)
+		||	(gridPos.y < 0) || (gridPos.y > (int)BT_GPU_params.m_gridSizeY - 1)
+		||  (gridPos.z < 0) || (gridPos.z > (int)BT_GPU_params.m_gridSizeZ - 1)) 
+    {
+		return; // cell lies outside the grid
+	}
+    uint gridHash = bt3DGrid_calcGridHash(gridPos);
+    // get start of bucket for this cell
+    uint bucketStart = pCellStart[gridHash];
+    if (bucketStart == 0xffffffff)
+	{
+        return;   // cell empty
+	}
+	// iterate over bodies in this cell
+    uint2 sortedData = pHash[index];
+	uint unsorted_indx = sortedData.y;
+    bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2); 
+	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
+	uint handleIndex =  min0.uw;
+	uint2 start_curr = pPairBuffStartCurr[handleIndex];
+	uint start = start_curr.x; // first slot of this body's slice in pPairBuff
+	uint curr = start_curr.y; // number of slots currently in use
+	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];
+	uint curr_max = start_curr_next.x - start - 1; // one less than the distance to the next body's start
+	uint bucketEnd = bucketStart + BT_GPU_params.m_maxBodiesPerCell;
+	bucketEnd = (bucketEnd > numBodies) ? numBodies : bucketEnd;
+	for(uint index2 = bucketStart; index2 < bucketEnd; index2++) 
+	{
+        uint2 cellData = pHash[index2];
+        if (cellData.x != gridHash)
+        {
+			break;   // no longer in same bucket
+		}
+		uint unsorted_indx2 = cellData.y;
+        if (unsorted_indx2 < unsorted_indx) // only lower unsorted indices are tested: skips self and visits each pair from one side only
+        {   
+			bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2);
+			bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, unsorted_indx2*2 + 1);
+			if(cudaTestAABBOverlap(min0, max0, min1, max1))
+			{
+				uint handleIndex2 = min1.uw;
+				uint k;
+				for(k = 0; k < curr; k++) // linear search of the pairs already stored for this body
+				{
+					uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);
+					if(old_pair == handleIndex2)
+					{
+						pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;
+						break;
+					}
+				}
+				if(k == curr) // not stored yet: append as a new pair
+				{
+					if(curr >= curr_max) 
+					{ // not a good solution, but let's avoid crash
+						break;
+					}
+					pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
+					curr++;
+				}
+			}
+		}
+	}
+	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);
+    return;
+} // findPairsInCell()
+
+//----------------------------------------------------------------------------------------
+// for the body at sorted position 'index': visit the 3x3x3 block of cells around the cell of its AABB centre and collect pairs via findPairsInCell()
+BT_GPU___global__ void findOverlappingPairsD(	bt3DGrid3F1U*	pAABB, uint2* pHash, uint* pCellStart, 
+												uint* pPairBuff, uint2* pPairBuffStartCurr, uint numBodies)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+    if(index >= (int)numBodies)
+	{
+		return;
+	}
+    uint2 sortedData = pHash[index];
+	uint unsorted_indx = sortedData.y;
+	bt3DGrid3F1U bbMin = BT_GPU_FETCH(pAABB, unsorted_indx*2);
+	bt3DGrid3F1U bbMax = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
+	float4 pos; // only x, y and z are assigned below
+	pos.x = (bbMin.fx + bbMax.fx) * 0.5f;
+	pos.y = (bbMin.fy + bbMax.fy) * 0.5f;
+	pos.z = (bbMin.fz + bbMax.fz) * 0.5f;
+    // get address in grid
+    int3 gridPos = bt3DGrid_calcGridPos(pos);
+    // examine only neighbouring cells
+    for(int z=-1; z<=1; z++) {
+        for(int y=-1; y<=1; y++) {
+            for(int x=-1; x<=1; x++) {
+                findPairsInCell(gridPos + BT_GPU_make_int3(x, y, z), index, pHash, pCellStart, pAABB, pPairBuff, pPairBuffStartCurr, numBodies);
+            }
+        }
+    }
+} // findOverlappingPairsD()
+
+//----------------------------------------------------------------------------------------
+// Kernel: tests the body at sorted position `index` against the numLarge AABBs stored after the first numBodies ones and records overlaps.
+BT_GPU___global__ void findPairsLargeD(	bt3DGrid3F1U* pAABB, uint2* pHash, uint* pCellStart, uint* pPairBuff, 
+										uint2* pPairBuffStartCurr, uint numBodies, uint numLarge)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;	// flat element index of this invocation
+    if(index >= (int)numBodies)
+	{
+		return;	// invocation beyond the last body: nothing to do
+	}
+    uint2 sortedData = pHash[index];
+	uint unsorted_indx = sortedData.y;	// .y is used as the body's index into the AABB array
+	bt3DGrid3F1U min0 = BT_GPU_FETCH(pAABB, unsorted_indx*2);
+	bt3DGrid3F1U max0 = BT_GPU_FETCH(pAABB, unsorted_indx*2 + 1);
+	uint handleIndex =  min0.uw;	// the min entry carries the handle index in uw
+	uint2 start_curr = pPairBuffStartCurr[handleIndex];
+	uint start = start_curr.x;	// first pair-buffer slot of this handle
+	uint curr = start_curr.y;	// number of slots currently in use
+	uint2 start_curr_next = pPairBuffStartCurr[handleIndex+1];	// the next handle's range bounds this one
+	uint curr_max = start_curr_next.x - start - 1;
+    for(uint i = 0; i < numLarge; i++)
+    {
+		uint indx2 = numBodies + i;	// large AABBs follow the regular ones in pAABB
+		bt3DGrid3F1U min1 = BT_GPU_FETCH(pAABB, indx2*2);
+		bt3DGrid3F1U max1 = BT_GPU_FETCH(pAABB, indx2*2 + 1);
+		if(cudaTestAABBOverlap(min0, max0, min1, max1))
+		{
+			uint k;
+			uint handleIndex2 =  min1.uw;
+			for(k = 0; k < curr; k++)
+			{
+				uint old_pair = pPairBuff[start+k] & (~BT_3DGRID_PAIR_ANY_FLG);	// compare without the flag bits
+				if(old_pair == handleIndex2)
+				{
+					pPairBuff[start+k] |= BT_3DGRID_PAIR_FOUND_FLG;	// already recorded: mark it as seen again
+					break;
+				}
+			}
+			if(k == curr)
+			{	// not recorded yet: check capacity BEFORE storing, as findPairsInCell does
+				if(curr >= curr_max) 
+				{ // not a good solution, but let's avoid crash
+					break;
+				}
+				pPairBuff[start+curr] = handleIndex2 | BT_3DGRID_PAIR_NEW_FLG;
+				curr++;
+			}
+		}
+    }
+	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, curr);	// publish the updated in-use count
+    return;
+} // findPairsLargeD()
+
+//----------------------------------------------------------------------------------------
+// Kernel: for the body in AABB slot `index`, counts its pair-buffer entries that lack the FOUND flag and stores the count in pPairScan[index+1].
+BT_GPU___global__ void computePairCacheChangesD(uint* pPairBuff, uint2* pPairBuffStartCurr, 
+												uint* pPairScan, bt3DGrid3F1U* pAABB, uint numBodies)
+{
+	const int bodySlot = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;
+	if(bodySlot < (int)numBodies)
+	{
+		// the min entry of the body's AABB pair carries its handle index in uw
+		const uint handle = pAABB[bodySlot * 2].uw;
+		// x = first pair-buffer slot of the handle, y = number of slots in use
+		const uint2 range = pPairBuffStartCurr[handle];
+		const uint* const pairs = pPairBuff + range.x;
+		uint changed = 0;
+		for(uint slot = 0; slot < range.y; ++slot)
+		{
+			// no FOUND flag: the entry was either newly added or not matched again
+			if((pairs[slot] & BT_3DGRID_PAIR_FOUND_FLG) == 0)
+			{
+				changed++;
+			}
+		}
+		// the count is stored one element past the body's own slot
+		pPairScan[bodySlot + 1] = changed;
+	}
+} // computePairCacheChangesD()
+
+//----------------------------------------------------------------------------------------
+// Kernel: copies each pair entry lacking the FOUND flag to pPairOut, then compacts the body's own buffer in place to the flagged entries.
+BT_GPU___global__ void squeezeOverlappingPairBuffD(uint* pPairBuff, uint2* pPairBuffStartCurr, uint* pPairScan,
+												   uint* pPairOut, bt3DGrid3F1U* pAABB, uint numBodies)
+{
+    int index = BT_GPU___mul24(BT_GPU_blockIdx.x, BT_GPU_blockDim.x) + BT_GPU_threadIdx.x;	// flat element index of this invocation
+    if(index >= (int)numBodies)
+	{
+		return;	// invocation beyond the last body: nothing to do
+	}
+	bt3DGrid3F1U bbMin = pAABB[index * 2];	// min entry of the body's AABB pair
+	uint handleIndex = bbMin.uw;	// handle index is carried in uw
+	uint2 start_curr = pPairBuffStartCurr[handleIndex];
+	uint start = start_curr.x;	// first pair-buffer slot of this handle
+	uint curr = start_curr.y;	// number of slots currently in use
+	uint* pInp = pPairBuff + start;	// read cursor over the handle's entries
+	uint* pOut = pPairOut + pPairScan[index];	// write cursor into pPairOut; the offset is presumably a prefix sum of the change counts - TODO confirm
+	uint* pOut2 = pInp;	// in-place write cursor; it never gets ahead of pInp, so unread entries are not overwritten
+	uint num = 0; 	// entries kept in the compacted buffer
+	for(uint k = 0; k < curr; k++, pInp++)
+	{
+		if(!((*pInp) & BT_3DGRID_PAIR_FOUND_FLG))
+		{	// not matched again this pass (new or stale): export it, flag bits included
+			*pOut = *pInp;
+			pOut++;
+		}
+		if((*pInp) & BT_3DGRID_PAIR_ANY_FLG)
+		{	// FOUND or NEW: keep the entry, stored back with both flag bits cleared
+			*pOut2 = (*pInp) & (~BT_3DGRID_PAIR_ANY_FLG);
+			pOut2++;
+			num++;
+		}
+	}
+	pPairBuffStartCurr[handleIndex] = BT_GPU_make_uint2(start, num);	// same start, new in-use count
+} // squeezeOverlappingPairBuffD()
+
+
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//               E N D   O F    K E R N E L    F U N C T I O N S 
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------------
+
+extern "C"
+{
+
+//----------------------------------------------------------------------------------------
+// Host entry point: launches calcHashAABBD over numBodies elements, in blocks of at most 256 threads.
+void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash,	unsigned int numBodies)
+{
+    int blockCount, threadsPerBlock;
+    BT_GPU_PREF(computeGridSize)(numBodies, 256, blockCount, threadsPerBlock);
+    // the hash buffer is handed to the kernel as an array of uint2 entries
+    BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, calcHashAABBD, (pAABB, (uint2*)hash, numBodies));
+    // report a failed launch, if the build checks for one
+    BT_GPU_CHECK_ERROR("calcHashAABBD kernel execution failed");
+} // calcHashAABB()
+
+//----------------------------------------------------------------------------------------
+// Host entry point: fills cellStart with 0xff bytes, then launches findCellStartD over numBodies elements (blocks of at most 256 threads).
+void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells)
+{
+    int blockCount, threadsPerBlock;
+    BT_GPU_PREF(computeGridSize)(numBodies, 256, blockCount, threadsPerBlock);
+    BT_GPU_SAFE_CALL(BT_GPU_Memset(cellStart, 0xffffffff, numCells*sizeof(uint)));	// every cell entry reads 0xffffffff before the kernel runs
+    BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, findCellStartD, ((uint2*)hash, (uint*)cellStart, numBodies));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: findCellStartD");
+} // findCellStart()
+
+//----------------------------------------------------------------------------------------
+// Host entry point: launches findOverlappingPairsD over numBodies elements, in blocks of at most 64 threads.
+void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash,	unsigned int* pCellStart, unsigned int*	pPairBuff, unsigned int*	pPairBuffStartCurr, unsigned int	numBodies)
+{
+#if B_CUDA_USE_TEX	// texture build only: bind the 2 * numBodies AABB entries for the launch
+    BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, numBodies * 2 * sizeof(bt3DGrid3F1U)));
+#endif
+    int blockCount, threadsPerBlock;
+    BT_GPU_PREF(computeGridSize)(numBodies, 64, blockCount, threadsPerBlock);
+    BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, findOverlappingPairsD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: bt_CudaFindOverlappingPairsD");
+#if B_CUDA_USE_TEX	// release the binding made above
+    BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
+#endif
+} // findOverlappingPairs()
+
+//----------------------------------------------------------------------------------------
+// Host entry point: launches findPairsLargeD over numBodies elements, in blocks of at most 64 threads.
+void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge)
+{
+#if B_CUDA_USE_TEX	// texture build only: the binding also covers the numLarge extra AABBs
+    BT_GPU_SAFE_CALL(cudaBindTexture(0, pAABBTex, pAABB, (numBodies+numLarge) * 2 * sizeof(bt3DGrid3F1U)));
+#endif
+    int blockCount, threadsPerBlock;
+    BT_GPU_PREF(computeGridSize)(numBodies, 64, blockCount, threadsPerBlock);
+    BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, findPairsLargeD, (pAABB,(uint2*)pHash,(uint*)pCellStart,(uint*)pPairBuff,(uint2*)pPairBuffStartCurr,numBodies,numLarge));
+    BT_GPU_CHECK_ERROR("Kernel execution failed: btCuda_findPairsLargeD");
+#if B_CUDA_USE_TEX	// release the binding made above
+    BT_GPU_SAFE_CALL(cudaUnbindTexture(pAABBTex));
+#endif
+} // findPairsLarge()
+
+//----------------------------------------------------------------------------------------
+// Host entry point: launches computePairCacheChangesD over numBodies elements, in blocks of at most 256 threads.
+void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies)
+{
+    int blockCount, threadsPerBlock;
+    BT_GPU_PREF(computeGridSize)(numBodies, 256, blockCount, threadsPerBlock);
+    BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, computePairCacheChangesD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,pAABB,numBodies));	// pPairBuffStartCurr is viewed as uint2 entries
+    BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaComputePairCacheChangesD");
+} // computePairCacheChanges()
+
+//----------------------------------------------------------------------------------------
+// Host entry point: launches squeezeOverlappingPairBuffD over numBodies elements, in blocks of at most 256 threads.
+void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies)
+{
+    int blockCount, threadsPerBlock;
+    BT_GPU_PREF(computeGridSize)(numBodies, 256, blockCount, threadsPerBlock);
+    BT_GPU_EXECKERNEL(blockCount, threadsPerBlock, squeezeOverlappingPairBuffD, ((uint*)pPairBuff,(uint2*)pPairBuffStartCurr,(uint*)pPairScan,(uint*)pPairOut,pAABB,numBodies));	// pPairBuffStartCurr is viewed as uint2 entries
+    BT_GPU_CHECK_ERROR("Kernel execution failed: btCudaSqueezeOverlappingPairBuffD");
+} // squeezeOverlappingPairBuff()
+
+//------------------------------------------------------------------------------------------------
+
+} // extern "C"
+
+//------------------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------------------------
+//------------------------------------------------------------------------------------------------
diff --git a/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h b/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h
new file mode 100644
index 00000000..607bda7e
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h
@@ -0,0 +1,61 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+// Shared definitions for GPU-based 3D Grid collision detection broadphase
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//  Keep this file free from Bullet headers
+//  it is included into both CUDA and CPU code
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+//----------------------------------------------------------------------------------------
+
+#ifndef BTGPU3DGRIDBROADPHASESHAREDDEFS_H
+#define BTGPU3DGRIDBROADPHASESHAREDDEFS_H
+
+//----------------------------------------------------------------------------------------
+
+#include "btGpu3DGridBroadphaseSharedTypes.h"
+
+//----------------------------------------------------------------------------------------
+
+extern "C"
+{
+
+//----------------------------------------------------------------------------------------
+// Launches the calcHashAABBD kernel over numBodies elements; `hash` is passed to the kernel as uint2 entries.
+void BT_GPU_PREF(calcHashAABB)(bt3DGrid3F1U* pAABB, unsigned int* hash,	unsigned int numBodies);
+// Fills cellStart (numCells entries) with 0xff bytes, then launches findCellStartD over numBodies elements.
+void BT_GPU_PREF(findCellStart)(unsigned int* hash, unsigned int* cellStart, unsigned int numBodies, unsigned int numCells);
+// Launches findOverlappingPairsD over numBodies elements; pHash and pPairBuffStartCurr are passed on as uint2 entries.
+void BT_GPU_PREF(findOverlappingPairs)(bt3DGrid3F1U* pAABB, unsigned int* pHash,	unsigned int* pCellStart, unsigned int*	pPairBuff, unsigned int*	pPairBuffStartCurr, unsigned int	numBodies);
+// Launches findPairsLargeD over numBodies elements; the numLarge extra AABBs are read from pAABB after the first numBodies ones.
+void BT_GPU_PREF(findPairsLarge)(bt3DGrid3F1U* pAABB, unsigned int* pHash, unsigned int* pCellStart, unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int numBodies, unsigned int numLarge);
+// Launches computePairCacheChangesD, which writes one count per body into pPairScan[body + 1].
+void BT_GPU_PREF(computePairCacheChanges)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, bt3DGrid3F1U* pAABB, unsigned int numBodies);
+// Launches squeezeOverlappingPairBuffD, which exports entries to pPairOut at the offsets read from pPairScan and compacts pPairBuff.
+void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned int* pPairBuffStartCurr, unsigned int* pPairScan, unsigned int* pPairOut, bt3DGrid3F1U* pAABB, unsigned int numBodies);
+
+
+//----------------------------------------------------------------------------------------
+
+} // extern "C"
+
+//----------------------------------------------------------------------------------------
+
+#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H
+
diff --git a/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h b/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h
new file mode 100644
index 00000000..616a4009
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h
@@ -0,0 +1,67 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+// Shared definitions for GPU-based 3D Grid collision detection broadphase
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//  Keep this file free from Bullet headers
+//  it is included into both CUDA and CPU code
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+//----------------------------------------------------------------------------------------
+
+#ifndef BTGPU3DGRIDBROADPHASESHAREDTYPES_H
+#define BTGPU3DGRIDBROADPHASESHAREDTYPES_H
+
+//----------------------------------------------------------------------------------------
+// Flag bits OR-ed into pair-buffer entries next to the partner's handle index.
+#define BT_3DGRID_PAIR_FOUND_FLG (0x40000000)	// set on an existing entry when the pair is matched again
+#define BT_3DGRID_PAIR_NEW_FLG   (0x20000000)	// set on an entry when it is first appended
+#define BT_3DGRID_PAIR_ANY_FLG   (BT_3DGRID_PAIR_FOUND_FLG | BT_3DGRID_PAIR_NEW_FLG)	// mask of both flags; cleared to recover the handle index
+
+//----------------------------------------------------------------------------------------
+// Grid configuration record. The per-field notes are inferred from the member names only - TODO confirm against the code that fills it.
+struct bt3DGridBroadphaseParams 
+{
+	unsigned int	m_gridSizeX;	// presumably the number of cells along X
+	unsigned int	m_gridSizeY;	// presumably the number of cells along Y
+	unsigned int	m_gridSizeZ;	// presumably the number of cells along Z
+	unsigned int	m_numCells;	// presumably the total cell count - verify
+	float			m_worldOriginX;	// presumably the world-space origin of the grid
+	float			m_worldOriginY;
+	float			m_worldOriginZ;
+	float			m_cellSizeX;	// presumably the world-space extent of one cell
+	float			m_cellSizeY;
+	float			m_cellSizeZ;
+	unsigned int	m_numBodies;
+	unsigned int	m_maxBodiesPerCell;
+};
+
+//----------------------------------------------------------------------------------------
+// Three floats plus one unsigned word; the kernels store each body's AABB as two consecutive entries (min, then max).
+struct bt3DGrid3F1U
+{
+	float			fx;	// corner coordinates
+	float			fy;
+	float			fz;
+	unsigned int	uw;	// the kernels read the body's handle index from the min entry's uw
+};
+
+//----------------------------------------------------------------------------------------
+
+#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H
+
diff --git a/src/bullet/BulletMultiThreaded/btGpuDefines.h b/src/bullet/BulletMultiThreaded/btGpuDefines.h
new file mode 100644
index 00000000..f9315ab6
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/btGpuDefines.h
@@ -0,0 +1,211 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+// definitions for "GPU on CPU" code
+
+
+#ifndef BT_GPU_DEFINES_H
+#define BT_GPU_DEFINES_H
+
+typedef unsigned int uint;	// shorthand used by the shared kernel code
+
+struct int2	// the structs below are plain aggregates for the "GPU on CPU" build; their names match what the shared kernel code uses
+{
+	int x, y;
+};
+
+struct uint2	// also used for (hash, index) and (start, count) pairs in the broadphase kernels
+{
+	unsigned int x, y;
+};
+
+struct int3	// used for integer grid cell coordinates
+{
+	int x, y, z;
+};
+
+struct uint3
+{
+	unsigned int x, y, z;
+};
+
+struct float4	// four floats; has no constructor, so members start out uninitialised
+{
+	float x, y, z, w;
+};
+
+struct float3	// three floats; has no constructor, so members start out uninitialised
+{
+	float x, y, z;
+};
+
+
+#define BT_GPU___device__ inline	// device-function qualifier: a plain inline function in this CPU build
+#define BT_GPU___devdata__	// expands to nothing in this CPU build
+#define BT_GPU___constant__	// expands to nothing in this CPU build
+#define BT_GPU_max(a, b) ((a) > (b) ? (a) : (b))	// note: one of the arguments is evaluated twice
+#define BT_GPU_min(a, b) ((a) < (b) ? (a) : (b))	// note: one of the arguments is evaluated twice
+#define BT_GPU_params s3DGridBroadphaseParams	// name of the parameter object; it is not defined in this header
+#define BT_GPU___mul24(a, b) ((a)*(b))	// ordinary multiplication in this CPU build
+#define BT_GPU___global__ inline	// kernel qualifier: kernels are plain inline functions in this CPU build
+#define BT_GPU___shared__ static	// "shared" variables become statics
+#define BT_GPU___syncthreads()	// no-op: BT_GPU_EXECKERNEL runs the invocations one after another
+#define CUDART_PI_F SIMD_PI	// SIMD_PI is not defined in this header; the including code must provide it
+
+static inline uint2 bt3dGrid_make_uint2(unsigned int x, unsigned int y)
+{ // packs the two components into a uint2
+  const uint2 packed = { x, y }; return packed;
+}
+#define BT_GPU_make_uint2(x, y) bt3dGrid_make_uint2(x, y)
+
+static inline int3 bt3dGrid_make_int3(int x, int y, int z)
+{ // packs the three components into an int3
+  const int3 packed = { x, y, z }; return packed;
+}
+#define BT_GPU_make_int3(x, y, z) bt3dGrid_make_int3(x, y, z)
+
+static inline float3 bt3dGrid_make_float3(float x, float y, float z)
+{ // packs the three components into a float3
+  const float3 packed = { x, y, z }; return packed;
+}
+#define BT_GPU_make_float3(x, y, z) bt3dGrid_make_float3(x, y, z)
+
+static inline float3 bt3dGrid_make_float34(float4 f)
+{ // keeps x, y and z of a float4 and drops w
+  const float3 xyz = { f.x, f.y, f.z }; return xyz;
+}
+#define BT_GPU_make_float34(f) bt3dGrid_make_float34(f)
+
+static inline float3 bt3dGrid_make_float31(float f)
+{ // replicates one scalar into all three components
+  const float3 splat = { f, f, f }; return splat;
+}
+#define BT_GPU_make_float31(x) bt3dGrid_make_float31(x)
+
+static inline float4 bt3dGrid_make_float42(float3 v, float f)
+{ // extends a float3 with f as the w component
+  const float4 extended = { v.x, v.y, v.z, f }; return extended;
+}
+#define BT_GPU_make_float42(a, b) bt3dGrid_make_float42(a, b) 
+
+static inline float4 bt3dGrid_make_float44(float a, float b, float c, float d)
+{ // packs the four scalars as x, y, z, w
+  const float4 packed = { a, b, c, d }; return packed;
+}
+#define BT_GPU_make_float44(a, b, c, d) bt3dGrid_make_float44(a, b, c, d) 
+
+inline int3 operator+(int3 a, int3 b)
+{ // component-wise sum of two int3 values
+    const int3 sum = { a.x + b.x, a.y + b.y, a.z + b.z }; return sum;
+}
+
+inline float4 operator+(const float4& a, const float4& b)	// component-wise sum
+{
+	float4 r; r.x = a.x+b.x; r.y = a.y+b.y; r.z = a.z+b.z; r.w = a.w+b.w; return r;
+}
+inline float4 operator*(const float4& a, float fact)	// scales all four components by fact
+{
+	float4 r; r.x = a.x*fact; r.y = a.y*fact; r.z = a.z*fact; r.w = a.w*fact; return r;
+}
+inline float4 operator*(float fact, const float4& a)	// scalar-first form; const reference so const and temporary operands are accepted too
+{
+	return (a * fact);
+}
+inline float4& operator*=(float4& a, float fact)	// scales a in place and returns it
+{
+	a = fact * a;
+	return a;
+}
+inline float4& operator+=(float4& a, const float4& b)	// adds b to a in place and returns a
+{
+	a = a + b;
+	return a;
+}
+
+inline float3 operator+(const float3& a, const float3& b)
+{ // component-wise sum
+	const float3 sum = { a.x+b.x, a.y+b.y, a.z+b.z }; return sum;
+}
+inline float3 operator-(const float3& a, const float3& b)
+{ // component-wise difference
+	const float3 diff = { a.x-b.x, a.y-b.y, a.z-b.z }; return diff;
+}
+static inline float bt3dGrid_dot(const float3& a, const float3& b)	// const references: the operands are only read
+{ // three-component dot product
+	return a.x*b.x+a.y*b.y+a.z*b.z;
+}
+#define BT_GPU_dot(a,b) bt3dGrid_dot(a,b)
+
+static inline float bt3dGrid_dot4(const float4& a, const float4& b)	// const references: the operands are only read
+{ // four-component dot product (w included)
+	return a.x*b.x+a.y*b.y+a.z*b.z+a.w*b.w;
+}
+#define BT_GPU_dot4(a,b) bt3dGrid_dot4(a,b)
+
+static inline float3 bt3dGrid_cross(const float3& a, const float3& b)
+{ // cross product a x b
+	const float3 c = { a.y*b.z-a.z*b.y, -a.x*b.z+a.z*b.x, a.x*b.y-a.y*b.x }; return c;
+}
+#define BT_GPU_cross(a,b) bt3dGrid_cross(a,b)
+
+
+inline float3 operator*(const float3& a, float fact)
+{ // scales all three components by fact
+	const float3 scaled = { a.x*fact, a.y*fact, a.z*fact }; return scaled;
+}
+
+
+inline float3& operator+=(float3& a, const float3& b)
+{ // adds b to a in place
+	a.x += b.x; a.y += b.y; a.z += b.z;
+	return a;
+}
+inline float3& operator-=(float3& a, const float3& b)
+{ // subtracts b from a in place
+	a.x -= b.x; a.y -= b.y; a.z -= b.z;
+	return a;
+}
+inline float3& operator*=(float3& a, float fact)
+{ // scales a in place
+	a.x *= fact; a.y *= fact; a.z *= fact;
+	return a;
+}
+inline float3 operator-(const float3& v)
+{ // component-wise negation
+	const float3 negated = { -v.x, -v.y, -v.z }; return negated;
+}
+
+
+#define BT_GPU_FETCH(a, b) a[b]	// plain array indexing in this CPU build
+#define BT_GPU_FETCH4(a, b) a[b]	// plain array indexing in this CPU build
+#define BT_GPU_PREF(func) btGpu_##func	// prefixes the first token of func with btGpu_
+#define BT_GPU_SAFE_CALL(func) func	// no error checking here: just evaluates the call
+#define BT_GPU_Memset memset	// this header includes nothing itself, so the includer must declare memset
+#define BT_GPU_MemcpyToSymbol(a, b, c) memcpy(&a, b, c)	// copies c bytes from b into the object a
+#define BT_GPU_BindTexture(a, b, c, d)	// expands to nothing
+#define BT_GPU_UnbindTexture(a)	// expands to nothing
+
+static uint2 s_blockIdx, s_blockDim, s_threadIdx;	// emulated launch indices; static, so each including translation unit gets its own copy
+#define BT_GPU_blockIdx s_blockIdx
+#define BT_GPU_blockDim s_blockDim
+#define BT_GPU_threadIdx s_threadIdx
+#define BT_GPU_EXECKERNEL(numb, numt, kfunc, args) {s_blockDim.x=numt;for(int nb=0;nb<numb;nb++){s_blockIdx.x=nb;for(int nt=0;nt<numt;nt++){s_threadIdx.x=nt;kfunc args;}}}	// emulated launch: calls kfunc with the parenthesised args once per (block, thread) pair, sequentially; only the .x indices are set
+
+#define BT_GPU_CHECK_ERROR(s)	// expands to nothing: the message s is discarded
+
+
+#endif //BT_GPU_DEFINES_H
diff --git a/src/bullet/BulletMultiThreaded/btGpuUtilsSharedCode.h b/src/bullet/BulletMultiThreaded/btGpuUtilsSharedCode.h
new file mode 100644
index 00000000..5761e790
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/btGpuUtilsSharedCode.h
@@ -0,0 +1,55 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//----------------------------------------------------------------------------------------
+
+// Shared code for GPU-based utilities
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//  Keep this file free from Bullet headers
+//  will be compiled by both CPU and CUDA compilers
+//	file with definitions of BT_GPU_xxx should be included first
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+//----------------------------------------------------------------------------------------
+
+#include "btGpuUtilsSharedDefs.h"
+
+//----------------------------------------------------------------------------------------
+
+extern "C"
+{
+
+//----------------------------------------------------------------------------------------
+
+// Returns a / b, plus one when the division leaves a remainder (a ceiling division for a >= 0 and b > 0); b must not be zero.
+int BT_GPU_PREF(iDivUp)(int a, int b)
+{
+    return a / b + ((a % b != 0) ? 1 : 0);
+} // iDivUp()
+
+//----------------------------------------------------------------------------------------
+// Outputs: numThreads = min(blockSize, n); numBlocks = number of such blocks needed to cover n elements, or 0 when numThreads is not positive.
+// compute grid and thread block size for a given number of elements
+void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads)
+{
+    numThreads = BT_GPU_min(blockSize, n);
+    numBlocks = (numThreads > 0) ? BT_GPU_PREF(iDivUp)(n, numThreads) : 0;	// guard: iDivUp would divide by zero when n or blockSize is 0
+} // computeGridSize()
+
+//----------------------------------------------------------------------------------------
+
+} // extern "C"
+
diff --git a/src/bullet/BulletMultiThreaded/btGpuUtilsSharedDefs.h b/src/bullet/BulletMultiThreaded/btGpuUtilsSharedDefs.h
new file mode 100644
index 00000000..dccfda54
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/btGpuUtilsSharedDefs.h
@@ -0,0 +1,52 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
+Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+// Shared definitions for GPU-based utilities
+
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//  Keep this file free from Bullet headers
+//  it is included into both CUDA and CPU code
+//	file with definitions of BT_GPU_xxx should be included first
+//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+
+#ifndef BTGPUUTILSDHAREDDEFS_H
+#define BTGPUUTILSDHAREDDEFS_H
+
+
+extern "C"
+{
+
+
+//Round a / b to nearest higher integer value (b must not be zero)
+int BT_GPU_PREF(iDivUp)(int a, int b);
+
+// compute grid and thread block size for a given number of elements; results are returned through numBlocks and numThreads
+void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &numThreads);
+// The functions below are only declared here; btGpuUtilsSharedCode.h does not define them. Their roles are inferred from the names - TODO confirm.
+void BT_GPU_PREF(allocateArray)(void** devPtr, unsigned int size);	// presumably allocates `size` bytes and returns the pointer through devPtr
+void BT_GPU_PREF(freeArray)(void* devPtr);	// presumably releases an allocateArray result
+void BT_GPU_PREF(copyArrayFromDevice)(void* host, const void* device, unsigned int size);	// presumably copies `size` bytes device -> host
+void BT_GPU_PREF(copyArrayToDevice)(void* device, const void* host, unsigned int size);	// presumably copies `size` bytes host -> device
+void BT_GPU_PREF(registerGLBufferObject(unsigned int vbo));	// parameter list sits inside the macro argument; expands the same way as the form used above
+void* BT_GPU_PREF(mapGLBufferObject(unsigned int vbo));
+void BT_GPU_PREF(unmapGLBufferObject(unsigned int vbo));
+
+
+} // extern "C"
+
+
+#endif // BTGPUUTILSDHAREDDEFS_H
+
diff --git a/src/bullet/BulletMultiThreaded/btParallelConstraintSolver.cpp b/src/bullet/BulletMultiThreaded/btParallelConstraintSolver.cpp
new file mode 100644
index 00000000..10164f8e
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/btParallelConstraintSolver.cpp
@@ -0,0 +1,1391 @@
+/*
+   Copyright (C) 2010 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#include "btParallelConstraintSolver.h"
+#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "LinearMath/btPoolAllocator.h"
+
+#include "BulletMultiThreaded/vectormath2bullet.h"
+
+#include "LinearMath/btQuickprof.h"
+#include "BulletMultiThreaded/btThreadSupportInterface.h"
+#ifdef PFX_USE_FREE_VECTORMATH
+#include "vecmath/vmInclude.h"
+#else
+#include "vectormath/vmInclude.h"
+#endif //PFX_USE_FREE_VECTORMATH
+
+#include "HeapManager.h"
+
+#include "PlatformDefinitions.h"
+
+//#include "PfxSimdUtils.h"
+#include "LinearMath/btScalar.h"
+
+#include "TrbStateVec.h"
+
+
+
+/////////////////
+
+
+#define TMP_BUFF_BYTES (15*1024*1024) // size of the static scratch pool below (15*1024*1024 bytes)
+unsigned char ATTRIBUTE_ALIGNED128(tmp_buff[TMP_BUFF_BYTES]); // handed to CustomSolveConstraintsParallel as its poolBuf/poolBytes
+
+
+
+// Projected Gauss-Seidel (equivalently Sequential Impulse) step for one btSolverConstraint row: updates c.m_appliedImpulse and both bodies' delta velocities.
+ inline void resolveSingleConstraintRowGeneric(PfxSolverBody& body1,PfxSolverBody& body2,const btSolverConstraint& c)
+{
+	// Start from the row's rhs reduced by the cfm-scaled impulse applied so far.
+	btScalar deltaImpulse = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
+	const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(getBtVector3(body1.mDeltaLinearVelocity)) 	+ c.m_relpos1CrossNormal.dot(getBtVector3(body1.mDeltaAngularVelocity));
+	const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(getBtVector3(body2.mDeltaLinearVelocity)) + c.m_relpos2CrossNormal.dot(getBtVector3(body2.mDeltaAngularVelocity));
+	// Subtract what the bodies' current delta velocities already contribute along this row.
+//	const btScalar delta_rel_vel	=	deltaVel1Dotn-deltaVel2Dotn;
+	deltaImpulse	-=	deltaVel1Dotn*c.m_jacDiagABInv;
+	deltaImpulse	-=	deltaVel2Dotn*c.m_jacDiagABInv;
+	// Clamp the accumulated impulse to [m_lowerLimit, m_upperLimit] and shrink deltaImpulse to the part actually applied.
+	const btScalar sum = btScalar(c.m_appliedImpulse) + deltaImpulse;
+	if (sum < c.m_lowerLimit)
+	{
+		deltaImpulse = c.m_lowerLimit-c.m_appliedImpulse;
+		c.m_appliedImpulse = c.m_lowerLimit; // written through the const reference (m_appliedImpulse is presumably declared mutable)
+	}
+	else if (sum > c.m_upperLimit) 
+	{
+		deltaImpulse = c.m_upperLimit-c.m_appliedImpulse;
+		c.m_appliedImpulse = c.m_upperLimit;
+	}
+	else
+	{
+		c.m_appliedImpulse = sum;
+	}
+	// Apply the impulse to each body; a body whose inverse mass is zero is left untouched.
+	// Linear part: contact normal scaled by inverse mass; angular part: component-wise product with m_angularComponentA/B.
+	if (body1.mMassInv)
+	{
+		btVector3 linearComponent = c.m_contactNormal*body1.mMassInv;
+		body1.mDeltaLinearVelocity += vmVector3(linearComponent.getX()*deltaImpulse,linearComponent.getY()*deltaImpulse,linearComponent.getZ()*deltaImpulse);
+		btVector3 tmp=c.m_angularComponentA*(btVector3(deltaImpulse,deltaImpulse,deltaImpulse));
+		body1.mDeltaAngularVelocity += vmVector3(tmp.getX(),tmp.getY(),tmp.getZ());
+	}
+
+	if (body2.mMassInv)
+	{
+		btVector3 linearComponent = -c.m_contactNormal*body2.mMassInv; // body 2 receives the opposite linear impulse
+		body2.mDeltaLinearVelocity += vmVector3(linearComponent.getX()*deltaImpulse,linearComponent.getY()*deltaImpulse,linearComponent.getZ()*deltaImpulse);
+		btVector3 tmp = c.m_angularComponentB*((btVector3(deltaImpulse,deltaImpulse,deltaImpulse)));//*m_angularFactor);
+		body2.mDeltaAngularVelocity += vmVector3(tmp.getX(),tmp.getY(),tmp.getZ());
+	}
+	// The commented-out calls below are the btRigidBody-style equivalent of the two updates above.
+	//body1.internalApplyImpulse(c.m_contactNormal*body1.internalGetInvMass(),c.m_angularComponentA,deltaImpulse);
+	//body2.internalApplyImpulse(-c.m_contactNormal*body2.internalGetInvMass(),c.m_angularComponentB,deltaImpulse);
+
+}
+
+ 
+static SIMD_FORCE_INLINE // Solves one btConstraintRow between bodies A and B and applies the resulting impulse to their delta velocities.
+void pfxSolveLinearConstraintRow(btConstraintRow &constraint, // row to solve; its m_accumImpulse is updated
+	vmVector3 &deltaLinearVelocityA,vmVector3 &deltaAngularVelocityA, // body A delta velocities, updated in place
+	float massInvA,const vmMatrix3 &inertiaInvA,const vmVector3 &rA, // body A inverse mass, inverse inertia and contact offset
+	vmVector3 &deltaLinearVelocityB,vmVector3 &deltaAngularVelocityB, // body B delta velocities, updated in place
+	float massInvB,const vmMatrix3 &inertiaInvB,const vmVector3 &rB) // body B inverse mass, inverse inertia and contact offset
+{
+	const vmVector3 normal(btReadVector3(constraint.m_normal));
+	btScalar deltaImpulse = constraint.m_rhs;
+	vmVector3 dVA = deltaLinearVelocityA + cross(deltaAngularVelocityA,rA); // delta velocity of A at offset rA
+	vmVector3 dVB = deltaLinearVelocityB + cross(deltaAngularVelocityB,rB); // delta velocity of B at offset rB
+	deltaImpulse -= constraint.m_jacDiagInv * dot(normal,dVA-dVB);
+	btScalar oldImpulse = constraint.m_accumImpulse;
+	constraint.m_accumImpulse = btClamped(oldImpulse + deltaImpulse,constraint.m_lowerLimit,constraint.m_upperLimit); // the accumulated impulse is clamped, not the increment
+	deltaImpulse = constraint.m_accumImpulse - oldImpulse; // increment that survives the clamp
+	deltaLinearVelocityA += deltaImpulse * massInvA * normal;
+	deltaAngularVelocityA += deltaImpulse * inertiaInvA * cross(rA,normal);
+	deltaLinearVelocityB -= deltaImpulse * massInvB * normal; // B receives the opposite impulse
+	deltaAngularVelocityB -= deltaImpulse * inertiaInvB * cross(rB,normal);
+
+}
+ 
+void btSolveContactConstraint( // Solves one contact point: the response row first, then both friction rows bounded by the response impulse.
+	btConstraintRow &constraintResponse, // response (normal) row, solved with its own limits
+	btConstraintRow &constraintFriction1, // friction rows; their lower/upper limits are overwritten here
+	btConstraintRow &constraintFriction2,
+	const vmVector3 &contactPointA, // rotated by solverBodyA.mOrientation to obtain rA
+	const vmVector3 &contactPointB, // rotated by solverBodyB.mOrientation to obtain rB
+	PfxSolverBody &solverBodyA, // delta velocities are updated in place
+	PfxSolverBody &solverBodyB,
+	float friction // scales the friction limits below
+	)
+{
+	vmVector3 rA = rotate(solverBodyA.mOrientation,contactPointA);
+	vmVector3 rB = rotate(solverBodyB.mOrientation,contactPointB);
+	// Response row first: its accumulated impulse bounds the friction rows.
+	pfxSolveLinearConstraintRow(constraintResponse,
+		solverBodyA.mDeltaLinearVelocity,solverBodyA.mDeltaAngularVelocity,solverBodyA.mMassInv,solverBodyA.mInertiaInv,rA,
+		solverBodyB.mDeltaLinearVelocity,solverBodyB.mDeltaAngularVelocity,solverBodyB.mMassInv,solverBodyB.mInertiaInv,rB);
+	// Friction impulse is limited to +/- friction * |accumulated response impulse|.
+	float mf = friction*fabsf(constraintResponse.m_accumImpulse);
+	constraintFriction1.m_lowerLimit = -mf;
+	constraintFriction1.m_upperLimit =  mf;
+	constraintFriction2.m_lowerLimit = -mf;
+	constraintFriction2.m_upperLimit =  mf;
+	// Both friction rows use the same offsets and bodies as the response row.
+	pfxSolveLinearConstraintRow(constraintFriction1,
+		solverBodyA.mDeltaLinearVelocity,solverBodyA.mDeltaAngularVelocity,solverBodyA.mMassInv,solverBodyA.mInertiaInv,rA,
+		solverBodyB.mDeltaLinearVelocity,solverBodyB.mDeltaAngularVelocity,solverBodyB.mMassInv,solverBodyB.mInertiaInv,rB);
+
+	pfxSolveLinearConstraintRow(constraintFriction2,
+		solverBodyA.mDeltaLinearVelocity,solverBodyA.mDeltaAngularVelocity,solverBodyA.mMassInv,solverBodyA.mInertiaInv,rA,
+		solverBodyB.mDeltaLinearVelocity,solverBodyB.mDeltaAngularVelocity,solverBodyB.mMassInv,solverBodyB.mInertiaInv,rB);
+}
+
+
+void CustomSolveConstraintsTaskParallel(
+	const PfxParallelGroup *contactParallelGroup,const PfxParallelBatch *contactParallelBatches,
+	PfxConstraintPair *contactPairs,uint32_t numContactPairs,
+	btPersistentManifold* offsetContactManifolds,
+	const PfxParallelGroup *jointParallelGroup,const PfxParallelBatch *jointParallelBatches,
+	PfxConstraintPair *jointPairs,uint32_t numJointPairs,
+	btSolverConstraint* offsetSolverConstraints,
+	TrbState *offsetRigStates,
+	PfxSolverBody *offsetSolverBodies,
+	uint32_t numRigidBodies,
+	int iteration,unsigned int taskId,unsigned int numTasks,btBarrier *barrier)
+{
+	// Per-task solve loop: runs iteration+1 passes over the joint and contact batches owned by taskId (batchId % numTasks == taskId).
+	PfxSolverBody staticBody;
+	staticBody.mMassInv = 0.f;
+	staticBody.mDeltaAngularVelocity=vmVector3(0,0,0);
+	staticBody.mDeltaLinearVelocity =vmVector3(0,0,0);
+	// staticBody stands in for joint bodies whose id is the 65535 sentinel; with zero inverse mass the row solver never modifies it.
+	// Pass k==0 only re-applies the stored contact impulses; passes 1..iteration solve the rows. barrier may be null (no synchronisation).
+	for(int k=0;k<iteration+1;k++) {
+		// Joint
+		for(uint32_t phaseId=0;phaseId<jointParallelGroup->numPhases;phaseId++) {
+			for(uint32_t batchId=0;batchId<jointParallelGroup->numBatches[phaseId];batchId++) {
+				uint32_t numPairs = jointParallelGroup->numPairs[phaseId*PFX_MAX_SOLVER_BATCHES+batchId];
+				if(batchId%numTasks == taskId && numPairs > 0) {
+					const PfxParallelBatch &batch = jointParallelBatches[phaseId*PFX_MAX_SOLVER_BATCHES+batchId];
+					for(uint32_t i=0;i<numPairs;i++) {
+						PfxConstraintPair &pair = jointPairs[batch.pairIndices[i]];
+						uint16_t iA = pfxGetRigidBodyIdA(pair);
+						uint16_t iB = pfxGetRigidBodyIdB(pair);
+
+						// 65535 marks "no rigid body" on that side of the joint.
+						PfxSolverBody &solverBodyA = iA != 65535 ? offsetSolverBodies[iA] : staticBody;
+						PfxSolverBody &solverBodyB = iB != 65535 ? offsetSolverBodies[iB] : staticBody;
+
+						if(k==0) {
+							// nothing to do for joints in pass 0
+						}
+						else {
+							btSolverConstraint* constraintRow = &offsetSolverConstraints[pfxGetContactId1(pair)];
+							int numRows = pfxGetNumConstraints(pair);
+							int row; // distinct name: the original reused 'i', shadowing the pair index
+							for (row=0;row<numRows;row++)
+							{
+								resolveSingleConstraintRowGeneric(solverBodyA,solverBodyB,constraintRow[row]);
+							}
+							
+						}
+					}
+				}
+			}
+			// All tasks finish a phase before the next one starts; guarded like the contact stage so a null barrier is not dereferenced.
+			if (barrier) barrier->sync();
+		}
+
+		// Contact
+		for(uint32_t phaseId=0;phaseId<contactParallelGroup->numPhases;phaseId++) {
+			for(uint32_t batchId=0;batchId<contactParallelGroup->numBatches[phaseId];batchId++) {
+				uint32_t numPairs = contactParallelGroup->numPairs[phaseId*PFX_MAX_SOLVER_BATCHES+batchId];
+				if(batchId%numTasks == taskId && numPairs > 0) {
+					const PfxParallelBatch &batch = contactParallelBatches[phaseId*PFX_MAX_SOLVER_BATCHES+batchId];
+					for(uint32_t i=0;i<numPairs;i++) {
+						PfxConstraintPair &pair = contactPairs[batch.pairIndices[i]];
+						uint16_t iA = pfxGetRigidBodyIdA(pair);
+						uint16_t iB = pfxGetRigidBodyIdB(pair);
+
+						btPersistentManifold& contact = offsetContactManifolds[pfxGetConstraintId1(pair)];
+
+						// Contact pairs index the solver bodies directly (no 65535 sentinel handling here).
+						PfxSolverBody &solverBodyA = offsetSolverBodies[iA];
+						PfxSolverBody &solverBodyB = offsetSolverBodies[iB];
+						
+						for(int j=0;j<contact.getNumContacts();j++) {
+							btManifoldPoint& cp = contact.getContactPoint(j);
+							
+							if(k==0) {
+								vmVector3 rA = rotate(solverBodyA.mOrientation,btReadVector3(cp.m_localPointA));
+								vmVector3 rB = rotate(solverBodyB.mOrientation,btReadVector3(cp.m_localPointB));
+								// Re-apply the accumulated impulse of each of the three rows (index renamed so it no longer shadows the pass counter k).
+								for(int row=0;row<3;row++) {
+									vmVector3 normal = btReadVector3(cp.mConstraintRow[row].m_normal);
+									float deltaImpulse = cp.mConstraintRow[row].m_accumImpulse;
+									solverBodyA.mDeltaLinearVelocity += deltaImpulse * solverBodyA.mMassInv * normal;
+									solverBodyA.mDeltaAngularVelocity += deltaImpulse * solverBodyA.mInertiaInv * cross(rA,normal);
+									solverBodyB.mDeltaLinearVelocity -= deltaImpulse * solverBodyB.mMassInv * normal;
+									solverBodyB.mDeltaAngularVelocity -= deltaImpulse * solverBodyB.mInertiaInv * cross(rB,normal);
+								}
+							}
+							else {
+								btSolveContactConstraint(
+									cp.mConstraintRow[0],
+									cp.mConstraintRow[1],
+									cp.mConstraintRow[2],
+									btReadVector3(cp.m_localPointA),
+									btReadVector3(cp.m_localPointB),
+									solverBodyA,
+									solverBodyB,
+									cp.m_combinedFriction
+									);
+							}
+						}
+					}
+				}
+			}
+
+			if (barrier)
+				barrier->sync();
+		}
+	}
+}
+
+void CustomPostSolverTask(
+	TrbState *states,
+	PfxSolverBody *solverBodies,
+	uint32_t numRigidBodies)
+{
+	// Adds each solver body's accumulated delta velocities onto the matching rigid-body state (same index in both arrays).
+	for(uint32_t bodyIdx=0;bodyIdx<numRigidBodies;++bodyIdx) {
+		const PfxSolverBody &delta = solverBodies[bodyIdx];
+		states[bodyIdx].setLinearVelocity(states[bodyIdx].getLinearVelocity()+delta.mDeltaLinearVelocity);
+		states[bodyIdx].setAngularVelocity(states[bodyIdx].getAngularVelocity()+delta.mDeltaAngularVelocity);
+	}
+}
+
+void*	SolverlsMemoryFunc()
+{
+	// No local-store memory is created; callers always receive a null pointer.
+	return static_cast<void*>(0);
+}
+
+
+static SIMD_FORCE_INLINE
+void pfxGetPlaneSpace(const vmVector3& n, vmVector3& p, vmVector3& q)
+{
+	if(!(fabsf(n[2]) > 0.707f)) {
+		// |n.z| is not above 0.707: build p in the x-y plane
+		float lenSq = n[0]*n[0] + n[1]*n[1];
+		float invLen = 1.0f/sqrtf(lenSq);
+		p[0] = -n[1]*invLen;
+		p[1] = n[0]*invLen;
+		p[2] = 0;
+		// q = n x p
+		q[0] = -n[2]*p[1];
+		q[1] = n[2]*p[0];
+		q[2] = lenSq*invLen;
+	}
+	else {
+		// |n.z| is above 0.707: build p in the y-z plane
+		float lenSq = n[1]*n[1] + n[2]*n[2];
+		float invLen = 1.0f/sqrtf(lenSq);
+		p[0] = 0;
+		p[1] = -n[2]*invLen;
+		p[2] = n[1]*invLen;
+		// q = n x p
+		q[0] = lenSq*invLen;
+		q[1] = -n[0]*p[2];
+		q[2] = n[0]*p[1];
+	}
+}
+
+
+
+#define PFX_CONTACT_SLOP 0.001f // added to the penetration depth before the position-error term is computed below
+// Fills the response row and both friction rows of one contact point (rhs, jacDiagInv, limits, direction).
+void btSetupContactConstraint(
+	btConstraintRow &constraintResponse, // out: row along contactNormal
+	btConstraintRow &constraintFriction1, // out: row along the first tangent
+	btConstraintRow &constraintFriction2, // out: row along the second tangent
+	float penetrationDepth, // only values with penetrationDepth+PFX_CONTACT_SLOP < 0 contribute a position error
+	float restitution, // scales the velocity-error term of the response row
+	float friction, // not read in this function
+	const vmVector3 &contactNormal,
+	const vmVector3 &contactPointA, // rotated by solverBodyA.mOrientation to obtain rA
+	const vmVector3 &contactPointB, // rotated by solverBodyB.mOrientation to obtain rB
+	const TrbState &stateA, // supplies linear and angular velocity of body A
+	const TrbState &stateB, // supplies linear and angular velocity of body B
+	PfxSolverBody &solverBodyA, // supplies orientation, inverse mass and inverse inertia of body A
+	PfxSolverBody &solverBodyB, // supplies orientation, inverse mass and inverse inertia of body B
+	float separateBias, // scales the position-error term
+	float timeStep // the position-error term is divided by this
+	)
+{
+	vmVector3 rA = rotate(solverBodyA.mOrientation,contactPointA);
+	vmVector3 rB = rotate(solverBodyB.mOrientation,contactPointB);
+	// K combines both bodies' inverse mass and inverse inertia at the offsets rA/rB; dot(K*d,d) normalises each row along direction d.
+	vmMatrix3 K = vmMatrix3::scale(vmVector3(solverBodyA.mMassInv + solverBodyB.mMassInv)) - 
+			crossMatrix(rA) * solverBodyA.mInertiaInv * crossMatrix(rA) - 
+			crossMatrix(rB) * solverBodyB.mInertiaInv * crossMatrix(rB);
+	// Relative velocity of the two bodies at the contact offsets.
+	vmVector3 vA = stateA.getLinearVelocity() + cross(stateA.getAngularVelocity(),rA);
+	vmVector3 vB = stateB.getLinearVelocity() + cross(stateB.getAngularVelocity(),rB);
+	vmVector3 vAB = vA-vB;
+	// btPlaneSpace1 fills the two friction directions (presumably orthogonal to contactNormal - confirm against its definition).
+	vmVector3 tangent1,tangent2;
+	btPlaneSpace1(contactNormal,tangent1,tangent2);
+	// The accumulated impulses are deliberately not reset here (see the disabled lines below).
+//	constraintResponse.m_accumImpulse = 0.f;
+//	constraintFriction1.m_accumImpulse = 0.f;
+//	constraintFriction2.m_accumImpulse = 0.f;
+
+	// Contact Constraint
+	{
+		vmVector3 normal = contactNormal;
+
+		float denom = dot(K*normal,normal);
+
+		constraintResponse.m_rhs = -(1.0f+restitution)*dot(vAB,normal); // velocity error
+		constraintResponse.m_rhs -= (separateBias * btMin(0.0f,penetrationDepth+PFX_CONTACT_SLOP)) / timeStep; // position error
+		constraintResponse.m_rhs /= denom;
+		constraintResponse.m_jacDiagInv = 1.0f/denom;
+		constraintResponse.m_lowerLimit = 0.0f; // limits [0, SIMD_INFINITY]: the accumulated response impulse cannot go negative
+		constraintResponse.m_upperLimit = SIMD_INFINITY;
+		btStoreVector3(normal,constraintResponse.m_normal);
+	}
+
+	// Friction Constraint 1
+	{
+		vmVector3 normal = tangent1;
+
+		float denom = dot(K*normal,normal);
+
+		constraintFriction1.m_jacDiagInv = 1.0f/denom;
+		constraintFriction1.m_rhs = -dot(vAB,normal);
+		constraintFriction1.m_rhs *= constraintFriction1.m_jacDiagInv;
+		constraintFriction1.m_lowerLimit = 0.0f; // placeholder limits: btSolveContactConstraint overwrites them before solving
+		constraintFriction1.m_upperLimit = SIMD_INFINITY;
+		btStoreVector3(normal,constraintFriction1.m_normal);
+	}
+	
+	// Friction Constraint 2
+	{
+		vmVector3 normal = tangent2;
+
+		float denom = dot(K*normal,normal);
+
+		constraintFriction2.m_jacDiagInv = 1.0f/denom;
+		constraintFriction2.m_rhs = -dot(vAB,normal);
+		constraintFriction2.m_rhs *= constraintFriction2.m_jacDiagInv;
+		constraintFriction2.m_lowerLimit = 0.0f; // placeholder limits: btSolveContactConstraint overwrites them before solving
+		constraintFriction2.m_upperLimit = SIMD_INFINITY;
+		btStoreVector3(normal,constraintFriction2.m_normal);
+	}
+}
+
+
+void CustomSetupContactConstraintsTask(
+	PfxConstraintPair *contactPairs,uint32_t numContactPairs,
+	btPersistentManifold*	offsetContactManifolds,
+	TrbState *offsetRigStates,
+	PfxSolverBody *offsetSolverBodies,
+	uint32_t numRigidBodies,
+	float separateBias,
+	float timeStep)
+{
+	// Sets up the three constraint rows of every contact point of every usable pair in contactPairs[0..numContactPairs).
+	for(uint32_t pairIdx=0;pairIdx<numContactPairs;++pairIdx) {
+		PfxConstraintPair &curPair = contactPairs[pairIdx];
+
+		// Skip pairs that are inactive, carry no constraints, or join two static bodies.
+		if(!pfxGetActive(curPair)) continue;
+		if(pfxGetNumConstraints(curPair) == 0) continue;
+		if((pfxGetMotionMaskA(curPair)&PFX_MOTION_MASK_STATIC) && (pfxGetMotionMaskB(curPair)&PFX_MOTION_MASK_STATIC)) continue;
+
+		const uint16_t bodyA = pfxGetRigidBodyIdA(curPair);
+		const uint16_t bodyB = pfxGetRigidBodyIdB(curPair);
+
+		const int manifoldId = pfxGetConstraintId1(curPair);
+		btPersistentManifold& manifold = offsetContactManifolds[manifoldId];
+
+		// Rigid-body state and solver data of both bodies, indexed by body id.
+		TrbState &stateA = offsetRigStates[bodyA];
+		TrbState &stateB = offsetRigStates[bodyB];
+//		PfxRigBody &bodyA = offsetRigBodies[iA];
+//		PfxRigBody &bodyB = offsetRigBodies[iB];
+		PfxSolverBody &solverBodyA = offsetSolverBodies[bodyA];
+		PfxSolverBody &solverBodyB = offsetSolverBodies[bodyB];
+
+		// Pair material: arithmetic mean of the restitutions, geometric mean of the frictions.
+		float restitution = 0.5f * (solverBodyA.restitution + solverBodyB.restitution);
+		//if(contact.getDuration() > 1) restitution = 0.0f;
+		float friction = sqrtf(solverBodyA.friction * solverBodyB.friction);
+
+		for(int pointIdx=0;pointIdx<manifold.getNumContacts();pointIdx++) {
+			btManifoldPoint& point = manifold.getContactPoint(pointIdx);
+			// Rows 0/1/2 are the response row and the two friction rows.
+			btSetupContactConstraint(
+				point.mConstraintRow[0],
+				point.mConstraintRow[1],
+				point.mConstraintRow[2],
+				point.getDistance(),
+				restitution,
+				friction,
+				btReadVector3(point.m_normalWorldOnB),//.mConstraintRow[0].m_normal),
+				btReadVector3(point.m_localPointA),
+				btReadVector3(point.m_localPointB),
+				stateA,
+				stateB,
+				solverBodyA,
+				solverBodyB,
+				separateBias,
+				timeStep
+				);
+		}
+
+		//contact.setCompositeFriction(friction);
+	}
+}
+
+void	SolverThreadFunc(void* userPtr,void* lsMemory)
+{
+	btConstraintSolverIO* io = (btConstraintSolverIO*)(userPtr);//arg->io);
+	btCriticalSection* criticalsection = io->setupContactConstraints.criticalSection;
+	// Worker entry point: userPtr is one btConstraintSolverIO command; lsMemory is not used here.
+	// criticalsection is fetched for every command but only dereferenced by the SETUP_CONTACT_CONSTRAINTS case.
+	//CustomCriticalSection *criticalsection = &io->m_cs;
+	switch(io->cmd) {
+
+		case PFX_CONSTRAINT_SOLVER_CMD_SOLVE_CONSTRAINTS:
+		CustomSolveConstraintsTaskParallel(
+			io->solveConstraints.contactParallelGroup,
+			io->solveConstraints.contactParallelBatches,
+			io->solveConstraints.contactPairs,
+			io->solveConstraints.numContactPairs,
+			io->solveConstraints.offsetContactManifolds,
+
+			io->solveConstraints.jointParallelGroup,
+			io->solveConstraints.jointParallelBatches,
+			io->solveConstraints.jointPairs,
+			io->solveConstraints.numJointPairs,
+			io->solveConstraints.offsetSolverConstraints,
+			io->solveConstraints.offsetRigStates1,
+			io->solveConstraints.offsetSolverBodies,
+			io->solveConstraints.numRigidBodies,
+			io->solveConstraints.iteration,
+
+			io->solveConstraints.taskId,
+			io->maxTasks1,
+			io->solveConstraints.barrier
+			);
+		break;
+
+		case PFX_CONSTRAINT_SOLVER_CMD_POST_SOLVER:
+			CustomPostSolverTask(	io->postSolver.states,io->postSolver.solverBodies,	io->postSolver.numRigidBodies);
+			break;
+
+		// Setup: every worker repeatedly claims the next [start, start+batch) range of contact pairs until none are left.
+		case PFX_CONSTRAINT_SOLVER_CMD_SETUP_CONTACT_CONSTRAINTS:
+		{
+			bool empty = false;
+			while(!empty) {
+				int start,batch;
+				// Shared param 0 is the next start index, shared param 1 the next batch size; both are read and advanced under the lock.
+				criticalsection->lock();
+
+				start = (int)criticalsection->getSharedParam(0);
+				batch = (int)criticalsection->getSharedParam(1);
+
+				//PFX_PRINTF("taskId %d start %d num %d\n",arg->taskId,start,batch);
+
+				// Publish the range the next claimant will take (batch size shrinks to the remaining pair count, possibly 0).
+				int nextStart = start + batch;
+				int rest = btMax((int)io->setupContactConstraints.numContactPairs1 - nextStart,0);
+				int nextBatch = (rest > batch)?batch:rest;
+
+				criticalsection->setSharedParam(0,nextStart);
+                criticalsection->setSharedParam(1,nextBatch);
+
+				criticalsection->unlock();
+				// The claimed range is processed outside the lock.
+				if(batch > 0) {
+					CustomSetupContactConstraintsTask(
+						io->setupContactConstraints.offsetContactPairs+start,batch,
+						io->setupContactConstraints.offsetContactManifolds,
+						io->setupContactConstraints.offsetRigStates,
+//						io->setupContactConstraints.offsetRigBodies,
+						io->setupContactConstraints.offsetSolverBodies,
+						io->setupContactConstraints.numRigidBodies,
+						io->setupContactConstraints.separateBias,
+						io->setupContactConstraints.timeStep);
+				}
+				else {
+					empty = true; // a zero-sized batch means all pairs have been claimed
+				}
+			}
+		}
+		break;
+
+		default:
+			{
+				btAssert(0); // unknown command
+			}
+	}
+
+}
+
+
+void CustomSetupContactConstraintsNew( // Dispatches contact-constraint setup to all tasks of threadSupport and waits for them to finish.
+	PfxConstraintPair *contactPairs1,uint32_t numContactPairs,
+	btPersistentManifold *offsetContactManifolds,
+	TrbState *offsetRigStates,
+	PfxSolverBody *offsetSolverBodies,
+	uint32_t numRigidBodies,
+	float separationBias,
+	float timeStep,
+	class btThreadSupportInterface* threadSupport,
+	btCriticalSection* criticalSection, // carries the shared (start, batch) work counters; may be null, see below
+	btConstraintSolverIO *io // one command slot per task, filled in here
+	)
+{
+	int maxTasks = threadSupport->getNumTasks();
+	// Aim for about four batches per task (rounded up); the initial batch size published below is capped at 64 pairs.
+	int div = (int)maxTasks * 4;
+	int batch = ((int)numContactPairs + div - 1) / div;
+#ifdef __PPU__
+		BulletPE2ConstraintSolverSpursSupport* spursThread = (BulletPE2ConstraintSolverSpursSupport*) threadSupport;
+#endif
+	if (criticalSection)
+	{
+		criticalSection->setSharedParam(0,0);
+		criticalSection->setSharedParam(1,btMin(batch,64)); // batched number
+	} else
+	{
+#ifdef __PPU__
+		spursThread->setSharedParam(0,0);
+		spursThread->setSharedParam(1,btMin(batch,64)); // batched number
+#endif //__PPU__
+	}
+	// NOTE(review): without __PPU__ a null criticalSection leaves the shared counters unset - confirm callers always pass one.
+	for(int t=0;t<maxTasks;t++) {
+		io[t].cmd = PFX_CONSTRAINT_SOLVER_CMD_SETUP_CONTACT_CONSTRAINTS;
+		io[t].setupContactConstraints.offsetContactPairs = contactPairs1;
+		io[t].setupContactConstraints.numContactPairs1 = numContactPairs;
+		io[t].setupContactConstraints.offsetRigStates = offsetRigStates;
+		io[t].setupContactConstraints.offsetContactManifolds = offsetContactManifolds;		
+		io[t].setupContactConstraints.offsetSolverBodies = offsetSolverBodies;
+		io[t].setupContactConstraints.numRigidBodies = numRigidBodies;
+		io[t].setupContactConstraints.separateBias = separationBias;
+		io[t].setupContactConstraints.timeStep = timeStep;
+		io[t].setupContactConstraints.criticalSection = criticalSection;
+		io[t].maxTasks1 = maxTasks;
+#ifdef __PPU__
+		io[t].barrierAddr2 = (unsigned int)spursThread->getBarrierAddress();
+		io[t].criticalsectionAddr2 = (unsigned int)spursThread->getCriticalSectionAddress();
+#endif
+	// Every task receives the same command; the work is divided at run time through the shared counters.
+	// With SEQUENTIAL_SETUP defined the task function is called directly instead of sending a request.
+//#define SEQUENTIAL_SETUP
+#ifdef SEQUENTIAL_SETUP
+		CustomSetupContactConstraintsTask(contactPairs1,numContactPairs,offsetContactManifolds,offsetRigStates,offsetSolverBodies,numRigidBodies,separationBias,timeStep);
+#else
+		threadSupport->sendRequest(1,(ppu_address_t)&io[t],t);
+#endif
+
+	}
+#ifndef SEQUENTIAL_SETUP
+	unsigned int arg0,arg1;
+	for(int t=0;t<maxTasks;t++) {
+		arg0 = t;
+		threadSupport->waitForResponse(&arg0,&arg1); // one wait per request sent above
+	}
+#endif //SEQUENTIAL_SETUP
+
+}
+
+
+void CustomSplitConstraints(
+	PfxConstraintPair *pairs,uint32_t numPairs,
+	PfxParallelGroup &group,PfxParallelBatch *batches,
+	uint32_t numTasks,
+	uint32_t numRigidBodies,
+	void *poolBuff,
+	uint32_t poolBytes
+	)
+{
+	HeapManager pool((unsigned char*)poolBuff,poolBytes);
+	// Splits pairs into phases of batches such that, within a phase, no dynamic body appears in two different batches.
+	// Per-body table: the batch that owns the body in the current phase, or 0xff when the body is still unclaimed.
+	int bufSize = sizeof(uint8_t)*numRigidBodies;
+	bufSize = ((bufSize+127)>>7)<<7; // 128 bytes alignment
+	uint8_t *bodyTable = (uint8_t*)pool.allocate(bufSize,HeapManager::ALIGN128);
+
+	// Per-pair bit set: bit i is set once pair i has been assigned to a batch or skipped.
+	uint32_t *pairTable;
+	size_t allocSize = sizeof(uint32_t)*((numPairs+31)/32);
+	pairTable = (uint32_t*)pool.allocate(allocSize);
+	memset(pairTable,0,allocSize);
+
+	// Target number of pairs per batch, kept within [PFX_MIN_SOLVER_PAIRS, PFX_MAX_SOLVER_PAIRS].
+	uint32_t targetCount = btMax(uint32_t(PFX_MIN_SOLVER_PAIRS),btMin(numPairs / (numTasks*2),uint32_t(PFX_MAX_SOLVER_PAIRS)));
+	uint32_t startIndex = 0;
+	
+	uint32_t phaseId;
+	uint32_t batchId;
+	uint32_t totalCount=0;
+	
+	uint32_t maxBatches = btMin(numTasks,uint32_t(PFX_MAX_SOLVER_BATCHES));
+	
+	for(phaseId=0;phaseId<PFX_MAX_SOLVER_PHASES&&totalCount<numPairs;phaseId++) {
+		bool startIndexCheck = true; // true while every pair from startIndex on has been consumed, so startIndex may advance
+		
+		group.numBatches[phaseId] = 0;
+		
+		uint32_t i = startIndex;
+		
+        // Every body starts each phase unclaimed.
+		memset(bodyTable,0xff,bufSize);
+		
+		for(batchId=0;i<numPairs&&totalCount<numPairs&&batchId<maxBatches;batchId++) {
+			uint32_t pairCount=0;
+			
+			PfxParallelBatch &batch = batches[phaseId*PFX_MAX_SOLVER_BATCHES+batchId];
+			uint32_t pairId = 0;
+			
+			for(;i<numPairs&&pairCount<targetCount;i++) {
+				uint32_t idxP = i>>5;
+				uint32_t maskP = uint32_t(1) << (i & 31); // unsigned shift: bit 31 must not land in a signed long's sign bit
+				
+				//pair is already assigned to a phase/batch
+				if(pairTable[idxP] & maskP) {
+					continue;
+				}
+				
+				uint32_t idxA = pfxGetRigidBodyIdA(pairs[i]);
+				uint32_t idxB = pfxGetRigidBodyIdB(pairs[i]);
+
+				// Pairs that are inactive, have no constraints, or join two static bodies are marked as handled and never batched.
+				if(!pfxGetActive(pairs[i]) || pfxGetNumConstraints(pairs[i]) == 0 ||
+					((pfxGetMotionMaskA(pairs[i])&PFX_MOTION_MASK_STATIC) && (pfxGetMotionMaskB(pairs[i])&PFX_MOTION_MASK_STATIC)) ) {
+					if(startIndexCheck) 
+						startIndex++;
+					//assign pair -> skip it because it has no constraints
+					pairTable[idxP] |= maskP;
+					totalCount++;
+					continue;
+				}
+				
+				// Dependency check. Ids >= numRigidBodies (e.g. the 65535 "no body" id used by joints) have no table slot and never conflict.
+				if( (idxA < numRigidBodies && bodyTable[idxA] != batchId && bodyTable[idxA] != 0xff) || 
+					(idxB < numRigidBodies && bodyTable[idxB] != batchId && bodyTable[idxB] != 0xff) ) {
+					startIndexCheck = false;
+					//bodies of the pair are already assigned to another batch within this phase
+					continue;
+				}
+				
+				// Claim the dynamic bodies of this pair for the current batch (only ids that have a table slot).
+				if(idxA < numRigidBodies && (pfxGetMotionMaskA(pairs[i])&PFX_MOTION_MASK_DYNAMIC)) 
+						bodyTable[idxA] = batchId;
+				if(idxB < numRigidBodies && (pfxGetMotionMaskB(pairs[i])&PFX_MOTION_MASK_DYNAMIC)) 
+						bodyTable[idxB] = batchId;
+				
+				if(startIndexCheck) 
+					startIndex++;
+				
+				pairTable[idxP] |= maskP;
+				//add the pair 'i' to the current batch
+				batch.pairIndices[pairId++] = i;
+				pairCount++;
+			}
+
+			group.numPairs[phaseId*PFX_MAX_SOLVER_BATCHES+batchId] = (uint16_t)pairId;
+			totalCount += pairCount;
+		}
+
+		group.numBatches[phaseId] = batchId;
+	}
+
+	group.numPhases = phaseId;
+
+	pool.clear();
+}
+
+
+
+void CustomSolveConstraintsParallel( // Splits contacts and joints into batches, runs the solve command on all tasks, then the post-solver command.
+	PfxConstraintPair *contactPairs,uint32_t numContactPairs,
+	
+	PfxConstraintPair *jointPairs,uint32_t numJointPairs,
+	btPersistentManifold* offsetContactManifolds,
+	btSolverConstraint* offsetSolverConstraints,
+	TrbState *offsetRigStates,
+	PfxSolverBody *offsetSolverBodies,
+	uint32_t numRigidBodies,
+	struct btConstraintSolverIO* io, // one command slot per task, reused for both commands
+	class btThreadSupportInterface* threadSupport,
+	int iteration,
+	void* poolBuf, // scratch memory for the batch tables and the splitter's temporaries
+	int poolBytes,
+	class btBarrier* barrier)
+	{
+
+	int maxTasks = threadSupport->getNumTasks();
+//	config.taskManager->setTaskEntry(PFX_SOLVER_ENTRY);
+
+	HeapManager pool((unsigned char*)poolBuf,poolBytes);
+	// Stage 1: build the contact and joint batch tables in the pool, then dispatch the solve command.
+	{
+		PfxParallelGroup *cgroup = (PfxParallelGroup*)pool.allocate(sizeof(PfxParallelGroup));
+		PfxParallelBatch *cbatches = (PfxParallelBatch*)pool.allocate(sizeof(PfxParallelBatch)*(PFX_MAX_SOLVER_PHASES*PFX_MAX_SOLVER_BATCHES),128);
+		PfxParallelGroup *jgroup = (PfxParallelGroup*)pool.allocate(sizeof(PfxParallelGroup));
+		PfxParallelBatch *jbatches = (PfxParallelBatch*)pool.allocate(sizeof(PfxParallelBatch)*(PFX_MAX_SOLVER_PHASES*PFX_MAX_SOLVER_BATCHES),128);
+		// The rest of the pool (minus 128 bytes of slack per group/batch set) is temporary space shared by both split calls.
+		uint32_t tmpBytes = poolBytes - 2 * (sizeof(PfxParallelGroup) + sizeof(PfxParallelBatch)*(PFX_MAX_SOLVER_PHASES*PFX_MAX_SOLVER_BATCHES) + 128);
+		void *tmpBuff = pool.allocate(tmpBytes);
+		
+		{
+			BT_PROFILE("CustomSplitConstraints");
+			CustomSplitConstraints(contactPairs,numContactPairs,*cgroup,cbatches,maxTasks,numRigidBodies,tmpBuff,tmpBytes);
+			CustomSplitConstraints(jointPairs,numJointPairs,*jgroup,jbatches,maxTasks,numRigidBodies,tmpBuff,tmpBytes);
+		}
+		// NOTE(review): the disabled SOLVE_SEQUENTIAL branch names CustomSolveConstraintsTask, offsetJoints and offsetRigStates, none of which the parallel branch uses - verify before enabling it.
+		{
+			BT_PROFILE("PFX_CONSTRAINT_SOLVER_CMD_SOLVE_CONSTRAINTS");
+//#define SOLVE_SEQUENTIAL
+#ifdef SOLVE_SEQUENTIAL
+		CustomSolveConstraintsTask(
+			io->solveConstraints.contactParallelGroup,
+			io->solveConstraints.contactParallelBatches,
+			io->solveConstraints.contactPairs,
+			io->solveConstraints.numContactPairs,
+			io->solveConstraints.offsetContactManifolds,
+
+			io->solveConstraints.jointParallelGroup,
+			io->solveConstraints.jointParallelBatches,
+			io->solveConstraints.jointPairs,
+			io->solveConstraints.numJointPairs,
+			io->solveConstraints.offsetJoints,
+
+			io->solveConstraints.offsetRigStates,
+			io->solveConstraints.offsetSolverBodies,
+			io->solveConstraints.numRigidBodies,
+			io->solveConstraints.iteration,0,1,0);//arg->taskId,1,0);//,arg->maxTasks,arg->barrier);
+#else
+		for(int t=0;t<maxTasks;t++) {
+			io[t].cmd = PFX_CONSTRAINT_SOLVER_CMD_SOLVE_CONSTRAINTS;
+			io[t].solveConstraints.contactParallelGroup = cgroup;
+			io[t].solveConstraints.contactParallelBatches = cbatches;
+			io[t].solveConstraints.contactPairs = contactPairs;
+			io[t].solveConstraints.numContactPairs = numContactPairs;
+			io[t].solveConstraints.offsetContactManifolds = offsetContactManifolds;
+			io[t].solveConstraints.jointParallelGroup = jgroup;
+			io[t].solveConstraints.jointParallelBatches = jbatches;
+			io[t].solveConstraints.jointPairs = jointPairs;
+			io[t].solveConstraints.numJointPairs = numJointPairs;
+			io[t].solveConstraints.offsetSolverConstraints = offsetSolverConstraints;
+			io[t].solveConstraints.offsetRigStates1 = offsetRigStates;
+			io[t].solveConstraints.offsetSolverBodies = offsetSolverBodies;
+			io[t].solveConstraints.numRigidBodies = numRigidBodies;
+			io[t].solveConstraints.iteration = iteration;
+			io[t].solveConstraints.taskId = t;
+			io[t].solveConstraints.barrier = barrier;
+			// All tasks share the same tables; taskId selects which batches each task solves.
+		io[t].maxTasks1 = maxTasks;
+#ifdef __PPU__
+		BulletPE2ConstraintSolverSpursSupport* spursThread = (BulletPE2ConstraintSolverSpursSupport*) threadSupport;
+		io[t].barrierAddr2 = (unsigned int) spursThread->getBarrierAddress();
+		io[t].criticalsectionAddr2 = (unsigned int)spursThread->getCriticalSectionAddress();
+#endif
+
+			threadSupport->sendRequest(1,(ppu_address_t)&io[t],t);
+		}
+		// Wait for every task before the pool is cleared and the io slots are reused.
+		unsigned int arg0,arg1;
+		for(int t=0;t<maxTasks;t++) {
+			arg0 = t;
+			threadSupport->waitForResponse(&arg0,&arg1);
+		}
+#endif
+		}
+		pool.clear();
+	}
+	// Stage 2: hand each task a contiguous slice of bodies for the post-solver command.
+	{
+			BT_PROFILE("PFX_CONSTRAINT_SOLVER_CMD_POST_SOLVER");
+		int batch = ((int)numRigidBodies + maxTasks - 1) / maxTasks; // slice size, rounded up
+		int rest = (int)numRigidBodies;
+		int start = 0;
+
+		for(int t=0;t<maxTasks;t++) {
+			int num = (rest - batch ) > 0 ? batch : rest; // the last non-empty slice takes whatever remains; later ones get 0
+			io[t].cmd = PFX_CONSTRAINT_SOLVER_CMD_POST_SOLVER;
+			io[t].postSolver.states = offsetRigStates + start;
+			io[t].postSolver.solverBodies = offsetSolverBodies + start;
+			io[t].postSolver.numRigidBodies = (uint32_t)num;
+		io[t].maxTasks1 = maxTasks;
+#ifdef __PPU__
+		BulletPE2ConstraintSolverSpursSupport* spursThread = (BulletPE2ConstraintSolverSpursSupport*) threadSupport;
+		io[t].barrierAddr2 = (unsigned int)spursThread->getBarrierAddress();
+		io[t].criticalsectionAddr2 = (unsigned int)spursThread->getCriticalSectionAddress();
+#endif
+
+#ifdef SOLVE_SEQUENTIAL
+			CustomPostSolverTask(	io[t].postSolver.states,io[t].postSolver.solverBodies,	io[t].postSolver.numRigidBodies);
+#else
+			threadSupport->sendRequest(1,(ppu_address_t)&io[t],t);
+#endif
+			rest -= num;
+			start += num;
+		}
+
+		unsigned int arg0,arg1;
+		for(int t=0;t<maxTasks;t++) {
+#ifndef SOLVE_SEQUENTIAL
+			arg0 = t;
+			threadSupport->waitForResponse(&arg0,&arg1);
+#endif
+		}
+	}
+
+}
+
+
+
+void BPE_customConstraintSolverSequentialNew(unsigned int new_num, PfxBroadphasePair *new_pairs1 , // contact pairs; cast to PfxConstraintPair* below
+									btPersistentManifold* offsetContactManifolds,
+									  TrbState* states,int numRigidBodies, 
+									  struct PfxSolverBody* solverBodies, 
+									  PfxConstraintPair* jointPairs, unsigned int numJoints,
+									  btSolverConstraint* offsetSolverConstraints,
+									  float separateBias,
+									  float timeStep,
+									  int iteration,
+									  btThreadSupportInterface* solverThreadSupport,
+									  btCriticalSection* criticalSection,
+									  struct btConstraintSolverIO* solverIO,
+									  btBarrier* barrier
+									  )
+{
+	// Top-level driver: refreshes the joint motion masks, sets up the contact constraints, then solves contacts and joints.
+	{
+		BT_PROFILE("pfxSetupConstraints");
+
+		for(uint32_t i=0;i<numJoints;i++) {
+			// Refresh the pair's motion masks from the current body states.
+			PfxConstraintPair &pair = jointPairs[i];
+			int idA = pfxGetRigidBodyIdA(pair);
+			// A body id of 65535 has no state entry and is treated as static.
+			if (idA != 65535)
+			{
+				pfxSetMotionMaskA(pair,states[pfxGetRigidBodyIdA(pair)].getMotionMask());
+			}
+			else
+			{
+				pfxSetMotionMaskA(pair,PFX_MOTION_MASK_STATIC);
+			}
+			int idB = pfxGetRigidBodyIdB(pair);
+			if (idB!= 65535)
+			{
+				pfxSetMotionMaskB(pair,states[pfxGetRigidBodyIdB(pair)].getMotionMask());
+			} else
+			{
+				pfxSetMotionMaskB(pair,PFX_MOTION_MASK_STATIC);
+			}
+		}
+
+//		CustomSetupJointConstraintsSeq(			jointPairs,numJoints,joints,			states,			solverBodies,			numRigidBodies,			timeStep);
+		// NOTE(review): the SEQUENTIAL_SETUP branch passes 'contacts', which is not declared in this function - verify before enabling it.
+#ifdef SEQUENTIAL_SETUP
+		CustomSetupContactConstraintsSeqNew(
+			(PfxConstraintPair*)new_pairs1,new_num,contacts,
+			states,
+			solverBodies,
+			numRigidBodies,
+			separateBias,
+			timeStep);
+#else
+		CustomSetupContactConstraintsNew(
+			(PfxConstraintPair*)new_pairs1,new_num,
+			offsetContactManifolds,
+			states,
+			solverBodies,
+			numRigidBodies,
+			separateBias,
+			timeStep,
+			solverThreadSupport,
+			criticalSection,solverIO
+			);
+
+#endif //SEQUENTIAL_SETUP
+
+	}
+	{
+		BT_PROFILE("pfxSolveConstraints");
+		// NOTE(review): the SEQUENTIAL branch also passes the undeclared 'contacts' - verify before enabling it.
+//#define SEQUENTIAL
+#ifdef SEQUENTIAL
+		CustomSolveConstraintsSeq(
+			(PfxConstraintPair*)new_pairs1,new_num,contacts,
+			jointPairs,numJoints,
+			states,
+			solverBodies,
+			numRigidBodies,
+			separateBias,
+			timeStep,
+			iteration);
+#else //SEQUENTIAL
+		CustomSolveConstraintsParallel(
+			(PfxConstraintPair*)new_pairs1,new_num,
+			jointPairs,numJoints,
+			offsetContactManifolds,
+			offsetSolverConstraints,
+			states,
+			solverBodies,
+			numRigidBodies,
+			solverIO, solverThreadSupport,
+			iteration,
+			tmp_buff,
+			TMP_BUFF_BYTES,
+			barrier
+			);
+		// The file-static tmp_buff is the scratch pool, so concurrent calls would share it.
+#endif //SEQUENTIAL
+	}
+
+}
+
+/// Scratch arrays owned by btParallelConstraintSolver; solveGroup() resizes them on every call.
+struct	btParallelSolverMemoryCache
+{
+	btAlignedObjectArray<TrbState>	m_mystates; // resized to the body count in solveGroup
+	btAlignedObjectArray<PfxSolverBody>  m_mysolverbodies; // resized to the body count, same indexing as m_mystates
+	btAlignedObjectArray<PfxBroadphasePair> m_mypairs; // contact pairs, resized to the manifold count
+	btAlignedObjectArray<PfxConstraintPair> m_jointPairs; // joint pairs, resized to the constraint count
+	
+};
+/// Allocates an array of numThreads btConstraintSolverIO objects with new[].
+/// The array form of delete (delete[]) is required to release the returned pointer.
+btConstraintSolverIO* createSolverIO(int numThreads)
+{
+	return new btConstraintSolverIO[numThreads];
+}
+/// Stores the caller-provided thread support and allocates the per-task IO array, barrier, critical section and memory cache.
+btParallelConstraintSolver::btParallelConstraintSolver(btThreadSupportInterface* solverThreadSupport)
+{
+	// The thread support object is supplied by the caller rather than created here.
+	m_solverThreadSupport = solverThreadSupport;
+	m_solverIO = createSolverIO(solverThreadSupport->getNumTasks());
+	// Both synchronisation objects are requested from the thread support implementation.
+	m_barrier = solverThreadSupport->createBarrier();
+	m_criticalSection = solverThreadSupport->createCriticalSection();
+	// Scratch arrays reused by solveGroup().
+	m_memoryCache = new btParallelSolverMemoryCache();
+}
+/// Releases the memory cache and the solver IO array; the barrier, critical section and thread support are not released here.
+btParallelConstraintSolver::~btParallelConstraintSolver()
+{
+	delete m_memoryCache;
+	delete[] m_solverIO; // createSolverIO() allocates with new[], so the array form of delete is required
+}
+/// Solves one group of bodies: converts bodies, contact manifolds and typed constraints into the Pfx solver
+/// structures, runs BPE_customConstraintSolverSequentialNew on them, then copies the resulting velocities back
+/// to the rigid bodies with non-zero inverse mass. debugDrawer and stackAlloc are not used. Always returns 0.
+btScalar btParallelConstraintSolver::solveGroup(btCollisionObject** bodies1,int numRigidBodies,btPersistentManifold** manifoldPtr,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher)
+{
+	
+/*	int sz = sizeof(PfxSolverBody);
+	int sz2 = sizeof(vmVector3);
+	int sz3 = sizeof(vmMatrix3);
+	int sz4 = sizeof(vmQuat);
+	int sz5 = sizeof(btConstraintRow);
+	int sz6 = sizeof(btSolverConstraint);
+	int sz7 = sizeof(TrbState);
+*/
+	// Base address of the dispatcher's manifold pool; contact ids below are element offsets from it.
+	btPersistentManifold* offsetContactManifolds= (btPersistentManifold*) dispatcher->getInternalManifoldPool()->getPoolAddress();
+
+	// One solver body and one state per input body, indexed by the body's position in bodies1.
+	m_memoryCache->m_mysolverbodies.resize(numRigidBodies);
+	m_memoryCache->m_mystates.resize(numRigidBodies);
+
+	{
+			BT_PROFILE("create states and solver bodies");
+	for (int i=0;i<numRigidBodies;i++)
+	{
+		btCollisionObject* obj = bodies1[i];
+		obj->setCompanionId(i); // manifolds and constraints read this back to find the body's index
+
+		PfxSolverBody& solverBody = m_memoryCache->m_mysolverbodies[i];
+		btRigidBody* rb = btRigidBody::upcast(obj);
+		TrbState& state = m_memoryCache->m_mystates[i];
+	
+		state.reset();
+		const btQuaternion& orgOri = obj->getWorldTransform().getRotation();
+		vmQuat orn(orgOri.getX(),orgOri.getY(),orgOri.getZ(),orgOri.getW());
+		state.setPosition(getVmVector3(obj->getWorldTransform().getOrigin()));
+		state.setOrientation(orn);
+		state.setPosition(state.getPosition()); // NOTE(review): re-sets the position just written; looks redundant - confirm
+		state.setRigidBodyId(i);
+		state.setAngularDamping(0);
+		state.setLinearDamping(0);
+		
+		
+		solverBody.mOrientation = state.getOrientation();
+		solverBody.mDeltaLinearVelocity = vmVector3(0.0f);
+		solverBody.mDeltaAngularVelocity = vmVector3(0.0f);
+		solverBody.friction = obj->getFriction();
+		solverBody.restitution = obj->getRestitution();
+		
+		state.resetSleepCount();
+		
+		//if(state.getMotionMask()&PFX_MOTION_MASK_DYNAMIC) {
+		if (rb && (rb->getInvMass()>0.f))
+		{
+			state.setAngularVelocity(vmVector3(rb->getAngularVelocity().getX(),rb->getAngularVelocity().getY(),rb->getAngularVelocity().getZ()));
+			state.setLinearVelocity(vmVector3(rb->getLinearVelocity().getX(),rb->getLinearVelocity().getY(),rb->getLinearVelocity().getZ()));
+	
+			state.setMotionType(PfxMotionTypeActive);
+			vmMatrix3 ori(solverBody.mOrientation);
+			vmMatrix3 localInvInertia = vmMatrix3::identity();
+			localInvInertia.setCol(0,vmVector3(rb->getInvInertiaDiagLocal().getX(),0,0));
+			localInvInertia.setCol(1,vmVector3(0, rb->getInvInertiaDiagLocal().getY(),0));
+			localInvInertia.setCol(2,vmVector3(0,0, rb->getInvInertiaDiagLocal().getZ()));
+			// rotate the local diagonal inverse inertia by the body orientation: R * I^-1 * R^T
+			solverBody.mMassInv = rb->getInvMass();
+			solverBody.mInertiaInv = ori * localInvInertia * transpose(ori);
+		} else
+		{
+			state.setAngularVelocity(vmVector3(0));
+			state.setLinearVelocity(vmVector3(0));
+			// non-rigid bodies and bodies with zero inverse mass are treated as fixed
+			state.setMotionType(PfxMotionTypeFixed);
+			m_memoryCache->m_mysolverbodies[i].mMassInv = 0.f;
+			m_memoryCache->m_mysolverbodies[i].mInertiaInv = vmMatrix3(0.0f);
+		}
+
+	}
+	}
+
+
+
+	int totalPoints = 0;
+#ifndef USE_C_ARRAYS
+	m_memoryCache->m_mypairs.resize(numManifolds);
+	m_memoryCache->m_jointPairs.resize(numConstraints);
+#endif//USE_C_ARRAYS
+
+	int actualNumManifolds= 0;
+	{
+		BT_PROFILE("convert manifolds");
+		for (int i1=0;i1<numManifolds;i1++)
+		{
+			if (manifoldPtr[i1]->getNumContacts()>0)
+			{
+				btPersistentManifold* m = manifoldPtr[i1];
+				btCollisionObject* obA = (btCollisionObject*)m->getBody0();
+				btCollisionObject* obB = (btCollisionObject*)m->getBody1();
+				bool obAisActive = !obA->isStaticOrKinematicObject() && obA->isActive();
+				bool obBisActive = !obB->isStaticOrKinematicObject() && obB->isActive();
+				// a manifold is skipped (no pair emitted) when neither body is an active dynamic object
+				if (!obAisActive && !obBisActive)
+					continue;
+
+
+				//int contactId = i1;//actualNumManifolds;
+				
+				PfxBroadphasePair& pair = m_memoryCache->m_mypairs[actualNumManifolds];
+				//init those
+				//@todo: a combined friction (frictionA*frictionB) was computed here but never stored on the pair
+				int idA = obA->getCompanionId();
+				int idB = obB->getCompanionId();
+				
+				m->m_companionIdA = idA;
+				m->m_companionIdB = idB;
+				
+				
+			//	if ((mysolverbodies[idA].mMassInv!=0)&&(mysolverbodies[idB].mMassInv!=0))
+			//		continue;
+				int numPosPoints=0;
+				for (int p=0;p<m->getNumContacts();p++)
+				{
+					//btManifoldPoint& pt = m->getContactPoint(p);
+					//float dist = pt.getDistance();
+					//if (dist<0.001)
+						numPosPoints++;
+				}
+
+				
+				//every contact point of the manifold is counted (the distance filter above is disabled)
+				totalPoints+=numPosPoints;
+				pfxSetRigidBodyIdA(pair,idA);
+				pfxSetRigidBodyIdB(pair,idB);
+				pfxSetMotionMaskA(pair,m_memoryCache->m_mystates[idA].getMotionMask());
+				pfxSetMotionMaskB(pair,m_memoryCache->m_mystates[idB].getMotionMask());
+				pfxSetActive(pair,numPosPoints>0);
+				
+				pfxSetBroadphaseFlag(pair,0);
+				int contactId = m-offsetContactManifolds;
+				//likely the contact pool is not contiguous, make sure to allocate large enough contact pool
+				btAssert(contactId>=0);
+				btAssert(contactId<dispatcher->getInternalManifoldPool()->getMaxCount());
+				
+				pfxSetContactId(pair,contactId);
+				pfxSetNumConstraints(pair,numPosPoints);//manifoldPtr[i]->getNumContacts());
+				actualNumManifolds++;
+			}
+
+		}
+	}
+
+	PfxConstraintPair* jointPairs=0;
+	jointPairs = numConstraints? &m_memoryCache->m_jointPairs[0]:0;
+	int actualNumJoints=0;
+
+
+	btSolverConstraint* offsetSolverConstraints = 0;
+
+	//if (1)
+	{
+		
+		{
+			BT_PROFILE("convert constraints");
+
+			int totalNumRows = 0;
+			int i;
+			
+			m_tmpConstraintSizesPool.resize(numConstraints);
+			//calculate the total number of constraint rows
+			for (i=0;i<numConstraints;i++)
+			{
+				btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
+				constraints[i]->getInfo1(&info1);
+				totalNumRows += info1.m_numConstraintRows;
+			}
+			m_tmpSolverNonContactConstraintPool.resize(totalNumRows);
+			offsetSolverConstraints =totalNumRows? &m_tmpSolverNonContactConstraintPool[0]:0;
+
+			
+			///setup the btSolverConstraints
+			int currentRow = 0;
+
+			for (i=0;i<numConstraints;i++)
+			{
+				const btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
+				
+				if (info1.m_numConstraintRows)
+				{
+					btAssert(currentRow<totalNumRows);
+					btTypedConstraint* constraint = constraints[i];
+					btSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
+
+					btRigidBody& rbA = constraint->getRigidBodyA();
+					btRigidBody& rbB = constraint->getRigidBodyB();
+
+					
+					int j;
+					for ( j=0;j<info1.m_numConstraintRows;j++)
+					{
+						memset(&currentConstraintRow[j],0,sizeof(btSolverConstraint));
+						currentConstraintRow[j].m_lowerLimit = -FLT_MAX;
+						currentConstraintRow[j].m_upperLimit = FLT_MAX;
+						currentConstraintRow[j].m_appliedImpulse = 0.f;
+						currentConstraintRow[j].m_appliedPushImpulse = 0.f;
+						currentConstraintRow[j].m_solverBodyA = &rbA;
+						currentConstraintRow[j].m_solverBodyB = &rbB;
+					}
+
+					rbA.internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
+					rbA.internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
+					rbB.internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
+					rbB.internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
+
+
+					//info2 points into the first row; rowskip lets getInfo2 step to the following rows
+					btTypedConstraint::btConstraintInfo2 info2;
+					info2.fps = 1.f/infoGlobal.m_timeStep;
+					info2.erp = infoGlobal.m_erp;
+					info2.m_J1linearAxis = currentConstraintRow->m_contactNormal;
+					info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
+					info2.m_J2linearAxis = 0;
+					info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
+					info2.rowskip = sizeof(btSolverConstraint)/sizeof(btScalar);//check this
+					///the size of btSolverConstraint needs be a multiple of btScalar
+					btAssert(info2.rowskip*sizeof(btScalar)== sizeof(btSolverConstraint));
+					info2.m_constraintError = &currentConstraintRow->m_rhs;
+					currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
+					info2.cfm = &currentConstraintRow->m_cfm;
+					info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
+					info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
+					info2.m_numIterations = infoGlobal.m_numIterations;
+					constraints[i]->getInfo2(&info2);
+
+					
+					int idA = constraint->getRigidBodyA().getCompanionId();
+					int idB = constraint->getRigidBodyB().getCompanionId();
+				
+
+					///finalize the constraint setup
+					for ( j=0;j<info1.m_numConstraintRows;j++)
+					{
+						btSolverConstraint& solverConstraint = currentConstraintRow[j];
+						solverConstraint.m_originalContactPoint = constraint;
+
+						solverConstraint.m_companionIdA = idA;
+						solverConstraint.m_companionIdB = idB;
+
+						{
+							const btVector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
+							solverConstraint.m_angularComponentA = constraint->getRigidBodyA().getInvInertiaTensorWorld()*ftorqueAxis1*constraint->getRigidBodyA().getAngularFactor();
+						}
+						{
+							const btVector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
+							solverConstraint.m_angularComponentB = constraint->getRigidBodyB().getInvInertiaTensorWorld()*ftorqueAxis2*constraint->getRigidBodyB().getAngularFactor();
+						}
+
+						{
+							btVector3 iMJlA = solverConstraint.m_contactNormal*rbA.getInvMass();
+							btVector3 iMJaA = rbA.getInvInertiaTensorWorld()*solverConstraint.m_relpos1CrossNormal;
+							btVector3 iMJlB = solverConstraint.m_contactNormal*rbB.getInvMass();//sign of normal?
+							btVector3 iMJaB = rbB.getInvInertiaTensorWorld()*solverConstraint.m_relpos2CrossNormal;
+
+							btScalar sum = iMJlA.dot(solverConstraint.m_contactNormal);
+							sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
+							sum += iMJlB.dot(solverConstraint.m_contactNormal);
+							sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
+							//NOTE(review): sum is not checked for zero before the division below
+							solverConstraint.m_jacDiagABInv = btScalar(1.)/sum;
+						}
+
+
+						///fix rhs
+						///todo: add force/torque accelerators
+						{
+							btScalar rel_vel;
+							btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.getLinearVelocity()) + solverConstraint.m_relpos1CrossNormal.dot(rbA.getAngularVelocity());
+							btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.getLinearVelocity()) + solverConstraint.m_relpos2CrossNormal.dot(rbB.getAngularVelocity());
+
+							rel_vel = vel1Dotn+vel2Dotn;
+
+							btScalar restitution = 0.f;
+							btScalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
+							btScalar	velocityError = restitution - rel_vel;// * damping;
+							btScalar	penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
+							btScalar	velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
+							solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
+							solverConstraint.m_appliedImpulse = 0.f;
+
+						}
+					}
+
+					PfxConstraintPair& pair = jointPairs[actualNumJoints];
+					
+					int numConstraintRows= info1.m_numConstraintRows;
+					pfxSetNumConstraints(pair,numConstraintRows);
+					
+
+
+					pfxSetRigidBodyIdA(pair,idA);
+					pfxSetRigidBodyIdB(pair,idB);
+					//is this needed?
+					if (idA>=0)
+						pfxSetMotionMaskA(pair,m_memoryCache->m_mystates[idA].getMotionMask());
+					if (idB>=0)
+						pfxSetMotionMaskB(pair,m_memoryCache->m_mystates[idB].getMotionMask());
+
+					pfxSetActive(pair,true);
+					int id = currentConstraintRow-offsetSolverConstraints;
+					pfxSetContactId(pair,id);
+					actualNumJoints++;
+
+
+				}
+				currentRow+=m_tmpConstraintSizesPool[i].m_numConstraintRows;
+			}
+		}
+	}
+
+
+	
+	float separateBias=0.1;//info.m_erp;//or m_erp2?
+	float timeStep=infoGlobal.m_timeStep;
+	int iteration=infoGlobal.m_numIterations;
+
+	//create a pair for each constraints, copy over info etc
+	
+	
+
+	//NOTE(review): totalContacts below is only consumed by the commented-out printf calls
+	
+	{
+		BT_PROFILE("compute num contacts");
+		int totalContacts =0;
+
+		for (int i=0;i<actualNumManifolds;i++)
+		{
+			PfxConstraintPair* pair = &m_memoryCache->m_mypairs[i];
+			totalContacts += pfxGetNumConstraints(*pair);
+		}
+		//printf("numManifolds = %d\n",numManifolds);
+		//printf("totalContacts=%d\n",totalContacts);
+	}
+	
+
+
+//	printf("actualNumManifolds=%d\n",actualNumManifolds);
+	{
+		BT_PROFILE("BPE_customConstraintSolverSequentialNew");
+		if (numRigidBodies>0 && (actualNumManifolds+actualNumJoints)>0)
+		{
+//			PFX_PRINTF("num points = %d\n",totalPoints);
+//			PFX_PRINTF("num points PFX = %d\n",total);
+			
+			//the pair array is empty for a joint-only group, so element 0 is only addressed when it exists
+			 
+			BPE_customConstraintSolverSequentialNew(
+				actualNumManifolds,
+				m_memoryCache->m_mypairs.size()? &m_memoryCache->m_mypairs[0]:0,
+				offsetContactManifolds,
+				&m_memoryCache->m_mystates[0],numRigidBodies,
+				&m_memoryCache->m_mysolverbodies[0],
+				jointPairs,actualNumJoints,
+				offsetSolverConstraints,
+				separateBias,timeStep,iteration,
+				m_solverThreadSupport,m_criticalSection,m_solverIO,m_barrier);
+		}
+	}
+
+	//copy results back to bodies
+	{
+		BT_PROFILE("copy back");
+		for (int i=0;i<numRigidBodies;i++)
+		{
+			btCollisionObject* obj = bodies1[i];
+			btRigidBody* rb = btRigidBody::upcast(obj);
+			TrbState& state = m_memoryCache->m_mystates[i];
+			if (rb && (rb->getInvMass()>0.f))
+			{
+				rb->setLinearVelocity(btVector3(state.getLinearVelocity().getX(),state.getLinearVelocity().getY(),state.getLinearVelocity().getZ()));
+				rb->setAngularVelocity(btVector3(state.getAngularVelocity().getX(),state.getAngularVelocity().getY(),state.getAngularVelocity().getZ()));
+			}
+		}
+	}
+
+
+	return 0.f;
+}
diff --git a/src/bullet/BulletMultiThreaded/btParallelConstraintSolver.h b/src/bullet/BulletMultiThreaded/btParallelConstraintSolver.h
new file mode 100644
index 00000000..7c0268e7
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/btParallelConstraintSolver.h
@@ -0,0 +1,285 @@
+/*
+   Copyright (C) 2010 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef __BT_PARALLEL_CONSTRAINT_SOLVER_H
+#define __BT_PARALLEL_CONSTRAINT_SOLVER_H
+
+#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
+
+
+
+
+#include "LinearMath/btScalar.h"
+#include "PlatformDefinitions.h"
+
+
+#define PFX_MAX_SOLVER_PHASES 64
+#define PFX_MAX_SOLVER_BATCHES 16
+#define PFX_MAX_SOLVER_PAIRS  128
+#define PFX_MIN_SOLVER_PAIRS  16
+/// One solver batch: up to PFX_MAX_SOLVER_PAIRS 16-bit entries (presumably indices into a pair array - confirm).
+#ifdef __CELLOS_LV2__
+ATTRIBUTE_ALIGNED128(struct) PfxParallelBatch {
+#else
+ATTRIBUTE_ALIGNED16(struct) PfxParallelBatch {
+#endif
+	uint16_t pairIndices[PFX_MAX_SOLVER_PAIRS];
+};
+/// Counters for a phase/batch split: a phase count, per-phase batch counts and per-(phase,batch) pair counts.
+#ifdef __CELLOS_LV2__
+ATTRIBUTE_ALIGNED128(struct) PfxParallelGroup {
+#else
+ATTRIBUTE_ALIGNED16(struct) PfxParallelGroup {
+#endif
+	uint16_t numPhases;
+	uint16_t numBatches[PFX_MAX_SOLVER_PHASES]; // one slot per phase
+	uint16_t numPairs[PFX_MAX_SOLVER_PHASES*PFX_MAX_SOLVER_BATCHES]; // one slot per (phase, batch) combination
+};
+
+
+/// 16-byte record whose storage can be addressed as 16 bytes, eight 16-bit lanes or four 32-bit lanes.
+ATTRIBUTE_ALIGNED16(struct) PfxSortData16 {
+	union {
+		uint8_t   i8data[16];
+		uint16_t  i16data[8];
+		uint32_t  i32data[4];
+#ifdef __SPU__
+		vec_uint4 vdata;
+#endif
+	};
+	// Lane accessors: 'elem' is the lane index at the given width. The non-SPU versions index the arrays unchecked.
+#ifdef __SPU__
+	void set8(int elem,uint8_t data)   {vdata=(vec_uint4)spu_insert(data,(vec_uchar16)vdata,elem);}
+	void set16(int elem,uint16_t data) {vdata=(vec_uint4)spu_insert(data,(vec_ushort8)vdata,elem);}
+	void set32(int elem,uint32_t data) {vdata=(vec_uint4)spu_insert(data,(vec_uint4)vdata,elem);}
+	uint8_t get8(int elem)   const {return spu_extract((vec_uchar16)vdata,elem);}
+	uint16_t get16(int elem) const {return spu_extract((vec_ushort8)vdata,elem);}
+	uint32_t get32(int elem) const {return spu_extract((vec_uint4)vdata,elem);}
+#else
+	void set8(int elem,uint8_t data)   {i8data[elem] = data;}
+	void set16(int elem,uint16_t data) {i16data[elem] = data;}
+	void set32(int elem,uint32_t data) {i32data[elem] = data;}
+	uint8_t get8(int elem)   const {return i8data[elem];}
+	uint16_t get16(int elem) const {return i16data[elem];}
+	uint32_t get32(int elem) const {return i32data[elem];}
+#endif
+};
+
+typedef PfxSortData16 PfxConstraintPair;
+
+// Constraint-pair view of PfxSortData16: 32-bit lane 2 holds the constraint id, byte 7 the constraint count.
+//J	Shared with PfxBroadphasePair (same underlying type; the original note was mis-encoded - wording reconstructed, confirm)
+
+SIMD_FORCE_INLINE void pfxSetConstraintId(PfxConstraintPair &pair,uint32_t i)	{pair.set32(2,i);}
+SIMD_FORCE_INLINE void pfxSetNumConstraints(PfxConstraintPair &pair,uint8_t n)	{pair.set8(7,n);}
+
+SIMD_FORCE_INLINE uint32_t pfxGetConstraintId1(const PfxConstraintPair &pair)	{return pair.get32(2);}
+SIMD_FORCE_INLINE uint8_t  pfxGetNumConstraints(const PfxConstraintPair &pair)	{return pair.get8(7);}
+// Broadphase-pair view: 16-bit lanes 0/1 = body ids A/B, bytes 4/5 = motion masks A/B, byte 6 = flag (low nibble) and active bit (bit 4).
+typedef PfxSortData16 PfxBroadphasePair;
+
+SIMD_FORCE_INLINE void pfxSetRigidBodyIdA(PfxBroadphasePair &pair,uint16_t i)	{pair.set16(0,i);}
+SIMD_FORCE_INLINE void pfxSetRigidBodyIdB(PfxBroadphasePair &pair,uint16_t i)	{pair.set16(1,i);}
+SIMD_FORCE_INLINE void pfxSetMotionMaskA(PfxBroadphasePair &pair,uint8_t i)		{pair.set8(4,i);}
+SIMD_FORCE_INLINE void pfxSetMotionMaskB(PfxBroadphasePair &pair,uint8_t i)		{pair.set8(5,i);}
+SIMD_FORCE_INLINE void pfxSetBroadphaseFlag(PfxBroadphasePair &pair,uint8_t f)	{pair.set8(6,(pair.get8(6)&0xf0)|(f&0x0f));}
+SIMD_FORCE_INLINE void pfxSetActive(PfxBroadphasePair &pair,bool b)			{pair.set8(6,(pair.get8(6)&0x0f)|((b?1:0)<<4));}
+SIMD_FORCE_INLINE void pfxSetContactId(PfxBroadphasePair &pair,uint32_t i)		{pair.set32(2,i);} // same 32-bit lane as pfxSetConstraintId
+// Getters mirror the setters above and read the same lanes.
+SIMD_FORCE_INLINE uint16_t pfxGetRigidBodyIdA(const PfxBroadphasePair &pair)	{return pair.get16(0);}
+SIMD_FORCE_INLINE uint16_t pfxGetRigidBodyIdB(const PfxBroadphasePair &pair)	{return pair.get16(1);}
+SIMD_FORCE_INLINE uint8_t  pfxGetMotionMaskA(const PfxBroadphasePair &pair)		{return pair.get8(4);}
+SIMD_FORCE_INLINE uint8_t  pfxGetMotionMaskB(const PfxBroadphasePair &pair)		{return pair.get8(5);}
+SIMD_FORCE_INLINE uint8_t  pfxGetBroadphaseFlag(const PfxBroadphasePair &pair)	{return pair.get8(6)&0x0f;}
+SIMD_FORCE_INLINE bool     pfxGetActive(const PfxBroadphasePair &pair)			{return (pair.get8(6)>>4)!=0;}
+SIMD_FORCE_INLINE uint32_t pfxGetContactId1(const PfxBroadphasePair &pair)		{return pair.get32(2);}
+
+
+/// Per-body solver data: delta velocities, inverse inertia, orientation, inverse mass, friction and restitution.
+#if defined(__PPU__) || defined (__SPU__)
+ATTRIBUTE_ALIGNED128(struct) PfxSolverBody {
+#else
+ATTRIBUTE_ALIGNED16(struct) PfxSolverBody {
+#endif
+	vmVector3 mDeltaLinearVelocity;
+	vmVector3 mDeltaAngularVelocity;
+	vmMatrix3 mInertiaInv;
+	vmQuat    mOrientation;
+	float   mMassInv;
+	float   friction;
+	float   restitution;
+	float   unused; // unused..unused5 are not referenced in this header (presumably padding - confirm)
+	float   unused2;
+	float   unused3;
+	float   unused4;
+	float   unused5;
+};
+
+
+#ifdef __PPU__
+#include "SpuDispatch/BulletPE2ConstraintSolverSpursSupport.h"
+#endif
+/// Narrows the three doubles at p to float and loads them into a vmVector3 through loadXYZ.
+static SIMD_FORCE_INLINE vmVector3 btReadVector3(const double* p)
+{
+	float xyz[3] = {float(p[0]),float(p[1]),float(p[2])};
+	vmVector3 result;
+	loadXYZ(result, xyz);
+	return result;
+}
+/// Narrows the four doubles p[0..3] (x, y, z, w) to float and loads them into a vmQuat through loadXYZW.
+static SIMD_FORCE_INLINE vmQuat btReadQuat(const double* p)
+{
+	float tmp[4] = {float(p[0]),float(p[1]),float(p[2]),float(p[3])}; // w is element 3; index 4 is past the quaternion
+	vmQuat vq;
+	loadXYZW(vq, tmp);
+	return vq;
+}
+/// Stores the x, y, z of src into p[0..2] as doubles, going through a float buffer filled by storeXYZ.
+static SIMD_FORCE_INLINE void btStoreVector3(const vmVector3 &src, double* p)
+{
+	float xyz[3];
+	vmVector3 copy = src;
+	storeXYZ(copy, xyz);
+	// widen each float component to double
+	for (int axis = 0; axis < 3; ++axis)
+		p[axis] = xyz[axis];
+}
+
+/// Float overload: loads a vmVector3 directly from the floats at p through loadXYZ.
+static SIMD_FORCE_INLINE vmVector3 btReadVector3(const float* p)
+{
+	vmVector3 result;
+	loadXYZ(result, p);
+	return result;
+}
+/// Float overload: loads a vmQuat directly from the floats at p through loadXYZW.
+static SIMD_FORCE_INLINE vmQuat btReadQuat(const float* p)
+{
+	vmQuat result;
+	loadXYZW(result, p);
+	return result;
+}
+/// Float overload: writes src into the floats at p through storeXYZ.
+static SIMD_FORCE_INLINE void btStoreVector3(const vmVector3 &src, float* p)
+{
+	vmVector3 copy = src;
+	storeXYZ(copy, p);
+}
+
+
+
+
+class btPersistentManifold;
+/// Solver command codes, numbered from 0 (presumably the values stored in btConstraintSolverIO::cmd - confirm).
+enum {
+	PFX_CONSTRAINT_SOLVER_CMD_SETUP_SOLVER_BODIES,
+	PFX_CONSTRAINT_SOLVER_CMD_SETUP_CONTACT_CONSTRAINTS,
+	PFX_CONSTRAINT_SOLVER_CMD_SETUP_JOINT_CONSTRAINTS,
+	PFX_CONSTRAINT_SOLVER_CMD_SOLVE_CONSTRAINTS,
+	PFX_CONSTRAINT_SOLVER_CMD_POST_SOLVER
+};
+
+/// Argument block for the contact-constraint setup stage; it is one member of the union in btConstraintSolverIO.
+struct PfxSetupContactConstraintsIO {
+	PfxConstraintPair *offsetContactPairs;
+	uint32_t numContactPairs1;
+	btPersistentManifold*	offsetContactManifolds;
+	class TrbState *offsetRigStates;
+	struct PfxSolverBody *offsetSolverBodies;
+	uint32_t numRigidBodies;
+	float separateBias;
+	float timeStep;
+	class btCriticalSection* criticalSection;
+};
+
+
+/// Argument block for the solve stage: contact and joint groups/batches/pairs plus the body arrays.
+struct PfxSolveConstraintsIO {
+	PfxParallelGroup *contactParallelGroup;
+	PfxParallelBatch *contactParallelBatches;
+	PfxConstraintPair *contactPairs;
+	uint32_t numContactPairs;
+	btPersistentManifold *offsetContactManifolds;
+	PfxParallelGroup *jointParallelGroup;
+	PfxParallelBatch *jointParallelBatches;
+	PfxConstraintPair *jointPairs;
+	uint32_t numJointPairs;
+	struct btSolverConstraint* offsetSolverConstraints;
+	TrbState *offsetRigStates1;
+	PfxSolverBody *offsetSolverBodies;
+	uint32_t numRigidBodies;
+	uint32_t iteration;
+
+	uint32_t	taskId;
+	
+	class btBarrier* barrier;
+
+};
+/// Argument block for the post-solver stage: the state and solver-body arrays and their element count.
+struct PfxPostSolverIO {
+	TrbState *states;
+	PfxSolverBody *solverBodies;
+	uint32_t numRigidBodies;
+};
+/// Per-task message: a command byte plus a union holding the argument block for one of the solver stages.
+ATTRIBUTE_ALIGNED16(struct) btConstraintSolverIO {
+	uint8_t cmd;
+	union {
+		PfxSetupContactConstraintsIO setupContactConstraints;
+		PfxSolveConstraintsIO solveConstraints;
+		PfxPostSolverIO postSolver;
+	};
+	
+	//SPU only
+	uint32_t barrierAddr2;
+	uint32_t criticalsectionAddr2;
+	uint32_t maxTasks1;
+};
+
+
+
+// Declared here, defined elsewhere; presumably the task entry point and its local-memory factory for the thread support - confirm.
+void	SolverThreadFunc(void* userPtr,void* lsMemory);
+void*	SolverlsMemoryFunc();
+///The btParallelConstraintSolver performs computations on constraint rows in parallel.
+///Using the cross-platform threading, it supports Windows, Linux, Mac OSX and PlayStation 3 Cell SPUs.
+class btParallelConstraintSolver : public btSequentialImpulseConstraintSolver
+{
+	
+protected:
+	struct btParallelSolverMemoryCache*	m_memoryCache;
+	// thread support passed to the constructor
+	class btThreadSupportInterface*	m_solverThreadSupport;
+	// m_solverIO is an array; the barrier and critical section come from the thread support
+	struct btConstraintSolverIO* m_solverIO;
+	class btBarrier*			m_barrier;
+	class btCriticalSection*	m_criticalSection;
+
+
+public:
+	// solverThreadSupport must be non-null: the constructor calls through it
+	btParallelConstraintSolver(class btThreadSupportInterface* solverThreadSupport);
+	
+	virtual ~btParallelConstraintSolver();
+	// overrides the sequential solver's solveGroup
+	virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher);
+
+};
+
+
+
+#endif //__BT_PARALLEL_CONSTRAINT_SOLVER_H
\ No newline at end of file
diff --git a/src/bullet/BulletMultiThreaded/btThreadSupportInterface.cpp b/src/bullet/BulletMultiThreaded/btThreadSupportInterface.cpp
new file mode 100644
index 00000000..8192aa46
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/btThreadSupportInterface.cpp
@@ -0,0 +1,22 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btThreadSupportInterface.h"
+
+btThreadSupportInterface::~btThreadSupportInterface()
+{
+	// intentionally empty: out-of-line definition of the virtual destructor
+}
+
diff --git a/src/bullet/BulletMultiThreaded/btThreadSupportInterface.h b/src/bullet/BulletMultiThreaded/btThreadSupportInterface.h
new file mode 100644
index 00000000..16850e22
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/btThreadSupportInterface.h
@@ -0,0 +1,85 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_THREAD_SUPPORT_INTERFACE_H
+#define BT_THREAD_SUPPORT_INTERFACE_H
+
+
+#include <LinearMath/btScalar.h> //for ATTRIBUTE_ALIGNED16
+#include "PlatformDefinitions.h"
+#include "PpuAddressSpace.h"
+/// Abstract barrier interface; instances are obtained from btThreadSupportInterface::createBarrier().
+class btBarrier {
+public:
+	btBarrier() {}
+	virtual ~btBarrier() {}
+	// implementations define what sync() waits for; presumably it blocks until getMaxCount() callers arrive - confirm
+	virtual void sync() = 0;
+	virtual void setMaxCount(int n) = 0;
+	virtual int  getMaxCount() = 0;
+};
+/// Abstract critical-section interface with a 32-word aligned buffer and indexed shared parameters.
+class btCriticalSection {
+public:
+	btCriticalSection() {}
+	virtual ~btCriticalSection() {}
+	// public 32-entry buffer, 16-byte aligned via ATTRIBUTE_ALIGNED16
+	ATTRIBUTE_ALIGNED16(unsigned int mCommonBuff[32]);
+	// indexed parameter accessors (presumably backed by mCommonBuff - confirm in the implementations)
+	virtual unsigned int getSharedParam(int i) = 0;
+	virtual void setSharedParam(int i,unsigned int p) = 0;
+	// lock()/unlock() delimit the critical section
+	virtual void lock() = 0;
+	virtual void unlock() = 0;
+};
+
+/// Abstract task/thread backend: request/response messaging, task-count control, and factories for barriers and critical sections.
+class btThreadSupportInterface
+{
+public:
+
+	virtual ~btThreadSupportInterface();
+
+///send messages to SPUs
+	virtual void sendRequest(uint32_t uiCommand, ppu_address_t uiArgument0, uint32_t uiArgument1) =0;
+
+///check for messages from SPUs
+	virtual	void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1) =0;
+
+
+	///non-blocking test if a task is completed. First implement all versions, and then enable this API
+	///virtual bool isTaskCompleted(unsigned int *puiArgument0, unsigned int *puiArgument1, int timeOutInMilliseconds)=0;
+
+///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
+	virtual	void startSPU() =0;
+
+///tell the task scheduler we are done with the SPU tasks
+	virtual	void stopSPU()=0;
+
+	///tell the task scheduler to use no more than numTasks tasks
+	virtual void	setNumTasks(int numTasks)=0;
+	///number of tasks; btParallelConstraintSolver allocates one btConstraintSolverIO per task from this value
+	virtual int		getNumTasks() const = 0;
+	///create a barrier; no matching destroy call is declared on this interface
+	virtual btBarrier*	createBarrier() = 0;
+	///create a critical section; no matching destroy call is declared on this interface
+	virtual btCriticalSection* createCriticalSection() = 0;
+	///default implementation ignores taskId and returns 0
+	virtual void*	getThreadLocalMemory(int taskId) { return 0; }
+
+};
+
+#endif //BT_THREAD_SUPPORT_INTERFACE_H
+
diff --git a/src/bullet/BulletMultiThreaded/vectormath2bullet.h b/src/bullet/BulletMultiThreaded/vectormath2bullet.h
new file mode 100644
index 00000000..11ee33ad
--- /dev/null
+++ b/src/bullet/BulletMultiThreaded/vectormath2bullet.h
@@ -0,0 +1,73 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef BT_AOS_VECTORMATH_BULLET_CONVERT_H
+#define BT_AOS_VECTORMATH_BULLET_CONVERT_H
+
+#include "PlatformDefinitions.h"
+#include "LinearMath/btVector3.h"
+#include "LinearMath/btQuaternion.h"
+#include "LinearMath/btMatrix3x3.h"
+/// Builds a Vectormath Vector3 from the x, y, z of a Bullet vector.
+inline Vectormath::Aos::Vector3	getVmVector3(const btVector3& bulletVec)
+{
+	return Vectormath::Aos::Vector3(bulletVec.getX(),bulletVec.getY(),bulletVec.getZ());
+}
+/// Builds a Bullet vector from the x, y, z of a Vectormath Vector3 (a Point3 overload follows).
+inline btVector3 getBtVector3(const Vectormath::Aos::Vector3& vmVec)
+{
+	return btVector3(vmVec.getX(),vmVec.getY(),vmVec.getZ());
+}
+inline btVector3 getBtVector3(const Vectormath::Aos::Point3& vmVec)
+{
+	return btVector3(vmVec.getX(),vmVec.getY(),vmVec.getZ()); // same conversion for Point3
+}
+
+/// Builds a Vectormath quaternion from the x, y, z, w of a Bullet quaternion.
+inline Vectormath::Aos::Quat	getVmQuat(const btQuaternion& bulletQuat)
+{
+	return Vectormath::Aos::Quat(bulletQuat.getX(),bulletQuat.getY(),bulletQuat.getZ(),bulletQuat.getW());
+}
+/// Builds a Bullet quaternion from the x, y, z, w of a Vectormath quaternion.
+inline btQuaternion	getBtQuat(const Vectormath::Aos::Quat& vmQuat)
+{
+	return btQuaternion (vmQuat.getX(),vmQuat.getY(),vmQuat.getZ(),vmQuat.getW());
+}
+
+/// Builds a Vectormath Matrix3 from the three columns of a Bullet 3x3 matrix.
+inline Vectormath::Aos::Matrix3	getVmMatrix3(const btMatrix3x3& btMat)
+{
+	return Vectormath::Aos::Matrix3(
+		getVmVector3(btMat.getColumn(0)),
+		getVmVector3(btMat.getColumn(1)),
+		getVmVector3(btMat.getColumn(2)));
+}
+
+
+#endif //BT_AOS_VECTORMATH_BULLET_CONVERT_H
diff --git a/src/bullet/BulletSoftBody/btDefaultSoftBodySolver.cpp b/src/bullet/BulletSoftBody/btDefaultSoftBodySolver.cpp
new file mode 100644
index 00000000..d1435b65
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btDefaultSoftBodySolver.cpp
@@ -0,0 +1,151 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+
+#include "btDefaultSoftBodySolver.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include "BulletSoftBody/btSoftBody.h"
+
+
+btDefaultSoftBodySolver::btDefaultSoftBodySolver()
+{
+	// Initially we will clearly need to update solver constants
+	// For now this is global for the cloths linked with this solver - we should probably make this body specific 
+	// for performance in future once we understand more clearly when constants need to be updated
+	m_updateSolverConstants = true;
+}
+// Empty destructor body: nothing is released explicitly here.
+btDefaultSoftBodySolver::~btDefaultSoftBodySolver()
+{
+}
+
+// In this case the data is already in the soft bodies so there is no need for us to do anything
+void btDefaultSoftBodySolver::copyBackToSoftBodies(bool bMove)
+{
+	// Intentionally empty; bMove is not used.
+}
+// Copies softBodies into m_softBodySet (via copyFromArray); forceUpdate is not used by this solver.
+void btDefaultSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate)
+{
+	m_softBodySet.copyFromArray( softBodies );
+}
+// Calls integrateMotion() on every active soft body held in m_softBodySet.
+void btDefaultSoftBodySolver::updateSoftBodies( )
+{
+	for (int bodyIndex = 0; bodyIndex < m_softBodySet.size(); ++bodyIndex)
+	{
+		btSoftBody* const body = m_softBodySet[bodyIndex];
+		if (!body->isActive())
+			continue;	// inactive bodies are not integrated
+
+		body->integrateMotion();
+	}
+} // updateSoftBodies
+// Always returns true for this solver.
+bool btDefaultSoftBodySolver::checkInitialized()
+{
+	return true;
+}
+// Calls solveConstraints() on every active soft body in m_softBodySet; solverdt is not used here.
+void btDefaultSoftBodySolver::solveConstraints( float solverdt )
+{
+	// Solve constraints for non-solver softbodies
+	for (int bodyIndex = 0; bodyIndex < m_softBodySet.size(); ++bodyIndex)
+	{
+		btSoftBody* const body = m_softBodySet[bodyIndex];
+		if (!body->isActive())
+			continue;	// inactive bodies are skipped
+
+		body->solveConstraints();
+	}
+} // btDefaultSoftBodySolver::solveConstraints
+
+// Copies the node positions and/or normals of softBody into vertexBuffer; only CPU buffers are handled, anything else is ignored.
+void btDefaultSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer )
+{
+	// Currently only support CPU output buffers
+	// TODO: check for DX11 buffers. Take all offsets into the same DX11 buffer
+	// and use them together on a single kernel call if possible by setting up a
+	// per-cloth target buffer array for the copy kernel.
+	if( vertexBuffer->getBufferType() != btVertexBufferDescriptor::CPU_BUFFER )
+		return;
+
+	const btAlignedObjectArray<btSoftBody::Node> &nodes = softBody->m_nodes;
+	const int nodeCount = nodes.size();
+
+	const btCPUVertexBufferDescriptor *cpuBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer);
+	float *const bufferStart = cpuBuffer->getBasePointer();
+
+	if( vertexBuffer->hasVertexPositions() )
+	{
+		// Offset and stride are added to a float pointer, i.e. they count floats.
+		const int positionOffset = cpuBuffer->getVertexOffset();
+		const int positionStride = cpuBuffer->getVertexStride();
+		float *dst = bufferStart + positionOffset;
+		for( int node = 0; node < nodeCount; ++node )
+		{
+			const btVector3 &p = nodes[node].m_x;
+			dst[0] = p.getX();
+			dst[1] = p.getY();
+			dst[2] = p.getZ();
+			dst += positionStride;
+		}
+	}
+
+	if( vertexBuffer->hasNormals() )
+	{
+		// Same scheme as for positions, using the normal offset and stride.
+		const int normalOffset = cpuBuffer->getNormalOffset();
+		const int normalStride = cpuBuffer->getNormalStride();
+		float *dst = bufferStart + normalOffset;
+		for( int node = 0; node < nodeCount; ++node )
+		{
+			const btVector3 &nrm = nodes[node].m_n;
+			dst[0] = nrm.getX();
+			dst[1] = nrm.getY();
+			dst[2] = nrm.getZ();
+			dst += normalStride;
+		}
+	}
+} // btDefaultSoftBodySolver::copySoftBodyToVertexBuffer
+// Soft body vs soft body: forwards to softBody's own defaultCollisionHandler.
+void btDefaultSoftBodySolver::processCollision( btSoftBody* softBody, btSoftBody* otherSoftBody)
+{
+	softBody->defaultCollisionHandler( otherSoftBody);
+}
+
+// Soft body vs collision object: for the default solver just leave the soft body to do its collision processing
+void btDefaultSoftBodySolver::processCollision( btSoftBody *softBody, btCollisionObject* collisionObject )
+{
+	softBody->defaultCollisionHandler( collisionObject );
+} // btDefaultSoftBodySolver::processCollision
+
+// Calls predictMotion(timeStep) on every active soft body held in m_softBodySet.
+void btDefaultSoftBodySolver::predictMotion( float timeStep )
+{
+	for (int bodyIndex = 0; bodyIndex < m_softBodySet.size(); ++bodyIndex)
+	{
+		btSoftBody* const body = m_softBodySet[bodyIndex];
+		if (!body->isActive())
+			continue;	// inactive bodies are skipped
+
+		// The time step is passed through unchanged.
+		body->predictMotion(timeStep);
+	}
+}
+
diff --git a/src/bullet/BulletSoftBody/btDefaultSoftBodySolver.h b/src/bullet/BulletSoftBody/btDefaultSoftBodySolver.h
new file mode 100644
index 00000000..7d9092ce
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btDefaultSoftBodySolver.h
@@ -0,0 +1,63 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_DEFAULT_SOLVER_H
+#define BT_SOFT_BODY_DEFAULT_SOLVER_H
+
+
+#include "BulletSoftBody/btSoftBodySolvers.h"
+#include "btSoftBodySolverVertexBuffer.h"
+
+/** Soft body solver whose steps are carried out by calling into the btSoftBody objects it tracks */
+class btDefaultSoftBodySolver : public btSoftBodySolver
+{
+protected:		
+	/** Variable to define whether we need to update solver constants on the next iteration */
+	bool m_updateSolverConstants;
+	/** Soft bodies handled by this solver */
+	btAlignedObjectArray< btSoftBody * > m_softBodySet;
+
+
+public:
+	btDefaultSoftBodySolver();
+	
+	virtual ~btDefaultSoftBodySolver();
+	/** Identifies this solver as DEFAULT_SOLVER */
+	virtual SolverTypes getSolverType() const
+	{
+		return DEFAULT_SOLVER;
+	}
+	/** Reports whether the solver is initialized */
+	virtual bool checkInitialized();
+	/** Integrates the motion of the tracked soft bodies */
+	virtual void updateSoftBodies( );
+	/** Takes over the given set of soft bodies */
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies,bool forceUpdate=false );
+	/** Copies solver data back to the soft bodies */
+	virtual void copyBackToSoftBodies(bool bMove = true);
+	/** Solves the constraints of the tracked soft bodies */
+	virtual void solveConstraints( float solverdt );
+	/** Predicts the motion of the tracked soft bodies over solverdt */
+	virtual void predictMotion( float solverdt );
+	/** Writes a soft body's vertex data into the described vertex buffer */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer );
+	/** Handles a collision between a soft body and a collision object */
+	virtual void processCollision( btSoftBody *, btCollisionObject* );
+	/** Handles a collision between two soft bodies */
+	virtual void processCollision( btSoftBody*, btSoftBody* );
+
+};
+
+#endif // #ifndef BT_SOFT_BODY_DEFAULT_SOLVER_H
diff --git a/src/bullet/BulletSoftBody/btSoftBody.cpp b/src/bullet/BulletSoftBody/btSoftBody.cpp
new file mode 100644
index 00000000..d1b5eb43
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftBody.cpp
@@ -0,0 +1,3538 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///btSoftBody implementation by Nathanael Presson
+
+#include "btSoftBodyInternals.h"
+#include "BulletSoftBody/btSoftBodySolvers.h"
+#include "btSoftBodyData.h"
+#include "LinearMath/btSerializer.h"
+
+
+// Builds a soft body with node_count nodes: positions come from x (origin if x is null), masses from m (1 if m is null; mass<=0 gives inverse mass 0).
+btSoftBody::btSoftBody(btSoftBodyWorldInfo*	worldInfo,int node_count,  const btVector3* x,  const btScalar* m)
+:m_softBodySolver(0),m_worldInfo(worldInfo)
+{	
+	/* Init		*/ 
+	initDefaults();
+
+	/* Default material	*/ 
+	Material*	pm=appendMaterial();
+	pm->m_kLST	=	1;
+	pm->m_kAST	=	1;
+	pm->m_kVST	=	1;
+	pm->m_flags	=	fMaterial::Default;
+
+	/* Nodes			*/ 
+	const btScalar		margin=getCollisionShape()->getMargin();
+	m_nodes.resize(node_count);
+	for(int i=0,ni=node_count;i<ni;++i)
+	{	
+		Node&	n=m_nodes[i];
+		ZeroInitialize(n);
+		n.m_x		=	x?*x++:btVector3(0,0,0);
+		n.m_q		=	n.m_x;
+		n.m_im		=	m?*m++:1;
+		n.m_im		=	n.m_im>0?1/n.m_im:0;
+		n.m_leaf	=	m_ndbvt.insert(btDbvtVolume::FromCR(n.m_x,margin),&n);
+		n.m_material=	pm;
+	}
+	updateBounds();	
+
+}
+// Creates a soft body without nodes; m_softBodySolver starts out null, as in the node-based constructor, instead of being left uninitialised.
+btSoftBody::btSoftBody(btSoftBodyWorldInfo*	worldInfo)
+:m_softBodySolver(0),m_worldInfo(worldInfo)
+{
+	initDefaults();
+}
+
+// Resets configuration, pose and bookkeeping members to their defaults and creates the internal collision shape (margin 0.25).
+void	btSoftBody::initDefaults()
+{
+	m_internalType		=	CO_SOFT_BODY;
+	m_cfg.aeromodel		=	eAeroModel::V_Point;
+	m_cfg.kVCF			=	1;
+	m_cfg.kDG			=	0;
+	m_cfg.kLF			=	0;
+	m_cfg.kDP			=	0;
+	m_cfg.kPR			=	0;
+	m_cfg.kVC			=	0;
+	m_cfg.kDF			=	(btScalar)0.2;
+	m_cfg.kMT			=	0;
+	m_cfg.kCHR			=	(btScalar)1.0;
+	m_cfg.kKHR			=	(btScalar)0.1;
+	m_cfg.kSHR			=	(btScalar)1.0;
+	m_cfg.kAHR			=	(btScalar)0.7;
+	m_cfg.kSRHR_CL		=	(btScalar)0.1;
+	m_cfg.kSKHR_CL		=	(btScalar)1;
+	m_cfg.kSSHR_CL		=	(btScalar)0.5;
+	m_cfg.kSR_SPLT_CL	=	(btScalar)0.5;
+	m_cfg.kSK_SPLT_CL	=	(btScalar)0.5;
+	m_cfg.kSS_SPLT_CL	=	(btScalar)0.5;
+	m_cfg.maxvolume		=	(btScalar)1;
+	m_cfg.timescale		=	1;
+	m_cfg.viterations	=	0;
+	m_cfg.piterations	=	1;	
+	m_cfg.diterations	=	0;
+	m_cfg.citerations	=	4;
+	m_cfg.collisions	=	fCollision::Default;
+	m_pose.m_bvolume	=	false;
+	m_pose.m_bframe		=	false;
+	m_pose.m_volume		=	0;
+	m_pose.m_com		=	btVector3(0,0,0);
+	m_pose.m_rot.setIdentity();
+	m_pose.m_scl.setIdentity();
+	m_tag				=	0;
+	m_timeacc			=	0;
+	m_bUpdateRtCst		=	true;
+	m_bounds[0]			=	btVector3(0,0,0);
+	m_bounds[1]			=	btVector3(0,0,0);
+	m_worldTransform.setIdentity();
+	setSolver(eSolverPresets::Positions);
+	
+	/* Collision shape	*/ 
+	///for now, create a collision shape internally
+	m_collisionShape = new btSoftBodyCollisionShape(this);
+	m_collisionShape->setMargin(0.25);
+	
+	m_initialWorldTransform.setIdentity();
+
+	m_windVelocity = btVector3(0,0,0);
+
+}
+
+// Deletes the internal collision shape, releases the clusters and frees the storage of all materials and joints with btAlignedFree.
+btSoftBody::~btSoftBody()
+{
+	//for now, delete the internal shape
+	delete m_collisionShape;	
+	int i;
+
+	releaseClusters();
+	for(i=0;i<m_materials.size();++i) 
+		btAlignedFree(m_materials[i]);
+	for(i=0;i<m_joints.size();++i) 
+		btAlignedFree(m_joints[i]);
+}
+
+// Index-based overload: reports whether a link already joins nodes node0 and node1 (in either order).
+bool			btSoftBody::checkLink(int node0,int node1) const
+{
+	return(checkLink(&m_nodes[node0],&m_nodes[node1]));
+}
+
+// Reports whether m_links already contains a link between the two given nodes.
+bool			btSoftBody::checkLink(const Node* node0,const Node* node1) const
+{
+	const int	linkCount=m_links.size();
+	for(int li=0;li<linkCount;++li)
+	{
+		const Node* const	a=m_links[li].m_n[0];
+		const Node* const	b=m_links[li].m_n[1];
+		// A link matches regardless of the order of its two end nodes.
+		const bool	sameOrder=(a==node0)&&(b==node1);
+		const bool	swapped=(a==node1)&&(b==node0);
+		if(sameOrder||swapped) return(true);
+	}
+	return(false);
+}
+
+// Reports whether some face has all three of its nodes among node0, node1 and node2 (bit j of c is set when face node j matches).
+bool			btSoftBody::checkFace(int node0,int node1,int node2) const
+{
+	const Node*	n[]={	&m_nodes[node0],
+		&m_nodes[node1],
+		&m_nodes[node2]};
+	for(int i=0,ni=m_faces.size();i<ni;++i)
+	{
+		const Face&	f=m_faces[i];
+		int			c=0;
+		for(int j=0;j<3;++j)
+		{
+			if(	(f.m_n[j]==n[0])||
+				(f.m_n[j]==n[1])||
+				(f.m_n[j]==n[2])) c|=1<<j; else break;
+		}
+		if(c==7) return(true);
+	}
+	return(false);
+}
+
+// Allocates a material (a copy of material 0 if one exists, zero-initialised otherwise), appends it to m_materials and returns it.
+btSoftBody::Material*		btSoftBody::appendMaterial()
+{
+	Material*	pm=new(btAlignedAlloc(sizeof(Material),16)) Material();
+	if(m_materials.size()>0)
+		*pm=*m_materials[0];
+	else
+		ZeroInitialize(*pm);
+	m_materials.push_back(pm);
+	return(pm);
+}
+
+// Appends a note holding text, offset o, the four components of c and up to four nodes; m_rank ends up as the number of non-null nodes.
+void			btSoftBody::appendNote(	const char* text,
+									   const btVector3& o,
+									   const btVector4& c,
+									   Node* n0,
+									   Node* n1,
+									   Node* n2,
+									   Node* n3)
+{
+	Note	n;
+	ZeroInitialize(n);
+	n.m_rank		=	0;
+	n.m_text		=	text;
+	n.m_offset		=	o;
+	n.m_coords[0]	=	c.x();
+	n.m_coords[1]	=	c.y();
+	n.m_coords[2]	=	c.z();
+	n.m_coords[3]	=	c.w();
+	n.m_nodes[0]	=	n0;n.m_rank+=n0?1:0;
+	n.m_nodes[1]	=	n1;n.m_rank+=n1?1:0;
+	n.m_nodes[2]	=	n2;n.m_rank+=n2?1:0;
+	n.m_nodes[3]	=	n3;n.m_rank+=n3?1:0;
+	m_notes.push_back(n);
+}
+
+// Appends a note attached to a single node, with coordinates (1,0,0,0).
+void			btSoftBody::appendNote(	const char* text,
+									   const btVector3& o,
+									   Node* feature)
+{
+	appendNote(text,o,btVector4(1,0,0,0),feature);
+}
+
+// Appends a note attached to a link, giving each of its two end nodes a coordinate of 1/2.
+void			btSoftBody::appendNote(	const char* text,
+									   const btVector3& o,
+									   Link* feature)
+{
+	static const btScalar	w=1/(btScalar)2;
+	appendNote(text,o,btVector4(w,w,0,0),	feature->m_n[0],
+		feature->m_n[1]);
+}
+
+// Appends a note attached to a face, giving each of its three nodes a coordinate of 1/3.
+void			btSoftBody::appendNote(	const char* text,
+									   const btVector3& o,
+									   Face* feature)
+{
+	static const btScalar	w=1/(btScalar)3;
+	appendNote(text,o,btVector4(w,w,w,0),	feature->m_n[0],
+		feature->m_n[1],
+		feature->m_n[2]);
+}
+
+// Appends a node at x with mass m (m<=0 gives inverse mass 0); when m_nodes is full it is grown between pointersToIndices()/indicesToPointers() (presumably to keep stored Node pointers valid).
+void			btSoftBody::appendNode(	const btVector3& x,btScalar m)
+{
+	if(m_nodes.capacity()==m_nodes.size())
+	{
+		pointersToIndices();
+		m_nodes.reserve(m_nodes.size()*2+1);
+		indicesToPointers();
+	}
+	const btScalar	margin=getCollisionShape()->getMargin();
+	m_nodes.push_back(Node());
+	Node&			n=m_nodes[m_nodes.size()-1];
+	ZeroInitialize(n);
+	n.m_x			=	x;
+	n.m_q			=	n.m_x;
+	n.m_im			=	m>0?1/m:0;
+	n.m_material	=	m_materials[0];
+	n.m_leaf		=	m_ndbvt.insert(btDbvtVolume::FromCR(n.m_x,margin),&n);
+}
+
+// Appends a link: a copy of m_links[model] when model>=0, otherwise a zeroed link using mat (or material 0 when mat is null).
+void			btSoftBody::appendLink(int model,Material* mat)
+{
+	Link	link;
+	if(model<0)
+	{ ZeroInitialize(link);link.m_material=mat?mat:m_materials[0]; }
+	else
+		link=m_links[model];
+	m_links.push_back(link);
+}
+
+// Index-based overload: forwards to the Node* version with &m_nodes[node0] and &m_nodes[node1].
+void			btSoftBody::appendLink(	int node0,
+									   int node1,
+									   Material* mat,
+									   bool bcheckexist)
+{
+	appendLink(&m_nodes[node0],&m_nodes[node1],mat,bcheckexist);
+}
+
+// Appends a link between two nodes; its rest length m_rl is the current distance between them.
+void			btSoftBody::appendLink(	Node* node0,
+									   Node* node1,
+									   Material* mat,
+									   bool bcheckexist)
+{
+	// With bcheckexist set, an already existing link is not duplicated.
+	if(bcheckexist&&checkLink(node0,node1))
+		return;
+	appendLink(-1,mat);
+	Link&	link=m_links[m_links.size()-1];
+	link.m_n[0]		=	node0;
+	link.m_n[1]		=	node1;
+	link.m_rl		=	(link.m_n[0]->m_x-link.m_n[1]->m_x).length();
+	m_bUpdateRtCst=true;
+}
+
+// Appends a face: a copy of m_faces[model] when model>=0, otherwise a zeroed face using mat (or material 0 when mat is null).
+void			btSoftBody::appendFace(int model,Material* mat)
+{
+	Face	face;
+	if(model<0)
+	{ ZeroInitialize(face);face.m_material=mat?mat:m_materials[0]; }
+	else
+	{ face=m_faces[model]; }
+	m_faces.push_back(face);
+}
+
+// Appends a triangle over three node indices; a face that repeats a node index is silently ignored.
+void			btSoftBody::appendFace(int node0,int node1,int node2,Material* mat)
+{
+	// Reject degenerate triangles up front.
+	const bool	degenerate=(node0==node1)||(node1==node2)||(node2==node0);
+	if(degenerate)
+		return;
+
+	appendFace(-1,mat);
+	Face&	face=m_faces[m_faces.size()-1];
+	// These hold by construction after the check above.
+	btAssert(node0!=node1);
+	btAssert(node1!=node2);
+	btAssert(node2!=node0);
+
+	face.m_n[0]	=	&m_nodes[node0];
+	face.m_n[1]	=	&m_nodes[node1];
+	face.m_n[2]	=	&m_nodes[node2];
+	// m_ra is computed from the current node positions.
+	face.m_ra	=	AreaOf(	face.m_n[0]->m_x,
+		face.m_n[1]->m_x,face.m_n[2]->m_x);
+	m_bUpdateRtCst=true;
+}
+
+// Appends a tetrahedron: a copy of m_tetras[model] when model>=0, otherwise a zeroed one using mat (or material 0 when mat is null).
+void			btSoftBody::appendTetra(int model,Material* mat)
+{
+	Tetra	tetra;
+	if(model<0)
+	{ ZeroInitialize(tetra);tetra.m_material=mat?mat:m_materials[0]; }
+	else
+		tetra=m_tetras[model];
+	m_tetras.push_back(tetra);
+}
+
+// Appends a tetrahedron over four node indices; m_rv is set from VolumeOf() of the current node positions.
+void			btSoftBody::appendTetra(int node0,
+										int node1,
+										int node2,
+										int node3,
+										Material* mat)
+{
+	appendTetra(-1,mat);
+	Tetra&	t=m_tetras[m_tetras.size()-1];
+	t.m_n[0]	=	&m_nodes[node0];
+	t.m_n[1]	=	&m_nodes[node1];
+	t.m_n[2]	=	&m_nodes[node2];
+	t.m_n[3]	=	&m_nodes[node3];
+	t.m_rv		=	VolumeOf(t.m_n[0]->m_x,t.m_n[1]->m_x,t.m_n[2]->m_x,t.m_n[3]->m_x);
+	m_bUpdateRtCst=true;
+}
+
+//
+// Anchors a node to a rigid body; the pivot is the node's current position transformed by the inverse of the body's world transform.
+void			btSoftBody::appendAnchor(int node,btRigidBody* body, bool disableCollisionBetweenLinkedBodies,btScalar influence)
+{
+	btVector3 local = body->getWorldTransform().inverse()*m_nodes[node].m_x;
+	appendAnchor(node,body,local,disableCollisionBetweenLinkedBodies,influence);
+}
+
+// Anchors a node to a rigid body at localPivot; optionally adds the body to m_collisionDisabledObjects if it is not already listed.
+void			btSoftBody::appendAnchor(int node,btRigidBody* body, const btVector3& localPivot,bool disableCollisionBetweenLinkedBodies,btScalar influence)
+{
+	if (disableCollisionBetweenLinkedBodies)
+	{
+		if (m_collisionDisabledObjects.findLinearSearch(body)==m_collisionDisabledObjects.size())
+		{
+			m_collisionDisabledObjects.push_back(body);
+		}
+	}
+
+	Anchor	a;
+	a.m_node			=	&m_nodes[node];
+	a.m_body			=	body;
+	a.m_local			=	localPivot;
+	a.m_node->m_battach	=	1;
+	a.m_influence = influence;
+	m_anchors.push_back(a);
+}
+
+// Creates a linear joint between body0 and body1; specs.position is transformed by the inverse of each body's xform() to give the two reference points.
+void			btSoftBody::appendLinearJoint(const LJoint::Specs& specs,Cluster* body0,Body body1)
+{
+	LJoint*		pj	=	new(btAlignedAlloc(sizeof(LJoint),16)) LJoint();
+	pj->m_bodies[0]	=	body0;
+	pj->m_bodies[1]	=	body1;
+	pj->m_refs[0]	=	pj->m_bodies[0].xform().inverse()*specs.position;
+	pj->m_refs[1]	=	pj->m_bodies[1].xform().inverse()*specs.position;
+	pj->m_cfm		=	specs.cfm;
+	pj->m_erp		=	specs.erp;
+	pj->m_split		=	specs.split;
+	m_joints.push_back(pj);
+}
+
+// Convenience overload: joins this body's first cluster (m_clusters[0]) to body.
+void			btSoftBody::appendLinearJoint(const LJoint::Specs& specs,Body body)
+{
+	appendLinearJoint(specs,m_clusters[0],body);
+}
+
+// Convenience overload: joins this body's first cluster to the first cluster of another soft body.
+void			btSoftBody::appendLinearJoint(const LJoint::Specs& specs,btSoftBody* body)
+{
+	appendLinearJoint(specs,m_clusters[0],body->m_clusters[0]);
+}
+
+// Creates an angular joint between body0 and body1; specs.axis is rotated by the inverse basis of each body's xform() to give the two reference axes.
+void			btSoftBody::appendAngularJoint(const AJoint::Specs& specs,Cluster* body0,Body body1)
+{
+	AJoint*		pj	=	new(btAlignedAlloc(sizeof(AJoint),16)) AJoint();
+	pj->m_bodies[0]	=	body0;
+	pj->m_bodies[1]	=	body1;
+	pj->m_refs[0]	=	pj->m_bodies[0].xform().inverse().getBasis()*specs.axis;
+	pj->m_refs[1]	=	pj->m_bodies[1].xform().inverse().getBasis()*specs.axis;
+	pj->m_cfm		=	specs.cfm;
+	pj->m_erp		=	specs.erp;
+	pj->m_split		=	specs.split;
+	pj->m_icontrol	=	specs.icontrol;
+	m_joints.push_back(pj);
+}
+
+// Convenience overload: joins this body's first cluster (m_clusters[0]) to body.
+void			btSoftBody::appendAngularJoint(const AJoint::Specs& specs,Body body)
+{
+	appendAngularJoint(specs,m_clusters[0],body);
+}
+
+// Convenience overload: joins this body's first cluster to the first cluster of another soft body.
+void			btSoftBody::appendAngularJoint(const AJoint::Specs& specs,btSoftBody* body)
+{
+	appendAngularJoint(specs,m_clusters[0],body->m_clusters[0]);
+}
+
+// Adds force to every node by calling the per-node overload for each index.
+void			btSoftBody::addForce(const btVector3& force)
+{
+	for(int i=0,ni=m_nodes.size();i<ni;++i) addForce(force,i);
+}
+
+// Adds force to the force accumulator of one node.
+void			btSoftBody::addForce(const btVector3& force,int node)
+{
+	Node&	target=m_nodes[node];
+	// Nodes whose inverse mass is not positive are left untouched.
+	if(!(target.m_im>0))
+		return;
+	target.m_f	+=	force;
+}
+// Adds to node nodeIndex the aerodynamic force caused by windVelocity; only the per-vertex aero models (below F_TwoSided) are handled.
+void			btSoftBody::addAeroForceToNode(const btVector3& windVelocity,int nodeIndex)
+{
+	btAssert(nodeIndex >= 0 && nodeIndex < m_nodes.size());
+
+	const btScalar dt = m_sst.sdt;
+	const btScalar kLF = m_cfg.kLF;
+	const btScalar kDG = m_cfg.kDG;
+	// Aerodynamics is only evaluated when the lift (kLF) or drag (kDG) coefficient is
+	// positive and the configured model is ordered before F_TwoSided.
+	const bool as_lift = kLF>0;
+	const bool as_drag = kDG>0;
+	const bool as_aero = as_lift || as_drag;
+	const bool as_vaero = as_aero && (m_cfg.aeromodel < btSoftBody::eAeroModel::F_TwoSided);
+
+	Node& n = m_nodes[nodeIndex];
+
+	if( n.m_im>0 )
+	{
+		btSoftBody::sMedium	medium;
+
+		EvaluateMedium(m_worldInfo, n.m_x, medium);
+		medium.m_velocity = windVelocity;
+		medium.m_density = m_worldInfo->air_density;
+
+		/* Aerodynamics			*/ 
+		if(as_vaero)
+		{				
+			const btVector3	rel_v = n.m_v - medium.m_velocity;					
+			const btScalar rel_v_len = rel_v.length();
+			const btScalar	rel_v2 = rel_v.length2();
+
+			if(rel_v2>SIMD_EPSILON)
+			{
+				const btVector3 rel_v_nrm = rel_v.normalized();
+				btVector3	nrm = n.m_n;						
+
+				if (m_cfg.aeromodel == btSoftBody::eAeroModel::V_TwoSidedLiftDrag)
+				{
+					nrm *= (btScalar)( (btDot(nrm,rel_v) < 0) ? -1 : +1);
+					btVector3 fDrag(0, 0, 0);
+					btVector3 fLift(0, 0, 0);
+
+					btScalar n_dot_v = nrm.dot(rel_v_nrm);
+					btScalar tri_area = 0.5f * n.m_area;
+							
+					fDrag = 0.5f * kDG * medium.m_density * rel_v2 * tri_area * n_dot_v * (-rel_v_nrm);
+							
+					// Check angle of attack
+					// cos(10 degrees) = 0.98480
+					if ( 0 < n_dot_v && n_dot_v < 0.98480f)
+						fLift = 0.5f * kLF * medium.m_density * rel_v_len * tri_area * btSqrt(1.0f-n_dot_v*n_dot_v) * (nrm.cross(rel_v_nrm).cross(rel_v_nrm));
+
+					n.m_f += fDrag;
+					n.m_f += fLift;
+				}
+				else if (m_cfg.aeromodel == btSoftBody::eAeroModel::V_Point || m_cfg.aeromodel == btSoftBody::eAeroModel::V_OneSided || m_cfg.aeromodel == btSoftBody::eAeroModel::V_TwoSided)
+				{
+					if (m_cfg.aeromodel == btSoftBody::eAeroModel::V_TwoSided)	// flip the normal to face the flow only for the two-sided model
+						nrm *= (btScalar)( (btDot(nrm,rel_v) < 0) ? -1 : +1);
+
+					const btScalar dvn = btDot(rel_v,nrm);
+					/* Compute forces	*/ 
+					if(dvn>0)
+					{
+						btVector3		force(0,0,0);
+						const btScalar	c0	=	n.m_area * dvn * rel_v2/2;
+						const btScalar	c1	=	c0 * medium.m_density;
+						force	+=	nrm*(-c1*kLF);
+						force	+=	rel_v_nrm * (-c1 * kDG);
+						ApplyClampedForce(n, force, dt);
+					}
+				}	
+			}
+		}
+	}
+}
+// Adds to the three nodes of face faceIndex the aerodynamic force caused by windVelocity; only the per-face aero models (F_TwoSided and above) are handled.
+void			btSoftBody::addAeroForceToFace(const btVector3& windVelocity,int faceIndex)
+{
+	const btScalar dt = m_sst.sdt;
+	const btScalar kLF = m_cfg.kLF;
+	const btScalar kDG = m_cfg.kDG;
+	// Aerodynamics is only evaluated when the lift (kLF) or drag (kDG) coefficient is positive.
+	const bool as_lift = kLF>0;
+	const bool as_drag = kDG>0;
+	const bool as_aero = as_lift || as_drag;
+	const bool as_faero = as_aero && (m_cfg.aeromodel >= btSoftBody::eAeroModel::F_TwoSided);
+
+	if(as_faero)
+	{
+		btAssert(faceIndex >= 0 && faceIndex < m_faces.size());
+		btSoftBody::Face&	f=m_faces[faceIndex];
+
+		btSoftBody::sMedium	medium;
+		
+		const btVector3	v=(f.m_n[0]->m_v+f.m_n[1]->m_v+f.m_n[2]->m_v)/3;
+		const btVector3	x=(f.m_n[0]->m_x+f.m_n[1]->m_x+f.m_n[2]->m_x)/3;
+		EvaluateMedium(m_worldInfo,x,medium);
+		medium.m_velocity = windVelocity;
+		medium.m_density = m_worldInfo->air_density;
+		const btVector3	rel_v=v-medium.m_velocity;
+		const btScalar rel_v_len = rel_v.length();
+		const btScalar	rel_v2=rel_v.length2();
+
+		if(rel_v2>SIMD_EPSILON)
+		{
+			const btVector3 rel_v_nrm = rel_v.normalized();
+			btVector3	nrm = f.m_normal;
+
+			if (m_cfg.aeromodel == btSoftBody::eAeroModel::F_TwoSidedLiftDrag)
+			{
+				nrm *= (btScalar)( (btDot(nrm,rel_v) < 0) ? -1 : +1);
+
+				btVector3 fDrag(0, 0, 0);
+				btVector3 fLift(0, 0, 0);
+
+				btScalar n_dot_v = nrm.dot(rel_v_nrm);
+				btScalar tri_area = 0.5f * f.m_ra;
+					
+				fDrag = 0.5f * kDG * medium.m_density * rel_v2 * tri_area * n_dot_v * (-rel_v_nrm);
+
+				// Check angle of attack
+				// cos(10 degrees) = 0.98480
+				if ( 0 < n_dot_v && n_dot_v < 0.98480f)
+					fLift = 0.5f * kLF * medium.m_density * rel_v_len * tri_area * btSqrt(1.0f-n_dot_v*n_dot_v) * (nrm.cross(rel_v_nrm).cross(rel_v_nrm));
+
+				fDrag /= 3;
+				fLift /= 3;
+
+				for(int j=0;j<3;++j) 
+				{
+					if (f.m_n[j]->m_im>0)
+					{
+						f.m_n[j]->m_f += fDrag; 
+						f.m_n[j]->m_f += fLift;
+					}
+				}
+			}
+			else if (m_cfg.aeromodel == btSoftBody::eAeroModel::F_OneSided || m_cfg.aeromodel == btSoftBody::eAeroModel::F_TwoSided)
+			{
+				if (m_cfg.aeromodel == btSoftBody::eAeroModel::F_TwoSided)	// flip the normal to face the flow only for the two-sided model
+					nrm *= (btScalar)( (btDot(nrm,rel_v) < 0) ? -1 : +1);
+
+				const btScalar	dvn=btDot(rel_v,nrm);
+				/* Compute forces	*/ 
+				if(dvn>0)
+				{
+					btVector3		force(0,0,0);
+					const btScalar	c0	=	f.m_ra*dvn*rel_v2;
+					const btScalar	c1	=	c0*medium.m_density;
+					force	+=	nrm*(-c1*kLF);
+					force	+=	rel_v_nrm*(-c1*kDG);
+					force	/=	3;
+					for(int j=0;j<3;++j) ApplyClampedForce(*f.m_n[j],force,dt);
+				}
+			}
+		}
+	}
+
+}
+
+// Adds velocity to every node by calling the per-node overload for each index.
+void			btSoftBody::addVelocity(const btVector3& velocity)
+{
+	for(int i=0,ni=m_nodes.size();i<ni;++i) addVelocity(velocity,i);
+}
+
+/* Set velocity for the entire body (nodes whose inverse mass is not positive keep theirs)	*/ 
+void				btSoftBody::setVelocity(	const btVector3& velocity)
+{
+	const int	nodeCount=m_nodes.size();
+	for(int idx=0;idx<nodeCount;++idx)
+	{
+		Node&	node=m_nodes[idx];
+		if(!(node.m_im>0))
+			continue;
+		node.m_v	=	velocity;
+	}
+}
+
+
+// Adds velocity to the velocity of one node.
+void			btSoftBody::addVelocity(const btVector3& velocity,int node)
+{
+	Node&	target=m_nodes[node];
+	// Nodes whose inverse mass is not positive are left untouched.
+	if(!(target.m_im>0))
+		return;
+	target.m_v	+=	velocity;
+}
+
+// Stores the node's mass as an inverse mass (mass<=0 gives inverse mass 0) and sets m_bUpdateRtCst.
+void			btSoftBody::setMass(int node,btScalar mass)
+{
+	m_nodes[node].m_im=mass>0?1/mass:0;
+	m_bUpdateRtCst=true;
+}
+
+// Returns the node's mass, i.e. the reciprocal of its inverse mass, or 0 when the inverse mass is not positive.
+btScalar		btSoftBody::getMass(int node) const
+{
+	return(m_nodes[node].m_im>0?1/m_nodes[node].m_im:0);
+}
+
+// Returns the sum of getMass() over all nodes.
+btScalar		btSoftBody::getTotalMass() const
+{
+	btScalar	total=0;
+	// Accumulate in node order.
+	for(int node=0;node<m_nodes.size();++node)
+		total+=getMass(node);
+
+	return(total);
+}
+
+// Rescales node masses so they sum to mass. NOTE(review): 1/m_im for nodes in no face and 1/tm for a zero total are unguarded - confirm callers avoid both.
+void			btSoftBody::setTotalMass(btScalar mass,bool fromfaces)
+{
+	int i;
+
+	if(fromfaces)
+	{
+		// With fromfaces, each node's mass is first set to the summed AreaOf() of the faces it belongs to.
+		for(i=0;i<m_nodes.size();++i)
+		{
+			m_nodes[i].m_im=0;
+		}
+		for(i=0;i<m_faces.size();++i)
+		{
+			const Face&		f=m_faces[i];
+			const btScalar	twicearea=AreaOf(	f.m_n[0]->m_x,
+				f.m_n[1]->m_x,
+				f.m_n[2]->m_x);
+			for(int j=0;j<3;++j)
+			{
+				f.m_n[j]->m_im+=twicearea;
+			}
+		}
+		for( i=0;i<m_nodes.size();++i)
+		{
+			m_nodes[i].m_im=1/m_nodes[i].m_im;
+		}
+	}
+	const btScalar	tm=getTotalMass();
+	const btScalar	itm=1/tm;
+	for( i=0;i<m_nodes.size();++i)
+	{
+		m_nodes[i].m_im/=itm*mass;
+	}
+	m_bUpdateRtCst=true;
+}
+
+// Sets the total mass to getVolume()*density, distributing it from the faces (fromfaces=true).
+void			btSoftBody::setTotalDensity(btScalar density)
+{
+	setTotalMass(getVolume()*density,true);
+}
+
+// Weights each node by the |m_rv| of the tetrahedra it belongs to (divided into the number of such tetrahedra), then rescales so the total equals mass.
+void			btSoftBody::setVolumeMass(btScalar mass)
+{
+btAlignedObjectArray<btScalar>	ranks;
+ranks.resize(m_nodes.size(),0);
+int i;
+
+for(i=0;i<m_nodes.size();++i)
+	{
+	m_nodes[i].m_im=0;
+	}
+for(i=0;i<m_tetras.size();++i)
+	{
+	const Tetra& t=m_tetras[i];
+	for(int j=0;j<4;++j)
+		{
+		t.m_n[j]->m_im+=btFabs(t.m_rv);
+		ranks[int(t.m_n[j]-&m_nodes[0])]+=1;
+		}
+	}
+for( i=0;i<m_nodes.size();++i)
+	{
+	if(m_nodes[i].m_im>0)
+		{
+		m_nodes[i].m_im=ranks[i]/m_nodes[i].m_im;
+		}
+	}
+setTotalMass(mass,false);
+}
+
+// Calls setVolumeMass(volume*density/6), where volume adds each tetrahedron's |m_rv| four times (once per iteration of the inner loop).
+void			btSoftBody::setVolumeDensity(btScalar density)
+{
+btScalar	volume=0;
+for(int i=0;i<m_tetras.size();++i)
+	{
+	const Tetra& t=m_tetras[i];
+	for(int j=0;j<4;++j)
+		{
+		volume+=btFabs(t.m_rv);
+		}
+	}
+setVolumeMass(volume*density/6);
+}
+
+// Applies trs to every node's m_x and m_q (and its basis to m_n), updates the node tree, normals, bounds and constants, and stores trs in m_initialWorldTransform.
+void			btSoftBody::transform(const btTransform& trs)
+{
+	const btScalar	margin=getCollisionShape()->getMargin();
+	ATTRIBUTE_ALIGNED16(btDbvtVolume)	vol;
+	
+	for(int i=0,ni=m_nodes.size();i<ni;++i)
+	{
+		Node&	n=m_nodes[i];
+		n.m_x=trs*n.m_x;
+		n.m_q=trs*n.m_q;
+		n.m_n=trs.getBasis()*n.m_n;
+		vol = btDbvtVolume::FromCR(n.m_x,margin);
+		
+		m_ndbvt.update(n.m_leaf,vol);
+	}
+	updateNormals();
+	updateBounds();
+	updateConstants();
+	m_initialWorldTransform = trs;
+}
+
+// Calls transform() with an identity transform whose origin is set to trs.
+void			btSoftBody::translate(const btVector3& trs)
+{
+	btTransform	t;
+	t.setIdentity();
+	t.setOrigin(trs);
+	transform(t);
+}
+
+// Calls transform() with an identity transform whose rotation is set to rot.
+void			btSoftBody::rotate(	const btQuaternion& rot)
+{
+	btTransform	t;
+	t.setIdentity();
+	t.setRotation(rot);
+	transform(t);
+}
+
+// Multiplies every node's m_x and m_q component-wise by scl, then updates the node tree, normals, bounds and constants.
+void			btSoftBody::scale(const btVector3& scl)
+{
+
+	const btScalar	margin=getCollisionShape()->getMargin();
+	ATTRIBUTE_ALIGNED16(btDbvtVolume)	vol;
+	
+	for(int i=0,ni=m_nodes.size();i<ni;++i)
+	{
+		Node&	n=m_nodes[i];
+		n.m_x*=scl;
+		n.m_q*=scl;
+		vol = btDbvtVolume::FromCR(n.m_x,margin);
+		m_ndbvt.update(n.m_leaf,vol);
+	}
+	updateNormals();
+	updateBounds();
+	updateConstants();
+}
+
+// Records the current shape in m_pose: per-node weights, positions relative to the centre of mass, volume (if bvolume) and the inverse of the Aqq matrix.
+void			btSoftBody::setPose(bool bvolume,bool bframe)
+{
+	m_pose.m_bvolume	=	bvolume;
+	m_pose.m_bframe		=	bframe;
+	int i,ni;
+
+	/* Weights		*/ 
+	const btScalar	omass=getTotalMass();
+	const btScalar	kmass=omass*m_nodes.size()*1000;
+	btScalar		tmass=omass;
+	m_pose.m_wgh.resize(m_nodes.size());
+	for(i=0,ni=m_nodes.size();i<ni;++i)
+	{
+		if(m_nodes[i].m_im<=0) tmass+=kmass;
+	}
+	for( i=0,ni=m_nodes.size();i<ni;++i)
+	{
+		Node&	n=m_nodes[i];
+		m_pose.m_wgh[i]=	n.m_im>0					?
+			1/(m_nodes[i].m_im*tmass)	:
+		kmass/tmass;
+	}
+	/* Pos		*/ 
+	const btVector3	com=evaluateCom();
+	m_pose.m_pos.resize(m_nodes.size());
+	for( i=0,ni=m_nodes.size();i<ni;++i)
+	{
+		m_pose.m_pos[i]=m_nodes[i].m_x-com;
+	}
+	m_pose.m_volume	=	bvolume?getVolume():0;
+	m_pose.m_com	=	com;
+	m_pose.m_rot.setIdentity();
+	m_pose.m_scl.setIdentity();
+	/* Aqq		*/ 
+	m_pose.m_aqq[0]	=
+		m_pose.m_aqq[1]	=
+		m_pose.m_aqq[2]	=	btVector3(0,0,0);
+	for( i=0,ni=m_nodes.size();i<ni;++i)
+	{
+		const btVector3&	q=m_pose.m_pos[i];
+		const btVector3		mq=m_pose.m_wgh[i]*q;
+		m_pose.m_aqq[0]+=mq.x()*q;
+		m_pose.m_aqq[1]+=mq.y()*q;
+		m_pose.m_aqq[2]+=mq.z()*q;
+	}
+	m_pose.m_aqq=m_pose.m_aqq.inverse();
+	updateConstants();
+}
+
+// Returns one sixth of the summed triple products of the face vertices taken relative to node 0; 0 when there are no nodes.
+btScalar		btSoftBody::getVolume() const
+{
+	btScalar	vol=0;
+	if(m_nodes.size()>0)
+	{
+		int i,ni;
+
+		const btVector3	org=m_nodes[0].m_x;
+		for(i=0,ni=m_faces.size();i<ni;++i)
+		{
+			const Face&	f=m_faces[i];
+			vol+=btDot(f.m_n[0]->m_x-org,btCross(f.m_n[1]->m_x-org,f.m_n[2]->m_x-org));
+		}
+		vol/=(btScalar)6;
+	}
+	return(vol);
+}
+
+// Returns the number of clusters in m_clusters.
+int				btSoftBody::clusterCount() const
+{
+	return(m_clusters.size());
+}
+
+// Returns the cluster's centre of mass: the node positions weighted by m_masses, scaled by the cluster's m_imass.
+btVector3		btSoftBody::clusterCom(const Cluster* cluster)
+{
+	btVector3		com(0,0,0);
+	for(int i=0,ni=cluster->m_nodes.size();i<ni;++i)
+	{
+		com+=cluster->m_nodes[i]->m_x*cluster->m_masses[i];
+	}
+	return(com*cluster->m_imass);
+}
+
+// Index-based overload: returns the centre of mass of m_clusters[cluster].
+btVector3		btSoftBody::clusterCom(int cluster) const
+{
+	return(clusterCom(m_clusters[cluster]));
+}
+
+// Returns the cluster's velocity at offset rpos: m_lv plus the cross product of m_av and rpos.
+btVector3		btSoftBody::clusterVelocity(const Cluster* cluster,const btVector3& rpos)
+{
+	return(cluster->m_lv+btCross(cluster->m_av,rpos));
+}
+
+// Applies a velocity impulse at offset rpos: updates m_lv/m_av immediately and accumulates the same deltas in m_vimpulses.
+void			btSoftBody::clusterVImpulse(Cluster* cluster,const btVector3& rpos,const btVector3& impulse)
+{
+	const btVector3	li=cluster->m_imass*impulse;
+	const btVector3	ai=cluster->m_invwi*btCross(rpos,impulse);
+	cluster->m_vimpulses[0]+=li;cluster->m_lv+=li;
+	cluster->m_vimpulses[1]+=ai;cluster->m_av+=ai;
+	cluster->m_nvimpulses++;
+}
+
+// Accumulates a drift impulse at offset rpos in m_dimpulses (linear and angular parts); velocities are not changed here.
+void			btSoftBody::clusterDImpulse(Cluster* cluster,const btVector3& rpos,const btVector3& impulse)
+{
+	const btVector3	li=cluster->m_imass*impulse;
+	const btVector3	ai=cluster->m_invwi*btCross(rpos,impulse);
+	cluster->m_dimpulses[0]+=li;
+	cluster->m_dimpulses[1]+=ai;
+	cluster->m_ndimpulses++;
+}
+
+// Dispatches an Impulse at offset rpos: its velocity part if m_asVelocity is set, its drift part if m_asDrift is set.
+void			btSoftBody::clusterImpulse(Cluster* cluster,const btVector3& rpos,const Impulse& impulse)
+{
+	if(impulse.m_asVelocity)	clusterVImpulse(cluster,rpos,impulse.m_velocity);
+	if(impulse.m_asDrift)		clusterDImpulse(cluster,rpos,impulse.m_drift);
+}
+
+// Applies an angular-only velocity impulse: m_invwi*impulse is added to m_av and accumulated in m_vimpulses[1].
+void			btSoftBody::clusterVAImpulse(Cluster* cluster,const btVector3& impulse)
+{
+	const btVector3	ai=cluster->m_invwi*impulse;
+	cluster->m_vimpulses[1]+=ai;cluster->m_av+=ai;
+	cluster->m_nvimpulses++;
+}
+
+// Accumulates an angular-only drift impulse: m_invwi*impulse is added to m_dimpulses[1].
+void			btSoftBody::clusterDAImpulse(Cluster* cluster,const btVector3& impulse)
+{
+	const btVector3	ai=cluster->m_invwi*impulse;
+	cluster->m_dimpulses[1]+=ai;
+	cluster->m_ndimpulses++;
+}
+
+// Dispatches an angular Impulse: its velocity part if m_asVelocity is set, its drift part if m_asDrift is set.
+void			btSoftBody::clusterAImpulse(Cluster* cluster,const Impulse& impulse)
+{
+	if(impulse.m_asVelocity)	clusterVAImpulse(cluster,impulse.m_velocity);
+	if(impulse.m_asDrift)		clusterDAImpulse(cluster,impulse.m_drift);
+}
+
+// Accumulates a linear-only drift impulse: impulse*m_imass is added to m_dimpulses[0].
+void			btSoftBody::clusterDCImpulse(Cluster* cluster,const btVector3& impulse)
+{
+	cluster->m_dimpulses[0]+=impulse*cluster->m_imass;
+	cluster->m_ndimpulses++;
+}
+
+// Adjacency list of one node: the indices of the nodes it is linked to.
+// generateBendingConstraints() keeps one NodeLinks per node for its
+// distance==2 fast path.
+struct NodeLinks
+{
+    btAlignedObjectArray<int> m_links;
+};
+
+
+
+//
+// Appends a link (flagged with m_bbending=1) between every pair of nodes
+// whose shortest path through the current links is exactly `distance` links
+// long, using material `mat` for the new links.
+// Returns the number of links appended; returns 0 without touching anything
+// when distance<=1.
+// Memory: allocates an n*n table of unsigned (n = node count), freed before
+// returning.
+// NOTE(review): the IDX helper macro defined below is not #undef'd at the end
+// of the function, so it stays defined for the rest of the translation unit.
+int				btSoftBody::generateBendingConstraints(int distance,Material* mat)
+{
+	int i,j;
+
+	if(distance>1)
+	{
+		/* Build graph	*/ 
+		const int		n=m_nodes.size();
+		// "Unreachable" marker: the largest unsigned value shifted right by one,
+		// so that adding two markers together still fits in an unsigned.
+		const unsigned	inf=(~(unsigned)0)>>1;
+		// adj[IDX(a,b)] holds the number of links on the shortest known path a->b.
+		unsigned*		adj=new unsigned[n*n];
+		
+
+#define IDX(_x_,_y_)	((_y_)*n+(_x_))
+		// Initialise: 0 on the diagonal, inf everywhere else.
+		for(j=0;j<n;++j)
+		{
+			for(i=0;i<n;++i)
+			{
+				if(i!=j)
+				{
+					adj[IDX(i,j)]=adj[IDX(j,i)]=inf;
+				}
+				else
+				{
+					adj[IDX(i,j)]=adj[IDX(j,i)]=0;
+				}
+			}
+		}
+		// Every existing link is an edge of length 1 (stored symmetrically).
+		// Node indices are recovered from the node pointers by pointer difference.
+		for( i=0;i<m_links.size();++i)
+		{
+			const int	ia=(int)(m_links[i].m_n[0]-&m_nodes[0]);
+			const int	ib=(int)(m_links[i].m_n[1]-&m_nodes[0]);
+			adj[IDX(ia,ib)]=1;
+			adj[IDX(ib,ia)]=1;
+		}
+
+
+		//special optimized case for distance == 2
+		if (distance == 2)
+		{
+
+			btAlignedObjectArray<NodeLinks> nodeLinks;
+
+
+			/* Build node links */
+			nodeLinks.resize(m_nodes.size());
+
+			// Per-node neighbour lists; the linear search keeps each list free of
+			// duplicates (findLinearSearch returns size() when the value is absent).
+			for( i=0;i<m_links.size();++i)
+			{
+				const int	ia=(int)(m_links[i].m_n[0]-&m_nodes[0]);
+				const int	ib=(int)(m_links[i].m_n[1]-&m_nodes[0]);
+				if (nodeLinks[ia].m_links.findLinearSearch(ib)==nodeLinks[ia].m_links.size())
+					nodeLinks[ia].m_links.push_back(ib);
+
+				if (nodeLinks[ib].m_links.findLinearSearch(ia)==nodeLinks[ib].m_links.size())
+					nodeLinks[ib].m_links.push_back(ia);
+			}
+			// For every node i, every neighbour k of i and every neighbour j of k:
+			// i and j are at most two links apart. The locals i and j declared in
+			// this loop nest shadow the function-level i and j.
+			for (int ii=0;ii<nodeLinks.size();ii++)
+			{
+				int i=ii;
+
+				for (int jj=0;jj<nodeLinks[ii].m_links.size();jj++)
+				{
+					int k = nodeLinks[ii].m_links[jj];
+					for (int kk=0;kk<nodeLinks[k].m_links.size();kk++)
+					{
+						int j = nodeLinks[k].m_links[kk];
+						if (i!=j)
+						{
+							const unsigned	sum=adj[IDX(i,k)]+adj[IDX(k,j)];
+							btAssert(sum==2);
+							// Only overwrite when no shorter path (a direct link) is known.
+							if(adj[IDX(i,j)]>sum)
+							{
+								adj[IDX(i,j)]=adj[IDX(j,i)]=sum;
+							}
+						}
+
+					}
+				}
+			}
+		} else
+		{
+			///generic Floyd's algorithm
+			// All-pairs shortest paths; only the i>j half is visited and both
+			// symmetric entries are written.
+			for(int k=0;k<n;++k)
+			{
+				for(j=0;j<n;++j)
+				{
+					for(i=j+1;i<n;++i)
+					{
+						const unsigned	sum=adj[IDX(i,k)]+adj[IDX(k,j)];
+						if(adj[IDX(i,j)]>sum)
+						{
+							adj[IDX(i,j)]=adj[IDX(j,i)]=sum;
+						}
+					}
+				}
+			}
+		}
+
+
+		/* Build links	*/ 
+		// One new bending link per unordered node pair at exactly `distance`.
+		int	nlinks=0;
+		for(j=0;j<n;++j)
+		{
+			for(i=j+1;i<n;++i)
+			{
+				if(adj[IDX(i,j)]==(unsigned)distance)
+				{
+					appendLink(i,j,mat);
+					// The link just appended is the last one; mark it as bending.
+					m_links[m_links.size()-1].m_bbending=1;
+					++nlinks;
+				}
+			}
+		}
+		delete[] adj;		
+		return(nlinks);
+	}
+	return(0);
+}
+
+//
+// Shuffles m_links and then m_faces by swapping each element with one chosen
+// by a linear congruential generator. The seed is a fixed constant, so the
+// resulting order is the same on every call for a given element count.
+void			btSoftBody::randomizeConstraints()
+{
+	unsigned long	seed=243703;
+
+	/* Links: one generator step per element	*/ 
+	const int	linkCount=m_links.size();
+	int			at=0;
+	while(at<linkCount)
+	{
+		seed=(1664525L*seed+1013904223L)&0xffffffff;
+		const int	pick=(int)(seed%linkCount);
+		btSwap(m_links[at],m_links[pick]);
+		++at;
+	}
+	/* Faces: continue the same random sequence	*/ 
+	const int	faceCount=m_faces.size();
+	at=0;
+	while(at<faceCount)
+	{
+		seed=(1664525L*seed+1013904223L)&0xffffffff;
+		const int	pick=(int)(seed%faceCount);
+		btSwap(m_faces[at],m_faces[pick]);
+		++at;
+	}
+}
+
+//
+// Destroys the cluster stored at m_clusters[index] and removes it from the
+// array. If the cluster has a leaf, that leaf is removed from m_cdbvt first.
+// The cluster is destroyed with an explicit destructor call followed by
+// btAlignedFree, matching the placement-new on btAlignedAlloc memory used
+// where clusters are created in generateClusters().
+// m_clusters.remove() changes the array size, so indices of other clusters
+// may change after this call.
+void			btSoftBody::releaseCluster(int index)
+{
+	Cluster*	c=m_clusters[index];
+	if(c->m_leaf) m_cdbvt.remove(c->m_leaf);
+	// Unlink the pointer from the array while it is still valid, so its value
+	// is never read after the memory has been handed back to the allocator.
+	m_clusters.remove(c);
+	c->~Cluster();
+	btAlignedFree(c);
+}
+
+//
+// Releases every cluster by repeatedly releasing the one at index 0 until
+// m_clusters is empty.
+void			btSoftBody::releaseClusters()
+{
+	for(;;)
+	{
+		if(!(m_clusters.size()>0)) break;
+		releaseCluster(0);
+	}
+}
+
+//
+// Rebuilds m_clusters from scratch (all existing clusters are released first).
+//  - k>0 (after clamping k to the node count): nodes are partitioned into k
+//    clusters by an iterative nearest-center assignment that runs until the
+//    centers stop moving or `maxiterations` is reached; clusters are then
+//    extended across shared faces, a non-colliding "master" cluster holding
+//    every node is added when more than one cluster exists, and empty clusters
+//    are released.
+//  - otherwise: one cluster per tetrahedron if m_tetras is non-empty, else one
+//    cluster per face.
+// If any cluster results, initializeClusters()/updateClusters() are run and
+// m_clusterConnectivity is rebuilt.
+// Returns the final number of clusters.
+int				btSoftBody::generateClusters(int k,int maxiterations)
+{
+	int i;
+	releaseClusters();
+	m_clusters.resize(btMin(k,m_nodes.size()));
+	// Clusters live in 16-byte aligned memory and are built with placement new.
+	for(i=0;i<m_clusters.size();++i)
+	{
+		m_clusters[i]			=	new(btAlignedAlloc(sizeof(Cluster),16)) Cluster();
+		m_clusters[i]->m_collide=	true;
+	}
+	k=m_clusters.size();
+	if(k>0)
+	{
+		/* Initialize		*/ 
+		btAlignedObjectArray<btVector3>	centers;
+		btVector3						cog(0,0,0);
+		int								i;
+		// Initial assignment: nodes are spread over the clusters with a fixed
+		// multiplicative hash of the node index; cog accumulates the mean node
+		// position, which becomes the initial center of every cluster.
+		for(i=0;i<m_nodes.size();++i)
+		{
+			cog+=m_nodes[i].m_x;
+			m_clusters[(i*29873)%m_clusters.size()]->m_nodes.push_back(&m_nodes[i]);
+		}
+		cog/=(btScalar)m_nodes.size();
+		centers.resize(k,cog);
+		/* Iterate			*/ 
+		const btScalar	slope=16;
+		bool			changed;
+		int				iterations=0;
+		do	{
+			// Step factor for moving a center towards its cluster mean: 2 on the
+			// first pass, decreasing linearly to 1 once `iterations` reaches `slope`.
+			const btScalar	w=2-btMin<btScalar>(1,iterations/slope);
+			changed=false;
+			iterations++;	
+			int i;
+
+			// Update the centers. A cluster with no nodes keeps its previous center.
+			for(i=0;i<k;++i)
+			{
+				btVector3	c(0,0,0);
+				for(int j=0;j<m_clusters[i]->m_nodes.size();++j)
+				{
+					c+=m_clusters[i]->m_nodes[j]->m_x;
+				}
+				if(m_clusters[i]->m_nodes.size())
+				{
+					c			/=	(btScalar)m_clusters[i]->m_nodes.size();
+					c			=	centers[i]+(c-centers[i])*w;
+					changed		|=	((c-centers[i]).length2()>SIMD_EPSILON);
+					centers[i]	=	c;
+					// Emptied here; refilled by the assignment pass below.
+					m_clusters[i]->m_nodes.resize(0);
+				}			
+			}
+			// Assign every node to the center with the smallest ClusterMetric
+			// (ties keep the lowest cluster index).
+			for(i=0;i<m_nodes.size();++i)
+			{
+				const btVector3	nx=m_nodes[i].m_x;
+				int				kbest=0;
+				btScalar		kdist=ClusterMetric(centers[0],nx);
+				for(int j=1;j<k;++j)
+				{
+					const btScalar	d=ClusterMetric(centers[j],nx);
+					if(d<kdist)
+					{
+						kbest=j;
+						kdist=d;
+					}
+				}
+				m_clusters[kbest]->m_nodes.push_back(&m_nodes[i]);
+			}		
+		} while(changed&&(iterations<maxiterations));
+		/* Merge		*/ 
+		// cids[node] = index of the cluster the node was assigned to (-1 if none).
+		btAlignedObjectArray<int>	cids;
+		cids.resize(m_nodes.size(),-1);
+		for(i=0;i<m_clusters.size();++i)
+		{
+			for(int j=0;j<m_clusters[i]->m_nodes.size();++j)
+			{
+				cids[int(m_clusters[i]->m_nodes[j]-&m_nodes[0])]=i;
+			}
+		}
+		// For each face corner, the other two corners are added to that corner's
+		// cluster when they belong to a different one, so clusters that meet on
+		// a face end up sharing nodes.
+		for(i=0;i<m_faces.size();++i)
+		{
+			const int idx[]={	int(m_faces[i].m_n[0]-&m_nodes[0]),
+				int(m_faces[i].m_n[1]-&m_nodes[0]),
+				int(m_faces[i].m_n[2]-&m_nodes[0])};
+			for(int j=0;j<3;++j)
+			{
+				const int cid=cids[idx[j]];
+				for(int q=1;q<3;++q)
+				{
+					const int kid=idx[(j+q)%3];
+					if(cids[kid]!=cid)
+					{
+						// Skip nodes that are already present in the cluster.
+						if(m_clusters[cid]->m_nodes.findLinearSearch(&m_nodes[kid])==m_clusters[cid]->m_nodes.size())
+						{
+							m_clusters[cid]->m_nodes.push_back(&m_nodes[kid]);
+						}
+					}
+				}
+			}
+		}
+		/* Master		*/ 
+		// With more than one cluster, an extra cluster containing every node is
+		// created with m_collide=false and swapped into slot 0.
+		if(m_clusters.size()>1)
+		{
+			Cluster*	pmaster=new(btAlignedAlloc(sizeof(Cluster),16)) Cluster();
+			pmaster->m_collide	=	false;
+			pmaster->m_nodes.reserve(m_nodes.size());
+			for(int i=0;i<m_nodes.size();++i) pmaster->m_nodes.push_back(&m_nodes[i]);
+			m_clusters.push_back(pmaster);
+			btSwap(m_clusters[0],m_clusters[m_clusters.size()-1]);
+		}
+		/* Terminate	*/ 
+		// Drop clusters that ended up empty. releaseCluster() removes the entry
+		// from m_clusters, so the same index is visited again (hence i--).
+		for(i=0;i<m_clusters.size();++i)
+		{
+			if(m_clusters[i]->m_nodes.size()==0)
+			{
+				releaseCluster(i--);
+			}
+		}
+	} else
+	{
+		//create a cluster for each tetrahedron (if tetrahedra exist) or each face
+		if (m_tetras.size())
+		{
+			m_clusters.resize(m_tetras.size());
+			for(i=0;i<m_clusters.size();++i)
+			{
+				m_clusters[i]			=	new(btAlignedAlloc(sizeof(Cluster),16)) Cluster();
+				m_clusters[i]->m_collide=	true;
+			}
+			// Cluster i holds the four nodes of tetrahedron i.
+			for (i=0;i<m_tetras.size();i++)
+			{
+				for (int j=0;j<4;j++)
+				{
+					m_clusters[i]->m_nodes.push_back(m_tetras[i].m_n[j]);
+				}
+			}
+
+		} else
+		{
+			m_clusters.resize(m_faces.size());
+			for(i=0;i<m_clusters.size();++i)
+			{
+				m_clusters[i]			=	new(btAlignedAlloc(sizeof(Cluster),16)) Cluster();
+				m_clusters[i]->m_collide=	true;
+			}
+
+			// Cluster i holds the three nodes of face i.
+			for(i=0;i<m_faces.size();++i)
+			{
+				for(int j=0;j<3;++j)
+				{
+					m_clusters[i]->m_nodes.push_back(m_faces[i].m_n[j]);
+				}
+			}
+		}
+	}
+
+	if (m_clusters.size())
+	{
+		initializeClusters();
+		updateClusters();
+
+
+		//for self-collision
+		// m_clusterConnectivity is a flattened square table, entry
+		// [c0+c1*clusterCount] being true when clusters c0 and c1 share at least
+		// one node. Every cluster also records its own index in m_clusterIndex.
+		m_clusterConnectivity.resize(m_clusters.size()*m_clusters.size());
+		{
+			for (int c0=0;c0<m_clusters.size();c0++)
+			{
+				m_clusters[c0]->m_clusterIndex=c0;
+				for (int c1=0;c1<m_clusters.size();c1++)
+				{
+					
+					bool connected=false;
+					Cluster* cla = m_clusters[c0];
+					Cluster* clb = m_clusters[c1];
+					// Pairwise node comparison; both loops stop at the first match.
+					for (int i=0;!connected&&i<cla->m_nodes.size();i++)
+					{
+						for (int j=0;j<clb->m_nodes.size();j++)
+						{
+							if (cla->m_nodes[i] == clb->m_nodes[j])
+							{
+								connected=true;
+								break;
+							}
+						}
+					}
+					m_clusterConnectivity[c0+c1*m_clusters.size()]=connected;
+				}
+			}
+		}
+	}
+
+	return(m_clusters.size());
+}
+
+//
+// Refines the mesh along an implicit function: for every edge (link or face
+// edge) on which ImplicitSolve() reports a parameter t>0, a new node is
+// inserted at that parameter and the links and faces using the edge are split.
+// When `cut` is true, nodes lying on the function (all newly inserted nodes,
+// plus existing ones with |Eval|<accurary) are additionally duplicated and
+// the features evaluating below `accurary` are re-attached to the duplicates.
+//   ifn      - implicit function, queried through Eval() and ImplicitSolve()
+//   accurary - tolerance handed to ImplicitSolve() and used for the cut tests
+//              (parameter name spelled as in the original interface)
+//   cut      - enables the duplication/detach pass
+// Always sets m_bUpdateRtCst so cached constants are rebuilt later.
+// NOTE(review): ImplicitSolve and SameSign are defined outside this chunk;
+// "t>0 means a crossing was found" is inferred from how t is used here.
+void			btSoftBody::refine(ImplicitFn* ifn,btScalar accurary,bool cut)
+{
+	const Node*			nbase = &m_nodes[0];
+	int					ncount = m_nodes.size();
+	// Per node-pair state: -2 = no edge, -1 = edge exists, >0 = index of the
+	// node inserted on that edge.
+	btSymMatrix<int>	edges(ncount,-2);
+	int					newnodes=0;
+	int i,j,k,ni;
+
+	/* Filter out		*/ 
+	// Bending links whose two ends evaluate with different signs are removed
+	// (swap with the last link, pop, and revisit the same index).
+	for(i=0;i<m_links.size();++i)
+	{
+		Link&	l=m_links[i];
+		if(l.m_bbending)
+		{
+			if(!SameSign(ifn->Eval(l.m_n[0]->m_x),ifn->Eval(l.m_n[1]->m_x)))
+			{
+				btSwap(m_links[i],m_links[m_links.size()-1]);
+				m_links.pop_back();--i;
+			}
+		}	
+	}
+	/* Fill edges		*/ 
+	for(i=0;i<m_links.size();++i)
+	{
+		Link&	l=m_links[i];
+		edges(int(l.m_n[0]-nbase),int(l.m_n[1]-nbase))=-1;
+	}
+	for(i=0;i<m_faces.size();++i)
+	{	
+		Face&	f=m_faces[i];
+		edges(int(f.m_n[0]-nbase),int(f.m_n[1]-nbase))=-1;
+		edges(int(f.m_n[1]-nbase),int(f.m_n[2]-nbase))=-1;
+		edges(int(f.m_n[2]-nbase),int(f.m_n[0]-nbase))=-1;
+	}
+	/* Intersect		*/ 
+	for(i=0;i<ncount;++i)
+	{
+		for(j=i+1;j<ncount;++j)
+		{
+			if(edges(i,j)==-1)
+			{
+				Node&			a=m_nodes[i];
+				Node&			b=m_nodes[j];
+				const btScalar	t=ImplicitSolve(ifn,a.m_x,b.m_x,accurary);
+				if(t>0)
+				{
+					// Position and velocity of the new node are interpolated at t.
+					const btVector3	x=Lerp(a.m_x,b.m_x,t);
+					const btVector3	v=Lerp(a.m_v,b.m_v,t);
+					btScalar		m=0;
+					// Mass of the new node (m_im is an inverse mass; 0 is treated as
+					// "no finite mass" by the >0 tests):
+					//  - both ends finite: the end masses are scaled by f and the new
+					//    node gets mc*f, so the three masses sum to ma+mb;
+					//  - one end finite: that end's inverse mass is doubled and the
+					//    new node gets the same (halved) mass;
+					//  - neither finite: the new node gets mass 0.
+					if(a.m_im>0)
+					{
+						if(b.m_im>0)
+						{
+							const btScalar	ma=1/a.m_im;
+							const btScalar	mb=1/b.m_im;
+							const btScalar	mc=Lerp(ma,mb,t);
+							const btScalar	f=(ma+mb)/(ma+mb+mc);
+							a.m_im=1/(ma*f);
+							b.m_im=1/(mb*f);
+							m=mc*f;
+						}
+						else
+						{ a.m_im/=0.5;m=1/a.m_im; }
+					}
+					else
+					{
+						if(b.m_im>0)
+						{ b.m_im/=0.5;m=1/b.m_im; }
+						else
+							m=0;
+					}
+					appendNode(x,m);
+					// Remember which node now sits on edge (i,j).
+					edges(i,j)=m_nodes.size()-1;
+					m_nodes[edges(i,j)].m_v=v;
+					++newnodes;
+				}
+			}
+		}
+	}
+	// Re-read the base pointer after the appendNode() calls above
+	// (presumably because the node array may have been reallocated).
+	nbase=&m_nodes[0];
+	/* Refine links		*/ 
+	// Only the links present before this pass are visited (ni is fixed up front).
+	for(i=0,ni=m_links.size();i<ni;++i)
+	{
+		Link&		feat=m_links[i];
+		const int	idx[]={	int(feat.m_n[0]-nbase),
+			int(feat.m_n[1]-nbase)};
+		// Both ends must be original nodes: `edges` only covers those.
+		if((idx[0]<ncount)&&(idx[1]<ncount))
+		{
+			// This `ni` is a new local (inserted node index) that shadows the loop
+			// bound `ni` inside this block only.
+			const int ni=edges(idx[0],idx[1]);
+			if(ni>0)
+			{
+				// Duplicate the link, then rewire: original = (n0,new), copy = (new,n1).
+				appendLink(i);
+				Link*		pft[]={	&m_links[i],
+					&m_links[m_links.size()-1]};			
+				pft[0]->m_n[0]=&m_nodes[idx[0]];
+				pft[0]->m_n[1]=&m_nodes[ni];
+				pft[1]->m_n[0]=&m_nodes[ni];
+				pft[1]->m_n[1]=&m_nodes[idx[1]];
+			}
+		}
+	}
+	/* Refine faces		*/ 
+	// m_faces.size() is re-read every iteration, so faces appended here are
+	// visited as well.
+	for(i=0;i<m_faces.size();++i)
+	{
+		const Face&	feat=m_faces[i];
+		const int	idx[]={	int(feat.m_n[0]-nbase),
+			int(feat.m_n[1]-nbase),
+			int(feat.m_n[2]-nbase)};
+		// (j,k) walks the three edges: (2,0), (0,1), (1,2).
+		for(j=2,k=0;k<3;j=k++)
+		{
+			if((idx[j]<ncount)&&(idx[k]<ncount))
+			{
+				const int ni=edges(idx[j],idx[k]);
+				if(ni>0)
+				{
+					// Split the face in two along the segment from the new node to
+					// the opposite corner l, and add a link along that segment.
+					appendFace(i);
+					const int	l=(k+1)%3;
+					Face*		pft[]={	&m_faces[i],
+						&m_faces[m_faces.size()-1]};
+					pft[0]->m_n[0]=&m_nodes[idx[l]];
+					pft[0]->m_n[1]=&m_nodes[idx[j]];
+					pft[0]->m_n[2]=&m_nodes[ni];
+					pft[1]->m_n[0]=&m_nodes[ni];
+					pft[1]->m_n[1]=&m_nodes[idx[k]];
+					pft[1]->m_n[2]=&m_nodes[idx[l]];
+					appendLink(ni,idx[l],pft[0]->m_material);
+					// Revisit the same face index: the rewired face may have another
+					// edge that needs splitting.
+					--i;break;
+				}
+			}
+		}
+	}
+	/* Cut				*/ 
+	if(cut)
+	{	
+		// cnodes[n] = index of the duplicate created for node n, or 0 for none.
+		btAlignedObjectArray<int>	cnodes;
+		const int					pcount=ncount;
+		int							i;
+		ncount=m_nodes.size();
+		cnodes.resize(ncount,0);
+		/* Nodes		*/ 
+		// Duplicate every node inserted by the refinement (i>=pcount) and every
+		// older node with |Eval|<accurary; the mass is split evenly between the
+		// node and its duplicate.
+		for(i=0;i<ncount;++i)
+		{
+			const btVector3	x=m_nodes[i].m_x;
+			if((i>=pcount)||(btFabs(ifn->Eval(x))<accurary))
+			{
+				const btVector3	v=m_nodes[i].m_v;
+				btScalar		m=getMass(i);
+				if(m>0) { m*=0.5;m_nodes[i].m_im/=0.5; }
+				appendNode(x,m);
+				cnodes[i]=m_nodes.size()-1;
+				m_nodes[cnodes[i]].m_v=v;
+			}
+		}
+		nbase=&m_nodes[0];
+		/* Links		*/ 
+		for(i=0,ni=m_links.size();i<ni;++i)
+		{
+			const int		id[]={	int(m_links[i].m_n[0]-nbase),
+				int(m_links[i].m_n[1]-nbase)};
+			// Index of the link to re-attach to duplicates; 0 doubles as "none".
+			// NOTE(review): because 0 means "none", link 0 is never re-attached
+			// through the else-branch below even when it passes the test.
+			int				todetach=0;
+			if(cnodes[id[0]]&&cnodes[id[1]])
+			{
+				// Both ends were duplicated: keep this link and re-attach a copy.
+				appendLink(i);
+				todetach=m_links.size()-1;
+			}
+			else
+			{
+				if((	(ifn->Eval(m_nodes[id[0]].m_x)<accurary)&&
+					(ifn->Eval(m_nodes[id[1]].m_x)<accurary)))
+					todetach=i;
+			}
+			if(todetach)
+			{
+				Link&	l=m_links[todetach];
+				for(int j=0;j<2;++j)
+				{
+					int cn=cnodes[int(l.m_n[j]-nbase)];
+					if(cn) l.m_n[j]=&m_nodes[cn];
+				}			
+			}
+		}
+		/* Faces		*/ 
+		// Faces whose three corners all evaluate below `accurary` are moved onto
+		// the duplicates (where a duplicate exists).
+		for(i=0,ni=m_faces.size();i<ni;++i)
+		{
+			Node**			n=	m_faces[i].m_n;
+			if(	(ifn->Eval(n[0]->m_x)<accurary)&&
+				(ifn->Eval(n[1]->m_x)<accurary)&&
+				(ifn->Eval(n[2]->m_x)<accurary))
+			{
+				for(int j=0;j<3;++j)
+				{
+					int cn=cnodes[int(n[j]-nbase)];
+					if(cn) n[j]=&m_nodes[cn];
+				}
+			}
+		}
+		/* Clean orphans	*/ 
+		// ranks[n] = number of link ends and face corners referencing node n.
+		int							nnodes=m_nodes.size();
+		btAlignedObjectArray<int>	ranks;
+		btAlignedObjectArray<int>	todelete;
+		ranks.resize(nnodes,0);
+		for(i=0,ni=m_links.size();i<ni;++i)
+		{
+			for(int j=0;j<2;++j) ranks[int(m_links[i].m_n[j]-nbase)]++;
+		}
+		for(i=0,ni=m_faces.size();i<ni;++i)
+		{
+			for(int j=0;j<3;++j) ranks[int(m_faces[i].m_n[j]-nbase)]++;
+		}
+		// Remove links that have an end referenced by nothing else (rank 1).
+		for(i=0;i<m_links.size();++i)
+		{
+			const int	id[]={	int(m_links[i].m_n[0]-nbase),
+				int(m_links[i].m_n[1]-nbase)};
+			const bool	sg[]={	ranks[id[0]]==1,
+				ranks[id[1]]==1};
+			if(sg[0]||sg[1])
+			{
+				--ranks[id[0]];
+				--ranks[id[1]];
+				btSwap(m_links[i],m_links[m_links.size()-1]);
+				m_links.pop_back();--i;
+			}
+		}
+		// Disabled: removal of nodes that are no longer referenced. While this
+		// stays compiled out, `todelete` is never filled.
+#if 0	
+		for(i=nnodes-1;i>=0;--i)
+		{
+			if(!ranks[i]) todelete.push_back(i);
+		}	
+		if(todelete.size())
+		{		
+			btAlignedObjectArray<int>&	map=ranks;
+			for(int i=0;i<nnodes;++i) map[i]=i;
+			PointersToIndices(this);
+			for(int i=0,ni=todelete.size();i<ni;++i)
+			{
+				int		j=todelete[i];
+				int&	a=map[j];
+				int&	b=map[--nnodes];
+				m_ndbvt.remove(m_nodes[a].m_leaf);m_nodes[a].m_leaf=0;
+				btSwap(m_nodes[a],m_nodes[b]);
+				j=a;a=b;b=j;			
+			}
+			IndicesToPointers(this,&map[0]);
+			m_nodes.resize(nnodes);
+		}
+#endif
+	}
+	// Topology changed: request a rebuild of the cached constants.
+	m_bUpdateRtCst=true;
+}
+
+//
+// Pointer-based overload: converts both node pointers to indices into
+// m_nodes and forwards to the index-based cutLink().
+bool			btSoftBody::cutLink(const Node* node0,const Node* node1,btScalar position)
+{
+	const Node*	first=&m_nodes[0];
+	const int	index0=int(node0-first);
+	const int	index1=int(node1-first);
+	return(cutLink(index0,index1,position));
+}
+
+//
+// Cuts the edge between nodes `node0` and `node1` at interpolation parameter
+// `position`. Two new nodes (mass 1) are created at the interpolated position
+// with the interpolated velocity. Every link matching the edge is split into
+// two links ending on different new nodes; every face having that edge is
+// split likewise and connected to both new nodes by extra links.
+// Returns true if at least one link matched the edge. Otherwise the two
+// temporary nodes are removed again and false is returned.
+// NOTE(review): `done` is only set by the link loop; a face that matches
+// without any matching link is still split, but its new nodes are then
+// popped at the end - confirm callers never hit that case.
+bool			btSoftBody::cutLink(int node0,int node1,btScalar position)
+{
+	bool			done=false;
+	int i,ni;
+	const btVector3	x=Lerp(m_nodes[node0].m_x,m_nodes[node1].m_x,position);
+	const btVector3	v=Lerp(m_nodes[node0].m_v,m_nodes[node1].m_v,position);
+	const btScalar	m=1;
+	appendNode(x,m);
+	appendNode(x,m);
+	// Node pointers are taken only after both appendNode() calls.
+	Node*			pa=&m_nodes[node0];
+	Node*			pb=&m_nodes[node1];
+	Node*			pn[2]={	&m_nodes[m_nodes.size()-2],
+		&m_nodes[m_nodes.size()-1]};
+	pn[0]->m_v=v;
+	pn[1]->m_v=v;
+	// Only links present before the cut are examined (ni is fixed up front).
+	for(i=0,ni=m_links.size();i<ni;++i)
+	{
+		// MatchEdge yields -1 for "no match", otherwise an index into pn[].
+		const int mtch=MatchEdge(m_links[i].m_n[0],m_links[i].m_n[1],pa,pb);
+		if(mtch!=-1)
+		{
+			appendLink(i);
+			Link*	pft[]={&m_links[i],&m_links[m_links.size()-1]};
+			// Original link now ends at one new node, the copy starts at the other.
+			pft[0]->m_n[1]=pn[mtch];
+			pft[1]->m_n[0]=pn[1-mtch];
+			done=true;
+		}
+	}
+	for(i=0,ni=m_faces.size();i<ni;++i)
+	{
+		// (k,l) walks the three face edges: (2,0), (0,1), (1,2).
+		for(int k=2,l=0;l<3;k=l++)
+		{
+			const int mtch=MatchEdge(m_faces[i].m_n[k],m_faces[i].m_n[l],pa,pb);
+			if(mtch!=-1)
+			{
+				appendFace(i);
+				Face*	pft[]={&m_faces[i],&m_faces[m_faces.size()-1]};
+				pft[0]->m_n[l]=pn[mtch];
+				pft[1]->m_n[k]=pn[1-mtch];
+				// Link both new nodes to the face corner opposite the cut edge.
+				appendLink(pn[0],pft[0]->m_n[(l+1)%3],pft[0]->m_material,true);
+				appendLink(pn[1],pft[0]->m_n[(l+1)%3],pft[0]->m_material,true);
+			}
+		}
+	}
+	if(!done)
+	{
+		// Nothing was cut: take the two temporary nodes out of the node tree
+		// and out of the node array again.
+		m_ndbvt.remove(pn[0]->m_leaf);
+		m_ndbvt.remove(pn[1]->m_leaf);
+		m_nodes.pop_back();
+		m_nodes.pop_back();
+	}
+	return(done);
+}
+
+//
+// Casts the segment rayFrom->rayTo against this body and fills `results`.
+// The face tree is built on demand when faces exist but the tree is empty.
+// `results` is first reset (body=this, fraction=1, feature=None, index=-1);
+// returns true when the detailed rayTest() overload reports at least one hit.
+bool			btSoftBody::rayTest(const btVector3& rayFrom,
+									const btVector3& rayTo,
+									sRayCast& results)
+{
+	const bool	treeMissing=m_faces.size()&&m_fdbvt.empty();
+	if(treeMissing)
+	{
+		initializeFaceTree();
+	}
+
+	results.index	=	-1;
+	results.feature	=	eFeature::None;
+	results.fraction = 1.f;
+	results.body	=	this;
+
+	const int	hits=rayTest(rayFrom,rayTo,results.fraction,results.feature,results.index,false);
+	return(hits!=0);
+}
+
+//
+// Replaces the three solver sequences of m_cfg with a preset.
+//  Positions : position sequence = Anchors, RContacts, SContacts, Linear.
+//  Velocities: velocity sequence = Linear; position sequence = Anchors,
+//              RContacts, SContacts; drift sequence = Linear.
+// Any other preset value leaves all three sequences empty.
+void			btSoftBody::setSolver(eSolverPresets::_ preset)
+{
+	m_cfg.m_vsequence.clear();
+	m_cfg.m_psequence.clear();
+	m_cfg.m_dsequence.clear();
+
+	if(preset==eSolverPresets::Positions)
+	{
+		m_cfg.m_psequence.push_back(ePSolver::Anchors);
+		m_cfg.m_psequence.push_back(ePSolver::RContacts);
+		m_cfg.m_psequence.push_back(ePSolver::SContacts);
+		m_cfg.m_psequence.push_back(ePSolver::Linear);
+	}
+	else if(preset==eSolverPresets::Velocities)
+	{
+		m_cfg.m_vsequence.push_back(eVSolver::Linear);
+
+		m_cfg.m_psequence.push_back(ePSolver::Anchors);
+		m_cfg.m_psequence.push_back(ePSolver::RContacts);
+		m_cfg.m_psequence.push_back(ePSolver::SContacts);
+
+		m_cfg.m_dsequence.push_back(ePSolver::Linear);
+	}
+}
+
+//
+// Prediction step for one time step `dt` (scaled by m_cfg.timescale):
+// rebuilds cached constants when flagged, applies gravity and forces,
+// integrates node velocities and positions, then refreshes clusters, bounds,
+// the node and face trees and the pose, applies pose matching, clears the
+// contact lists and incrementally optimizes the three trees.
+// NOTE(review): m_sst.isdt is computed as 1/(dt*timescale) without a zero
+// check - confirm callers never pass dt==0.
+void			btSoftBody::predictMotion(btScalar dt)
+{
+
+	int i,ni;
+
+	/* Update				*/ 
+	// Deferred rebuild, requested by setting m_bUpdateRtCst. The face tree is
+	// dropped and only rebuilt when VF_SS collisions are enabled.
+	if(m_bUpdateRtCst)
+	{
+		m_bUpdateRtCst=false;
+		updateConstants();
+		m_fdbvt.clear();
+		if(m_cfg.collisions&fCollision::VF_SS)
+		{
+			initializeFaceTree();			
+		}
+	}
+
+	/* Prepare				*/ 
+	m_sst.sdt		=	dt*m_cfg.timescale;
+	m_sst.isdt		=	1/m_sst.sdt;
+	m_sst.velmrg	=	m_sst.sdt*3;
+	m_sst.radmrg	=	getCollisionShape()->getMargin();
+	m_sst.updmrg	=	m_sst.radmrg*(btScalar)0.25;
+	/* Forces				*/ 
+	addVelocity(m_worldInfo->m_gravity*m_sst.sdt);
+	applyForces();
+	/* Integrate			*/ 
+	// m_q keeps the position from before the step; the accumulated force is
+	// consumed and cleared.
+	for(i=0,ni=m_nodes.size();i<ni;++i)
+	{
+		Node&	n=m_nodes[i];
+		n.m_q	=	n.m_x;
+		n.m_v	+=	n.m_f*n.m_im*m_sst.sdt;
+		n.m_x	+=	n.m_v*m_sst.sdt;
+		n.m_f	=	btVector3(0,0,0);
+	}
+	/* Clusters				*/ 
+	updateClusters();
+	/* Bounds				*/ 
+	updateBounds();	
+	/* Nodes				*/ 
+	// Refresh each node's leaf volume: built from the node position and radmrg,
+	// passed together with the velocity scaled by velmrg and the margin updmrg.
+	ATTRIBUTE_ALIGNED16(btDbvtVolume)	vol;
+	for(i=0,ni=m_nodes.size();i<ni;++i)
+	{
+		Node&	n=m_nodes[i];
+		vol = btDbvtVolume::FromCR(n.m_x,m_sst.radmrg);
+		m_ndbvt.update(	n.m_leaf,
+			vol,
+			n.m_v*m_sst.velmrg,
+			m_sst.updmrg);
+	}
+	/* Faces				*/ 
+	// Same for faces, using the mean velocity of the three corner nodes; only
+	// done when a face tree exists.
+	if(!m_fdbvt.empty())
+	{
+		for(int i=0;i<m_faces.size();++i)
+		{
+			Face&			f=m_faces[i];
+			const btVector3	v=(	f.m_n[0]->m_v+
+				f.m_n[1]->m_v+
+				f.m_n[2]->m_v)/3;
+			vol = VolumeOf(f,m_sst.radmrg);
+			m_fdbvt.update(	f.m_leaf,
+				vol,
+				v*m_sst.velmrg,
+				m_sst.updmrg);
+		}
+	}
+	/* Pose					*/ 
+	updatePose();
+	/* Match				*/ 
+	// Pose matching: every node with m_im>0 is blended towards its rotated
+	// rest-pose position by the factor m_cfg.kMT.
+	if(m_pose.m_bframe&&(m_cfg.kMT>0))
+	{
+		const btMatrix3x3	posetrs=m_pose.m_rot;
+		for(int i=0,ni=m_nodes.size();i<ni;++i)
+		{
+			Node&	n=m_nodes[i];
+			if(n.m_im>0)
+			{
+				const btVector3	x=posetrs*m_pose.m_pos[i]+m_pose.m_com;
+				n.m_x=Lerp(n.m_x,x,m_cfg.kMT);
+			}
+		}
+	}
+	/* Clear contacts		*/ 
+	m_rcontacts.resize(0);
+	m_scontacts.resize(0);
+	/* Optimize dbvt's		*/ 
+	m_ndbvt.optimizeIncremental(1);
+	m_fdbvt.optimizeIncremental(1);
+	m_cdbvt.optimizeIncremental(1);
+}
+
+//
+// Runs the configured constraint solvers for the current step: prepares
+// per-link and per-anchor cached terms, then executes the velocity, position
+// and drift solver sequences for m_cfg.viterations / piterations /
+// diterations passes respectively, converting between positions and
+// velocities after each stage. Cluster impulses are applied before
+// (applyClusters(false)) and after (dampClusters, applyClusters(true)).
+void			btSoftBody::solveConstraints()
+{
+
+	/* Apply clusters		*/ 
+	applyClusters(false);
+	/* Prepare links		*/ 
+
+	int i,ni;
+
+	// m_c3 is the link vector between the pre-step positions (m_q);
+	// m_c2 is 1/(|m_c3|^2 * m_c0).
+	for(i=0,ni=m_links.size();i<ni;++i)
+	{
+		Link&	l=m_links[i];
+		l.m_c3		=	l.m_n[1]->m_q-l.m_n[0]->m_q;
+		l.m_c2		=	1/(l.m_c3.length2()*l.m_c0);
+	}
+	/* Prepare anchors		*/ 
+	// ra is the anchor's local offset rotated by the body's current basis;
+	// the anchored rigid body is activated.
+	for(i=0,ni=m_anchors.size();i<ni;++i)
+	{
+		Anchor&			a=m_anchors[i];
+		const btVector3	ra=a.m_body->getWorldTransform().getBasis()*a.m_local;
+		a.m_c0	=	ImpulseMatrix(	m_sst.sdt,
+			a.m_node->m_im,
+			a.m_body->getInvMass(),
+			a.m_body->getInvInertiaTensorWorld(),
+			ra);
+		a.m_c1	=	ra;
+		a.m_c2	=	m_sst.sdt*a.m_node->m_im;
+		a.m_body->activate();
+	}
+	/* Solve velocities		*/ 
+	if(m_cfg.viterations>0)
+	{
+		/* Solve			*/ 
+		for(int isolve=0;isolve<m_cfg.viterations;++isolve)
+		{
+			for(int iseq=0;iseq<m_cfg.m_vsequence.size();++iseq)
+			{
+				getSolver(m_cfg.m_vsequence[iseq])(this,1);
+			}
+		}
+		/* Update			*/ 
+		// Re-derive positions from the pre-step position and the solved velocity.
+		for(i=0,ni=m_nodes.size();i<ni;++i)
+		{
+			Node&	n=m_nodes[i];
+			n.m_x	=	n.m_q+n.m_v*m_sst.sdt;
+		}
+	}
+	/* Solve positions		*/ 
+	if(m_cfg.piterations>0)
+	{
+		for(int isolve=0;isolve<m_cfg.piterations;++isolve)
+		{
+			// ti runs from 0 towards 1 over the passes (isolve/piterations).
+			const btScalar ti=isolve/(btScalar)m_cfg.piterations;
+			for(int iseq=0;iseq<m_cfg.m_psequence.size();++iseq)
+			{
+				getSolver(m_cfg.m_psequence[iseq])(this,1,ti);
+			}
+		}
+		// Velocities are re-derived from the position change, scaled by (1-kDP);
+		// forces are cleared.
+		const btScalar	vc=m_sst.isdt*(1-m_cfg.kDP);
+		for(i=0,ni=m_nodes.size();i<ni;++i)
+		{
+			Node&	n=m_nodes[i];
+			n.m_v	=	(n.m_x-n.m_q)*vc;
+			n.m_f	=	btVector3(0,0,0);		
+		}
+	}
+	/* Solve drift			*/ 
+	if(m_cfg.diterations>0)
+	{
+		const btScalar	vcf=m_cfg.kVCF*m_sst.isdt;
+		// Snapshot the positions so the correction made by the drift passes can
+		// be measured afterwards.
+		for(i=0,ni=m_nodes.size();i<ni;++i)
+		{
+			Node&	n=m_nodes[i];
+			n.m_q	=	n.m_x;
+		}
+		for(int idrift=0;idrift<m_cfg.diterations;++idrift)
+		{
+			for(int iseq=0;iseq<m_cfg.m_dsequence.size();++iseq)
+			{
+				getSolver(m_cfg.m_dsequence[iseq])(this,1,0);
+			}
+		}
+		// Feed the drift correction back into the velocity, scaled by kVCF*isdt.
+		for(int i=0,ni=m_nodes.size();i<ni;++i)
+		{
+			Node&	n=m_nodes[i];
+			n.m_v	+=	(n.m_x-n.m_q)*vcf;
+		}
+	}
+	/* Apply clusters		*/ 
+	dampClusters();
+	applyClusters(true);
+}
+
+//
+// Runs the configured position-solver sequence `iterations` times, passing
+// each solver the arguments (this, 1, 0).
+void			btSoftBody::staticSolve(int iterations)
+{
+	int	pass=0;
+	while(pass<iterations)
+	{
+		int	step=0;
+		while(step<m_cfg.m_psequence.size())
+		{
+			getSolver(m_cfg.m_psequence[step])(this,1,0);
+			++step;
+		}
+		++pass;
+	}
+}
+
+//
+// Intentionally empty: none of the three arguments is used.
+void			btSoftBody::solveCommonConstraints(btSoftBody** /*bodies*/,
+												   int /*count*/,
+												   int /*iterations*/)
+{
+	/// placeholder, nothing to do
+}
+
+//
+// Solves cluster constraints for a group of bodies. The iteration count is
+// the largest m_cfg.citerations among the bodies (at least 0). Each body is
+// prepared, then on every iteration each body's solveClusters(sor) is run
+// with sor=1, and finally each body is cleaned up.
+void			btSoftBody::solveClusters(const btAlignedObjectArray<btSoftBody*>& bodies)
+{
+	const int	bodyCount=bodies.size();
+
+	/* Largest requested iteration count	*/ 
+	int			passes=0;
+	for(int b=0;b<bodyCount;++b)
+	{
+		passes=btMax(passes,bodies[b]->m_cfg.citerations);
+	}
+	/* Prepare		*/ 
+	for(int b=0;b<bodyCount;++b)
+	{
+		bodies[b]->prepareClusters(passes);
+	}
+	/* Solve		*/ 
+	for(int pass=0;pass<passes;++pass)
+	{
+		const btScalar	sor=1;
+		for(int b=0;b<bodyCount;++b)
+		{
+			bodies[b]->solveClusters(sor);
+		}
+	}
+	/* Cleanup		*/ 
+	for(int b=0;b<bodyCount;++b)
+	{
+		bodies[b]->cleanupClusters();
+	}
+}
+
+//
+// Final step of the update: only the face and node normals are refreshed.
+void			btSoftBody::integrateMotion()
+{
+	updateNormals();
+}
+
+//
+// Sets up a ray query from rayFrom to rayTo. `mxt` is the initial upper bound
+// stored in m_mint; no face has been hit and no test counted yet.
+// Despite its name, m_rayNormalizedDirection is stored as the plain
+// difference rayTo-rayFrom; it is not normalized here.
+btSoftBody::RayFromToCaster::RayFromToCaster(const btVector3& rayFrom,const btVector3& rayTo,btScalar mxt)
+{
+	m_tests	=	0;
+	m_face	=	0;
+	m_mint	=	mxt;
+	m_rayFrom = rayFrom;
+	m_rayTo = rayTo;
+	m_rayNormalizedDirection = rayTo-rayFrom;
+}
+
+//
+// Tree callback: tests the ray against the face stored in leaf->data and
+// keeps the face when its hit parameter is positive and below the best found
+// so far (m_mint). Every call increments m_tests.
+void				btSoftBody::RayFromToCaster::Process(const btDbvtNode* leaf)
+{
+	btSoftBody::Face*	face=(btSoftBody::Face*)leaf->data;
+	const btScalar		hit=rayFromToTriangle(	m_rayFrom,m_rayTo,m_rayNormalizedDirection,
+		face->m_n[0]->m_x,
+		face->m_n[1]->m_x,
+		face->m_n[2]->m_x,
+		m_mint);
+	const bool			closer=(hit>0)&&(hit<m_mint);
+	if(closer)
+	{
+		m_face=face;
+		m_mint=hit;
+	}
+	++m_tests;
+}
+
+//
+// Intersects a ray with triangle (a,b,c).
+// The hit point is rayFrom + rayNormalizedDirection*t; the function returns t
+// when teps < t < maxt and the hit point passes the three edge tests (with a
+// small negative tolerance), and -1 otherwise, including when the direction
+// is (fuzzily) parallel to the triangle plane. `rayTo` is not used.
+btScalar			btSoftBody::RayFromToCaster::rayFromToTriangle(	const btVector3& rayFrom,
+																   const btVector3& rayTo,
+																   const btVector3& rayNormalizedDirection,
+																   const btVector3& a,
+																   const btVector3& b,
+																   const btVector3& c,
+																   btScalar maxt)
+{
+	static const btScalar	ceps=-SIMD_EPSILON*10;
+	static const btScalar	teps=SIMD_EPSILON*10;
+
+	/* Plane of the triangle (normal is left unnormalized)	*/ 
+	const btVector3			n=btCross(b-a,c-a);
+	const btScalar			d=btDot(a,n);
+	const btScalar			den=btDot(rayNormalizedDirection,n);
+	if(btFuzzyZero(den))
+	{
+		return(-1);
+	}
+
+	/* Parameter of the plane crossing	*/ 
+	const btScalar			num=btDot(rayFrom,n)-d;
+	const btScalar			t=-num/den;
+	const bool				inRange=(t>teps)&&(t<maxt);
+	if(!inRange)
+	{
+		return(-1);
+	}
+
+	/* Edge tests against the hit point	*/ 
+	const btVector3			hit=rayFrom+rayNormalizedDirection*t;
+	if(btDot(n,btCross(a-hit,b-hit))>ceps)
+	{
+		if(btDot(n,btCross(b-hit,c-hit))>ceps)
+		{
+			if(btDot(n,btCross(c-hit,a-hit))>ceps)
+			{
+				return(t);
+			}
+		}
+	}
+	return(-1);
+}
+
+//
+// Replaces every Node pointer held by links, faces, anchors and notes with
+// the node's index in m_nodes, stored in the pointer field itself (the index
+// is cast to a Node*). The user data of node and face tree leaves is set to
+// the element index in the same way. indicesToPointers() performs the reverse
+// conversion.
+// While this encoding is in place none of the converted pointers may be
+// dereferenced.
+void				btSoftBody::pointersToIndices()
+{
+#define	PTR2IDX(_p_,_b_)	reinterpret_cast<btSoftBody::Node*>((_p_)-(_b_))
+	btSoftBody::Node*	base=m_nodes.size() ? &m_nodes[0] : 0;
+	int i,ni;
+
+	for(i=0,ni=m_nodes.size();i<ni;++i)
+	{
+		if(m_nodes[i].m_leaf)
+		{
+			// Convert the index by value. Reading the int through a void** would
+			// fetch sizeof(void*) bytes from a sizeof(int) object, which over-reads
+			// wherever pointers are wider than int.
+			m_nodes[i].m_leaf->data=reinterpret_cast<void*>(static_cast<size_t>(i));
+		}
+	}
+	for(i=0,ni=m_links.size();i<ni;++i)
+	{
+		m_links[i].m_n[0]=PTR2IDX(m_links[i].m_n[0],base);
+		m_links[i].m_n[1]=PTR2IDX(m_links[i].m_n[1],base);
+	}
+	for(i=0,ni=m_faces.size();i<ni;++i)
+	{
+		m_faces[i].m_n[0]=PTR2IDX(m_faces[i].m_n[0],base);
+		m_faces[i].m_n[1]=PTR2IDX(m_faces[i].m_n[1],base);
+		m_faces[i].m_n[2]=PTR2IDX(m_faces[i].m_n[2],base);
+		if(m_faces[i].m_leaf)
+		{
+			// Same by-value conversion as for the node leaves above.
+			m_faces[i].m_leaf->data=reinterpret_cast<void*>(static_cast<size_t>(i));
+		}
+	}
+	for(i=0,ni=m_anchors.size();i<ni;++i)
+	{
+		m_anchors[i].m_node=PTR2IDX(m_anchors[i].m_node,base);
+	}
+	for(i=0,ni=m_notes.size();i<ni;++i)
+	{
+		// Only the first m_rank node slots of a note are in use.
+		for(int j=0;j<m_notes[i].m_rank;++j)
+		{
+			m_notes[i].m_nodes[j]=PTR2IDX(m_notes[i].m_nodes[j],base);
+		}
+	}
+#undef	PTR2IDX
+}
+
+//
+// Reverse of pointersToIndices(): every Node "pointer" in links, faces,
+// anchors and notes is read as an index and turned back into a pointer into
+// m_nodes. If `map` is non-null the stored index is first translated through
+// map[index]. The user data of node and face tree leaves is reset to the
+// address of the owning node / face.
+void				btSoftBody::indicesToPointers(const int* map)
+{
+	// The index is recovered from the pointer field as its byte distance from
+	// a null char pointer.
+	// NOTE(review): the macro expands to an unparenthesised ?: expression; it
+	// is only safe because every use below is the whole right-hand side of an
+	// assignment.
+#define	IDX2PTR(_p_,_b_)	map?(&(_b_)[map[(((char*)_p_)-(char*)0)]]):	\
+	(&(_b_)[(((char*)_p_)-(char*)0)])
+	btSoftBody::Node*	base=m_nodes.size() ? &m_nodes[0]:0;
+	int i,ni;
+
+	for(i=0,ni=m_nodes.size();i<ni;++i)
+	{
+		if(m_nodes[i].m_leaf)
+		{
+			m_nodes[i].m_leaf->data=&m_nodes[i];
+		}
+	}
+	for(i=0,ni=m_links.size();i<ni;++i)
+	{
+		m_links[i].m_n[0]=IDX2PTR(m_links[i].m_n[0],base);
+		m_links[i].m_n[1]=IDX2PTR(m_links[i].m_n[1],base);
+	}
+	for(i=0,ni=m_faces.size();i<ni;++i)
+	{
+		m_faces[i].m_n[0]=IDX2PTR(m_faces[i].m_n[0],base);
+		m_faces[i].m_n[1]=IDX2PTR(m_faces[i].m_n[1],base);
+		m_faces[i].m_n[2]=IDX2PTR(m_faces[i].m_n[2],base);
+		if(m_faces[i].m_leaf)
+		{
+			m_faces[i].m_leaf->data=&m_faces[i];
+		}
+	}
+	for(i=0,ni=m_anchors.size();i<ni;++i)
+	{
+		m_anchors[i].m_node=IDX2PTR(m_anchors[i].m_node,base);
+	}
+	for(i=0,ni=m_notes.size();i<ni;++i)
+	{
+		// Only the first m_rank node slots of a note are in use.
+		for(int j=0;j<m_notes[i].m_rank;++j)
+		{
+			m_notes[i].m_nodes[j]=IDX2PTR(m_notes[i].m_nodes[j],base);
+		}
+	}
+#undef	IDX2PTR
+}
+
+//
+// Detailed ray query against the faces and tetrahedra of the body.
+//   rayFrom/rayTo - segment to test; the direction handed to the triangle
+//                   test is the unnormalized difference rayTo-rayFrom
+//   mint          - in: upper bound for accepted hit parameters;
+//                   out: parameter of the accepted hit (unless bcountonly)
+//   feature/index - out: kind and index of the feature hit (unless bcountonly)
+//   bcountonly    - when true only hits are counted and the outputs are left
+//                   untouched; this also forces the brute-force face loop
+// Returns the number of hits counted. With the face tree in use, faces
+// contribute at most 1 to that count.
+int					btSoftBody::rayTest(const btVector3& rayFrom,const btVector3& rayTo,
+										btScalar& mint,eFeature::_& feature,int& index,bool bcountonly) const
+{
+	int	cnt=0;
+	btVector3 dir = rayTo-rayFrom;
+	
+
+	if(bcountonly||m_fdbvt.empty())
+	{/* Full search	*/ 
+		
+		for(int i=0,ni=m_faces.size();i<ni;++i)
+		{
+			const btSoftBody::Face&	f=m_faces[i];
+
+			const btScalar			t=RayFromToCaster::rayFromToTriangle(	rayFrom,rayTo,dir,
+				f.m_n[0]->m_x,
+				f.m_n[1]->m_x,
+				f.m_n[2]->m_x,
+				mint);
+			if(t>0)
+			{
+				++cnt;
+				if(!bcountonly)
+				{
+					// mint shrinks with every accepted hit, so later faces are only
+					// accepted when they are closer.
+					feature=btSoftBody::eFeature::Face;
+					index=i;
+					mint=t;
+				}
+			}
+		}
+	}
+	else
+	{/* Use dbvt	*/ 
+		RayFromToCaster	collider(rayFrom,rayTo,mint);
+
+		btDbvt::rayTest(m_fdbvt.m_root,rayFrom,rayTo,collider);
+		if(collider.m_face)
+		{
+			mint=collider.m_mint;
+			feature=btSoftBody::eFeature::Face;
+			// Face index recovered by pointer difference.
+			index=(int)(collider.m_face-&m_faces[0]);
+			cnt=1;
+		}
+	}
+
+	// Tetrahedra are always tested by brute force: each of the four triangular
+	// sides of every tetrahedron is tested separately.
+	for (int i=0;i<m_tetras.size();i++)
+	{
+		const btSoftBody::Tetra& tet = m_tetras[i];
+		int tetfaces[4][3] = {{0,1,2},{0,1,3},{1,2,3},{0,2,3}};
+		for (int f=0;f<4;f++)
+		{
+
+			int index0=tetfaces[f][0];
+			int index1=tetfaces[f][1];
+			int index2=tetfaces[f][2];
+			btVector3 v0=tet.m_n[index0]->m_x;
+			btVector3 v1=tet.m_n[index1]->m_x;
+			btVector3 v2=tet.m_n[index2]->m_x;
+
+
+		const btScalar			t=RayFromToCaster::rayFromToTriangle(	rayFrom,rayTo,dir,
+			v0,v1,v2,
+				mint);
+		if(t>0)
+			{
+				++cnt;
+				if(!bcountonly)
+				{
+					// A closer tetrahedron side replaces an earlier face hit.
+					feature=btSoftBody::eFeature::Tetra;
+					index=i;
+					mint=t;
+				}
+			}
+		}
+	}
+	return(cnt);
+}
+
+//
+// Rebuilds the face tree from scratch: the tree is cleared and every face is
+// inserted with the volume VolumeOf(face,0); each face remembers its leaf in
+// m_leaf and the leaf's data points back at the face.
+void			btSoftBody::initializeFaceTree()
+{
+	m_fdbvt.clear();
+	int	at=0;
+	while(at<m_faces.size())
+	{
+		Face*	face=&m_faces[at];
+		face->m_leaf=m_fdbvt.insert(VolumeOf(*face,0),face);
+		++at;
+	}
+}
+
+//
+// Weighted sum of the node positions using the pose weights m_pose.m_wgh.
+// Returns the zero vector when the pose has no frame (m_bframe is false).
+btVector3		btSoftBody::evaluateCom() const
+{
+	btVector3	sum(0,0,0);
+	if(!m_pose.m_bframe)
+	{
+		return(sum);
+	}
+	const int	count=m_nodes.size();
+	for(int n=0;n<count;++n)
+	{
+		sum+=m_nodes[n].m_x*m_pose.m_wgh[n];
+	}
+	return(sum);
+}
+
+//
+// Tests point `x` against the collision shape of `colObj` using the world's
+// sparse signed distance field, with `x` transformed into the object's local
+// frame. When the evaluated distance is negative, `cti` receives the object,
+// the normal rotated by the object's basis, and the matching plane offset,
+// and true is returned. Otherwise `cti` is left untouched and the result is
+// false.
+bool				btSoftBody::checkContact(	btCollisionObject* colObj,
+											 const btVector3& x,
+											 btScalar margin,
+											 btSoftBody::sCti& cti) const
+{
+	btVector3			localNormal;
+	btCollisionShape*	shape = colObj->getCollisionShape();
+	btRigidBody*		rigid = btRigidBody::upcast(colObj);
+	const btTransform&	xform = rigid ? rigid->getWorldTransform() : colObj->getWorldTransform();
+	const btVector3		localPoint = xform.invXform(x);
+	const btScalar		distance = m_worldInfo->m_sparsesdf.Evaluate(localPoint,shape,localNormal,margin);
+	if(!(distance<0))
+	{
+		return(false);
+	}
+	cti.m_colObj = colObj;
+	cti.m_normal = xform.getBasis()*localNormal;
+	cti.m_offset = -btDot( cti.m_normal, x - cti.m_normal * distance );
+	return(true);
+}
+
+//
+// Recomputes face and node normals. Each face normal is the normalized cross
+// product of two face edges; each node normal is the sum of the unnormalized
+// cross products of the faces using the node, divided by its length when
+// that length exceeds SIMD_EPSILON.
+void					btSoftBody::updateNormals()
+{
+	const btVector3	zero(0,0,0);
+	const int		nodeCount=m_nodes.size();
+	const int		faceCount=m_faces.size();
+
+	/* Reset node normals		*/ 
+	for(int n=0;n<nodeCount;++n)
+	{
+		m_nodes[n].m_n=zero;
+	}
+	/* Face normals, accumulated into the three corner nodes	*/ 
+	for(int f=0;f<faceCount;++f)
+	{
+		btSoftBody::Face&	face=m_faces[f];
+		Node* const			n0=face.m_n[0];
+		Node* const			n1=face.m_n[1];
+		Node* const			n2=face.m_n[2];
+		const btVector3		raw=btCross(n1->m_x-n0->m_x,n2->m_x-n0->m_x);
+		face.m_normal=raw.normalized();
+		n0->m_n+=raw;
+		n1->m_n+=raw;
+		n2->m_n+=raw;
+	}
+	/* Normalize node normals		*/ 
+	for(int n=0;n<nodeCount;++n)
+	{
+		btVector3&		normal=m_nodes[n].m_n;
+		const btScalar	len=normal.length();
+		if(len>SIMD_EPSILON)
+		{
+			normal/=len;
+		}
+	}
+}
+
+//
+// Recomputes m_bounds from the root volume of the node tree, grown by the
+// collision shape margin on every axis, and forwards the new AABB to the
+// broadphase when this body has a broadphase handle. Without a root the
+// bounds collapse to the origin.
+// (A disabled code path for accelerated soft bodies, which used a fixed
+// +/-1000 box, is not reproduced here.)
+void					btSoftBody::updateBounds()
+{
+	if(!m_ndbvt.m_root)
+	{
+		m_bounds[0]=
+			m_bounds[1]=btVector3(0,0,0);
+		return;
+	}
+
+	const btVector3&	lo=m_ndbvt.m_root->volume.Mins();
+	const btVector3&	hi=m_ndbvt.m_root->volume.Maxs();
+	const btScalar		margin=getCollisionShape()->getMargin();
+	// The "*1" scale factor is kept from the original, where it is marked
+	// "??? to investigate...".
+	const btVector3		grow=btVector3(margin,margin,margin)*1;
+	m_bounds[0]=lo-grow;
+	m_bounds[1]=hi+grow;
+
+	if(0!=getBroadphaseHandle())
+	{
+		m_worldInfo->m_broadphase->setAabb(	getBroadphaseHandle(),
+			m_bounds[0],
+			m_bounds[1],
+			m_worldInfo->m_dispatcher);
+	}
+}
+
+
+//
+// Refreshes the pose when it has a frame: stores the current evaluateCom()
+// result in m_com, builds the matrix Apq from the weighted node offsets and
+// the reference positions m_pos, takes its polar decomposition to obtain
+// m_rot, and sets m_scl = m_aqq * rot^T * Apq. When m_cfg.maxvolume exceeds
+// 1, m_scl is multiplied by 1/det(m_scl) clamped to [1,maxvolume].
+// Does nothing when the pose has no frame.
+void					btSoftBody::updatePose()
+{
+	if(!m_pose.m_bframe)
+	{
+		return;
+	}
+
+	/* Com			*/ 
+	const btVector3		center=evaluateCom();
+	m_pose.m_com	=	center;
+
+	/* Rotation		*/ 
+	// The diagonal starts at small distinct values instead of exact zero.
+	const btScalar		tiny=SIMD_EPSILON;
+	btMatrix3x3			Apq;
+	Apq[0]=Apq[1]=Apq[2]=btVector3(0,0,0);
+	Apq[0].setX(tiny);
+	Apq[1].setY(tiny*2);
+	Apq[2].setZ(tiny*3);
+	const int			count=m_nodes.size();
+	for(int n=0;n<count;++n)
+	{
+		const btVector3		offset=m_pose.m_wgh[n]*(m_nodes[n].m_x-center);
+		const btVector3&	ref=m_pose.m_pos[n];
+		Apq[0]+=offset.x()*ref;
+		Apq[1]+=offset.y()*ref;
+		Apq[2]+=offset.z()*ref;
+	}
+	btMatrix3x3			rot,stretch;
+	PolarDecompose(Apq,rot,stretch);
+	m_pose.m_rot=rot;
+	m_pose.m_scl=m_pose.m_aqq*rot.transpose()*Apq;
+
+	/* Volume limit	*/ 
+	if(m_cfg.maxvolume>1)
+	{
+		const btScalar	factor=Clamp<btScalar>(	1/m_pose.m_scl.determinant(),
+			1,m_cfg.maxvolume);
+		m_pose.m_scl=Mul(m_pose.m_scl,factor);
+	}
+}
+
+//
+// Recomputes cached per-element constants:
+//  - links: m_rl (current length), m_c0 ((im0+im1)/kLST of the link
+//    material) and m_c1 (m_rl squared);
+//  - faces: m_ra from AreaOf() on the three corner positions;
+//  - nodes: m_area as the mean |m_ra| of the faces using the node, or 0 for
+//    nodes used by no face.
+void				btSoftBody::updateConstants()
+{
+	const int	linkCount=m_links.size();
+	const int	faceCount=m_faces.size();
+	const int	nodeCount=m_nodes.size();
+
+	/* Links		*/ 
+	for(int li=0;li<linkCount;++li)
+	{
+		Link&		link=m_links[li];
+		Material&	mat=*link.m_material;
+		Node*		na=link.m_n[0];
+		Node*		nb=link.m_n[1];
+		link.m_rl	=	(na->m_x-nb->m_x).length();
+		link.m_c0	=	(na->m_im+nb->m_im)/mat.m_kLST;
+		link.m_c1	=	link.m_rl*link.m_rl;
+	}
+	/* Faces		*/ 
+	for(int fi=0;fi<faceCount;++fi)
+	{
+		Face&	face=m_faces[fi];
+		face.m_ra	=	AreaOf(face.m_n[0]->m_x,face.m_n[1]->m_x,face.m_n[2]->m_x);
+	}
+	/* Node areas: sum first, then divide by the number of faces	*/ 
+	btAlignedObjectArray<int>	uses;
+	uses.resize(m_nodes.size(),0);
+	for(int ni=0;ni<nodeCount;++ni)
+	{
+		m_nodes[ni].m_area	=	0;
+	}
+	for(int fi=0;fi<faceCount;++fi)
+	{
+		btSoftBody::Face&	face=m_faces[fi];
+		for(int corner=0;corner<3;++corner)
+		{
+			Node*		node=face.m_n[corner];
+			const int	nodeIndex=(int)(node-&m_nodes[0]);
+			uses[nodeIndex]++;
+			node->m_area+=btFabs(face.m_ra);
+		}
+	}
+	for(int ni=0;ni<nodeCount;++ni)
+	{
+		if(uses[ni]>0)
+		{
+			m_nodes[ni].m_area/=(btScalar)uses[ni];
+		}
+		else
+		{
+			m_nodes[ni].m_area=0;
+		}
+	}
+}
+
+//
+// Initializes the derived data of every cluster from its node list:
+// per-node masses, inverse total mass, center of mass, zeroed velocities,
+// the inverted local inertia matrix (m_locii), an identity-rotation frame
+// placed at the center of mass, and the node offsets relative to that
+// center (m_framerefs). m_leaf is reset to 0.
+void					btSoftBody::initializeClusters()
+{
+	int i;
+
+	for( i=0;i<m_clusters.size();++i)
+	{
+		Cluster&	c=*m_clusters[i];
+		// m_imass is used as an accumulator for the total mass first and is
+		// inverted after the loop.
+		c.m_imass=0;
+		c.m_masses.resize(c.m_nodes.size());
+		for(int j=0;j<c.m_nodes.size();++j)
+		{
+			// A node with zero inverse mass flags the cluster as containing an
+			// anchor and is given the mass BT_LARGE_FLOAT.
+			if (c.m_nodes[j]->m_im==0)
+			{
+				c.m_containsAnchor = true;
+				c.m_masses[j]	=	BT_LARGE_FLOAT;
+			} else
+			{
+				c.m_masses[j]	=	btScalar(1.)/c.m_nodes[j]->m_im;
+			}
+			c.m_imass		+=	c.m_masses[j];
+		}
+		c.m_imass		=	btScalar(1.)/c.m_imass;
+		c.m_com			=	btSoftBody::clusterCom(&c);
+		c.m_lv			=	btVector3(0,0,0);
+		c.m_av			=	btVector3(0,0,0);
+		c.m_leaf		=	0;
+		/* Inertia	*/ 
+		// Builds the inertia matrix about the center of mass, then inverts it.
+		btMatrix3x3&	ii=c.m_locii;
+		ii[0]=ii[1]=ii[2]=btVector3(0,0,0);
+		{
+			// These i/ni are new locals that shadow the cluster index `i` within
+			// this scope only.
+			int i,ni;
+
+			for(i=0,ni=c.m_nodes.size();i<ni;++i)
+			{
+				const btVector3	k=c.m_nodes[i]->m_x-c.m_com;
+				// Used below as the per-component squares of k.
+				const btVector3	q=k*k;
+				const btScalar	m=c.m_masses[i];
+				ii[0][0]	+=	m*(q[1]+q[2]);
+				ii[1][1]	+=	m*(q[0]+q[2]);
+				ii[2][2]	+=	m*(q[0]+q[1]);
+				ii[0][1]	-=	m*k[0]*k[1];
+				ii[0][2]	-=	m*k[0]*k[2];
+				ii[1][2]	-=	m*k[1]*k[2];
+			}
+		}
+		// Only the upper triangle was accumulated; mirror it.
+		ii[1][0]=ii[0][1];
+		ii[2][0]=ii[0][2];
+		ii[2][1]=ii[1][2];
+		
+		ii = ii.inverse();
+
+		/* Frame	*/ 
+		c.m_framexform.setIdentity();
+		c.m_framexform.setOrigin(c.m_com);
+		c.m_framerefs.resize(c.m_nodes.size());
+		{
+			// Shadows the cluster index `i` again, within this scope only.
+			int i;
+			for(i=0;i<c.m_framerefs.size();++i)
+			{
+				c.m_framerefs[i]=c.m_nodes[i]->m_x-c.m_com;
+			}
+		}
+	}
+}
+
+// Per-step cluster refresh: frame, world inverse inertia, velocities, shape matching and bounds in m_cdbvt.
+void					btSoftBody::updateClusters()
+{
+	BT_PROFILE("UpdateClusters");
+	int i;
+
+	for(i=0;i<m_clusters.size();++i)
+	{
+		btSoftBody::Cluster&	c=*m_clusters[i];
+		const int				n=c.m_nodes.size();
+		//const btScalar			invn=1/(btScalar)n;
+		if(n)
+		{
+			/* Frame: m sums (current offset) x (reference offset) over the nodes, seeded with a small diagonal; its polar rotation r becomes the frame basis */ 
+			const btScalar	eps=btScalar(0.0001);
+			btMatrix3x3		m,r,s;
+			m[0]=m[1]=m[2]=btVector3(0,0,0);
+			m[0][0]=eps*1;
+			m[1][1]=eps*2;
+			m[2][2]=eps*3;
+			c.m_com=clusterCom(&c);
+			for(int i=0;i<c.m_nodes.size();++i)
+			{
+				const btVector3		a=c.m_nodes[i]->m_x-c.m_com;
+				const btVector3&	b=c.m_framerefs[i];
+				m[0]+=a[0]*b;m[1]+=a[1]*b;m[2]+=a[2]*b;
+			}
+			PolarDecompose(m,r,s);
+			c.m_framexform.setOrigin(c.m_com);
+			c.m_framexform.setBasis(r);		
+			/* Inertia: only the "Constant" branch is compiled (#if 1); it rotates m_locii by the frame basis. The #else branches are dead code. */ 
+#if 1/* Constant	*/ 
+			c.m_invwi=c.m_framexform.getBasis()*c.m_locii*c.m_framexform.getBasis().transpose();
+#else
+#if 0/* Sphere	*/ 
+			const btScalar	rk=(2*c.m_extents.length2())/(5*c.m_imass);
+			const btVector3	inertia(rk,rk,rk);
+			const btVector3	iin(btFabs(inertia[0])>SIMD_EPSILON?1/inertia[0]:0,
+				btFabs(inertia[1])>SIMD_EPSILON?1/inertia[1]:0,
+				btFabs(inertia[2])>SIMD_EPSILON?1/inertia[2]:0);
+
+			c.m_invwi=c.m_xform.getBasis().scaled(iin)*c.m_xform.getBasis().transpose();
+#else/* Actual	*/ 		
+			c.m_invwi[0]=c.m_invwi[1]=c.m_invwi[2]=btVector3(0,0,0);
+			for(int i=0;i<n;++i)
+			{
+				const btVector3	k=c.m_nodes[i]->m_x-c.m_com;
+				const btVector3		q=k*k;
+				const btScalar		m=1/c.m_nodes[i]->m_im;
+				c.m_invwi[0][0]	+=	m*(q[1]+q[2]);
+				c.m_invwi[1][1]	+=	m*(q[0]+q[2]);
+				c.m_invwi[2][2]	+=	m*(q[0]+q[1]);
+				c.m_invwi[0][1]	-=	m*k[0]*k[1];
+				c.m_invwi[0][2]	-=	m*k[0]*k[2];
+				c.m_invwi[1][2]	-=	m*k[1]*k[2];
+			}
+			c.m_invwi[1][0]=c.m_invwi[0][1];
+			c.m_invwi[2][0]=c.m_invwi[0][2];
+			c.m_invwi[2][1]=c.m_invwi[1][2];
+			c.m_invwi=c.m_invwi.inverse();
+#endif
+#endif
+			/* Velocities: sum mass*velocity and its moment about m_com, then scale by m_imass / m_invwi and by (1-damping) */ 
+			c.m_lv=btVector3(0,0,0);
+			c.m_av=btVector3(0,0,0);
+			{
+				int i;
+
+				for(i=0;i<n;++i)
+				{
+					const btVector3	v=c.m_nodes[i]->m_v*c.m_masses[i];
+					c.m_lv	+=	v;
+					c.m_av	+=	btCross(c.m_nodes[i]->m_x-c.m_com,v);
+				}
+			}
+			c.m_lv=c.m_imass*c.m_lv*(1-c.m_ldamping);
+			c.m_av=c.m_invwi*c.m_av*(1-c.m_adamping);
+			c.m_vimpulses[0]	=
+				c.m_vimpulses[1]	= btVector3(0,0,0);
+			c.m_dimpulses[0]	=
+				c.m_dimpulses[1]	= btVector3(0,0,0);
+			c.m_nvimpulses		= 0;
+			c.m_ndimpulses		= 0;
+			/* Matching: blend each node position toward its frame-transformed reference offset by m_matching (note: n below shadows the node count) */ 
+			if(c.m_matching>0)
+			{
+				for(int j=0;j<c.m_nodes.size();++j)
+				{
+					Node&			n=*c.m_nodes[j];
+					const btVector3	x=c.m_framexform*c.m_framerefs[j];
+					n.m_x=Lerp(n.m_x,x,c.m_matching);
+				}
+			}			
+			/* Dbvt: build the min/max box of the node positions, then update the existing leaf in m_cdbvt or insert a new one */ 
+			if(c.m_collide)
+			{
+				btVector3	mi=c.m_nodes[0]->m_x;
+				btVector3	mx=mi;
+				for(int j=1;j<n;++j)
+				{
+					mi.setMin(c.m_nodes[j]->m_x);
+					mx.setMax(c.m_nodes[j]->m_x);
+				}			
+				ATTRIBUTE_ALIGNED16(btDbvtVolume)	bounds=btDbvtVolume::FromMM(mi,mx);
+				if(c.m_leaf)
+					m_cdbvt.update(c.m_leaf,bounds,c.m_lv*m_sst.sdt*3,m_sst.radmrg);
+				else
+					c.m_leaf=m_cdbvt.insert(bounds,&c);
+			}
+		}
+	}
+
+
+}
+
+
+
+
+// Calls Terminate on every joint and frees/removes the ones flagged m_delete.
+void					btSoftBody::cleanupClusters()
+{
+	int idx=0;
+	while(idx<m_joints.size())
+	{
+		Joint* const	joint=m_joints[idx];
+		joint->Terminate(m_sst.sdt);
+		if(!joint->m_delete) { ++idx; continue; }	/* after a removal the same slot is visited again */
+		btAlignedFree(joint);
+		m_joints.remove(joint);
+	}
+}
+
+// Invokes Prepare on every joint, passing m_sst.sdt and the iteration count.
+void					btSoftBody::prepareClusters(int iterations)
+{
+	/* The array size is re-evaluated on every pass */ 
+	int jointIdx=0;
+	while(jointIdx<m_joints.size())
+		m_joints[jointIdx++]->Prepare(m_sst.sdt,iterations);
+}
+
+
+// Invokes Solve on each joint present when the call starts, passing m_sst.sdt and sor.
+void					btSoftBody::solveClusters(btScalar sor)
+{
+	/* The joint count is sampled once, before any joint is solved */ 
+	const int	jointCount=m_joints.size();
+	int			jointIdx=0;
+	while(jointIdx<jointCount) m_joints[jointIdx++]->Solve(m_sst.sdt,sor);
+}
+
+// Moves nodes by the clusters' accumulated velocity (or drift) impulses, mass-weighted per node.
+void					btSoftBody::applyClusters(bool drift)
+{
+	BT_PROFILE("ApplyClusters");
+
+	/* drift==true  : uses m_dimpulses, averaged by m_ndimpulses first */ 
+	/* drift==false : uses m_vimpulses                                 */ 
+	const int						nodeCount=m_nodes.size();
+	btAlignedObjectArray<btVector3>	moveSum;
+	btAlignedObjectArray<btScalar>	massSum;
+	moveSum.resize(nodeCount,btVector3(0,0,0));
+	massSum.resize(nodeCount,0);
+
+	/* Drift impulses are averaged over their contribution count before being applied */ 
+	for(int ci=0;drift&&ci<m_clusters.size();++ci)
+	{
+		Cluster&	cl=*m_clusters[ci];
+		if(cl.m_ndimpulses)
+		{
+			cl.m_dimpulses[0]/=(btScalar)cl.m_ndimpulses;
+			cl.m_dimpulses[1]/=(btScalar)cl.m_ndimpulses;
+		}
+	}
+
+	/* Accumulate, per node, the mass-weighted displacement and the total weight */ 
+	for(int ci=0;ci<m_clusters.size();++ci)
+	{
+		Cluster&	cl=*m_clusters[ci];
+		if(!(0<(drift?cl.m_ndimpulses:cl.m_nvimpulses))) continue;
+		const btVector3*	imp=drift?cl.m_dimpulses:cl.m_vimpulses;
+		const btVector3		lin=imp[0]*m_sst.sdt;
+		const btVector3		ang=imp[1]*m_sst.sdt;
+		for(int k=0;k<cl.m_nodes.size();++k)
+		{
+			const Node*			node=cl.m_nodes[k];
+			/* Index of this node inside m_nodes */ 
+			const int			slot=int(node-&m_nodes[0]);
+			const btScalar		mass=cl.m_masses[k];
+			moveSum[slot]	+=	(lin+btCross(ang,node->m_x-cl.m_com))*mass;
+			massSum[slot]	+=	mass;
+		}
+	}
+
+	/* Nodes with a positive total weight move by the weighted mean displacement */ 
+	for(int ni=0;ni<nodeCount;++ni)
+	{
+		if(!(massSum[ni]>0)) continue;
+		m_nodes[ni].m_x+=moveSum[ni]/massSum[ni];
+	}
+}
+
+// Blends node velocities toward the velocity implied by their cluster's m_lv/m_av, scaled by m_ndamping.
+void					btSoftBody::dampClusters()
+{
+	for(int ci=0;ci<m_clusters.size();++ci)
+	{
+		Cluster&	cl=*m_clusters[ci];
+		/* Clusters without positive node damping are left untouched */ 
+		if(!(cl.m_ndamping>0)) continue;
+
+		for(int k=0;k<cl.m_nodes.size();++k)
+		{
+			Node&	node=*cl.m_nodes[k];
+			/* Skip nodes whose inverse mass is not positive */ 
+			if(!(node.m_im>0)) continue;
+
+			/* Cluster velocity at the node, from m_lv, m_av and the offset of m_q from m_com */ 
+			const btVector3	target=cl.m_lv+btCross(cl.m_av,node.m_q-cl.m_com);
+			/* Only applied when the target is no faster than the node itself */ 
+			if(target.length2()<=node.m_v.length2())
+			{
+				node.m_v	+=	cl.m_ndamping*(target-node.m_v);
+			}
+		}
+	}
+}
+
+// Common preparation shared by the joint types: activates both connected bodies (dt is not used here).
+void				btSoftBody::Joint::Prepare(btScalar dt,int)
+{
+	/* m_bodies[0] first, then m_bodies[1] */ 
+	for(int side=0;side<2;++side) m_bodies[side].activate();
+}
+
+// Computes the linear joint's anchor offsets, clamped positional drift and impulse matrix for this step.
+void				btSoftBody::LJoint::Prepare(btScalar dt,int iterations)
+{
+	static const btScalar	maxdrift=4;
+	Joint::Prepare(dt,iterations);
+	/* World-space anchors first: the drift is measured between them */ 
+	for(int b=0;b<2;++b) m_rpos[b]=m_bodies[b].xform()*m_refs[b];
+	m_drift			=	Clamp(m_rpos[0]-m_rpos[1],maxdrift)*m_erp/dt;
+	/* Then make the anchors relative to each body's origin */ 
+	for(int b=0;b<2;++b) m_rpos[b]-=m_bodies[b].xform().getOrigin();
+	m_massmatrix	=	ImpulseMatrix(	m_bodies[0].invMass(),m_bodies[0].invWorldInertia(),m_rpos[0],
+		m_bodies[1].invMass(),m_bodies[1].invWorldInertia(),m_rpos[1]);
+	if(m_split>0)
+	{
+		m_sdrift	=	m_massmatrix*(m_drift*m_split);
+		m_drift		*=	1-m_split;
+	}
+	m_drift	/=(btScalar)iterations;
+}
+
+// Applies equal and opposite velocity impulses at the two anchors, from the drift and relative velocity.
+void				btSoftBody::LJoint::Solve(btScalar dt,btScalar sor)
+{
+	const btVector3		vel0=m_bodies[0].velocity(m_rpos[0]);
+	const btVector3		vel1=m_bodies[1].velocity(m_rpos[1]);
+	const btVector3		correction=m_drift+(vel0-vel1)*m_cfm;
+	btSoftBody::Impulse	push;
+	push.m_asVelocity	=	1;
+	push.m_velocity		=	m_massmatrix*correction*sor;
+	m_bodies[0].applyImpulse(-push,m_rpos[0]);
+	m_bodies[1].applyImpulse( push,m_rpos[1]);
+}
+
+// Applies the split part of the drift (m_sdrift) as opposite drift impulses when m_split is positive.
+void				btSoftBody::LJoint::Terminate(btScalar dt)
+{
+	/* Nothing to do unless the split fraction is positive */ 
+	if(!(m_split>0)) return;
+
+	m_bodies[0].applyDImpulse(-m_sdrift,m_rpos[0]);
+	m_bodies[1].applyDImpulse( m_sdrift,m_rpos[1]);
+}
+
+// Computes the angular joint's world axes, angle-limited drift and angular impulse matrix for this step.
+void				btSoftBody::AJoint::Prepare(btScalar dt,int iterations)
+{
+	static const btScalar	maxdrift=SIMD_PI/16;
+	m_icontrol->Prepare(this);
+	Joint::Prepare(dt,iterations);
+	for(int b=0;b<2;++b) m_axis[b]=m_bodies[b].xform().getBasis()*m_refs[b];
+	m_drift		=	NormalizeAny(btCross(m_axis[1],m_axis[0]));
+	const btScalar	angle=btMin(maxdrift,btAcos(Clamp<btScalar>(btDot(m_axis[0],m_axis[1]),-1,+1)));	/* angle between the axes, capped at maxdrift */
+	m_drift		*=	angle;
+	m_drift		*=	m_erp/dt;
+	m_massmatrix=	AngularImpulseMatrix(m_bodies[0].invWorldInertia(),m_bodies[1].invWorldInertia());
+	if(m_split>0)
+	{
+		m_sdrift	=	m_massmatrix*(m_drift*m_split);
+		m_drift		*=	1-m_split;
+	}
+	m_drift	/=(btScalar)iterations;
+}
+
+// Applies opposite angular impulses from the drift and the relative angular velocity left after m_icontrol.
+void				btSoftBody::AJoint::Solve(btScalar dt,btScalar sor)
+{
+	const btVector3		spin0=m_bodies[0].angularVelocity();
+	const btVector3		spin1=m_bodies[1].angularVelocity();
+	const btVector3		relSpin=spin0-spin1;
+	const btScalar		axisSpeed=btDot(relSpin,m_axis[0]);
+	const btVector3		residual=relSpin-m_axis[0]*m_icontrol->Speed(this,axisSpeed);
+	btSoftBody::Impulse	twist;
+	twist.m_asVelocity	=	1;
+	twist.m_velocity	=	m_massmatrix*(m_drift+residual*m_cfm)*sor;
+	m_bodies[0].applyAImpulse(-twist);
+	m_bodies[1].applyAImpulse( twist);
+}
+
+// Applies the split part of the angular drift (m_sdrift) as opposite drift impulses when m_split is positive.
+void				btSoftBody::AJoint::Terminate(btScalar dt)
+{
+	/* Nothing to do unless the split fraction is positive */ 
+	if(!(m_split>0)) return;
+
+	m_bodies[0].applyDAImpulse(-m_sdrift);
+	m_bodies[1].applyDAImpulse( m_sdrift);
+}
+
+// Ages the contact joint, flags it for deletion past m_maxlife, and keeps its drift only on the first step.
+void				btSoftBody::CJoint::Prepare(btScalar dt,int iterations)
+{
+	Joint::Prepare(dt,iterations);
+	const bool	firstStep=(m_life==0);
+	m_delete=(++m_life)>m_maxlife;
+	if(!firstStep)
+	{
+		/* Later steps carry no positional correction at all */ 
+		m_drift=m_sdrift=btVector3(0,0,0);
+		return;
+	}
+	/* First step: scale the stored drift by m_erp and 1/dt, split it if requested, spread it over the iterations */ 
+	m_drift=m_drift*m_erp/dt;
+	if(m_split>0)
+	{
+		m_sdrift	=	m_massmatrix*(m_drift*m_split);
+		m_drift		*=	1-m_split;
+	}
+	m_drift/=(btScalar)iterations;
+}
+
+// Solves a contact joint: drift plus, when approaching, the normal and friction-scaled tangential velocity.
+void				btSoftBody::CJoint::Solve(btScalar dt,btScalar sor)
+{
+	/* Relative velocity of the two contact points, body 0 minus body 1 */ 
+	const btVector3		vel0=m_bodies[0].velocity(m_rpos[0]);
+	const btVector3		vel1=m_bodies[1].velocity(m_rpos[1]);
+	const btVector3		relVel=vel0-vel1;
+	const btScalar		normalSpeed=btDot(relVel,m_normal);
+
+	btSoftBody::Impulse	push;
+	push.m_asVelocity	=	1;
+	push.m_velocity		=	m_drift;
+	if(normalSpeed<0)
+	{
+		/* Negative normal speed: add the full normal part plus a friction-scaled tangential part */ 
+		const btVector3	normalPart=m_normal*normalSpeed;
+		const btVector3	tangentPart=relVel-normalPart;
+		push.m_velocity	+=	normalPart+tangentPart*m_friction;
+	}
+	/* Map the velocity correction through m_massmatrix and scale it by sor */ 
+	push.m_velocity=m_massmatrix*push.m_velocity*sor;
+
+	if (m_bodies[0].m_soft!=m_bodies[1].m_soft)
+	{
+		/* Different m_soft on each side: apply the impulse unchanged */ 
+		m_bodies[0].applyImpulse(-push,m_rpos[0]);
+		m_bodies[1].applyImpulse( push,m_rpos[1]);
+		return;
+	}
+
+	/* Same m_soft on both sides. A component that differs from itself is NaN: such impulses are dropped */ 
+	const btVector3&	pv=push.m_velocity;
+	if (!((pv.getX() ==pv.getX())&&(pv.getY() ==pv.getY())&&(pv.getZ() ==pv.getZ()))) return;
+	/* m_asVelocity was set above; the check is kept as written */ 
+	if (!push.m_asVelocity) return;
+
+	/* Impulses shorter than m_maxSelfCollisionImpulse are ignored; the others are scaled by the factor */ 
+	if (push.m_velocity.length() <m_bodies[0].m_soft->m_maxSelfCollisionImpulse) return;
+	m_bodies[0].applyImpulse(-push*m_bodies[0].m_soft->m_selfCollisionImpulseFactor,m_rpos[0]);
+	m_bodies[1].applyImpulse( push*m_bodies[0].m_soft->m_selfCollisionImpulseFactor,m_rpos[1]);
+}
+
+// Applies the split part of the contact drift (m_sdrift) as opposite drift impulses when m_split is positive.
+void				btSoftBody::CJoint::Terminate(btScalar dt)
+{
+	/* Nothing to do unless the split fraction is positive */ 
+	if(!(m_split>0)) return;
+
+	m_bodies[0].applyDImpulse(-m_sdrift,m_rpos[0]);
+	m_bodies[1].applyDImpulse( m_sdrift,m_rpos[1]);
+}
+
+// Adds this step's aerodynamic, pressure and volume-conservation forces to the soft body.
+void				btSoftBody::applyForces()
+{
+
+	BT_PROFILE("SoftBody applyForces");
+	/* The pressure and volume terms are added to Node::m_f along the node normal m_n, scaled by the node area */ 
+	const btScalar					kLF =			m_cfg.kLF;
+	const btScalar					kDG =			m_cfg.kDG;
+	const btScalar					kPR =			m_cfg.kPR;
+	const btScalar					kVC =			m_cfg.kVC;
+	/* Each term is enabled by its coefficient: pressure by any non-zero value, the others only when positive */ 
+	const bool						as_lift =		kLF>0;
+	const bool						as_drag =		kDG>0;
+	const bool						as_pressure =	kPR!=0;
+	const bool						as_volume =		kVC>0;
+	const bool						use_medium =	as_lift	||
+													as_drag		;
+	const bool						use_volume =	as_pressure	||
+		as_volume	;
+	btScalar						ivolumetp =		0;
+	btScalar						dvolumetv =		0;
+
+	if(use_volume)
+	{
+		/* getVolume() is only evaluated when the pressure or the volume term needs it */ 
+		const btScalar	volume	=	getVolume();
+		/* kPR divided by |volume| */ 
+		ivolumetp	=	1/btFabs(volume)*kPR;
+		/* kVC times the difference between the rest volume and the current one */ 
+		dvolumetv	=	(m_pose.m_volume-volume)*kVC;
+	}
+
+	/* Per vertex forces			*/ 
+	int i,ni;
+
+	for(i=0,ni=m_nodes.size();i<ni;++i)
+	{
+		btSoftBody::Node&	n=m_nodes[i];
+		/* Nodes whose inverse mass is not positive receive no force */ 
+		if(n.m_im>0)
+		{
+			if(use_medium)
+			{
+				/* Aerodynamics			*/ 
+				addAeroForceToNode(m_windVelocity, i);
+			}
+			/* Pressure				*/ 
+			if(as_pressure)
+			{
+				n.m_f	+=	n.m_n*(n.m_area*ivolumetp);
+			}
+			/* Volume				*/ 
+			if(as_volume)
+			{
+				n.m_f	+=	n.m_n*(n.m_area*dvolumetv);
+			}
+		}
+	}
+
+	/* Per face forces				*/ 
+	/* NOTE(review): called for every face regardless of use_medium; presumably   */ 
+	/* addAeroForceToFace checks the lift/drag settings itself - confirm.         */ 
+	for(i=0,ni=m_faces.size();i<ni;++i)
+	{
+		/* Aerodynamics			*/ 
+		addAeroForceToFace(m_windVelocity, i);	
+	}
+}
+
+// Position solver for anchors: corrects each anchored node and applies the opposite impulse to its rigid body.
+void				btSoftBody::PSolve_Anchors(btSoftBody* psb,btScalar kst,btScalar ti)
+{
+	const btScalar	hardness=psb->m_cfg.kAHR*kst;
+	const btScalar	step=psb->m_sst.sdt;
+	const int		anchorCount=psb->m_anchors.size();
+	for(int ai=0;ai<anchorCount;++ai)
+	{
+		const Anchor&		anchor=psb->m_anchors[ai];
+		Node&				node=*anchor.m_node;
+		const btVector3		target=anchor.m_body->getWorldTransform()*anchor.m_local;
+		const btVector3		bodyMove=anchor.m_body->getVelocityInLocalPoint(anchor.m_c1)*step;
+		const btVector3		nodeMove=node.m_x-node.m_q;
+		const btVector3		error=(bodyMove-nodeMove)+(target-node.m_x)*hardness;
+		const btVector3		impulse=anchor.m_c0*error*anchor.m_influence;
+		node.m_x+=impulse*anchor.m_c2;
+		anchor.m_body->applyImpulse(-impulse,anchor.m_c1);
+	}
+}
+
+// Position solver for soft-rigid contacts: corrects the node and applies the impulse to the rigid body, if any.
+void btSoftBody::PSolve_RContacts(btSoftBody* psb, btScalar kst, btScalar ti)
+{
+	const btScalar	step = psb->m_sst.sdt;
+	const btScalar	margin = psb->getCollisionShape()->getMargin();
+	const int		contactCount = psb->m_rcontacts.size();
+	for(int ci=0;ci<contactCount;++ci)
+	{
+		const RContact&		contact = psb->m_rcontacts[ci];
+		const sCti&			info = contact.m_cti;
+		btRigidBody*		rigid = btRigidBody::upcast(info.m_colObj);
+		/* Collision objects that are not rigid bodies contribute no velocity */ 
+		const btVector3		bodyMove = rigid ? rigid->getVelocityInLocalPoint(contact.m_c1)*step : btVector3(0,0,0);
+		const btVector3		nodeMove = contact.m_node->m_x-contact.m_node->m_q;
+		const btVector3		relMove = nodeMove-bodyMove;
+		const btScalar		normalMove = btDot(relMove, info.m_normal);
+		if(!(normalMove<=SIMD_EPSILON)) continue;
+
+		const btScalar		dist = btMin( (btDot(contact.m_node->m_x, info.m_normal) + info.m_offset), margin );
+		const btVector3		tangentMove = relMove - (info.m_normal * normalMove);
+		// c0 is the impulse matrix, c3 is 1 - the friction coefficient or 0, c4 is the contact hardness coefficient
+		const btVector3		impulse = contact.m_c0 * ( (relMove - (tangentMove * contact.m_c3) + (info.m_normal * (dist * contact.m_c4))) * kst );
+		contact.m_node->m_x -= impulse * contact.m_c2;
+		if (rigid)
+			rigid->applyImpulse(impulse,contact.m_c1);
+	}
+}
+
+// Position solver for soft-soft contacts between a node and a face.
+void				btSoftBody::PSolve_SContacts(btSoftBody* psb,btScalar,btScalar ti)
+{
+	const int	contactCount=psb->m_scontacts.size();
+	for(int ci=0;ci<contactCount;++ci)
+	{
+		const SContact&		contact=psb->m_scontacts[ci];
+		const btVector3&	normal=contact.m_normal;
+		Node&				node=*contact.m_node;
+		Face&				face=*contact.m_face;
+		/* Weighted point on the face, evaluated from m_x and from m_q */ 
+		const btVector3		facePos=BaryEval(face.m_n[0]->m_x,face.m_n[1]->m_x,face.m_n[2]->m_x,contact.m_weights);
+		const btVector3		facePrev=BaryEval(face.m_n[0]->m_q,face.m_n[1]->m_q,face.m_n[2]->m_q,contact.m_weights);
+		/* Displacement of the node relative to that face point */ 
+		const btVector3		relMove=(node.m_x-node.m_q)-(facePos-facePrev);
+		btVector3			fix(0,0,0);
+		const btScalar		approach=btDot(relMove,normal);
+		if(approach<0)
+		{
+			/* Moving against the normal: correct along it by margin minus the current normal distance */ 
+			const btScalar	gap=contact.m_margin-(btDot(normal,node.m_x)-btDot(normal,facePos));
+			fix+=contact.m_normal*gap;
+		}
+		/* Friction removes part of the in-plane relative motion */ 
+		fix					-=	ProjectOnPlane(relMove,normal)*contact.m_friction;
+		node.m_x			+=	fix*contact.m_cfm[0];
+		/* The face nodes take the opposite correction, scaled by m_cfm[1] and their weight */ 
+		face.m_n[0]->m_x	-=	fix*(contact.m_cfm[1]*contact.m_weights.x());
+		face.m_n[1]->m_x	-=	fix*(contact.m_cfm[1]*contact.m_weights.y());
+		face.m_n[2]->m_x	-=	fix*(contact.m_cfm[1]*contact.m_weights.z());
+	}
+}
+
+// Position solver for links: shifts both end nodes along the link, weighted by their inverse masses.
+void				btSoftBody::PSolve_Links(btSoftBody* psb,btScalar kst,btScalar ti)
+{
+	const int	linkCount=psb->m_links.size();
+	for(int li=0;li<linkCount;++li)
+	{
+		Link&	link=psb->m_links[li];
+		/* Links whose m_c0 is not positive are skipped */ 
+		if(!(link.m_c0>0)) continue;
+
+		Node&			head=*link.m_n[0];
+		Node&			tail=*link.m_n[1];
+		const btVector3	span=tail.m_x-head.m_x;
+		const btScalar	distSq=span.length2();
+		/* Skip the link when the denominator term m_c1+distSq is not above SIMD_EPSILON */ 
+		if(!(link.m_c1+distSq > SIMD_EPSILON)) continue;
+		const btScalar	gain=((link.m_c1-distSq)/(link.m_c0*(link.m_c1+distSq)))*kst;
+		head.m_x-=span*(gain*head.m_im);
+		tail.m_x+=span*(gain*tail.m_im);
+	}
+}
+
+// Velocity solver for links: applies an impulse along m_c3 that opposes the end nodes' relative velocity.
+void				btSoftBody::VSolve_Links(btSoftBody* psb,btScalar kst)
+{
+	for(int li=0,linkCount=psb->m_links.size();li<linkCount;++li)
+	{
+		Link&			link=psb->m_links[li];
+		Node**			ends=link.m_n;
+		const btScalar	impulse=-btDot(link.m_c3,ends[0]->m_v-ends[1]->m_v)*link.m_c2*kst;
+		ends[0]->m_v+=	link.m_c3*(impulse*ends[0]->m_im);
+		ends[1]->m_v-=	link.m_c3*(impulse*ends[1]->m_im);
+	}
+}
+
+// Maps a position-solver id to the matching PSolve_* function; any other id yields 0.
+btSoftBody::psolver_t	btSoftBody::getSolver(ePSolver::_ solver)
+{
+	psolver_t	chosen=0;
+	switch(solver)
+	{
+	case	ePSolver::Anchors:
+		chosen=&btSoftBody::PSolve_Anchors;		break;
+	case	ePSolver::Linear:
+		chosen=&btSoftBody::PSolve_Links;		break;
+	case	ePSolver::RContacts:
+		chosen=&btSoftBody::PSolve_RContacts;	break;
+	case	ePSolver::SContacts:
+		chosen=&btSoftBody::PSolve_SContacts;	break;
+	default:
+		break;
+	}
+	return(chosen);
+}
+
+// Maps a velocity-solver id to its VSolve_* function; any other id yields 0.
+btSoftBody::vsolver_t	btSoftBody::getSolver(eVSolver::_ solver)
+{
+	/* eVSolver::Linear is the only id handled here */ 
+	if(solver==eVSolver::Linear)
+	{
+		return(&btSoftBody::VSolve_Links);
+	}
+
+	/* Every other id has no velocity solver */ 
+	return(0);
+}
+
+// Collides this soft body against a collision object, using the scheme selected in m_cfg.collisions.
+void			btSoftBody::defaultCollisionHandler(btCollisionObject* pco)
+{
+	const int	scheme=m_cfg.collisions&fCollision::RVSmask;
+
+	if(scheme==fCollision::CL_RS)
+	{
+		/* CL_RS handling is delegated entirely to the collider */ 
+		btSoftColliders::CollideCL_RS	clusterCollider;
+		clusterCollider.Process(this,pco);
+		return;
+	}
+	/* Any scheme other than SDF_RS is ignored */ 
+	if(scheme!=fCollision::SDF_RS) return;
+
+	btSoftColliders::CollideSDF_RS	nodeCollider;
+	btRigidBody*		rigid=btRigidBody::upcast(pco);
+	/* NOTE(review): both transforms come from the same getter, so timemargin is 0 for finite transforms */ 
+	btTransform			from=pco->getWorldTransform();
+	const btTransform	to=pco->getWorldTransform();
+	const btScalar		timemargin=(from.getOrigin()-to.getOrigin()).length();
+	const btScalar		basemargin=getCollisionShape()->getMargin();
+
+	/* AABB of the object's shape at its world transform, grown by this body's collision margin */ 
+	btVector3			lo;
+	btVector3			hi;
+	ATTRIBUTE_ALIGNED16(btDbvtVolume)		bounds;
+	pco->getCollisionShape()->getAabb(pco->getWorldTransform(),lo,hi);
+	bounds=btDbvtVolume::FromMM(lo,hi);
+	bounds.Expand(btVector3(basemargin,basemargin,basemargin));
+
+	nodeCollider.psb		=	this;
+	nodeCollider.m_colObj1 = pco;
+	nodeCollider.m_rigidBody = rigid;
+	nodeCollider.dynmargin	=	basemargin+timemargin;
+	nodeCollider.stamargin	=	basemargin;
+
+	/* Test the expanded box against the m_ndbvt tree */ 
+	m_ndbvt.collideTV(m_ndbvt.m_root,bounds,nodeCollider);
+}
+
+// Collides this soft body against another soft body (possibly itself) using a scheme enabled on both.
+void			btSoftBody::defaultCollisionHandler(btSoftBody* psb)
+{
+	/* Only collision flags set on BOTH bodies are considered */ 
+	const int	shared=m_cfg.collisions&psb->m_cfg.collisions;
+	const int	scheme=shared&fCollision::SVSmask;
+	const bool	selfTest=(this==psb);
+
+	if(scheme==fCollision::CL_SS)
+	{
+		//support self-collision if CL_SELF flag set
+		if (!selfTest || (psb->m_cfg.collisions&fCollision::CL_SELF))
+		{
+			btSoftColliders::CollideCL_SS	clusterCollider;
+			clusterCollider.Process(this,psb);
+		}
+		return;
+	}
+
+	if(scheme!=fCollision::VF_SS)
+	{
+		return;
+	}
+	if(selfTest)
+	{
+		//only self-collision for Cluster, not Vertex-Face yet
+		return;
+	}
+
+	btSoftColliders::CollideVF_SS	vfCollider;
+	/* common					*/ 
+	vfCollider.mrg=	getCollisionShape()->getMargin()+
+		psb->getCollisionShape()->getMargin();
+
+	/* Pass 1: this body's nodes vs psb's faces	*/ 
+	vfCollider.psb[0]=this;
+	vfCollider.psb[1]=psb;
+	vfCollider.psb[0]->m_ndbvt.collideTT(	vfCollider.psb[0]->m_ndbvt.m_root,
+		vfCollider.psb[1]->m_fdbvt.m_root,
+		vfCollider);
+
+	/* Pass 2: psb's nodes vs this body's faces	*/ 
+	vfCollider.psb[0]=psb;
+	vfCollider.psb[1]=this;
+	vfCollider.psb[0]->m_ndbvt.collideTT(	vfCollider.psb[0]->m_ndbvt.m_root,
+		vfCollider.psb[1]->m_fdbvt.m_root,
+		vfCollider);
+}
+
+
+// Stores the wind velocity that applyForces() passes to addAeroForceToNode/addAeroForceToFace.
+void btSoftBody::setWindVelocity( const btVector3 &velocity )
+{
+	m_windVelocity = velocity;
+}
+
+// Returns a reference to the stored wind velocity (m_windVelocity).
+const btVector3& btSoftBody::getWindVelocity()
+{
+	return m_windVelocity;
+}
+
+
+// Size in bytes of the btSoftBodyData record that serialize() writes into its data buffer.
+int	btSoftBody::calculateSerializeBufferSize()	const
+{
+	/* Only the fixed-size record is counted here */ 
+	return static_cast<int>(sizeof(btSoftBodyData));
+}
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+const char*	btSoftBody::serialize(void* dataBuffer, class btSerializer* serializer) const
+{
+	btSoftBodyData* sbd = (btSoftBodyData*) dataBuffer;
+
+	btCollisionObject::serialize(&sbd->m_collisionObjectData, serializer);
+
+	btHashMap<btHashPtr,int>	m_nodeIndexMap;
+
+	sbd->m_numMaterials = m_materials.size();
+	sbd->m_materials = sbd->m_numMaterials? (SoftBodyMaterialData**) serializer->getUniquePointer((void*)&m_materials): 0;
+
+	if (sbd->m_materials)
+	{
+		int sz = sizeof(SoftBodyMaterialData*);
+		int numElem = sbd->m_numMaterials;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		//SoftBodyMaterialData** memPtr = chunk->m_oldPtr;
+		SoftBodyMaterialData** memPtr = (SoftBodyMaterialData**)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			btSoftBody::Material* mat = m_materials[i];
+			*memPtr = mat ? (SoftBodyMaterialData*)serializer->getUniquePointer((void*)mat) : 0;
+			if (!serializer->findPointer(mat))
+			{
+				//serialize it here
+				btChunk* chunk = serializer->allocate(sizeof(SoftBodyMaterialData),1);
+				SoftBodyMaterialData* memPtr = (SoftBodyMaterialData*)chunk->m_oldPtr;
+				memPtr->m_flags = mat->m_flags;
+				memPtr->m_angularStiffness = mat->m_kAST;
+				memPtr->m_linearStiffness = mat->m_kLST;
+				memPtr->m_volumeStiffness = mat->m_kVST;
+				serializer->finalizeChunk(chunk,"SoftBodyMaterialData",BT_SBMATERIAL_CODE,mat);
+			}
+		}
+		serializer->finalizeChunk(chunk,"SoftBodyMaterialData",BT_ARRAY_CODE,(void*) &m_materials);
+	}
+
+
+	
+
+	sbd->m_numNodes = m_nodes.size();
+	sbd->m_nodes = sbd->m_numNodes ? (SoftBodyNodeData*)serializer->getUniquePointer((void*)&m_nodes): 0;
+	if (sbd->m_nodes)
+	{
+		int sz = sizeof(SoftBodyNodeData);
+		int numElem = sbd->m_numNodes;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		SoftBodyNodeData* memPtr = (SoftBodyNodeData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			m_nodes[i].m_f.serializeFloat( memPtr->m_accumulatedForce);
+			memPtr->m_area = m_nodes[i].m_area;
+			memPtr->m_attach = m_nodes[i].m_battach;
+			memPtr->m_inverseMass = m_nodes[i].m_im;
+			memPtr->m_material = m_nodes[i].m_material? (SoftBodyMaterialData*)serializer->getUniquePointer((void*) m_nodes[i].m_material):0;
+			m_nodes[i].m_n.serializeFloat(memPtr->m_normal);
+			m_nodes[i].m_x.serializeFloat(memPtr->m_position);
+			m_nodes[i].m_q.serializeFloat(memPtr->m_previousPosition);
+			m_nodes[i].m_v.serializeFloat(memPtr->m_velocity);
+			m_nodeIndexMap.insert(&m_nodes[i],i);
+		}
+		serializer->finalizeChunk(chunk,"SoftBodyNodeData",BT_SBNODE_CODE,(void*) &m_nodes);
+	}
+
+	sbd->m_numLinks = m_links.size();
+	sbd->m_links = sbd->m_numLinks? (SoftBodyLinkData*) serializer->getUniquePointer((void*)&m_links[0]):0;
+	if (sbd->m_links)
+	{
+		int sz = sizeof(SoftBodyLinkData);
+		int numElem = sbd->m_numLinks;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		SoftBodyLinkData* memPtr = (SoftBodyLinkData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			memPtr->m_bbending = m_links[i].m_bbending;
+			memPtr->m_material = m_links[i].m_material? (SoftBodyMaterialData*)serializer->getUniquePointer((void*) m_links[i].m_material):0;
+			memPtr->m_nodeIndices[0] = m_links[i].m_n[0] ? m_links[i].m_n[0] - &m_nodes[0]: -1;
+			memPtr->m_nodeIndices[1] = m_links[i].m_n[1] ? m_links[i].m_n[1] - &m_nodes[0]: -1;
+			btAssert(memPtr->m_nodeIndices[0]<m_nodes.size());
+			btAssert(memPtr->m_nodeIndices[1]<m_nodes.size());
+			memPtr->m_restLength = m_links[i].m_rl;
+		}
+		serializer->finalizeChunk(chunk,"SoftBodyLinkData",BT_ARRAY_CODE,(void*) &m_links[0]);
+
+	}
+
+
+	sbd->m_numFaces = m_faces.size();
+	sbd->m_faces = sbd->m_numFaces? (SoftBodyFaceData*) serializer->getUniquePointer((void*)&m_faces[0]):0;
+	if (sbd->m_faces)
+	{
+		int sz = sizeof(SoftBodyFaceData);
+		int numElem = sbd->m_numFaces;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		SoftBodyFaceData* memPtr = (SoftBodyFaceData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			memPtr->m_material = m_faces[i].m_material ?  (SoftBodyMaterialData*) serializer->getUniquePointer((void*)m_faces[i].m_material): 0;
+			m_faces[i].m_normal.serializeFloat(	memPtr->m_normal);
+			for (int j=0;j<3;j++)
+			{
+				memPtr->m_nodeIndices[j] = m_faces[i].m_n[j]? m_faces[i].m_n[j] - &m_nodes[0]: -1;
+			}
+			memPtr->m_restArea = m_faces[i].m_ra;
+		}
+		serializer->finalizeChunk(chunk,"SoftBodyFaceData",BT_ARRAY_CODE,(void*) &m_faces[0]);
+	}
+
+
+	sbd->m_numTetrahedra = m_tetras.size();
+	sbd->m_tetrahedra = sbd->m_numTetrahedra ? (SoftBodyTetraData*) serializer->getUniquePointer((void*)&m_tetras[0]):0;
+	if (sbd->m_tetrahedra)
+	{
+		int sz = sizeof(SoftBodyTetraData);
+		int numElem = sbd->m_numTetrahedra;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		SoftBodyTetraData* memPtr = (SoftBodyTetraData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			for (int j=0;j<4;j++)
+			{
+				m_tetras[i].m_c0[j].serializeFloat(	memPtr->m_c0[j] );
+				memPtr->m_nodeIndices[j] = m_tetras[j].m_n[j]? m_tetras[j].m_n[j]-&m_nodes[0] : -1;
+			}
+			memPtr->m_c1 = m_tetras[i].m_c1;
+			memPtr->m_c2 = m_tetras[i].m_c2;
+			memPtr->m_material = m_tetras[i].m_material ? (SoftBodyMaterialData*)serializer->getUniquePointer((void*) m_tetras[i].m_material): 0;
+			memPtr->m_restVolume = m_tetras[i].m_rv;
+		}
+		serializer->finalizeChunk(chunk,"SoftBodyTetraData",BT_ARRAY_CODE,(void*) &m_tetras[0]);
+	}
+
+	sbd->m_numAnchors = m_anchors.size();
+	sbd->m_anchors = sbd->m_numAnchors ? (SoftRigidAnchorData*) serializer->getUniquePointer((void*)&m_anchors[0]):0;
+	if (sbd->m_anchors)
+	{
+		int sz = sizeof(SoftRigidAnchorData);
+		int numElem = sbd->m_numAnchors;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		SoftRigidAnchorData* memPtr = (SoftRigidAnchorData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			m_anchors[i].m_c0.serializeFloat(memPtr->m_c0);
+			m_anchors[i].m_c1.serializeFloat(memPtr->m_c1);
+			memPtr->m_c2 = m_anchors[i].m_c2;
+			m_anchors[i].m_local.serializeFloat(memPtr->m_localFrame);
+			memPtr->m_nodeIndex = m_anchors[i].m_node? m_anchors[i].m_node-&m_nodes[0]: -1;
+			
+			memPtr->m_rigidBody = m_anchors[i].m_body? (btRigidBodyData*)  serializer->getUniquePointer((void*)m_anchors[i].m_body): 0;
+			btAssert(memPtr->m_nodeIndex < m_nodes.size());
+		}
+		serializer->finalizeChunk(chunk,"SoftRigidAnchorData",BT_ARRAY_CODE,(void*) &m_anchors[0]);
+	}
+	
+
+	sbd->m_config.m_dynamicFriction = m_cfg.kDF;
+	sbd->m_config.m_baumgarte = m_cfg.kVCF;
+	sbd->m_config.m_pressure = m_cfg.kPR;
+	sbd->m_config.m_aeroModel = this->m_cfg.aeromodel;
+	sbd->m_config.m_lift = m_cfg.kLF;
+	sbd->m_config.m_drag = m_cfg.kDG;
+	sbd->m_config.m_positionIterations = m_cfg.piterations;
+	sbd->m_config.m_driftIterations = m_cfg.diterations;
+	sbd->m_config.m_clusterIterations = m_cfg.citerations;
+	sbd->m_config.m_velocityIterations = m_cfg.viterations;
+	sbd->m_config.m_maxVolume = m_cfg.maxvolume;
+	sbd->m_config.m_damping = m_cfg.kDP;
+	sbd->m_config.m_poseMatch = m_cfg.kMT;
+	sbd->m_config.m_collisionFlags = m_cfg.collisions;
+	sbd->m_config.m_volume = m_cfg.kVC;
+	sbd->m_config.m_rigidContactHardness = m_cfg.kCHR;
+	sbd->m_config.m_kineticContactHardness = m_cfg.kKHR;
+	sbd->m_config.m_softContactHardness = m_cfg.kSHR;
+	sbd->m_config.m_anchorHardness = m_cfg.kAHR;
+	sbd->m_config.m_timeScale = m_cfg.timescale;
+	sbd->m_config.m_maxVolume = m_cfg.maxvolume;
+	sbd->m_config.m_softRigidClusterHardness = m_cfg.kSRHR_CL;
+	sbd->m_config.m_softKineticClusterHardness = m_cfg.kSKHR_CL;
+	sbd->m_config.m_softSoftClusterHardness = m_cfg.kSSHR_CL;
+	sbd->m_config.m_softRigidClusterImpulseSplit = m_cfg.kSR_SPLT_CL;
+	sbd->m_config.m_softKineticClusterImpulseSplit = m_cfg.kSK_SPLT_CL;
+	sbd->m_config.m_softSoftClusterImpulseSplit = m_cfg.kSS_SPLT_CL;
+
+	//pose for shape matching
+	{
+		sbd->m_pose = (SoftBodyPoseData*)serializer->getUniquePointer((void*)&m_pose);
+
+		int sz = sizeof(SoftBodyPoseData);
+		btChunk* chunk = serializer->allocate(sz,1);
+		SoftBodyPoseData* memPtr = (SoftBodyPoseData*)chunk->m_oldPtr;
+		
+		m_pose.m_aqq.serializeFloat(memPtr->m_aqq);
+		memPtr->m_bframe = m_pose.m_bframe;
+		memPtr->m_bvolume = m_pose.m_bvolume;
+		m_pose.m_com.serializeFloat(memPtr->m_com);
+		
+		memPtr->m_numPositions = m_pose.m_pos.size();
+		memPtr->m_positions = memPtr->m_numPositions ? (btVector3FloatData*)serializer->getUniquePointer((void*)&m_pose.m_pos[0]): 0;
+		if (memPtr->m_numPositions)
+		{
+			int numElem = memPtr->m_numPositions;
+			int sz = sizeof(btVector3Data);
+			btChunk* chunk = serializer->allocate(sz,numElem);
+			btVector3FloatData* memPtr = (btVector3FloatData*)chunk->m_oldPtr;
+			for (int i=0;i<numElem;i++,memPtr++)
+			{
+				m_pose.m_pos[i].serializeFloat(*memPtr);
+			}
+			serializer->finalizeChunk(chunk,"btVector3FloatData",BT_ARRAY_CODE,(void*)&m_pose.m_pos[0]);
+		}
+		memPtr->m_restVolume = m_pose.m_volume;
+		m_pose.m_rot.serializeFloat(memPtr->m_rot);
+		m_pose.m_scl.serializeFloat(memPtr->m_scale);
+
+		memPtr->m_numWeigts = m_pose.m_wgh.size();
+		memPtr->m_weights = memPtr->m_numWeigts? (float*) serializer->getUniquePointer((void*) &m_pose.m_wgh[0]) : 0;
+		if (memPtr->m_numWeigts)
+		{
+			
+			int numElem = memPtr->m_numWeigts;
+			int sz = sizeof(float);
+			btChunk* chunk = serializer->allocate(sz,numElem);
+			float* memPtr = (float*) chunk->m_oldPtr;
+			for (int i=0;i<numElem;i++,memPtr++)
+			{
+				*memPtr = m_pose.m_wgh[i];
+			}
+			serializer->finalizeChunk(chunk,"float",BT_ARRAY_CODE,(void*)&m_pose.m_wgh[0]);
+		}
+
+		serializer->finalizeChunk(chunk,"SoftBodyPoseData",BT_ARRAY_CODE,(void*)&m_pose);
+	}
+
+	//clusters for convex-cluster collision detection
+
+	sbd->m_numClusters = m_clusters.size();
+	sbd->m_clusters = sbd->m_numClusters? (SoftBodyClusterData*) serializer->getUniquePointer((void*)m_clusters[0]) : 0;
+	if (sbd->m_numClusters)
+	{
+		int numElem = sbd->m_numClusters;
+		int sz = sizeof(SoftBodyClusterData);
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		SoftBodyClusterData* memPtr = (SoftBodyClusterData*) chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			memPtr->m_adamping= m_clusters[i]->m_adamping;
+			m_clusters[i]->m_av.serializeFloat(memPtr->m_av);
+			memPtr->m_clusterIndex = m_clusters[i]->m_clusterIndex;
+			memPtr->m_collide = m_clusters[i]->m_collide;
+			m_clusters[i]->m_com.serializeFloat(memPtr->m_com);
+			memPtr->m_containsAnchor = m_clusters[i]->m_containsAnchor;
+			m_clusters[i]->m_dimpulses[0].serializeFloat(memPtr->m_dimpulses[0]);
+			m_clusters[i]->m_dimpulses[1].serializeFloat(memPtr->m_dimpulses[1]);
+			m_clusters[i]->m_framexform.serializeFloat(memPtr->m_framexform);
+			memPtr->m_idmass = m_clusters[i]->m_idmass;
+			memPtr->m_imass = m_clusters[i]->m_imass;
+			m_clusters[i]->m_invwi.serializeFloat(memPtr->m_invwi);
+			memPtr->m_ldamping = m_clusters[i]->m_ldamping;
+			m_clusters[i]->m_locii.serializeFloat(memPtr->m_locii);
+			m_clusters[i]->m_lv.serializeFloat(memPtr->m_lv);
+			memPtr->m_matching = m_clusters[i]->m_matching;
+			memPtr->m_maxSelfCollisionImpulse = m_clusters[i]->m_maxSelfCollisionImpulse;
+			memPtr->m_ndamping = m_clusters[i]->m_ndamping;
+			memPtr->m_ldamping = m_clusters[i]->m_ldamping;
+			memPtr->m_adamping = m_clusters[i]->m_adamping;
+			memPtr->m_selfCollisionImpulseFactor = m_clusters[i]->m_selfCollisionImpulseFactor;
+
+			memPtr->m_numFrameRefs = m_clusters[i]->m_framerefs.size();
+			memPtr->m_numMasses = m_clusters[i]->m_masses.size();
+			memPtr->m_numNodes = m_clusters[i]->m_nodes.size();
+
+			memPtr->m_nvimpulses = m_clusters[i]->m_nvimpulses;
+			m_clusters[i]->m_vimpulses[0].serializeFloat(memPtr->m_vimpulses[0]);
+			m_clusters[i]->m_vimpulses[1].serializeFloat(memPtr->m_vimpulses[1]);
+			memPtr->m_ndimpulses = m_clusters[i]->m_ndimpulses;
+
+			
+
+			memPtr->m_framerefs = memPtr->m_numFrameRefs? (btVector3FloatData*)serializer->getUniquePointer((void*)&m_clusters[i]->m_framerefs[0]) : 0;
+			if (memPtr->m_framerefs)
+			{
+				int numElem = memPtr->m_numFrameRefs;
+				int sz = sizeof(btVector3FloatData);
+				btChunk* chunk = serializer->allocate(sz,numElem);
+				btVector3FloatData* memPtr = (btVector3FloatData*) chunk->m_oldPtr;
+				for (int j=0;j<numElem;j++,memPtr++)
+				{
+					m_clusters[i]->m_framerefs[j].serializeFloat(*memPtr);
+				}
+				serializer->finalizeChunk(chunk,"btVector3FloatData",BT_ARRAY_CODE,(void*)&m_clusters[i]->m_framerefs[0]);
+			}
+			
+			memPtr->m_masses = memPtr->m_numMasses ? (float*) serializer->getUniquePointer((void*)&m_clusters[i]->m_masses[0]): 0;
+			if (memPtr->m_masses)
+			{
+				int numElem = memPtr->m_numMasses;
+				int sz = sizeof(float);
+				btChunk* chunk = serializer->allocate(sz,numElem);
+				float* memPtr = (float*) chunk->m_oldPtr;
+				for (int j=0;j<numElem;j++,memPtr++)
+				{
+					*memPtr = m_clusters[i]->m_masses[j];
+				}
+				serializer->finalizeChunk(chunk,"float",BT_ARRAY_CODE,(void*)&m_clusters[i]->m_masses[0]);
+			}
+
+			memPtr->m_nodeIndices  = memPtr->m_numNodes ? (int*) serializer->getUniquePointer((void*) &m_clusters[i]->m_nodes) : 0;
+			if (memPtr->m_nodeIndices )
+			{
+				int numElem = memPtr->m_numMasses;
+				int sz = sizeof(int);
+				btChunk* chunk = serializer->allocate(sz,numElem);
+				int* memPtr = (int*) chunk->m_oldPtr;
+				for (int j=0;j<numElem;j++,memPtr++)
+				{
+					int* indexPtr = m_nodeIndexMap.find(m_clusters[i]->m_nodes[j]);
+					btAssert(indexPtr);
+					*memPtr = *indexPtr;
+				}
+				serializer->finalizeChunk(chunk,"int",BT_ARRAY_CODE,(void*)&m_clusters[i]->m_nodes);
+			}
+		}
+		serializer->finalizeChunk(chunk,"SoftBodyClusterData",BT_ARRAY_CODE,(void*)m_clusters[0]);
+
+	}
+	
+
+	
+	sbd->m_numJoints = m_joints.size();
+	sbd->m_joints = m_joints.size()? (btSoftBodyJointData*) serializer->getUniquePointer((void*)&m_joints[0]) : 0;
+
+	if (sbd->m_joints)
+	{
+		int sz = sizeof(btSoftBodyJointData);
+		int numElem = m_joints.size();
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		btSoftBodyJointData* memPtr = (btSoftBodyJointData*)chunk->m_oldPtr;
+
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			memPtr->m_jointType = (int)m_joints[i]->Type();
+			m_joints[i]->m_refs[0].serializeFloat(memPtr->m_refs[0]);
+			m_joints[i]->m_refs[1].serializeFloat(memPtr->m_refs[1]);
+			memPtr->m_cfm = m_joints[i]->m_cfm;
+			memPtr->m_erp = m_joints[i]->m_erp;
+			memPtr->m_split = m_joints[i]->m_split;
+			memPtr->m_delete = m_joints[i]->m_delete;
+			
+			for (int j=0;j<4;j++)
+			{
+				memPtr->m_relPosition[0].m_floats[j] = 0.f;
+				memPtr->m_relPosition[1].m_floats[j] = 0.f;
+			}
+			memPtr->m_bodyA = 0;
+			memPtr->m_bodyB = 0;
+			if (m_joints[i]->m_bodies[0].m_soft)
+			{
+				memPtr->m_bodyAtype = BT_JOINT_SOFT_BODY_CLUSTER;
+				memPtr->m_bodyA = serializer->getUniquePointer((void*)m_joints[i]->m_bodies[0].m_soft);
+			}
+			if (m_joints[i]->m_bodies[0].m_collisionObject)
+			{
+				memPtr->m_bodyAtype = BT_JOINT_COLLISION_OBJECT;
+				memPtr->m_bodyA = serializer->getUniquePointer((void*)m_joints[i]->m_bodies[0].m_collisionObject);
+			}
+			if (m_joints[i]->m_bodies[0].m_rigid)
+			{
+				memPtr->m_bodyAtype = BT_JOINT_RIGID_BODY;
+				memPtr->m_bodyA = serializer->getUniquePointer((void*)m_joints[i]->m_bodies[0].m_rigid);
+			}
+
+			if (m_joints[i]->m_bodies[1].m_soft)
+			{
+				memPtr->m_bodyBtype = BT_JOINT_SOFT_BODY_CLUSTER;
+				memPtr->m_bodyB = serializer->getUniquePointer((void*)m_joints[i]->m_bodies[1].m_soft);
+			}
+			if (m_joints[i]->m_bodies[1].m_collisionObject)
+			{
+				memPtr->m_bodyBtype = BT_JOINT_COLLISION_OBJECT;
+				memPtr->m_bodyB = serializer->getUniquePointer((void*)m_joints[i]->m_bodies[1].m_collisionObject);
+			}
+			if (m_joints[i]->m_bodies[1].m_rigid)
+			{
+				memPtr->m_bodyBtype = BT_JOINT_RIGID_BODY;
+				memPtr->m_bodyB = serializer->getUniquePointer((void*)m_joints[i]->m_bodies[1].m_rigid);
+			}
+		}
+		serializer->finalizeChunk(chunk,"btSoftBodyJointData",BT_ARRAY_CODE,(void*) &m_joints[0]);
+	}
+
+
+	return btSoftBodyDataName;
+}
+
diff --git a/src/bullet/BulletSoftBody/btSoftBody.h b/src/bullet/BulletSoftBody/btSoftBody.h
new file mode 100644
index 00000000..ba589486
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftBody.h
@@ -0,0 +1,987 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///btSoftBody implementation by Nathanael Presson
+
+#ifndef _BT_SOFT_BODY_H
+#define _BT_SOFT_BODY_H
+
+#include "LinearMath/btAlignedObjectArray.h"
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btIDebugDraw.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+
+#include "BulletCollision/CollisionShapes/btConcaveShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+#include "btSparseSDF.h"
+#include "BulletCollision/BroadphaseCollision/btDbvt.h"
+
+//#ifdef BT_USE_DOUBLE_PRECISION
+//#define btRigidBodyData	btRigidBodyDoubleData
+//#define btRigidBodyDataName	"btRigidBodyDoubleData"
+//#else
+#define btSoftBodyData	btSoftBodyFloatData
+#define btSoftBodyDataName	"btSoftBodyFloatData"
+//#endif //BT_USE_DOUBLE_PRECISION
+
+class btBroadphaseInterface;
+class btDispatcher;
+class btSoftBodySolver;
+
+/* btSoftBodyWorldInfo: air/water densities, water offset and normal, gravity, broadphase/dispatcher pointers and a sparse SDF; btSoftBody keeps a pointer to one (m_worldInfo) */
+struct btSoftBodyWorldInfo
+{
+	btScalar				air_density;	// 1.2 after default construction
+	btScalar				water_density;	// 0 after default construction
+	btScalar				water_offset;	// 0 after default construction
+	btVector3				water_normal;	// (0,0,0) after default construction
+	btBroadphaseInterface*	m_broadphase;	// null after default construction
+	btDispatcher*			m_dispatcher;	// null after default construction
+	btVector3				m_gravity;		// (0,-10,0) after default construction
+	btSparseSdf<3>			m_sparsesdf;	// default-constructed (not named in the initializer list)
+	/* Default constructor: gives every member except m_sparsesdf an explicit initial value. */
+	btSoftBodyWorldInfo()
+		: air_density(btScalar(1.2))
+		, water_density(0)
+		, water_offset(0)
+		, water_normal(0,0,0)
+		, m_broadphase(0)
+		, m_dispatcher(0)
+		, m_gravity(0,-10,0)
+	{
+	}
+};
+
+
+///The btSoftBody is a class to simulate cloth and volumetric soft bodies. 
+///There is two-way interaction between btSoftBody and btRigidBody/btCollisionObject.
+class	btSoftBody : public btCollisionObject
+{
+public:
+	btAlignedObjectArray<class btCollisionObject*> m_collisionDisabledObjects;	// NOTE(review): presumably objects excluded from collision with this body - confirm in btSoftBody.cpp
+
+	// The solver object that handles this soft body (written by setSoftBodySolver(), read by getSoftBodySolver())
+	btSoftBodySolver *m_softBodySolver;
+
+	//
+	// Enumerations
+	//
+
+	///eAeroModel : aerodynamic models (type of Config::aeromodel)
+	struct eAeroModel { enum _ {
+		V_Point,			///<Vertex normals are oriented toward velocity
+		V_TwoSided,			///<Vertex normals are flipped to match velocity	
+		V_TwoSidedLiftDrag, ///<Vertex normals are flipped to match velocity and lift and drag forces are applied
+		V_OneSided,			///<Vertex normals are taken as they are	
+		F_TwoSided,			///<Face normals are flipped to match velocity
+		F_TwoSidedLiftDrag,	///<Face normals are flipped to match velocity and lift and drag forces are applied 
+		F_OneSided,			///<Face normals are taken as they are		
+		END
+	};};
+
+	///eVSolver : velocities solvers (element type of tVSolverArray)
+	struct	eVSolver { enum _ {
+		Linear,		///<Linear solver
+		END
+	};};
+
+	///ePSolver : positions solvers (element type of tPSolverArray)
+	struct	ePSolver { enum _ {
+		Linear,		///<Linear solver
+		Anchors,	///<Anchor solver
+		RContacts,	///<Rigid contacts solver
+		SContacts,	///<Soft contacts solver
+		END
+	};};
+
+	///eSolverPresets : argument type of setSolver()
+	struct	eSolverPresets { enum _ {
+		Positions,
+		Velocities,
+		Default	=	Positions,
+		END			// follows Default (== Positions), so END has the same value as Velocities, not the preset count
+	};};
+
+	///eFeature : kind of element a result refers to (see sRayCast::feature)
+	struct	eFeature { enum _ {
+		None,
+		Node,
+		Link,
+		Face,
+		Tetra,
+		END
+	};};
+
+	typedef btAlignedObjectArray<eVSolver::_>	tVSolverArray;	// type of Config::m_vsequence
+	typedef btAlignedObjectArray<ePSolver::_>	tPSolverArray;	// type of Config::m_psequence and Config::m_dsequence
+
+	//
+	// Flags
+	//
+
+	///fCollision : collision flag bits
+	struct fCollision { enum _ {
+		RVSmask	=	0x000f,	///<Rigid versus soft mask
+		SDF_RS	=	0x0001,	///<SDF based rigid vs soft
+		CL_RS	=	0x0002, ///<Cluster vs convex rigid vs soft
+
+		SVSmask	=	0x0030,	///<Soft versus soft mask (covers VF_SS and CL_SS)
+		VF_SS	=	0x0010,	///<Vertex vs face soft vs soft handling
+		CL_SS	=	0x0020, ///<Cluster vs cluster soft vs soft handling
+		CL_SELF =	0x0040, ///<Cluster soft body self collision (outside both masks)
+		/* presets	*/ 
+		Default	=	SDF_RS,
+		END
+	};};
+
+	///fMaterial : material flag bits
+	struct fMaterial { enum _ {
+		DebugDraw	=	0x0001,	///< Enable debug draw
+		/* presets	*/ 
+		Default		=	DebugDraw,
+		END
+	};};
+
+	//
+	// API Types
+	//
+
+	/* sRayCast: result record passed by reference to rayTest(rayFrom,rayTo,results) */ 
+	struct sRayCast
+	{
+		btSoftBody*	body;		///< soft body
+		eFeature::_	feature;	///< feature type
+		int			index;		///< feature index
+		btScalar	fraction;		///< time of impact fraction (rayFrom+(rayTo-rayFrom)*fraction)
+	};
+
+	/* ImplicitFn: abstract scalar function of a point, handed to refine(); implementations override Eval() */ 
+	struct	ImplicitFn {
+		virtual				~ImplicitFn() {}					// virtual so an implementation can be destroyed through an ImplicitFn*
+		virtual btScalar	Eval(const btVector3& x)=0;	// value of the function at x
+	};
+
+	//
+	// Internal types
+	//
+
+	typedef btAlignedObjectArray<btScalar>	tScalarArray;
+	typedef btAlignedObjectArray<btVector3>	tVector3Array;
+
+	/* sCti is Softbody contact info (passed by reference to checkContact())	*/ 
+	struct	sCti
+	{
+		btCollisionObject*	m_colObj;		/* Collision object (declared as btCollisionObject*, not btRigidBody*)	*/ 
+		btVector3		m_normal;	/* Outward normal		*/ 
+		btScalar		m_offset;	/* Offset from origin	*/ 
+	};	
+
+	/* sMedium: velocity, pressure and density of a surrounding medium	*/ 
+	struct	sMedium
+	{
+		btVector3		m_velocity;	/* Velocity				*/ 
+		btScalar		m_pressure;	/* Pressure				*/ 
+		btScalar		m_density;	/* Density				*/ 
+	};
+
+	/* Base type of Material, Feature and Note: carries only a user tag	*/ 
+	struct	Element
+	{
+		void*			m_tag;			// User data
+		Element() : m_tag(0) {}			// tag starts out null
+	};
+	/* Material: stiffness coefficients and flags, referenced from Feature::m_material	*/ 
+	struct	Material : Element
+	{
+		btScalar				m_kLST;			// Linear stiffness coefficient [0,1]
+		btScalar				m_kAST;			// Area/Angular stiffness coefficient [0,1]
+		btScalar				m_kVST;			// Volume stiffness coefficient [0,1]
+		int						m_flags;		// Flags (NOTE(review): presumably fMaterial bits - confirm)
+	};
+
+	/* Feature: base of Node, Link, Face and Tetra; adds a material pointer	*/ 
+	struct	Feature : Element
+	{
+		Material*				m_material;		// Material
+	};
+	/* Node: a simulated point (position, velocity, force, normal, inverse mass)	*/ 
+	struct	Node : Feature
+	{
+		btVector3				m_x;			// Position
+		btVector3				m_q;			// Previous step position
+		btVector3				m_v;			// Velocity
+		btVector3				m_f;			// Force accumulator
+		btVector3				m_n;			// Normal
+		btScalar				m_im;			// 1/mass
+		btScalar				m_area;			// Area
+		btDbvtNode*				m_leaf;			// Leaf data
+		int						m_battach:1;	// Attached (1-bit int field: test it for non-zero, do not compare with 1)
+	};
+	/* Link: connects two nodes with a rest length	*/ 
+	struct	Link : Feature
+	{
+		Node*					m_n[2];			// Node pointers
+		btScalar				m_rl;			// Rest length		
+		int						m_bbending:1;	// Bending link (1-bit int field: test it for non-zero, do not compare with 1)
+		btScalar				m_c0;			// (ima+imb)*kLST
+		btScalar				m_c1;			// rl^2
+		btScalar				m_c2;			// |gradient|^2/c0
+		btVector3				m_c3;			// gradient
+	};
+	/* Face: triangle over three nodes	*/ 
+	struct	Face : Feature
+	{
+		Node*					m_n[3];			// Node pointers
+		btVector3				m_normal;		// Normal
+		btScalar				m_ra;			// Rest area
+		btDbvtNode*				m_leaf;			// Leaf data
+	};
+	/* Tetra: tetrahedron over four nodes	*/ 
+	struct	Tetra : Feature
+	{
+		Node*					m_n[4];			// Node pointers		
+		btScalar				m_rv;			// Rest volume
+		btDbvtNode*				m_leaf;			// Leaf data
+		btVector3				m_c0[4];		// gradients
+		btScalar				m_c1;			// (4*kVST)/(im0+im1+im2+im3)
+		btScalar				m_c2;			// m_c1/sum(|g0..3|^2)
+	};
+	/* RContact: contact between a node and a collision object (element type of m_rcontacts)	*/ 
+	struct	RContact
+	{
+		sCti		m_cti;			// Contact infos
+		Node*					m_node;			// Owner node
+		btMatrix3x3				m_c0;			// Impulse matrix
+		btVector3				m_c1;			// Relative anchor
+		btScalar				m_c2;			// ima*dt
+		btScalar				m_c3;			// Friction
+		btScalar				m_c4;			// Hardness
+	};
+	/* SContact: contact between a node and a face (element type of m_scontacts)	*/ 
+	struct	SContact
+	{
+		Node*					m_node;			// Node
+		Face*					m_face;			// Face
+		btVector3				m_weights;		// Weights
+		btVector3				m_normal;		// Normal
+		btScalar				m_margin;		// Margin
+		btScalar				m_friction;		// Friction
+		btScalar				m_cfm[2];		// Constraint force mixing
+	};
+	/* Anchor: ties a node to a rigid body (element type of m_anchors)	*/ 
+	struct	Anchor
+	{
+		Node*					m_node;			// Node pointer
+		btVector3				m_local;		// Anchor position in body space
+		btRigidBody*			m_body;			// Body
+		btScalar				m_influence;	// NOTE(review): presumably the 'influence' argument of appendAnchor() - confirm
+		btMatrix3x3				m_c0;			// Impulse matrix
+		btVector3				m_c1;			// Relative anchor
+		btScalar				m_c2;			// ima*dt
+	};
+	/* Note: text label with an offset, referring to up to four nodes (element type of m_notes)	*/ 
+	struct	Note : Element
+	{
+		const char*				m_text;			// Text
+		btVector3				m_offset;		// Offset
+		int						m_rank;			// Rank
+		Node*					m_nodes[4];		// Nodes
+		btScalar				m_coords[4];	// Coordinates
+	};	
+	/* Pose: reference positions, weights and matrices (type of m_pose)	*/ 
+	struct	Pose
+	{
+		bool					m_bvolume;		// Is valid
+		bool					m_bframe;		// Is frame
+		btScalar				m_volume;		// Rest volume
+		tVector3Array			m_pos;			// Reference positions
+		tScalarArray			m_wgh;			// Weights
+		btVector3				m_com;			// COM
+		btMatrix3x3				m_rot;			// Rotation
+		btMatrix3x3				m_scl;			// Scale
+		btMatrix3x3				m_aqq;			// Base scaling
+	};
+	/* Cluster: group of nodes with per-node masses, a frame transform, velocities and pending impulses (held by pointer in m_clusters)	*/ 
+	struct	Cluster
+	{
+		tScalarArray				m_masses;		// one entry per mass (serialized as m_numMasses floats)
+		btAlignedObjectArray<Node*>	m_nodes;		
+		tVector3Array				m_framerefs;
+		btTransform					m_framexform;	// returned by Body::xform() for a soft body
+		btScalar					m_idmass;		// not set by the constructor
+		btScalar					m_imass;		// returned by Body::invMass() for a soft body; not set by the constructor
+		btMatrix3x3					m_locii;
+		btMatrix3x3					m_invwi;		// returned by Body::invWorldInertia() for a soft body
+		btVector3					m_com;
+		btVector3					m_vimpulses[2];
+		btVector3					m_dimpulses[2];
+		int							m_nvimpulses;	// not set by the constructor
+		int							m_ndimpulses;	// not set by the constructor
+		btVector3					m_lv;			// returned by Body::linearVelocity() for a soft body
+		btVector3					m_av;			// returned by Body::angularVelocity() for a soft body
+		btDbvtNode*					m_leaf;
+		btScalar					m_ndamping;	/* Node damping		*/ 
+		btScalar					m_ldamping;	/* Linear damping	*/ 
+		btScalar					m_adamping;	/* Angular damping	*/ 
+		btScalar					m_matching;
+		btScalar					m_maxSelfCollisionImpulse;
+		btScalar					m_selfCollisionImpulseFactor;
+		bool						m_containsAnchor;
+		bool						m_collide;		// not set by the constructor
+		int							m_clusterIndex;	// not set by the constructor
+		Cluster() : m_leaf(0),m_ndamping(0),m_ldamping(0),m_adamping(0),m_matching(0) 	// NOTE: m_idmass, m_imass, m_nvimpulses, m_ndimpulses, m_collide and m_clusterIndex stay uninitialized; assign them before reading
+		,m_maxSelfCollisionImpulse(100.f),
+		m_selfCollisionImpulseFactor(0.01f),
+		m_containsAnchor(false)
+		{}
+	};
+	/* Impulse: a velocity part and a drift part, each guarded by a 1-bit flag that Body::applyImpulse()/applyAImpulse() test before applying it */ 
+	struct	Impulse
+	{
+		btVector3	m_velocity;		// velocity part
+		btVector3	m_drift;		// drift part
+		int			m_asVelocity:1;	// non-zero: the velocity part is to be applied
+		int			m_asDrift:1;	// non-zero: the drift part is to be applied
+		Impulse() : m_velocity(0,0,0), m_drift(0,0,0), m_asVelocity(0), m_asDrift(0) {}
+		Impulse operator -() const	// copy with both vectors negated; flags are carried over
+		{
+			Impulse negated(*this);
+			negated.m_velocity = -m_velocity;
+			negated.m_drift = -m_drift;
+			return negated;
+		}
+		Impulse operator*(btScalar x) const	// copy with both vectors scaled by x; flags are carried over
+		{
+			Impulse scaled(*this);
+			scaled.m_velocity *= x;
+			scaled.m_drift *= x;
+			return scaled;
+		}
+	};
+	/* Body: one end of a Joint (Joint::m_bodies) - wraps a soft Cluster and/or a btCollisionObject together with its btRigidBody::upcast() result. Accessors check m_rigid first, then m_soft. */ 
+	struct	Body
+	{
+		Cluster*			m_soft;				// soft cluster, or 0
+		btRigidBody*		m_rigid;			// btRigidBody::upcast(m_collisionObject), or 0
+		btCollisionObject*	m_collisionObject;	// wrapped collision object, or 0
+
+		Body() : m_soft(0),m_rigid(0),m_collisionObject(0)				{}
+		Body(Cluster* p) : m_soft(p),m_rigid(0),m_collisionObject(0)	{}
+		Body(btCollisionObject* colObj) : m_soft(0),m_rigid(0),m_collisionObject(colObj)
+		{
+			if(m_collisionObject) m_rigid = btRigidBody::upcast(m_collisionObject);	// a null colObj yields an empty Body instead of being handed to upcast()
+		}
+
+		void						activate() const	// activates the rigid body and the collision object, whichever are set
+		{
+			if(m_rigid) 
+				m_rigid->activate();
+			if (m_collisionObject)
+				m_collisionObject->activate();
+
+		}
+		const btMatrix3x3&			invWorldInertia() const	// all-zero matrix when neither m_rigid nor m_soft is set
+		{
+			static const btMatrix3x3	iwi(0,0,0,0,0,0,0,0,0);
+			if(m_rigid) return(m_rigid->getInvInertiaTensorWorld());
+			if(m_soft)	return(m_soft->m_invwi);
+			return(iwi);
+		}
+		btScalar					invMass() const	// 0 when neither m_rigid nor m_soft is set
+		{
+			if(m_rigid) return(m_rigid->getInvMass());
+			if(m_soft)	return(m_soft->m_imass);
+			return(0);
+		}
+		const btTransform&			xform() const	// collision object's world transform, else the cluster frame, else identity
+		{
+			static const btTransform	identity=btTransform::getIdentity();		
+			if(m_collisionObject) return(m_collisionObject->getWorldTransform());
+			if(m_soft)	return(m_soft->m_framexform);
+			return(identity);
+		}
+		btVector3					linearVelocity() const	// zero vector when neither m_rigid nor m_soft is set
+		{
+			if(m_rigid) return(m_rigid->getLinearVelocity());
+			if(m_soft)	return(m_soft->m_lv);
+			return(btVector3(0,0,0));
+		}
+		btVector3					angularVelocity(const btVector3& rpos) const	// cross(angular velocity, rpos)
+		{			
+			if(m_rigid) return(btCross(m_rigid->getAngularVelocity(),rpos));
+			if(m_soft)	return(btCross(m_soft->m_av,rpos));
+			return(btVector3(0,0,0));
+		}
+		btVector3					angularVelocity() const	// zero vector when neither m_rigid nor m_soft is set
+		{			
+			if(m_rigid) return(m_rigid->getAngularVelocity());
+			if(m_soft)	return(m_soft->m_av);
+			return(btVector3(0,0,0));
+		}
+		btVector3					velocity(const btVector3& rpos) const	// linearVelocity() + angularVelocity(rpos)
+		{
+			return(linearVelocity()+angularVelocity(rpos));
+		}
+		void						applyVImpulse(const btVector3& impulse,const btVector3& rpos) const	// velocity impulse at rpos
+		{
+			if(m_rigid)	m_rigid->applyImpulse(impulse,rpos);
+			if(m_soft)	btSoftBody::clusterVImpulse(m_soft,rpos,impulse);
+		}
+		void						applyDImpulse(const btVector3& impulse,const btVector3& rpos) const	// drift impulse at rpos; the rigid branch uses the same applyImpulse() call as applyVImpulse()
+		{
+			if(m_rigid)	m_rigid->applyImpulse(impulse,rpos);
+			if(m_soft)	btSoftBody::clusterDImpulse(m_soft,rpos,impulse);
+		}		
+		void						applyImpulse(const Impulse& impulse,const btVector3& rpos) const	// applies whichever parts of impulse have their flag set
+		{
+			if(impulse.m_asVelocity)	
+			{
+//				printf("impulse.m_velocity = %f,%f,%f\n",impulse.m_velocity.getX(),impulse.m_velocity.getY(),impulse.m_velocity.getZ());
+				applyVImpulse(impulse.m_velocity,rpos);
+			}
+			if(impulse.m_asDrift)		
+			{
+//				printf("impulse.m_drift = %f,%f,%f\n",impulse.m_drift.getX(),impulse.m_drift.getY(),impulse.m_drift.getZ());
+				applyDImpulse(impulse.m_drift,rpos);
+			}
+		}
+		void						applyVAImpulse(const btVector3& impulse) const	// angular velocity impulse
+		{
+			if(m_rigid)	m_rigid->applyTorqueImpulse(impulse);
+			if(m_soft)	btSoftBody::clusterVAImpulse(m_soft,impulse);
+		}
+		void						applyDAImpulse(const btVector3& impulse) const	// angular drift impulse
+		{
+			if(m_rigid)	m_rigid->applyTorqueImpulse(impulse);
+			if(m_soft)	btSoftBody::clusterDAImpulse(m_soft,impulse);
+		}
+		void						applyAImpulse(const Impulse& impulse) const	// angular counterpart of applyImpulse()
+		{
+			if(impulse.m_asVelocity)	applyVAImpulse(impulse.m_velocity);
+			if(impulse.m_asDrift)		applyDAImpulse(impulse.m_drift);
+		}
+		void						applyDCImpulse(const btVector3& impulse) const	// central drift impulse
+		{
+			if(m_rigid)	m_rigid->applyCentralImpulse(impulse);
+			if(m_soft)	btSoftBody::clusterDCImpulse(m_soft,impulse);
+		}
+	};
+	/* Joint: abstract base of LJoint, AJoint and CJoint; connects the two entries of m_bodies	*/ 
+	struct	Joint
+	{
+		struct eType { enum _ {
+			Linear=0,	// returned by LJoint::Type()
+			Angular,	// returned by AJoint::Type()
+			Contact		// returned by CJoint::Type()
+		};};
+		struct Specs	// creation parameters; erp, cfm and split all default to 1
+		{
+			Specs() : erp(1),cfm(1),split(1) {}
+			btScalar	erp;
+			btScalar	cfm;
+			btScalar	split;
+		};
+		Body						m_bodies[2];
+		btVector3					m_refs[2];
+		btScalar					m_cfm;
+		btScalar					m_erp;
+		btScalar					m_split;
+		btVector3					m_drift;
+		btVector3					m_sdrift;
+		btMatrix3x3					m_massmatrix;
+		bool						m_delete;		// false after construction
+		virtual						~Joint() {}
+		Joint() : m_delete(false) {}	// only m_delete is initialized here
+		virtual void				Prepare(btScalar dt,int iterations);
+		virtual void				Solve(btScalar dt,btScalar sor)=0;
+		virtual void				Terminate(btScalar dt)=0;
+		virtual eType::_			Type() const=0;
+	};
+	/* LJoint: linear joint (Type() returns eType::Linear); created through appendLinearJoint()	*/ 
+	struct	LJoint : Joint
+	{
+		struct Specs : Joint::Specs	// adds a position to the common specs
+		{
+			btVector3	position;
+		};		
+		btVector3					m_rpos[2];
+		void						Prepare(btScalar dt,int iterations);
+		void						Solve(btScalar dt,btScalar sor);
+		void						Terminate(btScalar dt);
+		eType::_					Type() const { return(eType::Linear); }
+	};
+	/* AJoint: angular joint (Type() returns eType::Angular); created through appendAngularJoint()	*/ 
+	struct	AJoint : Joint
+	{
+		struct IControl {	// overridable control hooks; the base versions do nothing / return the current speed
+			virtual					~IControl() {}	// virtual so a derived control can be destroyed through an IControl*
+			virtual void			Prepare(AJoint*)				{}
+			virtual btScalar		Speed(AJoint*,btScalar current) { return(current); }
+			static IControl*		Default()						{ static IControl def;return(&def); }	// shared function-local instance
+		};
+		struct Specs : Joint::Specs	// adds an axis and a control; icontrol defaults to IControl::Default()
+		{
+			Specs() : icontrol(IControl::Default()) {}
+			btVector3	axis;
+			IControl*	icontrol;
+		};		
+		btVector3					m_axis[2];
+		IControl*					m_icontrol;
+		void						Prepare(btScalar dt,int iterations);
+		void						Solve(btScalar dt,btScalar sor);
+		void						Terminate(btScalar dt);
+		eType::_					Type() const { return(eType::Angular); }
+	};
+	/* CJoint: contact joint (Type() returns eType::Contact)	*/ 
+	struct	CJoint : Joint
+	{		
+		int							m_life;
+		int							m_maxlife;
+		btVector3					m_rpos[2];
+		btVector3					m_normal;
+		btScalar					m_friction;
+		void						Prepare(btScalar dt,int iterations);
+		void						Solve(btScalar dt,btScalar sor);
+		void						Terminate(btScalar dt);
+		eType::_					Type() const { return(eType::Contact); }
+	};
+	/* Config: per-body solver and material coefficients (type of m_cfg)	*/ 
+	struct	Config
+	{
+		eAeroModel::_			aeromodel;		// Aerodynamic model (default: V_Point)
+		btScalar				kVCF;			// Velocities correction factor (Baumgarte)
+		btScalar				kDP;			// Damping coefficient [0,1]
+		btScalar				kDG;			// Drag coefficient [0,+inf]
+		btScalar				kLF;			// Lift coefficient [0,+inf]
+		btScalar				kPR;			// Pressure coefficient [-inf,+inf]
+		btScalar				kVC;			// Volume conservation coefficient [0,+inf]
+		btScalar				kDF;			// Dynamic friction coefficient [0,1]
+		btScalar				kMT;			// Pose matching coefficient [0,1]		
+		btScalar				kCHR;			// Rigid contacts hardness [0,1]
+		btScalar				kKHR;			// Kinetic contacts hardness [0,1]
+		btScalar				kSHR;			// Soft contacts hardness [0,1]
+		btScalar				kAHR;			// Anchors hardness [0,1]
+		btScalar				kSRHR_CL;		// Soft vs rigid hardness [0,1] (cluster only)
+		btScalar				kSKHR_CL;		// Soft vs kinetic hardness [0,1] (cluster only)
+		btScalar				kSSHR_CL;		// Soft vs soft hardness [0,1] (cluster only)
+		btScalar				kSR_SPLT_CL;	// Soft vs rigid impulse split [0,1] (cluster only)
+		btScalar				kSK_SPLT_CL;	// Soft vs kinetic impulse split [0,1] (cluster only)
+		btScalar				kSS_SPLT_CL;	// Soft vs soft impulse split [0,1] (cluster only)
+		btScalar				maxvolume;		// Maximum volume ratio for pose
+		btScalar				timescale;		// Time scale
+		int						viterations;	// Velocities solver iterations
+		int						piterations;	// Positions solver iterations
+		int						diterations;	// Drift solver iterations
+		int						citerations;	// Cluster solver iterations
+		int						collisions;		// Collisions flags
+		tVSolverArray			m_vsequence;	// Velocity solvers sequence
+		tPSolverArray			m_psequence;	// Position solvers sequence
+		tPSolverArray			m_dsequence;	// Drift solvers sequence
+	};
+	/* SolverState: time-step and margin values (type of m_sst)	*/ 
+	struct	SolverState
+	{
+		btScalar				sdt;			// dt*timescale
+		btScalar				isdt;			// 1/sdt
+		btScalar				velmrg;			// velocity margin
+		btScalar				radmrg;			// radial margin
+		btScalar				updmrg;			// Update margin
+	};	
+	/// RayFromToCaster takes a ray from, ray to (instead of direction!); it is a btDbvt::ICollide whose Process() is called per tree leaf
+	struct	RayFromToCaster : btDbvt::ICollide
+	{
+		btVector3			m_rayFrom;
+		btVector3			m_rayTo;
+		btVector3			m_rayNormalizedDirection;
+		btScalar			m_mint;		// NOTE(review): presumably the smallest hit parameter found so far - confirm in btSoftBody.cpp
+		Face*				m_face;		// NOTE(review): presumably the face belonging to m_mint - confirm
+		int					m_tests;	// NOTE(review): presumably a count of triangle tests - confirm
+		RayFromToCaster(const btVector3& rayFrom,const btVector3& rayTo,btScalar mxt);
+		void					Process(const btDbvtNode* leaf);
+
+		static inline btScalar	rayFromToTriangle(const btVector3& rayFrom,	// ray against triangle (a,b,c); maxt defaults to SIMD_INFINITY
+			const btVector3& rayTo,
+			const btVector3& rayNormalizedDirection,
+			const btVector3& a,
+			const btVector3& b,
+			const btVector3& c,
+			btScalar maxt=SIMD_INFINITY);
+	};
+
+	//
+	// Typedefs
+	//
+
+	typedef void								(*psolver_t)(btSoftBody*,btScalar,btScalar);	// signature of the PSolve_* functions; returned by getSolver(ePSolver::_)
+	typedef void								(*vsolver_t)(btSoftBody*,btScalar);			// signature of VSolve_Links; returned by getSolver(eVSolver::_)
+	typedef btAlignedObjectArray<Cluster*>		tClusterArray;	// stores pointers, not Cluster objects
+	typedef btAlignedObjectArray<Note>			tNoteArray;
+	typedef btAlignedObjectArray<Node>			tNodeArray;
+	typedef btAlignedObjectArray<btDbvtNode*>	tLeafArray;
+	typedef btAlignedObjectArray<Link>			tLinkArray;
+	typedef btAlignedObjectArray<Face>			tFaceArray;
+	typedef btAlignedObjectArray<Tetra>			tTetraArray;
+	typedef btAlignedObjectArray<Anchor>		tAnchorArray;
+	typedef btAlignedObjectArray<RContact>		tRContactArray;
+	typedef btAlignedObjectArray<SContact>		tSContactArray;
+	typedef btAlignedObjectArray<Material*>		tMaterialArray;	// stores pointers, not Material objects
+	typedef btAlignedObjectArray<Joint*>		tJointArray;	// stores pointers, not Joint objects
+	typedef btAlignedObjectArray<btSoftBody*>	tSoftBodyArray;	
+
+	//
+	// Fields
+	//
+
+	Config					m_cfg;			// Configuration
+	SolverState				m_sst;			// Solver state
+	Pose					m_pose;			// Pose
+	void*					m_tag;			// User data
+	btSoftBodyWorldInfo*	m_worldInfo;	// World info (returned by getWorldInfo())
+	tNoteArray				m_notes;		// Notes
+	tNodeArray				m_nodes;		// Nodes
+	tLinkArray				m_links;		// Links
+	tFaceArray				m_faces;		// Faces
+	tTetraArray				m_tetras;		// Tetras
+	tAnchorArray			m_anchors;		// Anchors
+	tRContactArray			m_rcontacts;	// Rigid contacts
+	tSContactArray			m_scontacts;	// Soft contacts
+	tJointArray				m_joints;		// Joints
+	tMaterialArray			m_materials;	// Materials
+	btScalar				m_timeacc;		// Time accumulator
+	btVector3				m_bounds[2];	// Spatial bounds: [0] is reported as aabbMin and [1] as aabbMax by getAabb()
+	bool					m_bUpdateRtCst;	// Update runtime constants
+	btDbvt					m_ndbvt;		// Nodes tree
+	btDbvt					m_fdbvt;		// Faces tree
+	btDbvt					m_cdbvt;		// Clusters tree
+	tClusterArray			m_clusters;		// Clusters
+
+	btAlignedObjectArray<bool>m_clusterConnectivity;//cluster connectivity, for self-collision
+
+	btTransform			m_initialWorldTransform;
+
+	btVector3			m_windVelocity;	// NOTE(review): presumably the value behind setWindVelocity()/getWindVelocity() - confirm in btSoftBody.cpp
+	//
+	// Api
+	//
+
+	/* ctor: world info plus node_count entries read from x and m (NOTE(review): presumably node positions and masses - confirm)	*/ 
+	btSoftBody(	btSoftBodyWorldInfo* worldInfo,int node_count,		const btVector3* x,		const btScalar* m);
+
+	/* ctor: world info only												*/ 
+	btSoftBody(	btSoftBodyWorldInfo* worldInfo);
+
+	void	initDefaults();
+
+	/* dtor																	*/ 
+	virtual ~btSoftBody();
+	/* Check for existing link: see the checkLink() overloads declared further down	*/ 
+
+	btAlignedObjectArray<int>	m_userIndexMapping;
+
+	btSoftBodyWorldInfo*	getWorldInfo()	// returns m_worldInfo
+	{
+		return m_worldInfo;
+	}
+
+	///@todo: avoid internal softbody shape hack and move collision code to collision library
+	virtual void	setCollisionShape(btCollisionShape* collisionShape)
+	{
+		// empty body: the collisionShape argument is ignored
+	}
+
+	bool				checkLink(	int node0,	/* Check for existing link (by node index)	*/
+		int node1) const;
+	bool				checkLink(	const Node* node0,	/* Check for existing link (by node pointer)	*/
+		const Node* node1) const;
+	/* Check for existing face												*/ 
+	bool				checkFace(	int node0,
+		int node1,
+		int node2) const;
+	/* Append material														*/ 
+	Material*			appendMaterial();
+	/* Append note															*/ 
+	void				appendNote(	const char* text,
+		const btVector3& o,
+		const btVector4& c=btVector4(1,0,0,0),
+		Node* n0=0,
+		Node* n1=0,
+		Node* n2=0,
+		Node* n3=0);
+	void				appendNote(	const char* text,
+		const btVector3& o,
+		Node* feature);
+	void				appendNote(	const char* text,
+		const btVector3& o,
+		Link* feature);
+	void				appendNote(	const char* text,
+		const btVector3& o,
+		Face* feature);
+	/* Append node															*/ 
+	void				appendNode(	const btVector3& x,btScalar m);
+	/* Append link															*/ 
+	void				appendLink(int model=-1,Material* mat=0);
+	void				appendLink(	int node0,
+		int node1,
+		Material* mat=0,
+		bool bcheckexist=false);
+	void				appendLink(	Node* node0,
+		Node* node1,
+		Material* mat=0,
+		bool bcheckexist=false);
+	/* Append face															*/ 
+	void				appendFace(int model=-1,Material* mat=0);
+	void				appendFace(	int node0,
+		int node1,
+		int node2,
+		Material* mat=0);
+	void			appendTetra(int model,Material* mat);	/* Append tetra (unlike appendLink/appendFace, no default arguments)	*/
+	//
+	void			appendTetra(int node0,
+										int node1,
+										int node2,
+										int node3,
+										Material* mat=0);
+
+
+	/* Append anchor														*/ 
+	void				appendAnchor(	int node,
+		btRigidBody* body, bool disableCollisionBetweenLinkedBodies=false,btScalar influence = 1);
+	void			appendAnchor(int node,btRigidBody* body, const btVector3& localPivot,bool disableCollisionBetweenLinkedBodies=false,btScalar influence = 1);
+	/* Append linear joint													*/ 
+	void				appendLinearJoint(const LJoint::Specs& specs,Cluster* body0,Body body1);
+	void				appendLinearJoint(const LJoint::Specs& specs,Body body=Body());
+	void				appendLinearJoint(const LJoint::Specs& specs,btSoftBody* body);
+	/* Append angular joint													*/ 
+	void				appendAngularJoint(const AJoint::Specs& specs,Cluster* body0,Body body1);
+	void				appendAngularJoint(const AJoint::Specs& specs,Body body=Body());
+	void				appendAngularJoint(const AJoint::Specs& specs,btSoftBody* body);
+	/* Add force (or gravity) to the entire body							*/ 
+	void				addForce(		const btVector3& force);
+	/* Add force (or gravity) to a node of the body							*/ 
+	void				addForce(		const btVector3& force,
+		int node);
+	/* Add aero force to a node of the body */
+	void			    addAeroForceToNode(const btVector3& windVelocity,int nodeIndex);
+
+	/* Add aero force to a face of the body */
+	void			    addAeroForceToFace(const btVector3& windVelocity,int faceIndex);
+
+	/* Add velocity to the entire body										*/ 
+	void				addVelocity(	const btVector3& velocity);
+
+	/* Set velocity for the entire body										*/ 
+	void				setVelocity(	const btVector3& velocity);
+
+	/* Add velocity to a node of the body									*/ 
+	void				addVelocity(	const btVector3& velocity,
+		int node);
+	/* Set mass																*/ 
+	void				setMass(		int node,
+		btScalar mass);
+	/* Get mass																*/ 
+	btScalar			getMass(		int node) const;
+	/* Get total mass														*/ 
+	btScalar			getTotalMass() const;
+	/* Set total mass (weighted by previous masses)							*/ 
+	void				setTotalMass(	btScalar mass,
+		bool fromfaces=false);
+	/* Set total density													*/ 
+	void				setTotalDensity(btScalar density);
+	/* Set volume mass (using tetrahedrons)									*/
+	void				setVolumeMass(		btScalar mass);
+	/* Set volume density (using tetrahedrons)								*/
+	void				setVolumeDensity(	btScalar density);
+	/* Transform															*/ 
+	void				transform(		const btTransform& trs);
+	/* Translate															*/ 
+	void				translate(		const btVector3& trs);
+	/* Rotate																*/ 
+	void				rotate(	const btQuaternion& rot);
+	/* Scale																*/ 
+	void				scale(	const btVector3& scl);
+	/* Set current state as pose											*/ 
+	void				setPose(		bool bvolume,
+		bool bframe);
+	/* Return the volume													*/ 
+	btScalar			getVolume() const;
+	/* Cluster count														*/ 
+	int					clusterCount() const;
+	/* Cluster center of mass												*/ 
+	static btVector3	clusterCom(const Cluster* cluster);
+	btVector3			clusterCom(int cluster) const;
+	/* Cluster velocity at rpos												*/ 
+	static btVector3	clusterVelocity(const Cluster* cluster,const btVector3& rpos);
+	/* Cluster impulse (called from the Body::apply*Impulse helpers)		*/ 
+	static void			clusterVImpulse(Cluster* cluster,const btVector3& rpos,const btVector3& impulse);
+	static void			clusterDImpulse(Cluster* cluster,const btVector3& rpos,const btVector3& impulse);
+	static void			clusterImpulse(Cluster* cluster,const btVector3& rpos,const Impulse& impulse);
+	static void			clusterVAImpulse(Cluster* cluster,const btVector3& impulse);
+	static void			clusterDAImpulse(Cluster* cluster,const btVector3& impulse);
+	static void			clusterAImpulse(Cluster* cluster,const Impulse& impulse);
+	static void			clusterDCImpulse(Cluster* cluster,const btVector3& impulse);
+	/* Generate bending constraints based on distance in the adjacency graph	*/ 
+	int					generateBendingConstraints(	int distance,
+		Material* mat=0);
+	/* Randomize constraints to reduce solver bias							*/ 
+	void				randomizeConstraints();
+	/* Release clusters														*/ 
+	void				releaseCluster(int index);
+	void				releaseClusters();
+	/* Generate clusters (K-mean)											*/ 
+	///generateClusters with k=0 will create a convex cluster for each tetrahedron or triangle
+	///otherwise an approximation will be used (better performance)
+	int					generateClusters(int k,int maxiterations=8192);
+	/* Refine (ifn is the implicit function to evaluate)					*/ 
+	void				refine(ImplicitFn* ifn,btScalar accurary,bool cut);
+	/* CutLink																*/ 
+	bool				cutLink(int node0,int node1,btScalar position);
+	bool				cutLink(const Node* node0,const Node* node1,btScalar position);
+
+	///Ray casting using rayFrom and rayTo in worldspace, (not direction!)
+	bool				rayTest(const btVector3& rayFrom,
+		const btVector3& rayTo,
+		sRayCast& results);
+	/* Solver presets														*/ 
+	void				setSolver(eSolverPresets::_ preset);
+	/* predictMotion														*/ 
+	void				predictMotion(btScalar dt);
+	/* solveConstraints														*/ 
+	void				solveConstraints();
+	/* staticSolve															*/ 
+	void				staticSolve(int iterations);
+	/* solveCommonConstraints												*/ 
+	static void			solveCommonConstraints(btSoftBody** bodies,int count,int iterations);
+	/* solveClusters														*/ 
+	static void			solveClusters(const btAlignedObjectArray<btSoftBody*>& bodies);
+	/* integrateMotion														*/ 
+	void				integrateMotion();
+	/* defaultCollisionHandlers												*/ 
+	void				defaultCollisionHandler(btCollisionObject* pco);
+	void				defaultCollisionHandler(btSoftBody* psb);
+
+
+
+	//
+	// Functionality to deal with new accelerated solvers.
+	//
+
+	/**
+	 * Set a wind velocity for interaction with the air.
+	 */
+	void setWindVelocity( const btVector3 &velocity );
+
+
+	/**
+	 * Return the wind velocity for interaction with the air (declared non-const, so it cannot be called on a const btSoftBody).
+	 */
+	const btVector3& getWindVelocity();
+
+	//
+	// Set the solver that handles this soft body
+	// Should not be allowed to get out of sync with reality
+	// Currently called internally on addition to the world
+	void setSoftBodySolver( btSoftBodySolver *softBodySolver )
+	{
+		m_softBodySolver = softBodySolver;
+	}
+
+	//
+	// Return the solver that handles this soft body
+	// 
+	btSoftBodySolver *getSoftBodySolver()
+	{
+		return m_softBodySolver;
+	}
+
+	//
+	// Return the solver that handles this soft body (const overload; still returns a non-const pointer)
+	// 
+	btSoftBodySolver *getSoftBodySolver() const
+	{
+		return m_softBodySolver;
+	}
+
+
+	//
+	// Cast
+	//
+
+	static const btSoftBody*	upcast(const btCollisionObject* colObj)
+	{
+		// Type-tag checked cast: yields colObj as a soft body only when its internal type is CO_SOFT_BODY, else 0. colObj is dereferenced unconditionally.
+		const bool isSoftBody = (colObj->getInternalType()==CO_SOFT_BODY);
+		return isSoftBody ? (const btSoftBody*)colObj : 0;
+	}
+	static btSoftBody*			upcast(btCollisionObject* colObj)
+	{
+		// Non-const overload of the cast above; same CO_SOFT_BODY check, colObj is dereferenced unconditionally.
+		const bool isSoftBody = (colObj->getInternalType()==CO_SOFT_BODY);
+		return isSoftBody ? (btSoftBody*)colObj : 0;
+	}
+
+	//
+	// ::btCollisionObject
+	//
+
+	virtual void getAabb(btVector3& aabbMin,btVector3& aabbMax) const	// copies the stored bounds; nothing is recomputed here
+	{
+		aabbMin = m_bounds[0];	// lower corner
+		aabbMax = m_bounds[1];	// upper corner
+	}
+	//
+	// Private
+	//
+	void				pointersToIndices();	// NOTE: this section is titled "Private", but the class contains only a 'public:' label, so these members are publicly accessible
+	void				indicesToPointers(const int* map=0);
+
+	int					rayTest(const btVector3& rayFrom,const btVector3& rayTo,	// lower-level overload of the public rayTest(); reports through mint, feature and index
+		btScalar& mint,eFeature::_& feature,int& index,bool bcountonly) const;
+	void				initializeFaceTree();
+	btVector3			evaluateCom() const;
+	bool				checkContact(btCollisionObject* colObj,const btVector3& x,btScalar margin,btSoftBody::sCti& cti) const;
+	void				updateNormals();
+	void				updateBounds();
+	void				updatePose();
+	void				updateConstants();
+	void				initializeClusters();
+	void				updateClusters();
+	void				cleanupClusters();
+	void				prepareClusters(int iterations);
+	void				solveClusters(btScalar sor);
+	void				applyClusters(bool drift);
+	void				dampClusters();
+	void				applyForces();	
+	static void			PSolve_Anchors(btSoftBody* psb,btScalar kst,btScalar ti);	// the PSolve_* functions match psolver_t
+	static void			PSolve_RContacts(btSoftBody* psb,btScalar kst,btScalar ti);
+	static void			PSolve_SContacts(btSoftBody* psb,btScalar,btScalar ti);
+	static void			PSolve_Links(btSoftBody* psb,btScalar kst,btScalar ti);
+	static void			VSolve_Links(btSoftBody* psb,btScalar kst);	// matches vsolver_t
+	static psolver_t	getSolver(ePSolver::_ solver);
+	static vsolver_t	getSolver(eVSolver::_ solver);
+
+
+	virtual	int	calculateSerializeBufferSize()	const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer,  class btSerializer* serializer) const;
+
+	//virtual void serializeSingleObject(class btSerializer* serializer) const;
+
+
+};
+
+
+
+
+#endif //_BT_SOFT_BODY_H
diff --git a/src/bullet/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.cpp b/src/bullet/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.cpp
new file mode 100644
index 00000000..d99be3b8
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.cpp
@@ -0,0 +1,368 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btSoftBodyConcaveCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btMultiSphereShape.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionShapes/btConcaveShape.h"
+#include "BulletCollision/CollisionDispatch/btManifoldResult.h"
+#include "BulletCollision/NarrowPhaseCollision/btRaycastCallback.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "BulletCollision/CollisionShapes/btTetrahedronShape.h"
+#include "BulletCollision/CollisionShapes/btConvexHullShape.h"
+
+
+
+#include "LinearMath/btIDebugDraw.h"
+#include "BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h"
+#include "BulletSoftBody/btSoftBody.h"
+
+#define BT_SOFTBODY_TRIANGLE_EXTRUSION btScalar(0.06)// TODO: make this configurable. Offset applied by processTriangle() to the triangle corners on each side of the face; also added to the margin in setTimeStepAndCounters().
+
+// Forwards ci to the base class, records the swap flag and builds the triangle callback from ci's dispatcher and both bodies.
+btSoftBodyConcaveCollisionAlgorithm::btSoftBodyConcaveCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,
+	btCollisionObject* body0, btCollisionObject* body1, bool isSwapped)
+	: btCollisionAlgorithm(ci), m_isSwapped(isSwapped),
+	  m_btSoftBodyTriangleCallback(ci.m_dispatcher1, body0, body1, isSwapped)
+{}
+
+
+
+// Nothing to release explicitly: the embedded m_btSoftBodyTriangleCallback clears its own shape cache in its destructor.
+btSoftBodyConcaveCollisionAlgorithm::~btSoftBodyConcaveCollisionAlgorithm()
+{}
+
+
+
+// Binds the callback to its pair of bodies; isSwapped tells which of body0/body1 is the soft body.
+btSoftBodyTriangleCallback::btSoftBodyTriangleCallback(btDispatcher*  dispatcher,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped):
+m_softBody((btSoftBody*) (isSwapped? body1:body0)),	// unchecked downcast: the selected body is assumed to be a btSoftBody
+m_triBody(isSwapped? body0:body1),
+m_resultOut(0),	// supplied later by setTimeStepAndCounters()
+m_dispatcher(dispatcher),
+m_dispatchInfoPtr(0),
+m_collisionMarginTriangle(btScalar(0.)),
+m_triangleCount(0)
+{
+	// Every scalar and pointer member is initialised explicitly (in declaration order) so that none of them
+	// ever holds an indeterminate value. No manifold is requested from the dispatcher.
+	clearCache();
+}
+
+btSoftBodyTriangleCallback::~btSoftBodyTriangleCallback()	// Releases every shape still held in m_shapeCache.
+{
+	clearCache();	// deletes the cached per-triangle shapes
+	//	m_dispatcher->releaseManifold( m_manifoldPtr );
+	// (no manifold is obtained by this callback, so the release call above stays disabled)
+}
+
+
+void	btSoftBodyTriangleCallback::clearCache()	// Deletes every shape held in m_shapeCache, then empties the map.
+{
+	for (int slot = 0; slot < m_shapeCache.size(); ++slot)
+	{
+		btTriIndex* entry = m_shapeCache.getAtIndex(slot);
+		btAssert(entry);
+		btAssert(entry->m_childShape);
+		m_softBody->getWorldInfo()->m_sparsesdf.RemoveReferences(entry->m_childShape);	// drop sparse-SDF references first (original note: necessary?)
+		delete entry->m_childShape;
+	}
+	m_shapeCache.clear();
+}
+
+
+// Collides the soft body with a single triangle of the concave mesh.
+//
+// The triangle is stood in for by a thin convex hull: its three corners moved by
+// BT_SOFTBODY_TRIANGLE_EXTRUSION along the face normal, once in each direction. The hull is built
+// the first time a (partId,triangleIndex) pair is seen and is looked up in m_shapeCache on later
+// calls; cached hulls are deleted only by clearCache().
+//
+// While the pair algorithm runs, the mesh object's collision shape is replaced by the hull and
+// put back afterwards. m_dispatchInfoPtr is dereferenced unconditionally at that point and
+// m_resultOut receives the output, so setTimeStepAndCounters() must have been called beforehand.
+//
+//   triangle      - the three corner positions of the triangle
+//   partId        - part of the mesh the triangle belongs to (packed into the cache key)
+//   triangleIndex - index of the triangle within that part (packed into the cache key)
+//
+// NOTE(review): for a zero-area triangle the cross product below is a zero vector when
+// normalize() is called on it - confirm that such triangles never reach this callback.
+void btSoftBodyTriangleCallback::processTriangle(btVector3* triangle,int partId, int triangleIndex)
+{
+	btCollisionObject* meshObject = static_cast<btCollisionObject*>(m_triBody);
+	btCollisionAlgorithmConstructionInfo ci;
+	ci.m_dispatcher1 = m_dispatcher;
+
+	// Debug drawing of the overlapping triangle: outlined in colour (1,1,0) after applying the
+	// mesh object's world transform, when the debug drawer has DBG_DrawWireframe set.
+	const bool drawWireframe = m_dispatchInfoPtr && m_dispatchInfoPtr->m_debugDraw &&
+		(m_dispatchInfoPtr->m_debugDraw->getDebugMode() & btIDebugDraw::DBG_DrawWireframe);
+	if (drawWireframe)
+	{
+		const btVector3 outlineColor(1,1,0);
+		btTransform& meshXform = meshObject->getWorldTransform();
+		for (int edge=0;edge<3;edge++)
+		{
+			const int next = (edge+1)%3;
+			m_dispatchInfoPtr->m_debugDraw->drawLine(meshXform(triangle[edge]),meshXform(triangle[next]),outlineColor);
+		}
+	}
+
+	// Cache key: part id and triangle index packed into one integer by btTriIndex.
+	btTriIndex	triIndex(partId,triangleIndex,0);
+	btHashKey<btTriIndex> triKey(triIndex.getUid());
+
+	// The lookup result is a pointer to the stored entry; it is tested for null below.
+	btTriIndex* cachedEntry = m_shapeCache[triKey];
+	const bool wasCached = (cachedEntry != 0);
+
+	btCollisionShape* proxyShape = 0;
+	if (wasCached)
+	{
+		proxyShape = cachedEntry->m_childShape;
+		btAssert(proxyShape);
+	}
+	else
+	{
+		// First visit of this triangle (the aabb filter is already applied at this point):
+		// extrude the three corners to both sides of the face and wrap them in a convex hull.
+		btVector3 extrusion = (triangle[1]-triangle[0]).cross(triangle[2]-triangle[0]);
+		extrusion.normalize();
+		extrusion*= BT_SOFTBODY_TRIANGLE_EXTRUSION;
+
+		btVector3	hullPoints[6] = {
+			triangle[0]+extrusion,
+			triangle[1]+extrusion,
+			triangle[2]+extrusion,
+			triangle[0]-extrusion,
+			triangle[1]-extrusion,
+			triangle[2]-extrusion};
+
+		// Allocated with new; the pointer is stored in m_shapeCache at the end of this function
+		// and clearCache() is what eventually deletes it.
+		proxyShape = new btConvexHullShape(&hullPoints[0].getX(),6);
+	}
+
+	// Copy the mesh object's user pointer over to the per-triangle shape.
+	proxyShape->setUserPointer(meshObject->getRootCollisionShape()->getUserPointer());
+
+	// Step 1: temporarily make the mesh object carry the per-triangle shape.
+	btCollisionShape* meshShape = meshObject->getCollisionShape();
+	meshObject->internalSetTemporaryCollisionShape( proxyShape );
+
+	// Step 2: ask the dispatcher for the algorithm registered for this pair (no manifold is
+	// passed), run it, then destroy it explicitly and return its memory to the dispatcher.
+	btCollisionAlgorithm* pairAlgo = ci.m_dispatcher1->findAlgorithm(m_softBody,m_triBody,0);
+	pairAlgo->processCollision(m_softBody,m_triBody,*m_dispatchInfoPtr,m_resultOut);
+	pairAlgo->~btCollisionAlgorithm();
+	ci.m_dispatcher1->freeCollisionAlgorithm(pairAlgo);
+
+	// Step 3: put the mesh object's own shape back.
+	meshObject->internalSetTemporaryCollisionShape( meshShape );
+
+	if (!wasCached)
+	{
+		// Keep the new hull so that later calls for the same triangle reuse it; the entry is
+		// stored only after the collision pass above has finished.
+		triIndex.m_childShape = proxyShape;
+		m_shapeCache.insert(triKey,triIndex);
+	}
+}
+
+
+
+// Stores the per-step inputs (dispatchInfo is kept by address) and recomputes the query box m_aabbMin/m_aabbMax:
+// the soft body's AABB, grown by the triangle margin plus the extrusion, expressed in m_triBody's local frame.
+void	btSoftBodyTriangleCallback::setTimeStepAndCounters(btScalar collisionMarginTriangle,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	m_resultOut = resultOut;
+	m_dispatchInfoPtr = &dispatchInfo;
+	m_collisionMarginTriangle = collisionMarginTriangle+btScalar(BT_SOFTBODY_TRIANGLE_EXTRUSION);
+	// Centre and half size of the soft body's bounding box.
+	btVector3	boxMin,boxMax;
+	m_softBody->getAabb(boxMin,boxMax);
+	const btVector3 boxCenter = (boxMax+boxMin)*btScalar(0.5);
+	const btVector3 boxHalfSize = (boxMax-boxMin)*btScalar(0.5);
+
+	// A pure translation to the box centre, taken into the triangle body's local frame.
+	btTransform centerXform;
+	centerXform.setIdentity();
+	centerXform.setOrigin(boxCenter);
+	const btTransform softInTriSpace = m_triBody->getWorldTransform().inverse() * centerXform;
+	btTransformAabb(boxHalfSize,m_collisionMarginTriangle,softInTriSpace,m_aabbMin,m_aabbMax);
+}
+
+void btSoftBodyConcaveCollisionAlgorithm::clearCache()	// Drops all cached per-triangle shapes.
+{
+	m_btSoftBodyTriangleCallback.clearCache();
+	// (the embedded triangle callback owns the cache and deletes the shapes it holds)
+}
+
+// Runs the narrow phase for a soft body against a concave triangle mesh.
+//
+// The mesh triangles found for the soft body's bounding box (as computed by
+// setTimeStepAndCounters()) are handed one by one to m_btSoftBodyTriangleCallback.
+//
+//   body0, body1 - the colliding pair; the mesh is body1, or body0 when m_isSwapped is set
+//   dispatchInfo - per-step dispatch settings, passed on to the triangle callback
+//   resultOut    - result object, passed on to the triangle callback
+//
+// Does nothing when the mesh side's collision shape does not report itself as concave.
+void btSoftBodyConcaveCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	// Only the mesh side is looked at here; the callback received both bodies at construction.
+	btCollisionObject* meshBody = m_isSwapped ? body0 : body1;
+
+	if (!meshBody->getCollisionShape()->isConcave())
+	{
+		return;
+	}
+
+	btConcaveShape* meshShape = static_cast<btConcaveShape*>( meshBody->getCollisionShape());
+
+	// Refresh the callback's per-step state and its query box; the margin comes from the
+	// mesh shape.
+	const btScalar meshMargin = meshShape->getMargin();
+	m_btSoftBodyTriangleCallback.setTimeStepAndCounters(meshMargin,dispatchInfo,resultOut);
+
+	// Persistency is disabled: no persistent manifold is attached to resultOut or cleared here.
+
+	// Feed the mesh triangles found for the query box to the callback's processTriangle().
+	const btVector3& queryMin = m_btSoftBodyTriangleCallback.getAabbMin();
+	const btVector3& queryMax = m_btSoftBodyTriangleCallback.getAabbMax();
+	meshShape->processAllTriangles( &m_btSoftBodyTriangleCallback,queryMin,queryMax);
+
+	// (the refreshContactPoints() call on resultOut remains disabled)
+}
+
+
+// Estimates the time of impact of the soft body against the triangle mesh with a swept sphere.
+//
+// A sphere of radius getCcdSweptSphereRadius() is moved from the body's current origin to its
+// interpolated origin (both taken in the mesh object's local frame) and cast against every mesh
+// triangle reported for the box enclosing that motion.
+//
+//   body0, body1 - the colliding pair; m_isSwapped selects which one is the moving body
+//   dispatchInfo - unused
+//   resultOut    - unused
+//
+// Returns the earliest hit fraction found if it is smaller than the moving body's stored hit
+// fraction (which is then overwritten with it); returns 1 in every other case.
+btScalar btSoftBodyConcaveCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	(void)resultOut;
+	(void)dispatchInfo;
+	btCollisionObject* movingBody = m_isSwapped ? body1 : body0;
+	btCollisionObject* meshBody = m_isSwapped ? body0 : body1;
+
+	// Quick approximation; todo: hook up to the continuous collision detection (one of the btConvexCast) and only do it if the motion exceeds the 'radius'.
+
+	// Only perform CCD above a certain motion threshold. This prevents blocking in the long run,
+	// because an object in a blocked ccd state (hitfraction<1) gets its linear velocity halved
+	// each frame.
+	const btVector3 travel = movingBody->getInterpolationWorldTransform().getOrigin() - movingBody->getWorldTransform().getOrigin();
+	if (travel.length2() < movingBody->getCcdSquareMotionThreshold())
+	{
+		return btScalar(1.);
+	}
+
+	// Start and end pose of the moving body expressed in the mesh object's local frame.
+	const btTransform worldToMesh = meshBody->getWorldTransform().inverse();
+	const btTransform startInMesh = worldToMesh * movingBody->getWorldTransform();
+	const btTransform endInMesh = worldToMesh * movingBody->getInterpolationWorldTransform();
+
+	// Triangle callback that casts the swept sphere against each triangle it is given and keeps
+	// the smallest hit fraction seen so far in m_hitFraction.
+	struct SweptSphereTriangleCallback	: public btTriangleCallback
+	{
+		btTransform m_ccdSphereFromTrans;
+		btTransform m_ccdSphereToTrans;
+		btTransform	m_meshTransform;	// not set or read anywhere in this function
+
+		btScalar	m_ccdSphereRadius;
+		btScalar	m_hitFraction;
+
+		SweptSphereTriangleCallback(const btTransform& from,const btTransform& to,btScalar ccdSphereRadius,btScalar hitFraction)
+			:m_ccdSphereFromTrans(from),
+			m_ccdSphereToTrans(to),
+			m_ccdSphereRadius(ccdSphereRadius),
+			m_hitFraction(hitFraction)
+		{
+		}
+
+		virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex)
+		{
+			(void)partId;
+			(void)triangleIndex;
+
+			// Do a swept sphere for now. The sphere poses are passed as the first shape's
+			// from/to transforms; the triangle gets the identity for both of its transforms.
+			btTransform identity;
+			identity.setIdentity();
+			btSphereShape	sweptSphere(m_ccdSphereRadius);
+			btTriangleShape	triShape(triangle[0],triangle[1],triangle[2]);
+			btVoronoiSimplexSolver	simplexSolver;
+			btSubsimplexConvexCast caster(&sweptSphere,&triShape,&simplexSolver);
+
+			// Seed the cast result with the best fraction so far; keep the new one only if smaller.
+			btConvexCast::CastResult hit;
+			hit.m_fraction = m_hitFraction;
+			const bool gotHit = caster.calcTimeOfImpact(m_ccdSphereFromTrans,m_ccdSphereToTrans,identity,identity,hit);
+			if (gotHit && hit.m_fraction < m_hitFraction)
+			{
+				m_hitFraction = hit.m_fraction;
+			}
+		}
+	};
+
+	// Without a concave shape on the mesh side there is nothing to cast against.
+	if (!meshBody->getCollisionShape()->isConcave())
+	{
+		return btScalar(1.);
+	}
+
+	// Box around the origin's path from start to end, padded by the swept-sphere radius.
+	const btScalar sweepRadius = movingBody->getCcdSweptSphereRadius();
+	const btVector3 padding(sweepRadius,sweepRadius,sweepRadius);
+	btVector3 sweepMin = startInMesh.getOrigin();
+	sweepMin.setMin(endInMesh.getOrigin());
+	sweepMin -= padding;
+	btVector3 sweepMax = startInMesh.getOrigin();
+	sweepMax.setMax(endInMesh.getOrigin());
+	sweepMax += padding;
+
+	// The callback is seeded with the body's current hit fraction, so only an earlier hit
+	// can lower it.
+	SweptSphereTriangleCallback sweepCallback(startInMesh,endInMesh,sweepRadius,btScalar(1.));
+	sweepCallback.m_hitFraction = movingBody->getHitFraction();
+
+	btConcaveShape* meshShape = (btConcaveShape*) meshBody->getCollisionShape();
+	if (meshShape)
+	{
+		meshShape->processAllTriangles(&sweepCallback,sweepMin,sweepMax);
+	}
+
+	if (sweepCallback.m_hitFraction < movingBody->getHitFraction())
+	{
+		movingBody->setHitFraction( sweepCallback.m_hitFraction);
+		return sweepCallback.m_hitFraction;
+	}
+
+	return btScalar(1.);
+}
diff --git a/src/bullet/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.h b/src/bullet/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.h
new file mode 100644
index 00000000..11ec5b37
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.h
@@ -0,0 +1,153 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_CONCAVE_COLLISION_ALGORITHM_H
+#define BT_SOFT_BODY_CONCAVE_COLLISION_ALGORITHM_H
+
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseInterface.h"
+#include "BulletCollision/CollisionShapes/btTriangleCallback.h"
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+class btDispatcher;
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+class btSoftBody;
+class btCollisionShape;
+
+#include "LinearMath/btHashMap.h"
+
+#include "BulletCollision/BroadphaseCollision/btQuantizedBvh.h" //for definition of MAX_NUM_PARTS_IN_BITS
+
+struct btTriIndex	// Cache entry for one mesh triangle: a packed (part id, triangle index) key plus the shape associated with it.
+{
+	int m_PartIdTriangleIndex;	// part id in the bits from (31-MAX_NUM_PARTS_IN_BITS) upwards, triangle index in the bits below
+	class btCollisionShape*	m_childShape;	// shape associated with the triangle; this struct itself never deletes it
+
+	btTriIndex(int partId,int triangleIndex,btCollisionShape* shape)
+	{
+		m_PartIdTriangleIndex = (partId<<(31-MAX_NUM_PARTS_IN_BITS)) | triangleIndex;
+		m_childShape = shape;
+	}
+
+	int	getTriangleIndex() const
+	{
+		// Get only the lower bits where the triangle index is stored. The mask is built from an unsigned value: left-shifting the negative int (~0) is undefined behaviour before C++20.
+		return static_cast<int>(static_cast<unsigned int>(m_PartIdTriangleIndex)&~((~0u)<<(31-MAX_NUM_PARTS_IN_BITS)));
+	}
+	int	getPartId() const
+	{
+		// Get only the highest bits where the part index is stored
+		return (m_PartIdTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS));
+	}
+	int	getUid() const
+	{
+		return m_PartIdTriangleIndex;	// the packed value doubles as the hash-map key
+	}
+};
+
+
+///For each triangle in the concave mesh that overlaps with the AABB of a soft body (m_softBody), processTriangle is called.
+class btSoftBodyTriangleCallback : public btTriangleCallback
+{
+	btSoftBody* m_softBody;	// soft body side of the pair
+	btCollisionObject* m_triBody;	// object whose triangles are visited
+	// Query box recomputed by setTimeStepAndCounters() and exposed through getAabbMin()/getAabbMax().
+	btVector3	m_aabbMin;
+	btVector3	m_aabbMax ;
+
+	btManifoldResult* m_resultOut;	// handed to the per-triangle collision algorithm in processTriangle()
+
+	btDispatcher*	m_dispatcher;	// used by processTriangle() to find and free the per-triangle algorithm
+	const btDispatcherInfo* m_dispatchInfoPtr;	// null until setTimeStepAndCounters() stores the caller's dispatch info
+	btScalar m_collisionMarginTriangle;	// margin given to setTimeStepAndCounters() plus BT_SOFTBODY_TRIANGLE_EXTRUSION
+
+	btHashMap<btHashKey<btTriIndex>,btTriIndex> m_shapeCache;	// per-triangle shapes, keyed by btTriIndex::getUid()
+
+public:
+	int	m_triangleCount;	// debug counter; its only use in processTriangle() is a commented-out printf
+
+	//	btPersistentManifold*	m_manifoldPtr;
+	// isSwapped selects which body is the soft body: body1 when true, body0 otherwise.
+	btSoftBodyTriangleCallback(btDispatcher* dispatcher,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped);
+	// Stores the per-step inputs and recomputes the query box; call before triangles are processed.
+	void	setTimeStepAndCounters(btScalar collisionMarginTriangle,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	// Releases the cached shapes through clearCache().
+	virtual ~btSoftBodyTriangleCallback();
+	// Collides the soft body with one triangle, using a convex shape that is built once per triangle and cached.
+	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex);
+	// Deletes all cached per-triangle shapes and empties the cache.
+	void clearCache();
+
+	SIMD_FORCE_INLINE const btVector3& getAabbMin() const
+	{
+		return m_aabbMin;
+	}
+	SIMD_FORCE_INLINE const btVector3& getAabbMax() const
+	{
+		return m_aabbMax;
+	}
+
+};
+
+
+
+
+/// btSoftBodyConcaveCollisionAlgorithm supports collision between soft body shapes and (concave) triangle meshes.
+class btSoftBodyConcaveCollisionAlgorithm  : public btCollisionAlgorithm
+{
+	// true when the soft body is body1 and the mesh is body0; false for the opposite order
+	bool	m_isSwapped;
+	// per-triangle worker; also owns the cache of per-triangle shapes
+	btSoftBodyTriangleCallback m_btSoftBodyTriangleCallback;
+
+public:
+	// isSwapped is stored in m_isSwapped and forwarded to the triangle callback together with both bodies.
+	btSoftBodyConcaveCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped);
+
+	virtual ~btSoftBodyConcaveCollisionAlgorithm();
+	// Hands the mesh triangles found for the soft body's bounding box to the triangle callback; no-op for non-concave shapes.
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	// Swept-sphere estimate; returns 1 unless an earlier hit than the body's stored hit fraction is found.
+	btScalar	calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		//we don't add any manifolds
+	}
+	// Drops the shapes cached by the triangle callback.
+	void	clearCache();
+	// Factory for the unswapped order: constructs the algorithm with isSwapped=false in memory obtained from the dispatcher.
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btSoftBodyConcaveCollisionAlgorithm));
+			return new(mem) btSoftBodyConcaveCollisionAlgorithm(ci,body0,body1,false);
+		}
+	};
+	// Factory for the swapped order: same as CreateFunc but constructs the algorithm with isSwapped=true.
+	struct SwappedCreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btSoftBodyConcaveCollisionAlgorithm));
+			return new(mem) btSoftBodyConcaveCollisionAlgorithm(ci,body0,body1,true);
+		}
+	};
+
+};
+
+#endif //BT_SOFT_BODY_CONCAVE_COLLISION_ALGORITHM_H
diff --git a/src/bullet/BulletSoftBody/btSoftBodyData.h b/src/bullet/BulletSoftBody/btSoftBodyData.h
new file mode 100644
index 00000000..40dc65c3
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftBodyData.h
@@ -0,0 +1,217 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFTBODY_FLOAT_DATA
+#define BT_SOFTBODY_FLOAT_DATA
+
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+
+
+
+struct	SoftBodyMaterialData	// Serialization record for a soft body material: three stiffness values and a flags word.
+{
+	float	m_linearStiffness;
+	float	m_angularStiffness;
+	float	m_volumeStiffness;
+	int		m_flags;
+};
+
+struct	SoftBodyNodeData	// Serialization record for one soft body node.
+{
+	SoftBodyMaterialData		*m_material;
+	btVector3FloatData			m_position;
+	btVector3FloatData			m_previousPosition;
+	btVector3FloatData			m_velocity;
+	btVector3FloatData			m_accumulatedForce;
+	btVector3FloatData			m_normal;
+	float						m_inverseMass;
+	float						m_area;
+	int							m_attach;
+	int							m_pad;	// presumably padding only - TODO confirm
+};
+
+struct	SoftBodyLinkData	// Serialization record for one link between two nodes.
+{
+	SoftBodyMaterialData	*m_material;
+	int						m_nodeIndices[2];			// Indices of the two linked nodes (stored as indices, not pointers)
+	float					m_restLength;			// Rest length
+	int						m_bbending;		// Bending link
+};
+
+struct	SoftBodyFaceData	// Serialization record for one triangular face.
+{
+	btVector3FloatData		m_normal;		// Normal
+	SoftBodyMaterialData	*m_material;
+	int						m_nodeIndices[3];			// Indices of the three face nodes (stored as indices, not pointers)
+	float					m_restArea;			// Rest area
+};	
+
+struct	SoftBodyTetraData	// Serialization record for one tetrahedron.
+{
+	btVector3FloatData		m_c0[4];		// gradients
+	SoftBodyMaterialData	*m_material;
+	int						m_nodeIndices[4];			// Indices of the four tetrahedron nodes (stored as indices, not pointers)
+	float					m_restVolume;			// Rest volume
+	float					m_c1;			// (4*kVST)/(im0+im1+im2+im3)
+	float					m_c2;			// m_c1/sum(|g0..3|^2)
+	int						m_pad;	// presumably padding only - TODO confirm
+};
+
+struct	SoftRigidAnchorData	// Serialization record for an anchor tying one node to a rigid body.
+{
+	btMatrix3x3FloatData	m_c0;			// Impulse matrix
+	btVector3FloatData		m_c1;			// Relative anchor
+	btVector3FloatData		m_localFrame;		// Anchor position in body space
+	btRigidBodyData			*m_rigidBody;
+	int						m_nodeIndex;			// Index of the anchored node (stored as an index, not a pointer)
+	float					m_c2;			// ima*dt
+};
+
+
+
+struct	SoftBodyConfigData	// Serialization record for a soft body's configuration values.
+{
+	int					m_aeroModel;		// Aerodynamic model (default: V_Point)
+	float				m_baumgarte;			// Velocities correction factor (Baumgarte)
+	float				m_damping;			// Damping coefficient [0,1]
+	float				m_drag;			// Drag coefficient [0,+inf]
+	float				m_lift;			// Lift coefficient [0,+inf]
+	float				m_pressure;			// Pressure coefficient [-inf,+inf]
+	float				m_volume;			// Volume conservation coefficient [0,+inf]
+	float				m_dynamicFriction;			// Dynamic friction coefficient [0,1]
+	float				m_poseMatch;			// Pose matching coefficient [0,1]
+	float				m_rigidContactHardness;			// Rigid contacts hardness [0,1]
+	float				m_kineticContactHardness;			// Kinetic contacts hardness [0,1]
+	float				m_softContactHardness;			// Soft contacts hardness [0,1]
+	float				m_anchorHardness;			// Anchors hardness [0,1]
+	float				m_softRigidClusterHardness;		// Soft vs rigid hardness [0,1] (cluster only)
+	float				m_softKineticClusterHardness;		// Soft vs kinetic hardness [0,1] (cluster only)
+	float				m_softSoftClusterHardness;		// Soft vs soft hardness [0,1] (cluster only)
+	float				m_softRigidClusterImpulseSplit;	// Soft vs rigid impulse split [0,1] (cluster only)
+	float				m_softKineticClusterImpulseSplit;	// Soft vs kinetic impulse split [0,1] (cluster only)
+	float				m_softSoftClusterImpulseSplit;	// Soft vs soft impulse split [0,1] (cluster only)
+	float				m_maxVolume;		// Maximum volume ratio for pose
+	float				m_timeScale;		// Time scale
+	int					m_velocityIterations;	// Velocities solver iterations
+	int					m_positionIterations;	// Positions solver iterations
+	int					m_driftIterations;	// Drift solver iterations
+	int					m_clusterIterations;	// Cluster solver iterations
+	int					m_collisionFlags;	// Collisions flags
+};
+
+struct	SoftBodyPoseData	// Serialization record for a soft body's pose.
+{
+	btMatrix3x3FloatData	m_rot;			// Rotation
+	btMatrix3x3FloatData	m_scale;			// Scale
+	btMatrix3x3FloatData	m_aqq;			// Base scaling
+	btVector3FloatData		m_com;			// COM
+
+	btVector3FloatData		*m_positions;			// Reference positions
+	float					*m_weights;	// Weights
+	int						m_numPositions;	// presumably the element count of m_positions - TODO confirm
+	int						m_numWeigts;	// presumably the element count of m_weights; NOTE(review): the misspelt name is part of the serialized layout, so it is left as is
+
+	int						m_bvolume;		// Is valid
+	int						m_bframe;		// Is frame
+	float					m_restVolume;		// Rest volume
+	int						m_pad;	// presumably padding only - TODO confirm
+};
+
+struct	SoftBodyClusterData	// Serialization record for one soft body cluster.
+{
+		btTransformFloatData		m_framexform;
+		btMatrix3x3FloatData		m_locii;
+		btMatrix3x3FloatData		m_invwi;
+		btVector3FloatData			m_com;
+		btVector3FloatData			m_vimpulses[2];
+		btVector3FloatData			m_dimpulses[2];
+		btVector3FloatData			m_lv;
+		btVector3FloatData			m_av;
+		
+		btVector3FloatData			*m_framerefs;
+		int							*m_nodeIndices;	// nodes are referenced by index
+		float						*m_masses;
+
+		int							m_numFrameRefs;	// presumably the element count of m_framerefs - TODO confirm
+		int							m_numNodes;	// presumably the element count of m_nodeIndices - TODO confirm
+		int							m_numMasses;	// presumably the element count of m_masses - TODO confirm
+
+		float						m_idmass;
+		float						m_imass;
+		int							m_nvimpulses;
+		int							m_ndimpulses;
+		float						m_ndamping;
+		float						m_ldamping;
+		float						m_adamping;
+		float						m_matching;
+		float						m_maxSelfCollisionImpulse;
+		float						m_selfCollisionImpulseFactor;
+		int							m_containsAnchor;
+		int							m_collide;
+		int							m_clusterIndex;
+};
+
+
+enum	btSoftJointBodyType	// presumably the values stored in btSoftBodyJointData::m_bodyAtype / m_bodyBtype - TODO confirm
+{
+	BT_JOINT_SOFT_BODY_CLUSTER=1,	// numbering starts at 1
+	BT_JOINT_RIGID_BODY,	// = 2
+	BT_JOINT_COLLISION_OBJECT	// = 3
+};
+
+struct	btSoftBodyJointData	// Serialization record for one soft body joint.
+{
+	void						*m_bodyA;	// untyped; presumably interpreted according to m_bodyAtype - TODO confirm
+	void						*m_bodyB;	// untyped; presumably interpreted according to m_bodyBtype - TODO confirm
+	btVector3FloatData			m_refs[2];
+	float						m_cfm;
+	float						m_erp;
+	float						m_split;
+	int							m_delete;
+	btVector3FloatData			m_relPosition[2];//linear
+	int							m_bodyAtype;	// presumably a btSoftJointBodyType value - TODO confirm
+	int							m_bodyBtype;	// presumably a btSoftJointBodyType value - TODO confirm
+	int							m_jointType;
+	int							m_pad;	// presumably padding only - TODO confirm
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btSoftBodyFloatData	// Top-level serialization record of a soft body: collision object data, per-element arrays with their counts, and the configuration.
+{
+	btCollisionObjectFloatData	m_collisionObjectData;
+
+	SoftBodyPoseData		*m_pose;
+	SoftBodyMaterialData	**m_materials;	// note the double indirection: pointer to material pointers
+	SoftBodyNodeData		*m_nodes;
+	SoftBodyLinkData		*m_links;
+	SoftBodyFaceData		*m_faces;
+	SoftBodyTetraData		*m_tetrahedra;
+	SoftRigidAnchorData		*m_anchors;
+	SoftBodyClusterData		*m_clusters;
+	btSoftBodyJointData		*m_joints;
+	// The m_num* fields below presumably give the element counts of the arrays above - TODO confirm
+	int						m_numMaterials;
+	int						m_numNodes;
+	int						m_numLinks;
+	int						m_numFaces;
+	int						m_numTetrahedra;
+	int						m_numAnchors;
+	int						m_numClusters;
+	int						m_numJoints;
+	SoftBodyConfigData		m_config;
+};
+
+#endif //BT_SOFTBODY_FLOAT_DATA
+
diff --git a/src/bullet/BulletSoftBody/btSoftBodyHelpers.cpp b/src/bullet/BulletSoftBody/btSoftBodyHelpers.cpp
new file mode 100644
index 00000000..0fb3560e
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftBodyHelpers.cpp
@@ -0,0 +1,1055 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///btSoftBodyHelpers.cpp by Nathanael Presson
+
+#include "btSoftBodyInternals.h"
+#include <stdio.h>
+#include <string.h>
+#include "btSoftBodyHelpers.h"
+#include "LinearMath/btConvexHull.h"
+#include "LinearMath/btConvexHullComputer.h"
+
+
+// Draw a three-axis cross centred on x, reaching s along each axis, in colour c.
+static void				drawVertex(	btIDebugDraw* idraw,
+								   const btVector3& x,btScalar s,const btVector3& c)
+{
+	const btVector3	half[3]={btVector3(s,0,0),btVector3(0,s,0),btVector3(0,0,s)};
+	/* One line per axis, issued in X, Y, Z order. */
+	for(int k=0;k<3;++k) idraw->drawLine(x-half[k],x+half[k],c);
+}
+
+// Draw the 12 edges of the axis-aligned box [mins,maxs]; corners 0-3 lie at z=mins.z, corners 4-7 at z=maxs.z.
+static void				drawBox(	btIDebugDraw* idraw,
+								const btVector3& mins,
+								const btVector3& maxs,
+								const btVector3& color)
+{
+	const btVector3	corner[8]={	btVector3(mins.x(),mins.y(),mins.z()),
+		btVector3(maxs.x(),mins.y(),mins.z()),
+		btVector3(maxs.x(),maxs.y(),mins.z()),
+		btVector3(mins.x(),maxs.y(),mins.z()),
+		btVector3(mins.x(),mins.y(),maxs.z()),
+		btVector3(maxs.x(),mins.y(),maxs.z()),
+		btVector3(maxs.x(),maxs.y(),maxs.z()),
+		btVector3(mins.x(),maxs.y(),maxs.z())};
+	/* Ring at z=mins: 0-1, 1-2, 2-3, 3-0 */ 
+	for(int k=0;k<4;++k) idraw->drawLine(corner[k],corner[(k+1)&3],color);
+	/* Ring at z=maxs: 4-5, 5-6, 6-7, 7-4 */ 
+	for(int k=0;k<4;++k) idraw->drawLine(corner[4+k],corner[4+((k+1)&3)],color);
+	/* Edges joining the two rings: 0-4, 1-5, 2-6, 3-7 */ 
+	for(int k=0;k<4;++k) idraw->drawLine(corner[k],corner[k+4],color);
+}
+
+// Recursively draw the bounding boxes of a btDbvt subtree; children are drawn before the node itself.
+static void				drawTree(	btIDebugDraw* idraw,
+								 const btDbvtNode* node,
+								 int depth,
+								 const btVector3& ncolor,
+								 const btVector3& lcolor,
+								 int mindepth,
+								 int maxdepth)
+{
+	if(!node) return;
+	/* Descend while below maxdepth; a negative maxdepth removes the limit. */
+	if(node->isinternal()&&((depth<maxdepth)||(maxdepth<0)))
+	{
+		for(int side=0;side<2;++side)
+		{
+			drawTree(idraw,node->childs[side],depth+1,ncolor,lcolor,mindepth,maxdepth);
+		}
+	}
+	if(depth<mindepth) return;
+	/* Box is center -/+ extents*scl; scl is 1 for internal and leaf nodes alike. Leaves use lcolor, internal nodes ncolor. */
+	const btScalar	scl=(btScalar)1;
+	const btVector3	center=node->volume.Center();
+	const btVector3	half=node->volume.Extents()*scl;
+	drawBox(idraw,center-half,center+half,node->isleaf()?lcolor:ncolor);
+}
+
+// Sum of all elements of items (using T::operator+=); a value-initialised T when items is empty.
+template <typename T>
+static inline T				sum(const btAlignedObjectArray<T>& items)
+{
+	T	v=T();	/* was 'T v;', returned uninitialised for empty input. NOTE(review): a class T whose default ctor assigns nothing (btVector3, presumably) is still unspecified here */
+	if(items.size())
+	{
+		v=items[0];
+		for(int i=1,ni=items.size();i<ni;++i)
+		{
+			v+=items[i];
+		}
+	}
+	return(v);
+}
+
+// In place: items[k]+=value for every element, in ascending index order.
+template <typename T,typename Q>
+static inline void			add(btAlignedObjectArray<T>& items,const Q& value)
+{
+	/* The element count is read once, before the first update. */
+	const int	count=items.size();
+	int			k=0;
+	while(k<count) { items[k]+=value; ++k; }
+}
+
+// In place: items[k]*=value for every element, in ascending index order.
+template <typename T,typename Q>
+static inline void			mul(btAlignedObjectArray<T>& items,const Q& value)
+{
+	/* The element count is read once, before the first update. */
+	const int	count=items.size();
+	int			k=0;
+	while(k<count) { items[k]*=value; ++k; }
+}
+
+// Mean of items: sum(items) divided by the element count, with the divisor forced to 1 for an empty array.
+template <typename T>
+static inline T				average(const btAlignedObjectArray<T>& items)
+{
+	const int	count=items.size();
+	return(sum(items)/(btScalar)(count>0?count:1));
+}
+
+// Scalar triple product (x1-x0).((x2-x0)x(x3-x0)), i.e. six times the signed volume of tetrahedron x0..x3.
+static inline btScalar		tetravolume(const btVector3& x0,
+										const btVector3& x1,
+										const btVector3& x2,
+										const btVector3& x3)
+{
+	/* All three edge vectors are taken relative to x0. */
+	const btVector3	edge1=x1-x0;
+	const btVector3	normal=btCross(x2-x0,x3-x0);
+	return(btDot(edge1,normal));
+}
+
+// Disabled (#if 0): maps stress, clamped to [0,1], onto a colour ramp by interpolating between adjacent spectrum entries.
+#if 0
+static btVector3		stresscolor(btScalar stress)
+{
+	static const btVector3	spectrum[]=	{	btVector3(1,0,1),
+		btVector3(0,0,1),
+		btVector3(0,1,1),
+		btVector3(0,1,0),
+		btVector3(1,1,0),
+		btVector3(1,0,0),
+		btVector3(1,0,0)};
+	static const int		ncolors=sizeof(spectrum)/sizeof(spectrum[0])-1;	// 7 entries -> ncolors==6
+	static const btScalar	one=1;	// not referenced anywhere in this function
+	stress=btMax<btScalar>(0,btMin<btScalar>(1,stress))*ncolors;	// now in [0,ncolors]
+	const int				sel=(int)stress;
+	const btScalar			frc=stress-sel;
+	return(spectrum[sel]+(spectrum[sel+1]-spectrum[sel])*frc);	// NOTE(review): stress==1 gives sel==6 and reads spectrum[7], one past the end
+}
+#endif
+
+// Debug-draw psb through idraw. drawflags is a mask of fDrawFlags bits; when Clusters is set, the Nodes/Links/Normals/Contacts/Faces/Tetras passes (all in the else branch) are skipped.
+void			btSoftBodyHelpers::Draw(	btSoftBody* psb,
+										btIDebugDraw* idraw,
+										int drawflags)
+{
+	const btScalar		scl=(btScalar)0.1;	/* half-length of each node axis bar */
+	const btScalar		nscl=scl*5;	/* length scale for normals and contact marks */
+	const btVector3		lcolor=btVector3(0,0,0);	/* link colour */
+	const btVector3		ncolor=btVector3(1,1,1);	/* normal colour */
+	const btVector3		ccolor=btVector3(1,0,0);	/* contact cross colour */
+	int i,j,nj;
+
+		/* Clusters	*/ 
+	if(0!=(drawflags&fDrawFlags::Clusters))
+	{
+		srand(1806);	/* fixed seed: the rand() colour sequence repeats on every call; note this reseeds the C library generator */
+		for(i=0;i<psb->m_clusters.size();++i)
+		{
+			if(psb->m_clusters[i]->m_collide)
+			{
+				btVector3						color(	rand()/(btScalar)RAND_MAX,
+					rand()/(btScalar)RAND_MAX,
+					rand()/(btScalar)RAND_MAX);
+				color=color.normalized()*0.75;
+				btAlignedObjectArray<btVector3>	vertices;
+				vertices.resize(psb->m_clusters[i]->m_nodes.size());
+				for(j=0,nj=vertices.size();j<nj;++j)
+				{				
+					vertices[j]=psb->m_clusters[i]->m_nodes[j]->m_x;
+				}
+/* The macro is defined unconditionally right here, so the #else branch below is never compiled. */
+#define USE_NEW_CONVEX_HULL_COMPUTER
+#ifdef USE_NEW_CONVEX_HULL_COMPUTER
+				btConvexHullComputer	computer;
+				int stride = sizeof(btVector3);
+				int count = vertices.size();
+				btScalar shrink=0.f;
+				btScalar shrinkClamp=0.f;
+				computer.compute(&vertices[0].getX(),stride,count,shrink,shrinkClamp);	/* NOTE(review): indexes vertices[0] even when the cluster has no nodes */
+				for (int i=0;i<computer.faces.size();i++)	/* this i shadows the outer cluster index */
+				{
+
+					int face = computer.faces[i];
+					//printf("face=%d\n",face);
+					const btConvexHullComputer::Edge*  firstEdge = &computer.edges[face];
+					const btConvexHullComputer::Edge*  edge = firstEdge->getNextEdgeOfFace();
+
+					int v0 = firstEdge->getSourceVertex();
+					int v1 = firstEdge->getTargetVertex();
+					/* Walk the face's edge loop; each further edge yields one triangle from the sliding (v0,v1,v2) window. */
+					while (edge!=firstEdge)
+					{
+						int v2 = edge->getTargetVertex();
+						idraw->drawTriangle(computer.vertices[v0],computer.vertices[v1],computer.vertices[v2],color,1);
+						edge = edge->getNextEdgeOfFace();
+						v0=v1;
+						v1=v2;
+					};
+				}
+#else
+
+				HullDesc		hdsc(QF_TRIANGLES,vertices.size(),&vertices[0]);
+				HullResult		hres;
+				HullLibrary		hlib;
+				hdsc.mMaxVertices=vertices.size();
+				hlib.CreateConvexHull(hdsc,hres);
+				const btVector3	center=average(hres.m_OutputVertices);
+				add(hres.m_OutputVertices,-center);
+				mul(hres.m_OutputVertices,(btScalar)1);
+				add(hres.m_OutputVertices,center);
+				for(j=0;j<(int)hres.mNumFaces;++j)
+				{
+					const int idx[]={hres.m_Indices[j*3+0],hres.m_Indices[j*3+1],hres.m_Indices[j*3+2]};
+					idraw->drawTriangle(hres.m_OutputVertices[idx[0]],
+						hres.m_OutputVertices[idx[1]],
+						hres.m_OutputVertices[idx[2]],
+						color,1);
+				}
+				hlib.ReleaseResult(hres);
+#endif
+
+			}
+			/* Velocities (disabled)	*/ 
+#if 0
+			for(int j=0;j<psb->m_clusters[i].m_nodes.size();++j)
+			{
+				const btSoftBody::Cluster&	c=psb->m_clusters[i];
+				const btVector3				r=c.m_nodes[j]->m_x-c.m_com;
+				const btVector3				v=c.m_lv+btCross(c.m_av,r);
+				idraw->drawLine(c.m_nodes[j]->m_x,c.m_nodes[j]->m_x+v,btVector3(1,0,0));
+			}
+#endif
+			/* Frame (commented out)	*/ 
+	//		btSoftBody::Cluster& c=*psb->m_clusters[i];
+	//		idraw->drawLine(c.m_com,c.m_framexform*btVector3(10,0,0),btVector3(1,0,0));
+	//		idraw->drawLine(c.m_com,c.m_framexform*btVector3(0,10,0),btVector3(0,1,0));
+	//		idraw->drawLine(c.m_com,c.m_framexform*btVector3(0,0,10),btVector3(0,0,1));
+		}
+	}
+	else
+	{
+		/* Nodes: an RGB axis cross per node whose material has the DebugDraw flag	*/ 
+		if(0!=(drawflags&fDrawFlags::Nodes))
+		{
+			for(i=0;i<psb->m_nodes.size();++i)
+			{
+				const btSoftBody::Node&	n=psb->m_nodes[i];
+				if(0==(n.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
+				idraw->drawLine(n.m_x-btVector3(scl,0,0),n.m_x+btVector3(scl,0,0),btVector3(1,0,0));
+				idraw->drawLine(n.m_x-btVector3(0,scl,0),n.m_x+btVector3(0,scl,0),btVector3(0,1,0));
+				idraw->drawLine(n.m_x-btVector3(0,0,scl),n.m_x+btVector3(0,0,scl),btVector3(0,0,1));
+			}
+		}
+		/* Links: one line between the two end nodes	*/ 
+		if(0!=(drawflags&fDrawFlags::Links))
+		{
+			for(i=0;i<psb->m_links.size();++i)
+			{
+				const btSoftBody::Link&	l=psb->m_links[i];
+				if(0==(l.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
+				idraw->drawLine(l.m_n[0]->m_x,l.m_n[1]->m_x,lcolor);
+			}
+		}
+		/* Normals: drawn both ways from the node, the reverse side at half brightness	*/ 
+		if(0!=(drawflags&fDrawFlags::Normals))
+		{
+			for(i=0;i<psb->m_nodes.size();++i)
+			{
+				const btSoftBody::Node&	n=psb->m_nodes[i];
+				if(0==(n.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
+				const btVector3			d=n.m_n*nscl;
+				idraw->drawLine(n.m_x,n.m_x+d,ncolor);
+				idraw->drawLine(n.m_x,n.m_x-d,ncolor*0.5);
+			}
+		}
+		/* Contacts: a cross in the contact plane plus the contact normal	*/ 
+		if(0!=(drawflags&fDrawFlags::Contacts))
+		{
+			static const btVector3		axis[]={btVector3(1,0,0),
+				btVector3(0,1,0),
+				btVector3(0,0,1)};
+			for(i=0;i<psb->m_rcontacts.size();++i)
+			{		
+				const btSoftBody::RContact&	c=psb->m_rcontacts[i];
+				/* o: the node position moved along the normal by -(dot(x,normal)+offset) */
+				const btVector3				o=	c.m_node->m_x-c.m_cti.m_normal*
+					(btDot(c.m_node->m_x,c.m_cti.m_normal)+c.m_cti.m_offset);
+				const btVector3				x=btCross(c.m_cti.m_normal,axis[c.m_cti.m_normal.minAxis()]).normalized();
+				const btVector3				y=btCross(x,c.m_cti.m_normal).normalized();
+				idraw->drawLine(o-x*nscl,o+x*nscl,ccolor);
+				idraw->drawLine(o-y*nscl,o+y*nscl,ccolor);
+				idraw->drawLine(o,o+c.m_cti.m_normal*nscl*3,btVector3(1,1,0));
+			}
+		}
+		/* Faces (still inside the else branch, despite the indentation)	*/ 
+	if(0!=(drawflags&fDrawFlags::Faces))
+	{
+		const btScalar	scl=(btScalar)0.8;	/* shadows the outer scl: triangles are shrunk towards their centroid by this factor */
+		const btScalar	alp=(btScalar)1;
+		const btVector3	col(0,(btScalar)0.7,0);
+		for(i=0;i<psb->m_faces.size();++i)
+		{
+			const btSoftBody::Face&	f=psb->m_faces[i];
+			if(0==(f.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
+			const btVector3			x[]={f.m_n[0]->m_x,f.m_n[1]->m_x,f.m_n[2]->m_x};
+			const btVector3			c=(x[0]+x[1]+x[2])/3;
+			idraw->drawTriangle((x[0]-c)*scl+c,
+				(x[1]-c)*scl+c,
+				(x[2]-c)*scl+c,
+				col,alp);
+		}	
+	}
+	/* Tetras (also inside the else branch): four shrunk triangles per tetrahedron	*/ 
+	if(0!=(drawflags&fDrawFlags::Tetras))
+	{
+		const btScalar	scl=(btScalar)0.8;
+		const btScalar	alp=(btScalar)1;
+		const btVector3	col((btScalar)0.3,(btScalar)0.3,(btScalar)0.7);
+		for(int i=0;i<psb->m_tetras.size();++i)
+		{
+			const btSoftBody::Tetra&	t=psb->m_tetras[i];
+			if(0==(t.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
+			const btVector3				x[]={t.m_n[0]->m_x,t.m_n[1]->m_x,t.m_n[2]->m_x,t.m_n[3]->m_x};
+			const btVector3				c=(x[0]+x[1]+x[2]+x[3])/4;
+			idraw->drawTriangle((x[0]-c)*scl+c,(x[1]-c)*scl+c,(x[2]-c)*scl+c,col,alp);
+			idraw->drawTriangle((x[0]-c)*scl+c,(x[1]-c)*scl+c,(x[3]-c)*scl+c,col,alp);
+			idraw->drawTriangle((x[1]-c)*scl+c,(x[2]-c)*scl+c,(x[3]-c)*scl+c,col,alp);
+			idraw->drawTriangle((x[2]-c)*scl+c,(x[0]-c)*scl+c,(x[3]-c)*scl+c,col,alp);
+		}	
+	}
+	}
+	/* Anchors: from here on, every pass runs whether or not Clusters is set	*/ 
+	if(0!=(drawflags&fDrawFlags::Anchors))
+	{
+		for(i=0;i<psb->m_anchors.size();++i)
+		{
+			const btSoftBody::Anchor&	a=psb->m_anchors[i];
+			const btVector3				q=a.m_body->getWorldTransform()*a.m_local;	/* anchor point transformed by the body's world transform */
+			drawVertex(idraw,a.m_node->m_x,0.25,btVector3(1,0,0));
+			drawVertex(idraw,q,0.25,btVector3(0,1,0));
+			idraw->drawLine(a.m_node->m_x,q,btVector3(1,1,1));
+		}
+		/* Also mark every node whose inverse mass is <= 0. */
+		for(i=0;i<psb->m_nodes.size();++i)
+		{
+			const btSoftBody::Node&	n=psb->m_nodes[i];		
+			if(0==(n.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
+			if(n.m_im<=0)
+			{
+				drawVertex(idraw,n.m_x,0.25,btVector3(1,0,0));
+			}
+		}
+	}
+	
+
+	/* Notes: text placed at m_offset plus the m_coords-weighted sum of the note's node positions	*/ 
+	if(0!=(drawflags&fDrawFlags::Notes))
+	{
+		for(i=0;i<psb->m_notes.size();++i)
+		{
+			const btSoftBody::Note&	n=psb->m_notes[i];
+			btVector3				p=n.m_offset;
+			for(int j=0;j<n.m_rank;++j)
+			{
+				p+=n.m_nodes[j]->m_x*n.m_coords[j];
+			}
+			idraw->draw3dText(p,n.m_text);
+		}
+	}
+	/* Node tree	*/ 
+	if(0!=(drawflags&fDrawFlags::NodeTree))		DrawNodeTree(psb,idraw);
+	/* Face tree	*/ 
+	if(0!=(drawflags&fDrawFlags::FaceTree))		DrawFaceTree(psb,idraw);
+	/* Cluster tree	*/ 
+	if(0!=(drawflags&fDrawFlags::ClusterTree))	DrawClusterTree(psb,idraw);
+	/* Joints		*/ 
+	if(0!=(drawflags&fDrawFlags::Joints))
+	{
+		for(i=0;i<psb->m_joints.size();++i)
+		{
+			const btSoftBody::Joint*	pj=psb->m_joints[i];
+			switch(pj->Type())
+			{
+			case	btSoftBody::Joint::eType::Linear:
+				{
+					/* Line from each body origin to its transformed reference point, plus a cross at that point. */
+					const btSoftBody::LJoint*	pjl=(const btSoftBody::LJoint*)pj;
+					const btVector3	a0=pj->m_bodies[0].xform()*pjl->m_refs[0];
+					const btVector3	a1=pj->m_bodies[1].xform()*pjl->m_refs[1];
+					idraw->drawLine(pj->m_bodies[0].xform().getOrigin(),a0,btVector3(1,1,0));
+					idraw->drawLine(pj->m_bodies[1].xform().getOrigin(),a1,btVector3(0,1,1));
+					drawVertex(idraw,a0,0.25,btVector3(1,1,0));
+					drawVertex(idraw,a1,0.25,btVector3(0,1,1));
+				}
+				break;
+			case	btSoftBody::Joint::eType::Angular:
+				{
+					/* Both rotated reference vectors are drawn from both body origins, scaled by 10. */
+					//const btSoftBody::AJoint*	pja=(const btSoftBody::AJoint*)pj;
+					const btVector3	o0=pj->m_bodies[0].xform().getOrigin();
+					const btVector3	o1=pj->m_bodies[1].xform().getOrigin();
+					const btVector3	a0=pj->m_bodies[0].xform().getBasis()*pj->m_refs[0];
+					const btVector3	a1=pj->m_bodies[1].xform().getBasis()*pj->m_refs[1];
+					idraw->drawLine(o0,o0+a0*10,btVector3(1,1,0));
+					idraw->drawLine(o0,o0+a1*10,btVector3(1,1,0));
+					idraw->drawLine(o1,o1+a0*10,btVector3(0,1,1));
+					idraw->drawLine(o1,o1+a1*10,btVector3(0,1,1));
+					break;
+				}
+				default:	/* any other joint type draws nothing */
+				{
+				}
+					
+			}		
+		}
+	}
+}
+
+// Label each node through draw3dText with " M(..)" (1/m_im) and/or " A(..)" (m_area); the stress flag is ignored.
+void			btSoftBodyHelpers::DrawInfos(		btSoftBody* psb,
+											 btIDebugDraw* idraw,
+											 bool masses,
+											 bool areas,
+											 bool /*stress*/)
+{
+	for(int k=0;k<psb->m_nodes.size();++k)
+	{
+		const btSoftBody::Node&	node=psb->m_nodes[k];
+		char					label[2048]={0};
+		char*					tail=label;	/* next write position inside label */
+		if(masses)
+		{
+			tail+=sprintf(tail," M(%.2f)",1/node.m_im);
+		}
+		if(areas)
+		{
+			tail+=sprintf(tail," A(%.2f)",node.m_area);
+		}
+		/* Neither field requested: this node gets no text. */
+		if(tail==label) continue;
+		idraw->draw3dText(node.m_x,label);
+	}
+}
+
+// Debug-draw the node tree (m_ndbvt) from its root: internal boxes in (1,0,1), leaf boxes in (1,1,1).
+void			btSoftBodyHelpers::DrawNodeTree(	btSoftBody* psb,
+												btIDebugDraw* idraw,
+												int mindepth,
+												int maxdepth)
+{
+	const btVector3	internalColor(1,0,1),leafColor(1,1,1);
+	drawTree(idraw,psb->m_ndbvt.m_root,0,internalColor,leafColor,mindepth,maxdepth);
+}
+// Debug-draw the face tree (m_fdbvt) from its root: internal boxes in (0,1,0), leaf boxes in (1,0,0).
+void			btSoftBodyHelpers::DrawFaceTree(	btSoftBody* psb,
+												btIDebugDraw* idraw,
+												int mindepth,
+												int maxdepth)
+{
+	const btVector3	internalColor(0,1,0),leafColor(1,0,0);
+	drawTree(idraw,psb->m_fdbvt.m_root,0,internalColor,leafColor,mindepth,maxdepth);
+}
+// Debug-draw the cluster tree (m_cdbvt) from its root: internal boxes in (0,1,1), leaf boxes in (1,0,0).
+void			btSoftBodyHelpers::DrawClusterTree(	btSoftBody* psb,
+												   btIDebugDraw* idraw,
+												   int mindepth,
+												   int maxdepth)
+{
+	const btVector3	internalColor(0,1,1),leafColor(1,0,0);
+	drawTree(idraw,psb->m_cdbvt.m_root,0,internalColor,leafColor,mindepth,maxdepth);
+}
+// Draw the pose frame: three axes at the pose centre of mass and a small cross at every transformed pose position.
+void			btSoftBodyHelpers::DrawFrame(		btSoftBody* psb,
+											 btIDebugDraw* idraw)
+{
+	/* Nothing is drawn unless the pose's m_bframe flag is set. */
+	if(!psb->m_pose.m_bframe) return;
+	static const btScalar	ascl=10;				/* axis length	*/ 
+	static const btScalar	nscl=(btScalar)0.1;		/* cross size	*/ 
+	const btVector3			com=psb->m_pose.m_com;
+	const btMatrix3x3		trs=psb->m_pose.m_rot*psb->m_pose.m_scl;
+	/* Each unit vector is pushed through trs, normalised, and drawn in its own colour (X red, Y green, Z blue). */
+	const btVector3			unit[3]={btVector3(1,0,0),btVector3(0,1,0),btVector3(0,0,1)};
+	for(int k=0;k<3;++k)
+	{
+		idraw->drawLine(com,com+(trs*unit[k]).normalized()*ascl,unit[k]);
+	}
+	/* Pose positions are placed at com + trs*pos. */
+	for(int j=0;j<psb->m_pose.m_pos.size();++j)
+	{
+		const btVector3	world=com+trs*psb->m_pose.m_pos[j];
+		drawVertex(idraw,world,nscl,btVector3(1,0,1));
+	}
+}
+
+// Create a rope of res+2 unit-mass nodes evenly spaced from 'from' to 'to', joined by consecutive links. fixeds bit 1 / bit 2 set the mass of the first / last node to 0. Returns 0 when res<0.
+btSoftBody*		btSoftBodyHelpers::CreateRope(	btSoftBodyWorldInfo& worldInfo, const btVector3& from,
+											  const btVector3& to,
+											  int res,
+											  int fixeds)
+{
+	/* Both end nodes are required: res==-1 would divide by r-1==0 below, smaller values give a non-positive array size. */ 
+	if(res<0) return(0);
+	/* Create nodes	*/ 
+	const int		r=res+2;
+	btVector3*		x=new btVector3[r];
+	btScalar*		m=new btScalar[r];
+	int i;
+	for(i=0;i<r;++i)
+	{
+		const btScalar	t=i/(btScalar)(r-1);	/* 0 at 'from', 1 at 'to' */
+		x[i]=lerp(from,to,t);
+		m[i]=1;
+	}
+	btSoftBody*		psb= new btSoftBody(&worldInfo,r,x,m);
+	if(fixeds&1) psb->setMass(0,0);
+	if(fixeds&2) psb->setMass(r-1,0);
+	delete[] x;	/* the temporary arrays are freed right after construction, so the body presumably keeps its own copy */
+	delete[] m;
+	/* Create links	*/ 
+	for(i=1;i<r;++i)
+	{
+		psb->appendLink(i-1,i);
+	}
+	return(psb);	/* Finished */
+}
+
+// Create a resx-by-resy grid of unit-mass nodes bilinearly interpolated between the four corners.
+// The column index runs corner00->corner10 (corner01->corner11), the row index runs corner00->corner01.
+// Returns 0 when either resolution is below 2.
+btSoftBody*		btSoftBodyHelpers::CreatePatch(btSoftBodyWorldInfo& worldInfo,const btVector3& corner00,
+											   const btVector3& corner10,
+											   const btVector3& corner01,
+											   const btVector3& corner11,
+											   int resx,
+											   int resy,
+											   int fixeds,
+											   bool gendiags)
+{
+#define IDX(_x_,_y_)	((_y_)*rx+(_x_))
+	/* Create nodes	*/ 
+	if((resx<2)||(resy<2)) return(0);
+	const int	rx=resx;
+	const int	ry=resy;
+	const int	tot=rx*ry;
+	btVector3*	pts=new btVector3[tot];
+	btScalar*	mass=new btScalar[tot];
+	int row,col;
+	int slot=0;	/* running row-major index, always equal to IDX(col,row) */
+	for(row=0;row<ry;++row)
+	{
+		const btScalar	ty=row/(btScalar)(ry-1);
+		const btVector3	py0=lerp(corner00,corner01,ty);
+		const btVector3	py1=lerp(corner10,corner11,ty);
+		for(col=0;col<rx;++col,++slot)
+		{
+			const btScalar	tx=col/(btScalar)(rx-1);
+			pts[slot]=lerp(py0,py1,tx);
+			mass[slot]=1;
+		}
+	}
+	btSoftBody*		psb=new btSoftBody(&worldInfo,tot,pts,mass);
+	/* fixeds bits 1,2,4,8 set the mass of the nodes at corner00, corner10, corner01, corner11 to 0. */
+	if(fixeds&1)	psb->setMass(IDX(0,0),0);
+	if(fixeds&2)	psb->setMass(IDX(rx-1,0),0);
+	if(fixeds&4)	psb->setMass(IDX(0,ry-1),0);
+	if(fixeds&8)	psb->setMass(IDX(rx-1,ry-1),0);
+	delete[] pts;
+	delete[] mass;
+	/* Create links	and faces */ 
+	for(row=0;row<ry;++row)
+	{
+		for(col=0;col<rx;++col)
+		{
+			const bool	hasRight=(col+1)<rx;
+			const bool	hasBelow=(row+1)<ry;
+			const int	here=IDX(col,row);
+			const int	right=IDX(col+1,row);
+			const int	below=IDX(col,row+1);
+			const int	diag=IDX(col+1,row+1);
+			if(hasRight) psb->appendLink(here,right);
+			if(hasBelow) psb->appendLink(here,below);
+			/* Cells on the last column or row have no quad to triangulate. */
+			if(!(hasRight&&hasBelow)) continue;
+			/* The split diagonal alternates with the parity of col+row. */
+			if((col+row)&1)
+			{
+				psb->appendFace(here,right,diag);
+				psb->appendFace(here,diag,below);
+				if(gendiags) psb->appendLink(here,diag);
+			}
+			else
+			{
+				psb->appendFace(below,here,right);
+				psb->appendFace(below,right,diag);
+				if(gendiags) psb->appendLink(right,below);
+			}
+		}
+	}
+	/* Finished		*/ 
+#undef IDX
+	return(psb);
+}
+
+// Like CreatePatch, but always splits quads along node00-node11, supports five extra "fixeds" bits, and optionally fills tex_coords with 12 floats per quad.
+btSoftBody*		btSoftBodyHelpers::CreatePatchUV(btSoftBodyWorldInfo& worldInfo,
+												 const btVector3& corner00,
+												 const btVector3& corner10,
+												 const btVector3& corner01,
+												 const btVector3& corner11,
+												 int resx,
+												 int resy,
+												 int fixeds,
+												 bool gendiags,
+												 float* tex_coords)
+{
+
+	/*
+	*
+	*  corners (node indices shown as [ix][iy]):
+	*
+	*  [0][0]        corner00 ------- corner10   [resx-1][0]
+	*                   |                |
+	*                   |                |
+	*  [0][resy-1]   corner01 -------- corner11  [resx-1][resy-1]
+	*
+	*  ix runs corner00 -> corner10, iy runs corner00 -> corner01
+	*  (this is what the lerp calls and the IDX() arguments below implement)
+	*
+	*
+	*
+	*   "fixeds" map (a set bit gives that node mass 0):
+	*
+	*  corner00     -->   +1
+	*  corner10     -->   +2
+	*  corner01     -->   +4
+	*  corner11     -->   +8
+	*  upper middle -->  +16
+	*  left middle  -->  +32
+	*  right middle -->  +64
+	*  lower middle --> +128
+	*  center       --> +256
+	*
+	*
+	*   tex_coords size   (resx-1)*(resy-1)*12
+	*
+	*
+	*
+	*     SINGLE QUAD INTERNALS
+	*
+	*  1) btSoftBody's nodes and links,
+	*     diagonal link is optional ("gendiags")
+	*
+	*
+	*    node00 ------ node01
+	*      | .              
+	*      |   .            
+	*      |     .          
+	*      |       .        
+	*      |         .      
+	*    node10        node11
+	*
+	*
+	*
+	*   2) Faces:
+	*      two triangles,
+	*      UV coordinates (here: example for a single quad)
+	*      
+	*     (0,1)          (0,1)  (1,1)
+	*     1 |\            3 \-----| 2
+	*       | \              \    |
+	*       |  \              \   |
+	*       |   \              \  |
+	*       |    \              \ |
+	*     2 |-----\ 3            \| 1
+	*     (0,0)    (1,0)       (1,0)
+	*
+	*
+	*
+	*
+	*
+	*
+	*/
+
+#define IDX(_x_,_y_)	((_y_)*rx+(_x_))
+	/* Create nodes		*/ 
+	if((resx<2)||(resy<2)) return(0);	/* a patch needs at least 2 nodes per direction */
+	const int	rx=resx;
+	const int	ry=resy;
+	const int	tot=rx*ry;
+	btVector3*	x=new btVector3[tot];
+	btScalar*	m=new btScalar[tot];
+
+	int iy;
+
+	for(iy=0;iy<ry;++iy)
+	{
+		const btScalar	ty=iy/(btScalar)(ry-1);
+		const btVector3	py0=lerp(corner00,corner01,ty);	/* point on the ix==0 edge */
+		const btVector3	py1=lerp(corner10,corner11,ty);	/* point on the ix==rx-1 edge */
+		for(int ix=0;ix<rx;++ix)
+		{
+			const btScalar	tx=ix/(btScalar)(rx-1);
+			x[IDX(ix,iy)]=lerp(py0,py1,tx);
+			m[IDX(ix,iy)]=1;
+		}
+	}
+	btSoftBody*	psb=new btSoftBody(&worldInfo,tot,x,m);
+	if(fixeds&1)		psb->setMass(IDX(0,0),0);
+	if(fixeds&2)		psb->setMass(IDX(rx-1,0),0);
+	if(fixeds&4)		psb->setMass(IDX(0,ry-1),0);
+	if(fixeds&8)		psb->setMass(IDX(rx-1,ry-1),0);
+	if(fixeds&16)		psb->setMass(IDX((rx-1)/2,0),0);
+	if(fixeds&32)		psb->setMass(IDX(0,(ry-1)/2),0);
+	if(fixeds&64)		psb->setMass(IDX(rx-1,(ry-1)/2),0);
+	if(fixeds&128)		psb->setMass(IDX((rx-1)/2,ry-1),0);
+	if(fixeds&256)		psb->setMass(IDX((rx-1)/2,(ry-1)/2),0);
+	delete[] x;
+	delete[] m;
+
+
+	int z = 0;	/* write offset into tex_coords; advances by 12 per quad even when tex_coords is null */
+	/* Create links	and faces	*/ 
+	for(iy=0;iy<ry;++iy)
+	{
+		for(int ix=0;ix<rx;++ix)
+		{
+			const bool	mdx=(ix+1)<rx;
+			const bool	mdy=(iy+1)<ry;
+
+			/* Neighbour indices are computed unconditionally but only used when mdx/mdy say they exist. */
+			int node00=IDX(ix,iy);
+			int node01=IDX(ix+1,iy);
+			int node10=IDX(ix,iy+1);
+			int node11=IDX(ix+1,iy+1);
+
+			if(mdx) psb->appendLink(node00,node01);
+			if(mdy) psb->appendLink(node00,node10);
+			if(mdx&&mdy)
+			{
+				psb->appendFace(node00,node10,node11);
+				if (tex_coords) {
+					/* (s,t) pairs for node00, node10, node11, using the CalculateUV id map */
+					tex_coords[z+0]=CalculateUV(resx,resy,ix,iy,0);
+					tex_coords[z+1]=CalculateUV(resx,resy,ix,iy,1);
+					tex_coords[z+2]=CalculateUV(resx,resy,ix,iy,0);
+					tex_coords[z+3]=CalculateUV(resx,resy,ix,iy,2);
+					tex_coords[z+4]=CalculateUV(resx,resy,ix,iy,3);
+					tex_coords[z+5]=CalculateUV(resx,resy,ix,iy,2);
+				}
+				psb->appendFace(node11,node01,node00);
+				if (tex_coords) {
+					/* (s,t) pairs for node11, node01, node00 */
+					tex_coords[z+6 ]=CalculateUV(resx,resy,ix,iy,3);
+					tex_coords[z+7 ]=CalculateUV(resx,resy,ix,iy,2);
+					tex_coords[z+8 ]=CalculateUV(resx,resy,ix,iy,3);
+					tex_coords[z+9 ]=CalculateUV(resx,resy,ix,iy,1);
+					tex_coords[z+10]=CalculateUV(resx,resy,ix,iy,0);
+					tex_coords[z+11]=CalculateUV(resx,resy,ix,iy,1);
+				}
+				if (gendiags) psb->appendLink(node00,node11);
+				z += 12;
+			}
+		}
+	}
+	/* Finished	*/ 
+#undef IDX
+	return(psb);
+}
+
+float   btSoftBodyHelpers::CalculateUV(int resx,int resy,int ix,int iy,int id)
+{
+
+	/*
+	*  Returns one texture coordinate of the quad whose node00 is (ix,iy).
+	*
+	*    node00 --- node01
+	*      |          |
+	*    node10 --- node11
+	*
+	*
+	*   ID map:
+	*
+	*   node00 s --> 0
+	*   node00 t --> 1
+	*
+	*   node01 s --> 3
+	*   node01 t --> 1
+	*
+	*   node10 s --> 0
+	*   node10 t --> 2
+	*
+	*   node11 s --> 3
+	*   node11 t --> 2
+	*
+	*   Any other id yields 0. s grows with ix, t shrinks as iy grows.
+	*/
+
+	float tc=0.0f;
+	switch(id)
+	{
+	case 0:	/* s at column ix */
+		tc = (1.0f/((resx-1))*ix); break;
+	case 1:	/* t at row iy */
+		tc = (1.0f/((resy-1))*(resy-1-iy)); break;
+	case 2:	/* t at row iy+1 */
+		tc = (1.0f/((resy-1))*(resy-1-iy-1)); break;
+	case 3:	/* s at column ix+1 */
+		tc = (1.0f/((resx-1))*(ix+1)); break;
+	default: break;
+	}
+	return tc;
+}
+// Create a soft body from the convex hull of 3+res points: a Hammersley-style point set on the unit sphere, scaled per axis by radius and moved to center.
+btSoftBody*		btSoftBodyHelpers::CreateEllipsoid(btSoftBodyWorldInfo& worldInfo,const btVector3& center,
+												   const btVector3& radius,
+												   int res)
+{
+	struct	Hammersley
+	{
+		static void	Generate(btVector3* x,int n)
+		{
+			for(int i=0;i<n;i++)
+			{
+				btScalar	p=0.5,t=0;	/* t accumulates the bits of i mirrored about the binary point */
+				for(int bits=i;bits;bits>>=1,p*=0.5) if(bits&1) t+=p;
+				const btScalar	w=2*t-1;
+				const btScalar	a=(SIMD_PI+2*i*SIMD_PI)/n;
+				const btScalar	s=btSqrt(1-w*w);
+				x[i]=btVector3(s*btCos(a),s*btSin(a),w);
+			}
+		}
+	};
+	btAlignedObjectArray<btVector3>	vtx;
+	vtx.resize(3+res);
+	Hammersley::Generate(&vtx[0],vtx.size());
+	for(int k=0;k<vtx.size();++k)
+	{
+		vtx[k]=vtx[k]*radius+center;
+	}
+	return(CreateFromConvexHull(worldInfo,&vtx[0],vtx.size()));
+}
+
+
+
+// Create a soft body from an indexed triangle list: one node per vertex, one link per distinct triangle edge, one face per triangle.
+// vertices holds 3 scalars per vertex; the vertex count is taken as (largest index in triangles)+1.
+btSoftBody*		btSoftBodyHelpers::CreateFromTriMesh(btSoftBodyWorldInfo& worldInfo,const btScalar*	vertices,
+													 const int* triangles,
+													 int ntriangles, bool randomizeConstraints)
+{
+	const int	nindices=ntriangles*3;
+	int			maxidx=0;
+	int			i;
+	for(i=0;i<nindices;++i)
+	{
+		maxidx=btMax(triangles[i],maxidx);
+	}
+	++maxidx;
+	/* chks is a maxidx*maxidx table marking vertex pairs that already have a link. */
+	btAlignedObjectArray<bool>		chks;
+	btAlignedObjectArray<btVector3>	vtx;
+	chks.resize(maxidx*maxidx,false);
+	vtx.resize(maxidx);
+	for(i=0;i<maxidx;++i)
+	{
+		const btScalar*	v=vertices+i*3;
+		vtx[i]=btVector3(v[0],v[1],v[2]);
+	}
+	btSoftBody*		psb=new btSoftBody(&worldInfo,vtx.size(),&vtx[0],0);
+	for(i=0;i<nindices;i+=3)
+	{
+		const int*	tri=triangles+i;
+#define IDX(_x_,_y_) ((_y_)*maxidx+(_x_))
+		for(int e=0;e<3;++e)	/* edges in the order (2,0), (0,1), (1,2) */
+		{
+			const int	a=tri[(e+2)%3];
+			const int	b=tri[e];
+			if(chks[IDX(a,b)]) continue;
+			chks[IDX(a,b)]=true;
+			chks[IDX(b,a)]=true;
+			psb->appendLink(a,b);
+		}
+#undef IDX
+		psb->appendFace(tri[0],tri[1],tri[2]);
+	}
+
+	if(randomizeConstraints) psb->randomizeConstraints();
+
+	return(psb);
+}
+
+// Create a soft body whose nodes, links and faces are the triangulated convex hull of the given vertices.
+btSoftBody*		btSoftBodyHelpers::CreateFromConvexHull(btSoftBodyWorldInfo& worldInfo,	const btVector3* vertices,
+														int nvertices, bool randomizeConstraints)
+{
+	HullDesc		hdsc(QF_TRIANGLES,nvertices,vertices);
+	HullResult		hres;
+	HullLibrary		hlib;/*??*/ 
+	hdsc.mMaxVertices=nvertices;
+	hlib.CreateConvexHull(hdsc,hres);
+	/* NOTE(review): the result of CreateConvexHull is not checked before hres is used. */
+	btSoftBody*		psb=new btSoftBody(&worldInfo,(int)hres.mNumOutputVertices,
+		&hres.m_OutputVertices[0],0);
+	const int		nfaces=(int)hres.mNumFaces;
+	for(int f=0;f<nfaces;++f)
+	{
+		const int idx[]={hres.m_Indices[f*3+0],hres.m_Indices[f*3+1],hres.m_Indices[f*3+2]};
+		/* A directed edge a->b gets a link only when a<b; presumably the neighbouring face holds b->a, so each edge is linked once. */
+		for(int e=0;e<3;++e)
+		{
+			const int	a=idx[e],b=idx[(e+1)%3];
+			if(a<b) psb->appendLink(	a,b);
+		}
+		psb->appendFace(idx[0],idx[1],idx[2]);
+	}
+	hlib.ReleaseResult(hres);
+	if(randomizeConstraints) psb->randomizeConstraints();
+	return(psb);
+}
+
+
+
+/* Returns how many bytes to skip to reach the start of the next line: everything up to and including the next '\n', or up to (not past) the terminating NUL when no newline remains. */
+static int nextLine(const char* buffer)
+{
+	int numBytesRead=0;
+	/* Also stop at the NUL terminator: the original kept scanning past the end of a buffer whose last line had no '\n'. */
+	while (*buffer != '\n' && *buffer != '\0')
+	{
+		buffer++;
+		numBytesRead++;
+	}
+
+	/* Step over the newline itself when there is one. */
+	if (buffer[0]==0x0a)
+	{
+		buffer++;
+		numBytesRead++;
+	}
+	return numBytesRead;
+}
+
+/* Create from TetGen .ele, .face, .node data (passed as in-memory text). Indices are used as written, i.e. zero-based.	*/ 
+btSoftBody*	btSoftBodyHelpers::CreateFromTetGenData(btSoftBodyWorldInfo& worldInfo,
+													const char* ele,
+													const char* face,
+													const char* node,
+													bool bfacelinks,
+													bool btetralinks,
+													bool bfacesfromtetras)
+{
+btAlignedObjectArray<btVector3>	pos;
+int								nnode=0;
+int								ndims=0;
+int								nattrb=0;
+int								hasbounds=0;
+/* .node header, parsed once (the original issued this identical sscanf twice). face, bfacelinks and bfacesfromtetras are unused while the face block below stays disabled. */
+sscanf(node,"%d %d %d %d",&nnode,&ndims,&nattrb,&hasbounds);
+node += nextLine(node);
+if(nnode<0) nnode=0;	/* keep a corrupt negative count away from resize() */
+pos.resize(nnode);
+for(int i=0;i<pos.size();++i)
+	{
+	int			index=0;
+	//int			bound=0;
+	float	x,y,z;
+	const int	nread=sscanf(node,"%d %f %f %f",&index,&x,&y,&z);
+
+//	sn>>index;
+//	sn>>x;sn>>y;sn>>z;
+	node += nextLine(node);
+
+	//for(int j=0;j<nattrb;++j) 
+	//	sn>>a;
+
+	//if(hasbounds) 
+	//	sn>>bound;
+	if((nread!=4)||(index<0)||(index>=pos.size())) continue;	/* unparsed line or index outside pos: never write out of bounds */
+	pos[index].setX(btScalar(x));
+	pos[index].setY(btScalar(y));
+	pos[index].setZ(btScalar(z));
+	}
+btSoftBody*						psb=new btSoftBody(&worldInfo,nnode,nnode>0?&pos[0]:0,0);	/* do not index an empty array */
+#if 0
+if(face&&face[0])
+	{
+	int								nface=0;
+	sf>>nface;sf>>hasbounds;
+	for(int i=0;i<nface;++i)
+		{
+		int			index=0;
+		int			bound=0;
+		int			ni[3];
+		sf>>index;
+		sf>>ni[0];sf>>ni[1];sf>>ni[2];
+		sf>>bound;
+		psb->appendFace(ni[0],ni[1],ni[2]);	
+		if(btetralinks)
+			{
+			psb->appendLink(ni[0],ni[1],0,true);
+			psb->appendLink(ni[1],ni[2],0,true);
+			psb->appendLink(ni[2],ni[0],0,true);
+			}
+		}
+	}
+#endif
+
+if(ele&&ele[0])
+	{
+	int								ntetra=0;
+	int								ncorner=0;
+	int								neattrb=0;
+	sscanf(ele,"%d %d %d",&ntetra,&ncorner,&neattrb);
+	ele += nextLine(ele);
+	
+	//se>>ntetra;se>>ncorner;se>>neattrb;
+	for(int i=0;i<ntetra;++i)
+		{
+		int			index=0;
+		int			ni[4]={0,0,0,0};
+		/* A tetra is appended only when all five fields parse and every corner names an existing node; per-element attributes are not read. */
+		//se>>index;
+		//se>>ni[0];se>>ni[1];se>>ni[2];se>>ni[3];
+		bool		valid=(sscanf(ele,"%d %d %d %d %d",&index,&ni[0],&ni[1],&ni[2],&ni[3])==5);
+		ele+=nextLine(ele);
+		for(int k=0;valid&&(k<4);++k) valid=(ni[k]>=0)&&(ni[k]<nnode);
+		if(!valid) continue;
+		psb->appendTetra(ni[0],ni[1],ni[2],ni[3]);
+		if(btetralinks)
+			{
+			psb->appendLink(ni[0],ni[1],0,true);
+			psb->appendLink(ni[1],ni[2],0,true);
+			psb->appendLink(ni[2],ni[0],0,true);
+			psb->appendLink(ni[0],ni[3],0,true);
+			psb->appendLink(ni[1],ni[3],0,true);
+			psb->appendLink(ni[2],ni[3],0,true);
+			}
+		}
+	}
+printf("Nodes:  %d\r\n",psb->m_nodes.size());
+printf("Links:  %d\r\n",psb->m_links.size());
+printf("Faces:  %d\r\n",psb->m_faces.size());
+printf("Tetras: %d\r\n",psb->m_tetras.size());
+return(psb);
+}
+
diff --git a/src/bullet/BulletSoftBody/btSoftBodyHelpers.h b/src/bullet/BulletSoftBody/btSoftBodyHelpers.h
new file mode 100644
index 00000000..620a52fe
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftBodyHelpers.h
@@ -0,0 +1,143 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2008 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_HELPERS_H
+#define BT_SOFT_BODY_HELPERS_H
+
+#include "btSoftBody.h"
+
+//
+// Helpers
+//
+
+/* fDrawFlags: bit mask selecting what btSoftBodyHelpers::Draw renders	*/ 
+struct	fDrawFlags { enum _ {
+	Nodes		=	0x0001,
+	Links		=	0x0002,
+	Faces		=	0x0004,
+	Tetras		=	0x0008,
+	Normals		=	0x0010,
+	Contacts	=	0x0020,
+	Anchors		=	0x0040,
+	Notes		=	0x0080,
+	Clusters	=	0x0100,
+	NodeTree	=	0x0200,
+	FaceTree	=	0x0400,
+	ClusterTree	=	0x0800,
+	Joints		=	0x1000,
+	/* presets, combined with | and & so that no flag can carry into a neighbouring bit	*/ 
+	Std			=	Links|Faces|Tetras|Anchors|Notes|Joints,
+	StdTetra	=	(Std&~Faces)|Tetras	/* was Std-Faces+Tetras (0x10D2): Std already holds Tetras, so the sum cleared Tetras and set Normals */
+};};
+
+struct	btSoftBodyHelpers
+{
+	/* Draw body; drawflags is a mask of fDrawFlags bits								*/ 
+	static void				Draw(		btSoftBody* psb,
+		btIDebugDraw* idraw,
+		int drawflags=fDrawFlags::Std);
+	/* Draw per-node text: mass and/or area (the stress flag is ignored by the implementation)	*/ 
+	static	void			DrawInfos(	btSoftBody* psb,
+		btIDebugDraw* idraw,
+		bool masses,
+		bool areas,
+		bool stress);
+	/* Draw node tree; maxdepth<0 means no depth limit										*/ 
+	static void				DrawNodeTree(	btSoftBody* psb,
+		btIDebugDraw* idraw,
+		int mindepth=0,
+		int maxdepth=-1);
+	/* Draw face tree; maxdepth<0 means no depth limit										*/ 
+	static void				DrawFaceTree(	btSoftBody* psb,
+		btIDebugDraw* idraw,
+		int mindepth=0,
+		int maxdepth=-1);
+	/* Draw cluster tree; maxdepth<0 means no depth limit									*/ 
+	static void				DrawClusterTree(btSoftBody* psb,
+		btIDebugDraw* idraw,
+		int mindepth=0,
+		int maxdepth=-1);
+	/* Draw rigid frame														*/ 
+	static	void			DrawFrame(		btSoftBody* psb,
+		btIDebugDraw* idraw);
+	/* Create a rope of res+2 nodes; fixeds bit 1 / bit 2 give the first / last node mass 0	*/ 
+	static	btSoftBody*		CreateRope( btSoftBodyWorldInfo& worldInfo,
+		const btVector3& from,
+		const btVector3& to,
+		int res,
+		int fixeds);
+	/* Create a patch; returns 0 when resx or resy is below 2; fixeds bits 1,2,4,8 = corner00,10,01,11	*/ 
+	static	btSoftBody*		CreatePatch(btSoftBodyWorldInfo& worldInfo,
+		const btVector3& corner00,
+		const btVector3& corner10,
+		const btVector3& corner01,
+		const btVector3& corner11,
+		int resx,
+		int resy,
+		int fixeds,
+		bool gendiags);
+	/* Create a patch with UV Texture Coordinates; tex_coords, when given, needs (resx-1)*(resy-1)*12 floats	*/ 
+	static	btSoftBody*		CreatePatchUV(btSoftBodyWorldInfo& worldInfo,
+		const btVector3& corner00,
+		const btVector3& corner10,
+		const btVector3& corner01,
+		const btVector3& corner11,
+		int resx,
+		int resy,
+		int fixeds,
+		bool gendiags,
+		float* tex_coords=0);
+	static	float	CalculateUV(int resx,int resy,int ix,int iy,int id);	/* one texture coordinate of quad (ix,iy); id 0..3 picks which, other ids give 0 */
+	/* Create an ellipsoid from the convex hull of 3+res generated points					*/ 
+	static	btSoftBody*		CreateEllipsoid(btSoftBodyWorldInfo& worldInfo,
+		const btVector3& center,
+		const btVector3& radius,
+		int res);	
+	/* Create from trimesh; vertices holds 3 scalars per vertex, triangles 3 indices per triangle	*/ 
+	static	btSoftBody*		CreateFromTriMesh(	btSoftBodyWorldInfo& worldInfo,
+		const btScalar*	vertices,
+		const int* triangles,
+		int ntriangles,
+		bool randomizeConstraints = true);
+	/* Create from convex-hull												*/ 
+	static	btSoftBody*		CreateFromConvexHull(	btSoftBodyWorldInfo& worldInfo,
+		const btVector3* vertices,
+		int nvertices,
+		bool randomizeConstraints = true);
+
+
+	/* Export TetGen compatible .smesh file (declaration commented out)						*/ 
+//	static void				ExportAsSMeshFile(	btSoftBody* psb,
+//												const char* filename);	
+	/* Create from TetGen .ele, .face, .node files (declaration commented out)				*/ 
+//	static btSoftBody*		CreateFromTetGenFile(	btSoftBodyWorldInfo& worldInfo,
+//													const char* ele,
+//													const char* face,
+//													const char* node,
+//													bool bfacelinks,
+//													bool btetralinks,
+//													bool bfacesfromtetras);
+	/* Create from TetGen .ele, .face, .node data (file contents as text, not file names)	*/ 
+	static btSoftBody*		CreateFromTetGenData(	btSoftBodyWorldInfo& worldInfo,
+													const char* ele,
+													const char* face,
+													const char* node,
+													bool bfacelinks,
+													bool btetralinks,
+													bool bfacesfromtetras);
+	
+};
+
+#endif //BT_SOFT_BODY_HELPERS_H
diff --git a/src/bullet/BulletSoftBody/btSoftBodyInternals.h b/src/bullet/BulletSoftBody/btSoftBodyInternals.h
new file mode 100644
index 00000000..5ef8db19
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftBodyInternals.h
@@ -0,0 +1,930 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///btSoftBody implementation by Nathanael Presson
+
+#ifndef _BT_SOFT_BODY_INTERNALS_H
+#define _BT_SOFT_BODY_INTERNALS_H
+
+#include "btSoftBody.h"
+
+
+#include "LinearMath/btQuickprof.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseInterface.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionShapes/btConvexInternalShape.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpa2.h"
+#include <string.h> //for memset
+// btSymMatrix : symmetric matrix that keeps a single packed triangle, so
+// that (c,r) and (r,c) address the same stored element.
+template <typename T>
+struct btSymMatrix
+{
+	btSymMatrix() : dim(0) {}
+	btSymMatrix(int n,const T& init=T()) { resize(n,init); }
+	void resize(int n,const T& init=T()) { dim=n; store.resize((n*(n+1))/2,init); }
+	// Packed offset of (c,r): the larger index selects the row.
+	int index(int c,int r) const { const int lo=(c>r)?r:c; const int hi=(c>r)?c:r; btAssert(hi<dim); return((hi*(hi+1))/2+lo); }
+	T& operator()(int c,int r) { return store[index(c,r)]; }
+	const T& operator()(int c,int r) const { return store[index(c,r)]; }
+	btAlignedObjectArray<T>	store;	// (dim*(dim+1))/2 entries after resize()
+	int	dim;	// number of rows and columns
+};
+
+//
+// btSoftBodyCollisionShape
+//
+class btSoftBodyCollisionShape : public btConcaveShape
+{
+public:
+	/* Back pointer to the soft body this shape stands for. */
+	btSoftBody*	m_body;
+
+	btSoftBodyCollisionShape(btSoftBody* backptr)
+	{
+		m_body=backptr;
+		m_shapeType = SOFTBODY_SHAPE_PROXYTYPE;
+	}
+
+	virtual ~btSoftBodyCollisionShape() {}
+
+	/* Triangle enumeration is not implemented yet: asserts when called. */
+	void	processAllTriangles(btTriangleCallback* /*callback*/,const btVector3& /*aabbMin*/,const btVector3& /*aabbMax*/) const
+	{
+		btAssert(0);
+	}
+
+	/* getAabb returns the axis aligned bounding box in the coordinate frame
+	 * of the given transform t: the box around the eight transformed corners
+	 * of the body bounds (m_body->m_bounds). t is usually identity, except
+	 * when colliding against btCompoundShape. See Issue 512. */
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+	{
+		const btVector3	lo=m_body->m_bounds[0];
+		const btVector3	hi=m_body->m_bounds[1];
+		/* Bits 0,1,2 of k pick the lower or upper bound on x,y,z. */
+		for(int k=0;k<8;++k)
+		{
+			const btVector3	corner=t*btVector3(	(k&1)?hi.x():lo.x(),
+				(k&2)?hi.y():lo.y(),
+				(k&4)?hi.z():lo.z());
+			if(k==0)
+			{
+				aabbMin=aabbMax=corner;
+			}
+			else
+			{
+				aabbMin.setMin(corner);
+				aabbMax.setMax(corner);
+			}
+		}
+	}
+
+	/* Local scaling is not applicable: the request is ignored. */
+	virtual void	setLocalScaling(const btVector3& /*scaling*/)
+	{
+	}
+
+	/* Always reports a unit scale. */
+	virtual const btVector3& getLocalScaling() const
+	{
+		static const btVector3 unitScale(1,1,1);
+		return unitScale;
+	}
+	/* Inertia computation is not implemented yet: asserts when called. */
+	virtual void	calculateLocalInertia(btScalar /*mass*/,btVector3& /*inertia*/) const
+	{
+		btAssert(0);
+	}
+	virtual const char*	getName()const { return "SoftBody"; }
+
+};
+
+//
+// btSoftClusterCollisionShape
+//
+class btSoftClusterCollisionShape : public btConvexInternalShape
+{
+public:
+	/* Cluster whose node positions form the point set of this shape. */
+	const btSoftBody::Cluster*	m_cluster;
+
+	btSoftClusterCollisionShape (const btSoftBody::Cluster* cluster) : m_cluster(cluster) { setMargin(0); }
+
+	/* Returns the position of the cluster node with the largest dot product
+	 * against vec. Node 0 is read unconditionally: the cluster must not be empty. */
+	virtual btVector3	localGetSupportingVertex(const btVector3& vec) const
+	{
+		btSoftBody::Node* const *						n=&m_cluster->m_nodes[0];
+		btScalar										d=btDot(vec,n[0]->m_x);
+		int												j=0;
+		for(int i=1,ni=m_cluster->m_nodes.size();i<ni;++i)
+		{
+			const btScalar	k=btDot(vec,n[i]->m_x);
+			if(k>d) { d=k;j=i; }
+		}
+		return(n[j]->m_x);
+	}
+	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const
+	{
+		return(localGetSupportingVertex(vec));
+	}
+	/* Empty implementation: supportVerticesOut is left untouched. */
+	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
+	{}
+	/* Empty implementation: inertia is left untouched. */
+	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const
+	{}
+	/* Empty implementation: aabbMin and aabbMax are left untouched. */
+	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+	{}
+
+	virtual int	getShapeType() const { return SOFTBODY_SHAPE_PROXYTYPE; }
+	//debugging
+	virtual const char*	getName()const {return "SOFTCLUSTER";}
+	virtual void	setMargin(btScalar margin)
+	{
+		btConvexInternalShape::setMargin(margin);
+	}
+	/* Must be qualified: an unqualified getMargin() call here would recurse
+	 * into this override forever. */
+	virtual btScalar	getMargin() const
+	{
+		return btConvexInternalShape::getMargin();
+	}
+};
+
+//
+// Inline's
+//
+
+// Overwrites every byte of value with zero (memset over the size of T).
+template <typename T>
+static inline void			ZeroInitialize(T& value)
+{
+	memset(&value,0,sizeof(value));
+}
+// True when a is strictly less than b (operator<).
+template <typename T>
+static inline bool			CompLess(const T& a,const T& b)
+{ const bool isLess=(a<b); return isLess; }
+// True when a is strictly greater than b (operator>).
+template <typename T>
+static inline bool			CompGreater(const T& a,const T& b)
+{ const bool isGreater=(a>b); return isGreater; }
+// Linear interpolation: a at t=0, b at t=1, not limited to that range.
+template <typename T>
+static inline T				Lerp(const T& a,const T& b,btScalar t)
+{ return a+(b-a)*t; }
+// Evaluates (b+a*t-b*t)/(a*b); the division is not guarded against a*b==0.
+template <typename T>
+static inline T				InvLerp(const T& a,const T& b,btScalar t)
+{ return (b+a*t-b*t)/(a*b); }
+// Linear interpolation between two matrices, applied row by row with the
+// vector form of Lerp.
+static inline btMatrix3x3	Lerp(	const btMatrix3x3& a,
+								 const btMatrix3x3& b,
+								 btScalar t)
+{
+	btMatrix3x3	blended;
+	for(int row=0;row<3;++row)
+		blended[row]=Lerp(a[row],b[row],t);
+	return blended;
+}
+// Returns v scaled by maxlength/|v| when |v|^2 exceeds maxlength^2, v otherwise.
+static inline btVector3		Clamp(const btVector3& v,btScalar maxlength)
+{
+	const btScalar lenSq=v.length2();
+	/* Squared lengths are compared: the square root is only paid when clamping. */
+	const bool tooLong=lenSq>(maxlength*maxlength);
+	if(!tooLong) return v;
+	return (v*maxlength)/btSqrt(lenSq);
+}
+// Limits x to [l,h]: l when x<l, else h when x>h, else x.
+template <typename T>
+static inline T				Clamp(const T& x,const T& l,const T& h)
+{ if(x<l) return l; if(x>h) return h; return x; }
+// Square of x.
+template <typename T>
+static inline T				Sq(const T& x)
+{ return x*x; }
+// Third power of x.
+template <typename T>
+static inline T				Cube(const T& x)
+{ return x*x*x; }
+// -1 when x is negative, +1 otherwise (zero included), converted to T.
+template <typename T>
+static inline T				Sign(const T& x)
+{ if(x<0) return T(-1); return T(+1); }
+// True when the product x*y is strictly positive; false when either is zero.
+template <typename T>
+static inline bool			SameSign(const T& x,const T& y)
+{ const bool positiveProduct=(x*y)>0; return positiveProduct; }
+// Sum of the absolute component differences of x and y (L1 distance).
+static inline btScalar		ClusterMetric(const btVector3& x,const btVector3& y)
+{
+	const btVector3	delta=x-y;
+	return btFabs(delta[0])+btFabs(delta[1])+btFabs(delta[2]);
+}
+// Returns I+(s-1)*a*a^T, which scales by s along a when a is unit length.
+static inline btMatrix3x3	ScaleAlongAxis(const btVector3& a,btScalar s)
+{
+	const btScalar	xx=a.x()*a.x(), yy=a.y()*a.y(), zz=a.z()*a.z();
+	const btScalar	xy=a.x()*a.y(), yz=a.y()*a.z(), zx=a.z()*a.x();
+	/* Off-diagonal terms, each shared by two symmetric entries. */
+	const btScalar	offXY=xy*s-xy;
+	const btScalar	offYZ=yz*s-yz;
+	const btScalar	offZX=zx*s-zx;
+	btMatrix3x3		scaling;
+	scaling[0]=btVector3(1-xx+xx*s,offXY,offZX);
+	scaling[1]=btVector3(offXY,1-yy+yy*s,offYZ);
+	scaling[2]=btVector3(offZX,offYZ,1-zz+zz*s);
+	return scaling;
+}
+//
+// Skew-symmetric matrix of v, such that Cross(v)*w equals btCross(v,w).
+static inline btMatrix3x3	Cross(const btVector3& v)
+{
+	const btMatrix3x3	skew(	0,-v.z(),+v.y(),
+		+v.z(),0,-v.x(),
+		-v.y(),+v.x(),0);
+	return skew;
+}
+//
+// Returns x times the identity matrix.
+static inline btMatrix3x3	Diagonal(btScalar x)
+{
+	const btMatrix3x3	scaled(	x,0,0,
+		0,x,0,
+		0,0,x);
+	return scaled;
+}
+// Entry-wise sum of a and b.
+static inline btMatrix3x3	Add(const btMatrix3x3& a,
+								const btMatrix3x3& b)
+{
+	btMatrix3x3	sum;
+	sum[0]=a[0]+b[0]; sum[1]=a[1]+b[1]; sum[2]=a[2]+b[2];
+	return sum;
+}
+// Entry-wise difference a minus b.
+static inline btMatrix3x3	Sub(const btMatrix3x3& a,
+								const btMatrix3x3& b)
+{
+	btMatrix3x3	diff;
+	diff[0]=a[0]-b[0]; diff[1]=a[1]-b[1]; diff[2]=a[2]-b[2];
+	return diff;
+}
+// Every entry of a multiplied by the scalar b.
+static inline btMatrix3x3	Mul(const btMatrix3x3& a,
+								btScalar b)
+{
+	btMatrix3x3	scaled;
+	scaled[0]=a[0]*b; scaled[1]=a[1]*b; scaled[2]=a[2]*b;
+	return scaled;
+}
+// Rebuilds rows 2, 1, 0 of m in place, in that order, each as the normalized cross product of the other two.
+static inline void			Orthogonalize(btMatrix3x3& m)
+{
+	m[2]=m[0].cross(m[1]).normalized();
+	m[1]=m[2].cross(m[0]).normalized();
+	m[0]=m[1].cross(m[2]).normalized();
+}
+// Returns im*I - [r]x*iwi*[r]x, where [r]x is the cross product matrix of r.
+static inline btMatrix3x3	MassMatrix(btScalar im,const btMatrix3x3& iwi,const btVector3& r)
+{
+	const btMatrix3x3	skew=Cross(r);
+	return Sub(Diagonal(im),skew*iwi*skew);
+}
+
+// Returns (1/dt)*I times the inverse of ima*I + MassMatrix(imb,iwi,r).
+static inline btMatrix3x3	ImpulseMatrix(	btScalar dt,
+										  btScalar ima,btScalar imb,
+										  const btMatrix3x3& iwi,
+										  const btVector3& r)
+{
+	const btMatrix3x3	combined=Add(Diagonal(ima),MassMatrix(imb,iwi,r));
+	return Diagonal(1/dt)*combined.inverse();
+}
+
+// Inverse of the sum of the mass matrices of both bodies, taken at ra and rb.
+static inline btMatrix3x3	ImpulseMatrix(	btScalar ima,const btMatrix3x3& iia,const btVector3& ra,
+										  btScalar imb,const btMatrix3x3& iib,const btVector3& rb)
+{
+	return Add(MassMatrix(ima,iia,ra),MassMatrix(imb,iib,rb)).inverse();
+}
+
+// Inverse of the sum iia+iib.
+static inline btMatrix3x3	AngularImpulseMatrix(	const btMatrix3x3& iia,
+												 const btMatrix3x3& iib)
+{
+	return Add(iia,iib).inverse();
+}
+
+// Returns a*dot(v,a): the component of v along a when a is unit length.
+static inline btVector3		ProjectOnAxis(const btVector3& v,const btVector3& a)
+{
+	const btScalar	along=btDot(v,a);
+	return a*along;
+}
+// Returns v minus its projection on a: the part of v in the plane of normal a.
+static inline btVector3		ProjectOnPlane(const btVector3& v,const btVector3& a)
+{
+	const btVector3	along=ProjectOnAxis(v,a);
+	return v-along;
+}
+
+// Closest point to the origin on segment [a,b]. prj and sqd are overwritten only
+// when it beats the squared distance already in sqd; segments with length2 <= SIMD_EPSILON are skipped.
+static inline void			ProjectOrigin(	const btVector3& a,
+										  const btVector3& b,
+										  btVector3& prj,
+										  btScalar& sqd)
+{
+	const btVector3	edge=b-a;
+	const btScalar	edgeLen2=edge.length2();
+	if(!(edgeLen2>SIMD_EPSILON)) return;
+	/* Parameter of the projection along the edge, limited to the segment. */
+	const btScalar	t=Clamp<btScalar>(-btDot(a,edge)/edgeLen2,0,1);
+	const btVector3	closest=a+edge*t;
+	const btScalar	dist2=closest.length2();
+	if(dist2<sqd)
+	{
+		prj=closest;
+		sqd=dist2;
+	}
+}
+// Closest point to the origin on triangle (a,b,c). prj and sqd are overwritten only
+// when a point beats the squared distance already in sqd; near-zero-area triangles are skipped.
+static inline void			ProjectOrigin(	const btVector3& a,
+										  const btVector3& b,
+										  const btVector3& c,
+										  btVector3& prj,
+										  btScalar& sqd)
+{
+	const btVector3	q=btCross(b-a,c-a);
+	const btScalar	m2=q.length2();
+	if(!(m2>SIMD_EPSILON)) return;
+	const btVector3	n=q/btSqrt(m2);
+	const btScalar	k=btDot(a,n);
+	const btScalar	k2=k*k;
+	/* The triangle plane is no closer than the current best: nothing to do. */
+	if(!(k2<sqd)) return;
+	const btVector3	p=n*k;
+	const bool	inside=	(btDot(btCross(a-p,b-p),q)>0)&&
+		(btDot(btCross(b-p,c-p),q)>0)&&
+		(btDot(btCross(c-p,a-p),q)>0);
+	if(inside)
+	{
+		prj=p;
+		sqd=k2;
+	}
+	else
+	{
+		/* The plane projection is outside: try the three edges instead. */
+		ProjectOrigin(a,b,prj,sqd);
+		ProjectOrigin(b,c,prj,sqd);
+		ProjectOrigin(c,a,prj,sqd);
+	}
+}
+
+// Weighted sum of a, b and c, using coord.x(), coord.y() and coord.z() as weights.
+template <typename T>
+static inline T				BaryEval(		const T& a,
+									 const T& b,
+									 const T& c,
+									 const btVector3& coord)
+{
+	return a*coord.x()+b*coord.y()+c*coord.z();
+}
+// Weights of p relative to a,b,c (in x,y,z), proportional to the unsigned areas of the
+// sub-triangles opposite each vertex. NOTE(review): no guard against a zero area sum.
+static inline btVector3		BaryCoord(	const btVector3& a,
+									  const btVector3& b,
+									  const btVector3& c,
+									  const btVector3& p)
+{
+	const btScalar	areaC=btCross(a-p,b-p).length();
+	const btScalar	areaA=btCross(b-p,c-p).length();
+	const btScalar	areaB=btCross(c-p,a-p).length();
+	const btScalar	isum=1/(areaC+areaA+areaB);
+	return btVector3(areaA*isum,areaB*isum,areaC*isum);
+}
+
+// Searches t in (0,1) where fn crosses zero along Lerp(a,b,t), by repeated linear interpolation.
+// Returns t once |fn|<accuracy, or -1 when the ends do not bracket zero or the search gives up.
+static btScalar				ImplicitSolve(	btSoftBody::ImplicitFn* fn,
+										  const btVector3& a,
+										  const btVector3& b,
+										  const btScalar accuracy,
+										  const int maxiterations=256)
+{
+	btScalar	span[2]={0,1};
+	btScalar	values[2]={fn->Eval(a),fn->Eval(b)};
+	/* Order the bracket so that values[0] is the smaller end value. */
+	if(values[0]>values[1])
+	{
+		btSwap(span[0],span[1]);
+		btSwap(values[0],values[1]);
+	}
+	if((values[0]>-accuracy)||(values[1]<+accuracy)) return(-1);
+	for(int iter=0;iter<maxiterations;++iter)
+	{
+		const btScalar	t=Lerp(span[0],span[1],values[0]/(values[0]-values[1]));
+		const btScalar	v=fn->Eval(Lerp(a,b,t));
+		if((t<=0)||(t>=1))		break;
+		if(btFabs(v)<accuracy)	return(t);
+		const int	side=(v<0)?0:1;
+		span[side]=t;
+		values[side]=v;
+	}
+	return(-1);
+}
+
+// Returns v divided by its length, or the zero vector when the length
+// does not exceed SIMD_EPSILON.
+static inline btVector3		NormalizeAny(const btVector3& v)
+{
+	const btScalar len=v.length();
+	if(!(len>SIMD_EPSILON))
+		return btVector3(0,0,0);
+	return v/len;
+}
+
+// Bounding volume built from the three node positions of face f, then
+// expanded by margin along every axis.
+static inline btDbvtVolume	VolumeOf(	const btSoftBody::Face& f,
+									 btScalar margin)
+{
+	const btVector3*	corners[3];
+	for(int i=0;i<3;++i) corners[i]=&f.m_n[i]->m_x;
+	btDbvtVolume		bounds=btDbvtVolume::FromPoints(corners,3);
+	bounds.Expand(btVector3(margin,margin,margin));
+	return bounds;
+}
+
+// Average of the three node positions of face f.
+static inline btVector3			CenterOf(	const btSoftBody::Face& f)
+{
+	return (f.m_n[0]->m_x+f.m_n[1]->m_x+f.m_n[2]->m_x)/3;
+}
+
+// Length of the cross product of edges (x1-x0) and (x2-x0): the area of the
+// parallelogram they span, i.e. twice the area of triangle (x0,x1,x2).
+static inline btScalar			AreaOf(		const btVector3& x0,
+									   const btVector3& x1,
+									   const btVector3& x2)
+{
+	const btVector3	edge01=x1-x0;
+	const btVector3	edge02=x2-x0;
+	const btVector3	normal=btCross(edge01,edge02);
+	return normal.length();
+}
+
+// Scalar triple product of the edges from x0: signed, six times the tetrahedron volume.
+static inline btScalar		VolumeOf(	const btVector3& x0,
+									 const btVector3& x1,
+									 const btVector3& x2,
+									 const btVector3& x3)
+{
+	const btVector3	e1=x1-x0;
+	const btVector3	e2=x2-x0;
+	const btVector3	e3=x3-x0;
+	return btDot(e1,btCross(e2,e3));
+}
+
+// Fills medium for position x: zero velocity and pressure with the air density, or the
+// water density and a depth-proportional pressure when x is below the water plane of wfi.
+static void					EvaluateMedium(	const btSoftBodyWorldInfo* wfi,
+										   const btVector3& x,
+										   btSoftBody::sMedium& medium)
+{
+	medium.m_velocity	=	btVector3(0,0,0);
+	medium.m_pressure	=	0;
+	medium.m_density	=	wfi->air_density;
+	/* A non-positive water density means there is no water to test. */
+	if(!(wfi->water_density>0)) return;
+	const btScalar	depth=-(btDot(x,wfi->water_normal)+wfi->water_offset);
+	if(depth>0)
+	{
+		medium.m_density	=	wfi->water_density;
+		medium.m_pressure	=	depth*wfi->water_density*wfi->m_gravity.length();
+	}
+}
+
+// Adds f to n.m_f unless |f*dt*n.m_im| exceeds the node speed; in that case the force
+// cancelling the velocity component along f over dt is subtracted instead.
+static inline void			ApplyClampedForce(	btSoftBody::Node& n,
+											  const btVector3& f,
+											  btScalar dt)
+{
+	const btScalar	dtim=dt*n.m_im;
+	const bool		overshoots=(f*dtim).length2()>n.m_v.length2();
+	if(!overshoots)
+	{
+		n.m_f+=f;
+		return;
+	}
+	n.m_f-=ProjectOnAxis(n.m_v,f.normalized())/dtim;
+}
+
+// Compares edge (a,b) with (ma,mb): 0 for the same order, 1 for the reversed order, -1 for no match.
+static inline int		MatchEdge(	const btSoftBody::Node* a,
+								  const btSoftBody::Node* b,
+								  const btSoftBody::Node* ma,
+								  const btSoftBody::Node* mb)
+{
+	const bool	sameOrder=(a==ma)&&(b==mb);
+	const bool	reversed=(a==mb)&&(b==ma);
+	return sameOrder?0:(reversed?1:-1);
+}
+
+//
+// btEigen : Extract eigen system of a 3x3 matrix by repeated plane rotations,
+// straightforward implementation of http://math.fullerton.edu/mathews/n2003/JacobiMethodMod.html
+// outputs are NOT sorted; the input matrix 'a' is overwritten by the iteration.
+//
+struct	btEigen
+{
+	static int			system(btMatrix3x3& a,btMatrix3x3* vectors,btVector3* values=0)	// returns the number of completed rotations
+	{
+		static const int		maxiterations=16;
+		static const btScalar	accuracy=(btScalar)0.0001;
+		btMatrix3x3&			v=*vectors;	// accumulates the applied rotations
+		int						iterations=0;
+		vectors->setIdentity();
+		do	{
+			int				p=0,q=1;	// (p,q): upper off-diagonal entry of largest magnitude
+			if(btFabs(a[p][q])<btFabs(a[0][2])) { p=0;q=2; }
+			if(btFabs(a[p][q])<btFabs(a[1][2])) { p=1;q=2; }
+			if(btFabs(a[p][q])>accuracy)	// otherwise considered diagonal enough: stop
+			{
+				const btScalar	w=(a[q][q]-a[p][p])/(2*a[p][q]);
+				const btScalar	z=btFabs(w);
+				const btScalar	t=w/(z*(btSqrt(1+w*w)+z));	// tangent of the rotation angle; 0/0 (NaN) when w is 0
+				if(t==t)/* NaN check: false only when t is NaN. [WARNING] some compiler options (fast-math) may optimize it away */ 
+				{
+					const btScalar	c=1/btSqrt(t*t+1);	// cosine of the rotation
+					const btScalar	s=c*t;	// sine of the rotation
+					mulPQ(a,c,s,p,q);
+					mulTPQ(a,c,s,p,q);
+					mulPQ(v,c,s,p,q);
+				} else break;
+			} else break;
+		} while((++iterations)<maxiterations);
+		if(values)
+		{
+			*values=btVector3(a[0][0],a[1][1],a[2][2]);	// diagonal of the rotated matrix
+		}
+		return(iterations);
+	}
+private:
+	static inline void	mulTPQ(btMatrix3x3& a,btScalar c,btScalar s,int p,int q)	// rotates rows p and q of a
+	{
+		const btScalar	m[2][3]={	{a[p][0],a[p][1],a[p][2]},
+		{a[q][0],a[q][1],a[q][2]}};	// copies of rows p and q taken before they are overwritten
+		int i;
+
+		for(i=0;i<3;++i) a[p][i]=c*m[0][i]-s*m[1][i];
+		for(i=0;i<3;++i) a[q][i]=c*m[1][i]+s*m[0][i];
+	}
+	static inline void	mulPQ(btMatrix3x3& a,btScalar c,btScalar s,int p,int q)	// rotates columns p and q of a
+	{
+		const btScalar	m[2][3]={	{a[0][p],a[1][p],a[2][p]},
+		{a[0][q],a[1][q],a[2][q]}};	// copies of columns p and q taken before they are overwritten
+		int i;
+
+		for(i=0;i<3;++i) a[i][p]=c*m[0][i]-s*m[1][i];
+		for(i=0;i<3;++i) a[i][q]=c*m[1][i]+s*m[0][i];
+	}
+};
+
+//
+// Polar decomposition of m: q receives the orthogonalized factor and s receives q^T*m; returns the iteration count.
+// "Computing the Polar Decomposition with Applications", Nicholas J. Higham, 1986.
+//
+static inline int			PolarDecompose(	const btMatrix3x3& m,btMatrix3x3& q,btMatrix3x3& s)
+{
+	static const btScalar	half=(btScalar)0.5;
+	static const btScalar	accuracy=(btScalar)0.0001;
+	static const int		maxiterations=16;
+	int						i=0;
+	btScalar				det=0;
+	q	=	Mul(m,1/btVector3(m[0][0],m[1][1],m[2][2]).length());	// start from m scaled by the inverse length of its diagonal; NOTE(review): unguarded when the diagonal is all zero
+	det	=	q.determinant();
+	if(!btFuzzyZero(det))
+	{
+		for(;i<maxiterations;++i)
+		{
+			q=Mul(Add(q,Mul(q.adjoint(),1/det).transpose()),half);	// average of q and the transpose of adjoint(q)/det
+			const btScalar	ndet=q.determinant();
+			if(Sq(ndet-det)>accuracy) det=ndet; else break;	// stop once the determinant no longer changes noticeably
+		}
+		/* Final orthogonalization	*/ 
+		Orthogonalize(q);
+		/* Compute 'S'				*/ 
+		s=q.transpose()*m;
+	}
+	else
+	{
+		q.setIdentity();	// near-singular start: both outputs fall back to identity
+		s.setIdentity();
+	}
+	return(i);
+}
+
+//
+// btSoftColliders
+//
+struct btSoftColliders
+{
+	//
+	// ClusterBase : parameters and contact-to-joint conversion shared by the cluster colliders
+	//
+	struct	ClusterBase : btDbvt::ICollide
+	{
+		btScalar			erp;
+		btScalar			idt;
+		btScalar			m_margin;	// SolveContact keeps a contact only when res.distance < m_margin
+		btScalar			friction;	// friction value stored in the joints built by SolveContact
+		btScalar			threshold;
+		ClusterBase()
+		{
+			erp			=(btScalar)1;
+			idt			=0;
+			m_margin		=0;
+			friction	=0;
+			threshold	=(btScalar)0;
+		}
+		bool				SolveContact(	const btGjkEpaSolver2::sResults& res,
+			btSoftBody::Body ba,btSoftBody::Body bb,
+			btSoftBody::CJoint& joint)	// fills joint and returns true when res.distance < m_margin, returns false otherwise
+		{
+			if(res.distance<m_margin)
+			{
+				btVector3 norm = res.normal;
+				norm.normalize();//is it necessary?
+
+				const btVector3		ra=res.witnesses[0]-ba.xform().getOrigin();	// witness points relative to each body origin
+				const btVector3		rb=res.witnesses[1]-bb.xform().getOrigin();
+				const btVector3		va=ba.velocity(ra);
+				const btVector3		vb=bb.velocity(rb);
+				const btVector3		vrel=va-vb;
+				const btScalar		rvac=btDot(vrel,norm);	// relative velocity along the normal
+				 btScalar		depth=res.distance-m_margin;	// negative inside this branch
+				
+//				printf("depth=%f\n",depth);
+				const btVector3		iv=norm*rvac;	// normal part of the relative velocity
+				const btVector3		fv=vrel-iv;	// remaining (tangential) part
+				joint.m_bodies[0]	=	ba;
+				joint.m_bodies[1]	=	bb;
+				joint.m_refs[0]		=	ra*ba.xform().getBasis();
+				joint.m_refs[1]		=	rb*bb.xform().getBasis();
+				joint.m_rpos[0]		=	ra;
+				joint.m_rpos[1]		=	rb;
+				joint.m_cfm			=	1;
+				joint.m_erp			=	1;
+				joint.m_life		=	0;
+				joint.m_maxlife		=	0;
+				joint.m_split		=	1;
+				
+				joint.m_drift		=	depth*norm;
+
+				joint.m_normal		=	norm;
+//				printf("normal=%f,%f,%f\n",res.normal.getX(),res.normal.getY(),res.normal.getZ());
+				joint.m_delete		=	false;
+				joint.m_friction	=	fv.length2()<(-rvac*friction)?1:friction;
+				joint.m_massmatrix	=	ImpulseMatrix(	ba.invMass(),ba.invWorldInertia(),joint.m_rpos[0],
+					bb.invMass(),bb.invWorldInertia(),joint.m_rpos[1]);
+
+				return(true);
+			}
+			return(false);
+		}
+	};
+	//
+	// CollideCL_RS : soft body clusters versus a single collision object
+	//
+	struct	CollideCL_RS : ClusterBase
+	{
+		btSoftBody*		psb;
+		// collision object tested against the clusters of psb
+		btCollisionObject*	m_colObj;
+		void		Process(const btDbvtNode* leaf)	// tests the cluster stored in leaf against m_colObj
+		{
+			btSoftBody::Cluster*		cluster=(btSoftBody::Cluster*)leaf->data;
+			btSoftClusterCollisionShape	cshape(cluster);
+			// NOTE(review): the cast below assumes m_colObj carries a convex shape - confirm with the callers
+			const btConvexShape*		rshape=(const btConvexShape*)m_colObj->getCollisionShape();
+
+			///don't collide an anchored cluster with a static/kinematic object
+			if(m_colObj->isStaticOrKinematicObject() && cluster->m_containsAnchor)
+				return;
+
+			btGjkEpaSolver2::sResults	res;		// passed to SignedDistance, then read by SolveContact
+			if(btGjkEpaSolver2::SignedDistance(	&cshape,btTransform::getIdentity(),
+				rshape,m_colObj->getWorldTransform(),
+				btVector3(1,0,0),res))
+			{
+				btSoftBody::CJoint	joint;
+				if(SolveContact(res,cluster,m_colObj,joint))//prb,joint))
+				{
+					btSoftBody::CJoint*	pj=new(btAlignedAlloc(sizeof(btSoftBody::CJoint),16)) btSoftBody::CJoint();
+					*pj=joint;psb->m_joints.push_back(pj);	// the heap copy is appended to the joints of psb
+					if(m_colObj->isStaticOrKinematicObject())
+					{
+						pj->m_erp	*=	psb->m_cfg.kSKHR_CL;
+						pj->m_split	*=	psb->m_cfg.kSK_SPLT_CL;
+					}
+					else
+					{
+						pj->m_erp	*=	psb->m_cfg.kSRHR_CL;
+						pj->m_split	*=	psb->m_cfg.kSR_SPLT_CL;
+					}
+				}
+			}
+		}
+		void		Process(btSoftBody* ps,btCollisionObject* colOb)	// sets up the collider state, then hands it to collideTV over the cluster tree of ps
+		{
+			psb			=	ps;
+			m_colObj			=	colOb;
+			idt			=	ps->m_sst.isdt;
+			m_margin		=	m_colObj->getCollisionShape()->getMargin()+psb->getCollisionShape()->getMargin();
+			///Bullet rigid body uses multiply instead of minimum to determine combined friction. Some customization would be useful.
+			friction	=	btMin(psb->m_cfg.kDF,m_colObj->getFriction());
+			btVector3			mins;
+			btVector3			maxs;
+
+			ATTRIBUTE_ALIGNED16(btDbvtVolume)		volume;
+			colOb->getCollisionShape()->getAabb(colOb->getWorldTransform(),mins,maxs);
+			volume=btDbvtVolume::FromMM(mins,maxs);
+			volume.Expand(btVector3(1,1,1)*m_margin);	// query volume: the object AABB grown by the combined margin
+			ps->m_cdbvt.collideTV(ps->m_cdbvt.m_root,volume,*this);
+		}	
+	};
+	//
+	// CollideCL_SS : cluster versus cluster contacts between two soft bodies
+	//
+	struct	CollideCL_SS : ClusterBase
+	{
+		btSoftBody*	bodies[2];
+		/* Tests the clusters stored in leaves la and lb; when SolveContact
+		 * accepts the pair, a CJoint is allocated and appended to bodies[0]->m_joints. */
+		void		Process(const btDbvtNode* la,const btDbvtNode* lb)
+		{
+			btSoftBody::Cluster*		cla=(btSoftBody::Cluster*)la->data;
+			btSoftBody::Cluster*		clb=(btSoftBody::Cluster*)lb->data;
+
+			/* Within a single body, the connectivity table (when present)
+			 * tells whether the two clusters are connected. */
+			bool connected=false;
+			if ((bodies[0]==bodies[1])&&(bodies[0]->m_clusterConnectivity.size()))
+			{
+				connected = bodies[0]->m_clusterConnectivity[cla->m_clusterIndex+bodies[0]->m_clusters.size()*clb->m_clusterIndex];
+			}
+			/* Connected clusters are skipped: no distance query is run and
+			 * no joint is produced for them. */
+			if (connected)
+				return;
+
+			btSoftClusterCollisionShape	csa(cla);
+			btSoftClusterCollisionShape	csb(clb);
+			btGjkEpaSolver2::sResults	res;
+			if(btGjkEpaSolver2::SignedDistance(	&csa,btTransform::getIdentity(),
+				&csb,btTransform::getIdentity(),
+				cla->m_com-clb->m_com,res))
+			{
+				btSoftBody::CJoint	joint;
+				if(SolveContact(res,cla,clb,joint))
+				{
+					btSoftBody::CJoint*	pj=new(btAlignedAlloc(sizeof(btSoftBody::CJoint),16)) btSoftBody::CJoint();
+					*pj=joint;bodies[0]->m_joints.push_back(pj);
+					pj->m_erp	*=	btMax(bodies[0]->m_cfg.kSSHR_CL,bodies[1]->m_cfg.kSSHR_CL);
+					pj->m_split	*=	(bodies[0]->m_cfg.kSS_SPLT_CL+bodies[1]->m_cfg.kSS_SPLT_CL)/2;
+				}
+			}
+		}
+		/* Sets up margin, friction and the body pair, then hands this collider
+		 * to collideTT over the cluster trees of both bodies. */
+		void		Process(btSoftBody* psa,btSoftBody* psb)
+		{
+			idt			=	psa->m_sst.isdt;
+			//m_margin		=	(psa->getCollisionShape()->getMargin()+psb->getCollisionShape()->getMargin())/2;
+			m_margin		=	(psa->getCollisionShape()->getMargin()+psb->getCollisionShape()->getMargin());
+			friction	=	btMin(psa->m_cfg.kDF,psb->m_cfg.kDF);
+			bodies[0]	=	psa;
+			bodies[1]	=	psb;
+			psa->m_cdbvt.collideTT(psa->m_cdbvt.m_root,psb->m_cdbvt.m_root,*this);
+		}
+	};
+	//
+	// CollideSDF_RS : soft body nodes versus a collision object, through psb->checkContact
+	//
+	struct	CollideSDF_RS : btDbvt::ICollide
+	{
+		void		Process(const btDbvtNode* leaf)	// forwards the node stored in leaf to DoNode
+		{
+			btSoftBody::Node*	node=(btSoftBody::Node*)leaf->data;
+			DoNode(*node);
+		}
+		void		DoNode(btSoftBody::Node& n) const	// appends an RContact to psb->m_rcontacts when node n touches m_colObj1
+		{
+			const btScalar			m=n.m_im>0?dynmargin:stamargin;	// margin depends on whether the node has a positive inverse mass
+			btSoftBody::RContact	c;
+			if(	(!n.m_battach)&&
+				psb->checkContact(m_colObj1,n.m_x,m,c.m_cti))
+			{
+				const btScalar	ima=n.m_im;
+				const btScalar	imb= m_rigidBody? m_rigidBody->getInvMass() : 0.f;	// no rigid body: treated as infinite mass
+				const btScalar	ms=ima+imb;
+				if(ms>0)	// skipped when neither side can move
+				{
+					const btTransform&	wtr=m_rigidBody?m_rigidBody->getWorldTransform() : m_colObj1->getWorldTransform();
+					static const btMatrix3x3	iwiStatic(0,0,0,0,0,0,0,0,0);	// zero inverse inertia, used when there is no rigid body
+					const btMatrix3x3&	iwi=m_rigidBody?m_rigidBody->getInvInertiaTensorWorld() : iwiStatic;
+					const btVector3		ra=n.m_x-wtr.getOrigin();
+					const btVector3		va=m_rigidBody ? m_rigidBody->getVelocityInLocalPoint(ra)*psb->m_sst.sdt : btVector3(0,0,0);
+					const btVector3		vb=n.m_x-n.m_q;	// node displacement between m_q and m_x
+					const btVector3		vr=vb-va;
+					const btScalar		dn=btDot(vr,c.m_cti.m_normal);
+					const btVector3		fv=vr-c.m_cti.m_normal*dn;	// part of vr orthogonal to the contact normal
+					const btScalar		fc=psb->m_cfg.kDF*m_colObj1->getFriction();
+					c.m_node	=	&n;
+					c.m_c0		=	ImpulseMatrix(psb->m_sst.sdt,ima,imb,iwi,ra);
+					c.m_c1		=	ra;
+					c.m_c2		=	ima*psb->m_sst.sdt;
+					c.m_c3		=	fv.length2()<(btFabs(dn)*fc)?0:1-fc;
+					c.m_c4		=	m_colObj1->isStaticOrKinematicObject()?psb->m_cfg.kKHR:psb->m_cfg.kCHR;
+					psb->m_rcontacts.push_back(c);
+					if (m_rigidBody)
+						m_rigidBody->activate();
+				}
+			}
+		}
+		btSoftBody*		psb;
+		btCollisionObject*	m_colObj1;
+		btRigidBody*	m_rigidBody;	// may be null: every use above is guarded
+		btScalar		dynmargin;	// margin for nodes with m_im > 0
+		btScalar		stamargin;	// margin for the other nodes
+	};
+	//
+	// CollideVF_SS : soft body node (vertex) versus soft body face contacts
+	//
+	struct	CollideVF_SS : btDbvt::ICollide
+	{
+		void		Process(const btDbvtNode* lnode,
+			const btDbvtNode* lface)	// appends an SContact to psb[0]->m_scontacts when the node is close enough to the face
+		{
+			btSoftBody::Node*	node=(btSoftBody::Node*)lnode->data;
+			btSoftBody::Face*	face=(btSoftBody::Face*)lface->data;
+			btVector3			o=node->m_x;
+			btVector3			p;	// closest face point relative to o; only written when ProjectOrigin finds one
+			btScalar			d=SIMD_INFINITY;	// squared distance from the node to that point
+			ProjectOrigin(	face->m_n[0]->m_x-o,
+				face->m_n[1]->m_x-o,
+				face->m_n[2]->m_x-o,
+				p,d);
+			const btScalar	m=mrg+(o-node->m_q).length()*2;	// margin grown by twice the distance between m_q and m_x
+			if(d<(m*m))
+			{
+				const btSoftBody::Node*	n[]={face->m_n[0],face->m_n[1],face->m_n[2]};
+				const btVector3			w=BaryCoord(n[0]->m_x,n[1]->m_x,n[2]->m_x,p+o);
+				const btScalar			ma=node->m_im;
+				btScalar				mb=BaryEval(n[0]->m_im,n[1]->m_im,n[2]->m_im,w);	// inverse mass of the face at the contact point
+				if(	(n[0]->m_im<=0)||
+					(n[1]->m_im<=0)||
+					(n[2]->m_im<=0))
+				{
+					mb=0;	// a face with any non-positive inverse mass node is treated as immovable
+				}
+				const btScalar	ms=ma+mb;
+				if(ms>0)
+				{
+					btSoftBody::SContact	c;
+					c.m_normal		=	p/-btSqrt(d);	// NOTE(review): divides by zero when d is 0 (node exactly on the face)
+					c.m_margin		=	m;
+					c.m_node		=	node;
+					c.m_face		=	face;
+					c.m_weights		=	w;
+					c.m_friction	=	btMax(psb[0]->m_cfg.kDF,psb[1]->m_cfg.kDF);
+					c.m_cfm[0]		=	ma/ms*psb[0]->m_cfg.kSHR;
+					c.m_cfm[1]		=	mb/ms*psb[1]->m_cfg.kSHR;
+					psb[0]->m_scontacts.push_back(c);
+				}
+			}	
+		}
+		btSoftBody*		psb[2];
+		btScalar		mrg;	// base contact margin, before the per-node growth computed in Process
+	};
+};
+
+#endif //_BT_SOFT_BODY_INTERNALS_H
diff --git a/src/bullet/BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.cpp b/src/bullet/BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.cpp
new file mode 100644
index 00000000..f5a67f6d
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.cpp
@@ -0,0 +1,134 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btSoftBodyRigidBodyCollisionConfiguration.h"
+#include "btSoftRigidCollisionAlgorithm.h"
+#include "btSoftBodyConcaveCollisionAlgorithm.h"
+#include "btSoftSoftCollisionAlgorithm.h"
+
+#include "LinearMath/btPoolAllocator.h"
+
+#define ENABLE_SOFTBODY_CONCAVE_COLLISIONS 1
+
+/* Builds the default configuration, allocates the create functions for the
+ * soft body pairings and, when needed, replaces the algorithm pool by a larger one. */
+btSoftBodyRigidBodyCollisionConfiguration::btSoftBodyRigidBodyCollisionConfiguration(const btDefaultCollisionConstructionInfo& constructionInfo)
+:btDefaultCollisionConfiguration(constructionInfo)
+{
+	void* mem;
+
+	mem = btAlignedAlloc(sizeof(btSoftSoftCollisionAlgorithm::CreateFunc),16);
+	m_softSoftCreateFunc = new(mem) btSoftSoftCollisionAlgorithm::CreateFunc;
+
+	mem = btAlignedAlloc(sizeof(btSoftRigidCollisionAlgorithm::CreateFunc),16);
+	m_softRigidConvexCreateFunc = new(mem) btSoftRigidCollisionAlgorithm::CreateFunc;
+
+	mem = btAlignedAlloc(sizeof(btSoftRigidCollisionAlgorithm::CreateFunc),16);
+	m_swappedSoftRigidConvexCreateFunc = new(mem) btSoftRigidCollisionAlgorithm::CreateFunc;
+	m_swappedSoftRigidConvexCreateFunc->m_swapped=true;
+
+#ifdef ENABLE_SOFTBODY_CONCAVE_COLLISIONS
+	mem = btAlignedAlloc(sizeof(btSoftBodyConcaveCollisionAlgorithm::CreateFunc),16);
+	m_softRigidConcaveCreateFunc = new(mem) btSoftBodyConcaveCollisionAlgorithm::CreateFunc;
+
+	/* The buffer is sized for the type that is actually constructed in it. */
+	mem = btAlignedAlloc(sizeof(btSoftBodyConcaveCollisionAlgorithm::SwappedCreateFunc),16);
+	m_swappedSoftRigidConcaveCreateFunc = new(mem) btSoftBodyConcaveCollisionAlgorithm::SwappedCreateFunc;
+	m_swappedSoftRigidConcaveCreateFunc->m_swapped=true;
+#endif
+
+	/* Replace the pool by a new one when its elements are too small to hold
+	 * the largest soft body collision algorithm. */
+	if (m_ownsCollisionAlgorithmPool && m_collisionAlgorithmPool)
+	{
+		int curElemSize = m_collisionAlgorithmPool->getElementSize();
+
+		int maxSize0 = sizeof(btSoftSoftCollisionAlgorithm);
+		int maxSize1 = sizeof(btSoftRigidCollisionAlgorithm);
+		int maxSize2 = sizeof(btSoftBodyConcaveCollisionAlgorithm);
+
+		int	collisionAlgorithmMaxElementSize = btMax(maxSize0,maxSize1);
+		collisionAlgorithmMaxElementSize = btMax(collisionAlgorithmMaxElementSize,maxSize2);
+
+		if (collisionAlgorithmMaxElementSize > curElemSize)
+		{
+			m_collisionAlgorithmPool->~btPoolAllocator();
+			btAlignedFree(m_collisionAlgorithmPool);
+			mem = btAlignedAlloc(sizeof(btPoolAllocator),16);
+			m_collisionAlgorithmPool = new(mem) btPoolAllocator(collisionAlgorithmMaxElementSize,constructionInfo.m_defaultMaxCollisionAlgorithmPoolSize);
+		}
+	}
+}
+
+/* Destroys and frees, in allocation order, every create function built by the constructor. */
+btSoftBodyRigidBodyCollisionConfiguration::~btSoftBodyRigidBodyCollisionConfiguration()
+{
+	btCollisionAlgorithmCreateFunc* const owned[] = {
+		m_softSoftCreateFunc,
+		m_softRigidConvexCreateFunc,
+		m_swappedSoftRigidConvexCreateFunc,
+#ifdef ENABLE_SOFTBODY_CONCAVE_COLLISIONS
+		m_softRigidConcaveCreateFunc,
+		m_swappedSoftRigidConcaveCreateFunc,
+#endif
+	};
+	const int numOwned = (int)(sizeof(owned)/sizeof(owned[0]));
+	for (int i=0;i<numOwned;++i)
+	{
+		owned[i]->~btCollisionAlgorithmCreateFunc();
+		btAlignedFree(owned[i]);
+	}
+}
+
+///returns the create function for soft-soft and soft-rigid pairs, and otherwise falls back to the base class implementation
+btCollisionAlgorithmCreateFunc* btSoftBodyRigidBodyCollisionConfiguration::getCollisionAlgorithmCreateFunc(int proxyType0,int proxyType1)
+{
+	///try to handle the softbody interactions first
+	const bool	isSoft0 = (proxyType0 == SOFTBODY_SHAPE_PROXYTYPE);
+	const bool	isSoft1 = (proxyType1 == SOFTBODY_SHAPE_PROXYTYPE);
+	if (isSoft0 && isSoft1)
+	{
+		return	m_softSoftCreateFunc;
+	}
+
+	///softbody versus convex
+	if (isSoft0 && btBroadphaseProxy::isConvex(proxyType1))
+	{
+		return	m_softRigidConvexCreateFunc;
+	}
+
+	///convex versus soft body
+	if (isSoft1 && btBroadphaseProxy::isConvex(proxyType0))
+	{
+		return	m_swappedSoftRigidConvexCreateFunc;
+	}
+
+#ifdef ENABLE_SOFTBODY_CONCAVE_COLLISIONS
+	///softbody versus concave
+	if (isSoft0 && btBroadphaseProxy::isConcave(proxyType1))
+	{
+		return	m_softRigidConcaveCreateFunc;
+	}
+
+	///concave versus soft body
+	if (isSoft1 && btBroadphaseProxy::isConcave(proxyType0))
+	{
+		return	m_swappedSoftRigidConcaveCreateFunc;
+	}
+#endif
+
+	///fallback to the regular rigid collision shape
+	return btDefaultCollisionConfiguration::getCollisionAlgorithmCreateFunc(proxyType0,proxyType1);
+}
diff --git a/src/bullet/BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.h b/src/bullet/BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.h
new file mode 100644
index 00000000..21addcfe
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.h
@@ -0,0 +1,48 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFTBODY_RIGIDBODY_COLLISION_CONFIGURATION
+#define BT_SOFTBODY_RIGIDBODY_COLLISION_CONFIGURATION
+
+#include "BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h"
+
+class btVoronoiSimplexSolver;
+class btGjkEpaPenetrationDepthSolver;
+
+
+///btSoftBodyRigidBodyCollisionConfiguration adds softbody interaction on top of btDefaultCollisionConfiguration
+class	btSoftBodyRigidBodyCollisionConfiguration : public btDefaultCollisionConfiguration
+{
+	//create functions returned by getCollisionAlgorithmCreateFunc for pairs that involve a soft body
+	//default CreationFunctions, filling the m_doubleDispatch table
+	btCollisionAlgorithmCreateFunc*	m_softSoftCreateFunc;
+	btCollisionAlgorithmCreateFunc*	m_softRigidConvexCreateFunc;
+	btCollisionAlgorithmCreateFunc*	m_swappedSoftRigidConvexCreateFunc;
+	btCollisionAlgorithmCreateFunc*	m_softRigidConcaveCreateFunc;
+	btCollisionAlgorithmCreateFunc*	m_swappedSoftRigidConcaveCreateFunc;
+	//NOTE: in the .cpp code visible here the two concave members are only touched when ENABLE_SOFTBODY_CONCAVE_COLLISIONS is defined
+public:
+	//NOTE(review): constructionInfo is presumably passed on to btDefaultCollisionConfiguration -- confirm in the .cpp
+	btSoftBodyRigidBodyCollisionConfiguration(const btDefaultCollisionConstructionInfo& constructionInfo = btDefaultCollisionConstructionInfo());
+
+	virtual ~btSoftBodyRigidBodyCollisionConfiguration();
+
+	///creation of soft-soft and soft-rigid, and otherwise fallback to base class implementation
+	virtual btCollisionAlgorithmCreateFunc* getCollisionAlgorithmCreateFunc(int proxyType0,int proxyType1);
+
+};
+
+#endif //BT_SOFTBODY_RIGIDBODY_COLLISION_CONFIGURATION
+
diff --git a/src/bullet/BulletSoftBody/btSoftBodySolverVertexBuffer.h b/src/bullet/BulletSoftBody/btSoftBodySolverVertexBuffer.h
new file mode 100644
index 00000000..c4733d64
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftBodySolverVertexBuffer.h
@@ -0,0 +1,165 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_H
+#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_H
+
+
+class btVertexBufferDescriptor
+{
+public:
+	/** Kinds of buffer a concrete descriptor can report through getBufferType(). */
+	enum BufferTypes
+	{
+		CPU_BUFFER,
+		DX11_BUFFER,
+		OPENGL_BUFFER
+	};
+
+protected:
+	// Flags telling which attributes are described.
+	bool m_hasVertexPositions;
+	bool m_hasNormals;
+
+	// Layout of the vertex positions.
+	int m_vertexOffset;
+	int m_vertexStride;
+
+	// Layout of the normals.
+	int m_normalOffset;
+	int m_normalStride;
+
+public:
+	/**
+	 * Start with a descriptor that has neither vertex positions nor normals
+	 * and whose offsets and strides are all zero.
+	 */
+	btVertexBufferDescriptor() :
+		m_hasVertexPositions( false ),
+		m_hasNormals( false ),
+		m_vertexOffset( 0 ),
+		m_vertexStride( 0 ),
+		m_normalOffset( 0 ),
+		m_normalStride( 0 )
+	{
+	}
+
+	virtual ~btVertexBufferDescriptor()
+	{
+	}
+
+	/**
+	 * Return the flag telling whether vertex positions are described.
+	 */
+	virtual bool hasVertexPositions() const
+	{
+		return m_hasVertexPositions;
+	}
+
+	/**
+	 * Return the flag telling whether normals are described.
+	 */
+	virtual bool hasNormals() const
+	{
+		return m_hasNormals;
+	}
+
+	/**
+	 * Return the type of the vertex buffer descriptor.
+	 */
+	virtual BufferTypes getBufferType() const = 0;
+
+	/**
+	 * Return the vertex offset in floats from the base pointer.
+	 */
+	virtual int getVertexOffset() const { return m_vertexOffset; }
+
+	/**
+	 * Return the vertex stride in number of floats between vertices.
+	 */
+	virtual int getVertexStride() const { return m_vertexStride; }
+
+	/**
+	 * Return the normal offset from the base pointer (presumably in floats, like the vertex offset).
+	 */
+	virtual int getNormalOffset() const { return m_normalOffset; }
+
+	/**
+	 * Return the normal stride between normals (presumably in floats, like the vertex stride).
+	 */
+	virtual int getNormalStride() const { return m_normalStride; }
+};
+
+
+class btCPUVertexBufferDescriptor : public btVertexBufferDescriptor
+{
+protected:
+	float *m_basePointer;
+
+public:
+	/**
+	 * Describe a buffer for which only vertex positions are given.
+	 * basePointer is pointer to beginning of the buffer.
+	 * vertexOffset is the offset in floats to the first vertex.
+	 * vertexStride is the stride in floats between vertices.
+	 */
+	btCPUVertexBufferDescriptor( float *basePointer, int vertexOffset, int vertexStride ) :
+		m_basePointer( basePointer )
+	{
+		m_hasVertexPositions = true;
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+	}
+
+	/**
+	 * Describe a buffer for which both vertex positions and normals are given.
+	 * basePointer is pointer to beginning of the buffer.
+	 * vertexOffset is the offset in floats to the first vertex.
+	 * vertexStride is the stride in floats between vertices.
+	 * normalOffset and normalStride are stored as the offset and stride of the normals.
+	 */
+	btCPUVertexBufferDescriptor( float *basePointer, int vertexOffset, int vertexStride, int normalOffset, int normalStride ) :
+		m_basePointer( basePointer )
+	{
+		m_hasVertexPositions = true;
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+		m_hasNormals = true;
+		m_normalOffset = normalOffset;
+		m_normalStride = normalStride;
+	}
+
+	virtual ~btCPUVertexBufferDescriptor()
+	{
+	}
+
+	/**
+	 * Return the type of the vertex buffer descriptor, which is always CPU_BUFFER for this class.
+	 */
+	virtual BufferTypes getBufferType() const
+	{
+		return CPU_BUFFER;
+	}
+
+	/**
+	 * Return the base pointer that was passed to the constructor.
+	 */
+	virtual float *getBasePointer() const
+	{
+		return m_basePointer;
+	}
+};
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_H
diff --git a/src/bullet/BulletSoftBody/btSoftBodySolvers.h b/src/bullet/BulletSoftBody/btSoftBodySolvers.h
new file mode 100644
index 00000000..2fcd8b67
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftBodySolvers.h
@@ -0,0 +1,154 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVERS_H
+#define BT_SOFT_BODY_SOLVERS_H
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+
+
+class btSoftBodyTriangleData;
+class btSoftBodyLinkData;
+class btSoftBodyVertexData;
+class btVertexBufferDescriptor;
+class btCollisionObject;
+class btSoftBody;
+
+
+/** Abstract interface of a soft body solver; it also stores the iteration counts and the simulation time scale. */
+class btSoftBodySolver
+{
+public:
+	enum SolverTypes
+	{
+		DEFAULT_SOLVER,
+		CPU_SOLVER,
+		CL_SOLVER,
+		CL_SIMD_SOLVER,
+		DX_SOLVER,
+		DX_SIMD_SOLVER
+	};
+
+protected:
+	int m_numberOfPositionIterations;
+	int m_numberOfVelocityIterations;
+	// Simulation timescale
+	float m_timeScale;
+	
+public:
+	btSoftBodySolver() :
+		m_numberOfPositionIterations( 5 ),
+		m_numberOfVelocityIterations( 0 ),
+		m_timeScale( 1 )
+	{
+		// Defaults: 5 position iterations, 0 velocity iterations, time scale 1.
+	}
+
+	virtual ~btSoftBodySolver()
+	{
+	}
+	
+	/**
+	 * Return the type of the solver.
+	 */
+	virtual SolverTypes getSolverType() const = 0;
+
+
+	/** Ensure that this solver is initialized. */
+	virtual bool checkInitialized() = 0;
+
+	/** Optimize soft bodies in this solver. */
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false) = 0;
+
+	/** Copy necessary data back to the original soft body source objects. */
+	virtual void copyBackToSoftBodies(bool bMove = true) = 0;
+
+	/** Predict motion of soft bodies into next timestep */
+	virtual void predictMotion( float solverdt ) = 0;
+
+	/** Solve constraints for a set of soft bodies */
+	virtual void solveConstraints( float solverdt ) = 0;
+
+	/** Perform necessary per-step updates of soft bodies such as recomputing normals and bounding boxes */
+	virtual void updateSoftBodies() = 0;
+
+	/** Process a collision between one of the world's soft bodies and another collision object */
+	virtual void processCollision( btSoftBody *, btCollisionObject* ) = 0;
+
+	/** Process a collision between two soft bodies */
+	virtual void processCollision( btSoftBody*, btSoftBody* ) = 0;
+
+	/** Set the number of position constraint solver iterations this solver uses. */
+	virtual void setNumberOfPositionIterations( int iterations )
+	{
+		m_numberOfPositionIterations = iterations;
+	}
+
+	/** Get the number of position constraint solver iterations this solver uses. */
+	virtual int getNumberOfPositionIterations()
+	{
+		return m_numberOfPositionIterations;
+	}
+
+	/** Set the number of velocity constraint solver iterations this solver uses. */
+	virtual void setNumberOfVelocityIterations( int iterations )
+	{
+		m_numberOfVelocityIterations = iterations;
+	}
+
+	/** Get the number of velocity constraint solver iterations this solver uses. */
+	virtual int getNumberOfVelocityIterations()
+	{
+		return m_numberOfVelocityIterations;
+	}
+
+	/** Return the timescale that the simulation is using */
+	float getTimeScale()
+	{
+		return m_timeScale;
+	}
+
+#if 0
+	/**
+	 * Add a collision object to be used by the indicated softbody.
+	 */
+	virtual void addCollisionObjectForSoftBody( int clothIdentifier, btCollisionObject *collisionObject ) = 0;
+#endif
+};
+
+/** 
+ * Class to manage movement of data from a solver to a given target.
+ * This version is abstract. Subclasses will have custom pairings for different combinations.
+ */
+class btSoftBodySolverOutput
+{
+protected:
+	// (this abstract base declares no protected members)
+public:
+	btSoftBodySolverOutput()
+	{
+	}
+
+	virtual ~btSoftBodySolverOutput()
+	{
+	}
+
+
+	/** Output the current computed vertex data of the given soft body to the given vertex buffer. */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer ) = 0;
+};
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVERS_H
diff --git a/src/bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp b/src/bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp
new file mode 100644
index 00000000..bc374c80
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp
@@ -0,0 +1,84 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btSoftRigidCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "BulletCollision/CollisionShapes/btBoxShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "btSoftBody.h"
+#include "BulletSoftBody/btSoftBodySolvers.h"
+
+///TODO: include all the shapes that the softbody can collide with
+///alternatively, implement special case collision algorithms (just like for rigid collision shapes)
+
+//#include <stdio.h>
+
+///The manifold and the two collision objects are accepted but not used; only ci and isSwapped are stored.
+btSoftRigidCollisionAlgorithm::btSoftRigidCollisionAlgorithm(btPersistentManifold* /*mf*/,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* /*col0*/,btCollisionObject* /*col1*/, bool isSwapped)
+: btCollisionAlgorithm(ci),
+m_softBody(0),				//was left uninitialized before
+m_rigidCollisionObject(0),	//was left uninitialized before
+m_isSwapped(isSwapped)
+{}
+
+
+btSoftRigidCollisionAlgorithm::~btSoftRigidCollisionAlgorithm()
+{
+	//nothing is released here: both cleanup snippets below are disabled (commented out)
+	//m_softBody->m_overlappingRigidBodies.remove(m_rigidCollisionObject);
+
+	/*if (m_ownManifold)
+	{
+	if (m_manifoldPtr)
+	m_dispatcher->releaseManifold(m_manifoldPtr);
+	}
+	*/
+
+}
+
+
+#include <stdio.h>
+
+void btSoftRigidCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	(void)resultOut;
+	(void)dispatchInfo;
+
+	//when swapped the soft body is the second object of the pair, otherwise the first
+	btCollisionObject* softObject = m_isSwapped ? body1 : body0;
+	btCollisionObject* rigidObject = m_isSwapped ? body0 : body1;
+	btSoftBody* soft = (btSoftBody*)softObject;
+
+	//the pair is skipped unless the search result equals size(),
+	//i.e. presumably unless the rigid object is absent from the soft body's disabled list
+	if (soft->m_collisionDisabledObjects.findLinearSearch(rigidObject) != soft->m_collisionDisabledObjects.size())
+		return;
+	soft->getSoftBodySolver()->processCollision(soft, rigidObject);
+}
+
+btScalar btSoftRigidCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* col0,btCollisionObject* col1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+{
+	//time of impact is not computed yet, so none of the arguments is used and 1 is always returned
+	(void)col0;
+	(void)col1;
+	(void)dispatchInfo;
+	(void)resultOut;
+
+	return btScalar(1.);
+}
+
+
+
diff --git a/src/bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.h b/src/bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.h
new file mode 100644
index 00000000..7658e3c2
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.h
@@ -0,0 +1,75 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_RIGID_COLLISION_ALGORITHM_H
+#define BT_SOFT_RIGID_COLLISION_ALGORITHM_H
+
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+class btPersistentManifold;
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+
+#include "LinearMath/btVector3.h"
+class btSoftBody;
+
+/// btSoftRigidCollisionAlgorithm  provides collision detection between btSoftBody and btRigidBody
+class btSoftRigidCollisionAlgorithm : public btCollisionAlgorithm
+{
+	//	bool	m_ownManifold;
+	//	btPersistentManifold*	m_manifoldPtr;
+
+	btSoftBody*				m_softBody;
+	btCollisionObject*		m_rigidCollisionObject;
+
+	///true when the pair is rigid versus soft (instead of soft versus rigid)
+	bool	m_isSwapped;
+
+public:
+
+	///isSwapped tells which body of a pair is the soft one: false means body0, true means body1
+	btSoftRigidCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1, bool isSwapped);
+
+	virtual ~btSoftRigidCollisionAlgorithm();
+
+	///hands the pair to the soft body's solver, see the implementation for the disabled-object check
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	///time of impact is not computed yet, see the implementation
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	///we don't add any manifolds, so the array is left untouched
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		(void)manifoldArray;
+	}
+
+	///factory that builds this algorithm; m_swapped is forwarded as the isSwapped constructor argument
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			//the algorithm is constructed in place, in memory obtained from the dispatcher
+			const bool isSwapped = m_swapped ? true : false;
+			void* storage = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btSoftRigidCollisionAlgorithm));
+			return new(storage) btSoftRigidCollisionAlgorithm(0,ci,body0,body1,isSwapped);
+		}
+	};
+
+};
+
+#endif //BT_SOFT_RIGID_COLLISION_ALGORITHM_H
+
+
diff --git a/src/bullet/BulletSoftBody/btSoftRigidDynamicsWorld.cpp b/src/bullet/BulletSoftBody/btSoftRigidDynamicsWorld.cpp
new file mode 100644
index 00000000..8f4be231
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftRigidDynamicsWorld.cpp
@@ -0,0 +1,365 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "btSoftRigidDynamicsWorld.h"
+#include "LinearMath/btQuickprof.h"
+
+//softbody & helpers
+#include "btSoftBody.h"
+#include "btSoftBodyHelpers.h"
+#include "btSoftBodySolvers.h"
+#include "btDefaultSoftBodySolver.h"
+#include "LinearMath/btSerializer.h"
+
+
+btSoftRigidDynamicsWorld::btSoftRigidDynamicsWorld(
+	btDispatcher* dispatcher,
+	btBroadphaseInterface* pairCache,
+	btConstraintSolver* constraintSolver,
+	btCollisionConfiguration* collisionConfiguration,
+	btSoftBodySolver *softBodySolver ) : 
+		btDiscreteDynamicsWorld(dispatcher,pairCache,constraintSolver,collisionConfiguration),
+		m_softBodySolver( softBodySolver ),
+		m_ownsSolver(false)
+{
+	if( !m_softBodySolver )	//no solver supplied: create a default one that this world owns and later destroys
+	{
+		void* ptr = btAlignedAlloc(sizeof(btDefaultSoftBodySolver),16);
+		m_softBodySolver = new(ptr) btDefaultSoftBodySolver();
+		m_ownsSolver = true;
+	}
+	//debug drawing defaults: standard draw flags, node tree on, face and cluster trees off
+	m_drawFlags			=	fDrawFlags::Std;
+	m_drawNodeTree		=	true;
+	m_drawFaceTree		=	false;
+	m_drawClusterTree	=	false;
+	m_sbi.m_broadphase = pairCache;
+	m_sbi.m_dispatcher = dispatcher;
+	m_sbi.m_sparsesdf.Initialize();
+	m_sbi.m_sparsesdf.Reset();
+	//default environment values stored in the soft body world info
+	m_sbi.air_density		=	(btScalar)1.2;
+	m_sbi.water_density	=	0;
+	m_sbi.water_offset		=	0;
+	m_sbi.water_normal		=	btVector3(0,0,0);
+	m_sbi.m_gravity.setValue(0,-10,0);
+	//NOTE(review): Initialize() was already called above; this second call looks redundant -- confirm before removing
+	m_sbi.m_sparsesdf.Initialize();
+
+
+}
+
+btSoftRigidDynamicsWorld::~btSoftRigidDynamicsWorld()
+{
+	if (!m_ownsSolver)
+		return;	//a solver supplied by the caller is not destroyed here
+	//the owned solver was placement-constructed in btAlignedAlloc memory: run its destructor, then free the memory
+	m_softBodySolver->~btSoftBodySolver();
+	btAlignedFree(m_softBodySolver);
+}
+
+void	btSoftRigidDynamicsWorld::predictUnconstraintMotion(btScalar timeStep)
+{
+	btDiscreteDynamicsWorld::predictUnconstraintMotion( timeStep );	//base class prediction runs first
+	{	//separate scope, presumably so that the profile sample only covers the soft body prediction
+		BT_PROFILE("predictUnconstraintMotionSoftBody");
+		m_softBodySolver->predictMotion( timeStep );
+	}
+}
+
+void	btSoftRigidDynamicsWorld::internalSingleStepSimulation( btScalar timeStep )
+{
+	///One step: solver preparation, the base class step, then soft body constraints, self collisions and updates.
+	// Let the solver grab the soft bodies and if necessary optimize for it
+	m_softBodySolver->optimize( getSoftBodyArray() );
+	// A string literal alone is a non-null pointer and never trips an assert, hence the "0 &&" below
+	if( !m_softBodySolver->checkInitialized() )
+	{
+		btAssert( 0 && "Solver initialization failed\n" );
+	}
+
+	btDiscreteDynamicsWorld::internalSingleStepSimulation( timeStep );
+
+	///solve soft bodies constraints
+	solveSoftBodiesConstraints( timeStep );
+
+	//self collisions
+	for ( int i=0;i<m_softBodies.size();i++)
+	{
+		btSoftBody*	psb=(btSoftBody*)m_softBodies[i];
+		psb->defaultCollisionHandler(psb);
+	}
+
+	///update soft bodies
+	m_softBodySolver->updateSoftBodies( );
+	
+	// End solver-wise simulation step
+	// ///////////////////////////////
+
+}
+
+void	btSoftRigidDynamicsWorld::solveSoftBodiesConstraints( btScalar timeStep )
+{
+	BT_PROFILE("solveSoftConstraints");
+
+	//cluster solving is skipped when the world holds no soft body
+	const bool haveSoftBodies = (m_softBodies.size() != 0);
+	if (haveSoftBodies)
+		btSoftBody::solveClusters(m_softBodies);
+
+	//the solver-wise constraint solve receives the time step multiplied by the solver's time scale
+	const btScalar scaledStep = timeStep * m_softBodySolver->getTimeScale();
+	m_softBodySolver->solveConstraints( scaledStep );
+}
+
+void	btSoftRigidDynamicsWorld::addSoftBody(btSoftBody* body,short int collisionFilterGroup,short int collisionFilterMask)
+{
+	//1. remember the body in the world's soft body list
+	m_softBodies.push_back(body);
+
+	//2. the body is handled by this world's soft body solver from now on
+	body->setSoftBodySolver( m_softBodySolver );
+
+	//3. register it as a collision object with the given filter group and mask;
+	//   the call is qualified, so it always uses the btCollisionWorld implementation
+	btCollisionWorld::addCollisionObject(body, collisionFilterGroup, collisionFilterMask);
+
+}
+
+void	btSoftRigidDynamicsWorld::removeSoftBody(btSoftBody* body)
+{
+	m_softBodies.remove(body);
+	//qualified call: goes straight to btCollisionWorld instead of this class' removeCollisionObject, which would call back into removeSoftBody
+	btCollisionWorld::removeCollisionObject(body);
+}
+
+void	btSoftRigidDynamicsWorld::removeCollisionObject(btCollisionObject* collisionObject)
+{
+	//soft bodies also have to leave m_softBodies; everything else is handled by the base class
+	if (btSoftBody* soft = btSoftBody::upcast(collisionObject))
+		removeSoftBody(soft);
+	else
+		btDiscreteDynamicsWorld::removeCollisionObject(collisionObject);
+}
+
+void	btSoftRigidDynamicsWorld::debugDrawWorld()
+{
+	btDiscreteDynamicsWorld::debugDrawWorld();
+
+	//soft bodies are only drawn when a debug drawer is available
+	if (!getDebugDrawer())
+		return;
+
+	for (int bodyIndex=0;bodyIndex<m_softBodies.size();bodyIndex++)
+	{
+		btSoftBody*	softBody=m_softBodies[bodyIndex];
+		//wireframe mode: draw the body frame and the body itself using m_drawFlags
+		if (getDebugDrawer() && (getDebugDrawer()->getDebugMode() & (btIDebugDraw::DBG_DrawWireframe)))
+		{
+			btSoftBodyHelpers::DrawFrame(softBody,m_debugDrawer);
+			btSoftBodyHelpers::Draw(softBody,m_debugDrawer,m_drawFlags);
+		}
+		//aabb mode: draw whichever of the node, face and cluster trees are enabled
+		if (!m_debugDrawer || !(m_debugDrawer->getDebugMode() & btIDebugDraw::DBG_DrawAabb))
+			continue;
+		if(m_drawNodeTree)		btSoftBodyHelpers::DrawNodeTree(softBody,m_debugDrawer);
+		if(m_drawFaceTree)		btSoftBodyHelpers::DrawFaceTree(softBody,m_debugDrawer);
+		if(m_drawClusterTree)	btSoftBodyHelpers::DrawClusterTree(softBody,m_debugDrawer);
+	}
+}
+
+
+
+
+struct btSoftSingleRayCallback : public btBroadphaseRayCallback
+{
+	btVector3	m_rayFromWorld;
+	btVector3	m_rayToWorld;
+	btTransform	m_rayFromTrans;
+	btTransform	m_rayToTrans;
+	btVector3	m_hitNormal;
+	//world whose rayTestSingle is called by process(), and the callback that receives the results
+	const btSoftRigidDynamicsWorld*	m_world;
+	btCollisionWorld::RayResultCallback&	m_resultCallback;
+	//stores the ray end points as vectors and as identity-rotation transforms, then precomputes the ray data below
+	btSoftSingleRayCallback(const btVector3& rayFromWorld,const btVector3& rayToWorld,const btSoftRigidDynamicsWorld* world,btCollisionWorld::RayResultCallback& resultCallback)
+	:m_rayFromWorld(rayFromWorld),
+	m_rayToWorld(rayToWorld),
+	m_world(world),
+	m_resultCallback(resultCallback)
+	{
+		m_rayFromTrans.setIdentity();
+		m_rayFromTrans.setOrigin(m_rayFromWorld);
+		m_rayToTrans.setIdentity();
+		m_rayToTrans.setOrigin(m_rayToWorld);
+		//direction from the ray start to the ray end, not yet normalized
+		btVector3 rayDir = (rayToWorld-rayFromWorld);
+		//NOTE(review): if rayFromWorld equals rayToWorld the direction has zero length -- confirm normalize() copes with that
+		rayDir.normalize ();
+		///what about division by zero? --> just set rayDirection[i] to INF/1e30
+		m_rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[0];
+		m_rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[1];
+		m_rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[2];
+		m_signs[0] = m_rayDirectionInverse[0] < 0.0;
+		m_signs[1] = m_rayDirectionInverse[1] < 0.0;
+		m_signs[2] = m_rayDirectionInverse[2] < 0.0;
+		//rayDir has been normalized above, so this dot product is the length of the ray segment
+		m_lambda_max = rayDir.dot(m_rayToWorld-m_rayFromWorld);
+
+	}
+
+	
+	///forwards one proxy to the world's rayTestSingle if the result callback's filter accepts it
+	virtual bool	process(const btBroadphaseProxy* proxy)
+	{
+		///terminate further ray tests, once the closestHitFraction reached zero
+		if (m_resultCallback.m_closestHitFraction == btScalar(0.f))
+			return false;
+
+		btCollisionObject*	collisionObject = (btCollisionObject*)proxy->m_clientObject;
+
+		//only perform raycast if filterMask matches
+		if(m_resultCallback.needsCollision(collisionObject->getBroadphaseHandle())) 
+		{
+			//RigidcollisionObject* collisionObject = ctrl->GetRigidcollisionObject();
+			//btVector3 collisionObjectAabbMin,collisionObjectAabbMax;
+#if 0
+#ifdef RECALCULATE_AABB
+			btVector3 collisionObjectAabbMin,collisionObjectAabbMax;
+			collisionObject->getCollisionShape()->getAabb(collisionObject->getWorldTransform(),collisionObjectAabbMin,collisionObjectAabbMax);
+#else
+			//getBroadphase()->getAabb(collisionObject->getBroadphaseHandle(),collisionObjectAabbMin,collisionObjectAabbMax);
+			const btVector3& collisionObjectAabbMin = collisionObject->getBroadphaseHandle()->m_aabbMin;
+			const btVector3& collisionObjectAabbMax = collisionObject->getBroadphaseHandle()->m_aabbMax;
+#endif
+#endif
+			//btScalar hitLambda = m_resultCallback.m_closestHitFraction;
+			//culling already done by broadphase
+			//if (btRayAabb(m_rayFromWorld,m_rayToWorld,collisionObjectAabbMin,collisionObjectAabbMax,hitLambda,m_hitNormal))
+			{
+				m_world->rayTestSingle(m_rayFromTrans,m_rayToTrans,
+					collisionObject,
+						collisionObject->getCollisionShape(),
+						collisionObject->getWorldTransform(),
+						m_resultCallback);
+			}
+		}
+		return true;
+	}
+};
+
+void	btSoftRigidDynamicsWorld::rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const
+{
+	BT_PROFILE("rayTest");
+	/// use the broadphase to accelerate the search for objects, based on their aabb
+	/// and for each object with ray-aabb overlap, perform an exact ray test
+	btSoftSingleRayCallback rayCB(rayFromWorld,rayToWorld,this,resultCallback);
+	//with USE_BRUTEFORCE_RAYBROADPHASE defined, every collision object is passed to the callback instead
+#ifndef USE_BRUTEFORCE_RAYBROADPHASE
+	m_broadphasePairCache->rayTest(rayFromWorld,rayToWorld,rayCB);
+#else
+	for (int i=0;i<this->getNumCollisionObjects();i++)
+	{
+		rayCB.process(m_collisionObjects[i]->getBroadphaseHandle());
+	}	
+#endif //USE_BRUTEFORCE_RAYBROADPHASE
+
+}
+
+
+void	btSoftRigidDynamicsWorld::rayTestSingle(const btTransform& rayFromTrans,const btTransform& rayToTrans,
+					  btCollisionObject* collisionObject,
+					  const btCollisionShape* collisionShape,
+					  const btTransform& colObjWorldTransform,
+					  RayResultCallback& resultCallback)
+{
+	//anything that is not a soft body shape goes through the generic btCollisionWorld ray test
+	if (!collisionShape->isSoftBody())
+	{
+		btCollisionWorld::rayTestSingle(rayFromTrans,rayToTrans,collisionObject,collisionShape,colObjWorldTransform,resultCallback);
+		return;
+	}
+
+	btSoftBody* soft = btSoftBody::upcast(collisionObject);
+	if (!soft)
+		return;
+
+	const btVector3& from = rayFromTrans.getOrigin();
+	const btVector3& to = rayToTrans.getOrigin();
+
+	btSoftBody::sRayCast hit;
+	if (!soft->rayTest(from, to, hit))
+		return;
+
+	//only hits whose fraction does not exceed the closest fraction recorded so far are reported
+	if (!(hit.fraction <= resultCallback.m_closestHitFraction))
+		return;
+
+	btCollisionWorld::LocalShapeInfo shapeInfo;
+	shapeInfo.m_shapePart = 0;
+	shapeInfo.m_triangleIndex = hit.index;
+
+	//default normal: the normalized direction from the ray end back towards the ray start
+	const btVector3 rayDir = to - from;
+	btVector3 normal = -rayDir;
+	normal.normalize();
+
+	//for a face hit the face normal is used instead, flipped if needed so that it points toward the ray origin
+	if (hit.feature == btSoftBody::eFeature::Face)
+	{
+		normal = soft->m_faces[hit.index].m_normal;
+		if (normal.dot(rayDir) > 0)
+			normal = -normal;
+	}
+
+	btCollisionWorld::LocalRayResult rayResult(collisionObject,&shapeInfo,normal,hit.fraction);
+	resultCallback.addSingleResult(rayResult,true);
+}
+
+
+void	btSoftRigidDynamicsWorld::serializeSoftBodies(btSerializer* serializer)
+{
+	//walk every collision object of the world and emit one chunk per soft body
+	for (int objectIndex=0;objectIndex<m_collisionObjects.size();objectIndex++)
+	{
+		btCollisionObject* object = m_collisionObjects[objectIndex];
+		const bool isSoftBody = (object->getInternalType() & btCollisionObject::CO_SOFT_BODY) != 0;
+		if (!isSoftBody)
+			continue;
+
+		//size the chunk from the object, let the object fill it in, then finalize it with the soft body code
+		const int bufferSize = object->calculateSerializeBufferSize();
+		btChunk* chunk = serializer->allocate(bufferSize,1);
+		const char* structType = object->serialize(chunk->m_oldPtr, serializer);
+		serializer->finalizeChunk(chunk,structType,BT_SOFTBODY_CODE,object);
+	}
+}
+
+void	btSoftRigidDynamicsWorld::serialize(btSerializer* serializer)
+{
+	//soft bodies, rigid bodies and collision objects are written, in that order, between start and finish
+	serializer->startSerialization();
+
+	serializeSoftBodies(serializer);
+
+	serializeRigidBodies(serializer);
+
+	serializeCollisionObjects(serializer);
+
+	serializer->finishSerialization();
+}
+
+
diff --git a/src/bullet/BulletSoftBody/btSoftRigidDynamicsWorld.h b/src/bullet/BulletSoftBody/btSoftRigidDynamicsWorld.h
new file mode 100644
index 00000000..3e0efafd
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftRigidDynamicsWorld.h
@@ -0,0 +1,107 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_RIGID_DYNAMICS_WORLD_H
+#define BT_SOFT_RIGID_DYNAMICS_WORLD_H
+
+#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h"
+#include "btSoftBody.h"
+
+typedef	btAlignedObjectArray<btSoftBody*> btSoftBodyArray;
+
+class btSoftBodySolver;
+///Dynamics world derived from btDiscreteDynamicsWorld that additionally keeps an array of btSoftBody pointers and soft body world info.
+class btSoftRigidDynamicsWorld : public btDiscreteDynamicsWorld
+{
+	// Soft bodies registered with this world; exposed through getSoftBodyArray().
+	btSoftBodyArray	m_softBodies;
+	int				m_drawFlags;	// read/written by getDrawFlags()/setDrawFlags()
+	bool			m_drawNodeTree;
+	bool			m_drawFaceTree;
+	bool			m_drawClusterTree;
+	btSoftBodyWorldInfo m_sbi;	// exposed through getWorldInfo()
+	///Solver classes that encapsulate multiple soft bodies for solving
+	btSoftBodySolver *m_softBodySolver;
+	bool			m_ownsSolver;	// NOTE(review): presumably tells the destructor whether to release m_softBodySolver - confirm in the .cpp
+
+protected:
+
+	virtual void	predictUnconstraintMotion(btScalar timeStep);
+
+	virtual void	internalSingleStepSimulation( btScalar timeStep);
+
+	void	solveSoftBodiesConstraints( btScalar timeStep );
+	// Writes one serializer chunk per soft body; called from serialize().
+	void	serializeSoftBodies(btSerializer* serializer);
+
+public:
+	// softBodySolver defaults to 0 (no solver supplied by the caller).
+	btSoftRigidDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver, btCollisionConfiguration* collisionConfiguration, btSoftBodySolver *softBodySolver = 0 );
+
+	virtual ~btSoftRigidDynamicsWorld();
+
+	virtual void	debugDrawWorld();
+	// Filter group and mask default to btBroadphaseProxy::DefaultFilter and btBroadphaseProxy::AllFilter.
+	void	addSoftBody(btSoftBody* body,short int collisionFilterGroup=btBroadphaseProxy::DefaultFilter,short int collisionFilterMask=btBroadphaseProxy::AllFilter);
+
+	void	removeSoftBody(btSoftBody* body);
+	// NOTE(review): the description below talks about rigid bodies only; confirm against the implementation in the .cpp.
+	///removeCollisionObject will first check if it is a rigid body, if so call removeRigidBody otherwise call btDiscreteDynamicsWorld::removeCollisionObject
+	virtual void	removeCollisionObject(btCollisionObject* collisionObject);
+
+	int		getDrawFlags() const { return(m_drawFlags); }
+	void	setDrawFlags(int f)	{ m_drawFlags=f; }
+	// Mutable and read-only access to the soft body world info stored in this world.
+	btSoftBodyWorldInfo&	getWorldInfo()
+	{
+		return m_sbi;
+	}
+	const btSoftBodyWorldInfo&	getWorldInfo() const
+	{
+		return m_sbi;
+	}
+	// Identifies this world as BT_SOFT_RIGID_DYNAMICS_WORLD.
+	virtual btDynamicsWorldType	getWorldType() const
+	{
+		return	BT_SOFT_RIGID_DYNAMICS_WORLD;
+	}
+	// Mutable and read-only access to the soft body array.
+	btSoftBodyArray& getSoftBodyArray()
+	{
+		return m_softBodies;
+	}
+
+	const btSoftBodyArray& getSoftBodyArray() const
+	{
+		return m_softBodies;
+	}
+
+
+	virtual void rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const; 
+
+	/// rayTestSingle performs a raycast call and calls the resultCallback. It is used internally by rayTest.
+	/// In a future implementation, we consider moving the ray test as a virtual method in btCollisionShape.
+	/// This allows more customization.
+	static void	rayTestSingle(const btTransform& rayFromTrans,const btTransform& rayToTrans,
+					  btCollisionObject* collisionObject,
+					  const btCollisionShape* collisionShape,
+					  const btTransform& colObjWorldTransform,
+					  RayResultCallback& resultCallback);
+	// Serializes soft bodies, rigid bodies and collision objects, in that order.
+	virtual	void	serialize(btSerializer* serializer);
+
+};
+
+#endif //BT_SOFT_RIGID_DYNAMICS_WORLD_H
diff --git a/src/bullet/BulletSoftBody/btSoftSoftCollisionAlgorithm.cpp b/src/bullet/BulletSoftBody/btSoftSoftCollisionAlgorithm.cpp
new file mode 100644
index 00000000..1b8cfa72
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftSoftCollisionAlgorithm.cpp
@@ -0,0 +1,47 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btSoftSoftCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/CollisionShapes/btBoxShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletSoftBody/btSoftBodySolvers.h"
+#include "btSoftBody.h"
+
+#define USE_PERSISTENT_CONTACTS 1
+///Gives every member a defined value: getAllContactManifolds() reads m_manifoldPtr and m_ownManifold, which this constructor used to leave indeterminate.
+btSoftSoftCollisionAlgorithm::btSoftSoftCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* /*obj0*/,btCollisionObject* /*obj1*/)
+: btCollisionAlgorithm(ci),
+m_ownManifold(false),	// nothing in this file creates a manifold, so none is ever reported as owned
+m_manifoldPtr(mf), m_softBody0(0), m_softBody1(0)	// the body pointers are not read here: processCollision() works on the objects it is handed
+{
+}
+
+btSoftSoftCollisionAlgorithm::~btSoftSoftCollisionAlgorithm()
+{	// intentionally empty: no member is allocated or released by the code in this file
+}
+
+void btSoftSoftCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& /*dispatchInfo*/,btManifoldResult* /*resultOut*/)
+{
+	// Both objects are treated as soft bodies; the pair is forwarded to the solver attached to the first one.
+	btSoftBody* const first = (btSoftBody*)body0;
+	first->getSoftBodySolver()->processCollision(first, (btSoftBody*)body1);
+}
+
+btScalar btSoftSoftCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* /*body0*/,btCollisionObject* /*body1*/,const btDispatcherInfo& /*dispatchInfo*/,btManifoldResult* /*resultOut*/)
+{
+	//not yet implemented: every argument is ignored
+	return 1.f;	// constant result for every pair
+}
diff --git a/src/bullet/BulletSoftBody/btSoftSoftCollisionAlgorithm.h b/src/bullet/BulletSoftBody/btSoftSoftCollisionAlgorithm.h
new file mode 100644
index 00000000..92d683c1
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSoftSoftCollisionAlgorithm.h
@@ -0,0 +1,69 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_SOFT_COLLISION_ALGORITHM_H
+#define BT_SOFT_SOFT_COLLISION_ALGORITHM_H
+
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
+
+class btPersistentManifold;
+class btSoftBody;
+
+///collision detection between two btSoftBody shapes
+class btSoftSoftCollisionAlgorithm : public btCollisionAlgorithm
+{
+	bool	m_ownManifold;
+	btPersistentManifold*	m_manifoldPtr;
+	// The two body pointers are not read by any code in this header.
+	btSoftBody*	m_softBody0;
+	btSoftBody*	m_softBody1;
+
+	// The inline constructor below gives all four members defined values: getAllContactManifolds() reads m_manifoldPtr and m_ownManifold, which it used to leave indeterminate.
+public:
+	btSoftSoftCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci)
+		: btCollisionAlgorithm(ci), m_ownManifold(false), m_manifoldPtr(0), m_softBody0(0), m_softBody1(0) {}
+
+	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+
+	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	// Appends the manifold to manifoldArray only when one is set and flagged as owned.
+	virtual	void	getAllContactManifolds(btManifoldArray&	manifoldArray)
+	{
+		if (m_manifoldPtr && m_ownManifold)
+			manifoldArray.push_back(m_manifoldPtr);
+	}
+
+	btSoftSoftCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
+
+	virtual ~btSoftSoftCollisionAlgorithm();
+	// Factory: placement-constructs the algorithm in storage obtained from the dispatcher, passing 0 as the manifold.
+	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
+	{
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		{
+			int bbsize = sizeof(btSoftSoftCollisionAlgorithm);
+			void* ptr = ci.m_dispatcher1->allocateCollisionAlgorithm(bbsize);
+			return new(ptr) btSoftSoftCollisionAlgorithm(0,ci,body0,body1);
+		}
+	};
+
+};
+
+#endif //BT_SOFT_SOFT_COLLISION_ALGORITHM_H
+
+
diff --git a/src/bullet/BulletSoftBody/btSparseSDF.h b/src/bullet/BulletSoftBody/btSparseSDF.h
new file mode 100644
index 00000000..90a26cdf
--- /dev/null
+++ b/src/bullet/BulletSoftBody/btSparseSDF.h
@@ -0,0 +1,306 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+///btSparseSdf implementation by Nathanael Presson
+
+#ifndef BT_SPARSE_SDF_H
+#define BT_SPARSE_SDF_H
+
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/NarrowPhaseCollision/btGjkEpa2.h"
+
+// Modified Paul Hsieh hash: folds 2*DWORDLEN unsigned short values read from pdata (two per loop step) into one unsigned int.
+template <const int DWORDLEN>
+unsigned int HsiehHash(const void* pdata)
+{
+	const unsigned short*	data=(const unsigned short*)pdata;
+	unsigned				hash=DWORDLEN<<2,tmp;	// seed is DWORDLEN*4
+	for(int i=0;i<DWORDLEN;++i)
+	{
+		hash	+=	data[0];	// first short of the current pair
+		tmp		=	(data[1]<<11)^hash;	// second short of the pair
+		hash	=	(hash<<16)^tmp;
+		data	+=	2;	// advance to the next pair
+		hash	+=	hash>>11;
+	}
+	hash^=hash<<3;hash+=hash>>5;	// final mixing of the accumulated value
+	hash^=hash<<4;hash+=hash>>17;
+	hash^=hash<<25;hash+=hash>>6;
+	return(hash);
+}
+
+template <const int CELLSIZE>
+struct	btSparseSdf
+{
+	//
+	// Inner types
+	//
+	struct IntFrac
+	{
+		int					b;
+		int					i;
+		btScalar			f;
+	};
+	struct	Cell
+	{
+		btScalar			d[CELLSIZE+1][CELLSIZE+1][CELLSIZE+1];
+		int					c[3];
+		int					puid;
+		unsigned			hash;
+		btCollisionShape*	pclient;
+		Cell*				next;
+	};
+	//
+	// Fields
+	//
+
+	btAlignedObjectArray<Cell*>		cells;	
+	btScalar						voxelsz;
+	int								puid;
+	int								ncells;
+	int								nprobes;
+	int								nqueries;	
+
+	//
+	// Methods
+	//
+
+	// Initialize: empties the table, then sizes it to `hashsize` buckets (each a null chain head).
+	void					Initialize(int hashsize=2383)
+	{
+		Reset();					// free existing cells first: resizing before this could shrink the table and drop populated buckets without freeing them
+		cells.resize(hashsize,0);	// buckets that survive the resize were already cleared by Reset(); added ones are filled with 0
+	}
+	// Reset: frees every cell in every bucket, then restores the default voxel size and counters.
+	void					Reset()
+	{
+		for(int bucket=0,bucketCount=cells.size();bucket<bucketCount;++bucket)
+		{
+			Cell*	node=cells[bucket];
+			cells[bucket]=0;	// detach the chain from its bucket before walking it
+			while(node)
+			{
+				Cell* const	doomed=node;
+				node=node->next;
+				delete doomed;
+			}
+		}
+		voxelsz		=0.25;
+		puid		=0;
+		ncells		=0;
+		nprobes		=1;	// NOTE(review): counters start at 1, presumably so a probes/queries ratio never divides by zero - confirm
+		nqueries	=1;
+	}
+	// GarbageCollect: deletes every cell whose puid stamp is older than (puid - lifetime), resets the probe counters, then advances puid.
+	void					GarbageCollect(int lifetime=256)
+	{
+		const int oldestKept=puid-lifetime;
+		for(int bucket=0;bucket<cells.size();++bucket)
+		{
+			Cell*&	head=cells[bucket];
+			Cell	*prev=0,*node=head;
+			while(node)
+			{
+				Cell* const	after=node->next;
+				if(node->puid<oldestKept)
+				{
+					if(prev) prev->next=after; else head=after;	// unlink from the chain or from the bucket head
+					delete node;--ncells;
+				}
+				else prev=node;	// only a surviving cell becomes the new predecessor
+				node=after;
+			}
+		}
+		//printf("GC[%d]: %d cells, PpQ: %f\r\n",puid,ncells,nprobes/(btScalar)nqueries);
+		nqueries=1;
+		nprobes=1;
+		++puid;	// cells touched by Evaluate() from now on carry the new stamp
+		///@todo: Reset puid's when int range limit is reached, else setup a priority list...
+	}
+	// RemoveReferences: deletes every cell built for shape `pcs` and returns how many were removed.
+	int						RemoveReferences(btCollisionShape* pcs)
+	{
+		int	removed=0;
+		for(int bucket=0;bucket<cells.size();++bucket)
+		{
+			Cell*&	head=cells[bucket];
+			Cell	*prev=0,*node=head;
+			while(node)
+			{
+				Cell* const	after=node->next;
+				if(node->pclient==pcs)
+				{
+					if(prev) prev->next=after; else head=after;	// unlink from the chain or from the bucket head
+					delete node;++removed;--ncells;	// ncells was previously left untouched here, so it over-counted after a removal
+				}
+				else prev=node;	// only a surviving cell becomes the new predecessor
+				node=after;
+			}
+		}
+		return(removed);
+	}
+	// Evaluate: returns the cached distance field of `shape` interpolated at x, minus `margin`; the cell containing x is built on first use.
+	btScalar				Evaluate(	const btVector3& x,	// query position
+		btCollisionShape* shape,	// shape whose field is sampled; also part of the cell key
+		btVector3& normal,	// out: normalized interpolated gradient of the eight surrounding samples
+		btScalar margin)	// subtracted from the interpolated distance
+	{
+		/* Lookup cell			*/ 
+		const btVector3	scx=x/voxelsz;	// position in voxel units
+		const IntFrac	ix=Decompose(scx.x());
+		const IntFrac	iy=Decompose(scx.y());
+		const IntFrac	iz=Decompose(scx.z());
+		const unsigned	h=Hash(ix.b,iy.b,iz.b,shape);
+		Cell*&			root=cells[static_cast<int>(h%cells.size())];	// bucket head; requires a non-empty table
+		Cell*			c=root;
+		++nqueries;
+		while(c)
+		{
+			++nprobes;
+			if(	(c->hash==h)	&&
+				(c->c[0]==ix.b)	&&
+				(c->c[1]==iy.b)	&&
+				(c->c[2]==iz.b)	&&
+				(c->pclient==shape))
+			{ break; }
+			else
+			{ c=c->next; }
+		}
+		if(!c)
+		{
+			/* Not cached yet: create the cell, push it at the head of its bucket and sample it	*/
+			++nprobes;		
+			++ncells;
+			c=new Cell();
+			c->next=root;root=c;
+			c->pclient=shape;
+			c->hash=h;
+			c->c[0]=ix.b;c->c[1]=iy.b;c->c[2]=iz.b;
+			BuildCell(*c);
+		}
+		c->puid=puid;	// stamp with the current puid; GarbageCollect() compares this stamp
+		/* Extract infos		*/ 
+		const int		o[]={	ix.i,iy.i,iz.i};
+		const btScalar	d[]={	c->d[o[0]+0][o[1]+0][o[2]+0],	// corner offsets (x,y,z): d[0]=(0,0,0)
+			c->d[o[0]+1][o[1]+0][o[2]+0],	// d[1]=(1,0,0)
+			c->d[o[0]+1][o[1]+1][o[2]+0],	// d[2]=(1,1,0)
+			c->d[o[0]+0][o[1]+1][o[2]+0],	// d[3]=(0,1,0)
+			c->d[o[0]+0][o[1]+0][o[2]+1],	// d[4]=(0,0,1)
+			c->d[o[0]+1][o[1]+0][o[2]+1],	// d[5]=(1,0,1)
+			c->d[o[0]+1][o[1]+1][o[2]+1],	// d[6]=(1,1,1)
+			c->d[o[0]+0][o[1]+1][o[2]+1]};	// d[7]=(0,1,1)
+		/* Normal	*/ 
+#if 1
+		const btScalar	gx[]={	d[1]-d[0],d[2]-d[3],	// differences along x on the four x-aligned edges
+			d[5]-d[4],d[6]-d[7]};
+		const btScalar	gy[]={	d[3]-d[0],d[2]-d[1],	// differences along y
+			d[7]-d[4],d[6]-d[5]};
+		const btScalar	gz[]={	d[4]-d[0],d[5]-d[1],	// differences along z
+			d[7]-d[3],d[6]-d[2]};
+		normal.setX(Lerp(	Lerp(gx[0],gx[1],iy.f),
+			Lerp(gx[2],gx[3],iy.f),iz.f));
+		normal.setY(Lerp(	Lerp(gy[0],gy[1],ix.f),
+			Lerp(gy[2],gy[3],ix.f),iz.f));
+		normal.setZ(Lerp(	Lerp(gz[0],gz[1],ix.f),
+			Lerp(gz[2],gz[3],ix.f),iy.f));
+		normal		=	normal.normalized();
+#else
+		normal		=	btVector3(d[1]-d[0],d[3]-d[0],d[4]-d[0]).normalized();
+#endif
+		/* Distance: interpolate along x, then y, then z	*/ 
+		const btScalar	d0=Lerp(Lerp(d[0],d[1],ix.f),
+			Lerp(d[3],d[2],ix.f),iy.f);
+		const btScalar	d1=Lerp(Lerp(d[4],d[5],ix.f),
+			Lerp(d[7],d[6],ix.f),iy.f);
+		return(Lerp(d0,d1,iz.f)-margin);
+	}
+	// BuildCell: fills c.d with the distance from each of the (CELLSIZE+1)^3 sample points of the cell to c.pclient.
+	void					BuildCell(Cell& c)
+	{
+		// Sample point (0,0,0) of the cell: the cell coordinates scaled by CELLSIZE*voxelsz.
+		const btVector3	origin=btVector3(	(btScalar)c.c[0],
+			(btScalar)c.c[1],
+			(btScalar)c.c[2])	*
+			CELLSIZE*voxelsz;
+		for(int iz=0;iz<=CELLSIZE;++iz)
+		{
+			const btScalar	pz=voxelsz*iz+origin.z();
+			for(int iy=0;iy<=CELLSIZE;++iy)
+			{
+				const btScalar	py=voxelsz*iy+origin.y();
+				for(int ix=0;ix<=CELLSIZE;++ix)
+				{
+					const btVector3	sample(voxelsz*ix+origin.x(),py,pz);
+					c.d[ix][iy][iz]=DistanceToShape(sample,c.pclient);
+				}
+			}
+		}
+	}
+	// DistanceToShape: signed distance from point x to `shape`, measured with an identity transform and zero margin.
+	static inline btScalar	DistanceToShape(const btVector3& x,
+		btCollisionShape* shape)
+	{
+		// Only convex shapes are measured; anything else reports zero.
+		if(!shape->isConvex())
+			return(0);
+
+		btTransform	identity;
+		identity.setIdentity();
+		btGjkEpaSolver2::sResults	res;
+		btConvexShape* const		convex=static_cast<btConvexShape*>(shape);
+		return(btGjkEpaSolver2::SignedDistance(x,0,convex,identity,res));
+	}
+	// Decompose: splits a coordinate (Evaluate passes x/voxelsz) into cell index b, sample index i inside that cell, and fraction f past that sample.
+	static inline IntFrac	Decompose(btScalar x)
+	{
+		/* That one needs a lot of improvements...	*/
+		/* Remove test, faster floor...				*/ 
+		IntFrac			r;
+		x/=CELLSIZE;	// now measured in cells
+		const int		o=x<0?(int)(-x+1):0;	// integer shift that makes a negative x non-negative, so the truncation below acts as a floor
+		x+=o;r.b=(int)x;
+		const btScalar	k=(x-r.b)*CELLSIZE;	// position inside the cell, back in voxel units
+		r.i=(int)k;r.f=k-r.i;r.b-=o;	// last step removes the shift from the cell index
+		return(r);
+	}
+	// Lerp: linear interpolation from a (t=0) towards b, evaluated as a+(b-a)*t; t is not clamped.
+	static inline btScalar	Lerp(btScalar a,btScalar b,btScalar t)
+	{
+		return(a+(b-a)*t);
+	}
+
+
+
+	// Hash: bucket hash for the cell at integer cell coordinates (x,y,z) of `shape`.
+	static inline unsigned int	Hash(int x,int y,int z,btCollisionShape* shape)
+	{
+		// The key is hashed as raw memory, so it must not contain indeterminate bytes.
+		// With only three ints in front of the pointer, targets that align pointers to
+		// 8 bytes insert 4 padding bytes there; the explicit, zeroed `w` occupies that
+		// slot instead, so equal (x,y,z,shape) keys always produce equal hashes.
+		struct btS
+		{ 
+			int x,y,z,w;
+			void* p;
+		};
+
+		btS myset;
+		myset.x=x;myset.y=y;myset.z=z;myset.w=0;myset.p=shape;
+
+		// HsiehHash consumes the key in 4-byte units.
+		return HsiehHash<sizeof(btS)/4> (&myset);
+	}
+};
+
+
+#endif //BT_SPARSE_SDF_H
diff --git a/src/bullet/LinearMath/btAabbUtil2.h b/src/bullet/LinearMath/btAabbUtil2.h
new file mode 100644
index 00000000..42b721de
--- /dev/null
+++ b/src/bullet/LinearMath/btAabbUtil2.h
@@ -0,0 +1,236 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_AABB_UTIL2
+#define BT_AABB_UTIL2
+
+#include "btTransform.h"
+#include "btVector3.h"
+#include "btMinMax.h"
+
+
+/// Moves the two corners of an aabb in place: expansionMin is added to aabbMin and expansionMax to aabbMax.
+SIMD_FORCE_INLINE void AabbExpand (btVector3& aabbMin,
+								   btVector3& aabbMax,
+								   const btVector3& expansionMin,
+								   const btVector3& expansionMax)
+{
+	aabbMin = aabbMin + expansionMin;
+	aabbMax = aabbMax + expansionMax;
+}
+
+/// Point-in-aabb test: true when `point` lies within [aabbMin1, aabbMax1] on all three axes.
+/// A point exactly on a face of the box counts as inside.
+SIMD_FORCE_INLINE bool TestPointAgainstAabb2(const btVector3 &aabbMin1, const btVector3 &aabbMax1,
+								const btVector3 &point)
+{
+	const bool outsideX = aabbMin1.getX() > point.getX() || aabbMax1.getX() < point.getX();
+	const bool outsideZ = aabbMin1.getZ() > point.getZ() || aabbMax1.getZ() < point.getZ();
+	const bool outsideY = aabbMin1.getY() > point.getY() || aabbMax1.getY() < point.getY();
+	return !(outsideX || outsideZ || outsideY);
+}
+
+
+/// conservative test for overlap between two aabbs: true unless the boxes are separated
+/// along at least one axis; boxes that merely touch count as overlapping.
+SIMD_FORCE_INLINE bool TestAabbAgainstAabb2(const btVector3 &aabbMin1, const btVector3 &aabbMax1,
+								const btVector3 &aabbMin2, const btVector3 &aabbMax2)
+{
+	const bool apartX = aabbMin1.getX() > aabbMax2.getX() || aabbMax1.getX() < aabbMin2.getX();
+	const bool apartZ = aabbMin1.getZ() > aabbMax2.getZ() || aabbMax1.getZ() < aabbMin2.getZ();
+	const bool apartY = aabbMin1.getY() > aabbMax2.getY() || aabbMax1.getY() < aabbMin2.getY();
+	return !(apartX || apartZ || apartY);
+}
+
+/// conservative test for overlap between triangle and aabb: only the triangle's per-axis extent is compared with the box
+SIMD_FORCE_INLINE bool TestTriangleAgainstAabb2(const btVector3 *vertices,
+									const btVector3 &aabbMin, const btVector3 &aabbMax)
+{
+	const btVector3 &a = vertices[0];
+	const btVector3 &b = vertices[1];
+	const btVector3 &c = vertices[2];
+
+	// Axes are visited in the order x, z, y; the first separating axis ends the test.
+	const int axisOrder[3] = {0, 2, 1};
+	for (int n = 0; n < 3; ++n)
+	{
+		const int axis = axisOrder[n];
+		if (btMin(btMin(a[axis], b[axis]), c[axis]) > aabbMax[axis]) return false;
+		if (btMax(btMax(a[axis], b[axis]), c[axis]) < aabbMin[axis]) return false;
+	}
+	return true;
+}
+
+/// Bit mask telling on which sides the point p lies outside the origin-centred box [-halfExtent, halfExtent]; 0 when it is inside.
+SIMD_FORCE_INLINE int	btOutcode(const btVector3& p,const btVector3& halfExtent) 
+{
+	// Bits 0x01/0x02/0x04: below -halfExtent on x/y/z.
+	// Bits 0x08/0x10/0x20: above +halfExtent on x/y/z.
+	const int xBits = (p.getX() < -halfExtent.getX() ? 0x01 : 0x0) | (p.getX() > halfExtent.getX() ? 0x08 : 0x0);
+	const int yBits = (p.getY() < -halfExtent.getY() ? 0x02 : 0x0) | (p.getY() > halfExtent.getY() ? 0x10 : 0x0);
+	const int zBits = (p.getZ() < -halfExtent.getZ() ? 0x4 : 0x0) | (p.getZ() > halfExtent.getZ() ? 0x20 : 0x0);
+	return xBits | yBits | zBits;
+}
+
+
+/// Ray against aabb test working per axis on the two bounding planes; true when the resulting parameter interval overlaps (lambda_min, lambda_max).
+SIMD_FORCE_INLINE bool btRayAabb2(const btVector3& rayFrom,
+								  const btVector3& rayInvDirection,	// multiplied with plane distances, i.e. used as 1/direction per axis
+								  const unsigned int raySign[3],	// per axis: index (0 or 1) into bounds of the plane used for the entry parameter
+								  const btVector3 bounds[2],	// NOTE(review): presumably bounds[0]=aabb min, bounds[1]=aabb max - confirm with callers
+								  btScalar& tmin,	// out: entry parameter; written even when the function returns false
+								  btScalar lambda_min,
+								  btScalar lambda_max)
+{
+	btScalar tmax, tymin, tymax, tzmin, tzmax;
+	tmin = (bounds[raySign[0]].getX() - rayFrom.getX()) * rayInvDirection.getX();
+	tmax = (bounds[1-raySign[0]].getX() - rayFrom.getX()) * rayInvDirection.getX();
+	tymin = (bounds[raySign[1]].getY() - rayFrom.getY()) * rayInvDirection.getY();
+	tymax = (bounds[1-raySign[1]].getY() - rayFrom.getY()) * rayInvDirection.getY();
+	// The x and y intervals must intersect.
+	if ( (tmin > tymax) || (tymin > tmax) )
+		return false;
+	// Narrow [tmin, tmax] to the intersection of the x and y intervals.
+	if (tymin > tmin)
+		tmin = tymin;
+
+	if (tymax < tmax)
+		tmax = tymax;
+
+	tzmin = (bounds[raySign[2]].getZ() - rayFrom.getZ()) * rayInvDirection.getZ();
+	tzmax = (bounds[1-raySign[2]].getZ() - rayFrom.getZ()) * rayInvDirection.getZ();
+	// Same again for the z interval.
+	if ( (tmin > tzmax) || (tzmin > tmax) )
+		return false;
+	if (tzmin > tmin)
+		tmin = tzmin;
+	if (tzmax < tmax)
+		tmax = tzmax;
+	return ( (tmin < lambda_max) && (tmax > lambda_min) );
+}
+/// Segment (rayFrom to rayTo) against aabb test based on btOutcode(); on a hit writes the entry fraction to param and the hit face axis to normal.
+SIMD_FORCE_INLINE bool btRayAabb(const btVector3& rayFrom, 
+								 const btVector3& rayTo, 
+								 const btVector3& aabbMin, 
+								 const btVector3& aabbMax,
+					  btScalar& param, btVector3& normal) 	// param is in/out: read as the initial exit bound; both are only written when true is returned
+{
+	btVector3 aabbHalfExtent = (aabbMax-aabbMin)* btScalar(0.5);
+	btVector3 aabbCenter = (aabbMax+aabbMin)* btScalar(0.5);
+	btVector3	source = rayFrom - aabbCenter;	// both end points expressed relative to the box centre
+	btVector3	target = rayTo - aabbCenter;
+	int	sourceOutcode = btOutcode(source,aabbHalfExtent);
+	int targetOutcode = btOutcode(target,aabbHalfExtent);
+	if ((sourceOutcode & targetOutcode) == 0x0)	// a shared bit means both end points are beyond the same face: no hit
+	{
+		btScalar lambda_enter = btScalar(0.0);
+		btScalar lambda_exit  = param;
+		btVector3 r = target - source;
+		int i;
+		btScalar	normSign = 1;
+		btVector3	hitNormal(0,0,0);
+		int bit=1;
+		// First pass (normSign=1) walks bits 0x01,0x02,0x04, second pass (normSign=-1) bits 0x08,0x10,0x20, matching btOutcode().
+		for (int j=0;j<2;j++)
+		{
+			for (i = 0; i != 3; ++i)
+			{
+				if (sourceOutcode & bit)
+				{
+					btScalar lambda = (-source[i] - aabbHalfExtent[i]*normSign) / r[i];	// start is beyond this face: candidate entry fraction
+					if (lambda_enter <= lambda)
+					{
+						lambda_enter = lambda;
+						hitNormal.setValue(0,0,0);
+						hitNormal[i] = normSign;
+					}
+				}
+				else if (targetOutcode & bit) 
+				{
+					btScalar lambda = (-source[i] - aabbHalfExtent[i]*normSign) / r[i];	// end is beyond this face: candidate exit fraction
+					btSetMin(lambda_exit, lambda);
+				}
+				bit<<=1;
+			}
+			normSign = btScalar(-1.);
+		}
+		if (lambda_enter <= lambda_exit)
+		{
+			param = lambda_enter;
+			normal = hitNormal;
+			return true;
+		}
+	}
+	return false;
+}
+
+
+/// Computes the aabb, under transform t, of a box centred on the local origin with the given half extents plus margin on every axis.
+SIMD_FORCE_INLINE	void btTransformAabb(const btVector3& halfExtents, btScalar margin,const btTransform& t,btVector3& aabbMinOut,btVector3& aabbMaxOut)
+{
+	const btVector3 paddedHalf = halfExtents+btVector3(margin,margin,margin);
+	const btMatrix3x3 absBasis = t.getBasis().absolute();  
+	const btVector3 origin = t.getOrigin();
+	const btVector3 reach = btVector3(absBasis[0].dot(paddedHalf),
+		   absBasis[1].dot(paddedHalf),
+		  absBasis[2].dot(paddedHalf));
+	aabbMinOut = origin - reach;
+	aabbMaxOut = origin + reach;
+}
+
+/// Computes the aabb, under transform trans, of the local box [localAabbMin, localAabbMax] grown by margin on every axis.
+SIMD_FORCE_INLINE	void btTransformAabb(const btVector3& localAabbMin,const btVector3& localAabbMax, btScalar margin,const btTransform& trans,btVector3& aabbMinOut,btVector3& aabbMaxOut)
+{
+		// The local box must be well formed on every axis.
+		btAssert(localAabbMin.getX() <= localAabbMax.getX());
+		btAssert(localAabbMin.getY() <= localAabbMax.getY());
+		btAssert(localAabbMin.getZ() <= localAabbMax.getZ());
+		btVector3 paddedHalf = btScalar(0.5)*(localAabbMax-localAabbMin);
+		paddedHalf+=btVector3(margin,margin,margin);
+
+		const btVector3 localMid = btScalar(0.5)*(localAabbMax+localAabbMin);
+		const btMatrix3x3 absBasis = trans.getBasis().absolute();  
+		const btVector3 worldMid = trans(localMid);
+		const btVector3 reach = btVector3(absBasis[0].dot(paddedHalf),
+			   absBasis[1].dot(paddedHalf),
+			  absBasis[2].dot(paddedHalf));
+		aabbMinOut = worldMid-reach;
+		aabbMaxOut = worldMid+reach;
+}
+
+#define USE_BANCHLESS 1
+#ifdef USE_BANCHLESS
+	//Overlap test for two quantized aabbs (three unsigned shorts per corner). This variant replaces the block below, uses no branches and returns 1 or 0 as a 32 bit value instead of an 8 bit one for improved performance (~3x on XBox 360)
+	SIMD_FORCE_INLINE unsigned testQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
+	{		
+		return static_cast<unsigned int>(btSelect((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0])
+			& (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2])
+			& (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])),
+			1, 0));
+	}
+#else
+	SIMD_FORCE_INLINE bool testQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2)
+	{
+		bool overlap = true;	// cleared by any axis (checked in the order 0, 2, 1) on which the intervals are disjoint
+		overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? false : overlap;
+		overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? false : overlap;
+		overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? false : overlap;
+		return overlap;
+	}
+#endif //USE_BANCHLESS
+
+#endif //BT_AABB_UTIL2
+
+
diff --git a/src/bullet/LinearMath/btAlignedAllocator.cpp b/src/bullet/LinearMath/btAlignedAllocator.cpp
new file mode 100644
index 00000000..c4c0ceb2
--- /dev/null
+++ b/src/bullet/LinearMath/btAlignedAllocator.cpp
@@ -0,0 +1,181 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "btAlignedAllocator.h"
+
+int gNumAlignedAllocs = 0;
+int gNumAlignedFree = 0;
+int gTotalBytesAlignedAllocs = 0;//detect memory leaks
+// Default unaligned allocation hooks: thin wrappers around malloc and free, installed in sAllocFunc / sFreeFunc below.
+static void *btAllocDefault(size_t size)
+{
+	return malloc(size);
+}
+
+static void btFreeDefault(void *ptr)
+{
+	free(ptr);
+}
+
+static btAllocFunc *sAllocFunc = btAllocDefault;
+static btFreeFunc *sFreeFunc = btFreeDefault;
+
+
+
+#if defined (BT_HAS_ALIGNED_ALLOCATOR)
+#include <malloc.h>
+static void *btAlignedAllocDefault(size_t size, int alignment)	// BT_HAS_ALIGNED_ALLOCATOR variant: forwards to _aligned_malloc
+{
+	return _aligned_malloc(size, (size_t)alignment);
+}
+// Matching release for the variant above: forwards to _aligned_free.
+static void btAlignedFreeDefault(void *ptr)
+{
+	_aligned_free(ptr);
+}
+#elif defined(__CELLOS_LV2__)
+#include <stdlib.h>
+
+static inline void *btAlignedAllocDefault(size_t size, int alignment)	// __CELLOS_LV2__ variant: forwards to memalign (alignment first, size second)
+{
+	return memalign(alignment, size);
+}
+// Matching release for the variant above: plain free.
+static inline void btAlignedFreeDefault(void *ptr)
+{
+	free(ptr);
+}
+#else
+
+
+
+
+// Generic variant: over-allocates through sAllocFunc, returns an aligned address inside the block and stores the block's start in the pointer-sized slot just below that address.
+static inline void *btAlignedAllocDefault(size_t size, int alignment)
+{
+  char *block = (char *)sAllocFunc(size + sizeof(void *) + (alignment-1));
+  if (!block) {
+    return (void *)(block);	// allocation failed: pass the null result through
+  }
+
+  // Skip the room reserved for the back-pointer, then round up to the requested alignment.
+  void *aligned = btAlignPointer(block + sizeof(void *),alignment);
+  // Remember where the real block starts so that btAlignedFreeDefault can release it.
+  *((void **)(aligned)-1) = (void *)(block);
+  return (aligned);
+}
+// Generic variant: releases a block obtained from the generic btAlignedAllocDefault through sFreeFunc.
+static inline void btAlignedFreeDefault(void *ptr)
+{
+  if (!ptr) {
+    return;	// null is ignored
+  }
+
+  // The slot just below ptr holds the address originally returned by sAllocFunc.
+  sFreeFunc(*((void **)(ptr)-1));
+}
+#endif
+
+
+static btAlignedAllocFunc *sAlignedAllocFunc = btAlignedAllocDefault;
+static btAlignedFreeFunc *sAlignedFreeFunc = btAlignedFreeDefault;
+void btAlignedAllocSetCustomAligned(btAlignedAllocFunc *allocFunc, btAlignedFreeFunc *freeFunc)
+{
+  if (!allocFunc) allocFunc = btAlignedAllocDefault;	// a null argument selects the built-in aligned allocator
+  if (!freeFunc) freeFunc = btAlignedFreeDefault;	// likewise for the release function
+  sAlignedAllocFunc = allocFunc;  sAlignedFreeFunc = freeFunc;	// install the pair used by the non-debug btAlignedAllocInternal/btAlignedFreeInternal
+}
+void btAlignedAllocSetCustom(btAllocFunc *allocFunc, btFreeFunc *freeFunc)
+{
+  if (!allocFunc) allocFunc = btAllocDefault;	// a null argument selects the malloc-based default
+  if (!freeFunc) freeFunc = btFreeDefault;	// likewise the free-based default
+  sAllocFunc = allocFunc;  sFreeFunc = freeFunc;	// install the unaligned pair
+}
+
+#ifdef BT_DEBUG_MEMORY_ALLOCATIONS
+//this generic allocator provides the total allocated number of bytes
+#include <stdio.h>
+
+void*   btAlignedAllocInternal  (size_t size, int alignment,int line,char* filename)
+{
+ // Debug allocator: logs every request and keeps two pointer-sized header slots directly below the returned address,
+ // slot[-1] = start of the real block and slot[-2] = requested size (stored as int at the start of the slot).
+ // The size used to go through (int*)ret-2, which overlaps slot[-1] when pointers are 8 bytes wide and corrupted the block address.
+ void *ret;
+ char *real;
+
+ gTotalBytesAlignedAllocs += size;
+ gNumAlignedAllocs++;
+
+ real = (char *)sAllocFunc(size + 2*sizeof(void *) + (alignment-1));
+ if (real) {
+   ret = (void*) btAlignPointer(real + 2*sizeof(void *), alignment);
+   *((void **)(ret)-1) = (void *)(real);
+   *((int*)((void **)(ret)-2)) = (int)size;
+   if (size >= sizeof(int)) *((int*)ret) = 12;	// debug marker kept from the original; no longer written through a null or undersized result
+ } else {
+   ret = (void *)(real);// allocation failed: return null
+ }
+
+ printf("allocation#%d at address %p, from %s,line %d, size %d\n",gNumAlignedAllocs,(void*)real, filename,line,(int)size);
+
+ return (ret);
+}
+
+void    btAlignedFreeInternal   (void* ptr,int line,char* filename)
+{
+ // Debug release: reads the header written by btAlignedAllocInternal (slot[-1] = real block, slot[-2] = size), logs, and frees the real block.
+ void* real;
+ gNumAlignedFree++;
+
+ if (ptr) {
+   real = *((void **)(ptr)-1);
+       int size = *((int*)((void **)(ptr)-2));
+       gTotalBytesAlignedAllocs -= size;
+
+	   printf("free #%d at address %p, from %s,line %d, size %d\n",gNumAlignedFree,real, filename,line,size);
+
+   sFreeFunc(real);
+ } else
+ {
+	 printf("NULL ptr\n");
+ }
+}
+
+#else //BT_DEBUG_MEMORY_ALLOCATIONS
+
+void*	btAlignedAllocInternal	(size_t size, int alignment)
+{
+	// Counts the request, then delegates to the currently installed aligned allocator.
+	// The counter is incremented whether or not the allocation succeeds.
+	gNumAlignedAllocs++;
+	void* const block = sAlignedAllocFunc(size, alignment);
+	return block;
+}
+
+void	btAlignedFreeInternal	(void* ptr)
+{
+	// Releases a block through the currently installed aligned free function.
+	// A null pointer is ignored and does not count as a free.
+	if (ptr)
+	{
+		gNumAlignedFree++;
+
+		sAlignedFreeFunc(ptr);
+	}
+}
+
+#endif //BT_DEBUG_MEMORY_ALLOCATIONS
+
diff --git a/src/bullet/LinearMath/btAlignedAllocator.h b/src/bullet/LinearMath/btAlignedAllocator.h
new file mode 100644
index 00000000..f168f3c6
--- /dev/null
+++ b/src/bullet/LinearMath/btAlignedAllocator.h
@@ -0,0 +1,107 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_ALIGNED_ALLOCATOR
+#define BT_ALIGNED_ALLOCATOR
+
+///We will probably replace this with our own aligned memory allocator,
+///i.e. replace _aligned_malloc and _aligned_free with our own versions
+///that are more portable and more predictable.
+
+#include "btScalar.h"
+//#define BT_DEBUG_MEMORY_ALLOCATIONS 1
+#ifdef BT_DEBUG_MEMORY_ALLOCATIONS
+
+#define btAlignedAlloc(a,b) \
+		btAlignedAllocInternal(a,b,__LINE__,__FILE__)
+
+#define btAlignedFree(ptr) \
+		btAlignedFreeInternal(ptr,__LINE__,__FILE__)
+
+void*	btAlignedAllocInternal	(size_t size, int alignment,int line,char* filename);
+
+void	btAlignedFreeInternal	(void* ptr,int line,char* filename);
+
+#else
+	void*	btAlignedAllocInternal	(size_t size, int alignment);
+	void	btAlignedFreeInternal	(void* ptr);
+
+	#define btAlignedAlloc(size,alignment) btAlignedAllocInternal(size,alignment)
+	#define btAlignedFree(ptr) btAlignedFreeInternal(ptr)
+
+#endif
+typedef int	size_type;
+
+typedef void *(btAlignedAllocFunc)(size_t size, int alignment);
+typedef void (btAlignedFreeFunc)(void *memblock);
+typedef void *(btAllocFunc)(size_t size);
+typedef void (btFreeFunc)(void *memblock);
+
+///The developer can let all Bullet memory allocations go through a custom memory allocator, using btAlignedAllocSetCustom
+void btAlignedAllocSetCustom(btAllocFunc *allocFunc, btFreeFunc *freeFunc);
+///If the developer already has a custom aligned allocator, then btAlignedAllocSetCustomAligned can be used. The default aligned allocator pre-allocates extra memory using the non-aligned allocator, and instruments it.
+void btAlignedAllocSetCustomAligned(btAlignedAllocFunc *allocFunc, btAlignedFreeFunc *freeFunc);
+
+
+///The btAlignedAllocator is a portable class for aligned memory allocations.
+///Default implementations for unaligned and aligned allocations can be overridden by a custom allocator using btAlignedAllocSetCustom and btAlignedAllocSetCustomAligned.
+template < typename T , unsigned Alignment >
+class btAlignedAllocator {
+	
+	typedef btAlignedAllocator< T , Alignment > self_type;
+	
+public:
+
+	// Member types, declared first because the signatures below refer to them.
+	typedef T                value_type;
+	typedef T*               pointer;
+	typedef T&               reference;
+	typedef const T*         const_pointer;
+	typedef const T&         const_reference;
+
+	// The class has no data members, so construction and converting construction do nothing.
+	btAlignedAllocator() {}
+
+	template < typename Other >
+	btAlignedAllocator( const btAlignedAllocator< Other , Alignment > & ) {}
+
+	pointer       address   ( reference        ref ) const                           { return &ref; }
+	const_pointer address   ( const_reference  ref ) const                           { return &ref; }
+	// Requests raw storage for n objects through btAlignedAlloc; the hint is ignored.
+	pointer       allocate  ( size_type        n   , const_pointer *      hint = 0 ) {
+		(void)hint;
+		void * const raw = btAlignedAlloc( sizeof(value_type) * n , Alignment );
+		return static_cast< pointer >( raw );
+	}
+	// Hands storage back through btAlignedFree.
+	void          deallocate( pointer          ptr ) { btAlignedFree( static_cast< void * >( ptr ) ); }
+	// construct/destroy run a copy constructor / destructor in place; they do not allocate or free.
+	void          construct ( pointer          ptr , const value_type &   value    ) { new (ptr) value_type( value ); }
+	void          destroy   ( pointer          ptr )                                 { ptr->~value_type(); }
+
+	template < typename O > struct rebind {
+		typedef btAlignedAllocator< O , Alignment > other;
+	};
+	// Assignment from any same-alignment allocator is a no-op (there is nothing to copy).
+	template < typename O >
+	self_type & operator=( const btAlignedAllocator< O , Alignment > & ) { return *this; }
+
+	friend bool operator==( const self_type & , const self_type & ) { return true; }
+};
+
+
+
+#endif //BT_ALIGNED_ALLOCATOR
+
diff --git a/src/bullet/LinearMath/btAlignedObjectArray.h b/src/bullet/LinearMath/btAlignedObjectArray.h
new file mode 100644
index 00000000..36090e13
--- /dev/null
+++ b/src/bullet/LinearMath/btAlignedObjectArray.h
@@ -0,0 +1,494 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_OBJECT_ARRAY__
+#define BT_OBJECT_ARRAY__
+
+#include "btScalar.h" // has definitions like SIMD_FORCE_INLINE
+#include "btAlignedAllocator.h"
+
+///If the platform doesn't support placement new, you can disable BT_USE_PLACEMENT_NEW
+///then the btAlignedObjectArray doesn't support objects with virtual methods, and non-trivial constructors/destructors
+///You can enable BT_USE_MEMCPY, then swapping elements in the array will use memcpy instead of operator=
+///see discussion here: http://continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1231 and
+///http://www.continuousphysics.com/Bullet/phpBB2/viewtopic.php?t=1240
+
+#define BT_USE_PLACEMENT_NEW 1
+//#define BT_USE_MEMCPY 1 //disable, because it is cumbersome to find out for each platform where memcpy is defined. It can be in <memory.h> or <string.h> or otherwise...
+#define BT_ALLOW_ARRAY_COPY_OPERATOR // enabling this can accidently perform deep copies of data if you are not careful
+
+#ifdef BT_USE_MEMCPY
+#include <memory.h>
+#include <string.h>
+#endif //BT_USE_MEMCPY
+
+#ifdef BT_USE_PLACEMENT_NEW
+#include <new> //for placement new
+#endif //BT_USE_PLACEMENT_NEW
+
+
+///The btAlignedObjectArray template class uses a subset of the stl::vector interface for its methods
+///It is developed to replace stl::vector to avoid portability issues, including STL alignment issues to add SIMD/SSE data
+template <typename T> 
+//template <class T> 
+class btAlignedObjectArray
+{
+	btAlignedAllocator<T , 16>	m_allocator;
+
+	int					m_size;
+	int					m_capacity;
+	T*					m_data;
+	//PCK: added this line
+	bool				m_ownsMemory;
+
+#ifdef BT_ALLOW_ARRAY_COPY_OPERATOR
+public:
+	SIMD_FORCE_INLINE btAlignedObjectArray<T>& operator=(const btAlignedObjectArray<T> &other)
+	{
+		copyFromArray(other); //deep copy; copyFromArray ignores self-assignment
+		return *this;
+	}
+#else//BT_ALLOW_ARRAY_COPY_OPERATOR
+private:
+		SIMD_FORCE_INLINE btAlignedObjectArray<T>& operator=(const btAlignedObjectArray<T> &other);
+#endif//BT_ALLOW_ARRAY_COPY_OPERATOR
+
+protected:
+		SIMD_FORCE_INLINE	int	allocSize(int size)
+		{
+			return (size ? size*2 : 1);
+		}
+		SIMD_FORCE_INLINE	void	copy(int start,int end, T* dest) const
+		{
+			int i;
+			for (i=start;i<end;++i)
+#ifdef BT_USE_PLACEMENT_NEW
+				new (&dest[i]) T(m_data[i]);
+#else
+				dest[i] = m_data[i];
+#endif //BT_USE_PLACEMENT_NEW
+		}
+
+		SIMD_FORCE_INLINE	void	init()
+		{
+			//PCK: added this line
+			m_ownsMemory = true;
+			m_data = 0;
+			m_size = 0;
+			m_capacity = 0;
+		}
+		SIMD_FORCE_INLINE	void	destroy(int first,int last)
+		{
+			int i;
+			for (i=first; i<last;i++)
+			{
+				m_data[i].~T();
+			}
+		}
+
+		SIMD_FORCE_INLINE	void* allocate(int size)
+		{
+			if (size)
+				return m_allocator.allocate(size);
+			return 0;
+		}
+
+		SIMD_FORCE_INLINE	void	deallocate()
+		{
+			if(m_data)	{
+				//PCK: enclosed the deallocation in this block
+				if (m_ownsMemory)
+				{
+					m_allocator.deallocate(m_data);
+				}
+				m_data = 0;
+			}
+		}
+
+
+	public:
+		
+		btAlignedObjectArray()
+		{
+			init();
+		}
+
+		~btAlignedObjectArray()
+		{
+			clear();
+		}
+
+		///Generally it is best to avoid using the copy constructor of an btAlignedObjectArray, and use a (const) reference to the array instead.
+		btAlignedObjectArray(const btAlignedObjectArray& otherArray)
+		{
+			init();
+			int otherSize = otherArray.size();
+			reserve (otherSize); //raw storage only: copy() below constructs every element exactly once
+			otherArray.copy(0, otherSize, m_data);
+			m_size = otherSize;
+		}
+
+		
+		
+		/// return the number of elements in the array
+		SIMD_FORCE_INLINE	int size() const
+		{	
+			return m_size;
+		}
+		
+		SIMD_FORCE_INLINE const T& at(int n) const
+		{
+			btAssert(n>=0);
+			btAssert(n<size());
+			return m_data[n];
+		}
+
+		SIMD_FORCE_INLINE T& at(int n)
+		{
+			btAssert(n>=0);
+			btAssert(n<size());
+			return m_data[n];
+		}
+
+		SIMD_FORCE_INLINE const T& operator[](int n) const
+		{
+			btAssert(n>=0);
+			btAssert(n<size());
+			return m_data[n];
+		}
+
+		SIMD_FORCE_INLINE T& operator[](int n)
+		{
+			btAssert(n>=0);
+			btAssert(n<size());
+			return m_data[n];
+		}
+		
+
+		///clear the array and deallocate its memory. Generally it is better to use array.resize(0), to reduce performance overhead of run-time memory (de)allocations.
+		SIMD_FORCE_INLINE	void	clear()
+		{
+			destroy(0,size());
+			
+			deallocate();
+			
+			init();
+		}
+
+		SIMD_FORCE_INLINE	void	pop_back()
+		{
+			btAssert(m_size>0);
+			m_size--;
+			m_data[m_size].~T();
+		}
+
+		///resize changes the number of elements in the array. If the new size is larger, the new elements will be constructed using the optional second argument.
+		///when the new number of elements is smaller, the destructor will be called, but memory will not be freed, to reduce performance overhead of run-time memory (de)allocations.
+		SIMD_FORCE_INLINE	void	resize(int newsize, const T& fillData=T())
+		{
+			int curSize = size();
+
+			if (newsize < curSize)
+			{
+				for(int i = newsize; i < curSize; i++)
+				{
+					m_data[i].~T();
+				}
+			} else
+			{
+				if (newsize > size())
+				{
+					reserve(newsize);
+				}
+#ifdef BT_USE_PLACEMENT_NEW
+				for (int i=curSize;i<newsize;i++)
+				{
+					new ( &m_data[i]) T(fillData);
+				}
+#endif //BT_USE_PLACEMENT_NEW
+
+			}
+
+			m_size = newsize;
+		}
+	
+		SIMD_FORCE_INLINE	T&  expandNonInitializing( )
+		{	
+			int sz = size();
+			if( sz == capacity() )
+			{
+				reserve( allocSize(size()) );
+			}
+			m_size++;
+
+			return m_data[sz];		
+		}
+
+
+		SIMD_FORCE_INLINE	T&  expand( const T& fillValue=T())
+		{	
+			int sz = size();
+			if( sz == capacity() )
+			{
+				reserve( allocSize(size()) );
+			}
+			m_size++;
+#ifdef BT_USE_PLACEMENT_NEW
+			new (&m_data[sz]) T(fillValue); //use the in-place new (not really allocating heap memory)
+#endif
+
+			return m_data[sz];		
+		}
+
+
+		SIMD_FORCE_INLINE	void push_back(const T& _Val)
+		{	
+			int sz = size();
+			if( sz == capacity() )
+			{
+				reserve( allocSize(size()) );
+			}
+			
+#ifdef BT_USE_PLACEMENT_NEW
+			new ( &m_data[m_size] ) T(_Val);
+#else
+			m_data[size()] = _Val;			
+#endif //BT_USE_PLACEMENT_NEW
+
+			m_size++;
+		}
+
+	
+		/// return the pre-allocated (reserved) elements, this is at least as large as the total number of elements,see size() and reserve()
+		SIMD_FORCE_INLINE	int capacity() const
+		{	
+			return m_capacity;
+		}
+		
+		SIMD_FORCE_INLINE	void reserve(int _Count)
+		{	// determine new minimum length of allocated storage
+			if (capacity() < _Count)
+			{	// not enough room, reallocate
+				T*	s = (T*)allocate(_Count);
+
+				copy(0, size(), s);
+
+				destroy(0,size());
+
+				deallocate();
+				
+				//PCK: added this line
+				m_ownsMemory = true;
+
+				m_data = s;
+				
+				m_capacity = _Count;
+
+			}
+		}
+
+
+		class less
+		{
+			public:
+
+				bool operator() ( const T& a, const T& b ) const //const so it can be invoked through the const L& taken by quickSort/heapSort
+				{
+					return ( a < b );
+				}
+		};
+	
+
+		template <typename L>
+		void quickSortInternal(const L& CompareFunc,int lo, int hi)
+		{
+		//  lo is the lower index, hi is the upper index
+		//  of the region of array a that is to be sorted
+			int i=lo, j=hi;
+			T x=m_data[(lo+hi)/2];
+
+			//  partition
+			do
+			{    
+				while (CompareFunc(m_data[i],x)) 
+					i++; 
+				while (CompareFunc(x,m_data[j])) 
+					j--;
+				if (i<=j)
+				{
+					swap(i,j);
+					i++; j--;
+				}
+			} while (i<=j);
+
+			//  recursion
+			if (lo<j) 
+				quickSortInternal( CompareFunc, lo, j);
+			if (i<hi) 
+				quickSortInternal( CompareFunc, i, hi);
+		}
+
+
+		template <typename L>
+		void quickSort(const L& CompareFunc)
+		{
+			//don't sort 0 or 1 elements
+			if (size()>1)
+			{
+				quickSortInternal(CompareFunc,0,size()-1);
+			}
+		}
+
+
+		///heap sort from http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Sort/Heap/
+		template <typename L>
+		void downHeap(T *pArr, int k, int n, const L& CompareFunc)
+		{
+			/*  PRE: a[k+1..N] is a heap */
+			/* POST:  a[k..N]  is a heap */
+			
+			T temp = pArr[k - 1];
+			/* k has child(s) */
+			while (k <= n/2) 
+			{
+				int child = 2*k;
+				
+				if ((child < n) && CompareFunc(pArr[child - 1] , pArr[child]))
+				{
+					child++;
+				}
+				/* pick larger child */
+				if (CompareFunc(temp , pArr[child - 1]))
+				{
+					/* move child up */
+					pArr[k - 1] = pArr[child - 1];
+					k = child;
+				}
+				else
+				{
+					break;
+				}
+			}
+			pArr[k - 1] = temp;
+		} /*downHeap*/
+
+		void	swap(int index0,int index1)
+		{
+#ifdef BT_USE_MEMCPY
+			char	temp[sizeof(T)];
+			memcpy(temp,&m_data[index0],sizeof(T));
+			memcpy(&m_data[index0],&m_data[index1],sizeof(T));
+			memcpy(&m_data[index1],temp,sizeof(T));
+#else
+			T temp = m_data[index0];
+			m_data[index0] = m_data[index1];
+			m_data[index1] = temp;
+#endif //BT_USE_MEMCPY
+
+		}
+
+	template <typename L>
+	void heapSort(const L& CompareFunc)
+	{
+		/* sort a[0..N-1],  N.B. 0 to N-1 */
+		int k;
+		int n = m_size;
+		for (k = n/2; k > 0; k--) 
+		{
+			downHeap(m_data, k, n, CompareFunc);
+		}
+
+		/* a[1..N] is now a heap */
+		while ( n>=1 ) 
+		{
+			swap(0,n-1); /* largest of a[0..n-1] */
+
+
+			n = n - 1;
+			/* restore a[1..i-1] heap */
+			downHeap(m_data, 1, n, CompareFunc);
+		} 
+	}
+
+	///non-recursive binary search, assumes sorted array
+	int	findBinarySearch(const T& key) const
+	{
+		int first = 0;
+		int last = size()-1;
+
+		//assume sorted array
+		while (first <= last) {
+			int mid = first + (last - first) / 2;  // compute mid point without overflowing first+last
+			if (key > m_data[mid]) 
+				first = mid + 1;  // repeat search in top half.
+			else if (key < m_data[mid]) 
+				last = mid - 1; // repeat search in bottom half.
+			else
+				return mid;     // found it. return position /////
+		}
+		return size();    // failed to find key
+	}
+
+
+	int	findLinearSearch(const T& key) const
+	{
+		int index=size();
+		int i;
+
+		for (i=0;i<size();i++)
+		{
+			if (m_data[i] == key)
+			{
+				index = i;
+				break;
+			}
+		}
+		return index;
+	}
+
+	void	remove(const T& key)
+	{
+
+		int findIndex = findLinearSearch(key);
+		if (findIndex<size())
+		{
+			swap( findIndex,size()-1);
+			pop_back();
+		}
+	}
+
+	//PCK: whole function
+	void initializeFromBuffer(void *buffer, int size, int capacity)
+	{
+		clear();
+		m_ownsMemory = false;
+		m_data = (T*)buffer;
+		m_size = size;
+		m_capacity = capacity;
+	}
+
+	void copyFromArray(const btAlignedObjectArray& otherArray)
+	{
+		if (this == &otherArray) return; //copying an array onto itself is a no-op
+		const int otherSize = otherArray.size();
+		destroy(0,size()); m_size = 0; //destroy the old elements first so copy() never constructs over a live object
+		reserve (otherSize);
+		otherArray.copy(0, otherSize, m_data); m_size = otherSize;
+	}
+
+};
+
+#endif //BT_OBJECT_ARRAY__
diff --git a/src/bullet/LinearMath/btConvexHull.cpp b/src/bullet/LinearMath/btConvexHull.cpp
new file mode 100644
index 00000000..532d76d8
--- /dev/null
+++ b/src/bullet/LinearMath/btConvexHull.cpp
@@ -0,0 +1,1174 @@
+/*
+Stan Melax Convex Hull Computation
+Copyright (c) 2003-2006 Stan Melax http://www.melax.com/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <string.h>
+
+#include "btConvexHull.h"
+#include "btAlignedObjectArray.h"
+#include "btMinMax.h"
+#include "btVector3.h"
+
+
+
+template <class T>
+void Swap(T &a,T &b)
+{
+	T held = b; // exchange the two values through a temporary copy
+	b=a;
+	a=held;
+}
+
+
+//----------------------------------
+
+class int3  
+{
+public:
+	int x,y,z;
+	int3(){} // members are intentionally left unset by the default constructor
+	int3(int _x,int _y, int _z):x(_x),y(_y),z(_z){}
+	int& operator[](int i) {return *(&x+i);} // i = 0,1,2 addresses x,y,z
+	const int& operator[](int i) const {return *(&x+i);}
+};
+
+
+//------- btPlane ----------
+
+
+inline btPlane PlaneFlip(const btPlane &plane){btPlane flipped(-plane.normal,-plane.dist); return flipped;} // negates both normal and dist
+inline int operator==( const btPlane &a, const btPlane &b ) { if(!(a.normal==b.normal)) return 0; return (a.dist==b.dist); } // no tolerance is applied here
+inline int coplanar( const btPlane &a, const btPlane &b ) { if(a==b) return 1; return (a==PlaneFlip(b)); } // equal to b or to b flipped
+
+
+//--------- Utility Functions ------
+
+btVector3  PlaneLineIntersection(const btPlane &plane, const btVector3 &p0, const btVector3 &p1);
+btVector3  PlaneProject(const btPlane &plane, const btVector3 &point);
+
+btVector3  ThreePlaneIntersection(const btPlane &p0,const btPlane &p1, const btPlane &p2);
+btVector3  ThreePlaneIntersection(const btPlane &p0,const btPlane &p1, const btPlane &p2)
+{
+	// Evaluates -(d0*(N2xN3) + d1*(N3xN1) + d2*(N1xN2)) / (N1 . (N2xN3)),
+	// where Ni and di are the normal and dist of plane i.
+	const btVector3 &N1 = p0.normal;
+	const btVector3 &N2 = p1.normal;
+	const btVector3 &N3 = p2.normal;
+
+	btVector3 n2n3 = N2.cross(N3);
+	btVector3 n3n1 = N3.cross(N1);
+	btVector3 n1n2 = N1.cross(N2);
+
+	// The assert rejects a (near) zero triple product, which would make the division meaningless.
+	btScalar quotient = N1.dot(n2n3);
+	btAssert(btFabs(quotient) > btScalar(0.000001));
+	quotient = btScalar(-1.) / quotient;
+
+	n2n3 *= p0.dist;
+	n3n1 *= p1.dist;
+	n1n2 *= p2.dist;
+
+	btVector3 potentialVertex = n2n3;
+	potentialVertex += n3n1;
+	potentialVertex += n1n2;
+	potentialVertex *= quotient;
+	return btVector3(potentialVertex.getX(),potentialVertex.getY(),potentialVertex.getZ());
+}
+
+btScalar   DistanceBetweenLines(const btVector3 &ustart, const btVector3 &udir, const btVector3 &vstart, const btVector3 &vdir, btVector3 *upoint=NULL, btVector3 *vpoint=NULL);
+btVector3  TriNormal(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2);
+btVector3  NormalOf(const btVector3 *vert, const int n);
+
+
+btVector3 PlaneLineIntersection(const btPlane &plane, const btVector3 &p0, const btVector3 &p1)
+{
+	// returns the point where the line p0-p1 intersects the plane n&d
+	// (dn is zero for a line parallel to the plane; that case is not guarded here)
+	const btVector3 dif = p1-p0; // a plain local: no state is shared between calls, so the function is reentrant
+	const btScalar dn= btDot(plane.normal,dif);
+	const btScalar t = -(plane.dist+btDot(plane.normal,p0) )/dn;
+	return p0 + (dif*t);
+}
+
+btVector3 PlaneProject(const btPlane &plane, const btVector3 &point)
+{
+	const btScalar offset = btDot(point,plane.normal)+plane.dist; return point - plane.normal * offset; // subtract the normal scaled by (point . normal + dist)
+}
+
+btVector3 TriNormal(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2)
+{
+	// Normal of triangle (v0,v1,v2): the cross product of edges v0->v1 and v1->v2 scaled to unit length.
+	// A zero-length cross product yields the fixed fallback (1,0,0).
+	btVector3 edgeCross=btCross(v1-v0,v2-v1);
+	btScalar len=edgeCross.length();
+	if(len!=0) return edgeCross*(btScalar(1.0)/len);
+	return btVector3(1,0,0);
+}
+
+
+btScalar DistanceBetweenLines(const btVector3 &ustart, const btVector3 &udir, const btVector3 &vstart, const btVector3 &vdir, btVector3 *upoint, btVector3 *vpoint)
+{
+	// Distance between line u (ustart, udir) and line v (vstart, vdir) measured along cp = normalized(udir x vdir).
+	const btVector3 cp = btCross(udir,vdir).normalized(); // a plain local (no static state), so the function is reentrant
+
+	btScalar distu = -btDot(cp,ustart);
+	btScalar distv = -btDot(cp,vstart);
+	btScalar dist = btFabs(distu-distv);
+	if(upoint) // optional output: where line u crosses the plane through vstart spanned by vdir and cp
+		{
+		btPlane plane;
+		plane.normal = btCross(vdir,cp).normalized();
+		plane.dist = -btDot(plane.normal,vstart);
+		*upoint = PlaneLineIntersection(plane,ustart,ustart+udir);
+	}
+	if(vpoint) // optional output: where line v crosses the plane through ustart spanned by udir and cp
+		{
+		btPlane plane;
+		plane.normal = btCross(udir,cp).normalized();
+		plane.dist = -btDot(plane.normal,ustart);
+		*vpoint = PlaneLineIntersection(plane,vstart,vstart+vdir);
+	}
+	return dist;
+}
+
+
+
+
+
+
+
+#define COPLANAR   (0)
+#define UNDER      (1)
+#define OVER       (2)
+#define SPLIT      (OVER|UNDER)
+#define PAPERWIDTH (btScalar(0.001))
+
+btScalar planetestepsilon = PAPERWIDTH;
+
+
+
+typedef ConvexH::HalfEdge HalfEdge;
+
+ConvexH::ConvexH(int vertices_size,int edges_size,int facets_size)
+{
+	vertices.resize(vertices_size); // resize vertices, edges and facets to the requested counts
+	edges.resize(edges_size);
+	facets.resize(facets_size);
+}
+
+
+int PlaneTest(const btPlane &p, const btVector3 &v);
+int PlaneTest(const btPlane &p, const btVector3 &v) {
+	const btScalar side  = btDot(v,p.normal)+p.dist; // plane expression evaluated at v
+	if(side>planetestepsilon) return OVER;
+	return (side<-planetestepsilon)?UNDER:COPLANAR; // anything within +/-planetestepsilon counts as COPLANAR
+}
+
+int SplitTest(ConvexH &convex,const btPlane &plane);
+int SplitTest(ConvexH &convex,const btPlane &plane) {
+	// ORs together the PlaneTest result of every vertex of convex.
+	int sides=0;
+	const int vertexCount=convex.vertices.size();
+	for(int v=0;v<vertexCount;++v) sides |= PlaneTest(plane,convex.vertices[v]);
+	return sides;
+}
+
+class VertFlag
+{
+public:
+	// Four single-byte fields; declaration order (and therefore layout) is planetest, junk, undermap, overmap.
+	unsigned char planetest, junk;
+	unsigned char undermap, overmap;
+	// NOTE(review): 'junk' looks like explicit padding -- confirm before removing.
+};
+class EdgeFlag 
+{
+public:
+	// Two single-byte fields followed by two short fields.
+	unsigned char planetest, fixes;
+	short undermap, overmap;
+	// NOTE(review): not referenced in the visible part of this file -- confirm it is still needed.
+};
+class PlaneFlag
+{
+public:
+	// two single-byte fields
+	unsigned char undermap, overmap;
+};
+class Coplanar{
+public:
+	// one unsigned short field followed by two single-byte fields
+	unsigned short ea;
+	unsigned char v0, v1;
+};
+
+
+
+
+
+
+
+
+template<class T>
+int maxdirfiltered(const T *p,int count,const T &dir,btAlignedObjectArray<int> &allow)
+{
+	// Index of the allowed point (allow[i] != 0) with the largest dot product against dir; the lowest index wins ties.
+	btAssert(count);
+	int best=-1;
+	for(int i=0;i<count;i++) 
+	{
+		if(!allow[i]) continue;
+		if(best==-1 || btDot(p[i],dir)>btDot(p[best],dir)) best=i;
+	}
+	btAssert(best!=-1);
+	return best;
+} 
+
+btVector3 orth(const btVector3 &v);
+btVector3 orth(const btVector3 &v)
+{
+	// Cross v with the Z axis and with the Y axis and return the longer of the two results, normalized
+	// (the Y-axis candidate is used when the lengths are equal).
+	btVector3 a=btCross(v,btVector3(0,0,1));
+	btVector3 b=btCross(v,btVector3(0,1,0));
+
+	const bool useA = a.length() > b.length();
+	btVector3 &longer = useA ? a : b;
+	return longer.normalized();
+}
+
+
+template<class T>
+int maxdirsterid(const T *p,int count,const T &dir,btAlignedObjectArray<int> &allow) // like maxdirfiltered, but only accepts a winner that also wins for slightly perturbed directions
+{
+	int m=-1;
+	while(m==-1) // retried after each rejected candidate
+	{
+		m = maxdirfiltered(p,count,dir,allow);
+		if(allow[m]==3) return m; // 3 marks a point that already passed the check below
+		T u = orth(dir);
+		T v = btCross(u,dir);
+		int ma=-1; // winner of the previous perturbed direction (-1 before the first sample)
+		for(btScalar x = btScalar(0.0) ; x<= btScalar(360.0) ; x+= btScalar(45.0)) // sample perturbations every 45 degrees around dir
+		{
+			btScalar s = btSin(SIMD_RADS_PER_DEG*(x));
+			btScalar c = btCos(SIMD_RADS_PER_DEG*(x));
+			int mb = maxdirfiltered(p,count,dir+(u*s+v*c)*btScalar(0.025),allow); // winner for dir offset by 0.025 in the u/v plane
+			if(ma==m && mb==m) // two consecutive samples agree with m: accept it
+			{
+				allow[m]=3;
+				return m;
+			}
+			if(ma!=-1 && ma!=mb)  // Yuck - this is really ugly
+			{
+				int mc = ma;
+				for(btScalar xx = x-btScalar(40.0) ; xx <= x ; xx+= btScalar(5.0)) // winner changed between samples: re-scan that interval in 5 degree steps
+				{
+					btScalar s = btSin(SIMD_RADS_PER_DEG*(xx));
+					btScalar c = btCos(SIMD_RADS_PER_DEG*(xx));
+					int md = maxdirfiltered(p,count,dir+(u*s+v*c)*btScalar(0.025),allow);
+					if(mc==m && md==m) // same acceptance rule on the finer scan
+					{
+						allow[m]=3;
+						return m;
+					}
+					mc=md;
+				}
+			}
+			ma=mb;
+		}
+		allow[m]=0; // m never won two consecutive samples: exclude it and search again
+		m=-1;
+	}
+	btAssert(0); // not reached: the loop above only exits through its return statements
+	return m;
+} 
+
+
+
+
+int operator ==(const int3 &a,const int3 &b);
+int operator ==(const int3 &a,const int3 &b) 
+{
+	// 1 when all three components match, 0 otherwise
+	if(a[0]!=b[0]) return 0;
+	if(a[1]!=b[1]) return 0;
+	if(a[2]!=b[2]) return 0;
+	return 1;
+}
+
+
+int above(btVector3* vertices,const int3& t, const btVector3 &p, btScalar epsilon);
+int above(btVector3* vertices,const int3& t, const btVector3 &p, btScalar epsilon) 
+{
+	const btVector3 &v0=vertices[t[0]]; // p is "above" when its offset from v0 along the triangle normal exceeds epsilon
+	return (btDot(TriNormal(v0,vertices[t[1]],vertices[t[2]]),p-v0) > epsilon);
+}
+int hasedge(const int3 &t, int a,int b);
+int hasedge(const int3 &t, int a,int b)
+{
+	// 1 if t contains the directed edge a->b, i.e. one of
+	// (t0,t1), (t1,t2) or (t2,t0) equals (a,b); 0 otherwise.
+	if(t[0]==a && t[1]==b) return 1;
+	if(t[1]==a && t[2]==b) return 1;
+	if(t[2]==a && t[0]==b) return 1;
+	return 0;
+}
+int hasvert(const int3 &t, int v);
+int hasvert(const int3 &t, int v)
+{
+	return !(t[0]!=v && t[1]!=v && t[2]!=v); // true when v is one of the three indices of t
+}
+int shareedge(const int3 &a,const int3 &b);
+int shareedge(const int3 &a,const int3 &b)
+{
+	// 1 if some edge of b, taken in the reverse direction, is a directed edge of a;
+	// 0 otherwise.
+	for(int k=0;k<3;k++)
+	{
+		if(hasedge(a,b[(k+1)%3],b[k])) return 1;
+	}
+	return 0;
+}
+
+class btHullTriangle;
+
+
+
+class btHullTriangle : public int3
+{
+public:
+	int3 n;        // neighbour slots; all -1 on construction
+	int id;        // -1 on construction (allocateTriangle overwrites it with the triangle's index in m_tris)
+	int vmax;      // -1 on construction
+	btScalar rise; // 0 on construction
+	btHullTriangle(int a,int b,int c):int3(a,b,c),n(-1,-1,-1),id(-1),vmax(-1),rise(btScalar(0.0))
+	{
+		// every member starts with a defined value, including id
+	}
+	
+	~btHullTriangle()
+	{
+	}
+	int &neib(int a,int b);
+};
+
+
+int &btHullTriangle::neib(int a,int b)
+{
+	// Returns a reference to the neighbour slot belonging to edge {a,b} (either direction):
+	// when corners i and i+1 form the edge, the slot is n[i+2] (indices mod 3).
+	static int er=-1; // only returned after the assert below, i.e. when {a,b} is not an edge of this triangle
+	for(int i=0;i<3;i++) 
+	{
+		const int first=(*this)[i], second=(*this)[(i+1)%3];
+		const bool matches = (first==a && second==b) || (first==b && second==a);
+		if(matches) return n[(i+2)%3];
+	}
+	btAssert(0);
+	return er;
+}
+void HullLibrary::b2bfix(btHullTriangle* s,btHullTriangle*t) // links the neighbours of s and of t directly to each other, edge by edge
+{
+	int i;
+	for(i=0;i<3;i++) 
+	{
+		int i1=(i+1)%3;
+		int i2=(i+2)%3;
+		int a = (*s)[i1]; // (a,b) is the edge of s handled in this iteration
+		int b = (*s)[i2];
+		btAssert(m_tris[s->neib(a,b)]->neib(b,a) == s->id); // s's neighbour across {a,b} must point back at s
+		btAssert(m_tris[t->neib(a,b)]->neib(b,a) == t->id); // and t's neighbour across {a,b} must point back at t
+		m_tris[s->neib(a,b)]->neib(b,a) = t->neib(b,a); // s's neighbour now refers to t's neighbour ...
+		m_tris[t->neib(b,a)]->neib(a,b) = s->neib(a,b); // ... and t's neighbour refers to s's neighbour
+	}
+}
+
+void HullLibrary::removeb2b(btHullTriangle* s,btHullTriangle*t)
+{
+	// Patch the neighbour links through b2bfix first,
+	// then release both triangles (s first, then t).
+	b2bfix(s,t);
+	deAllocateTriangle(s); deAllocateTriangle(t);
+}
+
+void HullLibrary::checkit(btHullTriangle *t)
+{
+	// Consistency check made only of btAssert calls:
+	// t must sit in its own slot of m_tris, its corners must be distinct,
+	// and each neighbour must point back at t across the shared edge.
+	(void)t;
+
+	btAssert(m_tris[t->id]==t);
+	for(int i=0;i<3;i++)
+	{
+		const int i1=(i+1)%3;
+		const int i2=(i+2)%3;
+		const int a = (*t)[i1];
+		const int b = (*t)[i2];
+
+		// release compile fix
+		(void)a;
+		(void)b;
+
+		btAssert(a!=b);
+		btAssert( m_tris[t->n[i]]->neib(b,a) == t->id);
+	}
+}
+
+btHullTriangle*	HullLibrary::allocateTriangle(int a,int b,int c)
+{
+	// Construct the triangle in storage obtained from btAlignedAlloc (alignment 16) and append it to m_tris;
+	// its id is the index of the slot it occupies.
+	btHullTriangle* tr = new (btAlignedAlloc(sizeof(btHullTriangle),16)) btHullTriangle(a,b,c);
+	tr->id = m_tris.size();
+	m_tris.push_back(tr);
+	return tr;
+}
+
+void	HullLibrary::deAllocateTriangle(btHullTriangle* tri)
+{
+	const int slot = tri->id; // the slot is nulled, not erased, so other triangles keep their indices
+	btAssert(m_tris[slot]==tri);
+	m_tris[slot]=NULL;
+	tri->~btHullTriangle(); btAlignedFree(tri); // destroy in place, then release the storage
+}
+
+
+void HullLibrary::extrude(btHullTriangle *t0,int v) // replaces t0 by three triangles that join vertex v to the edges of t0
+{
+	int3 t= *t0;
+	int n = m_tris.size(); // the three new triangles get ids n, n+1, n+2 (allocateTriangle uses m_tris.size())
+	btHullTriangle* ta = allocateTriangle(v,t[1],t[2]);
+	ta->n = int3(t0->n[0],n+1,n+2); // slot 0: t0's old neighbour, slots 1/2: the two sibling triangles
+	m_tris[t0->n[0]]->neib(t[1],t[2]) = n+0; // the old neighbour now points at ta instead of t0
+	btHullTriangle* tb = allocateTriangle(v,t[2],t[0]);
+	tb->n = int3(t0->n[1],n+2,n+0);
+	m_tris[t0->n[1]]->neib(t[2],t[0]) = n+1;
+	btHullTriangle* tc = allocateTriangle(v,t[0],t[1]);
+	tc->n = int3(t0->n[2],n+0,n+1);
+	m_tris[t0->n[2]]->neib(t[0],t[1]) = n+2;
+	checkit(ta);
+	checkit(tb);
+	checkit(tc);
+	if(hasvert(*m_tris[ta->n[0]],v)) removeb2b(ta,m_tris[ta->n[0]]); // outer neighbour also contains v: remove the pair
+	if(hasvert(*m_tris[tb->n[0]],v)) removeb2b(tb,m_tris[tb->n[0]]);
+	if(hasvert(*m_tris[tc->n[0]],v)) removeb2b(tc,m_tris[tc->n[0]]);
+	deAllocateTriangle(t0); // t0 itself is released last
+
+}
+
+btHullTriangle* HullLibrary::extrudable(btScalar epsilon)
+{
+	// Returns the live triangle with the largest rise if that rise exceeds epsilon, otherwise NULL.
+	// Null slots (released triangles) are skipped; an empty or fully released m_tris yields NULL.
+	btHullTriangle *t=NULL;
+	for(int i=0;i<m_tris.size();i++)
+	{
+		btHullTriangle *candidate = m_tris[i];
+		if(!candidate) continue;
+		if(!t || t->rise<candidate->rise) t = candidate;
+	}
+	return (t && t->rise >epsilon)?t:NULL ;
+}
+
+
+
+
+int4 HullLibrary::FindSimplex(btVector3 *verts,int verts_count,btAlignedObjectArray<int> &allow) // picks four distinct vertex indices as a starting simplex; returns (-1,-1,-1,-1) on failure
+{
+	btVector3 basis[3];
+	basis[0] = btVector3( btScalar(0.01), btScalar(0.02), btScalar(1.0) );      
+	int p0 = maxdirsterid(verts,verts_count, basis[0],allow);   
+	int	p1 = maxdirsterid(verts,verts_count,-basis[0],allow); // p0/p1: extreme points along +/- the initial direction
+	basis[0] = verts[p0]-verts[p1];
+	if(p0==p1 || basis[0]==btVector3(0,0,0)) 
+		return int4(-1,-1,-1,-1);
+	basis[1] = btCross(btVector3(     btScalar(1),btScalar(0.02), btScalar(0)),basis[0]);
+	basis[2] = btCross(btVector3(btScalar(-0.02),     btScalar(1), btScalar(0)),basis[0]);
+	if (basis[1].length() > basis[2].length()) // keep the longer of the two candidates, normalized
+	{
+		basis[1].normalize();
+	} else {
+		basis[1] = basis[2];
+		basis[1].normalize ();
+	}
+	int p2 = maxdirsterid(verts,verts_count,basis[1],allow);
+	if(p2 == p0 || p2 == p1) // same point again: try the opposite direction
+	{
+		p2 = maxdirsterid(verts,verts_count,-basis[1],allow);
+	}
+	if(p2 == p0 || p2 == p1) 
+		return int4(-1,-1,-1,-1);
+	basis[1] = verts[p2] - verts[p0];
+	basis[2] = btCross(basis[1],basis[0]).normalized();
+	int p3 = maxdirsterid(verts,verts_count,basis[2],allow);
+	if(p3==p0||p3==p1||p3==p2) p3 = maxdirsterid(verts,verts_count,-basis[2],allow);
+	if(p3==p0||p3==p1||p3==p2) 
+		return int4(-1,-1,-1,-1);
+	btAssert(!(p0==p1||p0==p2||p0==p3||p1==p2||p1==p3||p2==p3));
+	if(btDot(verts[p3]-verts[p0],btCross(verts[p1]-verts[p0],verts[p2]-verts[p0])) <0) {Swap(p2,p3);} // swap p2/p3 when the triple product is negative
+	return int4(p0,p1,p2,p3);
+}
+
+int HullLibrary::calchullgen(btVector3 *verts,int verts_count, int vlimit) // builds the hull triangles in m_tris; returns 1 on success, 0 when no hull could be started
+{
+	if(verts_count <4) return 0;
+	if(vlimit==0) vlimit=1000000000; // 0 is replaced by a very large vertex limit
+	int j;
+	btVector3 bmin(*verts),bmax(*verts);
+	btAlignedObjectArray<int> isextreme;
+	isextreme.reserve(verts_count);
+	btAlignedObjectArray<int> allow;
+	allow.reserve(verts_count);
+
+	for(j=0;j<verts_count;j++) 
+	{
+		allow.push_back(1);
+		isextreme.push_back(0);
+		bmin.setMin (verts[j]);
+		bmax.setMax (verts[j]);
+	}
+	btScalar epsilon = (bmax-bmin).length() * btScalar(0.001); // tolerance: 0.1% of the bounding-box diagonal
+	btAssert (epsilon != 0.0);
+
+
+	int4 p = FindSimplex(verts,verts_count,allow);
+	if(p.x==-1) return 0; // simplex failed
+
+
+
+	btVector3 center = (verts[p[0]]+verts[p[1]]+verts[p[2]]+verts[p[3]]) / btScalar(4.0);  // a valid interior point
+	btHullTriangle *t0 = allocateTriangle(p[2],p[3],p[1]); t0->n=int3(2,3,1);
+	btHullTriangle *t1 = allocateTriangle(p[3],p[2],p[0]); t1->n=int3(3,2,0);
+	btHullTriangle *t2 = allocateTriangle(p[0],p[1],p[3]); t2->n=int3(0,1,3);
+	btHullTriangle *t3 = allocateTriangle(p[1],p[0],p[2]); t3->n=int3(1,0,2);
+	isextreme[p[0]]=isextreme[p[1]]=isextreme[p[2]]=isextreme[p[3]]=1;
+	checkit(t0);checkit(t1);checkit(t2);checkit(t3);
+
+	for(j=0;j<m_tris.size();j++) // give every initial triangle its farthest vertex (vmax) and that vertex's offset along the normal (rise)
+	{
+		btHullTriangle *t=m_tris[j];
+		btAssert(t);
+		btAssert(t->vmax<0);
+		btVector3 n=TriNormal(verts[(*t)[0]],verts[(*t)[1]],verts[(*t)[2]]);
+		t->vmax = maxdirsterid(verts,verts_count,n,allow);
+		t->rise = btDot(n,verts[t->vmax]-verts[(*t)[0]]);
+	}
+	btHullTriangle *te;
+	vlimit-=4; // the four simplex vertices count against the limit
+	while(vlimit >0 && ((te=extrudable(epsilon)) != 0)) // grow the hull while some triangle still has a rise above epsilon
+	{
+		int3 ti=*te;
+		int v=te->vmax;
+		btAssert(v != -1);
+		btAssert(!isextreme[v]);  // wtf we've already done this vertex
+		isextreme[v]=1;
+		//if(v==p0 || v==p1 || v==p2 || v==p3) continue; // done these already
+		j=m_tris.size();
+		while(j--) { // extrude every live triangle that v lies above
+			if(!m_tris[j]) continue;
+			int3 t=*m_tris[j];
+			if(above(verts,t,verts[v],btScalar(0.01)*epsilon)) 
+			{
+				extrude(m_tris[j],v);
+			}
+		}
+		// now check for those degenerate cases where we have a flipped triangle or a really skinny triangle
+		j=m_tris.size();
+		while(j--)
+		{
+			if(!m_tris[j]) continue;
+			if(!hasvert(*m_tris[j],v)) break; // stop at the first (from the end) live triangle that does not contain v
+			int3 nt=*m_tris[j];
+			if(above(verts,nt,center,btScalar(0.01)*epsilon)  || btCross(verts[nt[1]]-verts[nt[0]],verts[nt[2]]-verts[nt[1]]).length()< epsilon*epsilon*btScalar(0.1) )
+			{
+				btHullTriangle *nb = m_tris[m_tris[j]->n[0]];
+				btAssert(nb);btAssert(!hasvert(*nb,v));btAssert(nb->id<j);
+				extrude(nb,v);
+				j=m_tris.size(); // restart the scan: extrude changed m_tris
+			}
+		} 
+		j=m_tris.size();
+		while(j--) // compute vmax/rise for the triangles that do not have them yet
+		{
+			btHullTriangle *t=m_tris[j];
+			if(!t) continue;
+			if(t->vmax>=0) break;
+			btVector3 n=TriNormal(verts[(*t)[0]],verts[(*t)[1]],verts[(*t)[2]]);
+			t->vmax = maxdirsterid(verts,verts_count,n,allow);
+			if(isextreme[t->vmax]) 
+			{
+				t->vmax=-1; // already done that vertex - algorithm needs to be able to terminate.
+			}
+			else
+			{
+				t->rise = btDot(n,verts[t->vmax]-verts[(*t)[0]]);
+			}
+		}
+		vlimit --;
+	}
+	return 1;
+}
+
+// Runs calchullgen() on the point set and flattens the triangles it leaves in
+// m_tris into tris_out (three vertex indices per triangle); tris_count gets
+// the number of triangles.  Returns 0 if hull generation fails, 1 otherwise.
+// Every triangle is deallocated and m_tris is emptied before returning 1.
+int HullLibrary::calchull(btVector3 *verts,int verts_count, TUIntArray& tris_out, int &tris_count,int vlimit) 
+{
+	if (calchullgen(verts, verts_count, vlimit) == 0) return 0;
+
+	btAlignedObjectArray<int> flat; // vertex indices, three per live triangle
+	for (int t = 0; t < m_tris.size(); ++t)
+	{
+		btHullTriangle *tri = m_tris[t];
+		if (!tri) continue; // skip empty slots
+		flat.push_back((*tri)[0]);
+		flat.push_back((*tri)[1]);
+		flat.push_back((*tri)[2]);
+		deAllocateTriangle(tri);
+	}
+
+	const int total = flat.size();
+	tris_count = total / 3;
+	tris_out.resize(total);
+	for (int k = 0; k < total; ++k)
+		tris_out[k] = static_cast<unsigned int>(flat[k]);
+	m_tris.resize(0);
+	return 1;
+}
+
+
+
+
+
+// Builds the hull triangle list for `vertices` via calchull() and describes it
+// in `result`; returns false (leaving result's counters untouched) on failure.
+bool HullLibrary::ComputeHull(unsigned int vcount,const btVector3 *vertices,PHullResult &result,unsigned int vlimit)
+{
+	btVector3 *points = const_cast<btVector3 *>(vertices); // calchull() takes a non-const pointer
+	int faceCount;
+	if (calchull(points, static_cast<int>(vcount), result.m_Indices, faceCount, static_cast<int>(vlimit)) == 0) return false;
+	result.mVcount     = vcount;
+	result.mVertices   = points;
+	result.mFaceCount  = static_cast<unsigned int>(faceCount);
+	result.mIndexCount = static_cast<unsigned int>(faceCount*3);
+	return true;
+}
+
+
+// Empties a PHullResult: drops its index array, zeroes every counter and
+// forgets the vertex pointer (the vertices themselves are not freed here).
+void ReleaseHull(PHullResult &result);
+void ReleaseHull(PHullResult &result)
+{
+	if ( result.m_Indices.size() ) result.m_Indices.clear();
+
+	result.mVcount = 0;
+	result.mIndexCount = 0;
+	result.mFaceCount = 0; // used to be left stale, unlike the other counters
+	result.mVertices = 0;
+}
+
+
+//*********************************************************************
+//*********************************************************************
+//********  HullLib header
+//*********************************************************************
+//*********************************************************************
+
+//*********************************************************************
+//*********************************************************************
+//********  HullLib implementation
+//*********************************************************************
+//*********************************************************************
+
+// Builds a convex hull for the point cloud described by `desc` and stores it
+// in `result`.
+//
+// Steps:
+//   1. CleanupVertices() copies the input into a scratch array (sized for at
+//      least 8 points) and reports a per-axis scale factor.
+//   2. The cleaned points are multiplied by that scale factor.
+//   3. ComputeHull() produces triangle indices; desc.mMaxVertices is passed
+//      on as its vertex limit.
+//   4. BringOutYourDead() compacts the vertex table to the referenced
+//      vertices and rewrites the indices accordingly.
+//
+// Output layout, selected by desc's flags:
+//   QF_TRIANGLES set       -> 3 indices per face, result.mPolygons == false
+//   QF_TRIANGLES clear     -> 4 entries per face (the count 3, then the three
+//                             indices), result.mPolygons == true
+//   QF_REVERSE_ORDER set   -> the three indices of each face are reversed
+// result.mNumOutputVertices, mNumFaces and mNumIndices are set to match.
+//
+// Returns QE_OK on success.  Returns QE_FAIL, without writing to `result`,
+// when CleanupVertices() or ComputeHull() reports failure.
+HullError HullLibrary::CreateConvexHull(const HullDesc       &desc,           // describes the input request
+																					HullResult           &result)         // contains the result
+{
+	PHullResult hr; // intermediate hull, released again before returning QE_OK
+
+	// Never allocate room for fewer than 8 points.
+	unsigned int capacity = desc.mVcount;
+	if ( capacity < 8 )
+	{
+		capacity = 8;
+	}
+	btAlignedObjectArray<btVector3> vertexSource;
+	vertexSource.resize(static_cast<int>(capacity));
+
+	btVector3 scale;
+	unsigned int ovcount;
+
+	// normalize point cloud, remove duplicates!
+	if ( !CleanupVertices(desc.mVcount, desc.mVertices, desc.mVertexStride, ovcount, &vertexSource[0], desc.mNormalEpsilon, scale) )
+	{
+		return QE_FAIL;
+	}
+
+	// scale vertices back to their original size.
+	for (unsigned int n = 0; n < ovcount; ++n)
+	{
+		btVector3& pt = vertexSource[static_cast<int>(n)];
+		pt[0] *= scale[0];
+		pt[1] *= scale[1];
+		pt[2] *= scale[2];
+	}
+
+	// Generate the hull over the cleaned, rescaled points.
+	if ( !ComputeHull(ovcount, &vertexSource[0], hr, desc.mMaxVertices) )
+	{
+		return QE_FAIL;
+	}
+
+	// re-index triangle mesh so it refers to only used vertices, rebuild a new vertex table.
+	btAlignedObjectArray<btVector3>	vertexScratch;
+	vertexScratch.resize(static_cast<int>(hr.mVcount));
+	BringOutYourDead(hr.mVertices, hr.mVcount, &vertexScratch[0], ovcount, &hr.m_Indices[0], hr.mIndexCount);
+
+	// Vertex output is identical for both index layouts.
+	result.mNumOutputVertices = ovcount;
+	result.m_OutputVertices.resize(static_cast<int>(ovcount));
+	memcpy(&result.m_OutputVertices[0], &vertexScratch[0], sizeof(btVector3)*ovcount);
+	result.mNumFaces = hr.mFaceCount;
+
+	// Index output: order and layout depend on the requested flags.
+	const bool reversed = desc.HasHullFlag(QF_REVERSE_ORDER);
+	const unsigned int *src = &hr.m_Indices[0];
+
+	if ( desc.HasHullFlag(QF_TRIANGLES) ) // if he wants the results as triangle!
+	{
+		// Plain triangle list.
+		result.mPolygons   = false;
+		result.mNumIndices = hr.mIndexCount;
+		result.m_Indices.resize(static_cast<int>(hr.mIndexCount));
+		unsigned int *dst = &result.m_Indices[0];
+
+		if ( reversed )
+		{
+			for (unsigned int f = 0; f < hr.mFaceCount; ++f)
+			{
+				dst[0] = src[2];
+				dst[1] = src[1];
+				dst[2] = src[0];
+				dst += 3;
+				src += 3;
+			}
+		}
+		else
+		{
+			memcpy(dst, src, sizeof(unsigned int)*hr.mIndexCount);
+		}
+	}
+	else
+	{
+		// Polygon list: every face is prefixed with its vertex count (always 3).
+		result.mPolygons   = true;
+		result.mNumIndices = hr.mIndexCount+hr.mFaceCount;
+		result.m_Indices.resize(static_cast<int>(result.mNumIndices));
+		unsigned int *dst = &result.m_Indices[0];
+
+		for (unsigned int f = 0; f < hr.mFaceCount; ++f)
+		{
+			dst[0] = 3;
+			dst[1] = reversed ? src[2] : src[0];
+			dst[2] = src[1];
+			dst[3] = reversed ? src[0] : src[2];
+			dst += 4;
+			src += 3;
+		}
+	}
+
+	// The intermediate index array is no longer needed.
+	ReleaseHull(hr);
+	return QE_OK;
+}
+
+
+
+// Release the arrays held by `result` (we are done with it) and zero the
+// counts that describe them.  Always returns QE_OK.
+HullError HullLibrary::ReleaseResult(HullResult &result)
+{
+	// clear() is only invoked on non-empty arrays, as before.
+	if ( result.m_OutputVertices.size() ) result.m_OutputVertices.clear();
+	if ( result.m_Indices.size() ) result.m_Indices.clear();
+
+	// Reset every count, including mNumFaces which used to be left stale.
+	result.mNumOutputVertices = 0;
+	result.mNumFaces = 0;
+	result.mNumIndices = 0;
+	return QE_OK;
+}
+
+
+// Writes (x, y, z) into components 0..2 of p[vcount] and then increments
+// vcount.  No bounds check is made on p.
+// XXX, might be broken (warning carried over from the original author)
+static void addPoint(unsigned int &vcount,btVector3 *p,btScalar x,btScalar y,btScalar z)
+{
+	btVector3& slot = p[vcount];
+	slot[0] = x; slot[1] = y; slot[2] = z;
+	++vcount;
+}
+
+// Squared Euclidean distance between (px, py, pz) and the point stored in
+// p2[0..2].  Note: despite the name, no square root is taken.
+btScalar GetDist(btScalar px,btScalar py,btScalar pz,const btScalar *p2);
+btScalar GetDist(btScalar px,btScalar py,btScalar pz,const btScalar *p2)
+{
+	const btScalar ex = px - p2[0];
+	const btScalar ey = py - p2[1];
+	const btScalar ez = pz - p2[2];
+	return ex*ex+ey*ey+ez*ez;
+}
+
+
+
+// Copies the input points (read `stride` bytes apart, starting at svertices)
+// into `vertices`, merging every point that lies within normalepsilon of an
+// already stored point on all three axes.  If the cloud has almost no extent
+// on some axis (or fewer than 3 points), 8 box corners are stored instead.
+bool  HullLibrary::CleanupVertices(unsigned int svcount,       // number of input vertices
+				   const btVector3 *svertices,          // first input vertex
+				   unsigned int stride,                 // distance between input vertices, in bytes
+				   unsigned int &vcount,       // output number of vertices
+				   btVector3 *vertices,                 // location to store the results.
+				   btScalar  normalepsilon,             // per-axis merge tolerance
+				   btVector3& scale)                    // output: (1,1,1) or the bounding-box extents used to normalize
+{
+	if ( svcount == 0 ) return false;
+	// From here on every path returns true.
+	m_vertexIndexMapping.resize(0);
+
+#define EPSILON btScalar(0.000001) /* close enough to consider two floating point numbers to be 'the same'. */
+
+	vcount = 0;
+
+	btScalar recip[3]={0.f,0.f,0.f};
+
+	if ( scale ) // NOTE(review): scale is a reference, so this test looks like it is always true - confirm
+	{
+		scale[0] = 1;
+		scale[1] = 1;
+		scale[2] = 1;
+	}
+
+	btScalar bmin[3] = {  FLT_MAX,  FLT_MAX,  FLT_MAX };
+	btScalar bmax[3] = { -FLT_MAX, -FLT_MAX, -FLT_MAX };
+
+	const char *vtx = (const char *) svertices;
+
+//	if ( 1 )
+	{
+		for (unsigned int i=0; i<svcount; i++)
+		{
+			const btScalar *p = (const btScalar *) vtx;
+
+			vtx+=stride;
+
+			for (int j=0; j<3; j++)
+			{
+				if ( p[j] < bmin[j] ) bmin[j] = p[j];
+				if ( p[j] > bmax[j] ) bmax[j] = p[j];
+			}
+		}
+	}
+
+	btScalar dx = bmax[0] - bmin[0];
+	btScalar dy = bmax[1] - bmin[1];
+	btScalar dz = bmax[2] - bmin[2];
+
+	btVector3 center;
+
+	center[0] = dx*btScalar(0.5) + bmin[0];
+	center[1] = dy*btScalar(0.5) + bmin[1];
+	center[2] = dz*btScalar(0.5) + bmin[2];
+	// Degenerate input: extent below EPSILON on some axis, or fewer than 3 points.
+	if ( dx < EPSILON || dy < EPSILON || dz < EPSILON || svcount < 3 )
+	{
+		// len = smallest extent that is above EPSILON (stays FLT_MAX if there is none)
+		btScalar len = FLT_MAX;
+
+		if ( dx > EPSILON && dx < len ) len = dx;
+		if ( dy > EPSILON && dy < len ) len = dy;
+		if ( dz > EPSILON && dz < len ) len = dz;
+
+		if ( len == FLT_MAX )
+		{
+			dx = dy = dz = btScalar(0.01); // one centimeter
+		}
+		else
+		{
+			if ( dx < EPSILON ) dx = len * btScalar(0.05); // 1/20th the shortest non-zero edge.
+			if ( dy < EPSILON ) dy = len * btScalar(0.05);
+			if ( dz < EPSILON ) dz = len * btScalar(0.05);
+		}
+
+		btScalar x1 = center[0] - dx;
+		btScalar x2 = center[0] + dx;
+
+		btScalar y1 = center[1] - dy;
+		btScalar y2 = center[1] + dy;
+
+		btScalar z1 = center[2] - dz;
+		btScalar z2 = center[2] + dz;
+		// box corners at center +/- (dx, dy, dz); scale is still (1,1,1) here
+		addPoint(vcount,vertices,x1,y1,z1);
+		addPoint(vcount,vertices,x2,y1,z1);
+		addPoint(vcount,vertices,x2,y2,z1);
+		addPoint(vcount,vertices,x1,y2,z1);
+		addPoint(vcount,vertices,x1,y1,z2);
+		addPoint(vcount,vertices,x2,y1,z2);
+		addPoint(vcount,vertices,x2,y2,z2);
+		addPoint(vcount,vertices,x1,y2,z2);
+
+		return true; // return cube
+
+	}
+	else
+	{
+		if ( scale )
+		{
+			scale[0] = dx;
+			scale[1] = dy;
+			scale[2] = dz;
+
+			recip[0] = 1 / dx;
+			recip[1] = 1 / dy;
+			recip[2] = 1 / dz;
+
+			center[0]*=recip[0];
+			center[1]*=recip[1];
+			center[2]*=recip[2];
+
+		}
+
+	}
+
+	vtx = (const char *) svertices;
+	// Merge pass: each input point is normalized and compared with the points stored so far.
+	for (unsigned int i=0; i<svcount; i++)
+	{
+		const btVector3 *p = (const btVector3 *)vtx;
+		vtx+=stride;
+
+		btScalar px = p->getX();
+		btScalar py = p->getY();
+		btScalar pz = p->getZ();
+
+		if ( scale )
+		{
+			px = px*recip[0]; // normalize
+			py = py*recip[1]; // normalize
+			pz = pz*recip[2]; // normalize
+		}
+
+//		if ( 1 )
+		{
+			unsigned int j;
+
+			for (j=0; j<vcount; j++)
+			{
+				/// XXX might be broken
+				btVector3& v = vertices[j];
+
+				btScalar x = v[0];
+				btScalar y = v[1];
+				btScalar z = v[2];
+
+				btScalar dx = btFabs(x - px );
+				btScalar dy = btFabs(y - py );
+				btScalar dz = btFabs(z - pz );
+
+				if ( dx < normalepsilon && dy < normalepsilon && dz < normalepsilon )
+				{
+					// ok, it is close enough to the old one
+					// now let us see if it is further from the center of the point cloud than the one we already recorded.
+					// in which case we keep this one instead.
+
+					btScalar dist1 = GetDist(px,py,pz,center);
+					btScalar dist2 = GetDist(v[0],v[1],v[2],center);
+
+					if ( dist1 > dist2 )
+					{
+						v[0] = px;
+						v[1] = py;
+						v[2] = pz;
+						
+					}
+
+					break;
+				}
+			}
+			// j == vcount means no stored point was close enough: append a new one.
+			if ( j == vcount )
+			{
+				btVector3& dest = vertices[vcount];
+				dest[0] = px;
+				dest[1] = py;
+				dest[2] = pz;
+				vcount++;
+			}
+			m_vertexIndexMapping.push_back(j); // output slot used for input point i
+		}
+	}
+
+	// ok..now make sure we didn't prune so many vertices it is now invalid.
+//	if ( 1 )
+	{
+		btScalar bmin[3] = {  FLT_MAX,  FLT_MAX,  FLT_MAX };
+		btScalar bmax[3] = { -FLT_MAX, -FLT_MAX, -FLT_MAX };
+
+		for (unsigned int i=0; i<vcount; i++)
+		{
+			const btVector3& p = vertices[i];
+			for (int j=0; j<3; j++)
+			{
+				if ( p[j] < bmin[j] ) bmin[j] = p[j];
+				if ( p[j] > bmax[j] ) bmax[j] = p[j];
+			}
+		}
+
+		btScalar dx = bmax[0] - bmin[0];
+		btScalar dy = bmax[1] - bmin[1];
+		btScalar dz = bmax[2] - bmin[2];
+
+		if ( dx < EPSILON || dy < EPSILON || dz < EPSILON || vcount < 3)
+		{
+			btScalar cx = dx*btScalar(0.5) + bmin[0];
+			btScalar cy = dy*btScalar(0.5) + bmin[1];
+			btScalar cz = dz*btScalar(0.5) + bmin[2];
+
+			btScalar len = FLT_MAX;
+
+			if ( dx >= EPSILON && dx < len ) len = dx;
+			if ( dy >= EPSILON && dy < len ) len = dy;
+			if ( dz >= EPSILON && dz < len ) len = dz;
+
+			if ( len == FLT_MAX )
+			{
+				dx = dy = dz = btScalar(0.01); // one centimeter
+			}
+			else
+			{
+				if ( dx < EPSILON ) dx = len * btScalar(0.05); // 1/20th the shortest non-zero edge.
+				if ( dy < EPSILON ) dy = len * btScalar(0.05);
+				if ( dz < EPSILON ) dz = len * btScalar(0.05);
+			}
+
+			btScalar x1 = cx - dx;
+			btScalar x2 = cx + dx;
+
+			btScalar y1 = cy - dy;
+			btScalar y2 = cy + dy;
+
+			btScalar z1 = cz - dz;
+			btScalar z2 = cz + dz;
+
+			vcount = 0; // add box
+
+			addPoint(vcount,vertices,x1,y1,z1);
+			addPoint(vcount,vertices,x2,y1,z1);
+			addPoint(vcount,vertices,x2,y2,z1);
+			addPoint(vcount,vertices,x1,y2,z1);
+			addPoint(vcount,vertices,x1,y1,z2);
+			addPoint(vcount,vertices,x2,y1,z2);
+			addPoint(vcount,vertices,x2,y2,z2);
+			addPoint(vcount,vertices,x1,y2,z2);
+
+			return true;
+		}
+	}
+
+	return true;
+}
+
+// Compacts a hull's vertex table down to the vertices its indices reference.
+//
+//   verts/vcount  - source vertex table the incoming indices refer to
+//   overts        - receives the referenced vertices (first three components
+//                   only) in order of first use; up to vcount may be written
+//   ocount        - set to the number of vertices written to overts
+//   indices       - indexcount entries, rewritten in place to index overts
+//
+// m_vertexIndexMapping entries that name a referenced source vertex are
+// redirected to that vertex's new slot; all other entries are left unchanged.
+//
+// The mapping fix-up runs as a single pass after the index loop.  It yields
+// the same mapping as rescanning the whole array for every newly seen vertex,
+// without the per-vertex rescan.
+void HullLibrary::BringOutYourDead(const btVector3* verts,unsigned int vcount, btVector3* overts,unsigned int &ocount,unsigned int *indices,unsigned indexcount)
+{
+	// usedIndices[v] == 0 : source vertex v not referenced so far
+	// usedIndices[v] == n : source vertex v now lives in overts[n-1]
+	TUIntArray usedIndices;
+	usedIndices.resize(static_cast<int>(vcount));
+	if ( vcount ) // &usedIndices[0] is only valid for a non-empty array
+	{
+		memset(&usedIndices[0],0,sizeof(unsigned int)*vcount);
+	}
+
+	ocount = 0;
+
+	for (unsigned int i=0; i<indexcount; i++)
+	{
+		const unsigned int v = indices[i]; // original array index
+
+		btAssert( v < vcount ); // v is unsigned, so only the upper bound can fail
+
+		unsigned int &slot = usedIndices[static_cast<int>(v)];
+		if ( slot == 0 ) // first reference: append the vertex to the output table
+		{
+			overts[ocount][0] = verts[v][0]; // copy old vert to new vert array
+			overts[ocount][1] = verts[v][1];
+			overts[ocount][2] = verts[v][2];
+			ocount++; // increment output vert count
+			btAssert( ocount <= vcount );
+			slot = ocount; // new index + 1
+		}
+		indices[i] = slot-1; // index to new array
+	}
+
+	for (int k=0;k<m_vertexIndexMapping.size();k++)
+	{
+		const int old = m_vertexIndexMapping[k];
+		if ( old < 0 || static_cast<unsigned int>(old) >= vcount ) continue; // cannot name a source vertex
+		const unsigned int remapped = usedIndices[old];
+		if ( remapped ) m_vertexIndexMapping[k] = static_cast<int>(remapped-1);
+	}
+}
diff --git a/src/bullet/LinearMath/btConvexHull.h b/src/bullet/LinearMath/btConvexHull.h
new file mode 100644
index 00000000..69c52bc6
--- /dev/null
+++ b/src/bullet/LinearMath/btConvexHull.h
@@ -0,0 +1,241 @@
+
+/*
+Stan Melax Convex Hull Computation
+Copyright (c) 2008 Stan Melax http://www.melax.com/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+///includes modifications/improvements by John Ratcliff, see BringOutYourDead below.
+
+#ifndef BT_CD_HULL_H
+#define BT_CD_HULL_H
+
+#include "btVector3.h"
+#include "btAlignedObjectArray.h"
+
+typedef btAlignedObjectArray<unsigned int> TUIntArray;
+
+// Output of HullLibrary::CreateConvexHull().
+//
+// Index layout in m_Indices:
+//   mPolygons == false : triangles; indices are array indexes into the vertex list.
+//   mPolygons == true  : polygons; (number of points in face) (p1, p2, p3, ..) etc..
+class HullResult
+{
+public:
+	HullResult(void)
+		: mPolygons(true), mNumOutputVertices(0), mNumFaces(0), mNumIndices(0)
+	{
+	}
+
+	bool                            mPolygons;          // true: polygon layout, false: triangle layout
+	unsigned int                    mNumOutputVertices; // number of vertices in the output hull
+	btAlignedObjectArray<btVector3> m_OutputVertices;   // the hull's vertices
+	unsigned int                    mNumFaces;          // the number of faces produced
+	unsigned int                    mNumIndices;        // the total number of entries in m_Indices
+	btAlignedObjectArray<unsigned int> m_Indices;       // index data, laid out as described above
+};
+
+enum HullFlag // bit flags stored in HullDesc::mFlags
+{
+	QF_TRIANGLES         = (1<<0),             // report results as triangles, not polygons.
+	QF_REVERSE_ORDER     = (1<<1),             // reverse order of the triangle indices.
+	QF_DEFAULT           = QF_TRIANGLES        // value used by HullDesc's default constructor
+};
+
+
+// Input description for HullLibrary::CreateConvexHull(): the point cloud to
+// wrap plus the flags and limits that steer hull generation.
+// Every member is initialized by both constructors.
+class HullDesc
+{
+public:
+	// Empty request with default flags and limits.
+	HullDesc(void)
+	{
+		mFlags          = QF_DEFAULT;
+		mVcount         = 0;
+		mVertices       = 0;
+		mVertexStride   = sizeof(btVector3);
+		mNormalEpsilon  = 0.001f;
+		mMaxVertices	= 4096; // maximum number of points to be considered for a convex hull.
+		mMaxFaces	= 4096;
+	}
+
+	// Request over `vcount` vertices read `stride` bytes apart from `vertices`.
+	HullDesc(HullFlag flag,
+		 unsigned int vcount,
+		 const btVector3 *vertices,
+		 unsigned int stride = sizeof(btVector3))
+	{
+		mFlags          = flag;
+		mVcount         = vcount;
+		mVertices       = vertices;
+		mVertexStride   = stride;
+		mNormalEpsilon  = btScalar(0.001);
+		mMaxVertices    = 4096;
+		mMaxFaces       = 4096; // was left uninitialized here; same value as the default constructor
+	}
+
+	// Flag helpers operating on the bits of mFlags.
+	bool HasHullFlag(HullFlag flag) const
+	{
+		return (mFlags & flag) != 0;
+	}
+
+	void SetHullFlag(HullFlag flag)   { mFlags|=flag; }
+
+	void ClearHullFlag(HullFlag flag) { mFlags&=~flag; }
+
+	unsigned int      mFlags;           // flags to use when generating the convex hull.
+	unsigned int      mVcount;          // number of vertices in the input point cloud
+	const btVector3  *mVertices;        // the array of vertices.
+	unsigned int      mVertexStride;    // the stride of each vertex, in bytes.
+	btScalar             mNormalEpsilon;   // the epsilon for removing duplicates.  This is a normalized value, if normalized bit is on.
+	unsigned int      mMaxVertices;     // maximum number of vertices to be considered for the hull!
+	unsigned int      mMaxFaces;
+};
+
+enum HullError // status returned by HullLibrary::CreateConvexHull() and ReleaseResult()
+{
+	QE_OK,            // success!
+	QE_FAIL           // failed.
+};
+
+// Plane described by a normal vector and a scalar offset.
+class btPlane
+{
+public:
+	btVector3 normal;
+	btScalar  dist; // distance below origin - the D from plane equation Ax+By+Cz+D=0
+	btPlane(const btVector3 &n, btScalar d) : normal(n), dist(d) {}
+	btPlane() : normal(), dist(0) {}
+};
+
+
+
+// Convex polyhedron stored as vertex, half-edge and facet-plane arrays.
+class ConvexH
+{
+public:
+	// One half of an edge; every field is an index into one of ConvexH's arrays.
+	class HalfEdge
+	{
+	public:
+		short ea;         // the other half of the edge (index into edges list)
+		unsigned char v;  // the vertex at the start of this edge (index into vertices list)
+		unsigned char p;  // the facet on which this edge lies (index into facets list)
+		HalfEdge() {} // leaves the fields uninitialized
+		HalfEdge(short _ea, unsigned char _v, unsigned char _p) : ea(_ea), v(_v), p(_p) {}
+	};
+
+	ConvexH() {}
+	ConvexH(int vertices_size,int edges_size,int facets_size);
+	~ConvexH() {}
+
+	btAlignedObjectArray<btVector3> vertices;
+	btAlignedObjectArray<HalfEdge> edges;
+	btAlignedObjectArray<btPlane>  facets;
+};
+
+
+class int4 // four ints addressable by name (x,y,z,w) or by index 0..3
+{
+public:
+	int x,y,z,w; // operator[] offsets from &x, so keep these adjacent and in order
+	int4() {} // components are left uninitialized
+	int4(int _x,int _y, int _z,int _w) : x(_x), y(_y), z(_z), w(_w) {}
+	const int& operator[](int i) const { return *(&x + i); }
+	int& operator[](int i) { return *(&x + i); }
+};
+
+// Intermediate hull description filled in by HullLibrary::ComputeHull():
+// triangle indices plus a pointer to the vertex array they refer to.
+// ReleaseHull() resets it.
+class PHullResult
+{
+public:
+	// Starts out empty: all counts zero and no vertex array.
+	PHullResult(void)
+		: mVcount(0), mIndexCount(0), mFaceCount(0), mVertices(0)
+	{
+	}
+
+	unsigned int mVcount;     // number of vertices behind mVertices
+	unsigned int mIndexCount; // number of indices (3 per face)
+	unsigned int mFaceCount;  // number of triangles
+	btVector3*   mVertices;   // vertex array that was passed to ComputeHull()
+	TUIntArray m_Indices;     // triangle vertex indices
+};
+
+
+
+///The HullLibrary class can create a convex hull from a collection of vertices, using the CreateConvexHull method.
+///The btShapeHull class uses this HullLibrary to create an approximate convex mesh given a general (non-polyhedral) convex shape.
+class HullLibrary
+{
+	// Triangle slots used while a hull is being generated; a slot may be null.
+	btAlignedObjectArray<class btHullTriangle*> m_tris;
+
+public:
+	// One entry per input vertex processed by CleanupVertices(); remapped by BringOutYourDead().
+	btAlignedObjectArray<int> m_vertexIndexMapping;
+
+	// Returns QE_OK on success and QE_FAIL otherwise.
+	HullError CreateConvexHull(const HullDesc& desc, // describes the input request
+				   HullResult&     result);        // contains the result
+	HullError ReleaseResult(HullResult &result); // release memory allocated for this result, we are done with it.
+
+private:
+	// Computes triangle indices over `vertices`; false when hull generation fails.
+	bool ComputeHull(unsigned int vcount,const btVector3 *vertices,PHullResult &result,unsigned int vlimit);
+
+	class btHullTriangle*	allocateTriangle(int a,int b,int c);
+	void	deAllocateTriangle(btHullTriangle*);
+	void b2bfix(btHullTriangle* s,btHullTriangle*t);
+
+	void removeb2b(btHullTriangle* s,btHullTriangle*t);
+
+	void checkit(btHullTriangle *t);
+
+	btHullTriangle* extrudable(btScalar epsilon);
+	// Returns 0 on failure and 1 on success; tris_out receives 3 indices per triangle.
+	int calchull(btVector3 *verts,int verts_count, TUIntArray& tris_out, int &tris_count,int vlimit);
+
+	int calchullgen(btVector3 *verts,int verts_count, int vlimit);
+
+	int4 FindSimplex(btVector3 *verts,int verts_count,btAlignedObjectArray<int> &allow);
+
+	class ConvexH* ConvexHCrop(ConvexH& convex,const btPlane& slice);
+
+	void extrude(class btHullTriangle* t0,int v);
+
+	ConvexH* test_cube();
+
+	//BringOutYourDead (John Ratcliff): When you create a convex hull you hand it a large input set of vertices forming a 'point cloud'. 
+	//After the hull is generated it gives you back a set of polygon faces which index the *original* point cloud.
+	//The thing is, often times, there are many 'dead vertices' in the point cloud that are no longer referenced by the hull.
+	//The routine 'BringOutYourDead' finds only the referenced vertices, copies them to a new buffer, and re-indexes the hull so that it is a minimal representation.
+	void BringOutYourDead(const btVector3* verts,unsigned int vcount, btVector3* overts,unsigned int &ocount,unsigned int* indices,unsigned indexcount);
+	// Copies the input points into `vertices`, merging near-duplicates; false only for an empty input.
+	bool CleanupVertices(unsigned int svcount,
+			     const btVector3* svertices,
+			     unsigned int stride,
+			     unsigned int &vcount, // output number of vertices
+			     btVector3* vertices, // location to store the results.
+			     btScalar  normalepsilon,
+			     btVector3& scale);
+};
+
+
+#endif //BT_CD_HULL_H
+
diff --git a/src/bullet/LinearMath/btConvexHullComputer.cpp b/src/bullet/LinearMath/btConvexHullComputer.cpp
new file mode 100644
index 00000000..c03c901c
--- /dev/null
+++ b/src/bullet/LinearMath/btConvexHullComputer.cpp
@@ -0,0 +1,2751 @@
+/*
+Copyright (c) 2011 Ole Kniemeyer, MAXON, www.maxon.net
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <string.h>
+
+#include "btConvexHullComputer.h"
+#include "btAlignedObjectArray.h"
+#include "btMinMax.h"
+#include "btVector3.h"
+
+#ifdef __GNUC__
+	#include <stdint.h>
+#elif defined(_MSC_VER)
+	typedef __int32 int32_t;
+	typedef __int64 int64_t;
+	typedef unsigned __int32 uint32_t;
+	typedef unsigned __int64 uint64_t;
+#else
+	typedef int int32_t;
+	typedef long long int int64_t;
+	typedef unsigned int uint32_t;
+	typedef unsigned long long int uint64_t;
+#endif
+
+
+//The definition of USE_X86_64_ASM is moved into the build system. You can enable it manually by uncommenting the following lines
+//#if (defined(__GNUC__) && defined(__x86_64__) && !defined(__ICL))  // || (defined(__ICL) && defined(_M_X64))   bug in Intel compiler, disable inline assembly
+//	#define USE_X86_64_ASM
+//#endif
+
+
+//#define DEBUG_CONVEX_HULL
+//#define SHOW_ITERATIONS
+
+#if defined(DEBUG_CONVEX_HULL) || defined(SHOW_ITERATIONS)
+	#include <stdio.h>
+#endif
+
+// Convex hull implementation based on Preparata and Hong
+// Ole Kniemeyer, MAXON Computer GmbH
+class btConvexHullInternal
+{
+	public:
+		
+		// Integer vector with 64-bit components (e.g. the cross product of two Point32).
+		class Point64
+		{
+			public:
+				int64_t x;
+				int64_t y;
+				int64_t z;
+				Point64(int64_t x, int64_t y, int64_t z): x(x), y(y), z(z)
+				{
+				}
+
+				bool isZero() const
+				{
+					return (x == 0) && (y == 0) && (z == 0);
+				}
+
+				int64_t dot(const Point64& b) const
+				{
+					return x * b.x + y * b.y + z * b.z;
+				}
+		};
+
+		// Integer point with 32-bit components plus an `index` tag (-1 when built
+		// from coordinates).  cross() and dot() widen to 64 bits *before* they
+		// multiply, so products of two 32-bit coordinates cannot overflow.
+		class Point32
+		{
+			public:
+				int32_t x;
+				int32_t y;
+				int32_t z;
+				int index;
+				Point32() {} // leaves all members uninitialized
+
+				Point32(int32_t x, int32_t y, int32_t z): x(x), y(y), z(z), index(-1)
+				{
+				}
+				
+				bool operator==(const Point32& b) const
+				{
+					return (x == b.x) && (y == b.y) && (z == b.z);
+				}
+
+				bool operator!=(const Point32& b) const
+				{
+					return (x != b.x) || (y != b.y) || (z != b.z);
+				}
+
+				bool isZero() const
+				{
+					return (x == 0) && (y == 0) && (z == 0);
+				}
+
+				Point64 cross(const Point32& b) const
+				{
+					return Point64(((int64_t) y) * b.z - ((int64_t) z) * b.y, ((int64_t) z) * b.x - ((int64_t) x) * b.z, ((int64_t) x) * b.y - ((int64_t) y) * b.x);
+				}
+
+				Point64 cross(const Point64& b) const
+				{
+					return Point64(y * b.z - z * b.y, z * b.x - x * b.z, x * b.y - y * b.x);
+				}
+
+				int64_t dot(const Point32& b) const
+				{
+					return ((int64_t) x) * b.x + ((int64_t) y) * b.y + ((int64_t) z) * b.z;
+				}
+
+				int64_t dot(const Point64& b) const
+				{
+					return x * b.x + y * b.y + z * b.z;
+				}
+
+				Point32 operator+(const Point32& b) const
+				{
+					return Point32(x + b.x, y + b.y, z + b.z);
+				}
+
+				Point32 operator-(const Point32& b) const
+				{
+					return Point32(x - b.x, y - b.y, z - b.z);
+				}
+		};
+
+		class Int128 // 128-bit integer stored as two 64-bit halves (two's complement when negative)
+		{
+			public:
+				uint64_t low;
+				uint64_t high;
+
+				Int128()
+				{
+				}
+
+				Int128(uint64_t low, uint64_t high): low(low), high(high)
+				{
+				}
+
+				Int128(uint64_t low): low(low), high(0)
+				{
+				}
+				// Sign-extends: high becomes all ones for negative values.
+				Int128(int64_t value): low(value), high((value >= 0) ? 0 : (uint64_t) -1LL)
+				{
+				}
+
+				static Int128 mul(int64_t a, int64_t b);
+
+				static Int128 mul(uint64_t a, uint64_t b);
+				// Negation: low is negated; high is complemented, plus a carry when low is 0.
+				Int128 operator-() const
+				{
+					return Int128((uint64_t) -(int64_t)low, ~high + (low == 0));
+				}
+				// Addition; the portable path carries from the low half into the high half.
+				Int128 operator+(const Int128& b) const
+				{
+#ifdef USE_X86_64_ASM
+					Int128 result;
+					__asm__ ("addq %[bl], %[rl]\n\t"
+									 "adcq %[bh], %[rh]\n\t"
+									 : [rl] "=r" (result.low), [rh] "=r" (result.high)
+									 : "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high)
+									 : "cc" );
+					return result;
+#else
+					uint64_t lo = low + b.low;
+					return Int128(lo, high + b.high + (lo < low));
+#endif
+				}
+				// Subtraction; the portable path adds the negation of b.
+				Int128 operator-(const Int128& b) const
+				{
+#ifdef USE_X86_64_ASM
+					Int128 result;
+					__asm__ ("subq %[bl], %[rl]\n\t"
+									 "sbbq %[bh], %[rh]\n\t"
+									 : [rl] "=r" (result.low), [rh] "=r" (result.high)
+									 : "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high)
+									 : "cc" );
+					return result;
+#else
+					return *this + -b;
+#endif
+				}
+
+				Int128& operator+=(const Int128& b)
+				{
+#ifdef USE_X86_64_ASM
+					__asm__ ("addq %[bl], %[rl]\n\t"
+									 "adcq %[bh], %[rh]\n\t"
+									 : [rl] "=r" (low), [rh] "=r" (high)
+									 : "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high)
+									 : "cc" );
+#else
+					uint64_t lo = low + b.low;
+					if (lo < low)
+					{
+						++high;
+					}
+					low = lo;
+					high += b.high;
+#endif
+					return *this;
+				}
+
+				Int128& operator++()
+				{
+					if (++low == 0)
+					{
+						++high;
+					}
+					return *this;
+				}
+
+				Int128 operator*(int64_t b) const;
+				// Converts to btScalar as high * 2^64 + low; negative values are negated first.
+				btScalar toScalar() const
+				{
+					return ((int64_t) high >= 0) ? btScalar(high) * (btScalar(0x100000000LL) * btScalar(0x100000000LL)) + btScalar(low)
+						: -(-*this).toScalar();
+				}
+				// -1, 0 or +1; a set top bit of high means negative.
+				int getSign() const
+				{
+					return ((int64_t) high < 0) ? -1 : (high || low) ? 1 : 0;
+				}
+				// NOTE(review): compares both halves as unsigned, like ucmp() - confirm this is intended for negative operands.
+				bool operator<(const Int128& b) const
+				{
+					return (high < b.high) || ((high == b.high) && (low < b.low));
+				}
+				// Three-way unsigned comparison: returns -1, 0 or 1.
+				int ucmp(const Int128&b) const
+				{
+					if (high < b.high)
+					{
+						return -1;
+					}
+					if (high > b.high)
+					{
+						return 1;
+					}
+					if (low < b.low)
+					{
+						return -1;
+					}
+					if (low > b.low)
+					{
+						return 1;
+					}
+					return 0;
+				}
+		};
+
+
+		// Sign/magnitude rational number.  A zero denominator encodes an infinity
+		// (sign != 0) or NaN (sign == 0); see isNegativeInfinity() and isNaN().
+		class Rational64
+		{
+			private:
+				uint64_t m_numerator;
+				uint64_t m_denominator;
+				int sign;
+
+			public:
+				Rational64(int64_t numerator, int64_t denominator)
+				{
+					// Magnitudes are negated as unsigned values so that INT64_MIN
+					// cannot trigger signed overflow.
+					if (numerator > 0)
+					{
+						sign = 1;
+						m_numerator = (uint64_t) numerator;
+					}
+					else if (numerator < 0)
+					{
+						sign = -1;
+						m_numerator = uint64_t(0) - (uint64_t) numerator;
+					}
+					else
+					{
+						sign = 0;
+						m_numerator = 0;
+					}
+					if (denominator >= 0)
+					{
+						m_denominator = (uint64_t) denominator;
+					}
+					else
+					{
+						sign = -sign;
+						m_denominator = uint64_t(0) - (uint64_t) denominator;
+					}
+				}
+
+				bool isNegativeInfinity() const
+				{
+					return (sign < 0) && (m_denominator == 0);
+				}
+
+				bool isNaN() const
+				{
+					return (sign == 0) && (m_denominator == 0);
+				}
+
+				int compare(const Rational64& b) const;
+
+				btScalar toScalar() const
+				{
+					return sign * ((m_denominator == 0) ? SIMD_INFINITY : (btScalar) m_numerator / m_denominator);
+				}
+		};
+
+
+		// Sign/magnitude rational number built on Int128.  `numerator` and
+		// `denominator` hold magnitudes; `sign` is -1, 0 or +1.  toScalar()
+		// treats a zero denominator as infinity.
+		class Rational128
+		{
+			private:
+				Int128 numerator;
+				Int128 denominator;
+				int sign;
+				bool isInt64; // true only when constructed from a single int64_t
+
+			public:
+				// Represents the integer `value` (denominator 1).
+				Rational128(int64_t value)
+				{
+					sign = (value > 0) ? 1 : ((value < 0) ? -1 : 0);
+					// The magnitude is formed in unsigned arithmetic: negating INT64_MIN
+					// as a signed value would overflow.
+					const uint64_t magnitude = (value < 0) ? (uint64_t(0) - (uint64_t) value) : (uint64_t) value;
+					this->numerator = magnitude;
+					this->denominator = (uint64_t) 1;
+					isInt64 = true;
+				}
+
+				// Represents numerator/denominator; the signs of both operands are
+				// folded into `sign` and their magnitudes are stored.
+				// A zero numerator yields sign 0 whatever the denominator's sign.
+				Rational128(const Int128& numerator, const Int128& denominator)
+				{
+					sign = numerator.getSign();
+					if (sign >= 0)
+					{
+						this->numerator = numerator;
+					}
+					else
+					{
+						this->numerator = -numerator;
+					}
+					int dsign = denominator.getSign();
+					if (dsign >= 0)
+					{
+						this->denominator = denominator;
+					}
+					else
+					{
+						sign = -sign;
+						this->denominator = -denominator;
+					}
+					isInt64 = false;
+				}
+
+				// Comparison helpers (only declared here).
+				int compare(const Rational128& b) const;
+
+				int compare(int64_t b) const;
+
+				// Approximate value as a btScalar (sign * numerator / denominator).
+				btScalar toScalar() const
+				{
+					return sign * ((denominator.getSign() == 0) ? SIMD_INFINITY : numerator.toScalar() / denominator.toScalar());
+				}
+		};
+
+		// Point whose coordinates are 128-bit integers over one shared
+		// denominator: the accessors return x, y and z divided by `denominator`.
+		class PointR128
+		{
+			public:
+				Int128 x, y, z;     // numerators
+				Int128 denominator; // common divisor of x, y and z
+
+				// Members are left to Int128's default constructor.
+				PointR128() {}
+
+				PointR128(Int128 x, Int128 y, Int128 z, Int128 denominator)
+					: x(x), y(y), z(z), denominator(denominator) {}
+
+				// Each accessor converts one numerator and the denominator to
+				// btScalar and returns their quotient.
+				btScalar xvalue() const
+				{
+					return x.toScalar() / denominator.toScalar();
+				}
+
+				btScalar yvalue() const
+				{
+					return y.toScalar() / denominator.toScalar();
+				}
+
+				btScalar zvalue() const
+				{
+					return z.toScalar() / denominator.toScalar();
+				}
+		};
+
+
+		class Edge;
+		class Face;
+
+		// Hull vertex.  When point.index >= 0 the coordinates are the integers in
+		// `point`; otherwise they are the rationals in `point128`.
+		class Vertex
+		{
+			public:
+				Vertex* next;
+				Vertex* prev;
+				Edge* edges;
+				Face* firstNearbyFace;
+				Face* lastNearbyFace;
+				PointR128 point128;
+				Point32 point;
+				int copy;
+
+				Vertex(): next(NULL), prev(NULL), edges(NULL), firstNearbyFace(NULL), lastNearbyFace(NULL), copy(-1)
+				{
+				}
+
+#ifdef DEBUG_CONVEX_HULL
+				void print()
+				{
+					printf("V%d (%d, %d, %d)", point.index, point.x, point.y, point.z);
+				}
+
+				void printGraph();
+#endif
+
+				Point32 operator-(const Vertex& b) const
+				{
+					return point - b.point;
+				}
+
+				Rational128 dot(const Point64& b) const
+				{
+					if (point.index >= 0) return Rational128(point.dot(b));
+					return Rational128(point128.x * b.x + point128.y * b.y + point128.z * b.z, point128.denominator);
+				}
+
+				btScalar xvalue() const
+				{
+					if (point.index < 0) return point128.xvalue();
+					return btScalar(point.x);
+				}
+
+				btScalar yvalue() const
+				{
+					if (point.index < 0) return point128.yvalue();
+					return btScalar(point.y);
+				}
+
+				btScalar zvalue() const
+				{
+					if (point.index < 0) return point128.zvalue();
+					return btScalar(point.z);
+				}
+
+				// Appends src's nearby-face list to this vertex's list, points those
+				// faces at this vertex and leaves src's list empty.
+				void receiveNearbyFaces(Vertex* src)
+				{
+					Face* const head = src->firstNearbyFace;
+					Face* const tail = src->lastNearbyFace;
+					if (lastNearbyFace) lastNearbyFace->nextWithSameNearbyVertex = head;
+					else firstNearbyFace = head;
+					if (tail) lastNearbyFace = tail;
+					for (Face* f = head; f; f = f->nextWithSameNearbyVertex)
+					{
+						btAssert(f->nearbyVertex == src);
+						f->nearbyVertex = this;
+					}
+					src->firstNearbyFace = NULL;
+					src->lastNearbyFace = NULL;
+				}
+		};
+
+
+		// Directed half-edge. `reverse` is the oppositely directed twin, `target`
+		// the vertex the edge points to; next/prev chain the edges that leave the
+		// same origin vertex (the origin is reverse->target).
+		class Edge
+		{
+			public:
+				Edge* next;
+				Edge* prev;
+				Edge* reverse;
+				Vertex* target;
+				Face* face;
+				int copy;
+
+				// Clears all pointers so a destroyed edge does not keep stale links.
+				~Edge()
+				{
+					face = NULL;
+					target = NULL;
+					next = prev = reverse = NULL;
+				}
+
+				// Makes n the successor of this edge; both must leave the same vertex.
+				void link(Edge* n)
+				{
+					btAssert(reverse->target == n->reverse->target);
+					n->prev = this;
+					next = n;
+				}
+
+#ifdef DEBUG_CONVEX_HULL
+				// Debug helper: prints the edge, its endpoints and its ring neighbours.
+				void print()
+				{
+					printf("E%p : %d -> %d,  n=%p p=%p   (0 %d\t%d\t%d) -> (%d %d %d)", this, reverse->target->point.index, target->point.index, next, prev,
+								 reverse->target->point.x, reverse->target->point.y, reverse->target->point.z, target->point.x, target->point.y, target->point.z);
+				}
+#endif
+		};
+
+		// Face described by an origin point and two direction vectors. Faces are
+		// chained through `next`, and additionally through
+		// `nextWithSameNearbyVertex` for all faces sharing one `nearbyVertex`.
+		class Face
+		{
+			public:
+				Face* next;
+				Vertex* nearbyVertex;
+				Face* nextWithSameNearbyVertex;
+				Point32 origin;
+				Point32 dir0;
+				Point32 dir1;
+
+				Face(): next(NULL), nearbyVertex(NULL), nextWithSameNearbyVertex(NULL)
+				{
+				}
+
+				// Defines the face from the triangle (a, b, c) and appends it to the
+				// nearby-face list of vertex a.
+				void init(Vertex* a, Vertex* b, Vertex* c)
+				{
+					// Append this face at the tail of a's nearby-face list.
+					Face* const tail = a->lastNearbyFace;
+					if (!tail)
+					{
+						a->firstNearbyFace = this;
+					}
+					else
+					{
+						tail->nextWithSameNearbyVertex = this;
+					}
+					a->lastNearbyFace = this;
+
+					// Geometry: origin at a, spanned by (b - a) and (c - a).
+					nearbyVertex = a;
+					origin = a->point;
+					dir0 = *b - *a;
+					dir1 = *c - *a;
+				}
+
+				// Unnormalised face normal: dir0 x dir1.
+				Point64 getNormal()
+				{
+					return dir0.cross(dir1);
+				}
+		};
+
+		// Double-width multiplication: multiplies two UWord values and delivers the
+		// full product as a low and a high UWord. The private overloads supply the
+		// half-word primitives for UWord = uint64_t and UWord = Int128.
+		// Note: UHWord is not referenced inside the class body.
+		template<typename UWord, typename UHWord> class DMul
+		{
+			private:
+				// ---- primitives for a 64-bit word made of two 32-bit halves ----
+
+				static uint32_t low(uint64_t value)
+				{
+					return (uint32_t) value;
+				}
+
+				static uint32_t high(uint64_t value)
+				{
+					return (uint32_t) (value >> 32);
+				}
+
+				static uint64_t mul(uint32_t a, uint32_t b)
+				{
+					return (uint64_t) a * (uint64_t) b;
+				}
+
+				// Moves the low half of value into its high half.
+				static void shlHalf(uint64_t& value)
+				{
+					value <<= 32;
+				}
+
+				// ---- primitives for a 128-bit word made of two 64-bit halves ----
+
+				static uint64_t low(Int128 value)
+				{
+					return value.low;
+				}
+
+				static uint64_t high(Int128 value)
+				{
+					return value.high;
+				}
+
+				static Int128 mul(uint64_t a, uint64_t b)
+				{
+					return Int128::mul(a, b);
+				}
+
+				// Moves the low half of value into its high half.
+				static void shlHalf(Int128& value)
+				{
+					value.high = value.low;
+					value.low = 0;
+				}
+
+			public:
+
+				// Computes a * b; the low word goes to resLow, the high word to resHigh.
+				static void mul(UWord a, UWord b, UWord& resLow, UWord& resHigh)
+				{
+					// The four half-word partial products.
+					UWord lowLow = mul(low(a), low(b));
+					UWord lowHigh = mul(low(a), high(b));
+					UWord highLow = mul(high(a), low(b));
+					UWord highHigh = mul(high(a), high(b));
+
+					// Low halves of the two cross terms; their sum may carry into
+					// the high word, which high(cross) accounts for below.
+					UWord cross = UWord(low(lowHigh)) + UWord(low(highLow));
+
+					highHigh += high(lowHigh);
+					highHigh += high(highLow);
+					highHigh += high(cross);
+
+					shlHalf(cross);
+					lowLow += cross;
+					if (lowLow < cross)
+					{
+						// The addition into the low word wrapped around: carry.
+						++highHigh;
+					}
+
+					resLow = lowLow;
+					resHigh = highHigh;
+				}
+		};
+	
+	private:
+
+		// Partial hull produced by the recursive computation: four vertices of the
+		// hull's vertex list that serve as entry points (names suggest the extremes
+		// in x-then-y and y-then-x order; see computeInternal for how they are set).
+		class IntermediateHull
+		{
+			public:
+				Vertex* minXy;
+				Vertex* maxXy;
+				Vertex* minYx;
+				Vertex* maxYx;
+
+				// An empty hull has all four entry points set to NULL.
+				IntermediateHull()
+				{
+					minXy = maxXy = NULL;
+					minYx = maxYx = NULL;
+				}
+
+				void print();
+		};
+	
+		// Result of getOrientation(): relative order of two edges that leave the
+		// same vertex, or NONE when they are not adjacent in that vertex's edge ring.
+		enum Orientation {NONE, CLOCKWISE, COUNTER_CLOCKWISE};
+
+		// One raw, 16-byte aligned chunk of storage for `size` objects of type T.
+		// The chunk only provides memory; objects are constructed by Pool.
+		template <typename T> class PoolArray
+		{
+			private:
+				T* array;
+				int size;
+
+			public:
+				PoolArray<T>* next;
+
+				PoolArray(int size): size(size), next(NULL)
+				{
+					array = (T*) btAlignedAlloc(sizeof(T) * size, 16);
+				}
+
+				~PoolArray()
+				{
+					btAlignedFree(array);
+				}
+
+				// Threads all slots into a free list through their `next` member
+				// (last slot terminates with NULL) and returns the first slot.
+				T* init()
+				{
+					for (int i = 0; i < size; i++)
+					{
+						T* slot = array + i;
+						slot->next = (i + 1 < size) ? slot + 1 : NULL;
+					}
+					return array;
+				}
+		};
+
+		// Free-list object pool backed by a list of PoolArray chunks. T must have a
+		// `next` pointer member, which links the unused slots.
+		template <typename T> class Pool
+		{
+			private:
+				PoolArray<T>* arrays;
+				PoolArray<T>* nextArray;
+				T* freeObjects;
+				int arraySize;
+
+			public:
+				Pool(): arrays(NULL), nextArray(NULL), freeObjects(NULL), arraySize(256)
+				{
+				}
+
+				// Destroys and releases every chunk owned by the pool.
+				~Pool()
+				{
+					while (PoolArray<T>* chunk = arrays)
+					{
+						arrays = chunk->next;
+						chunk->~PoolArray<T>();
+						btAlignedFree(chunk);
+					}
+				}
+
+				// Forgets all handed-out objects; existing chunks are kept and will
+				// be re-used (starting with the first one) by later newObject() calls.
+				void reset()
+				{
+					freeObjects = NULL;
+					nextArray = arrays;
+				}
+
+				// Sets the slot count used for chunks allocated from now on.
+				void setArraySize(int arraySize)
+				{
+					this->arraySize = arraySize;
+				}
+
+				// Returns a freshly constructed T, taking a slot from the free list
+				// and refilling the list from the next chunk when it is empty.
+				T* newObject()
+				{
+					if (!freeObjects)
+					{
+						PoolArray<T>* chunk = nextArray;
+						if (chunk)
+						{
+							// Re-use a chunk kept from before the last reset().
+							nextArray = chunk->next;
+						}
+						else
+						{
+							void* storage = btAlignedAlloc(sizeof(PoolArray<T>), 16);
+							chunk = new(storage) PoolArray<T>(arraySize);
+							chunk->next = arrays;
+							arrays = chunk;
+						}
+						freeObjects = chunk->init();
+					}
+
+					T* slot = freeObjects;
+					freeObjects = slot->next;
+					return new(slot) T();
+				}
+
+				// Destroys object and puts its slot back at the head of the free list.
+				void freeObject(T* object)
+				{
+					object->~T();
+					object->next = freeObjects;
+					freeObjects = object;
+				}
+		};
+
+		// Per-axis scale and centre of the input bounding box; both are set by
+		// compute() and used to map input coordinates to integer points.
+		btVector3 scaling;
+		btVector3 center;
+		// Object pools for the hull's vertices, edges and faces.
+		Pool<Vertex> vertexPool;
+		Pool<Edge> edgePool;
+		Pool<Face> facePool;
+		// Vertices created by compute(), in sorted point order.
+		btAlignedObjectArray<Vertex*> originalVertices;
+		// Stamp written into Edge::copy by newEdgePair(); merge() decrements it
+		// once per merge, so edges created in the current merge can be told apart.
+		int mergeStamp;
+		// Indices (0..2) of the input axes chosen by compute().
+		int minAxis;
+		int medAxis;
+		int maxAxis;
+		// Current and peak number of live edge pairs.
+		int usedEdgePairs;
+		int maxUsedEdgePairs;
+
+		// Relative order of two edges leaving the same vertex (see Orientation).
+		static Orientation getOrientation(const Edge* prev, const Edge* next, const Point32& s, const Point32& t);
+		// Among start's edges with copy > mergeStamp, picks the one with the
+		// smallest cotangent value; that value is returned through minCot.
+		Edge* findMaxAngle(bool ccw, const Vertex* start, const Point32& s, const Point64& rxs, const Point64& sxrxs, Rational64& minCot);
+		// Advances e0/e1 (in/out) along faces coplanar with the plane through
+		// c0, c1 and the starting edge's target.
+		void findEdgeForCoplanarFaces(Vertex* c0, Vertex* c1, Edge*& e0, Edge*& e1, Vertex* stop0, Vertex* stop1);
+
+		// Allocates two mutually reversed edges from -> to and to -> from and
+		// returns the from -> to edge. The edges are not linked into any ring yet.
+		Edge* newEdgePair(Vertex* from, Vertex* to);
+
+		// Unlinks edge and its reverse from the edge rings of their origin
+		// vertices, fixes up each vertex's `edges` entry pointer (NULL when the
+		// ring becomes empty) and returns both edges to the pool.
+		void removeEdgePair(Edge* edge)
+		{
+			Edge* succ = edge->next;
+			Edge* twin = edge->reverse;
+
+			btAssert(edge->target && twin->target);
+
+			// Ring around edge's origin, which is twin->target.
+			if (succ == edge)
+			{
+				twin->target->edges = NULL;
+			}
+			else
+			{
+				succ->prev = edge->prev;
+				edge->prev->next = succ;
+				twin->target->edges = succ;
+			}
+
+			// Ring around twin's origin, which is edge->target.
+			succ = twin->next;
+
+			if (succ == twin)
+			{
+				edge->target->edges = NULL;
+			}
+			else
+			{
+				succ->prev = twin->prev;
+				twin->prev->next = succ;
+				edge->target->edges = succ;
+			}
+
+			edgePool.freeObject(edge);
+			edgePool.freeObject(twin);
+			usedEdgePairs--;
+		}
+		
+		// Recursively builds the hull of originalVertices[start, end) into result.
+		void computeInternal(int start, int end, IntermediateHull& result);
+		
+		// Joins the vertex lists of h0 and h1 and reports a pair of connecting
+		// vertices through c0/c1; returns false for the degenerate case in which
+		// h1 consists of a single vertex list entry sharing x and y with h0.maxYx.
+		bool mergeProjection(IntermediateHull& h0, IntermediateHull& h1, Vertex*& c0, Vertex*& c1);
+		
+		// Merges hull h1 into h0.
+		void merge(IntermediateHull& h0, IntermediateHull& h1);
+
+		btVector3 toBtVector(const Point32& v);
+
+		btVector3 getBtNormal(Face* face);
+
+		// NOTE(review): `stack` is taken by value, so the array is copied on each
+		// call — confirm whether a reference was intended (definition not shown here).
+		bool shiftFace(Face* face, btScalar amount, btAlignedObjectArray<Vertex*> stack);
+
+	public:
+		// Entry vertex of the resulting hull; compute() sets it to the minXy
+		// vertex of the final hull (NULL when there are no input points).
+		Vertex* vertexList;
+
+		// Computes the hull of `count` points read from coords. Each point
+		// is three consecutive doubles (doubleCoords == true) or floats, and
+		// consecutive points are `stride` bytes apart.
+		void compute(const void* coords, bool doubleCoords, int stride, int count);
+
+		btVector3 getCoordinates(const Vertex* v);
+
+		btScalar shrink(btScalar amount, btScalar clampAmount);
+};
+
+
+// Multiplies this 128-bit value by a signed 64-bit factor and returns the low
+// 128 bits of the product. The multiplication is done on magnitudes and the
+// sign is re-applied at the end.
+btConvexHullInternal::Int128 btConvexHullInternal::Int128::operator*(int64_t b) const
+{
+	bool negative = (int64_t) high < 0;
+	Int128 a = negative ? -*this : *this;
+
+	// Take |b| in unsigned arithmetic: negating the signed value would overflow
+	// (undefined behaviour) for the most negative int64_t, whereas unsigned
+	// negation is well defined and yields the correct magnitude 2^63.
+	uint64_t magnitude = (uint64_t) b;
+	if (b < 0)
+	{
+		negative = !negative;
+		magnitude = (uint64_t) 0 - magnitude;
+	}
+
+	Int128 result = mul(a.low, magnitude);
+	// The high word of a only contributes to the high word of the result.
+	result.high += a.high * magnitude;
+	return negative ? -result : result;
+}
+
+// Full signed 64 x 64 -> 128 bit multiplication.
+// With USE_X86_64_ASM a single imulq instruction is used; otherwise the
+// magnitudes are multiplied with DMul and the sign is applied afterwards.
+btConvexHullInternal::Int128 btConvexHullInternal::Int128::mul(int64_t a, int64_t b)
+{
+	Int128 result;
+	
+#ifdef USE_X86_64_ASM
+	__asm__ ("imulq %[b]"
+					 : "=a" (result.low), "=d" (result.high)
+					 : "0"(a), [b] "r"(b)
+					 : "cc" );
+	return result;
+	
+#else
+	// Magnitudes are formed in unsigned arithmetic: negating the signed value
+	// would overflow (undefined behaviour) for the most negative int64_t,
+	// whereas unsigned negation is well defined and yields 2^63.
+	bool negative = a < 0;
+	uint64_t magnitudeA = (uint64_t) a;
+	if (negative)
+	{
+		magnitudeA = (uint64_t) 0 - magnitudeA;
+	}
+	uint64_t magnitudeB = (uint64_t) b;
+	if (b < 0)
+	{
+		negative = !negative;
+		magnitudeB = (uint64_t) 0 - magnitudeB;
+	}
+	DMul<uint64_t, uint32_t>::mul(magnitudeA, magnitudeB, result.low, result.high);
+	return negative ? -result : result;
+#endif
+}
+
+// Full unsigned 64 x 64 -> 128 bit multiplication: one mulq instruction when
+// USE_X86_64_ASM is defined, the portable DMul routine otherwise.
+btConvexHullInternal::Int128 btConvexHullInternal::Int128::mul(uint64_t a, uint64_t b)
+{
+	Int128 product;
+
+#ifdef USE_X86_64_ASM
+	// mulq leaves the low word in rax and the high word in rdx.
+	__asm__ ("mulq %[b]"
+					 : "=a" (product.low), "=d" (product.high)
+					 : "0"(a), [b] "r"(b)
+					 : "cc" );
+#else
+	DMul<uint64_t, uint32_t>::mul(a, b, product.low, product.high);
+#endif
+
+	return product;
+}
+
+// Three-way comparison of two rationals.
+// Different signs are decided by the sign difference, and two values with sign
+// zero compare equal. Otherwise the cross products numerator * b.denominator
+// and b.numerator * denominator are compared as unsigned 128-bit values and
+// the outcome is adjusted by the common sign.
+//
+// NOTE(review): the asm path names the members `numerator`/`denominator`
+// while the portable path names them `m_numerator`/`m_denominator`. The class
+// declaration is not visible here; only one spelling can match it, so one of
+// the two paths presumably does not compile — confirm before enabling
+// USE_X86_64_ASM.
+int btConvexHullInternal::Rational64::compare(const Rational64& b) const
+{
+	if (sign != b.sign)
+	{
+		return sign - b.sign;
+	}
+	else if (sign == 0)
+	{
+		return 0;
+	}
+
+	//	return (numerator * b.denominator > b.numerator * denominator) ? sign : (numerator * b.denominator < b.numerator * denominator) ? -sign : 0;
+
+#ifdef USE_X86_64_ASM
+
+	int result;
+	int64_t tmp;
+	int64_t dummy;
+	// rax starts as `denominator`; rbx is used as scratch and finally carries the result.
+	__asm__ ("mulq %[bn]\n\t"
+					 "movq %%rax, %[tmp]\n\t"
+					 "movq %%rdx, %%rbx\n\t"
+					 "movq %[tn], %%rax\n\t"
+					 "mulq %[bd]\n\t"
+					 "subq %[tmp], %%rax\n\t"
+					 "sbbq %%rbx, %%rdx\n\t" // rdx:rax contains 128-bit-difference "numerator*b.denominator - b.numerator*denominator"
+					 "setnsb %%bh\n\t" // bh=1 if difference is non-negative, bh=0 otherwise
+					 "orq %%rdx, %%rax\n\t"
+					 "setnzb %%bl\n\t" // bl=1 if difference is non-zero, bl=0 if it is zero
+					 "decb %%bh\n\t" // now bx=0x0000 if difference is zero, 0xff01 if it is negative, 0x0001 if it is positive (i.e., same sign as difference)
+					 "shll $16, %%ebx\n\t" // ebx has same sign as difference
+					 : "=&b"(result), [tmp] "=&r"(tmp), "=a"(dummy)
+					 : "a"(denominator), [bn] "g"(b.numerator), [tn] "g"(numerator), [bd] "g"(b.denominator)
+					 : "%rdx", "cc" );
+	return result ? result ^ sign // if sign is +1, only bit 0 of result is inverted, which does not change the sign of result (and cannot result in zero)
+																// if sign is -1, all bits of result are inverted, which changes the sign of result (and again cannot result in zero)
+								: 0;
+
+#else
+
+	// Portable path: unsigned compare of the two 128-bit cross products, scaled by the sign.
+	return sign * Int128::mul(m_numerator, b.m_denominator).ucmp(Int128::mul(m_denominator, b.m_numerator));
+
+#endif
+}
+
+// Three-way comparison of two 128-bit rationals.
+// Signs decide first. If this value is a plain int64, the comparison is
+// delegated to b.compare(int64_t) with the result negated. Otherwise the
+// 256-bit cross products are compared word by word, high word first.
+int btConvexHullInternal::Rational128::compare(const Rational128& b) const
+{
+	if (sign != b.sign)
+	{
+		return sign - b.sign;
+	}
+	if (sign == 0)
+	{
+		return 0;
+	}
+	if (isInt64)
+	{
+		return -b.compare(sign * (int64_t) numerator.low);
+	}
+
+	Int128 nbdLow, nbdHigh, dbnLow, dbnHigh;
+	DMul<Int128, uint64_t>::mul(numerator, b.denominator, nbdLow, nbdHigh);
+	DMul<Int128, uint64_t>::mul(denominator, b.numerator, dbnLow, dbnHigh);
+
+	// The low words only matter when the high words are equal.
+	const int highOrder = nbdHigh.ucmp(dbnHigh);
+	return (highOrder ? highOrder : nbdLow.ucmp(dbnLow)) * sign;
+}
+
+// Three-way comparison of this rational with a 64-bit integer.
+// Plain int64 values are compared directly. Otherwise differing signs decide
+// at once, and for equal signs the numerator is compared (unsigned) with
+// denominator * |b|, the outcome being scaled by the sign.
+int btConvexHullInternal::Rational128::compare(int64_t b) const
+{
+	if (isInt64)
+	{
+		const int64_t a = sign * (int64_t) numerator.low;
+		if (a > b)
+		{
+			return 1;
+		}
+		return (a < b) ? -1 : 0;
+	}
+
+	if (b == 0)
+	{
+		return sign;
+	}
+
+	if (b > 0)
+	{
+		if (sign <= 0)
+		{
+			return -1;
+		}
+	}
+	else
+	{
+		if (sign >= 0)
+		{
+			return 1;
+		}
+		// Both negative: continue with the magnitude of b.
+		b = -b;
+	}
+
+	return numerator.ucmp(denominator * b) * sign;
+}
+
+
+// Allocates a pair of mutually reversed edges between from and to, stamps both
+// with the current mergeStamp and updates the edge-pair usage counters.
+// Returns the edge pointing to `to`; next/prev are left for the caller to link.
+btConvexHullInternal::Edge* btConvexHullInternal::newEdgePair(Vertex* from, Vertex* to)
+{
+	btAssert(from && to);
+
+	Edge* forward = edgePool.newObject();
+	Edge* backward = edgePool.newObject();
+
+	forward->reverse = backward;
+	forward->target = to;
+	forward->face = NULL;
+	forward->copy = mergeStamp;
+
+	backward->reverse = forward;
+	backward->target = from;
+	backward->face = NULL;
+	backward->copy = mergeStamp;
+
+	// Track the peak number of simultaneously live edge pairs.
+	if (++usedEdgePairs > maxUsedEdgePairs)
+	{
+		maxUsedEdgePairs = usedEdgePairs;
+	}
+	return forward;
+}
+
+// Joins the circular vertex lists (next/prev) of h0 and h1 using only the x and
+// y coordinates of the points, and updates h0's entry vertices to describe the
+// combined list.
+//
+// On return c0 (from h0) and c1 (from h1) hold a pair of vertices connecting
+// the two hulls. Returns true when the lists were spliced. Returns false in the
+// degenerate case where h1's list is a single vertex with the same x and y as
+// h0.maxYx; nothing is spliced in that case.
+// (Presumably this computes the common tangents of the two projected hulls —
+// the geometric intent is not stated in the code.)
+bool btConvexHullInternal::mergeProjection(IntermediateHull& h0, IntermediateHull& h1, Vertex*& c0, Vertex*& c1)
+{
+	Vertex* v0 = h0.maxYx;
+	Vertex* v1 = h1.minYx;
+	// Special case: the two hulls touch in a point with identical x and y.
+	if ((v0->point.x == v1->point.x) && (v0->point.y == v1->point.y))
+	{
+		btAssert(v0->point.z < v1->point.z);
+		Vertex* v1p = v1->prev;
+		if (v1p == v1)
+		{
+			// h1's list has a single entry: report the pair and do not splice.
+			c0 = v0;
+			if (v1->edges)
+			{
+				btAssert(v1->edges->next == v1->edges);
+				v1 = v1->edges->target;
+				btAssert(v1->edges->next == v1->edges);
+			}
+			c1 = v1;
+			return false;
+		}
+		// Otherwise take v1 out of h1's list and repair h1's x extremes if
+		// v1 was one of them (choosing between its former neighbours).
+		Vertex* v1n = v1->next;
+		v1p->next = v1n;
+		v1n->prev = v1p;
+		if (v1 == h1.minXy)
+		{
+			if ((v1n->point.x < v1p->point.x) || ((v1n->point.x == v1p->point.x) && (v1n->point.y < v1p->point.y)))
+			{
+				h1.minXy = v1n;
+			}
+			else
+			{
+				h1.minXy = v1p;
+			}
+		}
+		if (v1 == h1.maxXy)
+		{
+			if ((v1n->point.x > v1p->point.x) || ((v1n->point.x == v1p->point.x) && (v1n->point.y > v1p->point.y)))
+			{
+				h1.maxXy = v1n;
+			}
+			else
+			{
+				h1.maxXy = v1p;
+			}
+		}
+	}
+	
+	// Two passes: side 0 starts at the maxXy vertices with sign = +1, side 1
+	// starts at the minXy vertices with sign = -1 (mirroring x). Each pass
+	// walks v0 and v1 along their lists until neither can advance any more.
+	v0 = h0.maxXy;
+	v1 = h1.maxXy;
+	Vertex* v00 = NULL;
+	Vertex* v10 = NULL;
+	int32_t sign = 1;
+
+	for (int side = 0; side <= 1; side++)
+	{		
+		int32_t dx = (v1->point.x - v0->point.x) * sign;
+		if (dx > 0)
+		{
+			while (true)
+			{
+				int32_t dy = v1->point.y - v0->point.y;
+
+				// Try to advance v0 to its neighbour w0 (direction depends on side).
+				Vertex* w0 = side ? v0->next : v0->prev;
+				if (w0 != v0)
+				{
+					int32_t dx0 = (w0->point.x - v0->point.x) * sign;
+					int32_t dy0 = w0->point.y - v0->point.y;
+					// Slope test written as a cross-multiplication to stay in integers.
+					if ((dy0 <= 0) && ((dx0 == 0) || ((dx0 < 0) && (dy0 * dx <= dy * dx0))))
+					{
+						v0 = w0;
+						dx = (v1->point.x - v0->point.x) * sign;
+						continue;
+					}
+				}
+
+				// Try to advance v1 to its neighbour w1.
+				Vertex* w1 = side ? v1->next : v1->prev;
+				if (w1 != v1)
+				{
+					int32_t dx1 = (w1->point.x - v1->point.x) * sign;
+					int32_t dy1 = w1->point.y - v1->point.y;
+					int32_t dxn = (w1->point.x - v0->point.x) * sign;
+					if ((dxn > 0) && (dy1 < 0) && ((dx1 == 0) || ((dx1 < 0) && (dy1 * dx < dy * dx1))))
+					{
+						v1 = w1;
+						dx = dxn;
+						continue;
+					}
+				}
+
+				break;
+			}
+		}
+		else if (dx < 0)
+		{
+			// Mirror image of the dx > 0 case: v1 is tried first and the
+			// walking directions and comparisons are reversed.
+			while (true)
+			{
+				int32_t dy = v1->point.y - v0->point.y;
+				
+				Vertex* w1 = side ? v1->prev : v1->next;
+				if (w1 != v1)
+				{
+					int32_t dx1 = (w1->point.x - v1->point.x) * sign;
+					int32_t dy1 = w1->point.y - v1->point.y;
+					if ((dy1 >= 0) && ((dx1 == 0) || ((dx1 < 0) && (dy1 * dx <= dy * dx1))))
+					{
+						v1 = w1;
+						dx = (v1->point.x - v0->point.x) * sign;
+						continue;
+					}
+				}
+				
+				Vertex* w0 = side ? v0->prev : v0->next;
+				if (w0 != v0)
+				{
+					int32_t dx0 = (w0->point.x - v0->point.x) * sign;
+					int32_t dy0 = w0->point.y - v0->point.y;
+					int32_t dxn = (v1->point.x - w0->point.x) * sign;
+					if ((dxn < 0) && (dy0 > 0) && ((dx0 == 0) || ((dx0 < 0) && (dy0 * dx < dy * dx0))))
+					{
+						v0 = w0;
+						dx = dxn;
+						continue;
+					}
+				}
+				
+				break;
+			}
+		}
+		else
+		{
+			// Same x: move v0 to the vertex with the smallest y among the
+			// neighbours sharing that x, and v1 to the one with the largest y.
+			int32_t x = v0->point.x;
+			int32_t y0 = v0->point.y;
+			Vertex* w0 = v0;
+			Vertex* t;
+			while (((t = side ? w0->next : w0->prev) != v0) && (t->point.x == x) && (t->point.y <= y0))
+			{
+				w0 = t;
+				y0 = t->point.y;
+			}
+			v0 = w0;
+
+			int32_t y1 = v1->point.y;
+			Vertex* w1 = v1;
+			while (((t = side ? w1->prev : w1->next) != v1) && (t->point.x == x) && (t->point.y >= y1))
+			{
+				w1 = t;
+				y1 = t->point.y;
+			}
+			v1 = w1;
+		}
+		
+		if (side == 0)
+		{
+			// Remember the result of the first pass and set up the second one.
+			v00 = v0;
+			v10 = v1;
+
+			v0 = h0.minXy;
+			v1 = h1.minXy;
+			sign = -1;
+		}
+	}
+
+	// Splice the two lists: second-pass pair (v0, v1) and first-pass pair (v00, v10).
+	v0->prev = v1;
+	v1->next = v0;
+
+	v00->next = v10;
+	v10->prev = v00;
+
+	// h0 now describes the combined list; pick up h1's extremes where they win.
+	if (h1.minXy->point.x < h0.minXy->point.x)
+	{
+		h0.minXy = h1.minXy;
+	}
+	if (h1.maxXy->point.x >= h0.maxXy->point.x)
+	{
+		h0.maxXy = h1.maxXy;
+	}
+	
+	h0.maxYx = h1.maxYx;
+
+	c0 = v00;
+	c1 = v10;
+
+	return true;
+}
+
+// Recursively builds the hull of originalVertices[start, end) and stores its
+// entry vertices in result.
+//
+// Ranges of zero, one or two vertices are handled directly:
+//  - 0: result is the empty hull (all entries NULL);
+//  - 1: a single vertex that is its own list neighbour and has no edges;
+//  - 2: two distinct points are connected by one edge pair; two identical
+//       points are treated like the single-vertex case.
+// Larger ranges are split in the middle, with duplicates of the last point of
+// the first half skipped at the start of the second half, solved recursively
+// and combined with merge().
+void btConvexHullInternal::computeInternal(int start, int end, IntermediateHull& result)
+{
+	int n = end - start;
+	switch (n)
+	{
+		case 0:
+			result.minXy = NULL;
+			result.maxXy = NULL;
+			result.minYx = NULL;
+			result.maxYx = NULL;
+			return;
+		case 2:
+		{
+			Vertex* v = originalVertices[start];
+			// Fetch the second vertex through the index array instead of "v + 1":
+			// consecutive vertices are only adjacent in memory while they come
+			// from the same pool array, which is not guaranteed when the vertex
+			// pool re-uses smaller arrays from an earlier computation.
+			Vertex* w = originalVertices[start + 1];
+			if (v->point != w->point)
+			{
+				int32_t dx = v->point.x - w->point.x;
+				int32_t dy = v->point.y - w->point.y;
+
+				if ((dx == 0) && (dy == 0))
+				{
+					// Same x and y, different z: only the lower vertex enters the
+					// vertex list; the other one is reachable through the edge.
+					if (v->point.z > w->point.z)
+					{
+						Vertex* t = w;
+						w = v;
+						v = t;
+					}
+					btAssert(v->point.z < w->point.z);
+					v->next = v;
+					v->prev = v;
+					result.minXy = v;
+					result.maxXy = v;
+					result.minYx = v;
+					result.maxYx = v;
+				}
+				else
+				{
+					// Two-element circular list.
+					v->next = w;
+					v->prev = w;
+					w->next = v;
+					w->prev = v;
+
+					// Order by x first, then y.
+					if ((dx < 0) || ((dx == 0) && (dy < 0)))
+					{
+						result.minXy = v;
+						result.maxXy = w;
+					}
+					else
+					{
+						result.minXy = w;
+						result.maxXy = v;
+					}
+
+					// Order by y first, then x.
+					if ((dy < 0) || ((dy == 0) && (dx < 0)))
+					{
+						result.minYx = v;
+						result.maxYx = w;
+					}
+					else
+					{
+						result.minYx = w;
+						result.maxYx = v;
+					}
+				}
+
+				// Connect the two vertices; each edge is alone in its ring.
+				Edge* e = newEdgePair(v, w);
+				e->link(e);
+				v->edges = e;
+
+				e = e->reverse;
+				e->link(e);
+				w->edges = e;
+
+				return;
+			}
+		}
+		// Identical points: handle as a single vertex.
+		// lint -fallthrough
+		case 1:
+		{
+			Vertex* v = originalVertices[start];
+			v->edges = NULL;
+			v->next = v;
+			v->prev = v;
+
+			result.minXy = v;
+			result.maxXy = v;
+			result.minYx = v;
+			result.maxYx = v;
+
+			return;
+		}
+	}
+
+	// Divide: [start, split0) and [split1, end), where split1 skips vertices
+	// equal to the last point of the first half.
+	int split0 = start + n / 2;
+	Point32 p = originalVertices[split0-1]->point;
+	int split1 = split0;
+	while ((split1 < end) && (originalVertices[split1]->point == p))
+	{
+		split1++;
+	}
+	computeInternal(start, split0, result);
+	IntermediateHull hull1;
+	computeInternal(split1, end, hull1);
+#ifdef DEBUG_CONVEX_HULL
+	printf("\n\nMerge\n");
+	result.print();
+	hull1.print();
+#endif
+	// Conquer: merge the second half into the first.
+	merge(result, hull1);
+#ifdef DEBUG_CONVEX_HULL
+	printf("\n  Result\n");
+	result.print();
+#endif
+}
+
+#ifdef DEBUG_CONVEX_HULL
+// Debug helper: prints every vertex of the hull's vertex list, tagging the
+// entry vertices and list inconsistencies, then dumps the edge graph that is
+// reachable from minXy.
+void btConvexHullInternal::IntermediateHull::print()
+{
+	printf("    Hull\n");
+	if (!minXy)
+	{
+		return;
+	}
+
+	Vertex* v = minXy;
+	do
+	{
+		printf("      ");
+		v->print();
+		if (v == maxXy)
+		{
+			printf(" maxXy");
+		}
+		if (v == minYx)
+		{
+			printf(" minYx");
+		}
+		if (v == maxYx)
+		{
+			printf(" maxYx");
+		}
+		if (v->next->prev != v)
+		{
+			printf(" Inconsistency");
+		}
+		printf("\n");
+		v = v->next;
+	} while (v && (v != minXy));
+
+	// Toggle the visit marker so printGraph() sees all vertices as unvisited.
+	minXy->copy = (minXy->copy == -1) ? -2 : -1;
+	minXy->printGraph();
+}
+
+// Debug helper: prints this vertex with all of its edges, then recurses into
+// every neighbouring vertex whose `copy` marker differs from this one's.
+void btConvexHullInternal::Vertex::printGraph()
+{
+	print();
+	printf("\nEdges\n");
+
+	Edge* const first = edges;
+	if (!first)
+	{
+		return;
+	}
+
+	// First pass: list the outgoing edges.
+	Edge* e = first;
+	do
+	{
+		e->print();
+		printf("\n");
+		e = e->next;
+	} while (e != edges);
+
+	// Second pass: visit the targets that have not been marked yet.
+	do
+	{
+		Vertex* neighbour = e->target;
+		if (neighbour->copy != copy)
+		{
+			neighbour->copy = copy;
+			neighbour->printGraph();
+		}
+		e = e->next;
+	} while (e != edges);
+}
+#endif
+
+// Determines how `next` is positioned relative to `prev` in the edge ring of
+// their common origin vertex:
+//  - next directly follows prev only  -> COUNTER_CLOCKWISE
+//  - next directly precedes prev only -> CLOCKWISE
+//  - both (ring of two edges)         -> decided by the sign of
+//    (t x s) . ((prev - origin) x (next - origin))
+//  - neither                          -> NONE
+btConvexHullInternal::Orientation btConvexHullInternal::getOrientation(const Edge* prev, const Edge* next, const Point32& s, const Point32& t)
+{
+	btAssert(prev->reverse->target == next->reverse->target);
+
+	const bool isSuccessor = (prev->next == next);
+	const bool isPredecessor = (prev->prev == next);
+
+	if (!isSuccessor)
+	{
+		return isPredecessor ? CLOCKWISE : NONE;
+	}
+	if (!isPredecessor)
+	{
+		return COUNTER_CLOCKWISE;
+	}
+
+	// Only two edges in the ring: the ring order is ambiguous, use geometry.
+	Point64 n = t.cross(s);
+	Point64 m = (*prev->target - *next->reverse->target).cross(*next->target - *next->reverse->target);
+	btAssert(!m.isZero());
+	int64_t dot = n.dot(m);
+	btAssert(dot != 0);
+	if (dot > 0)
+	{
+		return COUNTER_CLOCKWISE;
+	}
+	return CLOCKWISE;
+}
+
+// Scans the edges of `start` whose copy stamp is greater than mergeStamp and
+// returns the one with the smallest cotangent (t . sxrxs) / (t . rxs), where t
+// is the edge direction. Ties are resolved with getOrientation() according to
+// `ccw`. The winning cotangent is written to minCot; NULL is returned (and
+// minCot left untouched) when no edge qualifies.
+btConvexHullInternal::Edge* btConvexHullInternal::findMaxAngle(bool ccw, const Vertex* start, const Point32& s, const Point64& rxs, const Point64& sxrxs, Rational64& minCot)
+{
+	Edge* best = NULL;
+
+#ifdef DEBUG_CONVEX_HULL
+	printf("find max edge for %d\n", start->point.index);
+#endif
+	Edge* const first = start->edges;
+	if (!first)
+	{
+		return best;
+	}
+
+	Edge* e = first;
+	do
+	{
+		if (e->copy > mergeStamp)
+		{
+			Point32 t = *e->target - *start;
+			Rational64 cot(t.dot(sxrxs), t.dot(rxs));
+#ifdef DEBUG_CONVEX_HULL
+			printf("      Angle is %f (%d) for ", (float) btAtan(cot.toScalar()), (int) cot.isNaN());
+			e->print();
+#endif
+			if (cot.isNaN())
+			{
+				// Undefined cotangent: the edge is skipped as a candidate.
+				btAssert(ccw ? (t.dot(s) < 0) : (t.dot(s) > 0));
+			}
+			else if (best == NULL)
+			{
+				// First usable candidate.
+				minCot = cot;
+				best = e;
+			}
+			else
+			{
+				const int cmp = cot.compare(minCot);
+				if (cmp < 0)
+				{
+					minCot = cot;
+					best = e;
+				}
+				else if ((cmp == 0) && (ccw == (getOrientation(best, e, s, t) == COUNTER_CLOCKWISE)))
+				{
+					// Equal cotangent: the ring order decides.
+					best = e;
+				}
+			}
+#ifdef DEBUG_CONVEX_HULL
+			printf("\n");
+#endif
+		}
+		e = e->next;
+	} while (e != first);
+
+	return best;
+}
+
+// Advances the edges e0 (at c0) and e1 (at c1) within the plane that contains
+// c0, c1 and the target of the first non-NULL start edge. e0/e1 are in/out
+// parameters and either may be NULL on entry; stop0/stop1 are vertices at
+// which the respective walk must end (NULL for no limit).
+//
+// The walk has two phases: first each edge is pushed as far as possible in
+// direction `perp` (perpendicular to c1 - c0 inside the plane), then the pair
+// is adjusted by slope comparisons, much like the 2D tangent search in
+// mergeProjection but measured along `perp` and `s`.
+// Note that e0/e1 can be reset to NULL when the walk leaves the start edge.
+void btConvexHullInternal::findEdgeForCoplanarFaces(Vertex* c0, Vertex* c1, Edge*& e0, Edge*& e1, Vertex* stop0, Vertex* stop1)
+{
+	Edge* start0 = e0;
+	Edge* start1 = e1;
+	// Current end points of the two walks; fall back to c0/c1 without an edge.
+	Point32 et0 = start0 ? start0->target->point : c0->point;
+	Point32 et1 = start1 ? start1->target->point : c1->point;
+	Point32 s = c1->point - c0->point;
+	// Plane through c0 with this normal; `dist` is its offset.
+	Point64 normal = ((start0 ? start0 : start1)->target->point - c0->point).cross(s);
+	int64_t dist = c0->point.dot(normal);
+	btAssert(!start1 || (start1->target->point.dot(normal) == dist));
+	Point64 perp = s.cross(normal);
+	btAssert(!perp.isZero());
+	
+#ifdef DEBUG_CONVEX_HULL
+	printf("   Advancing %d %d  (%p %p, %d %d)\n", c0->point.index, c1->point.index, start0, start1, start0 ? start0->target->point.index : -1, start1 ? start1->target->point.index : -1);
+#endif
+
+	// Phase 1a: push e0 along in-plane edges while the projection onto perp grows.
+	int64_t maxDot0 = et0.dot(perp);
+	if (e0)
+	{
+		while (e0->target != stop0)
+		{
+			Edge* e = e0->reverse->prev;
+			if (e->target->point.dot(normal) < dist)
+			{
+				break;
+			}
+			btAssert(e->target->point.dot(normal) == dist);
+			// Edges stamped with the current mergeStamp were created in this merge.
+			if (e->copy == mergeStamp)
+			{
+				break;
+			}
+			int64_t dot = e->target->point.dot(perp);
+			if (dot <= maxDot0)
+			{
+				break;
+			}
+			maxDot0 = dot;
+			e0 = e;
+			et0 = e->target->point;
+		}
+	}
+	
+	// Phase 1b: the same for e1, walking the rings in the opposite direction.
+	int64_t maxDot1 = et1.dot(perp);
+	if (e1)
+	{
+		while (e1->target != stop1)
+		{
+			Edge* e = e1->reverse->next;
+			if (e->target->point.dot(normal) < dist)
+			{
+				break;
+			}
+			btAssert(e->target->point.dot(normal) == dist);
+			if (e->copy == mergeStamp)
+			{
+				break;
+			}
+			int64_t dot = e->target->point.dot(perp);
+			if (dot <= maxDot1)
+			{
+				break;
+			}
+			maxDot1 = dot;
+			e1 = e;
+			et1 = e->target->point;
+		}
+	}
+
+#ifdef DEBUG_CONVEX_HULL
+	printf("   Starting at %d %d\n", et0.index, et1.index);
+#endif
+
+	// Phase 2: dx/dy are the offsets between the two end points along perp and s.
+	int64_t dx = maxDot1 - maxDot0;
+	if (dx > 0)
+	{
+		while (true)
+		{
+			int64_t dy = (et1 - et0).dot(s);
+			
+			// Try to step side 0 back (only over edges older than this merge).
+			if (e0 && (e0->target != stop0))
+			{
+				Edge* f0 = e0->next->reverse;
+				if (f0->copy > mergeStamp)
+				{
+					int64_t dx0 = (f0->target->point - et0).dot(perp);
+					int64_t dy0 = (f0->target->point - et0).dot(s);
+					if ((dx0 == 0) ? (dy0 < 0) : ((dx0 < 0) && (Rational64(dy0, dx0).compare(Rational64(dy, dx)) >= 0)))
+					{
+						et0 = f0->target->point;
+						dx = (et1 - et0).dot(perp);
+						// Stepping off the start edge means side 0 has no edge any more.
+						e0 = (e0 == start0) ? NULL : f0;
+						continue;
+					}
+				}
+			}
+			
+			// Try to step side 1 forward, staying inside the plane.
+			if (e1 && (e1->target != stop1))
+			{
+				Edge* f1 = e1->reverse->next;
+				if (f1->copy > mergeStamp)
+				{
+					Point32 d1 = f1->target->point - et1;
+					if (d1.dot(normal) == 0)
+					{
+						int64_t dx1 = d1.dot(perp);
+						int64_t dy1 = d1.dot(s);
+						int64_t dxn = (f1->target->point - et0).dot(perp);
+						if ((dxn > 0) && ((dx1 == 0) ? (dy1 < 0) : ((dx1 < 0) && (Rational64(dy1, dx1).compare(Rational64(dy, dx)) > 0))))
+						{
+							e1 = f1;
+							et1 = e1->target->point;
+							dx = dxn;
+							continue;
+						}
+					}
+					else
+					{
+						btAssert((e1 == start1) && (d1.dot(normal) < 0));
+					}
+				}
+			}
+
+			break;
+		}
+	}
+	else if (dx < 0)
+	{
+		// Mirror image of the dx > 0 case with the roles of the sides swapped.
+		while (true)
+		{
+			int64_t dy = (et1 - et0).dot(s);
+			
+			if (e1 && (e1->target != stop1))
+			{
+				Edge* f1 = e1->prev->reverse;
+				if (f1->copy > mergeStamp)
+				{
+					int64_t dx1 = (f1->target->point - et1).dot(perp);
+					int64_t dy1 = (f1->target->point - et1).dot(s);
+					if ((dx1 == 0) ? (dy1 > 0) : ((dx1 < 0) && (Rational64(dy1, dx1).compare(Rational64(dy, dx)) <= 0)))
+					{
+						et1 = f1->target->point;
+						dx = (et1 - et0).dot(perp);
+						e1 = (e1 == start1) ? NULL : f1;
+						continue;
+					}
+				}
+			}
+			
+			if (e0 && (e0->target != stop0))
+			{
+				Edge* f0 = e0->reverse->prev;
+				if (f0->copy > mergeStamp)
+				{
+					Point32 d0 = f0->target->point - et0;
+					if (d0.dot(normal) == 0)
+					{
+						int64_t dx0 = d0.dot(perp);
+						int64_t dy0 = d0.dot(s);
+						int64_t dxn = (et1 - f0->target->point).dot(perp);
+						if ((dxn < 0) && ((dx0 == 0) ? (dy0 > 0) : ((dx0 < 0) && (Rational64(dy0, dx0).compare(Rational64(dy, dx)) < 0))))
+						{
+							e0 = f0;
+							et0 = e0->target->point;
+							dx = dxn;
+							continue;
+						}
+					}
+					else
+					{
+						btAssert((e0 == start0) && (d0.dot(normal) < 0));
+					}
+				}
+			}
+
+			break;
+		}
+	}
+#ifdef DEBUG_CONVEX_HULL
+	printf("   Advanced edges to %d %d\n", et0.index, et1.index);
+#endif
+}
+
+
+// Merges hull h1 into h0.
+//
+// An empty h1 leaves h0 untouched; an empty h0 simply becomes h1. Otherwise
+// mergeStamp is decremented, so that edges created during this merge (stamped
+// by newEdgePair) can be told apart from older ones (copy > mergeStamp).
+// mergeProjection() joins the vertex lists and supplies the starting pair
+// (c0, c1). The main loop then repeatedly picks, via findMaxAngle(), the next
+// edge on either side, creates connecting edge pairs between c0 and c1,
+// removes edges that were passed over and advances c0 and/or c1, until the
+// pair returns to its starting position.
+//
+// State per side i (0 = h0 side, 1 = h1 side):
+//   toPrev_i       reverse of the edge last followed (points back to the
+//                  previously visited vertex); NULL until the side advances
+//   firstNew_i     edge recorded the first time the side advanced, used when
+//                  the loop closes
+//   pendingHead_i / pendingTail_i
+//                  chain of new connecting edges at the current vertex that
+//                  is not yet linked into the vertex's edge ring
+void btConvexHullInternal::merge(IntermediateHull& h0, IntermediateHull& h1)
+{
+	if (!h1.maxXy)
+	{
+		return;
+	}
+	if (!h0.maxXy)
+	{
+		h0 = h1;
+		return;
+	}
+	
+	mergeStamp--;
+
+	Vertex* c0 = NULL;
+	Edge* toPrev0 = NULL;
+	Edge* firstNew0 = NULL;
+	Edge* pendingHead0 = NULL;
+	Edge* pendingTail0 = NULL;
+	Vertex* c1 = NULL;
+	Edge* toPrev1 = NULL;
+	Edge* firstNew1 = NULL;
+	Edge* pendingHead1 = NULL;
+	Edge* pendingTail1 = NULL;
+	Point32 prevPoint;
+
+	if (mergeProjection(h0, h1, c0, c1))
+	{
+		// Regular case: s is the direction of the starting pair, `normal` is
+		// s rotated about the z axis and t = s x normal.
+		Point32 s = *c1 - *c0;
+		Point64 normal = Point32(0, 0, -1).cross(s);
+		Point64 t = s.cross(normal);
+		btAssert(!t.isZero());
+
+		// Look for an edge at c0 that lies in the plane through c0 with
+		// normal `normal` (dot == 0) and points towards positive t.
+		Edge* e = c0->edges;
+		Edge* start0 = NULL;
+		if (e)
+		{
+			do
+			{
+				int64_t dot = (*e->target - *c0).dot(normal);
+				btAssert(dot <= 0);
+				if ((dot == 0) && ((*e->target - *c0).dot(t) > 0))
+				{
+					if (!start0 || (getOrientation(start0, e, s, Point32(0, 0, -1)) == CLOCKWISE))
+					{
+						start0 = e;
+					}
+				}
+				e = e->next;
+			} while (e != c0->edges);
+		}
+		
+		// Same search at c1, with the opposite orientation preference.
+		e = c1->edges;
+		Edge* start1 = NULL;
+		if (e)
+		{
+			do
+			{
+				int64_t dot = (*e->target - *c1).dot(normal);
+				btAssert(dot <= 0);
+				if ((dot == 0) && ((*e->target - *c1).dot(t) > 0))
+				{
+					if (!start1 || (getOrientation(start1, e, s, Point32(0, 0, -1)) == COUNTER_CLOCKWISE))
+					{
+						start1 = e;
+					}
+				}
+				e = e->next;
+			} while (e != c1->edges);
+		}
+
+		// If such edges exist, move the starting pair along them.
+		if (start0 || start1)
+		{
+			findEdgeForCoplanarFaces(c0, c1, start0, start1, NULL, NULL);
+			if (start0)
+			{
+				c0 = start0->target;
+			}
+			if (start1)
+			{
+				c1 = start1->target;
+			}
+		}
+
+		// Artificial "previous" point: c1 shifted by one unit in z.
+		prevPoint = c1->point;
+		prevPoint.z++;
+	}
+	else
+	{
+		// Degenerate projection: use c1 shifted by one unit in x instead.
+		prevPoint = c1->point;
+		prevPoint.x++;
+	}
+
+	Vertex* first0 = c0;
+	Vertex* first1 = c1;
+	bool firstRun = true;
+
+	while (true)
+	{
+		// s: current connecting direction, r: direction to the previous point.
+		// rxs and sxrxs are the reference vectors for the cotangents computed
+		// in findMaxAngle().
+		Point32 s = *c1 - *c0;
+		Point32 r = prevPoint - c0->point;
+		Point64 rxs = r.cross(s);
+		Point64 sxrxs = s.cross(rxs);
+		
+#ifdef DEBUG_CONVEX_HULL
+		printf("\n  Checking %d %d\n", c0->point.index, c1->point.index);
+#endif
+		Rational64 minCot0(0, 0);
+		Edge* min0 = findMaxAngle(false, c0, s, rxs, sxrxs, minCot0);
+		Rational64 minCot1(0, 0);
+		Edge* min1 = findMaxAngle(true, c1, s, rxs, sxrxs, minCot1);
+		if (!min0 && !min1)
+		{
+			// Neither side has a candidate edge: the result is the single
+			// edge pair between c0 and c1.
+			Edge* e = newEdgePair(c0, c1);
+			e->link(e);
+			c0->edges = e;
+
+			e = e->reverse;
+			e->link(e);
+			c1->edges = e;
+			return;
+		}
+		else
+		{
+			// cmp > 0: advance on side 1, cmp < 0: advance on side 0,
+			// cmp == 0: advance on both sides.
+			int cmp = !min0 ? 1 : !min1 ? -1 : minCot0.compare(minCot1);
+#ifdef DEBUG_CONVEX_HULL
+			printf("    -> Result %d\n", cmp);
+#endif
+			if (firstRun || ((cmp >= 0) ? !minCot1.isNegativeInfinity() : !minCot0.isNegativeInfinity()))
+			{
+				// Create the connecting pair c0 <-> c1 and append its halves to
+				// the pending chains (side 0 is chained in reverse direction).
+				Edge* e = newEdgePair(c0, c1);
+				if (pendingTail0)
+				{
+					pendingTail0->prev = e;
+				}
+				else
+				{
+					pendingHead0 = e;
+				}
+				e->next = pendingTail0;
+				pendingTail0 = e;
+
+				e = e->reverse;
+				if (pendingTail1)
+				{
+					pendingTail1->next = e;
+				}
+				else
+				{
+					pendingHead1 = e;
+				}
+				e->prev = pendingTail1;
+				pendingTail1 = e;
+			}
+			
+			Edge* e0 = min0;
+			Edge* e1 = min1;
+
+#ifdef DEBUG_CONVEX_HULL
+			printf("   Found min edges to %d %d\n", e0 ? e0->target->point.index : -1, e1 ? e1->target->point.index : -1);
+#endif
+
+			if (cmp == 0)
+			{
+				// Both candidates have the same cotangent: refine the pair.
+				findEdgeForCoplanarFaces(c0, c1, e0, e1, NULL, NULL);
+			}
+
+			if ((cmp >= 0) && e1)
+			{
+				// Advance on side 1: drop the edges between the way back and
+				// min1, splice the pending chain into c1's ring, then move on.
+				if (toPrev1)
+				{
+					for (Edge* e = toPrev1->next, *n = NULL; e != min1; e = n)
+					{
+						n = e->next;
+						removeEdgePair(e);
+					}
+				}
+
+				if (pendingTail1)
+				{
+					if (toPrev1)
+					{
+						toPrev1->link(pendingHead1);
+					}
+					else
+					{
+						min1->prev->link(pendingHead1);
+						firstNew1 = pendingHead1;
+					}
+					pendingTail1->link(min1);
+					pendingHead1 = NULL;
+					pendingTail1 = NULL;
+				}
+				else if (!toPrev1)
+				{
+					firstNew1 = min1;
+				}
+
+				prevPoint = c1->point;
+				c1 = e1->target;
+				toPrev1 = e1->reverse;
+			}
+
+			if ((cmp <= 0) && e0)
+			{
+				// Advance on side 0: same steps with prev/next swapped.
+				if (toPrev0)
+				{
+					for (Edge* e = toPrev0->prev, *n = NULL; e != min0; e = n)
+					{
+						n = e->prev;
+						removeEdgePair(e);
+					}
+				}
+
+				if (pendingTail0)
+				{
+					if (toPrev0)
+					{
+						pendingHead0->link(toPrev0);
+					}
+					else
+					{
+						pendingHead0->link(min0->next);
+						firstNew0 = pendingHead0;
+					}
+					min0->link(pendingTail0);
+					pendingHead0 = NULL;
+					pendingTail0 = NULL;
+				}
+				else if (!toPrev0)
+				{
+					firstNew0 = min0;
+				}
+
+				prevPoint = c0->point;
+				c0 = e0->target;
+				toPrev0 = e0->reverse;
+			}
+		}
+
+		if ((c0 == first0) && (c1 == first1))
+		{
+			// Back at the starting pair: close the edge rings on both sides.
+			if (toPrev0 == NULL)
+			{
+				// Side 0 never advanced: its ring consists of the pending chain only.
+				pendingHead0->link(pendingTail0);
+				c0->edges = pendingTail0;
+			}
+			else
+			{
+				for (Edge* e = toPrev0->prev, *n = NULL; e != firstNew0; e = n)
+				{
+					n = e->prev;
+					removeEdgePair(e);
+				}
+				if (pendingTail0)
+				{
+					pendingHead0->link(toPrev0);
+					firstNew0->link(pendingTail0);
+				}
+			}
+
+			if (toPrev1 == NULL)
+			{
+				// Side 1 never advanced: its ring consists of the pending chain only.
+				pendingTail1->link(pendingHead1);
+				c1->edges = pendingTail1;
+			}
+			else
+			{
+				for (Edge* e = toPrev1->next, *n = NULL; e != firstNew1; e = n)
+				{
+					n = e->next;
+					removeEdgePair(e);
+				}
+				if (pendingTail1)
+				{
+					toPrev1->link(pendingHead1);
+					pendingTail1->link(firstNew1);
+				}
+			}
+			
+			return;
+		}
+
+		firstRun = false;
+	}
+}
+
+
+// Strict ordering of integer points by y, then x, then z; used to sort the
+// input points before the hull is built.
+static bool pointCmp(const btConvexHullInternal::Point32& p, const btConvexHullInternal::Point32& q)
+{
+	if (p.y != q.y)
+	{
+		return p.y < q.y;
+	}
+	if (p.x != q.x)
+	{
+		return p.x < q.x;
+	}
+	return p.z < q.z;
+}
+
+// Reads the three coordinates stored at ptr, either as doubles or as floats,
+// and returns them as a btVector3.
+static btVector3 btConvexHullReadInputPoint(const char* ptr, bool doubleCoords)
+{
+	if (doubleCoords)
+	{
+		const double* v = (const double*) ptr;
+		return btVector3((btScalar) v[0], (btScalar) v[1], (btScalar) v[2]);
+	}
+	const float* v = (const float*) ptr;
+	return btVector3(v[0], v[1], v[2]);
+}
+
+// Computes the hull of `count` input points.
+//
+// coords points to the first point; each point is three consecutive doubles
+// (doubleCoords == true) or floats, and consecutive points are `stride` bytes
+// apart. The points are centred on their bounding box, scaled per axis,
+// truncated to 32-bit integers and sorted before the recursive hull
+// construction runs. The entry vertex of the result is stored in vertexList.
+void btConvexHullInternal::compute(const void* coords, bool doubleCoords, int stride, int count)
+{
+	const char* const base = (const char*) coords;
+
+	// Pass 1: bounding box of the input.
+	btVector3 min(btScalar(1e30), btScalar(1e30), btScalar(1e30)), max(btScalar(-1e30), btScalar(-1e30), btScalar(-1e30));
+	const char* ptr = base;
+	for (int i = 0; i < count; i++)
+	{
+		btVector3 p = btConvexHullReadInputPoint(ptr, doubleCoords);
+		ptr += stride;
+		min.setMin(p);
+		max.setMax(p);
+	}
+
+	// Rank the axes by extent; the three indices are kept distinct.
+	btVector3 s = max - min;
+	maxAxis = s.maxAxis();
+	minAxis = s.minAxis();
+	if (minAxis == maxAxis)
+	{
+		minAxis = (maxAxis + 1) % 3;
+	}
+	medAxis = 3 - maxAxis - minAxis;
+
+	// Each extent is divided into 10216 integer steps. The scale is negated
+	// unless (medAxis, maxAxis) are in cyclic order (presumably to preserve
+	// handedness after the axis permutation — confirm).
+	s /= btScalar(10216);
+	if (((medAxis + 1) % 3) != maxAxis)
+	{
+		s *= -1;
+	}
+	scaling = s;
+
+	// From here on s holds the reciprocal scale; zero extents stay zero.
+	for (int axis = 0; axis < 3; axis++)
+	{
+		if (s[axis] != 0)
+		{
+			s[axis] = btScalar(1) / s[axis];
+		}
+	}
+
+	center = (min + max) * btScalar(0.5);
+
+	// Pass 2: quantise the points; x/y/z take the med/max/min axis respectively.
+	btAlignedObjectArray<Point32> points;
+	points.resize(count);
+	ptr = base;
+	for (int i = 0; i < count; i++)
+	{
+		btVector3 p = btConvexHullReadInputPoint(ptr, doubleCoords);
+		ptr += stride;
+		p = (p - center) * s;
+		points[i].x = (int32_t) p[medAxis];
+		points[i].y = (int32_t) p[maxAxis];
+		points[i].z = (int32_t) p[minAxis];
+		points[i].index = i;
+	}
+	points.quickSort(pointCmp);
+
+	// One vertex per sorted point.
+	vertexPool.reset();
+	vertexPool.setArraySize(count);
+	originalVertices.resize(count);
+	for (int i = 0; i < count; i++)
+	{
+		Vertex* vertex = vertexPool.newObject();
+		vertex->edges = NULL;
+		vertex->point = points[i];
+		vertex->copy = -1;
+		originalVertices[i] = vertex;
+	}
+
+	points.clear();
+
+	edgePool.reset();
+	edgePool.setArraySize(6 * count);
+
+	usedEdgePairs = 0;
+	maxUsedEdgePairs = 0;
+
+	mergeStamp = -3;
+
+	IntermediateHull hull;
+	computeInternal(0, count, hull);
+	vertexList = hull.minXy;
+#ifdef DEBUG_CONVEX_HULL
+	printf("max. edges %d (3v = %d)", maxUsedEdgePairs, 3 * count);
+#endif
+}
+
+btVector3 btConvexHullInternal::toBtVector(const Point32& v)
+{	// Undo the axis permutation and per-axis scaling applied by compute(); no translation is added.
+	btVector3 unscaled;
+	unscaled[medAxis] = static_cast<btScalar>(v.x);
+	unscaled[maxAxis] = static_cast<btScalar>(v.y);
+	unscaled[minAxis] = static_cast<btScalar>(v.z);
+	return unscaled * scaling;
+}
+
+btVector3 btConvexHullInternal::getBtNormal(Face* face)
+{	// Unit-length normal of "face": cross product of its two direction vectors, each mapped through toBtVector().
+	return toBtVector(face->dir0).cross(toBtVector(face->dir1)).normalized();
+}
+
+btVector3 btConvexHullInternal::getCoordinates(const Vertex* v)
+{	// Map a hull vertex back to input space: un-permute the axes, rescale, then re-add the center.
+	btVector3 local;
+	local[medAxis] = v->xvalue();
+	local[maxAxis] = v->yvalue();
+	local[minAxis] = v->zvalue();
+	return (local * scaling) + center;
+}
+
+btScalar btConvexHullInternal::shrink(btScalar amount, btScalar clampAmount)
+{	// Shifts every face by "amount" via shiftFace(). Returns the amount used, 0 if nothing was done, or -amount if a face shift failed.
+	if (!vertexList)	// no hull has been computed
+	{
+		return 0;
+	}
+	int stamp = --mergeStamp;	// fresh marker value for "visited" vertices and edges
+	btAlignedObjectArray<Vertex*> stack;
+	vertexList->copy = stamp;
+	stack.push_back(vertexList);
+	btAlignedObjectArray<Face*> faces;
+
+	Point32 ref = vertexList->point;	// common apex for the tetrahedra summed below
+	Int128 hullCenterX(0, 0);
+	Int128 hullCenterY(0, 0);
+	Int128 hullCenterZ(0, 0);
+	Int128 volume(0, 0);
+
+	while (stack.size() > 0)	// depth-first walk over all vertices reachable through edges
+	{
+		Vertex* v = stack[stack.size() - 1];
+		stack.pop_back();
+		Edge* e = v->edges;
+		if (e)
+		{
+			do
+			{
+				if (e->target->copy != stamp)	// neighbour not visited yet
+				{
+					e->target->copy = stamp;
+					stack.push_back(e->target);
+				}
+				if (e->copy != stamp)	// this edge's face has not been created yet
+				{
+					Face* face = facePool.newObject();
+					face->init(e->target, e->reverse->prev->target, v);
+					faces.push_back(face);
+					Edge* f = e;
+
+					Vertex* a = NULL;
+					Vertex* b = NULL;
+					do	// walk once around the face, fanning it into triangles (v, a, b)
+					{
+						if (a && b)
+						{
+							int64_t vol = (v->point - ref).dot((a->point - ref).cross(b->point - ref));	// scalar triple product for (ref, v, a, b)
+							btAssert(vol >= 0);
+							Point32 c = v->point + a->point + b->point + ref;	// sum of the four corners; the later division by 4 * volume averages them
+							hullCenterX += vol * c.x;
+							hullCenterY += vol * c.y;
+							hullCenterZ += vol * c.z;
+							volume += vol;
+						}
+
+						btAssert(f->copy != stamp);
+						f->copy = stamp;	// mark the edge and attach it to the new face
+						f->face = face;
+
+						a = b;
+						b = f->target;
+
+						f = f->reverse->prev;	// step to the next edge of the same face
+					} while (f != e);
+				}
+				e = e->next;
+			} while (e != v->edges);
+		}
+	}
+
+	if (volume.getSign() <= 0)	// zero or negative accumulated volume: nothing to shrink
+	{
+		return 0;
+	}
+
+	btVector3 hullCenter;	// volume-weighted center, relative to "center" (no translation is added here)
+	hullCenter[medAxis] = hullCenterX.toScalar();
+	hullCenter[maxAxis] = hullCenterY.toScalar();
+	hullCenter[minAxis] = hullCenterZ.toScalar();
+	hullCenter /= 4 * volume.toScalar();
+	hullCenter *= scaling;
+
+	int faceCount = faces.size();
+
+	if (clampAmount > 0)	// limit "amount" to clampAmount times the smallest face distance from hullCenter
+	{
+		btScalar minDist = SIMD_INFINITY;
+		for (int i = 0; i < faceCount; i++)
+		{
+			btVector3 normal = getBtNormal(faces[i]);
+			btScalar dist = normal.dot(toBtVector(faces[i]->origin) - hullCenter);
+			if (dist < minDist)
+			{
+				minDist = dist;
+			}
+		}
+		
+		if (minDist <= 0)	// hullCenter is on or outside some face plane
+		{
+			return 0;
+		}
+
+		amount = btMin(amount, minDist * clampAmount);
+	}
+
+	unsigned int seed = 243703;	// fixed seed: the face shuffle below is the same on every run
+	for (int i = 0; i < faceCount; i++, seed = 1664525 * seed + 1013904223)	// seed is advanced by a linear congruential step
+	{
+		btSwap(faces[i], faces[seed % faceCount]);
+	}
+
+	for (int i = 0; i < faceCount; i++)
+	{
+		if (!shiftFace(faces[i], amount, stack))	// stack is passed by value, so each call gets its own copy
+		{
+			return -amount;	// negative result signals failure to the caller
+		}
+	}
+
+	return amount;
+}
+
+bool btConvexHullInternal::shiftFace(Face* face, btScalar amount, btAlignedObjectArray<Vertex*> stack)
+{	// Moves "face" by "amount" against its normal and cuts the hull at the new plane. "stack" is a by-value scratch array. Returns false on failure.
+	btVector3 origShift = getBtNormal(face) * -amount;	// shift vector in scaled space
+	if (scaling[0] != 0)	// convert to integer grid units per axis, skipping zero-scale axes
+	{
+		origShift[0] /= scaling[0];
+	}
+	if (scaling[1] != 0)
+	{
+		origShift[1] /= scaling[1];
+	}
+	if (scaling[2] != 0)
+	{
+		origShift[2] /= scaling[2];
+	}
+	Point32 shift((int32_t) origShift[medAxis], (int32_t) origShift[maxAxis], (int32_t) origShift[minAxis]);	// same axis permutation as compute()
+	if (shift.isZero())	// shift truncates to zero on the integer grid: nothing to do
+	{
+		return true;
+	}
+	Point64 normal = face->getNormal();
+#ifdef DEBUG_CONVEX_HULL
+	printf("\nShrinking face (%d %d %d) (%d %d %d) (%d %d %d) by (%d %d %d)\n",
+				 face->origin.x, face->origin.y, face->origin.z, face->dir0.x, face->dir0.y, face->dir0.z, face->dir1.x, face->dir1.y, face->dir1.z, shift.x, shift.y, shift.z);
+#endif
+	int64_t origDot = face->origin.dot(normal);	// plane offset before the shift
+	Point32 shiftedOrigin = face->origin + shift;
+	int64_t shiftedDot = shiftedOrigin.dot(normal);	// plane offset after the shift
+	btAssert(shiftedDot <= origDot);
+	if (shiftedDot >= origDot)	// the plane did not move inwards: report failure
+	{
+		return false;
+	}
+
+	Edge* intersection = NULL;	// will hold an edge that crosses the shifted plane
+
+	Edge* startEdge = face->nearbyVertex->edges;
+#ifdef DEBUG_CONVEX_HULL
+	printf("Start edge is ");
+	startEdge->print();
+	printf(", normal is (%lld %lld %lld), shifted dot is %lld\n", normal.x, normal.y, normal.z, shiftedDot);
+#endif
+	Rational128 optDot = face->nearbyVertex->dot(normal);	// best dot value found so far while walking
+	int cmp = optDot.compare(shiftedDot);	// position of the start vertex relative to the shifted plane
+#ifdef SHOW_ITERATIONS
+	int n = 0;
+#endif
+	if (cmp >= 0)	// start vertex is on or beyond the shifted plane: walk towards smaller dot values
+	{
+		Edge* e = startEdge;
+		do
+		{
+#ifdef SHOW_ITERATIONS
+			n++;
+#endif
+			Rational128 dot = e->target->dot(normal);
+			btAssert(dot.compare(origDot) <= 0);
+#ifdef DEBUG_CONVEX_HULL
+			printf("Moving downwards, edge is ");
+			e->print();
+			printf(", dot is %f (%f %lld)\n", (float) dot.toScalar(), (float) optDot.toScalar(), shiftedDot);
+#endif
+			if (dot.compare(optDot) < 0)	// found a lower neighbour: continue the walk from there
+			{
+				int c = dot.compare(shiftedDot);
+				optDot = dot;
+				e = e->reverse;
+				startEdge = e;
+				if (c < 0)	// neighbour lies below the shifted plane: this edge crosses it
+				{
+					intersection = e;
+					break;
+				}
+				cmp = c;
+			}
+			e = e->prev;
+		} while (e != startEdge);
+
+		if (!intersection)	// no vertex below the shifted plane was found
+		{
+			return false;
+		}
+	}
+	else	// start vertex is below the shifted plane: walk towards larger dot values
+	{
+		Edge* e = startEdge;
+		do
+		{
+#ifdef SHOW_ITERATIONS
+			n++;
+#endif
+			Rational128 dot = e->target->dot(normal);
+			btAssert(dot.compare(origDot) <= 0);
+#ifdef DEBUG_CONVEX_HULL
+			printf("Moving upwards, edge is ");
+			e->print();
+			printf(", dot is %f (%f %lld)\n", (float) dot.toScalar(), (float) optDot.toScalar(), shiftedDot);
+#endif
+			if (dot.compare(optDot) > 0)
+			{
+				cmp = dot.compare(shiftedDot);
+				if (cmp >= 0)	// neighbour reaches the shifted plane: this edge crosses or touches it
+				{
+					intersection = e;
+					break;
+				}
+				optDot = dot;
+				e = e->reverse;
+				startEdge = e;
+			}
+			e = e->prev;
+		} while (e != startEdge);
+		
+		if (!intersection)	// the walk never reached the shifted plane: nothing to cut
+		{
+			return true;
+		}
+	}
+
+#ifdef SHOW_ITERATIONS
+	printf("Needed %d iterations to find initial intersection\n", n);
+#endif
+
+	if (cmp == 0)	// a vertex lies exactly on the shifted plane: check whether any edge around it leads beyond the plane
+	{
+		Edge* e = intersection->reverse->next;
+#ifdef SHOW_ITERATIONS
+		n = 0;
+#endif
+		while (e->target->dot(normal).compare(shiftedDot) <= 0)
+		{
+#ifdef SHOW_ITERATIONS
+			n++;
+#endif
+			e = e->next;
+			if (e == intersection->reverse)	// went all the way round without finding one: nothing to cut
+			{
+				return true;
+			}
+#ifdef DEBUG_CONVEX_HULL
+			printf("Checking for outwards edge, current edge is ");
+			e->print();
+			printf("\n");
+#endif
+		}
+#ifdef SHOW_ITERATIONS
+		printf("Needed %d iterations to check for complete containment\n", n);
+#endif
+	}
+	
+	Edge* firstIntersection = NULL;	// first crossing edge; seeing it again ends the main loop
+	Edge* faceEdge = NULL;	// most recent edge created (or reused) on the shifted face
+	Edge* firstFaceEdge = NULL;	// first such edge, needed to close the loop afterwards
+
+#ifdef SHOW_ITERATIONS
+	int m = 0;
+#endif
+	while (true)	// main loop: step from crossing edge to crossing edge until back at the first one
+	{
+#ifdef SHOW_ITERATIONS
+		m++;
+#endif
+#ifdef DEBUG_CONVEX_HULL
+		printf("Intersecting edge is ");
+		intersection->print();
+		printf("\n");
+#endif
+		if (cmp == 0)	// current crossing passes exactly through a vertex: advance around that vertex
+		{
+			Edge* e = intersection->reverse->next;
+			startEdge = e;
+#ifdef SHOW_ITERATIONS
+			n = 0;
+#endif
+			while (true)
+			{
+#ifdef SHOW_ITERATIONS
+				n++;
+#endif
+				if (e->target->dot(normal).compare(shiftedDot) >= 0)
+				{
+					break;
+				}
+				intersection = e->reverse;
+				e = e->next;
+				if (e == startEdge)	// full turn without reaching the plane again
+				{
+					return true;
+				}
+			}
+#ifdef SHOW_ITERATIONS
+			printf("Needed %d iterations to advance intersection\n", n);
+#endif
+		}
+
+#ifdef DEBUG_CONVEX_HULL
+		printf("Advanced intersecting edge to ");
+		intersection->print();
+		printf(", cmp = %d\n", cmp);
+#endif
+
+		if (!firstIntersection)
+		{
+			firstIntersection = intersection;
+		}
+		else if (intersection == firstIntersection)	// back at the start: every crossing has been processed
+		{
+			break;
+		}
+
+		int prevCmp = cmp;	// state of the previous crossing, needed to stitch the new edge to it
+		Edge* prevIntersection = intersection;
+		Edge* prevFaceEdge = faceEdge;
+
+		Edge* e = intersection->reverse;
+#ifdef SHOW_ITERATIONS
+		n = 0;
+#endif
+		while (true)	// walk round the adjacent face to the next edge whose target reaches the plane
+		{
+#ifdef SHOW_ITERATIONS
+			n++;
+#endif
+			e = e->reverse->prev;
+			btAssert(e != intersection->reverse);
+			cmp = e->target->dot(normal).compare(shiftedDot);
+#ifdef DEBUG_CONVEX_HULL
+			printf("Testing edge ");
+			e->print();
+			printf(" -> cmp = %d\n", cmp);
+#endif
+			if (cmp >= 0)
+			{
+				intersection = e;
+				break;
+			}
+		}
+#ifdef SHOW_ITERATIONS
+		printf("Needed %d iterations to find other intersection of face\n", n);
+#endif
+
+		if (cmp > 0)	// edge target lies strictly beyond the plane: replace it by a new vertex on the plane
+		{
+			Vertex* removed = intersection->target;
+			e = intersection->reverse;
+			if (e->prev == e)	// e was the removed vertex's only edge
+			{
+				removed->edges = NULL;
+			}
+			else	// unlink e from the removed vertex's edge ring and make it a ring of its own
+			{
+				removed->edges = e->prev;
+				e->prev->link(e->next);
+				e->link(e);
+			}
+#ifdef DEBUG_CONVEX_HULL
+			printf("1: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z);
+#endif
+			
+			Point64 n0 = intersection->face->getNormal();	// normals of the two faces sharing the crossing edge
+			Point64 n1 = intersection->reverse->face->getNormal();
+			int64_t m00 = face->dir0.dot(n0);	// 2x2 system for the point shiftedOrigin + a*dir0 + b*dir1 lying on both faces
+			int64_t m01 = face->dir1.dot(n0);
+			int64_t m10 = face->dir0.dot(n1);
+			int64_t m11 = face->dir1.dot(n1);
+			int64_t r0 = (intersection->face->origin - shiftedOrigin).dot(n0);	// right-hand sides of that system
+			int64_t r1 = (intersection->reverse->face->origin - shiftedOrigin).dot(n1);
+			Int128 det = Int128::mul(m00, m11) - Int128::mul(m01, m10);
+			btAssert(det.getSign() != 0);
+			Vertex* v = vertexPool.newObject();
+			v->point.index = -1;	// -1: the vertex does not correspond to an input point
+			v->copy = -1;
+			v->point128 = PointR128(Int128::mul(face->dir0.x * r0, m11) - Int128::mul(face->dir0.x * r1, m01)
+															+ Int128::mul(face->dir1.x * r1, m00) - Int128::mul(face->dir1.x * r0, m10) + det * shiftedOrigin.x,
+															Int128::mul(face->dir0.y * r0, m11) - Int128::mul(face->dir0.y * r1, m01)
+															+ Int128::mul(face->dir1.y * r1, m00) - Int128::mul(face->dir1.y * r0, m10) + det * shiftedOrigin.y,
+															Int128::mul(face->dir0.z * r0, m11) - Int128::mul(face->dir0.z * r1, m01)
+															+ Int128::mul(face->dir1.z * r1, m00) - Int128::mul(face->dir1.z * r0, m10) + det * shiftedOrigin.z,
+															det);
+			v->point.x = (int32_t) v->point128.xvalue();	// integer copy of the exact rational coordinates
+			v->point.y = (int32_t) v->point128.yvalue();
+			v->point.z = (int32_t) v->point128.zvalue();
+			intersection->target = v;	// the crossing edge now ends at the new vertex
+			v->edges = e;
+
+			stack.push_back(v);	// removal record: kept vertex, removed vertex, NULL terminator
+			stack.push_back(removed);
+			stack.push_back(NULL);
+		}
+
+		if (cmp || prevCmp || (prevIntersection->reverse->next->target != intersection->target))	// a new edge is needed between the two crossing points
+		{
+			faceEdge = newEdgePair(prevIntersection->target, intersection->target);
+			if (prevCmp == 0)
+			{
+				faceEdge->link(prevIntersection->reverse->next);
+			}
+			if ((prevCmp == 0) || prevFaceEdge)
+			{
+				prevIntersection->reverse->link(faceEdge);
+			}
+			if (cmp == 0)
+			{
+				intersection->reverse->prev->link(faceEdge->reverse);
+			}
+			faceEdge->reverse->link(intersection->reverse);
+		}
+		else	// both crossings are existing vertices already joined by an edge: reuse it
+		{
+			faceEdge = prevIntersection->reverse->next;
+		}
+
+		if (prevFaceEdge)
+		{
+			if (prevCmp > 0)
+			{
+				faceEdge->link(prevFaceEdge->reverse);
+			}
+			else if (faceEdge != prevFaceEdge->reverse)	// drop the edges between the previous and the current face edge
+			{
+				stack.push_back(prevFaceEdge->target);	// kept vertex opening a new removal record
+				while (faceEdge->next != prevFaceEdge->reverse)
+				{
+					Vertex* removed = faceEdge->next->target;
+					removeEdgePair(faceEdge->next);
+					stack.push_back(removed);
+#ifdef DEBUG_CONVEX_HULL
+					printf("2: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z);
+#endif
+				}
+				stack.push_back(NULL);
+			}
+		}
+		faceEdge->face = face;	// one side of the edge belongs to the shifted face, the other to the crossed face
+		faceEdge->reverse->face = intersection->face;
+
+		if (!firstFaceEdge)
+		{
+			firstFaceEdge = faceEdge;
+		}
+	}
+#ifdef SHOW_ITERATIONS
+	printf("Needed %d iterations to process all intersections\n", m);
+#endif
+
+	if (cmp > 0)	// close the ring of face edges: join the last face edge to the first one
+	{
+		firstFaceEdge->reverse->target = faceEdge->target;
+		firstIntersection->reverse->link(firstFaceEdge);
+		firstFaceEdge->link(faceEdge->reverse);
+	}
+	else if (firstFaceEdge != faceEdge->reverse)	// same clean-up as inside the loop, for the closing corner
+	{
+		stack.push_back(faceEdge->target);
+		while (firstFaceEdge->next != faceEdge->reverse)
+		{
+			Vertex* removed = firstFaceEdge->next->target;
+			removeEdgePair(firstFaceEdge->next);
+			stack.push_back(removed);
+#ifdef DEBUG_CONVEX_HULL
+			printf("3: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z);
+#endif
+		}
+		stack.push_back(NULL);
+	}
+
+	btAssert(stack.size() > 0);
+	vertexList = stack[0];	// first kept vertex becomes the hull's entry vertex
+
+#ifdef DEBUG_CONVEX_HULL
+	printf("Removing part\n");
+#endif
+#ifdef SHOW_ITERATIONS
+	n = 0;
+#endif
+	int pos = 0;
+	while (pos < stack.size())	// process removal records level by level; records are (kept, removed..., NULL)
+	{
+		int end = stack.size();	// records appended below form the next level
+		while (pos < end)
+		{
+			Vertex* kept = stack[pos++];
+#ifdef DEBUG_CONVEX_HULL
+			kept->print();
+#endif
+			bool deeper = false;	// set once this record has spawned a follow-up record
+			Vertex* removed;
+			while ((removed = stack[pos++]) != NULL)
+			{
+#ifdef SHOW_ITERATIONS
+				n++;
+#endif
+				kept->receiveNearbyFaces(removed);	// hand the removed vertex's nearby faces over to the kept vertex
+				while (removed->edges)	// delete every remaining edge of the removed vertex and queue its neighbours
+				{
+					if (!deeper)
+					{
+						deeper = true;
+						stack.push_back(kept);
+					}
+					stack.push_back(removed->edges->target);
+					removeEdgePair(removed->edges);
+				}
+			}
+			if (deeper)
+			{
+				stack.push_back(NULL);	// terminate the follow-up record
+			}
+		}
+	}
+#ifdef SHOW_ITERATIONS
+	printf("Needed %d iterations to remove part\n", n);
+#endif
+
+	stack.resize(0);	// only empties this function's own copy of the array
+	face->origin = shiftedOrigin;	// the face now lies in the shifted plane
+
+	return true;
+}
+
+
+static int getVertexCopy(btConvexHullInternal::Vertex* vertex, btAlignedObjectArray<btConvexHullInternal::Vertex*>& vertices)
+{	// Returns the output index of "vertex", appending it to "vertices" the first time it is seen.
+	if (vertex->copy >= 0)
+	{
+		return vertex->copy;	// already numbered
+	}
+	const int assigned = vertices.size();	// next free slot becomes this vertex's index
+	vertex->copy = assigned;
+	vertices.push_back(vertex);
+#ifdef DEBUG_CONVEX_HULL
+	printf("Vertex %d gets index *%d\n", vertex->point.index, assigned);
+#endif
+	return assigned;
+}
+
+btScalar btConvexHullComputer::compute(const void* coords, bool doubleCoords, int stride, int count, btScalar shrink, btScalar shrinkClamp)
+{	// Computes the hull, optionally shrinks it, and fills "vertices", "edges" and "faces". Returns the shrink amount (negative on failure).
+	if (count <= 0)	// no input: empty output
+	{
+		vertices.clear();
+		edges.clear();
+		faces.clear();
+		return 0;
+	}
+
+	btConvexHullInternal hull;
+	hull.compute(coords, doubleCoords, stride, count);
+
+	btScalar shift = 0;
+	if ((shrink > 0) && ((shift = hull.shrink(shrink, shrinkClamp)) < 0))	// a negative result from shrink() means failure
+	{
+		vertices.clear();
+		edges.clear();
+		faces.clear();
+		return shift;
+	}
+
+	vertices.resize(0);
+	edges.resize(0);
+	faces.resize(0);
+
+	btAlignedObjectArray<btConvexHullInternal::Vertex*> oldVertices;	// internal vertices in output order; doubles as the work queue
+	getVertexCopy(hull.vertexList, oldVertices);
+	int copied = 0;
+	while (copied < oldVertices.size())	// oldVertices grows while newly reached vertices get numbered
+	{
+		btConvexHullInternal::Vertex* v = oldVertices[copied];
+		vertices.push_back(hull.getCoordinates(v));
+		btConvexHullInternal::Edge* firstEdge = v->edges;
+		if (firstEdge)
+		{
+			int firstCopy = -1;	// output index of the first edge visited around v
+			int prevCopy = -1;	// output index of the edge visited just before the current one
+			btConvexHullInternal::Edge* e = firstEdge;
+			do
+			{
+				if (e->copy < 0)	// edge pair not copied yet: store it and its reverse in adjacent slots
+				{
+					int s = edges.size();
+					edges.push_back(Edge());
+					edges.push_back(Edge());
+					Edge* c = &edges[s];
+					Edge* r = &edges[s + 1];
+					e->copy = s;
+					e->reverse->copy = s + 1;
+					c->reverse = 1;	// links are stored as offsets relative to the edge's own slot
+					r->reverse = -1;
+					c->targetVertex = getVertexCopy(e->target, oldVertices);
+					r->targetVertex = copied;
+#ifdef DEBUG_CONVEX_HULL
+					printf("      CREATE: Vertex *%d has edge to *%d\n", copied, c->getTargetVertex());
+#endif
+				}
+				if (prevCopy >= 0)
+				{
+					edges[e->copy].next = prevCopy - e->copy;	// output "next" points at the previously visited edge
+				}
+				else
+				{
+					firstCopy = e->copy;
+				}
+				prevCopy = e->copy;
+				e = e->next;
+			} while (e != firstEdge);
+			edges[firstCopy].next = prevCopy - firstCopy;	// close the ring: the first edge links to the last one visited
+		}
+		copied++;
+	}
+
+	for (int i = 0; i < copied; i++)	// second pass: emit one entry per face
+	{
+		btConvexHullInternal::Vertex* v = oldVertices[i];
+		btConvexHullInternal::Edge* firstEdge = v->edges;
+		if (firstEdge)
+		{
+			btConvexHullInternal::Edge* e = firstEdge;
+			do
+			{
+				if (e->copy >= 0)	// edge not yet assigned to an emitted face
+				{
+#ifdef DEBUG_CONVEX_HULL
+					printf("Vertex *%d has edge to *%d\n", i, edges[e->copy].getTargetVertex());
+#endif
+					faces.push_back(e->copy);	// a face is represented by the output index of one of its edges
+					btConvexHullInternal::Edge* f = e;
+					do	// walk around the face, resetting copy to -1 so its edges are not emitted again
+					{
+#ifdef DEBUG_CONVEX_HULL
+						printf("   Face *%d\n", edges[f->copy].getTargetVertex());
+#endif
+						f->copy = -1;
+						f = f->reverse->prev;
+					} while (f != e);
+				}
+				e = e->next;
+			} while (e != firstEdge);
+		}
+	}
+
+	return shift;
+}
+
+
+
+
+
diff --git a/src/bullet/LinearMath/btConvexHullComputer.h b/src/bullet/LinearMath/btConvexHullComputer.h
new file mode 100644
index 00000000..7240ac4f
--- /dev/null
+++ b/src/bullet/LinearMath/btConvexHullComputer.h
@@ -0,0 +1,103 @@
+/*
+Copyright (c) 2011 Ole Kniemeyer, MAXON, www.maxon.net
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONVEX_HULL_COMPUTER_H
+#define BT_CONVEX_HULL_COMPUTER_H
+
+#include "btVector3.h"
+#include "btAlignedObjectArray.h"
+
+/// Convex hull implementation based on Preparata and Hong
+/// See http://code.google.com/p/bullet/issues/detail?id=275
+/// Ole Kniemeyer, MAXON Computer GmbH
+class btConvexHullComputer
+{
+	private:
+		btScalar compute(const void* coords, bool doubleCoords, int stride, int count, btScalar shrink, btScalar shrinkClamp);
+
+	public:
+
+		class Edge
+		{
+			private:
+				int next;	// offset, in array elements, from this edge to the next edge around its source vertex
+				int reverse;	// offset, in array elements, from this edge to the oppositely directed edge
+				int targetVertex;	// index of the vertex this edge points to
+
+				friend class btConvexHullComputer;
+
+			public:
+				int getSourceVertex() const	// the reverse edge's target is this edge's source
+				{
+					return getReverseEdge()->targetVertex;
+				}
+
+				int getTargetVertex() const
+				{
+					return this->targetVertex;
+				}
+
+				const Edge* getNextEdgeOfVertex() const // clockwise list of all edges of a vertex
+				{
+					return this + next;
+				}
+
+				const Edge* getNextEdgeOfFace() const // counter-clockwise list of all edges of a face
+				{
+					return getReverseEdge()->getNextEdgeOfVertex();
+				}
+
+				const Edge* getReverseEdge() const
+				{
+					return this + reverse;
+				}
+		};
+
+
+		// Output: vertex positions of the hull
+		btAlignedObjectArray<btVector3> vertices;
+
+		// Output: edges of the hull, linked to each other by relative offsets
+		btAlignedObjectArray<Edge> edges;
+
+		// Output: one entry per face, an index into "edges" naming one edge of that face. Faces are planar n-gons
+		btAlignedObjectArray<int> faces;
+
+		/*
+		Computes the convex hull of the "count" points stored at "coords", where "stride" is the distance in bytes
+		between the start of one point and the next. A positive "shrink" moves every face of the hull inwards
+		by that many length units along its normal. A positive "shrinkClamp" limits "shrink" to at most
+		"shrinkClamp * innerRadius", "innerRadius" being the smallest distance from any face
+		to the center of the convex hull.
+
+		Returns the amount by which the hull was actually shrunken. A negative return value means the requested
+		amount was so large that the resulting convex hull is empty.
+
+		The result is stored in the member variables "vertices", "edges" and "faces".
+		*/
+		btScalar compute(const float* coords, int stride, int count, btScalar shrink, btScalar shrinkClamp)
+		{
+			return compute(coords, false, stride, count, shrink, shrinkClamp);
+		}
+
+		// Overload of the above for double-precision input coordinates
+		btScalar compute(const double* coords, int stride, int count, btScalar shrink, btScalar shrinkClamp)
+		{
+			return compute(coords, true, stride, count, shrink, shrinkClamp);
+		}
+};
+
+
+#endif //BT_CONVEX_HULL_COMPUTER_H
+
diff --git a/src/bullet/LinearMath/btDefaultMotionState.h b/src/bullet/LinearMath/btDefaultMotionState.h
new file mode 100644
index 00000000..a6b7ef15
--- /dev/null
+++ b/src/bullet/LinearMath/btDefaultMotionState.h
@@ -0,0 +1,40 @@
+#ifndef BT_DEFAULT_MOTION_STATE_H
+#define BT_DEFAULT_MOTION_STATE_H
+
+#include "btMotionState.h"
+
+///The btDefaultMotionState provides a common implementation to synchronize world transforms with offsets.
+struct	btDefaultMotionState : public btMotionState
+{
+	btTransform m_graphicsWorldTrans;	// current transform on the user ("graphics") side
+	btTransform	m_centerOfMassOffset;	// offset between the center-of-mass frame and the graphics frame
+	btTransform m_startWorldTrans;	// copy of the transform passed at construction
+	void*		m_userPointer;	// set to 0 here and not otherwise used by this struct
+
+	///Both arguments default to the identity transform; startTrans also seeds the graphics transform.
+	btDefaultMotionState(const btTransform& startTrans = btTransform::getIdentity(),const btTransform& centerOfMassOffset = btTransform::getIdentity())
+		: m_graphicsWorldTrans(startTrans),
+		m_centerOfMassOffset(centerOfMassOffset),
+		m_startWorldTrans(startTrans),
+		m_userPointer(0)
+	{
+	}
+
+	///synchronizes world transform from user to physics
+	virtual void	getWorldTransform(btTransform& centerOfMassWorldTrans ) const 
+	{
+		const btTransform offsetInverse = m_centerOfMassOffset.inverse();
+		centerOfMassWorldTrans = offsetInverse * m_graphicsWorldTrans;
+	}
+
+	///synchronizes world transform from physics to user
+	///Bullet only calls the update of worldtransform for active objects
+	virtual void	setWorldTransform(const btTransform& centerOfMassWorldTrans)
+	{
+		const btTransform graphicsTrans = centerOfMassWorldTrans * m_centerOfMassOffset;
+		m_graphicsWorldTrans = graphicsTrans;
+	}
+
+};
+
+#endif //BT_DEFAULT_MOTION_STATE_H
diff --git a/src/bullet/LinearMath/btGeometryUtil.cpp b/src/bullet/LinearMath/btGeometryUtil.cpp
new file mode 100644
index 00000000..5ac230f7
--- /dev/null
+++ b/src/bullet/LinearMath/btGeometryUtil.cpp
@@ -0,0 +1,185 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#include "btGeometryUtil.h"
+
+
+/*
+  Make sure this dummy function never changes so that it
+  can be used by probes that are checking whether the
+  library is actually installed.
+*/
+extern "C"
+{	// C linkage, so the symbol keeps its plain name for the library-presence probes described above
+	void btBulletMathProbe ();
+
+	void btBulletMathProbe () {}	// intentionally empty: only the symbol's existence matters
+}
+
+
+bool	btGeometryUtil::isPointInsidePlanes(const btAlignedObjectArray<btVector3>& planeEquations, const btVector3& point, btScalar	margin)
+{
+	// Inside means: for every plane, dot(plane, point) plus the offset in component 3, less "margin", is not positive.
+	const int planeCount = planeEquations.size();
+	for (int planeIndex = 0; planeIndex < planeCount; ++planeIndex)
+	{
+		const btVector3& plane = planeEquations[planeIndex];
+		const btScalar signedDist = btScalar(plane.dot(point)) + btScalar(plane[3]) - margin;
+		if (signedDist > btScalar(0.))
+		{
+			return false;
+		}
+	}
+	return true;
+}
+
+
+bool	btGeometryUtil::areVerticesBehindPlane(const btVector3& planeNormal, const btAlignedObjectArray<btVector3>& vertices, btScalar	margin)
+{	// True when no vertex lies more than "margin" in front of the plane; the plane offset is planeNormal[3].
+	const int vertexCount = vertices.size();
+	for (int vertexIndex = 0; vertexIndex < vertexCount; ++vertexIndex)
+	{
+		const btVector3& vertex = vertices[vertexIndex];
+		const btScalar signedDist = btScalar(planeNormal.dot(vertex)) + btScalar(planeNormal[3]) - margin;
+		if (signedDist > btScalar(0.))
+		{
+			return false;
+		}
+	}
+	return true;
+}
+
+bool notExist(const btVector3& planeEquation,const btAlignedObjectArray<btVector3>& planeEquations);
+// Returns false as soon as an entry of "planeEquations" has a dot product with "planeEquation" above 0.999, true otherwise.
+bool notExist(const btVector3& planeEquation,const btAlignedObjectArray<btVector3>& planeEquations)
+{
+	const int knownCount = planeEquations.size();
+	for (int knownIndex = 0; knownIndex < knownCount; ++knownIndex)
+	{
+		const btVector3& known = planeEquations[knownIndex];
+		if (planeEquation.dot(known) > btScalar(0.999))
+		{
+			return false;	// a nearly parallel plane is already stored
+		}
+	}
+	return true;
+}
+
+void	btGeometryUtil::getPlaneEquationsFromVertices(btAlignedObjectArray<btVector3>& vertices, btAlignedObjectArray<btVector3>& planeEquationsOut )
+{	// Appends to planeEquationsOut (which is not cleared first) each accepted plane, stored as unit normal plus offset in component 3.
+		const int numvertices = vertices.size();
+	// brute force: try the plane through every triple of vertices
+	for (int i=0;i<numvertices;i++)
+	{
+		const btVector3& N1 = vertices[i];
+		
+
+		for (int j=i+1;j<numvertices;j++)
+		{
+			const btVector3& N2 = vertices[j];
+				
+			for (int k=j+1;k<numvertices;k++)
+			{
+
+				const btVector3& N3 = vertices[k];
+
+				btVector3 planeEquation,edge0,edge1;
+				edge0 = N2-N1;
+				edge1 = N3-N1;
+				btScalar normalSign = btScalar(1.);
+				for (int ww=0;ww<2;ww++)	// test both orientations of the candidate normal
+				{
+					planeEquation = normalSign * edge0.cross(edge1);
+					if (planeEquation.length2() > btScalar(0.0001))	// skip triples whose cross product is (nearly) zero
+					{
+						planeEquation.normalize();
+						if (notExist(planeEquation,planeEquationsOut))	// skip normals almost parallel to one already stored
+						{
+							planeEquation[3] = -planeEquation.dot(N1);	// offset chosen so that N1 lies on the plane
+							
+								//keep the plane only if no vertex lies more than 0.01 in front of it
+								if (areVerticesBehindPlane(planeEquation,vertices,btScalar(0.01)))
+								{
+									planeEquationsOut.push_back(planeEquation);
+								}
+						}
+					}
+					normalSign = btScalar(-1.);	// second iteration uses the flipped normal
+				}
+			
+			}
+		}
+	}
+
+}
+
+void	btGeometryUtil::getVerticesFromPlaneEquations(const btAlignedObjectArray<btVector3>& planeEquations , btAlignedObjectArray<btVector3>& verticesOut )
+{	// Appends to verticesOut (which is not cleared first) each intersection point of three planes that passes the inside test.
+	const int numbrushes = planeEquations.size();
+	// brute force: intersect every triple of planes
+	for (int i=0;i<numbrushes;i++)
+	{
+		const btVector3& N1 = planeEquations[i];
+		
+
+		for (int j=i+1;j<numbrushes;j++)
+		{
+			const btVector3& N2 = planeEquations[j];
+				
+			for (int k=j+1;k<numbrushes;k++)
+			{
+
+				const btVector3& N3 = planeEquations[k];
+
+				btVector3 n2n3; n2n3 = N2.cross(N3);
+				btVector3 n3n1; n3n1 = N3.cross(N1);
+				btVector3 n1n2; n1n2 = N1.cross(N2);
+				
+				if ( ( n2n3.length2() > btScalar(0.0001) ) &&
+					 ( n3n1.length2() > btScalar(0.0001) ) &&
+					 ( n1n2.length2() > btScalar(0.0001) ) )
+				{
+					//point P out of 3 plane equations, with di = Ni[3] and "x" the cross product:
+
+					//	    -( d1 ( N2 x N3 ) + d2 ( N3 x N1 ) + d3 ( N1 x N2 ) )
+					//P =  -------------------------------------------------------------------------  
+					//	                     N1 . ( N2 x N3 )
+
+
+					btScalar quotient = (N1.dot(n2n3));
+					if (btFabs(quotient) > btScalar(0.000001))	// skip triples with a (nearly) zero denominator
+					{
+						quotient = btScalar(-1.) / quotient;	// the minus sign of the formula is folded in here
+						n2n3 *= N1[3];
+						n3n1 *= N2[3];
+						n1n2 *= N3[3];
+						btVector3 potentialVertex = n2n3;
+						potentialVertex += n3n1;
+						potentialVertex += n1n2;
+						potentialVertex *= quotient;
+
+						//keep the point only if it is inside all planes, with a margin of 0.01
+						if (isPointInsidePlanes(planeEquations,potentialVertex,btScalar(0.01)))
+						{
+							verticesOut.push_back(potentialVertex);
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
diff --git a/src/bullet/LinearMath/btGeometryUtil.h b/src/bullet/LinearMath/btGeometryUtil.h
new file mode 100644
index 00000000..a4b13b45
--- /dev/null
+++ b/src/bullet/LinearMath/btGeometryUtil.h
@@ -0,0 +1,42 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_GEOMETRY_UTIL_H
+#define BT_GEOMETRY_UTIL_H
+
+#include "btVector3.h"
+#include "btAlignedObjectArray.h"
+
+///The btGeometryUtil helper class provides a few methods to convert between plane equations and vertices.
+class btGeometryUtil
+{
+	public:
+	
+		// Planes are btVector3 values: components 0..2 hold the normal, component 3 the offset.
+		static void	getPlaneEquationsFromVertices(btAlignedObjectArray<btVector3>& vertices, btAlignedObjectArray<btVector3>& planeEquationsOut );
+		// Appends the intersection points of plane triples that lie inside all planes.
+		static void	getVerticesFromPlaneEquations(const btAlignedObjectArray<btVector3>& planeEquations , btAlignedObjectArray<btVector3>& verticesOut );
+		// NOTE(review): the btGeometryUtil.cpp added alongside this header does not define isInside - confirm it is defined elsewhere or unused.
+		static bool	isInside(const btAlignedObjectArray<btVector3>& vertices, const btVector3& planeNormal, btScalar	margin);
+		// True when, for every plane, dot(normal, point) + offset - margin is not positive.
+		static bool	isPointInsidePlanes(const btAlignedObjectArray<btVector3>& planeEquations, const btVector3& point, btScalar	margin);
+		// True when, for every vertex, dot(planeNormal, vertex) + planeNormal[3] - margin is not positive.
+		static bool	areVerticesBehindPlane(const btVector3& planeNormal, const btAlignedObjectArray<btVector3>& vertices, btScalar	margin);
+
+};
+
+
+#endif //BT_GEOMETRY_UTIL_H
+
diff --git a/src/bullet/LinearMath/btGrahamScan2dConvexHull.h b/src/bullet/LinearMath/btGrahamScan2dConvexHull.h
new file mode 100644
index 00000000..d7bd3eb8
--- /dev/null
+++ b/src/bullet/LinearMath/btGrahamScan2dConvexHull.h
@@ -0,0 +1,110 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef GRAHAM_SCAN_2D_CONVEX_HULL_H
+#define GRAHAM_SCAN_2D_CONVEX_HULL_H
+
+
+#include "btVector3.h"
+#include "btAlignedObjectArray.h"
+
+///Point fed to GrahamScanConvexHull2D: a btVector3 tagged with a caller-supplied index and a sort key.
+struct GrahamVector2 : public btVector3
+{
+	///m_angle is zero-initialized here (it used to be left indeterminate) and is overwritten by the scan.
+	GrahamVector2(const btVector3& org, int orgIndex)
+		:btVector3(org), m_angle(btScalar(0)), m_orgIndex(orgIndex) {}
+
+	btScalar	m_angle;	///< sort key relative to the anchor, written by GrahamScanConvexHull2D
+	int m_orgIndex;	///< index passed to the constructor
+};
+
+
+///Ordering used to sort GrahamVector2 points around an anchor point:
+///by m_angle first, then by squared distance to the anchor, then by m_orgIndex.
+struct btAngleCompareFunc {
+	btVector3 m_anchor;
+
+	btAngleCompareFunc(const btVector3& anchor) : m_anchor(anchor) {}
+
+	bool operator()(const GrahamVector2& a, const GrahamVector2& b) const {
+		//primary key: the precomputed angle
+		if (a.m_angle != b.m_angle)
+			return a.m_angle < b.m_angle;
+
+		//secondary key: squared distance from the anchor
+		const btScalar distA = (a-m_anchor).length2();
+		const btScalar distB = (b-m_anchor).length2();
+		if (distA != distB)
+			return distA < distB;
+
+		//final tie-break: the index the point was created with
+		return a.m_orgIndex < b.m_orgIndex;
+	}
+};
+
+///Computes the 2D convex hull (XY plane) of originalPoints with a Graham scan and appends it to hull; originalPoints is reordered in place and every m_angle is rewritten.
+///Zero-length offsets (the anchor and exact duplicates of it) get sort key 0 instead of a 0/0 NaN, and a point is no longer dropped when back tracking pops the hull down to the anchor.
+inline void GrahamScanConvexHull2D(btAlignedObjectArray<GrahamVector2>& originalPoints, btAlignedObjectArray<GrahamVector2>& hull)
+{
+	const int numPoints = originalPoints.size();
+	if (numPoints<=1)
+	{
+		for (int i=0;i<numPoints;i++)
+			hull.push_back(originalPoints[i]);
+		return;
+	}
+	//step 1: find anchor point with smallest x (ties broken by smallest y) and move it to first location
+	for (int i=0;i<numPoints;i++)
+	{
+		const btVector3& left = originalPoints[i];
+		const btVector3& right = originalPoints[0];
+		if (left.x() < right.x() || (!(right.x() < left.x()) && left.y() < right.y()))
+			originalPoints.swap(0,i);
+	}
+	//sort key: z component of cross(x axis, offset from anchor) divided by the offset length
+	const btVector3 xvec(1,0,0);
+	const btVector3 zvec(0,0,1);
+	for (int i=0;i<numPoints;i++)
+	{
+		const btVector3 ar = originalPoints[i]-originalPoints[0];
+		const btScalar len = ar.length();
+		//the anchor itself and exact duplicates of it have zero length: use key 0 rather than dividing by zero
+		originalPoints[i].m_angle = (len > btScalar(0)) ? btCross(xvec, ar).dot(zvec) / len : btScalar(0);
+	}
+
+	//step 2: sort all points except the anchor, based on 'angle' with this anchor
+	btAngleCompareFunc comp(originalPoints[0]);
+	originalPoints.quickSortInternal(comp,1,numPoints-1);
+	hull.push_back(originalPoints[0]);
+	hull.push_back(originalPoints[1]);
+	//step 3: keep all 'convex' points and discard concave points (using back tracking)
+	for (int i = 2; i != numPoints; i++)
+	{
+		bool isConvex = false;
+		while (!isConvex && hull.size()>1) {
+			const btVector3& a = hull[hull.size()-2];
+			const btVector3& b = hull[hull.size()-1];
+			isConvex = btCross(a-b,a-originalPoints[i]).dot(zvec)> 0;
+			if (!isConvex)
+				hull.pop_back();
+		}
+		//pushed after the loop so the point survives even when only the anchor is left on the hull
+		hull.push_back(originalPoints[i]);
+	}
+}
+
+#endif //GRAHAM_SCAN_2D_CONVEX_HULL_H
diff --git a/src/bullet/LinearMath/btHashMap.h b/src/bullet/LinearMath/btHashMap.h
new file mode 100644
index 00000000..ce07db3a
--- /dev/null
+++ b/src/bullet/LinearMath/btHashMap.h
@@ -0,0 +1,450 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_HASH_MAP_H
+#define BT_HASH_MAP_H
+
+#include "btAlignedObjectArray.h"
+
+///very basic hashable string implementation, compatible with btHashMap
+struct btHashString
+{
+	const char* m_string;	///< borrowed pointer; the characters are not copied
+	unsigned int	m_hash;	///< hash of m_string, computed once in the constructor
+
+	///Returns the hash cached at construction.
+	SIMD_FORCE_INLINE	unsigned int getHash()const
+	{
+		return m_hash;
+	}
+
+	///Stores the pointer and hashes the zero-terminated string it points to.
+	btHashString(const char* name)
+		:m_string(name)
+	{
+		/* Fowler / Noll / Vo (FNV) Hash, magic numbers from http://www.isthe.com/chongo/tech/comp/fnv/ */
+		static const unsigned int  InitialFNV = 2166136261u;
+		static const unsigned int FNVMultiple = 16777619u;
+
+		m_hash = InitialFNV;
+		for(const char* cursor = m_string; *cursor; ++cursor)
+		{
+			m_hash ^= *cursor;       /* xor in the character (promoted from char, so sign-extended where char is signed) */
+			m_hash *= FNVMultiple;  /* multiply by the magic number */
+		}
+	}
+
+	///Three-way compare of two zero-terminated strings by unsigned char value: returns -1, 0 or 1.
+	int portableStringCompare(const char* src,	const char* dst) const
+	{
+		const unsigned char* lhs = (const unsigned char*)src;
+		const unsigned char* rhs = (const unsigned char*)dst;
+		//skip the common prefix, stopping at the terminator of dst
+		while (*lhs == *rhs && *rhs)
+		{
+			++lhs;
+			++rhs;
+		}
+		if (*lhs < *rhs)
+			return -1;
+		return (*lhs > *rhs) ? 1 : 0;
+	}
+
+	///True when both objects wrap the same pointer or strings with identical contents.
+	bool equals(const btHashString& other) const
+	{
+		if (m_string == other.m_string)
+			return true;
+		return portableStringCompare(m_string,other.m_string) == 0;
+	}
+};
+
+const int BT_HASH_NULL=0xffffffff;	//"no entry" marker stored in btHashMap's tables; the literal does not fit a 32-bit int, so this relies on the conversion producing -1 (typical, confirm for exotic targets)
+
+
+///Integer key for btHashMap: wraps an int uid and hashes it with Thomas Wang's integer mix.
+class btHashInt
+{
+	int	m_uid;
+public:
+	btHashInt(int uid)	:m_uid(uid) {}
+	///Returns the wrapped uid.
+	int	getUid1() const { return m_uid; }
+
+	///Replaces the wrapped uid.
+	void	setUid1(int uid) { m_uid = uid; }
+
+	///Two keys are equal when their uids are equal.
+	bool equals(const btHashInt& other) const
+	{
+		return m_uid == other.m_uid;
+	}
+
+	///Thomas Wang's hash of the uid; every step runs on a signed int and the result is converted on return.
+	SIMD_FORCE_INLINE	unsigned int getHash()const
+	{
+		int mixed = m_uid;
+		mixed += ~(mixed << 15);
+		mixed ^=  (mixed >> 10);
+		mixed +=  (mixed << 3);
+		mixed ^=  (mixed >> 6);
+		mixed += ~(mixed << 11);
+		mixed ^=  (mixed >> 16);
+		return mixed;
+	}
+};
+
+
+
+///Pointer key for btHashMap: hashes the bit pattern of the pointer, read through a union with two ints.
+class btHashPtr
+{
+	union
+	{
+		const void*	m_pointer;
+		int	m_hashValues[2];	//integer view of m_pointer, used only by getHash
+	};
+
+public:
+	btHashPtr(const void* ptr)
+		:m_pointer(ptr) {}
+
+	///Returns the wrapped pointer.
+	const void*	getPointer() const { return m_pointer; }
+
+	///Two keys are equal when they wrap the same address.
+	bool equals(const btHashPtr& other) const
+	{
+		return m_pointer == other.m_pointer;
+	}
+
+	///Folds the pointer into one int (the two halves are summed when pointers are 8 bytes wide),
+	///then applies Thomas Wang's hash.
+	SIMD_FORCE_INLINE	unsigned int getHash()const
+	{
+		const bool pointerIs8Bytes = (sizeof(void*)==8);
+		int folded = m_hashValues[0];
+		if (pointerIs8Bytes)
+			folded += m_hashValues[1];
+
+		folded += ~(folded << 15);
+		folded ^=  (folded >> 10);
+		folded +=  (folded << 3);
+		folded ^=  (folded >> 6);
+		folded += ~(folded << 11);
+		folded ^=  (folded >> 16);
+		return folded;
+	}
+};
+
+
+///Key type for btHashMap that is parameterized on Value but stores only an int uid;
+///Value is not used anywhere inside this class body.
+template <class Value>
+class btHashKeyPtr
+{
+        int     m_uid;
+public:
+        btHashKeyPtr(int uid)    :m_uid(uid) {}
+
+        ///Returns the wrapped uid.
+        int     getUid1() const { return m_uid; }
+
+        ///Two keys are equal when their uids are equal.
+        bool equals(const btHashKeyPtr<Value>& other) const
+        {
+                return m_uid == other.m_uid;
+        }
+
+        ///Thomas Wang's hash of the uid, one mixing step per line.
+        SIMD_FORCE_INLINE       unsigned int getHash()const
+        {
+                int mixed = m_uid;
+                mixed += ~(mixed << 15);
+                mixed ^=  (mixed >> 10);
+                mixed +=  (mixed << 3);
+                mixed ^=  (mixed >> 6);
+                mixed += ~(mixed << 11);
+                mixed ^=  (mixed >> 16);
+                return mixed;
+        }
+};
+
+
+///Key type for btHashMap parameterized on Value; it stores only an int uid (Value is unused in this class body).
+template <class Value>
+class btHashKey
+{
+	int	m_uid;
+public:
+	btHashKey(int uid)	:m_uid(uid) {}
+
+	///Returns the wrapped uid.
+	int	getUid1() const { return m_uid; }
+	///Two keys are equal when their uids are equal.
+	bool equals(const btHashKey<Value>& other) const
+	{
+		return m_uid == other.m_uid;
+	}
+	///Thomas Wang's hash of the uid, one mixing step per line.
+	SIMD_FORCE_INLINE	unsigned int getHash()const
+	{
+		int mixed = m_uid;
+		mixed += ~(mixed << 15);
+		mixed ^=  (mixed >> 10);
+		mixed +=  (mixed << 3);
+		mixed ^=  (mixed >> 6);
+		mixed += ~(mixed << 11);
+		mixed ^=  (mixed >> 16);
+		return mixed;
+	}
+};
+
+
+///The btHashMap template class implements a generic and lightweight hashmap.
+///A basic sample of how to use btHashMap is located in Demos\BasicDemo\main.cpp
+template <class Key, class Value>
+class btHashMap
+{
+
+protected:
+	btAlignedObjectArray<int>		m_hashTable;
+	btAlignedObjectArray<int>		m_next;
+	
+	btAlignedObjectArray<Value>		m_valueArray;
+	btAlignedObjectArray<Key>		m_keyArray;
+
+	///Brings m_hashTable and m_next up to the capacity of m_valueArray and rebuilds every bucket chain.
+	///insert calls this right after a push_back that changed the capacity; the key argument is unused.
+	///NOTE(review): the (capacity-1) mask assumes the capacity is a power of two -- confirm against btAlignedObjectArray's growth policy.
+	void	growTables(const Key& /*key*/)
+	{
+		const int newCapacity = m_valueArray.capacity();
+		const int oldTableSize = m_hashTable.size();
+
+		//tables already cover the value array: nothing to do
+		if (oldTableSize >= newCapacity)
+		{
+			return;
+		}
+
+		m_hashTable.resize(newCapacity);
+		m_next.resize(newCapacity);
+
+		//start from a clean slate: every bucket head and every chain link is reset
+		for (int slot = 0; slot < newCapacity; ++slot)
+		{
+			m_hashTable[slot] = BT_HASH_NULL;
+			m_next[slot] = BT_HASH_NULL;
+		}
+
+		//re-link the entries with index 0 .. oldTableSize-1;
+		//each one is pushed onto the front of the bucket selected by the new mask
+		const int mask = newCapacity-1;
+
+		for (int entry = 0; entry < oldTableSize; ++entry)
+		{
+			const int bucket = m_keyArray[entry].getHash() & mask;
+			m_next[entry] = m_hashTable[bucket];
+			m_hashTable[bucket] = entry;
+		}
+
+	}
+
+	public:
+
+	void insert(const Key& key, const Value& value) {	//stores value under key, overwriting the old value when key is already present
+		int hash = key.getHash() & (m_valueArray.capacity()-1);	//bucket for the current capacity
+
+		//replace value if the key is already there
+		int index = findIndex(key);
+		if (index != BT_HASH_NULL)
+		{
+			m_valueArray[index]=value;
+			return;
+		}
+		//new key: append to the parallel value/key arrays; its index is the size before the append
+		int count = m_valueArray.size();
+		int oldCapacity = m_valueArray.capacity();
+		m_valueArray.push_back(value);
+		m_keyArray.push_back(key);
+		//if the append changed the capacity the tables must be regrown before the new entry is linked
+		int newCapacity = m_valueArray.capacity();
+		if (oldCapacity < newCapacity)
+		{
+			growTables(key);
+			//hash with new capacity
+			hash = key.getHash() & (m_valueArray.capacity()-1);
+		}
+		m_next[count] = m_hashTable[hash];	//link the new entry at the head of its bucket chain
+		m_hashTable[hash] = count;
+	}
+
+	void remove(const Key& key) {
+		//Removes key and its value when present; the last pair is moved into the freed slot so both arrays stay dense.
+		int hash = key.getHash() & (m_valueArray.capacity()-1);
+
+		int pairIndex = findIndex(key);
+		
+		if (pairIndex ==BT_HASH_NULL)
+		{
+			return;
+		}
+
+		// Remove the pair from the hash table.
+		int index = m_hashTable[hash];
+		btAssert(index != BT_HASH_NULL);
+
+		int previous = BT_HASH_NULL;
+		while (index != pairIndex)
+		{
+			previous = index;
+			index = m_next[index];
+		}
+
+		if (previous != BT_HASH_NULL)
+		{
+			btAssert(m_next[previous] == pairIndex);
+			m_next[previous] = m_next[pairIndex];
+		}
+		else
+		{
+			m_hashTable[hash] = m_next[pairIndex];	//the pair was the head of its chain
+		}
+
+		// We now move the last pair into spot of the
+		// pair being removed. We need to fix the hash
+		// table indices to support the move.
+
+		int lastPairIndex = m_valueArray.size() - 1;
+
+		// If the removed pair is the last pair, we are done.
+		if (lastPairIndex == pairIndex)
+		{
+			m_valueArray.pop_back();
+			m_keyArray.pop_back();
+			return;
+		}
+
+		// Remove the last pair from the hash table.
+		int lastHash = m_keyArray[lastPairIndex].getHash() & (m_valueArray.capacity()-1);
+
+		index = m_hashTable[lastHash];
+		btAssert(index != BT_HASH_NULL);
+
+		previous = BT_HASH_NULL;
+		while (index != lastPairIndex)
+		{
+			previous = index;
+			index = m_next[index];
+		}
+
+		if (previous != BT_HASH_NULL)
+		{
+			btAssert(m_next[previous] == lastPairIndex);
+			m_next[previous] = m_next[lastPairIndex];
+		}
+		else
+		{
+			m_hashTable[lastHash] = m_next[lastPairIndex];	//the last pair was the head of its chain
+		}
+
+		// Copy the last pair into the remove pair's spot.
+		m_valueArray[pairIndex] = m_valueArray[lastPairIndex];
+		m_keyArray[pairIndex] = m_keyArray[lastPairIndex];
+
+		// Insert the last pair into the hash table
+		m_next[pairIndex] = m_hashTable[lastHash];
+		m_hashTable[lastHash] = pairIndex;
+
+		m_valueArray.pop_back();
+		m_keyArray.pop_back();
+
+	}
+
+
+	int size() const	//number of stored values, i.e. the length of m_valueArray
+	{
+		return m_valueArray.size();
+	}
+
+	///Returns the address of the value stored at position index; valid positions are 0 .. size()-1.
+	const Value* getAtIndex(int index) const
+	{
+		btAssert(index >= 0 && index < m_valueArray.size());	//index is signed, so both bounds are checked
+		return &m_valueArray[index];
+	}
+
+	///Mutable overload: returns the address of the value stored at position index; valid positions are 0 .. size()-1.
+	Value* getAtIndex(int index)
+	{
+		btAssert(index >= 0 && index < m_valueArray.size());	//index is signed, so both bounds are checked
+		return &m_valueArray[index];
+	}
+
+	Value* operator[](const Key& key) {	//pure lookup: forwards to find(), so a missing key yields NULL and nothing is inserted
+		return find(key);
+	}
+
+	///Looks key up and returns the address of its value inside the map,
+	///or NULL when the key is not stored.
+	const Value*	find(const Key& key) const
+	{
+		const int index = findIndex(key);
+		if (index != BT_HASH_NULL)
+			return &m_valueArray[index];
+		return NULL;
+	}
+
+	///Mutable overload: returns the address of the value stored under key,
+	///or NULL when the key is not stored.
+	Value*	find(const Key& key)
+	{
+		const int index = findIndex(key);
+		if (index != BT_HASH_NULL)
+			return &m_valueArray[index];
+		return NULL;
+	}
+
+
+	///Returns the position of key in the key/value arrays, or BT_HASH_NULL when it is not stored.
+	int	findIndex(const Key& key) const
+	{
+		const unsigned int bucket = key.getHash() & (m_valueArray.capacity()-1);
+		//the tables may be smaller than the bucket index (for instance while they are still empty)
+		if (bucket >= (unsigned int)m_hashTable.size())
+			return BT_HASH_NULL;
+
+		//walk the chain of this bucket until the key matches or the chain ends
+		for (int cursor = m_hashTable[bucket]; cursor != BT_HASH_NULL; cursor = m_next[cursor])
+		{
+			if (key.equals(m_keyArray[cursor]))
+				return cursor;
+		}
+		return BT_HASH_NULL;
+	}
+
+	void	clear()	//clears all four arrays: both lookup tables and the parallel value/key storage
+	{
+		m_hashTable.clear();
+		m_next.clear();
+		m_valueArray.clear();
+		m_keyArray.clear();
+	}
+
+};
+
+#endif //BT_HASH_MAP_H
diff --git a/src/bullet/LinearMath/btIDebugDraw.h b/src/bullet/LinearMath/btIDebugDraw.h
new file mode 100644
index 00000000..935502f8
--- /dev/null
+++ b/src/bullet/LinearMath/btIDebugDraw.h
@@ -0,0 +1,418 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_IDEBUG_DRAW__H
+#define BT_IDEBUG_DRAW__H
+
+#include "btVector3.h"
+#include "btTransform.h"
+
+
+///The btIDebugDraw interface class allows hooking up a debug renderer to visually debug simulations.
+///Typical use case: create a debug drawer object, and assign it to a btCollisionWorld or btDynamicsWorld using setDebugDrawer and call debugDrawWorld.
+///A class that implements the btIDebugDraw interface has to implement the drawLine method at a minimum.
+///For color arguments the X,Y,Z components refer to Red, Green and Blue each in the range [0..1]
+class	btIDebugDraw
+{
+	public:
+
+	enum	DebugDrawModes	//every named mode is zero or a distinct power of two; presumably combined with bitwise OR for setDebugMode
+	{
+		DBG_NoDebug=0,
+		DBG_DrawWireframe = 1,
+		DBG_DrawAabb=2,
+		DBG_DrawFeaturesText=4,
+		DBG_DrawContactPoints=8,
+		DBG_NoDeactivation=16,
+		DBG_NoHelpText = 32,
+		DBG_DrawText=64,
+		DBG_ProfileTimings = 128,
+		DBG_EnableSatComparison = 256,
+		DBG_DisableBulletLCP = 512,
+		DBG_EnableCCD = 1024,
+		DBG_DrawConstraints = (1 << 11),
+		DBG_DrawConstraintLimits = (1 << 12),
+		DBG_FastWireframe = (1<<13),
+        DBG_DrawNormals = (1<<14),
+		DBG_MAX_DEBUG_DRAW_MODE	//not a flag: takes the implicit value (1<<14)+1
+	};
+
+	virtual ~btIDebugDraw() {};
+
+	virtual void	drawLine(const btVector3& from,const btVector3& to,const btVector3& color)=0;
+		
+	virtual void    drawLine(const btVector3& from,const btVector3& to, const btVector3& fromColor, const btVector3& toColor)
+	{
+        (void) toColor;	//unused: this default draws the whole segment in fromColor; override to honour both colours
+		drawLine (from, to, fromColor);
+	}
+
+	///Default sphere: for each pair of local axes, a four-segment loop through the points at distance radius along +/- those axes.
+	virtual void	drawSphere(btScalar radius, const btTransform& transform, const btVector3& color)
+	{
+		const btVector3 center = transform.getOrigin();
+		const btVector3 ex = transform.getBasis() * btVector3(radius,0,0);
+		const btVector3 ey = transform.getBasis() * btVector3(0,radius,0);
+		const btVector3 ez = transform.getBasis() * btVector3(0,0,radius);
+
+		// loop in the local XY plane
+		drawLine(center-ex, center+ey, color);
+		drawLine(center+ey, center+ex, color);
+		drawLine(center+ex, center-ey, color);
+		drawLine(center-ey, center-ex, color);
+
+		// loop in the local XZ plane
+		drawLine(center-ex, center+ez, color);
+		drawLine(center+ez, center+ex, color);
+		drawLine(center+ex, center-ez, color);
+		drawLine(center-ez, center-ex, color);
+
+		// loop in the local YZ plane
+		drawLine(center-ey, center+ez, color);
+		drawLine(center+ez, center+ey, color);
+		drawLine(center+ey, center-ez, color);
+		drawLine(center-ez, center-ey, color);
+	}
+	
+	virtual void	drawSphere (const btVector3& p, btScalar radius, const btVector3& color)
+	{
+		btTransform tr;	//convenience overload: builds an identity transform positioned at p
+		tr.setIdentity();
+		tr.setOrigin(p);
+		drawSphere(radius,tr,color);	//and forwards to the transform-based overload
+	}
+	
+	virtual	void	drawTriangle(const btVector3& v0,const btVector3& v1,const btVector3& v2,const btVector3& /*n0*/,const btVector3& /*n1*/,const btVector3& /*n2*/,const btVector3& color, btScalar alpha)
+	{
+		drawTriangle(v0,v1,v2,color,alpha);	//the three normals are ignored by this default
+	}
+	virtual	void	drawTriangle(const btVector3& v0,const btVector3& v1,const btVector3& v2,const btVector3& color, btScalar /*alpha*/)
+	{
+		drawLine(v0,v1,color);	//default draws only the three edges; alpha is unused
+		drawLine(v1,v2,color);
+		drawLine(v2,v0,color);
+	}
+
+	virtual void	drawContactPoint(const btVector3& PointOnB,const btVector3& normalOnB,btScalar distance,int lifeTime,const btVector3& color)=0;
+
+	virtual void	reportErrorWarning(const char* warningString) = 0;
+
+	virtual void	draw3dText(const btVector3& location,const char* textString) = 0;
+	
+	virtual void	setDebugMode(int debugMode) =0;
+	
+	virtual int		getDebugMode() const = 0;
+
+	virtual void drawAabb(const btVector3& from,const btVector3& to,const btVector3& color)
+	{
+		//Draws the 12 edges of the box spanned by from/to as 4 passes of 3 drawLine calls each.
+		btVector3 halfExtents = (to-from)* 0.5f;
+		btVector3 center = (to+from) *0.5f;
+		int i,j;
+		//edgecoord holds the +1/-1 sign per axis of the current corner; pa/pb are the two ends of one edge
+		btVector3 edgecoord(1.f,1.f,1.f),pa,pb;
+		for (i=0;i<4;i++)
+		{
+			for (j=0;j<3;j++)
+			{
+				pa = btVector3(edgecoord[0]*halfExtents[0], edgecoord[1]*halfExtents[1],		
+					edgecoord[2]*halfExtents[2]);
+				pa+=center;
+				//flipping the sign of one axis (x, then y, then z) moves to the adjacent corner
+				int othercoord = j%3;
+				edgecoord[othercoord]*=-1.f;
+				pb = btVector3(edgecoord[0]*halfExtents[0], edgecoord[1]*halfExtents[1],	
+					edgecoord[2]*halfExtents[2]);
+				pb+=center;
+
+				drawLine(pa,pb,color);
+			}
+			edgecoord = btVector3(-1.f,-1.f,-1.f);	//start corner of the next pass: all negative ...
+			if (i<3)
+				edgecoord[i]*=-1.f;	//... with axis i made positive (the pass after i==2 starts at all-negative)
+		}
+	}
+	virtual void drawTransform(const btTransform& transform, btScalar orthoLen)
+	{
+		btVector3 start = transform.getOrigin();	//all three axis lines start at the origin of transform and have length orthoLen in local space
+		drawLine(start, start+transform.getBasis() * btVector3(orthoLen, 0, 0), btVector3(0.7f,0,0));	//local X axis, red
+		drawLine(start, start+transform.getBasis() * btVector3(0, orthoLen, 0), btVector3(0,0.7f,0));	//local Y axis, green
+		drawLine(start, start+transform.getBasis() * btVector3(0, 0, orthoLen), btVector3(0,0,0.7f));	//local Z axis, blue
+	}
+
+	virtual void drawArc(const btVector3& center, const btVector3& normal, const btVector3& axis, btScalar radiusA, btScalar radiusB, btScalar minAngle, btScalar maxAngle, 
+				const btVector3& color, bool drawSect, btScalar stepDegrees = btScalar(10.f))
+	{
+		const btVector3& vx = axis;	//the arc is traced as center + radiusA*cos(t)*vx + radiusB*sin(t)*vy for t from minAngle to maxAngle
+		btVector3 vy = normal.cross(axis);
+		btScalar step = stepDegrees * SIMD_RADS_PER_DEG;	//requested segment size converted to radians
+		int nSteps = (int)((maxAngle - minAngle) / step);
+		if(!nSteps) nSteps = 1;	//always at least one segment
+		btVector3 prev = center + radiusA * vx * btCos(minAngle) + radiusB * vy * btSin(minAngle);
+		if(drawSect)
+		{
+			drawLine(center, prev, color);	//drawSect: spoke from the centre to the start of the arc
+		}
+		for(int i = 1; i <= nSteps; i++)
+		{
+			btScalar angle = minAngle + (maxAngle - minAngle) * btScalar(i) / btScalar(nSteps);	//the range is split evenly, so the last point lands on maxAngle
+			btVector3 next = center + radiusA * vx * btCos(angle) + radiusB * vy * btSin(angle);
+			drawLine(prev, next, color);
+			prev = next;
+		}
+		if(drawSect)
+		{
+			drawLine(center, prev, color);	//drawSect: spoke from the centre to the end of the arc
+		}
+	}
+	///Draws a latitude/longitude wireframe patch of a sphere of the given radius around center: up is the polar axis,
+	///axis the zero-longitude direction, minTh..maxTh bound the latitude and minPs..maxPs the longitude (radians).
+	///The number of points per row is capped at the row buffer size, so a very small stepDegrees gives a coarser grid instead of writing past vA/vB.
+	virtual void drawSpherePatch(const btVector3& center, const btVector3& up, const btVector3& axis, btScalar radius, 
+		btScalar minTh, btScalar maxTh, btScalar minPs, btScalar maxPs, const btVector3& color, btScalar stepDegrees = btScalar(10.f))
+	{
+		const int maxVerts = 74;	//capacity of each row buffer
+		btVector3 vA[maxVerts];
+		btVector3 vB[maxVerts];
+		btVector3 *pvA = vA, *pvB = vB, *pT;
+		btVector3 npole = center + up * radius;
+		btVector3 spole = center - up * radius;
+		btVector3 arcStart;
+		btScalar step = stepDegrees * SIMD_RADS_PER_DEG;
+		const btVector3& kv = up;
+		const btVector3& iv = axis;
+		btVector3 jv = kv.cross(iv);
+		bool drawN = false, drawS = false;
+		if(minTh <= -SIMD_HALF_PI)
+		{
+			minTh = -SIMD_HALF_PI + step;
+			drawN = true;	//NOTE(review): minTh is the end nearest spole, yet this flag enables the npole fan below -- confirm the intent
+		}
+		if(maxTh >= SIMD_HALF_PI)
+		{
+			maxTh = SIMD_HALF_PI - step;
+			drawS = true;
+		}
+		if(minTh > maxTh)
+		{
+			minTh = -SIMD_HALF_PI + step;
+			maxTh =  SIMD_HALF_PI - step;
+			drawN = drawS = true;
+		}
+		int n_hor = (int)((maxTh - minTh) / step) + 1;
+		if(n_hor < 2) n_hor = 2;
+		btScalar step_h = (maxTh - minTh) / btScalar(n_hor - 1);
+		bool isClosed = false;
+		if(minPs > maxPs)
+		{
+			minPs = -SIMD_PI + step;
+			maxPs =  SIMD_PI;
+			isClosed = true;
+		}
+		else if((maxPs - minPs) >= SIMD_PI * btScalar(2.f))
+		{
+			isClosed = true;
+		}
+		int n_vert = (int)((maxPs - minPs) / step) + 1;
+		if(n_vert < 2) n_vert = 2;
+		if(n_vert > maxVerts) n_vert = maxVerts;	//never index past the row buffers; step_v below adapts to the capped count
+		btScalar step_v = (maxPs - minPs) / btScalar(n_vert - 1);
+		for(int i = 0; i < n_hor; i++)
+		{
+			btScalar th = minTh + btScalar(i) * step_h;
+			btScalar sth = radius * btSin(th);
+			btScalar cth = radius * btCos(th);
+			for(int j = 0; j < n_vert; j++)
+			{
+				btScalar psi = minPs + btScalar(j) * step_v;
+				btScalar sps = btSin(psi);
+				btScalar cps = btCos(psi);
+				pvB[j] = center + cth * cps * iv + cth * sps * jv + sth * kv;
+				if(i)
+				{
+					drawLine(pvA[j], pvB[j], color);
+				}
+				else if(drawS)
+				{
+					drawLine(spole, pvB[j], color);
+				}
+				if(j)
+				{
+					drawLine(pvB[j-1], pvB[j], color);
+				}
+				else
+				{
+					arcStart = pvB[j];
+				}
+				if((i == (n_hor - 1)) && drawN)
+				{
+					drawLine(npole, pvB[j], color);
+				}
+				if(isClosed)
+				{
+					if(j == (n_vert-1))
+					{
+						drawLine(arcStart, pvB[j], color);
+					}
+				}
+				else
+				{
+					if(((!i) || (i == (n_hor-1))) && ((!j) || (j == (n_vert-1))))
+					{
+						drawLine(center, pvB[j], color);
+					}
+				}
+			}
+			pT = pvA; pvA = pvB; pvB = pT;
+		}
+	}
+	
+	virtual void drawBox(const btVector3& bbMin, const btVector3& bbMax, const btVector3& color)	//12 edges of the axis-aligned box between bbMin and bbMax
+	{
+		drawLine(btVector3(bbMin[0], bbMin[1], bbMin[2]), btVector3(bbMax[0], bbMin[1], bbMin[2]), color);	//first four: rectangle at z = bbMin[2]
+		drawLine(btVector3(bbMax[0], bbMin[1], bbMin[2]), btVector3(bbMax[0], bbMax[1], bbMin[2]), color);
+		drawLine(btVector3(bbMax[0], bbMax[1], bbMin[2]), btVector3(bbMin[0], bbMax[1], bbMin[2]), color);
+		drawLine(btVector3(bbMin[0], bbMax[1], bbMin[2]), btVector3(bbMin[0], bbMin[1], bbMin[2]), color);
+		drawLine(btVector3(bbMin[0], bbMin[1], bbMin[2]), btVector3(bbMin[0], bbMin[1], bbMax[2]), color);	//next four: edges along z joining the two rectangles
+		drawLine(btVector3(bbMax[0], bbMin[1], bbMin[2]), btVector3(bbMax[0], bbMin[1], bbMax[2]), color);
+		drawLine(btVector3(bbMax[0], bbMax[1], bbMin[2]), btVector3(bbMax[0], bbMax[1], bbMax[2]), color);
+		drawLine(btVector3(bbMin[0], bbMax[1], bbMin[2]), btVector3(bbMin[0], bbMax[1], bbMax[2]), color);
+		drawLine(btVector3(bbMin[0], bbMin[1], bbMax[2]), btVector3(bbMax[0], bbMin[1], bbMax[2]), color);	//last four: rectangle at z = bbMax[2]
+		drawLine(btVector3(bbMax[0], bbMin[1], bbMax[2]), btVector3(bbMax[0], bbMax[1], bbMax[2]), color);
+		drawLine(btVector3(bbMax[0], bbMax[1], bbMax[2]), btVector3(bbMin[0], bbMax[1], bbMax[2]), color);
+		drawLine(btVector3(bbMin[0], bbMax[1], bbMax[2]), btVector3(bbMin[0], bbMin[1], bbMax[2]), color);
+	}
+	virtual void drawBox(const btVector3& bbMin, const btVector3& bbMax, const btTransform& trans, const btVector3& color)	//same 12 edges, with every corner mapped through trans first
+	{
+		drawLine(trans * btVector3(bbMin[0], bbMin[1], bbMin[2]), trans * btVector3(bbMax[0], bbMin[1], bbMin[2]), color);	//first four: rectangle at local z = bbMin[2]
+		drawLine(trans * btVector3(bbMax[0], bbMin[1], bbMin[2]), trans * btVector3(bbMax[0], bbMax[1], bbMin[2]), color);
+		drawLine(trans * btVector3(bbMax[0], bbMax[1], bbMin[2]), trans * btVector3(bbMin[0], bbMax[1], bbMin[2]), color);
+		drawLine(trans * btVector3(bbMin[0], bbMax[1], bbMin[2]), trans * btVector3(bbMin[0], bbMin[1], bbMin[2]), color);
+		drawLine(trans * btVector3(bbMin[0], bbMin[1], bbMin[2]), trans * btVector3(bbMin[0], bbMin[1], bbMax[2]), color);	//next four: edges along local z
+		drawLine(trans * btVector3(bbMax[0], bbMin[1], bbMin[2]), trans * btVector3(bbMax[0], bbMin[1], bbMax[2]), color);
+		drawLine(trans * btVector3(bbMax[0], bbMax[1], bbMin[2]), trans * btVector3(bbMax[0], bbMax[1], bbMax[2]), color);
+		drawLine(trans * btVector3(bbMin[0], bbMax[1], bbMin[2]), trans * btVector3(bbMin[0], bbMax[1], bbMax[2]), color);
+		drawLine(trans * btVector3(bbMin[0], bbMin[1], bbMax[2]), trans * btVector3(bbMax[0], bbMin[1], bbMax[2]), color);	//last four: rectangle at local z = bbMax[2]
+		drawLine(trans * btVector3(bbMax[0], bbMin[1], bbMax[2]), trans * btVector3(bbMax[0], bbMax[1], bbMax[2]), color);
+		drawLine(trans * btVector3(bbMax[0], bbMax[1], bbMax[2]), trans * btVector3(bbMin[0], bbMax[1], bbMax[2]), color);
+		drawLine(trans * btVector3(bbMin[0], bbMax[1], bbMax[2]), trans * btVector3(bbMin[0], bbMin[1], bbMax[2]), color);
+	}
+
+	virtual void drawCapsule(btScalar radius, btScalar halfHeight, int upAxis, const btTransform& transform, const btVector3& color)
+	{
+		btVector3 capStart(0.f,0.f,0.f);	//local-space centre of the first end sphere: -halfHeight along upAxis
+		capStart[upAxis] = -halfHeight;
+
+		btVector3 capEnd(0.f,0.f,0.f);	//local-space centre of the second end sphere: +halfHeight along upAxis
+		capEnd[upAxis] = halfHeight;
+
+		// Draw the ends
+		{
+			//copy of transform whose origin is moved to the world-space position of capStart
+			btTransform childTransform = transform;
+			childTransform.getOrigin() = transform * capStart;
+			drawSphere(radius, childTransform, color);
+		}
+
+		{
+			btTransform childTransform = transform;
+			childTransform.getOrigin() = transform * capEnd;
+			drawSphere(radius, childTransform, color);
+		}
+
+		// Draw some additional lines
+		btVector3 start = transform.getOrigin();
+		//two side lines offset by +radius and -radius along the axis after upAxis
+		capStart[(upAxis+1)%3] = radius;
+		capEnd[(upAxis+1)%3] = radius;
+		drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
+		capStart[(upAxis+1)%3] = -radius;
+		capEnd[(upAxis+1)%3] = -radius;
+		drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
+		//reset that offset, then draw two more side lines offset along the remaining axis
+		capStart[(upAxis+1)%3] = 0.f;
+		capEnd[(upAxis+1)%3] = 0.f;
+
+		capStart[(upAxis+2)%3] = radius;
+		capEnd[(upAxis+2)%3] = radius;
+		drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
+		capStart[(upAxis+2)%3] = -radius;
+		capEnd[(upAxis+2)%3] = -radius;
+		drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
+	}
+
+	virtual void drawCylinder(btScalar radius, btScalar halfHeight, int upAxis, const btTransform& transform, const btVector3& color)
+	{
+		btVector3 start = transform.getOrigin();
+		btVector3	offsetHeight(0,0,0);	//local offset from the centre to the cap at +halfHeight along upAxis
+		offsetHeight[upAxis] = halfHeight;
+		btVector3	offsetRadius(0,0,0);	//local offset of length radius along the axis after upAxis
+		offsetRadius[(upAxis+1)%3] = radius;
+		drawLine(start+transform.getBasis() * (offsetHeight+offsetRadius),start+transform.getBasis() * (-offsetHeight+offsetRadius),color);	//two opposite side lines
+		drawLine(start+transform.getBasis() * (offsetHeight-offsetRadius),start+transform.getBasis() * (-offsetHeight-offsetRadius),color);
+		//each cap is a drawArc from 0 to SIMD_2_PI, without sector lines, around the upAxis direction
+		// Drawing top and bottom caps of the cylinder
+		btVector3 yaxis(0,0,0);
+		yaxis[upAxis] = btScalar(1.0);
+		btVector3 xaxis(0,0,0);
+		xaxis[(upAxis+1)%3] = btScalar(1.0);
+		drawArc(start-transform.getBasis()*(offsetHeight),transform.getBasis()*yaxis,transform.getBasis()*xaxis,radius,radius,0,SIMD_2_PI,color,false,btScalar(10.0));
+		drawArc(start+transform.getBasis()*(offsetHeight),transform.getBasis()*yaxis,transform.getBasis()*xaxis,radius,radius,0,SIMD_2_PI,color,false,btScalar(10.0));
+	}
+
+	///Default cone: four lines from the apex to the base rim plus the base circle; apex and base sit at +/- height/2 along upAxis from the origin of transform.
+	virtual void drawCone(btScalar radius, btScalar height, int upAxis, const btTransform& transform, const btVector3& color)
+	{
+		const btVector3 origin = transform.getOrigin();
+		const int sideAxis = (upAxis+1)%3;
+		btVector3	halfUp(0,0,0);
+		halfUp[upAxis] = height * btScalar(0.5);
+		btVector3	side1(0,0,0);
+		side1[sideAxis] = radius;
+		btVector3	side2(0,0,0);
+		side2[(upAxis+2)%3] = radius;
+
+		const btVector3 apex = origin+transform.getBasis() * (halfUp);
+		drawLine(apex,origin+transform.getBasis() * (-halfUp+side1),color);
+		drawLine(apex,origin+transform.getBasis() * (-halfUp-side1),color);
+		drawLine(apex,origin+transform.getBasis() * (-halfUp+side2),color);
+		drawLine(apex,origin+transform.getBasis() * (-halfUp-side2),color);
+		// Drawing the base of the cone
+		btVector3 yaxis(0,0,0);
+		yaxis[upAxis] = btScalar(1.0);
+		btVector3 xaxis(0,0,0);
+		xaxis[sideAxis] = btScalar(1.0);
+		drawArc(origin-transform.getBasis()*(halfUp),transform.getBasis()*yaxis,transform.getBasis()*xaxis,radius,radius,0,SIMD_2_PI,color,false,btScalar(10.0));
+	}
+
+	virtual void drawPlane(const btVector3& planeNormal, btScalar planeConst, const btTransform& transform, const btVector3& color)
+	{
+		btVector3 planeOrigin = planeNormal * planeConst;	//point on the plane reached by scaling the normal by planeConst
+		btVector3 vec0,vec1;
+		btPlaneSpace1(planeNormal,vec0,vec1);	//fills vec0/vec1; presumably two directions spanning the plane -- confirm in btPlaneSpace1
+		btScalar vecLen = 100.f;	//fixed half-length of the two drawn lines
+		btVector3 pt0 = planeOrigin + vec0*vecLen;
+		btVector3 pt1 = planeOrigin - vec0*vecLen;
+		btVector3 pt2 = planeOrigin + vec1*vecLen;
+		btVector3 pt3 = planeOrigin - vec1*vecLen;
+		drawLine(transform*pt0,transform*pt1,color);	//the plane is shown as a cross: one line along vec0 ...
+		drawLine(transform*pt2,transform*pt3,color);	//... and one along vec1, both through planeOrigin
+	}
+};
+
+
+#endif //BT_IDEBUG_DRAW__H
+
diff --git a/src/bullet/LinearMath/btList.h b/src/bullet/LinearMath/btList.h
new file mode 100644
index 00000000..eec80a70
--- /dev/null
+++ b/src/bullet/LinearMath/btList.h
@@ -0,0 +1,73 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_GEN_LIST_H
+#define BT_GEN_LIST_H
+
+///Node of an intrusive doubly linked list; a default-constructed node has no neighbours.
+class btGEN_Link {
+public:
+    btGEN_Link() : m_next(0), m_prev(0) {}
+    btGEN_Link(btGEN_Link *next, btGEN_Link *prev) : m_next(next), m_prev(prev) {}
+
+    btGEN_Link *getNext() const { return m_next; }
+    btGEN_Link *getPrev() const { return m_prev; }
+    ///A node counts as head when it has no predecessor and as tail when it has no successor.
+    bool isHead() const { return !m_prev; }
+    bool isTail() const { return !m_next; }
+    ///Splices this node in directly before link; link must already have a predecessor.
+    void insertBefore(btGEN_Link *link) {
+        m_prev         = link->m_prev;
+        m_next         = link;
+        m_next->m_prev = this;
+        m_prev->m_next = this;
+    }
+    ///Splices this node in directly after link; link must already have a successor.
+    void insertAfter(btGEN_Link *link) {
+        m_prev         = link;
+        m_next         = link->m_next;
+        m_next->m_prev = this;
+        m_prev->m_next = this;
+    }
+    ///Unlinks this node by joining its two neighbours; its own pointers are left as they were.
+    void remove() {
+        m_prev->m_next = m_next;
+        m_next->m_prev = m_prev;
+    }
+private:
+    btGEN_Link  *m_next;
+    btGEN_Link  *m_prev;
+};
+
+class btGEN_List {
+public:
+    btGEN_List() : m_head(&m_tail, 0), m_tail(0, &m_head) {}
+    // m_head and m_tail are sentinel nodes owned by the list; an empty list is just head <-> tail.
+    btGEN_Link *getHead() const { return m_head.getNext(); } 
+    btGEN_Link *getTail() const { return m_tail.getPrev(); } 
+    // On an empty list getHead() returns the tail sentinel and getTail() the head sentinel, never null.
+    void addHead(btGEN_Link *link) { link->insertAfter(&m_head); }
+    void addTail(btGEN_Link *link) { link->insertBefore(&m_tail); }
+    // The list only links nodes supplied by the caller; nothing here allocates or frees them.
+private:
+    btGEN_Link m_head;
+    btGEN_Link m_tail;
+};
+
+#endif //BT_GEN_LIST_H
+
+
+
diff --git a/src/bullet/LinearMath/btMatrix3x3.h b/src/bullet/LinearMath/btMatrix3x3.h
new file mode 100644
index 00000000..d0234a04
--- /dev/null
+++ b/src/bullet/LinearMath/btMatrix3x3.h
@@ -0,0 +1,771 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef	BT_MATRIX3x3_H
+#define BT_MATRIX3x3_H
+
+#include "btVector3.h"
+#include "btQuaternion.h"
+
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btMatrix3x3Data	btMatrix3x3DoubleData 
+#else
+#define btMatrix3x3Data	btMatrix3x3FloatData
+#endif //BT_USE_DOUBLE_PRECISION
+
+
+/**@brief The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with btQuaternion, btTransform and btVector3.
+* Make sure to only include a pure orthogonal matrix without scaling. */
+class btMatrix3x3 {
+
+	///Data storage for the matrix, each vector is a row of the matrix
+	btVector3 m_el[3];
+
+public:
+	/** @brief No initialization constructor */
+	btMatrix3x3 () {}
+
+	//		explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
+
+	/**@brief Constructor from Quaternion */
+	explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
+	/*
+	template <typename btScalar>
+	Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
+	{ 
+	setEulerYPR(yaw, pitch, roll);
+	}
+	*/
+	/** @brief Constructor with row major formatting */
+	btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
+		const btScalar& yx, const btScalar& yy, const btScalar& yz,
+		const btScalar& zx, const btScalar& zy, const btScalar& zz)
+	{ 
+		setValue(xx, xy, xz, 
+			yx, yy, yz, 
+			zx, zy, zz);
+	}
+	/** @brief Copy constructor */
+	SIMD_FORCE_INLINE btMatrix3x3 (const btMatrix3x3& other)
+	{
+		m_el[0] = other.m_el[0];
+		m_el[1] = other.m_el[1];
+		m_el[2] = other.m_el[2];
+	}
+	/** @brief Assignment Operator */
+	SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& other)
+	{
+		m_el[0] = other.m_el[0];
+		m_el[1] = other.m_el[1];
+		m_el[2] = other.m_el[2];
+		return *this;
+	}
+
+	/** @brief Get a column of the matrix as a vector 
+	*  @param i Column number 0 indexed */
+	SIMD_FORCE_INLINE btVector3 getColumn(int i) const
+	{
+		return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
+	}
+
+
+	/** @brief Get a row of the matrix as a vector 
+	*  @param i Row number 0 indexed */
+	SIMD_FORCE_INLINE const btVector3& getRow(int i) const
+	{
+		btFullAssert(0 <= i && i < 3);
+		return m_el[i];
+	}
+
+	/** @brief Get a mutable reference to a row of the matrix as a vector 
+	*  @param i Row number 0 indexed */
+	SIMD_FORCE_INLINE btVector3&  operator[](int i)
+	{ 
+		btFullAssert(0 <= i && i < 3);
+		return m_el[i]; 
+	}
+
+	/** @brief Get a const reference to a row of the matrix as a vector 
+	*  @param i Row number 0 indexed */
+	SIMD_FORCE_INLINE const btVector3& operator[](int i) const
+	{
+		btFullAssert(0 <= i && i < 3);
+		return m_el[i]; 
+	}
+
+	/** @brief Multiply by the target matrix on the right
+	*  @param m Rotation matrix to be applied 
+	* Equivalent to this = this * m */
+	btMatrix3x3& operator*=(const btMatrix3x3& m); 
+
+	/** @brief Add the target matrix to this matrix, element by element
+	*  @param m matrix to be added 
+	* Equivalent to this = this + m */
+	btMatrix3x3& operator+=(const btMatrix3x3& m); 
+
+	/** @brief Subtract the target matrix from this matrix, element by element
+	*  @param m matrix to be subtracted 
+	* Equivalent to this = this - m */
+	btMatrix3x3& operator-=(const btMatrix3x3& m); 
+
+	/** @brief Set from the rotational part of a 4x4 OpenGL matrix
+	*  @param m A pointer to the beginning of the array of scalars*/
+	void setFromOpenGLSubMatrix(const btScalar *m)
+	{
+		m_el[0].setValue(m[0],m[4],m[8]);
+		m_el[1].setValue(m[1],m[5],m[9]);
+		m_el[2].setValue(m[2],m[6],m[10]);
+
+	}
+	/** @brief Set the values of the matrix explicitly (row major)
+	*  @param xx Top left
+	*  @param xy Top Middle
+	*  @param xz Top Right
+	*  @param yx Middle Left
+	*  @param yy Middle Middle
+	*  @param yz Middle Right
+	*  @param zx Bottom Left
+	*  @param zy Bottom Middle
+	*  @param zz Bottom Right*/
+	void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz, 
+		const btScalar& yx, const btScalar& yy, const btScalar& yz, 
+		const btScalar& zx, const btScalar& zy, const btScalar& zz)
+	{
+		m_el[0].setValue(xx,xy,xz);
+		m_el[1].setValue(yx,yy,yz);
+		m_el[2].setValue(zx,zy,zz);
+	}
+
+	/** @brief Set the matrix from a quaternion
+	*  @param q The Quaternion to match */  
+	void setRotation(const btQuaternion& q) 
+	{
+		btScalar d = q.length2();
+		btFullAssert(d != btScalar(0.0));
+		btScalar s = btScalar(2.0) / d;
+		btScalar xs = q.x() * s,   ys = q.y() * s,   zs = q.z() * s;
+		btScalar wx = q.w() * xs,  wy = q.w() * ys,  wz = q.w() * zs;
+		btScalar xx = q.x() * xs,  xy = q.x() * ys,  xz = q.x() * zs;
+		btScalar yy = q.y() * ys,  yz = q.y() * zs,  zz = q.z() * zs;
+		setValue(btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
+			xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
+			xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
+	}
+
+
+	/** @brief Set the matrix from yaw, pitch and roll by forwarding to setEulerZYX(roll, pitch, yaw)
+	*  @param yaw Forwarded as eulerZ, i.e. applied about the Z axis
+	*  @param pitch Forwarded as eulerY, i.e. applied about the Y axis
+	*  @param roll Forwarded as eulerX, i.e. applied about the X axis
+	*/
+	void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll) 
+	{
+		setEulerZYX(roll, pitch, yaw);
+	}
+
+	/** @brief Set the matrix from euler angles YPR around ZYX axes
+	* @param eulerX Roll about X axis
+	* @param eulerY Pitch around Y axis
+	* @param eulerZ Yaw about Z axis
+	* 
+	* These angles are used to produce a rotation matrix. The euler
+	* angles are applied in ZYX order. I.e a vector is first rotated 
+	* about X then Y and then Z
+	**/
+	void setEulerZYX(btScalar eulerX,btScalar eulerY,btScalar eulerZ) { 
+		///@todo proposed to reverse this since it's labeled zyx but takes arguments xyz and it will match all other parts of the code
+		btScalar ci ( btCos(eulerX)); 
+		btScalar cj ( btCos(eulerY)); 
+		btScalar ch ( btCos(eulerZ)); 
+		btScalar si ( btSin(eulerX)); 
+		btScalar sj ( btSin(eulerY)); 
+		btScalar sh ( btSin(eulerZ)); 
+		btScalar cc = ci * ch; 
+		btScalar cs = ci * sh; 
+		btScalar sc = si * ch; 
+		btScalar ss = si * sh;
+
+		setValue(cj * ch, sj * sc - cs, sj * cc + ss,
+			cj * sh, sj * ss + cc, sj * cs - sc, 
+			-sj,      cj * si,      cj * ci);
+	}
+
+	/**@brief Set the matrix to the identity */
+	void setIdentity()
+	{ 
+		setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0), 
+			btScalar(0.0), btScalar(1.0), btScalar(0.0), 
+			btScalar(0.0), btScalar(0.0), btScalar(1.0)); 
+	}
+
+	static const btMatrix3x3&	getIdentity()
+	{
+		static const btMatrix3x3 identityMatrix(btScalar(1.0), btScalar(0.0), btScalar(0.0), 
+			btScalar(0.0), btScalar(1.0), btScalar(0.0), 
+			btScalar(0.0), btScalar(0.0), btScalar(1.0));
+		return identityMatrix;
+	}
+
+	/**@brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective
+	* @param m The array to be filled */
+	void getOpenGLSubMatrix(btScalar *m) const 
+	{
+		m[0]  = btScalar(m_el[0].x()); 
+		m[1]  = btScalar(m_el[1].x());
+		m[2]  = btScalar(m_el[2].x());
+		m[3]  = btScalar(0.0); 
+		m[4]  = btScalar(m_el[0].y());
+		m[5]  = btScalar(m_el[1].y());
+		m[6]  = btScalar(m_el[2].y());
+		m[7]  = btScalar(0.0); 
+		m[8]  = btScalar(m_el[0].z()); 
+		m[9]  = btScalar(m_el[1].z());
+		m[10] = btScalar(m_el[2].z());
+		m[11] = btScalar(0.0); 
+	}
+
+	/**@brief Get the matrix represented as a quaternion 
+	* @param q The quaternion which will be set */
+	void getRotation(btQuaternion& q) const
+	{
+		btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
+		btScalar temp[4];
+
+		if (trace > btScalar(0.0)) 
+		{
+			btScalar s = btSqrt(trace + btScalar(1.0));
+			temp[3]=(s * btScalar(0.5));
+			s = btScalar(0.5) / s;
+
+			temp[0]=((m_el[2].y() - m_el[1].z()) * s);
+			temp[1]=((m_el[0].z() - m_el[2].x()) * s);
+			temp[2]=((m_el[1].x() - m_el[0].y()) * s);
+		} 
+		else 
+		{
+			int i = m_el[0].x() < m_el[1].y() ? 
+				(m_el[1].y() < m_el[2].z() ? 2 : 1) :
+				(m_el[0].x() < m_el[2].z() ? 2 : 0); 
+			int j = (i + 1) % 3;  
+			int k = (i + 2) % 3;
+
+			btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
+			temp[i] = s * btScalar(0.5);
+			s = btScalar(0.5) / s;
+
+			temp[3] = (m_el[k][j] - m_el[j][k]) * s;
+			temp[j] = (m_el[j][i] + m_el[i][j]) * s;
+			temp[k] = (m_el[k][i] + m_el[i][k]) * s;
+		}
+		q.setValue(temp[0],temp[1],temp[2],temp[3]);
+	}
+
+	/**@brief Get the matrix represented as yaw, pitch and roll euler angles, roundtrip with setEulerYPR
+	* @param yaw Receives atan2(m[1][0], m[0][0]); this is the angle setEulerYPR forwards as eulerZ (about the Z axis)
+	* @param pitch Receives asin(-m[2][0]); this is the angle setEulerYPR forwards as eulerY (about the Y axis)
+	* @param roll Receives atan2(m[2][1], m[2][2]); this is the angle setEulerYPR forwards as eulerX (about the X axis) */	
+	void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
+	{
+
+		// first use the normal calculus
+		yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
+		pitch = btScalar(btAsin(-m_el[2].x()));
+		roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
+
+		// on pitch = +/-HalfPI (an exact floating-point comparison) yaw and roll are each shifted by PI: down when positive, up otherwise
+		if (btFabs(pitch)==SIMD_HALF_PI)
+		{
+			if (yaw>0)
+				yaw-=SIMD_PI;
+			else
+				yaw+=SIMD_PI;
+
+			if (roll>0)
+				roll-=SIMD_PI;
+			else
+				roll+=SIMD_PI;
+		}
+	};
+
+
+	/**@brief Get the matrix represented as euler angles around ZYX
+	* @param yaw Yaw around Z axis
+	* @param pitch Pitch around Y axis
+	* @param roll Roll around X axis 
+	* @param solution_number Which of the two possible solutions to return: 1 selects the first, any other value the second */	
+	void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
+	{
+		struct Euler
+		{
+			btScalar yaw;
+			btScalar pitch;
+			btScalar roll;
+		};
+
+		Euler euler_out;
+		Euler euler_out2; //second solution
+
+		// m_el[2].x() equals -sin(pitch) for a matrix built by setEulerZYX, so a magnitude of 1 is the gimbal-lock
+		// singularity: only a combination of yaw and roll is determined there, and yaw is chosen to be 0.
+		if (btFabs(m_el[2].x()) >= 1)
+		{
+			euler_out.yaw = 0;
+			euler_out2.yaw = 0;
+
+			if (m_el[2].x() < 0)  //gimbal locked up: sin(pitch) = +1
+			{
+				// the first row is (0, sin(roll - yaw), cos(roll - yaw))
+				euler_out.pitch = SIMD_PI / btScalar(2.0);
+				euler_out2.pitch = SIMD_PI / btScalar(2.0);
+				euler_out.roll = btAtan2(m_el[0].y(), m_el[0].z());
+				euler_out2.roll = euler_out.roll;
+			}
+			else // gimbal locked down: sin(pitch) = -1
+			{
+				// the first row is (0, -sin(roll + yaw), -cos(roll + yaw))
+				euler_out.pitch = -SIMD_PI / btScalar(2.0);
+				euler_out2.pitch = -SIMD_PI / btScalar(2.0);
+				euler_out.roll = btAtan2(-m_el[0].y(), -m_el[0].z());
+				euler_out2.roll = euler_out.roll;
+			}
+		}
+		else
+		{
+			euler_out.pitch = - btAsin(m_el[2].x());
+			euler_out2.pitch = SIMD_PI - euler_out.pitch;
+
+			euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch), 
+				m_el[2].z()/btCos(euler_out.pitch));
+			euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch), 
+				m_el[2].z()/btCos(euler_out2.pitch));
+
+			euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch), 
+				m_el[0].x()/btCos(euler_out.pitch));
+			euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch), 
+				m_el[0].x()/btCos(euler_out2.pitch));
+		}
+
+		if (solution_number == 1)
+		{ 
+			yaw = euler_out.yaw; 
+			pitch = euler_out.pitch;
+			roll = euler_out.roll;
+		}
+		else
+		{ 
+			yaw = euler_out2.yaw; 
+			pitch = euler_out2.pitch;
+			roll = euler_out2.roll;
+		}
+	}
+
+	/**@brief Create a scaled copy of the matrix 
+	* @param s Scaling vector The elements of the vector will scale each column */
+
+	btMatrix3x3 scaled(const btVector3& s) const
+	{
+		return btMatrix3x3(m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
+			m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
+			m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
+	}
+
+	/**@brief Return the determinant of the matrix */
+	btScalar            determinant() const;
+	/**@brief Return the adjoint of the matrix */
+	btMatrix3x3 adjoint() const;
+	/**@brief Return the matrix with all values non negative */
+	btMatrix3x3 absolute() const;
+	/**@brief Return the transpose of the matrix */
+	btMatrix3x3 transpose() const;
+	/**@brief Return the inverse of the matrix */
+	btMatrix3x3 inverse() const; 
+
+	btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
+	btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
+
+	SIMD_FORCE_INLINE btScalar tdotx(const btVector3& v) const 
+	{
+		return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
+	}
+	SIMD_FORCE_INLINE btScalar tdoty(const btVector3& v) const 
+	{
+		return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
+	}
+	SIMD_FORCE_INLINE btScalar tdotz(const btVector3& v) const 
+	{
+		return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
+	}
+
+
+	/**@brief diagonalizes this matrix by the Jacobi method.
+	* @param rot stores the rotation from the coordinate system in which the matrix is diagonal to the original
+	* coordinate system, i.e., old_this = rot * new_this * rot^T. 
+	* @param threshold See maxSteps
+	* @param maxSteps The iteration stops when all off-diagonal elements are less than the threshold multiplied 
+	* by the sum of the absolute values of the diagonal, or when maxSteps have been executed. 
+	* 
+	* Note that this matrix is assumed to be symmetric. 
+	*/
+	void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
+	{
+		rot.setIdentity();
+		for (int step = maxSteps; step > 0; step--)
+		{
+			// find off-diagonal element [p][q] with largest magnitude
+			int p = 0;
+			int q = 1;
+			int r = 2;
+			btScalar max = btFabs(m_el[0][1]);
+			btScalar v = btFabs(m_el[0][2]);
+			if (v > max)
+			{
+				q = 2;
+				r = 1;
+				max = v;
+			}
+			v = btFabs(m_el[1][2]);
+			if (v > max)
+			{
+				p = 1;
+				q = 2;
+				r = 0;
+				max = v;
+			}
+
+			btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
+			if (max <= t)
+			{
+				if (max <= SIMD_EPSILON * t)
+				{
+					return;
+				}
+				step = 1;
+			}
+
+			// compute Jacobi rotation J which leads to a zero for element [p][q] 
+			btScalar mpq = m_el[p][q];
+			btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
+			btScalar theta2 = theta * theta;
+			btScalar cos;
+			btScalar sin;
+			if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
+			{
+				t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
+					: 1 / (theta - btSqrt(1 + theta2));
+				cos = 1 / btSqrt(1 + t * t);
+				sin = cos * t;
+			}
+			else
+			{
+				// approximation for large theta-value, i.e., a nearly diagonal matrix
+				t = 1 / (theta * (2 + btScalar(0.5) / theta2));
+				cos = 1 - btScalar(0.5) * t * t;
+				sin = cos * t;
+			}
+
+			// apply rotation to matrix (this = J^T * this * J)
+			m_el[p][q] = m_el[q][p] = 0;
+			m_el[p][p] -= t * mpq;
+			m_el[q][q] += t * mpq;
+			btScalar mrp = m_el[r][p];
+			btScalar mrq = m_el[r][q];
+			m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
+			m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
+
+			// apply rotation to rot (rot = rot * J)
+			for (int i = 0; i < 3; i++)
+			{
+				btVector3& row = rot[i];
+				mrp = row[p];
+				mrq = row[q];
+				row[p] = cos * mrp - sin * mrq;
+				row[q] = cos * mrq + sin * mrp;
+			}
+		}
+	}
+
+
+
+
+	/**@brief Calculate the matrix cofactor, i.e. the 2x2 determinant m[r1][c1] * m[r2][c2] - m[r1][c2] * m[r2][c1]
+	* @param r1 The first row to use for calculating the cofactor
+	* @param c1 The first column to use for calculating the cofactor
+	* @param r2 The second row to use for calculating the cofactor
+	* @param c2 The second column to use for calculating the cofactor
+	* See http://en.wikipedia.org/wiki/Cofactor_(linear_algebra) for more details
+	*/
+	btScalar cofac(int r1, int c1, int r2, int c2) const 
+	{
+		return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
+	}
+
+	void	serialize(struct	btMatrix3x3Data& dataOut) const;
+
+	void	serializeFloat(struct	btMatrix3x3FloatData& dataOut) const;
+
+	void	deSerialize(const struct	btMatrix3x3Data& dataIn);
+
+	void	deSerializeFloat(const struct	btMatrix3x3FloatData& dataIn);
+
+	void	deSerializeDouble(const struct	btMatrix3x3DoubleData& dataIn);
+
+};
+
+
+SIMD_FORCE_INLINE btMatrix3x3& 
+btMatrix3x3::operator*=(const btMatrix3x3& m)
+{
+	setValue(m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
+		m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
+		m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
+	return *this;
+}
+
+SIMD_FORCE_INLINE btMatrix3x3& 
+btMatrix3x3::operator+=(const btMatrix3x3& m)
+{
+	setValue(
+		m_el[0][0]+m.m_el[0][0], 
+		m_el[0][1]+m.m_el[0][1],
+		m_el[0][2]+m.m_el[0][2],
+		m_el[1][0]+m.m_el[1][0], 
+		m_el[1][1]+m.m_el[1][1],
+		m_el[1][2]+m.m_el[1][2],
+		m_el[2][0]+m.m_el[2][0], 
+		m_el[2][1]+m.m_el[2][1],
+		m_el[2][2]+m.m_el[2][2]);
+	return *this;
+}
+
+SIMD_FORCE_INLINE btMatrix3x3
+operator*(const btMatrix3x3& m, const btScalar & k)
+{
+	return btMatrix3x3(
+		m[0].x()*k,m[0].y()*k,m[0].z()*k,
+		m[1].x()*k,m[1].y()*k,m[1].z()*k,
+		m[2].x()*k,m[2].y()*k,m[2].z()*k);
+}
+
+ SIMD_FORCE_INLINE btMatrix3x3 
+operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
+{
+	return btMatrix3x3(
+	m1[0][0]+m2[0][0], 
+	m1[0][1]+m2[0][1],
+	m1[0][2]+m2[0][2],
+	m1[1][0]+m2[1][0], 
+	m1[1][1]+m2[1][1],
+	m1[1][2]+m2[1][2],
+	m1[2][0]+m2[2][0], 
+	m1[2][1]+m2[2][1],
+	m1[2][2]+m2[2][2]);
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
+{
+	return btMatrix3x3(
+	m1[0][0]-m2[0][0], 
+	m1[0][1]-m2[0][1],
+	m1[0][2]-m2[0][2],
+	m1[1][0]-m2[1][0], 
+	m1[1][1]-m2[1][1],
+	m1[1][2]-m2[1][2],
+	m1[2][0]-m2[2][0], 
+	m1[2][1]-m2[2][1],
+	m1[2][2]-m2[2][2]);
+}
+
+
+SIMD_FORCE_INLINE btMatrix3x3& 
+btMatrix3x3::operator-=(const btMatrix3x3& m)
+{
+	setValue(
+	m_el[0][0]-m.m_el[0][0], 
+	m_el[0][1]-m.m_el[0][1],
+	m_el[0][2]-m.m_el[0][2],
+	m_el[1][0]-m.m_el[1][0], 
+	m_el[1][1]-m.m_el[1][1],
+	m_el[1][2]-m.m_el[1][2],
+	m_el[2][0]-m.m_el[2][0], 
+	m_el[2][1]-m.m_el[2][1],
+	m_el[2][2]-m.m_el[2][2]);
+	return *this;
+}
+
+
+SIMD_FORCE_INLINE btScalar 
+btMatrix3x3::determinant() const
+{ 
+	return btTriple((*this)[0], (*this)[1], (*this)[2]);
+}
+
+
+SIMD_FORCE_INLINE btMatrix3x3 
+btMatrix3x3::absolute() const
+{
+	return btMatrix3x3(
+		btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
+		btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
+		btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+btMatrix3x3::transpose() const 
+{
+	return btMatrix3x3(m_el[0].x(), m_el[1].x(), m_el[2].x(),
+		m_el[0].y(), m_el[1].y(), m_el[2].y(),
+		m_el[0].z(), m_el[1].z(), m_el[2].z());
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+btMatrix3x3::adjoint() const 
+{
+	return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
+		cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
+		cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+btMatrix3x3::inverse() const
+{
+	btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
+	btScalar det = (*this)[0].dot(co);
+	btFullAssert(det != btScalar(0.0));
+	btScalar s = btScalar(1.0) / det;
+	return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
+		co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
+		co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+btMatrix3x3::transposeTimes(const btMatrix3x3& m) const
+{
+	return btMatrix3x3(
+		m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
+		m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
+		m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
+		m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
+		m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
+		m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
+		m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
+		m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
+		m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+btMatrix3x3::timesTranspose(const btMatrix3x3& m) const
+{
+	return btMatrix3x3(
+		m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
+		m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
+		m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
+
+}
+
+SIMD_FORCE_INLINE btVector3 
+operator*(const btMatrix3x3& m, const btVector3& v) 
+{
+	return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
+}
+
+
+SIMD_FORCE_INLINE btVector3
+operator*(const btVector3& v, const btMatrix3x3& m)
+{
+	return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
+{
+	return btMatrix3x3(
+		m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
+		m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
+		m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
+}
+
+/*
+SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
+return btMatrix3x3(
+m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
+m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
+m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
+m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
+m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
+m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
+m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
+m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
+m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
+}
+*/
+
+/**@brief Exact equality of two matrices: true only when all nine elements compare equal with ==. */
+SIMD_FORCE_INLINE bool operator==(const btMatrix3x3& m1, const btMatrix3x3& m2)
+{
+	for (int col = 0; col < 3; col++)	// walk column by column, top to bottom
+		for (int row = 0; row < 3; row++)
+			if (!(m1[row][col] == m2[row][col])) return false;	// stop at the first mismatch
+	return true;
+}
+
+///for serialization
+struct	btMatrix3x3FloatData
+{
+	btVector3FloatData m_el[3];
+};
+
+///for serialization
+struct	btMatrix3x3DoubleData
+{
+	btVector3DoubleData m_el[3];
+};
+
+
+	
+
+SIMD_FORCE_INLINE	void	btMatrix3x3::serialize(struct	btMatrix3x3Data& dataOut) const
+{
+	for (int i=0;i<3;i++)
+		m_el[i].serialize(dataOut.m_el[i]);
+}
+
+SIMD_FORCE_INLINE	void	btMatrix3x3::serializeFloat(struct	btMatrix3x3FloatData& dataOut) const
+{
+	for (int i=0;i<3;i++)
+		m_el[i].serializeFloat(dataOut.m_el[i]);
+}
+
+
+SIMD_FORCE_INLINE	void	btMatrix3x3::deSerialize(const struct	btMatrix3x3Data& dataIn)
+{
+	for (int i=0;i<3;i++)
+		m_el[i].deSerialize(dataIn.m_el[i]);
+}
+
+SIMD_FORCE_INLINE	void	btMatrix3x3::deSerializeFloat(const struct	btMatrix3x3FloatData& dataIn)
+{
+	for (int i=0;i<3;i++)
+		m_el[i].deSerializeFloat(dataIn.m_el[i]);
+}
+
+SIMD_FORCE_INLINE	void	btMatrix3x3::deSerializeDouble(const struct	btMatrix3x3DoubleData& dataIn)
+{
+	for (int i=0;i<3;i++)
+		m_el[i].deSerializeDouble(dataIn.m_el[i]);
+}
+
+#endif //BT_MATRIX3x3_H
+
diff --git a/src/bullet/LinearMath/btMinMax.h b/src/bullet/LinearMath/btMinMax.h
new file mode 100644
index 00000000..5b436e9b
--- /dev/null
+++ b/src/bullet/LinearMath/btMinMax.h
@@ -0,0 +1,71 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_GEN_MINMAX_H
+#define BT_GEN_MINMAX_H
+
+#include "btScalar.h"
+
+///Returns a reference to a when a < b holds, otherwise to b (so equal arguments yield b).
+template <class T> SIMD_FORCE_INLINE const T& btMin(const T& a, const T& b)
+{
+  if (a < b) return a; else return b;
+}
+
+///Returns a reference to a when a > b holds, otherwise to b (so equal arguments yield b).
+template <class T> SIMD_FORCE_INLINE const T& btMax(const T& a, const T& b)
+{
+  if (a > b) return a; else return b;
+}
+
+///Returns a reference to lb when a < lb, else to ub when ub < a, else to a; the lower bound is tested first.
+template <class T> SIMD_FORCE_INLINE const T& btClamped(const T& a, const T& lb, const T& ub)
+{
+	if (a < lb) return lb; else if (ub < a) return ub; else return a;
+}
+
+///Lowers a to b in place when b < a holds; otherwise a is left as it is.
+template <class T>
+SIMD_FORCE_INLINE void btSetMin(T& a, const T& b) 
+{
+	// strict comparison: equal values cause no assignment
+	if (b < a)
+		a = b;
+}
+
+///Raises a to b in place when a < b holds; otherwise a is left as it is.
+template <class T>
+SIMD_FORCE_INLINE void btSetMax(T& a, const T& b) 
+{
+	// strict comparison: equal values cause no assignment
+	if (a < b)
+		a = b;
+}
+
+///Clamps a in place: raised to lb when a < lb, otherwise lowered to ub when ub < a.
+///The lower bound is tested first, so a value below lb becomes lb even if lb exceeds ub.
+template <class T>
+SIMD_FORCE_INLINE void btClamp(T& a, const T& lb, const T& ub) 
+{
+	if (a < lb) 
+	{
+		a = lb; 
+		return;
+	}
+	if (ub < a) a = ub;
+}
+
+#endif //BT_GEN_MINMAX_H
diff --git a/src/bullet/LinearMath/btMotionState.h b/src/bullet/LinearMath/btMotionState.h
new file mode 100644
index 00000000..94318140
--- /dev/null
+++ b/src/bullet/LinearMath/btMotionState.h
@@ -0,0 +1,40 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_MOTIONSTATE_H
+#define BT_MOTIONSTATE_H
+
+#include "btTransform.h"
+
+///The btMotionState interface class allows the dynamics world to synchronize and interpolate the updated world transforms with graphics
+///For optimizations, potentially only moving objects get synchronized (through setWorldTransform, the only setter declared here)
+class	btMotionState
+{
+	public:
+		///Virtual so that implementations can be destroyed through a btMotionState pointer
+		virtual ~btMotionState()
+		{
+			
+		}
+		///Pure virtual; worldTrans is a non-const reference, presumably the output the implementation fills in - TODO confirm
+		virtual void	getWorldTransform(btTransform& worldTrans ) const =0;
+
+		//Bullet only calls the update of worldtransform for active objects
+		virtual void	setWorldTransform(const btTransform& worldTrans)=0;
+		
+	
+};
+
+#endif //BT_MOTIONSTATE_H
diff --git a/src/bullet/LinearMath/btPoolAllocator.h b/src/bullet/LinearMath/btPoolAllocator.h
new file mode 100755
index 00000000..ef208453
--- /dev/null
+++ b/src/bullet/LinearMath/btPoolAllocator.h
@@ -0,0 +1,121 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef _BT_POOL_ALLOCATOR_H
+#define _BT_POOL_ALLOCATOR_H
+
+#include "btScalar.h"
+#include "btAlignedAllocator.h"
+
+///The btPoolAllocator class allows to efficiently allocate a large pool of objects, instead of dynamically allocating them separately.
+///Free elements are chained through their own first bytes, so elemSize must be at least sizeof(void*).
+///The destructor frees the pool and no copy operations are declared, so instances must not be copied.
+class btPoolAllocator
+{
+	int				m_elemSize;
+	int				m_maxElements;
+	int				m_freeCount;
+	void*			m_firstFree;
+	unsigned char*	m_pool;
+
+public:
+
+	///Allocates room for maxElements elements of elemSize bytes each and chains them all into the free list.
+	///A non-positive maxElements, or a failed allocation, leaves an empty pool whose allocate() returns 0.
+	btPoolAllocator(int elemSize, int maxElements)
+		:m_elemSize(elemSize),
+		m_maxElements(maxElements > 0 ? maxElements : 0),
+		m_freeCount(0), m_firstFree(0), m_pool(0)
+	{
+		if (m_maxElements == 0)
+			return;
+		btAssert(m_elemSize >= (int)sizeof(void*));
+		m_pool = (unsigned char*) btAlignedAlloc( static_cast<unsigned int>(m_elemSize*m_maxElements),16);
+		if (!m_pool)
+		{
+			m_maxElements = 0;
+			return;
+		}
+
+		// each element stores the address of its successor; the last one terminates the list with 0
+		unsigned char* p = m_pool;
+		for (int i = 1; i < m_maxElements; ++i, p += m_elemSize)
+			*(void**)p = (p + m_elemSize);
+		*(void**)p = 0;
+		m_firstFree = m_pool;
+		m_freeCount = m_maxElements;
+	}
+
+	~btPoolAllocator()
+	{
+		if (m_pool) btAlignedFree( m_pool);
+	}
+
+	int	getFreeCount() const { return m_freeCount; }
+	int getUsedCount() const { return m_maxElements - m_freeCount; }
+	int getMaxCount() const { return m_maxElements; }
+
+	///Pops one element off the free list, or returns 0 when no free element is left.
+	void*	allocate(int size)
+	{
+		// release mode fix
+		(void)size;
+		btAssert(!size || size<=m_elemSize);
+		void* result = m_firstFree;
+		if (result)
+		{
+			m_firstFree = *(void**)result;
+			--m_freeCount;
+		}
+		return result;
+	}
+
+	///True when ptr is non-null and lies inside the pool's address range.
+	bool validPtr(void* ptr)
+	{
+		unsigned char* p = (unsigned char*)ptr;
+		return p && p >= m_pool && p < m_pool + m_maxElements * m_elemSize;
+	}
+
+	///Pushes ptr back onto the free list; a null ptr is ignored. That ptr belongs to this pool is only asserted.
+	void	freeMemory(void* ptr)
+	{
+		if (ptr) {
+			btAssert((unsigned char*)ptr >= m_pool && (unsigned char*)ptr < m_pool + m_maxElements * m_elemSize);
+
+			*(void**)ptr = m_firstFree;
+			m_firstFree = ptr;
+			++m_freeCount;
+		}
+	}
+
+	int	getElementSize() const
+	{
+		return m_elemSize;
+	}
+
+	unsigned char*	getPoolAddress()
+	{
+		return m_pool;
+	}
+
+	const unsigned char*	getPoolAddress() const
+	{
+		return m_pool;
+	}
+
+};
+
+#endif //_BT_POOL_ALLOCATOR_H
diff --git a/src/bullet/LinearMath/btQuadWord.h b/src/bullet/LinearMath/btQuadWord.h
new file mode 100644
index 00000000..d5e9daa4
--- /dev/null
+++ b/src/bullet/LinearMath/btQuadWord.h
@@ -0,0 +1,180 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_SIMD_QUADWORD_H
+#define BT_SIMD_QUADWORD_H
+
+#include "btScalar.h"
+#include "btMinMax.h"
+
+
+#if defined (__CELLOS_LV2) && defined (__SPU__)
+#include <altivec.h>
+#endif
+
+/**@brief The btQuadWord class is base class for btVector3 and btQuaternion. 
+ * Some issues under PS3 Linux with IBM 2.1 SDK, gcc compiler prevent from using aligned quadword.
+ */
+#ifndef USE_LIBSPE2
+ATTRIBUTE_ALIGNED16(class) btQuadWord
+#else
+class btQuadWord
+#endif
+{
+protected:
+
+#if defined (__SPU__) && defined (__CELLOS_LV2__)
+	union {
+		vec_float4 mVec128;
+		btScalar	m_floats[4];
+	};
+public:
+	vec_float4	get128() const
+	{
+		return mVec128;
+	}
+protected:
+#else //__CELLOS_LV2__ __SPU__
+	btScalar	m_floats[4];
+#endif //__CELLOS_LV2__ __SPU__
+
+	public:
+  
+
+  /**@brief Return the x value */
+		SIMD_FORCE_INLINE const btScalar& getX() const { return m_floats[0]; }
+  /**@brief Return the y value */
+		SIMD_FORCE_INLINE const btScalar& getY() const { return m_floats[1]; }
+  /**@brief Return the z value */
+		SIMD_FORCE_INLINE const btScalar& getZ() const { return m_floats[2]; }
+  /**@brief Set the x value */
+		SIMD_FORCE_INLINE void	setX(btScalar x) { m_floats[0] = x;};
+  /**@brief Set the y value */
+		SIMD_FORCE_INLINE void	setY(btScalar y) { m_floats[1] = y;};
+  /**@brief Set the z value */
+		SIMD_FORCE_INLINE void	setZ(btScalar z) { m_floats[2] = z;};
+  /**@brief Set the w value */
+		SIMD_FORCE_INLINE void	setW(btScalar w) { m_floats[3] = w;};
+  /**@brief Return the x value */
+		SIMD_FORCE_INLINE const btScalar& x() const { return m_floats[0]; }
+  /**@brief Return the y value */
+		SIMD_FORCE_INLINE const btScalar& y() const { return m_floats[1]; }
+  /**@brief Return the z value */
+		SIMD_FORCE_INLINE const btScalar& z() const { return m_floats[2]; }
+  /**@brief Return the w value */
+		SIMD_FORCE_INLINE const btScalar& w() const { return m_floats[3]; }
+
+	//SIMD_FORCE_INLINE btScalar&       operator[](int i)       { return (&m_floats[0])[i];	}      
+	//SIMD_FORCE_INLINE const btScalar& operator[](int i) const { return (&m_floats[0])[i]; }
+	///operator btScalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
+	SIMD_FORCE_INLINE	operator       btScalar *()       { return &m_floats[0]; }
+	SIMD_FORCE_INLINE	operator const btScalar *() const { return &m_floats[0]; }
+
+	SIMD_FORCE_INLINE	bool	operator==(const btQuadWord& other) const
+	{
+		return ((m_floats[3]==other.m_floats[3]) && (m_floats[2]==other.m_floats[2]) && (m_floats[1]==other.m_floats[1]) && (m_floats[0]==other.m_floats[0]));	// exact floating-point equality on all four components (w included), no tolerance
+	}
+
+	SIMD_FORCE_INLINE	bool	operator!=(const btQuadWord& other) const
+	{
+		return !(*this == other);
+	}
+
+  /**@brief Set x,y,z and zero w 
+   * @param x Value of x
+   * @param y Value of y
+   * @param z Value of z
+   */
+		SIMD_FORCE_INLINE void 	setValue(const btScalar& x, const btScalar& y, const btScalar& z)
+		{
+			m_floats[0]=x;
+			m_floats[1]=y;
+			m_floats[2]=z;
+			m_floats[3] = 0.f;
+		}
+
+/*		void getValue(btScalar *m) const 
+		{
+			m[0] = m_floats[0];
+			m[1] = m_floats[1];
+			m[2] = m_floats[2];
+		}
+*/
+/**@brief Set the values 
+   * @param x Value of x
+   * @param y Value of y
+   * @param z Value of z
+   * @param w Value of w
+   */
+		SIMD_FORCE_INLINE void	setValue(const btScalar& x, const btScalar& y, const btScalar& z,const btScalar& w)
+		{
+			m_floats[0]=x;
+			m_floats[1]=y;
+			m_floats[2]=z;
+			m_floats[3]=w;
+		}
+  /**@brief No initialization constructor: m_floats is left uninitialized, so set it before reading */
+		SIMD_FORCE_INLINE btQuadWord()
+		//	:m_floats[0](btScalar(0.)),m_floats[1](btScalar(0.)),m_floats[2](btScalar(0.)),m_floats[3](btScalar(0.))
+		{
+		}
+ 
+  /**@brief Three argument constructor (zeros w)
+   * @param x Value of x
+   * @param y Value of y
+   * @param z Value of z
+   */
+		SIMD_FORCE_INLINE btQuadWord(const btScalar& x, const btScalar& y, const btScalar& z)		
+		{
+			m_floats[0] = x, m_floats[1] = y, m_floats[2] = z, m_floats[3] = 0.0f;
+		}
+
+/**@brief Initializing constructor
+   * @param x Value of x
+   * @param y Value of y
+   * @param z Value of z
+   * @param w Value of w
+   */
+		SIMD_FORCE_INLINE btQuadWord(const btScalar& x, const btScalar& y, const btScalar& z,const btScalar& w) 
+		{
+			m_floats[0] = x, m_floats[1] = y, m_floats[2] = z, m_floats[3] = w;
+		}
+
+  /**@brief Set each element to the max of the current values and the values of another btQuadWord
+   * @param other The other btQuadWord to compare with 
+   */
+		SIMD_FORCE_INLINE void	setMax(const btQuadWord& other)
+		{
+			btSetMax(m_floats[0], other.m_floats[0]);
+			btSetMax(m_floats[1], other.m_floats[1]);
+			btSetMax(m_floats[2], other.m_floats[2]);
+			btSetMax(m_floats[3], other.m_floats[3]);
+		}
+  /**@brief Set each element to the min of the current values and the values of another btQuadWord
+   * @param other The other btQuadWord to compare with 
+   */
+		SIMD_FORCE_INLINE void	setMin(const btQuadWord& other)
+		{
+			btSetMin(m_floats[0], other.m_floats[0]);
+			btSetMin(m_floats[1], other.m_floats[1]);
+			btSetMin(m_floats[2], other.m_floats[2]);
+			btSetMin(m_floats[3], other.m_floats[3]);
+		}
+
+
+
+};
+
+#endif //BT_SIMD_QUADWORD_H
diff --git a/src/bullet/LinearMath/btQuaternion.h b/src/bullet/LinearMath/btQuaternion.h
new file mode 100644
index 00000000..ee79f6ea
--- /dev/null
+++ b/src/bullet/LinearMath/btQuaternion.h
@@ -0,0 +1,430 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_SIMD__QUATERNION_H_
+#define BT_SIMD__QUATERNION_H_
+
+
+#include "btVector3.h"
+#include "btQuadWord.h"
+
+/**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
+class btQuaternion : public btQuadWord {
+public:
+  /**@brief No initialization constructor */
+	btQuaternion() {}
+
+	//		template <typename btScalar>
+	//		explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
+  /**@brief Constructor from scalars */
+	btQuaternion(const btScalar& x, const btScalar& y, const btScalar& z, const btScalar& w) 
+		: btQuadWord(x, y, z, w) 
+	{}
+  /**@brief Axis angle Constructor
+   * @param axis The axis which the rotation is around
+   * @param angle The magnitude of the rotation around the axis (Radians) */
+	btQuaternion(const btVector3& axis, const btScalar& angle) 
+	{ 
+		setRotation(axis, angle); 
+	}
+  /**@brief Constructor from Euler angles
+   * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
+   * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
+   * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
+	btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
+	{ 
+#ifndef BT_EULER_DEFAULT_ZYX
+		setEuler(yaw, pitch, roll); 
+#else
+		setEulerZYX(yaw, pitch, roll); 
+#endif 
+	}
+  /**@brief Set the rotation using axis angle notation 
+   * @param axis The axis around which to rotate
+   * @param angle The magnitude of the rotation in Radians */
+	void setRotation(const btVector3& axis, const btScalar& angle)
+	{
+		btScalar d = axis.length();	// axis need not be unit length: dividing by d below normalises it
+		btAssert(d != btScalar(0.0));	// a zero-length axis has no direction to rotate about
+		btScalar s = btSin(angle * btScalar(0.5)) / d;
+		setValue(axis.x() * s, axis.y() * s, axis.z() * s, 
+			btCos(angle * btScalar(0.5)));	// q = (axis/|axis| * sin(angle/2), cos(angle/2))
+	}
+  /**@brief Set the quaternion using Euler angles
+   * @param yaw Angle around Y
+   * @param pitch Angle around X
+   * @param roll Angle around Z */
+	void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
+	{
+		btScalar halfYaw = btScalar(yaw) * btScalar(0.5);  
+		btScalar halfPitch = btScalar(pitch) * btScalar(0.5);  
+		btScalar halfRoll = btScalar(roll) * btScalar(0.5);  
+		btScalar cosYaw = btCos(halfYaw);
+		btScalar sinYaw = btSin(halfYaw);
+		btScalar cosPitch = btCos(halfPitch);
+		btScalar sinPitch = btSin(halfPitch);
+		btScalar cosRoll = btCos(halfRoll);
+		btScalar sinRoll = btSin(halfRoll);
+		setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
+			cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
+			sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
+			cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
+	}
+  /**@brief Set the quaternion using euler angles 
+   * @param yaw Angle around Z
+   * @param pitch Angle around Y
+   * @param roll Angle around X */
+	void setEulerZYX(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
+	{
+		btScalar halfYaw = btScalar(yaw) * btScalar(0.5);  
+		btScalar halfPitch = btScalar(pitch) * btScalar(0.5);  
+		btScalar halfRoll = btScalar(roll) * btScalar(0.5);  
+		btScalar cosYaw = btCos(halfYaw);
+		btScalar sinYaw = btSin(halfYaw);
+		btScalar cosPitch = btCos(halfPitch);
+		btScalar sinPitch = btSin(halfPitch);
+		btScalar cosRoll = btCos(halfRoll);
+		btScalar sinRoll = btSin(halfRoll);
+		setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
+                         cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
+                         cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
+                         cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
+	}
+  /**@brief Add two quaternions
+   * @param q The quaternion to add to this one */
+	SIMD_FORCE_INLINE	btQuaternion& operator+=(const btQuaternion& q)
+	{
+		m_floats[0] += q.x(); m_floats[1] += q.y(); m_floats[2] += q.z(); m_floats[3] += q.m_floats[3];
+		return *this;
+	}
+
+  /**@brief Subtract out a quaternion
+   * @param q The quaternion to subtract from this one */
+	btQuaternion& operator-=(const btQuaternion& q) 
+	{
+		m_floats[0] -= q.x(); m_floats[1] -= q.y(); m_floats[2] -= q.z(); m_floats[3] -= q.m_floats[3];
+		return *this;
+	}
+
+  /**@brief Scale this quaternion
+   * @param s The scalar to scale by */
+	btQuaternion& operator*=(const btScalar& s)
+	{
+		m_floats[0] *= s; m_floats[1] *= s; m_floats[2] *= s; m_floats[3] *= s;
+		return *this;
+	}
+
+  /**@brief Multiply this quaternion by q on the right
+   * @param q The other quaternion 
+   * Equivalent to this = this * q */
+	btQuaternion& operator*=(const btQuaternion& q)
+	{
+		setValue(m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
+			m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
+			m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
+			m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
+		return *this;
+	}
+  /**@brief Return the dot product between this quaternion and another
+   * @param q The other quaternion */
+	btScalar dot(const btQuaternion& q) const
+	{
+		return m_floats[0] * q.x() + m_floats[1] * q.y() + m_floats[2] * q.z() + m_floats[3] * q.m_floats[3];
+	}
+
+  /**@brief Return the length squared of the quaternion */
+	btScalar length2() const
+	{
+		return dot(*this);
+	}
+
+  /**@brief Return the length of the quaternion */
+	btScalar length() const
+	{
+		return btSqrt(length2());
+	}
+
+  /**@brief Normalize the quaternion 
+   * Such that x^2 + y^2 + z^2 +w^2 = 1 */
+	btQuaternion& normalize() 
+	{
+		return *this /= length();	// operator/= asserts on a zero divisor, so a zero-length quaternion must not be normalised
+	}
+
+  /**@brief Return a scaled version of this quaternion
+   * @param s The scale factor */
+	SIMD_FORCE_INLINE btQuaternion
+	operator*(const btScalar& s) const
+	{
+		return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
+	}
+
+
+  /**@brief Return an inversely scaled version of this quaternion
+   * @param s The inverse scale factor */
+	btQuaternion operator/(const btScalar& s) const
+	{
+		btAssert(s != btScalar(0.0));
+		return *this * (btScalar(1.0) / s);
+	}
+
+  /**@brief Inversely scale this quaternion
+   * @param s The scale factor */
+	btQuaternion& operator/=(const btScalar& s) 
+	{
+		btAssert(s != btScalar(0.0));
+		return *this *= btScalar(1.0) / s;
+	}
+
+  /**@brief Return a normalized version of this quaternion */
+	btQuaternion normalized() const 
+	{
+		return *this / length();
+	} 
+  /**@brief Return the angle between this quaternion and the other 
+   * @param q The other quaternion */
+	btScalar angle(const btQuaternion& q) const 
+	{
+		btScalar s = btSqrt(length2() * q.length2());	// product of the two lengths, so non-unit inputs are handled
+		btAssert(s != btScalar(0.0));
+		return btAcos(dot(q) / s);	// angle between the quaternions treated as 4-vectors: acos of the normalised dot product
+	}
+  /**@brief Return the angle of rotation represented by this quaternion */
+	btScalar getAngle() const 
+	{
+		btScalar s = btScalar(2.) * btAcos(m_floats[3]);	// 2*acos(w): meaningful for a unit quaternion, where w = cos(angle/2) as built by setRotation()
+		return s;
+	}
+
+	/**@brief Return the axis of the rotation represented by this quaternion */
+	btVector3 getAxis() const
+	{
+		btScalar s_squared = 1.f-m_floats[3]*m_floats[3];	// 1 - w^2, which is sin^2(angle/2) for a unit quaternion
+		// When that is (nearly) zero there is (nearly) no rotation and the axis is undefined.
+		if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
+			return btVector3(1.0, 0.0, 0.0);  // Arbitrary
+		btScalar s = 1.f/btSqrt(s_squared);
+		return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
+	}
+
+	/**@brief Return the conjugate (-x,-y,-z,w); this equals the inverse only for a unit-length quaternion */
+	btQuaternion inverse() const
+	{
+		return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
+	}
+
+  /**@brief Return the sum of this quaternion and the other 
+   * @param q2 The other quaternion */
+	SIMD_FORCE_INLINE btQuaternion
+	operator+(const btQuaternion& q2) const
+	{
+		const btQuaternion& q1 = *this;
+		return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
+	}
+
+  /**@brief Return the difference between this quaternion and the other 
+   * @param q2 The other quaternion */
+	SIMD_FORCE_INLINE btQuaternion
+	operator-(const btQuaternion& q2) const
+	{
+		const btQuaternion& q1 = *this;
+		return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
+	}
+
+  /**@brief Return the negative of this quaternion 
+   * This simply negates each element */
+	SIMD_FORCE_INLINE btQuaternion operator-() const
+	{
+		const btQuaternion& q2 = *this;
+		return btQuaternion( - q2.x(), - q2.y(),  - q2.z(),  - q2.m_floats[3]);
+	}
+  /**@brief Return whichever of qd and -qd lies farther from this quaternion (compared as 4-vectors); a tie returns -qd */
+	SIMD_FORCE_INLINE btQuaternion farthest( const btQuaternion& qd) const 
+	{
+		btQuaternion diff,sum;
+		diff = *this - qd;	// offset from qd to this
+		sum = *this + qd;	// offset from -qd to this
+		if( diff.dot(diff) > sum.dot(sum) )	// squared distances: qd is the farther of the two
+			return qd;
+		return (-qd);
+	}
+
+	/**@brief Return whichever of qd and -qd lies nearer to this quaternion (compared as 4-vectors); a tie returns -qd */
+	SIMD_FORCE_INLINE btQuaternion nearest( const btQuaternion& qd) const 
+	{
+		btQuaternion diff,sum;
+		diff = *this - qd;	// offset from qd to this
+		sum = *this + qd;	// offset from -qd to this
+		if( diff.dot(diff) < sum.dot(sum) )	// squared distances: qd is the nearer of the two
+			return qd;
+		return (-qd);
+	}
+
+
+  /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
+   * @param q The other quaternion to interpolate with 
+   * @param t The ratio between this and q to interpolate.  If t = 0 the result is this, if t=1 the result is q.
+   * Slerp interpolates assuming constant velocity.  */
+	btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
+	{
+		// Normalise the dot product so that non-unit inputs still give cos(angle).
+		const btScalar magnitude = btSqrt(length2() * q.length2());
+		btAssert(magnitude > btScalar(0));
+		const btScalar product = dot(q) / magnitude;
+
+		// Rounding can leave |product| a hair below, at, or above 1 for (anti)parallel
+		// inputs. An exact "!= 1" test lets those through, sin(theta) collapses to 0
+		// and the 1/sin(theta) factor below turns the result into inf/NaN, so treat
+		// everything within SIMD_EPSILON of 1 as "no interpolation needed".
+		if (btFabs(product) >= btScalar(1) - SIMD_EPSILON)
+		{
+			return *this;
+		}
+
+		// Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
+		const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
+		const btScalar theta = btAcos(sign * product);
+		const btScalar d = btScalar(1.0) / btSin(theta);
+		const btScalar s0 = btSin((btScalar(1.0) - t) * theta);	// weight of this
+		const btScalar s1 = btSin(sign * t * theta);	// weight of q, negated on the long-angle side
+		return btQuaternion((m_floats[0] * s0 + q.x() * s1) * d,
+			(m_floats[1] * s0 + q.y() * s1) * d,
+			(m_floats[2] * s0 + q.z() * s1) * d,
+			(m_floats[3] * s0 + q.m_floats[3] * s1) * d);
+	}
+
+	static const btQuaternion&	getIdentity()
+	{
+		static const btQuaternion identityQuat(btScalar(0.),btScalar(0.),btScalar(0.),btScalar(1.));	// (x,y,z,w) = (0,0,0,1); function-local static shared by all callers
+		return identityQuat;
+	}
+
+	SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
+
+	
+};
+
+
+
+
+
+/**@brief Return the product of two quaternions */
+SIMD_FORCE_INLINE btQuaternion
+operator*(const btQuaternion& q1, const btQuaternion& q2) {
+	return btQuaternion(q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
+		q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
+		q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
+		q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z()); 
+}
+
+SIMD_FORCE_INLINE btQuaternion
+operator*(const btQuaternion& q, const btVector3& w)	// q * (w.x, w.y, w.z, 0): the vector acts as a quaternion with zero scalar part
+{
+	return btQuaternion( q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
+		q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
+		q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
+		-q.x() * w.x() - q.y() * w.y() - q.z() * w.z()); 
+}
+
+SIMD_FORCE_INLINE btQuaternion
+operator*(const btVector3& w, const btQuaternion& q)	// (w.x, w.y, w.z, 0) * q: the vector acts as a quaternion with zero scalar part
+{
+	return btQuaternion( w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
+		w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
+		w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
+		-w.x() * q.x() - w.y() * q.y() - w.z() * q.z()); 
+}
+
+/**@brief Calculate the dot product between two quaternions */
+SIMD_FORCE_INLINE btScalar 
+dot(const btQuaternion& q1, const btQuaternion& q2) 
+{ 
+	return q1.dot(q2); 
+}
+
+
+/**@brief Return the length of a quaternion */
+SIMD_FORCE_INLINE btScalar
+length(const btQuaternion& q) 
+{ 
+	return q.length(); 
+}
+
+/**@brief Return the angle between two quaternions*/
+SIMD_FORCE_INLINE btScalar
+angle(const btQuaternion& q1, const btQuaternion& q2) 
+{ 
+	return q1.angle(q2); 
+}
+
+/**@brief Return the inverse of a quaternion*/
+SIMD_FORCE_INLINE btQuaternion
+inverse(const btQuaternion& q) 
+{
+	return q.inverse();
+}
+
+/**@brief Return the result of spherical linear interpolation between two quaternions 
+ * @param q1 The first quaternion
+ * @param q2 The second quaternion 
+ * @param t The ratio between q1 and q2.  t = 0 returns q1, t=1 returns q2 
+ * Slerp assumes constant velocity between positions. */
+SIMD_FORCE_INLINE btQuaternion
+slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t) 
+{
+	return q1.slerp(q2, t);
+}
+
+SIMD_FORCE_INLINE btVector3 
+quatRotate(const btQuaternion& rotation, const btVector3& v) 
+{
+	btQuaternion q = rotation * v;	// v enters as a quaternion with zero scalar part
+	q *= rotation.inverse();	// inverse() negates x,y,z only, so rotation is expected to be unit length
+	return btVector3(q.getX(),q.getY(),q.getZ());	// the scalar part of the product is dropped
+}
+
+SIMD_FORCE_INLINE btQuaternion 
+shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
+{
+	btVector3 c = v0.cross(v1);
+	btScalar  d = v0.dot(v1);
+	// For normalized inputs d is cos(angle); close to -1 the vectors are opposite and c vanishes.
+	if (d < -1.0 + SIMD_EPSILON)
+	{
+		btVector3 n,unused;
+		btPlaneSpace1(v0,n,unused);
+		return btQuaternion(n.x(),n.y(),n.z(),0.0f); // just pick any vector that is orthogonal to v0
+	}
+	// s = 2*cos(angle/2), so (c/s, s/2) = (axis*sin(angle/2), cos(angle/2)).
+	btScalar  s = btSqrt((1.0f + d) * 2.0f);
+	btScalar rs = 1.0f / s;
+
+	return btQuaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
+}
+
+SIMD_FORCE_INLINE btQuaternion 
+shortestArcQuatNormalize2(btVector3& v0,btVector3& v1)	// same as shortestArcQuat, but normalizes the inputs first
+{
+	v0.normalize();	// modifies the caller's vector (v0 is a non-const reference)
+	v1.normalize();	// modifies the caller's vector (v1 is a non-const reference)
+	return shortestArcQuat(v0,v1);
+}
+
+#endif //BT_SIMD__QUATERNION_H_
+
+
+
+
diff --git a/src/bullet/LinearMath/btQuickprof.cpp b/src/bullet/LinearMath/btQuickprof.cpp
new file mode 100644
index 00000000..544aee89
--- /dev/null
+++ b/src/bullet/LinearMath/btQuickprof.cpp
@@ -0,0 +1,566 @@
+/*
+
+***************************************************************************************************
+**
+** profile.cpp
+**
+** Real-Time Hierarchical Profiling for Game Programming Gems 3
+**
+** by Greg Hjelstrom & Byon Garrabrant
+**
+***************************************************************************************************/
+
+// Credits: The Clock class was inspired by the Timer classes in 
+// Ogre (www.ogre3d.org).
+
+#include "btQuickprof.h"
+
+#ifndef BT_NO_PROFILE
+
+
+static btClock gProfileClock;
+
+
+#ifdef __CELLOS_LV2__
+#include <sys/sys_time.h>
+#include <sys/time_util.h>
+#include <stdio.h>
+#endif
+
+#if defined (SUNOS) || defined (__SUNOS__) 
+#include <stdio.h> 
+#endif
+
+#if defined(WIN32) || defined(_WIN32)
+
+#define BT_USE_WINDOWS_TIMERS
+#define WIN32_LEAN_AND_MEAN
+#define NOWINRES
+#define NOMCX
+#define NOIME 
+
+#ifdef _XBOX
+	#include <Xtl.h>
+#else //_XBOX
+	#include <windows.h>
+#endif //_XBOX
+
+#include <time.h>
+
+
+#else //_WIN32
+#include <sys/time.h>
+#endif //_WIN32
+
+#define mymin(a,b) ((a) < (b) ? (a) : (b))	// smaller of a and b (the old body returned the larger); each argument may be evaluated twice
+
+struct btClockData
+{
+	// Platform-specific reference point captured by btClock::reset().
+#ifdef BT_USE_WINDOWS_TIMERS
+	LARGE_INTEGER mClockFrequency;	// filled by QueryPerformanceFrequency() in the btClock constructor
+	DWORD mStartTick;	// GetTickCount() at reset(); used to cross-check the performance counter
+	LONGLONG mPrevElapsedTime;	// elapsed counter value seen by the previous time query
+	LARGE_INTEGER mStartTime;	// QueryPerformanceCounter() at reset()
+#else
+#ifdef __CELLOS_LV2__
+	uint64_t	mStartTime;	// SYS_TIMEBASE_GET() value at reset()
+#else
+	struct timeval mStartTime;	// gettimeofday() at reset()
+#endif //__CELLOS_LV2__
+#endif //BT_USE_WINDOWS_TIMERS
+
+};
+
+///The btClock is a portable basic clock that measures accurate time in seconds, use for profiling.
+btClock::btClock()
+{
+	m_data = new btClockData;	// owned by this clock; released in ~btClock()
+#ifdef BT_USE_WINDOWS_TIMERS
+	QueryPerformanceFrequency(&m_data->mClockFrequency);	// stored once; the time queries divide counter deltas by it
+#endif
+	reset();	// the clock starts measuring from construction
+}
+
+btClock::~btClock()
+{
+	delete m_data;
+}
+
+btClock::btClock(const btClock& other)
+{
+	m_data = new btClockData;	// deep copy: each clock owns its own btClockData
+	*m_data = *other.m_data;	// the copy keeps other's reference time instead of calling reset()
+}
+
+btClock& btClock::operator=(const btClock& other)
+{
+	*m_data = *other.m_data;	// copies the timing state into the existing allocation; the m_data pointers stay distinct
+	return *this;
+}
+
+
+	/// Resets the initial reference time.
+void btClock::reset()
+{
+#ifdef BT_USE_WINDOWS_TIMERS
+	QueryPerformanceCounter(&m_data->mStartTime);
+	m_data->mStartTick = GetTickCount();
+	m_data->mPrevElapsedTime = 0;
+#else
+#ifdef __CELLOS_LV2__
+
+	typedef uint64_t  ClockSize;
+	ClockSize newTime;
+	//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
+	SYS_TIMEBASE_GET( newTime );
+	m_data->mStartTime = newTime;
+#else
+	gettimeofday(&m_data->mStartTime, 0);
+#endif
+#endif
+}
+
+/// Returns the time in ms since the last call to reset or since 
+/// the btClock was created.
+unsigned long int btClock::getTimeMilliseconds()
+{
+#ifdef BT_USE_WINDOWS_TIMERS
+	LARGE_INTEGER currentTime;
+	QueryPerformanceCounter(&currentTime);
+	LONGLONG elapsedTime = currentTime.QuadPart - 
+		m_data->mStartTime.QuadPart;
+		// Compute the number of millisecond ticks elapsed.
+	unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / 
+		m_data->mClockFrequency.QuadPart);
+		// Check for unexpected leaps in the Win32 performance counter.  
+	// (This is caused by unexpected data across the PCI to ISA 
+		// bridge, aka south bridge.  See Microsoft KB274323.)
+		unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick;
+		signed long msecOff = (signed long)(msecTicks - elapsedTicks);
+		if (msecOff < -100 || msecOff > 100)
+		{
+			// Adjust the starting time forwards.
+			LONGLONG msecAdjustment = mymin(msecOff * 
+				m_data->mClockFrequency.QuadPart / 1000, elapsedTime - 
+				m_data->mPrevElapsedTime);
+			m_data->mStartTime.QuadPart += msecAdjustment;
+			elapsedTime -= msecAdjustment;
+
+			// Recompute the number of millisecond ticks elapsed.
+			msecTicks = (unsigned long)(1000 * elapsedTime / 
+				m_data->mClockFrequency.QuadPart);
+		}
+
+		// Store the current elapsed time for adjustments next time.
+		m_data->mPrevElapsedTime = elapsedTime;
+
+		return msecTicks;
+#else
+
+#ifdef __CELLOS_LV2__
+		uint64_t freq=sys_time_get_timebase_frequency();
+		double dFreq=((double) freq) / 1000.0;
+		typedef uint64_t  ClockSize;
+		ClockSize newTime;
+		SYS_TIMEBASE_GET( newTime );
+		//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
+
+		return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
+#else
+
+		struct timeval currentTime;
+		gettimeofday(&currentTime, 0);
+		return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000 + 
+			(currentTime.tv_usec - m_data->mStartTime.tv_usec) / 1000;
+#endif //__CELLOS_LV2__
+#endif
+}
+
+	/// Returns the time in us since the last call to reset or since 
+	/// the Clock was created.
+unsigned long int btClock::getTimeMicroseconds()
+{
+#ifdef BT_USE_WINDOWS_TIMERS
+		LARGE_INTEGER currentTime;
+		QueryPerformanceCounter(&currentTime);
+		LONGLONG elapsedTime = currentTime.QuadPart - 
+			m_data->mStartTime.QuadPart;
+
+		// Compute the number of millisecond ticks elapsed.
+		unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / 
+			m_data->mClockFrequency.QuadPart);
+
+		// Check for unexpected leaps in the Win32 performance counter.  
+		// (This is caused by unexpected data across the PCI to ISA 
+		// bridge, aka south bridge.  See Microsoft KB274323.)
+		unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick;
+		signed long msecOff = (signed long)(msecTicks - elapsedTicks);
+		if (msecOff < -100 || msecOff > 100)
+		{
+			// Adjust the starting time forwards.
+			LONGLONG msecAdjustment = mymin(msecOff * 
+				m_data->mClockFrequency.QuadPart / 1000, elapsedTime - 
+				m_data->mPrevElapsedTime);
+			m_data->mStartTime.QuadPart += msecAdjustment;
+			elapsedTime -= msecAdjustment;
+		}
+
+		// Store the current elapsed time for adjustments next time.
+		m_data->mPrevElapsedTime = elapsedTime;
+
+		// Convert to microseconds.
+		unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime / 
+			m_data->mClockFrequency.QuadPart);
+
+		return usecTicks;
+#else
+
+#ifdef __CELLOS_LV2__
+		uint64_t freq=sys_time_get_timebase_frequency();
+		double dFreq=((double) freq)/ 1000000.0;
+		typedef uint64_t  ClockSize;
+		ClockSize newTime;
+		//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
+		SYS_TIMEBASE_GET( newTime );
+
+		return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
+#else
+
+		struct timeval currentTime;
+		gettimeofday(&currentTime, 0);
+		return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 + 
+			(currentTime.tv_usec - m_data->mStartTime.tv_usec);
+#endif//__CELLOS_LV2__
+#endif 
+}
+
+
+
+
+
+inline void Profile_Get_Ticks(unsigned long int * ticks)
+{
+	*ticks = gProfileClock.getTimeMicroseconds();	// one tick is one microsecond on the file-wide profile clock
+}
+
+inline float Profile_Get_Tick_Rate(void)
+{
+//	return 1000000.f;
+	return 1000.f;	// ticks are microseconds (see Profile_Get_Ticks), so ticks / 1000 yields milliseconds
+
+}
+
+
+
+/***************************************************************************************************
+**
+** CProfileNode
+**
+***************************************************************************************************/
+
+/***********************************************************************************************
+ * INPUT:                                                                                      *
+ * name - pointer to a static string which is the name of this profile node                    *
+ * parent - parent pointer                                                                     *
+ *                                                                                             *
+ * WARNINGS:                                                                                   *
+ * The name is assumed to be a static pointer, only the pointer is stored and compared for     *
+ * efficiency reasons.                                                                         *
+ *=============================================================================================*/
+CProfileNode::CProfileNode( const char * name, CProfileNode * parent ) :
+	Name( name ),	// only the pointer is stored; the string is not copied
+	TotalCalls( 0 ),
+	TotalTime( 0 ),
+	StartTime( 0 ),
+	RecursionCounter( 0 ),
+	Parent( parent ),
+	Child( NULL ),
+	Sibling( NULL ),
+	m_userPtr(0)
+{
+	Reset();	// Child and Sibling are still NULL here, so this only re-zeroes TotalCalls and TotalTime
+}
+
+
+void	CProfileNode::CleanupMemory()
+{
+	delete ( Child);	// ~CProfileNode deletes each node's own Child and Sibling, so this frees the whole subtree
+	Child = NULL;
+	delete ( Sibling);	// likewise frees every later sibling together with its subtree
+	Sibling = NULL;
+}
+
+CProfileNode::~CProfileNode( void )
+{
+	delete ( Child);	// recursive: each deleted node deletes its own Child and Sibling in turn
+	delete ( Sibling);
+}
+
+
+/***********************************************************************************************
+ * INPUT:                                                                                      *
+ * name - static string pointer to the name of the node we are searching for                   *
+ *                                                                                             *
+ * WARNINGS:                                                                                   *
+ * All profile names are assumed to be static strings so this function uses pointer compares   *
+ * to find the named node.                                                                     *
+ *=============================================================================================*/
+CProfileNode * CProfileNode::Get_Sub_Node( const char * name )
+{
+	// Walk the child list looking for a node registered under this exact pointer.
+	for ( CProfileNode * cur = Child; cur != NULL; cur = cur->Sibling ) {
+		if ( cur->Name == name ) {
+			return cur;
+		}
+	}
+
+	// No match: create the node and push it onto the front of the child list,
+	// so the most recently added child is always the first one reached when
+	// walking Child and then Sibling.
+
+	CProfileNode * const created = new CProfileNode( name, this );
+	created->Sibling = Child;
+	Child = created;
+	return created;
+}
+
+
+void	CProfileNode::Reset( void )
+{
+	TotalCalls = 0;
+	TotalTime = 0.0f;
+	// StartTime and RecursionCounter are deliberately left alone here.
+	// Recursing through Child and Sibling resets this node's subtree and every later sibling's subtree.
+	if ( Child ) {
+		Child->Reset();
+	}
+	if ( Sibling ) {
+		Sibling->Reset();
+	}
+}
+
+
+void	CProfileNode::Call( void )
+{
+	TotalCalls++;	// counts every entry, recursive ones included
+	if (RecursionCounter++ == 0) {	// only the outermost entry starts the timer
+		Profile_Get_Ticks(&StartTime);
+	}
+}
+
+
+bool	CProfileNode::Return( void )
+{
+	if ( --RecursionCounter == 0 && TotalCalls != 0 ) { 	// outermost exit: add the elapsed time exactly once
+		unsigned long int time;
+		Profile_Get_Ticks(&time);
+		time-=StartTime;	// ticks elapsed since the matching outermost Call()
+		TotalTime += (float)time / Profile_Get_Tick_Rate();
+	}
+	return ( RecursionCounter == 0 );	// true when no enclosing Call() of this node is still open
+}
+
+
+/***************************************************************************************************
+**
+** CProfileIterator
+**
+***************************************************************************************************/
+CProfileIterator::CProfileIterator( CProfileNode * start )
+{
+	CurrentParent = start;
+	CurrentChild = CurrentParent->Get_Child();
+}
+
+
+void	CProfileIterator::First(void)
+{
+	CurrentChild = CurrentParent->Get_Child();
+}
+
+
+void	CProfileIterator::Next(void)
+{
+	CurrentChild = CurrentChild ? CurrentChild->Get_Sibling() : NULL;	// advance to the next sibling; once Is_Done() the iterator stays done instead of dereferencing NULL
+}
+
+
+bool	CProfileIterator::Is_Done(void)
+{
+	return CurrentChild == NULL;
+}
+
+
+void	CProfileIterator::Enter_Child( int index )
+{
+	// Step `index` siblings along the current parent's child list, stopping early if the list ends.
+	CProfileNode * target = CurrentParent->Get_Child();
+	for ( ; target != NULL && index != 0; --index )
+		target = target->Get_Sibling();
+	CurrentChild = target;
+	// Descend only when such a child exists; otherwise the parent is unchanged and iteration is done.
+	if ( target != NULL ) {
+		CurrentParent = target;
+		CurrentChild = target->Get_Child();
+	}
+}
+
+
+void	CProfileIterator::Enter_Parent( void )
+{
+	// Go up one level unless the current parent has no parent, then restart at its first child.
+	CProfileNode * const up = CurrentParent->Get_Parent();
+	if ( up != NULL ) CurrentParent = up;
+	CurrentChild = CurrentParent->Get_Child();
+}
+
+
+/***************************************************************************************************
+**
+** CProfileManager
+**
+***************************************************************************************************/
+
+CProfileNode	CProfileManager::Root( "Root", NULL );	// top of the profile tree; it has no parent
+CProfileNode *	CProfileManager::CurrentNode = &CProfileManager::Root;	// node currently being profiled; starts at the root
+int				CProfileManager::FrameCounter = 0;	// bumped by Increment_Frame_Counter(), zeroed by Reset()
+unsigned long int			CProfileManager::ResetTime = 0;	// tick value captured by the last Reset()
+
+
+/***********************************************************************************************
+ * CProfileManager::Start_Profile -- Begin a named profile                                    *
+ *                                                                                             *
+ * Steps one level deeper into the tree, if a child already exists with the specified name     *
+ * then it accumulates the profiling; otherwise a new child node is added to the profile tree. *
+ *                                                                                             *
+ * INPUT:                                                                                      *
+ * name - name of this profiling record                                                        *
+ *                                                                                             *
+ * WARNINGS:                                                                                   *
+ * The string used is assumed to be a static string; pointer compares are used throughout      *
+ * the profiling code for efficiency.                                                          *
+ *=============================================================================================*/
+void	CProfileManager::Start_Profile( const char * name )
+{
+	// Same name pointer as the node we are already in: treat it as a recursive
+	// re-entry and stay put; otherwise descend into (or create) the named child.
+	const bool reentry = ( name == CurrentNode->Get_Name() );
+	if ( !reentry ) CurrentNode = CurrentNode->Get_Sub_Node( name );
+	CurrentNode->Call();
+}
+
+
+/***********************************************************************************************
+ * CProfileManager::Stop_Profile -- Stop timing and record the results.                       *
+ *=============================================================================================*/
+void	CProfileManager::Stop_Profile( void )
+{
+	// Return() reports true only when the node's recursion counter has dropped
+	// to zero; while a recursive profile is still unwinding we must stay on
+	// the same node instead of climbing to its parent.
+	const bool leaveNode = CurrentNode->Return();
+	if ( leaveNode ) CurrentNode = CurrentNode->Get_Parent();
+}
+
+
+/***********************************************************************************************
+ * CProfileManager::Reset -- Reset the contents of the profiling system                       *
+ *                                                                                             *
+ *    This resets everything except for the tree structure.  All of the timing data is reset.  *
+ *=============================================================================================*/
+void	CProfileManager::Reset( void )
+{ 
+	gProfileClock.reset();	// restart the shared profiling clock first
+	Root.Reset();	// recursively zero call counts and times for the whole tree
+    Root.Call();	// re-enter the root so it counts one call and records a start tick
+	FrameCounter = 0;
+	Profile_Get_Ticks(&ResetTime);	// reference point used by Get_Time_Since_Reset()
+}
+
+
+/***********************************************************************************************
+ * CProfileManager::Increment_Frame_Counter -- Increment the frame counter                    *
+ *=============================================================================================*/
+void CProfileManager::Increment_Frame_Counter( void )
+{
+	++FrameCounter;	// frames counted since the last Reset(), which zeroes this counter
+}
+
+
+/***********************************************************************************************
+ * CProfileManager::Get_Time_Since_Reset -- returns the elapsed time since last reset         *
+ *=============================================================================================*/
+float CProfileManager::Get_Time_Since_Reset( void )
+{
+	// Ticks elapsed since Reset() stored ResetTime, divided by the tick rate.
+	unsigned long int now;
+	Profile_Get_Ticks( &now );
+	return (float)( now - ResetTime ) / Profile_Get_Tick_Rate();
+}
+
+#include <stdio.h>
+
+void	CProfileManager::dumpRecursive(CProfileIterator* profileIterator, int spacing)
+{
+	// Prints a report for the iterator's current parent (one line per child plus an
+	// "Unaccounted" remainder), each line prefixed by 'spacing' dots, then recurses into every child.
+	profileIterator->First();
+	if (profileIterator->Is_Done())
+		return;
+
+	// For the root the reference time is the time since Reset(); otherwise the parent's own total.
+	float accumulated_time=0,parent_time = profileIterator->Is_Root() ? CProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time();
+	int i,dot;
+	int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset();
+	for (dot=0;dot<spacing;dot++)	printf(".");
+	printf("----------------------------------\n");
+	for (dot=0;dot<spacing;dot++)	printf(".");
+	printf("Profiling: %s (total running time: %.3f ms) ---\n",	profileIterator->Get_Current_Parent_Name(), parent_time );
+
+	int numChildren = 0;
+
+	for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next())
+	{
+		numChildren++;
+		float current_total_time = profileIterator->Get_Current_Total_Time();
+		accumulated_time += current_total_time;
+		float fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f;
+		// A frame count of 0 (no Increment_Frame_Counter() since Reset()) reports 0 instead of dividing by zero.
+		double time_per_frame = frames_since_reset > 0 ? (current_total_time / (double)frames_since_reset) : 0.0;
+		for (dot=0;dot<spacing;dot++)	printf(".");
+		printf("%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)\n",i, profileIterator->Get_Current_Name(), fraction,time_per_frame,profileIterator->Get_Current_Total_Calls());
+	}
+
+	if (parent_time < accumulated_time)
+	{
+		printf("what's wrong\n");
+	}
+	for (dot=0;dot<spacing;dot++)	printf(".");
+	printf("%s (%.3f %%) :: %.3f ms\n", "Unaccounted:",parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time);
+
+	// Enter_Child() descends and Enter_Parent() climbs back, so one iterator serves the whole walk.
+	for (i=0;i<numChildren;i++)
+	{
+		profileIterator->Enter_Child(i);
+		dumpRecursive(profileIterator,spacing+3);
+		profileIterator->Enter_Parent();
+	}
+}
+
+
+
+void	CProfileManager::dumpAll()
+{
+	// Get_Iterator() heap-allocates an iterator positioned at the root; it has
+	// to be handed back through Release_Iterator() once the dump is finished.
+	CProfileIterator* const rootIterator = CProfileManager::Get_Iterator();
+
+	dumpRecursive(rootIterator,0);	// zero indentation for the top level
+	CProfileManager::Release_Iterator(rootIterator);
+}
+
+
+
+
+#endif //BT_NO_PROFILE
diff --git a/src/bullet/LinearMath/btQuickprof.h b/src/bullet/LinearMath/btQuickprof.h
new file mode 100644
index 00000000..93f3f4a6
--- /dev/null
+++ b/src/bullet/LinearMath/btQuickprof.h
@@ -0,0 +1,203 @@
+
+/***************************************************************************************************
+**
+** Real-Time Hierarchical Profiling for Game Programming Gems 3
+**
+** by Greg Hjelstrom & Byon Garrabrant
+**
+***************************************************************************************************/
+
+// Credits: The Clock class was inspired by the Timer classes in 
+// Ogre (www.ogre3d.org).
+
+
+
+#ifndef BT_QUICK_PROF_H
+#define BT_QUICK_PROF_H
+
+//To disable built-in profiling, please comment out next line
+//#define BT_NO_PROFILE 1
+#ifndef BT_NO_PROFILE
+#include <stdio.h>//@todo remove this, backwards compatibility
+#include "btScalar.h"
+#include "btAlignedAllocator.h"
+#include <new>
+
+
+
+
+
+#define USE_BT_CLOCK 1
+
+#ifdef USE_BT_CLOCK
+
+///The btClock is a portable basic clock that measures accurate time in seconds, use for profiling.
+class btClock
+{
+public:
+	btClock();
+	// Copy construction and assignment are user-declared; their definitions are not in this header.
+	btClock(const btClock& other);
+	btClock& operator=(const btClock& other);
+
+	~btClock();
+
+	/// Resets the initial reference time.
+	void reset();
+
+	/// Returns the time in milliseconds since the last call to reset or since
+	/// the btClock was created.
+	unsigned long int getTimeMilliseconds();
+
+	/// Returns the time in microseconds since the last call to reset or since
+	/// the btClock was created.
+	unsigned long int getTimeMicroseconds();
+private:
+	struct btClockData* m_data;	// opaque implementation state; btClockData is only forward-declared here
+};
+
+#endif //USE_BT_CLOCK
+
+
+
+
+///A node in the Profile Hierarchy Tree
+class	CProfileNode {
+	// One named entry in the profile tree: stores call count and accumulated time, and links to parent, first child and next sibling.
+public:
+	CProfileNode( const char * name, CProfileNode * parent );
+	~CProfileNode( void );
+	// Returns the child whose name pointer equals 'name', creating it if absent (pointer compare, so use static strings).
+	CProfileNode * Get_Sub_Node( const char * name );
+
+	CProfileNode * Get_Parent( void )		{ return Parent; }
+	CProfileNode * Get_Sibling( void )		{ return Sibling; }
+	CProfileNode * Get_Child( void )			{ return Child; }
+
+	void				CleanupMemory();
+	void				Reset( void );	// zeroes TotalCalls/TotalTime here and, recursively, in children and siblings
+	void				Call( void );	// counts a call; the outermost entry records the start tick
+	bool				Return( void );	// true when the recursion counter is back at zero
+
+	const char *	Get_Name( void )				{ return Name; }
+	int				Get_Total_Calls( void )		{ return TotalCalls; }
+	float				Get_Total_Time( void )		{ return TotalTime; }
+	void*			GetUserPointer() const {return m_userPtr;}
+	void			SetUserPointer(void* ptr) { m_userPtr = ptr;}
+protected:
+
+	const char *	Name;	// not copied: the pointer itself is the node's identity
+	int				TotalCalls;
+	float				TotalTime;	// accumulated ticks divided by Profile_Get_Tick_Rate()
+	unsigned long int			StartTime;	// tick value captured by the outermost Call()
+	int				RecursionCounter;	// nesting depth of Call() without a matching Return()
+
+	CProfileNode *	Parent;
+	CProfileNode *	Child;	// head of this node's child list
+	CProfileNode *	Sibling;	// next entry in the parent's child list
+	void*	m_userPtr;	// opaque value for client code; only stored and returned by this class
+};
+
+///An iterator to navigate through the tree
+class CProfileIterator
+{
+public:
+	// Access all the children of the current parent
+	void				First(void);
+	void				Next(void);
+	bool				Is_Done(void);
+	bool                Is_Root(void) { return (CurrentParent->Get_Parent() == 0); }	// the root is the only node without a parent
+
+	void				Enter_Child( int index );		// Make the given child the new parent
+	void				Enter_Largest_Child( void );	// Make the largest child the new parent
+	void				Enter_Parent( void );			// Make the current parent's parent the new parent
+
+	// Access the current child (these dereference CurrentChild, so check Is_Done() first)
+	const char *	Get_Current_Name( void )			{ return CurrentChild->Get_Name(); }
+	int				Get_Current_Total_Calls( void )	{ return CurrentChild->Get_Total_Calls(); }
+	float				Get_Current_Total_Time( void )	{ return CurrentChild->Get_Total_Time(); }
+
+	void*	Get_Current_UserPointer( void )			{ return CurrentChild->GetUserPointer(); }
+	void	Set_Current_UserPointer(void* ptr) {CurrentChild->SetUserPointer(ptr);}
+	// Access the current parent
+	const char *	Get_Current_Parent_Name( void )			{ return CurrentParent->Get_Name(); }
+	int				Get_Current_Parent_Total_Calls( void )	{ return CurrentParent->Get_Total_Calls(); }
+	float				Get_Current_Parent_Total_Time( void )	{ return CurrentParent->Get_Total_Time(); }
+
+	
+
+protected:
+
+	CProfileNode *	CurrentParent;	// node whose children are being iterated
+	CProfileNode *	CurrentChild;	// current position in that child list; NULL when done
+	
+	// Protected: iterators are created through the friend class CProfileManager (see Get_Iterator()).
+	CProfileIterator( CProfileNode * start );
+	friend	class		CProfileManager;
+};
+
+
+///The Manager for the Profile system
+class	CProfileManager {
+public:
+	static	void						Start_Profile( const char * name );	// 'name' is compared by pointer; pass a static string
+	static	void						Stop_Profile( void );
+
+	static	void						CleanupMemory(void)
+	{
+		Root.CleanupMemory();
+	}
+
+	static	void						Reset( void );
+	static	void						Increment_Frame_Counter( void );
+	static	int						Get_Frame_Count_Since_Reset( void )		{ return FrameCounter; }
+	static	float						Get_Time_Since_Reset( void );
+
+	static	CProfileIterator *	Get_Iterator( void )	
+	{ 
+		// Heap-allocated and positioned at the root; hand it back to Release_Iterator() when done.
+		return new CProfileIterator( &Root ); 
+	}
+	static	void						Release_Iterator( CProfileIterator * iterator ) { delete ( iterator); }
+
+	static void	dumpRecursive(CProfileIterator* profileIterator, int spacing);	// prints the subtree under the iterator's current parent via printf
+
+	static void	dumpAll();	// prints the whole tree starting from the root
+
+private:
+	static	CProfileNode			Root;
+	static	CProfileNode *			CurrentNode;	// node that Start_Profile/Stop_Profile currently operate on
+	static	int						FrameCounter;
+	static	unsigned long int					ResetTime;	// tick value captured by Reset()
+};
+
+
+///ProfileSampleClass is a simple way to profile a function's scope
+///Use the BT_PROFILE macro at the start of scope to time
+class	CProfileSample {
+public:
+	/// Opens a profile section called 'name' as soon as the object is created.
+	/// Per Start_Profile's contract the string is compared by pointer, so pass
+	/// a static string.
+	CProfileSample( const char * name )	{ CProfileManager::Start_Profile( name ); }
+
+	/// Closes the section again when the object is destroyed, i.e. when the
+	/// enclosing scope is left.
+	~CProfileSample( void )				{ CProfileManager::Stop_Profile(); }
+
+};
+
+
+#define	BT_PROFILE( name )			CProfileSample __profile( name )	// declares a local CProfileSample named __profile, so use it at most once per scope
+
+#else
+
+#define	BT_PROFILE( name )
+
+#endif //#ifndef BT_NO_PROFILE
+
+
+
+#endif //BT_QUICK_PROF_H
+
+
diff --git a/src/bullet/LinearMath/btRandom.h b/src/bullet/LinearMath/btRandom.h
new file mode 100644
index 00000000..4cbfc6bf
--- /dev/null
+++ b/src/bullet/LinearMath/btRandom.h
@@ -0,0 +1,42 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_GEN_RANDOM_H
+#define BT_GEN_RANDOM_H
+#include "btScalar.h"	// defines SIMD_FORCE_INLINE used below; makes this header self-contained
+#ifdef MT19937
+// Backend selected by defining MT19937: needs an external <mt19937.h> that provides init_genrand/genrand_int32.
+#include <limits.h>
+#include <mt19937.h>
+
+#define GEN_RAND_MAX UINT_MAX
+
+SIMD_FORCE_INLINE void         GEN_srand(unsigned int seed) { init_genrand(seed); }
+SIMD_FORCE_INLINE unsigned int GEN_rand()                   { return genrand_int32(); }
+
+#else
+// Default backend: the C library generator, so values lie in [0, RAND_MAX].
+#include <stdlib.h>
+
+#define GEN_RAND_MAX RAND_MAX
+
+SIMD_FORCE_INLINE void         GEN_srand(unsigned int seed) { srand(seed); } 
+SIMD_FORCE_INLINE unsigned int GEN_rand()                   { return rand(); }
+
+#endif
+
+#endif //BT_GEN_RANDOM_H
+
diff --git a/src/bullet/LinearMath/btScalar.h b/src/bullet/LinearMath/btScalar.h
new file mode 100644
index 00000000..e3bd37a0
--- /dev/null
+++ b/src/bullet/LinearMath/btScalar.h
@@ -0,0 +1,539 @@
+/*
+Copyright (c) 2003-2009 Erwin Coumans  http://bullet.googlecode.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_SCALAR_H
+#define BT_SCALAR_H
+
+#ifdef BT_MANAGED_CODE
+//Aligned data types not supported in managed code
+#pragma unmanaged
+#endif
+
+
+#include <math.h>
+#include <stdlib.h>//size_t for MSVC 6.0
+#include <float.h>
+
+/* SVN $Revision$ on $Date$ from http://bullet.googlecode.com*/
+#define BT_BULLET_VERSION 280
+
+inline int	btGetVersion()
+{
+	return BT_BULLET_VERSION;	// the compile-time version number defined just above
+}
+
+#if defined(DEBUG) || defined (_DEBUG)
+#define BT_DEBUG	// switches btAssert from a no-op to a real assertion in the platform sections below
+#endif
+
+
+#ifdef _WIN32
+		// MinGW, Cygwin and MSVC with _MSC_VER < 1300: plain inline, alignment macros expand to nothing.
+		#if defined(__MINGW32__) || defined(__CYGWIN__) || (defined (_MSC_VER) && _MSC_VER < 1300)
+
+			#define SIMD_FORCE_INLINE inline
+			#define ATTRIBUTE_ALIGNED16(a) a
+			#define ATTRIBUTE_ALIGNED64(a) a
+			#define ATTRIBUTE_ALIGNED128(a) a
+		#else
+			//#define BT_HAS_ALIGNED_ALLOCATOR
+			#pragma warning(disable : 4324) // disable padding warning
+//			#pragma warning(disable:4530) // Disable the exception disable but used in MSCV Stl warning.
+//			#pragma warning(disable:4996) //Turn off warnings about deprecated C routines
+//			#pragma warning(disable:4786) // Disable the "debug name too long" warning
+
+			#define SIMD_FORCE_INLINE __forceinline
+			#define ATTRIBUTE_ALIGNED16(a) __declspec(align(16)) a
+			#define ATTRIBUTE_ALIGNED64(a) __declspec(align(64)) a
+			#define ATTRIBUTE_ALIGNED128(a) __declspec (align(128)) a
+		#ifdef _XBOX
+			#define BT_USE_VMX128
+
+			#include <ppcintrinsics.h>
+ 			#define BT_HAVE_NATIVE_FSEL
+ 			#define btFsel(a,b,c) __fsel((a),(b),(c))
+		#else
+
+#if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined (BT_USE_DOUBLE_PRECISION))
+			#define BT_USE_SSE
+			#include <emmintrin.h>
+#endif
+
+		#endif//_XBOX
+
+		#endif //__MINGW32__
+
+		#include <assert.h>
+#ifdef BT_DEBUG
+		#define btAssert assert
+#else
+		#define btAssert(x)
+#endif
+		//btFullAssert is optional, slows down a lot
+		#define btFullAssert(x)
+
+		#define btLikely(_c)  _c	// no branch-prediction hint in this configuration
+		#define btUnlikely(_c) _c
+
+#else // !_WIN32
+	
+#if defined	(__CELLOS_LV2__)
+		#define SIMD_FORCE_INLINE inline __attribute__((always_inline))
+		#define ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
+		#define ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
+		#define ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
+		#ifndef assert
+		#include <assert.h>
+		#endif
+#ifdef BT_DEBUG
+#ifdef __SPU__
+#include <spu_printf.h>
+#define printf spu_printf
+	#define btAssert(x) {if(!(x)){printf("Assert "__FILE__ ":%u ("#x")\n", __LINE__);spu_hcmpeq(0,0);}}
+#else
+	#define btAssert assert
+#endif
+	
+#else
+		#define btAssert(x)
+#endif
+		//btFullAssert is optional, slows down a lot
+		#define btFullAssert(x)
+
+		#define btLikely(_c)  _c
+		#define btUnlikely(_c) _c
+
+#else // !__CELLOS_LV2__
+
+#ifdef USE_LIBSPE2
+
+		#define SIMD_FORCE_INLINE __inline
+		#define ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
+		#define ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
+		#define ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
+		#ifndef assert
+		#include <assert.h>
+		#endif
+#ifdef BT_DEBUG
+		#define btAssert assert
+#else
+		#define btAssert(x)
+#endif
+		//btFullAssert is optional, slows down a lot
+		#define btFullAssert(x)
+
+
+		#define btLikely(_c)   __builtin_expect((_c), 1)	// the only branch that maps these to a real compiler hint
+		#define btUnlikely(_c) __builtin_expect((_c), 0)
+		
+
+#else // !USE_LIBSPE2
+	//non-windows systems
+
+#if (defined (__APPLE__) && defined (__i386__) && (!defined (BT_USE_DOUBLE_PRECISION)))
+	#define BT_USE_SSE
+	#include <emmintrin.h>
+
+	#define SIMD_FORCE_INLINE inline
+///@todo: check out alignment methods for other platforms/compilers
+	#define ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
+	#define ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
+	#define ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
+	#ifndef assert
+	#include <assert.h>
+	#endif
+
+	#if defined(DEBUG) || defined (_DEBUG)
+		#define btAssert assert
+	#else
+		#define btAssert(x)
+	#endif
+
+	//btFullAssert is optional, slows down a lot
+	#define btFullAssert(x)
+	#define btLikely(_c)  _c
+	#define btUnlikely(_c) _c
+
+#else // every remaining non-Windows target
+		// In this branch ATTRIBUTE_ALIGNEDn(a) expands to plain 'a'; the aligned variants are left commented out below.
+		#define SIMD_FORCE_INLINE inline
+		///@todo: check out alignment methods for other platforms/compilers
+		///#define ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
+		///#define ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
+		///#define ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
+		#define ATTRIBUTE_ALIGNED16(a) a
+		#define ATTRIBUTE_ALIGNED64(a) a
+		#define ATTRIBUTE_ALIGNED128(a) a
+		#ifndef assert
+		#include <assert.h>
+		#endif
+
+#if defined(DEBUG) || defined (_DEBUG)
+		#define btAssert assert
+#else
+		#define btAssert(x)
+#endif
+
+		//btFullAssert is optional, slows down a lot
+		#define btFullAssert(x)
+		#define btLikely(_c)  _c
+		#define btUnlikely(_c) _c
+#endif //__APPLE__ 
+
+#endif // LIBSPE2
+
+#endif	//__CELLOS_LV2__
+#endif // _WIN32
+
+
+///The btScalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
+#if defined(BT_USE_DOUBLE_PRECISION)
+typedef double btScalar;
+//this number could be bigger in double precision
+#define BT_LARGE_FLOAT 1e30
+#else
+typedef float btScalar;
+//keep BT_LARGE_FLOAT*BT_LARGE_FLOAT < FLT_MAX (so squaring it does not overflow a float)
+#define BT_LARGE_FLOAT 1e18f
+#endif
+
+
+
+#define BT_DECLARE_ALIGNED_ALLOCATOR() /* class-scope new/delete overloads: allocation goes through btAlignedAlloc(size,16), placement forms pass the pointer through */ \
+   SIMD_FORCE_INLINE void* operator new(size_t sizeInBytes)   { return btAlignedAlloc(sizeInBytes,16); }   \
+   SIMD_FORCE_INLINE void  operator delete(void* ptr)         { btAlignedFree(ptr); }   \
+   SIMD_FORCE_INLINE void* operator new(size_t, void* ptr)   { return ptr; }   \
+   SIMD_FORCE_INLINE void  operator delete(void*, void*)      { }   \
+   SIMD_FORCE_INLINE void* operator new[](size_t sizeInBytes)   { return btAlignedAlloc(sizeInBytes,16); }   \
+   SIMD_FORCE_INLINE void  operator delete[](void* ptr)         { btAlignedFree(ptr); }   \
+   SIMD_FORCE_INLINE void* operator new[](size_t, void* ptr)   { return ptr; }   \
+   SIMD_FORCE_INLINE void  operator delete[](void*, void*)      { }   \
+
+
+
+#if defined(BT_USE_DOUBLE_PRECISION) || defined(BT_FORCE_DOUBLE_FUNCTIONS)
+		
+SIMD_FORCE_INLINE btScalar btSqrt(btScalar x) { return sqrt(x); }	// double-precision <math.h> variants of the scalar wrappers
+SIMD_FORCE_INLINE btScalar btFabs(btScalar x) { return fabs(x); }
+SIMD_FORCE_INLINE btScalar btCos(btScalar x) { return cos(x); }
+SIMD_FORCE_INLINE btScalar btSin(btScalar x) { return sin(x); }
+SIMD_FORCE_INLINE btScalar btTan(btScalar x) { return tan(x); }
+SIMD_FORCE_INLINE btScalar btAcos(btScalar x) { if (x<btScalar(-1))	x=btScalar(-1); if (x>btScalar(1))	x=btScalar(1); return acos(x); }	// input is clamped to [-1, 1] first
+SIMD_FORCE_INLINE btScalar btAsin(btScalar x) { if (x<btScalar(-1))	x=btScalar(-1); if (x>btScalar(1))	x=btScalar(1); return asin(x); }	// input is clamped to [-1, 1] first
+SIMD_FORCE_INLINE btScalar btAtan(btScalar x) { return atan(x); }
+SIMD_FORCE_INLINE btScalar btAtan2(btScalar x, btScalar y) { return atan2(x, y); }	// arguments are forwarded in order: the first one is atan2's first (ordinate) argument despite being named x
+SIMD_FORCE_INLINE btScalar btExp(btScalar x) { return exp(x); }
+SIMD_FORCE_INLINE btScalar btLog(btScalar x) { return log(x); }
+SIMD_FORCE_INLINE btScalar btPow(btScalar x,btScalar y) { return pow(x,y); }
+SIMD_FORCE_INLINE btScalar btFmod(btScalar x,btScalar y) { return fmod(x,y); }
+
+#else
+		
+SIMD_FORCE_INLINE btScalar btSqrt(btScalar y) 
+{ 
+#ifdef USE_APPROXIMATION
+    double x, z, tempf;
+    // Addresses the second 32-bit word of the double (its upper half on little-endian layouts). The type must be 32 bits wide: with a 64-bit 'unsigned long' (LP64) the '+ 1' would point past tempf.
+    unsigned int *tfptr = ((unsigned int *)&tempf) + 1;
+	tempf = y;
+	*tfptr = (0xbfcdd90a - *tfptr)>>1; /* estimate of 1/sqrt(y) */
+	x =  tempf;
+	z =  y*btScalar(0.5);
+	x = (btScalar(1.5)*x)-(x*x)*(x*z);         /* iteration formula     */
+	x = (btScalar(1.5)*x)-(x*x)*(x*z);
+	x = (btScalar(1.5)*x)-(x*x)*(x*z);
+	x = (btScalar(1.5)*x)-(x*x)*(x*z);
+	x = (btScalar(1.5)*x)-(x*x)*(x*z);
+	return x*y;	/* y * (1/sqrt(y)) == sqrt(y) */
+#else
+	return sqrtf(y); 
+#endif
+}
+SIMD_FORCE_INLINE btScalar btFabs(btScalar x) { return fabsf(x); }	// single-precision <math.h> variants of the scalar wrappers
+SIMD_FORCE_INLINE btScalar btCos(btScalar x) { return cosf(x); }
+SIMD_FORCE_INLINE btScalar btSin(btScalar x) { return sinf(x); }
+SIMD_FORCE_INLINE btScalar btTan(btScalar x) { return tanf(x); }
+SIMD_FORCE_INLINE btScalar btAcos(btScalar x)
+{
+	// Clamp into acosf's domain [-1, 1] first, so an out-of-range input is
+	// treated as the nearest bound instead of being passed through.
+	const btScalar lo = btScalar(-1), hi = btScalar(1);
+	return acosf( x < lo ? lo : ( x > hi ? hi : x ) );
+}
+SIMD_FORCE_INLINE btScalar btAsin(btScalar x)
+{
+	// Clamp into asinf's domain [-1, 1] first, so an out-of-range input is
+	// treated as the nearest bound instead of being passed through.
+	const btScalar lo = btScalar(-1), hi = btScalar(1);
+	return asinf( x < lo ? lo : ( x > hi ? hi : x ) );
+}
+SIMD_FORCE_INLINE btScalar btAtan(btScalar x) { return atanf(x); }
+SIMD_FORCE_INLINE btScalar btAtan2(btScalar x, btScalar y) { return atan2f(x, y); }	// arguments are forwarded in order: the first one is atan2f's first (ordinate) argument despite being named x
+SIMD_FORCE_INLINE btScalar btExp(btScalar x) { return expf(x); }
+SIMD_FORCE_INLINE btScalar btLog(btScalar x) { return logf(x); }
+SIMD_FORCE_INLINE btScalar btPow(btScalar x,btScalar y) { return powf(x,y); }
+SIMD_FORCE_INLINE btScalar btFmod(btScalar x,btScalar y) { return fmodf(x,y); }
+	
+#endif
+
+#define SIMD_2_PI         btScalar(6.283185307179586232)	// 2*pi
+#define SIMD_PI           (SIMD_2_PI * btScalar(0.5))	// pi
+#define SIMD_HALF_PI      (SIMD_2_PI * btScalar(0.25))	// pi/2
+#define SIMD_RADS_PER_DEG (SIMD_2_PI / btScalar(360.0))	// multiply degrees by this to get radians
+#define SIMD_DEGS_PER_RAD  (btScalar(360.0) / SIMD_2_PI)	// multiply radians by this to get degrees
+#define SIMDSQRT12 btScalar(0.7071067811865475244008443621048490)	// sqrt(1/2)
+
+#define btRecipSqrt(x) ((btScalar)(btScalar(1.0)/btSqrt(btScalar(x))))		/* reciprocal square root */
+
+
+#ifdef BT_USE_DOUBLE_PRECISION
+#define SIMD_EPSILON      DBL_EPSILON
+#define SIMD_INFINITY     DBL_MAX	// largest finite double, not an IEEE infinity
+#else
+#define SIMD_EPSILON      FLT_EPSILON
+#define SIMD_INFINITY     FLT_MAX	// largest finite float, not an IEEE infinity
+#endif
+
+SIMD_FORCE_INLINE btScalar btAtan2Fast(btScalar y, btScalar x) 
+{
+	// Cheap atan2 approximation built from one ratio and two constants (pi/4, 3pi/4);
+	// the angle is computed for |y| and then negated when y is negative.
+	// Note: x == 0 together with y == 0 evaluates 0/0 in the x >= 0 case.
+	const btScalar quarterPi = SIMD_PI / 4.0f;
+	const btScalar threeQuarterPi = 3.0f * quarterPi;
+	const btScalar absY = btFabs(y);
+
+	const bool rightHalf = (x >= 0.0f);
+	const btScalar r = rightHalf ? (x - absY) / (x + absY) : (x + absY) / (absY - x);
+	const btScalar unsignedAngle = (rightHalf ? quarterPi : threeQuarterPi) - quarterPi * r;
+
+	return (y < 0.0f) ? -unsignedAngle : unsignedAngle;
+}
+
+SIMD_FORCE_INLINE bool      btFuzzyZero(btScalar x) { return btFabs(x) < SIMD_EPSILON; }	// true when |x| is below the machine epsilon
+
+SIMD_FORCE_INLINE bool	btEqual(btScalar a, btScalar eps) {	// true when a lies within [-eps, eps], i.e. "a equals zero within eps"
+	return (((a) <= eps) && !((a) < -eps));
+}
+SIMD_FORCE_INLINE bool	btGreaterEqual (btScalar a, btScalar eps) {	// despite the name this is "not (a <= eps)": true only when a exceeds eps (or the compare is unordered)
+	return (!((a) <= eps));
+}
+
+
+SIMD_FORCE_INLINE int       btIsNegative(btScalar x) {
+    return x < btScalar(0.0) ? 1 : 0;	// 1 for values strictly below zero, otherwise 0
+}
+
+SIMD_FORCE_INLINE btScalar btRadians(btScalar x) { return x * SIMD_RADS_PER_DEG; }	// degrees in, radians out
+SIMD_FORCE_INLINE btScalar btDegrees(btScalar x) { return x * SIMD_DEGS_PER_RAD; }	// radians in, degrees out
+
+#define BT_DECLARE_HANDLE(name) typedef struct name##__ { int unused; } *name	// declares 'name' as a pointer to a distinct dummy struct, giving each handle its own type
+
+#ifndef btFsel	// only when no platform section above defined btFsel as a macro
+SIMD_FORCE_INLINE btScalar btFsel(btScalar a, btScalar b, btScalar c)
+{
+	return a >= 0 ? b : c;	// select b when a is non-negative, otherwise c
+}
+#endif
+#define btFsels(a,b,c) (btScalar)btFsel(a,b,c)	// same selection with the result cast to btScalar
+
+
+SIMD_FORCE_INLINE bool btMachineIsLittleEndian()
+{
+   // Store the integer 1 and inspect the byte at its lowest address:
+   // on a little-endian machine that byte holds the least significant
+   // bits and therefore reads back as 1.
+   const long int probe = 1;
+   const char * const firstByte = (const char *) &probe;
+   return firstByte[0] == 1;
+}
+
+
+
+///btSelect avoids branches, which makes performance much better for consoles like Playstation 3 and XBox 360
+///Thanks Phil Knight. See also http://www.cellperformance.com/articles/2006/04/more_techniques_for_eliminatin_1.html
+SIMD_FORCE_INLINE unsigned btSelect(unsigned condition, unsigned valueIfConditionNonZero, unsigned valueIfConditionZero) 
+{
+    // (c | -c) has its sign bit set exactly when c != 0 (for c == 0 both terms are 0).
+    // Shifting that signed value right by 31 is used to smear the sign bit across the
+    // word, giving an all-ones mask for nonzero conditions and an all-zeros mask otherwise.
+    const int signedCondition = (int)condition;
+    const unsigned nonZeroMask = (unsigned)((signedCondition | -signedCondition) >> 31);
+    // Blend without branching: each operand survives only where its mask has bits set.
+    return ((valueIfConditionNonZero & nonZeroMask) | (valueIfConditionZero & ~nonZeroMask));
+}
+SIMD_FORCE_INLINE int btSelect(unsigned condition, int valueIfConditionNonZero, int valueIfConditionZero)
+{
+    // 'pick' is all-ones when condition != 0 and all-zeros otherwise (sign bit of c | -c shifted down).
+    const int asSigned = (int)condition;
+    const unsigned pick = (unsigned)((asSigned | -asSigned) >> 31);
+    return static_cast<int>((valueIfConditionNonZero & pick) | (valueIfConditionZero & ~pick));
+}
+SIMD_FORCE_INLINE float btSelect(unsigned condition, float valueIfConditionNonZero, float valueIfConditionZero)
+{
+#ifdef BT_HAVE_NATIVE_FSEL
+    return (float)btFsel((btScalar)condition - btScalar(1.0f), valueIfConditionNonZero, valueIfConditionZero);	// condition - 1 is non-negative exactly when condition != 0
+#else
+    return (condition != 0) ? valueIfConditionNonZero : valueIfConditionZero; 	// plain conditional when no native fsel is available
+#endif
+}
+
+template<typename T> SIMD_FORCE_INLINE void btSwap(T& a, T& b)
+{
+	T saved(a);	// keep a's value while a is overwritten
+	a = b;
+	b = saved;
+}
+
+
+//PCK: endian swapping functions
+SIMD_FORCE_INLINE unsigned btSwapEndian(unsigned val)
+{
+	// Reverse the four low bytes: each byte is masked out and shifted to its mirrored position.
+	return (((val & 0x000000ff) << 24) | ((val & 0x0000ff00) << 8) | ((val & 0x00ff0000) >> 8) | ((val & 0xff000000) >> 24));
+}
+SIMD_FORCE_INLINE unsigned short btSwapEndian(unsigned short val)
+{
+	// Exchange the two bytes; the result of the bit arithmetic is cast back to unsigned short.
+	return static_cast<unsigned short>(((val & 0x00ff) << 8) | ((val & 0xff00) >> 8));
+}
+SIMD_FORCE_INLINE unsigned btSwapEndian(int val)
+{
+	// Signed overload: convert to unsigned and reuse the overload above.
+	return btSwapEndian(static_cast<unsigned>(val));
+}
+SIMD_FORCE_INLINE unsigned short btSwapEndian(short val)
+{
+	return btSwapEndian(static_cast<unsigned short>(val));	// signed overload: reuse the unsigned short swap
+}
+
+///btSwapEndianFloat uses char pointers to swap the endianness
+///btSwapEndianFloat/btSwapEndianDouble will NOT return a float, because the machine might 'correct' invalid floating point values
+///Not all values of sign/exponent/mantissa are valid floating point numbers according to IEEE 754. 
+///When a floating point unit is faced with an invalid value, it may actually change the value, or worse, throw an exception. 
+///In most systems, running user mode code, you wouldn't get an exception, but instead the hardware/os/runtime will 'fix' the number for you. 
+///so instead of returning a float/double, we return integer/long long integer
+SIMD_FORCE_INLINE unsigned int  btSwapEndianFloat(float d)
+{
+    // Byte-wise reversal of the float's storage into an integer; the result is
+    // returned as an integer rather than as a float.
+    unsigned int swapped = 0;
+    const unsigned char *from = (const unsigned char *)&d;
+    unsigned char *to = (unsigned char *)&swapped;
+
+    for (int k = 0; k < 4; ++k)
+        to[k] = from[3 - k];
+    return swapped;
+}
+
+// unswap using char pointers
+SIMD_FORCE_INLINE float btUnswapEndianFloat(unsigned int a) 
+{
+    // Inverse of btSwapEndianFloat: write the four bytes of 'a' into a float
+    // in reverse order and return that float.
+    float restored = 0.0f;
+    const unsigned char *from = (const unsigned char *)&a;
+    unsigned char *to = (unsigned char *)&restored;
+
+    for (int k = 0; k < 4; ++k)
+        to[k] = from[3 - k];
+
+    return restored;
+}
+
+
+// swap using char pointers
+SIMD_FORCE_INLINE void  btSwapEndianDouble(double d, unsigned char* dst)
+{
+    // Writes the eight bytes of 'd' to dst[0..7] in reverse order.
+    // The caller supplies the buffer, which must have room for 8 bytes;
+    // the swapped pattern is never handed back as a double.
+    const unsigned char *from = (const unsigned char *)&d;
+
+    for (int k = 0; k < 8; ++k)
+    {
+        dst[k] = from[7 - k];
+    }
+
+
+}
+
+// unswap using char pointers
+SIMD_FORCE_INLINE double btUnswapEndianDouble(const unsigned char *src) 
+{
+    // Inverse of btSwapEndianDouble: reads eight bytes from src[0..7] and
+    // stores them into a double in reverse order.
+    double restored = 0.0;
+    unsigned char *to = (unsigned char *)&restored;
+
+    for (int k = 0; k < 8; ++k)
+    {
+        to[k] = src[7 - k];
+    }
+
+
+
+	return restored;
+}
+
+// returns normalized value in range [-SIMD_PI, SIMD_PI]
+SIMD_FORCE_INLINE btScalar btNormalizeAngle(btScalar angleInRadians) 
+{
+	// btFmod leaves a remainder whose magnitude is below 2*pi and whose sign
+	// follows the input, so at most one extra turn is needed to reach [-pi, pi].
+	const btScalar wrapped = btFmod(angleInRadians, SIMD_2_PI);
+
+	if (wrapped < -SIMD_PI)
+	{
+		return wrapped + SIMD_2_PI;	// too negative: add a full turn
+	}
+	if (wrapped > SIMD_PI)
+	{
+		return wrapped - SIMD_2_PI;	// too positive: subtract a full turn
+	}
+	return wrapped;
+}
+
+///rudimentary class to provide type info
+struct btTypedObject
+{
+	/// Type tag chosen by whoever constructs the object; stored verbatim.
+	int	m_objectType;
+
+	/// Remembers 'objectType' as the tag. Not 'explicit', so an int converts implicitly.
+	btTypedObject(int objectType) : m_objectType(objectType) {}
+
+	/// Returns the current value of m_objectType.
+	inline int getObjectType() const { return m_objectType; }
+
+};
+
+
+///align a pointer to the provided alignment, upwards
+template <typename T>T* btAlignPointer(T* unalignedPtr, size_t alignment)
+{
+        union	// lets the same storage be read as a pointer and as an integer
+        {
+                T* ptr;
+                size_t integer;
+        };
+        const size_t bit_mask = ~(alignment - 1);	// clears the low bits; this rounding is only correct when alignment is a power of two
+        ptr = unalignedPtr;
+		integer += alignment-1;	// bump past the next boundary unless already aligned...
+        integer &= bit_mask;	// ...then round down to that boundary
+        return ptr;
+}
+
+#endif //BT_SCALAR_H
diff --git a/src/bullet/LinearMath/btSerializer.cpp b/src/bullet/LinearMath/btSerializer.cpp
new file mode 100644
index 00000000..49c25b7e
--- /dev/null
+++ b/src/bullet/LinearMath/btSerializer.cpp
@@ -0,0 +1,841 @@
+char sBulletDNAstr[]= {
+83,68,78,65,78,65,77,69,44,1,0,0,109,95,115,105,122,101,0,109,
+95,99,97,112,97,99,105,116,121,0,42,109,95,100,97,116,97,0,109,95,
+99,111,108,108,105,115,105,111,110,83,104,97,112,101,115,0,109,95,99,111,
+108,108,105,115,105,111,110,79,98,106,101,99,116,115,0,109,95,99,111,110,
+115,116,114,97,105,110,116,115,0,42,102,105,114,115,116,0,42,108,97,115,
+116,0,109,95,102,108,111,97,116,115,91,52,93,0,109,95,101,108,91,51,
+93,0,109,95,98,97,115,105,115,0,109,95,111,114,105,103,105,110,0,109,
+95,114,111,111,116,78,111,100,101,73,110,100,101,120,0,109,95,115,117,98,
+116,114,101,101,83,105,122,101,0,109,95,113,117,97,110,116,105,122,101,100,
+65,97,98,98,77,105,110,91,51,93,0,109,95,113,117,97,110,116,105,122,
+101,100,65,97,98,98,77,97,120,91,51,93,0,109,95,97,97,98,98,77,
+105,110,79,114,103,0,109,95,97,97,98,98,77,97,120,79,114,103,0,109,
+95,101,115,99,97,112,101,73,110,100,101,120,0,109,95,115,117,98,80,97,
+114,116,0,109,95,116,114,105,97,110,103,108,101,73,110,100,101,120,0,109,
+95,112,97,100,91,52,93,0,109,95,101,115,99,97,112,101,73,110,100,101,
+120,79,114,84,114,105,97,110,103,108,101,73,110,100,101,120,0,109,95,98,
+118,104,65,97,98,98,77,105,110,0,109,95,98,118,104,65,97,98,98,77,
+97,120,0,109,95,98,118,104,81,117,97,110,116,105,122,97,116,105,111,110,
+0,109,95,99,117,114,78,111,100,101,73,110,100,101,120,0,109,95,117,115,
+101,81,117,97,110,116,105,122,97,116,105,111,110,0,109,95,110,117,109,67,
+111,110,116,105,103,117,111,117,115,76,101,97,102,78,111,100,101,115,0,109,
+95,110,117,109,81,117,97,110,116,105,122,101,100,67,111,110,116,105,103,117,
+111,117,115,78,111,100,101,115,0,42,109,95,99,111,110,116,105,103,117,111,
+117,115,78,111,100,101,115,80,116,114,0,42,109,95,113,117,97,110,116,105,
+122,101,100,67,111,110,116,105,103,117,111,117,115,78,111,100,101,115,80,116,
+114,0,42,109,95,115,117,98,84,114,101,101,73,110,102,111,80,116,114,0,
+109,95,116,114,97,118,101,114,115,97,108,77,111,100,101,0,109,95,110,117,
+109,83,117,98,116,114,101,101,72,101,97,100,101,114,115,0,42,109,95,110,
+97,109,101,0,109,95,115,104,97,112,101,84,121,112,101,0,109,95,112,97,
+100,100,105,110,103,91,52,93,0,109,95,99,111,108,108,105,115,105,111,110,
+83,104,97,112,101,68,97,116,97,0,109,95,108,111,99,97,108,83,99,97,
+108,105,110,103,0,109,95,112,108,97,110,101,78,111,114,109,97,108,0,109,
+95,112,108,97,110,101,67,111,110,115,116,97,110,116,0,109,95,105,109,112,
+108,105,99,105,116,83,104,97,112,101,68,105,109,101,110,115,105,111,110,115,
+0,109,95,99,111,108,108,105,115,105,111,110,77,97,114,103,105,110,0,109,
+95,112,97,100,100,105,110,103,0,109,95,112,111,115,0,109,95,114,97,100,
+105,117,115,0,109,95,99,111,110,118,101,120,73,110,116,101,114,110,97,108,
+83,104,97,112,101,68,97,116,97,0,42,109,95,108,111,99,97,108,80,111,
+115,105,116,105,111,110,65,114,114,97,121,80,116,114,0,109,95,108,111,99,
+97,108,80,111,115,105,116,105,111,110,65,114,114,97,121,83,105,122,101,0,
+109,95,118,97,108,117,101,0,109,95,112,97,100,91,50,93,0,109,95,118,
+97,108,117,101,115,91,51,93,0,109,95,112,97,100,0,42,109,95,118,101,
+114,116,105,99,101,115,51,102,0,42,109,95,118,101,114,116,105,99,101,115,
+51,100,0,42,109,95,105,110,100,105,99,101,115,51,50,0,42,109,95,51,
+105,110,100,105,99,101,115,49,54,0,42,109,95,51,105,110,100,105,99,101,
+115,56,0,42,109,95,105,110,100,105,99,101,115,49,54,0,109,95,110,117,
+109,84,114,105,97,110,103,108,101,115,0,109,95,110,117,109,86,101,114,116,
+105,99,101,115,0,42,109,95,109,101,115,104,80,97,114,116,115,80,116,114,
+0,109,95,115,99,97,108,105,110,103,0,109,95,110,117,109,77,101,115,104,
+80,97,114,116,115,0,109,95,109,101,115,104,73,110,116,101,114,102,97,99,
+101,0,42,109,95,113,117,97,110,116,105,122,101,100,70,108,111,97,116,66,
+118,104,0,42,109,95,113,117,97,110,116,105,122,101,100,68,111,117,98,108,
+101,66,118,104,0,42,109,95,116,114,105,97,110,103,108,101,73,110,102,111,
+77,97,112,0,109,95,112,97,100,51,91,52,93,0,109,95,116,114,105,109,
+101,115,104,83,104,97,112,101,68,97,116,97,0,109,95,116,114,97,110,115,
+102,111,114,109,0,42,109,95,99,104,105,108,100,83,104,97,112,101,0,109,
+95,99,104,105,108,100,83,104,97,112,101,84,121,112,101,0,109,95,99,104,
+105,108,100,77,97,114,103,105,110,0,42,109,95,99,104,105,108,100,83,104,
+97,112,101,80,116,114,0,109,95,110,117,109,67,104,105,108,100,83,104,97,
+112,101,115,0,109,95,117,112,65,120,105,115,0,109,95,102,108,97,103,115,
+0,109,95,101,100,103,101,86,48,86,49,65,110,103,108,101,0,109,95,101,
+100,103,101,86,49,86,50,65,110,103,108,101,0,109,95,101,100,103,101,86,
+50,86,48,65,110,103,108,101,0,42,109,95,104,97,115,104,84,97,98,108,
+101,80,116,114,0,42,109,95,110,101,120,116,80,116,114,0,42,109,95,118,
+97,108,117,101,65,114,114,97,121,80,116,114,0,42,109,95,107,101,121,65,
+114,114,97,121,80,116,114,0,109,95,99,111,110,118,101,120,69,112,115,105,
+108,111,110,0,109,95,112,108,97,110,97,114,69,112,115,105,108,111,110,0,
+109,95,101,113,117,97,108,86,101,114,116,101,120,84,104,114,101,115,104,111,
+108,100,0,109,95,101,100,103,101,68,105,115,116,97,110,99,101,84,104,114,
+101,115,104,111,108,100,0,109,95,122,101,114,111,65,114,101,97,84,104,114,
+101,115,104,111,108,100,0,109,95,110,101,120,116,83,105,122,101,0,109,95,
+104,97,115,104,84,97,98,108,101,83,105,122,101,0,109,95,110,117,109,86,
+97,108,117,101,115,0,109,95,110,117,109,75,101,121,115,0,109,95,103,105,
+109,112,97,99,116,83,117,98,84,121,112,101,0,42,109,95,117,110,115,99,
+97,108,101,100,80,111,105,110,116,115,70,108,111,97,116,80,116,114,0,42,
+109,95,117,110,115,99,97,108,101,100,80,111,105,110,116,115,68,111,117,98,
+108,101,80,116,114,0,109,95,110,117,109,85,110,115,99,97,108,101,100,80,
+111,105,110,116,115,0,109,95,112,97,100,100,105,110,103,51,91,52,93,0,
+42,109,95,98,114,111,97,100,112,104,97,115,101,72,97,110,100,108,101,0,
+42,109,95,99,111,108,108,105,115,105,111,110,83,104,97,112,101,0,42,109,
+95,114,111,111,116,67,111,108,108,105,115,105,111,110,83,104,97,112,101,0,
+109,95,119,111,114,108,100,84,114,97,110,115,102,111,114,109,0,109,95,105,
+110,116,101,114,112,111,108,97,116,105,111,110,87,111,114,108,100,84,114,97,
+110,115,102,111,114,109,0,109,95,105,110,116,101,114,112,111,108,97,116,105,
+111,110,76,105,110,101,97,114,86,101,108,111,99,105,116,121,0,109,95,105,
+110,116,101,114,112,111,108,97,116,105,111,110,65,110,103,117,108,97,114,86,
+101,108,111,99,105,116,121,0,109,95,97,110,105,115,111,116,114,111,112,105,
+99,70,114,105,99,116,105,111,110,0,109,95,99,111,110,116,97,99,116,80,
+114,111,99,101,115,115,105,110,103,84,104,114,101,115,104,111,108,100,0,109,
+95,100,101,97,99,116,105,118,97,116,105,111,110,84,105,109,101,0,109,95,
+102,114,105,99,116,105,111,110,0,109,95,114,101,115,116,105,116,117,116,105,
+111,110,0,109,95,104,105,116,70,114,97,99,116,105,111,110,0,109,95,99,
+99,100,83,119,101,112,116,83,112,104,101,114,101,82,97,100,105,117,115,0,
+109,95,99,99,100,77,111,116,105,111,110,84,104,114,101,115,104,111,108,100,
+0,109,95,104,97,115,65,110,105,115,111,116,114,111,112,105,99,70,114,105,
+99,116,105,111,110,0,109,95,99,111,108,108,105,115,105,111,110,70,108,97,
+103,115,0,109,95,105,115,108,97,110,100,84,97,103,49,0,109,95,99,111,
+109,112,97,110,105,111,110,73,100,0,109,95,97,99,116,105,118,97,116,105,
+111,110,83,116,97,116,101,49,0,109,95,105,110,116,101,114,110,97,108,84,
+121,112,101,0,109,95,99,104,101,99,107,67,111,108,108,105,100,101,87,105,
+116,104,0,109,95,99,111,108,108,105,115,105,111,110,79,98,106,101,99,116,
+68,97,116,97,0,109,95,105,110,118,73,110,101,114,116,105,97,84,101,110,
+115,111,114,87,111,114,108,100,0,109,95,108,105,110,101,97,114,86,101,108,
+111,99,105,116,121,0,109,95,97,110,103,117,108,97,114,86,101,108,111,99,
+105,116,121,0,109,95,97,110,103,117,108,97,114,70,97,99,116,111,114,0,
+109,95,108,105,110,101,97,114,70,97,99,116,111,114,0,109,95,103,114,97,
+118,105,116,121,0,109,95,103,114,97,118,105,116,121,95,97,99,99,101,108,
+101,114,97,116,105,111,110,0,109,95,105,110,118,73,110,101,114,116,105,97,
+76,111,99,97,108,0,109,95,116,111,116,97,108,70,111,114,99,101,0,109,
+95,116,111,116,97,108,84,111,114,113,117,101,0,109,95,105,110,118,101,114,
+115,101,77,97,115,115,0,109,95,108,105,110,101,97,114,68,97,109,112,105,
+110,103,0,109,95,97,110,103,117,108,97,114,68,97,109,112,105,110,103,0,
+109,95,97,100,100,105,116,105,111,110,97,108,68,97,109,112,105,110,103,70,
+97,99,116,111,114,0,109,95,97,100,100,105,116,105,111,110,97,108,76,105,
+110,101,97,114,68,97,109,112,105,110,103,84,104,114,101,115,104,111,108,100,
+83,113,114,0,109,95,97,100,100,105,116,105,111,110,97,108,65,110,103,117,
+108,97,114,68,97,109,112,105,110,103,84,104,114,101,115,104,111,108,100,83,
+113,114,0,109,95,97,100,100,105,116,105,111,110,97,108,65,110,103,117,108,
+97,114,68,97,109,112,105,110,103,70,97,99,116,111,114,0,109,95,108,105,
+110,101,97,114,83,108,101,101,112,105,110,103,84,104,114,101,115,104,111,108,
+100,0,109,95,97,110,103,117,108,97,114,83,108,101,101,112,105,110,103,84,
+104,114,101,115,104,111,108,100,0,109,95,97,100,100,105,116,105,111,110,97,
+108,68,97,109,112,105,110,103,0,109,95,110,117,109,67,111,110,115,116,114,
+97,105,110,116,82,111,119,115,0,110,117,98,0,42,109,95,114,98,65,0,
+42,109,95,114,98,66,0,109,95,111,98,106,101,99,116,84,121,112,101,0,
+109,95,117,115,101,114,67,111,110,115,116,114,97,105,110,116,84,121,112,101,
+0,109,95,117,115,101,114,67,111,110,115,116,114,97,105,110,116,73,100,0,
+109,95,110,101,101,100,115,70,101,101,100,98,97,99,107,0,109,95,97,112,
+112,108,105,101,100,73,109,112,117,108,115,101,0,109,95,100,98,103,68,114,
+97,119,83,105,122,101,0,109,95,100,105,115,97,98,108,101,67,111,108,108,
+105,115,105,111,110,115,66,101,116,119,101,101,110,76,105,110,107,101,100,66,
+111,100,105,101,115,0,109,95,111,118,101,114,114,105,100,101,78,117,109,83,
+111,108,118,101,114,73,116,101,114,97,116,105,111,110,115,0,109,95,98,114,
+101,97,107,105,110,103,73,109,112,117,108,115,101,84,104,114,101,115,104,111,
+108,100,0,109,95,105,115,69,110,97,98,108,101,100,0,109,95,116,121,112,
+101,67,111,110,115,116,114,97,105,110,116,68,97,116,97,0,109,95,112,105,
+118,111,116,73,110,65,0,109,95,112,105,118,111,116,73,110,66,0,109,95,
+114,98,65,70,114,97,109,101,0,109,95,114,98,66,70,114,97,109,101,0,
+109,95,117,115,101,82,101,102,101,114,101,110,99,101,70,114,97,109,101,65,
+0,109,95,97,110,103,117,108,97,114,79,110,108,121,0,109,95,101,110,97,
+98,108,101,65,110,103,117,108,97,114,77,111,116,111,114,0,109,95,109,111,
+116,111,114,84,97,114,103,101,116,86,101,108,111,99,105,116,121,0,109,95,
+109,97,120,77,111,116,111,114,73,109,112,117,108,115,101,0,109,95,108,111,
+119,101,114,76,105,109,105,116,0,109,95,117,112,112,101,114,76,105,109,105,
+116,0,109,95,108,105,109,105,116,83,111,102,116,110,101,115,115,0,109,95,
+98,105,97,115,70,97,99,116,111,114,0,109,95,114,101,108,97,120,97,116,
+105,111,110,70,97,99,116,111,114,0,109,95,115,119,105,110,103,83,112,97,
+110,49,0,109,95,115,119,105,110,103,83,112,97,110,50,0,109,95,116,119,
+105,115,116,83,112,97,110,0,109,95,100,97,109,112,105,110,103,0,109,95,
+108,105,110,101,97,114,85,112,112,101,114,76,105,109,105,116,0,109,95,108,
+105,110,101,97,114,76,111,119,101,114,76,105,109,105,116,0,109,95,97,110,
+103,117,108,97,114,85,112,112,101,114,76,105,109,105,116,0,109,95,97,110,
+103,117,108,97,114,76,111,119,101,114,76,105,109,105,116,0,109,95,117,115,
+101,76,105,110,101,97,114,82,101,102,101,114,101,110,99,101,70,114,97,109,
+101,65,0,109,95,117,115,101,79,102,102,115,101,116,70,111,114,67,111,110,
+115,116,114,97,105,110,116,70,114,97,109,101,0,109,95,54,100,111,102,68,
+97,116,97,0,109,95,115,112,114,105,110,103,69,110,97,98,108,101,100,91,
+54,93,0,109,95,101,113,117,105,108,105,98,114,105,117,109,80,111,105,110,
+116,91,54,93,0,109,95,115,112,114,105,110,103,83,116,105,102,102,110,101,
+115,115,91,54,93,0,109,95,115,112,114,105,110,103,68,97,109,112,105,110,
+103,91,54,93,0,109,95,108,105,110,101,97,114,83,116,105,102,102,110,101,
+115,115,0,109,95,97,110,103,117,108,97,114,83,116,105,102,102,110,101,115,
+115,0,109,95,118,111,108,117,109,101,83,116,105,102,102,110,101,115,115,0,
+42,109,95,109,97,116,101,114,105,97,108,0,109,95,112,111,115,105,116,105,
+111,110,0,109,95,112,114,101,118,105,111,117,115,80,111,115,105,116,105,111,
+110,0,109,95,118,101,108,111,99,105,116,121,0,109,95,97,99,99,117,109,
+117,108,97,116,101,100,70,111,114,99,101,0,109,95,110,111,114,109,97,108,
+0,109,95,97,114,101,97,0,109,95,97,116,116,97,99,104,0,109,95,110,
+111,100,101,73,110,100,105,99,101,115,91,50,93,0,109,95,114,101,115,116,
+76,101,110,103,116,104,0,109,95,98,98,101,110,100,105,110,103,0,109,95,
+110,111,100,101,73,110,100,105,99,101,115,91,51,93,0,109,95,114,101,115,
+116,65,114,101,97,0,109,95,99,48,91,52,93,0,109,95,110,111,100,101,
+73,110,100,105,99,101,115,91,52,93,0,109,95,114,101,115,116,86,111,108,
+117,109,101,0,109,95,99,49,0,109,95,99,50,0,109,95,99,48,0,109,
+95,108,111,99,97,108,70,114,97,109,101,0,42,109,95,114,105,103,105,100,
+66,111,100,121,0,109,95,110,111,100,101,73,110,100,101,120,0,109,95,97,
+101,114,111,77,111,100,101,108,0,109,95,98,97,117,109,103,97,114,116,101,
+0,109,95,100,114,97,103,0,109,95,108,105,102,116,0,109,95,112,114,101,
+115,115,117,114,101,0,109,95,118,111,108,117,109,101,0,109,95,100,121,110,
+97,109,105,99,70,114,105,99,116,105,111,110,0,109,95,112,111,115,101,77,
+97,116,99,104,0,109,95,114,105,103,105,100,67,111,110,116,97,99,116,72,
+97,114,100,110,101,115,115,0,109,95,107,105,110,101,116,105,99,67,111,110,
+116,97,99,116,72,97,114,100,110,101,115,115,0,109,95,115,111,102,116,67,
+111,110,116,97,99,116,72,97,114,100,110,101,115,115,0,109,95,97,110,99,
+104,111,114,72,97,114,100,110,101,115,115,0,109,95,115,111,102,116,82,105,
+103,105,100,67,108,117,115,116,101,114,72,97,114,100,110,101,115,115,0,109,
+95,115,111,102,116,75,105,110,101,116,105,99,67,108,117,115,116,101,114,72,
+97,114,100,110,101,115,115,0,109,95,115,111,102,116,83,111,102,116,67,108,
+117,115,116,101,114,72,97,114,100,110,101,115,115,0,109,95,115,111,102,116,
+82,105,103,105,100,67,108,117,115,116,101,114,73,109,112,117,108,115,101,83,
+112,108,105,116,0,109,95,115,111,102,116,75,105,110,101,116,105,99,67,108,
+117,115,116,101,114,73,109,112,117,108,115,101,83,112,108,105,116,0,109,95,
+115,111,102,116,83,111,102,116,67,108,117,115,116,101,114,73,109,112,117,108,
+115,101,83,112,108,105,116,0,109,95,109,97,120,86,111,108,117,109,101,0,
+109,95,116,105,109,101,83,99,97,108,101,0,109,95,118,101,108,111,99,105,
+116,121,73,116,101,114,97,116,105,111,110,115,0,109,95,112,111,115,105,116,
+105,111,110,73,116,101,114,97,116,105,111,110,115,0,109,95,100,114,105,102,
+116,73,116,101,114,97,116,105,111,110,115,0,109,95,99,108,117,115,116,101,
+114,73,116,101,114,97,116,105,111,110,115,0,109,95,114,111,116,0,109,95,
+115,99,97,108,101,0,109,95,97,113,113,0,109,95,99,111,109,0,42,109,
+95,112,111,115,105,116,105,111,110,115,0,42,109,95,119,101,105,103,104,116,
+115,0,109,95,110,117,109,80,111,115,105,116,105,111,110,115,0,109,95,110,
+117,109,87,101,105,103,116,115,0,109,95,98,118,111,108,117,109,101,0,109,
+95,98,102,114,97,109,101,0,109,95,102,114,97,109,101,120,102,111,114,109,
+0,109,95,108,111,99,105,105,0,109,95,105,110,118,119,105,0,109,95,118,
+105,109,112,117,108,115,101,115,91,50,93,0,109,95,100,105,109,112,117,108,
+115,101,115,91,50,93,0,109,95,108,118,0,109,95,97,118,0,42,109,95,
+102,114,97,109,101,114,101,102,115,0,42,109,95,110,111,100,101,73,110,100,
+105,99,101,115,0,42,109,95,109,97,115,115,101,115,0,109,95,110,117,109,
+70,114,97,109,101,82,101,102,115,0,109,95,110,117,109,78,111,100,101,115,
+0,109,95,110,117,109,77,97,115,115,101,115,0,109,95,105,100,109,97,115,
+115,0,109,95,105,109,97,115,115,0,109,95,110,118,105,109,112,117,108,115,
+101,115,0,109,95,110,100,105,109,112,117,108,115,101,115,0,109,95,110,100,
+97,109,112,105,110,103,0,109,95,108,100,97,109,112,105,110,103,0,109,95,
+97,100,97,109,112,105,110,103,0,109,95,109,97,116,99,104,105,110,103,0,
+109,95,109,97,120,83,101,108,102,67,111,108,108,105,115,105,111,110,73,109,
+112,117,108,115,101,0,109,95,115,101,108,102,67,111,108,108,105,115,105,111,
+110,73,109,112,117,108,115,101,70,97,99,116,111,114,0,109,95,99,111,110,
+116,97,105,110,115,65,110,99,104,111,114,0,109,95,99,111,108,108,105,100,
+101,0,109,95,99,108,117,115,116,101,114,73,110,100,101,120,0,42,109,95,
+98,111,100,121,65,0,42,109,95,98,111,100,121,66,0,109,95,114,101,102,
+115,91,50,93,0,109,95,99,102,109,0,109,95,101,114,112,0,109,95,115,
+112,108,105,116,0,109,95,100,101,108,101,116,101,0,109,95,114,101,108,80,
+111,115,105,116,105,111,110,91,50,93,0,109,95,98,111,100,121,65,116,121,
+112,101,0,109,95,98,111,100,121,66,116,121,112,101,0,109,95,106,111,105,
+110,116,84,121,112,101,0,42,109,95,112,111,115,101,0,42,42,109,95,109,
+97,116,101,114,105,97,108,115,0,42,109,95,110,111,100,101,115,0,42,109,
+95,108,105,110,107,115,0,42,109,95,102,97,99,101,115,0,42,109,95,116,
+101,116,114,97,104,101,100,114,97,0,42,109,95,97,110,99,104,111,114,115,
+0,42,109,95,99,108,117,115,116,101,114,115,0,42,109,95,106,111,105,110,
+116,115,0,109,95,110,117,109,77,97,116,101,114,105,97,108,115,0,109,95,
+110,117,109,76,105,110,107,115,0,109,95,110,117,109,70,97,99,101,115,0,
+109,95,110,117,109,84,101,116,114,97,104,101,100,114,97,0,109,95,110,117,
+109,65,110,99,104,111,114,115,0,109,95,110,117,109,67,108,117,115,116,101,
+114,115,0,109,95,110,117,109,74,111,105,110,116,115,0,109,95,99,111,110,
+102,105,103,0,84,89,80,69,72,0,0,0,99,104,97,114,0,117,99,104,
+97,114,0,115,104,111,114,116,0,117,115,104,111,114,116,0,105,110,116,0,
+108,111,110,103,0,117,108,111,110,103,0,102,108,111,97,116,0,100,111,117,
+98,108,101,0,118,111,105,100,0,80,111,105,110,116,101,114,65,114,114,97,
+121,0,98,116,80,104,121,115,105,99,115,83,121,115,116,101,109,0,76,105,
+115,116,66,97,115,101,0,98,116,86,101,99,116,111,114,51,70,108,111,97,
+116,68,97,116,97,0,98,116,86,101,99,116,111,114,51,68,111,117,98,108,
+101,68,97,116,97,0,98,116,77,97,116,114,105,120,51,120,51,70,108,111,
+97,116,68,97,116,97,0,98,116,77,97,116,114,105,120,51,120,51,68,111,
+117,98,108,101,68,97,116,97,0,98,116,84,114,97,110,115,102,111,114,109,
+70,108,111,97,116,68,97,116,97,0,98,116,84,114,97,110,115,102,111,114,
+109,68,111,117,98,108,101,68,97,116,97,0,98,116,66,118,104,83,117,98,
+116,114,101,101,73,110,102,111,68,97,116,97,0,98,116,79,112,116,105,109,
+105,122,101,100,66,118,104,78,111,100,101,70,108,111,97,116,68,97,116,97,
+0,98,116,79,112,116,105,109,105,122,101,100,66,118,104,78,111,100,101,68,
+111,117,98,108,101,68,97,116,97,0,98,116,81,117,97,110,116,105,122,101,
+100,66,118,104,78,111,100,101,68,97,116,97,0,98,116,81,117,97,110,116,
+105,122,101,100,66,118,104,70,108,111,97,116,68,97,116,97,0,98,116,81,
+117,97,110,116,105,122,101,100,66,118,104,68,111,117,98,108,101,68,97,116,
+97,0,98,116,67,111,108,108,105,115,105,111,110,83,104,97,112,101,68,97,
+116,97,0,98,116,83,116,97,116,105,99,80,108,97,110,101,83,104,97,112,
+101,68,97,116,97,0,98,116,67,111,110,118,101,120,73,110,116,101,114,110,
+97,108,83,104,97,112,101,68,97,116,97,0,98,116,80,111,115,105,116,105,
+111,110,65,110,100,82,97,100,105,117,115,0,98,116,77,117,108,116,105,83,
+112,104,101,114,101,83,104,97,112,101,68,97,116,97,0,98,116,73,110,116,
+73,110,100,101,120,68,97,116,97,0,98,116,83,104,111,114,116,73,110,116,
+73,110,100,101,120,68,97,116,97,0,98,116,83,104,111,114,116,73,110,116,
+73,110,100,101,120,84,114,105,112,108,101,116,68,97,116,97,0,98,116,67,
+104,97,114,73,110,100,101,120,84,114,105,112,108,101,116,68,97,116,97,0,
+98,116,77,101,115,104,80,97,114,116,68,97,116,97,0,98,116,83,116,114,
+105,100,105,110,103,77,101,115,104,73,110,116,101,114,102,97,99,101,68,97,
+116,97,0,98,116,84,114,105,97,110,103,108,101,77,101,115,104,83,104,97,
+112,101,68,97,116,97,0,98,116,84,114,105,97,110,103,108,101,73,110,102,
+111,77,97,112,68,97,116,97,0,98,116,83,99,97,108,101,100,84,114,105,
+97,110,103,108,101,77,101,115,104,83,104,97,112,101,68,97,116,97,0,98,
+116,67,111,109,112,111,117,110,100,83,104,97,112,101,67,104,105,108,100,68,
+97,116,97,0,98,116,67,111,109,112,111,117,110,100,83,104,97,112,101,68,
+97,116,97,0,98,116,67,121,108,105,110,100,101,114,83,104,97,112,101,68,
+97,116,97,0,98,116,67,97,112,115,117,108,101,83,104,97,112,101,68,97,
+116,97,0,98,116,84,114,105,97,110,103,108,101,73,110,102,111,68,97,116,
+97,0,98,116,71,73,109,112,97,99,116,77,101,115,104,83,104,97,112,101,
+68,97,116,97,0,98,116,67,111,110,118,101,120,72,117,108,108,83,104,97,
+112,101,68,97,116,97,0,98,116,67,111,108,108,105,115,105,111,110,79,98,
+106,101,99,116,68,111,117,98,108,101,68,97,116,97,0,98,116,67,111,108,
+108,105,115,105,111,110,79,98,106,101,99,116,70,108,111,97,116,68,97,116,
+97,0,98,116,82,105,103,105,100,66,111,100,121,70,108,111,97,116,68,97,
+116,97,0,98,116,82,105,103,105,100,66,111,100,121,68,111,117,98,108,101,
+68,97,116,97,0,98,116,67,111,110,115,116,114,97,105,110,116,73,110,102,
+111,49,0,98,116,84,121,112,101,100,67,111,110,115,116,114,97,105,110,116,
+68,97,116,97,0,98,116,82,105,103,105,100,66,111,100,121,68,97,116,97,
+0,98,116,80,111,105,110,116,50,80,111,105,110,116,67,111,110,115,116,114,
+97,105,110,116,70,108,111,97,116,68,97,116,97,0,98,116,80,111,105,110,
+116,50,80,111,105,110,116,67,111,110,115,116,114,97,105,110,116,68,111,117,
+98,108,101,68,97,116,97,0,98,116,72,105,110,103,101,67,111,110,115,116,
+114,97,105,110,116,68,111,117,98,108,101,68,97,116,97,0,98,116,72,105,
+110,103,101,67,111,110,115,116,114,97,105,110,116,70,108,111,97,116,68,97,
+116,97,0,98,116,67,111,110,101,84,119,105,115,116,67,111,110,115,116,114,
+97,105,110,116,68,97,116,97,0,98,116,71,101,110,101,114,105,99,54,68,
+111,102,67,111,110,115,116,114,97,105,110,116,68,97,116,97,0,98,116,71,
+101,110,101,114,105,99,54,68,111,102,83,112,114,105,110,103,67,111,110,115,
+116,114,97,105,110,116,68,97,116,97,0,98,116,83,108,105,100,101,114,67,
+111,110,115,116,114,97,105,110,116,68,97,116,97,0,83,111,102,116,66,111,
+100,121,77,97,116,101,114,105,97,108,68,97,116,97,0,83,111,102,116,66,
+111,100,121,78,111,100,101,68,97,116,97,0,83,111,102,116,66,111,100,121,
+76,105,110,107,68,97,116,97,0,83,111,102,116,66,111,100,121,70,97,99,
+101,68,97,116,97,0,83,111,102,116,66,111,100,121,84,101,116,114,97,68,
+97,116,97,0,83,111,102,116,82,105,103,105,100,65,110,99,104,111,114,68,
+97,116,97,0,83,111,102,116,66,111,100,121,67,111,110,102,105,103,68,97,
+116,97,0,83,111,102,116,66,111,100,121,80,111,115,101,68,97,116,97,0,
+83,111,102,116,66,111,100,121,67,108,117,115,116,101,114,68,97,116,97,0,
+98,116,83,111,102,116,66,111,100,121,74,111,105,110,116,68,97,116,97,0,
+98,116,83,111,102,116,66,111,100,121,70,108,111,97,116,68,97,116,97,0,
+84,76,69,78,1,0,1,0,2,0,2,0,4,0,4,0,4,0,4,0,
+8,0,0,0,12,0,36,0,8,0,16,0,32,0,48,0,96,0,64,0,
+-128,0,20,0,48,0,80,0,16,0,84,0,-124,0,12,0,52,0,52,0,
+20,0,64,0,4,0,4,0,8,0,4,0,32,0,28,0,60,0,56,0,
+76,0,76,0,24,0,60,0,60,0,16,0,64,0,68,0,-56,1,-8,0,
+-32,1,-104,3,8,0,52,0,0,0,84,0,116,0,92,1,-36,0,-44,0,
+-4,0,92,1,-52,0,16,0,100,0,20,0,36,0,100,0,92,0,104,0,
+-64,0,92,1,104,0,-92,1,83,84,82,67,61,0,0,0,10,0,3,0,
+4,0,0,0,4,0,1,0,9,0,2,0,11,0,3,0,10,0,3,0,
+10,0,4,0,10,0,5,0,12,0,2,0,9,0,6,0,9,0,7,0,
+13,0,1,0,7,0,8,0,14,0,1,0,8,0,8,0,15,0,1,0,
+13,0,9,0,16,0,1,0,14,0,9,0,17,0,2,0,15,0,10,0,
+13,0,11,0,18,0,2,0,16,0,10,0,14,0,11,0,19,0,4,0,
+4,0,12,0,4,0,13,0,2,0,14,0,2,0,15,0,20,0,6,0,
+13,0,16,0,13,0,17,0,4,0,18,0,4,0,19,0,4,0,20,0,
+0,0,21,0,21,0,6,0,14,0,16,0,14,0,17,0,4,0,18,0,
+4,0,19,0,4,0,20,0,0,0,21,0,22,0,3,0,2,0,14,0,
+2,0,15,0,4,0,22,0,23,0,12,0,13,0,23,0,13,0,24,0,
+13,0,25,0,4,0,26,0,4,0,27,0,4,0,28,0,4,0,29,0,
+20,0,30,0,22,0,31,0,19,0,32,0,4,0,33,0,4,0,34,0,
+24,0,12,0,14,0,23,0,14,0,24,0,14,0,25,0,4,0,26,0,
+4,0,27,0,4,0,28,0,4,0,29,0,21,0,30,0,22,0,31,0,
+4,0,33,0,4,0,34,0,19,0,32,0,25,0,3,0,0,0,35,0,
+4,0,36,0,0,0,37,0,26,0,5,0,25,0,38,0,13,0,39,0,
+13,0,40,0,7,0,41,0,0,0,21,0,27,0,5,0,25,0,38,0,
+13,0,39,0,13,0,42,0,7,0,43,0,4,0,44,0,28,0,2,0,
+13,0,45,0,7,0,46,0,29,0,4,0,27,0,47,0,28,0,48,0,
+4,0,49,0,0,0,37,0,30,0,1,0,4,0,50,0,31,0,2,0,
+2,0,50,0,0,0,51,0,32,0,2,0,2,0,52,0,0,0,51,0,
+33,0,2,0,0,0,52,0,0,0,53,0,34,0,8,0,13,0,54,0,
+14,0,55,0,30,0,56,0,32,0,57,0,33,0,58,0,31,0,59,0,
+4,0,60,0,4,0,61,0,35,0,4,0,34,0,62,0,13,0,63,0,
+4,0,64,0,0,0,37,0,36,0,7,0,25,0,38,0,35,0,65,0,
+23,0,66,0,24,0,67,0,37,0,68,0,7,0,43,0,0,0,69,0,
+38,0,2,0,36,0,70,0,13,0,39,0,39,0,4,0,17,0,71,0,
+25,0,72,0,4,0,73,0,7,0,74,0,40,0,4,0,25,0,38,0,
+39,0,75,0,4,0,76,0,7,0,43,0,41,0,3,0,27,0,47,0,
+4,0,77,0,0,0,37,0,42,0,3,0,27,0,47,0,4,0,77,0,
+0,0,37,0,43,0,4,0,4,0,78,0,7,0,79,0,7,0,80,0,
+7,0,81,0,37,0,14,0,4,0,82,0,4,0,83,0,43,0,84,0,
+4,0,85,0,7,0,86,0,7,0,87,0,7,0,88,0,7,0,89,0,
+7,0,90,0,4,0,91,0,4,0,92,0,4,0,93,0,4,0,94,0,
+0,0,37,0,44,0,5,0,25,0,38,0,35,0,65,0,13,0,39,0,
+7,0,43,0,4,0,95,0,45,0,5,0,27,0,47,0,13,0,96,0,
+14,0,97,0,4,0,98,0,0,0,99,0,46,0,24,0,9,0,100,0,
+9,0,101,0,25,0,102,0,0,0,35,0,18,0,103,0,18,0,104,0,
+14,0,105,0,14,0,106,0,14,0,107,0,8,0,108,0,8,0,109,0,
+8,0,110,0,8,0,111,0,8,0,112,0,8,0,113,0,8,0,114,0,
+4,0,115,0,4,0,116,0,4,0,117,0,4,0,118,0,4,0,119,0,
+4,0,120,0,4,0,121,0,0,0,37,0,47,0,23,0,9,0,100,0,
+9,0,101,0,25,0,102,0,0,0,35,0,17,0,103,0,17,0,104,0,
+13,0,105,0,13,0,106,0,13,0,107,0,7,0,108,0,7,0,109,0,
+7,0,110,0,7,0,111,0,7,0,112,0,7,0,113,0,7,0,114,0,
+4,0,115,0,4,0,116,0,4,0,117,0,4,0,118,0,4,0,119,0,
+4,0,120,0,4,0,121,0,48,0,21,0,47,0,122,0,15,0,123,0,
+13,0,124,0,13,0,125,0,13,0,126,0,13,0,127,0,13,0,-128,0,
+13,0,-127,0,13,0,-126,0,13,0,-125,0,13,0,-124,0,7,0,-123,0,
+7,0,-122,0,7,0,-121,0,7,0,-120,0,7,0,-119,0,7,0,-118,0,
+7,0,-117,0,7,0,-116,0,7,0,-115,0,4,0,-114,0,49,0,22,0,
+46,0,122,0,16,0,123,0,14,0,124,0,14,0,125,0,14,0,126,0,
+14,0,127,0,14,0,-128,0,14,0,-127,0,14,0,-126,0,14,0,-125,0,
+14,0,-124,0,8,0,-123,0,8,0,-122,0,8,0,-121,0,8,0,-120,0,
+8,0,-119,0,8,0,-118,0,8,0,-117,0,8,0,-116,0,8,0,-115,0,
+4,0,-114,0,0,0,37,0,50,0,2,0,4,0,-113,0,4,0,-112,0,
+51,0,13,0,52,0,-111,0,52,0,-110,0,0,0,35,0,4,0,-109,0,
+4,0,-108,0,4,0,-107,0,4,0,-106,0,7,0,-105,0,7,0,-104,0,
+4,0,-103,0,4,0,-102,0,7,0,-101,0,4,0,-100,0,53,0,3,0,
+51,0,-99,0,13,0,-98,0,13,0,-97,0,54,0,3,0,51,0,-99,0,
+14,0,-98,0,14,0,-97,0,55,0,13,0,51,0,-99,0,18,0,-96,0,
+18,0,-95,0,4,0,-94,0,4,0,-93,0,4,0,-92,0,7,0,-91,0,
+7,0,-90,0,7,0,-89,0,7,0,-88,0,7,0,-87,0,7,0,-86,0,
+7,0,-85,0,56,0,13,0,51,0,-99,0,17,0,-96,0,17,0,-95,0,
+4,0,-94,0,4,0,-93,0,4,0,-92,0,7,0,-91,0,7,0,-90,0,
+7,0,-89,0,7,0,-88,0,7,0,-87,0,7,0,-86,0,7,0,-85,0,
+57,0,11,0,51,0,-99,0,17,0,-96,0,17,0,-95,0,7,0,-84,0,
+7,0,-83,0,7,0,-82,0,7,0,-87,0,7,0,-86,0,7,0,-85,0,
+7,0,-81,0,0,0,21,0,58,0,9,0,51,0,-99,0,17,0,-96,0,
+17,0,-95,0,13,0,-80,0,13,0,-79,0,13,0,-78,0,13,0,-77,0,
+4,0,-76,0,4,0,-75,0,59,0,5,0,58,0,-74,0,4,0,-73,0,
+7,0,-72,0,7,0,-71,0,7,0,-70,0,60,0,9,0,51,0,-99,0,
+17,0,-96,0,17,0,-95,0,7,0,-80,0,7,0,-79,0,7,0,-78,0,
+7,0,-77,0,4,0,-76,0,4,0,-75,0,61,0,4,0,7,0,-69,0,
+7,0,-68,0,7,0,-67,0,4,0,78,0,62,0,10,0,61,0,-66,0,
+13,0,-65,0,13,0,-64,0,13,0,-63,0,13,0,-62,0,13,0,-61,0,
+7,0,-123,0,7,0,-60,0,4,0,-59,0,4,0,53,0,63,0,4,0,
+61,0,-66,0,4,0,-58,0,7,0,-57,0,4,0,-56,0,64,0,4,0,
+13,0,-61,0,61,0,-66,0,4,0,-55,0,7,0,-54,0,65,0,7,0,
+13,0,-53,0,61,0,-66,0,4,0,-52,0,7,0,-51,0,7,0,-50,0,
+7,0,-49,0,4,0,53,0,66,0,6,0,15,0,-48,0,13,0,-50,0,
+13,0,-47,0,52,0,-46,0,4,0,-45,0,7,0,-49,0,67,0,26,0,
+4,0,-44,0,7,0,-43,0,7,0,-81,0,7,0,-42,0,7,0,-41,0,
+7,0,-40,0,7,0,-39,0,7,0,-38,0,7,0,-37,0,7,0,-36,0,
+7,0,-35,0,7,0,-34,0,7,0,-33,0,7,0,-32,0,7,0,-31,0,
+7,0,-30,0,7,0,-29,0,7,0,-28,0,7,0,-27,0,7,0,-26,0,
+7,0,-25,0,4,0,-24,0,4,0,-23,0,4,0,-22,0,4,0,-21,0,
+4,0,116,0,68,0,12,0,15,0,-20,0,15,0,-19,0,15,0,-18,0,
+13,0,-17,0,13,0,-16,0,7,0,-15,0,4,0,-14,0,4,0,-13,0,
+4,0,-12,0,4,0,-11,0,7,0,-51,0,4,0,53,0,69,0,27,0,
+17,0,-10,0,15,0,-9,0,15,0,-8,0,13,0,-17,0,13,0,-7,0,
+13,0,-6,0,13,0,-5,0,13,0,-4,0,13,0,-3,0,4,0,-2,0,
+7,0,-1,0,4,0,0,1,4,0,1,1,4,0,2,1,7,0,3,1,
+7,0,4,1,4,0,5,1,4,0,6,1,7,0,7,1,7,0,8,1,
+7,0,9,1,7,0,10,1,7,0,11,1,7,0,12,1,4,0,13,1,
+4,0,14,1,4,0,15,1,70,0,12,0,9,0,16,1,9,0,17,1,
+13,0,18,1,7,0,19,1,7,0,20,1,7,0,21,1,4,0,22,1,
+13,0,23,1,4,0,24,1,4,0,25,1,4,0,26,1,4,0,53,0,
+71,0,19,0,47,0,122,0,68,0,27,1,61,0,28,1,62,0,29,1,
+63,0,30,1,64,0,31,1,65,0,32,1,66,0,33,1,69,0,34,1,
+70,0,35,1,4,0,36,1,4,0,1,1,4,0,37,1,4,0,38,1,
+4,0,39,1,4,0,40,1,4,0,41,1,4,0,42,1,67,0,43,1,
+};
+int sBulletDNAlen = static_cast<int>(sizeof(sBulletDNAstr)); // size in bytes of the sBulletDNAstr table
+
+ char sBulletDNAstr64[]= {
+83,68,78,65,78,65,77,69,44,1,0,0,109,95,115,105,122,101,0,109,
+95,99,97,112,97,99,105,116,121,0,42,109,95,100,97,116,97,0,109,95,
+99,111,108,108,105,115,105,111,110,83,104,97,112,101,115,0,109,95,99,111,
+108,108,105,115,105,111,110,79,98,106,101,99,116,115,0,109,95,99,111,110,
+115,116,114,97,105,110,116,115,0,42,102,105,114,115,116,0,42,108,97,115,
+116,0,109,95,102,108,111,97,116,115,91,52,93,0,109,95,101,108,91,51,
+93,0,109,95,98,97,115,105,115,0,109,95,111,114,105,103,105,110,0,109,
+95,114,111,111,116,78,111,100,101,73,110,100,101,120,0,109,95,115,117,98,
+116,114,101,101,83,105,122,101,0,109,95,113,117,97,110,116,105,122,101,100,
+65,97,98,98,77,105,110,91,51,93,0,109,95,113,117,97,110,116,105,122,
+101,100,65,97,98,98,77,97,120,91,51,93,0,109,95,97,97,98,98,77,
+105,110,79,114,103,0,109,95,97,97,98,98,77,97,120,79,114,103,0,109,
+95,101,115,99,97,112,101,73,110,100,101,120,0,109,95,115,117,98,80,97,
+114,116,0,109,95,116,114,105,97,110,103,108,101,73,110,100,101,120,0,109,
+95,112,97,100,91,52,93,0,109,95,101,115,99,97,112,101,73,110,100,101,
+120,79,114,84,114,105,97,110,103,108,101,73,110,100,101,120,0,109,95,98,
+118,104,65,97,98,98,77,105,110,0,109,95,98,118,104,65,97,98,98,77,
+97,120,0,109,95,98,118,104,81,117,97,110,116,105,122,97,116,105,111,110,
+0,109,95,99,117,114,78,111,100,101,73,110,100,101,120,0,109,95,117,115,
+101,81,117,97,110,116,105,122,97,116,105,111,110,0,109,95,110,117,109,67,
+111,110,116,105,103,117,111,117,115,76,101,97,102,78,111,100,101,115,0,109,
+95,110,117,109,81,117,97,110,116,105,122,101,100,67,111,110,116,105,103,117,
+111,117,115,78,111,100,101,115,0,42,109,95,99,111,110,116,105,103,117,111,
+117,115,78,111,100,101,115,80,116,114,0,42,109,95,113,117,97,110,116,105,
+122,101,100,67,111,110,116,105,103,117,111,117,115,78,111,100,101,115,80,116,
+114,0,42,109,95,115,117,98,84,114,101,101,73,110,102,111,80,116,114,0,
+109,95,116,114,97,118,101,114,115,97,108,77,111,100,101,0,109,95,110,117,
+109,83,117,98,116,114,101,101,72,101,97,100,101,114,115,0,42,109,95,110,
+97,109,101,0,109,95,115,104,97,112,101,84,121,112,101,0,109,95,112,97,
+100,100,105,110,103,91,52,93,0,109,95,99,111,108,108,105,115,105,111,110,
+83,104,97,112,101,68,97,116,97,0,109,95,108,111,99,97,108,83,99,97,
+108,105,110,103,0,109,95,112,108,97,110,101,78,111,114,109,97,108,0,109,
+95,112,108,97,110,101,67,111,110,115,116,97,110,116,0,109,95,105,109,112,
+108,105,99,105,116,83,104,97,112,101,68,105,109,101,110,115,105,111,110,115,
+0,109,95,99,111,108,108,105,115,105,111,110,77,97,114,103,105,110,0,109,
+95,112,97,100,100,105,110,103,0,109,95,112,111,115,0,109,95,114,97,100,
+105,117,115,0,109,95,99,111,110,118,101,120,73,110,116,101,114,110,97,108,
+83,104,97,112,101,68,97,116,97,0,42,109,95,108,111,99,97,108,80,111,
+115,105,116,105,111,110,65,114,114,97,121,80,116,114,0,109,95,108,111,99,
+97,108,80,111,115,105,116,105,111,110,65,114,114,97,121,83,105,122,101,0,
+109,95,118,97,108,117,101,0,109,95,112,97,100,91,50,93,0,109,95,118,
+97,108,117,101,115,91,51,93,0,109,95,112,97,100,0,42,109,95,118,101,
+114,116,105,99,101,115,51,102,0,42,109,95,118,101,114,116,105,99,101,115,
+51,100,0,42,109,95,105,110,100,105,99,101,115,51,50,0,42,109,95,51,
+105,110,100,105,99,101,115,49,54,0,42,109,95,51,105,110,100,105,99,101,
+115,56,0,42,109,95,105,110,100,105,99,101,115,49,54,0,109,95,110,117,
+109,84,114,105,97,110,103,108,101,115,0,109,95,110,117,109,86,101,114,116,
+105,99,101,115,0,42,109,95,109,101,115,104,80,97,114,116,115,80,116,114,
+0,109,95,115,99,97,108,105,110,103,0,109,95,110,117,109,77,101,115,104,
+80,97,114,116,115,0,109,95,109,101,115,104,73,110,116,101,114,102,97,99,
+101,0,42,109,95,113,117,97,110,116,105,122,101,100,70,108,111,97,116,66,
+118,104,0,42,109,95,113,117,97,110,116,105,122,101,100,68,111,117,98,108,
+101,66,118,104,0,42,109,95,116,114,105,97,110,103,108,101,73,110,102,111,
+77,97,112,0,109,95,112,97,100,51,91,52,93,0,109,95,116,114,105,109,
+101,115,104,83,104,97,112,101,68,97,116,97,0,109,95,116,114,97,110,115,
+102,111,114,109,0,42,109,95,99,104,105,108,100,83,104,97,112,101,0,109,
+95,99,104,105,108,100,83,104,97,112,101,84,121,112,101,0,109,95,99,104,
+105,108,100,77,97,114,103,105,110,0,42,109,95,99,104,105,108,100,83,104,
+97,112,101,80,116,114,0,109,95,110,117,109,67,104,105,108,100,83,104,97,
+112,101,115,0,109,95,117,112,65,120,105,115,0,109,95,102,108,97,103,115,
+0,109,95,101,100,103,101,86,48,86,49,65,110,103,108,101,0,109,95,101,
+100,103,101,86,49,86,50,65,110,103,108,101,0,109,95,101,100,103,101,86,
+50,86,48,65,110,103,108,101,0,42,109,95,104,97,115,104,84,97,98,108,
+101,80,116,114,0,42,109,95,110,101,120,116,80,116,114,0,42,109,95,118,
+97,108,117,101,65,114,114,97,121,80,116,114,0,42,109,95,107,101,121,65,
+114,114,97,121,80,116,114,0,109,95,99,111,110,118,101,120,69,112,115,105,
+108,111,110,0,109,95,112,108,97,110,97,114,69,112,115,105,108,111,110,0,
+109,95,101,113,117,97,108,86,101,114,116,101,120,84,104,114,101,115,104,111,
+108,100,0,109,95,101,100,103,101,68,105,115,116,97,110,99,101,84,104,114,
+101,115,104,111,108,100,0,109,95,122,101,114,111,65,114,101,97,84,104,114,
+101,115,104,111,108,100,0,109,95,110,101,120,116,83,105,122,101,0,109,95,
+104,97,115,104,84,97,98,108,101,83,105,122,101,0,109,95,110,117,109,86,
+97,108,117,101,115,0,109,95,110,117,109,75,101,121,115,0,109,95,103,105,
+109,112,97,99,116,83,117,98,84,121,112,101,0,42,109,95,117,110,115,99,
+97,108,101,100,80,111,105,110,116,115,70,108,111,97,116,80,116,114,0,42,
+109,95,117,110,115,99,97,108,101,100,80,111,105,110,116,115,68,111,117,98,
+108,101,80,116,114,0,109,95,110,117,109,85,110,115,99,97,108,101,100,80,
+111,105,110,116,115,0,109,95,112,97,100,100,105,110,103,51,91,52,93,0,
+42,109,95,98,114,111,97,100,112,104,97,115,101,72,97,110,100,108,101,0,
+42,109,95,99,111,108,108,105,115,105,111,110,83,104,97,112,101,0,42,109,
+95,114,111,111,116,67,111,108,108,105,115,105,111,110,83,104,97,112,101,0,
+109,95,119,111,114,108,100,84,114,97,110,115,102,111,114,109,0,109,95,105,
+110,116,101,114,112,111,108,97,116,105,111,110,87,111,114,108,100,84,114,97,
+110,115,102,111,114,109,0,109,95,105,110,116,101,114,112,111,108,97,116,105,
+111,110,76,105,110,101,97,114,86,101,108,111,99,105,116,121,0,109,95,105,
+110,116,101,114,112,111,108,97,116,105,111,110,65,110,103,117,108,97,114,86,
+101,108,111,99,105,116,121,0,109,95,97,110,105,115,111,116,114,111,112,105,
+99,70,114,105,99,116,105,111,110,0,109,95,99,111,110,116,97,99,116,80,
+114,111,99,101,115,115,105,110,103,84,104,114,101,115,104,111,108,100,0,109,
+95,100,101,97,99,116,105,118,97,116,105,111,110,84,105,109,101,0,109,95,
+102,114,105,99,116,105,111,110,0,109,95,114,101,115,116,105,116,117,116,105,
+111,110,0,109,95,104,105,116,70,114,97,99,116,105,111,110,0,109,95,99,
+99,100,83,119,101,112,116,83,112,104,101,114,101,82,97,100,105,117,115,0,
+109,95,99,99,100,77,111,116,105,111,110,84,104,114,101,115,104,111,108,100,
+0,109,95,104,97,115,65,110,105,115,111,116,114,111,112,105,99,70,114,105,
+99,116,105,111,110,0,109,95,99,111,108,108,105,115,105,111,110,70,108,97,
+103,115,0,109,95,105,115,108,97,110,100,84,97,103,49,0,109,95,99,111,
+109,112,97,110,105,111,110,73,100,0,109,95,97,99,116,105,118,97,116,105,
+111,110,83,116,97,116,101,49,0,109,95,105,110,116,101,114,110,97,108,84,
+121,112,101,0,109,95,99,104,101,99,107,67,111,108,108,105,100,101,87,105,
+116,104,0,109,95,99,111,108,108,105,115,105,111,110,79,98,106,101,99,116,
+68,97,116,97,0,109,95,105,110,118,73,110,101,114,116,105,97,84,101,110,
+115,111,114,87,111,114,108,100,0,109,95,108,105,110,101,97,114,86,101,108,
+111,99,105,116,121,0,109,95,97,110,103,117,108,97,114,86,101,108,111,99,
+105,116,121,0,109,95,97,110,103,117,108,97,114,70,97,99,116,111,114,0,
+109,95,108,105,110,101,97,114,70,97,99,116,111,114,0,109,95,103,114,97,
+118,105,116,121,0,109,95,103,114,97,118,105,116,121,95,97,99,99,101,108,
+101,114,97,116,105,111,110,0,109,95,105,110,118,73,110,101,114,116,105,97,
+76,111,99,97,108,0,109,95,116,111,116,97,108,70,111,114,99,101,0,109,
+95,116,111,116,97,108,84,111,114,113,117,101,0,109,95,105,110,118,101,114,
+115,101,77,97,115,115,0,109,95,108,105,110,101,97,114,68,97,109,112,105,
+110,103,0,109,95,97,110,103,117,108,97,114,68,97,109,112,105,110,103,0,
+109,95,97,100,100,105,116,105,111,110,97,108,68,97,109,112,105,110,103,70,
+97,99,116,111,114,0,109,95,97,100,100,105,116,105,111,110,97,108,76,105,
+110,101,97,114,68,97,109,112,105,110,103,84,104,114,101,115,104,111,108,100,
+83,113,114,0,109,95,97,100,100,105,116,105,111,110,97,108,65,110,103,117,
+108,97,114,68,97,109,112,105,110,103,84,104,114,101,115,104,111,108,100,83,
+113,114,0,109,95,97,100,100,105,116,105,111,110,97,108,65,110,103,117,108,
+97,114,68,97,109,112,105,110,103,70,97,99,116,111,114,0,109,95,108,105,
+110,101,97,114,83,108,101,101,112,105,110,103,84,104,114,101,115,104,111,108,
+100,0,109,95,97,110,103,117,108,97,114,83,108,101,101,112,105,110,103,84,
+104,114,101,115,104,111,108,100,0,109,95,97,100,100,105,116,105,111,110,97,
+108,68,97,109,112,105,110,103,0,109,95,110,117,109,67,111,110,115,116,114,
+97,105,110,116,82,111,119,115,0,110,117,98,0,42,109,95,114,98,65,0,
+42,109,95,114,98,66,0,109,95,111,98,106,101,99,116,84,121,112,101,0,
+109,95,117,115,101,114,67,111,110,115,116,114,97,105,110,116,84,121,112,101,
+0,109,95,117,115,101,114,67,111,110,115,116,114,97,105,110,116,73,100,0,
+109,95,110,101,101,100,115,70,101,101,100,98,97,99,107,0,109,95,97,112,
+112,108,105,101,100,73,109,112,117,108,115,101,0,109,95,100,98,103,68,114,
+97,119,83,105,122,101,0,109,95,100,105,115,97,98,108,101,67,111,108,108,
+105,115,105,111,110,115,66,101,116,119,101,101,110,76,105,110,107,101,100,66,
+111,100,105,101,115,0,109,95,111,118,101,114,114,105,100,101,78,117,109,83,
+111,108,118,101,114,73,116,101,114,97,116,105,111,110,115,0,109,95,98,114,
+101,97,107,105,110,103,73,109,112,117,108,115,101,84,104,114,101,115,104,111,
+108,100,0,109,95,105,115,69,110,97,98,108,101,100,0,109,95,116,121,112,
+101,67,111,110,115,116,114,97,105,110,116,68,97,116,97,0,109,95,112,105,
+118,111,116,73,110,65,0,109,95,112,105,118,111,116,73,110,66,0,109,95,
+114,98,65,70,114,97,109,101,0,109,95,114,98,66,70,114,97,109,101,0,
+109,95,117,115,101,82,101,102,101,114,101,110,99,101,70,114,97,109,101,65,
+0,109,95,97,110,103,117,108,97,114,79,110,108,121,0,109,95,101,110,97,
+98,108,101,65,110,103,117,108,97,114,77,111,116,111,114,0,109,95,109,111,
+116,111,114,84,97,114,103,101,116,86,101,108,111,99,105,116,121,0,109,95,
+109,97,120,77,111,116,111,114,73,109,112,117,108,115,101,0,109,95,108,111,
+119,101,114,76,105,109,105,116,0,109,95,117,112,112,101,114,76,105,109,105,
+116,0,109,95,108,105,109,105,116,83,111,102,116,110,101,115,115,0,109,95,
+98,105,97,115,70,97,99,116,111,114,0,109,95,114,101,108,97,120,97,116,
+105,111,110,70,97,99,116,111,114,0,109,95,115,119,105,110,103,83,112,97,
+110,49,0,109,95,115,119,105,110,103,83,112,97,110,50,0,109,95,116,119,
+105,115,116,83,112,97,110,0,109,95,100,97,109,112,105,110,103,0,109,95,
+108,105,110,101,97,114,85,112,112,101,114,76,105,109,105,116,0,109,95,108,
+105,110,101,97,114,76,111,119,101,114,76,105,109,105,116,0,109,95,97,110,
+103,117,108,97,114,85,112,112,101,114,76,105,109,105,116,0,109,95,97,110,
+103,117,108,97,114,76,111,119,101,114,76,105,109,105,116,0,109,95,117,115,
+101,76,105,110,101,97,114,82,101,102,101,114,101,110,99,101,70,114,97,109,
+101,65,0,109,95,117,115,101,79,102,102,115,101,116,70,111,114,67,111,110,
+115,116,114,97,105,110,116,70,114,97,109,101,0,109,95,54,100,111,102,68,
+97,116,97,0,109,95,115,112,114,105,110,103,69,110,97,98,108,101,100,91,
+54,93,0,109,95,101,113,117,105,108,105,98,114,105,117,109,80,111,105,110,
+116,91,54,93,0,109,95,115,112,114,105,110,103,83,116,105,102,102,110,101,
+115,115,91,54,93,0,109,95,115,112,114,105,110,103,68,97,109,112,105,110,
+103,91,54,93,0,109,95,108,105,110,101,97,114,83,116,105,102,102,110,101,
+115,115,0,109,95,97,110,103,117,108,97,114,83,116,105,102,102,110,101,115,
+115,0,109,95,118,111,108,117,109,101,83,116,105,102,102,110,101,115,115,0,
+42,109,95,109,97,116,101,114,105,97,108,0,109,95,112,111,115,105,116,105,
+111,110,0,109,95,112,114,101,118,105,111,117,115,80,111,115,105,116,105,111,
+110,0,109,95,118,101,108,111,99,105,116,121,0,109,95,97,99,99,117,109,
+117,108,97,116,101,100,70,111,114,99,101,0,109,95,110,111,114,109,97,108,
+0,109,95,97,114,101,97,0,109,95,97,116,116,97,99,104,0,109,95,110,
+111,100,101,73,110,100,105,99,101,115,91,50,93,0,109,95,114,101,115,116,
+76,101,110,103,116,104,0,109,95,98,98,101,110,100,105,110,103,0,109,95,
+110,111,100,101,73,110,100,105,99,101,115,91,51,93,0,109,95,114,101,115,
+116,65,114,101,97,0,109,95,99,48,91,52,93,0,109,95,110,111,100,101,
+73,110,100,105,99,101,115,91,52,93,0,109,95,114,101,115,116,86,111,108,
+117,109,101,0,109,95,99,49,0,109,95,99,50,0,109,95,99,48,0,109,
+95,108,111,99,97,108,70,114,97,109,101,0,42,109,95,114,105,103,105,100,
+66,111,100,121,0,109,95,110,111,100,101,73,110,100,101,120,0,109,95,97,
+101,114,111,77,111,100,101,108,0,109,95,98,97,117,109,103,97,114,116,101,
+0,109,95,100,114,97,103,0,109,95,108,105,102,116,0,109,95,112,114,101,
+115,115,117,114,101,0,109,95,118,111,108,117,109,101,0,109,95,100,121,110,
+97,109,105,99,70,114,105,99,116,105,111,110,0,109,95,112,111,115,101,77,
+97,116,99,104,0,109,95,114,105,103,105,100,67,111,110,116,97,99,116,72,
+97,114,100,110,101,115,115,0,109,95,107,105,110,101,116,105,99,67,111,110,
+116,97,99,116,72,97,114,100,110,101,115,115,0,109,95,115,111,102,116,67,
+111,110,116,97,99,116,72,97,114,100,110,101,115,115,0,109,95,97,110,99,
+104,111,114,72,97,114,100,110,101,115,115,0,109,95,115,111,102,116,82,105,
+103,105,100,67,108,117,115,116,101,114,72,97,114,100,110,101,115,115,0,109,
+95,115,111,102,116,75,105,110,101,116,105,99,67,108,117,115,116,101,114,72,
+97,114,100,110,101,115,115,0,109,95,115,111,102,116,83,111,102,116,67,108,
+117,115,116,101,114,72,97,114,100,110,101,115,115,0,109,95,115,111,102,116,
+82,105,103,105,100,67,108,117,115,116,101,114,73,109,112,117,108,115,101,83,
+112,108,105,116,0,109,95,115,111,102,116,75,105,110,101,116,105,99,67,108,
+117,115,116,101,114,73,109,112,117,108,115,101,83,112,108,105,116,0,109,95,
+115,111,102,116,83,111,102,116,67,108,117,115,116,101,114,73,109,112,117,108,
+115,101,83,112,108,105,116,0,109,95,109,97,120,86,111,108,117,109,101,0,
+109,95,116,105,109,101,83,99,97,108,101,0,109,95,118,101,108,111,99,105,
+116,121,73,116,101,114,97,116,105,111,110,115,0,109,95,112,111,115,105,116,
+105,111,110,73,116,101,114,97,116,105,111,110,115,0,109,95,100,114,105,102,
+116,73,116,101,114,97,116,105,111,110,115,0,109,95,99,108,117,115,116,101,
+114,73,116,101,114,97,116,105,111,110,115,0,109,95,114,111,116,0,109,95,
+115,99,97,108,101,0,109,95,97,113,113,0,109,95,99,111,109,0,42,109,
+95,112,111,115,105,116,105,111,110,115,0,42,109,95,119,101,105,103,104,116,
+115,0,109,95,110,117,109,80,111,115,105,116,105,111,110,115,0,109,95,110,
+117,109,87,101,105,103,116,115,0,109,95,98,118,111,108,117,109,101,0,109,
+95,98,102,114,97,109,101,0,109,95,102,114,97,109,101,120,102,111,114,109,
+0,109,95,108,111,99,105,105,0,109,95,105,110,118,119,105,0,109,95,118,
+105,109,112,117,108,115,101,115,91,50,93,0,109,95,100,105,109,112,117,108,
+115,101,115,91,50,93,0,109,95,108,118,0,109,95,97,118,0,42,109,95,
+102,114,97,109,101,114,101,102,115,0,42,109,95,110,111,100,101,73,110,100,
+105,99,101,115,0,42,109,95,109,97,115,115,101,115,0,109,95,110,117,109,
+70,114,97,109,101,82,101,102,115,0,109,95,110,117,109,78,111,100,101,115,
+0,109,95,110,117,109,77,97,115,115,101,115,0,109,95,105,100,109,97,115,
+115,0,109,95,105,109,97,115,115,0,109,95,110,118,105,109,112,117,108,115,
+101,115,0,109,95,110,100,105,109,112,117,108,115,101,115,0,109,95,110,100,
+97,109,112,105,110,103,0,109,95,108,100,97,109,112,105,110,103,0,109,95,
+97,100,97,109,112,105,110,103,0,109,95,109,97,116,99,104,105,110,103,0,
+109,95,109,97,120,83,101,108,102,67,111,108,108,105,115,105,111,110,73,109,
+112,117,108,115,101,0,109,95,115,101,108,102,67,111,108,108,105,115,105,111,
+110,73,109,112,117,108,115,101,70,97,99,116,111,114,0,109,95,99,111,110,
+116,97,105,110,115,65,110,99,104,111,114,0,109,95,99,111,108,108,105,100,
+101,0,109,95,99,108,117,115,116,101,114,73,110,100,101,120,0,42,109,95,
+98,111,100,121,65,0,42,109,95,98,111,100,121,66,0,109,95,114,101,102,
+115,91,50,93,0,109,95,99,102,109,0,109,95,101,114,112,0,109,95,115,
+112,108,105,116,0,109,95,100,101,108,101,116,101,0,109,95,114,101,108,80,
+111,115,105,116,105,111,110,91,50,93,0,109,95,98,111,100,121,65,116,121,
+112,101,0,109,95,98,111,100,121,66,116,121,112,101,0,109,95,106,111,105,
+110,116,84,121,112,101,0,42,109,95,112,111,115,101,0,42,42,109,95,109,
+97,116,101,114,105,97,108,115,0,42,109,95,110,111,100,101,115,0,42,109,
+95,108,105,110,107,115,0,42,109,95,102,97,99,101,115,0,42,109,95,116,
+101,116,114,97,104,101,100,114,97,0,42,109,95,97,110,99,104,111,114,115,
+0,42,109,95,99,108,117,115,116,101,114,115,0,42,109,95,106,111,105,110,
+116,115,0,109,95,110,117,109,77,97,116,101,114,105,97,108,115,0,109,95,
+110,117,109,76,105,110,107,115,0,109,95,110,117,109,70,97,99,101,115,0,
+109,95,110,117,109,84,101,116,114,97,104,101,100,114,97,0,109,95,110,117,
+109,65,110,99,104,111,114,115,0,109,95,110,117,109,67,108,117,115,116,101,
+114,115,0,109,95,110,117,109,74,111,105,110,116,115,0,109,95,99,111,110,
+102,105,103,0,84,89,80,69,72,0,0,0,99,104,97,114,0,117,99,104,
+97,114,0,115,104,111,114,116,0,117,115,104,111,114,116,0,105,110,116,0,
+108,111,110,103,0,117,108,111,110,103,0,102,108,111,97,116,0,100,111,117,
+98,108,101,0,118,111,105,100,0,80,111,105,110,116,101,114,65,114,114,97,
+121,0,98,116,80,104,121,115,105,99,115,83,121,115,116,101,109,0,76,105,
+115,116,66,97,115,101,0,98,116,86,101,99,116,111,114,51,70,108,111,97,
+116,68,97,116,97,0,98,116,86,101,99,116,111,114,51,68,111,117,98,108,
+101,68,97,116,97,0,98,116,77,97,116,114,105,120,51,120,51,70,108,111,
+97,116,68,97,116,97,0,98,116,77,97,116,114,105,120,51,120,51,68,111,
+117,98,108,101,68,97,116,97,0,98,116,84,114,97,110,115,102,111,114,109,
+70,108,111,97,116,68,97,116,97,0,98,116,84,114,97,110,115,102,111,114,
+109,68,111,117,98,108,101,68,97,116,97,0,98,116,66,118,104,83,117,98,
+116,114,101,101,73,110,102,111,68,97,116,97,0,98,116,79,112,116,105,109,
+105,122,101,100,66,118,104,78,111,100,101,70,108,111,97,116,68,97,116,97,
+0,98,116,79,112,116,105,109,105,122,101,100,66,118,104,78,111,100,101,68,
+111,117,98,108,101,68,97,116,97,0,98,116,81,117,97,110,116,105,122,101,
+100,66,118,104,78,111,100,101,68,97,116,97,0,98,116,81,117,97,110,116,
+105,122,101,100,66,118,104,70,108,111,97,116,68,97,116,97,0,98,116,81,
+117,97,110,116,105,122,101,100,66,118,104,68,111,117,98,108,101,68,97,116,
+97,0,98,116,67,111,108,108,105,115,105,111,110,83,104,97,112,101,68,97,
+116,97,0,98,116,83,116,97,116,105,99,80,108,97,110,101,83,104,97,112,
+101,68,97,116,97,0,98,116,67,111,110,118,101,120,73,110,116,101,114,110,
+97,108,83,104,97,112,101,68,97,116,97,0,98,116,80,111,115,105,116,105,
+111,110,65,110,100,82,97,100,105,117,115,0,98,116,77,117,108,116,105,83,
+112,104,101,114,101,83,104,97,112,101,68,97,116,97,0,98,116,73,110,116,
+73,110,100,101,120,68,97,116,97,0,98,116,83,104,111,114,116,73,110,116,
+73,110,100,101,120,68,97,116,97,0,98,116,83,104,111,114,116,73,110,116,
+73,110,100,101,120,84,114,105,112,108,101,116,68,97,116,97,0,98,116,67,
+104,97,114,73,110,100,101,120,84,114,105,112,108,101,116,68,97,116,97,0,
+98,116,77,101,115,104,80,97,114,116,68,97,116,97,0,98,116,83,116,114,
+105,100,105,110,103,77,101,115,104,73,110,116,101,114,102,97,99,101,68,97,
+116,97,0,98,116,84,114,105,97,110,103,108,101,77,101,115,104,83,104,97,
+112,101,68,97,116,97,0,98,116,84,114,105,97,110,103,108,101,73,110,102,
+111,77,97,112,68,97,116,97,0,98,116,83,99,97,108,101,100,84,114,105,
+97,110,103,108,101,77,101,115,104,83,104,97,112,101,68,97,116,97,0,98,
+116,67,111,109,112,111,117,110,100,83,104,97,112,101,67,104,105,108,100,68,
+97,116,97,0,98,116,67,111,109,112,111,117,110,100,83,104,97,112,101,68,
+97,116,97,0,98,116,67,121,108,105,110,100,101,114,83,104,97,112,101,68,
+97,116,97,0,98,116,67,97,112,115,117,108,101,83,104,97,112,101,68,97,
+116,97,0,98,116,84,114,105,97,110,103,108,101,73,110,102,111,68,97,116,
+97,0,98,116,71,73,109,112,97,99,116,77,101,115,104,83,104,97,112,101,
+68,97,116,97,0,98,116,67,111,110,118,101,120,72,117,108,108,83,104,97,
+112,101,68,97,116,97,0,98,116,67,111,108,108,105,115,105,111,110,79,98,
+106,101,99,116,68,111,117,98,108,101,68,97,116,97,0,98,116,67,111,108,
+108,105,115,105,111,110,79,98,106,101,99,116,70,108,111,97,116,68,97,116,
+97,0,98,116,82,105,103,105,100,66,111,100,121,70,108,111,97,116,68,97,
+116,97,0,98,116,82,105,103,105,100,66,111,100,121,68,111,117,98,108,101,
+68,97,116,97,0,98,116,67,111,110,115,116,114,97,105,110,116,73,110,102,
+111,49,0,98,116,84,121,112,101,100,67,111,110,115,116,114,97,105,110,116,
+68,97,116,97,0,98,116,82,105,103,105,100,66,111,100,121,68,97,116,97,
+0,98,116,80,111,105,110,116,50,80,111,105,110,116,67,111,110,115,116,114,
+97,105,110,116,70,108,111,97,116,68,97,116,97,0,98,116,80,111,105,110,
+116,50,80,111,105,110,116,67,111,110,115,116,114,97,105,110,116,68,111,117,
+98,108,101,68,97,116,97,0,98,116,72,105,110,103,101,67,111,110,115,116,
+114,97,105,110,116,68,111,117,98,108,101,68,97,116,97,0,98,116,72,105,
+110,103,101,67,111,110,115,116,114,97,105,110,116,70,108,111,97,116,68,97,
+116,97,0,98,116,67,111,110,101,84,119,105,115,116,67,111,110,115,116,114,
+97,105,110,116,68,97,116,97,0,98,116,71,101,110,101,114,105,99,54,68,
+111,102,67,111,110,115,116,114,97,105,110,116,68,97,116,97,0,98,116,71,
+101,110,101,114,105,99,54,68,111,102,83,112,114,105,110,103,67,111,110,115,
+116,114,97,105,110,116,68,97,116,97,0,98,116,83,108,105,100,101,114,67,
+111,110,115,116,114,97,105,110,116,68,97,116,97,0,83,111,102,116,66,111,
+100,121,77,97,116,101,114,105,97,108,68,97,116,97,0,83,111,102,116,66,
+111,100,121,78,111,100,101,68,97,116,97,0,83,111,102,116,66,111,100,121,
+76,105,110,107,68,97,116,97,0,83,111,102,116,66,111,100,121,70,97,99,
+101,68,97,116,97,0,83,111,102,116,66,111,100,121,84,101,116,114,97,68,
+97,116,97,0,83,111,102,116,82,105,103,105,100,65,110,99,104,111,114,68,
+97,116,97,0,83,111,102,116,66,111,100,121,67,111,110,102,105,103,68,97,
+116,97,0,83,111,102,116,66,111,100,121,80,111,115,101,68,97,116,97,0,
+83,111,102,116,66,111,100,121,67,108,117,115,116,101,114,68,97,116,97,0,
+98,116,83,111,102,116,66,111,100,121,74,111,105,110,116,68,97,116,97,0,
+98,116,83,111,102,116,66,111,100,121,70,108,111,97,116,68,97,116,97,0,
+84,76,69,78,1,0,1,0,2,0,2,0,4,0,4,0,4,0,4,0,
+8,0,0,0,16,0,48,0,16,0,16,0,32,0,48,0,96,0,64,0,
+-128,0,20,0,48,0,80,0,16,0,96,0,-112,0,16,0,56,0,56,0,
+20,0,72,0,4,0,4,0,8,0,4,0,56,0,32,0,80,0,72,0,
+96,0,80,0,32,0,64,0,64,0,16,0,72,0,80,0,-40,1,8,1,
+-16,1,-88,3,8,0,64,0,0,0,96,0,-128,0,104,1,-24,0,-32,0,
+8,1,104,1,-40,0,16,0,104,0,24,0,40,0,104,0,96,0,104,0,
+-56,0,104,1,112,0,-40,1,83,84,82,67,61,0,0,0,10,0,3,0,
+4,0,0,0,4,0,1,0,9,0,2,0,11,0,3,0,10,0,3,0,
+10,0,4,0,10,0,5,0,12,0,2,0,9,0,6,0,9,0,7,0,
+13,0,1,0,7,0,8,0,14,0,1,0,8,0,8,0,15,0,1,0,
+13,0,9,0,16,0,1,0,14,0,9,0,17,0,2,0,15,0,10,0,
+13,0,11,0,18,0,2,0,16,0,10,0,14,0,11,0,19,0,4,0,
+4,0,12,0,4,0,13,0,2,0,14,0,2,0,15,0,20,0,6,0,
+13,0,16,0,13,0,17,0,4,0,18,0,4,0,19,0,4,0,20,0,
+0,0,21,0,21,0,6,0,14,0,16,0,14,0,17,0,4,0,18,0,
+4,0,19,0,4,0,20,0,0,0,21,0,22,0,3,0,2,0,14,0,
+2,0,15,0,4,0,22,0,23,0,12,0,13,0,23,0,13,0,24,0,
+13,0,25,0,4,0,26,0,4,0,27,0,4,0,28,0,4,0,29,0,
+20,0,30,0,22,0,31,0,19,0,32,0,4,0,33,0,4,0,34,0,
+24,0,12,0,14,0,23,0,14,0,24,0,14,0,25,0,4,0,26,0,
+4,0,27,0,4,0,28,0,4,0,29,0,21,0,30,0,22,0,31,0,
+4,0,33,0,4,0,34,0,19,0,32,0,25,0,3,0,0,0,35,0,
+4,0,36,0,0,0,37,0,26,0,5,0,25,0,38,0,13,0,39,0,
+13,0,40,0,7,0,41,0,0,0,21,0,27,0,5,0,25,0,38,0,
+13,0,39,0,13,0,42,0,7,0,43,0,4,0,44,0,28,0,2,0,
+13,0,45,0,7,0,46,0,29,0,4,0,27,0,47,0,28,0,48,0,
+4,0,49,0,0,0,37,0,30,0,1,0,4,0,50,0,31,0,2,0,
+2,0,50,0,0,0,51,0,32,0,2,0,2,0,52,0,0,0,51,0,
+33,0,2,0,0,0,52,0,0,0,53,0,34,0,8,0,13,0,54,0,
+14,0,55,0,30,0,56,0,32,0,57,0,33,0,58,0,31,0,59,0,
+4,0,60,0,4,0,61,0,35,0,4,0,34,0,62,0,13,0,63,0,
+4,0,64,0,0,0,37,0,36,0,7,0,25,0,38,0,35,0,65,0,
+23,0,66,0,24,0,67,0,37,0,68,0,7,0,43,0,0,0,69,0,
+38,0,2,0,36,0,70,0,13,0,39,0,39,0,4,0,17,0,71,0,
+25,0,72,0,4,0,73,0,7,0,74,0,40,0,4,0,25,0,38,0,
+39,0,75,0,4,0,76,0,7,0,43,0,41,0,3,0,27,0,47,0,
+4,0,77,0,0,0,37,0,42,0,3,0,27,0,47,0,4,0,77,0,
+0,0,37,0,43,0,4,0,4,0,78,0,7,0,79,0,7,0,80,0,
+7,0,81,0,37,0,14,0,4,0,82,0,4,0,83,0,43,0,84,0,
+4,0,85,0,7,0,86,0,7,0,87,0,7,0,88,0,7,0,89,0,
+7,0,90,0,4,0,91,0,4,0,92,0,4,0,93,0,4,0,94,0,
+0,0,37,0,44,0,5,0,25,0,38,0,35,0,65,0,13,0,39,0,
+7,0,43,0,4,0,95,0,45,0,5,0,27,0,47,0,13,0,96,0,
+14,0,97,0,4,0,98,0,0,0,99,0,46,0,24,0,9,0,100,0,
+9,0,101,0,25,0,102,0,0,0,35,0,18,0,103,0,18,0,104,0,
+14,0,105,0,14,0,106,0,14,0,107,0,8,0,108,0,8,0,109,0,
+8,0,110,0,8,0,111,0,8,0,112,0,8,0,113,0,8,0,114,0,
+4,0,115,0,4,0,116,0,4,0,117,0,4,0,118,0,4,0,119,0,
+4,0,120,0,4,0,121,0,0,0,37,0,47,0,23,0,9,0,100,0,
+9,0,101,0,25,0,102,0,0,0,35,0,17,0,103,0,17,0,104,0,
+13,0,105,0,13,0,106,0,13,0,107,0,7,0,108,0,7,0,109,0,
+7,0,110,0,7,0,111,0,7,0,112,0,7,0,113,0,7,0,114,0,
+4,0,115,0,4,0,116,0,4,0,117,0,4,0,118,0,4,0,119,0,
+4,0,120,0,4,0,121,0,48,0,21,0,47,0,122,0,15,0,123,0,
+13,0,124,0,13,0,125,0,13,0,126,0,13,0,127,0,13,0,-128,0,
+13,0,-127,0,13,0,-126,0,13,0,-125,0,13,0,-124,0,7,0,-123,0,
+7,0,-122,0,7,0,-121,0,7,0,-120,0,7,0,-119,0,7,0,-118,0,
+7,0,-117,0,7,0,-116,0,7,0,-115,0,4,0,-114,0,49,0,22,0,
+46,0,122,0,16,0,123,0,14,0,124,0,14,0,125,0,14,0,126,0,
+14,0,127,0,14,0,-128,0,14,0,-127,0,14,0,-126,0,14,0,-125,0,
+14,0,-124,0,8,0,-123,0,8,0,-122,0,8,0,-121,0,8,0,-120,0,
+8,0,-119,0,8,0,-118,0,8,0,-117,0,8,0,-116,0,8,0,-115,0,
+4,0,-114,0,0,0,37,0,50,0,2,0,4,0,-113,0,4,0,-112,0,
+51,0,13,0,52,0,-111,0,52,0,-110,0,0,0,35,0,4,0,-109,0,
+4,0,-108,0,4,0,-107,0,4,0,-106,0,7,0,-105,0,7,0,-104,0,
+4,0,-103,0,4,0,-102,0,7,0,-101,0,4,0,-100,0,53,0,3,0,
+51,0,-99,0,13,0,-98,0,13,0,-97,0,54,0,3,0,51,0,-99,0,
+14,0,-98,0,14,0,-97,0,55,0,13,0,51,0,-99,0,18,0,-96,0,
+18,0,-95,0,4,0,-94,0,4,0,-93,0,4,0,-92,0,7,0,-91,0,
+7,0,-90,0,7,0,-89,0,7,0,-88,0,7,0,-87,0,7,0,-86,0,
+7,0,-85,0,56,0,13,0,51,0,-99,0,17,0,-96,0,17,0,-95,0,
+4,0,-94,0,4,0,-93,0,4,0,-92,0,7,0,-91,0,7,0,-90,0,
+7,0,-89,0,7,0,-88,0,7,0,-87,0,7,0,-86,0,7,0,-85,0,
+57,0,11,0,51,0,-99,0,17,0,-96,0,17,0,-95,0,7,0,-84,0,
+7,0,-83,0,7,0,-82,0,7,0,-87,0,7,0,-86,0,7,0,-85,0,
+7,0,-81,0,0,0,21,0,58,0,9,0,51,0,-99,0,17,0,-96,0,
+17,0,-95,0,13,0,-80,0,13,0,-79,0,13,0,-78,0,13,0,-77,0,
+4,0,-76,0,4,0,-75,0,59,0,5,0,58,0,-74,0,4,0,-73,0,
+7,0,-72,0,7,0,-71,0,7,0,-70,0,60,0,9,0,51,0,-99,0,
+17,0,-96,0,17,0,-95,0,7,0,-80,0,7,0,-79,0,7,0,-78,0,
+7,0,-77,0,4,0,-76,0,4,0,-75,0,61,0,4,0,7,0,-69,0,
+7,0,-68,0,7,0,-67,0,4,0,78,0,62,0,10,0,61,0,-66,0,
+13,0,-65,0,13,0,-64,0,13,0,-63,0,13,0,-62,0,13,0,-61,0,
+7,0,-123,0,7,0,-60,0,4,0,-59,0,4,0,53,0,63,0,4,0,
+61,0,-66,0,4,0,-58,0,7,0,-57,0,4,0,-56,0,64,0,4,0,
+13,0,-61,0,61,0,-66,0,4,0,-55,0,7,0,-54,0,65,0,7,0,
+13,0,-53,0,61,0,-66,0,4,0,-52,0,7,0,-51,0,7,0,-50,0,
+7,0,-49,0,4,0,53,0,66,0,6,0,15,0,-48,0,13,0,-50,0,
+13,0,-47,0,52,0,-46,0,4,0,-45,0,7,0,-49,0,67,0,26,0,
+4,0,-44,0,7,0,-43,0,7,0,-81,0,7,0,-42,0,7,0,-41,0,
+7,0,-40,0,7,0,-39,0,7,0,-38,0,7,0,-37,0,7,0,-36,0,
+7,0,-35,0,7,0,-34,0,7,0,-33,0,7,0,-32,0,7,0,-31,0,
+7,0,-30,0,7,0,-29,0,7,0,-28,0,7,0,-27,0,7,0,-26,0,
+7,0,-25,0,4,0,-24,0,4,0,-23,0,4,0,-22,0,4,0,-21,0,
+4,0,116,0,68,0,12,0,15,0,-20,0,15,0,-19,0,15,0,-18,0,
+13,0,-17,0,13,0,-16,0,7,0,-15,0,4,0,-14,0,4,0,-13,0,
+4,0,-12,0,4,0,-11,0,7,0,-51,0,4,0,53,0,69,0,27,0,
+17,0,-10,0,15,0,-9,0,15,0,-8,0,13,0,-17,0,13,0,-7,0,
+13,0,-6,0,13,0,-5,0,13,0,-4,0,13,0,-3,0,4,0,-2,0,
+7,0,-1,0,4,0,0,1,4,0,1,1,4,0,2,1,7,0,3,1,
+7,0,4,1,4,0,5,1,4,0,6,1,7,0,7,1,7,0,8,1,
+7,0,9,1,7,0,10,1,7,0,11,1,7,0,12,1,4,0,13,1,
+4,0,14,1,4,0,15,1,70,0,12,0,9,0,16,1,9,0,17,1,
+13,0,18,1,7,0,19,1,7,0,20,1,7,0,21,1,4,0,22,1,
+13,0,23,1,4,0,24,1,4,0,25,1,4,0,26,1,4,0,53,0,
+71,0,19,0,47,0,122,0,68,0,27,1,61,0,28,1,62,0,29,1,
+63,0,30,1,64,0,31,1,65,0,32,1,66,0,33,1,69,0,34,1,
+70,0,35,1,4,0,36,1,4,0,1,1,4,0,37,1,4,0,38,1,
+4,0,39,1,4,0,40,1,4,0,41,1,4,0,42,1,67,0,43,1,
+};
+int sBulletDNAlen64 = (int)sizeof(sBulletDNAstr64);	// size in bytes of the char array defined above
diff --git a/src/bullet/LinearMath/btSerializer.h b/src/bullet/LinearMath/btSerializer.h
new file mode 100644
index 00000000..7d90ad20
--- /dev/null
+++ b/src/bullet/LinearMath/btSerializer.h
@@ -0,0 +1,639 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SERIALIZER_H
+#define BT_SERIALIZER_H
+
+#include "btScalar.h" // has definitions like SIMD_FORCE_INLINE
+#include "btStackAlloc.h"
+#include "btHashMap.h"
+
+#if !defined( __CELLOS_LV2__) && !defined(__MWERKS__)
+#include <memory.h>
+#endif
+#include <string.h>
+
+
+
+///DNA tables (the type descriptions parsed by btDefaultSerializer::initDNA) and their byte lengths, in a 32bit and a 64bit pointer flavour
+extern char sBulletDNAstr[];
+extern int sBulletDNAlen;
+extern char sBulletDNAstr64[];
+extern int sBulletDNAlen64;
+
+///Number of characters in str that precede its terminating zero.
+///A null str is reported as length 0.
+SIMD_FORCE_INLINE	int btStrLen(const char* str)
+{
+	// Guard: there is nothing to scan behind a null pointer.
+	if (str == 0)
+		return 0;
+
+	// Step through the characters up to the zero terminator, counting each one.
+	int count = 0;
+	for (const char* cursor = str; *cursor != 0; ++cursor)
+		++count;
+	return count;
+}
+
+
+class btChunk	// chunk descriptor: returned by btSerializer::allocate() and passed on to finalizeChunk()
+{
+public:
+	int		m_chunkCode;	// NOTE(review): presumably the chunkCode given to finalizeChunk (a BT_*_CODE id) - confirm
+	int		m_length;	// NOTE(review): presumably the payload size in bytes - confirm
+	void	*m_oldPtr;	// right after allocate(), btDefaultSerializer::writeDNA memcpy's the payload to this address
+	int		m_dna_nr;	// NOTE(review): presumably the DNA struct index of the payload type - confirm
+	int		m_number;	// NOTE(review): presumably the numElements value given to allocate() - confirm
+};
+
+enum	btSerializationFlags	// every enumerator occupies a bit of its own
+{
+	BT_SERIALIZE_NO_BVH = 1 << 0,
+	BT_SERIALIZE_NO_TRIANGLEINFOMAP = 1 << 1,
+	BT_SERIALIZE_NO_DUPLICATE_ASSERT = 1 << 2
+};
+
+class	btSerializer	// abstract serialization interface; btDefaultSerializer in this header derives from it
+{
+
+public:
+
+	virtual ~btSerializer() {}
+
+	virtual	const unsigned char*		getBufferPointer() const = 0;
+
+	virtual	int		getCurrentBufferSize() const = 0;
+	///allocate returns a chunk for numElements items; btDefaultSerializer::writeDNA asks for (m_dnaLength,1) and then fills m_dnaLength bytes at chunk->m_oldPtr
+	virtual	btChunk*	allocate(size_t size, int numElements) = 0;
+	///finalizeChunk receives the chunk together with a struct type name, a chunk code (see the BT_*_CODE ids) and the original pointer
+	virtual	void	finalizeChunk(btChunk* chunk, const char* structType, int chunkCode,void* oldPtr)= 0;
+	///findPointer: the btDefaultSerializer version returns the pointer it has recorded for oldPtr, or 0 when it has none
+	virtual	 void*	findPointer(void* oldPtr)  = 0;
+
+	virtual	void*	getUniquePointer(void*oldPtr) = 0;
+
+	virtual	void	startSerialization() = 0;
+	
+	virtual	void	finishSerialization() = 0;
+
+	virtual	const char*	findNameForPointer(const void* ptr) const = 0;
+
+	virtual	void	registerNameForPointer(const void* ptr, const char* name) = 0;
+
+	virtual void	serializeName(const char* ptr) = 0;
+	///the flags travel as a plain int; NOTE(review): presumably a combination of btSerializationFlags values - confirm
+	virtual int		getSerializationFlags() const = 0;
+
+	virtual void	setSerializationFlags(int flags) = 0;
+
+
+};
+
+
+
+#define BT_HEADER_LENGTH 12	// NOTE(review): presumably the size in bytes of the file header - confirm at its use site
+#if defined(__sgi) || defined (__sparc) || defined (__sparc__) || defined (__PPC__) || defined (__ppc__) || defined (__BIG_ENDIAN__)
+#	define MAKE_ID(a,b,c,d) ( (int)(a)<<24 | (int)(b)<<16 | (c)<<8 | (d) )
+#else
+#	define MAKE_ID(a,b,c,d) ( (int)(d)<<24 | (int)(c)<<16 | (b)<<8 | (a) )
+#endif
+// MAKE_ID packs four characters into one int: the platforms tested above put 'a' in the top byte, all others put 'a' in the lowest byte
+#define BT_SOFTBODY_CODE		MAKE_ID('S','B','D','Y')
+#define BT_COLLISIONOBJECT_CODE MAKE_ID('C','O','B','J')
+#define BT_RIGIDBODY_CODE		MAKE_ID('R','B','D','Y')
+#define BT_CONSTRAINT_CODE		MAKE_ID('C','O','N','S')
+#define BT_BOXSHAPE_CODE		MAKE_ID('B','O','X','S')
+#define BT_QUANTIZED_BVH_CODE	MAKE_ID('Q','B','V','H')
+#define BT_TRIANLGE_INFO_MAP	MAKE_ID('T','M','A','P')	// the name really is spelled "TRIANLGE" (sic)
+#define BT_SHAPE_CODE			MAKE_ID('S','H','A','P')
+#define BT_ARRAY_CODE			MAKE_ID('A','R','A','Y')
+#define BT_SBMATERIAL_CODE		MAKE_ID('S','B','M','T')
+#define BT_SBNODE_CODE			MAKE_ID('S','B','N','D')
+#define BT_DNA_CODE				MAKE_ID('D','N','A','1')	// the code btDefaultSerializer::writeDNA gives to finalizeChunk
+
+
+struct	btPointerUid	// value type stored in btDefaultSerializer::m_uniquePointers
+{
+	union	// anonymous union: the two members below share one storage location
+	{
+		void*	m_ptr;
+		int		m_uniqueIds[2];	// NOTE(review): presumably lets a pointer-sized id be filled in as ints - confirm in getUniquePointer
+	};
+};
+
+///The btDefaultSerializer is the main Bullet serialization class.
+///Its constructor takes an optional totalSize argument for backwards compatibility; it is recommended to leave it at its default of zero.
+class btDefaultSerializer	:	public btSerializer
+{
+
+
+	btAlignedObjectArray<char*>			mTypes;
+	btAlignedObjectArray<short*>			mStructs;
+	btAlignedObjectArray<short>			mTlens;
+	btHashMap<btHashInt, int>			mStructReverse;
+	btHashMap<btHashString,int>	mTypeLookup;
+
+	
+	btHashMap<btHashPtr,void*>	m_chunkP;
+	
+	btHashMap<btHashPtr,const char*>	m_nameMap;
+
+	btHashMap<btHashPtr,btPointerUid>	m_uniquePointers;
+	int	m_uniqueIdGenerator;
+
+	int					m_totalSize;
+	unsigned char*		m_buffer;
+	int					m_currentSize;
+	void*				m_dna;
+	int					m_dnaLength;
+
+	int					m_serializationFlags;
+
+
+	btAlignedObjectArray<btChunk*>	m_chunkPtrs;
+	
+protected:
+
+	///Returns the pointer stored in m_chunkP under oldPtr, or 0 when nothing usable is stored.
+	virtual	void*	findPointer(void* oldPtr)
+	{
+		void** slot = m_chunkP.find(oldPtr);
+		// A missing key and a key that maps to a null pointer both give 0.
+		return (slot != 0 && *slot != 0) ? *slot : 0;
+	}
+
+	
+
+
+
+		void	writeDNA()	// stores the DNA table in a chunk of its own, finalized as "DNA1" with BT_DNA_CODE
+		{
+			btChunk* const chunk = allocate(m_dnaLength,1);	// request: one element of m_dnaLength
+			memcpy(chunk->m_oldPtr,m_dna,m_dnaLength);	// payload is a verbatim copy of m_dna
+			finalizeChunk(chunk,"DNA1",BT_DNA_CODE, m_dna);
+		}
+
+		///Looks the type name up in mTypeLookup and returns the value stored for it, or -1 for an unknown name.
+		int getReverseType(const char *type) const
+		{
+			const int* found = mTypeLookup.find(btHashString(type));
+			if (found == 0)
+			{
+				return -1;	// no entry was registered under this name
+			}
+			return *found;
+		}
+
+		///Copies the dnalen bytes at bdnaOrg into m_dna and indexes that copy: fills mTypes, mTlens and mStructs,
+		///then builds the mStructReverse and mTypeLookup tables. Returns at once when m_dna is already set.
+		void initDNA(const char* bdnaOrg,int dnalen)
+		{
+			///was already initialized
+			if (m_dna)
+				return;
+			int littleEndian= 1;
+			littleEndian= ((char*)&littleEndian)[0];	// lowest-address byte of the int 1: non-zero only on a little-endian host
+
+			m_dna = btAlignedAlloc(dnalen,16);	// private copy; the swaps below modify it in place
+			memcpy(m_dna,bdnaOrg,dnalen);
+			m_dnaLength = dnalen;
+
+			int *intPtr=0;
+			short *shtPtr=0;
+			char *cp = 0;int dataLen =0;
+			intPtr = (int*)m_dna;
+
+			/*
+				SDNA (4 bytes) (magic number)
+				NAME (4 bytes)
+				<nr> (4 bytes) amount of names (int)
+				<string>
+				<string>
+			*/
+
+			if (strncmp((const char*)m_dna, "SDNA", 4)==0)
+			{
+				// skip ++ NAME
+				intPtr++; intPtr++;
+			}
+
+			// Parse names: the strings are only counted past here, none of them is stored
+			if (!littleEndian)
+				*intPtr = btSwapEndian(*intPtr);
+				
+			dataLen = *intPtr;
+			
+			intPtr++;
+
+			cp = (char*)intPtr;
+			int i;
+			for ( i=0; i<dataLen; i++)
+			{
+				
+				while (*cp)cp++;
+				cp++;
+			}
+			cp = btAlignPointer(cp,4);	// NOTE(review): presumably the next section tag sits on a 4-byte boundary - confirm
+
+			/*
+				TYPE (4 bytes)
+				<nr> amount of types (int)
+				<string>
+				<string>
+			*/
+
+			intPtr = (int*)cp;
+			assert(strncmp(cp, "TYPE", 4)==0); intPtr++;	// the increment steps over the 4-byte tag
+
+			if (!littleEndian)
+				*intPtr =  btSwapEndian(*intPtr);
+			
+			dataLen = *intPtr;
+			intPtr++;
+
+			
+			cp = (char*)intPtr;
+			for (i=0; i<dataLen; i++)
+			{
+				mTypes.push_back(cp);	// stores a pointer into m_dna, the string itself is not copied
+				while (*cp)cp++;
+				cp++;
+			}
+
+			cp = btAlignPointer(cp,4);
+
+
+			/*
+				TLEN (4 bytes)
+				<len> (short) the lengths of types
+				<len>
+			*/
+
+			// Parse type lens
+			intPtr = (int*)cp;
+			assert(strncmp(cp, "TLEN", 4)==0); intPtr++;
+
+			dataLen = (int)mTypes.size();	// no count is read for this section: one short per entry of mTypes
+
+			shtPtr = (short*)intPtr;
+			for (i=0; i<dataLen; i++, shtPtr++)
+			{
+				if (!littleEndian)
+					shtPtr[0] = btSwapEndian(shtPtr[0]);
+				mTlens.push_back(shtPtr[0]);
+			}
+
+			if (dataLen & 1) shtPtr++;	// odd number of shorts read: step over one more short
+
+			/*
+				STRC (4 bytes)
+				<nr> amount of structs (int)
+				<typenr>
+				<nr_of_elems>
+				<typenr>
+				<namenr>
+				<typenr>
+				<namenr>
+			*/
+
+			intPtr = (int*)shtPtr;
+			cp = (char*)intPtr;
+			assert(strncmp(cp, "STRC", 4)==0); intPtr++;
+
+			if (!littleEndian)
+				*intPtr = btSwapEndian(*intPtr);
+			dataLen = *intPtr ; 
+			intPtr++;
+
+
+			shtPtr = (short*)intPtr;
+			for (i=0; i<dataLen; i++)
+			{
+				mStructs.push_back (shtPtr);	// entry points at the first short of this struct record inside m_dna
+				
+				if (!littleEndian)
+				{
+					shtPtr[0]= btSwapEndian(shtPtr[0]);
+					shtPtr[1]= btSwapEndian(shtPtr[1]);
+
+					int len = shtPtr[1];	// read after the swap above
+					shtPtr+= 2;
+
+					for (int a=0; a<len; a++, shtPtr+=2)	// swaps each pair of shorts and so also moves past the record
+					{
+							shtPtr[0]= btSwapEndian(shtPtr[0]);
+							shtPtr[1]= btSwapEndian(shtPtr[1]);
+					}
+
+				} else
+				{
+					shtPtr+= (2*shtPtr[1])+2;	// 2 leading shorts plus 2 shorts for each of the shtPtr[1] elements
+				}
+			}
+
+			// build reverse lookups
+			for (i=0; i<(int)mStructs.size(); i++)
+			{
+				short *strc = mStructs.at(i);
+				mStructReverse.insert(strc[0], i);	// first short of the record -> index into mStructs
+				mTypeLookup.insert(btHashString(mTypes[strc[0]]),i);	// name of that type -> index into mStructs
+			}
+		}
+
+public:	
+	
+
+	
+
+		btDefaultSerializer(int totalSize=0)
+			:m_totalSize(totalSize),
+			m_currentSize(0),
+			m_dna(0),
+			m_dnaLength(0),
+			m_serializationFlags(0)
+		{
+			m_buffer = m_totalSize?(unsigned char*)btAlignedAlloc(totalSize,16):0;
+			//totalSize==0 means no pre-allocated buffer: m_buffer stays null and internalAlloc allocates every chunk separately
+			const bool VOID_IS_8 = ((sizeof(void*)==8));
+			//the embedded DNA description is picked to match this build's pointer size (sBulletDNAstr64 for 8-byte pointers)
+#ifdef BT_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES
+			if (VOID_IS_8)
+			{
+#if _WIN64
+				initDNA((const char*)sBulletDNAstr64,sBulletDNAlen64);
+#else
+				btAssert(0);
+#endif
+			} else
+			{
+#ifndef _WIN64
+				initDNA((const char*)sBulletDNAstr,sBulletDNAlen);
+#else
+				btAssert(0);
+#endif
+			}
+	
+#else //BT_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES
+			if (VOID_IS_8)
+			{
+				initDNA((const char*)sBulletDNAstr64,sBulletDNAlen64);
+			} else
+			{
+				initDNA((const char*)sBulletDNAstr,sBulletDNAlen);
+			}
+#endif //BT_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES
+	
+		}
+
+		virtual ~btDefaultSerializer() 
+		{
+			//the output buffer and the DNA copy are the only allocations released here;
+			//either one may be null, in which case it is skipped
+			if (m_buffer) btAlignedFree(m_buffer);
+			if (m_dna) btAlignedFree(m_dna);
+		}
+
+		void	writeHeader(unsigned char* buffer) const
+		{
+			///Writes the 12-byte file header into buffer:
+			/// [0..6]  "BULLETd" or "BULLETf", depending on BT_USE_DOUBLE_PRECISION
+			/// [7]     '-' when pointers are 8 bytes wide, '_' otherwise
+			/// [8]     'v' on a little-endian host, 'V' otherwise
+			/// [9..11] the characters '2', '8', '0'
+			///Bytes past index 11 are left untouched.
+
+#ifdef  BT_USE_DOUBLE_PRECISION
+			const char* const magic = "BULLETd";
+#else
+			const char* const magic = "BULLETf";
+#endif //BT_USE_DOUBLE_PRECISION
+			memcpy(buffer, magic, 7);
+
+			//pointer width marker
+			const bool pointersAre64Bit = (sizeof(void*)==8);
+			if (pointersAre64Bit)
+				buffer[7] = '-';
+			else
+				buffer[7] = '_';
+
+			//the lowest-addressed byte of an int holding 1 is non-zero only on a little-endian host
+			const int probe = 1;
+			const bool hostIsLittleEndian = ((const char*)&probe)[0] != 0;
+			buffer[8] = hostIsLittleEndian ? 'v' : 'V';
+
+			//"280" - presumably the Bullet version (2.80) that wrote the file
+			const char versionDigits[3] = {'2','8','0'};
+			for (int i=0;i<3;i++)
+			{
+				buffer[9+i] = versionDigits[i];
+			}
+		}
+
+		virtual	void	startSerialization()
+		{
+			m_uniqueIdGenerator= 1;
+
+			//without a pre-allocated buffer no header is written here; finishSerialization writes it instead
+			if (!m_totalSize)
+				return;
+
+			writeHeader(internalAlloc(BT_HEADER_LENGTH));
+		}
+
+		virtual	void	finishSerialization()
+		{
+			///Runs writeDNA() and, when no buffer was pre-allocated, gathers all chunks behind a
+			///header in one freshly allocated buffer. The lookup tables are cleared afterwards.
+			writeDNA();
+
+			//if we didn't pre-allocate a buffer, we need to create a contiguous buffer now
+			if (!m_totalSize)
+			{
+				if (m_buffer)
+					btAlignedFree(m_buffer);
+
+				//in this mode startSerialization skipped the header, so make room for it now
+				m_currentSize += BT_HEADER_LENGTH;
+				m_buffer = (unsigned char*)btAlignedAlloc(m_currentSize,16);
+
+				writeHeader(m_buffer);
+				unsigned char* currentPtr = m_buffer + BT_HEADER_LENGTH;
+				for (int i=0;i<m_chunkPtrs.size();i++)
+				{
+					//each chunk was allocated on its own by internalAlloc: copy header plus payload, then release it
+					const int curLength = sizeof(btChunk)+m_chunkPtrs[i]->m_length;
+					memcpy(currentPtr,m_chunkPtrs[i], curLength);
+					btAlignedFree(m_chunkPtrs[i]);
+					currentPtr+=curLength;
+				}
+			}
+
+			mTypes.clear();
+			mStructs.clear();
+			mTlens.clear();
+			mStructReverse.clear();
+			mTypeLookup.clear();
+			m_chunkP.clear();
+			m_nameMap.clear();
+			m_uniquePointers.clear();
+			m_chunkPtrs.clear();
+		}
+
+		virtual	void*	getUniquePointer(void*oldPtr)
+		{
+			///Returns the stand-in pointer value that represents oldPtr; a null oldPtr stays null.
+			if (!oldPtr)
+				return 0;
+
+			//a pointer that was seen before keeps the stand-in it was given then
+			btPointerUid* known = (btPointerUid*)m_uniquePointers.find(oldPtr);
+			if (known)
+				return known->m_ptr;
+
+			//first sighting: take the next id and store it in both m_uniqueIds slots
+			//(btPointerUid presumably overlays m_uniqueIds on m_ptr, so m_ptr then encodes the id)
+			++m_uniqueIdGenerator;
+			btPointerUid fresh;
+			fresh.m_uniqueIds[0] = fresh.m_uniqueIds[1] = m_uniqueIdGenerator;
+			m_uniquePointers.insert(oldPtr,fresh);
+			return fresh.m_ptr;
+		}
+
+		virtual	const unsigned char*		getBufferPointer() const
+		{
+			return m_buffer; //null until a buffer exists: the constructor allocates it when totalSize!=0, otherwise finishSerialization does
+		}
+
+		virtual	int					getCurrentBufferSize() const
+		{
+			return	m_currentSize; //bytes handed out by internalAlloc so far; finishSerialization adds BT_HEADER_LENGTH when no buffer was pre-allocated
+		}
+
+		virtual	void	finalizeChunk(btChunk* chunk, const char* structType, int chunkCode,void* oldPtr)
+		{
+			///Completes a chunk: records its DNA struct index, its chunk code and the stand-in for oldPtr.
+			//unless BT_SERIALIZE_NO_DUPLICATE_ASSERT is set, assert that findPointer(oldPtr) finds nothing yet
+			if ((m_serializationFlags&BT_SERIALIZE_NO_DUPLICATE_ASSERT)==0)
+			{
+				btAssert(!findPointer(oldPtr));
+			}
+
+			chunk->m_dna_nr = getReverseType(structType);
+			chunk->m_chunkCode = chunkCode;
+
+			//the chunk carries the unique stand-in instead of the real address; m_chunkP remembers the pairing
+			void* standIn = getUniquePointer(oldPtr);
+			m_chunkP.insert(oldPtr,standIn);
+			chunk->m_oldPtr = standIn;
+		}
+
+		
+		virtual unsigned char* internalAlloc(size_t size)
+		{
+			///Returns size bytes: the next slice of the pre-allocated buffer when m_totalSize is set,
+			///otherwise a separate btAlignedAlloc block. m_currentSize grows by size either way.
+			unsigned char* ptr = 0;
+			if (m_totalSize)
+			{
+				ptr = m_buffer+m_currentSize;
+				//filling the buffer exactly is still in bounds; only going past m_totalSize is an overflow
+				btAssert(m_currentSize+int(size)<=m_totalSize);
+			}
+			else
+				ptr = (unsigned char*)btAlignedAlloc(size,16);
+			m_currentSize += int(size);
+			return ptr;
+		}
+
+		
+
+		virtual	btChunk*	allocate(size_t size, int numElements)
+		{
+			///Reserves one btChunk header immediately followed by numElements elements of size bytes each.
+			///Until finalizeChunk overwrites it, m_oldPtr points at that payload.
+			const int payloadBytes = int(size)*numElements;
+
+			unsigned char* raw = internalAlloc(payloadBytes+sizeof(btChunk));
+			btChunk* chunk = (btChunk*)raw;
+
+			chunk->m_chunkCode = 0;
+			chunk->m_oldPtr = raw + sizeof(btChunk);
+			chunk->m_length = payloadBytes;
+			chunk->m_number = numElements;
+
+			//remembered so that finishSerialization can gather (and free) the chunks later
+			m_chunkPtrs.push_back(chunk);
+			return chunk;
+		}
+
+		virtual	const char*	findNameForPointer(const void* ptr) const
+		{
+			///Returns the name registered for ptr, or 0 when there is none (or the stored name is itself null).
+			const char*const * entry = m_nameMap.find(ptr);
+			if (!entry)
+				return 0;
+			return *entry;
+		}
+
+		virtual	void	registerNameForPointer(const void* ptr, const char* name)
+		{
+			m_nameMap.insert(ptr,name); //stores the name pointer itself (findNameForPointer hands it back), so the string presumably has to stay alive
+		}
+
+		virtual void	serializeName(const char* name)
+		{
+			///Emits name as a zero-terminated "char" array chunk whose length is padded to a multiple of 4.
+			///Null or empty names, and names for which findPointer already returns a hit, are skipped.
+			if (!name)
+				return;
+
+			//don't serialize name twice
+			if (findPointer((void*)name))
+				return;
+
+			const int len = btStrLen(name);
+			if (!len)
+				return;
+
+			//room for the characters plus the terminator, rounded up to the next multiple of 4
+			const int paddedLen = (len+1+3)&~3;
+
+			btChunk* chunk = allocate(sizeof(char),paddedLen);
+			char* destinationName = (char*)chunk->m_oldPtr;
+			for (int i=0;i<len;i++)
+				destinationName[i] = name[i];
+
+			//zero the terminator AND the alignment padding, so no uninitialized bytes end up in the output
+			for (int i=len;i<paddedLen;i++)
+				destinationName[i] = 0;
+			finalizeChunk(chunk,"char",BT_ARRAY_CODE,(void*)name);
+		}
+
+		virtual int		getSerializationFlags() const
+		{
+			return m_serializationFlags; //bit flags; finalizeChunk tests BT_SERIALIZE_NO_DUPLICATE_ASSERT against them
+		}
+
+		virtual void	setSerializationFlags(int flags)
+		{
+			m_serializationFlags = flags; //replaces all flags at once; the constructor starts out with 0
+		}
+
+};
+
+
+#endif //BT_SERIALIZER_H
+
diff --git a/src/bullet/LinearMath/btStackAlloc.h b/src/bullet/LinearMath/btStackAlloc.h
new file mode 100644
index 00000000..397b0848
--- /dev/null
+++ b/src/bullet/LinearMath/btStackAlloc.h
@@ -0,0 +1,116 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/*
+StackAlloc extracted from GJK-EPA collision solver by Nathanael Presson
+Nov.2006
+*/
+
+#ifndef BT_STACK_ALLOC
+#define BT_STACK_ALLOC
+
+#include "btScalar.h" //for btAssert
+#include "btAlignedAllocator.h"
+
+///The btBlock class is an internal structure for the btStackAlloc memory allocator.
+struct btBlock
+{
+	btBlock*			previous;	///< the block that was current when btStackAlloc::beginBlock created this one
+	unsigned char*		address;	///< data+usedsize right after this header was allocated; endBlock rewinds usedsize from it
+};
+
+///The btStackAlloc class provides a fast stack-based memory allocator (LIFO: last-in, first-out)
+class btStackAlloc
+{
+public:
+
+	btStackAlloc(unsigned int size)	{ ctor();create(size); }
+	~btStackAlloc()		{ destroy(); }
+	///(Re)creates the arena with room for size bytes; destroy() is run first and expects nothing to be allocated
+	inline void		create(unsigned int size)
+	{
+		destroy();
+		data		=  (unsigned char*) btAlignedAlloc(size,16);
+		totalsize	=	size;
+	}
+	inline void		destroy()
+	{
+		btAssert(usedsize==0);
+		//Raise(L"StackAlloc is still in use");
+		//with asserts compiled out, an arena that is still in use is left untouched instead of being freed
+		if(usedsize==0)
+		{
+			if(!ischild && data)		
+				btAlignedFree(data);
+
+			data				=	0;
+			usedsize			=	0;
+		}
+		
+	}
+	///Bytes not handed out yet; allocate() accepts any request up to and including this amount
+	int	getAvailableMemory() const
+	{
+		return static_cast<int>(totalsize - usedsize);
+	}
+	///Hands out the next size bytes of the arena; asserts and returns 0 when they do not fit
+	unsigned char*			allocate(unsigned int size)
+	{
+		//compare against the remaining space: usedsize+size could wrap around for a huge size
+		if(usedsize<=totalsize && size<=totalsize-usedsize)
+		{
+			unsigned char* const chunk = data+usedsize;
+			usedsize+=size;
+			return(chunk);
+		}
+		btAssert(0);
+		//&& (L"Not enough memory"));
+		return(0);
+	}
+	SIMD_FORCE_INLINE btBlock*		beginBlock()
+	{
+		btBlock*	pb = (btBlock*)allocate(sizeof(btBlock));	//the block header itself lives in the arena
+		pb->previous	=	current;
+		pb->address		=	data+usedsize;	//first byte after the header
+		current			=	pb;
+		return(pb);
+	}
+	SIMD_FORCE_INLINE void		endBlock(btBlock* block)
+	{
+		btAssert(block==current);
+		//Raise(L"Unmatched blocks");
+		if(block==current)
+		{
+			current		=	block->previous;
+			usedsize	=	(unsigned int)((block->address-data)-sizeof(btBlock));	//back to where usedsize stood before beginBlock
+		}
+	}
+
+private:
+	void		ctor()
+	{
+		data		=	0;
+		totalsize	=	0;
+		usedsize	=	0;
+		current		=	0;
+		ischild		=	false;
+	}
+	unsigned char*		data;		///< start of the arena, 0 when none is allocated
+	unsigned int		totalsize;	///< size passed to the last create()
+	unsigned int		usedsize;	///< bytes currently handed out, block headers included
+	btBlock*	current;			///< innermost open block, 0 when none
+	bool		ischild;			///< when true destroy() does not free data; only ever set to false in this class
+};
+
+#endif //BT_STACK_ALLOC
diff --git a/src/bullet/LinearMath/btTransform.h b/src/bullet/LinearMath/btTransform.h
new file mode 100644
index 00000000..5e52d183
--- /dev/null
+++ b/src/bullet/LinearMath/btTransform.h
@@ -0,0 +1,307 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_TRANSFORM_H
+#define BT_TRANSFORM_H
+
+
+#include "btMatrix3x3.h"
+
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btTransformData btTransformDoubleData
+#else
+#define btTransformData btTransformFloatData
+#endif
+
+
+
+
+/**@brief The btTransform class supports rigid transforms with only translation and rotation and no scaling/shear.
+ *It can be used in combination with btVector3, btQuaternion and btMatrix3x3 linear algebra classes. */
+class btTransform {
+	
+  ///Storage for the rotation
+	btMatrix3x3 m_basis;
+  ///Storage for the translation
+	btVector3   m_origin;
+
+public:
+	
+  /**@brief No initialization constructor: nothing is assigned to m_basis or m_origin here */
+	btTransform() {}
+  /**@brief Constructor from btQuaternion (optional btVector3 )
+   * @param q Rotation from quaternion 
+   * @param c Translation from Vector (default 0,0,0) */
+	explicit SIMD_FORCE_INLINE btTransform(const btQuaternion& q, 
+		const btVector3& c = btVector3(btScalar(0), btScalar(0), btScalar(0))) 
+		: m_basis(q),
+		m_origin(c)
+	{}
+
+  /**@brief Constructor from btMatrix3x3 (optional btVector3)
+   * @param b Rotation from Matrix 
+   * @param c Translation from Vector default (0,0,0)*/
+	explicit SIMD_FORCE_INLINE btTransform(const btMatrix3x3& b, 
+		const btVector3& c = btVector3(btScalar(0), btScalar(0), btScalar(0)))
+		: m_basis(b),
+		m_origin(c)
+	{}
+  /**@brief Copy constructor */
+	SIMD_FORCE_INLINE btTransform (const btTransform& other)
+		: m_basis(other.m_basis),
+		m_origin(other.m_origin)
+	{
+	}
+  /**@brief Assignment Operator */
+	SIMD_FORCE_INLINE btTransform& operator=(const btTransform& other)
+	{
+		m_basis = other.m_basis;
+		m_origin = other.m_origin;
+		return *this;
+	}
+
+
+  /**@brief Set the current transform as the value of the product of two transforms
+   * @param t1 Transform 1
+   * @param t2 Transform 2
+   * This = Transform1 * Transform2 */
+		SIMD_FORCE_INLINE void mult(const btTransform& t1, const btTransform& t2) {
+			m_basis = t1.m_basis * t2.m_basis;
+			m_origin = t1(t2.m_origin);
+		}
+
+/*		void multInverseLeft(const btTransform& t1, const btTransform& t2) {
+			btVector3 v = t2.m_origin - t1.m_origin;
+			m_basis = btMultTransposeLeft(t1.m_basis, t2.m_basis);
+			m_origin = v * t1.m_basis;
+		}
+		*/
+
+/**@brief Return the transform of the vector */
+	SIMD_FORCE_INLINE btVector3 operator()(const btVector3& x) const
+	{
+		return btVector3(m_basis[0].dot(x) + m_origin.x(), 
+			m_basis[1].dot(x) + m_origin.y(), 
+			m_basis[2].dot(x) + m_origin.z());
+	}
+
+  /**@brief Return the transform of the vector */
+	SIMD_FORCE_INLINE btVector3 operator*(const btVector3& x) const
+	{
+		return (*this)(x);
+	}
+
+  /**@brief Return the transform of the btQuaternion */
+	SIMD_FORCE_INLINE btQuaternion operator*(const btQuaternion& q) const
+	{
+		return getRotation() * q;
+	}
+
+  /**@brief Return the basis matrix for the rotation */
+	SIMD_FORCE_INLINE btMatrix3x3&       getBasis()          { return m_basis; }
+  /**@brief Return the basis matrix for the rotation */
+	SIMD_FORCE_INLINE const btMatrix3x3& getBasis()    const { return m_basis; }
+
+  /**@brief Return the origin vector translation */
+	SIMD_FORCE_INLINE btVector3&         getOrigin()         { return m_origin; }
+  /**@brief Return the origin vector translation */
+	SIMD_FORCE_INLINE const btVector3&   getOrigin()   const { return m_origin; }
+
+  /**@brief Return a quaternion representing the rotation */
+	btQuaternion getRotation() const { 
+		btQuaternion q;
+		m_basis.getRotation(q);
+		return q;
+	}
+	
+	
+  /**@brief Set from an array 
+   * @param m A pointer to an OpenGL-style matrix array: the rotation part is read by btMatrix3x3::setFromOpenGLSubMatrix, the translation from m[12], m[13], m[14] */
+	void setFromOpenGLMatrix(const btScalar *m)
+	{
+		m_basis.setFromOpenGLSubMatrix(m);
+		m_origin.setValue(m[12],m[13],m[14]);
+	}
+
+  /**@brief Fill an array representation
+   * @param m A pointer to a 16 element array: the rotation part is written by btMatrix3x3::getOpenGLSubMatrix, m[12..14] receive the translation and m[15] is set to 1 */
+	void getOpenGLMatrix(btScalar *m) const 
+	{
+		m_basis.getOpenGLSubMatrix(m);
+		m[12] = m_origin.x();
+		m[13] = m_origin.y();
+		m[14] = m_origin.z();
+		m[15] = btScalar(1.0);
+	}
+
+  /**@brief Set the translational element
+   * @param origin The vector to set the translation to */
+	SIMD_FORCE_INLINE void setOrigin(const btVector3& origin) 
+	{ 
+		m_origin = origin;
+	}
+  /**@brief Apply the inverse of this transform to a vector: subtract the origin, then multiply by the transposed basis */
+	SIMD_FORCE_INLINE btVector3 invXform(const btVector3& inVec) const;
+
+
+  /**@brief Set the rotational element by btMatrix3x3 */
+	SIMD_FORCE_INLINE void setBasis(const btMatrix3x3& basis)
+	{ 
+		m_basis = basis;
+	}
+
+  /**@brief Set the rotational element by btQuaternion */
+	SIMD_FORCE_INLINE void setRotation(const btQuaternion& q)
+	{
+		m_basis.setRotation(q);
+	}
+
+
+  /**@brief Set this transformation to the identity */
+	void setIdentity()
+	{
+		m_basis.setIdentity();
+		m_origin.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
+	}
+
+  /**@brief Multiply this Transform by another(this = this * another) 
+   * @param t The other transform */
+	btTransform& operator*=(const btTransform& t) 
+	{
+		m_origin += m_basis * t.m_origin;	// must run first: it needs the basis from before the update below
+		m_basis *= t.m_basis;
+		return *this;
+	}
+
+  /**@brief Return the inverse of this transform (built from the transposed basis, so the basis is expected to be a pure rotation) */
+	btTransform inverse() const
+	{ 
+		btMatrix3x3 inv = m_basis.transpose();
+		return btTransform(inv, inv * -m_origin);
+	}
+
+  /**@brief Return the inverse of this transform times the other transform
+   * @param t The other transform 
+   * return this.inverse() * the other */
+	btTransform inverseTimes(const btTransform& t) const;  
+
+  /**@brief Return the product of this transform and the other */
+	btTransform operator*(const btTransform& t) const;
+
+  /**@brief Return an identity transform */
+	static const btTransform&	getIdentity()
+	{
+		static const btTransform identityTransform(btMatrix3x3::getIdentity());
+		return identityTransform;
+	}
+  /**@brief Serialization helpers; btTransformData is the Double or the Float variant depending on BT_USE_DOUBLE_PRECISION */
+	void	serialize(struct	btTransformData& dataOut) const;
+
+	void	serializeFloat(struct	btTransformFloatData& dataOut) const;
+
+	void	deSerialize(const struct	btTransformData& dataIn);
+
+	void	deSerializeDouble(const struct	btTransformDoubleData& dataIn);
+
+	void	deSerializeFloat(const struct	btTransformFloatData& dataIn);
+
+};
+
+
+SIMD_FORCE_INLINE btVector3
+btTransform::invXform(const btVector3& inVec) const
+{
+	const btVector3 relative = inVec - m_origin;	//take the translation out first ...
+	return m_basis.transpose() * relative;			//... then multiply by the transposed basis
+}
+
+SIMD_FORCE_INLINE btTransform 
+btTransform::inverseTimes(const btTransform& t) const  
+{
+	//new origin: the offset from our origin to t's, as the vector*matrix product with our basis
+	const btVector3 localOffset = (t.getOrigin() - m_origin) * m_basis;
+	return btTransform(m_basis.transposeTimes(t.m_basis), localOffset);
+}
+
+SIMD_FORCE_INLINE btTransform 
+btTransform::operator*(const btTransform& t) const
+{
+	//composition: the bases multiply, and t's origin is mapped through this transform
+	return btTransform(m_basis * t.m_basis, operator()(t.m_origin));
+}
+
+/**@brief Test if two transforms have all elements equal */
+SIMD_FORCE_INLINE bool operator==(const btTransform& t1, const btTransform& t2)
+{
+	const bool sameBasis = (t1.getBasis() == t2.getBasis());
+	return sameBasis && (t1.getOrigin() == t2.getOrigin());
+}
+
+
+///for serialization
+struct	btTransformFloatData
+{
+	btMatrix3x3FloatData	m_basis;	///< filled from btTransform::m_basis by serializeFloat
+	btVector3FloatData	m_origin;	///< filled from btTransform::m_origin by serializeFloat
+};
+
+struct	btTransformDoubleData
+{
+	btMatrix3x3DoubleData	m_basis;	///< read back into btTransform::m_basis by deSerializeDouble
+	btVector3DoubleData	m_origin;	///< read back into btTransform::m_origin by deSerializeDouble
+};
+
+
+
+SIMD_FORCE_INLINE	void	btTransform::serialize(btTransformData& dataOut) const
+{
+	m_basis.serialize(dataOut.m_basis);	//btTransformData is the float or the double layout, chosen by BT_USE_DOUBLE_PRECISION
+	m_origin.serialize(dataOut.m_origin);
+}
+
+SIMD_FORCE_INLINE	void	btTransform::serializeFloat(btTransformFloatData& dataOut) const
+{
+	m_basis.serializeFloat(dataOut.m_basis);	//always targets the float layout, independent of BT_USE_DOUBLE_PRECISION
+	m_origin.serializeFloat(dataOut.m_origin);
+}
+
+
+SIMD_FORCE_INLINE	void	btTransform::deSerialize(const btTransformData& dataIn)
+{
+	m_basis.deSerialize(dataIn.m_basis);	//counterpart of serialize(): reads the layout selected by BT_USE_DOUBLE_PRECISION
+	m_origin.deSerialize(dataIn.m_origin);
+}
+
+SIMD_FORCE_INLINE	void	btTransform::deSerializeFloat(const btTransformFloatData& dataIn)
+{
+	m_basis.deSerializeFloat(dataIn.m_basis);	//reads the float layout, independent of BT_USE_DOUBLE_PRECISION
+	m_origin.deSerializeFloat(dataIn.m_origin);
+}
+
+SIMD_FORCE_INLINE	void	btTransform::deSerializeDouble(const btTransformDoubleData& dataIn)
+{
+	m_basis.deSerializeDouble(dataIn.m_basis);	//reads the double layout, independent of BT_USE_DOUBLE_PRECISION
+	m_origin.deSerializeDouble(dataIn.m_origin);
+}
+
+
+#endif //BT_TRANSFORM_H
+
+
+
+
+
+
diff --git a/src/bullet/LinearMath/btTransformUtil.h b/src/bullet/LinearMath/btTransformUtil.h
new file mode 100644
index 00000000..2303c274
--- /dev/null
+++ b/src/bullet/LinearMath/btTransformUtil.h
@@ -0,0 +1,228 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_TRANSFORM_UTIL_H
+#define BT_TRANSFORM_UTIL_H
+
+#include "btTransform.h"
+#define ANGULAR_MOTION_THRESHOLD (btScalar(0.5)*SIMD_HALF_PI) //largest rotation angle integrateTransform allows per step; parenthesized so it expands safely inside larger expressions
+
+
+
+
+SIMD_FORCE_INLINE btVector3 btAabbSupport(const btVector3& halfExtents,const btVector3& supportDir)
+{
+	//per axis: the half extent as given, negated where the support direction component is below zero
+	const btScalar zero(0.0);
+	return btVector3((supportDir.x() < zero) ? -halfExtents.x() : halfExtents.x(), (supportDir.y() < zero) ? -halfExtents.y() : halfExtents.y(), (supportDir.z() < zero) ? -halfExtents.z() : halfExtents.z());
+}
+
+
+
+
+
+
+/// Utils related to temporal transforms
+class btTransformUtil
+{
+
+public:
+	///Advances curTrans by timeStep using linear velocity linvel and angular velocity angvel; the result is written to predictedTransform
+	static void integrateTransform(const btTransform& curTrans,const btVector3& linvel,const btVector3& angvel,btScalar timeStep,btTransform& predictedTransform)
+	{
+		predictedTransform.setOrigin(curTrans.getOrigin() + linvel * timeStep);
+//	#define QUATERNION_DERIVATIVE
+	#ifdef QUATERNION_DERIVATIVE
+		btQuaternion predictedOrn = curTrans.getRotation();
+		predictedOrn += (angvel * predictedOrn) * (timeStep * btScalar(0.5));
+		predictedOrn.normalize();
+	#else
+		//Exponential map
+		//google for "Practical Parameterization of Rotations Using the Exponential Map", F. Sebastian Grassia
+		//axis ends up holding the x,y,z part of the incremental rotation quaternion dorn built below
+		btVector3 axis;
+		btScalar	fAngle = angvel.length(); 
+		//limit the angular motion
+		if (fAngle*timeStep > ANGULAR_MOTION_THRESHOLD)
+		{
+			fAngle = ANGULAR_MOTION_THRESHOLD / timeStep;
+		}
+
+		if ( fAngle < btScalar(0.001) )
+		{
+			// use the Taylor expansion of sin(0.5*fAngle*timeStep)/fAngle (a sinc-like term), which avoids dividing by a tiny fAngle
+			axis   = angvel*( btScalar(0.5)*timeStep-(timeStep*timeStep*timeStep)*(btScalar(0.020833333333))*fAngle*fAngle );
+		}
+		else
+		{
+			// scale angvel by sin(0.5*fAngle*timeStep)/fAngle
+			axis   = angvel*( btSin(btScalar(0.5)*fAngle*timeStep)/fAngle );
+		}
+		btQuaternion dorn (axis.x(),axis.y(),axis.z(),btCos( fAngle*timeStep*btScalar(0.5) ));
+		btQuaternion orn0 = curTrans.getRotation();
+
+		btQuaternion predictedOrn = dorn * orn0;
+		predictedOrn.normalize();
+	#endif
+		predictedTransform.setRotation(predictedOrn);
+	}
+	///Derives the linear and angular velocity that take (pos0,orn0) to (pos1,orn1) within timeStep; equal orientations give a zero angular velocity
+	static void	calculateVelocityQuaternion(const btVector3& pos0,const btVector3& pos1,const btQuaternion& orn0,const btQuaternion& orn1,btScalar timeStep,btVector3& linVel,btVector3& angVel)
+	{
+		linVel = (pos1 - pos0) / timeStep;
+		btVector3 axis;
+		btScalar  angle;
+		if (orn0 != orn1)
+		{
+			calculateDiffAxisAngleQuaternion(orn0,orn1,axis,angle);
+			angVel = axis * angle / timeStep;
+		} else
+		{
+			angVel.setValue(0,0,0);
+		}
+	}
+	///Unit-length axis and angle of the rotation orn1 * inverse(orn0), with orn1 = orn0.nearest(orn1a); a degenerate axis falls back to (1,0,0)
+	static void calculateDiffAxisAngleQuaternion(const btQuaternion& orn0,const btQuaternion& orn1a,btVector3& axis,btScalar& angle)
+	{
+		btQuaternion orn1 = orn0.nearest(orn1a);
+		btQuaternion dorn = orn1 * orn0.inverse();
+		angle = dorn.getAngle();
+		axis = btVector3(dorn.x(),dorn.y(),dorn.z());
+		axis[3] = btScalar(0.);	//clear the 4th component
+		//check for axis length
+		btScalar len = axis.length2();
+		if (len < SIMD_EPSILON*SIMD_EPSILON)
+			axis = btVector3(btScalar(1.),btScalar(0.),btScalar(0.));
+		else
+			axis /= btSqrt(len);
+	}
+	///Derives the linear and angular velocity that take transform0 to transform1 within timeStep
+	static void	calculateVelocity(const btTransform& transform0,const btTransform& transform1,btScalar timeStep,btVector3& linVel,btVector3& angVel)
+	{
+		linVel = (transform1.getOrigin() - transform0.getOrigin()) / timeStep;
+		btVector3 axis;
+		btScalar  angle;
+		calculateDiffAxisAngle(transform0,transform1,axis,angle);
+		angVel = axis * angle / timeStep;
+	}
+	///Unit-length axis and angle of the rotation basis1 * inverse(basis0); a degenerate axis falls back to (1,0,0)
+	static void calculateDiffAxisAngle(const btTransform& transform0,const btTransform& transform1,btVector3& axis,btScalar& angle)
+	{
+		btMatrix3x3 dmat = transform1.getBasis() * transform0.getBasis().inverse();
+		btQuaternion dorn;
+		dmat.getRotation(dorn);
+
+		///floating point inaccuracy can lead to w component > 1..., which breaks the angle computation below, so normalize first
+		dorn.normalize();
+		
+		angle = dorn.getAngle();
+		axis = btVector3(dorn.x(),dorn.y(),dorn.z());
+		axis[3] = btScalar(0.);	//clear the 4th component
+		//check for axis length
+		btScalar len = axis.length2();
+		if (len < SIMD_EPSILON*SIMD_EPSILON)
+			axis = btVector3(btScalar(1.),btScalar(0.),btScalar(0.));
+		else
+			axis /= btSqrt(len);
+	}
+
+};
+
+
+///The btConvexSeparatingDistanceUtil can help speed up convex collision detection 
+///by conservatively updating a cached separating distance/vector instead of re-calculating the closest distance
+class	btConvexSeparatingDistanceUtil
+{
+	btQuaternion	m_ornA;	///< orientation of A stored by the last init/update call
+	btQuaternion	m_ornB;	///< orientation of B stored by the last init/update call
+	btVector3	m_posA;	///< position of A stored by the last init/update call
+	btVector3	m_posB;	///< position of B stored by the last init/update call
+	
+	btVector3	m_separatingNormal;	///< set by initSeparatingDistance for a positive distance
+
+	btScalar	m_boundingRadiusA;
+	btScalar	m_boundingRadiusB;
+	btScalar	m_separatingDistance;	///< cached conservative distance; starts at 0
+
+public:
+	///Only the radii and the distance are initialized; normal and poses stay unset until initSeparatingDistance stores them
+	btConvexSeparatingDistanceUtil(btScalar	boundingRadiusA,btScalar	boundingRadiusB)
+		:m_boundingRadiusA(boundingRadiusA),
+		m_boundingRadiusB(boundingRadiusB),
+		m_separatingDistance(0.f)
+	{
+	}
+	///The cached distance as left by the last init/update call
+	btScalar	getConservativeSeparatingDistance()
+	{
+		return m_separatingDistance;
+	}
+
+	void	updateSeparatingDistance(const btTransform& transA,const btTransform& transB)
+	{
+		///Reduces a positive cached distance by a bound on the motion since the last call, then stores the new poses
+		const btVector3& newPosA = transA.getOrigin();
+		const btVector3& newPosB = transB.getOrigin();
+		const btQuaternion newOrnA = transA.getRotation();
+		const btQuaternion newOrnB = transB.getRotation();
+
+		if (m_separatingDistance>0.f)
+		{
+			//velocities per call (time step 1), reconstructed from the poses stored last time
+			btVector3 linVelA,angVelA,linVelB,angVelB;
+			btTransformUtil::calculateVelocityQuaternion(m_posA,newPosA,m_ornA,newOrnA,btScalar(1.),linVelA,angVelA);
+			btTransformUtil::calculateVelocityQuaternion(m_posB,newPosB,m_ornB,newOrnB,btScalar(1.),linVelB,angVelB);
+
+			//angular contribution: angular speed times bounding radius, summed over both bodies
+			const btScalar angularBound = angVelA.length() * m_boundingRadiusA + angVelB.length() * m_boundingRadiusB;
+
+			//linear contribution: relative velocity along the separating normal, clamped at zero
+			btScalar linearBound = (linVelB-linVelA).dot(m_separatingNormal);
+			if (linearBound<0.f)
+				linearBound = 0.f;
+			const btScalar projectedMotion = angularBound + linearBound;
+			m_separatingDistance -= projectedMotion;
+		}
+
+		m_posA = newPosA;
+		m_posB = newPosB;
+		m_ornA = newOrnA;
+		m_ornB = newOrnB;
+	}
+
+	void	initSeparatingDistance(const btVector3& separatingVector,btScalar separatingDistance,const btTransform& transA,const btTransform& transB)
+	{
+		///Caches a freshly computed separating distance;
+		///for a positive distance the normal and both poses are stored as well
+		m_separatingDistance = separatingDistance;
+
+		//nothing else is stored unless the distance is positive (negated > test, so a NaN bails out too)
+		if (!(m_separatingDistance>0.f))
+			return;
+
+		m_separatingNormal = separatingVector;
+
+		//reference poses that updateSeparatingDistance measures the motion against
+		m_posA = transA.getOrigin();
+		m_posB = transB.getOrigin();
+		m_ornA = transA.getRotation();
+		m_ornB = transB.getRotation();
+	}
+
+};
+
+
+#endif //BT_TRANSFORM_UTIL_H
+
diff --git a/src/bullet/LinearMath/btVector3.h b/src/bullet/LinearMath/btVector3.h
new file mode 100644
index 00000000..d99b7c83
--- /dev/null
+++ b/src/bullet/LinearMath/btVector3.h
@@ -0,0 +1,766 @@
+/*
+Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_VECTOR3_H
+#define BT_VECTOR3_H
+
+
+#include "btScalar.h"
+#include "btMinMax.h"
+
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btVector3Data btVector3DoubleData
+#define btVector3DataName "btVector3DoubleData"
+#else
+#define btVector3Data btVector3FloatData
+#define btVector3DataName "btVector3FloatData"
+#endif //BT_USE_DOUBLE_PRECISION
+
+
+
+
+/**@brief btVector3 can be used to represent 3D points and vectors.
+ * It has an un-used w component to suit 16-byte alignment when btVector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
+ * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
+ */
+ATTRIBUTE_ALIGNED16(class) btVector3
+{
+public:
+
+#if defined (__SPU__) && defined (__CELLOS_LV2__)
+		btScalar	m_floats[4];
+public:
+	SIMD_FORCE_INLINE const vec_float4&	get128() const
+	{
+		return *((const vec_float4*)&m_floats[0]);
+	}
+public:
+#else //__CELLOS_LV2__ __SPU__
+#ifdef BT_USE_SSE // _WIN32
+	union {
+		__m128 mVec128;
+		btScalar	m_floats[4];
+	};
+	SIMD_FORCE_INLINE	__m128	get128() const
+	{
+		return mVec128;
+	}
+	SIMD_FORCE_INLINE	void	set128(__m128 v128)
+	{
+		mVec128 = v128;
+	}
+#else
+	btScalar	m_floats[4];
+#endif
+#endif //__CELLOS_LV2__ __SPU__
+
+	public:
+
+  /**@brief No initialization constructor */
+	SIMD_FORCE_INLINE btVector3() {}
+
+ 
+	
+  /**@brief Constructor from scalars; the fourth (w) element is set to zero
+   * @param x X value
+   * @param y Y value 
+   * @param z Z value 
+   */
+	SIMD_FORCE_INLINE btVector3(const btScalar& x, const btScalar& y, const btScalar& z)
+	{
+		m_floats[0] = x;
+		m_floats[1] = y;
+		m_floats[2] = z;
+		m_floats[3] = btScalar(0.);
+	}
+
+	
+/**@brief Add a vector to this one 
+ * @param v The vector to add to this one */
+	SIMD_FORCE_INLINE btVector3& operator+=(const btVector3& v)
+	{
+
+		m_floats[0] += v.m_floats[0]; m_floats[1] += v.m_floats[1];m_floats[2] += v.m_floats[2];
+		return *this;
+	}
+
+
+  /**@brief Subtract a vector from this one
+   * @param v The vector to subtract */
+	SIMD_FORCE_INLINE btVector3& operator-=(const btVector3& v) 
+	{
+		m_floats[0] -= v.m_floats[0]; m_floats[1] -= v.m_floats[1];m_floats[2] -= v.m_floats[2];
+		return *this;
+	}
+  /**@brief Scale the vector
+   * @param s Scale factor */
+	SIMD_FORCE_INLINE btVector3& operator*=(const btScalar& s)
+	{
+		m_floats[0] *= s; m_floats[1] *= s;m_floats[2] *= s;
+		return *this;
+	}
+
+  /**@brief Inversely scale the vector 
+   * @param s Scale factor to divide by */
+	SIMD_FORCE_INLINE btVector3& operator/=(const btScalar& s) 
+	{
+		btFullAssert(s != btScalar(0.0));
+		return *this *= btScalar(1.0) / s;
+	}
+
+  /**@brief Return the dot product
+   * @param v The other vector in the dot product */
+	SIMD_FORCE_INLINE btScalar dot(const btVector3& v) const
+	{
+		return m_floats[0] * v.m_floats[0] + m_floats[1] * v.m_floats[1] +m_floats[2] * v.m_floats[2];
+	}
+
+  /**@brief Return the length of the vector squared */
+	SIMD_FORCE_INLINE btScalar length2() const
+	{
+		return dot(*this);
+	}
+
+  /**@brief Return the length of the vector */
+	SIMD_FORCE_INLINE btScalar length() const
+	{
+		return btSqrt(length2());
+	}
+
+  /**@brief Return the distance squared between the ends of this and another vector
+   * This is semantically treating the vector like a point */
+	SIMD_FORCE_INLINE btScalar distance2(const btVector3& v) const;
+
+  /**@brief Return the distance between the ends of this and another vector
+   * This is semantically treating the vector like a point */
+	SIMD_FORCE_INLINE btScalar distance(const btVector3& v) const;
+  /**@brief Normalize this vector, or set it to (1,0,0) when its largest absolute component is not positive */
+	SIMD_FORCE_INLINE btVector3& safeNormalize() 
+	{
+		btVector3 absVec = this->absolute();
+		int maxIndex = absVec.maxAxis();
+		if (absVec[maxIndex]>0)
+		{
+			*this /= absVec[maxIndex];
+			return *this /= length();
+		}
+		setValue(1,0,0);
+		return *this;
+	}
+
+  /**@brief Normalize this vector 
+   * x^2 + y^2 + z^2 = 1 */
+	SIMD_FORCE_INLINE btVector3& normalize() 
+	{
+		return *this /= length();
+	}
+
+  /**@brief Return a normalized version of this vector */
+	SIMD_FORCE_INLINE btVector3 normalized() const;
+
+  /**@brief Return a rotated version of this vector
+   * @param wAxis The axis to rotate about 
+   * @param angle The angle to rotate by */
+	SIMD_FORCE_INLINE btVector3 rotate( const btVector3& wAxis, const btScalar angle ) const;
+
+  /**@brief Return the angle between this and another vector
+   * @param v The other vector */
+	SIMD_FORCE_INLINE btScalar angle(const btVector3& v) const 
+	{
+		btScalar s = btSqrt(length2() * v.length2());
+		btFullAssert(s != btScalar(0.0));
+		return btAcos(dot(v) / s);
+	}
+  /**@brief Return a vector with the absolute values of each element */
+	SIMD_FORCE_INLINE btVector3 absolute() const 
+	{
+		return btVector3(
+			btFabs(m_floats[0]), 
+			btFabs(m_floats[1]), 
+			btFabs(m_floats[2]));
+	}
+  /**@brief Return the cross product between this and another vector 
+   * @param v The other vector */
+	SIMD_FORCE_INLINE btVector3 cross(const btVector3& v) const
+	{
+		return btVector3(
+			m_floats[1] * v.m_floats[2] -m_floats[2] * v.m_floats[1],
+			m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
+			m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
+	}
+  /**@brief Return the scalar triple product this . (v1 x v2) */
+	SIMD_FORCE_INLINE btScalar triple(const btVector3& v1, const btVector3& v2) const
+	{
+		return m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) + 
+			m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) + 
+			m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
+	}
+
+  /**@brief Return the axis with the smallest value 
+   * Note return values are 0,1,2 for x, y, or z */
+	SIMD_FORCE_INLINE int minAxis() const
+	{
+		return m_floats[0] < m_floats[1] ? (m_floats[0] <m_floats[2] ? 0 : 2) : (m_floats[1] <m_floats[2] ? 1 : 2);
+	}
+
+  /**@brief Return the axis with the largest value 
+   * Note return values are 0,1,2 for x, y, or z */
+	SIMD_FORCE_INLINE int maxAxis() const 
+	{
+		return m_floats[0] < m_floats[1] ? (m_floats[1] <m_floats[2] ? 2 : 1) : (m_floats[0] <m_floats[2] ? 2 : 0);
+	}
+  /**@brief Return the axis with the smallest absolute value (0,1,2 for x, y, or z) */
+	SIMD_FORCE_INLINE int furthestAxis() const
+	{
+		return absolute().minAxis();
+	}
+  /**@brief Return the axis with the largest absolute value (0,1,2 for x, y, or z) */
+	SIMD_FORCE_INLINE int closestAxis() const 
+	{
+		return absolute().maxAxis();
+	}
+  /**@brief Set x, y and z of this vector to (1-rt)*v0 + rt*v1; w is left untouched */
+	SIMD_FORCE_INLINE void setInterpolate3(const btVector3& v0, const btVector3& v1, btScalar rt)
+	{
+		btScalar s = btScalar(1.0) - rt;
+		m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
+		m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
+		m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
+		//don't do the unused w component
+		//		m_co[3] = s * v0[3] + rt * v1[3];
+	}
+
+  /**@brief Return the linear interpolation between this and another vector 
+   * @param v The other vector 
+   * @param t The ratio of this to v (t = 0 => return this, t=1 => return other) */
+	SIMD_FORCE_INLINE btVector3 lerp(const btVector3& v, const btScalar& t) const 
+	{
+		return btVector3(m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
+			m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
+			m_floats[2] + (v.m_floats[2] -m_floats[2]) * t);
+	}
+
+  /**@brief Elementwise multiply this vector by the other 
+   * @param v The other vector */
+	SIMD_FORCE_INLINE btVector3& operator*=(const btVector3& v)
+	{
+		m_floats[0] *= v.m_floats[0]; m_floats[1] *= v.m_floats[1];m_floats[2] *= v.m_floats[2];
+		return *this;
+	}
+
+	 /**@brief Return the x value */
+		SIMD_FORCE_INLINE const btScalar& getX() const { return m_floats[0]; }
+  /**@brief Return the y value */
+		SIMD_FORCE_INLINE const btScalar& getY() const { return m_floats[1]; }
+  /**@brief Return the z value */
+		SIMD_FORCE_INLINE const btScalar& getZ() const { return m_floats[2]; }
+  /**@brief Set the x value */
+		SIMD_FORCE_INLINE void	setX(btScalar x) { m_floats[0] = x;};
+  /**@brief Set the y value */
+		SIMD_FORCE_INLINE void	setY(btScalar y) { m_floats[1] = y;};
+  /**@brief Set the z value */
+		SIMD_FORCE_INLINE void	setZ(btScalar z) {m_floats[2] = z;};
+  /**@brief Set the w value */
+		SIMD_FORCE_INLINE void	setW(btScalar w) { m_floats[3] = w;};
+  /**@brief Return the x value */
+		SIMD_FORCE_INLINE const btScalar& x() const { return m_floats[0]; }
+  /**@brief Return the y value */
+		SIMD_FORCE_INLINE const btScalar& y() const { return m_floats[1]; }
+  /**@brief Return the z value */
+		SIMD_FORCE_INLINE const btScalar& z() const { return m_floats[2]; }
+  /**@brief Return the w value */
+		SIMD_FORCE_INLINE const btScalar& w() const { return m_floats[3]; }
+
+	//SIMD_FORCE_INLINE btScalar&       operator[](int i)       { return (&m_floats[0])[i];	}      
+	//SIMD_FORCE_INLINE const btScalar& operator[](int i) const { return (&m_floats[0])[i]; }
+	///operator btScalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
+	SIMD_FORCE_INLINE	operator       btScalar *()       { return &m_floats[0]; }
+	SIMD_FORCE_INLINE	operator const btScalar *() const { return &m_floats[0]; }
+  /**@brief Exact comparison of all four elements, including w */
+	SIMD_FORCE_INLINE	bool	operator==(const btVector3& other) const
+	{
+		return ((m_floats[3]==other.m_floats[3]) && (m_floats[2]==other.m_floats[2]) && (m_floats[1]==other.m_floats[1]) && (m_floats[0]==other.m_floats[0]));
+	}
+
+	SIMD_FORCE_INLINE	bool	operator!=(const btVector3& other) const
+	{
+		return !(*this == other);
+	}
+
+	 /**@brief Set each element (including w) to the max of the current values and the values of another btVector3
+   * @param other The other btVector3 to compare with 
+   */
+		SIMD_FORCE_INLINE void	setMax(const btVector3& other)
+		{
+			btSetMax(m_floats[0], other.m_floats[0]);
+			btSetMax(m_floats[1], other.m_floats[1]);
+			btSetMax(m_floats[2], other.m_floats[2]);
+			btSetMax(m_floats[3], other.w());
+		}
+  /**@brief Set each element (including w) to the min of the current values and the values of another btVector3
+   * @param other The other btVector3 to compare with 
+   */
+		SIMD_FORCE_INLINE void	setMin(const btVector3& other)
+		{
+			btSetMin(m_floats[0], other.m_floats[0]);
+			btSetMin(m_floats[1], other.m_floats[1]);
+			btSetMin(m_floats[2], other.m_floats[2]);
+			btSetMin(m_floats[3], other.w());
+		}
+
+		SIMD_FORCE_INLINE void 	setValue(const btScalar& x, const btScalar& y, const btScalar& z)
+		{
+			m_floats[0]=x;
+			m_floats[1]=y;
+			m_floats[2]=z;
+			m_floats[3] = btScalar(0.);
+		}
+  /**@brief Write the rows of the skew-symmetric (cross product) matrix of this vector into v0, v1 and v2 */
+		void	getSkewSymmetricMatrix(btVector3* v0,btVector3* v1,btVector3* v2) const
+		{
+			v0->setValue(0.		,-z()		,y());
+			v1->setValue(z()	,0.			,-x());
+			v2->setValue(-y()	,x()	,0.);
+		}
+  /**@brief Set all four elements to zero */
+		void	setZero()
+		{
+			setValue(btScalar(0.),btScalar(0.),btScalar(0.));
+		}
+  /**@brief Exact test that x, y and z are all zero (w is not examined) */
+		SIMD_FORCE_INLINE bool isZero() const 
+		{
+			return m_floats[0] == btScalar(0) && m_floats[1] == btScalar(0) && m_floats[2] == btScalar(0);
+		}
+  /**@brief Return true when the squared length is below SIMD_EPSILON */
+		SIMD_FORCE_INLINE bool fuzzyZero() const 
+		{
+			return length2() < SIMD_EPSILON;
+		}
+
+		SIMD_FORCE_INLINE	void	serialize(struct	btVector3Data& dataOut) const;
+
+		SIMD_FORCE_INLINE	void	deSerialize(const struct	btVector3Data& dataIn);
+
+		SIMD_FORCE_INLINE	void	serializeFloat(struct	btVector3FloatData& dataOut) const;
+
+		SIMD_FORCE_INLINE	void	deSerializeFloat(const struct	btVector3FloatData& dataIn);
+
+		SIMD_FORCE_INLINE	void	serializeDouble(struct	btVector3DoubleData& dataOut) const;
+
+		SIMD_FORCE_INLINE	void	deSerializeDouble(const struct	btVector3DoubleData& dataIn);
+
+};
+
+/**@brief Return the sum of two vectors (Point semantics)
+ * The w element of the result is zeroed by the three-scalar constructor */
+SIMD_FORCE_INLINE btVector3 operator+(const btVector3& v1, const btVector3& v2)
+{
+	return btVector3(v1.x() + v2.x(), v1.y() + v2.y(), v1.z() + v2.z());
+}
+
+/**@brief Return the elementwise product of two vectors
+ * Only x, y and z are multiplied; the w element of the result is zero */
+SIMD_FORCE_INLINE btVector3 operator*(const btVector3& v1, const btVector3& v2)
+{
+	return btVector3(v1.x() * v2.x(), v1.y() * v2.y(), v1.z() * v2.z());
+}
+
+/**@brief Return the difference between two vectors (v1 - v2)
+ * Only x, y and z are subtracted; the w element of the result is zero */
+SIMD_FORCE_INLINE btVector3 operator-(const btVector3& v1, const btVector3& v2)
+{
+	return btVector3(v1.x() - v2.x(), v1.y() - v2.y(), v1.z() - v2.z());
+}
+/**@brief Return the negative of the vector
+ * Only x, y and z are negated; the w element of the result is zero */
+SIMD_FORCE_INLINE btVector3 operator-(const btVector3& v)
+{
+	return btVector3(-v.x(), -v.y(), -v.z());
+}
+
+/**@brief Return the vector scaled by s
+ * Only x, y and z are scaled; the w element of the result is zero */
+SIMD_FORCE_INLINE btVector3 operator*(const btVector3& v, const btScalar& s)
+{
+	return btVector3(v.x() * s, v.y() * s, v.z() * s);
+}
+
+/**@brief Return the vector scaled by s
+ * Scalar-first spelling; forwards to the vector-first overload */
+SIMD_FORCE_INLINE btVector3 operator*(const btScalar& s, const btVector3& v)
+{
+	return v * s;
+}
+
+/**@brief Return the vector inversely scaled by s
+ * Implemented as a multiplication by the reciprocal of s, which must not be zero */
+SIMD_FORCE_INLINE btVector3 operator/(const btVector3& v, const btScalar& s)
+{
+	btFullAssert(s != btScalar(0.0));
+	return v * (btScalar(1.0) / s);
+}
+
+/**@brief Return the elementwise quotient of two vectors (v1 / v2)
+ * Only x, y and z are divided, without a zero check; the w element of the result is zero */
+SIMD_FORCE_INLINE btVector3 operator/(const btVector3& v1, const btVector3& v2)
+{
+	return btVector3(v1.x() / v2.x(), v1.y() / v2.y(), v1.z() / v2.z());
+}
+
+/**@brief Return the dot product between two vectors
+ * Free-function spelling of btVector3::dot */
+SIMD_FORCE_INLINE btScalar btDot(const btVector3& v1, const btVector3& v2)
+{
+	return v1.dot(v2);
+}
+
+
+/**@brief Return the distance squared between two vectors
+ * Free-function spelling of btVector3::distance2 */
+SIMD_FORCE_INLINE btScalar btDistance2(const btVector3& v1, const btVector3& v2)
+{
+	return v1.distance2(v2);
+}
+
+
+/**@brief Return the distance between two vectors
+ * Free-function spelling of btVector3::distance */
+SIMD_FORCE_INLINE btScalar btDistance(const btVector3& v1, const btVector3& v2)
+{
+	return v1.distance(v2);
+}
+
+/**@brief Return the angle between two vectors
+ * Free-function spelling of btVector3::angle */
+SIMD_FORCE_INLINE btScalar btAngle(const btVector3& v1, const btVector3& v2)
+{
+	return v1.angle(v2);
+}
+
+/**@brief Return the cross product of two vectors (v1 x v2)
+ * Free-function spelling of btVector3::cross */
+SIMD_FORCE_INLINE btVector3 btCross(const btVector3& v1, const btVector3& v2)
+{
+	return v1.cross(v2);
+}
+
+/**@brief Return the triple product v1.triple(v2, v3) */
+SIMD_FORCE_INLINE btScalar btTriple(const btVector3& v1, const btVector3& v2, const btVector3& v3)
+{
+	return v1.triple(v2, v3);
+}
+
+/**@brief Return the linear interpolation between two vectors
+ * Free-function spelling of btVector3::lerp
+ * @param v1 One vector 
+ * @param v2 The other vector 
+ * @param t The ratio of v1 to v2 (t = 0 => return v1, t=1 => return v2) */
+SIMD_FORCE_INLINE btVector3 lerp(const btVector3& v1, const btVector3& v2, const btScalar& t)
+{
+	return v1.lerp(v2, t);
+}
+
+
+
+/**@brief Squared distance between this point and v
+ * Computed as the squared length of (v - *this) */
+SIMD_FORCE_INLINE btScalar btVector3::distance2(const btVector3& v) const
+{ return (v - *this).length2(); }
+
+/**@brief Distance between this point and v
+ * Computed as the length of (v - *this) */
+SIMD_FORCE_INLINE btScalar btVector3::distance(const btVector3& v) const
+{ return (v - *this).length(); }
+
+/**@brief Return a copy of this vector divided by its length
+ * The length is not checked for zero before dividing */
+SIMD_FORCE_INLINE btVector3 btVector3::normalized() const
+{ return *this / length(); }
+
+SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btScalar angle ) const
+{
+	// wAxis must be a unit length vector
+
+	// split this vector into the part along the axis and the remainder
+	const btVector3 alongAxis = wAxis * wAxis.dot( *this );
+	const btVector3 remainder = *this - alongAxis;
+
+	// the remainder is combined with (wAxis x this) using cos/sin of the angle; the axis part is kept
+	const btVector3 sideways = wAxis.cross( *this );
+	return ( alongAxis + remainder * btCos( angle ) + sideways * btSin( angle ) );
+}
+
+class btVector4 : public btVector3
+{
+public:
+
+	//btVector4 reuses the four-scalar storage of btVector3 and treats w as a regular component.
+	//The members declared here take w into account on purpose; most members
+	//inherited from btVector3 keep operating on x, y and z only.
+
+  /**@brief No initialization constructor */
+	SIMD_FORCE_INLINE btVector4() {}
+
+
+  /**@brief Constructor from scalars 
+   * @param x X value
+   * @param y Y value 
+   * @param z Z value 
+   * @param w W value 
+   */
+	SIMD_FORCE_INLINE btVector4(const btScalar& x, const btScalar& y, const btScalar& z,const btScalar& w) 
+		: btVector3(x,y,z)
+	{
+		//the three-scalar base constructor zeroes w, so it is overwritten here
+		m_floats[3] = w;
+	}
+
+
+
+  /**@brief Return a vector with the absolute values of all four elements */
+	SIMD_FORCE_INLINE btVector4 absolute4() const 
+	{
+		const btScalar absX = btFabs(m_floats[0]);
+		const btScalar absY = btFabs(m_floats[1]);
+		const btScalar absZ = btFabs(m_floats[2]);
+		const btScalar absW = btFabs(m_floats[3]);
+		return btVector4(absX, absY, absZ, absW);
+	}
+
+
+
+  /**@brief Return the w value */
+	btScalar	getW() const { return m_floats[3];}
+
+
+
+  /**@brief Return the axis with the largest value, taking all four elements into account
+   * Note return values are 0,1,2,3 for x, y, z or w.
+   * Equal elements resolve to the lowest index.
+   * @return The axis index, or -1 when no element is greater than -BT_LARGE_FLOAT */
+	SIMD_FORCE_INLINE int maxAxis4() const
+	{
+		int bestIndex = -1;
+		btScalar bestValue = btScalar(-BT_LARGE_FLOAT);
+
+		for (int axis = 0; axis < 4; axis++)
+		{
+			//strict comparison: a later element only wins when it is larger
+			if (m_floats[axis] > bestValue)
+			{
+				bestIndex = axis;
+				bestValue = m_floats[axis];
+			}
+		}
+
+		return bestIndex;
+	}
+
+
+
+  /**@brief Return the axis with the smallest value, taking all four elements into account
+   * Note return values are 0,1,2,3 for x, y, z or w.
+   * Equal elements resolve to the lowest index.
+   * @return The axis index, or -1 when no element is smaller than BT_LARGE_FLOAT */
+	SIMD_FORCE_INLINE int minAxis4() const
+	{
+		int bestIndex = -1;
+		btScalar bestValue = btScalar(BT_LARGE_FLOAT);
+
+		for (int axis = 0; axis < 4; axis++)
+		{
+			//strict comparison: a later element only wins when it is smaller
+			if (m_floats[axis] < bestValue)
+			{
+				bestIndex = axis;
+				bestValue = m_floats[axis];
+			}
+		}
+
+		return bestIndex;
+	}
+
+
+
+  /**@brief Return the axis with the largest absolute value, taking all four elements into account
+   * Note return values are 0,1,2,3 for x, y, z or w */
+	SIMD_FORCE_INLINE int closestAxis4() const 
+	{
+		const btVector4 magnitudes = absolute4();
+		return magnitudes.maxAxis4();
+	}
+
+
+
+	//the accessor below is disabled in the original source and is kept as found
+/*		void getValue(btScalar *m) const 
+		{
+			m[0] = m_floats[0];
+			m[1] = m_floats[1];
+			m[2] =m_floats[2];
+		}
+*/
+
+/**@brief Set the values 
+   * @param x Value of x
+   * @param y Value of y
+   * @param z Value of z
+   * @param w Value of w
+   * @note Declaring this overload hides the three-argument btVector3::setValue
+   * (which zeroes w) for calls made through a btVector4
+   */
+		SIMD_FORCE_INLINE void	setValue(const btScalar& x, const btScalar& y, const btScalar& z,const btScalar& w)
+		{
+			m_floats[0]=x;
+			m_floats[1]=y;
+			m_floats[2]=z;
+			m_floats[3]=w;
+		}
+
+
+
+};
+
+
+///btSwapScalarEndian reverses the byte order of one scalar, useful for network and cross-platform serialization
+///The number of bytes reversed is 8 with BT_USE_DOUBLE_PRECISION and 4 otherwise
+///@param sourceVal Scalar whose bytes are read
+///@param destVal Scalar that receives the bytes in reversed order
+SIMD_FORCE_INLINE void	btSwapScalarEndian(const btScalar& sourceVal, btScalar& destVal)
+{
+	//the byte count is fixed per build configuration
+#ifdef BT_USE_DOUBLE_PRECISION
+	const int byteCount = 8;
+#else
+	const int byteCount = 4;
+#endif //BT_USE_DOUBLE_PRECISION
+
+	//byte-wise views of both scalars
+	unsigned char* out = (unsigned char*) &destVal;
+	const unsigned char* in = (const unsigned char*) &sourceVal;
+
+	//out[0] takes the last input byte, out[1] the one before it, and so on
+	for (int i = 0; i < byteCount; i++)
+	{
+		out[i] = in[byteCount - 1 - i];
+	}
+}
+///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
+///All four scalars of sourceVec (x, y, z and w) are byte-swapped into destVec
+SIMD_FORCE_INLINE void	btSwapVector3Endian(const btVector3& sourceVec, btVector3& destVec)
+{
+	const btScalar* from = sourceVec;
+	btScalar* to = destVec;
+	for (int axis = 0; axis != 4; ++axis)
+		btSwapScalarEndian(from[axis], to[axis]);
+}
+
+///btUnSwapVector3Endian swaps vector endianness in place, useful for network and cross-platform serialization
+///@param vector Vector whose four scalars (x, y, z and w) are each byte-reversed
+SIMD_FORCE_INLINE void	btUnSwapVector3Endian(btVector3& vector)
+{
+	//swap into a temporary first, then copy the result back over the input
+	btVector3	reversed;
+	for (int axis = 0; axis != 4; ++axis)
+		btSwapScalarEndian(vector[axis], reversed[axis]);
+
+	vector = reversed;
+}
+
+///Build two vectors p and q that are orthogonal to n and to each other, with q = n x p
+template <class T>
+SIMD_FORCE_INLINE void btPlaneSpace1 (const T& n, T& p, T& q)
+{
+  if (btFabs(n[2]) > SIMDSQRT12) {
+    // choose p in y-z plane
+    const btScalar lenSq = n[1]*n[1] + n[2]*n[2];
+    const btScalar invLen = btRecipSqrt (lenSq);
+    p[0] = 0;
+    p[1] = -n[2]*invLen;
+    p[2] = n[1]*invLen;
+    // set q = n x p
+    q[0] = lenSq*invLen;
+    q[1] = -n[0]*p[2];
+    q[2] = n[0]*p[1];
+  } else {
+    // choose p in x-y plane
+    const btScalar lenSq = n[0]*n[0] + n[1]*n[1];
+    const btScalar invLen = btRecipSqrt (lenSq);
+    p[0] = -n[1]*invLen;
+    p[1] = n[0]*invLen;
+    p[2] = 0;
+    // set q = n x p
+    q[0] = -n[2]*p[1];
+    q[1] = n[2]*p[0];
+    q[2] = lenSq*invLen;
+  }
+}
+
+
+struct	btVector3FloatData
+{
+	float	m_floats[4];	///< the four vector elements as floats, see btVector3::serializeFloat
+};
+
+struct	btVector3DoubleData
+{
+	double	m_floats[4];
+	///< the four vector elements as doubles, see btVector3::serializeDouble
+};
+
+SIMD_FORCE_INLINE	void	btVector3::serializeFloat(struct	btVector3FloatData& dataOut) const
+{
+	///writes all four elements (w included), each converted to float; a memcpy might also do, check if it is worth it
+	for (int axis = 0; axis != 4; ++axis)
+		dataOut.m_floats[axis] = float(m_floats[axis]);
+}
+
+SIMD_FORCE_INLINE void	btVector3::deSerializeFloat(const struct	btVector3FloatData& dataIn)
+{
+	for (int axis = 0; axis != 4; ++axis)	//all four stored elements, w included
+		m_floats[axis] = btScalar(dataIn.m_floats[axis]);
+}
+
+
+SIMD_FORCE_INLINE	void	btVector3::serializeDouble(struct	btVector3DoubleData& dataOut) const
+{
+	///writes all four elements (w included), each converted to double; a memcpy might also do, check if it is worth it
+	for (int axis = 0; axis != 4; ++axis)
+		dataOut.m_floats[axis] = double(m_floats[axis]);
+}
+
+SIMD_FORCE_INLINE void	btVector3::deSerializeDouble(const struct	btVector3DoubleData& dataIn)
+{
+	for (int axis = 0; axis != 4; ++axis)	//all four stored elements, w included
+		m_floats[axis] = btScalar(dataIn.m_floats[axis]);
+}
+
+
+SIMD_FORCE_INLINE	void	btVector3::serialize(struct	btVector3Data& dataOut) const
+{
+	///writes all four elements (w included) without conversion; a memcpy might also do, check if it is worth it
+	for (int axis = 0; axis != 4; ++axis)
+		dataOut.m_floats[axis] = m_floats[axis];
+}
+
+SIMD_FORCE_INLINE void	btVector3::deSerialize(const struct	btVector3Data& dataIn)
+{
+	for (int axis = 0; axis != 4; ++axis)	//all four stored elements, w included
+		m_floats[axis] = dataIn.m_floats[axis];
+}
+
+
+#endif //BT_VECTOR3_H
diff --git a/src/bullet/MiniCL/MiniCL.cpp b/src/bullet/MiniCL/MiniCL.cpp
new file mode 100644
index 00000000..24f6751f
--- /dev/null
+++ b/src/bullet/MiniCL/MiniCL.cpp
@@ -0,0 +1,784 @@
+/*
+   Copyright (C) 2010 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#include "MiniCL/cl.h"
+#define __PHYSICS_COMMON_H__ 1
+#ifdef _WIN32
+#include "BulletMultiThreaded/Win32ThreadSupport.h"
+#endif
+
+#include "BulletMultiThreaded/PlatformDefinitions.h"
+#ifdef USE_PTHREADS
+#include "BulletMultiThreaded/PosixThreadSupport.h"
+#endif
+
+
+#include "BulletMultiThreaded/SequentialThreadSupport.h"
+#include "MiniCLTaskScheduler.h"
+#include "MiniCLTask/MiniCLTask.h"
+#include "LinearMath/btMinMax.h"
+#include <stdio.h>
+
+//#define DEBUG_MINICL_KERNELS 1
+
+static const char* spPlatformID = "MiniCL, SCEA";
+static const char* spDriverVersion= "1.0";
+
+CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs(
+	cl_uint           num_entries,
+    cl_platform_id *  platforms,
+    cl_uint *         num_platforms ) CL_API_SUFFIX__VERSION_1_0
+{
+	// Exactly one platform is reported; its handle is the address of the platform ID string.
+	const bool wantsHandle = (platforms != NULL);
+	if(wantsHandle && num_entries <= 0)
+	{
+		// an output array was supplied but it has no room for an entry
+		return CL_INVALID_VALUE; 
+	}
+	if(wantsHandle)
+		*((const char**)platforms) = spPlatformID;
+	if(num_platforms != NULL)
+		*num_platforms = 1;
+
+	return CL_SUCCESS;
+}
+
+
+CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfo(
+	cl_platform_id   platform, 
+	cl_platform_info param_name,
+	size_t           param_value_size, 
+	void *           param_value,
+	size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+{
+	// Copies a NUL-terminated string describing the single MiniCL platform into param_value.
+	// param_value may be NULL to query only the required size through param_value_size_ret.
+	// Returns CL_INVALID_PLATFORM for an unknown handle and CL_INVALID_VALUE for an
+	// unsupported param_name or a supplied buffer that is too small.
+	// a NULL handle would crash strcmp, so it is rejected explicitly
+	const char* pId = (const char*)platform;
+	if(pId == NULL || strcmp(pId, spPlatformID))
+	{
+		return CL_INVALID_PLATFORM; 
+	}
+
+	const char* info = NULL;
+	switch(param_name)
+	{
+	case CL_PLATFORM_VERSION:
+		info = spDriverVersion;
+		break;
+	case CL_PLATFORM_NAME:
+	case CL_PLATFORM_VENDOR:
+		info = spPlatformID;
+		break;
+	default:
+		return CL_INVALID_VALUE;
+	}
+
+	const size_t infoSize = strlen(info) + 1;
+	if(param_value != NULL)
+	{
+		// validate and copy only when the caller actually supplied a buffer
+		if(param_value_size < infoSize)
+			return CL_INVALID_VALUE;
+		strcpy((char*)param_value, info);
+	}
+	if(param_value_size_ret != NULL)
+		*param_value_size_ret = infoSize;
+	return CL_SUCCESS;
+}
+
+
+
+
+CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
+	cl_device_id            device ,
+	cl_device_info          param_name ,
+	size_t                  param_value_size ,
+	void *                  param_value ,
+	size_t *                param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+{
+	// NOTE(review): only some cases validate param_value_size; only CL_DEVICE_VENDOR and CL_DRIVER_VERSION set param_value_size_ret.
+	switch (param_name)
+	{
+	case CL_DEVICE_NAME:
+		{
+			char deviceName[] = "MiniCL CPU";
+			unsigned int nameLen = (unsigned int)strlen(deviceName)+1;
+			btAssert(param_value_size>strlen(deviceName));
+			if (nameLen <= param_value_size)
+			{
+				// a buffer of exactly nameLen bytes (name plus terminator) is large enough
+				sprintf((char*)param_value,"%s",deviceName);
+			} else
+			{
+				printf("error: param_value_size should be at least %d, but it is %d\n",(int)nameLen,(int)param_value_size);
+				return CL_INVALID_VALUE; 
+			}
+			break;
+		}
+	case CL_DEVICE_TYPE:
+		{
+			if (param_value_size>=sizeof(cl_device_type))
+			{
+				cl_device_type* deviceType = (cl_device_type*)param_value;
+				*deviceType = CL_DEVICE_TYPE_CPU;
+			} else
+			{
+				printf("error: param_value_size should be at least %d\n",(int)sizeof(cl_device_type));
+				return CL_INVALID_VALUE; 
+			}
+			break;
+		}
+	case CL_DEVICE_MAX_COMPUTE_UNITS:
+		{
+			if (param_value_size>=sizeof(cl_uint))
+			{
+				cl_uint* numUnits = (cl_uint*)param_value;
+				*numUnits= 4;
+			} else
+			{
+				printf("error: param_value_size should be at least %d\n",(int)sizeof(cl_uint));
+				return CL_INVALID_VALUE; 
+			}
+
+			break;
+		}
+	case CL_DEVICE_MAX_WORK_ITEM_SIZES:
+		{
+			size_t workitem_size[3];
+
+			if (param_value_size>=sizeof(workitem_size))
+			{
+				size_t* workItemSize = (size_t*)param_value;
+				workItemSize[0] = 64;
+				workItemSize[1] = 24;
+				workItemSize[2] = 16;
+			} else
+			{
+				printf("error: param_value_size should be at least %d\n",(int)sizeof(workitem_size));
+				return CL_INVALID_VALUE; 
+			}
+			break;
+		}
+	case CL_DEVICE_MAX_CLOCK_FREQUENCY:
+		{
+			 cl_uint* clock_frequency = (cl_uint*)param_value;
+			 *clock_frequency = 3*1024;
+			break;
+		}
+
+	case CL_DEVICE_VENDOR	:
+		{
+			if(param_value_size < (strlen(spPlatformID) + 1))
+			{
+				return CL_INVALID_VALUE; 
+			}
+			strcpy((char*)param_value, spPlatformID);
+			if(param_value_size_ret != NULL)
+			{
+				*param_value_size_ret = strlen(spPlatformID) + 1;
+			}
+			break;
+		}
+	case CL_DRIVER_VERSION:
+		{
+			if(param_value_size < (strlen(spDriverVersion) + 1))
+			{
+				return CL_INVALID_VALUE; 
+			}
+			strcpy((char*)param_value, spDriverVersion);
+			if(param_value_size_ret != NULL)
+			{
+				*param_value_size_ret = strlen(spDriverVersion) + 1;
+			}
+
+			break;
+		}
+	case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
+		{
+			 cl_uint* maxDimensions = (cl_uint*)param_value;
+			 *maxDimensions = 1;
+			 break;
+		}
+		case CL_DEVICE_MAX_WORK_GROUP_SIZE:
+		{
+			 cl_uint* maxWorkGroupSize = (cl_uint*)param_value;
+			 *maxWorkGroupSize = 128;//1;
+			 break;
+		}
+		case CL_DEVICE_ADDRESS_BITS:
+		{
+			 cl_uint* addressBits = (cl_uint*)param_value;
+			 *addressBits= 32; //@todo: should this be 64 for 64bit builds?
+			 break;
+		}
+		case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
+			{
+				cl_ulong* maxMemAlloc = (cl_ulong*)param_value;
+				*maxMemAlloc= 512*1024*1024; //this "should be enough for everyone" ?
+			 break;
+			}
+		case CL_DEVICE_GLOBAL_MEM_SIZE:
+			{
+				cl_ulong* maxMemAlloc = (cl_ulong*)param_value;
+				*maxMemAlloc= 1024*1024*1024; //this "should be enough for everyone" ?
+			 break;
+			}
+
+		case CL_DEVICE_ERROR_CORRECTION_SUPPORT:
+			{
+			cl_bool* error_correction_support = (cl_bool*)param_value;
+			*error_correction_support = CL_FALSE;
+			break;
+			}
+
+		case CL_DEVICE_LOCAL_MEM_TYPE:
+			{
+			cl_device_local_mem_type* local_mem_type = (cl_device_local_mem_type*)param_value;
+			*local_mem_type = CL_GLOBAL;
+			break;
+			}
+		case CL_DEVICE_LOCAL_MEM_SIZE:
+			{
+				cl_ulong* localmem = (cl_ulong*) param_value;
+				*localmem = 32*1024;
+				break;
+			}
+
+		case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
+			{
+				cl_ulong* localmem = (cl_ulong*) param_value;
+				*localmem = 64*1024;
+				break;
+			}
+		case CL_DEVICE_QUEUE_PROPERTIES:
+			{
+				cl_command_queue_properties* queueProp = (cl_command_queue_properties*) param_value;
+				memset(queueProp,0,param_value_size);
+
+				break;
+			}
+		case CL_DEVICE_IMAGE_SUPPORT:
+			{
+				cl_bool* imageSupport = (cl_bool*) param_value;
+				*imageSupport = CL_FALSE;
+				break;
+			}
+
+		case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
+		case CL_DEVICE_MAX_READ_IMAGE_ARGS:
+			{
+				cl_uint* imageArgs = (cl_uint*) param_value;
+				*imageArgs = 0;
+				break;
+			}
+		case CL_DEVICE_IMAGE3D_MAX_DEPTH:
+		case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
+		case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
+		case CL_DEVICE_IMAGE3D_MAX_WIDTH:
+		case CL_DEVICE_IMAGE2D_MAX_WIDTH:
+			{
+				size_t* maxSize = (size_t*) param_value;
+				*maxSize = 0;
+				break;
+			}
+
+		case CL_DEVICE_EXTENSIONS:
+			{
+				char* extensions = (char*) param_value;
+				*extensions = 0;
+				break;
+			}
+
+		case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
+		case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
+		case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
+		case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
+		case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
+		case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
+			{
+				cl_uint* width  = (cl_uint*) param_value;
+				*width = 1;
+				break;
+			}
+			
+	default:
+		{
+			printf("error: unsupported param_name:%d\n",(int)param_name);
+		}
+	}
+	// NOTE(review): every path reaching this point reports success (0), including the unsupported-param_name default above
+
+	return 0;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0
+{
+	return CL_SUCCESS; // no-op: the handle is ignored and nothing is freed here
+}
+
+
+
+CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
+{
+	return CL_SUCCESS; // no-op: the handle is ignored and nothing is freed here
+}
+
+CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0
+{
+	return CL_SUCCESS; // no-op: the handle is ignored and nothing is freed here
+}
+
+CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel   /* kernel */) CL_API_SUFFIX__VERSION_1_0
+{
+	return CL_SUCCESS; // no-op: the handle is ignored and nothing is freed here
+}
+
+
+// Enqueued Commands APIs
+CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue     command_queue ,
+                    cl_mem               buffer ,
+                    cl_bool             /* blocking_read */,
+                    size_t               offset ,
+                    size_t               cb , 
+                    void *               ptr ,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0
+{
+	// the command queue handle is treated as a MiniCLTaskScheduler pointer
+	MiniCLTaskScheduler* taskScheduler = (MiniCLTaskScheduler*) command_queue;
+	taskScheduler->flush(); ///wait for all work items to be completed
+	// the buffer handle is used directly as a host address: cb bytes starting at offset go to ptr
+	const char* source = (const char*)buffer + offset;
+	memcpy(ptr,source,cb);
+	return CL_SUCCESS;
+}
+
+
+CL_API_ENTRY cl_int clGetProgramBuildInfo(cl_program            /* program */,
+                      cl_device_id          /* device */,
+                      cl_program_build_info /* param_name */,
+                      size_t                /* param_value_size */,
+                      void *                /* param_value */,
+                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
+{
+	// stub: every argument is ignored and no build information is written
+	return CL_SUCCESS;
+}
+
+// Program Object APIs
+// Stub: no source is compiled; the context handle is returned as the program handle.
+CL_API_ENTRY cl_program
+clCreateProgramWithSource(cl_context         context ,
+                          cl_uint           /* count */,
+                          const char **     /* strings */,
+                          const size_t *    /* lengths */,
+                          cl_int *          errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	if (errcode_ret) *errcode_ret = CL_SUCCESS; // errcode_ret is optional in OpenCL and may be NULL
+	return (cl_program)context;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue     command_queue ,
+                    cl_mem               buffer ,
+                    cl_bool             /* blocking_read */,
+                    size_t              offset,
+                    size_t               cb , 
+                    const void *         ptr ,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0
+{
+	// Copies cb bytes from ptr into the buffer, starting offset bytes in.
+	// The queue handle is treated as the scheduler; it is drained first so
+	// no task that was already issued is still running during the overwrite.
+	((MiniCLTaskScheduler*) command_queue)->flush();
+	char* const dst = (char*)buffer + offset;
+	memcpy(dst, ptr, cb);
+	return 0;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue  command_queue)
+{
+	// The queue handle is treated as the scheduler: flushing blocks until
+	// that scheduler has no busy tasks left.
+	((MiniCLTaskScheduler*) command_queue)->flush();
+	return 0;
+}
+
+
+CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
+                       cl_kernel         clKernel ,
+                       cl_uint           work_dim ,
+                       const size_t *   /* global_work_offset */,
+                       const size_t *    global_work_size ,
+                       const size_t *   /* local_work_size */,
+                       cl_uint          /* num_events_in_wait_list */,
+                       const cl_event * /* event_wait_list */,
+                       cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0
+{
+	// For each of the work_dim dimensions, slices [0, global_work_size[dim]) into contiguous ranges and issues one scheduler task per range.
+	// No flush() is performed here; offsets, local sizes and events are ignored. Always returns 0.
+	MiniCLKernel* kernel = (MiniCLKernel*) clKernel;
+	for (unsigned int ii=0;ii<work_dim;ii++)
+	{
+		int maxTask = kernel->m_scheduler->getMaxNumOutstandingTasks();
+		int numWorkItems = global_work_size[ii]; // size_t narrowed to int
+		// NOTE(review): maxTask is used as a divisor below - assumed to be non-zero, confirm.
+//		//at minimum 64 work items per task
+//		int numWorkItemsPerTask = btMax(64,numWorkItems / maxTask);
+		int numWorkItemsPerTask = numWorkItems / maxTask;
+		if (!numWorkItemsPerTask) numWorkItemsPerTask = 1;
+		// Issue half-open ranges [t, endIndex); the last range is clamped to numWorkItems.
+		for (int t=0;t<numWorkItems;)
+		{
+			//Performance Hint: tweak this number during benchmarking
+			int endIndex = (t+numWorkItemsPerTask) < numWorkItems ? t+numWorkItemsPerTask : numWorkItems;
+			kernel->m_scheduler->issueTask(t, endIndex, kernel);
+			t = endIndex;
+		}
+	}
+/*
+
+	void* bla = 0;
+
+	scheduler->issueTask(bla,2,3);
+	scheduler->flush();
+
+	*/
+
+	return 0;
+}
+
+#define LOCAL_BUF_SIZE 32768 // arena capacity, counted in 16-byte units
+static int sLocalMemBuf[LOCAL_BUF_SIZE * 4 + 16]; // 4 ints per unit, plus slack for the 16-byte alignment step
+static int* spLocalBufCurr = NULL; // next free address inside sLocalMemBuf
+static int sLocalBufUsed = LOCAL_BUF_SIZE; // units handed out; starts "full" so it will be reset at the first call
+static void* localBufMalloc(int size) // bump-allocates size bytes (rounded up to a multiple of 16) from the static arena
+{
+	int size16 = (size + 15) >> 4; // in 16-byte units
+	if((sLocalBufUsed + size16) > LOCAL_BUF_SIZE)
+	{ // arena exhausted: rewind to the start, recycling memory handed out earlier
+		spLocalBufCurr = sLocalMemBuf;
+		while((size_t)spLocalBufCurr & 0x0F) spLocalBufCurr++; // align to 16 bytes (size_t keeps the cast pointer-sized on LLP64)
+		sLocalBufUsed = 0;
+	}
+	void* ret = spLocalBufCurr;
+	spLocalBufCurr += size16 * 4; // 4 ints == one 16-byte unit (assumes 4-byte int)
+	sLocalBufUsed += size16; // count in 16-byte units, the same unit the capacity check above uses
+	return ret;
+}
+
+// Stores one kernel argument. arg_value == NULL requests arg_size bytes of scratch memory (__local);
+// otherwise arg_size bytes are copied into the argument slot. Errors are only printed; the call always returns 0.
+CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel    clKernel ,
+               cl_uint      arg_index ,
+               size_t       arg_size ,
+               const void *  arg_value ) CL_API_SUFFIX__VERSION_1_0
+{
+	MiniCLKernel* kernel = (MiniCLKernel* ) clKernel;
+	btAssert(arg_size <= MINICL_MAX_ARGLENGTH);
+	if (arg_index>=MINI_CL_MAX_ARG) // valid slots are 0..MINI_CL_MAX_ARG-1; a larger index would push m_numArgs past the launcher table
+	{
+		printf("error: clSetKernelArg arg_index (%u) exceeds %u\n",arg_index,(unsigned int)(MINI_CL_MAX_ARG-1));
+	} else
+	{
+		if (arg_size>MINICL_MAX_ARGLENGTH)
+		// every argument has to fit into a single pointer-sized slot
+		{
+			printf("error: clSetKernelArg argdata too large: %zu (maximum is %zu)\n",arg_size,MINICL_MAX_ARGLENGTH);
+		} 
+		else
+		{
+			if(arg_value == NULL)
+			{	// this is only for __local memory qualifier
+				void* ptr = localBufMalloc(arg_size);
+				kernel->m_argData[arg_index] = ptr;
+			}
+			else
+			{
+				memcpy(&(kernel->m_argData[arg_index]), arg_value, arg_size);
+			}
+			kernel->m_argSizes[arg_index] = arg_size;
+			if(arg_index >= kernel->m_numArgs)
+			{
+				kernel->m_numArgs = arg_index + 1;
+				kernel->updateLauncher();
+			}
+		}
+	}
+	return 0;
+}
+
+// Kernel Object APIs
+CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program       program ,
+               const char *     kernel_name ,
+               cl_int *         errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	// Creates a MiniCLKernel bound to the scheduler behind `program` and resolves
+	// kernel_name through MiniCLKernel::registerSelf().
+	// Returns NULL on failure; errcode_ret is optional and only written when non-NULL.
+	cl_int localErr = CL_SUCCESS;
+	if (!errcode_ret)
+	{
+		errcode_ret = &localErr; // OpenCL allows a NULL errcode_ret
+	}
+	// Validate the name before allocating so the early exits cannot leak the kernel object.
+	if (kernel_name == NULL)
+	{
+		*errcode_ret = CL_INVALID_VALUE;
+		return NULL;
+	}
+	if (strlen(kernel_name) >= MINI_CL_MAX_KERNEL_NAME)
+	{
+		*errcode_ret = CL_INVALID_KERNEL_NAME;
+		return NULL;
+	}
+
+	MiniCLKernel* kernel = new MiniCLKernel();
+	strcpy(kernel->m_name, kernel_name);
+	kernel->m_numArgs = 0;
+	kernel->m_scheduler = (MiniCLTaskScheduler*) program;
+	if(kernel->registerSelf() == NULL)
+	{
+		// Not deleted here: registerSelf() has already passed the pointer to the scheduler.
+		*errcode_ret = CL_INVALID_KERNEL_NAME;
+		return NULL;
+	}
+	*errcode_ret = CL_SUCCESS;
+	return (cl_kernel)kernel;
+}
+
+
+CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program           /* program */,
+               cl_uint              /* num_devices */,
+               const cl_device_id * /* device_list */,
+               const char *         /* options */, 
+               void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
+               void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0
+{	// Stub: nothing is built and pfn_notify is never invoked.
+	return CL_SUCCESS;
+}
+
+CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context                     context ,
+                          cl_uint                        /* num_devices */,
+                          const cl_device_id *           /* device_list */,
+                          const size_t *                 /* lengths */,
+                          const unsigned char **         /* binaries */,
+                          cl_int *                       /* binary_status */,
+                          cl_int *                       errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{	// Stub: no binary is loaded; the context handle is returned as the program handle.
+	if (errcode_ret) *errcode_ret = CL_SUCCESS; // report success so callers passing a status pointer do not read an indeterminate value
+	return (cl_program)context;
+}
+
+// Memory Object APIs
+CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context   /* context */,
+               cl_mem_flags flags ,
+               size_t       size,
+               void *       host_ptr ,
+               cl_int *     errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{	// A MiniCL buffer is a plain malloc() block of size bytes; the handle is the block's address.
+	cl_mem buf = (cl_mem)malloc(size);
+	if (buf && (flags&CL_MEM_COPY_HOST_PTR) && host_ptr) // never copy into a failed allocation
+	{
+		memcpy(buf,host_ptr,size);
+	}
+	if (errcode_ret) *errcode_ret = buf ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY; // errcode_ret is optional and may be NULL
+	return buf;
+}
+
+// Command Queue APIs
+CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context                      context , 
+                     cl_device_id                   /* device */, 
+                     cl_command_queue_properties    /* properties */,
+                     cl_int *                        errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{	// The context handle is reused as the queue handle; no separate queue object is created.
+	if (errcode_ret) *errcode_ret = CL_SUCCESS; // errcode_ret is optional in OpenCL and may be NULL
+	return (cl_command_queue) context;
+}
+
+extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context         /* context */, 
+                 cl_context_info    param_name , 
+                 size_t             param_value_size , 
+                 void *             param_value, 
+                 size_t *           param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	// Minimal stub: CL_CONTEXT_DEVICES answers with a fixed placeholder string; any other query only prints a message.
+	switch (param_name)
+	{
+	case CL_CONTEXT_DEVICES:
+		{
+			static const char testName[] = "MiniCL_Test.";
+			const size_t needed = sizeof(testName); // 13 bytes including the terminating NUL
+			if (param_value_size_ret) *param_value_size_ret = needed; // optional output, may be NULL
+			if (param_value_size && param_value)
+			{
+				// Refuse to write past a destination that is too small for the placeholder.
+				if (param_value_size < needed) return CL_INVALID_VALUE;
+				memcpy(param_value, testName, needed);
+			}
+			break;
+		}
+	default:
+		{
+			printf("unsupported\n");
+		}
+	}
+	return 0;
+}
+
+// Creates the MiniCL context: a MiniCLTaskScheduler driving a freshly created thread-support backend.
+// The returned handle is the scheduler pointer itself. errcode_ret is optional; properties, pfn_notify and user_data are ignored.
+CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(const cl_context_properties * /* properties */,
+                        cl_device_type           device_type ,
+                        void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
+                        void *                  /* user_data */,
+                        cl_int *                 errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	int maxNumOutstandingTasks = 4;
+//	int maxNumOutstandingTasks = 2;
+//	int maxNumOutstandingTasks = 1;
+	gMiniCLNumOutstandingTasks = maxNumOutstandingTasks;
+	const int maxNumOfThreadSupports = 8;
+	static int sUniqueThreadSupportIndex = 0;
+	static const char* sUniqueThreadSupportName[maxNumOfThreadSupports] = 
+	{
+		"MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7" 
+	};
+
+	btThreadSupportInterface* threadSupport = 0;
+
+	if (device_type==CL_DEVICE_TYPE_DEBUG)
+	{
+		SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
+		threadSupport = new SequentialThreadSupport(stc);
+	} else
+	{
+		// Pick the backend at compile time: Win32 threads, POSIX threads, or the sequential fallback.
+#if _WIN32
+	btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports);
+	const char* bla = "MiniCL";
+	threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
+//								bla,
+								sUniqueThreadSupportName[sUniqueThreadSupportIndex++],
+								processMiniCLTask, //processCollisionTask,
+								createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory,
+								maxNumOutstandingTasks));
+#else
+
+#ifdef USE_PTHREADS
+		PosixThreadSupport::ThreadConstructionInfo constructionInfo("PosixThreads",
+																	processMiniCLTask,
+																	createMiniCLLocalStoreMemory,
+																	maxNumOutstandingTasks);
+		threadSupport = new PosixThreadSupport(constructionInfo);
+
+#else
+	///todo: add posix thread support for other platforms
+	SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
+	threadSupport = new SequentialThreadSupport(stc);
+#endif //USE_PTHREADS
+#endif
+
+	}
+
+	// The scheduler keeps the backend pointer; clReleaseContext() deletes both objects.
+	MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks);
+
+	if (errcode_ret) *errcode_ret = CL_SUCCESS; // errcode_ret is optional in OpenCL and may be NULL
+	return (cl_context)scheduler;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDs(cl_platform_id   /* platform */,
+               cl_device_type   /* device_type */, 
+               cl_uint          /* num_entries */, 
+               cl_device_id *   /* devices */, 
+               cl_uint *        /* num_devices */) CL_API_SUFFIX__VERSION_1_0
+{	// Stub: returns 0 without writing devices or num_devices.
+	return 0; // NOTE(review): callers that read the output parameters afterwards see whatever they held before the call - confirm no caller depends on them.
+}
+
+CL_API_ENTRY cl_context CL_API_CALL
+clCreateContext(const cl_context_properties *  properties ,
+                cl_uint                        num_devices ,
+                const cl_device_id *           devices ,
+                 void (*pfn_notify)(const char *, const void *, size_t, void *),
+                void *                         user_data ,
+                cl_int *                       errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	// Forwards to clCreateContextFromType with CL_DEVICE_TYPE_ALL; num_devices and devices are not used.
+	return	clCreateContextFromType(properties,CL_DEVICE_TYPE_ALL,pfn_notify,user_data,errcode_ret);
+}
+
+CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context  context ) CL_API_SUFFIX__VERSION_1_0
+{
+	// The context handle is the MiniCLTaskScheduler created by clCreateContextFromType.
+	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context;
+	// Fetch the backend pointer before the scheduler is destroyed, then delete the scheduler first and the backend second.
+	btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface();
+	delete scheduler;
+	delete threadSupport;
+	// Always reports 0.
+	return 0;
+}
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFinish(cl_command_queue command_queue ) CL_API_SUFFIX__VERSION_1_0
+{
+	// Block until the scheduler behind this queue handle has no busy tasks left.
+	MiniCLTaskScheduler* const queueScheduler = (MiniCLTaskScheduler*) command_queue;
+	queueScheduler->flush();
+	return CL_SUCCESS;
+}
+
+extern CL_API_ENTRY cl_int CL_API_CALL 
+clGetProgramInfo(cl_program         /* program */,
+                 cl_program_info    /* param_name */,
+                 size_t             /* param_value_size */,
+                 void *             /* param_value */,
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
+{	// Stub: every argument is ignored and no output parameter is written.
+   return 0;
+}
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelWorkGroupInfo(cl_kernel                   kernel ,
+                         cl_device_id               /* device */,
+                         cl_kernel_work_group_info  wgi/* param_name */,
+                         size_t   sz                  /* param_value_size */,
+                         void *     ptr                /* param_value */,
+                         size_t *                   /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
+{
+	// Only one query is answered: CL_KERNEL_WORK_GROUP_SIZE written into a
+	// caller buffer of exactly sizeof(size_t) bytes. Anything else is rejected.
+	const bool supportedQuery = (wgi == CL_KERNEL_WORK_GROUP_SIZE);
+	const bool usableBuffer = (sz == sizeof(size_t)) && (ptr != NULL);
+	if(!(supportedQuery && usableBuffer))
+	{
+		return CL_INVALID_VALUE;
+	}
+	// The reported size is the scheduler's limit on outstanding tasks.
+	MiniCLKernel* const owner = (MiniCLKernel*)kernel;
+	size_t* const out = (size_t*)ptr;
+	*out = owner->m_scheduler->getMaxNumOutstandingTasks();
+	return CL_SUCCESS;
+}
diff --git a/src/bullet/MiniCL/MiniCLTask/MiniCLTask.cpp b/src/bullet/MiniCL/MiniCLTask/MiniCLTask.cpp
new file mode 100644
index 00000000..a56e96a0
--- /dev/null
+++ b/src/bullet/MiniCL/MiniCLTask/MiniCLTask.cpp
@@ -0,0 +1,74 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#include "MiniCLTask.h"
+#include "BulletMultiThreaded/PlatformDefinitions.h"
+#include "BulletMultiThreaded/SpuFakeDma.h"
+#include "LinearMath/btMinMax.h"
+#include "MiniCLTask.h"
+#include "MiniCL/MiniCLTaskScheduler.h"
+
+
+#ifdef __SPU__
+#include <spu_printf.h>
+#else
+#include <stdio.h>
+#define spu_printf printf
+#endif
+
+int gMiniCLNumOutstandingTasks = 0;
+
+struct MiniCLTask_LocalStoreMemory
+{
+	// No members are declared; processMiniCLTask receives a pointer to this type but does not read through it.
+};
+
+
+//-- MAIN METHOD
+void processMiniCLTask(void* userPtr, void* lsMemory)
+{
+	//	BT_PROFILE("processSampleTask");
+
+	// Task entry point: userPtr is the MiniCLTaskDesc describing a half-open
+	// range of work units; the kernel's launcher is called once per unit.
+	// The local-store block is not used by this task.
+	(void)lsMemory;
+
+	MiniCLTaskDesc* const desc = (MiniCLTaskDesc*)userPtr;
+	for (unsigned int unit = desc->m_firstWorkUnit; unit < desc->m_lastWorkUnit; ++unit)
+	{
+		desc->m_kernel->m_launcher(desc, unit);
+	}
+
+//	printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit);
+}
+
+
+#if defined(__CELLOS_LV2__) || defined (LIBSPE2)
+// Cell/SPE builds: one statically allocated instance, declared through ATTRIBUTE_ALIGNED16, serves every caller.
+ATTRIBUTE_ALIGNED16(MiniCLTask_LocalStoreMemory	gLocalStoreMemory);
+// Returns the address of the static instance above.
+void* createMiniCLLocalStoreMemory()
+{
+	return &gLocalStoreMemory;
+}
+#else
+void* createMiniCLLocalStoreMemory() // other builds: returns a new heap instance on every call
+{
+	return new MiniCLTask_LocalStoreMemory;
+};
+// NOTE(review): nothing in this file deletes the heap instance; ownership presumably passes to the thread-support backend - confirm.
+#endif
diff --git a/src/bullet/MiniCL/MiniCLTask/MiniCLTask.h b/src/bullet/MiniCL/MiniCLTask/MiniCLTask.h
new file mode 100644
index 00000000..7e78be08
--- /dev/null
+++ b/src/bullet/MiniCL/MiniCLTask/MiniCLTask.h
@@ -0,0 +1,62 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef MINICL__TASK_H
+#define MINICL__TASK_H
+
+#include "BulletMultiThreaded/PlatformDefinitions.h"
+#include "LinearMath/btScalar.h"
+
+#include "LinearMath/btAlignedAllocator.h"
+
+
+#define MINICL_MAX_ARGLENGTH (sizeof(void*))
+#define MINI_CL_MAX_ARG 16
+#define MINI_CL_MAX_KERNEL_NAME 256
+
+struct MiniCLKernel;
+
+ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc
+{
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	// Only the argument sizes are cleared by the constructor; every other field is left uninitialized.
+	MiniCLTaskDesc()
+	{
+		for (int i=0;i<MINI_CL_MAX_ARG;i++)
+		{
+			m_argSizes[i]=0;
+		}
+	}
+	// Index of the scheduler slot this descriptor occupies (assigned in MiniCLTaskScheduler::issueTask).
+	uint32_t		m_taskId;
+	// Half-open range [m_firstWorkUnit, m_lastWorkUnit) of work-unit ids processed by this task.
+	uint32_t		m_firstWorkUnit;
+	uint32_t		m_lastWorkUnit;
+	// Kernel whose launcher is invoked for every work unit of the range.
+	MiniCLKernel*	m_kernel;
+	// Per-argument values and sizes copied from the kernel when the task is issued; data is only copied for non-zero sizes.
+	void*			m_argData[MINI_CL_MAX_ARG];
+	int				m_argSizes[MINI_CL_MAX_ARG];
+};
+
+extern "C" int gMiniCLNumOutstandingTasks;
+
+// Task worker and local-store factory; both are passed to the thread-support construction info objects by clCreateContextFromType.
+void	processMiniCLTask(void* userPtr, void* lsMemory);
+void*	createMiniCLLocalStoreMemory();
+
+
+#endif //MINICL__TASK_H
+
diff --git a/src/bullet/MiniCL/MiniCLTaskScheduler.cpp b/src/bullet/MiniCL/MiniCLTaskScheduler.cpp
new file mode 100644
index 00000000..18cf6457
--- /dev/null
+++ b/src/bullet/MiniCL/MiniCLTaskScheduler.cpp
@@ -0,0 +1,519 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//#define __CELLOS_LV2__ 1
+#define __BT_SKIP_UINT64_H 1
+
+#define USE_SAMPLE_PROCESS 1
+#ifdef USE_SAMPLE_PROCESS
+
+
+#include "MiniCLTaskScheduler.h"
+#include <stdio.h>
+
+#ifdef __SPU__
+
+
+
+void	SampleThreadFunc(void* userPtr,void* lsMemory)
+{
+	//placeholder task body for __SPU__ builds: ignores both arguments and only prints a greeting
+	printf("hello world\n");
+}
+
+
+void*	SamplelsMemoryFunc()
+{
+	//placeholder for __SPU__ builds: don't create local store memory, just return 0
+	return 0;
+}
+
+
+#else
+
+
+#include "BulletMultiThreaded/btThreadSupportInterface.h"
+
+//#	include "SPUAssert.h"
+#include <string.h>
+
+#include "MiniCL/cl_platform.h"
+
+extern "C" {
+	extern char SPU_SAMPLE_ELF_SYMBOL[];
+}
+
+
+MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface*	threadInterface,  int maxNumOutstandingTasks)
+:m_threadInterface(threadInterface),
+m_maxNumOutstandingTasks(maxNumOutstandingTasks)
+{
+	// One busy flag and one task descriptor per task slot.
+	m_taskBusy.resize(m_maxNumOutstandingTasks);
+	m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);
+
+	// No kernels are registered yet.
+	m_kernels.resize(0);
+	// Start with every slot idle and slot 0 selected for the next task.
+	int slot = 0;
+	while (slot < m_maxNumOutstandingTasks)
+	{
+		m_taskBusy[slot++] = false;
+	}
+	m_numBusyTasks = 0;
+	m_currentTask = 0;
+	m_initialized = false;
+
+	// Start the thread backend last, after the bookkeeping above is set up.
+	m_threadInterface->startSPU();
+}
+
+MiniCLTaskScheduler::~MiniCLTaskScheduler()
+{
+	m_threadInterface->stopSPU();
+	// Only stops the backend; the interface object itself is not deleted here.
+}
+
+
+
+void	MiniCLTaskScheduler::initialize()
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("MiniCLTaskScheduler::initialize()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+	// Reset the bookkeeping: all slots idle, nothing outstanding, slot 0 next.
+	int slot = m_maxNumOutstandingTasks;
+	while (slot-- > 0)
+	{
+		m_taskBusy[slot] = false;
+	}
+	m_currentTask = 0;
+	m_numBusyTasks = 0;
+	m_initialized = true;
+}
+
+
+void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel)
+{
+	// Fills the current task slot with the range [firstWorkUnit, lastWorkUnit) and a snapshot of the
+	// kernel's arguments, hands it to the thread backend, then selects a free slot for the next call
+	// (waiting for one response first when every slot is busy).
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("MiniCLTaskScheduler::issueTask (m_currentTask= %d)\n", m_currentTask);
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+	m_taskBusy[m_currentTask] = true;
+	m_numBusyTasks++;
+
+	MiniCLTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask];
+	{
+		// send task description in event message
+		taskDesc.m_firstWorkUnit = firstWorkUnit;
+		taskDesc.m_lastWorkUnit = lastWorkUnit;
+		taskDesc.m_kernel = kernel;
+		//some bookkeeping to recognize finished tasks
+		taskDesc.m_taskId = m_currentTask;
+
+		// copy only the arguments the kernel has actually been given
+		for (unsigned int i=0; i < kernel->m_numArgs; i++)
+		{
+			taskDesc.m_argSizes[i] = kernel->m_argSizes[i];
+			if (taskDesc.m_argSizes[i])
+			{
+				taskDesc.m_argData[i] = kernel->m_argData[i];
+			}
+		}
+	}
+
+	m_threadInterface->sendRequest(1, (ppu_address_t) &taskDesc, m_currentTask);
+
+	// if all tasks busy, wait for spu event to clear the task.
+	if (m_numBusyTasks >= m_maxNumOutstandingTasks)
+	{
+		// Zero-initialised so the values are defined even if the backend leaves them untouched.
+		unsigned int taskId = 0;
+		unsigned int outputSize = 0;
+		// Seed taskId with the first busy slot before waiting.
+		for (int i=0;i<m_maxNumOutstandingTasks;i++)
+		{
+			if (m_taskBusy[i])
+			{
+				taskId = i;
+				break;
+			}
+		}
+		m_threadInterface->waitForResponse(&taskId, &outputSize);
+
+		//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
+
+		postProcess(taskId, outputSize);
+
+		m_taskBusy[taskId] = false;
+
+		m_numBusyTasks--;
+	}
+
+	// find new task buffer
+	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
+	{
+		if (!m_taskBusy[i])
+		{
+			m_currentTask = i;
+			break;
+		}
+	}
+}
+
+
+///Optional PPU-side post processing for each task
+void MiniCLTaskScheduler::postProcess(int taskId, int outputSize)
+{
+	// Intentionally a no-op hook: both arguments are ignored.
+}
+
+
+void MiniCLTaskScheduler::flush()
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("\nMiniCLTaskScheduler::flush()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+	// Blocks until no task is outstanding: each pass waits for one response,
+	// runs postProcess() on it and releases the slot it reports.
+	while(m_numBusyTasks > 0)
+	{
+		// Zero-initialised so the values are defined even if the backend
+		// leaves them untouched.
+		unsigned int taskId = 0;
+		unsigned int outputSize = 0;
+
+		// Seed taskId with the first busy slot before waiting.
+		for (int i=0;i<m_maxNumOutstandingTasks;i++)
+		{
+			if (m_taskBusy[i])
+			{
+				taskId = i;
+				break;
+			}
+		}
+
+		// waitForResponse() may overwrite taskId with the slot that finished.
+		m_threadInterface->waitForResponse(&taskId, &outputSize);
+
+		//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
+
+		postProcess(taskId, outputSize);
+
+		m_taskBusy[taskId] = false;
+
+		m_numBusyTasks--;
+	}
+
+}
+
+
+// Kernel entry-point signatures: MiniCLKernelLauncherN takes N pointer-sized arguments followed by the int work-item id.
+typedef void (*MiniCLKernelLauncher0)(int);
+typedef void (*MiniCLKernelLauncher1)(void*, int);
+typedef void (*MiniCLKernelLauncher2)(void*, void*, int);
+typedef void (*MiniCLKernelLauncher3)(void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher4)(void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher5)(void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher6)(void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher7)(void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher8)(void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher9)(void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher10)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher11)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher12)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher13)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher14)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher15)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher16)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+
+
+static void kernelLauncher0(MiniCLTaskDesc* taskDesc, int guid) // trampoline for kernels that take only the work-item id
+{
+	((MiniCLKernelLauncher0)(taskDesc->m_kernel->m_pCode))(guid); // call the kernel code (m_pCode) like every other launcher; m_launcher is the trampoline slot and has a different signature
+}
+static void kernelLauncher1(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0, guid)
+{
+	((MiniCLKernelLauncher1)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												guid);
+}
+static void kernelLauncher2(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg1, guid)
+{
+	((MiniCLKernelLauncher2)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												guid);
+}
+static void kernelLauncher3(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg2, guid)
+{
+	((MiniCLKernelLauncher3)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												guid);
+}
+static void kernelLauncher4(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg3, guid)
+{
+	((MiniCLKernelLauncher4)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												guid);
+}
+static void kernelLauncher5(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg4, guid)
+{
+	((MiniCLKernelLauncher5)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												guid);
+}
+static void kernelLauncher6(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg5, guid)
+{
+	((MiniCLKernelLauncher6)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												guid);
+}
+static void kernelLauncher7(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg6, guid)
+{
+	((MiniCLKernelLauncher7)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												guid);
+}
+static void kernelLauncher8(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg7, guid)
+{
+	((MiniCLKernelLauncher8)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												guid);
+}
+static void kernelLauncher9(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg8, guid)
+{
+	((MiniCLKernelLauncher9)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												guid);
+}
+static void kernelLauncher10(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg9, guid)
+{
+	((MiniCLKernelLauncher10)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												guid);
+}
+static void kernelLauncher11(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg10, guid)
+{
+	((MiniCLKernelLauncher11)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												taskDesc->m_argData[10], 
+												guid);
+}
+static void kernelLauncher12(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg11, guid)
+{
+	((MiniCLKernelLauncher12)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												taskDesc->m_argData[10], 
+												taskDesc->m_argData[11], 
+												guid);
+}
+static void kernelLauncher13(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg12, guid)
+{
+	((MiniCLKernelLauncher13)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												taskDesc->m_argData[10], 
+												taskDesc->m_argData[11], 
+												taskDesc->m_argData[12], 
+												guid);
+}
+static void kernelLauncher14(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg13, guid)
+{
+	((MiniCLKernelLauncher14)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												taskDesc->m_argData[10], 
+												taskDesc->m_argData[11], 
+												taskDesc->m_argData[12], 
+												taskDesc->m_argData[13], 
+												guid);
+}
+static void kernelLauncher15(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg14, guid)
+{
+	((MiniCLKernelLauncher15)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												taskDesc->m_argData[10], 
+												taskDesc->m_argData[11], 
+												taskDesc->m_argData[12], 
+												taskDesc->m_argData[13], 
+												taskDesc->m_argData[14], 
+												guid);
+}
+static void kernelLauncher16(MiniCLTaskDesc* taskDesc, int guid) // trampoline: calls m_pCode as (arg0..arg15, guid)
+{
+	((MiniCLKernelLauncher16)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												taskDesc->m_argData[10], 
+												taskDesc->m_argData[11], 
+												taskDesc->m_argData[12], 
+												taskDesc->m_argData[13], 
+												taskDesc->m_argData[14], 
+												taskDesc->m_argData[15], 
+												guid);
+}
+
+static kernelLauncherCB spLauncherList[MINI_CL_MAX_ARG+1] = // trampolines indexed by argument count, 0..MINI_CL_MAX_ARG
+{
+	kernelLauncher0,
+	kernelLauncher1,
+	kernelLauncher2,
+	kernelLauncher3,
+	kernelLauncher4,
+	kernelLauncher5,
+	kernelLauncher6,
+	kernelLauncher7,
+	kernelLauncher8,
+	kernelLauncher9,
+	kernelLauncher10,
+	kernelLauncher11,
+	kernelLauncher12,
+	kernelLauncher13,
+	kernelLauncher14,
+	kernelLauncher15,
+	kernelLauncher16
+};
+
+void MiniCLKernel::updateLauncher()
+{	// Selects the trampoline matching the current argument count.
+	m_launcher = spLauncherList[m_numArgs]; // unchecked index: m_numArgs must not exceed MINI_CL_MAX_ARG (the table has MINI_CL_MAX_ARG+1 entries)
+}
+
+struct MiniCLKernelDescEntry // one row of the name -> code table below
+{
+	void* pCode; // kernel entry point, later copied into MiniCLKernel::m_pCode by registerSelf()
+	const char* pName; // kernel name; compared with strcmp
+};
+static MiniCLKernelDesc​Entry spKernelDesc[256]; // fixed-capacity table filled by MiniCLKernelDesc's constructor
+static int sNumKernelDesc = 0; // number of rows of spKernelDesc in use
+
+MiniCLKernelDesc::MiniCLKernelDesc(void* pCode, const char* pName) // records (pName -> pCode) in the file-level kernel table
+{
+	for(int i = 0; i < sNumKernelDesc; i++)
+	{
+		if(!strcmp(pName, spKernelDesc[i].pName))
+		{	// already registered
+			btAssert(spKernelDesc[i].pCode == pCode);
+			return; 
+		}
+	}
+	if(sNumKernelDesc >= (int)(sizeof(spKernelDesc) / sizeof(spKernelDesc[0]))) { printf("error: MiniCLKernelDesc table is full, cannot register %s\n", pName); return; } // fixed-capacity table: never write past its end
+	spKernelDesc[sNumKernelDesc].pCode = pCode;
+	spKernelDesc[sNumKernelDesc].pName = pName; // the pointer is stored, not copied: pName must outlive the table
+	sNumKernelDesc++;
+}
+
+MiniCLKernel* MiniCLKernel::registerSelf()
+{
+	// Announce this kernel to its scheduler first, then look m_name up in the kernel table: this on a match, NULL otherwise.
+	m_scheduler->registerKernel(this);
+	const MiniCLKernelDescEntry* const tableEnd = spKernelDesc + sNumKernelDesc;
+	for(const MiniCLKernelDescEntry* entry = spKernelDesc; entry != tableEnd; ++entry)
+	{
+		if(strcmp(m_name, entry->pName) != 0) continue;
+		m_pCode = entry->pCode;
+		return this;
+	}
+	return NULL;
+}
+
+#endif
+
+
+#endif //USE_SAMPLE_PROCESS
diff --git a/src/bullet/MiniCL/MiniCLTaskScheduler.h b/src/bullet/MiniCL/MiniCLTaskScheduler.h
new file mode 100644
index 00000000..3061a713
--- /dev/null
+++ b/src/bullet/MiniCL/MiniCLTaskScheduler.h
@@ -0,0 +1,194 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef MINICL_TASK_SCHEDULER_H
+#define MINICL_TASK_SCHEDULER_H
+
+#include <assert.h>
+
+
+#include "BulletMultiThreaded/PlatformDefinitions.h"
+
+#include <stdlib.h>
+
+#include "LinearMath/btAlignedObjectArray.h"
+
+
+#include "MiniCLTask/MiniCLTask.h"
+
+//just add your commands here, try to keep them globally unique for debugging purposes
+#define CMD_SAMPLE_TASK_COMMAND 10
+
+struct MiniCLKernel;
+
+/// MiniCLTaskScheduler holds the bookkeeping for MiniCL tasks: busy flags, task descriptors and the registered kernels.
+/// Tasks are submitted with issueTask() and completed with flush(); their definitions are outside this header.
+/// NOTE(review): earlier wording described SPU/PPU processing of collision pairs, apparently inherited from the SPU sample code - confirm.
+class MiniCLTaskScheduler
+{
+	// track task buffers that are being used, and total busy tasks
+	btAlignedObjectArray<bool>	m_taskBusy;
+	btAlignedObjectArray<MiniCLTaskDesc>	m_spuSampleTaskDesc;
+
+	// kernels added through registerKernel()
+	btAlignedObjectArray<const MiniCLKernel*>	m_kernels;
+
+	// presumably the number of m_taskBusy entries currently set - confirm against the .cpp
+	int   m_numBusyTasks;
+
+	// the current task and the current entry to insert a new work unit
+	int   m_currentTask;
+
+	bool m_initialized;
+
+	void postProcess(int taskId, int outputSize);
+	
+	class	btThreadSupportInterface*	m_threadInterface;	// returned by getThreadSupportInterface(); ownership is not visible in this header
+
+	int	m_maxNumOutstandingTasks;	// returned by getMaxNumOutstandingTasks()
+
+
+
+public:
+	MiniCLTaskScheduler(btThreadSupportInterface*	threadInterface, int maxNumOutstandingTasks);
+	
+	~MiniCLTaskScheduler();
+	
+	///call initialize in the beginning of the frame (NOTE(review): the original text said "before addCollisionPairToTask", which this class does not declare)
+	void initialize();
+	/// issue kernel for the work units delimited by firstWorkUnit/lastWorkUnit (NOTE(review): whether lastWorkUnit is inclusive is not visible here)
+	void issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel);
+
+	///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished
+	void flush();
+	/// returns m_threadInterface
+	class	btThreadSupportInterface*	getThreadSupportInterface()
+	{
+		return m_threadInterface;
+	}
+	/// presumably maps a program name to a command id; the not-found result is not visible here - confirm
+	int	findProgramCommandIdByName(const char* programName) const;
+	/// returns m_maxNumOutstandingTasks
+	int getMaxNumOutstandingTasks() const
+	{
+		return m_maxNumOutstandingTasks;
+	}
+	/// appends kernel to m_kernels; no duplicate check is made here
+	void registerKernel(MiniCLKernel* kernel)
+	{
+		m_kernels.push_back(kernel);
+	}
+};
+
+typedef void (*kernelLauncherCB)(MiniCLTaskDesc* taskDesc, int guid);	// launcher signature: task descriptor plus a guid (presumably the work-item id - confirm)
+/// A MiniCL kernel: its name, its argument slots, and the launcher/code pointers resolved for it.
+struct	MiniCLKernel
+{
+	MiniCLTaskScheduler* m_scheduler;	// scheduler that registerSelf() adds this kernel to
+	
+//	int	m_kernelProgramCommandId;
+
+	char	m_name[MINI_CL_MAX_KERNEL_NAME];	// name that registerSelf() looks up among the registered kernel descriptors
+	unsigned int	m_numArgs;	// index used by updateLauncher() to choose m_launcher
+	kernelLauncherCB	m_launcher;	// assigned by updateLauncher()
+	void* m_pCode;	// assigned by registerSelf() when a descriptor named m_name is found
+	void updateLauncher();	// sets m_launcher to the launcher-table entry for m_numArgs
+	MiniCLKernel* registerSelf();	// registers with m_scheduler and resolves m_pCode; returns NULL if m_name is unknown, otherwise this
+
+	void*	m_argData[MINI_CL_MAX_ARG];	// one slot per kernel argument
+	int				m_argSizes[MINI_CL_MAX_ARG];	// presumably the byte size of the matching m_argData entry - confirm
+};
+
+
+#if defined(USE_LIBSPE2) && defined(__SPU__)
+////////////////////MAIN/////////////////////////////
+#include "../SpuLibspe2Support.h"
+#include <spu_intrinsics.h>
+#include <spu_mfcio.h>
+#include <SpuFakeDma.h>
+
+void * SamplelsMemoryFunc();
+void SampleThreadFunc(void* userPtr,void* lsMemory);
+
+//#define DEBUG_LIBSPE2_MAINLOOP
+
+int main(unsigned long long speid, addr64 argp, addr64 envp)
+{	// SPU-side entry point (compiled only when USE_LIBSPE2 and __SPU__ are defined); speid and envp are not used.
+	printf("SPU is up \n");
+	// argp.ull is the address the btSpuStatus block is fetched from and written back to; every DMA below uses DMA_TAG(3) and is waited on immediately.
+	ATTRIBUTE_ALIGNED128(btSpuStatus status);
+	ATTRIBUTE_ALIGNED16( SpuSampleTaskDesc taskDesc ) ;
+	unsigned int received_message = Spu_Mailbox_Event_Nothing;
+        bool shutdown = false;
+	// startup: fetch the status block, mark it free and record the SamplelsMemoryFunc() result in it
+	cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+	cellDmaWaitTagStatusAll(DMA_MASK(3));
+
+	status.m_status = Spu_Status_Free;
+	status.m_lsMemory.p = SamplelsMemoryFunc();
+	// write the updated status back to the same address
+	cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+	cellDmaWaitTagStatusAll(DMA_MASK(3));
+	
+	// message loop: one mailbox word is read per iteration until a shutdown event arrives
+	while (!shutdown)
+	{
+		received_message = spu_read_in_mbox();
+		
+
+		
+		switch(received_message)
+		{
+		case Spu_Mailbox_Event_Shutdown:
+			shutdown = true;
+			break; 
+		case Spu_Mailbox_Event_Task:
+			// refresh the status
+#ifdef DEBUG_LIBSPE2_MAINLOOP
+			printf("SPU recieved Task \n");
+#endif //DEBUG_LIBSPE2_MAINLOOP
+			cellDmaGet(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+			cellDmaWaitTagStatusAll(DMA_MASK(3));
+		
+			btAssert(status.m_status==Spu_Status_Occupied);
+			// fetch the task descriptor that the refreshed status points at
+			cellDmaGet(&taskDesc, status.m_taskDesc.p, sizeof(SpuSampleTaskDesc), DMA_TAG(3), 0, 0);
+			cellDmaWaitTagStatusAll(DMA_MASK(3));
+			// run the task; the second argument is the descriptor's m_mainMemoryPtr reinterpreted as a pointer
+			SampleThreadFunc((void*)&taskDesc, reinterpret_cast<void*> (taskDesc.m_mainMemoryPtr) );
+			break;
+		case Spu_Mailbox_Event_Nothing:
+		default:
+			break;
+		}
+
+		// set to status free and wait for next task (this also runs once after a shutdown message, before the loop condition is re-checked)
+		status.m_status = Spu_Status_Free;
+		cellDmaLargePut(&status, argp.ull, sizeof(btSpuStatus), DMA_TAG(3), 0, 0);
+		cellDmaWaitTagStatusAll(DMA_MASK(3));		
+				
+		
+  	}
+  	return 0;
+}
+//////////////////////////////////////////////////////
+#endif
+
+
+
+#endif // MINICL_TASK_SCHEDULER_H
+
diff --git a/src/bullet/MiniCL/cl.h b/src/bullet/MiniCL/cl.h
new file mode 100644
index 00000000..35282988
--- /dev/null
+++ b/src/bullet/MiniCL/cl.h
@@ -0,0 +1,867 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2009 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+#ifndef __OPENCL_CL_H
+#define __OPENCL_CL_H
+
+#ifdef __APPLE__
+#include <MiniCL/cl_platform.h>
+#else
+#include <MiniCL/cl_platform.h>
+#endif	
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************/
+// Object handles: each is a pointer to a struct type that this section only names, without defining it.
+typedef struct _cl_platform_id *    cl_platform_id;
+typedef struct _cl_device_id *      cl_device_id;
+typedef struct _cl_context *        cl_context;
+typedef struct _cl_command_queue *  cl_command_queue;
+typedef struct _cl_mem *            cl_mem;
+typedef struct _cl_program *        cl_program;
+typedef struct _cl_kernel *         cl_kernel;
+typedef struct _cl_event *          cl_event;
+typedef struct _cl_sampler *        cl_sampler;
+
+typedef cl_uint             cl_bool;                     /* WARNING!  Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ 
+typedef cl_ulong            cl_bitfield;
+typedef cl_bitfield         cl_device_type;
+typedef cl_uint             cl_platform_info;
+typedef cl_uint             cl_device_info;
+typedef cl_bitfield         cl_device_address_info;
+typedef cl_bitfield         cl_device_fp_config;
+typedef cl_uint             cl_device_mem_cache_type;
+typedef cl_uint             cl_device_local_mem_type;
+typedef cl_bitfield         cl_device_exec_capabilities;
+typedef cl_bitfield         cl_command_queue_properties;
+// Scalar aliases continue below; cl_context_properties is the only one based on intptr_t rather than a cl_ integer type.
+typedef intptr_t			cl_context_properties;
+typedef cl_uint             cl_context_info;
+typedef cl_uint             cl_command_queue_info;
+typedef cl_uint             cl_channel_order;
+typedef cl_uint             cl_channel_type;
+typedef cl_bitfield         cl_mem_flags;
+typedef cl_uint             cl_mem_object_type;
+typedef cl_uint             cl_mem_info;
+typedef cl_uint             cl_image_info;
+typedef cl_uint             cl_addressing_mode;
+typedef cl_uint             cl_filter_mode;
+typedef cl_uint             cl_sampler_info;
+typedef cl_bitfield         cl_map_flags;
+typedef cl_uint             cl_program_info;
+typedef cl_uint             cl_program_build_info;
+typedef cl_int              cl_build_status;
+typedef cl_uint             cl_kernel_info;
+typedef cl_uint             cl_kernel_work_group_info;
+typedef cl_uint             cl_event_info;
+typedef cl_uint             cl_command_type;
+typedef cl_uint             cl_profiling_info;
+// Image format descriptor: a channel order paired with a channel data type.
+typedef struct _cl_image_format {
+    cl_channel_order        image_channel_order;
+    cl_channel_type         image_channel_data_type;
+} cl_image_format;
+
+/******************************************************************************/
+
+// Error Codes
+#define CL_SUCCESS                                  0
+#define CL_DEVICE_NOT_FOUND                         -1
+#define CL_DEVICE_NOT_AVAILABLE                     -2
+#define CL_DEVICE_COMPILER_NOT_AVAILABLE            -3
+#define CL_MEM_OBJECT_ALLOCATION_FAILURE            -4
+#define CL_OUT_OF_RESOURCES                         -5
+#define CL_OUT_OF_HOST_MEMORY                       -6
+#define CL_PROFILING_INFO_NOT_AVAILABLE             -7
+#define CL_MEM_COPY_OVERLAP                         -8
+#define CL_IMAGE_FORMAT_MISMATCH                    -9
+#define CL_IMAGE_FORMAT_NOT_SUPPORTED               -10
+#define CL_BUILD_PROGRAM_FAILURE                    -11
+#define CL_MAP_FAILURE                              -12
+
+#define CL_INVALID_VALUE                            -30
+#define CL_INVALID_DEVICE_TYPE                      -31
+#define CL_INVALID_PLATFORM                         -32
+#define CL_INVALID_DEVICE                           -33
+#define CL_INVALID_CONTEXT                          -34
+#define CL_INVALID_QUEUE_PROPERTIES                 -35
+#define CL_INVALID_COMMAND_QUEUE                    -36
+#define CL_INVALID_HOST_PTR                         -37
+#define CL_INVALID_MEM_OBJECT                       -38
+#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR          -39
+#define CL_INVALID_IMAGE_SIZE                       -40
+#define CL_INVALID_SAMPLER                          -41
+#define CL_INVALID_BINARY                           -42
+#define CL_INVALID_BUILD_OPTIONS                    -43
+#define CL_INVALID_PROGRAM                          -44
+#define CL_INVALID_PROGRAM_EXECUTABLE               -45
+#define CL_INVALID_KERNEL_NAME                      -46
+#define CL_INVALID_KERNEL_DEFINITION                -47
+#define CL_INVALID_KERNEL                           -48
+#define CL_INVALID_ARG_INDEX                        -49
+#define CL_INVALID_ARG_VALUE                        -50
+#define CL_INVALID_ARG_SIZE                         -51
+#define CL_INVALID_KERNEL_ARGS                      -52
+#define CL_INVALID_WORK_DIMENSION                   -53
+#define CL_INVALID_WORK_GROUP_SIZE                  -54
+#define CL_INVALID_WORK_ITEM_SIZE                   -55
+#define CL_INVALID_GLOBAL_OFFSET                    -56
+#define CL_INVALID_EVENT_WAIT_LIST                  -57
+#define CL_INVALID_EVENT                            -58
+#define CL_INVALID_OPERATION                        -59
+#define CL_INVALID_GL_OBJECT                        -60
+#define CL_INVALID_BUFFER_SIZE                      -61
+#define CL_INVALID_MIP_LEVEL                        -62
+
+// OpenCL Version
+#define CL_VERSION_1_0                              1
+
+// cl_bool
+#define CL_FALSE                                    0
+#define CL_TRUE                                     1
+
+// cl_platform_info
+#define CL_PLATFORM_PROFILE                         0x0900
+#define CL_PLATFORM_VERSION                         0x0901
+#define CL_PLATFORM_NAME                            0x0902
+#define CL_PLATFORM_VENDOR                          0x0903
+#define CL_PLATFORM_EXTENSIONS                      0x0904
+
+// cl_device_type - bitfield
+#define CL_DEVICE_TYPE_DEFAULT                      (1 << 0)
+#define CL_DEVICE_TYPE_CPU                          (1 << 1)
+#define CL_DEVICE_TYPE_GPU                          (1 << 2)
+#define CL_DEVICE_TYPE_ACCELERATOR                  (1 << 3)
+#define CL_DEVICE_TYPE_DEBUG						(1 << 4)	// NOTE(review): not in the Khronos OpenCL 1.0 cl.h; presumably a MiniCL-specific addition - confirm
+#define CL_DEVICE_TYPE_ALL                          0xFFFFFFFF
+
+
+// cl_device_info
+#define CL_DEVICE_TYPE                              0x1000
+#define CL_DEVICE_VENDOR_ID                         0x1001
+#define CL_DEVICE_MAX_COMPUTE_UNITS                 0x1002
+#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS          0x1003
+#define CL_DEVICE_MAX_WORK_GROUP_SIZE               0x1004
+#define CL_DEVICE_MAX_WORK_ITEM_SIZES               0x1005
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR       0x1006
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT      0x1007
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT        0x1008
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG       0x1009
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT      0x100A
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE     0x100B
+#define CL_DEVICE_MAX_CLOCK_FREQUENCY               0x100C
+#define CL_DEVICE_ADDRESS_BITS                      0x100D
+#define CL_DEVICE_MAX_READ_IMAGE_ARGS               0x100E
+#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS              0x100F
+#define CL_DEVICE_MAX_MEM_ALLOC_SIZE                0x1010
+#define CL_DEVICE_IMAGE2D_MAX_WIDTH                 0x1011
+#define CL_DEVICE_IMAGE2D_MAX_HEIGHT                0x1012
+#define CL_DEVICE_IMAGE3D_MAX_WIDTH                 0x1013
+#define CL_DEVICE_IMAGE3D_MAX_HEIGHT                0x1014
+#define CL_DEVICE_IMAGE3D_MAX_DEPTH                 0x1015
+#define CL_DEVICE_IMAGE_SUPPORT                     0x1016
+#define CL_DEVICE_MAX_PARAMETER_SIZE                0x1017
+#define CL_DEVICE_MAX_SAMPLERS                      0x1018
+#define CL_DEVICE_MEM_BASE_ADDR_ALIGN               0x1019
+#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE          0x101A
+#define CL_DEVICE_SINGLE_FP_CONFIG                  0x101B
+#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE             0x101C
+#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE         0x101D
+#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE             0x101E
+#define CL_DEVICE_GLOBAL_MEM_SIZE                   0x101F
+#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE          0x1020
+#define CL_DEVICE_MAX_CONSTANT_ARGS                 0x1021
+#define CL_DEVICE_LOCAL_MEM_TYPE                    0x1022
+#define CL_DEVICE_LOCAL_MEM_SIZE                    0x1023
+#define CL_DEVICE_ERROR_CORRECTION_SUPPORT          0x1024
+#define CL_DEVICE_PROFILING_TIMER_RESOLUTION        0x1025
+#define CL_DEVICE_ENDIAN_LITTLE                     0x1026
+#define CL_DEVICE_AVAILABLE                         0x1027
+#define CL_DEVICE_COMPILER_AVAILABLE                0x1028
+#define CL_DEVICE_EXECUTION_CAPABILITIES            0x1029
+#define CL_DEVICE_QUEUE_PROPERTIES                  0x102A
+#define CL_DEVICE_NAME                              0x102B
+#define CL_DEVICE_VENDOR                            0x102C
+#define CL_DRIVER_VERSION                           0x102D
+#define CL_DEVICE_PROFILE                           0x102E
+#define CL_DEVICE_VERSION                           0x102F
+#define CL_DEVICE_EXTENSIONS                        0x1030
+#define CL_DEVICE_PLATFORM                          0x1031
+	
+// cl_device_address_info - bitfield
+#define CL_DEVICE_ADDRESS_32_BITS                   (1 << 0)
+#define CL_DEVICE_ADDRESS_64_BITS                   (1 << 1)
+
+// cl_device_fp_config - bitfield
+#define CL_FP_DENORM                                (1 << 0)
+#define CL_FP_INF_NAN                               (1 << 1)
+#define CL_FP_ROUND_TO_NEAREST                      (1 << 2)
+#define CL_FP_ROUND_TO_ZERO                         (1 << 3)
+#define CL_FP_ROUND_TO_INF                          (1 << 4)
+#define CL_FP_FMA                                   (1 << 5)
+
+// cl_device_mem_cache_type
+#define CL_NONE                                     0x0
+#define CL_READ_ONLY_CACHE                          0x1
+#define CL_READ_WRITE_CACHE                         0x2
+
+// cl_device_local_mem_type
+#define CL_LOCAL                                    0x1
+#define CL_GLOBAL                                   0x2
+
+// cl_device_exec_capabilities - bitfield
+#define CL_EXEC_KERNEL                              (1 << 0)
+#define CL_EXEC_NATIVE_KERNEL                       (1 << 1)
+
+// cl_command_queue_properties - bitfield
+#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE      (1 << 0)
+#define CL_QUEUE_PROFILING_ENABLE                   (1 << 1)
+
+// cl_context_info
+#define CL_CONTEXT_REFERENCE_COUNT                  0x1080
+#define CL_CONTEXT_NUM_DEVICES                      0x1081
+#define CL_CONTEXT_DEVICES                          0x1082
+#define CL_CONTEXT_PROPERTIES                       0x1083
+#define CL_CONTEXT_PLATFORM                         0x1084
+
+// cl_command_queue_info
+#define CL_QUEUE_CONTEXT                            0x1090
+#define CL_QUEUE_DEVICE                             0x1091
+#define CL_QUEUE_REFERENCE_COUNT                    0x1092
+#define CL_QUEUE_PROPERTIES                         0x1093
+
+// cl_mem_flags - bitfield
+#define CL_MEM_READ_WRITE                           (1 << 0)
+#define CL_MEM_WRITE_ONLY                           (1 << 1)
+#define CL_MEM_READ_ONLY                            (1 << 2)
+#define CL_MEM_USE_HOST_PTR                         (1 << 3)
+#define CL_MEM_ALLOC_HOST_PTR                       (1 << 4)
+#define CL_MEM_COPY_HOST_PTR                        (1 << 5)
+
+// cl_channel_order
+#define CL_R                                        0x10B0
+#define CL_A                                        0x10B1
+#define CL_RG                                       0x10B2
+#define CL_RA                                       0x10B3
+#define CL_RGB                                      0x10B4
+#define CL_RGBA                                     0x10B5
+#define CL_BGRA                                     0x10B6
+#define CL_ARGB                                     0x10B7
+#define CL_INTENSITY                                0x10B8
+#define CL_LUMINANCE                                0x10B9
+
+// cl_channel_type
+#define CL_SNORM_INT8                               0x10D0
+#define CL_SNORM_INT16                              0x10D1
+#define CL_UNORM_INT8                               0x10D2
+#define CL_UNORM_INT16                              0x10D3
+#define CL_UNORM_SHORT_565                          0x10D4
+#define CL_UNORM_SHORT_555                          0x10D5
+#define CL_UNORM_INT_101010                         0x10D6
+#define CL_SIGNED_INT8                              0x10D7
+#define CL_SIGNED_INT16                             0x10D8
+#define CL_SIGNED_INT32                             0x10D9
+#define CL_UNSIGNED_INT8                            0x10DA
+#define CL_UNSIGNED_INT16                           0x10DB
+#define CL_UNSIGNED_INT32                           0x10DC
+#define CL_HALF_FLOAT                               0x10DD
+#define CL_FLOAT                                    0x10DE
+
+// cl_mem_object_type
+#define CL_MEM_OBJECT_BUFFER                        0x10F0
+#define CL_MEM_OBJECT_IMAGE2D                       0x10F1
+#define CL_MEM_OBJECT_IMAGE3D                       0x10F2
+
+// cl_mem_info
+#define CL_MEM_TYPE                                 0x1100
+#define CL_MEM_FLAGS                                0x1101
+#define CL_MEM_SIZE                                 0x1102
+#define CL_MEM_HOST_PTR                             0x1103
+#define CL_MEM_MAP_COUNT                            0x1104
+#define CL_MEM_REFERENCE_COUNT                      0x1105
+#define CL_MEM_CONTEXT                              0x1106
+
+// cl_image_info
+#define CL_IMAGE_FORMAT                             0x1110
+#define CL_IMAGE_ELEMENT_SIZE                       0x1111
+#define CL_IMAGE_ROW_PITCH                          0x1112
+#define CL_IMAGE_SLICE_PITCH                        0x1113
+#define CL_IMAGE_WIDTH                              0x1114
+#define CL_IMAGE_HEIGHT                             0x1115
+#define CL_IMAGE_DEPTH                              0x1116
+
+// cl_addressing_mode
+#define CL_ADDRESS_NONE                             0x1130
+#define CL_ADDRESS_CLAMP_TO_EDGE                    0x1131
+#define CL_ADDRESS_CLAMP                            0x1132
+#define CL_ADDRESS_REPEAT                           0x1133
+
+// cl_filter_mode
+#define CL_FILTER_NEAREST                           0x1140
+#define CL_FILTER_LINEAR                            0x1141
+
+// cl_sampler_info
+#define CL_SAMPLER_REFERENCE_COUNT                  0x1150
+#define CL_SAMPLER_CONTEXT                          0x1151
+#define CL_SAMPLER_NORMALIZED_COORDS                0x1152
+#define CL_SAMPLER_ADDRESSING_MODE                  0x1153
+#define CL_SAMPLER_FILTER_MODE                      0x1154
+
+// cl_map_flags - bitfield
+#define CL_MAP_READ                                 (1 << 0)
+#define CL_MAP_WRITE                                (1 << 1)
+
+// cl_program_info
+#define CL_PROGRAM_REFERENCE_COUNT                  0x1160
+#define CL_PROGRAM_CONTEXT                          0x1161
+#define CL_PROGRAM_NUM_DEVICES                      0x1162
+#define CL_PROGRAM_DEVICES                          0x1163
+#define CL_PROGRAM_SOURCE                           0x1164
+#define CL_PROGRAM_BINARY_SIZES                     0x1165
+#define CL_PROGRAM_BINARIES                         0x1166
+
+// cl_program_build_info
+#define CL_PROGRAM_BUILD_STATUS                     0x1181
+#define CL_PROGRAM_BUILD_OPTIONS                    0x1182
+#define CL_PROGRAM_BUILD_LOG                        0x1183
+
+// cl_build_status
+#define CL_BUILD_SUCCESS                            0
+#define CL_BUILD_NONE                               -1
+#define CL_BUILD_ERROR                              -2
+#define CL_BUILD_IN_PROGRESS                        -3
+
+// cl_kernel_info
+#define CL_KERNEL_FUNCTION_NAME                     0x1190
+#define CL_KERNEL_NUM_ARGS                          0x1191
+#define CL_KERNEL_REFERENCE_COUNT                   0x1192
+#define CL_KERNEL_CONTEXT                           0x1193
+#define CL_KERNEL_PROGRAM                           0x1194
+
+// cl_kernel_work_group_info
+#define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
+#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE           0x11B1
+#define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
+
+// cl_event_info
+#define CL_EVENT_COMMAND_QUEUE                      0x11D0
+#define CL_EVENT_COMMAND_TYPE                       0x11D1
+#define CL_EVENT_REFERENCE_COUNT                    0x11D2
+#define CL_EVENT_COMMAND_EXECUTION_STATUS           0x11D3
+
+// cl_command_type
+#define CL_COMMAND_NDRANGE_KERNEL                   0x11F0
+#define CL_COMMAND_TASK                             0x11F1
+#define CL_COMMAND_NATIVE_KERNEL                    0x11F2
+#define CL_COMMAND_READ_BUFFER                      0x11F3
+#define CL_COMMAND_WRITE_BUFFER                     0x11F4
+#define CL_COMMAND_COPY_BUFFER                      0x11F5
+#define CL_COMMAND_READ_IMAGE                       0x11F6
+#define CL_COMMAND_WRITE_IMAGE                      0x11F7
+#define CL_COMMAND_COPY_IMAGE                       0x11F8
+#define CL_COMMAND_COPY_IMAGE_TO_BUFFER             0x11F9
+#define CL_COMMAND_COPY_BUFFER_TO_IMAGE             0x11FA
+#define CL_COMMAND_MAP_BUFFER                       0x11FB
+#define CL_COMMAND_MAP_IMAGE                        0x11FC
+#define CL_COMMAND_UNMAP_MEM_OBJECT                 0x11FD
+#define CL_COMMAND_MARKER                           0x11FE
+#define CL_COMMAND_WAIT_FOR_EVENTS                  0x11FF
+#define CL_COMMAND_BARRIER                          0x1200
+#define CL_COMMAND_ACQUIRE_GL_OBJECTS               0x1201
+#define CL_COMMAND_RELEASE_GL_OBJECTS               0x1202
+
+// command execution status
+#define CL_COMPLETE                                 0x0
+#define CL_RUNNING                                  0x1
+#define CL_SUBMITTED                                0x2
+#define CL_QUEUED                                   0x3
+  
+// cl_profiling_info
+#define CL_PROFILING_COMMAND_QUEUED                 0x1280
+#define CL_PROFILING_COMMAND_SUBMIT                 0x1281
+#define CL_PROFILING_COMMAND_START                  0x1282
+#define CL_PROFILING_COMMAND_END                    0x1283
+
+/********************************************************************************************************/
+
+// Platform API
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetPlatformIDs(cl_uint          /* num_entries */,
+                 cl_platform_id * /* platforms */,
+                 cl_uint *        /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL 
+clGetPlatformInfo(cl_platform_id   /* platform */, 
+                  cl_platform_info /* param_name */,
+                  size_t           /* param_value_size */, 
+                  void *           /* param_value */,
+                  size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+// Device APIs
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDs(cl_platform_id   /* platform */,
+               cl_device_type   /* device_type */, 
+               cl_uint          /* num_entries */, 
+               cl_device_id *   /* devices */, 
+               cl_uint *        /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceInfo(cl_device_id    /* device */,
+                cl_device_info  /* param_name */, 
+                size_t          /* param_value_size */, 
+                void *          /* param_value */,
+                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+// Context APIs  
+extern CL_API_ENTRY cl_context CL_API_CALL
+clCreateContext(const cl_context_properties * /* properties */,
+                cl_uint                 /* num_devices */,
+                const cl_device_id *    /* devices */,
+                void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
+                void *                  /* user_data */,
+                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_context CL_API_CALL
+clCreateContextFromType(const cl_context_properties * /* properties */,
+                        cl_device_type          /* device_type */,
+                        void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
+                        void *                  /* user_data */,
+                        cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetContextInfo(cl_context         /* context */, 
+                 cl_context_info    /* param_name */, 
+                 size_t             /* param_value_size */, 
+                 void *             /* param_value */, 
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+// Command Queue APIs
+extern CL_API_ENTRY cl_command_queue CL_API_CALL
+clCreateCommandQueue(cl_context                     /* context */, 
+                     cl_device_id                   /* device */, 
+                     cl_command_queue_properties    /* properties */,
+                     cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetCommandQueueInfo(cl_command_queue      /* command_queue */,
+                      cl_command_queue_info /* param_name */,
+                      size_t                /* param_value_size */,
+                      void *                /* param_value */,
+                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetCommandQueueProperty(cl_command_queue              /* command_queue */,
+                          cl_command_queue_properties   /* properties */, 
+                          cl_bool                        /* enable */,
+                          cl_command_queue_properties * /* old_properties */) CL_API_SUFFIX__VERSION_1_0;
+
+// Memory Object APIs
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateBuffer(cl_context   /* context */,
+               cl_mem_flags /* flags */,
+               size_t       /* size */,
+               void *       /* host_ptr */,
+               cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateImage2D(cl_context              /* context */,
+                cl_mem_flags            /* flags */,
+                const cl_image_format * /* image_format */,
+                size_t                  /* image_width */,
+                size_t                  /* image_height */,
+                size_t                  /* image_row_pitch */, 
+                void *                  /* host_ptr */,
+                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+                        
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateImage3D(cl_context              /* context */,
+                cl_mem_flags            /* flags */,
+                const cl_image_format * /* image_format */,
+                size_t                  /* image_width */, 
+                size_t                  /* image_height */,
+                size_t                  /* image_depth */, 
+                size_t                  /* image_row_pitch */, 
+                size_t                  /* image_slice_pitch */, 
+                void *                  /* host_ptr */,
+                cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+                        
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSupportedImageFormats(cl_context           /* context */,
+                           cl_mem_flags         /* flags */,
+                           cl_mem_object_type   /* image_type */,
+                           cl_uint              /* num_entries */,
+                           cl_image_format *    /* image_formats */,
+                           cl_uint *            /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
+                                    
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetMemObjectInfo(cl_mem           /* memobj */,
+                   cl_mem_info      /* param_name */, 
+                   size_t           /* param_value_size */,
+                   void *           /* param_value */,
+                   size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetImageInfo(cl_mem           /* image */,
+               cl_image_info    /* param_name */, 
+               size_t           /* param_value_size */,
+               void *           /* param_value */,
+               size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+// Sampler APIs
+extern CL_API_ENTRY cl_sampler CL_API_CALL
+clCreateSampler(cl_context          /* context */,
+                cl_bool             /* normalized_coords */, 
+                cl_addressing_mode  /* addressing_mode */, 
+                cl_filter_mode      /* filter_mode */,
+                cl_int *            /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetSamplerInfo(cl_sampler         /* sampler */,
+                 cl_sampler_info    /* param_name */,
+                 size_t             /* param_value_size */,
+                 void *             /* param_value */,
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+                            
+// Program Object APIs
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithSource(cl_context        /* context */,
+                          cl_uint           /* count */,
+                          const char **     /* strings */,
+                          const size_t *    /* lengths */,
+                          cl_int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithBinary(cl_context                     /* context */,
+                          cl_uint                        /* num_devices */,
+                          const cl_device_id *           /* device_list */,
+                          const size_t *                 /* lengths */,
+                          const unsigned char **         /* binaries */,
+                          cl_int *                       /* binary_status */,
+                          cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clBuildProgram(cl_program           /* program */,
+               cl_uint              /* num_devices */,
+               const cl_device_id * /* device_list */,
+               const char *         /* options */, 
+               void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
+               void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetProgramInfo(cl_program         /* program */,
+                 cl_program_info    /* param_name */,
+                 size_t             /* param_value_size */,
+                 void *             /* param_value */,
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetProgramBuildInfo(cl_program            /* program */,
+                      cl_device_id          /* device */,
+                      cl_program_build_info /* param_name */,
+                      size_t                /* param_value_size */,
+                      void *                /* param_value */,
+                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+                            
+// Kernel Object APIs
+extern CL_API_ENTRY cl_kernel CL_API_CALL
+clCreateKernel(cl_program      /* program */,
+               const char *    /* kernel_name */,
+               cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateKernelsInProgram(cl_program     /* program */,
+                         cl_uint        /* num_kernels */,
+                         cl_kernel *    /* kernels */,
+                         cl_uint *      /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainKernel(cl_kernel    /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseKernel(cl_kernel   /* kernel */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetKernelArg(cl_kernel    /* kernel */,
+               cl_uint      /* arg_index */,
+               size_t       /* arg_size */,
+               const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelInfo(cl_kernel       /* kernel */,
+                cl_kernel_info  /* param_name */,
+                size_t          /* param_value_size */,
+                void *          /* param_value */,
+                size_t *        /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelWorkGroupInfo(cl_kernel                  /* kernel */,
+                         cl_device_id               /* device */,
+                         cl_kernel_work_group_info  /* param_name */,
+                         size_t                     /* param_value_size */,
+                         void *                     /* param_value */,
+                         size_t *                   /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+// Event Object APIs
+extern CL_API_ENTRY cl_int CL_API_CALL
+clWaitForEvents(cl_uint             /* num_events */,
+                const cl_event *    /* event_list */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetEventInfo(cl_event         /* event */,
+               cl_event_info    /* param_name */,
+               size_t           /* param_value_size */,
+               void *           /* param_value */,
+               size_t *         /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+                            
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+// Profiling APIs
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetEventProfilingInfo(cl_event            /* event */,
+                        cl_profiling_info   /* param_name */,
+                        size_t              /* param_value_size */,
+                        void *              /* param_value */,
+                        size_t *            /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+                                
+// Flush and Finish APIs
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+// Enqueued Commands APIs
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadBuffer(cl_command_queue    /* command_queue */,
+                    cl_mem              /* buffer */,
+                    cl_bool             /* blocking_read */,
+                    size_t              /* offset */,
+                    size_t              /* cb */, 
+                    void *              /* ptr */,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+                            
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteBuffer(cl_command_queue   /* command_queue */, 
+                     cl_mem             /* buffer */, 
+                     cl_bool            /* blocking_write */, 
+                     size_t             /* offset */, 
+                     size_t             /* cb */, 
+                     const void *       /* ptr */, 
+                     cl_uint            /* num_events_in_wait_list */, 
+                     const cl_event *   /* event_wait_list */, 
+                     cl_event *         /* event */) CL_API_SUFFIX__VERSION_1_0;
+                            
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBuffer(cl_command_queue    /* command_queue */, 
+                    cl_mem              /* src_buffer */,
+                    cl_mem              /* dst_buffer */, 
+                    size_t              /* src_offset */,
+                    size_t              /* dst_offset */,
+                    size_t              /* cb */, 
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+                            
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReadImage(cl_command_queue     /* command_queue */,
+                   cl_mem               /* image */,
+                   cl_bool              /* blocking_read */, 
+                   const size_t *       /* origin[3] */,
+                   const size_t *       /* region[3] */,
+                   size_t               /* row_pitch */,
+                   size_t               /* slice_pitch */, 
+                   void *               /* ptr */,
+                   cl_uint              /* num_events_in_wait_list */,
+                   const cl_event *     /* event_wait_list */,
+                   cl_event *           /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWriteImage(cl_command_queue    /* command_queue */,
+                    cl_mem              /* image */,
+                    cl_bool             /* blocking_write */, 
+                    const size_t *      /* origin[3] */,
+                    const size_t *      /* region[3] */,
+                    size_t              /* input_row_pitch */,
+                    size_t              /* input_slice_pitch */, 
+                    const void *        /* ptr */,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImage(cl_command_queue     /* command_queue */,
+                   cl_mem               /* src_image */,
+                   cl_mem               /* dst_image */, 
+                   const size_t *       /* src_origin[3] */,
+                   const size_t *       /* dst_origin[3] */,
+                   const size_t *       /* region[3] */, 
+                   cl_uint              /* num_events_in_wait_list */,
+                   const cl_event *     /* event_wait_list */,
+                   cl_event *           /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */,
+                           cl_mem           /* src_image */,
+                           cl_mem           /* dst_buffer */, 
+                           const size_t *   /* src_origin[3] */,
+                           const size_t *   /* region[3] */, 
+                           size_t           /* dst_offset */,
+                           cl_uint          /* num_events_in_wait_list */,
+                           const cl_event * /* event_wait_list */,
+                           cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */,
+                           cl_mem           /* src_buffer */,
+                           cl_mem           /* dst_image */, 
+                           size_t           /* src_offset */,
+                           const size_t *   /* dst_origin[3] */,
+                           const size_t *   /* region[3] */, 
+                           cl_uint          /* num_events_in_wait_list */,
+                           const cl_event * /* event_wait_list */,
+                           cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapBuffer(cl_command_queue /* command_queue */,
+                   cl_mem           /* buffer */,
+                   cl_bool          /* blocking_map */, 
+                   cl_map_flags     /* map_flags */,
+                   size_t           /* offset */,
+                   size_t           /* cb */,
+                   cl_uint          /* num_events_in_wait_list */,
+                   const cl_event * /* event_wait_list */,
+                   cl_event *       /* event */,
+                   cl_int *         /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clEnqueueMapImage(cl_command_queue  /* command_queue */,
+                  cl_mem            /* image */, 
+                  cl_bool           /* blocking_map */, 
+                  cl_map_flags      /* map_flags */, 
+                  const size_t *    /* origin[3] */,
+                  const size_t *    /* region[3] */,
+                  size_t *          /* image_row_pitch */,
+                  size_t *          /* image_slice_pitch */,
+                  cl_uint           /* num_events_in_wait_list */,
+                  const cl_event *  /* event_wait_list */,
+                  cl_event *        /* event */,
+                  cl_int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
+                        cl_mem           /* memobj */,
+                        void *           /* mapped_ptr */,
+                        cl_uint          /* num_events_in_wait_list */,
+                        const cl_event *  /* event_wait_list */,
+                        cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
+                       cl_kernel        /* kernel */,
+                       cl_uint          /* work_dim */,
+                       const size_t *   /* global_work_offset */,
+                       const size_t *   /* global_work_size */,
+                       const size_t *   /* local_work_size */,
+                       cl_uint          /* num_events_in_wait_list */,
+                       const cl_event * /* event_wait_list */,
+                       cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueTask(cl_command_queue  /* command_queue */,
+              cl_kernel         /* kernel */,
+              cl_uint           /* num_events_in_wait_list */,
+              const cl_event *  /* event_wait_list */,
+              cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueNativeKernel(cl_command_queue  /* command_queue */,
+					  void (*user_func)(void *), 
+                      void *            /* args */,
+                      size_t            /* cb_args */, 
+                      cl_uint           /* num_mem_objects */,
+                      const cl_mem *    /* mem_list */,
+                      const void **     /* args_mem_loc */,
+                      cl_uint           /* num_events_in_wait_list */,
+                      const cl_event *  /* event_wait_list */,
+                      cl_event *        /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMarker(cl_command_queue    /* command_queue */,
+                cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
+                       cl_uint          /* num_events */,
+                       const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueBarrier(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // __OPENCL_CL_H
+
diff --git a/src/bullet/MiniCL/cl_MiniCL_Defs.h b/src/bullet/MiniCL/cl_MiniCL_Defs.h
new file mode 100644
index 00000000..73fd3c7d
--- /dev/null
+++ b/src/bullet/MiniCL/cl_MiniCL_Defs.h
@@ -0,0 +1,439 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#include <float.h>
+#include <math.h>
+#include <string.h>
+#include "LinearMath/btScalar.h"
+
+#include "MiniCL/cl.h"
+
+#define __kernel	/* kernel qualifier: expands to nothing here */
+#define __global	/* address-space qualifier: expands to nothing here */
+#define __local	/* address-space qualifier: expands to nothing here */
+#define get_global_id(a)	__guid_arg	/* `a` is ignored; reads the __guid_arg visible at the call site */
+#define get_local_id(a)		((__guid_arg) % gMiniCLNumOutstandingTasks)	/* `a` is ignored */
+#define get_local_size(a)	(gMiniCLNumOutstandingTasks)	/* `a` is ignored */
+#define get_group_id(a)		((__guid_arg) / gMiniCLNumOutstandingTasks)	/* `a` is ignored */
+
+// Returns the raw bit pattern of `val` as an unsigned int (no numeric conversion). memcpy replaces the pointer cast to avoid strict-aliasing UB.
+static unsigned int as_uint(float val) { unsigned int bits; memcpy(&bits, &val, sizeof(bits)); return bits; }
+
+#define CLK_LOCAL_MEM_FENCE		0x01	/* presumably a flag for barrier() — which currently ignores it */
+#define CLK_GLOBAL_MEM_FENCE	0x02	/* presumably a flag for barrier() — which currently ignores it */
+// Placeholder for the OpenCL barrier: the body is empty, so `a` is ignored and nothing is synchronized.
+static void barrier(unsigned int a)
+{
+	// TODO : implement
+}
+
+// Eight-component float vector (lanes s0..s7); the ATTRIBUTE_ALIGNED16 variant is left disabled.
+struct float8
+{
+	float s0;
+	float s1;
+	float s2;
+	float s3;
+	float s4;
+	float s5;
+	float s6;
+	float s7;
+
+	// Broadcasts `scalar` into all eight lanes.
+	float8(float scalar)
+		: s0(scalar), s1(scalar), s2(scalar), s3(scalar),
+		  s4(scalar), s5(scalar), s6(scalar), s7(scalar) {}
+};
+
+
+// Scalar select: yields arg0 when `select` is true, otherwise arg1. Marked static, like the
+// sibling helpers, so including this header from several translation units links cleanly.
+static float select( float arg0, float arg1, bool select)
+{
+	return select ? arg0 : arg1; // NOTE(review): OpenCL select(a,b,c) yields b when c is set — confirm this order is intended
+}
+
+#define __constant
+
+
+// Plain three-component float vector (x, y, z); declares no constructors.
+struct float3
+{
+	float x,y,z;
+	// In-place component-wise addition; returns *this.
+	float3& operator+=(const float3& other)
+	{
+		x = x + other.x;
+		y = y + other.y;
+		z = z + other.z;
+		return *this;
+	}
+	// In-place component-wise subtraction; returns *this.
+	float3& operator-=(const float3& other)
+	{
+		x = x - other.x;
+		y = y - other.y;
+		z = z - other.z;
+		return *this;
+	}
+};
+
+// Dot product of two float3 values: the three per-component products, summed left to right.
+static float dot(const float3&a ,const float3& b)
+{
+	const float px = a.x*b.x;
+	const float py = a.y*b.y;
+	const float pz = a.z*b.z;
+	return px+py+pz;
+}
+
+// Component-wise difference a - b of two float3 values.
+static float3 operator-(const float3& a,const float3& b)
+{
+	float3 diff = { a.x - b.x,
+			a.y - b.y,
+			a.z - b.z };
+	return diff;
+}
+
+// Scales each component of `b` by `scalar` (scalar on the left-hand side).
+static float3 operator*(const float& scalar,const float3& b)
+{
+	float3 scaled = { scalar * b.x,
+			scalar * b.y,
+			scalar * b.z };
+	return scaled;
+}
+
+// Scales each component of `a` by `scalar` (scalar on the right-hand side).
+static float3 operator*(const float3& a,const float& scalar)
+{
+	float3 scaled = { a.x * scalar,
+			a.y * scalar,
+			a.z * scalar };
+	return scaled;
+}
+
+
+// Component-wise (element-by-element) product of two float3 values; not a dot or cross product.
+static float3 operator*(const float3& a,const float3& b)
+{
+	float3 prod = { a.x * b.x,
+			a.y * b.y,
+			a.z * b.z };
+	return prod;
+}
+	
+
+// Four-component float vector used in place of OpenCL's float4 (ATTRIBUTE_ALIGNED16 is left disabled).
+struct float4
+{
+	union // x/y/z and the float3 view `xyz` occupy the same storage
+	{
+		struct {
+			float x;
+			float y;
+			float z;
+		};
+		float3 xyz;
+	};
+	float w;
+
+	// Default construction leaves every component uninitialized.
+	float4() {}
+	// Builds the vector from four explicit components.
+	float4(float v0, float v1, float v2, float v3)
+	{
+		x=v0;
+		y=v1;
+		z=v2;
+		w=v3;
+	}
+
+	// Builds the vector from a float3 (x, y, z) plus an explicit w.
+	float4(float3 xyz, float scalarW)
+	{
+		x = xyz.x;
+		y = xyz.y;
+		z = xyz.z;
+		w = scalarW;
+	}
+
+	// Broadcasts `v` into all four components (also allows implicit float -> float4 conversion).
+	float4(float v)
+	{
+		x = y = z = w = v;
+	}
+
+	// Component-wise product. const-qualified so it can also be applied to const operands.
+	float4 operator*(const float4& other) const
+	{
+		float4 tmp;
+		tmp.x = x*other.x;
+		tmp.y = y*other.y;
+		tmp.z = z*other.z;
+		tmp.w = w*other.w;
+		return tmp;
+	}
+
+	// Scales every component by `other`. const-qualified for the same reason.
+	float4 operator*(const float& other) const
+	{
+		float4 tmp;
+		tmp.x = x*other;
+		tmp.y = y*other;
+		tmp.z = z*other;
+		tmp.w = w*other;
+		return tmp;
+	}
+
+	// In-place component-wise addition; returns *this.
+	float4& operator+=(const float4& other)
+	{
+		x += other.x;
+		y += other.y;
+		z += other.z;
+		w += other.w;
+		return *this;
+	}
+
+	// In-place component-wise subtraction; returns *this.
+	float4& operator-=(const float4& other)
+	{
+		x -= other.x;
+		y -= other.y;
+		z -= other.z;
+		w -= other.w;
+		return *this;
+	}
+
+	// In-place scaling of all four components by `scalar`; returns *this.
+	float4& operator *=(float scalar)
+	{
+		x *= scalar;
+		y *= scalar;
+		z *= scalar;
+		w *= scalar;
+		return (*this);
+	}
+};
+
+// Component-wise absolute value of a float4.
+// Each lane goes through fabsf, so negative inputs are negated rather than clamped to zero.
+static float4 fabs(const float4& a)
+{
+	return float4(fabsf(a.x),
+		fabsf(a.y),
+		fabsf(a.z),
+		fabsf(a.w));
+}
+// Component-wise sum of two float4 values (w included).
+static float4 operator+(const float4& a,const float4& b)
+{
+	return float4(
+		a.x + b.x,
+		a.y + b.y,
+		a.z + b.z,
+		a.w + b.w);
+}
+
+
+static float8 operator+(const float8& a,const float8& b) // lane-wise sum of two float8 values
+{
+	float8 sum(a); // start from a copy of the left operand, then add each lane of b
+	sum.s0 += b.s0;
+	sum.s1 += b.s1;
+	sum.s2 += b.s2;
+	sum.s3 += b.s3;
+	sum.s4 += b.s4;
+	sum.s5 += b.s5;
+	sum.s6 += b.s6;
+	sum.s7 += b.s7;
+	return sum;
+}
+
+
+// Component-wise difference a - b of two float4 values (w included).
+static float4 operator-(const float4& a,const float4& b)
+{
+	return float4(
+		a.x - b.x,
+		a.y - b.y,
+		a.z - b.z,
+		a.w - b.w);
+}
+
+static float8 operator-(const float8& a,const float8& b) // lane-wise difference a - b
+{
+	float8 diff(a); // start from a copy of the left operand, then subtract each lane of b
+	diff.s0 -= b.s0;
+	diff.s1 -= b.s1;
+	diff.s2 -= b.s2;
+	diff.s3 -= b.s3;
+	diff.s4 -= b.s4;
+	diff.s5 -= b.s5;
+	diff.s6 -= b.s6;
+	diff.s7 -= b.s7;
+	return diff;
+}
+
+// Scales all four components of `b` by the scalar `a` (scalar on the left-hand side).
+static float4 operator*(float a,const float4& b)
+{
+	return float4(
+		a * b.x,
+		a * b.y,
+		a * b.z,
+		a * b.w);
+}
+
+// Divides all four components of `b` by the scalar `a`; there is no guard against a == 0.
+static float4 operator/(const float4& b,float a)
+{
+	return float4(
+		b.x/a,
+		b.y/a,
+		b.z/a,
+		b.w/a);
+}
+
+
+
+
+
+// Four-component dot product: the per-component products (w included), summed left to right.
+static float dot(const float4&a ,const float4& b)
+{
+	const float px = a.x*b.x;
+	const float py = a.y*b.y;
+	const float pz = a.z*b.z;
+	const float pw = a.w*b.w;
+	return px+py+pz+pw;
+}
+
+// Euclidean length of the x/y/z part of `a`; the w component does not contribute.
+static float length(const float4&a)
+{
+	return sqrtf(a.x*a.x+a.y*a.y+a.z*a.z);
+}
+
+// Scales `a` by 1/length(a): all four components are scaled, while the length uses x/y/z only.
+// There is no guard for a zero-length input; the reciprocal is then a division by zero.
+static float4 normalize(const float4&a)
+{
+	const float invLen = 1.f/length(a);
+	return invLen*a;
+}
+
+
+
+// Cross product of the x/y/z parts of `a` and `b`; the w of the result is set to 0.
+static float4 cross(const float4&a ,const float4& b)
+{
+	return float4(
+		 a.y*b.z - a.z*b.y,
+		-a.x*b.z + a.z*b.x,
+		 a.x*b.y - a.y*b.x,
+		0.f);
+}
+
+static float max(float a, float b) // yields a when a >= b holds, otherwise b
+{
+	return !(a >= b) ? b : a;
+}
+
+
+static float min(float a, float b) // yields a when a <= b holds, otherwise b
+{
+	return !(a <= b) ? b : a;
+}
+
+static float fmax(float a, float b) // a when a >= b, else b. NOTE(review): may clash with <math.h>'s float fmax on newer C++ toolchains — confirm
+{
+	return !(a >= b) ? b : a;
+}
+
+static float fmin(float a, float b) // a when a <= b, else b. NOTE(review): may clash with <math.h>'s float fmin on newer C++ toolchains — confirm
+{
+	return !(a <= b) ? b : a;
+}
+
+struct int2 // two-component signed int vector; plain data, no constructors or operators
+{
+	int x,y;
+};
+
+struct uint2 // two-component unsigned int vector; plain data, no constructors or operators
+{
+	unsigned int x,y;
+};
+
+//typedef int2 uint2;
+
+typedef unsigned int uint;
+
+struct int4 // four-component signed int vector; plain data, no constructors or operators
+{
+	int x,y,z,w;
+};
+
+struct uint4 // four-component unsigned int vector
+{
+	unsigned int x,y,z,w;
+	uint4() {} // components stay uninitialized
+	uint4(uint val) : x(val), y(val), z(val), w(val) {} // broadcasts val into every component
+	uint4& operator+=(const uint4& other) // in-place component-wise addition; returns *this
+	{
+		x = x + other.x;
+		y = y + other.y;
+		z = z + other.z;
+		w = w + other.w;
+		return *this;
+	}
+};
+// Component-wise sum of two uint4 values.
+// Implemented by applying uint4::operator+= to a copy of `a`.
+static uint4 operator+(const uint4& a,const uint4& b)
+{
+	uint4 sum(a);
+	sum += b;
+
+	return sum;
+}
+static uint4 operator-(const uint4& a,const uint4& b) // component-wise difference a - b
+{
+	uint4 diff(a); // start from a copy of the left operand
+	diff.x -= b.x;
+	diff.y -= b.y;
+	diff.z -= b.z;
+	diff.w -= b.w;
+	return diff;
+}
+
+#define native_sqrt sqrtf	/* native_* names map straight onto the float <math.h> functions */
+#define native_sin sinf
+#define native_cos cosf
+#define native_powr powf
+// GUID_ARG appends an `int __guid_arg` parameter to a parameter list; GUID_ARG_VAL forwards it in a call.
+#define GUID_ARG ,int __guid_arg
+#define GUID_ARG_VAL ,__guid_arg
+
+// Reinterprets the storage of lvalue `a` as an int through a pointer cast (takes the address of `a`).
+#define as_int(a) (*((int*)&(a)))
+// Divisor used by the get_local_id / get_local_size / get_group_id macros; defined outside this header.
+extern "C" int gMiniCLNumOutstandingTasks;
+//	extern "C" void __kernel_func();
+
+
diff --git a/src/bullet/MiniCL/cl_gl.h b/src/bullet/MiniCL/cl_gl.h
new file mode 100644
index 00000000..0a69d6ec
--- /dev/null
+++ b/src/bullet/MiniCL/cl_gl.h
@@ -0,0 +1,113 @@
+/**********************************************************************************
+ * Copyright (c) 2008-2009 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+
+#ifndef __OPENCL_CL_GL_H
+#define __OPENCL_CL_GL_H
+
+#ifdef __APPLE__
+#include <OpenCL/cl_platform.h>
+#else
+#include <MiniCL/cl_platform.h>
+#endif	
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// NOTE:  Make sure that appropriate GL header file is included separately
+
+typedef cl_uint     cl_gl_object_type;
+typedef cl_uint     cl_gl_texture_info;
+typedef cl_uint     cl_gl_platform_info;
+
+// cl_gl_object_type
+#define CL_GL_OBJECT_BUFFER             0x2000
+#define CL_GL_OBJECT_TEXTURE2D          0x2001
+#define CL_GL_OBJECT_TEXTURE3D          0x2002
+#define CL_GL_OBJECT_RENDERBUFFER       0x2003
+
+// cl_gl_texture_info
+#define CL_GL_TEXTURE_TARGET            0x2004
+#define CL_GL_MIPMAP_LEVEL              0x2005
+
+// errcode_ret is cl_int*, matching the other clCreateFromGL* prototypes declared in this header.
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLBuffer(cl_context     /* context */,  cl_mem_flags   /* flags */,
+                     GLuint         /* bufobj */,
+                     cl_int *       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLTexture2D(cl_context      /* context */,
+                        cl_mem_flags    /* flags */,
+                        GLenum          /* target */,
+                        GLint           /* miplevel */,
+                        GLuint          /* texture */,
+                        cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLTexture3D(cl_context      /* context */,
+                        cl_mem_flags    /* flags */,
+                        GLenum          /* target */,
+                        GLint           /* miplevel */,
+                        GLuint          /* texture */,
+                        cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromGLRenderbuffer(cl_context   /* context */,
+                           cl_mem_flags /* flags */,
+                           GLuint       /* renderbuffer */,
+                           cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLObjectInfo(cl_mem                /* memobj */,
+                  cl_gl_object_type *   /* gl_object_type */,
+                  GLuint *              /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
+                  
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetGLTextureInfo(cl_mem               /* memobj */,
+                   cl_gl_texture_info   /* param_name */,
+                   size_t               /* param_value_size */,
+                   void *               /* param_value */,
+                   size_t *             /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireGLObjects(cl_command_queue      /* command_queue */,
+                          cl_uint               /* num_objects */,
+                          const cl_mem *        /* mem_objects */,
+                          cl_uint               /* num_events_in_wait_list */,
+                          const cl_event *      /* event_wait_list */,
+                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseGLObjects(cl_command_queue      /* command_queue */,
+                          cl_uint               /* num_objects */,
+                          const cl_mem *        /* mem_objects */,
+                          cl_uint               /* num_events_in_wait_list */,
+                          const cl_event *      /* event_wait_list */,
+                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // __OPENCL_CL_GL_H
diff --git a/src/bullet/MiniCL/cl_platform.h b/src/bullet/MiniCL/cl_platform.h
new file mode 100644
index 00000000..43219e14
--- /dev/null
+++ b/src/bullet/MiniCL/cl_platform.h
@@ -0,0 +1,254 @@
+/**********************************************************************************
+ * Copyright (c) 2008-2009 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+
+#ifndef __CL_PLATFORM_H
+#define __CL_PLATFORM_H
+
+#define CL_PLATFORM_MINI_CL  0x12345
+/* Kernel descriptor built from a kernel's code pointer and its name (constructor is defined elsewhere; presumably it registers the kernel -- confirm). */
+struct MiniCLKernelDesc
+{
+	MiniCLKernelDesc(void* pCode, const char* pName);
+};
+/* Defines a file-static MiniCLKernelDesc named <func>Desc from the function pointer and its stringized name; the expansion already ends in ';'. */
+#define MINICL_REGISTER(__kernel_func) static MiniCLKernelDesc __kernel_func##Desc((void*)__kernel_func, #__kernel_func);
+
+
+#ifdef __APPLE__
+    /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
+    #include <AvailabilityMacros.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CL_API_ENTRY
+#define CL_API_CALL
+#ifdef __APPLE__
+#define CL_API_SUFFIX__VERSION_1_0 //  AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
+#define CL_EXTENSION_WEAK_LINK       __attribute__((weak_import))       
+#else
+#define CL_API_SUFFIX__VERSION_1_0
+#define CL_EXTENSION_WEAK_LINK                         
+#endif
+
+#if defined (_WIN32) && ! defined (__MINGW32__)
+typedef signed   __int8  int8_t;
+typedef unsigned __int8  uint8_t;
+typedef signed   __int16 int16_t;
+typedef unsigned __int16 uint16_t;
+typedef signed   __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef signed   __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+
+typedef int8_t          cl_char;
+typedef uint8_t         cl_uchar;
+typedef int16_t         cl_short    ;
+typedef uint16_t        cl_ushort   ;
+typedef int32_t         cl_int      ;
+typedef uint32_t        cl_uint     ;
+typedef int64_t         cl_long     ;
+typedef uint64_t        cl_ulong    ;
+
+typedef uint16_t        cl_half     ;
+typedef float           cl_float    ;
+typedef double          cl_double   ;
+
+
+typedef int8_t          cl_char2[2]     ;
+typedef int8_t          cl_char4[4]     ;
+typedef int8_t          cl_char8[8]     ;
+typedef int8_t          cl_char16[16]   ;
+typedef uint8_t         cl_uchar2[2]    ;
+typedef uint8_t         cl_uchar4[4]    ;
+typedef uint8_t         cl_uchar8[8]    ;
+typedef uint8_t         cl_uchar16[16]  ;
+
+typedef int16_t         cl_short2[2]     ;
+typedef int16_t         cl_short4[4]     ;
+typedef int16_t         cl_short8[8]     ;
+typedef int16_t         cl_short16[16]   ;
+typedef uint16_t        cl_ushort2[2]    ;
+typedef uint16_t        cl_ushort4[4]    ;
+typedef uint16_t        cl_ushort8[8]    ;
+typedef uint16_t        cl_ushort16[16]  ;
+
+typedef int32_t         cl_int2[2]     ;
+typedef int32_t         cl_int4[4]     ;
+typedef int32_t         cl_int8[8]     ;
+typedef int32_t         cl_int16[16]    ;
+typedef uint32_t        cl_uint2[2]     ;
+typedef uint32_t        cl_uint4[4]     ;
+typedef uint32_t        cl_uint8[8]     ;
+typedef uint32_t        cl_uint16[16]   ;
+
+typedef int64_t         cl_long2[2]     ;
+typedef int64_t         cl_long4[4]     ;
+typedef int64_t         cl_long8[8]     ;
+typedef int64_t         cl_long16[16]   ;
+typedef uint64_t        cl_ulong2[2]    ;
+typedef uint64_t        cl_ulong4[4]    ;
+typedef uint64_t        cl_ulong8[8]    ;
+typedef uint64_t        cl_ulong16[16]  ;
+
+typedef float           cl_float2[2]    ;
+typedef float           cl_float4[4]    ;
+typedef float           cl_float8[8]    ;
+typedef float           cl_float16[16]  ;
+
+typedef double          cl_double2[2]   ;
+typedef double          cl_double4[4]   ;
+typedef double          cl_double8[8]   ;
+typedef double          cl_double16[16] ;
+
+
+#else
+#include <stdint.h>
+
+/* scalar types  */
+typedef int8_t          cl_char;
+typedef uint8_t         cl_uchar;
+typedef int16_t         cl_short    __attribute__((aligned(2)));
+typedef uint16_t        cl_ushort   __attribute__((aligned(2)));
+typedef int32_t         cl_int      __attribute__((aligned(4)));
+typedef uint32_t        cl_uint     __attribute__((aligned(4)));
+typedef int64_t         cl_long     __attribute__((aligned(8)));
+typedef uint64_t        cl_ulong    __attribute__((aligned(8)));
+
+typedef uint16_t        cl_half     __attribute__((aligned(2)));
+typedef float           cl_float    __attribute__((aligned(4)));
+typedef double          cl_double   __attribute__((aligned(8)));
+
+
+/*
+ * Vector types 
+ *
+ *  Note:   OpenCL requires that all types be naturally aligned. 
+ *          This means that vector types must be naturally aligned.
+ *          For example, a vector of four floats must be aligned to
+ *          a 16 byte boundary (calculated as 4 * the natural 4-byte 
+ *          alignment of the float).  The alignment qualifiers here
+ *          will only function properly if your compiler supports them
+ *          and if you don't actively work to defeat them.  For example,
+ *          in order for a cl_float4 to be 16 byte aligned in a struct,
+ *          the start of the struct must itself be 16-byte aligned. 
+ *
+ *          Maintaining proper alignment is the user's responsibility.
+ */
+typedef int8_t          cl_char2[2]     __attribute__((aligned(2)));
+typedef int8_t          cl_char4[4]     __attribute__((aligned(4)));
+typedef int8_t          cl_char8[8]     __attribute__((aligned(8)));
+typedef int8_t          cl_char16[16]   __attribute__((aligned(16)));
+typedef uint8_t         cl_uchar2[2]    __attribute__((aligned(2)));
+typedef uint8_t         cl_uchar4[4]    __attribute__((aligned(4)));
+typedef uint8_t         cl_uchar8[8]    __attribute__((aligned(8)));
+typedef uint8_t         cl_uchar16[16]  __attribute__((aligned(16)));
+
+typedef int16_t         cl_short2[2]     __attribute__((aligned(4)));
+typedef int16_t         cl_short4[4]     __attribute__((aligned(8)));
+typedef int16_t         cl_short8[8]     __attribute__((aligned(16)));
+typedef int16_t         cl_short16[16]   __attribute__((aligned(32)));
+typedef uint16_t        cl_ushort2[2]    __attribute__((aligned(4)));
+typedef uint16_t        cl_ushort4[4]    __attribute__((aligned(8)));
+typedef uint16_t        cl_ushort8[8]    __attribute__((aligned(16)));
+typedef uint16_t        cl_ushort16[16]  __attribute__((aligned(32)));
+
+typedef int32_t         cl_int2[2]      __attribute__((aligned(8)));
+typedef int32_t         cl_int4[4]      __attribute__((aligned(16)));
+typedef int32_t         cl_int8[8]      __attribute__((aligned(32)));
+typedef int32_t         cl_int16[16]    __attribute__((aligned(64)));
+typedef uint32_t        cl_uint2[2]     __attribute__((aligned(8)));
+typedef uint32_t        cl_uint4[4]     __attribute__((aligned(16)));
+typedef uint32_t        cl_uint8[8]     __attribute__((aligned(32)));
+typedef uint32_t        cl_uint16[16]   __attribute__((aligned(64)));
+
+typedef int64_t         cl_long2[2]     __attribute__((aligned(16)));
+typedef int64_t         cl_long4[4]     __attribute__((aligned(32)));
+typedef int64_t         cl_long8[8]     __attribute__((aligned(64)));
+typedef int64_t         cl_long16[16]   __attribute__((aligned(128)));
+typedef uint64_t        cl_ulong2[2]    __attribute__((aligned(16)));
+typedef uint64_t        cl_ulong4[4]    __attribute__((aligned(32)));
+typedef uint64_t        cl_ulong8[8]    __attribute__((aligned(64)));
+typedef uint64_t        cl_ulong16[16]  __attribute__((aligned(128)));
+
+typedef float           cl_float2[2]    __attribute__((aligned(8)));
+typedef float           cl_float4[4]    __attribute__((aligned(16)));
+typedef float           cl_float8[8]    __attribute__((aligned(32)));
+typedef float           cl_float16[16]  __attribute__((aligned(64)));
+
+typedef double          cl_double2[2]   __attribute__((aligned(16)));
+typedef double          cl_double4[4]   __attribute__((aligned(32)));
+typedef double          cl_double8[8]   __attribute__((aligned(64)));
+typedef double          cl_double16[16] __attribute__((aligned(128)));
+#endif
+
+#include <stddef.h>
+
+/* and a few goodies to go with them */
+#define CL_CHAR_BIT         8
+#define CL_SCHAR_MAX        127
+#define CL_SCHAR_MIN        (-127-1)
+#define CL_CHAR_MAX         CL_SCHAR_MAX
+#define CL_CHAR_MIN         CL_SCHAR_MIN
+#define CL_UCHAR_MAX        255
+#define CL_SHRT_MAX         32767
+#define CL_SHRT_MIN         (-32767-1)
+#define CL_USHRT_MAX        65535
+#define CL_INT_MAX          2147483647
+#define CL_INT_MIN          (-2147483647-1)
+#define CL_UINT_MAX         0xffffffffU
+#define CL_LONG_MAX         ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
+#define CL_LONG_MIN         ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
+#define CL_ULONG_MAX        ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
+/* Floating-point limits are spelled as decimal literals: hexadecimal floating literals are not valid C++ before C++17. */
+#define CL_FLT_DIG          6
+#define CL_FLT_MANT_DIG     24
+#define CL_FLT_MAX_10_EXP   +38
+#define CL_FLT_MAX_EXP      +128
+#define CL_FLT_MIN_10_EXP   -37
+#define CL_FLT_MIN_EXP      -125
+#define CL_FLT_RADIX        2
+#define CL_FLT_MAX          340282346638528859811704183484516925440.0f
+#define CL_FLT_MIN          1.175494350822287507969e-38f
+#define CL_FLT_EPSILON      1.1920928955078125e-7f
+
+#define CL_DBL_DIG          15
+#define CL_DBL_MANT_DIG     53
+#define CL_DBL_MAX_10_EXP   +308
+#define CL_DBL_MAX_EXP      +1024
+#define CL_DBL_MIN_10_EXP   -307
+#define CL_DBL_MIN_EXP      -1021
+#define CL_DBL_RADIX        2
+#define CL_DBL_MAX          1.7976931348623157e+308
+#define CL_DBL_MIN          2.225073858507201383090e-308
+#define CL_DBL_EPSILON      2.220446049250313080847e-16
+
+/* There are no vector types for half */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // __CL_PLATFORM_H
diff --git a/src/bullet/btBulletCollisionCommon.h b/src/bullet/btBulletCollisionCommon.h
new file mode 100644
index 00000000..472690c1
--- /dev/null
+++ b/src/bullet/btBulletCollisionCommon.h
@@ -0,0 +1,69 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BULLET_COLLISION_COMMON_H
+#define BULLET_COLLISION_COMMON_H
+
+///Common header-file includes for Bullet Collision Detection
+
+///Bullet's btCollisionWorld and btCollisionObject definitions
+#include "BulletCollision/CollisionDispatch/btCollisionWorld.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+
+///Collision Shapes
+#include "BulletCollision/CollisionShapes/btBoxShape.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include "BulletCollision/CollisionShapes/btCylinderShape.h"
+#include "BulletCollision/CollisionShapes/btConeShape.h"
+#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
+#include "BulletCollision/CollisionShapes/btConvexHullShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleMesh.h"
+#include "BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "BulletCollision/CollisionShapes/btCompoundShape.h"
+#include "BulletCollision/CollisionShapes/btTetrahedronShape.h"
+#include "BulletCollision/CollisionShapes/btEmptyShape.h"
+#include "BulletCollision/CollisionShapes/btMultiSphereShape.h"
+#include "BulletCollision/CollisionShapes/btUniformScalingShape.h"
+
+///Narrowphase Collision Detector
+#include "BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h"
+
+//btSphereBoxCollisionAlgorithm is broken, use gjk for now
+//#include "BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h"
+#include "BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h"
+
+///Dispatching and generation of collision pairs (broadphase)
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
+#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
+#include "BulletCollision/BroadphaseCollision/btAxisSweep3.h"
+#include "BulletCollision/BroadphaseCollision/btMultiSapBroadphase.h"
+#include "BulletCollision/BroadphaseCollision/btDbvtBroadphase.h"
+
+///Math library & Utils
+#include "LinearMath/btQuaternion.h"
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btDefaultMotionState.h"
+#include "LinearMath/btQuickprof.h"
+#include "LinearMath/btIDebugDraw.h"
+#include "LinearMath/btSerializer.h"
+
+
+#endif //BULLET_COLLISION_COMMON_H
+
diff --git a/src/bullet/btBulletDynamicsCommon.h b/src/bullet/btBulletDynamicsCommon.h
new file mode 100644
index 00000000..ccfad19b
--- /dev/null
+++ b/src/bullet/btBulletDynamicsCommon.h
@@ -0,0 +1,48 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BULLET_DYNAMICS_COMMON_H
+#define BULLET_DYNAMICS_COMMON_H
+
+///Common header-file includes for Bullet Dynamics, including Collision Detection
+#include "btBulletCollisionCommon.h"
+
+#include "BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h"
+
+#include "BulletDynamics/Dynamics/btSimpleDynamicsWorld.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+
+#include "BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btHingeConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btConeTwistConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btSliderConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btUniversalConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btHinge2Constraint.h"
+
+#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
+
+
+///Vehicle simulation, with wheel contact simulated by raycasts
+#include "BulletDynamics/Vehicle/btRaycastVehicle.h"
+
+
+
+
+
+
+#endif //BULLET_DYNAMICS_COMMON_H
+
diff --git a/src/bullet/vectormath/scalar/boolInVec.h b/src/bullet/vectormath/scalar/boolInVec.h
new file mode 100644
index 00000000..c5eeeebd
--- /dev/null
+++ b/src/bullet/vectormath/scalar/boolInVec.h
@@ -0,0 +1,225 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _BOOLINVEC_H
+#define _BOOLINVEC_H
+
+#include <math.h>
+namespace Vectormath {
+
+class floatInVec;
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec class
+//
+
+class boolInVec
+{
+private:
+    unsigned int mData; // zero means false; the bool constructor stores all bits set for true
+
+public:
+    // Default constructor; does no initialization
+    // (mData holds an indeterminate value until assigned)
+    inline boolInVec( ) { };
+
+    // Construct from a value converted from float
+    // (true when vec compares unequal to 0.0f)
+    inline boolInVec(floatInVec vec);
+
+    // Explicit cast from bool
+    // (true is stored as an all-ones mask, false as zero)
+    explicit inline boolInVec(bool scalar);
+
+    // Explicit cast to bool
+    // (any non-zero mask reads back as true)
+    inline bool getAsBool() const;
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+    // Implicit cast to bool
+    // (compiled out when _VECTORMATH_NO_SCALAR_CAST is defined)
+    inline operator bool() const;
+#endif
+
+    // Boolean negation operator
+    // (returns a new boolInVec; this object is not modified)
+    inline const boolInVec operator ! () const;
+
+    // Assignment operator
+    // (copies the mask and returns a reference to this object)
+    inline boolInVec& operator = (boolInVec vec);
+
+    // Boolean and assignment operator
+    //
+    inline boolInVec& operator &= (boolInVec vec);
+
+    // Boolean exclusive or assignment operator
+    //
+    inline boolInVec& operator ^= (boolInVec vec);
+
+    // Boolean or assignment operator
+    //
+    inline boolInVec& operator |= (boolInVec vec);
+
+};
+
+// Equal operator
+//
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+
+// Not equal operator
+//
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+
+// And operator
+//
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+
+// Exclusive or operator
+//
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+
+// Or operator
+//
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+
+// Conditionally select between two values
+// Returns vec1 when select_vec1 is true, otherwise vec0.
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+
+
+} // namespace Vectormath
+
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec implementation
+//
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+
+// Converts a float: true when vec compares unequal to 0.0f.
+inline boolInVec::boolInVec(floatInVec vec)
+{
+    mData = (vec != floatInVec(0.0f)).mData;
+}
+
+// true becomes an all-ones mask, false becomes zero.
+inline boolInVec::boolInVec(bool scalar)
+{
+    mData = scalar ? ~0u : 0u;
+}
+
+// Any non-zero mask reads back as true.
+inline bool boolInVec::getAsBool() const
+{
+    const bool isSet = (mData != 0u);
+    return isSet;
+}
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+// Implicit conversion to bool; forwards to getAsBool().
+inline boolInVec::operator bool() const
+{
+    return this->getAsBool();
+}
+#endif
+
+// Logical negation: the result is true only when the stored mask is zero.
+inline const boolInVec boolInVec::operator ! () const
+{
+    const bool negated = (mData == 0u);
+    return boolInVec(negated);
+}
+
+// Copies the raw mask from vec and returns this object for chaining.
+inline boolInVec& boolInVec::operator = (boolInVec vec)
+{
+    boolInVec& self = *this;
+    self.mData = vec.mData;
+    return self;
+}
+
+// In-place AND; delegates to the free operator & and returns this object.
+inline boolInVec& boolInVec::operator &= (boolInVec vec)
+{
+    const boolInVec combined = (*this & vec);
+    *this = combined;
+    return *this;
+}
+
+// In-place XOR; delegates to the free operator ^ and returns this object.
+inline boolInVec& boolInVec::operator ^= (boolInVec vec)
+{
+    const boolInVec combined = (*this ^ vec);
+    *this = combined;
+    return *this;
+}
+
+// In-place OR; delegates to the free operator | and returns this object.
+inline boolInVec& boolInVec::operator |= (boolInVec vec)
+{
+    const boolInVec combined = (*this | vec);
+    *this = combined;
+    return *this;
+}
+
+// Equality of the two truth values, returned as a boolInVec.
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1)
+{
+    const bool same = (vec0.getAsBool() == vec1.getAsBool());
+    return boolInVec(same);
+}
+
+// Inverse of operator == for boolInVec operands.
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1)
+{
+    const boolInVec equal = (vec0 == vec1);
+    return !equal;
+}
+
+// Logical AND of the two truth values.
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1)
+{
+    const bool both = (vec0.getAsBool() && vec1.getAsBool());
+    return boolInVec(both);
+}
+
+// Logical OR of the two truth values.
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1)
+{
+    const bool either = (vec0.getAsBool() || vec1.getAsBool());
+    return boolInVec(either);
+}
+
+// Logical XOR: true when exactly one operand is true.
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1)
+{
+    const bool differ = (vec0.getAsBool() != vec1.getAsBool());
+    return boolInVec(differ);
+}
+
+// Picks vec1 when select_vec1 is true, otherwise vec0.
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
+{
+    if (select_vec1.getAsBool()) return vec1;
+    return vec0;
+}
+
+} // namespace Vectormath
+
+#endif // _BOOLINVEC_H
diff --git a/src/bullet/vectormath/scalar/floatInVec.h b/src/bullet/vectormath/scalar/floatInVec.h
new file mode 100644
index 00000000..12d89e43
--- /dev/null
+++ b/src/bullet/vectormath/scalar/floatInVec.h
@@ -0,0 +1,343 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+#ifndef _FLOATINVEC_H
+#define _FLOATINVEC_H
+
+#include <math.h>
+namespace Vectormath {
+
+class boolInVec;
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec class
+//
+
+// A class representing a scalar float value contained in a vector register
+// This class does not support fastmath
+class floatInVec
+{
+private:
+    float mData; // the wrapped scalar value
+
+public:
+    // Default constructor; does no initialization
+    // (mData holds an indeterminate value until assigned)
+    inline floatInVec( ) { };
+
+    // Construct from a value converted from bool
+    // (true becomes 1.0f, false becomes 0.0f)
+    inline floatInVec(boolInVec vec);
+
+    // Explicit cast from float
+    //
+    explicit inline floatInVec(float scalar);
+
+    // Explicit cast to float
+    //
+    inline float getAsFloat() const;
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+    // Implicit cast to float
+    // (compiled out when _VECTORMATH_NO_SCALAR_CAST is defined)
+    inline operator float() const;
+#endif
+
+    // Post increment (add 1.0f)
+    // (returns the value held before the increment)
+    inline const floatInVec operator ++ (int);
+
+    // Post decrement (subtract 1.0f)
+    // (returns the value held before the decrement)
+    inline const floatInVec operator -- (int);
+
+    // Pre increment (add 1.0f)
+    // (returns a reference to the updated object)
+    inline floatInVec& operator ++ ();
+
+    // Pre decrement (subtract 1.0f)
+    // (returns a reference to the updated object)
+    inline floatInVec& operator -- ();
+
+    // Negation operator
+    // (returns a new floatInVec; this object is not modified)
+    inline const floatInVec operator - () const;
+
+    // Assignment operator
+    //
+    inline floatInVec& operator = (floatInVec vec);
+
+    // Multiplication assignment operator
+    //
+    inline floatInVec& operator *= (floatInVec vec);
+
+    // Division assignment operator
+    //
+    inline floatInVec& operator /= (floatInVec vec);
+
+    // Addition assignment operator
+    //
+    inline floatInVec& operator += (floatInVec vec);
+
+    // Subtraction assignment operator
+    //
+    inline floatInVec& operator -= (floatInVec vec);
+
+};
+
+// Multiplication operator
+//
+inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
+
+// Division operator
+//
+inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+
+// Addition operator
+//
+inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+
+// Subtraction operator
+//
+inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+
+// Less than operator
+//
+inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
+
+// Less than or equal operator
+//
+inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+
+// Greater than operator
+//
+inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+
+// Greater than or equal operator
+//
+inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+
+// Equal operator
+//
+inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+
+// Not equal operator
+//
+inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+
+// Conditionally select between two values
+// Returns vec1 when select_vec1 is true, otherwise vec0.
+inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+
+
+} // namespace Vectormath
+
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec implementation
+//
+
+#include "boolInVec.h"
+
+namespace Vectormath {
+
+// Converts a boolInVec: true maps to 1.0f, false to 0.0f.
+inline floatInVec::floatInVec(boolInVec vec)
+{
+    mData = vec.getAsBool() ? 1.0f : 0.0f;
+}
+
+// Wraps a plain float.
+inline floatInVec::floatInVec(float scalar)
+    : mData(scalar)
+{
+}
+
+// Returns the wrapped scalar value.
+inline float floatInVec::getAsFloat() const
+{
+    const float value = mData;
+    return value;
+}
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+// Implicit conversion to float; forwards to getAsFloat().
+inline floatInVec::operator float() const
+{
+    return this->getAsFloat();
+}
+#endif
+
+// Post-increment: adds 1.0f to this object and returns the value it
+// held before the change.
+inline const floatInVec floatInVec::operator ++ (int)
+{
+    const floatInVec previous(mData);
+    ++(*this);
+    return previous;
+}
+
+// Post-decrement: subtracts 1.0f from this object and returns the value
+// it held before the change.
+inline const floatInVec floatInVec::operator -- (int)
+{
+    const floatInVec previous(mData);
+    --(*this);
+    return previous;
+}
+
+// Pre-increment: adds 1.0f and returns a reference to this object.
+inline floatInVec& floatInVec::operator ++ ()
+{
+    const floatInVec one(1.0f);
+    *this += one;
+    return *this;
+}
+
+// Pre-decrement: subtracts 1.0f and returns a reference to this object.
+inline floatInVec& floatInVec::operator -- ()
+{
+    const floatInVec one(1.0f);
+    *this -= one;
+    return *this;
+}
+
+// Unary minus: returns a new floatInVec holding the negated value.
+inline const floatInVec floatInVec::operator - () const
+{
+    const float negated = -mData;
+    return floatInVec(negated);
+}
+
+// Copies the wrapped value from vec and returns this object for chaining.
+inline floatInVec& floatInVec::operator = (floatInVec vec)
+{
+    floatInVec& self = *this;
+    self.mData = vec.mData;
+    return self;
+}
+
+// In-place multiply; delegates to the free operator * and returns this object.
+inline floatInVec& floatInVec::operator *= (floatInVec vec)
+{
+    const floatInVec product = (*this * vec);
+    *this = product;
+    return *this;
+}
+
+// In-place divide; delegates to the free operator / and returns this object.
+inline floatInVec& floatInVec::operator /= (floatInVec vec)
+{
+    const floatInVec quotient = (*this / vec);
+    *this = quotient;
+    return *this;
+}
+
+// In-place add; delegates to the free operator + and returns this object.
+inline floatInVec& floatInVec::operator += (floatInVec vec)
+{
+    const floatInVec sum = (*this + vec);
+    *this = sum;
+    return *this;
+}
+
+// In-place subtract; delegates to the free operator - and returns this object.
+inline floatInVec& floatInVec::operator -= (floatInVec vec)
+{
+    const floatInVec difference = (*this - vec);
+    *this = difference;
+    return *this;
+}
+
+// Product of the two wrapped values.
+inline const floatInVec operator * (floatInVec vec0, floatInVec vec1)
+{
+    const float product = vec0.getAsFloat() * vec1.getAsFloat();
+    return floatInVec(product);
+}
+
+// Quotient num / den of the two wrapped values (no check for a zero divisor).
+inline const floatInVec operator / (floatInVec num, floatInVec den)
+{
+    const float quotient = num.getAsFloat() / den.getAsFloat();
+    return floatInVec(quotient);
+}
+
+// Sum of the two wrapped values.
+inline const floatInVec operator + (floatInVec vec0, floatInVec vec1)
+{
+    const float sum = vec0.getAsFloat() + vec1.getAsFloat();
+    return floatInVec(sum);
+}
+
+// Difference vec0 - vec1 of the two wrapped values.
+inline const floatInVec operator - (floatInVec vec0, floatInVec vec1)
+{
+    const float difference = vec0.getAsFloat() - vec1.getAsFloat();
+    return floatInVec(difference);
+}
+
+// True when vec0 is strictly less than vec1.
+inline const boolInVec operator < (floatInVec vec0, floatInVec vec1)
+{
+    const bool isLess = (vec0.getAsFloat() < vec1.getAsFloat());
+    return boolInVec(isLess);
+}
+
+// True when vec0 <= vec1. Compared directly rather than as !(vec0 > vec1),
+// so the result is false when either operand is NaN (IEEE 754 semantics).
+inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(vec0.getAsFloat() <= vec1.getAsFloat());
+}
+
+// True when vec0 is strictly greater than vec1.
+inline const boolInVec operator > (floatInVec vec0, floatInVec vec1)
+{
+    const bool isGreater = (vec0.getAsFloat() > vec1.getAsFloat());
+    return boolInVec(isGreater);
+}
+
+// True when vec0 >= vec1. Compared directly rather than as !(vec0 < vec1),
+// so the result is false when either operand is NaN (IEEE 754 semantics).
+inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(vec0.getAsFloat() >= vec1.getAsFloat());
+}
+
+// Exact floating-point equality of the two wrapped values.
+inline const boolInVec operator == (floatInVec vec0, floatInVec vec1)
+{
+    const bool same = (vec0.getAsFloat() == vec1.getAsFloat());
+    return boolInVec(same);
+}
+
+// Inverse of operator == for floatInVec operands.
+inline const boolInVec operator != (floatInVec vec0, floatInVec vec1)
+{
+    const boolInVec equal = (vec0 == vec1);
+    return !equal;
+}
+
+// Picks vec1 when select_vec1 is true, otherwise vec0.
+inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
+{
+    if (select_vec1.getAsBool()) return vec1;
+    return vec0;
+}
+
+} // namespace Vectormath
+
+#endif // _FLOATINVEC_H
diff --git a/src/bullet/vectormath/scalar/mat_aos.h b/src/bullet/vectormath/scalar/mat_aos.h
new file mode 100644
index 00000000..e103243d
--- /dev/null
+++ b/src/bullet/vectormath/scalar/mat_aos.h
@@ -0,0 +1,1630 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_CPP_H
+#define _VECTORMATH_MAT_AOS_CPP_H
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+//-----------------------------------------------------------------------------
+// Definitions
+// Copy constructor: copies the three column vectors of mat.
+inline Matrix3::Matrix3( const Matrix3 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+}
+// Constructs a matrix whose three columns are each Vector3( scalar ).
+inline Matrix3::Matrix3( float scalar )
+{
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+}
+// Builds the rotation matrix for unitQuat from doubled products of its x, y, z, w components; the quaternion is not normalized here.
+inline Matrix3::Matrix3( const Quat & unitQuat )
+{
+    float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+    qx = unitQuat.getX();
+    qy = unitQuat.getY();
+    qz = unitQuat.getZ();
+    qw = unitQuat.getW();
+    qx2 = ( qx + qx ); // 2*qx
+    qy2 = ( qy + qy ); // 2*qy
+    qz2 = ( qz + qz ); // 2*qz
+    qxqx2 = ( qx * qx2 );
+    qxqy2 = ( qx * qy2 );
+    qxqz2 = ( qx * qz2 );
+    qxqw2 = ( qw * qx2 ); // 2*qw*qx
+    qyqy2 = ( qy * qy2 );
+    qyqz2 = ( qy * qz2 );
+    qyqw2 = ( qw * qy2 ); // 2*qw*qy
+    qzqz2 = ( qz * qz2 );
+    qzqw2 = ( qw * qz2 ); // 2*qw*qz
+    mCol0 = Vector3( ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) );
+    mCol1 = Vector3( ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) );
+    mCol2 = Vector3( ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) );
+}
+// Constructs the matrix from its three columns.
+inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+}
+// Replaces column 0 with _col0 and returns *this.
+inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
+{
+    mCol0 = _col0;
+    return *this;
+}
+// Replaces column 1 with _col1 and returns *this.
+inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
+{
+    mCol1 = _col1;
+    return *this;
+}
+// Replaces column 2 with _col2 and returns *this.
+inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
+{
+    mCol2 = _col2;
+    return *this;
+}
+// Stores vec into column `col`, addressed as an offset from mCol0; col is not range-checked. Returns *this.
+inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec )
+{
+    *(&mCol0 + col) = vec; // NOTE(review): presumably relies on mCol0..mCol2 being consecutive members - confirm in the class declaration
+    return *this;
+}
+// Writes elements 0, 1, 2 of vec into element `row` of columns 0, 1, 2 respectively; returns *this.
+inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    return *this;
+}
+
+// Sets element (col, row) to val by editing a copy of column `col` and storing it back; returns *this.
+inline Matrix3 & Matrix3::setElem( int col, int row, float val )
+{
+    Vector3 column = getCol( col );
+    column.setElem( row, val );
+    setCol( col, column );
+    return *this;
+}
+// Returns the element at column `col`, row `row`.
+inline float Matrix3::getElem( int col, int row ) const
+{
+    return this->getCol( col ).getElem( row );
+}
+// Returns a copy of column 0.
+inline const Vector3 Matrix3::getCol0( ) const
+{
+    return mCol0;
+}
+// Returns a copy of column 1.
+inline const Vector3 Matrix3::getCol1( ) const
+{
+    return mCol1;
+}
+// Returns a copy of column 2.
+inline const Vector3 Matrix3::getCol2( ) const
+{
+    return mCol2;
+}
+// Returns a copy of column `col`, addressed as an offset from mCol0; col is not range-checked.
+inline const Vector3 Matrix3::getCol( int col ) const
+{
+    return *(&mCol0 + col);
+}
+// Returns row `row` as a Vector3 gathered from element `row` of each of the three columns.
+inline const Vector3 Matrix3::getRow( int row ) const
+{
+    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
+}
+// Returns a mutable reference to column `col` (offset from mCol0; col is not range-checked).
+inline Vector3 & Matrix3::operator []( int col )
+{
+    return *(&mCol0 + col);
+}
+// Const overload: returns a copy of column `col` (offset from mCol0; col is not range-checked).
+inline const Vector3 Matrix3::operator []( int col ) const
+{
+    return *(&mCol0 + col);
+}
+// Assignment: copies the three columns of mat and returns *this.
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    return *this;
+}
+// Returns the transpose of mat: result column i is built from the i-th component of each column of mat.
+inline const Matrix3 transpose( const Matrix3 & mat )
+{
+    return Matrix3(
+        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
+        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
+        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
+    );
+}
+
+// Inverse of mat: the cross products of column pairs, scaled by 1/determinant, become the rows of the result.
+// The determinant is not tested against zero before dividing.
+inline const Matrix3 inverse( const Matrix3 & mat )
+{
+    const Vector3 c12 = cross( mat.getCol1(), mat.getCol2() );
+    const Vector3 c20 = cross( mat.getCol2(), mat.getCol0() );
+    const Vector3 c01 = cross( mat.getCol0(), mat.getCol1() );
+    const float invDet = ( 1.0f / dot( mat.getCol2(), c01 ) );
+    return Matrix3(
+        Vector3( ( c12.getX() * invDet ), ( c20.getX() * invDet ), ( c01.getX() * invDet ) ),
+        Vector3( ( c12.getY() * invDet ), ( c20.getY() * invDet ), ( c01.getY() * invDet ) ),
+        Vector3( ( c12.getZ() * invDet ), ( c20.getZ() * invDet ), ( c01.getZ() * invDet ) )
+    );
+}
+// Returns the determinant of mat, computed as dot( col2, cross( col0, col1 ) ).
+inline float determinant( const Matrix3 & mat )
+{
+    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) );
+}
+// Returns the column-wise sum of this matrix and mat.
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( mCol0 + mat.mCol0 ),
+        ( mCol1 + mat.mCol1 ),
+        ( mCol2 + mat.mCol2 )
+    );
+}
+// Returns the column-wise difference of this matrix and mat.
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( mCol0 - mat.mCol0 ),
+        ( mCol1 - mat.mCol1 ),
+        ( mCol2 - mat.mCol2 )
+    );
+}
+
+// Adds mat to this matrix in place and returns *this.
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
+{
+    return ( *this = ( *this + mat ) );
+}
+
+// Subtracts mat from this matrix in place and returns *this.
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
+{
+    return ( *this = ( *this - mat ) );
+}
+// Returns the matrix with each column negated.
+inline const Matrix3 Matrix3::operator -( ) const
+{
+    return Matrix3(
+        ( -mCol0 ),
+        ( -mCol1 ),
+        ( -mCol2 )
+    );
+}
+// Applies the Vector3 absPerElem to each column of mat and returns the resulting matrix.
+inline const Matrix3 absPerElem( const Matrix3 & mat )
+{
+    return Matrix3(
+        absPerElem( mat.getCol0() ),
+        absPerElem( mat.getCol1() ),
+        absPerElem( mat.getCol2() )
+    );
+}
+// Returns this matrix with every column multiplied by scalar.
+inline const Matrix3 Matrix3::operator *( float scalar ) const
+{
+    return Matrix3(
+        ( mCol0 * scalar ),
+        ( mCol1 * scalar ),
+        ( mCol2 * scalar )
+    );
+}
+
+// Multiplies this matrix by scalar in place and returns *this.
+inline Matrix3 & Matrix3::operator *=( float scalar )
+{
+    return ( *this = ( *this * scalar ) );
+}
+// Scalar-on-the-left multiplication; forwards to Matrix3::operator *( float ).
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
+{
+    return mat * scalar;
+}
+// Matrix-vector product: each result component sums the matching components of the columns weighted by vec's x, y, z.
+inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const
+{
+    return Vector3(
+        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
+        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
+        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) )
+    );
+}
+// Matrix product: each result column is this matrix times the corresponding column of mat.
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( *this * mat.mCol0 ),
+        ( *this * mat.mCol1 ),
+        ( *this * mat.mCol2 )
+    );
+}
+
+// Replaces this matrix with ( *this * mat ) and returns *this.
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
+{
+    return ( *this = ( *this * mat ) );
+}
+// Element-wise product: applies the Vector3 mulPerElem to matching columns of mat0 and mat1.
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+    return Matrix3(
+        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
+        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
+        mulPerElem( mat0.getCol2(), mat1.getCol2() )
+    );
+}
+// Returns the matrix whose columns are Vector3::xAxis(), yAxis() and zAxis().
+inline const Matrix3 Matrix3::identity( )
+{
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3::yAxis( ),
+        Vector3::zAxis( )
+    );
+}
+
+// Returns the matrix rotating by `radians` about the x axis; column 0 stays Vector3::xAxis().
+inline const Matrix3 Matrix3::rotationX( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3( 0.0f, cosAngle, sinAngle ),
+        Vector3( 0.0f, -sinAngle, cosAngle )
+    );
+}
+
+// Returns the matrix rotating by `radians` about the y axis; column 1 stays Vector3::yAxis().
+inline const Matrix3 Matrix3::rotationY( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    return Matrix3(
+        Vector3( cosAngle, 0.0f, -sinAngle ),
+        Vector3::yAxis( ),
+        Vector3( sinAngle, 0.0f, cosAngle )
+    );
+}
+
+// Returns the matrix rotating by `radians` about the z axis; column 2 stays Vector3::zAxis().
+inline const Matrix3 Matrix3::rotationZ( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    return Matrix3(
+        Vector3( cosAngle, sinAngle, 0.0f ),
+        Vector3( -sinAngle, cosAngle, 0.0f ),
+        Vector3::zAxis( )
+    );
+}
+
+// Combined rotation from the x, y, z angles in radiansXYZ; the columns equal rotationZ(z) * rotationY(y) * rotationX(x).
+inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ.getX() );
+    const float cosX = cosf( radiansXYZ.getX() );
+    const float sinY = sinf( radiansXYZ.getY() );
+    const float cosY = cosf( radiansXYZ.getY() );
+    const float sinZ = sinf( radiansXYZ.getZ() );
+    const float cosZ = cosf( radiansXYZ.getZ() );
+    const float cosZsinY = ( cosZ * sinY );
+    const float sinZsinY = ( sinZ * sinY );
+    return Matrix3(
+        Vector3( ( cosZ * cosY ), ( sinZ * cosY ), -sinY ),
+        Vector3( ( ( cosZsinY * sinX ) - ( sinZ * cosX ) ), ( ( sinZsinY * sinX ) + ( cosZ * cosX ) ), ( cosY * sinX ) ),
+        Vector3( ( ( cosZsinY * cosX ) + ( sinZ * sinX ) ), ( ( sinZsinY * cosX ) - ( cosZ * sinX ) ), ( cosY * cosX ) )
+    );
+}
+
+// Rotation by `radians` about the axis unitVec; the axis is used as given and is not normalized here.
+inline const Matrix3 Matrix3::rotation( float radians, const Vector3 & unitVec )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    const float ax = unitVec.getX();
+    const float ay = unitVec.getY();
+    const float az = unitVec.getZ();
+    const float axy = ( ax * ay );
+    const float ayz = ( ay * az );
+    const float azx = ( az * ax );
+    const float oneMinusCos = ( 1.0f - cosAngle );
+    return Matrix3(
+        Vector3( ( ( ( ax * ax ) * oneMinusCos ) + cosAngle ), ( ( axy * oneMinusCos ) + ( az * sinAngle ) ), ( ( azx * oneMinusCos ) - ( ay * sinAngle ) ) ),
+        Vector3( ( ( axy * oneMinusCos ) - ( az * sinAngle ) ), ( ( ( ay * ay ) * oneMinusCos ) + cosAngle ), ( ( ayz * oneMinusCos ) + ( ax * sinAngle ) ) ),
+        Vector3( ( ( azx * oneMinusCos ) + ( ay * sinAngle ) ), ( ( ayz * oneMinusCos ) - ( ax * sinAngle ) ), ( ( ( az * az ) * oneMinusCos ) + cosAngle ) )
+    );
+}
+// Returns the matrix built by the Matrix3( const Quat & ) constructor from unitQuat.
+inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
+{
+    return Matrix3( unitQuat );
+}
+// Returns a diagonal matrix with scaleVec's x, y, z on the diagonal and zeros elsewhere.
+inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
+{
+    return Matrix3(
+        Vector3( scaleVec.getX(), 0.0f, 0.0f ),
+        Vector3( 0.0f, scaleVec.getY(), 0.0f ),
+        Vector3( 0.0f, 0.0f, scaleVec.getZ() )
+    );
+}
+// Scales the columns of mat: column 0 by scaleVec.x, column 1 by scaleVec.y, column 2 by scaleVec.z.
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
+{
+    return Matrix3(
+        ( mat.getCol0() * scaleVec.getX( ) ),
+        ( mat.getCol1() * scaleVec.getY( ) ),
+        ( mat.getCol2() * scaleVec.getZ( ) )
+    );
+}
+// Scales the rows of mat: every column is multiplied element-wise by scaleVec.
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
+{
+    return Matrix3(
+        mulPerElem( mat.getCol0(), scaleVec ),
+        mulPerElem( mat.getCol1(), scaleVec ),
+        mulPerElem( mat.getCol2(), scaleVec )
+    );
+}
+// Column-by-column selection between mat0 and mat1, delegating to the Vector3 select with the same select1 flag.
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
+{
+    return Matrix3(
+        select( mat0.getCol0(), mat1.getCol0(), select1 ),
+        select( mat0.getCol1(), mat1.getCol1(), select1 ),
+        select( mat0.getCol2(), mat1.getCol2(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+// Debug helper (only compiled under _VECTORMATH_DEBUG): prints rows 0, 1, 2 of mat via the vector print.
+inline void print( const Matrix3 & mat )
+{
+    print( mat.getRow( 0 ) );
+    print( mat.getRow( 1 ) );
+    print( mat.getRow( 2 ) );
+}
+// Debug helper: prints "name:" on its own line, then the matrix rows via print( mat ).
+inline void print( const Matrix3 & mat, const char * name )
+{
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+// Copy constructor: copies the four column vectors of mat.
+inline Matrix4::Matrix4( const Matrix4 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    mCol3 = mat.mCol3;
+}
+// Constructs a matrix whose four columns are each Vector4( scalar ).
+inline Matrix4::Matrix4( float scalar )
+{
+    mCol0 = Vector4( scalar );
+    mCol1 = Vector4( scalar );
+    mCol2 = Vector4( scalar );
+    mCol3 = Vector4( scalar );
+}
+// Extends a Transform3 to 4x4: columns 0-2 get w = 0 and column 3 gets w = 1.
+inline Matrix4::Matrix4( const Transform3 & mat )
+{
+    mCol0 = Vector4( mat.getCol0(), 0.0f );
+    mCol1 = Vector4( mat.getCol1(), 0.0f );
+    mCol2 = Vector4( mat.getCol2(), 0.0f );
+    mCol3 = Vector4( mat.getCol3(), 1.0f );
+}
+// Constructs the matrix from its four columns.
+inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+    mCol3 = _col3;
+}
+// Builds a 4x4 from a 3x3 part and a translation: mat's columns get w = 0, translateVec becomes column 3 with w = 1.
+inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
+{
+    mCol0 = Vector4( mat.getCol0(), 0.0f );
+    mCol1 = Vector4( mat.getCol1(), 0.0f );
+    mCol2 = Vector4( mat.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+// Builds a 4x4 from Matrix3( unitQuat ) (columns get w = 0) plus translateVec as column 3 with w = 1.
+inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    const Matrix3 upper3x3( unitQuat );
+    mCol0 = Vector4( upper3x3.getCol0(), 0.0f );
+    mCol1 = Vector4( upper3x3.getCol1(), 0.0f );
+    mCol2 = Vector4( upper3x3.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+// Replaces column 0 with _col0 and returns *this.
+inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
+{
+    mCol0 = _col0;
+    return *this;
+}
+// Replaces column 1 with _col1 and returns *this.
+inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
+{
+    mCol1 = _col1;
+    return *this;
+}
+// Replaces column 2 with _col2 and returns *this.
+inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
+{
+    mCol2 = _col2;
+    return *this;
+}
+// Replaces column 3 with _col3 and returns *this.
+inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
+{
+    mCol3 = _col3;
+    return *this;
+}
+// Stores vec into column `col`, addressed as an offset from mCol0; col is not range-checked. Returns *this.
+inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec )
+{
+    *(&mCol0 + col) = vec; // NOTE(review): presumably relies on mCol0..mCol3 being consecutive members - confirm in the class declaration
+    return *this;
+}
+// Writes elements 0..3 of vec into element `row` of columns 0..3 respectively; returns *this.
+inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+// Sets element (col, row) to val by editing a copy of column `col` and storing it back; returns *this.
+inline Matrix4 & Matrix4::setElem( int col, int row, float val )
+{
+    Vector4 column = getCol( col );
+    column.setElem( row, val );
+    setCol( col, column );
+    return *this;
+}
+// Returns the element at column `col`, row `row`.
+inline float Matrix4::getElem( int col, int row ) const
+{
+    return this->getCol( col ).getElem( row );
+}
+// Returns a copy of column 0.
+inline const Vector4 Matrix4::getCol0( ) const
+{
+    return mCol0;
+}
+// Returns a copy of column 1.
+inline const Vector4 Matrix4::getCol1( ) const
+{
+    return mCol1;
+}
+// Returns a copy of column 2.
+inline const Vector4 Matrix4::getCol2( ) const
+{
+    return mCol2;
+}
+// Returns a copy of column 3.
+inline const Vector4 Matrix4::getCol3( ) const
+{
+    return mCol3;
+}
+// Returns a copy of column `col`, addressed as an offset from mCol0; col is not range-checked.
+inline const Vector4 Matrix4::getCol( int col ) const
+{
+    return *(&mCol0 + col);
+}
+// Returns row `row` as a Vector4 gathered from element `row` of each of the four columns.
+inline const Vector4 Matrix4::getRow( int row ) const
+{
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
+}
+// Returns a mutable reference to column `col` (offset from mCol0; col is not range-checked).
+inline Vector4 & Matrix4::operator []( int col )
+{
+    return *(&mCol0 + col);
+}
+// Const overload: returns a copy of column `col` (offset from mCol0; col is not range-checked).
+inline const Vector4 Matrix4::operator []( int col ) const
+{
+    return *(&mCol0 + col);
+}
+// Assignment: copies the four columns of mat and returns *this.
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    mCol3 = mat.mCol3;
+    return *this;
+}
+// Returns the transpose of mat: result column i is built from the i-th component of each column of mat.
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    return Matrix4(
+        Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ),
+        Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ),
+        Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ),
+        Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() )
+    );
+}
+// General 4x4 inverse: res0..res3 are built from products of 2x2 sub-determinants, then each is scaled by 1/determinant. A singular matrix is not detected.
+inline const Matrix4 inverse( const Matrix4 & mat )
+{
+    Vector4 res0, res1, res2, res3;
+    float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+    mA = mat.getCol0().getX(); // mA..mD = column 0, mE..mH = column 1, mI..mL = column 2, mM..mP = column 3
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX();
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX();
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX();
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
+    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
+    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
+    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
+    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
+    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
+    res0.setX( ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) ); // res0 holds the four terms whose dot product with the columns' x components is the determinant
+    res0.setY( ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
+    res0.setZ( ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
+    res0.setW( ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
+    detInv = ( 1.0f / ( ( ( ( mA * res0.getX() ) + ( mE * res0.getY() ) ) + ( mI * res0.getZ() ) ) + ( mM * res0.getW() ) ) ); // reciprocal of the determinant, not guarded against zero
+    res1.setX( ( mI * tmp1 ) ); // partial terms for res1..res3; completed below once tmp0..tmp5 are recomputed
+    res1.setY( ( mM * tmp0 ) );
+    res1.setZ( ( mA * tmp1 ) );
+    res1.setW( ( mE * tmp0 ) );
+    res3.setX( ( mI * tmp3 ) );
+    res3.setY( ( mM * tmp2 ) );
+    res3.setZ( ( mA * tmp3 ) );
+    res3.setW( ( mE * tmp2 ) );
+    res2.setX( ( mI * tmp5 ) );
+    res2.setY( ( mM * tmp4 ) );
+    res2.setZ( ( mA * tmp5 ) );
+    res2.setW( ( mE * tmp4 ) );
+    tmp0 = ( ( mI * mB ) - ( mA * mJ ) ); // tmp0..tmp5 are reused for a second set of 2x2 sub-determinants
+    tmp1 = ( ( mM * mF ) - ( mE * mN ) );
+    tmp2 = ( ( mI * mD ) - ( mA * mL ) );
+    tmp3 = ( ( mM * mH ) - ( mE * mP ) );
+    tmp4 = ( ( mI * mC ) - ( mA * mK ) );
+    tmp5 = ( ( mM * mG ) - ( mE * mO ) );
+    res2.setX( ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.getX() ) );
+    res2.setY( ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.getY() ) );
+    res2.setZ( ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.getZ() ) );
+    res2.setW( ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.getW() ) );
+    res3.setX( ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.getX() ) );
+    res3.setY( ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.getY() ) );
+    res3.setZ( ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.getZ() ) );
+    res3.setW( ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.getW() ) );
+    res1.setX( ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.getX() ) );
+    res1.setY( ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.getY() ) );
+    res1.setZ( ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.getZ() ) );
+    res1.setW( ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.getW() ) );
+    return Matrix4(
+        ( res0 * detInv ),
+        ( res1 * detInv ),
+        ( res2 * detInv ),
+        ( res3 * detInv )
+    );
+}
+// Inverts mat as a Transform3: only the xyz parts of the columns are used, and Matrix4( Transform3 ) supplies the w components (0,0,0,1).
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    Transform3 affineMat;
+    affineMat.setCol0( mat.getCol0().getXYZ( ) );
+    affineMat.setCol1( mat.getCol1().getXYZ( ) );
+    affineMat.setCol2( mat.getCol2().getXYZ( ) );
+    affineMat.setCol3( mat.getCol3().getXYZ( ) );
+    return Matrix4( inverse( affineMat ) );
+}
+// Same as affineInverse but calls orthoInverse( Transform3 ); only the xyz parts of mat's columns are used.
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    Transform3 affineMat;
+    affineMat.setCol0( mat.getCol0().getXYZ( ) );
+    affineMat.setCol1( mat.getCol1().getXYZ( ) );
+    affineMat.setCol2( mat.getCol2().getXYZ( ) );
+    affineMat.setCol3( mat.getCol3().getXYZ( ) );
+    return Matrix4( orthoInverse( affineMat ) );
+}
+// Returns the determinant of mat: dx..dw are built from 2x2 sub-determinants and dotted with the x components of the four columns.
+inline float determinant( const Matrix4 & mat )
+{
+    float dx, dy, dz, dw, mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+    mA = mat.getCol0().getX(); // mA..mD = column 0, mE..mH = column 1, mI..mL = column 2, mM..mP = column 3
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX();
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX();
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX();
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = ( ( mK * mD ) - ( mC * mL ) );
+    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
+    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
+    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
+    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
+    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
+    dx = ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) );
+    dy = ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) );
+    dz = ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) );
+    dw = ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) );
+    return ( ( ( ( mA * dx ) + ( mE * dy ) ) + ( mI * dz ) ) + ( mM * dw ) );
+}
+// Returns the column-wise sum of this matrix and mat.
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    return Matrix4(
+        ( mCol0 + mat.mCol0 ),
+        ( mCol1 + mat.mCol1 ),
+        ( mCol2 + mat.mCol2 ),
+        ( mCol3 + mat.mCol3 )
+    );
+}
+// Returns the column-wise difference of this matrix and mat.
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    return Matrix4(
+        ( mCol0 - mat.mCol0 ),
+        ( mCol1 - mat.mCol1 ),
+        ( mCol2 - mat.mCol2 ),
+        ( mCol3 - mat.mCol3 )
+    );
+}
+
+// Adds mat to this matrix in place and returns *this.
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    return ( *this = ( *this + mat ) );
+}
+
+// Subtracts mat from this matrix in place and returns *this.
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    return ( *this = ( *this - mat ) );
+}
+// Returns the matrix with each column negated.
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    return Matrix4(
+        ( -mCol0 ),
+        ( -mCol1 ),
+        ( -mCol2 ),
+        ( -mCol3 )
+    );
+}
+// Applies the Vector4 absPerElem to each column of mat and returns the resulting matrix.
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    return Matrix4(
+        absPerElem( mat.getCol0() ),
+        absPerElem( mat.getCol1() ),
+        absPerElem( mat.getCol2() ),
+        absPerElem( mat.getCol3() )
+    );
+}
+// Returns this matrix with every column multiplied by scalar.
+inline const Matrix4 Matrix4::operator *( float scalar ) const
+{
+    return Matrix4(
+        ( mCol0 * scalar ),
+        ( mCol1 * scalar ),
+        ( mCol2 * scalar ),
+        ( mCol3 * scalar )
+    );
+}
+
+// Multiplies this matrix by scalar in place and returns *this.
+inline Matrix4 & Matrix4::operator *=( float scalar )
+{
+    return ( *this = ( *this * scalar ) );
+}
+// Scalar-on-the-left multiplication; forwards to Matrix4::operator *( float ).
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
+{
+    return mat * scalar;
+}
+// Matrix-vector product: each result component sums the matching components of the columns weighted by vec's x, y, z, w.
+inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
+{
+    return Vector4(
+        ( ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ) + ( mCol3.getX() * vec.getW() ) ),
+        ( ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ) + ( mCol3.getY() * vec.getW() ) ),
+        ( ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ) + ( mCol3.getZ() * vec.getW() ) ),
+        ( ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) ) + ( mCol3.getW() * vec.getW() ) )
+    );
+}
+// Product with a Vector3: only columns 0-2 contribute (column 3 is not used), i.e. vec is treated as having w = 0.
+inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
+{
+    return Vector4(
+        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
+        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
+        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) ),
+        ( ( ( mCol0.getW() * vec.getX() ) + ( mCol1.getW() * vec.getY() ) ) + ( mCol2.getW() * vec.getZ() ) )
+    );
+}
+// Product with a Point3: column 3 is added unscaled, i.e. pnt is treated as having w = 1.
+inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
+{
+    return Vector4(
+        ( ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX() ),
+        ( ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY() ),
+        ( ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ() ),
+        ( ( ( ( mCol0.getW() * pnt.getX() ) + ( mCol1.getW() * pnt.getY() ) ) + ( mCol2.getW() * pnt.getZ() ) ) + mCol3.getW() )
+    );
+}
+// Matrix product: each result column is this matrix times the corresponding column of mat.
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    return Matrix4(
+        ( *this * mat.mCol0 ),
+        ( *this * mat.mCol1 ),
+        ( *this * mat.mCol2 ),
+        ( *this * mat.mCol3 )
+    );
+}
+
+// Replaces this matrix with ( *this * mat ) and returns *this.
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    return ( *this = ( *this * mat ) );
+}
+// Product with a Transform3: tfrm's columns 0-2 are multiplied as Vector3 and its column 3 as a Point3.
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    return Matrix4(
+        ( *this * tfrm.getCol0() ),
+        ( *this * tfrm.getCol1() ),
+        ( *this * tfrm.getCol2() ),
+        ( *this * Point3( tfrm.getCol3() ) )
+    );
+}
+
+// Replaces this matrix with ( *this * tfrm ) and returns *this.
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    return ( *this = ( *this * tfrm ) );
+}
+// Element-wise product: applies the Vector4 mulPerElem to matching columns of mat0 and mat1.
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    return Matrix4(
+        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
+        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
+        mulPerElem( mat0.getCol2(), mat1.getCol2() ),
+        mulPerElem( mat0.getCol3(), mat1.getCol3() )
+    );
+}
+// Returns the matrix whose columns are Vector4::xAxis(), yAxis(), zAxis() and wAxis().
+inline const Matrix4 Matrix4::identity( )
+{
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4::yAxis( ),
+        Vector4::zAxis( ),
+        Vector4::wAxis( )
+    );
+}
+// Overwrites the xyz parts of columns 0-2 with the columns of mat3 (via setXYZ); column 3 is not touched. Returns *this.
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    mCol0.setXYZ( mat3.getCol0() );
+    mCol1.setXYZ( mat3.getCol1() );
+    mCol2.setXYZ( mat3.getCol2() );
+    return *this;
+}
+// Returns the Matrix3 formed from the xyz parts of columns 0-2.
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    return Matrix3(
+        mCol0.getXYZ( ),
+        mCol1.getXYZ( ),
+        mCol2.getXYZ( )
+    );
+}
+// Overwrites the xyz part of column 3 with translateVec (via setXYZ) and returns *this.
+inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
+{
+    mCol3.setXYZ( translateVec );
+    return *this;
+}
+// Returns the xyz part of column 3.
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    return mCol3.getXYZ( );
+}
+
+// Returns the 4x4 matrix rotating by `radians` about the x axis; columns 0 and 3 stay the x and w axes.
+inline const Matrix4 Matrix4::rotationX( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4( 0.0f, cosAngle, sinAngle, 0.0f ),
+        Vector4( 0.0f, -sinAngle, cosAngle, 0.0f ),
+        Vector4::wAxis( )
+    );
+}
+
+// Returns the 4x4 matrix rotating by `radians` about the y axis; columns 1 and 3 stay the y and w axes.
+inline const Matrix4 Matrix4::rotationY( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    return Matrix4(
+        Vector4( cosAngle, 0.0f, -sinAngle, 0.0f ),
+        Vector4::yAxis( ),
+        Vector4( sinAngle, 0.0f, cosAngle, 0.0f ),
+        Vector4::wAxis( )
+    );
+}
+
+// Returns the 4x4 matrix rotating by `radians` about the z axis; columns 2 and 3 stay the z and w axes.
+inline const Matrix4 Matrix4::rotationZ( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    return Matrix4(
+        Vector4( cosAngle, sinAngle, 0.0f, 0.0f ),
+        Vector4( -sinAngle, cosAngle, 0.0f, 0.0f ),
+        Vector4::zAxis( ),
+        Vector4::wAxis( )
+    );
+}
+
+// Combined rotation from the x, y, z angles in radiansXYZ (rotationZ * rotationY * rotationX), with w = 0 and a wAxis column 3.
+inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ.getX() );
+    const float cosX = cosf( radiansXYZ.getX() );
+    const float sinY = sinf( radiansXYZ.getY() );
+    const float cosY = cosf( radiansXYZ.getY() );
+    const float sinZ = sinf( radiansXYZ.getZ() );
+    const float cosZ = cosf( radiansXYZ.getZ() );
+    const float cosZsinY = ( cosZ * sinY );
+    const float sinZsinY = ( sinZ * sinY );
+    return Matrix4(
+        Vector4( ( cosZ * cosY ), ( sinZ * cosY ), -sinY, 0.0f ),
+        Vector4( ( ( cosZsinY * sinX ) - ( sinZ * cosX ) ), ( ( sinZsinY * sinX ) + ( cosZ * cosX ) ), ( cosY * sinX ), 0.0f ),
+        Vector4( ( ( cosZsinY * cosX ) + ( sinZ * sinX ) ), ( ( sinZsinY * cosX ) - ( cosZ * sinX ) ), ( cosY * cosX ), 0.0f ),
+        Vector4::wAxis( )
+    );
+}
+
+// 4x4 rotation by `radians` about the axis unitVec (used as given, not normalized here); column 3 is the w axis.
+inline const Matrix4 Matrix4::rotation( float radians, const Vector3 & unitVec )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    const float ax = unitVec.getX();
+    const float ay = unitVec.getY();
+    const float az = unitVec.getZ();
+    const float axy = ( ax * ay );
+    const float ayz = ( ay * az );
+    const float azx = ( az * ax );
+    const float oneMinusCos = ( 1.0f - cosAngle );
+    return Matrix4(
+        Vector4( ( ( ( ax * ax ) * oneMinusCos ) + cosAngle ), ( ( axy * oneMinusCos ) + ( az * sinAngle ) ), ( ( azx * oneMinusCos ) - ( ay * sinAngle ) ), 0.0f ),
+        Vector4( ( ( axy * oneMinusCos ) - ( az * sinAngle ) ), ( ( ( ay * ay ) * oneMinusCos ) + cosAngle ), ( ( ayz * oneMinusCos ) + ( ax * sinAngle ) ), 0.0f ),
+        Vector4( ( ( azx * oneMinusCos ) + ( ay * sinAngle ) ), ( ( ayz * oneMinusCos ) - ( ax * sinAngle ) ), ( ( ( az * az ) * oneMinusCos ) + cosAngle ), 0.0f ),
+        Vector4::wAxis( )
+    );
+}
+// Returns Transform3::rotation( unitQuat ) converted to 4x4 by the Matrix4( const Transform3 & ) constructor.
+inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
+{
+    return Matrix4( Transform3::rotation( unitQuat ) );
+}
+// Returns a matrix with scaleVec's x, y, z on the diagonal of columns 0-2 and Vector4::wAxis() as column 3.
+inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
+{
+    return Matrix4(
+        Vector4( scaleVec.getX(), 0.0f, 0.0f, 0.0f ),
+        Vector4( 0.0f, scaleVec.getY(), 0.0f, 0.0f ),
+        Vector4( 0.0f, 0.0f, scaleVec.getZ(), 0.0f ),
+        Vector4::wAxis( )
+    );
+}
+// Scales columns 0, 1, 2 of mat by scaleVec's x, y, z respectively; column 3 is passed through unchanged.
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
+{
+    return Matrix4(
+        ( mat.getCol0() * scaleVec.getX( ) ),
+        ( mat.getCol1() * scaleVec.getY( ) ),
+        ( mat.getCol2() * scaleVec.getZ( ) ),
+        mat.getCol3()
+    );
+}
+
+// Scales the rows of mat: every column is multiplied element-wise by ( scaleVec, 1 ), so the w row is left unscaled.
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
+{
+    const Vector4 rowScale( scaleVec, 1.0f );
+    return Matrix4(
+        mulPerElem( mat.getCol0(), rowScale ),
+        mulPerElem( mat.getCol1(), rowScale ),
+        mulPerElem( mat.getCol2(), rowScale ),
+        mulPerElem( mat.getCol3(), rowScale )
+    );
+}
+// Returns a matrix with the x, y, z axes as columns 0-2 and ( translateVec, 1 ) as column 3.
+inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
+{
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4::yAxis( ),
+        Vector4::zAxis( ),
+        Vector4( translateVec, 1.0f )
+    );
+}
+// Builds an eye frame (z = normalize( eyePos - lookAtPos ), x = normalize( cross( up, z ) ), y = cross( z, x ), origin = eyePos) and returns its orthoInverse.
+inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
+{
+    Matrix4 m4EyeFrame;
+    Vector3 v3X, v3Y, v3Z;
+    v3Y = normalize( upVec );
+    v3Z = normalize( ( eyePos - lookAtPos ) );
+    v3X = normalize( cross( v3Y, v3Z ) );
+    v3Y = cross( v3Z, v3X ); // replaces the normalized up vector with one perpendicular to both z and x
+    m4EyeFrame = Matrix4( Vector4( v3X ), Vector4( v3Y ), Vector4( v3Z ), Vector4( eyePos ) );
+    return orthoInverse( m4EyeFrame );
+}
+
+// Perspective projection from a vertical field of view, aspect ratio and near/far distances; inputs are not validated.
+inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    const float cotHalfFov = tanf( ( (float)( _VECTORMATH_PI_OVER_2 ) - ( 0.5f * fovyRadians ) ) );
+    const float invNearMinusFar = ( 1.0f / ( zNear - zFar ) );
+    return Matrix4(
+        Vector4( ( cotHalfFov / aspect ), 0.0f, 0.0f, 0.0f ),
+        Vector4( 0.0f, cotHalfFov, 0.0f, 0.0f ),
+        Vector4( 0.0f, 0.0f, ( ( zNear + zFar ) * invNearMinusFar ), -1.0f ),
+        Vector4( 0.0f, 0.0f, ( ( ( zNear * zFar ) * invNearMinusFar ) * 2.0f ), 0.0f )
+    );
+}
+
+// Perspective projection from the left/right/bottom/top extents and near/far distances; zero-width ranges are not guarded.
+inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    const float rightPlusLeft = ( right + left );
+    const float topPlusBottom = ( top + bottom );
+    const float nearPlusFar = ( zNear + zFar );
+    const float invRightMinusLeft = ( 1.0f / ( right - left ) );
+    const float invTopMinusBottom = ( 1.0f / ( top - bottom ) );
+    const float invNearMinusFar = ( 1.0f / ( zNear - zFar ) );
+    const float twoNear = ( zNear + zNear );
+    return Matrix4(
+        Vector4( ( twoNear * invRightMinusLeft ), 0.0f, 0.0f, 0.0f ),
+        Vector4( 0.0f, ( twoNear * invTopMinusBottom ), 0.0f, 0.0f ),
+        Vector4( ( rightPlusLeft * invRightMinusLeft ), ( topPlusBottom * invTopMinusBottom ), ( nearPlusFar * invNearMinusFar ), -1.0f ),
+        Vector4( 0.0f, 0.0f, ( ( twoNear * invNearMinusFar ) * zFar ), 0.0f )
+    );
+}
+
+// Orthographic projection from the left/right/bottom/top extents and near/far distances; zero-width ranges are not guarded.
+inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    const float rightPlusLeft = ( right + left );
+    const float topPlusBottom = ( top + bottom );
+    const float nearPlusFar = ( zNear + zFar );
+    const float invRightMinusLeft = ( 1.0f / ( right - left ) );
+    const float invTopMinusBottom = ( 1.0f / ( top - bottom ) );
+    const float invNearMinusFar = ( 1.0f / ( zNear - zFar ) );
+    return Matrix4(
+        Vector4( ( invRightMinusLeft + invRightMinusLeft ), 0.0f, 0.0f, 0.0f ),
+        Vector4( 0.0f, ( invTopMinusBottom + invTopMinusBottom ), 0.0f, 0.0f ),
+        Vector4( 0.0f, 0.0f, ( invNearMinusFar + invNearMinusFar ), 0.0f ),
+        Vector4( ( -rightPlusLeft * invRightMinusLeft ), ( -topPlusBottom * invTopMinusBottom ), ( nearPlusFar * invNearMinusFar ), 1.0f )
+    );
+}
+
+// Column-wise select: each result column is select() applied to the matching columns of mat0 and mat1.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
+{
+    const Vector4 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 picked3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the four rows of mat, one print() call per row.
+inline void print( const Matrix4 & mat )
+{
+    for ( int row = 0; row < 4; ++row )
+        print( mat.getRow( row ) );
+}
+
+// Debug helper: prints "<name>:" on its own line, then the rows of mat.
+inline void print( const Matrix4 & mat, const char * name )
+{
+    printf( "%s:\n", name );
+    print( mat );
+}
+
+#endif
+
+// Copy constructor: duplicates all four columns of tfrm.
+inline Transform3::Transform3( const Transform3 & tfrm )
+    : mCol0( tfrm.mCol0 ),
+      mCol1( tfrm.mCol1 ),
+      mCol2( tfrm.mCol2 ),
+      mCol3( tfrm.mCol3 )
+{ }
+
+// Broadcast constructor: every column is built from the same scalar.
+inline Transform3::Transform3( float scalar )
+    : mCol0( scalar ),
+      mCol1( scalar ),
+      mCol2( scalar ),
+      mCol3( scalar )
+{ }
+
+// Column constructor: stores the four given vectors as columns 0 through 3.
+inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
+    : mCol0( _col0 ),
+      mCol1( _col1 ),
+      mCol2( _col2 ),
+      mCol3( _col3 )
+{ }
+
+// Columns 0-2 are taken from the Matrix3, column 3 from translateVec.
+inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
+{
+    setUpper3x3( tfrm ).setTranslation( translateVec );
+}
+
+// Columns 0-2 are taken from the Matrix3 built from unitQuat, column 3 from translateVec.
+inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    setUpper3x3( Matrix3( unitQuat ) ).setTranslation( translateVec );
+}
+
+inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
+{
+    this->mCol0 = _col0; // overwrite column 0
+    return *this;        // returned so setter calls can be chained
+}
+
+inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
+{
+    this->mCol1 = _col1; // overwrite column 1
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
+{
+    this->mCol2 = _col2; // overwrite column 2
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
+{
+    this->mCol3 = _col3; // overwrite column 3
+    return *this;
+}
+
+inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
+{
+    ( &mCol0 )[ col ] = vec; // indexes the columns as an array starting at mCol0; col is not range-checked
+    return *this;
+}
+
+// Writes element i of vec into row `row` of column i, for i = 0..3.
+inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
+{
+    Vector3 * const cols[4] = { &mCol0, &mCol1, &mCol2, &mCol3 };
+    for ( int i = 0; i < 4; ++i )
+        cols[i]->setElem( row, vec.getElem( i ) );
+    return *this;
+}
+
+// Sets one element by copying column `col`, patching its row `row`, and storing the column back.
+inline Transform3 & Transform3::setElem( int col, int row, float val )
+{
+    Vector3 column = getCol( col );
+    column.setElem( row, val );
+    // setCol returns *this, so its result is returned directly
+    return setCol( col, column );
+}
+
+inline float Transform3::getElem( int col, int row ) const
+{
+    return getCol( col ).getElem( row ); // fetch the column by value, then read one element of it
+}
+
+inline const Vector3 Transform3::getCol0( ) const
+{
+    return this->mCol0; // column 0, returned by value
+}
+
+inline const Vector3 Transform3::getCol1( ) const
+{
+    return this->mCol1; // column 1, returned by value
+}
+
+inline const Vector3 Transform3::getCol2( ) const
+{
+    return this->mCol2; // column 2, returned by value
+}
+
+inline const Vector3 Transform3::getCol3( ) const
+{
+    return this->mCol3; // column 3, returned by value
+}
+
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    return ( &mCol0 )[ col ]; // columns indexed as an array starting at mCol0; col is not range-checked
+}
+
+inline const Vector4 Transform3::getRow( int row ) const
+{
+    return Vector4( mCol0[ row ], mCol1[ row ], mCol2[ row ], mCol3[ row ] ); // one element from each of the four columns
+}
+
+inline Vector3 & Transform3::operator []( int col )
+{
+    return ( &mCol0 )[ col ]; // writable reference to the selected column
+}
+
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    return ( &mCol0 )[ col ]; // const overload returns a copy
+}
+
+// Copy assignment: copies the four columns of tfrm and returns *this.
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    // each setter returns *this, so the calls chain
+    this->setCol0( tfrm.mCol0 ).setCol1( tfrm.mCol1 );
+    this->setCol2( tfrm.mCol2 ).setCol3( tfrm.mCol3 );
+    return *this;
+}
+
+// General inverse of a Transform3: inverts the upper 3x3 through cross products and the
+// reciprocal determinant, then maps the negated translation through that inverse.
+// A zero determinant is not guarded against.
+inline const Transform3 inverse( const Transform3 & tfrm )
+{
+    const Vector3 c0 = tfrm.getCol0(), c1 = tfrm.getCol1(), c2 = tfrm.getCol2(), t = tfrm.getCol3();
+    const Vector3 cof0 = cross( c1, c2 );
+    const Vector3 cof1 = cross( c2, c0 );
+    const Vector3 cof2 = cross( c0, c1 );
+    const float detinv = 1.0f / dot( c2, cof2 );
+    // matching components of the three cross products, scaled by 1/det, form each inverse column
+    const Vector3 inv0( cof0.getX() * detinv, cof1.getX() * detinv, cof2.getX() * detinv );
+    const Vector3 inv1( cof0.getY() * detinv, cof1.getY() * detinv, cof2.getY() * detinv );
+    const Vector3 inv2( cof0.getZ() * detinv, cof1.getZ() * detinv, cof2.getZ() * detinv );
+    // evaluated as x-term + ( y-term + z-term ), then negated
+    const Vector3 invTranslation = -( ( inv0 * t.getX() ) + ( ( inv1 * t.getY() ) + ( inv2 * t.getZ() ) ) );
+    return Transform3( inv0, inv1, inv2, invTranslation );
+}
+
+// Transposes the upper 3x3 and maps the negated translation through the transpose. Presumably only a
+// true inverse when the upper 3x3 is orthonormal (inferred from the name; not checked here).
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{
+    const Vector3 c0 = tfrm.getCol0(), c1 = tfrm.getCol1(), c2 = tfrm.getCol2(), t = tfrm.getCol3();
+    // columns of the result are the rows of the input's upper 3x3
+    const Vector3 inv0( c0.getX(), c1.getX(), c2.getX() );
+    const Vector3 inv1( c0.getY(), c1.getY(), c2.getY() );
+    const Vector3 inv2( c0.getZ(), c1.getZ(), c2.getZ() );
+    // evaluated as x-term + ( y-term + z-term ), then negated
+    const Vector3 invTranslation = -( ( inv0 * t.getX() ) + ( ( inv1 * t.getY() ) + ( inv2 * t.getZ() ) ) );
+    return Transform3( inv0, inv1, inv2, invTranslation );
+}
+
+// Applies the Vector3 overload of absPerElem to each of the four columns.
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    const Vector3 abs0 = absPerElem( tfrm.getCol0() );
+    const Vector3 abs1 = absPerElem( tfrm.getCol1() );
+    const Vector3 abs2 = absPerElem( tfrm.getCol2() );
+    const Vector3 abs3 = absPerElem( tfrm.getCol3() );
+    return Transform3( abs0, abs1, abs2, abs3 );
+}
+
+// Applies only columns 0-2 to vec; the translation column (column 3) is not added.
+inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
+{
+    const float outX = ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() );
+    const float outY = ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() );
+    const float outZ = ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() );
+    return Vector3( outX, outY, outZ );
+}
+
+// Applies columns 0-2 to pnt and then adds the translation column (column 3).
+inline const Point3 Transform3::operator *( const Point3 & pnt ) const
+{
+    const float outX = ( ( ( mCol0.getX() * pnt.getX() ) + ( mCol1.getX() * pnt.getY() ) ) + ( mCol2.getX() * pnt.getZ() ) ) + mCol3.getX();
+    const float outY = ( ( ( mCol0.getY() * pnt.getX() ) + ( mCol1.getY() * pnt.getY() ) ) + ( mCol2.getY() * pnt.getZ() ) ) + mCol3.getY();
+    const float outZ = ( ( ( mCol0.getZ() * pnt.getX() ) + ( mCol1.getZ() * pnt.getY() ) ) + ( mCol2.getZ() * pnt.getZ() ) ) + mCol3.getZ();
+    return Point3( outX, outY, outZ );
+}
+
+// Composition: columns 0-2 of tfrm go through the Vector3 product, column 3 through the Point3 product.
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    const Transform3 & lhs = *this;
+    const Vector3 col0 = lhs * tfrm.mCol0;
+    const Vector3 col1 = lhs * tfrm.mCol1;
+    const Vector3 col2 = lhs * tfrm.mCol2;
+    return Transform3( col0, col1, col2, Vector3( lhs * Point3( tfrm.mCol3 ) ) );
+}
+
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    // operator = returns *this, so the assignment expression is returned directly
+    return ( *this = ( *this ) * tfrm );
+}
+
+// Applies the Vector3 overload of mulPerElem to each pair of matching columns.
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    const Vector3 prod0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 prod1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 prod2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 prod3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+// Identity transform: the three unit axes in columns 0-2 and a zero translation column.
+inline const Transform3 Transform3::identity( )
+{
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    const Vector3 noTranslation( 0.0f );
+    return Transform3( axisX, axisY, axisZ, noTranslation );
+}
+
+// Copies the three Matrix3 columns into columns 0-2; column 3 is not touched.
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    this->setCol0( tfrm.getCol0() );
+    this->setCol1( tfrm.getCol1() );
+    return this->setCol2( tfrm.getCol2() );
+}
+
+inline const Matrix3 Transform3::getUpper3x3( ) const
+{
+    return Matrix3( this->getCol0(), this->getCol1(), this->getCol2() ); // columns 0-2 only
+}
+
+inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
+{
+    // the translation is stored in column 3; setCol3 returns *this
+    return this->setCol3( translateVec );
+}
+
+inline const Vector3 Transform3::getTranslation( ) const
+{
+    return this->getCol3(); // the translation is column 3
+}
+
+// Rotation about the x axis by `radians`, with a zero translation column.
+inline const Transform3 Transform3::rotationX( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    // column 0 is the unchanged x axis
+    const Vector3 col0 = Vector3::xAxis( );
+    const Vector3 col1( 0.0f, cosAngle, sinAngle );
+    const Vector3 col2( 0.0f, -sinAngle, cosAngle );
+    const Vector3 col3( 0.0f );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Rotation about the y axis by `radians`, with a zero translation column.
+inline const Transform3 Transform3::rotationY( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    // column 1 is the unchanged y axis
+    const Vector3 col0( cosAngle, 0.0f, -sinAngle );
+    const Vector3 col1 = Vector3::yAxis( );
+    const Vector3 col2( sinAngle, 0.0f, cosAngle );
+    const Vector3 col3( 0.0f );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Rotation about the z axis by `radians`, with a zero translation column.
+inline const Transform3 Transform3::rotationZ( float radians )
+{
+    const float sinAngle = sinf( radians );
+    const float cosAngle = cosf( radians );
+    // column 2 is the unchanged z axis
+    const Vector3 col0( cosAngle, sinAngle, 0.0f );
+    const Vector3 col1( -sinAngle, cosAngle, 0.0f );
+    const Vector3 col2 = Vector3::zAxis( );
+    const Vector3 col3( 0.0f );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Rotation assembled from the three angles in radiansXYZ (its x, y and z components); zero translation column.
+inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const float sX = sinf( radiansXYZ.getX() );
+    const float cX = cosf( radiansXYZ.getX() );
+    const float sY = sinf( radiansXYZ.getY() );
+    const float cY = cosf( radiansXYZ.getY() );
+    const float sZ = sinf( radiansXYZ.getZ() );
+    const float cZ = cosf( radiansXYZ.getZ() );
+    // products shared by columns 1 and 2
+    const float cZsY = cZ * sY;
+    const float sZsY = sZ * sY;
+    const Vector3 col0( cZ * cY, sZ * cY, -sY );
+    const Vector3 col1( ( cZsY * sX ) - ( sZ * cX ), ( sZsY * sX ) + ( cZ * cX ), cY * sX );
+    const Vector3 col2( ( cZsY * cX ) + ( sZ * sX ), ( sZsY * cX ) - ( cZ * sX ), cY * cX );
+    const Vector3 col3( 0.0f );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+inline const Transform3 Transform3::rotation( float radians, const Vector3 & unitVec )
+{
+    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f, 0.0f, 0.0f ) ); // Matrix3 rotation in columns 0-2, zero translation
+}
+
+inline const Transform3 Transform3::rotation( const Quat & unitQuat )
+{
+    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f, 0.0f, 0.0f ) ); // Matrix3 built from the quaternion, zero translation
+}
+
+// Scale transform: scaleVec's components on the diagonal of columns 0-2, zero translation column.
+inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
+{
+    const Vector3 col0( scaleVec.getX(), 0.0f, 0.0f );
+    const Vector3 col1( 0.0f, scaleVec.getY(), 0.0f );
+    const Vector3 col2( 0.0f, 0.0f, scaleVec.getZ() );
+    const Vector3 col3( 0.0f );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Scales columns 0, 1 and 2 of tfrm by scaleVec's x, y and z respectively; column 3 is copied unchanged.
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
+{
+    const Vector3 col0 = tfrm.getCol0() * scaleVec.getX( );
+    const Vector3 col1 = tfrm.getCol1() * scaleVec.getY( );
+    const Vector3 col2 = tfrm.getCol2() * scaleVec.getZ( );
+    const Vector3 col3 = tfrm.getCol3();
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Multiplies every column of tfrm, including the translation column, element-wise by scaleVec.
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
+{
+    const Vector3 col0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 col1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 col2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 col3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Pure translation: the three unit axes in columns 0-2 and translateVec in column 3.
+inline const Transform3 Transform3::translation( const Vector3 & translateVec )
+{
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    // translateVec is passed straight through as column 3
+    return Transform3( axisX, axisY, axisZ, translateVec );
+}
+
+// Column-wise select: each result column is select() applied to the matching columns of tfrm0 and tfrm1.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
+{
+    const Vector3 picked0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 picked1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 picked2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 picked3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints rows 0-2 (the columns are Vector3, so there are three rows), one print() call per row.
+inline void print( const Transform3 & tfrm )
+{
+    for ( int row = 0; row < 3; ++row )
+        print( tfrm.getRow( row ) );
+}
+
+inline void print( const Transform3 & tfrm, const char * name )
+{
+    printf( "%s:\n", name ); // label line first
+    print( tfrm );           // then the rows
+}
+
+#endif
+
+inline Quat::Quat( const Matrix3 & tfrm ) // converts the 3x3 matrix tfrm to a quaternion; presumably tfrm is a pure rotation — not checked here
+{
+    float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    int negTrace, ZgtX, ZgtY, YgtX;
+    int largestXorY, largestYorZ, largestZorX;
+
+    xx = tfrm.getCol0().getX(); // naming: first letter is the component read, second letter the column (x = col 0, y = col 1, z = col 2)
+    yx = tfrm.getCol0().getY();
+    zx = tfrm.getCol0().getZ();
+    xy = tfrm.getCol1().getX();
+    yy = tfrm.getCol1().getY();
+    zy = tfrm.getCol1().getZ();
+    xz = tfrm.getCol2().getX();
+    yz = tfrm.getCol2().getY();
+    zz = tfrm.getCol2().getZ();
+
+    trace = ( ( xx + yy ) + zz ); // sum of the diagonal
+
+    negTrace = ( trace < 0.0f ); // the three "largest" flags below are all 0 unless the trace is negative
+    ZgtX = zz > xx;
+    ZgtY = zz > yy;
+    YgtX = yy > xx;
+    largestXorY = ( !ZgtX || !ZgtY ) && negTrace; // zz is not strictly greater than both xx and yy
+    largestYorZ = ( YgtX || ZgtX ) && negTrace; // xx is exceeded by yy or by zz
+    largestZorX = ( ZgtY || !YgtX ) && negTrace; // yy is exceeded by zz, or is not greater than xx
+    
+    if ( largestXorY )
+    {
+        zz = -zz; // sign flips here and below change which combination feeds the radicand and the differences
+        xy = -xy;
+    }
+    if ( largestYorZ )
+    {
+        xx = -xx;
+        yz = -yz;
+    }
+    if ( largestZorX )
+    {
+        yy = -yy;
+        zx = -zx;
+    }
+
+    radicand = ( ( ( xx + yy ) + zz ) + 1.0f ); // uses the possibly sign-flipped diagonal
+    scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) );
+
+    tmpx = ( ( zy - yz ) * scale );
+    tmpy = ( ( xz - zx ) * scale );
+    tmpz = ( ( yx - xy ) * scale );
+    tmpw = ( radicand * scale ); // equals 0.5 * sqrt( radicand )
+    qx = tmpx; // default component order, kept as-is when neither swizzle flag below is set
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+
+    if ( largestXorY )
+    {
+        qx = tmpw; // reverse the component order: ( x, y, z, w ) <- ( tmpw, tmpz, tmpy, tmpx )
+        qy = tmpz;
+        qz = tmpy;
+        qw = tmpx;
+    }
+    if ( largestYorZ )
+    {
+        tmpx = qx; // swap qx <-> qy and qz <-> qw, reusing tmpx / tmpz as scratch
+        tmpz = qz;
+        qx = qy;
+        qy = tmpx;
+        qz = qw;
+        qw = tmpz;
+    }
+
+    mX = qx;
+    mY = qy;
+    mZ = qz;
+    mW = qw;
+}
+
+// Outer product: column i of the result is tfrm0 scaled by element i of tfrm1.
+inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
+{
+    const Vector3 col0 = tfrm0 * tfrm1.getX( );
+    const Vector3 col1 = tfrm0 * tfrm1.getY( );
+    const Vector3 col2 = tfrm0 * tfrm1.getZ( );
+    return Matrix3( col0, col1, col2 );
+}
+
+// Outer product: column i of the result is tfrm0 scaled by element i of tfrm1.
+inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
+{
+    const Vector4 col0 = tfrm0 * tfrm1.getX( );
+    const Vector4 col1 = tfrm0 * tfrm1.getY( );
+    const Vector4 col2 = tfrm0 * tfrm1.getZ( );
+    const Vector4 col3 = tfrm0 * tfrm1.getW( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Row vector times matrix: element i of the result is the dot product of vec with column i of mat, written out term by term.
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
+{
+    const float outX = ( ( vec.getX() * mat.getCol0().getX() ) + ( vec.getY() * mat.getCol0().getY() ) ) + ( vec.getZ() * mat.getCol0().getZ() );
+    const float outY = ( ( vec.getX() * mat.getCol1().getX() ) + ( vec.getY() * mat.getCol1().getY() ) ) + ( vec.getZ() * mat.getCol1().getZ() );
+    const float outZ = ( ( vec.getX() * mat.getCol2().getX() ) + ( vec.getY() * mat.getCol2().getY() ) ) + ( vec.getZ() * mat.getCol2().getZ() );
+    return Vector3( outX, outY, outZ );
+}
+
+// Skew-symmetric matrix built from vec; presumably M * v equals cross( vec, v ) — confirm against Matrix3 * Vector3, which is not visible here.
+inline const Matrix3 crossMatrix( const Vector3 & vec )
+{
+    const float x = vec.getX(), y = vec.getY(), z = vec.getZ();
+    const Vector3 col0( 0.0f, z, -y );
+    const Vector3 col1( -z, 0.0f, x );
+    return Matrix3( col0, col1, Vector3( y, -x, 0.0f ) );
+}
+
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
+{
+    return Matrix3( cross( vec, mat.getCol0( ) ), cross( vec, mat.getCol1( ) ), cross( vec, mat.getCol2( ) ) ); // cross( vec, column ) for each column of mat
+}
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/src/bullet/vectormath/scalar/quat_aos.h b/src/bullet/vectormath/scalar/quat_aos.h
new file mode 100644
index 00000000..764e0170
--- /dev/null
+++ b/src/bullet/vectormath/scalar/quat_aos.h
@@ -0,0 +1,433 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_CPP_H
+#define _VECTORMATH_QUAT_AOS_CPP_H
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+// Copy constructor: copies x, y, z and w from quat.
+inline Quat::Quat( const Quat & quat )
+    : mX( quat.mX ),
+      mY( quat.mY ),
+      mZ( quat.mZ ),
+      mW( quat.mW )
+{ }
+
+// Component constructor: stores the four given floats as x, y, z, w.
+inline Quat::Quat( float _x, float _y, float _z, float _w )
+    : mX( _x ),
+      mY( _y ),
+      mZ( _z ),
+      mW( _w )
+{ }
+
+// x, y and z come from the Vector3; w is given separately.
+inline Quat::Quat( const Vector3 & xyz, float _w )
+{
+    setXYZ( xyz ).setW( _w );
+}
+
+// Copies the four components of a Vector4 in x, y, z, w order.
+inline Quat::Quat( const Vector4 & vec )
+    : mX( vec.getX() ),
+      mY( vec.getY() ),
+      mZ( vec.getZ() ),
+      mW( vec.getW() )
+{ }
+
+// Broadcast constructor: all four components are set to scalar.
+inline Quat::Quat( float scalar )
+    : mX( scalar ),
+      mY( scalar ),
+      mZ( scalar ),
+      mW( scalar )
+{ }
+
+inline const Quat Quat::identity( )
+{
+    return Quat( 0.0f, 0.0f, 0.0f, 1.0f ); // zero vector part, w = 1
+}
+
+inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 )
+{
+    return quat0 + ( ( quat1 - quat0 ) * t ); // component-wise linear blend; the result is not re-normalized
+}
+
+// Spherical interpolation between two unit quaternions. unitQuat0 is negated when the dot product is
+// negative, and plain linear weights are used once the cosine reaches _VECTORMATH_SLERP_TOL.
+inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 )
+{
+    float cosAngle = dot( unitQuat0, unitQuat1 );
+    const bool flipStart = ( cosAngle < 0.0f );
+    if ( flipStart ) {
+        cosAngle = -cosAngle;
+    }
+    const Quat start = flipStart ? -unitQuat0 : unitQuat0;
+    // linear weights are the fallback; overwritten when cosAngle is below the tolerance
+    float scale0 = 1.0f - t;
+    float scale1 = t;
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        const float angle = acosf( cosAngle );
+        const float recipSinAngle = 1.0f / sinf( angle );
+        scale0 = sinf( ( 1.0f - t ) * angle ) * recipSinAngle;
+        scale1 = sinf( t * angle ) * recipSinAngle;
+    }
+    // the two weighted quaternions are summed component-wise
+    return ( start * scale0 ) + ( unitQuat1 * scale1 );
+}
+
+// Slerps the pair (0,3) and the pair (1,2) at t, then slerps between those two results at 2t(1-t).
+inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 )
+{
+    const Quat outerPair = slerp( t, unitQuat0, unitQuat3 );
+    const Quat innerPair = slerp( t, unitQuat1, unitQuat2 );
+    return slerp( ( 2.0f * t ) * ( 1.0f - t ), outerPair, innerPair );
+}
+
+inline void loadXYZW( Quat & quat, const float * fptr )
+{
+    quat = Quat( *( fptr + 0 ), *( fptr + 1 ), *( fptr + 2 ), *( fptr + 3 ) ); // reads four consecutive floats in x, y, z, w order
+}
+
+inline void storeXYZW( const Quat & quat, float * fptr )
+{
+    *( fptr + 0 ) = quat.getX(); // writes four consecutive floats, x first
+    *( fptr + 1 ) = quat.getY();
+    *( fptr + 2 ) = quat.getZ();
+    *( fptr + 3 ) = quat.getW();
+}
+
+// Copy assignment: copies x, y, z and w, then returns *this.
+inline Quat & Quat::operator =( const Quat & quat )
+{
+    // each setter returns *this, so the calls chain
+    this->setX( quat.mX ).setY( quat.mY );
+    this->setZ( quat.mZ ).setW( quat.mW );
+    return *this;
+}
+
+// Copies vec into x, y and z; w keeps its current value.
+inline Quat & Quat::setXYZ( const Vector3 & vec )
+{
+    this->setX( vec.getX() );
+    this->setY( vec.getY() );
+    return this->setZ( vec.getZ() );
+}
+
+inline const Vector3 Quat::getXYZ( ) const
+{
+    return Vector3( this->mX, this->mY, this->mZ ); // vector part only; w is dropped
+}
+
+inline Quat & Quat::setX( float _x )
+{
+    this->mX = _x; // store the new x component
+    return *this;  // returned so setter calls can be chained
+}
+
+inline float Quat::getX( ) const
+{
+    return this->mX; // x component
+}
+
+inline Quat & Quat::setY( float _y )
+{
+    this->mY = _y; // store the new y component
+    return *this;
+}
+
+inline float Quat::getY( ) const
+{
+    return this->mY; // y component
+}
+
+inline Quat & Quat::setZ( float _z )
+{
+    this->mZ = _z; // store the new z component
+    return *this;
+}
+
+inline float Quat::getZ( ) const
+{
+    return this->mZ; // z component
+}
+
+inline Quat & Quat::setW( float _w )
+{
+    this->mW = _w; // store the new w component
+    return *this;
+}
+
+inline float Quat::getW( ) const
+{
+    return this->mW; // w component
+}
+
+inline Quat & Quat::setElem( int idx, float value )
+{
+    ( &mX )[ idx ] = value; // treats the members starting at mX as a float array; idx is not range-checked
+    return *this;
+}
+
+inline float Quat::getElem( int idx ) const
+{
+    return ( &mX )[ idx ]; // same array-style indexing, read-only
+}
+
+inline float & Quat::operator []( int idx )
+{
+    return ( &mX )[ idx ]; // writable reference to the selected component
+}
+
+inline float Quat::operator []( int idx ) const
+{
+    return ( &mX )[ idx ]; // const overload returns the value
+}
+
+// Component-wise sum.
+inline const Quat Quat::operator +( const Quat & quat ) const
+{
+    const float sumX = mX + quat.mX;
+    const float sumY = mY + quat.mY;
+    const float sumZ = mZ + quat.mZ;
+    const float sumW = mW + quat.mW;
+    return Quat( sumX, sumY, sumZ, sumW );
+}
+
+// Component-wise difference.
+inline const Quat Quat::operator -( const Quat & quat ) const
+{
+    const float diffX = mX - quat.mX;
+    const float diffY = mY - quat.mY;
+    const float diffZ = mZ - quat.mZ;
+    const float diffW = mW - quat.mW;
+    return Quat( diffX, diffY, diffZ, diffW );
+}
+
+// Multiplies every component by scalar.
+inline const Quat Quat::operator *( float scalar ) const
+{
+    const float scaledX = mX * scalar;
+    const float scaledY = mY * scalar;
+    const float scaledZ = mZ * scalar;
+    const float scaledW = mW * scalar;
+    return Quat( scaledX, scaledY, scaledZ, scaledW );
+}
+
+inline Quat & Quat::operator +=( const Quat & quat )
+{
+    // operator = returns *this, so the assignment expression is returned directly
+    return ( *this = ( *this ) + quat );
+}
+
+inline Quat & Quat::operator -=( const Quat & quat )
+{
+    // implemented through the binary operator -
+    return ( *this = ( *this ) - quat );
+}
+
+inline Quat & Quat::operator *=( float scalar )
+{
+    // implemented through the scalar operator *
+    return ( *this = ( *this ) * scalar );
+}
+
+// Divides every component by scalar (one division per component; scalar == 0 is not guarded).
+inline const Quat Quat::operator /( float scalar ) const
+{
+    const float quotX = mX / scalar;
+    const float quotY = mY / scalar;
+    const float quotZ = mZ / scalar;
+    const float quotW = mW / scalar;
+    return Quat( quotX, quotY, quotZ, quotW );
+}
+
+inline Quat & Quat::operator /=( float scalar )
+{
+    // implemented through the binary operator /; operator = returns *this
+    return ( *this = ( *this ) / scalar );
+}
+
+// Negates all four components (unlike conj(), which keeps w).
+inline const Quat Quat::operator -( ) const
+{
+    const float negX = -mX;
+    const float negY = -mY;
+    const float negZ = -mZ;
+    const float negW = -mW;
+    return Quat( negX, negY, negZ, negW );
+}
+
+inline const Quat operator *( float scalar, const Quat & quat )
+{
+    return ( quat * scalar ); // delegates to Quat::operator *( float )
+}
+
+// Four-component dot product, accumulated in x, y, z, w order.
+inline float dot( const Quat & quat0, const Quat & quat1 )
+{
+    float sum = quat0.getX() * quat1.getX();
+    sum += quat0.getY() * quat1.getY();
+    sum += quat0.getZ() * quat1.getZ();
+    sum += quat0.getW() * quat1.getW();
+    return sum;
+}
+
+// Sum of the squared components (the squared length), accumulated in x, y, z, w order.
+inline float norm( const Quat & quat )
+{
+    float sum = quat.getX() * quat.getX();
+    sum += quat.getY() * quat.getY();
+    sum += quat.getZ() * quat.getZ();
+    sum += quat.getW() * quat.getW();
+    return sum;
+}
+
+inline float length( const Quat & quat )
+{
+    return ::sqrtf( norm( quat ) ); // norm() is the sum of squares, so this is its square root
+}
+
+// Scales quat by the reciprocal of its length; a zero-length input is not guarded against.
+inline const Quat normalize( const Quat & quat )
+{
+    // norm() returns the sum of the squared components
+    const float lengthSquared = norm( quat );
+    const float invLength = 1.0f / sqrtf( lengthSquared );
+    const float unitX = quat.getX() * invLength;
+    const float unitY = quat.getY() * invLength;
+    const float unitZ = quat.getZ() * invLength;
+    const float unitW = quat.getW() * invLength;
+    return Quat( unitX, unitY, unitZ, unitW );
+}
+
+// Quaternion built from two unit vectors (their cross and dot products). When dot == -1 the square root is zero and the reciprocal is infinite; not handled.
+inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{
+    const float twoCosHalf = sqrtf( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) );
+    const Vector3 vectorPart = cross( unitVec0, unitVec1 ) * ( 1.0f / twoCosHalf );
+    return Quat( vectorPart, twoCosHalf * 0.5f );
+}
+
+// Axis-angle form: the vector part is unitVec * sin( radians / 2 ) and w is cos( radians / 2 ).
+inline const Quat Quat::rotation( float radians, const Vector3 & unitVec )
+{
+    const float halfAngle = radians * 0.5f;
+    const Vector3 vectorPart = unitVec * sinf( halfAngle );
+    // w carries the cosine of the half angle
+    return Quat( vectorPart, cosf( halfAngle ) );
+}
+
+// Returns ( sin( radians / 2 ), 0, 0, cos( radians / 2 ) ).
+inline const Quat Quat::rotationX( float radians )
+{
+    const float halfAngle = radians * 0.5f;
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    return Quat( sinHalf, 0.0f, 0.0f, cosHalf );
+}
+
+// Returns ( 0, sin( radians / 2 ), 0, cos( radians / 2 ) ).
+inline const Quat Quat::rotationY( float radians )
+{
+    const float halfAngle = radians * 0.5f;
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    return Quat( 0.0f, sinHalf, 0.0f, cosHalf );
+}
+
+// Returns ( 0, 0, sin( radians / 2 ), cos( radians / 2 ) ).
+inline const Quat Quat::rotationZ( float radians )
+{
+    const float halfAngle = radians * 0.5f;
+    const float sinHalf = sinf( halfAngle );
+    const float cosHalf = cosf( halfAngle );
+    return Quat( 0.0f, 0.0f, sinHalf, cosHalf );
+}
+
+// Quaternion product ( *this ) * quat, expanded component by component.
+inline const Quat Quat::operator *( const Quat & quat ) const
+{
+    const float prodX = ( ( ( mW * quat.mX ) + ( mX * quat.mW ) ) + ( mY * quat.mZ ) ) - ( mZ * quat.mY );
+    const float prodY = ( ( ( mW * quat.mY ) + ( mY * quat.mW ) ) + ( mZ * quat.mX ) ) - ( mX * quat.mZ );
+    const float prodZ = ( ( ( mW * quat.mZ ) + ( mZ * quat.mW ) ) + ( mX * quat.mY ) ) - ( mY * quat.mX );
+    const float prodW = ( ( ( mW * quat.mW ) - ( mX * quat.mX ) ) - ( mY * quat.mY ) ) - ( mZ * quat.mZ );
+    return Quat( prodX, prodY, prodZ, prodW );
+}
+
+inline Quat & Quat::operator *=( const Quat & quat )
+{
+    // right-multiplies by quat; operator = returns *this
+    return ( *this = ( *this ) * quat );
+}
+
+// Computes quat * vec * conj( quat ) expanded into scalars; tmpW holds the negated w of the intermediate product quat * vec.
+inline const Vector3 rotate( const Quat & quat, const Vector3 & vec )
+{
+    const float qx = quat.getX(), qy = quat.getY(), qz = quat.getZ(), qw = quat.getW();
+    const float tmpX = ( ( qw * vec.getX() ) + ( qy * vec.getZ() ) ) - ( qz * vec.getY() );
+    const float tmpY = ( ( qw * vec.getY() ) + ( qz * vec.getX() ) ) - ( qx * vec.getZ() );
+    const float tmpZ = ( ( qw * vec.getZ() ) + ( qx * vec.getY() ) ) - ( qy * vec.getX() );
+    const float tmpW = ( ( qx * vec.getX() ) + ( qy * vec.getY() ) ) + ( qz * vec.getZ() );
+    const float outX = ( ( ( tmpW * qx ) + ( tmpX * qw ) ) - ( tmpY * qz ) ) + ( tmpZ * qy );
+    const float outY = ( ( ( tmpW * qy ) + ( tmpY * qw ) ) - ( tmpZ * qx ) ) + ( tmpX * qz );
+    const float outZ = ( ( ( tmpW * qz ) + ( tmpZ * qw ) ) - ( tmpX * qy ) ) + ( tmpY * qx );
+    return Vector3( outX, outY, outZ );
+}
+
+inline const Quat conj( const Quat & quat )
+{
+    return Quat( -quat.getX(), -quat.getY(), -quat.getZ(), quat.getW() ); // negate the vector part, keep w
+}
+
+// Returns a copy of quat1 when select1 is true, otherwise a copy of quat0.
+inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 )
+{
+    const Quat & chosen = select1 ? quat1 : quat0;
+    // rebuilt component by component from the chosen input
+    const float x = chosen.getX(), y = chosen.getY();
+    const float z = chosen.getZ(), w = chosen.getW();
+    return Quat( x, y, z, w );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Quat & quat )
+{
+    printf( "( %f %f %f %f )\n", quat.getX(), quat.getY(), quat.getZ(), quat.getW() ); // debug helper: components in x, y, z, w order on one line
+}
+
+inline void print( const Quat & quat, const char * name )
+{
+    printf( "%s: ( %f %f %f %f )\n", name, quat.getX(), quat.getY(), quat.getZ(), quat.getW() ); // same output, prefixed with the caller's label
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/src/bullet/vectormath/scalar/vec_aos.h b/src/bullet/vectormath/scalar/vec_aos.h
new file mode 100644
index 00000000..46d4d6b3
--- /dev/null
+++ b/src/bullet/vectormath/scalar/vec_aos.h
@@ -0,0 +1,1426 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_CPP_H
+#define _VECTORMATH_VEC_AOS_CPP_H
+
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+// Copy constructor: copies x, y and z from vec.
+inline Vector3::Vector3( const Vector3 & vec )
+    : mX( vec.mX ),
+      mY( vec.mY ),
+      mZ( vec.mZ )
+{ }
+
+// Component constructor: stores the three given floats as x, y, z.
+inline Vector3::Vector3( float _x, float _y, float _z )
+    : mX( _x ),
+      mY( _y ),
+      mZ( _z )
+{ }
+
+// Copies the x, y and z of a Point3.
+inline Vector3::Vector3( const Point3 & pnt )
+    : mX( pnt.getX() ),
+      mY( pnt.getY() ),
+      mZ( pnt.getZ() )
+{ }
+
+// Broadcast constructor: all three components are set to scalar.
+inline Vector3::Vector3( float scalar )
+    : mX( scalar ),
+      mY( scalar ),
+      mZ( scalar )
+{ }
+
+inline const Vector3 Vector3::xAxis( )
+{
+    return Vector3( 1.0f, 0.0f, 0.0f ); // unit vector along x
+}
+
+inline const Vector3 Vector3::yAxis( )
+{
+    return Vector3( 0.0f, 1.0f, 0.0f ); // unit vector along y
+}
+
+inline const Vector3 Vector3::zAxis( )
+{
+    return Vector3( 0.0f, 0.0f, 1.0f ); // unit vector along z
+}
+
+inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 )
+{
+    return vec0 + ( ( vec1 - vec0 ) * t ); // component-wise linear blend from vec0 (t = 0) towards vec1 (t = 1)
+}
+
+// Spherical interpolation between two unit vectors; plain linear weights are used once the cosine of the angle reaches _VECTORMATH_SLERP_TOL.
+inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 )
+{
+    const float cosAngle = dot( unitVec0, unitVec1 );
+    // linear weights are the fallback; overwritten when cosAngle is below the tolerance
+    float scale0 = 1.0f - t;
+    float scale1 = t;
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        const float angle = acosf( cosAngle );
+        const float recipSinAngle = 1.0f / sinf( angle );
+        scale0 = sinf( ( 1.0f - t ) * angle ) * recipSinAngle;
+        scale1 = sinf( t * angle ) * recipSinAngle;
+    }
+    return ( unitVec0 * scale0 ) + ( unitVec1 * scale1 );
+}
+
+inline void loadXYZ( Vector3 & vec, const float * fptr )
+{
+    vec = Vector3( *( fptr + 0 ), *( fptr + 1 ), *( fptr + 2 ) ); // reads three consecutive floats in x, y, z order
+}
+
+inline void storeXYZ( const Vector3 & vec, float * fptr )
+{
+    *( fptr + 0 ) = vec.getX(); // writes three consecutive floats, x first
+    *( fptr + 1 ) = vec.getY();
+    *( fptr + 2 ) = vec.getZ();
+}
+
+// Expands three 16-bit half floats at hfptr into vec; half zero/denormals become zero and half infinity/NaN become infinity (sign bit kept).
+inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr )
+{
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+    for (int i = 0; i < 3; ++i) {
+        const unsigned int half = hfptr[i];
+        const unsigned int signBit = half >> 15;
+        const unsigned int halfExp = (half >> 10) & 0x1Fu;  // 5 exponent bits
+        const unsigned int halfMant = half & 0x3FFu;        // 10 mantissa bits
+        unsigned int floatExp;
+        unsigned int floatMant;
+        if (halfExp == 0) {
+            // zero or denormal -> zero
+            floatExp = 0;
+            floatMant = 0;
+        } else if (halfExp == 31) {
+            // infinity or nan -> infinity
+            floatExp = 255;
+            floatMant = 0;
+        } else {
+            // re-bias the exponent from 15 to 127 and move the mantissa to the top of the 23-bit field
+            floatExp = halfExp + (127 - 15);
+            floatMant = halfMant << 13;
+        }
+        Data32 d;
+        d.u32 = (signBit << 31) | (floatExp << 23) | floatMant;
+        vec[i] = d.f32;
+    }
+}
+
+// Packs the x, y, z of vec into three 16-bit half floats at hfptr.
+// The mantissa is truncated, not rounded. Values too small for a normal half
+// (including float zero and denormals) become signed zero; values too large,
+// infinity and NaN all become signed infinity.
+inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr )
+{
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    // float exponents at or above / at or below these bounds do not map to a normal half
+    const unsigned int overflowExp = 127 - 15 + 31;
+    const unsigned int underflowExp = 127 - 15;
+
+    for (int i = 0; i < 3; ++i) {
+        Data32 d;
+        d.f32 = vec[i];
+
+        const unsigned int signBit = d.u32 >> 31;
+        const unsigned int floatExp = (d.u32 >> 23) & 0xFFu;   // 8 exponent bits
+        const unsigned int floatMant = d.u32 & 0x7FFFFFu;      // 23 mantissa bits
+
+        unsigned int halfExp;
+        unsigned int halfMant;
+        if (floatExp >= overflowExp) {
+            // overflow, infinity or nan (float exponent 255) -> infinity
+            halfExp = 31;
+            halfMant = 0;
+        } else if (floatExp <= underflowExp) {
+            // zero, denormal or underflow -> zero
+            halfExp = 0;
+            halfMant = 0;
+        } else {
+            // normal half: re-bias the exponent and keep the top 10 mantissa bits
+            halfExp = floatExp - underflowExp;
+            halfMant = floatMant >> 13;
+        }
+
+        hfptr[i] = (unsigned short)((signBit << 15) | (halfExp << 10) | halfMant);
+    }
+}
+
+// Copy assignment: copies x, y and z, then returns *this (each setter returns *this).
+inline Vector3 & Vector3::operator =( const Vector3 & vec )
+{
+    this->setX( vec.mX );
+    this->setY( vec.mY );
+    return this->setZ( vec.mZ );
+}
+
+inline Vector3 & Vector3::setX( float _x )
+{
+    this->mX = _x; // store the new x component
+    return *this;  // returned so setter calls can be chained
+}
+
+inline float Vector3::getX( ) const
+{
+    return this->mX; // x component
+}
+
+inline Vector3 & Vector3::setY( float _y )
+{
+    this->mY = _y; // store the new y component
+    return *this;
+}
+
+inline float Vector3::getY( ) const
+{
+    return this->mY; // y component
+}
+
+inline Vector3 & Vector3::setZ( float _z )
+{
+    this->mZ = _z; // store the new z component
+    return *this;
+}
+
+inline float Vector3::getZ( ) const
+{
+    return this->mZ; // z component
+}
+
+// Write value to the element at offset idx from mX; idx is not range-checked.
+inline Vector3 & Vector3::setElem( int idx, float value )
+{
+    ( &mX )[idx] = value; return *this;
+}
+
+// Read the element at offset idx from mX; idx is not range-checked.
+// Relies on the elements being stored contiguously starting at mX.
+inline float Vector3::getElem( int idx ) const
+{ return ( &mX )[idx]; }
+
+// Mutable subscript: reference to the element at offset idx from mX.
+// idx is not range-checked; relies on contiguous element storage.
+inline float & Vector3::operator []( int idx )
+{ return ( &mX )[idx]; }
+
+// Read-only subscript: value of the element at offset idx from mX.
+// idx is not range-checked; relies on contiguous element storage.
+inline float Vector3::operator []( int idx ) const
+{ return ( &mX )[idx]; }
+
+// Element-wise sum of this vector and vec.
+inline const Vector3 Vector3::operator +( const Vector3 & vec ) const
+{
+    const float sumX = mX + vec.mX;
+    const float sumY = mY + vec.mY;
+    const float sumZ = mZ + vec.mZ;
+    return Vector3( sumX, sumY, sumZ );
+}
+
+// Element-wise difference: this vector minus vec.
+inline const Vector3 Vector3::operator -( const Vector3 & vec ) const
+{
+    const float diffX = mX - vec.mX;
+    const float diffY = mY - vec.mY;
+    const float diffZ = mZ - vec.mZ;
+    return Vector3( diffX, diffY, diffZ );
+}
+
+// Offset a point by this vector: element-wise sum, returned as a Point3.
+inline const Point3 Vector3::operator +( const Point3 & pnt ) const
+{
+    const float sumX = mX + pnt.getX();
+    const float sumY = mY + pnt.getY();
+    const float sumZ = mZ + pnt.getZ();
+    return Point3( sumX, sumY, sumZ );
+}
+
+// Multiply every element by scalar.
+inline const Vector3 Vector3::operator *( float scalar ) const
+{
+    const float scaledX = mX * scalar;
+    const float scaledY = mY * scalar;
+    const float scaledZ = mZ * scalar;
+    return Vector3( scaledX, scaledY, scaledZ );
+}
+
+// In-place addition, expressed through operator + followed by assignment.
+inline Vector3 & Vector3::operator +=( const Vector3 & vec )
+{
+    return ( *this = ( *this + vec ) );
+}
+
+// In-place subtraction, expressed through operator - followed by assignment.
+inline Vector3 & Vector3::operator -=( const Vector3 & vec )
+{
+    return ( *this = ( *this - vec ) );
+}
+
+// In-place scaling, expressed through operator * followed by assignment.
+inline Vector3 & Vector3::operator *=( float scalar )
+{
+    return ( *this = ( *this * scalar ) );
+}
+
+// Divide every element by scalar (a true division per element, no reciprocal).
+inline const Vector3 Vector3::operator /( float scalar ) const
+{
+    const float quotX = mX / scalar;
+    const float quotY = mY / scalar;
+    const float quotZ = mZ / scalar;
+    return Vector3( quotX, quotY, quotZ );
+}
+
+// In-place division, expressed through operator / followed by assignment.
+inline Vector3 & Vector3::operator /=( float scalar )
+{
+    return ( *this = ( *this / scalar ) );
+}
+
+// Unary minus: a new vector with every element negated.
+inline const Vector3 Vector3::operator -( ) const
+{
+    const float negX = -mX;
+    const float negY = -mY;
+    const float negZ = -mZ;
+    return Vector3( negX, negY, negZ );
+}
+
+// Scalar-on-the-left multiplication; forwards to Vector3::operator *( float ).
+// Lets ( scalar * vec ) be written as well as ( vec * scalar ).
+inline const Vector3 operator *( float scalar, const Vector3 & vec )
+{ return vec.operator *( scalar ); }
+
+// Multiply two 3-D vectors element by element.
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const float prodX = vec0.getX() * vec1.getX();
+    const float prodY = vec0.getY() * vec1.getY();
+    const float prodZ = vec0.getZ() * vec1.getZ();
+    return Vector3( prodX, prodY, prodZ );
+}
+
+// Divide vec0 by vec1 element by element; zero divisors are not guarded.
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const float quotX = vec0.getX() / vec1.getX();
+    const float quotY = vec0.getY() / vec1.getY();
+    const float quotZ = vec0.getZ() / vec1.getZ();
+    return Vector3( quotX, quotY, quotZ );
+}
+
+// Reciprocal ( 1 / element ) of each element; zero elements are not guarded.
+inline const Vector3 recipPerElem( const Vector3 & vec )
+{
+    const float recipX = 1.0f / vec.getX();
+    const float recipY = 1.0f / vec.getY();
+    const float recipZ = 1.0f / vec.getZ();
+    return Vector3( recipX, recipY, recipZ );
+}
+
+// Square root of each element, computed with sqrtf.
+inline const Vector3 sqrtPerElem( const Vector3 & vec )
+{
+    const float rootX = sqrtf( vec.getX() );
+    const float rootY = sqrtf( vec.getY() );
+    const float rootZ = sqrtf( vec.getZ() );
+    return Vector3( rootX, rootY, rootZ );
+}
+
+// Reciprocal square root ( 1 / sqrtf( element ) ) of each element.
+inline const Vector3 rsqrtPerElem( const Vector3 & vec )
+{
+    const float rsqX = 1.0f / sqrtf( vec.getX() );
+    const float rsqY = 1.0f / sqrtf( vec.getY() );
+    const float rsqZ = 1.0f / sqrtf( vec.getZ() );
+    return Vector3( rsqX, rsqY, rsqZ );
+}
+
+// Absolute value of each element, computed with fabsf.
+inline const Vector3 absPerElem( const Vector3 & vec )
+{
+    const float absX = fabsf( vec.getX() );
+    const float absY = fabsf( vec.getY() );
+    const float absZ = fabsf( vec.getZ() );
+    return Vector3( absX, absY, absZ );
+}
+
+// Magnitudes from vec0; an element is made negative only when vec1's is < 0.
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const float outX = ( vec1.getX() < 0.0f ) ? -fabsf( vec0.getX() ) : fabsf( vec0.getX() );
+    const float outY = ( vec1.getY() < 0.0f ) ? -fabsf( vec0.getY() ) : fabsf( vec0.getY() );
+    const float outZ = ( vec1.getZ() < 0.0f ) ? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() );
+    return Vector3( outX, outY, outZ );
+}
+
+// Element-wise maximum; vec1's element is used unless vec0's is strictly greater.
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const float maxX = ( vec0.getX() > vec1.getX() ) ? vec0.getX() : vec1.getX();
+    const float maxY = ( vec0.getY() > vec1.getY() ) ? vec0.getY() : vec1.getY();
+    const float maxZ = ( vec0.getZ() > vec1.getZ() ) ? vec0.getZ() : vec1.getZ();
+    return Vector3( maxX, maxY, maxZ );
+}
+
+// Largest of the three elements: x against y first, then z against the winner.
+// Whenever a comparison is false the right-hand operand of the ?: is kept.
+inline float maxElem( const Vector3 & vec )
+{
+    const float xy = ( vec.getX() > vec.getY() ) ? vec.getX() : vec.getY();
+    return ( vec.getZ() > xy ) ? vec.getZ() : xy;
+}
+
+// Element-wise minimum; vec1's element is used unless vec0's is strictly smaller.
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const float minX = ( vec0.getX() < vec1.getX() ) ? vec0.getX() : vec1.getX();
+    const float minY = ( vec0.getY() < vec1.getY() ) ? vec0.getY() : vec1.getY();
+    const float minZ = ( vec0.getZ() < vec1.getZ() ) ? vec0.getZ() : vec1.getZ();
+    return Vector3( minX, minY, minZ );
+}
+
+// Smallest of the three elements: x against y first, then z against the winner.
+// Whenever a comparison is false the right-hand operand of the ?: is kept.
+inline float minElem( const Vector3 & vec )
+{
+    const float xy = ( vec.getX() < vec.getY() ) ? vec.getX() : vec.getY();
+    return ( vec.getZ() < xy ) ? vec.getZ() : xy;
+}
+
+// Add up the elements, accumulating left to right: ( x + y ) + z.
+inline float sum( const Vector3 & vec )
+{
+    float total = vec.getX();
+    total += vec.getY();
+    return total + vec.getZ();
+}
+
+// Dot product of two 3-D vectors, accumulated in x, y, z order.
+inline float dot( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    float acc = vec0.getX() * vec1.getX();
+    acc += vec0.getY() * vec1.getY();
+    acc += vec0.getZ() * vec1.getZ();
+    return acc;
+}
+
+// Squared length: sum of the squared elements, accumulated in x, y, z order.
+inline float lengthSqr( const Vector3 & vec )
+{
+    float acc = vec.getX() * vec.getX();
+    acc += vec.getY() * vec.getY();
+    acc += vec.getZ() * vec.getZ();
+    return acc;
+}
+
+// Euclidean length: square root of lengthSqr( vec ).
+// The global-namespace sqrtf is named explicitly.
+inline float length( const Vector3 & vec )
+{ const float squared = lengthSqr( vec ); return ::sqrtf( squared ); }
+
+// Scale vec to unit length by multiplying with 1 / sqrtf( lengthSqr( vec ) ).
+// There is no guard for a zero-length input: the reciprocal is then a
+// division by zero and the result elements are whatever that arithmetic yields.
+inline const Vector3 normalize( const Vector3 & vec )
+{
+    const float invLen = 1.0f / sqrtf( lengthSqr( vec ) );
+    const float unitX = vec.getX() * invLen;
+    const float unitY = vec.getY() * invLen;
+    const float unitZ = vec.getZ() * invLen;
+    return Vector3( unitX, unitY, unitZ );
+}
+
+// Cross product vec0 x vec1.
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 )
+{
+    const float crossX = ( vec0.getY() * vec1.getZ() ) - ( vec0.getZ() * vec1.getY() );
+    const float crossY = ( vec0.getZ() * vec1.getX() ) - ( vec0.getX() * vec1.getZ() );
+    const float crossZ = ( vec0.getX() * vec1.getY() ) - ( vec0.getY() * vec1.getX() );
+    return Vector3( crossX, crossY, crossZ );
+}
+
+// Return the elements of vec1 when select1 is true, otherwise those of vec0.
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 )
+{
+    const Vector3 & chosen = select1 ? vec1 : vec0;
+    const float pickX = chosen.getX();
+    const float pickY = chosen.getY();
+    return Vector3( pickX, pickY, chosen.getZ() );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: print the three elements on one line via printf.
+// Sits inside an #ifdef _VECTORMATH_DEBUG region.
+inline void print( const Vector3 & vec )
+{ printf( "( %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ() ); }
+
+// Debug helper: print "name: " followed by the three elements via printf.
+// Sits inside an #ifdef _VECTORMATH_DEBUG region.
+inline void print( const Vector3 & vec, const char * name )
+{ printf( "%s: ( %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ() ); }
+
+#endif
+
+// Copy constructor: duplicates all four elements of vec.
+inline Vector4::Vector4( const Vector4 & vec )
+    : mX( vec.mX ),
+      mY( vec.mY ),
+      mZ( vec.mZ ),
+      mW( vec.mW )
+{ }
+
+// Build a 4-D vector from its four elements.
+inline Vector4::Vector4( float _x, float _y, float _z, float _w )
+    : mX( _x ),
+      mY( _y ),
+      mZ( _z ),
+      mW( _w )
+{ }
+
+// Build a 4-D vector from a 3-D vector ( x, y, z ) plus a separate w value.
+inline Vector4::Vector4( const Vector3 & xyz, float _w )
+{
+    setXYZ( xyz ).setW( _w );
+}
+
+// Promote a 3-D vector to 4-D: x, y, z are copied and w is set to 0.
+inline Vector4::Vector4( const Vector3 & vec )
+    : mX( vec.getX() ),
+      mY( vec.getY() ),
+      mZ( vec.getZ() ),
+      mW( 0.0f )
+{ }
+
+// Promote a 3-D point to 4-D: x, y, z are copied and w is set to 1.
+inline Vector4::Vector4( const Point3 & pnt )
+    : mX( pnt.getX() ),
+      mY( pnt.getY() ),
+      mZ( pnt.getZ() ),
+      mW( 1.0f )
+{ }
+
+// Copy the x, y, z and w values of a quaternion into a 4-D vector.
+inline Vector4::Vector4( const Quat & quat )
+    : mX( quat.getX() ),
+      mY( quat.getY() ),
+      mZ( quat.getZ() ),
+      mW( quat.getW() )
+{ }
+
+// Fill all four elements with the same scalar value.
+inline Vector4::Vector4( float scalar )
+    : mX( scalar ),
+      mY( scalar ),
+      mZ( scalar ),
+      mW( scalar )
+{ }
+
+// Unit vector along x: ( 1, 0, 0, 0 ).
+// A new Vector4 is constructed and returned by value on every call.
+inline const Vector4 Vector4::xAxis( )
+{ return Vector4( 1.0f, 0.0f, 0.0f, 0.0f ); }
+
+// Unit vector along y: ( 0, 1, 0, 0 ).
+// A new Vector4 is constructed and returned by value on every call.
+inline const Vector4 Vector4::yAxis( )
+{ return Vector4( 0.0f, 1.0f, 0.0f, 0.0f ); }
+
+// Unit vector along z: ( 0, 0, 1, 0 ).
+// A new Vector4 is constructed and returned by value on every call.
+inline const Vector4 Vector4::zAxis( )
+{ return Vector4( 0.0f, 0.0f, 1.0f, 0.0f ); }
+
+// Unit vector along w: ( 0, 0, 0, 1 ).
+// A new Vector4 is constructed and returned by value on every call.
+inline const Vector4 Vector4::wAxis( )
+{ return Vector4( 0.0f, 0.0f, 0.0f, 1.0f ); }
+
+// Linear interpolation: vec0 + ( vec1 - vec0 ) * t.
+// t is not clamped, so values outside [0, 1] extrapolate.
+inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 )
+{ const Vector4 delta = vec1 - vec0; return vec0 + ( delta * t ); }
+
+// Spherical interpolation between two 4-D vectors that are expected to be unit
+// length; at or above _VECTORMATH_SLERP_TOL the weights are the linear ones.
+inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 )
+{
+    const float cosAngle = dot( unitVec0, unitVec1 );
+    float weight0 = 1.0f - t;
+    float weight1 = t;
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        const float angle = acosf( cosAngle );
+        const float recipSinAngle = 1.0f / sinf( angle );
+        weight0 = sinf( ( 1.0f - t ) * angle ) * recipSinAngle;
+        weight1 = sinf( t * angle ) * recipSinAngle;
+    }
+    return ( unitVec0 * weight0 ) + ( unitVec1 * weight1 );
+}
+
+// Read fptr[0..3] into vec as x, y, z, w.
+// The four floats are read before vec is assigned.
+inline void loadXYZW( Vector4 & vec, const float * fptr )
+{ const Vector4 loaded( fptr[0], fptr[1], fptr[2], fptr[3] ); vec = loaded; }
+
+inline void storeXYZW( const Vector4 & vec, float * fptr )
+{   // Memory order of the written floats is x, y, z, w.
+    *( fptr + 0 ) = vec.getX();
+    *( fptr + 1 ) = vec.getY();
+    *( fptr + 2 ) = vec.getZ();
+    *( fptr + 3 ) = vec.getW();
+}
+
+// Expand the four half-precision bit patterns at hfptr[0..3] into vec.
+// Lossy cases, matching the branches below:
+//   - half exponent 0 (zero and denormals) becomes a signed zero;
+//   - half exponent 31 (infinity and NaN) becomes a signed infinity.
+inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr )
+{
+    // Reinterpret a 32-bit pattern as a float through a union.
+    union Bits32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    for (int elem = 0; elem < 4; ++elem) {
+        const unsigned int half = hfptr[elem];
+        const unsigned int signBit = half >> 15;
+        const unsigned int halfExp = (half >> 10) & 0x1Fu;
+        const unsigned int halfMan = half & 0x3FFu;
+
+        unsigned int floatExp = 0;    // zero / denormal -> zero
+        unsigned int floatMan = 0;
+        if (halfExp == 31) {
+            floatExp = 255;           // infinity / NaN -> infinity
+        } else if (halfExp != 0) {
+            floatExp = halfExp + (127 - 15);    // re-bias 15 -> 127
+            floatMan = halfMan << 13;           // 10-bit -> 23-bit mantissa
+        }
+
+        Bits32 word;
+        word.u32 = (signBit << 31) | (floatExp << 23) | floatMan;
+        vec[elem] = word.f32;
+    }
+}
+
+// Pack the four elements of vec into IEEE-754 half-precision bit patterns
+// written to hfptr[0..3].  The conversion is lossy:
+//   - float zeros, denormals and magnitudes too small for a normalised
+//     half are written as a signed zero;
+//   - NaN, infinity and magnitudes too large for a half are written as a
+//     signed infinity;
+//   - all other values keep the top 10 mantissa bits (truncated, not
+//     rounded).
+inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr )
+{
+    // Reinterpret a float as its 32-bit pattern through a union.
+    union Bits32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    // Biased float exponents that bound the range of normalised halves.
+    const unsigned int kRebias = 127 - 15;
+    const unsigned int kInfExp = kRebias + 31;
+
+    for (int elem = 0; elem < 4; ++elem) {
+        Bits32 word;
+        word.f32 = vec[elem];
+
+        const unsigned int signBit = word.u32 >> 31;
+        const unsigned int floatExp = (word.u32 >> 23) & 0xFFu;
+        const unsigned int floatMan = word.u32 & 0x7FFFFFu;
+
+        // Default result: flush to a signed zero.
+        unsigned int halfExp = 0;
+        unsigned int halfMan = 0;
+        if (floatExp >= kInfExp) {
+            halfExp = 31;    // NaN, infinity or overflow -> infinity
+        } else if (floatExp > kRebias) {
+            halfExp = floatExp - kRebias;
+            halfMan = floatMan >> 13;
+        }
+
+        const unsigned int packed = (signBit << 15) | (halfExp << 10) | halfMan;
+        hfptr[elem] = (unsigned short)packed;
+    }
+}
+
+// Copy assignment: take over the x, y, z and w elements of vec.
+inline Vector4 & Vector4::operator =( const Vector4 & vec )
+{
+    return this->setX( vec.mX )
+                .setY( vec.mY )
+                .setZ( vec.mZ )
+                .setW( vec.mW );
+}
+
+// Overwrite x, y and z from a 3-D vector; the w element is left as it is.
+inline Vector4 & Vector4::setXYZ( const Vector3 & vec )
+{
+    return this->setX( vec.getX() )
+                .setY( vec.getY() )
+                .setZ( vec.getZ() );
+}
+
+// Extract the x, y and z elements as a 3-D vector; w is dropped.
+// The result is a new Vector3 returned by value.
+inline const Vector3 Vector4::getXYZ( ) const
+{ return Vector3( this->mX, this->mY, this->mZ ); }
+
+// Replace the x element with _x; returns this vector so calls can be chained.
+inline Vector4 & Vector4::setX( float _x )
+{
+    this->mX = _x; return *this;
+}
+
+// Read accessor for the x element.
+// Returns the element by value; the vector is not modified.
+inline float Vector4::getX( ) const
+{ return this->mX; }
+
+// Replace the y element with _y; returns this vector so calls can be chained.
+inline Vector4 & Vector4::setY( float _y )
+{
+    this->mY = _y; return *this;
+}
+
+// Read accessor for the y element.
+// Returns the element by value; the vector is not modified.
+inline float Vector4::getY( ) const
+{ return this->mY; }
+
+// Replace the z element with _z; returns this vector so calls can be chained.
+inline Vector4 & Vector4::setZ( float _z )
+{
+    this->mZ = _z; return *this;
+}
+
+// Read accessor for the z element.
+// Returns the element by value; the vector is not modified.
+inline float Vector4::getZ( ) const
+{ return this->mZ; }
+
+// Replace the w element with _w; returns this vector so calls can be chained.
+inline Vector4 & Vector4::setW( float _w )
+{
+    this->mW = _w; return *this;
+}
+
+// Read accessor for the w element.
+// Returns the element by value; the vector is not modified.
+inline float Vector4::getW( ) const
+{ return this->mW; }
+
+// Write value to the element at offset idx from mX; idx is not range-checked.
+inline Vector4 & Vector4::setElem( int idx, float value )
+{
+    ( &mX )[idx] = value; return *this;
+}
+
+// Read the element at offset idx from mX; idx is not range-checked.
+// Relies on the elements being stored contiguously starting at mX.
+inline float Vector4::getElem( int idx ) const
+{ return ( &mX )[idx]; }
+
+// Mutable subscript: reference to the element at offset idx from mX.
+// idx is not range-checked; relies on contiguous element storage.
+inline float & Vector4::operator []( int idx )
+{ return ( &mX )[idx]; }
+
+// Read-only subscript: value of the element at offset idx from mX.
+// idx is not range-checked; relies on contiguous element storage.
+inline float Vector4::operator []( int idx ) const
+{ return ( &mX )[idx]; }
+
+// Element-wise sum of this vector and vec.
+inline const Vector4 Vector4::operator +( const Vector4 & vec ) const
+{
+    const float sumX = mX + vec.mX;
+    const float sumY = mY + vec.mY;
+    const float sumZ = mZ + vec.mZ;
+    const float sumW = mW + vec.mW;
+    return Vector4( sumX, sumY, sumZ, sumW );
+}
+
+// Element-wise difference: this vector minus vec.
+inline const Vector4 Vector4::operator -( const Vector4 & vec ) const
+{
+    const float diffX = mX - vec.mX;
+    const float diffY = mY - vec.mY;
+    const float diffZ = mZ - vec.mZ;
+    const float diffW = mW - vec.mW;
+    return Vector4( diffX, diffY, diffZ, diffW );
+}
+
+// Multiply every element by scalar.
+inline const Vector4 Vector4::operator *( float scalar ) const
+{
+    const float scaledX = mX * scalar;
+    const float scaledY = mY * scalar;
+    const float scaledZ = mZ * scalar;
+    const float scaledW = mW * scalar;
+    return Vector4( scaledX, scaledY, scaledZ, scaledW );
+}
+
+// In-place addition, expressed through operator + followed by assignment.
+inline Vector4 & Vector4::operator +=( const Vector4 & vec )
+{
+    return ( *this = ( *this + vec ) );
+}
+
+// In-place subtraction, expressed through operator - followed by assignment.
+inline Vector4 & Vector4::operator -=( const Vector4 & vec )
+{
+    return ( *this = ( *this - vec ) );
+}
+
+// In-place scaling, expressed through operator * followed by assignment.
+inline Vector4 & Vector4::operator *=( float scalar )
+{
+    return ( *this = ( *this * scalar ) );
+}
+
+// Divide every element by scalar (a true division per element, no reciprocal).
+inline const Vector4 Vector4::operator /( float scalar ) const
+{
+    const float quotX = mX / scalar;
+    const float quotY = mY / scalar;
+    const float quotZ = mZ / scalar;
+    const float quotW = mW / scalar;
+    return Vector4( quotX, quotY, quotZ, quotW );
+}
+
+// In-place division, expressed through operator / followed by assignment.
+inline Vector4 & Vector4::operator /=( float scalar )
+{
+    return ( *this = ( *this / scalar ) );
+}
+
+// Unary minus: a new vector with every element negated.
+inline const Vector4 Vector4::operator -( ) const
+{
+    const float negX = -mX;
+    const float negY = -mY;
+    const float negZ = -mZ;
+    const float negW = -mW;
+    return Vector4( negX, negY, negZ, negW );
+}
+
+// Scalar-on-the-left multiplication; forwards to Vector4::operator *( float ).
+// Lets ( scalar * vec ) be written as well as ( vec * scalar ).
+inline const Vector4 operator *( float scalar, const Vector4 & vec )
+{ return vec.operator *( scalar ); }
+
+// Multiply two 4-D vectors element by element.
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const float prodX = vec0.getX() * vec1.getX();
+    const float prodY = vec0.getY() * vec1.getY();
+    const float prodZ = vec0.getZ() * vec1.getZ();
+    const float prodW = vec0.getW() * vec1.getW();
+    return Vector4( prodX, prodY, prodZ, prodW );
+}
+
+// Divide vec0 by vec1 element by element; zero divisors are not guarded.
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const float quotX = vec0.getX() / vec1.getX();
+    const float quotY = vec0.getY() / vec1.getY();
+    const float quotZ = vec0.getZ() / vec1.getZ();
+    const float quotW = vec0.getW() / vec1.getW();
+    return Vector4( quotX, quotY, quotZ, quotW );
+}
+
+// Reciprocal ( 1 / element ) of each element; zero elements are not guarded.
+inline const Vector4 recipPerElem( const Vector4 & vec )
+{
+    const float recipX = 1.0f / vec.getX();
+    const float recipY = 1.0f / vec.getY();
+    const float recipZ = 1.0f / vec.getZ();
+    const float recipW = 1.0f / vec.getW();
+    return Vector4( recipX, recipY, recipZ, recipW );
+}
+
+// Square root of each element, computed with sqrtf.
+inline const Vector4 sqrtPerElem( const Vector4 & vec )
+{
+    const float rootX = sqrtf( vec.getX() );
+    const float rootY = sqrtf( vec.getY() );
+    const float rootZ = sqrtf( vec.getZ() );
+    const float rootW = sqrtf( vec.getW() );
+    return Vector4( rootX, rootY, rootZ, rootW );
+}
+
+// Reciprocal square root ( 1 / sqrtf( element ) ) of each element.
+inline const Vector4 rsqrtPerElem( const Vector4 & vec )
+{
+    const float rsqX = 1.0f / sqrtf( vec.getX() );
+    const float rsqY = 1.0f / sqrtf( vec.getY() );
+    const float rsqZ = 1.0f / sqrtf( vec.getZ() );
+    const float rsqW = 1.0f / sqrtf( vec.getW() );
+    return Vector4( rsqX, rsqY, rsqZ, rsqW );
+}
+
+// Absolute value of each element, computed with fabsf.
+inline const Vector4 absPerElem( const Vector4 & vec )
+{
+    const float absX = fabsf( vec.getX() );
+    const float absY = fabsf( vec.getY() );
+    const float absZ = fabsf( vec.getZ() );
+    const float absW = fabsf( vec.getW() );
+    return Vector4( absX, absY, absZ, absW );
+}
+
+// Magnitudes from vec0; an element is made negative only when vec1's is < 0.
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const float outX = ( vec1.getX() < 0.0f ) ? -fabsf( vec0.getX() ) : fabsf( vec0.getX() );
+    const float outY = ( vec1.getY() < 0.0f ) ? -fabsf( vec0.getY() ) : fabsf( vec0.getY() );
+    const float outZ = ( vec1.getZ() < 0.0f ) ? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() );
+    const float outW = ( vec1.getW() < 0.0f ) ? -fabsf( vec0.getW() ) : fabsf( vec0.getW() );
+    return Vector4( outX, outY, outZ, outW );
+}
+
+// Element-wise maximum; vec1's element is used unless vec0's is strictly greater.
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const float maxX = ( vec0.getX() > vec1.getX() ) ? vec0.getX() : vec1.getX();
+    const float maxY = ( vec0.getY() > vec1.getY() ) ? vec0.getY() : vec1.getY();
+    const float maxZ = ( vec0.getZ() > vec1.getZ() ) ? vec0.getZ() : vec1.getZ();
+    const float maxW = ( vec0.getW() > vec1.getW() ) ? vec0.getW() : vec1.getW();
+    return Vector4( maxX, maxY, maxZ, maxW );
+}
+
+// Largest of the four elements: x against y, then z, then w against the winner.
+// Whenever a comparison is false the right-hand operand of the ?: is kept.
+inline float maxElem( const Vector4 & vec )
+{
+    const float xy = ( vec.getX() > vec.getY() ) ? vec.getX() : vec.getY();
+    const float xyz = ( vec.getZ() > xy ) ? vec.getZ() : xy;
+    return ( vec.getW() > xyz ) ? vec.getW() : xyz;
+}
+
+// Element-wise minimum; vec1's element is used unless vec0's is strictly smaller.
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const float minX = ( vec0.getX() < vec1.getX() ) ? vec0.getX() : vec1.getX();
+    const float minY = ( vec0.getY() < vec1.getY() ) ? vec0.getY() : vec1.getY();
+    const float minZ = ( vec0.getZ() < vec1.getZ() ) ? vec0.getZ() : vec1.getZ();
+    const float minW = ( vec0.getW() < vec1.getW() ) ? vec0.getW() : vec1.getW();
+    return Vector4( minX, minY, minZ, minW );
+}
+
+// Smallest of the four elements: x against y, then z, then w against the winner.
+// Whenever a comparison is false the right-hand operand of the ?: is kept.
+inline float minElem( const Vector4 & vec )
+{
+    const float xy = ( vec.getX() < vec.getY() ) ? vec.getX() : vec.getY();
+    const float xyz = ( vec.getZ() < xy ) ? vec.getZ() : xy;
+    return ( vec.getW() < xyz ) ? vec.getW() : xyz;
+}
+
+// Add up the elements, accumulating left to right: ( ( x + y ) + z ) + w.
+inline float sum( const Vector4 & vec )
+{
+    float total = vec.getX();
+    total += vec.getY();
+    total += vec.getZ();
+    return total + vec.getW();
+}
+
+// Dot product of two 4-D vectors, accumulated in x, y, z, w order.
+inline float dot( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    float acc = vec0.getX() * vec1.getX();
+    acc += vec0.getY() * vec1.getY();
+    acc += vec0.getZ() * vec1.getZ();
+    acc += vec0.getW() * vec1.getW();
+    return acc;
+}
+
+// Squared length: sum of the squared elements, accumulated in x, y, z, w order.
+inline float lengthSqr( const Vector4 & vec )
+{
+    float acc = vec.getX() * vec.getX();
+    acc += vec.getY() * vec.getY();
+    acc += vec.getZ() * vec.getZ();
+    acc += vec.getW() * vec.getW();
+    return acc;
+}
+
+// Euclidean length: square root of lengthSqr( vec ).
+// The global-namespace sqrtf is named explicitly.
+inline float length( const Vector4 & vec )
+{ const float squared = lengthSqr( vec ); return ::sqrtf( squared ); }
+
+// Scale vec to unit length by multiplying with 1 / sqrtf( lengthSqr( vec ) ).
+// There is no guard for a zero-length input: the reciprocal is then a
+// division by zero and the result elements are whatever that arithmetic yields.
+inline const Vector4 normalize( const Vector4 & vec )
+{
+    const float invLen = 1.0f / sqrtf( lengthSqr( vec ) );
+    const float unitX = vec.getX() * invLen;
+    const float unitY = vec.getY() * invLen;
+    const float unitZ = vec.getZ() * invLen;
+    const float unitW = vec.getW() * invLen;
+    return Vector4( unitX, unitY, unitZ, unitW );
+}
+
+// Return the elements of vec1 when select1 is true, otherwise those of vec0.
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 )
+{
+    const Vector4 & chosen = select1 ? vec1 : vec0;
+    const float pickX = chosen.getX();
+    const float pickY = chosen.getY();
+    const float pickZ = chosen.getZ();
+    return Vector4( pickX, pickY, pickZ, chosen.getW() );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: print the four elements on one line via printf.
+// Sits inside an #ifdef _VECTORMATH_DEBUG region.
+inline void print( const Vector4 & vec )
+{ printf( "( %f %f %f %f )\n", vec.getX(), vec.getY(), vec.getZ(), vec.getW() ); }
+
+// Debug helper: print "name: " followed by the four elements via printf.
+// Sits inside an #ifdef _VECTORMATH_DEBUG region.
+inline void print( const Vector4 & vec, const char * name )
+{ printf( "%s: ( %f %f %f %f )\n", name, vec.getX(), vec.getY(), vec.getZ(), vec.getW() ); }
+
+#endif
+
+// Copy constructor: duplicates the three coordinates of pnt.
+inline Point3::Point3( const Point3 & pnt )
+    : mX( pnt.mX ),
+      mY( pnt.mY ),
+      mZ( pnt.mZ )
+{ }
+
+// Build a 3-D point from its three coordinates.
+inline Point3::Point3( float _x, float _y, float _z )
+    : mX( _x ),
+      mY( _y ),
+      mZ( _z )
+{ }
+
+// Build a 3-D point whose coordinates are the elements of a 3-D vector.
+inline Point3::Point3( const Vector3 & vec )
+    : mX( vec.getX() ),
+      mY( vec.getY() ),
+      mZ( vec.getZ() )
+{ }
+
+// Fill all three coordinates with the same scalar value.
+inline Point3::Point3( float scalar )
+    : mX( scalar ),
+      mY( scalar ),
+      mZ( scalar )
+{ }
+
+// Linear interpolation between points: pnt0 + ( pnt1 - pnt0 ) * t.
+// t is not clamped, so values outside [0, 1] extrapolate.
+inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 )
+{ const Vector3 delta = pnt1 - pnt0; return pnt0 + ( delta * t ); }
+
+// Read fptr[0..2] into pnt as x, y, z.
+// The three floats are read before pnt is assigned.
+inline void loadXYZ( Point3 & pnt, const float * fptr )
+{ const Point3 loaded( fptr[0], fptr[1], fptr[2] ); pnt = loaded; }
+
+inline void storeXYZ( const Point3 & pnt, float * fptr )
+{   // Memory order of the written floats is x, y, z.
+    *( fptr + 0 ) = pnt.getX();
+    *( fptr + 1 ) = pnt.getY();
+    *( fptr + 2 ) = pnt.getZ();
+}
+
+// Expand the three half-precision bit patterns at hfptr[0..2] into vec.
+// Lossy cases, matching the branches below:
+//   - half exponent 0 (zero and denormals) becomes a signed zero;
+//   - half exponent 31 (infinity and NaN) becomes a signed infinity.
+inline void loadHalfFloats( Point3 & vec, const unsigned short * hfptr )
+{
+    // Reinterpret a 32-bit pattern as a float through a union.
+    union Bits32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    for (int elem = 0; elem < 3; ++elem) {
+        const unsigned int half = hfptr[elem];
+        const unsigned int signBit = half >> 15;
+        const unsigned int halfExp = (half >> 10) & 0x1Fu;
+        const unsigned int halfMan = half & 0x3FFu;
+
+        unsigned int floatExp = 0;    // zero / denormal -> zero
+        unsigned int floatMan = 0;
+        if (halfExp == 31) {
+            floatExp = 255;           // infinity / NaN -> infinity
+        } else if (halfExp != 0) {
+            floatExp = halfExp + (127 - 15);    // re-bias 15 -> 127
+            floatMan = halfMan << 13;           // 10-bit -> 23-bit mantissa
+        }
+
+        Bits32 word;
+        word.u32 = (signBit << 31) | (floatExp << 23) | floatMan;
+        vec[elem] = word.f32;
+    }
+}
+
+// Pack the three coordinates of vec into IEEE-754 half-precision bit patterns
+// written to hfptr[0..2].  The conversion is lossy:
+//   - float zeros, denormals and magnitudes too small for a normalised
+//     half are written as a signed zero;
+//   - NaN, infinity and magnitudes too large for a half are written as a
+//     signed infinity;
+//   - all other values keep the top 10 mantissa bits (truncated, not
+//     rounded).
+inline void storeHalfFloats( const Point3 & vec, unsigned short * hfptr )
+{
+    // Reinterpret a float as its 32-bit pattern through a union.
+    union Bits32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    // Biased float exponents that bound the range of normalised halves.
+    const unsigned int kRebias = 127 - 15;
+    const unsigned int kInfExp = kRebias + 31;
+
+    for (int elem = 0; elem < 3; ++elem) {
+        Bits32 word;
+        word.f32 = vec[elem];
+
+        const unsigned int signBit = word.u32 >> 31;
+        const unsigned int floatExp = (word.u32 >> 23) & 0xFFu;
+        const unsigned int floatMan = word.u32 & 0x7FFFFFu;
+
+        // Default result: flush to a signed zero.
+        unsigned int halfExp = 0;
+        unsigned int halfMan = 0;
+        if (floatExp >= kInfExp) {
+            halfExp = 31;    // NaN, infinity or overflow -> infinity
+        } else if (floatExp > kRebias) {
+            halfExp = floatExp - kRebias;
+            halfMan = floatMan >> 13;
+        }
+
+        const unsigned int packed = (signBit << 15) | (halfExp << 10) | halfMan;
+        hfptr[elem] = (unsigned short)packed;
+    }
+}
+
+// Copy assignment: take over the x, y and z coordinates of pnt.
+inline Point3 & Point3::operator =( const Point3 & pnt )
+{
+    return this->setX( pnt.mX )
+                .setY( pnt.mY )
+                .setZ( pnt.mZ );
+}
+
+// Replace the x coordinate with _x; returns this point so calls can be chained.
+inline Point3 & Point3::setX( float _x )
+{
+    this->mX = _x; return *this;
+}
+
+// Read accessor for the x coordinate.
+// Returns the coordinate by value; the point is not modified.
+inline float Point3::getX( ) const
+{ return this->mX; }
+
+// Replace the y coordinate with _y; returns this point so calls can be chained.
+inline Point3 & Point3::setY( float _y )
+{
+    this->mY = _y; return *this;
+}
+
+// Read accessor for the y coordinate.
+// Returns the coordinate by value; the point is not modified.
+inline float Point3::getY( ) const
+{ return this->mY; }
+
+// Replace the z coordinate with _z; returns this point so calls can be chained.
+inline Point3 & Point3::setZ( float _z )
+{
+    this->mZ = _z; return *this;
+}
+
+// Read accessor for the z coordinate.
+// Returns the coordinate by value; the point is not modified.
+inline float Point3::getZ( ) const
+{ return this->mZ; }
+
+// Write value to the element at offset idx from mX; idx is not range-checked.
+inline Point3 & Point3::setElem( int idx, float value )
+{
+    ( &mX )[idx] = value; return *this;
+}
+
+// Read the element at offset idx from mX; idx is not range-checked.
+// Relies on the coordinates being stored contiguously starting at mX.
+inline float Point3::getElem( int idx ) const
+{ return ( &mX )[idx]; }
+
+// Mutable subscript: reference to the element at offset idx from mX.
+// idx is not range-checked; relies on contiguous coordinate storage.
+inline float & Point3::operator []( int idx )
+{ return ( &mX )[idx]; }
+
+// Read-only subscript: value of the element at offset idx from mX.
+// idx is not range-checked; relies on contiguous coordinate storage.
+inline float Point3::operator []( int idx ) const
+{ return ( &mX )[idx]; }
+
+// Difference of two points ( this minus pnt ), returned as a Vector3.
+inline const Vector3 Point3::operator -( const Point3 & pnt ) const
+{
+    const float diffX = mX - pnt.mX;
+    const float diffY = mY - pnt.mY;
+    const float diffZ = mZ - pnt.mZ;
+    return Vector3( diffX, diffY, diffZ );
+}
+
+// Translate this point by vec: element-wise sum, returned as a new Point3.
+inline const Point3 Point3::operator +( const Vector3 & vec ) const
+{
+    const float movedX = mX + vec.getX();
+    const float movedY = mY + vec.getY();
+    const float movedZ = mZ + vec.getZ();
+    return Point3( movedX, movedY, movedZ );
+}
+
+// Translate this point by -vec: element-wise difference, returned as a Point3.
+inline const Point3 Point3::operator -( const Vector3 & vec ) const
+{
+    const float movedX = mX - vec.getX();
+    const float movedY = mY - vec.getY();
+    const float movedZ = mZ - vec.getZ();
+    return Point3( movedX, movedY, movedZ );
+}
+
+// In-place translation by vec, expressed through operator + and assignment.
+inline Point3 & Point3::operator +=( const Vector3 & vec )
+{
+    return ( *this = ( *this + vec ) );
+}
+
+// In-place translation by -vec, expressed through operator - and assignment.
+inline Point3 & Point3::operator -=( const Vector3 & vec )
+{
+    return ( *this = ( *this - vec ) );
+}
+
+// Multiply two 3-D points coordinate by coordinate.
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float prodX = pnt0.getX() * pnt1.getX();
+    const float prodY = pnt0.getY() * pnt1.getY();
+    const float prodZ = pnt0.getZ() * pnt1.getZ();
+    return Point3( prodX, prodY, prodZ );
+}
+
+// Divide pnt0 by pnt1 coordinate by coordinate; zero divisors are not guarded.
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float quotX = pnt0.getX() / pnt1.getX();
+    const float quotY = pnt0.getY() / pnt1.getY();
+    const float quotZ = pnt0.getZ() / pnt1.getZ();
+    return Point3( quotX, quotY, quotZ );
+}
+
+// Reciprocal ( 1 / coordinate ) of each coordinate; zeros are not guarded.
+inline const Point3 recipPerElem( const Point3 & pnt )
+{
+    const float recipX = 1.0f / pnt.getX();
+    const float recipY = 1.0f / pnt.getY();
+    const float recipZ = 1.0f / pnt.getZ();
+    return Point3( recipX, recipY, recipZ );
+}
+
+// Square root of each coordinate, computed with sqrtf.
+inline const Point3 sqrtPerElem( const Point3 & pnt )
+{
+    const float rootX = sqrtf( pnt.getX() );
+    const float rootY = sqrtf( pnt.getY() );
+    const float rootZ = sqrtf( pnt.getZ() );
+    return Point3( rootX, rootY, rootZ );
+}
+
+// Reciprocal square root ( 1 / sqrtf( coordinate ) ) of each coordinate.
+inline const Point3 rsqrtPerElem( const Point3 & pnt )
+{
+    const float rsqX = 1.0f / sqrtf( pnt.getX() );
+    const float rsqY = 1.0f / sqrtf( pnt.getY() );
+    const float rsqZ = 1.0f / sqrtf( pnt.getZ() );
+    return Point3( rsqX, rsqY, rsqZ );
+}
+
+// Absolute value of each coordinate, computed with fabsf.
+inline const Point3 absPerElem( const Point3 & pnt )
+{
+    const float absX = fabsf( pnt.getX() );
+    const float absY = fabsf( pnt.getY() );
+    const float absZ = fabsf( pnt.getZ() );
+    return Point3( absX, absY, absZ );
+}
+
+// Magnitudes from pnt0; a coordinate is made negative only when pnt1's is < 0.
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float outX = ( pnt1.getX() < 0.0f ) ? -fabsf( pnt0.getX() ) : fabsf( pnt0.getX() );
+    const float outY = ( pnt1.getY() < 0.0f ) ? -fabsf( pnt0.getY() ) : fabsf( pnt0.getY() );
+    const float outZ = ( pnt1.getZ() < 0.0f ) ? -fabsf( pnt0.getZ() ) : fabsf( pnt0.getZ() );
+    return Point3( outX, outY, outZ );
+}
+
+// Coordinate-wise maximum; pnt1's value is used unless pnt0's is strictly greater.
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float maxX = ( pnt0.getX() > pnt1.getX() ) ? pnt0.getX() : pnt1.getX();
+    const float maxY = ( pnt0.getY() > pnt1.getY() ) ? pnt0.getY() : pnt1.getY();
+    const float maxZ = ( pnt0.getZ() > pnt1.getZ() ) ? pnt0.getZ() : pnt1.getZ();
+    return Point3( maxX, maxY, maxZ );
+}
+
+// Largest of the three coordinates: x against y first, then z against the winner.
+// Whenever a comparison is false the right-hand operand of the ?: is kept.
+inline float maxElem( const Point3 & pnt )
+{
+    const float xy = ( pnt.getX() > pnt.getY() ) ? pnt.getX() : pnt.getY();
+    return ( pnt.getZ() > xy ) ? pnt.getZ() : xy;
+}
+
+// Coordinate-wise minimum; pnt1's value is used unless pnt0's is strictly smaller.
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float minX = ( pnt0.getX() < pnt1.getX() ) ? pnt0.getX() : pnt1.getX();
+    const float minY = ( pnt0.getY() < pnt1.getY() ) ? pnt0.getY() : pnt1.getY();
+    const float minZ = ( pnt0.getZ() < pnt1.getZ() ) ? pnt0.getZ() : pnt1.getZ();
+    return Point3( minX, minY, minZ );
+}
+
+// Smallest of the three coordinates: x against y first, then z against the winner.
+// Whenever a comparison is false the right-hand operand of the ?: is kept.
+inline float minElem( const Point3 & pnt )
+{
+    const float xy = ( pnt.getX() < pnt.getY() ) ? pnt.getX() : pnt.getY();
+    return ( pnt.getZ() < xy ) ? pnt.getZ() : xy;
+}
+
+// Add up the coordinates, accumulating left to right: ( x + y ) + z.
+inline float sum( const Point3 & pnt )
+{
+    float total = pnt.getX();
+    total += pnt.getY();
+    return total + pnt.getZ();
+}
+
+// Uniform scale: multiply every coordinate of pnt by scaleVal.
+// Implemented as mulPerElem against a point filled with scaleVal.
+inline const Point3 scale( const Point3 & pnt, float scaleVal )
+{ const Point3 factors( scaleVal ); return mulPerElem( pnt, factors ); }
+
+// Non-uniform scale: multiply each coordinate of pnt by scaleVec's element.
+// Implemented as mulPerElem against a point built from scaleVec.
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )
+{ const Point3 factors( scaleVec ); return mulPerElem( pnt, factors ); }
+
+// Dot product of pnt's coordinates with unitVec, accumulated in x, y, z order.
+inline float projection( const Point3 & pnt, const Vector3 & unitVec )
+{
+    float acc = pnt.getX() * unitVec.getX();
+    acc += pnt.getY() * unitVec.getY();
+    acc += pnt.getZ() * unitVec.getZ();
+    return acc;
+}
+
+// Squared distance of pnt from the origin.
+// Computed as lengthSqr of a Vector3 built from pnt's coordinates.
+inline float distSqrFromOrigin( const Point3 & pnt )
+{ const Vector3 fromOrigin( pnt ); return lengthSqr( fromOrigin ); }
+
+// Distance of pnt from the origin.
+// Computed as length of a Vector3 built from pnt's coordinates.
+inline float distFromOrigin( const Point3 & pnt )
+{ const Vector3 fromOrigin( pnt ); return length( fromOrigin ); }
+
+// Squared distance between two points.
+// Computed as lengthSqr of the offset ( pnt1 - pnt0 ).
+inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 )
+{ const Vector3 offset = pnt1 - pnt0; return lengthSqr( offset ); }
+
+// Distance between two points.
+// Computed as length of the offset ( pnt1 - pnt0 ).
+inline float dist( const Point3 & pnt0, const Point3 & pnt1 )
+{ const Vector3 offset = pnt1 - pnt0; return length( offset ); }
+
+// Return the coordinates of pnt1 when select1 is true, otherwise those of pnt0.
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 )
+{
+    const Point3 & chosen = select1 ? pnt1 : pnt0;
+    const float pickX = chosen.getX();
+    const float pickY = chosen.getY();
+    return Point3( pickX, pickY, chosen.getZ() );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: print the three coordinates on one line via printf.
+// Sits inside an #ifdef _VECTORMATH_DEBUG region.
+inline void print( const Point3 & pnt )
+{ printf( "( %f %f %f )\n", pnt.getX(), pnt.getY(), pnt.getZ() ); }
+
+// Debug helper: print "name: " followed by the three coordinates via printf.
+// Sits inside an #ifdef _VECTORMATH_DEBUG region.
+inline void print( const Point3 & pnt, const char * name )
+{ printf( "%s: ( %f %f %f )\n", name, pnt.getX(), pnt.getY(), pnt.getZ() ); }
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/src/bullet/vectormath/scalar/vectormath_aos.h b/src/bullet/vectormath/scalar/vectormath_aos.h
new file mode 100644
index 00000000..d00456df
--- /dev/null
+++ b/src/bullet/vectormath/scalar/vectormath_aos.h
@@ -0,0 +1,1872 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _VECTORMATH_AOS_CPP_H
+#define _VECTORMATH_AOS_CPP_H
+
+#include <math.h>
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+namespace Vectormath {
+
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+//
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A 3-D vector in array-of-structures format
+// Three floats (x, y, z); 16-byte aligned under GCC, one extra float member on other compilers.
+class Vector3
+{
+    float mX;
+    float mY;
+    float mZ;
+#ifndef __GNUC__
+    float d; // non-GCC builds only; presumably padding to 16 bytes (confirm) - no accessor is declared for it
+#endif
+
+public:
+    // Default constructor; does no initialization
+    // Elements stay indeterminate until they are assigned.
+    inline Vector3( ) { };
+
+    // Copy a 3-D vector
+    // 
+    inline Vector3( const Vector3 & vec );
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    inline Vector3( float x, float y, float z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // Explicit: a Point3 is never converted to a Vector3 implicitly.
+    explicit inline Vector3( const Point3 & pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // Explicit: a float is never converted to a Vector3 implicitly.
+    explicit inline Vector3( float scalar );
+
+    // Assign one 3-D vector to another
+    // 
+    inline Vector3 & operator =( const Vector3 & vec );
+
+    // Set the x element of a 3-D vector
+    // 
+    inline Vector3 & setX( float x );
+
+    // Set the y element of a 3-D vector
+    // 
+    inline Vector3 & setY( float y );
+
+    // Set the z element of a 3-D vector
+    // 
+    inline Vector3 & setZ( float z );
+
+    // Get the x element of a 3-D vector
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    inline float getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // 
+    inline Vector3 & setElem( int idx, float value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a reference, so the element can be written through it.
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline float operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // 
+    inline const Vector3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    inline const Vector3 operator -( const Vector3 & vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result is a Point3.
+    inline const Point3 operator +( const Point3 & pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator *( float scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Vector3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Vector3 & operator -=( const Vector3 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector3 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector3 & operator /=( float scalar );
+
+    // Negate all elements of a 3-D vector
+    // Returns a new vector; the operand is not modified (const member).
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // 
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis
+    // 
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis
+    // 
+    static inline const Vector3 zAxis( );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))
+#endif
+;
+
+// Multiply a 3-D vector by a scalar
+// Scalar-on-the-left counterpart of the member operator *( float ).
+inline const Vector3 operator *( float scalar, const Vector3 & vec );
+
+// Multiply two 3-D vectors per element
+// 
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector3 recipPerElem( const Vector3 & vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector3 sqrtPerElem( const Vector3 & vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector3 rsqrtPerElem( const Vector3 & vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+inline const Vector3 absPerElem( const Vector3 & vec );
+
+// Copy sign from one 3-D vector to another, per element
+// 
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum element of a 3-D vector
+// 
+inline float maxElem( const Vector3 & vec );
+
+// Minimum element of a 3-D vector
+// 
+inline float minElem( const Vector3 & vec );
+
+// Compute the sum of all elements of a 3-D vector
+// 
+inline float sum( const Vector3 & vec );
+
+// Compute the dot product of two 3-D vectors
+// 
+inline float dot( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the square of the length of a 3-D vector
+// 
+inline float lengthSqr( const Vector3 & vec );
+
+// Compute the length of a 3-D vector
+// 
+inline float length( const Vector3 & vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector3 normalize( const Vector3 & vec );
+
+// Compute cross product of two 3-D vectors
+// 
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Outer product of two 3-D vectors
+// The result is a 3x3 matrix (Matrix3).
+inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// 
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// 
+inline const Matrix3 crossMatrix( const Vector3 & vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// Presumably yields vec1 when select1 is true, otherwise vec0 (confirm against the definition).
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 );
+
+// Load x, y, and z elements from the first three words of a float array.
+// fptr is only read (pointer to const float).
+// 
+inline void loadXYZ( Vector3 & vec, const float * fptr );
+
+// Store x, y, and z elements of a 3-D vector in the first three words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+// 
+inline void storeXYZ( const Vector3 & vec, float * fptr );
+
+// Load three-half-floats as a 3-D vector
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs.
+// 
+inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr );
+
+// Store a 3-D vector as half-floats.
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs.
+// Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
+inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec, const char * name );
+
+#endif
+
+// A 4-D vector in array-of-structures format
+// Four floats (x, y, z, w); 16-byte aligned under GCC.
+class Vector4
+{
+    float mX;
+    float mY;
+    float mZ;
+    float mW;
+
+public:
+    // Default constructor; does no initialization
+    // Elements stay indeterminate until they are assigned.
+    inline Vector4( ) { };
+
+    // Copy a 4-D vector
+    // 
+    inline Vector4( const Vector4 & vec );
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // 
+    inline Vector4( float x, float y, float z, float w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // 
+    inline Vector4( const Vector3 & xyz, float w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // Explicit: a Vector3 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( const Vector3 & vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // Explicit: a Point3 is never converted to a Vector4 implicitly.
+    explicit inline Vector4( const Point3 & pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // Explicit: a Quat is never converted to a Vector4 implicitly.
+    explicit inline Vector4( const Quat & quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // Explicit: a float is never converted to a Vector4 implicitly.
+    explicit inline Vector4( float scalar );
+
+    // Assign one 4-D vector to another
+    // 
+    inline Vector4 & operator =( const Vector4 & vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Vector4 & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // Returned by value as a Vector3.
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // 
+    inline Vector4 & setX( float x );
+
+    // Set the y element of a 4-D vector
+    // 
+    inline Vector4 & setY( float y );
+
+    // Set the z element of a 4-D vector
+    // 
+    inline Vector4 & setZ( float z );
+
+    // Set the w element of a 4-D vector
+    // 
+    inline Vector4 & setW( float w );
+
+    // Get the x element of a 4-D vector
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // 
+    inline float getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // 
+    inline float getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // 
+    inline Vector4 & setElem( int idx, float value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a reference, so the element can be written through it.
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline float operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // 
+    inline const Vector4 operator +( const Vector4 & vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // 
+    inline const Vector4 operator -( const Vector4 & vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator *( float scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // 
+    inline Vector4 & operator +=( const Vector4 & vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // 
+    inline Vector4 & operator -=( const Vector4 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector4 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector4 & operator /=( float scalar );
+
+    // Negate all elements of a 4-D vector
+    // Returns a new vector; the operand is not modified (const member).
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // 
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis
+    // 
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis
+    // 
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis
+    // 
+    static inline const Vector4 wAxis( );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))
+#endif
+;
+
+// Multiply a 4-D vector by a scalar
+// Scalar-on-the-left counterpart of the member operator *( float ).
+inline const Vector4 operator *( float scalar, const Vector4 & vec );
+
+// Multiply two 4-D vectors per element
+// 
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector4 recipPerElem( const Vector4 & vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector4 sqrtPerElem( const Vector4 & vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector4 rsqrtPerElem( const Vector4 & vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+inline const Vector4 absPerElem( const Vector4 & vec );
+
+// Copy sign from one 4-D vector to another, per element
+// 
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum element of a 4-D vector
+// 
+inline float maxElem( const Vector4 & vec );
+
+// Minimum element of a 4-D vector
+// 
+inline float minElem( const Vector4 & vec );
+
+// Compute the sum of all elements of a 4-D vector
+// 
+inline float sum( const Vector4 & vec );
+
+// Compute the dot product of two 4-D vectors
+// 
+inline float dot( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the square of the length of a 4-D vector
+// 
+inline float lengthSqr( const Vector4 & vec );
+
+// Compute the length of a 4-D vector
+// 
+inline float length( const Vector4 & vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector4 normalize( const Vector4 & vec );
+
+// Outer product of two 4-D vectors
+// The result is a 4x4 matrix (Matrix4).
+inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// Presumably yields vec1 when select1 is true, otherwise vec0 (confirm against the definition).
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 );
+
+// Load x, y, z, and w elements from the first four words of a float array.
+// fptr is only read (pointer to const float).
+// 
+inline void loadXYZW( Vector4 & vec, const float * fptr );
+
+// Store x, y, z, and w elements of a 4-D vector in the first four words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+// 
+inline void storeXYZW( const Vector4 & vec, float * fptr );
+
+// Load four-half-floats as a 4-D vector
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs.
+// 
+inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr );
+
+// Store a 4-D vector as half-floats.
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs.
+// Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
+inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec, const char * name );
+
+#endif
+
+// A 3-D point in array-of-structures format
+// Three floats (x, y, z); 16-byte aligned under GCC, one extra float member on other compilers.
+class Point3
+{
+    float mX;
+    float mY;
+    float mZ;
+#ifndef __GNUC__
+    float d; // non-GCC builds only; presumably padding to 16 bytes (confirm) - no accessor is declared for it
+#endif
+
+public:
+    // Default constructor; does no initialization
+    // Elements stay indeterminate until they are assigned.
+    inline Point3( ) { };
+
+    // Copy a 3-D point
+    // 
+    inline Point3( const Point3 & pnt );
+
+    // Construct a 3-D point from x, y, and z elements
+    // 
+    inline Point3( float x, float y, float z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // Explicit: a Vector3 is never converted to a Point3 implicitly.
+    explicit inline Point3( const Vector3 & vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // Explicit: a float is never converted to a Point3 implicitly.
+    explicit inline Point3( float scalar );
+
+    // Assign one 3-D point to another
+    // 
+    inline Point3 & operator =( const Point3 & pnt );
+
+    // Set the x element of a 3-D point
+    // 
+    inline Point3 & setX( float x );
+
+    // Set the y element of a 3-D point
+    // 
+    inline Point3 & setY( float y );
+
+    // Set the z element of a 3-D point
+    // 
+    inline Point3 & setZ( float z );
+
+    // Get the x element of a 3-D point
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 3-D point
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 3-D point
+    // 
+    inline float getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // 
+    inline Point3 & setElem( int idx, float value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a reference, so the element can be written through it.
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline float operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // The result is a Vector3, not a Point3.
+    inline const Vector3 operator -( const Point3 & pnt ) const;
+
+    // Add a 3-D point to a 3-D vector
+    // The result is a Point3.
+    inline const Point3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // The result is a Point3.
+    inline const Point3 operator -( const Vector3 & vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Point3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Point3 & operator -=( const Vector3 & vec );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))
+#endif
+;
+
+// Multiply two 3-D points per element
+// 
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Point3 recipPerElem( const Point3 & pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Point3 sqrtPerElem( const Point3 & pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Point3 rsqrtPerElem( const Point3 & pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+inline const Point3 absPerElem( const Point3 & pnt );
+
+// Copy sign from one 3-D point to another, per element
+// 
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum element of a 3-D point
+// 
+inline float maxElem( const Point3 & pnt );
+
+// Minimum element of a 3-D point
+// 
+inline float minElem( const Point3 & pnt );
+
+// Compute the sum of all elements of a 3-D point
+// Evaluated as ( x + y ) + z.
+inline float sum( const Point3 & pnt );
+
+// Apply uniform scale to a 3-D point
+// Equivalent to mulPerElem( pnt, Point3( scaleVal ) ).
+inline const Point3 scale( const Point3 & pnt, float scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// Equivalent to mulPerElem( pnt, Point3( scaleVec ) ).
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// Computed as the dot product of the point's elements with unitVec's elements.
+inline float projection( const Point3 & pnt, const Vector3 & unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// Equivalent to lengthSqr( Vector3( pnt ) ).
+inline float distSqrFromOrigin( const Point3 & pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// Equivalent to length( Vector3( pnt ) ).
+inline float distFromOrigin( const Point3 & pnt );
+
+// Compute the square of the distance between two 3-D points
+// Equivalent to lengthSqr( pnt1 - pnt0 ).
+inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the distance between two 3-D points
+// Equivalent to length( pnt1 - pnt0 ).
+inline float dist( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 );
+
+// Conditionally select between two 3-D points
+// Yields the elements of pnt1 when select1 is true, otherwise those of pnt0.
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 );
+
+// Load x, y, and z elements from the first three words of a float array.
+// fptr is only read (pointer to const float).
+// 
+inline void loadXYZ( Point3 & pnt, const float * fptr );
+
+// Store x, y, and z elements of a 3-D point in the first three words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+// 
+inline void storeXYZ( const Point3 & pnt, float * fptr );
+
+// Load three-half-floats as a 3-D point
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs.
+// 
+inline void loadHalfFloats( Point3 & pnt, const unsigned short * hfptr );
+
+// Store a 3-D point as half-floats.
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs.
+// Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
+inline void storeHalfFloats( const Point3 & pnt, unsigned short * hfptr );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// Output format: "( x y z )" followed by a newline.
+inline void print( const Point3 & pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// Output format: "name: ( x y z )" followed by a newline.
+inline void print( const Point3 & pnt, const char * name );
+
+#endif
+
+// A quaternion in array-of-structures format
+// Four floats (x, y, z, w); 16-byte aligned under GCC.
+class Quat
+{
+    float mX;
+    float mY;
+    float mZ;
+    float mW;
+
+public:
+    // Default constructor; does no initialization
+    // Elements stay indeterminate until they are assigned.
+    inline Quat( ) { };
+
+    // Copy a quaternion
+    // 
+    inline Quat( const Quat & quat );
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    inline Quat( float x, float y, float z, float w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // 
+    inline Quat( const Vector3 & xyz, float w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // Explicit: a Vector4 is never converted to a Quat implicitly.
+    explicit inline Quat( const Vector4 & vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // Explicit: a Matrix3 is never converted to a Quat implicitly.
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // Explicit: a float is never converted to a Quat implicitly.
+    explicit inline Quat( float scalar );
+
+    // Assign one quaternion to another
+    // 
+    inline Quat & operator =( const Quat & quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Quat & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // Returned by value as a Vector3.
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // 
+    inline Quat & setX( float x );
+
+    // Set the y element of a quaternion
+    // 
+    inline Quat & setY( float y );
+
+    // Set the z element of a quaternion
+    // 
+    inline Quat & setZ( float z );
+
+    // Set the w element of a quaternion
+    // 
+    inline Quat & setW( float w );
+
+    // Get the x element of a quaternion
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    inline float getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    inline float getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // 
+    inline Quat & setElem( int idx, float value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a reference, so the element can be written through it.
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline float operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    inline const Quat operator +( const Quat & quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // 
+    inline const Quat operator -( const Quat & quat ) const;
+
+    // Multiply two quaternions
+    // 
+    inline const Quat operator *( const Quat & quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // 
+    inline const Quat operator *( float scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // 
+    inline const Quat operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // 
+    inline Quat & operator +=( const Quat & quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // 
+    inline Quat & operator -=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // 
+    inline Quat & operator *=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Quat & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Quat & operator /=( float scalar );
+
+    // Negate all elements of a quaternion
+    // Returns a new quaternion; the operand is not modified (const member).
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // 
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // The angle is given in radians.
+    static inline const Quat rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // The angle is given in radians.
+    static inline const Quat rotationX( float radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // The angle is given in radians.
+    static inline const Quat rotationY( float radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // The angle is given in radians.
+    static inline const Quat rotationZ( float radians );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))
+#endif
+;
+
+// Multiply a quaternion by a scalar
+// Scalar-on-the-left counterpart of the member operator *( float ).
+inline const Quat operator *( float scalar, const Quat & quat );
+
+// Compute the conjugate of a quaternion
+// 
+inline const Quat conj( const Quat & quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// Returns the rotated vector by value; vec itself is not modified (const reference).
+inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
+
+// Compute the dot product of two quaternions
+// 
+inline float dot( const Quat & quat0, const Quat & quat1 );
+
+// Compute the norm of a quaternion
+// NOTE(review): declared separately from length(); confirm in the definition how the two differ.
+inline float norm( const Quat & quat );
+
+// Compute the length of a quaternion
+// 
+inline float length( const Quat & quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+inline const Quat normalize( const Quat & quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 );
+
+// Spherical quadrangle interpolation
+// 
+inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
+
+// Conditionally select between two quaternions
+// Presumably yields quat1 when select1 is true, otherwise quat0 (confirm against the definition).
+inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 );
+
+// Load x, y, z, and w elements from the first four words of a float array.
+// fptr is only read (pointer to const float).
+// 
+inline void loadXYZW( Quat & quat, const float * fptr );
+
+// Store x, y, z, and w elements of a quaternion in the first four words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+// 
+inline void storeXYZW( const Quat & quat, float * fptr );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat, const char * name );
+
+#endif
+
+// A 3x3 matrix in array-of-structures format
+// Stored as three Vector3 columns (mCol0, mCol1, mCol2); elements are addressed as (col, row).
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // (empty body: the columns are whatever Vector3's own default constructor leaves them)
+    inline Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    // 
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    // 
+    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // (explicit: a Quat never converts to a Matrix3 implicitly)
+    explicit inline Matrix3( const Quat & unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // (explicit: a float never converts to a Matrix3 implicitly)
+    explicit inline Matrix3( float scalar );
+
+    // Assign one 3x3 matrix to another
+    // 
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol2( const Vector3 & col2 );
+
+    // Get column 0 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // (col is presumably expected in 0..2 -- confirm against the definition)
+    inline Matrix3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // (row is presumably expected in 0..2 -- confirm against the definition)
+    inline Matrix3 & setRow( int row, const Vector3 & vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a reference, so the column can be modified in place
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Returns the column by value
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // Index order is column first, then row
+    inline Matrix3 & setElem( int col, int row, float val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // Index order is column first, then row
+    inline float getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    // 
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    // 
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // 
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    // 
+    inline const Matrix3 operator *( float scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // 
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply two 3x3 matrices
+    // 
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    // 
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    // 
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix3 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    // 
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // 
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    // The angle is given in radians
+    static inline const Matrix3 rotationX( float radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    // The angle is given in radians
+    static inline const Matrix3 rotationY( float radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    // The angle is given in radians
+    static inline const Matrix3 rotationZ( float radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // radiansXYZ: per-axis angles in radians (going by the parameter name)
+    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // The angle is given in radians
+    static inline const Matrix3 rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // 
+    static inline const Matrix3 scale( const Vector3 & scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// 
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// 
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+// 
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+// 
+inline float determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// 
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A 4x4 matrix in array-of-structures format
+// Stored as four Vector4 columns (mCol0..mCol3); elements are addressed as (col, row).
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // (empty body: the columns are whatever Vector4's own default constructor leaves them)
+    inline Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    // 
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    // 
+    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // (explicit: a Transform3 never converts to a Matrix4 implicitly)
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // 
+    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // (explicit: a float never converts to a Matrix4 implicitly)
+    explicit inline Matrix4( float scalar );
+
+    // Assign one 4x4 matrix to another
+    // 
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol0( const Vector4 & col0 );
+
+    // Set column 1 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol1( const Vector4 & col1 );
+
+    // Set column 2 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol2( const Vector4 & col2 );
+
+    // Set column 3 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol3( const Vector4 & col3 );
+
+    // Get column 0 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // (col is presumably expected in 0..3 -- confirm against the definition)
+    inline Matrix4 & setCol( int col, const Vector4 & vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // (row is presumably expected in 0..3 -- confirm against the definition)
+    inline Matrix4 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a reference, so the column can be modified in place
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Returns the column by value
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // Index order is column first, then row
+    inline Matrix4 & setElem( int col, int row, float val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // Index order is column first, then row
+    inline float getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    // 
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    // 
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // 
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    // 
+    inline const Matrix4 operator *( float scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    // 
+    inline const Vector4 operator *( const Vector4 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // The result is a 4-D vector
+    inline const Vector4 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // The result is a 4-D vector
+    inline const Vector4 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // 
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // 
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    // 
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    // 
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix4 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    // 
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // 
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    // The angle is given in radians
+    static inline const Matrix4 rotationX( float radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    // The angle is given in radians
+    static inline const Matrix4 rotationY( float radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    // The angle is given in radians
+    static inline const Matrix4 rotationZ( float radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // radiansXYZ: per-axis angles in radians (going by the parameter name)
+    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // The angle is given in radians
+    static inline const Matrix4 rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix4 rotation( const Quat & unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // 
+    static inline const Matrix4 scale( const Vector3 & scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    // 
+    static inline const Matrix4 translation( const Vector3 & translateVec );
+
+    // Construct viewing matrix based on eye position, position looked at, and up direction
+    // 
+    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
+
+    // Construct a perspective projection matrix
+    // fovyRadians: field-of-view angle in radians (presumably vertical, going by "fovy" -- confirm)
+    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    // 
+    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
+
+    // Construct an orthographic projection matrix
+    // 
+    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// 
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// 
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+// 
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+// 
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+// 
+inline float determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// 
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A 3x4 transformation matrix in array-of-structures format
+// Stored as four Vector3 columns (mCol0..mCol3): three rows by four columns.
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // (empty body: the columns are whatever Vector3's own default constructor leaves them)
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // 
+    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // (explicit: a float never converts to a Transform3 implicitly)
+    explicit inline Transform3( float scalar );
+
+    // Assign one 3x4 transformation matrix to another
+    // 
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix
+    // 
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // 
+    inline Transform3 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol2( const Vector3 & col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol3( const Vector3 & col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // (col is presumably expected in 0..3 -- confirm against the definition)
+    inline Transform3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // A row spans all four columns, hence the Vector4 argument
+    inline Transform3 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // A row spans all four columns, hence the Vector4 result
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a reference, so the column can be modified in place
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Returns the column by value
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // Index order is column first, then row
+    inline Transform3 & setElem( int col, int row, float val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // Index order is column first, then row
+    inline float getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // 
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // 
+    inline const Point3 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // 
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // The angle is given in radians
+    static inline const Transform3 rotationX( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // The angle is given in radians
+    static inline const Transform3 rotationY( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // The angle is given in radians
+    static inline const Transform3 rotationZ( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // radiansXYZ: per-axis angles in radians (going by the parameter name)
+    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // The angle is given in radians
+    static inline const Transform3 rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Transform3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // 
+    static inline const Transform3 scale( const Vector3 & scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static inline const Transform3 translation( const Vector3 & translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// 
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// 
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+// 
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+// 
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// 
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/src/bullet/vectormath/sse/boolInVec.h b/src/bullet/vectormath/sse/boolInVec.h
new file mode 100644
index 00000000..d21d25cb
--- /dev/null
+++ b/src/bullet/vectormath/sse/boolInVec.h
@@ -0,0 +1,247 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _BOOLINVEC_H
+#define _BOOLINVEC_H
+
+#include <math.h>
+
+namespace Vectormath {
+
+class floatInVec;
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec class
+// A boolean held in an SSE register (__m128); this class body only declares the interface.
+
+class boolInVec
+{
+    private:
+        __m128 mData;  // NOTE(review): __m128 needs <xmmintrin.h>; this header includes only <math.h> before this point -- confirm includers provide it
+
+        inline boolInVec(__m128 vec);
+    public:
+        inline boolInVec() {}  // empty body: mData is left uninitialized
+
+        // matches standard type conversions
+        // (not explicit, so a floatInVec converts to boolInVec implicitly)
+        inline boolInVec(const floatInVec &vec);
+
+        // explicit cast from bool
+        //
+        explicit inline boolInVec(bool scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to bool
+        // (selected when _VECTORMATH_NO_SCALAR_CAST is defined)
+        inline bool getAsBool() const;
+#else
+        // implicit cast to bool
+        // (selected when _VECTORMATH_NO_SCALAR_CAST is not defined)
+        inline operator bool() const;
+#endif
+        
+        // get vector data
+        // bool value is splatted across all word slots of vector as 0 (false) or -1 (true)
+        //
+        inline __m128 get128() const;
+
+        // operators
+        //
+        inline const boolInVec operator ! () const;
+        inline boolInVec& operator = (const boolInVec &vec);
+        inline boolInVec& operator &= (const boolInVec &vec);
+        inline boolInVec& operator ^= (const boolInVec &vec);
+        inline boolInVec& operator |= (const boolInVec &vec);
+
+        // friend functions
+        // (as friends they may call the private __m128 constructor declared above)
+        friend inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec functions
+//
+
+// operators
+//
+inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1);
+inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1);
+inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1);
+inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1);
+inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+//
+inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec implementation
+//
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+
+// Private constructor: stores the given SSE register unchanged.
+inline boolInVec::boolInVec(__m128 vec)
+    : mData(vec)
+{
+}
+
+// Converting constructor: takes the result of comparing vec against 0.0f with floatInVec operator !=.
+inline boolInVec::boolInVec(const floatInVec &vec)
+    : mData((vec != floatInVec(0.0f)).get128())
+{
+}
+
+// Splat a scalar bool across all four lanes: all bits set for true, all bits clear for false.
+inline boolInVec::boolInVec(bool scalar)
+{
+    // Mask comes from an SSE compare; the old int-through-float* pun violated strict aliasing.
+    mData = scalar ? _mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps()) : _mm_setzero_ps();
+}
+
+// Scalar conversion: compiled as getAsBool() when _VECTORMATH_NO_SCALAR_CAST is defined,
+// otherwise as an implicit operator bool(). Both share the body below.
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline bool boolInVec::getAsBool() const
+#else
+inline boolInVec::operator bool() const
+#endif
+{
+    // Test the top bit of lane 0; reading the mask's first byte (0xFF) as a bool is undefined.
+    return (_mm_movemask_ps(mData) & 1) != 0;
+}
+
+// Raw accessor for the underlying SSE register (returned by value).
+// No conversion is applied; the stored mask is handed back as-is.
+inline __m128 boolInVec::get128() const
+{
+    return this->mData;
+}
+
+// Logical NOT: flips every bit of the mask.
+inline const boolInVec boolInVec::operator ! () const
+{
+    // cmpeq(0,0) yields all ones; the former cmpneq(0,0) yielded zero, so !x was always false.
+    return boolInVec(_mm_andnot_ps(mData, _mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps())));
+}
+
+// Copy assignment: takes over the other value's SSE mask.
+// Returns a reference to this object.
+inline boolInVec& boolInVec::operator = (const boolInVec &vec)
+{
+    this->mData = vec.get128();
+    return *this;
+}
+
+// Compound AND: combines this value with vec through operator & and stores the result.
+// Returns a reference to this object.
+inline boolInVec& boolInVec::operator &= (const boolInVec &vec)
+{
+    const boolInVec combined = *this & vec;
+    return *this = combined;
+}
+
+// Compound XOR: combines this value with vec through operator ^ and stores the result.
+// Returns a reference to this object.
+inline boolInVec& boolInVec::operator ^= (const boolInVec &vec)
+{
+    const boolInVec combined = *this ^ vec;
+    return *this = combined;
+}
+
+// Compound OR: combines this value with vec through operator | and stores the result.
+// Returns a reference to this object.
+inline boolInVec& boolInVec::operator |= (const boolInVec &vec)
+{
+    const boolInVec combined = *this | vec;
+    return *this = combined;
+}
+
+// Equality: bitwise XNOR of the masks (a float cmpeq mis-compares all-ones NaN "true" masks).
+inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1)
+{
+    const __m128 ones = _mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps());
+    return boolInVec(_mm_andnot_ps(_mm_xor_ps(vec0.get128(), vec1.get128()), ones));
+}
+
+// Inequality: bitwise XOR of the masks. A float cmpneq is wrong here because the all-ones
+// "true" pattern is a NaN, which made true != true evaluate to true.
+inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1)
+{
+    return boolInVec(_mm_xor_ps(vec0.get128(), vec1.get128()));
+}
+    
+// Logical AND of two boolInVec values, computed as a bitwise AND of their masks.
+inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1)
+{
+    const __m128 bits = _mm_and_ps(vec0.get128(), vec1.get128());
+    return boolInVec(bits);
+}
+
+// Logical OR of two boolInVec values, computed as a bitwise OR of their masks.
+inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1)
+{
+    const __m128 bits = _mm_or_ps(vec0.get128(), vec1.get128());
+    return boolInVec(bits);
+}
+
+// Logical XOR of two boolInVec values, computed as a bitwise XOR of their masks.
+inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1)
+{
+    const __m128 bits = _mm_xor_ps(vec0.get128(), vec1.get128());
+    return boolInVec(bits);
+}
+
+// Select between vec0 and vec1 through vec_sel, passing select_vec1's mask as the selector.
+inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1)
+{
+    const __m128 picked = vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128());
+    return boolInVec(picked);
+}
+ 
+} // namespace Vectormath
+
+#endif // _BOOLINVEC_H
diff --git a/src/bullet/vectormath/sse/floatInVec.h b/src/bullet/vectormath/sse/floatInVec.h
new file mode 100644
index 00000000..e8ac5959
--- /dev/null
+++ b/src/bullet/vectormath/sse/floatInVec.h
@@ -0,0 +1,340 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _FLOATINVEC_H
+#define _FLOATINVEC_H
+
+#include <math.h>
+#include <xmmintrin.h>
+
+namespace Vectormath {
+
+class boolInVec;
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec class
+//
+
+// Wraps a single float held in an SSE register (__m128).
+// Arithmetic is forwarded to packed SSE intrinsics; comparisons (declared
+// after the class) return a boolInVec mask.
+class floatInVec
+{
+    private:
+        __m128 mData;
+
+    public:
+        // wrap an existing register as-is
+        //
+        inline floatInVec(__m128 vec);
+
+        // default constructor; does no initialization
+        //
+        inline floatInVec() {}
+
+        // matches standard type conversions
+        //
+        inline floatInVec(const boolInVec &vec);
+
+        // construct from a slot of __m128
+        //
+        inline floatInVec(__m128 vec, int slot);
+
+        // explicit cast from float
+        //
+        explicit inline floatInVec(float scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to float
+        //
+        inline float getAsFloat() const;
+#else
+        // implicit cast to float
+        //
+        inline operator float() const;
+#endif
+
+        // get vector data
+        // float value is splatted across all word slots of vector
+        //
+        inline __m128 get128() const;
+
+        // operators
+        //
+        inline const floatInVec operator ++ (int);
+        inline const floatInVec operator -- (int);
+        inline floatInVec& operator ++ ();
+        inline floatInVec& operator -- ();
+        inline const floatInVec operator - () const;
+        inline floatInVec& operator = (const floatInVec &vec);
+        inline floatInVec& operator *= (const floatInVec &vec);
+        inline floatInVec& operator /= (const floatInVec &vec);
+        inline floatInVec& operator += (const floatInVec &vec);
+        inline floatInVec& operator -= (const floatInVec &vec);
+
+        // friend functions
+        //
+        friend inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const floatInVec operator / (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1);
+        // The mask is taken by const reference so that this declaration names the
+        // same function as the select() declared and defined after the class
+        // (a by-value parameter would declare a different, never-defined overload).
+        friend inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec functions
+//
+
+// Free operators on floatInVec.
+// Arithmetic operators return a floatInVec; comparison operators return a
+// boolInVec. All of them are defined further down in this header.
+//
+inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1);
+inline const floatInVec operator / (const floatInVec &vec0, const floatInVec &vec1);
+inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1);
+inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1);
+
+// Select between vec0 and vec1 using a boolInVec mask:
+// false selects vec0, true selects vec1.
+//
+inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec implementation
+//
+
+#include "boolInVec.h"
+
+namespace Vectormath {
+
+// Wraps an existing register without modifying it.
+inline floatInVec::floatInVec(__m128 vec)
+    : mData(vec)
+{
+}
+
+// Converts a boolean mask to a float: blends 0.0f and 1.0f through vec_sel,
+// with the mask of vec as the selector.
+inline floatInVec::floatInVec(const boolInVec &vec)
+{
+    const __m128 zeros = _mm_setzero_ps();
+    const __m128 ones = _mm_set1_ps(1.0f);
+    mData = vec_sel(zeros, ones, vec.get128());
+}
+
+// Reads element `slot` of vec through the SSEFloat union and broadcasts it to
+// every lane.
+inline floatInVec::floatInVec(__m128 vec, int slot)
+{
+    SSEFloat lanes;
+    lanes.m128 = vec;
+    const float picked = lanes.f[slot];
+    mData = _mm_set1_ps(picked);
+}
+
+// Broadcasts a scalar to every lane of the register.
+inline floatInVec::floatInVec(float scalar)
+    : mData(_mm_set1_ps(scalar))
+{
+}
+
+// Extracts the scalar value from the lowest lane of the register.
+// Compiled as getAsFloat() when _VECTORMATH_NO_SCALAR_CAST is defined, and as
+// an implicit conversion to float otherwise.
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline
+float
+floatInVec::getAsFloat() const
+#else
+inline
+floatInVec::operator float() const
+#endif
+{
+    // _mm_cvtss_f32 reads lane 0 directly; it replaces the C-style pointer
+    // cast on the address of the __m128 member and returns the same value.
+    return _mm_cvtss_f32(mData);
+}
+
+// Returns the raw SSE register.
+inline __m128 floatInVec::get128() const
+{
+    return this->mData;
+}
+
+// Post-increment: bumps the value and returns what it was before.
+inline const floatInVec floatInVec::operator ++ (int)
+{
+    const floatInVec previous(mData);
+    ++(*this);
+    return previous;
+}
+
+// Post-decrement: lowers the value and returns what it was before.
+inline const floatInVec floatInVec::operator -- (int)
+{
+    const floatInVec previous(mData);
+    --(*this);
+    return previous;
+}
+
+// Pre-increment: adds 1.0f in every lane.
+inline floatInVec& floatInVec::operator ++ ()
+{
+    const floatInVec one(_mm_set1_ps(1.0f));
+    return (*this += one);
+}
+
+// Pre-decrement: subtracts 1.0f in every lane.
+inline floatInVec& floatInVec::operator -- ()
+{
+    const floatInVec one(_mm_set1_ps(1.0f));
+    return (*this -= one);
+}
+
+// Negation, computed as (0 - value) in every lane.
+inline const floatInVec floatInVec::operator - () const
+{
+    const __m128 zero = _mm_setzero_ps();
+    return floatInVec(_mm_sub_ps(zero, mData));
+}
+
+// Copy assignment: takes over the register of vec.
+inline floatInVec& floatInVec::operator = (const floatInVec &vec)
+{
+    this->mData = vec.mData;
+    return *this;
+}
+
+// In-place multiply, expressed through the binary operator *.
+inline floatInVec& floatInVec::operator *= (const floatInVec &vec)
+{
+    return (*this = (*this * vec));
+}
+
+// In-place divide, expressed through the binary operator /.
+inline floatInVec& floatInVec::operator /= (const floatInVec &vec)
+{
+    return (*this = (*this / vec));
+}
+
+// In-place add, expressed through the binary operator +.
+inline floatInVec& floatInVec::operator += (const floatInVec &vec)
+{
+    return (*this = (*this + vec));
+}
+
+// In-place subtract, expressed through the binary operator -.
+inline floatInVec& floatInVec::operator -= (const floatInVec &vec)
+{
+    return (*this = (*this - vec));
+}
+
+// Lane-wise product of the two registers.
+inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 product = _mm_mul_ps(vec0.get128(), vec1.get128());
+    return floatInVec(product);
+}
+
+// Lane-wise quotient num / den.
+inline const floatInVec operator / (const floatInVec &num, const floatInVec &den)
+{
+    const __m128 quotient = _mm_div_ps(num.get128(), den.get128());
+    return floatInVec(quotient);
+}
+
+// Lane-wise sum of the two registers.
+inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 sum = _mm_add_ps(vec0.get128(), vec1.get128());
+    return floatInVec(sum);
+}
+
+// Lane-wise difference vec0 - vec1.
+inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 difference = _mm_sub_ps(vec0.get128(), vec1.get128());
+    return floatInVec(difference);
+}
+
+// vec0 < vec1, returned as a mask.
+inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1)
+{
+    // cmplt(a, b) is the same predicate as cmpgt(b, a).
+    return boolInVec(_mm_cmplt_ps(vec0.get128(), vec1.get128()));
+}
+
+// vec0 <= vec1, returned as a mask.
+inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1)
+{
+    // cmple(a, b) is the same predicate as cmpge(b, a).
+    return boolInVec(_mm_cmple_ps(vec0.get128(), vec1.get128()));
+}
+
+// vec0 > vec1, returned as a mask.
+inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1)
+{
+    // cmplt(b, a) is the same predicate as cmpgt(a, b).
+    return boolInVec(_mm_cmplt_ps(vec1.get128(), vec0.get128()));
+}
+
+// vec0 >= vec1, returned as a mask.
+inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1)
+{
+    // cmple(b, a) is the same predicate as cmpge(a, b).
+    return boolInVec(_mm_cmple_ps(vec1.get128(), vec0.get128()));
+}
+
+// vec0 == vec1 (float comparison), returned as a mask.
+inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 mask = _mm_cmpeq_ps(vec0.get128(), vec1.get128());
+    return boolInVec(mask);
+}
+
+// vec0 != vec1 (float comparison), returned as a mask.
+inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 mask = _mm_cmpneq_ps(vec0.get128(), vec1.get128());
+    return boolInVec(mask);
+}
+    
+// Blends vec0 and vec1 through vec_sel, using select_vec1 as the mask
+// (false selects vec0, true selects vec1).
+inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1)
+{
+    const __m128 blended = vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128());
+    return floatInVec(blended);
+}
+
+} // namespace Vectormath
+
+#endif // _FLOATINVEC_H
diff --git a/src/bullet/vectormath/sse/mat_aos.h b/src/bullet/vectormath/sse/mat_aos.h
new file mode 100644
index 00000000..a2c66cc5
--- /dev/null
+++ b/src/bullet/vectormath/sse/mat_aos.h
@@ -0,0 +1,2190 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef _VECTORMATH_MAT_AOS_CPP_H
+#define _VECTORMATH_MAT_AOS_CPP_H
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Constants
+// For shuffles, words are labeled [x,y,z,w] for the first operand and
+// [a,b,c,d] for the second, so e.g. ZBWX means { first.z, second.y, first.w,
+// first.x }.
+// NOTE(review): these permute masks are written with the vec_uchar16 /
+// vec_uint4 vector types. In the part of this file reviewed here they are only
+// referenced from commented-out vec_perm calls, which have been replaced by
+// _mm_shuffle_ps + vec_sel sequences -- confirm whether they are still needed.
+
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
+#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
+#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
+#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+// Copy constructor: duplicates the three columns of mat.
+VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Matrix3 & mat )
+    : mCol0( mat.mCol0 ),
+      mCol1( mat.mCol1 ),
+      mCol2( mat.mCol2 )
+{
+}
+
+// Builds every column from Vector3( scalar ).
+VECTORMATH_FORCE_INLINE Matrix3::Matrix3( float scalar )
+    : mCol0( scalar ),
+      mCol1( scalar ),
+      mCol2( scalar )
+{
+}
+
+// Builds every column from Vector3( scalar ) (floatInVec overload).
+VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const floatInVec &scalar )
+    : mCol0( scalar ),
+      mCol1( scalar ),
+      mCol2( scalar )
+{
+}
+
+// Builds a rotation matrix from a quaternion (x, y, z, w).
+// The parameter name indicates the quaternion is expected to be unit length;
+// nothing here normalizes it.
+// Three intermediate vectors are computed (see the lane comments below) and
+// then blended lane by lane so that, per the x/y/z lanes:
+//   col0 = ( 1 - 2y2 - 2z2,  2xy + 2zw,      2xz - 2yw     )
+//   col1 = ( 2xy - 2zw,      1 - 2z2 - 2x2,  2yz + 2xw     )
+//   col2 = ( 2xz + 2yw,      2yz - 2xw,      1 - 2x2 - 2y2 )
+// (vec_sel, defined elsewhere, is used as a per-lane blend that takes the
+// second operand where the mask is set.)
+VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Quat &unitQuat )
+{
+    __m128 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
+    __m128 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+	// lane masks selecting only x, respectively only z
+	VM_ATTRIBUTE_ALIGN16 unsigned int sx[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int sz[4] = {0, 0, 0xffffffff, 0};
+	__m128 select_x = _mm_load_ps((float *)sx);
+	__m128 select_z = _mm_load_ps((float *)sz);
+
+    // doubled quaternion, broadcast w, and rotated copies of the components
+    xyzw_2 = _mm_add_ps( unitQuat.get128(), unitQuat.get128() );
+    wwww = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,3,3,3) );
+	yzxw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,0,2,1) );
+	zxyw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,1,0,2) );
+    yzxw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,0,2,1) );
+    zxyw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,1,0,2) );
+
+    tmp0 = _mm_mul_ps( yzxw_2, wwww );									// tmp0 = 2yw, 2zw, 2xw, 2w2
+	tmp1 = _mm_sub_ps( _mm_set1_ps(1.0f), _mm_mul_ps(yzxw, yzxw_2) );	// tmp1 = 1 - 2y2, 1 - 2z2, 1 - 2x2, 1 - 2w2
+    tmp2 = _mm_mul_ps( yzxw, xyzw_2 );									// tmp2 = 2xy, 2yz, 2xz, 2w2
+    tmp0 = _mm_add_ps( _mm_mul_ps(zxyw, xyzw_2), tmp0 );				// tmp0 = 2yw + 2zx, 2zw + 2xy, 2xw + 2yz, 2w2 + 2w2
+    tmp1 = _mm_sub_ps( tmp1, _mm_mul_ps(zxyw, zxyw_2) );				// tmp1 = 1 - 2y2 - 2z2, 1 - 2z2 - 2x2, 1 - 2x2 - 2y2, 1 - 2w2 - 2w2
+    tmp2 = _mm_sub_ps( tmp2, _mm_mul_ps(zxyw_2, wwww) );				// tmp2 = 2xy - 2zw, 2yz - 2xw, 2xz - 2yw, 2w2 -2w2
+
+    // pick the x lane, then the z lane, of each column from the right temporary
+    tmp3 = vec_sel( tmp0, tmp1, select_x );
+    tmp4 = vec_sel( tmp1, tmp2, select_x );
+    tmp5 = vec_sel( tmp2, tmp0, select_x );
+    mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) );
+    mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) );
+    mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) );
+}
+
+// Builds the matrix from its three columns.
+VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2 )
+    : mCol0( _col0 ),
+      mCol1( _col1 ),
+      mCol2( _col2 )
+{
+}
+
+// Replaces column 0; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol0( const Vector3 &_col0 )
+{
+    this->mCol0 = _col0;
+    return *this;
+}
+
+// Replaces column 1; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol1( const Vector3 &_col1 )
+{
+    this->mCol1 = _col1;
+    return *this;
+}
+
+// Replaces column 2; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol2( const Vector3 &_col2 )
+{
+    this->mCol2 = _col2;
+    return *this;
+}
+
+// Replaces the column at index col, addressed relative to mCol0.
+// No range check is performed on col.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol( int col, const Vector3 &vec )
+{
+    Vector3 *const columns = &mCol0;
+    columns[col] = vec;
+    return *this;
+}
+
+// Writes a row: element i of vec goes into column i at the given row.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setRow( int row, const Vector3 &vec )
+{
+    this->mCol0.setElem( row, vec.getElem( 0 ) );
+    this->mCol1.setElem( row, vec.getElem( 1 ) );
+    this->mCol2.setElem( row, vec.getElem( 2 ) );
+    return *this;
+}
+
+// Sets one element from a float, writing directly into the addressed column.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setElem( int col, int row, float val )
+{
+    Vector3 &column = (*this)[col];
+    column.setElem(row, val);
+    return *this;
+}
+
+// Sets one element from a floatInVec: copies the column out, edits the copy,
+// then stores it back.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setElem( int col, int row, const floatInVec &val )
+{
+    Vector3 edited = getCol( col );
+    edited.setElem( row, val );
+    setCol( col, edited );
+    return *this;
+}
+
+// Returns the element at (col, row).
+VECTORMATH_FORCE_INLINE const floatInVec Matrix3::getElem( int col, int row ) const
+{
+    const Vector3 column = getCol( col );
+    return column.getElem( row );
+}
+
+// Returns a copy of column 0.
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+// Returns a copy of column 1.
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+// Returns a copy of column 2.
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+// Returns a copy of the column at index col, addressed relative to mCol0.
+// No range check is performed on col.
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol( int col ) const
+{
+    const Vector3 *const columns = &mCol0;
+    return columns[col];
+}
+
+// Gathers the given row: one element from each of the three columns.
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getRow( int row ) const
+{
+    return Vector3(
+        mCol0.getElem( row ),
+        mCol1.getElem( row ),
+        mCol2.getElem( row )
+    );
+}
+
+// Mutable access to the column at index col (no range check).
+VECTORMATH_FORCE_INLINE Vector3 & Matrix3::operator []( int col )
+{
+    Vector3 *const columns = &mCol0;
+    return columns[col];
+}
+
+// Read-only access: returns a copy of the column at index col (no range check).
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::operator []( int col ) const
+{
+    const Vector3 *const columns = &mCol0;
+    return columns[col];
+}
+
+// Copy assignment: copies the three columns of mat.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator =( const Matrix3 & mat )
+{
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    return *this;
+}
+
+// Returns the transpose of mat.
+// The columns are interleaved with vec_mergeh / vec_mergel (defined elsewhere;
+// presumably the low/high lane interleaves) and the remaining lanes are fixed
+// up with _mm_shuffle_ps + vec_sel, which stand in for the commented-out
+// vec_perm calls. Only the x/y/z lanes of each result are meaningful.
+VECTORMATH_FORCE_INLINE const Matrix3 transpose( const Matrix3 & mat )
+{
+    __m128 tmp0, tmp1, res0, res1, res2;
+    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
+    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
+    res0 = vec_mergeh( tmp0, mat.getCol1().get128() );
+    //res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
+	// mask selecting only the y lane
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	// res1 = { tmp0.z, col1.y, tmp0.w, tmp0.x }
+	res1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
+	res1 = vec_sel(res1, mat.getCol1().get128(), select_y);
+    //res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
+	// res2 = { tmp1.x, col1.z, tmp1.y, tmp1.x }
+	res2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
+	res2 = vec_sel(res2, vec_splat(mat.getCol1().get128(), 2), select_y);
+    return Matrix3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3( res2 )
+    );
+}
+
+// Returns the inverse of mat.
+// Cross products of the column pairs are computed, the determinant is taken as
+// dot( col0 x col1, col2 ), the cross products are rearranged with the same
+// merge/shuffle/select sequence used by transpose(), and each result is scaled
+// by recipf4( det ) (recipf4 is defined elsewhere; presumably a reciprocal).
+// NOTE(review): there is no check for a zero determinant in this function.
+VECTORMATH_FORCE_INLINE const Matrix3 inverse( const Matrix3 & mat )
+{
+    __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
+    tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );
+    tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );
+    tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );
+    dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );
+    // broadcast lane 0 of the dot product to all lanes
+    dot = vec_splat( dot, 0 );
+    invdet = recipf4( dot );
+    tmp3 = vec_mergeh( tmp0, tmp2 );
+    tmp4 = vec_mergel( tmp0, tmp2 );
+    inv0 = vec_mergeh( tmp3, tmp1 );
+    //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+	// mask selecting only the y lane
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	// inv1 = { tmp3.z, tmp1.y, tmp3.w, tmp3.x }
+	inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
+	inv1 = vec_sel(inv1, tmp1, select_y);
+    //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+	// inv2 = { tmp4.x, tmp1.z, tmp4.y, tmp4.x }
+	inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
+	inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
+    inv0 = vec_mul( inv0, invdet );
+    inv1 = vec_mul( inv1, invdet );
+	inv2 = vec_mul( inv2, invdet );
+    return Matrix3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 )
+    );
+}
+
+// Determinant as the triple product dot( col2, col0 x col1 ).
+VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix3 & mat )
+{
+    const Vector3 col0CrossCol1 = cross( mat.getCol0(), mat.getCol1() );
+    return dot( mat.getCol2(), col0CrossCol1 );
+}
+
+// Column-wise sum of the two matrices.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
+{
+    const Vector3 sum0 = mCol0 + mat.mCol0;
+    const Vector3 sum1 = mCol1 + mat.mCol1;
+    const Vector3 sum2 = mCol2 + mat.mCol2;
+    return Matrix3( sum0, sum1, sum2 );
+}
+
+// Column-wise difference of the two matrices.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
+{
+    const Vector3 diff0 = mCol0 - mat.mCol0;
+    const Vector3 diff1 = mCol1 - mat.mCol1;
+    const Vector3 diff2 = mCol2 - mat.mCol2;
+    return Matrix3( diff0, diff1, diff2 );
+}
+
+// In-place sum, expressed through the binary operator +.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
+{
+    return ( *this = ( *this + mat ) );
+}
+
+// In-place difference, expressed through the binary operator -.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
+{
+    return ( *this = ( *this - mat ) );
+}
+
+// Negates every column.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator -( ) const
+{
+    const Vector3 neg0 = -mCol0;
+    const Vector3 neg1 = -mCol1;
+    const Vector3 neg2 = -mCol2;
+    return Matrix3( neg0, neg1, neg2 );
+}
+
+// Applies the Vector3 absPerElem to each column.
+VECTORMATH_FORCE_INLINE const Matrix3 absPerElem( const Matrix3 & mat )
+{
+    const Vector3 abs0 = absPerElem( mat.getCol0() );
+    const Vector3 abs1 = absPerElem( mat.getCol1() );
+    const Vector3 abs2 = absPerElem( mat.getCol2() );
+    return Matrix3( abs0, abs1, abs2 );
+}
+
+// Scales by a float; forwards to the floatInVec overload.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( float scalar ) const
+{
+    const floatInVec factor( scalar );
+    return *this * factor;
+}
+
+// Multiplies every column by scalar.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( const floatInVec &scalar ) const
+{
+    const Vector3 scaled0 = mCol0 * scalar;
+    const Vector3 scaled1 = mCol1 * scalar;
+    const Vector3 scaled2 = mCol2 * scalar;
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// In-place scale by a float; forwards to the floatInVec overload.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( float scalar )
+{
+    const floatInVec factor( scalar );
+    return ( *this *= factor );
+}
+
+// In-place scale, expressed through the binary operator *.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( const floatInVec &scalar )
+{
+    return ( *this = ( *this * scalar ) );
+}
+
+// scalar * matrix for a float; forwards to the floatInVec overload.
+VECTORMATH_FORCE_INLINE const Matrix3 operator *( float scalar, const Matrix3 & mat )
+{
+    const floatInVec factor( scalar );
+    return factor * mat;
+}
+
+// scalar * matrix; forwards to the member form matrix * scalar.
+VECTORMATH_FORCE_INLINE const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat )
+{
+    return ( mat * scalar );
+}
+
+// Matrix * vector: accumulates col0 * vec.x, col1 * vec.y and col2 * vec.z
+// using vec_mul / vec_madd.
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::operator *( const Vector3 &vec ) const
+{
+    // Broadcast each component of vec to all lanes.
+    const __m128 splatX = vec_splat( vec.get128(), 0 );
+    const __m128 splatY = vec_splat( vec.get128(), 1 );
+    const __m128 splatZ = vec_splat( vec.get128(), 2 );
+    __m128 acc = vec_mul( mCol0.get128(), splatX );
+    acc = vec_madd( mCol1.get128(), splatY, acc );
+    acc = vec_madd( mCol2.get128(), splatZ, acc );
+    return Vector3( acc );
+}
+
+// Matrix product: each result column is *this applied to a column of mat.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
+{
+    const Vector3 prod0 = *this * mat.mCol0;
+    const Vector3 prod1 = *this * mat.mCol1;
+    const Vector3 prod2 = *this * mat.mCol2;
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+// In-place matrix product (*this = *this * mat).
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
+{
+    return ( *this = ( *this * mat ) );
+}
+
+// Applies the Vector3 mulPerElem to each pair of matching columns.
+VECTORMATH_FORCE_INLINE const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+    const Vector3 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector3 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector3 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+// Matrix whose columns are Vector3::xAxis(), yAxis() and zAxis().
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::identity( )
+{
+    return Matrix3( Vector3::xAxis( ), Vector3::yAxis( ), Vector3::zAxis( ) );
+}
+
+// Rotation about the x axis from a float angle; forwards to the floatInVec overload.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationX( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationX( angle );
+}
+
+// Rotation about the x axis.
+// With s, c as written by sincosf4 (defined elsewhere; presumably sine and
+// cosine of the angle), the x/y/z lanes of the columns are:
+//   col0 = Vector3::xAxis(),  col1 = ( 0, c, s ),  col2 = ( 0, -s, c )
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationX( const floatInVec &radians )
+{
+    __m128 s, c, res1, res2;
+    __m128 zero;
+	// single-lane masks for y and z
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &s, &c );
+    // start from zero and drop c / s into the y and z lanes
+    res1 = vec_sel( zero, c, select_y );
+    res1 = vec_sel( res1, s, select_z );
+    res2 = vec_sel( zero, negatef4(s), select_y );
+    res2 = vec_sel( res2, c, select_z );
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3( res1 ),
+        Vector3( res2 )
+    );
+}
+
+// Rotation about the y axis from a float angle; forwards to the floatInVec overload.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationY( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationY( angle );
+}
+
+// Rotation about the y axis.
+// With s, c as written by sincosf4 (defined elsewhere; presumably sine and
+// cosine of the angle), the x/y/z lanes of the columns are:
+//   col0 = ( c, 0, -s ),  col1 = Vector3::yAxis(),  col2 = ( s, 0, c )
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationY( const floatInVec &radians )
+{
+    __m128 s, c, res0, res2;
+    __m128 zero;
+	// single-lane masks for x and z
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &s, &c );
+    // start from zero and drop c / s into the x and z lanes
+    res0 = vec_sel( zero, c, select_x );
+    res0 = vec_sel( res0, negatef4(s), select_z );
+    res2 = vec_sel( zero, s, select_x );
+    res2 = vec_sel( res2, c, select_z );
+    return Matrix3(
+        Vector3( res0 ),
+        Vector3::yAxis( ),
+        Vector3( res2 )
+	);
+}
+
+// Rotation about the z axis from a float angle; forwards to the floatInVec overload.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZ( float radians )
+{
+    const floatInVec angle( radians );
+    return rotationZ( angle );
+}
+
+// Rotation about the z axis.
+// With s, c as written by sincosf4 (defined elsewhere; presumably sine and
+// cosine of the angle), the x/y/z lanes of the columns are:
+//   col0 = ( c, s, 0 ),  col1 = ( -s, c, 0 ),  col2 = Vector3::zAxis()
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZ( const floatInVec &radians )
+{
+    __m128 s, c, res0, res1;
+    __m128 zero;
+	// single-lane masks for x and y
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+    zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &s, &c );
+    // start from zero and drop c / s into the x and y lanes
+    res0 = vec_sel( zero, c, select_x );
+    res0 = vec_sel( res0, s, select_y );
+    res1 = vec_sel( zero, negatef4(s), select_x );
+    res1 = vec_sel( res1, c, select_y );
+    return Matrix3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3::zAxis( )
+	);
+}
+
+// Builds a rotation matrix from three angles packed in radiansXYZ.
+// sincosf4 (defined elsewhere; presumably sine/cosine of each lane) is run
+// once on all three angles; the per-axis terms are then arranged into the
+// X*, Y*, Z* helper vectors and combined with multiply / multiply-add.
+// NOTE(review): the function name suggests the composition order is
+// Z * Y * X -- confirm against the class declaration.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZYX( const Vector3 &radiansXYZ )
+{
+    __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    // extend to four lanes with w = 0
+    angles = Vector4( radiansXYZ, 0.0f ).get128();
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    Z0 = vec_mergel( c, s );
+    Z1 = vec_mergel( negS, c );
+	// clear the w lane of Z1
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
+    Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
+	// Y0 = { c.y, c.y, -s.y, -s.x },  Y1 = { s.y, s.y, c.y, c.x }
+	Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
+	Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
+    // broadcast sin / cos of the x angle
+    X0 = vec_splat( s, 0 );
+    X1 = vec_splat( c, 0 );
+    tmp = vec_mul( Z0, Y1 );
+    return Matrix3(
+        Vector3( vec_mul( Z0, Y0 ) ),
+        Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
+        Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) )
+    );
+}
+
+// Axis-angle rotation from a float angle; forwards to the floatInVec overload.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( float radians, const Vector3 &unitVec )
+{
+    const floatInVec angle( radians );
+    return rotation( angle, unitVec );
+}
+
+// Rotation by an angle about an axis.
+// The parameter name indicates the axis is expected to be unit length; nothing
+// here normalizes it.
+// With s, c as written by sincosf4 (defined elsewhere; presumably sine and
+// cosine of the angle), column i is computed as
+//   axis * axis[i] * (1 - c) + tmp_i
+// where the x/y/z lanes of the tmp vectors are
+//   tmp0 = ( c,    z*s, -y*s )
+//   tmp1 = ( -z*s, c,    x*s )
+//   tmp2 = ( y*s, -x*s,  c   )
+// The shuffle + vec_sel pairs stand in for the commented-out vec_perm calls.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( const floatInVec &radians, const Vector3 &unitVec )
+{
+    __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    axis = unitVec.get128();
+    sincosf4( radians.get128(), &s, &c );
+    // broadcast each axis component to all lanes
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
+    axisS = vec_mul( axis, s );
+    negAxisS = negatef4( axisS );
+	// single-lane masks
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
+	tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
+	tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z);
+    //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
+	tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x );
+    //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
+	tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
+	tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y);
+    // put c on the diagonal lane of each column
+    tmp0 = vec_sel( tmp0, c, select_x );
+    tmp1 = vec_sel( tmp1, c, select_y );
+    tmp2 = vec_sel( tmp2, c, select_z );
+    return Matrix3(
+        Vector3( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
+        Vector3( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
+        Vector3( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) )
+    );
+}
+
+// Rotation from a quaternion; forwards to the Matrix3( Quat ) constructor.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( const Quat &unitQuat )
+{
+    const Matrix3 fromQuat( unitQuat );
+    return fromQuat;
+}
+
+// Scale matrix: column i keeps only lane i of scaleVec, every other lane is
+// taken from a zero register.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::scale( const Vector3 &scaleVec )
+{
+    __m128 zero = _mm_setzero_ps();
+    // single-lane masks
+    VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    const Vector3 colX( vec_sel( zero, scaleVec.get128(), select_x ) );
+    const Vector3 colY( vec_sel( zero, scaleVec.get128(), select_y ) );
+    const Vector3 colZ( vec_sel( zero, scaleVec.get128(), select_z ) );
+    return Matrix3( colX, colY, colZ );
+}
+
+// Multiplies column 0/1/2 of mat by the x/y/z component of scaleVec.
+VECTORMATH_FORCE_INLINE const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec )
+{
+    const Vector3 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// Multiplies every column of mat per element by scaleVec.
+VECTORMATH_FORCE_INLINE const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat )
+{
+    const Vector3 scaled0 = mulPerElem( mat.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( mat.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( mat.getCol2(), scaleVec );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// Column-wise select between mat0 and mat1 with a bool selector, forwarded to
+// the Vector3 select for each column.
+VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
+{
+    const Vector3 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( picked0, picked1, picked2 );
+}
+
+// Column-wise select between mat0 and mat1 with a boolInVec selector,
+// forwarded to the Vector3 select for each column.
+VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 )
+{
+    const Vector3 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( picked0, picked1, picked2 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the matrix one row at a time via the Vector3 print.
+VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat )
+{
+    for ( int row = 0; row < 3; ++row )
+    {
+        print( mat.getRow( row ) );
+    }
+}
+
+// Debug helper: prints "<name>:" on its own line, then the matrix rows.
+VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat, const char * name )
+{
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+// Copy constructor: duplicates the four columns of mat.
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Matrix4 & mat )
+    : mCol0( mat.mCol0 ),
+      mCol1( mat.mCol1 ),
+      mCol2( mat.mCol2 ),
+      mCol3( mat.mCol3 )
+{
+}
+
+// Builds every column from Vector4( scalar ).
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( float scalar )
+    : mCol0( scalar ),
+      mCol1( scalar ),
+      mCol2( scalar ),
+      mCol3( scalar )
+{
+}
+
+// Builds every column from Vector4( scalar ) (floatInVec overload).
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const floatInVec &scalar )
+    : mCol0( scalar ),
+      mCol1( scalar ),
+      mCol2( scalar ),
+      mCol3( scalar )
+{
+}
+
+// Promotes a 3x4 transform to 4x4: the three basis columns get w = 0 and
+// the fourth (translation) column gets w = 1.
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Transform3 & mat )
+    : mCol0( Vector4( mat.getCol0(), 0.0f ) )
+    , mCol1( Vector4( mat.getCol1(), 0.0f ) )
+    , mCol2( Vector4( mat.getCol2(), 0.0f ) )
+    , mCol3( Vector4( mat.getCol3(), 1.0f ) )
+{
+}
+
+// Builds the matrix from four explicit columns.
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Vector4 &_col0, const Vector4 &_col1, const Vector4 &_col2, const Vector4 &_col3 )
+    : mCol0( _col0 )
+    , mCol1( _col1 )
+    , mCol2( _col2 )
+    , mCol3( _col3 )
+{
+}
+
+// Builds a 4x4 matrix from a 3x3 block (columns extended with w = 0) and a
+// translation vector (extended with w = 1).
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Matrix3 & mat, const Vector3 &translateVec )
+    : mCol0( Vector4( mat.getCol0(), 0.0f ) )
+    , mCol1( Vector4( mat.getCol1(), 0.0f ) )
+    , mCol2( Vector4( mat.getCol2(), 0.0f ) )
+    , mCol3( Vector4( translateVec, 1.0f ) )
+{
+}
+
+// Builds a 4x4 matrix from a quaternion (converted through Matrix3) and a
+// translation vector.
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Quat &unitQuat, const Vector3 &translateVec )
+{
+    const Matrix3 rot = Matrix3( unitQuat );
+    mCol0 = Vector4( rot.getCol0(), 0.0f );
+    mCol1 = Vector4( rot.getCol1(), 0.0f );
+    mCol2 = Vector4( rot.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+// Replaces column 0; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol0( const Vector4 &_col0 )
+{
+    this->mCol0 = _col0;
+    return *this;
+}
+
+// Replaces column 1; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol1( const Vector4 &_col1 )
+{
+    this->mCol1 = _col1;
+    return *this;
+}
+
+// Replaces column 2; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol2( const Vector4 &_col2 )
+{
+    this->mCol2 = _col2;
+    return *this;
+}
+
+// Replaces column 3; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol3( const Vector4 &_col3 )
+{
+    this->mCol3 = _col3;
+    return *this;
+}
+
+// Replaces the column at index col. The columns are addressed as an array
+// starting at mCol0; col is not range-checked.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol( int col, const Vector4 &vec )
+{
+    Vector4 * const columns = &mCol0;
+    columns[col] = vec;
+    return *this;
+}
+
+// Writes vec into the given row: element i of vec goes to column i.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setRow( int row, const Vector4 &vec )
+{
+    const floatInVec e0 = vec.getElem( 0 );
+    const floatInVec e1 = vec.getElem( 1 );
+    const floatInVec e2 = vec.getElem( 2 );
+    const floatInVec e3 = vec.getElem( 3 );
+    mCol0.setElem( row, e0 );
+    mCol1.setElem( row, e1 );
+    mCol2.setElem( row, e2 );
+    mCol3.setElem( row, e3 );
+    return *this;
+}
+
+// Sets the element at (col, row) in place through operator[].
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setElem( int col, int row, float val )
+{
+    Vector4 & column = (*this)[col];
+    column.setElem( row, val );
+    return *this;
+}
+
+// Sets the element at (col, row): copies the column out, edits the copy,
+// then stores it back.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setElem( int col, int row, const floatInVec &val )
+{
+    Vector4 column = this->getCol( col );
+    column.setElem( row, val );
+    this->setCol( col, column );
+    return *this;
+}
+
+// Returns the element at (col, row).
+VECTORMATH_FORCE_INLINE const floatInVec Matrix4::getElem( int col, int row ) const
+{
+    const Vector4 column = this->getCol( col );
+    return column.getElem( row );
+}
+
+// Returns a copy of column 0.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol0( ) const
+{
+    return this->mCol0;
+}
+
+// Returns a copy of column 1.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol1( ) const
+{
+    return this->mCol1;
+}
+
+// Returns a copy of column 2.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol2( ) const
+{
+    return this->mCol2;
+}
+
+// Returns a copy of column 3.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol3( ) const
+{
+    return this->mCol3;
+}
+
+// Returns a copy of the column at index col (not range-checked); the
+// columns are addressed as an array starting at mCol0.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol( int col ) const
+{
+    const Vector4 * const columns = &mCol0;
+    return columns[col];
+}
+
+// Gathers the given row: element i of the result comes from column i.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getRow( int row ) const
+{
+    const floatInVec fromCol0 = mCol0.getElem( row );
+    const floatInVec fromCol1 = mCol1.getElem( row );
+    const floatInVec fromCol2 = mCol2.getElem( row );
+    const floatInVec fromCol3 = mCol3.getElem( row );
+    return Vector4( fromCol0, fromCol1, fromCol2, fromCol3 );
+}
+
+// Mutable access to the column at index col (not range-checked).
+VECTORMATH_FORCE_INLINE Vector4 & Matrix4::operator []( int col )
+{
+    Vector4 * const columns = &mCol0;
+    return columns[col];
+}
+
+// Read-only access: returns a copy of the column at index col
+// (not range-checked).
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator []( int col ) const
+{
+    const Vector4 * const columns = &mCol0;
+    return columns[col];
+}
+
+// Column-by-column assignment; returns *this.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    this->mCol3 = mat.mCol3;
+    return *this;
+}
+
+// Transposes a 4x4 matrix with two rounds of merge (interleave) operations:
+// first columns 0/2 and 1/3 are interleaved, then the intermediate results
+// are interleaved again to form the output columns.
+VECTORMATH_FORCE_INLINE const Matrix4 transpose( const Matrix4 & mat )
+{
+    const __m128 c0 = mat.getCol0().get128();
+    const __m128 c1 = mat.getCol1().get128();
+    const __m128 c2 = mat.getCol2().get128();
+    const __m128 c3 = mat.getCol3().get128();
+
+    const __m128 hi02 = vec_mergeh( c0, c2 );
+    const __m128 hi13 = vec_mergeh( c1, c3 );
+    const __m128 lo02 = vec_mergel( c0, c2 );
+    const __m128 lo13 = vec_mergel( c1, c3 );
+
+    return Matrix4(
+        Vector4( vec_mergeh( hi02, hi13 ) ),
+        Vector4( vec_mergel( hi02, hi13 ) ),
+        Vector4( vec_mergeh( lo02, lo13 ) ),
+        Vector4( vec_mergel( lo02, lo13 ) )
+    );
+}
+
+// TODO: Tidy
+// Constants used by inverse( const Matrix4 & ) below.
+// _vmathPNPN / _vmathNPNP hold 0x80000000 (the IEEE-754 single-precision
+// sign bit) in alternating lanes; XOR-ing a vector with one of them flips
+// the sign of those lanes (odd lanes for PNPN, even lanes for NPNP).
+static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathPNPN[4] = {0x00000000, 0x80000000, 0x00000000, 0x80000000};
+static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathNPNP[4] = {0x80000000, 0x00000000, 0x80000000, 0x00000000};
+// inverse() only reads the first element (1.0f) of this array, via
+// _mm_load_ss, as the numerator of 1/det.
+static VM_ATTRIBUTE_ALIGN16 const float _vmathZERONE[4] = {1.0f, 0.0f, 0.0f, 1.0f};
+
+// Computes the inverse of a general 4x4 matrix from its minterms
+// (cofactors) and determinant, entirely in SSE registers.
+// The four rows of minterms are built from 2x2 sub-determinants (r1, r2, r3),
+// sign-corrected with the alternating sign masks, scaled by 1/det and then
+// transposed.
+// NOTE: the determinant is never tested against zero; for a singular matrix
+// the division below produces inf/NaN in the result.
+VECTORMATH_FORCE_INLINE const Matrix4 inverse( const Matrix4 & mat )
+{
+	__m128 Va,Vb,Vc;
+	__m128 r1,r2,r3,tt,tt2;
+	__m128 sum,Det,RDet;
+	__m128 trns0,trns1,trns2,trns3;
+
+	// The four columns of the input matrix.
+	__m128 _L1 = mat.getCol0().get128();
+	__m128 _L2 = mat.getCol1().get128();
+	__m128 _L3 = mat.getCol2().get128();
+	__m128 _L4 = mat.getCol3().get128();
+	// Calculating the minterms for the first line.
+
+	// _mm_ror_ps is just a macro using _mm_shuffle_ps().
+	// In the comments below ', " and ^ denote a vector rotated by 1, 2 and 3 lanes.
+	tt = _L4; tt2 = _mm_ror_ps(_L3,1); 
+	Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0));					// V3' dot V4
+	Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2));					// V3' dot V4"
+	Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3));					// V3' dot V4^
+
+	r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));		// V3" dot V4^ - V3^ dot V4"
+	r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));		// V3^ dot V4' - V3' dot V4^
+	r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));		// V3' dot V4" - V3" dot V4'
+
+	// Va, Vb, Vc are reused here for the rotated copies of column 1;
+	// Vb and Vc are read again when building the third line of minterms.
+	tt = _L2;
+	Va = _mm_ror_ps(tt,1);		sum = _mm_mul_ps(Va,r1);
+	Vb = _mm_ror_ps(tt,2);		sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
+	Vc = _mm_ror_ps(tt,3);		sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
+
+	// Calculating the determinant.
+	// (Partial sums only; the reduction is finished further down.)
+	Det = _mm_mul_ps(sum,_L1);
+	Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
+
+	const __m128 Sign_PNPN = _mm_load_ps((float *)_vmathPNPN);
+	const __m128 Sign_NPNP = _mm_load_ps((float *)_vmathNPNP);
+
+	// XOR with the sign mask negates every other minterm.
+	__m128 mtL1 = _mm_xor_ps(sum,Sign_PNPN);
+
+	// Calculating the minterms of the second line (using previous results).
+	tt = _mm_ror_ps(_L1,1);		sum = _mm_mul_ps(tt,r1);
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
+	__m128 mtL2 = _mm_xor_ps(sum,Sign_NPNP);
+
+	// Finishing the determinant: lane 0 becomes (lane 0 - lane 1) of the
+	// partial sums. No test against zero is performed.
+	Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
+
+	// Calculating the minterms of the third line.
+	tt = _mm_ror_ps(_L1,1);
+	Va = _mm_mul_ps(tt,Vb);									// V1' dot V2"
+	Vb = _mm_mul_ps(tt,Vc);									// V1' dot V2^
+	Vc = _mm_mul_ps(tt,_L2);								// V1' dot V2
+
+	r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));		// V1" dot V2^ - V1^ dot V2"
+	r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));		// V1^ dot V2' - V1' dot V2^
+	r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));		// V1' dot V2" - V1" dot V2'
+
+	tt = _mm_ror_ps(_L4,1);		sum = _mm_mul_ps(tt,r1);
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
+	__m128 mtL3 = _mm_xor_ps(sum,Sign_PNPN);
+
+	// Dividing is FASTER than rcp_nr! (Because rcp_nr causes many register-memory RWs).
+	// Loads 1.0f (first element of _vmathZERONE), computes 1/det in lane 0
+	// and broadcasts it to all four lanes.
+	RDet = _mm_div_ss(_mm_load_ss((float *)&_vmathZERONE), Det); // TODO: just 1.0f?
+	RDet = _mm_shuffle_ps(RDet,RDet,0x00);
+
+	// Divide the first 12 minterms by the determinant.
+	mtL1 = _mm_mul_ps(mtL1, RDet);
+	mtL2 = _mm_mul_ps(mtL2, RDet);
+	mtL3 = _mm_mul_ps(mtL3, RDet);
+
+	// Calculate the minterms of the fourth line and divide by the determinant.
+	tt = _mm_ror_ps(_L3,1);		sum = _mm_mul_ps(tt,r1);
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
+	__m128 mtL4 = _mm_xor_ps(sum,Sign_NPNP);
+	mtL4 = _mm_mul_ps(mtL4, RDet);
+
+	// Now we just have to transpose the minterms matrix.
+	trns0 = _mm_unpacklo_ps(mtL1,mtL2);
+	trns1 = _mm_unpacklo_ps(mtL3,mtL4);
+	trns2 = _mm_unpackhi_ps(mtL1,mtL2);
+	trns3 = _mm_unpackhi_ps(mtL3,mtL4);
+	_L1 = _mm_movelh_ps(trns0,trns1);
+	_L2 = _mm_movehl_ps(trns1,trns0);
+	_L3 = _mm_movelh_ps(trns2,trns3);
+	_L4 = _mm_movehl_ps(trns3,trns2);
+
+    return Matrix4(
+        Vector4( _L1 ),
+        Vector4( _L2 ),
+        Vector4( _L3 ),
+        Vector4( _L4 )
+    );
+}
+
+// Inverts mat by taking the xyz part of each column as a Transform3,
+// inverting that, and promoting the result back to a Matrix4. The w
+// components of mat are ignored.
+VECTORMATH_FORCE_INLINE const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    const Transform3 affinePart(
+        mat.getCol0().getXYZ( ),
+        mat.getCol1().getXYZ( ),
+        mat.getCol2().getXYZ( ),
+        mat.getCol3().getXYZ( )
+    );
+    return Matrix4( inverse( affinePart ) );
+}
+
+// Same as affineInverse() but delegates to the Transform3 orthoInverse().
+// The w components of mat are ignored.
+VECTORMATH_FORCE_INLINE const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    const Transform3 affinePart(
+        mat.getCol0().getXYZ( ),
+        mat.getCol1().getXYZ( ),
+        mat.getCol2().getXYZ( ),
+        mat.getCol3().getXYZ( )
+    );
+    return Matrix4( orthoInverse( affinePart ) );
+}
+
+// Returns the determinant of a 4x4 matrix.
+// Uses the same first-line minterm expansion as inverse(): the minterms of
+// the first line are multiplied by column 0 and reduced horizontally with
+// alternating signs. The result is taken from lane 0 of the final vector.
+VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix4 & mat )
+{
+	__m128 Va,Vb,Vc;
+	__m128 r1,r2,r3,tt,tt2;
+	__m128 sum,Det;
+
+	__m128 _L1 = mat.getCol0().get128();
+	__m128 _L2 = mat.getCol1().get128();
+	__m128 _L3 = mat.getCol2().get128();
+	__m128 _L4 = mat.getCol3().get128();
+	// Calculating the minterms for the first line.
+
+	// _mm_ror_ps is just a macro using _mm_shuffle_ps().
+	// In the comments below ', " and ^ denote a vector rotated by 1, 2 and 3 lanes.
+	tt = _L4; tt2 = _mm_ror_ps(_L3,1);
+	Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0));					// V3' dot V4
+	Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2));					// V3' dot V4"
+	Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3));					// V3' dot V4^
+
+	r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));		// V3" dot V4^ - V3^ dot V4"
+	r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));		// V3^ dot V4' - V3' dot V4^
+	r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));		// V3' dot V4" - V3" dot V4'
+
+	tt = _L2;
+	Va = _mm_ror_ps(tt,1);		sum = _mm_mul_ps(Va,r1);
+	Vb = _mm_ror_ps(tt,2);		sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
+	Vc = _mm_ror_ps(tt,3);		sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
+
+	// Calculating the determinant: multiply the minterms by column 0, then
+	// fold the upper two lanes onto the lower two.
+	Det = _mm_mul_ps(sum,_L1);
+	Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
+
+	// The second line of minterms computed by inverse() is not needed for the
+	// determinant, so it is not calculated here.
+
+	// Final reduction: lane 0 = lane 0 - lane 1 of the partial sums.
+	Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
+	return floatInVec(Det, 0);
+}
+
+// Column-wise matrix addition.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    const Vector4 sum0 = mCol0 + mat.mCol0;
+    const Vector4 sum1 = mCol1 + mat.mCol1;
+    const Vector4 sum2 = mCol2 + mat.mCol2;
+    const Vector4 sum3 = mCol3 + mat.mCol3;
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+// Column-wise matrix subtraction.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    const Vector4 diff0 = mCol0 - mat.mCol0;
+    const Vector4 diff1 = mCol1 - mat.mCol1;
+    const Vector4 diff2 = mCol2 - mat.mCol2;
+    const Vector4 diff3 = mCol3 - mat.mCol3;
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+// In-place addition, implemented through operator + and assignment.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    const Matrix4 total = *this + mat;
+    *this = total;
+    return *this;
+}
+
+// In-place subtraction, implemented through operator - and assignment.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    const Matrix4 remainder = *this - mat;
+    *this = remainder;
+    return *this;
+}
+
+// Unary minus: negates every column.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator -( ) const
+{
+    const Vector4 neg0 = -mCol0;
+    const Vector4 neg1 = -mCol1;
+    const Vector4 neg2 = -mCol2;
+    const Vector4 neg3 = -mCol3;
+    return Matrix4( neg0, neg1, neg2, neg3 );
+}
+
+// Applies the per-vector absPerElem() to each of the four columns.
+VECTORMATH_FORCE_INLINE const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    const Vector4 abs0 = absPerElem( mat.getCol0() );
+    const Vector4 abs1 = absPerElem( mat.getCol1() );
+    const Vector4 abs2 = absPerElem( mat.getCol2() );
+    const Vector4 abs3 = absPerElem( mat.getCol3() );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+// Scalar multiply (float): wraps the scalar in a floatInVec and forwards
+// to the floatInVec overload.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( float scalar ) const
+{
+    const floatInVec factor = floatInVec(scalar);
+    return *this * factor;
+}
+
+// Scalar multiply: scales every column by scalar.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const floatInVec &scalar ) const
+{
+    const Vector4 scaled0 = mCol0 * scalar;
+    const Vector4 scaled1 = mCol1 * scalar;
+    const Vector4 scaled2 = mCol2 * scalar;
+    const Vector4 scaled3 = mCol3 * scalar;
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// In-place scalar multiply (float): forwards to the floatInVec overload.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( float scalar )
+{
+    const floatInVec factor = floatInVec(scalar);
+    return *this *= factor;
+}
+
+// In-place scalar multiply, implemented through operator * and assignment.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const floatInVec &scalar )
+{
+    const Matrix4 scaled = *this * scalar;
+    *this = scaled;
+    return *this;
+}
+
+// scalar * matrix (float): forwards to the floatInVec * matrix overload.
+VECTORMATH_FORCE_INLINE const Matrix4 operator *( float scalar, const Matrix4 & mat )
+{
+    const floatInVec factor = floatInVec(scalar);
+    return factor * mat;
+}
+
+// scalar * matrix: evaluated as matrix * scalar.
+VECTORMATH_FORCE_INLINE const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat )
+{
+    const Matrix4 scaled = mat * scalar;
+    return scaled;
+}
+
+// Matrix * 4-D vector: (col0*x + col1*y) + (col2*z + col3*w), with each
+// component of vec broadcast across all lanes.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Vector4 &vec ) const
+{
+    const __m128 v = vec.get128();
+    const __m128 xxxx = _mm_shuffle_ps( v, v, _MM_SHUFFLE(0,0,0,0) );
+    const __m128 yyyy = _mm_shuffle_ps( v, v, _MM_SHUFFLE(1,1,1,1) );
+    const __m128 zzzz = _mm_shuffle_ps( v, v, _MM_SHUFFLE(2,2,2,2) );
+    const __m128 wwww = _mm_shuffle_ps( v, v, _MM_SHUFFLE(3,3,3,3) );
+    const __m128 xyPart = _mm_add_ps( _mm_mul_ps( mCol0.get128(), xxxx ), _mm_mul_ps( mCol1.get128(), yyyy ) );
+    const __m128 zwPart = _mm_add_ps( _mm_mul_ps( mCol2.get128(), zzzz ), _mm_mul_ps( mCol3.get128(), wwww ) );
+    return Vector4( _mm_add_ps( xyPart, zwPart ) );
+}
+
+// Matrix * 3-D vector: (col0*x + col1*y) + col2*z. Column 3 does not
+// contribute.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Vector3 &vec ) const
+{
+    const __m128 v = vec.get128();
+    const __m128 xxxx = _mm_shuffle_ps( v, v, _MM_SHUFFLE(0,0,0,0) );
+    const __m128 yyyy = _mm_shuffle_ps( v, v, _MM_SHUFFLE(1,1,1,1) );
+    const __m128 zzzz = _mm_shuffle_ps( v, v, _MM_SHUFFLE(2,2,2,2) );
+    const __m128 xyPart = _mm_add_ps( _mm_mul_ps( mCol0.get128(), xxxx ), _mm_mul_ps( mCol1.get128(), yyyy ) );
+    const __m128 zPart = _mm_mul_ps( mCol2.get128(), zzzz );
+    return Vector4( _mm_add_ps( xyPart, zPart ) );
+}
+
+// Matrix * 3-D point: (col0*x + col1*y) + (col2*z + col3). Column 3 is
+// added unscaled.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Point3 &pnt ) const
+{
+    const __m128 p = pnt.get128();
+    const __m128 xxxx = _mm_shuffle_ps( p, p, _MM_SHUFFLE(0,0,0,0) );
+    const __m128 yyyy = _mm_shuffle_ps( p, p, _MM_SHUFFLE(1,1,1,1) );
+    const __m128 zzzz = _mm_shuffle_ps( p, p, _MM_SHUFFLE(2,2,2,2) );
+    const __m128 xyPart = _mm_add_ps( _mm_mul_ps( mCol0.get128(), xxxx ), _mm_mul_ps( mCol1.get128(), yyyy ) );
+    const __m128 zPlusOffset = _mm_add_ps( _mm_mul_ps( mCol2.get128(), zzzz ), mCol3.get128() );
+    return Vector4( _mm_add_ps( xyPart, zPlusOffset ) );
+}
+
+// Matrix product: each result column is this matrix times the
+// corresponding column of mat.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Vector4 prod0 = *this * mat.mCol0;
+    const Vector4 prod1 = *this * mat.mCol1;
+    const Vector4 prod2 = *this * mat.mCol2;
+    const Vector4 prod3 = *this * mat.mCol3;
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// In-place matrix product (this = this * mat).
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    const Matrix4 product = *this * mat;
+    *this = product;
+    return *this;
+}
+
+// Matrix4 * Transform3: the first three transform columns are multiplied
+// as Vector3, the fourth is wrapped in a Point3 so that this matrix's
+// column 3 is added in.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Vector4 prod0 = *this * tfrm.getCol0();
+    const Vector4 prod1 = *this * tfrm.getCol1();
+    const Vector4 prod2 = *this * tfrm.getCol2();
+    const Vector4 prod3 = *this * Point3( tfrm.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// In-place product with a Transform3 (this = this * tfrm).
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    const Matrix4 product = *this * tfrm;
+    *this = product;
+    return *this;
+}
+
+// Applies the per-vector mulPerElem() to each pair of corresponding columns.
+VECTORMATH_FORCE_INLINE const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// Builds a matrix whose columns are the x, y, z and w axis vectors.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::identity( )
+{
+    const Vector4 axisX = Vector4::xAxis( );
+    const Vector4 axisY = Vector4::yAxis( );
+    const Vector4 axisZ = Vector4::zAxis( );
+    const Vector4 axisW = Vector4::wAxis( );
+    return Matrix4( axisX, axisY, axisZ, axisW );
+}
+
+// Overwrites the xyz part of columns 0-2 with the columns of mat3.
+// Column 3 is not touched.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    const Vector3 new0 = mat3.getCol0();
+    const Vector3 new1 = mat3.getCol1();
+    const Vector3 new2 = mat3.getCol2();
+    mCol0.setXYZ( new0 );
+    mCol1.setXYZ( new1 );
+    mCol2.setXYZ( new2 );
+    return *this;
+}
+
+// Returns the 3x3 matrix made of the xyz part of columns 0-2.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    const Vector3 xyz0 = mCol0.getXYZ( );
+    const Vector3 xyz1 = mCol1.getXYZ( );
+    const Vector3 xyz2 = mCol2.getXYZ( );
+    return Matrix3( xyz0, xyz1, xyz2 );
+}
+
+// Overwrites the xyz part of column 3 with translateVec.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setTranslation( const Vector3 &translateVec )
+{
+    this->mCol3.setXYZ( translateVec );
+    return *this;
+}
+
+// Returns the xyz part of column 3.
+VECTORMATH_FORCE_INLINE const Vector3 Matrix4::getTranslation( ) const
+{
+    const Vector3 offset = mCol3.getXYZ( );
+    return offset;
+}
+
+// float convenience overload: forwards to rotationX( floatInVec ).
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationX( float radians )
+{
+    const floatInVec angle = floatInVec(radians);
+    return rotationX( angle );
+}
+
+// Builds a rotation about the x axis:
+//   col0 = x axis, col1 = (0, c, s, 0), col2 = (0, -s, c, 0), col3 = w axis
+// where s and c are the sine and cosine produced by sincosf4().
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationX( const floatInVec &radians )
+{
+    VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    __m128 sinV, cosV;
+    const __m128 zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &sinV, &cosV );
+    // Start from zero and drop the cosine / sine into the y and z lanes.
+    const __m128 colY = vec_sel( vec_sel( zero, cosV, select_y ), sinV, select_z );
+    const __m128 colZ = vec_sel( vec_sel( zero, negatef4(sinV), select_y ), cosV, select_z );
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4( colY ),
+        Vector4( colZ ),
+        Vector4::wAxis( )
+    );
+}
+
+// float convenience overload: forwards to rotationY( floatInVec ).
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationY( float radians )
+{
+    const floatInVec angle = floatInVec(radians);
+    return rotationY( angle );
+}
+
+// Builds a rotation about the y axis:
+//   col0 = (c, 0, -s, 0), col1 = y axis, col2 = (s, 0, c, 0), col3 = w axis
+// where s and c are the sine and cosine produced by sincosf4().
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationY( const floatInVec &radians )
+{
+    VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    __m128 sinV, cosV;
+    const __m128 zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &sinV, &cosV );
+    // Start from zero and drop the cosine / sine into the x and z lanes.
+    const __m128 colX = vec_sel( vec_sel( zero, cosV, select_x ), negatef4(sinV), select_z );
+    const __m128 colZ = vec_sel( vec_sel( zero, sinV, select_x ), cosV, select_z );
+    return Matrix4(
+        Vector4( colX ),
+        Vector4::yAxis( ),
+        Vector4( colZ ),
+        Vector4::wAxis( )
+    );
+}
+
+// float convenience overload: forwards to rotationZ( floatInVec ).
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZ( float radians )
+{
+    const floatInVec angle = floatInVec(radians);
+    return rotationZ( angle );
+}
+
+// Builds a rotation about the z axis:
+//   col0 = (c, s, 0, 0), col1 = (-s, c, 0, 0), col2 = z axis, col3 = w axis
+// where s and c are the sine and cosine produced by sincosf4().
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZ( const floatInVec &radians )
+{
+    VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+    __m128 sinV, cosV;
+    const __m128 zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &sinV, &cosV );
+    // Start from zero and drop the cosine / sine into the x and y lanes.
+    const __m128 colX = vec_sel( vec_sel( zero, cosV, select_x ), sinV, select_y );
+    const __m128 colY = vec_sel( vec_sel( zero, negatef4(sinV), select_x ), cosV, select_y );
+    return Matrix4(
+        Vector4( colX ),
+        Vector4( colY ),
+        Vector4::zAxis( ),
+        Vector4::wAxis( )
+    );
+}
+
+// Builds a rotation matrix from three angles packed as (x, y, z) in
+// radiansXYZ. One sincosf4() call evaluates the sines and cosines of all
+// three angles; the lanes are then rearranged so that the three rotation
+// columns can be formed with a handful of multiplies.
+// NOTE(review): the first column works out to (cz*cy, sz*cy, -sy), which
+// matches rotationZ * rotationY * rotationX - confirm against the header docs.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZYX( const Vector3 &radiansXYZ )
+{
+    __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    // The w lane of the angle vector is set to 0.
+    angles = Vector4( radiansXYZ, 0.0f ).get128();
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    // Z0 = (cz, sz, c.w, s.w), Z1 = (-sz, cz, -s.w, c.w) before masking.
+    Z0 = vec_mergel( c, s );
+    Z1 = vec_mergel( negS, c );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
+    // Clear the w lane of Z1.
+    Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );
+    // Y0 = (cy, cy, -sy, -sx), Y1 = (sy, sy, cy, cx).
+	Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );
+	Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );
+    // X0 = sx and X1 = cx broadcast to all lanes.
+    X0 = vec_splat( s, 0 );
+    X1 = vec_splat( c, 0 );
+    tmp = vec_mul( Z0, Y1 );
+    return Matrix4(
+        Vector4( vec_mul( Z0, Y0 ) ),
+        Vector4( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
+        Vector4( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
+        Vector4::wAxis( )
+    );
+}
+
+// float convenience overload: forwards to rotation( floatInVec, Vector3 ).
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( float radians, const Vector3 &unitVec )
+{
+    const floatInVec angle = floatInVec(radians);
+    return rotation( angle, unitVec );
+}
+
+// Builds a rotation of `radians` about the axis unitVec (axis-angle form).
+// Each of the first three columns is computed as
+//   axis * axis[i] * (1 - c) + tmp_i
+// where tmp_i carries c on the diagonal lane and +/- (axis * s) components
+// on the other two lanes. unitVec is used as given; no normalization is
+// performed here.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( const floatInVec &radians, const Vector3 &unitVec )
+{
+    __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    axis = unitVec.get128();
+    sincosf4( radians.get128(), &s, &c );
+    // Broadcast each axis component to all lanes.
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
+    axisS = vec_mul( axis, s );
+    negAxisS = negatef4( axisS );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    // The commented-out vec_perm calls show the permutes that the
+    // shuffle + select pairs below stand in for.
+    //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
+	// tmp0 = (x*s, z*s, -y*s, x*s)
+	tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );
+	tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z);
+    //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
+	// tmp1 = (-z*s, x*s, x*s, x*s)
+	tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x );
+    //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
+	// tmp2 = (y*s, -x*s, x*s, x*s)
+	tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );
+	tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y);
+    // Put the cosine on the diagonal lane of each column.
+    tmp0 = vec_sel( tmp0, c, select_x );
+    tmp1 = vec_sel( tmp1, c, select_y );
+    tmp2 = vec_sel( tmp2, c, select_z );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
+    // Clear the w lane of the axis and of the three partial columns.
+    axis = vec_and( axis, _mm_load_ps( (float *)select_xyz ) );
+    tmp0 = vec_and( tmp0, _mm_load_ps( (float *)select_xyz ) );
+    tmp1 = vec_and( tmp1, _mm_load_ps( (float *)select_xyz ) );
+    tmp2 = vec_and( tmp2, _mm_load_ps( (float *)select_xyz ) );
+    return Matrix4(
+        Vector4( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
+        Vector4( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
+        Vector4( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) ),
+        Vector4::wAxis( )
+    );
+}
+
+// Builds the rotation as a Transform3 from the quaternion, then promotes
+// it to a Matrix4.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( const Quat &unitQuat )
+{
+    const Transform3 rot = Transform3::rotation( unitQuat );
+    return Matrix4( rot );
+}
+
+// Builds a scale matrix: column i holds component i of scaleVec in lane i
+// and zero elsewhere; column 3 is the w axis.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::scale( const Vector3 &scaleVec )
+{
+    VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    const __m128 zero = _mm_setzero_ps();
+    const __m128 factors = scaleVec.get128();
+    const __m128 colX = vec_sel( zero, factors, select_x );
+    const __m128 colY = vec_sel( zero, factors, select_y );
+    const __m128 colZ = vec_sel( zero, factors, select_z );
+    return Matrix4(
+        Vector4( colX ),
+        Vector4( colY ),
+        Vector4( colZ ),
+        Vector4::wAxis( )
+    );
+}
+
+// Scales columns 0-2 of mat by the x, y and z components of scaleVec;
+// column 3 is passed through unchanged.
+VECTORMATH_FORCE_INLINE const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec )
+{
+    const Vector4 scaledCol0 = mat.getCol0() * scaleVec.getX( );
+    const Vector4 scaledCol1 = mat.getCol1() * scaleVec.getY( );
+    const Vector4 scaledCol2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix4( scaledCol0, scaledCol1, scaledCol2, mat.getCol3() );
+}
+
+// Multiplies every column of mat component-wise by (scaleVec, 1), so the
+// w components are left unscaled.
+VECTORMATH_FORCE_INLINE const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat )
+{
+    const Vector4 factors = Vector4( scaleVec, 1.0f );
+    const Vector4 scaledCol0 = mulPerElem( mat.getCol0(), factors );
+    const Vector4 scaledCol1 = mulPerElem( mat.getCol1(), factors );
+    const Vector4 scaledCol2 = mulPerElem( mat.getCol2(), factors );
+    const Vector4 scaledCol3 = mulPerElem( mat.getCol3(), factors );
+    return Matrix4( scaledCol0, scaledCol1, scaledCol2, scaledCol3 );
+}
+
+// Builds a translation matrix: axis vectors in columns 0-2 and
+// (translateVec, 1) in column 3.
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::translation( const Vector3 &translateVec )
+{
+    const Vector4 offset = Vector4( translateVec, 1.0f );
+    return Matrix4( Vector4::xAxis( ), Vector4::yAxis( ), Vector4::zAxis( ), offset );
+}
+
+// Builds a view matrix. An eye frame is assembled from:
+//   z = normalize( eyePos - lookAtPos )
+//   x = normalize( cross( normalize( upVec ), z ) )
+//   y = cross( z, x )
+// with eyePos as the fourth column, and the result is its orthoInverse().
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec )
+{
+    const Vector3 upHint = normalize( upVec );
+    const Vector3 axisZ = normalize( ( eyePos - lookAtPos ) );
+    const Vector3 axisX = normalize( cross( upHint, axisZ ) );
+    const Vector3 axisY = cross( axisZ, axisX );
+    const Matrix4 eyeFrame = Matrix4( Vector4( axisX ), Vector4( axisY ), Vector4( axisZ ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+// Builds a perspective projection matrix. With
+//   f = tan( pi/2 - fovy/2 ) and rangeInv = 1 / ( zNear - zFar ):
+//   col0 = ( f/aspect, 0, 0, 0 )
+//   col1 = ( 0, f, 0, 0 )
+//   col2 = ( 0, 0, (zNear+zFar)*rangeInv, -1 )
+//   col3 = ( 0, 0, zNear*zFar*rangeInv*2, 0 )
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    const float f = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
+    const float rangeInv = 1.0f / ( zNear - zFar );
+    const __m128 zero = _mm_setzero_ps();
+    // Scratch register: reset to zero before each column is filled in.
+    union { __m128 v; float s[4]; } lanes;
+
+    lanes.v = zero;
+    lanes.s[0] = f / aspect;
+    const __m128 colX = lanes.v;
+
+    lanes.v = zero;
+    lanes.s[1] = f;
+    const __m128 colY = lanes.v;
+
+    lanes.v = zero;
+    lanes.s[2] = ( zNear + zFar ) * rangeInv;
+    lanes.s[3] = -1.0f;
+    const __m128 colZ = lanes.v;
+
+    lanes.v = zero;
+    lanes.s[2] = zNear * zFar * rangeInv * 2.0f;
+    const __m128 colW = lanes.v;
+
+    return Matrix4(
+        Vector4( colX ),
+        Vector4( colY ),
+        Vector4( colZ ),
+        Vector4( colW )
+    );
+}
+
+// Builds a perspective projection matrix from the six frustum planes:
+//   col0 = ( 2n/(r-l), 0, 0, 0 )
+//   col1 = ( 0, 2n/(t-b), 0, 0 )
+//   col2 = ( (r+l)/(r-l), (t+b)/(t-b), (n+f)/(n-f), -1 )
+//   col3 = ( 0, 0, 2nf/(n-f), 0 )
+// Reciprocals come from recipf4(). No check is made for degenerate planes
+// (left == right, bottom == top or zNear == zFar).
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    __m128 lbf, rtn;
+    __m128 diff, sum, inv_diff;
+    __m128 diagonal, column, near2;
+    __m128 zero = _mm_setzero_ps();
+    // Pack (left, bottom, zFar) and (right, top, zNear) with every lane
+    // defined. The w lanes are set to 0 and 1 so that the unused w lane of
+    // diff is 1 and its reciprocal is finite; previously those lanes were
+    // read from uninitialized storage.
+    lbf = _mm_setr_ps( left, bottom, zFar, 0.0f );
+    rtn = _mm_setr_ps( right, top, zNear, 1.0f );
+    diff = vec_sub( rtn, lbf );
+    sum  = vec_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    near2 = _mm_set1_ps( zNear );
+    near2 = vec_add( near2, near2 );
+    diagonal = vec_mul( near2, inv_diff );
+    column = vec_mul( sum, inv_diff );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
+    return Matrix4(
+        Vector4( vec_sel( zero, diagonal, select_x ) ),
+        Vector4( vec_sel( zero, diagonal, select_y ) ),
+        // The w lane of column is a placeholder and is replaced by -1 here.
+        Vector4( vec_sel( column, _mm_set1_ps(-1.0f), select_w ) ),
+        Vector4( vec_sel( zero, vec_mul( diagonal, _mm_set1_ps( zFar ) ), select_z ) )
+	);
+}
+
+// Builds an orthographic projection matrix from the six clip planes:
+//   col0 = ( 2/(r-l), 0, 0, 0 )
+//   col1 = ( 0, 2/(t-b), 0, 0 )
+//   col2 = ( 0, 0, 2/(n-f), 0 )
+//   col3 = ( -(r+l)/(r-l), -(t+b)/(t-b), (n+f)/(n-f), 1 )
+// Reciprocals come from recipf4(). No check is made for degenerate planes
+// (left == right, bottom == top or zNear == zFar).
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    __m128 lbf, rtn;
+    __m128 diff, sum, inv_diff, neg_inv_diff;
+    __m128 diagonal, column;
+    __m128 zero = _mm_setzero_ps();
+    // Pack (left, bottom, zFar) and (right, top, zNear) with every lane
+    // defined. The w lanes are set to 0 and 1 so that the unused w lane of
+    // diff is 1 and its reciprocal is finite; previously those lanes were
+    // read from uninitialized storage.
+    lbf = _mm_setr_ps( left, bottom, zFar, 0.0f );
+    rtn = _mm_setr_ps( right, top, zNear, 1.0f );
+    diff = vec_sub( rtn, lbf );
+    sum  = vec_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    neg_inv_diff = negatef4( inv_diff );
+    diagonal = vec_add( inv_diff, inv_diff );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
+    // x and y use the negated reciprocal, z uses the positive one.
+    column = vec_mul( sum, vec_sel( neg_inv_diff, inv_diff, select_z ) ); // TODO: no madds with zero
+    return Matrix4(
+        Vector4( vec_sel( zero, diagonal, select_x ) ),
+        Vector4( vec_sel( zero, diagonal, select_y ) ),
+        Vector4( vec_sel( zero, diagonal, select_z ) ),
+        // The w lane of column is a placeholder and is replaced by 1 here.
+        Vector4( vec_sel( column, _mm_set1_ps(1.0f), select_w ) )
+    );
+}
+
+// Column-wise select: forwards the bool selector to the per-vector select()
+// for each pair of corresponding columns.
+VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
+{
+    const Vector4 pick0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 pick1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 pick2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 pick3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( pick0, pick1, pick2, pick3 );
+}
+
+// Column-wise select: forwards the boolInVec selector to the per-vector
+// select() for each pair of corresponding columns.
+VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 )
+{
+    const Vector4 pick0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 pick1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 pick2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 pick3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( pick0, pick1, pick2, pick3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Prints the matrix row by row using the vector print() overload.
+VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat )
+{
+    for ( int row = 0; row < 4; ++row )
+    {
+        print( mat.getRow( row ) );
+    }
+}
+
+// Prints "<name>:" on its own line, then the matrix rows.
+VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat, const char * name )
+{
+    printf( "%s:\n", name );
+    print( mat );
+}
+
+#endif
+
+// Copy constructor: duplicates all four columns of tfrm.
+VECTORMATH_FORCE_INLINE Transform3::Transform3( const Transform3 & tfrm )
+    : mCol0( tfrm.mCol0 )
+    , mCol1( tfrm.mCol1 )
+    , mCol2( tfrm.mCol2 )
+    , mCol3( tfrm.mCol3 )
+{
+}
+
+// Initializes every column from Vector3( scalar ).
+VECTORMATH_FORCE_INLINE Transform3::Transform3( float scalar )
+    : mCol0( Vector3( scalar ) )
+    , mCol1( Vector3( scalar ) )
+    , mCol2( Vector3( scalar ) )
+    , mCol3( Vector3( scalar ) )
+{
+}
+
+// Initializes every column from Vector3( scalar ) (floatInVec overload).
+VECTORMATH_FORCE_INLINE Transform3::Transform3( const floatInVec &scalar )
+    : mCol0( Vector3( scalar ) )
+    , mCol1( Vector3( scalar ) )
+    , mCol2( Vector3( scalar ) )
+    , mCol3( Vector3( scalar ) )
+{
+}
+
+// Builds the transform from four explicit columns.
+VECTORMATH_FORCE_INLINE Transform3::Transform3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2, const Vector3 &_col3 )
+    : mCol0( _col0 )
+    , mCol1( _col1 )
+    , mCol2( _col2 )
+    , mCol3( _col3 )
+{
+}
+
+// Builds the transform from a 3x3 block and a translation, using the
+// class's own setUpper3x3() / setTranslation() setters.
+VECTORMATH_FORCE_INLINE Transform3::Transform3( const Matrix3 & tfrm, const Vector3 &translateVec )
+{
+    setUpper3x3( tfrm );
+    setTranslation( translateVec );
+}
+
+// Builds the transform from a quaternion (converted through Matrix3) and a
+// translation.
+VECTORMATH_FORCE_INLINE Transform3::Transform3( const Quat &unitQuat, const Vector3 &translateVec )
+{
+    const Matrix3 rot = Matrix3( unitQuat );
+    setUpper3x3( rot );
+    setTranslation( translateVec );
+}
+
+// Replaces column 0; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol0( const Vector3 &_col0 )
+{
+    this->mCol0 = _col0;
+    return *this;
+}
+
+// Replaces column 1; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol1( const Vector3 &_col1 )
+{
+    this->mCol1 = _col1;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol2( const Vector3 &_col2 )
+{
+    this->mCol2 = _col2;   // only column 2 is replaced
+    return ( *this );      // reference to self, so setters can be chained
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol3( const Vector3 &_col3 )
+{
+    this->mCol3 = _col3;   // only column 3 is replaced
+    return ( *this );      // reference to self, so setters can be chained
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol( int col, const Vector3 &vec )
+{
+    ( &mCol0 )[col] = vec;   // columns are addressed as consecutive members starting at mCol0; col is not range-checked
+    return ( *this );
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setRow( int row, const Vector4 &vec )
+{
+    // Element i of vec is written into row `row` of column i.
+    Vector3 * const cols[4] = { &mCol0, &mCol1, &mCol2, &mCol3 };
+    for ( int i = 0; i < 4; ++i )
+        cols[i]->setElem( row, vec.getElem( i ) );
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setElem( int col, int row, float val )
+{
+    this->operator []( col ).setElem( row, val );   // non-const operator[] hands back the column by reference
+    return ( *this );
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setElem( int col, int row, const floatInVec &val )
+{
+    // Read-modify-write on a copy of the addressed column.
+    Vector3 column = this->getCol( col );
+    column.setElem( row, val );
+    this->setCol( col, column );
+    return ( *this );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Transform3::getElem( int col, int row ) const
+{
+    return getCol( col ).getElem( row );   // pick the column first, then the element within it
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol0( ) const
+{
+    return this->mCol0;   // returned by value
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol1( ) const
+{
+    return this->mCol1;   // returned by value
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol2( ) const
+{
+    return this->mCol2;   // returned by value
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol3( ) const
+{
+    return this->mCol3;   // returned by value
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol( int col ) const
+{
+    return ( &mCol0 )[col];   // columns addressed as consecutive members starting at mCol0; col is not range-checked
+}
+
+VECTORMATH_FORCE_INLINE const Vector4 Transform3::getRow( int row ) const
+{
+    return Vector4( this->mCol0.getElem( row ), this->mCol1.getElem( row ), this->mCol2.getElem( row ), this->mCol3.getElem( row ) );   // one element from each of the four columns
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Transform3::operator []( int col )
+{
+    return ( &mCol0 )[col];   // mutable reference to the addressed column; col is not range-checked
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::operator []( int col ) const
+{
+    return ( &mCol0 )[col];   // const flavour: the addressed column is returned by value
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    // Assignment copies the four columns one by one and
+    // returns a reference to this object.
+    mCol0 = tfrm.getCol0(); mCol1 = tfrm.getCol1();
+    mCol2 = tfrm.getCol2(); mCol3 = tfrm.getCol3();
+    return ( *this );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 inverse( const Transform3 & tfrm )
+{ // General inverse of a 3x4 transform. NOTE(review): no guard against a zero determinant is visible here.
+    __m128 inv0, inv1, inv2, inv3;
+    __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
+    __m128 xxxx, yyyy, zzzz;
+    tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() ); // col0 x col1
+    tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() ); // col1 x col2
+    tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() ); // col2 x col0
+    inv3 = negatef4( tfrm.getCol3().get128() ); // negated translation column
+    dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() ); // (col0 x col1) . col2 -- presumably the determinant of the upper 3x3
+    dot = vec_splat( dot, 0 );
+    invdet = recipf4( dot );
+    tmp3 = vec_mergeh( tmp0, tmp2 );
+    tmp4 = vec_mergel( tmp0, tmp2 );
+    inv0 = vec_mergeh( tmp3, tmp1 );
+    xxxx = vec_splat( inv3, 0 );
+    // SSE replacement (shuffle + select) for: inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));
+	inv1 = vec_sel(inv1, tmp1, select_y);
+    // SSE replacement (shuffle + select) for: inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+	inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));
+	inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
+    yyyy = vec_splat( inv3, 1 );
+    zzzz = vec_splat( inv3, 2 );
+    inv3 = vec_mul( inv0, xxxx ); // inv3 accumulates inv0*(-tx) + inv1*(-ty) + inv2*(-tz), assuming vec_madd( a, b, c ) is a*b + c
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    inv0 = vec_mul( inv0, invdet ); // every output column is scaled by the reciprocal computed above
+    inv1 = vec_mul( inv1, invdet );
+    inv2 = vec_mul( inv2, invdet );
+    inv3 = vec_mul( inv3, invdet );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 orthoInverse( const Transform3 & tfrm )
+{ // No determinant or reciprocal is computed here; NOTE(review): presumably valid only when the upper 3x3 is orthonormal -- confirm with callers.
+    __m128 inv0, inv1, inv2, inv3;
+    __m128 tmp0, tmp1;
+    __m128 xxxx, yyyy, zzzz;
+    tmp0 = vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() ); // merge/shuffle sequence below presumably transposes the upper 3x3
+    tmp1 = vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
+    inv3 = negatef4( tfrm.getCol3().get128() ); // negated translation column
+    inv0 = vec_mergeh( tmp0, tfrm.getCol1().get128() );
+    xxxx = vec_splat( inv3, 0 );
+    // SSE replacement (shuffle + select) for: inv1 = vec_perm( tmp0, tfrm.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	inv1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
+	inv1 = vec_sel(inv1, tfrm.getCol1().get128(), select_y);
+    // SSE replacement (shuffle + select) for: inv2 = vec_perm( tmp1, tfrm.getCol1().get128(), _VECTORMATH_PERM_XCYX );
+	inv2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
+	inv2 = vec_sel(inv2, vec_splat(tfrm.getCol1().get128(), 2), select_y);
+    yyyy = vec_splat( inv3, 1 );
+    zzzz = vec_splat( inv3, 2 );
+    inv3 = vec_mul( inv0, xxxx ); // inv3 accumulates inv0*(-tx) + inv1*(-ty) + inv2*(-tz), assuming vec_madd( a, b, c ) is a*b + c
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    // The Vector3 absPerElem() is applied to each of the four columns independently.
+    const Vector3 c0 = absPerElem( tfrm.getCol0() );
+    const Vector3 c1 = absPerElem( tfrm.getCol1() );
+    const Vector3 c2 = absPerElem( tfrm.getCol2() );
+    const Vector3 c3 = absPerElem( tfrm.getCol3() );
+    return Transform3( c0, c1, c2, c3 );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::operator *( const Vector3 &vec ) const
+{ // Transforms a direction vector: only columns 0-2 are used, mCol3 is not read.
+    __m128 res;
+    __m128 xxxx, yyyy, zzzz;
+    xxxx = vec_splat( vec.get128(), 0 );
+    yyyy = vec_splat( vec.get128(), 1 );
+    zzzz = vec_splat( vec.get128(), 2 );
+    res = vec_mul( mCol0.get128(), xxxx ); // res = col0*x + col1*y + col2*z, assuming vec_madd( a, b, c ) is a*b + c
+    res = vec_madd( mCol1.get128(), yyyy, res );
+    res = vec_madd( mCol2.get128(), zzzz, res );
+    return Vector3( res );
+}
+
+VECTORMATH_FORCE_INLINE const Point3 Transform3::operator *( const Point3 &pnt ) const
+{ // Transforms a point: same products as the Vector3 overload, plus mCol3 added in.
+    __m128 tmp0, tmp1, res;
+    __m128 xxxx, yyyy, zzzz;
+    xxxx = vec_splat( pnt.get128(), 0 );
+    yyyy = vec_splat( pnt.get128(), 1 );
+    zzzz = vec_splat( pnt.get128(), 2 );
+    tmp0 = vec_mul( mCol0.get128(), xxxx );
+    tmp1 = vec_mul( mCol1.get128(), yyyy );
+    tmp0 = vec_madd( mCol2.get128(), zzzz, tmp0 ); // tmp0 = col0*x + col2*z, assuming vec_madd( a, b, c ) is a*b + c
+    tmp1 = vec_add( mCol3.get128(), tmp1 ); // tmp1 = col3 + col1*y
+    res = vec_add( tmp0, tmp1 );
+    return Point3( res );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    // Columns 0-2 of tfrm go through the Vector3 overload; column 3 is wrapped
+    // in a Point3 first, so this transform's own mCol3 is added to it.
+    const Vector3 c0 = *this * tfrm.mCol0;
+    const Vector3 c1 = *this * tfrm.mCol1;
+    const Vector3 c2 = *this * tfrm.mCol2;
+    return Transform3( c0, c1, c2, Vector3( *this * Point3( tfrm.mCol3 ) ) );
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    // The product (*this) * tfrm is formed first and then assigned back to this object.
+    return ( *this = *this * tfrm );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    // Matching columns of the two transforms are combined with the Vector3 mulPerElem().
+    const Vector3 c0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 c1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 c2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 c3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( c0, c1, c2, c3 );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::identity( )
+{
+    // Columns 0-2 come from Vector3::xAxis/yAxis/zAxis;
+    // column 3 is Vector3( 0.0f ).
+    const Vector3 ex = Vector3::xAxis( );
+    const Vector3 ey = Vector3::yAxis( );
+    const Vector3 ez = Vector3::zAxis( );
+    return Transform3( ex, ey, ez, Vector3( 0.0f ) );
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    // Only columns 0-2 are overwritten with the matrix columns;
+    // column 3 is left as it was.
+    setCol0( tfrm.getCol0() ).setCol1( tfrm.getCol1() );
+    return setCol2( tfrm.getCol2() );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 Transform3::getUpper3x3( ) const
+{
+    return Matrix3( getCol0(), getCol1(), getCol2() );   // columns 0-2 only; column 3 is not part of the result
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setTranslation( const Vector3 &translateVec )
+{
+    // The translation is stored in column 3.
+    return setCol3( translateVec );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::getTranslation( ) const
+{
+    return getCol3();   // the translation is column 3
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationX( float radians )
+{
+    return Transform3::rotationX( floatInVec( radians ) );   // wrap the float and forward to the floatInVec overload
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationX( const floatInVec &radians )
+{ // Lane notes below assume vec_sel( a, b, mask ) takes b in the lanes where mask is set.
+    __m128 s, c, res1, res2;
+    __m128 zero;
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &s, &c ); // s / c: presumably sine and cosine of the angle
+    res1 = vec_sel( zero, c, select_y );
+    res1 = vec_sel( res1, s, select_z ); // column 1 = ( 0, c, s )
+    res2 = vec_sel( zero, negatef4(s), select_y );
+    res2 = vec_sel( res2, c, select_z ); // column 2 = ( 0, -s, c )
+    return Transform3(
+        Vector3::xAxis( ),
+        Vector3( res1 ),
+        Vector3( res2 ),
+        Vector3( _mm_setzero_ps() )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationY( float radians )
+{
+    return Transform3::rotationY( floatInVec( radians ) );   // wrap the float and forward to the floatInVec overload
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationY( const floatInVec &radians )
+{ // Lane notes below assume vec_sel( a, b, mask ) takes b in the lanes where mask is set.
+    __m128 s, c, res0, res2;
+    __m128 zero;
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &s, &c ); // s / c: presumably sine and cosine of the angle
+    res0 = vec_sel( zero, c, select_x );
+    res0 = vec_sel( res0, negatef4(s), select_z ); // column 0 = ( c, 0, -s )
+    res2 = vec_sel( zero, s, select_x );
+    res2 = vec_sel( res2, c, select_z ); // column 2 = ( s, 0, c )
+    return Transform3(
+        Vector3( res0 ),
+        Vector3::yAxis( ),
+        Vector3( res2 ),
+        Vector3( 0.0f )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZ( float radians )
+{
+    return Transform3::rotationZ( floatInVec( radians ) );   // wrap the float and forward to the floatInVec overload
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZ( const floatInVec &radians )
+{ // Lane notes below assume vec_sel( a, b, mask ) takes b in the lanes where mask is set.
+    __m128 s, c, res0, res1;
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+    __m128 zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &s, &c ); // s / c: presumably sine and cosine of the angle
+    res0 = vec_sel( zero, c, select_x );
+    res0 = vec_sel( res0, s, select_y ); // column 0 = ( c, s, 0 )
+    res1 = vec_sel( zero, negatef4(s), select_x );
+    res1 = vec_sel( res1, c, select_y ); // column 1 = ( -s, c, 0 )
+    return Transform3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3::zAxis( ),
+        Vector3( 0.0f )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZYX( const Vector3 &radiansXYZ )
+{ // Builds the three rotation columns from per-axis sines/cosines; column 3 is Vector3( 0.0f ).
+    __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    angles = Vector4( radiansXYZ, 0.0f ).get128(); // lanes x,y,z = the three angles, lane w = 0
+    sincosf4( angles, &s, &c ); // presumably per-lane sine and cosine
+    negS = negatef4( s );
+    Z0 = vec_mergel( c, s );
+    Z1 = vec_mergel( negS, c );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
+    Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) ); // mask clears lane w of Z1
+	Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) ); // Y0 = ( c.y, c.y, -s.y, -s.x )
+	Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) ); // Y1 = ( s.y, s.y, c.y, c.x )
+    X0 = vec_splat( s, 0 );
+    X1 = vec_splat( c, 0 );
+    tmp = vec_mul( Z0, Y1 );
+    return Transform3(
+        Vector3( vec_mul( Z0, Y0 ) ),
+        Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
+        Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
+        Vector3( 0.0f )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( float radians, const Vector3 &unitVec )
+{
+    return Transform3::rotation( floatInVec( radians ), unitVec );   // wrap the float and forward to the floatInVec overload
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( const floatInVec &radians, const Vector3 &unitVec )
+{
+    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );   // Matrix3 rotation in columns 0-2, Vector3( 0.0f ) as column 3
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( const Quat &unitQuat )
+{
+    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );   // Matrix3 built from the quaternion in columns 0-2, Vector3( 0.0f ) as column 3
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::scale( const Vector3 &scaleVec )
+{ // Each of columns 0-2 keeps a single lane of scaleVec (x, y, z respectively) over zeros, assuming vec_sel( a, b, mask ) takes b where mask is set.
+    __m128 zero = _mm_setzero_ps();
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    return Transform3(
+        Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ),
+        Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ),
+        Vector3( vec_sel( zero, scaleVec.get128(), select_z ) ),
+        Vector3( 0.0f )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec )
+{
+    // Columns 0, 1 and 2 are multiplied by the x, y and z components of
+    // scaleVec respectively; column 3 is passed through unchanged.
+    const Vector3 c0 = tfrm.getCol0() * scaleVec.getX( );
+    const Vector3 c1 = tfrm.getCol1() * scaleVec.getY( );
+    const Vector3 c2 = tfrm.getCol2() * scaleVec.getZ( );
+    return Transform3( c0, c1, c2, tfrm.getCol3() );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm )
+{
+    // All four columns, column 3 included, are multiplied element-wise by scaleVec.
+    const Vector3 c0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 c1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 c2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 c3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( c0, c1, c2, c3 );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::translation( const Vector3 &translateVec )
+{
+    // Columns 0-2 come from Vector3::xAxis/yAxis/zAxis;
+    // the supplied vector becomes column 3.
+    const Vector3 ex = Vector3::xAxis( );
+    const Vector3 ey = Vector3::yAxis( );
+    const Vector3 ez = Vector3::zAxis( );
+    return Transform3( ex, ey, ez, translateVec );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
+{
+    // The Vector3 select() is applied column by column with the same flag.
+    const Vector3 c0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 c1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 c2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 c3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( c0, c1, c2, c3 );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 )
+{
+    // boolInVec flavour: the Vector3 select() is applied column by column with the same flag.
+    const Vector3 c0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 c1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 c2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 c3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( c0, c1, c2, c3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm )
+{
+    // Debug dump: rows 0..2 are printed in order (each row holds four elements).
+    for ( int row = 0; row < 3; ++row )
+        print( tfrm.getRow( row ) );
+}
+
+VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm, const char * name )
+{
+    printf( "%s:\n", name );   // caption line: the caller-supplied name followed by a colon
+    print( tfrm );             // then the rows, via the overload without a name
+}
+
+#endif
+
+VECTORMATH_FORCE_INLINE Quat::Quat( const Matrix3 & tfrm )
+{ // Branch-free: all four candidate quaternions are computed, then one is picked with masks at the end.
+    __m128 res;
+    __m128 col0, col1, col2;
+    __m128 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
+    __m128 zy_xz_yx, yz_zx_xy, sum, diff;
+    __m128 radicand, invSqrt, scale;
+    __m128 res0, res1, res2, res3;
+    __m128 xx, yy, zz;
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
+
+    col0 = tfrm.getCol0().get128();
+    col1 = tfrm.getCol1().get128();
+    col2 = tfrm.getCol2().get128();
+
+    /* four cases: */
+    /* trace > 0 */
+    /* else */
+    /*    xx largest diagonal element */
+    /*    yy largest diagonal element */
+    /*    zz largest diagonal element */
+
+    /* compute quaternion for each case */
+
+    xx_yy = vec_sel( col0, col1, select_y );
+    // The three shuffles below replace these vec_perm calls:
+    //xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX );
+    //yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
+    //zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
+    xx_yy_zz_xx = _mm_shuffle_ps( xx_yy, xx_yy, _MM_SHUFFLE(0,0,1,0) );
+    xx_yy_zz_xx = vec_sel( xx_yy_zz_xx, col2, select_z ); // TODO: Ck
+    yy_zz_xx_yy = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(1,0,2,1) );
+    zz_xx_yy_zz = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(2,1,0,2) );
+
+    diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); // every lane = xx + yy + zz
+    diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz ); // lanes x,y,z = that diagonal element minus the other two
+    radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), _mm_set1_ps(1.0f) ); // lane w uses the sum, lanes x,y,z the differences, each plus 1
+ //   invSqrt = rsqrtf4( radicand );
+	invSqrt = newtonrapson_rsqrt4( radicand );
+
+	
+
+    zy_xz_yx = vec_sel( col0, col1, select_z );									// zy_xz_yx = 00 01 12 03
+    //zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
+	zy_xz_yx = _mm_shuffle_ps( zy_xz_yx, zy_xz_yx, _MM_SHUFFLE(0,1,2,2) );		// zy_xz_yx = 12 12 01 00
+    zy_xz_yx = vec_sel( zy_xz_yx, vec_splat(col2, 0), select_y );				// zy_xz_yx = 12 20 01 00
+    yz_zx_xy = vec_sel( col0, col1, select_x );									// yz_zx_xy = 10 01 02 03
+    //yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
+	yz_zx_xy = _mm_shuffle_ps( yz_zx_xy, yz_zx_xy, _MM_SHUFFLE(0,0,2,0) );		// yz_zx_xy = 10 02 10 10
+	yz_zx_xy = vec_sel( yz_zx_xy, vec_splat(col2, 1), select_x );				// yz_zx_xy = 21 02 10 10
+
+    sum = vec_add( zy_xz_yx, yz_zx_xy );
+    diff = vec_sub( zy_xz_yx, yz_zx_xy );
+
+    scale = vec_mul( invSqrt, _mm_set1_ps(0.5f) ); // per-lane 0.5 * invSqrt; lane i scales candidate res<i> below
+
+    //res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
+	res0 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,1,2,0) );
+	res0 = vec_sel( res0, vec_splat(diff, 0), select_w );  // TODO: Ck
+    //res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
+	res1 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,2) );
+	res1 = vec_sel( res1, vec_splat(diff, 1), select_w );  // TODO: Ck
+    //res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
+	res2 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,1) );
+	res2 = vec_sel( res2, vec_splat(diff, 2), select_w );  // TODO: Ck
+    res3 = diff;
+    res0 = vec_sel( res0, radicand, select_x ); // each candidate takes the radicand in its own lane
+    res1 = vec_sel( res1, radicand, select_y );
+    res2 = vec_sel( res2, radicand, select_z );
+    res3 = vec_sel( res3, radicand, select_w );
+    res0 = vec_mul( res0, vec_splat( scale, 0 ) );
+    res1 = vec_mul( res1, vec_splat( scale, 1 ) );
+    res2 = vec_mul( res2, vec_splat( scale, 2 ) );
+    res3 = vec_mul( res3, vec_splat( scale, 3 ) );
+
+    /* determine case and select answer */
+
+    xx = vec_splat( col0, 0 );
+    yy = vec_splat( col1, 1 );
+    zz = vec_splat( col2, 2 );
+    res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) ); // res1 replaces res0 when yy > xx
+    res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) ); // res2 when zz exceeds both xx and yy
+    res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), _mm_setzero_ps() ) ); // res3 overrides the rest when xx + yy + zz > 0
+    mVec128 = res;
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 outer( const Vector3 &tfrm0, const Vector3 &tfrm1 )
+{
+    // Column i of the result is tfrm0 scaled by component i of tfrm1.
+    const Vector3 c0 = tfrm0 * tfrm1.getX( );
+    const Vector3 c1 = tfrm0 * tfrm1.getY( );
+    const Vector3 c2 = tfrm0 * tfrm1.getZ( );
+    return Matrix3( c0, c1, c2 );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 outer( const Vector4 &tfrm0, const Vector4 &tfrm1 )
+{
+    // Column i of the result is tfrm0 scaled by component i of tfrm1.
+    const Vector4 c0 = tfrm0 * tfrm1.getX( );
+    const Vector4 c1 = tfrm0 * tfrm1.getY( );
+    const Vector4 c2 = tfrm0 * tfrm1.getZ( );
+    const Vector4 c3 = tfrm0 * tfrm1.getW( );
+    return Matrix4( c0, c1, c2, c3 );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat )
+{ // mcol0..2 are rearranged from mat's columns (presumably its transpose), then combined as mcol0*x + mcol1*y + mcol2*z.
+    __m128 tmp0, tmp1, mcol0, mcol1, mcol2, res;
+    __m128 xxxx, yyyy, zzzz;
+    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
+    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
+    xxxx = vec_splat( vec.get128(), 0 );
+    mcol0 = vec_mergeh( tmp0, mat.getCol1().get128() );
+    // SSE replacement (shuffle + select) for: mcol1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	mcol1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));
+	mcol1 = vec_sel(mcol1, mat.getCol1().get128(), select_y);
+    // SSE replacement (shuffle + select) for: mcol2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
+	mcol2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
+	mcol2 = vec_sel(mcol2, vec_splat(mat.getCol1().get128(), 2), select_y);
+    yyyy = vec_splat( vec.get128(), 1 );
+    res = vec_mul( mcol0, xxxx );
+    zzzz = vec_splat( vec.get128(), 2 );
+    res = vec_madd( mcol1, yyyy, res ); // accumulation assumes vec_madd( a, b, c ) is a*b + c
+    res = vec_madd( mcol2, zzzz, res );
+    return Vector3( res );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 crossMatrix( const Vector3 &vec )
+{ // Lane notes assume vec_sel( a, b, mask ) takes b where mask is set; resulting columns are ( 0, z, -y ), ( -z, 0, x ), ( y, -x, 0 ).
+    __m128 neg, res0, res1, res2;
+    neg = negatef4( vec.get128() );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    // SSE replacement (shuffle + select) for: res0 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_XZBX );
+	res0 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,2,2,0) );
+	res0 = vec_sel(res0, vec_splat(neg, 1), select_z);
+    // SSE replacement (splat + select) for: res1 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_CXXX );
+	res1 = vec_sel(vec_splat(vec.get128(), 0), vec_splat(neg, 2), select_x);
+    // SSE replacement (shuffle + select) for: res2 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_YAXX );
+	res2 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,1,1) );
+	res2 = vec_sel(res2, vec_splat(neg, 0), select_y);
+	VM_ATTRIBUTE_ALIGN16 unsigned int filter_x[4] = {0, 0xffffffff, 0xffffffff, 0xffffffff};
+	VM_ATTRIBUTE_ALIGN16 unsigned int filter_y[4] = {0xffffffff, 0, 0xffffffff, 0xffffffff};
+	VM_ATTRIBUTE_ALIGN16 unsigned int filter_z[4] = {0xffffffff, 0xffffffff, 0, 0xffffffff};
+    res0 = vec_and( res0, _mm_load_ps((float *)filter_x ) ); // the filters zero the diagonal lane of each column
+    res1 = vec_and( res1, _mm_load_ps((float *)filter_y ) );
+    res2 = vec_and( res2, _mm_load_ps((float *)filter_z ) ); // TODO: Use selects?
+    return Matrix3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3( res2 )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat )
+{
+    return Matrix3( cross( vec, mat.getCol0( ) ), cross( vec, mat.getCol1( ) ), cross( vec, mat.getCol2( ) ) );   // cross( vec, column ) for each of the three columns
+}
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/src/bullet/vectormath/sse/quat_aos.h b/src/bullet/vectormath/sse/quat_aos.h
new file mode 100644
index 00000000..7eac59fe
--- /dev/null
+++ b/src/bullet/vectormath/sse/quat_aos.h
@@ -0,0 +1,579 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef _VECTORMATH_QUAT_AOS_CPP_H
+#define _VECTORMATH_QUAT_AOS_CPP_H
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+VECTORMATH_FORCE_INLINE void Quat::set128(vec_float4 vec)
+{
+    this->mVec128 = vec;   // raw overwrite of the underlying vector
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w )
+{
+    const __m128 xz = _mm_unpacklo_ps( _x.get128(), _z.get128() );   // ( x0, z0, x1, z1 )
+    const __m128 yw = _mm_unpacklo_ps( _y.get128(), _w.get128() );   // ( y0, w0, y1, w1 )
+    mVec128 = _mm_unpacklo_ps( xz, yw );                             // ( x0, y0, z0, w0 )
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( const Vector3 &xyz, float _w )
+{
+    this->mVec128 = xyz.get128();                  // start from the vector's raw lanes
+    _vmathVfSetElement( this->mVec128, _w, 3 );    // then write _w at index 3
+}
+
+
+
+VECTORMATH_FORCE_INLINE  Quat::Quat(const Quat& quat)
+{
+    this->mVec128 = quat.mVec128;   // copy of the other quaternion's raw vector
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( float _x, float _y, float _z, float _w )
+{
+    mVec128 = _mm_set_ps( _w, _z, _y, _x );   // _mm_set_ps lists lanes high-to-low, so lane 0 = _x ... lane 3 = _w
+}
+
+
+
+
+
+VECTORMATH_FORCE_INLINE Quat::Quat( const Vector3 &xyz, const floatInVec &_w )
+{
+    this->mVec128 = xyz.get128();                                        // start from the vector's raw lanes
+    this->mVec128 = _vmathVfInsert( this->mVec128, _w.get128(), 3 );     // then insert _w at index 3
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( const Vector4 &vec )
+{
+    this->mVec128 = vec.get128();   // all four lanes are taken over unchanged
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( float scalar )
+{
+    this->mVec128 = floatInVec( scalar ).get128();   // goes through a temporary floatInVec and takes its raw vector
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( const floatInVec &scalar )
+{
+    this->mVec128 = scalar.get128();   // the floatInVec's raw vector is stored as-is
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( __m128 vf4 )
+{
+    this->mVec128 = vf4;   // raw SSE register stored as-is
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::identity( )
+{
+    return Quat( ( _VECTORMATH_UNIT_0001 ) );   // built from the _VECTORMATH_UNIT_0001 constant
+}
+
+VECTORMATH_FORCE_INLINE const Quat lerp( float t, const Quat &quat0, const Quat &quat1 )
+{
+    return lerp( floatInVec( t ), quat0, quat1 );   // wrap t and forward to the floatInVec overload
+}
+
+VECTORMATH_FORCE_INLINE const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 )
+{
+    return quat0 + ( quat1 - quat0 ) * t;   // quat0 plus t times the difference; the result is not renormalised here
+}
+
+VECTORMATH_FORCE_INLINE const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 )
+{
+    return slerp( floatInVec( t ), unitQuat0, unitQuat1 );   // wrap t and forward to the floatInVec overload
+}
+
+VECTORMATH_FORCE_INLINE const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 )
+{ // Mask notes assume vec_sel( a, b, mask ) takes b where mask is set and vec_cmpgt( a, b ) is a per-lane a > b.
+    Quat start;
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    __m128 selectMask;
+    cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() );
+    selectMask = (__m128)vec_cmpgt( _mm_setzero_ps(), cosAngle ); // set where the dot product is negative
+    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask ); // in that case both the cosine ...
+    start = Quat( vec_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) ); // ... and the start quaternion are negated
+    selectMask = (__m128)vec_cmpgt( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle ); // set where cosAngle is below the tolerance
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = vec_sub( _mm_set1_ps(1.0f), tttt );
+    angles = vec_mergeh( _mm_set1_ps(1.0f), tttt );
+    angles = vec_mergeh( angles, oneMinusT ); // presumably lanes ( 1, 1-t, t, 1-t ), given how lanes 0, 1 and 2 are used below
+    angles = vec_madd( angles, angle, _mm_setzero_ps() );
+    sines = sinf4( angles );
+    scales = _mm_div_ps( sines, vec_splat( sines, 0 ) ); // every lane divided by lane 0 of sines
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); // sine-based weight under the mask, plain 1-t otherwise
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask ); // sine-based weight under the mask, plain t otherwise
+    return Quat( vec_madd( start.get128(), scale0, vec_mul( unitQuat1.get128(), scale1 ) ) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 )
+{
+    return squad( floatInVec( t ), unitQuat0, unitQuat1, unitQuat2, unitQuat3 );   // wrap t and forward to the floatInVec overload
+}
+
+VECTORMATH_FORCE_INLINE const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 )
+{
+    return slerp( ( floatInVec( 2.0f ) * t ) * ( floatInVec( 1.0f ) - t ), slerp( t, unitQuat0, unitQuat3 ), slerp( t, unitQuat1, unitQuat2 ) );   // slerp of two inner slerps, blended by 2t(1-t)
+}
+
+VECTORMATH_FORCE_INLINE __m128 Quat::get128( ) const
+{
+    return this->mVec128;   // raw vector, returned by value
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator =( const Quat &quat )
+{
+    this->mVec128 = quat.get128();   // copy the raw vector
+    return ( *this );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setXYZ( const Vector3 &vec )
+{
+    VM_ATTRIBUTE_ALIGN16 unsigned int keepW[4] = {0, 0, 0, 0xffffffff};   // mask with only lane 3 set
+    this->mVec128 = vec_sel( vec.get128(), this->mVec128, keepW );        // mask marks lane w for the current value; presumably x,y,z then come from vec
+    return ( *this );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Quat::getXYZ( ) const
+{
+    return Vector3( this->mVec128 );   // Vector3 built straight from the raw vector
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setX( float _x )
+{
+    // x is element 0; the indexed float setter performs the same write.
+    return setElem( 0, _x );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setX( const floatInVec &_x )
+{
+    // x is element 0; the indexed floatInVec setter performs the same insert.
+    return setElem( 0, _x );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Quat::getX( ) const
+{
+    return getElem( 0 );   // x is element 0
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setY( float _y )
+{
+    // y is element 1; the indexed float setter performs the same write.
+    return setElem( 1, _y );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setY( const floatInVec &_y )
+{
+    // y is element 1; the indexed floatInVec setter performs the same insert.
+    return setElem( 1, _y );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Quat::getY( ) const
+{
+    return getElem( 1 );   // y is element 1
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setZ( float _z )
+{
+    // z is element 2; the indexed float setter performs the same write.
+    return setElem( 2, _z );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setZ( const floatInVec &_z )
+{
+    // z is element 2; the indexed floatInVec setter performs the same insert.
+    return setElem( 2, _z );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Quat::getZ( ) const
+{
+    return getElem( 2 );   // z is element 2
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setW( float _w )
+{
+    // w is element 3; the indexed float setter performs the same write.
+    return setElem( 3, _w );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setW( const floatInVec &_w )
+{
+    // w is element 3; the indexed floatInVec setter performs the same insert.
+    return setElem( 3, _w );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Quat::getW( ) const
+{
+    return getElem( 3 );   // w is element 3
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setElem( int idx, float value )
+{
+    _vmathVfSetElement( this->mVec128, value, idx );   // write one element in place; idx is not range-checked here
+    return ( *this );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setElem( int idx, const floatInVec &value )
+{
+    this->mVec128 = _vmathVfInsert( this->mVec128, value.get128(), idx );   // result of the insert helper replaces the stored vector
+    return ( *this );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Quat::getElem( int idx ) const
+{
+    return floatInVec( this->mVec128, idx );   // floatInVec built from the raw vector and the element index
+}
+
+VECTORMATH_FORCE_INLINE VecIdx Quat::operator []( int idx )
+{
+    return VecIdx( this->mVec128, idx );   // VecIdx built from the stored vector and the index
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Quat::operator []( int idx ) const
+{
+    return getElem( idx );   // const access is a plain element read
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator +( const Quat &quat ) const
+{
+    return Quat( _mm_add_ps( this->get128( ), quat.get128( ) ) );   // lane-wise sum of the four components
+}
+
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator -( const Quat &quat ) const
+{
+    return Quat( _mm_sub_ps( this->get128( ), quat.get128( ) ) );   // lane-wise difference of the four components
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator *( float scalar ) const
+{
+    return this->operator *( floatInVec( scalar ) );   // wrap the float and use the floatInVec overload
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator *( const floatInVec &scalar ) const
+{
+    return Quat( _mm_mul_ps( this->get128( ), scalar.get128( ) ) );   // lane-wise product with the scalar's raw vector
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator +=( const Quat &quat )
+{
+    // Built from operator+ followed by assignment.
+    return ( *this = *this + quat );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator -=( const Quat &quat )
+{
+    // Built from binary operator- followed by assignment.
+    return ( *this = *this - quat );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator *=( float scalar )
+{
+    // Built from operator*( float ) followed by assignment.
+    return ( *this = *this * scalar );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator *=( const floatInVec &scalar )
+{
+    // Built from operator*( floatInVec ) followed by assignment.
+    return ( *this = *this * scalar );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator /( float scalar ) const
+{
+    return this->operator /( floatInVec( scalar ) );   // wrap the float and use the floatInVec overload
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator /( const floatInVec &scalar ) const
+{
+    return Quat( _mm_div_ps( this->get128( ), scalar.get128( ) ) );   // lane-wise division; no check for a zero divisor here
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator /=( float scalar )
+{
+    // In-place divide by a plain float; yields *this.
+    return ( *this = *this / scalar );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator /=( const floatInVec &scalar )
+{
+    // In-place divide by a floatInVec; yields *this.
+    return ( *this = *this / scalar );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator -( ) const
+{ // Negates every lane as 0 - q; note that 0 - (+0) gives +0, unlike a sign-bit flip.
+	return Quat(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat operator *( float scalar, const Quat &quat )
+{   // scalar * q is q * scalar; go straight to the member overload with the wrapped float.
+    return quat * floatInVec( scalar );
+}
+
+VECTORMATH_FORCE_INLINE const Quat operator *( const floatInVec &scalar, const Quat &quat )
+{ // Scalar-on-the-left form; forwards to Quat::operator*(const floatInVec&).
+    return quat * scalar;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec dot( const Quat &quat0, const Quat &quat1 )
+{ // 4-component dot product (x, y, z and w all take part); lane 0 of the result is wrapped.
+    return floatInVec( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec norm( const Quat &quat )
+{ // NOTE: this is the SQUARED length (dot of quat with itself); no square root is taken - see length().
+    return floatInVec(  _vmathVfDot4( quat.get128(), quat.get128() ), 0 );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec length( const Quat &quat )
+{ // 4-component Euclidean length: full-precision _mm_sqrt_ps of the self dot product.
+    return floatInVec(  _mm_sqrt_ps(_vmathVfDot4( quat.get128(), quat.get128() )), 0 );
+}
+
+VECTORMATH_FORCE_INLINE const Quat normalize( const Quat &quat )
+{   // Scales quat by newtonrapson_rsqrt4 of its squared 4D length; a zero quaternion is not special-cased.
+    vec_float4 lenSqr = _vmathVfDot4( quat.get128(), quat.get128() );
+    return Quat( _mm_mul_ps( quat.get128(), newtonrapson_rsqrt4( lenSqr ) ) );
+}
+
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 )
+{   // Quaternion turning unitVec0 onto unitVec1: xyz = cross / (2 cos(a/2)), w = cos(a/2), using 2 + 2 cos(a) = (2 cos(a/2))^2. Inputs must be unit length.
+    Vector3 crossVec;
+    __m128 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
+    cosAngleX2Plus2 = vec_madd( cosAngle, _mm_set1_ps(2.0f), _mm_set1_ps(2.0f) );
+    recipCosHalfAngleX2 = newtonrapson_rsqrt4( cosAngleX2Plus2 ); // refined rsqrt as in normalize(): the raw _mm_rsqrt_ps estimate (~12 bits) scaled xyz and w alike and left the result non-unit
+    cosHalfAngleX2 = vec_mul( recipCosHalfAngleX2, cosAngleX2Plus2 );
+    crossVec = cross( unitVec0, unitVec1 );
+    res = vec_mul( crossVec.get128(), recipCosHalfAngleX2 );
+	VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // mask selecting only the w lane
+    res = vec_sel( res, vec_mul( cosHalfAngleX2, _mm_set1_ps(0.5f) ), sw );
+    return Quat( res );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotation( float radians, const Vector3 &unitVec )
+{ // Plain-float convenience overload; wraps radians and forwards to the floatInVec version.
+    return rotation( floatInVec(radians), unitVec );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotation( const floatInVec &radians, const Vector3 &unitVec )
+{
+    // Axis-angle form: xyz = unitVec * sin(radians/2), w = cos(radians/2)
+    // (vec_sel is assumed to take its second operand where the mask is set).
+    __m128 sinHalf, cosHalf;
+    sincosf4( vec_mul( radians.get128(), _mm_set1_ps(0.5f) ), &sinHalf, &cosHalf );
+    VM_ATTRIBUTE_ALIGN16 unsigned int wLane[4] = {0, 0, 0, 0xffffffff};
+    return Quat( vec_sel( vec_mul( unitVec.get128(), sinHalf ), cosHalf, wLane ) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotationX( float radians )
+{ // Plain-float convenience overload; wraps radians and forwards to the floatInVec version.
+    return rotationX( floatInVec(radians) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotationX( const floatInVec &radians )
+{
+    // Rotation about X: ( sin(radians/2), 0, 0, cos(radians/2) ), assembled with lane masks.
+    __m128 sinHalf, cosHalf;
+    __m128 halfAngle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    VM_ATTRIBUTE_ALIGN16 unsigned int xLane[4] = {0xffffffff, 0, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int wLane[4] = {0, 0, 0, 0xffffffff};
+    __m128 sinOnly = vec_sel( _mm_setzero_ps(), sinHalf, xLane );
+    return Quat( vec_sel( sinOnly, cosHalf, wLane ) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotationY( float radians )
+{ // Plain-float convenience overload; wraps radians and forwards to the floatInVec version.
+    return rotationY( floatInVec(radians) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotationY( const floatInVec &radians )
+{
+    // Rotation about Y: ( 0, sin(radians/2), 0, cos(radians/2) ), assembled with lane masks.
+    __m128 sinHalf, cosHalf;
+    __m128 halfAngle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    VM_ATTRIBUTE_ALIGN16 unsigned int yLane[4] = {0, 0xffffffff, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int wLane[4] = {0, 0, 0, 0xffffffff};
+    __m128 sinOnly = vec_sel( _mm_setzero_ps(), sinHalf, yLane );
+    return Quat( vec_sel( sinOnly, cosHalf, wLane ) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotationZ( float radians )
+{ // Plain-float convenience overload; wraps radians and forwards to the floatInVec version.
+    return rotationZ( floatInVec(radians) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotationZ( const floatInVec &radians )
+{
+    // Rotation about Z: ( 0, 0, sin(radians/2), cos(radians/2) ), assembled with lane masks.
+    __m128 sinHalf, cosHalf;
+    __m128 halfAngle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) );
+    sincosf4( halfAngle, &sinHalf, &cosHalf );
+    VM_ATTRIBUTE_ALIGN16 unsigned int zLane[4] = {0, 0, 0xffffffff, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int wLane[4] = {0, 0, 0, 0xffffffff};
+    __m128 sinOnly = vec_sel( _mm_setzero_ps(), sinHalf, zLane );
+    return Quat( vec_sel( sinOnly, cosHalf, wLane ) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator *( const Quat &quat ) const
+{ // Quaternion (Hamilton) product this * quat. Comments assume vec_madd(a,b,c) = a*b + c, vec_nmsub(a,b,c) = c - a*b and vec_sld(a,a,n) rotates by n bytes - TODO confirm.
+    __m128 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
+    __m128 product, l_wxyz, r_wxyz, xy, qw;
+    ldata = mVec128;
+    rdata = quat.mVec128;
+    tmp0 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,0,2,1) ); // l as (y, z, x, w)
+    tmp1 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,1,0,2) ); // r as (z, x, y, w)
+    tmp2 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,1,0,2) ); // l as (z, x, y, w)
+    tmp3 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,0,2,1) ); // r as (y, z, x, w)
+    qv = vec_mul( vec_splat( ldata, 3 ), rdata );                 // l.w * r
+    qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv );            // + r.w * l
+    qv = vec_madd( tmp0, tmp1, qv );                              // + first half of cross(l.xyz, r.xyz)
+    qv = vec_nmsub( tmp2, tmp3, qv );                             // - second half of the cross product
+    product = vec_mul( ldata, rdata );                            // (lx*rx, ly*ry, lz*rz, lw*rw)
+    l_wxyz = vec_sld( ldata, ldata, 12 );                         // per the name: l as (w, x, y, z)
+    r_wxyz = vec_sld( rdata, rdata, 12 );
+    qw = vec_nmsub( l_wxyz, r_wxyz, product );                    // lane 3: lw*rw - lz*rz
+    xy = vec_madd( l_wxyz, r_wxyz, product );                     // lane 1: lx*rx + ly*ry
+    qw = vec_sub( qw, vec_sld( xy, xy, 8 ) );                     // lane 3: lw*rw - dot(l.xyz, r.xyz)
+	VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff}; // mask selecting only the w lane
+    return Quat( vec_sel( qv, qw, sw ) );                         // xyz from qv, w from qw
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator *=( const Quat &quat )
+{
+    // In-place quaternion product with quat on the right-hand side; yields *this.
+    return ( *this = *this * quat );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 rotate( const Quat &quat, const Vector3 &vec ) // Rotates vec by quat (presumably expects a unit quaternion - confirm). Same vec_madd/vec_nmsub/vec_sld assumptions as the quaternion product.
+{    __m128 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;
+    qdata = quat.get128();
+    vdata = vec.get128();
+    tmp0 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,0,2,1) ); // q as (y, z, x, w)
+    tmp1 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,1,0,2) ); // v as (z, x, y, w)
+    tmp2 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,1,0,2) ); // q as (z, x, y, w)
+    tmp3 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,0,2,1) ); // v as (y, z, x, w)
+    wwww = vec_splat( qdata, 3 );
+    qv = vec_mul( wwww, vdata );                                  // q.w * v
+    qv = vec_madd( tmp0, tmp1, qv );                              // + cross(q.xyz, v), first half
+    qv = vec_nmsub( tmp2, tmp3, qv );                             //   ... second half
+    product = vec_mul( qdata, vdata );
+    qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product );
+    qw = vec_add( vec_sld( product, product, 8 ), qw );           // lane 0: dot(q.xyz, v)
+    tmp1 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,1,0,2) );        // tmp1/tmp3 are reused for the second cross product, now on qv
+    tmp3 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,0,2,1) );
+    res = vec_mul( vec_splat( qw, 0 ), qdata );                   // dot(q.xyz, v) * q
+    res = vec_madd( wwww, qv, res );                              // + q.w * qv
+    res = vec_madd( tmp0, tmp1, res );                            // + cross(q.xyz, qv), first half
+    res = vec_nmsub( tmp2, tmp3, res );                           //   ... second half
+    return Vector3( res );                                        // lane 3 of res is not cleared
+}
+
+VECTORMATH_FORCE_INLINE const Quat conj( const Quat &quat )
+{   // Conjugate: XOR the sign bit into x, y and z; the w mask word is 0 so w is unchanged.
+    VM_ATTRIBUTE_ALIGN16 unsigned int xyzSignBits[4] = {0x80000000,0x80000000,0x80000000,0};
+    return Quat( vec_xor( quat.get128(), _mm_load_ps( (float *)xyzSignBits ) ) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat select( const Quat &quat0, const Quat &quat1, bool select1 )
+{   // Returns quat1 when select1 is set, else quat0. Selects directly: the boolInVec overload's definition is commented out in this file, so forwarding to it cannot reach a defined function.
+    return Quat( vec_sel( quat0.get128(), quat1.get128(), boolInVec(select1).get128() ) );
+}
+
+//VECTORMATH_FORCE_INLINE const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 )
+//{
+//    return Quat( vec_sel( quat0.get128(), quat1.get128(), select1.get128() ) );
+//}
+
+VECTORMATH_FORCE_INLINE void loadXYZW(Quat& quat, const float* fptr)
+{
+	// Reads four consecutive floats (x, y, z, w) starting at fptr into quat.
+	// Neither path needs fptr to be 16-byte aligned.
+#ifdef USE_SSE3_LDDQU
+	quat = Quat(	SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128		);
+#else
+	SSEFloat lanes;
+	for (int k = 0; k < 4; ++k)
+	{
+		lanes.f[k] = fptr[k];
+	}
+	quat = Quat( lanes.m128 );
+#endif
+}
+
+VECTORMATH_FORCE_INLINE void storeXYZW(const Quat& quat, float* fptr)
+{
+	// Lane-by-lane scalar store of x, y, z, w; four floats are written to fptr.
+	fptr[0] = quat.getX();
+	fptr[1] = quat.getY();
+	fptr[2] = quat.getZ();
+	fptr[3] = quat.getW();
+}
+
+
+
+#ifdef _VECTORMATH_DEBUG
+
+VECTORMATH_FORCE_INLINE void print( const Quat &quat )
+{   // Debug helper: spill the register to plain floats and print x, y, z, w.
+    float lane[4];
+    _mm_storeu_ps( lane, quat.get128() );
+    printf( "( %f %f %f %f )\n", lane[0], lane[1], lane[2], lane[3] );
+}
+
+VECTORMATH_FORCE_INLINE void print( const Quat &quat, const char * name )
+{   // Debug helper: same as print(quat) but prefixed with "name: ".
+    float lane[4];
+    _mm_storeu_ps( lane, quat.get128() );
+    printf( "%s: ( %f %f %f %f )\n", name, lane[0], lane[1], lane[2], lane[3] );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/src/bullet/vectormath/sse/vec_aos.h b/src/bullet/vectormath/sse/vec_aos.h
new file mode 100644
index 00000000..35aeeaf1
--- /dev/null
+++ b/src/bullet/vectormath/sse/vec_aos.h
@@ -0,0 +1,1455 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_CPP_H
+#define _VECTORMATH_VEC_AOS_CPP_H
+
+//-----------------------------------------------------------------------------
+// Constants
+// For permutes, the words of the first operand are labeled [x,y,z,w] and the words of the second [a,b,c,d].
+
+#define _VECTORMATH_PERM_X 0x00010203 // byte indices 0x00-0x03: word x
+#define _VECTORMATH_PERM_Y 0x04050607
+#define _VECTORMATH_PERM_Z 0x08090a0b
+#define _VECTORMATH_PERM_W 0x0c0d0e0f
+#define _VECTORMATH_PERM_A 0x10111213 // byte indices 0x10-0x13: word a
+#define _VECTORMATH_PERM_B 0x14151617
+#define _VECTORMATH_PERM_C 0x18191a1b
+#define _VECTORMATH_PERM_D 0x1c1d1e1f
+#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A } // NOTE(review): the PERM_* vectors use compound-literal vector syntax; no use of them is visible in this part of the file
+#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
+#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
+#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
+#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 } // all bits set in the x word only
+#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 } // y word only
+#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 } // z word only
+#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff } // w word only
+#define _VECTORMATH_UNIT_1000 _mm_setr_ps(1.0f,0.0f,0.0f,0.0f) // unit x axis, lanes in x,y,z,w order
+#define _VECTORMATH_UNIT_0100 _mm_setr_ps(0.0f,1.0f,0.0f,0.0f) // unit y axis
+#define _VECTORMATH_UNIT_0010 _mm_setr_ps(0.0f,0.0f,1.0f,0.0f) // unit z axis
+#define _VECTORMATH_UNIT_0001 _mm_setr_ps(0.0f,0.0f,0.0f,1.0f) // unit w axis
+#define _VECTORMATH_SLERP_TOL 0.999f // slerp() uses the sine-based weights only while cos(angle) is below this; otherwise it falls back to the linear weights
+//_VECTORMATH_SLERP_TOLF
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#define     _vmath_shufps(a, b, immx, immy, immz, immw) _mm_shuffle_ps(a, b, _MM_SHUFFLE(immw, immz, immy, immx)) // takes lane selectors in x,y,z,w order (the reverse of _MM_SHUFFLE's argument order)
+static VECTORMATH_FORCE_INLINE __m128 _vmathVfDot3( __m128 vec0, __m128 vec1 )
+{   // 3-component dot product: x + (y + z) of the lane products; lane 3 is ignored. Assumes vec_splat replicates one lane.
+    __m128 prod = _mm_mul_ps( vec0, vec1 );
+    return _mm_add_ps( vec_splat( prod, 0 ), _mm_add_ps( vec_splat( prod, 1 ), vec_splat( prod, 2 ) ) );
+}
+
+static VECTORMATH_FORCE_INLINE __m128 _vmathVfDot4( __m128 vec0, __m128 vec1 )
+{   // 4-component dot product; the scalar result is present in every lane of the return value.
+    __m128 prod = _mm_mul_ps( vec0, vec1 );
+    __m128 xxxx = _mm_shuffle_ps( prod, prod, _MM_SHUFFLE(0,0,0,0) ), yyyy = _mm_shuffle_ps( prod, prod, _MM_SHUFFLE(1,1,1,1) );
+    __m128 zzzz = _mm_shuffle_ps( prod, prod, _MM_SHUFFLE(2,2,2,2) ), wwww = _mm_shuffle_ps( prod, prod, _MM_SHUFFLE(3,3,3,3) );
+    return _mm_add_ps( xxxx, _mm_add_ps( yyyy, _mm_add_ps( zzzz, wwww ) ) ); // association kept as x + (y + (z + w))
+}
+
+static VECTORMATH_FORCE_INLINE __m128 _vmathVfCross( __m128 vec0, __m128 vec1 )
+{
+    // Cross product from two rotations of each operand:
+    //   a.yzx * b.zxy - a.zxy * b.yzx   (assuming vec_nmsub(a,b,c) is c - a*b)
+    __m128 a_yzx = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,0,2,1) );
+    __m128 b_zxy = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,1,0,2) );
+    __m128 a_zxy = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,1,0,2) );
+    __m128 b_yzx = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,0,2,1) );
+    // Same vec_mul / vec_nmsub calls and operand order as before.
+    return vec_nmsub( a_zxy, b_yzx, vec_mul( a_yzx, b_zxy ) );
+}
+/*
+static VECTORMATH_FORCE_INLINE vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v)
+{
+#if 0
+    vec_int4 bexp;
+    vec_uint4 mant, sign, hfloat;
+    vec_uint4 notZero, isInf;
+    const vec_uint4 hfloatInf = (vec_uint4)(0x00007c00u);
+    const vec_uint4 mergeMant = (vec_uint4)(0x000003ffu);
+    const vec_uint4 mergeSign = (vec_uint4)(0x00008000u);
+
+    sign = vec_sr((vec_uint4)v, (vec_uint4)16);
+    mant = vec_sr((vec_uint4)v, (vec_uint4)13);
+    bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4)23), (vec_int4)0xff);
+
+    notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4)112);
+    isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4)142);
+
+    bexp = _mm_add_ps(bexp, (vec_int4)-112);
+    bexp = vec_sl(bexp, (vec_uint4)10);
+
+    hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
+    hfloat = vec_sel((vec_uint4)(0), hfloat, notZero);
+    hfloat = vec_sel(hfloat, hfloatInf, isInf);
+    hfloat = vec_sel(hfloat, sign, mergeSign);
+
+    return hfloat;
+#else
+	assert(0);
+	return _mm_setzero_ps();
+#endif
+}
+
+static VECTORMATH_FORCE_INLINE vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v)
+{
+#if 0
+    vec_uint4 hfloat_u, hfloat_v;
+    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
+    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
+    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
+    return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
+#else
+	assert(0);
+	return _mm_setzero_si128();
+#endif
+}
+*/
+
+static VECTORMATH_FORCE_INLINE __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot)
+{
+	// Returns dst with lane `slot` replaced by the same lane of src.
+	// The copy goes through SSEFloat's float array; slot is not range-checked.
+	SSEFloat from; from.m128 = src;
+	SSEFloat into; into.m128 = dst;
+	into.f[slot] = from.f[slot];
+	return into.m128;
+}
+
+#define _vmathVfSetElement(vec, scalar, slot) (((float *)&(vec))[(slot)] = (scalar)) // writes scalar into lane slot of vec; arguments and the whole expansion are parenthesised so expression arguments expand safely
+
+static VECTORMATH_FORCE_INLINE __m128 _vmathVfSplatScalar(float scalar)
+{	// Replicates scalar into all four lanes (_mm_set_ps1 is the alternate spelling of _mm_set1_ps).
+	return _mm_set_ps1( scalar );
+}
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+	
+#ifdef _VECTORMATH_NO_SCALAR_CAST // with this macro the proxy converts to floatInVec and exposes getAsFloat() instead of operator float
+VECTORMATH_FORCE_INLINE VecIdx::operator floatInVec() const
+{
+    return floatInVec(ref, i); // lane i of the referenced register
+}
+
+VECTORMATH_FORCE_INLINE float VecIdx::getAsFloat() const
+#else
+VECTORMATH_FORCE_INLINE VecIdx::operator float() const
+#endif
+{ // shared body: whichever signature the preprocessor kept reads lane i as a plain float
+    return ((float *)&ref)[i];
+}
+
+VECTORMATH_FORCE_INLINE float VecIdx::operator =( float scalar )
+{ // Writes scalar into lane i of the referenced register and returns the value written.
+    _vmathVfSetElement(ref, scalar, i);
+    return scalar;
+}
+
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator =( const floatInVec &scalar )
+{ // Copies lane i of scalar's register into lane i of the referenced register; returns scalar.
+    ref = _vmathVfInsert(ref, scalar.get128(), i);
+    return scalar;
+}
+
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator =( const VecIdx& scalar )
+{   // Element-to-element copy: read the source lane as a floatInVec, then assign it through the floatInVec overload.
+    return operator =( floatInVec( scalar.ref, scalar.i ) );
+}
+
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator *=( float scalar )
+{   // Wrap the float and reuse the floatInVec overload.
+    return operator *=( floatInVec( scalar ) );
+}
+
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator *=( const floatInVec &scalar )
+{   // Read lane i, multiply, and store the product back through operator=.
+    return operator =( floatInVec( ref, i ) * scalar );
+}
+
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator /=( float scalar )
+{   // Wrap the float and reuse the floatInVec overload.
+    return operator /=( floatInVec( scalar ) );
+}
+
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator /=( const floatInVec &scalar )
+{ // Read lane i, divide, and store the quotient back. Now uses VECTORMATH_FORCE_INLINE like every sibling operator instead of plain `inline`.
+    return *this = floatInVec(ref, i) / scalar;
+}
+
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator +=( float scalar )
+{   // Wrap the float and reuse the floatInVec overload.
+    return operator +=( floatInVec( scalar ) );
+}
+
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator +=( const floatInVec &scalar )
+{   // Read lane i, add, and store the sum back through operator=.
+    return operator =( floatInVec( ref, i ) + scalar );
+}
+
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator -=( float scalar )
+{   // Wrap the float and reuse the floatInVec overload.
+    return operator -=( floatInVec( scalar ) );
+}
+
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator -=( const floatInVec &scalar )
+{   // Read lane i, subtract, and store the difference back through operator=.
+    return operator =( floatInVec( ref, i ) - scalar );
+}
+
+VECTORMATH_FORCE_INLINE Vector3::Vector3(const Vector3& vec)
+{   // Copies the whole 128-bit register, including the unused fourth lane.
+    mVec128 = vec.mVec128;
+}
+
+VECTORMATH_FORCE_INLINE void Vector3::set128(vec_float4 vec)
+{ // Replaces the whole underlying register, all four lanes, with vec.
+    mVec128 = vec;
+}
+
+
+VECTORMATH_FORCE_INLINE Vector3::Vector3( float _x, float _y, float _z )
+{   // Lanes become (x, y, z, 0); _mm_set_ps takes its arguments high lane first.
+    mVec128 = _mm_set_ps( 0.0f, _z, _y, _x );
+}
+
+VECTORMATH_FORCE_INLINE Vector3::Vector3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z )
+{
+	// unpacklo(x, z) gives (x0, z0, x1, z1); unpacklo of that with y gives (x0, y0, z0, y1).
+	mVec128 = _mm_unpacklo_ps( _mm_unpacklo_ps( _x.get128(), _z.get128() ), _y.get128() );
+}
+
+VECTORMATH_FORCE_INLINE Vector3::Vector3( const Point3 &pnt )
+{   // Reinterprets the point's register as a vector; all four lanes are copied as-is.
+    set128( pnt.get128() );
+}
+
+VECTORMATH_FORCE_INLINE Vector3::Vector3( float scalar )
+{   // Takes the register produced by floatInVec(scalar) unchanged.
+    set128( floatInVec( scalar ).get128() );
+}
+
+VECTORMATH_FORCE_INLINE Vector3::Vector3( const floatInVec &scalar )
+{   // Adopts scalar's 128-bit register unchanged.
+    set128( scalar.get128() );
+}
+
+VECTORMATH_FORCE_INLINE Vector3::Vector3( __m128 vf4 )
+{   // Wraps a raw SSE register; no lane is altered.
+    set128( vf4 );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::xAxis( )
+{   // (1, 0, 0) with a zero fourth lane, the same register as _VECTORMATH_UNIT_1000.
+    return Vector3( 1.0f, 0.0f, 0.0f );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::yAxis( )
+{   // (0, 1, 0) with a zero fourth lane, the same register as _VECTORMATH_UNIT_0100.
+    return Vector3( 0.0f, 1.0f, 0.0f );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::zAxis( )
+{   // (0, 0, 1) with a zero fourth lane, the same register as _VECTORMATH_UNIT_0010.
+    return Vector3( 0.0f, 0.0f, 1.0f );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 )
+{ // Plain-float convenience overload; wraps t and forwards to the floatInVec version.
+    return lerp( floatInVec(t), vec0, vec1 );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 )
+{ // Linear interpolation vec0 + (vec1 - vec0) * t; t is not clamped, so values outside [0,1] extrapolate.
+    return ( vec0 + ( ( vec1 - vec0 ) * t ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
+{ // Plain-float convenience overload; wraps t and forwards to the floatInVec version.
+    return slerp( floatInVec(t), unitVec0, unitVec1 );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
+{ // Spherical interpolation between two unit vectors; falls back to linear weights when they are nearly parallel. Assumes vec_sel(a,b,mask) takes b where the mask is set - TODO confirm.
+    __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
+    __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle ); // all-ones where cosAngle < tolerance, i.e. where the sine weights are used
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
+    angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
+    angles = _mm_unpacklo_ps( angles, oneMinusT );		// angles = 1, 1-t, t, 1-t
+    angles = _mm_mul_ps( angles, angle );                 // angle, (1-t)*angle, t*angle, (1-t)*angle
+    sines = sinf4( angles );
+    scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );  // every sine divided by sin(angle)
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask ); // weight for unitVec0
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );      // weight for unitVec1
+    return Vector3( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
+}
+
+VECTORMATH_FORCE_INLINE __m128 Vector3::get128( ) const
+{ // Returns the raw register by value; the fourth lane holds whatever was last stored there.
+    return mVec128;
+}
+
+VECTORMATH_FORCE_INLINE void loadXYZ(Point3& vec, const float* fptr)
+{
+	// Loads x, y, z from three consecutive floats at fptr; fptr needs no
+	// particular alignment.
+	//
+	// Only fptr[0..2] are read, so a plain float[3] is a valid source and
+	// the load mirrors storeXYZ(const Point3&, float*), which writes three
+	// floats. A full 16-byte fetch (fptr[3] or _mm_lddqu_si128) would run
+	// one float past such a buffer. The fourth lane is set to 0.
+	const float x = fptr[0];
+	const float y = fptr[1];
+	const float z = fptr[2];
+	vec = Point3( _mm_setr_ps( x, y, z, 0.0f ) );
+}
+
+
+
+VECTORMATH_FORCE_INLINE void loadXYZ(Vector3& vec, const float* fptr)
+{
+	// Loads x, y, z from three consecutive floats at fptr; fptr needs no
+	// particular alignment.
+	//
+	// Only fptr[0..2] are read, so a plain float[3] is a valid source and
+	// the load mirrors storeXYZ(const Vector3&, float*), which writes three
+	// floats. A full 16-byte fetch (fptr[3] or _mm_lddqu_si128) would run
+	// one float past such a buffer. The fourth lane is set to 0.
+	const float x = fptr[0];
+	const float y = fptr[1];
+	const float z = fptr[2];
+	vec = Vector3( _mm_setr_ps( x, y, z, 0.0f ) );
+}
+
+VECTORMATH_FORCE_INLINE void storeXYZ( const Vector3 &vec, __m128 * quad )
+{
+	// Overwrites x, y, z of *quad with vec while keeping *quad's existing w lane
+	// (vec_sel is assumed to take its second operand where the mask is set).
+	VM_ATTRIBUTE_ALIGN16 unsigned int keepW[4] = {0, 0, 0, 0xffffffff};
+	*quad = vec_sel( vec.get128(), *quad, keepW );
+}
+
+VECTORMATH_FORCE_INLINE void storeXYZ(const Point3& vec, float* fptr)
+{ // Scalar store of exactly three floats; fptr[3] is never written.
+	fptr[0] = vec.getX();
+	fptr[1] = vec.getY();
+	fptr[2] = vec.getZ();
+}
+
+VECTORMATH_FORCE_INLINE void storeXYZ(const Vector3& vec, float* fptr)
+{
+	// Scalar store of exactly three floats (lanes 0..2); fptr[3] is never written.
+	for (int lane = 0; lane < 3; ++lane)
+		fptr[lane] = vec.getElem( lane );
+}
+
+
+VECTORMATH_FORCE_INLINE void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads )
+{ // Unpacks four xyz triples from 12 packed floats (x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3); each result's fourth lane holds a neighbouring element and is not meaningful.
+	const float *quads = (const float *)threeQuads;
+    vec0 = Vector3(  _mm_load_ps(quads) );      // x0 y0 z0 | x1
+    vec1 = Vector3( _mm_loadu_ps(quads + 3) );  // x1 y1 z1 | x2
+    vec2 = Vector3( _mm_loadu_ps(quads + 6) );  // x2 y2 z2 | x3
+    vec3 = Vector3( _mm_shuffle_ps( threeQuads[2], threeQuads[2], _MM_SHUFFLE(0, 3, 2, 1) ) ); // rotate the last quad to x3 y3 z3 | z2; an unaligned load at quads + 9 would read float 12, one past the three quads
+}
+
+VECTORMATH_FORCE_INLINE void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads )
+{	// Packs four xyz triples into three quads: (x0 y0 z0 x1) (y1 z1 x2 y2) (z2 x3 y3 z3). vec_sel is assumed to take its second operand where the mask is set.
+	VM_ATTRIBUTE_ALIGN16 unsigned int wLane[4] = {0, 0, 0, 0xffffffff};
+	VM_ATTRIBUTE_ALIGN16 unsigned int xLane[4] = {0xffffffff, 0, 0, 0};
+	__m128 x1Splat = _mm_shuffle_ps( vec1.get128(), vec1.get128(), _MM_SHUFFLE(0, 0, 0, 0) );
+	__m128 z2Splat = _mm_shuffle_ps( vec2.get128(), vec2.get128(), _MM_SHUFFLE(2, 2, 2, 2) );
+	threeQuads[0] = vec_sel( vec0.get128(), x1Splat, wLane );
+	threeQuads[1] = _mm_shuffle_ps( vec1.get128(), vec2.get128(), _MM_SHUFFLE(1, 0, 2, 1) );
+	threeQuads[2] = vec_sel( _mm_shuffle_ps( vec3.get128(), vec3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), z2Splat, xLane );
+}
+/*
+VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads )
+{
+	assert(0);
+#if 0
+    __m128 xyz0[3];
+    __m128 xyz1[3];
+    storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
+    storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+#endif
+}
+*/
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator =( const Vector3 &vec )
+{   // Copies all four lanes of vec's register; self-assignment is harmless.
+    set128( vec.get128() );
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setX( float _x )
+{
+    // x is lane 0; the indexed setter performs the lane write and returns *this.
+    return setElem( 0, _x );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setX( const floatInVec &_x )
+{
+    // x is lane 0; the indexed setter inserts lane 0 of _x and returns *this.
+    return setElem( 0, _x );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Vector3::getX( ) const
+{   // x is lane 0.
+    return getElem( 0 );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setY( float _y )
+{
+    // y is lane 1; the indexed setter performs the lane write and returns *this.
+    return setElem( 1, _y );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setY( const floatInVec &_y )
+{
+    // y is lane 1; the indexed setter inserts lane 1 of _y and returns *this.
+    return setElem( 1, _y );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Vector3::getY( ) const
+{   // y is lane 1.
+    return getElem( 1 );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setZ( float _z )
+{
+    // z is lane 2; the indexed setter performs the lane write and returns *this.
+    return setElem( 2, _z );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setZ( const floatInVec &_z )
+{
+    // z is lane 2; the indexed setter inserts lane 2 of _z and returns *this.
+    return setElem( 2, _z );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Vector3::getZ( ) const
+{   // z is lane 2.
+    return getElem( 2 );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setElem( int idx, float value )
+{ // Stores `value` into lane idx of mVec128 in place; idx is used as-is (no range check here).
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setElem( int idx, const floatInVec &value )
+{ // Replaces lane idx of mVec128 with the same lane of value's 128-bit register.
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Vector3::getElem( int idx ) const
+{ // Builds a floatInVec from the register and lane index idx; idx is not range-checked here.
+    return floatInVec( mVec128, idx );
+}
+
+VECTORMATH_FORCE_INLINE VecIdx Vector3::operator []( int idx )
+{ // Non-const access: hands back a VecIdx proxy built from mVec128 and idx so callers can assign through it.
+    return VecIdx( mVec128, idx );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Vector3::operator []( int idx ) const
+{   // Read-only indexing is the same lane read as getElem(); no proxy is involved.
+    return getElem( idx );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator +( const Vector3 &vec ) const
+{   // Lane-wise sum; the fourth lanes are added too but carry no meaning.
+    return Vector3( _mm_add_ps( get128(), vec.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator -( const Vector3 &vec ) const
+{   // Lane-wise difference (this - vec).
+    return Vector3( _mm_sub_ps( get128(), vec.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE const Point3 Vector3::operator +( const Point3 &pnt ) const
+{   // Vector + point yields a point: lane-wise sum wrapped as Point3.
+    return Point3( _mm_add_ps( get128(), pnt.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator *( float scalar ) const
+{   // Wrap the float in a floatInVec and reuse the vector-scalar overload.
+    return operator *( floatInVec( scalar ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator *( const floatInVec &scalar ) const
+{   // Lane-wise product with scalar's register.
+    return Vector3( _mm_mul_ps( get128(), scalar.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator +=( const Vector3 &vec )
+{
+    // In-place add, expressed through the binary operator; yields *this.
+    return ( *this = *this + vec );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator -=( const Vector3 &vec )
+{
+    // In-place subtract, expressed through the binary operator; yields *this.
+    return ( *this = *this - vec );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator *=( float scalar )
+{
+    // In-place scale by a plain float; yields *this.
+    return ( *this = *this * scalar );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator *=( const floatInVec &scalar )
+{
+    // In-place scale by a floatInVec; yields *this.
+    return ( *this = *this * scalar );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator /( float scalar ) const
+{   // Wrap the float in a floatInVec and reuse the vector-scalar overload.
+    return operator /( floatInVec( scalar ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator /( const floatInVec &scalar ) const
+{   // Lane-wise true division (_mm_div_ps); a zero divisor is not guarded against.
+    return Vector3( _mm_div_ps( get128(), scalar.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator /=( float scalar )
+{
+    // In-place divide by a plain float; yields *this.
+    return ( *this = *this / scalar );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator /=( const floatInVec &scalar )
+{
+    // In-place divide by a floatInVec; yields *this.
+    return ( *this = *this / scalar );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator -( ) const
+{
+	// Negates by flipping the IEEE-754 sign bit of every lane (so +0 becomes -0).
+	// The mask is unsigned: 0x80000000 does not fit in int, and brace-initialising an int from it is a narrowing conversion that C++11 rejects.
+	VM_ATTRIBUTE_ALIGN16 static const unsigned int array[] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+	__m128 NEG_MASK = SSEFloat(*(const vec_float4*)array).vf;
+	return Vector3(_mm_xor_ps(get128(),NEG_MASK));
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 operator *( float scalar, const Vector3 &vec )
+{   // scalar * v is v * scalar; go straight to the member overload with the wrapped float.
+    return vec * floatInVec( scalar );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec )
+{ // Scalar-on-the-left form; forwards to Vector3::operator*(const floatInVec&).
+    return vec * scalar;
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{ // Component-wise product vec0 * vec1.
+    return Vector3( _mm_mul_ps( vec0.get128(), vec1.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{ // Component-wise quotient vec0 / vec1; the fourth lanes are divided as well, whatever they hold.
+    return Vector3( _mm_div_ps( vec0.get128(), vec1.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 recipPerElem( const Vector3 &vec )
+{ // Component-wise reciprocal via _mm_rcp_ps, which is a fast APPROXIMATION (not a full-precision 1/x).
+    return Vector3( _mm_rcp_ps( vec.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 absPerElem( const Vector3 &vec )
+{ // Component-wise absolute value, delegated to fabsf4 (defined elsewhere).
+    return Vector3( fabsf4( vec.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{ // Per component: magnitude bits from vec0 combined with the sign bit from vec1.
+	__m128 vmask = toM128(0x7fffffff); // presumably every lane = all bits except the sign bit - confirm against toM128
+	return Vector3( _mm_or_ps(
+		_mm_and_ps   ( vmask, vec0.get128() ),			// Value
+		_mm_andnot_ps( vmask, vec1.get128() ) ) );		// Signs
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{ // Component-wise maximum of the two vectors.
+    return Vector3( _mm_max_ps( vec0.get128(), vec1.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector3 &vec )
+{ // Largest of x, y and z; the fourth lane is not considered.
+    return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{ // Component-wise minimum of the two vectors.
+    return Vector3( _mm_min_ps( vec0.get128(), vec1.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector3 &vec )
+{ // Smallest of x, y and z; the fourth lane is not considered.
+    return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector3 &vec )
+{ // (x + y) + z; the fourth lane is not included.
+    return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) ), vec_splat( vec.get128(), 2 ) ) );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 )
+{ // 3-component dot product; the fourth lanes do not contribute.
+    return floatInVec( _vmathVfDot3( vec0.get128(), vec1.get128() ), 0 );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector3 &vec )
+{ // Squared Euclidean length (dot of vec with itself); no square root is taken.
+    return floatInVec(  _vmathVfDot3( vec.get128(), vec.get128() ), 0 );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec length( const Vector3 &vec )
+{ // Euclidean length: full-precision _mm_sqrt_ps of the 3-component self dot product.
+    return floatInVec(  _mm_sqrt_ps(_vmathVfDot3( vec.get128(), vec.get128() )), 0 );
+}
+
+
+VECTORMATH_FORCE_INLINE const Vector3 normalizeApprox( const Vector3 &vec )
+{ // Fast, lower-accuracy normalize: uses the raw _mm_rsqrt_ps estimate with no refinement. A zero vector is not special-cased.
+    return Vector3( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 normalize( const Vector3 &vec )
+{ // Scales vec by newtonrapson_rsqrt4 of its squared length (defined elsewhere; by its name a Newton-Raphson refined rsqrt). A zero vector is not special-cased.
+	return Vector3( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot3( vec.get128(), vec.get128() ) ) ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 )
+{ // Cross product vec0 x vec1, computed by _vmathVfCross on the raw registers.
+    return Vector3( _vmathVfCross( vec0.get128(), vec1.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 )
+{ // Forwards to a select() taking boolInVec. NOTE(review): the boolInVec overload defined just below takes Vector4, not Vector3 - confirm a Vector3 version is defined elsewhere.
+    return select( vec0, vec1, boolInVec(select1) );
+}
+
+
+VECTORMATH_FORCE_INLINE  const Vector4 select(const Vector4& vec0, const Vector4& vec1, const boolInVec& select1)
+{ // Bitwise select between vec0 and vec1 driven by select1's 128-bit mask (presumably vec1 where the mask is set - confirm against vec_sel).
+    return Vector4(vec_sel(vec0.get128(), vec1.get128(), select1.get128()));
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+VECTORMATH_FORCE_INLINE void print( const Vector3 &vec )
+{   // Debug helper: spill the register to plain floats and print x, y, z (the fourth lane is omitted).
+    float lane[4];
+    _mm_storeu_ps( lane, vec.get128() );
+    printf( "( %f %f %f )\n", lane[0], lane[1], lane[2] );
+}
+
+VECTORMATH_FORCE_INLINE void print( const Vector3 &vec, const char * name )
+{   // Debug helper: same as print(vec) but prefixed with "name: ".
+    float lane[4];
+    _mm_storeu_ps( lane, vec.get128() );
+    printf( "%s: ( %f %f %f )\n", name, lane[0], lane[1], lane[2] );
+}
+
+#endif
+
+VECTORMATH_FORCE_INLINE Vector4::Vector4( float _x, float _y, float _z, float _w )
+{   // Lanes become (x, y, z, w); _mm_set_ps takes its arguments high lane first.
+    mVec128 = _mm_set_ps( _w, _z, _y, _x );
+}
+
+VECTORMATH_FORCE_INLINE Vector4::Vector4( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w )
+{	// Two-level interleave: (x0,z0,..) with (y0,w0,..) gives lanes (x0, y0, z0, w0).
+	__m128 xz = _mm_unpacklo_ps( _x.get128(), _z.get128() );
+	__m128 yw = _mm_unpacklo_ps( _y.get128(), _w.get128() );
+	mVec128 = _mm_unpacklo_ps( xz, yw );
+}
+
+// Starts from xyz's register, then overwrites element 3 with _w through _vmathVfSetElement.
+VECTORMATH_FORCE_INLINE Vector4::Vector4(const Vector3 &xyz, float _w)
+    : mVec128(xyz.get128()) {
+    _vmathVfSetElement(mVec128, _w, 3);
+}
+
+// Starts from xyz's register, then replaces element 3 from _w's register through _vmathVfInsert.
+VECTORMATH_FORCE_INLINE Vector4::Vector4(const Vector3 &xyz, const floatInVec &_w)
+    : mVec128(xyz.get128()) {
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+}
+
+// Promotes a Vector3 with w = 0: element 3 is replaced from an all-zero register.
+VECTORMATH_FORCE_INLINE Vector4::Vector4(const Vector3 &vec)
+    : mVec128(vec.get128()) {
+    mVec128 = _vmathVfInsert(mVec128, _mm_setzero_ps(), 3);
+}
+
+// Promotes a Point3 with w = 1: element 3 is replaced from a register filled with 1.0f.
+VECTORMATH_FORCE_INLINE Vector4::Vector4(const Point3 &pnt)
+    : mVec128(pnt.get128()) {
+    mVec128 = _vmathVfInsert(mVec128, _mm_set1_ps(1.0f), 3);
+}
+
+// Reinterprets a quaternion as a 4-vector by copying its register unchanged.
+VECTORMATH_FORCE_INLINE Vector4::Vector4(const Quat &quat)
+    : mVec128(quat.get128()) {
+}
+
+// Takes the register produced by floatInVec(scalar) (presumably scalar in every lane; confirm in floatInVec.h).
+VECTORMATH_FORCE_INLINE Vector4::Vector4(float scalar)
+    : mVec128(floatInVec(scalar).get128()) {
+}
+
+// Adopts the register held by the floatInVec scalar as-is.
+VECTORMATH_FORCE_INLINE Vector4::Vector4(const floatInVec &scalar)
+    : mVec128(scalar.get128()) {
+}
+
+// Wraps a raw SSE register without modifying it.
+VECTORMATH_FORCE_INLINE Vector4::Vector4(__m128 vf4)
+    : mVec128(vf4) {
+}
+
+// Returns a Vector4 built from the _VECTORMATH_UNIT_1000 constant (the unit x axis, going by its name).
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::xAxis() {
+    return Vector4(_VECTORMATH_UNIT_1000);
+}
+
+// Returns a Vector4 built from the _VECTORMATH_UNIT_0100 constant (the unit y axis, going by its name).
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::yAxis() {
+    return Vector4(_VECTORMATH_UNIT_0100);
+}
+
+// Returns a Vector4 built from the _VECTORMATH_UNIT_0010 constant (the unit z axis, going by its name).
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::zAxis() {
+    return Vector4(_VECTORMATH_UNIT_0010);
+}
+
+// Returns a Vector4 built from the _VECTORMATH_UNIT_0001 constant (the unit w axis, going by its name).
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::wAxis() {
+    return Vector4(_VECTORMATH_UNIT_0001);
+}
+
+// float convenience overload: converts t to a floatInVec and forwards to the floatInVec overload of lerp.
+VECTORMATH_FORCE_INLINE const Vector4 lerp(float t, const Vector4 &vec0, const Vector4 &vec1) {
+    return lerp(floatInVec(t), vec0, vec1);
+}
+
+// Linear interpolation: vec0 + (vec1 - vec0) * t, so t = 0 yields vec0 and t = 1 yields vec0 + (vec1 - vec0).
+VECTORMATH_FORCE_INLINE const Vector4 lerp(const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1) {
+    return vec0 + (vec1 - vec0) * t;
+}
+
+// float convenience overload: converts t to a floatInVec and forwards to the floatInVec overload of slerp.
+VECTORMATH_FORCE_INLINE const Vector4 slerp(float t, const Vector4 &unitVec0, const Vector4 &unitVec1) {
+    return slerp(floatInVec(t), unitVec0, unitVec1);
+}
+
+// Spherical interpolation of two unit vectors. With a = acosf4(dot4) the weights are sin((1-t)a)/sin(a) and
+// sin(t*a)/sin(a); lanes whose dot is not below _VECTORMATH_SLERP_TOL keep the plain lerp weights (1-t, t).
+VECTORMATH_FORCE_INLINE const Vector4 slerp(const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1) {
+    const __m128 cosTheta = _vmathVfDot4(unitVec0.get128(), unitVec1.get128());
+    const __m128 useSlerp = _mm_cmpgt_ps(_mm_set1_ps(_VECTORMATH_SLERP_TOL), cosTheta);
+    const __m128 theta = acosf4(cosTheta);
+    const __m128 tLanes = t.get128();
+    const __m128 invT = _mm_sub_ps(_mm_set1_ps(1.0f), tLanes);
+    __m128 factors = _mm_unpacklo_ps(_mm_set1_ps(1.0f), tLanes);   // 1, t, 1, t
+    factors = _mm_unpacklo_ps(factors, invT);                      // 1, 1-t, t, 1-t
+    factors = _mm_mul_ps(factors, theta);                          // a, (1-t)a, t*a, (1-t)a
+    const __m128 sinLanes = sinf4(factors);
+    const __m128 ratios = _mm_div_ps(sinLanes, vec_splat(sinLanes, 0));
+    const __m128 weight0 = vec_sel(invT, vec_splat(ratios, 1), useSlerp);
+    const __m128 weight1 = vec_sel(tLanes, vec_splat(ratios, 2), useSlerp);
+    return Vector4(vec_madd(unitVec0.get128(), weight0, _mm_mul_ps(unitVec1.get128(), weight1)));
+}
+
+// Exposes the underlying SSE register by value.
+VECTORMATH_FORCE_INLINE __m128 Vector4::get128() const {
+    return mVec128;
+}
+/*
+VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads )
+{
+    twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
+    twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
+}
+*/
+// Copy assignment: takes over vec's register and returns *this so assignments can be chained.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator =(const Vector4 &vec) {
+    mVec128 = vec.get128();
+    return *this;
+}
+
+// Replaces x, y and z with vec's lanes; the mask picks only lane 3 from the current value, so w is kept.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::setXYZ(const Vector3 &vec) {
+    VM_ATTRIBUTE_ALIGN16 unsigned int keepW[4] = {0, 0, 0, 0xffffffff};
+    mVec128 = vec_sel(vec.get128(), mVec128, keepW);
+    return *this;
+}
+
+// Builds a Vector3 directly from this vector's register.
+VECTORMATH_FORCE_INLINE const Vector3 Vector4::getXYZ() const {
+    return Vector3(mVec128);
+}
+
+// Writes _x into element 0 via _vmathVfSetElement; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::setX(float _x) {
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+// Replaces element 0 from _x's register via _vmathVfInsert; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::setX(const floatInVec &_x) {
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+// Returns element 0 (x) as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Vector4::getX() const {
+    return floatInVec(mVec128, 0);
+}
+
+// Writes _y into element 1 via _vmathVfSetElement; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::setY(float _y) {
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+// Replaces element 1 from _y's register via _vmathVfInsert; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::setY(const floatInVec &_y) {
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+// Returns element 1 (y) as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Vector4::getY() const {
+    return floatInVec(mVec128, 1);
+}
+
+// Writes _z into element 2 via _vmathVfSetElement; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::setZ(float _z) {
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+// Replaces element 2 from _z's register via _vmathVfInsert; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::setZ(const floatInVec &_z) {
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+// Returns element 2 (z) as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Vector4::getZ() const {
+    return floatInVec(mVec128, 2);
+}
+
+// Writes _w into element 3 via _vmathVfSetElement; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::setW(float _w) {
+    _vmathVfSetElement(mVec128, _w, 3);
+    return *this;
+}
+
+// Replaces element 3 from _w's register via _vmathVfInsert; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::setW(const floatInVec &_w) {
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+    return *this;
+}
+
+// Returns element 3 (w) as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Vector4::getW() const {
+    return floatInVec(mVec128, 3);
+}
+
+// Writes value into element idx via _vmathVfSetElement (idx is not range-checked here); returns *this.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::setElem(int idx, float value) {
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+// Replaces element idx from value's register via _vmathVfInsert (idx is not range-checked here); returns *this.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::setElem(int idx, const floatInVec &value) {
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+// Returns element idx as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Vector4::getElem(int idx) const {
+    return floatInVec(mVec128, idx);
+}
+
+// Mutable subscript: returns a VecIdx proxy bound to this vector's register and the index idx.
+VECTORMATH_FORCE_INLINE VecIdx Vector4::operator [](int idx) {
+    return VecIdx(mVec128, idx);
+}
+
+// Read-only subscript: returns element idx as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Vector4::operator [](int idx) const {
+    return floatInVec(mVec128, idx);
+}
+
+// Lane-wise sum of this vector and vec (_mm_add_ps).
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator +(const Vector4 &vec) const {
+    return Vector4(_mm_add_ps(mVec128, vec.get128()));
+}
+
+// Lane-wise difference: this vector minus vec (_mm_sub_ps).
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator -(const Vector4 &vec) const {
+    return Vector4(_mm_sub_ps(mVec128, vec.get128()));
+}
+
+// float convenience overload: converts scalar to a floatInVec and reuses the floatInVec multiply.
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator *(float scalar) const {
+    return *this * floatInVec(scalar);
+}
+
+// Multiplies this vector lane by lane with scalar's register (_mm_mul_ps).
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator *(const floatInVec &scalar) const {
+    return Vector4(_mm_mul_ps(mVec128, scalar.get128()));
+}
+
+// In-place addition, expressed through operator + and copy assignment; returns *this.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator +=(const Vector4 &vec) {
+    const Vector4 total = *this + vec;
+    return *this = total;
+}
+
+// In-place subtraction, expressed through operator - and copy assignment; returns *this.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator -=(const Vector4 &vec) {
+    const Vector4 remainder = *this - vec;
+    return *this = remainder;
+}
+
+// In-place scaling by a float, expressed through operator * and copy assignment; returns *this.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator *=(float scalar) {
+    const Vector4 scaled = *this * scalar;
+    return *this = scaled;
+}
+
+// In-place scaling by a floatInVec, expressed through operator * and copy assignment; returns *this.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator *=(const floatInVec &scalar) {
+    const Vector4 scaled = *this * scalar;
+    return *this = scaled;
+}
+
+// float convenience overload: converts scalar to a floatInVec and reuses the floatInVec divide.
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator /(float scalar) const {
+    return *this / floatInVec(scalar);
+}
+
+// Divides this vector lane by lane by scalar's register (_mm_div_ps, a full-precision divide).
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator /(const floatInVec &scalar) const {
+    return Vector4(_mm_div_ps(mVec128, scalar.get128()));
+}
+
+// In-place division by a float, expressed through operator / and copy assignment; returns *this.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator /=(float scalar) {
+    const Vector4 quotient = *this / scalar;
+    return *this = quotient;
+}
+
+// In-place division by a floatInVec, expressed through operator / and copy assignment; returns *this.
+VECTORMATH_FORCE_INLINE Vector4 &Vector4::operator /=(const floatInVec &scalar) {
+    const Vector4 quotient = *this / scalar;
+    return *this = quotient;
+}
+
+// Unary minus, computed per lane as 0 - value.
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator -() const {
+    return Vector4(_mm_sub_ps(_mm_setzero_ps(), mVec128));
+}
+
+// scalar * vector with a float on the left: converts scalar to a floatInVec and forwards to that overload.
+VECTORMATH_FORCE_INLINE const Vector4 operator *(float scalar, const Vector4 &vec) {
+    return floatInVec(scalar) * vec;
+}
+
+// scalar * vector with a floatInVec on the left: swaps the operands and uses Vector4's member multiply.
+VECTORMATH_FORCE_INLINE const Vector4 operator *(const floatInVec &scalar, const Vector4 &vec) {
+    return vec * scalar;
+}
+
+// Lane-wise product of vec0 and vec1 (_mm_mul_ps).
+VECTORMATH_FORCE_INLINE const Vector4 mulPerElem(const Vector4 &vec0, const Vector4 &vec1) {
+    return Vector4(_mm_mul_ps(vec0.get128(), vec1.get128()));
+}
+
+// Lane-wise quotient vec0 / vec1 (_mm_div_ps).
+VECTORMATH_FORCE_INLINE const Vector4 divPerElem(const Vector4 &vec0, const Vector4 &vec1) {
+    return Vector4(_mm_div_ps(vec0.get128(), vec1.get128()));
+}
+
+// Lane-wise reciprocal using the _mm_rcp_ps estimate (an approximation, not an exact 1/x divide).
+VECTORMATH_FORCE_INLINE const Vector4 recipPerElem(const Vector4 &vec) {
+    return Vector4(_mm_rcp_ps(vec.get128()));
+}
+
+// Lane-wise absolute value, delegated to fabsf4.
+VECTORMATH_FORCE_INLINE const Vector4 absPerElem(const Vector4 &vec) {
+    return Vector4(fabsf4(vec.get128()));
+}
+
+// Per lane: the magnitude bits of vec0 (everything but bit 31) combined with the sign bit of vec1.
+VECTORMATH_FORCE_INLINE const Vector4 copySignPerElem(const Vector4 &vec0, const Vector4 &vec1) {
+    const __m128 absMask = toM128(0x7fffffff);
+    const __m128 magnitude = _mm_and_ps(absMask, vec0.get128());
+    const __m128 signBits = _mm_andnot_ps(absMask, vec1.get128());
+    return Vector4(_mm_or_ps(magnitude, signBits));
+}
+
+// Lane-wise maximum of vec0 and vec1 (_mm_max_ps).
+VECTORMATH_FORCE_INLINE const Vector4 maxPerElem(const Vector4 &vec0, const Vector4 &vec1) {
+    return Vector4(_mm_max_ps(vec0.get128(), vec1.get128()));
+}
+
+// Largest of the four elements: max(max(x, y), max(z, w)) computed on splatted lanes.
+VECTORMATH_FORCE_INLINE const floatInVec maxElem(const Vector4 &vec) {
+    const __m128 xy = _mm_max_ps(vec_splat(vec.get128(), 0), vec_splat(vec.get128(), 1));
+    const __m128 zw = _mm_max_ps(vec_splat(vec.get128(), 2), vec_splat(vec.get128(), 3));
+    return floatInVec(_mm_max_ps(xy, zw));
+}
+
+// Lane-wise minimum of vec0 and vec1 (_mm_min_ps).
+VECTORMATH_FORCE_INLINE const Vector4 minPerElem(const Vector4 &vec0, const Vector4 &vec1) {
+    return Vector4(_mm_min_ps(vec0.get128(), vec1.get128()));
+}
+
+// Smallest of the four elements: min(min(x, y), min(z, w)) computed on splatted lanes.
+VECTORMATH_FORCE_INLINE const floatInVec minElem(const Vector4 &vec) {
+    const __m128 xy = _mm_min_ps(vec_splat(vec.get128(), 0), vec_splat(vec.get128(), 1));
+    const __m128 zw = _mm_min_ps(vec_splat(vec.get128(), 2), vec_splat(vec.get128(), 3));
+    return floatInVec(_mm_min_ps(xy, zw));
+}
+
+// Sum of the four elements, grouped as (x + y) + (z + w) on splatted lanes.
+VECTORMATH_FORCE_INLINE const floatInVec sum(const Vector4 &vec) {
+    const __m128 xy = _mm_add_ps(vec_splat(vec.get128(), 0), vec_splat(vec.get128(), 1));
+    const __m128 zw = _mm_add_ps(vec_splat(vec.get128(), 2), vec_splat(vec.get128(), 3));
+    return floatInVec(_mm_add_ps(xy, zw));
+}
+
+// Dot product of vec0 and vec1 via _vmathVfDot4, returned as a floatInVec read from slot 0.
+VECTORMATH_FORCE_INLINE const floatInVec dot(const Vector4 &vec0, const Vector4 &vec1) {
+    return floatInVec(_vmathVfDot4(vec0.get128(), vec1.get128()), 0);
+}
+
+// Squared length of vec: its _vmathVfDot4 product with itself, read from slot 0.
+VECTORMATH_FORCE_INLINE const floatInVec lengthSqr(const Vector4 &vec) {
+    return floatInVec(_vmathVfDot4(vec.get128(), vec.get128()), 0);
+}
+
+// Length of vec: _mm_sqrt_ps applied to its _vmathVfDot4 self-product, read from slot 0.
+VECTORMATH_FORCE_INLINE const floatInVec length(const Vector4 &vec) {
+    return floatInVec(_mm_sqrt_ps(_vmathVfDot4(vec.get128(), vec.get128())), 0);
+}
+
+// Fast normalize: multiplies vec by the raw _mm_rsqrt_ps estimate of its 4-component self-dot, unrefined.
+VECTORMATH_FORCE_INLINE const Vector4 normalizeApprox(const Vector4 &vec) {
+    return Vector4(_mm_mul_ps(vec.get128(), _mm_rsqrt_ps(_vmathVfDot4(vec.get128(), vec.get128()))));
+}
+
+// Normalize: multiplies vec by newtonrapson_rsqrt4 (refined reciprocal square root) of its _vmathVfDot4 self-product.
+VECTORMATH_FORCE_INLINE const Vector4 normalize(const Vector4 &vec) {
+    return Vector4(_mm_mul_ps(vec.get128(), newtonrapson_rsqrt4(_vmathVfDot4(vec.get128(), vec.get128()))));
+}
+
+// bool convenience overload: wraps select1 in a boolInVec and forwards to the boolInVec overload of select.
+VECTORMATH_FORCE_INLINE const Vector4 select(const Vector4 &vec0, const Vector4 &vec1, bool select1) {
+    return select(vec0, vec1, boolInVec(select1));
+}
+
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper (compiled only under _VECTORMATH_DEBUG): prints all four lanes of vec on one line.
+VECTORMATH_FORCE_INLINE void print(const Vector4 &vec) {
+    float lanes[4];
+    _mm_storeu_ps(lanes, vec.get128());
+    printf("( %f %f %f %f )\n", lanes[0], lanes[1], lanes[2], lanes[3]);
+}
+
+// Debug helper (compiled only under _VECTORMATH_DEBUG): prints name followed by all four lanes of vec.
+VECTORMATH_FORCE_INLINE void print(const Vector4 &vec, const char *name) {
+    float lanes[4];
+    _mm_storeu_ps(lanes, vec.get128());
+    printf("%s: ( %f %f %f %f )\n", name, lanes[0], lanes[1], lanes[2], lanes[3]);
+}
+
+#endif
+
+// Component constructor: lanes 0-2 receive _x, _y, _z and lane 3 is set to 0.0f.
+VECTORMATH_FORCE_INLINE Point3::Point3(float _x, float _y, float _z)
+    : mVec128(_mm_setr_ps(_x, _y, _z, 0.0f)) {
+}
+
+// Gathers lane 0 of _x, _y, _z into lanes 0-2 with two unpacklo steps; lane 3 ends up holding lane 1 of _y.
+VECTORMATH_FORCE_INLINE Point3::Point3(const floatInVec &_x, const floatInVec &_y, const floatInVec &_z)
+    : mVec128(_mm_unpacklo_ps(_mm_unpacklo_ps(_x.get128(), _z.get128()), _y.get128())) {
+}
+
+// Reinterprets a Vector3 as a point by copying its register unchanged.
+VECTORMATH_FORCE_INLINE Point3::Point3(const Vector3 &vec)
+    : mVec128(vec.get128()) {
+}
+
+// Takes the register produced by floatInVec(scalar) (presumably scalar in every lane; confirm in floatInVec.h).
+VECTORMATH_FORCE_INLINE Point3::Point3(float scalar)
+    : mVec128(floatInVec(scalar).get128()) {
+}
+
+// Adopts the register held by the floatInVec scalar as-is.
+VECTORMATH_FORCE_INLINE Point3::Point3(const floatInVec &scalar)
+    : mVec128(scalar.get128()) {
+}
+
+// Wraps a raw SSE register without modifying it.
+VECTORMATH_FORCE_INLINE Point3::Point3(__m128 vf4)
+    : mVec128(vf4) {
+}
+
+// float convenience overload: converts t to a floatInVec and forwards to the floatInVec overload of lerp.
+VECTORMATH_FORCE_INLINE const Point3 lerp(float t, const Point3 &pnt0, const Point3 &pnt1) {
+    return lerp(floatInVec(t), pnt0, pnt1);
+}
+
+// Linear interpolation between points: pnt0 plus the offset (pnt1 - pnt0) scaled by t.
+VECTORMATH_FORCE_INLINE const Point3 lerp(const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1) {
+    return pnt0 + (pnt1 - pnt0) * t;
+}
+
+// Exposes the underlying SSE register by value.
+VECTORMATH_FORCE_INLINE __m128 Point3::get128() const {
+    return mVec128;
+}
+
+// Writes pnt's lanes 0-2 into *quad while preserving the lane-3 value already stored there:
+// the mask picks lane 3 from the existing contents and lanes 0-2 from pnt.
+VECTORMATH_FORCE_INLINE void storeXYZ(const Point3 &pnt, __m128 *quad) {
+    VM_ATTRIBUTE_ALIGN16 unsigned int keepLane3[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize
+    const __m128 previous = *quad;
+    *quad = vec_sel(pnt.get128(), previous, keepLane3);
+}
+
+// Unpacks four points from three packed quads (x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3). pnt3 is taken from the last quad and rotated rather than loaded at float offset 9, which read one float past the 12-float buffer.
+VECTORMATH_FORCE_INLINE void loadXYZArray(Point3 &pnt0, Point3 &pnt1, Point3 &pnt2, Point3 &pnt3, const __m128 *threeQuads) {
+    const float *quads = reinterpret_cast<const float *>(threeQuads);   // keeps the source const-qualified
+    pnt0 = Point3(_mm_load_ps(quads));          // floats 0..3: x0 y0 z0 (lane 3 carries x1)
+    pnt1 = Point3(_mm_loadu_ps(quads + 3));     // floats 3..6: x1 y1 z1 (lane 3 carries x2)
+    pnt2 = Point3(_mm_loadu_ps(quads + 6));     // floats 6..9: x2 y2 z2 (lane 3 carries x3)
+    pnt3 = Point3(_mm_shuffle_ps(threeQuads[2], threeQuads[2], _MM_SHUFFLE(0, 3, 2, 1)));  // z2 x3 y3 z3 -> x3 y3 z3 z2
+}
+
+// Packs four points into three quads as x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3; the points' lane-3 values are not stored.
+VECTORMATH_FORCE_INLINE void storeXYZArray(const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 *threeQuads) {
+    VM_ATTRIBUTE_ALIGN16 unsigned int lane3Only[4] = {0, 0, 0, 0xffffffff};
+    VM_ATTRIBUTE_ALIGN16 unsigned int lane0Only[4] = {0xffffffff, 0, 0, 0};
+    const __m128 x1Splat = _mm_shuffle_ps(pnt1.get128(), pnt1.get128(), _MM_SHUFFLE(0, 0, 0, 0));
+    const __m128 z2Splat = _mm_shuffle_ps(pnt2.get128(), pnt2.get128(), _MM_SHUFFLE(2, 2, 2, 2));
+    threeQuads[0] = vec_sel(pnt0.get128(), x1Splat, lane3Only);
+    threeQuads[1] = _mm_shuffle_ps(pnt1.get128(), pnt2.get128(), _MM_SHUFFLE(1, 0, 2, 1));
+    threeQuads[2] = vec_sel(_mm_shuffle_ps(pnt3.get128(), pnt3.get128(), _MM_SHUFFLE(2, 1, 0, 3)), z2Splat, lane0Only);
+}
+/*
+VECTORMATH_FORCE_INLINE void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads )
+{
+#if 0
+    __m128 xyz0[3];
+    __m128 xyz1[3];
+    storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
+    storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+#else
+	assert(0);
+#endif
+}
+*/
+// Copy assignment: takes over pnt's register and returns *this so assignments can be chained.
+VECTORMATH_FORCE_INLINE Point3 &Point3::operator =(const Point3 &pnt) {
+    mVec128 = pnt.get128();
+    return *this;
+}
+
+// Writes _x into element 0 via _vmathVfSetElement; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Point3 &Point3::setX(float _x) {
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+// Replaces element 0 from _x's register via _vmathVfInsert; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Point3 &Point3::setX(const floatInVec &_x) {
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+// Returns element 0 (x) as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Point3::getX() const {
+    return floatInVec(mVec128, 0);
+}
+
+// Writes _y into element 1 via _vmathVfSetElement; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Point3 &Point3::setY(float _y) {
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+// Replaces element 1 from _y's register via _vmathVfInsert; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Point3 &Point3::setY(const floatInVec &_y) {
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+// Returns element 1 (y) as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Point3::getY() const {
+    return floatInVec(mVec128, 1);
+}
+
+// Writes _z into element 2 via _vmathVfSetElement; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Point3 &Point3::setZ(float _z) {
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+// Replaces element 2 from _z's register via _vmathVfInsert; returns *this for chaining.
+VECTORMATH_FORCE_INLINE Point3 &Point3::setZ(const floatInVec &_z) {
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+// Returns element 2 (z) as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Point3::getZ() const {
+    return floatInVec(mVec128, 2);
+}
+
+// Writes value into element idx via _vmathVfSetElement (idx is not range-checked here); returns *this.
+VECTORMATH_FORCE_INLINE Point3 &Point3::setElem(int idx, float value) {
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+// Replaces element idx from value's register via _vmathVfInsert (idx is not range-checked here); returns *this.
+VECTORMATH_FORCE_INLINE Point3 &Point3::setElem(int idx, const floatInVec &value) {
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+// Returns element idx as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Point3::getElem(int idx) const {
+    return floatInVec(mVec128, idx);
+}
+
+// Mutable subscript: returns a VecIdx proxy bound to this point's register and the index idx.
+VECTORMATH_FORCE_INLINE VecIdx Point3::operator [](int idx) {
+    return VecIdx(mVec128, idx);
+}
+
+// Read-only subscript: returns element idx as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Point3::operator [](int idx) const {
+    return floatInVec(mVec128, idx);
+}
+
+// Point minus point: the lane-wise difference, returned as a Vector3 offset.
+VECTORMATH_FORCE_INLINE const Vector3 Point3::operator -(const Point3 &pnt) const {
+    return Vector3(_mm_sub_ps(mVec128, pnt.get128()));
+}
+
+// Point plus vector: lane-wise sum, returned as a Point3.
+VECTORMATH_FORCE_INLINE const Point3 Point3::operator +(const Vector3 &vec) const {
+    return Point3(_mm_add_ps(mVec128, vec.get128()));
+}
+
+// Point minus vector: lane-wise difference, returned as a Point3.
+VECTORMATH_FORCE_INLINE const Point3 Point3::operator -(const Vector3 &vec) const {
+    return Point3(_mm_sub_ps(mVec128, vec.get128()));
+}
+
+// In-place translation by vec, expressed through operator + and copy assignment; returns *this.
+VECTORMATH_FORCE_INLINE Point3 &Point3::operator +=(const Vector3 &vec) {
+    const Point3 moved = *this + vec;
+    return *this = moved;
+}
+
+// In-place translation by -vec, expressed through operator - and copy assignment; returns *this.
+VECTORMATH_FORCE_INLINE Point3 &Point3::operator -=(const Vector3 &vec) {
+    const Point3 moved = *this - vec;
+    return *this = moved;
+}
+
+// Lane-wise product of pnt0 and pnt1 (_mm_mul_ps).
+VECTORMATH_FORCE_INLINE const Point3 mulPerElem(const Point3 &pnt0, const Point3 &pnt1) {
+    return Point3(_mm_mul_ps(pnt0.get128(), pnt1.get128()));
+}
+
+// Lane-wise quotient pnt0 / pnt1 (_mm_div_ps).
+VECTORMATH_FORCE_INLINE const Point3 divPerElem(const Point3 &pnt0, const Point3 &pnt1) {
+    return Point3(_mm_div_ps(pnt0.get128(), pnt1.get128()));
+}
+
+// Lane-wise reciprocal using the _mm_rcp_ps estimate (an approximation, not an exact 1/x divide).
+VECTORMATH_FORCE_INLINE const Point3 recipPerElem(const Point3 &pnt) {
+    return Point3(_mm_rcp_ps(pnt.get128()));
+}
+
+// Lane-wise absolute value, delegated to fabsf4.
+VECTORMATH_FORCE_INLINE const Point3 absPerElem(const Point3 &pnt) {
+    return Point3(fabsf4(pnt.get128()));
+}
+
+// Per lane: the magnitude bits of pnt0 (everything but bit 31) combined with the sign bit of pnt1.
+VECTORMATH_FORCE_INLINE const Point3 copySignPerElem(const Point3 &pnt0, const Point3 &pnt1) {
+    const __m128 absMask = toM128(0x7fffffff);
+    const __m128 magnitude = _mm_and_ps(absMask, pnt0.get128());
+    const __m128 signBits = _mm_andnot_ps(absMask, pnt1.get128());
+    return Point3(_mm_or_ps(magnitude, signBits));
+}
+
+// Lane-wise maximum of pnt0 and pnt1 (_mm_max_ps).
+VECTORMATH_FORCE_INLINE const Point3 maxPerElem(const Point3 &pnt0, const Point3 &pnt1) {
+    return Point3(_mm_max_ps(pnt0.get128(), pnt1.get128()));
+}
+
+// Largest of x, y and z: max(max(x, y), z) computed on splatted lanes; lane 3 is not considered.
+VECTORMATH_FORCE_INLINE const floatInVec maxElem(const Point3 &pnt) {
+    return floatInVec(_mm_max_ps(_mm_max_ps(vec_splat(pnt.get128(), 0), vec_splat(pnt.get128(), 1)), vec_splat(pnt.get128(), 2)));
+}
+
+// Lane-wise minimum of pnt0 and pnt1 (_mm_min_ps).
+VECTORMATH_FORCE_INLINE const Point3 minPerElem(const Point3 &pnt0, const Point3 &pnt1) {
+    return Point3(_mm_min_ps(pnt0.get128(), pnt1.get128()));
+}
+
+// Smallest of x, y and z: min(min(x, y), z) computed on splatted lanes; lane 3 is not considered.
+VECTORMATH_FORCE_INLINE const floatInVec minElem(const Point3 &pnt) {
+    return floatInVec(_mm_min_ps(_mm_min_ps(vec_splat(pnt.get128(), 0), vec_splat(pnt.get128(), 1)), vec_splat(pnt.get128(), 2)));
+}
+
+// Sum of x, y and z, grouped as (x + y) + z on splatted lanes; lane 3 is not included.
+VECTORMATH_FORCE_INLINE const floatInVec sum(const Point3 &pnt) {
+    return floatInVec(_mm_add_ps(_mm_add_ps(vec_splat(pnt.get128(), 0), vec_splat(pnt.get128(), 1)), vec_splat(pnt.get128(), 2)));
+}
+
+// float convenience overload: converts scaleVal to a floatInVec and forwards to the floatInVec overload of scale.
+VECTORMATH_FORCE_INLINE const Point3 scale(const Point3 &pnt, float scaleVal) {
+    return scale(pnt, floatInVec(scaleVal));
+}
+
+// Uniform scale: multiplies pnt lane by lane with a Point3 constructed from scaleVal.
+VECTORMATH_FORCE_INLINE const Point3 scale(const Point3 &pnt, const floatInVec &scaleVal) {
+    return mulPerElem(pnt, Point3(scaleVal));
+}
+
+// Non-uniform scale: multiplies pnt lane by lane with scaleVec reinterpreted as a Point3.
+VECTORMATH_FORCE_INLINE const Point3 scale(const Point3 &pnt, const Vector3 &scaleVec) {
+    return mulPerElem(pnt, Point3(scaleVec));
+}
+
+// Scalar projection of pnt onto unitVec: their _vmathVfDot3 product, read from slot 0.
+VECTORMATH_FORCE_INLINE const floatInVec projection(const Point3 &pnt, const Vector3 &unitVec) {
+    return floatInVec(_vmathVfDot3(pnt.get128(), unitVec.get128()), 0);
+}
+
+// Squared distance from the origin: lengthSqr of pnt reinterpreted as a Vector3.
+VECTORMATH_FORCE_INLINE const floatInVec distSqrFromOrigin(const Point3 &pnt) {
+    return lengthSqr(Vector3(pnt));
+}
+
+// Distance from the origin: length of pnt reinterpreted as a Vector3.
+VECTORMATH_FORCE_INLINE const floatInVec distFromOrigin(const Point3 &pnt) {
+    return length(Vector3(pnt));
+}
+
+// Squared distance between two points: lengthSqr of the offset pnt1 - pnt0.
+VECTORMATH_FORCE_INLINE const floatInVec distSqr(const Point3 &pnt0, const Point3 &pnt1) {
+    return lengthSqr(pnt1 - pnt0);
+}
+
+// Distance between two points: length of the offset pnt1 - pnt0.
+VECTORMATH_FORCE_INLINE const floatInVec dist(const Point3 &pnt0, const Point3 &pnt1) {
+    return length(pnt1 - pnt0);
+}
+
+// bool convenience overload: wraps select1 in a boolInVec and forwards to the boolInVec overload of select.
+VECTORMATH_FORCE_INLINE const Point3 select(const Point3 &pnt0, const Point3 &pnt1, bool select1) {
+    return select(pnt0, pnt1, boolInVec(select1));
+}
+
+// Bitwise select through vec_sel: takes pnt1 where select1's mask bits are set and pnt0 elsewhere.
+VECTORMATH_FORCE_INLINE const Point3 select(const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1) {
+    return Point3(vec_sel(pnt0.get128(), pnt1.get128(), select1.get128()));
+}
+
+
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper (compiled only under _VECTORMATH_DEBUG): prints lanes 0-2 of pnt on one line.
+VECTORMATH_FORCE_INLINE void print(const Point3 &pnt) {
+    float lanes[4];
+    _mm_storeu_ps(lanes, pnt.get128());
+    printf("( %f %f %f )\n", lanes[0], lanes[1], lanes[2]);
+}
+
+// Debug helper (compiled only under _VECTORMATH_DEBUG): prints name followed by lanes 0-2 of pnt.
+VECTORMATH_FORCE_INLINE void print(const Point3 &pnt, const char *name) {
+    float lanes[4];
+    _mm_storeu_ps(lanes, pnt.get128());
+    printf("%s: ( %f %f %f )\n", name, lanes[0], lanes[1], lanes[2]);
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/src/bullet/vectormath/sse/vecidx_aos.h b/src/bullet/vectormath/sse/vecidx_aos.h
new file mode 100644
index 00000000..8ba4b1d7
--- /dev/null
+++ b/src/bullet/vectormath/sse/vecidx_aos.h
@@ -0,0 +1,80 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VECIDX_AOS_H
+#define _VECTORMATH_VECIDX_AOS_H
+
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// VecIdx
+// Proxy object returned by the non-const subscripting operator of Vector3,
+// Vector4, Point3, or Quat. It stores a reference to the owner's __m128 plus
+// an element index; it must not outlive the vector it was created from.
+
+VM_ATTRIBUTE_ALIGNED_CLASS16 (class) VecIdx
+{
+private:
+   __m128 &ref;	// register of the vector being indexed (held by reference, not copied)
+   int i;	// index of the element this proxy stands for
+public:
+    inline VecIdx( __m128& vec, int idx ): ref(vec) { i = idx; }	// binds to vec; idx is stored unchecked
+
+    // implicitly casts to float unless _VECTORMATH_NO_SCALAR_CAST defined
+    // in which case, implicitly casts to floatInVec, and one must call
+    // getAsFloat to convert to float.
+    //
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+    inline operator floatInVec() const;
+    inline float getAsFloat() const;
+#else
+    inline operator float() const;
+#endif
+
+    // Assignment and compound-assignment operators (declared here, defined elsewhere). Only the float assignment returns float; all others return floatInVec.
+    inline float operator =( float scalar );
+    inline floatInVec operator =( const floatInVec &scalar );
+    inline floatInVec operator =( const VecIdx& scalar );	// assignment from another proxy
+    inline floatInVec operator *=( float scalar );
+    inline floatInVec operator *=( const floatInVec &scalar );
+    inline floatInVec operator /=( float scalar );
+    inline floatInVec operator /=( const floatInVec &scalar );
+    inline floatInVec operator +=( float scalar );
+    inline floatInVec operator +=( const floatInVec &scalar );
+    inline floatInVec operator -=( float scalar );
+    inline floatInVec operator -=( const floatInVec &scalar );
+};
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/src/bullet/vectormath/sse/vectormath_aos.h b/src/bullet/vectormath/sse/vectormath_aos.h
new file mode 100644
index 00000000..c3a02be0
--- /dev/null
+++ b/src/bullet/vectormath/sse/vectormath_aos.h
@@ -0,0 +1,2547 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef _VECTORMATH_AOS_CPP_SSE_H
+#define _VECTORMATH_AOS_CPP_SSE_H
+
+#include <math.h>
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <assert.h>
+
+#define Vector3Ref Vector3&	// reference-type spellings provided as macros
+#define QuatRef	Quat&
+#define Matrix3Ref Matrix3&
+// Compiler-specific spellings for 16-byte alignment and inlining (MSVC 2005+ branch first, everything else second).
+#if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400)
+	#define USE_SSE3_LDDQU
+
+	#define VM_ATTRIBUTE_ALIGNED_CLASS16(a) __declspec(align(16)) a
+	#define VM_ATTRIBUTE_ALIGN16 __declspec(align(16))
+	#define VECTORMATH_FORCE_INLINE __forceinline 
+#else
+	#define VM_ATTRIBUTE_ALIGNED_CLASS16(a) a __attribute__ ((aligned (16)))	
+	#define VM_ATTRIBUTE_ALIGN16 __attribute__ ((aligned (16)))	
+	#define VECTORMATH_FORCE_INLINE inline 
+	#ifdef __SSE3__
+		#define USE_SSE3_LDDQU
+	#endif //__SSE3__
+#endif//_WIN32
+
+// pmmintrin.h is included only when USE_SSE3_LDDQU was set above: always on the MSVC branch, otherwise only if __SSE3__ is defined.
+#ifdef USE_SSE3_LDDQU
+#include <pmmintrin.h>//_mm_lddqu_si128
+#endif //USE_SSE3_LDDQU
+
+// AltiVec/SPU-style vector type names. Note that vec_uint4 and vec_int4 alias the float register type __m128, not __m128i.
+// TODO: Tidy
+typedef __m128 vec_float4;
+typedef __m128 vec_uint4;
+typedef __m128 vec_int4;
+typedef __m128i vec_uchar16;
+typedef __m128i vec_ushort8;
+
+#define vec_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e))	// copies lane e of x into all four lanes; e must be a compile-time constant
+// Lane rotations of vec by i positions (i must be a compile-time constant). Every use of i is parenthesized so compound arguments such as (a-b) expand correctly.
+#define _mm_ror_ps(vec,i)	\
+	(((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)((i)+3)%4,(unsigned char)((i)+2)%4,(unsigned char)((i)+1)%4,(unsigned char)((i)+0)%4))) : (vec))
+#define _mm_rol_ps(vec,i)	\
+	(((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(7-(i))%4,(unsigned char)(6-(i))%4,(unsigned char)(5-(i))%4,(unsigned char)(4-(i))%4))) : (vec))
+// AltiVec-style shim: the vec2 argument is ignored, so this only rotates vec by x/4 lanes.
+#define vec_sld(vec,vec2,x) _mm_ror_ps(vec, ((x)/4))
+// Clear / flip bits selected by _MASKSIGN_; that constant is not defined in this part of the file and must be supplied by the user of these macros.
+#define _mm_abs_ps(vec)		_mm_andnot_ps(_MASKSIGN_,vec)
+#define _mm_neg_ps(vec)		_mm_xor_ps(_MASKSIGN_,vec)
+// a*b + c, evaluated as a separate multiply followed by an add (not a fused operation).
+#define vec_madd(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b) )
+
+union SSEFloat	// overlays one 128-bit SSE value with integer-register, float-register and per-lane array views
+{
+	__m128i vi;	// integer-register view
+	__m128 m128;	// float-register view
+	__m128 vf;	// second float-register view (same type as m128)
+	unsigned int	ui[4];	// four 32-bit lanes as unsigned integers
+	unsigned short s[8];	// eight 16-bit lanes
+	float f[4];	// four 32-bit lanes as floats
+	SSEFloat(__m128 v) : m128(v) {}	// initializes through the float-register view
+    SSEFloat(__m128i v) : vi(v) {}	// initializes through the integer-register view
+	SSEFloat() {}//uninitialized
+};
+
+// Bitwise blend: result bits come from b where mask bits are 1 and from a where they are 0.
+static VECTORMATH_FORCE_INLINE __m128 vec_sel(__m128 a, __m128 b, __m128 mask) {
+    return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a));
+}
+// Blend with a mask read from memory: _mask must point at four 32-bit words and be 16-byte aligned (_mm_load_ps).
+static VECTORMATH_FORCE_INLINE __m128 vec_sel(__m128 a, __m128 b, const unsigned int *_mask) {
+    return vec_sel(a, b, _mm_load_ps((const float *)_mask));
+}
+// Blend using the same 32-bit mask in every lane. The mask is broadcast as an integer and bit-cast, replacing the old *(float *)& pun (a strict-aliasing violation that also sent mask bits through a float load).
+static VECTORMATH_FORCE_INLINE __m128 vec_sel(__m128 a, __m128 b, unsigned int _mask) {
+    return vec_sel(a, b, _mm_castsi128_ps(_mm_set1_epi32(static_cast<int>(_mask))));
+}
+
+// Returns a register whose four lanes all hold the raw bit pattern x. Uses an integer broadcast plus bit-cast instead of the old *(float *)&x pun, which violated strict aliasing.
+static VECTORMATH_FORCE_INLINE __m128 toM128(unsigned int x) {
+    return _mm_castsi128_ps(_mm_set1_epi32(static_cast<int>(x)));
+}
+
+// Lane-wise absolute value: masks off bit 31 (the sign bit) of every lane.
+static VECTORMATH_FORCE_INLINE __m128 fabsf4(__m128 x) {
+    return _mm_and_ps(x, toM128(0x7fffffff));
+}
+/*
+union SSE64
+{
+	__m128 m128;
+	struct
+	{
+		__m64 m01;
+		__m64 m23;
+	} m64;
+};
+
+static VECTORMATH_FORCE_INLINE __m128 vec_cts(__m128 x, int a)
+{
+	assert(a == 0); // Only 2^0 supported
+	(void)a;
+	SSE64 sse64;
+	sse64.m64.m01 = _mm_cvttps_pi32(x);
+	sse64.m64.m23 = _mm_cvttps_pi32(_mm_ror_ps(x,2));
+	_mm_empty();
+    return sse64.m128;
+}
+
+static VECTORMATH_FORCE_INLINE __m128 vec_ctf(__m128 x, int a)
+{
+	assert(a == 0); // Only 2^0 supported
+	(void)a;
+	SSE64 sse64;
+	sse64.m128 = x;
+	__m128 result =_mm_movelh_ps(
+		_mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m01),
+		_mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m23));
+	_mm_empty();
+	return result;
+}
+*/
+// Converts each float lane to a 32-bit integer with _mm_cvtps_epi32 and returns the integer bits carried in an __m128.
+// _mm_castsi128_ps replaces the former (__m128 &) reference cast, which punned between unrelated vector types.
+static VECTORMATH_FORCE_INLINE __m128 vec_cts(__m128 x, int a) {
+    assert(a == 0); // Only 2^0 supported
+    (void)a;        // a is otherwise unused once assert is compiled out
+    return _mm_castsi128_ps(_mm_cvtps_epi32(x));
+}
+
+// Treats the bits of x as four 32-bit integers and converts them to floats; _mm_castps_si128 replaces the former (__m128i &) reference-cast pun.
+static VECTORMATH_FORCE_INLINE __m128 vec_ctf(__m128 x, int a) {
+    assert(a == 0); // Only 2^0 supported
+    (void)a;        // a is otherwise unused once assert is compiled out
+    return _mm_cvtepi32_ps(_mm_castps_si128(x));
+}
+
+#define vec_nmsub(a,b,c) _mm_sub_ps( c, _mm_mul_ps( a, b ) )	// c - a*b
+#define vec_sub(a,b) _mm_sub_ps( a, b )
+#define vec_add(a,b) _mm_add_ps( a, b )
+#define vec_mul(a,b) _mm_mul_ps( a, b )
+#define vec_xor(a,b) _mm_xor_ps( a, b )
+#define vec_and(a,b) _mm_and_ps( a, b )
+#define vec_cmpeq(a,b) _mm_cmpeq_ps( a, b )
+#define vec_cmpgt(a,b) _mm_cmpgt_ps( a, b )
+// vec_mergeh / vec_mergel: interleave the low / high lane pairs of a and b.
+#define vec_mergeh(a,b) _mm_unpacklo_ps( a, b )
+#define vec_mergel(a,b) _mm_unpackhi_ps( a, b )
+// vec_andc(a,b) = a & ~b; _mm_andnot_ps complements its FIRST operand, hence the swapped arguments.
+#define vec_andc(a,b) _mm_andnot_ps( b, a )
+// Per-lane math shorthands; rsqrtf4 and recipf4 map to the approximate _mm_rsqrt_ps / _mm_rcp_ps estimates.
+#define sqrtf4(x) _mm_sqrt_ps( x )
+#define rsqrtf4(x) _mm_rsqrt_ps( x )
+#define recipf4(x) _mm_rcp_ps( x )
+#define negatef4(x) _mm_sub_ps( _mm_setzero_ps(), x )
+
+// Refined 1/sqrt(v) per lane: one Newton-Raphson step, 0.5 * e * (3 - v*e*e), on the _mm_rsqrt_ps estimate e. NOTE: _half4 and _three are not #undef'd inside this function.
+static VECTORMATH_FORCE_INLINE __m128 newtonrapson_rsqrt4(const __m128 v) {
+#define _half4 _mm_setr_ps(.5f,.5f,.5f,.5f) 
+#define _three _mm_setr_ps(3.f,3.f,3.f,3.f)
+    const __m128 estimate = _mm_rsqrt_ps(v);
+    const __m128 vEstimateSq = _mm_mul_ps(_mm_mul_ps(v, estimate), estimate);
+    return _mm_mul_ps(_mm_mul_ps(_half4, estimate), _mm_sub_ps(_three, vEstimateSq));
+}
+
+static VECTORMATH_FORCE_INLINE __m128 acosf4(__m128 x)
+{	// Per-lane arccosine approximation: sqrt(1 - |x|) * P(|x|) for x >= 0, and pi minus that value for x < 0.
+    __m128 xabs = fabsf4(x);
+	__m128 select = _mm_cmplt_ps( x, _mm_setzero_ps() );	// all-ones mask in lanes where x < 0
+    __m128 t1 = sqrtf4(vec_sub(_mm_set1_ps(1.0f), xabs));	// sqrt(1 - |x|)
+    
+    /* Instruction counts can be reduced if the polynomial was
+     * computed entirely from nested (dependent) fma's. However, 
+     * to reduce the number of pipeline stalls, the polynomial is evaluated 
+     * in two halves (hi and lo). 
+     */
+    __m128 xabs2 = _mm_mul_ps(xabs,  xabs);
+    __m128 xabs4 = _mm_mul_ps(xabs2, xabs2);
+    __m128 hi = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0012624911f),
+		xabs, _mm_set1_ps(0.0066700901f)),
+			xabs, _mm_set1_ps(-0.0170881256f)),
+				xabs, _mm_set1_ps( 0.0308918810f));
+    __m128 lo = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0501743046f),
+		xabs, _mm_set1_ps(0.0889789874f)),
+			xabs, _mm_set1_ps(-0.2145988016f)),
+				xabs, _mm_set1_ps( 1.5707963050f));
+    
+    __m128 result = vec_madd(hi, xabs4, lo);	// P(|x|) = hi * |x|^4 + lo
+    
+    // Adjust the result if x is negative.
+    return vec_sel(
+		vec_mul(t1, result),									// Positive
+		vec_nmsub(t1, result, _mm_set1_ps(3.1415926535898f)),	// Negative
+		select);
+}
+
+static VECTORMATH_FORCE_INLINE __m128 sinf4(vec_float4 x)
+{
+// Per-lane sine of x: reduce by multiples of pi/2, evaluate sin/cos polynomials, select by quadrant.
+//
+// Common constants used to evaluate sinf4/cosf4/tanf4
+// (kept as macros: sincosf4 in this file uses them as well)
+#define _SINCOS_CC0  -0.0013602249f
+#define _SINCOS_CC1   0.0416566950f
+#define _SINCOS_CC2  -0.4999990225f
+#define _SINCOS_SC0  -0.0001950727f
+#define _SINCOS_SC1   0.0083320758f
+#define _SINCOS_SC2  -0.1666665247f
+// _SINCOS_KC1 + _SINCOS_KC2 ~= pi/2; the two parts are subtracted in separate steps during range reduction
+#define _SINCOS_KC1  1.57079625129f
+#define _SINCOS_KC2  7.54978995489e-8f
+
+    vec_float4 xl,xl2,xl3,res;
+
+    // Range reduction using : xl = angle * TwoOverPi;
+    //  
+    xl = vec_mul(x, _mm_set1_ps(0.63661977236f));
+
+    // Find the quadrant the angle falls in:
+    // q = xl converted to an integer by vec_cts (_mm_cvtps_epi32, which rounds per the MXCSR rounding mode)
+    //
+    vec_int4 q = vec_cts(xl,0);
+
+    // Compute an offset based on the quadrant that the angle falls in
+    // (the low two bits of q)
+    vec_int4 offset = _mm_and_ps(q,toM128(0x3));
+
+    // Remainder xl = (x - qf*KC1) - qf*KC2
+    // (in range [-pi/4..pi/4] when q is rounded to nearest)
+    vec_float4 qf = vec_ctf(q,0);
+    xl  = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x));
+    
+    // Compute x^2 and x^3
+    //
+    xl2 = vec_mul(xl,xl);
+    xl3 = vec_mul(xl2,xl);
+    
+    // Compute both the sin and cos of the angles
+    // using a polynomial expression:
+    //   cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
+    //   sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
+    //
+    
+    vec_float4 cx =
+		vec_madd(
+			vec_madd(
+				vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f));
+    vec_float4 sx =
+		vec_madd(
+			vec_madd(
+				vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl);
+
+    // Use the cosine when the offset is odd and the sin when the offset is even.
+    // The bit tests compare as integers: viewed as floats, 0x1 and 0x2 are denormals,
+    // which a float compare treats as equal to zero when the MXCSR denormals-are-zero flag is set.
+    res = vec_sel(cx,sx,_mm_castsi128_ps(_mm_cmpeq_epi32(
+                                          _mm_castps_si128(vec_and(offset,toM128(0x1))),
+										  _mm_setzero_si128())));
+
+    // Flip the sign of the result when (offset mod 4) = 2 or 3
+    // (i.e. when bit 1 of the offset is set)
+    return vec_sel(
+		vec_xor(toM128(0x80000000U), res),	// Negative
+		res,								// Positive
+		_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offset,toM128(0x2))),_mm_setzero_si128())));
+}
+
+static VECTORMATH_FORCE_INLINE void sincosf4(vec_float4 x, vec_float4* s, vec_float4* c) // writes per-lane sin(x) to *s and cos(x) to *c
+{
+    vec_float4 xl,xl2,xl3;
+    vec_int4   offsetSin, offsetCos;
+
+    // Range reduction using : xl = angle * TwoOverPi;
+    //  
+    xl = vec_mul(x, _mm_set1_ps(0.63661977236f));
+
+    // Find the quadrant the angle falls in:
+    // q = xl converted to an integer by vec_cts (_mm_cvtps_epi32, which rounds per the MXCSR rounding mode).
+    // The variant that adds 0.5 carrying the sign of xl before converting is kept for reference:
+    //vec_int4 q = vec_cts(vec_add(xl,vec_sel(_mm_set1_ps(0.5f),xl,(0x80000000))),0);
+    vec_int4 q = vec_cts(xl,0);
+     
+    // Compute the offset based on the quadrant that the angle falls in.
+    // Add 1 to the offset for the cosine. 
+    // (integer add on the lane bits; cast intrinsics replace the former reference puns)
+    offsetSin = vec_and(q,toM128((int)0x3));
+	__m128i temp = _mm_add_epi32(_mm_set1_epi32(1),_mm_castps_si128(offsetSin));
+	offsetCos = _mm_castsi128_ps(temp);
+
+    // Remainder xl = (x - qf*KC1) - qf*KC2
+    // (in range [-pi/4..pi/4] when q is rounded to nearest)
+    vec_float4 qf = vec_ctf(q,0);
+    xl  = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x));
+    
+    // Compute x^2 and x^3
+    //
+    xl2 = vec_mul(xl,xl);
+    xl3 = vec_mul(xl2,xl);
+    
+    // Compute both the sin and cos of the angles
+    // using a polynomial expression:
+    //   cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
+    //   sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
+    //
+    vec_float4 cx =
+		vec_madd(
+			vec_madd(
+				vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f));
+    vec_float4 sx =
+		vec_madd(
+			vec_madd(
+				vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl);
+
+    // Use the cosine when the offset is odd and the sin when the offset is even.
+    // The bit tests compare as integers: viewed as floats, 0x1 and 0x2 are denormals,
+    // which a float compare treats as equal to zero when the MXCSR denormals-are-zero flag is set.
+    vec_uint4 sinMask = (vec_uint4)_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offsetSin,toM128(0x1))),_mm_setzero_si128()));
+    vec_uint4 cosMask = (vec_uint4)_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offsetCos,toM128(0x1))),_mm_setzero_si128()));
+    *s = vec_sel(cx,sx,sinMask);
+    *c = vec_sel(cx,sx,cosMask);
+
+    // Flip the sign of the result when (offset mod 4) = 2 or 3
+    // (i.e. when bit 1 of the offset is set)
+    sinMask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offsetSin,toM128(0x2))),_mm_setzero_si128()));
+    cosMask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offsetCos,toM128(0x2))),_mm_setzero_si128()));
+    
+    *s = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*s),*s,sinMask);
+    *c = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*c),*c,cosMask);    
+}
+
+#include "vecidx_aos.h"
+#include "floatInVec.h"
+#include "boolInVec.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+namespace Vectormath {
+
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+//
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A 3-D vector in array-of-structures format
+// (one __m128 data member; apart from the empty default constructor, members are only declared in this class body)
+class Vector3
+{
+    __m128 mVec128; // sole data member
+
+	VECTORMATH_FORCE_INLINE void set128(vec_float4 vec); // private; NOTE(review): presumably overwrites mVec128 - confirm against the definition
+	 
+	 VECTORMATH_FORCE_INLINE  vec_float4& get128Ref(); // private; NOTE(review): presumably refers to mVec128 - confirm against the definition
+
+public:
+    // Default constructor; does no initialization
+    // 
+    VECTORMATH_FORCE_INLINE Vector3( ) { };
+
+	// Copy constructor (declared explicitly in this class)
+    // 
+	VECTORMATH_FORCE_INLINE Vector3(const Vector3& vec);
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    VECTORMATH_FORCE_INLINE Vector3( float x, float y, float z );
+
+    // Construct a 3-D vector from x, y, and z elements (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3( const floatInVec &x, const floatInVec &y, const floatInVec &z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector3( const Point3 &pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector3( float scalar );
+
+    // Set all elements of a 3-D vector to the same scalar value (scalar data contained in vector data type)
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector3( const floatInVec &scalar );
+
+    // Construct a 3-D vector from vector float data (__m128)
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector3( __m128 vf4 );
+
+    // Get vector float data from a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE __m128 get128( ) const;
+
+    // Assign one 3-D vector to another
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator =( const Vector3 &vec );
+
+    // Set the x element of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setX( float x );
+
+    // Set the y element of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setY( float y );
+
+    // Set the z element of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setZ( float z );
+
+    // Set the x element of a 3-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setX( const floatInVec &x );
+
+    // Set the y element of a 3-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setY( const floatInVec &y );
+
+    // Set the z element of a 3-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setZ( const floatInVec &z );
+
+    // Get the x element of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setElem( int idx, float value );
+
+    // Set an x, y, or z element of a 3-D vector by index (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setElem( int idx, const floatInVec &value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element (returns a VecIdx object)
+    // 
+    VECTORMATH_FORCE_INLINE VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator +( const Vector3 &vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator -( const Vector3 &vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // 
+    VECTORMATH_FORCE_INLINE const Point3 operator +( const Point3 &pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator *( float scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator /( float scalar ) const;
+
+    // Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator *( const floatInVec &scalar ) const;
+
+    // Divide a 3-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator /( const floatInVec &scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator +=( const Vector3 &vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator -=( const Vector3 &vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator /=( const floatInVec &scalar );
+
+    // Negate all elements of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Vector3 xAxis( );
+
+    // Construct y axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Vector3 yAxis( );
+
+    // Construct z axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Vector3 zAxis( );
+
+};
+
+// Multiply a 3-D vector by a scalar
+// 
+VECTORMATH_FORCE_INLINE const Vector3 operator *( float scalar, const Vector3 &vec );
+
+// Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
+// 
+VECTORMATH_FORCE_INLINE const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec );
+
+// Multiply two 3-D vectors per element
+// 
+VECTORMATH_FORCE_INLINE const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 recipPerElem( const Vector3 &vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+VECTORMATH_FORCE_INLINE const Vector3 absPerElem( const Vector3 &vec );
+
+// Copy sign from one 3-D vector to another, per element
+// 
+VECTORMATH_FORCE_INLINE const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+VECTORMATH_FORCE_INLINE const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+VECTORMATH_FORCE_INLINE const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Maximum element of a 3-D vector
+// 
+VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector3 &vec );
+
+// Minimum element of a 3-D vector
+// 
+VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector3 &vec );
+
+// Compute the sum of all elements of a 3-D vector
+// 
+VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector3 &vec );
+
+// Compute the dot product of two 3-D vectors
+// 
+VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Compute the square of the length of a 3-D vector
+// 
+VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector3 &vec );
+
+// Compute the length of a 3-D vector
+// 
+VECTORMATH_FORCE_INLINE const floatInVec length( const Vector3 &vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 normalize( const Vector3 &vec );
+
+// Compute cross product of two 3-D vectors
+// 
+VECTORMATH_FORCE_INLINE const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Outer product of two 3-D vectors
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 outer( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// NOTE: 
+// Slower than column post-multiply.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 crossMatrix( const Vector3 &vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 );
+
+// Linear interpolation between two 3-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 );
+
+// Spherical linear interpolation between two 3-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a SIMD register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 );
+
+// Conditionally select between two 3-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, const boolInVec &select1 );
+
+// Store x, y, and z elements of 3-D vector in first three words of a quadword, preserving fourth word
+// 
+VECTORMATH_FORCE_INLINE void storeXYZ( const Vector3 &vec, __m128 * quad );
+
+// Load four three-float 3-D vectors, stored in three quadwords
+// 
+VECTORMATH_FORCE_INLINE void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads );
+
+// Store four 3-D vectors in three quadwords
+// 
+VECTORMATH_FORCE_INLINE void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads );
+
+// Store eight 3-D vectors as half-floats
+// 
+VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Vector3 &vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Vector3 &vec, const char * name );
+
+#endif
+
+// A 4-D vector in array-of-structures format
+// (one __m128 data member; apart from the empty default constructor, members are only declared in this class body)
+class Vector4
+{
+    __m128 mVec128; // sole data member
+
+public:
+    // Default constructor; does no initialization
+    // 
+    VECTORMATH_FORCE_INLINE Vector4( ) { };
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // 
+    VECTORMATH_FORCE_INLINE Vector4( float x, float y, float z, float w );
+
+    // Construct a 4-D vector from x, y, z, and w elements (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Vector4( const Vector3 &xyz, float w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4( const Vector3 &xyz, const floatInVec &w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector4( const Vector3 &vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector4( const Point3 &pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector4( const Quat &quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector4( float scalar );
+
+    // Set all elements of a 4-D vector to the same scalar value (scalar data contained in vector data type)
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector4( const floatInVec &scalar );
+
+    // Construct a 4-D vector from vector float data (__m128)
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector4( __m128 vf4 );
+
+    // Get vector float data from a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE __m128 get128( ) const;
+
+    // Assign one 4-D vector to another
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator =( const Vector4 &vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setXYZ( const Vector3 &vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setX( float x );
+
+    // Set the y element of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setY( float y );
+
+    // Set the z element of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setZ( float z );
+
+    // Set the w element of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setW( float w );
+
+    // Set the x element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setX( const floatInVec &x );
+
+    // Set the y element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setY( const floatInVec &y );
+
+    // Set the z element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setZ( const floatInVec &z );
+
+    // Set the w element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setW( const floatInVec &w );
+
+    // Get the x element of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setElem( int idx, float value );
+
+    // Set an x, y, z, or w element of a 4-D vector by index (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setElem( int idx, const floatInVec &value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element (returns a VecIdx object)
+    // 
+    VECTORMATH_FORCE_INLINE VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator +( const Vector4 &vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator -( const Vector4 &vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator *( float scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator /( float scalar ) const;
+
+    // Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator *( const floatInVec &scalar ) const;
+
+    // Divide a 4-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator /( const floatInVec &scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator +=( const Vector4 &vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator -=( const Vector4 &vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator /=( const floatInVec &scalar );
+
+    // Negate all elements of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Vector4 xAxis( );
+
+    // Construct y axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Vector4 yAxis( );
+
+    // Construct z axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Vector4 zAxis( );
+
+    // Construct w axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Vector4 wAxis( );
+
+};
+
+// Multiply a 4-D vector by a scalar
+// 
+VECTORMATH_FORCE_INLINE const Vector4 operator *( float scalar, const Vector4 &vec );
+
+// Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
+// 
+VECTORMATH_FORCE_INLINE const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec );
+
+// Multiply two 4-D vectors per element
+// 
+VECTORMATH_FORCE_INLINE const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 recipPerElem( const Vector4 &vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+VECTORMATH_FORCE_INLINE const Vector4 absPerElem( const Vector4 &vec );
+
+// Copy sign from one 4-D vector to another, per element
+// 
+VECTORMATH_FORCE_INLINE const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+VECTORMATH_FORCE_INLINE const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+VECTORMATH_FORCE_INLINE const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Maximum element of a 4-D vector
+// 
+VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector4 &vec );
+
+// Minimum element of a 4-D vector
+// 
+VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector4 &vec );
+
+// Compute the sum of all elements of a 4-D vector
+// 
+VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector4 &vec );
+
+// Compute the dot product of two 4-D vectors
+// 
+VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Compute the square of the length of a 4-D vector
+// 
+VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector4 &vec );
+
+// Compute the length of a 4-D vector
+// 
+VECTORMATH_FORCE_INLINE const floatInVec length( const Vector4 &vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 normalize( const Vector4 &vec );
+
+// Outer product of two 4-D vectors
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 outer( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 );
+
+// Linear interpolation between two 4-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 );
+
+// Spherical linear interpolation between two 4-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a SIMD register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 );
+
+// Conditionally select between two 4-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1 );
+
+// Store four 4-D vectors as half-floats
+// 
+VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Vector4 &vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Vector4 &vec, const char * name );
+
+#endif
+
+// A 3-D point in array-of-structures format
+// Stores a single __m128 (mVec128); apart from the empty default constructor, members are only declared here.
+class Point3
+{
+    __m128 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // NOTE: the body is empty, so mVec128 is left uninitialized until assigned.
+    VECTORMATH_FORCE_INLINE Point3( ) { };
+
+    // Construct a 3-D point from x, y, and z elements
+    // 
+    VECTORMATH_FORCE_INLINE Point3( float x, float y, float z );
+
+    // Construct a 3-D point from x, y, and z elements (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Point3( const floatInVec &x, const floatInVec &y, const floatInVec &z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // Declared explicit, so a Vector3 is never implicitly converted to a Point3.
+    explicit VECTORMATH_FORCE_INLINE Point3( const Vector3 &vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // Declared explicit, so a float is never implicitly converted to a Point3.
+    explicit VECTORMATH_FORCE_INLINE Point3( float scalar );
+
+    // Set all elements of a 3-D point to the same scalar value (scalar data contained in vector data type)
+    // Declared explicit, so a floatInVec is never implicitly converted to a Point3.
+    explicit VECTORMATH_FORCE_INLINE Point3( const floatInVec &scalar );
+
+    // Set vector float data in a 3-D point
+    // Declared explicit, so a raw __m128 is never implicitly converted to a Point3.
+    explicit VECTORMATH_FORCE_INLINE Point3( __m128 vf4 );
+
+    // Get vector float data from a 3-D point
+    // Returns the __m128 by value.
+    VECTORMATH_FORCE_INLINE __m128 get128( ) const;
+
+    // Assign one 3-D point to another
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & operator =( const Point3 &pnt );
+
+    // Set the x element of a 3-D point
+    // Like every setter below, returns Point3 & so calls can be chained.
+    VECTORMATH_FORCE_INLINE Point3 & setX( float x );
+
+    // Set the y element of a 3-D point
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setY( float y );
+
+    // Set the z element of a 3-D point
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setZ( float z );
+
+    // Set the x element of a 3-D point (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setX( const floatInVec &x );
+
+    // Set the y element of a 3-D point (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setY( const floatInVec &y );
+
+    // Set the z element of a 3-D point (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setZ( const floatInVec &z );
+
+    // Get the x element of a 3-D point
+    // The getters return a floatInVec by value, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getX( ) const;
+
+    // Get the y element of a 3-D point
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getY( ) const;
+
+    // Get the z element of a 3-D point
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // NOTE(review): presumably idx is 0, 1 or 2 for x, y, z; no range check is visible in this declaration.
+    VECTORMATH_FORCE_INLINE Point3 & setElem( int idx, float value );
+
+    // Set an x, y, or z element of a 3-D point by index (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setElem( int idx, const floatInVec &value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx by value (type declared outside this class), not a float reference.
+    VECTORMATH_FORCE_INLINE VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: returns the element as a floatInVec by value.
+    VECTORMATH_FORCE_INLINE const floatInVec operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // The difference of two points is returned as a Vector3.
+    VECTORMATH_FORCE_INLINE const Vector3 operator -( const Point3 &pnt ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result is a Point3.
+    VECTORMATH_FORCE_INLINE const Point3 operator +( const Vector3 &vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // The result is a Point3.
+    VECTORMATH_FORCE_INLINE const Point3 operator -( const Vector3 &vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & operator +=( const Vector3 &vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & operator -=( const Vector3 &vec );
+
+};
+
+// Multiply two 3-D points per element
+// 
+VECTORMATH_FORCE_INLINE const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+VECTORMATH_FORCE_INLINE const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+VECTORMATH_FORCE_INLINE const Point3 recipPerElem( const Point3 &pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+VECTORMATH_FORCE_INLINE const Point3 absPerElem( const Point3 &pnt );
+
+// Copy sign from one 3-D point to another, per element
+// 
+VECTORMATH_FORCE_INLINE const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+VECTORMATH_FORCE_INLINE const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+VECTORMATH_FORCE_INLINE const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Maximum element of a 3-D point
+// 
+VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Point3 &pnt );
+
+// Minimum element of a 3-D point
+// 
+VECTORMATH_FORCE_INLINE const floatInVec minElem( const Point3 &pnt );
+
+// Compute the sum of all elements of a 3-D point
+// 
+VECTORMATH_FORCE_INLINE const floatInVec sum( const Point3 &pnt );
+
+// Apply uniform scale to a 3-D point
+// 
+VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, float scaleVal );
+
+// Apply uniform scale to a 3-D point (scalar data contained in vector data type)
+// 
+VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// 
+VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// 
+VECTORMATH_FORCE_INLINE const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// 
+VECTORMATH_FORCE_INLINE const floatInVec distSqrFromOrigin( const Point3 &pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// 
+VECTORMATH_FORCE_INLINE const floatInVec distFromOrigin( const Point3 &pnt );
+
+// Compute the square of the distance between two 3-D points
+// 
+VECTORMATH_FORCE_INLINE const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Compute the distance between two 3-D points
+// 
+VECTORMATH_FORCE_INLINE const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 );
+
+// Linear interpolation between two 3-D points (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 );
+
+// Conditionally select between two 3-D points
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a vector register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 );
+
+// Conditionally select between two 3-D points (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 );
+
+// Store x, y, and z elements of 3-D point in first three words of a quadword, preserving fourth word
+// 
+VECTORMATH_FORCE_INLINE void storeXYZ( const Point3 &pnt, __m128 * quad );
+
+// Load four three-float 3-D points, stored in three quadwords
+// 
+VECTORMATH_FORCE_INLINE void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads );
+
+// Store four 3-D points in three quadwords
+// 
+VECTORMATH_FORCE_INLINE void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads );
+
+// Store eight 3-D points as half-floats
+// 
+VECTORMATH_FORCE_INLINE void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Point3 &pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Point3 &pnt, const char * name );
+
+#endif
+
+// A quaternion in array-of-structures format
+// Stores a single __m128 (mVec128); apart from the empty default constructor, members are only declared here.
+class Quat
+{
+    __m128 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // NOTE: the body is empty, so mVec128 is left uninitialized until assigned.
+    VECTORMATH_FORCE_INLINE Quat( ) { };
+
+	VECTORMATH_FORCE_INLINE  Quat(const Quat& quat); // Copy a quaternion
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    VECTORMATH_FORCE_INLINE Quat( float x, float y, float z, float w );
+
+    // Construct a quaternion from x, y, z, and w elements (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Quat( const Vector3 &xyz, float w );
+
+    // Construct a quaternion from a 3-D vector and a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat( const Vector3 &xyz, const floatInVec &w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // Declared explicit, so a Vector4 is never implicitly converted to a Quat.
+    explicit VECTORMATH_FORCE_INLINE Quat( const Vector4 &vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // Declared explicit, so a Matrix3 is never implicitly converted to a Quat.
+    explicit VECTORMATH_FORCE_INLINE Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // Declared explicit, so a float is never implicitly converted to a Quat.
+    explicit VECTORMATH_FORCE_INLINE Quat( float scalar );
+
+    // Set all elements of a quaternion to the same scalar value (scalar data contained in vector data type)
+    // Declared explicit, so a floatInVec is never implicitly converted to a Quat.
+    explicit VECTORMATH_FORCE_INLINE Quat( const floatInVec &scalar );
+
+    // Set vector float data in a quaternion
+    // Declared explicit, so a raw __m128 is never implicitly converted to a Quat.
+    explicit VECTORMATH_FORCE_INLINE Quat( __m128 vf4 );
+
+    // Get vector float data from a quaternion
+    // Returns the __m128 by value.
+    VECTORMATH_FORCE_INLINE __m128 get128( ) const;
+
+	// Set a quaternion from vector float data
+    // NOTE(review): takes vec_float4 while get128() returns __m128 - presumably the same underlying type; confirm.
+	VECTORMATH_FORCE_INLINE void set128(vec_float4 vec);
+
+    // Assign one quaternion to another
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator =( const Quat &quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setXYZ( const Vector3 &vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // Returned as a Vector3 by value.
+    VECTORMATH_FORCE_INLINE const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // Like every setter below, returns Quat & so calls can be chained.
+    VECTORMATH_FORCE_INLINE Quat & setX( float x );
+
+    // Set the y element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setY( float y );
+
+    // Set the z element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setZ( float z );
+
+    // Set the w element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setW( float w );
+
+    // Set the x element of a quaternion (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setX( const floatInVec &x );
+
+    // Set the y element of a quaternion (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setY( const floatInVec &y );
+
+    // Set the z element of a quaternion (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setZ( const floatInVec &z );
+
+    // Set the w element of a quaternion (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setW( const floatInVec &w );
+
+    // Get the x element of a quaternion
+    // The getters return a floatInVec by value, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // NOTE(review): presumably idx is 0..3 for x, y, z, w; no range check is visible in this declaration.
+    VECTORMATH_FORCE_INLINE Quat & setElem( int idx, float value );
+
+    // Set an x, y, z, or w element of a quaternion by index (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setElem( int idx, const floatInVec &value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx by value (type declared outside this class), not a float reference.
+    VECTORMATH_FORCE_INLINE VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: returns the element as a floatInVec by value.
+    VECTORMATH_FORCE_INLINE const floatInVec operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator +( const Quat &quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator -( const Quat &quat ) const;
+
+    // Multiply two quaternions
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator *( const Quat &quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator *( float scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator /( float scalar ) const;
+
+    // Multiply a quaternion by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator *( const floatInVec &scalar ) const;
+
+    // Divide a quaternion by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator /( const floatInVec &scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator +=( const Quat &quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator -=( const Quat &quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator *=( const Quat &quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator /=( const floatInVec &scalar );
+
+    // Negate all elements of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // Static factory: called as Quat::identity( ).
+    static VECTORMATH_FORCE_INLINE const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotation( float radians, const Vector3 &unitVec );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotation( const floatInVec &radians, const Vector3 &unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotationX( float radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotationY( float radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotationZ( float radians );
+
+    // Construct a quaternion to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotationX( const floatInVec &radians );
+
+    // Construct a quaternion to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotationY( const floatInVec &radians );
+
+    // Construct a quaternion to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotationZ( const floatInVec &radians );
+
+};
+
+// Multiply a quaternion by a scalar
+// 
+VECTORMATH_FORCE_INLINE const Quat operator *( float scalar, const Quat &quat );
+
+// Multiply a quaternion by a scalar (scalar data contained in vector data type)
+// 
+VECTORMATH_FORCE_INLINE const Quat operator *( const floatInVec &scalar, const Quat &quat );
+
+// Compute the conjugate of a quaternion
+// 
+VECTORMATH_FORCE_INLINE const Quat conj( const Quat &quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// 
+VECTORMATH_FORCE_INLINE const Vector3 rotate( const Quat &unitQuat, const Vector3 &vec );
+
+// Compute the dot product of two quaternions
+// 
+VECTORMATH_FORCE_INLINE const floatInVec dot( const Quat &quat0, const Quat &quat1 );
+
+// Compute the norm of a quaternion
+// 
+VECTORMATH_FORCE_INLINE const floatInVec norm( const Quat &quat );
+
+// Compute the length of a quaternion
+// 
+VECTORMATH_FORCE_INLINE const floatInVec length( const Quat &quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+VECTORMATH_FORCE_INLINE const Quat normalize( const Quat &quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Quat lerp( float t, const Quat &quat0, const Quat &quat1 );
+
+// Linear interpolation between two quaternions (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 );
+
+// Spherical linear interpolation between two quaternions (scalar data contained in vector data type)
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 );
+
+// Spherical quadrangle interpolation
+// 
+VECTORMATH_FORCE_INLINE const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 );
+
+// Spherical quadrangle interpolation (scalar data contained in vector data type)
+// 
+VECTORMATH_FORCE_INLINE const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 );
+
+// Conditionally select between two quaternions
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a vector register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+VECTORMATH_FORCE_INLINE const Quat select( const Quat &quat0, const Quat &quat1, bool select1 );
+
+// Conditionally select between two quaternions (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+VECTORMATH_FORCE_INLINE const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Quat &quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Quat &quat, const char * name );
+
+#endif
+
+// A 3x3 matrix in array-of-structures format
+// Stored as three Vector3 columns (mCol0, mCol1, mCol2); apart from the empty default constructor, members are only declared here.
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // NOTE: the body is empty; the three columns are default-constructed by Vector3's own default constructor.
+    VECTORMATH_FORCE_INLINE Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // Declared explicit, so a Quat is never implicitly converted to a Matrix3.
+    explicit VECTORMATH_FORCE_INLINE Matrix3( const Quat &unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // Declared explicit, so a float is never implicitly converted to a Matrix3.
+    explicit VECTORMATH_FORCE_INLINE Matrix3( float scalar );
+
+    // Set all elements of a 3x3 matrix to the same scalar value (scalar data contained in vector data type)
+    // Declared explicit, so a floatInVec is never implicitly converted to a Matrix3.
+    explicit VECTORMATH_FORCE_INLINE Matrix3( const floatInVec &scalar );
+
+    // Assign one 3x3 matrix to another
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // Like every setter below, returns Matrix3 & so calls can be chained.
+    VECTORMATH_FORCE_INLINE Matrix3 & setCol0( const Vector3 &col0 );
+
+    // Set column 1 of a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & setCol1( const Vector3 &col1 );
+
+    // Set column 2 of a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & setCol2( const Vector3 &col2 );
+
+    // Get column 0 of a 3x3 matrix
+    // The column getters return a Vector3 by value.
+    VECTORMATH_FORCE_INLINE const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // NOTE(review): presumably col is 0, 1 or 2; no range check is visible in this declaration.
+    VECTORMATH_FORCE_INLINE Matrix3 & setCol( int col, const Vector3 &vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // NOTE(review): presumably row is 0, 1 or 2; no range check is visible in this declaration.
+    VECTORMATH_FORCE_INLINE Matrix3 & setRow( int row, const Vector3 &vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a mutable Vector3 reference.
+    VECTORMATH_FORCE_INLINE Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: returns the column by value.
+    VECTORMATH_FORCE_INLINE const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // The column index comes first, then the row index.
+    VECTORMATH_FORCE_INLINE Matrix3 & setElem( int col, int row, float val );
+
+    // Set the element of a 3x3 matrix referred to by column and row indices (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & setElem( int col, int row, const floatInVec &val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // The column index comes first, then the row index.
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 operator *( float scalar ) const;
+
+    // Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 operator *( const floatInVec &scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // The result is a Vector3.
+    VECTORMATH_FORCE_INLINE const Vector3 operator *( const Vector3 &vec ) const;
+
+    // Multiply two 3x3 matrices
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // Static factory: called as Matrix3::identity( ).
+    static VECTORMATH_FORCE_INLINE const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationX( float radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationY( float radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationZ( float radians );
+
+    // Construct a 3x3 matrix to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationX( const floatInVec &radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationY( const floatInVec &radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationZ( const floatInVec &radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // The three angles are passed together in one Vector3 (radiansXYZ).
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationZYX( const Vector3 &radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotation( float radians, const Vector3 &unitVec );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotation( const floatInVec &radians, const Vector3 &unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotation( const Quat &unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // The per-axis scale factors are passed in one Vector3 (scaleVec).
+    static VECTORMATH_FORCE_INLINE const Matrix3 scale( const Vector3 &scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 operator *( float scalar, const Matrix3 & mat );
+
+// Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+// 
+VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a vector register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
+
+// Conditionally select between two 3x3 matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A 4x4 matrix in array-of-structures format
+//
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4( const Vector4 &col0, const Vector4 &col1, const Vector4 &col2, const Vector4 &col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // 
+    explicit VECTORMATH_FORCE_INLINE Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4( const Matrix3 & mat, const Vector3 &translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4( const Quat &unitQuat, const Vector3 &translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // 
+    explicit VECTORMATH_FORCE_INLINE Matrix4( float scalar );
+
+    // Set all elements of a 4x4 matrix to the same scalar value (scalar data contained in vector data type)
+    // 
+    explicit VECTORMATH_FORCE_INLINE Matrix4( const floatInVec &scalar );
+
+    // Assign one 4x4 matrix to another
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // Returns a Matrix4 reference (presumably *this, which would allow chaining - confirm in the definition).
+    VECTORMATH_FORCE_INLINE Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    // Returned by value from a const member, so the matrix itself is not modified.
+    VECTORMATH_FORCE_INLINE const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setTranslation( const Vector3 &translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // Returned by value as a Vector3.
+    VECTORMATH_FORCE_INLINE const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setCol0( const Vector4 &col0 );
+
+    // Set column 1 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setCol1( const Vector4 &col1 );
+
+    // Set column 2 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setCol2( const Vector4 &col2 );
+
+    // Set column 3 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setCol3( const Vector4 &col3 );
+
+    // Get column 0 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // NOTE(review): no range check on col is visible in this declaration - pass 0..3.
+    VECTORMATH_FORCE_INLINE Matrix4 & setCol( int col, const Vector4 &vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // NOTE(review): no range check on row is visible in this declaration - pass 0..3.
+    VECTORMATH_FORCE_INLINE Matrix4 & setRow( int row, const Vector4 &vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a mutable reference, so the column can be assigned through the result.
+    VECTORMATH_FORCE_INLINE Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: returns the column by value.
+    VECTORMATH_FORCE_INLINE const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    VECTORMATH_FORCE_INLINE Matrix4 & setElem( int col, int row, float val );
+
+    // Set the element of a 4x4 matrix referred to by column and row indices (scalar data contained in vector data type)
+    // Note the argument order: column index first, then row index.
+    VECTORMATH_FORCE_INLINE Matrix4 & setElem( int col, int row, const floatInVec &val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // Column index first, then row index; the element is returned as a floatInVec, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    // Const member returning by value: neither operand is modified.
+    VECTORMATH_FORCE_INLINE const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // Unary minus; the result is returned by value.
+    VECTORMATH_FORCE_INLINE const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix4 operator *( float scalar ) const;
+
+    // Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix4 operator *( const floatInVec &scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator *( const Vector4 &vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // The result is a Vector4. NOTE(review): the w component assumed for vec is set by the definition - confirm.
+    VECTORMATH_FORCE_INLINE const Vector4 operator *( const Vector3 &vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // The result is a Vector4. NOTE(review): the w component assumed for pnt is set by the definition - confirm.
+    VECTORMATH_FORCE_INLINE const Vector4 operator *( const Point3 &pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // The result is a full Matrix4, not a Transform3.
+    VECTORMATH_FORCE_INLINE const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    // Returns a Matrix4 reference (presumably *this - confirm in the definition).
+    VECTORMATH_FORCE_INLINE Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    // NOTE(review): presumably equivalent to *this = *this * mat (mat on the right) - confirm in the definition.
+    VECTORMATH_FORCE_INLINE Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // NOTE(review): presumably equivalent to *this = *this * tfrm (tfrm on the right) - confirm in the definition.
+    VECTORMATH_FORCE_INLINE Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // Static factory: call as Matrix4::identity( ); the result is returned by value.
+    static VECTORMATH_FORCE_INLINE const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    // The angle is given in radians.
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationX( float radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    // The angle is given in radians.
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationY( float radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    // The angle is given in radians.
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationZ( float radians );
+
+    // Construct a 4x4 matrix to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationX( const floatInVec &radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationY( const floatInVec &radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationZ( const floatInVec &radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // NOTE(review): presumably the x, y, z components of radiansXYZ are the angles for the x, y, z axes - confirm.
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationZYX( const Vector3 &radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotation( float radians, const Vector3 &unitVec );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotation( const floatInVec &radians, const Vector3 &unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotation( const Quat &unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // scaleVec carries one scale factor per axis.
+    static VECTORMATH_FORCE_INLINE const Matrix4 scale( const Vector3 &scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 translation( const Vector3 &translateVec );
+
+    // Construct viewing matrix based on eye, position looked at, and up direction
+    // eyePos and lookAtPos are points (Point3); upVec is a direction (Vector3).
+    static VECTORMATH_FORCE_INLINE const Matrix4 lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec );
+
+    // Construct a perspective projection matrix
+    // fovyRadians is in radians. NOTE(review): presumably the vertical field of view, with aspect = width / height - confirm.
+    static VECTORMATH_FORCE_INLINE const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
+
+    // Construct an orthographic projection matrix
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// Non-member overload, so the scalar can be written on the left-hand side.
+VECTORMATH_FORCE_INLINE const Matrix4 operator *( float scalar, const Matrix4 & mat );
+
+// Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
+// Non-member overload, so the scalar can be written on the left-hand side.
+VECTORMATH_FORCE_INLINE const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// The result is returned by value; mat is taken by const reference.
+VECTORMATH_FORCE_INLINE const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// The result is returned by value; note that scaleVec comes first in the argument list here.
+VECTORMATH_FORCE_INLINE const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+// The determinant is returned as a floatInVec, not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): presumably returns mat1 when select1 is true and mat0 otherwise - confirm in the definition.
+VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
+
+// Conditionally select between two 4x4 matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably returns mat1 when select1 is true and mat0 otherwise - confirm in the definition.
+VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A 3x4 transformation matrix in array-of-structures format
+// Stored as four Vector3 columns (mCol0 .. mCol3), i.e. three rows by four columns.
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // NOTE(review): the ';' after the empty body is redundant.
+    VECTORMATH_FORCE_INLINE Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    VECTORMATH_FORCE_INLINE Transform3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2, const Vector3 &col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Transform3( const Matrix3 & tfrm, const Vector3 &translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Transform3( const Quat &unitQuat, const Vector3 &translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // explicit: prevents an implicit float -> Transform3 conversion.
+    explicit VECTORMATH_FORCE_INLINE Transform3( float scalar );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value (scalar data contained in vector data type)
+    // explicit: prevents an implicit floatInVec -> Transform3 conversion.
+    explicit VECTORMATH_FORCE_INLINE Transform3( const floatInVec &scalar );
+
+    // Assign one 3x4 transformation matrix to another
+    // Returns a Transform3 reference (presumably *this - confirm in the definition).
+    VECTORMATH_FORCE_INLINE Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // Returned by value from a const member, so the transform itself is not modified.
+    VECTORMATH_FORCE_INLINE const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE(review): presumably stored in mCol3 - confirm in the definition.
+    VECTORMATH_FORCE_INLINE Transform3 & setTranslation( const Vector3 &translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // NOTE(review): presumably read from mCol3 - confirm in the definition.
+    VECTORMATH_FORCE_INLINE const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setCol0( const Vector3 &col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setCol1( const Vector3 &col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setCol2( const Vector3 &col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setCol3( const Vector3 &col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // NOTE(review): no range check on col is visible in this declaration - pass 0..3.
+    VECTORMATH_FORCE_INLINE Transform3 & setCol( int col, const Vector3 &vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // A row spans all four columns, so it is passed as a Vector4.
+    VECTORMATH_FORCE_INLINE Transform3 & setRow( int row, const Vector4 &vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // A row spans all four columns, so it is returned as a Vector4.
+    VECTORMATH_FORCE_INLINE const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a mutable reference, so the column can be assigned through the result.
+    VECTORMATH_FORCE_INLINE Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: returns the column by value.
+    VECTORMATH_FORCE_INLINE const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    VECTORMATH_FORCE_INLINE Transform3 & setElem( int col, int row, float val );
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices (scalar data contained in vector data type)
+    // Note the argument order: column index first, then row index.
+    VECTORMATH_FORCE_INLINE Transform3 & setElem( int col, int row, const floatInVec &val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // Column index first, then row index; the element is returned as a floatInVec, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // The result is a Vector3. NOTE(review): presumably the translation is not applied to a vector - confirm.
+    VECTORMATH_FORCE_INLINE const Vector3 operator *( const Vector3 &vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // The result is a Point3. NOTE(review): presumably the translation is applied to a point - confirm.
+    VECTORMATH_FORCE_INLINE const Point3 operator *( const Point3 &pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    VECTORMATH_FORCE_INLINE const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // NOTE(review): presumably equivalent to *this = *this * tfrm (tfrm on the right) - confirm in the definition.
+    VECTORMATH_FORCE_INLINE Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // Static factory: call as Transform3::identity( ); the result is returned by value.
+    static VECTORMATH_FORCE_INLINE const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // The angle is given in radians.
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationX( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // The angle is given in radians.
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationY( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // The angle is given in radians.
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationZ( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationX( const floatInVec &radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationY( const floatInVec &radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationZ( const floatInVec &radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // NOTE(review): presumably the x, y, z components of radiansXYZ are the angles for the x, y, z axes - confirm.
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationZYX( const Vector3 &radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotation( float radians, const Vector3 &unitVec );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotation( const floatInVec &radians, const Vector3 &unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotation( const Quat &unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // scaleVec carries one scale factor per axis.
+    static VECTORMATH_FORCE_INLINE const Transform3 scale( const Vector3 &scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 translation( const Vector3 &translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// The result is returned by value; tfrm is taken by const reference.
+VECTORMATH_FORCE_INLINE const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// The result is returned by value; note that scaleVec comes first in the argument list here.
+VECTORMATH_FORCE_INLINE const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// 
+VECTORMATH_FORCE_INLINE const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// 
+VECTORMATH_FORCE_INLINE const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+// 
+VECTORMATH_FORCE_INLINE const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+// 
+VECTORMATH_FORCE_INLINE const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): presumably returns tfrm1 when select1 is true and tfrm0 otherwise - confirm in the definition.
+VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
+
+// Conditionally select between two 3x4 transformation matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// NOTE(review): presumably returns tfrm1 when select1 is true and tfrm0 otherwise - confirm in the definition.
+VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/src/bullet/vectormath/vmInclude.h b/src/bullet/vectormath/vmInclude.h
new file mode 100644
index 00000000..a43152ce
--- /dev/null
+++ b/src/bullet/vectormath/vmInclude.h
@@ -0,0 +1,27 @@
+// Selects one vectormath_aos.h implementation (system, SSE or scalar) and gives its Vectormath::Aos types vm-prefixed aliases.
+#ifndef __VM_INCLUDE_H
+#define __VM_INCLUDE_H
+
+#include "LinearMath/btScalar.h"
+
+#if defined (USE_SYSTEM_VECTORMATH) || defined (__CELLOS_LV2__)
+	#include <vectormath_aos.h>
+#else //(USE_SYSTEM_VECTORMATH)
+	#if defined (BT_USE_SSE) && defined (_WIN32)
+		#include "sse/vectormath_aos.h"
+	#else //all other platforms
+		#include "scalar/vectormath_aos.h"
+	#endif //(BT_USE_SSE) && defined (_WIN32)
+#endif //(USE_SYSTEM_VECTORMATH)
+
+// Aliases for the selected implementation's types; Vector4 and Matrix4 are not aliased here.
+// NOTE(review): the guard name __VM_INCLUDE_H starts with a double underscore, which C++ reserves for the implementation.
+typedef Vectormath::Aos::Vector3    vmVector3;
+typedef Vectormath::Aos::Quat       vmQuat;
+typedef Vectormath::Aos::Matrix3    vmMatrix3;
+typedef Vectormath::Aos::Transform3 vmTransform3;
+typedef Vectormath::Aos::Point3     vmPoint3;
+
+#endif //__VM_INCLUDE_H
+
+