diff -r 000000000000 -r 2f259fa3e83a ode/src/quickstep.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ode/src/quickstep.cpp Tue Feb 02 01:00:49 2010 +0200 @@ -0,0 +1,827 @@ +/************************************************************************* + * * + * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith. * + * All rights reserved. Email: russ@q12.org Web: www.q12.org * + * * + * This library is free software; you can redistribute it and/or * + * modify it under the terms of EITHER: * + * (1) The GNU Lesser General Public License as published by the Free * + * Software Foundation; either version 2.1 of the License, or (at * + * your option) any later version. The text of the GNU Lesser * + * General Public License is included with this library in the * + * file LICENSE.TXT. * + * (2) The BSD-style license that is included with this library in * + * the file LICENSE-BSD.TXT. * + * * + * This library is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files * + * LICENSE.TXT and LICENSE-BSD.TXT for more details. * + * * + *************************************************************************/ + +#include "object.h" +#include "joint.h" +#include +#include +#include +#include +#include +#include +#include +#include "lcp.h" +#include "util.h" + +//#define ALLOCA dALLOCA16 + +typedef const dReal *dRealPtr; +typedef dReal *dRealMutablePtr; +#define dRealArray(name,n) dReal name[n]; +#define dRealAllocaArray(name,n) dReal *name = (dReal*) malloc ((n)*sizeof(dReal)); + +//*************************************************************************** +// configuration + +// for the SOR method: +// uncomment the following line to randomly reorder constraint rows +// during the solution. depending on the situation, this can help a lot +// or hardly at all, but it doesn't seem to hurt. + +#define RANDOMLY_REORDER_CONSTRAINTS 1 + +//**************************************************************************** +// special matrix multipliers + +// multiply block of B matrix (q x 6) with 12 dReal per row with C vektor (q) +static void Multiply1_12q1 (dReal *A, dReal *B, dReal *C, int q) +{ + int k; + dReal sum; + + sum = 0; + for (k=0; kinvMass; + for (j=0; j<3; j++) iMJ_ptr[j] = dMUL(k,J_ptr[j]); + dMULTIPLY0_331 (iMJ_ptr + 3, invI + 12*b1, J_ptr + 3); + if (b2 >= 0) { + k = body[b2]->invMass; + for (j=0; j<3; j++) iMJ_ptr[j+6] = dMUL(k,J_ptr[j+6]); + dMULTIPLY0_331 (iMJ_ptr + 9, invI + 12*b2, J_ptr + 9); + } + J_ptr += 12; + iMJ_ptr += 12; + } +} + +// compute out = J*in. + +static void multiply_J (int m, dRealMutablePtr J, int *jb, + dRealMutablePtr in, dRealMutablePtr out) +{ + int i,j; + dRealPtr J_ptr = J; + for (i=0; i= 0) { + in_ptr = in + b2*6; + for (j=0; j<6; j++) sum += dMUL(J_ptr[j],in_ptr[j]); + } + J_ptr += 6; + out[i] = sum; + } +} + + +//*************************************************************************** +// SOR-LCP method + +// nb is the number of bodies in the body array. +// J is an m*12 matrix of constraint rows +// jb is an array of first and second body numbers for each constraint row +// invI is the global frame inverse inertia for each body (stacked 3x3 matrices) +// +// this returns lambda and fc (the constraint force). +// note: fc is returned as inv(M)*J'*lambda, the constraint force is actually J'*lambda +// +// b, lo and hi are modified on exit + + +struct IndexError { + dReal error; // error to sort on + int findex; + int index; // row index +}; + + +static void SOR_LCP (int m, int nb, dRealMutablePtr J, int *jb, dxBody * const *body, + dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr fc, dRealMutablePtr b, + dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex, + dxQuickStepParameters *qs) +{ + const int num_iterations = qs->num_iterations; + const dReal sor_w = qs->w; // SOR over-relaxation parameter + + int i,j; + + dSetZero (lambda,m); + + // a copy of the 'hi' vector in case findex[] is being used + dRealAllocaArray (hicopy,m); + if(hicopy == NULL) { + return; + } + memcpy (hicopy,hi,m*sizeof(dReal)); + + // precompute iMJ = inv(M)*J' + dRealAllocaArray (iMJ,m*12); + if(iMJ == NULL) { + free(hicopy); + return; + } + compute_invM_JT (m,J,iMJ,jb,body,invI); + + // compute fc=(inv(M)*J')*lambda. we will incrementally maintain fc + // as we change lambda. + + dSetZero (fc,nb*6); + + // precompute 1 / diagonals of A + dRealAllocaArray (Ad,m); + if(Ad == NULL) { + free(hicopy); + free(iMJ); + return; + } + dRealPtr iMJ_ptr = iMJ; + dRealMutablePtr J_ptr = J; + for (i=0; i= 0) { + for (j=6; j<12; j++) sum += dMUL(iMJ_ptr[j],J_ptr[j]); + } + iMJ_ptr += 12; + J_ptr += 12; + Ad[i] = dDIV(sor_w,(sum + cfm[i])); + } + + // scale J and b by Ad + J_ptr = J; + for (i=0; i= 0) { + hi[index] = dFabs (dMUL(hicopy[index],lambda[findex[index]])); + lo[index] = -hi[index]; + } + + int b1 = jb[index*2]; + int b2 = jb[index*2+1]; + dReal delta = b[index] - dMUL(lambda[index],Ad[index]); + dRealMutablePtr fc_ptr = fc + 6*b1; + + // @@@ potential optimization: SIMD-ize this and the b2 >= 0 case + delta -=dMUL(fc_ptr[0],J_ptr[0]) + dMUL(fc_ptr[1],J_ptr[1]) + + dMUL(fc_ptr[2],J_ptr[2]) + dMUL(fc_ptr[3],J_ptr[3]) + + dMUL(fc_ptr[4],J_ptr[4]) + dMUL(fc_ptr[5],J_ptr[5]); + // @@@ potential optimization: handle 1-body constraints in a separate + // loop to avoid the cost of test & jump? + if (b2 >= 0) { + fc_ptr = fc + 6*b2; + delta -=dMUL(fc_ptr[0],J_ptr[6]) + dMUL(fc_ptr[1],J_ptr[7]) + + dMUL(fc_ptr[2],J_ptr[8]) + dMUL(fc_ptr[3],J_ptr[9]) + + dMUL(fc_ptr[4],J_ptr[10]) + dMUL(fc_ptr[5],J_ptr[11]); + } + + // compute lambda and clamp it to [lo,hi]. + // @@@ potential optimization: does SSE have clamping instructions + // to save test+jump penalties here? + dReal new_lambda = lambda[index] + delta; + if (new_lambda < lo[index]) { + delta = lo[index]-lambda[index]; + lambda[index] = lo[index]; + } + else if (new_lambda > hi[index]) { + delta = hi[index]-lambda[index]; + lambda[index] = hi[index]; + } + else { + lambda[index] = new_lambda; + } + + //@@@ a trick that may or may not help + //dReal ramp = (1-((dReal)(iteration+1)/(dReal)num_iterations)); + //delta *= ramp; + + // update fc. + // @@@ potential optimization: SIMD for this and the b2 >= 0 case + fc_ptr = fc + 6*b1; + fc_ptr[0] += dMUL(delta,iMJ_ptr[0]); + fc_ptr[1] += dMUL(delta,iMJ_ptr[1]); + fc_ptr[2] += dMUL(delta,iMJ_ptr[2]); + fc_ptr[3] += dMUL(delta,iMJ_ptr[3]); + fc_ptr[4] += dMUL(delta,iMJ_ptr[4]); + fc_ptr[5] += dMUL(delta,iMJ_ptr[5]); + // @@@ potential optimization: handle 1-body constraints in a separate + // loop to avoid the cost of test & jump? + if (b2 >= 0) { + fc_ptr = fc + 6*b2; + fc_ptr[0] += dMUL(delta,iMJ_ptr[6]); + fc_ptr[1] += dMUL(delta,iMJ_ptr[7]); + fc_ptr[2] += dMUL(delta,iMJ_ptr[8]); + fc_ptr[3] += dMUL(delta,iMJ_ptr[9]); + fc_ptr[4] += dMUL(delta,iMJ_ptr[10]); + fc_ptr[5] += dMUL(delta,iMJ_ptr[11]); + } + } + } + + free(hicopy); + free(iMJ); + free(Ad); + free(order); + +} + + +void dxQuickStepper (dxWorld *world, dxBody * const *body, int nb, + dxJoint * const *_joint, int nj, dReal stepsize) +{ + int i,j; + IFTIMING(dTimerStart("preprocessing");) + + dReal stepsize1 = dRecip(stepsize); + + // number all bodies in the body list - set their tag values + for (i=0; itag = i; + + // make a local copy of the joint array, because we might want to modify it. + // (the "dxJoint *const*" declaration says we're allowed to modify the joints + // but not the joint array, because the caller might need it unchanged). + //@@@ do we really need to do this? we'll be sorting constraint rows individually, not joints + dxJoint **joint = (dxJoint**) malloc (nj * sizeof(dxJoint*)); + if(joint == NULL) { + return; + } + memcpy (joint,_joint,nj * sizeof(dxJoint*)); + + // for all bodies, compute the inertia tensor and its inverse in the global + // frame, and compute the rotational force and add it to the torque + // accumulator. I and invI are a vertical stack of 3x4 matrices, one per body. + //dRealAllocaArray (I,3*4*nb); // need to remember all I's for feedback purposes only + dRealAllocaArray (invI,3*4*nb); + if( invI == NULL) { + free(joint); + return; + } + for (i=0; iinvI,body[i]->posr.R); + dMULTIPLY0_333 (invI+i*12,body[i]->posr.R,tmp); + } + + // add the gravity force to all bodies + for (i=0; iflags & dxBodyNoGravity)==0) { + body[i]->facc[0] += dMUL( body[i]->mass.mass,world->gravity[0]); + body[i]->facc[1] += dMUL(body[i]->mass.mass,world->gravity[1]); + body[i]->facc[2] += dMUL(body[i]->mass.mass,world->gravity[2]); + } + } + + // get joint information (m = total constraint dimension, nub = number of unbounded variables). + // joints with m=0 are inactive and are removed from the joints array + // entirely, so that the code that follows does not consider them. + //@@@ do we really need to save all the info1's + dxJoint::Info1 *info = (dxJoint::Info1*) malloc (nj*sizeof(dxJoint::Info1)); + if( info == NULL) { + free(joint); + free(invI); + return; + } + for (i=0, j=0; jvtable->getInfo1 (joint[j],info+i); + + if (info[i].m > 0) { + joint[i] = joint[j]; + i++; + } + } + nj = i; + + // create the row offset array + int m = 0; + int *ofs = (int*) malloc (nj*sizeof(int)); + if( ofs == NULL) { + free(joint); + free(invI); + free(info); + return; + } + for (i=0; i 0) { + // create a constraint equation right hand side vector `c', a constraint + // force mixing vector `cfm', and LCP low and high bound vectors, and an + // 'findex' vector. + dRealAllocaArray (c,m); + if( c == NULL) { + free(joint); + free(invI); + free(info); + free(ofs); + free(J); + free(jb); + return; + } + dRealAllocaArray (cfm,m); + if( cfm == NULL) { + free(joint); + free(invI); + free(info); + free(ofs); + free(J); + free(jb); + free(c); + return; + } + dRealAllocaArray (lo,m); + if( lo == NULL) { + free(joint); + free(invI); + free(info); + free(ofs); + free(J); + free(jb); + free(c); + free(cfm); + return; + } + dRealAllocaArray (hi,m); + if( hi == NULL) { + free(joint); + free(invI); + free(info); + free(ofs); + free(J); + free(jb); + free(c); + free(cfm); + free(lo); + return; + } + int *findex = (int*) malloc (m*sizeof(int)); + if( findex == NULL) { + free(joint); + free(invI); + free(info); + free(ofs); + free(J); + free(jb); + free(c); + free(cfm); + free(lo); + free(hi); + return; + } + dSetZero (c,m); + dSetValue (cfm,m,world->global_cfm); + dSetValue (lo,m,-dInfinity); + dSetValue (hi,m, dInfinity); + for (i=0; iglobal_erp; + int mfb = 0; // number of rows of Jacobian we will have to save for joint feedback + for (i=0; ivtable->getInfo2 (joint[i],&Jinfo); + // adjust returned findex values for global index numbering + for (j=0; j= 0) findex[ofs[i] + j] += ofs[i]; + } + if (joint[i]->feedback) + mfb += info[i].m; + } + + // we need a copy of Jacobian for joint feedbacks + // because it gets destroyed by SOR solver + // instead of saving all Jacobian, we can save just rows + // for joints, that requested feedback (which is normaly much less) + dRealAllocaArray (Jcopy,mfb*12); + if( Jcopy == NULL) { + free(joint); + free(invI); + free(info); + free(ofs); + free(J); + free(jb); + free(c); + free(cfm); + free(lo); + free(hi); + free(findex); + return; + } + if (mfb > 0) { + mfb = 0; + for (i=0; ifeedback) { + memcpy(Jcopy+mfb*12, J+ofs[i]*12, info[i].m*12*sizeof(dReal)); + mfb += info[i].m; + } + } + + + // create an array of body numbers for each joint row + int *jb_ptr = jb; + for (i=0; inode[0].body) ? (joint[i]->node[0].body->tag) : -1; + int b2 = (joint[i]->node[1].body) ? (joint[i]->node[1].body->tag) : -1; + for (j=0; jinvMass; + for (j=0; j<3; j++) tmp1[i*6+j] = dMUL(body[i]->facc[j],body_invMass) + dMUL(body[i]->lvel[j],stepsize1); + dMULTIPLY0_331 (tmp1 + i*6 + 3,invI + i*12,body[i]->tacc); + for (j=0; j<3; j++) tmp1[i*6+3+j] += dMUL(body[i]->avel[j],stepsize1); + } + + // put J*tmp1 into rhs + dRealAllocaArray (rhs,m); + if( rhs == NULL) { + free(joint); + free(invI); + free(info); + free(ofs); + free(J); + free(jb); + free(c); + free(cfm); + free(lo); + free(hi); + free(findex); + free(Jcopy); + free(tmp1); + return; + } + multiply_J (m,J,jb,tmp1,rhs); + + // complete rhs + for (i=0; iqs); + + + // note that the SOR method overwrites rhs and J at this point, so + // they should not be used again. + + // add stepsize * cforce to the body velocity + for (i=0; ilvel[j] += dMUL(stepsize,cforce[i*6+j]); + for (j=0; j<3; j++) body[i]->avel[j] += dMUL(stepsize,cforce[i*6+3+j]); + } + + // if joint feedback is requested, compute the constraint force. + // BUT: cforce is inv(M)*J'*lambda, whereas we want just J'*lambda, + // so we must compute M*cforce. + // @@@ if any joint has a feedback request we compute the entire + // adjusted cforce, which is not the most efficient way to do it. + /*for (j=0; jfeedback) { + // compute adjusted cforce + for (i=0; imass.mass; + cforce [i*6+0] *= k; + cforce [i*6+1] *= k; + cforce [i*6+2] *= k; + dVector3 tmp; + dMULTIPLY0_331 (tmp, I + 12*i, cforce + i*6 + 3); + cforce [i*6+3] = tmp[0]; + cforce [i*6+4] = tmp[1]; + cforce [i*6+5] = tmp[2]; + } + // compute feedback for this and all remaining joints + for (; jfeedback; + if (fb) { + int b1 = joint[j]->node[0].body->tag; + memcpy (fb->f1,cforce+b1*6,3*sizeof(dReal)); + memcpy (fb->t1,cforce+b1*6+3,3*sizeof(dReal)); + if (joint[j]->node[1].body) { + int b2 = joint[j]->node[1].body->tag; + memcpy (fb->f2,cforce+b2*6,3*sizeof(dReal)); + memcpy (fb->t2,cforce+b2*6+3,3*sizeof(dReal)); + } + } + } + } + }*/ + + if (mfb > 0) { + // straightforward computation of joint constraint forces: + // multiply related lambdas with respective J' block for joints + // where feedback was requested + mfb = 0; + for (i=0; ifeedback) { + dJointFeedback *fb = joint[i]->feedback; + dReal data[6]; + Multiply1_12q1 (data, Jcopy+mfb*12, lambda+ofs[i], info[i].m); + fb->f1[0] = data[0]; + fb->f1[1] = data[1]; + fb->f1[2] = data[2]; + fb->t1[0] = data[3]; + fb->t1[1] = data[4]; + fb->t1[2] = data[5]; + if (joint[i]->node[1].body) + { + Multiply1_12q1 (data, Jcopy+mfb*12+6, lambda+ofs[i], info[i].m); + fb->f2[0] = data[0]; + fb->f2[1] = data[1]; + fb->f2[2] = data[2]; + fb->t2[0] = data[3]; + fb->t2[1] = data[4]; + fb->t2[2] = data[5]; + } + mfb += info[i].m; + } + } + } + + free(c); + free(cfm); + free(lo); + free(hi); + free(findex); + free(Jcopy); + free(tmp1); + free(rhs); + free(lambda); + free(cforce); + + } + + // compute the velocity update: + // add stepsize * invM * fe to the body velocity + + IFTIMING (dTimerNow ("compute velocity update");) + for (i=0; iinvMass; + for (j=0; j<3; j++) body[i]->lvel[j] += dMUL(stepsize,dMUL(body_invMass,body[i]->facc[j])); + for (j=0; j<3; j++) body[i]->tacc[j] = dMUL(body[i]->tacc[j],stepsize); + dMULTIPLYADD0_331 (body[i]->avel,invI + i*12,body[i]->tacc); + } + + + // update the position and orientation from the new linear/angular velocity + // (over the given timestep) + IFTIMING (dTimerNow ("update position");) + for (i=0; ifacc,3); + dSetZero (body[i]->tacc,3); + } + + IFTIMING (dTimerEnd();) + IFTIMING (if (m > 0) dTimerReport (stdout,1);) + + + free(joint); + free(invI); + free(info); + free(ofs); + free(J); + free(jb); + +}