Sleipnir
src/statistics.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "statistics.h"
00024 #include "measure.h"
00025 #include "dat.h"
00026 #define WT_MULTIPLIER 50
00027 /*
00028  * Implementations thanks to:
00029  * Numerical Recipes in C
00030  * RANLIB
00031  * http://www.csit.fsu.edu/~burkardt
00032  */
00033 
00034 namespace Sleipnir {
00035 
00036 static double ranf( ) {
00037 
00038     return ( (double)rand( ) / RAND_MAX ); }
00039 
00040 static double fsign( double dNum, double dSign ) {
00041 
00042     return ( ( ( ( dSign > 0 ) && ( dNum < 0 ) ) ||
00043         ( ( dSign < 0 ) && ( dNum > 0 ) ) ) ? -dNum : dNum ); }
00044 
00062 double CStatistics::LjungBox( const float* adX, size_t iN, size_t iH ) {
00063     float*  adCor;
00064     size_t  i, iShift;
00065     double  dRet;
00066 
00067     adCor = new float[ iN ];
00068     for( iShift = 0; iShift < iN; ++iShift ) {
00069         adCor[ iShift ] = 0;
00070         for( i = 0; i <= iShift; ++i )
00071             adCor[ iShift ] += adX[ i + iN - iShift - 1 ] * adX[ i ]; }
00072     for( i = 0; i < iN; ++i )
00073         if( adCor[ iN - 1 ] )
00074             adCor[ i ] /= adCor[ iN - 1 ];
00075         else
00076             adCor[ i ] = 0;
00077     adCor[ iN - 1 ] = 1;
00078 
00079     dRet = 0;
00080     for( i = 0; i < iH; ++i )
00081         dRet += adCor[ i ] * adCor[ i ] / ( iN - i - 1 );
00082     delete[] adCor;
00083 
00084     return ( 1 - CStatisticsImpl::Chi2CDF( sqrt( iN * ( iN + 2 ) * dRet ), 0, 1, iH ) ); }
00085 
00086 double CStatisticsImpl::Chi2CDF( double dX, double dA, double dB, double dC ) {
00087     double  dRet, dX2, dY, dP2;
00088 
00089     if( dX <= dA )
00090         dRet = 0;
00091     else {
00092         dY = ( dX - dA ) / dB;
00093         dX2 = dY * dY / 2;
00094         dP2 = dC / 2;
00095         dRet = IncompleteGamma( dP2, dX2 ); }
00096 
00097     return dRet; }
00098 
00099 double CStatisticsImpl::IncompleteGamma( double p, double x ) {
00100     double  a, arg, b, c, pn1, pn2, pn3, pn4, pn5, pn6, rn, value;
00101     double  exp_arg_min = -88.0E+00;
00102     double  overflow    = 1.0E+37;
00103     double  plimit      = 1000.0E+00;
00104     double  tol         = 1.0E-07;
00105     double  xbig        = 1.0E+08;
00106 
00107     value = 0;
00108 
00109     if( p <= 0 )
00110         return 0;
00111     if( x <= 0 )
00112         return 0;
00113 
00114 //
00115 //  Use a normal approximation if PLIMIT < P.
00116 //
00117     if( plimit < p ) {
00118         pn1 = 3.0 * sqrt ( p ) * ( pow ( x / p, 1.0 / 3.0 ) + 1.0 / ( 9.0 * p ) - 1.0 );
00119         return Normal01CDF( pn1 ); }
00120 
00121 //
00122 //  Is X extremely large compared to P?
00123 //
00124     if( xbig < x )
00125         return 1;
00126 
00127 //
00128 //  Use Pearson's series expansion.
00129 //  (P is not large enough to force overflow in the log of Gamma.
00130 //
00131     if( x <= 1.0 || x < p ) {
00132         arg = p * log ( x ) - x - GammaLog( p + 1.0 );
00133         c = 1.0;
00134         value = 1.0;
00135         a = p;
00136 
00137         for( ; ; ) {
00138             a = a + 1.0;
00139             c = c * x / a;
00140             value = value + c;
00141 
00142             if( c <= tol )
00143                 break; }
00144         arg = arg + log ( value );
00145         value = ( exp_arg_min <= arg ) ? exp( arg ) : 0; }
00146     else {
00147 //
00148 //  Use a continued fraction expansion.
00149 //
00150         arg = p * log ( x ) - x - GammaLog( p );
00151         a = 1.0 - p;
00152         b = a + x + 1.0;
00153         c = 0.0;
00154         pn1 = 1.0;
00155         pn2 = x;
00156         pn3 = x + 1.0;
00157         pn4 = x * b;
00158         value = pn3 / pn4;
00159 
00160         for( ; ; ) {
00161             a = a + 1.0;
00162             b = b + 2.0;
00163             c = c + 1.0;
00164             pn5 = b * pn3 - a * c * pn1;
00165             pn6 = b * pn4 - a * c * pn2;
00166 
00167             if( 0 < fabs( pn6 ) ) {
00168                 rn = pn5 / pn6;
00169                 if( fabs( value - rn ) <= min(tol, tol * rn) ) {
00170                     arg = arg + log( value );
00171                     return ( ( exp_arg_min <= arg ) ? ( 1 - exp( arg ) ) : 1 ); }
00172 
00173                 value = rn; }
00174             pn1 = pn3;
00175             pn2 = pn4;
00176             pn3 = pn5;
00177             pn4 = pn6;
00178 //
00179 //  Rescale terms in continued fraction if terms are large.
00180 //
00181             if( overflow <= fabs( pn5 ) ) {
00182                 pn1 = pn1 / overflow;
00183                 pn2 = pn2 / overflow;
00184                 pn3 = pn3 / overflow;
00185                 pn4 = pn4 / overflow; } } }
00186 
00187     return value; }
00188 
00189 double CStatisticsImpl::Normal01CDF( double x ) {
00190     double  a1  = 0.398942280444E+00;
00191     double  a2  = 0.399903438504E+00;
00192     double  a3  = 5.75885480458E+00;
00193     double  a4  = 29.8213557808E+00;
00194     double  a5  = 2.62433121679E+00;
00195     double  a6  = 48.6959930692E+00;
00196     double  a7  = 5.92885724438E+00;
00197     double  b0  = 0.398942280385E+00;
00198     double  b1  = 3.8052E-08;
00199     double  b2  = 1.00000615302E+00;
00200     double  b3  = 3.98064794E-04;
00201     double  b4  = 1.98615381364E+00;
00202     double  b5  = 0.151679116635E+00;
00203     double  b6  = 5.29330324926E+00;
00204     double  b7  = 4.8385912808E+00;
00205     double  b8  = 15.1508972451E+00;
00206     double  b9  = 0.742380924027E+00;
00207     double  b10 = 30.789933034E+00;
00208     double  b11 = 3.99019417011E+00;
00209     double  cdf;
00210     double  q;
00211     double  y;
00212 
00213 //
00214 //  |X| <= 1.28.
00215 //
00216     if( fabs( x ) <= 1.28 ) {
00217         y = 0.5 * x * x;
00218         q = 0.5 - fabs( x ) * ( a1 - a2 * y / ( y + a3 - a4 / ( y + a5 + a6 / ( y + a7 ) ) ) ); }
00219 //
00220 //  1.28 < |X| <= 12.7
00221 //
00222     else if( fabs( x ) <= 12.7 ) {
00223         y = 0.5 * x * x;
00224         q = exp ( - y ) * b0 / ( fabs ( x ) - b1 +
00225             b2 / ( fabs ( x ) + b3 +
00226             b4 / ( fabs ( x ) - b5 +
00227             b6 / ( fabs ( x ) + b7 -
00228             b8 / ( fabs ( x ) + b9 +
00229             b10 / ( fabs ( x ) + b11 ) ) ) ) ) ); }
00230 //
00231 //  12.7 < |X|
00232 //
00233     else
00234         q = 0;
00235 
00236 //
00237 //  Take account of negative X.
00238 //
00239     cdf = ( x < 0 ) ? q : ( 1 - q );
00240 
00241     return cdf; }
00242 
00243 double CStatisticsImpl::GammaLog( double x ) {
00244     double  c[ 7 ]  = {
00245         -1.910444077728E-03, 
00246          8.4171387781295E-04, 
00247         -5.952379913043012E-04, 
00248          7.93650793500350248E-04, 
00249         -2.777777777777681622553E-03, 
00250          8.333333333333333331554247E-02, 
00251          5.7083835261E-03 };
00252     double  corr;
00253     double  d1      = -5.772156649015328605195174E-01;
00254     double  d2      = 4.227843350984671393993777E-01;
00255     double  d4      = 1.791759469228055000094023E+00;
00256     double  frtbig  = 1.42E+09;
00257     int i;
00258     double  p1[ 8 ] = {
00259         4.945235359296727046734888E+00, 
00260         2.018112620856775083915565E+02, 
00261         2.290838373831346393026739E+03, 
00262         1.131967205903380828685045E+04, 
00263         2.855724635671635335736389E+04, 
00264         3.848496228443793359990269E+04, 
00265         2.637748787624195437963534E+04, 
00266         7.225813979700288197698961E+03 };
00267     double  p2[ 8 ] = {
00268         4.974607845568932035012064E+00, 
00269         5.424138599891070494101986E+02, 
00270         1.550693864978364947665077E+04, 
00271         1.847932904445632425417223E+05, 
00272         1.088204769468828767498470E+06, 
00273         3.338152967987029735917223E+06, 
00274         5.106661678927352456275255E+06, 
00275         3.074109054850539556250927E+06 };
00276     double  p4[ 8 ] = {
00277         1.474502166059939948905062E+04, 
00278         2.426813369486704502836312E+06, 
00279         1.214755574045093227939592E+08, 
00280         2.663432449630976949898078E+09, 
00281         2.940378956634553899906876E+010,
00282         1.702665737765398868392998E+011,
00283         4.926125793377430887588120E+011, 
00284         5.606251856223951465078242E+011 };
00285     double  pnt68   = 0.6796875E+00;
00286     double  q1[ 8 ] = {
00287         6.748212550303777196073036E+01, 
00288         1.113332393857199323513008E+03, 
00289         7.738757056935398733233834E+03, 
00290         2.763987074403340708898585E+04, 
00291         5.499310206226157329794414E+04, 
00292         6.161122180066002127833352E+04, 
00293         3.635127591501940507276287E+04, 
00294         8.785536302431013170870835E+03 };
00295     double  q2[ 8 ] = {
00296         1.830328399370592604055942E+02, 
00297         7.765049321445005871323047E+03, 
00298         1.331903827966074194402448E+05, 
00299         1.136705821321969608938755E+06, 
00300         5.267964117437946917577538E+06, 
00301         1.346701454311101692290052E+07, 
00302         1.782736530353274213975932E+07, 
00303         9.533095591844353613395747E+06 };
00304     double  q4[ 8 ] = {
00305         2.690530175870899333379843E+03, 
00306         6.393885654300092398984238E+05, 
00307         4.135599930241388052042842E+07, 
00308         1.120872109616147941376570E+09, 
00309         1.488613728678813811542398E+010, 
00310         1.016803586272438228077304E+011, 
00311         3.417476345507377132798597E+011, 
00312         4.463158187419713286462081E+011 };
00313     double  res;
00314     double  sqrtpi  = 0.9189385332046727417803297E+00;
00315     double  xbig    = 4.08E+36;
00316     double  xden;
00317     double  xm1;
00318     double  xm2;
00319     double  xm4;
00320     double  xnum;
00321     double  xsq;
00322 
00323 //
00324 //  Return immediately if the argument is out of range.
00325 //
00326     if( x <= 0 || xbig < x )
00327         return HUGE_VAL;
00328 
00329     if( x <= EpsilonDouble( ) )
00330         res = -log( x );
00331     else if( x <= 1.5 ) {
00332         if ( x < pnt68 ) {
00333             corr = -log( x );
00334             xm1 = x; }
00335         else {
00336             corr = 0;
00337             xm1 = ( x - 0.5 ) - 0.5; }
00338 
00339         if ( x <= 0.5 || pnt68 <= x ) {
00340             xden = 1.0;
00341             xnum = 0.0;
00342 
00343             for( i = 0; i < 8; i++ ) {
00344                 xnum = xnum * xm1 + p1[i];
00345                 xden = xden * xm1 + q1[i]; }
00346             res = corr + ( xm1 * ( d1 + xm1 * ( xnum / xden ) ) ); }
00347         else {
00348             xm2 = ( x - 0.5 ) - 0.5;
00349             xden = 1.0;
00350             xnum = 0.0;
00351             for( i = 0; i < 8; i++ ) {
00352                 xnum = xnum * xm2 + p2[i];
00353                 xden = xden * xm2 + q2[i]; }
00354             res = corr + xm2 * ( d2 + xm2 * ( xnum / xden ) ); } }
00355     else if( x <= 4.0 ) {
00356         xm2 = x - 2.0;
00357         xden = 1.0;
00358         xnum = 0.0;
00359         for( i = 0; i < 8; i++ ) {
00360             xnum = xnum * xm2 + p2[i];
00361             xden = xden * xm2 + q2[i]; }
00362 
00363         res = xm2 * ( d2 + xm2 * ( xnum / xden ) ); }
00364     else if( x <= 12.0 ) {
00365         xm4 = x - 4.0;
00366         xden = - 1.0;
00367         xnum = 0.0;
00368         for( i = 0; i < 8; i++ ) {
00369             xnum = xnum * xm4 + p4[i];
00370             xden = xden * xm4 + q4[i]; }
00371         res = d4 + xm4 * ( xnum / xden ); }
00372     else {
00373         res = 0.0;
00374 
00375         if( x <= frtbig ) {
00376             res = c[6];
00377             xsq = x * x;
00378 
00379             for( i = 0; i < 6; i++ )
00380                 res = res / xsq + c[i]; }
00381 
00382         res = res / x;
00383         corr = log ( x );
00384         res = res + sqrtpi - 0.5 * corr;
00385         res = res + x * ( corr - 1.0 ); }
00386 
00387     return res; }
00388 
00403 double CStatistics::SampleGammaLogStandard( double dXX ) {
00404     static const double c_adCof[]   = { 76.18009172947146, -86.50532032941677, 24.01409824083091,
00405         -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5 };
00406     double  dX, dY, dTmp, dSer;
00407     size_t  j;
00408 
00409     dX = dY = dXX;
00410     dTmp = dX + 5.5;
00411     dTmp -= ( dX + 0.5 ) * log( dTmp );
00412     dSer = 1.000000000190015;
00413     for( j = 0; j <= 5; ++j )
00414         dSer += c_adCof[ j ] / ++dY;
00415 
00416     return ( -dTmp + log( 2.5066282746310005 * dSer / dX ) ); }
00417 
00432 double CStatistics::SampleGammaStandard( double dShape ) {
00433 static double q1 = 4.166669E-2;
00434 static double q2 = 2.083148E-2;
00435 static double q3 = 8.01191E-3;
00436 static double q4 = 1.44121E-3;
00437 static double q5 = -7.388E-5;
00438 static double q6 = 2.4511E-4;
00439 static double q7 = 2.424E-4;
00440 static double a1 = 0.3333333;
00441 static double a2 = -0.250003;
00442 static double a3 = 0.2000062;
00443 static double a4 = -0.1662921;
00444 static double a5 = 0.1423657;
00445 static double a6 = -0.1367177;
00446 static double a7 = 0.1233795;
00447 static double e1 = 1.0;
00448 static double e2 = 0.4999897;
00449 static double e3 = 0.166829;
00450 static double e4 = 4.07753E-2;
00451 static double e5 = 1.0293E-2;
00452 static double aa = 0.0;
00453 static double aaa = 0.0;
00454 static double sqrt32 = 5.656854;
00455 static double sgamma,s2,s,d,t,x,u,r,q0,b,si,c,v,q,e,w,p;
00456 double a = dShape;
00457     if(a == aa) goto S10;
00458     if(a < 1.0) goto S120;
00459 /*
00460      STEP  1:  RECALCULATIONS OF S2,S,D IF A HAS CHANGED
00461 */
00462     aa = a;
00463     s2 = a-0.5;
00464     s = sqrt(s2);
00465     d = sqrt32-12.0*s;
00466 S10:
00467 /*
00468      STEP  2:  T=STANDARD NORMAL DEVIATE,
00469                X=(S,1/2)-NORMAL DEVIATE.
00470                IMMEDIATE ACCEPTANCE (I)
00471 */
00472     t = SampleNormalStandard();
00473     x = s+0.5*t;
00474     sgamma = x*x;
00475     if(t >= 0.0) return sgamma;
00476 /*
00477      STEP  3:  U= 0,1 -UNIFORM SAMPLE. SQUEEZE ACCEPTANCE (S)
00478 */
00479     u = ranf();
00480     if(d*u <= t*t*t) return sgamma;
00481 /*
00482      STEP  4:  RECALCULATIONS OF Q0,B,SI,C IF NECESSARY
00483 */
00484     if(a == aaa) goto S40;
00485     aaa = a;
00486     r = 1.0/ a;
00487     q0 = ((((((q7*r+q6)*r+q5)*r+q4)*r+q3)*r+q2)*r+q1)*r;
00488 /*
00489                APPROXIMATION DEPENDING ON SIZE OF PARAMETER A
00490                THE CONSTANTS IN THE EXPRESSIONS FOR B, SI AND
00491                C WERE ESTABLISHED BY NUMERICAL EXPERIMENTS
00492 */
00493     if(a <= 3.686) goto S30;
00494     if(a <= 13.022) goto S20;
00495 /*
00496                CASE 3:  A .GT. 13.022
00497 */
00498     b = 1.77;
00499     si = 0.75;
00500     c = 0.1515/s;
00501     goto S40;
00502 S20:
00503 /*
00504                CASE 2:  3.686 .LT. A .LE. 13.022
00505 */
00506     b = 1.654+7.6E-3*s2;
00507     si = 1.68/s+0.275;
00508     c = 6.2E-2/s+2.4E-2;
00509     goto S40;
00510 S30:
00511 /*
00512                CASE 1:  A .LE. 3.686
00513 */
00514     b = 0.463+s+0.178*s2;
00515     si = 1.235;
00516     c = 0.195/s-7.9E-2+1.6E-1*s;
00517 S40:
00518 /*
00519      STEP  5:  NO QUOTIENT TEST IF X NOT POSITIVE
00520 */
00521     if(x <= 0.0) goto S70;
00522 /*
00523      STEP  6:  CALCULATION OF V AND QUOTIENT Q
00524 */
00525     v = t/(s+s);
00526     if(fabs(v) <= 0.25) goto S50;
00527     q = q0-s*t+0.25*t*t+(s2+s2)*log(1.0+v);
00528     goto S60;
00529 S50:
00530     q = q0+0.5*t*t*((((((a7*v+a6)*v+a5)*v+a4)*v+a3)*v+a2)*v+a1)*v;
00531 S60:
00532 /*
00533      STEP  7:  QUOTIENT ACCEPTANCE (Q)
00534 */
00535     if(log(1.0-u) <= q) return sgamma;
00536 S70:
00537 /*
00538      STEP  8:  E=STANDARD EXPONENTIAL DEVIATE
00539                U= 0,1 -UNIFORM DEVIATE
00540                T=(B,SI)-DOUBLE EXPONENTIAL (LAPLACE) SAMPLE
00541 */
00542     e = SampleExponentialStandard();
00543     u = ranf();
00544     u += (u-1.0);
00545     t = b+fsign(si*e,u);
00546 /*
00547      STEP  9:  REJECTION IF T .LT. TAU(1) = -.71874483771719
00548 */
00549     if(t < -0.7187449) goto S70;
00550 /*
00551      STEP 10:  CALCULATION OF V AND QUOTIENT Q
00552 */
00553     v = t/(s+s);
00554     if(fabs(v) <= 0.25) goto S80;
00555     q = q0-s*t+0.25*t*t+(s2+s2)*log(1.0+v);
00556     goto S90;
00557 S80:
00558     q = q0+0.5*t*t*((((((a7*v+a6)*v+a5)*v+a4)*v+a3)*v+a2)*v+a1)*v;
00559 S90:
00560 /*
00561      STEP 11:  HAT ACCEPTANCE (H) (IF Q NOT POSITIVE GO TO STEP 8)
00562 */
00563     if(q <= 0.0) goto S70;
00564     if(q <= 0.5) goto S100;
00565     w = exp(q)-1.0;
00566     goto S110;
00567 S100:
00568     w = ((((e5*q+e4)*q+e3)*q+e2)*q+e1)*q;
00569 S110:
00570 /*
00571                IF T IS REJECTED, SAMPLE AGAIN AT STEP 8
00572 */
00573     if(c*fabs(u) > w*exp(e-0.5*t*t)) goto S70;
00574     x = s+0.5*t;
00575     sgamma = x*x;
00576     return sgamma;
00577 S120:
00578 /*
00579      ALTERNATE METHOD FOR PARAMETERS A BELOW 1  (.3678794=EXP(-1.))
00580 */
00581     aa = 0.0;
00582     b = 1.0+0.3678794*a;
00583 S130:
00584     p = b*ranf();
00585     if(p >= 1.0) goto S140;
00586     sgamma = exp(log(p)/ a);
00587     if(SampleExponentialStandard() < sgamma) goto S130;
00588     return sgamma;
00589 S140:
00590     sgamma = -log((b-p)/ a);
00591     if(SampleExponentialStandard() < (1.0-a)*log(sgamma)) goto S130;
00592     return sgamma;
00593 }
00594 
00606 double CStatistics::SampleNormalStandard( ) {
00607 double x1, x2, w;
00608  
00609 do {
00610     x1 = 2.0 * ranf() - 1.0;
00611     x2 = 2.0 * ranf() - 1.0;
00612     w = x1 * x1 + x2 * x2;
00613 } while ( w >= 1.0 );
00614 
00615 w = sqrt( (-2.0 * log( w ) ) / w );
00616 return x1 * w; }
00617 
00618 /*
00619 static double a[32] = {
00620     0.0,3.917609E-2,7.841241E-2,0.11777,0.1573107,0.1970991,0.2372021,0.2776904,
00621     0.3186394,0.36013,0.4022501,0.4450965,0.4887764,0.5334097,0.5791322,
00622     0.626099,0.6744898,0.7245144,0.7764218,0.8305109,0.8871466,0.9467818,
00623     1.00999,1.077516,1.150349,1.229859,1.318011,1.417797,1.534121,1.67594,
00624     1.862732,2.153875
00625 };
00626 static double d[31] = {
00627     0.0,0.0,0.0,0.0,0.0,0.2636843,0.2425085,0.2255674,0.2116342,0.1999243,
00628     0.1899108,0.1812252,0.1736014,0.1668419,0.1607967,0.1553497,0.1504094,
00629     0.1459026,0.14177,0.1379632,0.1344418,0.1311722,0.128126,0.1252791,
00630     0.1226109,0.1201036,0.1177417,0.1155119,0.1134023,0.1114027,0.1095039
00631 };
00632 static double t[31] = {
00633     7.673828E-4,2.30687E-3,3.860618E-3,5.438454E-3,7.0507E-3,8.708396E-3,
00634     1.042357E-2,1.220953E-2,1.408125E-2,1.605579E-2,1.81529E-2,2.039573E-2,
00635     2.281177E-2,2.543407E-2,2.830296E-2,3.146822E-2,3.499233E-2,3.895483E-2,
00636     4.345878E-2,4.864035E-2,5.468334E-2,6.184222E-2,7.047983E-2,8.113195E-2,
00637     9.462444E-2,0.1123001,0.136498,0.1716886,0.2276241,0.330498,0.5847031
00638 };
00639 static double h[31] = {
00640     3.920617E-2,3.932705E-2,3.951E-2,3.975703E-2,4.007093E-2,4.045533E-2,
00641     4.091481E-2,4.145507E-2,4.208311E-2,4.280748E-2,4.363863E-2,4.458932E-2,
00642     4.567523E-2,4.691571E-2,4.833487E-2,4.996298E-2,5.183859E-2,5.401138E-2,
00643     5.654656E-2,5.95313E-2,6.308489E-2,6.737503E-2,7.264544E-2,7.926471E-2,
00644     8.781922E-2,9.930398E-2,0.11556,0.1404344,0.1836142,0.2790016,0.7010474
00645 };
00646 static long i;
00647 static double snorm,u,s,ustar,aa,w,y,tt;
00648     u = ranf();
00649     s = 0.0;
00650     if(u > 0.5) s = 1.0;
00651     u += (u-s);
00652     u = 32.0*u;
00653     i = (long) (u);
00654     if(i == 32) i = 31;
00655     if(i == 0) goto S100;
00656 //                                START CENTER
00657     ustar = u-(double)i;
00658     aa = *(a+i-1);
00659 S40:
00660     if(ustar <= *(t+i-1)) goto S60;
00661     w = (ustar-*(t+i-1))**(h+i-1);
00662 S50:
00663 //                                EXIT   (BOTH CASES)
00664     y = aa+w;
00665     snorm = y;
00666     if(s == 1.0) snorm = -y;
00667     return snorm;
00668 S60:
00669 //                                CENTER CONTINUED
00670     u = ranf();
00671     w = u*(*(a+i)-aa);
00672     tt = (0.5*w+aa)*w;
00673     goto S80;
00674 S70:
00675     tt = u;
00676     ustar = ranf();
00677 S80:
00678     if(ustar > tt) goto S50;
00679     u = ranf();
00680     if(ustar >= u) goto S70;
00681     ustar = ranf();
00682     goto S40;
00683 S100:
00684 //                                START TAIL
00685     i = 6;
00686     aa = *(a+31);
00687     goto S120;
00688 S110:
00689     aa += *(d+i-1);
00690     i += 1;
00691 S120:
00692     u += u;
00693     if(u < 1.0) goto S110;
00694     u -= 1.0;
00695 S140:
00696     w = u**(d+i-1);
00697     tt = (0.5*w+aa)*w;
00698     goto S160;
00699 S150:
00700     tt = u;
00701 S160:
00702     ustar = ranf();
00703     if(ustar > tt) goto S50;
00704     u = ranf();
00705     if(ustar >= u) goto S150;
00706     u = ranf();
00707     goto S140;
00708 }
00709 */
00710 
00722 double CStatistics::SampleExponentialStandard( ) {
00723 static double q[8] = {
00724     0.6931472,0.9333737,0.9888778,0.9984959,0.9998293,0.9999833,0.9999986,1.0
00725 };
00726 static long i;
00727 static double sexpo,a,u,ustar,umin;
00728 static double *q1 = q;
00729     a = 0.0;
00730     u = ranf();
00731     goto S30;
00732 S20:
00733     a += *q1;
00734 S30:
00735     u += u;
00736     if(u <= 1.0) goto S20;
00737     u -= 1.0;
00738     if(u > *q1) goto S60;
00739     sexpo = a+u;
00740     return sexpo;
00741 S60:
00742     i = 1;
00743     ustar = ranf();
00744     umin = ustar;
00745 S70:
00746     ustar = ranf();
00747     if(ustar < umin) umin = ustar;
00748     i += 1;
00749     if(u > *(q+i-1)) goto S70;
00750     sexpo = a+umin**q1;
00751     return sexpo;
00752 }
00753 
00754 double CStatisticsImpl::IncompleteBeta( double dA, double dB, double dX ) {
00755     double  dBT;
00756 
00757     if( ( dX < 0 ) || ( dX > 1 ) )
00758         return -1;
00759     dBT = ( ( dX == 0 ) || ( dX == 1 ) ) ? 0 :
00760         exp( GammaLog( dA + dB ) - GammaLog( dA ) - GammaLog( dB ) + ( dA * log( dX ) ) +
00761         ( dB * log( 1 - dX ) ) );
00762 
00763     return ( ( dX < ( ( dA + 1 ) / ( dA + dB + 2 ) ) ) ?
00764         ( dBT * IncompleteBetaCF( dA, dB, dX ) / dA ) :
00765         ( 1 - ( dBT * IncompleteBetaCF( dB, dA, 1 - dX ) / dB ) ) ); }
00766 
00767 double CStatisticsImpl::IncompleteBetaCF(double a, double b, double x)
00768 {
00769 static const double c_dFPMin    = 1e-30;
00770 static const double c_iMaxIt    = 100;
00771 static const double c_dEPS      = 3e-7;
00772 int m,m2;
00773 double aa,c,d,del,h,qab,qam,qap;
00774 qab=a+b;
00775 qap=a+1.0;
00776 qam=a-1.0;
00777 c=1.0;
00778 d=1.0-qab*x/qap;
00779 if (fabs(d) < c_dFPMin) d=c_dFPMin;
00780 d=1.0/d;
00781 h=d;
00782 for (m=1;m<=c_iMaxIt;m++) {
00783 m2=2*m;
00784 aa=m*(b-m)*x/((qam+m2)*(a+m2));
00785 d=1.0+aa*d;
00786 if (fabs(d) < c_dFPMin) d=c_dFPMin;
00787 c=1.0+aa/c;
00788 if (fabs(c) < c_dFPMin) c=c_dFPMin;
00789 d=1.0/d;
00790 h *= d*c;
00791 aa = -(a+m)*(qab+m)*x/((a+m2)*(qap+m2));
00792 d=1.0+aa*d;
00793 if (fabs(d) < c_dFPMin) d=c_dFPMin;
00794 c=1.0+aa/c;
00795 if (fabs(c) < c_dFPMin) c=c_dFPMin;
00796 d=1.0/d;
00797 del=d*c;
00798 h *= del;
00799 if (fabs(del-1.0) < c_dEPS) break;
00800 }
00801 if (m > c_iMaxIt) return -1;
00802 return h;
00803 }
00804 
00824 double CStatistics::HypergeometricCDF( size_t iBoth, size_t iNonZeroInOne, size_t iNonZeroInTwo,
00825     size_t iN ) {
00826     size_t  i, iHits;
00827     double  dRet;
00828 
00829     dRet = 0;
00830     iHits = ( iNonZeroInTwo < iNonZeroInOne ) ? iNonZeroInTwo : iNonZeroInOne;
00831     for( i = iBoth; i <= iHits; ++i )
00832         dRet += HypergeometricPDF( i, iNonZeroInOne, iNonZeroInTwo, iN );
00833 
00834     return ( ( dRet > 1 ) ? 1 : dRet ); }
00835 
00855 double CStatistics::TwoSidedHypergeometricCDF( size_t iNonZeroInCommon, size_t iNonZeroInOne, size_t iNonZeroInTwo,
00856     size_t iTotalNumValues ) {
00857     size_t  i, iHits;
00858     double  dRet;
00859 
00860     int a = iTotalNumValues - iNonZeroInTwo + iNonZeroInCommon - iNonZeroInOne ;
00861     int b = iNonZeroInOne - iNonZeroInCommon;
00862     int c = iNonZeroInTwo - iNonZeroInCommon;
00863     int d = iNonZeroInCommon;
00864 
00865     dRet = 0;
00866 
00867     if (a*d - b*c < 0) {
00868         while (a >= 0 && d >= 0) {
00869             dRet += HypergeometricPDF( d, b+d, c+d, a+b+c+d );
00870             a--;d--;c++;b++;
00871             cout << dRet << endl;
00872         }
00873     }
00874     else {
00875         while (c >= 0 && b >= 0) {
00876             dRet += HypergeometricPDF( d, b+d, c+d, a+b+c+d );
00877             a++;d++;c--;b--;
00878         }
00879     }
00880     dRet *= 2;
00881     return ( ( dRet > 1 ) ? 1 : dRet ); }
00882 
00883 
00910 double CStatistics::FScore( size_t iTruePositives, size_t iFalsePositives, size_t iTrueNegatives,
00911     size_t iFalseNegatives, double dBeta ) {
00912     double  dP, dR;
00913 
00914     dP = Precision( iTruePositives, iFalsePositives, iTrueNegatives, iFalseNegatives );
00915     dR = Recall( iTruePositives, iFalsePositives, iTrueNegatives, iFalseNegatives );
00916     dBeta *= dBeta;
00917 
00918     return ( ( dBeta + 1 ) * dP * dR / ( ( dBeta * dP ) + dR ) ); }
00919 
00920 template<class tType>
00921 struct SCompareRank {
00922     const vector<tType>&    m_vecData;
00923 
00924     SCompareRank( const vector<tType>& vecData ) : m_vecData(vecData) { }
00925 
00926     bool operator()( size_t iOne, size_t iTwo ) const {
00927 
00928         return ( m_vecData[ iOne ] < m_vecData[ iTwo ] ); }
00929 };
00930 
00975 double CStatistics::WilcoxonRankSum( const CDat& DatData, const CDat& DatAnswers,
00976     const vector<bool>& vecfGenesOfInterest, bool fInvert ) {
00977     size_t              i, j, k, iOne, iTwo, iNeg;
00978     uint64_t            iPos;
00979     float               d, dAnswer;
00980     double              dSum;
00981     std::vector<size_t> veciGenes;
00982     std::vector<float>  vecdValues, vecdRanks;
00983     bool                fAnswer;
00984 
00985     veciGenes.resize( DatAnswers.GetGenes( ) );
00986     for( i = 0; i < veciGenes.size( ); ++i )
00987         veciGenes[ i ] = DatData.GetGene( DatAnswers.GetGene( i ) );
00988 
00989     for( i = 0; i < DatAnswers.GetGenes( ); ++i ) {
00990         if( ( iOne = veciGenes[ i ] ) == -1 )
00991             continue;
00992         for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) {
00993             if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
00994                 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) ||
00995                 CMeta::IsNaN( d = DatData.Get( iOne, iTwo ) ) )
00996                 continue;
00997             fAnswer = dAnswer > 0;
00998             if( !( vecfGenesOfInterest.empty( ) ||
00999                 ( fAnswer && vecfGenesOfInterest[ i ] && vecfGenesOfInterest[ j ] ) ||
01000                 ( !fAnswer && ( vecfGenesOfInterest[ i ] || vecfGenesOfInterest[ j ] ) ) ) )
01001                 continue;
01002             if( fInvert )
01003                 d = 1 - d;
01004             vecdValues.push_back( d ); } }
01005     {
01006         std::vector<size_t> veciIndices;
01007         size_t              iIndex, iCount;
01008 
01009         veciIndices.resize( vecdValues.size( ) );
01010         for( i = 0; i < vecdValues.size( ); ++i )
01011             veciIndices[ i ] = i;
01012         std::sort( veciIndices.begin( ), veciIndices.end( ), SCompareRank<float>( vecdValues ) );
01013         vecdRanks.resize( veciIndices.size( ) );
01014         for( i = 0; i < vecdRanks.size( ); ++i ) {
01015             iIndex = veciIndices[ i ];
01016             if( !i || ( vecdValues[ iIndex ] != vecdValues[ veciIndices[ i - 1 ] ] ) ) {
01017                 for( iCount = 0,j = i; j < veciIndices.size( ); ++j ) {
01018                     if( vecdValues[ veciIndices[ j ] ] != vecdValues[ iIndex ] )
01019                         break;
01020                     iCount++; }
01021                 d = i + ( iCount - 1 ) / 2.0f; }
01022             vecdRanks[ iIndex ] = d; }
01023     }
01024 
01025     for( dSum = 0,iPos = iNeg = i = k = 0; i < DatAnswers.GetGenes( ); ++i ) {
01026         if( ( iOne = veciGenes[ i ] ) == -1 )
01027             continue;
01028         for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) {
01029             if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
01030                 CMeta::IsNaN( DatData.Get( iOne, iTwo ) ) ||
01031                 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) ||
01032                 !( vecfGenesOfInterest.empty( ) ||
01033                 ( ( dAnswer > 0 ) && vecfGenesOfInterest[ i ] && vecfGenesOfInterest[ j ] ) ||
01034                 ( ( dAnswer <= 0 ) && ( vecfGenesOfInterest[ i ] || vecfGenesOfInterest[ j ] ) ) ) )
01035                 continue;
01036             if( dAnswer > 0 ) {
01037                 iPos++;
01038                 dSum += vecdRanks[ k ]; }
01039             else
01040                 iNeg++;
01041             k++; } }
01042     dSum -= ( iPos * ( iPos - 1 ) ) / 2;
01043 
01044     return ( dSum / iPos / iNeg ); }
01045 
01046 double CStatistics::WilcoxonRankSum( const CPCL& DatData, const CPCL& DatAnswers,
01047     const vector<bool>& vecfHere, const vector<bool>& vecfUbik,
01048     bool fPosIn, bool fNegIn, bool fPosBridge, bool fNegBridge,
01049     bool fPosOut, bool fNegOut, bool fInvert ) {
01050     size_t              i, j, k, iOne, iTwo, iNeg;
01051     uint64_t            iPos;
01052     float               d, dAnswer;
01053     double              dSum;
01054     std::vector<size_t>     veciGenes;
01055     std::vector<float>      vecdValues, vecdRanks;
01056     bool                fAnswer;
01057 
01058     veciGenes.resize( DatAnswers.GetGenes( ) );
01059     for( i = 0; i < veciGenes.size( ); ++i )
01060         veciGenes[ i ] = DatData.GetGene( DatAnswers.GetGene( i ) );
01061 
01062     for( i = 0; i < DatAnswers.GetGenes( ); ++i ) {
01063         if( ( iOne = veciGenes[ i ] ) == -1 )
01064             continue;
01065         for( j = 0; j < DatAnswers.GetGenes( ); ++j ) {
01066             if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
01067                 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) ||
01068                 CMeta::IsNaN( d = DatData.Get( iOne, iTwo ) ) )
01069                 continue;
01070             fAnswer = dAnswer > 0;
01071             if ( CMeta::SkipEdge( fAnswer, i, j, vecfHere, vecfUbik, fPosIn, fNegIn, fPosBridge, fNegBridge, fPosOut, fNegOut ) ) {
01072                 continue;
01073             }
01074 
01075             if( fInvert )
01076                 d = 1 - d;
01077             vecdValues.push_back( d ); } }
01078     {
01079         std::vector<size_t> veciIndices;
01080         size_t              iIndex, iCount;
01081 
01082         veciIndices.resize( vecdValues.size( ) );
01083         for( i = 0; i < vecdValues.size( ); ++i )
01084             veciIndices[ i ] = i;
01085         std::sort( veciIndices.begin( ), veciIndices.end( ), SCompareRank<float>( vecdValues ) );
01086         vecdRanks.resize( veciIndices.size( ) );
01087         for( i = 0; i < vecdRanks.size( ); ++i ) {
01088             iIndex = veciIndices[ i ];
01089             if( !i || ( vecdValues[ iIndex ] != vecdValues[ veciIndices[ i - 1 ] ] ) ) {
01090                 for( iCount = 0,j = i; j < veciIndices.size( ); ++j ) {
01091                     if( vecdValues[ veciIndices[ j ] ] != vecdValues[ iIndex ] )
01092                         break;
01093                     iCount++; }
01094                 d = i + ( iCount - 1 ) / 2.0f; }
01095             vecdRanks[ iIndex ] = d; }
01096     }
01097 
01098     for( dSum = 0,iPos = iNeg = i = k = 0; i < DatAnswers.GetGenes( ); ++i ) {
01099         if( ( iOne = veciGenes[ i ] ) == -1 )
01100             continue;
01101         for( j = 0; j < DatAnswers.GetGenes( ); ++j ) {
01102             if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
01103                 CMeta::IsNaN( DatData.Get( iOne, iTwo ) ) ||
01104                 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) )
01105                 continue;
01106             fAnswer = dAnswer > 0;
01107 
01108             if ( CMeta::SkipEdge( fAnswer, i, j, vecfHere, vecfUbik, fPosIn, fNegIn, fPosBridge, fNegBridge, fPosOut, fNegOut ) ) {
01109                 continue;
01110             }
01111 
01112             if( dAnswer > 0 ) {
01113                 iPos++;
01114                 dSum += vecdRanks[ k ]; }
01115             else
01116                 iNeg++;
01117             k++; } }
01118     dSum -= ( iPos * ( iPos - 1 ) ) / 2;
01119 
01120     return ( dSum / iPos / iNeg ); }
01121 
01187 double CStatistics::WilcoxonRankSum( const CDat& DatData, const CDat& DatAnswers,
01188     const vector<bool>& vecfHere, const vector<bool>& vecfUbik,
01189     bool fPosIn, bool fNegIn, bool fPosBridge, bool fNegBridge,
01190     bool fPosOut, bool fNegOut, bool fInvert ) {
01191     size_t              i, j, k, iOne, iTwo, iNeg;
01192     uint64_t            iPos;
01193     float               d, dAnswer;
01194     double              dSum;
01195     std::vector<size_t>     veciGenes;
01196     std::vector<float>      vecdValues, vecdRanks;
01197     bool                fAnswer;
01198 
01199     veciGenes.resize( DatAnswers.GetGenes( ) );
01200     for( i = 0; i < veciGenes.size( ); ++i )
01201         veciGenes[ i ] = DatData.GetGene( DatAnswers.GetGene( i ) );
01202 
01203     for( i = 0; i < DatAnswers.GetGenes( ); ++i ) {
01204         if( ( iOne = veciGenes[ i ] ) == -1 )
01205             continue;
01206         for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) {
01207             if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
01208                 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) ||
01209                 CMeta::IsNaN( d = DatData.Get( iOne, iTwo ) ) )
01210                 continue;
01211             fAnswer = dAnswer > 0;
01212             if ( CMeta::SkipEdge( fAnswer, i, j, vecfHere, vecfUbik, fPosIn, fNegIn, fPosBridge, fNegBridge, fPosOut, fNegOut ) ) {
01213                 continue;
01214             }
01215 
01216             if( fInvert )
01217                 d = 1 - d;
01218             vecdValues.push_back( d ); } }
01219     {
01220         std::vector<size_t> veciIndices;
01221         size_t              iIndex, iCount;
01222 
01223         veciIndices.resize( vecdValues.size( ) );
01224         for( i = 0; i < vecdValues.size( ); ++i )
01225             veciIndices[ i ] = i;
01226         std::sort( veciIndices.begin( ), veciIndices.end( ), SCompareRank<float>( vecdValues ) );
01227         vecdRanks.resize( veciIndices.size( ) );
01228         for( i = 0; i < vecdRanks.size( ); ++i ) {
01229             iIndex = veciIndices[ i ];
01230             if( !i || ( vecdValues[ iIndex ] != vecdValues[ veciIndices[ i - 1 ] ] ) ) {
01231                 for( iCount = 0,j = i; j < veciIndices.size( ); ++j ) {
01232                     if( vecdValues[ veciIndices[ j ] ] != vecdValues[ iIndex ] )
01233                         break;
01234                     iCount++; }
01235                 d = i + ( iCount - 1 ) / 2.0f; }
01236             vecdRanks[ iIndex ] = d; }
01237     }
01238 
01239     for( dSum = 0,iPos = iNeg = i = k = 0; i < DatAnswers.GetGenes( ); ++i ) {
01240         if( ( iOne = veciGenes[ i ] ) == -1 )
01241             continue;
01242         for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) {
01243             if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
01244                 CMeta::IsNaN( DatData.Get( iOne, iTwo ) ) ||
01245                 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) )
01246                 continue;
01247             fAnswer = dAnswer > 0;
01248 
01249             if ( CMeta::SkipEdge( fAnswer, i, j, vecfHere, vecfUbik, fPosIn, fNegIn, fPosBridge, fNegBridge, fPosOut, fNegOut ) ) {
01250                 continue;
01251             }
01252 
01253             if( dAnswer > 0 ) {
01254                 iPos++;
01255                 dSum += vecdRanks[ k ]; }
01256             else
01257                 iNeg++;
01258             k++; } }
01259     dSum -= ( iPos * ( iPos - 1 ) ) / 2;
01260 
01261     return ( dSum / iPos / iNeg ); }
01262 
01263 double CStatistics::WilcoxonRankSum( const CDat& DatData, const CDat& DatAnswers,
01264     const vector<float>& vecGeneWeights, bool flipneg) {
01265     size_t              i, j, k,m, iOne, iTwo, iNeg;
01266     uint64_t            iPos;
01267     float               d, dAnswer,w;
01268     double              dSum;
01269     std::vector<size_t>     veciGenes;
01270     std::vector<float>      vecdValues, vecdRanks;
01271     bool                fAnswer;
01272 
01273     veciGenes.resize( DatAnswers.GetGenes( ) );
01274     for( i = 0; i < veciGenes.size( ); ++i )
01275         veciGenes[ i ] = DatData.GetGene( DatAnswers.GetGene( i ) );
01276 
01277     for( i = 0; i < DatAnswers.GetGenes( ); ++i ) {
01278         if( ( iOne = veciGenes[ i ] ) == -1 )
01279             continue;
01280         for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) {
01281             if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
01282                 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) ||
01283                 CMeta::IsNaN( d = DatData.Get( iOne, iTwo ) )||
01284                 CMeta::IsNaN( w = vecGeneWeights[i]*vecGeneWeights[j] ))
01285                 continue;
01286             fAnswer = dAnswer > 0;
01287 //write sth here
01288             if(fAnswer || !flipneg)
01289                 for( m = 0; m <(vecGeneWeights[i]*vecGeneWeights[j]*WT_MULTIPLIER-0.5); m++){
01290                     vecdValues.push_back( d );}  
01291             else
01292                 for( m = 0; m <((1-vecGeneWeights[i]*vecGeneWeights[j])*WT_MULTIPLIER-0.5); m++){
01293                     vecdValues.push_back( d );} } }
01294     {
01295         std::vector<size_t> veciIndices;
01296         size_t              iIndex, iCount;
01297 
01298         veciIndices.resize( vecdValues.size( ) );
01299         for( i = 0; i < vecdValues.size( ); ++i )
01300             veciIndices[ i ] = i;
01301         std::sort( veciIndices.begin( ), veciIndices.end( ), SCompareRank<float>( vecdValues ) );
01302         vecdRanks.resize( veciIndices.size( ) );
01303         for( i = 0; i < vecdRanks.size( ); ++i ) {
01304             iIndex = veciIndices[ i ];
01305             if( !i || ( vecdValues[ iIndex ] != vecdValues[ veciIndices[ i - 1 ] ] ) ) {
01306                 for( iCount = 0,j = i; j < veciIndices.size( ); ++j ) {
01307                     if( vecdValues[ veciIndices[ j ] ] != vecdValues[ iIndex ] )
01308                         break;
01309                     iCount++; }
01310                 d = i + ( iCount - 1 ) / 2.0f; }
01311             vecdRanks[ iIndex ] = d; }
01312     }
01313 
01314     for( dSum = 0,iPos = iNeg = i = k = 0; i < DatAnswers.GetGenes( ); ++i ) {
01315         if( ( iOne = veciGenes[ i ] ) == -1 )
01316             continue;
01317         for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) {
01318             if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
01319                 CMeta::IsNaN( DatData.Get( iOne, iTwo ) ) ||
01320                 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) )||
01321                 CMeta::IsNaN( w = vecGeneWeights[i]*vecGeneWeights[j] ) )
01322                 continue;
01323             fAnswer = dAnswer > 0;
01324 
01325                 if( dAnswer > 0 ) {
01326                     for( m = 0; m <(vecGeneWeights[i]*vecGeneWeights[j]*WT_MULTIPLIER-0.5); m++){
01327                     iPos++;
01328                     dSum += vecdRanks[ k ];
01329                     k++;}}
01330                 else{
01331                     if(flipneg)
01332                         for( m = 0; m <((1-vecGeneWeights[i]*vecGeneWeights[j])*WT_MULTIPLIER-0.5); m++){
01333                             iNeg++;
01334                         k++;}
01335                     else
01336                         for( m = 0; m <(vecGeneWeights[i]*vecGeneWeights[j]*WT_MULTIPLIER-0.5); m++){
01337                             iNeg++;
01338                         k++;}
01339                 }
01340         
01341         }}
01342     dSum -= ( iPos * ( iPos - 1 ) ) / 2;
01343 
01344     return ( dSum / iPos / iNeg ); }
01345 
01346 double CStatistics::WilcoxonRankSum( const CDat& DatData, const CDat& DatAnswers,
01347     const CDat& wDat, bool flipneg) {
01348     size_t              i, j, k,m, iOne, iTwo, iNeg;
01349     uint64_t            iPos;
01350     float               d, dAnswer,w;
01351     double              dSum;
01352     std::vector<size_t>     veciGenes,vecfiGenes;
01353     std::vector<float>      vecdValues, vecdRanks, vecdWeights;
01354     bool                fAnswer;
01355 
01356 
01357     veciGenes.resize( DatAnswers.GetGenes( ) );
01358     for( i = 0; i < veciGenes.size( ); ++i )
01359         veciGenes[ i ] = DatData.GetGene( DatAnswers.GetGene( i ) );
01360 
01361     vecfiGenes.resize( DatAnswers.GetGenes( ) );
01362     for( i = 0; i < vecfiGenes.size( ); ++i ){
01363         vecfiGenes[ i ] = wDat.GetGene( DatAnswers.GetGene( i ));}
01364 
01365     for( i = 0; i < DatAnswers.GetGenes( ); ++i ) {
01366         if( ( iOne = veciGenes[ i ] ) == -1 || vecfiGenes[ i ] == -1 )
01367             continue;
01368         for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) {
01369             if( ( ( iTwo = veciGenes[ j ] ) == -1 ||vecfiGenes[ j ] == -1 ) ||
01370                 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) ||
01371                 CMeta::IsNaN( d = DatData.Get( iOne, iTwo ) )||
01372                 CMeta::IsNaN( w = wDat.Get(vecfiGenes[i],vecfiGenes[j]) ))
01373                 continue;
01374             fAnswer = dAnswer > 0;
01375             
01376             if(fAnswer || !flipneg)
01377                 for( m = 0; m <(w*WT_MULTIPLIER-0.5); m++){
01378                     vecdValues.push_back(d);}
01379             else
01380                 for( m = 0; m <((1-w)*WT_MULTIPLIER-0.5); m++){
01381                     vecdValues.push_back(d);}
01382 
01383             } }
01384 
01385     {
01386         std::vector<size_t> veciIndices;
01387         size_t              iIndex, iCount;
01388 
01389         veciIndices.resize( vecdValues.size( ) );
01390         for( i = 0; i < vecdValues.size( ); ++i )
01391             veciIndices[ i ] = i;
01392         std::sort( veciIndices.begin( ), veciIndices.end( ), SCompareRank<float>( vecdValues ) );
01393         vecdRanks.resize( veciIndices.size( ) );
01394         for( i = 0; i < vecdRanks.size( ); ++i ) {
01395             iIndex = veciIndices[ i ];
01396             if( !i || ( vecdValues[ iIndex ] != vecdValues[ veciIndices[ i - 1 ] ] ) ) {
01397                 for( iCount = 0,j = i; j < veciIndices.size( ); ++j ) {
01398                     if( vecdValues[ veciIndices[ j ] ] != vecdValues[ iIndex ] )
01399                         break;
01400                     iCount++; }
01401                 d = i + ( iCount - 1 ) / 2.0f; }
01402             vecdRanks[ iIndex ] = d; }
01403     }
01404 
01405     for( dSum = 0,iPos = iNeg = i = k = 0; i < DatAnswers.GetGenes( ); ++i ) {
01406         if( ( iOne = veciGenes[ i ] ) == -1 || vecfiGenes[ i ] == -1)
01407             continue;
01408         for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) {
01409             if( ( ( iTwo = veciGenes[ j ] ) == -1 || vecfiGenes[ j ] == -1 ) ||
01410                 CMeta::IsNaN( DatData.Get( iOne, iTwo ) ) ||
01411                 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) )||
01412                 CMeta::IsNaN( w = wDat.Get(vecfiGenes[i],vecfiGenes[j]) ) )
01413                 continue;
01414             fAnswer = dAnswer > 0;
01415                 if( dAnswer > 0 ) {
01416                     for( m = 0; m <(wDat.Get(vecfiGenes[i],vecfiGenes[j])*WT_MULTIPLIER-0.5); m++){
01417                         iPos++;
01418                         dSum += vecdRanks[ k ]; 
01419                         k++;}}
01420                 else{
01421                     if(flipneg)
01422                         for( m = 0; m <((1-wDat.Get(vecfiGenes[i],vecfiGenes[j]))*WT_MULTIPLIER-0.5); m++){
01423                             iNeg++;
01424                             k++;}
01425                     else
01426                         for( m = 0; m <(wDat.Get(vecfiGenes[i],vecfiGenes[j])*WT_MULTIPLIER-0.5); m++){
01427                             iNeg++;
01428                             k++;}
01429                      }
01430         }}
01431 
01432     dSum -= ( iPos * ( iPos - 1 ) ) / 2;
01433 
01434     return ( dSum / iPos / iNeg ); }
01435 
01436 double bessi0( double dX ) {
01437     double  dAbs, dRet, dY;
01438 
01439     if( ( dAbs = fabs( dX ) ) < 3.75 ) {
01440         dY = dX / 3.75;
01441         dY *= dY;
01442         dRet = 1 + dY * ( 3.5156229 + dY * ( 3.0899424 + dY * ( 1.2067492 + dY * ( 0.2659732 +
01443             dY * ( 0.360768e-1 + dY * 0.45813e-2 ) ) ) ) ); }
01444     else {
01445         dY = 3.75 / dAbs;
01446         dRet = ( exp( dAbs ) / sqrt( dAbs ) ) * ( 0.39894228 + dY * ( 0.1328592e-1 + dY * ( 0.225319e-2 +
01447             dY * ( -0.157565e-2 + dY * ( 0.916281e-2 + dY * ( -0.2057706e-1 + dY * ( 0.2635537e-1 +
01448             dY * ( -0.1647633e-1 + dY * 0.392377e-2 ) ) ) ) ) ) ) ); }
01449 
01450     return dRet; }
01451 
01452 double bessi1( double dX ) {
01453     double  dAbs, dRet, dY;
01454 
01455     if( ( dAbs = fabs( dX ) ) < 3.75 ) {
01456         dY = dX / 3.75;
01457         dY *= dY;
01458         dRet = dAbs * ( 0.5 + dY * ( 0.87890594 + dY * ( 0.51498869 + dY * ( 0.15084934 + dY * ( 0.2658733e-1 +
01459             dY * ( 0.301532e-2 + dY * 0.32411e-3 ) ) ) ) ) ); }
01460     else {
01461         dY = 3.75 / dAbs;
01462         dRet = 0.2282967e-1 + dY * ( -0.2895312e-1 + dY * ( 0.1787654e-1 - dY * 0.420059e-2 ) );
01463         dRet = 0.39894228 + dY * ( -0.3988024e-1 + dY * ( -0.362018e-2 + dY * ( 0.163801e-2 +
01464             dY * ( -0.1031555e-1 + dY * dRet ) ) ) );
01465         dRet *= exp( dAbs ) / sqrt( dAbs ); }
01466 
01467     return dRet; }
01468 
01469 double bessi( size_t iN, double dX ) {
01470     static const double ACC     = 40;
01471     static const double BIGN0   = 1e10;
01472     static const double BIGN1   = 1e-10;
01473     double  bi, bim, bip, tox, ans;
01474     size_t  j;
01475 
01476     if( !dX )
01477         return 0;
01478 
01479     tox = 2 / fabs( dX );
01480     bip = ans = 0;
01481     bi = 1;
01482     for( j = 2 * ( iN + (size_t)sqrt( ACC * iN ) ); j > 0; j-- ) {
01483         bim = bip + j * tox * bi;
01484         bip = bi;
01485         bi = bim;
01486         if( fabs( bi ) > BIGN0 ) {
01487             ans *= BIGN1;
01488             bi *= BIGN1;
01489             bip *= BIGN1; }
01490         if( j == iN )
01491             ans = bip; }
01492     ans *= bessi0( dX ) / bi;
01493     if( ( dX < 0 ) && ( iN & 1 ) )
01494         ans *= -1;
01495 
01496     return ans; }
01497 
01498 double CStatisticsImpl::ModifiedBesselI( size_t iOrder, double dX ) {
01499 
01500     switch( iOrder ) {
01501         case 0:
01502             return bessi0( dX );
01503 
01504         case 1:
01505             return bessi1( dX ); }
01506 
01507     return bessi( iOrder, dX ); }
01508 
01509 /*
01510  * The inverse standard normal distribution.
01511  *
01512  *   Author:      Peter J. Acklam <pjacklam@online.no>
01513  *   URL:         http://home.online.no/~pjacklam
01514  *
01515  * This function is based on the MATLAB code from the address above,
01516  * translated to C, and adapted for our purposes.
01517  */
01518 double CStatistics::InverseNormal01CDF( double dX ) {
01519  double p = dX;
01520  const double a[6] = {
01521   -3.969683028665376e+01,  2.209460984245205e+02,
01522   -2.759285104469687e+02,  1.383577518672690e+02,
01523   -3.066479806614716e+01,  2.506628277459239e+00
01524  };
01525  const double b[5] = {
01526   -5.447609879822406e+01,  1.615858368580409e+02,
01527   -1.556989798598866e+02,  6.680131188771972e+01,
01528   -1.328068155288572e+01
01529  };
01530  const double c[6] = {
01531   -7.784894002430293e-03, -3.223964580411365e-01,
01532   -2.400758277161838e+00, -2.549732539343734e+00,
01533    4.374664141464968e+00,  2.938163982698783e+00
01534  };
01535  const double d[4] = {
01536    7.784695709041462e-03,  3.224671290700398e-01,
01537    2.445134137142996e+00,  3.754408661907416e+00
01538  };
01539 
01540  register double q, t, u;
01541 
01542  if (_isnan(p) || p > 1.0 || p < 0.0)
01543   return CMeta::GetNaN( );
01544  if (p == 0.0)
01545   return -DBL_MAX;
01546  if (p == 1.0)
01547   return  DBL_MAX;
01548  q = min(p,1-p);
01549  if (q > 0.02425) {
01550   /* Rational approximation for central region. */
01551   u = q-0.5;
01552   t = u*u;
01553   u = u*(((((a[0]*t+a[1])*t+a[2])*t+a[3])*t+a[4])*t+a[5])
01554     /(((((b[0]*t+b[1])*t+b[2])*t+b[3])*t+b[4])*t+1);
01555  } else {
01556   /* Rational approximation for tail region. */
01557   t = sqrt(-2*log(q));
01558   u = (((((c[0]*t+c[1])*t+c[2])*t+c[3])*t+c[4])*t+c[5])
01559    /((((d[0]*t+d[1])*t+d[2])*t+d[3])*t+1);
01560  }
01561  /* The relative error of the approximation has absolute value less
01562     than 1.15e-9.  One iteration of Halley's rational method (third
01563     order) gives full machine precision... */
01564  t = Normal01CDF(u)-q;    /* error */
01565  t = t*(2.506628274631)*exp(u*u/2);   /* f(u)/df(u) */
01566  u = u-t/(1+u*t/2);     /* Halley's method */
01567 
01568  return (p > 0.5 ? -u : u);
01569 };
01570 
01595 bool CStatistics::MatrixLUDecompose( CDataMatrix& Mat, vector<size_t>& veciIndices, bool& fEven ) {
01596     const float     c_dEpsilon  = 1e-20f;
01597     float           big, dum, sum, temp;
01598     vector<float>   vv;
01599     size_t          i, j, k, imax;
01600 
01601     if( Mat.GetRows( ) != Mat.GetColumns( ) )
01602         return false;
01603 
01604     veciIndices.resize( Mat.GetRows( ) );
01605     vv.resize( Mat.GetRows( ) );
01606     fEven = true;
01607     for (i=0;i<Mat.GetRows( );i++) {
01608         big=0.0;
01609         for (j=0;j<Mat.GetRows( );j++)
01610             if ((temp=fabs(Mat.Get( i, j ))) > big) big=temp;
01611         if (big == 0.0)
01612             g_CatSleipnir( ).warn( "CStatisticsImpl::LUDecomposition( ) singular matrix" );
01613         vv[i]=1.0f/big; }
01614     for (j=0;j<Mat.GetRows( );j++) {
01615         for (i=0;i<j;i++) {
01616             sum=Mat.Get( i, j );
01617             for (k=0;k<i;k++)
01618                 sum -= Mat.Get( i, k )*Mat.Get( k, j );
01619             Mat.Get( i, j )=sum; }
01620         big=0.0;
01621         for (i=j;i<Mat.GetRows( );i++) {
01622             sum=Mat.Get( i, j );
01623             for (k=0;k<j;k++)
01624                 sum -= Mat.Get( i, k )*Mat.Get( k, j );
01625             Mat.Set( i, j, sum );
01626             if ( (dum=vv[i]*fabs(sum)) >= big) {
01627                 big=dum;
01628                 imax=i; } }
01629         if (j != imax) {
01630             for (k=0;k<Mat.GetRows( );k++) {
01631                 dum=Mat.Get( imax, k );
01632                 Mat.Set( imax, k, Mat.Get( j, k ) );
01633                 Mat.Set( j, k, dum ); }
01634             fEven = !fEven;
01635             vv[imax]=vv[j]; }
01636         veciIndices[j]=imax;
01637         if( !Mat.Get( j, j ) )
01638             Mat.Set( j, j, c_dEpsilon );
01639         if ((j+1) != Mat.GetRows( )) {
01640             dum=1.0f/(Mat.Get( j, j ));
01641             for (i=j+1;i<Mat.GetRows( );i++)
01642                 Mat.Get( i, j ) *= dum; } }
01643 
01644     return true; }
01645 
01646 bool CStatisticsImpl::MatrixLUSubstitute( CDataMatrix& MatLU, const vector<size_t>& veciIndices,
01647     vector<float>& vecdB ) {
01648     size_t  i,ii=0,ip,j,k;
01649     float   sum;
01650 
01651     if( ( MatLU.GetRows( ) != MatLU.GetColumns( ) ) || ( MatLU.GetRows( ) != veciIndices.size( ) ) ||
01652         ( MatLU.GetRows( ) != vecdB.size( ) ) )
01653         return false;
01654 
01655     for (i=0;i<MatLU.GetRows( );i++) {
01656         ip=veciIndices[i];
01657         sum=vecdB[ip];
01658         vecdB[ip]=vecdB[i];
01659         if (ii)
01660             for (j=ii-1;j<=i-1;j++)
01661                 sum -= MatLU.Get( i, j ) * vecdB[j];
01662         else if (sum)
01663             ii=i+1;
01664         vecdB[i]=sum; }
01665     for( k = 0; k < MatLU.GetRows( ); ++k ) {
01666         i = MatLU.GetRows( )-k-1;
01667         sum=vecdB[i];
01668         for (j=i+1;j<MatLU.GetRows( );j++)
01669             sum -= MatLU.Get( i, j )*vecdB[j];
01670         vecdB[i]=sum/MatLU.Get( i, i ); }
01671 
01672     return true; }
01673 
01698 bool CStatistics::MatrixLUInvert( CDataMatrix& MatLU, const vector<size_t>& veciIndices,
01699     CDataMatrix& MatInv ) {
01700     size_t          i, j;
01701     vector<float>   vecdCol;
01702 
01703     if( MatLU.GetRows( ) != MatLU.GetColumns( ) )
01704         return false;
01705 
01706     if( ( MatInv.GetRows( ) != MatLU.GetRows( ) ) || ( MatInv.GetColumns( ) != MatLU.GetColumns( ) ) )
01707         MatInv.Initialize( MatLU.GetRows( ), MatLU.GetColumns( ) );
01708     vecdCol.resize( MatLU.GetRows( ) );
01709     for( j = 0; j < MatLU.GetRows( ); ++j ) {
01710         for( i = 0; i < MatLU.GetRows( ); ++i )
01711             vecdCol[ i ] = 0;
01712         vecdCol[ j ] = 1;
01713         if( !MatrixLUSubstitute( MatLU, veciIndices, vecdCol ) )
01714             return false;
01715         for( i = 0; i < MatLU.GetRows( ); ++i )
01716             MatInv.Set( i, j, vecdCol[ i ] ); }
01717 
01718     return true; }
01719 
01720 }