Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "statistics.h" 00024 #include "measure.h" 00025 #include "dat.h" 00026 #define WT_MULTIPLIER 50 00027 /* 00028 * Implementations thanks to: 00029 * Numerical Recipes in C 00030 * RANLIB 00031 * http://www.csit.fsu.edu/~burkardt 00032 */ 00033 00034 namespace Sleipnir { 00035 00036 static double ranf( ) { 00037 00038 return ( (double)rand( ) / RAND_MAX ); } 00039 00040 static double fsign( double dNum, double dSign ) { 00041 00042 return ( ( ( ( dSign > 0 ) && ( dNum < 0 ) ) || 00043 ( ( dSign < 0 ) && ( dNum > 0 ) ) ) ? -dNum : dNum ); } 00044 00062 double CStatistics::LjungBox( const float* adX, size_t iN, size_t iH ) { 00063 float* adCor; 00064 size_t i, iShift; 00065 double dRet; 00066 00067 adCor = new float[ iN ]; 00068 for( iShift = 0; iShift < iN; ++iShift ) { 00069 adCor[ iShift ] = 0; 00070 for( i = 0; i <= iShift; ++i ) 00071 adCor[ iShift ] += adX[ i + iN - iShift - 1 ] * adX[ i ]; } 00072 for( i = 0; i < iN; ++i ) 00073 if( adCor[ iN - 1 ] ) 00074 adCor[ i ] /= adCor[ iN - 1 ]; 00075 else 00076 adCor[ i ] = 0; 00077 adCor[ iN - 1 ] = 1; 00078 00079 dRet = 0; 00080 for( i = 0; i < iH; ++i ) 00081 dRet += adCor[ i ] * adCor[ i ] / ( iN - i - 1 ); 00082 delete[] adCor; 00083 00084 return ( 1 - CStatisticsImpl::Chi2CDF( sqrt( iN * ( iN + 2 ) * dRet ), 0, 1, iH ) ); } 00085 00086 double CStatisticsImpl::Chi2CDF( double dX, double dA, double dB, double dC ) { 00087 double dRet, dX2, dY, dP2; 00088 00089 if( dX <= dA ) 00090 dRet = 0; 00091 else { 00092 dY = ( dX - dA ) / dB; 00093 dX2 = dY * dY / 2; 00094 dP2 = dC / 2; 00095 dRet = IncompleteGamma( dP2, dX2 ); } 00096 00097 return dRet; } 00098 00099 double CStatisticsImpl::IncompleteGamma( double p, double x ) { 00100 double a, arg, b, c, pn1, pn2, pn3, pn4, pn5, pn6, rn, value; 00101 double exp_arg_min = -88.0E+00; 00102 double overflow = 1.0E+37; 00103 double plimit = 1000.0E+00; 00104 double tol = 1.0E-07; 00105 double xbig = 1.0E+08; 00106 00107 value = 0; 00108 00109 if( p <= 0 ) 00110 return 0; 00111 if( x <= 0 ) 00112 return 0; 00113 00114 // 00115 // Use a normal approximation if PLIMIT < P. 00116 // 00117 if( plimit < p ) { 00118 pn1 = 3.0 * sqrt ( p ) * ( pow ( x / p, 1.0 / 3.0 ) + 1.0 / ( 9.0 * p ) - 1.0 ); 00119 return Normal01CDF( pn1 ); } 00120 00121 // 00122 // Is X extremely large compared to P? 00123 // 00124 if( xbig < x ) 00125 return 1; 00126 00127 // 00128 // Use Pearson's series expansion. 00129 // (P is not large enough to force overflow in the log of Gamma. 00130 // 00131 if( x <= 1.0 || x < p ) { 00132 arg = p * log ( x ) - x - GammaLog( p + 1.0 ); 00133 c = 1.0; 00134 value = 1.0; 00135 a = p; 00136 00137 for( ; ; ) { 00138 a = a + 1.0; 00139 c = c * x / a; 00140 value = value + c; 00141 00142 if( c <= tol ) 00143 break; } 00144 arg = arg + log ( value ); 00145 value = ( exp_arg_min <= arg ) ? exp( arg ) : 0; } 00146 else { 00147 // 00148 // Use a continued fraction expansion. 00149 // 00150 arg = p * log ( x ) - x - GammaLog( p ); 00151 a = 1.0 - p; 00152 b = a + x + 1.0; 00153 c = 0.0; 00154 pn1 = 1.0; 00155 pn2 = x; 00156 pn3 = x + 1.0; 00157 pn4 = x * b; 00158 value = pn3 / pn4; 00159 00160 for( ; ; ) { 00161 a = a + 1.0; 00162 b = b + 2.0; 00163 c = c + 1.0; 00164 pn5 = b * pn3 - a * c * pn1; 00165 pn6 = b * pn4 - a * c * pn2; 00166 00167 if( 0 < fabs( pn6 ) ) { 00168 rn = pn5 / pn6; 00169 if( fabs( value - rn ) <= min(tol, tol * rn) ) { 00170 arg = arg + log( value ); 00171 return ( ( exp_arg_min <= arg ) ? ( 1 - exp( arg ) ) : 1 ); } 00172 00173 value = rn; } 00174 pn1 = pn3; 00175 pn2 = pn4; 00176 pn3 = pn5; 00177 pn4 = pn6; 00178 // 00179 // Rescale terms in continued fraction if terms are large. 00180 // 00181 if( overflow <= fabs( pn5 ) ) { 00182 pn1 = pn1 / overflow; 00183 pn2 = pn2 / overflow; 00184 pn3 = pn3 / overflow; 00185 pn4 = pn4 / overflow; } } } 00186 00187 return value; } 00188 00189 double CStatisticsImpl::Normal01CDF( double x ) { 00190 double a1 = 0.398942280444E+00; 00191 double a2 = 0.399903438504E+00; 00192 double a3 = 5.75885480458E+00; 00193 double a4 = 29.8213557808E+00; 00194 double a5 = 2.62433121679E+00; 00195 double a6 = 48.6959930692E+00; 00196 double a7 = 5.92885724438E+00; 00197 double b0 = 0.398942280385E+00; 00198 double b1 = 3.8052E-08; 00199 double b2 = 1.00000615302E+00; 00200 double b3 = 3.98064794E-04; 00201 double b4 = 1.98615381364E+00; 00202 double b5 = 0.151679116635E+00; 00203 double b6 = 5.29330324926E+00; 00204 double b7 = 4.8385912808E+00; 00205 double b8 = 15.1508972451E+00; 00206 double b9 = 0.742380924027E+00; 00207 double b10 = 30.789933034E+00; 00208 double b11 = 3.99019417011E+00; 00209 double cdf; 00210 double q; 00211 double y; 00212 00213 // 00214 // |X| <= 1.28. 00215 // 00216 if( fabs( x ) <= 1.28 ) { 00217 y = 0.5 * x * x; 00218 q = 0.5 - fabs( x ) * ( a1 - a2 * y / ( y + a3 - a4 / ( y + a5 + a6 / ( y + a7 ) ) ) ); } 00219 // 00220 // 1.28 < |X| <= 12.7 00221 // 00222 else if( fabs( x ) <= 12.7 ) { 00223 y = 0.5 * x * x; 00224 q = exp ( - y ) * b0 / ( fabs ( x ) - b1 + 00225 b2 / ( fabs ( x ) + b3 + 00226 b4 / ( fabs ( x ) - b5 + 00227 b6 / ( fabs ( x ) + b7 - 00228 b8 / ( fabs ( x ) + b9 + 00229 b10 / ( fabs ( x ) + b11 ) ) ) ) ) ); } 00230 // 00231 // 12.7 < |X| 00232 // 00233 else 00234 q = 0; 00235 00236 // 00237 // Take account of negative X. 00238 // 00239 cdf = ( x < 0 ) ? q : ( 1 - q ); 00240 00241 return cdf; } 00242 00243 double CStatisticsImpl::GammaLog( double x ) { 00244 double c[ 7 ] = { 00245 -1.910444077728E-03, 00246 8.4171387781295E-04, 00247 -5.952379913043012E-04, 00248 7.93650793500350248E-04, 00249 -2.777777777777681622553E-03, 00250 8.333333333333333331554247E-02, 00251 5.7083835261E-03 }; 00252 double corr; 00253 double d1 = -5.772156649015328605195174E-01; 00254 double d2 = 4.227843350984671393993777E-01; 00255 double d4 = 1.791759469228055000094023E+00; 00256 double frtbig = 1.42E+09; 00257 int i; 00258 double p1[ 8 ] = { 00259 4.945235359296727046734888E+00, 00260 2.018112620856775083915565E+02, 00261 2.290838373831346393026739E+03, 00262 1.131967205903380828685045E+04, 00263 2.855724635671635335736389E+04, 00264 3.848496228443793359990269E+04, 00265 2.637748787624195437963534E+04, 00266 7.225813979700288197698961E+03 }; 00267 double p2[ 8 ] = { 00268 4.974607845568932035012064E+00, 00269 5.424138599891070494101986E+02, 00270 1.550693864978364947665077E+04, 00271 1.847932904445632425417223E+05, 00272 1.088204769468828767498470E+06, 00273 3.338152967987029735917223E+06, 00274 5.106661678927352456275255E+06, 00275 3.074109054850539556250927E+06 }; 00276 double p4[ 8 ] = { 00277 1.474502166059939948905062E+04, 00278 2.426813369486704502836312E+06, 00279 1.214755574045093227939592E+08, 00280 2.663432449630976949898078E+09, 00281 2.940378956634553899906876E+010, 00282 1.702665737765398868392998E+011, 00283 4.926125793377430887588120E+011, 00284 5.606251856223951465078242E+011 }; 00285 double pnt68 = 0.6796875E+00; 00286 double q1[ 8 ] = { 00287 6.748212550303777196073036E+01, 00288 1.113332393857199323513008E+03, 00289 7.738757056935398733233834E+03, 00290 2.763987074403340708898585E+04, 00291 5.499310206226157329794414E+04, 00292 6.161122180066002127833352E+04, 00293 3.635127591501940507276287E+04, 00294 8.785536302431013170870835E+03 }; 00295 double q2[ 8 ] = { 00296 1.830328399370592604055942E+02, 00297 7.765049321445005871323047E+03, 00298 1.331903827966074194402448E+05, 00299 1.136705821321969608938755E+06, 00300 5.267964117437946917577538E+06, 00301 1.346701454311101692290052E+07, 00302 1.782736530353274213975932E+07, 00303 9.533095591844353613395747E+06 }; 00304 double q4[ 8 ] = { 00305 2.690530175870899333379843E+03, 00306 6.393885654300092398984238E+05, 00307 4.135599930241388052042842E+07, 00308 1.120872109616147941376570E+09, 00309 1.488613728678813811542398E+010, 00310 1.016803586272438228077304E+011, 00311 3.417476345507377132798597E+011, 00312 4.463158187419713286462081E+011 }; 00313 double res; 00314 double sqrtpi = 0.9189385332046727417803297E+00; 00315 double xbig = 4.08E+36; 00316 double xden; 00317 double xm1; 00318 double xm2; 00319 double xm4; 00320 double xnum; 00321 double xsq; 00322 00323 // 00324 // Return immediately if the argument is out of range. 00325 // 00326 if( x <= 0 || xbig < x ) 00327 return HUGE_VAL; 00328 00329 if( x <= EpsilonDouble( ) ) 00330 res = -log( x ); 00331 else if( x <= 1.5 ) { 00332 if ( x < pnt68 ) { 00333 corr = -log( x ); 00334 xm1 = x; } 00335 else { 00336 corr = 0; 00337 xm1 = ( x - 0.5 ) - 0.5; } 00338 00339 if ( x <= 0.5 || pnt68 <= x ) { 00340 xden = 1.0; 00341 xnum = 0.0; 00342 00343 for( i = 0; i < 8; i++ ) { 00344 xnum = xnum * xm1 + p1[i]; 00345 xden = xden * xm1 + q1[i]; } 00346 res = corr + ( xm1 * ( d1 + xm1 * ( xnum / xden ) ) ); } 00347 else { 00348 xm2 = ( x - 0.5 ) - 0.5; 00349 xden = 1.0; 00350 xnum = 0.0; 00351 for( i = 0; i < 8; i++ ) { 00352 xnum = xnum * xm2 + p2[i]; 00353 xden = xden * xm2 + q2[i]; } 00354 res = corr + xm2 * ( d2 + xm2 * ( xnum / xden ) ); } } 00355 else if( x <= 4.0 ) { 00356 xm2 = x - 2.0; 00357 xden = 1.0; 00358 xnum = 0.0; 00359 for( i = 0; i < 8; i++ ) { 00360 xnum = xnum * xm2 + p2[i]; 00361 xden = xden * xm2 + q2[i]; } 00362 00363 res = xm2 * ( d2 + xm2 * ( xnum / xden ) ); } 00364 else if( x <= 12.0 ) { 00365 xm4 = x - 4.0; 00366 xden = - 1.0; 00367 xnum = 0.0; 00368 for( i = 0; i < 8; i++ ) { 00369 xnum = xnum * xm4 + p4[i]; 00370 xden = xden * xm4 + q4[i]; } 00371 res = d4 + xm4 * ( xnum / xden ); } 00372 else { 00373 res = 0.0; 00374 00375 if( x <= frtbig ) { 00376 res = c[6]; 00377 xsq = x * x; 00378 00379 for( i = 0; i < 6; i++ ) 00380 res = res / xsq + c[i]; } 00381 00382 res = res / x; 00383 corr = log ( x ); 00384 res = res + sqrtpi - 0.5 * corr; 00385 res = res + x * ( corr - 1.0 ); } 00386 00387 return res; } 00388 00403 double CStatistics::SampleGammaLogStandard( double dXX ) { 00404 static const double c_adCof[] = { 76.18009172947146, -86.50532032941677, 24.01409824083091, 00405 -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5 }; 00406 double dX, dY, dTmp, dSer; 00407 size_t j; 00408 00409 dX = dY = dXX; 00410 dTmp = dX + 5.5; 00411 dTmp -= ( dX + 0.5 ) * log( dTmp ); 00412 dSer = 1.000000000190015; 00413 for( j = 0; j <= 5; ++j ) 00414 dSer += c_adCof[ j ] / ++dY; 00415 00416 return ( -dTmp + log( 2.5066282746310005 * dSer / dX ) ); } 00417 00432 double CStatistics::SampleGammaStandard( double dShape ) { 00433 static double q1 = 4.166669E-2; 00434 static double q2 = 2.083148E-2; 00435 static double q3 = 8.01191E-3; 00436 static double q4 = 1.44121E-3; 00437 static double q5 = -7.388E-5; 00438 static double q6 = 2.4511E-4; 00439 static double q7 = 2.424E-4; 00440 static double a1 = 0.3333333; 00441 static double a2 = -0.250003; 00442 static double a3 = 0.2000062; 00443 static double a4 = -0.1662921; 00444 static double a5 = 0.1423657; 00445 static double a6 = -0.1367177; 00446 static double a7 = 0.1233795; 00447 static double e1 = 1.0; 00448 static double e2 = 0.4999897; 00449 static double e3 = 0.166829; 00450 static double e4 = 4.07753E-2; 00451 static double e5 = 1.0293E-2; 00452 static double aa = 0.0; 00453 static double aaa = 0.0; 00454 static double sqrt32 = 5.656854; 00455 static double sgamma,s2,s,d,t,x,u,r,q0,b,si,c,v,q,e,w,p; 00456 double a = dShape; 00457 if(a == aa) goto S10; 00458 if(a < 1.0) goto S120; 00459 /* 00460 STEP 1: RECALCULATIONS OF S2,S,D IF A HAS CHANGED 00461 */ 00462 aa = a; 00463 s2 = a-0.5; 00464 s = sqrt(s2); 00465 d = sqrt32-12.0*s; 00466 S10: 00467 /* 00468 STEP 2: T=STANDARD NORMAL DEVIATE, 00469 X=(S,1/2)-NORMAL DEVIATE. 00470 IMMEDIATE ACCEPTANCE (I) 00471 */ 00472 t = SampleNormalStandard(); 00473 x = s+0.5*t; 00474 sgamma = x*x; 00475 if(t >= 0.0) return sgamma; 00476 /* 00477 STEP 3: U= 0,1 -UNIFORM SAMPLE. SQUEEZE ACCEPTANCE (S) 00478 */ 00479 u = ranf(); 00480 if(d*u <= t*t*t) return sgamma; 00481 /* 00482 STEP 4: RECALCULATIONS OF Q0,B,SI,C IF NECESSARY 00483 */ 00484 if(a == aaa) goto S40; 00485 aaa = a; 00486 r = 1.0/ a; 00487 q0 = ((((((q7*r+q6)*r+q5)*r+q4)*r+q3)*r+q2)*r+q1)*r; 00488 /* 00489 APPROXIMATION DEPENDING ON SIZE OF PARAMETER A 00490 THE CONSTANTS IN THE EXPRESSIONS FOR B, SI AND 00491 C WERE ESTABLISHED BY NUMERICAL EXPERIMENTS 00492 */ 00493 if(a <= 3.686) goto S30; 00494 if(a <= 13.022) goto S20; 00495 /* 00496 CASE 3: A .GT. 13.022 00497 */ 00498 b = 1.77; 00499 si = 0.75; 00500 c = 0.1515/s; 00501 goto S40; 00502 S20: 00503 /* 00504 CASE 2: 3.686 .LT. A .LE. 13.022 00505 */ 00506 b = 1.654+7.6E-3*s2; 00507 si = 1.68/s+0.275; 00508 c = 6.2E-2/s+2.4E-2; 00509 goto S40; 00510 S30: 00511 /* 00512 CASE 1: A .LE. 3.686 00513 */ 00514 b = 0.463+s+0.178*s2; 00515 si = 1.235; 00516 c = 0.195/s-7.9E-2+1.6E-1*s; 00517 S40: 00518 /* 00519 STEP 5: NO QUOTIENT TEST IF X NOT POSITIVE 00520 */ 00521 if(x <= 0.0) goto S70; 00522 /* 00523 STEP 6: CALCULATION OF V AND QUOTIENT Q 00524 */ 00525 v = t/(s+s); 00526 if(fabs(v) <= 0.25) goto S50; 00527 q = q0-s*t+0.25*t*t+(s2+s2)*log(1.0+v); 00528 goto S60; 00529 S50: 00530 q = q0+0.5*t*t*((((((a7*v+a6)*v+a5)*v+a4)*v+a3)*v+a2)*v+a1)*v; 00531 S60: 00532 /* 00533 STEP 7: QUOTIENT ACCEPTANCE (Q) 00534 */ 00535 if(log(1.0-u) <= q) return sgamma; 00536 S70: 00537 /* 00538 STEP 8: E=STANDARD EXPONENTIAL DEVIATE 00539 U= 0,1 -UNIFORM DEVIATE 00540 T=(B,SI)-DOUBLE EXPONENTIAL (LAPLACE) SAMPLE 00541 */ 00542 e = SampleExponentialStandard(); 00543 u = ranf(); 00544 u += (u-1.0); 00545 t = b+fsign(si*e,u); 00546 /* 00547 STEP 9: REJECTION IF T .LT. TAU(1) = -.71874483771719 00548 */ 00549 if(t < -0.7187449) goto S70; 00550 /* 00551 STEP 10: CALCULATION OF V AND QUOTIENT Q 00552 */ 00553 v = t/(s+s); 00554 if(fabs(v) <= 0.25) goto S80; 00555 q = q0-s*t+0.25*t*t+(s2+s2)*log(1.0+v); 00556 goto S90; 00557 S80: 00558 q = q0+0.5*t*t*((((((a7*v+a6)*v+a5)*v+a4)*v+a3)*v+a2)*v+a1)*v; 00559 S90: 00560 /* 00561 STEP 11: HAT ACCEPTANCE (H) (IF Q NOT POSITIVE GO TO STEP 8) 00562 */ 00563 if(q <= 0.0) goto S70; 00564 if(q <= 0.5) goto S100; 00565 w = exp(q)-1.0; 00566 goto S110; 00567 S100: 00568 w = ((((e5*q+e4)*q+e3)*q+e2)*q+e1)*q; 00569 S110: 00570 /* 00571 IF T IS REJECTED, SAMPLE AGAIN AT STEP 8 00572 */ 00573 if(c*fabs(u) > w*exp(e-0.5*t*t)) goto S70; 00574 x = s+0.5*t; 00575 sgamma = x*x; 00576 return sgamma; 00577 S120: 00578 /* 00579 ALTERNATE METHOD FOR PARAMETERS A BELOW 1 (.3678794=EXP(-1.)) 00580 */ 00581 aa = 0.0; 00582 b = 1.0+0.3678794*a; 00583 S130: 00584 p = b*ranf(); 00585 if(p >= 1.0) goto S140; 00586 sgamma = exp(log(p)/ a); 00587 if(SampleExponentialStandard() < sgamma) goto S130; 00588 return sgamma; 00589 S140: 00590 sgamma = -log((b-p)/ a); 00591 if(SampleExponentialStandard() < (1.0-a)*log(sgamma)) goto S130; 00592 return sgamma; 00593 } 00594 00606 double CStatistics::SampleNormalStandard( ) { 00607 double x1, x2, w; 00608 00609 do { 00610 x1 = 2.0 * ranf() - 1.0; 00611 x2 = 2.0 * ranf() - 1.0; 00612 w = x1 * x1 + x2 * x2; 00613 } while ( w >= 1.0 ); 00614 00615 w = sqrt( (-2.0 * log( w ) ) / w ); 00616 return x1 * w; } 00617 00618 /* 00619 static double a[32] = { 00620 0.0,3.917609E-2,7.841241E-2,0.11777,0.1573107,0.1970991,0.2372021,0.2776904, 00621 0.3186394,0.36013,0.4022501,0.4450965,0.4887764,0.5334097,0.5791322, 00622 0.626099,0.6744898,0.7245144,0.7764218,0.8305109,0.8871466,0.9467818, 00623 1.00999,1.077516,1.150349,1.229859,1.318011,1.417797,1.534121,1.67594, 00624 1.862732,2.153875 00625 }; 00626 static double d[31] = { 00627 0.0,0.0,0.0,0.0,0.0,0.2636843,0.2425085,0.2255674,0.2116342,0.1999243, 00628 0.1899108,0.1812252,0.1736014,0.1668419,0.1607967,0.1553497,0.1504094, 00629 0.1459026,0.14177,0.1379632,0.1344418,0.1311722,0.128126,0.1252791, 00630 0.1226109,0.1201036,0.1177417,0.1155119,0.1134023,0.1114027,0.1095039 00631 }; 00632 static double t[31] = { 00633 7.673828E-4,2.30687E-3,3.860618E-3,5.438454E-3,7.0507E-3,8.708396E-3, 00634 1.042357E-2,1.220953E-2,1.408125E-2,1.605579E-2,1.81529E-2,2.039573E-2, 00635 2.281177E-2,2.543407E-2,2.830296E-2,3.146822E-2,3.499233E-2,3.895483E-2, 00636 4.345878E-2,4.864035E-2,5.468334E-2,6.184222E-2,7.047983E-2,8.113195E-2, 00637 9.462444E-2,0.1123001,0.136498,0.1716886,0.2276241,0.330498,0.5847031 00638 }; 00639 static double h[31] = { 00640 3.920617E-2,3.932705E-2,3.951E-2,3.975703E-2,4.007093E-2,4.045533E-2, 00641 4.091481E-2,4.145507E-2,4.208311E-2,4.280748E-2,4.363863E-2,4.458932E-2, 00642 4.567523E-2,4.691571E-2,4.833487E-2,4.996298E-2,5.183859E-2,5.401138E-2, 00643 5.654656E-2,5.95313E-2,6.308489E-2,6.737503E-2,7.264544E-2,7.926471E-2, 00644 8.781922E-2,9.930398E-2,0.11556,0.1404344,0.1836142,0.2790016,0.7010474 00645 }; 00646 static long i; 00647 static double snorm,u,s,ustar,aa,w,y,tt; 00648 u = ranf(); 00649 s = 0.0; 00650 if(u > 0.5) s = 1.0; 00651 u += (u-s); 00652 u = 32.0*u; 00653 i = (long) (u); 00654 if(i == 32) i = 31; 00655 if(i == 0) goto S100; 00656 // START CENTER 00657 ustar = u-(double)i; 00658 aa = *(a+i-1); 00659 S40: 00660 if(ustar <= *(t+i-1)) goto S60; 00661 w = (ustar-*(t+i-1))**(h+i-1); 00662 S50: 00663 // EXIT (BOTH CASES) 00664 y = aa+w; 00665 snorm = y; 00666 if(s == 1.0) snorm = -y; 00667 return snorm; 00668 S60: 00669 // CENTER CONTINUED 00670 u = ranf(); 00671 w = u*(*(a+i)-aa); 00672 tt = (0.5*w+aa)*w; 00673 goto S80; 00674 S70: 00675 tt = u; 00676 ustar = ranf(); 00677 S80: 00678 if(ustar > tt) goto S50; 00679 u = ranf(); 00680 if(ustar >= u) goto S70; 00681 ustar = ranf(); 00682 goto S40; 00683 S100: 00684 // START TAIL 00685 i = 6; 00686 aa = *(a+31); 00687 goto S120; 00688 S110: 00689 aa += *(d+i-1); 00690 i += 1; 00691 S120: 00692 u += u; 00693 if(u < 1.0) goto S110; 00694 u -= 1.0; 00695 S140: 00696 w = u**(d+i-1); 00697 tt = (0.5*w+aa)*w; 00698 goto S160; 00699 S150: 00700 tt = u; 00701 S160: 00702 ustar = ranf(); 00703 if(ustar > tt) goto S50; 00704 u = ranf(); 00705 if(ustar >= u) goto S150; 00706 u = ranf(); 00707 goto S140; 00708 } 00709 */ 00710 00722 double CStatistics::SampleExponentialStandard( ) { 00723 static double q[8] = { 00724 0.6931472,0.9333737,0.9888778,0.9984959,0.9998293,0.9999833,0.9999986,1.0 00725 }; 00726 static long i; 00727 static double sexpo,a,u,ustar,umin; 00728 static double *q1 = q; 00729 a = 0.0; 00730 u = ranf(); 00731 goto S30; 00732 S20: 00733 a += *q1; 00734 S30: 00735 u += u; 00736 if(u <= 1.0) goto S20; 00737 u -= 1.0; 00738 if(u > *q1) goto S60; 00739 sexpo = a+u; 00740 return sexpo; 00741 S60: 00742 i = 1; 00743 ustar = ranf(); 00744 umin = ustar; 00745 S70: 00746 ustar = ranf(); 00747 if(ustar < umin) umin = ustar; 00748 i += 1; 00749 if(u > *(q+i-1)) goto S70; 00750 sexpo = a+umin**q1; 00751 return sexpo; 00752 } 00753 00754 double CStatisticsImpl::IncompleteBeta( double dA, double dB, double dX ) { 00755 double dBT; 00756 00757 if( ( dX < 0 ) || ( dX > 1 ) ) 00758 return -1; 00759 dBT = ( ( dX == 0 ) || ( dX == 1 ) ) ? 0 : 00760 exp( GammaLog( dA + dB ) - GammaLog( dA ) - GammaLog( dB ) + ( dA * log( dX ) ) + 00761 ( dB * log( 1 - dX ) ) ); 00762 00763 return ( ( dX < ( ( dA + 1 ) / ( dA + dB + 2 ) ) ) ? 00764 ( dBT * IncompleteBetaCF( dA, dB, dX ) / dA ) : 00765 ( 1 - ( dBT * IncompleteBetaCF( dB, dA, 1 - dX ) / dB ) ) ); } 00766 00767 double CStatisticsImpl::IncompleteBetaCF(double a, double b, double x) 00768 { 00769 static const double c_dFPMin = 1e-30; 00770 static const double c_iMaxIt = 100; 00771 static const double c_dEPS = 3e-7; 00772 int m,m2; 00773 double aa,c,d,del,h,qab,qam,qap; 00774 qab=a+b; 00775 qap=a+1.0; 00776 qam=a-1.0; 00777 c=1.0; 00778 d=1.0-qab*x/qap; 00779 if (fabs(d) < c_dFPMin) d=c_dFPMin; 00780 d=1.0/d; 00781 h=d; 00782 for (m=1;m<=c_iMaxIt;m++) { 00783 m2=2*m; 00784 aa=m*(b-m)*x/((qam+m2)*(a+m2)); 00785 d=1.0+aa*d; 00786 if (fabs(d) < c_dFPMin) d=c_dFPMin; 00787 c=1.0+aa/c; 00788 if (fabs(c) < c_dFPMin) c=c_dFPMin; 00789 d=1.0/d; 00790 h *= d*c; 00791 aa = -(a+m)*(qab+m)*x/((a+m2)*(qap+m2)); 00792 d=1.0+aa*d; 00793 if (fabs(d) < c_dFPMin) d=c_dFPMin; 00794 c=1.0+aa/c; 00795 if (fabs(c) < c_dFPMin) c=c_dFPMin; 00796 d=1.0/d; 00797 del=d*c; 00798 h *= del; 00799 if (fabs(del-1.0) < c_dEPS) break; 00800 } 00801 if (m > c_iMaxIt) return -1; 00802 return h; 00803 } 00804 00824 double CStatistics::HypergeometricCDF( size_t iBoth, size_t iNonZeroInOne, size_t iNonZeroInTwo, 00825 size_t iN ) { 00826 size_t i, iHits; 00827 double dRet; 00828 00829 dRet = 0; 00830 iHits = ( iNonZeroInTwo < iNonZeroInOne ) ? iNonZeroInTwo : iNonZeroInOne; 00831 for( i = iBoth; i <= iHits; ++i ) 00832 dRet += HypergeometricPDF( i, iNonZeroInOne, iNonZeroInTwo, iN ); 00833 00834 return ( ( dRet > 1 ) ? 1 : dRet ); } 00835 00855 double CStatistics::TwoSidedHypergeometricCDF( size_t iNonZeroInCommon, size_t iNonZeroInOne, size_t iNonZeroInTwo, 00856 size_t iTotalNumValues ) { 00857 size_t i, iHits; 00858 double dRet; 00859 00860 int a = iTotalNumValues - iNonZeroInTwo + iNonZeroInCommon - iNonZeroInOne ; 00861 int b = iNonZeroInOne - iNonZeroInCommon; 00862 int c = iNonZeroInTwo - iNonZeroInCommon; 00863 int d = iNonZeroInCommon; 00864 00865 dRet = 0; 00866 00867 if (a*d - b*c < 0) { 00868 while (a >= 0 && d >= 0) { 00869 dRet += HypergeometricPDF( d, b+d, c+d, a+b+c+d ); 00870 a--;d--;c++;b++; 00871 cout << dRet << endl; 00872 } 00873 } 00874 else { 00875 while (c >= 0 && b >= 0) { 00876 dRet += HypergeometricPDF( d, b+d, c+d, a+b+c+d ); 00877 a++;d++;c--;b--; 00878 } 00879 } 00880 dRet *= 2; 00881 return ( ( dRet > 1 ) ? 1 : dRet ); } 00882 00883 00910 double CStatistics::FScore( size_t iTruePositives, size_t iFalsePositives, size_t iTrueNegatives, 00911 size_t iFalseNegatives, double dBeta ) { 00912 double dP, dR; 00913 00914 dP = Precision( iTruePositives, iFalsePositives, iTrueNegatives, iFalseNegatives ); 00915 dR = Recall( iTruePositives, iFalsePositives, iTrueNegatives, iFalseNegatives ); 00916 dBeta *= dBeta; 00917 00918 return ( ( dBeta + 1 ) * dP * dR / ( ( dBeta * dP ) + dR ) ); } 00919 00920 template<class tType> 00921 struct SCompareRank { 00922 const vector<tType>& m_vecData; 00923 00924 SCompareRank( const vector<tType>& vecData ) : m_vecData(vecData) { } 00925 00926 bool operator()( size_t iOne, size_t iTwo ) const { 00927 00928 return ( m_vecData[ iOne ] < m_vecData[ iTwo ] ); } 00929 }; 00930 00975 double CStatistics::WilcoxonRankSum( const CDat& DatData, const CDat& DatAnswers, 00976 const vector<bool>& vecfGenesOfInterest, bool fInvert ) { 00977 size_t i, j, k, iOne, iTwo, iNeg; 00978 uint64_t iPos; 00979 float d, dAnswer; 00980 double dSum; 00981 std::vector<size_t> veciGenes; 00982 std::vector<float> vecdValues, vecdRanks; 00983 bool fAnswer; 00984 00985 veciGenes.resize( DatAnswers.GetGenes( ) ); 00986 for( i = 0; i < veciGenes.size( ); ++i ) 00987 veciGenes[ i ] = DatData.GetGene( DatAnswers.GetGene( i ) ); 00988 00989 for( i = 0; i < DatAnswers.GetGenes( ); ++i ) { 00990 if( ( iOne = veciGenes[ i ] ) == -1 ) 00991 continue; 00992 for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) { 00993 if( ( ( iTwo = veciGenes[ j ] ) == -1 ) || 00994 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) || 00995 CMeta::IsNaN( d = DatData.Get( iOne, iTwo ) ) ) 00996 continue; 00997 fAnswer = dAnswer > 0; 00998 if( !( vecfGenesOfInterest.empty( ) || 00999 ( fAnswer && vecfGenesOfInterest[ i ] && vecfGenesOfInterest[ j ] ) || 01000 ( !fAnswer && ( vecfGenesOfInterest[ i ] || vecfGenesOfInterest[ j ] ) ) ) ) 01001 continue; 01002 if( fInvert ) 01003 d = 1 - d; 01004 vecdValues.push_back( d ); } } 01005 { 01006 std::vector<size_t> veciIndices; 01007 size_t iIndex, iCount; 01008 01009 veciIndices.resize( vecdValues.size( ) ); 01010 for( i = 0; i < vecdValues.size( ); ++i ) 01011 veciIndices[ i ] = i; 01012 std::sort( veciIndices.begin( ), veciIndices.end( ), SCompareRank<float>( vecdValues ) ); 01013 vecdRanks.resize( veciIndices.size( ) ); 01014 for( i = 0; i < vecdRanks.size( ); ++i ) { 01015 iIndex = veciIndices[ i ]; 01016 if( !i || ( vecdValues[ iIndex ] != vecdValues[ veciIndices[ i - 1 ] ] ) ) { 01017 for( iCount = 0,j = i; j < veciIndices.size( ); ++j ) { 01018 if( vecdValues[ veciIndices[ j ] ] != vecdValues[ iIndex ] ) 01019 break; 01020 iCount++; } 01021 d = i + ( iCount - 1 ) / 2.0f; } 01022 vecdRanks[ iIndex ] = d; } 01023 } 01024 01025 for( dSum = 0,iPos = iNeg = i = k = 0; i < DatAnswers.GetGenes( ); ++i ) { 01026 if( ( iOne = veciGenes[ i ] ) == -1 ) 01027 continue; 01028 for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) { 01029 if( ( ( iTwo = veciGenes[ j ] ) == -1 ) || 01030 CMeta::IsNaN( DatData.Get( iOne, iTwo ) ) || 01031 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) || 01032 !( vecfGenesOfInterest.empty( ) || 01033 ( ( dAnswer > 0 ) && vecfGenesOfInterest[ i ] && vecfGenesOfInterest[ j ] ) || 01034 ( ( dAnswer <= 0 ) && ( vecfGenesOfInterest[ i ] || vecfGenesOfInterest[ j ] ) ) ) ) 01035 continue; 01036 if( dAnswer > 0 ) { 01037 iPos++; 01038 dSum += vecdRanks[ k ]; } 01039 else 01040 iNeg++; 01041 k++; } } 01042 dSum -= ( iPos * ( iPos - 1 ) ) / 2; 01043 01044 return ( dSum / iPos / iNeg ); } 01045 01046 double CStatistics::WilcoxonRankSum( const CPCL& DatData, const CPCL& DatAnswers, 01047 const vector<bool>& vecfHere, const vector<bool>& vecfUbik, 01048 bool fPosIn, bool fNegIn, bool fPosBridge, bool fNegBridge, 01049 bool fPosOut, bool fNegOut, bool fInvert ) { 01050 size_t i, j, k, iOne, iTwo, iNeg; 01051 uint64_t iPos; 01052 float d, dAnswer; 01053 double dSum; 01054 std::vector<size_t> veciGenes; 01055 std::vector<float> vecdValues, vecdRanks; 01056 bool fAnswer; 01057 01058 veciGenes.resize( DatAnswers.GetGenes( ) ); 01059 for( i = 0; i < veciGenes.size( ); ++i ) 01060 veciGenes[ i ] = DatData.GetGene( DatAnswers.GetGene( i ) ); 01061 01062 for( i = 0; i < DatAnswers.GetGenes( ); ++i ) { 01063 if( ( iOne = veciGenes[ i ] ) == -1 ) 01064 continue; 01065 for( j = 0; j < DatAnswers.GetGenes( ); ++j ) { 01066 if( ( ( iTwo = veciGenes[ j ] ) == -1 ) || 01067 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) || 01068 CMeta::IsNaN( d = DatData.Get( iOne, iTwo ) ) ) 01069 continue; 01070 fAnswer = dAnswer > 0; 01071 if ( CMeta::SkipEdge( fAnswer, i, j, vecfHere, vecfUbik, fPosIn, fNegIn, fPosBridge, fNegBridge, fPosOut, fNegOut ) ) { 01072 continue; 01073 } 01074 01075 if( fInvert ) 01076 d = 1 - d; 01077 vecdValues.push_back( d ); } } 01078 { 01079 std::vector<size_t> veciIndices; 01080 size_t iIndex, iCount; 01081 01082 veciIndices.resize( vecdValues.size( ) ); 01083 for( i = 0; i < vecdValues.size( ); ++i ) 01084 veciIndices[ i ] = i; 01085 std::sort( veciIndices.begin( ), veciIndices.end( ), SCompareRank<float>( vecdValues ) ); 01086 vecdRanks.resize( veciIndices.size( ) ); 01087 for( i = 0; i < vecdRanks.size( ); ++i ) { 01088 iIndex = veciIndices[ i ]; 01089 if( !i || ( vecdValues[ iIndex ] != vecdValues[ veciIndices[ i - 1 ] ] ) ) { 01090 for( iCount = 0,j = i; j < veciIndices.size( ); ++j ) { 01091 if( vecdValues[ veciIndices[ j ] ] != vecdValues[ iIndex ] ) 01092 break; 01093 iCount++; } 01094 d = i + ( iCount - 1 ) / 2.0f; } 01095 vecdRanks[ iIndex ] = d; } 01096 } 01097 01098 for( dSum = 0,iPos = iNeg = i = k = 0; i < DatAnswers.GetGenes( ); ++i ) { 01099 if( ( iOne = veciGenes[ i ] ) == -1 ) 01100 continue; 01101 for( j = 0; j < DatAnswers.GetGenes( ); ++j ) { 01102 if( ( ( iTwo = veciGenes[ j ] ) == -1 ) || 01103 CMeta::IsNaN( DatData.Get( iOne, iTwo ) ) || 01104 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) ) 01105 continue; 01106 fAnswer = dAnswer > 0; 01107 01108 if ( CMeta::SkipEdge( fAnswer, i, j, vecfHere, vecfUbik, fPosIn, fNegIn, fPosBridge, fNegBridge, fPosOut, fNegOut ) ) { 01109 continue; 01110 } 01111 01112 if( dAnswer > 0 ) { 01113 iPos++; 01114 dSum += vecdRanks[ k ]; } 01115 else 01116 iNeg++; 01117 k++; } } 01118 dSum -= ( iPos * ( iPos - 1 ) ) / 2; 01119 01120 return ( dSum / iPos / iNeg ); } 01121 01187 double CStatistics::WilcoxonRankSum( const CDat& DatData, const CDat& DatAnswers, 01188 const vector<bool>& vecfHere, const vector<bool>& vecfUbik, 01189 bool fPosIn, bool fNegIn, bool fPosBridge, bool fNegBridge, 01190 bool fPosOut, bool fNegOut, bool fInvert ) { 01191 size_t i, j, k, iOne, iTwo, iNeg; 01192 uint64_t iPos; 01193 float d, dAnswer; 01194 double dSum; 01195 std::vector<size_t> veciGenes; 01196 std::vector<float> vecdValues, vecdRanks; 01197 bool fAnswer; 01198 01199 veciGenes.resize( DatAnswers.GetGenes( ) ); 01200 for( i = 0; i < veciGenes.size( ); ++i ) 01201 veciGenes[ i ] = DatData.GetGene( DatAnswers.GetGene( i ) ); 01202 01203 for( i = 0; i < DatAnswers.GetGenes( ); ++i ) { 01204 if( ( iOne = veciGenes[ i ] ) == -1 ) 01205 continue; 01206 for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) { 01207 if( ( ( iTwo = veciGenes[ j ] ) == -1 ) || 01208 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) || 01209 CMeta::IsNaN( d = DatData.Get( iOne, iTwo ) ) ) 01210 continue; 01211 fAnswer = dAnswer > 0; 01212 if ( CMeta::SkipEdge( fAnswer, i, j, vecfHere, vecfUbik, fPosIn, fNegIn, fPosBridge, fNegBridge, fPosOut, fNegOut ) ) { 01213 continue; 01214 } 01215 01216 if( fInvert ) 01217 d = 1 - d; 01218 vecdValues.push_back( d ); } } 01219 { 01220 std::vector<size_t> veciIndices; 01221 size_t iIndex, iCount; 01222 01223 veciIndices.resize( vecdValues.size( ) ); 01224 for( i = 0; i < vecdValues.size( ); ++i ) 01225 veciIndices[ i ] = i; 01226 std::sort( veciIndices.begin( ), veciIndices.end( ), SCompareRank<float>( vecdValues ) ); 01227 vecdRanks.resize( veciIndices.size( ) ); 01228 for( i = 0; i < vecdRanks.size( ); ++i ) { 01229 iIndex = veciIndices[ i ]; 01230 if( !i || ( vecdValues[ iIndex ] != vecdValues[ veciIndices[ i - 1 ] ] ) ) { 01231 for( iCount = 0,j = i; j < veciIndices.size( ); ++j ) { 01232 if( vecdValues[ veciIndices[ j ] ] != vecdValues[ iIndex ] ) 01233 break; 01234 iCount++; } 01235 d = i + ( iCount - 1 ) / 2.0f; } 01236 vecdRanks[ iIndex ] = d; } 01237 } 01238 01239 for( dSum = 0,iPos = iNeg = i = k = 0; i < DatAnswers.GetGenes( ); ++i ) { 01240 if( ( iOne = veciGenes[ i ] ) == -1 ) 01241 continue; 01242 for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) { 01243 if( ( ( iTwo = veciGenes[ j ] ) == -1 ) || 01244 CMeta::IsNaN( DatData.Get( iOne, iTwo ) ) || 01245 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) ) 01246 continue; 01247 fAnswer = dAnswer > 0; 01248 01249 if ( CMeta::SkipEdge( fAnswer, i, j, vecfHere, vecfUbik, fPosIn, fNegIn, fPosBridge, fNegBridge, fPosOut, fNegOut ) ) { 01250 continue; 01251 } 01252 01253 if( dAnswer > 0 ) { 01254 iPos++; 01255 dSum += vecdRanks[ k ]; } 01256 else 01257 iNeg++; 01258 k++; } } 01259 dSum -= ( iPos * ( iPos - 1 ) ) / 2; 01260 01261 return ( dSum / iPos / iNeg ); } 01262 01263 double CStatistics::WilcoxonRankSum( const CDat& DatData, const CDat& DatAnswers, 01264 const vector<float>& vecGeneWeights, bool flipneg) { 01265 size_t i, j, k,m, iOne, iTwo, iNeg; 01266 uint64_t iPos; 01267 float d, dAnswer,w; 01268 double dSum; 01269 std::vector<size_t> veciGenes; 01270 std::vector<float> vecdValues, vecdRanks; 01271 bool fAnswer; 01272 01273 veciGenes.resize( DatAnswers.GetGenes( ) ); 01274 for( i = 0; i < veciGenes.size( ); ++i ) 01275 veciGenes[ i ] = DatData.GetGene( DatAnswers.GetGene( i ) ); 01276 01277 for( i = 0; i < DatAnswers.GetGenes( ); ++i ) { 01278 if( ( iOne = veciGenes[ i ] ) == -1 ) 01279 continue; 01280 for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) { 01281 if( ( ( iTwo = veciGenes[ j ] ) == -1 ) || 01282 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) || 01283 CMeta::IsNaN( d = DatData.Get( iOne, iTwo ) )|| 01284 CMeta::IsNaN( w = vecGeneWeights[i]*vecGeneWeights[j] )) 01285 continue; 01286 fAnswer = dAnswer > 0; 01287 //write sth here 01288 if(fAnswer || !flipneg) 01289 for( m = 0; m <(vecGeneWeights[i]*vecGeneWeights[j]*WT_MULTIPLIER-0.5); m++){ 01290 vecdValues.push_back( d );} 01291 else 01292 for( m = 0; m <((1-vecGeneWeights[i]*vecGeneWeights[j])*WT_MULTIPLIER-0.5); m++){ 01293 vecdValues.push_back( d );} } } 01294 { 01295 std::vector<size_t> veciIndices; 01296 size_t iIndex, iCount; 01297 01298 veciIndices.resize( vecdValues.size( ) ); 01299 for( i = 0; i < vecdValues.size( ); ++i ) 01300 veciIndices[ i ] = i; 01301 std::sort( veciIndices.begin( ), veciIndices.end( ), SCompareRank<float>( vecdValues ) ); 01302 vecdRanks.resize( veciIndices.size( ) ); 01303 for( i = 0; i < vecdRanks.size( ); ++i ) { 01304 iIndex = veciIndices[ i ]; 01305 if( !i || ( vecdValues[ iIndex ] != vecdValues[ veciIndices[ i - 1 ] ] ) ) { 01306 for( iCount = 0,j = i; j < veciIndices.size( ); ++j ) { 01307 if( vecdValues[ veciIndices[ j ] ] != vecdValues[ iIndex ] ) 01308 break; 01309 iCount++; } 01310 d = i + ( iCount - 1 ) / 2.0f; } 01311 vecdRanks[ iIndex ] = d; } 01312 } 01313 01314 for( dSum = 0,iPos = iNeg = i = k = 0; i < DatAnswers.GetGenes( ); ++i ) { 01315 if( ( iOne = veciGenes[ i ] ) == -1 ) 01316 continue; 01317 for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) { 01318 if( ( ( iTwo = veciGenes[ j ] ) == -1 ) || 01319 CMeta::IsNaN( DatData.Get( iOne, iTwo ) ) || 01320 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) )|| 01321 CMeta::IsNaN( w = vecGeneWeights[i]*vecGeneWeights[j] ) ) 01322 continue; 01323 fAnswer = dAnswer > 0; 01324 01325 if( dAnswer > 0 ) { 01326 for( m = 0; m <(vecGeneWeights[i]*vecGeneWeights[j]*WT_MULTIPLIER-0.5); m++){ 01327 iPos++; 01328 dSum += vecdRanks[ k ]; 01329 k++;}} 01330 else{ 01331 if(flipneg) 01332 for( m = 0; m <((1-vecGeneWeights[i]*vecGeneWeights[j])*WT_MULTIPLIER-0.5); m++){ 01333 iNeg++; 01334 k++;} 01335 else 01336 for( m = 0; m <(vecGeneWeights[i]*vecGeneWeights[j]*WT_MULTIPLIER-0.5); m++){ 01337 iNeg++; 01338 k++;} 01339 } 01340 01341 }} 01342 dSum -= ( iPos * ( iPos - 1 ) ) / 2; 01343 01344 return ( dSum / iPos / iNeg ); } 01345 01346 double CStatistics::WilcoxonRankSum( const CDat& DatData, const CDat& DatAnswers, 01347 const CDat& wDat, bool flipneg) { 01348 size_t i, j, k,m, iOne, iTwo, iNeg; 01349 uint64_t iPos; 01350 float d, dAnswer,w; 01351 double dSum; 01352 std::vector<size_t> veciGenes,vecfiGenes; 01353 std::vector<float> vecdValues, vecdRanks, vecdWeights; 01354 bool fAnswer; 01355 01356 01357 veciGenes.resize( DatAnswers.GetGenes( ) ); 01358 for( i = 0; i < veciGenes.size( ); ++i ) 01359 veciGenes[ i ] = DatData.GetGene( DatAnswers.GetGene( i ) ); 01360 01361 vecfiGenes.resize( DatAnswers.GetGenes( ) ); 01362 for( i = 0; i < vecfiGenes.size( ); ++i ){ 01363 vecfiGenes[ i ] = wDat.GetGene( DatAnswers.GetGene( i ));} 01364 01365 for( i = 0; i < DatAnswers.GetGenes( ); ++i ) { 01366 if( ( iOne = veciGenes[ i ] ) == -1 || vecfiGenes[ i ] == -1 ) 01367 continue; 01368 for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) { 01369 if( ( ( iTwo = veciGenes[ j ] ) == -1 ||vecfiGenes[ j ] == -1 ) || 01370 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) || 01371 CMeta::IsNaN( d = DatData.Get( iOne, iTwo ) )|| 01372 CMeta::IsNaN( w = wDat.Get(vecfiGenes[i],vecfiGenes[j]) )) 01373 continue; 01374 fAnswer = dAnswer > 0; 01375 01376 if(fAnswer || !flipneg) 01377 for( m = 0; m <(w*WT_MULTIPLIER-0.5); m++){ 01378 vecdValues.push_back(d);} 01379 else 01380 for( m = 0; m <((1-w)*WT_MULTIPLIER-0.5); m++){ 01381 vecdValues.push_back(d);} 01382 01383 } } 01384 01385 { 01386 std::vector<size_t> veciIndices; 01387 size_t iIndex, iCount; 01388 01389 veciIndices.resize( vecdValues.size( ) ); 01390 for( i = 0; i < vecdValues.size( ); ++i ) 01391 veciIndices[ i ] = i; 01392 std::sort( veciIndices.begin( ), veciIndices.end( ), SCompareRank<float>( vecdValues ) ); 01393 vecdRanks.resize( veciIndices.size( ) ); 01394 for( i = 0; i < vecdRanks.size( ); ++i ) { 01395 iIndex = veciIndices[ i ]; 01396 if( !i || ( vecdValues[ iIndex ] != vecdValues[ veciIndices[ i - 1 ] ] ) ) { 01397 for( iCount = 0,j = i; j < veciIndices.size( ); ++j ) { 01398 if( vecdValues[ veciIndices[ j ] ] != vecdValues[ iIndex ] ) 01399 break; 01400 iCount++; } 01401 d = i + ( iCount - 1 ) / 2.0f; } 01402 vecdRanks[ iIndex ] = d; } 01403 } 01404 01405 for( dSum = 0,iPos = iNeg = i = k = 0; i < DatAnswers.GetGenes( ); ++i ) { 01406 if( ( iOne = veciGenes[ i ] ) == -1 || vecfiGenes[ i ] == -1) 01407 continue; 01408 for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) { 01409 if( ( ( iTwo = veciGenes[ j ] ) == -1 || vecfiGenes[ j ] == -1 ) || 01410 CMeta::IsNaN( DatData.Get( iOne, iTwo ) ) || 01411 CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) )|| 01412 CMeta::IsNaN( w = wDat.Get(vecfiGenes[i],vecfiGenes[j]) ) ) 01413 continue; 01414 fAnswer = dAnswer > 0; 01415 if( dAnswer > 0 ) { 01416 for( m = 0; m <(wDat.Get(vecfiGenes[i],vecfiGenes[j])*WT_MULTIPLIER-0.5); m++){ 01417 iPos++; 01418 dSum += vecdRanks[ k ]; 01419 k++;}} 01420 else{ 01421 if(flipneg) 01422 for( m = 0; m <((1-wDat.Get(vecfiGenes[i],vecfiGenes[j]))*WT_MULTIPLIER-0.5); m++){ 01423 iNeg++; 01424 k++;} 01425 else 01426 for( m = 0; m <(wDat.Get(vecfiGenes[i],vecfiGenes[j])*WT_MULTIPLIER-0.5); m++){ 01427 iNeg++; 01428 k++;} 01429 } 01430 }} 01431 01432 dSum -= ( iPos * ( iPos - 1 ) ) / 2; 01433 01434 return ( dSum / iPos / iNeg ); } 01435 01436 double bessi0( double dX ) { 01437 double dAbs, dRet, dY; 01438 01439 if( ( dAbs = fabs( dX ) ) < 3.75 ) { 01440 dY = dX / 3.75; 01441 dY *= dY; 01442 dRet = 1 + dY * ( 3.5156229 + dY * ( 3.0899424 + dY * ( 1.2067492 + dY * ( 0.2659732 + 01443 dY * ( 0.360768e-1 + dY * 0.45813e-2 ) ) ) ) ); } 01444 else { 01445 dY = 3.75 / dAbs; 01446 dRet = ( exp( dAbs ) / sqrt( dAbs ) ) * ( 0.39894228 + dY * ( 0.1328592e-1 + dY * ( 0.225319e-2 + 01447 dY * ( -0.157565e-2 + dY * ( 0.916281e-2 + dY * ( -0.2057706e-1 + dY * ( 0.2635537e-1 + 01448 dY * ( -0.1647633e-1 + dY * 0.392377e-2 ) ) ) ) ) ) ) ); } 01449 01450 return dRet; } 01451 01452 double bessi1( double dX ) { 01453 double dAbs, dRet, dY; 01454 01455 if( ( dAbs = fabs( dX ) ) < 3.75 ) { 01456 dY = dX / 3.75; 01457 dY *= dY; 01458 dRet = dAbs * ( 0.5 + dY * ( 0.87890594 + dY * ( 0.51498869 + dY * ( 0.15084934 + dY * ( 0.2658733e-1 + 01459 dY * ( 0.301532e-2 + dY * 0.32411e-3 ) ) ) ) ) ); } 01460 else { 01461 dY = 3.75 / dAbs; 01462 dRet = 0.2282967e-1 + dY * ( -0.2895312e-1 + dY * ( 0.1787654e-1 - dY * 0.420059e-2 ) ); 01463 dRet = 0.39894228 + dY * ( -0.3988024e-1 + dY * ( -0.362018e-2 + dY * ( 0.163801e-2 + 01464 dY * ( -0.1031555e-1 + dY * dRet ) ) ) ); 01465 dRet *= exp( dAbs ) / sqrt( dAbs ); } 01466 01467 return dRet; } 01468 01469 double bessi( size_t iN, double dX ) { 01470 static const double ACC = 40; 01471 static const double BIGN0 = 1e10; 01472 static const double BIGN1 = 1e-10; 01473 double bi, bim, bip, tox, ans; 01474 size_t j; 01475 01476 if( !dX ) 01477 return 0; 01478 01479 tox = 2 / fabs( dX ); 01480 bip = ans = 0; 01481 bi = 1; 01482 for( j = 2 * ( iN + (size_t)sqrt( ACC * iN ) ); j > 0; j-- ) { 01483 bim = bip + j * tox * bi; 01484 bip = bi; 01485 bi = bim; 01486 if( fabs( bi ) > BIGN0 ) { 01487 ans *= BIGN1; 01488 bi *= BIGN1; 01489 bip *= BIGN1; } 01490 if( j == iN ) 01491 ans = bip; } 01492 ans *= bessi0( dX ) / bi; 01493 if( ( dX < 0 ) && ( iN & 1 ) ) 01494 ans *= -1; 01495 01496 return ans; } 01497 01498 double CStatisticsImpl::ModifiedBesselI( size_t iOrder, double dX ) { 01499 01500 switch( iOrder ) { 01501 case 0: 01502 return bessi0( dX ); 01503 01504 case 1: 01505 return bessi1( dX ); } 01506 01507 return bessi( iOrder, dX ); } 01508 01509 /* 01510 * The inverse standard normal distribution. 01511 * 01512 * Author: Peter J. Acklam <pjacklam@online.no> 01513 * URL: http://home.online.no/~pjacklam 01514 * 01515 * This function is based on the MATLAB code from the address above, 01516 * translated to C, and adapted for our purposes. 01517 */ 01518 double CStatistics::InverseNormal01CDF( double dX ) { 01519 double p = dX; 01520 const double a[6] = { 01521 -3.969683028665376e+01, 2.209460984245205e+02, 01522 -2.759285104469687e+02, 1.383577518672690e+02, 01523 -3.066479806614716e+01, 2.506628277459239e+00 01524 }; 01525 const double b[5] = { 01526 -5.447609879822406e+01, 1.615858368580409e+02, 01527 -1.556989798598866e+02, 6.680131188771972e+01, 01528 -1.328068155288572e+01 01529 }; 01530 const double c[6] = { 01531 -7.784894002430293e-03, -3.223964580411365e-01, 01532 -2.400758277161838e+00, -2.549732539343734e+00, 01533 4.374664141464968e+00, 2.938163982698783e+00 01534 }; 01535 const double d[4] = { 01536 7.784695709041462e-03, 3.224671290700398e-01, 01537 2.445134137142996e+00, 3.754408661907416e+00 01538 }; 01539 01540 register double q, t, u; 01541 01542 if (_isnan(p) || p > 1.0 || p < 0.0) 01543 return CMeta::GetNaN( ); 01544 if (p == 0.0) 01545 return -DBL_MAX; 01546 if (p == 1.0) 01547 return DBL_MAX; 01548 q = min(p,1-p); 01549 if (q > 0.02425) { 01550 /* Rational approximation for central region. */ 01551 u = q-0.5; 01552 t = u*u; 01553 u = u*(((((a[0]*t+a[1])*t+a[2])*t+a[3])*t+a[4])*t+a[5]) 01554 /(((((b[0]*t+b[1])*t+b[2])*t+b[3])*t+b[4])*t+1); 01555 } else { 01556 /* Rational approximation for tail region. */ 01557 t = sqrt(-2*log(q)); 01558 u = (((((c[0]*t+c[1])*t+c[2])*t+c[3])*t+c[4])*t+c[5]) 01559 /((((d[0]*t+d[1])*t+d[2])*t+d[3])*t+1); 01560 } 01561 /* The relative error of the approximation has absolute value less 01562 than 1.15e-9. One iteration of Halley's rational method (third 01563 order) gives full machine precision... */ 01564 t = Normal01CDF(u)-q; /* error */ 01565 t = t*(2.506628274631)*exp(u*u/2); /* f(u)/df(u) */ 01566 u = u-t/(1+u*t/2); /* Halley's method */ 01567 01568 return (p > 0.5 ? -u : u); 01569 }; 01570 01595 bool CStatistics::MatrixLUDecompose( CDataMatrix& Mat, vector<size_t>& veciIndices, bool& fEven ) { 01596 const float c_dEpsilon = 1e-20f; 01597 float big, dum, sum, temp; 01598 vector<float> vv; 01599 size_t i, j, k, imax; 01600 01601 if( Mat.GetRows( ) != Mat.GetColumns( ) ) 01602 return false; 01603 01604 veciIndices.resize( Mat.GetRows( ) ); 01605 vv.resize( Mat.GetRows( ) ); 01606 fEven = true; 01607 for (i=0;i<Mat.GetRows( );i++) { 01608 big=0.0; 01609 for (j=0;j<Mat.GetRows( );j++) 01610 if ((temp=fabs(Mat.Get( i, j ))) > big) big=temp; 01611 if (big == 0.0) 01612 g_CatSleipnir( ).warn( "CStatisticsImpl::LUDecomposition( ) singular matrix" ); 01613 vv[i]=1.0f/big; } 01614 for (j=0;j<Mat.GetRows( );j++) { 01615 for (i=0;i<j;i++) { 01616 sum=Mat.Get( i, j ); 01617 for (k=0;k<i;k++) 01618 sum -= Mat.Get( i, k )*Mat.Get( k, j ); 01619 Mat.Get( i, j )=sum; } 01620 big=0.0; 01621 for (i=j;i<Mat.GetRows( );i++) { 01622 sum=Mat.Get( i, j ); 01623 for (k=0;k<j;k++) 01624 sum -= Mat.Get( i, k )*Mat.Get( k, j ); 01625 Mat.Set( i, j, sum ); 01626 if ( (dum=vv[i]*fabs(sum)) >= big) { 01627 big=dum; 01628 imax=i; } } 01629 if (j != imax) { 01630 for (k=0;k<Mat.GetRows( );k++) { 01631 dum=Mat.Get( imax, k ); 01632 Mat.Set( imax, k, Mat.Get( j, k ) ); 01633 Mat.Set( j, k, dum ); } 01634 fEven = !fEven; 01635 vv[imax]=vv[j]; } 01636 veciIndices[j]=imax; 01637 if( !Mat.Get( j, j ) ) 01638 Mat.Set( j, j, c_dEpsilon ); 01639 if ((j+1) != Mat.GetRows( )) { 01640 dum=1.0f/(Mat.Get( j, j )); 01641 for (i=j+1;i<Mat.GetRows( );i++) 01642 Mat.Get( i, j ) *= dum; } } 01643 01644 return true; } 01645 01646 bool CStatisticsImpl::MatrixLUSubstitute( CDataMatrix& MatLU, const vector<size_t>& veciIndices, 01647 vector<float>& vecdB ) { 01648 size_t i,ii=0,ip,j,k; 01649 float sum; 01650 01651 if( ( MatLU.GetRows( ) != MatLU.GetColumns( ) ) || ( MatLU.GetRows( ) != veciIndices.size( ) ) || 01652 ( MatLU.GetRows( ) != vecdB.size( ) ) ) 01653 return false; 01654 01655 for (i=0;i<MatLU.GetRows( );i++) { 01656 ip=veciIndices[i]; 01657 sum=vecdB[ip]; 01658 vecdB[ip]=vecdB[i]; 01659 if (ii) 01660 for (j=ii-1;j<=i-1;j++) 01661 sum -= MatLU.Get( i, j ) * vecdB[j]; 01662 else if (sum) 01663 ii=i+1; 01664 vecdB[i]=sum; } 01665 for( k = 0; k < MatLU.GetRows( ); ++k ) { 01666 i = MatLU.GetRows( )-k-1; 01667 sum=vecdB[i]; 01668 for (j=i+1;j<MatLU.GetRows( );j++) 01669 sum -= MatLU.Get( i, j )*vecdB[j]; 01670 vecdB[i]=sum/MatLU.Get( i, i ); } 01671 01672 return true; } 01673 01698 bool CStatistics::MatrixLUInvert( CDataMatrix& MatLU, const vector<size_t>& veciIndices, 01699 CDataMatrix& MatInv ) { 01700 size_t i, j; 01701 vector<float> vecdCol; 01702 01703 if( MatLU.GetRows( ) != MatLU.GetColumns( ) ) 01704 return false; 01705 01706 if( ( MatInv.GetRows( ) != MatLU.GetRows( ) ) || ( MatInv.GetColumns( ) != MatLU.GetColumns( ) ) ) 01707 MatInv.Initialize( MatLU.GetRows( ), MatLU.GetColumns( ) ); 01708 vecdCol.resize( MatLU.GetRows( ) ); 01709 for( j = 0; j < MatLU.GetRows( ); ++j ) { 01710 for( i = 0; i < MatLU.GetRows( ); ++i ) 01711 vecdCol[ i ] = 0; 01712 vecdCol[ j ] = 1; 01713 if( !MatrixLUSubstitute( MatLU, veciIndices, vecdCol ) ) 01714 return false; 01715 for( i = 0; i < MatLU.GetRows( ); ++i ) 01716 MatInv.Set( i, j, vecdCol[ i ] ); } 01717 01718 return true; } 01719 01720 }