Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "measure.h" 00024 #include "meta.h" 00025 #include "statistics.h" 00026 #include <stdlib.h> 00027 #include <float.h> 00028 #include <math.h> 00029 00030 namespace Sleipnir { 00031 00032 //Added for calculating distance correlation. Code partly adapted from R "energy" package by Maria L. Rizzo and Gabor J. Szekely. 00033 static inline void dCOV(const float *x,const float *y, int *dims, float *DCOV) { 00034 /* computes dCov(x,y), dCor(x,y), dVar(x), dVar(y) 00035 V-statistic is n*dCov^2 where n*dCov^2 --> Q 00036 dims = sample size 00037 DCOV : vector [dCov, dCor, dVar(x), dVar(y)] 00038 */ 00039 00040 int i,j, k, n, n2; 00041 float **Dx, **Dy, **A, **B; 00042 float *akbar; 00043 float abar; 00044 float V; 00045 00046 n = *dims; 00047 00048 /* allocate a n*n matrix */ 00049 00050 Dx = (float **)calloc(n, sizeof(float *)); 00051 Dy = (float **)calloc(n, sizeof(float *)); 00052 A = (float **)calloc(n, sizeof(float *)); 00053 B = (float **)calloc(n, sizeof(float *)); 00054 for (i = 0; i < n; i++) 00055 { 00056 Dx[i] = (float *)calloc(n, sizeof(float)); 00057 Dy[i] = (float *)calloc(n, sizeof(float)); 00058 A[i] = (float *)calloc(n, sizeof(float)); 00059 B[i] = (float *)calloc(n, sizeof(float)); 00060 } 00061 00062 for (i=1; i<n; i++) { 00063 Dx[i][i] = 0.0; 00064 Dy[i][i] = 0.0; 00065 for (j=0; j<i; j++) { 00066 Dx[i][j] = Dx[j][i] = fabs(*(x+i) - *(x+j)); 00067 Dy[i][j] = Dy[j][i] = fabs(*(y+i) - *(y+j)); 00068 } 00069 } 00070 00071 00072 00073 akbar = (float*) calloc(n, sizeof(float)); 00074 abar = 0.0; 00075 for (k=0; k<n; k++) { 00076 akbar[k] = 0.0; 00077 for (j=0; j<n; j++) { 00078 akbar[k] += Dx[k][j]; 00079 } 00080 abar += akbar[k]; 00081 akbar[k] /= (float) n; 00082 } 00083 abar /= (float) (n*n); 00084 00085 for (k=0; k<n; k++) 00086 for (j=k; j<n; j++) { 00087 A[k][j] = Dx[k][j] - akbar[k] - akbar[j] + abar; 00088 A[j][k] = A[k][j]; 00089 } 00090 free(akbar); 00091 00092 00093 akbar = (float*) calloc(n, sizeof(float)); 00094 abar = 0.0; 00095 for (k=0; k<n; k++) { 00096 akbar[k] = 0.0; 00097 for (j=0; j<n; j++) { 00098 akbar[k] += Dy[k][j]; 00099 } 00100 abar += akbar[k]; 00101 akbar[k] /= (float) n; 00102 } 00103 abar /= (float) (n*n); 00104 00105 for (k=0; k<n; k++) 00106 for (j=k; j<n; j++) { 00107 B[k][j] = Dy[k][j] - akbar[k] - akbar[j] + abar; 00108 B[j][k] = B[k][j]; 00109 } 00110 free(akbar); 00111 00112 00113 for (i = 0; i < n; i++) { 00114 free(Dx[i]); 00115 free(Dy[i]); 00116 } 00117 free(Dx); 00118 free(Dy); 00119 00120 00121 n2 = ( n) * n; 00122 00123 /* compute dCov(x,y), dVar(x), dVar(y) */ 00124 for (k=0; k<4; k++) 00125 DCOV[k] = 0.0; 00126 for (k=0; k<n; k++) 00127 for (j=0; j<n; j++) { 00128 DCOV[0] += A[k][j]*B[k][j]; 00129 DCOV[2] += A[k][j]*A[k][j]; 00130 DCOV[3] += B[k][j]*B[k][j]; 00131 } 00132 00133 for (k=0; k<4; k++) { 00134 DCOV[k] /= n2; 00135 if (DCOV[k] > 0) 00136 DCOV[k] = sqrt(DCOV[k]); 00137 else DCOV[k] = 0.0; 00138 } 00139 /* compute dCor(x, y) */ 00140 V = DCOV[2]*DCOV[3]; 00141 if (V > DBL_EPSILON) 00142 DCOV[1] = DCOV[0] / sqrt(V); 00143 else DCOV[1] = 0.0; 00144 00145 00146 for (i = 0; i < n; i++) { 00147 free(A[i]); 00148 free(B[i]); 00149 } 00150 free(A); 00151 free(B); 00152 00153 } 00154 00155 00156 00157 00158 00159 00160 static inline float GetWeight(const float* adW, size_t iW) { 00161 00162 return (adW ? adW[iW] : 1); 00163 } 00164 00165 CMeasureImpl::CMeasureImpl(const IMeasure* pMeasure, bool fMemory) : 00166 m_pMeasure((IMeasure*) pMeasure), m_fMemory(fMemory) { 00167 } 00168 00169 CMeasureImpl::~CMeasureImpl() { 00170 00171 if (m_fMemory && m_pMeasure) 00172 delete m_pMeasure; 00173 } 00174 00175 bool CMeasureImpl::IsNaN(const float* adX, size_t iX) { 00176 size_t i; 00177 00178 for (i = 0; i < iX; ++i) 00179 if (CMeta::IsNaN(adX[i])) 00180 return true; 00181 00182 return false; 00183 } 00184 00185 double CMeasureImpl::MeasureTrim(const IMeasure* pMeasure, const float* adX, 00186 size_t iM, const float* adY, size_t iN, const IMeasure::EMap eMap, 00187 const float* adWX, const float* adWY, bool fAlign) { 00188 float* adA; 00189 float* adB; 00190 float* adWA; 00191 float* adWB; 00192 size_t i, iA, iB; 00193 double dRet; 00194 00195 adA = new float[iM]; 00196 adB = new float[iN]; 00197 adWA = adWX ? new float[iM] : NULL; 00198 adWB = adWY ? new float[iN] : NULL; 00199 if (fAlign) { 00200 for (i = iA = 0; i < min(iM, iN); ++i) 00201 if (!(CMeta::IsNaN(adX[i]) || CMeta::IsNaN(adY[i]))) { 00202 if (adWA) 00203 adWA[iA] = adWX[i]; 00204 if (adWB) 00205 adWB[iA] = adWY[i]; 00206 adA[iA] = adX[i]; 00207 adB[iA++] = adY[i]; 00208 } 00209 } else { 00210 for (i = iA = 0; i < iM; ++i) 00211 if (!CMeta::IsNaN(adX[i])) { 00212 if (adWA) 00213 adWA[iA] = adWX[i]; 00214 adA[iA++] = adX[i]; 00215 } 00216 for (i = iB = 0; i < iN; ++i) 00217 if (!CMeta::IsNaN(adY[i])) { 00218 if (adWB) 00219 adWB[iB] = adWY[i]; 00220 adB[iB++] = adY[i]; 00221 } 00222 } 00223 00224 dRet = pMeasure->Measure(adA, iA, adB, iB, eMap, adWA, adWB); 00225 delete[] adA; 00226 delete[] adB; 00227 if (adWA) 00228 delete[] adWA; 00229 if (adWB) 00230 delete[] adWB; 00231 00232 return dRet; 00233 } 00234 00235 double CMeasureKolmogorovSmirnov::Measure(const float* adX, size_t iM, 00236 const float* adY, size_t iN, EMap eMap, const float* adWX, 00237 const float* adWY) const { 00238 double dCur, dMax; 00239 size_t i, iX, iY; 00240 vector<float> vecdX, vecdY, vecdZ; 00241 00242 if (adWX || adWY) 00243 return CMeta::GetNaN(); 00244 if (CMeasureImpl::IsNaN(adX, iM) || CMeasureImpl::IsNaN(adY, iN)) 00245 return CMeasureImpl::MeasureTrim(this, adX, iM, adY, iN, eMap, adWX, 00246 adWY, false); 00247 if (iM > iN) 00248 return Measure(adY, iN, adX, iM, eMap, adWY, adWX); 00249 00250 vecdX.resize(iM); 00251 copy(adX, adX + iM, vecdX.begin()); 00252 sort(vecdX.begin(), vecdX.end()); 00253 vecdY.resize(iN); 00254 copy(adY, adY + iN, vecdY.begin()); 00255 sort(vecdY.begin(), vecdY.end()); 00256 vecdZ.resize(iM + iN); 00257 for (iX = iY = i = 0; i < vecdZ.size(); ++i) 00258 if (iX >= vecdX.size()) 00259 vecdZ[i] = vecdY[iY++]; 00260 else if (iY >= vecdY.size()) 00261 vecdZ[i] = vecdX[iX++]; 00262 else 00263 vecdZ[i] = (vecdX[iX] < vecdY[iY]) ? vecdX[iX++] : vecdY[iY++]; 00264 00265 for (dMax = iX = iY = i = 0; i < vecdZ.size(); ++i) { 00266 while ((iX < iM) && (vecdX[iX] <= vecdZ[i])) 00267 iX++; 00268 while ((iY < iN) && (vecdY[iY] <= vecdZ[i])) 00269 iY++; 00270 if ((dCur = fabs(((double) iX / iM) - ((double) iY / iN))) > dMax) 00271 dMax = dCur; 00272 } 00273 00274 return CStatistics::PValueKolmogorovSmirnov(dMax, iM, iN); 00275 } 00276 00277 double CMeasureEuclidean::Measure(const float* adX, size_t iM, 00278 const float* adY, size_t iN, EMap eMap, const float* adWX, 00279 const float* adWY) const { 00280 size_t i; 00281 double dRet, d; 00282 00283 if (iM != iN) 00284 return CMeta::GetNaN(); 00285 00286 dRet = 0; 00287 for (i = 0; i < iN; ++i) 00288 if ((adX[i] || adY[i]) && !(CMeta::IsNaN(adX[i]) 00289 || CMeta::IsNaN(adY[i]))) { 00290 d = adX[i] - adY[i]; 00291 d *= d; 00292 if (adWX || adWY) 00293 d *= GetWeight(adWX, i) * GetWeight(adWY, i); 00294 dRet += d; 00295 } 00296 00297 return sqrt(dRet); 00298 } 00299 00300 double CMeasureDistanceCorrelation::Measure(const float* adX, size_t iM, 00301 const float* adY, size_t iN, EMap eMap, const float* adWX, 00302 const float* adWY) const { 00303 size_t i; 00304 float dRet, d; 00305 int size=iN; 00306 float DCOV[4]={0,0,0,0}; 00307 00308 if (iM != iN) 00309 return CMeta::GetNaN(); 00310 00311 dRet = 0; 00312 dCOV(adX,adY,&size,DCOV); 00313 dRet = DCOV[1]; 00314 return (double)dRet; 00315 } 00316 00317 double CMeasureSignedDistanceCorrelation::Measure(const float* adX, size_t iM, 00318 const float* adY, size_t iN, EMap eMap, const float* adWX, 00319 const float* adWY) const { 00320 size_t i; 00321 float dRet, d; 00322 int size=iN; 00323 float DCOV[4]={0,0,0,0}; 00324 double dP; 00325 00326 if (iM != iN) 00327 return CMeta::GetNaN(); 00328 00329 dRet = 0; 00330 dCOV(adX,adY,&size,DCOV); 00331 dRet = DCOV[1]; 00332 dP=CMeasurePearson::Pearson(adX, iM, adY, iN, EMapNone, adWX, adWY); 00333 if(dP<0) 00334 dRet *=-1; 00335 00336 return (double)dRet; 00337 } 00338 00339 double CMeasureEuclideanScaled::Measure(const float* adX, size_t iM, 00340 const float* adY, size_t iN, EMap eMap, const float* adWX, 00341 const float* adWY) const { 00342 size_t i; 00343 double dRet, d, dY, dX; 00344 dX = dY = 0; 00345 if (iM != iN) 00346 return CMeta::GetNaN(); 00347 00348 dRet = 0; 00349 for (i = 0; i < iN; ++i) 00350 if ((adX[i] || adY[i]) && !(CMeta::IsNaN(adX[i]) 00351 || CMeta::IsNaN(adY[i]))) { 00352 d = adX[i] - adY[i]; 00353 d *= d; 00354 dX += (adX[i] * adX[i]); 00355 dY += (adY[i] * adY[i]); 00356 if (adWX || adWY) 00357 d *= GetWeight(adWX, i) * GetWeight(adWY, i); 00358 dRet += d; 00359 } 00360 dRet /= (0.5 * (dX + dY)); 00361 return sqrt(dRet); 00362 } 00363 00398 double CMeasurePearson::Pearson(const float* adX, size_t iM, const float* adY, 00399 size_t iN, EMap eMap, const float* adWX, const float* adWY, 00400 size_t* piCount) { 00401 double dMX, dMY, dRet, dDX, dDY, dX, dY; 00402 size_t i, iCount; 00403 00404 if (piCount) 00405 *piCount = 0; 00406 if (iM != iN) 00407 return CMeta::GetNaN(); 00408 00409 dMX = dMY = dX = dY = 0; 00410 for (iCount = i = 0; i < iN; ++i) { 00411 if (CMeta::IsNaN(adX[i]) || CMeta::IsNaN(adY[i])) 00412 continue; 00413 iCount++; 00414 dX += GetWeight(adWX, i); 00415 dY += GetWeight(adWY, i); 00416 dMX += adX[i] * GetWeight(adWX, i); 00417 dMY += adY[i] * GetWeight(adWY, i); 00418 } 00419 dMX /= dX; 00420 dMY /= dY; 00421 00422 dRet = dDX = dDY = 0; 00423 for (i = 0; i < iN; ++i) { 00424 if (CMeta::IsNaN(adX[i]) || CMeta::IsNaN(adY[i])) 00425 continue; 00426 dX = adX[i] - dMX; 00427 dY = adY[i] - dMY; 00428 dRet += dX * dY * sqrt(GetWeight(adWX, i) * GetWeight(adWY, i)); 00429 dDX += dX * dX * GetWeight(adWX, i); 00430 dDY += dY * dY * GetWeight(adWY, i); 00431 } 00432 if (!dDX || !dDY) 00433 dRet = CMeta::GetNaN(); 00434 else { 00435 dRet /= ( sqrt(dDX) * sqrt(dDY) ); 00436 } 00437 00438 switch (eMap) { 00439 case EMapCenter: 00440 dRet = (1 + dRet) / 2; 00441 break; 00442 00443 case EMapAbs: 00444 dRet = fabs(dRet); 00445 break; 00446 } 00447 if (piCount) 00448 *piCount = iCount; 00449 00450 return dRet; 00451 } 00452 00453 double CMeasureQuickPearson::Measure(const float* adX, size_t iM, 00454 const float* adY, size_t iN, EMap eMap, const float* adWX, 00455 const float* adWY) const { 00456 double dMX, dMY, dRet, dDX, dDY, dX, dY; 00457 size_t i; 00458 00459 dMX = dMY = 0; 00460 for (i = 0; i < iN; ++i) { 00461 dMX += adX[i]; 00462 dMY += adY[i]; 00463 } 00464 dMX /= iN; 00465 dMY /= iN; 00466 00467 dRet = dDX = dDY = 0; 00468 for (i = 0; i < iN; ++i) { 00469 dX = adX[i] - dMX; 00470 dY = adY[i] - dMY; 00471 dRet += dX * dY; 00472 dDX += dX * dX; 00473 dDY += dY * dY; 00474 } 00475 if (!(dDX || dDY)) 00476 dRet = 1; 00477 else { 00478 if (dDX) 00479 dRet /= sqrt(dDX); 00480 if (dDY) 00481 dRet /= sqrt(dDY); 00482 } 00483 00484 switch (eMap) { 00485 case EMapCenter: 00486 dRet = (1 + dRet) / 2; 00487 break; 00488 00489 case EMapAbs: 00490 dRet = fabs(dRet); 00491 break; 00492 } 00493 00494 return dRet; 00495 } 00496 00497 double CMeasureKendallsTau::Measure(const float* adX, size_t iM, 00498 const float* adY, size_t iN, EMap eMap, const float* adWX, 00499 const float* adWY) const { 00500 double dRet; 00501 00502 if (iM != iN) 00503 return CMeta::GetNaN(); 00504 if (CMeasureImpl::IsNaN(adX, iM) || CMeasureImpl::IsNaN(adY, iN)) 00505 return CMeasureImpl::MeasureTrim(this, adX, iM, adY, iN, eMap, adWX, 00506 adWY, true); 00507 00508 dRet = (adWX || adWY) ? CMeasureKendallsTauImpl::MeasureWeighted(adX, adY, 00509 iN, adWX, adWY) : CMeasureKendallsTauImpl::MeasureUnweighted(adX, 00510 adY, iN); 00511 if (dRet < -1) 00512 dRet = -1; 00513 else if (dRet > 1) 00514 dRet = 1; 00515 switch (eMap) { 00516 case EMapCenter: 00517 dRet = (1 + dRet) / 2; 00518 break; 00519 00520 case EMapAbs: 00521 dRet = fabs(dRet); 00522 break; 00523 } 00524 00525 return dRet; 00526 } 00527 00528 double CMeasureKendallsTauImpl::MeasureWeighted(const float* adX, 00529 const float* adY, size_t iN, const float* adWX, const float* adWY) { 00530 size_t i, j; 00531 double dA1, dA2, dWX, dWY, dW, dN1, dN2, dS, dAA; 00532 00533 dN1 = dN2 = dS = 0; 00534 for (i = 0; (i + 1) < iN; ++i) 00535 for (j = (i + 1); j < iN; ++j) { 00536 dA1 = adX[i] - adX[j]; 00537 dA2 = adY[i] - adY[j]; 00538 dWX = GetWeight(adWX, i) * GetWeight(adWX, j); 00539 dWY = GetWeight(adWY, i) * GetWeight(adWY, j); 00540 dW = sqrt(dWX * dWY); 00541 if (dAA = (dA1 * dA2)) { 00542 dN1 += dWX; 00543 dN2 += dWY; 00544 dS += (dAA > 0) ? dW : -dW; 00545 } else if (dA1) 00546 dN1 += dWX; 00547 else 00548 dN2 += dWY; 00549 } 00550 00551 return (dS / (sqrt(dN1) * sqrt(dN2))); 00552 } 00553 00554 /* 00555 double CMeasureKendallsTauImpl::MeasureUnweighted( const float* adX, const float* adY, 00556 size_t iN ) { 00557 size_t i, j, iN1, iN2; 00558 float dA1, dA2, dAA; 00559 int iS; 00560 00561 for( iN1 = iN2 = iS = i = 0; ( i + 1 ) < iN; ++i ) 00562 for( j = ( i + 1 ); j < iN; ++j ) { 00563 dA1 = adX[ i ] - adX[ j ]; 00564 dA2 = adY[ i ] - adY[ j ]; 00565 if( dAA = ( dA1 * dA2 ) ) { 00566 iN1++; 00567 iN2++; 00568 iS += ( dAA > 0 ) ? 1 : -1; } 00569 else if( dA1 ) 00570 iN1++; 00571 else 00572 iN2++; } 00573 00574 return ( ( iN1 && iN2 ) ? ( iS / ( sqrt( (float)iN1 ) * sqrt( (float)iN2 ) ) ) : 1 ); } 00575 */ 00576 00577 double CMeasureKendallsTauImpl::MeasureUnweighted(const float* adX, 00578 const float* adY, size_t iN) { 00579 static const size_t c_iCache = 1024; 00580 static size_t l_aiPerm[c_iCache]; 00581 static size_t l_aiTemp[c_iCache]; 00582 size_t* aiPerm; 00583 size_t* aiTemp; 00584 size_t i, iFirst, iT, iU, iV, iExchanges; 00585 int iTotal; 00586 double dBottom; 00587 00588 aiTemp = (iN > c_iCache) ? new size_t[iN] : l_aiTemp; 00589 aiPerm = (iN > c_iCache) ? new size_t[iN] : l_aiPerm; 00590 for (i = 0; i < iN; ++i) 00591 aiPerm[i] = i; 00592 00593 // First of all we first by the first ordering. 00594 sort(aiPerm, aiPerm + iN, SKendallsFirst(adX, adY)); 00595 iFirst = iT = 0; 00596 // Next, we compute the number of joint ties. 00597 for (i = 1; i < iN; ++i) 00598 if ((adX[aiPerm[iFirst]] != adX[aiPerm[i]]) || (adY[aiPerm[iFirst]] 00599 != adY[aiPerm[i]])) { 00600 iT += ((i - iFirst) * (i - iFirst - 1)) / 2; 00601 iFirst = i; 00602 } 00603 iT += ((i - iFirst) * (i - iFirst - 1)) / 2; 00604 00605 // Now we compute the number of ties. 00606 iFirst = iU = 0; 00607 for (i = 1; i < iN; ++i) 00608 if (adX[aiPerm[iFirst]] != adX[aiPerm[i]]) { 00609 iU += ((i - iFirst) * (i - iFirst - 1)) / 2; 00610 iFirst = i; 00611 } 00612 iU += ((i - iFirst) * (i - iFirst - 1)) / 2; 00613 00614 // Now we use an exchange counter to order by the second ordering and count the number 00615 // of exchanges (i.e., discordances). 00616 memset(aiTemp, 0, iN * sizeof(*aiTemp)); 00617 iExchanges = CountExchanges(aiPerm, iN, aiTemp, SKendallsSecond(adX, adY)); 00618 00619 // Now we compute the number of ties. 00620 iFirst = iV = 0; 00621 for (i = 1; i < iN; ++i) 00622 if (adY[aiPerm[iFirst]] != adY[aiPerm[i]]) { 00623 iV += ((i - iFirst) * (i - iFirst - 1)) / 2; 00624 iFirst = i; 00625 } 00626 iV += ((i - iFirst) * (i - iFirst - 1)) / 2; 00627 00628 if (iN > c_iCache) { 00629 delete[] aiPerm; 00630 delete[] aiTemp; 00631 } 00632 00633 iTotal = (iN * (iN - 1)) / 2; 00634 dBottom = sqrt((float) (iTotal - iU)) * sqrt((float) (iTotal - iV)); 00635 iTotal = (iTotal - (iV + iU - iT)) - (2 * iExchanges); 00636 return (dBottom ? (iTotal / dBottom) : 1); 00637 } 00638 00639 size_t CMeasureKendallsTauImpl::CountExchanges(size_t* aiPerm, size_t iN, 00640 size_t* aiTemp, const SKendallsSecond& sCompare, size_t iOffset) { 00641 size_t iExchanges, iT, iL0, iL1, iMiddle, i, j, k; 00642 int iD; 00643 00644 if (iN == 1) 00645 return 0; 00646 if (iN == 2) { 00647 if (sCompare(aiPerm[iOffset], aiPerm[iOffset + 1]) <= 0) 00648 return 0; 00649 iT = aiPerm[iOffset]; 00650 aiPerm[iOffset] = aiPerm[iOffset + 1]; 00651 aiPerm[iOffset + 1] = iT; 00652 return 1; 00653 } 00654 00655 iL1 = iN - (iL0 = iN / 2); 00656 iMiddle = iOffset + iL0; 00657 iExchanges = CountExchanges(aiPerm, iL0, aiTemp, sCompare, iOffset) 00658 + CountExchanges(aiPerm, iL1, aiTemp, sCompare, iMiddle); 00659 00660 // If the last element of the first subarray is smaller than the first element of 00661 // the second subarray, there is nothing to do and we can return the exchanges got so far. 00662 if (sCompare(aiPerm[iMiddle - 1], aiPerm[iMiddle]) < 0) 00663 return iExchanges; 00664 00665 // We merge the lists into temp, adding the number of forward moves to exchanges. 00666 for (i = j = k = 0; (j < iL0) || (k < iL1); ++i) { 00667 if ((k >= iL1) || ((j < iL0) && (sCompare(aiPerm[iOffset + j], 00668 aiPerm[iMiddle + k]) <= 0))) { 00669 aiTemp[i] = aiPerm[iOffset + j]; 00670 iD = i - j++; 00671 } else { 00672 aiTemp[i] = aiPerm[iMiddle + k]; 00673 iD = (iOffset + i) - (iMiddle + k++); 00674 } 00675 00676 if (iD > 0) 00677 iExchanges += iD; 00678 } 00679 00680 memcpy(aiPerm + iOffset, aiTemp, iN * sizeof(*aiPerm)); 00681 return iExchanges; 00682 } 00683 00684 double CMeasureAutocorrelate::Measure(const float* adX, size_t iM, 00685 const float* adY, size_t iN, EMap eMap, const float* adWX, 00686 const float* adWY) const { 00687 size_t i, j; 00688 double dCur, dMax; 00689 float* adZ; 00690 float* adWZ; 00691 00692 if (iM != iN) 00693 return CMeta::GetNaN(); 00694 00695 dMax = m_pMeasure->Measure(adX, iM, adY, iN, eMap, adWX, adWY); 00696 adZ = new float[iN]; 00697 adWZ = adWY ? new float[iN] : NULL; 00698 for (i = 1; i < iN; ++i) { 00699 for (j = 0; j < iN; ++j) { 00700 adZ[j] = adY[(j + i) % iN]; 00701 if (adWZ) 00702 adWZ[j] = adWY[(j + i) % iN]; 00703 } 00704 if ((dCur = m_pMeasure->Measure(adX, iM, adZ, iN, eMap, adWX, adWZ)) 00705 > dMax) 00706 dMax = dCur; 00707 } 00708 delete[] adZ; 00709 00710 return dMax; 00711 } 00712 00713 double CMeasureSpearman::Measure(const float* adX, size_t iM, const float* adY, 00714 size_t iN, EMap eMap, const float* adWX, const float* adWY) const { 00715 static const size_t c_iCache = 1024; 00716 static size_t l_aiX[c_iCache]; 00717 static size_t l_aiY[c_iCache]; 00718 size_t* aiX; 00719 size_t* aiY; 00720 size_t i, j, iSum; 00721 double dRet, d, dSum; 00722 00723 if ((iM != iN) || adWX || adWY) 00724 return CMeta::GetNaN(); 00725 if (CMeasureImpl::IsNaN(adX, iM) || CMeasureImpl::IsNaN(adY, iN)) 00726 return CMeasureImpl::MeasureTrim(this, adX, iM, adY, iN, eMap, adWX, 00727 adWY, true); 00728 00729 if (m_fTransformed) { 00730 dSum = 0; 00731 for (i = 0; i < iN; ++i) { 00732 d = adX[i] - adY[i]; 00733 dSum += d * d; 00734 } 00735 dRet = dSum ? 1 - (6 * dSum / iN / ((iN * iN) - 1)) : 1; 00736 } else { 00737 if (iN > c_iCache) { 00738 aiX = new size_t[iM]; 00739 aiY = new size_t[iN]; 00740 } else { 00741 aiX = l_aiX; 00742 aiY = l_aiY; 00743 } 00744 memset(aiX, 0, iM * sizeof(*aiX)); 00745 memset(aiY, 0, iN * sizeof(*aiY)); 00746 for (i = 0; i < iN; ++i) 00747 for (j = 0; j < iN; ++j) { 00748 if (i == j) 00749 continue; 00750 if (adX[j] < adX[i]) 00751 aiX[i]++; 00752 if (adY[j] < adY[i]) 00753 aiY[i]++; 00754 } 00755 00756 for (iSum = i = 0; i < iN; ++i) { 00757 j = aiX[i] - aiY[i]; 00758 iSum += j * j; 00759 } 00760 if (aiX != l_aiX) 00761 delete[] aiX; 00762 if (aiY != l_aiY) 00763 delete[] aiY; 00764 dRet = iSum ? 1 - (6.0 * iSum / iN / ((iN * iN) - 1)) : 1; 00765 } 00766 00767 switch (eMap) { 00768 case EMapCenter: 00769 dRet = (1 + dRet) / 2; 00770 break; 00771 00772 case EMapAbs: 00773 dRet = fabs(dRet); 00774 break; 00775 } 00776 00777 return dRet; 00778 } 00779 00780 double CMeasurePearNorm::Measure(const float* adX, size_t iM, 00781 const float* adY,size_t iN, EMap eMap, const float* adWX, const float* adWY) const { 00782 static const float c_dBound = 0.9999f; 00783 double dP; 00784 00785 dP = CMeasurePearson::Pearson(adX, iM, adY, iN, EMapNone, adWX, adWY); 00786 00787 if (fabs(dP) >= c_dBound) 00788 dP *= c_dBound; 00789 dP = CStatistics::FisherTransform(dP); 00790 if (m_dAverage != HUGE_VAL) 00791 dP = (dP - m_dAverage) / m_dStdDev; 00792 return dP; 00793 } 00794 00795 double CMeasureHypergeometric::Measure(const float* adX, size_t iM, 00796 const float* adY, size_t iN, EMap eMap, const float* adWX, 00797 const float* adWY) const { 00798 size_t i, iOne, iTwo, iBoth, iTotalPresent; 00799 00800 if (iM != iN) 00801 return CMeta::GetNaN(); 00802 00803 iOne = iTwo = iTotalPresent = iBoth = 0; 00804 for (i = 0; i < iN; ++i) { 00805 if (CMeta::IsNaN(adX[i]) || CMeta::IsNaN(adY[i])) 00806 continue; 00807 iTotalPresent++; 00808 if (adX[i]) 00809 iOne++; 00810 if (adY[i]) { 00811 iTwo++; 00812 if (adX[i]) 00813 iBoth++; 00814 } 00815 } 00816 00817 return (1 00818 - CStatistics::HypergeometricCDF(iBoth, iOne, iTwo, iTotalPresent)); 00819 } 00820 00821 double CMeasureInnerProduct::Measure(const float* adX, size_t iM, 00822 const float* adY, size_t iN, EMap eMap, const float* adWX, 00823 const float* adWY) const { 00824 size_t i; 00825 double dRet; 00826 00827 if (iM != iN) 00828 return CMeta::GetNaN(); 00829 00830 dRet = 0; 00831 for (i = 0; i < iN; ++i) 00832 if ((adX[i] || adY[i]) && !(CMeta::IsNaN(adX[i]) 00833 || CMeta::IsNaN(adY[i]))) 00834 dRet += adX[i] * adY[i] * GetWeight(adWX, i) * GetWeight(adWY, i); 00835 00836 return dRet; 00837 } 00838 00839 double CMeasureBinaryInnerProduct::Measure(const float* adX, size_t iM, 00840 const float* adY, size_t iN, EMap eMap, const float* adWX, 00841 const float* adWY) const { 00842 size_t i; 00843 double dRet, dCount; 00844 00845 if (iM != iN) 00846 return CMeta::GetNaN(); 00847 00848 dRet = dCount = 0; 00849 for (i = 0; i < iN; ++i) { 00850 if (!(CMeta::IsNaN(adX[i]) && CMeta::IsNaN(adY[i]))) 00851 dCount += GetWeight(adWX, i) + GetWeight(adWY, i); 00852 if (CMeta::IsNaN(adX[i]) || CMeta::IsNaN(adY[i]) || !adX[i] || !adY[i]) 00853 continue; 00854 dRet += GetWeight(adWX, i) * GetWeight(adWY, i); 00855 } 00856 if (dCount) 00857 dRet /= dCount / 2; 00858 00859 return dRet; 00860 } 00861 00862 double CMeasureMutualInformation::Measure(const float* adX, size_t iM, 00863 const float* adY, size_t iN, EMap eMap, const float* adWX, 00864 const float* adWY) const { 00865 map<float, size_t> mapOne, mapTwo; 00866 map<float, size_t>::iterator iter; 00867 map<float, size_t>::const_iterator iterOne, iterTwo; 00868 map<pair<float, float> , size_t> mapJoint; 00869 map<pair<float, float> , size_t>::iterator iterJoint; 00870 size_t i, iOne, iTwo, iJoint; 00871 double dOne, dJoint, dRet; 00872 00873 if (iM != iN) 00874 return CMeta::GetNaN(); 00875 00876 iOne = iTwo = iJoint = 0; 00877 for (i = 0; i < iM; ++i) { 00878 if (!CMeta::IsNaN(adX[i])) { 00879 if ((iter = mapOne.find(adX[i])) == mapOne.end()) 00880 mapOne[adX[i]] = 1; 00881 else 00882 iter->second += 1; 00883 iOne++; 00884 if (!CMeta::IsNaN(adY[i])) { 00885 if ((iterJoint = mapJoint.find(pair<float, float> (adX[i], 00886 adY[i]))) == mapJoint.end()) 00887 mapJoint[pair<float, float> (adX[i], adY[i])] = 1; 00888 else 00889 iterJoint->second += 1; 00890 iJoint++; 00891 } 00892 } 00893 if (!CMeta::IsNaN(adY[i])) { 00894 if ((iter = mapTwo.find(adY[i])) == mapTwo.end()) 00895 mapTwo[adY[i]] = 1; 00896 else 00897 iter->second += 1; 00898 iTwo++; 00899 } 00900 } 00901 00902 for (dRet = 0, iterOne = mapOne.begin(); iterOne != mapOne.end(); ++iterOne) { 00903 dOne = (double) iterOne->second / iOne; 00904 for (iterTwo = mapTwo.begin(); iterTwo != mapTwo.end(); ++iterTwo) 00905 if ((iterJoint = mapJoint.find(pair<float, float> (iterOne->first, 00906 iterTwo->first))) != mapJoint.end()) { 00907 dJoint = (double) iterJoint->second / iJoint; 00908 dRet += dJoint * log(dJoint * iTwo / dOne / iterTwo->second); 00909 } 00910 } 00911 dRet /= log(2.0); 00912 dRet -= (double) max(mapOne.size(), mapTwo.size()) / (2 * max(iOne, iTwo) 00913 * log(2.0)); 00914 00915 return dRet; 00916 } 00917 00918 double CMeasureRelativeAUC::Measure(const float* adX, size_t iM, 00919 const float* adY, size_t iN, EMap eMap, const float* adWX, 00920 const float* adWY) const { 00921 float dOne, dTwo, dDiff; 00922 size_t i; 00923 00924 if (iM != iN) 00925 return CMeta::GetNaN(); 00926 00927 dOne = dTwo = dDiff = 0; 00928 for (i = 0; i < iN; ++i) { 00929 if (CMeta::IsNaN(adX[i]) || CMeta::IsNaN(adY[i])) 00930 continue; 00931 dOne += fabs(adX[i]); 00932 dTwo += fabs(adY[i]); 00933 dDiff += fabs(adX[i] - adY[i]); 00934 } 00935 00936 return (1 - (dDiff / (dOne + dTwo))); 00937 } 00938 00939 double CMeasurePearsonSignificance::Measure(const float* adX, size_t iM, 00940 const float* adY, size_t iN, EMap eMap, const float* adWX, 00941 const float* adWY) const { 00942 double dRet, dPearson; 00943 size_t iCount; 00944 00945 if (CMeta::IsNaN(dPearson = CMeasurePearson::Pearson(adX, iM, adY, iN, 00946 EMapNone, adWX, adWY, &iCount))) 00947 return CMeta::GetNaN(); 00948 00949 dRet = (iCount < 2) ? 0 : CStatistics::TCDF(dPearson * sqrt( 00950 (double) (iCount - 2)) / sqrt(1 - (dPearson * dPearson)), iCount 00951 - 2); 00952 switch (eMap) { 00953 case EMapCenter: 00954 dRet = (dPearson > 0) ? ((dRet / 2) + 0.5) : (0.5 - (dRet / 2)); 00955 break; 00956 00957 case EMapNone: 00958 if (dPearson < 0) 00959 dRet *= -1; 00960 break; 00961 } 00962 00963 return dRet; 00964 } 00965 00966 double CMeasureDice::Measure( const float* adX, size_t iM, const float* adY, 00967 size_t iN, EMap eMap, const float* adWX, const float* adWY ) const { 00968 double dDot, dXY, dYX, dX, dY; 00969 size_t i; 00970 00971 dDot = dXY = dYX = 0; 00972 for( i = 0; i < min(iM, iN); ++i ) { 00973 if( CMeta::IsNaN( dX = adX[i] ) || CMeta::IsNaN( dY = adY[i] ) ) 00974 continue; 00975 dX *= GetWeight( adWX, i ); 00976 dY *= GetWeight( adWY, i ); 00977 dDot += dX * dY; 00978 dXY += pow( max(0.0, dX - dY), 2 ); 00979 dYX += pow( max(0.0, dY - dX), 2 ); } 00980 dX = dDot + ( m_dAlpha * dXY ) + ( ( 1 - m_dAlpha ) * dYX ); 00981 return ( dX ? ( dDot / dX ) : CMeta::GetNaN( ) ); } 00982 00983 00984 00985 00986 00987 00988 00989 00990 00991 } 00992 00993 00994