Sleipnir
src/measure.cpp
00001 /*****************************************************************************
00002  * This file is provided under the Creative Commons Attribution 3.0 license.
00003  *
00004  * You are free to share, copy, distribute, transmit, or adapt this work
00005  * PROVIDED THAT you attribute the work to the authors listed below.
00006  * For more information, please see the following web page:
00007  * http://creativecommons.org/licenses/by/3.0/
00008  *
00009  * This file is a component of the Sleipnir library for functional genomics,
00010  * authored by:
00011  * Curtis Huttenhower (chuttenh@princeton.edu)
00012  * Mark Schroeder
00013  * Maria D. Chikina
00014  * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015  *
00016  * If you use this library, the included executable tools, or any related
00017  * code in your work, please cite the following publication:
00018  * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019  * Olga G. Troyanskaya.
00020  * "The Sleipnir library for computational functional genomics"
00021  *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "measure.h"
00024 #include "meta.h"
00025 #include "statistics.h"
00026 #include <stdlib.h>
00027 #include <float.h>
00028 #include <math.h>
00029 
00030 namespace Sleipnir {
00031 
00032 //Added for calculating distance correlation. Code partly adapted from R "energy" package by Maria L. Rizzo and Gabor J. Szekely.
00033 static inline void dCOV(const float *x,const float *y, int *dims, float *DCOV) {
00034     /*  computes dCov(x,y), dCor(x,y), dVar(x), dVar(y)
00035         V-statistic is n*dCov^2 where n*dCov^2 --> Q
00036         dims = sample size
00037         DCOV  : vector [dCov, dCor, dVar(x), dVar(y)]
00038      */
00039 
00040     int    i,j, k, n, n2;
00041     float **Dx, **Dy, **A, **B;
00042     float *akbar;
00043     float abar;
00044     float V;
00045 
00046     n = *dims;
00047 
00048     /* allocate a n*n matrix  */
00049 
00050     Dx = (float **)calloc(n, sizeof(float *));
00051     Dy = (float **)calloc(n, sizeof(float *));
00052     A = (float **)calloc(n, sizeof(float *));
00053     B = (float **)calloc(n, sizeof(float *));
00054     for (i = 0; i < n; i++)
00055     {
00056     Dx[i] = (float *)calloc(n, sizeof(float));
00057     Dy[i] = (float *)calloc(n, sizeof(float));
00058     A[i] = (float *)calloc(n, sizeof(float));
00059     B[i] = (float *)calloc(n, sizeof(float));
00060     }
00061 
00062     for (i=1; i<n; i++) {
00063         Dx[i][i] = 0.0;
00064         Dy[i][i] = 0.0;
00065         for (j=0; j<i; j++) {
00066             Dx[i][j] = Dx[j][i] = fabs(*(x+i) - *(x+j));
00067             Dy[i][j] = Dy[j][i] = fabs(*(y+i) - *(y+j));
00068         }
00069     }
00070 
00071 
00072 
00073     akbar = (float*)  calloc(n, sizeof(float));
00074     abar = 0.0;
00075     for (k=0; k<n; k++) {
00076         akbar[k] = 0.0;
00077         for (j=0; j<n; j++) {
00078             akbar[k] += Dx[k][j];
00079         }
00080         abar += akbar[k];
00081         akbar[k] /= (float) n;
00082     }
00083     abar /= (float) (n*n);
00084 
00085     for (k=0; k<n; k++)
00086         for (j=k; j<n; j++) {
00087             A[k][j] = Dx[k][j] - akbar[k] - akbar[j] + abar;
00088             A[j][k] = A[k][j];
00089         }
00090     free(akbar);
00091 
00092 
00093     akbar = (float*)  calloc(n, sizeof(float));
00094     abar = 0.0;
00095     for (k=0; k<n; k++) {
00096         akbar[k] = 0.0;
00097         for (j=0; j<n; j++) {
00098             akbar[k] += Dy[k][j];
00099         }
00100         abar += akbar[k];
00101         akbar[k] /= (float) n;
00102     }
00103     abar /= (float) (n*n);
00104 
00105     for (k=0; k<n; k++)
00106         for (j=k; j<n; j++) {
00107             B[k][j] = Dy[k][j] - akbar[k] - akbar[j] + abar;
00108             B[j][k] = B[k][j];
00109         }
00110     free(akbar);
00111 
00112 
00113     for (i = 0; i < n; i++) {
00114         free(Dx[i]);
00115         free(Dy[i]);
00116         }
00117     free(Dx);
00118     free(Dy);
00119 
00120 
00121     n2 = ( n) * n;
00122 
00123     /* compute dCov(x,y), dVar(x), dVar(y) */
00124     for (k=0; k<4; k++)
00125         DCOV[k] = 0.0;
00126     for (k=0; k<n; k++)
00127         for (j=0; j<n; j++) {
00128             DCOV[0] += A[k][j]*B[k][j];
00129             DCOV[2] += A[k][j]*A[k][j];
00130             DCOV[3] += B[k][j]*B[k][j];
00131         }
00132 
00133     for (k=0; k<4; k++) {
00134         DCOV[k] /= n2;
00135         if (DCOV[k] > 0)
00136             DCOV[k] = sqrt(DCOV[k]);
00137             else DCOV[k] = 0.0;
00138     }
00139     /* compute dCor(x, y) */
00140     V = DCOV[2]*DCOV[3];
00141     if (V > DBL_EPSILON)
00142         DCOV[1] = DCOV[0] / sqrt(V);
00143         else DCOV[1] = 0.0;
00144 
00145 
00146     for (i = 0; i < n; i++) {
00147         free(A[i]);
00148         free(B[i]);
00149         }
00150     free(A);
00151     free(B);
00152 
00153 }
00154 
00155 
00156 
00157 
00158 
00159 
00160 static inline float GetWeight(const float* adW, size_t iW) {
00161 
00162     return (adW ? adW[iW] : 1);
00163 }
00164 
00165 CMeasureImpl::CMeasureImpl(const IMeasure* pMeasure, bool fMemory) :
00166     m_pMeasure((IMeasure*) pMeasure), m_fMemory(fMemory) {
00167 }
00168 
00169 CMeasureImpl::~CMeasureImpl() {
00170 
00171     if (m_fMemory && m_pMeasure)
00172         delete m_pMeasure;
00173 }
00174 
00175 bool CMeasureImpl::IsNaN(const float* adX, size_t iX) {
00176     size_t i;
00177 
00178     for (i = 0; i < iX; ++i)
00179         if (CMeta::IsNaN(adX[i]))
00180             return true;
00181 
00182     return false;
00183 }
00184 
00185 double CMeasureImpl::MeasureTrim(const IMeasure* pMeasure, const float* adX,
00186         size_t iM, const float* adY, size_t iN, const IMeasure::EMap eMap,
00187         const float* adWX, const float* adWY, bool fAlign) {
00188     float* adA;
00189     float* adB;
00190     float* adWA;
00191     float* adWB;
00192     size_t i, iA, iB;
00193     double dRet;
00194 
00195     adA = new float[iM];
00196     adB = new float[iN];
00197     adWA = adWX ? new float[iM] : NULL;
00198     adWB = adWY ? new float[iN] : NULL;
00199     if (fAlign) {
00200         for (i = iA = 0; i < min(iM, iN); ++i)
00201             if (!(CMeta::IsNaN(adX[i]) || CMeta::IsNaN(adY[i]))) {
00202                 if (adWA)
00203                     adWA[iA] = adWX[i];
00204                 if (adWB)
00205                     adWB[iA] = adWY[i];
00206                 adA[iA] = adX[i];
00207                 adB[iA++] = adY[i];
00208             }
00209     } else {
00210         for (i = iA = 0; i < iM; ++i)
00211             if (!CMeta::IsNaN(adX[i])) {
00212                 if (adWA)
00213                     adWA[iA] = adWX[i];
00214                 adA[iA++] = adX[i];
00215             }
00216         for (i = iB = 0; i < iN; ++i)
00217             if (!CMeta::IsNaN(adY[i])) {
00218                 if (adWB)
00219                     adWB[iB] = adWY[i];
00220                 adB[iB++] = adY[i];
00221             }
00222     }
00223 
00224     dRet = pMeasure->Measure(adA, iA, adB, iB, eMap, adWA, adWB);
00225     delete[] adA;
00226     delete[] adB;
00227     if (adWA)
00228         delete[] adWA;
00229     if (adWB)
00230         delete[] adWB;
00231 
00232     return dRet;
00233 }
00234 
00235 double CMeasureKolmogorovSmirnov::Measure(const float* adX, size_t iM,
00236         const float* adY, size_t iN, EMap eMap, const float* adWX,
00237         const float* adWY) const {
00238     double dCur, dMax;
00239     size_t i, iX, iY;
00240     vector<float> vecdX, vecdY, vecdZ;
00241 
00242     if (adWX || adWY)
00243         return CMeta::GetNaN();
00244     if (CMeasureImpl::IsNaN(adX, iM) || CMeasureImpl::IsNaN(adY, iN))
00245         return CMeasureImpl::MeasureTrim(this, adX, iM, adY, iN, eMap, adWX,
00246                 adWY, false);
00247     if (iM > iN)
00248         return Measure(adY, iN, adX, iM, eMap, adWY, adWX);
00249 
00250     vecdX.resize(iM);
00251     copy(adX, adX + iM, vecdX.begin());
00252     sort(vecdX.begin(), vecdX.end());
00253     vecdY.resize(iN);
00254     copy(adY, adY + iN, vecdY.begin());
00255     sort(vecdY.begin(), vecdY.end());
00256     vecdZ.resize(iM + iN);
00257     for (iX = iY = i = 0; i < vecdZ.size(); ++i)
00258         if (iX >= vecdX.size())
00259             vecdZ[i] = vecdY[iY++];
00260         else if (iY >= vecdY.size())
00261             vecdZ[i] = vecdX[iX++];
00262         else
00263             vecdZ[i] = (vecdX[iX] < vecdY[iY]) ? vecdX[iX++] : vecdY[iY++];
00264 
00265     for (dMax = iX = iY = i = 0; i < vecdZ.size(); ++i) {
00266         while ((iX < iM) && (vecdX[iX] <= vecdZ[i]))
00267             iX++;
00268         while ((iY < iN) && (vecdY[iY] <= vecdZ[i]))
00269             iY++;
00270         if ((dCur = fabs(((double) iX / iM) - ((double) iY / iN))) > dMax)
00271             dMax = dCur;
00272     }
00273 
00274     return CStatistics::PValueKolmogorovSmirnov(dMax, iM, iN);
00275 }
00276 
00277 double CMeasureEuclidean::Measure(const float* adX, size_t iM,
00278         const float* adY, size_t iN, EMap eMap, const float* adWX,
00279         const float* adWY) const {
00280     size_t i;
00281     double dRet, d;
00282 
00283     if (iM != iN)
00284         return CMeta::GetNaN();
00285 
00286     dRet = 0;
00287     for (i = 0; i < iN; ++i)
00288         if ((adX[i] || adY[i]) && !(CMeta::IsNaN(adX[i])
00289                 || CMeta::IsNaN(adY[i]))) {
00290             d = adX[i] - adY[i];
00291             d *= d;
00292             if (adWX || adWY)
00293                 d *= GetWeight(adWX, i) * GetWeight(adWY, i);
00294             dRet += d;
00295         }
00296 
00297     return sqrt(dRet);
00298 }
00299 
00300 double CMeasureDistanceCorrelation::Measure(const float* adX, size_t iM,
00301         const float* adY, size_t iN, EMap eMap, const float* adWX,
00302         const float* adWY) const {
00303     size_t i;
00304     float dRet, d;
00305     int size=iN;
00306     float DCOV[4]={0,0,0,0};
00307 
00308     if (iM != iN)
00309         return CMeta::GetNaN();
00310 
00311     dRet = 0;
00312     dCOV(adX,adY,&size,DCOV);
00313     dRet = DCOV[1];
00314     return (double)dRet;
00315 }
00316 
00317 double CMeasureSignedDistanceCorrelation::Measure(const float* adX, size_t iM,
00318         const float* adY, size_t iN, EMap eMap, const float* adWX,
00319         const float* adWY) const {
00320     size_t i;
00321         float dRet, d;
00322         int size=iN;
00323         float DCOV[4]={0,0,0,0};
00324         double dP;
00325 
00326         if (iM != iN)
00327             return CMeta::GetNaN();
00328 
00329         dRet = 0;
00330         dCOV(adX,adY,&size,DCOV);
00331     dRet = DCOV[1];
00332     dP=CMeasurePearson::Pearson(adX, iM, adY, iN, EMapNone, adWX, adWY);
00333     if(dP<0)
00334         dRet *=-1;
00335 
00336     return (double)dRet;
00337 }
00338 
00339 double CMeasureEuclideanScaled::Measure(const float* adX, size_t iM,
00340         const float* adY, size_t iN, EMap eMap, const float* adWX,
00341         const float* adWY) const {
00342     size_t i;
00343     double dRet, d, dY, dX;
00344     dX = dY = 0;
00345     if (iM != iN)
00346         return CMeta::GetNaN();
00347 
00348     dRet = 0;
00349     for (i = 0; i < iN; ++i)
00350         if ((adX[i] || adY[i]) && !(CMeta::IsNaN(adX[i])
00351                 || CMeta::IsNaN(adY[i]))) {
00352             d = adX[i] - adY[i];
00353             d *= d;
00354             dX += (adX[i] * adX[i]);
00355             dY += (adY[i] * adY[i]);
00356             if (adWX || adWY)
00357                 d *= GetWeight(adWX, i) * GetWeight(adWY, i);
00358             dRet += d;
00359         }
00360     dRet /= (0.5 * (dX + dY));
00361     return sqrt(dRet);
00362 }
00363 
00398 double CMeasurePearson::Pearson(const float* adX, size_t iM, const float* adY,
00399         size_t iN, EMap eMap, const float* adWX, const float* adWY,
00400         size_t* piCount) {
00401     double dMX, dMY, dRet, dDX, dDY, dX, dY;
00402     size_t i, iCount;
00403 
00404     if (piCount)
00405         *piCount = 0;
00406     if (iM != iN)
00407         return CMeta::GetNaN();
00408 
00409     dMX = dMY = dX = dY = 0;
00410     for (iCount = i = 0; i < iN; ++i) {
00411         if (CMeta::IsNaN(adX[i]) || CMeta::IsNaN(adY[i]))
00412             continue;
00413         iCount++;
00414         dX += GetWeight(adWX, i);
00415         dY += GetWeight(adWY, i);
00416         dMX += adX[i] * GetWeight(adWX, i);
00417         dMY += adY[i] * GetWeight(adWY, i);
00418     }
00419     dMX /= dX;
00420     dMY /= dY;
00421 
00422     dRet = dDX = dDY = 0;
00423     for (i = 0; i < iN; ++i) {
00424         if (CMeta::IsNaN(adX[i]) || CMeta::IsNaN(adY[i]))
00425             continue;
00426         dX = adX[i] - dMX;
00427         dY = adY[i] - dMY;
00428         dRet += dX * dY * sqrt(GetWeight(adWX, i) * GetWeight(adWY, i));
00429         dDX += dX * dX * GetWeight(adWX, i);
00430         dDY += dY * dY * GetWeight(adWY, i);
00431     }
00432     if (!dDX || !dDY)
00433         dRet = CMeta::GetNaN();
00434     else {
00435         dRet /= ( sqrt(dDX) * sqrt(dDY) );
00436     }
00437 
00438     switch (eMap) {
00439     case EMapCenter:
00440         dRet = (1 + dRet) / 2;
00441         break;
00442 
00443     case EMapAbs:
00444         dRet = fabs(dRet);
00445         break;
00446     }
00447     if (piCount)
00448         *piCount = iCount;
00449 
00450     return dRet;
00451 }
00452 
00453 double CMeasureQuickPearson::Measure(const float* adX, size_t iM,
00454         const float* adY, size_t iN, EMap eMap, const float* adWX,
00455         const float* adWY) const {
00456     double dMX, dMY, dRet, dDX, dDY, dX, dY;
00457     size_t i;
00458 
00459     dMX = dMY = 0;
00460     for (i = 0; i < iN; ++i) {
00461         dMX += adX[i];
00462         dMY += adY[i];
00463     }
00464     dMX /= iN;
00465     dMY /= iN;
00466 
00467     dRet = dDX = dDY = 0;
00468     for (i = 0; i < iN; ++i) {
00469         dX = adX[i] - dMX;
00470         dY = adY[i] - dMY;
00471         dRet += dX * dY;
00472         dDX += dX * dX;
00473         dDY += dY * dY;
00474     }
00475     if (!(dDX || dDY))
00476         dRet = 1;
00477     else {
00478         if (dDX)
00479             dRet /= sqrt(dDX);
00480         if (dDY)
00481             dRet /= sqrt(dDY);
00482     }
00483 
00484     switch (eMap) {
00485     case EMapCenter:
00486         dRet = (1 + dRet) / 2;
00487         break;
00488 
00489     case EMapAbs:
00490         dRet = fabs(dRet);
00491         break;
00492     }
00493 
00494     return dRet;
00495 }
00496 
00497 double CMeasureKendallsTau::Measure(const float* adX, size_t iM,
00498         const float* adY, size_t iN, EMap eMap, const float* adWX,
00499         const float* adWY) const {
00500     double dRet;
00501 
00502     if (iM != iN)
00503         return CMeta::GetNaN();
00504     if (CMeasureImpl::IsNaN(adX, iM) || CMeasureImpl::IsNaN(adY, iN))
00505         return CMeasureImpl::MeasureTrim(this, adX, iM, adY, iN, eMap, adWX,
00506                 adWY, true);
00507 
00508     dRet = (adWX || adWY) ? CMeasureKendallsTauImpl::MeasureWeighted(adX, adY,
00509             iN, adWX, adWY) : CMeasureKendallsTauImpl::MeasureUnweighted(adX,
00510             adY, iN);
00511     if (dRet < -1)
00512         dRet = -1;
00513     else if (dRet > 1)
00514         dRet = 1;
00515     switch (eMap) {
00516     case EMapCenter:
00517         dRet = (1 + dRet) / 2;
00518         break;
00519 
00520     case EMapAbs:
00521         dRet = fabs(dRet);
00522         break;
00523     }
00524 
00525     return dRet;
00526 }
00527 
00528 double CMeasureKendallsTauImpl::MeasureWeighted(const float* adX,
00529         const float* adY, size_t iN, const float* adWX, const float* adWY) {
00530     size_t i, j;
00531     double dA1, dA2, dWX, dWY, dW, dN1, dN2, dS, dAA;
00532 
00533     dN1 = dN2 = dS = 0;
00534     for (i = 0; (i + 1) < iN; ++i)
00535         for (j = (i + 1); j < iN; ++j) {
00536             dA1 = adX[i] - adX[j];
00537             dA2 = adY[i] - adY[j];
00538             dWX = GetWeight(adWX, i) * GetWeight(adWX, j);
00539             dWY = GetWeight(adWY, i) * GetWeight(adWY, j);
00540             dW = sqrt(dWX * dWY);
00541             if (dAA = (dA1 * dA2)) {
00542                 dN1 += dWX;
00543                 dN2 += dWY;
00544                 dS += (dAA > 0) ? dW : -dW;
00545             } else if (dA1)
00546                 dN1 += dWX;
00547             else
00548                 dN2 += dWY;
00549         }
00550 
00551     return (dS / (sqrt(dN1) * sqrt(dN2)));
00552 }
00553 
00554 /*
00555  double CMeasureKendallsTauImpl::MeasureUnweighted( const float* adX, const float* adY,
00556  size_t iN ) {
00557  size_t i, j, iN1, iN2;
00558  float  dA1, dA2, dAA;
00559  int        iS;
00560 
00561  for( iN1 = iN2 = iS = i = 0; ( i + 1 ) < iN; ++i )
00562  for( j = ( i + 1 ); j < iN; ++j ) {
00563  dA1 = adX[ i ] - adX[ j ];
00564  dA2 = adY[ i ] - adY[ j ];
00565  if( dAA = ( dA1 * dA2 ) ) {
00566  iN1++;
00567  iN2++;
00568  iS += ( dAA > 0 ) ? 1 : -1; }
00569  else if( dA1 )
00570  iN1++;
00571  else
00572  iN2++; }
00573 
00574  return ( ( iN1 && iN2 ) ? ( iS / ( sqrt( (float)iN1 ) * sqrt( (float)iN2 ) ) ) : 1 ); }
00575  */
00576 
00577 double CMeasureKendallsTauImpl::MeasureUnweighted(const float* adX,
00578         const float* adY, size_t iN) {
00579     static const size_t c_iCache = 1024;
00580     static size_t l_aiPerm[c_iCache];
00581     static size_t l_aiTemp[c_iCache];
00582     size_t* aiPerm;
00583     size_t* aiTemp;
00584     size_t i, iFirst, iT, iU, iV, iExchanges;
00585     int iTotal;
00586     double dBottom;
00587 
00588     aiTemp = (iN > c_iCache) ? new size_t[iN] : l_aiTemp;
00589     aiPerm = (iN > c_iCache) ? new size_t[iN] : l_aiPerm;
00590     for (i = 0; i < iN; ++i)
00591         aiPerm[i] = i;
00592 
00593     // First of all we first by the first ordering.
00594     sort(aiPerm, aiPerm + iN, SKendallsFirst(adX, adY));
00595     iFirst = iT = 0;
00596     // Next, we compute the number of joint ties.
00597     for (i = 1; i < iN; ++i)
00598         if ((adX[aiPerm[iFirst]] != adX[aiPerm[i]]) || (adY[aiPerm[iFirst]]
00599                 != adY[aiPerm[i]])) {
00600             iT += ((i - iFirst) * (i - iFirst - 1)) / 2;
00601             iFirst = i;
00602         }
00603     iT += ((i - iFirst) * (i - iFirst - 1)) / 2;
00604 
00605     // Now we compute the number of ties.
00606     iFirst = iU = 0;
00607     for (i = 1; i < iN; ++i)
00608         if (adX[aiPerm[iFirst]] != adX[aiPerm[i]]) {
00609             iU += ((i - iFirst) * (i - iFirst - 1)) / 2;
00610             iFirst = i;
00611         }
00612     iU += ((i - iFirst) * (i - iFirst - 1)) / 2;
00613 
00614     // Now we use an exchange counter to order by the second ordering and count the number
00615     // of exchanges (i.e., discordances).
00616     memset(aiTemp, 0, iN * sizeof(*aiTemp));
00617     iExchanges = CountExchanges(aiPerm, iN, aiTemp, SKendallsSecond(adX, adY));
00618 
00619     // Now we compute the number of ties.
00620     iFirst = iV = 0;
00621     for (i = 1; i < iN; ++i)
00622         if (adY[aiPerm[iFirst]] != adY[aiPerm[i]]) {
00623             iV += ((i - iFirst) * (i - iFirst - 1)) / 2;
00624             iFirst = i;
00625         }
00626     iV += ((i - iFirst) * (i - iFirst - 1)) / 2;
00627 
00628     if (iN > c_iCache) {
00629         delete[] aiPerm;
00630         delete[] aiTemp;
00631     }
00632 
00633     iTotal = (iN * (iN - 1)) / 2;
00634     dBottom = sqrt((float) (iTotal - iU)) * sqrt((float) (iTotal - iV));
00635     iTotal = (iTotal - (iV + iU - iT)) - (2 * iExchanges);
00636     return (dBottom ? (iTotal / dBottom) : 1);
00637 }
00638 
00639 size_t CMeasureKendallsTauImpl::CountExchanges(size_t* aiPerm, size_t iN,
00640         size_t* aiTemp, const SKendallsSecond& sCompare, size_t iOffset) {
00641     size_t iExchanges, iT, iL0, iL1, iMiddle, i, j, k;
00642     int iD;
00643 
00644     if (iN == 1)
00645         return 0;
00646     if (iN == 2) {
00647         if (sCompare(aiPerm[iOffset], aiPerm[iOffset + 1]) <= 0)
00648             return 0;
00649         iT = aiPerm[iOffset];
00650         aiPerm[iOffset] = aiPerm[iOffset + 1];
00651         aiPerm[iOffset + 1] = iT;
00652         return 1;
00653     }
00654 
00655     iL1 = iN - (iL0 = iN / 2);
00656     iMiddle = iOffset + iL0;
00657     iExchanges = CountExchanges(aiPerm, iL0, aiTemp, sCompare, iOffset)
00658             + CountExchanges(aiPerm, iL1, aiTemp, sCompare, iMiddle);
00659 
00660     // If the last element of the first subarray is smaller than the first element of 
00661     // the second subarray, there is nothing to do and we can return the exchanges got so far.
00662     if (sCompare(aiPerm[iMiddle - 1], aiPerm[iMiddle]) < 0)
00663         return iExchanges;
00664 
00665     // We merge the lists into temp, adding the number of forward moves to exchanges.
00666     for (i = j = k = 0; (j < iL0) || (k < iL1); ++i) {
00667         if ((k >= iL1) || ((j < iL0) && (sCompare(aiPerm[iOffset + j],
00668                 aiPerm[iMiddle + k]) <= 0))) {
00669             aiTemp[i] = aiPerm[iOffset + j];
00670             iD = i - j++;
00671         } else {
00672             aiTemp[i] = aiPerm[iMiddle + k];
00673             iD = (iOffset + i) - (iMiddle + k++);
00674         }
00675 
00676         if (iD > 0)
00677             iExchanges += iD;
00678     }
00679 
00680     memcpy(aiPerm + iOffset, aiTemp, iN * sizeof(*aiPerm));
00681     return iExchanges;
00682 }
00683 
00684 double CMeasureAutocorrelate::Measure(const float* adX, size_t iM,
00685         const float* adY, size_t iN, EMap eMap, const float* adWX,
00686         const float* adWY) const {
00687     size_t i, j;
00688     double dCur, dMax;
00689     float* adZ;
00690     float* adWZ;
00691 
00692     if (iM != iN)
00693         return CMeta::GetNaN();
00694 
00695     dMax = m_pMeasure->Measure(adX, iM, adY, iN, eMap, adWX, adWY);
00696     adZ = new float[iN];
00697     adWZ = adWY ? new float[iN] : NULL;
00698     for (i = 1; i < iN; ++i) {
00699         for (j = 0; j < iN; ++j) {
00700             adZ[j] = adY[(j + i) % iN];
00701             if (adWZ)
00702                 adWZ[j] = adWY[(j + i) % iN];
00703         }
00704         if ((dCur = m_pMeasure->Measure(adX, iM, adZ, iN, eMap, adWX, adWZ))
00705                 > dMax)
00706             dMax = dCur;
00707     }
00708     delete[] adZ;
00709 
00710     return dMax;
00711 }
00712 
00713 double CMeasureSpearman::Measure(const float* adX, size_t iM, const float* adY,
00714         size_t iN, EMap eMap, const float* adWX, const float* adWY) const {
00715     static const size_t c_iCache = 1024;
00716     static size_t l_aiX[c_iCache];
00717     static size_t l_aiY[c_iCache];
00718     size_t* aiX;
00719     size_t* aiY;
00720     size_t i, j, iSum;
00721     double dRet, d, dSum;
00722 
00723     if ((iM != iN) || adWX || adWY)
00724         return CMeta::GetNaN();
00725     if (CMeasureImpl::IsNaN(adX, iM) || CMeasureImpl::IsNaN(adY, iN))
00726         return CMeasureImpl::MeasureTrim(this, adX, iM, adY, iN, eMap, adWX,
00727                 adWY, true);
00728 
00729     if (m_fTransformed) {
00730         dSum = 0;
00731         for (i = 0; i < iN; ++i) {
00732             d = adX[i] - adY[i];
00733             dSum += d * d;
00734         }
00735         dRet = dSum ? 1 - (6 * dSum / iN / ((iN * iN) - 1)) : 1;
00736     } else {
00737         if (iN > c_iCache) {
00738             aiX = new size_t[iM];
00739             aiY = new size_t[iN];
00740         } else {
00741             aiX = l_aiX;
00742             aiY = l_aiY;
00743         }
00744         memset(aiX, 0, iM * sizeof(*aiX));
00745         memset(aiY, 0, iN * sizeof(*aiY));
00746         for (i = 0; i < iN; ++i)
00747             for (j = 0; j < iN; ++j) {
00748                 if (i == j)
00749                     continue;
00750                 if (adX[j] < adX[i])
00751                     aiX[i]++;
00752                 if (adY[j] < adY[i])
00753                     aiY[i]++;
00754             }
00755 
00756         for (iSum = i = 0; i < iN; ++i) {
00757             j = aiX[i] - aiY[i];
00758             iSum += j * j;
00759         }
00760         if (aiX != l_aiX)
00761             delete[] aiX;
00762         if (aiY != l_aiY)
00763             delete[] aiY;
00764         dRet = iSum ? 1 - (6.0 * iSum / iN / ((iN * iN) - 1)) : 1;
00765     }
00766 
00767     switch (eMap) {
00768     case EMapCenter:
00769         dRet = (1 + dRet) / 2;
00770         break;
00771 
00772     case EMapAbs:
00773         dRet = fabs(dRet);
00774         break;
00775     }
00776 
00777     return dRet;
00778 }
00779 
00780 double CMeasurePearNorm::Measure(const float* adX, size_t iM,
00781         const float* adY,size_t iN, EMap eMap, const float* adWX, const float* adWY) const {
00782     static const float c_dBound = 0.9999f;
00783     double dP;
00784 
00785     dP = CMeasurePearson::Pearson(adX, iM, adY, iN, EMapNone, adWX, adWY);
00786 
00787     if (fabs(dP) >= c_dBound)
00788         dP *= c_dBound;
00789     dP = CStatistics::FisherTransform(dP);
00790     if (m_dAverage != HUGE_VAL)
00791         dP = (dP - m_dAverage) / m_dStdDev;
00792     return dP;
00793 }
00794 
00795 double CMeasureHypergeometric::Measure(const float* adX, size_t iM,
00796         const float* adY, size_t iN, EMap eMap, const float* adWX,
00797         const float* adWY) const {
00798     size_t i, iOne, iTwo, iBoth, iTotalPresent;
00799 
00800     if (iM != iN)
00801         return CMeta::GetNaN();
00802 
00803     iOne = iTwo = iTotalPresent = iBoth = 0;
00804     for (i = 0; i < iN; ++i) {
00805         if (CMeta::IsNaN(adX[i]) || CMeta::IsNaN(adY[i]))
00806             continue;
00807         iTotalPresent++;
00808         if (adX[i])
00809             iOne++;
00810         if (adY[i]) {
00811             iTwo++;
00812             if (adX[i])
00813                 iBoth++;
00814         }
00815     }
00816 
00817     return (1
00818             - CStatistics::HypergeometricCDF(iBoth, iOne, iTwo, iTotalPresent));
00819 }
00820 
00821 double CMeasureInnerProduct::Measure(const float* adX, size_t iM,
00822         const float* adY, size_t iN, EMap eMap, const float* adWX,
00823         const float* adWY) const {
00824     size_t i;
00825     double dRet;
00826 
00827     if (iM != iN)
00828         return CMeta::GetNaN();
00829 
00830     dRet = 0;
00831     for (i = 0; i < iN; ++i)
00832         if ((adX[i] || adY[i]) && !(CMeta::IsNaN(adX[i])
00833                 || CMeta::IsNaN(adY[i])))
00834             dRet += adX[i] * adY[i] * GetWeight(adWX, i) * GetWeight(adWY, i);
00835 
00836     return dRet;
00837 }
00838 
00839 double CMeasureBinaryInnerProduct::Measure(const float* adX, size_t iM,
00840         const float* adY, size_t iN, EMap eMap, const float* adWX,
00841         const float* adWY) const {
00842     size_t i;
00843     double dRet, dCount;
00844 
00845     if (iM != iN)
00846         return CMeta::GetNaN();
00847 
00848     dRet = dCount = 0;
00849     for (i = 0; i < iN; ++i) {
00850         if (!(CMeta::IsNaN(adX[i]) && CMeta::IsNaN(adY[i])))
00851             dCount += GetWeight(adWX, i) + GetWeight(adWY, i);
00852         if (CMeta::IsNaN(adX[i]) || CMeta::IsNaN(adY[i]) || !adX[i] || !adY[i])
00853             continue;
00854         dRet += GetWeight(adWX, i) * GetWeight(adWY, i);
00855     }
00856     if (dCount)
00857         dRet /= dCount / 2;
00858 
00859     return dRet;
00860 }
00861 
00862 double CMeasureMutualInformation::Measure(const float* adX, size_t iM,
00863         const float* adY, size_t iN, EMap eMap, const float* adWX,
00864         const float* adWY) const {
00865     map<float, size_t> mapOne, mapTwo;
00866     map<float, size_t>::iterator iter;
00867     map<float, size_t>::const_iterator iterOne, iterTwo;
00868     map<pair<float, float> , size_t> mapJoint;
00869     map<pair<float, float> , size_t>::iterator iterJoint;
00870     size_t i, iOne, iTwo, iJoint;
00871     double dOne, dJoint, dRet;
00872 
00873     if (iM != iN)
00874         return CMeta::GetNaN();
00875 
00876     iOne = iTwo = iJoint = 0;
00877     for (i = 0; i < iM; ++i) {
00878         if (!CMeta::IsNaN(adX[i])) {
00879             if ((iter = mapOne.find(adX[i])) == mapOne.end())
00880                 mapOne[adX[i]] = 1;
00881             else
00882                 iter->second += 1;
00883             iOne++;
00884             if (!CMeta::IsNaN(adY[i])) {
00885                 if ((iterJoint = mapJoint.find(pair<float, float> (adX[i],
00886                         adY[i]))) == mapJoint.end())
00887                     mapJoint[pair<float, float> (adX[i], adY[i])] = 1;
00888                 else
00889                     iterJoint->second += 1;
00890                 iJoint++;
00891             }
00892         }
00893         if (!CMeta::IsNaN(adY[i])) {
00894             if ((iter = mapTwo.find(adY[i])) == mapTwo.end())
00895                 mapTwo[adY[i]] = 1;
00896             else
00897                 iter->second += 1;
00898             iTwo++;
00899         }
00900     }
00901 
00902     for (dRet = 0, iterOne = mapOne.begin(); iterOne != mapOne.end(); ++iterOne) {
00903         dOne = (double) iterOne->second / iOne;
00904         for (iterTwo = mapTwo.begin(); iterTwo != mapTwo.end(); ++iterTwo)
00905             if ((iterJoint = mapJoint.find(pair<float, float> (iterOne->first,
00906                     iterTwo->first))) != mapJoint.end()) {
00907                 dJoint = (double) iterJoint->second / iJoint;
00908                 dRet += dJoint * log(dJoint * iTwo / dOne / iterTwo->second);
00909             }
00910     }
00911     dRet /= log(2.0);
00912     dRet -= (double) max(mapOne.size(), mapTwo.size()) / (2 * max(iOne, iTwo)
00913             * log(2.0));
00914 
00915     return dRet;
00916 }
00917 
00918 double CMeasureRelativeAUC::Measure(const float* adX, size_t iM,
00919         const float* adY, size_t iN, EMap eMap, const float* adWX,
00920         const float* adWY) const {
00921     float dOne, dTwo, dDiff;
00922     size_t i;
00923 
00924     if (iM != iN)
00925         return CMeta::GetNaN();
00926 
00927     dOne = dTwo = dDiff = 0;
00928     for (i = 0; i < iN; ++i) {
00929         if (CMeta::IsNaN(adX[i]) || CMeta::IsNaN(adY[i]))
00930             continue;
00931         dOne += fabs(adX[i]);
00932         dTwo += fabs(adY[i]);
00933         dDiff += fabs(adX[i] - adY[i]);
00934     }
00935 
00936     return (1 - (dDiff / (dOne + dTwo)));
00937 }
00938 
00939 double CMeasurePearsonSignificance::Measure(const float* adX, size_t iM,
00940         const float* adY, size_t iN, EMap eMap, const float* adWX,
00941         const float* adWY) const {
00942     double dRet, dPearson;
00943     size_t iCount;
00944 
00945     if (CMeta::IsNaN(dPearson = CMeasurePearson::Pearson(adX, iM, adY, iN,
00946             EMapNone, adWX, adWY, &iCount)))
00947         return CMeta::GetNaN();
00948 
00949     dRet = (iCount < 2) ? 0 : CStatistics::TCDF(dPearson * sqrt(
00950             (double) (iCount - 2)) / sqrt(1 - (dPearson * dPearson)), iCount
00951             - 2);
00952     switch (eMap) {
00953     case EMapCenter:
00954         dRet = (dPearson > 0) ? ((dRet / 2) + 0.5) : (0.5 - (dRet / 2));
00955         break;
00956 
00957     case EMapNone:
00958         if (dPearson < 0)
00959             dRet *= -1;
00960         break;
00961     }
00962 
00963     return dRet;
00964 }
00965 
00966 double CMeasureDice::Measure( const float* adX, size_t iM, const float* adY,
00967     size_t iN, EMap eMap, const float* adWX, const float* adWY ) const {
00968     double  dDot, dXY, dYX, dX, dY;
00969     size_t  i;
00970 
00971     dDot = dXY = dYX = 0;
00972     for( i = 0; i < min(iM, iN); ++i ) {
00973         if( CMeta::IsNaN( dX = adX[i] ) || CMeta::IsNaN( dY = adY[i] ) )
00974             continue;
00975         dX *= GetWeight( adWX, i );
00976         dY *= GetWeight( adWY, i );
00977         dDot += dX * dY;
00978         dXY += pow( max(0.0, dX - dY), 2 );
00979         dYX += pow( max(0.0, dY - dX), 2 ); }
00980     dX = dDot + ( m_dAlpha * dXY ) + ( ( 1 - m_dAlpha ) * dYX );
00981     return ( dX ? ( dDot / dX ) : CMeta::GetNaN( ) ); }
00982 
00983 
00984 
00985 
00986 
00987 
00988 
00989 
00990 
00991 }
00992 
00993 
00994