Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "seekwriter.h" 00023 #include "seekreader.h" 00024 00025 namespace Sleipnir { 00026 00027 //mat a symmetric matrix 00028 bool CSeekWriter::ReadSparseMatrix(const char *fileName, 00029 vector<map<utype,float> > &mat, CSeekIntIntMap &m, 00030 const int maxRank, const float rbp_p, 00031 const vector<string> &vecstrGenes){ 00032 00033 FILE *f = fopen(fileName, "rb"); 00034 if(f==NULL){ 00035 cerr << "File not found" << endl; 00036 return false; 00037 } 00038 00039 utype numGenes = 0; 00040 utype numPresent = 0; 00041 utype i, j; 00042 int ret; 00043 mat.clear(); 00044 00045 mat.resize(vecstrGenes.size()); 00046 for(i=0; i<vecstrGenes.size(); i++) 00047 mat[i] = map<utype,float>(); 00048 00049 //m need to be initialized to size vecstrGenes.size() first! 00050 ret = fread((char*) (&numPresent), 1, sizeof(numPresent), f); 00051 for(j=0; j<numPresent; j++){ 00052 utype val; 00053 ret = fread((char*)(&val), 1, sizeof(val), f); 00054 m.Add(val); 00055 } 00056 00057 ret = fread((char*) (&numGenes), 1, sizeof(numGenes), f); 00058 00059 vector<float> rbp_score; 00060 rbp_score.resize(maxRank); 00061 for(i=0; i<maxRank; i++) 00062 rbp_score[i] = (1.0 - rbp_p) * pow(rbp_p, i); 00063 00064 for(i=0; i<numGenes; i++){ 00065 utype id, id2; 00066 unsigned short numEntries; 00067 unsigned short val; 00068 ret = fread((char*)(&id), 1, sizeof(id), f); 00069 ret = fread((char*)(&numEntries), 1, sizeof(numEntries), f); 00070 for(j=0; j<numEntries; j++){ 00071 ret = fread((char*)(&id2),1,sizeof(id2),f); 00072 ret = fread((char*)(&val),1,sizeof(val),f); 00073 utype first = id; 00074 utype second = id2; 00075 if(first>=second){ 00076 first = id2; 00077 second = id; 00078 } 00079 mat[first][second] = rbp_score[val]; 00080 } 00081 } 00082 fclose(f); 00083 00084 utype ii, jj; 00085 const vector<utype> &allRGenes = m.GetAllReverse(); 00086 fprintf(stderr, "Begin calculating row sum\n"); 00087 vector<float> vecSum; 00088 CSeekTools::InitVector(vecSum, vecstrGenes.size(), (float) 0); 00089 for(ii=0; ii<m.GetNumSet(); ii++){ 00090 i = allRGenes[ii]; 00091 map<utype,float>::iterator it; 00092 for(it=mat[i].begin(); it!=mat[i].end(); it++){ 00093 j = it->first; 00094 float second = it->second; 00095 vecSum[i] += second; 00096 vecSum[j] += second; 00097 } 00098 } 00099 00100 vector<float> vecSqrtSum; 00101 CSeekTools::InitVector(vecSqrtSum, vecstrGenes.size(), (float) 0); 00102 00103 for(ii=0; ii<m.GetNumSet(); ii++){ 00104 i = allRGenes[ii]; 00105 if(vecSum[i]==0) continue; 00106 vecSqrtSum[i] = sqrtf(vecSum[i]); 00107 } 00108 00109 fprintf(stderr, "Begin normalization using row sum\n"); 00110 for(ii=0; ii<m.GetNumSet(); ii++){ 00111 i = allRGenes[ii]; 00112 map<utype,float>::iterator it; 00113 for(it=mat[i].begin(); it!=mat[i].end(); it++){ 00114 j = it->first; 00115 if(vecSqrtSum[i]==0 || vecSqrtSum[j]==0) continue; 00116 it->second = it->second / vecSqrtSum[i] / vecSqrtSum[j]; 00117 //symmetric matrix 00118 mat[j][i] = it->second; 00119 } 00120 } 00121 return true; 00122 } 00123 00124 //add this matrix with weight w 00125 bool CSeekWriter::SumSparseMatrix(CSparseFlatMatrix<float> &mat1, 00126 CSparseFlatHalfMatrix<float> &res, const CSeekIntIntMap &mi, const float w){ 00127 utype i, j, ii, jj; 00128 //suppose res is already initialized 00129 const vector<utype> &allR = mi.GetAllReverse(); 00130 for(ii=0; ii<mi.GetNumSet(); ii++){ 00131 i = allR[ii]; 00132 vector<CPair<float> >::iterator row_it; 00133 vector<CPair<float> > create; 00134 for(row_it = mat1.RowBegin(i); row_it!=mat1.RowEnd(i); row_it++){ 00135 utype j = row_it->i; 00136 float rv = row_it->v; 00137 if(j<=i) continue; //only add if j is greater than i 00138 CPair<float> *pp = res.GetElement(i,j); 00139 if(pp==NULL){ 00140 CPair<float> cp; 00141 cp.i = j; 00142 cp.v = rv; 00143 create.push_back(cp); 00144 continue; 00145 } 00146 pp->v += rv * w; 00147 } 00148 for(jj=0; jj<create.size(); jj++) 00149 res.Add(i, create[jj].i, create[jj].v * w); 00150 if(create.size()>0) 00151 res.SortRow(i); 00152 } 00153 return true; 00154 } 00155 00156 bool CSeekWriter::SumSparseMatrix(CSparseFlatMatrix<float> &mat1, 00157 CHalfMatrix<float> &res, const CSeekIntIntMap &mi, const float w){ 00158 utype i, ii; 00159 //suppose res is already initialized 00160 const vector<utype> &allR = mi.GetAllReverse(); 00161 for(ii=0; ii<mi.GetNumSet(); ii++){ 00162 i = allR[ii]; 00163 vector<CPair<float> >::iterator row_it; 00164 for(row_it = mat1.RowBegin(i); row_it!=mat1.RowEnd(i); row_it++){ 00165 utype j = row_it->i; 00166 float rv = row_it->v; 00167 if(j<=i) continue; //only add if j is greater than i 00168 res.Set(i, j, res.Get(i, j) + rv * w); 00169 } 00170 } 00171 return true; 00172 } 00173 //Calculate the similarity of two distance matrices 00174 //by simply taking product of two matrix for corresponding entries 00175 bool CSeekWriter::ProductNorm(const vector<map<utype,float> > &mat1, 00176 const vector<map<utype,float> > &mat2, const CSeekIntIntMap &m1, 00177 const CSeekIntIntMap &m2, vector<map<utype,float> > &re){ 00178 00179 utype ii, jj; 00180 utype i, j; 00181 00182 re.resize(mat1.size()); 00183 for(i=0; i<mat1.size(); i++) 00184 re[i] = map<utype,float>(); 00185 00186 const vector<utype> &allRGenes1 = m1.GetAllReverse(); 00187 CSeekIntIntMap mi(mat1.size()); 00188 for(ii=0; ii<m1.GetNumSet(); ii++){ 00189 i = allRGenes1[ii]; 00190 if(CSeekTools::IsNaN(m2.GetForward(i))) continue; 00191 mi.Add(i); 00192 } 00193 00194 const vector<utype> &allR = mi.GetAllReverse(); 00195 fprintf(stderr, "Begin calculating row sum\n"); 00196 vector<float> vecSum; 00197 CSeekTools::InitVector(vecSum, mat1.size(), (float) 0); 00198 for(ii=0; ii<mi.GetNumSet(); ii++){ 00199 i = allR[ii]; 00200 map<utype,float>::const_iterator it; 00201 for(it=mat1[i].begin(); it!=mat1[i].end(); it++){ 00202 j = it->first; 00203 float f1 = it->second; 00204 map<utype,float>::const_iterator it2; 00205 if((it2 = mat2[i].find(j))==mat2[i].end()) continue; 00206 float f2 = it2->second; 00207 re[i][j] = sqrtf(f1*f2); 00208 vecSum[i] += re[i][j]; 00209 vecSum[j] += re[i][j]; 00210 } 00211 } 00212 00213 vector<float> vecSqrtSum; 00214 CSeekTools::InitVector(vecSqrtSum, mat1.size(), (float)0); 00215 for(ii=0; ii<mi.GetNumSet(); ii++){ 00216 i = allR[ii]; 00217 if(vecSum[i]==0) continue; 00218 vecSqrtSum[i] = sqrtf(vecSum[i]); 00219 } 00220 00221 fprintf(stderr, "Begin normalization using row sum\n"); 00222 for(ii=0; ii<mi.GetNumSet(); ii++){ 00223 i = allR[ii]; 00224 map<utype,float>::iterator it; 00225 for(it=re[i].begin(); it!=re[i].end(); it++){ 00226 j = it->first; 00227 if(vecSqrtSum[i]==0 || vecSqrtSum[j]==0) continue; 00228 it->second = it->second / vecSqrtSum[i] / vecSqrtSum[j]; 00229 } 00230 } 00231 return true; 00232 } 00233 00234 bool CSeekWriter::TopologicalOverlap(CDataPair &Dat, 00235 const vector<string> &vecstrGenes){ 00236 size_t i, j; 00237 vector<unsigned int> veciGenes; 00238 veciGenes.resize(vecstrGenes.size()); 00239 for(i=0; i<vecstrGenes.size(); i++) 00240 veciGenes[i] = (unsigned int) Dat.GetGeneIndex(vecstrGenes[i]); 00241 00242 unsigned int s,t; 00243 float d; 00244 CSeekIntIntMap m(vecstrGenes.size()); 00245 for(i=0; i<vecstrGenes.size(); i++){ 00246 if((s=veciGenes[i])==(unsigned int)-1) continue; 00247 m.Add(i); 00248 } 00249 00250 size_t trueSize = m.GetNumSet(); 00251 vector<float> inner(trueSize); 00252 vector<vector<float> > fs(trueSize, inner); 00253 00254 //float* fs = new float[trueSize*trueSize]; 00255 const vector<utype> &allRGenes = m.GetAllReverse(); 00256 00257 for(i=0; i<m.GetNumSet(); i++){ 00258 s = veciGenes[allRGenes[i]]; 00259 for(j=i+1; j<m.GetNumSet(); j++){ 00260 t = veciGenes[allRGenes[j]]; 00261 if(CMeta::IsNaN(d = Dat.Get(s,t))){ 00262 fs[i][j] = 0; 00263 fs[j][i] = 0; 00264 fprintf(stderr, "Warning i, j is NaN, set to 0!\n", i, j); 00265 }else{ 00266 fs[i][j] = d; 00267 fs[j][i] = d; 00268 } 00269 } 00270 fs[i][i] = 0; 00271 } 00272 00273 //first step: transform z-scores back to correlation 00274 //(exp(z*2) = (1+r)/(1-r) -> exp(2z) - exp(2z)*r = 1+r -> exp(2z) - 1 = exp(2z)*r + r = (exp(2z) + 1)*r-> 00275 //(exp(2z) - 1) / (exp(2z) + 1) = r 00276 //second step: transform by abs, then take it to the exponent 9 00277 for(i=0; i<m.GetNumSet(); i++){ 00278 for(j=i+1; j<m.GetNumSet(); j++){ 00279 if((d = fs[i][j])==0) continue; 00280 d = (expf(2.0*d) - 1.0) / (expf(2.0*d) + 1.0); 00281 d = pow(abs(d), 9); 00282 fs[i][j] = d; 00283 fs[j][i] = d; 00284 //fprintf(stderr, "%.3e\n", d); 00285 } 00286 } 00287 00288 fprintf(stderr, "Finished step 1: tranform z-score back to pearson\n"); 00289 vector<float> vecSum; 00290 CSeekTools::InitVector(vecSum, trueSize, CMeta::GetNaN()); 00291 for(i=0; i<m.GetNumSet(); i++) 00292 vecSum[i] = 0; 00293 00294 for(i=0; i<m.GetNumSet(); i++){ 00295 for(j=i+1; j<m.GetNumSet(); j++){ 00296 vecSum[i] += fs[i][j]; 00297 vecSum[j] += fs[i][j]; 00298 } 00299 } 00300 00301 //duplicate of fs 00302 vector<vector<float> > fs2(trueSize, inner); 00303 for(i=0; i<m.GetNumSet(); i++){ 00304 for(j=i+1; j<m.GetNumSet(); j++){ 00305 fs2[i][j] = fs[i][j]; 00306 fs2[j][i] = fs[i][j]; 00307 } 00308 fs2[i][i] = 0; 00309 } 00310 00311 00312 //temporary storage matrix 00313 CHalfMatrix<float> res; 00314 res.Initialize(trueSize); 00315 for(i=0; i<m.GetNumSet(); i++){ 00316 for(j=i+1; j<m.GetNumSet(); j++){ 00317 res.Set(i, j, 0); 00318 } 00319 } 00320 00321 //result of multiplication 00322 fprintf(stderr, "Begin!\n"); 00323 vector<vector<float> > fs_result(trueSize, inner); 00324 CStrassen::strassen(fs, fs2, fs_result, trueSize); 00325 fprintf(stderr, "Done!\n"); 00326 00327 size_t k; 00328 unsigned int u; 00329 //size_t ii = 0; 00330 for(i=0; i<m.GetNumSet(); i++){ 00331 for(j=i+1; j<m.GetNumSet(); j++){ 00332 float tsum = fs_result[i][j]; 00333 /*float *pi = &fs[i*trueSize]; 00334 float *pj = &fs[j*trueSize]; 00335 for(k=0; k<m.GetNumSet(); k++){ 00336 tsum += pi[k] * pj[k]; 00337 }*/ 00338 tsum -= fs[i][i] * fs[j][i]; 00339 tsum -= fs[i][j] * fs[j][j]; 00340 float tmin = 0; 00341 if(vecSum[i] < vecSum[j]) 00342 tmin = vecSum[i]; 00343 else 00344 tmin = vecSum[j]; 00345 float to = (tsum + d) / (tmin + 1.0 - d); 00346 res.Set(i, j, (float) to); //temporary matrix to store the results 00347 //fprintf(stderr, "%.3e\n", to); 00348 } 00349 if(i%100==0){ 00350 fprintf(stderr, "Doing topological overlap calculation, current %d\n", i); 00351 } 00352 } 00353 00354 fprintf(stderr, "Finished step 2: topological overlap calculation\n"); 00355 00356 for(i=0; i<m.GetNumSet(); i++){ 00357 s = veciGenes[allRGenes[i]]; 00358 for(j=i+1; j<m.GetNumSet(); j++){ 00359 t = veciGenes[allRGenes[j]]; 00360 Dat.Set(s, t, res.Get(i, j)); 00361 } 00362 Dat.Set(s, s, 1.0); 00363 } 00364 00365 } 00366 00367 00368 bool CSeekWriter::NormalizeDAB(CDataPair &Dat, 00369 const vector<string> &vecstrGenes, 00370 //bool cutoff, float cutoff_val, 00371 bool expTransform, bool divideNorm, bool subtractNorm){ 00372 //default cutoff_val is 0 00373 00374 size_t i, j; 00375 vector<unsigned int> veciGenes; 00376 veciGenes.clear(); 00377 veciGenes.resize(vecstrGenes.size()); 00378 for(i=0; i<vecstrGenes.size(); i++) 00379 veciGenes[i] = (unsigned int) Dat.GetGeneIndex(vecstrGenes[i]); 00380 00381 vector<float> vecSum; 00382 vector<int> vecNum; 00383 CSeekTools::InitVector(vecSum, vecstrGenes.size(), CMeta::GetNaN()); 00384 CSeekTools::InitVector(vecNum, vecstrGenes.size(), (int)-9999); 00385 00386 unsigned int s,t; 00387 for(i=0; i<vecstrGenes.size(); i++){ 00388 if((s=veciGenes[i])==(unsigned int)-1) continue; 00389 vecSum[i] = 0; 00390 vecNum[i] = 0; 00391 } 00392 00393 if(divideNorm && subtractNorm){ 00394 fprintf(stderr, "Error: both divideNorm and subtractNorm are true\n"); 00395 return false; 00396 }else if(!divideNorm && !subtractNorm){ 00397 fprintf(stderr, "Error: both divideNorm and subtractNorm are false\n"); 00398 return false; 00399 } 00400 00401 float d = -1; 00402 float r = -1; 00403 for(i=0; i<vecstrGenes.size(); i++){ 00404 if((s=veciGenes[i])==(unsigned int)-1) continue; 00405 for(j=i+1; j<vecstrGenes.size(); j++){ 00406 if((t=veciGenes[j])==(unsigned int)-1) continue; 00407 if(CMeta::IsNaN(d = Dat.Get(s,t))) continue; 00408 /*if(cutoff){ 00409 if(d>cutoff_val){ 00410 if(expTransform) 00411 r = expf(-1.0*d*d/2.0); 00412 else 00413 r = d; 00414 vecSum[i] += r; 00415 vecSum[j] += r; 00416 vecNum[i]++; 00417 vecNum[j]++; 00418 } 00419 } 00420 else{*/ 00421 //fprintf(stderr, "Warning: Negative Z-Scores"); 00422 if(expTransform) 00423 r = expf(-1.0*d*d/2.0); 00424 else 00425 r = d; 00426 vecSum[i] += r; 00427 vecSum[j] += r; 00428 vecNum[i]++; 00429 vecNum[j]++; 00430 //} 00431 } 00432 } 00433 00434 for(i=0; i<vecstrGenes.size(); i++){ 00435 if((s=veciGenes[i])==(unsigned int)-1) continue; 00436 for(j=i+1; j<vecstrGenes.size(); j++){ 00437 if((t=veciGenes[j])==(unsigned int)-1) continue; 00438 if(CMeta::IsNaN(d = Dat.Get(s,t))) continue; 00439 /*if(cutoff){ 00440 if(d>cutoff_val){ 00441 if(expTransform){ 00442 if(divideNorm) 00443 r=expf(-1.0*d*d/2.0)/sqrtf(vecSum[i])/sqrtf(vecSum[j]); 00444 else if(subtractNorm) 00445 r=expf(-1.0*d*d/2.0)-vecSum[i]/vecNum[i]-vecSum[j]/vecNum[j]; 00446 }else{ 00447 if(divideNorm) 00448 r=d/sqrtf(vecSum[i])/sqrtf(vecSum[j]); 00449 else if(subtractNorm) 00450 r=d-vecSum[i]/vecNum[i]-vecSum[j]/vecNum[j]; 00451 } 00452 }else{ 00453 r=0; //default value 00454 } 00455 Dat.Set(s, t, r); 00456 } 00457 else{*/ 00458 if(expTransform){ 00459 if(divideNorm) 00460 r=expf(-1.0*d*d/2.0)/sqrtf(vecSum[i])/sqrtf(vecSum[j]); 00461 else if(subtractNorm) 00462 r=expf(-1.0*d*d/2.0)-vecSum[i]/vecNum[i]-vecSum[j]/vecNum[j]; 00463 }else{ 00464 if(divideNorm){ 00465 //DANGEROUS 00466 if(vecSum[i]<=0){ 00467 fprintf(stderr, "Warning, divide sqrt(z), when z<=0\n"); 00468 r=0; //default value 00469 }else 00470 r=d/sqrtf(vecSum[i])/sqrtf(vecSum[j]); 00471 }else if(subtractNorm){ 00472 r=d-0.5*(vecSum[i]/vecNum[i]+vecSum[j]/vecNum[j]); 00473 } 00474 } 00475 Dat.Set(s, t, r); 00476 //} 00477 } 00478 } 00479 00480 //Plot a distribution 00481 /*vector<unsigned long> bins; 00482 bins.resize(55); 00483 float upper = 5.0; //assume z scores 00484 float lower = -5.0; 00485 float bin_size = (upper - lower) / 50; 00486 for(i=0; i<55; i++) 00487 bins[i] = 0; 00488 for(i=0; i<Dat.GetGenes(); i++){ 00489 for(j=i+1; j<Dat.GetGenes(); j++){ 00490 d = Dat.Get(i,j); 00491 if(CMeta::IsNaN(d)) continue; 00492 int b = (int) ((d - lower) / bin_size); 00493 if(b<0){ 00494 bins[0]++; 00495 continue; 00496 } 00497 if(b>=55){ 00498 bins[54]++; 00499 continue; 00500 } 00501 bins[b]++; 00502 } 00503 } 00504 fprintf(stderr, 00505 "Distances: bin size: %.5f, num of bins: %d, min bin val: %.5f, max bin val: %.5f\n", 00506 bin_size, 55, lower, upper); 00507 for(i=0; i<55; i++){ 00508 fprintf(stderr, "%lu\t%lu\n", i, bins[i]); 00509 } 00510 */ 00511 return true; 00512 } 00513 00514 bool CSeekWriter::GetGeneAverage(CDataPair &Dat, 00515 const vector<string> &vecstrGenes, 00516 vector<float> &vecResult, bool logit, float top_percent){ 00517 00518 /* assume datapair is already opened */ 00519 utype i, j; 00520 vector<utype> veciGenes; 00521 veciGenes.clear(); 00522 veciGenes.resize(vecstrGenes.size()); 00523 for( i = 0; i < vecstrGenes.size( ); ++i ) 00524 veciGenes[ i ] = Dat.GetGene( vecstrGenes[i] ); 00525 00526 CSeekTools::InitVector(vecResult, vecstrGenes.size(), CMeta::GetNaN()); 00527 for(i=0; i<vecstrGenes.size(); i++){ 00528 utype s = veciGenes[i]; 00529 if(CSeekTools::IsNaN(s)) continue; 00530 float *v = Dat.GetFullRow(s); 00531 float sum = 0; 00532 utype num = 0; 00533 vector<float> all; 00534 for(j=0; j<vecstrGenes.size(); j++){ 00535 utype t = veciGenes[j]; 00536 if(CSeekTools::IsNaN(t)) continue; 00537 if(CMeta::IsNaN(v[t])) continue; 00538 if(logit){ 00539 //sum+=log(v[t]) - log((float) (1.0-v[t])); 00540 all.push_back(log(v[t]) - log((float) (1.0-v[t]))); 00541 }else{ 00542 //sum+=v[t]; 00543 all.push_back(v[t]); 00544 } 00545 //num++; 00546 } 00547 sort(all.begin(), all.end()); 00548 int top_start = (int) (((float)1.0 - top_percent)*(float)all.size()); 00549 00550 if(top_start<0){ 00551 top_start = 0; 00552 } 00553 for(j=top_start; j<all.size(); j++){ 00554 sum+=all[j]; 00555 num++; 00556 } 00557 vecResult[i] = sum / (float) num; 00558 //fprintf(stderr, "%.2f\n", vecResult[i]); 00559 free(v); 00560 } 00561 return true; 00562 } 00563 00564 bool CSeekWriter::GetGenePresence(CDataPair &Dat, 00565 const vector<string> &vecstrGenes, 00566 vector<char> &vecResult){ 00567 /* assume datapair is already opened */ 00568 utype i, j; 00569 vector<utype> veciGenes; 00570 veciGenes.clear(); 00571 veciGenes.resize(vecstrGenes.size()); 00572 for( i = 0; i < vecstrGenes.size( ); ++i ) 00573 veciGenes[ i ] = Dat.GetGene( vecstrGenes[i] ); 00574 00575 CSeekTools::InitVector(vecResult, vecstrGenes.size(), (char) 0); 00576 00577 for(i=0; i<vecstrGenes.size(); i++){ 00578 if(CSeekTools::IsNaN(veciGenes[i])) continue; 00579 vecResult[i]=1; 00580 } 00581 return true; 00582 } 00583 00584 bool CSeekWriter::GetDatasetSinfo(CDataPair &Dat, 00585 float &mean, float &stdev){ 00586 utype i, j; 00587 mean = CMeta::GetNaN(); 00588 stdev = CMeta::GetNaN(); 00589 00590 utype iGenes = Dat.GetGenes(); 00591 00592 unsigned int num = 0; 00593 float sum = 0; 00594 00595 for(i=0; i<iGenes; i++){ 00596 utype s = i; 00597 float *v = Dat.GetFullRow(s); 00598 for(j=0; j<iGenes; j++){ 00599 if(CMeta::IsNaN(v[j])) continue; 00600 sum+=v[j]; 00601 num++; 00602 } 00603 } 00604 00605 if(num==0) return true; 00606 00607 mean = sum / (float) num; 00608 float diff = 0; 00609 for(i=0; i<iGenes; i++){ 00610 utype s = i; 00611 float *v = Dat.GetFullRow(s); 00612 for(j=0; j<iGenes; j++){ 00613 if(CMeta::IsNaN(v[j])) continue; 00614 diff += (v[j] - mean) * (v[j] - mean); 00615 } 00616 } 00617 diff /= (float) num; 00618 stdev = sqrt(diff); 00619 00620 return true; 00621 } 00622 00623 }