Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 00024 static int MainDABs( const gengetopt_args_info& ); 00025 static int MainDATs( const gengetopt_args_info& ); 00026 static int MainRevDATs( const gengetopt_args_info& ); 00027 static int MainPCLs( const gengetopt_args_info& ); 00028 static int MainModules( const gengetopt_args_info& ); 00029 00030 static const TPFnCombiner c_apfnCombiners[] = { MainPCLs, MainDATs, MainDABs, MainModules, MainRevDATs, NULL }; 00031 static const char* c_aszCombiners[] = { "pcl", "dat", "dad", "module", "revdat", NULL }; 00032 static const char c_acDab[] = ".dab"; 00033 static const char c_acDat[] = ".dat"; 00034 static const char c_acQDab[] = ".qdab"; 00035 00036 // store all input file names 00037 vector<string> input_files; 00038 00039 enum EMethod { 00040 EMethodBegin = 0, 00041 EMethodMean = EMethodBegin, 00042 EMethodSum = EMethodMean + 1, 00043 EMethodGMean = EMethodSum + 1, 00044 EMethodHMean = EMethodGMean + 1, 00045 EMethodMax = EMethodHMean + 1, 00046 EMethodMin = EMethodMax + 1, 00047 EMethodDiff = EMethodMin + 1, 00048 EMethodMeta = EMethodDiff + 1, 00049 EMethodQMeta = EMethodMeta + 1, 00050 EMethodEnd = EMethodQMeta + 1 00051 }; 00052 00053 static const char* c_aszMethods[] = { 00054 "mean", "sum", "gmean", "hmean", "max", "min", "diff", "meta", "qmeta", NULL 00055 }; 00056 00057 int main( int iArgs, char** aszArgs ) { 00058 gengetopt_args_info sArgs; 00059 int iRet; 00060 size_t i; 00061 DIR* dp; 00062 struct dirent* ep; 00063 00064 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00065 cmdline_parser_print_help( ); 00066 return 1; } 00067 CMeta Meta( sArgs.verbosity_arg ); 00068 00069 // now collect the data files from directory if given 00070 if(sArgs.directory_arg){ 00071 dp = opendir (sArgs.directory_arg); 00072 if (dp != NULL){ 00073 while (ep = readdir (dp)){ 00074 // skip . .. files and temp files with ~ 00075 if (ep->d_name[0] == '.' || ep->d_name[strlen(ep->d_name)-1] == '~') 00076 continue; 00077 00078 // currently opens all files. Add filter here if want pick file extensions 00079 input_files.push_back((string)sArgs.directory_arg + "/" + ep->d_name); 00080 } 00081 (void) closedir (dp); 00082 } 00083 else{ 00084 cerr << "Couldn't open the directory: " << sArgs.directory_arg << '\n'; 00085 return 1; 00086 } 00087 } 00088 else{ 00089 input_files.resize( sArgs.inputs_num ); 00090 copy( sArgs.inputs, sArgs.inputs + sArgs.inputs_num, input_files.begin( ) ); 00091 } 00092 00093 for( i = 0; c_aszCombiners[ i ]; ++i ) 00094 if( !strcmp( c_aszCombiners[ i ], sArgs.type_arg ) ) { 00095 iRet = c_apfnCombiners[ i ]( sArgs ); 00096 break; } 00097 00098 return iRet; } 00099 00100 int MainPCLs( const gengetopt_args_info& sArgs ) { 00101 CPCL PCL, PCLNew; 00102 size_t i, j, iArg, iExp, iGene; 00103 vector<string> vecstrGenes, vecstrExps, vecstrFeatures; 00104 set<string> setstrGenes; 00105 set<string>::const_iterator iterGenes; 00106 ifstream ifsm; 00107 ofstream ofsm; 00108 00109 for( iArg = 0; iArg < input_files.size(); ++iArg ) { 00110 ifsm.clear( ); 00111 ifsm.open( input_files[ iArg ].c_str() ); 00112 if( !PCL.Open( ifsm, sArgs.skip_arg ) ) { 00113 cerr << "Could not open: " << input_files[ iArg ] << endl; 00114 return 1; } 00115 if( !iArg ) 00116 for( i = 0; i < PCL.GetFeatures( ); ++i ) 00117 vecstrFeatures.push_back( PCL.GetFeature( i ) ); 00118 for( i = 0; i < PCL.GetExperiments( ); ++i ) 00119 vecstrExps.push_back( PCL.GetExperiment( i ) ); 00120 for( i = 0; i < PCL.GetGenes( ); ++i ) 00121 setstrGenes.insert( PCL.GetGene( i ) ); 00122 ifsm.close( ); } 00123 vecstrGenes.resize( setstrGenes.size( ) ); 00124 copy( setstrGenes.begin( ), setstrGenes.end( ), vecstrGenes.begin( ) ); 00125 00126 PCLNew.Open( vecstrGenes, vecstrExps, vecstrFeatures ); 00127 iExp = 0; 00128 for( iArg = 0; iArg < input_files.size(); ++iArg ) { 00129 cerr << "Processing " << input_files[ iArg ] << "..." << endl; 00130 ifsm.clear( ); 00131 ifsm.open( input_files[ iArg ].c_str() ); 00132 PCL.Open( ifsm, sArgs.skip_arg ); 00133 for( i = 0; i < PCLNew.GetGenes( ); ++i ) 00134 if( ( iGene = PCL.GetGene( vecstrGenes[ i ] ) ) != -1 ) { 00135 if( !iArg ) 00136 for( j = 1; j < PCLNew.GetFeatures( ); ++j ) 00137 PCLNew.SetFeature( i, j, PCL.GetFeature( iGene, j ) ); 00138 for( j = 0; j < PCL.GetExperiments( ); ++j ) 00139 PCLNew.Set( i, iExp + j, PCL.Get( iGene, j ) ); } 00140 iExp += PCL.GetExperiments( ); 00141 ifsm.close( ); } 00142 00143 if( sArgs.output_arg ) { 00144 ofsm.open( sArgs.output_arg ); 00145 PCLNew.Save( ofsm ); 00146 ofsm.close( ); } 00147 else { 00148 PCLNew.Save( cout ); 00149 cout.flush( ); } 00150 00151 return 0; } 00152 00153 struct SCallbackMeta { 00154 CDat m_DatWei; 00155 CDat m_DatYBare; 00156 CDat m_DatQe; 00157 CDat m_DatDeltaeD; 00158 }; 00159 00160 struct SCallbackVars { 00161 size_t m_iDataset; 00162 size_t m_iOne; 00163 size_t m_iTwo; 00164 float m_dValue; 00165 float m_dWeight; 00166 EMethod m_eMethod; 00167 size_t m_iDatasets; 00168 CDat* m_pDatOut; 00169 CDat* m_pDatCur; 00170 CHalfMatrix<float>* m_pMatCounts; 00171 const vector<set<size_t> >* m_pvecsetiGenes; 00172 const vector<vector<size_t> >* m_pvecveciGenes; 00173 SCallbackMeta m_sCallbackMeta; 00174 }; 00175 00176 struct SCallback { 00177 const gengetopt_args_info& m_sArgs; 00178 const CGenes& m_GenesIn; 00179 const CPCL& m_PCLWeights; 00180 const vector<string>& m_vecstrTerms; 00181 const vector<CGenes*>& m_vecpTerms; 00182 SCallbackVars m_sCallbackVars; 00183 00184 SCallback( const gengetopt_args_info& sArgs, const CGenes& GenesIn, const CPCL& PCLWeights, 00185 const vector<string>& vecstrTerms, const vector<CGenes*>& vecpTerms ) : 00186 m_sArgs(sArgs), m_GenesIn(GenesIn), m_PCLWeights(PCLWeights), m_vecstrTerms(vecstrTerms), 00187 m_vecpTerms(vecpTerms) { } 00188 }; 00189 00190 int iterate_inputs(SCallback& sCallback, void (*pfnCallback)( SCallbackVars& ), void (*pfnInitialize)( SCallbackVars& ) = NULL ) { 00191 SCallbackVars& sCallbackVars = sCallback.m_sCallbackVars; 00192 CDat& DatOut = *sCallbackVars.m_pDatOut; 00193 const gengetopt_args_info& sArgs = sCallback.m_sArgs; 00194 const CGenes& GenesIn = sCallback.m_GenesIn; 00195 const CPCL& PCLWeights = sCallback.m_PCLWeights; 00196 const vector<string>& vecstrTerms = sCallback.m_vecstrTerms; 00197 const vector<CGenes*>& vecpTerms = sCallback.m_vecpTerms; 00198 vector<set<size_t> > vecsetiGenes; 00199 vector<vector<size_t> > vecveciGenes; 00200 size_t i, j, k, iOne, iTwo, iA, iB; 00201 CDat DatCur; 00202 float d; 00203 00204 sCallbackVars.m_iDatasets = input_files.size( ); 00205 sCallbackVars.m_pvecveciGenes = &vecveciGenes; 00206 sCallbackVars.m_pvecsetiGenes = &vecsetiGenes; 00207 vecsetiGenes.resize( DatOut.GetGenes( ) ); 00208 for( i = 0; i < input_files.size( ); ++i ) { 00209 if( !DatCur.Open( input_files[ i ].c_str(), !!sArgs.memmap_flag && !sArgs.normalize_flag && !GenesIn.GetGenes( ) ) ) { 00210 cerr << "Couldn't open: " << input_files[ i ] << endl; 00211 return 1; } 00212 sCallbackVars.m_iDataset = i; 00213 sCallbackVars.m_pDatCur = &DatCur; 00214 if( PCLWeights.GetGenes( ) ) { 00215 if( ( j = PCLWeights.GetGene( CMeta::Deextension( CMeta::Basename( input_files[ i ].c_str() ) ) ) ) == -1 ) { 00216 cerr << "Ignoring unweighted graph: " << input_files[ i ] << endl; 00217 continue; } 00218 sCallbackVars.m_dWeight = PCLWeights.Get( j, 0 ); } 00219 else 00220 sCallbackVars.m_dWeight = 1; 00221 cerr << "Opened: " << input_files[ i ] << endl; 00222 if( sArgs.zero_flag ) { 00223 vector<string> vecstrGenes; 00224 00225 for( j = 0; j < DatOut.GetGenes( ); ++j ) 00226 if( DatCur.GetGene( DatOut.GetGene( j ) ) == -1 ) 00227 vecstrGenes.push_back( DatOut.GetGene( j ) ); 00228 DatCur.AddGenes( vecstrGenes ); 00229 for( j = 0; j < DatCur.GetGenes( ); ++j ) 00230 for( k = ( j + 1 ); k < DatCur.GetGenes( ); ++k ) 00231 if( CMeta::IsNaN( DatCur.Get( j, k ) ) ) 00232 DatCur.Set( j, k, 0 ); } 00233 if( sArgs.normalize_flag ) 00234 DatCur.Normalize( CDat::ENormalizeZScore ); 00235 if( sArgs.quantiles_arg > 0 ) 00236 DatCur.NormalizeQuantiles( sArgs.quantiles_arg ); 00237 if( GenesIn.GetGenes( ) ) 00238 DatCur.FilterGenes( GenesIn, CDat::EFilterInclude ); 00239 vecveciGenes.resize( DatCur.GetGenes( ) ); 00240 for( j = 0; j < vecveciGenes.size( ); ++j ) 00241 vecveciGenes[j].clear( ); 00242 for( j = 0; j < DatOut.GetGenes( ); ++j ) { 00243 vecsetiGenes[j].clear( ); 00244 if( vecstrTerms.empty( ) ) { 00245 if( ( iOne = DatCur.GetGene( DatOut.GetGene( j ) ) ) != -1 ) 00246 vecveciGenes[iOne].push_back( j ); } 00247 else 00248 for( k = 0; k < vecpTerms[j]->GetGenes( ); ++k ) 00249 if( ( iOne = DatCur.GetGene( vecpTerms[j]->GetGene( k ).GetName( ) ) ) != -1 ) { 00250 vecveciGenes[iOne].push_back( j ); 00251 vecsetiGenes[j].insert( iOne ); } } 00252 if( pfnInitialize ) 00253 pfnInitialize( sCallbackVars ); 00254 for( j = 0; j < DatCur.GetGenes( ); ++j ) 00255 for( k = ( j + 1 ); k < DatCur.GetGenes( ); ++k ) { 00256 if( CMeta::IsNaN( d = DatCur.Get( j, k ) ) ) 00257 continue; 00258 sCallbackVars.m_dValue = d; 00259 for( iA = 0; iA < vecveciGenes[j].size( ); ++iA ) { 00260 iOne = vecveciGenes[j][iA]; 00261 if( vecsetiGenes[iOne].find( k ) != vecsetiGenes[iOne].end( ) ) 00262 continue; 00263 sCallbackVars.m_iOne = iOne; 00264 for( iB = 0; iB < vecveciGenes[k].size( ); ++iB ) { 00265 iTwo = vecveciGenes[k][iB]; 00266 if( vecsetiGenes[iTwo].find( j ) != vecsetiGenes[iTwo].end( ) ) 00267 continue; 00268 sCallbackVars.m_iTwo = iTwo; 00269 pfnCallback( sCallbackVars ); } } } } 00270 00271 return 0; } 00272 00273 float weight_weistar( const SCallbackVars& sCallback ) { 00274 const SCallbackMeta& sCallbackMeta = sCallback.m_sCallbackMeta; 00275 float dDelta2; 00276 00277 if( dDelta2 = sCallbackMeta.m_DatDeltaeD.Get( sCallback.m_iOne, sCallback.m_iTwo ) ) { 00278 if( ( dDelta2 = ( sCallbackMeta.m_DatQe.Get( sCallback.m_iOne, sCallback.m_iTwo ) - 00279 sCallback.m_iDatasets + 1 ) / dDelta2 ) < 0 ) 00280 dDelta2 = 0; } 00281 /* 00282 if( ( (float)rand( ) / RAND_MAX ) < 0.000001 ) 00283 cerr << ( 1 / ( ( 1 / sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ) ) + dDelta2 ) ) << '\t' << 00284 sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ) << '\t' << dDelta2 << endl; 00285 //*/ 00286 return ( 1 / ( ( 1 / sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ) ) + dDelta2 ) ); } 00287 00288 void callback_andersondarling( SCallbackVars& sCallback ) { 00289 static const float c_dFrac = 0.001f; 00290 vector<float> vecdValues; 00291 size_t i, j, iGenes; 00292 float d; 00293 00294 iGenes = sCallback.m_pDatCur->GetGenes( ); 00295 vecdValues.reserve( (size_t)( iGenes * iGenes * c_dFrac ) ); 00296 for( i = 0; i < iGenes; ++i ) 00297 for( j = ( i + 1 ); j < iGenes; ++j ) 00298 if( !CMeta::IsNaN( d = sCallback.m_pDatCur->Get( i, j ) ) && 00299 ( ( (float)rand( ) / RAND_MAX ) < c_dFrac ) ) 00300 vecdValues.push_back( d ); 00301 sCallback.m_dWeight = (float)( log( CStatistics::AndersonDarlingScore<float>( 00302 vecdValues.begin( ), vecdValues.end( ) ) ) / log( 2.0 ) ); } 00303 00304 void callback_combine( SCallbackVars& sCallback ) { 00305 00306 if( sCallback.m_eMethod == EMethodMeta ) 00307 sCallback.m_dWeight = weight_weistar( sCallback ); 00308 switch( sCallback.m_eMethod ) { 00309 case EMethodGMean: 00310 sCallback.m_pDatOut->Get( sCallback.m_iOne, sCallback.m_iTwo ) *= pow( sCallback.m_dValue, sCallback.m_dWeight ); 00311 sCallback.m_pMatCounts->Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallback.m_dWeight; 00312 break; 00313 00314 case EMethodHMean: 00315 sCallback.m_pDatOut->Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallback.m_dWeight / sCallback.m_dValue; 00316 sCallback.m_pMatCounts->Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallback.m_dWeight; 00317 break; 00318 00319 case EMethodMax: 00320 if( sCallback.m_dValue > sCallback.m_pDatOut->Get( sCallback.m_iOne, sCallback.m_iTwo ) ) 00321 sCallback.m_pDatOut->Set( sCallback.m_iOne, sCallback.m_iTwo, sCallback.m_dValue ); 00322 break; 00323 00324 case EMethodMin: 00325 if( sCallback.m_dValue < sCallback.m_pDatOut->Get( sCallback.m_iOne, sCallback.m_iTwo ) ) 00326 sCallback.m_pDatOut->Set( sCallback.m_iOne, sCallback.m_iTwo, sCallback.m_dValue ); 00327 break; 00328 00329 default: 00330 sCallback.m_pDatOut->Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallback.m_dWeight * sCallback.m_dValue * 00331 ( ( sCallback.m_iDataset && ( sCallback.m_eMethod == EMethodDiff ) ) ? -1 : 1 ); 00332 sCallback.m_pMatCounts->Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallback.m_dWeight; } } 00333 00334 void callback_wei( SCallbackVars& sCallback ) { 00335 static const float c_dFrac = 0.001f; 00336 SCallbackMeta& sCallbackMeta = sCallback.m_sCallbackMeta; 00337 const vector<vector<size_t> >& vecveciGenes = *sCallback.m_pvecveciGenes; 00338 const vector<set<size_t> >& vecsetiGenes = *sCallback.m_pvecsetiGenes; 00339 size_t i, j, iOne, iTwo, iA, iB, iGenes; 00340 float d, dSum, dSumSqs; 00341 vector<float> vecdSums, vecdSumSqs, vecdValues; 00342 00343 iGenes = sCallback.m_pDatCur->GetGenes( ); 00344 vecdValues.reserve( (size_t)( iGenes * ( iGenes - 1 ) * c_dFrac ) ); 00345 vecdSums.resize( sCallback.m_pDatOut->GetGenes( ) ); 00346 fill( vecdSums.begin( ), vecdSums.end( ), 0.0f ); 00347 vecdSumSqs.resize( sCallback.m_pDatOut->GetGenes( ) ); 00348 fill( vecdSumSqs.begin( ), vecdSumSqs.end( ), 0.0f ); 00349 dSum = dSumSqs = 0; 00350 for( i = 0; i < iGenes; ++i ) 00351 for( j = ( i + 1 ); j < iGenes; ++j ) { 00352 if( CMeta::IsNaN( d = sCallback.m_pDatCur->Get( i, j ) ) ) 00353 continue; 00354 for( iA = 0; iA < vecveciGenes[i].size( ); ++iA ) { 00355 iOne = vecveciGenes[i][iA]; 00356 if( vecsetiGenes[iOne].find( j ) != vecsetiGenes[iOne].end( ) ) 00357 continue; 00358 for( iB = 0; iB < vecveciGenes[j].size( ); ++iB ) { 00359 iTwo = vecveciGenes[j][iB]; 00360 if( vecsetiGenes[iTwo].find( i ) != vecsetiGenes[iTwo].end( ) ) 00361 continue; 00362 if( !( iA || iB ) && ( ( (float)rand( ) / RAND_MAX ) < c_dFrac ) ) 00363 vecdValues.push_back( d ); 00364 dSum += d; 00365 vecdSums[iOne] += d; 00366 vecdSums[iTwo] += d; 00367 d *= d; 00368 dSumSqs += d; 00369 vecdSumSqs[iOne] += d; 00370 vecdSumSqs[iTwo] += d; } } } 00371 i = iGenes * ( iGenes - 1 ) / 2; 00372 dSum /= i; 00373 dSumSqs = ( dSumSqs / ( i - 1 ) ) - ( dSum * dSum ); 00374 for( i = 0; i < vecdSums.size( ); ++i ) { 00375 d = ( vecdSums[i] /= iGenes - 1 ); 00376 vecdSumSqs[i] = ( vecdSumSqs[i] / ( iGenes - 2 ) ) - ( d * d ); } 00377 00378 d = (float)( log( CStatistics::AndersonDarlingScore<float>( vecdValues.begin( ), vecdValues.end( ) ) ) / 00379 log( 2.0 ) ); 00380 sCallbackMeta.m_DatWei.Open( sCallback.m_pDatOut->GetGeneNames( ) ); 00381 sCallbackMeta.m_DatWei.Clear( 0 ); 00382 for( i = 0; i < vecdSumSqs.size( ); ++i ) 00383 for( j = ( i + 1 ); j < vecdSumSqs.size( ); ++j ) 00384 sCallbackMeta.m_DatWei.Set( i, j, 00385 // 2 / ( vecdSumSqs[i] + vecdSumSqs[j] ) // pooled variance of adjacent genes 00386 // 3 / ( vecdSumSqs[i] + vecdSumSqs[j] + dSumSqs ) // unweighted pool of adjanced genes + whole network 00387 3 * d / ( vecdSumSqs[i] + vecdSumSqs[j] + dSumSqs ) 00388 ); } 00389 00390 void callback_deltaed( SCallbackVars& sCallback ) { 00391 SCallbackMeta& sCallbackMeta = sCallback.m_sCallbackMeta; 00392 float d; 00393 00394 d = sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ); 00395 sCallbackMeta.m_DatDeltaeD.Get( sCallback.m_iOne, sCallback.m_iTwo ) += d; 00396 sCallback.m_pDatOut->Get( sCallback.m_iOne, sCallback.m_iTwo ) += d * d; } 00397 00398 void callback_ybare( SCallbackVars& sCallback ) { 00399 SCallbackMeta& sCallbackMeta = sCallback.m_sCallbackMeta; 00400 00401 sCallbackMeta.m_DatYBare.Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ) * 00402 sCallback.m_dValue; 00403 sCallback.m_pMatCounts->Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ); } 00404 00405 void callback_qe( SCallbackVars& sCallback ) { 00406 SCallbackMeta& sCallbackMeta = sCallback.m_sCallbackMeta; 00407 float d; 00408 00409 d = sCallback.m_dValue - sCallbackMeta.m_DatYBare.Get( sCallback.m_iOne, sCallback.m_iTwo ); 00410 sCallbackMeta.m_DatQe.Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ) * d * d; } 00411 00412 void initialize_meta( SCallback& sCallback ) { 00413 SCallbackMeta& sCallbackMeta = sCallback.m_sCallbackVars.m_sCallbackMeta; 00414 const vector<string>& vecstrGenes = sCallback.m_sCallbackVars.m_pDatOut->GetGeneNames( ); 00415 size_t i, j; 00416 CDat* pDatOut; 00417 CHalfMatrix<float>* pMatCounts; 00418 CHalfMatrix<float> MatCounts; 00419 float d; 00420 CDat DatTmp; 00421 00422 sCallbackMeta.m_DatDeltaeD.Open( vecstrGenes ); 00423 sCallbackMeta.m_DatDeltaeD.Clear( 0 ); 00424 DatTmp.Open( vecstrGenes, false ); 00425 DatTmp.Clear( 0 ); 00426 pDatOut = sCallback.m_sCallbackVars.m_pDatOut; 00427 sCallback.m_sCallbackVars.m_pDatOut = &DatTmp; 00428 iterate_inputs( sCallback, callback_deltaed, callback_wei ); 00429 sCallback.m_sCallbackVars.m_pDatOut = pDatOut; 00430 for( i = 0; i < sCallbackMeta.m_DatDeltaeD.GetGenes( ); ++i ) 00431 for( j = ( i + 1 ); j < sCallbackMeta.m_DatDeltaeD.GetGenes( ); ++j ) 00432 if( d = sCallbackMeta.m_DatDeltaeD.Get( i, j ) ) 00433 sCallbackMeta.m_DatDeltaeD.Get( i, j ) -= DatTmp.Get( i, j ) / d; 00434 00435 sCallbackMeta.m_DatYBare.Open( vecstrGenes, false ); 00436 sCallbackMeta.m_DatYBare.Clear( 0 ); 00437 MatCounts.Initialize( vecstrGenes.size( ) ); 00438 MatCounts.Clear( ); 00439 pMatCounts = sCallback.m_sCallbackVars.m_pMatCounts; 00440 sCallback.m_sCallbackVars.m_pMatCounts = &MatCounts; 00441 iterate_inputs( sCallback, callback_ybare, callback_wei ); 00442 sCallback.m_sCallbackVars.m_pMatCounts = pMatCounts; 00443 for( i = 0; i < sCallbackMeta.m_DatYBare.GetGenes( ); ++i ) 00444 for( j = ( i + 1 ); j < sCallbackMeta.m_DatYBare.GetGenes( ); ++j ) 00445 if( d = MatCounts.Get( i, j ) ) 00446 sCallbackMeta.m_DatYBare.Get( i, j ) /= d; 00447 00448 sCallbackMeta.m_DatQe.Open( vecstrGenes, false ); 00449 sCallbackMeta.m_DatQe.Clear( 0 ); 00450 iterate_inputs( sCallback, callback_qe, callback_wei ); } 00451 00452 int MainDATs( const gengetopt_args_info& sArgs ) { 00453 CDataset Dataset; 00454 CDat DatOut, DatCur, DatQW; 00455 CHalfMatrix<float> MatCounts; 00456 size_t i, j; 00457 float d; 00458 vector<string> vecstrTerms; 00459 CPCL PCLWeights( false ); 00460 CGenome Genome; 00461 CGenes GenesIn( Genome ); 00462 vector<CGenes*> vecpTerms; 00463 EMethod eMethod; 00464 vector<float> vecdWis; 00465 int iRet; 00466 SCallback sCallback( sArgs, GenesIn, PCLWeights, vecstrTerms, vecpTerms ); 00467 void (*pfnInitialize)( SCallbackVars& ); 00468 00469 if( !input_files.size() ) 00470 return 1; 00471 00472 if( !Dataset.OpenGenes( input_files ) ) { 00473 cerr << "Couldn't open: " << input_files[ 0 ]; 00474 for( i = 1; i < input_files.size( ); ++i ) 00475 cerr << ", " << input_files[ i ]; 00476 cerr << endl; 00477 return 1; } 00478 if( sArgs.weights_arg && !PCLWeights.Open( sArgs.weights_arg, 0 ) ) { 00479 cerr << "Could not open: " << sArgs.weights_arg << endl; 00480 return 1; } 00481 if( sArgs.genes_arg && !GenesIn.Open( sArgs.genes_arg ) ) { 00482 cerr << "Could not open: " << sArgs.genes_arg << endl; 00483 return 1; } 00484 if( sArgs.terms_arg && !CGenes::Open( sArgs.terms_arg, Genome, vecstrTerms, vecpTerms ) ) { 00485 cerr << "Could not open: " << sArgs.terms_arg << endl; 00486 return 1; } 00487 00488 for( eMethod = EMethodBegin; eMethod < EMethodEnd; eMethod = (EMethod)( eMethod + 1 ) ) 00489 if( !strcmp( c_aszMethods[eMethod], sArgs.method_arg ) ) 00490 break; 00491 if( eMethod >= EMethodEnd ) { 00492 cmdline_parser_print_help( ); 00493 return 1; } 00494 00495 DatOut.Open( vecstrTerms.empty( ) ? Dataset.GetGeneNames( ) : vecstrTerms, false, sArgs.memmap_flag ? sArgs.output_arg : NULL ); 00496 switch( eMethod ) { 00497 case EMethodMax: 00498 d = -FLT_MAX; 00499 break; 00500 00501 case EMethodMin: 00502 d = FLT_MAX; 00503 break; 00504 00505 case EMethodGMean: 00506 d = 1; 00507 break; 00508 00509 default: 00510 d = 0; } 00511 for( i = 0; i < DatOut.GetGenes( ); ++i ) 00512 for( j = ( i + 1 ); j < DatOut.GetGenes( ); ++j ) 00513 DatOut.Set( i, j, d ); 00514 if( fabs( d ) < 2 ) { 00515 MatCounts.Initialize( DatOut.GetGenes( ) ); 00516 MatCounts.Clear( ); } 00517 00518 sCallback.m_sCallbackVars.m_eMethod = eMethod; 00519 sCallback.m_sCallbackVars.m_pDatOut = &DatOut; 00520 sCallback.m_sCallbackVars.m_pMatCounts = &MatCounts; 00521 if( eMethod == EMethodMeta ) 00522 initialize_meta( sCallback ); 00523 00524 switch( eMethod ) { 00525 case EMethodMeta: 00526 pfnInitialize = callback_wei; 00527 break; 00528 00529 case EMethodQMeta: 00530 pfnInitialize = callback_andersondarling; 00531 break; 00532 00533 default: 00534 pfnInitialize = NULL; } 00535 if( iRet = iterate_inputs( sCallback, callback_combine, pfnInitialize ) ) 00536 return iRet; 00537 for( i = 0; i < DatOut.GetGenes( ); ++i ) 00538 for( j = ( i + 1 ); j < DatOut.GetGenes( ); ++j ) 00539 switch( eMethod ) { 00540 case EMethodMean: 00541 case EMethodMeta: 00542 case EMethodQMeta: 00543 DatOut.Set( i, j, ( d = MatCounts.Get( i, j ) ) ? ( DatOut.Get( i, j ) / ( sArgs.reweight_flag ? 1 : d ) ) : 00544 CMeta::GetNaN( ) ); 00545 break; 00546 00547 case EMethodGMean: 00548 DatOut.Set( i, j, ( d = MatCounts.Get( i, j ) ) ? 00549 (float)pow( (double)DatOut.Get( i, j ), 1.0 / ( sArgs.reweight_flag ? 1 : d ) ) : CMeta::GetNaN( ) ); 00550 break; 00551 00552 case EMethodHMean: 00553 DatOut.Set( i, j, ( d = MatCounts.Get( i, j ) ) ? ( ( sArgs.reweight_flag ? 1 : d ) / DatOut.Get( i, j ) ) : 00554 CMeta::GetNaN( ) ); 00555 break; 00556 00557 case EMethodMax: 00558 if( DatOut.Get( i, j ) == -FLT_MAX ) 00559 DatOut.Set( i, j, CMeta::GetNaN( ) ); 00560 break; 00561 00562 case EMethodMin: 00563 if( DatOut.Get( i, j ) == FLT_MAX ) 00564 DatOut.Set( i, j, CMeta::GetNaN( ) ); } 00565 00566 if( sArgs.zscore_flag ) 00567 DatOut.Normalize( CDat::ENormalizeZScore ); 00568 if( !sArgs.memmap_flag ) 00569 DatOut.Save( sArgs.output_arg ); 00570 00571 for( i = 0; i < vecpTerms.size( ); ++i ) 00572 delete vecpTerms[i]; 00573 return 0; } 00574 00575 static int MainDABs( const gengetopt_args_info& sArgs ) { 00576 CDatasetCompact Dataset; 00577 size_t i; 00578 ofstream ofsm; 00579 00580 if( !input_files.size() ) 00581 return 1; 00582 00583 if( !Dataset.Open( input_files ) ) { 00584 cerr << "Couldn't open: " << input_files[ 0 ]; 00585 for( i = 1; i < input_files.size( ); ++i ) 00586 cerr << ", " << input_files[ i ]; 00587 cerr << endl; 00588 return 1; } 00589 00590 if( sArgs.output_arg ) { 00591 ofsm.open( sArgs.output_arg ); 00592 Dataset.Save( ofsm, true ); 00593 ofsm.close( ); } 00594 else { 00595 Dataset.Save( cout, false ); 00596 cout.flush( ); } 00597 00598 return 0; } 00599 00600 struct SSimpleome { 00601 map<string, size_t> m_mapstriGenes; 00602 map<size_t, string> m_mapistrGenes; 00603 00604 size_t Get( const string& strGene ) { 00605 map<string, size_t>::const_iterator iterGene; 00606 size_t iRet; 00607 00608 if( ( iterGene = m_mapstriGenes.find( strGene ) ) != m_mapstriGenes.end( ) ) 00609 return iterGene->second; 00610 00611 m_mapstriGenes[ strGene ] = iRet = m_mapstriGenes.size( ); 00612 m_mapistrGenes[ iRet ] = strGene; 00613 return iRet; } 00614 00615 const string& Get( size_t iGene ) const { 00616 00617 return m_mapistrGenes.find( iGene )->second; } 00618 }; 00619 00620 struct SModule { 00621 float m_dSpecificity; 00622 set<size_t> m_setiGenes; 00623 set<const SModule*> m_setpsChildren; 00624 00625 static float Open( const char* szFile, SSimpleome& sSimpleome, vector<SModule*>& vecpsModules ) { 00626 static const size_t c_iBuffer = 131072; 00627 ifstream ifsm; 00628 char acBuffer[ c_iBuffer ]; 00629 vector<string> vecstrLine; 00630 float dRet; 00631 size_t i, iModule; 00632 SModule* psModule; 00633 00634 ifsm.open( szFile ); 00635 if( !ifsm.is_open( ) ) { 00636 cerr << "Could not open: " << szFile << endl; 00637 return CMeta::GetNaN( ); } 00638 for( iModule = 0; !ifsm.eof( ); ++iModule ) { 00639 ifsm.getline( acBuffer, c_iBuffer - 1 ); 00640 if( !acBuffer[ 0 ] ) 00641 iModule--; } 00642 ifsm.close( ); 00643 if( !iModule ) { 00644 cerr << "No modules found in: " << szFile << endl; 00645 return CMeta::GetNaN( ); } 00646 cerr << "Found " << --iModule << " modules in: " << szFile << endl; 00647 vecpsModules.resize( iModule ); 00648 00649 dRet = CMeta::GetNaN( ); 00650 ifsm.clear( ); 00651 ifsm.open( szFile ); 00652 for( iModule = 0; !ifsm.eof( ); ++iModule ) { 00653 ifsm.getline( acBuffer, c_iBuffer - 1 ); 00654 acBuffer[ c_iBuffer - 1 ] = 0; 00655 if( CMeta::IsNaN( dRet ) ) { 00656 dRet = (float)atof( acBuffer ); 00657 iModule--; 00658 continue; } 00659 vecstrLine.clear( ); 00660 CMeta::Tokenize( acBuffer, vecstrLine ); 00661 if( vecstrLine.size( ) < 2 ) { 00662 iModule--; 00663 continue; } 00664 vecpsModules[ iModule ] = psModule = new SModule( ); 00665 psModule->m_dSpecificity = (float)atof( vecstrLine[ 0 ].c_str( ) ); 00666 for( i = 1; i < vecstrLine.size( ); ++i ) 00667 psModule->m_setiGenes.insert( sSimpleome.Get( vecstrLine[ i ] ) ); } 00668 00669 return dRet; } 00670 00671 float Jaccard( const SModule& sModule ) const { 00672 set<size_t>::const_iterator iterGene; 00673 size_t iUnion, iIntersection; 00674 00675 iIntersection = 0; 00676 iUnion = sModule.m_setiGenes.size( ); 00677 for( iterGene = m_setiGenes.begin( ); iterGene != m_setiGenes.end( ); ++iterGene ) 00678 if( sModule.m_setiGenes.find( *iterGene ) != sModule.m_setiGenes.end( ) ) 00679 iIntersection++; 00680 else 00681 iUnion++; 00682 00683 return ( (float)iIntersection / iUnion ); } 00684 00685 size_t Intersection( const SModule& sModule ) const { 00686 size_t iRet; 00687 set<size_t>::const_iterator iterGene; 00688 00689 for( iRet = 0,iterGene = m_setiGenes.begin( ); iterGene != m_setiGenes.end( ); ++iterGene ) 00690 if( sModule.m_setiGenes.find( *iterGene ) != sModule.m_setiGenes.end( ) ) 00691 iRet++; 00692 00693 return iRet; } 00694 00695 void Merge( const SModule& sModule ) { 00696 set<size_t>::const_iterator iterGene; 00697 00698 for( iterGene = sModule.m_setiGenes.begin( ); iterGene != sModule.m_setiGenes.end( ); ++iterGene ) 00699 m_setiGenes.insert( *iterGene ); 00700 m_dSpecificity = ( m_dSpecificity + sModule.m_dSpecificity ) / 2; } 00701 00702 bool IsChild( const SModule* psModule ) const { 00703 00704 return ( m_setpsChildren.find( psModule ) != m_setpsChildren.end( ) ); } 00705 00706 void Save( ostream& ostm, float dCutoff, const SSimpleome& sSimpleome ) const { 00707 set<const SModule*>::const_iterator iterChild; 00708 set<size_t>::const_iterator iterGene; 00709 00710 ostm << this << '\t' << dCutoff << '\t' << m_dSpecificity << '\t'; 00711 for( iterChild = m_setpsChildren.begin( ); iterChild != m_setpsChildren.end( ); ++iterChild ) { 00712 if( iterChild != m_setpsChildren.begin( ) ) 00713 ostm << '|'; 00714 ostm << *iterChild; } 00715 for( iterGene = m_setiGenes.begin( ); iterGene != m_setiGenes.end( ); ++iterGene ) 00716 ostm << '\t' << sSimpleome.Get( *iterGene ); 00717 ostm << endl; } 00718 }; 00719 00720 struct SSorterModules { 00721 const vector<float>& m_vecdModules; 00722 00723 SSorterModules( const vector<float>& vecdModules ) : m_vecdModules(vecdModules) { } 00724 00725 bool operator()( size_t iOne, size_t iTwo ) const { 00726 00727 return ( m_vecdModules[ iOne ] > m_vecdModules[ iTwo ] ); } 00728 }; 00729 00730 int MainModules( const gengetopt_args_info& sArgs ) { 00731 vector<float> vecdModules; 00732 vector<vector<SModule*> > vecvecpsModules; 00733 vector<size_t> veciIndices; 00734 size_t i, j, iOuter, iCutoffOne, iCutoffTwo, iModuleOne, iModuleTwo; 00735 SSimpleome sSimpleome; 00736 bool fDone; 00737 float d; 00738 ofstream ofsm; 00739 ostream* postm; 00740 00741 vecdModules.resize( input_files.size() ); 00742 vecvecpsModules.resize( input_files.size() ); 00743 veciIndices.resize( input_files.size() ); 00744 for( i = 0; i < vecvecpsModules.size( ); ++i ) { 00745 veciIndices[ i ] = i; 00746 if( CMeta::IsNaN( vecdModules[ i ] = SModule::Open( input_files[ i ].c_str(), sSimpleome, 00747 vecvecpsModules[ i ] ) ) ) 00748 return 1; } 00749 sort( veciIndices.begin( ), veciIndices.end( ), SSorterModules( vecdModules ) ); 00750 00751 for( iOuter = 0,fDone = false; !fDone; ++iOuter ) { 00752 fDone = true; 00753 cerr << "Outer loop: " << iOuter << endl; 00754 for( iCutoffOne = 0; iCutoffOne < veciIndices.size( ); ++iCutoffOne ) { 00755 vector<SModule*>& vecpsModulesOne = vecvecpsModules[ veciIndices[ iCutoffOne ] ]; 00756 00757 cerr << "Merging cutoff: " << vecdModules[ veciIndices[ iCutoffOne ] ] << endl; 00758 for( iModuleOne = 0; iModuleOne < vecpsModulesOne.size( ); ++iModuleOne ) { 00759 SModule* psOne = vecpsModulesOne[ iModuleOne ]; 00760 00761 if( !psOne ) 00762 continue; 00763 for( iModuleTwo = ( iModuleOne + 1 ); iModuleTwo < vecpsModulesOne.size( ); ++iModuleTwo ) { 00764 SModule* psTwo = vecpsModulesOne[ iModuleTwo ]; 00765 00766 if( !psTwo ) 00767 continue; 00768 if( ( d = psOne->Jaccard( *psTwo ) ) >= sArgs.jaccard_arg ) { 00769 cerr << "Merging @" << d << ' ' << iCutoffOne << ':' << iModuleOne << " (" << 00770 psOne->m_setiGenes.size( ) << ") " << iCutoffOne << ':' << iModuleTwo << " (" << 00771 psTwo->m_setiGenes.size( ) << ')' << endl; 00772 psOne->Merge( *psTwo ); 00773 delete vecpsModulesOne[ iModuleTwo ]; 00774 vecpsModulesOne[ iModuleTwo ] = NULL; 00775 iModuleTwo--; 00776 fDone = false; } } } 00777 for( iCutoffTwo = ( iCutoffOne + 1 ); iCutoffTwo < veciIndices.size( ); ++iCutoffTwo ) { 00778 vector<SModule*>& vecpsModulesTwo = vecvecpsModules[ veciIndices[ iCutoffTwo ] ]; 00779 00780 for( iModuleOne = 0; iModuleOne < vecpsModulesOne.size( ); ++iModuleOne ) { 00781 SModule* psOne = vecpsModulesOne[ iModuleOne ]; 00782 00783 if( !psOne ) 00784 continue; 00785 for( iModuleTwo = 0; iModuleTwo < vecpsModulesTwo.size( ); ++iModuleTwo ) { 00786 SModule* psTwo = vecpsModulesTwo[ iModuleTwo ]; 00787 00788 if( !psTwo ) 00789 continue; 00790 if( ( d = psOne->Jaccard( *psTwo ) ) >= sArgs.jaccard_arg ) { 00791 cerr << "Merging @" << d << ' ' << iCutoffOne << ':' << iModuleOne << " (" << 00792 psOne->m_setiGenes.size( ) << ") " << iCutoffTwo << ':' << iModuleTwo << 00793 " (" << psTwo->m_setiGenes.size( ) << ')' << endl; 00794 psOne->Merge( *psTwo ); 00795 delete vecpsModulesTwo[ iModuleTwo ]; 00796 vecpsModulesTwo[ iModuleTwo ] = NULL; 00797 iModuleTwo--; 00798 fDone = false; } } } } } } 00799 00800 for( iCutoffOne = 0; iCutoffOne < veciIndices.size( ); ++iCutoffOne ) { 00801 vector<SModule*>& vecpsModulesOne = vecvecpsModules[ veciIndices[ iCutoffOne ] ]; 00802 00803 for( iCutoffTwo = ( iCutoffOne + 1 ); iCutoffTwo < veciIndices.size( ); ++iCutoffTwo ) { 00804 vector<SModule*>& vecpsModulesTwo = vecvecpsModules[ veciIndices[ iCutoffTwo ] ]; 00805 00806 for( iModuleOne = 0; iModuleOne < vecpsModulesOne.size( ); ++iModuleOne ) { 00807 SModule* psOne = vecpsModulesOne[ iModuleOne ]; 00808 00809 if( !psOne ) 00810 continue; 00811 for( iModuleTwo = 0; iModuleTwo < vecpsModulesTwo.size( ); ++iModuleTwo ) { 00812 SModule* psTwo = vecpsModulesTwo[ iModuleTwo ]; 00813 00814 if( !psTwo ) 00815 continue; 00816 if( !psTwo->IsChild( psOne ) && ( ( d = ( (float)psOne->Intersection( *psTwo ) / 00817 psOne->m_setiGenes.size( ) ) ) >= sArgs.intersection_arg ) ) { 00818 cerr << vecdModules[ veciIndices[ iCutoffOne ] ] << ':' << iModuleOne << 00819 " child of " << vecdModules[ veciIndices[ iCutoffTwo ] ] << ':' << iModuleTwo << 00820 " (" << psOne->m_setiGenes.size( ) << ", " << psTwo->m_setiGenes.size( ) << ", " << 00821 d << ')' << endl; 00822 psTwo->m_setpsChildren.insert( psOne ); } } } } } 00823 00824 if( sArgs.output_arg ) { 00825 ofsm.open( sArgs.output_arg ); 00826 postm = &ofsm; } 00827 else 00828 postm = &cout; 00829 for( iCutoffOne = 0; iCutoffOne < veciIndices.size( ); ++iCutoffOne ) { 00830 vector<SModule*>& vecpsModulesOne = vecvecpsModules[ veciIndices[ iCutoffOne ] ]; 00831 00832 for( iModuleOne = 0; iModuleOne < vecpsModulesOne.size( ); ++iModuleOne ) { 00833 SModule* psOne = vecpsModulesOne[ iModuleOne ]; 00834 00835 if( psOne ) 00836 psOne->Save( *postm, vecdModules[ veciIndices[ iCutoffOne ] ], sSimpleome ); } } 00837 if( sArgs.output_arg ) 00838 ofsm.close( ); 00839 00840 for( i = 0; i < vecvecpsModules.size( ); ++i ) 00841 for( j = 0; j < vecvecpsModules[ i ].size( ); ++j ) 00842 if( vecvecpsModules[ i ][ j ] ) 00843 delete vecvecpsModules[ i ][ j ]; 00844 00845 return 0; } 00846 00847 int MainRevDATs( const gengetopt_args_info& sArgs ) { 00848 vector<string> vecstrTerms, vecstrGenes; 00849 vector<CGenes*> vecpTerms; 00850 CGenome Genome; 00851 CDat DatOut; 00852 CHalfMatrix<uint32_t> MatCounts; 00853 size_t i, j, k, iOne, iTwo, iArg, iA, iB; 00854 float d; 00855 00856 if( !sArgs.terms_arg ) { 00857 cerr << "Terms argument required" << endl; 00858 return 1; } 00859 if( !CGenes::Open( sArgs.terms_arg, Genome, vecstrTerms, vecpTerms ) ) { 00860 cerr << "Could not open: " << sArgs.terms_arg << endl; 00861 return 1; } 00862 00863 { 00864 set<string> setstrGenes; 00865 00866 for( i = 0; i < vecpTerms.size( ); ++i ) 00867 for( j = 0; j < vecpTerms[i]->GetGenes( ); ++j ) 00868 setstrGenes.insert( vecpTerms[i]->GetGene( j ).GetName( ) ); 00869 vecstrGenes.resize( setstrGenes.size( ) ); 00870 copy( setstrGenes.begin( ), setstrGenes.end( ), vecstrGenes.begin( ) ); 00871 } 00872 00873 DatOut.Open( vecstrGenes, false ); 00874 for( i = 0; i < DatOut.GetGenes( ); ++i ) 00875 memset( DatOut.Get( i ), 0, ( DatOut.GetGenes( ) - i - 1 ) * sizeof(*DatOut.Get( i )) ); 00876 MatCounts.Initialize( DatOut.GetGenes( ) ); 00877 MatCounts.Clear( ); 00878 for( iArg = 0; iArg < input_files.size(); ++iArg ) { 00879 CDat DatIn; 00880 vector<vector<size_t> > vecveciGenes; 00881 00882 if( !DatIn.Open( input_files[iArg].c_str() ) ) { 00883 cerr << "Could not open: " << input_files[iArg] << endl; 00884 return 1; } 00885 vecveciGenes.resize( DatIn.GetGenes( ) ); 00886 for( i = 0; i < DatIn.GetGenes( ); ++i ) { 00887 for( j = 0; j < vecstrTerms.size( ); ++j ) 00888 if( DatIn.GetGene( i ) == vecstrTerms[j] ) 00889 break; 00890 if( j < vecstrTerms.size( ) ) { 00891 for( k = 0; k < vecpTerms[j]->GetGenes( ); ++k ) 00892 if( ( iOne = DatOut.GetGene( vecpTerms[j]->GetGene( k ).GetName( ) ) ) != -1 ) 00893 vecveciGenes[i].push_back( iOne ); } 00894 else 00895 cerr << "Unrecognized gene: " << DatIn.GetGene( i ) << endl; } 00896 for( i = 0; i < DatIn.GetGenes( ); ++i ) { 00897 if( vecveciGenes[i].empty( ) ) 00898 continue; 00899 for( j = ( i + 1 ); j < DatIn.GetGenes( ); ++j ) { 00900 if( CMeta::IsNaN( d = DatIn.Get( i, j ) ) ) 00901 continue; 00902 for( iA = 0; iA < vecveciGenes[i].size( ); ++iA ) { 00903 iOne = vecveciGenes[i][iA]; 00904 for( iB = 0; iB < vecveciGenes[j].size( ); ++iB ) { 00905 iTwo = vecveciGenes[j][iB]; 00906 MatCounts.Get( iOne, iTwo )++; 00907 DatOut.Get( iOne, iTwo ) += d; } } } } } 00908 00909 for( i = 0; i < DatOut.GetGenes( ); ++i ) 00910 for( j = ( i + 1 ); j < DatOut.GetGenes( ); ++j ) 00911 if( k = MatCounts.Get( i, j ) ) 00912 DatOut.Get( i, j ) /= k; 00913 else 00914 DatOut.Set( i, j, CMeta::GetNaN( ) ); 00915 DatOut.Save( sArgs.output_arg ); 00916 00917 for( i = 0; i < vecpTerms.size( ); ++i ) 00918 delete vecpTerms[i]; 00919 return 0; }