Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below.flip 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 00025 00026 #define WT_MULTIPLIER 50 00027 #define WT_MULTIPLIERf 50.0 00028 00029 class CRegularize; 00030 00031 static const char c_acDab[] = ".dab"; 00032 static const char c_acDat[] = ".dat"; 00033 static const char c_acQDab[] = ".qdab"; 00034 static const char c_acQuant[] = ".quant"; 00035 static const char c_acTxt[] = ".txt"; 00036 00037 typedef CFullMatrix<size_t> CCountMatrix; 00038 00039 struct SLearn { 00040 CCountMatrix* m_pMatCounts; 00041 const CGenes* m_pGenes; 00042 const CGenes* m_pUbikGenes; 00043 const CDataPair* m_pAnswers; 00044 const CDatFilter* m_pDat; 00045 size_t m_iZero; 00046 CRegularize* m_pRegularize; 00047 size_t m_iDat; 00048 bool m_bInPos; 00049 bool m_bInNeg; 00050 bool m_bBridgePos; 00051 bool m_bBridgeNeg; 00052 bool m_bOutPos; 00053 bool m_bOutNeg; 00054 bool m_isDatWeighted; 00055 bool m_bFlipNeg; 00056 const CDat* m_pwDat; 00057 }; 00058 00059 struct SEvaluate { 00060 const CBayesNetMinimal* m_pBN; 00061 const CDataPair* m_pDat; 00062 const CGenes* m_pGenes; 00063 CDat* m_pYes; 00064 CDat* m_pNo; 00065 size_t m_iZero; 00066 size_t m_iNode; 00067 const vector<size_t>* m_pveciGenes; 00068 bool m_fFirst; 00069 string m_strName; 00070 bool m_bLogratio; 00071 }; 00072 00073 struct SEvaluate2 { 00074 size_t m_iBegin; 00075 size_t m_iEnd; 00076 const CBayesNetMinimal* m_pBN; 00077 const vector<CBayesNetMinimal*>* m_pvecpBNs; 00078 const CDataPair* m_pDat; 00079 CDat* m_pYes; 00080 CDat* m_pNo; 00081 size_t m_iZero; 00082 const vector<size_t>* m_pveciGenes; 00083 string m_strName; 00084 const vector<size_t>* m_pveciBNs; 00085 size_t m_iNode; 00086 pthread_spinlock_t m_sLock; 00087 }; 00088 00089 class CRegularize { 00090 public: 00091 ~CRegularize( ) { 00092 00093 Clear( ); 00094 } 00095 00096 bool Open( const char* szFile, CGenome& Genome, const CDat& Answers ) { 00097 vector<CGenes*> vecpGenes; 00098 size_t i, j, k; 00099 map<string, size_t> mapstriGenes; 00100 map<string, size_t>::const_iterator iterGene; 00101 00102 Clear( ); 00103 if( !Genome.Open( szFile, vecpGenes ) ) 00104 return false; 00105 00106 m_vecsetiGenes.resize( vecpGenes.size( ) ); 00107 m_vecveciGenes.resize( Answers.GetGenes( ) ); 00108 for( i = 0; i < m_vecsetiGenes.size( ); ++i ) { 00109 const CGenes& Genes = *vecpGenes[i]; 00110 00111 for( j = 0; j < Genes.GetGenes( ); ++j ) { 00112 const string& strGene = Genes.GetGene( j ).GetName( ); 00113 00114 if( ( iterGene = mapstriGenes.find( strGene ) ) == mapstriGenes.end( ) ) 00115 mapstriGenes[strGene] = k = Answers.GetGene( strGene ); 00116 else 00117 k = iterGene->second; 00118 if( k != -1 ) { 00119 m_vecsetiGenes[i].insert( k ); 00120 m_vecveciGenes[k].push_back( i ); 00121 } 00122 } 00123 } 00124 for( i = 0; i < vecpGenes.size( ); ++i ) 00125 delete vecpGenes[i]; 00126 00127 return true; 00128 } 00129 00130 void SaveCounts( ostream& ostm, const vector<string>& vecstrNames ) { 00131 size_t iDataset, iValue, iGroup; 00132 const CFullMatrix<size_t>* pMat; 00133 00134 for( iDataset = 0; iDataset < m_vecpMatCounts.size( ); ++iDataset ) { 00135 if( !( pMat = m_vecpMatCounts[iDataset] ) ) 00136 continue; 00137 ostm << vecstrNames[iDataset] << endl; 00138 for( iGroup = 0; iGroup < m_vecsetiGenes.size( ); ++iGroup ) { 00139 for( iValue = 0; iValue < pMat->GetRows( ); ++iValue ) 00140 ostm << ( iValue ? "\t" : "" ) << pMat->Get( iValue, iGroup ); 00141 ostm << endl; 00142 } 00143 } 00144 } 00145 00146 void Save( ostream& ostm, const vector<string>& vecstrNames ) { 00147 size_t iDatasetOne, iDatasetTwo, iValueOne, iValueTwo, iGroup, iCountOne, iCountTwo; 00148 const CFullMatrix<size_t>* pMatOne; 00149 const CFullMatrix<size_t>* pMatTwo; 00150 vector<double> vecdOne, vecdTwo, vecdGroups; 00151 vector<size_t> veciCounts; 00152 CFullMatrix<double> MatJoint; 00153 double dOne, dTwo, dJoint, dMI; 00154 00155 veciCounts.resize( m_vecsetiGenes.size( ) ); 00156 fill( veciCounts.begin( ), veciCounts.end( ), 0 ); 00157 for( iCountOne = iDatasetOne = 0; iDatasetOne < m_vecpMatCounts.size( ); ++iDatasetOne ) { 00158 if( !( pMatOne = m_vecpMatCounts[iDatasetOne] ) ) 00159 continue; 00160 for( iValueOne = 0; iValueOne < pMatOne->GetRows( ); ++iValueOne ) 00161 for( iGroup = 0; iGroup < pMatOne->GetColumns( ); ++iGroup ) { 00162 iCountTwo = pMatOne->Get( iValueOne, iGroup ); 00163 veciCounts[iGroup] += iCountTwo; 00164 iCountOne += iCountTwo; 00165 } 00166 } 00167 vecdGroups.resize( veciCounts.size( ) ); 00168 for( iGroup = 0; iGroup < vecdGroups.size( ); ++iGroup ) 00169 vecdGroups[iGroup] = (double)veciCounts[iGroup] / iCountOne; 00170 00171 for( iDatasetOne = 0; iDatasetOne < m_vecpMatCounts.size( ); ++iDatasetOne ) { 00172 if( !( pMatOne = m_vecpMatCounts[iDatasetOne] ) ) 00173 continue; 00174 /* 00175 for( iGroup = 0; iGroup < m_vecsetiGenes.size( ); ++iGroup ) { 00176 for( iCountOne = iValueOne = 0; iValueOne < pMatOne->GetRows( ); ++iValueOne ) 00177 iCountOne += pMatOne->Get( iValueOne, iGroup ); 00178 for( iValueOne = 0; iValueOne < pMatOne->GetRows( ); ++iValueOne ) 00179 cerr << ( iValueOne ? "\t" : "" ) << ( (double)pMatOne->Get( iValueOne, iGroup ) / iCountOne ); 00180 cerr << endl; } 00181 //*/ 00182 vecdOne.resize( pMatOne->GetRows( ) ); 00183 for( iDatasetTwo = iDatasetOne; iDatasetTwo < m_vecpMatCounts.size( ); ++iDatasetTwo ) { 00184 if( !( pMatTwo = m_vecpMatCounts[iDatasetTwo] ) ) 00185 continue; 00186 vecdTwo.resize( pMatTwo->GetRows( ) ); 00187 MatJoint.Initialize( vecdOne.size( ), vecdTwo.size( ) ); 00188 00189 fill( vecdOne.begin( ), vecdOne.end( ), 0.0f ); 00190 fill( vecdTwo.begin( ), vecdTwo.end( ), 0.0f ); 00191 MatJoint.Clear( ); 00192 for( iGroup = 0; iGroup < m_vecsetiGenes.size( ); ++iGroup ) { 00193 for( iCountOne = iValueOne = 0; iValueOne < vecdOne.size( ); ++iValueOne ) 00194 iCountOne += pMatOne->Get( iValueOne, iGroup ); 00195 for( iCountTwo = iValueTwo = 0; iValueTwo < vecdTwo.size( ); ++iValueTwo ) 00196 iCountTwo += pMatTwo->Get( iValueTwo, iGroup ); 00197 for( iValueOne = 0; iValueOne < vecdOne.size( ); ++iValueOne ) { 00198 dOne = (double)pMatOne->Get( iValueOne, iGroup ) / ( iCountOne ? iCountOne : 1 ); 00199 vecdOne[iValueOne] += dOne * vecdGroups[iGroup]; 00200 for( iValueTwo = 0; iValueTwo < vecdTwo.size( ); ++iValueTwo ) { 00201 dTwo = (double)pMatTwo->Get( iValueTwo, iGroup ) / ( iCountTwo ? iCountTwo : 1 ); 00202 if( !iValueOne ) 00203 vecdTwo[iValueTwo] += dTwo * vecdGroups[iGroup]; 00204 MatJoint.Get( iValueOne, iValueTwo ) += dOne * dTwo * vecdGroups[iGroup]; 00205 } 00206 } 00207 } 00208 /* 00209 if( iDatasetOne == iDatasetTwo ) { 00210 MatJoint.Clear( ); 00211 for( iValueOne = 0; iValueOne < vecdOne.size( ); ++iValueOne ) 00212 MatJoint.Set( iValueOne, iValueOne, vecdOne[iValueOne] ); } 00213 //*/ 00214 /* 00215 cerr << "One: " << vecstrNames[iDatasetOne] << endl; 00216 for( iValueOne = 0; iValueOne < vecdOne.size( ); ++iValueOne ) 00217 cerr << ( iValueOne ? "\t" : "" ) << vecdOne[iValueOne]; 00218 cerr << endl; 00219 cerr << "Two: " << vecstrNames[iDatasetTwo] << endl; 00220 for( iValueTwo = 0; iValueTwo < vecdTwo.size( ); ++iValueTwo ) 00221 cerr << ( iValueTwo ? "\t" : "" ) << vecdTwo[iValueTwo]; 00222 cerr << endl; 00223 cerr << "Joint:" << endl; 00224 for( iValueOne = 0; iValueOne < vecdOne.size( ); ++iValueOne ) { 00225 for( iValueTwo = 0; iValueTwo < vecdTwo.size( ); ++iValueTwo ) 00226 cerr << ( iValueTwo ? "\t" : "" ) << MatJoint.Get( iValueOne, iValueTwo ); 00227 cerr << endl; } 00228 //*/ 00229 00230 for( dMI = 0,iValueOne = 0; iValueOne < vecdOne.size( ); ++iValueOne ) { 00231 dOne = vecdOne[iValueOne]; 00232 for( iValueTwo = 0; iValueTwo < vecdTwo.size( ); ++iValueTwo ) 00233 if( dJoint = MatJoint.Get( iValueOne, iValueTwo ) ) 00234 dMI += dJoint * ( dJoint ? log( dJoint / dOne / vecdTwo[iValueTwo] ) : 0 ); 00235 } 00236 //cerr << "MI: " << dMI << endl; 00237 dMI = ( dMI < 0 ) ? 0 : ( dMI / log( 2.0f ) ); 00238 00239 ostm << vecstrNames[iDatasetOne] << '\t' << vecstrNames[iDatasetTwo] << '\t' << dMI << endl; 00240 } 00241 } 00242 } 00243 00244 void Add( size_t iDataset, const CDatFilter& Dat, size_t iOne, size_t iTwo, size_t iValue ) { 00245 size_t i; 00246 CFullMatrix<size_t>* pMat; 00247 00248 if( m_vecsetiGenes.empty( ) ) 00249 return; 00250 00251 while( m_vecpMatCounts.size( ) <= iDataset ) 00252 m_vecpMatCounts.push_back( NULL ); 00253 if( !( pMat = m_vecpMatCounts[iDataset] ) ) { 00254 m_vecpMatCounts[iDataset] = pMat = new CFullMatrix<size_t>( ); 00255 pMat->Initialize( Dat.GetValues( ), m_vecsetiGenes.size( ) ); 00256 pMat->Clear( ); 00257 } 00258 for( i = 0; i < m_vecveciGenes[iOne].size( ); ++i ) 00259 pMat->Get( iValue, m_vecveciGenes[iOne][i] )++; 00260 for( i = 0; i < m_vecveciGenes[iTwo].size( ); ++i ) 00261 pMat->Get( iValue, m_vecveciGenes[iTwo][i] )++; 00262 } 00263 00264 private: 00265 vector<set<size_t> > m_vecsetiGenes; 00266 vector<vector<size_t> > m_vecveciGenes; 00267 vector<CFullMatrix<size_t>*> m_vecpMatCounts; 00268 00269 void Clear( ) { 00270 size_t i; 00271 CFullMatrix<size_t>* pMat; 00272 00273 for( i = 0; i < m_vecpMatCounts.size( ); ++i ) 00274 if( pMat = m_vecpMatCounts[i] ) 00275 delete pMat; 00276 m_vecpMatCounts.clear( ); 00277 m_vecsetiGenes.clear( ); 00278 m_vecveciGenes.clear( ); 00279 } 00280 }; 00281 00282 void* learn( void* ); 00283 void* evaluate( void* ); 00284 void* evaluate2( void* ); 00285 void* finalize( void* ); 00286 int main_count( const gengetopt_args_info&, const map<string, size_t>&, const CGenes&, const CGenes&, 00287 const CGenes&, const CGenes&, const CGenes&); 00288 int main_xdsls( const gengetopt_args_info&, const map<string, size_t>&, const map<string, size_t>&, 00289 const vector<string>& ); 00290 int main_inference( const gengetopt_args_info&, const map<string, size_t>&, const map<string, size_t>& ); 00291 int main_inference2( const gengetopt_args_info&, const map<string, size_t>&, const map<string, size_t>& ); 00292 00293 int main( int iArgs, char** aszArgs ) { 00294 gengetopt_args_info sArgs; 00295 map<string, size_t> mapstriZeros, mapstriDatasets; 00296 vector<string> vecstrContexts; 00297 int iRet; 00298 size_t i; 00299 CGenome Genome; 00300 CGenes GenesIn( Genome ), GenesEx( Genome ), GenesEd( Genome ), GenesTm( Genome ), GenesUbik( Genome); 00301 00302 #ifdef WIN32 00303 pthread_win32_process_attach_np( ); 00304 #endif // WIN32 00305 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00306 cmdline_parser_print_help( ); 00307 return 1; 00308 } 00309 CMeta Meta( sArgs.verbosity_arg ); 00310 if(sArgs.reggroups_given && sArgs.weights_flag){ 00311 cerr << "Regularization is not supported for weighted contexts." << endl; 00312 return 1;} 00313 if( sArgs.pseudocounts_arg < 0 ) 00314 sArgs.pseudocounts_arg = CMeta::GetNaN( ); 00315 if( sArgs.zeros_arg ) { 00316 ifstream ifsm; 00317 vector<string> vecstrLine; 00318 char acLine[ 1024 ]; 00319 00320 ifsm.open( sArgs.zeros_arg ); 00321 if( !ifsm.is_open( ) ) { 00322 cerr << "Couldn't open: " << sArgs.zeros_arg << endl; 00323 return 1; 00324 } 00325 while( !ifsm.eof( ) ) { 00326 ifsm.getline( acLine, ARRAYSIZE(acLine) - 1 ); 00327 acLine[ ARRAYSIZE(acLine) - 1 ] = 0; 00328 vecstrLine.clear( ); 00329 CMeta::Tokenize( acLine, vecstrLine ); 00330 if( vecstrLine.empty( ) ) 00331 continue; 00332 mapstriZeros[ vecstrLine[ 0 ] ] = atoi( vecstrLine[ 1 ].c_str( ) ); 00333 } 00334 } 00335 00336 if( sArgs.datasets_arg ) { 00337 ifstream ifsm; 00338 char acLine[ 1024 ]; 00339 vector<string> vecstrLine, vecstrDatasets; 00340 00341 ifsm.open( sArgs.datasets_arg ); 00342 if( !ifsm.is_open( ) ) { 00343 cerr << "Could not open: " << sArgs.datasets_arg << endl; 00344 return 1; 00345 } 00346 while( !ifsm.eof( ) ) { 00347 ifsm.getline( acLine, ARRAYSIZE(acLine) - 1 ); 00348 acLine[ ARRAYSIZE(acLine) - 1 ] = 0; 00349 if( !acLine[ 0 ] ) 00350 continue; 00351 vecstrLine.clear( ); 00352 CMeta::Tokenize( acLine, vecstrLine ); 00353 if( vecstrLine.size( ) < 2 ) { 00354 cerr << "Illegal datasets line: " << acLine << endl; 00355 return 1; 00356 } 00357 if( ( iRet = atol( vecstrLine[ 0 ].c_str( ) ) ) <= 0 ) { 00358 cerr << "Dataset indices must be greater than zero: " << acLine << endl; 00359 return 1; 00360 } 00361 mapstriDatasets[ vecstrLine[ 1 ] ] = --iRet; 00362 if( vecstrDatasets.size( ) <= (size_t)iRet ) 00363 vecstrDatasets.resize( iRet + 1 ); 00364 vecstrDatasets[ iRet ] = vecstrLine[ 1 ]; 00365 } 00366 ifsm.close( ); 00367 for( i = 0; i < vecstrDatasets.size( ); ++i ) 00368 if( vecstrDatasets[ i ].empty( ) ) { 00369 cerr << "Missing dataset: " << ( i + 1 ) << endl; 00370 return 1; 00371 } 00372 } 00373 00374 if( sArgs.contexts_arg ) { 00375 ifstream ifsm; 00376 char acLine[ 2048 ]; 00377 vector<string> vecstrLine; 00378 00379 ifsm.open( sArgs.contexts_arg ); 00380 if( !ifsm.is_open( ) ) { 00381 cerr << "Could not open: " << sArgs.contexts_arg << endl; 00382 return 1; 00383 } 00384 while( !ifsm.eof( ) ) { 00385 ifsm.getline( acLine, ARRAYSIZE(acLine) - 1 ); 00386 acLine[ ARRAYSIZE(acLine) - 1 ] = 0; 00387 if( !acLine[ 0 ] ) 00388 continue; 00389 vecstrLine.clear( ); 00390 CMeta::Tokenize( acLine, vecstrLine ); 00391 if( vecstrLine.size( ) < 3 ) { 00392 cerr << "Illegal contexts line: " << acLine << endl; 00393 return 1; 00394 } 00395 if( ( atoi( vecstrLine[ 0 ].c_str( ) ) ) != ( vecstrContexts.size( ) + 1 ) ) { 00396 cerr << "Inconsistent context ID: " << acLine << endl; 00397 return 1; 00398 } 00399 vecstrContexts.push_back( vecstrLine[ 2 ] ); 00400 } 00401 ifsm.close( ); 00402 } 00403 00404 if( sArgs.genes_arg ) { 00405 if( !GenesIn.Open( sArgs.genes_arg ) ) { 00406 cerr << "Could not open: " << sArgs.genes_arg << endl; 00407 return 1; 00408 } 00409 } 00410 if( sArgs.genex_arg ) { 00411 if( !GenesEx.Open( sArgs.genex_arg ) ) { 00412 cerr << "Could not open: " << sArgs.genex_arg << endl; 00413 return 1; 00414 } 00415 } 00416 if( sArgs.genet_arg ) { 00417 if( !GenesTm.Open( sArgs.genet_arg ) ) { 00418 cerr << "Could not open: " << sArgs.genet_arg << endl; 00419 return 1; 00420 } 00421 } 00422 if( sArgs.genee_arg ) { 00423 if( !GenesEd.Open( sArgs.genee_arg ) ) { 00424 cerr << "Could not open: " << sArgs.genee_arg << endl; 00425 return 1; 00426 } 00427 } 00428 if( sArgs.ubiqg_arg ) { 00429 if( !GenesUbik.Open( sArgs.ubiqg_arg ) ) { 00430 cerr << "Could not open: " << sArgs.ubiqg_arg << endl; 00431 return 1; 00432 } 00433 } 00434 00435 00436 00437 if( sArgs.answers_arg ) 00438 iRet = main_count( sArgs, mapstriZeros, GenesIn, GenesEx, GenesTm, GenesEd, GenesUbik); 00439 else if( sArgs.counts_arg ) 00440 iRet = main_xdsls( sArgs, mapstriZeros, mapstriDatasets, vecstrContexts ); 00441 else if( sArgs.networks_arg ) 00442 iRet = sArgs.genewise_flag ? main_inference2( sArgs, mapstriZeros, mapstriDatasets ) : 00443 main_inference( sArgs, mapstriZeros, mapstriDatasets ); 00444 00445 #ifdef WIN32 00446 pthread_win32_process_detach_np( ); 00447 #endif // WIN32 00448 return iRet; 00449 } 00450 00451 int main_count( const gengetopt_args_info& sArgs, const map<string, size_t>& mapstriZeros, 00452 const CGenes& GenesIn, const CGenes& GenesEx, const CGenes& GenesTm, const CGenes& GenesEd, const CGenes& GenesUbik ) { 00453 size_t i, j, k, m, iTerm, iThread; 00454 vector<vector<CCountMatrix*>* > vecpvecpMats; 00455 vector<CCountMatrix*> vecpMatRoots; 00456 vector<CGenes*> vecpGenes; 00457 CDataPair Answers, Dat; 00458 CDat wDat; 00459 CDatFilter Filter, FilterIn, FilterEx, FilterTm, FilterEd; 00460 CDatFilter* pFilter; 00461 string strFile; 00462 vector<pthread_t> vecpthdThreads; 00463 vector<SLearn> vecsData; 00464 map<string, size_t>::const_iterator iterZero; 00465 CGenome Genome; 00466 vector<CGenome> Genomes; 00467 vector<string> vecstrNames; 00468 CRegularize Regularize; 00469 bool isDatWeighted=false; 00470 if( !Answers.Open( sArgs.answers_arg, false, !!sArgs.memmap_flag ) ) { 00471 cerr << "Could not open: " << sArgs.answers_arg << endl; 00472 return 1; 00473 } 00474 if( !sArgs.inputs_num && sArgs.reggroups_arg && !Regularize.Open( sArgs.reggroups_arg, Genome, Answers ) ) { 00475 cerr << "Could not open: " << sArgs.reggroups_arg << endl; 00476 return 1; 00477 } 00478 00479 vecpGenes.resize( sArgs.inputs_num ); 00480 Genomes.resize(vecpGenes.size( )); 00481 for( i = 0; i < vecpGenes.size( ); ++i ) { 00482 ifstream ifsm; 00483 00484 vecpGenes[ i ] = new CGenes( Genome ); 00485 ifsm.open( sArgs.inputs[ i ] ); 00486 00487 if(sArgs.weights_flag){ 00488 delete vecpGenes[ i ]; 00489 vecpGenes[ i ] = new CGenes(Genomes[ i ]); 00490 if( !vecpGenes[ i ]->OpenWeighted( ifsm ) ) { 00491 if(!wDat.Open(sArgs.inputs[i], !!sArgs.memmap_flag )){ 00492 cerr << "Couldn't open: " << sArgs.inputs[ i ] << endl; 00493 return 1; } 00494 else{ 00495 isDatWeighted = true; 00496 vecpGenes[ i ]->Open(wDat.GetGeneNames());} 00497 } 00498 } 00499 else{ 00500 if( !vecpGenes[ i ]->Open( ifsm ) ) { 00501 cerr << "Couldn't open: " << sArgs.inputs[ i ] << endl; 00502 return 1; } 00503 } 00504 } 00505 00506 if( !vecpGenes.size( ) ) { 00507 vecpGenes.insert( vecpGenes.begin( ), new CGenes( Genome ) ); 00508 vecpGenes[ 0 ]->Open( Answers.GetGeneNames( ) ); 00509 } 00510 vecpMatRoots.resize( vecpGenes.size( ) ); 00511 vecpthdThreads.resize( vecpMatRoots.size( ) ); 00512 vecsData.resize( vecpthdThreads.size( ) ); 00513 for( iTerm = 0; iTerm < vecpMatRoots.size( ); iTerm += iThread ) { 00514 cerr << "Learning root " << iTerm << '/' << vecpMatRoots.size( ) << endl; 00515 for( iThread = 0; ( ( sArgs.threads_arg == -1 ) || ( iThread < (size_t)sArgs.threads_arg ) ) && 00516 ( ( iTerm + iThread ) < vecpMatRoots.size( ) ); ++iThread ) { 00517 i = iTerm + iThread; 00518 vecsData[ i ].m_pMatCounts = vecpMatRoots[ i ] = new CCountMatrix( ); 00519 vecsData[ i ].m_pDat = NULL; 00520 vecsData[ i ].m_iDat = -1; 00521 vecsData[ i ].m_pGenes = vecpGenes[ i ]; 00522 vecsData[ i ].m_pUbikGenes = &GenesUbik; 00523 vecsData[ i ].m_pAnswers = &Answers; 00524 vecsData[ i ].m_iZero = -1; 00525 vecsData[ i ].m_pRegularize = &Regularize; 00526 vecsData[ i ].m_bInPos = sArgs.ctxtpos_flag; 00527 vecsData[ i ].m_bInNeg = sArgs.ctxtneg_flag; 00528 vecsData[ i ].m_bBridgePos = sArgs.bridgepos_flag; 00529 vecsData[ i ].m_bBridgeNeg = sArgs.bridgeneg_flag; 00530 vecsData[ i ].m_bOutPos = sArgs.outpos_flag; 00531 vecsData[ i ].m_bOutNeg = sArgs.outneg_flag; 00532 vecsData[ i ].m_isDatWeighted = false; 00533 vecsData[ i ].m_bFlipNeg =false; 00534 vecsData[ i ].m_pwDat = NULL; 00535 if( pthread_create( &vecpthdThreads[ i ], NULL, learn, &vecsData[ i ] ) ) { 00536 cerr << "Couldn't create root thread: " << sArgs.inputs[ i ] << endl; 00537 return 1; 00538 } 00539 } 00540 for( i = 0; i < iThread; ++i ) 00541 pthread_join( vecpthdThreads[ iTerm + i ], NULL ); 00542 } 00543 FOR_EACH_DIRECTORY_FILE((string)sArgs.directory_arg, strFile) 00544 string strName; 00545 vector<CCountMatrix*>* pvecpMatCounts; 00546 00547 if( CMeta::IsExtension( strFile, c_acDab ) ) { 00548 i = strFile.rfind( '.' ); 00549 strName = (string) sArgs.directory_arg + "/" + strFile.substr( 0, i ) + c_acDab; 00550 } else if( CMeta::IsExtension( strFile, c_acQDab ) ) { 00551 i = strFile.rfind( '.' ); 00552 strName = (string) sArgs.directory_arg + "/" + strFile.substr( 0, i ) + c_acQDab; 00553 } else if( CMeta::IsExtension( strFile, c_acDat ) ) { 00554 i = strFile.rfind( '.' ); 00555 strName = (string) sArgs.directory_arg + "/" + strFile.substr( 0, i ) + c_acDat; 00556 } else { 00557 continue; 00558 } 00559 00560 if( !(Dat.Open( strName.c_str( ), false, !!sArgs.memmap_flag )) ) { 00561 cerr << "Couldn't open: " << strName << endl; 00562 return 1; 00563 } 00564 cerr << "Processing: " << strName << endl; 00565 strName = CMeta::Filename( CMeta::Deextension( CMeta::Basename( strName.c_str( ) ) ) ); 00566 vecstrNames.push_back( strName ); 00567 00568 Filter.Attach( Dat ); 00569 pFilter = &Filter; 00570 if( GenesIn.GetGenes( ) ) { 00571 FilterIn.Attach( *pFilter, GenesIn, CDat::EFilterInclude, &Answers ); 00572 pFilter = &FilterIn; 00573 } 00574 if( GenesEx.GetGenes( ) ) { 00575 FilterEx.Attach( *pFilter, GenesEx, CDat::EFilterExclude, &Answers ); 00576 pFilter = &FilterEx; 00577 } 00578 if( GenesTm.GetGenes( ) ) { 00579 FilterTm.Attach( *pFilter, GenesTm, CDat::EFilterTerm, &Answers ); 00580 pFilter = &FilterTm; 00581 } 00582 if( GenesEd.GetGenes( ) ) { 00583 FilterEd.Attach( *pFilter, GenesEd, CDat::EFilterEdge, &Answers ); 00584 pFilter = &FilterEd; 00585 } 00586 00587 pvecpMatCounts = new vector<CCountMatrix*>( ); 00588 pvecpMatCounts->resize( vecpGenes.size( ) ); 00589 for( iTerm = 0; iTerm < vecpMatRoots.size( ); iTerm += iThread ) { 00590 cerr << "Learning term " << iTerm << '/' << vecpMatRoots.size( ) << endl; 00591 for( iThread = 0; ( ( sArgs.threads_arg == -1 ) || ( iThread < (size_t)sArgs.threads_arg ) ) && 00592 ( ( iTerm + iThread ) < vecpMatRoots.size( ) ); ++iThread ) { 00593 i = iTerm + iThread; 00594 vecsData[ i ].m_pMatCounts = (*pvecpMatCounts)[ i ] = new CCountMatrix( ); 00595 vecsData[ i ].m_pDat = pFilter; 00596 vecsData[ i ].m_iDat = vecstrNames.size( ) - 1; 00597 vecsData[ i ].m_pGenes = vecpGenes[ i ]; 00598 vecsData[ i ].m_pUbikGenes = &GenesUbik; 00599 vecsData[ i ].m_pAnswers = &Answers; 00600 vecsData[ i ].m_iZero = ( ( iterZero = mapstriZeros.find( strName ) ) == mapstriZeros.end( ) ) ? -1 : iterZero->second; 00601 vecsData[ i ].m_pRegularize = &Regularize; 00602 vecsData[ i ].m_bInPos = sArgs.ctxtpos_flag; 00603 vecsData[ i ].m_bInNeg = sArgs.ctxtneg_flag; 00604 vecsData[ i ].m_bBridgePos = sArgs.bridgepos_flag; 00605 vecsData[ i ].m_bBridgeNeg = sArgs.bridgeneg_flag; 00606 vecsData[ i ].m_bOutPos = sArgs.outpos_flag; 00607 vecsData[ i ].m_bOutNeg = sArgs.outneg_flag; 00608 vecsData[ i ].m_isDatWeighted = isDatWeighted; 00609 vecsData[ i ].m_bFlipNeg = !!sArgs.flipneg_flag; 00610 vecsData[ i ].m_pwDat = isDatWeighted? &wDat : NULL; 00611 if( pthread_create( &vecpthdThreads[ i ], NULL, learn, &vecsData[ i ] ) ) { 00612 cerr << "Couldn't create root thread: " << sArgs.inputs[ i ] << endl; 00613 return 1; 00614 } 00615 } 00616 for( i = 0; i < iThread; ++i ) 00617 pthread_join( vecpthdThreads[ iTerm + i ], NULL ); 00618 } 00619 vecpvecpMats.push_back( pvecpMatCounts ); 00620 } 00621 #ifdef _MSC_VER 00622 FindClose( hSearch ); 00623 #else // _MSC_VER 00624 closedir( pDir ); 00625 #endif // _MSC_VER 00626 00627 for( i = 0; i < vecpMatRoots.size( ); ++i ) { 00628 ofstream ofsm; 00629 00630 ofsm.open( ( (string)sArgs.output_arg + '/' + ( sArgs.inputs ? 00631 CMeta::Deextension( CMeta::Basename( sArgs.inputs[ i ] ) ) : sArgs.countname_arg ) + c_acTxt ).c_str( ) ); 00632 ofsm << ( sArgs.inputs ? CMeta::Deextension( CMeta::Basename( sArgs.inputs[ i ] ) ) : sArgs.countname_arg ) << 00633 '\t' << vecpvecpMats.size( ) << endl; 00634 for( j = 0; j < vecpMatRoots[ i ]->GetRows( ); ++j ) 00635 ofsm << ( j ? "\t" : "" ) << vecpMatRoots[ i ]->Get( j, 0 ); 00636 ofsm << endl; 00637 for( j = 0; j < vecpvecpMats.size( ); ++j ) { 00638 ofsm << vecstrNames[ j ] << endl; 00639 for( k = 0; k < (*vecpvecpMats[ j ])[ i ]->GetColumns( ); ++k ) { 00640 for( m = 0; m < (*vecpvecpMats[ j ])[ i ]->GetRows( ); ++m ) 00641 ofsm << ( m ? "\t" : "" ) << (*vecpvecpMats[ j ])[ i ]->Get( m, k ); 00642 ofsm << endl; 00643 } 00644 } 00645 } 00646 if( sArgs.reggroups_arg ) 00647 Regularize.Save( cout, vecstrNames ); 00648 00649 for( i = 0; i < vecpvecpMats.size( ); ++i ) { 00650 for( j = 0; j < vecpvecpMats[ i ]->size( ); ++j ) 00651 delete (*vecpvecpMats[ i ])[ j ]; 00652 delete vecpvecpMats[ i ]; 00653 } 00654 for( i = 0; i < vecpMatRoots.size( ); ++i ) { 00655 delete vecpMatRoots[ i ]; 00656 delete vecpGenes[ i ]; 00657 } 00658 00659 return 0; 00660 } 00661 00662 void* learn( void* pData ) { 00663 SLearn* psData; 00664 size_t i, j, k, iAnswer, iVal, iOne, iTwo; 00665 vector<bool> vecfGenes, vecfUbik; 00666 vector<size_t> veciGenes, vecfiGenes; 00667 vector<float> vecGeneWeights; 00668 psData = (SLearn*)pData; 00669 float w; 00670 00671 if (psData->m_pUbikGenes->GetGenes( )) { 00672 vecfUbik.resize( psData->m_pAnswers->GetGenes( ) ); 00673 for( i = 0; i < vecfUbik.size( ); ++i) { 00674 vecfUbik[ i ] = psData->m_pUbikGenes->IsGene( psData->m_pAnswers->GetGene( i ) ); 00675 } 00676 } 00677 vecfGenes.resize( psData->m_pAnswers->GetGenes( ) ); 00678 vecGeneWeights.resize( psData->m_pAnswers->GetGenes( ) ); 00679 vecfiGenes.resize(psData->m_pAnswers->GetGenes( )); 00680 for( i = 0; i < vecfGenes.size( ); ++i ){ 00681 vecfGenes[ i ] = psData->m_pGenes->IsGene( psData->m_pAnswers->GetGene( i ) );} 00682 if(psData->m_isDatWeighted){ 00683 for( i = 0; i < vecfGenes.size( ); ++i ){ 00684 vecfiGenes[ i ] = psData->m_pwDat->GetGene( psData->m_pAnswers->GetGene( i ) );} 00685 } 00686 else{ 00687 for( i = 0; i < vecfGenes.size( ); ++i ){ 00688 vecfiGenes[ i ] = psData->m_pGenes->GetGene( psData->m_pAnswers->GetGene( i ) );} 00689 } 00690 if(psData->m_pGenes->IsWeighted()){ 00691 for( i = 0; i < vecfGenes.size( ); ++i ){ 00692 vecGeneWeights[ i ] = psData->m_pGenes->GetGeneWeight(psData->m_pGenes->GetGene( psData->m_pAnswers->GetGene( i ) ));}} 00693 if( psData->m_pDat ) { 00694 psData->m_pMatCounts->Initialize( psData->m_pDat->GetValues( ), psData->m_pAnswers->GetValues( ) ); 00695 veciGenes.resize( psData->m_pAnswers->GetGenes( ) ); 00696 for( i = 0; i < veciGenes.size( ); ++i ) 00697 veciGenes[ i ] = psData->m_pDat->GetGene( psData->m_pAnswers->GetGene( i ) ); 00698 } 00699 else 00700 psData->m_pMatCounts->Initialize( psData->m_pAnswers->GetValues( ), 1 ); 00701 psData->m_pMatCounts->Clear( ); 00702 for( i = 0; i < psData->m_pAnswers->GetGenes( ); ++i ) { 00703 if( psData->m_pDat ) 00704 iOne = veciGenes[ i ]; 00705 for( j = ( i + 1 ); j < psData->m_pAnswers->GetGenes( ); ++j ) { 00706 iAnswer = psData->m_pAnswers->Quantize( psData->m_pAnswers->Get( i, j ) ); 00707 if( iAnswer == -1 ) { 00708 continue; 00709 } 00710 00711 if ( CMeta::SkipEdge( !!iAnswer, i, j, vecfGenes, vecfUbik, psData->m_bInPos, psData->m_bInNeg, psData->m_bBridgePos, psData->m_bBridgeNeg, psData->m_bOutPos, psData->m_bOutNeg ) ) { 00712 continue; 00713 } 00714 00715 if( psData->m_pDat ) { 00716 iTwo = veciGenes[ j ]; 00717 iVal = -1; 00718 iVal = psData->m_pDat->Quantize( iOne, iTwo, psData->m_iZero ); 00719 if( iVal == -1 ) 00720 continue; 00721 //When contexts are weighted, add counts = WT_MULTIPLIER * weight1 * weight 2 00722 if(psData->m_pGenes->IsWeighted()){ 00723 if(iAnswer==1 || !psData->m_bFlipNeg) 00724 for( k = 0; k <(vecGeneWeights[i]*vecGeneWeights[j]*WT_MULTIPLIER-0.5); k++){ 00725 psData->m_pMatCounts->Get( iVal, iAnswer )++; 00726 } 00727 else 00728 for( k = 0; k <((1-vecGeneWeights[i]*vecGeneWeights[j])*WT_MULTIPLIER-0.5); k++){ 00729 psData->m_pMatCounts->Get( iVal, iAnswer )++; 00730 } 00731 } 00732 else if(psData->m_isDatWeighted){ 00733 if(vecfiGenes[i] == -1 || vecfiGenes[j] == -1) 00734 continue; 00735 if(CMeta::IsNaN(w = psData->m_pwDat->Get( vecfiGenes[i],vecfiGenes[j] )) ) 00736 continue; 00737 if(iAnswer==1 || !psData->m_bFlipNeg) 00738 for( k = 0; k <(w *WT_MULTIPLIER-0.5); k++){ 00739 psData->m_pMatCounts->Get( iVal, iAnswer )++; 00740 } 00741 else 00742 for( k = 0; k <((1-w) *WT_MULTIPLIER-0.5); k++){ 00743 psData->m_pMatCounts->Get( iVal, iAnswer )++; 00744 } 00745 } 00746 else{ 00747 psData->m_pMatCounts->Get( iVal, iAnswer )++; 00748 //FIXME: Regularization has not been supported for weighted context 00749 psData->m_pRegularize->Add( psData->m_iDat, *psData->m_pDat, i, j, iVal ); 00750 } 00751 } 00752 else{ 00753 psData->m_pMatCounts->Get( iAnswer, 0 )++;} 00754 } 00755 } 00756 00757 00758 //Recale counts 00759 if(psData->m_pGenes->IsWeighted()||psData->m_isDatWeighted){ 00760 for (i=0; i< psData->m_pMatCounts->GetRows();i++) 00761 for(j=0; j<psData->m_pMatCounts->GetColumns();j++) 00762 psData->m_pMatCounts->Get( i,j ) = int(psData->m_pMatCounts->Get( i,j )/ WT_MULTIPLIERf + 0.5); 00763 } 00764 00765 return NULL; 00766 } 00767 00768 int main_xdsls( const gengetopt_args_info& sArgs, const map<string, size_t>& mapstriZeros, 00769 const map<string, size_t>& mapstriDatasets, const vector<string>& vecstrContexts ) { 00770 static const size_t c_iBuffer = 1024; 00771 char szBuffer[ c_iBuffer ]; 00772 string strFile; 00773 CBayesNetMinimal BNDefault; 00774 CBayesNetMinimal* pBN; 00775 vector<CBayesNetMinimal*> vecpBNs; 00776 ifstream ifsm; 00777 vector<string> vecstrLine; 00778 ofstream ofsm; 00779 uint32_t iSize; 00780 size_t i; 00781 vector<float> vecdAlphas; 00782 vector<unsigned char> vecbZeros; 00783 map<string, size_t>::const_iterator iterZero, iterDataset; 00784 float d; 00785 00786 if( mapstriDatasets.empty( ) ) { 00787 cerr << "No datasets given" << endl; 00788 return 1; 00789 } 00790 if( sArgs.alphas_arg ) { 00791 vecdAlphas.resize( mapstriDatasets.size( ) ); 00792 ifsm.clear( ); 00793 ifsm.open( sArgs.alphas_arg ); 00794 if( !ifsm.is_open( ) ) { 00795 cerr << "Could not open: " << sArgs.alphas_arg << endl; 00796 return 1; 00797 } 00798 while( !ifsm.eof( ) ) { 00799 ifsm.getline( szBuffer, c_iBuffer - 1 ); 00800 szBuffer[ c_iBuffer - 1 ] = 0; 00801 if( !szBuffer[ 0 ] ) 00802 continue; 00803 vecstrLine.clear( ); 00804 CMeta::Tokenize( szBuffer, vecstrLine ); 00805 if( vecstrLine.size( ) != 2 ) { 00806 cerr << "Illegal alphas line: " << szBuffer << endl; 00807 return 1; 00808 } 00809 if( ( iterDataset = mapstriDatasets.find( vecstrLine[ 0 ] ) ) == mapstriDatasets.end( ) ) 00810 cerr << "Dataset in counts but not database: " << vecstrLine[ 0 ] << endl; 00811 else 00812 vecdAlphas[ iterDataset->second ] = (float)atof( vecstrLine[ 1 ].c_str( ) ); 00813 } 00814 ifsm.close( ); 00815 } 00816 00817 vecbZeros.resize( mapstriDatasets.size( ) ); 00818 fill( vecbZeros.begin( ), vecbZeros.end( ), 0xFF ); 00819 for( iterZero = mapstriZeros.begin( ); iterZero != mapstriZeros.end( ); ++iterZero ) 00820 if( ( iterDataset = mapstriDatasets.find( iterZero->first ) ) == mapstriDatasets.end( ) ) 00821 cerr << "Unknown dataset in zeros file: " << iterZero->first << endl; 00822 else 00823 vecbZeros[ iterDataset->second ] = (unsigned char)iterZero->second; 00824 00825 if( !BNDefault.OpenCounts( sArgs.default_arg, mapstriDatasets, vecbZeros, vecdAlphas, 00826 sArgs.pseudocounts_arg ) ) { 00827 cerr << "Could not open default counts: " << ( sArgs.default_arg ? sArgs.default_arg : "not given" ) << 00828 endl; 00829 return 1; 00830 } 00831 if( sArgs.regularize_flag ) { 00832 d = BNDefault.Regularize( vecdAlphas ); 00833 BNDefault.OpenCounts( sArgs.default_arg, mapstriDatasets, vecbZeros, vecdAlphas, d ); 00834 } 00835 00836 for( i = 0; i < vecstrContexts.size( ); ++i ) { 00837 strFile = (string)sArgs.counts_arg + '/' + CMeta::Filename( vecstrContexts[ i ] ) + c_acTxt; 00838 cerr << "Processing: " << strFile << endl; 00839 pBN = new CBayesNetMinimal( ); 00840 if( !pBN->OpenCounts( strFile.c_str( ), mapstriDatasets, vecbZeros, vecdAlphas, sArgs.pseudocounts_arg, 00841 &BNDefault ) ) 00842 return 1; 00843 if( sArgs.regularize_flag ) { 00844 d = pBN->Regularize( vecdAlphas ); 00845 pBN->OpenCounts( strFile.c_str( ), mapstriDatasets, vecbZeros, vecdAlphas, d, &BNDefault ); 00846 } 00847 vecpBNs.push_back( pBN ); 00848 } 00849 00850 cerr << "Created " << ( vecpBNs.size( ) + 1 ) << " Bayesian classifiers" << endl; 00851 00852 if( sArgs.output_arg ) { 00853 if( sArgs.smile_flag ) { 00854 CBayesNetSmile BNSmile; 00855 vector<string> vecstrNames; 00856 map<string, size_t>::const_iterator iterName; 00857 00858 vecstrNames.resize( mapstriDatasets.size( ) ); 00859 for( iterName = mapstriDatasets.begin( ); iterName != mapstriDatasets.end( ); ++iterName ) 00860 vecstrNames[ iterName->second ] = iterName->first; 00861 00862 BNSmile.Open( BNDefault, vecstrNames ); 00863 BNSmile.Save( ( (string)sArgs.output_arg + '/' + BNDefault.GetID( ) + 00864 ( sArgs.xdsl_flag ? ".x" : "." ) + "dsl" ).c_str( ) ); 00865 for( i = 0; i < vecpBNs.size( ); ++i ) { 00866 BNSmile.Open( *vecpBNs[ i ], vecstrNames ); 00867 BNSmile.Save( ( (string)sArgs.output_arg + '/' + vecpBNs[ i ]->GetID( ) + 00868 ( sArgs.xdsl_flag ? ".x" : "." ) + "dsl" ).c_str( ) ); 00869 } 00870 } 00871 else { 00872 ofsm.open( sArgs.output_arg, ios_base::binary ); 00873 BNDefault.Save( ofsm ); 00874 iSize = (uint32_t)vecpBNs.size( ); 00875 ofsm.write( (const char*)&iSize, sizeof(iSize) ); 00876 for( i = 0; i < vecpBNs.size( ); ++i ) 00877 vecpBNs[ i ]->Save( ofsm ); 00878 ofsm.close( ); 00879 } 00880 } 00881 for( i = 0; i < vecpBNs.size( ); ++i ) 00882 delete vecpBNs[ i ]; 00883 00884 return 0; 00885 } 00886 00887 int main_inference( const gengetopt_args_info& sArgs, const map<string, size_t>& mapstriZeros, 00888 const map<string, size_t>& mapstriDatasets ) { 00889 map<string, size_t>::const_iterator iterDataset; 00890 vector<size_t> veciGenes; 00891 size_t i, j, iTerm, iThread; 00892 vector<CGenes*> vecpGenes; 00893 vector<CDat*> vecpYes, vecpNo; 00894 vector<string> vecstrTmps; 00895 CGenome Genome; 00896 vector<SEvaluate> vecsData; 00897 CBayesNetMinimal BNDefault; 00898 vector<CBayesNetMinimal*> vecpBNs; 00899 ifstream ifsm; 00900 uint32_t iSize; 00901 map<string, size_t>::const_iterator iterZero; 00902 vector<pthread_t> vecpthdThreads; 00903 bool fFirst; 00904 00905 ifsm.open( sArgs.networks_arg, ios_base::binary ); 00906 if( !BNDefault.Open( ifsm ) ) { 00907 cerr << "Could not open: " << sArgs.networks_arg << endl; 00908 return 1; 00909 } 00910 ifsm.read( (char*)&iSize, sizeof(iSize) ); 00911 vecpBNs.resize( iSize ); 00912 for( i = 0; i < vecpBNs.size( ); ++i ) { 00913 vecpBNs[ i ] = new CBayesNetMinimal( ); 00914 if( !vecpBNs[ i ]->Open( ifsm ) ) { 00915 cerr << "Could not open: " << sArgs.networks_arg << endl; 00916 return 1; 00917 } 00918 } 00919 ifsm.close( ); 00920 00921 /* 00922 size_t k, m; 00923 for( i = 0; i < BNDefault.GetNodes( ); ++i ) 00924 for( j = 0; j < BNDefault.GetCPT( i ).GetRows( ); ++j ) 00925 for( k = 0; k < BNDefault.GetCPT( i ).GetColumns( ); ++k ) 00926 cout << i << '\t' << j << '\t' << k << '\t' << BNDefault.GetCPT( i ).Get( j, k ) << endl; 00927 for( m = 0; m < vecpBNs.size( ); ++m ) 00928 for( i = 0; i < vecpBNs[ m ]->GetNodes( ); ++i ) 00929 for( j = 0; j < vecpBNs[ m ]->GetCPT( i ).GetRows( ); ++j ) 00930 for( k = 0; k < vecpBNs[ m ]->GetCPT( i ).GetColumns( ); ++k ) 00931 cout << i << '\t' << j << '\t' << k << '\t' << vecpBNs[ m ]->GetCPT( i ).Get( j, k ) << endl; 00932 return 0; 00933 //*/ 00934 00935 if( !sArgs.genome_arg ) { 00936 cerr << "No genes given" << endl; 00937 return 1; 00938 } 00939 { 00940 CPCL PCLGenes( false ); 00941 00942 if( !PCLGenes.Open( sArgs.genome_arg, 1 ) ) { 00943 cerr << "Could not open: " << sArgs.genome_arg << endl; 00944 return 1; 00945 } 00946 for( i = 0; i < PCLGenes.GetGenes( ); ++i ) 00947 Genome.AddGene( PCLGenes.GetFeature( i, 1 ) ); 00948 } 00949 00950 vecpGenes.resize( sArgs.inputs_num ? sArgs.inputs_num : 1 ); 00951 vecpYes.resize( vecpGenes.size( ) ); 00952 vecpNo.resize( vecpGenes.size( ) ); 00953 vecstrTmps.resize( vecpNo.size( ) ); 00954 for( i = 0; i < vecpGenes.size( ); ++i ) { 00955 char* szTemp; 00956 00957 vecpGenes[ i ] = new CGenes( Genome ); 00958 if( sArgs.inputs_num ) { 00959 ifstream ifsm; 00960 } 00961 else 00962 vecpGenes[ i ]->Open( Genome.GetGeneNames( ), false ); 00963 vecpYes[ i ] = new CDat( ); 00964 vecpNo[ i ] = new CDat( ); 00965 if( sArgs.memmapout_flag ) { 00966 vecpYes[ i ]->Open( Genome.GetGeneNames( ), false, 00967 ((string) sArgs.output_arg + '/' + (sArgs.inputs_num ? CMeta::Basename( 00968 sArgs.inputs[ i ] ) : "global") + c_acDab).c_str( ) ); 00969 /*FIXME: _tempnam can result in a filename that ends up in use later via race condition */ 00970 if( !(szTemp = _tempnam( sArgs.temporary_arg, NULL )) ) { 00971 cerr << "Could not generate temporary file name in: " << sArgs.temporary_arg << endl; 00972 return 1; 00973 } 00974 cout << "File: " << szTemp << endl; 00975 vecstrTmps[ i ] = szTemp; 00976 free( szTemp ); 00977 vecpNo[ i ]->Open( Genome.GetGeneNames( ), false, vecstrTmps[ i ].c_str( ) ); 00978 } else { 00979 vecpYes[ i ]->Open( Genome.GetGeneNames( ), false, NULL ); 00980 vecpNo[ i ]->Open( Genome.GetGeneNames( ), false, NULL ); 00981 } 00982 } 00983 00984 veciGenes.resize( vecpYes[ 0 ]->GetGenes( ) ); 00985 vecsData.resize( vecpGenes.size( ) ); 00986 vecpthdThreads.resize( vecsData.size( ) ); 00987 for( fFirst = true,iterDataset = mapstriDatasets.begin( ); iterDataset != mapstriDatasets.end( ); 00988 fFirst = false,++iterDataset ) { 00989 CDataPair Dat; 00990 string strFile; 00991 00992 if( !Dat.Open( ( strFile = ( (string)sArgs.directory_arg + '/' + iterDataset->first + 00993 c_acDab ) ).c_str( ), false, !!sArgs.memmap_flag ) && !Dat.Open( ( strFile = ( (string)sArgs.directory_arg + '/' + iterDataset->first + 00994 c_acQDab ) ).c_str( ), false, !!sArgs.memmap_flag ) ) { 00995 cerr << "Couldn't open: " << strFile << endl; 00996 return 1; 00997 } 00998 cerr << "Processing: " << strFile << endl; 00999 for( i = 0; i < veciGenes.size( ); ++i ) 01000 veciGenes[ i ] = Dat.GetGene( vecpYes[ 0 ]->GetGene( i ) ); 01001 for( iTerm = 0; iTerm < vecpGenes.size( ); iTerm += iThread ) { 01002 for( iThread = 0; ( ( sArgs.threads_arg == -1 ) || ( iThread < (size_t)sArgs.threads_arg ) ) && 01003 ( ( iTerm + iThread ) < vecpGenes.size( ) ); ++iThread ) { 01004 i = iTerm + iThread; 01005 if( sArgs.inputs_num ) { 01006 for( j = 0; j < vecpBNs.size( ); ++j ) 01007 if( vecpBNs[ j ]->GetID( ) == CMeta::Deextension( CMeta::Basename( 01008 sArgs.inputs[ i ] ) ) ) 01009 break; 01010 if( j >= vecpBNs.size( ) ) { 01011 cerr << "Could not locate Bayes net for: " << sArgs.inputs[ i ] << endl; 01012 return 1; 01013 } 01014 vecsData[ i ].m_pBN = vecpBNs[ j ]; 01015 } 01016 else 01017 vecsData[ i ].m_pBN = &BNDefault; 01018 vecsData[ i ].m_pDat = &Dat; 01019 vecsData[ i ].m_pGenes = vecpGenes[ i ]; 01020 vecsData[ i ].m_pYes = vecpYes[ i ]; 01021 vecsData[ i ].m_pNo = vecpNo[ i ]; 01022 vecsData[ i ].m_iZero = ( ( iterZero = mapstriZeros.find( iterDataset->first ) ) == 01023 mapstriZeros.end( ) ) ? vecsData[ i ].m_pBN->GetDefault( iterDataset->second + 1 ) : 01024 iterZero->second; 01025 if( vecsData[ i ].m_iZero == 0xFF ) 01026 vecsData[ i ].m_iZero = -1; 01027 vecsData[ i ].m_iNode = iterDataset->second + 1; 01028 vecsData[ i ].m_pveciGenes = &veciGenes; 01029 vecsData[ i ].m_fFirst = fFirst; 01030 vecsData[ i ].m_strName = sArgs.inputs_num ? sArgs.inputs[ i ] : "global"; 01031 if( pthread_create( &vecpthdThreads[ i ], NULL, evaluate, &vecsData[ i ] ) ) { 01032 cerr << "Couldn't create evaluation thread: " << sArgs.inputs[ i ] << endl; 01033 return 1; 01034 } 01035 } 01036 for( i = 0; i < iThread; ++i ) 01037 pthread_join( vecpthdThreads[ iTerm + i ], NULL ); 01038 } 01039 } 01040 01041 for( iTerm = 0; iTerm < vecpGenes.size( ); iTerm += iThread ) { 01042 for( iThread = 0; ( ( sArgs.threads_arg == -1 ) || ( iThread < (size_t)sArgs.threads_arg ) ) && 01043 ( ( iTerm + iThread ) < vecpGenes.size( ) ); ++iThread ) { 01044 i = iTerm + iThread; 01045 if( sArgs.inputs_num ) { 01046 for( j = 0; j < vecpBNs.size( ); ++j ) 01047 if( vecpBNs[ j ]->GetID( ) == CMeta::Deextension( CMeta::Basename( sArgs.inputs[ i ] ) ) ) 01048 break; 01049 if( j >= vecpBNs.size( ) ) { 01050 cerr << "Could not locate Bayes net for: " << sArgs.inputs[ i ] << endl; 01051 return 1; 01052 } 01053 vecsData[ i ].m_pBN = vecpBNs[ j ]; 01054 } 01055 else 01056 vecsData[ i ].m_pBN = &BNDefault; 01057 vecsData[ i ].m_pYes = vecpYes[ i ]; 01058 vecsData[ i ].m_pNo = vecpNo[ i ]; 01059 vecsData[ i ].m_strName = sArgs.inputs_num ? sArgs.inputs[ i ] : "global"; 01060 vecsData[ i ].m_bLogratio = sArgs.logratio_flag; 01061 if( pthread_create( &vecpthdThreads[ i ], NULL, finalize, &vecsData[ i ] ) ) { 01062 cerr << "Couldn't create finalization thread: " << sArgs.inputs[ i ] << endl; 01063 return 1; 01064 } 01065 } 01066 for( i = 0; i < iThread; ++i ) 01067 pthread_join( vecpthdThreads[ iTerm + i ], NULL ); 01068 } 01069 01070 if( sArgs.memmapout_flag ) { 01071 for( i = 0; i < vecstrTmps.size( ); ++i ) { 01072 _unlink( vecstrTmps[ i ].c_str( ) ); 01073 } 01074 } 01075 for( i = 0; i < vecpBNs.size( ); ++i ) 01076 delete vecpBNs[ i ]; 01077 for( i = 0; i < vecpGenes.size( ); ++i ) { 01078 delete vecpNo[ i ]; 01079 if( !sArgs.memmapout_flag ) { 01080 vecpYes[ i ]->Save( ((string) sArgs.output_arg + '/' + (sArgs.inputs_num ? CMeta::Basename( 01081 sArgs.inputs[ i ] ) : "global") + c_acDab).c_str( ) ); 01082 } 01083 delete vecpYes[ i ]; 01084 delete vecpGenes[ i ]; 01085 } 01086 01087 return 0; 01088 } 01089 01090 void* evaluate( void* pData ) { 01091 SEvaluate* psData; 01092 size_t i, j, iOne, iTwo, iBin, iIndex; 01093 float* adYes; 01094 float* adNo; 01095 float dNo, dYes; 01096 01097 psData = (SEvaluate*)pData; 01098 if( psData->m_fFirst ) { 01099 float* adBuffer; 01100 01101 adBuffer = new float[ psData->m_pYes->GetGenes( ) ]; 01102 for( i = 0; i < psData->m_pNo->GetGenes( ); ++i ) 01103 adBuffer[ i ] = CMeta::GetNaN( ); 01104 for( i = 0; i < psData->m_pNo->GetGenes( ); ++i ) { 01105 if( !( i % 1000 ) ) 01106 cerr << "IN: " << psData->m_strName << ", " << i << endl; 01107 psData->m_pNo->Set( i, adBuffer ); 01108 } 01109 for( i = 0; i < psData->m_pYes->GetGenes( ); ++i ) { 01110 if( !( i % 1000 ) ) 01111 cerr << "IY: " << psData->m_strName << ", " << i << endl; 01112 psData->m_pYes->Set( i, adBuffer ); 01113 } 01114 delete[] adBuffer; 01115 } 01116 01117 dNo = log( psData->m_pBN->GetCPT( 0 ).Get( 0, 0 ) ); 01118 dYes = log( psData->m_pBN->GetCPT( 0 ).Get( 1, 0 ) ); 01119 adYes = new float[ psData->m_pYes->GetGenes( ) ]; 01120 adNo = new float[ psData->m_pNo->GetGenes( ) ]; 01121 for( i = 0; i < psData->m_pYes->GetGenes( ); ++i ) { 01122 if( !( i % 1000 ) ) 01123 cerr << "C: " << psData->m_strName << ", " << i << endl; 01124 if( ( ( iOne = (*psData->m_pveciGenes)[ i ] ) == -1 ) && ( psData->m_iZero == -1 ) ) 01125 continue; 01126 memcpy( adYes, psData->m_pYes->Get( i ), ( psData->m_pYes->GetGenes( ) - i - 1 ) * sizeof(*adYes) ); 01127 memcpy( adNo, psData->m_pNo->Get( i ), ( psData->m_pNo->GetGenes( ) - i - 1 ) * sizeof(*adNo) ); 01128 for( j = ( i + 1 ); j < psData->m_pYes->GetGenes( ); ++j ) { 01129 if( ( ( iTwo = (*psData->m_pveciGenes)[ j ] ) == -1 ) && ( psData->m_iZero == -1 ) ) 01130 continue; 01131 01132 iBin = psData->m_pDat->Quantize( iOne, iTwo, psData->m_iZero ); 01133 if( iBin == -1 ) 01134 continue; 01135 if( CMeta::IsNaN( adYes[ iIndex = ( j - i - 1 ) ] ) ) { 01136 adYes[ iIndex ] = dYes; 01137 adNo[ iIndex ] = dNo; 01138 } 01139 adNo[ iIndex ] += log( psData->m_pBN->GetCPT( psData->m_iNode ).Get( iBin, 0 ) ); 01140 adYes[ iIndex ] += log( psData->m_pBN->GetCPT( psData->m_iNode ).Get( iBin, 1 ) ); 01141 } 01142 psData->m_pNo->Set( i, adNo ); 01143 psData->m_pYes->Set( i, adYes ); 01144 } 01145 delete[] adYes; 01146 delete[] adNo; 01147 01148 return NULL; 01149 } 01150 01151 void* finalize( void* pData ) { 01152 SEvaluate* psData; 01153 size_t i, j; 01154 float* adYes; 01155 float* adNo; 01156 float dPrior, dYes, dNo; 01157 01158 psData = (SEvaluate*)pData; 01159 dPrior = psData->m_pBN->GetCPT( 0 ).Get( 1, 0 ); 01160 adYes = new float[ psData->m_pYes->GetGenes( ) ]; 01161 adNo = new float[ psData->m_pNo->GetGenes( ) ]; 01162 01163 dYes = log( dPrior ); 01164 dNo = log( psData->m_pBN->GetCPT( 0 ).Get( 0, 0 ) ); 01165 01166 for( i = 0; i < psData->m_pYes->GetGenes( ); ++i ) { 01167 if( !( i % 1000 ) ) 01168 cerr << "F: " << psData->m_strName << ", " << i << endl; 01169 memcpy( adYes, psData->m_pYes->Get( i ), ( psData->m_pYes->GetGenes( ) - i - 1 ) * sizeof(*adYes) ); 01170 memcpy( adNo, psData->m_pNo->Get( i ), ( psData->m_pNo->GetGenes( ) - i - 1 ) * sizeof(*adNo) ); 01171 for( j = 0; j < ( psData->m_pYes->GetGenes( ) - i - 1 ); ++j ) { 01172 if( psData->m_bLogratio ) { 01173 adYes[ j ] = CMeta::IsNaN( adYes[ j ] ) ? 0.0 : 01174 (float)( (double)(adYes[ j ] - dYes) - (double)(adNo[ j ] - dNo) ); 01175 } 01176 else { 01177 adYes[ j ] = CMeta::IsNaN( adYes[ j ] ) ? dPrior : 01178 (float)( 1 / ( 1 + exp( (double)adNo[ j ] - (double)adYes[ j ] ) ) ); 01179 } 01180 } 01181 psData->m_pYes->Set( i, adYes ); 01182 } 01183 delete[] adNo; 01184 delete[] adYes; 01185 01186 return NULL; 01187 } 01188 01190 01191 int main_inference2( const gengetopt_args_info& sArgs, const map<string, size_t>& mapstriZeros, 01192 const map<string, size_t>& mapstriDatasets ) { 01193 map<string, size_t>::const_iterator iterDataset; 01194 vector<size_t> veciGenes, veciBNs; 01195 size_t i, j, iThread; 01196 vector<CGenes*> vecpGenes; 01197 vector<CDat*> vecpYes, vecpNo; 01198 vector<string> vecstrTmps; 01199 CGenome Genome; 01200 vector<SEvaluate2> vecsData; 01201 CBayesNetMinimal BNDefault; 01202 vector<CBayesNetMinimal*> vecpBNs; 01203 ifstream ifsm; 01204 uint32_t iSize; 01205 map<string, size_t>::const_iterator iterZero; 01206 vector<pthread_t> vecpthdThreads; 01207 bool fFirst; 01208 01209 ifsm.open( sArgs.networks_arg, ios_base::binary ); 01210 if( !BNDefault.Open( ifsm ) ) { 01211 cerr << "Could not open: " << sArgs.networks_arg << endl; 01212 return 1; 01213 } 01214 ifsm.read( (char*)&iSize, sizeof(iSize) ); 01215 vecpBNs.resize( iSize ); 01216 for( i = 0; i < vecpBNs.size( ); ++i ) { 01217 vecpBNs[ i ] = new CBayesNetMinimal( ); 01218 if( !vecpBNs[ i ]->Open( ifsm ) ) { 01219 cerr << "Could not open: " << sArgs.networks_arg << endl; 01220 return 1; 01221 } 01222 } 01223 ifsm.close( ); 01224 01225 if( !sArgs.genome_arg ) { 01226 cerr << "No genes given" << endl; 01227 return 1; 01228 } 01229 { 01230 CPCL PCLGenes( false ); 01231 01232 if( !PCLGenes.Open( sArgs.genome_arg, 1 ) ) { 01233 cerr << "Could not open: " << sArgs.genome_arg << endl; 01234 return 1; 01235 } 01236 for( i = 0; i < PCLGenes.GetGenes( ); ++i ) 01237 Genome.AddGene( PCLGenes.GetFeature( i, 1 ) ); 01238 } 01239 01240 vecpGenes.resize( sArgs.inputs_num ? sArgs.inputs_num : 1 ); 01241 vecpYes.resize( 1 ); 01242 vecpNo.resize( vecpYes.size( ) ); 01243 vecstrTmps.resize( vecpNo.size( ) ); 01244 for( i = 0; i < vecpYes.size( ); ++i ) { 01245 char* szTemp; 01246 01247 vecpYes[ i ] = new CDat( ); 01248 vecpYes[ i ]->Open( Genome.GetGeneNames( ), false, ( (string)sArgs.output_arg + '/' + "global" + c_acDab ).c_str( ) ); 01249 vecpNo[ i ] = new CDat( ); 01250 if( !( szTemp = _tempnam( sArgs.temporary_arg, NULL ) ) ) { 01251 cerr << "Could not generate temporary file name in: " << sArgs.temporary_arg << endl; 01252 return 1; 01253 } 01254 vecstrTmps[ i ] = szTemp; 01255 free( szTemp ); 01256 vecpNo[ i ]->Open( Genome.GetGeneNames( ), false, vecstrTmps[ i ].c_str( ) ); 01257 } 01258 for( i = 0; i < vecpGenes.size( ); ++i ) { 01259 vecpGenes[ i ] = new CGenes( Genome ); 01260 if( sArgs.inputs_num ) { 01261 ifstream ifsm; 01262 } 01263 else 01264 vecpGenes[ i ]->Open( Genome.GetGeneNames( ), false ); 01265 } 01266 01267 { 01268 map<string, size_t> mapstriBNs; 01269 map<string, size_t>::const_iterator iterBN; 01270 string strGene; 01271 01272 for( i = 0; i < vecpBNs.size( ); ++i ) 01273 mapstriBNs[ vecpBNs[ i ]->GetID( ) ] = i; 01274 veciBNs.resize( vecpYes[ 0 ]->GetGenes( ) ); 01275 for( i = 0; i < veciBNs.size( ); ++i ) { 01276 strGene = CMeta::Filename( vecpYes[ 0 ]->GetGene( i ) ); 01277 veciBNs[ i ] = ( ( iterBN = mapstriBNs.find( strGene ) ) == mapstriBNs.end( ) ) ? -1 : 01278 iterBN->second; 01279 } 01280 } 01281 01282 for( i = 0; i < vecpYes.size( ); ++i ) { 01283 float* adBuffer; 01284 01285 adBuffer = new float[ vecpYes[ i ]->GetGenes( ) ]; 01286 for( j = 0; j < vecpNo[ i ]->GetGenes( ); ++j ) 01287 adBuffer[ j ] = log( BNDefault.GetCPT( 0 ).Get( 0, 0 ) ); 01288 for( j = 0; j < vecpNo[ i ]->GetGenes( ); ++j ) { 01289 if( !( j % 1000 ) ) 01290 cerr << "IN: global, " << j << endl; 01291 vecpNo[ i ]->Set( j, adBuffer ); 01292 } 01293 for( j = 0; j < vecpYes[ i ]->GetGenes( ); ++j ) 01294 adBuffer[ j ] = log( BNDefault.GetCPT( 0 ).Get( 1, 0 ) ); 01295 for( j = 0; j < vecpYes[ i ]->GetGenes( ); ++j ) { 01296 if( !( j % 1000 ) ) 01297 cerr << "IY: global, " << j << endl; 01298 vecpYes[ i ]->Set( j, adBuffer ); 01299 } 01300 delete[] adBuffer; 01301 } 01302 01303 veciGenes.resize( vecpYes[ 0 ]->GetGenes( ) ); 01304 vecsData.resize( ( sArgs.threads_arg == -1 ) ? 1 : sArgs.threads_arg ); 01305 vecpthdThreads.resize( vecsData.size( ) ); 01306 for( fFirst = true,iterDataset = mapstriDatasets.begin( ); iterDataset != mapstriDatasets.end( ); 01307 fFirst = false,++iterDataset ) { 01308 CDataPair Dat; 01309 string strFile; 01310 vector<size_t> veciCur; 01311 size_t iZero; 01312 01313 if( !Dat.Open( ( strFile = ( (string)sArgs.directory_arg + '/' + iterDataset->first + 01314 c_acDab ) ).c_str( ), false, !!sArgs.memmap_flag ) ) { 01315 cerr << "Couldn't open: " << strFile << endl; 01316 return 1; 01317 } 01318 cerr << "Processing: " << strFile << endl; 01319 for( i = 0; i < veciGenes.size( ); ++i ) 01320 veciGenes[ i ] = Dat.GetGene( vecpYes[ 0 ]->GetGene( i ) ); 01321 iZero = ( ( iterZero = mapstriZeros.find( iterDataset->first ) ) == mapstriZeros.end( ) ) ? 01322 BNDefault.GetDefault( iterDataset->second + 1 ) : iterZero->second; 01323 if( iZero == 0xFF ) 01324 iZero = -1; 01325 iThread = ( vecpYes[ 0 ]->GetGenes( ) + 1 ) / vecsData.size( ); 01326 for( i = j = 0; i < vecsData.size( ); ++i,j += iThread ) { 01327 vecsData[ i ].m_iBegin = j; 01328 vecsData[ i ].m_iEnd = min( vecpYes[ 0 ]->GetGenes( ), j + iThread ); 01329 vecsData[ i ].m_pBN = &BNDefault; 01330 vecsData[ i ].m_pvecpBNs = &vecpBNs; 01331 vecsData[ i ].m_pDat = &Dat; 01332 vecsData[ i ].m_pYes = vecpYes[ 0 ]; 01333 vecsData[ i ].m_pNo = vecpNo[ 0 ]; 01334 vecsData[ i ].m_iZero = iZero; 01335 vecsData[ i ].m_pveciGenes = &veciGenes; 01336 vecsData[ i ].m_strName = iterDataset->first; 01337 vecsData[ i ].m_pveciBNs = &veciBNs; 01338 vecsData[ i ].m_iNode = iterDataset->second + 1; 01339 if( fFirst ) 01340 pthread_spin_init( &vecsData[ i ].m_sLock, 0 ); 01341 if( pthread_create( &vecpthdThreads[ i ], NULL, evaluate2, &vecsData[ i ] ) ) { 01342 cerr << "Couldn't create evaluation thread: " << i << endl; 01343 return 1; 01344 } 01345 } 01346 for( i = 0; i < vecpthdThreads.size( ); ++i ) 01347 pthread_join( vecpthdThreads[ i ], NULL ); 01348 } 01349 01350 { 01351 CBayesNetMinimal* pBN; 01352 float* adYes; 01353 float* adNo; 01354 float dPrior; 01355 01356 if( sArgs.inputs_num ) { 01357 for( j = 0; j < vecpBNs.size( ); ++j ) 01358 if( vecpBNs[ j ]->GetID( ) == CMeta::Deextension( CMeta::Basename( sArgs.inputs[ i ] ) ) ) 01359 break; 01360 if( j >= vecpBNs.size( ) ) { 01361 cerr << "Could not locate Bayes net for: " << sArgs.inputs[ i ] << endl; 01362 return 1; 01363 } 01364 pBN = vecpBNs[ j ]; 01365 } 01366 else 01367 pBN = &BNDefault; 01368 01369 dPrior = pBN->GetCPT( 0 ).Get( 1, 0 ); 01370 adYes = new float[ vecpYes[ 0 ]->GetGenes( ) ]; 01371 adNo = new float[ vecpYes[ 0 ]->GetGenes( ) ]; 01372 for( i = 0; i < vecpYes[ 0 ]->GetGenes( ); ++i ) { 01373 if( !( i % 1000 ) ) 01374 cerr << "F: global, " << i << endl; 01375 memcpy( adYes, vecpYes[ 0 ]->Get( i ), ( vecpYes[ 0 ]->GetGenes( ) - i - 1 ) * sizeof(*adYes) ); 01376 memcpy( adNo, vecpNo[ 0 ]->Get( i ), ( vecpNo[ 0 ]->GetGenes( ) - i - 1 ) * sizeof(*adNo) ); 01377 for( j = 0; j < ( vecpYes[ 0 ]->GetGenes( ) - i - 1 ); ++j ) 01378 adYes[ j ] = CMeta::IsNaN( adYes[ j ] ) ? dPrior : 01379 (float)( 1 / ( 1 + exp( (double)adNo[ j ] - (double)adYes[ j ] ) ) ); 01380 vecpYes[ 0 ]->Set( i, adYes ); 01381 } 01382 } 01383 01384 for( i = 0; i < vecsData.size( ); ++i ) 01385 pthread_spin_destroy( &vecsData[ i ].m_sLock ); 01386 for( i = 0; i < vecstrTmps.size( ); ++i ) 01387 _unlink( vecstrTmps[ i ].c_str( ) ); 01388 for( i = 0; i < vecpBNs.size( ); ++i ) 01389 delete vecpBNs[ i ]; 01390 for( i = 0; i < vecpGenes.size( ); ++i ) 01391 delete vecpGenes[ i ]; 01392 for( i = 0; i < vecpYes.size( ); ++i ) { 01393 delete vecpYes[ i ]; 01394 delete vecpNo[ i ]; 01395 } 01396 01397 return 0; 01398 } 01399 01400 void* evaluate2( void* pData ) { 01401 SEvaluate2* psData; 01402 size_t i, j, iBNOne, iBNTwo; 01403 vector<size_t> veciCur; 01404 float dYes, dNo; 01405 01406 psData = (SEvaluate2*)pData; 01407 for( i = psData->m_iBegin; i < psData->m_iEnd; ++i ) { 01408 size_t iOne, iTwo, iBin, k; 01409 01410 if( !( i % 1000 ) ) 01411 cerr << "C: " << psData->m_strName << ", " << i << endl; 01412 if( ( ( iOne = (*psData->m_pveciGenes)[ i ] ) == -1 ) && ( psData->m_iZero == -1 ) ) 01413 continue; 01414 veciCur.clear( ); 01415 if( ( iBNOne = (*psData->m_pveciBNs)[ i ] ) != -1 ) 01416 veciCur.push_back( iBNOne ); 01417 for( j = ( i + 1 ); j < psData->m_pYes->GetGenes( ); ++j ) { 01418 if( ( ( iTwo = (*psData->m_pveciGenes)[ j ] ) == -1 ) && ( psData->m_iZero == -1 ) ) 01419 continue; 01420 01421 iBin = psData->m_pDat->Quantize( iOne, iTwo, psData->m_iZero ); 01422 if( iBin == -1 ) 01423 continue; 01424 01425 if( ( iBNTwo = (*psData->m_pveciBNs)[ j ] ) != -1 ) 01426 veciCur.push_back( iBNTwo ); 01427 for( k = 0; k < max( (size_t)1, veciCur.size( ) ); ++k ) { 01428 const CBayesNetMinimal* pBN = ( k >= veciCur.size( ) ) ? psData->m_pBN: (*psData->m_pvecpBNs)[ veciCur[ k ] ]; 01429 01430 dNo = log( pBN->GetCPT( psData->m_iNode ).Get( iBin, 0 ) ); 01431 dYes = log( pBN->GetCPT( psData->m_iNode ).Get( iBin, 1 ) ); 01432 pthread_spin_lock( &psData->m_sLock ); 01433 psData->m_pNo->Get( i, j ) += dNo; 01434 psData->m_pYes->Get( i, j ) += dYes; 01435 pthread_spin_unlock( &psData->m_sLock ); 01436 } 01437 if( iBNTwo != -1 ) 01438 veciCur.pop_back( ); 01439 } 01440 } 01441 01442 return NULL; 01443 }