Sleipnir
tools/Counter/Counter.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.flip
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 
00025 
00026 #define WT_MULTIPLIER 50
00027 #define WT_MULTIPLIERf 50.0
00028 
00029 class CRegularize;
00030 
00031 static const char   c_acDab[]   = ".dab";
00032 static const char   c_acDat[]   = ".dat";
00033 static const char   c_acQDab[]  = ".qdab";
00034 static const char   c_acQuant[] = ".quant";
00035 static const char   c_acTxt[]   = ".txt";
00036 
00037 typedef CFullMatrix<size_t> CCountMatrix;
00038 
00039 struct SLearn {
00040     CCountMatrix*           m_pMatCounts;
00041     const CGenes*           m_pGenes;
00042     const CGenes*           m_pUbikGenes;
00043     const CDataPair*        m_pAnswers;
00044     const CDatFilter*       m_pDat;
00045     size_t                  m_iZero;
00046     CRegularize*            m_pRegularize;
00047     size_t                  m_iDat;
00048     bool                    m_bInPos;
00049     bool                    m_bInNeg;
00050     bool                    m_bBridgePos;
00051     bool                    m_bBridgeNeg;
00052     bool                    m_bOutPos;
00053     bool                    m_bOutNeg;
00054         bool                    m_isDatWeighted;
00055         bool                    m_bFlipNeg;
00056         const CDat*         m_pwDat;
00057 };
00058 
00059 struct SEvaluate {
00060     const CBayesNetMinimal* m_pBN;
00061     const CDataPair*        m_pDat;
00062     const CGenes*       m_pGenes;
00063     CDat*           m_pYes;
00064     CDat*           m_pNo;
00065     size_t          m_iZero;
00066     size_t          m_iNode;
00067     const vector<size_t>*   m_pveciGenes;
00068     bool            m_fFirst;
00069     string          m_strName;
00070     bool            m_bLogratio;
00071 };
00072 
00073 struct SEvaluate2 {
00074     size_t              m_iBegin;
00075     size_t              m_iEnd;
00076     const CBayesNetMinimal*     m_pBN;
00077     const vector<CBayesNetMinimal*>*    m_pvecpBNs;
00078     const CDataPair*            m_pDat;
00079     CDat*               m_pYes;
00080     CDat*               m_pNo;
00081     size_t              m_iZero;
00082     const vector<size_t>*       m_pveciGenes;
00083     string              m_strName;
00084     const vector<size_t>*       m_pveciBNs;
00085     size_t              m_iNode;
00086     pthread_spinlock_t          m_sLock;
00087 };
00088 
00089 class CRegularize {
00090 public:
00091     ~CRegularize( ) {
00092 
00093         Clear( );
00094     }
00095 
00096     bool Open( const char* szFile, CGenome& Genome, const CDat& Answers ) {
00097         vector<CGenes*>                     vecpGenes;
00098         size_t                              i, j, k;
00099         map<string, size_t>                 mapstriGenes;
00100         map<string, size_t>::const_iterator iterGene;
00101 
00102         Clear( );
00103         if( !Genome.Open( szFile, vecpGenes ) )
00104             return false;
00105 
00106         m_vecsetiGenes.resize( vecpGenes.size( ) );
00107         m_vecveciGenes.resize( Answers.GetGenes( ) );
00108         for( i = 0; i < m_vecsetiGenes.size( ); ++i ) {
00109             const CGenes&   Genes   = *vecpGenes[i];
00110 
00111             for( j = 0; j < Genes.GetGenes( ); ++j ) {
00112                 const string&   strGene = Genes.GetGene( j ).GetName( );
00113 
00114                 if( ( iterGene = mapstriGenes.find( strGene ) ) == mapstriGenes.end( ) )
00115                     mapstriGenes[strGene] = k = Answers.GetGene( strGene );
00116                 else
00117                     k = iterGene->second;
00118                 if( k != -1 ) {
00119                     m_vecsetiGenes[i].insert( k );
00120                     m_vecveciGenes[k].push_back( i );
00121                 }
00122             }
00123         }
00124         for( i = 0; i < vecpGenes.size( ); ++i )
00125             delete vecpGenes[i];
00126 
00127         return true;
00128     }
00129 
00130     void SaveCounts( ostream& ostm, const vector<string>& vecstrNames ) {
00131         size_t                      iDataset, iValue, iGroup;
00132         const CFullMatrix<size_t>*  pMat;
00133 
00134         for( iDataset = 0; iDataset < m_vecpMatCounts.size( ); ++iDataset ) {
00135             if( !( pMat = m_vecpMatCounts[iDataset] ) )
00136                 continue;
00137             ostm << vecstrNames[iDataset] << endl;
00138             for( iGroup = 0; iGroup < m_vecsetiGenes.size( ); ++iGroup ) {
00139                 for( iValue = 0; iValue < pMat->GetRows( ); ++iValue )
00140                     ostm << ( iValue ? "\t" : "" ) << pMat->Get( iValue, iGroup );
00141                 ostm << endl;
00142             }
00143         }
00144     }
00145 
00146     void Save( ostream& ostm, const vector<string>& vecstrNames ) {
00147         size_t                      iDatasetOne, iDatasetTwo, iValueOne, iValueTwo, iGroup, iCountOne, iCountTwo;
00148         const CFullMatrix<size_t>*  pMatOne;
00149         const CFullMatrix<size_t>*  pMatTwo;
00150         vector<double>              vecdOne, vecdTwo, vecdGroups;
00151         vector<size_t>              veciCounts;
00152         CFullMatrix<double>         MatJoint;
00153         double                      dOne, dTwo, dJoint, dMI;
00154 
00155         veciCounts.resize( m_vecsetiGenes.size( ) );
00156         fill( veciCounts.begin( ), veciCounts.end( ), 0 );
00157         for( iCountOne = iDatasetOne = 0; iDatasetOne < m_vecpMatCounts.size( ); ++iDatasetOne ) {
00158             if( !( pMatOne = m_vecpMatCounts[iDatasetOne] ) )
00159                 continue;
00160             for( iValueOne = 0; iValueOne < pMatOne->GetRows( ); ++iValueOne )
00161                 for( iGroup = 0; iGroup < pMatOne->GetColumns( ); ++iGroup ) {
00162                     iCountTwo = pMatOne->Get( iValueOne, iGroup );
00163                     veciCounts[iGroup] += iCountTwo;
00164                     iCountOne += iCountTwo;
00165                 }
00166         }
00167         vecdGroups.resize( veciCounts.size( ) );
00168         for( iGroup = 0; iGroup < vecdGroups.size( ); ++iGroup )
00169             vecdGroups[iGroup] = (double)veciCounts[iGroup] / iCountOne;
00170 
00171         for( iDatasetOne = 0; iDatasetOne < m_vecpMatCounts.size( ); ++iDatasetOne ) {
00172             if( !( pMatOne = m_vecpMatCounts[iDatasetOne] ) )
00173                 continue;
00174             /*
00175             for( iGroup = 0; iGroup < m_vecsetiGenes.size( ); ++iGroup ) {
00176             for( iCountOne = iValueOne = 0; iValueOne < pMatOne->GetRows( ); ++iValueOne )
00177             iCountOne += pMatOne->Get( iValueOne, iGroup );
00178             for( iValueOne = 0; iValueOne < pMatOne->GetRows( ); ++iValueOne )
00179             cerr << ( iValueOne ? "\t" : "" ) << ( (double)pMatOne->Get( iValueOne, iGroup ) / iCountOne );
00180             cerr << endl; }
00181             //*/
00182             vecdOne.resize( pMatOne->GetRows( ) );
00183             for( iDatasetTwo = iDatasetOne; iDatasetTwo < m_vecpMatCounts.size( ); ++iDatasetTwo ) {
00184                 if( !( pMatTwo = m_vecpMatCounts[iDatasetTwo] ) )
00185                     continue;
00186                 vecdTwo.resize( pMatTwo->GetRows( ) );
00187                 MatJoint.Initialize( vecdOne.size( ), vecdTwo.size( ) );
00188 
00189                 fill( vecdOne.begin( ), vecdOne.end( ), 0.0f );
00190                 fill( vecdTwo.begin( ), vecdTwo.end( ), 0.0f );
00191                 MatJoint.Clear( );
00192                 for( iGroup = 0; iGroup < m_vecsetiGenes.size( ); ++iGroup ) {
00193                     for( iCountOne = iValueOne = 0; iValueOne < vecdOne.size( ); ++iValueOne )
00194                         iCountOne += pMatOne->Get( iValueOne, iGroup );
00195                     for( iCountTwo = iValueTwo = 0; iValueTwo < vecdTwo.size( ); ++iValueTwo )
00196                         iCountTwo += pMatTwo->Get( iValueTwo, iGroup );
00197                     for( iValueOne = 0; iValueOne < vecdOne.size( ); ++iValueOne ) {
00198                         dOne = (double)pMatOne->Get( iValueOne, iGroup ) / ( iCountOne ? iCountOne : 1 );
00199                         vecdOne[iValueOne] += dOne * vecdGroups[iGroup];
00200                         for( iValueTwo = 0; iValueTwo < vecdTwo.size( ); ++iValueTwo ) {
00201                             dTwo = (double)pMatTwo->Get( iValueTwo, iGroup ) / ( iCountTwo ? iCountTwo : 1 );
00202                             if( !iValueOne )
00203                                 vecdTwo[iValueTwo] += dTwo * vecdGroups[iGroup];
00204                             MatJoint.Get( iValueOne, iValueTwo ) += dOne * dTwo * vecdGroups[iGroup];
00205                         }
00206                     }
00207                 }
00208                 /*
00209                                                 if( iDatasetOne == iDatasetTwo ) {
00210                                                         MatJoint.Clear( );
00211                                                         for( iValueOne = 0; iValueOne < vecdOne.size( ); ++iValueOne )
00212                                                                 MatJoint.Set( iValueOne, iValueOne, vecdOne[iValueOne] ); }
00213                 //*/
00214                 /*
00215                 cerr << "One: " << vecstrNames[iDatasetOne] << endl;
00216                 for( iValueOne = 0; iValueOne < vecdOne.size( ); ++iValueOne )
00217                 cerr << ( iValueOne ? "\t" : "" ) << vecdOne[iValueOne];
00218                 cerr << endl;
00219                 cerr << "Two: " << vecstrNames[iDatasetTwo] << endl;
00220                 for( iValueTwo = 0; iValueTwo < vecdTwo.size( ); ++iValueTwo )
00221                 cerr << ( iValueTwo ? "\t" : "" ) << vecdTwo[iValueTwo];
00222                 cerr << endl;
00223                 cerr << "Joint:" << endl;
00224                 for( iValueOne = 0; iValueOne < vecdOne.size( ); ++iValueOne ) {
00225                 for( iValueTwo = 0; iValueTwo < vecdTwo.size( ); ++iValueTwo )
00226                 cerr << ( iValueTwo ? "\t" : "" ) << MatJoint.Get( iValueOne, iValueTwo );
00227                 cerr << endl; }
00228                 //*/
00229 
00230                 for( dMI = 0,iValueOne = 0; iValueOne < vecdOne.size( ); ++iValueOne ) {
00231                     dOne = vecdOne[iValueOne];
00232                     for( iValueTwo = 0; iValueTwo < vecdTwo.size( ); ++iValueTwo )
00233                         if( dJoint = MatJoint.Get( iValueOne, iValueTwo ) )
00234                             dMI += dJoint * ( dJoint ? log( dJoint / dOne / vecdTwo[iValueTwo] ) : 0 );
00235                 }
00236 //cerr << "MI: " << dMI << endl;
00237                 dMI = ( dMI < 0 ) ? 0 : ( dMI / log( 2.0f ) );
00238 
00239                 ostm << vecstrNames[iDatasetOne] << '\t' << vecstrNames[iDatasetTwo] << '\t' << dMI << endl;
00240             }
00241         }
00242     }
00243 
00244     void Add( size_t iDataset, const CDatFilter& Dat, size_t iOne, size_t iTwo, size_t iValue ) {
00245         size_t                  i;
00246         CFullMatrix<size_t>*    pMat;
00247 
00248         if( m_vecsetiGenes.empty( ) )
00249             return;
00250 
00251         while( m_vecpMatCounts.size( ) <= iDataset )
00252             m_vecpMatCounts.push_back( NULL );
00253         if( !( pMat = m_vecpMatCounts[iDataset] ) ) {
00254             m_vecpMatCounts[iDataset] = pMat = new CFullMatrix<size_t>( );
00255             pMat->Initialize( Dat.GetValues( ), m_vecsetiGenes.size( ) );
00256             pMat->Clear( );
00257         }
00258         for( i = 0; i < m_vecveciGenes[iOne].size( ); ++i )
00259             pMat->Get( iValue, m_vecveciGenes[iOne][i] )++;
00260         for( i = 0; i < m_vecveciGenes[iTwo].size( ); ++i )
00261             pMat->Get( iValue, m_vecveciGenes[iTwo][i] )++;
00262     }
00263 
00264 private:
00265     vector<set<size_t> >            m_vecsetiGenes;
00266     vector<vector<size_t> >         m_vecveciGenes;
00267     vector<CFullMatrix<size_t>*>    m_vecpMatCounts;
00268 
00269     void Clear( ) {
00270         size_t                  i;
00271         CFullMatrix<size_t>*    pMat;
00272 
00273         for( i = 0; i < m_vecpMatCounts.size( ); ++i )
00274             if( pMat = m_vecpMatCounts[i] )
00275                 delete pMat;
00276         m_vecpMatCounts.clear( );
00277         m_vecsetiGenes.clear( );
00278         m_vecveciGenes.clear( );
00279     }
00280 };
00281 
00282 void* learn( void* );
00283 void* evaluate( void* );
00284 void* evaluate2( void* );
00285 void* finalize( void* );
00286 int main_count( const gengetopt_args_info&, const map<string, size_t>&, const CGenes&, const CGenes&,
00287                 const CGenes&, const CGenes&, const CGenes&);
00288 int main_xdsls( const gengetopt_args_info&, const map<string, size_t>&, const map<string, size_t>&,
00289                 const vector<string>& );
00290 int main_inference( const gengetopt_args_info&, const map<string, size_t>&, const map<string, size_t>& );
00291 int main_inference2( const gengetopt_args_info&, const map<string, size_t>&, const map<string, size_t>& );
00292 
00293 int main( int iArgs, char** aszArgs ) {
00294     gengetopt_args_info sArgs;
00295     map<string, size_t> mapstriZeros, mapstriDatasets;
00296     vector<string>      vecstrContexts;
00297     int                 iRet;
00298     size_t              i;
00299     CGenome             Genome;
00300     CGenes              GenesIn( Genome ), GenesEx( Genome ), GenesEd( Genome ), GenesTm( Genome ), GenesUbik( Genome);
00301 
00302 #ifdef WIN32
00303     pthread_win32_process_attach_np( );
00304 #endif // WIN32
00305     if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
00306         cmdline_parser_print_help( );
00307         return 1;
00308     }
00309     CMeta Meta( sArgs.verbosity_arg );
00310         if(sArgs.reggroups_given && sArgs.weights_flag){
00311                 cerr << "Regularization is not supported for weighted contexts." << endl;
00312                 return 1;}
00313     if( sArgs.pseudocounts_arg < 0 )
00314         sArgs.pseudocounts_arg = CMeta::GetNaN( );
00315     if( sArgs.zeros_arg ) {
00316         ifstream        ifsm;
00317         vector<string>  vecstrLine;
00318         char            acLine[ 1024 ];
00319 
00320         ifsm.open( sArgs.zeros_arg );
00321         if( !ifsm.is_open( ) ) {
00322             cerr << "Couldn't open: " << sArgs.zeros_arg << endl;
00323             return 1;
00324         }
00325         while( !ifsm.eof( ) ) {
00326             ifsm.getline( acLine, ARRAYSIZE(acLine) - 1 );
00327             acLine[ ARRAYSIZE(acLine) - 1 ] = 0;
00328             vecstrLine.clear( );
00329             CMeta::Tokenize( acLine, vecstrLine );
00330             if( vecstrLine.empty( ) )
00331                 continue;
00332             mapstriZeros[ vecstrLine[ 0 ] ] = atoi( vecstrLine[ 1 ].c_str( ) );
00333         }
00334     }
00335 
00336     if( sArgs.datasets_arg ) {
00337         ifstream        ifsm;
00338         char            acLine[ 1024 ];
00339         vector<string>  vecstrLine, vecstrDatasets;
00340 
00341         ifsm.open( sArgs.datasets_arg );
00342         if( !ifsm.is_open( ) ) {
00343             cerr << "Could not open: " << sArgs.datasets_arg << endl;
00344             return 1;
00345         }
00346         while( !ifsm.eof( ) ) {
00347             ifsm.getline( acLine, ARRAYSIZE(acLine) - 1 );
00348             acLine[ ARRAYSIZE(acLine) - 1 ] = 0;
00349             if( !acLine[ 0 ] )
00350                 continue;
00351             vecstrLine.clear( );
00352             CMeta::Tokenize( acLine, vecstrLine );
00353             if( vecstrLine.size( ) < 2 ) {
00354                 cerr << "Illegal datasets line: " << acLine << endl;
00355                 return 1;
00356             }
00357             if( ( iRet = atol( vecstrLine[ 0 ].c_str( ) ) ) <= 0 ) {
00358                 cerr << "Dataset indices must be greater than zero: " << acLine << endl;
00359                 return 1;
00360             }
00361             mapstriDatasets[ vecstrLine[ 1 ] ] = --iRet;
00362             if( vecstrDatasets.size( ) <= (size_t)iRet )
00363                 vecstrDatasets.resize( iRet + 1 );
00364             vecstrDatasets[ iRet ] = vecstrLine[ 1 ];
00365         }
00366         ifsm.close( );
00367         for( i = 0; i < vecstrDatasets.size( ); ++i )
00368             if( vecstrDatasets[ i ].empty( ) ) {
00369                 cerr << "Missing dataset: " << ( i + 1 ) << endl;
00370                 return 1;
00371             }
00372     }
00373 
00374     if( sArgs.contexts_arg ) {
00375         ifstream        ifsm;
00376         char            acLine[ 2048 ];
00377         vector<string>  vecstrLine;
00378 
00379         ifsm.open( sArgs.contexts_arg );
00380         if( !ifsm.is_open( ) ) {
00381             cerr << "Could not open: " << sArgs.contexts_arg << endl;
00382             return 1;
00383         }
00384         while( !ifsm.eof( ) ) {
00385             ifsm.getline( acLine, ARRAYSIZE(acLine) - 1 );
00386             acLine[ ARRAYSIZE(acLine) - 1 ] = 0;
00387             if( !acLine[ 0 ] )
00388                 continue;
00389             vecstrLine.clear( );
00390             CMeta::Tokenize( acLine, vecstrLine );
00391             if( vecstrLine.size( ) < 3 ) {
00392                 cerr << "Illegal contexts line: " << acLine << endl;
00393                 return 1;
00394             }
00395             if( ( atoi( vecstrLine[ 0 ].c_str( ) ) ) != ( vecstrContexts.size( ) + 1 ) ) {
00396                 cerr << "Inconsistent context ID: " << acLine << endl;
00397                 return 1;
00398             }
00399             vecstrContexts.push_back( vecstrLine[ 2 ] );
00400         }
00401         ifsm.close( );
00402     }
00403 
00404     if( sArgs.genes_arg ) {
00405         if( !GenesIn.Open( sArgs.genes_arg ) ) {
00406             cerr << "Could not open: " << sArgs.genes_arg << endl;
00407             return 1;
00408         }
00409     }
00410     if( sArgs.genex_arg ) {
00411         if( !GenesEx.Open( sArgs.genex_arg ) ) {
00412             cerr << "Could not open: " << sArgs.genex_arg << endl;
00413             return 1;
00414         }
00415     }
00416     if( sArgs.genet_arg ) {
00417         if( !GenesTm.Open( sArgs.genet_arg ) ) {
00418             cerr << "Could not open: " << sArgs.genet_arg << endl;
00419             return 1;
00420         }
00421     }
00422     if( sArgs.genee_arg ) {
00423         if( !GenesEd.Open( sArgs.genee_arg ) ) {
00424             cerr << "Could not open: " << sArgs.genee_arg << endl;
00425             return 1;
00426         }
00427     }
00428     if( sArgs.ubiqg_arg ) {
00429         if( !GenesUbik.Open( sArgs.ubiqg_arg ) ) {
00430             cerr << "Could not open: " << sArgs.ubiqg_arg << endl;
00431             return 1;
00432         }
00433     }
00434 
00435 
00436 
00437     if( sArgs.answers_arg )
00438         iRet = main_count( sArgs, mapstriZeros, GenesIn, GenesEx, GenesTm, GenesEd, GenesUbik);
00439     else if( sArgs.counts_arg )
00440         iRet = main_xdsls( sArgs, mapstriZeros, mapstriDatasets, vecstrContexts );
00441     else if( sArgs.networks_arg )
00442         iRet = sArgs.genewise_flag ? main_inference2( sArgs, mapstriZeros, mapstriDatasets ) :
00443                main_inference( sArgs, mapstriZeros, mapstriDatasets );
00444 
00445 #ifdef WIN32
00446     pthread_win32_process_detach_np( );
00447 #endif // WIN32
00448     return iRet;
00449 }
00450 
00451 int main_count( const gengetopt_args_info& sArgs, const map<string, size_t>& mapstriZeros,
00452                 const CGenes& GenesIn, const CGenes& GenesEx, const CGenes& GenesTm, const CGenes& GenesEd, const CGenes& GenesUbik ) {
00453     size_t              i, j, k, m, iTerm, iThread;
00454     vector<vector<CCountMatrix*>* > vecpvecpMats;
00455     vector<CCountMatrix*>       vecpMatRoots;
00456     vector<CGenes*>         vecpGenes;
00457     CDataPair               Answers, Dat;
00458         CDat                    wDat;
00459     CDatFilter              Filter, FilterIn, FilterEx, FilterTm, FilterEd;
00460     CDatFilter*             pFilter;
00461     string                              strFile;
00462     vector<pthread_t>                   vecpthdThreads;
00463     vector<SLearn>          vecsData;
00464     map<string, size_t>::const_iterator iterZero;
00465     CGenome                             Genome;
00466         vector<CGenome>         Genomes;
00467     vector<string>          vecstrNames;
00468     CRegularize                                 Regularize;
00469         bool                    isDatWeighted=false;
00470     if( !Answers.Open( sArgs.answers_arg, false, !!sArgs.memmap_flag ) ) {
00471         cerr << "Could not open: " << sArgs.answers_arg << endl;
00472         return 1;
00473     }
00474     if( !sArgs.inputs_num && sArgs.reggroups_arg && !Regularize.Open( sArgs.reggroups_arg, Genome, Answers ) ) {
00475         cerr << "Could not open: " << sArgs.reggroups_arg << endl;
00476         return 1;
00477     }
00478 
00479     vecpGenes.resize( sArgs.inputs_num );
00480         Genomes.resize(vecpGenes.size( ));
00481     for( i = 0; i < vecpGenes.size( ); ++i ) {
00482         ifstream    ifsm;
00483 
00484         vecpGenes[ i ]  = new CGenes( Genome );
00485         ifsm.open( sArgs.inputs[ i ] );
00486 
00487                 if(sArgs.weights_flag){
00488                         delete vecpGenes[ i ];
00489                         vecpGenes[ i ] = new CGenes(Genomes[ i ]);
00490                         if( !vecpGenes[ i ]->OpenWeighted( ifsm ) ) {
00491                                 if(!wDat.Open(sArgs.inputs[i], !!sArgs.memmap_flag )){
00492                                         cerr << "Couldn't open: " << sArgs.inputs[ i ] << endl;
00493                                         return 1;   }
00494                                 else{
00495                                         isDatWeighted = true;
00496                                         vecpGenes[ i ]->Open(wDat.GetGeneNames());}
00497                         }
00498                 }
00499                 else{
00500                         if( !vecpGenes[ i ]->Open( ifsm ) ) {
00501                                 cerr << "Couldn't open: " << sArgs.inputs[ i ] << endl;
00502                                 return 1;   }
00503                 }
00504         }
00505 
00506     if( !vecpGenes.size( ) ) {
00507         vecpGenes.insert( vecpGenes.begin( ), new CGenes( Genome ) );
00508         vecpGenes[ 0 ]->Open( Answers.GetGeneNames( ) );
00509     }
00510     vecpMatRoots.resize( vecpGenes.size( ) );
00511     vecpthdThreads.resize( vecpMatRoots.size( ) );
00512     vecsData.resize( vecpthdThreads.size( ) );
00513     for( iTerm = 0; iTerm < vecpMatRoots.size( ); iTerm += iThread ) {
00514         cerr << "Learning root " << iTerm << '/' << vecpMatRoots.size( ) << endl;
00515         for( iThread = 0; ( ( sArgs.threads_arg == -1 ) || ( iThread < (size_t)sArgs.threads_arg ) ) &&
00516                 ( ( iTerm + iThread ) < vecpMatRoots.size( ) ); ++iThread ) {
00517             i = iTerm + iThread;
00518             vecsData[ i ].m_pMatCounts = vecpMatRoots[ i ] = new CCountMatrix( );
00519             vecsData[ i ].m_pDat = NULL;
00520             vecsData[ i ].m_iDat = -1;
00521             vecsData[ i ].m_pGenes = vecpGenes[ i ];
00522             vecsData[ i ].m_pUbikGenes = &GenesUbik;
00523             vecsData[ i ].m_pAnswers = &Answers;
00524             vecsData[ i ].m_iZero = -1;
00525             vecsData[ i ].m_pRegularize = &Regularize;
00526             vecsData[ i ].m_bInPos = sArgs.ctxtpos_flag;
00527             vecsData[ i ].m_bInNeg = sArgs.ctxtneg_flag;
00528             vecsData[ i ].m_bBridgePos = sArgs.bridgepos_flag;
00529             vecsData[ i ].m_bBridgeNeg = sArgs.bridgeneg_flag;
00530             vecsData[ i ].m_bOutPos = sArgs.outpos_flag;
00531             vecsData[ i ].m_bOutNeg = sArgs.outneg_flag;
00532                         vecsData[ i ].m_isDatWeighted = false;
00533                         vecsData[ i ].m_bFlipNeg =false;
00534                         vecsData[ i ].m_pwDat = NULL;
00535             if( pthread_create( &vecpthdThreads[ i ], NULL, learn, &vecsData[ i ] ) ) {
00536                 cerr << "Couldn't create root thread: " << sArgs.inputs[ i ] << endl;
00537                 return 1;
00538             }
00539         }
00540         for( i = 0; i < iThread; ++i )
00541             pthread_join( vecpthdThreads[ iTerm + i ], NULL );
00542     }
00543     FOR_EACH_DIRECTORY_FILE((string)sArgs.directory_arg, strFile)
00544     string                  strName;
00545     vector<CCountMatrix*>*  pvecpMatCounts;
00546 
00547     if( CMeta::IsExtension( strFile, c_acDab ) ) {
00548         i = strFile.rfind( '.' );
00549         strName = (string) sArgs.directory_arg + "/" + strFile.substr( 0, i ) + c_acDab;
00550     } else if( CMeta::IsExtension( strFile, c_acQDab ) ) {
00551         i = strFile.rfind( '.' );
00552         strName = (string) sArgs.directory_arg + "/" + strFile.substr( 0, i ) + c_acQDab;
00553     } else if( CMeta::IsExtension( strFile, c_acDat ) ) {
00554         i = strFile.rfind( '.' );
00555         strName = (string) sArgs.directory_arg + "/" + strFile.substr( 0, i ) + c_acDat;
00556     } else {
00557         continue;
00558     }
00559 
00560     if( !(Dat.Open( strName.c_str( ), false, !!sArgs.memmap_flag )) ) {
00561         cerr << "Couldn't open: " << strName << endl;
00562         return 1;
00563     }
00564     cerr << "Processing: " << strName << endl;
00565     strName = CMeta::Filename( CMeta::Deextension( CMeta::Basename( strName.c_str( ) ) ) );
00566     vecstrNames.push_back( strName );
00567 
00568     Filter.Attach( Dat );
00569     pFilter = &Filter;
00570     if( GenesIn.GetGenes( ) ) {
00571         FilterIn.Attach( *pFilter, GenesIn, CDat::EFilterInclude, &Answers );
00572         pFilter = &FilterIn;
00573     }
00574     if( GenesEx.GetGenes( ) ) {
00575         FilterEx.Attach( *pFilter, GenesEx, CDat::EFilterExclude, &Answers );
00576         pFilter = &FilterEx;
00577     }
00578     if( GenesTm.GetGenes( ) ) {
00579         FilterTm.Attach( *pFilter, GenesTm, CDat::EFilterTerm, &Answers );
00580         pFilter = &FilterTm;
00581     }
00582     if( GenesEd.GetGenes( ) ) {
00583         FilterEd.Attach( *pFilter, GenesEd, CDat::EFilterEdge, &Answers );
00584         pFilter = &FilterEd;
00585     }
00586 
00587     pvecpMatCounts = new vector<CCountMatrix*>( );
00588     pvecpMatCounts->resize( vecpGenes.size( ) );
00589     for( iTerm = 0; iTerm < vecpMatRoots.size( ); iTerm += iThread ) {
00590         cerr << "Learning term " << iTerm << '/' << vecpMatRoots.size( ) << endl;
00591         for( iThread = 0; ( ( sArgs.threads_arg == -1 ) || ( iThread < (size_t)sArgs.threads_arg ) ) &&
00592                 ( ( iTerm + iThread ) < vecpMatRoots.size( ) ); ++iThread ) {
00593             i = iTerm + iThread;
00594             vecsData[ i ].m_pMatCounts = (*pvecpMatCounts)[ i ] = new CCountMatrix( );
00595             vecsData[ i ].m_pDat = pFilter;
00596             vecsData[ i ].m_iDat = vecstrNames.size( ) - 1;
00597             vecsData[ i ].m_pGenes = vecpGenes[ i ];
00598             vecsData[ i ].m_pUbikGenes = &GenesUbik;
00599             vecsData[ i ].m_pAnswers = &Answers;
00600             vecsData[ i ].m_iZero = ( ( iterZero = mapstriZeros.find( strName ) ) == mapstriZeros.end( ) ) ? -1 : iterZero->second;
00601             vecsData[ i ].m_pRegularize = &Regularize;
00602             vecsData[ i ].m_bInPos = sArgs.ctxtpos_flag;
00603             vecsData[ i ].m_bInNeg = sArgs.ctxtneg_flag;
00604             vecsData[ i ].m_bBridgePos = sArgs.bridgepos_flag;
00605             vecsData[ i ].m_bBridgeNeg = sArgs.bridgeneg_flag;
00606             vecsData[ i ].m_bOutPos = sArgs.outpos_flag;
00607             vecsData[ i ].m_bOutNeg = sArgs.outneg_flag;
00608                         vecsData[ i ].m_isDatWeighted = isDatWeighted;
00609                         vecsData[ i ].m_bFlipNeg = !!sArgs.flipneg_flag;
00610                         vecsData[ i ].m_pwDat = isDatWeighted? &wDat : NULL;
00611             if( pthread_create( &vecpthdThreads[ i ], NULL, learn, &vecsData[ i ] ) ) {
00612                 cerr << "Couldn't create root thread: " << sArgs.inputs[ i ] << endl;
00613                 return 1;
00614             }
00615         }
00616         for( i = 0; i < iThread; ++i )
00617             pthread_join( vecpthdThreads[ iTerm + i ], NULL );
00618     }
00619     vecpvecpMats.push_back( pvecpMatCounts );
00620 }
00621 #ifdef _MSC_VER
00622 FindClose( hSearch );
00623 #else // _MSC_VER
00624 closedir( pDir );
00625 #endif // _MSC_VER
00626 
00627 for( i = 0; i < vecpMatRoots.size( ); ++i ) {
00628     ofstream    ofsm;
00629 
00630     ofsm.open( ( (string)sArgs.output_arg + '/' + ( sArgs.inputs ?
00631                  CMeta::Deextension( CMeta::Basename( sArgs.inputs[ i ] ) ) : sArgs.countname_arg ) + c_acTxt ).c_str( ) );
00632     ofsm << ( sArgs.inputs ? CMeta::Deextension( CMeta::Basename( sArgs.inputs[ i ] ) ) : sArgs.countname_arg ) <<
00633          '\t' << vecpvecpMats.size( ) << endl;
00634     for( j = 0; j < vecpMatRoots[ i ]->GetRows( ); ++j )
00635         ofsm << ( j ? "\t" : "" ) << vecpMatRoots[ i ]->Get( j, 0 );
00636     ofsm << endl;
00637     for( j = 0; j < vecpvecpMats.size( ); ++j ) {
00638         ofsm << vecstrNames[ j ] << endl;
00639         for( k = 0; k < (*vecpvecpMats[ j ])[ i ]->GetColumns( ); ++k ) {
00640             for( m = 0; m < (*vecpvecpMats[ j ])[ i ]->GetRows( ); ++m )
00641                 ofsm << ( m ? "\t" : "" ) << (*vecpvecpMats[ j ])[ i ]->Get( m, k );
00642             ofsm << endl;
00643         }
00644     }
00645 }
00646 if( sArgs.reggroups_arg )
00647     Regularize.Save( cout, vecstrNames );
00648 
00649 for( i = 0; i < vecpvecpMats.size( ); ++i ) {
00650     for( j = 0; j < vecpvecpMats[ i ]->size( ); ++j )
00651         delete (*vecpvecpMats[ i ])[ j ];
00652     delete vecpvecpMats[ i ];
00653 }
00654 for( i = 0; i < vecpMatRoots.size( ); ++i ) {
00655     delete vecpMatRoots[ i ];
00656     delete vecpGenes[ i ];
00657 }
00658 
00659 return 0;
00660 }
00661 
00662 void* learn( void* pData ) {
00663     SLearn*     psData;
00664     size_t      i, j, k, iAnswer, iVal, iOne, iTwo;
00665     vector<bool>    vecfGenes, vecfUbik;
00666     vector<size_t>  veciGenes, vecfiGenes;
00667         vector<float>   vecGeneWeights;
00668     psData = (SLearn*)pData;
00669         float           w;
00670 
00671     if (psData->m_pUbikGenes->GetGenes( )) {
00672                 vecfUbik.resize( psData->m_pAnswers->GetGenes( ) );
00673                 for( i = 0; i < vecfUbik.size( ); ++i) {
00674                         vecfUbik[ i ] = psData->m_pUbikGenes->IsGene( psData->m_pAnswers->GetGene( i ) );
00675                 }
00676     }
00677     vecfGenes.resize( psData->m_pAnswers->GetGenes( ) );
00678         vecGeneWeights.resize( psData->m_pAnswers->GetGenes( ) );
00679         vecfiGenes.resize(psData->m_pAnswers->GetGenes( ));
00680     for( i = 0; i < vecfGenes.size( ); ++i ){
00681                 vecfGenes[ i ] = psData->m_pGenes->IsGene( psData->m_pAnswers->GetGene( i ) );}
00682     if(psData->m_isDatWeighted){
00683       for( i = 0; i < vecfGenes.size( ); ++i ){
00684         vecfiGenes[ i ] = psData->m_pwDat->GetGene( psData->m_pAnswers->GetGene( i ) );}
00685     }
00686     else{
00687     for( i = 0; i < vecfGenes.size( ); ++i ){
00688                 vecfiGenes[ i ] = psData->m_pGenes->GetGene( psData->m_pAnswers->GetGene( i ) );}
00689     }
00690         if(psData->m_pGenes->IsWeighted()){
00691                 for( i = 0; i < vecfGenes.size( ); ++i ){
00692                         vecGeneWeights[ i ] = psData->m_pGenes->GetGeneWeight(psData->m_pGenes->GetGene( psData->m_pAnswers->GetGene( i ) ));}}
00693     if( psData->m_pDat ) {
00694         psData->m_pMatCounts->Initialize( psData->m_pDat->GetValues( ), psData->m_pAnswers->GetValues( ) );
00695         veciGenes.resize( psData->m_pAnswers->GetGenes( ) );
00696         for( i = 0; i < veciGenes.size( ); ++i )
00697             veciGenes[ i ] = psData->m_pDat->GetGene( psData->m_pAnswers->GetGene( i ) );
00698     }
00699     else
00700         psData->m_pMatCounts->Initialize( psData->m_pAnswers->GetValues( ), 1 );
00701         psData->m_pMatCounts->Clear( );
00702     for( i = 0; i < psData->m_pAnswers->GetGenes( ); ++i ) {
00703         if( psData->m_pDat )
00704             iOne = veciGenes[ i ];
00705         for( j = ( i + 1 ); j < psData->m_pAnswers->GetGenes( ); ++j ) {
00706                         iAnswer = psData->m_pAnswers->Quantize( psData->m_pAnswers->Get( i, j ) );
00707                                 if( iAnswer == -1 ) {
00708                                         continue;
00709                                 }
00710 
00711                         if ( CMeta::SkipEdge( !!iAnswer, i, j, vecfGenes, vecfUbik, psData->m_bInPos, psData->m_bInNeg, psData->m_bBridgePos, psData->m_bBridgeNeg, psData->m_bOutPos, psData->m_bOutNeg ) ) {
00712                         continue;
00713                         }
00714 
00715                         if( psData->m_pDat ) {
00716                                         iTwo = veciGenes[ j ];
00717                                         iVal = -1;
00718                                         iVal = psData->m_pDat->Quantize( iOne, iTwo, psData->m_iZero );
00719                                         if( iVal == -1 )
00720                                                 continue;
00721                                         //When contexts are weighted, add counts = WT_MULTIPLIER * weight1 * weight 2
00722                                         if(psData->m_pGenes->IsWeighted()){
00723                                                 if(iAnswer==1 || !psData->m_bFlipNeg)
00724                                                         for( k = 0; k <(vecGeneWeights[i]*vecGeneWeights[j]*WT_MULTIPLIER-0.5); k++){
00725                                                                 psData->m_pMatCounts->Get( iVal, iAnswer )++;
00726                                                                 }
00727                                                 else
00728                                                         for( k = 0; k <((1-vecGeneWeights[i]*vecGeneWeights[j])*WT_MULTIPLIER-0.5); k++){
00729                                                                 psData->m_pMatCounts->Get( iVal, iAnswer )++;
00730                                                                 }
00731                                         }
00732                                         else if(psData->m_isDatWeighted){
00733                                                 if(vecfiGenes[i] == -1 || vecfiGenes[j] == -1)
00734                                                         continue;
00735                                                 if(CMeta::IsNaN(w = psData->m_pwDat->Get( vecfiGenes[i],vecfiGenes[j] )) )
00736                                                   continue;
00737                                                 if(iAnswer==1 || !psData->m_bFlipNeg)
00738                                                         for( k = 0; k <(w *WT_MULTIPLIER-0.5); k++){
00739                                                                 psData->m_pMatCounts->Get( iVal, iAnswer )++;
00740                                                                 }
00741                                                 else
00742                                                         for( k = 0; k <((1-w) *WT_MULTIPLIER-0.5); k++){
00743                                                                 psData->m_pMatCounts->Get( iVal, iAnswer )++;
00744                                                                 }
00745                                         }
00746                                         else{
00747                                         psData->m_pMatCounts->Get( iVal, iAnswer )++;
00748                                         //FIXME: Regularization has not been supported for weighted context
00749                                         psData->m_pRegularize->Add( psData->m_iDat, *psData->m_pDat, i, j, iVal );
00750                                         }
00751                         }
00752                         else{
00753                                 psData->m_pMatCounts->Get( iAnswer, 0 )++;}
00754         }
00755     }
00756 
00757 
00758         //Recale counts
00759         if(psData->m_pGenes->IsWeighted()||psData->m_isDatWeighted){
00760                 for (i=0; i< psData->m_pMatCounts->GetRows();i++)
00761                         for(j=0; j<psData->m_pMatCounts->GetColumns();j++)
00762                                 psData->m_pMatCounts->Get( i,j ) = int(psData->m_pMatCounts->Get( i,j )/ WT_MULTIPLIERf + 0.5);
00763         }
00764 
00765     return NULL;
00766 }
00767 
00768 int main_xdsls( const gengetopt_args_info& sArgs, const map<string, size_t>& mapstriZeros,
00769                 const map<string, size_t>& mapstriDatasets, const vector<string>& vecstrContexts ) {
00770     static const size_t c_iBuffer   = 1024;
00771     char                        szBuffer[ c_iBuffer ];
00772     string                      strFile;
00773     CBayesNetMinimal            BNDefault;
00774     CBayesNetMinimal*           pBN;
00775     vector<CBayesNetMinimal*>   vecpBNs;
00776     ifstream                    ifsm;
00777     vector<string>              vecstrLine;
00778     ofstream                    ofsm;
00779     uint32_t                    iSize;
00780     size_t                      i;
00781     vector<float>               vecdAlphas;
00782     vector<unsigned char>       vecbZeros;
00783     map<string, size_t>::const_iterator iterZero, iterDataset;
00784     float                       d;
00785 
00786     if( mapstriDatasets.empty( ) ) {
00787         cerr << "No datasets given" << endl;
00788         return 1;
00789     }
00790     if( sArgs.alphas_arg ) {
00791         vecdAlphas.resize( mapstriDatasets.size( ) );
00792         ifsm.clear( );
00793         ifsm.open( sArgs.alphas_arg );
00794         if( !ifsm.is_open( ) ) {
00795             cerr << "Could not open: " << sArgs.alphas_arg << endl;
00796             return 1;
00797         }
00798         while( !ifsm.eof( ) ) {
00799             ifsm.getline( szBuffer, c_iBuffer - 1 );
00800             szBuffer[ c_iBuffer - 1 ] = 0;
00801             if( !szBuffer[ 0 ] )
00802                 continue;
00803             vecstrLine.clear( );
00804             CMeta::Tokenize( szBuffer, vecstrLine );
00805             if( vecstrLine.size( ) != 2 ) {
00806                 cerr << "Illegal alphas line: " << szBuffer << endl;
00807                 return 1;
00808             }
00809             if( ( iterDataset = mapstriDatasets.find( vecstrLine[ 0 ] ) ) == mapstriDatasets.end( ) )
00810                 cerr << "Dataset in counts but not database: " << vecstrLine[ 0 ] << endl;
00811             else
00812                 vecdAlphas[ iterDataset->second ] = (float)atof( vecstrLine[ 1 ].c_str( ) );
00813         }
00814         ifsm.close( );
00815     }
00816 
00817     vecbZeros.resize( mapstriDatasets.size( ) );
00818     fill( vecbZeros.begin( ), vecbZeros.end( ), 0xFF );
00819     for( iterZero = mapstriZeros.begin( ); iterZero != mapstriZeros.end( ); ++iterZero )
00820         if( ( iterDataset = mapstriDatasets.find( iterZero->first ) ) == mapstriDatasets.end( ) )
00821             cerr << "Unknown dataset in zeros file: " << iterZero->first << endl;
00822         else
00823             vecbZeros[ iterDataset->second ] = (unsigned char)iterZero->second;
00824 
00825     if( !BNDefault.OpenCounts( sArgs.default_arg, mapstriDatasets, vecbZeros, vecdAlphas,
00826                                sArgs.pseudocounts_arg ) ) {
00827         cerr << "Could not open default counts: " << ( sArgs.default_arg ? sArgs.default_arg : "not given" ) <<
00828              endl;
00829         return 1;
00830     }
00831     if( sArgs.regularize_flag ) {
00832         d = BNDefault.Regularize( vecdAlphas );
00833         BNDefault.OpenCounts( sArgs.default_arg, mapstriDatasets, vecbZeros, vecdAlphas, d );
00834     }
00835 
00836     for( i = 0; i < vecstrContexts.size( ); ++i ) {
00837         strFile = (string)sArgs.counts_arg + '/' + CMeta::Filename( vecstrContexts[ i ] ) + c_acTxt;
00838         cerr << "Processing: " << strFile << endl;
00839         pBN = new CBayesNetMinimal( );
00840         if( !pBN->OpenCounts( strFile.c_str( ), mapstriDatasets, vecbZeros, vecdAlphas, sArgs.pseudocounts_arg,
00841                               &BNDefault ) )
00842             return 1;
00843         if( sArgs.regularize_flag ) {
00844             d = pBN->Regularize( vecdAlphas );
00845             pBN->OpenCounts( strFile.c_str( ), mapstriDatasets, vecbZeros, vecdAlphas, d, &BNDefault );
00846         }
00847         vecpBNs.push_back( pBN );
00848     }
00849 
00850     cerr << "Created " << ( vecpBNs.size( ) + 1 ) << " Bayesian classifiers" << endl;
00851 
00852     if( sArgs.output_arg ) {
00853         if( sArgs.smile_flag ) {
00854             CBayesNetSmile                      BNSmile;
00855             vector<string>                      vecstrNames;
00856             map<string, size_t>::const_iterator iterName;
00857 
00858             vecstrNames.resize( mapstriDatasets.size( ) );
00859             for( iterName = mapstriDatasets.begin( ); iterName != mapstriDatasets.end( ); ++iterName )
00860                 vecstrNames[ iterName->second ] = iterName->first;
00861 
00862             BNSmile.Open( BNDefault, vecstrNames );
00863             BNSmile.Save( ( (string)sArgs.output_arg + '/' + BNDefault.GetID( ) +
00864                             ( sArgs.xdsl_flag ? ".x" : "." ) + "dsl" ).c_str( ) );
00865             for( i = 0; i < vecpBNs.size( ); ++i ) {
00866                 BNSmile.Open( *vecpBNs[ i ], vecstrNames );
00867                 BNSmile.Save( ( (string)sArgs.output_arg + '/' + vecpBNs[ i ]->GetID( ) +
00868                                 ( sArgs.xdsl_flag ? ".x" : "." ) + "dsl" ).c_str( ) );
00869             }
00870         }
00871         else {
00872             ofsm.open( sArgs.output_arg, ios_base::binary );
00873             BNDefault.Save( ofsm );
00874             iSize = (uint32_t)vecpBNs.size( );
00875             ofsm.write( (const char*)&iSize, sizeof(iSize) );
00876             for( i = 0; i < vecpBNs.size( ); ++i )
00877                 vecpBNs[ i ]->Save( ofsm );
00878             ofsm.close( );
00879         }
00880     }
00881     for( i = 0; i < vecpBNs.size( ); ++i )
00882         delete vecpBNs[ i ];
00883 
00884     return 0;
00885 }
00886 
00887 int main_inference( const gengetopt_args_info& sArgs, const map<string, size_t>& mapstriZeros,
00888                     const map<string, size_t>& mapstriDatasets ) {
00889     map<string, size_t>::const_iterator iterDataset;
00890     vector<size_t>                      veciGenes;
00891     size_t                              i, j, iTerm, iThread;
00892     vector<CGenes*>                     vecpGenes;
00893     vector<CDat*>                       vecpYes, vecpNo;
00894     vector<string>                      vecstrTmps;
00895     CGenome                             Genome;
00896     vector<SEvaluate>                   vecsData;
00897     CBayesNetMinimal                    BNDefault;
00898     vector<CBayesNetMinimal*>           vecpBNs;
00899     ifstream                            ifsm;
00900     uint32_t                            iSize;
00901     map<string, size_t>::const_iterator iterZero;
00902     vector<pthread_t>                   vecpthdThreads;
00903     bool                                fFirst;
00904 
00905     ifsm.open( sArgs.networks_arg, ios_base::binary );
00906     if( !BNDefault.Open( ifsm ) ) {
00907         cerr << "Could not open: " << sArgs.networks_arg << endl;
00908         return 1;
00909     }
00910     ifsm.read( (char*)&iSize, sizeof(iSize) );
00911     vecpBNs.resize( iSize );
00912     for( i = 0; i < vecpBNs.size( ); ++i ) {
00913         vecpBNs[ i ] = new CBayesNetMinimal( );
00914         if( !vecpBNs[ i ]->Open( ifsm ) ) {
00915             cerr << "Could not open: " << sArgs.networks_arg << endl;
00916             return 1;
00917         }
00918     }
00919     ifsm.close( );
00920 
00921     /*
00922     size_t k, m;
00923     for( i = 0; i < BNDefault.GetNodes( ); ++i )
00924     for( j = 0; j < BNDefault.GetCPT( i ).GetRows( ); ++j )
00925     for( k = 0; k < BNDefault.GetCPT( i ).GetColumns( ); ++k )
00926     cout << i << '\t' << j << '\t' << k << '\t' << BNDefault.GetCPT( i ).Get( j, k ) << endl;
00927     for( m = 0; m < vecpBNs.size( ); ++m )
00928     for( i = 0; i < vecpBNs[ m ]->GetNodes( ); ++i )
00929     for( j = 0; j < vecpBNs[ m ]->GetCPT( i ).GetRows( ); ++j )
00930     for( k = 0; k < vecpBNs[ m ]->GetCPT( i ).GetColumns( ); ++k )
00931     cout << i << '\t' << j << '\t' << k << '\t' << vecpBNs[ m ]->GetCPT( i ).Get( j, k ) << endl;
00932     return 0;
00933     //*/
00934 
00935     if( !sArgs.genome_arg ) {
00936         cerr << "No genes given" << endl;
00937         return 1;
00938     }
00939     {
00940         CPCL    PCLGenes( false );
00941 
00942         if( !PCLGenes.Open( sArgs.genome_arg, 1 ) ) {
00943             cerr << "Could not open: " << sArgs.genome_arg << endl;
00944             return 1;
00945         }
00946         for( i = 0; i < PCLGenes.GetGenes( ); ++i )
00947             Genome.AddGene( PCLGenes.GetFeature( i, 1 ) );
00948     }
00949 
00950     vecpGenes.resize( sArgs.inputs_num ? sArgs.inputs_num : 1 );
00951     vecpYes.resize( vecpGenes.size( ) );
00952     vecpNo.resize( vecpGenes.size( ) );
00953     vecstrTmps.resize( vecpNo.size( ) );
00954     for( i = 0; i < vecpGenes.size( ); ++i ) {
00955         char*   szTemp;
00956 
00957         vecpGenes[ i ]  = new CGenes( Genome );
00958         if( sArgs.inputs_num ) {
00959             ifstream    ifsm;
00960         }
00961         else
00962             vecpGenes[ i ]->Open( Genome.GetGeneNames( ), false );
00963         vecpYes[ i ] = new CDat( );
00964         vecpNo[ i ] = new CDat( );
00965         if( sArgs.memmapout_flag ) {
00966             vecpYes[ i ]->Open( Genome.GetGeneNames( ), false,
00967                                 ((string) sArgs.output_arg + '/' + (sArgs.inputs_num ? CMeta::Basename(
00968                                             sArgs.inputs[ i ] ) : "global") + c_acDab).c_str( ) );
00969             /*FIXME: _tempnam can result in a filename that ends up in use later via race condition */
00970             if( !(szTemp = _tempnam( sArgs.temporary_arg, NULL )) ) {
00971                 cerr << "Could not generate temporary file name in: " << sArgs.temporary_arg << endl;
00972                 return 1;
00973             }
00974             cout << "File: " << szTemp << endl;
00975             vecstrTmps[ i ] = szTemp;
00976             free( szTemp );
00977             vecpNo[ i ]->Open( Genome.GetGeneNames( ), false, vecstrTmps[ i ].c_str( ) );
00978         } else {
00979             vecpYes[ i ]->Open( Genome.GetGeneNames( ), false, NULL );
00980             vecpNo[ i ]->Open( Genome.GetGeneNames( ), false, NULL );
00981         }
00982     }
00983 
00984     veciGenes.resize( vecpYes[ 0 ]->GetGenes( ) );
00985     vecsData.resize( vecpGenes.size( ) );
00986     vecpthdThreads.resize( vecsData.size( ) );
00987     for( fFirst = true,iterDataset = mapstriDatasets.begin( ); iterDataset != mapstriDatasets.end( );
00988             fFirst = false,++iterDataset ) {
00989         CDataPair   Dat;
00990         string      strFile;
00991 
00992         if( !Dat.Open( ( strFile = ( (string)sArgs.directory_arg + '/' + iterDataset->first +
00993                                      c_acDab ) ).c_str( ), false, !!sArgs.memmap_flag ) && !Dat.Open( ( strFile = ( (string)sArgs.directory_arg + '/' + iterDataset->first +
00994                                              c_acQDab ) ).c_str( ), false, !!sArgs.memmap_flag ) ) {
00995             cerr << "Couldn't open: " << strFile << endl;
00996             return 1;
00997         }
00998         cerr << "Processing: " << strFile << endl;
00999         for( i = 0; i < veciGenes.size( ); ++i )
01000             veciGenes[ i ] = Dat.GetGene( vecpYes[ 0 ]->GetGene( i ) );
01001         for( iTerm = 0; iTerm < vecpGenes.size( ); iTerm += iThread ) {
01002             for( iThread = 0; ( ( sArgs.threads_arg == -1 ) || ( iThread < (size_t)sArgs.threads_arg ) ) &&
01003                     ( ( iTerm + iThread ) < vecpGenes.size( ) ); ++iThread ) {
01004                 i = iTerm + iThread;
01005                 if( sArgs.inputs_num ) {
01006                     for( j = 0; j < vecpBNs.size( ); ++j )
01007                         if( vecpBNs[ j ]->GetID( ) == CMeta::Deextension( CMeta::Basename(
01008                                     sArgs.inputs[ i ] ) ) )
01009                             break;
01010                     if( j >= vecpBNs.size( ) ) {
01011                         cerr << "Could not locate Bayes net for: " << sArgs.inputs[ i ] << endl;
01012                         return 1;
01013                     }
01014                     vecsData[ i ].m_pBN = vecpBNs[ j ];
01015                 }
01016                 else
01017                     vecsData[ i ].m_pBN = &BNDefault;
01018                 vecsData[ i ].m_pDat = &Dat;
01019                 vecsData[ i ].m_pGenes = vecpGenes[ i ];
01020                 vecsData[ i ].m_pYes = vecpYes[ i ];
01021                 vecsData[ i ].m_pNo = vecpNo[ i ];
01022                 vecsData[ i ].m_iZero = ( ( iterZero = mapstriZeros.find( iterDataset->first ) ) ==
01023                                           mapstriZeros.end( ) ) ? vecsData[ i ].m_pBN->GetDefault( iterDataset->second + 1 ) :
01024                                         iterZero->second;
01025                 if( vecsData[ i ].m_iZero == 0xFF )
01026                     vecsData[ i ].m_iZero = -1;
01027                 vecsData[ i ].m_iNode = iterDataset->second + 1;
01028                 vecsData[ i ].m_pveciGenes = &veciGenes;
01029                 vecsData[ i ].m_fFirst = fFirst;
01030                 vecsData[ i ].m_strName = sArgs.inputs_num ? sArgs.inputs[ i ] : "global";
01031                 if( pthread_create( &vecpthdThreads[ i ], NULL, evaluate, &vecsData[ i ] ) ) {
01032                     cerr << "Couldn't create evaluation thread: " << sArgs.inputs[ i ] << endl;
01033                     return 1;
01034                 }
01035             }
01036             for( i = 0; i < iThread; ++i )
01037                 pthread_join( vecpthdThreads[ iTerm + i ], NULL );
01038         }
01039     }
01040 
01041     for( iTerm = 0; iTerm < vecpGenes.size( ); iTerm += iThread ) {
01042         for( iThread = 0; ( ( sArgs.threads_arg == -1 ) || ( iThread < (size_t)sArgs.threads_arg ) ) &&
01043                 ( ( iTerm + iThread ) < vecpGenes.size( ) ); ++iThread ) {
01044             i = iTerm + iThread;
01045             if( sArgs.inputs_num ) {
01046                 for( j = 0; j < vecpBNs.size( ); ++j )
01047                     if( vecpBNs[ j ]->GetID( ) == CMeta::Deextension( CMeta::Basename( sArgs.inputs[ i ] ) ) )
01048                         break;
01049                 if( j >= vecpBNs.size( ) ) {
01050                     cerr << "Could not locate Bayes net for: " << sArgs.inputs[ i ] << endl;
01051                     return 1;
01052                 }
01053                 vecsData[ i ].m_pBN = vecpBNs[ j ];
01054             }
01055             else
01056                 vecsData[ i ].m_pBN = &BNDefault;
01057             vecsData[ i ].m_pYes = vecpYes[ i ];
01058             vecsData[ i ].m_pNo = vecpNo[ i ];
01059             vecsData[ i ].m_strName = sArgs.inputs_num ? sArgs.inputs[ i ] : "global";
01060             vecsData[ i ].m_bLogratio = sArgs.logratio_flag;
01061             if( pthread_create( &vecpthdThreads[ i ], NULL, finalize, &vecsData[ i ] ) ) {
01062                 cerr << "Couldn't create finalization thread: " << sArgs.inputs[ i ] << endl;
01063                 return 1;
01064             }
01065         }
01066         for( i = 0; i < iThread; ++i )
01067             pthread_join( vecpthdThreads[ iTerm + i ], NULL );
01068     }
01069 
01070     if( sArgs.memmapout_flag ) {
01071         for( i = 0; i < vecstrTmps.size( ); ++i ) {
01072             _unlink( vecstrTmps[ i ].c_str( ) );
01073         }
01074     }
01075     for( i = 0; i < vecpBNs.size( ); ++i )
01076         delete vecpBNs[ i ];
01077     for( i = 0; i < vecpGenes.size( ); ++i ) {
01078         delete vecpNo[ i ];
01079         if( !sArgs.memmapout_flag ) {
01080             vecpYes[ i ]->Save( ((string) sArgs.output_arg + '/' + (sArgs.inputs_num ? CMeta::Basename(
01081                                      sArgs.inputs[ i ] ) : "global") + c_acDab).c_str( ) );
01082         }
01083         delete vecpYes[ i ];
01084         delete vecpGenes[ i ];
01085     }
01086 
01087     return 0;
01088 }
01089 
01090 void* evaluate( void* pData ) {
01091     SEvaluate*  psData;
01092     size_t      i, j, iOne, iTwo, iBin, iIndex;
01093     float*      adYes;
01094     float*      adNo;
01095     float       dNo, dYes;
01096 
01097     psData = (SEvaluate*)pData;
01098     if( psData->m_fFirst ) {
01099         float*  adBuffer;
01100 
01101         adBuffer = new float[ psData->m_pYes->GetGenes( ) ];
01102         for( i = 0; i < psData->m_pNo->GetGenes( ); ++i )
01103             adBuffer[ i ] = CMeta::GetNaN( );
01104         for( i = 0; i < psData->m_pNo->GetGenes( ); ++i ) {
01105             if( !( i % 1000 ) )
01106                 cerr << "IN: " << psData->m_strName << ", " << i << endl;
01107             psData->m_pNo->Set( i, adBuffer );
01108         }
01109         for( i = 0; i < psData->m_pYes->GetGenes( ); ++i ) {
01110             if( !( i % 1000 ) )
01111                 cerr << "IY: " << psData->m_strName << ", " << i << endl;
01112             psData->m_pYes->Set( i, adBuffer );
01113         }
01114         delete[] adBuffer;
01115     }
01116 
01117     dNo = log( psData->m_pBN->GetCPT( 0 ).Get( 0, 0 ) );
01118     dYes = log( psData->m_pBN->GetCPT( 0 ).Get( 1, 0 ) );
01119     adYes = new float[ psData->m_pYes->GetGenes( ) ];
01120     adNo = new float[ psData->m_pNo->GetGenes( ) ];
01121     for( i = 0; i < psData->m_pYes->GetGenes( ); ++i ) {
01122         if( !( i % 1000 ) )
01123             cerr << "C: " << psData->m_strName << ", " << i << endl;
01124         if( ( ( iOne = (*psData->m_pveciGenes)[ i ] ) == -1 ) && ( psData->m_iZero == -1 ) )
01125             continue;
01126         memcpy( adYes, psData->m_pYes->Get( i ), ( psData->m_pYes->GetGenes( ) - i - 1 ) * sizeof(*adYes) );
01127         memcpy( adNo, psData->m_pNo->Get( i ), ( psData->m_pNo->GetGenes( ) - i - 1 ) * sizeof(*adNo) );
01128         for( j = ( i + 1 ); j < psData->m_pYes->GetGenes( ); ++j ) {
01129             if( ( ( iTwo = (*psData->m_pveciGenes)[ j ] ) == -1 ) && ( psData->m_iZero == -1 ) )
01130                 continue;
01131 
01132             iBin = psData->m_pDat->Quantize( iOne, iTwo, psData->m_iZero );
01133             if( iBin == -1 )
01134                 continue;
01135             if( CMeta::IsNaN( adYes[ iIndex = ( j - i - 1 ) ] ) ) {
01136                 adYes[ iIndex ] = dYes;
01137                 adNo[ iIndex ] = dNo;
01138             }
01139             adNo[ iIndex ] += log( psData->m_pBN->GetCPT( psData->m_iNode ).Get( iBin, 0 ) );
01140             adYes[ iIndex ] += log( psData->m_pBN->GetCPT( psData->m_iNode ).Get( iBin, 1 ) );
01141         }
01142         psData->m_pNo->Set( i, adNo );
01143         psData->m_pYes->Set( i, adYes );
01144     }
01145     delete[] adYes;
01146     delete[] adNo;
01147 
01148     return NULL;
01149 }
01150 
01151 void* finalize( void* pData ) {
01152     SEvaluate*  psData;
01153     size_t      i, j;
01154     float*      adYes;
01155     float*      adNo;
01156     float       dPrior, dYes, dNo;
01157 
01158     psData = (SEvaluate*)pData;
01159     dPrior = psData->m_pBN->GetCPT( 0 ).Get( 1, 0 );
01160     adYes = new float[ psData->m_pYes->GetGenes( ) ];
01161     adNo = new float[ psData->m_pNo->GetGenes( ) ];
01162 
01163     dYes = log( dPrior );
01164     dNo = log( psData->m_pBN->GetCPT( 0 ).Get( 0, 0 ) );
01165 
01166     for( i = 0; i < psData->m_pYes->GetGenes( ); ++i ) {
01167         if( !( i % 1000 ) )
01168             cerr << "F: " << psData->m_strName << ", " << i << endl;
01169         memcpy( adYes, psData->m_pYes->Get( i ), ( psData->m_pYes->GetGenes( ) - i - 1 ) * sizeof(*adYes) );
01170         memcpy( adNo, psData->m_pNo->Get( i ), ( psData->m_pNo->GetGenes( ) - i - 1 ) * sizeof(*adNo) );
01171         for( j = 0; j < ( psData->m_pYes->GetGenes( ) - i - 1 ); ++j ) {
01172             if( psData->m_bLogratio ) {
01173                 adYes[ j ] = CMeta::IsNaN( adYes[ j ] ) ? 0.0 :
01174                              (float)( (double)(adYes[ j ] - dYes) - (double)(adNo[ j ] - dNo) );
01175             }
01176             else {
01177                 adYes[ j ] = CMeta::IsNaN( adYes[ j ] ) ? dPrior :
01178                              (float)( 1 / ( 1 + exp( (double)adNo[ j ] - (double)adYes[ j ] ) ) );
01179             }
01180         }
01181         psData->m_pYes->Set( i, adYes );
01182     }
01183     delete[] adNo;
01184     delete[] adYes;
01185 
01186     return NULL;
01187 }
01188 
01190 
01191 int main_inference2( const gengetopt_args_info& sArgs, const map<string, size_t>& mapstriZeros,
01192                      const map<string, size_t>& mapstriDatasets ) {
01193     map<string, size_t>::const_iterator iterDataset;
01194     vector<size_t>                      veciGenes, veciBNs;
01195     size_t                              i, j, iThread;
01196     vector<CGenes*>                     vecpGenes;
01197     vector<CDat*>                       vecpYes, vecpNo;
01198     vector<string>                      vecstrTmps;
01199     CGenome                             Genome;
01200     vector<SEvaluate2>                  vecsData;
01201     CBayesNetMinimal                    BNDefault;
01202     vector<CBayesNetMinimal*>           vecpBNs;
01203     ifstream                            ifsm;
01204     uint32_t                            iSize;
01205     map<string, size_t>::const_iterator iterZero;
01206     vector<pthread_t>                   vecpthdThreads;
01207     bool                                fFirst;
01208 
01209     ifsm.open( sArgs.networks_arg, ios_base::binary );
01210     if( !BNDefault.Open( ifsm ) ) {
01211         cerr << "Could not open: " << sArgs.networks_arg << endl;
01212         return 1;
01213     }
01214     ifsm.read( (char*)&iSize, sizeof(iSize) );
01215     vecpBNs.resize( iSize );
01216     for( i = 0; i < vecpBNs.size( ); ++i ) {
01217         vecpBNs[ i ] = new CBayesNetMinimal( );
01218         if( !vecpBNs[ i ]->Open( ifsm ) ) {
01219             cerr << "Could not open: " << sArgs.networks_arg << endl;
01220             return 1;
01221         }
01222     }
01223     ifsm.close( );
01224 
01225     if( !sArgs.genome_arg ) {
01226         cerr << "No genes given" << endl;
01227         return 1;
01228     }
01229     {
01230         CPCL    PCLGenes( false );
01231 
01232         if( !PCLGenes.Open( sArgs.genome_arg, 1 ) ) {
01233             cerr << "Could not open: " << sArgs.genome_arg << endl;
01234             return 1;
01235         }
01236         for( i = 0; i < PCLGenes.GetGenes( ); ++i )
01237             Genome.AddGene( PCLGenes.GetFeature( i, 1 ) );
01238     }
01239 
01240     vecpGenes.resize( sArgs.inputs_num ? sArgs.inputs_num : 1 );
01241     vecpYes.resize( 1 );
01242     vecpNo.resize( vecpYes.size( ) );
01243     vecstrTmps.resize( vecpNo.size( ) );
01244     for( i = 0; i < vecpYes.size( ); ++i ) {
01245         char*   szTemp;
01246 
01247         vecpYes[ i ] = new CDat( );
01248         vecpYes[ i ]->Open( Genome.GetGeneNames( ), false, ( (string)sArgs.output_arg + '/' + "global" + c_acDab ).c_str( ) );
01249         vecpNo[ i ] = new CDat( );
01250         if( !( szTemp = _tempnam( sArgs.temporary_arg, NULL ) ) ) {
01251             cerr << "Could not generate temporary file name in: " << sArgs.temporary_arg << endl;
01252             return 1;
01253         }
01254         vecstrTmps[ i ] = szTemp;
01255         free( szTemp );
01256         vecpNo[ i ]->Open( Genome.GetGeneNames( ), false, vecstrTmps[ i ].c_str( ) );
01257     }
01258     for( i = 0; i < vecpGenes.size( ); ++i ) {
01259         vecpGenes[ i ]  = new CGenes( Genome );
01260         if( sArgs.inputs_num ) {
01261             ifstream    ifsm;
01262         }
01263         else
01264             vecpGenes[ i ]->Open( Genome.GetGeneNames( ), false );
01265     }
01266 
01267     {
01268         map<string, size_t>                 mapstriBNs;
01269         map<string, size_t>::const_iterator iterBN;
01270         string                              strGene;
01271 
01272         for( i = 0; i < vecpBNs.size( ); ++i )
01273             mapstriBNs[ vecpBNs[ i ]->GetID( ) ] = i;
01274         veciBNs.resize( vecpYes[ 0 ]->GetGenes( ) );
01275         for( i = 0; i < veciBNs.size( ); ++i ) {
01276             strGene = CMeta::Filename( vecpYes[ 0 ]->GetGene( i ) );
01277             veciBNs[ i ] = ( ( iterBN = mapstriBNs.find( strGene ) ) == mapstriBNs.end( ) ) ? -1 :
01278                            iterBN->second;
01279         }
01280     }
01281 
01282     for( i = 0; i < vecpYes.size( ); ++i ) {
01283         float*  adBuffer;
01284 
01285         adBuffer = new float[ vecpYes[ i ]->GetGenes( ) ];
01286         for( j = 0; j < vecpNo[ i ]->GetGenes( ); ++j )
01287             adBuffer[ j ] = log( BNDefault.GetCPT( 0 ).Get( 0, 0 ) );
01288         for( j = 0; j < vecpNo[ i ]->GetGenes( ); ++j ) {
01289             if( !( j % 1000 ) )
01290                 cerr << "IN: global, " << j << endl;
01291             vecpNo[ i ]->Set( j, adBuffer );
01292         }
01293         for( j = 0; j < vecpYes[ i ]->GetGenes( ); ++j )
01294             adBuffer[ j ] = log( BNDefault.GetCPT( 0 ).Get( 1, 0 ) );
01295         for( j = 0; j < vecpYes[ i ]->GetGenes( ); ++j ) {
01296             if( !( j % 1000 ) )
01297                 cerr << "IY: global, " << j << endl;
01298             vecpYes[ i ]->Set( j, adBuffer );
01299         }
01300         delete[] adBuffer;
01301     }
01302 
01303     veciGenes.resize( vecpYes[ 0 ]->GetGenes( ) );
01304     vecsData.resize( ( sArgs.threads_arg == -1 ) ? 1 : sArgs.threads_arg );
01305     vecpthdThreads.resize( vecsData.size( ) );
01306     for( fFirst = true,iterDataset = mapstriDatasets.begin( ); iterDataset != mapstriDatasets.end( );
01307             fFirst = false,++iterDataset ) {
01308         CDataPair       Dat;
01309         string          strFile;
01310         vector<size_t>  veciCur;
01311         size_t          iZero;
01312 
01313         if( !Dat.Open( ( strFile = ( (string)sArgs.directory_arg + '/' + iterDataset->first +
01314                                      c_acDab ) ).c_str( ), false, !!sArgs.memmap_flag ) ) {
01315             cerr << "Couldn't open: " << strFile << endl;
01316             return 1;
01317         }
01318         cerr << "Processing: " << strFile << endl;
01319         for( i = 0; i < veciGenes.size( ); ++i )
01320             veciGenes[ i ] = Dat.GetGene( vecpYes[ 0 ]->GetGene( i ) );
01321         iZero = ( ( iterZero = mapstriZeros.find( iterDataset->first ) ) == mapstriZeros.end( ) ) ?
01322                 BNDefault.GetDefault( iterDataset->second + 1 ) : iterZero->second;
01323         if( iZero == 0xFF )
01324             iZero = -1;
01325         iThread = ( vecpYes[ 0 ]->GetGenes( ) + 1 ) / vecsData.size( );
01326         for( i = j = 0; i < vecsData.size( ); ++i,j += iThread ) {
01327             vecsData[ i ].m_iBegin = j;
01328             vecsData[ i ].m_iEnd = min( vecpYes[ 0 ]->GetGenes( ), j + iThread );
01329             vecsData[ i ].m_pBN = &BNDefault;
01330             vecsData[ i ].m_pvecpBNs = &vecpBNs;
01331             vecsData[ i ].m_pDat = &Dat;
01332             vecsData[ i ].m_pYes = vecpYes[ 0 ];
01333             vecsData[ i ].m_pNo = vecpNo[ 0 ];
01334             vecsData[ i ].m_iZero = iZero;
01335             vecsData[ i ].m_pveciGenes = &veciGenes;
01336             vecsData[ i ].m_strName = iterDataset->first;
01337             vecsData[ i ].m_pveciBNs = &veciBNs;
01338             vecsData[ i ].m_iNode = iterDataset->second + 1;
01339             if( fFirst )
01340                 pthread_spin_init( &vecsData[ i ].m_sLock, 0 );
01341             if( pthread_create( &vecpthdThreads[ i ], NULL, evaluate2, &vecsData[ i ] ) ) {
01342                 cerr << "Couldn't create evaluation thread: " << i << endl;
01343                 return 1;
01344             }
01345         }
01346         for( i = 0; i < vecpthdThreads.size( ); ++i )
01347             pthread_join( vecpthdThreads[ i ], NULL );
01348     }
01349 
01350     {
01351         CBayesNetMinimal*   pBN;
01352         float*              adYes;
01353         float*              adNo;
01354         float               dPrior;
01355 
01356         if( sArgs.inputs_num ) {
01357             for( j = 0; j < vecpBNs.size( ); ++j )
01358                 if( vecpBNs[ j ]->GetID( ) == CMeta::Deextension( CMeta::Basename( sArgs.inputs[ i ] ) ) )
01359                     break;
01360             if( j >= vecpBNs.size( ) ) {
01361                 cerr << "Could not locate Bayes net for: " << sArgs.inputs[ i ] << endl;
01362                 return 1;
01363             }
01364             pBN = vecpBNs[ j ];
01365         }
01366         else
01367             pBN = &BNDefault;
01368 
01369         dPrior = pBN->GetCPT( 0 ).Get( 1, 0 );
01370         adYes = new float[ vecpYes[ 0 ]->GetGenes( ) ];
01371         adNo = new float[ vecpYes[ 0 ]->GetGenes( ) ];
01372         for( i = 0; i < vecpYes[ 0 ]->GetGenes( ); ++i ) {
01373             if( !( i % 1000 ) )
01374                 cerr << "F: global, " << i << endl;
01375             memcpy( adYes, vecpYes[ 0 ]->Get( i ), ( vecpYes[ 0 ]->GetGenes( ) - i - 1 ) * sizeof(*adYes) );
01376             memcpy( adNo, vecpNo[ 0 ]->Get( i ), ( vecpNo[ 0 ]->GetGenes( ) - i - 1 ) * sizeof(*adNo) );
01377             for( j = 0; j < ( vecpYes[ 0 ]->GetGenes( ) - i - 1 ); ++j )
01378                 adYes[ j ] = CMeta::IsNaN( adYes[ j ] ) ? dPrior :
01379                              (float)( 1 / ( 1 + exp( (double)adNo[ j ] - (double)adYes[ j ] ) ) );
01380             vecpYes[ 0 ]->Set( i, adYes );
01381         }
01382     }
01383 
01384     for( i = 0; i < vecsData.size( ); ++i )
01385         pthread_spin_destroy( &vecsData[ i ].m_sLock );
01386     for( i = 0; i < vecstrTmps.size( ); ++i )
01387         _unlink( vecstrTmps[ i ].c_str( ) );
01388     for( i = 0; i < vecpBNs.size( ); ++i )
01389         delete vecpBNs[ i ];
01390     for( i = 0; i < vecpGenes.size( ); ++i )
01391         delete vecpGenes[ i ];
01392     for( i = 0; i < vecpYes.size( ); ++i ) {
01393         delete vecpYes[ i ];
01394         delete vecpNo[ i ];
01395     }
01396 
01397     return 0;
01398 }
01399 
01400 void* evaluate2( void* pData ) {
01401     SEvaluate2*     psData;
01402     size_t          i, j, iBNOne, iBNTwo;
01403     vector<size_t>  veciCur;
01404     float           dYes, dNo;
01405 
01406     psData = (SEvaluate2*)pData;
01407     for( i = psData->m_iBegin; i < psData->m_iEnd; ++i ) {
01408         size_t  iOne, iTwo, iBin, k;
01409 
01410         if( !( i % 1000 ) )
01411             cerr << "C: " << psData->m_strName << ", " << i << endl;
01412         if( ( ( iOne = (*psData->m_pveciGenes)[ i ] ) == -1 ) && ( psData->m_iZero == -1 ) )
01413             continue;
01414         veciCur.clear( );
01415         if( ( iBNOne = (*psData->m_pveciBNs)[ i ] ) != -1 )
01416             veciCur.push_back( iBNOne );
01417         for( j = ( i + 1 ); j < psData->m_pYes->GetGenes( ); ++j ) {
01418             if( ( ( iTwo = (*psData->m_pveciGenes)[ j ] ) == -1 ) && ( psData->m_iZero == -1 ) )
01419                 continue;
01420 
01421             iBin = psData->m_pDat->Quantize( iOne, iTwo, psData->m_iZero );
01422             if( iBin == -1 )
01423                 continue;
01424 
01425             if( ( iBNTwo = (*psData->m_pveciBNs)[ j ] ) != -1 )
01426                 veciCur.push_back( iBNTwo );
01427             for( k = 0; k < max( (size_t)1, veciCur.size( ) ); ++k ) {
01428                 const CBayesNetMinimal* pBN = ( k >= veciCur.size( ) ) ? psData->m_pBN: (*psData->m_pvecpBNs)[ veciCur[ k ] ];
01429 
01430                 dNo = log( pBN->GetCPT( psData->m_iNode ).Get( iBin, 0 ) );
01431                 dYes = log( pBN->GetCPT( psData->m_iNode ).Get( iBin, 1 ) );
01432                 pthread_spin_lock( &psData->m_sLock );
01433                 psData->m_pNo->Get( i, j ) += dNo;
01434                 psData->m_pYes->Get( i, j ) += dYes;
01435                 pthread_spin_unlock( &psData->m_sLock );
01436             }
01437             if( iBNTwo != -1 )
01438                 veciCur.pop_back( );
01439         }
01440     }
01441 
01442     return NULL;
01443 }