Sleipnir
tools/SpeciesConnector/SpeciesConnector.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 
00025 static const char   c_acDab[]   = ".dab";
00026 
00027 int main( int iArgs, char** aszArgs ) {
00028     gengetopt_args_info         sArgs;
00029     size_t                      i, j, k, iDatOne, iDatTwo, iGeneOne, iGeneTwo, iZero;
00030     size_t                      iCountJoint, iValueOne, iValueTwo;
00031     vector<vector<size_t> >     vecveciJoint;
00032     vector<string>              vecstrInputs, vecstrxInputs, vecstrGenes;
00033     map<string, size_t>         mapZeros;
00034     CBayesNetSmile              BNIn;
00035     vector<size_t>              veciGenesOneI, veciGenesTwoI;
00036     float                       Threshold;
00037 
00038     if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
00039         cmdline_parser_print_help( );
00040         return 1; }
00041     CMeta Meta( sArgs.verbosity_arg, sArgs.random_arg );    
00042 
00043     if( sArgs.zeros_arg ) {
00044         ifstream        ifsm;
00045         vector<string>  vecstrZeros;
00046         char            acLine[ 1024 ];
00047 
00048         ifsm.open( sArgs.zeros_arg );
00049         if( !ifsm.is_open( ) ) {
00050             cerr << "Could not open: " << sArgs.zeros_arg << endl;
00051             return 1; }
00052         while( !ifsm.eof( ) ) {
00053             ifsm.getline( acLine, ARRAYSIZE(acLine) - 1 );
00054             acLine[ ARRAYSIZE(acLine) - 1 ] = 0;
00055             vecstrZeros.clear( );
00056             CMeta::Tokenize( acLine, vecstrZeros );
00057             if( vecstrZeros.empty( ) )
00058                 continue;
00059             mapZeros[ vecstrZeros[ 0 ] ] = atoi( vecstrZeros[ 1 ].c_str( ) ); } }
00060     
00061     _mkdir( sArgs.odirectory_arg );
00062 
00063     vecstrInputs.resize( sArgs.inputs_num );
00064     copy( sArgs.inputs, sArgs.inputs + sArgs.inputs_num, vecstrInputs.begin( ) );
00065 
00066     vecstrxInputs.resize( sArgs.inputs_num );
00067 
00068     for( i = 0; i < vecstrInputs.size( ); ++i ){
00069         if( strcmp( &vecstrInputs[ i ][ vecstrInputs[ i ].rfind( "." ) + 1 ], "xdsl" ) == 0 ){ 
00070             vecstrxInputs[ i ].resize( vecstrInputs[ i ].rfind( "." ) - vecstrInputs[ i ].rfind( "/" ) - 1 );
00071             vecstrxInputs[ i ] = vecstrInputs[ i ].substr( vecstrInputs[ i ].rfind( "/" ) + 1, vecstrInputs[ i ].rfind( "." ) - vecstrInputs[ i ].rfind( "/" ) - 1 );}
00072         else{
00073             cerr << "inputs file types should be xdsl." <<  endl;
00074             return 1;}}
00075 
00076     vector<vector<float> >                  vecPrior;
00077     vector<vector<vector<float> > >         vecDataGSpZero;
00078     vector<vector<vector<float> > >         vecDataGSpOne;
00079     vector<vector<string> >                 vecvecSpDat;
00080     CDataMatrix     MatCPT;
00081     vector<string>  vecstrFiles;
00082 
00083     vecPrior.resize( sArgs.inputs_num );
00084     vecDataGSpZero.resize( sArgs.inputs_num );
00085     vecDataGSpOne.resize( sArgs.inputs_num );
00086     vecvecSpDat.resize( sArgs.inputs_num );
00087     for( i = 0; i < vecPrior.size(  ); ++i ){
00088         if( !BNIn.Open( vecstrInputs[ i ].c_str( ) ) ) {
00089             cerr << "Couldn't open: " << vecstrInputs[ i ] << endl;
00090             return 1;}
00091         BNIn.GetCPT( 0, MatCPT );
00092         vecPrior[ i ].resize( MatCPT.GetRows( ) );
00093         vecPrior[ i ][ 0 ] = MatCPT.Get( 0, 0 );
00094         vecPrior[ i ][ 1 ] = MatCPT.Get( 1, 0 );
00095 
00096         BNIn.GetNodes( vecstrFiles );
00097         vecstrFiles.erase( vecstrFiles.begin( ) );
00098         vecDataGSpZero[ i ].resize( vecstrFiles.size( ) );
00099         vecDataGSpOne[ i ].resize( vecstrFiles.size( ) );
00100         vecvecSpDat[ i ].resize( vecstrFiles.size( ) );
00101         for( j = 0; j < vecstrFiles.size( ); ++j ){
00102             vecvecSpDat[ i ][ j ] = vecstrFiles[ j ];
00103             BNIn.GetCPT( j+1, MatCPT );
00104             vecDataGSpZero[ i ][ j ].resize( MatCPT.GetRows( ) );
00105             vecDataGSpOne[ i ][ j ].resize( MatCPT.GetRows( ) );
00106             for( k = 0; k < MatCPT.GetRows( ); ++k ){
00107                 vecDataGSpZero[ i ][ j ][ k ] = MatCPT.Get( k , 0 );
00108                 vecDataGSpOne[ i ][ j ][ k ] = MatCPT.Get( k , 1 );}}
00109         vecstrFiles.clear( );}
00110 
00111     if( sArgs.learn_flag ){
00112 
00113         _mkdir( sArgs.l0directory_arg );
00114         _mkdir( sArgs.l1directory_arg );
00115         _mkdir( sArgs.jdirectory_arg );
00116 
00117         Threshold = ( sArgs.threshold_arg == -1 ) ? 0.5f : float( sArgs.threshold_arg );
00118         
00119         vector<vector<string> >     vecvecstrInputs;
00120         size_t                      countstrInputs;
00121         vecvecstrInputs.resize( vecvecSpDat.size( ) );
00122         countstrInputs = 0;
00123         for( i = 0; i < vecvecSpDat.size( ); ++i ){
00124             vecvecstrInputs[ i ].resize( vecvecSpDat[ i ].size( ) ); 
00125             for( j = 0; j < vecvecSpDat[ i ].size( ); ++j ){
00126                 vecvecstrInputs[ i ][ j ] =  ( string )sArgs.ddirectory_arg + '/' + vecstrxInputs[ i ] + '/' + vecvecSpDat[ i ][ j ] + c_acDab;
00127                 countstrInputs++;}}
00128         
00129         vector<string>              vecstrFDAInputs;
00130         size_t                      countstrFDAInputs;
00131         vecstrFDAInputs.resize( countstrInputs + vecvecstrInputs.size( ) );
00132         countstrFDAInputs = 0;
00133         for( i = 0; i < vecvecstrInputs.size( ); ++i ){
00134             for( j = 0; j < vecvecstrInputs[ i ].size( ); ++j ){
00135                 vecstrFDAInputs[ countstrFDAInputs ] = vecvecstrInputs[ i ][ j ].c_str( );
00136                 countstrFDAInputs++;}
00137             vecstrFDAInputs[ countstrFDAInputs ] = ( string )sArgs.adirectory_arg + '/' + vecstrxInputs[ i ] + c_acDab;
00138             countstrFDAInputs++;}
00139         
00140         vector<string>      vecstrFGenes;
00141         {   CDataset    DataF;
00142 
00143         DataF.OpenGenes( vecstrFDAInputs );
00144         vecstrFGenes.resize( DataF.GetGenes( ) );
00145         copy( DataF.GetGeneNames( ).begin( ), DataF.GetGeneNames( ).end( ), vecstrFGenes.begin( ) );}
00146         
00147         if( sArgs.genelist_flag ){
00148             for( i = 0; i < vecstrFGenes.size( ); ++i )
00149                 cout << vecstrFGenes[ i ] << endl;
00150             return 1;}
00151 
00152         vector<CDat*>           DatOutB;
00153         DatOutB.resize( vecvecstrInputs.size( ) );
00154         for( i = 0; i < DatOutB.size( ); ++i ){
00155             DatOutB[ i ] = new CDat( );
00156             DatOutB[ i ]->Open( vecstrFGenes );}
00157 
00158         vector<CDat*>       vecDataIntZero;
00159         vector<CDat*>       vecDataIntOne;
00160         vecDataIntZero.resize( vecvecstrInputs.size( ) );
00161         vecDataIntOne.resize( vecvecstrInputs.size( ) );
00162         for( i = 0; i < vecvecstrInputs.size( ); ++i ){
00163             vecDataIntZero[ i ] = new CDat( );
00164             vecDataIntOne[ i ] = new CDat( );
00165             vecDataIntZero[ i ]->Open( vecstrFGenes );
00166             vecDataIntOne[ i ]->Open( vecstrFGenes );}
00167         
00168         for( i = 0; i < vecvecstrInputs.size( ); ++i ){
00169             map<string,size_t>::const_iterator  iterZero;
00170             for( j = 0; j < vecvecstrInputs[ i ].size( ); ++j ){
00171                 iZero = ( ( iterZero = mapZeros.find( vecvecstrInputs[ i ][ j ] ) ) == mapZeros.end( ) ) ? -1 : iterZero->second;
00172 
00173                 CDataPair       DataF;
00174                 if( !( DataF.Open( vecvecstrInputs[ i ][ j ].c_str( ), false, !!sArgs.memmap_flag ) ||
00175                     DataF.Open( vecvecstrInputs[ i ][ j ].c_str( ), true, !!sArgs.memmap_flag ) ) ){
00176                         cerr << "Could not open:" << vecvecstrInputs[ i ][ j ] << endl;
00177                         return 1;}
00178                 vector<size_t>      vecGeneIndex;
00179                 vecGeneIndex.resize( vecstrFGenes.size( ) );
00180                 for( k = 0; k < vecstrFGenes.size( ); ++k )
00181                     vecGeneIndex[ k ] = DataF.GetGene( vecstrFGenes[ k ] );
00182 
00183                 for( iDatOne = 0; iDatOne < vecstrFGenes.size( ); ++iDatOne ){ 
00184                     for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrFGenes.size( ); ++iDatTwo ){ 
00185                         iGeneOne = vecGeneIndex[ iDatOne ];
00186                         iGeneTwo = vecGeneIndex[ iDatTwo ];
00187                         float       DFValue;
00188                         DFValue = ( ( iGeneOne == -1 ) || ( iGeneTwo == -1 ) ) ? CMeta::GetNaN( ) : DataF.Get( iGeneOne, iGeneTwo );
00189                         if( !CMeta::IsNaN( DFValue ) ) {
00190                             size_t      DatPFI = DataF.Quantize( DFValue );
00191                             float       Zero, One;
00192 
00193                             if( CMeta::IsNaN( Zero = vecDataIntZero[ i ]->Get( iDatOne, iDatTwo ) ) )
00194                                 vecDataIntZero[ i ]->Set( iDatOne, iDatTwo, log( vecDataGSpZero[ i ][ j ][ DatPFI ] ) );
00195                             else
00196                                 vecDataIntZero[ i ]->Set( iDatOne, iDatTwo, Zero + log( vecDataGSpZero[ i ][ j ][ DatPFI ] ) );
00197 
00198                             if( CMeta::IsNaN( One = vecDataIntOne[ i ]->Get( iDatOne, iDatTwo ) ) )
00199                                 vecDataIntOne[ i ]->Set( iDatOne, iDatTwo, log( vecDataGSpOne[ i ][ j ][ DatPFI ] ) );
00200                             else
00201                                 vecDataIntOne[ i ]->Set( iDatOne, iDatTwo, One + log( vecDataGSpOne[ i ][ j ][ DatPFI ] ) );}
00202 
00203                         if( CMeta::IsNaN( DFValue ) && ( iZero != -1 ) ){
00204                             float       Zero, One;
00205 
00206                             if( CMeta::IsNaN( Zero = vecDataIntZero[ i ]->Get( iDatOne, iDatTwo ) ) )
00207                                 vecDataIntZero[ i ]->Set( iDatOne, iDatTwo, log( vecDataGSpZero[ i ][ j ][ iZero ] ) );
00208                             else
00209                                 vecDataIntZero[ i ]->Set( iDatOne, iDatTwo, Zero + log( vecDataGSpZero[ i ][ j ][ iZero ] ) );
00210 
00211                             if( CMeta::IsNaN( One = vecDataIntOne[ i ]->Get( iDatOne, iDatTwo ) ) )
00212                                 vecDataIntOne[ i ]->Set( iDatOne, iDatTwo, log( vecDataGSpOne[ i ][ j ][ iZero ] ) );
00213                             else
00214                                 vecDataIntOne[ i ]->Set( iDatOne, iDatTwo, One + log( vecDataGSpOne[ i ][ j ][ iZero ] ) );}}}}
00215 
00216             for( iDatOne = 0; iDatOne < vecstrFGenes.size( ); ++iDatOne ){ 
00217                 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrFGenes.size( ); ++iDatTwo ){ 
00218                     float       sumOneB = 0, sumZeroB = 0, FinalB = 0 ;
00219                     float       Zero, One;
00220 
00221                     if( CMeta::IsNaN( Zero = vecDataIntZero[ i ]->Get( iDatOne, iDatTwo ) ) ){
00222                         vecDataIntZero[ i ]->Set( iDatOne, iDatTwo, 0.0f);
00223                         sumZeroB =  log( vecPrior[ i ][ 0 ] );}
00224                     else 
00225                         sumZeroB =  Zero + log( vecPrior[ i ][ 0 ] );
00226 
00227                     if( CMeta::IsNaN( One = vecDataIntOne[ i ]->Get( iDatOne, iDatTwo ) ) ){
00228                         vecDataIntOne[ i ]->Set( iDatOne, iDatTwo, 0.0f );
00229                         sumOneB =  log( vecPrior[ i ][ 1 ] );}
00230                     else
00231                         sumOneB =  One + log( vecPrior[ i ][ 1 ] );
00232 
00233                     FinalB = (float) ( 1 / ( 1 + exp ( sumZeroB - sumOneB ) ) );
00234                     DatOutB[ i ]->Set( iDatOne, iDatTwo, FinalB );}}}
00235 
00236         for( i = 0; i < vecvecstrInputs.size( ); ++i ){
00237             DatOutB[ i ]->Save( ( ( string ) sArgs.odirectory_arg + '/' + vecstrxInputs[ i ] + 'b' + c_acDab ).c_str( ) );
00238             vecDataIntZero[ i ]->Save( ( ( string ) sArgs.l0directory_arg + '/' + vecstrxInputs[ i ] + c_acDab ).c_str( ) );    
00239             vecDataIntOne[ i ]->Save( ( ( string ) sArgs.l1directory_arg + '/' + vecstrxInputs[ i ] + c_acDab ).c_str( ) );}    
00240 
00241         vector<size_t>      MapstrGenes;    
00242         if( sArgs.genex_arg ) {
00243             CGenome             Genome;     
00244             CGenes              GenesEx( Genome );
00245             if( !GenesEx.Open( sArgs.genex_arg ) ) {
00246                 cerr << "Could not open: " << sArgs.genex_arg << endl;
00247                 return 1; } 
00248             vecstrGenes.resize( vecstrFGenes.size( ) - GenesEx.GetGenes( ) );
00249             MapstrGenes.resize( vecstrGenes.size( ) );
00250             size_t      GenesExCount = 0;
00251             for( i = 0; i < vecstrFGenes.size( ); ++i ){
00252                 if( !GenesEx.IsGene( vecstrFGenes[ i ] ) ){
00253                     vecstrGenes[ GenesExCount ] = vecstrFGenes[ i ];
00254                     MapstrGenes[ GenesExCount ] = i;
00255                     GenesExCount++;}}}
00256         else{
00257             vecstrGenes.resize( vecstrFGenes.size( ) );
00258             MapstrGenes.resize( vecstrFGenes.size( ) );
00259             for( i = 0; i < vecstrFGenes.size( ); ++i ){
00260                 vecstrGenes[ i ] = vecstrFGenes[ i ];
00261                 MapstrGenes[ i ] = i;}}
00262         
00263         CDat        DatOut00, DatOut01, DatOut10, DatOut11; 
00264         DatOut00.Open( vecstrxInputs );
00265         DatOut01.Open( vecstrxInputs );
00266         DatOut10.Open( vecstrxInputs );
00267         DatOut11.Open( vecstrxInputs );
00268 
00269         if( sArgs.uniformjoint_flag ){
00270             for( iDatOne = 0; iDatOne < vecstrInputs.size( ); ++iDatOne ){
00271                 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrInputs.size( ); ++iDatTwo ){
00272                     for( i = 0; i < vecPrior[ iDatOne ].size( ); ++i ){
00273                         for( j = 0; j < vecPrior[ iDatTwo ].size( ); ++j ){
00274                             float   UnifProb = 1.0f / ( vecPrior[ iDatOne ].size( ) * vecPrior[ iDatTwo ].size( ) );
00275                             DatOut00.Set( iDatOne, iDatTwo, UnifProb );
00276                             DatOut01.Set( iDatOne, iDatTwo, UnifProb );
00277                             DatOut10.Set( iDatOne, iDatTwo, UnifProb );
00278                             DatOut11.Set( iDatOne, iDatTwo, UnifProb );}}}}}
00279                     
00280         if( !sArgs.uniformjoint_flag ){
00281 
00282             for( iDatOne = 0; iDatOne < vecstrInputs.size( ); ++iDatOne ) {
00283                 if( ( iDatOne + 1 ) == vecstrInputs.size( ) )
00284                     break;
00285 
00286                 veciGenesOneI.resize( vecstrGenes.size( ) );
00287                 CDataset    DataInd;
00288                 vector<string>              vecstrIndInputs;
00289                 vecstrIndInputs.resize( vecvecstrInputs[ iDatOne ].size( ) + 1 );
00290                 for( i = 0; i < vecvecstrInputs[ iDatOne ].size( ); ++i ){
00291                     vecstrIndInputs[ i ] = vecvecstrInputs[ iDatOne ][ i ].c_str( );}
00292                 vecstrIndInputs[ i ] = ( string )sArgs.adirectory_arg + '/' + vecstrxInputs[ iDatOne ] + c_acDab;
00293                 DataInd.OpenGenes( vecstrIndInputs );
00294                 for( i = 0; i < vecstrGenes.size( ); ++i )
00295                     veciGenesOneI[ i ] = DataInd.GetGene( vecstrGenes[ i ] );
00296 
00297                 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrInputs.size( ); ++iDatTwo ) {
00298                     veciGenesTwoI.resize( vecstrGenes.size( ) );
00299                     CDataset    DataInd;
00300                     vector<string>              vecstrIndInputs;
00301                     vecstrIndInputs.resize( vecvecstrInputs[ iDatTwo ].size( ) + 1 );
00302                     for( i = 0; i < vecvecstrInputs[ iDatTwo ].size( ); ++i ){
00303                         vecstrIndInputs[ i ] = vecvecstrInputs[ iDatTwo ][ i ].c_str( );}
00304                     vecstrIndInputs[ i ] = ( string )sArgs.adirectory_arg + '/' + vecstrxInputs[ iDatTwo ] + c_acDab;
00305                     DataInd.OpenGenes( vecstrIndInputs );
00306                     for( i = 0; i < vecstrGenes.size( ); ++i )
00307                         veciGenesTwoI[ i ] = DataInd.GetGene( vecstrGenes[ i ] );
00308 
00309                     vecveciJoint.resize( vecPrior[ iDatOne ].size( ) );
00310                     for( i = 0; i < vecveciJoint.size( ); ++i ) {
00311                         vecveciJoint[ i ].resize( vecPrior[ iDatTwo ].size( ) );
00312                         fill( vecveciJoint[ i ].begin( ), vecveciJoint[ i ].end( ), 0 ); }
00313 
00314                     for( i = iCountJoint = 0; i < vecstrGenes.size( ); ++i ) {
00315                         if( ( veciGenesOneI[ i ] == -1 ) || ( veciGenesTwoI[ i ] == -1 ) )
00316                             continue;
00317 
00318                         for( j = ( i + 1 ); j < vecstrGenes.size( ); ++j ) {
00319                             if( ( veciGenesOneI[ j ] == -1 ) || ( veciGenesTwoI[ j ] == -1 ) )
00320                                 continue;
00321 
00322                             iValueOne = DatOutB[ iDatOne ]->Get( MapstrGenes[ i ], MapstrGenes[ j ] ) >= Threshold ? 1 : 0;
00323                             iValueTwo = DatOutB[ iDatTwo ]->Get( MapstrGenes[ i ], MapstrGenes[ j ] ) >= Threshold ? 1 : 0;
00324                             vecveciJoint[ iValueOne ][ iValueTwo ]++;
00325                             iCountJoint++;}}
00326 
00327                     for( i = 0; i < vecveciJoint.size( ); ++i ){
00328                         for( j = 0; j < vecveciJoint.size( ); ++j ){
00329                             vecveciJoint[ i ][ j ]++;
00330                             iCountJoint++;}}                    
00331 
00332                     vector<vector<float> >      vecveciTJoint;
00333                     vecveciTJoint.resize( vecveciJoint.size( ) );
00334                     for( i = 0; i < vecveciTJoint.size( ); ++i ){
00335                         vecveciTJoint[ i ].resize( vecveciJoint[ i ].size( ) );
00336                         for( j = 0; j < vecveciTJoint.size( ); ++j ){ 
00337                             vecveciTJoint[ i ][ j ] = ( (float)vecveciJoint[ i ][ j ] ) / iCountJoint;}}
00338 
00339                     DatOut00.Set( iDatOne, iDatTwo, vecveciTJoint[ 0 ][ 0 ] );
00340                     DatOut01.Set( iDatOne, iDatTwo, vecveciTJoint[ 0 ][ 1 ] );
00341                     DatOut10.Set( iDatOne, iDatTwo, vecveciTJoint[ 1 ][ 0 ] );
00342                     DatOut11.Set( iDatOne, iDatTwo, vecveciTJoint[ 1 ][ 1 ] );}}}
00343                 
00344         DatOut00.Save( ( ( string ) sArgs.jdirectory_arg + "/Learned00" + c_acDab ).c_str( ) );
00345         DatOut01.Save( ( ( string ) sArgs.jdirectory_arg + "/Learned01" + c_acDab ).c_str( ) ); 
00346         DatOut10.Save( ( ( string ) sArgs.jdirectory_arg + "/Learned10" + c_acDab ).c_str( ) );
00347         DatOut11.Save( ( ( string ) sArgs.jdirectory_arg + "/Learned11" + c_acDab ).c_str( ) );
00348         
00349         for( i = 0; i < vecvecstrInputs.size( ); ++i ){
00350             delete DatOutB[ i ];
00351             delete vecDataIntZero[ i ]; 
00352             delete vecDataIntOne[ i ];}
00353         
00354         return 1;}
00355     
00356     else{
00357 
00358         vector<string>      vecDataIntZero;
00359         vector<string>      vecDataIntOne;
00360         vecDataIntZero.resize( vecstrxInputs.size( ) );
00361         vecDataIntOne.resize( vecstrxInputs.size( ) );
00362         for( i = 0; i < vecstrxInputs.size( ); ++i ){
00363             vecDataIntZero[ i ] = ( string )sArgs.l0directory_arg + '/' + vecstrxInputs[ i ] + c_acDab; 
00364             vecDataIntOne[ i ] = ( string )sArgs.l1directory_arg + '/' + vecstrxInputs[ i ] + c_acDab;}
00365 
00366         vector<CDataPair*>          DataZero;
00367         DataZero.resize( vecDataIntZero.size( ) );
00368         for( i = 0; i < vecDataIntZero.size( ); ++i ){
00369             DataZero[ i ] = new CDataPair( );
00370             if( !( DataZero[ i ]->Open( vecDataIntZero[ i ].c_str( ), true, !!sArgs.memmap_flag ) ) ){
00371                     cerr << "Could not open:" << vecDataIntZero[ i ] << endl;
00372                     return 1;}}
00373                                 
00374         vector<CDataPair*>          DataOne;
00375         DataOne.resize( vecDataIntOne.size( ) );
00376         for( i = 0; i < vecDataIntOne.size( ); ++i ){
00377             DataOne[ i ] = new CDataPair( );
00378             if( !( DataOne[ i ]->Open( vecDataIntOne[ i ].c_str( ), true, !!sArgs.memmap_flag ) ) ){
00379                     cerr << "Could not open:" << vecDataIntOne[ i ] << endl;
00380                     return 1;}}
00381 
00382         vector<string>      vecstrFGenes;
00383         {   CDataset    DataF;
00384 
00385         DataF.OpenGenes( vecDataIntZero );
00386         vecstrFGenes.resize( DataF.GetGenes( ) );
00387         copy( DataF.GetGeneNames( ).begin( ), DataF.GetGeneNames( ).end( ), vecstrFGenes.begin( ) );}
00388 
00389         vector<vector<vector<vector<float> > > >        vec4OSpGSp;
00390         size_t          JointDim = vecPrior[ 0 ].size( );
00391 
00392         vector<vector<string> >     VecLearnedNames;
00393         VecLearnedNames.resize( JointDim );
00394         VecLearnedNames[ 0 ].resize( JointDim );
00395         VecLearnedNames[ 1 ].resize( JointDim );
00396         VecLearnedNames[ 0 ][ 0 ] = "/Learned00";
00397         VecLearnedNames[ 0 ][ 1 ] = "/Learned01";
00398         VecLearnedNames[ 1 ][ 0 ] = "/Learned10";
00399         VecLearnedNames[ 1 ][ 1 ] = "/Learned11";
00400 
00401         vec4OSpGSp.resize( vecstrxInputs.size( ) );
00402         for( iDatOne = 0; iDatOne < vecstrxInputs.size( ); ++iDatOne ) {
00403             vec4OSpGSp[ iDatOne ].resize( vecstrxInputs.size( ) );
00404             for( iDatTwo = 0; iDatTwo < vecstrxInputs.size( ); ++iDatTwo ) {
00405                 if( iDatTwo == iDatOne )
00406                     continue;
00407                 vec4OSpGSp[ iDatOne ][ iDatTwo ].resize( JointDim );
00408                 for( i = 0; i < JointDim; ++i ){
00409                     vec4OSpGSp[ iDatOne ][ iDatTwo ][ i ].resize( JointDim );}}}
00410     
00411     for( i = 0; i < JointDim; ++i ){
00412         for( j = 0; j < JointDim; ++j ){
00413             CDataPair       DataF;
00414             string          JointFile = ( string )sArgs.jdirectory_arg + VecLearnedNames[ i ][ j ] + c_acDab; 
00415             if( !( DataF.Open( JointFile.c_str( ), true, !!sArgs.memmap_flag ) ) ){
00416                         cerr << "Could not open:" << JointFile << endl;
00417                         return 1;}
00418             
00419             vector<size_t>  veciSpecies;
00420             veciSpecies.resize( vecstrxInputs.size( ) );
00421             for( k = 0; k < vecstrxInputs.size( ); ++k )
00422                 veciSpecies[ k ] = DataF.GetGene( vecstrxInputs[ k ] );
00423             
00424             for( iDatOne = 0; iDatOne < vecstrxInputs.size( ); ++iDatOne ) {
00425                 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrxInputs.size( ); ++iDatTwo ) {
00426                     vec4OSpGSp[ iDatOne ][ iDatTwo ][ i ][ j ] = DataF.Get( veciSpecies[ iDatOne ], veciSpecies[ iDatTwo ] );
00427                     vec4OSpGSp[ iDatTwo ][ iDatOne ][ j ][ i ] = DataF.Get( veciSpecies[ iDatOne ], veciSpecies[ iDatTwo ] );}}}}
00428     
00429     vector<float>       veciValueOnep;
00430     vector<float>       veciValueTwop;
00431 
00432     for( iDatOne = 0; iDatOne < vecstrxInputs.size( ); ++iDatOne ) {
00433         for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrxInputs.size( ); ++iDatTwo ) {
00434             
00435             veciValueOnep.resize( JointDim );
00436             veciValueTwop.resize( JointDim );
00437 
00438             fill( veciValueOnep.begin( ), veciValueOnep.end( ), 0.0f );
00439             fill( veciValueTwop.begin( ), veciValueTwop.end( ), 0.0f );
00440 
00441             for( i = 0; i < JointDim; ++i ){
00442                 for( j = 0; j < JointDim; ++j ){
00443                     veciValueOnep[ i ] += vec4OSpGSp[ iDatOne ][ iDatTwo ][ i ][ j ];
00444                     veciValueTwop[ j ] += vec4OSpGSp[ iDatOne ][ iDatTwo ][ i ][ j ];}}
00445 
00446             for( i = 0; i < JointDim; ++i ){
00447                 for( j = 0; j < JointDim; ++j ){
00448                     vec4OSpGSp[ iDatOne ][ iDatTwo ][ i ][ j ] /= veciValueOnep[ i ];
00449                     vec4OSpGSp[ iDatTwo ][ iDatOne ][ i ][ j ] /= veciValueTwop[ i ];}}}}
00450     
00451     vector<CDat*>           DatOutCS;
00452     
00453     DatOutCS.resize( vecstrxInputs.size( ) );
00454     for( i = 0; i < DatOutCS.size( ); ++i ){
00455         DatOutCS[ i ] = new CDat( );
00456         DatOutCS[ i ]->Open( vecstrFGenes );}
00457 
00458     vector<vector<size_t> >     veciGenesOne, veciGenesZero;
00459     veciGenesOne.resize( vecDataIntOne.size( ) );
00460     veciGenesZero.resize( vecDataIntZero.size( ) );
00461     for( i = 0; i < vecstrxInputs.size( ); ++i ){
00462         veciGenesOne[ i ].resize( vecstrFGenes.size( ) );
00463         veciGenesZero[ i ].resize( vecstrFGenes.size( ) );
00464         for( j = 0; j < vecstrFGenes.size( ); ++j ){
00465             veciGenesOne[ i ][ j ] = DataOne[ i ]->GetGene( vecstrFGenes[ j ] );
00466             veciGenesZero[ i ][ j ] = DataZero[ i ]->GetGene( vecstrFGenes[ j ] );}}
00467 
00468     if( !sArgs.holdout_flag ){
00469         for( i = 0; i < vecstrxInputs.size( ); ++i ){
00470             for( iDatOne = 0; iDatOne < vecstrFGenes.size( ); ++iDatOne ){ 
00471                 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrFGenes.size( ); ++iDatTwo ){ 
00472                     float       sumOne = 0, sumZero = 0, Final = 0 ;
00473                     for( k = 0; k < vecstrxInputs.size( ); ++k ){
00474                         if( k != i ){
00475                             sumOne += log( exp( log( vec4OSpGSp[ i ][ k ][ 1 ][ 0 ] ) + DataZero[ k ]->Get( veciGenesZero[ k ][ iDatOne ], veciGenesZero[ k ][ iDatTwo ] ) ) + exp( log( vec4OSpGSp[ i ][ k ][ 1 ][ 1 ] ) + DataOne[ k ]->Get( veciGenesOne[ k ][ iDatOne ], veciGenesOne[ k ][ iDatTwo ] ) ) );
00476                             sumZero += log( exp( log( vec4OSpGSp[ i ][ k ][ 0 ][ 0 ] ) + DataZero[ k ]->Get( veciGenesZero[ k ][ iDatOne ], veciGenesZero[ k ][ iDatTwo ] ) ) + exp( log( vec4OSpGSp[ i ][ k ][ 0 ][ 1 ] ) + DataOne[ k ]->Get( veciGenesOne[ k ][ iDatOne ], veciGenesOne[ k ][ iDatTwo ] ) ) );}}
00477                     sumOne += ( DataOne[ i ]->Get( veciGenesOne[ i ][ iDatOne ], veciGenesOne[ i ][ iDatTwo ] ) + log( vecPrior[ i ][ 1 ] ) );
00478                     sumZero += ( DataZero[ i ]->Get( veciGenesZero[ i ][ iDatOne ], veciGenesZero[ i ][ iDatTwo ] ) + log( vecPrior[ i ][ 0 ] ) );
00479                     Final = (float) ( 1 / ( 1 + exp ( sumZero - sumOne ) ) );
00480                     DatOutCS[ i ]->Set( iDatOne, iDatTwo, Final );}}}}
00481 
00482     else{
00483         for( i = 0; i < vecstrxInputs.size( ); ++i ){
00484             for( iDatOne = 0; iDatOne < vecstrFGenes.size( ); ++iDatOne ){ 
00485                 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrFGenes.size( ); ++iDatTwo ){ 
00486                     float       sumOne = 0, sumZero = 0, Final = 0 ;
00487                     for( k = 0; k < vecstrxInputs.size( ); ++k ){
00488                         if( k != i ){
00489                             sumOne += log( exp( log( vec4OSpGSp[ i ][ k ][ 1 ][ 0 ] ) + DataZero[ k ]->Get( veciGenesZero[ k ][ iDatOne ], veciGenesZero[ k ][ iDatTwo ] ) ) + exp( log( vec4OSpGSp[ i ][ k ][ 1 ][ 1 ] ) + DataOne[ k ]->Get( veciGenesOne[ k ][ iDatOne ], veciGenesOne[ k ][ iDatTwo ] ) ) );
00490                             sumZero += log( exp( log( vec4OSpGSp[ i ][ k ][ 0 ][ 0 ] ) + DataZero[ k ]->Get( veciGenesZero[ k ][ iDatOne ], veciGenesZero[ k ][ iDatTwo ] ) ) + exp( log( vec4OSpGSp[ i ][ k ][ 0 ][ 1 ] ) + DataOne[ k ]->Get( veciGenesOne[ k ][ iDatOne ], veciGenesOne[ k ][ iDatTwo ] ) ) );}}
00491                     sumOne += log( vecPrior[ i ][ 1 ] );
00492                     sumZero += log( vecPrior[ i ][ 0 ] );
00493                     Final = (float) ( 1 / ( 1 + exp ( sumZero - sumOne ) ) );
00494                     DatOutCS[ i ]->Set( iDatOne, iDatTwo, Final );}}}}
00495     
00496     for( i = 0; i < vecstrxInputs.size( ); ++i ){
00497         DatOutCS[ i ]->Save( ( ( string ) sArgs.odirectory_arg + '/' + vecstrxInputs[ i ] + c_acDab ).c_str( ) );
00498         delete DatOutCS[ i ];
00499         delete DataOne[ i ];
00500         delete DataZero[ i ];}}
00501 
00502     return 0;}