Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 00025 static const char c_acDab[] = ".dab"; 00026 00027 int main( int iArgs, char** aszArgs ) { 00028 gengetopt_args_info sArgs; 00029 size_t i, j, k, iDatOne, iDatTwo, iGeneOne, iGeneTwo, iZero; 00030 size_t iCountJoint, iValueOne, iValueTwo; 00031 vector<vector<size_t> > vecveciJoint; 00032 vector<string> vecstrInputs, vecstrxInputs, vecstrGenes; 00033 map<string, size_t> mapZeros; 00034 CBayesNetSmile BNIn; 00035 vector<size_t> veciGenesOneI, veciGenesTwoI; 00036 float Threshold; 00037 00038 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00039 cmdline_parser_print_help( ); 00040 return 1; } 00041 CMeta Meta( sArgs.verbosity_arg, sArgs.random_arg ); 00042 00043 if( sArgs.zeros_arg ) { 00044 ifstream ifsm; 00045 vector<string> vecstrZeros; 00046 char acLine[ 1024 ]; 00047 00048 ifsm.open( sArgs.zeros_arg ); 00049 if( !ifsm.is_open( ) ) { 00050 cerr << "Could not open: " << sArgs.zeros_arg << endl; 00051 return 1; } 00052 while( !ifsm.eof( ) ) { 00053 ifsm.getline( acLine, ARRAYSIZE(acLine) - 1 ); 00054 acLine[ ARRAYSIZE(acLine) - 1 ] = 0; 00055 vecstrZeros.clear( ); 00056 CMeta::Tokenize( acLine, vecstrZeros ); 00057 if( vecstrZeros.empty( ) ) 00058 continue; 00059 mapZeros[ vecstrZeros[ 0 ] ] = atoi( vecstrZeros[ 1 ].c_str( ) ); } } 00060 00061 _mkdir( sArgs.odirectory_arg ); 00062 00063 vecstrInputs.resize( sArgs.inputs_num ); 00064 copy( sArgs.inputs, sArgs.inputs + sArgs.inputs_num, vecstrInputs.begin( ) ); 00065 00066 vecstrxInputs.resize( sArgs.inputs_num ); 00067 00068 for( i = 0; i < vecstrInputs.size( ); ++i ){ 00069 if( strcmp( &vecstrInputs[ i ][ vecstrInputs[ i ].rfind( "." ) + 1 ], "xdsl" ) == 0 ){ 00070 vecstrxInputs[ i ].resize( vecstrInputs[ i ].rfind( "." ) - vecstrInputs[ i ].rfind( "/" ) - 1 ); 00071 vecstrxInputs[ i ] = vecstrInputs[ i ].substr( vecstrInputs[ i ].rfind( "/" ) + 1, vecstrInputs[ i ].rfind( "." ) - vecstrInputs[ i ].rfind( "/" ) - 1 );} 00072 else{ 00073 cerr << "inputs file types should be xdsl." << endl; 00074 return 1;}} 00075 00076 vector<vector<float> > vecPrior; 00077 vector<vector<vector<float> > > vecDataGSpZero; 00078 vector<vector<vector<float> > > vecDataGSpOne; 00079 vector<vector<string> > vecvecSpDat; 00080 CDataMatrix MatCPT; 00081 vector<string> vecstrFiles; 00082 00083 vecPrior.resize( sArgs.inputs_num ); 00084 vecDataGSpZero.resize( sArgs.inputs_num ); 00085 vecDataGSpOne.resize( sArgs.inputs_num ); 00086 vecvecSpDat.resize( sArgs.inputs_num ); 00087 for( i = 0; i < vecPrior.size( ); ++i ){ 00088 if( !BNIn.Open( vecstrInputs[ i ].c_str( ) ) ) { 00089 cerr << "Couldn't open: " << vecstrInputs[ i ] << endl; 00090 return 1;} 00091 BNIn.GetCPT( 0, MatCPT ); 00092 vecPrior[ i ].resize( MatCPT.GetRows( ) ); 00093 vecPrior[ i ][ 0 ] = MatCPT.Get( 0, 0 ); 00094 vecPrior[ i ][ 1 ] = MatCPT.Get( 1, 0 ); 00095 00096 BNIn.GetNodes( vecstrFiles ); 00097 vecstrFiles.erase( vecstrFiles.begin( ) ); 00098 vecDataGSpZero[ i ].resize( vecstrFiles.size( ) ); 00099 vecDataGSpOne[ i ].resize( vecstrFiles.size( ) ); 00100 vecvecSpDat[ i ].resize( vecstrFiles.size( ) ); 00101 for( j = 0; j < vecstrFiles.size( ); ++j ){ 00102 vecvecSpDat[ i ][ j ] = vecstrFiles[ j ]; 00103 BNIn.GetCPT( j+1, MatCPT ); 00104 vecDataGSpZero[ i ][ j ].resize( MatCPT.GetRows( ) ); 00105 vecDataGSpOne[ i ][ j ].resize( MatCPT.GetRows( ) ); 00106 for( k = 0; k < MatCPT.GetRows( ); ++k ){ 00107 vecDataGSpZero[ i ][ j ][ k ] = MatCPT.Get( k , 0 ); 00108 vecDataGSpOne[ i ][ j ][ k ] = MatCPT.Get( k , 1 );}} 00109 vecstrFiles.clear( );} 00110 00111 if( sArgs.learn_flag ){ 00112 00113 _mkdir( sArgs.l0directory_arg ); 00114 _mkdir( sArgs.l1directory_arg ); 00115 _mkdir( sArgs.jdirectory_arg ); 00116 00117 Threshold = ( sArgs.threshold_arg == -1 ) ? 0.5f : float( sArgs.threshold_arg ); 00118 00119 vector<vector<string> > vecvecstrInputs; 00120 size_t countstrInputs; 00121 vecvecstrInputs.resize( vecvecSpDat.size( ) ); 00122 countstrInputs = 0; 00123 for( i = 0; i < vecvecSpDat.size( ); ++i ){ 00124 vecvecstrInputs[ i ].resize( vecvecSpDat[ i ].size( ) ); 00125 for( j = 0; j < vecvecSpDat[ i ].size( ); ++j ){ 00126 vecvecstrInputs[ i ][ j ] = ( string )sArgs.ddirectory_arg + '/' + vecstrxInputs[ i ] + '/' + vecvecSpDat[ i ][ j ] + c_acDab; 00127 countstrInputs++;}} 00128 00129 vector<string> vecstrFDAInputs; 00130 size_t countstrFDAInputs; 00131 vecstrFDAInputs.resize( countstrInputs + vecvecstrInputs.size( ) ); 00132 countstrFDAInputs = 0; 00133 for( i = 0; i < vecvecstrInputs.size( ); ++i ){ 00134 for( j = 0; j < vecvecstrInputs[ i ].size( ); ++j ){ 00135 vecstrFDAInputs[ countstrFDAInputs ] = vecvecstrInputs[ i ][ j ].c_str( ); 00136 countstrFDAInputs++;} 00137 vecstrFDAInputs[ countstrFDAInputs ] = ( string )sArgs.adirectory_arg + '/' + vecstrxInputs[ i ] + c_acDab; 00138 countstrFDAInputs++;} 00139 00140 vector<string> vecstrFGenes; 00141 { CDataset DataF; 00142 00143 DataF.OpenGenes( vecstrFDAInputs ); 00144 vecstrFGenes.resize( DataF.GetGenes( ) ); 00145 copy( DataF.GetGeneNames( ).begin( ), DataF.GetGeneNames( ).end( ), vecstrFGenes.begin( ) );} 00146 00147 if( sArgs.genelist_flag ){ 00148 for( i = 0; i < vecstrFGenes.size( ); ++i ) 00149 cout << vecstrFGenes[ i ] << endl; 00150 return 1;} 00151 00152 vector<CDat*> DatOutB; 00153 DatOutB.resize( vecvecstrInputs.size( ) ); 00154 for( i = 0; i < DatOutB.size( ); ++i ){ 00155 DatOutB[ i ] = new CDat( ); 00156 DatOutB[ i ]->Open( vecstrFGenes );} 00157 00158 vector<CDat*> vecDataIntZero; 00159 vector<CDat*> vecDataIntOne; 00160 vecDataIntZero.resize( vecvecstrInputs.size( ) ); 00161 vecDataIntOne.resize( vecvecstrInputs.size( ) ); 00162 for( i = 0; i < vecvecstrInputs.size( ); ++i ){ 00163 vecDataIntZero[ i ] = new CDat( ); 00164 vecDataIntOne[ i ] = new CDat( ); 00165 vecDataIntZero[ i ]->Open( vecstrFGenes ); 00166 vecDataIntOne[ i ]->Open( vecstrFGenes );} 00167 00168 for( i = 0; i < vecvecstrInputs.size( ); ++i ){ 00169 map<string,size_t>::const_iterator iterZero; 00170 for( j = 0; j < vecvecstrInputs[ i ].size( ); ++j ){ 00171 iZero = ( ( iterZero = mapZeros.find( vecvecstrInputs[ i ][ j ] ) ) == mapZeros.end( ) ) ? -1 : iterZero->second; 00172 00173 CDataPair DataF; 00174 if( !( DataF.Open( vecvecstrInputs[ i ][ j ].c_str( ), false, !!sArgs.memmap_flag ) || 00175 DataF.Open( vecvecstrInputs[ i ][ j ].c_str( ), true, !!sArgs.memmap_flag ) ) ){ 00176 cerr << "Could not open:" << vecvecstrInputs[ i ][ j ] << endl; 00177 return 1;} 00178 vector<size_t> vecGeneIndex; 00179 vecGeneIndex.resize( vecstrFGenes.size( ) ); 00180 for( k = 0; k < vecstrFGenes.size( ); ++k ) 00181 vecGeneIndex[ k ] = DataF.GetGene( vecstrFGenes[ k ] ); 00182 00183 for( iDatOne = 0; iDatOne < vecstrFGenes.size( ); ++iDatOne ){ 00184 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrFGenes.size( ); ++iDatTwo ){ 00185 iGeneOne = vecGeneIndex[ iDatOne ]; 00186 iGeneTwo = vecGeneIndex[ iDatTwo ]; 00187 float DFValue; 00188 DFValue = ( ( iGeneOne == -1 ) || ( iGeneTwo == -1 ) ) ? CMeta::GetNaN( ) : DataF.Get( iGeneOne, iGeneTwo ); 00189 if( !CMeta::IsNaN( DFValue ) ) { 00190 size_t DatPFI = DataF.Quantize( DFValue ); 00191 float Zero, One; 00192 00193 if( CMeta::IsNaN( Zero = vecDataIntZero[ i ]->Get( iDatOne, iDatTwo ) ) ) 00194 vecDataIntZero[ i ]->Set( iDatOne, iDatTwo, log( vecDataGSpZero[ i ][ j ][ DatPFI ] ) ); 00195 else 00196 vecDataIntZero[ i ]->Set( iDatOne, iDatTwo, Zero + log( vecDataGSpZero[ i ][ j ][ DatPFI ] ) ); 00197 00198 if( CMeta::IsNaN( One = vecDataIntOne[ i ]->Get( iDatOne, iDatTwo ) ) ) 00199 vecDataIntOne[ i ]->Set( iDatOne, iDatTwo, log( vecDataGSpOne[ i ][ j ][ DatPFI ] ) ); 00200 else 00201 vecDataIntOne[ i ]->Set( iDatOne, iDatTwo, One + log( vecDataGSpOne[ i ][ j ][ DatPFI ] ) );} 00202 00203 if( CMeta::IsNaN( DFValue ) && ( iZero != -1 ) ){ 00204 float Zero, One; 00205 00206 if( CMeta::IsNaN( Zero = vecDataIntZero[ i ]->Get( iDatOne, iDatTwo ) ) ) 00207 vecDataIntZero[ i ]->Set( iDatOne, iDatTwo, log( vecDataGSpZero[ i ][ j ][ iZero ] ) ); 00208 else 00209 vecDataIntZero[ i ]->Set( iDatOne, iDatTwo, Zero + log( vecDataGSpZero[ i ][ j ][ iZero ] ) ); 00210 00211 if( CMeta::IsNaN( One = vecDataIntOne[ i ]->Get( iDatOne, iDatTwo ) ) ) 00212 vecDataIntOne[ i ]->Set( iDatOne, iDatTwo, log( vecDataGSpOne[ i ][ j ][ iZero ] ) ); 00213 else 00214 vecDataIntOne[ i ]->Set( iDatOne, iDatTwo, One + log( vecDataGSpOne[ i ][ j ][ iZero ] ) );}}}} 00215 00216 for( iDatOne = 0; iDatOne < vecstrFGenes.size( ); ++iDatOne ){ 00217 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrFGenes.size( ); ++iDatTwo ){ 00218 float sumOneB = 0, sumZeroB = 0, FinalB = 0 ; 00219 float Zero, One; 00220 00221 if( CMeta::IsNaN( Zero = vecDataIntZero[ i ]->Get( iDatOne, iDatTwo ) ) ){ 00222 vecDataIntZero[ i ]->Set( iDatOne, iDatTwo, 0.0f); 00223 sumZeroB = log( vecPrior[ i ][ 0 ] );} 00224 else 00225 sumZeroB = Zero + log( vecPrior[ i ][ 0 ] ); 00226 00227 if( CMeta::IsNaN( One = vecDataIntOne[ i ]->Get( iDatOne, iDatTwo ) ) ){ 00228 vecDataIntOne[ i ]->Set( iDatOne, iDatTwo, 0.0f ); 00229 sumOneB = log( vecPrior[ i ][ 1 ] );} 00230 else 00231 sumOneB = One + log( vecPrior[ i ][ 1 ] ); 00232 00233 FinalB = (float) ( 1 / ( 1 + exp ( sumZeroB - sumOneB ) ) ); 00234 DatOutB[ i ]->Set( iDatOne, iDatTwo, FinalB );}}} 00235 00236 for( i = 0; i < vecvecstrInputs.size( ); ++i ){ 00237 DatOutB[ i ]->Save( ( ( string ) sArgs.odirectory_arg + '/' + vecstrxInputs[ i ] + 'b' + c_acDab ).c_str( ) ); 00238 vecDataIntZero[ i ]->Save( ( ( string ) sArgs.l0directory_arg + '/' + vecstrxInputs[ i ] + c_acDab ).c_str( ) ); 00239 vecDataIntOne[ i ]->Save( ( ( string ) sArgs.l1directory_arg + '/' + vecstrxInputs[ i ] + c_acDab ).c_str( ) );} 00240 00241 vector<size_t> MapstrGenes; 00242 if( sArgs.genex_arg ) { 00243 CGenome Genome; 00244 CGenes GenesEx( Genome ); 00245 if( !GenesEx.Open( sArgs.genex_arg ) ) { 00246 cerr << "Could not open: " << sArgs.genex_arg << endl; 00247 return 1; } 00248 vecstrGenes.resize( vecstrFGenes.size( ) - GenesEx.GetGenes( ) ); 00249 MapstrGenes.resize( vecstrGenes.size( ) ); 00250 size_t GenesExCount = 0; 00251 for( i = 0; i < vecstrFGenes.size( ); ++i ){ 00252 if( !GenesEx.IsGene( vecstrFGenes[ i ] ) ){ 00253 vecstrGenes[ GenesExCount ] = vecstrFGenes[ i ]; 00254 MapstrGenes[ GenesExCount ] = i; 00255 GenesExCount++;}}} 00256 else{ 00257 vecstrGenes.resize( vecstrFGenes.size( ) ); 00258 MapstrGenes.resize( vecstrFGenes.size( ) ); 00259 for( i = 0; i < vecstrFGenes.size( ); ++i ){ 00260 vecstrGenes[ i ] = vecstrFGenes[ i ]; 00261 MapstrGenes[ i ] = i;}} 00262 00263 CDat DatOut00, DatOut01, DatOut10, DatOut11; 00264 DatOut00.Open( vecstrxInputs ); 00265 DatOut01.Open( vecstrxInputs ); 00266 DatOut10.Open( vecstrxInputs ); 00267 DatOut11.Open( vecstrxInputs ); 00268 00269 if( sArgs.uniformjoint_flag ){ 00270 for( iDatOne = 0; iDatOne < vecstrInputs.size( ); ++iDatOne ){ 00271 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrInputs.size( ); ++iDatTwo ){ 00272 for( i = 0; i < vecPrior[ iDatOne ].size( ); ++i ){ 00273 for( j = 0; j < vecPrior[ iDatTwo ].size( ); ++j ){ 00274 float UnifProb = 1.0f / ( vecPrior[ iDatOne ].size( ) * vecPrior[ iDatTwo ].size( ) ); 00275 DatOut00.Set( iDatOne, iDatTwo, UnifProb ); 00276 DatOut01.Set( iDatOne, iDatTwo, UnifProb ); 00277 DatOut10.Set( iDatOne, iDatTwo, UnifProb ); 00278 DatOut11.Set( iDatOne, iDatTwo, UnifProb );}}}}} 00279 00280 if( !sArgs.uniformjoint_flag ){ 00281 00282 for( iDatOne = 0; iDatOne < vecstrInputs.size( ); ++iDatOne ) { 00283 if( ( iDatOne + 1 ) == vecstrInputs.size( ) ) 00284 break; 00285 00286 veciGenesOneI.resize( vecstrGenes.size( ) ); 00287 CDataset DataInd; 00288 vector<string> vecstrIndInputs; 00289 vecstrIndInputs.resize( vecvecstrInputs[ iDatOne ].size( ) + 1 ); 00290 for( i = 0; i < vecvecstrInputs[ iDatOne ].size( ); ++i ){ 00291 vecstrIndInputs[ i ] = vecvecstrInputs[ iDatOne ][ i ].c_str( );} 00292 vecstrIndInputs[ i ] = ( string )sArgs.adirectory_arg + '/' + vecstrxInputs[ iDatOne ] + c_acDab; 00293 DataInd.OpenGenes( vecstrIndInputs ); 00294 for( i = 0; i < vecstrGenes.size( ); ++i ) 00295 veciGenesOneI[ i ] = DataInd.GetGene( vecstrGenes[ i ] ); 00296 00297 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrInputs.size( ); ++iDatTwo ) { 00298 veciGenesTwoI.resize( vecstrGenes.size( ) ); 00299 CDataset DataInd; 00300 vector<string> vecstrIndInputs; 00301 vecstrIndInputs.resize( vecvecstrInputs[ iDatTwo ].size( ) + 1 ); 00302 for( i = 0; i < vecvecstrInputs[ iDatTwo ].size( ); ++i ){ 00303 vecstrIndInputs[ i ] = vecvecstrInputs[ iDatTwo ][ i ].c_str( );} 00304 vecstrIndInputs[ i ] = ( string )sArgs.adirectory_arg + '/' + vecstrxInputs[ iDatTwo ] + c_acDab; 00305 DataInd.OpenGenes( vecstrIndInputs ); 00306 for( i = 0; i < vecstrGenes.size( ); ++i ) 00307 veciGenesTwoI[ i ] = DataInd.GetGene( vecstrGenes[ i ] ); 00308 00309 vecveciJoint.resize( vecPrior[ iDatOne ].size( ) ); 00310 for( i = 0; i < vecveciJoint.size( ); ++i ) { 00311 vecveciJoint[ i ].resize( vecPrior[ iDatTwo ].size( ) ); 00312 fill( vecveciJoint[ i ].begin( ), vecveciJoint[ i ].end( ), 0 ); } 00313 00314 for( i = iCountJoint = 0; i < vecstrGenes.size( ); ++i ) { 00315 if( ( veciGenesOneI[ i ] == -1 ) || ( veciGenesTwoI[ i ] == -1 ) ) 00316 continue; 00317 00318 for( j = ( i + 1 ); j < vecstrGenes.size( ); ++j ) { 00319 if( ( veciGenesOneI[ j ] == -1 ) || ( veciGenesTwoI[ j ] == -1 ) ) 00320 continue; 00321 00322 iValueOne = DatOutB[ iDatOne ]->Get( MapstrGenes[ i ], MapstrGenes[ j ] ) >= Threshold ? 1 : 0; 00323 iValueTwo = DatOutB[ iDatTwo ]->Get( MapstrGenes[ i ], MapstrGenes[ j ] ) >= Threshold ? 1 : 0; 00324 vecveciJoint[ iValueOne ][ iValueTwo ]++; 00325 iCountJoint++;}} 00326 00327 for( i = 0; i < vecveciJoint.size( ); ++i ){ 00328 for( j = 0; j < vecveciJoint.size( ); ++j ){ 00329 vecveciJoint[ i ][ j ]++; 00330 iCountJoint++;}} 00331 00332 vector<vector<float> > vecveciTJoint; 00333 vecveciTJoint.resize( vecveciJoint.size( ) ); 00334 for( i = 0; i < vecveciTJoint.size( ); ++i ){ 00335 vecveciTJoint[ i ].resize( vecveciJoint[ i ].size( ) ); 00336 for( j = 0; j < vecveciTJoint.size( ); ++j ){ 00337 vecveciTJoint[ i ][ j ] = ( (float)vecveciJoint[ i ][ j ] ) / iCountJoint;}} 00338 00339 DatOut00.Set( iDatOne, iDatTwo, vecveciTJoint[ 0 ][ 0 ] ); 00340 DatOut01.Set( iDatOne, iDatTwo, vecveciTJoint[ 0 ][ 1 ] ); 00341 DatOut10.Set( iDatOne, iDatTwo, vecveciTJoint[ 1 ][ 0 ] ); 00342 DatOut11.Set( iDatOne, iDatTwo, vecveciTJoint[ 1 ][ 1 ] );}}} 00343 00344 DatOut00.Save( ( ( string ) sArgs.jdirectory_arg + "/Learned00" + c_acDab ).c_str( ) ); 00345 DatOut01.Save( ( ( string ) sArgs.jdirectory_arg + "/Learned01" + c_acDab ).c_str( ) ); 00346 DatOut10.Save( ( ( string ) sArgs.jdirectory_arg + "/Learned10" + c_acDab ).c_str( ) ); 00347 DatOut11.Save( ( ( string ) sArgs.jdirectory_arg + "/Learned11" + c_acDab ).c_str( ) ); 00348 00349 for( i = 0; i < vecvecstrInputs.size( ); ++i ){ 00350 delete DatOutB[ i ]; 00351 delete vecDataIntZero[ i ]; 00352 delete vecDataIntOne[ i ];} 00353 00354 return 1;} 00355 00356 else{ 00357 00358 vector<string> vecDataIntZero; 00359 vector<string> vecDataIntOne; 00360 vecDataIntZero.resize( vecstrxInputs.size( ) ); 00361 vecDataIntOne.resize( vecstrxInputs.size( ) ); 00362 for( i = 0; i < vecstrxInputs.size( ); ++i ){ 00363 vecDataIntZero[ i ] = ( string )sArgs.l0directory_arg + '/' + vecstrxInputs[ i ] + c_acDab; 00364 vecDataIntOne[ i ] = ( string )sArgs.l1directory_arg + '/' + vecstrxInputs[ i ] + c_acDab;} 00365 00366 vector<CDataPair*> DataZero; 00367 DataZero.resize( vecDataIntZero.size( ) ); 00368 for( i = 0; i < vecDataIntZero.size( ); ++i ){ 00369 DataZero[ i ] = new CDataPair( ); 00370 if( !( DataZero[ i ]->Open( vecDataIntZero[ i ].c_str( ), true, !!sArgs.memmap_flag ) ) ){ 00371 cerr << "Could not open:" << vecDataIntZero[ i ] << endl; 00372 return 1;}} 00373 00374 vector<CDataPair*> DataOne; 00375 DataOne.resize( vecDataIntOne.size( ) ); 00376 for( i = 0; i < vecDataIntOne.size( ); ++i ){ 00377 DataOne[ i ] = new CDataPair( ); 00378 if( !( DataOne[ i ]->Open( vecDataIntOne[ i ].c_str( ), true, !!sArgs.memmap_flag ) ) ){ 00379 cerr << "Could not open:" << vecDataIntOne[ i ] << endl; 00380 return 1;}} 00381 00382 vector<string> vecstrFGenes; 00383 { CDataset DataF; 00384 00385 DataF.OpenGenes( vecDataIntZero ); 00386 vecstrFGenes.resize( DataF.GetGenes( ) ); 00387 copy( DataF.GetGeneNames( ).begin( ), DataF.GetGeneNames( ).end( ), vecstrFGenes.begin( ) );} 00388 00389 vector<vector<vector<vector<float> > > > vec4OSpGSp; 00390 size_t JointDim = vecPrior[ 0 ].size( ); 00391 00392 vector<vector<string> > VecLearnedNames; 00393 VecLearnedNames.resize( JointDim ); 00394 VecLearnedNames[ 0 ].resize( JointDim ); 00395 VecLearnedNames[ 1 ].resize( JointDim ); 00396 VecLearnedNames[ 0 ][ 0 ] = "/Learned00"; 00397 VecLearnedNames[ 0 ][ 1 ] = "/Learned01"; 00398 VecLearnedNames[ 1 ][ 0 ] = "/Learned10"; 00399 VecLearnedNames[ 1 ][ 1 ] = "/Learned11"; 00400 00401 vec4OSpGSp.resize( vecstrxInputs.size( ) ); 00402 for( iDatOne = 0; iDatOne < vecstrxInputs.size( ); ++iDatOne ) { 00403 vec4OSpGSp[ iDatOne ].resize( vecstrxInputs.size( ) ); 00404 for( iDatTwo = 0; iDatTwo < vecstrxInputs.size( ); ++iDatTwo ) { 00405 if( iDatTwo == iDatOne ) 00406 continue; 00407 vec4OSpGSp[ iDatOne ][ iDatTwo ].resize( JointDim ); 00408 for( i = 0; i < JointDim; ++i ){ 00409 vec4OSpGSp[ iDatOne ][ iDatTwo ][ i ].resize( JointDim );}}} 00410 00411 for( i = 0; i < JointDim; ++i ){ 00412 for( j = 0; j < JointDim; ++j ){ 00413 CDataPair DataF; 00414 string JointFile = ( string )sArgs.jdirectory_arg + VecLearnedNames[ i ][ j ] + c_acDab; 00415 if( !( DataF.Open( JointFile.c_str( ), true, !!sArgs.memmap_flag ) ) ){ 00416 cerr << "Could not open:" << JointFile << endl; 00417 return 1;} 00418 00419 vector<size_t> veciSpecies; 00420 veciSpecies.resize( vecstrxInputs.size( ) ); 00421 for( k = 0; k < vecstrxInputs.size( ); ++k ) 00422 veciSpecies[ k ] = DataF.GetGene( vecstrxInputs[ k ] ); 00423 00424 for( iDatOne = 0; iDatOne < vecstrxInputs.size( ); ++iDatOne ) { 00425 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrxInputs.size( ); ++iDatTwo ) { 00426 vec4OSpGSp[ iDatOne ][ iDatTwo ][ i ][ j ] = DataF.Get( veciSpecies[ iDatOne ], veciSpecies[ iDatTwo ] ); 00427 vec4OSpGSp[ iDatTwo ][ iDatOne ][ j ][ i ] = DataF.Get( veciSpecies[ iDatOne ], veciSpecies[ iDatTwo ] );}}}} 00428 00429 vector<float> veciValueOnep; 00430 vector<float> veciValueTwop; 00431 00432 for( iDatOne = 0; iDatOne < vecstrxInputs.size( ); ++iDatOne ) { 00433 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrxInputs.size( ); ++iDatTwo ) { 00434 00435 veciValueOnep.resize( JointDim ); 00436 veciValueTwop.resize( JointDim ); 00437 00438 fill( veciValueOnep.begin( ), veciValueOnep.end( ), 0.0f ); 00439 fill( veciValueTwop.begin( ), veciValueTwop.end( ), 0.0f ); 00440 00441 for( i = 0; i < JointDim; ++i ){ 00442 for( j = 0; j < JointDim; ++j ){ 00443 veciValueOnep[ i ] += vec4OSpGSp[ iDatOne ][ iDatTwo ][ i ][ j ]; 00444 veciValueTwop[ j ] += vec4OSpGSp[ iDatOne ][ iDatTwo ][ i ][ j ];}} 00445 00446 for( i = 0; i < JointDim; ++i ){ 00447 for( j = 0; j < JointDim; ++j ){ 00448 vec4OSpGSp[ iDatOne ][ iDatTwo ][ i ][ j ] /= veciValueOnep[ i ]; 00449 vec4OSpGSp[ iDatTwo ][ iDatOne ][ i ][ j ] /= veciValueTwop[ i ];}}}} 00450 00451 vector<CDat*> DatOutCS; 00452 00453 DatOutCS.resize( vecstrxInputs.size( ) ); 00454 for( i = 0; i < DatOutCS.size( ); ++i ){ 00455 DatOutCS[ i ] = new CDat( ); 00456 DatOutCS[ i ]->Open( vecstrFGenes );} 00457 00458 vector<vector<size_t> > veciGenesOne, veciGenesZero; 00459 veciGenesOne.resize( vecDataIntOne.size( ) ); 00460 veciGenesZero.resize( vecDataIntZero.size( ) ); 00461 for( i = 0; i < vecstrxInputs.size( ); ++i ){ 00462 veciGenesOne[ i ].resize( vecstrFGenes.size( ) ); 00463 veciGenesZero[ i ].resize( vecstrFGenes.size( ) ); 00464 for( j = 0; j < vecstrFGenes.size( ); ++j ){ 00465 veciGenesOne[ i ][ j ] = DataOne[ i ]->GetGene( vecstrFGenes[ j ] ); 00466 veciGenesZero[ i ][ j ] = DataZero[ i ]->GetGene( vecstrFGenes[ j ] );}} 00467 00468 if( !sArgs.holdout_flag ){ 00469 for( i = 0; i < vecstrxInputs.size( ); ++i ){ 00470 for( iDatOne = 0; iDatOne < vecstrFGenes.size( ); ++iDatOne ){ 00471 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrFGenes.size( ); ++iDatTwo ){ 00472 float sumOne = 0, sumZero = 0, Final = 0 ; 00473 for( k = 0; k < vecstrxInputs.size( ); ++k ){ 00474 if( k != i ){ 00475 sumOne += log( exp( log( vec4OSpGSp[ i ][ k ][ 1 ][ 0 ] ) + DataZero[ k ]->Get( veciGenesZero[ k ][ iDatOne ], veciGenesZero[ k ][ iDatTwo ] ) ) + exp( log( vec4OSpGSp[ i ][ k ][ 1 ][ 1 ] ) + DataOne[ k ]->Get( veciGenesOne[ k ][ iDatOne ], veciGenesOne[ k ][ iDatTwo ] ) ) ); 00476 sumZero += log( exp( log( vec4OSpGSp[ i ][ k ][ 0 ][ 0 ] ) + DataZero[ k ]->Get( veciGenesZero[ k ][ iDatOne ], veciGenesZero[ k ][ iDatTwo ] ) ) + exp( log( vec4OSpGSp[ i ][ k ][ 0 ][ 1 ] ) + DataOne[ k ]->Get( veciGenesOne[ k ][ iDatOne ], veciGenesOne[ k ][ iDatTwo ] ) ) );}} 00477 sumOne += ( DataOne[ i ]->Get( veciGenesOne[ i ][ iDatOne ], veciGenesOne[ i ][ iDatTwo ] ) + log( vecPrior[ i ][ 1 ] ) ); 00478 sumZero += ( DataZero[ i ]->Get( veciGenesZero[ i ][ iDatOne ], veciGenesZero[ i ][ iDatTwo ] ) + log( vecPrior[ i ][ 0 ] ) ); 00479 Final = (float) ( 1 / ( 1 + exp ( sumZero - sumOne ) ) ); 00480 DatOutCS[ i ]->Set( iDatOne, iDatTwo, Final );}}}} 00481 00482 else{ 00483 for( i = 0; i < vecstrxInputs.size( ); ++i ){ 00484 for( iDatOne = 0; iDatOne < vecstrFGenes.size( ); ++iDatOne ){ 00485 for( iDatTwo = ( iDatOne + 1 ); iDatTwo < vecstrFGenes.size( ); ++iDatTwo ){ 00486 float sumOne = 0, sumZero = 0, Final = 0 ; 00487 for( k = 0; k < vecstrxInputs.size( ); ++k ){ 00488 if( k != i ){ 00489 sumOne += log( exp( log( vec4OSpGSp[ i ][ k ][ 1 ][ 0 ] ) + DataZero[ k ]->Get( veciGenesZero[ k ][ iDatOne ], veciGenesZero[ k ][ iDatTwo ] ) ) + exp( log( vec4OSpGSp[ i ][ k ][ 1 ][ 1 ] ) + DataOne[ k ]->Get( veciGenesOne[ k ][ iDatOne ], veciGenesOne[ k ][ iDatTwo ] ) ) ); 00490 sumZero += log( exp( log( vec4OSpGSp[ i ][ k ][ 0 ][ 0 ] ) + DataZero[ k ]->Get( veciGenesZero[ k ][ iDatOne ], veciGenesZero[ k ][ iDatTwo ] ) ) + exp( log( vec4OSpGSp[ i ][ k ][ 0 ][ 1 ] ) + DataOne[ k ]->Get( veciGenesOne[ k ][ iDatOne ], veciGenesOne[ k ][ iDatTwo ] ) ) );}} 00491 sumOne += log( vecPrior[ i ][ 1 ] ); 00492 sumZero += log( vecPrior[ i ][ 0 ] ); 00493 Final = (float) ( 1 / ( 1 + exp ( sumZero - sumOne ) ) ); 00494 DatOutCS[ i ]->Set( iDatOne, iDatTwo, Final );}}}} 00495 00496 for( i = 0; i < vecstrxInputs.size( ); ++i ){ 00497 DatOutCS[ i ]->Save( ( ( string ) sArgs.odirectory_arg + '/' + vecstrxInputs[ i ] + c_acDab ).c_str( ) ); 00498 delete DatOutCS[ i ]; 00499 delete DataOne[ i ]; 00500 delete DataZero[ i ];}} 00501 00502 return 0;}