Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 00025 static void Evaluate( const IDataset*, const IBayesNet*, bool, ostream& ); 00026 static void DebugDataset( const IDataset* ); 00027 00028 int main( int iArgs, char** aszArgs ) { 00029 CDatasetCompact Data; 00030 CDatasetCompactMap DataMap; 00031 CDataset DataFull; 00032 CDataPair Answers; 00033 CDataMask Train, Test; 00034 IDataset* pData; 00035 gengetopt_args_info sArgs; 00036 ofstream ofsm; 00037 IBayesNet* pNet; 00038 size_t i; 00039 00040 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00041 cmdline_parser_print_help( ); 00042 return 1; } 00043 CMeta Meta( sArgs.verbosity_arg, sArgs.random_arg ); 00044 00045 CBayesNetSmile BNSmile( !!sArgs.group_flag ), BNDefault( !!sArgs.group_flag ); 00046 #ifdef PNL_ENABLED 00047 CBayesNetPNL BNPNL( !!sArgs.group_flag ); 00048 #endif // PNL_ENABLED 00049 CBayesNetFN BNFN; 00050 00051 if( sArgs.function_flag ) { 00052 if( !BNFN.Open( sArgs.input_arg ) ) { 00053 cerr << "Couldn't open: " << sArgs.input_arg << endl; 00054 return 1; } 00055 pNet = &BNFN; } 00056 else { 00057 if( !BNSmile.Open( sArgs.input_arg ) ) { 00058 cerr << "Couldn't open: " << sArgs.input_arg << endl; 00059 return 1; } 00060 if( sArgs.default_arg ) { 00061 if( !BNDefault.Open( sArgs.default_arg ) ) { 00062 cerr << "Couldn't open: " << sArgs.default_arg << endl; 00063 return 1; } 00064 BNSmile.SetDefault( BNDefault ); } 00065 #ifdef PNL_ENABLED 00066 if( sArgs.pnl_flag ) { 00067 BNSmile.Convert( BNPNL ); 00068 pNet = &BNPNL; } 00069 else 00070 #endif // PNL_ENABLED 00071 pNet = &BNSmile; } 00072 if( sArgs.randomize_flag ) 00073 pNet->Randomize( ); 00074 00075 if( sArgs.dataset_arg ) { 00076 if( !DataMap.Open( sArgs.dataset_arg ) ) { 00077 cerr << "Couldn't open: " << sArgs.dataset_arg << endl; 00078 return 1; } 00079 00080 if( sArgs.genes_arg && !DataMap.FilterGenes( sArgs.genes_arg, CDat::EFilterInclude ) ) { 00081 cerr << "Couldn't open: " << sArgs.genes_arg << endl; 00082 return 1; } 00083 if( sArgs.genet_arg && !DataMap.FilterGenes( sArgs.genet_arg, CDat::EFilterTerm ) ) { 00084 cerr << "Couldn't open: " << sArgs.genet_arg << endl; 00085 return 1; } 00086 if( sArgs.genex_arg && !DataMap.FilterGenes( sArgs.genex_arg, CDat::EFilterExclude ) ) { 00087 cerr << "Couldn't open: " << sArgs.genex_arg << endl; 00088 return 1; } 00089 DataMap.FilterAnswers( ); 00090 pData = &DataMap; } 00091 else { 00092 if( !Answers.Open( sArgs.answers_arg, pNet->IsContinuous( 0 ) ) ) { 00093 cerr << "Couldn't open: " << sArgs.answers_arg << endl; 00094 return 1; } 00095 if( sArgs.genes_arg && !Answers.FilterGenes( sArgs.genes_arg, CDat::EFilterInclude ) ) { 00096 cerr << "Couldn't open: " << sArgs.genes_arg << endl; 00097 return 1; } 00098 if( sArgs.genet_arg && !Answers.FilterGenes( sArgs.genet_arg, CDat::EFilterTerm ) ) { 00099 cerr << "Couldn't open: " << sArgs.genet_arg << endl; 00100 return 1; } 00101 if( sArgs.genex_arg && !Answers.FilterGenes( sArgs.genex_arg, CDat::EFilterExclude ) ) { 00102 cerr << "Couldn't open: " << sArgs.genex_arg << endl; 00103 return 1; } 00104 00105 if( pNet->IsContinuous( ) ) { 00106 if( !DataFull.Open( Answers, sArgs.datadir_arg, pNet ) ) { 00107 cerr << "Couldn't open: " << sArgs.datadir_arg << endl; 00108 return 1; } 00109 pData = &DataFull; } 00110 else { 00111 if( !Data.Open( Answers, sArgs.datadir_arg, pNet ) ) { 00112 cerr << "Couldn't open: " << sArgs.datadir_arg << endl; 00113 return 1; } 00114 Data.FilterAnswers( ); 00115 pData = &Data; } } 00116 00117 if( sArgs.test_arg ) { 00118 Train.AttachRandom( pData, (float)( 1 - sArgs.test_arg ) ); 00119 Test.AttachComplement( Train ); 00120 pData = &Train; } 00121 if( sArgs.test_arg < 1 ) { 00122 if( sArgs.checkpoint_flag ) 00123 for( i = 0; i < (size_t)sArgs.iterations_arg; ++i ) { 00124 pNet->Learn( pData, 1, !!sArgs.zero_flag, !!sArgs.elr_flag ); 00125 cerr << "Iteration " << i << '/' << sArgs.iterations_arg << " complete" << endl; 00126 pNet->Save( sArgs.output_arg ); } 00127 else 00128 pNet->Learn( pData, sArgs.iterations_arg, !!sArgs.zero_flag, !!sArgs.elr_flag ); } 00129 00130 if( sArgs.murder_given ) 00131 pNet->Randomize( sArgs.murder_arg ); 00132 pNet->Save( sArgs.output_arg ); 00133 00134 if( sArgs.eval_train_arg && ( sArgs.test_arg < 1 ) ) { 00135 ofsm.open( sArgs.eval_train_arg, ios_base::binary ); 00136 Evaluate( pData, pNet, !!sArgs.zero_flag, ofsm ); 00137 ofsm.close( ); } 00138 if( sArgs.eval_test_arg && sArgs.test_arg ) { 00139 ofsm.clear( ); 00140 ofsm.open( sArgs.eval_test_arg, ios_base::binary ); 00141 Evaluate( &Test, pNet, !!sArgs.zero_flag, ofsm ); 00142 ofsm.close( ); } 00143 00144 return 0; } 00145 00146 static void Evaluate( const IDataset* pData, const IBayesNet* pNet, bool fZero, ostream& ostm ) { 00147 size_t i, j; 00148 float d; 00149 CDat Dat; 00150 00151 Dat.Open( pData->GetGeneNames( ) ); 00152 pNet->Evaluate( pData, Dat, fZero ); 00153 if( !pNet->IsContinuous( ) ) 00154 for( i = 0; i < Dat.GetGenes( ); ++i ) 00155 for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ) 00156 if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) ) 00157 Dat.Set( i, j, 1 - d ); 00158 Dat.Save( ostm ); } 00159 00160 static void DebugDataset( const IDataset* pData ) { 00161 size_t i, j; 00162 00163 for( i = 0; i < pData->GetGenes( ); ++i ) { 00164 for( j = ( i + 1 ); j < pData->GetGenes( ); ++j ) 00165 cerr << ( pData->IsExample( i, j ) ? 1 : 0 ); 00166 if( ( i + 1 ) < pData->GetGenes( ) ) 00167 cerr << endl; } }