Sleipnir
tools/BNConverter/BNConverter.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 
// Forward declarations of the file-local helpers defined after main.
// Evaluate takes a dataset, a network, a flag that it forwards to the
// network's own Evaluate method, and the stream that receives the result.
00025 static void Evaluate( const IDataset*, const IBayesNet*, bool, ostream& );
// DebugDataset dumps a dataset's example mask to standard error; it is not
// called anywhere in the visible portion of this file.
00026 static void DebugDataset( const IDataset* );
00027 
00028 int main( int iArgs, char** aszArgs ) {
00029     CDatasetCompact     Data;
00030     CDatasetCompactMap  DataMap;
00031     CDataset            DataFull;
00032     CDataPair           Answers;
00033     CDataMask           Train, Test;
00034     IDataset*           pData;
00035     gengetopt_args_info sArgs;
00036     ofstream            ofsm;
00037     IBayesNet*          pNet;
00038     size_t              i;
00039 
00040     if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
00041         cmdline_parser_print_help( );
00042         return 1; }
00043     CMeta Meta( sArgs.verbosity_arg, sArgs.random_arg );
00044 
00045     CBayesNetSmile  BNSmile( !!sArgs.group_flag ), BNDefault( !!sArgs.group_flag );
00046 #ifdef PNL_ENABLED
00047     CBayesNetPNL    BNPNL( !!sArgs.group_flag );
00048 #endif // PNL_ENABLED
00049     CBayesNetFN     BNFN;
00050 
00051     if( sArgs.function_flag ) {
00052         if( !BNFN.Open( sArgs.input_arg ) ) {
00053             cerr << "Couldn't open: " << sArgs.input_arg << endl;
00054             return 1; }
00055         pNet = &BNFN; }
00056     else {
00057         if( !BNSmile.Open( sArgs.input_arg ) ) {
00058             cerr << "Couldn't open: " << sArgs.input_arg << endl;
00059             return 1; }
00060         if( sArgs.default_arg ) {
00061             if( !BNDefault.Open( sArgs.default_arg ) ) {
00062                 cerr << "Couldn't open: " << sArgs.default_arg << endl;
00063                 return 1; }
00064             BNSmile.SetDefault( BNDefault ); }
00065 #ifdef PNL_ENABLED
00066         if( sArgs.pnl_flag ) {
00067             BNSmile.Convert( BNPNL );
00068             pNet = &BNPNL; }
00069         else
00070 #endif // PNL_ENABLED
00071             pNet = &BNSmile; }
00072     if( sArgs.randomize_flag )
00073         pNet->Randomize( );
00074 
00075     if( sArgs.dataset_arg ) {
00076         if( !DataMap.Open( sArgs.dataset_arg ) ) {
00077             cerr << "Couldn't open: " << sArgs.dataset_arg << endl;
00078             return 1; }
00079 
00080         if( sArgs.genes_arg && !DataMap.FilterGenes( sArgs.genes_arg, CDat::EFilterInclude ) ) {
00081             cerr << "Couldn't open: " << sArgs.genes_arg << endl;
00082             return 1; }
00083         if( sArgs.genet_arg && !DataMap.FilterGenes( sArgs.genet_arg, CDat::EFilterTerm ) ) {
00084             cerr << "Couldn't open: " << sArgs.genet_arg << endl;
00085             return 1; }
00086         if( sArgs.genex_arg && !DataMap.FilterGenes( sArgs.genex_arg, CDat::EFilterExclude ) ) {
00087             cerr << "Couldn't open: " << sArgs.genex_arg << endl;
00088             return 1; }
00089         DataMap.FilterAnswers( );
00090         pData = &DataMap; }
00091     else {
00092         if( !Answers.Open( sArgs.answers_arg, pNet->IsContinuous( 0 ) ) ) {
00093             cerr << "Couldn't open: " << sArgs.answers_arg << endl;
00094             return 1; }
00095         if( sArgs.genes_arg && !Answers.FilterGenes( sArgs.genes_arg, CDat::EFilterInclude ) ) {
00096             cerr << "Couldn't open: " << sArgs.genes_arg << endl;
00097             return 1; }
00098         if( sArgs.genet_arg && !Answers.FilterGenes( sArgs.genet_arg, CDat::EFilterTerm ) ) {
00099             cerr << "Couldn't open: " << sArgs.genet_arg << endl;
00100             return 1; }
00101         if( sArgs.genex_arg && !Answers.FilterGenes( sArgs.genex_arg, CDat::EFilterExclude ) ) {
00102             cerr << "Couldn't open: " << sArgs.genex_arg << endl;
00103             return 1; }
00104 
00105         if( pNet->IsContinuous( ) ) {
00106             if( !DataFull.Open( Answers, sArgs.datadir_arg, pNet ) ) {
00107                 cerr << "Couldn't open: " << sArgs.datadir_arg << endl;
00108                 return 1; }
00109             pData = &DataFull; }
00110         else {
00111             if( !Data.Open( Answers, sArgs.datadir_arg, pNet ) ) {
00112                 cerr << "Couldn't open: " << sArgs.datadir_arg << endl;
00113                 return 1; }
00114             Data.FilterAnswers( );
00115             pData = &Data; } }
00116 
00117     if( sArgs.test_arg ) {
00118         Train.AttachRandom( pData, (float)( 1 - sArgs.test_arg ) );
00119         Test.AttachComplement( Train );
00120         pData = &Train; }
00121     if( sArgs.test_arg < 1 ) {
00122         if( sArgs.checkpoint_flag )
00123             for( i = 0; i < (size_t)sArgs.iterations_arg; ++i ) {
00124                 pNet->Learn( pData, 1, !!sArgs.zero_flag, !!sArgs.elr_flag );
00125                 cerr << "Iteration " << i << '/' << sArgs.iterations_arg << " complete" << endl;
00126                 pNet->Save( sArgs.output_arg ); }
00127         else
00128             pNet->Learn( pData, sArgs.iterations_arg, !!sArgs.zero_flag, !!sArgs.elr_flag ); }
00129 
00130     if( sArgs.murder_given )
00131         pNet->Randomize( sArgs.murder_arg );
00132     pNet->Save( sArgs.output_arg );
00133 
00134     if( sArgs.eval_train_arg && ( sArgs.test_arg < 1 ) ) {
00135         ofsm.open( sArgs.eval_train_arg, ios_base::binary );
00136         Evaluate( pData, pNet, !!sArgs.zero_flag, ofsm );
00137         ofsm.close( ); }
00138     if( sArgs.eval_test_arg && sArgs.test_arg ) {
00139         ofsm.clear( );
00140         ofsm.open( sArgs.eval_test_arg, ios_base::binary );
00141         Evaluate( &Test, pNet, !!sArgs.zero_flag, ofsm );
00142         ofsm.close( ); }
00143 
00144     return 0; }
00145 
00146 static void Evaluate( const IDataset* pData, const IBayesNet* pNet, bool fZero, ostream& ostm ) {
00147     size_t  i, j;
00148     float   d;
00149     CDat    Dat;
00150 
00151     Dat.Open( pData->GetGeneNames( ) );
00152     pNet->Evaluate( pData, Dat, fZero );
00153     if( !pNet->IsContinuous( ) )
00154         for( i = 0; i < Dat.GetGenes( ); ++i )
00155             for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j )
00156                 if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) )
00157                     Dat.Set( i, j, 1 - d );
00158     Dat.Save( ostm ); }
00159 
00160 static void DebugDataset( const IDataset* pData ) {
00161     size_t  i, j;
00162 
00163     for( i = 0; i < pData->GetGenes( ); ++i ) {
00164         for( j = ( i + 1 ); j < pData->GetGenes( ); ++j )
00165             cerr << ( pData->IsExample( i, j ) ? 1 : 0 );
00166         if( ( i + 1 ) < pData->GetGenes( ) )
00167             cerr << endl; } }