Sleipnir
tools/BNTester/BNTester.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 
00025 static int Genes( const char*, CGenes& );
00026 
00027 int main( int iArgs, char** aszArgs ) {
00028     IBayesNet*              pBN;
00029     CDatasetCompact         Data;
00030     CDatasetCompactMap      DataMap;
00031     CDataset                DataFull;
00032     IDataset*               pData;
00033     CDat                    Dat;
00034     CGenome                 Genome;
00035     CGenes                  GenesIn( Genome ), GenesEx( Genome ), GenesOv( Genome );
00036     ifstream                ifsm;
00037     vector<vector<float> >  vecvecdResults;
00038     float                   d;
00039     size_t                  i, j, k;
00040     gengetopt_args_info     sArgs;
00041     int                     iRet;
00042     vector<bool>            vecfGenes;
00043 
00044     if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
00045         cmdline_parser_print_help( );
00046         return 1; }
00047     CMeta Meta( sArgs.verbosity_arg );
00048 
00049     CBayesNetSmile  BNSmile( !!sArgs.group_flag );
00050 #ifdef PNL_ENABLED
00051     CBayesNetPNL    BNPNL( !!sArgs.group_flag );
00052 #endif // PNL_ENABLED
00053     CBayesNetFN     BNFN;
00054 
00055     if( sArgs.function_flag ) {
00056         if( !BNFN.Open( sArgs.input_arg ) ) {
00057             cerr << "Couldn't open: " << sArgs.input_arg << endl;
00058             return 1; }
00059         pBN = &BNFN; }
00060     else {
00061         if( !BNSmile.Open( sArgs.input_arg ) ) {
00062             cerr << "Couldn't open: " << sArgs.input_arg << endl;
00063             return 1; }
00064 #ifdef PNL_ENABLED
00065         if( sArgs.pnl_flag ) {
00066             BNSmile.Convert( BNPNL );
00067             pBN = &BNPNL; }
00068         else
00069 #endif // PNL_ENABLED
00070             pBN = &BNSmile; }
00071 
00072     if( ( iRet = Genes( sArgs.genes_arg, GenesIn ) ) ||
00073         ( iRet = Genes( sArgs.genee_arg, GenesOv ) ) ||
00074         ( iRet = Genes( sArgs.genex_arg, GenesEx ) ) )
00075         return iRet;
00076     if( pBN->IsContinuous( ) ) {
00077         if( !DataFull.Open( sArgs.datadir_arg, pBN ) ) {
00078             cerr << "Couldn't open: " << sArgs.datadir_arg << endl;
00079             return 1; }
00080         DataFull.FilterGenes( GenesIn, CDat::EFilterInclude );
00081         DataFull.FilterGenes( GenesEx, CDat::EFilterExclude );
00082         pData = &DataFull; }
00083     else if( sArgs.dataset_arg ) {
00084         if( !DataMap.Open( sArgs.dataset_arg ) ) {
00085             cerr << "Couldn't open: " << sArgs.dataset_arg << endl;
00086             return 1; }
00087         if( sArgs.genes_arg && !DataMap.FilterGenes( sArgs.genes_arg, CDat::EFilterInclude ) ) {
00088             cerr << "Couldn't open: " << sArgs.genes_arg << endl;
00089             return 1; }
00090         if( sArgs.genex_arg && !DataMap.FilterGenes( sArgs.genex_arg, CDat::EFilterExclude ) ) {
00091             cerr << "Couldn't open: " << sArgs.genex_arg << endl;
00092             return 1; }
00093         if( !sArgs.everything_flag )
00094             DataMap.FilterAnswers( );
00095         pData = &DataMap; }
00096     else {
00097         if( !Data.Open( sArgs.datadir_arg, pBN, GenesIn, GenesEx ) ) {
00098             cerr << "Couldn't open: " << sArgs.datadir_arg << endl;
00099             return 1; }
00100         pData = &Data; }
00101     pData->FilterGenes( GenesOv, CDat::EFilterTerm );
00102 
00103     if( sArgs.output_arg )
00104         Dat.Open( sArgs.genes_arg ? GenesIn.GetGeneNames( ) : pData->GetGeneNames( ) );
00105     vecvecdResults.clear( );
00106     cerr << "Evaluating..." << endl;
00107     if( sArgs.output_arg )
00108         pBN->Evaluate( pData, Dat, !!sArgs.zero_flag );
00109     else
00110         pBN->Evaluate( pData, vecvecdResults, !!sArgs.zero_flag );
00111 
00112     if( sArgs.output_arg ) {
00113         cerr << "Saving..." << endl;
00114         for( i = 0; i < Dat.GetGenes( ); ++i )
00115             for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j )
00116                 if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) )
00117                     Dat.Set( i, j, 1 - d );
00118         Dat.Save( sArgs.output_arg ); }
00119     else {
00120         cerr << "Storing..." << endl;
00121         for( k = i = 0; i < pData->GetGenes( ); ++i )
00122             for( j = ( i + 1 ); j < pData->GetGenes( ); ++j ) {
00123                 if( !pData->IsExample( i, j ) )
00124                     continue;
00125                 d = vecvecdResults[ k++ ][ 0 ];
00126                 if( !pBN->IsContinuous( ) )
00127                     d = 1 - d;
00128                 cout << pData->GetGene( i ) << '\t' << pData->GetGene( j ) << '\t' << d <<
00129                     endl; }
00130         cout.flush( ); }
00131 
00132     return 0; }
00133 
00134 static int Genes( const char* szGenes, CGenes& Genes ) {
00135     ifstream    ifsm;
00136 
00137     if( !szGenes )
00138         return 0;
00139 
00140     ifsm.open( szGenes );
00141     if( !Genes.Open( ifsm ) ) {
00142         cerr << "Couldn't open: " << szGenes << endl;
00143         return 1; }
00144     ifsm.close( );
00145     return 0; }