Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 00025 static const char c_acDab[] = ".dab"; 00026 00027 int main( int iArgs, char** aszArgs ) { 00028 gengetopt_args_info sArgs; 00029 size_t i, j, k, iPairs, iPair, iArg; 00030 map<string,size_t> mapZeros; 00031 vector<string> vecstrNames; 00032 CDataPair Answers; 00033 CFullMatrix<unsigned char> MatData; 00034 vector<size_t> veciGenes; 00035 float d; 00036 00037 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00038 cmdline_parser_print_help( ); 00039 return 1; } 00040 CMeta Meta( sArgs.verbosity_arg ); 00041 00042 if( sArgs.zeros_arg ) { 00043 ifstream ifsm; 00044 vector<string> vecstrZeros; 00045 char acLine[ 1024 ]; 00046 00047 ifsm.open( sArgs.zeros_arg ); 00048 if( !ifsm.is_open( ) ) { 00049 cerr << "Couldn't open: " << sArgs.zeros_arg << endl; 00050 return 1; } 00051 while( !ifsm.eof( ) ) { 00052 ifsm.getline( acLine, ARRAYSIZE(acLine) - 1 ); 00053 acLine[ ARRAYSIZE(acLine) - 1 ] = 0; 00054 vecstrZeros.clear( ); 00055 CMeta::Tokenize( acLine, vecstrZeros ); 00056 if( vecstrZeros.empty( ) ) 00057 continue; 00058 mapZeros[ vecstrZeros[ 0 ] ] = atoi( vecstrZeros[ 1 ].c_str( ) ); } } 00059 00060 if( !Answers.Open( sArgs.answers_arg, false ) ) { 00061 cerr << "Couldn't open: " << sArgs.answers_arg << endl; 00062 return 1; } 00063 if( sArgs.genes_arg && !Answers.FilterGenes( sArgs.genes_arg, CDat::EFilterInclude ) ) { 00064 cerr << "Couldn't open: " << sArgs.genes_arg << endl; 00065 return 1; } 00066 if( sArgs.genet_arg && !Answers.FilterGenes( sArgs.genet_arg, CDat::EFilterTerm ) ) { 00067 cerr << "Couldn't open: " << sArgs.genet_arg << endl; 00068 return 1; } 00069 if( sArgs.genex_arg && !Answers.FilterGenes( sArgs.genex_arg, CDat::EFilterExclude ) ) { 00070 cerr << "Couldn't open: " << sArgs.genex_arg << endl; 00071 return 1; } 00072 00073 for( iPairs = i = 0; i < Answers.GetGenes( ); ++i ) 00074 for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) 00075 if( !CMeta::IsNaN( Answers.Get( i, j ) ) ) 00076 iPairs++; 00077 MatData.Initialize( iPairs, sArgs.inputs_num ); 00078 MatData.Clear( ); 00079 00080 veciGenes.resize( Answers.GetGenes( ) ); 00081 for( iArg = 0; iArg < sArgs.inputs_num; ++iArg ) { 00082 CDatasetCompact Data; 00083 size_t iOne, iTwo, iZero, iVal; 00084 map<string,size_t>::const_iterator iterZero; 00085 00086 vecstrNames.clear( ); 00087 vecstrNames.push_back( sArgs.inputs[ iArg ] ); 00088 if( !Data.Open( Answers, vecstrNames, true ) ) { 00089 cerr << "Couldn't open: " << sArgs.inputs[ iArg ] << endl; 00090 return 1; } 00091 vecstrNames[ 0 ] = CMeta::Filename( CMeta::Deextension( CMeta::Basename( vecstrNames[ 0 ].c_str( ) ) ) ); 00092 iZero = ( ( iterZero = mapZeros.find( vecstrNames[ 0 ] ) ) == mapZeros.end( ) ) ? -1 : 00093 iterZero->second; 00094 for( i = 0; i < veciGenes.size( ); ++i ) 00095 veciGenes[ i ] = Data.GetGene( Answers.GetGene( i ) ); 00096 for( iPair = i = 0; i < veciGenes.size( ); ++i ) { 00097 iOne = veciGenes[ i ]; 00098 for( j = ( i + 1 ); j < veciGenes.size( ); ++j ) { 00099 if( CMeta::IsNaN( Answers.Get( i, j ) ) ) 00100 continue; 00101 if( ( iOne != -1 ) && ( ( iTwo = veciGenes[ j ] ) != -1 ) ) { 00102 iVal = Data.GetDiscrete( iOne, iTwo, 1 ); 00103 if( ( iVal != -1 ) || ( ( iVal = iZero ) != -1 ) || ( sArgs.zero_flag && !( iVal = 0 ) ) ) 00104 MatData.Set( iPair, iArg, (unsigned char)( iVal + 1 ) ); } 00105 iPair++; } } } 00106 00107 cout << "Gene 1 Gene 2 Answer"; 00108 for( i = 0; i < sArgs.inputs_num; ++i ) 00109 cout << '\t' << sArgs.inputs[ i ]; 00110 cout << endl; 00111 for( iPair = i = 0; i < Answers.GetGenes( ); ++i ) 00112 for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) { 00113 if( CMeta::IsNaN( d = Answers.Get( i, j ) ) ) 00114 continue; 00115 cout << Answers.GetGene( i ) << '\t' << Answers.GetGene( j ) << '\t' << d; 00116 for( k = 0; k < MatData.GetColumns( ); ++k ) 00117 cout << '\t' << ( (int)MatData.Get( iPair, k ) - 1 ); 00118 cout << endl; 00119 iPair++; } 00120 00121 return 0; }