Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 00025 00026 int main( int iArgs, char** aszArgs ) { 00027 static const size_t c_iBuffer = 1024; 00028 #ifdef WIN32 00029 pthread_win32_process_attach_np( ); 00030 #endif // WIN32 00031 gengetopt_args_info sArgs; 00032 ifstream ifsm; 00033 istream* pistm; 00034 vector<string> vecstrLine, vecstrGenes, vecstrDatasets; 00035 char acBuffer[ c_iBuffer ]; 00036 CBayesNetSmile BNSmile; 00037 size_t i; 00038 map<string, size_t> mapstriZeros; 00039 00040 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00041 cmdline_parser_print_help( ); 00042 return 1; } 00043 CMeta Meta( sArgs.verbosity_arg ); 00044 00045 if( sArgs.input_arg ) { 00046 ifsm.open( sArgs.input_arg ); 00047 pistm = &ifsm; } 00048 else 00049 pistm = &cin; 00050 while( !pistm->eof( ) ) { 00051 pistm->getline( acBuffer, c_iBuffer - 1 ); 00052 acBuffer[ c_iBuffer - 1 ] = 0; 00053 vecstrLine.clear( ); 00054 CMeta::Tokenize( acBuffer, vecstrLine ); 00055 if( vecstrLine.size( ) < 2 ) { 00056 cerr << "Ignoring line: " << acBuffer << endl; 00057 continue; } 00058 if( !( i = atoi( vecstrLine[ 0 ].c_str( ) ) ) ) { 00059 cerr << "Illegal gene ID: " << vecstrLine[ 0 ] << " for " << vecstrLine[ 1 ] << endl; 00060 return 1; } 00061 i--; 00062 if( vecstrGenes.size( ) <= i ) 00063 vecstrGenes.resize( i + 1 ); 00064 vecstrGenes[ i ] = vecstrLine[ 1 ]; } 00065 if( sArgs.input_arg ) 00066 ifsm.close( ); 00067 00068 if( sArgs.zeros_arg ) { 00069 ifstream ifsm_zero; 00070 vector<string> vecstrLine; 00071 char acLine[ 1024 ]; 00072 00073 ifsm_zero.open( sArgs.zeros_arg ); 00074 if( !ifsm_zero.is_open( ) ) { 00075 cerr << "Couldn't open: " << sArgs.zeros_arg << endl; 00076 return 1; 00077 } 00078 while( !ifsm_zero.eof( ) ) { 00079 ifsm_zero.getline( acLine, ARRAYSIZE(acLine) - 1 ); 00080 acLine[ ARRAYSIZE(acLine) - 1 ] = 0; 00081 vecstrLine.clear( ); 00082 CMeta::Tokenize( acLine, vecstrLine ); 00083 if( vecstrLine.empty( ) ) 00084 continue; 00085 mapstriZeros[ vecstrLine[ 0 ] ] = atoi( vecstrLine[ 1 ].c_str( ) ); 00086 } 00087 } 00088 00089 00090 bool useNibble = false; 00091 if(sArgs.use_nibble_flag==1){ 00092 useNibble = true; 00093 } 00094 00095 CDatabase DB(useNibble); 00096 DB.SetMemmap( !!sArgs.memmap_flag ); 00097 DB.SetBuffer( !!sArgs.buffer_flag ); 00098 DB.SetBlockOut( sArgs.block_files_arg ); 00099 DB.SetBlockIn( sArgs.block_datasets_arg ); 00100 00101 if(sArgs.network_arg){ 00102 if(sArgs.dataset_arg){ 00103 cerr << "Confused. Only network OR dataset list." << endl; 00104 return 1; 00105 } 00106 00107 if( !BNSmile.Open( sArgs.network_arg ) ) { 00108 cerr << "Could not open: " << sArgs.network_arg << endl; 00109 return 1; } 00110 if( !DB.Open( vecstrGenes, sArgs.dir_in_arg, &BNSmile, sArgs.dir_out_arg, min((size_t)sArgs.files_arg, 00111 vecstrGenes.size( )), mapstriZeros ) ) { 00112 cerr << "Could not open data" << endl; 00113 return 1; 00114 } 00115 00116 }else if(sArgs.dataset_arg){ 00117 00118 ifsm.open(sArgs.dataset_arg); 00119 while(!pistm->eof()){ 00120 pistm->getline(acBuffer, c_iBuffer -1); 00121 if(acBuffer[0]==0) 00122 break; 00123 acBuffer[c_iBuffer-1] = 0; 00124 //If line contains multiple columns, 00125 //use the first column, which is the dataset column 00126 vector<string> tok; 00127 CMeta::Tokenize(acBuffer, tok, " \t"); 00128 vecstrDatasets.push_back(tok[0]); 00129 } 00130 vecstrDatasets.resize(vecstrDatasets.size()); 00131 ifsm.close(); 00132 00133 if( !DB.Open( vecstrGenes, vecstrDatasets, sArgs.dir_in_arg, sArgs.dir_out_arg, min((size_t)sArgs.files_arg, 00134 vecstrGenes.size( )), mapstriZeros ) ) { 00135 cerr << "Could not open data" << endl; 00136 return 1; 00137 } 00138 00139 }else{ 00140 cerr << "Must give a network or a dataset list." << endl; 00141 return 1; 00142 00143 } 00144 00145 #ifdef WIN32 00146 pthread_win32_process_detach_np( ); 00147 #endif // WIN32 00148 return 0; }