Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 00025 00026 int main( int iArgs, char** aszArgs ) { 00027 static const size_t c_iBuffer = 1024; 00028 #ifdef WIN32 00029 pthread_win32_process_attach_np( ); 00030 #endif // WIN32 00031 gengetopt_args_info sArgs; 00032 ifstream ifsm; 00033 istream* pistm; 00034 vector<string> vecstrLine, vecstrGenes, vecstrDBs; 00035 char acBuffer[ c_iBuffer ]; 00036 size_t i; 00037 00038 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00039 cmdline_parser_print_help( ); 00040 return 1; } 00041 00042 if( sArgs.input_arg ) { 00043 ifsm.open( sArgs.input_arg ); 00044 pistm = &ifsm; } 00045 else 00046 pistm = &cin; 00047 while( !pistm->eof( ) ) { 00048 pistm->getline( acBuffer, c_iBuffer - 1 ); 00049 acBuffer[ c_iBuffer - 1 ] = 0; 00050 vecstrLine.clear( ); 00051 CMeta::Tokenize( acBuffer, vecstrLine ); 00052 if( vecstrLine.size( ) < 2 ) { 00053 cerr << "Ignoring line: " << acBuffer << endl; 00054 continue; } 00055 if( !( i = atoi( vecstrLine[ 0 ].c_str( ) ) ) ) { 00056 cerr << "Illegal gene ID: " << vecstrLine[ 0 ] << 00057 " for " << vecstrLine[ 1 ] << endl; 00058 return 1; } 00059 i--; 00060 if( vecstrGenes.size( ) <= i ) 00061 vecstrGenes.resize( i + 1 ); 00062 vecstrGenes[ i ] = vecstrLine[ 1 ]; } 00063 if( sArgs.input_arg ) 00064 ifsm.close( ); 00065 00066 bool useNibble = false; 00067 if(sArgs.is_nibble_flag==1){ 00068 useNibble = true; 00069 } 00070 00071 if(sArgs.reorganize_flag==1){ 00072 vector<string> vecstrDataset; 00073 ifstream ifsm2; 00074 ifsm2.open(sArgs.dataset_arg); 00075 while(!ifsm2.eof()){ 00076 ifsm2.getline(acBuffer, c_iBuffer-1); 00077 if(acBuffer[0]==0) break; 00078 acBuffer[c_iBuffer-1] = 0; 00079 vector<string> vecstrLine; 00080 CMeta::Tokenize(acBuffer, vecstrLine); 00081 vecstrDataset.push_back(vecstrLine[0]); 00082 } 00083 ifsm2.close(); 00084 00085 if(useNibble){ 00086 fprintf(stderr, "The use of nibble flag is not supported for --reorganize mode\n"); 00087 return 1; 00088 } 00089 CDatabase db(false); 00090 db.Open(sArgs.db_dir_arg, vecstrGenes, vecstrDataset.size(), 00091 sArgs.src_db_num_arg); 00092 db.Reorganize(sArgs.dest_db_dir_arg, sArgs.dest_db_num_arg); 00093 return 0; 00094 } 00095 00096 if(sArgs.combine_flag==1){ 00097 CDatabase DB(useNibble); 00098 00099 bool fSplit = false; 00100 if(sArgs.split_flag==1){ 00101 fSplit = true; 00102 } 00103 00104 if(sArgs.db_arg){ 00105 ifsm.open(sArgs.db_arg); 00106 while(!pistm->eof()){ 00107 pistm->getline(acBuffer, c_iBuffer -1); 00108 if(acBuffer[0]==0){ 00109 break; 00110 } 00111 acBuffer[c_iBuffer-1] = 0; 00112 vecstrDBs.push_back(acBuffer); 00113 } 00114 vecstrDBs.resize(vecstrDBs.size()); 00115 ifsm.close(); 00116 00117 //printf("Reading DBS"); getchar(); 00118 vector<CDatabaselet*> DBS; 00119 DBS.resize(vecstrDBs.size()); 00120 for(i=0; i<vecstrDBs.size(); i++){ 00121 DBS[i] = new CDatabaselet(useNibble); 00122 DBS[i]->Open(vecstrDBs[i]); 00123 } 00124 //printf("Finished reading DBS"); getchar(); 00125 00126 CDatabaselet::Combine(DBS, sArgs.dir_out_arg, vecstrGenes, fSplit); 00127 for(i=0; i<vecstrDBs.size(); i++){ 00128 free(DBS[i]); 00129 } 00130 00131 }else{ 00132 cerr << "Must give a db list." << endl; 00133 return 1; 00134 00135 } 00136 } 00137 #ifdef WIN32 00138 pthread_win32_process_detach_np( ); 00139 #endif // WIN32 00140 return 0; }