Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 00025 int main( int iArgs, char** aszArgs ) { 00026 static const size_t c_iBuffer = 1024; 00027 gengetopt_args_info sArgs; 00028 ifstream ifsm; 00029 istream* pistm; 00030 size_t iFile, i, j, iOne, iTwo, iFirst, iSecond, iCount; 00031 float d; 00032 map<string, size_t> mapstriGenes; 00033 map<string, size_t>::const_iterator iterGene; 00034 vector<string> vecstrLine; 00035 char acBuffer[ c_iBuffer ]; 00036 vector<size_t> veciGenes; 00037 00038 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00039 cmdline_parser_print_help( ); 00040 return 1; } 00041 CMeta Meta( sArgs.verbosity_arg ); 00042 00043 if( sArgs.input_arg ) { 00044 ifsm.open( sArgs.input_arg ); 00045 pistm = &ifsm; } 00046 else 00047 pistm = &cin; 00048 while( !pistm->eof( ) ) { 00049 pistm->getline( acBuffer, c_iBuffer - 1 ); 00050 acBuffer[ c_iBuffer - 1 ] = 0; 00051 vecstrLine.clear( ); 00052 CMeta::Tokenize( acBuffer, vecstrLine ); 00053 if( vecstrLine.size( ) != 2 ) { 00054 cerr << "Ignoring line: " << acBuffer << endl; 00055 continue; } 00056 mapstriGenes[ vecstrLine[ 1 ] ] = atoi( vecstrLine[ 0 ].c_str( ) ); } 00057 if( sArgs.input_arg ) 00058 ifsm.close( ); 00059 00060 for( iCount = iFile = 0; iFile < sArgs.inputs_num; ++iFile ) { 00061 CDataPair Dat; 00062 00063 if( sArgs.datasets_flag ) { 00064 cout << ( iFile + 1 ) << '\t' << CMeta::Deextension( CMeta::Basename( sArgs.inputs[ iFile ] ) ) << 00065 endl; 00066 continue; } 00067 if( !Dat.Open( sArgs.inputs[ iFile ], false, !!sArgs.memmap_flag ) ) { 00068 cerr << "Could not open: " << sArgs.inputs[ iFile ] << endl; 00069 return 1; } 00070 veciGenes.resize( Dat.GetGenes( ) ); 00071 for( i = 0; i < veciGenes.size( ); ++i ) 00072 #ifdef _MSC_VER 00073 (size_t) 00074 #endif // _MSC_VER 00075 veciGenes[ i ] = ( ( iterGene = mapstriGenes.find( Dat.GetGene( i ) ) ) == 00076 mapstriGenes.end( ) ) ? -1 : iterGene->second; 00077 for( i = 0; i < veciGenes.size( ); ++i ) { 00078 if( !( i % 100 ) ) 00079 cerr << i << '/' << veciGenes.size( ) << endl; 00080 if( ( iOne = veciGenes[ i ] ) == -1 ) 00081 continue; 00082 for( j = ( i + 1 ); j < veciGenes.size( ); ++j ) 00083 if( ( ( iTwo = veciGenes[ j ] ) != -1 ) && !CMeta::IsNaN( d = Dat.Get( i, j ) ) ) { 00084 if( iOne < iTwo ) { 00085 iFirst = iOne; 00086 iSecond = iTwo; } 00087 else { 00088 iFirst = iTwo; 00089 iSecond = iOne; } 00090 if( iCount % sArgs.block_arg ) 00091 cout << ','; 00092 else 00093 cout << "INSERT INTO " << sArgs.table_arg << " VALUES " << endl; 00094 cout << '(' << ( iFile + 1 ) << ',' << iFirst << ',' << iSecond << ',' << 00095 Dat.Quantize( d ) << ')'; 00096 if( !( ++iCount % sArgs.block_arg ) ) 00097 cout << ';' << endl; } } } 00098 if( iCount ) 00099 cout << ';' << endl; 00100 00101 return 0; }