Sleipnir
tools/Data2Sql/Data2Sql.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 
00025 int main( int iArgs, char** aszArgs ) {
00026     static const size_t                 c_iBuffer   = 1024;
00027     gengetopt_args_info                 sArgs;
00028     ifstream                            ifsm;
00029     istream*                            pistm;
00030     size_t                              iFile, i, j, iOne, iTwo, iFirst, iSecond, iCount;
00031     float                               d;
00032     map<string, size_t>                 mapstriGenes;
00033     map<string, size_t>::const_iterator iterGene;
00034     vector<string>                      vecstrLine;
00035     char                                acBuffer[ c_iBuffer ];
00036     vector<size_t>                      veciGenes;
00037 
00038     if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
00039         cmdline_parser_print_help( );
00040         return 1; }
00041     CMeta Meta( sArgs.verbosity_arg );
00042 
00043     if( sArgs.input_arg ) {
00044         ifsm.open( sArgs.input_arg );
00045         pistm = &ifsm; }
00046     else
00047         pistm = &cin;
00048     while( !pistm->eof( ) ) {
00049         pistm->getline( acBuffer, c_iBuffer - 1 );
00050         acBuffer[ c_iBuffer - 1 ] = 0;
00051         vecstrLine.clear( );
00052         CMeta::Tokenize( acBuffer, vecstrLine );
00053         if( vecstrLine.size( ) != 2 ) {
00054             cerr << "Ignoring line: " << acBuffer << endl;
00055             continue; }
00056         mapstriGenes[ vecstrLine[ 1 ] ] = atoi( vecstrLine[ 0 ].c_str( ) ); }
00057     if( sArgs.input_arg )
00058         ifsm.close( );
00059 
00060     for( iCount = iFile = 0; iFile < sArgs.inputs_num; ++iFile ) {
00061         CDataPair   Dat;
00062 
00063         if( sArgs.datasets_flag ) {
00064             cout << ( iFile + 1 ) << '\t' << CMeta::Deextension( CMeta::Basename( sArgs.inputs[ iFile ] ) ) <<
00065                 endl;
00066             continue; }
00067         if( !Dat.Open( sArgs.inputs[ iFile ], false, !!sArgs.memmap_flag ) ) {
00068             cerr << "Could not open: " << sArgs.inputs[ iFile ] << endl;
00069             return 1; }
00070         veciGenes.resize( Dat.GetGenes( ) );
00071         for( i = 0; i < veciGenes.size( ); ++i )
00072 #ifdef _MSC_VER
00073             (size_t)
00074 #endif // _MSC_VER
00075             veciGenes[ i ] = ( ( iterGene = mapstriGenes.find( Dat.GetGene( i ) ) ) ==
00076                 mapstriGenes.end( ) ) ? -1 : iterGene->second;
00077         for( i = 0; i < veciGenes.size( ); ++i ) {
00078             if( !( i % 100 ) )
00079                 cerr << i << '/' << veciGenes.size( ) << endl;
00080             if( ( iOne = veciGenes[ i ] ) == -1 )
00081                 continue;
00082             for( j = ( i + 1 ); j < veciGenes.size( ); ++j )
00083                 if( ( ( iTwo = veciGenes[ j ] ) != -1 ) && !CMeta::IsNaN( d = Dat.Get( i, j ) ) ) {
00084                     if( iOne < iTwo ) {
00085                         iFirst = iOne;
00086                         iSecond = iTwo; }
00087                     else {
00088                         iFirst = iTwo;
00089                         iSecond = iOne; }
00090                     if( iCount % sArgs.block_arg )
00091                         cout << ',';
00092                     else
00093                         cout << "INSERT INTO " << sArgs.table_arg << " VALUES " << endl;
00094                     cout << '(' << ( iFile + 1 ) << ',' << iFirst << ',' << iSecond << ',' <<
00095                         Dat.Quantize( d ) << ')';
00096                     if( !( ++iCount % sArgs.block_arg ) )
00097                         cout << ';' << endl; } } }
00098     if( iCount )
00099         cout << ';' << endl;
00100 
00101     return 0; }