Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 00025 static const char c_acDAB[] = ".dab"; 00026 00027 bool read_values( const CDat& Dat, size_t iMin, size_t iMax, vector<float>& vecdValues ) { 00028 size_t i, j, iCount; 00029 float d; 00030 00031 for( iCount = i = 0; i < Dat.GetGenes( ); ++i ) 00032 for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j ) { 00033 if( CMeta::IsNaN( d = Dat.Get( i, j ) ) ) 00034 continue; 00035 iCount++; 00036 if( vecdValues.size( ) < iMax ) 00037 vecdValues.push_back( d ); 00038 else if( ( (float)rand( ) / RAND_MAX ) < ( (float)iMax / iCount ) ) 00039 vecdValues[ rand( ) % vecdValues.size( ) ] = d; } 00040 if( !( i = vecdValues.size( ) ) ) 00041 return false; 00042 while( vecdValues.size( ) < iMin ) 00043 vecdValues.push_back( vecdValues[ rand( ) % i ] ); 00044 sort( vecdValues.begin( ), vecdValues.end( ) ); 00045 00046 return true; } 00047 00048 int main( int iArgs, char** aszArgs ) { 00049 gengetopt_args_info sArgs; 00050 IMeasure* pMeasure; 00051 CMeasurePearson Pearson; 00052 CMeasureEuclidean Euclidean; 00053 CMeasureKendallsTau KendallsTau; 00054 CMeasureKolmogorovSmirnov KolmSmir; 00055 CMeasureHypergeometric Hypergeom; 00056 CMeasureQuickPearson PearQuick; 00057 CMeasureInnerProduct InnerProd; 00058 CMeasureBinaryInnerProduct BinInnerProd; 00059 size_t i, iTwo; 00060 string strFile; 00061 00062 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00063 cmdline_parser_print_help( ); 00064 return 1; } 00065 CMeta Meta( sArgs.verbosity_arg, sArgs.random_arg ); 00066 00067 CMeasureSigmoid EuclideanSig( &Euclidean, false, 1.0f / sArgs.inputs_num ); 00068 IMeasure* apMeasures[] = { &Pearson, &EuclideanSig, &KendallsTau, 00069 &KolmSmir, &Hypergeom, &PearQuick, &InnerProd, &BinInnerProd, NULL }; 00070 00071 pMeasure = NULL; 00072 if( sArgs.distance_arg ) 00073 for( i = 0; apMeasures[ i ]; ++i ) 00074 if( !strcmp( apMeasures[ i ]->GetName( ), sArgs.distance_arg ) ) { 00075 pMeasure = apMeasures[ i ]; 00076 break; } 00077 if( !pMeasure ) { 00078 cerr << "Could not recognize: " << sArgs.distance_arg << endl; 00079 return 1; } 00080 00081 if( sArgs.table_flag ) { 00082 for( i = 0; i < sArgs.inputs_num; ++i ) 00083 cout << '\t' << sArgs.inputs[ i ]; 00084 cout << endl; } 00085 if( !sArgs.inputs_num ) 00086 return 0; 00087 FOR_EACH_DIRECTORY_FILE((string)sArgs.input_arg, strFile) 00088 CDat DatOne; 00089 vector<float> vecdOne; 00090 00091 if( !CMeta::IsExtension( strFile, c_acDAB ) ) 00092 continue; 00093 00094 strFile = (string)sArgs.input_arg + '/' + strFile; 00095 if( !DatOne.Open( strFile.c_str( ), !!sArgs.memmap_flag ) ) { 00096 cerr << "Could not open: " << strFile << endl; 00097 return 1; } 00098 if( !read_values( DatOne, sArgs.size_min_arg, sArgs.size_max_arg, vecdOne ) ) 00099 continue; 00100 00101 for( iTwo = 0; iTwo < sArgs.inputs_num; ++iTwo ) { 00102 CDat DatTwo; 00103 vector<float> vecdTwo; 00104 00105 if( !DatTwo.Open( sArgs.inputs[ iTwo ], !!sArgs.memmap_flag ) ) { 00106 cerr << "Could not open: " << sArgs.inputs[ iTwo ] << endl; 00107 return 1; } 00108 if( sArgs.table_flag ) 00109 cout << '\t'; 00110 if( !read_values( DatTwo, sArgs.size_min_arg, sArgs.size_max_arg, vecdTwo ) ) 00111 continue; 00112 if( !sArgs.table_flag ) 00113 cout << strFile << '\t' << sArgs.inputs[ iTwo ] << '\t'; 00114 cout << pMeasure->Measure( &vecdOne.front( ), vecdOne.size( ), &vecdTwo.front( ), vecdTwo.size( ), 00115 IMeasure::EMapNone ); 00116 if( !sArgs.table_flag ) 00117 cout << endl; } 00118 if( sArgs.table_flag ) 00119 cout << endl; } 00120 00121 return 0; }