// Sleipnir
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 00025 static const char c_szRBF[] = "rbf"; 00026 static const char c_szPolynomial[] = "poly"; 00027 00028 int main( int iArgs, char** aszArgs ) { 00029 CPCL Data; 00030 CSVM SVM; 00031 ifstream ifsm; 00032 ofstream ofsm; 00033 CGenome Genome; 00034 CGenes Genes( Genome ), GenesEx( Genome ); 00035 gengetopt_args_info sArgs; 00036 vector<float> vecdResults; 00037 size_t i, j; 00038 float dAve, dStd; 00039 00040 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00041 cmdline_parser_print_help( ); 00042 return 1; } 00043 CMeta Meta( sArgs.verbosity_arg, sArgs.random_arg ); 00044 00045 ifsm.open( sArgs.input_arg ); 00046 if( !Data.Open( ifsm, sArgs.skip_arg ) ) { 00047 cerr << "Could not open: " << sArgs.input_arg << endl; 00048 return 1; } 00049 ifsm.close( ); 00050 if( sArgs.normalize_flag ) 00051 
Data.Normalize( CPCL::ENormalizeZScore ); 00052 if( sArgs.random_features_flag ) 00053 Data.Randomize( ); 00054 00055 if( sArgs.genes_arg ) { 00056 ifsm.clear( ); 00057 ifsm.open( sArgs.genes_arg ); } 00058 if( !Genes.Open( sArgs.genes_arg ? ifsm : cin ) ) { 00059 cerr << "Could not open: " << ( sArgs.genes_arg ? sArgs.genes_arg : "gene input" ) << endl; 00060 return 1; } 00061 if( sArgs.genes_arg ) 00062 ifsm.close( ); 00063 00064 if( sArgs.genex_arg ) { 00065 ifsm.clear( ); 00066 ifsm.open( sArgs.genex_arg ); 00067 if( !GenesEx.Open( ifsm ) ) { 00068 cerr << "Could not open: " << sArgs.genex_arg << endl; 00069 return 1; } 00070 ifsm.close( ); 00071 00072 for( i = 0; i < GenesEx.GetGenes( ); ++i ) 00073 if( ( j = Data.GetGene( GenesEx.GetGene( i ).GetName( ) ) ) != -1 ) 00074 Data.MaskGene( j ); } 00075 00076 if( sArgs.alphas_arg ) { 00077 ifsm.clear( ); 00078 ifsm.open( sArgs.alphas_arg ); 00079 if( !SVM.OpenAlphas( ifsm ) ) { 00080 cerr << "Could not open: " << sArgs.alphas_arg << endl; 00081 return 1; } 00082 ifsm.close( ); } 00083 00084 if( !strcmp( sArgs.kernel_arg, c_szRBF ) ) 00085 SVM.SetKernel( CSVM::EKernelRBF ); 00086 else if( !strcmp( sArgs.kernel_arg, c_szPolynomial ) ) 00087 SVM.SetKernel( CSVM::EKernelPolynomial ); 00088 else 00089 SVM.SetKernel( CSVM::EKernelLinear ); 00090 00091 SVM.SetCache( sArgs.cache_arg ); 00092 SVM.SetIterations( sArgs.iterations_arg ); 00093 SVM.SetGamma( sArgs.gamma_arg ); 00094 SVM.SetDegree( sArgs.degree_arg ); 00095 if( sArgs.tradeoff_given ) 00096 SVM.SetTradeoff( sArgs.tradeoff_arg ); 00097 SVM.SetVerbosity( 0 ); 00098 00099 SVM.Learn( Data, Genes ); 00100 if( sArgs.model_arg ) { 00101 ofsm.open( sArgs.model_arg ); 00102 SVM.Save( sArgs.model_arg ? 
(ostream&)ofsm : cout ); 00103 ofsm.close( ); } 00104 00105 if( sArgs.heldout_flag ) 00106 for( i = 0; i < Data.GetGenes( ); ++i ) 00107 Data.MaskGene( i, !Data.IsMasked( i ) ); 00108 SVM.Evaluate( Data, vecdResults ); 00109 if( sArgs.random_output_flag ) 00110 random_shuffle( vecdResults.begin( ), vecdResults.end( ) ); 00111 00112 dAve = (float)CStatistics::Average( vecdResults ); 00113 dStd = (float)sqrt( CStatistics::Variance( vecdResults, dAve ) ); 00114 for( i = 0; i < vecdResults.size( ); ++i ) 00115 vecdResults[ i ] = ( vecdResults[ i ] - dAve ) / dStd; 00116 for( i = j = 0; i < Data.GetGenes( ); ++i ) 00117 if( !Data.IsMasked( i ) ) 00118 cout << Data.GetGene( i ) << '\t' << vecdResults[ j++ ] << endl; 00119 00120 return 0; }