Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 00025 struct SSorter { 00026 const vector<float>& m_vecdScores; 00027 00028 SSorter( const vector<float>& vecdScores ) : m_vecdScores(vecdScores) { } 00029 00030 bool operator()( size_t iOne, size_t iTwo ) { 00031 00032 return ( m_vecdScores[iOne] > m_vecdScores[iTwo] ); } 00033 }; 00034 00035 int main( int iArgs, char** aszArgs ) { 00036 gengetopt_args_info sArgs; 00037 CPCL PCL; 00038 CDat Dat; 00039 size_t i, j, k, iGene; 00040 vector<bool> vecfClinical; 00041 vector<size_t> veciGenes2PCL, veciPCL2Genes, veciIndices, veciScores; 00042 vector<float> vecdScores; 00043 CGenome Genome; 00044 float d; 00045 00046 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00047 cmdline_parser_print_help( ); 00048 return 1; } 00049 CMeta Meta( sArgs.verbosity_arg ); 00050 00051 if( !PCL.Open( sArgs.input_arg, sArgs.skip_arg ) ) { 00052 cerr << "Could not open: " << ( sArgs.input_arg ? sArgs.input_arg : "stdin" ) << endl; 00053 return 1; } 00054 if( PCL.GetFeatures( ) < 2 ) { 00055 cerr << "PCL requires at least one clinical variable feature" << endl; 00056 return 1; } 00057 if( sArgs.spearman_flag ) 00058 PCL.RankTransform( ); 00059 if( sArgs.global_arg && !Dat.Open( sArgs.global_arg, !!sArgs.memmap_flag ) ) { 00060 cerr << "Could not open: " << sArgs.global_arg << endl; 00061 return 1; } 00062 00063 vecfClinical.resize( PCL.GetGenes( ) ); 00064 veciPCL2Genes.resize( PCL.GetGenes( ) ); 00065 fill( veciPCL2Genes.begin( ), veciPCL2Genes.end( ), -1 ); 00066 for( iGene = i = 0; i < vecfClinical.size( ); ++i ) 00067 if( !( vecfClinical[i] = !!atoi( PCL.GetFeature( i, 1 ).c_str( ) ) ) ) 00068 veciPCL2Genes[i] = iGene++; 00069 veciGenes2PCL.resize( iGene ); 00070 vecdScores.resize( veciGenes2PCL.size( ) ); 00071 veciScores.resize( vecdScores.size( ) ); 00072 veciIndices.resize( iGene ); 00073 for( iGene = i = 0; i < PCL.GetGenes( ); ++i ) 00074 if( !vecfClinical[i] ) 00075 veciGenes2PCL[iGene++] = i; 00076 for( i = 0; i < PCL.GetGenes( ); ++i ) { 00077 CGenes GenesFinal( Genome ); 00078 vector<size_t> veciFinal; 00079 00080 if( !vecfClinical[i] ) 00081 continue; 00082 cerr << "Processing: " << PCL.GetGene( i ) << endl; 00083 for( iGene = j = 0; j < PCL.GetGenes( ); ++j ) 00084 if( !vecfClinical[j] ) { 00085 vecdScores[iGene] = (float)CMeasurePearson::Pearson( PCL.Get( i ), PCL.GetExperiments( ), PCL.Get( j ), PCL.GetExperiments( ), 00086 IMeasure::EMapNone, NULL, NULL, &veciScores[iGene] ); 00087 iGene++; } 00088 for( j = 0; j < veciIndices.size( ); ++j ) 00089 veciIndices[j] = j; 00090 sort( veciIndices.begin( ), veciIndices.end( ), SSorter( vecdScores ) ); 00091 00092 if( sArgs.global_arg && sArgs.initial_arg ) { 00093 CDat DatCopy; 00094 CGenome Genome; 00095 CGenes GenesInitial( Genome ); 00096 00097 for( j = 0; ( j < (size_t)sArgs.initial_arg ) && ( j < veciIndices.size( ) ); ++j ) 00098 GenesInitial.AddGene( PCL.GetGene( veciGenes2PCL[veciIndices[j]] ) ); 00099 DatCopy.Open( Dat ); 00100 cerr << "Filtering..."; 00101 DatCopy.FilterGenes( GenesInitial, sArgs.hefalmp_flag ? CDat::EFilterHefalmp : CDat::EFilterPixie, sArgs.final_arg, CMeta::GetNaN( ) ); 00102 cerr << " done!" << endl; 00103 for( j = 0; j < DatCopy.GetGenes( ); ++j ) 00104 for( k = ( j + 1 ); k < DatCopy.GetGenes( ); ++k ) 00105 if( !CMeta::IsNaN( DatCopy.Get( j, k ) ) ) { 00106 GenesFinal.AddGene( Dat.GetGene( j ) ); 00107 GenesFinal.AddGene( Dat.GetGene( k ) ); } 00108 veciFinal.reserve( GenesFinal.GetGenes( ) ); 00109 for( j = 0; j < GenesFinal.GetGenes( ); ++j ) 00110 if( ( k = PCL.GetGene( GenesFinal.GetGene( j ).GetName( ) ) ) != -1 ) 00111 veciFinal.push_back( k ); 00112 iGene = veciFinal.size( ); } 00113 else { 00114 veciFinal.reserve( sArgs.final_arg ); 00115 for( j = 0; ( j < (size_t)sArgs.final_arg ) && ( j < veciIndices.size( ) ); ++j ) 00116 veciFinal.push_back( veciGenes2PCL[veciIndices[j]] ); 00117 iGene = veciGenes2PCL.size( ); } 00118 00119 for( j = 0; j < veciFinal.size( ); ++j ) { 00120 k = veciPCL2Genes[veciFinal[j]]; 00121 d = (float)( sArgs.spearman_flag ? CStatistics::PValueSpearman : CStatistics::PValuePearson )( vecdScores[k], veciScores[k] ); 00122 cout << PCL.GetGene( i ) << '\t' << PCL.GetGene( veciFinal[j] ) << '\t' << vecdScores[k] << '\t' << veciScores[k] << '\t' << 00123 d << '\t' << ( d * iGene ) << endl; } } 00124 00125 return 0; }