Sleipnir
tools/Clinician/Clinician.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 
00025 struct SSorter {
00026     const vector<float>&    m_vecdScores;
00027 
00028     SSorter( const vector<float>& vecdScores ) : m_vecdScores(vecdScores) { }
00029 
00030     bool operator()( size_t iOne, size_t iTwo ) {
00031 
00032         return ( m_vecdScores[iOne] > m_vecdScores[iTwo] ); }
00033 };
00034 
00035 int main( int iArgs, char** aszArgs ) {
00036     gengetopt_args_info sArgs;
00037     CPCL                PCL;
00038     CDat                Dat;
00039     size_t              i, j, k, iGene;
00040     vector<bool>        vecfClinical;
00041     vector<size_t>      veciGenes2PCL, veciPCL2Genes, veciIndices, veciScores;
00042     vector<float>       vecdScores;
00043     CGenome             Genome;
00044     float               d;
00045 
00046     if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
00047         cmdline_parser_print_help( );
00048         return 1; }
00049     CMeta Meta( sArgs.verbosity_arg );
00050 
00051     if( !PCL.Open( sArgs.input_arg, sArgs.skip_arg ) ) {
00052         cerr << "Could not open: " << ( sArgs.input_arg ? sArgs.input_arg : "stdin" ) << endl;
00053         return 1; }
00054     if( PCL.GetFeatures( ) < 2 ) {
00055         cerr << "PCL requires at least one clinical variable feature" << endl;
00056         return 1; }
00057     if( sArgs.spearman_flag )
00058         PCL.RankTransform( );
00059     if( sArgs.global_arg && !Dat.Open( sArgs.global_arg, !!sArgs.memmap_flag ) ) {
00060         cerr << "Could not open: " << sArgs.global_arg << endl;
00061         return 1; }
00062 
00063     vecfClinical.resize( PCL.GetGenes( ) );
00064     veciPCL2Genes.resize( PCL.GetGenes( ) );
00065     fill( veciPCL2Genes.begin( ), veciPCL2Genes.end( ), -1 );
00066     for( iGene = i = 0; i < vecfClinical.size( ); ++i )
00067         if( !( vecfClinical[i] = !!atoi( PCL.GetFeature( i, 1 ).c_str( ) ) ) )
00068             veciPCL2Genes[i] = iGene++;
00069     veciGenes2PCL.resize( iGene );
00070     vecdScores.resize( veciGenes2PCL.size( ) );
00071     veciScores.resize( vecdScores.size( ) );
00072     veciIndices.resize( iGene );
00073     for( iGene = i = 0; i < PCL.GetGenes( ); ++i )
00074         if( !vecfClinical[i] )
00075             veciGenes2PCL[iGene++] = i;
00076     for( i = 0; i < PCL.GetGenes( ); ++i ) {
00077         CGenes          GenesFinal( Genome );
00078         vector<size_t>  veciFinal;
00079 
00080         if( !vecfClinical[i] )
00081             continue;
00082         cerr << "Processing: " << PCL.GetGene( i ) << endl;
00083         for( iGene = j = 0; j < PCL.GetGenes( ); ++j )
00084             if( !vecfClinical[j] ) {
00085                 vecdScores[iGene] = (float)CMeasurePearson::Pearson( PCL.Get( i ), PCL.GetExperiments( ), PCL.Get( j ), PCL.GetExperiments( ),
00086                     IMeasure::EMapNone, NULL, NULL, &veciScores[iGene] );
00087                 iGene++; }
00088         for( j = 0; j < veciIndices.size( ); ++j )
00089             veciIndices[j] = j;
00090         sort( veciIndices.begin( ), veciIndices.end( ), SSorter( vecdScores ) );
00091 
00092         if( sArgs.global_arg && sArgs.initial_arg ) {
00093             CDat    DatCopy;
00094             CGenome Genome;
00095             CGenes  GenesInitial( Genome );
00096 
00097             for( j = 0; ( j < (size_t)sArgs.initial_arg ) && ( j < veciIndices.size( ) ); ++j )
00098                 GenesInitial.AddGene( PCL.GetGene( veciGenes2PCL[veciIndices[j]] ) );
00099             DatCopy.Open( Dat );
00100             cerr << "Filtering...";
00101             DatCopy.FilterGenes( GenesInitial, sArgs.hefalmp_flag ? CDat::EFilterHefalmp : CDat::EFilterPixie, sArgs.final_arg, CMeta::GetNaN( ) );
00102             cerr << "  done!" << endl;
00103             for( j = 0; j < DatCopy.GetGenes( ); ++j )
00104                 for( k = ( j + 1 ); k < DatCopy.GetGenes( ); ++k )
00105                     if( !CMeta::IsNaN( DatCopy.Get( j, k ) ) ) {
00106                         GenesFinal.AddGene( Dat.GetGene( j ) );
00107                         GenesFinal.AddGene( Dat.GetGene( k ) ); }
00108             veciFinal.reserve( GenesFinal.GetGenes( ) );
00109             for( j = 0; j < GenesFinal.GetGenes( ); ++j )
00110                 if( ( k = PCL.GetGene( GenesFinal.GetGene( j ).GetName( ) ) ) != -1 )
00111                     veciFinal.push_back( k );
00112             iGene = veciFinal.size( ); }
00113         else {
00114             veciFinal.reserve( sArgs.final_arg );
00115             for( j = 0; ( j < (size_t)sArgs.final_arg ) && ( j < veciIndices.size( ) ); ++j )
00116                 veciFinal.push_back( veciGenes2PCL[veciIndices[j]] );
00117             iGene = veciGenes2PCL.size( ); }
00118 
00119         for( j = 0; j < veciFinal.size( ); ++j ) {
00120             k = veciPCL2Genes[veciFinal[j]];
00121             d = (float)( sArgs.spearman_flag ? CStatistics::PValueSpearman : CStatistics::PValuePearson )( vecdScores[k], veciScores[k] );
00122             cout << PCL.GetGene( i ) << '\t' << PCL.GetGene( veciFinal[j] ) << '\t' << vecdScores[k] << '\t' << veciScores[k] << '\t' <<
00123                 d << '\t' << ( d * iGene ) << endl; } }
00124 
00125     return 0; }