Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 00025 static inline float enrange( float d, double dEpsilon ) { 00026 float dTmp; 00027 00028 if( d < dEpsilon ) 00029 return (float)dEpsilon; 00030 if( d > ( dTmp = ( 1 - (float)dEpsilon ) ) ) 00031 return dTmp; 00032 00033 return d; } 00034 00035 int main( int iArgs, char** aszArgs ) { 00036 gengetopt_args_info sArgs; 00037 CPCL PCL; 00038 CDat Dat; 00039 size_t iIter, iCur, iPCL, iDat, i; 00040 float d, dCur, dLogIn, dLogOut; 00041 vector<size_t> veciPCL2Dat, veciShuffle; 00042 00043 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00044 cmdline_parser_print_help( ); 00045 return 1; } 00046 CMeta Meta( sArgs.verbosity_arg, sArgs.random_arg ); 00047 00048 if( !Dat.Open( sArgs.graph_arg, !!sArgs.memmap_flag ) ) { 00049 cerr << "Could not open: " << sArgs.graph_arg << endl; 00050 return 1; } 00051 if( !PCL.Open( sArgs.pvalues_arg, sArgs.skip_arg ) ) { 00052 cerr << "Could not open: " << sArgs.pvalues_arg << endl; 00053 return 1; } 00054 veciPCL2Dat.resize( PCL.GetGenes( ) ); 00055 for( i = 0; i < veciPCL2Dat.size( ); ++i ) 00056 veciPCL2Dat[i] = Dat.GetGene( PCL.GetGene( i ) ); 00057 00058 veciShuffle.resize( PCL.GetGenes( ) ); 00059 for( i = 0; i < veciShuffle.size( ); ++i ) 00060 veciShuffle[i] = i; 00061 for( iIter = 0; iIter < (size_t)sArgs.iterations_arg; ++iIter ) { 00062 cerr << "Iteration: " << iIter << '/' << sArgs.iterations_arg << endl; 00063 random_shuffle( veciShuffle.begin( ), veciShuffle.end( ) ); 00064 for( iCur = 0; iCur < veciShuffle.size( ); ++iCur ) { 00065 if( !( iCur % 1000 ) ) 00066 cerr << "Gene: " << iCur << '/' << veciShuffle.size( ) << endl; 00067 iPCL = veciShuffle[iCur]; 00068 if( ( iDat = veciPCL2Dat[iPCL] ) == -1 ) 00069 continue; 00070 dLogIn = dLogOut = 0; 00071 for( i = 0; i < PCL.GetGenes( ); ++i ) { 00072 if( ( ( sArgs.neighbors_arg < 1 ) && ( ( (float)rand( ) / RAND_MAX ) > sArgs.neighbors_arg ) ) || 00073 ( i == iPCL ) || ( veciPCL2Dat[i] == -1 ) || 00074 CMeta::IsNaN( d = Dat.Get( iDat, veciPCL2Dat[i] ) ) ) 00075 continue; 00076 d = enrange( d, sArgs.epsilon_arg ); 00077 dCur = enrange( PCL.Get( i, 0 ), sArgs.epsilon_arg ); 00078 dLogIn += log( ( d * ( 1 - dCur ) ) + ( ( 1 - d ) * dCur ) ); 00079 dLogOut += log( ( d * dCur ) + ( ( 1 - d ) * ( 1 - dCur ) ) ); } 00080 d = enrange( PCL.Get( iPCL, 0 ), sArgs.epsilon_arg ); 00081 // P(gout|data) = P(data|gout)P(gout)/P(data) = P(n1|gout)P(n2|gout)...P(gout)/(P(data|gout)P(gout)+P(data|~gout)P(~gout)) 00082 // P(n1|gout) = P(n1)P(rel) + P(~n1)P(~rel) 00083 PCL.Set( iPCL, 0, 1 / ( 1 + ( ( 1 - d ) * exp( ( dLogIn - dLogOut ) / PCL.GetGenes( ) ) / d ) ) ); } } 00084 00085 PCL.Save( cout ); 00086 00087 return 0; }