Sleipnir
tools/SMRF/SMRF.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 
00025 static inline float enrange( float d, double dEpsilon ) {
00026     float   dTmp;
00027 
00028     if( d < dEpsilon )
00029         return (float)dEpsilon;
00030     if( d > ( dTmp = ( 1 - (float)dEpsilon ) ) )
00031         return dTmp;
00032 
00033     return d; }
00034 
00035 int main( int iArgs, char** aszArgs ) {
00036     gengetopt_args_info sArgs;
00037     CPCL                PCL;
00038     CDat                Dat;
00039     size_t              iIter, iCur, iPCL, iDat, i;
00040     float               d, dCur, dLogIn, dLogOut;
00041     vector<size_t>      veciPCL2Dat, veciShuffle;
00042 
00043     if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
00044         cmdline_parser_print_help( );
00045         return 1; }
00046     CMeta Meta( sArgs.verbosity_arg, sArgs.random_arg );
00047 
00048     if( !Dat.Open( sArgs.graph_arg, !!sArgs.memmap_flag ) ) {
00049         cerr << "Could not open: " << sArgs.graph_arg << endl;
00050         return 1; }
00051     if( !PCL.Open( sArgs.pvalues_arg, sArgs.skip_arg ) ) {
00052         cerr << "Could not open: " << sArgs.pvalues_arg << endl;
00053         return 1; }
00054     veciPCL2Dat.resize( PCL.GetGenes( ) );
00055     for( i = 0; i < veciPCL2Dat.size( ); ++i )
00056         veciPCL2Dat[i] = Dat.GetGene( PCL.GetGene( i ) );
00057 
00058     veciShuffle.resize( PCL.GetGenes( ) );
00059     for( i = 0; i < veciShuffle.size( ); ++i )
00060         veciShuffle[i] = i;
00061     for( iIter = 0; iIter < (size_t)sArgs.iterations_arg; ++iIter ) {
00062         cerr << "Iteration: " << iIter << '/' << sArgs.iterations_arg << endl;
00063         random_shuffle( veciShuffle.begin( ), veciShuffle.end( ) );
00064         for( iCur = 0; iCur < veciShuffle.size( ); ++iCur ) {
00065             if( !( iCur % 1000 ) )
00066                 cerr << "Gene: " << iCur << '/' << veciShuffle.size( ) << endl;
00067             iPCL = veciShuffle[iCur];
00068             if( ( iDat = veciPCL2Dat[iPCL] ) == -1 )
00069                 continue;
00070             dLogIn = dLogOut = 0;
00071             for( i = 0; i < PCL.GetGenes( ); ++i ) {
00072                 if( ( ( sArgs.neighbors_arg < 1 ) && ( ( (float)rand( ) / RAND_MAX ) > sArgs.neighbors_arg ) ) ||
00073                     ( i == iPCL ) || ( veciPCL2Dat[i] == -1 ) ||
00074                     CMeta::IsNaN( d = Dat.Get( iDat, veciPCL2Dat[i] ) ) )
00075                     continue;
00076                 d = enrange( d, sArgs.epsilon_arg );
00077                 dCur = enrange( PCL.Get( i, 0 ), sArgs.epsilon_arg );
00078                 dLogIn += log( ( d * ( 1 - dCur ) ) + ( ( 1 - d ) * dCur ) );
00079                 dLogOut += log( ( d * dCur ) + ( ( 1 - d ) * ( 1 - dCur ) ) ); }
00080             d = enrange( PCL.Get( iPCL, 0 ), sArgs.epsilon_arg );
00081 // P(gout|data) = P(data|gout)P(gout)/P(data) = P(n1|gout)P(n2|gout)...P(gout)/(P(data|gout)P(gout)+P(data|~gout)P(~gout))
00082 // P(n1|gout) = P(n1)P(rel) + P(~n1)P(~rel)
00083             PCL.Set( iPCL, 0, 1 / ( 1 + ( ( 1 - d ) * exp( ( dLogIn - dLogOut ) / PCL.GetGenes( ) ) / d ) ) ); } }
00084 
00085     PCL.Save( cout );
00086 
00087     return 0; }