Sleipnir
tools/BNTruster/BNTruster.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 
00024 static int MainPosteriors( const gengetopt_args_info& );
00025 static int MainSums( const gengetopt_args_info& );
00026 static int MainRatios( const gengetopt_args_info& );
00027 
00028 static const TPFnTruster    c_apfnTrusters[]    = { MainPosteriors, MainSums, MainRatios, NULL };
00029 static const char*          c_aszTrusters[]     = { "posteriors", "sums", "ratios", NULL };
00030 
00031 int main( int iArgs, char** aszArgs ) {
00032 #ifdef WIN32
00033     pthread_win32_process_attach_np( );
00034 #endif // WIN32
00035     gengetopt_args_info sArgs;
00036     size_t              i;
00037     int                 iRet;
00038 
00039     if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
00040         cmdline_parser_print_help( );
00041         return 1; }
00042     CMeta Meta( sArgs.verbosity_arg );
00043 
00044     for( iRet = 1,i = 0; c_aszTrusters[ i ]; ++i )
00045         if( !strcmp( c_aszTrusters[ i ], sArgs.type_arg ) ) {
00046             iRet = c_apfnTrusters[ i ]( sArgs );
00047             break; }
00048 
00049 #ifdef WIN32
00050     pthread_win32_process_detach_np( );
00051 #endif // WIN32
00052     return iRet; }
00053 
00054 struct SData {
00055     const char*                 m_szDSL;
00056     const map<string, size_t>*  m_pmapstriNodes;
00057     bool                        m_fBins;
00058     vector<float>               m_vecdResults;
00059     vector<size_t>              m_veciBins;
00060 };
00061 
00062 void* posteriorsOneDSL( void* pData ) {
00063     SData*                  psData;
00064     CBayesNetSmile          BNSmile;
00065     size_t                  iNode;
00066     unsigned char           bValue;
00067     float                   dPrior;
00068     vector<string>          vecstrNodes;
00069     CDataMatrix             MatFR;
00070     vector<unsigned char>   vecbDatum;
00071     vector<float>           vecdOut;
00072     size_t                  i, j, iBins, iResult;
00073 
00074     psData = (SData*)pData;
00075     if( !BNSmile.Open( psData->m_szDSL ) ) {
00076         cerr << "Couldn't open: " << psData->m_szDSL << endl;
00077         return (void*)1; }
00078     cerr << "Opened: " << psData->m_szDSL << endl;
00079     BNSmile.GetNodes( vecstrNodes );
00080     vecbDatum.resize( vecstrNodes.size( ) );
00081     fill( vecbDatum.begin( ), vecbDatum.end( ), 0 );
00082     if( psData->m_fBins ) {
00083         psData->m_veciBins.resize( vecstrNodes.size( ) - 1 );
00084         for( iBins = 0,i = 1; i < vecstrNodes.size( ); ++i ) {
00085             psData->m_veciBins[ i - 1 ] = j = BNSmile.GetValues( i );
00086             iBins += j; }
00087         psData->m_vecdResults.resize( iBins ); }
00088     else {
00089         psData->m_vecdResults.resize( vecstrNodes.size( ) );
00090         fill( psData->m_vecdResults.begin( ), psData->m_vecdResults.end( ), CMeta::GetNaN( ) ); }
00091 
00092     vecdOut.clear( );
00093     BNSmile.Evaluate( vecbDatum, vecdOut, false, 0, true );
00094     dPrior = 1 - vecdOut[ 0 ];
00095     for( iResult = 0,iNode = 1; iNode < vecstrNodes.size( ); ++iNode ) {
00096         float           d, dSum;
00097         CDataMatrix     MatCPT;
00098         vector<float>   vecdProbs;
00099 
00100         vecbDatum[ iNode - 1 ] = 0;
00101         vecdProbs.clear( );
00102         BNSmile.Evaluate( vecbDatum, vecdProbs, false, iNode, true );
00103         for( dSum = 0,i = 0; i < vecdProbs.size( ); ++i )
00104             dSum += vecdProbs[ i ];
00105         vecdProbs.push_back( 1 - dSum );
00106         vecdOut.clear( );
00107         for( bValue = 0; bValue < BNSmile.GetValues( iNode ); ++bValue ) {
00108             vecbDatum[ iNode ] = bValue + 1;
00109             BNSmile.Evaluate( vecbDatum, vecdOut, false, 0, true ); }
00110 
00111         for( dSum = 0,i = 0; i < vecdOut.size( ); ++i ) {
00112             vecdOut[ i ] = 1 - vecdOut[ i ];
00113             d = fabs( dPrior - vecdOut[ i ] );
00114             if( psData->m_fBins )
00115                 psData->m_vecdResults[ iResult++ ] = ( vecdOut[ i ] > dPrior ) ? ( d / ( 1 - dPrior ) ) :
00116                     -( d / dPrior );
00117             else
00118                 dSum += d * vecdProbs[ i ]; }
00119         if( !psData->m_fBins )
00120             psData->m_vecdResults[ psData->m_pmapstriNodes->find( vecstrNodes[ iNode ] )->second ] = dSum; }
00121 
00122     return NULL; }
00123 
00124 int MainPosteriors( const gengetopt_args_info& sArgs ) {
00125     size_t                  i, j, k, iDSLBase, iDSLOffset;
00126     vector<string>          vecstrNodes;
00127     map<string,size_t>      mapstriNodes;
00128     vector<pthread_t>       vecpthdThreads;
00129     vector<SData>           vecsData;
00130 
00131     for( iDSLOffset = 0; iDSLOffset < sArgs.inputs_num; ++iDSLOffset ) {
00132         CBayesNetSmile  BNSmile;
00133         vector<string>  vecstrCur;
00134 
00135         if( !BNSmile.Open( sArgs.inputs[ iDSLOffset ] ) ) {
00136             cerr << "Couldn't open: " << sArgs.inputs[ iDSLOffset ] << endl;
00137             return 1; }
00138         BNSmile.GetNodes( vecstrCur );
00139         for( i = 1; i < vecstrCur.size( ); ++i )
00140             if( mapstriNodes.find( vecstrCur[ i ] ) == mapstriNodes.end( ) ) {
00141                 mapstriNodes[ vecstrCur[ i ] ] = vecstrNodes.size( );
00142                 vecstrNodes.push_back( vecstrCur[ i ] ); } }
00143     if( !sArgs.bins_flag ) {
00144         for( i = 0; i < vecstrNodes.size( ); ++i )
00145             cout << '\t' << vecstrNodes[ i ];
00146         cout << endl; }
00147 
00148     vecpthdThreads.resize( sArgs.threads_arg );
00149     vecsData.resize( vecpthdThreads.size( ) );
00150     for( iDSLBase = 0; iDSLBase < sArgs.inputs_num; iDSLBase += sArgs.threads_arg ) {
00151         for( iDSLOffset = 0; ( iDSLOffset < (size_t)sArgs.threads_arg ) && ( ( iDSLBase + iDSLOffset ) <
00152             sArgs.inputs_num ); ++iDSLOffset ) {
00153             vecsData[ iDSLOffset ].m_fBins = !!sArgs.bins_flag;
00154             vecsData[ iDSLOffset ].m_pmapstriNodes = &mapstriNodes;
00155             vecsData[ iDSLOffset ].m_szDSL = sArgs.inputs[ iDSLBase + iDSLOffset ];
00156             if( pthread_create( &vecpthdThreads[ iDSLOffset ], NULL, posteriorsOneDSL,
00157                 &vecsData[ iDSLOffset ] ) ) {
00158                 cerr << "Couldn't create thread: " << sArgs.inputs[ iDSLBase + iDSLOffset ] << endl;
00159                 return 1; } }
00160         for( i = 0; i < iDSLOffset; ++i ) {
00161             void*           pValue;
00162             float           d;
00163             size_t          iResult;
00164             const SData&    sDatum  = vecsData[ i ];
00165 
00166             pthread_join( vecpthdThreads[ i ], &pValue );
00167             if( pValue )
00168                 return 1;
00169             if( sArgs.bins_flag )
00170                 for( iResult = j = 0; j < sDatum.m_veciBins.size( ); ++j )
00171                     for( k = 0; k < sDatum.m_veciBins[ j ]; ++k )
00172                         cout << ( iDSLBase + i ) << '\t' << j << '\t' << k << '\t' <<
00173                             sDatum.m_vecdResults[ iResult++ ] << endl;
00174             else {
00175                 cout << sArgs.inputs[ iDSLBase + i ];
00176                 for( j = 0; j < sDatum.m_vecdResults.size( ); ++j ) {
00177                     cout << '\t';
00178                     if( !CMeta::IsNaN( d = sDatum.m_vecdResults[ j ] ) )
00179                         cout << d; }
00180                 cout << endl; } } }
00181 
00182     return 0; }
00183 
00184 int MainSums( const gengetopt_args_info& sArgs ) {
00185     size_t  iDSL;
00186 
00187     for( iDSL = 0; iDSL < sArgs.inputs_num; ++iDSL ) {
00188         CBayesNetSmile  BNSmile;
00189         size_t          iNode;
00190         unsigned char   bValue;
00191         float           dSum;
00192         vector<string>  vecstrNodes;
00193 
00194         if( !BNSmile.Open( sArgs.inputs[ iDSL ] ) ) {
00195             cerr << "Couldn't open: " << sArgs.inputs[ iDSL ] << endl;
00196             return 1; }
00197         BNSmile.GetNodes( vecstrNodes );
00198         if( !iDSL ) {
00199             for( iNode = 1; iNode < vecstrNodes.size( ); ++iNode )
00200                 cout << '\t' << vecstrNodes[ iNode ];
00201             cout << endl; }
00202 
00203         cout << sArgs.inputs[ iDSL ];
00204         for( iNode = 1; iNode < vecstrNodes.size( ); ++iNode ) {
00205             CDataMatrix MatCPT;
00206 
00207             dSum = 0;
00208             BNSmile.GetCPT( iNode, MatCPT );
00209             for( bValue = 0; bValue < MatCPT.GetRows( ); ++bValue )
00210                 dSum += fabs( MatCPT.Get( bValue, 0 ) - MatCPT.Get( bValue, 1 ) );
00211             cout << '\t' << dSum; }
00212         cout << endl; }
00213 
00214     return 0; }
00215 
00216 int MainRatios( const gengetopt_args_info& sArgs ) {
00217     size_t  iDSL;
00218 
00219     for( iDSL = 0; iDSL < sArgs.inputs_num; ++iDSL ) {
00220         CBayesNetSmile  BNSmile;
00221         size_t          iNode;
00222         unsigned char   bValue;
00223         float           dProd;
00224         vector<string>  vecstrNodes;
00225 
00226         if( !BNSmile.Open( sArgs.inputs[ iDSL ] ) ) {
00227             cerr << "Couldn't open: " << sArgs.inputs[ iDSL ] << endl;
00228             return 1; }
00229         BNSmile.GetNodes( vecstrNodes );
00230         if( !iDSL ) {
00231             for( iNode = 1; iNode < vecstrNodes.size( ); ++iNode )
00232                 cout << '\t' << vecstrNodes[ iNode ];
00233             cout << endl; }
00234 
00235         cout << sArgs.inputs[ iDSL ];
00236         for( iNode = 1; iNode < vecstrNodes.size( ); ++iNode ) {
00237             CDataMatrix MatCPT;
00238             float       dMin, dMax;
00239 
00240             dProd = 1;
00241             BNSmile.GetCPT( iNode, MatCPT );
00242             for( bValue = 0; bValue < MatCPT.GetRows( ); ++bValue ) {
00243                 dMin = MatCPT.Get( bValue, 0 );
00244                 dMax = MatCPT.Get( bValue, 1 );
00245                 if( dMin > dMax )
00246                     swap( dMin, dMax );
00247                 dProd *= dMax / dMin; }
00248             cout << '\t' << log( dProd ); }
00249         cout << endl; }
00250 
00251     return 0; }