Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 00024 static int MainPosteriors( const gengetopt_args_info& ); 00025 static int MainSums( const gengetopt_args_info& ); 00026 static int MainRatios( const gengetopt_args_info& ); 00027 00028 static const TPFnTruster c_apfnTrusters[] = { MainPosteriors, MainSums, MainRatios, NULL }; 00029 static const char* c_aszTrusters[] = { "posteriors", "sums", "ratios", NULL }; 00030 00031 int main( int iArgs, char** aszArgs ) { 00032 #ifdef WIN32 00033 pthread_win32_process_attach_np( ); 00034 #endif // WIN32 00035 gengetopt_args_info sArgs; 00036 size_t i; 00037 int iRet; 00038 00039 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00040 cmdline_parser_print_help( ); 00041 return 1; } 00042 CMeta Meta( sArgs.verbosity_arg ); 00043 00044 for( iRet = 1,i = 0; c_aszTrusters[ i ]; ++i ) 00045 if( !strcmp( c_aszTrusters[ i ], sArgs.type_arg ) ) { 00046 iRet = c_apfnTrusters[ i ]( sArgs ); 00047 break; } 00048 00049 #ifdef WIN32 00050 pthread_win32_process_detach_np( ); 00051 #endif // WIN32 00052 return iRet; } 00053 00054 struct SData { 00055 const char* m_szDSL; 00056 const map<string, size_t>* m_pmapstriNodes; 00057 bool m_fBins; 00058 vector<float> m_vecdResults; 00059 vector<size_t> m_veciBins; 00060 }; 00061 00062 void* posteriorsOneDSL( void* pData ) { 00063 SData* psData; 00064 CBayesNetSmile BNSmile; 00065 size_t iNode; 00066 unsigned char bValue; 00067 float dPrior; 00068 vector<string> vecstrNodes; 00069 CDataMatrix MatFR; 00070 vector<unsigned char> vecbDatum; 00071 vector<float> vecdOut; 00072 size_t i, j, iBins, iResult; 00073 00074 psData = (SData*)pData; 00075 if( !BNSmile.Open( psData->m_szDSL ) ) { 00076 cerr << "Couldn't open: " << psData->m_szDSL << endl; 00077 return (void*)1; } 00078 cerr << "Opened: " << psData->m_szDSL << endl; 00079 BNSmile.GetNodes( vecstrNodes ); 00080 vecbDatum.resize( vecstrNodes.size( ) ); 00081 fill( vecbDatum.begin( ), vecbDatum.end( ), 0 ); 00082 if( psData->m_fBins ) { 00083 psData->m_veciBins.resize( vecstrNodes.size( ) - 1 ); 00084 for( iBins = 0,i = 1; i < vecstrNodes.size( ); ++i ) { 00085 psData->m_veciBins[ i - 1 ] = j = BNSmile.GetValues( i ); 00086 iBins += j; } 00087 psData->m_vecdResults.resize( iBins ); } 00088 else { 00089 psData->m_vecdResults.resize( vecstrNodes.size( ) ); 00090 fill( psData->m_vecdResults.begin( ), psData->m_vecdResults.end( ), CMeta::GetNaN( ) ); } 00091 00092 vecdOut.clear( ); 00093 BNSmile.Evaluate( vecbDatum, vecdOut, false, 0, true ); 00094 dPrior = 1 - vecdOut[ 0 ]; 00095 for( iResult = 0,iNode = 1; iNode < vecstrNodes.size( ); ++iNode ) { 00096 float d, dSum; 00097 CDataMatrix MatCPT; 00098 vector<float> vecdProbs; 00099 00100 vecbDatum[ iNode - 1 ] = 0; 00101 vecdProbs.clear( ); 00102 BNSmile.Evaluate( vecbDatum, vecdProbs, false, iNode, true ); 00103 for( dSum = 0,i = 0; i < vecdProbs.size( ); ++i ) 00104 dSum += vecdProbs[ i ]; 00105 vecdProbs.push_back( 1 - dSum ); 00106 vecdOut.clear( ); 00107 for( bValue = 0; bValue < BNSmile.GetValues( iNode ); ++bValue ) { 00108 vecbDatum[ iNode ] = bValue + 1; 00109 BNSmile.Evaluate( vecbDatum, vecdOut, false, 0, true ); } 00110 00111 for( dSum = 0,i = 0; i < vecdOut.size( ); ++i ) { 00112 vecdOut[ i ] = 1 - vecdOut[ i ]; 00113 d = fabs( dPrior - vecdOut[ i ] ); 00114 if( psData->m_fBins ) 00115 psData->m_vecdResults[ iResult++ ] = ( vecdOut[ i ] > dPrior ) ? ( d / ( 1 - dPrior ) ) : 00116 -( d / dPrior ); 00117 else 00118 dSum += d * vecdProbs[ i ]; } 00119 if( !psData->m_fBins ) 00120 psData->m_vecdResults[ psData->m_pmapstriNodes->find( vecstrNodes[ iNode ] )->second ] = dSum; } 00121 00122 return NULL; } 00123 00124 int MainPosteriors( const gengetopt_args_info& sArgs ) { 00125 size_t i, j, k, iDSLBase, iDSLOffset; 00126 vector<string> vecstrNodes; 00127 map<string,size_t> mapstriNodes; 00128 vector<pthread_t> vecpthdThreads; 00129 vector<SData> vecsData; 00130 00131 for( iDSLOffset = 0; iDSLOffset < sArgs.inputs_num; ++iDSLOffset ) { 00132 CBayesNetSmile BNSmile; 00133 vector<string> vecstrCur; 00134 00135 if( !BNSmile.Open( sArgs.inputs[ iDSLOffset ] ) ) { 00136 cerr << "Couldn't open: " << sArgs.inputs[ iDSLOffset ] << endl; 00137 return 1; } 00138 BNSmile.GetNodes( vecstrCur ); 00139 for( i = 1; i < vecstrCur.size( ); ++i ) 00140 if( mapstriNodes.find( vecstrCur[ i ] ) == mapstriNodes.end( ) ) { 00141 mapstriNodes[ vecstrCur[ i ] ] = vecstrNodes.size( ); 00142 vecstrNodes.push_back( vecstrCur[ i ] ); } } 00143 if( !sArgs.bins_flag ) { 00144 for( i = 0; i < vecstrNodes.size( ); ++i ) 00145 cout << '\t' << vecstrNodes[ i ]; 00146 cout << endl; } 00147 00148 vecpthdThreads.resize( sArgs.threads_arg ); 00149 vecsData.resize( vecpthdThreads.size( ) ); 00150 for( iDSLBase = 0; iDSLBase < sArgs.inputs_num; iDSLBase += sArgs.threads_arg ) { 00151 for( iDSLOffset = 0; ( iDSLOffset < (size_t)sArgs.threads_arg ) && ( ( iDSLBase + iDSLOffset ) < 00152 sArgs.inputs_num ); ++iDSLOffset ) { 00153 vecsData[ iDSLOffset ].m_fBins = !!sArgs.bins_flag; 00154 vecsData[ iDSLOffset ].m_pmapstriNodes = &mapstriNodes; 00155 vecsData[ iDSLOffset ].m_szDSL = sArgs.inputs[ iDSLBase + iDSLOffset ]; 00156 if( pthread_create( &vecpthdThreads[ iDSLOffset ], NULL, posteriorsOneDSL, 00157 &vecsData[ iDSLOffset ] ) ) { 00158 cerr << "Couldn't create thread: " << sArgs.inputs[ iDSLBase + iDSLOffset ] << endl; 00159 return 1; } } 00160 for( i = 0; i < iDSLOffset; ++i ) { 00161 void* pValue; 00162 float d; 00163 size_t iResult; 00164 const SData& sDatum = vecsData[ i ]; 00165 00166 pthread_join( vecpthdThreads[ i ], &pValue ); 00167 if( pValue ) 00168 return 1; 00169 if( sArgs.bins_flag ) 00170 for( iResult = j = 0; j < sDatum.m_veciBins.size( ); ++j ) 00171 for( k = 0; k < sDatum.m_veciBins[ j ]; ++k ) 00172 cout << ( iDSLBase + i ) << '\t' << j << '\t' << k << '\t' << 00173 sDatum.m_vecdResults[ iResult++ ] << endl; 00174 else { 00175 cout << sArgs.inputs[ iDSLBase + i ]; 00176 for( j = 0; j < sDatum.m_vecdResults.size( ); ++j ) { 00177 cout << '\t'; 00178 if( !CMeta::IsNaN( d = sDatum.m_vecdResults[ j ] ) ) 00179 cout << d; } 00180 cout << endl; } } } 00181 00182 return 0; } 00183 00184 int MainSums( const gengetopt_args_info& sArgs ) { 00185 size_t iDSL; 00186 00187 for( iDSL = 0; iDSL < sArgs.inputs_num; ++iDSL ) { 00188 CBayesNetSmile BNSmile; 00189 size_t iNode; 00190 unsigned char bValue; 00191 float dSum; 00192 vector<string> vecstrNodes; 00193 00194 if( !BNSmile.Open( sArgs.inputs[ iDSL ] ) ) { 00195 cerr << "Couldn't open: " << sArgs.inputs[ iDSL ] << endl; 00196 return 1; } 00197 BNSmile.GetNodes( vecstrNodes ); 00198 if( !iDSL ) { 00199 for( iNode = 1; iNode < vecstrNodes.size( ); ++iNode ) 00200 cout << '\t' << vecstrNodes[ iNode ]; 00201 cout << endl; } 00202 00203 cout << sArgs.inputs[ iDSL ]; 00204 for( iNode = 1; iNode < vecstrNodes.size( ); ++iNode ) { 00205 CDataMatrix MatCPT; 00206 00207 dSum = 0; 00208 BNSmile.GetCPT( iNode, MatCPT ); 00209 for( bValue = 0; bValue < MatCPT.GetRows( ); ++bValue ) 00210 dSum += fabs( MatCPT.Get( bValue, 0 ) - MatCPT.Get( bValue, 1 ) ); 00211 cout << '\t' << dSum; } 00212 cout << endl; } 00213 00214 return 0; } 00215 00216 int MainRatios( const gengetopt_args_info& sArgs ) { 00217 size_t iDSL; 00218 00219 for( iDSL = 0; iDSL < sArgs.inputs_num; ++iDSL ) { 00220 CBayesNetSmile BNSmile; 00221 size_t iNode; 00222 unsigned char bValue; 00223 float dProd; 00224 vector<string> vecstrNodes; 00225 00226 if( !BNSmile.Open( sArgs.inputs[ iDSL ] ) ) { 00227 cerr << "Couldn't open: " << sArgs.inputs[ iDSL ] << endl; 00228 return 1; } 00229 BNSmile.GetNodes( vecstrNodes ); 00230 if( !iDSL ) { 00231 for( iNode = 1; iNode < vecstrNodes.size( ); ++iNode ) 00232 cout << '\t' << vecstrNodes[ iNode ]; 00233 cout << endl; } 00234 00235 cout << sArgs.inputs[ iDSL ]; 00236 for( iNode = 1; iNode < vecstrNodes.size( ); ++iNode ) { 00237 CDataMatrix MatCPT; 00238 float dMin, dMax; 00239 00240 dProd = 1; 00241 BNSmile.GetCPT( iNode, MatCPT ); 00242 for( bValue = 0; bValue < MatCPT.GetRows( ); ++bValue ) { 00243 dMin = MatCPT.Get( bValue, 0 ); 00244 dMax = MatCPT.Get( bValue, 1 ); 00245 if( dMin > dMax ) 00246 swap( dMin, dMax ); 00247 dProd *= dMax / dMin; } 00248 cout << '\t' << log( dProd ); } 00249 cout << endl; } 00250 00251 return 0; }