Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef BAYESNET_H 00023 #define BAYESNET_H 00024 00025 #include "bayesnetint.h" 00026 #include "bayesneti.h" 00027 00028 namespace Sleipnir { 00029 00030 class IDataset; 00031 00032 #ifndef NO_SMILE 00033 00053 class CBayesNetSmile : public CBayesNetSmileImpl, public IBayesNet { 00054 public: 00055 CBayesNetSmile( bool fGroup = true ); 00056 00057 bool Open( const std::vector<std::string>& vecstrFiles, size_t iValues ); 00058 #ifdef PNL_ENABLED 00059 bool Convert( CBayesNetPNL& BNPNL ) const; 00060 #endif // PNL_ENABLED 00061 bool Open( const IDataset* pDataset, const std::vector<std::string>& vecstrNames, 00062 const std::vector<size_t>& veciDefaults ); 00063 bool Open( const CBayesNetSmile& BNPrior, const std::vector<CBayesNetSmile*>& vecpBNs ); 00064 bool Open( const CBayesNetMinimal& BNMinimal, const std::vector<std::string>& vecstrNames ); 00065 float Evaluate( size_t iNode, unsigned char bValue ) const; 00066 unsigned char GetDefault( size_t iNode ) const; 00067 00081 void SetDefault( const CBayesNetSmile& Defaults ) { 00082 00083 m_pDefaults = &Defaults; } 00084 00085 bool Learn( const IDataset* pDataset, size_t iIterations, bool fZero = false, bool fELR = false ); 00086 bool Evaluate( const std::vector<unsigned char>& vecbDatum, std::vector<float>& vecdResults, 00087 bool fZero = false, size_t iNode = 0, bool fIgnoreMissing = false ) const; 00088 bool Evaluate( const CPCLPair& PCLData, CPCL& PCLResults, bool fZero = false, 00089 int iAlgorithm = DSL_ALG_BN_LAURITZEN ) const; 00090 void GetNodes( std::vector<std::string>& vecstrNodes ) const; 00091 void Randomize( ); 00092 void Randomize( size_t iNode ); 00093 void Reverse( size_t iNode ); 00094 00095 bool Open( const char* szFile ) { 00096 00097 return ( m_fSmileNet = !m_SmileNet.ReadFile( szFile ) ); } 00098 00099 bool Save( const char* szFile ) const { 00100 00101 return ( m_fSmileNet ? !((CBayesNetSmile*)this)->m_SmileNet.WriteFile( szFile ) : false ); } 00102 00103 bool GetCPT( size_t iNode, CDataMatrix& MatCPT ) const { 00104 00105 return CBayesNetSmileImpl::GetCPT( m_SmileNet.GetNode( (int)iNode ), MatCPT ); } 00106 00107 unsigned char GetValues( size_t iNode ) const { 00108 00109 return m_SmileNet.GetNode( (int)iNode )->Definition( )->GetNumberOfOutcomes( ); } 00110 00111 bool IsContinuous( size_t iNode ) const { 00112 UNUSED_PARAMETER(iNode); 00113 00114 return IsContinuous( ); } 00115 00116 bool IsContinuous( ) const { 00117 00118 return CBayesNetSmileImpl::IsContinuous( ); } 00119 00120 bool Evaluate( const IDataset* pDataset, std::vector<std::vector<float> >& vecvecdResults, 00121 bool fZero ) const { 00122 00123 return CBayesNetSmileImpl::Evaluate( pDataset, NULL, &vecvecdResults, fZero ); } 00124 00125 bool Evaluate( const IDataset* pDataset, CDat& DatResults, bool fZero ) const { 00126 00127 return CBayesNetSmileImpl::Evaluate( pDataset, &DatResults, NULL, fZero ); } 00128 }; 00129 00149 class CBayesNetFN : CBayesNetFNImpl, public IBayesNet { 00150 public: 00151 bool Open( const char* szFile ); 00152 bool Save( const char* szFile ) const; 00153 bool Learn( const IDataset* pDataset, size_t iIterations, bool fZero = false, bool fELR = false ); 00154 bool Evaluate( const std::vector<unsigned char>& vecbDatum, std::vector<float>& vecdResults, 00155 bool fZero = false, size_t iNode = 0, bool fIgnoreMissing = false ) const; 00156 void GetNodes( std::vector<std::string>& vecstrNodes ) const; 00157 unsigned char GetValues( size_t iNode ) const; 00158 bool IsContinuous( ) const; 00159 00160 bool Evaluate( const IDataset* pDataset, std::vector<std::vector<float> >& vecvecdResults, 00161 bool fZero ) const { 00162 00163 return CBayesNetFNImpl::Evaluate( pDataset, NULL, &vecvecdResults, fZero ); } 00164 00165 bool Evaluate( const IDataset* pDataset, CDat& DatResults, bool fZero ) const { 00166 00167 return CBayesNetFNImpl::Evaluate( pDataset, &DatResults, NULL, fZero ); } 00168 00169 bool IsContinuous( size_t iNode ) const { 00170 00171 return m_apNodes[ iNode ]->IsContinuous( ); } 00172 00173 void Randomize( ) { 00174 size_t i; 00175 00176 for( i = 0; i < m_iNodes; ++i ) 00177 Randomize( i ); } 00178 00179 void Randomize( size_t iNode ) { 00180 00181 m_apNodes[ iNode ]->Randomize( ); } 00182 00183 void Reverse( size_t iNode ) { 00184 00185 m_apNodes[ iNode ]->Reverse( ); } 00186 00187 bool GetCPT( size_t iNode, CDataMatrix& MatCPT ) const { 00188 00189 return CBayesNetSmileImpl::GetCPT( m_SmileNet.GetNode( (int)iNode ), MatCPT ); } 00190 00191 bool Evaluate( const CPCLPair& PCLData, CPCL& PCLResults, bool fZero, int iAlgorithm ) const { 00192 UNUSED_PARAMETER(PCLData); 00193 UNUSED_PARAMETER(PCLResults); 00194 UNUSED_PARAMETER(fZero); 00195 UNUSED_PARAMETER(iAlgorithm); 00196 00197 return false; } 00198 }; 00199 00200 #endif // NO_SMILE 00201 00202 #ifdef PNL_ENABLED 00203 00217 class CBayesNetPNL : public CBayesNetPNLImpl, public IBayesNet { 00218 public: 00219 CBayesNetPNL( bool fGroup = true ); 00220 00221 bool Open( const char* szFile ); 00222 bool Save( const char* szFile ) const; 00223 bool Learn( const IDataset* pDataset, size_t iIterations, bool fZero = false, bool fELR = false ); 00224 00225 void GetNodes( std::vector<std::string>& vecstrNodes ) const { 00226 UNUSED_PARAMETER(vecstrNodes); } 00227 00228 bool IsContinuous( size_t iNode ) const { 00229 UNUSED_PARAMETER(iNode); 00230 00231 return IsContinuous( ); } 00232 00233 bool IsContinuous( ) const { 00234 00235 return CBayesNetPNLImpl::IsContinuous( ); } 00236 00237 bool Evaluate( const IDataset* pDataset, std::vector<std::vector<float> >& vecvecdResults, 00238 bool fZero ) const { 00239 00240 return CBayesNetPNLImpl::Evaluate( pDataset, NULL, &vecvecdResults, fZero ); } 00241 00242 bool Evaluate( const IDataset* pDataset, CDat& DatResults, bool fZero ) const { 00243 00244 return CBayesNetPNLImpl::Evaluate( pDataset, &DatResults, NULL, fZero ); } 00245 00246 void Randomize( ) { } 00247 00248 void Randomize( size_t iNode ) { 00249 UNUSED_PARAMETER(iNode); } 00250 00251 void Reverse( size_t iNode ) { 00252 UNUSED_PARAMETER(iNode); } 00253 00254 virtual bool Evaluate( const std::vector<unsigned char>& vecbDatum, std::vector<float>& vecdResults, 00255 bool fZero = false, size_t iNode = 0, bool fIgnoreMissing = false ) const { 00256 00257 return false; } 00258 00259 unsigned char GetValues( size_t iNode ) const { 00260 UNUSED_PARAMETER(iNode); 00261 00262 return 0; } 00263 00264 bool GetCPT( size_t iNode, CDataMatrix& MatCPT ) const { 00265 UNUSED_PARAMETER(iNode); 00266 UNUSED_PARAMETER(MatCPT); 00267 00268 return false; } 00269 00270 bool Evaluate( const CPCLPair& PCLData, CPCL& PCLResults, bool fZero, int iAlgorithm ) const { 00271 UNUSED_PARAMETER(PCLData); 00272 UNUSED_PARAMETER(PCLResults); 00273 UNUSED_PARAMETER(fZero); 00274 UNUSED_PARAMETER(iAlgorithm); 00275 00276 return false; } 00277 }; 00278 00279 #endif // PNL_ENABLED 00280 00291 class CBayesNetMinimal : public CBayesNetMinimalImpl { 00292 public: 00293 #ifndef NO_SMILE 00294 bool Open( const CBayesNetSmile& BNSmile ); 00295 #endif // NO_SMILE 00296 bool Open( std::istream& istm ); 00297 bool OpenCounts( const char* szFileCounts, const std::map<std::string, size_t>& mapstriNodes, 00298 const std::vector<unsigned char>& vecbDefaults, const std::vector<float>& vecdAlphas, 00299 float dPseudocounts = HUGE_VAL, const CBayesNetMinimal* pBNDefault = NULL ); 00300 void Save( std::ostream& ostm ) const; 00301 float Evaluate( const std::vector<unsigned char>& vecbDatum, size_t iOffset = 0 ) const; 00302 bool Evaluate( const std::vector<unsigned char>& vecbData, float* adResults, size_t iGenes, 00303 size_t iStart = 0 ) const; 00304 float Regularize( std::vector<float>& vecdAlphas ) const; 00305 00320 const CDataMatrix& GetCPT( size_t iNode ) const { 00321 00322 return ( iNode ? m_vecNodes[ iNode - 1 ].m_MatCPT : m_MatRoot ); } 00323 00334 size_t GetNodes( ) const { 00335 00336 return ( m_vecNodes.size( ) + 1 ); } 00337 00348 void SetID( const std::string& strID ) { 00349 00350 m_strID = strID; } 00351 00362 const std::string& GetID( ) const { 00363 00364 return m_strID; } 00365 00379 const unsigned char GetDefault( size_t iNode ) const { 00380 00381 return ( iNode ? m_vecNodes[ iNode - 1 ].m_bDefault : 0xFF ); } 00382 }; 00383 00384 } 00385 00386 #endif // BAYESNET_H