Sleipnir
src/bayesnet.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef BAYESNET_H
00023 #define BAYESNET_H
00024 
00025 #include "bayesnetint.h"
00026 #include "bayesneti.h"
00027 
00028 namespace Sleipnir {
00029 
00030 class IDataset;
00031 
00032 #ifndef NO_SMILE
00033 
00053 class CBayesNetSmile : public CBayesNetSmileImpl, public IBayesNet {
00054 public:
00055     CBayesNetSmile( bool fGroup = true );
00056 
00057     bool Open( const std::vector<std::string>& vecstrFiles, size_t iValues );
00058 #ifdef PNL_ENABLED
00059     bool Convert( CBayesNetPNL& BNPNL ) const;
00060 #endif // PNL_ENABLED
00061     bool Open( const IDataset* pDataset, const std::vector<std::string>& vecstrNames,
00062         const std::vector<size_t>& veciDefaults );
00063     bool Open( const CBayesNetSmile& BNPrior, const std::vector<CBayesNetSmile*>& vecpBNs );
00064     bool Open( const CBayesNetMinimal& BNMinimal, const std::vector<std::string>& vecstrNames );
00065     float Evaluate( size_t iNode, unsigned char bValue ) const;
00066     unsigned char GetDefault( size_t iNode ) const;
00067 
00081     void SetDefault( const CBayesNetSmile& Defaults ) {
00082 
00083         m_pDefaults = &Defaults; }
00084 
00085     bool Learn( const IDataset* pDataset, size_t iIterations, bool fZero = false, bool fELR = false );
00086     bool Evaluate( const std::vector<unsigned char>& vecbDatum, std::vector<float>& vecdResults,
00087         bool fZero = false, size_t iNode = 0, bool fIgnoreMissing = false ) const;
00088     bool Evaluate( const CPCLPair& PCLData, CPCL& PCLResults, bool fZero = false,
00089         int iAlgorithm = DSL_ALG_BN_LAURITZEN ) const;
00090     void GetNodes( std::vector<std::string>& vecstrNodes ) const;
00091     void Randomize( );
00092     void Randomize( size_t iNode );
00093     void Reverse( size_t iNode );
00094 
00095     bool Open( const char* szFile ) {
00096 
00097         return ( m_fSmileNet = !m_SmileNet.ReadFile( szFile ) ); }
00098 
00099     bool Save( const char* szFile ) const {
00100 
00101         return ( m_fSmileNet ? !((CBayesNetSmile*)this)->m_SmileNet.WriteFile( szFile ) : false ); }
00102 
00103     bool GetCPT( size_t iNode, CDataMatrix& MatCPT ) const {
00104 
00105         return CBayesNetSmileImpl::GetCPT( m_SmileNet.GetNode( (int)iNode ), MatCPT ); }
00106 
00107     unsigned char GetValues( size_t iNode ) const {
00108 
00109         return m_SmileNet.GetNode( (int)iNode )->Definition( )->GetNumberOfOutcomes( ); }
00110 
00111     bool IsContinuous( size_t iNode ) const {
00112         UNUSED_PARAMETER(iNode);
00113 
00114         return IsContinuous( ); }
00115 
00116     bool IsContinuous( ) const {
00117 
00118         return CBayesNetSmileImpl::IsContinuous( ); }
00119 
00120     bool Evaluate( const IDataset* pDataset, std::vector<std::vector<float> >& vecvecdResults,
00121         bool fZero ) const {
00122 
00123         return CBayesNetSmileImpl::Evaluate( pDataset, NULL, &vecvecdResults, fZero ); }
00124 
00125     bool Evaluate( const IDataset* pDataset, CDat& DatResults, bool fZero ) const {
00126 
00127         return CBayesNetSmileImpl::Evaluate( pDataset, &DatResults, NULL, fZero ); }
00128 };
00129 
00149 class CBayesNetFN : CBayesNetFNImpl, public IBayesNet {
00150 public:
00151     bool Open( const char* szFile );
00152     bool Save( const char* szFile ) const;
00153     bool Learn( const IDataset* pDataset, size_t iIterations, bool fZero = false, bool fELR = false );
00154     bool Evaluate( const std::vector<unsigned char>& vecbDatum, std::vector<float>& vecdResults,
00155         bool fZero = false, size_t iNode = 0, bool fIgnoreMissing = false ) const;
00156     void GetNodes( std::vector<std::string>& vecstrNodes ) const;
00157     unsigned char GetValues( size_t iNode ) const;
00158     bool IsContinuous( ) const;
00159 
00160     bool Evaluate( const IDataset* pDataset, std::vector<std::vector<float> >& vecvecdResults,
00161         bool fZero ) const {
00162 
00163         return CBayesNetFNImpl::Evaluate( pDataset, NULL, &vecvecdResults, fZero ); }
00164 
00165     bool Evaluate( const IDataset* pDataset, CDat& DatResults, bool fZero ) const {
00166 
00167         return CBayesNetFNImpl::Evaluate( pDataset, &DatResults, NULL, fZero ); }
00168 
00169     bool IsContinuous( size_t iNode ) const {
00170 
00171         return m_apNodes[ iNode ]->IsContinuous( ); }
00172 
00173     void Randomize( ) {
00174         size_t  i;
00175 
00176         for( i = 0; i < m_iNodes; ++i )
00177             Randomize( i ); }
00178 
00179     void Randomize( size_t iNode ) {
00180 
00181         m_apNodes[ iNode ]->Randomize( ); }
00182 
00183     void Reverse( size_t iNode ) {
00184 
00185         m_apNodes[ iNode ]->Reverse( ); }
00186 
00187     bool GetCPT( size_t iNode, CDataMatrix& MatCPT ) const {
00188 
00189         return CBayesNetSmileImpl::GetCPT( m_SmileNet.GetNode( (int)iNode ), MatCPT ); }
00190 
00191     bool Evaluate( const CPCLPair& PCLData, CPCL& PCLResults, bool fZero, int iAlgorithm ) const {
00192         UNUSED_PARAMETER(PCLData);
00193         UNUSED_PARAMETER(PCLResults);
00194         UNUSED_PARAMETER(fZero);
00195         UNUSED_PARAMETER(iAlgorithm);
00196 
00197         return false; }
00198 };
00199 
00200 #endif // NO_SMILE
00201 
00202 #ifdef PNL_ENABLED
00203 
00217 class CBayesNetPNL : public CBayesNetPNLImpl, public IBayesNet {
00218 public:
00219     CBayesNetPNL( bool fGroup = true );
00220 
00221     bool Open( const char* szFile );
00222     bool Save( const char* szFile ) const;
00223     bool Learn( const IDataset* pDataset, size_t iIterations, bool fZero = false, bool fELR = false );
00224 
00225     void GetNodes( std::vector<std::string>& vecstrNodes ) const {
00226         UNUSED_PARAMETER(vecstrNodes); }
00227 
00228     bool IsContinuous( size_t iNode ) const {
00229         UNUSED_PARAMETER(iNode);
00230 
00231         return IsContinuous( ); }
00232 
00233     bool IsContinuous( ) const {
00234 
00235         return CBayesNetPNLImpl::IsContinuous( ); }
00236 
00237     bool Evaluate( const IDataset* pDataset, std::vector<std::vector<float> >& vecvecdResults,
00238         bool fZero ) const {
00239 
00240         return CBayesNetPNLImpl::Evaluate( pDataset, NULL, &vecvecdResults, fZero ); }
00241 
00242     bool Evaluate( const IDataset* pDataset, CDat& DatResults, bool fZero ) const {
00243 
00244         return CBayesNetPNLImpl::Evaluate( pDataset, &DatResults, NULL, fZero ); }
00245 
00246     void Randomize( ) { }
00247 
00248     void Randomize( size_t iNode ) {
00249         UNUSED_PARAMETER(iNode); }
00250 
00251     void Reverse( size_t iNode ) {
00252         UNUSED_PARAMETER(iNode); }
00253 
00254     virtual bool Evaluate( const std::vector<unsigned char>& vecbDatum, std::vector<float>& vecdResults,
00255         bool fZero = false, size_t iNode = 0, bool fIgnoreMissing = false ) const {
00256 
00257         return false; }
00258 
00259     unsigned char GetValues( size_t iNode ) const {
00260         UNUSED_PARAMETER(iNode);
00261 
00262         return 0; }
00263 
00264     bool GetCPT( size_t iNode, CDataMatrix& MatCPT ) const {
00265         UNUSED_PARAMETER(iNode);
00266         UNUSED_PARAMETER(MatCPT);
00267 
00268         return false; }
00269 
00270     bool Evaluate( const CPCLPair& PCLData, CPCL& PCLResults, bool fZero, int iAlgorithm ) const {
00271         UNUSED_PARAMETER(PCLData);
00272         UNUSED_PARAMETER(PCLResults);
00273         UNUSED_PARAMETER(fZero);
00274         UNUSED_PARAMETER(iAlgorithm);
00275 
00276         return false; }
00277 };
00278 
00279 #endif // PNL_ENABLED
00280 
00291 class CBayesNetMinimal : public CBayesNetMinimalImpl {
00292 public:
00293 #ifndef NO_SMILE
00294     bool Open( const CBayesNetSmile& BNSmile );
00295 #endif // NO_SMILE
00296     bool Open( std::istream& istm );
00297     bool OpenCounts( const char* szFileCounts, const std::map<std::string, size_t>& mapstriNodes,
00298         const std::vector<unsigned char>& vecbDefaults, const std::vector<float>& vecdAlphas,
00299         float dPseudocounts = HUGE_VAL, const CBayesNetMinimal* pBNDefault = NULL );
00300     void Save( std::ostream& ostm ) const;
00301     float Evaluate( const std::vector<unsigned char>& vecbDatum, size_t iOffset = 0 ) const;
00302     bool Evaluate( const std::vector<unsigned char>& vecbData, float* adResults, size_t iGenes,
00303         size_t iStart = 0 ) const;
00304     float Regularize( std::vector<float>& vecdAlphas ) const;
00305 
00320     const CDataMatrix& GetCPT( size_t iNode ) const {
00321 
00322         return ( iNode ? m_vecNodes[ iNode - 1 ].m_MatCPT : m_MatRoot ); }
00323 
00334     size_t GetNodes( ) const {
00335 
00336         return ( m_vecNodes.size( ) + 1 ); }
00337 
00348     void SetID( const std::string& strID ) {
00349 
00350         m_strID = strID; }
00351 
00362     const std::string& GetID( ) const {
00363 
00364         return m_strID; }
00365 
00379     const unsigned char GetDefault( size_t iNode ) const {
00380 
00381         return ( iNode ? m_vecNodes[ iNode - 1 ].m_bDefault : 0xFF ); }
00382 };
00383 
00384 }
00385 
00386 #endif // BAYESNET_H