Sleipnir
src/bayesneti.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef BAYESNETI_H
00023 #define BAYESNETI_H
00024 
00025 #ifndef NO_SMILE
00026 #include <smile.h>
00027 #include <syscoord.h>
00028 #endif // NO_SMILE
00029 
00030 #include "dataset.h"
00031 //#include "trie.h"
00032 
00033 namespace pnl {
      // Forward declaration only: this header refers to pnl::CBNet solely through
      // a pointer (CBayesNetPNLImpl::m_pPNLNet), so the full class definition is
      // not required here.
00034 
00035 class CBNet;
00036 
00037 }
00038 
00039 namespace Sleipnir {
00040 
00041 class CBayesNetPNL;   // forward declaration; taken by reference by CBayesNetSmileImpl::ConvertGraph/ConvertCPTs
00042 class CBayesNetSmile; // forward declaration; used only as a pointer-to-const (CBayesNetSmileImpl::m_pDefaults)
00043 class CPCLPair;       // forward declaration; taken by const reference by one CBayesNetImpl::EncodeDatum overload
00044 
00045 class CBayesNetImpl {
      // Common base of the Bayes net implementation classes in this header:
      // CBayesNetSmileImpl and CBayesNetPNLImpl both derive from it using
      // protected inheritance.  Every member is protected, so the constants,
      // typedefs and static helpers below are reachable only from derived classes.
      // Only declarations live here; the function bodies and the contents of the
      // c_sz* arrays are defined elsewhere.
00046 protected:
      // NOTE(review): what this threshold of 25 applies to is not visible in this
      // header -- confirm against the users of c_iMinimum in the implementation.
00047     static const size_t c_iMinimum  = 25;
      // Character constants; presumably the alphabet used by EncodeDatum and
      // DecodeDatum ('_' for a missing value, present values counted up from
      // 'A') -- TODO confirm against the implementation.
00048     static const char   c_cMissing  = '_';
00049     static const char   c_cBase     = 'A';
      // String constants declared without a size or initializer; their contents
      // come from the out-of-class definitions.
00050     static const char   c_szFR[];
00051     static const char   c_szZero[];
00052 
      // Two-level vector of floats.  Note that the element type is float even
      // though the typedef name ends in "D".
00053     typedef std::vector<std::vector<float> >    TVecVecD;
      // Maps a string key to a size_t; presumably an encoded datum (see
      // EncodeDatum) to a count or index -- TODO confirm.
00054     typedef std::map<std::string, size_t>       TMapData;
      // Disabled trie-based alternative; the matching "trie.h" include in this
      // header is commented out as well.
00055 //  typedef CTrie<size_t>                       TTrieData;
00056 
      // Protected constructor: only derived classes can construct this type.
      // The bool argument presumably initialises m_fGroup -- TODO confirm.
00057     CBayesNetImpl( bool );
00058 
      // Static encode/decode helpers (bodies are not part of this header).
      //   EncodeData  - takes a const IDataset* and a non-const TMapData&
      //                 (presumably the output map).
      //   EncodeDatum - two overloads, both returning std::string: one takes an
      //                 IDataset plus two size_t values, the other a CPCLPair, a
      //                 size_t and a vector of size_t.
      //   DecodeDatum - takes an encoded string and a non-const vector<size_t>&
      //                 (presumably filled with the decoded values).
      //   IsAnswer    - boolean predicate on a string.
00059     static void EncodeData( const IDataset*, TMapData& );
00060     static std::string EncodeDatum( const IDataset*, size_t, size_t );
00061     static std::string EncodeDatum( const CPCLPair&, size_t, const std::vector<size_t>& );
00062     static void DecodeDatum( const std::string&, std::vector<size_t>& );
00063     static bool IsAnswer( const std::string& );
00064 
      // The only per-instance state of this base.  Presumably selects grouped
      // versus ungrouped handling (cf. LearnGrouped / LearnUngrouped in
      // CBayesNetSmileImpl) -- TODO confirm.
00065     bool    m_fGroup;
00066 };
00067 
00068 #ifndef NO_SMILE
00069 
00070 class CBayesNetSmileImpl : protected CBayesNetImpl {
      // Implementation base that stores a DSL_network by value (m_SmileNet) and
      // declares the Learn*, FillCPTs, Evaluate, Convert* and ELR* routines that
      // work with it.  The class sits inside the "#ifndef NO_SMILE" region of this
      // header, so it is not declared in NO_SMILE builds.  Apart from the inline
      // IsNaive( ) and IsContinuous( ) queries, only declarations appear here.
      // DSL_network, DSL_node and DSL_Dmatrix are presumably supplied by
      // <smile.h>, which is included under the same guard -- TODO confirm.
00071 protected:
      // Grants CBayesNetFN access to the protected members of this class.
00072     friend class CBayesNetFN;
00073 
      // String constant whose contents come from the out-of-class definition.
00074     static const char   c_szGaussian[];
00075 
      // Static helpers: they receive the network, node or TVecVecD values as
      // explicit arguments and use no instance state.
      // NOTE(review): "ELR" is not expanded anywhere in this header; names such as
      // ELRLineSearch, ELRBracket and ELRBrent suggest a line-search based
      // optimiser -- confirm in the implementation.
00076     static bool IsGaussian( const DSL_network& );
00077     static bool IsNaive( const DSL_network& );
00078     static float ELRDot( const TVecVecD&, const TVecVecD& );
00079     static float ELRAvoidZero( float );
00080     static void ELRComputeDirection( float, const TVecVecD&, TVecVecD& );
00081     static bool GetCPT( DSL_node*, CDataMatrix& );
00082 
      // Protected constructor taking the same bool argument as the base class
      // constructor (presumably forwarded to it -- TODO confirm).
00083     CBayesNetSmileImpl( bool );
00084 
      // Const members that take a non-const CBayesNetPNL&; the bool result is
      // presumably a success flag -- TODO confirm.
00085     bool ConvertGraph( CBayesNetPNL& ) const;
00086     bool ConvertCPTs( CBayesNetPNL& ) const;
      // The trailing size_t defaults to 1 when omitted.
00087     void LearnExpected( DSL_node*, DSL_Dmatrix*, size_t = 1 );
      // Const member with the same parameter list as CBayesNetPNLImpl::Evaluate.
00088     bool Evaluate( const IDataset*, CDat*, TVecVecD*, bool ) const;
      // Three FillCPTs overloads: one reading from an IDataset, and two taking a
      // vector<bool> plus either a string or a vector of unsigned char.  In the
      // latter two the final bool defaults to false.
00089     bool FillCPTs( const IDataset*, size_t, size_t, bool, bool );
00090     bool FillCPTs( const std::vector<bool>&, const std::string&, bool, bool, bool = false );
00091     bool FillCPTs( const std::vector<bool>&, const std::vector<unsigned char>&, bool, bool, bool = false );
      // Learning entry points, each taking the IDataset to learn from.
00092     bool LearnGrouped( const IDataset*, size_t, bool );
00093     bool LearnUngrouped( const IDataset*, size_t, bool );
00094     bool LearnNaive( const IDataset*, bool );
00095     bool LearnELR( const IDataset*, size_t, bool );
      // Non-static ELR helpers.  Most take a vector<bool> and a TMapData as their
      // first two arguments; TVecVecD values are passed by const reference when
      // read-only and by non-const reference otherwise, and several float&
      // parameters are non-const (presumably in/out values -- TODO confirm).
00096     size_t ELRCountParameters( ) const;
00097     void ELRCopyParameters( TVecVecD& );
00098     void ELRComputeGradient( const std::vector<bool>&, const TMapData&, bool, TVecVecD& );
00099     void ELRUpdateGradient( float, TVecVecD& );
00100     void ELRNormalizeDirection( TVecVecD& ) const;
00101     float ELRLineSearch( const std::vector<bool>&, const TMapData&, const TVecVecD&, const TVecVecD&, TVecVecD&,
00102         float&, float&, bool );
00103     float ELREvalFunction( const std::vector<bool>&, const TMapData&, float, const TVecVecD&, const TVecVecD&,
00104         TVecVecD&, bool );
00105     void ELRBracket( const std::vector<bool>&, const TMapData&, const TVecVecD&, const TVecVecD&, TVecVecD&,
00106         float&, float&, float&, float&, float&, float&, bool );
00107     float ELRConditionalLikelihood( const std::vector<bool>&, const TMapData&, bool );
00108     float ELRBrent( const std::vector<bool>&, const TMapData&, const TVecVecD&, const TVecVecD&, TVecVecD&,
00109         float&, float&, float, float, float, float, bool );
00110 
      // Instance query: returns false when m_fSmileNet is false; otherwise returns
      // the result of the static IsNaive( const DSL_network& ) applied to
      // m_SmileNet (the call is explicitly class-qualified).
00111     bool IsNaive( ) const {
00112 
00113         return ( m_fSmileNet ? CBayesNetSmileImpl::IsNaive( m_SmileNet ) : false ); }
00114 
      // Instance query: returns false when m_fSmileNet is false; otherwise returns
      // IsGaussian( m_SmileNet ).
00115     bool IsContinuous( ) const {
00116 
00117         return ( m_fSmileNet ? IsGaussian( m_SmileNet ) : false ); }
00118 
      // m_fSmileNet gates the use of m_SmileNet in the two inline queries above.
      // m_pDefaults points to a const CBayesNetSmile; ownership and lifetime are
      // not visible in this header -- TODO confirm that it is non-owning.
00119     bool                    m_fSmileNet;
00120     DSL_network             m_SmileNet;
00121     const CBayesNetSmile*   m_pDefaults;
00122 };
00123 
00124 #endif // NO_SMILE
00125 
00126 class CBayesNetPNLImpl : protected CBayesNetImpl {
      // Implementation base that holds a pointer to a pnl::CBNet, a type this
      // header only forward-declares.  Unlike CBayesNetSmileImpl, this class is
      // declared outside the "#ifndef NO_SMILE" region.  All members are
      // protected and only declarations appear here.
00127 protected:
      // Gives CBayesNetSmileImpl access to the protected members, presumably for
      // its ConvertGraph / ConvertCPTs routines, which take a CBayesNetPNL& --
      // TODO confirm.
00128     friend class CBayesNetSmileImpl;
00129 
      // String constant whose contents come from the out-of-class definition.
00130     static const char   c_szBN[];
00131 
      // Protected constructor and non-virtual destructor, both defined elsewhere.
      // NOTE(review): the class declares a destructor and holds a raw pointer but
      // declares no copy constructor or copy assignment.  If the destructor
      // releases m_pPNLNet, copying an instance would free it twice -- confirm in
      // the implementation file.
00132     CBayesNetPNLImpl( bool );
00133     ~CBayesNetPNLImpl( );
00134 
      // Evaluate has the same parameter list as CBayesNetSmileImpl::Evaluate.
      // IsContinuous is only declared here; its body is defined out of line.
00135     bool Evaluate( const IDataset*, CDat*, TVecVecD*, bool ) const;
00136     bool IsContinuous( ) const;
00137 
      // Raw pointer to the underlying network object; whether this class owns it
      // is not visible in this header (see the review note on the destructor).
00138     pnl::CBNet* m_pPNLNet;
00139 };
00140 
00141 }
00142 
00143 #include "bayesnetfni.h"
00144 
00145 #endif // BAYESNETI_H