// Sleipnir
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef BAYESNETI_H 00023 #define BAYESNETI_H 00024 00025 #ifndef NO_SMILE 00026 #include <smile.h> 00027 #include <syscoord.h> 00028 #endif // NO_SMILE 00029 00030 #include "dataset.h" 00031 //#include "trie.h" 00032 00033 namespace pnl { 00034 00035 class CBNet; 00036 00037 } 00038 00039 namespace Sleipnir { 00040 00041 class CBayesNetPNL; 00042 class CBayesNetSmile; 00043 class CPCLPair; 00044 00045 class CBayesNetImpl { 00046 protected: 00047 static const size_t c_iMinimum = 25; 00048 static const char c_cMissing = '_'; 00049 static const char c_cBase = 'A'; 00050 static const char c_szFR[]; 00051 static const char c_szZero[]; 00052 00053 typedef std::vector<std::vector<float> > TVecVecD; 00054 typedef std::map<std::string, size_t> TMapData; 00055 // typedef CTrie<size_t> TTrieData; 00056 00057 CBayesNetImpl( bool ); 00058 00059 static void EncodeData( 
const IDataset*, TMapData& ); 00060 static std::string EncodeDatum( const IDataset*, size_t, size_t ); 00061 static std::string EncodeDatum( const CPCLPair&, size_t, const std::vector<size_t>& ); 00062 static void DecodeDatum( const std::string&, std::vector<size_t>& ); 00063 static bool IsAnswer( const std::string& ); 00064 00065 bool m_fGroup; 00066 }; 00067 00068 #ifndef NO_SMILE 00069 00070 class CBayesNetSmileImpl : protected CBayesNetImpl { 00071 protected: 00072 friend class CBayesNetFN; 00073 00074 static const char c_szGaussian[]; 00075 00076 static bool IsGaussian( const DSL_network& ); 00077 static bool IsNaive( const DSL_network& ); 00078 static float ELRDot( const TVecVecD&, const TVecVecD& ); 00079 static float ELRAvoidZero( float ); 00080 static void ELRComputeDirection( float, const TVecVecD&, TVecVecD& ); 00081 static bool GetCPT( DSL_node*, CDataMatrix& ); 00082 00083 CBayesNetSmileImpl( bool ); 00084 00085 bool ConvertGraph( CBayesNetPNL& ) const; 00086 bool ConvertCPTs( CBayesNetPNL& ) const; 00087 void LearnExpected( DSL_node*, DSL_Dmatrix*, size_t = 1 ); 00088 bool Evaluate( const IDataset*, CDat*, TVecVecD*, bool ) const; 00089 bool FillCPTs( const IDataset*, size_t, size_t, bool, bool ); 00090 bool FillCPTs( const std::vector<bool>&, const std::string&, bool, bool, bool = false ); 00091 bool FillCPTs( const std::vector<bool>&, const std::vector<unsigned char>&, bool, bool, bool = false ); 00092 bool LearnGrouped( const IDataset*, size_t, bool ); 00093 bool LearnUngrouped( const IDataset*, size_t, bool ); 00094 bool LearnNaive( const IDataset*, bool ); 00095 bool LearnELR( const IDataset*, size_t, bool ); 00096 size_t ELRCountParameters( ) const; 00097 void ELRCopyParameters( TVecVecD& ); 00098 void ELRComputeGradient( const std::vector<bool>&, const TMapData&, bool, TVecVecD& ); 00099 void ELRUpdateGradient( float, TVecVecD& ); 00100 void ELRNormalizeDirection( TVecVecD& ) const; 00101 float ELRLineSearch( const std::vector<bool>&, const 
TMapData&, const TVecVecD&, const TVecVecD&, TVecVecD&, 00102 float&, float&, bool ); 00103 float ELREvalFunction( const std::vector<bool>&, const TMapData&, float, const TVecVecD&, const TVecVecD&, 00104 TVecVecD&, bool ); 00105 void ELRBracket( const std::vector<bool>&, const TMapData&, const TVecVecD&, const TVecVecD&, TVecVecD&, 00106 float&, float&, float&, float&, float&, float&, bool ); 00107 float ELRConditionalLikelihood( const std::vector<bool>&, const TMapData&, bool ); 00108 float ELRBrent( const std::vector<bool>&, const TMapData&, const TVecVecD&, const TVecVecD&, TVecVecD&, 00109 float&, float&, float, float, float, float, bool ); 00110 00111 bool IsNaive( ) const { 00112 00113 return ( m_fSmileNet ? CBayesNetSmileImpl::IsNaive( m_SmileNet ) : false ); } 00114 00115 bool IsContinuous( ) const { 00116 00117 return ( m_fSmileNet ? IsGaussian( m_SmileNet ) : false ); } 00118 00119 bool m_fSmileNet; 00120 DSL_network m_SmileNet; 00121 const CBayesNetSmile* m_pDefaults; 00122 }; 00123 00124 #endif // NO_SMILE 00125 00126 class CBayesNetPNLImpl : protected CBayesNetImpl { 00127 protected: 00128 friend class CBayesNetSmileImpl; 00129 00130 static const char c_szBN[]; 00131 00132 CBayesNetPNLImpl( bool ); 00133 ~CBayesNetPNLImpl( ); 00134 00135 bool Evaluate( const IDataset*, CDat*, TVecVecD*, bool ) const; 00136 bool IsContinuous( ) const; 00137 00138 pnl::CBNet* m_pPNLNet; 00139 }; 00140 00141 } 00142 00143 #include "bayesnetfni.h" 00144 00145 #endif // BAYESNETI_H