// Sleipnir
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef HMM_H 00023 #define HMM_H 00024 00025 #include "hmmi.h" 00026 00027 namespace Sleipnir { 00028 00037 class CHMM : protected CHMMImpl { 00038 public: 00052 void Open( size_t iDegree, const std::string& strAlphabet ) { 00053 00054 m_iDegree = iDegree; 00055 m_strAlphabet = strAlphabet; 00056 m_MatTransitions.Initialize( GetStates( ), GetSymbols( ) - 1 ); 00057 m_MatTransitions.Clear( ); } 00058 00066 void Save( std::ostream& ostm ) const { 00067 size_t i, j; 00068 00069 ostm << m_iDegree << endl; 00070 ostm << m_strAlphabet << endl; 00071 for( i = 0; i < m_MatTransitions.GetRows( ); ++i ) { 00072 ostm << Decode( i ); 00073 for( j = 0; j < m_MatTransitions.GetColumns( ); ++j ) 00074 ostm << '\t' << m_MatTransitions.Get( i, j ); 00075 ostm << endl; } } 00076 00093 bool Add( const std::string& strData ) { 00094 size_t i, iState; 00095 00096 iState = 0; 
00097 for( i = 0; i < strData.length( ); ++i ) { 00098 m_MatTransitions.Get( iState, Encode( strData[ i ] ) )++; 00099 iState = ( i < m_iDegree ) ? Encode( strData, i + 1 ) : 00100 Encode( strData.substr( i - m_iDegree + 1, m_iDegree ), m_iDegree ); 00101 if( iState == -1 ) 00102 return false; } 00103 00104 return true; } 00105 00120 std::string Get( size_t iLength ) const { 00121 std::string strRet; 00122 size_t i, iState, iTotal, iCur; 00123 00124 for( iState = 0; strRet.length( ) < iLength; ) { 00125 for( iTotal = i = 0; i < m_MatTransitions.GetColumns( ); ++i ) 00126 iTotal += m_MatTransitions.Get( iState, i ); 00127 if( ( iCur = rand( ) ) == RAND_MAX ) 00128 iCur--; 00129 iCur = (size_t)( ( (float)iCur / RAND_MAX ) * iTotal ); 00130 for( i = 0; ( i + 1 ) < m_MatTransitions.GetColumns( ); ++i ) { 00131 iTotal -= m_MatTransitions.Get( iState, i ); 00132 if( iCur >= iTotal ) 00133 break; } 00134 strRet += m_strAlphabet[ i ]; 00135 iState = ( ( iState * GetSymbols( ) ) + i + 1 ) % m_MatTransitions.GetRows( ); } 00136 00137 return strRet; } 00138 00143 void SetUniform( ) { 00144 size_t i, j; 00145 00146 for( i = 0; i < m_MatTransitions.GetRows( ); ++i ) 00147 for( j = 0; j < m_MatTransitions.GetColumns( ); ++j ) 00148 m_MatTransitions.Set( i, j, 1 ); } 00149 }; 00150 00151 } 00152 00153 #endif // HMM_H