Sleipnir
src/hmm.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef HMM_H
00023 #define HMM_H
00024 
00025 #include "hmmi.h"
00026 
00027 namespace Sleipnir {
00028 
00037 class CHMM : protected CHMMImpl {
00038 public:
00052     void Open( size_t iDegree, const std::string& strAlphabet ) {
00053 
00054         m_iDegree = iDegree;
00055         m_strAlphabet = strAlphabet;
00056         m_MatTransitions.Initialize( GetStates( ), GetSymbols( ) - 1 );
00057         m_MatTransitions.Clear( ); }
00058 
00066     void Save( std::ostream& ostm ) const {
00067         size_t  i, j;
00068 
00069         ostm << m_iDegree << endl;
00070         ostm << m_strAlphabet << endl;
00071         for( i = 0; i < m_MatTransitions.GetRows( ); ++i ) {
00072             ostm << Decode( i );
00073             for( j = 0; j < m_MatTransitions.GetColumns( ); ++j )
00074                 ostm << '\t' << m_MatTransitions.Get( i, j );
00075             ostm << endl; } }
00076 
00093     bool Add( const std::string& strData ) {
00094         size_t  i, iState;
00095 
00096         iState = 0;
00097         for( i = 0; i < strData.length( ); ++i ) {
00098             m_MatTransitions.Get( iState, Encode( strData[ i ] ) )++;
00099             iState = ( i < m_iDegree ) ? Encode( strData, i + 1 ) :
00100                 Encode( strData.substr( i - m_iDegree + 1, m_iDegree ), m_iDegree );
00101             if( iState == -1 )
00102                 return false; }
00103 
00104         return true; }
00105 
00120     std::string Get( size_t iLength ) const {
00121         std::string strRet;
00122         size_t      i, iState, iTotal, iCur;
00123 
00124         for( iState = 0; strRet.length( ) < iLength; ) {
00125             for( iTotal = i = 0; i < m_MatTransitions.GetColumns( ); ++i )
00126                 iTotal += m_MatTransitions.Get( iState, i );
00127             if( ( iCur = rand( ) ) == RAND_MAX )
00128                 iCur--;
00129             iCur = (size_t)( ( (float)iCur / RAND_MAX ) * iTotal );
00130             for( i = 0; ( i + 1 ) < m_MatTransitions.GetColumns( ); ++i ) {
00131                 iTotal -= m_MatTransitions.Get( iState, i );
00132                 if( iCur >= iTotal )
00133                     break; }
00134             strRet += m_strAlphabet[ i ];
00135             iState = ( ( iState * GetSymbols( ) ) + i + 1 ) % m_MatTransitions.GetRows( ); }
00136 
00137         return strRet; }
00138 
00143     void SetUniform( ) {
00144         size_t  i, j;
00145 
00146         for( i = 0; i < m_MatTransitions.GetRows( ); ++i )
00147             for( j = 0; j < m_MatTransitions.GetColumns( ); ++j )
00148                 m_MatTransitions.Set( i, j, 1 ); }
00149 };
00150 
00151 }
00152 
00153 #endif // HMM_H