Sleipnir
src/hmmi.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef HMMI_H
00023 #define HMMI_H
00024 
00025 #include "fullmatrix.h"
00026 
00027 #include <cstdlib>
00028 
00029 namespace Sleipnir {
00030 
00031 class CHMMImpl {
00032 protected:
00033     size_t GetStates( ) const {
00034         size_t  i, iRet;
00035 
00036         iRet = 1;
00037         for( i = 0; i < m_iDegree; ++i )
00038             iRet *= GetSymbols( );
00039 
00040         return iRet; }
00041 
00042     size_t GetSymbols( ) const {
00043 
00044         return ( m_strAlphabet.length( ) + 1 ); }
00045 
00046     size_t Encode( const std::string& strData, size_t iCount ) const {
00047         size_t  i, iRet;
00048 
00049         iRet = 0;
00050         for( i = 0; ( i < iCount ) && ( i < strData.size( ) ); ++i )
00051             iRet = ( iRet * GetSymbols( ) ) + Encode( strData[ i ] ) + 1;
00052 
00053         return iRet; }
00054 
00055     std::string Decode( size_t iState ) const {
00056         std::string strRet;
00057         size_t      i, iCur;
00058 
00059         for( i = 0; i < m_iDegree; ++i ) {
00060             iCur = iState % GetSymbols( );
00061             strRet = ( iCur ? m_strAlphabet[ iCur - 1 ] : '_' ) + strRet;
00062             iState = ( iState - iCur ) / GetSymbols( ); }
00063 
00064         return strRet; }
00065 
00066     size_t Encode( char cDatum ) const {
00067         size_t  i;
00068 
00069         for( i = 0; i < m_strAlphabet.length( ); ++i )
00070             if( cDatum == m_strAlphabet[ i ] )
00071                 return i;
00072 
00073 // I can't think of a better way to handle all exception cases...
00074         return ( rand( ) % m_strAlphabet.length( ) ); }
00075 
00076     std::string         m_strAlphabet;
00077     size_t              m_iDegree;
00078     CFullMatrix<size_t> m_MatTransitions;
00079 };
00080 
00081 }
00082 
00083 #endif // HMMI_H