Sleipnir
src/pst.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef PST_H
00023 #define PST_H
00024 
00025 #include "psti.h"
00026 #include "meta.h"
00027 
00028 namespace Sleipnir {
00029 
00055 class CPST : protected CPSTImpl {
00056 public:
00064     CPST( size_t iArity ) : CPSTImpl(iArity) { }
00065 
00066     void RemoveRCs( float dPenaltyGap, float dPenaltyMismatch, CPST& PSTOut ) const;
00067 
00083     void Add( const std::string& strSequence, const CPST& PST, int iOffset ) {
00084 
00085         if( iOffset < 0 ) {
00086             Add( strSequence );
00087             Add( PST, -iOffset ); }
00088         else {
00089             Add( PST );
00090             Add( strSequence, iOffset ); } }
00091 
00106     void Add( const std::string& strOne, const std::string& strTwo, int iOffset ) {
00107         size_t  i, j;
00108 
00109         if( iOffset < 0 )
00110             return Add( strTwo, strOne, -iOffset );
00111         i = CPSTImpl::Add( strOne, 0, m_sRoot );
00112         j = CPSTImpl::Add( strTwo, iOffset, m_sRoot );
00113         if( ( i = max( i, j ) ) > m_iDepth )
00114             m_iDepth = i; }
00115 
00127     void Add( const std::string& strSequence, size_t iOffset = 0 ) {
00128         size_t  i;
00129 
00130         if( ( i = CPSTImpl::Add( strSequence, iOffset, m_sRoot ) ) > GetDepth( ) )
00131             m_iDepth = i; }
00132 
00145     void Add( const CPST& PST, size_t iOffset = 0 ) {
00146         size_t  i;
00147 
00148         if( ( i = CPSTImpl::Add( PST.m_sRoot, iOffset, m_sRoot ) ) > GetDepth( ) )
00149             m_iDepth = i; }
00150 
00168     float GetMatch( const std::string& strTarget, size_t iOffset = 0 ) const {
00169         float   dPMatch;
00170         size_t  iMatched;
00171 
00172         iMatched = 0;
00173         return ( ( ( dPMatch = CPSTImpl::GetMatch( strTarget, m_sRoot, iOffset, iMatched ) ) && iMatched ) ?
00174             ( dPMatch * pow( 1.0f / m_iArity, (float)( GetDepth( ) - iMatched ) ) ) : 0 ); }
00175 
00183     size_t GetDepth( ) const {
00184 
00185         return m_iDepth; }
00186 
00207     std::string GetMotif( ) const {
00208 
00209         return CPSTImpl::GetMotif( m_sRoot ); }
00210 
00237     float Align( const std::string& strSequence, float dPenaltyGap, float dPenaltyMismatch, float dCutoff,
00238         int& iOffset ) const {
00239         float   dRet;
00240 
00241         dRet = CPSTImpl::Align( m_sRoot, GetDepth( ), strSequence, strSequence.length( ), dPenaltyGap,
00242             dPenaltyMismatch, dCutoff, iOffset );
00243         iOffset *= -1;
00244         return dRet; }
00245 
00272     float Align( const CPST& PST, float dPenaltyGap, float dPenaltyMismatch, float dCutoff,
00273         int& iOffset ) const {
00274 
00275         return CPSTImpl::Align( m_sRoot, GetDepth( ), PST.m_sRoot, PST.GetDepth( ), dPenaltyGap,
00276             dPenaltyMismatch, dCutoff, iOffset ); }
00277 
00291     bool Open( const std::string& strPST ) {
00292 
00293         return ( ( m_iDepth = CPSTImpl::Open( strPST, m_sRoot ) ) != -1 ); }
00294 
00313     bool GetPWM( CFullMatrix<uint16_t>& MatPWM, const char* szSymbols ) const {
00314         std::map<unsigned char, size_t>                 mapciChars;
00315         std::vector<size_t>                             veciOrder;
00316         std::map<unsigned char, size_t>::const_iterator iterChar;
00317         size_t                                          i, j;
00318 
00319         if( strlen( szSymbols ) != m_iArity )
00320             return false;
00321 
00322         MatPWM.Initialize( m_iArity, GetDepth( ) );
00323         MatPWM.Clear( );
00324         if( !CPSTImpl::GetPWM( m_sRoot, 0, mapciChars, MatPWM ) )
00325             return false;
00326 
00327         veciOrder.resize( MatPWM.GetRows( ) );
00328         for( i = j = 0; i < m_iArity; ++i )
00329             veciOrder[ i ] = ( ( iterChar = mapciChars.find( szSymbols[ i ] ) ) == mapciChars.end( ) ) ?
00330                 ( mapciChars.size( ) + j++ ) : iterChar->second;
00331         CMeta::Permute( MatPWM.Get( ), veciOrder );
00332         return true; }
00333 
00341     size_t Integrate( ) const {
00342         size_t  iRet;
00343 
00344         CPSTImpl::Integrate( m_sRoot, iRet = 0 );
00345         return iRet; }
00346 
00357     bool Simplify( ) {
00358 
00359         return CPSTImpl::Simplify( 1.0f / m_iArity, m_sRoot ); }
00360 };
00361 
00362 }
00363 
00364 #endif // PST_H