Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef PST_H 00023 #define PST_H 00024 00025 #include "psti.h" 00026 #include "meta.h" 00027 00028 namespace Sleipnir { 00029 00055 class CPST : protected CPSTImpl { 00056 public: 00064 CPST( size_t iArity ) : CPSTImpl(iArity) { } 00065 00066 void RemoveRCs( float dPenaltyGap, float dPenaltyMismatch, CPST& PSTOut ) const; 00067 00083 void Add( const std::string& strSequence, const CPST& PST, int iOffset ) { 00084 00085 if( iOffset < 0 ) { 00086 Add( strSequence ); 00087 Add( PST, -iOffset ); } 00088 else { 00089 Add( PST ); 00090 Add( strSequence, iOffset ); } } 00091 00106 void Add( const std::string& strOne, const std::string& strTwo, int iOffset ) { 00107 size_t i, j; 00108 00109 if( iOffset < 0 ) 00110 return Add( strTwo, strOne, -iOffset ); 00111 i = CPSTImpl::Add( strOne, 0, m_sRoot ); 00112 j = CPSTImpl::Add( strTwo, iOffset, m_sRoot ); 00113 if( ( i = max( i, j ) ) > m_iDepth ) 00114 m_iDepth = i; } 00115 00127 void Add( const std::string& strSequence, size_t iOffset = 0 ) { 00128 size_t i; 00129 00130 if( ( i = CPSTImpl::Add( strSequence, iOffset, m_sRoot ) ) > GetDepth( ) ) 00131 m_iDepth = i; } 00132 00145 void Add( const CPST& PST, size_t iOffset = 0 ) { 00146 size_t i; 00147 00148 if( ( i = CPSTImpl::Add( PST.m_sRoot, iOffset, m_sRoot ) ) > GetDepth( ) ) 00149 m_iDepth = i; } 00150 00168 float GetMatch( const std::string& strTarget, size_t iOffset = 0 ) const { 00169 float dPMatch; 00170 size_t iMatched; 00171 00172 iMatched = 0; 00173 return ( ( ( dPMatch = CPSTImpl::GetMatch( strTarget, m_sRoot, iOffset, iMatched ) ) && iMatched ) ? 00174 ( dPMatch * pow( 1.0f / m_iArity, (float)( GetDepth( ) - iMatched ) ) ) : 0 ); } 00175 00183 size_t GetDepth( ) const { 00184 00185 return m_iDepth; } 00186 00207 std::string GetMotif( ) const { 00208 00209 return CPSTImpl::GetMotif( m_sRoot ); } 00210 00237 float Align( const std::string& strSequence, float dPenaltyGap, float dPenaltyMismatch, float dCutoff, 00238 int& iOffset ) const { 00239 float dRet; 00240 00241 dRet = CPSTImpl::Align( m_sRoot, GetDepth( ), strSequence, strSequence.length( ), dPenaltyGap, 00242 dPenaltyMismatch, dCutoff, iOffset ); 00243 iOffset *= -1; 00244 return dRet; } 00245 00272 float Align( const CPST& PST, float dPenaltyGap, float dPenaltyMismatch, float dCutoff, 00273 int& iOffset ) const { 00274 00275 return CPSTImpl::Align( m_sRoot, GetDepth( ), PST.m_sRoot, PST.GetDepth( ), dPenaltyGap, 00276 dPenaltyMismatch, dCutoff, iOffset ); } 00277 00291 bool Open( const std::string& strPST ) { 00292 00293 return ( ( m_iDepth = CPSTImpl::Open( strPST, m_sRoot ) ) != -1 ); } 00294 00313 bool GetPWM( CFullMatrix<uint16_t>& MatPWM, const char* szSymbols ) const { 00314 std::map<unsigned char, size_t> mapciChars; 00315 std::vector<size_t> veciOrder; 00316 std::map<unsigned char, size_t>::const_iterator iterChar; 00317 size_t i, j; 00318 00319 if( strlen( szSymbols ) != m_iArity ) 00320 return false; 00321 00322 MatPWM.Initialize( m_iArity, GetDepth( ) ); 00323 MatPWM.Clear( ); 00324 if( !CPSTImpl::GetPWM( m_sRoot, 0, mapciChars, MatPWM ) ) 00325 return false; 00326 00327 veciOrder.resize( MatPWM.GetRows( ) ); 00328 for( i = j = 0; i < m_iArity; ++i ) 00329 veciOrder[ i ] = ( ( iterChar = mapciChars.find( szSymbols[ i ] ) ) == mapciChars.end( ) ) ? 00330 ( mapciChars.size( ) + j++ ) : iterChar->second; 00331 CMeta::Permute( MatPWM.Get( ), veciOrder ); 00332 return true; } 00333 00341 size_t Integrate( ) const { 00342 size_t iRet; 00343 00344 CPSTImpl::Integrate( m_sRoot, iRet = 0 ); 00345 return iRet; } 00346 00357 bool Simplify( ) { 00358 00359 return CPSTImpl::Simplify( 1.0f / m_iArity, m_sRoot ); } 00360 }; 00361 00362 } 00363 00364 #endif // PST_H