Sleipnir
src/coalescemotifs.h
/*****************************************************************************
* This file is provided under the Creative Commons Attribution 3.0 license.
*
* You are free to share, copy, distribute, transmit, or adapt this work
* PROVIDED THAT you attribute the work to the authors listed below.
* For more information, please see the following web page:
* http://creativecommons.org/licenses/by/3.0/
*
* This file is a component of the Sleipnir library for functional genomics,
* authored by:
* Curtis Huttenhower (chuttenh@princeton.edu)
* Mark Schroeder
* Maria D. Chikina
* Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
*
* If you use this library, the included executable tools, or any related
* code in your work, please cite the following publication:
* Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
* Olga G. Troyanskaya.
* "The Sleipnir library for computational functional genomics"
*****************************************************************************/
00022 #ifndef COALESCEMOTIFS_H
00023 #define COALESCEMOTIFS_H
00024 
00025 #include "coalescemotifsi.h"
00026 
00027 namespace Sleipnir {
00028 
00029 struct SCoalesceModifierCache;
00030 struct SMotifMatch;
00031 
00050 class CCoalesceMotifLibrary : CCoalesceMotifLibraryImpl {
00051 public:
00052     static bool Open( std::istream& istm, std::vector<SMotifMatch>& vecsMotifs,
00053         CCoalesceMotifLibrary* pMotifs = NULL );
00054 
00065     static std::string GetReverseComplement( const std::string& strKMer ) {
00066 
00067         return CCoalesceMotifLibraryImpl::GetReverseComplement( strKMer ); }
00068 
00076     CCoalesceMotifLibrary( size_t iK ) : CCoalesceMotifLibraryImpl( iK ) { }
00077 
00078     float GetMatch( const std::string& strSequence, uint32_t iMotif, size_t iOffset,
00079         SCoalesceModifierCache& sModifiers ) const;
00080     uint32_t Open( const std::string& strMotif );
00081     bool OpenKnown( std::istream& istm );
00082     std::string GetPWM( uint32_t iMotif, float dCutoffPWMs, float dPenaltyGap, float dPenaltyMismatch,
00083         bool fNoRCs ) const;
00084     bool Simplify( uint32_t iMotif ) const;
00085     bool GetKnown( uint32_t iMotif, SMotifMatch::EType eMatchType, float dPenaltyGap, float dPenaltyMismatch,
00086         std::vector<std::pair<std::string, float> >& vecprstrdKnown, float dPValue = 1 ) const;
00087 
00098     size_t GetKnowns( ) const {
00099 
00100         return m_sKnowns.GetSize( ); }
00101 
00119     std::string GetMotif( uint32_t iMotif ) const {
00120 
00121         return CCoalesceMotifLibraryImpl::GetMotif( iMotif ); }
00122 
00153     uint32_t Merge( uint32_t iOne, uint32_t iTwo, float dCutoff, bool fAllowDuplicates ) {
00154         std::pair<uint32_t, uint32_t>   priiMerged;
00155         TMapPrIII::const_iterator       iterMerged;
00156         uint32_t                        iRet;
00157 
00158         if( iOne == iTwo )
00159             return ( fAllowDuplicates ? iOne : -1 );
00160         priiMerged.first = (uint32_t)min( iOne, iTwo );
00161         priiMerged.second = max( iOne, iTwo );
00162         if( ( iterMerged = m_mappriiiMerged.find( priiMerged ) ) != m_mappriiiMerged.end( ) )
00163             return ( fAllowDuplicates ? iterMerged->second : -1 );
00164 
00165         switch( GetType( iOne ) ) {
00166             case ETypeRC:
00167                 switch( GetType( iTwo ) ) {
00168                     case ETypeKMer:
00169                         iRet = MergeKMerRC( iTwo, iOne, dCutoff, fAllowDuplicates );
00170                         break;
00171 
00172                     case ETypeRC:
00173                         iRet = MergeRCs( iOne, iTwo, dCutoff, fAllowDuplicates );
00174                         break;
00175 
00176                     case ETypePST:
00177                         iRet = MergeRCPST( iOne, *GetPST( iTwo ), dCutoff, fAllowDuplicates );
00178                         break; }
00179                 break;
00180 
00181             case ETypePST:
00182                 switch( GetType( iTwo ) ) {
00183                     case ETypeKMer:
00184                         iRet = MergeKMerPST( GetMotif( iTwo ), *GetPST( iOne ), dCutoff, fAllowDuplicates );
00185                         break;
00186 
00187                     case ETypeRC:
00188                         iRet = MergeRCPST( iTwo, *GetPST( iOne ), dCutoff, fAllowDuplicates );
00189                         break;
00190 
00191                     case ETypePST:
00192                         iRet = MergePSTs( *GetPST( iOne ), *GetPST( iTwo ), dCutoff, fAllowDuplicates );
00193                         break; }
00194                 break;
00195 
00196             case ETypeKMer:
00197                 switch( GetType( iTwo ) ) {
00198                     case ETypeRC:
00199                         iRet = MergeKMerRC( iOne, iTwo, dCutoff, fAllowDuplicates );
00200                         break;
00201 
00202                     case ETypePST:
00203                         iRet = MergeKMerPST( GetMotif( iOne ), *GetPST( iTwo ), dCutoff, fAllowDuplicates );
00204                         break;
00205 
00206                     case ETypeKMer:
00207                         iRet = MergeKMers( GetMotif( iOne ), GetMotif( iTwo ), dCutoff, fAllowDuplicates );
00208                         break; }
00209                 break; }
00210 
00211         if( iRet != -1 )
00212             m_mappriiiMerged[ priiMerged ] = iRet;
00213         return iRet; }
00214 
00238     uint32_t RemoveRCs( uint32_t iMotif, float dPenaltyGap, float dPenaltyMismatch ) {
00239 
00240         switch( GetType( iMotif ) ) {
00241             case ETypePST:
00242                 return CCoalesceMotifLibraryImpl::RemoveRCs( *GetPST( iMotif ), dPenaltyGap,
00243                     dPenaltyMismatch );
00244 
00245             case ETypeRC:
00246                 return (uint32_t)m_veciRC2KMer[ iMotif - GetBaseRCs( ) ]; }
00247 
00248         return iMotif; }
00249 
00272     float Align( uint32_t iOne, uint32_t iTwo, float dCutoff ) {
00273 
00274         switch( GetType( iOne ) ) {
00275             case ETypeRC:
00276                 switch( GetType( iTwo ) ) {
00277                     case ETypeKMer:
00278                         return AlignKMerRC( GetMotif( iTwo ), iOne, dCutoff );
00279 
00280                     case ETypeRC:
00281                         return AlignRCs( iOne, iTwo, dCutoff );
00282 
00283                     case ETypePST:
00284                         return AlignRCPST( iOne, *GetPST( iTwo ), dCutoff ); }
00285 
00286             case ETypePST:
00287                 switch( GetType( iTwo ) ) {
00288                     case ETypeKMer:
00289                         return AlignKMerPST( GetMotif( iTwo ), *GetPST( iOne ), dCutoff );
00290 
00291                     case ETypeRC:
00292                         return AlignRCPST( iTwo, *GetPST( iOne ), dCutoff );
00293 
00294                     case ETypePST:
00295                         return AlignPSTs( *GetPST( iOne ), *GetPST( iTwo ), dCutoff ); } }
00296 
00297         switch( GetType( iTwo ) ) {
00298             case ETypeRC:
00299                 return AlignKMerRC( GetMotif( iOne ), iTwo, dCutoff );
00300 
00301             case ETypePST:
00302                 return AlignKMerPST( GetMotif( iOne ), *GetPST( iTwo ), dCutoff ); }
00303 
00304         return AlignKMers( GetMotif( iOne ), GetMotif( iTwo ), dCutoff ); }
00305 
00318     size_t GetMotifs( ) const {
00319 
00320 // kmers plus reverse complements plus psts
00321         return ( GetBasePSTs( ) + GetPSTs( ) ); }
00322 
00330     size_t GetK( ) const {
00331 
00332         return m_iK; }
00333 
00355     bool GetMatches( const std::string& strKMer, std::vector<uint32_t>& veciMotifs ) const {
00356         uint32_t    iMotif;
00357         size_t      iRC;
00358 
00359         if( IsIgnorableKMer( strKMer ) )
00360             return true;
00361 // kmer
00362         if( ( iMotif = KMer2ID( strKMer ) ) == -1 )
00363             return false;
00364         veciMotifs.push_back( iMotif );
00365 // reverse complement
00366         if( ( iRC = m_veciKMer2RC[ iMotif ] ) != -1 )
00367             veciMotifs.push_back( GetBaseRCs( ) + iRC );
00368         return true; }
00369 
00383     void SetPenaltyGap( float dPenalty ) {
00384 
00385         m_dPenaltyGap = dPenalty; }
00386 
00400     float GetPenaltyGap( ) const {
00401 
00402         return m_dPenaltyGap; }
00403 
00414     void SetPenaltyMismatch( float dPenalty ) {
00415 
00416         m_dPenaltyMismatch = dPenalty; }
00417 
00428     float GetPenaltyMismatch( ) const {
00429 
00430         return m_dPenaltyMismatch; }
00431 
00442     const CPST* GetPST( uint32_t iMotif ) const {
00443 
00444         return ( ( GetType( iMotif ) == ETypePST ) ? CCoalesceMotifLibraryImpl::GetPST( iMotif ) : NULL ); }
00445 };
00446 
00447 }
00448 
00449 #endif // COALESCEMOTIFS_H