Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef COALESCEMOTIFS_H 00023 #define COALESCEMOTIFS_H 00024 00025 #include "coalescemotifsi.h" 00026 00027 namespace Sleipnir { 00028 00029 struct SCoalesceModifierCache; 00030 struct SMotifMatch; 00031 00050 class CCoalesceMotifLibrary : CCoalesceMotifLibraryImpl { 00051 public: 00052 static bool Open( std::istream& istm, std::vector<SMotifMatch>& vecsMotifs, 00053 CCoalesceMotifLibrary* pMotifs = NULL ); 00054 00065 static std::string GetReverseComplement( const std::string& strKMer ) { 00066 00067 return CCoalesceMotifLibraryImpl::GetReverseComplement( strKMer ); } 00068 00076 CCoalesceMotifLibrary( size_t iK ) : CCoalesceMotifLibraryImpl( iK ) { } 00077 00078 float GetMatch( const std::string& strSequence, uint32_t iMotif, size_t iOffset, 00079 SCoalesceModifierCache& sModifiers ) const; 00080 uint32_t Open( const std::string& strMotif ); 00081 bool OpenKnown( std::istream& istm ); 00082 std::string GetPWM( uint32_t iMotif, float dCutoffPWMs, float dPenaltyGap, float dPenaltyMismatch, 00083 bool fNoRCs ) const; 00084 bool Simplify( uint32_t iMotif ) const; 00085 bool GetKnown( uint32_t iMotif, SMotifMatch::EType eMatchType, float dPenaltyGap, float dPenaltyMismatch, 00086 std::vector<std::pair<std::string, float> >& vecprstrdKnown, float dPValue = 1 ) const; 00087 00098 size_t GetKnowns( ) const { 00099 00100 return m_sKnowns.GetSize( ); } 00101 00119 std::string GetMotif( uint32_t iMotif ) const { 00120 00121 return CCoalesceMotifLibraryImpl::GetMotif( iMotif ); } 00122 00153 uint32_t Merge( uint32_t iOne, uint32_t iTwo, float dCutoff, bool fAllowDuplicates ) { 00154 std::pair<uint32_t, uint32_t> priiMerged; 00155 TMapPrIII::const_iterator iterMerged; 00156 uint32_t iRet; 00157 00158 if( iOne == iTwo ) 00159 return ( fAllowDuplicates ? iOne : -1 ); 00160 priiMerged.first = (uint32_t)min( iOne, iTwo ); 00161 priiMerged.second = max( iOne, iTwo ); 00162 if( ( iterMerged = m_mappriiiMerged.find( priiMerged ) ) != m_mappriiiMerged.end( ) ) 00163 return ( fAllowDuplicates ? iterMerged->second : -1 ); 00164 00165 switch( GetType( iOne ) ) { 00166 case ETypeRC: 00167 switch( GetType( iTwo ) ) { 00168 case ETypeKMer: 00169 iRet = MergeKMerRC( iTwo, iOne, dCutoff, fAllowDuplicates ); 00170 break; 00171 00172 case ETypeRC: 00173 iRet = MergeRCs( iOne, iTwo, dCutoff, fAllowDuplicates ); 00174 break; 00175 00176 case ETypePST: 00177 iRet = MergeRCPST( iOne, *GetPST( iTwo ), dCutoff, fAllowDuplicates ); 00178 break; } 00179 break; 00180 00181 case ETypePST: 00182 switch( GetType( iTwo ) ) { 00183 case ETypeKMer: 00184 iRet = MergeKMerPST( GetMotif( iTwo ), *GetPST( iOne ), dCutoff, fAllowDuplicates ); 00185 break; 00186 00187 case ETypeRC: 00188 iRet = MergeRCPST( iTwo, *GetPST( iOne ), dCutoff, fAllowDuplicates ); 00189 break; 00190 00191 case ETypePST: 00192 iRet = MergePSTs( *GetPST( iOne ), *GetPST( iTwo ), dCutoff, fAllowDuplicates ); 00193 break; } 00194 break; 00195 00196 case ETypeKMer: 00197 switch( GetType( iTwo ) ) { 00198 case ETypeRC: 00199 iRet = MergeKMerRC( iOne, iTwo, dCutoff, fAllowDuplicates ); 00200 break; 00201 00202 case ETypePST: 00203 iRet = MergeKMerPST( GetMotif( iOne ), *GetPST( iTwo ), dCutoff, fAllowDuplicates ); 00204 break; 00205 00206 case ETypeKMer: 00207 iRet = MergeKMers( GetMotif( iOne ), GetMotif( iTwo ), dCutoff, fAllowDuplicates ); 00208 break; } 00209 break; } 00210 00211 if( iRet != -1 ) 00212 m_mappriiiMerged[ priiMerged ] = iRet; 00213 return iRet; } 00214 00238 uint32_t RemoveRCs( uint32_t iMotif, float dPenaltyGap, float dPenaltyMismatch ) { 00239 00240 switch( GetType( iMotif ) ) { 00241 case ETypePST: 00242 return CCoalesceMotifLibraryImpl::RemoveRCs( *GetPST( iMotif ), dPenaltyGap, 00243 dPenaltyMismatch ); 00244 00245 case ETypeRC: 00246 return (uint32_t)m_veciRC2KMer[ iMotif - GetBaseRCs( ) ]; } 00247 00248 return iMotif; } 00249 00272 float Align( uint32_t iOne, uint32_t iTwo, float dCutoff ) { 00273 00274 switch( GetType( iOne ) ) { 00275 case ETypeRC: 00276 switch( GetType( iTwo ) ) { 00277 case ETypeKMer: 00278 return AlignKMerRC( GetMotif( iTwo ), iOne, dCutoff ); 00279 00280 case ETypeRC: 00281 return AlignRCs( iOne, iTwo, dCutoff ); 00282 00283 case ETypePST: 00284 return AlignRCPST( iOne, *GetPST( iTwo ), dCutoff ); } 00285 00286 case ETypePST: 00287 switch( GetType( iTwo ) ) { 00288 case ETypeKMer: 00289 return AlignKMerPST( GetMotif( iTwo ), *GetPST( iOne ), dCutoff ); 00290 00291 case ETypeRC: 00292 return AlignRCPST( iTwo, *GetPST( iOne ), dCutoff ); 00293 00294 case ETypePST: 00295 return AlignPSTs( *GetPST( iOne ), *GetPST( iTwo ), dCutoff ); } } 00296 00297 switch( GetType( iTwo ) ) { 00298 case ETypeRC: 00299 return AlignKMerRC( GetMotif( iOne ), iTwo, dCutoff ); 00300 00301 case ETypePST: 00302 return AlignKMerPST( GetMotif( iOne ), *GetPST( iTwo ), dCutoff ); } 00303 00304 return AlignKMers( GetMotif( iOne ), GetMotif( iTwo ), dCutoff ); } 00305 00318 size_t GetMotifs( ) const { 00319 00320 // kmers plus reverse complements plus psts 00321 return ( GetBasePSTs( ) + GetPSTs( ) ); } 00322 00330 size_t GetK( ) const { 00331 00332 return m_iK; } 00333 00355 bool GetMatches( const std::string& strKMer, std::vector<uint32_t>& veciMotifs ) const { 00356 uint32_t iMotif; 00357 size_t iRC; 00358 00359 if( IsIgnorableKMer( strKMer ) ) 00360 return true; 00361 // kmer 00362 if( ( iMotif = KMer2ID( strKMer ) ) == -1 ) 00363 return false; 00364 veciMotifs.push_back( iMotif ); 00365 // reverse complement 00366 if( ( iRC = m_veciKMer2RC[ iMotif ] ) != -1 ) 00367 veciMotifs.push_back( GetBaseRCs( ) + iRC ); 00368 return true; } 00369 00383 void SetPenaltyGap( float dPenalty ) { 00384 00385 m_dPenaltyGap = dPenalty; } 00386 00400 float GetPenaltyGap( ) const { 00401 00402 return m_dPenaltyGap; } 00403 00414 void SetPenaltyMismatch( float dPenalty ) { 00415 00416 m_dPenaltyMismatch = dPenalty; } 00417 00428 float GetPenaltyMismatch( ) const { 00429 00430 return m_dPenaltyMismatch; } 00431 00442 const CPST* GetPST( uint32_t iMotif ) const { 00443 00444 return ( ( GetType( iMotif ) == ETypePST ) ? CCoalesceMotifLibraryImpl::GetPST( iMotif ) : NULL ); } 00445 }; 00446 00447 } 00448 00449 #endif // COALESCEMOTIFS_H