Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef COALESCECLUSTER_H 00023 #define COALESCECLUSTER_H 00024 00025 #include "coalesceclusteri.h" 00026 00027 namespace Sleipnir { 00028 00048 class CCoalesceCluster : public CCoalesceClusterImpl { 00049 public: 00050 bool Initialize( const CPCL& PCL, CCoalesceCluster& Pot, const std::vector<SCoalesceDataset>& vecsDatasets, 00051 std::set<std::pair<size_t, size_t> >& setpriiSeeds, const std::vector<float>& vecdSeed, size_t iPairs, float dPValue, 00052 float dProbability, size_t iThreads ); 00053 void Subtract( CPCL& PCL, const CCoalesceCluster& Pot ) const; 00054 void Subtract( CCoalesceGeneScores& GeneScores ) const; 00055 bool SelectConditions( const CPCL& PCL, const CCoalesceCluster& Pot, size_t iThreads, float dPValue, 00056 float dZScore ); 00057 bool SelectMotifs( const CCoalesceGroupHistograms& HistsCluster, const CCoalesceGroupHistograms& HistsPot, 00058 float dPValue, float dZScore, size_t iMaxMotifs, size_t iThreads, 00059 const CCoalesceMotifLibrary* pMotifs = NULL ); 00060 bool SelectGenes( const CPCL& PCL, const CCoalesceGeneScores& GeneScores, 00061 const CCoalesceGroupHistograms& HistsCluster, const CCoalesceGroupHistograms& HistsPot, 00062 size_t iMinimum, size_t iThreads, CCoalesceCluster& Pot, float dProbability, 00063 const CCoalesceMotifLibrary* pMotifs = NULL ); 00064 void CalculateHistograms( const CCoalesceGeneScores& GeneScores, 00065 CCoalesceGroupHistograms& HistogramsCluster, CCoalesceGroupHistograms* pHistogramsPot ) const; 00066 size_t Open( const std::string& strPCL, size_t iSkip, const CPCL& PCL, 00067 CCoalesceMotifLibrary* pMotifs = NULL ); 00068 size_t Open( std::istream& istm, const CPCL& PCL, CCoalesceMotifLibrary* pMotifs = NULL ); 00069 bool Open( const CHierarchy& Hierarchy, const std::vector<CCoalesceCluster>& vecClusters, 00070 const std::vector<std::string>& vecstrClusters, float dFraction, float dCutoff, size_t iCutoff, 00071 CCoalesceMotifLibrary* pMotifs = NULL ); 00072 bool Save( const std::string& strDirectory, size_t iID, const CPCL& PCL, 00073 const CCoalesceMotifLibrary* pMotifs = NULL ) const; 00074 void Save( std::ostream& ostm, size_t iID, const CPCL& PCL, const CCoalesceMotifLibrary* pMotifs = NULL, 00075 float dCutoffPWMs = 0, float dPenaltyGap = 0, float dPenaltyMismatch = 0, bool fNoRCs = false ) const; 00076 float GetSimilarity( const CCoalesceCluster& Cluster, size_t iGenes, size_t iDatasets ) const; 00077 void Snapshot( const CCoalesceGeneScores& GeneScores, CCoalesceGroupHistograms& Histograms ); 00078 bool LabelMotifs( const CCoalesceMotifLibrary& Motifs, SMotifMatch::EType eMatchType, float dPenaltyGap, 00079 float dPenaltyMismatch, float dPValue ); 00080 00093 bool IsConverged( ) { 00094 00095 return ( m_setiHistory.find( GetHash( ) ) != m_setiHistory.end( ) ); } 00096 00104 bool IsEmpty( ) const { 00105 00106 return ( m_setiGenes.empty( ) || m_setiDatasets.empty( ) ); } 00107 00118 void SetGenes( size_t iGenes ) { 00119 size_t i; 00120 00121 m_setiGenes.clear( ); 00122 for( i = 0; i < iGenes; ++i ) 00123 m_setiGenes.insert( i ); } 00124 00132 const std::set<size_t>& GetGenes( ) const { 00133 00134 return CCoalesceClusterImpl::GetGenes( ); } 00135 00143 const std::set<size_t>& GetDatasets( ) const { 00144 00145 return m_setiDatasets; } 00146 00154 const std::set<SMotifMatch>& GetMotifs( ) const { 00155 00156 return m_setsMotifs; } 00157 00168 bool IsGene( size_t iGene ) const { 00169 00170 return CCoalesceClusterImpl::IsGene( iGene ); } 00171 00182 bool IsDataset( size_t iDataset ) const { 00183 00184 return ( m_setiDatasets.find( iDataset ) != m_setiDatasets.end( ) ); } 00185 00193 void RemoveGenes( const std::vector<size_t>& veciGenes ) { 00194 size_t i; 00195 00196 for( i = 0; i < veciGenes.size( ); ++i ) 00197 m_setiGenes.erase( veciGenes[ i ] ); } 00198 }; 00199 00200 } 00201 00202 #endif // COALESCECLUSTER_H