Sleipnir
src/coalescecluster.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef COALESCECLUSTER_H
00023 #define COALESCECLUSTER_H
00024 
00025 #include "coalesceclusteri.h"
00026 
00027 namespace Sleipnir {
00028 
00048 class CCoalesceCluster : public CCoalesceClusterImpl {
00049 public:
00050     bool Initialize( const CPCL& PCL, CCoalesceCluster& Pot, const std::vector<SCoalesceDataset>& vecsDatasets,
00051         std::set<std::pair<size_t, size_t> >& setpriiSeeds, const std::vector<float>& vecdSeed, size_t iPairs, float dPValue,
00052         float dProbability, size_t iThreads );
00053     void Subtract( CPCL& PCL, const CCoalesceCluster& Pot ) const;
00054     void Subtract( CCoalesceGeneScores& GeneScores ) const;
00055     bool SelectConditions( const CPCL& PCL, const CCoalesceCluster& Pot, size_t iThreads, float dPValue,
00056         float dZScore );
00057     bool SelectMotifs( const CCoalesceGroupHistograms& HistsCluster, const CCoalesceGroupHistograms& HistsPot,
00058         float dPValue, float dZScore, size_t iMaxMotifs, size_t iThreads,
00059         const CCoalesceMotifLibrary* pMotifs = NULL );
00060     bool SelectGenes( const CPCL& PCL, const CCoalesceGeneScores& GeneScores,
00061         const CCoalesceGroupHistograms& HistsCluster, const CCoalesceGroupHistograms& HistsPot,
00062         size_t iMinimum, size_t iThreads, CCoalesceCluster& Pot, float dProbability,
00063         const CCoalesceMotifLibrary* pMotifs = NULL );
00064     void CalculateHistograms( const CCoalesceGeneScores& GeneScores,
00065         CCoalesceGroupHistograms& HistogramsCluster, CCoalesceGroupHistograms* pHistogramsPot ) const;
00066     size_t Open( const std::string& strPCL, size_t iSkip, const CPCL& PCL,
00067         CCoalesceMotifLibrary* pMotifs = NULL );
00068     size_t Open( std::istream& istm, const CPCL& PCL, CCoalesceMotifLibrary* pMotifs = NULL );
00069     bool Open( const CHierarchy& Hierarchy, const std::vector<CCoalesceCluster>& vecClusters,
00070         const std::vector<std::string>& vecstrClusters, float dFraction, float dCutoff, size_t iCutoff,
00071         CCoalesceMotifLibrary* pMotifs = NULL );
00072     bool Save( const std::string& strDirectory, size_t iID, const CPCL& PCL,
00073         const CCoalesceMotifLibrary* pMotifs = NULL ) const;
00074     void Save( std::ostream& ostm, size_t iID, const CPCL& PCL, const CCoalesceMotifLibrary* pMotifs = NULL,
00075         float dCutoffPWMs = 0, float dPenaltyGap = 0, float dPenaltyMismatch = 0, bool fNoRCs = false ) const;
00076     float GetSimilarity( const CCoalesceCluster& Cluster, size_t iGenes, size_t iDatasets ) const;
00077     void Snapshot( const CCoalesceGeneScores& GeneScores, CCoalesceGroupHistograms& Histograms );
00078     bool LabelMotifs( const CCoalesceMotifLibrary& Motifs, SMotifMatch::EType eMatchType, float dPenaltyGap,
00079         float dPenaltyMismatch, float dPValue );
00080 
00093     bool IsConverged( ) {
00094 
00095         return ( m_setiHistory.find( GetHash( ) ) != m_setiHistory.end( ) ); }
00096 
00104     bool IsEmpty( ) const {
00105 
00106         return ( m_setiGenes.empty( ) || m_setiDatasets.empty( ) ); }
00107 
00118     void SetGenes( size_t iGenes ) {
00119         size_t  i;
00120 
00121         m_setiGenes.clear( );
00122         for( i = 0; i < iGenes; ++i )
00123             m_setiGenes.insert( i ); }
00124 
00132     const std::set<size_t>& GetGenes( ) const {
00133 
00134         return CCoalesceClusterImpl::GetGenes( ); }
00135 
00143     const std::set<size_t>& GetDatasets( ) const {
00144 
00145         return m_setiDatasets; }
00146 
00154     const std::set<SMotifMatch>& GetMotifs( ) const {
00155 
00156         return m_setsMotifs; }
00157 
00168     bool IsGene( size_t iGene ) const {
00169 
00170         return CCoalesceClusterImpl::IsGene( iGene ); }
00171 
00182     bool IsDataset( size_t iDataset ) const {
00183 
00184         return ( m_setiDatasets.find( iDataset ) != m_setiDatasets.end( ) ); }
00185 
00193     void RemoveGenes( const std::vector<size_t>& veciGenes ) {
00194         size_t  i;
00195 
00196         for( i = 0; i < veciGenes.size( ); ++i )
00197             m_setiGenes.erase( veciGenes[ i ] ); }
00198 };
00199 
00200 }
00201 
00202 #endif // COALESCECLUSTER_H