Sleipnir
src/coalescei.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef COALESCEI_H
00023 #define COALESCEI_H
00024 
00025 #include <algorithm>
00026 #include <iostream>
00027 #include <set>
00028 #include <sstream>
00029 
00030 #include "coalescebasei.h"
00031 #include "statistics.h"
00032 
00033 namespace Sleipnir {
00034 
00035 class CCoalesceCluster;
00036 class CCoalesceGeneScores;
00037 class CCoalesceGroupHistograms;
00038 class CCoalesceMotifLibrary;
00039 class CFASTA;
00040 class CPCL;
00041 struct SCoalesceDataset;
00042 struct SCoalesceModifierCache;
00043 struct SCoalesceModifiers;
00044 struct SFASTASequence;
00045 struct SMotifMatch;
00046 
00047 template<class tValue = float, class tCount = unsigned short>
00048 class CCoalesceHistogramSet {
00049 public:
00050     double ZScore( size_t iMember, const CCoalesceHistogramSet& HistSet, double& dAveOne, double& dAverage,
00051         double& dZ, bool fCount = true ) const {
00052 
00053         return ZScore( iMember, HistSet, iMember, fCount, dAveOne, dAverage, dZ ); }
00054 
00055     double ZScore( size_t iOne, const CCoalesceHistogramSet& HistSet, size_t iTwo, bool fCount,
00056         double& dAveOne, double& dAverage, double& dZ ) const {
00057         static const double c_dEpsilon  = 1e-6;
00058         tValue  AveOne, VarOne, AveTwo, VarTwo;
00059         double  dStd;
00060         size_t  iTotal;
00061 
00062         if( !GetEdges( ) || ( GetEdges( ) != HistSet.GetEdges( ) ) )
00063             return 1;
00064 
00065         Sums( iOne, AveOne, VarOne );
00066         HistSet.Sums( iTwo, AveTwo, VarTwo );
00067         iTotal = GetTotal( ) + HistSet.GetTotal( );
00068         dAverage = (double)( AveOne + AveTwo ) / iTotal;
00069         dAveOne = (double)AveOne / GetTotal( );
00070         dStd = sqrt( ( (double)( VarOne + VarTwo ) / iTotal ) - ( dAverage * dAverage ) );
00071 
00072         dZ = ( dAveOne - dAverage ) / dStd;
00073         return ( ( dStd > c_dEpsilon ) ? ( 2 * CStatistics::ZTest( dZ, fCount ?
00074             min( GetTotal( ), HistSet.GetTotal( ) ) : 1 ) ) :
00075             ( ( fabs( dAveOne - dAverage ) < c_dEpsilon ) ? 0 : 1 ) ); }
00076 
00077     double CohensD( size_t iMember, const CCoalesceHistogramSet& HistSet, double& dAveOne, double& dAverage,
00078         double& dZ, bool fCount = true ) const {
00079 
00080         return CohensD( iMember, HistSet, iMember, fCount, dAveOne, dAverage, dZ ); }
00081 
00082     double CohensD( size_t iOne, const CCoalesceHistogramSet& HistSet, size_t iTwo, bool fCount,
00083         double& dAveOne, double& dAverage, double& dZ ) const {
00084         static const double c_dEpsilon  = 1e-6;
00085         tValue  AveOne, VarOne, AveTwo, VarTwo;
00086         double  dAveTwo, dVarOne, dVarTwo, dStd;
00087 
00088         if( !GetEdges( ) || ( GetEdges( ) != HistSet.GetEdges( ) ) )
00089             return 1;
00090 
00091         Sums( iOne, AveOne, VarOne );
00092         HistSet.Sums( iTwo, AveTwo, VarTwo );
00093         dAverage = (double)( AveOne + AveTwo ) / ( GetTotal( ) + HistSet.GetTotal( ) );
00094         dAveOne = (double)AveOne / GetTotal( );
00095         dAveTwo = (double)AveTwo / HistSet.GetTotal( );
00096         dVarOne = max( 0.0, ( (double)VarOne / GetTotal( ) ) - ( dAveOne * dAveOne ) );
00097         dVarTwo = max( 0.0, ( (double)VarTwo / HistSet.GetTotal( ) ) - ( dAveTwo * dAveTwo ) );
00098         dStd = sqrt( ( dVarOne + dVarTwo ) / 2 );
00099 
00100         dZ = dStd ? ( ( dAveOne - dAveTwo ) / dStd ) : 0;
00101 // dZ *= 1 - pow( (float)GetTotal( ) / ( GetTotal( ) + HistSet.GetTotal( ) ), 1 ); // doesn't work
00102 // dZ *= exp( -(float)GetTotal( ) / ( GetTotal( ) + HistSet.GetTotal( ) ) ); // doesn't work
00103 // dZ *= exp( -(float)GetTotal( ) / HistSet.GetTotal( ) ); // works pretty well
00104 // dZ *= fabs( (float)( GetTotal( ) - HistSet.GetTotal( ) ) ) / max( GetTotal( ), HistSet.GetTotal( ) ); // works pretty well
00105 // This prevents large clusters from blowing up the motif set
00106 /*
00107         if( iOne == iTwo )
00108             dZ *= pow( fabs( (float)( GetTotal( ) - HistSet.GetTotal( ) ) ) /
00109                 max( GetTotal( ), HistSet.GetTotal( ) ), max( 1.0f,
00110                 log10f( (float)min( GetTotal( ), HistSet.GetTotal( ) ) ) ) );
00111 //*/
00112 
00113         return ( ( dStd > c_dEpsilon ) ? ( 2 * CStatistics::ZTest( dZ, fCount ?
00114             min( GetTotal( ), HistSet.GetTotal( ) ) : 1 ) ) :
00115             ( ( fabs( dAveOne - dAveTwo ) < c_dEpsilon ) ? 0 : 1 ) ); }
00116 
00117     void Initialize( size_t iMembers, const std::vector<tValue>& vecEdges ) {
00118 
00119         m_vecEdges.resize( vecEdges.size( ) );
00120         std::copy( vecEdges.begin( ), vecEdges.end( ), m_vecEdges.begin( ) );
00121         m_iZero = GetBin( 0 );
00122         SetMembers( iMembers ); }
00123 
00124     void Initialize( size_t iMembers, size_t iBins, tValue Step ) {
00125         std::vector<tValue> vecEdges;
00126         size_t              i;
00127 
00128         vecEdges.resize( iBins );
00129         for( i = 0; i < vecEdges.size( ); ++i )
00130             vecEdges[ i ] = i * Step;
00131 
00132         Initialize( iMembers, vecEdges ); }
00133 
00134     bool Add( size_t iMember, tValue Value, tCount Count ) {
00135         size_t  iBin;
00136 
00137 // lock
00138         if( m_vecEdges.empty( ) )
00139 // unlock
00140             return false;
00141 
00142         SetMembers( max( GetMembers( ), iMember + 1 ) );
00143         if( ( iBin = GetBin( Value ) ) != m_iZero ) {
00144             m_vecCounts[ GetOffset( iMember ) + iBin ] += Count;
00145             m_vecTotal[ iMember ] += Count; }
00146 // unlock
00147 
00148         return true; }
00149 
00150     tCount Integrate( size_t iMember, tValue Value, bool fUp ) const {
00151         tCount  Ret;
00152         size_t  iBin;
00153 
00154         for( Ret = 0,iBin = GetBin( Value ); iBin < GetEdges( ); iBin += ( fUp ? 1 : -1 ) )
00155             Ret += Get( iMember, iBin );
00156 
00157         return Ret; }
00158 
00159     tCount Get( size_t iMember, size_t iBin ) const {
00160 
00161         if( iBin >= GetEdges( ) )
00162             return 0;
00163         if( iBin == m_iZero )
00164             return ( GetTotal( ) - ( ( iMember < GetMembers( ) ) ? m_vecTotal[ iMember ] : 0 ) );
00165 
00166         return ( ( iMember < GetMembers( ) ) ? m_vecCounts[ GetOffset( iMember ) + iBin ] : 0 ); }
00167 
00168     tCount Get( size_t iMember, tValue Value ) const {
00169 
00170         return Get( iMember, GetBin( Value ) ); }
00171 
00172     size_t GetBin( tValue Value ) const {
00173         size_t  i;
00174 
00175         if( !GetEdges( ) )
00176             return -1;
00177         for( i = 0; i < GetEdges( ); ++i )
00178             if( Value <= GetEdge( i ) )
00179                 break;
00180 
00181         return min( i, GetEdges( ) - 1 ); }
00182 
00183     size_t GetMembers( ) const {
00184 
00185         return m_vecTotal.size( ); }
00186 
00187     size_t GetEdges( ) const {
00188 
00189         return m_vecEdges.size( ); }
00190 
00191     tValue GetEdge( size_t iBin ) const {
00192 
00193         return m_vecEdges[ iBin ]; }
00194 
00195     std::string Save( size_t iMember ) const {
00196         std::ostringstream  ossm;
00197         size_t              i;
00198 
00199         if( GetEdges( ) ) {
00200             ossm << (size_t)GetTotal( ) << ':';
00201             for( i = 0; i < GetEdges( ); ++i )
00202                 ossm << '\t' << (size_t)Get( iMember, i ); }
00203 
00204         return ossm.str( ); }
00205 
00206     void Clear( ) {
00207 
00208         fill( m_vecCounts.begin( ), m_vecCounts.end( ), 0 );
00209         fill( m_vecTotal.begin( ), m_vecTotal.end( ), 0 ); }
00210 
00211     tCount GetTotal( ) const {
00212 
00213         return m_Total; }
00214 
00215     void SetTotal( tCount Total ) {
00216 
00217         m_Total = Total; }
00218 
00219     const std::vector<tValue>& GetBins( ) const {
00220 
00221         return m_vecEdges; }
00222 
00223 protected:
00224     void SetMembers( size_t iMembers ) {
00225 
00226         m_vecTotal.resize( iMembers );
00227         m_vecCounts.resize( GetOffset( GetMembers( ) ) ); }
00228 
00229     size_t GetOffset( size_t iMember ) const {
00230 
00231         return ( iMember * GetEdges( ) ); }
00232 
00233     void Sums( size_t iMember, tValue& Sum, tValue& SumSq ) const {
00234         size_t  i;
00235         tValue  Cur, BinLow, BinHigh;
00236 
00237         for( SumSq = 0,i = 0; i < GetEdges( ); ++i ) {
00238             BinLow = i ? BinHigh : ( GetEdge( i ) + GetEdge( i ) - GetEdge( i + 1 ) );
00239             BinHigh = GetEdge( i );
00240             if( ( i + 1 ) == GetEdges( ) )
00241                 BinHigh += BinHigh - BinLow;
00242 // Expected value of x^2 from BinLow to BinHigh
00243             Cur = ( pow( BinHigh, 3 ) - pow( BinLow, 3 ) ) / ( BinHigh - BinLow ) / 3;
00244             SumSq += Get( iMember, i ) * Cur; }
00245         for( Sum = 0,i = 0; i < GetEdges( ); ++i ) {
00246             BinLow = ( !i || ( ( i + 1 ) == GetEdges( ) ) ) ? GetEdge( i ) :
00247                 ( ( GetEdge( i ) + GetEdge( i - 1 ) ) / 2 );
00248             Cur = Get( iMember, i ) * BinLow;
00249             Sum += Cur; } }
00250 
00251     void AveVar( size_t iMember, double& dAve, double& dVar ) const {
00252         tValue  Ave, Var;
00253 
00254         Sums( iMember, Ave, Var );
00255         dAve = (double)Ave / GetTotal( );
00256         dVar = ( (double)Var / GetTotal( ) ) - ( dAve * dAve ); }
00257 
00258     size_t              m_iZero;
00259     tCount              m_Total;
00260     std::vector<tCount> m_vecTotal;
00261     std::vector<tValue> m_vecEdges;
00262     std::vector<tCount> m_vecCounts;
00263 };
00264 
00265 // One score per type per subtype per gene per motif atom
00266 class CCoalesceGeneScores : public CCoalesceSequencer<float*> {
00267 public:
00268     CCoalesceGeneScores( ) : m_iMotifs(0), m_iGenes(0), m_iCapacity(0) {
00269 
00270         pthread_mutex_init( &m_mutx, NULL ); }
00271 
00272     virtual ~CCoalesceGeneScores( ) {
00273         size_t  iType, iSubtype;
00274         float*  ad;
00275 
00276         for( iType = 0; iType < GetTypes( ); ++iType )
00277             for( iSubtype = 0; iSubtype < GetSubsequences( iType ); ++iSubtype )
00278                 if( ad = Get( iType, (ESubsequence)iSubtype ) )
00279                     delete[] ad;
00280         pthread_mutex_destroy( &m_mutx ); }
00281 
00282     bool Add( size_t, const CCoalesceMotifLibrary&, const SFASTASequence&, SCoalesceModifierCache&,
00283         std::vector<std::vector<float> >&, std::vector<size_t>& );
00284     bool Add( size_t, const CCoalesceMotifLibrary&, const SFASTASequence&, SCoalesceModifierCache&, uint32_t,
00285         std::vector<float>&, std::vector<size_t>& );
00286     void Subtract( const SMotifMatch&, size_t );
00287     bool CalculateWeights( );
00288 
00289     float* Get( size_t iType, ESubsequence eSubsequence, size_t iGene, bool fSet = false ) const {
00290         float*  ad;
00291 
00292         if( !( ad = Get( iType, eSubsequence ) ) )
00293             return NULL;
00294         ad += iGene * m_iCapacity;
00295         return ( ( fSet || !CMeta::IsNaN( ad[ 0 ] ) ) ? ad : NULL ); }
00296 
00297     float Get( size_t iType, ESubsequence eSubsequence, size_t iGene, uint32_t iMotif ) const {
00298         const float*    ad;
00299 
00300         return ( ( ad = Get( iType, eSubsequence, iGene ) ) ? ad[ iMotif ] : 0 ); }
00301 
00302     size_t GetMotifs( ) const {
00303 
00304         return m_iMotifs; }
00305 
00306     void SetGenes( size_t iGenes ) {
00307 
00308         m_iGenes = iGenes; }
00309 
00310     void Validate( ) const {
00311         size_t      iType, iSubsequence, iGene;
00312         uint32_t    iMotif;
00313         float       dCur, dTotal;
00314 
00315         for( iType = 0; iType < GetTypes( ); ++iType )
00316             for( iSubsequence = ( ESubsequenceBegin + 1 ); iSubsequence < GetSubsequences( iType );
00317                 ++iSubsequence )
00318                 for( iGene = 0; iGene < m_iGenes; ++iGene ) {
00319                     if( !Get( iType, (ESubsequence)iSubsequence, iGene ) )
00320                         continue;
00321                     for( iMotif = 0; iMotif < m_iMotifs; ++iMotif )
00322                         if( ( dCur = Get( iType, (ESubsequence)iSubsequence, iGene, iMotif ) ) !=
00323                             ( dTotal = Get( iType, ESubsequenceTotal, iGene, iMotif ) ) )
00324                             std::cerr << "INVALID" << '\t' << GetType( iType ) << '\t' << iSubsequence <<
00325                                 '\t' << iGene << '\t' << iMotif << '\t' << dCur << '\t' << dTotal <<
00326                                 std::endl; } }
00327 
00328 protected:
00329     static const size_t c_iLookahead    = 128;
00330 
00331     static bool Add( const CCoalesceMotifLibrary&, const std::string&, size_t, bool,
00332         std::vector<std::vector<float> >&, std::vector<size_t>&, size_t, SCoalesceModifierCache& );
00333     static bool Add( const CCoalesceMotifLibrary&, const std::string&, size_t, bool, uint32_t,
00334         std::vector<float>&, std::vector<size_t>&, size_t, SCoalesceModifierCache& );
00335 
00336     static void Add( ESubsequence eSubsequence, uint32_t iMotif, uint32_t iMotifs,
00337         vector<vector<float> >& vecvecdCounts, float dValue ) {
00338         std::vector<float>& vecdCountsTotal = vecvecdCounts[ ESubsequenceTotal ];
00339         std::vector<float>& vecdCounts      = vecvecdCounts[ eSubsequence ];
00340 
00341         vecdCountsTotal.resize( iMotifs );
00342         vecdCountsTotal[ iMotif ] += dValue;
00343         vecdCounts.resize( iMotifs );
00344         vecdCounts[ iMotif ] += dValue; }
00345 
00346     float* Get( size_t iType, ESubsequence eSubsequence ) const {
00347 
00348         return CCoalesceSequencer<float*>::Get( iType, eSubsequence ); }
00349 
00350     void Set( size_t iType, ESubsequence eSubsequence, size_t iGene, uint32_t iMotif, float dValue,
00351         uint32_t iMotifs = 0 ) {
00352         float*  adTotal;
00353         size_t  i;
00354 
00355         pthread_mutex_lock( &m_mutx );
00356         Grow( iMotif, iMotifs );
00357         if( !( adTotal = Get( iType, eSubsequence ) ) ) {
00358             m_vecvecValues[ iType ][ eSubsequence ] = adTotal = new float[ m_iGenes * m_iCapacity ];
00359             memset( adTotal, 0, m_iGenes * m_iCapacity * sizeof(*adTotal) );
00360             for( i = 0; i < m_iGenes; ++i )
00361                 adTotal[ i * m_iCapacity ] = CMeta::GetNaN( ); }
00362         {
00363             float*  ad  = Get( iType, eSubsequence, iGene, true );
00364 
00365             if( CMeta::IsNaN( ad[ 0 ] ) )
00366                 ad[ 0 ] = 0;
00367             ad[ iMotif ] = dValue;
00368         }
00369         pthread_mutex_unlock( &m_mutx ); }
00370 
00371     void Grow( uint32_t iMotif, uint32_t iMotifs ) {
00372         size_t  iType, iSubtype, iGene, iTarget, iCapacity;
00373         float*  adTotal;
00374         float*  ad;
00375         float*  adFrom;
00376         float*  adTo;
00377 
00378         iTarget = max( iMotif + 1, iMotifs );
00379         if( iTarget <= m_iCapacity ) {
00380             if( iTarget > m_iMotifs )
00381                 m_iMotifs = iTarget;
00382             return; }
00383         iCapacity = m_iCapacity;
00384         m_iCapacity = iTarget + c_iLookahead;
00385         for( iType = 0; iType < GetTypes( ); ++iType )
00386             for( iSubtype = 0; iSubtype < GetSubsequences( iType ); ++iSubtype ) {
00387                 if( !( adTotal = Get( iType, (ESubsequence)iSubtype ) ) )
00388                     continue;
00389                 ad = new float[ m_iGenes * m_iCapacity ];
00390                 for( iGene = 0,adFrom = adTotal,adTo = ad; iGene < m_iGenes;
00391                     ++iGene,adFrom += iCapacity,adTo += m_iCapacity ) {
00392                     memcpy( adTo, adFrom, m_iMotifs * sizeof(*adTo) );
00393                     memset( adTo + m_iMotifs, 0, ( m_iCapacity - m_iMotifs ) * sizeof(*adTo) ); }
00394                 delete[] adTotal;
00395                 m_vecvecValues[ iType ][ iSubtype ] = ad; }
00396         m_iMotifs = iTarget; }
00397 
00398     size_t                              m_iGenes;
00399     size_t                              m_iMotifs;
00400     size_t                              m_iCapacity;
00401     pthread_mutex_t                     m_mutx;
00402     std::vector<std::vector<float> >    m_vecvecdWeights;
00403 };
00404 
00405 // One histogram per motif atom
00406 class CCoalesceGroupHistograms : public CCoalesceSequencer<CCoalesceHistogramSet<> > {
00407 public:
00408     CCoalesceGroupHistograms( size_t iBins, float dStep ) : m_iBins(iBins), m_dStep(dStep) { }
00409 
00410     bool Add( const CCoalesceGeneScores&, size_t, bool, uint32_t = -1 );
00411     void Save( std::ostream&, const CCoalesceMotifLibrary* ) const;
00412     bool IsSimilar( const CCoalesceMotifLibrary*, const SMotifMatch&, const SMotifMatch&, float ) const;
00413 
00414     void SetTotal( const CCoalesceGeneScores& GeneScores, const std::set<size_t>& setiGenes ) {
00415         std::set<size_t>::const_iterator    iterGene;
00416         size_t                              i, iTypeUs, iTypeThem, iSubsequence;
00417 
00418         m_vecsTotals.resize( GetTypes( ) * ESubsequenceEnd );
00419         std::fill( m_vecsTotals.begin( ), m_vecsTotals.end( ), 0 );
00420         for( iterGene = setiGenes.begin( ); iterGene != setiGenes.end( ); ++iterGene )
00421             for( iTypeUs = 0; iTypeUs < GetTypes( ); ++iTypeUs ) {
00422                 if( ( iTypeThem = GeneScores.GetType( GetType( iTypeUs ) ) ) == -1 )
00423                     continue;
00424                 for( iSubsequence = ESubsequenceBegin; iSubsequence < GetSubsequences( iTypeUs );
00425                     ++iSubsequence )
00426                     if( GeneScores.Get( iTypeThem, (ESubsequence)iSubsequence, *iterGene ) )
00427                         m_vecsTotals[ ( iTypeUs * ESubsequenceEnd ) + iSubsequence ]++; }
00428 
00429         for( i = iTypeUs = 0; iTypeUs < GetTypes( ); ++iTypeUs )
00430             for( iSubsequence = ESubsequenceBegin; iSubsequence < GetSubsequences( iTypeUs );
00431                 ++iSubsequence ) {
00432 //              g_CatSleipnir( ).info( "CCoalesceGroupHistograms::SetTotal( ) type %s, subsequence %d contains %d genes with sequences",
00433 //                  GetType( iTypeUs ).c_str( ), iSubsequence, m_vecsTotals[ i ] );
00434                 Get( iTypeUs, (ESubsequence)iSubsequence ).SetTotal( m_vecsTotals[ i++ ] ); } }
00435 
00436     void Validate( ) const {
00437         size_t      iType, iSubsequence, iEdge;
00438         uint32_t    iMotif;
00439 
00440         for( iType = 0; iType < GetTypes( ); ++iType ) {
00441             const CCoalesceHistogramSet<>&  HistsTotal  = Get( iType, ESubsequenceTotal );
00442 
00443             for( iSubsequence = ( ESubsequenceBegin + 1 ); iSubsequence < GetSubsequences( iType );
00444                 ++iSubsequence ) {
00445                 const CCoalesceHistogramSet<>&  HistsCur    = Get( iType, (ESubsequence)iSubsequence );
00446 
00447                 if( !HistsCur.GetTotal( ) )
00448                     continue;
00449                 for( iMotif = 0; iMotif < HistsCur.GetMembers( ); ++iMotif )
00450                     for( iEdge = 0; iEdge < HistsCur.GetEdges( ); ++iEdge )
00451                         if( HistsCur.Get( iMotif, iEdge ) != HistsTotal.Get( iMotif, iEdge ) ) {
00452                             std::cerr << "INVALID" << '\t' << GetType( iType ) << '\t' << iSubsequence <<
00453                                 '\t' << iMotif << endl << HistsTotal.Save( iMotif ) << endl <<
00454                                 HistsCur.Save( iMotif ) << std::endl;
00455                             break; } } } }
00456 
00457 protected:
00458     uint32_t                    m_iMotifs;
00459     size_t                      m_iBins;
00460     float                       m_dStep;
00461     std::vector<unsigned short> m_vecsTotals;
00462 };
00463 
00464 class CCoalesceImpl {
00465 protected:
00466     struct SThreadCombineMotif {
00467         size_t                              m_iOffset;
00468         size_t                              m_iStep;
00469         const std::vector<size_t>*          m_pveciPCL2FASTA;
00470         CCoalesceGeneScores*                m_pGeneScores;
00471         const CCoalesceMotifLibrary*        m_pMotifs;
00472         uint32_t                            m_iMotif;
00473         const CFASTA*                       m_pFASTA;
00474         const SCoalesceModifiers*           m_psModifiers;
00475     };
00476 
00477     static void* ThreadCombineMotif( void* );
00478     static void Normalize( CPCL& );
00479 
00480     CCoalesceImpl( ) : m_iK(7), m_dPValueCorrelation(0.05f), m_iBins(12), m_dZScoreCondition(0.5f),
00481         m_dProbabilityGene(0.95f), m_dZScoreMotif(0.5f), m_pMotifs(NULL), m_fMotifs(false),
00482         m_iBasesPerMatch(5000), m_dPValueMerge(0.05f), m_dCutoffMerge(2.5f), m_iSizeMinimum(5),
00483         m_iThreads(1), m_iSizeMerge(100), m_iSizeMaximum(1000), m_dPValueCondition(0.05f),
00484         m_dPValueMotif(0.05f), m_fNormalize(false) { }
00485     virtual ~CCoalesceImpl( );
00486 
00487     void Clear( );
00488     size_t GetMotifCount( ) const;
00489     bool CombineMotifs( const CFASTA&, const std::vector<size_t>&, SCoalesceModifiers&,
00490         const CCoalesceCluster&, size_t, CCoalesceGeneScores&, CCoalesceGroupHistograms&,
00491         CCoalesceGroupHistograms& ) const;
00492     bool InitializeDatasets( const CPCL& );
00493     bool InitializeGeneScores( const CPCL&, const CFASTA&, std::vector<size_t>&, SCoalesceModifiers&,
00494         CCoalesceGeneScores& );
00495 
00496     float                           m_dPValueMerge;
00497     float                           m_dProbabilityGene;
00498     float                           m_dPValueCondition;
00499     float                           m_dZScoreCondition;
00500     float                           m_dPValueCorrelation;
00501     float                           m_dPValueMotif;
00502     float                           m_dZScoreMotif;
00503     float                           m_dCutoffMerge;
00504     size_t                          m_iNumberCorrelation;
00505     size_t                          m_iBins;
00506     size_t                          m_iK;
00507     size_t                          m_iSizeMerge;
00508     size_t                          m_iSizeMinimum;
00509     size_t                          m_iSizeMaximum;
00510     size_t                          m_iThreads;
00511     std::string                     m_strDirectoryIntermediate;
00512     CCoalesceMotifLibrary*          m_pMotifs;
00513     bool                            m_fMotifs;
00514     bool                            m_fNormalize;
00515     size_t                          m_iBasesPerMatch;
00516     std::vector<SCoalesceDataset>   m_vecsDatasets;
00517     std::vector<const CFASTA*>      m_vecpWiggles;
00518     std::vector<std::ostream*>      m_vecpostm;
00519     std::vector<float>              m_vecdSeed;
00520 };
00521 
00522 }
00523 
00524 #endif // COALESCEI_H