Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef COALESCEI_H 00023 #define COALESCEI_H 00024 00025 #include <algorithm> 00026 #include <iostream> 00027 #include <set> 00028 #include <sstream> 00029 00030 #include "coalescebasei.h" 00031 #include "statistics.h" 00032 00033 namespace Sleipnir { 00034 00035 class CCoalesceCluster; 00036 class CCoalesceGeneScores; 00037 class CCoalesceGroupHistograms; 00038 class CCoalesceMotifLibrary; 00039 class CFASTA; 00040 class CPCL; 00041 struct SCoalesceDataset; 00042 struct SCoalesceModifierCache; 00043 struct SCoalesceModifiers; 00044 struct SFASTASequence; 00045 struct SMotifMatch; 00046 00047 template<class tValue = float, class tCount = unsigned short> 00048 class CCoalesceHistogramSet { 00049 public: 00050 double ZScore( size_t iMember, const CCoalesceHistogramSet& HistSet, double& dAveOne, double& dAverage, 00051 double& dZ, bool fCount = true ) const { 00052 00053 return ZScore( iMember, HistSet, iMember, fCount, dAveOne, dAverage, dZ ); } 00054 00055 double ZScore( size_t iOne, const CCoalesceHistogramSet& HistSet, size_t iTwo, bool fCount, 00056 double& dAveOne, double& dAverage, double& dZ ) const { 00057 static const double c_dEpsilon = 1e-6; 00058 tValue AveOne, VarOne, AveTwo, VarTwo; 00059 double dStd; 00060 size_t iTotal; 00061 00062 if( !GetEdges( ) || ( GetEdges( ) != HistSet.GetEdges( ) ) ) 00063 return 1; 00064 00065 Sums( iOne, AveOne, VarOne ); 00066 HistSet.Sums( iTwo, AveTwo, VarTwo ); 00067 iTotal = GetTotal( ) + HistSet.GetTotal( ); 00068 dAverage = (double)( AveOne + AveTwo ) / iTotal; 00069 dAveOne = (double)AveOne / GetTotal( ); 00070 dStd = sqrt( ( (double)( VarOne + VarTwo ) / iTotal ) - ( dAverage * dAverage ) ); 00071 00072 dZ = ( dAveOne - dAverage ) / dStd; 00073 return ( ( dStd > c_dEpsilon ) ? ( 2 * CStatistics::ZTest( dZ, fCount ? 00074 min( GetTotal( ), HistSet.GetTotal( ) ) : 1 ) ) : 00075 ( ( fabs( dAveOne - dAverage ) < c_dEpsilon ) ? 0 : 1 ) ); } 00076 00077 double CohensD( size_t iMember, const CCoalesceHistogramSet& HistSet, double& dAveOne, double& dAverage, 00078 double& dZ, bool fCount = true ) const { 00079 00080 return CohensD( iMember, HistSet, iMember, fCount, dAveOne, dAverage, dZ ); } 00081 00082 double CohensD( size_t iOne, const CCoalesceHistogramSet& HistSet, size_t iTwo, bool fCount, 00083 double& dAveOne, double& dAverage, double& dZ ) const { 00084 static const double c_dEpsilon = 1e-6; 00085 tValue AveOne, VarOne, AveTwo, VarTwo; 00086 double dAveTwo, dVarOne, dVarTwo, dStd; 00087 00088 if( !GetEdges( ) || ( GetEdges( ) != HistSet.GetEdges( ) ) ) 00089 return 1; 00090 00091 Sums( iOne, AveOne, VarOne ); 00092 HistSet.Sums( iTwo, AveTwo, VarTwo ); 00093 dAverage = (double)( AveOne + AveTwo ) / ( GetTotal( ) + HistSet.GetTotal( ) ); 00094 dAveOne = (double)AveOne / GetTotal( ); 00095 dAveTwo = (double)AveTwo / HistSet.GetTotal( ); 00096 dVarOne = max( 0.0, ( (double)VarOne / GetTotal( ) ) - ( dAveOne * dAveOne ) ); 00097 dVarTwo = max( 0.0, ( (double)VarTwo / HistSet.GetTotal( ) ) - ( dAveTwo * dAveTwo ) ); 00098 dStd = sqrt( ( dVarOne + dVarTwo ) / 2 ); 00099 00100 dZ = dStd ? ( ( dAveOne - dAveTwo ) / dStd ) : 0; 00101 // dZ *= 1 - pow( (float)GetTotal( ) / ( GetTotal( ) + HistSet.GetTotal( ) ), 1 ); // doesn't work 00102 // dZ *= exp( -(float)GetTotal( ) / ( GetTotal( ) + HistSet.GetTotal( ) ) ); // doesn't work 00103 // dZ *= exp( -(float)GetTotal( ) / HistSet.GetTotal( ) ); // works pretty well 00104 // dZ *= fabs( (float)( GetTotal( ) - HistSet.GetTotal( ) ) ) / max( GetTotal( ), HistSet.GetTotal( ) ); // works pretty well 00105 // This prevents large clusters from blowing up the motif set 00106 /* 00107 if( iOne == iTwo ) 00108 dZ *= pow( fabs( (float)( GetTotal( ) - HistSet.GetTotal( ) ) ) / 00109 max( GetTotal( ), HistSet.GetTotal( ) ), max( 1.0f, 00110 log10f( (float)min( GetTotal( ), HistSet.GetTotal( ) ) ) ) ); 00111 //*/ 00112 00113 return ( ( dStd > c_dEpsilon ) ? ( 2 * CStatistics::ZTest( dZ, fCount ? 00114 min( GetTotal( ), HistSet.GetTotal( ) ) : 1 ) ) : 00115 ( ( fabs( dAveOne - dAveTwo ) < c_dEpsilon ) ? 0 : 1 ) ); } 00116 00117 void Initialize( size_t iMembers, const std::vector<tValue>& vecEdges ) { 00118 00119 m_vecEdges.resize( vecEdges.size( ) ); 00120 std::copy( vecEdges.begin( ), vecEdges.end( ), m_vecEdges.begin( ) ); 00121 m_iZero = GetBin( 0 ); 00122 SetMembers( iMembers ); } 00123 00124 void Initialize( size_t iMembers, size_t iBins, tValue Step ) { 00125 std::vector<tValue> vecEdges; 00126 size_t i; 00127 00128 vecEdges.resize( iBins ); 00129 for( i = 0; i < vecEdges.size( ); ++i ) 00130 vecEdges[ i ] = i * Step; 00131 00132 Initialize( iMembers, vecEdges ); } 00133 00134 bool Add( size_t iMember, tValue Value, tCount Count ) { 00135 size_t iBin; 00136 00137 // lock 00138 if( m_vecEdges.empty( ) ) 00139 // unlock 00140 return false; 00141 00142 SetMembers( max( GetMembers( ), iMember + 1 ) ); 00143 if( ( iBin = GetBin( Value ) ) != m_iZero ) { 00144 m_vecCounts[ GetOffset( iMember ) + iBin ] += Count; 00145 m_vecTotal[ iMember ] += Count; } 00146 // unlock 00147 00148 return true; } 00149 00150 tCount Integrate( size_t iMember, tValue Value, bool fUp ) const { 00151 tCount Ret; 00152 size_t iBin; 00153 00154 for( Ret = 0,iBin = GetBin( Value ); iBin < GetEdges( ); iBin += ( fUp ? 1 : -1 ) ) 00155 Ret += Get( iMember, iBin ); 00156 00157 return Ret; } 00158 00159 tCount Get( size_t iMember, size_t iBin ) const { 00160 00161 if( iBin >= GetEdges( ) ) 00162 return 0; 00163 if( iBin == m_iZero ) 00164 return ( GetTotal( ) - ( ( iMember < GetMembers( ) ) ? m_vecTotal[ iMember ] : 0 ) ); 00165 00166 return ( ( iMember < GetMembers( ) ) ? m_vecCounts[ GetOffset( iMember ) + iBin ] : 0 ); } 00167 00168 tCount Get( size_t iMember, tValue Value ) const { 00169 00170 return Get( iMember, GetBin( Value ) ); } 00171 00172 size_t GetBin( tValue Value ) const { 00173 size_t i; 00174 00175 if( !GetEdges( ) ) 00176 return -1; 00177 for( i = 0; i < GetEdges( ); ++i ) 00178 if( Value <= GetEdge( i ) ) 00179 break; 00180 00181 return min( i, GetEdges( ) - 1 ); } 00182 00183 size_t GetMembers( ) const { 00184 00185 return m_vecTotal.size( ); } 00186 00187 size_t GetEdges( ) const { 00188 00189 return m_vecEdges.size( ); } 00190 00191 tValue GetEdge( size_t iBin ) const { 00192 00193 return m_vecEdges[ iBin ]; } 00194 00195 std::string Save( size_t iMember ) const { 00196 std::ostringstream ossm; 00197 size_t i; 00198 00199 if( GetEdges( ) ) { 00200 ossm << (size_t)GetTotal( ) << ':'; 00201 for( i = 0; i < GetEdges( ); ++i ) 00202 ossm << '\t' << (size_t)Get( iMember, i ); } 00203 00204 return ossm.str( ); } 00205 00206 void Clear( ) { 00207 00208 fill( m_vecCounts.begin( ), m_vecCounts.end( ), 0 ); 00209 fill( m_vecTotal.begin( ), m_vecTotal.end( ), 0 ); } 00210 00211 tCount GetTotal( ) const { 00212 00213 return m_Total; } 00214 00215 void SetTotal( tCount Total ) { 00216 00217 m_Total = Total; } 00218 00219 const std::vector<tValue>& GetBins( ) const { 00220 00221 return m_vecEdges; } 00222 00223 protected: 00224 void SetMembers( size_t iMembers ) { 00225 00226 m_vecTotal.resize( iMembers ); 00227 m_vecCounts.resize( GetOffset( GetMembers( ) ) ); } 00228 00229 size_t GetOffset( size_t iMember ) const { 00230 00231 return ( iMember * GetEdges( ) ); } 00232 00233 void Sums( size_t iMember, tValue& Sum, tValue& SumSq ) const { 00234 size_t i; 00235 tValue Cur, BinLow, BinHigh; 00236 00237 for( SumSq = 0,i = 0; i < GetEdges( ); ++i ) { 00238 BinLow = i ? BinHigh : ( GetEdge( i ) + GetEdge( i ) - GetEdge( i + 1 ) ); 00239 BinHigh = GetEdge( i ); 00240 if( ( i + 1 ) == GetEdges( ) ) 00241 BinHigh += BinHigh - BinLow; 00242 // Expected value of x^2 from BinLow to BinHigh 00243 Cur = ( pow( BinHigh, 3 ) - pow( BinLow, 3 ) ) / ( BinHigh - BinLow ) / 3; 00244 SumSq += Get( iMember, i ) * Cur; } 00245 for( Sum = 0,i = 0; i < GetEdges( ); ++i ) { 00246 BinLow = ( !i || ( ( i + 1 ) == GetEdges( ) ) ) ? GetEdge( i ) : 00247 ( ( GetEdge( i ) + GetEdge( i - 1 ) ) / 2 ); 00248 Cur = Get( iMember, i ) * BinLow; 00249 Sum += Cur; } } 00250 00251 void AveVar( size_t iMember, double& dAve, double& dVar ) const { 00252 tValue Ave, Var; 00253 00254 Sums( iMember, Ave, Var ); 00255 dAve = (double)Ave / GetTotal( ); 00256 dVar = ( (double)Var / GetTotal( ) ) - ( dAve * dAve ); } 00257 00258 size_t m_iZero; 00259 tCount m_Total; 00260 std::vector<tCount> m_vecTotal; 00261 std::vector<tValue> m_vecEdges; 00262 std::vector<tCount> m_vecCounts; 00263 }; 00264 00265 // One score per type per subtype per gene per motif atom 00266 class CCoalesceGeneScores : public CCoalesceSequencer<float*> { 00267 public: 00268 CCoalesceGeneScores( ) : m_iMotifs(0), m_iGenes(0), m_iCapacity(0) { 00269 00270 pthread_mutex_init( &m_mutx, NULL ); } 00271 00272 virtual ~CCoalesceGeneScores( ) { 00273 size_t iType, iSubtype; 00274 float* ad; 00275 00276 for( iType = 0; iType < GetTypes( ); ++iType ) 00277 for( iSubtype = 0; iSubtype < GetSubsequences( iType ); ++iSubtype ) 00278 if( ad = Get( iType, (ESubsequence)iSubtype ) ) 00279 delete[] ad; 00280 pthread_mutex_destroy( &m_mutx ); } 00281 00282 bool Add( size_t, const CCoalesceMotifLibrary&, const SFASTASequence&, SCoalesceModifierCache&, 00283 std::vector<std::vector<float> >&, std::vector<size_t>& ); 00284 bool Add( size_t, const CCoalesceMotifLibrary&, const SFASTASequence&, SCoalesceModifierCache&, uint32_t, 00285 std::vector<float>&, std::vector<size_t>& ); 00286 void Subtract( const SMotifMatch&, size_t ); 00287 bool CalculateWeights( ); 00288 00289 float* Get( size_t iType, ESubsequence eSubsequence, size_t iGene, bool fSet = false ) const { 00290 float* ad; 00291 00292 if( !( ad = Get( iType, eSubsequence ) ) ) 00293 return NULL; 00294 ad += iGene * m_iCapacity; 00295 return ( ( fSet || !CMeta::IsNaN( ad[ 0 ] ) ) ? ad : NULL ); } 00296 00297 float Get( size_t iType, ESubsequence eSubsequence, size_t iGene, uint32_t iMotif ) const { 00298 const float* ad; 00299 00300 return ( ( ad = Get( iType, eSubsequence, iGene ) ) ? ad[ iMotif ] : 0 ); } 00301 00302 size_t GetMotifs( ) const { 00303 00304 return m_iMotifs; } 00305 00306 void SetGenes( size_t iGenes ) { 00307 00308 m_iGenes = iGenes; } 00309 00310 void Validate( ) const { 00311 size_t iType, iSubsequence, iGene; 00312 uint32_t iMotif; 00313 float dCur, dTotal; 00314 00315 for( iType = 0; iType < GetTypes( ); ++iType ) 00316 for( iSubsequence = ( ESubsequenceBegin + 1 ); iSubsequence < GetSubsequences( iType ); 00317 ++iSubsequence ) 00318 for( iGene = 0; iGene < m_iGenes; ++iGene ) { 00319 if( !Get( iType, (ESubsequence)iSubsequence, iGene ) ) 00320 continue; 00321 for( iMotif = 0; iMotif < m_iMotifs; ++iMotif ) 00322 if( ( dCur = Get( iType, (ESubsequence)iSubsequence, iGene, iMotif ) ) != 00323 ( dTotal = Get( iType, ESubsequenceTotal, iGene, iMotif ) ) ) 00324 std::cerr << "INVALID" << '\t' << GetType( iType ) << '\t' << iSubsequence << 00325 '\t' << iGene << '\t' << iMotif << '\t' << dCur << '\t' << dTotal << 00326 std::endl; } } 00327 00328 protected: 00329 static const size_t c_iLookahead = 128; 00330 00331 static bool Add( const CCoalesceMotifLibrary&, const std::string&, size_t, bool, 00332 std::vector<std::vector<float> >&, std::vector<size_t>&, size_t, SCoalesceModifierCache& ); 00333 static bool Add( const CCoalesceMotifLibrary&, const std::string&, size_t, bool, uint32_t, 00334 std::vector<float>&, std::vector<size_t>&, size_t, SCoalesceModifierCache& ); 00335 00336 static void Add( ESubsequence eSubsequence, uint32_t iMotif, uint32_t iMotifs, 00337 vector<vector<float> >& vecvecdCounts, float dValue ) { 00338 std::vector<float>& vecdCountsTotal = vecvecdCounts[ ESubsequenceTotal ]; 00339 std::vector<float>& vecdCounts = vecvecdCounts[ eSubsequence ]; 00340 00341 vecdCountsTotal.resize( iMotifs ); 00342 vecdCountsTotal[ iMotif ] += dValue; 00343 vecdCounts.resize( iMotifs ); 00344 vecdCounts[ iMotif ] += dValue; } 00345 00346 float* Get( size_t iType, ESubsequence eSubsequence ) const { 00347 00348 return CCoalesceSequencer<float*>::Get( iType, eSubsequence ); } 00349 00350 void Set( size_t iType, ESubsequence eSubsequence, size_t iGene, uint32_t iMotif, float dValue, 00351 uint32_t iMotifs = 0 ) { 00352 float* adTotal; 00353 size_t i; 00354 00355 pthread_mutex_lock( &m_mutx ); 00356 Grow( iMotif, iMotifs ); 00357 if( !( adTotal = Get( iType, eSubsequence ) ) ) { 00358 m_vecvecValues[ iType ][ eSubsequence ] = adTotal = new float[ m_iGenes * m_iCapacity ]; 00359 memset( adTotal, 0, m_iGenes * m_iCapacity * sizeof(*adTotal) ); 00360 for( i = 0; i < m_iGenes; ++i ) 00361 adTotal[ i * m_iCapacity ] = CMeta::GetNaN( ); } 00362 { 00363 float* ad = Get( iType, eSubsequence, iGene, true ); 00364 00365 if( CMeta::IsNaN( ad[ 0 ] ) ) 00366 ad[ 0 ] = 0; 00367 ad[ iMotif ] = dValue; 00368 } 00369 pthread_mutex_unlock( &m_mutx ); } 00370 00371 void Grow( uint32_t iMotif, uint32_t iMotifs ) { 00372 size_t iType, iSubtype, iGene, iTarget, iCapacity; 00373 float* adTotal; 00374 float* ad; 00375 float* adFrom; 00376 float* adTo; 00377 00378 iTarget = max( iMotif + 1, iMotifs ); 00379 if( iTarget <= m_iCapacity ) { 00380 if( iTarget > m_iMotifs ) 00381 m_iMotifs = iTarget; 00382 return; } 00383 iCapacity = m_iCapacity; 00384 m_iCapacity = iTarget + c_iLookahead; 00385 for( iType = 0; iType < GetTypes( ); ++iType ) 00386 for( iSubtype = 0; iSubtype < GetSubsequences( iType ); ++iSubtype ) { 00387 if( !( adTotal = Get( iType, (ESubsequence)iSubtype ) ) ) 00388 continue; 00389 ad = new float[ m_iGenes * m_iCapacity ]; 00390 for( iGene = 0,adFrom = adTotal,adTo = ad; iGene < m_iGenes; 00391 ++iGene,adFrom += iCapacity,adTo += m_iCapacity ) { 00392 memcpy( adTo, adFrom, m_iMotifs * sizeof(*adTo) ); 00393 memset( adTo + m_iMotifs, 0, ( m_iCapacity - m_iMotifs ) * sizeof(*adTo) ); } 00394 delete[] adTotal; 00395 m_vecvecValues[ iType ][ iSubtype ] = ad; } 00396 m_iMotifs = iTarget; } 00397 00398 size_t m_iGenes; 00399 size_t m_iMotifs; 00400 size_t m_iCapacity; 00401 pthread_mutex_t m_mutx; 00402 std::vector<std::vector<float> > m_vecvecdWeights; 00403 }; 00404 00405 // One histogram per motif atom 00406 class CCoalesceGroupHistograms : public CCoalesceSequencer<CCoalesceHistogramSet<> > { 00407 public: 00408 CCoalesceGroupHistograms( size_t iBins, float dStep ) : m_iBins(iBins), m_dStep(dStep) { } 00409 00410 bool Add( const CCoalesceGeneScores&, size_t, bool, uint32_t = -1 ); 00411 void Save( std::ostream&, const CCoalesceMotifLibrary* ) const; 00412 bool IsSimilar( const CCoalesceMotifLibrary*, const SMotifMatch&, const SMotifMatch&, float ) const; 00413 00414 void SetTotal( const CCoalesceGeneScores& GeneScores, const std::set<size_t>& setiGenes ) { 00415 std::set<size_t>::const_iterator iterGene; 00416 size_t i, iTypeUs, iTypeThem, iSubsequence; 00417 00418 m_vecsTotals.resize( GetTypes( ) * ESubsequenceEnd ); 00419 std::fill( m_vecsTotals.begin( ), m_vecsTotals.end( ), 0 ); 00420 for( iterGene = setiGenes.begin( ); iterGene != setiGenes.end( ); ++iterGene ) 00421 for( iTypeUs = 0; iTypeUs < GetTypes( ); ++iTypeUs ) { 00422 if( ( iTypeThem = GeneScores.GetType( GetType( iTypeUs ) ) ) == -1 ) 00423 continue; 00424 for( iSubsequence = ESubsequenceBegin; iSubsequence < GetSubsequences( iTypeUs ); 00425 ++iSubsequence ) 00426 if( GeneScores.Get( iTypeThem, (ESubsequence)iSubsequence, *iterGene ) ) 00427 m_vecsTotals[ ( iTypeUs * ESubsequenceEnd ) + iSubsequence ]++; } 00428 00429 for( i = iTypeUs = 0; iTypeUs < GetTypes( ); ++iTypeUs ) 00430 for( iSubsequence = ESubsequenceBegin; iSubsequence < GetSubsequences( iTypeUs ); 00431 ++iSubsequence ) { 00432 // g_CatSleipnir( ).info( "CCoalesceGroupHistograms::SetTotal( ) type %s, subsequence %d contains %d genes with sequences", 00433 // GetType( iTypeUs ).c_str( ), iSubsequence, m_vecsTotals[ i ] ); 00434 Get( iTypeUs, (ESubsequence)iSubsequence ).SetTotal( m_vecsTotals[ i++ ] ); } } 00435 00436 void Validate( ) const { 00437 size_t iType, iSubsequence, iEdge; 00438 uint32_t iMotif; 00439 00440 for( iType = 0; iType < GetTypes( ); ++iType ) { 00441 const CCoalesceHistogramSet<>& HistsTotal = Get( iType, ESubsequenceTotal ); 00442 00443 for( iSubsequence = ( ESubsequenceBegin + 1 ); iSubsequence < GetSubsequences( iType ); 00444 ++iSubsequence ) { 00445 const CCoalesceHistogramSet<>& HistsCur = Get( iType, (ESubsequence)iSubsequence ); 00446 00447 if( !HistsCur.GetTotal( ) ) 00448 continue; 00449 for( iMotif = 0; iMotif < HistsCur.GetMembers( ); ++iMotif ) 00450 for( iEdge = 0; iEdge < HistsCur.GetEdges( ); ++iEdge ) 00451 if( HistsCur.Get( iMotif, iEdge ) != HistsTotal.Get( iMotif, iEdge ) ) { 00452 std::cerr << "INVALID" << '\t' << GetType( iType ) << '\t' << iSubsequence << 00453 '\t' << iMotif << endl << HistsTotal.Save( iMotif ) << endl << 00454 HistsCur.Save( iMotif ) << std::endl; 00455 break; } } } } 00456 00457 protected: 00458 uint32_t m_iMotifs; 00459 size_t m_iBins; 00460 float m_dStep; 00461 std::vector<unsigned short> m_vecsTotals; 00462 }; 00463 00464 class CCoalesceImpl { 00465 protected: 00466 struct SThreadCombineMotif { 00467 size_t m_iOffset; 00468 size_t m_iStep; 00469 const std::vector<size_t>* m_pveciPCL2FASTA; 00470 CCoalesceGeneScores* m_pGeneScores; 00471 const CCoalesceMotifLibrary* m_pMotifs; 00472 uint32_t m_iMotif; 00473 const CFASTA* m_pFASTA; 00474 const SCoalesceModifiers* m_psModifiers; 00475 }; 00476 00477 static void* ThreadCombineMotif( void* ); 00478 static void Normalize( CPCL& ); 00479 00480 CCoalesceImpl( ) : m_iK(7), m_dPValueCorrelation(0.05f), m_iBins(12), m_dZScoreCondition(0.5f), 00481 m_dProbabilityGene(0.95f), m_dZScoreMotif(0.5f), m_pMotifs(NULL), m_fMotifs(false), 00482 m_iBasesPerMatch(5000), m_dPValueMerge(0.05f), m_dCutoffMerge(2.5f), m_iSizeMinimum(5), 00483 m_iThreads(1), m_iSizeMerge(100), m_iSizeMaximum(1000), m_dPValueCondition(0.05f), 00484 m_dPValueMotif(0.05f), m_fNormalize(false) { } 00485 virtual ~CCoalesceImpl( ); 00486 00487 void Clear( ); 00488 size_t GetMotifCount( ) const; 00489 bool CombineMotifs( const CFASTA&, const std::vector<size_t>&, SCoalesceModifiers&, 00490 const CCoalesceCluster&, size_t, CCoalesceGeneScores&, CCoalesceGroupHistograms&, 00491 CCoalesceGroupHistograms& ) const; 00492 bool InitializeDatasets( const CPCL& ); 00493 bool InitializeGeneScores( const CPCL&, const CFASTA&, std::vector<size_t>&, SCoalesceModifiers&, 00494 CCoalesceGeneScores& ); 00495 00496 float m_dPValueMerge; 00497 float m_dProbabilityGene; 00498 float m_dPValueCondition; 00499 float m_dZScoreCondition; 00500 float m_dPValueCorrelation; 00501 float m_dPValueMotif; 00502 float m_dZScoreMotif; 00503 float m_dCutoffMerge; 00504 size_t m_iNumberCorrelation; 00505 size_t m_iBins; 00506 size_t m_iK; 00507 size_t m_iSizeMerge; 00508 size_t m_iSizeMinimum; 00509 size_t m_iSizeMaximum; 00510 size_t m_iThreads; 00511 std::string m_strDirectoryIntermediate; 00512 CCoalesceMotifLibrary* m_pMotifs; 00513 bool m_fMotifs; 00514 bool m_fNormalize; 00515 size_t m_iBasesPerMatch; 00516 std::vector<SCoalesceDataset> m_vecsDatasets; 00517 std::vector<const CFASTA*> m_vecpWiggles; 00518 std::vector<std::ostream*> m_vecpostm; 00519 std::vector<float> m_vecdSeed; 00520 }; 00521 00522 } 00523 00524 #endif // COALESCEI_H