// Sleipnir
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef DATAPAIR_H 00023 #define DATAPAIR_H 00024 00025 #include "datapairi.h" 00026 00027 namespace Sleipnir { 00028 00029 class CSlim; 00030 00055 class CDataPair : public CDataPairImpl { 00056 public: 00057 bool Open( const char* szDatafile, bool fContinuous, bool fMemmap = false, size_t iSkip = 2, 00058 bool fZScore = false, bool fSeek = false ); 00059 bool Open( const CSlim& Slim ); 00060 bool Open( const CDat& dat ); 00061 bool OpenQuants( const char* szDatafile ); 00062 void SetQuants( const float* adBinEdges, size_t iBins ){ 00063 SetQuants(adBinEdges, iBins ); 00064 } 00065 void SetQuants( const std::vector<float>& vecdBinEdges ); 00066 std::vector<float> GetQuants(){ 00067 std::vector<float> v; 00068 size_t i; 00069 for(i=0; i<m_vecdQuant.size(); i++){ 00070 v.push_back(m_vecdQuant[i]); 00071 } 00072 return v; 00073 } 00074 00075 size_t Quantize( 
float dValue ) const; 00076 void Quantize( ); 00077 size_t Quantize( size_t iY, size_t iX, size_t iZero ) const; 00078 00079 void Save( const char* szFile ) const; 00080 00081 00095 unsigned char GetValues( ) const { 00096 00097 return (unsigned char)m_vecdQuant.size( ); } 00098 00109 bool IsContinuous( ) const { 00110 00111 return m_fContinuous; } 00112 00142 bool Open( const CDat& DatKnown, const std::vector<CGenes*>& vecpOther, const CGenome& Genome, 00143 bool fKnownNegatives ) { 00144 00145 return CDat::Open( DatKnown, vecpOther, Genome, fKnownNegatives ); } 00146 00166 bool Open( const std::vector<std::string>& vecstrGenes, const CDistanceMatrix& MatScores ) { 00167 00168 return CDat::Open( vecstrGenes, MatScores ); } 00169 }; 00170 00192 class CPCLPair : public CPCLPairImpl { 00193 public: 00194 bool Open( const char* szDatafile, size_t iSkip ); 00195 size_t Quantize( float dValue, size_t iExperiment ) const; 00196 void Quantize( ); 00197 00211 unsigned char GetValues( size_t iExperiment ) const { 00212 00213 return (unsigned char)m_vecvecdQuants[ iExperiment ].size( ); } 00214 00215 00216 }; 00217 00235 class CDatFilter : public CDatFilterImpl { 00236 public: 00237 00238 bool Attach( const CDataPair& Dat, const CGenes& Genes, CDat::EFilter eFilter, 00239 const CDat* pAnswers = NULL ); 00240 bool Attach( const CDatFilter& Dat, const CGenes& Genes, CDat::EFilter eFilter, 00241 const CDat* pAnswers = NULL ); 00242 00256 bool Attach( const CDataPair& Dat ) { 00257 00258 return CDatFilterImpl::Attach( &Dat, NULL, NULL, CDat::EFilterInclude, NULL ); } 00259 00270 size_t GetValues( ) const { 00271 00272 return ( m_pFilter ? m_pFilter->GetValues( ) : ( m_pDat ? m_pDat->GetValues( ) : -1 ) ); } 00273 00287 size_t GetGene( const std::string& strGene ) const { 00288 00289 return ( m_pFilter ? m_pFilter->GetGene( strGene ) : ( m_pDat ? 
m_pDat->GetGene( strGene ) : -1 ) ); } 00290 00304 std::string GetGene( size_t iGene ) const { 00305 00306 return CDatFilterImpl::GetGene( iGene ); } 00307 00321 size_t Quantize( float dValue ) const { 00322 00323 return ( m_pFilter ? m_pFilter->Quantize( dValue ) : ( m_pDat ? m_pDat->Quantize( dValue ) : -1 ) ); } 00324 00325 00326 size_t Quantize( size_t iY, size_t iX, size_t iZero ) const { 00327 float d; 00328 if( iY == -1 || iX == -1 ) { 00329 return -1; 00330 }else if( CMeta::IsNaN( (d = Get( iY, iX )) ) ) { 00331 return iZero; 00332 }else { 00333 return Quantize(d); 00334 } 00335 } 00336 00337 00338 00355 float& Get( size_t iY, size_t iX ) const { 00356 static float c_dNaN = CMeta::GetNaN( ); 00357 00358 if( !( m_pDat || m_pFilter ) ) 00359 return c_dNaN; 00360 if( m_vecfGenes.empty( ) ) 00361 return ( m_pFilter ? m_pFilter->Get( iY, iX ) : ( m_pDat ? m_pDat->Get( iY, iX ) : c_dNaN ) ); 00362 00363 switch( m_eFilter ) { 00364 case CDat::EFilterInclude: 00365 if( !( m_vecfGenes[ iX ] && m_vecfGenes[ iY ] ) ) 00366 return c_dNaN; 00367 break; 00368 00369 case CDat::EFilterExclude: 00370 if( m_vecfGenes[ iX ] || m_vecfGenes[ iY ] ) 00371 return c_dNaN; 00372 break; 00373 00374 case CDat::EFilterEdge: 00375 if( !( m_vecfGenes[ iX ] || m_vecfGenes[ iY ] ) ) 00376 return c_dNaN; 00377 break; 00378 00379 case CDat::EFilterTerm: 00380 float d; 00381 size_t iOne, iTwo; 00382 00383 if( !m_pAnswers ) 00384 return c_dNaN; 00385 d = ( ( ( iOne = m_veciAnswers[ iX ] ) != -1 ) && ( ( iTwo = m_veciAnswers[ iY ] ) != -1 ) ) ? 00386 m_pAnswers->Get( iTwo, iOne ) : CMeta::GetNaN( ); 00387 if( !( m_vecfGenes[ iX ] || m_vecfGenes[ iY ] ) || 00388 ( ( m_vecfGenes[ iX ] != m_vecfGenes[ iY ] ) && !CMeta::IsNaN( d ) && ( d > 0 ) ) ) 00389 return c_dNaN; 00390 break; } 00391 00392 return ( m_pFilter ? m_pFilter->Get( iY, iX ) : m_pDat->Get( iY, iX ) ); } 00393 }; 00394 00395 } 00396 00397 #endif // DATAPAIR_H