Sleipnir
src/datapair.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef DATAPAIR_H
00023 #define DATAPAIR_H
00024 
00025 #include "datapairi.h"
00026 
00027 namespace Sleipnir {
00028 
00029 class CSlim;
00030 
00055 class CDataPair : public CDataPairImpl {
00056 public:
00057     bool Open( const char* szDatafile, bool fContinuous, bool fMemmap = false, size_t iSkip = 2,
00058         bool fZScore = false, bool fSeek = false );
00059     bool Open( const CSlim& Slim );
00060     bool Open( const CDat& dat );
00061     bool OpenQuants( const char* szDatafile );
00062     void SetQuants( const float* adBinEdges, size_t iBins ){
00063       SetQuants(adBinEdges, iBins );
00064     }
00065     void SetQuants( const std::vector<float>& vecdBinEdges );
00066     std::vector<float> GetQuants(){
00067         std::vector<float> v;
00068         size_t i;
00069         for(i=0; i<m_vecdQuant.size(); i++){
00070             v.push_back(m_vecdQuant[i]);
00071         }
00072         return v;
00073     }
00074 
00075     size_t Quantize( float dValue ) const;
00076     void Quantize( );
00077     size_t Quantize( size_t iY, size_t iX, size_t iZero ) const;
00078 
00079     void Save( const char* szFile ) const;
00080     
00081     
00095     unsigned char GetValues( ) const {
00096 
00097         return (unsigned char)m_vecdQuant.size( ); }
00098 
00109     bool IsContinuous( ) const {
00110 
00111         return m_fContinuous; }
00112 
00142     bool Open( const CDat& DatKnown, const std::vector<CGenes*>& vecpOther, const CGenome& Genome,
00143         bool fKnownNegatives ) {
00144 
00145         return CDat::Open( DatKnown, vecpOther, Genome, fKnownNegatives ); }
00146 
00166     bool Open( const std::vector<std::string>& vecstrGenes, const CDistanceMatrix& MatScores ) {
00167 
00168         return CDat::Open( vecstrGenes, MatScores ); }
00169 };
00170 
00192 class CPCLPair : public CPCLPairImpl {
00193 public:
00194     bool Open( const char* szDatafile, size_t iSkip );
00195     size_t Quantize( float dValue, size_t iExperiment ) const;
00196     void Quantize( );
00197 
00211     unsigned char GetValues( size_t iExperiment ) const {
00212       
00213       return (unsigned char)m_vecvecdQuants[ iExperiment ].size( ); }
00214     
00215 
00216 };
00217 
00235 class CDatFilter : public CDatFilterImpl {
00236 public:
00237 
00238     bool Attach( const CDataPair& Dat, const CGenes& Genes, CDat::EFilter eFilter,
00239         const CDat* pAnswers = NULL );
00240     bool Attach( const CDatFilter& Dat, const CGenes& Genes, CDat::EFilter eFilter,
00241         const CDat* pAnswers = NULL );
00242 
00256     bool Attach( const CDataPair& Dat ) {
00257 
00258         return CDatFilterImpl::Attach( &Dat, NULL, NULL, CDat::EFilterInclude, NULL ); }
00259 
00270     size_t GetValues( ) const {
00271 
00272         return ( m_pFilter ? m_pFilter->GetValues( ) : ( m_pDat ? m_pDat->GetValues( ) : -1 ) ); }
00273 
00287     size_t GetGene( const std::string& strGene ) const {
00288 
00289         return ( m_pFilter ? m_pFilter->GetGene( strGene ) : ( m_pDat ? m_pDat->GetGene( strGene ) : -1 ) ); }
00290 
00304     std::string GetGene( size_t iGene ) const {
00305 
00306         return CDatFilterImpl::GetGene( iGene ); }
00307 
00321     size_t Quantize( float dValue ) const {
00322 
00323         return ( m_pFilter ? m_pFilter->Quantize( dValue ) : ( m_pDat ? m_pDat->Quantize( dValue ) : -1 ) ); }
00324 
00325 
00326     size_t Quantize( size_t iY, size_t iX, size_t iZero ) const {
00327         float d;
00328         if( iY == -1 || iX == -1 ) {
00329             return -1;
00330         }else if( CMeta::IsNaN( (d = Get( iY, iX )) ) ) {
00331             return iZero;
00332         }else {
00333             return Quantize(d);
00334         }
00335     }
00336 
00337 
00338 
00355     float& Get( size_t iY, size_t iX ) const {
00356         static float    c_dNaN  = CMeta::GetNaN( );
00357 
00358         if( !( m_pDat || m_pFilter ) )
00359             return c_dNaN;
00360         if( m_vecfGenes.empty( ) )
00361             return ( m_pFilter ? m_pFilter->Get( iY, iX ) : ( m_pDat ? m_pDat->Get( iY, iX ) : c_dNaN ) );
00362 
00363         switch( m_eFilter ) {
00364             case CDat::EFilterInclude:
00365                 if( !( m_vecfGenes[ iX ] && m_vecfGenes[ iY ] ) )
00366                     return c_dNaN;
00367                 break;
00368 
00369             case CDat::EFilterExclude:
00370                 if( m_vecfGenes[ iX ] || m_vecfGenes[ iY ] )
00371                     return c_dNaN;
00372                 break;
00373 
00374             case CDat::EFilterEdge:
00375                 if( !( m_vecfGenes[ iX ] || m_vecfGenes[ iY ] ) )
00376                     return c_dNaN;
00377                 break;
00378 
00379             case CDat::EFilterTerm:
00380                 float   d;
00381                 size_t  iOne, iTwo;
00382 
00383                 if( !m_pAnswers )
00384                     return c_dNaN;
00385                 d = ( ( ( iOne = m_veciAnswers[ iX ] ) != -1 ) && ( ( iTwo = m_veciAnswers[ iY ] ) != -1 ) ) ?
00386                     m_pAnswers->Get( iTwo, iOne ) : CMeta::GetNaN( );
00387                 if( !( m_vecfGenes[ iX ] || m_vecfGenes[ iY ] ) ||
00388                     ( ( m_vecfGenes[ iX ] != m_vecfGenes[ iY ] ) && !CMeta::IsNaN( d ) && ( d > 0 ) ) )
00389                     return c_dNaN;
00390                 break; }
00391 
00392         return ( m_pFilter ? m_pFilter->Get( iY, iX ) : m_pDat->Get( iY, iX ) ); }
00393 };
00394 
00395 }
00396 
00397 #endif // DATAPAIR_H