Sleipnir
src/annotation.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef ANNOTATION_H
00023 #define ANNOTATION_H
00024 
00025 namespace Sleipnir {
00026 
/**
 * Plain value record; it is the element type of the result vector taken by
 * IOntology::TermFinder.
 *
 * The constructor copies each argument into the like-named member and does
 * nothing else.
 *
 * NOTE(review): the field meanings below are read off the member names only;
 * confirm them against the TermFinder implementation.
 */
struct STermFound {
    // Presumably the index of the ontology term this record describes.
    size_t  m_iID;
    // Presumably the p-value computed for the term.
    double  m_dP;
    // Presumably the number of query genes annotated to the term.
    size_t  m_iHitsTerm;
    // Presumably the total number of genes annotated to the term.
    size_t  m_iSizeTerm;
    // Presumably the number of query genes counted in the background.
    size_t  m_iHitsTotal;
    // Presumably the size of the background gene set.
    size_t  m_iSizeTotal;

    /**
     * Member-wise constructor: stores every argument in the member of the
     * same name (iID -> m_iID, dP -> m_dP, and so on), in declaration order.
     */
    STermFound( size_t iID, double dP, size_t iHitsTerm, size_t iSizeTerm, size_t iHitsTotal,
        size_t iSizeTotal ) :
        m_iID( iID ),
        m_dP( dP ),
        m_iHitsTerm( iHitsTerm ),
        m_iSizeTerm( iSizeTerm ),
        m_iHitsTotal( iHitsTotal ),
        m_iSizeTotal( iSizeTotal ) {
    }
};
00096 
00097 }
00098 
00099 #include "annotationi.h"
00100 
00101 namespace Sleipnir {
00102 
00117 class IOntology {
00118 public:
00126     virtual const std::string& GetID( ) const = 0;
00134     virtual size_t GetNodes( ) const = 0;
00145     virtual const std::string& GetID( size_t iTerm ) const = 0;
00156     virtual const std::string& GetGloss( size_t iTerm ) const = 0;
00167     virtual size_t GetParents( size_t iTerm ) const = 0;
00184     virtual size_t GetParent( size_t iTerm, size_t iParent ) const = 0;
00195     virtual size_t GetChildren( size_t iTerm ) const = 0;
00212     virtual size_t GetChild( size_t iTerm, size_t iChild ) const = 0;
00232     virtual bool GetParents( size_t iTerm, std::set<size_t>& setiParents ) const = 0;
00252     virtual bool GetChildren( size_t iTerm, std::set<size_t>& setiChildren ) const = 0;
00266     virtual size_t GetGenes( size_t iTerm, bool fRecursive = false ) const = 0;
00285     virtual const CGene& GetGene( size_t iTerm, size_t iGene ) const = 0;
00302     virtual bool IsAnnotated( size_t iTerm, const CGene& Gene, bool fRecursive = true ) const = 0;
00313     virtual size_t GetNode( const std::string& strID ) const = 0;
00321     virtual void GetGeneNames( std::vector<std::string>& vecstrGenes ) const = 0;
00353     virtual void TermFinder( const CGenes& Genes, std::vector<STermFound>& vecsTerms, bool fBonferroni = true,
00354         bool fRecursive = true, bool fGenome = false, float dPValue = 1,
00355         const CGenes* pBackground = NULL ) const = 0;
00356 };
00357 
00358 // TODO: These should really be templated instead of duplicated like this...
00359 
00367 class COntologyKEGG : COntologyKEGGImpl, public IOntology {
00368 public:
00369     COntologyKEGG( );
00370     bool Open( std::istream& istm, CGenome& Genome, const std::string& strOrganism, bool fSynonyms = false );
00371 
00372     void GetGeneNames( std::vector<std::string>& vecstrGenes ) const {
00373 
00374         return COntologyImpl::GetGeneNames( vecstrGenes ); }
00375 
00376     void TermFinder( const CGenes& Genes, std::vector<STermFound>& vecsTerms, bool fBonferroni = true,
00377         bool fRecursive = true, bool fGenome = false, float dPValue = 1,
00378         const CGenes* pBackground = NULL ) const {
00379 
00380         return COntologyImpl::TermFinder( Genes, vecsTerms, fBonferroni, fRecursive, fGenome, dPValue,
00381             pBackground ); }
00382 
00383     size_t GetNode( const std::string& strID ) const {
00384 
00385         return COntologyImpl::GetNode( strID ); }
00386 
00387     bool IsAnnotated( size_t iTerm, const CGene& Gene, bool fRecursive ) const {
00388 
00389         return COntologyImpl::IsAnnotated( iTerm, Gene, fRecursive ); }
00390 
00391     size_t GetNodes( ) const {
00392 
00393         return COntologyImpl::GetNodes( ); }
00394 
00395     const std::string& GetID( ) const {
00396 
00397         return COntologyImpl::GetID( ); }
00398 
00399     const std::string& GetID( size_t iTerm ) const {
00400 
00401         return COntologyImpl::GetID( iTerm ); }
00402 
00403     const std::string& GetGloss( size_t iTerm ) const {
00404 
00405         return COntologyImpl::GetGloss( iTerm ); }
00406 
00407     size_t GetParents( size_t iTerm ) const {
00408 
00409         return COntologyImpl::GetParents( iTerm ); }
00410 
00411     size_t GetParent( size_t iTerm, size_t iParent ) const {
00412 
00413         return COntologyImpl::GetParent( iTerm, iParent ); }
00414 
00415     size_t GetChildren( size_t iTerm ) const {
00416 
00417         return COntologyImpl::GetChildren( iTerm ); }
00418 
00419     size_t GetChild( size_t iTerm, size_t iChild ) const {
00420 
00421         return COntologyImpl::GetChild( iTerm, iChild ); }
00422 
00423     size_t GetGenes( size_t iTerm, bool fRecursive ) const {
00424 
00425         return COntologyImpl::GetGenes( iTerm, fRecursive ); }
00426 
00427     const CGene& GetGene( size_t iTerm, size_t iGene ) const {
00428 
00429         return COntologyImpl::GetGene( iTerm, iGene ); }
00430 
00431     bool GetParents( size_t iTerm, std::set<size_t>& setiParents ) const {
00432 
00433         return COntologyImpl::GetParents( iTerm, setiParents ); }
00434 
00435     bool GetChildren( size_t iTerm, std::set<size_t>& setiChildren ) const {
00436 
00437         return COntologyImpl::GetChildren( iTerm, setiChildren ); }
00438 };
00439 
00447 class COntologyOBO : COntologyOBOImpl, public IOntology {
00448 public:
00453     static const char   c_szBiologicalProcess[];
00458     static const char   c_szCellularComponent[];
00463     static const char   c_szMolecularFunction[];
00464 
00465     static bool Open( std::istream& istmOntology, std::istream& istmAnnotations, CGenome& Genome,
00466             COntologyOBO& OntoBP, COntologyOBO& OntoMF, COntologyOBO& OntoCC, bool fDatabaseIDs = false,
00467         bool fSynonyms = false );
00468 
00469     COntologyOBO( );
00470     bool Open( std::istream& istmOntology, std::istream& istmAnnotations, CGenome& Genome,
00471         const char* szNamespace, bool fDatabaseIDs = false, bool fSynonyms = false );
00472 
00473     void GetGeneNames( std::vector<std::string>& vecstrGenes ) const {
00474 
00475         return COntologyImpl::GetGeneNames( vecstrGenes ); }
00476 
00477     void TermFinder( const CGenes& Genes, std::vector<STermFound>& vecsTerms, bool fBonferroni = true,
00478         bool fRecursive = true, bool fGenome = false, float dPValue = 1,
00479         const CGenes* pBackground = NULL ) const {
00480 
00481         return COntologyImpl::TermFinder( Genes, vecsTerms, fBonferroni, fRecursive, fGenome, dPValue,
00482             pBackground ); }
00483 
00484     size_t GetNode( const std::string& strID ) const {
00485 
00486         return COntologyImpl::GetNode( strID ); }
00487 
00488     bool IsAnnotated( size_t iTerm, const CGene& Gene, bool fRecursive ) const {
00489 
00490         return COntologyImpl::IsAnnotated( iTerm, Gene, fRecursive ); }
00491 
00492     size_t GetNodes( ) const {
00493 
00494         return COntologyImpl::GetNodes( ); }
00495 
00496     const std::string& GetID( ) const {
00497 
00498         return COntologyImpl::GetID( ); }
00499 
00500     const std::string& GetID( size_t iTerm ) const {
00501 
00502         return COntologyImpl::GetID( iTerm ); }
00503 
00504     const std::string& GetGloss( size_t iTerm ) const {
00505 
00506         return COntologyImpl::GetGloss( iTerm ); }
00507 
00508     size_t GetParents( size_t iTerm ) const {
00509 
00510         return COntologyImpl::GetParents( iTerm ); }
00511 
00512     size_t GetParent( size_t iTerm, size_t iParent ) const {
00513 
00514         return COntologyImpl::GetParent( iTerm, iParent ); }
00515 
00516     size_t GetChildren( size_t iTerm ) const {
00517 
00518         return COntologyImpl::GetChildren( iTerm ); }
00519 
00520     size_t GetChild( size_t iTerm, size_t iChild ) const {
00521 
00522         return COntologyImpl::GetChild( iTerm, iChild ); }
00523 
00524     size_t GetGenes( size_t iTerm, bool fRecursive ) const {
00525 
00526         return COntologyImpl::GetGenes( iTerm, fRecursive ); }
00527 
00528     const CGene& GetGene( size_t iTerm, size_t iGene ) const {
00529 
00530         return COntologyImpl::GetGene( iTerm, iGene ); }
00531 
00532     bool GetParents( size_t iTerm, std::set<size_t>& setiParents ) const {
00533 
00534         return COntologyImpl::GetParents( iTerm, setiParents ); }
00535 
00536     bool GetChildren( size_t iTerm, std::set<size_t>& setiChildren ) const {
00537 
00538         return COntologyImpl::GetChildren( iTerm, setiChildren ); }
00539 };
00540 
00548 class COntologyMIPS : protected COntologyMIPSImpl, public IOntology {
00549 public:
00550     COntologyMIPS( );
00551     bool Open( std::istream& istmOntology, std::istream& istmAnnotations, CGenome& Genome );
00552 
00553     void GetGeneNames( std::vector<std::string>& vecstrGenes ) const {
00554 
00555         return COntologyImpl::GetGeneNames( vecstrGenes ); }
00556 
00557     void TermFinder( const CGenes& Genes, std::vector<STermFound>& vecsTerms, bool fBonferroni = true,
00558         bool fRecursive = true, bool fGenome = false, float dPValue = 1,
00559         const CGenes* pBackground = NULL ) const {
00560 
00561         return COntologyImpl::TermFinder( Genes, vecsTerms, fBonferroni, fRecursive, fGenome, dPValue,
00562             pBackground ); }
00563 
00564     size_t GetNode( const std::string& strID ) const {
00565 
00566         return COntologyImpl::GetNode( strID ); }
00567 
00568     bool IsAnnotated( size_t iTerm, const CGene& Gene, bool fRecursive ) const {
00569 
00570         return COntologyImpl::IsAnnotated( iTerm, Gene, fRecursive ); }
00571 
00572     size_t GetNodes( ) const {
00573 
00574         return COntologyImpl::GetNodes( ); }
00575 
00576     const std::string& GetID( ) const {
00577 
00578         return COntologyImpl::GetID( ); }
00579 
00580     const std::string& GetID( size_t iTerm ) const {
00581 
00582         return COntologyImpl::GetID( iTerm ); }
00583 
00584     const std::string& GetGloss( size_t iTerm ) const {
00585 
00586         return COntologyImpl::GetGloss( iTerm ); }
00587 
00588     size_t GetParents( size_t iTerm ) const {
00589 
00590         return COntologyImpl::GetParents( iTerm ); }
00591 
00592     size_t GetParent( size_t iTerm, size_t iParent ) const {
00593 
00594         return COntologyImpl::GetParent( iTerm, iParent ); }
00595 
00596     size_t GetChildren( size_t iTerm ) const {
00597 
00598         return COntologyImpl::GetChildren( iTerm ); }
00599 
00600     size_t GetChild( size_t iTerm, size_t iChild ) const {
00601 
00602         return COntologyImpl::GetChild( iTerm, iChild ); }
00603 
00604     size_t GetGenes( size_t iTerm, bool fRecursive ) const {
00605 
00606         return COntologyImpl::GetGenes( iTerm, fRecursive ); }
00607 
00608     const CGene& GetGene( size_t iTerm, size_t iGene ) const {
00609 
00610         return COntologyImpl::GetGene( iTerm, iGene ); }
00611 
00612     bool GetParents( size_t iTerm, std::set<size_t>& setiParents ) const {
00613 
00614         return COntologyImpl::GetParents( iTerm, setiParents ); }
00615 
00616     bool GetChildren( size_t iTerm, std::set<size_t>& setiChildren ) const {
00617 
00618         return COntologyImpl::GetChildren( iTerm, setiChildren ); }
00619 };
00620 
00628 class COntologyMIPSPhenotypes : public COntologyMIPS {
00629 public:
00630     COntologyMIPSPhenotypes( );
00631 
00632 protected:
00637     static const char   c_szMIPSPhen[];
00638 };
00639 
00651 class CSlim : CSlimImpl {
00652 public:
00653     bool Open( std::istream& istmSlim, const IOntology* pOntology );
00654     void GetGeneNames( std::vector<std::string>& vecstrGenes ) const;
00655 
00676     const CGene& GetGene( size_t iSlim, size_t iGene ) const {
00677 
00678         return *m_vecvecpGenes[ iSlim ][ iGene ]; }
00679 
00687     size_t GetSlims( ) const {
00688 
00689         return m_vecstrSlims.size( ); }
00690 
00704     size_t GetGenes( size_t iSlim ) const {
00705 
00706         return m_vecvecpGenes[ iSlim ].size( ); }
00707 
00721     const std::string& GetSlim( size_t iSlim ) const {
00722 
00723         return m_vecstrSlims[ iSlim ]; }
00724 
00738     size_t GetNodes( size_t iSlim ) const {
00739 
00740         return m_vecveciTerms[ iSlim ].size( ); }
00741 
00761     size_t GetNode( size_t iSlim, size_t iTerm ) const {
00762 
00763         return m_vecveciTerms[ iSlim ][ iTerm ]; }
00764 };
00765 
00766 }
00767 
00768 #endif // ANNOTATION_H