// Sleipnir
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef ANNOTATION_H 00023 #define ANNOTATION_H 00024 00025 namespace Sleipnir { 00026 00038 struct STermFound { 00043 size_t m_iID; 00048 double m_dP; 00053 size_t m_iHitsTerm; 00058 size_t m_iSizeTerm; 00063 size_t m_iHitsTotal; 00068 size_t m_iSizeTotal; 00069 00092 STermFound( size_t iID, double dP, size_t iHitsTerm, size_t iSizeTerm, size_t iHitsTotal, size_t iSizeTotal ) : 00093 m_iID(iID), m_dP(dP), m_iHitsTerm(iHitsTerm), m_iSizeTerm(iSizeTerm), m_iHitsTotal(iHitsTotal), 00094 m_iSizeTotal(iSizeTotal) { } 00095 }; 00096 00097 } 00098 00099 #include "annotationi.h" 00100 00101 namespace Sleipnir { 00102 00117 class IOntology { 00118 public: 00126 virtual const std::string& GetID( ) const = 0; 00134 virtual size_t GetNodes( ) const = 0; 00145 virtual const std::string& GetID( size_t iTerm ) const = 0; 00156 virtual const std::string& GetGloss( size_t 
iTerm ) const = 0; 00167 virtual size_t GetParents( size_t iTerm ) const = 0; 00184 virtual size_t GetParent( size_t iTerm, size_t iParent ) const = 0; 00195 virtual size_t GetChildren( size_t iTerm ) const = 0; 00212 virtual size_t GetChild( size_t iTerm, size_t iChild ) const = 0; 00232 virtual bool GetParents( size_t iTerm, std::set<size_t>& setiParents ) const = 0; 00252 virtual bool GetChildren( size_t iTerm, std::set<size_t>& setiChildren ) const = 0; 00266 virtual size_t GetGenes( size_t iTerm, bool fRecursive = false ) const = 0; 00285 virtual const CGene& GetGene( size_t iTerm, size_t iGene ) const = 0; 00302 virtual bool IsAnnotated( size_t iTerm, const CGene& Gene, bool fRecursive = true ) const = 0; 00313 virtual size_t GetNode( const std::string& strID ) const = 0; 00321 virtual void GetGeneNames( std::vector<std::string>& vecstrGenes ) const = 0; 00353 virtual void TermFinder( const CGenes& Genes, std::vector<STermFound>& vecsTerms, bool fBonferroni = true, 00354 bool fRecursive = true, bool fGenome = false, float dPValue = 1, 00355 const CGenes* pBackground = NULL ) const = 0; 00356 }; 00357 00358 // TODO: These should really be templated instead of duplicated like this... 
00359 00367 class COntologyKEGG : COntologyKEGGImpl, public IOntology { 00368 public: 00369 COntologyKEGG( ); 00370 bool Open( std::istream& istm, CGenome& Genome, const std::string& strOrganism, bool fSynonyms = false ); 00371 00372 void GetGeneNames( std::vector<std::string>& vecstrGenes ) const { 00373 00374 return COntologyImpl::GetGeneNames( vecstrGenes ); } 00375 00376 void TermFinder( const CGenes& Genes, std::vector<STermFound>& vecsTerms, bool fBonferroni = true, 00377 bool fRecursive = true, bool fGenome = false, float dPValue = 1, 00378 const CGenes* pBackground = NULL ) const { 00379 00380 return COntologyImpl::TermFinder( Genes, vecsTerms, fBonferroni, fRecursive, fGenome, dPValue, 00381 pBackground ); } 00382 00383 size_t GetNode( const std::string& strID ) const { 00384 00385 return COntologyImpl::GetNode( strID ); } 00386 00387 bool IsAnnotated( size_t iTerm, const CGene& Gene, bool fRecursive ) const { 00388 00389 return COntologyImpl::IsAnnotated( iTerm, Gene, fRecursive ); } 00390 00391 size_t GetNodes( ) const { 00392 00393 return COntologyImpl::GetNodes( ); } 00394 00395 const std::string& GetID( ) const { 00396 00397 return COntologyImpl::GetID( ); } 00398 00399 const std::string& GetID( size_t iTerm ) const { 00400 00401 return COntologyImpl::GetID( iTerm ); } 00402 00403 const std::string& GetGloss( size_t iTerm ) const { 00404 00405 return COntologyImpl::GetGloss( iTerm ); } 00406 00407 size_t GetParents( size_t iTerm ) const { 00408 00409 return COntologyImpl::GetParents( iTerm ); } 00410 00411 size_t GetParent( size_t iTerm, size_t iParent ) const { 00412 00413 return COntologyImpl::GetParent( iTerm, iParent ); } 00414 00415 size_t GetChildren( size_t iTerm ) const { 00416 00417 return COntologyImpl::GetChildren( iTerm ); } 00418 00419 size_t GetChild( size_t iTerm, size_t iChild ) const { 00420 00421 return COntologyImpl::GetChild( iTerm, iChild ); } 00422 00423 size_t GetGenes( size_t iTerm, bool fRecursive ) const { 00424 00425 return 
COntologyImpl::GetGenes( iTerm, fRecursive ); } 00426 00427 const CGene& GetGene( size_t iTerm, size_t iGene ) const { 00428 00429 return COntologyImpl::GetGene( iTerm, iGene ); } 00430 00431 bool GetParents( size_t iTerm, std::set<size_t>& setiParents ) const { 00432 00433 return COntologyImpl::GetParents( iTerm, setiParents ); } 00434 00435 bool GetChildren( size_t iTerm, std::set<size_t>& setiChildren ) const { 00436 00437 return COntologyImpl::GetChildren( iTerm, setiChildren ); } 00438 }; 00439 00447 class COntologyOBO : COntologyOBOImpl, public IOntology { 00448 public: 00453 static const char c_szBiologicalProcess[]; 00458 static const char c_szCellularComponent[]; 00463 static const char c_szMolecularFunction[]; 00464 00465 static bool Open( std::istream& istmOntology, std::istream& istmAnnotations, CGenome& Genome, 00466 COntologyOBO& OntoBP, COntologyOBO& OntoMF, COntologyOBO& OntoCC, bool fDatabaseIDs = false, 00467 bool fSynonyms = false ); 00468 00469 COntologyOBO( ); 00470 bool Open( std::istream& istmOntology, std::istream& istmAnnotations, CGenome& Genome, 00471 const char* szNamespace, bool fDatabaseIDs = false, bool fSynonyms = false ); 00472 00473 void GetGeneNames( std::vector<std::string>& vecstrGenes ) const { 00474 00475 return COntologyImpl::GetGeneNames( vecstrGenes ); } 00476 00477 void TermFinder( const CGenes& Genes, std::vector<STermFound>& vecsTerms, bool fBonferroni = true, 00478 bool fRecursive = true, bool fGenome = false, float dPValue = 1, 00479 const CGenes* pBackground = NULL ) const { 00480 00481 return COntologyImpl::TermFinder( Genes, vecsTerms, fBonferroni, fRecursive, fGenome, dPValue, 00482 pBackground ); } 00483 00484 size_t GetNode( const std::string& strID ) const { 00485 00486 return COntologyImpl::GetNode( strID ); } 00487 00488 bool IsAnnotated( size_t iTerm, const CGene& Gene, bool fRecursive ) const { 00489 00490 return COntologyImpl::IsAnnotated( iTerm, Gene, fRecursive ); } 00491 00492 size_t GetNodes( ) const { 
00493 00494 return COntologyImpl::GetNodes( ); } 00495 00496 const std::string& GetID( ) const { 00497 00498 return COntologyImpl::GetID( ); } 00499 00500 const std::string& GetID( size_t iTerm ) const { 00501 00502 return COntologyImpl::GetID( iTerm ); } 00503 00504 const std::string& GetGloss( size_t iTerm ) const { 00505 00506 return COntologyImpl::GetGloss( iTerm ); } 00507 00508 size_t GetParents( size_t iTerm ) const { 00509 00510 return COntologyImpl::GetParents( iTerm ); } 00511 00512 size_t GetParent( size_t iTerm, size_t iParent ) const { 00513 00514 return COntologyImpl::GetParent( iTerm, iParent ); } 00515 00516 size_t GetChildren( size_t iTerm ) const { 00517 00518 return COntologyImpl::GetChildren( iTerm ); } 00519 00520 size_t GetChild( size_t iTerm, size_t iChild ) const { 00521 00522 return COntologyImpl::GetChild( iTerm, iChild ); } 00523 00524 size_t GetGenes( size_t iTerm, bool fRecursive ) const { 00525 00526 return COntologyImpl::GetGenes( iTerm, fRecursive ); } 00527 00528 const CGene& GetGene( size_t iTerm, size_t iGene ) const { 00529 00530 return COntologyImpl::GetGene( iTerm, iGene ); } 00531 00532 bool GetParents( size_t iTerm, std::set<size_t>& setiParents ) const { 00533 00534 return COntologyImpl::GetParents( iTerm, setiParents ); } 00535 00536 bool GetChildren( size_t iTerm, std::set<size_t>& setiChildren ) const { 00537 00538 return COntologyImpl::GetChildren( iTerm, setiChildren ); } 00539 }; 00540 00548 class COntologyMIPS : protected COntologyMIPSImpl, public IOntology { 00549 public: 00550 COntologyMIPS( ); 00551 bool Open( std::istream& istmOntology, std::istream& istmAnnotations, CGenome& Genome ); 00552 00553 void GetGeneNames( std::vector<std::string>& vecstrGenes ) const { 00554 00555 return COntologyImpl::GetGeneNames( vecstrGenes ); } 00556 00557 void TermFinder( const CGenes& Genes, std::vector<STermFound>& vecsTerms, bool fBonferroni = true, 00558 bool fRecursive = true, bool fGenome = false, float dPValue = 1, 00559 
const CGenes* pBackground = NULL ) const { 00560 00561 return COntologyImpl::TermFinder( Genes, vecsTerms, fBonferroni, fRecursive, fGenome, dPValue, 00562 pBackground ); } 00563 00564 size_t GetNode( const std::string& strID ) const { 00565 00566 return COntologyImpl::GetNode( strID ); } 00567 00568 bool IsAnnotated( size_t iTerm, const CGene& Gene, bool fRecursive ) const { 00569 00570 return COntologyImpl::IsAnnotated( iTerm, Gene, fRecursive ); } 00571 00572 size_t GetNodes( ) const { 00573 00574 return COntologyImpl::GetNodes( ); } 00575 00576 const std::string& GetID( ) const { 00577 00578 return COntologyImpl::GetID( ); } 00579 00580 const std::string& GetID( size_t iTerm ) const { 00581 00582 return COntologyImpl::GetID( iTerm ); } 00583 00584 const std::string& GetGloss( size_t iTerm ) const { 00585 00586 return COntologyImpl::GetGloss( iTerm ); } 00587 00588 size_t GetParents( size_t iTerm ) const { 00589 00590 return COntologyImpl::GetParents( iTerm ); } 00591 00592 size_t GetParent( size_t iTerm, size_t iParent ) const { 00593 00594 return COntologyImpl::GetParent( iTerm, iParent ); } 00595 00596 size_t GetChildren( size_t iTerm ) const { 00597 00598 return COntologyImpl::GetChildren( iTerm ); } 00599 00600 size_t GetChild( size_t iTerm, size_t iChild ) const { 00601 00602 return COntologyImpl::GetChild( iTerm, iChild ); } 00603 00604 size_t GetGenes( size_t iTerm, bool fRecursive ) const { 00605 00606 return COntologyImpl::GetGenes( iTerm, fRecursive ); } 00607 00608 const CGene& GetGene( size_t iTerm, size_t iGene ) const { 00609 00610 return COntologyImpl::GetGene( iTerm, iGene ); } 00611 00612 bool GetParents( size_t iTerm, std::set<size_t>& setiParents ) const { 00613 00614 return COntologyImpl::GetParents( iTerm, setiParents ); } 00615 00616 bool GetChildren( size_t iTerm, std::set<size_t>& setiChildren ) const { 00617 00618 return COntologyImpl::GetChildren( iTerm, setiChildren ); } 00619 }; 00620 00628 class COntologyMIPSPhenotypes : public 
COntologyMIPS { 00629 public: 00630 COntologyMIPSPhenotypes( ); 00631 00632 protected: 00637 static const char c_szMIPSPhen[]; 00638 }; 00639 00651 class CSlim : CSlimImpl { 00652 public: 00653 bool Open( std::istream& istmSlim, const IOntology* pOntology ); 00654 void GetGeneNames( std::vector<std::string>& vecstrGenes ) const; 00655 00676 const CGene& GetGene( size_t iSlim, size_t iGene ) const { 00677 00678 return *m_vecvecpGenes[ iSlim ][ iGene ]; } 00679 00687 size_t GetSlims( ) const { 00688 00689 return m_vecstrSlims.size( ); } 00690 00704 size_t GetGenes( size_t iSlim ) const { 00705 00706 return m_vecvecpGenes[ iSlim ].size( ); } 00707 00721 const std::string& GetSlim( size_t iSlim ) const { 00722 00723 return m_vecstrSlims[ iSlim ]; } 00724 00738 size_t GetNodes( size_t iSlim ) const { 00739 00740 return m_vecveciTerms[ iSlim ].size( ); } 00741 00761 size_t GetNode( size_t iSlim, size_t iTerm ) const { 00762 00763 return m_vecveciTerms[ iSlim ][ iTerm ]; } 00764 }; 00765 00766 } 00767 00768 #endif // ANNOTATION_H