Sleipnir
src/genome.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef GENOME_H
00023 #define GENOME_H
00024 
00025 #include <fstream>
00026 #include <string>
00027 
00028 #include "genomei.h"
00029 
00030 namespace Sleipnir {
00031 
00032 class CGenes;
00033 
00061 class CGene : CGeneImpl {
00062 public:
00063     CGene( const std::string& strID );
00064 
00065     bool AddSynonym( const std::string& strName );
00066     bool AddAnnotation( const IOntology* pOntology, size_t iTerm );
00067     bool SetWeight( float weight);
00068     bool IsAnnotated( const IOntology* pOntology ) const;
00069     bool IsAnnotated( const IOntology* pOntology, size_t iTerm ) const;
00070 
00078     const std::string& GetName( ) const {
00079 
00080         return m_strName; }
00081 
00092     size_t GetSynonyms( ) const {
00093 
00094         return m_iSynonyms; }
00095 
00109     const std::string& GetSynonym( size_t iSynonym ) const {
00110 
00111         return m_astrSynonyms[ iSynonym ]; }
00112 
00123     void SetGloss( const std::string& strGloss ) {
00124 
00125         m_strGloss = strGloss; }
00126 
00137     const std::string& GetGloss( ) const {
00138 
00139         return m_strGloss; }
00140 
00151     void SetDubious( bool fDubious ) {
00152 
00153         m_fDubious = fDubious; }
00154 
00165     bool GetDubious( ) const {
00166 
00167         return m_fDubious; }
00168 
00179     void SetRNA( bool fRNA ) {
00180 
00181         m_fRNA = fRNA; }
00182 
00193     bool GetRNA( ) const {
00194 
00195         return m_fRNA; }
00206     const float GetWeight() const {
00207         return m_weight; }
00218     size_t GetOntologies( ) const {
00219 
00220         return m_iOntologies; }
00221 
00235     const IOntology* GetOntology( size_t iOntology ) const {
00236 
00237         return m_apOntologies[ iOntology ]; }
00238 
00252     size_t GetAnnotations( size_t iOntology ) const {
00253 
00254         return m_apveciAnnotations[ iOntology ]->size( ); }
00255 
00273     size_t GetAnnotation( size_t iOntology, size_t iAnnotation ) const {
00274 
00275         return (*m_apveciAnnotations[ iOntology ])[ iAnnotation ]; }
00276 
00277 
00278 };
00279 
00302 class CGenome : CGenomeImpl {
00303 public:
00304     bool Open( std::istream& istmFeatures );
00305     bool Open( const std::vector<std::string>& vecstrGenes );
00306     bool Open( const char* szFile, std::vector<CGenes*>& vecpGenes );
00307     bool Open( std::istream& istmGenes, std::vector<CGenes*>& vecpGenes );
00308     CGene& AddGene( const std::string& strID );
00309     size_t FindGene( const std::string& strGene ) const;
00310     std::vector<std::string> GetGeneNames( ) const;
00311     size_t CountGenes( const IOntology* pOntology ) const;
00312     bool AddSynonym( CGene& Gene, const std::string& strName );
00313 
00327     CGene& GetGene( size_t iGene ) const {
00328 
00329         return *m_vecpGenes[ iGene ]; }
00330 
00345     size_t GetGene( const std::string& strGene ) const {
00346         TMapStrI::const_iterator    iterGene;
00347 
00348         return ( ( ( iterGene = m_mapGenes.find( strGene ) ) == m_mapGenes.end( ) ) ? -1 :
00349             iterGene->second ); }
00350 
00358     size_t GetGenes( ) const {
00359 
00360         return m_vecpGenes.size( ); }
00361 };
00362 
00373 class CGenes : CGenesImpl {
00374 public:
00375     static bool Open( const char* szFile, CGenome& Genome, std::vector<std::string>& vecstrNames, std::vector<CGenes*>& vecpGenes );
00376 
00377     CGenes( CGenome& Genome );
00378 
00379     bool Open( std::istream& istm, bool fCreate = true );
00380     bool Open( const std::vector<std::string>& vecstrGenes, bool fCreate = true );
00381     bool OpenWeighted( std::istream& istm, bool fCreate = true );
00382     void Filter( const CGenes& GenesExclude );
00383     size_t CountAnnotations( const IOntology* pOntology, size_t iTerm, bool fRecursive = true,
00384         const CGenes* pBackground = NULL ) const;
00385     std::vector<std::string> GetGeneNames( ) const;
00386     
00387 
00414     bool Open( const char* szFile, bool fCreate = true ) {
00415         std::ifstream   ifsm;
00416 
00417         ifsm.open( szFile );
00418         return ( ifsm.is_open( ) && Open( ifsm, fCreate ) ); }
00445     bool OpenWeighted( const char* szFile, bool fCreate = true ) {
00446         std::ifstream   ifsm;
00447 
00448         ifsm.open( szFile );
00449         return ( ifsm.is_open( ) && OpenWeighted( ifsm, fCreate ) ); }
00457     size_t GetGenes( ) const {
00458 
00459         return m_vecpGenes.size( ); }
00460 
00474     bool IsGene( const std::string& strGene ) const {
00475 
00476         return ( m_mapGenes.find( strGene ) != m_mapGenes.end( ) ); }
00485     bool IsWeighted() const {
00486         return isWeighted;}
00487 
00495     CGenome& GetGenome( ) const {
00496 
00497         return m_Genome; }
00498 
00512     const CGene& GetGene( size_t iGene ) const {
00513 
00514         return *m_vecpGenes[ iGene ]; }
00515 
00529     const float GetGeneWeight( size_t iGene ) const {
00530         if (iGene!=-1)
00531             return m_vecpGenes[ iGene ]->GetWeight(); 
00532         return 0;}
00546     size_t GetGene( const std::string& strGene ) const {
00547         TMapStrI::const_iterator    iterGene;
00548 
00549         return ( ( ( iterGene = m_mapGenes.find( strGene ) ) == m_mapGenes.end( ) ) ? -1 :
00550             iterGene->second ); }
00551 
00565     bool AddGene( const std::string& strGene ) {
00566 
00567         if( GetGene( strGene ) != -1 )
00568             return false;
00569 
00570         m_mapGenes[ strGene ] = m_vecpGenes.size( );
00571         m_vecpGenes.push_back( &m_Genome.AddGene( strGene ) );
00572         return true; }
00573 };
00574 
00575 }
00576 
00577 #endif // GENOME_H