Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef GENOME_H 00023 #define GENOME_H 00024 00025 #include <fstream> 00026 #include <string> 00027 00028 #include "genomei.h" 00029 00030 namespace Sleipnir { 00031 00032 class CGenes; 00033 00061 class CGene : CGeneImpl { 00062 public: 00063 CGene( const std::string& strID ); 00064 00065 bool AddSynonym( const std::string& strName ); 00066 bool AddAnnotation( const IOntology* pOntology, size_t iTerm ); 00067 bool SetWeight( float weight); 00068 bool IsAnnotated( const IOntology* pOntology ) const; 00069 bool IsAnnotated( const IOntology* pOntology, size_t iTerm ) const; 00070 00078 const std::string& GetName( ) const { 00079 00080 return m_strName; } 00081 00092 size_t GetSynonyms( ) const { 00093 00094 return m_iSynonyms; } 00095 00109 const std::string& GetSynonym( size_t iSynonym ) const { 00110 00111 return m_astrSynonyms[ iSynonym ]; } 00112 00123 void SetGloss( const std::string& strGloss ) { 00124 00125 m_strGloss = strGloss; } 00126 00137 const std::string& GetGloss( ) const { 00138 00139 return m_strGloss; } 00140 00151 void SetDubious( bool fDubious ) { 00152 00153 m_fDubious = fDubious; } 00154 00165 bool GetDubious( ) const { 00166 00167 return m_fDubious; } 00168 00179 void SetRNA( bool fRNA ) { 00180 00181 m_fRNA = fRNA; } 00182 00193 bool GetRNA( ) const { 00194 00195 return m_fRNA; } 00206 const float GetWeight() const { 00207 return m_weight; } 00218 size_t GetOntologies( ) const { 00219 00220 return m_iOntologies; } 00221 00235 const IOntology* GetOntology( size_t iOntology ) const { 00236 00237 return m_apOntologies[ iOntology ]; } 00238 00252 size_t GetAnnotations( size_t iOntology ) const { 00253 00254 return m_apveciAnnotations[ iOntology ]->size( ); } 00255 00273 size_t GetAnnotation( size_t iOntology, size_t iAnnotation ) const { 00274 00275 return (*m_apveciAnnotations[ iOntology ])[ iAnnotation ]; } 00276 00277 00278 }; 00279 00302 class CGenome : CGenomeImpl { 00303 public: 00304 bool Open( std::istream& istmFeatures ); 00305 bool Open( const std::vector<std::string>& vecstrGenes ); 00306 bool Open( const char* szFile, std::vector<CGenes*>& vecpGenes ); 00307 bool Open( std::istream& istmGenes, std::vector<CGenes*>& vecpGenes ); 00308 CGene& AddGene( const std::string& strID ); 00309 size_t FindGene( const std::string& strGene ) const; 00310 std::vector<std::string> GetGeneNames( ) const; 00311 size_t CountGenes( const IOntology* pOntology ) const; 00312 bool AddSynonym( CGene& Gene, const std::string& strName ); 00313 00327 CGene& GetGene( size_t iGene ) const { 00328 00329 return *m_vecpGenes[ iGene ]; } 00330 00345 size_t GetGene( const std::string& strGene ) const { 00346 TMapStrI::const_iterator iterGene; 00347 00348 return ( ( ( iterGene = m_mapGenes.find( strGene ) ) == m_mapGenes.end( ) ) ? -1 : 00349 iterGene->second ); } 00350 00358 size_t GetGenes( ) const { 00359 00360 return m_vecpGenes.size( ); } 00361 }; 00362 00373 class CGenes : CGenesImpl { 00374 public: 00375 static bool Open( const char* szFile, CGenome& Genome, std::vector<std::string>& vecstrNames, std::vector<CGenes*>& vecpGenes ); 00376 00377 CGenes( CGenome& Genome ); 00378 00379 bool Open( std::istream& istm, bool fCreate = true ); 00380 bool Open( const std::vector<std::string>& vecstrGenes, bool fCreate = true ); 00381 bool OpenWeighted( std::istream& istm, bool fCreate = true ); 00382 void Filter( const CGenes& GenesExclude ); 00383 size_t CountAnnotations( const IOntology* pOntology, size_t iTerm, bool fRecursive = true, 00384 const CGenes* pBackground = NULL ) const; 00385 std::vector<std::string> GetGeneNames( ) const; 00386 00387 00414 bool Open( const char* szFile, bool fCreate = true ) { 00415 std::ifstream ifsm; 00416 00417 ifsm.open( szFile ); 00418 return ( ifsm.is_open( ) && Open( ifsm, fCreate ) ); } 00445 bool OpenWeighted( const char* szFile, bool fCreate = true ) { 00446 std::ifstream ifsm; 00447 00448 ifsm.open( szFile ); 00449 return ( ifsm.is_open( ) && OpenWeighted( ifsm, fCreate ) ); } 00457 size_t GetGenes( ) const { 00458 00459 return m_vecpGenes.size( ); } 00460 00474 bool IsGene( const std::string& strGene ) const { 00475 00476 return ( m_mapGenes.find( strGene ) != m_mapGenes.end( ) ); } 00485 bool IsWeighted() const { 00486 return isWeighted;} 00487 00495 CGenome& GetGenome( ) const { 00496 00497 return m_Genome; } 00498 00512 const CGene& GetGene( size_t iGene ) const { 00513 00514 return *m_vecpGenes[ iGene ]; } 00515 00529 const float GetGeneWeight( size_t iGene ) const { 00530 if (iGene!=-1) 00531 return m_vecpGenes[ iGene ]->GetWeight(); 00532 return 0;} 00546 size_t GetGene( const std::string& strGene ) const { 00547 TMapStrI::const_iterator iterGene; 00548 00549 return ( ( ( iterGene = m_mapGenes.find( strGene ) ) == m_mapGenes.end( ) ) ? -1 : 00550 iterGene->second ); } 00551 00565 bool AddGene( const std::string& strGene ) { 00566 00567 if( GetGene( strGene ) != -1 ) 00568 return false; 00569 00570 m_mapGenes[ strGene ] = m_vecpGenes.size( ); 00571 m_vecpGenes.push_back( &m_Genome.AddGene( strGene ) ); 00572 return true; } 00573 }; 00574 00575 } 00576 00577 #endif // GENOME_H