// Sleipnir
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "orthology.h" 00024 #include "genome.h" 00025 #include "meta.h" 00026 00027 namespace Sleipnir { 00028 00029 COrthologyImpl::~COrthologyImpl( ) { 00030 00031 Reset( ); } 00032 00033 void COrthologyImpl::Reset( ) { 00034 size_t i; 00035 00036 for( i = 0; i < m_vecpGenomes.size( ); ++i ) 00037 delete m_vecpGenomes[ i ]; 00038 m_vecpGenomes.clear( ); 00039 m_vecstrOrganisms.clear( ); 00040 m_mapGenes.clear( ); 00041 m_vecvecpGenes.clear( ); } 00042 00059 bool COrthology::Open( std::istream& istm ) { 00060 vector<string> vecstrLine; 00061 char* acBuf; 00062 size_t i, j; 00063 string strOrganism, strGene; 00064 CGenome* pGenome; 00065 CGene* pGene; 00066 00067 Reset( ); 00068 acBuf = new char[ c_iBufferSize ]; 00069 while( istm.peek( ) != EOF ) { 00070 istm.getline( acBuf, c_iBufferSize - 1 ); 00071 vecstrLine.clear( ); 00072 
CMeta::Tokenize( acBuf, vecstrLine ); 00073 if( vecstrLine.empty( ) ) 00074 continue; 00075 00076 m_vecvecpGenes.resize( m_vecvecpGenes.size( ) + 1 ); 00077 { 00078 vector<CGene*>& vecpGenes = m_vecvecpGenes[ m_vecvecpGenes.size( ) - 1 ]; 00079 00080 for( i = 0; i < vecstrLine.size( ); ++i ) { 00081 if( vecstrLine[ i ].length( ) == 0 ) 00082 continue; 00083 if( ( j = vecstrLine[ i ].find( c_cOrgSep ) ) == string::npos ) { 00084 g_CatSleipnir( ).warn( "COrthology::Open( ) illegal gene token: %s", 00085 vecstrLine[ i ].c_str( ) ); 00086 continue; } 00087 strOrganism = vecstrLine[ i ].substr( 0, j ); 00088 strGene = vecstrLine[ i ].substr( j + 1 ); 00089 for( j = 0; j < m_vecstrOrganisms.size( ); ++j ) 00090 if( strOrganism == m_vecstrOrganisms[ j ] ) 00091 break; 00092 if( j < m_vecpGenomes.size( ) ) 00093 pGenome = m_vecpGenomes[ j ]; 00094 else { 00095 m_vecpGenomes.push_back( pGenome = new CGenome( ) ); 00096 m_vecstrOrganisms.push_back( strOrganism ); } 00097 vecpGenes.push_back( pGene = &pGenome->AddGene( strGene ) ); 00098 m_mapGenes[ pGene ] = j; } 00099 } } 00100 delete[] acBuf; 00101 00102 return true; } 00103 00114 void COrthology::Save( std::ostream& ostm ) const { 00115 size_t i, j; 00116 00117 for( i = 0; i < m_vecvecpGenes.size( ); ++i ) { 00118 for( j = 0; j < m_vecvecpGenes[ i ].size( ); ++j ) 00119 ostm << ( j ? "\t" : "" ) << m_vecstrOrganisms[ ((COrthology*)this)->m_mapGenes[ 00120 m_vecvecpGenes[ i ][ j ] ] ] << c_cOrgSep << m_vecvecpGenes[ i ][ j ]->GetName( ); 00121 00122 ostm << endl; } } 00123 00124 }