Sleipnir
src/orthology.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "orthology.h"
00024 #include "genome.h"
00025 #include "meta.h"
00026 
00027 namespace Sleipnir {
00028 
00029 COrthologyImpl::~COrthologyImpl( ) {
00030 
00031     Reset( ); }
00032 
00033 void COrthologyImpl::Reset( ) {
00034     size_t  i;
00035 
00036     for( i = 0; i < m_vecpGenomes.size( ); ++i )
00037         delete m_vecpGenomes[ i ];
00038     m_vecpGenomes.clear( );
00039     m_vecstrOrganisms.clear( );
00040     m_mapGenes.clear( );
00041     m_vecvecpGenes.clear( ); }
00042 
00059 bool COrthology::Open( std::istream& istm ) {
00060     vector<string>  vecstrLine;
00061     char*           acBuf;
00062     size_t          i, j;
00063     string          strOrganism, strGene;
00064     CGenome*        pGenome;
00065     CGene*          pGene;
00066 
00067     Reset( );
00068     acBuf = new char[ c_iBufferSize ];
00069     while( istm.peek( ) != EOF ) {
00070         istm.getline( acBuf, c_iBufferSize - 1 );
00071         vecstrLine.clear( );
00072         CMeta::Tokenize( acBuf, vecstrLine );
00073         if( vecstrLine.empty( ) )
00074             continue;
00075 
00076         m_vecvecpGenes.resize( m_vecvecpGenes.size( ) + 1 );
00077         {
00078             vector<CGene*>& vecpGenes   = m_vecvecpGenes[ m_vecvecpGenes.size( ) - 1 ];
00079 
00080             for( i = 0; i < vecstrLine.size( ); ++i ) {
00081                 if( vecstrLine[ i ].length( ) == 0 )
00082                     continue;
00083                 if( ( j = vecstrLine[ i ].find( c_cOrgSep ) ) == string::npos ) {
00084                     g_CatSleipnir( ).warn( "COrthology::Open( ) illegal gene token: %s",
00085                         vecstrLine[ i ].c_str( ) );
00086                     continue; }
00087                 strOrganism = vecstrLine[ i ].substr( 0, j );
00088                 strGene = vecstrLine[ i ].substr( j + 1 );
00089                 for( j = 0; j < m_vecstrOrganisms.size( ); ++j )
00090                     if( strOrganism == m_vecstrOrganisms[ j ] )
00091                         break;
00092                 if( j < m_vecpGenomes.size( ) )
00093                     pGenome = m_vecpGenomes[ j ];
00094                 else {
00095                     m_vecpGenomes.push_back( pGenome = new CGenome( ) );
00096                     m_vecstrOrganisms.push_back( strOrganism ); }
00097                 vecpGenes.push_back( pGene = &pGenome->AddGene( strGene ) );
00098                 m_mapGenes[ pGene ] = j; }
00099         } }
00100     delete[] acBuf;
00101 
00102     return true; }
00103 
00114 void COrthology::Save( std::ostream& ostm ) const {
00115     size_t  i, j;
00116 
00117     for( i = 0; i < m_vecvecpGenes.size( ); ++i ) {
00118         for( j = 0; j < m_vecvecpGenes[ i ].size( ); ++j )
00119             ostm << ( j ? "\t" : "" ) << m_vecstrOrganisms[ ((COrthology*)this)->m_mapGenes[
00120                 m_vecvecpGenes[ i ][ j ] ] ] << c_cOrgSep << m_vecvecpGenes[ i ][ j ]->GetName( );
00121 
00122         ostm << endl; } }
00123 
00124 }