Sleipnir
src/annotationmips.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "annotation.h"
00024 #include "genome.h"
00025 
00026 namespace Sleipnir {
00027 
00028 const char  COntologyMIPSImpl::c_szMIPS[]   = "MIPS";
00029 
00030 COntologyMIPS::COntologyMIPS( ) {
00031 
00032     m_pOntology = this; }
00033 
00034 COntologyMIPSImpl::SParserMIPS::SParserMIPS( std::istream& istm, CGenome& Genome ) :
00035     SParser( istm, Genome ) { }
00036 
00037 COntologyMIPSImpl::COntologyMIPSImpl( ) : COntologyImpl( c_szMIPS ) { }
00038 
00061 bool COntologyMIPS::Open( std::istream& istmOntology, std::istream& istmAnnotations, CGenome& Genome ) {
00062     SParserMIPS sParserOnto( istmOntology, Genome );
00063     SParserMIPS sParserGene( istmAnnotations, Genome );
00064 
00065     if( !OpenOntology( sParserOnto ) ) {
00066         g_CatSleipnir( ).error( "COntologyMIPS::Open( ) failed on ontology line %d: %s", sParserOnto.m_iLine,
00067             sParserOnto.m_szLine );
00068         return false; }
00069     if( !OpenGenes( sParserGene ) ) {
00070         g_CatSleipnir( ).error( "COntologyMIPS::Open( ) failed on genes line %d: %s", sParserGene.m_iLine,
00071             sParserGene.m_szLine );
00072         return false; }
00073 
00074     return true; }
00075 
00076 bool COntologyMIPSImpl::OpenOntology( SParserMIPS& sParser ) {
00077     size_t                  i, j;
00078     vector<vector<size_t> > vecveciChildren;
00079 
00080     g_CatSleipnir( ).info( "COntologyMIPSImpl::OpenOntology( )" );
00081     if( !( sParser.GetLine( ) && ( sParser.m_szLine[ 0 ] == '#' ) && sParser.GetLine( ) ) )
00082         return false;
00083 
00084     while( sParser.m_istm.peek( ) != EOF )
00085         if( !OpenCategory( sParser ) )
00086             return false;
00087     if( !OpenCategory( sParser ) )
00088         return false;
00089 
00090     m_aNodes = new SNode[ m_iNodes = sParser.m_veciParents.size( ) ];
00091     vecveciChildren.resize( m_iNodes );
00092     for( i = 0; i < m_iNodes; ++i ) {
00093         m_aNodes[ i ].m_strID = sParser.m_vecstrIDs[ i ];
00094         m_mapNodes[ m_aNodes[ i ].m_strID ] = i;
00095         m_aNodes[ i ].m_strGloss = sParser.m_vecstrGlosses[ i ];
00096         if( sParser.m_veciParents[ i ] != -1 ) {
00097             m_aNodes[ i ].m_aiParents = new size_t[ m_aNodes[ i ].m_iParents = 1 ];
00098             m_aNodes[ i ].m_aiParents[ 0 ] = sParser.m_veciParents[ i ];
00099             vecveciChildren[ sParser.m_veciParents[ i ] ].push_back( i ); } }
00100     for( i = 0; i < m_iNodes; ++i ) {
00101         if( !vecveciChildren[ i ].size( ) )
00102             continue;
00103         m_aNodes[ i ].m_aiChildren = new size_t[ m_aNodes[ i ].m_iChildren =
00104             vecveciChildren[ i ].size( ) ];
00105         for( j = 0; j < m_aNodes[ i ].m_iChildren; ++j )
00106             m_aNodes[ i ].m_aiChildren[ j ] = vecveciChildren[ i ][ j ]; }
00107 
00108     return true; }
00109 
00110 bool COntologyMIPSImpl::OpenCategory( SParserMIPS& sParser ) {
00111     char*   pch;
00112     size_t  i, iDepth;
00113 
00114     if( !( pch = strchr( sParser.m_szLine, ' ' ) ) )
00115         return false;
00116 
00117     *(pch++) = 0;
00118     sParser.m_vecstrIDs.push_back( sParser.m_szLine );
00119     while( *pch && isspace( *pch ) )
00120         pch++;
00121     sParser.m_vecstrGlosses.push_back( pch );
00122     if( ( iDepth = OpenID( sParser ) ) == -1 )
00123         return false;
00124     while( iDepth < sParser.m_stakiHier.size( ) )
00125         sParser.m_stakiHier.pop( );
00126     i = sParser.m_veciParents.size( );
00127     sParser.m_veciParents.push_back( sParser.m_stakiHier.empty( ) ? -1 :
00128         sParser.m_stakiHier.top( ) );
00129     if( iDepth >= sParser.m_stakiHier.size( ) )
00130         sParser.m_stakiHier.push( i );
00131 
00132     return sParser.GetLine( ); }
00133 
00134 size_t COntologyMIPSImpl::OpenID( SParserMIPS& sParser ) {
00135     size_t  iRet;
00136     char*   pch;
00137 
00138     for( iRet = 0,pch = strchr( sParser.m_szLine, '.' ); pch; ++iRet,
00139         pch = strchr( ++pch, '.' ) );
00140 
00141     return iRet; }
00142 
00143 bool COntologyMIPSImpl::OpenGenes( SParserMIPS& sParser ) {
00144     size_t  i, j;
00145 
00146     g_CatSleipnir( ).info( "COntologyMIPSImpl::OpenGenes( )" );
00147     if( !sParser.GetLine( ) )
00148         return false;
00149     if( !sParser.m_szLine[ 0 ] )
00150         return true;
00151 
00152     sParser.m_vecpGenes.resize( m_iNodes );
00153     while( sParser.m_istm.peek( ) != EOF )
00154         if( !OpenGene( sParser ) )
00155             return false;
00156     if( !OpenGene( sParser ) )
00157         return false;
00158 
00159     for( i = 0; i < m_iNodes; ++i ) {
00160         if( !sParser.m_vecpGenes[ i ].size( ) )
00161             continue;
00162         m_aNodes[ i ].m_apGenes = new const CGene*[ m_aNodes[ i ].m_iGenes =
00163             sParser.m_vecpGenes[ i ].size( ) ];
00164         for( j = 0; j < m_aNodes[ i ].m_iGenes; ++j )
00165             m_aNodes[ i ].m_apGenes[ j ] = sParser.m_vecpGenes[ i ][ j ]; }
00166 
00167     return true; }
00168 
00169 bool COntologyMIPSImpl::OpenGene( SParserMIPS& sParser ) {
00170     char*   pchOne;
00171     char*   pchTwo;
00172     size_t  iNode;
00173 
00174     if( !( ( pchOne = strchr( sParser.m_szLine, '|' ) ) &&
00175         ( pchTwo = strchr( pchOne + 1, '|' ) ) ) )
00176         return false;
00177     *(pchOne++) = *pchTwo = 0;
00178 
00179     iNode = m_mapNodes[ pchOne ];
00180     {
00181         CGene&  Gene    = sParser.m_Genome.AddGene( sParser.m_szLine );
00182 
00183         Gene.AddAnnotation( m_pOntology, iNode );
00184         sParser.m_vecpGenes[ iNode ].push_back( &Gene );
00185     }
00186 
00187     return sParser.GetLine( ); }
00188 
00189 const char  COntologyMIPSPhenotypes::c_szMIPSPhen[] = "MIPSP";
00190 
00191 COntologyMIPSPhenotypes::COntologyMIPSPhenotypes( ) {
00192 
00193     m_pOntology = this;
00194     m_strID = c_szMIPSPhen; }
00195 
00196 }