Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "annotation.h" 00024 #include "genome.h" 00025 00026 namespace Sleipnir { 00027 00028 const char COntologyMIPSImpl::c_szMIPS[] = "MIPS"; 00029 00030 COntologyMIPS::COntologyMIPS( ) { 00031 00032 m_pOntology = this; } 00033 00034 COntologyMIPSImpl::SParserMIPS::SParserMIPS( std::istream& istm, CGenome& Genome ) : 00035 SParser( istm, Genome ) { } 00036 00037 COntologyMIPSImpl::COntologyMIPSImpl( ) : COntologyImpl( c_szMIPS ) { } 00038 00061 bool COntologyMIPS::Open( std::istream& istmOntology, std::istream& istmAnnotations, CGenome& Genome ) { 00062 SParserMIPS sParserOnto( istmOntology, Genome ); 00063 SParserMIPS sParserGene( istmAnnotations, Genome ); 00064 00065 if( !OpenOntology( sParserOnto ) ) { 00066 g_CatSleipnir( ).error( "COntologyMIPS::Open( ) failed on ontology line %d: %s", sParserOnto.m_iLine, 00067 sParserOnto.m_szLine ); 00068 return false; } 00069 if( !OpenGenes( sParserGene ) ) { 00070 g_CatSleipnir( ).error( "COntologyMIPS::Open( ) failed on genes line %d: %s", sParserGene.m_iLine, 00071 sParserGene.m_szLine ); 00072 return false; } 00073 00074 return true; } 00075 00076 bool COntologyMIPSImpl::OpenOntology( SParserMIPS& sParser ) { 00077 size_t i, j; 00078 vector<vector<size_t> > vecveciChildren; 00079 00080 g_CatSleipnir( ).info( "COntologyMIPSImpl::OpenOntology( )" ); 00081 if( !( sParser.GetLine( ) && ( sParser.m_szLine[ 0 ] == '#' ) && sParser.GetLine( ) ) ) 00082 return false; 00083 00084 while( sParser.m_istm.peek( ) != EOF ) 00085 if( !OpenCategory( sParser ) ) 00086 return false; 00087 if( !OpenCategory( sParser ) ) 00088 return false; 00089 00090 m_aNodes = new SNode[ m_iNodes = sParser.m_veciParents.size( ) ]; 00091 vecveciChildren.resize( m_iNodes ); 00092 for( i = 0; i < m_iNodes; ++i ) { 00093 m_aNodes[ i ].m_strID = sParser.m_vecstrIDs[ i ]; 00094 m_mapNodes[ m_aNodes[ i ].m_strID ] = i; 00095 m_aNodes[ i ].m_strGloss = sParser.m_vecstrGlosses[ i ]; 00096 if( sParser.m_veciParents[ i ] != -1 ) { 00097 m_aNodes[ i ].m_aiParents = new size_t[ m_aNodes[ i ].m_iParents = 1 ]; 00098 m_aNodes[ i ].m_aiParents[ 0 ] = sParser.m_veciParents[ i ]; 00099 vecveciChildren[ sParser.m_veciParents[ i ] ].push_back( i ); } } 00100 for( i = 0; i < m_iNodes; ++i ) { 00101 if( !vecveciChildren[ i ].size( ) ) 00102 continue; 00103 m_aNodes[ i ].m_aiChildren = new size_t[ m_aNodes[ i ].m_iChildren = 00104 vecveciChildren[ i ].size( ) ]; 00105 for( j = 0; j < m_aNodes[ i ].m_iChildren; ++j ) 00106 m_aNodes[ i ].m_aiChildren[ j ] = vecveciChildren[ i ][ j ]; } 00107 00108 return true; } 00109 00110 bool COntologyMIPSImpl::OpenCategory( SParserMIPS& sParser ) { 00111 char* pch; 00112 size_t i, iDepth; 00113 00114 if( !( pch = strchr( sParser.m_szLine, ' ' ) ) ) 00115 return false; 00116 00117 *(pch++) = 0; 00118 sParser.m_vecstrIDs.push_back( sParser.m_szLine ); 00119 while( *pch && isspace( *pch ) ) 00120 pch++; 00121 sParser.m_vecstrGlosses.push_back( pch ); 00122 if( ( iDepth = OpenID( sParser ) ) == -1 ) 00123 return false; 00124 while( iDepth < sParser.m_stakiHier.size( ) ) 00125 sParser.m_stakiHier.pop( ); 00126 i = sParser.m_veciParents.size( ); 00127 sParser.m_veciParents.push_back( sParser.m_stakiHier.empty( ) ? -1 : 00128 sParser.m_stakiHier.top( ) ); 00129 if( iDepth >= sParser.m_stakiHier.size( ) ) 00130 sParser.m_stakiHier.push( i ); 00131 00132 return sParser.GetLine( ); } 00133 00134 size_t COntologyMIPSImpl::OpenID( SParserMIPS& sParser ) { 00135 size_t iRet; 00136 char* pch; 00137 00138 for( iRet = 0,pch = strchr( sParser.m_szLine, '.' ); pch; ++iRet, 00139 pch = strchr( ++pch, '.' ) ); 00140 00141 return iRet; } 00142 00143 bool COntologyMIPSImpl::OpenGenes( SParserMIPS& sParser ) { 00144 size_t i, j; 00145 00146 g_CatSleipnir( ).info( "COntologyMIPSImpl::OpenGenes( )" ); 00147 if( !sParser.GetLine( ) ) 00148 return false; 00149 if( !sParser.m_szLine[ 0 ] ) 00150 return true; 00151 00152 sParser.m_vecpGenes.resize( m_iNodes ); 00153 while( sParser.m_istm.peek( ) != EOF ) 00154 if( !OpenGene( sParser ) ) 00155 return false; 00156 if( !OpenGene( sParser ) ) 00157 return false; 00158 00159 for( i = 0; i < m_iNodes; ++i ) { 00160 if( !sParser.m_vecpGenes[ i ].size( ) ) 00161 continue; 00162 m_aNodes[ i ].m_apGenes = new const CGene*[ m_aNodes[ i ].m_iGenes = 00163 sParser.m_vecpGenes[ i ].size( ) ]; 00164 for( j = 0; j < m_aNodes[ i ].m_iGenes; ++j ) 00165 m_aNodes[ i ].m_apGenes[ j ] = sParser.m_vecpGenes[ i ][ j ]; } 00166 00167 return true; } 00168 00169 bool COntologyMIPSImpl::OpenGene( SParserMIPS& sParser ) { 00170 char* pchOne; 00171 char* pchTwo; 00172 size_t iNode; 00173 00174 if( !( ( pchOne = strchr( sParser.m_szLine, '|' ) ) && 00175 ( pchTwo = strchr( pchOne + 1, '|' ) ) ) ) 00176 return false; 00177 *(pchOne++) = *pchTwo = 0; 00178 00179 iNode = m_mapNodes[ pchOne ]; 00180 { 00181 CGene& Gene = sParser.m_Genome.AddGene( sParser.m_szLine ); 00182 00183 Gene.AddAnnotation( m_pOntology, iNode ); 00184 sParser.m_vecpGenes[ iNode ].push_back( &Gene ); 00185 } 00186 00187 return sParser.GetLine( ); } 00188 00189 const char COntologyMIPSPhenotypes::c_szMIPSPhen[] = "MIPSP"; 00190 00191 COntologyMIPSPhenotypes::COntologyMIPSPhenotypes( ) { 00192 00193 m_pOntology = this; 00194 m_strID = c_szMIPSPhen; } 00195 00196 }