Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "parser.h" 00024 00025 const char CParser::SLocation::c_szRoot[] = "/"; 00026 const char CParser::c_szDot[] = "."; 00027 const char CParser::c_szDotDot[] = ".."; 00028 const char* CParser::c_aszParsers[] = { "cat", "cd", "find", "help", "ls", "parentage", NULL }; 00029 00030 CParser::SLocation::SLocation( ) : m_pOnto(NULL), m_iNode(-1) { } 00031 00032 string CParser::SLocation::ToString( bool fGloss ) const { 00033 00034 return ( m_pOnto ? ( ( m_iNode == -1 ) ? m_pOnto->GetID( ) : 00035 m_pOnto->GetID( m_iNode ) + ( fGloss ? '\t' + m_pOnto->GetGloss( m_iNode ) : "" ) ) : 00036 c_szRoot ); } 00037 00038 bool CParser::SLocation::IsValid( ) const { 00039 00040 return ( m_pOnto || ( m_iNode == -1 ) ); } 00041 00042 void CParser::SLocation::Invalidate( ) { 00043 00044 m_pOnto = NULL; 00045 m_iNode = 0; } 00046 00047 bool CParser::SLocation::operator==( const SLocation& sLoc ) const { 00048 00049 return ( ( m_pOnto == sLoc.m_pOnto ) && ( m_iNode == sLoc.m_iNode ) ); } 00050 00051 const char* CParser::GetCommand( size_t iCommand ) { 00052 00053 return c_aszParsers[ iCommand ]; } 00054 00055 bool CParser::IsRooted( const string& strLoc ) { 00056 00057 return ( strLoc.length( ) && ( strLoc[ 0 ] == c_cSep ) ); } 00058 00059 bool CParser::SplitLocation( const string& strLoc, vector<string>& vecstrPath ) { 00060 size_t i; 00061 string strCur; 00062 00063 if( !strLoc.size( ) ) 00064 return true; 00065 00066 i = 0; 00067 while( true ) { 00068 strCur.clear( ); 00069 for( ; ( i < strLoc.size( ) ) && ( strLoc[ i ] == c_cSep ); ++i ); 00070 if( i >= strLoc.size( ) ) 00071 break; 00072 for( ; ( i < strLoc.size( ) ) && ( strLoc[ i ] != c_cSep ); ++i ) 00073 strCur += strLoc[ i ]; 00074 vecstrPath.push_back( strCur ); } 00075 00076 return true; } 00077 00078 CParser::SLocation CParser::GetLocation( const vector<const IOntology*>& vecpOntologies, 00079 const string& strLoc, bool fLast, const SLocation* psLoc ) { 00080 SLocation sRet; 00081 vector<string> vecstrPath; 00082 size_t i; 00083 00084 if( !IsRooted( strLoc ) && psLoc ) 00085 sRet = *psLoc; 00086 if( !SplitLocation( strLoc, vecstrPath ) ) { 00087 sRet.Invalidate( ); 00088 return sRet; } 00089 00090 if( !fLast ) { 00091 if( vecstrPath.empty( ) ) 00092 return sRet; 00093 if( strLoc[ strLoc.size( ) - 1 ] != c_cSep ) 00094 vecstrPath.resize( vecstrPath.size( ) - 1 ); } 00095 for( i = 0; i < vecstrPath.size( ); ++i ) 00096 if( !MoveLocation( sRet, vecstrPath[ i ], vecpOntologies ) ) { 00097 sRet.Invalidate( ); 00098 break; } 00099 00100 return sRet; } 00101 00102 bool CParser::MoveLocation( SLocation& sLoc, const string& strPath, 00103 const vector<const IOntology*>& vecpOntologies ) { 00104 size_t i, iNode; 00105 00106 if( strPath == c_szDot ) 00107 return true; 00108 00109 if( sLoc.m_pOnto ) { 00110 if( strPath == c_szDotDot ) { 00111 if( sLoc.m_iNode == -1 ) { 00112 sLoc.m_pOnto = NULL; 00113 return true; } 00114 switch( sLoc.m_pOnto->GetParents( sLoc.m_iNode ) ) { 00115 case 0: 00116 sLoc.m_iNode = -1; 00117 return true; 00118 00119 case 1: 00120 sLoc.m_iNode = sLoc.m_pOnto->GetParent( sLoc.m_iNode, 0 ); 00121 return true; } } 00122 else if( ( iNode = sLoc.m_pOnto->GetNode( strPath ) ) != -1 ) { 00123 sLoc.m_iNode = iNode; 00124 return true; } } 00125 else 00126 for( i = 0; i < vecpOntologies.size( ); ++i ) 00127 if( strPath == vecpOntologies[ i ]->GetID( ) ) { 00128 sLoc.m_pOnto = vecpOntologies[ i ]; 00129 sLoc.m_iNode = -1; 00130 return true; } 00131 00132 return false; } 00133 00134 void CParser::CollectGenes( const vector<SLocation>& vecLocations, 00135 TSetPGenes& setpGenes ) { 00136 size_t i, j; 00137 00138 for( i = 0; i < vecLocations.size( ); ++i ) { 00139 const SLocation& sLoc = vecLocations[ i ]; 00140 00141 if( sLoc.m_iNode != -1 ) 00142 for( j = 0; j < sLoc.m_pOnto->GetGenes( sLoc.m_iNode ); ++j ) 00143 setpGenes.insert( &sLoc.m_pOnto->GetGene( sLoc.m_iNode, j ) ); } } 00144 00145 CParser::CParser( const IOntology** apOntologies, const CGenome& Genome ) : 00146 m_Genome(Genome) { 00147 size_t i; 00148 00149 if( apOntologies ) 00150 for( i = 0; apOntologies[ i ]; ++i ) 00151 m_vecpOntologies.push_back( apOntologies[ i ] ); } 00152 00153 size_t CParser::GetOntologies( ) const { 00154 00155 return m_vecpOntologies.size( ); } 00156 00157 const IOntology* CParser::GetOntology( size_t iOnto ) const { 00158 00159 return m_vecpOntologies[ iOnto ]; } 00160 00161 const CGenome& CParser::GetGenome( ) const { 00162 00163 return m_Genome; } 00164 00165 bool CParser::Recurse( SLocation sLoc, bool fRecursive, bool fZeroes, 00166 vector<SLocation>& vecVisited ) const { 00167 const IOntology* pOnto; 00168 size_t i, j; 00169 bool fOK; 00170 00171 if( !sLoc.IsValid( ) ) 00172 return false; 00173 if( !fZeroes ) { 00174 fOK = false; 00175 if( sLoc.m_pOnto ) 00176 fOK = !!( ( sLoc.m_iNode == -1 ) ? m_Genome.CountGenes( sLoc.m_pOnto ) : 00177 sLoc.m_pOnto->GetGenes( sLoc.m_iNode, true ) ); 00178 else 00179 for( i = 0; i < m_vecpOntologies.size( ); ++i ) 00180 if( m_Genome.CountGenes( m_vecpOntologies[ i ] ) ) { 00181 fOK = true; 00182 break; } 00183 if( !fOK ) 00184 return true; } 00185 for( i = 0; i < vecVisited.size( ); ++i ) 00186 if( vecVisited[ i ] == sLoc ) 00187 return true; 00188 vecVisited.push_back( sLoc ); 00189 if( !fRecursive ) 00190 return true; 00191 00192 if( pOnto = sLoc.m_pOnto ) { 00193 if( sLoc.m_iNode == -1 ) { 00194 for( i = 0; i < pOnto->GetNodes( ); ++i ) 00195 if( !pOnto->GetParents( i ) ) { 00196 sLoc.m_iNode = i; 00197 Recurse( sLoc, fRecursive, fZeroes, vecVisited ); } } 00198 else { 00199 j = sLoc.m_iNode; 00200 for( i = 0; i < pOnto->GetChildren( j ); ++i ) { 00201 sLoc.m_iNode = pOnto->GetChild( j, i ); 00202 Recurse( sLoc, fRecursive, fZeroes, vecVisited ); } } } 00203 else 00204 for( i = 0; i < m_vecpOntologies.size( ); ++i ) { 00205 sLoc.m_pOnto = m_vecpOntologies[ i ]; 00206 Recurse( sLoc, fRecursive, fZeroes, vecVisited ); } 00207 00208 return true; } 00209 00210 struct SSortFind { 00211 bool operator()( const STermFound& sOne, const STermFound& sTwo ) { 00212 00213 return ( sOne.m_dP < sTwo.m_dP ); } 00214 }; 00215 00216 void CParser::TermFinder( const CGenes& Genes, float dP, const CGenes& GenesBkg, bool fBonferroni, bool fSibs, 00217 bool fBackground, vector<size_t>& veciOnto, vector<STermFound>& vecsTerms ) const { 00218 size_t i, j; 00219 vector<STermFound> vecsCur; 00220 SSortFind sSort; 00221 00222 veciOnto.resize( m_vecpOntologies.size( ) ); 00223 for( i = 0; i < m_vecpOntologies.size( ); ++i ) { 00224 vecsCur.clear( ); 00225 m_vecpOntologies[ i ]->TermFinder( Genes, vecsCur, fBonferroni, 00226 fSibs, fBackground, dP, GenesBkg.GetGenes( ) ? &GenesBkg : NULL ); 00227 sort( vecsCur.begin( ), vecsCur.end( ), sSort ); 00228 for( j = 0; j < vecsCur.size( ); ++j ) 00229 vecsTerms.push_back( vecsCur[ j ] ); 00230 veciOnto[ i ] = vecsTerms.size( ); } }