Sleipnir
tools/OntoShell/parser.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "parser.h"
00024 
00025 const char  CParser::SLocation::c_szRoot[]  = "/";
00026 const char  CParser::c_szDot[]              = ".";
00027 const char  CParser::c_szDotDot[]           = "..";
00028 const char* CParser::c_aszParsers[]         = { "cat", "cd", "find", "help", "ls", "parentage", NULL };
00029 
00030 CParser::SLocation::SLocation( ) : m_pOnto(NULL), m_iNode(-1) { }
00031 
00032 string CParser::SLocation::ToString( bool fGloss ) const {
00033 
00034     return ( m_pOnto ? ( ( m_iNode == -1 ) ? m_pOnto->GetID( ) :
00035         m_pOnto->GetID( m_iNode ) + ( fGloss ? '\t' + m_pOnto->GetGloss( m_iNode ) : "" ) ) :
00036         c_szRoot ); }
00037 
00038 bool CParser::SLocation::IsValid( ) const {
00039 
00040     return ( m_pOnto || ( m_iNode == -1 ) ); }
00041 
00042 void CParser::SLocation::Invalidate( ) {
00043 
00044     m_pOnto = NULL;
00045     m_iNode = 0; }
00046 
00047 bool CParser::SLocation::operator==( const SLocation& sLoc ) const {
00048 
00049     return ( ( m_pOnto == sLoc.m_pOnto ) && ( m_iNode == sLoc.m_iNode ) ); }
00050 
00051 const char* CParser::GetCommand( size_t iCommand ) {
00052 
00053     return c_aszParsers[ iCommand ]; }
00054 
00055 bool CParser::IsRooted( const string& strLoc ) {
00056 
00057     return ( strLoc.length( ) && ( strLoc[ 0 ] == c_cSep ) ); }
00058 
00059 bool CParser::SplitLocation( const string& strLoc, vector<string>& vecstrPath ) {
00060     size_t  i;
00061     string  strCur;
00062 
00063     if( !strLoc.size( ) )
00064         return true;
00065 
00066     i = 0;
00067     while( true ) {
00068         strCur.clear( );
00069         for( ; ( i < strLoc.size( ) ) && ( strLoc[ i ] == c_cSep ); ++i );
00070         if( i >= strLoc.size( ) )
00071             break;
00072         for( ; ( i < strLoc.size( ) ) && ( strLoc[ i ] != c_cSep ); ++i )
00073             strCur += strLoc[ i ];
00074         vecstrPath.push_back( strCur ); }
00075 
00076     return true; }
00077 
00078 CParser::SLocation CParser::GetLocation( const vector<const IOntology*>& vecpOntologies,
00079     const string& strLoc, bool fLast, const SLocation* psLoc ) {
00080     SLocation       sRet;
00081     vector<string>  vecstrPath;
00082     size_t          i;
00083 
00084     if( !IsRooted( strLoc ) && psLoc )
00085         sRet = *psLoc;
00086     if( !SplitLocation( strLoc, vecstrPath ) ) {
00087         sRet.Invalidate( );
00088         return sRet; }
00089 
00090     if( !fLast ) {
00091         if( vecstrPath.empty( ) )
00092             return sRet;
00093         if( strLoc[ strLoc.size( ) - 1 ] != c_cSep )
00094             vecstrPath.resize( vecstrPath.size( ) - 1 ); }
00095     for( i = 0; i < vecstrPath.size( ); ++i )
00096         if( !MoveLocation( sRet, vecstrPath[ i ], vecpOntologies ) ) {
00097             sRet.Invalidate( );
00098             break; }
00099 
00100     return sRet; }
00101 
00102 bool CParser::MoveLocation( SLocation& sLoc, const string& strPath,
00103     const vector<const IOntology*>& vecpOntologies ) {
00104     size_t  i, iNode;
00105 
00106     if( strPath == c_szDot )
00107         return true;
00108 
00109     if( sLoc.m_pOnto ) {
00110         if( strPath == c_szDotDot ) {
00111             if( sLoc.m_iNode == -1 ) {
00112                 sLoc.m_pOnto = NULL;
00113                 return true; }
00114             switch( sLoc.m_pOnto->GetParents( sLoc.m_iNode ) ) {
00115                 case 0:
00116                     sLoc.m_iNode = -1;
00117                     return true;
00118 
00119                 case 1:
00120                     sLoc.m_iNode = sLoc.m_pOnto->GetParent( sLoc.m_iNode, 0 );
00121                     return true; } }
00122         else if( ( iNode = sLoc.m_pOnto->GetNode( strPath ) ) != -1 ) {
00123             sLoc.m_iNode = iNode;
00124             return true; } }
00125     else
00126         for( i = 0; i < vecpOntologies.size( ); ++i )
00127             if( strPath == vecpOntologies[ i ]->GetID( ) ) {
00128                 sLoc.m_pOnto = vecpOntologies[ i ];
00129                 sLoc.m_iNode = -1;
00130                 return true; }
00131 
00132     return false; }
00133 
00134 void CParser::CollectGenes( const vector<SLocation>& vecLocations,
00135     TSetPGenes& setpGenes ) {
00136     size_t  i, j;
00137 
00138     for( i = 0; i < vecLocations.size( ); ++i ) {
00139         const SLocation&    sLoc    = vecLocations[ i ];
00140 
00141         if( sLoc.m_iNode != -1 )
00142             for( j = 0; j < sLoc.m_pOnto->GetGenes( sLoc.m_iNode ); ++j )
00143                 setpGenes.insert( &sLoc.m_pOnto->GetGene( sLoc.m_iNode, j ) ); } }
00144 
00145 CParser::CParser( const IOntology** apOntologies, const CGenome& Genome ) :
00146     m_Genome(Genome) {
00147     size_t  i;
00148 
00149     if( apOntologies )
00150         for( i = 0; apOntologies[ i ]; ++i )
00151             m_vecpOntologies.push_back( apOntologies[ i ] ); }
00152 
00153 size_t CParser::GetOntologies( ) const {
00154 
00155     return m_vecpOntologies.size( ); }
00156 
00157 const IOntology* CParser::GetOntology( size_t iOnto ) const {
00158 
00159     return m_vecpOntologies[ iOnto ]; }
00160 
00161 const CGenome& CParser::GetGenome( ) const {
00162 
00163     return m_Genome; }
00164 
00165 bool CParser::Recurse( SLocation sLoc, bool fRecursive, bool fZeroes,
00166     vector<SLocation>& vecVisited ) const {
00167     const IOntology*    pOnto;
00168     size_t              i, j;
00169     bool                fOK;
00170 
00171     if( !sLoc.IsValid( ) )
00172         return false;
00173     if( !fZeroes ) {
00174         fOK = false;
00175         if( sLoc.m_pOnto )
00176             fOK = !!( ( sLoc.m_iNode == -1 ) ? m_Genome.CountGenes( sLoc.m_pOnto ) :
00177                 sLoc.m_pOnto->GetGenes( sLoc.m_iNode, true ) );
00178         else
00179             for( i = 0; i < m_vecpOntologies.size( ); ++i )
00180                 if( m_Genome.CountGenes( m_vecpOntologies[ i ] ) ) {
00181                     fOK = true;
00182                     break; }
00183         if( !fOK )
00184             return true; }
00185     for( i = 0; i < vecVisited.size( ); ++i )
00186         if( vecVisited[ i ] == sLoc )
00187             return true;
00188     vecVisited.push_back( sLoc );
00189     if( !fRecursive )
00190         return true;
00191 
00192     if( pOnto = sLoc.m_pOnto ) {
00193         if( sLoc.m_iNode == -1 ) {
00194             for( i = 0; i < pOnto->GetNodes( ); ++i )
00195                 if( !pOnto->GetParents( i ) ) {
00196                     sLoc.m_iNode = i;
00197                     Recurse( sLoc, fRecursive, fZeroes, vecVisited ); } }
00198         else {
00199             j = sLoc.m_iNode;
00200             for( i = 0; i < pOnto->GetChildren( j ); ++i ) {
00201                 sLoc.m_iNode = pOnto->GetChild( j, i );
00202                 Recurse( sLoc, fRecursive, fZeroes, vecVisited ); } } }
00203     else
00204         for( i = 0; i < m_vecpOntologies.size( ); ++i ) {
00205             sLoc.m_pOnto = m_vecpOntologies[ i ];
00206             Recurse( sLoc, fRecursive, fZeroes, vecVisited ); }
00207 
00208     return true; }
00209 
00210 struct SSortFind {
00211     bool operator()( const STermFound& sOne, const STermFound& sTwo ) {
00212 
00213         return ( sOne.m_dP < sTwo.m_dP ); }
00214 };
00215 
00216 void CParser::TermFinder( const CGenes& Genes, float dP, const CGenes& GenesBkg, bool fBonferroni, bool fSibs,
00217     bool fBackground, vector<size_t>& veciOnto, vector<STermFound>& vecsTerms ) const {
00218     size_t              i, j;
00219     vector<STermFound>  vecsCur;
00220     SSortFind           sSort;
00221 
00222     veciOnto.resize( m_vecpOntologies.size( ) );
00223     for( i = 0; i < m_vecpOntologies.size( ); ++i ) {
00224         vecsCur.clear( );
00225         m_vecpOntologies[ i ]->TermFinder( Genes, vecsCur, fBonferroni,
00226             fSibs, fBackground, dP, GenesBkg.GetGenes( ) ? &GenesBkg : NULL );
00227         sort( vecsCur.begin( ), vecsCur.end( ), sSort );
00228         for( j = 0; j < vecsCur.size( ); ++j )
00229             vecsTerms.push_back( vecsCur[ j ] );
00230         veciOnto[ i ] = vecsTerms.size( ); } }