// Sleipnir
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "parserconsole.h" 00024 00025 const char* CParserConsole::SArgs::c_aszFlags[] = 00026 { CParserConsole::c_szGenes, CParserConsole::c_szLong, CParserConsole::c_szSibs, 00027 CParserConsole::c_szZeroes, CParserConsole::c_szBonferroni, 00028 CParserConsole::c_szRecursive, CParserConsole::c_szBackground, NULL }; 00029 const char CParserConsole::c_szDotDotDot[] = "..."; 00030 const char CParserConsole::c_szBackground[] = "-k"; 00031 const char CParserConsole::c_szBonferroni[] = "-b"; 00032 const char CParserConsole::c_szGenes[] = "-g"; 00033 const char CParserConsole::c_szLong[] = "-l"; 00034 const char CParserConsole::c_szSibs[] = "-s"; 00035 const char CParserConsole::c_szZeroes[] = "-a"; 00036 const char CParserConsole::c_szRecursive[] = "-r"; 00037 const char CParserConsole::c_szStar[] = "*"; 00038 const char 
CParserConsole::c_szHelpHelp[] = "Commands:\n" 00039 "cat <gene>+ Displays information on individual genes.\n" 00040 "cd [path] Display or change current term.\n" 00041 "find <filename> [p] [bkg] Runs term finder on the given gene list.\n" 00042 "help [command] Provides help on command syntax.\n" 00043 "ls [path] List parents, children, and annotations.\n" 00044 "parentage <onto> <file> For terms in onto, list parents in the given set."; 00045 const CParserConsole::TPFnParser CParserConsole::c_apfnParsers[] = 00046 { &CParserConsole::ParseCat, &CParserConsole::ParseCd, &CParserConsole::ParseFind, 00047 &CParserConsole::ParseHelp, &CParserConsole::ParseLs, &CParserConsole::ParseParentage, NULL }; 00048 const char* CParserConsole::c_aszHelps[] = { 00049 "cat [-l] [-r] [path]<gene>+\n\n" 00050 "Displays the name, synonyms, and annotations for the given gene(s).\n" 00051 "Annotations are listed per ontology as available, with ontology term glosses\n" 00052 "abbreviated unless the -l flag is given. A * will list all genes annotated\n" 00053 "to the given location or, in combination with the -r flag, to it and its\n" 00054 "descendants.", 00055 "cd [path]\n\n" 00056 "With no argument, cd displays the current path - either an ontology term, an\n" 00057 "ontology name, or the root marker. When given a path, cd changes the current\n" 00058 "term to that path's target. As in DOS and Unix, paths can contain . and ..\n" 00059 "characters to indicate the current term or its parent. In nodes with\n" 00060 "multiple parents, the parent ID must be specified explicitly. / serves as\n" 00061 "the path separator and root marker.", 00062 "find <filename> [p=0.05] [bkg] [-l] [-b] [-g] [-a] [-s] [-k]\n\n" 00063 "Performs a hypergeometric test over each ontology using the given gene list.\n" 00064 "Only terms with probability less than p are displayed, with a default p-value\n" 00065 "of 0.05. The total number of possible genes is assumed to be the entire\n" 00066 "genome. 
If given, the second file is used as a background distribution. The\n" 00067 "other optional flags are:\n" 00068 "-l Long listings; deactivates term gloss abbreviation.\n" 00069 "-b Bonferroni correction; deactivates Bonferroni correction.\n" 00070 "-g Genes; display genes associated with each ontology term.\n" 00071 "-a All listings; display additional information for ontology terms.\n" 00072 "-s Siblings; deactivates child annotations during analysis.\n" 00073 "-k Background; uses whole genome background in place of ontology background.", 00074 CParserConsole::c_szHelpHelp, 00075 "ls [-l] [-a] [-g] [-s] [-r] [path]\n\n" 00076 "With no arguments, the ls command displays the parents, children, and gene\n" 00077 "annotations of the current term. Given a path, it displays the same\n" 00078 "information for that target instead. The four optional flags are:\n" 00079 "-l Long listings; deactivates term gloss abbreviation.\n" 00080 "-a All listings; includes terms with zero gene annotations.\n" 00081 "-g Genes; deactives gene listings.\n" 00082 "-s Siblings; deactivates parent and child listings.\n" 00083 "-r Recursive; descend into child nodes.", 00084 "parentage [-a] <ontology> <filename>\n\n" 00085 "Loads an ontology slim from the given filename. Then, for each term in\n" 00086 "the indicated ontology, outputs the zero or more parents of that term that\n" 00087 "fall within the given set. This \"bubbles up\" the ontology to the level\n" 00088 "given in the input slim file. 
Optional flags are:\n" 00089 "-a All listings; include terms with no parents in the slim.", 00090 NULL }; 00091 00092 CParserConsole::SArgs::SArgs( ) : m_fGenes(m_afFlags[ 0 ]), m_fLong(m_afFlags[ 1 ]), 00093 m_fSibs(m_afFlags[ 2 ]), m_fZeroes(m_afFlags[ 3 ]), m_fBonferroni(m_afFlags[ 4 ]), 00094 m_fRecursive(m_afFlags[ 5 ]), m_fBackground(m_afFlags[ 6 ]) { 00095 00096 m_fGenes = false; 00097 m_fLong = false; 00098 m_fSibs = true; 00099 m_fZeroes = false; 00100 m_fBonferroni = true; 00101 m_fRecursive = false; 00102 m_fBackground = false; } 00103 00104 bool CParserConsole::SArgs::Parse( const string& strArg ) { 00105 size_t i; 00106 00107 for( i = 0; SArgs::c_aszFlags[ i ]; ++i ) 00108 if( strArg == SArgs::c_aszFlags[ i ] ) { 00109 m_afFlags[ i ] = !m_afFlags[ i ]; 00110 return true; } 00111 00112 return false; } 00113 00114 void CParserConsole::PrintLink( const IOntology* pOnto, size_t iNode, char cType, 00115 const SArgs& sArgs ) { 00116 size_t iGenes, iCount; 00117 string strID, strGloss; 00118 00119 if( !( iCount = pOnto->GetGenes( iNode, true ) ) && !sArgs.m_fZeroes ) 00120 return; 00121 00122 cout << cType << ' ' << ( strID = pOnto->GetID( iNode ) ); 00123 PrintSpaces( c_iWidthID - strID.size( ) ); 00124 iGenes = pOnto->GetGenes( iNode ); 00125 PrintNumber( iGenes, c_iWidthGenes ); 00126 PrintNumber( iCount - iGenes, c_iWidthGenes ); 00127 PrintGloss( pOnto->GetGloss( iNode ), c_iWidthGloss, sArgs.m_fLong ); 00128 cout << endl; } 00129 00130 void CParserConsole::PrintNumber( size_t iNumber, size_t iWidth ) { 00131 size_t iUsed; 00132 00133 cout << (unsigned int)iNumber; 00134 iUsed = iNumber ? 
(size_t)log10( (float)iNumber ) : 0; 00135 PrintSpaces( iWidth - iUsed ); } 00136 00137 void CParserConsole::PrintSpaces( size_t iSpaces ) { 00138 size_t i; 00139 00140 for( i = 0; i < iSpaces; ++i ) 00141 cout << ' '; } 00142 00143 void CParserConsole::PrintAnnotation( const IOntology* pOnto, size_t iNode, 00144 const SArgs& sArgs, const STermFound* psFound ) { 00145 char szBuf[ 128 ]; 00146 string strID; 00147 size_t iWidth; 00148 00149 iWidth = c_iWidthGloss + c_iWidthGenes; 00150 cout << ( strID = pOnto->GetID( iNode ) ); 00151 PrintSpaces( c_iWidthID - strID.size( ) ); 00152 if( psFound ) { 00153 iWidth -= c_iWidthGenes; 00154 sprintf_s( szBuf, "%g", psFound->m_dP ); 00155 cout << szBuf; 00156 PrintSpaces( c_iWidthP - strlen( szBuf ) ); 00157 sprintf_s( szBuf, "%-4d %-4d %-4d %-4d ", psFound->m_iHitsTerm, psFound->m_iSizeTerm, 00158 psFound->m_iHitsTotal, psFound->m_iSizeTotal ); 00159 iWidth -= strlen( szBuf ); 00160 cout << szBuf; } 00161 PrintGloss( pOnto->GetGloss( iNode ), iWidth, sArgs.m_fLong ); 00162 cout << endl; } 00163 00164 void CParserConsole::PrintGloss( string strGloss, size_t iWidth, bool fLong ) { 00165 00166 if( ( strGloss.length( ) > iWidth ) && !fLong ) { 00167 strGloss.resize( iWidth ); 00168 strGloss += c_szDotDotDot; } 00169 cout << strGloss; } 00170 00171 void CParserConsole::PrintGene( const CGene& Gene, const SArgs& sArgs ) { 00172 size_t i, j; 00173 const IOntology* pOnto; 00174 00175 cout << Gene.GetName( ); 00176 if( Gene.GetSynonyms( ) ) { 00177 cout << " (" << Gene.GetSynonym( 0 ); 00178 for( i = 1; i < Gene.GetSynonyms( ); ++i ) 00179 cout << ',' << Gene.GetSynonym( i ); 00180 cout << ')'; } 00181 if( Gene.GetDubious( ) ) 00182 cout << " Dubious"; 00183 if( Gene.GetRNA( ) ) 00184 cout << " RNA"; 00185 cout << endl; 00186 if( Gene.GetGloss( ).length( ) ) 00187 cout << Gene.GetGloss( ) << endl; 00188 for( i = 0; i < Gene.GetOntologies( ); ++i ) { 00189 pOnto = Gene.GetOntology( i ); 00190 cout << pOnto->GetID( ) << ':' << endl; 
00191 PrintAnnotation( pOnto, Gene.GetAnnotation( i, 0 ), sArgs ); 00192 for( j = 1; j < Gene.GetAnnotations( i ); ++j ) 00193 PrintAnnotation( pOnto, Gene.GetAnnotation( i, j ), sArgs ); } } 00194 00195 void CParserConsole::PrintGenes( const vector<const CGene*>& vecpGenes, size_t iWidth, const CGenes* pGenes ) { 00196 size_t i, iCol, iCols, iSpaces; 00197 vector<string> vecstrGenes; 00198 00199 iSpaces = 1; 00200 i = FormatGenes( vecpGenes, vecstrGenes, pGenes ); 00201 if( !iWidth ) 00202 iWidth = i; 00203 iCols = ( iWidth >= c_iWidthScreen ) ? 1 : ( c_iWidthScreen / iWidth ); 00204 for( iCol = i = 0; i < vecpGenes.size( ); ++i,iCol %= iCols ) { 00205 PrintSpaces( iSpaces ); 00206 cout << vecstrGenes[ i ]; 00207 if( ++iCol == iCols ) { 00208 iSpaces = 1; 00209 cout << endl; } 00210 else 00211 iSpaces = iWidth - vecstrGenes[ i ].length( ); } 00212 if( iCol ) 00213 cout << endl; } 00214 00215 size_t CParserConsole::FormatGenes( const vector<const CGene*>& vecpGenes, 00216 vector<string>& vecstrGenes, const CGenes* pGenes ) { 00217 size_t i, j, iRet; 00218 00219 vecstrGenes.resize( vecpGenes.size( ) ); 00220 for( iRet = i = 0; i < vecpGenes.size( ); ++i ) { 00221 vecstrGenes[ i ] = ( pGenes && pGenes->IsGene( vecpGenes[ i ]->GetName( ) ) ) ? 
"*" : ""; 00222 vecstrGenes[ i ] += vecpGenes[ i ]->GetName( ); 00223 if( vecpGenes[ i ]->GetSynonyms( ) ) { 00224 vecstrGenes[ i ] += "(" + vecpGenes[ i ]->GetSynonym( 0 ); 00225 for( j = 1; j < vecpGenes[ i ]->GetSynonyms( ); ++j ) 00226 vecstrGenes[ i ] += "," + vecpGenes[ i ]->GetSynonym( j ); 00227 vecstrGenes[ i ] += ")"; } 00228 if( vecpGenes[ i ]->GetRNA( ) ) 00229 vecstrGenes[ i ] += "'"; 00230 if( vecpGenes[ i ]->GetDubious( ) ) 00231 vecstrGenes[ i ] += "!"; 00232 if( vecstrGenes[ i ].length( ) > iRet ) 00233 iRet = vecstrGenes[ i ].length( ); } 00234 sort( vecstrGenes.begin( ), vecstrGenes.end( ) ); 00235 00236 return ++iRet; } 00237 00238 CParserConsole::CParserConsole( const IOntology** apOntologies, const CGenome& Genome ) : 00239 CParser( apOntologies, Genome ) { 00240 00241 m_sLocation.m_pOnto = NULL; 00242 m_sLocation.m_iNode = -1; } 00243 00244 CParser::SLocation CParserConsole::GetLocation( const string& strLoc, bool fLast ) const { 00245 00246 return CParser::GetLocation( m_vecpOntologies, strLoc, fLast, &m_sLocation ); } 00247 00248 bool CParserConsole::ProcessLine( const char* szLine ) { 00249 vector<string> vecstrLine; 00250 size_t i; 00251 string strLine; 00252 const char* pcPrev; 00253 const char* pcNext; 00254 00255 if( !szLine ) 00256 return false; 00257 00258 for( pcPrev = szLine; pcPrev && *pcPrev; pcPrev = pcNext ) { 00259 vecstrLine.clear( ); 00260 if( pcNext = strchr( pcPrev, c_cSemicolon ) ) 00261 strLine.assign( pcPrev, pcNext++ - pcPrev ); 00262 else 00263 strLine.assign( pcPrev ); 00264 CMeta::Tokenize( strLine.c_str( ), vecstrLine, CMeta::c_szWS, true ); 00265 if( vecstrLine.empty( ) ) 00266 continue; 00267 if( vecstrLine[ 0 ][ 0 ] == c_cShell ) { 00268 if( !ParseShell( strLine ) ) 00269 return false; 00270 continue; } 00271 for( i = 0; c_aszParsers[ i ]; ++i ) 00272 if( !strcmp( vecstrLine[ 0 ].c_str( ), c_aszParsers[ i ] ) ) 00273 break; 00274 if( !c_aszParsers[ i ] ) { 00275 cout << "Unknown command: " << strLine << endl; 
00276 return false; } 00277 if( !(this->*c_apfnParsers[ i ])( vecstrLine ) ) 00278 return false; } 00279 00280 return true; } 00281 00282 bool CParserConsole::ParseCat( const vector<string>& vecstrLine ) { 00283 size_t i, j; 00284 vector<string> vecstrGenes; 00285 SArgs sArgs; 00286 string strGene, strPath; 00287 SLocation sLoc; 00288 vector<SLocation> vecVisited; 00289 00290 for( i = 1; i < vecstrLine.size( ); ++i ) 00291 if( !sArgs.Parse( vecstrLine[ i ] ) ) 00292 vecstrGenes.push_back( vecstrLine[ i ] ); 00293 if( !vecstrGenes.size( ) ) { 00294 cout << "Cat, no genes given" << endl; 00295 return false; } 00296 00297 for( i = 0; i < vecstrGenes.size( ); ++i ) { 00298 strPath.clear( ); 00299 strGene = vecstrGenes[ i ]; 00300 if( ( j = strGene.rfind( c_cSep ) ) != -1 ) { 00301 strPath = strGene.substr( 0, j ); 00302 strGene = strGene.substr( j + 1 ); } 00303 if( strGene == c_szStar ) { 00304 if( !Recurse( GetLocation( strPath ), sArgs.m_fRecursive, sArgs.m_fZeroes, 00305 vecVisited ) ) { 00306 cout << "cat, illegal location: " << strPath << endl; 00307 return false; } 00308 PrintGenes( vecVisited, sArgs ); } 00309 else if( ( j = m_Genome.GetGene( strGene ) ) == -1 ) 00310 cout << "cat, unknown gene: " << strGene << endl; 00311 else 00312 PrintGene( m_Genome.GetGene( j ), sArgs ); } 00313 00314 return true; } 00315 00316 void CParserConsole::PrintGenes( const vector<SLocation>& vecVisited, 00317 const SArgs& sArgs ) const { 00318 TSetPGenes setpGenes; 00319 TSetPGenes::const_iterator iterGene; 00320 00321 CParser::CollectGenes( vecVisited, setpGenes ); 00322 for( iterGene = setpGenes.begin( ); iterGene != setpGenes.end( ); ++iterGene ) 00323 PrintGene( **iterGene, sArgs ); } 00324 00325 bool CParserConsole::ParseCd( const vector<string>& vecstrLine ) { 00326 SLocation sLoc; 00327 00328 if( vecstrLine.size( ) < 2 ) { 00329 cout << m_sLocation.ToString( true ) << endl; 00330 return true; } 00331 00332 sLoc = GetLocation( vecstrLine[ 1 ] ); 00333 if( !sLoc.IsValid( ) ) 
{ 00334 cout << "cd, illegal location: " << vecstrLine[ 1 ] << endl; 00335 return false; } 00336 m_sLocation = sLoc; 00337 00338 return true; } 00339 00340 bool CParserConsole::ParseFind( const vector<string>& vecstrLine ) { 00341 CGenes Genes( (CGenome&)m_Genome ), GenesBkg( (CGenome&)m_Genome ); 00342 ifstream ifsm; 00343 size_t i, j, k, l, iWidth, iTotal; 00344 vector<STermFound> vecsTerms; 00345 vector<size_t> veciOnto; 00346 string strFile, strP, strBkg; 00347 SArgs sArgs; 00348 const IOntology* pOnto; 00349 vector<const CGene*> vecpGenes; 00350 vector<string> vecstrGenes; 00351 float dP = 0.05f; 00352 00353 if( vecstrLine.size( ) < 2 ) 00354 return false; 00355 for( i = 1; i < vecstrLine.size( ); ++i ) { 00356 if( sArgs.Parse( vecstrLine[ i ] ) ) 00357 continue; 00358 if( !strFile.length( ) ) 00359 strFile = vecstrLine[ i ]; 00360 else if( !strP.length( ) ) 00361 strP = vecstrLine[ i ]; 00362 else if( !strBkg.length( ) ) 00363 strBkg = vecstrLine[ i ]; } 00364 ifsm.open( strFile.c_str( ) ); 00365 if( !( ifsm.is_open( ) && Genes.Open( ifsm, false ) ) ) { 00366 cout << "find, can't open file: " << strFile << endl; 00367 return false; } 00368 ifsm.close( ); 00369 if( strP.length( ) ) 00370 dP = (float)atof( strP.c_str( ) ); 00371 if( strBkg.length( ) ) { 00372 ifsm.clear( ); 00373 ifsm.open( strBkg.c_str( ) ); 00374 if( !( ifsm.is_open( ) && GenesBkg.Open( ifsm ) ) ) { 00375 cout << "find, can't open background: " << strBkg << endl; 00376 return false; } 00377 ifsm.close( ); } 00378 00379 CParser::TermFinder( Genes, dP, GenesBkg, sArgs.m_fBonferroni, sArgs.m_fSibs, sArgs.m_fBackground, 00380 veciOnto, vecsTerms ); 00381 00382 for( i = j = 0; i < m_vecpOntologies.size( ); ++i ) { 00383 pOnto = m_vecpOntologies[ i ]; 00384 if( j >= veciOnto[ i ] ) 00385 continue; 00386 cout << pOnto->GetID( ) << ':' << endl; 00387 00388 l = j; 00389 if( !sArgs.m_fGenes ) { 00390 vecpGenes.clear( ); 00391 for( ; j < veciOnto[ i ]; ++j ) { 00392 iTotal = pOnto->GetGenes( vecsTerms[ 
j ].m_iID, sArgs.m_fSibs ); 00393 if( iTotal <= c_iSizeCutoff ) 00394 for( k = 0; k < pOnto->GetGenes( vecsTerms[ j ].m_iID, sArgs.m_fSibs ); ++k ) 00395 vecpGenes.push_back( &pOnto->GetGene( vecsTerms[ j ].m_iID, k ) ); 00396 else 00397 for( k = 0; k < Genes.GetGenes( ); ++k ) 00398 if( pOnto->IsAnnotated( vecsTerms[ j ].m_iID, Genes.GetGene( k ) ) ) 00399 vecpGenes.push_back( &Genes.GetGene( k ) ); } 00400 vecstrGenes.clear( ); 00401 iWidth = FormatGenes( vecpGenes, vecstrGenes, &Genes ); } 00402 00403 for( j = l; j < veciOnto[ i ]; ++j ) { 00404 PrintAnnotation( pOnto, vecsTerms[ j ].m_iID, sArgs, &vecsTerms[ j ] ); 00405 if( !sArgs.m_fGenes ) { 00406 vecpGenes.clear( ); 00407 iTotal = pOnto->GetGenes( vecsTerms[ j ].m_iID, sArgs.m_fSibs ); 00408 if( iTotal <= c_iSizeCutoff ) 00409 for( k = 0; k < pOnto->GetGenes( vecsTerms[ j ].m_iID, sArgs.m_fSibs ); ++k ) 00410 vecpGenes.push_back( &pOnto->GetGene( vecsTerms[ j ].m_iID, k ) ); 00411 else 00412 for( k = 0; k < Genes.GetGenes( ); ++k ) 00413 if( pOnto->IsAnnotated( vecsTerms[ j ].m_iID, Genes.GetGene( k ), 00414 sArgs.m_fSibs ) ) 00415 vecpGenes.push_back( &Genes.GetGene( k ) ); 00416 PrintGenes( vecpGenes, iWidth, &Genes ); } } } 00417 00418 return true; } 00419 00420 bool CParserConsole::ParseHelp( const vector<string>& vecstrLine ) { 00421 size_t i; 00422 00423 if( vecstrLine.size( ) > 1 ) 00424 for( i = 0; c_aszParsers[ i ]; ++i ) 00425 if( vecstrLine[ 1 ] == c_aszParsers[ i ] ) { 00426 cout << c_aszHelps[ i ] << endl; 00427 return true; } 00428 00429 cout << c_szHelpHelp << endl; 00430 00431 return true; } 00432 00433 bool CParserConsole::ParseLs( const vector<string>& vecstrLine ) { 00434 SLocation sLoc; 00435 size_t i; 00436 string strLoc; 00437 SArgs sArgs; 00438 vector<SLocation> vecVisited; 00439 00440 for( i = 1; i < vecstrLine.size( ); ++i ) 00441 if( !( sArgs.Parse( vecstrLine[ i ] ) || strLoc.size( ) ) ) 00442 strLoc = vecstrLine[ i ]; 00443 sLoc = strLoc.size( ) ? 
GetLocation( strLoc ) : m_sLocation; 00444 if( !Recurse( sLoc, sArgs.m_fRecursive, sArgs.m_fZeroes, vecVisited ) ) { 00445 cout << "ls, illegal location: " << strLoc << endl; 00446 return false; } 00447 00448 PrintLocations( vecVisited, sArgs ); 00449 return true; } 00450 00451 void CParserConsole::PrintLocations( const vector<SLocation>& vecVisited, 00452 const SArgs& sArgs ) const { 00453 const IOntology* pOnto; 00454 size_t i, j; 00455 vector<const CGene*> vecpGenes; 00456 string strLoc; 00457 00458 for( i = 0; i < vecVisited.size( ); ++i ) { 00459 const SLocation& sLoc = vecVisited[ i ]; 00460 00461 if( pOnto = sLoc.m_pOnto ) { 00462 if( sLoc.m_iNode == -1 ) { 00463 PrintOntology( pOnto, '-' ); 00464 if( sArgs.m_fSibs ) { 00465 for( j = 0; j < pOnto->GetNodes( ); ++j ) 00466 if( !pOnto->GetParents( j ) ) 00467 PrintLink( pOnto, j, 'C', sArgs ); } } 00468 else { 00469 PrintLink( pOnto, sLoc.m_iNode, '-', sArgs ); 00470 if( sArgs.m_fSibs ) { 00471 for( j = 0; j < pOnto->GetParents( sLoc.m_iNode ); ++j ) 00472 PrintLink( pOnto, pOnto->GetParent( sLoc.m_iNode, j ), 'P', sArgs ); 00473 for( j = 0; j < pOnto->GetChildren( sLoc.m_iNode ); ++j ) 00474 PrintLink( pOnto, pOnto->GetChild( sLoc.m_iNode, j ), 'C', sArgs ); } 00475 if( sArgs.m_fGenes ) { 00476 if( sArgs.m_fSibs ) 00477 vecpGenes.clear( ); 00478 for( j = 0; j < pOnto->GetGenes( sLoc.m_iNode ); ++j ) 00479 vecpGenes.push_back( &pOnto->GetGene( sLoc.m_iNode, j ) ); 00480 if( sArgs.m_fSibs ) 00481 PrintGenes( vecpGenes ); } } } 00482 else { 00483 PrintOntology( NULL, '-' ); 00484 if( sArgs.m_fSibs ) 00485 for( j = 0; j < m_vecpOntologies.size( ); ++j ) 00486 PrintOntology( m_vecpOntologies[ j ], 'O' ); } } 00487 00488 if( sArgs.m_fGenes && !sArgs.m_fSibs ) { 00489 set<const CGene*> setpGenes; 00490 00491 for( i = 0; i < vecpGenes.size( ); ++i ) 00492 setpGenes.insert( vecpGenes[ i ] ); 00493 vecpGenes.resize( setpGenes.size( ) ); 00494 copy( setpGenes.begin( ), setpGenes.end( ), vecpGenes.begin( ) ); 00495 
PrintGenes( vecpGenes ); } } 00496 00497 void CParserConsole::PrintOntology( const IOntology* pOnto, char cType ) const { 00498 string strLoc; 00499 00500 strLoc = pOnto ? pOnto->GetID( ) : "ROOT"; 00501 cout << cType << ' ' << strLoc; 00502 PrintSpaces( c_iWidthOnto - strLoc.length( ) ); 00503 if( pOnto ) 00504 cout << (unsigned int)m_Genome.CountGenes( pOnto ); 00505 cout << endl; } 00506 00507 bool CParserConsole::ParseShell( const string& strCmd ) const { 00508 size_t i; 00509 00510 i = strCmd.find( c_cShell ); 00511 system( strCmd.substr( i + 1 ).c_str( ) ); 00512 return true; } 00513 00514 bool CParserConsole::ParseParentage( const vector<string>& vecstrLine ) { 00515 string strOnto, strFile; 00516 CSlim Slim; 00517 const IOntology* pOnto; 00518 size_t i, j; 00519 ifstream ifsm; 00520 SArgs sArgs; 00521 vector<bool> vecfTerms; 00522 00523 if( vecstrLine.size( ) < 2 ) 00524 return false; 00525 for( i = 1; i < vecstrLine.size( ); ++i ) { 00526 if( sArgs.Parse( vecstrLine[ i ] ) ) 00527 continue; 00528 if( strOnto.empty( ) ) 00529 strOnto = vecstrLine[ i ]; 00530 else if( strFile.empty( ) ) 00531 strFile = vecstrLine[ i ]; } 00532 00533 pOnto = NULL; 00534 for( i = 0; i < m_vecpOntologies.size( ); ++i ) 00535 if( strOnto == m_vecpOntologies[ i ]->GetID( ) ) { 00536 pOnto = m_vecpOntologies[ i ]; 00537 break; } 00538 if( !pOnto ) { 00539 cout << "parentage, can't find ontology: " << strOnto << endl; 00540 return false; } 00541 00542 ifsm.open( strFile.c_str( ) ); 00543 if( !( ifsm.is_open( ) && Slim.Open( ifsm, pOnto ) ) ) { 00544 cout << "parentage, can't open file: " << strFile << endl; 00545 return false; } 00546 ifsm.close( ); 00547 00548 vecfTerms.resize( pOnto->GetNodes( ) ); 00549 for( i = 0; i < Slim.GetSlims( ); ++i ) 00550 for( j = 0; j < Slim.GetNodes( i ); ++j ) 00551 vecfTerms[ Slim.GetNode( i, j ) ] = true; 00552 for( i = 0; i < pOnto->GetNodes( ); ++i ) { 00553 set<size_t> setiParents; 00554 set<size_t>::const_iterator iterParent; 00555 
vector<size_t> veciIntersection; 00556 00557 if( vecfTerms[ i ] ) 00558 veciIntersection.push_back( i ); 00559 pOnto->GetParents( i, setiParents ); 00560 for( iterParent = setiParents.begin( ); iterParent != setiParents.end( ); ++iterParent ) 00561 if( vecfTerms[ *iterParent ] ) 00562 veciIntersection.push_back( *iterParent ); 00563 if( veciIntersection.empty( ) && !sArgs.m_fZeroes ) 00564 continue; 00565 cout << pOnto->GetID( i ); 00566 for( j = 0; j < veciIntersection.size( ); ++j ) 00567 cout << '\t' << pOnto->GetID( veciIntersection[ j ] ); 00568 cout << endl; } 00569 00570 return true; }