Sleipnir
tools/OntoShell/OntoShell.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 #include "parserconsole.h"
00025 
00026 static const char   c_szCat[]       = "cat";   // line prefix that makes CompletionAll enable gene completion
00027 static const char   c_szFind[]      = "find";  // line prefix for which CompletionAll offers no custom completions
00028 static const char   c_szPrompt[]    = "> ";    // prompt suffix for the interactive shell (same text main appends to the location)
00029 
00030 static const CParserConsole*    g_pParser;  // set by main; read by the completion callbacks
00031 static bool                     g_fGenes;   // set by CompletionAll; read by CompletionMembers to include gene names
00032 static bool                     g_fZeroes;  // set by main from the zeroes flag; not read in the visible part of this file
00033 
00034 bool ProcessLine( const char* );  // NOTE(review): neither defined nor called in the visible part of this file
00035 char** CompletionAll( const char*, int, int );  // installed by main as rl_attempted_completion_function
00036 char* CompletionCommands( const char*, int );   // match generator over CParser::GetCommand
00037 char* CompletionMembers( const char*, int );    // match generator over parents, children and (optionally) genes
00038 size_t CompletionGetParents( const CParser::SLocation&, vector<string>& );  // appends parent IDs, returns count added
00039 size_t CompletionGetKids( const CParser::SLocation&, vector<string>& );     // appends child/root/ontology IDs, returns count added
00040 size_t CompletionGetGenes( const CParser::SLocation&, vector<string>& );    // appends gene names, returns count added
00041 
00042 int main( int iArgs, char** aszArgs ) {
00043     COntologyKEGG           KEGG;
00044     COntologyOBO                GOBP, GOMF, GOCC;
00045     COntologyMIPS           MIPS;
00046     COntologyMIPSPhenotypes MIPSPhen;
00047     CGenome                 Genome;
00048     ifstream                ifsmOnto, ifsmGenes;
00049     gengetopt_args_info     sArgs;
00050     char*                   szLine;
00051     int                     iRet;
00052     const IOntology*        apOntologies[]
00053         = { &KEGG, &GOBP, &GOMF, &GOCC, &MIPS, &MIPSPhen, NULL };
00054     CParserConsole          Parser( apOntologies, Genome );
00055 
00056     g_pParser = &Parser;
00057     iRet = cmdline_parser2( iArgs, aszArgs, &sArgs, 0, 1, 0 );
00058     if( sArgs.config_arg )
00059         iRet = cmdline_parser_configfile( sArgs.config_arg, &sArgs, 0, 0, 1 ) && iRet;
00060     if( iRet ) {
00061         cmdline_parser_print_help( );
00062         return iRet; }
00063     g_fZeroes = !!sArgs.zeroes_flag;
00064     CMeta Meta( sArgs.verbosity_arg );
00065 
00066     if( sArgs.features_arg ) {
00067         ifsmGenes.open( sArgs.features_arg );
00068         if( !Genome.Open( ifsmGenes ) ) {
00069             cerr << "Could not open: " << sArgs.features_arg << endl;
00070             return 1; }
00071         ifsmGenes.close( ); }
00072 
00073     if( sArgs.kegg_arg ) {
00074         ifsmOnto.open( sArgs.kegg_arg );
00075         if( !KEGG.Open( ifsmOnto, Genome, sArgs.kegg_org_arg, !!sArgs.altids_flag ) ) {
00076             cerr << "Could not open: " << sArgs.kegg_arg << endl;
00077             return 1; }
00078         ifsmOnto.close( ); }
00079 
00080     if( sArgs.go_onto_arg ) {
00081         ifsmOnto.clear( );
00082         ifsmOnto.open( sArgs.go_onto_arg );
00083         if( sArgs.go_anno_arg ) {
00084             ifsmGenes.clear( );
00085             ifsmGenes.open( sArgs.go_anno_arg ); }
00086         if( !COntologyOBO::Open( ifsmOnto, ifsmGenes, Genome, GOBP, GOMF, GOCC, !!sArgs.dbids_flag,
00087             !!sArgs.altids_flag ) ) {
00088             cerr << "Could not open: " << sArgs.go_onto_arg << ", " << sArgs.go_anno_arg << endl;
00089             return 1; }
00090         ifsmOnto.close( );
00091         if( sArgs.go_anno_arg )
00092             ifsmGenes.close( ); }
00093 
00094     if( sArgs.mips_onto_arg ) {
00095         ifsmOnto.clear( );
00096         ifsmOnto.open( sArgs.mips_onto_arg );
00097         if( sArgs.mips_anno_arg ) {
00098             ifsmGenes.clear( );
00099             ifsmGenes.open( sArgs.mips_anno_arg ); }
00100         if( !MIPS.Open( ifsmOnto, ifsmGenes, Genome ) ) {
00101             cerr << "Could not open: " << sArgs.mips_onto_arg << ", " << sArgs.mips_anno_arg <<
00102                 endl;
00103             return 1; }
00104         ifsmOnto.close( );
00105         if( sArgs.mips_anno_arg )
00106             ifsmGenes.close( ); }
00107 
00108     if( sArgs.mipsp_onto_arg ) {
00109         ifsmOnto.clear( );
00110         ifsmOnto.open( sArgs.mipsp_onto_arg );
00111         if( sArgs.mipsp_anno_arg ) {
00112             ifsmGenes.clear( );
00113             ifsmGenes.open( sArgs.mipsp_anno_arg ); }
00114         if( !MIPSPhen.Open( ifsmOnto, ifsmGenes, Genome ) ) {
00115             cerr << "Could not open: " << sArgs.mipsp_onto_arg << ", " <<
00116                 sArgs.mipsp_anno_arg << endl;
00117             return 1; }
00118         ifsmOnto.close( );
00119         if( sArgs.mipsp_anno_arg )
00120             ifsmGenes.close( ); }
00121 
00122     if( sArgs.exec_arg )
00123         Parser.ProcessLine( sArgs.exec_arg );
00124     else {
00125         rl_attempted_completion_function = CompletionAll;
00126         do {
00127             if( !( szLine = readline( ( Parser.GetLocation( ).ToString( false ) +
00128                 "> " ).c_str( ) ) ) )
00129                 break;
00130             add_history( szLine );
00131             Parser.ProcessLine( szLine );
00132             free( szLine ); }
00133         while( true ); }
00134 
00135     return 0; }
00136 
00137 char** CompletionAll( const char* szText, int iStart, int iEnd ) {
00138 
00139     if( !iStart )
00140         return rl_completion_matches( szText, CompletionCommands );
00141     if( !strncmp( rl_line_buffer, c_szFind, strlen( c_szFind ) ) )
00142         return NULL; // rl_completion_matches( szText, rl_filename_completion_function );
00143 
00144     g_fGenes = !strncmp( rl_line_buffer, c_szCat, strlen( c_szCat ) );
00145     return rl_completion_matches( szText, CompletionMembers ); }
00146 
00147 char* CompletionCommands( const char* szText, int iState ) {
00148     static size_t   iTry, iLen;
00149     const char* szCur;
00150 
00151     if( !iState ) {
00152         rl_completion_append_character = ' ';
00153         iTry = 0;
00154         iLen = strlen( szText ); }
00155 
00156     while( true ) {
00157         if( !( szCur = CParser::GetCommand( iTry++ ) ) )
00158             break;
00159         if( !strncmp( szCur, szText, iLen ) )
00160             return _strdup( szCur ); }
00161 
00162     return NULL; }
00163 
00164 char* CompletionMembers( const char* szText, int iState ) {
00165     static size_t               iTry, iLen, iLinks;
00166     static CParser::SLocation   sLoc;
00167     static const char*          szReal;
00168     static vector<string>       vecstrTries;
00169     static string               strBase;
00170 
00171     if( !iState ) {
00172         iTry = 0;
00173         vecstrTries.clear( );
00174         sLoc = g_pParser->GetLocation( szText, false );
00175         iLinks = CompletionGetParents( sLoc, vecstrTries );
00176         iLinks += CompletionGetKids( sLoc, vecstrTries );
00177         if( g_fGenes )
00178             CompletionGetGenes( sLoc, vecstrTries );
00179         strBase = szText;
00180         if( szReal = strrchr( szText, CParser::c_cSep ) )
00181             szReal++;
00182         else
00183             szReal = szText;
00184         strBase.resize( szReal - szText );
00185         iLen = strlen( szReal ); }
00186     if( !sLoc.IsValid( ) )
00187         return NULL;
00188 
00189     while( iTry < vecstrTries.size( ) )
00190         if( !strncmp( szReal, vecstrTries[ iTry++ ].c_str( ), iLen ) ) {
00191             rl_completion_append_character = ( iTry <= iLinks ) ? '/' : ' ';
00192             return _strdup( ( strBase + vecstrTries[ iTry - 1 ] ).c_str( ) ); }
00193 
00194     return NULL; }
00195 
00196 size_t CompletionGetParents( const CParser::SLocation& sLoc,
00197     vector<string>& vecstrParents ) {
00198     const IOntology*    pOnto;
00199     size_t              i, iSize;
00200 
00201     if( !( pOnto = sLoc.m_pOnto ) || ( sLoc.m_iNode == -1 ) )
00202         return 0;
00203 
00204     iSize = vecstrParents.size( );
00205     for( i = 0; i < pOnto->GetParents( sLoc.m_iNode ); ++i )
00206         vecstrParents.push_back( pOnto->GetID( pOnto->GetParent( sLoc.m_iNode, i ) ) );
00207 
00208     return ( vecstrParents.size( ) - iSize ); }
00209 
00210 size_t CompletionGetKids( const CParser::SLocation& sLoc, vector<string>& vecstrKids ) {
00211     size_t              i, iSize, iChild;
00212     const IOntology*    pOnto;
00213 
00214     iSize = vecstrKids.size( );
00215     if( sLoc.m_pOnto && ( sLoc.m_iNode == -1 ) ) {
00216         for( i = 0; i < sLoc.m_pOnto->GetNodes( ); ++i )
00217             if( !sLoc.m_pOnto->GetParents( i ) && sLoc.m_pOnto->GetGenes( i, true ) )
00218                 vecstrKids.push_back( sLoc.m_pOnto->GetID( i ) ); }
00219     else if( sLoc.m_pOnto ) {
00220         for( i = 0; i < sLoc.m_pOnto->GetChildren( sLoc.m_iNode ); ++i )
00221             if( sLoc.m_pOnto->GetGenes( iChild = sLoc.m_pOnto->GetChild( sLoc.m_iNode,
00222                 i ), true ) )
00223                 vecstrKids.push_back( sLoc.m_pOnto->GetID( iChild ) ); }
00224     else
00225         for( i = 0; i < g_pParser->GetOntologies( ); ++i )
00226             if( g_pParser->GetGenome( ).CountGenes( pOnto = g_pParser->GetOntology( i ) ) )
00227                 vecstrKids.push_back( pOnto->GetID( ) );
00228 
00229     return ( vecstrKids.size( ) - iSize ); }
00230 
00231 size_t CompletionGetGenes( const CParser::SLocation& sLoc, vector<string>& vecstrGenes ) {
00232     size_t  i, iSize;
00233 
00234     if( !sLoc.m_pOnto || ( sLoc.m_iNode == -1 ) )
00235         return 0;
00236 
00237     iSize = vecstrGenes.size( );
00238     for( i = 0; i < sLoc.m_pOnto->GetGenes( sLoc.m_iNode ); ++i )
00239         vecstrGenes.push_back( sLoc.m_pOnto->GetGene( sLoc.m_iNode, i ).GetName( ) );
00240 
00241     return ( vecstrGenes.size( ) - iSize ); }