Sleipnir
tools/DChecker/DChecker.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 
00025 enum ETFPN {
00026     ETFPN_TP    = 0,
00027     ETFPN_FP    = ETFPN_TP + 1,
00028     ETFPN_TN    = ETFPN_FP + 1,
00029     ETFPN_FN    = ETFPN_TN + 1
00030 };
00031 
00032 struct SDatum {
00033     float   m_dValue;
00034     size_t  m_iOne;
00035     size_t  m_iTwo;
00036     float   m_dAnswer;
00037 
00038     SDatum( float dValue, size_t iOne, size_t iTwo, float dAnswer ) : m_dValue(dValue), m_iOne(iOne), m_iTwo(iTwo),
00039         m_dAnswer(dAnswer) { }
00040 };
00041 
00042 struct SSorter {
00043     bool    m_fInvert;
00044 
00045     SSorter( bool fInvert ) : m_fInvert(fInvert) { }
00046 
00047     bool operator()( const SDatum& sOne, const SDatum& sTwo ) const {
00048 
00049         return ( m_fInvert ? ( sOne.m_dValue > sTwo.m_dValue ) : ( sOne.m_dValue < sTwo.m_dValue ) );
00050     }
00051 };
00052 
00053 double AUCMod( const CDat&, const CDat&, const vector<bool>&, const vector<bool>&, const gengetopt_args_info&, bool, float );
00054 
00055 int main( int iArgs, char** aszArgs ) {
00056     CDat            Answers, Data, wDat;
00057     gengetopt_args_info     sArgs;
00058     size_t              i, j, k, m, iOne, iTwo, iGenes, iPositives, iNegatives, iBins, iRand;
00059     vector<size_t>      veciGenes, veciRec, veciRecTerm;
00060     CFullMatrix<bool>       MatGenes;
00061     CFullMatrix<size_t>     MatResults;
00062     ETFPN           eTFPN;
00063     int             iMax;
00064     float           dAnswer, dValue;
00065     vector<bool>            vecfHere, vecfUbik;
00066     vector<float>       vecdScores, vecdSSE, vecdBinValue,vecGeneWeights;
00067     vector<size_t>      veciPositives, veciNegatives, veciGenesTerm;
00068     CGenome             Genome;
00069     CGenes              wGenes(Genome);
00070     ofstream            ofsm;
00071     ostream*            postm;
00072     map<float,size_t>       mapValues;
00073     bool            fMapAnswers,isDatWeighted=false;
00074     CGenes          GenesTm( Genome ), GenesUbik( Genome );
00075 
00076     if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
00077         cmdline_parser_print_help( );
00078         return 1;
00079     }
00080     CMeta Meta( sArgs.verbosity_arg );
00081 
00082     fMapAnswers = !!sArgs.memmap_flag && !( sArgs.genes_arg || sArgs.genet_arg || sArgs.genex_arg || sArgs.genee_arg );
00083     if( !Answers.Open( sArgs.answers_arg, fMapAnswers ) ) {
00084         cerr << "Couldn't open: " << sArgs.answers_arg << endl;
00085         return 1;
00086     }
00087     if( sArgs.genes_arg && !Answers.FilterGenes( sArgs.genes_arg, CDat::EFilterInclude ) ) {
00088         cerr << "Couldn't open: " << sArgs.genes_arg << endl;
00089         return 1;
00090     }
00091     if( sArgs.genee_arg && !Answers.FilterGenes( sArgs.genee_arg, CDat::EFilterEdge ) ) {
00092         cerr << "Couldn't open: " << sArgs.genee_arg << endl;
00093         return 1;
00094     }
00095     if( sArgs.genet_arg ) {
00096         if( !( Answers.FilterGenes( sArgs.genet_arg, CDat::EFilterTerm ) &&
00097                 GenesTm.Open( sArgs.genet_arg ) ) ) {
00098             cerr << "Couldn't open: " << sArgs.genet_arg << endl;
00099             return 1;
00100         }
00101         veciGenesTerm.reserve( GenesTm.GetGenes( ) );
00102         for( i = 0; i < GenesTm.GetGenes( ); ++i )
00103             if( ( j = Answers.GetGene( GenesTm.GetGene( i ).GetName( ) ) ) != -1 )
00104                 veciGenesTerm.push_back( j );
00105     }
00106     if( sArgs.genex_arg && !Answers.FilterGenes( sArgs.genex_arg, CDat::EFilterExclude ) ) {
00107         cerr << "Couldn't open: " << sArgs.genex_arg << endl;
00108         return 1;
00109     }
00110     if( !Data.Open( sArgs.input_arg, !!sArgs.memmap_flag ) ) {
00111         cerr << "Couldn't open: " << sArgs.input_arg << endl;
00112         return 1;
00113     }
00114     if( sArgs.normalize_flag )
00115         Data.Normalize( CDat::ENormalizeMinMax );
00116     
00117     
00118     if(sArgs.weights_arg){
00119       ifstream  ifsm;
00120       ifsm.open( sArgs.weights_arg );
00121       if( !wGenes.OpenWeighted( ifsm ) ) {
00122     if(!wDat.Open(sArgs.weights_arg, !!sArgs.memmap_flag )){
00123       cerr << "Couldn't open: " << sArgs.inputs[ i ] << endl;
00124       return 1; }
00125     else{
00126       isDatWeighted = true;
00127     }
00128       }else{
00129     vecGeneWeights.resize(Answers.GetGenes( ));
00130     for( i = 0; i < vecGeneWeights.size( ); ++i ){
00131                     vecGeneWeights[ i ] = wGenes.GetGeneWeight(wGenes.GetGene( Answers.GetGene( i ) ));}
00132       }
00133     }   
00134     
00135     if( sArgs.abs_arg ){
00136       float d;
00137       for( i = 0; i < Data.GetGenes( ); ++i ){
00138     for( j = ( i + 1 ); j < Data.GetGenes( ); ++j ){
00139       if( !CMeta::IsNaN( (d = Data.Get( i, j )) ) ){
00140         Data.Set(i, j, fabs(d - sArgs.abs_arg));
00141       }
00142     }
00143       }
00144     }
00145     
00146     veciGenes.resize( Answers.GetGenes( ) );
00147     for( i = 0; i < Answers.GetGenes( ); ++i )
00148         veciGenes[ i ] = Data.GetGene( Answers.GetGene( i ) );
00149     for( iRand = 0; iRand <= (size_t)sArgs.randomize_arg; ++iRand ) {
00150         if( iRand )
00151             Data.Randomize( );
00152         if( sArgs.finite_flag ) {
00153             vector<float>   vecdValues;
00154             {
00155                 set<float>      setdValues;
00156 
00157                 for( i = 0; i < Answers.GetGenes( ); ++i ) {
00158                     if( ( iOne = veciGenes[ i ] ) == -1 )
00159                         continue;
00160                     for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
00161                         if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
00162                                 CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
00163                                 CMeta::IsNaN( Answers.Get( i, j ) ) )
00164                             continue;
00165                         if( sArgs.invert_flag )
00166                             dValue = 1 - dValue;
00167                         setdValues.insert( dValue );
00168                     }
00169                 }
00170                 vecdValues.resize( setdValues.size( ) );
00171                 copy( setdValues.begin( ), setdValues.end( ), vecdValues.begin( ) );
00172             }
00173             sort( vecdValues.begin( ), vecdValues.end( ) );
00174             for( i = 0; i < vecdValues.size( ); ++i )
00175                 mapValues[ vecdValues[ i ] ] = i;
00176             iBins = mapValues.size( );
00177         }
00178         else
00179             iBins = sArgs.bins_arg;
00180         MatResults.Initialize( iBins ? ( iBins + 1 ) :
00181                                (size_t)( ( sArgs.max_arg - sArgs.min_arg ) / sArgs.delta_arg ) + 1, 4 );
00182         MatGenes.Initialize( veciGenes.size( ), MatResults.GetRows( ) );
00183 
00184         for( iGenes = 0; !sArgs.inputs_num || ( iGenes < sArgs.inputs_num ); ++iGenes ) {
00185             MatResults.Clear( );
00186             MatGenes.Clear( );
00187 
00188             if( sArgs.genep_arg ) {
00189                 CGenes      Genes( Genome );
00190                 if( !Genes.Open( sArgs.genep_arg ) ) {
00191                     cerr << "Couldn't open: " << sArgs.genep_arg << endl;
00192                     return 1;
00193                 }
00194                 vecfHere.resize( Answers.GetGenes( ) );
00195                 for( i = 0; i < vecfHere.size( ); ++i ) {
00196                     vecfHere[ i ] = Genes.IsGene( Answers.GetGene( i ) );
00197         }
00198             }
00199         if( sArgs.ubiqg_arg ) {
00200         if( !GenesUbik.Open( sArgs.ubiqg_arg ) ) {
00201             cerr << "Could not open: " << sArgs.ubiqg_arg << endl;
00202             return 1;
00203         }
00204         vecfUbik.resize( Answers.GetGenes( ) );
00205         for( i = 0; i < vecfUbik.size( ); ++i ) {
00206             vecfUbik[ i ] = GenesUbik.IsGene( Answers.GetGene( i ) );
00207         }
00208         }
00209 
00210             if( mapValues.size( ) ) {
00211                 for( i = 0; i < Answers.GetGenes( ); ++i ) {
00212                     if( ( iOne = veciGenes[ i ] ) == -1 )
00213                         continue;
00214                     for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
00215                         if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
00216                                 CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) ||
00217                                 CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) )
00218                             continue;
00219             if ( CMeta::SkipEdge( !!dAnswer, i, j, vecfHere, vecfUbik, sArgs.ctxtpos_flag, sArgs.ctxtneg_flag, sArgs.bridgepos_flag, sArgs.bridgeneg_flag, sArgs.outpos_flag, sArgs.outneg_flag ) ) {
00220                 continue;
00221             }
00222                         if( sArgs.invert_flag )
00223                             dValue = 1 - dValue;
00224                         for( k = 0; k <= mapValues[ dValue ]; ++k ) {
00225                             MatGenes.Set( i, k, true );
00226                             MatGenes.Set( j, k, true );
00227                             MatResults.Get( k, dAnswer ? ETFPN_TP : ETFPN_FP )++;
00228                         }
00229                         for( ; k < MatResults.GetRows( ); ++k )
00230                             MatResults.Get( k, dAnswer ? ETFPN_FN : ETFPN_TN )++;
00231                     }
00232                 }
00233             }
00234             else if( iBins ) {
00235                 vector<SDatum>      vecsData;
00236                 size_t          iChunk;
00237 
00238                 for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i ) {
00239                     if( ( iOne = veciGenes[ i ] ) == -1 )
00240                         continue;
00241                     for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
00242                         if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
00243                                 CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
00244                                 CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
00245                             continue;
00246             if ( CMeta::SkipEdge( !!dAnswer, i, j, vecfHere, vecfUbik, sArgs.ctxtpos_flag, sArgs.ctxtneg_flag, sArgs.bridgepos_flag, sArgs.bridgeneg_flag, sArgs.outpos_flag, sArgs.outneg_flag ) ) {
00247                 continue;
00248             }
00249     
00250                         MatGenes.Set( i, 0, true );
00251                         MatGenes.Set( j, 0, true );
00252                         if( dAnswer )
00253                             iPositives++;
00254                         else
00255                             iNegatives++;
00256                         vecsData.push_back( SDatum( dValue, i, j, dAnswer ) );
00257                     }
00258                 }
00259                 sort( vecsData.begin( ), vecsData.end( ), SSorter( !!sArgs.invert_flag ) );
00260                 //instead of putting all of the uneveness in one bin, spread it out into each bin.
00261                 //N.B. only the part without the sse_flag is fixed in this regard
00262                 size_t perChunk = (size_t)(vecsData.size()/MatResults.GetRows());
00263                 size_t chnkRem = (size_t)(vecsData.size()%MatResults.GetRows());
00264                 iChunk = (size_t)( 0.5 + ( (float)vecsData.size( ) / ( MatResults.GetRows( ) ) ) );
00265                 if( sArgs.sse_flag ) {
00266                     vecdSSE.resize( MatResults.GetRows( ) );
00267                     veciPositives.resize( vecdSSE.size( ) );
00268                     for( i = 1,j = 0; i < vecdSSE.size( ); ++i,j += iChunk ) {
00269                         veciPositives[ veciPositives.size( ) - i - 1 ] = veciPositives[ veciPositives.size( ) - i ];
00270                         vecdSSE[ vecdSSE.size( ) - i - 1 ] = vecdSSE[ vecdSSE.size( ) - i ];
00271                         for( k = 0; k < iChunk; ++k ) {
00272                             if( ( j + k ) >= vecsData.size( ) )
00273                                 break;
00274                             const SDatum&   sDatum  = vecsData[ vecsData.size( ) - ( j + k ) - 1 ];
00275 
00276                             for( m = 0; m < ( vecdSSE.size( ) - i ); ++m ) {
00277                                 MatGenes.Set( sDatum.m_iOne, m, true );
00278                                 MatGenes.Set( sDatum.m_iTwo, m, true );
00279                             }
00280                             dValue = sDatum.m_dValue - sDatum.m_dAnswer;
00281                             veciPositives[ veciPositives.size( ) - i - 1 ]++;
00282                             vecdSSE[ vecdSSE.size( ) - i - 1 ] += dValue * dValue;
00283                         }
00284                     }
00285                 }
00286                 else {
00287                     veciPositives.resize( MatResults.GetRows( ) - 1 );
00288                     veciNegatives.resize( veciPositives.size( ) );
00289                     vecdBinValue.resize( veciPositives.size( )+1 );
00290                     for( i = 0; i < veciNegatives.size( ); ++i )
00291                         veciNegatives[ i ] = veciPositives[ i ] = 0;
00292                     for( i = j = 0; i < veciPositives.size( )+1; ++i,j += k ) {
00293                         size_t thisChunk = (i < chnkRem) ? (perChunk + 1) : (perChunk);
00294                         for( k = 0; k < thisChunk; ++k ) {
00295                             //if( ( j + k ) >= vecsData.size( ) )
00296                             //  break;
00297                             const SDatum&   sDatum  = vecsData[ j + k ];
00298                             vecdBinValue[ i ] = sDatum.m_dValue;
00299 
00300                             for( m = i; m > 0; --m ) {
00301                                 MatGenes.Set( sDatum.m_iOne, m, true );
00302                                 MatGenes.Set( sDatum.m_iTwo, m, true );
00303                             }
00304                             if( i >= veciPositives.size() )
00305                                 continue;
00306                             if( Answers.Get( sDatum.m_iOne, sDatum.m_iTwo ) )
00307                                 veciPositives[ i ]++;
00308                             else
00309                                 veciNegatives[ i ]++;
00310                         }
00311                     }
00312                     MatResults.Set( 0, ETFPN_TP, iPositives );
00313                     MatResults.Set( 0, ETFPN_FP, iNegatives );
00314                     MatResults.Set( 0, ETFPN_TN, 0 );
00315                     MatResults.Set( 0, ETFPN_FN, 0 );
00316                     for( i = 1; i < MatResults.GetRows( ); ++i ) {
00317                         MatResults.Set( i, ETFPN_TP, MatResults.Get( i - 1, ETFPN_TP ) - veciPositives[ i - 1 ] );
00318                         MatResults.Set( i, ETFPN_FP, MatResults.Get( i - 1, ETFPN_FP ) - veciNegatives[ i - 1 ] );
00319                         MatResults.Set( i, ETFPN_TN, MatResults.Get( i - 1, ETFPN_TN ) + veciNegatives[ i - 1 ] );
00320                         MatResults.Set( i, ETFPN_FN, MatResults.Get( i - 1, ETFPN_FN ) +
00321                                         veciPositives[ i - 1 ] );
00322                     }
00323                 }
00324             }
00325             else
00326                 for( i = 0; i < Answers.GetGenes( ); ++i ) {
00327                     if( !( i % 1000 ) )
00328                         cerr << "Processing gene " << i << '/' << Answers.GetGenes( ) << endl;
00329                     if( ( iOne = veciGenes[ i ] ) == -1 )
00330                         continue;
00331                     for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
00332                         if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
00333                                 CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ||
00334                                 CMeta::IsNaN( dValue = Data.Get( iOne, iTwo ) ) )
00335                             continue;
00336             if ( CMeta::SkipEdge( !!dAnswer, i, j, vecfHere, vecfUbik, sArgs.ctxtpos_flag, sArgs.ctxtneg_flag, sArgs.bridgepos_flag, sArgs.bridgeneg_flag, sArgs.outpos_flag, sArgs.outneg_flag ) ) {
00337                 continue;
00338             }
00339 
00340                         if( sArgs.invert_flag )
00341                             dValue = 1 - dValue;
00342 
00343                         iMax = (int)ceil( ( dValue - sArgs.min_arg ) / sArgs.delta_arg );
00344                         if( iMax > (int)MatResults.GetRows( ) )
00345                             iMax = (int)MatResults.GetRows( );
00346                         eTFPN = (ETFPN)!dAnswer;
00347                         for( k = 0; (int)k < iMax; ++k ) {
00348                             MatResults.Get( k, eTFPN )++;
00349                             MatGenes.Set( i, k, true );
00350                             MatGenes.Set( j, k, true );
00351                         }
00352                         eTFPN = (ETFPN)( 2 + !eTFPN );
00353                         for( ; k < (int)MatResults.GetRows( ); ++k )
00354                             MatResults.Get( k, eTFPN )++;
00355                     }
00356                 }
00357             for( iPositives = iNegatives = i = 0; i < Answers.GetGenes( ); ++i )
00358                 for( j = ( i + 1 ); j < Answers.GetGenes( ); ++j ) {
00359                     if( CMeta::IsNaN( dAnswer = Answers.Get( i, j ) ) ) {
00360                         continue;
00361             }
00362 
00363             if ( CMeta::SkipEdge( !!dAnswer, i, j, vecfHere, vecfUbik, sArgs.ctxtpos_flag, sArgs.ctxtneg_flag, sArgs.bridgepos_flag, sArgs.bridgeneg_flag, sArgs.outpos_flag, sArgs.outneg_flag ) ) {
00364             continue;
00365             }
00366 
00367                     if( dAnswer )
00368                         iPositives++;
00369                     else {
00370                         iNegatives++;
00371             }
00372                 }
00373 
00374             veciRec.resize( MatResults.GetRows( ) );
00375             veciRecTerm.resize( MatResults.GetRows( ) );
00376             for( i = 0; i < veciRec.size( ); ++i ) {
00377                 veciRec[ i ] = veciRecTerm[ i ] = 0;
00378                 for( j = 0; j < MatGenes.GetRows( ); ++j )
00379                     if( MatGenes.Get( j, i ) ) {
00380                         veciRec[ i ]++;
00381                         if( vecfHere.size( ) && vecfHere[ j ] )
00382                             veciRecTerm[ i ]++;
00383                     }
00384                 for( j = 0; j < veciGenesTerm.size( ); ++j )
00385                     if( MatGenes.Get( veciGenesTerm[ j ], i ) &&
00386                             ( vecfHere.empty( ) || !vecfHere[ veciGenesTerm[ j ] ] ) )
00387                         veciRecTerm[ i ]++;
00388             }
00389 
00390             if( sArgs.inputs_num ) {
00391                 ofsm.open( ( (string)sArgs.directory_arg + '/' +
00392                              CMeta::Basename( sArgs.inputs[ iGenes ] ) + ".bins" ).c_str( ) );
00393                 postm = &ofsm;
00394             }
00395             else
00396                 postm = &cout;
00397 
00398             if( !sArgs.sse_flag  ) { //Weighted context is currently only supported for AUC calculation
00399                 *postm << "#    P   " << iPositives << endl;
00400                 *postm << "#    N   " << iNegatives << endl;
00401             }
00402 
00403             if(!sArgs.weights_arg){
00404             *postm << "Cut  Genes   " << ( sArgs.sse_flag ? "Pairs  SSE" : "TP  FP  TN  FN  RC  PR  VALUE" ) << endl;
00405             for( i = 0; i < MatResults.GetRows( ); ++i ) {
00406                 *postm << ( iBins ? i : ( sArgs.min_arg + ( i * sArgs.delta_arg ) ) ) << '\t' <<
00407                        veciRec[ i ];
00408                 if( sArgs.sse_flag )
00409                     *postm << '\t' << veciPositives[ i ] << '\t' << vecdSSE[ i ];
00410                 else
00411                     for( j = 0; j < MatResults.GetColumns( ); ++j )
00412                         *postm << '\t' << MatResults.Get( i, j );
00413                 if( veciGenesTerm.size( ) || vecfHere.size( ) )
00414                     *postm << '\t' << veciRecTerm[ i ];
00415 
00416                 // print precision/recall
00417                 *postm << '\t' << (float)MatResults.Get(i,0)/(MatResults.Get(0,0));
00418 
00419                 if( (MatResults.Get(i,1)+MatResults.Get(i,0)) != 0)
00420                     *postm << '\t' << (float)MatResults.Get(i,0)/(MatResults.Get(i,1)+MatResults.Get(i,0));
00421                 else
00422                     *postm << '\t' << 0.0;
00423                 *postm << '\t' << vecdBinValue[ i ];
00424 
00425                 *postm << endl;
00426             }}
00427             else{
00428                 *postm << "AUC is calculated using weighted context."<<endl;
00429                 if(sArgs.flipneg_flag)
00430                     *postm << "Flipneg ON"<<endl;
00431                 else
00432                     *postm << "Flipneg OFF"<<endl;
00433             }
00434             
00435             if( !sArgs.sse_flag )
00436             {
00437                 if(sArgs.weights_arg){
00438                     if(isDatWeighted)
00439                      *postm << "#   AUC " <<  CStatistics::WilcoxonRankSum( Data, Answers, wDat,sArgs.flipneg_flag ) << endl;
00440                     else
00441                         *postm << "#    AUC " <<  CStatistics::WilcoxonRankSum( Data, Answers, vecGeneWeights,sArgs.flipneg_flag ) << endl;
00442                 }
00443                 else{
00444                     if( sArgs.auc_arg)
00445                           *postm << "#  AUC " <<  AUCMod( Data, Answers, vecfHere, vecfUbik, sArgs, !!sArgs.invert_flag, sArgs.auc_arg ) << endl;
00446                     else{
00447                         *postm << "#    AUC " <<  CStatistics::WilcoxonRankSum( Data, Answers, vecfHere, vecfUbik, !!sArgs.ctxtpos_flag, !!sArgs.ctxtneg_flag, !!sArgs.bridgepos_flag, !!sArgs.bridgeneg_flag, !!sArgs.outpos_flag, !!sArgs.outneg_flag, !!sArgs.invert_flag )  << endl;
00448 
00449                     }
00450                 }
00451             }
00452                
00453             if( sArgs.inputs_num )
00454                 ofsm.close( );
00455             else
00456                 cout.flush( );
00457 
00458             if( !sArgs.inputs_num )
00459                 break;
00460         }
00461     }
00462 
00463     return 0;
00464 }
00465 
00466 struct SSorterMod {
00467     const vector<float>&    m_vecdValues;
00468 
00469     SSorterMod( const vector<float>& vecdValues ) : m_vecdValues(vecdValues) { }
00470 
00471     bool operator()( size_t iOne, size_t iTwo ) {
00472 
00473         return ( m_vecdValues[iTwo] < m_vecdValues[iOne] );
00474     }
00475 };
00476 
00477 double AUCMod( const CDat& DatData, const CDat& DatAnswers, const vector<bool>& vecfHere, const vector<bool>& vecfUbik, const gengetopt_args_info& sArgs, bool fInvert,
00478                float dAUC ) {
00479     size_t          i, j, iPos, iNeg, iPosCur, iNegCur, iOne, iTwo, iIndex, iAUC;
00480     vector<float>       vecdValues, vecdAnswers;
00481     vector<size_t>      veciGenes, veciIndices;
00482     bool            fAnswer;
00483     double          dRet;
00484     float           d, dAnswer, dSens, dSpec, dSensPrev, dSpecPrev;
00485 
00486     veciGenes.resize( DatAnswers.GetGenes( ) );
00487     for( i = 0; i < veciGenes.size( ); ++i )
00488         veciGenes[ i ] = DatData.GetGene( DatAnswers.GetGene( i ) );
00489 
00490     for( iPos = iNeg = i = 0; i < DatAnswers.GetGenes( ); ++i ) {
00491         if( ( iOne = veciGenes[ i ] ) == -1 )
00492             continue;
00493         for( j = ( i + 1 ); j < DatAnswers.GetGenes( ); ++j ) {
00494             if( ( ( iTwo = veciGenes[ j ] ) == -1 ) ||
00495                     CMeta::IsNaN( dAnswer = DatAnswers.Get( i, j ) ) ||
00496                     CMeta::IsNaN( d = DatData.Get( iOne, iTwo ) ) )
00497                 continue;
00498             fAnswer = dAnswer > 0;
00499 
00500         if ( CMeta::SkipEdge( fAnswer, i, j, vecfHere, vecfUbik, sArgs.ctxtpos_flag, sArgs.ctxtneg_flag, sArgs.bridgepos_flag, sArgs.bridgeneg_flag, sArgs.outpos_flag, sArgs.outneg_flag ) ) {
00501         continue;
00502         }
00503 
00504             if( fAnswer )
00505                 iPos++;
00506             else
00507                 iNeg++;
00508             if( fInvert )
00509                 d = 1 - d;
00510             vecdAnswers.push_back( dAnswer );
00511             vecdValues.push_back( d );
00512         }
00513     }
00514 
00515     veciIndices.resize( vecdValues.size( ) );
00516     for( i = 0; i < veciIndices.size( ); ++i )
00517         veciIndices[i] = i;
00518     sort( veciIndices.begin( ), veciIndices.end( ), SSorterMod( vecdValues ) );
00519 
00520     iAUC = (size_t)( ( dAUC < 1 ) ? ( dAUC * iNeg ) : dAUC );
00521     dRet = dSensPrev = dSpecPrev = 0;
00522     for( iPosCur = iNegCur = i = 0; ( i < veciIndices.size( ) ) && ( iNegCur < iAUC ); ++i ) {
00523         iIndex = veciIndices[i];
00524         if( vecdAnswers[iIndex] > 0 )
00525             iPosCur++;
00526         else
00527             iNegCur++;
00528         dSens = (float)iPosCur / iPos;
00529         dSpec = 1 - (float)( iNeg - iNegCur ) / iNeg;
00530         if( dSpec > dSpecPrev ) {
00531             dRet += ( dSpec - dSpecPrev ) * dSens;
00532             dSensPrev = dSens;
00533             dSpecPrev = dSpec;
00534         }
00535     }
00536     dRet *= max( 1.0f, (float)iNeg / iAUC );
00537 
00538     return dRet;
00539 }