Sleipnir
tools/Combiner/Combiner.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 
00024 static int MainDABs( const gengetopt_args_info& );
00025 static int MainDATs( const gengetopt_args_info& );
00026 static int MainRevDATs( const gengetopt_args_info& );
00027 static int MainPCLs( const gengetopt_args_info& );
00028 static int MainModules( const gengetopt_args_info& );
00029 
00030 static const TPFnCombiner   c_apfnCombiners[]   = { MainPCLs, MainDATs, MainDABs, MainModules, MainRevDATs, NULL };
00031 static const char*          c_aszCombiners[]    = { "pcl", "dat", "dad", "module", "revdat", NULL };
00032 static const char   c_acDab[]   = ".dab";
00033 static const char   c_acDat[]   = ".dat";
00034 static const char   c_acQDab[]   = ".qdab";
00035 
00036 // store all input file names
00037 vector<string> input_files;
00038 
00039 enum EMethod {
00040     EMethodBegin    = 0,
00041     EMethodMean     = EMethodBegin,
00042     EMethodSum      = EMethodMean + 1,
00043     EMethodGMean    = EMethodSum + 1,
00044     EMethodHMean    = EMethodGMean + 1,
00045     EMethodMax      = EMethodHMean + 1,
00046     EMethodMin      = EMethodMax + 1,
00047     EMethodDiff     = EMethodMin + 1,
00048     EMethodMeta     = EMethodDiff + 1,
00049     EMethodQMeta    = EMethodMeta + 1,
00050     EMethodEnd      = EMethodQMeta + 1
00051 };
00052 
00053 static const char*  c_aszMethods[]  = {
00054     "mean", "sum", "gmean", "hmean", "max", "min", "diff", "meta", "qmeta", NULL
00055 };
00056 
00057 int main( int iArgs, char** aszArgs ) {
00058     gengetopt_args_info sArgs;
00059     int                 iRet;
00060     size_t              i;
00061     DIR* dp;
00062     struct dirent* ep;
00063         
00064     if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
00065         cmdline_parser_print_help( );
00066         return 1; }
00067     CMeta Meta( sArgs.verbosity_arg );
00068     
00069     // now collect the data files from directory if given
00070     if(sArgs.directory_arg){
00071       dp = opendir (sArgs.directory_arg);
00072       if (dp != NULL){
00073         while (ep = readdir (dp)){
00074           // skip . .. files and temp files with ~
00075           if (ep->d_name[0] == '.' || ep->d_name[strlen(ep->d_name)-1] == '~') 
00076         continue;
00077           
00078           // currently opens all files. Add filter here if want pick file extensions
00079           input_files.push_back((string)sArgs.directory_arg + "/" + ep->d_name);          
00080         }
00081         (void) closedir (dp);       
00082       }
00083       else{
00084         cerr << "Couldn't open the directory: " << sArgs.directory_arg << '\n';
00085         return 1;
00086       }
00087     }
00088     else{
00089       input_files.resize( sArgs.inputs_num );
00090       copy( sArgs.inputs, sArgs.inputs + sArgs.inputs_num, input_files.begin( ) );
00091     }
00092     
00093     for( i = 0; c_aszCombiners[ i ]; ++i )
00094         if( !strcmp( c_aszCombiners[ i ], sArgs.type_arg ) ) {
00095             iRet = c_apfnCombiners[ i ]( sArgs );
00096             break; }
00097     
00098     return iRet; }
00099 
00100 int MainPCLs( const gengetopt_args_info& sArgs ) {
00101     CPCL                        PCL, PCLNew;
00102     size_t                      i, j, iArg, iExp, iGene;
00103     vector<string>              vecstrGenes, vecstrExps, vecstrFeatures;
00104     set<string>                 setstrGenes;
00105     set<string>::const_iterator iterGenes;
00106     ifstream                    ifsm;
00107     ofstream                    ofsm;
00108 
00109     for( iArg = 0; iArg < input_files.size(); ++iArg ) {
00110         ifsm.clear( );
00111         ifsm.open( input_files[ iArg ].c_str() );
00112         if( !PCL.Open( ifsm, sArgs.skip_arg ) ) {
00113             cerr << "Could not open: " << input_files[ iArg ] << endl;
00114             return 1; }
00115         if( !iArg )
00116             for( i = 0; i < PCL.GetFeatures( ); ++i )
00117                 vecstrFeatures.push_back( PCL.GetFeature( i ) );
00118         for( i = 0; i < PCL.GetExperiments( ); ++i )
00119             vecstrExps.push_back( PCL.GetExperiment( i ) );
00120         for( i = 0; i < PCL.GetGenes( ); ++i )
00121             setstrGenes.insert( PCL.GetGene( i ) );
00122         ifsm.close( ); }
00123     vecstrGenes.resize( setstrGenes.size( ) );
00124     copy( setstrGenes.begin( ), setstrGenes.end( ), vecstrGenes.begin( ) );
00125 
00126     PCLNew.Open( vecstrGenes, vecstrExps, vecstrFeatures );
00127     iExp = 0;
00128     for( iArg = 0; iArg < input_files.size(); ++iArg ) {
00129         cerr << "Processing " << input_files[ iArg ] << "..." << endl;
00130         ifsm.clear( );
00131         ifsm.open( input_files[ iArg ].c_str() );
00132         PCL.Open( ifsm, sArgs.skip_arg );
00133         for( i = 0; i < PCLNew.GetGenes( ); ++i )
00134             if( ( iGene = PCL.GetGene( vecstrGenes[ i ] ) ) != -1 ) {
00135                 if( !iArg )
00136                     for( j = 1; j < PCLNew.GetFeatures( ); ++j )
00137                         PCLNew.SetFeature( i, j, PCL.GetFeature( iGene, j ) );
00138                 for( j = 0; j < PCL.GetExperiments( ); ++j )
00139                     PCLNew.Set( i, iExp + j, PCL.Get( iGene, j ) ); }
00140         iExp += PCL.GetExperiments( );
00141         ifsm.close( ); }
00142 
00143     if( sArgs.output_arg ) {
00144         ofsm.open( sArgs.output_arg );
00145         PCLNew.Save( ofsm );
00146         ofsm.close( ); }
00147     else {
00148         PCLNew.Save( cout );
00149         cout.flush( ); }
00150 
00151     return 0; }
00152 
00153 struct SCallbackMeta {
00154     CDat                    m_DatWei;
00155     CDat                    m_DatYBare;
00156     CDat                    m_DatQe;
00157     CDat                    m_DatDeltaeD;
00158 };
00159 
00160 struct SCallbackVars {
00161     size_t                          m_iDataset;
00162     size_t                          m_iOne;
00163     size_t                          m_iTwo;
00164     float                           m_dValue;
00165     float                           m_dWeight;
00166     EMethod                         m_eMethod;
00167     size_t                          m_iDatasets;
00168     CDat*                           m_pDatOut;
00169     CDat*                           m_pDatCur;
00170     CHalfMatrix<float>*             m_pMatCounts;
00171     const vector<set<size_t> >*     m_pvecsetiGenes;
00172     const vector<vector<size_t> >*  m_pvecveciGenes;
00173     SCallbackMeta                   m_sCallbackMeta;
00174 };
00175 
00176 struct SCallback {
00177     const gengetopt_args_info&  m_sArgs;
00178     const CGenes&               m_GenesIn;
00179     const CPCL&                 m_PCLWeights;
00180     const vector<string>&       m_vecstrTerms;
00181     const vector<CGenes*>&      m_vecpTerms;
00182     SCallbackVars               m_sCallbackVars;
00183 
00184     SCallback( const gengetopt_args_info& sArgs, const CGenes& GenesIn, const CPCL& PCLWeights,
00185         const vector<string>& vecstrTerms, const vector<CGenes*>& vecpTerms ) :
00186         m_sArgs(sArgs), m_GenesIn(GenesIn), m_PCLWeights(PCLWeights), m_vecstrTerms(vecstrTerms),
00187         m_vecpTerms(vecpTerms) { }
00188 };
00189 
00190 int iterate_inputs(SCallback& sCallback, void (*pfnCallback)( SCallbackVars& ), void (*pfnInitialize)( SCallbackVars& ) = NULL ) {
00191     SCallbackVars&              sCallbackVars   = sCallback.m_sCallbackVars;
00192     CDat&                       DatOut          = *sCallbackVars.m_pDatOut;
00193     const gengetopt_args_info&  sArgs           = sCallback.m_sArgs;
00194     const CGenes&               GenesIn         = sCallback.m_GenesIn;
00195     const CPCL&                 PCLWeights      = sCallback.m_PCLWeights;
00196     const vector<string>&       vecstrTerms     = sCallback.m_vecstrTerms;
00197     const vector<CGenes*>&      vecpTerms       = sCallback.m_vecpTerms;
00198     vector<set<size_t> >        vecsetiGenes;
00199     vector<vector<size_t> >     vecveciGenes;
00200     size_t                      i, j, k, iOne, iTwo, iA, iB;
00201     CDat                        DatCur;
00202     float                       d;
00203     
00204     sCallbackVars.m_iDatasets = input_files.size( );
00205     sCallbackVars.m_pvecveciGenes = &vecveciGenes;
00206     sCallbackVars.m_pvecsetiGenes = &vecsetiGenes;
00207     vecsetiGenes.resize( DatOut.GetGenes( ) );
00208     for( i = 0; i < input_files.size( ); ++i ) {
00209       if( !DatCur.Open( input_files[ i ].c_str(), !!sArgs.memmap_flag && !sArgs.normalize_flag && !GenesIn.GetGenes( ) ) ) {
00210             cerr << "Couldn't open: " << input_files[ i ] << endl;
00211             return 1; }
00212         sCallbackVars.m_iDataset = i;
00213         sCallbackVars.m_pDatCur = &DatCur;
00214         if( PCLWeights.GetGenes( ) ) {
00215             if( ( j = PCLWeights.GetGene( CMeta::Deextension( CMeta::Basename( input_files[ i ].c_str() ) ) ) ) == -1 ) {
00216                 cerr << "Ignoring unweighted graph: " << input_files[ i ] << endl;
00217                 continue; }
00218             sCallbackVars.m_dWeight = PCLWeights.Get( j, 0 ); }
00219         else
00220             sCallbackVars.m_dWeight = 1;
00221         cerr << "Opened: " << input_files[ i ] << endl;
00222         if( sArgs.zero_flag ) {
00223             vector<string>  vecstrGenes;
00224 
00225             for( j = 0; j < DatOut.GetGenes( ); ++j )
00226                 if( DatCur.GetGene( DatOut.GetGene( j ) ) == -1 )
00227                     vecstrGenes.push_back( DatOut.GetGene( j ) );
00228             DatCur.AddGenes( vecstrGenes );
00229             for( j = 0; j < DatCur.GetGenes( ); ++j )
00230                 for( k = ( j + 1 ); k < DatCur.GetGenes( ); ++k )
00231                     if( CMeta::IsNaN( DatCur.Get( j, k ) ) )
00232                         DatCur.Set( j, k, 0 ); }
00233         if( sArgs.normalize_flag )
00234             DatCur.Normalize( CDat::ENormalizeZScore );
00235         if( sArgs.quantiles_arg > 0 )
00236             DatCur.NormalizeQuantiles( sArgs.quantiles_arg );
00237         if( GenesIn.GetGenes( ) )
00238             DatCur.FilterGenes( GenesIn, CDat::EFilterInclude );
00239         vecveciGenes.resize( DatCur.GetGenes( ) );
00240         for( j = 0; j < vecveciGenes.size( ); ++j )
00241             vecveciGenes[j].clear( );
00242         for( j = 0; j < DatOut.GetGenes( ); ++j ) {
00243             vecsetiGenes[j].clear( );
00244             if( vecstrTerms.empty( ) ) {
00245                 if( ( iOne = DatCur.GetGene( DatOut.GetGene( j ) ) ) != -1 )
00246                     vecveciGenes[iOne].push_back( j ); }
00247             else
00248                 for( k = 0; k < vecpTerms[j]->GetGenes( ); ++k )
00249                     if( ( iOne = DatCur.GetGene( vecpTerms[j]->GetGene( k ).GetName( ) ) ) != -1 ) {
00250                         vecveciGenes[iOne].push_back( j );
00251                         vecsetiGenes[j].insert( iOne ); } }
00252         if( pfnInitialize )
00253             pfnInitialize( sCallbackVars );
00254         for( j = 0; j < DatCur.GetGenes( ); ++j )
00255             for( k = ( j + 1 ); k < DatCur.GetGenes( ); ++k ) {
00256                 if( CMeta::IsNaN( d = DatCur.Get( j, k ) ) )
00257                     continue;
00258                 sCallbackVars.m_dValue = d;
00259                 for( iA = 0; iA < vecveciGenes[j].size( ); ++iA ) {
00260                     iOne = vecveciGenes[j][iA];
00261                     if( vecsetiGenes[iOne].find( k ) != vecsetiGenes[iOne].end( ) )
00262                         continue;
00263                     sCallbackVars.m_iOne = iOne;
00264                     for( iB = 0; iB < vecveciGenes[k].size( ); ++iB ) {
00265                         iTwo = vecveciGenes[k][iB];
00266                         if( vecsetiGenes[iTwo].find( j ) != vecsetiGenes[iTwo].end( ) )
00267                             continue;
00268                         sCallbackVars.m_iTwo = iTwo;
00269                         pfnCallback( sCallbackVars ); } } } }
00270 
00271     return 0; }
00272 
00273 float weight_weistar( const SCallbackVars& sCallback ) {
00274     const SCallbackMeta&    sCallbackMeta   = sCallback.m_sCallbackMeta;
00275     float                   dDelta2;
00276 
00277     if( dDelta2 = sCallbackMeta.m_DatDeltaeD.Get( sCallback.m_iOne, sCallback.m_iTwo ) ) {
00278         if( ( dDelta2 = ( sCallbackMeta.m_DatQe.Get( sCallback.m_iOne, sCallback.m_iTwo ) -
00279             sCallback.m_iDatasets + 1 ) / dDelta2 ) < 0 )
00280             dDelta2 = 0; }
00281 /*
00282 if( ( (float)rand( ) / RAND_MAX ) < 0.000001 )
00283 cerr << ( 1 / ( ( 1 / sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ) ) + dDelta2 ) ) << '\t' <<
00284 sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ) << '\t' << dDelta2 << endl;
00285 //*/
00286     return ( 1 / ( ( 1 / sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ) ) + dDelta2 ) ); }
00287 
00288 void callback_andersondarling( SCallbackVars& sCallback ) {
00289     static const float  c_dFrac     = 0.001f;
00290     vector<float>       vecdValues;
00291     size_t              i, j, iGenes;
00292     float               d;
00293 
00294     iGenes = sCallback.m_pDatCur->GetGenes( );
00295     vecdValues.reserve( (size_t)( iGenes * iGenes * c_dFrac ) );
00296     for( i = 0; i < iGenes; ++i )
00297         for( j = ( i + 1 ); j < iGenes; ++j )
00298             if( !CMeta::IsNaN( d = sCallback.m_pDatCur->Get( i, j ) ) &&
00299                 ( ( (float)rand( ) / RAND_MAX ) < c_dFrac ) )
00300                 vecdValues.push_back( d );
00301     sCallback.m_dWeight = (float)( log( CStatistics::AndersonDarlingScore<float>(
00302         vecdValues.begin( ), vecdValues.end( ) ) ) / log( 2.0 ) ); }
00303 
00304 void callback_combine( SCallbackVars& sCallback ) {
00305 
00306     if( sCallback.m_eMethod == EMethodMeta )
00307         sCallback.m_dWeight = weight_weistar( sCallback );
00308     switch( sCallback.m_eMethod ) {
00309         case EMethodGMean:
00310             sCallback.m_pDatOut->Get( sCallback.m_iOne, sCallback.m_iTwo ) *= pow( sCallback.m_dValue, sCallback.m_dWeight );
00311             sCallback.m_pMatCounts->Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallback.m_dWeight;
00312             break;
00313 
00314         case EMethodHMean:
00315             sCallback.m_pDatOut->Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallback.m_dWeight / sCallback.m_dValue;
00316             sCallback.m_pMatCounts->Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallback.m_dWeight;
00317             break;
00318 
00319         case EMethodMax:
00320             if( sCallback.m_dValue > sCallback.m_pDatOut->Get( sCallback.m_iOne, sCallback.m_iTwo ) )
00321                 sCallback.m_pDatOut->Set( sCallback.m_iOne, sCallback.m_iTwo, sCallback.m_dValue );
00322             break;
00323 
00324         case EMethodMin:
00325             if( sCallback.m_dValue < sCallback.m_pDatOut->Get( sCallback.m_iOne, sCallback.m_iTwo ) )
00326                 sCallback.m_pDatOut->Set( sCallback.m_iOne, sCallback.m_iTwo, sCallback.m_dValue );
00327             break;
00328 
00329         default:
00330             sCallback.m_pDatOut->Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallback.m_dWeight * sCallback.m_dValue *
00331                 ( ( sCallback.m_iDataset && ( sCallback.m_eMethod == EMethodDiff ) ) ? -1 : 1 );
00332             sCallback.m_pMatCounts->Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallback.m_dWeight; } }
00333 
00334 void callback_wei( SCallbackVars& sCallback ) {
00335     static const float              c_dFrac         = 0.001f;
00336     SCallbackMeta&                  sCallbackMeta   = sCallback.m_sCallbackMeta;
00337     const vector<vector<size_t> >&  vecveciGenes    = *sCallback.m_pvecveciGenes;
00338     const vector<set<size_t> >&     vecsetiGenes    = *sCallback.m_pvecsetiGenes;
00339     size_t                          i, j, iOne, iTwo, iA, iB, iGenes;
00340     float                           d, dSum, dSumSqs;
00341     vector<float>                   vecdSums, vecdSumSqs, vecdValues;
00342 
00343     iGenes = sCallback.m_pDatCur->GetGenes( );
00344     vecdValues.reserve( (size_t)( iGenes * ( iGenes - 1 ) * c_dFrac ) );
00345     vecdSums.resize( sCallback.m_pDatOut->GetGenes( ) );
00346     fill( vecdSums.begin( ), vecdSums.end( ), 0.0f );
00347     vecdSumSqs.resize( sCallback.m_pDatOut->GetGenes( ) );
00348     fill( vecdSumSqs.begin( ), vecdSumSqs.end( ), 0.0f );
00349     dSum = dSumSqs = 0;
00350     for( i = 0; i < iGenes; ++i )
00351         for( j = ( i + 1 ); j < iGenes; ++j ) {
00352             if( CMeta::IsNaN( d = sCallback.m_pDatCur->Get( i, j ) ) )
00353                 continue;
00354             for( iA = 0; iA < vecveciGenes[i].size( ); ++iA ) {
00355                 iOne = vecveciGenes[i][iA];
00356                 if( vecsetiGenes[iOne].find( j ) != vecsetiGenes[iOne].end( ) )
00357                     continue;
00358                 for( iB = 0; iB < vecveciGenes[j].size( ); ++iB ) {
00359                     iTwo = vecveciGenes[j][iB];
00360                     if( vecsetiGenes[iTwo].find( i ) != vecsetiGenes[iTwo].end( ) )
00361                         continue;
00362                     if( !( iA || iB ) && ( ( (float)rand( ) / RAND_MAX ) < c_dFrac ) )
00363                         vecdValues.push_back( d );
00364                     dSum += d;
00365                     vecdSums[iOne] += d;
00366                     vecdSums[iTwo] += d;
00367                     d *= d;
00368                     dSumSqs += d;
00369                     vecdSumSqs[iOne] += d;
00370                     vecdSumSqs[iTwo] += d; } } }
00371     i = iGenes * ( iGenes - 1 ) / 2;
00372     dSum /= i;
00373     dSumSqs = ( dSumSqs / ( i - 1 ) ) - ( dSum * dSum );
00374     for( i = 0; i < vecdSums.size( ); ++i ) {
00375         d = ( vecdSums[i] /= iGenes - 1 );
00376         vecdSumSqs[i] = ( vecdSumSqs[i] / ( iGenes - 2 ) ) - ( d * d ); }
00377 
00378     d = (float)( log( CStatistics::AndersonDarlingScore<float>( vecdValues.begin( ), vecdValues.end( ) ) ) /
00379         log( 2.0 ) );
00380     sCallbackMeta.m_DatWei.Open( sCallback.m_pDatOut->GetGeneNames( ) );
00381     sCallbackMeta.m_DatWei.Clear( 0 );
00382     for( i = 0; i < vecdSumSqs.size( ); ++i )
00383         for( j = ( i + 1 ); j < vecdSumSqs.size( ); ++j )
00384             sCallbackMeta.m_DatWei.Set( i, j,
00385 //              2 / ( vecdSumSqs[i] + vecdSumSqs[j] )           // pooled variance of adjacent genes
00386 //              3 / ( vecdSumSqs[i] + vecdSumSqs[j] + dSumSqs ) // unweighted pool of adjanced genes + whole network
00387                 3 * d / ( vecdSumSqs[i] + vecdSumSqs[j] + dSumSqs )
00388             ); }
00389 
00390 void callback_deltaed( SCallbackVars& sCallback ) {
00391     SCallbackMeta&  sCallbackMeta   = sCallback.m_sCallbackMeta;
00392     float           d;
00393 
00394     d = sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo );
00395     sCallbackMeta.m_DatDeltaeD.Get( sCallback.m_iOne, sCallback.m_iTwo ) += d;
00396     sCallback.m_pDatOut->Get( sCallback.m_iOne, sCallback.m_iTwo ) += d * d; }
00397 
00398 void callback_ybare( SCallbackVars& sCallback ) {
00399     SCallbackMeta&  sCallbackMeta   = sCallback.m_sCallbackMeta;
00400 
00401     sCallbackMeta.m_DatYBare.Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ) *
00402         sCallback.m_dValue;
00403     sCallback.m_pMatCounts->Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ); }
00404 
00405 void callback_qe( SCallbackVars& sCallback ) {
00406     SCallbackMeta&  sCallbackMeta   = sCallback.m_sCallbackMeta;
00407     float           d;
00408 
00409     d = sCallback.m_dValue - sCallbackMeta.m_DatYBare.Get( sCallback.m_iOne, sCallback.m_iTwo );
00410     sCallbackMeta.m_DatQe.Get( sCallback.m_iOne, sCallback.m_iTwo ) += sCallbackMeta.m_DatWei.Get( sCallback.m_iOne, sCallback.m_iTwo ) * d * d; }
00411 
00412 void initialize_meta( SCallback& sCallback ) {
00413     SCallbackMeta&          sCallbackMeta   = sCallback.m_sCallbackVars.m_sCallbackMeta;
00414     const vector<string>&   vecstrGenes     = sCallback.m_sCallbackVars.m_pDatOut->GetGeneNames( );
00415     size_t                  i, j;
00416     CDat*                   pDatOut;
00417     CHalfMatrix<float>*     pMatCounts;
00418     CHalfMatrix<float>      MatCounts;
00419     float                   d;
00420     CDat                    DatTmp;
00421 
00422     sCallbackMeta.m_DatDeltaeD.Open( vecstrGenes );
00423     sCallbackMeta.m_DatDeltaeD.Clear( 0 );
00424     DatTmp.Open( vecstrGenes, false );
00425     DatTmp.Clear( 0 );
00426     pDatOut = sCallback.m_sCallbackVars.m_pDatOut;
00427     sCallback.m_sCallbackVars.m_pDatOut = &DatTmp;
00428     iterate_inputs( sCallback, callback_deltaed, callback_wei );
00429     sCallback.m_sCallbackVars.m_pDatOut = pDatOut;
00430     for( i = 0; i < sCallbackMeta.m_DatDeltaeD.GetGenes( ); ++i )
00431         for( j = ( i + 1 ); j < sCallbackMeta.m_DatDeltaeD.GetGenes( ); ++j )
00432             if( d = sCallbackMeta.m_DatDeltaeD.Get( i, j ) )
00433                 sCallbackMeta.m_DatDeltaeD.Get( i, j ) -= DatTmp.Get( i, j ) / d;
00434 
00435     sCallbackMeta.m_DatYBare.Open( vecstrGenes, false );
00436     sCallbackMeta.m_DatYBare.Clear( 0 );
00437     MatCounts.Initialize( vecstrGenes.size( ) );
00438     MatCounts.Clear( );
00439     pMatCounts = sCallback.m_sCallbackVars.m_pMatCounts;
00440     sCallback.m_sCallbackVars.m_pMatCounts = &MatCounts;
00441     iterate_inputs( sCallback, callback_ybare, callback_wei );
00442     sCallback.m_sCallbackVars.m_pMatCounts = pMatCounts;
00443     for( i = 0; i < sCallbackMeta.m_DatYBare.GetGenes( ); ++i )
00444         for( j = ( i + 1 ); j < sCallbackMeta.m_DatYBare.GetGenes( ); ++j )
00445             if( d = MatCounts.Get( i, j ) )
00446                 sCallbackMeta.m_DatYBare.Get( i, j ) /= d;
00447 
00448     sCallbackMeta.m_DatQe.Open( vecstrGenes, false );
00449     sCallbackMeta.m_DatQe.Clear( 0 );
00450     iterate_inputs( sCallback, callback_qe, callback_wei ); }
00451 
00452 int MainDATs( const gengetopt_args_info& sArgs ) {
00453     CDataset                Dataset;
00454     CDat                    DatOut, DatCur, DatQW;
00455     CHalfMatrix<float>      MatCounts;
00456     size_t                  i, j;
00457     float                   d;
00458     vector<string>          vecstrTerms;
00459     CPCL                    PCLWeights( false );
00460     CGenome                 Genome;
00461     CGenes                  GenesIn( Genome );
00462     vector<CGenes*>         vecpTerms;
00463     EMethod                 eMethod;
00464     vector<float>           vecdWis;
00465     int                     iRet;
00466     SCallback               sCallback( sArgs, GenesIn, PCLWeights, vecstrTerms, vecpTerms );
00467     void (*pfnInitialize)( SCallbackVars& );
00468 
00469     if( !input_files.size() )
00470       return 1;
00471     
00472     if( !Dataset.OpenGenes( input_files ) ) {
00473         cerr << "Couldn't open: " << input_files[ 0 ];
00474         for( i = 1; i < input_files.size( ); ++i )
00475             cerr << ", " << input_files[ i ];
00476         cerr << endl;
00477         return 1; }
00478     if( sArgs.weights_arg && !PCLWeights.Open( sArgs.weights_arg, 0 ) ) {
00479         cerr << "Could not open: " << sArgs.weights_arg << endl;
00480         return 1; }
00481     if( sArgs.genes_arg && !GenesIn.Open( sArgs.genes_arg ) ) {
00482         cerr << "Could not open: " << sArgs.genes_arg << endl;
00483         return 1; }
00484     if( sArgs.terms_arg && !CGenes::Open( sArgs.terms_arg, Genome, vecstrTerms, vecpTerms ) ) {
00485         cerr << "Could not open: " << sArgs.terms_arg << endl;
00486         return 1; }
00487 
00488     for( eMethod = EMethodBegin; eMethod < EMethodEnd; eMethod = (EMethod)( eMethod + 1 ) )
00489         if( !strcmp( c_aszMethods[eMethod], sArgs.method_arg ) )
00490             break;
00491     if( eMethod >= EMethodEnd ) {
00492         cmdline_parser_print_help( );
00493         return 1; }
00494 
00495     DatOut.Open( vecstrTerms.empty( ) ? Dataset.GetGeneNames( ) : vecstrTerms, false, sArgs.memmap_flag ? sArgs.output_arg : NULL );
00496     switch( eMethod ) {
00497         case EMethodMax:
00498             d = -FLT_MAX;
00499             break;
00500 
00501         case EMethodMin:
00502             d = FLT_MAX;
00503             break;
00504 
00505         case EMethodGMean:
00506             d = 1;
00507             break;
00508 
00509         default:
00510             d = 0; }
00511     for( i = 0; i < DatOut.GetGenes( ); ++i )
00512         for( j = ( i + 1 ); j < DatOut.GetGenes( ); ++j )
00513             DatOut.Set( i, j, d );
00514     if( fabs( d ) < 2 ) {
00515         MatCounts.Initialize( DatOut.GetGenes( ) );
00516         MatCounts.Clear( ); }
00517 
00518     sCallback.m_sCallbackVars.m_eMethod = eMethod;
00519     sCallback.m_sCallbackVars.m_pDatOut = &DatOut;
00520     sCallback.m_sCallbackVars.m_pMatCounts = &MatCounts;
00521     if( eMethod == EMethodMeta )
00522         initialize_meta( sCallback );
00523 
00524     switch( eMethod ) {
00525         case EMethodMeta:
00526             pfnInitialize = callback_wei;
00527             break;
00528 
00529         case EMethodQMeta:
00530             pfnInitialize = callback_andersondarling;
00531             break;
00532 
00533         default:
00534             pfnInitialize = NULL; }
00535     if( iRet = iterate_inputs( sCallback, callback_combine, pfnInitialize ) )
00536         return iRet;
00537     for( i = 0; i < DatOut.GetGenes( ); ++i )
00538         for( j = ( i + 1 ); j < DatOut.GetGenes( ); ++j )
00539             switch( eMethod ) {
00540                 case EMethodMean:
00541                 case EMethodMeta:
00542                 case EMethodQMeta:
00543                     DatOut.Set( i, j, ( d = MatCounts.Get( i, j ) ) ? ( DatOut.Get( i, j ) / ( sArgs.reweight_flag ? 1 : d ) ) :
00544                         CMeta::GetNaN( ) );
00545                     break;
00546 
00547                 case EMethodGMean:
00548                     DatOut.Set( i, j, ( d = MatCounts.Get( i, j ) ) ?
00549                         (float)pow( (double)DatOut.Get( i, j ), 1.0 / ( sArgs.reweight_flag ? 1 : d ) ) : CMeta::GetNaN( ) );
00550                     break;
00551 
00552                 case EMethodHMean:
00553                     DatOut.Set( i, j, ( d = MatCounts.Get( i, j ) ) ? ( ( sArgs.reweight_flag ? 1 : d ) / DatOut.Get( i, j ) ) :
00554                         CMeta::GetNaN( ) );
00555                     break;
00556 
00557                 case EMethodMax:
00558                     if( DatOut.Get( i, j ) == -FLT_MAX )
00559                         DatOut.Set( i, j, CMeta::GetNaN( ) );
00560                     break;
00561 
00562                 case EMethodMin:
00563                     if( DatOut.Get( i, j ) == FLT_MAX )
00564                         DatOut.Set( i, j, CMeta::GetNaN( ) ); }
00565 
00566     if( sArgs.zscore_flag )
00567         DatOut.Normalize( CDat::ENormalizeZScore );
00568     if( !sArgs.memmap_flag )
00569         DatOut.Save( sArgs.output_arg );
00570 
00571     for( i = 0; i < vecpTerms.size( ); ++i )
00572         delete vecpTerms[i];
00573     return 0; }
00574 
00575 static int MainDABs( const gengetopt_args_info& sArgs ) {
00576     CDatasetCompact Dataset;
00577     size_t          i;
00578     ofstream        ofsm;
00579 
00580     if( !input_files.size() )
00581         return 1;
00582 
00583     if( !Dataset.Open( input_files ) ) {
00584         cerr << "Couldn't open: " << input_files[ 0 ];
00585         for( i = 1; i < input_files.size( ); ++i )
00586             cerr << ", " << input_files[ i ];
00587         cerr << endl;
00588         return 1; }
00589 
00590     if( sArgs.output_arg ) {
00591         ofsm.open( sArgs.output_arg );
00592         Dataset.Save( ofsm, true );
00593         ofsm.close( ); }
00594     else {
00595         Dataset.Save( cout, false );
00596         cout.flush( ); }
00597 
00598     return 0; }
00599 
00600 struct SSimpleome {
00601     map<string, size_t> m_mapstriGenes;
00602     map<size_t, string> m_mapistrGenes;
00603 
00604     size_t Get( const string& strGene ) {
00605         map<string, size_t>::const_iterator iterGene;
00606         size_t                              iRet;
00607 
00608         if( ( iterGene = m_mapstriGenes.find( strGene ) ) != m_mapstriGenes.end( ) )
00609             return iterGene->second;
00610 
00611         m_mapstriGenes[ strGene ] = iRet = m_mapstriGenes.size( );
00612         m_mapistrGenes[ iRet ] = strGene;
00613         return iRet; }
00614 
00615     const string& Get( size_t iGene ) const {
00616 
00617         return m_mapistrGenes.find( iGene )->second; }
00618 };
00619 
00620 struct SModule {
00621     float               m_dSpecificity;
00622     set<size_t>         m_setiGenes;
00623     set<const SModule*> m_setpsChildren;
00624 
00625     static float Open( const char* szFile, SSimpleome& sSimpleome, vector<SModule*>& vecpsModules ) {
00626         static const size_t c_iBuffer   = 131072;
00627         ifstream        ifsm;
00628         char            acBuffer[ c_iBuffer ];
00629         vector<string>  vecstrLine;
00630         float           dRet;
00631         size_t          i, iModule;
00632         SModule*        psModule;
00633 
00634         ifsm.open( szFile );
00635         if( !ifsm.is_open( ) ) {
00636             cerr << "Could not open: " << szFile << endl;
00637             return CMeta::GetNaN( ); }
00638         for( iModule = 0; !ifsm.eof( ); ++iModule ) {
00639             ifsm.getline( acBuffer, c_iBuffer - 1 );
00640             if( !acBuffer[ 0 ] )
00641                 iModule--; }
00642         ifsm.close( );
00643         if( !iModule ) {
00644             cerr << "No modules found in: " << szFile << endl;
00645             return CMeta::GetNaN( ); }
00646         cerr << "Found " << --iModule << " modules in: " << szFile << endl;
00647         vecpsModules.resize( iModule );
00648 
00649         dRet = CMeta::GetNaN( );
00650         ifsm.clear( );
00651         ifsm.open( szFile );
00652         for( iModule = 0; !ifsm.eof( ); ++iModule ) {
00653             ifsm.getline( acBuffer, c_iBuffer - 1 );
00654             acBuffer[ c_iBuffer - 1 ] = 0;
00655             if( CMeta::IsNaN( dRet ) ) {
00656                 dRet = (float)atof( acBuffer );
00657                 iModule--;
00658                 continue; }
00659             vecstrLine.clear( );
00660             CMeta::Tokenize( acBuffer, vecstrLine );
00661             if( vecstrLine.size( ) < 2 ) {
00662                 iModule--;
00663                 continue; }
00664             vecpsModules[ iModule ] = psModule = new SModule( );
00665             psModule->m_dSpecificity = (float)atof( vecstrLine[ 0 ].c_str( ) );
00666             for( i = 1; i < vecstrLine.size( ); ++i )
00667                 psModule->m_setiGenes.insert( sSimpleome.Get( vecstrLine[ i ] ) ); }
00668 
00669         return dRet; }
00670 
00671     float Jaccard( const SModule& sModule ) const {
00672         set<size_t>::const_iterator iterGene;
00673         size_t                      iUnion, iIntersection;
00674 
00675         iIntersection = 0;
00676         iUnion = sModule.m_setiGenes.size( );
00677         for( iterGene = m_setiGenes.begin( ); iterGene != m_setiGenes.end( ); ++iterGene )
00678             if( sModule.m_setiGenes.find( *iterGene ) != sModule.m_setiGenes.end( ) )
00679                 iIntersection++;
00680             else
00681                 iUnion++;
00682 
00683         return ( (float)iIntersection / iUnion ); }
00684 
00685     size_t Intersection( const SModule& sModule ) const {
00686         size_t                      iRet;
00687         set<size_t>::const_iterator iterGene;
00688 
00689         for( iRet = 0,iterGene = m_setiGenes.begin( ); iterGene != m_setiGenes.end( ); ++iterGene )
00690             if( sModule.m_setiGenes.find( *iterGene ) != sModule.m_setiGenes.end( ) )
00691                 iRet++;
00692 
00693         return iRet; }
00694 
00695     void Merge( const SModule& sModule ) {
00696         set<size_t>::const_iterator iterGene;
00697 
00698         for( iterGene = sModule.m_setiGenes.begin( ); iterGene != sModule.m_setiGenes.end( ); ++iterGene )
00699             m_setiGenes.insert( *iterGene );
00700         m_dSpecificity = ( m_dSpecificity + sModule.m_dSpecificity ) / 2; }
00701 
00702     bool IsChild( const SModule* psModule ) const {
00703 
00704         return ( m_setpsChildren.find( psModule ) != m_setpsChildren.end( ) ); }
00705 
00706     void Save( ostream& ostm, float dCutoff, const SSimpleome& sSimpleome ) const {
00707         set<const SModule*>::const_iterator iterChild;
00708         set<size_t>::const_iterator         iterGene;
00709 
00710         ostm << this << '\t' << dCutoff << '\t' << m_dSpecificity << '\t';
00711         for( iterChild = m_setpsChildren.begin( ); iterChild != m_setpsChildren.end( ); ++iterChild ) {
00712             if( iterChild != m_setpsChildren.begin( ) )
00713                 ostm << '|';
00714             ostm << *iterChild; }
00715         for( iterGene = m_setiGenes.begin( ); iterGene != m_setiGenes.end( ); ++iterGene )
00716             ostm << '\t' << sSimpleome.Get( *iterGene );
00717         ostm << endl; }
00718 };
00719 
00720 struct SSorterModules {
00721     const vector<float>&    m_vecdModules;
00722 
00723     SSorterModules( const vector<float>& vecdModules ) : m_vecdModules(vecdModules) { }
00724 
00725     bool operator()( size_t iOne, size_t iTwo ) const {
00726 
00727         return ( m_vecdModules[ iOne ] > m_vecdModules[ iTwo ] ); }
00728 };
00729 
00730 int MainModules( const gengetopt_args_info& sArgs ) {
00731     vector<float>               vecdModules;
00732     vector<vector<SModule*> >   vecvecpsModules;
00733     vector<size_t>              veciIndices;
00734     size_t                      i, j, iOuter, iCutoffOne, iCutoffTwo, iModuleOne, iModuleTwo;
00735     SSimpleome                  sSimpleome;
00736     bool                        fDone;
00737     float                       d;
00738     ofstream                    ofsm;
00739     ostream*                    postm;
00740 
00741     vecdModules.resize( input_files.size() );
00742     vecvecpsModules.resize( input_files.size() );
00743     veciIndices.resize( input_files.size() );
00744     for( i = 0; i < vecvecpsModules.size( ); ++i ) {
00745         veciIndices[ i ] = i;
00746         if( CMeta::IsNaN( vecdModules[ i ] = SModule::Open( input_files[ i ].c_str(), sSimpleome,
00747             vecvecpsModules[ i ] ) ) )
00748             return 1; }
00749     sort( veciIndices.begin( ), veciIndices.end( ), SSorterModules( vecdModules ) );
00750 
00751     for( iOuter = 0,fDone = false; !fDone; ++iOuter ) {
00752         fDone = true;
00753         cerr << "Outer loop: " << iOuter << endl;
00754         for( iCutoffOne = 0; iCutoffOne < veciIndices.size( ); ++iCutoffOne ) {
00755             vector<SModule*>&   vecpsModulesOne = vecvecpsModules[ veciIndices[ iCutoffOne ] ];
00756 
00757             cerr << "Merging cutoff: " << vecdModules[ veciIndices[ iCutoffOne ] ] << endl;
00758             for( iModuleOne = 0; iModuleOne < vecpsModulesOne.size( ); ++iModuleOne ) {
00759                 SModule*    psOne   = vecpsModulesOne[ iModuleOne ];
00760 
00761                 if( !psOne )
00762                     continue;
00763                 for( iModuleTwo = ( iModuleOne + 1 ); iModuleTwo < vecpsModulesOne.size( ); ++iModuleTwo ) {
00764                     SModule*    psTwo   = vecpsModulesOne[ iModuleTwo ];
00765                     
00766                     if( !psTwo )
00767                         continue;
00768                     if( ( d = psOne->Jaccard( *psTwo ) ) >= sArgs.jaccard_arg ) {
00769                         cerr << "Merging @" << d << ' ' << iCutoffOne << ':' << iModuleOne << " (" <<
00770                             psOne->m_setiGenes.size( ) << ") " << iCutoffOne << ':' << iModuleTwo << " (" <<
00771                             psTwo->m_setiGenes.size( ) << ')' << endl;
00772                         psOne->Merge( *psTwo );
00773                         delete vecpsModulesOne[ iModuleTwo ];
00774                         vecpsModulesOne[ iModuleTwo ] = NULL;
00775                         iModuleTwo--;
00776                         fDone = false; } } }
00777             for( iCutoffTwo = ( iCutoffOne + 1 ); iCutoffTwo < veciIndices.size( ); ++iCutoffTwo ) {
00778                 vector<SModule*>&   vecpsModulesTwo = vecvecpsModules[ veciIndices[ iCutoffTwo ] ];
00779 
00780                 for( iModuleOne = 0; iModuleOne < vecpsModulesOne.size( ); ++iModuleOne ) {
00781                     SModule*    psOne   = vecpsModulesOne[ iModuleOne ];
00782 
00783                     if( !psOne )
00784                         continue;
00785                     for( iModuleTwo = 0; iModuleTwo < vecpsModulesTwo.size( ); ++iModuleTwo ) {
00786                         SModule*    psTwo   = vecpsModulesTwo[ iModuleTwo ];
00787                         
00788                         if( !psTwo )
00789                             continue;
00790                         if( ( d = psOne->Jaccard( *psTwo ) ) >= sArgs.jaccard_arg ) {
00791                             cerr << "Merging @" << d << ' ' << iCutoffOne << ':' << iModuleOne << " (" <<
00792                                 psOne->m_setiGenes.size( ) << ") " << iCutoffTwo << ':' << iModuleTwo <<
00793                                 " (" << psTwo->m_setiGenes.size( ) << ')' << endl;
00794                             psOne->Merge( *psTwo );
00795                             delete vecpsModulesTwo[ iModuleTwo ];
00796                             vecpsModulesTwo[ iModuleTwo ] = NULL;
00797                             iModuleTwo--;
00798                             fDone = false; } } } } } }
00799 
00800     for( iCutoffOne = 0; iCutoffOne < veciIndices.size( ); ++iCutoffOne ) {
00801         vector<SModule*>&   vecpsModulesOne = vecvecpsModules[ veciIndices[ iCutoffOne ] ];
00802 
00803         for( iCutoffTwo = ( iCutoffOne + 1 ); iCutoffTwo < veciIndices.size( ); ++iCutoffTwo ) {
00804             vector<SModule*>&   vecpsModulesTwo = vecvecpsModules[ veciIndices[ iCutoffTwo ] ];
00805 
00806             for( iModuleOne = 0; iModuleOne < vecpsModulesOne.size( ); ++iModuleOne ) {
00807                 SModule*    psOne   = vecpsModulesOne[ iModuleOne ];
00808 
00809                 if( !psOne )
00810                     continue;
00811                 for( iModuleTwo = 0; iModuleTwo < vecpsModulesTwo.size( ); ++iModuleTwo ) {
00812                     SModule*    psTwo   = vecpsModulesTwo[ iModuleTwo ];
00813                     
00814                     if( !psTwo )
00815                         continue;
00816                     if( !psTwo->IsChild( psOne ) && ( ( d = ( (float)psOne->Intersection( *psTwo ) /
00817                         psOne->m_setiGenes.size( ) ) ) >= sArgs.intersection_arg ) ) {
00818                         cerr << vecdModules[ veciIndices[ iCutoffOne ] ] << ':' << iModuleOne <<
00819                             " child of " << vecdModules[ veciIndices[ iCutoffTwo ] ] << ':' << iModuleTwo <<
00820                             " (" << psOne->m_setiGenes.size( ) << ", " << psTwo->m_setiGenes.size( ) << ", " <<
00821                             d << ')' << endl;
00822                         psTwo->m_setpsChildren.insert( psOne ); } } } } }
00823 
00824     if( sArgs.output_arg ) {
00825         ofsm.open( sArgs.output_arg );
00826         postm = &ofsm; }
00827     else
00828         postm = &cout;
00829     for( iCutoffOne = 0; iCutoffOne < veciIndices.size( ); ++iCutoffOne ) {
00830         vector<SModule*>&   vecpsModulesOne = vecvecpsModules[ veciIndices[ iCutoffOne ] ];
00831 
00832         for( iModuleOne = 0; iModuleOne < vecpsModulesOne.size( ); ++iModuleOne ) {
00833             SModule*    psOne   = vecpsModulesOne[ iModuleOne ];
00834 
00835             if( psOne )
00836                 psOne->Save( *postm, vecdModules[ veciIndices[ iCutoffOne ] ], sSimpleome ); } }
00837     if( sArgs.output_arg )
00838         ofsm.close( );
00839 
00840     for( i = 0; i < vecvecpsModules.size( ); ++i )
00841         for( j = 0; j < vecvecpsModules[ i ].size( ); ++j )
00842             if( vecvecpsModules[ i ][ j ] )
00843                 delete vecvecpsModules[ i ][ j ];
00844 
00845     return 0; }
00846 
00847 int MainRevDATs( const gengetopt_args_info& sArgs ) {
00848     vector<string>          vecstrTerms, vecstrGenes;
00849     vector<CGenes*>         vecpTerms;
00850     CGenome                 Genome;
00851     CDat                    DatOut;
00852     CHalfMatrix<uint32_t>   MatCounts;
00853     size_t                  i, j, k, iOne, iTwo, iArg, iA, iB;
00854     float                   d;
00855 
00856     if( !sArgs.terms_arg ) {
00857         cerr << "Terms argument required" << endl;
00858         return 1; }
00859     if( !CGenes::Open( sArgs.terms_arg, Genome, vecstrTerms, vecpTerms ) ) {
00860         cerr << "Could not open: " << sArgs.terms_arg << endl;
00861         return 1; }
00862 
00863     {
00864         set<string>     setstrGenes;
00865 
00866         for( i = 0; i < vecpTerms.size( ); ++i )
00867             for( j = 0; j < vecpTerms[i]->GetGenes( ); ++j )
00868                 setstrGenes.insert( vecpTerms[i]->GetGene( j ).GetName( ) );
00869         vecstrGenes.resize( setstrGenes.size( ) );
00870         copy( setstrGenes.begin( ), setstrGenes.end( ), vecstrGenes.begin( ) );
00871     }
00872 
00873     DatOut.Open( vecstrGenes, false );
00874     for( i = 0; i < DatOut.GetGenes( ); ++i )
00875         memset( DatOut.Get( i ), 0, ( DatOut.GetGenes( ) - i - 1 ) * sizeof(*DatOut.Get( i )) );
00876     MatCounts.Initialize( DatOut.GetGenes( ) );
00877     MatCounts.Clear( );
00878     for( iArg = 0; iArg < input_files.size(); ++iArg ) {
00879         CDat                    DatIn;
00880         vector<vector<size_t> > vecveciGenes;
00881 
00882         if( !DatIn.Open( input_files[iArg].c_str() ) ) {
00883             cerr << "Could not open: " << input_files[iArg] << endl;
00884             return 1; }
00885         vecveciGenes.resize( DatIn.GetGenes( ) );
00886         for( i = 0; i < DatIn.GetGenes( ); ++i ) {
00887             for( j = 0; j < vecstrTerms.size( ); ++j )
00888                 if( DatIn.GetGene( i ) == vecstrTerms[j] )
00889                     break;
00890             if( j < vecstrTerms.size( ) ) {
00891                 for( k = 0; k < vecpTerms[j]->GetGenes( ); ++k )
00892                     if( ( iOne = DatOut.GetGene( vecpTerms[j]->GetGene( k ).GetName( ) ) ) != -1 )
00893                         vecveciGenes[i].push_back( iOne ); }
00894             else
00895                 cerr << "Unrecognized gene: " << DatIn.GetGene( i ) << endl; }
00896         for( i = 0; i < DatIn.GetGenes( ); ++i ) {
00897             if( vecveciGenes[i].empty( ) )
00898                 continue;
00899             for( j = ( i + 1 ); j < DatIn.GetGenes( ); ++j ) {
00900                 if( CMeta::IsNaN( d = DatIn.Get( i, j ) ) )
00901                     continue;
00902                 for( iA = 0; iA < vecveciGenes[i].size( ); ++iA ) {
00903                     iOne = vecveciGenes[i][iA];
00904                     for( iB = 0; iB < vecveciGenes[j].size( ); ++iB ) {
00905                         iTwo = vecveciGenes[j][iB];
00906                         MatCounts.Get( iOne, iTwo )++;
00907                         DatOut.Get( iOne, iTwo ) += d; } } } } }
00908 
00909     for( i = 0; i < DatOut.GetGenes( ); ++i )
00910         for( j = ( i + 1 ); j < DatOut.GetGenes( ); ++j )
00911             if( k = MatCounts.Get( i, j ) )
00912                 DatOut.Get( i, j ) /= k;
00913             else
00914                 DatOut.Set( i, j, CMeta::GetNaN( ) );
00915     DatOut.Save( sArgs.output_arg );
00916 
00917     for( i = 0; i < vecpTerms.size( ); ++i )
00918         delete vecpTerms[i];
00919     return 0; }