Sleipnir
tools/Overlapper/Overlapper.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 
00025 int main( int iArgs, char** aszArgs ) {
00026     gengetopt_args_info sArgs;
00027     CDataPair           DatOne, DatTwo;
00028     size_t              i, j, iOne, iTwo;
00029     float               dOne, dTwo;
00030     vector<size_t>      veciGenes;
00031     CFullMatrix<size_t> MatConfusion;
00032     const char*         szOne;
00033     const char*         szTwo;
00034     bool                fOneSmall;
00035 
00036     if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
00037         cmdline_parser_print_help( );
00038         return 1; }
00039     CMeta Meta( sArgs.verbosity_arg );
00040 
00041     if( !DatOne.Open( sArgs.first_arg, false, !!sArgs.memmap_flag ) ) {
00042         cerr << "Couldn't open: " << sArgs.first_arg << endl;
00043         return 1; }
00044     if( !DatTwo.Open( sArgs.second_arg, false, !!sArgs.memmap_flag ) ) {
00045         cerr << "Couldn't open: " << sArgs.second_arg << endl;
00046         return 1; }
00047 
00048     cout << sArgs.first_arg << ": " << DatOne.GetGenes( ) << " genes" << endl;
00049     cout << sArgs.second_arg << ": " << DatTwo.GetGenes( ) << " genes" << endl << endl;
00050 
00051     fOneSmall = DatOne.GetGenes( ) < DatTwo.GetGenes( );
00052     szOne = fOneSmall ? sArgs.first_arg : sArgs.second_arg;
00053     szTwo = fOneSmall ? sArgs.second_arg : sArgs.first_arg;
00054     {
00055         const CDataPair&    DatSmall    = fOneSmall ? DatOne : DatTwo;
00056         CDataPair&          DatBig      = fOneSmall ? DatTwo : DatOne;
00057         vector<bool>        vecfShared;
00058 
00059         MatConfusion.Initialize( DatSmall.GetValues( ) + 1, DatBig.GetValues( ) + 1 );
00060         MatConfusion.Clear( );
00061         veciGenes.resize( DatSmall.GetGenes( ) );
00062         for( i = 0; i < veciGenes.size( ); ++i )
00063             veciGenes[ i ] = DatBig.GetGene( DatSmall.GetGene( i ) );
00064         for( i = 0; i < DatSmall.GetGenes( ); ++i ) {
00065             if( ( iOne = veciGenes[ i ] ) == -1 ) {
00066                 for( j = ( i + 1 ); j < DatSmall.GetGenes( ); ++j )
00067                     if( !CMeta::IsNaN( dOne = DatSmall.Get( i, j ) ) )
00068                         MatConfusion.Get( DatSmall.Quantize( dOne ), DatBig.GetValues( ) )++;
00069                 continue; }
00070             for( j = ( i + 1 ); j < DatSmall.GetGenes( ); ++j )
00071                 if( !CMeta::IsNaN( dOne = DatSmall.Get( i, j ) ) )
00072                     MatConfusion.Get( DatSmall.Quantize( dOne ), ( ( iTwo = veciGenes[ j ] ) == -1 ) || CMeta::IsNaN( dTwo = DatBig.Get( iOne, iTwo ) ) ?
00073                         DatBig.GetValues( ) : DatBig.Quantize( dTwo ) )++; }
00074         vecfShared.resize( DatBig.GetGenes( ) );
00075         for( i = 0; i < vecfShared.size( ); ++i )
00076             vecfShared[ i ] = ( DatSmall.GetGene( DatBig.GetGene( i ) ) != -1 );
00077         for( i = 0; i < DatBig.GetGenes( ); ++i )
00078             for( j = ( i + 1 ); j < DatBig.GetGenes( ); ++j )
00079                 if( !( vecfShared[ i ] && vecfShared[ j ] ) && !CMeta::IsNaN( dTwo = DatBig.Get( i, j ) ) )
00080                     MatConfusion.Get( DatSmall.GetValues( ), DatBig.Quantize( dTwo ) )++;
00081     }
00082 
00083     cout << '\t' << szTwo << endl << szOne;
00084     for( i = 0; i < MatConfusion.GetRows( ); ++i ) {
00085         for( j = 0; j < MatConfusion.GetColumns( ); ++j )
00086             cout << '\t' << MatConfusion.Get( i, j );
00087         cout << endl; }
00088 
00089     return 0; }