Sleipnir
tools/Normalizer/Normalizer.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 
00025 const struct {
00026     const char*         m_szName;
00027     CPCL::ENormalize    m_eType;
00028 } c_asTypesPCL[]    = {
00029     {"columnz",     CPCL::ENormalizeColumn},
00030     {"rowz",        CPCL::ENormalizeRow},
00031     {"globalz",     CPCL::ENormalizeZScore},
00032     {"0to1",        CPCL::ENormalizeMinMax},
00033     {"colcenter",   CPCL::ENormalizeColumnCenter},
00034     {"colfrac",     CPCL::ENormalizeColumnFraction},
00035     {NULL,          CPCL::ENormalizeNone}
00036 };
00037 
00038 const struct {
00039     const char*         m_szName;
00040     CDat::ENormalize    m_eType;
00041 } c_asTypesDAT[]    = {
00042     {"globalz", CDat::ENormalizeZScore},
00043     {"0to1",    CDat::ENormalizeMinMax},
00044     {"sigmoid", CDat::ENormalizeSigmoid},
00045     {"normcdf", CDat::ENormalizeNormCDF},
00046     {"pcc",     CDat::ENormalizePCC},
00047     {NULL,      CDat::ENormalizeNone}
00048 };
00049 
00050 int main( int iArgs, char** aszArgs ) {
00051     CDat                Dat;
00052     CPCL                PCL;
00053     size_t              i, j;
00054     ofstream            ofsm;
00055     gengetopt_args_info sArgs;
00056     float               d;
00057     istream*            pistm;
00058     ifstream            ifsm;
00059     ostream*            postm;
00060     CPCL::ENormalize    eTypePCL;
00061     CDat::ENormalize    eTypeDAT;
00062 
00063     if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) {
00064         cmdline_parser_print_help( );
00065         return 1; }
00066     CMeta Meta( sArgs.verbosity_arg );
00067 
00068     if( !strcmp( "pcl", sArgs.itype_arg ) ) {
00069         if( sArgs.input_arg ) {
00070             ifsm.open( sArgs.input_arg );
00071             pistm = &ifsm; }
00072         else
00073             pistm = &cin;
00074         if( !PCL.Open( *pistm, sArgs.skip_arg ) ) {
00075             cerr << "Could not open input: " << ( sArgs.input_arg ? sArgs.input_arg : "standard input" ) << endl;
00076             return 1; }
00077         if( sArgs.input_arg )
00078             ifsm.close( );
00079 
00080         if( !strcmp( "medmult", sArgs.otype_arg ) )
00081             PCL.MedianMultiples( );
00082         else {
00083             eTypePCL = CPCL::ENormalizeNone;
00084             for( i = 0; c_asTypesPCL[i].m_szName; ++i )
00085                 if( !strcmp( c_asTypesPCL[i].m_szName, sArgs.otype_arg ) ) {
00086                     eTypePCL = c_asTypesPCL[i].m_eType;
00087                     break; }
00088             PCL.Normalize( eTypePCL ); }
00089 
00090         if( sArgs.output_arg ) {
00091             ofsm.open( sArgs.output_arg );
00092             postm = &ofsm; }
00093         else
00094             postm = &cout;
00095         PCL.Save( *postm );
00096         if( sArgs.output_arg )
00097             ofsm.close( ); }
00098     else if( !sArgs.input_arg ) {
00099         cmdline_parser_print_help( );
00100         return 1; }
00101     else {
00102         if( !Dat.Open( sArgs.input_arg ) ) {
00103             cerr << "Could not open input: " << sArgs.input_arg << endl;
00104             return 1; }
00105 
00106         eTypeDAT = CDat::ENormalizeNone;
00107         for( i = 0; c_asTypesDAT[i].m_szName; ++i )
00108             if( !strcmp( c_asTypesDAT[i].m_szName, sArgs.otype_arg ) ) {
00109                 eTypeDAT = c_asTypesDAT[i].m_eType;
00110                 break; }
00111 
00112         Dat.Normalize( eTypeDAT );
00113         if( sArgs.flip_flag )
00114             for( i = 0; i < Dat.GetGenes( ); ++i )
00115                 for( j = ( i + 1 ); j < Dat.GetGenes( ); ++j )
00116                     if( !CMeta::IsNaN( d = Dat.Get( i, j ) ) )
00117                         Dat.Set( i, j, 1 - d );
00118 
00119         Dat.Save( sArgs.output_arg ? sArgs.output_arg : sArgs.input_arg ); }
00120 
00121     return 0; }