Sleipnir
tools/PCL2Bin/PCL2Bin.cpp
00001 /*****************************************************************************
00002  * This file is provided under the Creative Commons Attribution 3.0 license.
00003  *
00004  * You are free to share, copy, distribute, transmit, or adapt this work
00005  * PROVIDED THAT you attribute the work to the authors listed below.
00006  * For more information, please see the following web page:
00007  * http://creativecommons.org/licenses/by/3.0/
00008  *
00009  * This file is a component of the Sleipnir library for functional genomics,
00010  * authored by:
00011  * Curtis Huttenhower (chuttenh@princeton.edu)
00012  * Mark Schroeder
00013  * Maria D. Chikina
00014  * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015  *
00016  * If you use this library, the included executable tools, or any related
00017  * code in your work, please cite the following publication:
00018  * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019  * Olga G. Troyanskaya.
00020  * "The Sleipnir library for computational functional genomics"
00021  *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 #include "statistics.h"
// File-local string constants (internal linkage via the unnamed namespace).
// None of them is referenced by the code visible in this file.
namespace {

// Short option-style keywords.
// NOTE(review): presumably kernel names left over from another tool - confirm before removing.
const char c_szRBF[]        = "rbf";
const char c_szPolynomial[] = "poly";

// Dot-prefixed suffix strings.
// NOTE(review): presumably file extensions for DAB and PCL files - confirm.
const char c_acDab[]        = ".dab";
const char c_acPcl[]        = ".pcl";

} // namespace
00029 
00030 int main(int iArgs, char** aszArgs) {
00031     gengetopt_args_info sArgs;
00032     CGenome Genome;
00033     CGenes Genes(Genome);
00034     ifstream ifsm;
00035     CPCL PCL;
00036     size_t i, j;
00037     bool fModified;
00038     char* file_ext = NULL;
00039 
00040     if (cmdline_parser(iArgs, aszArgs, &sArgs)) {
00041         cmdline_parser_print_help();
00042         return 1;
00043     }
00044     CMeta Meta(sArgs.verbosity_arg);
00045 
00046     if (sArgs.input_arg) {
00047       if (!PCL.Open(sArgs.input_arg, sArgs.skip_arg, !!sArgs.mmap_flag, sArgs.rPCL_flag)) {
00048             cerr << "Could not open: " << sArgs.input_arg << endl;
00049             return 1;
00050         }
00051     }
00052     // if coming from stdin, always assume it's non binary form
00053 
00054     else if (!PCL.Open(cin, sArgs.skip_arg)) {
00055         cerr << "Could not open input" << endl;
00056         return 1;
00057     }
00058     if (sArgs.Genes_flag) {
00059         for (size_t i = 0; i < PCL.GetGenes(); i++) {
00060             cout << i << '\t' << PCL.GetGene(i) << endl;
00061         }
00062     }
00063     else if (sArgs.transpose_flag){
00064       for ( j = 0; j < PCL.GetGenes(); j++)
00065         cout<<'\t'<<PCL.GetGene(j);
00066       cout<<endl;
00067       for ( i = 0; i < PCL.GetExperiments(); i++) {
00068         cout<<PCL.GetExperiment(i);
00069         for ( j = 0; j < PCL.GetGenes(); j++) {
00070           cout << '\t' << PCL.Get(j,i);
00071         }
00072         cout<<endl;
00073       }
00074     }
00075  else {
00076         //vector<size_t> vec_iGenes;
00077         ifstream ifsm;
00078 
00079         if (sArgs.genex_given) {
00080             ifsm.open(sArgs.genex_arg);
00081             if (!Genes.Open(ifsm)) {
00082                 cerr << "Could not open: " << sArgs.genex_arg << endl;
00083                 return 1;
00084             }
00085 
00086             for (i = 0; i < PCL.GetGenes(); i++)
00087                 if (Genes.GetGene(PCL.GetGene(i)) != -1)
00088                     PCL.MaskGene(i, true);
00089         }
00090 
00091         else if (sArgs.genes_given) {
00092             ifsm.open(sArgs.genes_arg);
00093             if (!Genes.Open(ifsm)) {
00094                 cerr << "Could not open: " << sArgs.genes_arg << endl;
00095                 return 1;
00096             }
00097             for (i = 0; i < PCL.GetGenes(); i++)
00098                 if (Genes.GetGene(PCL.GetGene(i)) == -1)
00099                     PCL.MaskGene(i, true);
00100 
00101         }
00102         ifsm.close();
00103 
00104         /*  if( Genes.GetGenes( ) )
00105          PCL.FilterGenes( Genes, CPCL::EFilterInclude );
00106          if( sArgs.genex_arg )
00107          PCL.FilterGenes( sArgs.genex_arg, CPCL::EFilterExclude );
00108          */
00109         //Normalize Zscore
00110         if (sArgs.normalize_flag)
00111             PCL.Normalize(CPCL::ENormalizeMinMax);
00112         if (sArgs.zrow_flag)
00113                     PCL.Normalize(CPCL::ENormalizeRow);
00114         if (sArgs.zcol_flag)
00115                     PCL.Normalize(CPCL::ENormalizeColumn);
00116         if (sArgs.scol_flag)
00117                     PCL.Normalize(CPCL::EMeanSubtractColumn);
00118         else if (sArgs.normalize_flag)
00119             PCL.Normalize(CPCL::ENormalizeMinMax);
00120         if (sArgs.output_arg) {
00121 
00122                 PCL.Save(sArgs.output_arg);
00123         } else {
00124 
00125                 PCL.Save(cout, NULL);
00126             cout.flush();
00127         }
00128 
00129         return 0;
00130 
00131     }
00132 }