Sleipnir
src/pcli.h
00001 /*****************************************************************************
00002  * This file is provided under the Creative Commons Attribution 3.0 license.
00003  *
00004  * You are free to share, copy, distribute, transmit, or adapt this work
00005  * PROVIDED THAT you attribute the work to the authors listed below.
00006  * For more information, please see the following web page:
00007  * http://creativecommons.org/licenses/by/3.0/
00008  *
00009  * This file is a component of the Sleipnir library for functional genomics,
00010  * authored by:
00011  * Curtis Huttenhower (chuttenh@princeton.edu)
00012  * Mark Schroeder
00013  * Maria D. Chikina
00014  * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015  *
00016  * If you use this library, the included executable tools, or any related
00017  * code in your work, please cite the following publication:
00018  * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019  * Olga G. Troyanskaya.
00020  * "The Sleipnir library for computational functional genomics"
00021  *****************************************************************************/
00022 #ifndef PCLI_H
00023 #define PCLI_H
00024 
00025 #include <map>
00026 #include <set>
00027 #include <vector>
00028 
00029 #include "meta.h"
00030 #include "file.h"
00031 #include "fullmatrix.h"
00032 
00033 namespace Sleipnir {
00034 
00035 class CPCLImpl: protected CFile {
00036 protected:
00037     static const size_t c_iSkip = 2;
00038     static const char c_szEWEIGHT[];
00039     static const char c_szGENE[];
00040     static const char c_szGID[];
00041     static const char c_szGWEIGHT[];
00042     static const char c_szNAME[];
00043     static const char c_szOne[];
00044     static const char c_szExtension[];
00045     static const char c_szBinExtension[];
00046     static const char c_szDabExtension[];
00047     
00048     typedef std::vector<std::string> TVecStr;
00049     typedef std::set<size_t> TSetI;
00050     typedef std::map<std::string, size_t> TMapStrI;
00051 
00052     static size_t MedianMultiplesBin(float dValue, float dAve, float dStd,
00053             size_t iBins, float dBinSize) {
00054         size_t iRet;
00055         int i;
00056 
00057         i = (int) (0.5 + ((dValue - dAve) / dStd / dBinSize));
00058         iRet = iBins / 2;
00059         iRet = ((i < 0) && ((size_t) -i > iRet)) ? 0 : min(iBins, i + iRet);
00060         // cerr << dValue << '\t' << dAve << '\t' << dStd << '\t' << i << '\t' << iRet << endl;
00061 
00062         return iRet;
00063     }
00064 
00065     static void MedianMultiplesSmooth(float dPower,
00066             std::vector<float>& vecdValues) {
00067         static const size_t c_iRadius = 40;
00068         std::vector<float> vecdTmp;
00069         size_t i, j, k;
00070         float d, dSum;
00071 
00072         vecdTmp.resize(vecdValues.size());
00073         for (dSum = 0, i = 0; i < vecdTmp.size(); ++i)
00074             for (j = (max(i, c_iRadius) - c_iRadius); j < min(vecdTmp.size(), i
00075                     + c_iRadius); ++j) {
00076                 k = max(i, j) - min(i, j);
00077                 vecdTmp[i] += (d = (vecdValues[j]
00078                         / (1 + pow((float) k, dPower))));
00079                 dSum += d;
00080             }
00081         for (i = 0; i < vecdValues.size(); ++i)
00082             vecdValues[i] = vecdTmp[i] / dSum;
00083     }
00084 
00085     CPCLImpl(bool fHeader) :
00086         m_fHeader(fHeader) {
00087     }
00088     ~CPCLImpl();
00089     
00090     bool OpenExperiments(std::istream&, size_t, string&, bool rTable=false);
00091     bool OpenGene(std::istream&, std::vector<float>&, string&);
00092     void Reset();
00093     void MedianMultiplesMapped(const std::vector<std::vector<size_t> >&,
00094             std::vector<float>&);
00095     bool OpenHelper();
00096     bool OpenMemmap(const unsigned char*);
00097 
00098     void SetGene(size_t iGene, const std::string& strGene) {
00099 
00100         m_mapstriGenes.erase(m_vecstrGenes[iGene]);
00101         m_mapstriGenes[m_vecstrGenes[iGene] = strGene] = iGene;
00102     }
00103 
00104     CDataMatrix m_Data;
00105     TVecStr m_vecstrGenes;
00106     TVecStr m_vecstrExperiments;
00107     TVecStr m_vecstrFeatures;
00108     std::vector<TVecStr> m_vecvecstrFeatures;
00109     TSetI m_setiGenes;
00110     bool m_fHeader;
00111     TMapStrI m_mapstriGenes;
00112 
00113     // Memory mapped back end
00114     unsigned char* m_abData;
00115     size_t m_iData;
00116     HANDLE m_hndlData;
00117     float** m_aadData;
00118 };
00119 
00120 }
00121 
00122 #endif // PCLI_H