Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef PCLI_H 00023 #define PCLI_H 00024 00025 #include <map> 00026 #include <set> 00027 #include <vector> 00028 00029 #include "meta.h" 00030 #include "file.h" 00031 #include "fullmatrix.h" 00032 00033 namespace Sleipnir { 00034 00035 class CPCLImpl: protected CFile { 00036 protected: 00037 static const size_t c_iSkip = 2; 00038 static const char c_szEWEIGHT[]; 00039 static const char c_szGENE[]; 00040 static const char c_szGID[]; 00041 static const char c_szGWEIGHT[]; 00042 static const char c_szNAME[]; 00043 static const char c_szOne[]; 00044 static const char c_szExtension[]; 00045 static const char c_szBinExtension[]; 00046 static const char c_szDabExtension[]; 00047 00048 typedef std::vector<std::string> TVecStr; 00049 typedef std::set<size_t> TSetI; 00050 typedef std::map<std::string, size_t> TMapStrI; 00051 00052 static size_t MedianMultiplesBin(float dValue, float dAve, float dStd, 00053 size_t iBins, float dBinSize) { 00054 size_t iRet; 00055 int i; 00056 00057 i = (int) (0.5 + ((dValue - dAve) / dStd / dBinSize)); 00058 iRet = iBins / 2; 00059 iRet = ((i < 0) && ((size_t) -i > iRet)) ? 0 : min(iBins, i + iRet); 00060 // cerr << dValue << '\t' << dAve << '\t' << dStd << '\t' << i << '\t' << iRet << endl; 00061 00062 return iRet; 00063 } 00064 00065 static void MedianMultiplesSmooth(float dPower, 00066 std::vector<float>& vecdValues) { 00067 static const size_t c_iRadius = 40; 00068 std::vector<float> vecdTmp; 00069 size_t i, j, k; 00070 float d, dSum; 00071 00072 vecdTmp.resize(vecdValues.size()); 00073 for (dSum = 0, i = 0; i < vecdTmp.size(); ++i) 00074 for (j = (max(i, c_iRadius) - c_iRadius); j < min(vecdTmp.size(), i 00075 + c_iRadius); ++j) { 00076 k = max(i, j) - min(i, j); 00077 vecdTmp[i] += (d = (vecdValues[j] 00078 / (1 + pow((float) k, dPower)))); 00079 dSum += d; 00080 } 00081 for (i = 0; i < vecdValues.size(); ++i) 00082 vecdValues[i] = vecdTmp[i] / dSum; 00083 } 00084 00085 CPCLImpl(bool fHeader) : 00086 m_fHeader(fHeader) { 00087 } 00088 ~CPCLImpl(); 00089 00090 bool OpenExperiments(std::istream&, size_t, string&, bool rTable=false); 00091 bool OpenGene(std::istream&, std::vector<float>&, string&); 00092 void Reset(); 00093 void MedianMultiplesMapped(const std::vector<std::vector<size_t> >&, 00094 std::vector<float>&); 00095 bool OpenHelper(); 00096 bool OpenMemmap(const unsigned char*); 00097 00098 void SetGene(size_t iGene, const std::string& strGene) { 00099 00100 m_mapstriGenes.erase(m_vecstrGenes[iGene]); 00101 m_mapstriGenes[m_vecstrGenes[iGene] = strGene] = iGene; 00102 } 00103 00104 CDataMatrix m_Data; 00105 TVecStr m_vecstrGenes; 00106 TVecStr m_vecstrExperiments; 00107 TVecStr m_vecstrFeatures; 00108 std::vector<TVecStr> m_vecvecstrFeatures; 00109 TSetI m_setiGenes; 00110 bool m_fHeader; 00111 TMapStrI m_mapstriGenes; 00112 00113 // Memory mapped back end 00114 unsigned char* m_abData; 00115 size_t m_iData; 00116 HANDLE m_hndlData; 00117 float** m_aadData; 00118 }; 00119 00120 } 00121 00122 #endif // PCLI_H