Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef SEEKDATASET_H 00023 #define SEEKDATASET_H 00024 00025 #include "seekbasic.h" 00026 #include "seekmap.h" 00027 #include "datapair.h" 00028 #include "seekplatform.h" 00029 00030 namespace Sleipnir { 00031 00032 class CSeekDBSetting{ 00033 public: 00034 CSeekDBSetting(const string &gvar, 00035 const string &sinfo, const string &plat, 00036 const string &prep, const string &db, 00037 const string &gene, const string &quant, 00038 const string &dset, const utype &numDB){ 00039 m_gvarDirectory = gvar; 00040 m_sinfoDirectory = sinfo; 00041 m_platformDirectory = plat; 00042 m_prepDirectory = prep; 00043 m_dbDirectory = db; 00044 m_geneMapFile = gene; 00045 m_quantFile = quant; 00046 m_dsetFile = dset; 00047 m_numDB = numDB; 00048 } 00049 CSeekDBSetting(const char *gvar, 00050 const char* sinfo, const char* plat, 00051 const char* prep, const char* db, 00052 const char* gene, const char* quant, 00053 const char* dset, const utype &numDB){ 00054 m_gvarDirectory = gvar; 00055 m_sinfoDirectory = sinfo; 00056 m_platformDirectory = plat; 00057 m_prepDirectory = prep; 00058 m_dbDirectory = db; 00059 m_geneMapFile = gene; 00060 m_quantFile = quant; 00061 m_dsetFile = dset; 00062 m_numDB = numDB; 00063 } 00064 00065 ~CSeekDBSetting(){ 00066 } 00067 00068 string GetValue(const string &str){ 00069 if(str=="gene") 00070 return m_geneMapFile; 00071 else if(str=="dset") 00072 return m_dsetFile; 00073 else if(str=="quant") 00074 return m_quantFile; 00075 else if(str=="gvar") 00076 return m_gvarDirectory; 00077 else if(str=="sinfo") 00078 return m_sinfoDirectory; 00079 else if(str=="db") 00080 return m_dbDirectory; 00081 else if(str=="prep") 00082 return m_prepDirectory; 00083 else if(str=="platform") 00084 return m_platformDirectory; 00085 else 00086 return "NULL"; 00087 } 00088 00089 utype GetNumDB(){ 00090 return m_numDB; 00091 } 00092 00093 private: 00094 string m_gvarDirectory; 00095 string m_sinfoDirectory; 00096 string m_platformDirectory; 00097 string m_prepDirectory; 00098 string m_dbDirectory; 00099 string m_geneMapFile; 00100 string m_quantFile; 00101 string m_dsetFile; 00102 utype m_numDB; 00103 }; 00104 00105 00106 00133 class CSeekDataset{ 00134 public: 00135 00140 enum DistanceMeasure{ 00141 CORRELATION = 0, 00142 Z_SCORE = CORRELATION + 1 00143 }; 00144 00149 CSeekDataset(); 00150 00155 ~CSeekDataset(); 00156 00166 bool ReadDatasetAverageStdev(const string &); 00167 00177 bool ReadGeneAverage(const string &); 00178 00188 bool ReadGeneVariance(const string &); 00189 00199 bool ReadGenePresence(const string &); 00200 00207 bool InitializeGeneMap(); 00208 00217 bool InitializeQuery(const vector<utype> &); 00218 00228 bool InitializeQueryBlock(const vector<utype> &); 00229 00236 bool DeleteQuery(); 00237 00244 bool DeleteQueryBlock(); 00245 00270 bool InitializeDataMatrix(utype**, const vector<float> &, 00271 const utype&, const utype&, const bool=true, 00272 const bool=false, const bool=false, 00273 const enum DistanceMeasure=Z_SCORE, 00274 const float cutoff=-1.0*CMeta::GetNaN(), 00275 const bool=false, gsl_rng *rand=NULL); 00276 00282 bool Copy(CSeekDataset *); 00283 00293 utype** GetDataMatrix(); 00294 00301 unsigned char** GetMatrix(); 00302 00307 CSeekIntIntMap* GetGeneMap(); 00308 00313 CSeekIntIntMap* GetDBMap(); 00314 00319 CSeekIntIntMap* GetQueryMap(); 00320 00325 const vector<utype>& GetQuery() const; 00326 00331 const vector<utype>& GetQueryIndex() const; 00332 00337 float GetGeneVariance(const utype&) const; 00342 float GetGeneAverage(const utype&) const; 00347 float GetDatasetAverage() const; 00352 float GetDatasetStdev() const; 00357 utype GetNumGenes() const; 00358 00367 bool InitializeCVWeight(const utype&); 00368 00374 bool SetCVWeight(const utype&, const float&); 00375 00380 float GetCVWeight(const utype&); 00381 00386 const vector<float>& GetCVWeight() const; 00387 00392 float GetDatasetSumWeight(); 00393 00398 void SetPlatform(CSeekPlatform &); 00403 CSeekPlatform& GetPlatform() const; 00404 00405 private: 00406 CSeekPlatform *platform; 00407 vector<float> geneAverage; 00408 vector<float> geneVariance; 00409 vector<char> genePresence; 00410 CSeekIntIntMap *geneMap; 00411 00412 /* previously known as sinfo file */ 00413 float m_fDsetAverage; 00414 float m_fDsetStdev; 00415 00416 CSeekIntIntMap *dbMap; 00417 CSeekIntIntMap *queryMap; 00418 vector<utype> query; 00419 vector<utype> queryIndex; 00420 00421 utype iQuerySize; 00422 utype iNumGenes; 00423 utype iDBSize; 00424 00425 vector<float> weight; 00426 00427 utype **rData; 00428 unsigned char **r; 00429 00430 float sum_weight; 00431 bool m_bIsNibble; 00432 }; 00433 00434 00435 00436 } 00437 #endif