Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef SEEKREADER_H 00023 #define SEEKREADER_H 00024 00025 #include "seekbasic.h" 00026 #include "seekmap.h" 00027 #include "datapair.h" 00028 #include "seekdataset.h" 00029 #include "seekplatform.h" 00030 #include "database.h" 00031 #include <sstream> 00032 #include "seeknetwork.h" 00033 00034 namespace Sleipnir { 00035 00052 class CSeekTools{ 00053 public: 00054 /* binary */ 00066 template<class tType> 00067 static bool ReadArray(const char *fileName, vector<tType> &vData){ 00068 FILE *f = fopen(fileName, "rb"); 00069 if(f==NULL){ 00070 fprintf(stderr, "File not found %s\n", fileName); 00071 return false; 00072 } 00073 00074 //do not change type 00075 size_t iSize; 00076 00077 utype ret; 00078 ret = fread((char*) (&iSize), 1, sizeof(iSize), f); 00079 vData.clear(); 00080 vData.resize(iSize); 00081 tType *m_Data = (tType*)malloc(iSize*sizeof(tType)); 00082 ret = fread((char*)m_Data, 1, iSize*sizeof(tType), f); 00083 typename vector<tType>::iterator iter; 00084 tType *mp; 00085 for(iter=vData.begin(), mp=&m_Data[0]; iter!=vData.end(); 00086 iter++, mp++){ 00087 *iter = *mp; 00088 } 00089 free(m_Data); 00090 fclose(f); 00091 return true; 00092 } 00093 00094 /* binary */ 00106 template<class tType> 00107 static bool WriteArray(const char *fileName, const vector<tType> &vData){ 00108 FILE *f = fopen(fileName, "wb"); 00109 if(f==NULL){ 00110 fprintf(stderr, "File not found %s\n", fileName); 00111 return false; 00112 } 00113 size_t i; 00114 tType *m_Data = (tType*)malloc(vData.size()*sizeof(tType)); 00115 for(i=0; i<vData.size(); i++){ 00116 m_Data[i] = vData[i]; 00117 } 00118 //do not change type 00119 size_t iSize = vData.size(); 00120 fwrite((char*) (&iSize), 1, sizeof(iSize), f); 00121 fwrite((char*) (m_Data), 1, iSize*sizeof(tType), f); 00122 free(m_Data); 00123 fclose(f); 00124 return true; 00125 } 00126 00136 template<class tType> 00137 static bool WriteArrayText(const char *fileName, 00138 const vector<tType> &vData){ 00139 ofstream outfile; 00140 outfile.open(fileName); 00141 size_t i; 00142 for(i=0; i<vData.size()-1; i++){ 00143 outfile << vData[i] << " "; 00144 } 00145 outfile << vData[vData.size()-1] << endl; 00146 outfile.close(); 00147 return true; 00148 } 00149 00159 template<class tType> 00160 static bool Write2DArrayText(const char *fileName, 00161 const vector<vector<tType> > &vData){ 00162 ofstream outfile; 00163 outfile.open(fileName); 00164 size_t i,j; 00165 for(j=0; j<vData.size(); j++){ 00166 if(vData[j].size()==0){ 00167 outfile << "None" << endl; 00168 continue; 00169 } 00170 for(i=0; i<vData[j].size()-1; i++){ 00171 outfile << vData[j][i] << " "; 00172 } 00173 outfile << vData[j][vData[j].size()-1] << endl; 00174 } 00175 outfile.close(); 00176 return true; 00177 } 00178 00189 template<class tType> 00190 static bool InitVector(vector<tType> &vData, const utype &iSize, 00191 const tType &tValue) { 00192 vData.clear(); 00193 vData.resize(iSize); 00194 fill(vData.begin(), vData.end(), tValue); 00195 return true; 00196 } 00197 00206 template<class tType> 00207 static bool InitVector(vector<tType> &vData, const utype &iSize) { 00208 vData.clear(); 00209 vData.resize(iSize); 00210 return true; 00211 } 00212 00223 template<class tType> 00224 static tType** Init2DArray(const size_t &iSize1, const size_t &iSize2, 00225 const tType &tValue){ 00226 tType **f = (tType**)malloc(iSize1*sizeof(tType*)); 00227 f[0] = (tType*)malloc(iSize1*iSize2*sizeof(tType)); 00228 /*tType **itF = &f[1]; 00229 tType **itLast = &f[0] + iSize1; 00230 for(; itF!=itLast; itF++){ 00231 *itF = *(itF - 1) + iSize2; 00232 } 00233 tType *itVal = &f[0][0]; 00234 tType *itValLast = &f[iSize1-1][iSize2-1] + 1; 00235 for(; itVal!=itValLast; itVal++){ 00236 *itVal = tValue; 00237 }*/ 00238 int i, j; 00239 for(i=1; i<iSize1; i++){ 00240 f[i] = f[i-1] + iSize2; 00241 } 00242 for(i=0; i<iSize1; i++){ 00243 for(j=0; j<iSize2; j++){ 00244 f[i][j] = tValue; 00245 } 00246 } 00247 return f; 00248 } 00249 00254 template<class tType> 00255 static void Free2DArray(tType** f){ 00256 free(f[0]); 00257 free(f); 00258 } 00259 00265 static bool IsNaN(const utype &); 00266 00270 static utype GetNaN(); 00271 00277 static string ConvertInt(const int &); 00278 00304 static bool ReadDatabaselets(const vector<CDatabase*>&, 00305 const size_t&, const size_t&, 00306 const vector<vector<string> >&, 00307 vector<CSeekDataset*>&, 00308 const map<string,utype> &, 00309 const vector<vector<string> > &, const map<string,utype> &, 00310 //network mode options 00311 const int&, const bool&); 00312 00334 static bool LoadDatabase(const vector<CDatabase*>&, 00335 const size_t&, const size_t&, 00336 const vector<CSeekDBSetting*>&, 00337 const vector<string>&, 00338 const map<string,string>&, 00339 const map<string,utype>&, vector<CSeekPlatform>&, 00340 vector<CSeekDataset*>&, const vector<vector<string> >&, 00341 const map<string,utype>&, 00342 const bool=false, const bool=false); 00343 00359 static bool LoadDatabase( 00360 const vector<CDatabase*>&, const size_t&, const size_t&, 00361 vector<CSeekDataset*>&, 00362 const vector<CSeekDataset*>&, vector<CSeekPlatform>&, 00363 const vector<CSeekPlatform>&, const vector<string>&, 00364 const map<string,string>&, const map<string,utype>&); 00365 00379 static bool ReadPlatforms(const string &strPlatformDirectory, 00380 vector<CSeekPlatform> &plat, vector<string> &vecstrPlatforms, 00381 map<string, utype> &mapstriPlatforms, const int lineSize = 1024); 00382 00389 static bool ReadPlatforms(const char *plat_dir, 00390 vector<CSeekPlatform> &plat, vector<string> &vecstrPlatforms, 00391 map<string, utype> &mapstriPlatforms, const int lineSize = 1024); 00392 00403 static bool ReadListOneColumn(const string &strFile, 00404 vector<string> &vecstrList, CSeekStrIntMap &mapstriList, const int lineSize = 1024); 00405 00412 static bool ReadListOneColumn(const char *file, 00413 vector<string> &vecstrList, CSeekStrIntMap &mapstriList, const int lineSize = 1024); 00414 00421 static bool ReadListOneColumn(const string &strFile, 00422 vector<string> &vecstrList, const int lineSize = 1024); 00423 00430 static bool ReadListOneColumn(const char *file, 00431 vector<string> &vecstrList, const int lineSize = 1024); 00432 00441 static bool ReadListTwoColumns(const string &strFile, 00442 vector<string> &list1, vector<string> &list2, const int lineSize = 1024); 00443 00450 static bool ReadListTwoColumns(const char *file, 00451 vector<string> &list1, vector<string> &list2, const int lineSize = 1024); 00452 00462 static bool ReadMultipleQueries(const string &strFile, 00463 vector< vector<string> > &qList, const int lineSize = 1024); 00464 00465 00466 static bool ReadMultipleNotQueries(const char *file, 00467 vector<vector<vector<string> > > &qList, const int lineSize = 1024); 00468 00475 static bool ReadMultipleQueries(const char *file, 00476 vector< vector<string> > &qList, const int lineSize = 1024); 00477 00488 static bool ReadMultiGeneOneLine(const string &strFile, 00489 vector<string> &list1, const int lineSize = 1024); 00490 00497 static bool ReadMultiGeneOneLine(const char *file, 00498 vector<string> &list1, const int lineSize = 1024); 00499 00508 static bool ReadQuantFile(const string &strFile, vector<float> &quant, const int lineSize = 5000); 00509 00516 static bool ReadQuantFile(const char *file, vector<float> &quant, const int lineSize = 5000); 00517 00518 00519 }; 00520 00521 00522 } 00523 #endif