Sleipnir
src/seekreader.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef SEEKREADER_H
00023 #define SEEKREADER_H
00024 
00025 #include "seekbasic.h"
00026 #include "seekmap.h"
00027 #include "datapair.h"
00028 #include "seekdataset.h"
00029 #include "seekplatform.h"
00030 #include "database.h"
00031 #include <sstream>
00032 #include "seeknetwork.h"
00033 
00034 namespace Sleipnir {
00035 
00052 class CSeekTools{
00053 public:
00054     /* binary */
00066     template<class tType>
00067     static bool ReadArray(const char *fileName, vector<tType> &vData){
00068         FILE *f = fopen(fileName, "rb");
00069         if(f==NULL){
00070             fprintf(stderr, "File not found %s\n", fileName);
00071             return false;
00072         }
00073 
00074         //do not change type
00075         size_t iSize;
00076 
00077         utype ret;
00078         ret = fread((char*) (&iSize), 1, sizeof(iSize), f);
00079         vData.clear();
00080         vData.resize(iSize);
00081         tType *m_Data = (tType*)malloc(iSize*sizeof(tType));
00082         ret = fread((char*)m_Data, 1, iSize*sizeof(tType), f);
00083         typename vector<tType>::iterator iter;
00084         tType *mp;
00085         for(iter=vData.begin(), mp=&m_Data[0]; iter!=vData.end();
00086             iter++, mp++){
00087             *iter = *mp;
00088         }
00089         free(m_Data);
00090         fclose(f);
00091         return true;
00092     }
00093 
00094     /* binary */
00106     template<class tType>
00107     static bool WriteArray(const char *fileName, const vector<tType> &vData){
00108         FILE *f = fopen(fileName, "wb");
00109         if(f==NULL){
00110             fprintf(stderr, "File not found %s\n", fileName);
00111             return false;
00112         }
00113         size_t i;
00114         tType *m_Data = (tType*)malloc(vData.size()*sizeof(tType));
00115         for(i=0; i<vData.size(); i++){
00116             m_Data[i] = vData[i];
00117         }
00118         //do not change type
00119         size_t iSize = vData.size();
00120         fwrite((char*) (&iSize), 1, sizeof(iSize), f);
00121         fwrite((char*) (m_Data), 1, iSize*sizeof(tType), f);
00122         free(m_Data);
00123         fclose(f);
00124         return true;
00125     }
00126 
00136     template<class tType>
00137     static bool WriteArrayText(const char *fileName,
00138         const vector<tType> &vData){
00139         ofstream outfile;
00140         outfile.open(fileName);
00141         size_t i;
00142         for(i=0; i<vData.size()-1; i++){
00143             outfile << vData[i] << " ";
00144         }
00145         outfile << vData[vData.size()-1] << endl;
00146         outfile.close();
00147         return true;
00148     }
00149 
00159     template<class tType>
00160     static bool Write2DArrayText(const char *fileName,
00161         const vector<vector<tType> > &vData){
00162         ofstream outfile;
00163         outfile.open(fileName);
00164         size_t i,j;
00165         for(j=0; j<vData.size(); j++){
00166             if(vData[j].size()==0){
00167                 outfile << "None" << endl;
00168                 continue;
00169             }
00170             for(i=0; i<vData[j].size()-1; i++){
00171                 outfile << vData[j][i] << " ";
00172             }
00173             outfile << vData[j][vData[j].size()-1] << endl;
00174         }
00175         outfile.close();
00176         return true;
00177     }
00178 
00189     template<class tType>
00190     static bool InitVector(vector<tType> &vData, const utype &iSize,
00191         const tType &tValue) {
00192         vData.clear();
00193         vData.resize(iSize);
00194         fill(vData.begin(), vData.end(), tValue);
00195         return true;
00196     }
00197 
00206     template<class tType>
00207     static bool InitVector(vector<tType> &vData, const utype &iSize) {
00208         vData.clear();
00209         vData.resize(iSize);
00210         return true;
00211     }
00212 
00223     template<class tType>
00224     static tType** Init2DArray(const size_t &iSize1, const size_t &iSize2,
00225         const tType &tValue){
00226         tType **f = (tType**)malloc(iSize1*sizeof(tType*));
00227         f[0] = (tType*)malloc(iSize1*iSize2*sizeof(tType));
00228         /*tType **itF = &f[1];
00229         tType **itLast = &f[0] + iSize1;
00230         for(; itF!=itLast; itF++){
00231             *itF = *(itF - 1) + iSize2;
00232         }
00233         tType *itVal = &f[0][0];
00234         tType *itValLast = &f[iSize1-1][iSize2-1] + 1;
00235         for(; itVal!=itValLast; itVal++){
00236             *itVal = tValue;
00237         }*/
00238         int i, j;
00239         for(i=1; i<iSize1; i++){
00240             f[i] = f[i-1] + iSize2;
00241         }
00242         for(i=0; i<iSize1; i++){
00243             for(j=0; j<iSize2; j++){
00244                 f[i][j] = tValue;
00245             }
00246         }
00247         return f;
00248     }
00249 
00254     template<class tType>
00255     static void Free2DArray(tType** f){
00256         free(f[0]);
00257         free(f);
00258     }
00259 
00265     static bool IsNaN(const utype &);
00266 
00270     static utype GetNaN();
00271 
00277     static string ConvertInt(const int &);
00278 
00304     static bool ReadDatabaselets(const vector<CDatabase*>&,
00305         const size_t&, const size_t&,
00306         const vector<vector<string> >&,
00307         vector<CSeekDataset*>&,
00308         const map<string,utype> &,
00309         const vector<vector<string> > &, const map<string,utype> &,
00310         //network mode options
00311         const int&, const bool&);
00312 
00334     static bool LoadDatabase(const vector<CDatabase*>&,
00335         const size_t&, const size_t&,
00336         const vector<CSeekDBSetting*>&,
00337         const vector<string>&,
00338         const map<string,string>&,
00339         const map<string,utype>&, vector<CSeekPlatform>&,
00340         vector<CSeekDataset*>&, const vector<vector<string> >&,
00341         const map<string,utype>&,
00342         const bool=false, const bool=false);
00343 
00359     static bool LoadDatabase(
00360         const vector<CDatabase*>&, const size_t&, const size_t&,
00361         vector<CSeekDataset*>&,
00362         const vector<CSeekDataset*>&, vector<CSeekPlatform>&, 
00363         const vector<CSeekPlatform>&, const vector<string>&, 
00364         const map<string,string>&, const map<string,utype>&);
00365 
00379     static bool ReadPlatforms(const string &strPlatformDirectory,
00380         vector<CSeekPlatform> &plat, vector<string> &vecstrPlatforms,
00381         map<string, utype> &mapstriPlatforms, const int lineSize = 1024);
00382 
00389     static bool ReadPlatforms(const char *plat_dir,
00390         vector<CSeekPlatform> &plat, vector<string> &vecstrPlatforms,
00391         map<string, utype> &mapstriPlatforms, const int lineSize = 1024);
00392 
00403     static bool ReadListOneColumn(const string &strFile,
00404         vector<string> &vecstrList, CSeekStrIntMap &mapstriList, const int lineSize = 1024);
00405 
00412     static bool ReadListOneColumn(const char *file,
00413         vector<string> &vecstrList, CSeekStrIntMap &mapstriList, const int lineSize = 1024);
00414 
00421     static bool ReadListOneColumn(const string &strFile,
00422         vector<string> &vecstrList, const int lineSize = 1024);
00423 
00430     static bool ReadListOneColumn(const char *file,
00431         vector<string> &vecstrList, const int lineSize = 1024);
00432 
00441     static bool ReadListTwoColumns(const string &strFile,
00442         vector<string> &list1, vector<string> &list2, const int lineSize = 1024);
00443 
00450     static bool ReadListTwoColumns(const char *file,
00451         vector<string> &list1, vector<string> &list2, const int lineSize = 1024);
00452 
00462     static bool ReadMultipleQueries(const string &strFile,
00463         vector< vector<string> > &qList, const int lineSize = 1024);
00464 
00465 
00466     static bool ReadMultipleNotQueries(const char *file,
00467         vector<vector<vector<string> > > &qList, const int lineSize = 1024);
00468 
00475     static bool ReadMultipleQueries(const char *file,
00476         vector< vector<string> > &qList, const int lineSize = 1024);
00477 
00488     static bool ReadMultiGeneOneLine(const string &strFile,
00489         vector<string> &list1, const int lineSize = 1024);
00490 
00497     static bool ReadMultiGeneOneLine(const char *file,
00498         vector<string> &list1, const int lineSize = 1024);
00499 
00508     static bool ReadQuantFile(const string &strFile, vector<float> &quant, const int lineSize = 5000);
00509 
00516     static bool ReadQuantFile(const char *file, vector<float> &quant, const int lineSize = 5000);
00517 
00518 
00519 };
00520 
00521 
00522 }
00523 #endif