Sleipnir
|
A suite of file I/O and general purpose tools that are used by Seek. More...
#include <seekreader.h>
Static Public Member Functions | |
template<class tType > | |
static bool | ReadArray (const char *fileName, vector< tType > &vData) |
Read an array from a given binary file. | |
template<class tType > | |
static bool | WriteArray (const char *fileName, const vector< tType > &vData) |
Write an array in binary format. | |
template<class tType > | |
static bool | WriteArrayText (const char *fileName, const vector< tType > &vData) |
Write an array in text format. | |
template<class tType > | |
static bool | Write2DArrayText (const char *fileName, const vector< vector< tType > > &vData) |
Write a two-dimensional array in text format. | |
template<class tType > | |
static bool | InitVector (vector< tType > &vData, const utype &iSize, const tType &tValue) |
Initialize a vector with a given value. | |
template<class tType > | |
static bool | InitVector (vector< tType > &vData, const utype &iSize) |
Initialize a vector. | |
template<class tType > | |
static tType ** | Init2DArray (const size_t &iSize1, const size_t &iSize2, const tType &tValue) |
Initialize a two-dimensional array with the given size and value. | |
template<class tType > | |
static void | Free2DArray (tType **f) |
Free a two-dimensional array. | |
static bool | IsNaN (const utype &) |
Checks if a utype value is invalid. | |
static utype | GetNaN () |
Return the NaN value as a utype. | |
static string | ConvertInt (const int &) |
Converts an integer to a string. | |
static bool | ReadDatabaselets (const vector< CDatabase * > &, const size_t &, const size_t &, const vector< vector< string > > &, vector< CSeekDataset * > &, const map< string, utype > &, const vector< vector< string > > &, const map< string, utype > &, const int &, const bool &) |
Read a set of CDatabaselet from a CDatabase instance. | |
static bool | LoadDatabase (const vector< CDatabase * > &, const size_t &, const size_t &, const vector< CSeekDBSetting * > &, const vector< string > &, const map< string, string > &, const map< string, utype > &, vector< CSeekPlatform > &, vector< CSeekDataset * > &, const vector< vector< string > > &, const map< string, utype > &, const bool=false, const bool=false) |
Read the search setting files and load the CDatabase. | |
static bool | LoadDatabase (const vector< CDatabase * > &, const size_t &, const size_t &, vector< CSeekDataset * > &, const vector< CSeekDataset * > &, vector< CSeekPlatform > &, const vector< CSeekPlatform > &, const vector< string > &, const map< string, string > &, const map< string, utype > &) |
Load a CDatabase by copying from an existing instance. | |
static bool | ReadPlatforms (const string &strPlatformDirectory, vector< CSeekPlatform > &plat, vector< string > &vecstrPlatforms, map< string, utype > &mapstriPlatforms, const int lineSize=1024) |
Read the platforms. | |
static bool | ReadPlatforms (const char *plat_dir, vector< CSeekPlatform > &plat, vector< string > &vecstrPlatforms, map< string, utype > &mapstriPlatforms, const int lineSize=1024) |
Read the platforms. | |
static bool | ReadListOneColumn (const string &strFile, vector< string > &vecstrList, CSeekStrIntMap &mapstriList, const int lineSize=1024) |
Read a table with one column. | |
static bool | ReadListOneColumn (const char *file, vector< string > &vecstrList, CSeekStrIntMap &mapstriList, const int lineSize=1024) |
Read a table with one column. | |
static bool | ReadListOneColumn (const string &strFile, vector< string > &vecstrList, const int lineSize=1024) |
Read a table with one column. | |
static bool | ReadListOneColumn (const char *file, vector< string > &vecstrList, const int lineSize=1024) |
Read a table with one column. | |
static bool | ReadListTwoColumns (const string &strFile, vector< string > &list1, vector< string > &list2, const int lineSize=1024) |
Read a table with two columns. | |
static bool | ReadListTwoColumns (const char *file, vector< string > &list1, vector< string > &list2, const int lineSize=1024) |
Read a table with two columns. | |
static bool | ReadMultipleQueries (const string &strFile, vector< vector< string > > &qList, const int lineSize=1024) |
Read a list of queries. | |
static bool | ReadMultipleNotQueries (const char *file, vector< vector< vector< string > > > &qList, const int lineSize=1024) |
static bool | ReadMultipleQueries (const char *file, vector< vector< string > > &qList, const int lineSize=1024) |
Read a list of queries. | |
static bool | ReadMultiGeneOneLine (const string &strFile, vector< string > &list1, const int lineSize=1024) |
Read just one gene-set line. | |
static bool | ReadMultiGeneOneLine (const char *file, vector< string > &list1, const int lineSize=1024) |
Read just one gene-set line. | |
static bool | ReadQuantFile (const string &strFile, vector< float > &quant, const int lineSize=5000) |
Read the correlation discretization. | |
static bool | ReadQuantFile (const char *file, vector< float > &quant, const int lineSize=5000) |
Read the correlation discretization. |
A suite of file I/O and general purpose tools that are used by Seek.
These tools are critical for initializing the search parameters, and are highly beneficial to the routine manipulations of vectors and files.
Some examples of these tools include reading, writing, and initializing vectors whose elements are of a standard type (int, char, string, float).
Definition at line 52 of file seekreader.h.
string Sleipnir::CSeekTools::ConvertInt | ( | const int & | number | ) | [static] |
Converts an integer to a string.
number | The given integer number |
Definition at line 26 of file seekreader.cpp.
Referenced by ReadDatabaselets().
static void Sleipnir::CSeekTools::Free2DArray | ( | tType ** | f | ) | [inline, static] |
Free a two-dimensional array.
f | The two-dimensional array |
Definition at line 255 of file seekreader.h.
Referenced by Sleipnir::CSeekDataset::DeleteQueryBlock(), Sleipnir::CSeekDataset::InitializeDataMatrix(), Sleipnir::CSeekWeighter::OrderStatisticsRankAggregation(), and Sleipnir::CSeekCentral::~CSeekCentral().
static tType** Sleipnir::CSeekTools::Init2DArray | ( | const size_t & | iSize1, |
const size_t & | iSize2, | ||
const tType & | tValue | ||
) | [inline, static] |
Initialize a two-dimensional array with the given size and value.
iSize1 | The first dimension size |
iSize2 | The second dimension size |
tValue | The value |
Creates a two-dimensional array of the given dimension, then populates it with the given value.
Definition at line 224 of file seekreader.h.
Referenced by Sleipnir::CSeekDataset::InitializeDataMatrix(), Sleipnir::CSeekDataset::InitializeQueryBlock(), and Sleipnir::CSeekWeighter::OrderStatisticsRankAggregation().
static bool Sleipnir::CSeekTools::InitVector | ( | vector< tType > & | vData, |
const utype & | iSize, | ||
const tType & | tValue | ||
) | [inline, static] |
Initialize a vector with a given value.
vData | The source vector |
iSize | The number of elements that the vector should contain |
tValue | The value |
Resizes the source vector to the given size, then sets all elements in the vector to the given value.
Definition at line 190 of file seekreader.h.
Referenced by Sleipnir::CSeekWeighter::CVWeighting(), Sleipnir::CSeekDataset::InitializeDataMatrix(), Sleipnir::CSeekPlatform::InitializePlatform(), Sleipnir::CSeekWeighter::LinearCombine(), Sleipnir::CSeekWeighter::OneGeneWeighting(), ReadDatabaselets(), and Sleipnir::CSeekCentral::VarianceWeightSearch().
static bool Sleipnir::CSeekTools::InitVector | ( | vector< tType > & | vData, |
const utype & | iSize | ||
) | [inline, static] |
Initialize a vector.
vData | The source vector |
iSize | The number of elements that the vector should contain |
Resizes the source vector to the given size.
Definition at line 207 of file seekreader.h.
bool Sleipnir::CSeekTools::IsNaN | ( | const utype & | v | ) | [static] |
Checks if a utype value is invalid.
v | The value to be checked |
A utype value is invalid if it equals the maximum utype value (65535).
Definition at line 32 of file seekreader.cpp.
Referenced by Sleipnir::CSeekQuery::CreateCVPartitions(), Sleipnir::CSeekWeighter::CVWeighting(), Sleipnir::CSeekDataset::InitializeQuery(), Sleipnir::CSeekDataset::InitializeQueryBlock(), Sleipnir::CSeekWeighter::OneGeneWeighting(), and ReadDatabaselets().
bool Sleipnir::CSeekTools::LoadDatabase | ( | const vector< CDatabase * > & | DB, |
const size_t & | iGenes, | ||
const size_t & | iDatasets, | ||
const vector< CSeekDBSetting * > & | DBSetting, | ||
const vector< string > & | vecstrDatasets, | ||
const map< string, string > & | mapstrstrDatasetPlatform, | ||
const map< string, utype > & | mapstriPlatform, | ||
vector< CSeekPlatform > & | vp, | ||
vector< CSeekDataset * > & | vc, | ||
const vector< vector< string > > & | dbDataset, | ||
const map< string, utype > & | mapstriDataset, | ||
const bool | bVariance = false , |
||
const bool | bCorrelation = false |
||
) | [static] |
Read the search setting files and load the CDatabase.
Performs the following search initializing operations:
Reads the gene presence *.gpres, the gene averages *.gavg, the gene variances *.gvar, and each dataset's correlation average and standard deviation *.sinfo.
DB | The CDatabase instance |
strPrepInputDirectory | The prep directory which contains the *.gavg and *.gpres files |
strGvarInputDirectory | The directory that contains the gene variance files *.gvar |
strSinfoInputDirectory | The directory that contains the *.sinfo files |
vecstrDatasets | The dataset definition |
mapstrstrDatasetPlatform | The dataset-platform mapping |
mapstriPlatform | Platform name-platform ID mapping |
vp | The vector of CSeekPlatform |
vc | The vector of CSeekDataset, the output |
Definition at line 215 of file seekreader.cpp.
Referenced by Sleipnir::CSeekCentral::Initialize().
bool Sleipnir::CSeekTools::LoadDatabase | ( | const vector< CDatabase * > & | DB, |
const size_t & | iGenes, | ||
const size_t & | iDatasets, | ||
vector< CSeekDataset * > & | vc, | ||
const vector< CSeekDataset * > & | vc_src, | ||
vector< CSeekPlatform > & | vp, | ||
const vector< CSeekPlatform > & | vp_src, | ||
const vector< string > & | vecstrDatasets, | ||
const map< string, string > & | mapstrstrDatasetPlatform, | ||
const map< string, utype > & | mapstriPlatform | ||
) | [static] |
Load a CDatabase by copying from an existing instance.
Copies the vector of initialized CSeekDataset to a new vector. Copies the vector of initialized CSeekPlatform to a new vector.
DB | The CDatabase |
vc | The destination dataset vector |
vc_src | The source dataset vector |
vp | The destination platform vector |
vp_src | The source platform vector |
vecstrDatasets | The dataset definition |
mapstrstrDatasetPlatform | The dataset-platform mapping |
mapstriPlatform | Platform name-platform ID mapping |
Definition at line 176 of file seekreader.cpp.
static bool Sleipnir::CSeekTools::ReadArray | ( | const char * | fileName, |
vector< tType > & | vData | ||
) | [inline, static] |
Read an array from a given binary file.
fileName | The file name |
vData | The destination array |
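The binary file is organized as follows: 1) The first field is the size of the array, N (stored as a size_t). 2) The second field is a set of N elements.
Definition at line 67 of file seekreader.h.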
Referenced by Sleipnir::CSeekDataset::ReadDatasetAverageStdev(), Sleipnir::CSeekDataset::ReadGeneAverage(), Sleipnir::CSeekDataset::ReadGenePresence(), and Sleipnir::CSeekDataset::ReadGeneVariance().
bool Sleipnir::CSeekTools::ReadDatabaselets | ( | const vector< CDatabase * > & | DB, |
const size_t & | iGenes, | ||
const size_t & | iDatasets, | ||
const vector< vector< string > > & | vecstrAllQuery, | ||
vector< CSeekDataset * > & | vc, | ||
const map< string, utype > & | mapstriGenes, | ||
const vector< vector< string > > & | dbDatasets, | ||
const map< string, utype > & | mapstriDatasets, | ||
const int & | iClient, | ||
const bool & | bNetwork | ||
) | [static] |
Read a set of CDatabaselet from a CDatabase instance.
DB | The CDatabase instance |
vecstrAllQuery | The list of queries |
vc | A vector of datasets (the output) |
iClient | If the network mode is enabled, the client's socket |
bNetwork | If true, the network mode is enabled |
Definition at line 41 of file seekreader.cpp.
References ConvertInt(), Sleipnir::CSeekIntIntMap::GetForward(), InitVector(), IsNaN(), and Sleipnir::CSeekNetwork::Send().
bool Sleipnir::CSeekTools::ReadListOneColumn | ( | const string & | strFile, |
vector< string > & | vecstrList, | ||
CSeekStrIntMap & | mapstriList, | ||
const int | lineSize = 1024 |
||
) | [static] |
Read a table with one column.
Outputs the lines in the table as a vector of strings.
strFile | The file name |
vecstrList | The output |
mapstriList | Mapping the line to its line number |
lineSize | The maximum characters per line in the file (default 1024) |
Definition at line 387 of file seekreader.cpp.
Referenced by ReadListOneColumn().
bool Sleipnir::CSeekTools::ReadListOneColumn | ( | const char * | file, |
vector< string > & | vecstrList, | ||
CSeekStrIntMap & | mapstriList, | ||
const int | lineSize = 1024 |
||
) | [static] |
Read a table with one column.
This is the same as the previous CSeekTools::ReadListOneColumn() declaration, except that the accepted string arguments are of the type const char*.
Definition at line 394 of file seekreader.cpp.
References Sleipnir::CSeekStrIntMap::Set().
bool Sleipnir::CSeekTools::ReadListOneColumn | ( | const string & | strFile, |
vector< string > & | vecstrList, | ||
const int | lineSize = 1024 |
||
) | [static] |
Read a table with one column.
Same as the previous CSeekTools::ReadListOneColumn() declaration, except that this does not generate the line to line number mapping.
Definition at line 541 of file seekreader.cpp.
References ReadListOneColumn().
bool Sleipnir::CSeekTools::ReadListOneColumn | ( | const char * | file, |
vector< string > & | vecstrList, | ||
const int | lineSize = 1024 |
||
) | [static] |
Read a table with one column.
Same as the previous CSeekTools::ReadListOneColumn() declaration, except that this does not generate the line to line number mapping, and accepts the file name as const char *.
Definition at line 546 of file seekreader.cpp.
bool Sleipnir::CSeekTools::ReadListTwoColumns | ( | const string & | strFile, |
vector< string > & | list1, | ||
vector< string > & | list2, | ||
const int | lineSize = 1024 |
||
) | [static] |
Read a table with two columns.
strFile | The file name |
list1 | The column 1 output |
list2 | The column 2 output |
lineSize | The maximum characters per line in the file (default 1024) |
Definition at line 352 of file seekreader.cpp.
Referenced by Sleipnir::CSeekCentral::Initialize().
bool Sleipnir::CSeekTools::ReadListTwoColumns | ( | const char * | file, |
vector< string > & | list1, | ||
vector< string > & | list2, | ||
const int | lineSize = 1024 |
||
) | [static] |
Read a table with two columns.
This is the same as the previous CSeekTools::ReadListTwoColumns() declaration, except that the accepted string arguments are of the type const char *.
Definition at line 358 of file seekreader.cpp.
References Sleipnir::CMeta::Tokenize().
bool Sleipnir::CSeekTools::ReadMultiGeneOneLine | ( | const string & | strFile, |
vector< string > & | list1, | ||
const int | lineSize = 1024 |
||
) | [static] |
Read just one gene-set line.
Reads the first line in the file. The line contains a set of gene names delimited by spaces. The output is a vector of strings representing the genes in that line.
strFile | The file name |
list1 | The output |
lineSize | The maximum characters per line in the file (default 1024) |
Definition at line 508 of file seekreader.cpp.
bool Sleipnir::CSeekTools::ReadMultiGeneOneLine | ( | const char * | file, |
vector< string > & | list1, | ||
const int | lineSize = 1024 |
||
) | [static] |
Read just one gene-set line.
Same as the previous CSeekTools::ReadMultiGeneOneLine() except that the accepted string argument is of the type const char *.
Definition at line 513 of file seekreader.cpp.
References Sleipnir::CMeta::Tokenize().
bool Sleipnir::CSeekTools::ReadMultipleQueries | ( | const string & | strFile, |
vector< vector< string > > & | qList, | ||
const int | lineSize = 1024 |
||
) | [static] |
Read a list of queries.
A query is specified as a set of gene names delimited by spaces. A query occupies one line in the file.
strFile | The file name |
qList | The output |
lineSize | The maximum characters per line in the file (default 1024) |
Definition at line 422 of file seekreader.cpp.
Referenced by Sleipnir::CSeekCentral::Initialize().
bool Sleipnir::CSeekTools::ReadMultipleQueries | ( | const char * | file, |
vector< vector< string > > & | qList, | ||
const int | lineSize = 1024 |
||
) | [static] |
Read a list of queries.
Same as the previous CSeekTools::ReadMultipleQueries() declaration, except that this function accepts the string argument as a const char *.
Definition at line 427 of file seekreader.cpp.
References Sleipnir::CMeta::Tokenize().
bool Sleipnir::CSeekTools::ReadPlatforms | ( | const string & | strPlatformDirectory, |
vector< CSeekPlatform > & | plat, | ||
vector< string > & | vecstrPlatforms, | ||
map< string, utype > & | mapstriPlatforms, | ||
const int | lineSize = 1024 |
||
) | [static] |
Read the platforms.
Reading the platforms mainly involves reading the correlation average and the correlation standard deviation for each platform in the database. The purpose is to correct the platform specific biases on the correlation values.
strPlatformDirectory | The directory that contains the platform average and standard deviation files |
plat | The output |
vecstrPlatforms | The platform names |
mapstriPlatforms | The platform name - platform ID mapping |
lineSize | The maximum characters per line in the file (default 1024) |
Definition at line 296 of file seekreader.cpp.
Referenced by Sleipnir::CSeekCentral::Initialize().
bool Sleipnir::CSeekTools::ReadPlatforms | ( | const char * | plat_dir, |
vector< CSeekPlatform > & | plat, | ||
vector< string > & | vecstrPlatforms, | ||
map< string, utype > & | mapstriPlatforms, | ||
const int | lineSize = 1024 |
||
) | [static] |
Read the platforms.
This is the same as the previous CSeekTools::ReadPlatforms() declaration, except that the accepted string arguments are of the type const char *.
Definition at line 303 of file seekreader.cpp.
References Sleipnir::CFullMatrix< tType >::Get(), Sleipnir::CFullMatrix< tType >::GetColumns(), Sleipnir::CFullMatrix< tType >::GetRows(), and Sleipnir::CFullMatrix< tType >::Open().
bool Sleipnir::CSeekTools::ReadQuantFile | ( | const string & | strFile, |
vector< float > & | quant, | ||
const int | lineSize = 5000 |
||
) | [static] |
Read the correlation discretization.
Specifies how the correlations should be binned. The file contains the bin boundaries separated by spaces.
strFile | The file name |
quant | The output |
lineSize | The maximum characters per line in the file (default 5000) |
Definition at line 150 of file seekreader.cpp.
Referenced by Sleipnir::CSeekCentral::Initialize().
bool Sleipnir::CSeekTools::ReadQuantFile | ( | const char * | file, |
vector< float > & | quant, | ||
const int | lineSize = 5000 |
||
) | [static] |
Read the correlation discretization.
Same as the previous CSeekTools::ReadQuantFile() except that the accepted string argument is of the type const char *.
Definition at line 155 of file seekreader.cpp.
References Sleipnir::CMeta::Tokenize().
static bool Sleipnir::CSeekTools::Write2DArrayText | ( | const char * | fileName, |
const vector< vector< tType > > & | vData | ||
) | [inline, static] |
Write a two-dimensional array in text format.
fileName | The file name |
vData | The source array |
Definition at line 160 of file seekreader.h.
static bool Sleipnir::CSeekTools::WriteArray | ( | const char * | fileName, |
const vector< tType > & | vData | ||
) | [inline, static] |
Write an array in binary format.
fileName | The file name |
vData | The source array |
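The binary file is organized as follows: 1) The first field is the size of the array, N (stored as a size_t). 2) The second field is the N elements in the array.
Definition at line 107 of file seekreader.h.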
static bool Sleipnir::CSeekTools::WriteArrayText | ( | const char * | fileName, |
const vector< tType > & | vData | ||
) | [inline, static] |
Write an array in text format.
fileName | The file name |
vData | The source array |
Definition at line 137 of file seekreader.h.