Sleipnir
src/seekquery.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "seekquery.h"
00023 #include "seekreader.h"
00024 
00025 
00026 namespace Sleipnir {
00027 
00028 CSeekQuery::CSeekQuery(){
00029     crossValGenes = NULL;
00030     queryGenes.clear();
00031     queryGenePresence.clear();
00032     iNumFold = 0;
00033     iFoldSize = 0;
00034 }
00035 CSeekQuery::~CSeekQuery(){
00036     Reset();
00037 }
00038 
00039 bool CSeekQuery::Reset(){
00040     if(crossValGenes!=NULL){
00041         delete[] crossValGenes;
00042         crossValGenes = NULL;
00043     }
00044     queryGenePresence.clear();
00045     queryGenes.clear();
00046     iNumFold = 0;
00047     iFoldSize = 0;
00048 }
00049 
00050 bool CSeekQuery::InitializeQuery(const vector<char> &query){
00051     Reset();
00052     utype i;
00053     queryGenePresence.resize(query.size());
00054 
00055     for(i=0; i<query.size(); i++){
00056         if(query[i]==1) queryGenes.push_back(i);
00057         queryGenes[i] = query[i];
00058     }
00059     queryGenes.resize(queryGenes.size());
00060     return true;
00061 }
00062 
00063 bool CSeekQuery::InitializeQuery(const vector<utype> &query,
00064     const utype &iGenes){
00065     Reset();
00066     utype i;
00067     queryGenePresence.resize(iGenes);
00068     fill(queryGenePresence.begin(), queryGenePresence.end(), (char) 0);
00069     for(i=0; i<query.size(); i++){
00070         queryGenes.push_back(query[i]);
00071         queryGenePresence[query[i]] = 1;
00072     }
00073     queryGenes.resize(queryGenes.size());
00074     return true;
00075 }
00076 
00077 utype CSeekQuery::GetNumFold() const{
00078     return iNumFold;
00079 }
00080 
00081 const vector<char>& CSeekQuery::GetQueryPresence() const{
00082     return queryGenePresence;
00083 }
00084 
00085 const vector<utype>& CSeekQuery::GetQuery() const{
00086     return queryGenes;
00087 }
00088 
00089 const vector<utype>& CSeekQuery::GetCVQuery(utype &i) const{
00090     return crossValGenes[i];
00091 }
00092 
00093 bool CSeekQuery::CreateCVPartitions(const gsl_rng *rnd,
00094         const CSeekQuery::PartitionMode &p, const utype iFold){
00095     //must have run initializequery beforehand
00096     if(p!=LEAVE_ONE_IN && p!=LEAVE_ONE_OUT && p!=CUSTOM_PARTITION){
00097         cerr << "Error, unknown partition mode" << endl;
00098         return false;
00099     }
00100     qSize = queryGenes.size();
00101     utype fold_size = 0;
00102     utype iFoldx = iFold;
00103     if(CSeekTools::IsNaN(iFold)){
00104         if(p==LEAVE_ONE_IN){
00105             iFoldx = qSize;
00106             fold_size = 1;
00107         }else if(p==LEAVE_ONE_OUT){
00108             iFoldx = qSize;
00109             fold_size = qSize-1;
00110         }else{
00111             cerr << "Error, must specify number of folds if \
00112                     CustomPartition mode" << endl;
00113             return false;
00114         }
00115     }else{
00116         if(p==LEAVE_ONE_IN){
00117             iFoldx = qSize;
00118             fold_size = 1;
00119         }else if(p==LEAVE_ONE_OUT){
00120             iFoldx = qSize;
00121             fold_size = qSize - 1;
00122         }else{ //CUSTOM_PART
00123         /*if(p==LEAVE_ONE_IN || p==LEAVE_ONE_OUT){
00124             cerr << "Error, specified number of folds, so this must NOT be \
00125                     LEAVE_ONE_OUT or LEAVE_ONE_IN" << endl;
00126             return false;
00127         }*/
00128             if(qSize <= iFoldx){
00129                 fold_size = 1;
00130                 iFoldx = qSize;
00131             }else{
00132                 fold_size = qSize / iFoldx;
00133                 if(qSize % iFoldx > 0){
00134                     fold_size++;
00135                 }
00136             }
00137         }
00138     }
00139     iNumFold = iFoldx;
00140     iFoldSize = fold_size;
00141     crossValGenes = new vector<utype>[iNumFold];
00142     //printf("Fold size %d %d\n", iNumFold, iFoldSize);
00143 
00144     utype i, j, k;
00145     utype *q_b = (utype*)malloc(qSize*sizeof(utype));
00146     for(i=0; i<qSize; i++){
00147         q_b[i] = queryGenes[i];
00148         //printf("%d ", q_b[i]);
00149     }
00150     //printf("\n");
00151     //getchar();
00152     gsl_ran_shuffle(rnd, q_b, qSize, sizeof(utype));
00153 
00154     if(p==LEAVE_ONE_IN || p==CUSTOM_PARTITION){
00155         k = 0;
00156         for(i=0; i<iNumFold; i++){
00157             for(j=0; j<iFoldSize; j++){
00158                 if(k==qSize) continue;
00159                 crossValGenes[i].push_back(q_b[k]);
00160                 k++;
00161             }
00162             crossValGenes[i].resize(crossValGenes[i].size());
00163         }
00164     }else if(p==LEAVE_ONE_OUT){
00165         utype current_index = -1;
00166         for(i=0; i<iNumFold; i++){
00167             for(j=0; j<iFoldSize; j++){
00168                 current_index = (i+j) % qSize;
00169                 crossValGenes[i].push_back(q_b[current_index]);
00170             }
00171             crossValGenes[i].resize(crossValGenes[i].size());
00172         }
00173     }
00174 
00175     free(q_b);
00176     return true;
00177 }
00178 }