Sleipnir
src/seekevaluate.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "seekevaluate.h"
00023 namespace Sleipnir {
00024 
00025 bool CSeekPerformanceMeasure::SortRankVector(
00026     const vector<utype> &rank,
00027     const CSeekIntIntMap &mapG, vector<AResult> &a,
00028     //optional argument
00029     const utype top){
00030 
00031     utype numGenesD = mapG.GetNumSet();
00032     utype TOP = 0;
00033     utype numNonZero = 0;
00034     utype i;
00035     bool DEBUG = false;
00036 
00037     //a should be the same size as rank
00038     if(top==0) TOP = rank.size();
00039     else TOP = top;
00040 
00041     vector<utype>::const_iterator itRank = rank.begin();
00042     vector<AResult>::iterator itA = a.begin();
00043     for(i = 0; itRank!=rank.end(); itRank++, itA++, i++){
00044         itA->i = i;
00045         itA->f = *itRank;
00046         if(*itRank>0) numNonZero++;
00047     }
00048 
00049     if(numNonZero==0){
00050         if(DEBUG) cerr << "This dataset is all zero!" << endl;
00051         return false;
00052     }
00053 
00054     //printf("Top is %d", TOP); getchar();
00055     if(TOP==rank.size()){
00056         sort(a.begin(), a.end());
00057     }else{
00058         nth_element(a.begin(), a.begin()+TOP, a.end());
00059         sort(a.begin(), a.begin()+TOP);
00060     }
00061 
00062     return true;
00063 }
00064 
00065 /* designed specifically for a CSeekDataset */
00066 /* mask: the query genes which are not included in RBP calcualtion */
00067 bool CSeekPerformanceMeasure::RankBiasedPrecision(const float &rate,
00068     const vector<utype> &rank, float &rbp,
00069     const vector<char> &mask, const vector<char> &gold,
00070     const CSeekIntIntMap &mapG, vector<AResult> *ar,
00071     /* optional arguments */
00072     const utype top){
00073 
00074     utype i, ii, j, jj;
00075     float x;
00076 
00077     utype TOP = top;
00078     if(top==0) TOP = rank.size();
00079 
00080     //ar should be same size as rank
00081     vector<AResult> *sing = ar;
00082     bool ret =
00083         CSeekPerformanceMeasure::SortRankVector(rank, mapG, *sing, top);
00084 
00085     if(!ret){
00086         rbp = -1;
00087         return false;
00088     }
00089 
00090     x = 0;
00091     jj = 0;
00092 
00093     AResult *aa;
00094     for(i=0; i<TOP; i++){
00095         aa = &(*sing)[i];
00096         if(aa->f==0) break;
00097         if(mask[aa->i]==1) continue;
00098         if(gold[aa->i]==1){
00099             x+=pow(rate, jj);
00100             //fprintf(stderr, "Sorted %d %d %.5f\n", jj, aa->i, (aa->f-320)/100.0);
00101         }
00102         jj++;
00103     }
00104     x *= (1.0-rate);
00105     rbp = x;
00106     //fprintf(stderr, "%.3e\n", 0, rbp);
00107 
00108     return true;
00109 }
00110 
00111 bool CSeekPerformanceMeasure::AveragePrecision(
00112     const vector<utype> &rank, float &ap,
00113     const vector<char> &mask, const vector<char> &gold,
00114     const CSeekIntIntMap &mapG, vector<AResult> *ar){
00115 
00116     utype i, ii, j, jj;
00117     float x;
00118 
00119     utype TOP = rank.size();
00120 
00121     //ar should be same size as rank
00122     vector<AResult> *sing = ar;
00123     bool ret =
00124         CSeekPerformanceMeasure::SortRankVector(rank, mapG, *sing, TOP);
00125 
00126     if(!ret){
00127         ap = -1;
00128         return false;
00129     }
00130 
00131     jj = 0;
00132 
00133     AResult *aa;
00134     int num = 0;
00135     float sum = 0;
00136     for(i=0; i<TOP; i++){
00137         aa = &(*sing)[i];
00138         if(aa->f==0) break;
00139         if(mask[aa->i]==1) continue;
00140         if(gold[aa->i]==1){
00141             sum+=(float) (num+1) / (float) (jj+1);
00142             num++;
00143         }
00144         jj++;
00145     }
00146     if(num==0){
00147         ap = 0;
00148     }else{
00149         ap = sum / num;
00150     }
00151     return true;
00152 }
00153 
00154 }