Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "seekevaluate.h" 00023 namespace Sleipnir { 00024 00025 bool CSeekPerformanceMeasure::SortRankVector( 00026 const vector<utype> &rank, 00027 const CSeekIntIntMap &mapG, vector<AResult> &a, 00028 //optional argument 00029 const utype top){ 00030 00031 utype numGenesD = mapG.GetNumSet(); 00032 utype TOP = 0; 00033 utype numNonZero = 0; 00034 utype i; 00035 bool DEBUG = false; 00036 00037 //a should be the same size as rank 00038 if(top==0) TOP = rank.size(); 00039 else TOP = top; 00040 00041 vector<utype>::const_iterator itRank = rank.begin(); 00042 vector<AResult>::iterator itA = a.begin(); 00043 for(i = 0; itRank!=rank.end(); itRank++, itA++, i++){ 00044 itA->i = i; 00045 itA->f = *itRank; 00046 if(*itRank>0) numNonZero++; 00047 } 00048 00049 if(numNonZero==0){ 00050 if(DEBUG) cerr << "This dataset is all zero!" << endl; 00051 return false; 00052 } 00053 00054 //printf("Top is %d", TOP); getchar(); 00055 if(TOP==rank.size()){ 00056 sort(a.begin(), a.end()); 00057 }else{ 00058 nth_element(a.begin(), a.begin()+TOP, a.end()); 00059 sort(a.begin(), a.begin()+TOP); 00060 } 00061 00062 return true; 00063 } 00064 00065 /* designed specifically for a CSeekDataset */ 00066 /* mask: the query genes which are not included in RBP calcualtion */ 00067 bool CSeekPerformanceMeasure::RankBiasedPrecision(const float &rate, 00068 const vector<utype> &rank, float &rbp, 00069 const vector<char> &mask, const vector<char> &gold, 00070 const CSeekIntIntMap &mapG, vector<AResult> *ar, 00071 /* optional arguments */ 00072 const utype top){ 00073 00074 utype i, ii, j, jj; 00075 float x; 00076 00077 utype TOP = top; 00078 if(top==0) TOP = rank.size(); 00079 00080 //ar should be same size as rank 00081 vector<AResult> *sing = ar; 00082 bool ret = 00083 CSeekPerformanceMeasure::SortRankVector(rank, mapG, *sing, top); 00084 00085 if(!ret){ 00086 rbp = -1; 00087 return false; 00088 } 00089 00090 x = 0; 00091 jj = 0; 00092 00093 AResult *aa; 00094 for(i=0; i<TOP; i++){ 00095 aa = &(*sing)[i]; 00096 if(aa->f==0) break; 00097 if(mask[aa->i]==1) continue; 00098 if(gold[aa->i]==1){ 00099 x+=pow(rate, jj); 00100 //fprintf(stderr, "Sorted %d %d %.5f\n", jj, aa->i, (aa->f-320)/100.0); 00101 } 00102 jj++; 00103 } 00104 x *= (1.0-rate); 00105 rbp = x; 00106 //fprintf(stderr, "%.3e\n", 0, rbp); 00107 00108 return true; 00109 } 00110 00111 bool CSeekPerformanceMeasure::AveragePrecision( 00112 const vector<utype> &rank, float &ap, 00113 const vector<char> &mask, const vector<char> &gold, 00114 const CSeekIntIntMap &mapG, vector<AResult> *ar){ 00115 00116 utype i, ii, j, jj; 00117 float x; 00118 00119 utype TOP = rank.size(); 00120 00121 //ar should be same size as rank 00122 vector<AResult> *sing = ar; 00123 bool ret = 00124 CSeekPerformanceMeasure::SortRankVector(rank, mapG, *sing, TOP); 00125 00126 if(!ret){ 00127 ap = -1; 00128 return false; 00129 } 00130 00131 jj = 0; 00132 00133 AResult *aa; 00134 int num = 0; 00135 float sum = 0; 00136 for(i=0; i<TOP; i++){ 00137 aa = &(*sing)[i]; 00138 if(aa->f==0) break; 00139 if(mask[aa->i]==1) continue; 00140 if(gold[aa->i]==1){ 00141 sum+=(float) (num+1) / (float) (jj+1); 00142 num++; 00143 } 00144 jj++; 00145 } 00146 if(num==0){ 00147 ap = 0; 00148 }else{ 00149 ap = sum / num; 00150 } 00151 return true; 00152 } 00153 00154 }