Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef SPARSEMATRIX_H 00023 #define SPARSEMATRIX_H 00024 00025 #include <map> 00026 00027 #include "sparsematrixi.h" 00028 00029 namespace Sleipnir { 00030 00031 template <typename tType> 00032 struct CPair { 00033 utype i; 00034 tType v; 00035 }; 00036 00037 template <typename tType> 00038 struct CAscendingKey{ 00039 bool operator()(const CPair<tType>& lx, const CPair<tType>& rx) const{ 00040 if(lx.i < rx.i) return true; 00041 if(lx.i > rx.i) return false; 00042 if(lx.v < rx.v) return true; 00043 return false; 00044 } 00045 }; 00046 00047 template <typename tType> 00048 struct CAscendingValue{ 00049 bool operator()(const CPair<tType> &lx, const CPair<tType> &rx) const{ 00050 if(lx.v < rx.v) return true; 00051 if(lx.v > rx.v) return false; 00052 if(lx.i < rx.i) return true; 00053 return false; 00054 } 00055 }; 00056 00057 template <typename tType> 00058 struct CDescendingKey{ 00059 bool operator()(const CPair<tType>& lx, const CPair<tType>& rx) const{ 00060 if(lx.i < rx.i) return false; 00061 if(lx.i > rx.i) return true; 00062 if(lx.v < rx.v) return false; 00063 return true; 00064 } 00065 }; 00066 00067 template <typename tType> 00068 struct CDescendingValue{ 00069 bool operator()(const CPair<tType> &lx, const CPair<tType> &rx) const{ 00070 if(lx.v < rx.v) return false; 00071 if(lx.v > rx.v) return true; 00072 if(lx.i < rx.i) return false; 00073 return true; 00074 } 00075 }; 00076 00077 00078 //A full matrix 00079 template<class tType> 00080 class CSparseFlatMatrix : protected CSparseMatrixImpl<tType> { 00081 public: 00082 CSparseFlatMatrix(const tType& Default): CSparseMatrixImpl<tType>(Default){} 00083 ~CSparseFlatMatrix(){ 00084 Reset(); 00085 } 00086 void Initialize(size_t iR){ 00087 Reset(); 00088 CSparseMatrixImpl<tType>::m_iR = iR; 00089 m_vecData.resize(iR); 00090 m_currentIndex.resize(iR); 00091 m_bSorted.resize(iR); 00092 } 00093 //num is initial capacity 00094 void InitializeRow(size_t rowID, size_t num){ 00095 m_vecData[rowID] = vector<CPair<tType> >(); 00096 m_vecData[rowID].reserve(num); 00097 m_currentIndex[rowID] = 0; 00098 m_bSorted[rowID] = false; 00099 } 00100 void Reset() { 00101 size_t i; 00102 for(i=0; i<CSparseMatrixImpl<tType>::m_iR; i++) 00103 m_vecData[i].clear(); 00104 m_vecData.clear(); 00105 CSparseMatrixImpl<tType>::m_iR = 0; 00106 } 00107 const tType& GetDefault() const { 00108 return CSparseMatrixImpl<tType>::GetDefault(); 00109 } 00110 size_t GetRows() const { 00111 return CSparseMatrixImpl<tType>::GetRows(); 00112 } 00113 //does not check if [iY][iX] already exists 00114 //nor if the row m_vecData[iY] is already full 00115 void Add(size_t iY, size_t iX, tType v){ 00116 CPair<tType> cp; 00117 cp.i = (utype) iX; 00118 cp.v = v; 00119 m_vecData[iY].push_back(cp); 00120 m_bSorted[iY] = false; 00121 m_currentIndex[iY]++; 00122 } 00123 const vector<CPair<tType> >& GetRow(size_t iY) const{ 00124 return m_vecData[iY]; 00125 } 00126 typename vector<CPair<tType> >::iterator RowBegin(size_t iY){ 00127 return m_vecData[iY].begin(); 00128 } 00129 typename vector<CPair<tType> >::iterator RowEnd(size_t iY){ 00130 return m_vecData[iY].end(); 00131 } 00132 void Shrink(){ 00133 size_t i; 00134 for(i=0; i<m_vecData.size(); i++) 00135 m_vecData[i].resize(m_currentIndex[i]); 00136 } 00137 void Organize(){ 00138 size_t i; 00139 for(i=0; i<m_vecData.size(); i++){ 00140 sort(m_vecData[i].begin(), m_vecData[i].end(), CAscendingKey<tType>()); 00141 m_bSorted[i] = true; 00142 } 00143 } 00144 bool Check(size_t iY, size_t iX){ 00145 if(!m_bSorted[iY]) 00146 SortRow(iY); 00147 size_t ind = GetIndex(iY, iX); 00148 if(ind!=(size_t) -1) 00149 return true; 00150 return false; 00151 } 00152 //assume element at this coordinate exists 00153 tType Get(size_t iY, size_t iX) const { 00154 if(!m_bSorted[iY]) 00155 SortRow(iY); 00156 size_t ind = GetIndex(iY, iX); 00157 return m_vecData[iY][ind].v; 00158 } 00159 //assume element at this coordinate exists 00160 void Set(size_t iY, size_t iX, tType v) { 00161 if(!m_bSorted[iY]) 00162 SortRow(iY); 00163 size_t ind = GetIndex(iY, iX); 00164 m_vecData[iY][ind].v = v; 00165 } 00166 void Increment(size_t iY, size_t iX, tType v){ 00167 if(!m_bSorted[iY]) 00168 SortRow(iY); 00169 size_t ind = GetIndex(iY, iX); 00170 m_vecData[iY][ind].v += v; 00171 } 00172 00173 private: 00174 vector<vector<CPair<tType> > > m_vecData; 00175 //parent class contains m_iR and m_Default 00176 vector<bool> m_bSorted; 00177 vector<size_t> m_currentIndex; 00178 00179 void SortRow(size_t iY){ 00180 sort(m_vecData[iY].begin(), m_vecData[iY].end(), CAscendingKey<tType>()); 00181 m_bSorted[iY] = true; 00182 } 00183 size_t GetIndex(size_t iY, size_t iX) { 00184 //suppose m_bSorted[iY] is true 00185 return BinarySearch(m_vecData[iY], iX); 00186 } 00187 size_t BinarySearch(vector<CPair<tType> > &A, size_t iX){ 00188 int iMax = A.size()-1; 00189 int iMin = 0; 00190 while(iMax>=iMin){ 00191 int iMid = (iMin + iMax) / 2; 00192 if(A[iMid].i < iX) 00193 iMin = iMid + 1; 00194 else if(A[iMid].i > iX) 00195 iMax = iMid - 1; 00196 else 00197 return (size_t) iMid; 00198 } 00199 //fprintf(stderr, "Element %d is not found!\n", iX); 00200 return (size_t) -1; 00201 } 00202 }; 00203 00204 //A half-matrix 00205 template<class tType> 00206 class CSparseFlatHalfMatrix : protected CSparseMatrixImpl<tType> { 00207 public: 00208 CSparseFlatHalfMatrix(const tType& Default): CSparseMatrixImpl<tType>(Default){} 00209 ~CSparseFlatHalfMatrix(){ 00210 Reset(); 00211 } 00212 void Copy(const CSparseFlatMatrix<tType> &cf){ //a full matrix (symmetric) 00213 CSparseMatrixImpl<tType>::m_Default = cf.GetDefault(); 00214 Initialize(cf.GetRows()); 00215 size_t i,j; 00216 for(i=0; i<CSparseMatrixImpl<tType>::m_iR; i++){ 00217 const vector<CPair<tType> > &allR = cf.GetRow(i); 00218 InitializeRow(i, allR.size()); 00219 for(j=0; j<allR.size(); j++) 00220 if(allR[j].i > i) 00221 Add(i, allR[j].i, allR[j].v); 00222 } 00223 Organize(); 00224 } 00225 void Initialize(size_t iR){ 00226 Reset(); 00227 CSparseMatrixImpl<tType>::m_iR = iR; 00228 m_vecData.resize(iR); 00229 m_currentIndex.resize(iR); 00230 m_bSorted.resize(iR); 00231 } 00232 //num is initial capacity 00233 void InitializeRow(size_t rowID, size_t num){ 00234 m_vecData[rowID] = vector<CPair<tType> >(); 00235 m_vecData[rowID].reserve(num); 00236 m_currentIndex[rowID] = 0; 00237 m_bSorted[rowID] = false; 00238 } 00239 void Reset() { 00240 size_t i; 00241 for(i=0; i<CSparseMatrixImpl<tType>::m_iR; i++) 00242 m_vecData[i].clear(); 00243 m_vecData.clear(); 00244 CSparseMatrixImpl<tType>::m_iR = 0; 00245 } 00246 const tType& GetDefault() const { 00247 return CSparseMatrixImpl<tType>::GetDefault(); 00248 } 00249 size_t GetRows() const { 00250 return CSparseMatrixImpl<tType>::GetRows(); 00251 } 00252 //does not check if [iY][iX] already exists 00253 //nor if the row m_vecData[iY] is already full 00254 void Add(size_t iY, size_t iX, tType v){ 00255 AdjustCoord(iY, iX); 00256 CPair<tType> cp; 00257 cp.i = (utype) iX; 00258 cp.v = v; 00259 m_vecData[iY].push_back(cp); 00260 m_bSorted[iY] = false; 00261 m_currentIndex[iY]++; 00262 } 00263 const vector<CPair<tType> >& GetRow(size_t iY) const{ 00264 return m_vecData[iY]; 00265 } 00266 typename vector<CPair<tType> >::iterator RowBegin(size_t iY){ 00267 return m_vecData[iY].begin(); 00268 } 00269 typename vector<CPair<tType> >::iterator RowEnd(size_t iY){ 00270 return m_vecData[iY].end(); 00271 } 00272 void Shrink(){ 00273 size_t i; 00274 for(i=0; i<m_vecData.size(); i++) 00275 m_vecData[i].resize(m_currentIndex[i]); 00276 } 00277 void SortRow(size_t iY){ 00278 sort(m_vecData[iY].begin(), m_vecData[iY].end(), CAscendingKey<tType>()); 00279 m_bSorted[iY] = true; 00280 } 00281 void Organize(){ 00282 size_t i; 00283 for(i=0; i<m_vecData.size(); i++) 00284 SortRow(i); 00285 } 00286 void AdjustCoord(size_t &iY, size_t &iX){ 00287 if(iY>=iX){ //second must be the greater 00288 size_t tmp = iY; 00289 iY = iX; 00290 iX = tmp; 00291 } 00292 } 00293 bool Check(size_t iY, size_t iX){ 00294 AdjustCoord(iY, iX); 00295 if(!m_bSorted[iY]) SortRow(iY); 00296 size_t ind = GetIndex(iY, iX); 00297 if(ind!=(size_t) -1) return true; 00298 return false; 00299 } 00300 CPair<tType>* GetElement(size_t iY, size_t iX){ 00301 AdjustCoord(iY, iX); 00302 if(!m_bSorted[iY]) SortRow(iY); 00303 size_t ind = GetIndex(iY, iX); 00304 if(ind==(size_t)-1) return NULL; 00305 return &m_vecData[iY][ind]; 00306 } 00307 //assume element at this coordinate exists 00308 tType Get(size_t iY, size_t iX) const { 00309 AdjustCoord(iY, iX); 00310 if(!m_bSorted[iY]) SortRow(iY); 00311 size_t ind = GetIndex(iY, iX); 00312 return m_vecData[iY][ind].v; 00313 } 00314 //assume element at this coordinate exists 00315 void Set(size_t iY, size_t iX, tType v) { 00316 AdjustCoord(iY, iX); 00317 if(!m_bSorted[iY]) SortRow(iY); 00318 size_t ind = GetIndex(iY, iX); 00319 m_vecData[iY][ind].v = v; 00320 } 00321 void Increment(size_t iY, size_t iX, tType v){ 00322 AdjustCoord(iY, iX); 00323 if(!m_bSorted[iY]) SortRow(iY); 00324 size_t ind = GetIndex(iY, iX); 00325 m_vecData[iY][ind].v += v; 00326 } 00327 00328 private: 00329 //parent class contains m_iR and m_Default 00330 vector<vector<CPair<tType> > > m_vecData; 00331 vector<bool> m_bSorted; 00332 vector<size_t> m_currentIndex; 00333 00334 size_t GetIndex(size_t iY, size_t iX) { 00335 return BinarySearch(m_vecData[iY], iX); //suppose m_bSorted[iY] = true 00336 } 00337 size_t BinarySearch(vector<CPair<tType> > &A, size_t iX){ 00338 int iMax = A.size()-1; 00339 int iMin = 0; 00340 while(iMax>=iMin){ 00341 int iMid = (iMin + iMax) / 2; 00342 if(A[iMid].i < iX) 00343 iMin = iMid + 1; 00344 else if(A[iMid].i > iX) 00345 iMax = iMid - 1; 00346 else 00347 return (size_t) iMid; 00348 } 00349 return (size_t) -1; 00350 } 00351 }; 00365 template<class tType> 00366 class CSparseMapMatrix : protected CSparseMatrixImpl<tType> { 00367 public: 00375 CSparseMapMatrix( const tType& Default ) : CSparseMatrixImpl<tType>( Default ), m_amapData(NULL) { } 00376 00377 ~CSparseMapMatrix( ) { 00378 00379 Reset( ); } 00380 00385 void Reset( ) { 00386 00387 if( m_amapData ) 00388 delete[] m_amapData; 00389 CSparseMatrixImpl<tType>::m_iR = 0; } 00390 00401 void Initialize( size_t iR ) { 00402 00403 Reset( ); 00404 m_amapData = new std::map<size_t,tType>[ CSparseMatrixImpl<tType>::m_iR = iR ]; } 00405 00413 size_t GetRows( ) const { 00414 00415 return CSparseMatrixImpl<tType>::GetRows( ); } 00416 00437 tType Get( size_t iY, size_t iX ) const { 00438 typename std::map<size_t,tType>::const_iterator iter; 00439 00440 return ( ( ( iter = m_amapData[ iY ].find( iX ) ) == m_amapData[ iY ].end( ) ) ? 00441 CSparseMatrixImpl<tType>::m_Default : iter->second ); } 00442 00462 void Set( size_t iY, size_t iX, const tType& Value ) { 00463 00464 m_amapData[ iY ][ iX ] = Value; } 00465 00473 const tType& GetDefault( ) const { 00474 00475 return CSparseMatrixImpl<tType>::GetDefault( ); } 00476 00477 private: 00478 std::map<size_t,tType>* m_amapData; 00479 }; 00480 00495 template<class tType> 00496 class CSparseListMatrix : protected CSparseMatrixImpl<tType> { 00497 private: 00498 struct SNode { 00499 size_t m_iX; 00500 tType m_Value; 00501 SNode* m_pNext; 00502 00503 SNode( size_t iX, tType Value, SNode* pNext ) : m_iX(iX), m_Value(Value), m_pNext(pNext) { } 00504 }; 00505 00506 public: 00514 CSparseListMatrix( const tType& Default ) : CSparseMatrixImpl<tType>( Default ), m_apsData(NULL) { } 00515 00516 ~CSparseListMatrix( ) { 00517 00518 Reset( ); } 00519 00524 void Reset( ) { 00525 size_t i; 00526 SNode* pNode; 00527 SNode* pNext; 00528 00529 if( m_apsData ) { 00530 for( i = 0; i < CSparseMatrixImpl<tType>::m_iR; ++i ) 00531 for( pNode = m_apsData[ i ]; pNode; pNode = pNext ) { 00532 pNext = pNode->m_pNext; 00533 delete pNode; } 00534 delete[] m_apsData; } 00535 CSparseMatrixImpl<tType>::m_iR = 0; } 00536 00547 void Initialize( size_t iR ) { 00548 00549 Reset( ); 00550 m_apsData = new SNode*[ CSparseMatrixImpl<tType>::m_iR = iR ]; 00551 memset( m_apsData, 0, CSparseMatrixImpl<tType>::m_iR * sizeof(*m_apsData) ); } 00552 00573 tType Get( size_t iY, size_t iX ) const { 00574 SNode* pNode; 00575 00576 for( pNode = m_apsData[ iY ]; pNode && ( pNode->m_iX >= iX ); pNode = pNode->m_pNext ) 00577 if( pNode->m_iX == iX ) 00578 return pNode->m_Value; 00579 00580 return CSparseMatrixImpl<tType>::m_Default; } 00581 00601 void Set( size_t iY, size_t iX, const tType& Value ) { 00602 SNode* pNode; 00603 00604 if( !( pNode = m_apsData[ iY ] ) || ( iX > pNode->m_iX ) ) { 00605 m_apsData[ iY ] = new SNode( iX, Value, pNode ); 00606 return; } 00607 for( ; pNode->m_pNext; pNode = pNode->m_pNext ) { 00608 if( pNode->m_iX == iX ) { 00609 pNode->m_Value = Value; 00610 return; } 00611 if( iX > pNode->m_pNext->m_iX ) { 00612 pNode->m_pNext = new SNode( iX, Value, pNode->m_pNext ); 00613 return; } } 00614 if( pNode->m_iX == iX ) 00615 pNode->m_Value = Value; 00616 else 00617 pNode->m_pNext = new SNode( iX, Value, NULL ); } 00618 00626 const tType& GetDefault( ) const { 00627 00628 return CSparseMatrixImpl<tType>::GetDefault( ); } 00629 00637 size_t GetRows( ) const { 00638 00639 return CSparseMatrixImpl<tType>::GetRows( ); } 00640 00641 private: 00642 SNode** m_apsData; 00643 }; 00644 00645 } 00646 00647 #endif // SPARSEMATRIX_H