Sleipnir
src/sparsematrix.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef SPARSEMATRIX_H
00023 #define SPARSEMATRIX_H
00024 
00025 #include <map>
00026 
00027 #include "sparsematrixi.h"
00028 
00029 namespace Sleipnir {
00030 
00031 template <typename tType>
00032 struct CPair {
00033     utype i;
00034     tType v;
00035 };
00036 
00037 template <typename tType>
00038 struct CAscendingKey{
00039     bool operator()(const CPair<tType>& lx, const CPair<tType>& rx) const{
00040         if(lx.i < rx.i) return true;
00041         if(lx.i > rx.i) return false;
00042         if(lx.v < rx.v) return true;
00043         return false;
00044     }
00045 };
00046 
00047 template <typename tType>
00048 struct CAscendingValue{
00049     bool operator()(const CPair<tType> &lx, const CPair<tType> &rx) const{
00050         if(lx.v < rx.v) return true;
00051         if(lx.v > rx.v) return false;
00052         if(lx.i < rx.i) return true;
00053         return false;
00054     }
00055 };
00056 
00057 template <typename tType>
00058 struct CDescendingKey{
00059     bool operator()(const CPair<tType>& lx, const CPair<tType>& rx) const{
00060         if(lx.i < rx.i) return false;
00061         if(lx.i > rx.i) return true;
00062         if(lx.v < rx.v) return false;
00063         return true;
00064     }
00065 };
00066 
00067 template <typename tType>
00068 struct CDescendingValue{
00069     bool operator()(const CPair<tType> &lx, const CPair<tType> &rx) const{
00070         if(lx.v < rx.v) return false;
00071         if(lx.v > rx.v) return true;
00072         if(lx.i < rx.i) return false;
00073         return true;
00074     }
00075 };
00076 
00077 
00078 //A full matrix
00079 template<class tType>
00080 class CSparseFlatMatrix : protected CSparseMatrixImpl<tType> {
00081 public:
00082     CSparseFlatMatrix(const tType& Default): CSparseMatrixImpl<tType>(Default){}
00083     ~CSparseFlatMatrix(){
00084         Reset();
00085     }
00086     void Initialize(size_t iR){
00087         Reset();
00088         CSparseMatrixImpl<tType>::m_iR = iR;
00089         m_vecData.resize(iR);
00090         m_currentIndex.resize(iR);
00091         m_bSorted.resize(iR);
00092     }
00093     //num is initial capacity
00094     void InitializeRow(size_t rowID, size_t num){
00095         m_vecData[rowID] = vector<CPair<tType> >();
00096         m_vecData[rowID].reserve(num);
00097         m_currentIndex[rowID] = 0;
00098         m_bSorted[rowID] = false;
00099     }
00100     void Reset() {
00101         size_t i;
00102         for(i=0; i<CSparseMatrixImpl<tType>::m_iR; i++)
00103             m_vecData[i].clear();
00104         m_vecData.clear();
00105         CSparseMatrixImpl<tType>::m_iR = 0; 
00106     }
00107     const tType& GetDefault() const {
00108         return CSparseMatrixImpl<tType>::GetDefault();
00109     }
00110     size_t GetRows() const {
00111         return CSparseMatrixImpl<tType>::GetRows(); 
00112     }
00113     //does not check if [iY][iX] already exists
00114     //nor if the row m_vecData[iY] is already full
00115     void Add(size_t iY, size_t iX, tType v){
00116         CPair<tType> cp;
00117         cp.i = (utype) iX;
00118         cp.v = v;
00119         m_vecData[iY].push_back(cp);
00120         m_bSorted[iY] = false;
00121         m_currentIndex[iY]++;
00122     }
00123     const vector<CPair<tType> >& GetRow(size_t iY) const{
00124         return m_vecData[iY];
00125     }
00126     typename vector<CPair<tType> >::iterator RowBegin(size_t iY){
00127         return m_vecData[iY].begin();
00128     }
00129     typename vector<CPair<tType> >::iterator RowEnd(size_t iY){
00130         return m_vecData[iY].end();
00131     }
00132     void Shrink(){
00133         size_t i;
00134         for(i=0; i<m_vecData.size(); i++)
00135             m_vecData[i].resize(m_currentIndex[i]);
00136     }
00137     void Organize(){
00138         size_t i;
00139         for(i=0; i<m_vecData.size(); i++){
00140             sort(m_vecData[i].begin(), m_vecData[i].end(), CAscendingKey<tType>()); 
00141             m_bSorted[i] = true;
00142         }
00143     }
00144     bool Check(size_t iY, size_t iX){
00145         if(!m_bSorted[iY])
00146             SortRow(iY);
00147         size_t ind = GetIndex(iY, iX);
00148         if(ind!=(size_t) -1) 
00149             return true;
00150         return false;
00151     }
00152     //assume element at this coordinate exists
00153     tType Get(size_t iY, size_t iX) const {
00154         if(!m_bSorted[iY])
00155             SortRow(iY);
00156         size_t ind = GetIndex(iY, iX);
00157         return m_vecData[iY][ind].v;
00158     }
00159     //assume element at this coordinate exists
00160     void Set(size_t iY, size_t iX, tType v) {
00161         if(!m_bSorted[iY])
00162             SortRow(iY);
00163         size_t ind = GetIndex(iY, iX);
00164         m_vecData[iY][ind].v = v;
00165     }
00166     void Increment(size_t iY, size_t iX, tType v){
00167         if(!m_bSorted[iY])
00168             SortRow(iY);
00169         size_t ind = GetIndex(iY, iX);
00170         m_vecData[iY][ind].v += v;
00171     }
00172 
00173 private:
00174     vector<vector<CPair<tType> > > m_vecData;
00175     //parent class contains m_iR and m_Default
00176     vector<bool> m_bSorted;
00177     vector<size_t> m_currentIndex;
00178 
00179     void SortRow(size_t iY){
00180         sort(m_vecData[iY].begin(), m_vecData[iY].end(), CAscendingKey<tType>());
00181         m_bSorted[iY] = true;
00182     }
00183     size_t GetIndex(size_t iY, size_t iX) {
00184         //suppose m_bSorted[iY] is true
00185         return BinarySearch(m_vecData[iY], iX);
00186     }
00187     size_t BinarySearch(vector<CPair<tType> > &A, size_t iX){
00188         int iMax = A.size()-1;
00189         int iMin = 0;
00190         while(iMax>=iMin){
00191             int iMid = (iMin + iMax) / 2;
00192             if(A[iMid].i < iX)
00193                 iMin = iMid + 1;
00194             else if(A[iMid].i > iX)
00195                 iMax = iMid - 1;
00196             else
00197                 return (size_t) iMid;
00198         }
00199         //fprintf(stderr, "Element %d is not found!\n", iX);
00200         return (size_t) -1;
00201     }
00202 };
00203 
00204 //A half-matrix
00205 template<class tType>
00206 class CSparseFlatHalfMatrix : protected CSparseMatrixImpl<tType> {
00207 public:
00208     CSparseFlatHalfMatrix(const tType& Default): CSparseMatrixImpl<tType>(Default){}
00209     ~CSparseFlatHalfMatrix(){
00210         Reset();
00211     }
00212     void Copy(const CSparseFlatMatrix<tType> &cf){ //a full matrix (symmetric)
00213         CSparseMatrixImpl<tType>::m_Default = cf.GetDefault();
00214         Initialize(cf.GetRows());
00215         size_t i,j;
00216         for(i=0; i<CSparseMatrixImpl<tType>::m_iR; i++){
00217             const vector<CPair<tType> > &allR = cf.GetRow(i);
00218             InitializeRow(i, allR.size());
00219             for(j=0; j<allR.size(); j++)
00220                 if(allR[j].i > i)
00221                     Add(i, allR[j].i, allR[j].v);
00222         }
00223         Organize();
00224     }
00225     void Initialize(size_t iR){
00226         Reset();
00227         CSparseMatrixImpl<tType>::m_iR = iR;
00228         m_vecData.resize(iR);
00229         m_currentIndex.resize(iR);
00230         m_bSorted.resize(iR);
00231     }
00232     //num is initial capacity
00233     void InitializeRow(size_t rowID, size_t num){
00234         m_vecData[rowID] = vector<CPair<tType> >();
00235         m_vecData[rowID].reserve(num);
00236         m_currentIndex[rowID] = 0;
00237         m_bSorted[rowID] = false;
00238     }
00239     void Reset() {
00240         size_t i;
00241         for(i=0; i<CSparseMatrixImpl<tType>::m_iR; i++)
00242             m_vecData[i].clear();
00243         m_vecData.clear();
00244         CSparseMatrixImpl<tType>::m_iR = 0; 
00245     }
00246     const tType& GetDefault() const {
00247         return CSparseMatrixImpl<tType>::GetDefault();
00248     }
00249     size_t GetRows() const {
00250         return CSparseMatrixImpl<tType>::GetRows(); 
00251     }
00252     //does not check if [iY][iX] already exists
00253     //nor if the row m_vecData[iY] is already full
00254     void Add(size_t iY, size_t iX, tType v){
00255         AdjustCoord(iY, iX);
00256         CPair<tType> cp;
00257         cp.i = (utype) iX;
00258         cp.v = v;
00259         m_vecData[iY].push_back(cp);
00260         m_bSorted[iY] = false;
00261         m_currentIndex[iY]++;
00262     }
00263     const vector<CPair<tType> >& GetRow(size_t iY) const{
00264         return m_vecData[iY];
00265     }
00266     typename vector<CPair<tType> >::iterator RowBegin(size_t iY){
00267         return m_vecData[iY].begin();
00268     }
00269     typename vector<CPair<tType> >::iterator RowEnd(size_t iY){
00270         return m_vecData[iY].end();
00271     }
00272     void Shrink(){
00273         size_t i;
00274         for(i=0; i<m_vecData.size(); i++)
00275             m_vecData[i].resize(m_currentIndex[i]);
00276     }
00277     void SortRow(size_t iY){
00278         sort(m_vecData[iY].begin(), m_vecData[iY].end(), CAscendingKey<tType>());
00279         m_bSorted[iY] = true;
00280     }
00281     void Organize(){
00282         size_t i;
00283         for(i=0; i<m_vecData.size(); i++)
00284             SortRow(i);
00285     }
00286     void AdjustCoord(size_t &iY, size_t &iX){
00287         if(iY>=iX){ //second must be the greater
00288             size_t tmp = iY;
00289             iY = iX;
00290             iX = tmp;
00291         }
00292     }
00293     bool Check(size_t iY, size_t iX){
00294         AdjustCoord(iY, iX);
00295         if(!m_bSorted[iY]) SortRow(iY);
00296         size_t ind = GetIndex(iY, iX);
00297         if(ind!=(size_t) -1) return true;
00298         return false;
00299     }
00300     CPair<tType>* GetElement(size_t iY, size_t iX){
00301         AdjustCoord(iY, iX);
00302         if(!m_bSorted[iY]) SortRow(iY);
00303         size_t ind = GetIndex(iY, iX);
00304         if(ind==(size_t)-1) return NULL;
00305         return &m_vecData[iY][ind];
00306     }
00307     //assume element at this coordinate exists
00308     tType Get(size_t iY, size_t iX) const {
00309         AdjustCoord(iY, iX);
00310         if(!m_bSorted[iY]) SortRow(iY);
00311         size_t ind = GetIndex(iY, iX);
00312         return m_vecData[iY][ind].v;
00313     }
00314     //assume element at this coordinate exists
00315     void Set(size_t iY, size_t iX, tType v) {
00316         AdjustCoord(iY, iX);
00317         if(!m_bSorted[iY]) SortRow(iY);
00318         size_t ind = GetIndex(iY, iX);
00319         m_vecData[iY][ind].v = v;
00320     }
00321     void Increment(size_t iY, size_t iX, tType v){
00322         AdjustCoord(iY, iX);
00323         if(!m_bSorted[iY]) SortRow(iY);
00324         size_t ind = GetIndex(iY, iX);
00325         m_vecData[iY][ind].v += v;
00326     }
00327 
00328 private:
00329     //parent class contains m_iR and m_Default
00330     vector<vector<CPair<tType> > > m_vecData;
00331     vector<bool> m_bSorted;
00332     vector<size_t> m_currentIndex;
00333 
00334     size_t GetIndex(size_t iY, size_t iX) {
00335         return BinarySearch(m_vecData[iY], iX); //suppose m_bSorted[iY] = true
00336     }
00337     size_t BinarySearch(vector<CPair<tType> > &A, size_t iX){
00338         int iMax = A.size()-1;
00339         int iMin = 0;
00340         while(iMax>=iMin){
00341             int iMid = (iMin + iMax) / 2;
00342             if(A[iMid].i < iX)
00343                 iMin = iMid + 1;
00344             else if(A[iMid].i > iX)
00345                 iMax = iMid - 1;
00346             else
00347                 return (size_t) iMid;
00348         }
00349         return (size_t) -1;
00350     }
00351 };
00365 template<class tType>
00366 class CSparseMapMatrix : protected CSparseMatrixImpl<tType> {
00367 public:
00375     CSparseMapMatrix( const tType& Default ) : CSparseMatrixImpl<tType>( Default ), m_amapData(NULL) { }
00376 
00377     ~CSparseMapMatrix( ) {
00378 
00379         Reset( ); }
00380 
00385     void Reset( ) {
00386 
00387         if( m_amapData )
00388             delete[] m_amapData;
00389         CSparseMatrixImpl<tType>::m_iR = 0; }
00390 
00401     void Initialize( size_t iR ) {
00402 
00403         Reset( );
00404         m_amapData = new std::map<size_t,tType>[ CSparseMatrixImpl<tType>::m_iR = iR ]; }
00405 
00413     size_t GetRows( ) const {
00414 
00415         return CSparseMatrixImpl<tType>::GetRows( ); }
00416 
00437     tType Get( size_t iY, size_t iX ) const {
00438         typename std::map<size_t,tType>::const_iterator iter;
00439 
00440         return ( ( ( iter = m_amapData[ iY ].find( iX ) ) == m_amapData[ iY ].end( ) ) ?
00441             CSparseMatrixImpl<tType>::m_Default : iter->second ); }
00442 
00462     void Set( size_t iY, size_t iX, const tType& Value ) {
00463 
00464         m_amapData[ iY ][ iX ] = Value; }
00465 
00473     const tType& GetDefault( ) const {
00474 
00475         return CSparseMatrixImpl<tType>::GetDefault( ); }
00476 
00477 private:
00478     std::map<size_t,tType>* m_amapData;
00479 };
00480 
00495 template<class tType>
00496 class CSparseListMatrix : protected CSparseMatrixImpl<tType> {
00497 private:
00498     struct SNode {
00499         size_t  m_iX;
00500         tType   m_Value;
00501         SNode*  m_pNext;
00502 
00503         SNode( size_t iX, tType Value, SNode* pNext ) : m_iX(iX), m_Value(Value), m_pNext(pNext) { }
00504     };
00505 
00506 public:
00514     CSparseListMatrix( const tType& Default ) : CSparseMatrixImpl<tType>( Default ), m_apsData(NULL) { }
00515 
00516     ~CSparseListMatrix( ) {
00517 
00518         Reset( ); }
00519 
00524     void Reset( ) {
00525         size_t  i;
00526         SNode*  pNode;
00527         SNode*  pNext;
00528 
00529         if( m_apsData ) {
00530             for( i = 0; i < CSparseMatrixImpl<tType>::m_iR; ++i )
00531                 for( pNode = m_apsData[ i ]; pNode; pNode = pNext ) {
00532                     pNext = pNode->m_pNext;
00533                     delete pNode; }
00534             delete[] m_apsData; }
00535         CSparseMatrixImpl<tType>::m_iR = 0; }
00536 
00547     void Initialize( size_t iR ) {
00548 
00549         Reset( );
00550         m_apsData = new SNode*[ CSparseMatrixImpl<tType>::m_iR = iR ];
00551         memset( m_apsData, 0, CSparseMatrixImpl<tType>::m_iR * sizeof(*m_apsData) ); }
00552 
00573     tType Get( size_t iY, size_t iX ) const {
00574         SNode*  pNode;
00575 
00576         for( pNode = m_apsData[ iY ]; pNode && ( pNode->m_iX >= iX ); pNode = pNode->m_pNext )
00577             if( pNode->m_iX == iX )
00578                 return pNode->m_Value;
00579 
00580         return CSparseMatrixImpl<tType>::m_Default; }
00581 
00601     void Set( size_t iY, size_t iX, const tType& Value ) {
00602         SNode*  pNode;
00603 
00604         if( !( pNode = m_apsData[ iY ] ) || ( iX > pNode->m_iX ) ) {
00605             m_apsData[ iY ] = new SNode( iX, Value, pNode );
00606             return; }
00607         for( ; pNode->m_pNext; pNode = pNode->m_pNext ) {
00608             if( pNode->m_iX == iX ) {
00609                 pNode->m_Value = Value;
00610                 return; }
00611             if( iX > pNode->m_pNext->m_iX ) {
00612                 pNode->m_pNext = new SNode( iX, Value, pNode->m_pNext );
00613                 return; } }
00614         if( pNode->m_iX == iX )
00615             pNode->m_Value = Value;
00616         else
00617             pNode->m_pNext = new SNode( iX, Value, NULL ); }
00618 
00626     const tType& GetDefault( ) const {
00627 
00628         return CSparseMatrixImpl<tType>::GetDefault( ); }
00629 
00637     size_t GetRows( ) const {
00638 
00639         return CSparseMatrixImpl<tType>::GetRows( ); }
00640 
00641 private:
00642     SNode** m_apsData;
00643 };
00644 
00645 }
00646 
00647 #endif // SPARSEMATRIX_H