Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "clustpivot.h" 00024 #include "meta.h" 00025 00026 namespace Sleipnir { 00027 00056 uint16_t CClustPivot::Cluster( const CDistanceMatrix& MatSimilarities, float dCutoff, 00057 vector<uint16_t>& vecsClusters ) { 00058 size_t i, j, iRand, iTmp, iPivot; 00059 uint16_t sRet; 00060 vector<size_t> veciPerm; 00061 float d; 00062 00063 vecsClusters.resize( MatSimilarities.GetSize( ) ); 00064 veciPerm.resize( MatSimilarities.GetSize( ) ); 00065 // Pick a random permutation of the genes 00066 for( i = 0; i < veciPerm.size( ); ++i ) 00067 veciPerm[ i ] = i; 00068 for( i = 0; i < MatSimilarities.GetSize( ); ++i ) { 00069 iRand = rand( ) % ( veciPerm.size( ) - i ); 00070 iTmp = veciPerm[ i ]; 00071 veciPerm[ i ] = veciPerm[ i + iRand ]; 00072 veciPerm[ i + iRand ] = iTmp; } 00073 00074 // reset the cluster data 00075 for( i = 0; i < vecsClusters.size( ); ++i ) 00076 vecsClusters[ i ] = -1; 00077 00078 for( sRet = i = 0; i < MatSimilarities.GetSize( ); ++i ) { 00079 iPivot = veciPerm[ i ]; 00080 // If gene was already clustered (or excluded), continue 00081 if( vecsClusters[ iPivot ] != (uint16_t)-1 ) 00082 continue; 00083 00084 vecsClusters[ iPivot ] = sRet++; 00085 for( j = 0; j < MatSimilarities.GetSize( ); ++j ) { 00086 // check if already clustered (or thrown away) 00087 if( vecsClusters[ j ] != (uint16_t)-1 ) 00088 continue; 00089 00090 if( !CMeta::IsNaN( d = MatSimilarities.Get( iPivot, j ) ) && ( d > dCutoff ) ) 00091 vecsClusters[ j ] = vecsClusters[ iPivot ]; } } 00092 00093 return sRet; } 00094 00095 }