Sleipnir
src/clustpivot.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "clustpivot.h"
00024 #include "meta.h"
00025 
00026 namespace Sleipnir {
00027 
00056 uint16_t CClustPivot::Cluster( const CDistanceMatrix& MatSimilarities, float dCutoff,
00057     vector<uint16_t>& vecsClusters ) {
00058     size_t          i, j, iRand, iTmp, iPivot;
00059     uint16_t        sRet;
00060     vector<size_t>  veciPerm;
00061     float           d;
00062 
00063     vecsClusters.resize( MatSimilarities.GetSize( ) );
00064     veciPerm.resize( MatSimilarities.GetSize( ) );
00065     // Pick a random permutation of the genes
00066     for( i = 0; i < veciPerm.size( ); ++i )
00067         veciPerm[ i ] = i;
00068     for( i = 0; i < MatSimilarities.GetSize( ); ++i ) {
00069         iRand = rand( ) % ( veciPerm.size( ) - i );
00070         iTmp = veciPerm[ i ];
00071         veciPerm[ i ] = veciPerm[ i + iRand ];
00072         veciPerm[ i + iRand ] = iTmp; }
00073 
00074     // reset the cluster data
00075     for( i = 0; i < vecsClusters.size( ); ++i )
00076         vecsClusters[ i ] = -1;
00077 
00078     for( sRet = i = 0; i < MatSimilarities.GetSize( ); ++i ) {
00079         iPivot = veciPerm[ i ];
00080         // If gene was already clustered (or excluded), continue
00081         if( vecsClusters[ iPivot ] != (uint16_t)-1 )
00082             continue;
00083 
00084         vecsClusters[ iPivot ] = sRet++;
00085         for( j = 0; j < MatSimilarities.GetSize( ); ++j ) {
00086             // check if already clustered (or thrown away)
00087             if( vecsClusters[ j ] != (uint16_t)-1 )
00088                 continue;
00089 
00090         if( !CMeta::IsNaN( d = MatSimilarities.Get( iPivot, j ) ) && ( d > dCutoff ) )
00091             vecsClusters[ j ] = vecsClusters[ iPivot ]; } }
00092 
00093     return sRet; }
00094 
00095 }