Sleipnir
src/coalescestructsi.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef COALESCESTRUCTSI_H
00023 #define COALESCESTRUCTSI_H
00024 
00025 #ifdef _MSC_VER
00026 #include <hash_map>
00027 #else // _MSC_VER
00028 #include <ext/hash_map>
00029 
00030 #define hash_value  hash<const char*>( )
00031 #define stdext      __gnu_cxx
00032 #endif // _MSC_VER
00033 
00034 #include "coalescebasei.h"
00035 #include "fastai.h"
00036 #include "fullmatrix.h"
00037 
00038 namespace Sleipnir {
00039 
00040 class CCoalesceMotifLibrary;
00041 class CFASTA;
00042 class CHierarchy;
00043 class CPCL;
00044 
00045 struct SCoalesceModifiers {
00046 
00047     void Initialize( const CPCL& );
00048 
00049     bool Add( const CFASTA* pWiggle ) {
00050 
00051         if( !pWiggle )
00052             return false;
00053         m_vecpWiggles.push_back( pWiggle );
00054         return true; }
00055 
00056     std::vector<const CFASTA*>          m_vecpWiggles;
00057     std::vector<std::vector<size_t> >   m_vecveciPCL2Wiggles;
00058 };
00059 
00060 struct SCoalesceModifierCache {
00061     SCoalesceModifierCache( const SCoalesceModifiers& Modifiers ) : m_Modifiers(Modifiers) { }
00062 
00063     void Get( size_t );
00064     void InitializeWeight( size_t, size_t );
00065     void AddWeight( size_t, size_t, size_t );
00066     void SetType( const std::string& );
00067 
00068     float GetWeight( size_t iK ) const {
00069         size_t  i, iWiggles;
00070 
00071         for( iWiggles = i = 0; i < m_veciWiggleTypes.size( ); ++i )
00072             if( m_veciWiggleTypes[ i ] != -1 )
00073                 iWiggles++;
00074 
00075         return ( iWiggles ? ( m_dWeight / iWiggles ) : iK ); }
00076 
00077     const SCoalesceModifiers&               m_Modifiers;
00078     std::vector<std::vector<SFASTAWiggle> > m_vecvecsWiggles;
00079     std::vector<size_t>                     m_veciWiggleTypes;
00080     float                                   m_dWeight;
00081 };
00082 
00083 struct SMotifMatch {
00084     enum EType {
00085         ETypePValue,
00086         ETypeRMSE,
00087         ETypeJensenShannon
00088     };
00089 
00090     SMotifMatch( ) { }
00091 
00092     SMotifMatch( uint32_t iMotif, const std::string& strType,
00093         CCoalesceSequencerBase::ESubsequence eSubsequence, float dZ, float dAverage ) : m_iMotif(iMotif),
00094         m_strType(strType), m_eSubsequence(eSubsequence), m_dZ(dZ), m_dAverage(dAverage) { }
00095 
00096     bool Open( std::istream&, CCoalesceMotifLibrary& );
00097     uint32_t Open( const CHierarchy&, const std::vector<SMotifMatch>&, CCoalesceMotifLibrary&, size_t& );
00098     uint32_t Open( const SMotifMatch&, const SMotifMatch&, CCoalesceMotifLibrary& );
00099     std::string Save( const CCoalesceMotifLibrary*, bool = false, float = 0, float = 0, float = 0,
00100         bool = false ) const;
00101     bool Label( const CCoalesceMotifLibrary&, EType, float, float, float );
00102 
00103     bool operator==( const SMotifMatch& sMotif ) const {
00104 
00105         return ( ( m_iMotif == sMotif.m_iMotif ) && ( m_strType == sMotif.m_strType ) &&
00106             ( m_eSubsequence == sMotif.m_eSubsequence ) ); }
00107 
00108     bool operator!=( const SMotifMatch& sMotif ) const {
00109 
00110         return !( *this == sMotif ); }
00111 
00112     bool operator<( const SMotifMatch& sMotif ) const {
00113 
00114         if( m_iMotif == sMotif.m_iMotif ) {
00115             if( m_strType == sMotif.m_strType )
00116                 return ( m_eSubsequence < sMotif.m_eSubsequence );
00117             return ( m_strType < sMotif.m_strType ); }
00118 
00119         return ( m_iMotif < sMotif.m_iMotif ); }
00120 
00121     size_t GetHash( ) const {
00122         size_t  iMotif, iType, iSubsequence;
00123 
00124         iMotif = m_iMotif * ( (size_t)-1 / 20000 );
00125         iType = stdext::hash_value( m_strType.c_str( ) ) * ( (size_t)-1 / 5 );
00126         iSubsequence = m_eSubsequence * ( (size_t)-1 / CCoalesceSequencerBase::ESubsequenceEnd );
00127 
00128         return ( iMotif ^ iType ^ iSubsequence ); }
00129 
00130     uint32_t                                    m_iMotif;
00131     std::string                                 m_strType;
00132     CCoalesceSequencerBase::ESubsequence        m_eSubsequence;
00133     float                                       m_dZ;
00134     float                                       m_dAverage;
00135     std::vector<std::pair<std::string, float> > m_vecprstrdKnown;
00136 };
00137 
00138 struct SCoalesceDataset {
00139     template<class tType>
00140     SCoalesceDataset( const tType& Conditions ) {
00141 
00142         m_veciConditions.resize( Conditions.size( ) );
00143         std::copy( Conditions.begin( ), Conditions.end( ), m_veciConditions.begin( ) ); }
00144 
00145     SCoalesceDataset( size_t iCondition ) {
00146 
00147         m_veciConditions.resize( 1 );
00148         m_veciConditions[ 0 ] = iCondition; }
00149 
00150     bool CalculateCovariance( const CPCL& );
00151 
00152     bool IsCondition( size_t iCondition ) const {
00153 
00154         return ( std::find( m_veciConditions.begin( ), m_veciConditions.end( ), iCondition ) !=
00155             m_veciConditions.end( ) ); }
00156 
00157     size_t GetConditions( ) const {
00158 
00159         return m_veciConditions.size( ); }
00160 
00161     size_t GetCondition( size_t iCondition ) const {
00162 
00163         return m_veciConditions[ iCondition ]; }
00164 
00165     std::vector<size_t> m_veciConditions;
00166     CDataMatrix         m_MatSigmaChol;
00167     CDataMatrix         m_MatSigmaInv;
00168     double              m_dSigmaDetSqrt;
00169     std::vector<float>  m_vecdStdevs;
00170 };
00171 
00172 }
00173 
00174 #endif // COALESCESTRUCTSI_H