// Sleipnir
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef COALESCESTRUCTSI_H 00023 #define COALESCESTRUCTSI_H 00024 00025 #ifdef _MSC_VER 00026 #include <hash_map> 00027 #else // _MSC_VER 00028 #include <ext/hash_map> 00029 00030 #define hash_value hash<const char*>( ) 00031 #define stdext __gnu_cxx 00032 #endif // _MSC_VER 00033 00034 #include "coalescebasei.h" 00035 #include "fastai.h" 00036 #include "fullmatrix.h" 00037 00038 namespace Sleipnir { 00039 00040 class CCoalesceMotifLibrary; 00041 class CFASTA; 00042 class CHierarchy; 00043 class CPCL; 00044 00045 struct SCoalesceModifiers { 00046 00047 void Initialize( const CPCL& ); 00048 00049 bool Add( const CFASTA* pWiggle ) { 00050 00051 if( !pWiggle ) 00052 return false; 00053 m_vecpWiggles.push_back( pWiggle ); 00054 return true; } 00055 00056 std::vector<const CFASTA*> m_vecpWiggles; 00057 std::vector<std::vector<size_t> > m_vecveciPCL2Wiggles; 00058 
}; 00059 00060 struct SCoalesceModifierCache { 00061 SCoalesceModifierCache( const SCoalesceModifiers& Modifiers ) : m_Modifiers(Modifiers) { } 00062 00063 void Get( size_t ); 00064 void InitializeWeight( size_t, size_t ); 00065 void AddWeight( size_t, size_t, size_t ); 00066 void SetType( const std::string& ); 00067 00068 float GetWeight( size_t iK ) const { 00069 size_t i, iWiggles; 00070 00071 for( iWiggles = i = 0; i < m_veciWiggleTypes.size( ); ++i ) 00072 if( m_veciWiggleTypes[ i ] != -1 ) 00073 iWiggles++; 00074 00075 return ( iWiggles ? ( m_dWeight / iWiggles ) : iK ); } 00076 00077 const SCoalesceModifiers& m_Modifiers; 00078 std::vector<std::vector<SFASTAWiggle> > m_vecvecsWiggles; 00079 std::vector<size_t> m_veciWiggleTypes; 00080 float m_dWeight; 00081 }; 00082 00083 struct SMotifMatch { 00084 enum EType { 00085 ETypePValue, 00086 ETypeRMSE, 00087 ETypeJensenShannon 00088 }; 00089 00090 SMotifMatch( ) { } 00091 00092 SMotifMatch( uint32_t iMotif, const std::string& strType, 00093 CCoalesceSequencerBase::ESubsequence eSubsequence, float dZ, float dAverage ) : m_iMotif(iMotif), 00094 m_strType(strType), m_eSubsequence(eSubsequence), m_dZ(dZ), m_dAverage(dAverage) { } 00095 00096 bool Open( std::istream&, CCoalesceMotifLibrary& ); 00097 uint32_t Open( const CHierarchy&, const std::vector<SMotifMatch>&, CCoalesceMotifLibrary&, size_t& ); 00098 uint32_t Open( const SMotifMatch&, const SMotifMatch&, CCoalesceMotifLibrary& ); 00099 std::string Save( const CCoalesceMotifLibrary*, bool = false, float = 0, float = 0, float = 0, 00100 bool = false ) const; 00101 bool Label( const CCoalesceMotifLibrary&, EType, float, float, float ); 00102 00103 bool operator==( const SMotifMatch& sMotif ) const { 00104 00105 return ( ( m_iMotif == sMotif.m_iMotif ) && ( m_strType == sMotif.m_strType ) && 00106 ( m_eSubsequence == sMotif.m_eSubsequence ) ); } 00107 00108 bool operator!=( const SMotifMatch& sMotif ) const { 00109 00110 return !( *this == sMotif ); } 00111 00112 bool 
operator<( const SMotifMatch& sMotif ) const { 00113 00114 if( m_iMotif == sMotif.m_iMotif ) { 00115 if( m_strType == sMotif.m_strType ) 00116 return ( m_eSubsequence < sMotif.m_eSubsequence ); 00117 return ( m_strType < sMotif.m_strType ); } 00118 00119 return ( m_iMotif < sMotif.m_iMotif ); } 00120 00121 size_t GetHash( ) const { 00122 size_t iMotif, iType, iSubsequence; 00123 00124 iMotif = m_iMotif * ( (size_t)-1 / 20000 ); 00125 iType = stdext::hash_value( m_strType.c_str( ) ) * ( (size_t)-1 / 5 ); 00126 iSubsequence = m_eSubsequence * ( (size_t)-1 / CCoalesceSequencerBase::ESubsequenceEnd ); 00127 00128 return ( iMotif ^ iType ^ iSubsequence ); } 00129 00130 uint32_t m_iMotif; 00131 std::string m_strType; 00132 CCoalesceSequencerBase::ESubsequence m_eSubsequence; 00133 float m_dZ; 00134 float m_dAverage; 00135 std::vector<std::pair<std::string, float> > m_vecprstrdKnown; 00136 }; 00137 00138 struct SCoalesceDataset { 00139 template<class tType> 00140 SCoalesceDataset( const tType& Conditions ) { 00141 00142 m_veciConditions.resize( Conditions.size( ) ); 00143 std::copy( Conditions.begin( ), Conditions.end( ), m_veciConditions.begin( ) ); } 00144 00145 SCoalesceDataset( size_t iCondition ) { 00146 00147 m_veciConditions.resize( 1 ); 00148 m_veciConditions[ 0 ] = iCondition; } 00149 00150 bool CalculateCovariance( const CPCL& ); 00151 00152 bool IsCondition( size_t iCondition ) const { 00153 00154 return ( std::find( m_veciConditions.begin( ), m_veciConditions.end( ), iCondition ) != 00155 m_veciConditions.end( ) ); } 00156 00157 size_t GetConditions( ) const { 00158 00159 return m_veciConditions.size( ); } 00160 00161 size_t GetCondition( size_t iCondition ) const { 00162 00163 return m_veciConditions[ iCondition ]; } 00164 00165 std::vector<size_t> m_veciConditions; 00166 CDataMatrix m_MatSigmaChol; 00167 CDataMatrix m_MatSigmaInv; 00168 double m_dSigmaDetSqrt; 00169 std::vector<float> m_vecdStdevs; 00170 }; 00171 00172 } 00173 00174 #endif // 
COALESCESTRUCTSI_H