// Sleipnir
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #ifndef BNSERVER_H 00023 #define BNSERVER_H 00024 00025 class CDot; 00026 00027 struct SBNServerData { 00028 const IOntology** m_apOntologies; 00029 const CBayesNetMinimal& m_BNDefault; 00030 const CDatabase& m_Database; 00031 const CGenome& m_Genome; 00032 size_t m_iLimit; 00033 const CDataMatrix& m_MatBackgrounds; 00034 const CDataMatrix& m_MatBetweenCC; 00035 const CDataMatrix& m_MatBetweenDC; 00036 const CDataMatrix& m_MatBetweenDD; 00037 const CDataMatrix& m_MatParameters; 00038 const CDataMatrix& m_MatWithinC; 00039 const CDataMatrix& m_MatWithinD; 00040 string m_strFiles; 00041 string m_strGraphviz; 00042 const vector<CBayesNetMinimal>& m_vecBNs; 00043 const vector<float>& m_vecdPriors; 00044 const vector<size_t>& m_veciContexts; 00045 const vector<size_t>& m_veciDiseases; 00046 const vector<vector<size_t> >& m_vecveciContexts; 00047 const 
vector<vector<size_t> >& m_vecveciDiseases; 00048 00049 SBNServerData( 00050 const IOntology** apOntologies, 00051 const CBayesNetMinimal& BNDefault, 00052 const CDatabase& Database, 00053 const CGenome& Genome, 00054 size_t iLimit, 00055 const CDataMatrix& MatBackgrounds, 00056 const CDataMatrix& MatBetweenCC, 00057 const CDataMatrix& MatBetweenDC, 00058 const CDataMatrix& MatBetweenDD, 00059 const CDataMatrix& MatParameters, 00060 const CDataMatrix& MatWithinC, 00061 const CDataMatrix& MatWithinD, 00062 const string& strFiles, 00063 const string& strGraphviz, 00064 const vector<CBayesNetMinimal>& vecBNs, 00065 const vector<float>& vecdPriors, 00066 const vector<size_t>& veciContexts, 00067 const vector<size_t>& veciDiseases, 00068 const vector<vector<size_t> >& vecveciContexts, 00069 const vector<vector<size_t> >& vecveciDiseases 00070 ) : 00071 m_apOntologies (apOntologies), 00072 m_BNDefault (BNDefault), 00073 m_Database (Database), 00074 m_Genome (Genome), 00075 m_iLimit (iLimit), 00076 m_MatBackgrounds (MatBackgrounds), 00077 m_MatBetweenCC (MatBetweenCC), 00078 m_MatBetweenDC (MatBetweenDC), 00079 m_MatBetweenDD (MatBetweenDD), 00080 m_MatParameters (MatParameters), 00081 m_MatWithinC (MatWithinC), 00082 m_MatWithinD (MatWithinD), 00083 m_strFiles (strFiles), 00084 m_strGraphviz (strGraphviz), 00085 m_vecBNs (vecBNs), 00086 m_vecdPriors (vecdPriors), 00087 m_veciContexts (veciContexts), 00088 m_veciDiseases (veciDiseases), 00089 m_vecveciContexts (vecveciContexts), 00090 m_vecveciDiseases (vecveciDiseases) 00091 { } 00092 }; 00093 00094 class CBNServer : public IServerClient { 00095 public: 00096 static bool Get( size_t, size_t, float*, const Sleipnir::CDatabase&, 00097 const std::vector<Sleipnir::CBayesNetMinimal>&, const Sleipnir::CBayesNetMinimal& ); 00098 00099 CBNServer( SOCKET, const string&, const SBNServerData& ); 00100 ~CBNServer( ); 00101 00102 IServerClient* NewInstance( SOCKET, uint32_t, uint16_t ); 00103 void Destroy( ); 00104 bool 
ProcessMessage( const std::vector<unsigned char>& ); 00105 bool GenerateNetworkIcons( ) const; 00106 bool GenerateAssociations( const char*, size_t ); 00107 00108 private: 00109 typedef size_t (CBNServer::*TPFNProcessor)( const std::vector<unsigned char>&, size_t ); 00110 00111 enum EGraphOutput { 00112 EGraphOutputFile, 00113 EGraphOutputSocket, 00114 EGraphOutputNamed 00115 }; 00116 00117 enum ESetType { 00118 ESetGenes = 0, 00119 ESetContext = ESetGenes + 1, 00120 ESetDisease = ESetContext + 1 00121 }; 00122 00123 static const size_t c_iDegree = 1; 00124 static const size_t c_iValues = 4; 00125 static const size_t c_iOverestimate = 100; 00126 static const TPFNProcessor c_apfnProcessors[]; 00127 static const size_t c_iProcessors; 00128 static const float c_dCutoff; 00129 static const float c_adColorMin[]; 00130 static const float c_adColorMax[]; 00131 00132 template<class tType> 00133 static bool SetPointer( tType* pPointer, tType Value ) { 00134 00135 if( pPointer ) 00136 *pPointer = Value; 00137 00138 return !!pPointer; } 00139 00140 template<class tType> 00141 static bool PushPointer( std::vector<tType>* pvecPointer, tType Value ) { 00142 00143 if( pvecPointer ) 00144 pvecPointer->push_back( Value ); 00145 00146 return !!pvecPointer; } 00147 00148 template<class tType> 00149 static bool PushPointer( tType* pPointer, std::vector<tType>& vecValues, tType Value ) { 00150 00151 if( pPointer ) 00152 vecValues.push_back( Value ); 00153 00154 return !!pPointer; } 00155 00156 template<class tType> 00157 static void SetArray( tType* ad, size_t iChunk, size_t iOffset, float dA, float dB, float dC, float dD ) { 00158 float adParams[] = {dA, dB, dC, dD}; 00159 size_t i; 00160 00161 for( i = 0; i < ARRAYSIZE(adParams); ++i ) 00162 ad[ ( i * iChunk ) + iOffset ] = adParams[ i ]; } 00163 00164 static float GetMatrix( const CDataMatrix& Mat, size_t iR, size_t iC ) { 00165 00166 return ( ( ( iR < Mat.GetRows( ) ) && ( iC < Mat.GetColumns( ) ) ) ? 
Mat.Get( iR, iC ) : 00167 CMeta::GetNaN( ) ); } 00168 00169 static float GetBetween( const CDataMatrix& Mat, size_t iR, size_t iOne, size_t iTwo, 00170 size_t iChunk ) { 00171 float d; 00172 00173 return ( CMeta::IsNaN( d = GetMatrix( Mat, iR, ( iOne * iChunk ) + iTwo ) ) ? 0 : d ); } 00174 00175 template<class tType> 00176 static bool Winsorize( std::vector<tType>& vecValues ) { 00177 00178 return CStatistics::Winsorize( vecValues, ( vecValues.size( ) / 10 ) + 1 ); } 00179 00180 // Utility 00181 bool Get( size_t, size_t, float* = NULL ); 00182 bool Get( size_t, const std::vector<size_t>&, size_t, float* ); 00183 bool GetGenes( const std::vector<size_t>&, size_t, float ); 00184 bool GetWithin( const std::vector<size_t>&, size_t, float*, std::vector<float>* ) const; 00185 float Evaluate( const vector<vector<float> >& binEffects, vector<unsigned char>& vecbData, size_t iOffset ); 00186 // Association processing 00187 bool GetAssociationsSet( unsigned char, const std::vector<size_t>&, size_t ) const; 00188 bool GetAssociationsDC( unsigned char, unsigned char, size_t, size_t, bool = false ) const; 00189 bool GetAssociation( size_t, const std::vector<unsigned char>&, const std::vector<size_t>&, size_t, bool, 00190 float*, float*, float*, std::vector<float>*, std::vector<float>*, float ) const; 00191 bool GetAssociation( const std::vector<size_t>&, const std::vector<size_t>&, size_t, float&, 00192 float&, float& ) const; 00193 // Graph processing 00194 bool GraphCreate( const std::vector<size_t>&, size_t, size_t, float, std::vector<bool>&, 00195 std::vector<size_t>&, Sleipnir::CDat& ) const; 00196 bool GraphWrite( const Sleipnir::CDat&, const std::vector<size_t>&, const std::vector<size_t>&, 00197 const std::vector<bool>&, size_t, EGraphOutput ) const; 00198 bool SelectNeighborsPixie( const std::vector<size_t>&, const std::vector<bool>&, size_t, size_t, 00199 const Sleipnir::CDataMatrix&, std::vector<size_t>& ) const; 00200 bool SelectNeighborsRatio( const 
std::vector<size_t>&, const std::vector<bool>&, size_t, size_t, 00201 const Sleipnir::CDataMatrix&, std::vector<size_t>& ) const; 00202 bool SendGenes( const std::vector<size_t>&, const std::vector<size_t>& ) const; 00203 // Message processors 00204 size_t ProcessInference( const std::vector<unsigned char>&, size_t ); 00205 size_t ProcessCPT( const std::vector<unsigned char>&, size_t, vector<vector<float> >& ); 00206 size_t ProcessInferenceOTF( const std::vector<unsigned char>&, size_t ); 00207 size_t ProcessEdges( const std::vector<unsigned char>&, size_t ); 00208 size_t ProcessData( const std::vector<unsigned char>&, size_t ); 00209 size_t ProcessGraph( const std::vector<unsigned char>&, size_t ); 00210 size_t ProcessContexts( const std::vector<unsigned char>&, size_t ); 00211 size_t ProcessTermFinder( const std::vector<unsigned char>&, size_t ); 00212 size_t ProcessDiseases( const std::vector<unsigned char>&, size_t ); 00213 size_t ProcessGenes( const std::vector<unsigned char>&, size_t ); 00214 size_t ProcessAssociation( const std::vector<unsigned char>&, size_t ); 00215 size_t ProcessAssociations( const std::vector<unsigned char>&, size_t ); 00216 00217 size_t GetGenes( ) const { 00218 00219 return m_sData.m_Database.GetGenes( ); } 00220 00221 size_t GetContexts( ) const { 00222 00223 return m_sData.m_vecveciContexts.size( ); } 00224 00225 size_t GetDiseases( ) const { 00226 00227 return m_sData.m_vecveciDiseases.size( ); } 00228 00229 float GetBackground( size_t iContext, size_t iGene ) const { 00230 00231 return ( ( ( iContext < m_sData.m_MatBackgrounds.GetRows( ) ) && 00232 ( iGene < m_sData.m_MatBackgrounds.GetColumns( ) ) ) ? 
00233 m_sData.m_MatBackgrounds.Get( iContext, iGene ) : 1 ); } 00234 00235 size_t InitializeDiseases( ) { 00236 size_t iRet; 00237 00238 iRet = c_iValues * GetDiseases( ); 00239 if( !m_adDiseases ) 00240 m_adDiseases = new float[ iRet ]; 00241 00242 return iRet; } 00243 00244 size_t InitializeGenes( ) { 00245 size_t iRet; 00246 00247 iRet = c_iValues * GetGenes( ); 00248 if( !m_adGenes ) 00249 m_adGenes = new float[ iRet ]; 00250 00251 return iRet; } 00252 00253 size_t InitializeContexts( ) { 00254 size_t iRet; 00255 00256 iRet = c_iValues * GetContexts( ); 00257 if( !m_adContexts ) 00258 m_adContexts = new float[ iRet ]; 00259 00260 return iRet; } 00261 00262 const CBayesNetMinimal& GetBN( size_t iContext ) const { 00263 00264 return ( ( iContext && GetContexts( ) ) ? m_sData.m_vecBNs[ ( iContext - 1 ) % GetContexts( ) ] : 00265 m_sData.m_BNDefault ); } 00266 00267 const std::string& GetGene( size_t iGene ) const { 00268 00269 return m_sData.m_Database.GetGene( iGene ); } 00270 00271 size_t GetGene( const std::string& strGene ) const { 00272 00273 return m_sData.m_Database.GetGene( strGene ); } 00274 00275 float GetFraction( size_t iSize ) const { 00276 00277 return ( ( iSize > m_sData.m_iLimit ) ? ( (float)m_sData.m_iLimit / iSize ) : 1 ); } 00278 00279 bool IsFraction( float dFraction ) const { 00280 00281 return ( ( dFraction < 1 ) && ( ( (float)rand( ) / RAND_MAX ) > dFraction ) ); } 00282 00283 size_t GetContext( unsigned char bDiseases, size_t iContext, size_t iCurrent ) const { 00284 00285 return ( ( bDiseases || ( iContext != -1 ) ) ? 
iContext : ( iCurrent + 1 ) ); } 00286 00287 float GetPValue( float dBetween, float dBackground, float dWithin, size_t iContext, size_t iSmall, 00288 size_t iBig, size_t iCount = 0, bool fZ = false ) const { 00289 // static const size_t c_iCutoff = 75; 00290 const float* adParams; 00291 float dValue, dStd, dRet, dA, dB, dC; 00292 00293 dValue = GetPrior( iContext ) * dBetween / dBackground / dWithin; 00294 if( CMeta::IsNaN( dValue ) ) 00295 return 1; 00296 if( iSmall > iBig ) 00297 swap( iBig, iSmall ); 00298 // if( iBig > c_iCutoff ) { 00299 // iSmall = max( (size_t)( iSmall * ( (float)c_iCutoff / iBig ) ), (size_t)1 ); 00300 // iBig = c_iCutoff; } 00301 adParams = GetParameters( iContext ); 00302 dA = ( ( adParams[ 0 ] * iSmall ) + adParams[ 1 ] ) / ( iSmall + 1 ); 00303 dB = adParams[ 2 ]; 00304 dC = ( ( adParams[ 3 ] * iSmall ) + adParams[ 4 ] ) / ( iSmall + adParams[ 5 ] ); 00305 dStd = ( ( dA * iBig ) + dB ) / ( iBig + dC ); 00306 dRet = fZ ? ( ( dValue - 1 ) / dStd ) : ( 1 - (float)CStatistics::NormalCDF( dValue, 1, dStd ) ); 00307 /* 00308 cerr << iContext << ": " << iBig << '\t' << iSmall << endl; 00309 cerr << dBetween << '\t' << dBackground << '\t' << dWithin << '\t' << GetPrior( iContext ) << endl; 00310 for( size_t i = 0; i < 6; ++i ) 00311 cerr << ( i ? 
"\t" : "" ) << adParams[ i ]; 00312 cerr << endl << dA << '\t' << dB << '\t' << dC << '\t' << dStd << endl; 00313 cerr << dValue << ": " << dRet << endl; 00314 //*/ 00315 if( !fZ && iCount ) 00316 dRet *= iCount; 00317 00318 return dRet; } 00319 00320 const float* GetParameters( size_t iContext ) const { 00321 00322 return m_sData.m_MatParameters.Get( iContext ); } 00323 00324 float GetPrior( size_t iContext ) const { 00325 00326 // Alternative: calculate priors in the current context rather than the global context 00327 iContext = 0; 00328 return m_sData.m_vecdPriors[ iContext ]; } 00329 00330 const CDatabase& GetDatabase( ) const { 00331 00332 return m_sData.m_Database; } 00333 00334 const string& GetFiles( ) const { 00335 00336 return m_sData.m_strFiles; } 00337 00338 const vector<size_t>& GetContext( size_t iContext ) const { 00339 00340 return m_sData.m_vecveciContexts[ iContext ]; } 00341 00342 const vector<size_t>& GetDisease( size_t iDisease ) const { 00343 00344 return m_sData.m_vecveciDiseases[ iDisease ]; } 00345 00346 CGenome& GetGenome( ) const { 00347 00348 return *(CGenome*)&m_sData.m_Genome; } 00349 00350 const IOntology* GetOntology( size_t iOntology ) const { 00351 size_t i; 00352 00353 for( i = 0; ( i < iOntology ) && m_sData.m_apOntologies[ i ]; ++i ); 00354 return m_sData.m_apOntologies[ i ]; } 00355 00356 size_t GetDatachunk( ) const { 00357 00358 return ( ( GetDatabase( ).GetDatasets( ) + 1 ) / 2 ); } 00359 00360 const vector<size_t>& GetDiseaseGenes( ) const { 00361 00362 return m_sData.m_veciDiseases; } 00363 00364 const vector<vector<size_t> >& GetGeneSets( unsigned char bDiseases ) const { 00365 00366 return ( bDiseases ? 
m_sData.m_vecveciDiseases : m_sData.m_vecveciContexts ); } 00367 00368 float GetWithinContext( size_t iContext, size_t iSet ) const { 00369 00370 // Alternative: calculate within scores in the current context rather than the global context 00371 iContext = 0; 00372 return GetMatrix( m_sData.m_MatWithinC, iContext, iSet ); } 00373 00374 float GetWithinDisease( size_t iContext, size_t iSet ) const { 00375 00376 // Alternative: calculate within scores in the current context rather than the global context 00377 iContext = 0; 00378 return GetMatrix( m_sData.m_MatWithinD, iContext, iSet ); } 00379 00380 float GetWithin( unsigned char bDiseases, size_t iContext, size_t iSet ) const { 00381 00382 return ( bDiseases ? GetWithinDisease( iContext, iSet ) : GetWithinContext( iContext, iSet ) ); } 00383 00384 float GetBetween( size_t iContext, unsigned char bDiseaseOne, size_t iOne, unsigned char bDiseaseTwo, 00385 size_t iTwo ) const { 00386 00387 return ( bDiseaseOne ? 00388 ( bDiseaseTwo ? GetBetweenDD( iContext, iOne, iTwo ) : GetBetweenDC( iContext, iOne, iTwo ) ) : 00389 ( bDiseaseTwo ? GetBetweenDC( iContext, iTwo, iOne ) : GetBetweenCC( iContext, iOne, iTwo ) ) ); } 00390 00391 float GetBetweenCC( size_t iContext, size_t iOne, size_t iTwo ) const { 00392 00393 return GetBetween( m_sData.m_MatBetweenCC, iContext, iOne, iTwo, GetContexts( ) ); } 00394 00395 float GetBetweenDD( size_t iContext, size_t iOne, size_t iTwo ) const { 00396 00397 return GetBetween( m_sData.m_MatBetweenDD, iContext, iOne, iTwo, GetDiseases( ) ); } 00398 00399 float GetBetweenDC( size_t iContext, size_t iDisease, size_t iProcess ) const { 00400 00401 return GetBetween( m_sData.m_MatBetweenDC, iContext, iDisease, iProcess, GetContexts( ) ); } 00402 00403 float* m_adContexts; 00404 float* m_adDiseases; 00405 float* m_adGenes; 00406 SOCKET m_iSocket; 00407 const SBNServerData& m_sData; 00408 string m_strConnection; 00409 }; 00410 00411 #endif // BNSERVER_H