Sleipnir
tools/BNServer/BNServer.h
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #ifndef BNSERVER_H
00023 #define BNSERVER_H
00024 
00025 class CDot;
00026 
00027 struct SBNServerData {
00028     const IOntology**               m_apOntologies;
00029     const CBayesNetMinimal&         m_BNDefault;
00030     const CDatabase&                m_Database;
00031     const CGenome&                  m_Genome;
00032     size_t                          m_iLimit;
00033     const CDataMatrix&              m_MatBackgrounds;
00034     const CDataMatrix&              m_MatBetweenCC;
00035     const CDataMatrix&              m_MatBetweenDC;
00036     const CDataMatrix&              m_MatBetweenDD;
00037     const CDataMatrix&              m_MatParameters;
00038     const CDataMatrix&              m_MatWithinC;
00039     const CDataMatrix&              m_MatWithinD;
00040     string                          m_strFiles;
00041     string                          m_strGraphviz;
00042     const vector<CBayesNetMinimal>& m_vecBNs;
00043     const vector<float>&            m_vecdPriors;
00044     const vector<size_t>&           m_veciContexts;
00045     const vector<size_t>&           m_veciDiseases;
00046     const vector<vector<size_t> >&  m_vecveciContexts;
00047     const vector<vector<size_t> >&  m_vecveciDiseases;
00048 
00049     SBNServerData( 
00050         const IOntology**               apOntologies,
00051         const CBayesNetMinimal&         BNDefault,
00052         const CDatabase&                Database,
00053         const CGenome&                  Genome,
00054         size_t                          iLimit,
00055         const CDataMatrix&              MatBackgrounds,
00056         const CDataMatrix&              MatBetweenCC,
00057         const CDataMatrix&              MatBetweenDC,
00058         const CDataMatrix&              MatBetweenDD,
00059         const CDataMatrix&              MatParameters,
00060         const CDataMatrix&              MatWithinC,
00061         const CDataMatrix&              MatWithinD,
00062         const string&                   strFiles,
00063         const string&                   strGraphviz,
00064         const vector<CBayesNetMinimal>& vecBNs,
00065         const vector<float>&            vecdPriors,
00066         const vector<size_t>&           veciContexts,
00067         const vector<size_t>&           veciDiseases,
00068         const vector<vector<size_t> >&  vecveciContexts,
00069         const vector<vector<size_t> >&  vecveciDiseases
00070     ) :
00071         m_apOntologies      (apOntologies),
00072         m_BNDefault         (BNDefault),
00073         m_Database          (Database),
00074         m_Genome            (Genome),
00075         m_iLimit            (iLimit),
00076         m_MatBackgrounds    (MatBackgrounds),
00077         m_MatBetweenCC      (MatBetweenCC),
00078         m_MatBetweenDC      (MatBetweenDC),
00079         m_MatBetweenDD      (MatBetweenDD),
00080         m_MatParameters     (MatParameters),
00081         m_MatWithinC        (MatWithinC),
00082         m_MatWithinD        (MatWithinD),
00083         m_strFiles          (strFiles),
00084         m_strGraphviz       (strGraphviz),
00085         m_vecBNs            (vecBNs),
00086         m_vecdPriors        (vecdPriors),
00087         m_veciContexts      (veciContexts),
00088         m_veciDiseases      (veciDiseases),
00089         m_vecveciContexts   (vecveciContexts),
00090         m_vecveciDiseases   (vecveciDiseases)
00091         { }
00092 };
00093 
00094 class CBNServer : public IServerClient {
00095 public:
00096     static bool Get( size_t, size_t, float*, const Sleipnir::CDatabase&,
00097         const std::vector<Sleipnir::CBayesNetMinimal>&, const Sleipnir::CBayesNetMinimal& );
00098 
00099     CBNServer( SOCKET, const string&, const SBNServerData& );
00100     ~CBNServer( );
00101 
00102     IServerClient* NewInstance( SOCKET, uint32_t, uint16_t );
00103     void Destroy( );
00104     bool ProcessMessage( const std::vector<unsigned char>& );
00105     bool GenerateNetworkIcons( ) const;
00106     bool GenerateAssociations( const char*, size_t );
00107 
00108 private:
00109     typedef size_t (CBNServer::*TPFNProcessor)( const std::vector<unsigned char>&, size_t );
00110 
00111     enum EGraphOutput {
00112         EGraphOutputFile,
00113         EGraphOutputSocket,
00114         EGraphOutputNamed
00115     };
00116 
00117     enum ESetType {
00118         ESetGenes   = 0,
00119         ESetContext = ESetGenes + 1,
00120         ESetDisease = ESetContext + 1
00121     };
00122 
00123     static const size_t         c_iDegree           = 1;
00124     static const size_t         c_iValues           = 4;
00125     static const size_t         c_iOverestimate     = 100;
00126     static const TPFNProcessor  c_apfnProcessors[];
00127     static const size_t         c_iProcessors;
00128     static const float          c_dCutoff;
00129     static const float          c_adColorMin[];
00130     static const float          c_adColorMax[];
00131 
00132     template<class tType>
00133     static bool SetPointer( tType* pPointer, tType Value ) {
00134 
00135         if( pPointer )
00136             *pPointer = Value;
00137 
00138         return !!pPointer; }
00139 
00140     template<class tType>
00141     static bool PushPointer( std::vector<tType>* pvecPointer, tType Value ) {
00142 
00143         if( pvecPointer )
00144             pvecPointer->push_back( Value );
00145 
00146         return !!pvecPointer; }
00147 
00148     template<class tType>
00149     static bool PushPointer( tType* pPointer, std::vector<tType>& vecValues, tType Value ) {
00150 
00151         if( pPointer )
00152             vecValues.push_back( Value );
00153 
00154         return !!pPointer; }
00155 
00156     template<class tType>
00157     static void SetArray( tType* ad, size_t iChunk, size_t iOffset, float dA, float dB, float dC, float dD ) {
00158         float   adParams[]  = {dA, dB, dC, dD};
00159         size_t  i;
00160 
00161         for( i = 0; i < ARRAYSIZE(adParams); ++i )
00162             ad[ ( i * iChunk ) + iOffset ] = adParams[ i ]; }
00163 
00164     static float GetMatrix( const CDataMatrix& Mat, size_t iR, size_t iC ) {
00165 
00166         return ( ( ( iR < Mat.GetRows( ) ) && ( iC < Mat.GetColumns( ) ) ) ? Mat.Get( iR, iC ) :
00167             CMeta::GetNaN( ) ); }
00168 
00169     static float GetBetween( const CDataMatrix& Mat, size_t iR, size_t iOne, size_t iTwo,
00170         size_t iChunk ) {
00171         float   d;
00172 
00173         return ( CMeta::IsNaN( d = GetMatrix( Mat, iR, ( iOne * iChunk ) + iTwo ) ) ? 0 : d ); }
00174 
00175     template<class tType>
00176     static bool Winsorize( std::vector<tType>& vecValues ) {
00177 
00178         return CStatistics::Winsorize( vecValues, ( vecValues.size( ) / 10 ) + 1 ); }
00179 
00180     // Utility
00181     bool Get( size_t, size_t, float* = NULL );
00182     bool Get( size_t, const std::vector<size_t>&, size_t, float* );
00183     bool GetGenes( const std::vector<size_t>&, size_t, float );
00184     bool GetWithin( const std::vector<size_t>&, size_t, float*, std::vector<float>* ) const;
00185     float Evaluate( const vector<vector<float> >& binEffects, vector<unsigned char>& vecbData, size_t iOffset );
00186     // Association processing
00187     bool GetAssociationsSet( unsigned char, const std::vector<size_t>&, size_t ) const;
00188     bool GetAssociationsDC( unsigned char, unsigned char, size_t, size_t, bool = false ) const;
00189     bool GetAssociation( size_t, const std::vector<unsigned char>&, const std::vector<size_t>&, size_t, bool,
00190         float*, float*, float*, std::vector<float>*, std::vector<float>*, float ) const;
00191     bool GetAssociation( const std::vector<size_t>&, const std::vector<size_t>&, size_t, float&,
00192         float&, float& ) const;
00193     // Graph processing
00194     bool GraphCreate( const std::vector<size_t>&, size_t, size_t, float, std::vector<bool>&,
00195         std::vector<size_t>&, Sleipnir::CDat& ) const;
00196     bool GraphWrite( const Sleipnir::CDat&, const std::vector<size_t>&, const std::vector<size_t>&,
00197         const std::vector<bool>&, size_t, EGraphOutput ) const;
00198     bool SelectNeighborsPixie( const std::vector<size_t>&, const std::vector<bool>&, size_t, size_t,
00199         const Sleipnir::CDataMatrix&, std::vector<size_t>& ) const;
00200     bool SelectNeighborsRatio( const std::vector<size_t>&, const std::vector<bool>&, size_t, size_t,
00201         const Sleipnir::CDataMatrix&, std::vector<size_t>& ) const;
00202     bool SendGenes( const std::vector<size_t>&, const std::vector<size_t>& ) const;
00203     // Message processors
00204     size_t ProcessInference( const std::vector<unsigned char>&, size_t );
00205     size_t ProcessCPT( const std::vector<unsigned char>&, size_t, vector<vector<float> >& );
00206     size_t ProcessInferenceOTF( const std::vector<unsigned char>&, size_t );
00207     size_t ProcessEdges( const std::vector<unsigned char>&, size_t );
00208     size_t ProcessData( const std::vector<unsigned char>&, size_t );
00209     size_t ProcessGraph( const std::vector<unsigned char>&, size_t );
00210     size_t ProcessContexts( const std::vector<unsigned char>&, size_t );
00211     size_t ProcessTermFinder( const std::vector<unsigned char>&, size_t );
00212     size_t ProcessDiseases( const std::vector<unsigned char>&, size_t );
00213     size_t ProcessGenes( const std::vector<unsigned char>&, size_t );
00214     size_t ProcessAssociation( const std::vector<unsigned char>&, size_t );
00215     size_t ProcessAssociations( const std::vector<unsigned char>&, size_t );
00216 
00217     size_t GetGenes( ) const {
00218 
00219         return m_sData.m_Database.GetGenes( ); }
00220 
00221     size_t GetContexts( ) const {
00222 
00223         return m_sData.m_vecveciContexts.size( ); }
00224 
00225     size_t GetDiseases( ) const {
00226 
00227         return m_sData.m_vecveciDiseases.size( ); }
00228 
00229     float GetBackground( size_t iContext, size_t iGene ) const {
00230 
00231         return ( ( ( iContext < m_sData.m_MatBackgrounds.GetRows( ) ) &&
00232             ( iGene < m_sData.m_MatBackgrounds.GetColumns( ) ) ) ?
00233             m_sData.m_MatBackgrounds.Get( iContext, iGene ) : 1 ); }
00234 
00235     size_t InitializeDiseases( ) {
00236         size_t  iRet;
00237 
00238         iRet = c_iValues * GetDiseases( );
00239         if( !m_adDiseases )
00240             m_adDiseases = new float[ iRet ];
00241 
00242         return iRet; }
00243 
00244     size_t InitializeGenes( ) {
00245         size_t  iRet;
00246 
00247         iRet = c_iValues * GetGenes( );
00248         if( !m_adGenes )
00249             m_adGenes = new float[ iRet ];
00250 
00251         return iRet; }
00252 
00253     size_t InitializeContexts( ) {
00254         size_t  iRet;
00255 
00256         iRet = c_iValues * GetContexts( );
00257         if( !m_adContexts )
00258             m_adContexts = new float[ iRet ];
00259 
00260         return iRet; }
00261 
00262     const CBayesNetMinimal& GetBN( size_t iContext ) const {
00263 
00264         return ( ( iContext && GetContexts( ) ) ? m_sData.m_vecBNs[ ( iContext - 1 ) % GetContexts( ) ] :
00265             m_sData.m_BNDefault ); }
00266 
00267     const std::string& GetGene( size_t iGene ) const {
00268 
00269         return m_sData.m_Database.GetGene( iGene ); }
00270 
00271     size_t GetGene( const std::string& strGene ) const {
00272 
00273         return m_sData.m_Database.GetGene( strGene ); }
00274 
00275     float GetFraction( size_t iSize ) const {
00276 
00277         return ( ( iSize > m_sData.m_iLimit ) ? ( (float)m_sData.m_iLimit / iSize ) : 1 ); }
00278 
00279     bool IsFraction( float dFraction ) const {
00280 
00281         return ( ( dFraction < 1 ) && ( ( (float)rand( ) / RAND_MAX ) > dFraction ) ); }
00282 
00283     size_t GetContext( unsigned char bDiseases, size_t iContext, size_t iCurrent ) const {
00284 
00285         return ( ( bDiseases || ( iContext != -1 ) ) ? iContext : ( iCurrent + 1 ) ); }
00286 
00287     float GetPValue( float dBetween, float dBackground, float dWithin, size_t iContext, size_t iSmall,
00288         size_t iBig, size_t iCount = 0, bool fZ = false ) const {
00289 //      static const size_t c_iCutoff   = 75;
00290         const float*    adParams;
00291         float           dValue, dStd, dRet, dA, dB, dC;
00292 
00293         dValue = GetPrior( iContext ) * dBetween / dBackground / dWithin;
00294         if( CMeta::IsNaN( dValue ) )
00295             return 1;
00296         if( iSmall > iBig )
00297             swap( iBig, iSmall );
00298 //      if( iBig > c_iCutoff ) {
00299 //          iSmall = max( (size_t)( iSmall * ( (float)c_iCutoff / iBig ) ), (size_t)1 );
00300 //          iBig = c_iCutoff; }
00301         adParams = GetParameters( iContext );
00302         dA = ( ( adParams[ 0 ] * iSmall ) + adParams[ 1 ] ) / ( iSmall + 1 );
00303         dB = adParams[ 2 ];
00304         dC = ( ( adParams[ 3 ] * iSmall ) + adParams[ 4 ] ) / ( iSmall + adParams[ 5 ] );
00305         dStd = ( ( dA * iBig ) + dB ) / ( iBig + dC );
00306         dRet = fZ ? ( ( dValue - 1 ) / dStd ) : ( 1 - (float)CStatistics::NormalCDF( dValue, 1, dStd ) );
00307 /*
00308 cerr << iContext << ":  " << iBig << '\t' << iSmall << endl;
00309 cerr << dBetween << '\t' << dBackground << '\t' << dWithin << '\t' << GetPrior( iContext ) << endl;
00310 for( size_t i = 0; i < 6; ++i )
00311 cerr << ( i ? "\t" : "" ) << adParams[ i ];
00312 cerr << endl << dA << '\t' << dB << '\t' << dC << '\t' << dStd << endl;
00313 cerr << dValue << ":    " << dRet << endl;
00314 //*/
00315         if( !fZ && iCount )
00316             dRet *= iCount;
00317 
00318         return dRet; }
00319 
00320     const float* GetParameters( size_t iContext ) const {
00321 
00322         return m_sData.m_MatParameters.Get( iContext ); }
00323 
00324     float GetPrior( size_t iContext ) const {
00325 
00326 // Alternative: calculate priors in the current context rather than the global context
00327         iContext = 0;
00328         return m_sData.m_vecdPriors[ iContext ]; }
00329 
00330     const CDatabase& GetDatabase( ) const {
00331 
00332         return m_sData.m_Database; }
00333 
00334     const string& GetFiles( ) const {
00335 
00336         return m_sData.m_strFiles; }
00337 
00338     const vector<size_t>& GetContext( size_t iContext ) const {
00339 
00340         return m_sData.m_vecveciContexts[ iContext ]; }
00341 
00342     const vector<size_t>& GetDisease( size_t iDisease ) const {
00343 
00344         return m_sData.m_vecveciDiseases[ iDisease ]; }
00345 
00346     CGenome& GetGenome( ) const {
00347 
00348         return *(CGenome*)&m_sData.m_Genome; }
00349 
00350     const IOntology* GetOntology( size_t iOntology ) const {
00351         size_t  i;
00352 
00353         for( i = 0; ( i < iOntology ) && m_sData.m_apOntologies[ i ]; ++i );
00354         return m_sData.m_apOntologies[ i ]; }
00355 
00356     size_t GetDatachunk( ) const {
00357 
00358         return ( ( GetDatabase( ).GetDatasets( ) + 1 ) / 2 ); }
00359 
00360     const vector<size_t>& GetDiseaseGenes( ) const {
00361 
00362         return m_sData.m_veciDiseases; }
00363 
00364     const vector<vector<size_t> >& GetGeneSets( unsigned char bDiseases ) const {
00365 
00366         return ( bDiseases ? m_sData.m_vecveciDiseases : m_sData.m_vecveciContexts ); }
00367 
00368     float GetWithinContext( size_t iContext, size_t iSet ) const {
00369 
00370 // Alternative: calculate within scores in the current context rather than the global context
00371         iContext = 0;
00372         return GetMatrix( m_sData.m_MatWithinC, iContext, iSet ); }
00373 
00374     float GetWithinDisease( size_t iContext, size_t iSet ) const {
00375 
00376 // Alternative: calculate within scores in the current context rather than the global context
00377         iContext = 0;
00378         return GetMatrix( m_sData.m_MatWithinD, iContext, iSet ); }
00379 
00380     float GetWithin( unsigned char bDiseases, size_t iContext, size_t iSet ) const {
00381 
00382         return ( bDiseases ? GetWithinDisease( iContext, iSet ) : GetWithinContext( iContext, iSet ) ); }
00383 
00384     float GetBetween( size_t iContext, unsigned char bDiseaseOne, size_t iOne, unsigned char bDiseaseTwo,
00385         size_t iTwo ) const {
00386 
00387         return ( bDiseaseOne ?
00388             ( bDiseaseTwo ? GetBetweenDD( iContext, iOne, iTwo ) : GetBetweenDC( iContext, iOne, iTwo ) ) :
00389             ( bDiseaseTwo ? GetBetweenDC( iContext, iTwo, iOne ) : GetBetweenCC( iContext, iOne, iTwo ) ) ); }
00390 
00391     float GetBetweenCC( size_t iContext, size_t iOne, size_t iTwo ) const {
00392 
00393         return GetBetween( m_sData.m_MatBetweenCC, iContext, iOne, iTwo, GetContexts( ) ); }
00394 
00395     float GetBetweenDD( size_t iContext, size_t iOne, size_t iTwo ) const {
00396 
00397         return GetBetween( m_sData.m_MatBetweenDD, iContext, iOne, iTwo, GetDiseases( ) ); }
00398 
00399     float GetBetweenDC( size_t iContext, size_t iDisease, size_t iProcess ) const {
00400 
00401         return GetBetween( m_sData.m_MatBetweenDC, iContext, iDisease, iProcess, GetContexts( ) ); }
00402 
00403     float*                  m_adContexts;
00404     float*                  m_adDiseases;
00405     float*                  m_adGenes;
00406     SOCKET                  m_iSocket;
00407     const SBNServerData&    m_sData;
00408     string                  m_strConnection;
00409 };
00410 
00411 #endif // BNSERVER_H