Sleipnir
tools/BNServer/BNServer.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "cmdline.h"
00024 #include "BNServer.h"
00025 #include "dot.h"
00026 
00027 static const char               c_szXDSL[]                      = ".xdsl";
00028 static const char               c_szDSL[]                       = ".dsl";
00029 static const char               c_szDOT[]                       = ".dot";
00030 static const char               c_szSVG[]                       = ".svg";
00031 static const size_t             c_iContextOffset                = 2;
00032 static const size_t             c_iParamOffset                  = 4;
00033 const float                     CBNServer::c_dCutoff            = 0.1f;
00034 const float                     CBNServer::c_adColorMin[]       = {0, 1, 0};
00035 const float                     CBNServer::c_adColorMax[]       = {1, 0, 0};
00036 const CBNServer::TPFNProcessor  CBNServer::c_apfnProcessors[]   =
00037     {&CBNServer::ProcessInference, &CBNServer::ProcessData, &CBNServer::ProcessGraph,
00038     &CBNServer::ProcessContexts, &CBNServer::ProcessTermFinder, &CBNServer::ProcessDiseases,
00039     &CBNServer::ProcessGenes, &CBNServer::ProcessAssociation, &CBNServer::ProcessAssociations,
00040     &CBNServer::ProcessInferenceOTF, &CBNServer::ProcessEdges};
00041 
00042 struct SPixie {
00043     size_t  m_iNode;
00044     float   m_dScore;
00045 
00046     SPixie( size_t iNode, float dScore ) : m_iNode(iNode), m_dScore(dScore) { }
00047 
00048     bool operator<( const SPixie& sPixie ) const {
00049 
00050         return ( m_dScore < sPixie.m_dScore ); }
00051 };
00052 
00053 int main( int iArgs, char** aszArgs ) {
00054     gengetopt_args_info         sArgs;
00055     CServer                     Server;
00056     CBayesNetSmile              BNSmile;
00057     ifstream                    ifsm, ifsmGenes;
00058     vector<string>              vecstrLine;
00059     size_t                      i, j, k;
00060     uint32_t                    iSize;
00061     ofstream                    ofsm;
00062     int                         iRet;
00063     COntologyKEGG               KEGG;
00064     COntologyOBO                    GOBP, GOMF, GOCC;
00065     set<size_t>                 setiContexts;
00066 // Server data
00067     vector<float>               vecdPriors;
00068     CBayesNetMinimal            BNDefault;
00069     vector<CBayesNetMinimal>    vecBNs;
00070 
00071 
00072     CDataMatrix                 MatBackgrounds, MatParameters, MatWithinC, MatWithinD;
00073     CDataMatrix                 MatBetweenCC, MatBetweenDD, MatBetweenDC;
00074     vector<vector<size_t> >     vecveciDiseases, vecveciContexts;
00075     vector<size_t>              veciDiseases, veciContexts;
00076     CGenome                     Genome;
00077     const IOntology*            apOntologies[]  = {&GOBP, &GOMF, &GOCC, &KEGG, NULL };
00078 
00079     iRet = cmdline_parser2( iArgs, aszArgs, &sArgs, 0, 1, 0 );
00080     CDatabase Database(sArgs.is_nibble_flag);
00081 
00082     if( sArgs.config_arg )
00083         iRet = cmdline_parser_configfile( sArgs.config_arg, &sArgs, 0, 0, 1 ) && iRet;
00084     if( iRet ) {
00085         cmdline_parser_print_help( );
00086         return iRet; }
00087     CMeta Meta( sArgs.verbosity_arg );
00088 
00089 // Open Gene Ontology
00090     if( sArgs.go_onto_arg ) {
00091         cerr << "Loading the Gene Ontology..." << endl;
00092         ifsm.clear( );
00093         ifsm.open( sArgs.go_onto_arg );
00094         if( sArgs.go_anno_arg ) {
00095             ifsmGenes.clear( );
00096             ifsmGenes.open( sArgs.go_anno_arg ); }
00097         if( !COntologyOBO::Open( ifsm, ifsmGenes, Genome, GOBP, GOMF, GOCC, false, true ) ) {
00098             cerr << "Could not open: " << sArgs.go_onto_arg << ", " << sArgs.go_anno_arg << endl;
00099             return 1; }
00100         ifsm.close( );
00101         if( sArgs.go_anno_arg )
00102             ifsmGenes.close( ); }
00103 
00104 // Open KEGG
00105     if( sArgs.kegg_arg ) {
00106         cerr << "Loading KEGG..." << endl;
00107         ifsm.clear( );
00108         ifsm.open( sArgs.kegg_arg );
00109         if( !KEGG.Open( ifsm, Genome, sArgs.kegg_org_arg, true ) ) {
00110             cerr << "Could not open: " << sArgs.kegg_arg << endl;
00111             return 1; }
00112         ifsm.close( ); }
00113 
00114 // Open database
00115     cerr << "Loading the database..." << endl;
00116     if( !Database.Open( sArgs.database_arg ) ) {
00117         cerr << "Could not open: " << sArgs.database_arg << endl;
00118         return 1; }
00119 // Open Bayes networks
00120     if( sArgs.minimal_in_flag ) {
00121         cerr << "Loading minimal Bayesian classifiers..." << endl;
00122         ifsm.clear( );
00123         ifsm.open( sArgs.networks_arg, ios_base::binary );
00124         if( !BNDefault.Open( ifsm ) ) {
00125             cerr << "Could not read: " << sArgs.networks_arg << endl;
00126             return 1; }
00127         ifsm.read( (char*)&iSize, sizeof(iSize) );
00128         vecBNs.resize( iSize );
00129         for( i = 0; i < vecBNs.size( ); ++i )
00130             if( !vecBNs[ i ].Open( ifsm ) ) {
00131                 cerr << "Could not read: " << sArgs.networks_arg << " (" << i << ")" << endl;
00132                 return 1; }
00133         ifsm.close( ); }
00134     if( !sArgs.minimal_in_flag ) {
00135         if( !sArgs.default_arg ) {
00136             cmdline_parser_print_help( );
00137             return 1; }
00138         if( !( BNSmile.Open( sArgs.default_arg ) && BNDefault.Open( BNSmile ) ) ) {
00139             cerr << "Could not open: " << sArgs.default_arg << endl;
00140             return 1; }
00141         BNDefault.SetID( sArgs.default_arg ); }
00142 // Open context IDs and p-value parameters
00143     {
00144         CPCL    PCLContexts( false );
00145 
00146         cerr << "Loading contexts..." << endl;
00147         if( !PCLContexts.Open( sArgs.input_arg, c_iParamOffset - 1 ) ) {
00148             cerr << "Could not open: " << ( sArgs.input_arg ? sArgs.input_arg : "standard input" ) << endl;
00149             return 1; }
00150         MatParameters.Initialize( PCLContexts.GetGenes( ) + 1, PCLContexts.GetExperiments( ) );
00151         for( i = 0; i < PCLContexts.GetGenes( ); ++i )
00152             MatParameters.Set( i + 1, PCLContexts.Get( i ) );
00153 
00154         if( !sArgs.minimal_in_flag ) {
00155             vecBNs.resize( PCLContexts.GetGenes( ) );
00156             for( i = 0; i < vecBNs.size( ); ++i ) {
00157                 const string&   strContext  = PCLContexts.GetFeature( i, c_iContextOffset );
00158 
00159                 if( !( BNSmile.Open( ( (string)sArgs.networks_arg + '/' + CMeta::Filename( strContext ) +
00160                     ( sArgs.xdsl_flag ? c_szXDSL : c_szDSL ) ).c_str( ) ) &&
00161                     vecBNs[ i ].Open( BNSmile ) ) ) {
00162                     cerr << "Could not open: " << strContext << endl;
00163                     return 1; }
00164                 vecBNs[ i ].SetID( strContext ); } }
00165     }
00166 // Open global p-value parameters
00167     if( sArgs.global_given )
00168     {
00169         CPCL    PCLParams( false );
00170 
00171         cerr << "Loading global p-value parameters..." << endl;
00172         if( !PCLParams.Open( sArgs.global_arg, 0 ) ) {
00173             cerr << "Could not open: " << sArgs.global_arg << endl;
00174             return 1; }
00175         MatParameters.Set( 0, 0, (float)atof( PCLParams.GetGene( 0 ).c_str( ) ) );
00176         for( i = 0; i < PCLParams.GetExperiments( ); ++i )
00177             MatParameters.Set( 0, i + 1, PCLParams.Get( 0, i ) );
00178     }
00179 // Save minimal Bayes nets
00180     if( sArgs.minimal_out_arg ) {
00181         cerr << "Saving minimal Bayesian classifiers..." << endl;
00182         ofsm.open( sArgs.minimal_out_arg, ios_base::binary );
00183         BNDefault.Save( ofsm );
00184         iSize = (uint32_t)vecBNs.size( );
00185         ofsm.write( (const char*)&iSize, sizeof(iSize) );
00186         for( i = 0; i < vecBNs.size( ); ++i )
00187             vecBNs[ i ].Save( ofsm );
00188         ofsm.close( ); }
00189 // Open context<->gene mapping
00190     if( sArgs.contexts_given )
00191     {
00192         CPCL    PCLContextGenes( false );
00193 
00194         cerr << "Loading context contents..." << endl;
00195         if( !PCLContextGenes.Open( sArgs.contexts_arg, 1 ) ) {
00196             cerr << "Could not open: " << sArgs.contexts_arg << endl;
00197             return 1; }
00198         vecveciContexts.resize( vecBNs.size( ) );
00199         for( i = 0; i < PCLContextGenes.GetGenes( ); ++i ) {
00200             j = atoi( PCLContextGenes.GetGene( i ).c_str( ) ) - 1;
00201             k = atoi( PCLContextGenes.GetFeature( i, 1 ).c_str( ) ) - 1;
00202             if( j >= vecveciContexts.size( ) ) {
00203                 cerr << "Unknown context: " << j << " (" << PCLContextGenes.GetGene( i ) << ')' << endl;
00204                 return 1; }
00205             if( k >= Database.GetGenes( ) ) {
00206                 cerr << "Unknown gene: " << k << " (" << PCLContextGenes.GetFeature( i, 1 ) << ')' << endl;
00207                 return 1; }
00208             setiContexts.insert( k );
00209             vecveciContexts[ j ].push_back( k ); }
00210         veciContexts.resize( setiContexts.size( ) );
00211         copy( setiContexts.begin( ), setiContexts.end( ), veciContexts.begin( ) );
00212     }
00213 // Open disease<->gene mapping
00214     if( sArgs.diseases_given )
00215     {
00216         CPCL        PCLDiseaseGenes( false );
00217 
00218         cerr << "Loading disease contents..." << endl;
00219         if( !PCLDiseaseGenes.Open( sArgs.diseases_arg, 1 ) ) {
00220             cerr << "Could not open: " << sArgs.diseases_arg << endl;
00221             return 1; }
00222         for( i = j = 0; i < PCLDiseaseGenes.GetGenes( ); ++i )
00223             if( ( k = atoi( PCLDiseaseGenes.GetGene( i ).c_str( ) ) ) > j )
00224                 j = k;
00225         vecveciDiseases.resize( j );
00226         setiContexts.clear( );
00227         for( i = 0; i < PCLDiseaseGenes.GetGenes( ); ++i ) {
00228             j = atoi( PCLDiseaseGenes.GetGene( i ).c_str( ) ) - 1;
00229             k = atoi( PCLDiseaseGenes.GetFeature( i, 1 ).c_str( ) ) - 1;
00230             if( j >= vecveciContexts.size( ) ) {
00231                 cerr << "Unknown disease: " << j << " (" << PCLDiseaseGenes.GetGene( i ) << ')' << endl;
00232                 return 1; }
00233             if( k >= Database.GetGenes( ) ) {
00234                 cerr << "Unknown gene: " << k << " (" << PCLDiseaseGenes.GetFeature( i, 1 ) << ')' << endl;
00235                 return 1; }
00236             setiContexts.insert( k );
00237             vecveciDiseases[ j ].push_back( k ); }
00238         veciDiseases.resize( setiContexts.size( ) );
00239         copy( setiContexts.begin( ), setiContexts.end( ), veciDiseases.begin( ) );
00240     }
00241 // Open gene backgrounds
00242     cerr << "Loading gene backgrounds..." << endl;
00243     if( sArgs.backgrounds_arg && !MatBackgrounds.Open( sArgs.backgrounds_arg ) ) {
00244         cerr << "Could not open: " << sArgs.backgrounds_arg << endl;
00245         return 1; }
00246 // Initialize context priors
00247     vecdPriors.resize( MatBackgrounds.GetRows( ) );
00248     fill( vecdPriors.begin( ), vecdPriors.end( ), 0.0f );
00249     for( i = 0; i < vecdPriors.size( ); ++i ) {
00250         for( j = 0; j < MatBackgrounds.GetColumns( ); ++j )
00251             vecdPriors[ i ] += MatBackgrounds.Get( i, j );
00252         vecdPriors[ i ] /= MatBackgrounds.GetColumns( ); }
00253 // Open context withins
00254     if( sArgs.within_c_arg ) {
00255         cerr << "Loading within context scores..." << endl;
00256         if( !MatWithinC.Open( sArgs.within_c_arg ) ) {
00257             cerr << "Could not open: " << sArgs.within_c_arg << endl;
00258             return 1; } }
00259 // Open disease withins
00260     if( sArgs.within_d_arg ) {
00261         cerr << "Loading within disease scores..." << endl;
00262         if( !MatWithinD.Open( sArgs.within_d_arg ) ) {
00263             cerr << "Could not open: " << sArgs.within_d_arg << endl;
00264             return 1; } }
00265 // Open context betweens
00266     if( sArgs.between_cc_arg ) {
00267         cerr << "Loading between context scores..." << endl;
00268         if( !MatBetweenCC.Open( sArgs.between_cc_arg ) ) {
00269             cerr << "Could not open: " << sArgs.between_cc_arg << endl;
00270             return 1; } }
00271 // Open disease betweens
00272     if( sArgs.between_dd_arg ) {
00273         cerr << "Loading between disease scores..." << endl;
00274         if( !MatBetweenDD.Open( sArgs.between_dd_arg ) ) {
00275             cerr << "Could not open: " << sArgs.between_dd_arg << endl;
00276             return 1; } }
00277 // Open context-disease betweens
00278     if( sArgs.between_dc_arg ) {
00279         cerr << "Loading between disease-context scores..." << endl;
00280         if( !MatBetweenDC.Open( sArgs.between_dc_arg ) ) {
00281             cerr << "Could not open: " << sArgs.between_dc_arg << endl;
00282             return 1; } }
00283 
00284     SBNServerData   sData(
00285         apOntologies,
00286         BNDefault,
00287         Database,
00288         Genome,
00289         sArgs.limit_arg,
00290         MatBackgrounds,
00291         MatBetweenCC,
00292         MatBetweenDC,
00293         MatBetweenDD,
00294         MatParameters,
00295         MatWithinC,
00296         MatWithinD,
00297         sArgs.files_arg,
00298         sArgs.graphviz_arg,
00299         vecBNs,
00300         vecdPriors,
00301         veciContexts,
00302         veciDiseases,
00303         vecveciContexts,
00304         vecveciDiseases
00305         );
00306     CBNServer   BNServer( 0, "", sData );
00307     if( sArgs.networklets_flag )
00308         return ( BNServer.GenerateNetworkIcons( ) ? 0 : 1 );
00309     if( sArgs.assoc_diseases_arg )
00310         return ( BNServer.GenerateAssociations( sArgs.assoc_diseases_arg, sArgs.assoc_context_arg ) ? 0 : 1 );
00311 
00312     Server.Initialize( sArgs.port_arg, sArgs.timeout_arg, &BNServer );
00313 #ifdef WIN32
00314     pthread_win32_process_attach_np( );
00315 #endif // WIN32
00316     Server.Start( );
00317 #ifdef WIN32
00318     pthread_win32_process_detach_np( );
00319 #endif // WIN32
00320 
00321     return 0; }
00322 
00323 bool CBNServer::Get( size_t iGene, size_t iContext, float* adValues, const CDatabase& Database,
00324     const vector<CBayesNetMinimal>& vecBNs, const CBayesNetMinimal& BNDefault ) {
00325     const CBayesNetMinimal& BNet        = ( iContext && vecBNs.size( ) ) ?
00326                                             vecBNs[ ( iContext - 1 ) % vecBNs.size( ) ] : BNDefault;
00327     vector<unsigned char>   vecbData;
00328 
00329     if( !( Database.Get( iGene, vecbData ) && BNet.Evaluate( vecbData, adValues, Database.GetGenes( ) ) ) )
00330         return false;
00331     adValues[ iGene % Database.GetGenes( ) ] = CMeta::GetNaN( );
00332 
00333     return true; }
00334 
00335 CBNServer::CBNServer( SOCKET iSocket, const string& strConnection, const SBNServerData& sData ) :
00336     m_iSocket(iSocket), m_strConnection(strConnection), m_sData(sData), m_adContexts(NULL),
00337     m_adDiseases(NULL), m_adGenes(NULL) {
00338 
00339     if( m_strConnection.length( ) > 0 )
00340         cerr << "New connection from: " << m_strConnection << endl; }
00341 
00342 CBNServer::~CBNServer( ) {
00343 
00344     if( m_adGenes )
00345         delete[] m_adGenes;
00346     if( m_adContexts )
00347         delete[] m_adContexts;
00348     if( m_adDiseases )
00349         delete[] m_adDiseases; }
00350 
00351 IServerClient* CBNServer::NewInstance( SOCKET iSocket, uint32_t iHost, uint16_t sPort ) {
00352     string  strConnection;
00353     char    acBuffer[ 16 ];
00354     in_addr sAddr;
00355 
00356 #pragma warning(disable : 4996)
00357     sprintf( acBuffer, "%hu", sPort );
00358 #pragma warning(default : 4996)
00359     sAddr.s_addr = htonl( iHost );
00360     strConnection = (string)inet_ntoa( sAddr ) + ":" + acBuffer;
00361     return new CBNServer( iSocket, strConnection, m_sData ); }
00362 
00363 void CBNServer::Destroy( ) {
00364 
00365     cerr << "Disconnected: " << m_strConnection << endl;
00366 
00367     delete this; }
00368 
00369 bool CBNServer::ProcessMessage( const vector<unsigned char>& vecbMessage ) {
00370     size_t  i, iProcessed, iOffset;
00371 
00372     for( iOffset = 0; iOffset < vecbMessage.size( ); iOffset += ( iProcessed + 1 ) ) {
00373         cerr << "LOG    " << time( NULL ) << '\t' << m_strConnection << endl; //'\t' << hex;
00374         //for( i = 0; i < vecbMessage.size( ); ++i )
00375         //  cerr << setfill( '0' ) << setw( 2 ) << (unsigned int)vecbMessage[ i ];
00376         //cerr << dec << endl;
00377         if( vecbMessage[ iOffset ] >= ARRAYSIZE(c_apfnProcessors) ) {
00378             cerr << m_strConnection << " unknown opcode: " << (int)vecbMessage[ iOffset ] << endl;
00379             return false; }
00380         else {
00381             cerr << m_strConnection << " opcode: " << (int)vecbMessage[ iOffset ] << endl;
00382         }
00383         if( ( iProcessed = (this->*c_apfnProcessors[ vecbMessage[ iOffset ] ])( vecbMessage,
00384             iOffset + 1 ) ) == -1 )
00385             return false; }
00386 
00387     return true; }
00388     
00389 size_t CBNServer::ProcessInference( const vector<unsigned char>& vecbMessage, size_t iOffset ) {
00390     size_t      iStart;
00391     uint32_t    iGene, iContext;
00392 
00393     if( ( iOffset + sizeof(iContext) ) > vecbMessage.size( ) )
00394         return -1;
00395     iStart = iOffset;
00396     iContext = *(uint32_t*)&vecbMessage[ iOffset ];
00397     for( iOffset += sizeof(iContext); ( iOffset + sizeof(iGene) ) <= vecbMessage.size( );
00398         iOffset += sizeof(iGene) )
00399         if( !( ( iGene = *(uint32_t*)&vecbMessage[ iOffset ] ) && Get( iGene - 1, iContext ) ) )
00400             return -1;
00401 
00402     return ( iOffset - iStart ); }
00403 
00404 
00405 size_t CBNServer::ProcessCPT( const vector<unsigned char>& vecbMessage, size_t iOffset, vector<vector<float> >& binEffects ) {
00406     size_t      i, j, iStart;
00407     uint32_t    iGene, iContext, iDataCount, iBinsCount, iData, iGenes, iChunk, iSize, iGeneOffset;
00408     float   bineffect;
00409     vector<unsigned char> vecbData;
00410 
00411     cerr << "Processing CPT" << endl;
00412 
00413     binEffects.resize(GetDatabase().GetDatasets());
00414 
00415     iStart = iOffset;
00416     iDataCount = *(uint32_t*)&vecbMessage[ iOffset ];
00417     cerr << "Total datasets: " << iDataCount << endl;
00418 
00419     // Load bin effects from message
00420     for ( i = 0, iOffset += sizeof(iDataCount); i < iDataCount && iOffset < vecbMessage.size(); i++ ) {
00421         iData = *(uint32_t*)&vecbMessage[ iOffset ];
00422         iOffset += sizeof(iData);
00423 
00424         if ( iData >= binEffects.size() ) {
00425         cerr << "Bad dataset ID: " << iData << endl;
00426         return -1;
00427         }
00428 
00429         iBinsCount = *(uint32_t*)&vecbMessage[ iOffset ];
00430         iOffset += sizeof(iBinsCount);
00431         binEffects[iData].resize(iBinsCount);
00432 
00433         for ( j = 0; j < iBinsCount; j++, iOffset += sizeof(float) ) {
00434         bineffect = *(float*)&vecbMessage[ iOffset ];
00435         binEffects[iData][j] = bineffect;
00436         }
00437     }
00438     return iOffset;
00439 }
00440 
00441 
00442 size_t CBNServer::ProcessInferenceOTF( const vector<unsigned char>& vecbMessage, size_t iOffset ) {
00443     size_t      i, j, iStart;
00444     uint32_t    iGene, iContext, iData, iGenes, iChunk, iSize, iGeneOffset;
00445     float   bineffect;
00446     vector<vector<float> > binEffects;
00447     vector<unsigned char> vecbData;
00448     float* fVals;
00449 
00450     cerr << "Inference OTF: " << vecbMessage.size() << endl;
00451 
00452     iOffset = ProcessCPT( vecbMessage, iOffset, binEffects );
00453 
00454     iGenes = GetDatabase().GetGenes();
00455     iChunk = ( GetDatabase().GetDatasets() + 1 ) / 2;
00456 
00457     // Load gene id
00458     for ( ; iOffset < vecbMessage.size() ; iOffset += sizeof(uint32_t) ) {
00459         iGene = *(uint32_t*)&vecbMessage[ iOffset ];
00460         cerr << "Gene id: " << iGene << endl;
00461 
00462         // Arg -- BNServer genes are 1-indexed
00463         if( !iGene )
00464         continue;
00465         iGene--;
00466     
00467         // Infer posteriors
00468         vecbData.clear();
00469         GetDatabase().Get( iGene, vecbData );
00470         fVals = new float[iGenes];
00471 
00472         for ( i = 0, iGeneOffset = 0; i < iGenes; i++, iGeneOffset += iChunk ) {
00473         fVals[i] = Evaluate( binEffects, vecbData, iGeneOffset ); 
00474         }
00475 
00476         // Send results back
00477         iSize = (uint32_t)( GetGenes( ) * sizeof(*fVals) );
00478         send( m_iSocket, (char*)&iSize, sizeof(iSize), 0 );
00479         send( m_iSocket, (char*)fVals, iSize, 0 ); 
00480     }
00481 
00482     return ( iOffset - iStart ); }
00483 
00484 
00485 size_t CBNServer::ProcessEdges( const vector<unsigned char>& vecbMessage, size_t iOffset ) {
00486     size_t      i, j, iStart;
00487     uint32_t    iGeneOne, iGeneTwo, iGenes, iChunk, iSize, iGeneOffset, iEdges;
00488     float   bineffect;
00489     vector<vector<float> > binEffects;
00490     float* fVals;
00491     vector<unsigned char> vecbData;
00492     unsigned char c;
00493     float fLogOdds = 0;
00494 
00495     iStart = iOffset;
00496 
00497     iOffset = ProcessCPT( vecbMessage, iOffset, binEffects );
00498 
00499     iEdges = *(uint32_t*)&vecbMessage[ iOffset ];
00500     fVals = new float[iEdges];
00501 
00502     cerr << "Edges: " << iEdges << endl;
00503 
00504     // Load gene id pairs
00505     for ( j = 0, iOffset += sizeof(iEdges); iOffset < vecbMessage.size() && j < iEdges ; j++ ) {
00506         iGeneOne = *(uint32_t*)&vecbMessage[ iOffset ];
00507         iOffset += sizeof(iGeneOne);
00508         iGeneTwo = *(uint32_t*)&vecbMessage[ iOffset ];
00509         iOffset += sizeof(iGeneTwo);
00510         //cerr << "Genes: " << iGeneOne << "\t" << iGeneTwo << endl;
00511 
00512         iGeneOne -= 1;
00513         iGeneTwo -= 1;
00514         if ( iGeneOne < 0 or iGeneTwo < 0 )
00515         continue;
00516 
00517         fLogOdds = 0;
00518         vecbData.clear();
00519         GetDatabase().Get(iGeneOne, iGeneTwo, vecbData);
00520         for( i = 0; i < GetDatabase().GetDatasets(); ++i ) {
00521         if ( binEffects[i].size() == 0 ) // Skip dataset
00522             continue;
00523         c = vecbData[ ( i / 2 ) ];
00524         if( i % 2 )
00525             c >>= 4;
00526         if( ( c &= 0xF ) == 0xF ) // Skip missing data
00527             continue;
00528         //if ( iGeneTwo == 1  || iGeneTwo == 0 )
00529         //    cerr << i << "\t" << (size_t)c << endl;
00530         fLogOdds += binEffects[i][(size_t)c];
00531         }
00532         fVals[j] = fLogOdds;
00533     }
00534 
00535     // Send results back
00536     iSize = (uint32_t)( iEdges * sizeof(float) );
00537     send( m_iSocket, (char*)&iSize, sizeof(iSize), 0 );
00538     send( m_iSocket, (char*)fVals, iSize, 0 ); 
00539 
00540     return ( iOffset - iStart );
00541 }
00542 
00543 float CBNServer::Evaluate( const vector<vector<float> >& binEffects, vector<unsigned char>& vecbDatum, size_t iOffset ) {
00544 
00545     unsigned char   c;
00546     size_t  i;
00547     float fLogOdds = 0;
00548 
00549 
00550     for( i = 0; i < GetDatabase().GetDatasets(); ++i ) {
00551         if ( binEffects[i].size() == 0 ) // Skip dataset
00552             continue;
00553 
00554         c = vecbDatum[ ( i / 2 ) + iOffset ];
00555         if( i % 2 )
00556             c >>= 4;
00557         if( ( c &= 0xF ) == 0xF ) // Skip missing data
00558             continue;
00559         //if ( debug ) 
00560         //  cerr << i << "\t" << (size_t)c << endl;
00561 
00562         fLogOdds += binEffects[i][(size_t)c];
00563     }
00564 
00565     return fLogOdds;
00566 
00567 }
00568 
00569 bool CBNServer::Get( size_t iGene, size_t iContext, float* adValues ) {
00570     float*      adTarget;
00571             uint32_t    iSize;
00572 
00573     cerr << m_strConnection << " inferring " << iGene  << " (" << GetGene( iGene ) << ") in " <<
00574         iContext << endl;
00575     if( !( adTarget = adValues ) ) {
00576         InitializeGenes( );
00577         adTarget = m_adGenes; }
00578     if( !CBNServer::Get( iGene, iContext, adTarget, GetDatabase( ), m_sData.m_vecBNs, m_sData.m_BNDefault ) )
00579         return false;
00580     if( !adValues ) {
00581         iSize = (uint32_t)( GetGenes( ) * sizeof(*m_adGenes) );
00582         send( m_iSocket, (char*)&iSize, sizeof(iSize), 0 );
00583         send( m_iSocket, (char*)m_adGenes, iSize, 0 ); }
00584 
00585     return true; }
00586 
00587 bool CBNServer::Get( size_t iGene, const vector<size_t>& veciGenes, size_t iContext, float* adValues ) {
00588     const CBayesNetMinimal& BNet    = GetBN( iContext );
00589     vector<unsigned char>   vecbData;
00590 
00591     return ( GetDatabase( ).Get( iGene, veciGenes, vecbData ) &&
00592         BNet.Evaluate( vecbData, adValues, veciGenes.size( ) ) ); }
00593 
00594 size_t CBNServer::ProcessData( const vector<unsigned char>& vecbMessage, size_t iOffset ) {
00595     uint32_t                iSize, iOne, iTwo;
00596     vector<unsigned char>   vecbData, vecbSend;
00597     size_t                  i;
00598     unsigned char           b, bValue;
00599 
00600     iSize = sizeof(iOne) + sizeof(iTwo);
00601     if( ( iOffset + iSize ) > vecbMessage.size( ) )
00602         return -1;
00603     iOne = *(uint32_t*)&vecbMessage[ iOffset ];
00604     iTwo = *(uint32_t*)&vecbMessage[ iOffset + sizeof(iOne) ];
00605     if( !( iOne-- && iTwo-- ) )
00606         return -1;
00607     cerr << m_strConnection << " requested " << GetGene( iOne ) << ',' << GetGene( iTwo ) << endl;
00608     if( !GetDatabase( ).Get( iOne, iTwo, vecbData ) )
00609         return -1;
00610 
00611     vecbSend.resize( vecbData.size( ) * 2 );
00612     for( i = 0; i < vecbData.size( ); ++i ) {
00613         b = vecbData[ i ];
00614         if( ( bValue = ( b & 0xF ) ) == 0xF )
00615             bValue = -1;
00616         vecbSend[ 2 * i ] = bValue;
00617         if( ( bValue = ( ( b >> 4 ) & 0xF ) ) == 0xF )
00618             bValue = -1;
00619         vecbSend[ ( 2 * i ) + 1 ] = bValue; }
00620     iOne = (uint32_t)vecbSend.size( );
00621     send( m_iSocket, (char*)&iOne, sizeof(iOne), 0 );
00622     send( m_iSocket, (char*)&vecbSend[ 0 ], (int)vecbSend.size( ), 0 );
00623 
00624     return iSize; }
00625 
00626 size_t CBNServer::ProcessGraph( const vector<unsigned char>& vecbMessage, size_t iOffset ) {
00627     size_t                      iRet, i, iSize;
00628     float                       dEdgeAggressiveness;
00629     uint32_t                    iGene, iContext, iLimit;
00630     vector<size_t>              veciQuery, veciNeighbors;
00631     set<size_t>                 setiQuery;
00632     CDat                        DatGraph;
00633     vector<bool>                vecfQuery;
00634     set<size_t>::const_iterator iterQuery;
00635     unsigned char               bFile;
00636 
00637     iSize = sizeof(bFile) + sizeof(iContext) + sizeof(iLimit) + sizeof(dEdgeAggressiveness);
00638     if( ( iOffset + iSize ) > vecbMessage.size( ) )
00639         return -1;
00640     bFile = vecbMessage[ iOffset ];
00641     iContext = *(uint32_t*)&vecbMessage[ iOffset + sizeof(bFile) ];
00642     iLimit = *(uint32_t*)&vecbMessage[ iOffset + sizeof(bFile) + sizeof(iContext) ];
00643     dEdgeAggressiveness = *(float*)&vecbMessage[ iOffset + sizeof(bFile) + sizeof(iContext) + sizeof(iLimit) ];
00644     for( i = iOffset + iSize; ( i + sizeof(iGene) ) <= vecbMessage.size( ); i += sizeof(iGene) ) {
00645         iGene = *(uint32_t*)&vecbMessage[ i ];
00646         if( !iGene-- )
00647             return -1;
00648         setiQuery.insert( iGene ); }
00649     iRet = i - iOffset;
00650 
00651     veciQuery.reserve( setiQuery.size( ) );
00652     for( iterQuery = setiQuery.begin( ); iterQuery != setiQuery.end( ); ++iterQuery )
00653         veciQuery.push_back( *iterQuery );
00654     if( !( GraphCreate( veciQuery, iContext, iLimit, dEdgeAggressiveness, vecfQuery, veciNeighbors,
00655         DatGraph ) && GraphWrite( DatGraph, veciQuery, veciNeighbors, vecfQuery, iContext,
00656         bFile ? EGraphOutputFile : EGraphOutputSocket ) ) )
00657         return -1;
00658 
00659     return iRet; }
00660 
00661 bool CBNServer::SelectNeighborsPixie( const vector<size_t>& veciQuery, const vector<bool>& vecfQuery,
00662     size_t iContext, size_t iLimit, const CDataMatrix& MatQuery, vector<size_t>& veciNeighbors ) const {
00663     vector<float>           vecdNeighbors;
00664     priority_queue<SPixie>  pqueNeighbors;
00665     float                   d, dAve, dStd;
00666     size_t                  i, j, iN;
00667 
00668     cerr << m_strConnection << " PIXIE query " << iContext << ':';
00669     for( i = 0; i < veciQuery.size( ); ++i )
00670         cerr << ' ' << GetGene( veciQuery[ i ] );
00671     cerr << endl;
00672 
00673     vecdNeighbors.resize( GetGenes( ) );
00674     fill( vecdNeighbors.begin( ), vecdNeighbors.end( ), 0.0f );
00675     dAve = dStd = 0;
00676     for( iN = i = 0; i < veciQuery.size( ); ++i )
00677         for( j = 0; j < GetGenes( ); ++j )
00678             if( !CMeta::IsNaN( d = MatQuery.Get( i, j ) ) ) {
00679                 iN++;
00680                 dAve += d;
00681                 dStd += d * d;
00682                 if( !vecfQuery[ j ] )
00683                     vecdNeighbors[ j ] += d; }
00684     for( i = 0; i < vecdNeighbors.size( ); ++i )
00685         if( ( d = vecdNeighbors[ i ] ) > 0 )
00686             pqueNeighbors.push( SPixie( i, d ) );
00687     while( !pqueNeighbors.empty( ) && ( ( veciQuery.size( ) + veciNeighbors.size( ) ) < iLimit ) ) {
00688         veciNeighbors.push_back( pqueNeighbors.top( ).m_iNode );
00689         pqueNeighbors.pop( ); }
00690 
00691     return true; }
00692 
00693 bool CBNServer::SelectNeighborsRatio( const vector<size_t>& veciQuery, const vector<bool>& vecfQuery,
00694     size_t iContext, size_t iLimit, const CDataMatrix& MatQuery, vector<size_t>& veciNeighbors ) const {
00695     size_t                  i, j;
00696     priority_queue<SPixie>  pqueNeighbors;
00697     float                   d;
00698     vector<float>           vecdValues, vecdBackground, vecdCur;
00699 
00700     cerr << m_strConnection << " RATIO query " << iContext << " (" << ( iContext ?
00701         GetBN( iContext ).GetID( ) : "total" ) << "):";
00702     for( i = 0; i < veciQuery.size( ); ++i )
00703         cerr << ' ' << GetGene( veciQuery[ i ] );
00704     cerr << endl;
00705 
00706     for( i = 0; i < veciQuery.size( ); ++i )
00707         if( !CMeta::IsNaN( d = GetBackground( iContext, veciQuery[ i ] ) ) )
00708             vecdBackground.push_back( d );
00709     vecdCur.resize( vecdBackground.size( ) + 1 );
00710     for( i = 0; i < MatQuery.GetColumns( ); ++i ) {
00711         if( vecfQuery[ i ] )
00712             continue;
00713         vecdValues.clear( );
00714         vecdValues.reserve( MatQuery.GetRows( ) );
00715         for( j = 0; j < MatQuery.GetRows( ); ++j )
00716             if( !CMeta::IsNaN( d = MatQuery.Get( j, i ) ) )
00717                 vecdValues.push_back( d );
00718         if( !vecdValues.empty( ) ) {
00719             Winsorize( vecdValues );
00720             copy( vecdBackground.begin( ), vecdBackground.end( ), vecdCur.begin( ) );
00721             vecdCur[ vecdBackground.size( ) ] = GetBackground( iContext, i );
00722             Winsorize( vecdCur );
00723             pqueNeighbors.push( SPixie( i, (float)( CStatistics::Average( vecdValues ) /
00724                 CStatistics::Average( vecdCur ) ) ) ); } }
00725     while( !pqueNeighbors.empty( ) && ( ( veciQuery.size( ) + veciNeighbors.size( ) ) < iLimit ) ) {
00726         veciNeighbors.push_back( pqueNeighbors.top( ).m_iNode );
00727         pqueNeighbors.pop( ); }
00728 
00729     return true; }
00730 
00731 bool CBNServer::GraphCreate( const vector<size_t>& veciQuery, size_t iContext, size_t iLimit,
00732     float dEdgeAggressiveness, vector<bool>& vecfQuery, vector<size_t>& veciNeighbors, CDat& DatGraph ) const {
00733     vector<float>           vecdNeighbors, vecdEdges;
00734     float                   d, dAve, dStd, dMin, dCutoff;
00735     size_t                  i, j, iMinOne, iMinTwo;
00736     vector<size_t>          veciDegree;
00737     bool                    fDone;
00738     CDataMatrix             MatQuery, MatNeighbors;
00739     vector<string>          vecstrGenes;
00740 
00741     MatQuery.Initialize( veciQuery.size( ), GetGenes( ) );
00742     for( i = 0; i < veciQuery.size( ); ++i )
00743         if( !((CBNServer*)this)->Get( veciQuery[ i ], iContext, MatQuery.Get( i ) ) )
00744             return false;
00745     vecfQuery.resize( GetGenes( ) );
00746     fill( vecfQuery.begin( ), vecfQuery.end( ), false );
00747     for( i = 0; i < veciQuery.size( ); ++i )
00748         vecfQuery[ veciQuery[ i ] ] = true;
00749     veciNeighbors.clear( );
00750     fDone = SelectNeighborsRatio( veciQuery, vecfQuery, iContext, iLimit, MatQuery, veciNeighbors );
00751 //      SelectNeighborsPixie( veciQuery, vecfQuery, iContext, iLimit, MatQuery, veciNeighbors );
00752     if( !fDone )
00753         return false;
00754 
00755     vecstrGenes.resize( veciQuery.size( ) + veciNeighbors.size( ) );
00756     vecfQuery.resize( vecstrGenes.size( ) );
00757     fill( vecfQuery.begin( ), vecfQuery.end( ), false );
00758     for( i = 0; i < veciQuery.size( ); ++i ) {
00759         vecstrGenes[ i ] = GetGene( veciQuery[ i ] );
00760         vecfQuery[ i ] = true; }
00761     for( i = 0; i < veciNeighbors.size( ); ++i )
00762         vecstrGenes[ veciQuery.size( ) + i ] = GetGene( veciNeighbors[ i ] );
00763     MatNeighbors.Initialize( veciNeighbors.size( ), veciNeighbors.size( ) );
00764     for( i = 0; i < veciNeighbors.size( ); ++i )
00765         if( !((CBNServer*)this)->Get( veciNeighbors[ i ], veciNeighbors, iContext,
00766             MatNeighbors.Get( i ) ) )
00767             return false;
00768     DatGraph.Open( vecstrGenes );
00769     for( i = 0; i < veciQuery.size( ); ++i ) {
00770         for( j = ( i + 1 ); j < veciQuery.size( ); ++j )
00771             DatGraph.Set( i, j, MatQuery.Get( i, veciQuery[ j ] % MatQuery.GetColumns( ) ) );
00772         for( j = 0; j < veciNeighbors.size( ); ++j )
00773             DatGraph.Set( i, veciQuery.size( ) + j, MatQuery.Get( i, veciNeighbors[ j ] ) ); }
00774 
00775     vecdEdges.reserve( veciNeighbors.size( ) * ( veciNeighbors.size( ) - 1 ) / 2 );
00776     for( i = 0; i < veciNeighbors.size( ); ++i ) {
00777         const float*    adOne   = MatNeighbors.Get( i );
00778         size_t          iOne    = veciQuery.size( ) + i;
00779 
00780         for( j = ( i + 1 ); j < veciNeighbors.size( ); ++j )
00781             if( !CMeta::IsNaN( d = adOne[ j ] ) ) {
00782                 vecdEdges.push_back( d );
00783                 DatGraph.Set( iOne, veciQuery.size( ) + j, adOne[ j ] ); } }
00784 
00785     Winsorize( vecdEdges );
00786     dAve = (float)CStatistics::Average( vecdEdges );
00787     dStd = (float)sqrt( CStatistics::Variance( vecdEdges, dAve ) );
00788     dCutoff = (float)( dAve + ( dEdgeAggressiveness * dStd ) );
00789     veciDegree.resize( DatGraph.GetGenes( ) );
00790     fill( veciDegree.begin( ), veciDegree.end( ), veciDegree.size( ) - 1 );
00791     for( fDone = false; !fDone; ) {
00792         fDone = true;
00793         dMin = FLT_MAX;
00794         for( i = 0; i < DatGraph.GetGenes( ); ++i )
00795             for( j = ( i + 1 ); j < DatGraph.GetGenes( ); ++j )
00796                 if( !CMeta::IsNaN( d = DatGraph.Get( i, j ) ) && ( d < dCutoff ) && ( d < dMin ) &&
00797                     ( veciDegree[ i ] > c_iDegree ) && ( veciDegree[ j ] > c_iDegree ) ) {
00798                     fDone = false;
00799                     dMin = d;
00800                     iMinOne = i;
00801                     iMinTwo = j; }
00802         if( !fDone ) {
00803             veciDegree[ iMinOne ]--;
00804             veciDegree[ iMinTwo ]--;
00805             DatGraph.Set( iMinOne, iMinTwo, CMeta::GetNaN( ) ); } }
00806 
00807     return true; }
00808 
00809 bool CBNServer::GraphWrite( const CDat& DatGraph, const vector<size_t>& veciQuery,
00810     const vector<size_t>& veciNeighbors, const vector<bool>& vecfQuery, size_t iContext,
00811     EGraphOutput eOutput ) const {
00812     static const size_t c_iBuffer   = 1024;
00813     char        acBuffer[ c_iBuffer ];
00814     string      strCmd, strDotIn, strDotOut, strSvg;
00815     ofstream    ofsm;
00816     CDot        DotOut( DatGraph );
00817     CGenome     Genome;
00818     uint32_t    iSize;
00819 
00820     if( eOutput == EGraphOutputNamed )
00821         strDotIn = GetFiles( ) + "/" + CMeta::Filename( GetGene( veciQuery[ 0 ] ) );
00822     else {
00823         sprintf_s( acBuffer, ( GetFiles( ) + "/inXXXXXX" ).c_str( ) );
00824         if( _mktemp_s( acBuffer ) )
00825             return false;
00826         strDotIn = acBuffer; }
00827     strDotIn += c_szDOT;
00828     ofsm.open( strDotIn.c_str( ) );
00829     if( !ofsm.is_open( ) )
00830         return false;
00831     Genome.Open( DatGraph.GetGeneNames( ) );
00832     DatGraph.SaveDOT( ofsm, CMeta::GetNaN( ), &Genome, false, false );
00833     ofsm.close( );
00834     if( eOutput == EGraphOutputNamed )
00835         return true;
00836 
00837     sprintf_s( acBuffer, ( GetFiles( ) + "/outXXXXXX" ).c_str( ) );
00838     if( _mktemp_s( acBuffer ) )
00839         return false;
00840     strDotOut = acBuffer;
00841     strDotOut += c_szDOT;
00842     strCmd = m_sData.m_strGraphviz + " -Tdot -o" + strDotOut + ' ' + strDotIn;
00843     system( strCmd.c_str( ) );
00844 // THE PAIN, IT BURNS!
00845 // The Boost DOT parser doesn't handle continuation lines.
00846 // sed doesn't handle newlines unless you beat it with a stick.
00847 // Backslashes have to be triple escaped to get from here to sed.
00848 // The combination makes me cry, but it works.
00849     system( ( "sed -c -i -n '1h;2,$H;${g;s/\\\\\\n//g;p}' " + strDotOut ).c_str( ) );
00850 
00851     if( !DotOut.Open( strDotOut.c_str( ) ) )
00852         return false;
00853     switch( eOutput ) {
00854         case EGraphOutputFile:
00855             sprintf_s( acBuffer, "svgXXXXXX" );
00856             if( _mktemp_s( acBuffer ) )
00857                 return false;
00858             strSvg = GetFiles( ) + '/' + acBuffer + c_szSVG;
00859             ofsm.clear( );
00860             ofsm.open( strSvg.c_str( ) );
00861             if( !( ofsm.is_open( ) && DotOut.Save( ofsm, vecfQuery, iContext ) ) )
00862                 return false;
00863             ofsm.close( );
00864 
00865             strSvg = acBuffer;
00866             strSvg += c_szSVG;
00867             iSize = (uint32_t)strSvg.length( ) + sizeof(iSize) + ( sizeof(iSize) * ( veciQuery.size( ) +
00868                 veciNeighbors.size( ) ) );
00869             send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 );
00870             SendGenes( veciQuery, veciNeighbors );
00871             send( m_iSocket, strSvg.c_str( ), (int)strSvg.length( ), 0 );
00872             break;
00873 
00874         case EGraphOutputSocket:
00875             stringstream    sssm;
00876 
00877             if( !DotOut.Save( sssm, vecfQuery, iContext ) )
00878                 return false;
00879 
00880             iSize = (uint32_t)( sssm.str( ).length( ) + sizeof(iSize) + ( sizeof(iSize) * ( veciQuery.size( ) +
00881                 veciNeighbors.size( ) ) ) );
00882             send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 );
00883             SendGenes( veciQuery, veciNeighbors );
00884             send( m_iSocket, sssm.str( ).c_str( ), (int)sssm.str( ).length( ), 0 );
00885             break; }
00886 
00887     return true; }
00888 
00889 bool CBNServer::SendGenes( const vector<size_t>& veciQuery, const vector<size_t>& veciNeighbors ) const {
00890     size_t      i;
00891     uint32_t    iSize;
00892     uint32_t*   aiGenes;
00893 
00894     iSize = (uint32_t)( veciQuery.size( ) + veciNeighbors.size( ) );
00895     send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 );
00896     aiGenes = new uint32_t[ iSize ];
00897     for( i = 0; i < veciQuery.size( ); ++i )
00898         aiGenes[ i ] = (uint32_t)veciQuery[ i ] + 1;
00899     for( i = 0; i < veciNeighbors.size( ); ++i )
00900         aiGenes[ veciQuery.size( ) + i ] = (uint32_t)veciNeighbors[ i ] + 1;
00901     send( m_iSocket, (const char*)aiGenes, iSize * sizeof(*aiGenes), 0 );
00902     delete[] aiGenes;
00903 
00904     return true; }
00905 
00906 size_t CBNServer::ProcessContexts( const vector<unsigned char>& vecbMessage, size_t iOffset ) {
00907     uint32_t                iGene;
00908     size_t                  iCount, iPlace, iContext;
00909     vector<unsigned char>   vecbData;
00910     float                   dBetween, dBackground, dWithin;
00911 
00912     iCount = InitializeContexts( );
00913     iGene = (uint32_t)( ( ( vecbMessage.size( ) - iOffset ) / sizeof(iGene) ) *
00914         ( iCount * sizeof(*m_adContexts) ) );
00915     send( m_iSocket, (const char*)&iGene, sizeof(iGene), 0 );
00916     for( iPlace = iOffset; ( iPlace + sizeof(iGene) ) <= vecbMessage.size( ); iPlace += sizeof(iGene) ) {
00917         iGene = *(uint32_t*)&vecbMessage[ iPlace ];
00918         if( !( iGene-- && GetDatabase( ).Get( iGene, vecbData, true ) ) )
00919             return -1;
00920         cerr << m_strConnection << " contexting " << iGene  << " (" << GetGene( iGene ) <<
00921             ")" << endl;
00922         for( iContext = 0; iContext < GetContexts( ); ++iContext ) {
00923             if( !GetAssociation( iGene, vecbData, GetContext( iContext ), iContext + 1, true,
00924                 &dBetween, &dBackground, &dWithin, NULL, NULL, GetWithinContext( iContext + 1, iContext ) ) )
00925                 return -1;
00926             SetArray( m_adContexts, GetContexts( ), iContext, GetPValue( dBetween, dBackground,
00927                 dWithin, iContext + 1, 1, GetContext( iContext ).size( ), GetContexts( ) ), dBetween,
00928                 dBackground, dWithin ); }
00929         send( m_iSocket, (const char*)m_adContexts, (int)( iCount * sizeof(*m_adContexts) ), 0 ); }
00930 
00931     return ( iPlace - iOffset ); }
00932 
00933 struct SSortFind {
00934     bool operator()( const STermFound& sOne, const STermFound& sTwo ) {
00935 
00936         return ( sOne.m_dP < sTwo.m_dP ); }
00937 };
00938 
00939 size_t CBNServer::ProcessTermFinder( const vector<unsigned char>& vecbMessage, size_t iOffset ) {
00940     uint32_t            iOntology, iGene, iSize;
00941     float               d, dP;
00942     size_t              i, j, iPlace;
00943     vector<string>      vecstrGenes;
00944     vector<STermFound>  vecsTerms;
00945     CGenes              Genes( GetGenome( ) );
00946     const IOntology*    pOnto;
00947     vector<size_t>      veciMapping;
00948 
00949     iSize = sizeof(iOntology) + sizeof(dP);
00950     if( ( iOffset + iSize ) > vecbMessage.size( ) )
00951         return -1;
00952     iOntology = *(uint32_t*)&vecbMessage[ iOffset ];
00953     dP = *(float*)&vecbMessage[ iOffset + sizeof(iOntology) ];
00954     for( iPlace = iOffset + iSize; ( iPlace + sizeof(iGene) ) <= vecbMessage.size( );
00955         iPlace += sizeof(iGene) ) {
00956         iGene = *(uint32_t*)&vecbMessage[ iPlace ];
00957         vecstrGenes.push_back( GetGene( iGene - 1 ) ); }
00958 
00959     if( !( pOnto = GetOntology( iOntology ) ) )
00960         return -1;
00961     cerr << m_strConnection << " TermFinder " << pOnto->GetID( ) << '@' << dP << ':';
00962     for( i = 0; i < vecstrGenes.size( ); ++i )
00963         cerr << ' ' << vecstrGenes[ i ];
00964     cerr << endl;
00965 
00966     Genes.Open( vecstrGenes, false );
00967     veciMapping.resize( Genes.GetGenes( ) );
00968     for( i = 0; i < Genes.GetGenes( ); ++i ) {
00969         const CGene&    Gene    = Genes.GetGene( i );
00970 
00971         veciMapping[ i ] = GetGene( Gene.GetName( ) );
00972         for( j = 0; ( j < Gene.GetSynonyms( ) ) && ( veciMapping[ i ] == -1 ); ++j )
00973             veciMapping[ i ] = GetGene( Gene.GetSynonym( j ) ); }
00974             
00975     pOnto->TermFinder( Genes, vecsTerms, true, true, false, dP );
00976     sort( vecsTerms.begin( ), vecsTerms.end( ), SSortFind( ) );
00977     iSize = sizeof(iSize);
00978     for( i = 0; i < vecsTerms.size( ); ++i ) {
00979         iSize += (uint32_t)( sizeof(float) + ( 5 * sizeof(uint32_t) ) + 2 +
00980             pOnto->GetGloss( vecsTerms[ i ].m_iID ).length( ) +
00981             pOnto->GetID( vecsTerms[ i ].m_iID ).length( ) );
00982         for( j = 0; j < Genes.GetGenes( ); ++j )
00983             if( pOnto->IsAnnotated( vecsTerms[ i ].m_iID, Genes.GetGene( j ) ) && ( veciMapping[ j ] != -1 ) )
00984                 iSize += sizeof(uint32_t); }
00985 
00986     send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 );
00987     iSize = (uint32_t)i;
00988     send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 );
00989     for( i = 0; ( i < vecsTerms.size( ) ) && ( vecsTerms[ i ].m_dP <= dP ); ++i ) {
00990         const string&       strID       = pOnto->GetID( vecsTerms[ i ].m_iID );
00991         const string&       strGloss    = pOnto->GetGloss( vecsTerms[ i ].m_iID );
00992         vector<uint32_t>    veciGenes;
00993 
00994         cerr << m_strConnection << " found " << strGloss << " @ " << vecsTerms[ i ].m_dP << endl;
00995         send( m_iSocket, strID.c_str( ), (int)strID.length( ) + 1, 0 );
00996         send( m_iSocket, strGloss.c_str( ), (int)strGloss.length( ) + 1, 0 );
00997         d = (float)vecsTerms[ i ].m_dP;
00998         send( m_iSocket, (const char*)&d, sizeof(d), 0 );
00999         iSize = (uint32_t)vecsTerms[ i ].m_iHitsTerm;
01000         send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 );
01001         iSize = (uint32_t)vecsTerms[ i ].m_iSizeTerm;
01002         send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 );
01003         iSize = (uint32_t)vecsTerms[ i ].m_iHitsTotal;
01004         send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 );
01005         iSize = (uint32_t)vecsTerms[ i ].m_iSizeTotal;
01006         send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 );
01007 
01008         for( j = 0; j < Genes.GetGenes( ); ++j )
01009             if( pOnto->IsAnnotated( vecsTerms[ i ].m_iID, Genes.GetGene( j ) ) && ( veciMapping[ j ] != -1 ) )
01010                 veciGenes.push_back( (uint32_t)veciMapping[ j ] );
01011         iSize = (uint32_t)veciGenes.size( );
01012         send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 );
01013         for( j = 0; j < veciGenes.size( ); ++j ) {
01014             iSize = (uint32_t)veciGenes[ j ] + 1;
01015             send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 ); } }
01016 
01017     return ( iPlace - iOffset ); }
01018 
01019 size_t CBNServer::ProcessDiseases( const vector<unsigned char>& vecbMessage, size_t iOffset ) {
01020     uint32_t                iGene, iContext;
01021     size_t                  i, iCount, iPlace, iDisease, iChunk;
01022     vector<unsigned char>   vecbData, vecbTmp;
01023     float                   dBackground, dWithin, dBetween;
01024 
01025     if( ( iOffset + sizeof(iContext) ) > vecbMessage.size( ) )
01026         return -1;
01027     iContext = *(uint32_t*)&vecbMessage[ iOffset ];
01028 
01029     iCount = InitializeDiseases( );
01030     iGene = (uint32_t)( ( ( vecbMessage.size( ) - iOffset - sizeof(iContext) ) / sizeof(iGene) ) *
01031         ( iCount * sizeof(*m_adDiseases) ) );
01032     send( m_iSocket, (const char*)&iGene, sizeof(iGene), 0 );
01033 
01034     iChunk = GetDatachunk( );
01035     vecbData.resize( GetGenes( ) * iChunk );
01036     for( iPlace = iOffset + sizeof(iContext); ( iPlace + sizeof(iGene) ) <= vecbMessage.size( );
01037         iPlace += sizeof(iGene) ) {
01038         iGene = *(uint32_t*)&vecbMessage[ iPlace ];
01039         if( !iGene-- )
01040             return -1;
01041         cerr << m_strConnection << " diseasing " << iGene  << " (" << GetGene( iGene ) <<
01042             ") in " << iContext << endl;
01043 
01044         if( !GetDatabase( ).Get( iGene, GetDiseaseGenes( ), vecbTmp, true ) )
01045             return -1;
01046         for( i = 0; i < GetDiseaseGenes( ).size( ); ++i )
01047 // Hackedy hack hack hack
01048             memcpy( &vecbData[ GetDiseaseGenes( )[ i ] * iChunk ], &vecbTmp[ i * iChunk ], iChunk );
01049         for( iDisease = 0; iDisease < GetDiseases( ); ++iDisease ) {
01050             if( !GetAssociation( iGene, vecbData, GetDisease( iDisease ), iContext, true, &dBetween,
01051                 &dBackground, &dWithin, NULL, NULL, GetWithinDisease( iContext, iDisease ) ) )
01052                 return -1;
01053             SetArray( m_adDiseases, GetDiseases( ), iDisease, GetPValue( dBetween, dBackground,
01054                 dWithin, iContext, 1, GetDisease( iDisease ).size( ), GetDiseases( ) ), dBetween,
01055                 dBackground, dWithin ); }
01056         send( m_iSocket, (const char*)m_adDiseases, (int)( iCount * sizeof(*m_adDiseases) ), 0 ); }
01057 
01058     return ( iPlace - iOffset ); }
01059 
01060 bool CBNServer::GenerateNetworkIcons( ) const {
01061     static const size_t c_iSize                 = 6;
01062     static const float  c_dEdgeAggressiveness   = 0;
01063     vector<size_t>  veciQuery, veciNeighbors;
01064     vector<bool>    vecfQuery;
01065     CDat            DatGraph;
01066     size_t          i;
01067 
01068     veciQuery.resize( 1 );
01069     for( i = 0; i < GetGenes( ); ++i ) {
01070         veciQuery[ 0 ] = i;
01071         if( !( GraphCreate( veciQuery, 0, c_iSize, c_dEdgeAggressiveness, vecfQuery, veciNeighbors,
01072             DatGraph ) && GraphWrite( DatGraph, veciQuery, veciNeighbors, vecfQuery, 0, EGraphOutputNamed ) ) )
01073             return false; }
01074 
01075     return true; }
01076 
01077 bool CBNServer::GenerateAssociations( const char* szDiseases, size_t iContext ) {
01078     CPCL            PCLDiseases( false ), PCLAssociations;
01079     size_t          i, j;
01080     ofstream        ofsm;
01081     vector<string>  vecstrDCs;
01082 
01083     if( !PCLDiseases.Open( szDiseases, 0 ) ) {
01084         cerr << "Could not open: " << szDiseases << endl;
01085         return false; }
01086     if( PCLDiseases.GetGenes( ) != GetDiseases( ) ) {
01087         cerr << "Unexpected disease count " << PCLDiseases.GetGenes( ) << ", wanted " << GetDiseases( ) <<
01088             endl;
01089         return false; }
01090     vecstrDCs.resize( GetDiseases( ) + GetContexts( ) );
01091     copy( PCLDiseases.GetGeneNames( ).begin( ), PCLDiseases.GetGeneNames( ).end( ), vecstrDCs.begin( ) );
01092     for( i = 0; i < GetContexts( ); ++i )
01093         vecstrDCs[ GetDiseases( ) + i ] = GetBN( i + 1 ).GetID( );
01094     PCLAssociations.Open( vecstrDCs, vecstrDCs, vector<string>( ) );
01095 
01096     // Disease/disease
01097     InitializeDiseases( );
01098     for( i = 0; i < GetDiseases( ); ++i ) {
01099         memset( m_adDiseases, 0, GetDiseases( ) * c_iValues * sizeof(*m_adDiseases) );
01100         if( !GetAssociationsDC( 1, ESetDisease, i, iContext, true ) )
01101             return false;
01102         for( j = i; j < GetDiseases( ); ++j ) {
01103             PCLAssociations.Set( i, j, m_adDiseases[ j ] );
01104             PCLAssociations.Set( j, i, m_adDiseases[ j ] ); } }
01105     // Disease/context
01106     InitializeContexts( );
01107     for( i = 0; i < GetDiseases( ); ++i ) {
01108         memset( m_adContexts, 0, GetContexts( ) * c_iValues * sizeof(*m_adContexts) );
01109         if( !GetAssociationsDC( 0, ESetDisease, i, -1, true ) )
01110             return false;
01111         for( j = 0; j < GetContexts( ); ++j )
01112             PCLAssociations.Set( i, GetDiseases( ) + j, m_adContexts[ j ] ); }
01113     for( i = 0; i < GetContexts( ); ++i ) {
01114         memset( m_adDiseases, 0, GetDiseases( ) * c_iValues * sizeof(*m_adDiseases) );
01115         if( !GetAssociationsDC( 1, ESetContext, i, iContext, true ) )
01116             return false;
01117         for( j = 0; j < GetDiseases( ); ++j )
01118             PCLAssociations.Set( GetDiseases( ) + i, j, m_adDiseases[ j ] ); }
01119     // Context/context
01120     for( i = 0; i < GetContexts( ); ++i ) {
01121         memset( m_adContexts, 0, GetContexts( ) * c_iValues * sizeof(*m_adContexts) );
01122         if( !GetAssociationsDC( 0, ESetContext, i, -1, true ) )
01123             return false;
01124         for( j = 0; j < GetContexts( ); ++j )
01125             PCLAssociations.Set( GetDiseases( ) + i, GetDiseases( ) + j, m_adContexts[ j ] ); }
01126 
01127     ofsm.open( "associations.pcl" );
01128     PCLAssociations.Save( ofsm );
01129     return true; }
01130 
01131 size_t CBNServer::ProcessGenes( const vector<unsigned char>& vecbMessage, size_t iOffset ) {
01132     uint32_t        iContext, iSize, iGene;
01133     size_t          i, iPlace;
01134     vector<size_t>  veciGenes;
01135     unsigned char   bType;
01136 
01137     if( ( iOffset + sizeof(iContext) + sizeof(bType) ) > vecbMessage.size( ) )
01138         return -1;
01139     iContext = *(uint32_t*)&vecbMessage[ iOffset ];
01140     bType = vecbMessage[ iOffset + sizeof(iContext) ];
01141     for( iPlace = iOffset + sizeof(iContext) + sizeof(bType);
01142         ( iPlace + sizeof(iGene) ) <= vecbMessage.size( ); iPlace += sizeof(iGene) ) {
01143         if( !( iGene = *(uint32_t*)&vecbMessage[ iPlace ] ) )
01144             return -1;
01145         iGene--;
01146         if( bType == ESetGenes )
01147             veciGenes.push_back( iGene ); }
01148 
01149     cerr << m_strConnection << " genesing in " << iContext << ' ';
01150     if( bType == ESetGenes ) {
01151         for( i = 0; i < veciGenes.size( ); ++i )
01152             cerr << ( i ? ' ' : '{' ) << GetGene( veciGenes[ i ] );
01153         cerr << '}' << endl; }
01154     else
01155         cerr << ( ( bType == ESetContext ) ? "context" : "disease" ) << ' ' << ( iGene + 1 ) << endl;
01156     {
01157         const vector<size_t>&   veciSet = ( bType == ESetGenes ) ? veciGenes :
01158                                             GetGeneSets( bType - 1 )[ iGene ];
01159         float                   dWithin;
01160 
01161         dWithin = ( bType == ESetGenes ) ? CMeta::GetNaN( ) : GetWithin( bType - 1, iContext, iGene );
01162         if( !GetGenes( veciSet, iContext, dWithin ) )
01163             return -1;
01164     }
01165     iSize = (uint32_t)( c_iValues * GetGenes( ) * sizeof(*m_adGenes) );
01166     send( m_iSocket, (char*)&iSize, sizeof(iSize), 0 );
01167     send( m_iSocket, (char*)m_adGenes, iSize, 0 );
01168 
01169     return ( iPlace - iOffset ); }
01170 
01171 bool CBNServer::GetGenes( const vector<size_t>& veciGenes, size_t iContext, float dWithin ) {
01172     CDataMatrix     MatGenes;
01173     size_t          i, j;
01174     vector<float>   vecdBetween, vecdBackground;
01175     float           d, dFrac, dBackground;
01176 
01177     dFrac = GetFraction( veciGenes.size( ) );
01178     MatGenes.Initialize( veciGenes.size( ), GetGenes( ) );
01179     vecdBackground.resize( veciGenes.size( ) );
01180     for( i = 0; i < MatGenes.GetRows( ); ++i ) {
01181         vecdBackground[ i ] = GetBackground( iContext, veciGenes[ i ] );
01182         if( IsFraction( dFrac ) ) {
01183             fill( MatGenes.Get( i ), MatGenes.Get( i ) + MatGenes.GetColumns( ), CMeta::GetNaN( ) );
01184             continue; }
01185         if( !Get( veciGenes[ i ], iContext, MatGenes.Get( i ) ) )
01186             return false; }
01187     Winsorize( vecdBackground );
01188     dBackground = (float)CStatistics::Average( vecdBackground );
01189     i = veciGenes.size( ) * ( veciGenes.size( ) + 1 ) / 2;
01190     if( CMeta::IsNaN( dWithin ) ) {
01191         vector<float>   vecdWithin;
01192 
01193         vecdWithin.reserve( 1 + i );
01194         if( !GetWithin( veciGenes, iContext, NULL, &vecdWithin ) )
01195             return false;
01196 // Alternative: calculate an average over all edges (weighted) rather than between sets (unweighted)
01197 //      vecdWithin.push_back( GetPrior( iContext ) );
01198         Winsorize( vecdWithin );
01199 //      dWithin = (float)CStatistics::Average( vecdWithin ); }
01200         dWithin = ( (float)CStatistics::Average( vecdWithin ) + GetPrior( iContext ) ) / 2; }
01201     else
01202 //      dWithin = ( ( dWithin * i ) + GetPrior( iContext )  ) / ( i + 1 );
01203         dWithin = ( dWithin + GetPrior( iContext ) ) / 2;
01204 
01205     InitializeGenes( );
01206     for( i = 0; i < GetGenes( ); ++i ) {
01207         float   dBetween, dTmp;
01208 
01209         vecdBetween.clear( );
01210         vecdBetween.reserve( MatGenes.GetRows( ) );
01211         for( j = 0; j < MatGenes.GetRows( ); ++j )
01212             if( !CMeta::IsNaN( d = MatGenes.Get( j, i ) ) )
01213                 vecdBetween.push_back( d );
01214         Winsorize( vecdBetween );
01215         dBetween = (float)CStatistics::Average( vecdBetween );
01216         dTmp = ( ( dBackground * veciGenes.size( ) ) + GetBackground( iContext, i ) ) /
01217             ( veciGenes.size( ) + 1 );
01218         SetArray( m_adGenes, GetGenes( ), i, GetPValue( dBetween, dTmp, dWithin, iContext,
01219 // Alternative: multiple hypothesis correction on all genes (gets too big)
01220 //          1, veciGenes.size( ), GetGenes( ) ), dBetween, dTmp, dWithin ); }
01221             1, veciGenes.size( ), 0 ), dBetween, dTmp, dWithin ); }
01222 
01223     return true; }
01224 
01225 size_t CBNServer::ProcessAssociation( const vector<unsigned char>& vecbMessage, size_t iOffset ) {
01226     uint32_t                iGene, iContext;
01227     size_t                  i, iPlace, iBack;
01228     vector<unsigned char>   vecbData;
01229     vector<size_t>          veciFore, veciBack;
01230     float                   dBetween, dBackground, dWithin, dWith;
01231     const vector<size_t>*   pveciBack;
01232     unsigned char           bType;
01233 
01234     if( ( iOffset + sizeof(iContext) ) > vecbMessage.size( ) )
01235         return -1;
01236     iContext = *(uint32_t*)&vecbMessage[ iOffset ];
01237     for( iPlace = iOffset + sizeof(iContext); ( iPlace + sizeof(iGene) ) <= vecbMessage.size( );
01238         iPlace += sizeof(iGene) ) {
01239         if( !( iGene = *(uint32_t*)&vecbMessage[ iPlace ] ) )
01240             break;
01241         veciFore.push_back( --iGene ); }
01242     if( ( ( iPlace += sizeof(iGene) ) + sizeof(bType) ) > vecbMessage.size( ) )
01243         return -1;
01244     bType = vecbMessage[ iPlace++ ];
01245     if( bType == ESetGenes ) {
01246         for( ; ( iPlace + sizeof(iGene) ) <= vecbMessage.size( ); iPlace += sizeof(iGene) ) {
01247             if( !( iGene = *(uint32_t*)&vecbMessage[ iPlace ] ) )
01248                 return -1;
01249             veciBack.push_back( --iGene ); }
01250         iBack = -1;
01251         pveciBack = &veciBack; }
01252     else {
01253         if( ( ( iPlace + sizeof(iGene) ) > vecbMessage.size( ) ) ||
01254             !( iGene = *(uint32_t*)&vecbMessage[ iPlace ] ) )
01255             return -1;
01256         iPlace += sizeof(iGene);
01257         iBack = iGene - 1;
01258         pveciBack = &GetGeneSets( bType - 1 )[ iBack ]; }
01259     if( veciFore.empty( ) || pveciBack->empty( ) )
01260         return -1;
01261 
01262     cerr << m_strConnection << " association in " << iContext << ' ';
01263     for( i = 0; i < veciFore.size( ); ++i )
01264         cerr << ( i ? ' ' : '{' ) << GetGene( veciFore[ i ] );
01265     cerr << "} with ";
01266     if( iBack == -1 ) {
01267         for( i = 0; i < pveciBack->size( ); ++i )
01268             cerr << ( i ? ' ' : '{' ) << GetGene( pveciBack->at( i ) );
01269         cerr << '}'; }
01270     else
01271         cerr << ( ( bType == ESetContext ) ? "context" : "disease" ) << ' ' << ( iBack + 1 );
01272     cerr << endl;
01273 
01274     dWith = ( bType == ESetGenes ) ? CMeta::GetNaN( ) : GetWithin( bType - 1, iContext, iBack );
01275     InitializeGenes( );
01276     for( i = 0; i < veciFore.size( ); ++i ) {
01277         if( !( i % 100 ) )
01278             cerr << m_strConnection << " association " << i << '/' << veciFore.size( ) << ": " <<
01279                 GetGene( veciFore[ i ] ) << endl;
01280         if( !( GetDatabase( ).Get( veciFore[ i ], *pveciBack, vecbData, true ) &&
01281             GetAssociation( veciFore[ i ], vecbData, *pveciBack, iContext, false, &dBetween, &dBackground,
01282                 &dWithin, NULL, NULL, dWith ) ) )
01283             return -1;
01284         SetArray( m_adGenes, veciFore.size( ), i, GetPValue( dBetween, dBackground, dWithin,
01285             iContext, 1, pveciBack->size( ), veciFore.size( ) ), dBetween, dBackground, dWithin ); }
01286 
01287     iGene = (uint32_t)( c_iValues * veciFore.size( ) * sizeof(*m_adGenes) );
01288     send( m_iSocket, (const char*)&iGene, sizeof(iGene), 0 );
01289     send( m_iSocket, (const char*)m_adGenes, iGene, 0 );
01290 
01291     return ( iPlace - iOffset ); }
01292 
01293 size_t CBNServer::ProcessAssociations( const vector<unsigned char>& vecbMessage, size_t iOffset ) {
01294     uint32_t        iGene, iCount;
01295     vector<size_t>  veciGenes;
01296     unsigned char   bType, bDiseases;
01297     size_t          i, iPlace, iContext;
01298     float*          ad;
01299 
01300     if( ( iOffset + sizeof(iGene) + sizeof(bType) + sizeof(bDiseases) ) > vecbMessage.size( ) )
01301         return -1;
01302     bDiseases = vecbMessage[ iOffset ];
01303     iGene = *(uint32_t*)&vecbMessage[ iOffset + sizeof(bDiseases) ];
01304     iContext = ( iGene == -1 ) ? -1 : (size_t)iGene;
01305     bType = vecbMessage[ iOffset + sizeof(bDiseases) + sizeof(iGene) ];
01306     for( iPlace = ( iOffset + sizeof(iGene) + sizeof(bDiseases) + sizeof(bType) );
01307         ( iPlace + sizeof(iGene) ) <= vecbMessage.size( ); iPlace += sizeof(iGene) ) {
01308         if( !( iGene = *(uint32_t*)&vecbMessage[ iPlace ] ) )
01309             return -1;
01310         iGene--;
01311         if( bType == ESetGenes )
01312             veciGenes.push_back( iGene ); }
01313 
01314     cerr << m_strConnection << " associationsing ";
01315     if( bType == ESetGenes ) {
01316         for( i = 0; i < veciGenes.size( ); ++i )
01317             cerr << ( i ? ' ' : '{' ) << GetGene( veciGenes[ i ] );
01318         cerr << '}'; }
01319     else
01320         cerr << ( ( bType == ESetContext ) ? "context" : "disease" ) << ' ' << ( iGene + 1 );
01321     cerr << " with " << ( bDiseases ? "diseases" : "contexts" ) << " in " << iContext << endl;
01322 
01323     iCount = (uint32_t)( ( bDiseases ? InitializeDiseases( ) : InitializeContexts( ) ) * sizeof(*ad) );
01324     ad = bDiseases ? m_adDiseases : m_adContexts;
01325     memset( ad, 0, iCount );
01326     if( !( ( bType == ESetGenes ) ?
01327         GetAssociationsSet( bDiseases, veciGenes, iContext ) :
01328         GetAssociationsDC( bDiseases, bType, iGene, iContext ) ) )
01329         return -1;
01330     send( m_iSocket, (char*)&iCount, sizeof(iCount), 0 );
01331     send( m_iSocket, (char*)ad, iCount, 0 );
01332 
01333     return ( iPlace - iOffset ); }
01334 
01335 bool CBNServer::GetAssociationsSet( unsigned char bDiseases, const vector<size_t>& veciGenes,
01336     size_t iContext ) const {
01337     const vector<vector<size_t> >&  vecveciSets = GetGeneSets( bDiseases );
01338     size_t                  i, j, k;
01339     vector<unsigned char>   vecbData;
01340     float                   d, dFrac, dWithinGenes, dBetween;
01341     float*                  ad;
01342     vector<vector<float> >  vecvecdBackground, vecvecdBetween;
01343     vector<float>           vecdCur;
01344 
01345     vecvecdBetween.resize( vecveciSets.size( ) );
01346     for( i = 0; i < vecvecdBetween.size( ); ++i ) {
01347         vecvecdBetween[ i ].resize( veciGenes.size( ) + vecveciSets[ i ].size( ) );
01348         fill( vecvecdBetween[ i ].begin( ), vecvecdBetween[ i ].end( ), 0.0f ); }
01349     vecvecdBackground.resize( vecveciSets.size( ) );
01350     dFrac = GetFraction( veciGenes.size( ) );
01351     for( i = 0; i < veciGenes.size( ); ++i ) {
01352         if( IsFraction( dFrac ) ) {
01353             for( j = 0; j < vecveciSets.size( ); ++j ) {
01354                 k = GetContext( bDiseases, iContext, j );
01355                 vecvecdBackground[ j ].push_back( GetBackground( k, veciGenes[ i ] ) ); }
01356             continue; }
01357         if( !GetDatabase( ).Get( veciGenes[ i ], vecbData, true ) )
01358             return false;
01359         cerr << m_strConnection << " associations " << GetGene( veciGenes[ i ] ) << endl;
01360         for( j = 0; j < vecveciSets.size( ); ++j ) {
01361             vecdCur.clear( );
01362             vecdCur.reserve( vecveciSets[ j ].size( ) );
01363             k = GetContext( bDiseases, iContext, j );
01364             if( !GetAssociation( veciGenes[ i ], vecbData, vecveciSets[ j ], k, true, &dBetween, NULL, NULL,
01365                 &vecdCur, &vecvecdBackground[ j ], GetWithin( bDiseases, k, j ) ) )
01366                 return false;
01367             vecvecdBetween[ j ][ i ] += dBetween;
01368             for( k = 0; k < vecveciSets[ j ].size( ); ++k )
01369                 vecvecdBetween[ j ][ veciGenes.size( ) + k ] += vecdCur[ k ]; } }
01370     for( i = 0; i < vecvecdBetween.size( ); ++i ) {
01371         for( j = 0; j < veciGenes.size( ); ++j )
01372             vecvecdBetween[ i ][ j ] /= vecveciSets[ i ].size( );
01373         for( j = 0; j < vecveciSets[ i ].size( ); ++j )
01374             vecvecdBetween[ i ][ veciGenes.size( ) + j ] /= veciGenes.size( ); }
01375     for( i = 0; i < vecveciSets.size( ); ++i ) {
01376         k = GetContext( bDiseases, iContext, i );
01377         for( j = 0; j < vecveciSets[ i ].size( ); ++j )
01378             if( !CMeta::IsNaN( d = GetBackground( k, vecveciSets[ i ][ j ] ) ) )
01379                 vecvecdBackground[ i ].push_back( d ); }
01380     if( ( iContext != -1 ) && !GetWithin( veciGenes, iContext, &dWithinGenes, NULL ) )
01381         return false;
01382 
01383     ad = bDiseases ? m_adDiseases : m_adContexts;
01384     for( i = 0; i < vecveciSets.size( ); ++i ) {
01385         float   dBackground, dWithin;
01386         size_t  iTmp;
01387 
01388         if( vecvecdBetween[ i ].empty( ) )
01389             continue;
01390         Winsorize( vecvecdBetween[ i ] );
01391         Winsorize( vecvecdBackground[ i ] );
01392         dBetween = (float)CStatistics::Average( vecvecdBetween[ i ] );
01393         dBackground = (float)CStatistics::Average( vecvecdBackground[ i ] );
01394         iTmp = GetContext( bDiseases, iContext, i );
01395         if( ( iContext == -1 ) && !GetWithin( veciGenes, iTmp, &dWithinGenes, NULL ) )
01396             return false;
01397         dWithin = ( dWithinGenes + GetWithin( bDiseases, iTmp, i ) ) / 2;
01398         SetArray( ad, vecveciSets.size( ), i, GetPValue( dBetween, dBackground, dWithin, iTmp,
01399             veciGenes.size( ), vecveciSets[ i ].size( ), vecveciSets.size( ) ), dBetween, dBackground,
01400             dWithin ); }
01401 
01402     return true; }
01403 
01404 bool CBNServer::GetAssociationsDC( unsigned char bDiseases, unsigned char bType, size_t iDC,
01405     size_t iContext, bool fZ ) const {
01406     const vector<vector<size_t> >&  vecveciSets = GetGeneSets( bDiseases );
01407     const vector<size_t>&           veciGenes   = GetGeneSets( bType - 1 )[ iDC ];
01408     size_t  i, j, k, iEdgesOne, iEdgesTwo;
01409     float   d, dBetween, dWithin, dBackground, dBackgroundOne;
01410     float*  ad;
01411 
01412     if( iContext == -1 )
01413         dBackgroundOne = CMeta::GetNaN( );
01414     else
01415         for( dBackgroundOne = 0,i = 0; i < veciGenes.size( ); ++i )
01416             dBackgroundOne += GetBackground( iContext, veciGenes[ i ] );
01417     iEdgesOne = veciGenes.size( ) * ( veciGenes.size( ) + 1 ) / 2;
01418     ad = bDiseases ? m_adDiseases : m_adContexts;
01419     for( i = 0; i < vecveciSets.size( ); ++i ) {
01420         iEdgesTwo = vecveciSets[ i ].size( ) * ( vecveciSets[ i ].size( ) + 1 ) / 2;
01421         k = GetContext( bDiseases, iContext, i );
01422         if( CMeta::IsNaN( dBackgroundOne ) )
01423             for( dBackground = 0,j = 0; j < veciGenes.size( ); ++j )
01424                 dBackground += GetBackground( k, veciGenes[ j ] );
01425         else
01426             dBackground = dBackgroundOne;
01427         for( j = 0; j < vecveciSets[ i ].size( ); ++j )
01428             dBackground += GetBackground( k, vecveciSets[ i ][ j ] );
01429         dBackground /= veciGenes.size( ) + vecveciSets[ i ].size( );
01430 
01431         d = GetWithin( bType - 1, k, iDC );
01432         dWithin = GetWithin( bDiseases, k, i );
01433 // Alternative: calculate an average over all edges (weighted) rather than between sets (unweighted)
01434 //      dWithin = ( ( d * iEdgesOne ) + ( dWithin * iEdgesTwo ) ) / ( iEdgesOne + iEdgesTwo );
01435         dWithin = ( d + dWithin ) / 2;
01436         dBetween = GetBetween( k, bType - 1, iDC, bDiseases, i );
01437 
01438         SetArray( ad, vecveciSets.size( ), i, GetPValue( dBetween, dBackground, dWithin, k, veciGenes.size( ),
01439             vecveciSets[ i ].size( ), vecveciSets.size( ), fZ ), dBetween, dBackground, dWithin ); }
01440 
01441     return true; }
01442 
01443 bool CBNServer::GetAssociation( size_t iGene, const vector<unsigned char>& vecbData,
01444     const vector<size_t>& veciGenes, size_t iContext, bool fAll, float* pdBetween, float* pdBackground,
01445     float* pdWithin, vector<float>* pvecdBetween, vector<float>* pvecdBackground, float dWithin ) const {
01446     const CBayesNetMinimal& BNet    = GetBN( iContext );
01447     float                   d, dFrac, dBackground;
01448     size_t                  i;
01449     vector<float>           vecdBetween, vecdBackground;
01450 
01451     if( veciGenes.empty( ) ) {
01452         SetPointer( pdBetween, CMeta::GetNaN( ) );
01453         SetPointer( pdBackground, CMeta::GetNaN( ) );
01454         SetPointer( pdWithin, CMeta::GetNaN( ) );
01455         return true; }
01456 
01457     dFrac = GetFraction( veciGenes.size( ) );
01458     if( pdBetween )
01459         vecdBetween.reserve( veciGenes.size( ) );
01460     if( pdBackground )
01461         vecdBackground.reserve( veciGenes.size( ) + 1 );
01462     dBackground = GetBackground( iContext, iGene );
01463     PushPointer( pdBackground, vecdBackground, dBackground );
01464     PushPointer( pvecdBackground, dBackground );
01465     for( i = 0; i < veciGenes.size( ); ++i ) {
01466         PushPointer( pdBackground, vecdBackground, GetBackground( iContext, veciGenes[ i ] ) );
01467         if( veciGenes[ i ] == iGene )
01468             d = 1;
01469         else if( IsFraction( dFrac ) )
01470             continue;
01471         else
01472             d = BNet.Evaluate( vecbData, ( fAll ? veciGenes[ i ] : i ) * GetDatachunk( ) );
01473         if( !CMeta::IsNaN( d ) ) {
01474             PushPointer( pdBetween, vecdBetween, d );
01475             PushPointer( pvecdBetween, d ); } }
01476     Winsorize( vecdBetween );
01477     SetPointer( pdBetween, (float)CStatistics::Average( vecdBetween ) );
01478     Winsorize( vecdBackground );
01479     SetPointer( pdBackground, (float)CStatistics::Average( vecdBackground ) );
01480 
01481     if( pdWithin ) {
01482         i = veciGenes.size( ) * ( veciGenes.size( ) + 1 ) / 2;
01483         if( CMeta::IsNaN( dWithin ) ) {
01484             vector<float>   vecdWithin;
01485 
01486             vecdWithin.reserve( 1 + i );
01487 // Alternative: calculate an average over all edges (weighted) rather than between sets (unweighted)
01488 //          vecdWithin.push_back( GetPrior( iContext ) );
01489             if( !GetWithin( veciGenes, iContext, NULL, &vecdWithin ) )
01490                 return false;
01491             Winsorize( vecdWithin );
01492 //          SetPointer( pdWithin, (float)CStatistics::Average( vecdWithin ) ); }
01493             SetPointer( pdWithin, ( (float)CStatistics::Average( vecdWithin ) + GetPrior( iContext ) ) / 2 ); }
01494         else
01495 //          SetPointer( pdWithin, ( GetPrior( iContext ) + ( i * dWithin ) ) / ( 1 + i ) ); }
01496             SetPointer( pdWithin, ( GetPrior( iContext ) + dWithin ) / 2 ); }
01497 
01498     return true; }
01499 
01500 bool CBNServer::GetAssociation( const vector<size_t>& veciOne, const vector<size_t>& veciTwo,
01501     size_t iContext, float& dBetween, float& dBackground, float& dWithin ) const {
01502     const vector<size_t>&   veciSmall   = ( veciOne.size( ) < veciTwo.size( ) ) ? veciOne : veciTwo;
01503     const vector<size_t>&   veciBig     = ( veciOne.size( ) < veciTwo.size( ) ) ? veciTwo : veciOne;
01504     size_t                  i, j;
01505     float                   dFrac;
01506     vector<unsigned char>   vecbData;
01507     vector<float>           vecdBetween, vecdBackground, vecdWithin;
01508 
01509     dFrac = GetFraction( veciSmall.size( ) );
01510     vecdBetween.reserve( (size_t)( dFrac * veciSmall.size( ) * veciBig.size( ) ) + c_iOverestimate );
01511     vecdBackground.reserve( veciSmall.size( ) + veciBig.size( ) );
01512     for( i = 0; i < veciSmall.size( ); ++i ) {
01513         if( IsFraction( dFrac ) ) {
01514             vecdBackground.push_back( GetBackground( iContext, veciSmall[ i ] ) );
01515             continue; }
01516         if( !( GetDatabase( ).Get( veciSmall[ i ], veciBig, vecbData, true ) &&
01517             GetAssociation( veciSmall[ i ], vecbData, veciBig, iContext, false, NULL, NULL, NULL,
01518             &vecdBetween, &vecdBackground, CMeta::GetNaN( ) ) ) )
01519             return false; }
01520     for( i = 0; i < veciBig.size( ); ++i )
01521         vecdBackground.push_back( GetBackground( iContext, veciBig[ i ] ) );
01522 
01523     i = veciSmall.size( ) + 1;
01524     j = veciBig.size( ) + 1;
01525     vecdWithin.reserve( (size_t)( ( dFrac * i * i / 2 ) + ( dFrac * j * j / 2 ) ) + c_iOverestimate );
01526     if( !( GetWithin( veciSmall, iContext, NULL, &vecdWithin ) &&
01527         GetWithin( veciBig, iContext, NULL, &vecdWithin ) ) )
01528         return false;
01529 
01530     Winsorize( vecdBetween );
01531     dBetween = (float)CStatistics::Average( vecdBetween );
01532     Winsorize( vecdBackground );
01533     dBackground = (float)CStatistics::Average( vecdBackground );
01534     Winsorize( vecdWithin );
01535     dWithin = (float)CStatistics::Average( vecdWithin );
01536 
01537     return true; }
01538 
01539 bool CBNServer::GetWithin( const vector<size_t>& veciGenes, size_t iContext, float* pdWithin,
01540     vector<float>* pvecdWithin ) const {
01541 // Alternative: calculate priors in the current context rather than the global context
01542     iContext = 0;
01543     const CBayesNetMinimal& BNet    = GetBN( iContext );
01544     size_t                  i, j;
01545     vector<unsigned char>   vecbData;
01546     vector<float>           vecdWithin;
01547     float                   d, dFrac;
01548 
01549     dFrac = GetFraction( veciGenes.size( ) );
01550     if( pdWithin ) {
01551         i = veciGenes.size( ) + 1;
01552         vecdWithin.reserve( (size_t)( dFrac * i * i / 2 ) + c_iOverestimate ); }
01553     for( i = 0; i < veciGenes.size( ); ++i ) {
01554         if( IsFraction( dFrac ) )
01555             continue;
01556         if( !GetDatabase( ).Get( veciGenes[ i ], veciGenes, vecbData, true ) )
01557             return false;
01558         d = GetPrior( iContext );
01559         PushPointer( pdWithin, vecdWithin, d );
01560         PushPointer( pvecdWithin, d );
01561         for( j = ( i + 1 ); j < veciGenes.size( ); ++j ) {
01562             if( IsFraction( dFrac ) )
01563                 continue;
01564             if( !CMeta::IsNaN( d = BNet.Evaluate( vecbData, j * GetDatachunk( ) ) ) ) {
01565                 PushPointer( pdWithin, vecdWithin, d );
01566                 PushPointer( pvecdWithin, d ); } } }
01567     if( pdWithin ) {
01568         Winsorize( vecdWithin );
01569         SetPointer( pdWithin, (float)CStatistics::Average( vecdWithin ) ); }
01570 
01571     return true; }