Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 #include "BNServer.h" 00025 #include "dot.h" 00026 00027 static const char c_szXDSL[] = ".xdsl"; 00028 static const char c_szDSL[] = ".dsl"; 00029 static const char c_szDOT[] = ".dot"; 00030 static const char c_szSVG[] = ".svg"; 00031 static const size_t c_iContextOffset = 2; 00032 static const size_t c_iParamOffset = 4; 00033 const float CBNServer::c_dCutoff = 0.1f; 00034 const float CBNServer::c_adColorMin[] = {0, 1, 0}; 00035 const float CBNServer::c_adColorMax[] = {1, 0, 0}; 00036 const CBNServer::TPFNProcessor CBNServer::c_apfnProcessors[] = 00037 {&CBNServer::ProcessInference, &CBNServer::ProcessData, &CBNServer::ProcessGraph, 00038 &CBNServer::ProcessContexts, &CBNServer::ProcessTermFinder, &CBNServer::ProcessDiseases, 00039 &CBNServer::ProcessGenes, &CBNServer::ProcessAssociation, &CBNServer::ProcessAssociations, 00040 &CBNServer::ProcessInferenceOTF, &CBNServer::ProcessEdges}; 00041 00042 struct SPixie { 00043 size_t m_iNode; 00044 float m_dScore; 00045 00046 SPixie( size_t iNode, float dScore ) : m_iNode(iNode), m_dScore(dScore) { } 00047 00048 bool operator<( const SPixie& sPixie ) const { 00049 00050 return ( m_dScore < sPixie.m_dScore ); } 00051 }; 00052 00053 int main( int iArgs, char** aszArgs ) { 00054 gengetopt_args_info sArgs; 00055 CServer Server; 00056 CBayesNetSmile BNSmile; 00057 ifstream ifsm, ifsmGenes; 00058 vector<string> vecstrLine; 00059 size_t i, j, k; 00060 uint32_t iSize; 00061 ofstream ofsm; 00062 int iRet; 00063 COntologyKEGG KEGG; 00064 COntologyOBO GOBP, GOMF, GOCC; 00065 set<size_t> setiContexts; 00066 // Server data 00067 vector<float> vecdPriors; 00068 CBayesNetMinimal BNDefault; 00069 vector<CBayesNetMinimal> vecBNs; 00070 00071 00072 CDataMatrix MatBackgrounds, MatParameters, MatWithinC, MatWithinD; 00073 CDataMatrix MatBetweenCC, MatBetweenDD, MatBetweenDC; 00074 vector<vector<size_t> > vecveciDiseases, vecveciContexts; 00075 vector<size_t> veciDiseases, veciContexts; 00076 CGenome Genome; 00077 const IOntology* apOntologies[] = {&GOBP, &GOMF, &GOCC, &KEGG, NULL }; 00078 00079 iRet = cmdline_parser2( iArgs, aszArgs, &sArgs, 0, 1, 0 ); 00080 CDatabase Database(sArgs.is_nibble_flag); 00081 00082 if( sArgs.config_arg ) 00083 iRet = cmdline_parser_configfile( sArgs.config_arg, &sArgs, 0, 0, 1 ) && iRet; 00084 if( iRet ) { 00085 cmdline_parser_print_help( ); 00086 return iRet; } 00087 CMeta Meta( sArgs.verbosity_arg ); 00088 00089 // Open Gene Ontology 00090 if( sArgs.go_onto_arg ) { 00091 cerr << "Loading the Gene Ontology..." << endl; 00092 ifsm.clear( ); 00093 ifsm.open( sArgs.go_onto_arg ); 00094 if( sArgs.go_anno_arg ) { 00095 ifsmGenes.clear( ); 00096 ifsmGenes.open( sArgs.go_anno_arg ); } 00097 if( !COntologyOBO::Open( ifsm, ifsmGenes, Genome, GOBP, GOMF, GOCC, false, true ) ) { 00098 cerr << "Could not open: " << sArgs.go_onto_arg << ", " << sArgs.go_anno_arg << endl; 00099 return 1; } 00100 ifsm.close( ); 00101 if( sArgs.go_anno_arg ) 00102 ifsmGenes.close( ); } 00103 00104 // Open KEGG 00105 if( sArgs.kegg_arg ) { 00106 cerr << "Loading KEGG..." << endl; 00107 ifsm.clear( ); 00108 ifsm.open( sArgs.kegg_arg ); 00109 if( !KEGG.Open( ifsm, Genome, sArgs.kegg_org_arg, true ) ) { 00110 cerr << "Could not open: " << sArgs.kegg_arg << endl; 00111 return 1; } 00112 ifsm.close( ); } 00113 00114 // Open database 00115 cerr << "Loading the database..." << endl; 00116 if( !Database.Open( sArgs.database_arg ) ) { 00117 cerr << "Could not open: " << sArgs.database_arg << endl; 00118 return 1; } 00119 // Open Bayes networks 00120 if( sArgs.minimal_in_flag ) { 00121 cerr << "Loading minimal Bayesian classifiers..." << endl; 00122 ifsm.clear( ); 00123 ifsm.open( sArgs.networks_arg, ios_base::binary ); 00124 if( !BNDefault.Open( ifsm ) ) { 00125 cerr << "Could not read: " << sArgs.networks_arg << endl; 00126 return 1; } 00127 ifsm.read( (char*)&iSize, sizeof(iSize) ); 00128 vecBNs.resize( iSize ); 00129 for( i = 0; i < vecBNs.size( ); ++i ) 00130 if( !vecBNs[ i ].Open( ifsm ) ) { 00131 cerr << "Could not read: " << sArgs.networks_arg << " (" << i << ")" << endl; 00132 return 1; } 00133 ifsm.close( ); } 00134 if( !sArgs.minimal_in_flag ) { 00135 if( !sArgs.default_arg ) { 00136 cmdline_parser_print_help( ); 00137 return 1; } 00138 if( !( BNSmile.Open( sArgs.default_arg ) && BNDefault.Open( BNSmile ) ) ) { 00139 cerr << "Could not open: " << sArgs.default_arg << endl; 00140 return 1; } 00141 BNDefault.SetID( sArgs.default_arg ); } 00142 // Open context IDs and p-value parameters 00143 { 00144 CPCL PCLContexts( false ); 00145 00146 cerr << "Loading contexts..." << endl; 00147 if( !PCLContexts.Open( sArgs.input_arg, c_iParamOffset - 1 ) ) { 00148 cerr << "Could not open: " << ( sArgs.input_arg ? sArgs.input_arg : "standard input" ) << endl; 00149 return 1; } 00150 MatParameters.Initialize( PCLContexts.GetGenes( ) + 1, PCLContexts.GetExperiments( ) ); 00151 for( i = 0; i < PCLContexts.GetGenes( ); ++i ) 00152 MatParameters.Set( i + 1, PCLContexts.Get( i ) ); 00153 00154 if( !sArgs.minimal_in_flag ) { 00155 vecBNs.resize( PCLContexts.GetGenes( ) ); 00156 for( i = 0; i < vecBNs.size( ); ++i ) { 00157 const string& strContext = PCLContexts.GetFeature( i, c_iContextOffset ); 00158 00159 if( !( BNSmile.Open( ( (string)sArgs.networks_arg + '/' + CMeta::Filename( strContext ) + 00160 ( sArgs.xdsl_flag ? c_szXDSL : c_szDSL ) ).c_str( ) ) && 00161 vecBNs[ i ].Open( BNSmile ) ) ) { 00162 cerr << "Could not open: " << strContext << endl; 00163 return 1; } 00164 vecBNs[ i ].SetID( strContext ); } } 00165 } 00166 // Open global p-value parameters 00167 if( sArgs.global_given ) 00168 { 00169 CPCL PCLParams( false ); 00170 00171 cerr << "Loading global p-value parameters..." << endl; 00172 if( !PCLParams.Open( sArgs.global_arg, 0 ) ) { 00173 cerr << "Could not open: " << sArgs.global_arg << endl; 00174 return 1; } 00175 MatParameters.Set( 0, 0, (float)atof( PCLParams.GetGene( 0 ).c_str( ) ) ); 00176 for( i = 0; i < PCLParams.GetExperiments( ); ++i ) 00177 MatParameters.Set( 0, i + 1, PCLParams.Get( 0, i ) ); 00178 } 00179 // Save minimal Bayes nets 00180 if( sArgs.minimal_out_arg ) { 00181 cerr << "Saving minimal Bayesian classifiers..." << endl; 00182 ofsm.open( sArgs.minimal_out_arg, ios_base::binary ); 00183 BNDefault.Save( ofsm ); 00184 iSize = (uint32_t)vecBNs.size( ); 00185 ofsm.write( (const char*)&iSize, sizeof(iSize) ); 00186 for( i = 0; i < vecBNs.size( ); ++i ) 00187 vecBNs[ i ].Save( ofsm ); 00188 ofsm.close( ); } 00189 // Open context<->gene mapping 00190 if( sArgs.contexts_given ) 00191 { 00192 CPCL PCLContextGenes( false ); 00193 00194 cerr << "Loading context contents..." << endl; 00195 if( !PCLContextGenes.Open( sArgs.contexts_arg, 1 ) ) { 00196 cerr << "Could not open: " << sArgs.contexts_arg << endl; 00197 return 1; } 00198 vecveciContexts.resize( vecBNs.size( ) ); 00199 for( i = 0; i < PCLContextGenes.GetGenes( ); ++i ) { 00200 j = atoi( PCLContextGenes.GetGene( i ).c_str( ) ) - 1; 00201 k = atoi( PCLContextGenes.GetFeature( i, 1 ).c_str( ) ) - 1; 00202 if( j >= vecveciContexts.size( ) ) { 00203 cerr << "Unknown context: " << j << " (" << PCLContextGenes.GetGene( i ) << ')' << endl; 00204 return 1; } 00205 if( k >= Database.GetGenes( ) ) { 00206 cerr << "Unknown gene: " << k << " (" << PCLContextGenes.GetFeature( i, 1 ) << ')' << endl; 00207 return 1; } 00208 setiContexts.insert( k ); 00209 vecveciContexts[ j ].push_back( k ); } 00210 veciContexts.resize( setiContexts.size( ) ); 00211 copy( setiContexts.begin( ), setiContexts.end( ), veciContexts.begin( ) ); 00212 } 00213 // Open disease<->gene mapping 00214 if( sArgs.diseases_given ) 00215 { 00216 CPCL PCLDiseaseGenes( false ); 00217 00218 cerr << "Loading disease contents..." << endl; 00219 if( !PCLDiseaseGenes.Open( sArgs.diseases_arg, 1 ) ) { 00220 cerr << "Could not open: " << sArgs.diseases_arg << endl; 00221 return 1; } 00222 for( i = j = 0; i < PCLDiseaseGenes.GetGenes( ); ++i ) 00223 if( ( k = atoi( PCLDiseaseGenes.GetGene( i ).c_str( ) ) ) > j ) 00224 j = k; 00225 vecveciDiseases.resize( j ); 00226 setiContexts.clear( ); 00227 for( i = 0; i < PCLDiseaseGenes.GetGenes( ); ++i ) { 00228 j = atoi( PCLDiseaseGenes.GetGene( i ).c_str( ) ) - 1; 00229 k = atoi( PCLDiseaseGenes.GetFeature( i, 1 ).c_str( ) ) - 1; 00230 if( j >= vecveciContexts.size( ) ) { 00231 cerr << "Unknown disease: " << j << " (" << PCLDiseaseGenes.GetGene( i ) << ')' << endl; 00232 return 1; } 00233 if( k >= Database.GetGenes( ) ) { 00234 cerr << "Unknown gene: " << k << " (" << PCLDiseaseGenes.GetFeature( i, 1 ) << ')' << endl; 00235 return 1; } 00236 setiContexts.insert( k ); 00237 vecveciDiseases[ j ].push_back( k ); } 00238 veciDiseases.resize( setiContexts.size( ) ); 00239 copy( setiContexts.begin( ), setiContexts.end( ), veciDiseases.begin( ) ); 00240 } 00241 // Open gene backgrounds 00242 cerr << "Loading gene backgrounds..." << endl; 00243 if( sArgs.backgrounds_arg && !MatBackgrounds.Open( sArgs.backgrounds_arg ) ) { 00244 cerr << "Could not open: " << sArgs.backgrounds_arg << endl; 00245 return 1; } 00246 // Initialize context priors 00247 vecdPriors.resize( MatBackgrounds.GetRows( ) ); 00248 fill( vecdPriors.begin( ), vecdPriors.end( ), 0.0f ); 00249 for( i = 0; i < vecdPriors.size( ); ++i ) { 00250 for( j = 0; j < MatBackgrounds.GetColumns( ); ++j ) 00251 vecdPriors[ i ] += MatBackgrounds.Get( i, j ); 00252 vecdPriors[ i ] /= MatBackgrounds.GetColumns( ); } 00253 // Open context withins 00254 if( sArgs.within_c_arg ) { 00255 cerr << "Loading within context scores..." << endl; 00256 if( !MatWithinC.Open( sArgs.within_c_arg ) ) { 00257 cerr << "Could not open: " << sArgs.within_c_arg << endl; 00258 return 1; } } 00259 // Open disease withins 00260 if( sArgs.within_d_arg ) { 00261 cerr << "Loading within disease scores..." << endl; 00262 if( !MatWithinD.Open( sArgs.within_d_arg ) ) { 00263 cerr << "Could not open: " << sArgs.within_d_arg << endl; 00264 return 1; } } 00265 // Open context betweens 00266 if( sArgs.between_cc_arg ) { 00267 cerr << "Loading between context scores..." << endl; 00268 if( !MatBetweenCC.Open( sArgs.between_cc_arg ) ) { 00269 cerr << "Could not open: " << sArgs.between_cc_arg << endl; 00270 return 1; } } 00271 // Open disease betweens 00272 if( sArgs.between_dd_arg ) { 00273 cerr << "Loading between disease scores..." << endl; 00274 if( !MatBetweenDD.Open( sArgs.between_dd_arg ) ) { 00275 cerr << "Could not open: " << sArgs.between_dd_arg << endl; 00276 return 1; } } 00277 // Open context-disease betweens 00278 if( sArgs.between_dc_arg ) { 00279 cerr << "Loading between disease-context scores..." << endl; 00280 if( !MatBetweenDC.Open( sArgs.between_dc_arg ) ) { 00281 cerr << "Could not open: " << sArgs.between_dc_arg << endl; 00282 return 1; } } 00283 00284 SBNServerData sData( 00285 apOntologies, 00286 BNDefault, 00287 Database, 00288 Genome, 00289 sArgs.limit_arg, 00290 MatBackgrounds, 00291 MatBetweenCC, 00292 MatBetweenDC, 00293 MatBetweenDD, 00294 MatParameters, 00295 MatWithinC, 00296 MatWithinD, 00297 sArgs.files_arg, 00298 sArgs.graphviz_arg, 00299 vecBNs, 00300 vecdPriors, 00301 veciContexts, 00302 veciDiseases, 00303 vecveciContexts, 00304 vecveciDiseases 00305 ); 00306 CBNServer BNServer( 0, "", sData ); 00307 if( sArgs.networklets_flag ) 00308 return ( BNServer.GenerateNetworkIcons( ) ? 0 : 1 ); 00309 if( sArgs.assoc_diseases_arg ) 00310 return ( BNServer.GenerateAssociations( sArgs.assoc_diseases_arg, sArgs.assoc_context_arg ) ? 0 : 1 ); 00311 00312 Server.Initialize( sArgs.port_arg, sArgs.timeout_arg, &BNServer ); 00313 #ifdef WIN32 00314 pthread_win32_process_attach_np( ); 00315 #endif // WIN32 00316 Server.Start( ); 00317 #ifdef WIN32 00318 pthread_win32_process_detach_np( ); 00319 #endif // WIN32 00320 00321 return 0; } 00322 00323 bool CBNServer::Get( size_t iGene, size_t iContext, float* adValues, const CDatabase& Database, 00324 const vector<CBayesNetMinimal>& vecBNs, const CBayesNetMinimal& BNDefault ) { 00325 const CBayesNetMinimal& BNet = ( iContext && vecBNs.size( ) ) ? 00326 vecBNs[ ( iContext - 1 ) % vecBNs.size( ) ] : BNDefault; 00327 vector<unsigned char> vecbData; 00328 00329 if( !( Database.Get( iGene, vecbData ) && BNet.Evaluate( vecbData, adValues, Database.GetGenes( ) ) ) ) 00330 return false; 00331 adValues[ iGene % Database.GetGenes( ) ] = CMeta::GetNaN( ); 00332 00333 return true; } 00334 00335 CBNServer::CBNServer( SOCKET iSocket, const string& strConnection, const SBNServerData& sData ) : 00336 m_iSocket(iSocket), m_strConnection(strConnection), m_sData(sData), m_adContexts(NULL), 00337 m_adDiseases(NULL), m_adGenes(NULL) { 00338 00339 if( m_strConnection.length( ) > 0 ) 00340 cerr << "New connection from: " << m_strConnection << endl; } 00341 00342 CBNServer::~CBNServer( ) { 00343 00344 if( m_adGenes ) 00345 delete[] m_adGenes; 00346 if( m_adContexts ) 00347 delete[] m_adContexts; 00348 if( m_adDiseases ) 00349 delete[] m_adDiseases; } 00350 00351 IServerClient* CBNServer::NewInstance( SOCKET iSocket, uint32_t iHost, uint16_t sPort ) { 00352 string strConnection; 00353 char acBuffer[ 16 ]; 00354 in_addr sAddr; 00355 00356 #pragma warning(disable : 4996) 00357 sprintf( acBuffer, "%hu", sPort ); 00358 #pragma warning(default : 4996) 00359 sAddr.s_addr = htonl( iHost ); 00360 strConnection = (string)inet_ntoa( sAddr ) + ":" + acBuffer; 00361 return new CBNServer( iSocket, strConnection, m_sData ); } 00362 00363 void CBNServer::Destroy( ) { 00364 00365 cerr << "Disconnected: " << m_strConnection << endl; 00366 00367 delete this; } 00368 00369 bool CBNServer::ProcessMessage( const vector<unsigned char>& vecbMessage ) { 00370 size_t i, iProcessed, iOffset; 00371 00372 for( iOffset = 0; iOffset < vecbMessage.size( ); iOffset += ( iProcessed + 1 ) ) { 00373 cerr << "LOG " << time( NULL ) << '\t' << m_strConnection << endl; //'\t' << hex; 00374 //for( i = 0; i < vecbMessage.size( ); ++i ) 00375 // cerr << setfill( '0' ) << setw( 2 ) << (unsigned int)vecbMessage[ i ]; 00376 //cerr << dec << endl; 00377 if( vecbMessage[ iOffset ] >= ARRAYSIZE(c_apfnProcessors) ) { 00378 cerr << m_strConnection << " unknown opcode: " << (int)vecbMessage[ iOffset ] << endl; 00379 return false; } 00380 else { 00381 cerr << m_strConnection << " opcode: " << (int)vecbMessage[ iOffset ] << endl; 00382 } 00383 if( ( iProcessed = (this->*c_apfnProcessors[ vecbMessage[ iOffset ] ])( vecbMessage, 00384 iOffset + 1 ) ) == -1 ) 00385 return false; } 00386 00387 return true; } 00388 00389 size_t CBNServer::ProcessInference( const vector<unsigned char>& vecbMessage, size_t iOffset ) { 00390 size_t iStart; 00391 uint32_t iGene, iContext; 00392 00393 if( ( iOffset + sizeof(iContext) ) > vecbMessage.size( ) ) 00394 return -1; 00395 iStart = iOffset; 00396 iContext = *(uint32_t*)&vecbMessage[ iOffset ]; 00397 for( iOffset += sizeof(iContext); ( iOffset + sizeof(iGene) ) <= vecbMessage.size( ); 00398 iOffset += sizeof(iGene) ) 00399 if( !( ( iGene = *(uint32_t*)&vecbMessage[ iOffset ] ) && Get( iGene - 1, iContext ) ) ) 00400 return -1; 00401 00402 return ( iOffset - iStart ); } 00403 00404 00405 size_t CBNServer::ProcessCPT( const vector<unsigned char>& vecbMessage, size_t iOffset, vector<vector<float> >& binEffects ) { 00406 size_t i, j, iStart; 00407 uint32_t iGene, iContext, iDataCount, iBinsCount, iData, iGenes, iChunk, iSize, iGeneOffset; 00408 float bineffect; 00409 vector<unsigned char> vecbData; 00410 00411 cerr << "Processing CPT" << endl; 00412 00413 binEffects.resize(GetDatabase().GetDatasets()); 00414 00415 iStart = iOffset; 00416 iDataCount = *(uint32_t*)&vecbMessage[ iOffset ]; 00417 cerr << "Total datasets: " << iDataCount << endl; 00418 00419 // Load bin effects from message 00420 for ( i = 0, iOffset += sizeof(iDataCount); i < iDataCount && iOffset < vecbMessage.size(); i++ ) { 00421 iData = *(uint32_t*)&vecbMessage[ iOffset ]; 00422 iOffset += sizeof(iData); 00423 00424 if ( iData >= binEffects.size() ) { 00425 cerr << "Bad dataset ID: " << iData << endl; 00426 return -1; 00427 } 00428 00429 iBinsCount = *(uint32_t*)&vecbMessage[ iOffset ]; 00430 iOffset += sizeof(iBinsCount); 00431 binEffects[iData].resize(iBinsCount); 00432 00433 for ( j = 0; j < iBinsCount; j++, iOffset += sizeof(float) ) { 00434 bineffect = *(float*)&vecbMessage[ iOffset ]; 00435 binEffects[iData][j] = bineffect; 00436 } 00437 } 00438 return iOffset; 00439 } 00440 00441 00442 size_t CBNServer::ProcessInferenceOTF( const vector<unsigned char>& vecbMessage, size_t iOffset ) { 00443 size_t i, j, iStart; 00444 uint32_t iGene, iContext, iData, iGenes, iChunk, iSize, iGeneOffset; 00445 float bineffect; 00446 vector<vector<float> > binEffects; 00447 vector<unsigned char> vecbData; 00448 float* fVals; 00449 00450 cerr << "Inference OTF: " << vecbMessage.size() << endl; 00451 00452 iOffset = ProcessCPT( vecbMessage, iOffset, binEffects ); 00453 00454 iGenes = GetDatabase().GetGenes(); 00455 iChunk = ( GetDatabase().GetDatasets() + 1 ) / 2; 00456 00457 // Load gene id 00458 for ( ; iOffset < vecbMessage.size() ; iOffset += sizeof(uint32_t) ) { 00459 iGene = *(uint32_t*)&vecbMessage[ iOffset ]; 00460 cerr << "Gene id: " << iGene << endl; 00461 00462 // Arg -- BNServer genes are 1-indexed 00463 if( !iGene ) 00464 continue; 00465 iGene--; 00466 00467 // Infer posteriors 00468 vecbData.clear(); 00469 GetDatabase().Get( iGene, vecbData ); 00470 fVals = new float[iGenes]; 00471 00472 for ( i = 0, iGeneOffset = 0; i < iGenes; i++, iGeneOffset += iChunk ) { 00473 fVals[i] = Evaluate( binEffects, vecbData, iGeneOffset ); 00474 } 00475 00476 // Send results back 00477 iSize = (uint32_t)( GetGenes( ) * sizeof(*fVals) ); 00478 send( m_iSocket, (char*)&iSize, sizeof(iSize), 0 ); 00479 send( m_iSocket, (char*)fVals, iSize, 0 ); 00480 } 00481 00482 return ( iOffset - iStart ); } 00483 00484 00485 size_t CBNServer::ProcessEdges( const vector<unsigned char>& vecbMessage, size_t iOffset ) { 00486 size_t i, j, iStart; 00487 uint32_t iGeneOne, iGeneTwo, iGenes, iChunk, iSize, iGeneOffset, iEdges; 00488 float bineffect; 00489 vector<vector<float> > binEffects; 00490 float* fVals; 00491 vector<unsigned char> vecbData; 00492 unsigned char c; 00493 float fLogOdds = 0; 00494 00495 iStart = iOffset; 00496 00497 iOffset = ProcessCPT( vecbMessage, iOffset, binEffects ); 00498 00499 iEdges = *(uint32_t*)&vecbMessage[ iOffset ]; 00500 fVals = new float[iEdges]; 00501 00502 cerr << "Edges: " << iEdges << endl; 00503 00504 // Load gene id pairs 00505 for ( j = 0, iOffset += sizeof(iEdges); iOffset < vecbMessage.size() && j < iEdges ; j++ ) { 00506 iGeneOne = *(uint32_t*)&vecbMessage[ iOffset ]; 00507 iOffset += sizeof(iGeneOne); 00508 iGeneTwo = *(uint32_t*)&vecbMessage[ iOffset ]; 00509 iOffset += sizeof(iGeneTwo); 00510 //cerr << "Genes: " << iGeneOne << "\t" << iGeneTwo << endl; 00511 00512 iGeneOne -= 1; 00513 iGeneTwo -= 1; 00514 if ( iGeneOne < 0 or iGeneTwo < 0 ) 00515 continue; 00516 00517 fLogOdds = 0; 00518 vecbData.clear(); 00519 GetDatabase().Get(iGeneOne, iGeneTwo, vecbData); 00520 for( i = 0; i < GetDatabase().GetDatasets(); ++i ) { 00521 if ( binEffects[i].size() == 0 ) // Skip dataset 00522 continue; 00523 c = vecbData[ ( i / 2 ) ]; 00524 if( i % 2 ) 00525 c >>= 4; 00526 if( ( c &= 0xF ) == 0xF ) // Skip missing data 00527 continue; 00528 //if ( iGeneTwo == 1 || iGeneTwo == 0 ) 00529 // cerr << i << "\t" << (size_t)c << endl; 00530 fLogOdds += binEffects[i][(size_t)c]; 00531 } 00532 fVals[j] = fLogOdds; 00533 } 00534 00535 // Send results back 00536 iSize = (uint32_t)( iEdges * sizeof(float) ); 00537 send( m_iSocket, (char*)&iSize, sizeof(iSize), 0 ); 00538 send( m_iSocket, (char*)fVals, iSize, 0 ); 00539 00540 return ( iOffset - iStart ); 00541 } 00542 00543 float CBNServer::Evaluate( const vector<vector<float> >& binEffects, vector<unsigned char>& vecbDatum, size_t iOffset ) { 00544 00545 unsigned char c; 00546 size_t i; 00547 float fLogOdds = 0; 00548 00549 00550 for( i = 0; i < GetDatabase().GetDatasets(); ++i ) { 00551 if ( binEffects[i].size() == 0 ) // Skip dataset 00552 continue; 00553 00554 c = vecbDatum[ ( i / 2 ) + iOffset ]; 00555 if( i % 2 ) 00556 c >>= 4; 00557 if( ( c &= 0xF ) == 0xF ) // Skip missing data 00558 continue; 00559 //if ( debug ) 00560 // cerr << i << "\t" << (size_t)c << endl; 00561 00562 fLogOdds += binEffects[i][(size_t)c]; 00563 } 00564 00565 return fLogOdds; 00566 00567 } 00568 00569 bool CBNServer::Get( size_t iGene, size_t iContext, float* adValues ) { 00570 float* adTarget; 00571 uint32_t iSize; 00572 00573 cerr << m_strConnection << " inferring " << iGene << " (" << GetGene( iGene ) << ") in " << 00574 iContext << endl; 00575 if( !( adTarget = adValues ) ) { 00576 InitializeGenes( ); 00577 adTarget = m_adGenes; } 00578 if( !CBNServer::Get( iGene, iContext, adTarget, GetDatabase( ), m_sData.m_vecBNs, m_sData.m_BNDefault ) ) 00579 return false; 00580 if( !adValues ) { 00581 iSize = (uint32_t)( GetGenes( ) * sizeof(*m_adGenes) ); 00582 send( m_iSocket, (char*)&iSize, sizeof(iSize), 0 ); 00583 send( m_iSocket, (char*)m_adGenes, iSize, 0 ); } 00584 00585 return true; } 00586 00587 bool CBNServer::Get( size_t iGene, const vector<size_t>& veciGenes, size_t iContext, float* adValues ) { 00588 const CBayesNetMinimal& BNet = GetBN( iContext ); 00589 vector<unsigned char> vecbData; 00590 00591 return ( GetDatabase( ).Get( iGene, veciGenes, vecbData ) && 00592 BNet.Evaluate( vecbData, adValues, veciGenes.size( ) ) ); } 00593 00594 size_t CBNServer::ProcessData( const vector<unsigned char>& vecbMessage, size_t iOffset ) { 00595 uint32_t iSize, iOne, iTwo; 00596 vector<unsigned char> vecbData, vecbSend; 00597 size_t i; 00598 unsigned char b, bValue; 00599 00600 iSize = sizeof(iOne) + sizeof(iTwo); 00601 if( ( iOffset + iSize ) > vecbMessage.size( ) ) 00602 return -1; 00603 iOne = *(uint32_t*)&vecbMessage[ iOffset ]; 00604 iTwo = *(uint32_t*)&vecbMessage[ iOffset + sizeof(iOne) ]; 00605 if( !( iOne-- && iTwo-- ) ) 00606 return -1; 00607 cerr << m_strConnection << " requested " << GetGene( iOne ) << ',' << GetGene( iTwo ) << endl; 00608 if( !GetDatabase( ).Get( iOne, iTwo, vecbData ) ) 00609 return -1; 00610 00611 vecbSend.resize( vecbData.size( ) * 2 ); 00612 for( i = 0; i < vecbData.size( ); ++i ) { 00613 b = vecbData[ i ]; 00614 if( ( bValue = ( b & 0xF ) ) == 0xF ) 00615 bValue = -1; 00616 vecbSend[ 2 * i ] = bValue; 00617 if( ( bValue = ( ( b >> 4 ) & 0xF ) ) == 0xF ) 00618 bValue = -1; 00619 vecbSend[ ( 2 * i ) + 1 ] = bValue; } 00620 iOne = (uint32_t)vecbSend.size( ); 00621 send( m_iSocket, (char*)&iOne, sizeof(iOne), 0 ); 00622 send( m_iSocket, (char*)&vecbSend[ 0 ], (int)vecbSend.size( ), 0 ); 00623 00624 return iSize; } 00625 00626 size_t CBNServer::ProcessGraph( const vector<unsigned char>& vecbMessage, size_t iOffset ) { 00627 size_t iRet, i, iSize; 00628 float dEdgeAggressiveness; 00629 uint32_t iGene, iContext, iLimit; 00630 vector<size_t> veciQuery, veciNeighbors; 00631 set<size_t> setiQuery; 00632 CDat DatGraph; 00633 vector<bool> vecfQuery; 00634 set<size_t>::const_iterator iterQuery; 00635 unsigned char bFile; 00636 00637 iSize = sizeof(bFile) + sizeof(iContext) + sizeof(iLimit) + sizeof(dEdgeAggressiveness); 00638 if( ( iOffset + iSize ) > vecbMessage.size( ) ) 00639 return -1; 00640 bFile = vecbMessage[ iOffset ]; 00641 iContext = *(uint32_t*)&vecbMessage[ iOffset + sizeof(bFile) ]; 00642 iLimit = *(uint32_t*)&vecbMessage[ iOffset + sizeof(bFile) + sizeof(iContext) ]; 00643 dEdgeAggressiveness = *(float*)&vecbMessage[ iOffset + sizeof(bFile) + sizeof(iContext) + sizeof(iLimit) ]; 00644 for( i = iOffset + iSize; ( i + sizeof(iGene) ) <= vecbMessage.size( ); i += sizeof(iGene) ) { 00645 iGene = *(uint32_t*)&vecbMessage[ i ]; 00646 if( !iGene-- ) 00647 return -1; 00648 setiQuery.insert( iGene ); } 00649 iRet = i - iOffset; 00650 00651 veciQuery.reserve( setiQuery.size( ) ); 00652 for( iterQuery = setiQuery.begin( ); iterQuery != setiQuery.end( ); ++iterQuery ) 00653 veciQuery.push_back( *iterQuery ); 00654 if( !( GraphCreate( veciQuery, iContext, iLimit, dEdgeAggressiveness, vecfQuery, veciNeighbors, 00655 DatGraph ) && GraphWrite( DatGraph, veciQuery, veciNeighbors, vecfQuery, iContext, 00656 bFile ? EGraphOutputFile : EGraphOutputSocket ) ) ) 00657 return -1; 00658 00659 return iRet; } 00660 00661 bool CBNServer::SelectNeighborsPixie( const vector<size_t>& veciQuery, const vector<bool>& vecfQuery, 00662 size_t iContext, size_t iLimit, const CDataMatrix& MatQuery, vector<size_t>& veciNeighbors ) const { 00663 vector<float> vecdNeighbors; 00664 priority_queue<SPixie> pqueNeighbors; 00665 float d, dAve, dStd; 00666 size_t i, j, iN; 00667 00668 cerr << m_strConnection << " PIXIE query " << iContext << ':'; 00669 for( i = 0; i < veciQuery.size( ); ++i ) 00670 cerr << ' ' << GetGene( veciQuery[ i ] ); 00671 cerr << endl; 00672 00673 vecdNeighbors.resize( GetGenes( ) ); 00674 fill( vecdNeighbors.begin( ), vecdNeighbors.end( ), 0.0f ); 00675 dAve = dStd = 0; 00676 for( iN = i = 0; i < veciQuery.size( ); ++i ) 00677 for( j = 0; j < GetGenes( ); ++j ) 00678 if( !CMeta::IsNaN( d = MatQuery.Get( i, j ) ) ) { 00679 iN++; 00680 dAve += d; 00681 dStd += d * d; 00682 if( !vecfQuery[ j ] ) 00683 vecdNeighbors[ j ] += d; } 00684 for( i = 0; i < vecdNeighbors.size( ); ++i ) 00685 if( ( d = vecdNeighbors[ i ] ) > 0 ) 00686 pqueNeighbors.push( SPixie( i, d ) ); 00687 while( !pqueNeighbors.empty( ) && ( ( veciQuery.size( ) + veciNeighbors.size( ) ) < iLimit ) ) { 00688 veciNeighbors.push_back( pqueNeighbors.top( ).m_iNode ); 00689 pqueNeighbors.pop( ); } 00690 00691 return true; } 00692 00693 bool CBNServer::SelectNeighborsRatio( const vector<size_t>& veciQuery, const vector<bool>& vecfQuery, 00694 size_t iContext, size_t iLimit, const CDataMatrix& MatQuery, vector<size_t>& veciNeighbors ) const { 00695 size_t i, j; 00696 priority_queue<SPixie> pqueNeighbors; 00697 float d; 00698 vector<float> vecdValues, vecdBackground, vecdCur; 00699 00700 cerr << m_strConnection << " RATIO query " << iContext << " (" << ( iContext ? 00701 GetBN( iContext ).GetID( ) : "total" ) << "):"; 00702 for( i = 0; i < veciQuery.size( ); ++i ) 00703 cerr << ' ' << GetGene( veciQuery[ i ] ); 00704 cerr << endl; 00705 00706 for( i = 0; i < veciQuery.size( ); ++i ) 00707 if( !CMeta::IsNaN( d = GetBackground( iContext, veciQuery[ i ] ) ) ) 00708 vecdBackground.push_back( d ); 00709 vecdCur.resize( vecdBackground.size( ) + 1 ); 00710 for( i = 0; i < MatQuery.GetColumns( ); ++i ) { 00711 if( vecfQuery[ i ] ) 00712 continue; 00713 vecdValues.clear( ); 00714 vecdValues.reserve( MatQuery.GetRows( ) ); 00715 for( j = 0; j < MatQuery.GetRows( ); ++j ) 00716 if( !CMeta::IsNaN( d = MatQuery.Get( j, i ) ) ) 00717 vecdValues.push_back( d ); 00718 if( !vecdValues.empty( ) ) { 00719 Winsorize( vecdValues ); 00720 copy( vecdBackground.begin( ), vecdBackground.end( ), vecdCur.begin( ) ); 00721 vecdCur[ vecdBackground.size( ) ] = GetBackground( iContext, i ); 00722 Winsorize( vecdCur ); 00723 pqueNeighbors.push( SPixie( i, (float)( CStatistics::Average( vecdValues ) / 00724 CStatistics::Average( vecdCur ) ) ) ); } } 00725 while( !pqueNeighbors.empty( ) && ( ( veciQuery.size( ) + veciNeighbors.size( ) ) < iLimit ) ) { 00726 veciNeighbors.push_back( pqueNeighbors.top( ).m_iNode ); 00727 pqueNeighbors.pop( ); } 00728 00729 return true; } 00730 00731 bool CBNServer::GraphCreate( const vector<size_t>& veciQuery, size_t iContext, size_t iLimit, 00732 float dEdgeAggressiveness, vector<bool>& vecfQuery, vector<size_t>& veciNeighbors, CDat& DatGraph ) const { 00733 vector<float> vecdNeighbors, vecdEdges; 00734 float d, dAve, dStd, dMin, dCutoff; 00735 size_t i, j, iMinOne, iMinTwo; 00736 vector<size_t> veciDegree; 00737 bool fDone; 00738 CDataMatrix MatQuery, MatNeighbors; 00739 vector<string> vecstrGenes; 00740 00741 MatQuery.Initialize( veciQuery.size( ), GetGenes( ) ); 00742 for( i = 0; i < veciQuery.size( ); ++i ) 00743 if( !((CBNServer*)this)->Get( veciQuery[ i ], iContext, MatQuery.Get( i ) ) ) 00744 return false; 00745 vecfQuery.resize( GetGenes( ) ); 00746 fill( vecfQuery.begin( ), vecfQuery.end( ), false ); 00747 for( i = 0; i < veciQuery.size( ); ++i ) 00748 vecfQuery[ veciQuery[ i ] ] = true; 00749 veciNeighbors.clear( ); 00750 fDone = SelectNeighborsRatio( veciQuery, vecfQuery, iContext, iLimit, MatQuery, veciNeighbors ); 00751 // SelectNeighborsPixie( veciQuery, vecfQuery, iContext, iLimit, MatQuery, veciNeighbors ); 00752 if( !fDone ) 00753 return false; 00754 00755 vecstrGenes.resize( veciQuery.size( ) + veciNeighbors.size( ) ); 00756 vecfQuery.resize( vecstrGenes.size( ) ); 00757 fill( vecfQuery.begin( ), vecfQuery.end( ), false ); 00758 for( i = 0; i < veciQuery.size( ); ++i ) { 00759 vecstrGenes[ i ] = GetGene( veciQuery[ i ] ); 00760 vecfQuery[ i ] = true; } 00761 for( i = 0; i < veciNeighbors.size( ); ++i ) 00762 vecstrGenes[ veciQuery.size( ) + i ] = GetGene( veciNeighbors[ i ] ); 00763 MatNeighbors.Initialize( veciNeighbors.size( ), veciNeighbors.size( ) ); 00764 for( i = 0; i < veciNeighbors.size( ); ++i ) 00765 if( !((CBNServer*)this)->Get( veciNeighbors[ i ], veciNeighbors, iContext, 00766 MatNeighbors.Get( i ) ) ) 00767 return false; 00768 DatGraph.Open( vecstrGenes ); 00769 for( i = 0; i < veciQuery.size( ); ++i ) { 00770 for( j = ( i + 1 ); j < veciQuery.size( ); ++j ) 00771 DatGraph.Set( i, j, MatQuery.Get( i, veciQuery[ j ] % MatQuery.GetColumns( ) ) ); 00772 for( j = 0; j < veciNeighbors.size( ); ++j ) 00773 DatGraph.Set( i, veciQuery.size( ) + j, MatQuery.Get( i, veciNeighbors[ j ] ) ); } 00774 00775 vecdEdges.reserve( veciNeighbors.size( ) * ( veciNeighbors.size( ) - 1 ) / 2 ); 00776 for( i = 0; i < veciNeighbors.size( ); ++i ) { 00777 const float* adOne = MatNeighbors.Get( i ); 00778 size_t iOne = veciQuery.size( ) + i; 00779 00780 for( j = ( i + 1 ); j < veciNeighbors.size( ); ++j ) 00781 if( !CMeta::IsNaN( d = adOne[ j ] ) ) { 00782 vecdEdges.push_back( d ); 00783 DatGraph.Set( iOne, veciQuery.size( ) + j, adOne[ j ] ); } } 00784 00785 Winsorize( vecdEdges ); 00786 dAve = (float)CStatistics::Average( vecdEdges ); 00787 dStd = (float)sqrt( CStatistics::Variance( vecdEdges, dAve ) ); 00788 dCutoff = (float)( dAve + ( dEdgeAggressiveness * dStd ) ); 00789 veciDegree.resize( DatGraph.GetGenes( ) ); 00790 fill( veciDegree.begin( ), veciDegree.end( ), veciDegree.size( ) - 1 ); 00791 for( fDone = false; !fDone; ) { 00792 fDone = true; 00793 dMin = FLT_MAX; 00794 for( i = 0; i < DatGraph.GetGenes( ); ++i ) 00795 for( j = ( i + 1 ); j < DatGraph.GetGenes( ); ++j ) 00796 if( !CMeta::IsNaN( d = DatGraph.Get( i, j ) ) && ( d < dCutoff ) && ( d < dMin ) && 00797 ( veciDegree[ i ] > c_iDegree ) && ( veciDegree[ j ] > c_iDegree ) ) { 00798 fDone = false; 00799 dMin = d; 00800 iMinOne = i; 00801 iMinTwo = j; } 00802 if( !fDone ) { 00803 veciDegree[ iMinOne ]--; 00804 veciDegree[ iMinTwo ]--; 00805 DatGraph.Set( iMinOne, iMinTwo, CMeta::GetNaN( ) ); } } 00806 00807 return true; } 00808 00809 bool CBNServer::GraphWrite( const CDat& DatGraph, const vector<size_t>& veciQuery, 00810 const vector<size_t>& veciNeighbors, const vector<bool>& vecfQuery, size_t iContext, 00811 EGraphOutput eOutput ) const { 00812 static const size_t c_iBuffer = 1024; 00813 char acBuffer[ c_iBuffer ]; 00814 string strCmd, strDotIn, strDotOut, strSvg; 00815 ofstream ofsm; 00816 CDot DotOut( DatGraph ); 00817 CGenome Genome; 00818 uint32_t iSize; 00819 00820 if( eOutput == EGraphOutputNamed ) 00821 strDotIn = GetFiles( ) + "/" + CMeta::Filename( GetGene( veciQuery[ 0 ] ) ); 00822 else { 00823 sprintf_s( acBuffer, ( GetFiles( ) + "/inXXXXXX" ).c_str( ) ); 00824 if( _mktemp_s( acBuffer ) ) 00825 return false; 00826 strDotIn = acBuffer; } 00827 strDotIn += c_szDOT; 00828 ofsm.open( strDotIn.c_str( ) ); 00829 if( !ofsm.is_open( ) ) 00830 return false; 00831 Genome.Open( DatGraph.GetGeneNames( ) ); 00832 DatGraph.SaveDOT( ofsm, CMeta::GetNaN( ), &Genome, false, false ); 00833 ofsm.close( ); 00834 if( eOutput == EGraphOutputNamed ) 00835 return true; 00836 00837 sprintf_s( acBuffer, ( GetFiles( ) + "/outXXXXXX" ).c_str( ) ); 00838 if( _mktemp_s( acBuffer ) ) 00839 return false; 00840 strDotOut = acBuffer; 00841 strDotOut += c_szDOT; 00842 strCmd = m_sData.m_strGraphviz + " -Tdot -o" + strDotOut + ' ' + strDotIn; 00843 system( strCmd.c_str( ) ); 00844 // THE PAIN, IT BURNS! 00845 // The Boost DOT parser doesn't handle continuation lines. 00846 // sed doesn't handle newlines unless you beat it with a stick. 00847 // Backslashes have to be triple escaped to get from here to sed. 00848 // The combination makes me cry, but it works. 00849 system( ( "sed -c -i -n '1h;2,$H;${g;s/\\\\\\n//g;p}' " + strDotOut ).c_str( ) ); 00850 00851 if( !DotOut.Open( strDotOut.c_str( ) ) ) 00852 return false; 00853 switch( eOutput ) { 00854 case EGraphOutputFile: 00855 sprintf_s( acBuffer, "svgXXXXXX" ); 00856 if( _mktemp_s( acBuffer ) ) 00857 return false; 00858 strSvg = GetFiles( ) + '/' + acBuffer + c_szSVG; 00859 ofsm.clear( ); 00860 ofsm.open( strSvg.c_str( ) ); 00861 if( !( ofsm.is_open( ) && DotOut.Save( ofsm, vecfQuery, iContext ) ) ) 00862 return false; 00863 ofsm.close( ); 00864 00865 strSvg = acBuffer; 00866 strSvg += c_szSVG; 00867 iSize = (uint32_t)strSvg.length( ) + sizeof(iSize) + ( sizeof(iSize) * ( veciQuery.size( ) + 00868 veciNeighbors.size( ) ) ); 00869 send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 ); 00870 SendGenes( veciQuery, veciNeighbors ); 00871 send( m_iSocket, strSvg.c_str( ), (int)strSvg.length( ), 0 ); 00872 break; 00873 00874 case EGraphOutputSocket: 00875 stringstream sssm; 00876 00877 if( !DotOut.Save( sssm, vecfQuery, iContext ) ) 00878 return false; 00879 00880 iSize = (uint32_t)( sssm.str( ).length( ) + sizeof(iSize) + ( sizeof(iSize) * ( veciQuery.size( ) + 00881 veciNeighbors.size( ) ) ) ); 00882 send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 ); 00883 SendGenes( veciQuery, veciNeighbors ); 00884 send( m_iSocket, sssm.str( ).c_str( ), (int)sssm.str( ).length( ), 0 ); 00885 break; } 00886 00887 return true; } 00888 00889 bool CBNServer::SendGenes( const vector<size_t>& veciQuery, const vector<size_t>& veciNeighbors ) const { 00890 size_t i; 00891 uint32_t iSize; 00892 uint32_t* aiGenes; 00893 00894 iSize = (uint32_t)( veciQuery.size( ) + veciNeighbors.size( ) ); 00895 send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 ); 00896 aiGenes = new uint32_t[ iSize ]; 00897 for( i = 0; i < veciQuery.size( ); ++i ) 00898 aiGenes[ i ] = (uint32_t)veciQuery[ i ] + 1; 00899 for( i = 0; i < veciNeighbors.size( ); ++i ) 00900 aiGenes[ veciQuery.size( ) + i ] = (uint32_t)veciNeighbors[ i ] + 1; 00901 send( m_iSocket, (const char*)aiGenes, iSize * sizeof(*aiGenes), 0 ); 00902 delete[] aiGenes; 00903 00904 return true; } 00905 00906 size_t CBNServer::ProcessContexts( const vector<unsigned char>& vecbMessage, size_t iOffset ) { 00907 uint32_t iGene; 00908 size_t iCount, iPlace, iContext; 00909 vector<unsigned char> vecbData; 00910 float dBetween, dBackground, dWithin; 00911 00912 iCount = InitializeContexts( ); 00913 iGene = (uint32_t)( ( ( vecbMessage.size( ) - iOffset ) / sizeof(iGene) ) * 00914 ( iCount * sizeof(*m_adContexts) ) ); 00915 send( m_iSocket, (const char*)&iGene, sizeof(iGene), 0 ); 00916 for( iPlace = iOffset; ( iPlace + sizeof(iGene) ) <= vecbMessage.size( ); iPlace += sizeof(iGene) ) { 00917 iGene = *(uint32_t*)&vecbMessage[ iPlace ]; 00918 if( !( iGene-- && GetDatabase( ).Get( iGene, vecbData, true ) ) ) 00919 return -1; 00920 cerr << m_strConnection << " contexting " << iGene << " (" << GetGene( iGene ) << 00921 ")" << endl; 00922 for( iContext = 0; iContext < GetContexts( ); ++iContext ) { 00923 if( !GetAssociation( iGene, vecbData, GetContext( iContext ), iContext + 1, true, 00924 &dBetween, &dBackground, &dWithin, NULL, NULL, GetWithinContext( iContext + 1, iContext ) ) ) 00925 return -1; 00926 SetArray( m_adContexts, GetContexts( ), iContext, GetPValue( dBetween, dBackground, 00927 dWithin, iContext + 1, 1, GetContext( iContext ).size( ), GetContexts( ) ), dBetween, 00928 dBackground, dWithin ); } 00929 send( m_iSocket, (const char*)m_adContexts, (int)( iCount * sizeof(*m_adContexts) ), 0 ); } 00930 00931 return ( iPlace - iOffset ); } 00932 00933 struct SSortFind { 00934 bool operator()( const STermFound& sOne, const STermFound& sTwo ) { 00935 00936 return ( sOne.m_dP < sTwo.m_dP ); } 00937 }; 00938 00939 size_t CBNServer::ProcessTermFinder( const vector<unsigned char>& vecbMessage, size_t iOffset ) { 00940 uint32_t iOntology, iGene, iSize; 00941 float d, dP; 00942 size_t i, j, iPlace; 00943 vector<string> vecstrGenes; 00944 vector<STermFound> vecsTerms; 00945 CGenes Genes( GetGenome( ) ); 00946 const IOntology* pOnto; 00947 vector<size_t> veciMapping; 00948 00949 iSize = sizeof(iOntology) + sizeof(dP); 00950 if( ( iOffset + iSize ) > vecbMessage.size( ) ) 00951 return -1; 00952 iOntology = *(uint32_t*)&vecbMessage[ iOffset ]; 00953 dP = *(float*)&vecbMessage[ iOffset + sizeof(iOntology) ]; 00954 for( iPlace = iOffset + iSize; ( iPlace + sizeof(iGene) ) <= vecbMessage.size( ); 00955 iPlace += sizeof(iGene) ) { 00956 iGene = *(uint32_t*)&vecbMessage[ iPlace ]; 00957 vecstrGenes.push_back( GetGene( iGene - 1 ) ); } 00958 00959 if( !( pOnto = GetOntology( iOntology ) ) ) 00960 return -1; 00961 cerr << m_strConnection << " TermFinder " << pOnto->GetID( ) << '@' << dP << ':'; 00962 for( i = 0; i < vecstrGenes.size( ); ++i ) 00963 cerr << ' ' << vecstrGenes[ i ]; 00964 cerr << endl; 00965 00966 Genes.Open( vecstrGenes, false ); 00967 veciMapping.resize( Genes.GetGenes( ) ); 00968 for( i = 0; i < Genes.GetGenes( ); ++i ) { 00969 const CGene& Gene = Genes.GetGene( i ); 00970 00971 veciMapping[ i ] = GetGene( Gene.GetName( ) ); 00972 for( j = 0; ( j < Gene.GetSynonyms( ) ) && ( veciMapping[ i ] == -1 ); ++j ) 00973 veciMapping[ i ] = GetGene( Gene.GetSynonym( j ) ); } 00974 00975 pOnto->TermFinder( Genes, vecsTerms, true, true, false, dP ); 00976 sort( vecsTerms.begin( ), vecsTerms.end( ), SSortFind( ) ); 00977 iSize = sizeof(iSize); 00978 for( i = 0; i < vecsTerms.size( ); ++i ) { 00979 iSize += (uint32_t)( sizeof(float) + ( 5 * sizeof(uint32_t) ) + 2 + 00980 pOnto->GetGloss( vecsTerms[ i ].m_iID ).length( ) + 00981 pOnto->GetID( vecsTerms[ i ].m_iID ).length( ) ); 00982 for( j = 0; j < Genes.GetGenes( ); ++j ) 00983 if( pOnto->IsAnnotated( vecsTerms[ i ].m_iID, Genes.GetGene( j ) ) && ( veciMapping[ j ] != -1 ) ) 00984 iSize += sizeof(uint32_t); } 00985 00986 send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 ); 00987 iSize = (uint32_t)i; 00988 send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 ); 00989 for( i = 0; ( i < vecsTerms.size( ) ) && ( vecsTerms[ i ].m_dP <= dP ); ++i ) { 00990 const string& strID = pOnto->GetID( vecsTerms[ i ].m_iID ); 00991 const string& strGloss = pOnto->GetGloss( vecsTerms[ i ].m_iID ); 00992 vector<uint32_t> veciGenes; 00993 00994 cerr << m_strConnection << " found " << strGloss << " @ " << vecsTerms[ i ].m_dP << endl; 00995 send( m_iSocket, strID.c_str( ), (int)strID.length( ) + 1, 0 ); 00996 send( m_iSocket, strGloss.c_str( ), (int)strGloss.length( ) + 1, 0 ); 00997 d = (float)vecsTerms[ i ].m_dP; 00998 send( m_iSocket, (const char*)&d, sizeof(d), 0 ); 00999 iSize = (uint32_t)vecsTerms[ i ].m_iHitsTerm; 01000 send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 ); 01001 iSize = (uint32_t)vecsTerms[ i ].m_iSizeTerm; 01002 send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 ); 01003 iSize = (uint32_t)vecsTerms[ i ].m_iHitsTotal; 01004 send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 ); 01005 iSize = (uint32_t)vecsTerms[ i ].m_iSizeTotal; 01006 send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 ); 01007 01008 for( j = 0; j < Genes.GetGenes( ); ++j ) 01009 if( pOnto->IsAnnotated( vecsTerms[ i ].m_iID, Genes.GetGene( j ) ) && ( veciMapping[ j ] != -1 ) ) 01010 veciGenes.push_back( (uint32_t)veciMapping[ j ] ); 01011 iSize = (uint32_t)veciGenes.size( ); 01012 send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 ); 01013 for( j = 0; j < veciGenes.size( ); ++j ) { 01014 iSize = (uint32_t)veciGenes[ j ] + 1; 01015 send( m_iSocket, (const char*)&iSize, sizeof(iSize), 0 ); } } 01016 01017 return ( iPlace - iOffset ); } 01018 01019 size_t CBNServer::ProcessDiseases( const vector<unsigned char>& vecbMessage, size_t iOffset ) { 01020 uint32_t iGene, iContext; 01021 size_t i, iCount, iPlace, iDisease, iChunk; 01022 vector<unsigned char> vecbData, vecbTmp; 01023 float dBackground, dWithin, dBetween; 01024 01025 if( ( iOffset + sizeof(iContext) ) > vecbMessage.size( ) ) 01026 return -1; 01027 iContext = *(uint32_t*)&vecbMessage[ iOffset ]; 01028 01029 iCount = InitializeDiseases( ); 01030 iGene = (uint32_t)( ( ( vecbMessage.size( ) - iOffset - sizeof(iContext) ) / sizeof(iGene) ) * 01031 ( iCount * sizeof(*m_adDiseases) ) ); 01032 send( m_iSocket, (const char*)&iGene, sizeof(iGene), 0 ); 01033 01034 iChunk = GetDatachunk( ); 01035 vecbData.resize( GetGenes( ) * iChunk ); 01036 for( iPlace = iOffset + sizeof(iContext); ( iPlace + sizeof(iGene) ) <= vecbMessage.size( ); 01037 iPlace += sizeof(iGene) ) { 01038 iGene = *(uint32_t*)&vecbMessage[ iPlace ]; 01039 if( !iGene-- ) 01040 return -1; 01041 cerr << m_strConnection << " diseasing " << iGene << " (" << GetGene( iGene ) << 01042 ") in " << iContext << endl; 01043 01044 if( !GetDatabase( ).Get( iGene, GetDiseaseGenes( ), vecbTmp, true ) ) 01045 return -1; 01046 for( i = 0; i < GetDiseaseGenes( ).size( ); ++i ) 01047 // Hackedy hack hack hack 01048 memcpy( &vecbData[ GetDiseaseGenes( )[ i ] * iChunk ], &vecbTmp[ i * iChunk ], iChunk ); 01049 for( iDisease = 0; iDisease < GetDiseases( ); ++iDisease ) { 01050 if( !GetAssociation( iGene, vecbData, GetDisease( iDisease ), iContext, true, &dBetween, 01051 &dBackground, &dWithin, NULL, NULL, GetWithinDisease( iContext, iDisease ) ) ) 01052 return -1; 01053 SetArray( m_adDiseases, GetDiseases( ), iDisease, GetPValue( dBetween, dBackground, 01054 dWithin, iContext, 1, GetDisease( iDisease ).size( ), GetDiseases( ) ), dBetween, 01055 dBackground, dWithin ); } 01056 send( m_iSocket, (const char*)m_adDiseases, (int)( iCount * sizeof(*m_adDiseases) ), 0 ); } 01057 01058 return ( iPlace - iOffset ); } 01059 01060 bool CBNServer::GenerateNetworkIcons( ) const { 01061 static const size_t c_iSize = 6; 01062 static const float c_dEdgeAggressiveness = 0; 01063 vector<size_t> veciQuery, veciNeighbors; 01064 vector<bool> vecfQuery; 01065 CDat DatGraph; 01066 size_t i; 01067 01068 veciQuery.resize( 1 ); 01069 for( i = 0; i < GetGenes( ); ++i ) { 01070 veciQuery[ 0 ] = i; 01071 if( !( GraphCreate( veciQuery, 0, c_iSize, c_dEdgeAggressiveness, vecfQuery, veciNeighbors, 01072 DatGraph ) && GraphWrite( DatGraph, veciQuery, veciNeighbors, vecfQuery, 0, EGraphOutputNamed ) ) ) 01073 return false; } 01074 01075 return true; } 01076 01077 bool CBNServer::GenerateAssociations( const char* szDiseases, size_t iContext ) { 01078 CPCL PCLDiseases( false ), PCLAssociations; 01079 size_t i, j; 01080 ofstream ofsm; 01081 vector<string> vecstrDCs; 01082 01083 if( !PCLDiseases.Open( szDiseases, 0 ) ) { 01084 cerr << "Could not open: " << szDiseases << endl; 01085 return false; } 01086 if( PCLDiseases.GetGenes( ) != GetDiseases( ) ) { 01087 cerr << "Unexpected disease count " << PCLDiseases.GetGenes( ) << ", wanted " << GetDiseases( ) << 01088 endl; 01089 return false; } 01090 vecstrDCs.resize( GetDiseases( ) + GetContexts( ) ); 01091 copy( PCLDiseases.GetGeneNames( ).begin( ), PCLDiseases.GetGeneNames( ).end( ), vecstrDCs.begin( ) ); 01092 for( i = 0; i < GetContexts( ); ++i ) 01093 vecstrDCs[ GetDiseases( ) + i ] = GetBN( i + 1 ).GetID( ); 01094 PCLAssociations.Open( vecstrDCs, vecstrDCs, vector<string>( ) ); 01095 01096 // Disease/disease 01097 InitializeDiseases( ); 01098 for( i = 0; i < GetDiseases( ); ++i ) { 01099 memset( m_adDiseases, 0, GetDiseases( ) * c_iValues * sizeof(*m_adDiseases) ); 01100 if( !GetAssociationsDC( 1, ESetDisease, i, iContext, true ) ) 01101 return false; 01102 for( j = i; j < GetDiseases( ); ++j ) { 01103 PCLAssociations.Set( i, j, m_adDiseases[ j ] ); 01104 PCLAssociations.Set( j, i, m_adDiseases[ j ] ); } } 01105 // Disease/context 01106 InitializeContexts( ); 01107 for( i = 0; i < GetDiseases( ); ++i ) { 01108 memset( m_adContexts, 0, GetContexts( ) * c_iValues * sizeof(*m_adContexts) ); 01109 if( !GetAssociationsDC( 0, ESetDisease, i, -1, true ) ) 01110 return false; 01111 for( j = 0; j < GetContexts( ); ++j ) 01112 PCLAssociations.Set( i, GetDiseases( ) + j, m_adContexts[ j ] ); } 01113 for( i = 0; i < GetContexts( ); ++i ) { 01114 memset( m_adDiseases, 0, GetDiseases( ) * c_iValues * sizeof(*m_adDiseases) ); 01115 if( !GetAssociationsDC( 1, ESetContext, i, iContext, true ) ) 01116 return false; 01117 for( j = 0; j < GetDiseases( ); ++j ) 01118 PCLAssociations.Set( GetDiseases( ) + i, j, m_adDiseases[ j ] ); } 01119 // Context/context 01120 for( i = 0; i < GetContexts( ); ++i ) { 01121 memset( m_adContexts, 0, GetContexts( ) * c_iValues * sizeof(*m_adContexts) ); 01122 if( !GetAssociationsDC( 0, ESetContext, i, -1, true ) ) 01123 return false; 01124 for( j = 0; j < GetContexts( ); ++j ) 01125 PCLAssociations.Set( GetDiseases( ) + i, GetDiseases( ) + j, m_adContexts[ j ] ); } 01126 01127 ofsm.open( "associations.pcl" ); 01128 PCLAssociations.Save( ofsm ); 01129 return true; } 01130 01131 size_t CBNServer::ProcessGenes( const vector<unsigned char>& vecbMessage, size_t iOffset ) { 01132 uint32_t iContext, iSize, iGene; 01133 size_t i, iPlace; 01134 vector<size_t> veciGenes; 01135 unsigned char bType; 01136 01137 if( ( iOffset + sizeof(iContext) + sizeof(bType) ) > vecbMessage.size( ) ) 01138 return -1; 01139 iContext = *(uint32_t*)&vecbMessage[ iOffset ]; 01140 bType = vecbMessage[ iOffset + sizeof(iContext) ]; 01141 for( iPlace = iOffset + sizeof(iContext) + sizeof(bType); 01142 ( iPlace + sizeof(iGene) ) <= vecbMessage.size( ); iPlace += sizeof(iGene) ) { 01143 if( !( iGene = *(uint32_t*)&vecbMessage[ iPlace ] ) ) 01144 return -1; 01145 iGene--; 01146 if( bType == ESetGenes ) 01147 veciGenes.push_back( iGene ); } 01148 01149 cerr << m_strConnection << " genesing in " << iContext << ' '; 01150 if( bType == ESetGenes ) { 01151 for( i = 0; i < veciGenes.size( ); ++i ) 01152 cerr << ( i ? ' ' : '{' ) << GetGene( veciGenes[ i ] ); 01153 cerr << '}' << endl; } 01154 else 01155 cerr << ( ( bType == ESetContext ) ? "context" : "disease" ) << ' ' << ( iGene + 1 ) << endl; 01156 { 01157 const vector<size_t>& veciSet = ( bType == ESetGenes ) ? veciGenes : 01158 GetGeneSets( bType - 1 )[ iGene ]; 01159 float dWithin; 01160 01161 dWithin = ( bType == ESetGenes ) ? CMeta::GetNaN( ) : GetWithin( bType - 1, iContext, iGene ); 01162 if( !GetGenes( veciSet, iContext, dWithin ) ) 01163 return -1; 01164 } 01165 iSize = (uint32_t)( c_iValues * GetGenes( ) * sizeof(*m_adGenes) ); 01166 send( m_iSocket, (char*)&iSize, sizeof(iSize), 0 ); 01167 send( m_iSocket, (char*)m_adGenes, iSize, 0 ); 01168 01169 return ( iPlace - iOffset ); } 01170 01171 bool CBNServer::GetGenes( const vector<size_t>& veciGenes, size_t iContext, float dWithin ) { 01172 CDataMatrix MatGenes; 01173 size_t i, j; 01174 vector<float> vecdBetween, vecdBackground; 01175 float d, dFrac, dBackground; 01176 01177 dFrac = GetFraction( veciGenes.size( ) ); 01178 MatGenes.Initialize( veciGenes.size( ), GetGenes( ) ); 01179 vecdBackground.resize( veciGenes.size( ) ); 01180 for( i = 0; i < MatGenes.GetRows( ); ++i ) { 01181 vecdBackground[ i ] = GetBackground( iContext, veciGenes[ i ] ); 01182 if( IsFraction( dFrac ) ) { 01183 fill( MatGenes.Get( i ), MatGenes.Get( i ) + MatGenes.GetColumns( ), CMeta::GetNaN( ) ); 01184 continue; } 01185 if( !Get( veciGenes[ i ], iContext, MatGenes.Get( i ) ) ) 01186 return false; } 01187 Winsorize( vecdBackground ); 01188 dBackground = (float)CStatistics::Average( vecdBackground ); 01189 i = veciGenes.size( ) * ( veciGenes.size( ) + 1 ) / 2; 01190 if( CMeta::IsNaN( dWithin ) ) { 01191 vector<float> vecdWithin; 01192 01193 vecdWithin.reserve( 1 + i ); 01194 if( !GetWithin( veciGenes, iContext, NULL, &vecdWithin ) ) 01195 return false; 01196 // Alternative: calculate an average over all edges (weighted) rather than between sets (unweighted) 01197 // vecdWithin.push_back( GetPrior( iContext ) ); 01198 Winsorize( vecdWithin ); 01199 // dWithin = (float)CStatistics::Average( vecdWithin ); } 01200 dWithin = ( (float)CStatistics::Average( vecdWithin ) + GetPrior( iContext ) ) / 2; } 01201 else 01202 // dWithin = ( ( dWithin * i ) + GetPrior( iContext ) ) / ( i + 1 ); 01203 dWithin = ( dWithin + GetPrior( iContext ) ) / 2; 01204 01205 InitializeGenes( ); 01206 for( i = 0; i < GetGenes( ); ++i ) { 01207 float dBetween, dTmp; 01208 01209 vecdBetween.clear( ); 01210 vecdBetween.reserve( MatGenes.GetRows( ) ); 01211 for( j = 0; j < MatGenes.GetRows( ); ++j ) 01212 if( !CMeta::IsNaN( d = MatGenes.Get( j, i ) ) ) 01213 vecdBetween.push_back( d ); 01214 Winsorize( vecdBetween ); 01215 dBetween = (float)CStatistics::Average( vecdBetween ); 01216 dTmp = ( ( dBackground * veciGenes.size( ) ) + GetBackground( iContext, i ) ) / 01217 ( veciGenes.size( ) + 1 ); 01218 SetArray( m_adGenes, GetGenes( ), i, GetPValue( dBetween, dTmp, dWithin, iContext, 01219 // Alternative: multiple hypothesis correction on all genes (gets too big) 01220 // 1, veciGenes.size( ), GetGenes( ) ), dBetween, dTmp, dWithin ); } 01221 1, veciGenes.size( ), 0 ), dBetween, dTmp, dWithin ); } 01222 01223 return true; } 01224 01225 size_t CBNServer::ProcessAssociation( const vector<unsigned char>& vecbMessage, size_t iOffset ) { 01226 uint32_t iGene, iContext; 01227 size_t i, iPlace, iBack; 01228 vector<unsigned char> vecbData; 01229 vector<size_t> veciFore, veciBack; 01230 float dBetween, dBackground, dWithin, dWith; 01231 const vector<size_t>* pveciBack; 01232 unsigned char bType; 01233 01234 if( ( iOffset + sizeof(iContext) ) > vecbMessage.size( ) ) 01235 return -1; 01236 iContext = *(uint32_t*)&vecbMessage[ iOffset ]; 01237 for( iPlace = iOffset + sizeof(iContext); ( iPlace + sizeof(iGene) ) <= vecbMessage.size( ); 01238 iPlace += sizeof(iGene) ) { 01239 if( !( iGene = *(uint32_t*)&vecbMessage[ iPlace ] ) ) 01240 break; 01241 veciFore.push_back( --iGene ); } 01242 if( ( ( iPlace += sizeof(iGene) ) + sizeof(bType) ) > vecbMessage.size( ) ) 01243 return -1; 01244 bType = vecbMessage[ iPlace++ ]; 01245 if( bType == ESetGenes ) { 01246 for( ; ( iPlace + sizeof(iGene) ) <= vecbMessage.size( ); iPlace += sizeof(iGene) ) { 01247 if( !( iGene = *(uint32_t*)&vecbMessage[ iPlace ] ) ) 01248 return -1; 01249 veciBack.push_back( --iGene ); } 01250 iBack = -1; 01251 pveciBack = &veciBack; } 01252 else { 01253 if( ( ( iPlace + sizeof(iGene) ) > vecbMessage.size( ) ) || 01254 !( iGene = *(uint32_t*)&vecbMessage[ iPlace ] ) ) 01255 return -1; 01256 iPlace += sizeof(iGene); 01257 iBack = iGene - 1; 01258 pveciBack = &GetGeneSets( bType - 1 )[ iBack ]; } 01259 if( veciFore.empty( ) || pveciBack->empty( ) ) 01260 return -1; 01261 01262 cerr << m_strConnection << " association in " << iContext << ' '; 01263 for( i = 0; i < veciFore.size( ); ++i ) 01264 cerr << ( i ? ' ' : '{' ) << GetGene( veciFore[ i ] ); 01265 cerr << "} with "; 01266 if( iBack == -1 ) { 01267 for( i = 0; i < pveciBack->size( ); ++i ) 01268 cerr << ( i ? ' ' : '{' ) << GetGene( pveciBack->at( i ) ); 01269 cerr << '}'; } 01270 else 01271 cerr << ( ( bType == ESetContext ) ? "context" : "disease" ) << ' ' << ( iBack + 1 ); 01272 cerr << endl; 01273 01274 dWith = ( bType == ESetGenes ) ? CMeta::GetNaN( ) : GetWithin( bType - 1, iContext, iBack ); 01275 InitializeGenes( ); 01276 for( i = 0; i < veciFore.size( ); ++i ) { 01277 if( !( i % 100 ) ) 01278 cerr << m_strConnection << " association " << i << '/' << veciFore.size( ) << ": " << 01279 GetGene( veciFore[ i ] ) << endl; 01280 if( !( GetDatabase( ).Get( veciFore[ i ], *pveciBack, vecbData, true ) && 01281 GetAssociation( veciFore[ i ], vecbData, *pveciBack, iContext, false, &dBetween, &dBackground, 01282 &dWithin, NULL, NULL, dWith ) ) ) 01283 return -1; 01284 SetArray( m_adGenes, veciFore.size( ), i, GetPValue( dBetween, dBackground, dWithin, 01285 iContext, 1, pveciBack->size( ), veciFore.size( ) ), dBetween, dBackground, dWithin ); } 01286 01287 iGene = (uint32_t)( c_iValues * veciFore.size( ) * sizeof(*m_adGenes) ); 01288 send( m_iSocket, (const char*)&iGene, sizeof(iGene), 0 ); 01289 send( m_iSocket, (const char*)m_adGenes, iGene, 0 ); 01290 01291 return ( iPlace - iOffset ); } 01292 01293 size_t CBNServer::ProcessAssociations( const vector<unsigned char>& vecbMessage, size_t iOffset ) { 01294 uint32_t iGene, iCount; 01295 vector<size_t> veciGenes; 01296 unsigned char bType, bDiseases; 01297 size_t i, iPlace, iContext; 01298 float* ad; 01299 01300 if( ( iOffset + sizeof(iGene) + sizeof(bType) + sizeof(bDiseases) ) > vecbMessage.size( ) ) 01301 return -1; 01302 bDiseases = vecbMessage[ iOffset ]; 01303 iGene = *(uint32_t*)&vecbMessage[ iOffset + sizeof(bDiseases) ]; 01304 iContext = ( iGene == -1 ) ? -1 : (size_t)iGene; 01305 bType = vecbMessage[ iOffset + sizeof(bDiseases) + sizeof(iGene) ]; 01306 for( iPlace = ( iOffset + sizeof(iGene) + sizeof(bDiseases) + sizeof(bType) ); 01307 ( iPlace + sizeof(iGene) ) <= vecbMessage.size( ); iPlace += sizeof(iGene) ) { 01308 if( !( iGene = *(uint32_t*)&vecbMessage[ iPlace ] ) ) 01309 return -1; 01310 iGene--; 01311 if( bType == ESetGenes ) 01312 veciGenes.push_back( iGene ); } 01313 01314 cerr << m_strConnection << " associationsing "; 01315 if( bType == ESetGenes ) { 01316 for( i = 0; i < veciGenes.size( ); ++i ) 01317 cerr << ( i ? ' ' : '{' ) << GetGene( veciGenes[ i ] ); 01318 cerr << '}'; } 01319 else 01320 cerr << ( ( bType == ESetContext ) ? "context" : "disease" ) << ' ' << ( iGene + 1 ); 01321 cerr << " with " << ( bDiseases ? "diseases" : "contexts" ) << " in " << iContext << endl; 01322 01323 iCount = (uint32_t)( ( bDiseases ? InitializeDiseases( ) : InitializeContexts( ) ) * sizeof(*ad) ); 01324 ad = bDiseases ? m_adDiseases : m_adContexts; 01325 memset( ad, 0, iCount ); 01326 if( !( ( bType == ESetGenes ) ? 01327 GetAssociationsSet( bDiseases, veciGenes, iContext ) : 01328 GetAssociationsDC( bDiseases, bType, iGene, iContext ) ) ) 01329 return -1; 01330 send( m_iSocket, (char*)&iCount, sizeof(iCount), 0 ); 01331 send( m_iSocket, (char*)ad, iCount, 0 ); 01332 01333 return ( iPlace - iOffset ); } 01334 01335 bool CBNServer::GetAssociationsSet( unsigned char bDiseases, const vector<size_t>& veciGenes, 01336 size_t iContext ) const { 01337 const vector<vector<size_t> >& vecveciSets = GetGeneSets( bDiseases ); 01338 size_t i, j, k; 01339 vector<unsigned char> vecbData; 01340 float d, dFrac, dWithinGenes, dBetween; 01341 float* ad; 01342 vector<vector<float> > vecvecdBackground, vecvecdBetween; 01343 vector<float> vecdCur; 01344 01345 vecvecdBetween.resize( vecveciSets.size( ) ); 01346 for( i = 0; i < vecvecdBetween.size( ); ++i ) { 01347 vecvecdBetween[ i ].resize( veciGenes.size( ) + vecveciSets[ i ].size( ) ); 01348 fill( vecvecdBetween[ i ].begin( ), vecvecdBetween[ i ].end( ), 0.0f ); } 01349 vecvecdBackground.resize( vecveciSets.size( ) ); 01350 dFrac = GetFraction( veciGenes.size( ) ); 01351 for( i = 0; i < veciGenes.size( ); ++i ) { 01352 if( IsFraction( dFrac ) ) { 01353 for( j = 0; j < vecveciSets.size( ); ++j ) { 01354 k = GetContext( bDiseases, iContext, j ); 01355 vecvecdBackground[ j ].push_back( GetBackground( k, veciGenes[ i ] ) ); } 01356 continue; } 01357 if( !GetDatabase( ).Get( veciGenes[ i ], vecbData, true ) ) 01358 return false; 01359 cerr << m_strConnection << " associations " << GetGene( veciGenes[ i ] ) << endl; 01360 for( j = 0; j < vecveciSets.size( ); ++j ) { 01361 vecdCur.clear( ); 01362 vecdCur.reserve( vecveciSets[ j ].size( ) ); 01363 k = GetContext( bDiseases, iContext, j ); 01364 if( !GetAssociation( veciGenes[ i ], vecbData, vecveciSets[ j ], k, true, &dBetween, NULL, NULL, 01365 &vecdCur, &vecvecdBackground[ j ], GetWithin( bDiseases, k, j ) ) ) 01366 return false; 01367 vecvecdBetween[ j ][ i ] += dBetween; 01368 for( k = 0; k < vecveciSets[ j ].size( ); ++k ) 01369 vecvecdBetween[ j ][ veciGenes.size( ) + k ] += vecdCur[ k ]; } } 01370 for( i = 0; i < vecvecdBetween.size( ); ++i ) { 01371 for( j = 0; j < veciGenes.size( ); ++j ) 01372 vecvecdBetween[ i ][ j ] /= vecveciSets[ i ].size( ); 01373 for( j = 0; j < vecveciSets[ i ].size( ); ++j ) 01374 vecvecdBetween[ i ][ veciGenes.size( ) + j ] /= veciGenes.size( ); } 01375 for( i = 0; i < vecveciSets.size( ); ++i ) { 01376 k = GetContext( bDiseases, iContext, i ); 01377 for( j = 0; j < vecveciSets[ i ].size( ); ++j ) 01378 if( !CMeta::IsNaN( d = GetBackground( k, vecveciSets[ i ][ j ] ) ) ) 01379 vecvecdBackground[ i ].push_back( d ); } 01380 if( ( iContext != -1 ) && !GetWithin( veciGenes, iContext, &dWithinGenes, NULL ) ) 01381 return false; 01382 01383 ad = bDiseases ? m_adDiseases : m_adContexts; 01384 for( i = 0; i < vecveciSets.size( ); ++i ) { 01385 float dBackground, dWithin; 01386 size_t iTmp; 01387 01388 if( vecvecdBetween[ i ].empty( ) ) 01389 continue; 01390 Winsorize( vecvecdBetween[ i ] ); 01391 Winsorize( vecvecdBackground[ i ] ); 01392 dBetween = (float)CStatistics::Average( vecvecdBetween[ i ] ); 01393 dBackground = (float)CStatistics::Average( vecvecdBackground[ i ] ); 01394 iTmp = GetContext( bDiseases, iContext, i ); 01395 if( ( iContext == -1 ) && !GetWithin( veciGenes, iTmp, &dWithinGenes, NULL ) ) 01396 return false; 01397 dWithin = ( dWithinGenes + GetWithin( bDiseases, iTmp, i ) ) / 2; 01398 SetArray( ad, vecveciSets.size( ), i, GetPValue( dBetween, dBackground, dWithin, iTmp, 01399 veciGenes.size( ), vecveciSets[ i ].size( ), vecveciSets.size( ) ), dBetween, dBackground, 01400 dWithin ); } 01401 01402 return true; } 01403 01404 bool CBNServer::GetAssociationsDC( unsigned char bDiseases, unsigned char bType, size_t iDC, 01405 size_t iContext, bool fZ ) const { 01406 const vector<vector<size_t> >& vecveciSets = GetGeneSets( bDiseases ); 01407 const vector<size_t>& veciGenes = GetGeneSets( bType - 1 )[ iDC ]; 01408 size_t i, j, k, iEdgesOne, iEdgesTwo; 01409 float d, dBetween, dWithin, dBackground, dBackgroundOne; 01410 float* ad; 01411 01412 if( iContext == -1 ) 01413 dBackgroundOne = CMeta::GetNaN( ); 01414 else 01415 for( dBackgroundOne = 0,i = 0; i < veciGenes.size( ); ++i ) 01416 dBackgroundOne += GetBackground( iContext, veciGenes[ i ] ); 01417 iEdgesOne = veciGenes.size( ) * ( veciGenes.size( ) + 1 ) / 2; 01418 ad = bDiseases ? m_adDiseases : m_adContexts; 01419 for( i = 0; i < vecveciSets.size( ); ++i ) { 01420 iEdgesTwo = vecveciSets[ i ].size( ) * ( vecveciSets[ i ].size( ) + 1 ) / 2; 01421 k = GetContext( bDiseases, iContext, i ); 01422 if( CMeta::IsNaN( dBackgroundOne ) ) 01423 for( dBackground = 0,j = 0; j < veciGenes.size( ); ++j ) 01424 dBackground += GetBackground( k, veciGenes[ j ] ); 01425 else 01426 dBackground = dBackgroundOne; 01427 for( j = 0; j < vecveciSets[ i ].size( ); ++j ) 01428 dBackground += GetBackground( k, vecveciSets[ i ][ j ] ); 01429 dBackground /= veciGenes.size( ) + vecveciSets[ i ].size( ); 01430 01431 d = GetWithin( bType - 1, k, iDC ); 01432 dWithin = GetWithin( bDiseases, k, i ); 01433 // Alternative: calculate an average over all edges (weighted) rather than between sets (unweighted) 01434 // dWithin = ( ( d * iEdgesOne ) + ( dWithin * iEdgesTwo ) ) / ( iEdgesOne + iEdgesTwo ); 01435 dWithin = ( d + dWithin ) / 2; 01436 dBetween = GetBetween( k, bType - 1, iDC, bDiseases, i ); 01437 01438 SetArray( ad, vecveciSets.size( ), i, GetPValue( dBetween, dBackground, dWithin, k, veciGenes.size( ), 01439 vecveciSets[ i ].size( ), vecveciSets.size( ), fZ ), dBetween, dBackground, dWithin ); } 01440 01441 return true; } 01442 01443 bool CBNServer::GetAssociation( size_t iGene, const vector<unsigned char>& vecbData, 01444 const vector<size_t>& veciGenes, size_t iContext, bool fAll, float* pdBetween, float* pdBackground, 01445 float* pdWithin, vector<float>* pvecdBetween, vector<float>* pvecdBackground, float dWithin ) const { 01446 const CBayesNetMinimal& BNet = GetBN( iContext ); 01447 float d, dFrac, dBackground; 01448 size_t i; 01449 vector<float> vecdBetween, vecdBackground; 01450 01451 if( veciGenes.empty( ) ) { 01452 SetPointer( pdBetween, CMeta::GetNaN( ) ); 01453 SetPointer( pdBackground, CMeta::GetNaN( ) ); 01454 SetPointer( pdWithin, CMeta::GetNaN( ) ); 01455 return true; } 01456 01457 dFrac = GetFraction( veciGenes.size( ) ); 01458 if( pdBetween ) 01459 vecdBetween.reserve( veciGenes.size( ) ); 01460 if( pdBackground ) 01461 vecdBackground.reserve( veciGenes.size( ) + 1 ); 01462 dBackground = GetBackground( iContext, iGene ); 01463 PushPointer( pdBackground, vecdBackground, dBackground ); 01464 PushPointer( pvecdBackground, dBackground ); 01465 for( i = 0; i < veciGenes.size( ); ++i ) { 01466 PushPointer( pdBackground, vecdBackground, GetBackground( iContext, veciGenes[ i ] ) ); 01467 if( veciGenes[ i ] == iGene ) 01468 d = 1; 01469 else if( IsFraction( dFrac ) ) 01470 continue; 01471 else 01472 d = BNet.Evaluate( vecbData, ( fAll ? veciGenes[ i ] : i ) * GetDatachunk( ) ); 01473 if( !CMeta::IsNaN( d ) ) { 01474 PushPointer( pdBetween, vecdBetween, d ); 01475 PushPointer( pvecdBetween, d ); } } 01476 Winsorize( vecdBetween ); 01477 SetPointer( pdBetween, (float)CStatistics::Average( vecdBetween ) ); 01478 Winsorize( vecdBackground ); 01479 SetPointer( pdBackground, (float)CStatistics::Average( vecdBackground ) ); 01480 01481 if( pdWithin ) { 01482 i = veciGenes.size( ) * ( veciGenes.size( ) + 1 ) / 2; 01483 if( CMeta::IsNaN( dWithin ) ) { 01484 vector<float> vecdWithin; 01485 01486 vecdWithin.reserve( 1 + i ); 01487 // Alternative: calculate an average over all edges (weighted) rather than between sets (unweighted) 01488 // vecdWithin.push_back( GetPrior( iContext ) ); 01489 if( !GetWithin( veciGenes, iContext, NULL, &vecdWithin ) ) 01490 return false; 01491 Winsorize( vecdWithin ); 01492 // SetPointer( pdWithin, (float)CStatistics::Average( vecdWithin ) ); } 01493 SetPointer( pdWithin, ( (float)CStatistics::Average( vecdWithin ) + GetPrior( iContext ) ) / 2 ); } 01494 else 01495 // SetPointer( pdWithin, ( GetPrior( iContext ) + ( i * dWithin ) ) / ( 1 + i ) ); } 01496 SetPointer( pdWithin, ( GetPrior( iContext ) + dWithin ) / 2 ); } 01497 01498 return true; } 01499 01500 bool CBNServer::GetAssociation( const vector<size_t>& veciOne, const vector<size_t>& veciTwo, 01501 size_t iContext, float& dBetween, float& dBackground, float& dWithin ) const { 01502 const vector<size_t>& veciSmall = ( veciOne.size( ) < veciTwo.size( ) ) ? veciOne : veciTwo; 01503 const vector<size_t>& veciBig = ( veciOne.size( ) < veciTwo.size( ) ) ? veciTwo : veciOne; 01504 size_t i, j; 01505 float dFrac; 01506 vector<unsigned char> vecbData; 01507 vector<float> vecdBetween, vecdBackground, vecdWithin; 01508 01509 dFrac = GetFraction( veciSmall.size( ) ); 01510 vecdBetween.reserve( (size_t)( dFrac * veciSmall.size( ) * veciBig.size( ) ) + c_iOverestimate ); 01511 vecdBackground.reserve( veciSmall.size( ) + veciBig.size( ) ); 01512 for( i = 0; i < veciSmall.size( ); ++i ) { 01513 if( IsFraction( dFrac ) ) { 01514 vecdBackground.push_back( GetBackground( iContext, veciSmall[ i ] ) ); 01515 continue; } 01516 if( !( GetDatabase( ).Get( veciSmall[ i ], veciBig, vecbData, true ) && 01517 GetAssociation( veciSmall[ i ], vecbData, veciBig, iContext, false, NULL, NULL, NULL, 01518 &vecdBetween, &vecdBackground, CMeta::GetNaN( ) ) ) ) 01519 return false; } 01520 for( i = 0; i < veciBig.size( ); ++i ) 01521 vecdBackground.push_back( GetBackground( iContext, veciBig[ i ] ) ); 01522 01523 i = veciSmall.size( ) + 1; 01524 j = veciBig.size( ) + 1; 01525 vecdWithin.reserve( (size_t)( ( dFrac * i * i / 2 ) + ( dFrac * j * j / 2 ) ) + c_iOverestimate ); 01526 if( !( GetWithin( veciSmall, iContext, NULL, &vecdWithin ) && 01527 GetWithin( veciBig, iContext, NULL, &vecdWithin ) ) ) 01528 return false; 01529 01530 Winsorize( vecdBetween ); 01531 dBetween = (float)CStatistics::Average( vecdBetween ); 01532 Winsorize( vecdBackground ); 01533 dBackground = (float)CStatistics::Average( vecdBackground ); 01534 Winsorize( vecdWithin ); 01535 dWithin = (float)CStatistics::Average( vecdWithin ); 01536 01537 return true; } 01538 01539 bool CBNServer::GetWithin( const vector<size_t>& veciGenes, size_t iContext, float* pdWithin, 01540 vector<float>* pvecdWithin ) const { 01541 // Alternative: calculate priors in the current context rather than the global context 01542 iContext = 0; 01543 const CBayesNetMinimal& BNet = GetBN( iContext ); 01544 size_t i, j; 01545 vector<unsigned char> vecbData; 01546 vector<float> vecdWithin; 01547 float d, dFrac; 01548 01549 dFrac = GetFraction( veciGenes.size( ) ); 01550 if( pdWithin ) { 01551 i = veciGenes.size( ) + 1; 01552 vecdWithin.reserve( (size_t)( dFrac * i * i / 2 ) + c_iOverestimate ); } 01553 for( i = 0; i < veciGenes.size( ); ++i ) { 01554 if( IsFraction( dFrac ) ) 01555 continue; 01556 if( !GetDatabase( ).Get( veciGenes[ i ], veciGenes, vecbData, true ) ) 01557 return false; 01558 d = GetPrior( iContext ); 01559 PushPointer( pdWithin, vecdWithin, d ); 01560 PushPointer( pvecdWithin, d ); 01561 for( j = ( i + 1 ); j < veciGenes.size( ); ++j ) { 01562 if( IsFraction( dFrac ) ) 01563 continue; 01564 if( !CMeta::IsNaN( d = BNet.Evaluate( vecbData, j * GetDatachunk( ) ) ) ) { 01565 PushPointer( pdWithin, vecdWithin, d ); 01566 PushPointer( pvecdWithin, d ); } } } 01567 if( pdWithin ) { 01568 Winsorize( vecdWithin ); 01569 SetPointer( pdWithin, (float)CStatistics::Average( vecdWithin ) ); } 01570 01571 return true; }