// Sleipnir
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "bayesnet.h" 00024 #include "dat.h" 00025 #include "dataset.h" 00026 #include "meta.h" 00027 00028 #ifndef NO_SMILE 00029 00030 #if ( defined(_MSC_VER) && defined(_DEBUG) ) 00031 extern "C" void __cdecl _invalid_parameter_noinfo( ) { } 00032 #endif // ( defined(_MSC_VER) && defined(_DEBUG) ) 00033 00034 namespace Sleipnir { 00035 00036 const char CBayesNetSmileImpl::c_szGaussian[] = "gaussian"; 00037 00038 bool CBayesNetSmileImpl::GetCPT( DSL_node* pNode, CDataMatrix& MatCPT ) { 00039 DSL_Dmatrix* pMat; 00040 DSL_intArray veciCoord; 00041 00042 pMat = pNode->Definition( )->GetMatrix( ); 00043 const DSL_intArray& veciDims = pMat->GetDimensions( ); 00044 00045 if( veciDims.GetSize( ) > 2 ) 00046 return false; 00047 pMat->IndexToCoordinates( 0, veciCoord ); 00048 if( veciDims.GetSize( ) == 1 ) { 00049 MatCPT.Initialize( 
veciDims[ 0 ], 1 ); 00050 for( veciCoord[ 0 ] = 0; veciCoord[ 0 ] < veciDims[ 0 ]; ++veciCoord[ 0 ] ) 00051 MatCPT.Set( veciCoord[ 0 ], 0, (float)(*pMat)[ veciCoord ] ); 00052 return true; } 00053 00054 MatCPT.Initialize( veciDims[ 1 ], veciDims[ 0 ] ); 00055 for( veciCoord[ 0 ] = 0; veciCoord[ 0 ] < veciDims[ 0 ]; ++veciCoord[ 0 ] ) 00056 for( veciCoord[ 1 ] = 0; veciCoord[ 1 ] < veciDims[ 1 ]; ++veciCoord[ 1 ] ) 00057 MatCPT.Set( veciCoord[ 1 ], veciCoord[ 0 ], (float)(*pMat)[ veciCoord ] ); 00058 return true; } 00059 00060 bool CBayesNetSmileImpl::IsGaussian( const DSL_network& BayesNet ) { 00061 int i; 00062 00063 if( ( i = ((DSL_network&)BayesNet).UserProperties( ).FindProperty( c_szGaussian ) ) < 0 ) 00064 return false; 00065 00066 return !!atoi( ((DSL_network&)BayesNet).UserProperties( ).GetPropertyValue( i ) ); } 00067 00068 bool CBayesNetSmileImpl::IsNaive( const DSL_network& BayesNet ) { 00069 int i; 00070 00071 { 00072 const DSL_intArray& veciParents = ((DSL_network&)BayesNet).GetNode( 0 )->Parents( ); 00073 00074 if( veciParents.NumItems( ) != 0 ) 00075 return false; 00076 } 00077 for( i = 1; i < BayesNet.GetNumberOfNodes( ); ++i ) { 00078 const DSL_intArray& veciParents = ((DSL_network&)BayesNet).GetNode( i )->Parents( ); 00079 00080 if( ( veciParents.NumItems( ) > 1 ) || ( veciParents[ 0 ] != 0 ) ) 00081 return false; } 00082 00083 return true; } 00084 00085 CBayesNetSmileImpl::CBayesNetSmileImpl( bool fGroup ) : CBayesNetImpl(fGroup), 00086 m_fSmileNet(false), m_pDefaults(NULL) { } 00087 00098 CBayesNetSmile::CBayesNetSmile( bool fGroup ) : CBayesNetSmileImpl( fGroup ) { } 00099 00100 bool CBayesNetSmileImpl::LearnGrouped( const IDataset* pData, size_t iIterations, bool fZero ) { 00101 size_t i, j, iIter, iDatum; 00102 string strCur; 00103 TMapData mapData; 00104 TMapData::iterator iterDatum; 00105 DSL_Dmatrix* pMat; 00106 vector<DSL_Dmatrix*> vecpExpected; 00107 DSL_intArray veciCoords; 00108 vector<bool> vecfHidden; 00109 00110 vecfHidden.resize( 
pData->GetExperiments( ) ); 00111 for( i = 0; i < vecfHidden.size( ); ++i ) 00112 vecfHidden[ i ] = pData->IsHidden( i ); 00113 EncodeData( pData, mapData ); 00114 vecpExpected.resize( m_SmileNet.GetNumberOfNodes( ) ); 00115 for( i = 0; i < vecpExpected.size( ); ++i ) 00116 vecpExpected[ i ] = new DSL_Dmatrix( *m_SmileNet.GetNode( (int)i )->Definition( 00117 )->GetMatrix( ) ); 00118 for( iIter = 0; iIter < iIterations; ++iIter ) { 00119 for( iDatum = i = 0; i < vecpExpected.size( ); ++i ) 00120 vecpExpected[ i ]->FillWith( 0 ); 00121 for( iterDatum = mapData.begin( ); iterDatum != mapData.end( ); ++iterDatum ) { 00122 if( !( iDatum++ % 50 ) ) 00123 g_CatSleipnir( ).notice( "CBayesNetSmile::LearnGrouped( %d, %d ) iteration %d, datum %d/%d", 00124 iIterations, fZero, iIter, ( iDatum - 1 ), mapData.size( ) ); 00125 FillCPTs( vecfHidden, iterDatum->first, fZero, true ); 00126 m_SmileNet.UpdateBeliefs( ); 00127 00128 for( i = 0; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) 00129 LearnExpected( m_SmileNet.GetNode( (int)i ), vecpExpected[ i ], 00130 iterDatum->second ); } 00131 for( i = 0; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) { 00132 pMat = m_SmileNet.GetNode( (int)i )->Definition( )->GetMatrix( ); 00133 for( pMat->IndexToCoordinates( (int)( j = 0 ), veciCoords ); 00134 j != DSL_OUT_OF_RANGE; j = pMat->NextCoordinates( veciCoords ) ) 00135 pMat->Subscript( veciCoords ) = vecpExpected[ i ]->Subscript( veciCoords ); 00136 pMat->Normalize( ); } } 00137 for( i = 0; i < vecpExpected.size( ); ++i ) 00138 delete vecpExpected[ i ]; 00139 00140 return true; } 00141 00142 bool CBayesNetSmileImpl::FillCPTs( const IDataset* pData, size_t iOne, size_t iTwo, bool fZero, bool fLearn ) { 00143 size_t i, iVal, iZero; 00144 int iProp; 00145 00146 if( !pData->IsExample( iOne, iTwo ) || ( fLearn && ( pData->GetDiscrete( iOne, iTwo, 0 ) == -1 ) ) ) 00147 return false; 00148 00149 m_SmileNet.ClearAllEvidence( ); 00150 for( i = fLearn ? 
0 : 1; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) { 00151 if( pData->IsHidden( i ) ) 00152 continue; 00153 00154 DSL_userProperties& Props = m_SmileNet.GetNode( (int)i )->Info( ).UserProperties( ); 00155 00156 if( ( iProp = Props.FindProperty( c_szZero ) ) < 0 ) 00157 iZero = fZero ? 0 : -1; 00158 else 00159 iZero = atoi( Props.GetPropertyValue( iProp ) ); 00160 00161 if( ( iVal = pData->GetDiscrete( iOne, iTwo, i ) ) == -1 ) { 00162 if( iZero == -1 ) 00163 continue; 00164 iVal = iZero; } 00165 m_SmileNet.GetNode( (int)i )->Value( )->SetEvidence( (int)iVal ); } 00166 00167 return true; } 00168 00169 bool CBayesNetSmileImpl::FillCPTs( const std::vector<bool>& vecfHidden, const std::string& strDatum, 00170 bool fZero, bool fLearn, bool fAll ) { 00171 size_t i, iVal, iZero; 00172 int iProp; 00173 00174 if( !fAll && fLearn && !IsAnswer( strDatum ) ) 00175 return false; 00176 00177 m_SmileNet.ClearAllEvidence( ); 00178 for( i = ( fAll || fLearn ) ? 0 : 1; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) { 00179 if( vecfHidden[ i ] ) 00180 continue; 00181 00182 DSL_userProperties& Props = m_SmileNet.GetNode( (int)i )->Info( ).UserProperties( ); 00183 00184 if( ( iProp = Props.FindProperty( c_szZero ) ) < 0 ) 00185 iZero = fZero ? 0 : -1; 00186 else 00187 iZero = atoi( Props.GetPropertyValue( iProp ) ); 00188 00189 if( strDatum[ i ] == c_cMissing ) { 00190 if( iZero == -1 ) 00191 continue; 00192 iVal = iZero; } 00193 else 00194 iVal = strDatum[ i ] - c_cBase; 00195 m_SmileNet.GetNode( (int)i )->Value( )->SetEvidence( (int)iVal ); } 00196 00197 return true; } 00198 00199 bool CBayesNetSmileImpl::FillCPTs( const vector<bool>& vecfHidden, const vector<unsigned char>& vecbDatum, 00200 bool fZero, bool fLearn, bool fNoData ) { 00201 size_t i, iVal, iZero; 00202 int iProp; 00203 00204 if( fLearn && !vecbDatum[ 0 ] ) 00205 return false; 00206 00207 m_SmileNet.ClearAllEvidence( ); 00208 for( i = fLearn ? 
0 : 1; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) { 00209 if( vecfHidden[ i ] ) 00210 continue; 00211 00212 DSL_userProperties& Props = m_SmileNet.GetNode( (int)i )->Info( ).UserProperties( ); 00213 00214 if( ( iProp = Props.FindProperty( c_szZero ) ) < 0 ) 00215 iZero = fZero ? 0 : -1; 00216 else 00217 iZero = atoi( Props.GetPropertyValue( iProp ) ); 00218 00219 if( !vecbDatum[ i ] ) { 00220 if( fNoData || ( iZero == -1 ) ) 00221 continue; 00222 iVal = iZero; } 00223 else 00224 iVal = vecbDatum[ i ] - 1; 00225 m_SmileNet.GetNode( (int)i )->Value( )->SetEvidence( (int)iVal ); } 00226 00227 return true; } 00228 00229 bool CBayesNetSmileImpl::LearnUngrouped( const IDataset* pData, size_t iIterations, bool fZero ) { 00230 size_t iIter, i, j, k; 00231 DSL_Dmatrix* pMat; 00232 vector<DSL_Dmatrix*> vecpExpected; 00233 DSL_intArray veciCoords; 00234 00235 if( !m_fSmileNet || IsContinuous( ) ) 00236 return false; 00237 00238 vecpExpected.resize( m_SmileNet.GetNumberOfNodes( ) ); 00239 for( i = 0; i < vecpExpected.size( ); ++i ) 00240 vecpExpected[ i ] = new DSL_Dmatrix( *m_SmileNet.GetNode( (int)i )->Definition( 00241 )->GetMatrix( ) ); 00242 for( iIter = 0; iIter < iIterations; ++iIter ) { 00243 for( i = 0; i < vecpExpected.size( ); ++i ) 00244 vecpExpected[ i ]->FillWith( 0 ); 00245 for( i = 0; i < pData->GetGenes( ); ++i ) { 00246 if( !( i % 50 ) ) 00247 g_CatSleipnir( ).notice( "CBayesNetSmile::LearnUngrouped( %d, %d ) iteration %d, gene %d/%d", 00248 iIterations, fZero, iIter, i, pData->GetGenes( ) ); 00249 for( j = ( i + 1 ); j < pData->GetGenes( ); ++j ) { 00250 if( !FillCPTs( pData, i, j, fZero, true ) ) 00251 continue; 00252 m_SmileNet.UpdateBeliefs( ); 00253 00254 for( k = 0; k < (size_t)m_SmileNet.GetNumberOfNodes( ); ++k ) 00255 LearnExpected( m_SmileNet.GetNode( (int)k ), vecpExpected[ k ] ); } } 00256 for( i = 0; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) { 00257 pMat = m_SmileNet.GetNode( (int)i )->Definition( )->GetMatrix( ); 00258 for( 
pMat->IndexToCoordinates( (int)( j = 0 ), veciCoords ); 00259 j != DSL_OUT_OF_RANGE; j = pMat->NextCoordinates( veciCoords ) ) 00260 pMat->Subscript( veciCoords ) = vecpExpected[ i ]->Subscript( veciCoords ); 00261 pMat->Normalize( ); } } 00262 for( i = 0; i < vecpExpected.size( ); ++i ) 00263 delete vecpExpected[ i ]; 00264 00265 return true; } 00266 00267 bool CBayesNetSmile::Learn( const IDataset* pData, size_t iIterations, bool fZero, bool fELR ) { 00268 00269 if( fELR ) 00270 return LearnELR( pData, iIterations, fZero ); 00271 if( IsNaive( ) ) 00272 return LearnNaive( pData, fZero ); 00273 00274 return ( m_fGroup ? LearnGrouped( pData, iIterations, fZero ) : 00275 LearnUngrouped( pData, iIterations, fZero ) ); } 00276 00277 void CBayesNetSmileImpl::LearnExpected( DSL_node* pNode, DSL_Dmatrix* pExpected, 00278 size_t iWeight ) { 00279 int iEvid, iLast, i, j; 00280 DSL_intArray veciParents, veciCoords; 00281 DSL_Dmatrix* pDef; 00282 DSL_nodeValue* pVal; 00283 double dProd; 00284 00285 veciParents = pNode->Parents( ); 00286 pDef = pNode->Definition( )->GetMatrix( ); 00287 pVal = pNode->Value( ); 00288 iEvid = pVal->GetEvidence( ); 00289 for( pDef->IndexToCoordinates( i = 0, veciCoords ); i != DSL_OUT_OF_RANGE; 00290 i = pDef->NextCoordinates( veciCoords ) ) { 00291 iLast = veciCoords[ veciCoords.GetSize( ) - 1 ]; 00292 if( veciParents.NumItems( ) ) { 00293 if( iEvid == DSL_OUT_OF_RANGE ) { 00294 dProd = pVal->GetMatrix( )->Subscript( iLast ); 00295 pVal->SetEvidence( iLast ); 00296 m_SmileNet.UpdateBeliefs( ); } 00297 else if( iLast == iEvid ) 00298 dProd = 1; 00299 else 00300 continue; 00301 00302 for( j = 0; j < veciParents.NumItems( ); ++j ) 00303 dProd *= m_SmileNet.GetNode( veciParents[ j ] )->Value( )->GetMatrix( 00304 )->Subscript( veciCoords[ j ] ); 00305 if( iEvid == DSL_OUT_OF_RANGE ) { 00306 pVal->ClearEvidence( ); 00307 m_SmileNet.UpdateBeliefs( ); } } 00308 else 00309 dProd = pVal->GetMatrix( )->Subscript( veciCoords[ 0 ] ); 00310 00311 
pExpected->Subscript( veciCoords ) += dProd * iWeight; } } 00312 00313 #ifdef PNL_ENABLED 00314 00325 bool CBayesNetSmile::Convert( CBayesNetPNL& BNPNL ) const { 00326 00327 if( !m_fSmileNet ) 00328 return false; 00329 00330 return( ConvertGraph( BNPNL ) && ConvertCPTs( BNPNL ) ); } 00331 00332 #endif // PNL_ENABLED 00333 00334 void CBayesNetSmile::GetNodes( std::vector<std::string>& vecstrNodes ) const { 00335 int i; 00336 00337 if( m_fSmileNet ) 00338 for( i = 0; i < m_SmileNet.GetNumberOfNodes( ); ++i ) 00339 vecstrNodes.push_back( m_SmileNet.GetNode( i )->Info( ).Header( ).GetId( ) ); } 00340 00341 bool CBayesNetSmileImpl::Evaluate( const IDataset* pData, CDat* pDatOut, TVecVecD* pvecvecdOut, 00342 bool fZero ) const { 00343 size_t i, j, k, iOne, iTwo; 00344 DSL_nodeValue* pValue; 00345 string strCur; 00346 map<string,float> mapData; 00347 map<string,float>::iterator iterDatum; 00348 vector<bool> vecfHidden; 00349 bool fZeroable; 00350 float dPrior; 00351 vector<size_t> veciGenes; 00352 00353 if( !m_fSmileNet || IsContinuous( ) ) 00354 return false; 00355 00356 vecfHidden.resize( pData->GetExperiments( ) ); 00357 for( i = 0; i < vecfHidden.size( ); ++i ) 00358 vecfHidden[ i ] = pData->IsHidden( i ); 00359 if( !( fZeroable = fZero ) ) 00360 for( i = 1; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) { 00361 DSL_userProperties& Props = m_SmileNet.GetNode( (int)i )->Info( ).UserProperties( ); 00362 if( Props.FindProperty( c_szZero ) >= 0 ) { 00363 fZeroable = true; 00364 break; } } 00365 if( pDatOut ) { 00366 veciGenes.resize( pData->GetGenes( ) ); 00367 for( i = 0; i < pData->GetGenes( ); ++i ) 00368 veciGenes[ i ] = pDatOut->GetGene( pData->GetGene( i ) ); 00369 ((CBayesNetSmileImpl*)this)->m_SmileNet.UpdateBeliefs( ); 00370 pValue = m_SmileNet.GetNode( 0 )->Value( ); 00371 dPrior = (float)(*pValue->GetMatrix( ))[ 0 ]; } 00372 for( i = 0; i < pData->GetGenes( ); ++i ) { 00373 if( !( i % 250 ) ) 00374 g_CatSleipnir( ).notice( "CBayesNetSmile::Evaluate( %d ) 
%d/%d", fZero, i, 00375 pData->GetGenes( ) ); 00376 if( pDatOut && !pvecvecdOut && ( ( iOne = veciGenes[ i ] ) == -1 ) ) 00377 continue; 00378 for( j = ( i + 1 ); j < pData->GetGenes( ); ++j ) { 00379 if( pDatOut && !pvecvecdOut && ( ( iTwo = veciGenes[ j ] ) == -1 ) ) 00380 continue; 00381 if( !( fZeroable || pData->IsExample( i, j ) ) ) { 00382 if( pDatOut && ( iOne != -1 ) && ( iTwo != -1 ) ) 00383 pDatOut->Set( iOne, iTwo, dPrior ); 00384 continue; } 00385 strCur = EncodeDatum( pData, i, j ); 00386 if( m_fGroup && ( ( iterDatum = mapData.find( strCur ) ) != mapData.end( ) ) ) { 00387 if( pDatOut && ( iOne != -1 ) && ( iTwo != -1 ) ) 00388 pDatOut->Set( iOne, iTwo, iterDatum->second ); 00389 if( pvecvecdOut ) { 00390 pvecvecdOut->resize( pvecvecdOut->size( ) + 1 ); 00391 (*pvecvecdOut)[ pvecvecdOut->size( ) - 1 ].push_back( 00392 iterDatum->second ); } 00393 continue; } 00394 00395 ((CBayesNetSmileImpl*)this)->FillCPTs( vecfHidden, strCur, fZero, false ); 00396 ((CBayesNetSmileImpl*)this)->m_SmileNet.UpdateBeliefs( ); 00397 pValue = m_SmileNet.GetNode( 0 )->Value( ); 00398 if( m_fGroup ) 00399 mapData[ strCur ] = (float)(*pValue->GetMatrix( ))[ 0 ]; 00400 if( pvecvecdOut ) { 00401 pvecvecdOut->resize( pvecvecdOut->size( ) + 1 ); 00402 { 00403 vector<float>& vecdCur = (*pvecvecdOut)[ pvecvecdOut->size( ) - 1 ]; 00404 00405 for( k = 0; ( k + 1 ) < (size_t)pValue->GetSize( ); ++k ) 00406 vecdCur.push_back( (float)(*pValue->GetMatrix( ))[ (int)k ] ); 00407 } } 00408 if( pDatOut && ( iOne != -1 ) && ( iTwo != -1 ) ) 00409 pDatOut->Set( iOne, iTwo, (float)(*pValue->GetMatrix( ))[ 0 ] ); } } 00410 00411 return true; } 00412 00413 bool CBayesNetSmile::Evaluate( const vector<unsigned char>& vecbDatum, vector<float>& vecdResults, bool fZero, 00414 size_t iNode, bool fIgnoreMissing ) const { 00415 vector<bool> vecfHidden; 00416 DSL_nodeValue* pValue; 00417 size_t i; 00418 00419 if( !m_fSmileNet || IsContinuous( ) ) 00420 return false; 00421 00422 vecfHidden.resize( 
vecbDatum.size( ) ); 00423 for( i = 0; i < vecfHidden.size( ); ++i ) 00424 vecfHidden[ i ] = false; 00425 ((CBayesNetSmile*)this)->FillCPTs( vecfHidden, vecbDatum, fZero, false, fIgnoreMissing ); 00426 ((CBayesNetSmile*)this)->m_SmileNet.UpdateBeliefs( ); 00427 pValue = m_SmileNet.GetNode( iNode )->Value( ); 00428 for( i = 0; ( i + 1 ) < (size_t)pValue->GetSize( ); ++i ) 00429 vecdResults.push_back( (float)(*pValue->GetMatrix( ))[ (int)i ] ); 00430 00431 return true; } 00432 00453 float CBayesNetSmile::Evaluate( size_t iNode, unsigned char bValue ) const { 00454 vector<bool> vecfHidden; 00455 vector<unsigned char> vecbDatum; 00456 DSL_nodeValue* pValue; 00457 size_t i; 00458 00459 if( !m_fSmileNet || IsContinuous( ) ) 00460 return CMeta::GetNaN( ); 00461 00462 vecbDatum.resize( m_SmileNet.GetNumberOfNodes( ) ); 00463 vecbDatum[ iNode ] = bValue + 1; 00464 vecfHidden.resize( vecbDatum.size( ) ); 00465 for( i = 0; i < vecbDatum.size( ); ++i ) 00466 vecfHidden[ i ] = ( i != iNode ); 00467 ((CBayesNetSmile*)this)->FillCPTs( vecfHidden, vecbDatum, false, false ); 00468 ((CBayesNetSmile*)this)->m_SmileNet.UpdateBeliefs( ); 00469 pValue = m_SmileNet.GetNode( 0 )->Value( ); 00470 00471 return (float)(*pValue->GetMatrix( ))[ 0 ]; } 00472 00483 unsigned char CBayesNetSmile::GetDefault( size_t iNode ) const { 00484 int i; 00485 00486 if( !m_fSmileNet || 00487 ( ( i = ((DSL_network&)m_SmileNet).GetNode( 00488 iNode )->Info( ).UserProperties( ).FindProperty( c_szZero ) ) < 0 ) ) 00489 return -1; 00490 00491 return atoi( ((DSL_network&)m_SmileNet).GetNode( 00492 iNode )->Info( ).UserProperties( ).GetPropertyValue( i ) ); } 00493 00494 void CBayesNetSmile::Randomize( ) { 00495 int i; 00496 00497 if( !m_fSmileNet ) 00498 return; 00499 00500 for( i = m_SmileNet.GetFirstNode( ); i != DSL_OUT_OF_RANGE; 00501 i = m_SmileNet.GetNextNode( i ) ) 00502 Randomize( i ); } 00503 00504 void CBayesNetSmile::Randomize( size_t iNode ) { 00505 DSL_Dmatrix* pMat; 00506 00507 if( !m_fSmileNet ) 
00508 return; 00509 00510 pMat = m_SmileNet.GetNode( (int)iNode )->Definition( )->GetMatrix( ); 00511 00512 { 00513 DSL_sysCoordinates Coords( *pMat ); 00514 00515 Coords.GoFirst( ); 00516 do 00517 Coords.CheckedValue( ) = (float)rand( ) / RAND_MAX; 00518 while( Coords.Next( ) != DSL_OUT_OF_RANGE ); 00519 } 00520 00521 pMat->Normalize( ); } 00522 00523 void CBayesNetSmile::Reverse( size_t iNode ) { 00524 int iCoords; 00525 DSL_Dmatrix* pMat; 00526 00527 if( !m_fSmileNet ) 00528 return; 00529 00530 pMat = m_SmileNet.GetNode( (int)iNode )->Definition( )->GetMatrix( ); 00531 { 00532 DSL_sysCoordinates Coords( *pMat ); 00533 00534 iCoords = pMat->GetSizeOfDimension( pMat->GetLastDimension( ) ); 00535 Coords.GoFirst( ); 00536 do { 00537 DSL_intArray veciCoords = Coords.Coordinates( ); 00538 int iCoord; 00539 double d; 00540 00541 iCoord = veciCoords[ veciCoords.GetSize( ) - 1 ]; 00542 if( iCoord >= ( iCoords / 2 ) ) 00543 continue; 00544 d = Coords.CheckedValue( ); 00545 veciCoords[ veciCoords.GetSize( ) - 1 ] = iCoords - iCoord - 1; 00546 Coords.CheckedValue( ) = (*pMat)[ veciCoords ]; 00547 (*pMat)[ veciCoords ] = d; } 00548 while( Coords.Next( ) != DSL_OUT_OF_RANGE ); 00549 } } 00550 00551 bool CBayesNetSmileImpl::LearnNaive( const IDataset* pData, bool fZero ) { 00552 vector<vector<size_t> > vecveciCounts; 00553 size_t i, j, k, iAnswer, iAnswers, iVal, iCount; 00554 DSL_nodeDefinition* pDef; 00555 DSL_Dmatrix* pMat; 00556 DSL_Dmatrix* pDefault; 00557 DSL_intArray veciCoords; 00558 vector<size_t> veciZeros; 00559 int iProp; 00560 bool fZeroable, fFallback; 00561 float dLambda; 00562 double dCount; 00563 00564 vecveciCounts.resize( m_SmileNet.GetNumberOfNodes( ) ); 00565 iAnswers = m_SmileNet.GetNode( 0 )->Definition( )->GetNumberOfOutcomes( ); 00566 vecveciCounts[ 0 ].resize( iAnswers ); 00567 for( i = 1; i < vecveciCounts.size( ); ++i ) 00568 vecveciCounts[ i ].resize( iAnswers * 00569 m_SmileNet.GetNode( (int)i )->Definition( )->GetNumberOfOutcomes( ) ); 00570 
veciZeros.resize( m_SmileNet.GetNumberOfNodes( ) ); 00571 fZeroable = fZero; 00572 for( i = 0; i < veciZeros.size( ); ++i ) { 00573 DSL_userProperties& Props = m_SmileNet.GetNode( (int)i )->Info( ).UserProperties( ); 00574 00575 if( ( iProp = Props.FindProperty( c_szZero ) ) < 0 ) 00576 veciZeros[ i ] = fZero ? 0 : -1; 00577 else { 00578 fZeroable = true; 00579 veciZeros[ i ] = atoi( Props.GetPropertyValue( iProp ) ); } } 00580 for( iCount = i = 0; i < pData->GetGenes( ); ++i ) 00581 for( j = ( i + 1 ); j < pData->GetGenes( ); ++j ) 00582 if( ( fZeroable || pData->IsExample( i, j ) ) && 00583 ( ( iAnswer = pData->GetDiscrete( i, j, 0 ) ) != -1 ) ) { 00584 vecveciCounts[ 0 ][ iAnswer ]++; 00585 iCount++; 00586 for( k = 1; k < pData->GetExperiments( ); ++k ) { 00587 if( ( iVal = pData->GetDiscrete( i, j, k ) ) == -1 ) { 00588 if( veciZeros[ k ] == -1 ) 00589 continue; 00590 iVal = veciZeros[ k ]; } 00591 //iVal = iVal % m_SmileNet.GetNode( k )->Definition( )->GetNumberOfOutcomes( ); 00592 vecveciCounts[ k ][ ( iVal * iAnswers ) + iAnswer ]++; } } 00593 00594 fFallback = m_pDefaults && ( iCount < c_iMinimum ); 00595 pMat = m_SmileNet.GetNode( 0 )->Definition( )->GetMatrix( ); 00596 for( i = 0; i < iAnswers; ++i ) 00597 (*pMat)[ (int)i ] = ( j = vecveciCounts[ 0 ][ (int)i ] ) ? j : ( fFallback ? 
0 : 1 ); 00598 if( fFallback ) { 00599 g_CatSleipnir( ).warn( "CBayesNetSmile::LearnNaive( %d ) insufficient data for node %s", 00600 fZero, m_SmileNet.GetNode( 0 )->Info( ).Header( ).GetId( ) ); 00601 dLambda = 1 - ( (float)iCount / c_iMinimum ); 00602 pMat->Normalize( ); 00603 pDefault = m_pDefaults->m_SmileNet.GetNode( 0 )->Definition( )->GetMatrix( ); 00604 for( i = 0; i < iAnswers; ++i ) 00605 (*pMat)[ (int)i ] = ( ( 1 - dLambda ) * (*pMat)[ (int)i ] ) + 00606 ( dLambda * (*pDefault)[ (int)i ] ); } 00607 pMat->Normalize( ); 00608 for( i = 1; i < vecveciCounts.size( ); ++i ) { 00609 pDef = m_SmileNet.GetNode( (int)i )->Definition( ); 00610 pMat = pDef->GetMatrix( ); 00611 pMat->IndexToCoordinates( 0, veciCoords ); 00612 pDefault = m_pDefaults ? m_pDefaults->m_SmileNet.GetNode( (int)i )->Definition( )->GetMatrix( ) : NULL; 00613 for( j = 0; j < iAnswers; ++j ) { 00614 veciCoords[ 0 ] = (int)j; 00615 for( k = 0; k < (size_t)pDef->GetNumberOfOutcomes( ); ++k ) { 00616 veciCoords[ 1 ] = (int)k; 00617 (*pMat)[ veciCoords ] = vecveciCounts[ i ][ ( k * iAnswers ) + j ]; } } 00618 if( pDefault ) 00619 for( j = 0; j < iAnswers; ++j ) { 00620 veciCoords[ 0 ] = (int)j; 00621 for( dCount = k = 0; k < (size_t)pDef->GetNumberOfOutcomes( ); ++k ) { 00622 veciCoords[ 1 ] = (int)k; 00623 dCount += (*pMat)[ veciCoords ]; } 00624 if( dCount < c_iMinimum ) { 00625 g_CatSleipnir( ).warn( "CBayesNetSmile::LearnNaive( %d ) insufficient data for node %s, column %d", 00626 fZero, m_SmileNet.GetNode( (int)i )->Info( ).Header( ).GetId( ), j ); 00627 dLambda = 1 - ( (float)dCount / c_iMinimum ); 00628 for( k = 0; k < (size_t)pDef->GetNumberOfOutcomes( ); ++k ) { 00629 veciCoords[ 1 ] = (int)k; 00630 (*pMat)[ veciCoords ] = ( dCount ? 
( ( 1 - dLambda ) * (*pMat)[ veciCoords ] / 00631 dCount ) : 0 ) + ( dLambda * (*pDefault)[ veciCoords ] ); } } 00632 else 00633 for( k = 0; k < (size_t)pDef->GetNumberOfOutcomes( ); ++k ) { 00634 veciCoords[ 1 ] = (int)k; 00635 if( !(*pMat)[ veciCoords ] ) 00636 (*pMat)[ veciCoords ] = 1; } } 00637 else 00638 for( j = 0; j < iAnswers; ++j ) { 00639 veciCoords[ 0 ] = (int)j; 00640 for( k = 0; k < (size_t)pDef->GetNumberOfOutcomes( ); ++k ) { 00641 veciCoords[ 1 ] = (int)k; 00642 if( !(*pMat)[ veciCoords ] ) 00643 (*pMat)[ veciCoords ] = 1; } } 00644 pMat->Normalize( ); } 00645 00646 return true; } 00647 00648 bool CBayesNetSmile::Evaluate( const CPCLPair& PCLData, CPCL& PCLResults, bool fZero, int iAlgorithm ) const { 00649 size_t i, j, k, iExp; 00650 string strCur; 00651 map<string, vector<float> > mapData; 00652 map<string, vector<float> >::iterator iterDatum; 00653 vector<size_t> veciMap; 00654 vector<bool> vecfHidden; 00655 int iPrev; 00656 00657 if( !m_fSmileNet || IsContinuous( ) ) 00658 return false; 00659 00660 iPrev = ((CBayesNetSmile*)this)->m_SmileNet.GetDefaultBNAlgorithm( ); 00661 veciMap.resize( m_SmileNet.GetNumberOfNodes( ) ); 00662 vecfHidden.resize( veciMap.size( ) ); 00663 for( i = 0; i < veciMap.size( ); ++i ) { 00664 veciMap[ i ] = -1; 00665 vecfHidden[ i ] = true; 00666 for( j = 0; j < PCLData.GetExperiments( ); ++j ) 00667 if( PCLData.GetExperiment( j ) == m_SmileNet.GetNode( (int)i )->Info( ).Header( ).GetId( ) ) { 00668 vecfHidden[ i ] = false; 00669 veciMap[ i ] = (unsigned int)j; 00670 break; } } 00671 ((CBayesNetSmile*)this)->m_SmileNet.SetDefaultBNAlgorithm( iAlgorithm ); 00672 for( i = 0; i < PCLResults.GetGenes( ); ++i ) { 00673 if( !( i % 1 ) ) 00674 g_CatSleipnir( ).notice( "CBayesNetSmile::Evaluate( %d ) %d/%d", fZero, i, 00675 PCLResults.GetGenes( ) ); 00676 strCur = EncodeDatum( PCLData, PCLData.GetGene( PCLResults.GetGene( i ) ), veciMap ); 00677 if( m_fGroup && ( ( iterDatum = mapData.find( strCur ) ) != mapData.end( ) ) ) { 
00678 for( j = 0; j < iterDatum->second.size( ); ++j ) 00679 PCLResults.Set( i, j, iterDatum->second[ j ] ); 00680 continue; } 00681 00682 ((CBayesNetSmile*)this)->FillCPTs( vecfHidden, strCur, fZero, false, true ); 00683 ((CBayesNetSmile*)this)->m_SmileNet.UpdateBeliefs( ); 00684 for( iExp = j = 0; j < veciMap.size( ); ++j ) { 00685 DSL_Dmatrix* pMatrix; 00686 00687 if( veciMap[ j ] != -1 ) 00688 continue; 00689 pMatrix = m_SmileNet.GetNode( (int)j )->Value( )->GetMatrix( ); 00690 for( k = 0; k < GetValues( j ); ++k ) 00691 PCLResults.Set( i, iExp++, (float)(*pMatrix)[ (int)k ] ); } 00692 if( m_fGroup ) { 00693 vector<float> vecfCur; 00694 00695 vecfCur.resize( PCLResults.GetExperiments( ) ); 00696 for( j = 0; j < vecfCur.size( ); ++j ) 00697 vecfCur[ j ] = PCLResults.Get( i, j ); 00698 mapData[ strCur ] = vecfCur; } } 00699 ((CBayesNetSmile*)this)->m_SmileNet.SetDefaultBNAlgorithm( iPrev ); 00700 00701 return true; } 00702 00725 bool CBayesNetSmile::Open( const std::vector<std::string>& vecstrFiles, size_t iValues ) { 00726 size_t i, j; 00727 DSL_stringArray vecstrOutcomes; 00728 string strCur; 00729 00730 m_fSmileNet = true; 00731 m_SmileNet.DeleteAllNodes( ); 00732 m_SmileNet.AddNode( DSL_CPT, (char*)c_szFR ); 00733 vecstrOutcomes.Add( ( (string)c_szFR + "No" ).c_str( ) ); 00734 vecstrOutcomes.Add( ( (string)c_szFR + "Yes" ).c_str( ) ); 00735 m_SmileNet.GetNode( 0 )->Definition( )->SetNumberOfOutcomes( vecstrOutcomes ); 00736 for( i = 0; i < vecstrFiles.size( ); ++i ) { 00737 m_SmileNet.AddNode( DSL_CPT, (char*)( strCur = 00738 CMeta::Filename( CMeta::Deextension( vecstrFiles[ i ] ) ) ).c_str( ) ); 00739 vecstrOutcomes.Flush( ); 00740 for( j = 0; j < iValues; ++j ) { 00741 char acNum[ 8 ]; 00742 00743 #pragma warning( disable : 4996 ) 00744 sprintf( acNum, "%02d", j ); 00745 #pragma warning( default : 4996 ) 00746 vecstrOutcomes.Add( ( strCur + acNum ).c_str( ) ); } 00747 m_SmileNet.GetNode( (int)i + 1 )->Definition( )->SetNumberOfOutcomes( vecstrOutcomes ); 
00748 m_SmileNet.AddArc( 0, (int)i + 1 ); } 00749 00750 return true; } 00751 00779 bool CBayesNetSmile::Open( const IDataset* pData, const std::vector<std::string>& vecstrNames, 00780 const vector<size_t>& veciDefaults ) { 00781 size_t i, j; 00782 DSL_stringArray vecstrOutcomes; 00783 char acNum[ 8 ]; 00784 00785 if( pData->GetExperiments( ) != vecstrNames.size( ) ) 00786 return false; 00787 00788 m_fSmileNet = true; 00789 m_SmileNet.DeleteAllNodes( ); 00790 m_SmileNet.AddNode( DSL_CPT, (char*)c_szFR ); 00791 vecstrOutcomes.Add( ( (string)c_szFR + "No" ).c_str( ) ); 00792 vecstrOutcomes.Add( ( (string)c_szFR + "Yes" ).c_str( ) ); 00793 m_SmileNet.GetNode( 0 )->Definition( )->SetNumberOfOutcomes( vecstrOutcomes ); 00794 for( i = 1; i < pData->GetExperiments( ); ++i ) { 00795 m_SmileNet.AddNode( DSL_CPT, (char*)vecstrNames[ i ].c_str( ) ); 00796 vecstrOutcomes.Flush( ); 00797 for( j = 0; j < pData->GetBins( i ); ++j ) { 00798 #pragma warning( disable : 4996 ) 00799 sprintf( acNum, "%02d", j ); 00800 #pragma warning( default : 4996 ) 00801 vecstrOutcomes.Add( ( vecstrNames[ i ] + acNum ).c_str( ) ); } 00802 m_SmileNet.GetNode( (int)i )->Definition( )->SetNumberOfOutcomes( vecstrOutcomes ); 00803 if( veciDefaults[ i ] != -1 ) { 00804 #pragma warning( disable : 4996 ) 00805 sprintf( acNum, "%d", veciDefaults[ i ] ); 00806 #pragma warning( default : 4996 ) 00807 m_SmileNet.GetNode( (int)i )->Info( ).UserProperties( ).AddProperty( c_szZero, acNum ); } 00808 m_SmileNet.AddArc( 0, (int)i ); } 00809 00810 return true; } 00811 00836 bool CBayesNetSmile::Open( const CBayesNetSmile& BNPrior, const vector<CBayesNetSmile*>& vecpBNs ) { 00837 DSL_node* pFrom; 00838 size_t iNet, iNode; 00839 int iTo, iProp; 00840 00841 if( !BNPrior.m_fSmileNet ) 00842 return false; 00843 for( iNet = 0; iNet < vecpBNs.size( ); ++iNet ) 00844 if( !vecpBNs[ iNet ]->m_fSmileNet ) 00845 return false; 00846 00847 m_fSmileNet = true; 00848 m_SmileNet.DeleteAllNodes( ); 00849 pFrom = 
BNPrior.m_SmileNet.GetNode( 0 ); 00850 m_SmileNet.AddNode( pFrom->Definition( )->GetType( ), pFrom->Info( ).Header( ).GetId( ) ); 00851 m_SmileNet.GetNode( 0 )->Definition( )->SetNumberOfOutcomes( *pFrom->Definition( )->GetOutcomesNames( ) ); 00852 m_SmileNet.GetNode( 0 )->Definition( )->SetDefinition( *pFrom->Definition( )->GetMatrix( ) ); 00853 for( iNet = 0; iNet < vecpBNs.size( ); ++iNet ) 00854 for( iNode = 1; iNode < (size_t)vecpBNs[ iNet ]->m_SmileNet.GetNumberOfNodes( ); ++iNode ) { 00855 pFrom = vecpBNs[ iNet ]->m_SmileNet.GetNode( iNode ); 00856 m_SmileNet.AddNode( pFrom->Definition( )->GetType( ), pFrom->Info( ).Header( ).GetId( ) ); 00857 m_SmileNet.AddArc( 0, iTo = ( m_SmileNet.GetNumberOfNodes( ) - 1 ) ); 00858 for( iProp = 0; iProp < pFrom->Info( ).UserProperties( ).GetNumberOfProperties( ); ++iProp ) 00859 m_SmileNet.GetNode( iTo )->Info( ).UserProperties( ).AddProperty( 00860 pFrom->Info( ).UserProperties( ).GetPropertyName( iProp ), 00861 pFrom->Info( ).UserProperties( ).GetPropertyValue( iProp ) ); 00862 m_SmileNet.GetNode( iTo )->Definition( )->SetNumberOfOutcomes( *pFrom->Definition( )->GetOutcomesNames( ) ); 00863 m_SmileNet.GetNode( iTo )->Definition( )->SetDefinition( *pFrom->Definition( )->GetMatrix( ) ); } 00864 00865 return true; } 00866 00883 bool CBayesNetSmile::Open( const CBayesNetMinimal& BNMinimal, const std::vector<std::string>& vecstrNames ) { 00884 DSL_stringArray vecstrOutcomes; 00885 char acNum[ 8 ]; 00886 size_t i, j, k; 00887 string strCur; 00888 DSL_Dmatrix* pMat; 00889 00890 m_fSmileNet = true; 00891 m_SmileNet.DeleteAllNodes( ); 00892 m_SmileNet.AddNode( DSL_CPT, (char*)c_szFR ); 00893 for( i = 0; i < BNMinimal.GetCPT( 0 ).GetRows( ); ++i ) { 00894 #pragma warning( disable : 4996 ) 00895 sprintf( acNum, "%02d", i ); 00896 vecstrOutcomes.Add( ( (string)c_szFR + acNum ).c_str( ) ); } 00897 m_SmileNet.GetNode( 0 )->Definition( )->SetNumberOfOutcomes( vecstrOutcomes ); 00898 pMat = m_SmileNet.GetNode( 0 )->Definition( 
)->GetMatrix( ); 00899 for( i = 0; i < BNMinimal.GetCPT( 0 ).GetRows( ); ++i ) 00900 (*pMat)[ i ] = BNMinimal.GetCPT( 0 ).Get( i, 0 ); 00901 for( i = 1; i < BNMinimal.GetNodes( ); ++i ) { 00902 m_SmileNet.AddNode( DSL_CPT, (char*)( strCur = CMeta::Filename( vecstrNames[ i - 1 ] ) ).c_str( ) ); 00903 vecstrOutcomes.Flush( ); 00904 for( j = 0; j < BNMinimal.GetCPT( i ).GetRows( ); ++j ) { 00905 sprintf( acNum, "%02d", j ); 00906 #pragma warning( default : 4996 ) 00907 vecstrOutcomes.Add( ( strCur + acNum ).c_str( ) ); } 00908 m_SmileNet.GetNode( (int)i )->Definition( )->SetNumberOfOutcomes( vecstrOutcomes ); 00909 m_SmileNet.AddArc( 0, (int)i ); 00910 pMat = m_SmileNet.GetNode( i )->Definition( )->GetMatrix( ); 00911 for( j = 0; j < BNMinimal.GetCPT( i ).GetColumns( ); ++j ) 00912 for( k = 0; k < BNMinimal.GetCPT( i ).GetRows( ); ++k ) 00913 (*pMat)[ ( j * BNMinimal.GetCPT( i ).GetRows( ) ) + k ] = 00914 BNMinimal.GetCPT( i ).Get( k, j ); 00915 if( BNMinimal.GetDefault( i ) != 0xFF ) { 00916 char acNum[ 16 ]; 00917 00918 #pragma warning( disable : 4996 ) 00919 sprintf( acNum, "%d", BNMinimal.GetDefault( i ) ); 00920 #pragma warning( default : 4996 ) 00921 m_SmileNet.GetNode( i )->Info( ).UserProperties( ).AddProperty( c_szZero, acNum ); } } 00922 00923 return true; } 00924 00925 } 00926 00927 #endif // NO_SMILE