Sleipnir
src/bayesnetsmile.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "bayesnet.h"
00024 #include "dat.h"
00025 #include "dataset.h"
00026 #include "meta.h"
00027 
00028 #ifndef NO_SMILE
00029 
00030 #if ( defined(_MSC_VER) && defined(_DEBUG) )
// Empty, C-linkage definition of _invalid_parameter_noinfo.  The enclosing #if
// compiles it only for MSVC debug builds.
// NOTE(review): presumably provided so that a library built against the release
// CRT still links in debug configurations -- confirm against the build setup.
00031 extern "C" void __cdecl _invalid_parameter_noinfo( ) { }
00032 #endif // ( defined(_MSC_VER) && defined(_DEBUG) )
00033 
00034 namespace Sleipnir {
00035 
// Name of the network-level user property inspected by IsGaussian; that
// function treats a non-zero integer property value as "this network is Gaussian".
00036 const char  CBayesNetSmileImpl::c_szGaussian[]  = "gaussian";
00037 
00038 bool CBayesNetSmileImpl::GetCPT( DSL_node* pNode, CDataMatrix& MatCPT ) {
00039     DSL_Dmatrix*    pMat;
00040     DSL_intArray    veciCoord;
00041 
00042     pMat = pNode->Definition( )->GetMatrix( );
00043     const DSL_intArray& veciDims    = pMat->GetDimensions( );
00044 
00045     if( veciDims.GetSize( ) > 2 )
00046         return false;
00047     pMat->IndexToCoordinates( 0, veciCoord );
00048     if( veciDims.GetSize( ) == 1 ) {
00049         MatCPT.Initialize( veciDims[ 0 ], 1 );
00050         for( veciCoord[ 0 ] = 0; veciCoord[ 0 ] < veciDims[ 0 ]; ++veciCoord[ 0 ] )
00051             MatCPT.Set( veciCoord[ 0 ], 0, (float)(*pMat)[ veciCoord ] );
00052         return true; }
00053 
00054     MatCPT.Initialize( veciDims[ 1 ], veciDims[ 0 ] );
00055     for( veciCoord[ 0 ] = 0; veciCoord[ 0 ] < veciDims[ 0 ]; ++veciCoord[ 0 ] )
00056         for( veciCoord[ 1 ] = 0; veciCoord[ 1 ] < veciDims[ 1 ]; ++veciCoord[ 1 ] )
00057             MatCPT.Set( veciCoord[ 1 ], veciCoord[ 0 ], (float)(*pMat)[ veciCoord ] );
00058     return true; }
00059 
00060 bool CBayesNetSmileImpl::IsGaussian( const DSL_network& BayesNet ) {
00061     int i;
00062 
00063     if( ( i = ((DSL_network&)BayesNet).UserProperties( ).FindProperty( c_szGaussian ) ) < 0 )
00064         return false;
00065 
00066     return !!atoi( ((DSL_network&)BayesNet).UserProperties( ).GetPropertyValue( i ) ); }
00067 
00068 bool CBayesNetSmileImpl::IsNaive( const DSL_network& BayesNet ) {
00069     int i;
00070 
00071     {
00072         const DSL_intArray& veciParents = ((DSL_network&)BayesNet).GetNode( 0 )->Parents( );
00073 
00074         if( veciParents.NumItems( ) != 0 )
00075             return false;
00076     }
00077     for( i = 1; i < BayesNet.GetNumberOfNodes( ); ++i ) {
00078         const DSL_intArray& veciParents = ((DSL_network&)BayesNet).GetNode( i )->Parents( );
00079 
00080         if( ( veciParents.NumItems( ) > 1 ) || ( veciParents[ 0 ] != 0 ) )
00081             return false; }
00082 
00083     return true; }
00084 
00085 CBayesNetSmileImpl::CBayesNetSmileImpl( bool fGroup ) : CBayesNetImpl(fGroup),
00086     m_fSmileNet(false), m_pDefaults(NULL) { }
00087 
00098 CBayesNetSmile::CBayesNetSmile( bool fGroup ) : CBayesNetSmileImpl( fGroup ) { }
00099 
00100 bool CBayesNetSmileImpl::LearnGrouped( const IDataset* pData, size_t iIterations, bool fZero ) {
00101     size_t                  i, j, iIter, iDatum;
00102     string                  strCur;
00103     TMapData                mapData;
00104     TMapData::iterator      iterDatum;
00105     DSL_Dmatrix*            pMat;
00106     vector<DSL_Dmatrix*>    vecpExpected;
00107     DSL_intArray            veciCoords;
00108     vector<bool>            vecfHidden;
00109 
00110     vecfHidden.resize( pData->GetExperiments( ) );
00111     for( i = 0; i < vecfHidden.size( ); ++i )
00112         vecfHidden[ i ] = pData->IsHidden( i );
00113     EncodeData( pData, mapData );
00114     vecpExpected.resize( m_SmileNet.GetNumberOfNodes( ) );
00115     for( i = 0; i < vecpExpected.size( ); ++i )
00116         vecpExpected[ i ] = new DSL_Dmatrix( *m_SmileNet.GetNode( (int)i )->Definition(
00117             )->GetMatrix( ) );
00118     for( iIter = 0; iIter < iIterations; ++iIter ) {
00119         for( iDatum = i = 0; i < vecpExpected.size( ); ++i )
00120             vecpExpected[ i ]->FillWith( 0 );
00121         for( iterDatum = mapData.begin( ); iterDatum != mapData.end( ); ++iterDatum ) {
00122             if( !( iDatum++ % 50 ) )
00123                 g_CatSleipnir( ).notice( "CBayesNetSmile::LearnGrouped( %d, %d ) iteration %d, datum %d/%d",
00124                     iIterations, fZero, iIter, ( iDatum - 1 ), mapData.size( ) );
00125             FillCPTs( vecfHidden, iterDatum->first, fZero, true );
00126             m_SmileNet.UpdateBeliefs( );
00127 
00128             for( i = 0; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i )
00129                 LearnExpected( m_SmileNet.GetNode( (int)i ), vecpExpected[ i ],
00130                     iterDatum->second ); }
00131         for( i = 0; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) {
00132             pMat = m_SmileNet.GetNode( (int)i )->Definition( )->GetMatrix( );
00133             for( pMat->IndexToCoordinates( (int)( j = 0 ), veciCoords );
00134                 j != DSL_OUT_OF_RANGE; j = pMat->NextCoordinates( veciCoords ) )
00135                 pMat->Subscript( veciCoords ) = vecpExpected[ i ]->Subscript( veciCoords );
00136             pMat->Normalize( ); } }
00137     for( i = 0; i < vecpExpected.size( ); ++i )
00138         delete vecpExpected[ i ];
00139 
00140     return true; }
00141 
00142 bool CBayesNetSmileImpl::FillCPTs( const IDataset* pData, size_t iOne, size_t iTwo, bool fZero, bool fLearn ) {
00143     size_t  i, iVal, iZero;
00144     int     iProp;
00145 
00146     if( !pData->IsExample( iOne, iTwo ) || ( fLearn && ( pData->GetDiscrete( iOne, iTwo, 0 ) == -1 ) ) )
00147         return false;
00148 
00149     m_SmileNet.ClearAllEvidence( );
00150     for( i = fLearn ? 0 : 1; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) {
00151         if( pData->IsHidden( i ) )
00152             continue;
00153 
00154         DSL_userProperties& Props   = m_SmileNet.GetNode( (int)i )->Info( ).UserProperties( );
00155 
00156         if( ( iProp = Props.FindProperty( c_szZero ) ) < 0 )
00157             iZero = fZero ? 0 : -1;
00158         else
00159             iZero = atoi( Props.GetPropertyValue( iProp ) );
00160 
00161         if( ( iVal = pData->GetDiscrete( iOne, iTwo, i ) ) == -1 ) {
00162             if( iZero == -1 )
00163                 continue;
00164             iVal = iZero; }
00165         m_SmileNet.GetNode( (int)i )->Value( )->SetEvidence( (int)iVal ); }
00166 
00167     return true; }
00168 
00169 bool CBayesNetSmileImpl::FillCPTs( const std::vector<bool>& vecfHidden, const std::string& strDatum,
00170     bool fZero, bool fLearn, bool fAll ) {
00171     size_t  i, iVal, iZero;
00172     int     iProp;
00173 
00174     if( !fAll && fLearn && !IsAnswer( strDatum ) )
00175         return false;
00176 
00177     m_SmileNet.ClearAllEvidence( );
00178     for( i = ( fAll || fLearn ) ? 0 : 1; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) {
00179         if( vecfHidden[ i ] )
00180             continue;
00181 
00182         DSL_userProperties& Props   = m_SmileNet.GetNode( (int)i )->Info( ).UserProperties( );
00183 
00184         if( ( iProp = Props.FindProperty( c_szZero ) ) < 0 )
00185             iZero = fZero ? 0 : -1;
00186         else
00187             iZero = atoi( Props.GetPropertyValue( iProp ) );
00188 
00189         if( strDatum[ i ] == c_cMissing ) {
00190             if( iZero == -1 )
00191                 continue;
00192             iVal = iZero; }
00193         else
00194             iVal = strDatum[ i ] - c_cBase;
00195         m_SmileNet.GetNode( (int)i )->Value( )->SetEvidence( (int)iVal ); }
00196 
00197     return true; }
00198 
00199 bool CBayesNetSmileImpl::FillCPTs( const vector<bool>& vecfHidden, const vector<unsigned char>& vecbDatum,
00200     bool fZero, bool fLearn, bool fNoData ) {
00201     size_t  i, iVal, iZero;
00202     int     iProp;
00203 
00204     if( fLearn && !vecbDatum[ 0 ] )
00205         return false;
00206 
00207     m_SmileNet.ClearAllEvidence( );
00208     for( i = fLearn ? 0 : 1; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) {
00209         if( vecfHidden[ i ] )
00210             continue;
00211 
00212         DSL_userProperties& Props   = m_SmileNet.GetNode( (int)i )->Info( ).UserProperties( );
00213 
00214         if( ( iProp = Props.FindProperty( c_szZero ) ) < 0 )
00215             iZero = fZero ? 0 : -1;
00216         else
00217             iZero = atoi( Props.GetPropertyValue( iProp ) );
00218 
00219         if( !vecbDatum[ i ] ) {
00220             if( fNoData || ( iZero == -1 ) )
00221                 continue;
00222             iVal = iZero; }
00223         else
00224             iVal = vecbDatum[ i ] - 1;
00225         m_SmileNet.GetNode( (int)i )->Value( )->SetEvidence( (int)iVal ); }
00226 
00227     return true; }
00228 
00229 bool CBayesNetSmileImpl::LearnUngrouped( const IDataset* pData, size_t iIterations, bool fZero ) {
00230     size_t                  iIter, i, j, k;
00231     DSL_Dmatrix*            pMat;
00232     vector<DSL_Dmatrix*>    vecpExpected;
00233     DSL_intArray            veciCoords;
00234 
00235     if( !m_fSmileNet || IsContinuous( ) )
00236         return false;
00237 
00238     vecpExpected.resize( m_SmileNet.GetNumberOfNodes( ) );
00239     for( i = 0; i < vecpExpected.size( ); ++i )
00240         vecpExpected[ i ] = new DSL_Dmatrix( *m_SmileNet.GetNode( (int)i )->Definition(
00241             )->GetMatrix( ) );
00242     for( iIter = 0; iIter < iIterations; ++iIter ) {
00243         for( i = 0; i < vecpExpected.size( ); ++i )
00244             vecpExpected[ i ]->FillWith( 0 );
00245         for( i = 0; i < pData->GetGenes( ); ++i ) {
00246             if( !( i % 50 ) )
00247                 g_CatSleipnir( ).notice( "CBayesNetSmile::LearnUngrouped( %d, %d ) iteration %d, gene %d/%d",
00248                     iIterations, fZero, iIter, i, pData->GetGenes( ) );
00249             for( j = ( i + 1 ); j < pData->GetGenes( ); ++j ) {
00250                 if( !FillCPTs( pData, i, j, fZero, true ) )
00251                     continue;
00252                 m_SmileNet.UpdateBeliefs( );
00253 
00254                 for( k = 0; k < (size_t)m_SmileNet.GetNumberOfNodes( ); ++k )
00255                     LearnExpected( m_SmileNet.GetNode( (int)k ), vecpExpected[ k ] ); } }
00256         for( i = 0; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) {
00257             pMat = m_SmileNet.GetNode( (int)i )->Definition( )->GetMatrix( );
00258             for( pMat->IndexToCoordinates( (int)( j = 0 ), veciCoords );
00259                 j != DSL_OUT_OF_RANGE; j = pMat->NextCoordinates( veciCoords ) )
00260                 pMat->Subscript( veciCoords ) = vecpExpected[ i ]->Subscript( veciCoords );
00261             pMat->Normalize( ); } }
00262     for( i = 0; i < vecpExpected.size( ); ++i )
00263         delete vecpExpected[ i ];
00264 
00265     return true; }
00266 
00267 bool CBayesNetSmile::Learn( const IDataset* pData, size_t iIterations, bool fZero, bool fELR ) {
00268 
00269     if( fELR )
00270         return LearnELR( pData, iIterations, fZero );
00271     if( IsNaive( ) )
00272         return LearnNaive( pData, fZero );
00273 
00274     return ( m_fGroup ? LearnGrouped( pData, iIterations, fZero ) :
00275         LearnUngrouped( pData, iIterations, fZero ) ); }
00276 
00277 void CBayesNetSmileImpl::LearnExpected( DSL_node* pNode, DSL_Dmatrix* pExpected,
00278     size_t iWeight ) {
00279     int             iEvid, iLast, i, j;
00280     DSL_intArray    veciParents, veciCoords;
00281     DSL_Dmatrix*    pDef;
00282     DSL_nodeValue*  pVal;
00283     double          dProd;
00284 
00285     veciParents = pNode->Parents( );
00286     pDef = pNode->Definition( )->GetMatrix( );
00287     pVal = pNode->Value( );
00288     iEvid = pVal->GetEvidence( );
00289     for( pDef->IndexToCoordinates( i = 0, veciCoords ); i != DSL_OUT_OF_RANGE;
00290         i = pDef->NextCoordinates( veciCoords ) ) {
00291         iLast = veciCoords[ veciCoords.GetSize( ) - 1 ];
00292         if( veciParents.NumItems( ) ) {
00293             if( iEvid == DSL_OUT_OF_RANGE ) {
00294                 dProd = pVal->GetMatrix( )->Subscript( iLast );
00295                 pVal->SetEvidence( iLast );
00296                 m_SmileNet.UpdateBeliefs( ); }
00297             else if( iLast == iEvid )
00298                 dProd = 1;
00299             else
00300                 continue;
00301 
00302             for( j = 0; j < veciParents.NumItems( ); ++j )
00303                 dProd *= m_SmileNet.GetNode( veciParents[ j ] )->Value( )->GetMatrix(
00304                     )->Subscript( veciCoords[ j ] );
00305             if( iEvid == DSL_OUT_OF_RANGE ) {
00306                 pVal->ClearEvidence( );
00307                 m_SmileNet.UpdateBeliefs( ); } }
00308         else
00309             dProd = pVal->GetMatrix( )->Subscript( veciCoords[ 0 ] );
00310 
00311         pExpected->Subscript( veciCoords ) += dProd * iWeight; } }
00312 
00313 #ifdef PNL_ENABLED
00314 
00325 bool CBayesNetSmile::Convert( CBayesNetPNL& BNPNL ) const {
00326 
00327     if( !m_fSmileNet )
00328         return false;
00329 
00330     return( ConvertGraph( BNPNL ) && ConvertCPTs( BNPNL ) ); }
00331 
00332 #endif // PNL_ENABLED
00333 
00334 void CBayesNetSmile::GetNodes( std::vector<std::string>& vecstrNodes ) const {
00335     int i;
00336 
00337     if( m_fSmileNet )
00338         for( i = 0; i < m_SmileNet.GetNumberOfNodes( ); ++i )
00339             vecstrNodes.push_back( m_SmileNet.GetNode( i )->Info( ).Header( ).GetId( ) ); }
00340 
00341 bool CBayesNetSmileImpl::Evaluate( const IDataset* pData, CDat* pDatOut, TVecVecD* pvecvecdOut,
00342     bool fZero ) const {
00343     size_t                      i, j, k, iOne, iTwo;
00344     DSL_nodeValue*              pValue;
00345     string                      strCur;
00346     map<string,float>           mapData;
00347     map<string,float>::iterator iterDatum;
00348     vector<bool>                vecfHidden;
00349     bool                        fZeroable;
00350     float                       dPrior;
00351     vector<size_t>              veciGenes;
00352 
00353     if( !m_fSmileNet || IsContinuous( ) )
00354         return false;
00355 
00356     vecfHidden.resize( pData->GetExperiments( ) );
00357     for( i = 0; i < vecfHidden.size( ); ++i )
00358         vecfHidden[ i ] = pData->IsHidden( i );
00359     if( !( fZeroable = fZero ) )
00360         for( i = 1; i < (size_t)m_SmileNet.GetNumberOfNodes( ); ++i ) {
00361             DSL_userProperties& Props   = m_SmileNet.GetNode( (int)i )->Info( ).UserProperties( );
00362             if( Props.FindProperty( c_szZero ) >= 0 ) {
00363                 fZeroable = true;
00364                 break; } }
00365     if( pDatOut ) {
00366         veciGenes.resize( pData->GetGenes( ) );
00367         for( i = 0; i < pData->GetGenes( ); ++i )
00368             veciGenes[ i ] = pDatOut->GetGene( pData->GetGene( i ) );
00369         ((CBayesNetSmileImpl*)this)->m_SmileNet.UpdateBeliefs( );
00370         pValue = m_SmileNet.GetNode( 0 )->Value( );
00371         dPrior = (float)(*pValue->GetMatrix( ))[ 0 ]; }
00372     for( i = 0; i < pData->GetGenes( ); ++i ) {
00373         if( !( i % 250 ) )
00374             g_CatSleipnir( ).notice( "CBayesNetSmile::Evaluate( %d ) %d/%d", fZero, i,
00375                 pData->GetGenes( ) );
00376         if( pDatOut && !pvecvecdOut && ( ( iOne = veciGenes[ i ] ) == -1 ) )
00377             continue;
00378         for( j = ( i + 1 ); j < pData->GetGenes( ); ++j ) {
00379             if( pDatOut && !pvecvecdOut && ( ( iTwo = veciGenes[ j ] ) == -1 ) )
00380                 continue;
00381             if( !( fZeroable || pData->IsExample( i, j ) ) ) {
00382                 if( pDatOut && ( iOne != -1 ) && ( iTwo != -1 ) )
00383                     pDatOut->Set( iOne, iTwo, dPrior );
00384                 continue; }
00385             strCur = EncodeDatum( pData, i, j );
00386             if( m_fGroup && ( ( iterDatum = mapData.find( strCur ) ) != mapData.end( ) ) ) {
00387                 if( pDatOut && ( iOne != -1 ) && ( iTwo != -1 ) )
00388                     pDatOut->Set( iOne, iTwo, iterDatum->second );
00389                 if( pvecvecdOut ) {
00390                     pvecvecdOut->resize( pvecvecdOut->size( ) + 1 );
00391                     (*pvecvecdOut)[ pvecvecdOut->size( ) - 1 ].push_back(
00392                         iterDatum->second ); }
00393                 continue; }
00394 
00395             ((CBayesNetSmileImpl*)this)->FillCPTs( vecfHidden, strCur, fZero, false );
00396             ((CBayesNetSmileImpl*)this)->m_SmileNet.UpdateBeliefs( );
00397             pValue = m_SmileNet.GetNode( 0 )->Value( );
00398             if( m_fGroup )
00399                 mapData[ strCur ] = (float)(*pValue->GetMatrix( ))[ 0 ];
00400             if( pvecvecdOut ) {
00401                 pvecvecdOut->resize( pvecvecdOut->size( ) + 1 );
00402                 {
00403                     vector<float>&  vecdCur = (*pvecvecdOut)[ pvecvecdOut->size( ) - 1 ];
00404 
00405                     for( k = 0; ( k + 1 ) < (size_t)pValue->GetSize( ); ++k )
00406                         vecdCur.push_back( (float)(*pValue->GetMatrix( ))[ (int)k ] );
00407                 } }
00408             if( pDatOut && ( iOne != -1 ) && ( iTwo != -1 ) )
00409                 pDatOut->Set( iOne, iTwo, (float)(*pValue->GetMatrix( ))[ 0 ] ); } }
00410 
00411     return true; }
00412 
00413 bool CBayesNetSmile::Evaluate( const vector<unsigned char>& vecbDatum, vector<float>& vecdResults, bool fZero,
00414     size_t iNode, bool fIgnoreMissing ) const {
00415     vector<bool>    vecfHidden;
00416     DSL_nodeValue*  pValue;
00417     size_t          i;
00418 
00419     if( !m_fSmileNet || IsContinuous( ) )
00420         return false;
00421 
00422     vecfHidden.resize( vecbDatum.size( ) );
00423     for( i = 0; i < vecfHidden.size( ); ++i )
00424         vecfHidden[ i ] = false;
00425     ((CBayesNetSmile*)this)->FillCPTs( vecfHidden, vecbDatum, fZero, false, fIgnoreMissing );
00426     ((CBayesNetSmile*)this)->m_SmileNet.UpdateBeliefs( );
00427     pValue = m_SmileNet.GetNode( iNode )->Value( );
00428     for( i = 0; ( i + 1 ) < (size_t)pValue->GetSize( ); ++i )
00429         vecdResults.push_back( (float)(*pValue->GetMatrix( ))[ (int)i ] );
00430 
00431     return true; }
00432 
00453 float CBayesNetSmile::Evaluate( size_t iNode, unsigned char bValue ) const {
00454     vector<bool>            vecfHidden;
00455     vector<unsigned char>   vecbDatum;
00456     DSL_nodeValue*          pValue;
00457     size_t                  i;
00458 
00459     if( !m_fSmileNet || IsContinuous( ) )
00460         return CMeta::GetNaN( );
00461 
00462     vecbDatum.resize( m_SmileNet.GetNumberOfNodes( ) );
00463     vecbDatum[ iNode ] = bValue + 1;
00464     vecfHidden.resize( vecbDatum.size( ) );
00465     for( i = 0; i < vecbDatum.size( ); ++i )
00466         vecfHidden[ i ] = ( i != iNode );
00467     ((CBayesNetSmile*)this)->FillCPTs( vecfHidden, vecbDatum, false, false );
00468     ((CBayesNetSmile*)this)->m_SmileNet.UpdateBeliefs( );
00469     pValue = m_SmileNet.GetNode( 0 )->Value( );
00470 
00471     return (float)(*pValue->GetMatrix( ))[ 0 ]; }
00472 
00483 unsigned char CBayesNetSmile::GetDefault( size_t iNode ) const {
00484     int i;
00485 
00486     if( !m_fSmileNet ||
00487         ( ( i = ((DSL_network&)m_SmileNet).GetNode(
00488         iNode )->Info( ).UserProperties( ).FindProperty( c_szZero ) ) < 0 ) )
00489         return -1;
00490 
00491     return atoi( ((DSL_network&)m_SmileNet).GetNode(
00492         iNode )->Info( ).UserProperties( ).GetPropertyValue( i ) ); }
00493 
00494 void CBayesNetSmile::Randomize( ) {
00495     int i;
00496 
00497     if( !m_fSmileNet )
00498         return;
00499 
00500     for( i = m_SmileNet.GetFirstNode( ); i != DSL_OUT_OF_RANGE;
00501         i = m_SmileNet.GetNextNode( i ) )
00502         Randomize( i ); }
00503 
00504 void CBayesNetSmile::Randomize( size_t iNode ) {
00505     DSL_Dmatrix*    pMat;
00506 
00507     if( !m_fSmileNet )
00508         return;
00509 
00510     pMat = m_SmileNet.GetNode( (int)iNode )->Definition( )->GetMatrix( );
00511 
00512     {
00513         DSL_sysCoordinates  Coords( *pMat );
00514 
00515         Coords.GoFirst( );
00516         do
00517             Coords.CheckedValue( ) = (float)rand( ) / RAND_MAX;
00518         while( Coords.Next( ) != DSL_OUT_OF_RANGE );
00519     }
00520 
00521     pMat->Normalize( ); }
00522 
00523 void CBayesNetSmile::Reverse( size_t iNode ) {
00524     int             iCoords;
00525     DSL_Dmatrix*    pMat;
00526 
00527     if( !m_fSmileNet )
00528         return;
00529 
00530     pMat = m_SmileNet.GetNode( (int)iNode )->Definition( )->GetMatrix( );
00531     {
00532         DSL_sysCoordinates  Coords( *pMat );
00533 
00534         iCoords = pMat->GetSizeOfDimension( pMat->GetLastDimension( ) );
00535         Coords.GoFirst( );
00536         do {
00537             DSL_intArray    veciCoords  = Coords.Coordinates( );
00538             int             iCoord;
00539             double          d;
00540 
00541             iCoord = veciCoords[ veciCoords.GetSize( ) - 1 ];
00542             if( iCoord >= ( iCoords / 2 ) )
00543                 continue;
00544             d = Coords.CheckedValue( );
00545             veciCoords[ veciCoords.GetSize( ) - 1 ] = iCoords - iCoord - 1;
00546             Coords.CheckedValue( ) = (*pMat)[ veciCoords ];
00547             (*pMat)[ veciCoords ] = d; }
00548         while( Coords.Next( ) != DSL_OUT_OF_RANGE );
00549     } }
00550 
00551 bool CBayesNetSmileImpl::LearnNaive( const IDataset* pData, bool fZero ) {
00552     vector<vector<size_t> > vecveciCounts;
00553     size_t                  i, j, k, iAnswer, iAnswers, iVal, iCount;
00554     DSL_nodeDefinition*     pDef;
00555     DSL_Dmatrix*            pMat;
00556     DSL_Dmatrix*            pDefault;
00557     DSL_intArray            veciCoords;
00558     vector<size_t>          veciZeros;
00559     int                     iProp;
00560     bool                    fZeroable, fFallback;
00561     float                   dLambda;
00562     double                  dCount;
00563 
00564     vecveciCounts.resize( m_SmileNet.GetNumberOfNodes( ) );
00565     iAnswers = m_SmileNet.GetNode( 0 )->Definition( )->GetNumberOfOutcomes( );
00566     vecveciCounts[ 0 ].resize( iAnswers );
00567     for( i = 1; i < vecveciCounts.size( ); ++i )
00568         vecveciCounts[ i ].resize( iAnswers *
00569             m_SmileNet.GetNode( (int)i )->Definition( )->GetNumberOfOutcomes( ) );
00570     veciZeros.resize( m_SmileNet.GetNumberOfNodes( ) );
00571     fZeroable = fZero;
00572     for( i = 0; i < veciZeros.size( ); ++i ) {
00573         DSL_userProperties& Props   = m_SmileNet.GetNode( (int)i )->Info( ).UserProperties( );
00574 
00575         if( ( iProp = Props.FindProperty( c_szZero ) ) < 0 )
00576             veciZeros[ i ] = fZero ? 0 : -1;
00577         else {
00578             fZeroable = true;
00579             veciZeros[ i ] = atoi( Props.GetPropertyValue( iProp ) ); } }
00580     for( iCount = i = 0; i < pData->GetGenes( ); ++i )
00581         for( j = ( i + 1 ); j < pData->GetGenes( ); ++j )
00582             if( ( fZeroable || pData->IsExample( i, j ) ) &&
00583                 ( ( iAnswer = pData->GetDiscrete( i, j, 0 ) ) != -1 ) ) {
00584                 vecveciCounts[ 0 ][ iAnswer ]++;
00585                 iCount++;
00586                 for( k = 1; k < pData->GetExperiments( ); ++k ) {
00587                     if( ( iVal = pData->GetDiscrete( i, j, k ) ) == -1 ) {
00588                         if( veciZeros[ k ] == -1 )
00589                             continue;
00590                         iVal = veciZeros[ k ]; }
00591 //iVal = iVal % m_SmileNet.GetNode( k )->Definition( )->GetNumberOfOutcomes( );
00592                     vecveciCounts[ k ][ ( iVal * iAnswers ) + iAnswer ]++; } }
00593 
00594     fFallback = m_pDefaults && ( iCount < c_iMinimum );
00595     pMat = m_SmileNet.GetNode( 0 )->Definition( )->GetMatrix( );
00596     for( i = 0; i < iAnswers; ++i )
00597         (*pMat)[ (int)i ] = ( j = vecveciCounts[ 0 ][ (int)i ] ) ? j : ( fFallback ? 0 : 1 );
00598     if( fFallback ) {
00599         g_CatSleipnir( ).warn( "CBayesNetSmile::LearnNaive( %d ) insufficient data for node %s",
00600             fZero, m_SmileNet.GetNode( 0 )->Info( ).Header( ).GetId( ) );
00601         dLambda = 1 - ( (float)iCount / c_iMinimum );
00602         pMat->Normalize( );
00603         pDefault = m_pDefaults->m_SmileNet.GetNode( 0 )->Definition( )->GetMatrix( );
00604         for( i = 0; i < iAnswers; ++i )
00605             (*pMat)[ (int)i ] = ( ( 1 - dLambda ) * (*pMat)[ (int)i ] ) +
00606                 ( dLambda * (*pDefault)[ (int)i ] ); }
00607     pMat->Normalize( );
00608     for( i = 1; i < vecveciCounts.size( ); ++i ) {
00609         pDef = m_SmileNet.GetNode( (int)i )->Definition( );
00610         pMat = pDef->GetMatrix( );
00611         pMat->IndexToCoordinates( 0, veciCoords );
00612         pDefault = m_pDefaults ? m_pDefaults->m_SmileNet.GetNode( (int)i )->Definition( )->GetMatrix( ) : NULL;
00613         for( j = 0; j < iAnswers; ++j ) {
00614             veciCoords[ 0 ] = (int)j;
00615             for( k = 0; k < (size_t)pDef->GetNumberOfOutcomes( ); ++k ) {
00616                 veciCoords[ 1 ] = (int)k;
00617                 (*pMat)[ veciCoords ] = vecveciCounts[ i ][ ( k * iAnswers ) + j ]; } }
00618         if( pDefault )
00619             for( j = 0; j < iAnswers; ++j ) {
00620                 veciCoords[ 0 ] = (int)j;
00621                 for( dCount = k = 0; k < (size_t)pDef->GetNumberOfOutcomes( ); ++k ) {
00622                     veciCoords[ 1 ] = (int)k;
00623                     dCount += (*pMat)[ veciCoords ]; }
00624                 if( dCount < c_iMinimum ) {
00625                     g_CatSleipnir( ).warn( "CBayesNetSmile::LearnNaive( %d ) insufficient data for node %s, column %d",
00626                         fZero, m_SmileNet.GetNode( (int)i )->Info( ).Header( ).GetId( ), j );
00627                     dLambda = 1 - ( (float)dCount / c_iMinimum );
00628                     for( k = 0; k < (size_t)pDef->GetNumberOfOutcomes( ); ++k ) {
00629                         veciCoords[ 1 ] = (int)k;
00630                         (*pMat)[ veciCoords ] = ( dCount ? ( ( 1 - dLambda ) * (*pMat)[ veciCoords ] /
00631                             dCount ) : 0 ) + ( dLambda * (*pDefault)[ veciCoords ] ); } }
00632                 else
00633                     for( k = 0; k < (size_t)pDef->GetNumberOfOutcomes( ); ++k ) {
00634                         veciCoords[ 1 ] = (int)k;
00635                         if( !(*pMat)[ veciCoords ] )
00636                             (*pMat)[ veciCoords ] = 1; } }
00637         else
00638             for( j = 0; j < iAnswers; ++j ) {
00639                 veciCoords[ 0 ] = (int)j;
00640                 for( k = 0; k < (size_t)pDef->GetNumberOfOutcomes( ); ++k ) {
00641                     veciCoords[ 1 ] = (int)k;
00642                     if( !(*pMat)[ veciCoords ] )
00643                         (*pMat)[ veciCoords ] = 1; } }
00644         pMat->Normalize( ); }
00645 
00646     return true; }
00647 
00648 bool CBayesNetSmile::Evaluate( const CPCLPair& PCLData, CPCL& PCLResults, bool fZero, int iAlgorithm ) const {
00649     size_t                                  i, j, k, iExp;
00650     string                                  strCur;
00651     map<string, vector<float> >             mapData;
00652     map<string, vector<float> >::iterator   iterDatum;
00653     vector<size_t>                          veciMap;
00654     vector<bool>                            vecfHidden;
00655     int                                     iPrev;
00656 
00657     if( !m_fSmileNet || IsContinuous( ) )
00658         return false;
00659 
00660     iPrev = ((CBayesNetSmile*)this)->m_SmileNet.GetDefaultBNAlgorithm( );
00661     veciMap.resize( m_SmileNet.GetNumberOfNodes( ) );
00662     vecfHidden.resize( veciMap.size( ) );
00663     for( i = 0; i < veciMap.size( ); ++i ) {
00664         veciMap[ i ] = -1;
00665         vecfHidden[ i ] = true;
00666         for( j = 0; j < PCLData.GetExperiments( ); ++j )
00667             if( PCLData.GetExperiment( j ) == m_SmileNet.GetNode( (int)i )->Info( ).Header( ).GetId( ) ) {
00668                 vecfHidden[ i ] = false;
00669                 veciMap[ i ] = (unsigned int)j;
00670                 break; } }
00671     ((CBayesNetSmile*)this)->m_SmileNet.SetDefaultBNAlgorithm( iAlgorithm );
00672     for( i = 0; i < PCLResults.GetGenes( ); ++i ) {
00673         if( !( i % 1 ) )
00674             g_CatSleipnir( ).notice( "CBayesNetSmile::Evaluate( %d ) %d/%d", fZero, i,
00675                 PCLResults.GetGenes( ) );
00676         strCur = EncodeDatum( PCLData, PCLData.GetGene( PCLResults.GetGene( i ) ), veciMap );
00677         if( m_fGroup && ( ( iterDatum = mapData.find( strCur ) ) != mapData.end( ) ) ) {
00678             for( j = 0; j < iterDatum->second.size( ); ++j )
00679                 PCLResults.Set( i, j, iterDatum->second[ j ] );
00680             continue; }
00681 
00682         ((CBayesNetSmile*)this)->FillCPTs( vecfHidden, strCur, fZero, false, true );
00683         ((CBayesNetSmile*)this)->m_SmileNet.UpdateBeliefs( );
00684         for( iExp = j = 0; j < veciMap.size( ); ++j ) {
00685             DSL_Dmatrix*    pMatrix;
00686 
00687             if( veciMap[ j ] != -1 )
00688                 continue;
00689             pMatrix = m_SmileNet.GetNode( (int)j )->Value( )->GetMatrix( );
00690             for( k = 0; k < GetValues( j ); ++k )
00691                 PCLResults.Set( i, iExp++, (float)(*pMatrix)[ (int)k ] ); }
00692         if( m_fGroup ) {
00693             vector<float>   vecfCur;
00694 
00695             vecfCur.resize( PCLResults.GetExperiments( ) );
00696             for( j = 0; j < vecfCur.size( ); ++j )
00697                 vecfCur[ j ] = PCLResults.Get( i, j );
00698             mapData[ strCur ] = vecfCur; } }
00699     ((CBayesNetSmile*)this)->m_SmileNet.SetDefaultBNAlgorithm( iPrev );
00700 
00701     return true; }
00702 
00725 bool CBayesNetSmile::Open( const std::vector<std::string>& vecstrFiles, size_t iValues ) {
00726     size_t          i, j;
00727     DSL_stringArray vecstrOutcomes;
00728     string          strCur;
00729 
00730     m_fSmileNet = true;
00731     m_SmileNet.DeleteAllNodes( );
00732     m_SmileNet.AddNode( DSL_CPT, (char*)c_szFR );
00733     vecstrOutcomes.Add( ( (string)c_szFR + "No" ).c_str( ) );
00734     vecstrOutcomes.Add( ( (string)c_szFR + "Yes" ).c_str( ) );
00735     m_SmileNet.GetNode( 0 )->Definition( )->SetNumberOfOutcomes( vecstrOutcomes );
00736     for( i = 0; i < vecstrFiles.size( ); ++i ) {
00737         m_SmileNet.AddNode( DSL_CPT, (char*)( strCur =
00738             CMeta::Filename( CMeta::Deextension( vecstrFiles[ i ] ) ) ).c_str( ) );
00739         vecstrOutcomes.Flush( );
00740         for( j = 0; j < iValues; ++j ) {
00741             char    acNum[ 8 ];
00742 
00743 #pragma warning( disable : 4996 )
00744             sprintf( acNum, "%02d", j );
00745 #pragma warning( default : 4996 )
00746             vecstrOutcomes.Add( ( strCur + acNum ).c_str( ) ); }
00747         m_SmileNet.GetNode( (int)i + 1 )->Definition( )->SetNumberOfOutcomes( vecstrOutcomes );
00748         m_SmileNet.AddArc( 0, (int)i + 1 ); }
00749 
00750     return true; }
00751 
00779 bool CBayesNetSmile::Open( const IDataset* pData, const std::vector<std::string>& vecstrNames,
00780     const vector<size_t>& veciDefaults ) {
00781     size_t          i, j;
00782     DSL_stringArray vecstrOutcomes;
00783     char            acNum[ 8 ];
00784 
00785     if( pData->GetExperiments( ) != vecstrNames.size( ) )
00786         return false;
00787 
00788     m_fSmileNet = true;
00789     m_SmileNet.DeleteAllNodes( );
00790     m_SmileNet.AddNode( DSL_CPT, (char*)c_szFR );
00791     vecstrOutcomes.Add( ( (string)c_szFR + "No" ).c_str( ) );
00792     vecstrOutcomes.Add( ( (string)c_szFR + "Yes" ).c_str( ) );
00793     m_SmileNet.GetNode( 0 )->Definition( )->SetNumberOfOutcomes( vecstrOutcomes );
00794     for( i = 1; i < pData->GetExperiments( ); ++i ) {
00795         m_SmileNet.AddNode( DSL_CPT, (char*)vecstrNames[ i ].c_str( ) );
00796         vecstrOutcomes.Flush( );
00797         for( j = 0; j < pData->GetBins( i ); ++j ) {
00798 #pragma warning( disable : 4996 )
00799             sprintf( acNum, "%02d", j );
00800 #pragma warning( default : 4996 )
00801             vecstrOutcomes.Add( ( vecstrNames[ i ] + acNum ).c_str( ) ); }
00802         m_SmileNet.GetNode( (int)i )->Definition( )->SetNumberOfOutcomes( vecstrOutcomes );
00803         if( veciDefaults[ i ] != -1 ) {
00804 #pragma warning( disable : 4996 )
00805             sprintf( acNum, "%d", veciDefaults[ i ] );
00806 #pragma warning( default : 4996 )
00807             m_SmileNet.GetNode( (int)i )->Info( ).UserProperties( ).AddProperty( c_szZero, acNum ); }
00808         m_SmileNet.AddArc( 0, (int)i ); }
00809 
00810     return true; }
00811 
00836 bool CBayesNetSmile::Open( const CBayesNetSmile& BNPrior, const vector<CBayesNetSmile*>& vecpBNs ) {
00837     DSL_node*   pFrom;
00838     size_t      iNet, iNode;
00839     int         iTo, iProp;
00840 
00841     if( !BNPrior.m_fSmileNet )
00842         return false;
00843     for( iNet = 0; iNet < vecpBNs.size( ); ++iNet )
00844         if( !vecpBNs[ iNet ]->m_fSmileNet )
00845             return false;
00846 
00847     m_fSmileNet = true;
00848     m_SmileNet.DeleteAllNodes( );
00849     pFrom = BNPrior.m_SmileNet.GetNode( 0 );
00850     m_SmileNet.AddNode( pFrom->Definition( )->GetType( ), pFrom->Info( ).Header( ).GetId( ) );
00851     m_SmileNet.GetNode( 0 )->Definition( )->SetNumberOfOutcomes( *pFrom->Definition( )->GetOutcomesNames( ) );
00852     m_SmileNet.GetNode( 0 )->Definition( )->SetDefinition( *pFrom->Definition( )->GetMatrix( ) );
00853     for( iNet = 0; iNet < vecpBNs.size( ); ++iNet )
00854         for( iNode = 1; iNode < (size_t)vecpBNs[ iNet ]->m_SmileNet.GetNumberOfNodes( ); ++iNode ) {
00855             pFrom = vecpBNs[ iNet ]->m_SmileNet.GetNode( iNode );
00856             m_SmileNet.AddNode( pFrom->Definition( )->GetType( ), pFrom->Info( ).Header( ).GetId( ) );
00857             m_SmileNet.AddArc( 0, iTo = ( m_SmileNet.GetNumberOfNodes( ) - 1 ) );
00858             for( iProp = 0; iProp < pFrom->Info( ).UserProperties( ).GetNumberOfProperties( ); ++iProp )
00859                 m_SmileNet.GetNode( iTo )->Info( ).UserProperties( ).AddProperty(
00860                     pFrom->Info( ).UserProperties( ).GetPropertyName( iProp ),
00861                     pFrom->Info( ).UserProperties( ).GetPropertyValue( iProp ) );
00862             m_SmileNet.GetNode( iTo )->Definition( )->SetNumberOfOutcomes( *pFrom->Definition( )->GetOutcomesNames( ) );
00863             m_SmileNet.GetNode( iTo )->Definition( )->SetDefinition( *pFrom->Definition( )->GetMatrix( ) ); }
00864 
00865     return true; }
00866 
00883 bool CBayesNetSmile::Open( const CBayesNetMinimal& BNMinimal, const std::vector<std::string>& vecstrNames ) {
00884     DSL_stringArray vecstrOutcomes;
00885     char            acNum[ 8 ];
00886     size_t          i, j, k;
00887     string          strCur;
00888     DSL_Dmatrix*    pMat;
00889 
00890     m_fSmileNet = true;
00891     m_SmileNet.DeleteAllNodes( );
00892     m_SmileNet.AddNode( DSL_CPT, (char*)c_szFR );
00893     for( i = 0; i < BNMinimal.GetCPT( 0 ).GetRows( ); ++i ) {
00894 #pragma warning( disable : 4996 )
00895         sprintf( acNum, "%02d", i );
00896         vecstrOutcomes.Add( ( (string)c_szFR + acNum ).c_str( ) ); }
00897     m_SmileNet.GetNode( 0 )->Definition( )->SetNumberOfOutcomes( vecstrOutcomes );
00898     pMat = m_SmileNet.GetNode( 0 )->Definition( )->GetMatrix( );
00899     for( i = 0; i < BNMinimal.GetCPT( 0 ).GetRows( ); ++i )
00900         (*pMat)[ i ] = BNMinimal.GetCPT( 0 ).Get( i, 0 );
00901     for( i = 1; i < BNMinimal.GetNodes( ); ++i ) {
00902         m_SmileNet.AddNode( DSL_CPT, (char*)( strCur = CMeta::Filename( vecstrNames[ i - 1 ] ) ).c_str( ) );
00903         vecstrOutcomes.Flush( );
00904         for( j = 0; j < BNMinimal.GetCPT( i ).GetRows( ); ++j ) {
00905             sprintf( acNum, "%02d", j );
00906 #pragma warning( default : 4996 )
00907             vecstrOutcomes.Add( ( strCur + acNum ).c_str( ) ); }
00908         m_SmileNet.GetNode( (int)i )->Definition( )->SetNumberOfOutcomes( vecstrOutcomes );
00909         m_SmileNet.AddArc( 0, (int)i );
00910         pMat = m_SmileNet.GetNode( i )->Definition( )->GetMatrix( );
00911         for( j = 0; j < BNMinimal.GetCPT( i ).GetColumns( ); ++j )
00912             for( k = 0; k < BNMinimal.GetCPT( i ).GetRows( ); ++k )
00913                 (*pMat)[ ( j * BNMinimal.GetCPT( i ).GetRows( ) ) + k ] =
00914                     BNMinimal.GetCPT( i ).Get( k, j );
00915         if( BNMinimal.GetDefault( i ) != 0xFF ) {
00916             char    acNum[ 16 ];
00917 
00918 #pragma warning( disable : 4996 )
00919             sprintf( acNum, "%d", BNMinimal.GetDefault( i ) );
00920 #pragma warning( default : 4996 )
00921             m_SmileNet.GetNode( i )->Info( ).UserProperties( ).AddProperty( c_szZero, acNum ); } }
00922     
00923     return true; }
00924 
00925 }
00926 
00927 #endif // NO_SMILE