Sleipnir
src/meta.cpp
00001 /*****************************************************************************
00002 * This file is provided under the Creative Commons Attribution 3.0 license.
00003 *
00004 * You are free to share, copy, distribute, transmit, or adapt this work
00005 * PROVIDED THAT you attribute the work to the authors listed below.
00006 * For more information, please see the following web page:
00007 * http://creativecommons.org/licenses/by/3.0/
00008 *
00009 * This file is a component of the Sleipnir library for functional genomics,
00010 * authored by:
00011 * Curtis Huttenhower (chuttenh@princeton.edu)
00012 * Mark Schroeder
00013 * Maria D. Chikina
00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact)
00015 *
00016 * If you use this library, the included executable tools, or any related
00017 * code in your work, please cite the following publication:
00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and
00019 * Olga G. Troyanskaya.
00020 * "The Sleipnir library for computational functional genomics"
00021 *****************************************************************************/
00022 #include "stdafx.h"
00023 #include "meta.h"
00024 
00025 namespace Sleipnir {
00026 
/*!
 * \brief
 * Set of whitespace characters: space, tab, carriage return, and newline.
 *
 * \remarks
 * NOTE(review): presumably the default separator/trim set used by callers declared in meta.h;
 * confirm against the header, as no use is visible in this file.
 */
00027 const char  CMeta::c_szWS[]     = " \t\r\n";
00028 
00029 CMeta::CMeta( int iVerbosity, size_t iRandomSeed ) {
00030 #ifndef USE_LOG4CPP_STUB
00031     OstreamAppender*    pAppOstm    = new OstreamAppender( "cerr", &cerr );
00032 #endif // USE_LOG4CPP_STUB
00033 
00034     srand( ( iRandomSeed == -1 ) ?
00035 #ifdef _MSC_VER
00036         GetTickCount( )
00037 #else
00038         time( NULL )
00039 #endif // _MSC_VER
00040         : iRandomSeed );
00041 #ifndef USE_LOG4CPP_STUB
00042     pAppOstm->setLayout( new BasicLayout( ) );
00043     g_CatSleipnir( ).setAdditivity( false );
00044     g_CatSleipnir( ).setAppender( pAppOstm );
00045     g_CatSleipnir( ).setPriority( iVerbosity * Priority::ALERT );
00046 #endif // USE_LOG4CPP_STUB
00047 }
00048 
00049 CMeta::~CMeta( ) {
00050 
00051     Category::shutdown( ); }
00052 
00070 string CMeta::Filename( const std::string& strString, char cReplacement ) {
00071     size_t  i;
00072     string  strRet;
00073     char    c;
00074 
00075     for( i = 0; i < strString.length( ); ++i )
00076         strRet += isalnum( c = strString[ i ] ) ? c : cReplacement;
00077 
00078     return strRet; }
00079 
00096 void CMeta::Tokenize( const char* szString, std::vector<std::string>& vecstrTokens, const char* szSeparators,
00097     bool fNoEmpties ) {
00098     const char* pc;
00099     string      strCur;
00100     bool        fPush;
00101 
00102     if( !( pc = szString ) )
00103         return;
00104 
00105     fPush = false;
00106     while( true ) {
00107         strCur.clear( );
00108         if( fNoEmpties )
00109             for( ; *pc && strchr( szSeparators, *pc ); ++pc );
00110         if( !*pc ) {
00111             if( !fNoEmpties && fPush )
00112                 vecstrTokens.push_back( strCur );
00113             return; }
00114         for( ; *pc && !strchr( szSeparators, *pc ); ++pc )
00115             strCur += *pc;
00116         if( fPush = !!*pc )
00117             pc++;
00118         vecstrTokens.push_back( strCur ); } }
00119 
00134 string CMeta::Basename( const char* szPath ) {
00135     const char* pchOne;
00136     const char* pchTwo;
00137 
00138     if( pchOne = strrchr( szPath, '\\' ) )
00139         pchOne++;
00140     if( pchTwo = strrchr( szPath, '/' ) )
00141         pchTwo++;
00142 
00143     return ( pchOne ? ( pchTwo ? max( pchOne, pchTwo ) : pchOne ) :
00144         ( pchTwo ? pchTwo : szPath ) ); }
00145 
00156 string CMeta::Trim( const char* szString ) {
00157     size_t  iBeg, iEnd, iLen;
00158 
00159     if( !szString || !( iLen = strlen( szString ) ) )
00160         return "";
00161 
00162     for( iBeg = 0; szString[ iBeg ]; ++iBeg )
00163         if( !isspace( szString[ iBeg ] ) )
00164             break;
00165     for( iEnd = 0; szString[ iLen - iEnd - 1 ]; ++iEnd )
00166         if( !isspace( szString[ iLen - iEnd - 1 ] ) )
00167             break;
00168 
00169     return string( szString + iBeg, iLen - iBeg - iEnd ); }
00170 
00199 bool CMeta::MapRead( unsigned char*& pbData, HANDLE& hndlMap, size_t& iSize, const char* szFile ) {
00200 
00201     Unmap( pbData, hndlMap, iSize );
00202 #ifdef _MSC_VER
00203     HANDLE  hndlFile;
00204 
00205     if( !( hndlFile = CreateFile( szFile, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING,
00206         FILE_ATTRIBUTE_READONLY, NULL ) ) )
00207         return false;
00208     if( !( hndlMap = CreateFileMapping( hndlFile, NULL, PAGE_READONLY, 0,
00209         (DWORD)( iSize = GetFileSize( hndlFile, NULL ) ), szFile ) ) ) {
00210         CloseHandle( hndlFile );
00211         return false; }
00212     CloseHandle( hndlFile );
00213 
00214     if( !( pbData = (unsigned char*)MapViewOfFile( hndlMap, FILE_MAP_READ, 0, 0, 0 ) ) ) {
00215         CloseHandle( hndlMap );
00216         return false; }
00217 #else // _MSC_VER
00218     int         iFile;
00219     struct stat sStat;
00220 
00221     if( !( iFile = open( szFile, O_RDONLY ) ) )
00222         return false;
00223     fstat( iFile, &sStat );
00224     iSize = sStat.st_size;
00225 
00226     if( ( pbData = (unsigned char*)mmap( NULL, iSize, PROT_READ, MAP_SHARED, iFile, 0 ) ) == MAP_FAILED ) {
00227         g_CatSleipnir( ).error( "CMeta::MapRead( %s ) %s", szFile, strerror( errno ) );
00228         pbData = NULL;
00229         close( iFile );
00230         return false; }
00231     close( iFile );
00232 #endif // _MSC_VER
00233 
00234     return true; }
00235 
00267 bool CMeta::MapWrite( unsigned char*& pbData, HANDLE& hndlMap, size_t iSize, const char* szFile ) {
00268 
00269     Unmap( pbData, hndlMap, iSize );
00270 #ifdef _MSC_VER
00271     HANDLE  hndlFile;
00272 
00273     if( !( hndlFile = CreateFile( szFile, GENERIC_READ | GENERIC_WRITE, 0, NULL, CREATE_ALWAYS,
00274         FILE_ATTRIBUTE_NORMAL, NULL ) ) )
00275         return false;
00276     if( !( hndlMap = CreateFileMapping( hndlFile, NULL, PAGE_READWRITE, 0, (DWORD)iSize, NULL ) ) ) {
00277         CloseHandle( hndlFile );
00278         return false; }
00279     CloseHandle( hndlFile );
00280 
00281     if( !( pbData = (unsigned char*)MapViewOfFile( hndlMap, FILE_MAP_WRITE, 0, 0, iSize ) ) ) {
00282         CloseHandle( hndlMap );
00283         return false; }
00284 #else // _MSC_VER
00285     int         iFile;
00286     struct stat sStat;
00287 
00288     if( !( iFile = open( szFile, O_RDWR | O_CREAT | O_TRUNC, S_IREAD | S_IWRITE | S_IRGRP | S_IWGRP | S_IROTH ) ) )
00289         return false;
00290     lseek( iFile, iSize - 1, SEEK_SET );
00291     write( iFile, &iSize, 1 );
00292     if( ( pbData = (unsigned char*)mmap( NULL, iSize, PROT_READ | PROT_WRITE, MAP_SHARED, iFile, 0 ) ) == MAP_FAILED ) {
00293         g_CatSleipnir( ).error( "CMeta::MapWrite( %s ) %s", szFile, strerror( errno ) );
00294         pbData = NULL;
00295         close( iFile );
00296         return false; }
00297     close( iFile );
00298 #endif // _MSC_VER
00299 
00300     return true; }
00301 
00318 void CMeta::Unmap( const unsigned char* pbData, HANDLE hndlMap, size_t iSize ) {
00319 
00320 #ifdef _MSC_VER
00321     if( pbData )
00322         UnmapViewOfFile( pbData );
00323     if( hndlMap )
00324         CloseHandle( hndlMap );
00325 #else // _MSC_VER
00326     if( pbData )
00327         munmap( (void*)pbData, iSize );
00328 #endif // _MSC_VER
00329 }
00330 
00342 size_t CMeta::GetMemoryUsage( ) {
00343 #if defined(_MSC_VER)
00344 #if 0
00345     PROCESS_MEMORY_COUNTERS sMem;
00346 
00347     if( !GetProcessMemoryInfo( GetCurrentProcess( ), &sMem, sizeof(sMem) ) )
00348         return -1;
00349     return sMem.WorkingSetSize;
00350 #endif // 0
00351     return -1;
00352 #else // defined(_MSC_VER)
00353     ifstream            ifsm;
00354     char                acBuffer[ 1024 ];
00355     size_t              iRet;
00356 
00357     sprintf( acBuffer, "/proc/%d/statm", getpid( ) );
00358     ifsm.open( acBuffer );
00359     if( !ifsm.is_open( ) )
00360         return -1;
00361     ifsm >> iRet;
00362     ifsm >> iRet;
00363     return ( iRet * 4096 );
00364 #endif // defined(_MSC_VER)
00365 }
00366 
00367 }