Sleipnir
|
00001 /***************************************************************************** 00002 * This file is provided under the Creative Commons Attribution 3.0 license. 00003 * 00004 * You are free to share, copy, distribute, transmit, or adapt this work 00005 * PROVIDED THAT you attribute the work to the authors listed below. 00006 * For more information, please see the following web page: 00007 * http://creativecommons.org/licenses/by/3.0/ 00008 * 00009 * This file is a component of the Sleipnir library for functional genomics, 00010 * authored by: 00011 * Curtis Huttenhower (chuttenh@princeton.edu) 00012 * Mark Schroeder 00013 * Maria D. Chikina 00014 * Olga G. Troyanskaya (ogt@princeton.edu, primary contact) 00015 * 00016 * If you use this library, the included executable tools, or any related 00017 * code in your work, please cite the following publication: 00018 * Curtis Huttenhower, Mark Schroeder, Maria D. Chikina, and 00019 * Olga G. Troyanskaya. 00020 * "The Sleipnir library for computational functional genomics" 00021 *****************************************************************************/ 00022 #include "stdafx.h" 00023 #include "cmdline.h" 00024 00025 int main( int iArgs, char** aszArgs ) { 00026 gengetopt_args_info sArgs; 00027 CDatasetCompact Data; 00028 CDataset DataContinuous; 00029 IDataset* pData; 00030 CGenome Genome; 00031 CGenes GenesIn( Genome ), GenesEx( Genome ); 00032 ifstream ifsm; 00033 size_t i, j, k, iOne, iTwo; 00034 float d; 00035 00036 if( cmdline_parser( iArgs, aszArgs, &sArgs ) ) { 00037 cmdline_parser_print_help( ); 00038 return 1; } 00039 CMeta Meta( sArgs.verbosity_arg ); 00040 00041 if( sArgs.load_arg ) { 00042 CDatasetCompactMap DataMap; 00043 00044 if( !DataMap.Open( sArgs.load_arg ) ) { 00045 cerr << "Couldn't open: " << sArgs.load_arg << endl; 00046 return 1; } 00047 #ifdef _MSC_VER 00048 Sleep( INFINITE ); 00049 #else // _MSC_VER 00050 sleep( -1 ); 00051 #endif // _MSC_VER 00052 return 0; } 00053 00054 if( sArgs.genes_arg ) { 00055 ifsm.open( sArgs.genes_arg ); 00056 if( !GenesIn.Open( ifsm ) ) { 00057 cerr << "Couldn't open: " << sArgs.genes_arg << endl; 00058 return 1; } 00059 ifsm.close( ); } 00060 if( sArgs.genex_arg ) { 00061 ifsm.clear( ); 00062 ifsm.open( sArgs.genex_arg ); 00063 if( !GenesEx.Open( ifsm ) ) { 00064 cerr << "Couldn't open: " << sArgs.genex_arg << endl; 00065 return 1; } 00066 ifsm.close( ); } 00067 00068 if( sArgs.input_arg ) { 00069 ifsm.clear( ); 00070 ifsm.open( sArgs.input_arg, ios_base::binary ); 00071 if( !Data.Open( ifsm ) ) { 00072 cerr << "Couldn't open: " << sArgs.input_arg << endl; 00073 return 1; } 00074 ifsm.close( ); } 00075 else if( sArgs.network_arg ) { 00076 CDataPair Answers; 00077 CBayesNetSmile BNSmile; 00078 00079 if( !sArgs.answers_arg ) { 00080 cmdline_parser_print_help( ); 00081 return 1; } 00082 if( !BNSmile.Open( sArgs.network_arg ) ) { 00083 cerr << "Couldn't open: " << sArgs.network_arg << endl; 00084 return 1; } 00085 if( !Answers.Open( sArgs.answers_arg, false ) ) { 00086 cerr << "Couldn't open: " << sArgs.answers_arg << endl; 00087 return 1; } 00088 if( !Data.Open( Answers, sArgs.directory_arg, &BNSmile, GenesIn, GenesEx, !!sArgs.everything_flag ) ) { 00089 cerr << "Couldn't open: " << sArgs.directory_arg << endl; 00090 return 1; } } 00091 else if( sArgs.lookupp_arg ) { 00092 CDat DatLookup; 00093 vector<size_t> veciGenes; 00094 CPCL PCLLookup; 00095 vector<string> vecstrNames, vecstrExperiments, vecstrDummy; 00096 00097 ifsm.clear( ); 00098 ifsm.open( sArgs.lookupp_arg ); 00099 if( !DatLookup.Open( ifsm, CDat::EFormatText, 1 ) ) { 00100 cerr << "Couldn't open: " << sArgs.lookupp_arg << endl; 00101 return 1; } 00102 ifsm.close( ); 00103 00104 for( i = 0; i < DatLookup.GetGenes( ); ++i ) 00105 for( j = ( i + 1 ); j < DatLookup.GetGenes( ); ++j ) 00106 if( !CMeta::IsNaN( DatLookup.Get( i, j ) ) ) 00107 vecstrNames.push_back( DatLookup.GetGene( i ) + " - " + DatLookup.GetGene( j ) ); 00108 vecstrExperiments.resize( sArgs.inputs_num ); 00109 copy( sArgs.inputs, sArgs.inputs + sArgs.inputs_num, vecstrExperiments.begin( ) ); 00110 PCLLookup.Open( vecstrNames, vecstrExperiments, vecstrDummy ); 00111 00112 veciGenes.resize( DatLookup.GetGenes( ) ); 00113 for( i = 0; i < sArgs.inputs_num; ++i ) { 00114 CDataPair Dat; 00115 size_t iGene; 00116 00117 if( !Dat.Open( sArgs.inputs[ i ], false, !!sArgs.memmap_flag ) ) { 00118 cerr << "Couldn't open: " << sArgs.inputs[ i ] << endl; 00119 return 1; } 00120 for( j = 0; j < veciGenes.size( ); ++j ) 00121 veciGenes[ j ] = Dat.GetGene( DatLookup.GetGene( j ) ); 00122 for( iGene = j = 0; j < DatLookup.GetGenes( ); ++j ) { 00123 iOne = veciGenes[ j ]; 00124 for( k = ( j + 1 ); k < DatLookup.GetGenes( ); ++k ) { 00125 if( CMeta::IsNaN( DatLookup.Get( j, k ) ) ) 00126 continue; 00127 iTwo = veciGenes[ k ]; 00128 if( ( iOne != -1 ) && ( iTwo != -1 ) ) 00129 PCLLookup.Set( iGene, i, Dat.Get( iOne, iTwo ) ); 00130 iGene++; } } } 00131 PCLLookup.Save( cout ); 00132 return 0; } 00133 else if( sArgs.lookups_arg ) { 00134 CGenes GenesLk( Genome ); 00135 vector<size_t> veciGenes; 00136 00137 ifsm.clear( ); 00138 ifsm.open( sArgs.lookups_arg ); 00139 if( !GenesLk.Open( ifsm ) ) { 00140 cerr << "Couldn't open: " << sArgs.lookups_arg << endl; 00141 return 1; } 00142 ifsm.close( ); 00143 00144 cout << "GID"; 00145 if( sArgs.lookup1_arg ) 00146 for( i = 0; i < GenesLk.GetGenes( ); ++i ) 00147 cout << '\t' << GenesLk.GetGene( i ).GetName( ); 00148 else 00149 for( i = 0; i < GenesLk.GetGenes( ); ++i ) { 00150 const string& strOne = GenesLk.GetGene( i ).GetName( ); 00151 for( j = ( i + 1 ); j < GenesLk.GetGenes( ); ++j ) 00152 cout << '\t' << strOne << '-' << GenesLk.GetGene( j ).GetName( ); } 00153 cout << endl; 00154 00155 veciGenes.resize( GenesLk.GetGenes( ) ); 00156 for( i = 0; i < sArgs.inputs_num; ++i ) { 00157 CDataPair Dat; 00158 00159 if( !Dat.Open( sArgs.inputs[ i ], false, !!sArgs.memmap_flag ) ) { 00160 cerr << "Couldn't open: " << sArgs.inputs[ i ] << endl; 00161 return 1; } 00162 for( j = 0; j < GenesLk.GetGenes( ); ++j ) 00163 veciGenes[ j ] = Dat.GetGene( GenesLk.GetGene( j ).GetName( ) ); 00164 cout << sArgs.inputs[ i ]; 00165 if( sArgs.lookup1_arg ) { 00166 if( ( iOne = Dat.GetGene( sArgs.lookup1_arg ) ) != -1 ) 00167 for( j = 0; j < veciGenes.size( ); ++j ) { 00168 cout << '\t'; 00169 if( ( ( iTwo = veciGenes[ j ] ) != -1 ) && 00170 !CMeta::IsNaN( d = Dat.Get( iOne, iTwo ) ) ) 00171 cout << d; } } 00172 else 00173 for( j = 0; j < veciGenes.size( ); ++j ) { 00174 iOne = veciGenes[ j ]; 00175 for( k = ( j + 1 ); k < veciGenes.size( ); ++k ) { 00176 cout << '\t'; 00177 if( ( iOne != -1 ) && ( ( iTwo = veciGenes[ k ] ) != -1 ) && 00178 !CMeta::IsNaN( d = Dat.Get( iOne, iTwo ) ) ) 00179 cout << Dat.Get( iOne, iTwo ); } } 00180 cout << endl; } 00181 return 0; } 00182 else if( sArgs.lookup1_arg && sArgs.lookup2_arg ) { 00183 for( i = 0; i < sArgs.inputs_num; ++i ) { 00184 CDataPair Dat; 00185 00186 if( !Dat.Open( sArgs.inputs[ i ], false, !!sArgs.memmap_flag ) ) { 00187 cerr << "Couldn't open: " << sArgs.inputs[ i ] << endl; 00188 return 1; } 00189 cout << sArgs.inputs[ i ]; 00190 if( ( ( iOne = Dat.GetGene( sArgs.lookup1_arg ) ) != -1 ) && 00191 ( ( iTwo = Dat.GetGene( sArgs.lookup2_arg ) ) != -1 ) && 00192 !CMeta::IsNaN( d = Dat.Get( iOne, iTwo ) ) ) 00193 cout << '\t' << ( sArgs.quantize_flag ? Dat.Quantize( d ) : d ); 00194 cout << endl; } 00195 return 0; } 00196 else if( sArgs.paircount_arg != -1 ) { 00197 CHalfMatrix<size_t> DatCounts; 00198 00199 if( !GenesIn.GetGenes( ) ) { 00200 cerr << "Pair count requires gene list -g" << endl; 00201 return 1; } 00202 DatCounts.Initialize( GenesIn.GetGenes( ) ); 00203 DatCounts.Clear( ); 00204 for( i = 0; i < sArgs.inputs_num; ++i ) { 00205 CDat Dat; 00206 vector<size_t> veciGenes; 00207 00208 if( !Dat.Open( sArgs.inputs[ i ], !!sArgs.memmap_flag ) ) { 00209 cerr << "Couldn't open: " << sArgs.inputs[ i ] << endl; 00210 return 1; } 00211 veciGenes.resize( Dat.GetGenes( ) ); 00212 for( j = 0; j < veciGenes.size( ); ++j ) 00213 veciGenes[ j ] = GenesIn.GetGene( Dat.GetGene( j ) ); 00214 for( j = 0; j < veciGenes.size( ); ++j ) { 00215 if( ( iOne = veciGenes[ j ] ) == -1 ) 00216 continue; 00217 for( k = ( j + 1 ); k < veciGenes.size( ); ++k ) 00218 if( ( ( iTwo = veciGenes[ k ] ) != -1 ) && !CMeta::IsNaN( Dat.Get( j, k ) ) ) 00219 DatCounts.Get( iOne, iTwo )++; } } 00220 for( i = 0; i < DatCounts.GetSize( ); ++i ) 00221 for( j = ( i + 1 ); j < DatCounts.GetSize( ); ++j ) 00222 if( DatCounts.Get( i, j ) > (size_t)sArgs.paircount_arg ) 00223 cout << GenesIn.GetGene( i ).GetName( ) << '\t' << GenesIn.GetGene( j ).GetName( ) << 00224 '\t' << DatCounts.Get( i, j ) << endl; } 00225 else { 00226 vector<string> vecstrFiles; 00227 bool fOpen; 00228 00229 vecstrFiles.resize( sArgs.inputs_num ); 00230 copy( sArgs.inputs, sArgs.inputs + sArgs.inputs_num, vecstrFiles.begin( ) ); 00231 fOpen = sArgs.continuous_flag ? DataContinuous.Open( vecstrFiles ) : Data.Open( vecstrFiles ); 00232 if( !fOpen ) { 00233 cerr << "Couldn't open inputs" << endl; 00234 return 1; } 00235 pData = sArgs.continuous_flag ? (IDataset*)&DataContinuous : (IDataset*)&Data; 00236 if( GenesIn.GetGenes( ) ) 00237 pData->FilterGenes( GenesIn, CDat::EFilterInclude ); 00238 if( GenesEx.GetGenes( ) ) 00239 pData->FilterGenes( GenesEx, CDat::EFilterExclude ); } 00240 00241 if( sArgs.mask_arg ) { 00242 CDat Mask; 00243 vector<int> veciGenes; 00244 00245 if( !Mask.Open( sArgs.mask_arg ) ) { 00246 cerr << "Couldn't open: " << sArgs.mask_arg << endl; 00247 return 1; } 00248 veciGenes.resize( pData->GetGenes( ) ); 00249 for( i = 0; i < pData->GetGenes( ); ++i ) 00250 veciGenes[ i ] = (int)Mask.GetGene( pData->GetGene( i ) ); 00251 for( i = 0; i < pData->GetGenes( ); ++i ) { 00252 if( ( iOne = veciGenes[ i ] ) == -1 ) { 00253 for( j = ( i + 1 ); j < pData->GetGenes( ); ++j ) 00254 pData->Remove( i, j ); 00255 continue; } 00256 for( j = ( i + 1 ); j < pData->GetGenes( ); ++j ) 00257 if( ( ( iTwo = veciGenes[ j ] ) == -1 ) || 00258 CMeta::IsNaN( d = Mask.Get( iOne, iTwo ) ) || ( d <= 0 ) ) 00259 pData->Remove( i, j ); } } 00260 00261 if( sArgs.output_arg ) { 00262 ofstream ofsm; 00263 00264 ofsm.open( sArgs.output_arg, ios_base::binary ); 00265 pData->Save( ofsm, true ); 00266 ofsm.close( ); } 00267 else 00268 pData->Save( cout, false ); 00269 00270 return 0; }