int length = -1;
int step = 1;
boolean realign = false;
- boolean norm = true;
+ boolean normalize_for_effective_seq_length = true;
String path_to_mafft = null;
int step_for_diagnostics = 1;
int min_length = -1;
msa = DeleteableMsa.createInstance( FastaParser.parseMsa( is ) );
}
else {
- msa = DeleteableMsa.createInstance( GeneralMsaParser.parse( is ) );
+ msa = DeleteableMsa.createInstance( GeneralMsaParser.parseMsa( is ) );
}
final DescriptiveStatistics initial_msa_stats = MsaMethods.calculateEffectiveLengthStatistics( msa );
if (cla.isOptionSet( INFO_ONLY_OPTION ) ) {
path_to_mafft = cla.getOptionValueAsCleanString( PATH_TO_MAFFT_OPTION );
}
if ( cla.isOptionSet( DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION ) ) {
- norm = false;
+ normalize_for_effective_seq_length = false;
}
if ( cla.isOptionSet( STEP_FOR_DIAGNOSTICS_OPTION ) ) {
step_for_diagnostics = cla.getOptionValueAsInt( STEP_FOR_DIAGNOSTICS_OPTION );
}
System.out.println( "Step for diagnostics reports : " + step_for_diagnostics );
System.out.println( "Calculate normalized Shannon Entropy : " + report_entropy );
- if ( !norm ) {
- System.out.println( "Normalize : " + norm );
+ if ( normalize_for_effective_seq_length ) {
+ System.out.println( "Normalize : with individual, effective sequence lenghts" );
+ }
+ else {
+ System.out.println( "Normalize : with MSA length" );
}
System.out.println( "Realign with MAFFT : " + realign );
if ( realign ) {
if ( removed_seqs_out_base != null ) {
mc.setRemovedSeqsOutBase( removed_seqs_out_base );
}
- mc.setNorm( norm );
+ mc.setNorm( normalize_for_effective_seq_length );
mc.setRealign( realign );
if ( realign ) {
mc.setPathToMafft( path_to_mafft );
msa_props = mc.removeViaLength( length );
}
else {
- msa_props = mc.chart( step, realign, norm );
+ msa_props = mc.chart( step, realign, normalize_for_effective_seq_length );
}
Chart.display( msa_props, initial_number_of_seqs, report_entropy, in.getName() );
+ System.out.println();
+ System.out.println( "Final MSA properties" );
+ printMsaInfo( msa, MsaMethods.calculateEffectiveLengthStatistics( msa ));
}
}
catch ( final IllegalArgumentException iae ) {
WWW,
ForesterUtil.getForesterLibraryInformation() );
System.out.println( "Input MSA : " + in );
- System.out.println( " MSA length : " + msa.getLength() );
- System.out.println( " Number of sequences : " + msa.getNumberOfSequences() );
- System.out.println( " Median sequence length : " + NF_1.format( initial_msa_stats.median() ) );
- System.out.println( " Mean sequence length : "
- + NF_1.format( initial_msa_stats.arithmeticMean() ) );
- System.out.println( " Max sequence length : " + ( ( int ) initial_msa_stats.getMax() ) );
- System.out.println( " Min sequence length : " + ( ( int ) initial_msa_stats.getMin() ) );
- System.out.println( " Gap ratio : "
+ printMsaInfo( msa, initial_msa_stats );
+ }
+
+ private static void printMsaInfo( DeleteableMsa msa, final DescriptiveStatistics msa_stats ) { // Prints a summary of msa to standard output: sequence-length statistics are read from msa_stats, all other figures are computed from msa via MsaMethods.
+ System.out.println( "MSA length : " + msa.getLength() );
+ System.out.println( "Number of sequences : " + msa.getNumberOfSequences() );
+ System.out.println( "Median sequence length : " + NF_1.format( msa_stats.median() ) );
+ System.out.println( "Mean sequence length : "
+ + NF_1.format( msa_stats.arithmeticMean() ) );
+ System.out.println( "Max sequence length : " + ( ( int ) msa_stats.getMax() ) ); // max and min are truncated to int by the cast before printing
+ System.out.println( "Min sequence length : " + ( ( int ) msa_stats.getMin() ) );
+ System.out.println( "Gap ratio : "
+ NF_4.format( MsaMethods.calcGapRatio( msa ) ) );
- System.out.println( " Mean gap count per 100 residues : "
- + NF_1.format( MsaMethods.calcNumberOfGapsPer100Stats( msa ).arithmeticMean() ) );
- System.out.println( " Normalized Shannon Entropy (entn7) : "
+ System.out.println( "Mean gap count per sequence : " // value is the arithmetic mean of the statistics returned by MsaMethods.calcNumberOfGapsStats( msa )
+ + NF_1.format( MsaMethods.calcNumberOfGapsStats( msa ).arithmeticMean() ) );
+ System.out.println( "Normalized Shannon Entropy (entn7) : " // entropy is reported twice, with first argument 7 here and 21 below; NOTE(review): presumably the number of residue states - confirm against MsaMethods
+ NF_4.format( MsaMethods.calcNormalizedShannonsEntropy( 7, msa ) ) );
- System.out.println( " Normalized Shannon Entropy (entn21): "
+ System.out.println( "Normalized Shannon Entropy (entn21) : "
+ NF_4.format( MsaMethods.calcNormalizedShannonsEntropy( 21, msa ) ) );
}
+ "=<decimal> maximal allowed gap ratio per column (for deleting of columms) (0.0-1.0)" );
System.out.println( " -" + PERFORM_PHYLOGENETIC_INFERENCE
+ " to calculate a simple phylogenetic tree (Kimura distances, NJ)" );
+ System.out.println( " -" + DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION
+ + " to normalize gap-contributions with MSA length, instead of individual effective sequence lenghts" );
+
System.out.println();
System.out.println();
System.out.println();