WWW,
ForesterUtil.getForesterLibraryInformation() );
System.out.println( "Input MSA : " + in );
+ printMsaInfo( msa, initial_msa_stats );
+ }
+
+ private static void printMsaInfo( DeleteableMsa msa, final DescriptiveStatistics msa_stats ) {
System.out.println( "MSA length : " + msa.getLength() );
System.out.println( "Number of sequences : " + msa.getNumberOfSequences() );
- System.out.println( "Median sequence length : " + NF_1.format( initial_msa_stats.median() ) );
+ System.out.println( "Median sequence length : " + NF_1.format( msa_stats.median() ) );
System.out.println( "Mean sequence length : "
- + NF_1.format( initial_msa_stats.arithmeticMean() ) );
- System.out.println( "Max sequence length : " + ( ( int ) initial_msa_stats.getMax() ) );
- System.out.println( "Min sequence length : " + ( ( int ) initial_msa_stats.getMin() ) );
+ + NF_1.format( msa_stats.arithmeticMean() ) );
+ System.out.println( "Max sequence length : " + ( ( int ) msa_stats.getMax() ) );
+ System.out.println( "Min sequence length : " + ( ( int ) msa_stats.getMin() ) );
System.out.println( "Gap ratio : "
+ NF_4.format( MsaMethods.calcGapRatio( msa ) ) );
System.out.println( "Mean gap count per 100 residues : "
- + NF_1.format( MsaMethods.calcNumberOfGapsPer100Stats( msa ).arithmeticMean() ) );
+ + NF_1.format( MsaMethods.calcNumberOfGapsStats( msa ).arithmeticMean() ) );
System.out.println( "Normalized Shannon Entropy (entn7) : "
+ NF_4.format( MsaMethods.calcNormalizedShannonsEntropy( 7, msa ) ) );
System.out.println( "Normalized Shannon Entropy (entn21): "
import java.awt.BorderLayout;
import java.awt.event.ActionListener;
+import java.text.DecimalFormat;
+import java.text.NumberFormat;
import java.util.List;
import javax.swing.JDialog;
public final class Chart extends JDialog implements ActionListener {
+ final private static NumberFormat NF_1 = new DecimalFormat( "0.##" );
private static final long serialVersionUID = -5292420246132943515L;
private ChartPanel _chart_panel = null;
private final int _initial_number_of_seqs;
final MultiScatterDataModel model = new MultiScatterDataModel();
final double[][] seqs_length = new double[ _msa_props.size() ][ 2 ];
int max_length = -1;
+ int min_length = Integer.MAX_VALUE;
+ double max_gap_ratio = -1;
+ double min_gap_ratio = Double.MAX_VALUE;
+ double max_avg_gap_count = -1;
+ double min_avg_gap_count = Double.MAX_VALUE;
for( int i = 0; i < _msa_props.size(); ++i ) {
seqs_length[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences();
- seqs_length[ i ][ 1 ] = _msa_props.get( i ).getLength();
- if ( _msa_props.get( i ).getLength() > max_length ) {
- max_length = _msa_props.get( i ).getLength();
+ //
+ final int length = _msa_props.get( i ).getLength();
+ seqs_length[ i ][ 1 ] = length;
+ if ( length > max_length ) {
+ max_length = length;
+ }
+ if ( length < min_length ) {
+ min_length = length;
+ }
+ //
+ final double gap_ratio = _msa_props.get( i ).getGapRatio();
+ if ( gap_ratio > max_gap_ratio ) {
+ max_gap_ratio = gap_ratio;
+ }
+ if ( gap_ratio < min_gap_ratio ) {
+ min_gap_ratio = gap_ratio;
+ }
+ //
+ final double avg_gap_count = _msa_props.get( i ).getAvgNumberOfGaps();
+ if ( avg_gap_count > max_avg_gap_count ) {
+ max_avg_gap_count = avg_gap_count;
+ }
+ if ( avg_gap_count < min_avg_gap_count ) {
+ min_avg_gap_count = avg_gap_count;
}
}
- model.addData( seqs_length, "Length" );
+ model.addData( seqs_length, "Length" + " (" + minMaxToString( min_length, max_length ) + ")" );
model.setSeriesLine( "Series " + "Length", true );
model.setSeriesMarker( "Series " + "Length", false );
final double[][] seqs_gaps = new double[ _msa_props.size() ][ 2 ];
- double max_gap_ratio = -1;
double max_ent7 = -1;
double max_ent21 = -1;
- for( int i = 0; i < _msa_props.size(); ++i ) {
- if ( _msa_props.get( i ).getGapRatio() > max_gap_ratio ) {
- max_gap_ratio = _msa_props.get( i ).getGapRatio();
- }
- if ( _show_msa_qual ) {
- if ( _msa_props.get( i ).getEntropy7() > max_ent7 ) {
- max_ent7 = _msa_props.get( i ).getEntropy7();
+ double min_ent7 = Double.MAX_VALUE;
+ double min_ent21 = Double.MAX_VALUE;
+ if ( _show_msa_qual ) {
+ // Collect the observed range of both entropy measures; the maxima feed the
+ // scaling factors and both bounds are shown in the series titles.
+ for( int i = 0; i < _msa_props.size(); ++i ) {
+ final double ent7 = _msa_props.get( i ).getEntropy7();
+ if ( ent7 > max_ent7 ) {
+ max_ent7 = ent7;
+ }
+ // Compare against the running minimum (was compared against max_ent7).
+ if ( ent7 < min_ent7 ) {
+ min_ent7 = ent7;
}
- if ( _msa_props.get( i ).getEntropy21() > max_ent21 ) {
- max_ent21 = _msa_props.get( i ).getEntropy21();
+ final double ent21 = _msa_props.get( i ).getEntropy21();
+ // Compare against the running maximum (was compared against min_ent21).
+ if ( ent21 > max_ent21 ) {
+ max_ent21 = ent21;
+ }
+ if ( ent21 < min_ent21 ) {
+ min_ent21 = ent21;
}
}
}
final double gap_ratio_factor = ( max_length / 2.0 ) / max_gap_ratio;
+ final double avg_gaps_counts_factor = ( max_length / 2.0 ) / max_avg_gap_count;
final double ent7_factor = ( max_length / 2.0 ) / max_ent7;
final double ent21_factor = ( max_length / 2.0 ) / max_ent21;
for( int i = 0; i < _msa_props.size(); ++i ) {
seqs_gaps[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences();
seqs_gaps[ i ][ 1 ] = ForesterUtil.roundToInt( _msa_props.get( i ).getGapRatio() * gap_ratio_factor );
}
- model.addData( seqs_gaps, "Gap ratio" );
- model.setSeriesLine( "Series " + "Gap ratio", true );
- model.setSeriesMarker( "Series " + "Gap ratio", false );
+ model.addData( seqs_gaps, "Gap Ratio" + " (" + minMaxToString( min_gap_ratio, max_gap_ratio ) + ")" );
+ model.setSeriesLine( "Series " + "Gap Ratio", true );
+ model.setSeriesMarker( "Series " + "Gap Ratio", false );
+ final double[][] gap_counts = new double[ _msa_props.size() ][ 2 ];
+ for( int i = 0; i < _msa_props.size(); ++i ) {
+ gap_counts[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences();
+ gap_counts[ i ][ 1 ] = ForesterUtil.roundToInt( _msa_props.get( i ).getAvgNumberOfGaps()
+ * avg_gaps_counts_factor );
+ }
+ model.addData( gap_counts, "Mean Gap Count" + " (" + minMaxToString( min_avg_gap_count, max_avg_gap_count )
+ + ")" );
+ model.setSeriesLine( "Series " + "Mean Gap Count", true );
+ model.setSeriesMarker( "Series " + "Mean Gap Count", false );
if ( _show_msa_qual ) {
final double[][] entropy7 = new double[ _msa_props.size() ][ 2 ];
for( int i = 0; i < _msa_props.size(); ++i ) {
entropy7[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences();
entropy7[ i ][ 1 ] = ForesterUtil.roundToInt( _msa_props.get( i ).getEntropy7() * ent7_factor );
}
- model.addData( entropy7, "Entropy norm 7" );
+ model.addData( entropy7, "Entropy norm 7" + " (" + minMaxToString( min_ent7, max_ent7 ) + ")" );
model.setSeriesLine( "Series " + "Entropy norm 7", true );
model.setSeriesMarker( "Series " + "Entropy norm 7", false );
//
entropy21[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences();
entropy21[ i ][ 1 ] = ForesterUtil.roundToInt( _msa_props.get( i ).getEntropy21() * ent21_factor );
}
- model.addData( entropy21, "Entropy norm 21" );
+ model.addData( entropy21, "Entropy norm 21" + " (" + minMaxToString( min_ent21, max_ent21 ) + ")" );
model.setSeriesLine( "Series " + "Entropy norm 21", true );
model.setSeriesMarker( "Series " + "Entropy norm 21", false );
}
return _chart_panel;
}
+ private final static String minMaxToString( final double min, final double max ) {
+ // Format both bounds with the shared NF_1 formatter and join them with a dash.
+ final String lower = NF_1.format( min );
+ final String upper = NF_1.format( max );
+ return lower + "-" + upper;
+ }
+
public static void display( final List<MsaProperties> msa_props,
final int initial_number_of_seqs,
final boolean show_msa_qual,
}
else {
n.getNodeData()
- .getNodeVisualData()
- .setNodeColor( ForesterUtil.calcColor( msa_prop.getLength(),
- min,
- max,
- mean,
- min_color,
- max_color,
- mean_color ) );
+ .getNodeVisualData()
+ .setNodeColor( ForesterUtil.calcColor( msa_prop.getLength(),
+ min,
+ max,
+ mean,
+ min_color,
+ max_color,
+ mean_color ) );
}
}
}
config.setDisplaySequenceNames( false );
config.setDisplaySequenceSymbols( false );
config.setDisplayGeneNames( false );
+ config.setDisplayMultipleSequenceAlignment( true );
config.setShowScale( true );
config.setAddTaxonomyImagesCB( false );
config.setBaseFontSize( 9 );
}
public final List<MsaProperties> removeViaGapAverage( final double mean_gapiness ) throws IOException,
- InterruptedException {
+ InterruptedException {
final GapContribution stats[] = calcGapContribtionsStats( _norm );
final List<String> to_remove_ids = new ArrayList<String>();
final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
}
public final List<MsaProperties> removeWorstOffenders( final int to_remove ) throws IOException,
- InterruptedException {
+ InterruptedException {
final GapContribution stats[] = calcGapContribtionsStats( _norm );
final List<String> to_remove_ids = new ArrayList<String>();
final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
sb.append( "\t" );
sb.append( NF_4.format( msa_properties.getGapRatio() ) );
sb.append( "\t" );
- sb.append( NF_1.format( msa_properties.getAvgNumberOfGapsPer100() ) );
+ sb.append( NF_1.format( msa_properties.getAvgNumberOfGaps() ) );
if ( _calculate_shannon_entropy ) {
sb.append( "\t" );
sb.append( NF_4.format( msa_properties.getEntropy7() ) );
final private double _gap_ratio;
final private int _length;
final private int _number_of_sequences;
- final private double _avg_number_of_gaps_per_100;
+ final private double _avg_number_of_gaps;
final private String _removed_seq;
public MsaProperties( final int number_of_sequences,
final double gap_ratio,
final double entropy7,
final double entropy21,
- final double avg_number_of_gaps_per_100,
+ final double avg_number_of_gaps,
final String removed_seq ) {
_number_of_sequences = number_of_sequences;
_length = length;
_gap_ratio = gap_ratio;
_entropy7 = entropy7;
_entropy21 = entropy21;
- _avg_number_of_gaps_per_100 = avg_number_of_gaps_per_100;
+ _avg_number_of_gaps = avg_number_of_gaps;
_removed_seq = removed_seq;
}
_length = msa.getLength();
_gap_ratio = MsaMethods.calcGapRatio( msa );
_removed_seq = removed_seq;
- _avg_number_of_gaps_per_100 = MsaMethods.calcNumberOfGapsPer100Stats( msa ).arithmeticMean();
+ _avg_number_of_gaps = MsaMethods.calcNumberOfGapsStats( msa ).arithmeticMean();
if ( calculate_normalized_shannon_entropy ) {
_entropy7 = MsaMethods.calcNormalizedShannonsEntropy( 7, msa );
_entropy21 = MsaMethods.calcNormalizedShannonsEntropy( 21, msa );
return _gap_ratio;
}
- public final double getAvgNumberOfGapsPer100() {
- return _avg_number_of_gaps_per_100;
+ public final double getAvgNumberOfGaps() { // accessor: returns the stored _avg_number_of_gaps value unchanged
+ return _avg_number_of_gaps;
}
public final int getLength() {