_ignored_seqs_ids = new ArrayList<String>();
}
+ /**
+  * Collects, for every sequence of the given alignment, its gap count
+  * (as reported by {@link #calcNumberOfGapsInMsa(Msa)}) scaled by
+  * {@code 100.0 / msa.getLength()}, i.e. expressed per 100 alignment positions.
+  * <p>
+  * NOTE(review): the scale factor is a floating-point division, so an alignment
+  * whose {@code getLength()} is 0 would yield non-finite values -- confirm that
+  * callers never pass an empty alignment.
+  *
+  * @param msa the alignment to analyze
+  * @return statistics holding one scaled gap count per sequence
+  */
+ public static final DescriptiveStatistics calcNumberOfGapsPer100Stats( final Msa msa ) {
+     final int[] gap_counts = calcNumberOfGapsInMsa( msa );
+     final DescriptiveStatistics per_100 = new BasicDescriptiveStatistics();
+     // Factor converting an absolute gap count into "gaps per 100 positions".
+     final double scale = 100.0 / msa.getLength();
+     for( int row = 0; row < gap_counts.length; ++row ) {
+         per_100.addValue( scale * gap_counts[ row ] );
+     }
+     return per_100;
+ }
+
+ public static final int[] calcNumberOfGapsInMsa( final Msa msa ) {
+ final int seqs = msa.getNumberOfSequences();
+ final int[] gaps= new int[ seqs ];
+ for( int i = 0; i < seqs; ++i ) {
+ gaps[ i ] = calcNumberOfGaps( msa.getSequence( i ) );
+ }
+ return gaps;
+ }
+
+
+
+ public final static int calcNumberOfGaps( final MolecularSequence seq ) {
+ int gaps = 0;
+ boolean was_gap = false;
+ for( int i = 0; i < seq.getLength(); ++i ) {
+ if ( seq.isGapAt( i ) ) {
+ if ( !was_gap ) {
+ ++gaps;
+ was_gap = true;
+ }
+ }
+ else {
+ was_gap = false;
+ }
+ }
+ return gaps;
+ }
+
public static DescriptiveStatistics calcBasicGapinessStatistics( final Msa msa ) {
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
for( int i = 0; i < msa.getLength(); ++i ) {
it.next().getNodeData().setNodeVisualData( vis );
}
}
-
- for( int i = 0; i < msa_props.size(); ++i ) {
- final MsaProperties msa_prop = msa_props.get( i );
- final String id = msa_prop.getRemovedSeq();
- if ( !ForesterUtil.isEmpty( id ) ) {
- final PhylogenyNode n = phy.getNode( id );
- n.setName( n.getName() + " [" + i + "]" );
- if ( !chart_only ) {
- final NodeVisualData vis = new NodeVisualData();
- vis.setFillType( NodeFill.SOLID );
- vis.setShape( NodeShape.RECTANGLE );
- vis.setNodeColor( ForesterUtil.calcColor( msa_prop.getLength(), min, max, mean_color, max_color ) );
- n.getNodeData().setNodeVisualData( vis );
- }
- else {
- n.getNodeData()
- .getNodeVisualData()
- .setNodeColor( ForesterUtil.calcColor( msa_prop.getLength(),
- min,
- max,
- mean,
- min_color,
- max_color,
- mean_color ) );
- }
- }
-
+ for( int i = 0; i < msa_props.size(); ++i ) {
+ final MsaProperties msa_prop = msa_props.get( i );
+ final String id = msa_prop.getRemovedSeq();
+ if ( !ForesterUtil.isEmpty( id ) ) {
+ final PhylogenyNode n = phy.getNode( id );
+ n.setName( n.getName() + " [" + i + "]" );
+ if ( !chart_only ) {
+ final NodeVisualData vis = new NodeVisualData();
+ vis.setFillType( NodeFill.SOLID );
+ vis.setShape( NodeShape.RECTANGLE );
+ vis.setNodeColor( ForesterUtil.calcColor( msa_prop.getLength(), min, max, mean_color, max_color ) );
+ n.getNodeData().setNodeVisualData( vis );
+ }
+ else {
+ n.getNodeData()
+ .getNodeVisualData()
+ .setNodeColor( ForesterUtil.calcColor( msa_prop.getLength(),
+ min,
+ max,
+ mean,
+ min_color,
+ max_color,
+ mean_color ) );
}
+ }
+ }
}
final public void deleteGapColumns( final double max_allowed_gap_ratio ) {
}
public final List<MsaProperties> removeViaGapAverage( final double mean_gapiness ) throws IOException,
- InterruptedException {
+ InterruptedException {
final GapContribution stats[] = calcGapContribtionsStats( _norm );
final List<String> to_remove_ids = new ArrayList<String>();
final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
System.out.println( msg );
}
if ( _phylogentic_inference ) {
- decorateTree( phy, msa_props, false );
+ decorateTree( phy, msa_props, false );
displayTree( phy );
}
return msa_props;
System.out.println( msg );
}
if ( _phylogentic_inference ) {
- decorateTree( phy, msa_props, false );
+ decorateTree( phy, msa_props, false );
displayTree( phy );
}
return msa_props;
}
public final List<MsaProperties> removeWorstOffenders( final int to_remove ) throws IOException,
- InterruptedException {
+ InterruptedException {
final GapContribution stats[] = calcGapContribtionsStats( _norm );
final List<String> to_remove_ids = new ArrayList<String>();
final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
System.out.println( msg );
}
if ( _phylogentic_inference ) {
- decorateTree( phy, msa_props, false );
+ decorateTree( phy, msa_props, false );
displayTree( phy );
-
-
- System.out.println( "calculating phylogentic tree..." );
- System.out.println();
- Phylogeny phy2 = calcTree();
- addSeqs2Tree( _msa, phy2 );
- displayTree( phy2 );
-
+ System.out.println( "calculating phylogentic tree..." );
+ System.out.println();
+ final Phylogeny phy2 = calcTree();
+ addSeqs2Tree( _msa, phy2 );
+ displayTree( phy2 );
}
return msa_props;
}
sb.append( msa_properties.getLength() );
sb.append( "\t" );
sb.append( NF_4.format( msa_properties.getGapRatio() ) );
+ sb.append( "\t" );
+ sb.append( NF_1.format( msa_properties.getAvgNumberOfGapsPer100() ) );
if ( _calculate_shannon_entropy ) {
sb.append( "\t" );
sb.append( NF_4.format( msa_properties.getEntropy7() ) );
System.out.print( "\t" );
System.out.print( "Length" );
System.out.print( "\t" );
+ System.out.print( "Gap R" );
+ System.out.print( "\t" );
System.out.print( "Gaps" );
System.out.print( "\t" );
if ( _calculate_shannon_entropy ) {
final private double _gap_ratio;
final private int _length;
final private int _number_of_sequences;
+ final private double _avg_number_of_gaps_per_100;
final private String _removed_seq;
public MsaProperties( final int number_of_sequences,
final double gap_ratio,
final double entropy7,
final double entropy21,
+ final double avg_number_of_gaps_per_100,
final String removed_seq ) {
_number_of_sequences = number_of_sequences;
_length = length;
_gap_ratio = gap_ratio;
_entropy7 = entropy7;
_entropy21 = entropy21;
+ _avg_number_of_gaps_per_100 = avg_number_of_gaps_per_100;
_removed_seq = removed_seq;
}
_length = msa.getLength();
_gap_ratio = MsaMethods.calcGapRatio( msa );
_removed_seq = removed_seq;
+ _avg_number_of_gaps_per_100 = MsaMethods.calcNumberOfGapsPer100Stats( msa ).arithmeticMean();
if ( calculate_normalized_shannon_entropy ) {
_entropy7 = MsaMethods.calcNormalizedShannonsEntropy( 7, msa );
_entropy21 = MsaMethods.calcNormalizedShannonsEntropy( 21, msa );
return _gap_ratio;
}
+ /**
+  * Returns the average number of gaps per 100 positions stored in this instance.
+  *
+  * @return the value of the {@code _avg_number_of_gaps_per_100} field
+  */
+ public final double getAvgNumberOfGapsPer100() {
+     return this._avg_number_of_gaps_per_100;
+ }
+
/**
 * Returns the length stored in this instance.
 *
 * @return the value of the {@code _length} field
 */
public final int getLength() {
    return this._length;
}