X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fmsa%2FMsaMethods.java;h=1a94b74f7ce0fa7d22ec49fe20d5ce39177ad899;hb=fda4b2cd36f7c4d7edf6381268ebcf5fbbc77297;hp=9975ab26a9d41096fc67ef3c63766eafeddb30b1;hpb=36376d95fc170ff20eb86705056b2d60438bcafb;p=jalview.git diff --git a/forester/java/src/org/forester/msa/MsaMethods.java b/forester/java/src/org/forester/msa/MsaMethods.java index 9975ab2..1a94b74 100644 --- a/forester/java/src/org/forester/msa/MsaMethods.java +++ b/forester/java/src/org/forester/msa/MsaMethods.java @@ -111,7 +111,7 @@ public final class MsaMethods { return BasicMsa.createInstance( seqs ); } - synchronized final public Msa removeGapColumns( final double max_allowed_gap_ratio, + synchronized final public Msa deleteGapColumns( final double max_allowed_gap_ratio, final int min_allowed_length, final Msa msa ) { init(); @@ -122,7 +122,7 @@ public final class MsaMethods { final boolean[] delete_cols = new boolean[ msa.getLength() ]; int new_length = 0; for( int col = 0; col < msa.getLength(); ++col ) { - delete_cols[ col ] = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) >= max_allowed_gap_ratio; + delete_cols[ col ] = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) > max_allowed_gap_ratio; if ( !delete_cols[ col ] ) { ++new_length; } @@ -159,28 +159,24 @@ public final class MsaMethods { return BasicMsa.createInstance( seqs ); } - synchronized final public static void removeGapColumns( final double max_allowed_gap_ratio, final DeleteableMsa msa ) { - if ( ( max_allowed_gap_ratio < 0 ) || ( max_allowed_gap_ratio > 1 ) ) { - throw new IllegalArgumentException( "max allowed gap ration is out of range: " + max_allowed_gap_ratio ); - } - // final boolean ignore_too_short_seqs = min_allowed_length > 0; - for( int col = msa.getLength() - 1; col >= 0 ; --col ) { - final boolean delete = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) >= max_allowed_gap_ratio; - if ( delete ) { - msa.deleteColumn( col ); - } + final public static DescriptiveStatistics calculateIdentityRatio( final int from, final int to, final Msa msa ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( int c = from; c <= to; ++c ) { + stats.addValue( calculateIdentityRatio( msa, c ) ); } + return stats; } - public static DescriptiveStatistics calculateIdentityRatio( final int from, final int to, final Msa msa ) { + final public static DescriptiveStatistics calculateEffectiveLengthStatistics( final Msa msa ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - for( int c = from; c <= to; ++c ) { - stats.addValue( calculateIdentityRatio( msa, c ) ); + for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { + final Sequence s = msa.getSequence( row ); + stats.addValue( s.getLength() - s.getNumberOfGapResidues() ); } return stats; } - public static double calculateIdentityRatio( final Msa msa, final int column ) { + final public static double calculateIdentityRatio( final Msa msa, final int column ) { final SortedMap dist = calculateResidueDestributionPerColumn( msa, column ); int majority_count = 0; final Iterator> it = dist.entrySet().iterator();