X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fmsa%2FMsaMethods.java;h=1a94b74f7ce0fa7d22ec49fe20d5ce39177ad899;hb=fda4b2cd36f7c4d7edf6381268ebcf5fbbc77297;hp=ac6cb1f3c566f9b5b47f18b74fec91f5b2160cdf;hpb=6479c35c4734850f517a6ef8de0fce500fdd6693;p=jalview.git diff --git a/forester/java/src/org/forester/msa/MsaMethods.java b/forester/java/src/org/forester/msa/MsaMethods.java index ac6cb1f..1a94b74 100644 --- a/forester/java/src/org/forester/msa/MsaMethods.java +++ b/forester/java/src/org/forester/msa/MsaMethods.java @@ -76,7 +76,7 @@ public final class MsaMethods { final List seqs = new ArrayList(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { if ( !to_remove_id.equals( msa.getIdentifier( row ) ) ) { - seqs.add( BasicSequence.copySequence( msa.getSequence( row ) ) ); + seqs.add( msa.getSequence( row ) ); } } if ( seqs.size() < 1 ) { @@ -89,7 +89,7 @@ public final class MsaMethods { final List seqs = new ArrayList(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { if ( !to_remove_ids.contains( msa.getIdentifier( row ) ) ) { - seqs.add( BasicSequence.copySequence( msa.getSequence( row ) ) ); + seqs.add( msa.getSequence( row ) ); } } if ( seqs.size() < 1 ) { @@ -102,7 +102,7 @@ public final class MsaMethods { final List seqs = new ArrayList(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { if ( !to_remove_rows.contains( row ) ) { - seqs.add( BasicSequence.copySequence( msa.getSequence( row ) ) ); + seqs.add( msa.getSequence( row ) ); } } if ( seqs.size() < 1 ) { @@ -111,7 +111,7 @@ public final class MsaMethods { return BasicMsa.createInstance( seqs ); } - synchronized final public Msa removeGapColumns( final double max_allowed_gap_ratio, + synchronized final public Msa deleteGapColumns( final double max_allowed_gap_ratio, final int min_allowed_length, final Msa msa ) { init(); @@ -122,7 +122,7 @@ public final class MsaMethods { final boolean[] delete_cols = new boolean[ msa.getLength() ]; int new_length = 0; for( int col = 0; col < msa.getLength(); ++col ) { - delete_cols[ col ] = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) >= max_allowed_gap_ratio; + delete_cols[ col ] = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) > max_allowed_gap_ratio; if ( !delete_cols[ col ] ) { ++new_length; } @@ -159,7 +159,24 @@ public final class MsaMethods { return BasicMsa.createInstance( seqs ); } - public static double calculateIdentityRatio( final Msa msa, final int column ) { + final public static DescriptiveStatistics calculateIdentityRatio( final int from, final int to, final Msa msa ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( int c = from; c <= to; ++c ) { + stats.addValue( calculateIdentityRatio( msa, c ) ); + } + return stats; + } + + final public static DescriptiveStatistics calculateEffectiveLengthStatistics( final Msa msa ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { + final Sequence s = msa.getSequence( row ); + stats.addValue( s.getLength() - s.getNumberOfGapResidues() ); + } + return stats; + } + + final public static double calculateIdentityRatio( final Msa msa, final int column ) { final SortedMap dist = calculateResidueDestributionPerColumn( msa, column ); int majority_count = 0; final Iterator> it = dist.entrySet().iterator();