X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fmsa%2FMsaMethods.java;h=1a94b74f7ce0fa7d22ec49fe20d5ce39177ad899;hb=fda4b2cd36f7c4d7edf6381268ebcf5fbbc77297;hp=ff8342c72ce41731368357d34da37776e73bb447;hpb=23aee285eb5f61d8cb4b2c51bab0cc101f79c7cc;p=jalview.git diff --git a/forester/java/src/org/forester/msa/MsaMethods.java b/forester/java/src/org/forester/msa/MsaMethods.java index ff8342c..1a94b74 100644 --- a/forester/java/src/org/forester/msa/MsaMethods.java +++ b/forester/java/src/org/forester/msa/MsaMethods.java @@ -65,7 +65,7 @@ public final class MsaMethods { public static int calcGapSumPerColumn( final Msa msa, final int col ) { int gap_rows = 0; for( int j = 0; j < msa.getNumberOfSequences(); ++j ) { - if ( msa.getResidueAt( j, col ) == Sequence.GAP ) { + if ( msa.isGapAt( j, col ) ) { gap_rows++; } } @@ -76,7 +76,7 @@ public final class MsaMethods { final List seqs = new ArrayList(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { if ( !to_remove_id.equals( msa.getIdentifier( row ) ) ) { - seqs.add( BasicSequence.copySequence( msa.getSequence( row ) ) ); + seqs.add( msa.getSequence( row ) ); } } if ( seqs.size() < 1 ) { @@ -89,7 +89,7 @@ public final class MsaMethods { final List seqs = new ArrayList(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { if ( !to_remove_ids.contains( msa.getIdentifier( row ) ) ) { - seqs.add( BasicSequence.copySequence( msa.getSequence( row ) ) ); + seqs.add( msa.getSequence( row ) ); } } if ( seqs.size() < 1 ) { @@ -102,7 +102,7 @@ public final class MsaMethods { final List seqs = new ArrayList(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { if ( !to_remove_rows.contains( row ) ) { - seqs.add( BasicSequence.copySequence( msa.getSequence( row ) ) ); + seqs.add( msa.getSequence( row ) ); } } if ( seqs.size() < 1 ) { @@ -111,7 +111,7 @@ public final class MsaMethods { return BasicMsa.createInstance( seqs ); } - synchronized final public Msa removeGapColumns( final double max_allowed_gap_ratio, + synchronized final public Msa deleteGapColumns( final double max_allowed_gap_ratio, final int min_allowed_length, final Msa msa ) { init(); @@ -122,7 +122,7 @@ public final class MsaMethods { final boolean[] delete_cols = new boolean[ msa.getLength() ]; int new_length = 0; for( int col = 0; col < msa.getLength(); ++col ) { - delete_cols[ col ] = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) >= max_allowed_gap_ratio; + delete_cols[ col ] = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) > max_allowed_gap_ratio; if ( !delete_cols[ col ] ) { ++new_length; } @@ -159,7 +159,24 @@ public final class MsaMethods { return BasicMsa.createInstance( seqs ); } - public static double calculateIdentityRatio( final Msa msa, final int column ) { + final public static DescriptiveStatistics calculateIdentityRatio( final int from, final int to, final Msa msa ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( int c = from; c <= to; ++c ) { + stats.addValue( calculateIdentityRatio( msa, c ) ); + } + return stats; + } + + final public static DescriptiveStatistics calculateEffectiveLengthStatistics( final Msa msa ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { + final Sequence s = msa.getSequence( row ); + stats.addValue( s.getLength() - s.getNumberOfGapResidues() ); + } + return stats; + } + + final public static double calculateIdentityRatio( final Msa msa, final int column ) { final SortedMap dist = calculateResidueDestributionPerColumn( msa, column ); int majority_count = 0; final Iterator> it = dist.entrySet().iterator(); @@ -175,11 +192,13 @@ public final class MsaMethods { public static SortedMap calculateResidueDestributionPerColumn( final Msa msa, final int column ) { final SortedMap map = new TreeMap(); for( final Character r : msa.getColumnAt( column ) ) { - if ( !map.containsKey( r ) ) { - map.put( r, 1 ); - } - else { - map.put( r, map.get( r ) + 1 ); + if ( r != Sequence.GAP ) { + if ( !map.containsKey( r ) ) { + map.put( r, 1 ); + } + else { + map.put( r, map.get( r ) + 1 ); + } } } return map;