X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fmsa%2FMsaMethods.java;h=06ca50bce885d4f83c59eb13b364702ed17ef540;hb=f3f08cb017c276411ba6c7e3f3e157ed5bba136e;hp=716c8431211cffb62118b3f183c7667bb61ac36d;hpb=7476d4a39fc5840955b57289b862bf08b8419fd7;p=jalview.git diff --git a/forester/java/src/org/forester/msa/MsaMethods.java b/forester/java/src/org/forester/msa/MsaMethods.java index 716c843..06ca50b 100644 --- a/forester/java/src/org/forester/msa/MsaMethods.java +++ b/forester/java/src/org/forester/msa/MsaMethods.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa; @@ -72,9 +72,35 @@ public final class MsaMethods { return gap_rows; } - synchronized public Msa removeGapColumns( final double max_allowed_gap_ratio, - final int min_allowed_length, - final Msa msa ) { + final public static Msa removeSequences( final Msa msa, final List to_remove_ids ) { + final List seqs = new ArrayList(); + for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { + if ( !to_remove_ids.contains( msa.getIdentifier( row ) ) ) { + seqs.add( BasicSequence.copySequence( msa.getSequence( row ) ) ); + } + } + if ( seqs.size() < 1 ) { + return null; + } + return BasicMsa.createInstance( seqs ); + } + + final public static Msa removeSequencesByRow( final Msa msa, final List to_remove_rows ) { + final List seqs = new ArrayList(); + for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { + if ( !to_remove_rows.contains( row ) ) { + seqs.add( BasicSequence.copySequence( msa.getSequence( row ) ) ); + } + } + if ( seqs.size() < 1 ) { + return null; + } + return BasicMsa.createInstance( seqs ); + } + + synchronized final public Msa removeGapColumns( final double max_allowed_gap_ratio, + final int min_allowed_length, + final Msa msa ) { init(); if ( ( max_allowed_gap_ratio < 0 ) || ( max_allowed_gap_ratio > 1 ) ) { throw new IllegalArgumentException( "max allowed gap ration is out of range: " + max_allowed_gap_ratio ); @@ -83,7 +109,7 @@ public final class MsaMethods { final boolean[] delete_cols = new boolean[ msa.getLength() ]; int new_length = 0; for( int col = 0; col < msa.getLength(); ++col ) { - delete_cols[ col ] = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) > max_allowed_gap_ratio; + delete_cols[ col ] = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) >= max_allowed_gap_ratio; if ( !delete_cols[ col ] ) { ++new_length; } @@ -153,4 +179,32 @@ public final class MsaMethods { } return stats; } + + public static Msa removeSequencesByMinimalLength( final Msa msa, final int min_effective_length ) { + final List to_remove_rows = new ArrayList(); + for( int seq = 0; seq < msa.getNumberOfSequences(); ++seq ) { + int eff_length = 0; + for( int i = 0; i < msa.getLength(); ++i ) { + if ( msa.getResidueAt( seq, i ) != Sequence.GAP ) { + eff_length++; + } + } + if ( eff_length < min_effective_length ) { + to_remove_rows.add( seq ); + } + } + return removeSequencesByRow( msa, to_remove_rows ); + } + + public static double calcGapRatio( final Msa msa ) { + int gaps = 0; + for( int seq = 0; seq < msa.getNumberOfSequences(); ++seq ) { + for( int i = 0; i < msa.getLength(); ++i ) { + if ( msa.getResidueAt( seq, i ) == Sequence.GAP ) { + gaps++; + } + } + } + return ( double ) gaps / ( msa.getLength() * msa.getNumberOfSequences() ); + } }