// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.msa;
+ /**
+  * Counts the rows of an alignment that have a gap in the given column.
+  *
+  * @param msa the alignment to inspect
+  * @param col index of the column to examine
+  * @return the number of rows for which the residue at column col is a gap
+  */
public static int calcGapSumPerColumn( final Msa msa, final int col ) {
int gap_rows = 0;
for( int j = 0; j < msa.getNumberOfSequences(); ++j ) {
- if ( msa.getResidueAt( j, col ) == Sequence.GAP ) {
+ if ( msa.isGapAt( j, col ) ) {
gap_rows++;
}
}
return gap_rows;
}
+ /**
+  * Builds a new alignment from every row of msa whose identifier differs
+  * from to_remove_id. Each retained row is passed through
+  * BasicSequence.copySequence before being added to the result.
+  *
+  * @param msa the alignment to read rows from
+  * @param to_remove_id identifier of the row(s) to leave out; must not be null
+  * @return the alignment created from the retained rows, or null if no row
+  *         is retained
+  */
+ final public static Msa removeSequence( final Msa msa, final String to_remove_id ) {
+     final List<Sequence> kept = new ArrayList<Sequence>();
+     for( int i = 0; i < msa.getNumberOfSequences(); ++i ) {
+         final boolean is_target = to_remove_id.equals( msa.getIdentifier( i ) );
+         if ( is_target ) {
+             // This is the row the caller asked to drop.
+             continue;
+         }
+         kept.add( BasicSequence.copySequence( msa.getSequence( i ) ) );
+     }
+     // Nothing left to build an alignment from: signal this with null.
+     return kept.isEmpty() ? null : BasicMsa.createInstance( kept );
+ }
+
final public static Msa removeSequences( final Msa msa, final List<String> to_remove_ids ) {
final List<Sequence> seqs = new ArrayList<Sequence>();
for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
return BasicMsa.createInstance( seqs );
}
+ /**
+  * Builds a new alignment from every row of msa whose row index is not
+  * listed in to_remove_rows. Each retained row is passed through
+  * BasicSequence.copySequence before being added to the result.
+  *
+  * @param msa the alignment to read rows from
+  * @param to_remove_rows indices of the rows to leave out
+  * @return the alignment created from the retained rows, or null if no row
+  *         is retained
+  */
+ final public static Msa removeSequencesByRow( final Msa msa, final List<Integer> to_remove_rows ) {
+     final List<Sequence> kept = new ArrayList<Sequence>();
+     for( int i = 0; i < msa.getNumberOfSequences(); ++i ) {
+         final boolean drop = to_remove_rows.contains( Integer.valueOf( i ) );
+         if ( drop ) {
+             continue;
+         }
+         kept.add( BasicSequence.copySequence( msa.getSequence( i ) ) );
+     }
+     // Nothing left to build an alignment from: signal this with null.
+     return kept.isEmpty() ? null : BasicMsa.createInstance( kept );
+ }
+
synchronized final public Msa removeGapColumns( final double max_allowed_gap_ratio,
final int min_allowed_length,
final Msa msa ) {
}
return stats;
}
+
+ /**
+  * Removes every row whose effective length, i.e. its number of non-gap
+  * positions, is smaller than min_effective_length.
+  *
+  * @param msa the alignment to filter
+  * @param min_effective_length minimal number of non-gap positions a row
+  *        needs in order to be kept
+  * @return the result of removeSequencesByRow for the rows found to be too
+  *         short (null when no row is kept)
+  */
+ public static Msa removeSequencesByMinimalLength( final Msa msa, final int min_effective_length ) {
+     final List<Integer> to_remove_rows = new ArrayList<Integer>();
+     final int length = msa.getLength();
+     for( int seq = 0; seq < msa.getNumberOfSequences(); ++seq ) {
+         int eff_length = 0;
+         for( int i = 0; i < length; ++i ) {
+             // Same gap test as calcGapSumPerColumn, instead of comparing
+             // the residue against Sequence.GAP by hand.
+             if ( !msa.isGapAt( seq, i ) ) {
+                 eff_length++;
+             }
+         }
+         if ( eff_length < min_effective_length ) {
+             to_remove_rows.add( seq );
+         }
+     }
+     return removeSequencesByRow( msa, to_remove_rows );
+ }
+
+ /**
+  * Calculates the fraction of all alignment cells that are gaps.
+  *
+  * @param msa the alignment to inspect
+  * @return number of gap cells divided by (length * number of sequences);
+  *         NaN if the alignment has no cells, since the division is then
+  *         0.0 / 0.0
+  */
+ public static double calcGapRatio( final Msa msa ) {
+     final int length = msa.getLength();
+     final int rows = msa.getNumberOfSequences();
+     // long counter: the number of cells may exceed Integer.MAX_VALUE.
+     long gaps = 0;
+     for( int seq = 0; seq < rows; ++seq ) {
+         for( int i = 0; i < length; ++i ) {
+             if ( msa.isGapAt( seq, i ) ) {
+                 gaps++;
+             }
+         }
+     }
+     // Multiply as double so that length * rows cannot overflow int.
+     return gaps / ( ( double ) length * rows );
+ }
}