1 package org.forester.msa;
3 import org.forester.sequence.Sequence;
4 import org.forester.util.BasicDescriptiveStatistics;
5 import org.forester.util.DescriptiveStatistics;
8 public class MsaCompactor {
10 final private Msa _msa;
12 public MsaCompactor( Msa msa ) {
/**
 * Collects, for every row (sequence) of the alignment, the per-column values
 * produced by calcGappiness() over the columns in which that row does not
 * hold a gap.
 *
 * NOTE(review): the closing braces and the return statement of this method
 * are not part of the visible excerpt; presumably the stats array built here
 * is what gets returned -- confirm against the full file.
 */
19 private DescriptiveStatistics[] calc() {
// Per-column values, indexed by alignment column.
20 final double gappiness[] = calcGappiness();
// One statistics slot per sequence in the alignment.
21 final DescriptiveStatistics stats[] = new DescriptiveStatistics[ _msa.getNumberOfSequences() ];
22 for ( int row = 0; row < _msa.getNumberOfSequences(); ++row ) {
23 stats[ row ] = new BasicDescriptiveStatistics();
24 for( int col = 0; col < _msa.getLength(); ++col ) {
// Only columns where this row carries a residue (not Sequence.GAP) contribute.
25 if ( _msa.getResidueAt( row, col ) != Sequence.GAP ) {
26 stats[ row ].addValue( gappiness[ col ] );
/**
 * Fills an array with one value per alignment column: the result of
 * MsaMethods.calcGapSumPerColumn for that column divided by the number of
 * sequences in the alignment.
 *
 * NOTE(review): presumably calcGapSumPerColumn counts the gaps in a column,
 * which would make each value the fraction of gapped rows -- confirm against
 * MsaMethods. The end of this method (closing braces, return) is not part of
 * the visible excerpt.
 */
34 private double[] calcGappiness() {
// One entry per alignment column.
35 final double gappiness[] = new double[ _msa.getLength() ];
// NOTE(review): 'seqs' is not used in the visible lines; the loop below calls
// _msa.getNumberOfSequences() again instead -- confirm whether it is used later
// or should replace that call.
36 final int seqs = _msa.getNumberOfSequences();
37 for( int i = 0; i < gappiness.length; ++i ) {
// The cast applies to the gap sum before the division, so this is a
// floating-point division rather than an integer one.
38 gappiness[ i ] = ( double ) MsaMethods.calcGapSumPerColumn( _msa, i ) / _msa.getNumberOfSequences();