X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fmsa%2FDeleteableMsa.java;h=172ce4dd5ef3491169d26e0c4af18bf6272a4f54;hb=fda4b2cd36f7c4d7edf6381268ebcf5fbbc77297;hp=304199444bdbe94771761bd4abe93594a4b98327;hpb=cb3fc11f9567fa1f17a0ca64eafc31c4a9075210;p=jalview.git diff --git a/forester/java/src/org/forester/msa/DeleteableMsa.java b/forester/java/src/org/forester/msa/DeleteableMsa.java index 3041994..172ce4d 100644 --- a/forester/java/src/org/forester/msa/DeleteableMsa.java +++ b/forester/java/src/org/forester/msa/DeleteableMsa.java @@ -24,17 +24,19 @@ package org.forester.msa; -import java.util.HashMap; +import java.util.List; + +import org.forester.sequence.BasicSequence; +import org.forester.sequence.Sequence; public final class DeleteableMsa extends BasicMsa { - private int _length = 0; - private int _mapped_col_positions[] = null; - private int _mapped_row_positions[] = null; - private int _seqs = 0; - private HashMap _seq_id_to_row_map = null; + private int _length = 0; + private int _mapped_col_positions[] = null; + private int _mapped_row_positions[] = null; + private int _seqs = 0; - public DeleteableMsa( final BasicMsa msa ) { + private DeleteableMsa( final BasicMsa msa ) { super( msa ); _mapped_col_positions = new int[ msa.getLength() ]; _mapped_row_positions = new int[ msa.getNumberOfSequences() ]; @@ -44,35 +46,63 @@ public final class DeleteableMsa extends BasicMsa { for( int i = 0; i < _mapped_row_positions.length; ++i ) { _mapped_row_positions[ i ] = i; } - _seq_id_to_row_map = new HashMap(); - for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { - _seq_id_to_row_map.put( msa.getIdentifier( row ), row ); - } _length = msa.getLength(); _seqs = msa.getNumberOfSequences(); } - public void deleteColumn( final int col ) { - if ( col >= _length || col < 0 ) { - throw new IllegalArgumentException( "column " + col + " is out of range" ); + public final double[] calcGappiness() { + final int length = getLength(); + final double gappiness[] = new double[ length ]; + final int seqs = getNumberOfSequences(); + for( int row = 0; row < seqs; ++row ) { + for( int col = 0; col < length; ++col ) { + } } - for( int c = col; c < _length - 1; ++c ) { - _mapped_col_positions[ c ] = _mapped_col_positions[ c + 1 ]; + return gappiness; + } + + public static int calcGapSumPerColumn( final Msa msa, final int col ) { + int gap_rows = 0; + for( int j = 0; j < msa.getNumberOfSequences(); ++j ) { + if ( msa.isGapAt( j, col ) ) { + gap_rows++; + } } - --_length; + return gap_rows; } - public void deleteRow( final int row ) { - if ( row >= _seqs || row < 0 ) { - throw new IllegalArgumentException( "row " + row + " is out of range" ); + public short determineMaxIdLength() { + short max = 0; + for( int row = 0; row < getNumberOfSequences(); ++row ) { + final short l = ( short ) getIdentifier( row ).length(); + if ( l > max ) { + max = l; + } } - for( int r = row; r < _seqs - 1; ++r ) { - _mapped_row_positions[ r ] = _mapped_row_positions[ r + 1 ]; + return max; + } + + final public void deleteGapColumns( final double max_allowed_gap_ratio ) { + if ( ( max_allowed_gap_ratio < 0 ) || ( max_allowed_gap_ratio > 1 ) ) { + throw new IllegalArgumentException( "max allowed gap ration is out of range: " + max_allowed_gap_ratio ); + } + for( int col = getLength() - 1; col >= 0; --col ) { + final boolean delete = ( ( double ) MsaMethods.calcGapSumPerColumn( this, col ) / getNumberOfSequences() ) > max_allowed_gap_ratio; + if ( delete ) { + deleteColumn( col ); + } + } + } + + final public void deleteGapOnlyColumns() { + for( int col = getLength() - 1; col >= 0; --col ) { + if ( isAllGap( col ) ) { + deleteColumn( col ); + } } - --_seqs; } - public void deleteRow( final String id ) { + final public Sequence deleteRow( final String id, final boolean return_removed_seq ) { int row = -1; for( int r = 0; r < getNumberOfSequences(); ++r ) { if ( getIdentifier( r ).equals( id ) ) { @@ -83,36 +113,112 @@ public final class DeleteableMsa extends BasicMsa { if ( row < 0 ) { throw new IllegalArgumentException( "id [" + id + "] not found" ); } + Sequence s = null; + StringBuilder sb = null; + if ( return_removed_seq ) { + s = getSequence( row ); + final char[] x = s.getMolecularSequence(); + sb = new StringBuilder( x.length ); + for( int i = 0; i < x.length; ++i ) { + if ( x[ i ] != Sequence.GAP ) { + sb.append( x[ i ] ); + } + } + } deleteRow( row ); + if ( return_removed_seq ) { + return new BasicSequence( new String( s.getIdentifier() ), sb.toString(), s.getType() ); + } + else { + return null; + } } @Override - public String getIdentifier( final int row ) { + final public String getIdentifier( final int row ) { + checkRow( row ); return super.getIdentifier( _mapped_row_positions[ row ] ); } @Override - public int getLength() { + final public int getLength() { return _length; } @Override - public int getNumberOfSequences() { + final public int getNumberOfSequences() { return _seqs; } @Override - public char getResidueAt( final int row, final int col ) { + final public char getResidueAt( final int row, final int col ) { + checkRow( row ); + checkColumn( col ); return super.getResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ] ); } @Override - public void setIdentifier( final int row, final String id ) { + public Sequence getSequence( final int row ) { + checkRow( row ); + return new BasicSequence( getIdentifier( row ), getSequenceAsString( row ).toString(), getType() ); + } + + final public boolean isAllGap( final int col ) { + final int m_col = _mapped_col_positions[ col ]; + for( int j = 0; j < getNumberOfSequences(); ++j ) { + if ( super.getResidueAt( _mapped_row_positions[ j ], m_col ) != Sequence.GAP ) { + return false; + } + } + return true; + } + + @Override + final public void setIdentifier( final int row, final String id ) { + checkRow( row ); super.setIdentifier( _mapped_row_positions[ row ], id ); } @Override - public void setResidueAt( final int row, final int col, final char residue ) { + final public void setResidueAt( final int row, final int col, final char residue ) { + checkRow( row ); + checkColumn( col ); super.setResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ], residue ); } + + final private void checkColumn( final int col ) { + if ( ( col >= _length ) || ( col < 0 ) ) { + throw new IllegalArgumentException( "column " + col + " is out of range" ); + } + } + + final private void checkRow( final int row ) { + if ( ( row >= _seqs ) || ( row < 0 ) ) { + throw new IllegalArgumentException( "row " + row + " is out of range" ); + } + } + + final private void deleteColumn( final int col ) { + checkColumn( col ); + for( int c = col; c < _length - 1; ++c ) { + _mapped_col_positions[ c ] = _mapped_col_positions[ c + 1 ]; + } + --_length; + } + + final private void deleteRow( final int row ) { + checkRow( row ); + for( int r = row; r < _seqs - 1; ++r ) { + _mapped_row_positions[ r ] = _mapped_row_positions[ r + 1 ]; + } + --_seqs; + } + + public final static DeleteableMsa createInstance( final List seqs ) { + return new DeleteableMsa( ( BasicMsa ) BasicMsa.createInstance( seqs ) ); + } + + public final static DeleteableMsa createInstance( final Msa msa ) { + return new DeleteableMsa( ( BasicMsa ) msa ); + } }