package org.forester.msa;
-import java.util.HashMap;
+import java.util.List;
+
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.Sequence;
public final class DeleteableMsa extends BasicMsa {
- private int _length = 0;
- private int _mapped_col_positions[] = null;
- private int _mapped_row_positions[] = null;
- private int _seqs = 0;
- private HashMap<String, Integer> _seq_id_to_row_map = null;
+ /** Number of columns still visible (not deleted); this is what getLength() reports. */
+ private int _length;
+ /** Translates a visible column index into the column index used by the superclass. */
+ private int[] _mapped_col_positions;
+ /** Translates a visible row index into the row index used by the superclass. */
+ private int[] _mapped_row_positions;
+ /** Number of rows still visible (not deleted); this is what getNumberOfSequences() reports. */
+ private int _seqs;
- public DeleteableMsa( final BasicMsa msa ) {
+ /**
+  * Wraps an alignment so that rows and columns can later be hidden by
+  * remapping indices.
+  *
+  * Allocates one mapping slot per column and per sequence of msa, points every
+  * row slot at its own index, and records the starting dimensions in _length
+  * and _seqs.
+  *
+  * @param msa the alignment handed on to the BasicMsa constructor
+  */
+ private DeleteableMsa( final BasicMsa msa ) {
super( msa );
_mapped_col_positions = new int[ msa.getLength() ];
_mapped_row_positions = new int[ msa.getNumberOfSequences() ];
+ // NOTE(review): no statement visible here fills _mapped_col_positions with the
+ // identity mapping the way the loop below does for rows - confirm that this
+ // happens in lines not shown.
for( int i = 0; i < _mapped_row_positions.length; ++i ) {
_mapped_row_positions[ i ] = i;
}
- _seq_id_to_row_map = new HashMap<String, Integer>();
- for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
- _seq_id_to_row_map.put( msa.getIdentifier( row ), row );
- }
_length = msa.getLength();
_seqs = msa.getNumberOfSequences();
}
- public void deleteColumn( final int col ) {
- if ( col >= _length || col < 0 ) {
- throw new IllegalArgumentException( "column " + col + " is out of range" );
+ /**
+  * Returns an array holding one value per visible column.
+  *
+  * NOTE(review): the nested row/column loops have empty bodies, so the array
+  * comes back exactly as allocated (every entry 0.0). Presumably a per-column
+  * gap measure was meant to be accumulated here - confirm the intent.
+  *
+  * @return a newly allocated double[] of size getLength()
+  */
+ public final double[] calcGappiness() {
+ final int length = getLength();
+ final double gappiness[] = new double[ length ];
+ final int seqs = getNumberOfSequences();
+ for( int row = 0; row < seqs; ++row ) {
+ for( int col = 0; col < length; ++col ) {
+ }
}
- for( int c = col; c < _length - 1; ++c ) {
- _mapped_col_positions[ c ] = _mapped_col_positions[ c + 1 ];
+ return gappiness;
+ }
+
+ /**
+  * Counts the sequences of an alignment that have a gap at the given column.
+  *
+  * @param msa the alignment to inspect
+  * @param col the column index passed to msa.isGapAt
+  * @return the number of rows j for which msa.isGapAt( j, col ) is true
+  */
+ public static int calcGapSumPerColumn( final Msa msa, final int col ) {
+ int gap_rows = 0;
+ for( int j = 0; j < msa.getNumberOfSequences(); ++j ) {
+ if ( msa.isGapAt( j, col ) ) {
+ gap_rows++;
+ }
}
- --_length;
+ return gap_rows;
}
- public void deleteRow( final int row ) {
- if ( row >= _seqs || row < 0 ) {
- throw new IllegalArgumentException( "row " + row + " is out of range" );
+ /**
+  * Finds the length of the longest identifier among the visible rows.
+  *
+  * Each identifier length is narrowed to short before it is compared.
+  * NOTE(review): a length above Short.MAX_VALUE would wrap in that cast -
+  * presumably identifiers never get that long; confirm.
+  *
+  * @return the largest identifier length seen, or 0 when there are no rows
+  */
+ public short determineMaxIdLength() {
+ short max = 0;
+ for( int row = 0; row < getNumberOfSequences(); ++row ) {
+ final short l = ( short ) getIdentifier( row ).length();
+ if ( l > max ) {
+ max = l;
+ }
}
- for( int r = row; r < _seqs - 1; ++r ) {
- _mapped_row_positions[ r ] = _mapped_row_positions[ r + 1 ];
+ return max;
+ }
+
+ /**
+  * Removes every visible column whose share of gaps exceeds the given limit.
+  *
+  * Columns are visited from last to first so that deleting one does not
+  * shift the indices of the columns still to be examined.
+  *
+  * @param max_allowed_gap_ratio highest tolerated gaps-per-sequence ratio of a column, within [0, 1]
+  * @throws IllegalArgumentException if max_allowed_gap_ratio is NaN or lies outside [0, 1]
+  */
+ final public void deleteGapColumns( final double max_allowed_gap_ratio ) {
+     // Negated in-range test: every comparison with NaN is false, so NaN is rejected
+     // here instead of slipping through and silently deleting nothing.
+     if ( !( ( max_allowed_gap_ratio >= 0 ) && ( max_allowed_gap_ratio <= 1 ) ) ) {
+         throw new IllegalArgumentException( "max allowed gap ratio is out of range: " + max_allowed_gap_ratio );
+     }
+     for( int col = getLength() - 1; col >= 0; --col ) {
+         final double gap_ratio = ( double ) MsaMethods.calcGapSumPerColumn( this, col ) / getNumberOfSequences();
+         if ( gap_ratio > max_allowed_gap_ratio ) {
+             deleteColumn( col );
+         }
+     }
+ }
+
+ /**
+  * Removes every visible column for which isAllGap reports true.
+  *
+  * Columns are visited from last to first, so a deletion never shifts the
+  * index of a column that has not been examined yet.
+  */
+ final public void deleteGapOnlyColumns() {
+ for( int col = getLength() - 1; col >= 0; --col ) {
+ if ( isAllGap( col ) ) {
+ deleteColumn( col );
+ }
}
- --_seqs;
}
- public void deleteRow( final String id ) {
+ /**
+  * Removes the row whose identifier equals id and optionally hands back its
+  * sequence.
+  *
+  * When return_removed_seq is true, the row's residues are copied - skipping
+  * every Sequence.GAP character - before the row is deleted, and the copy is
+  * returned as a new BasicSequence carrying the row's identifier and type.
+  *
+  * @param id identifier of the row to remove
+  * @param return_removed_seq whether to build and return the removed sequence
+  * @return the removed sequence without gap characters, or null when return_removed_seq is false
+  * @throws IllegalArgumentException if the search leaves row negative, i.e. id was not found
+  */
+ final public Sequence deleteRow( final String id, final boolean return_removed_seq ) {
int row = -1;
for( int r = 0; r < getNumberOfSequences(); ++r ) {
if ( getIdentifier( r ).equals( id ) ) {
if ( row < 0 ) {
throw new IllegalArgumentException( "id [" + id + "] not found" );
}
+ Sequence s = null;
+ StringBuilder sb = null;
+ if ( return_removed_seq ) {
+ s = getSequence( row );
+ final char[] x = s.getMolecularSequence();
+ sb = new StringBuilder( x.length );
+ for( int i = 0; i < x.length; ++i ) {
+ if ( x[ i ] != Sequence.GAP ) {
+ sb.append( x[ i ] );
+ }
+ }
+ }
deleteRow( row );
+ if ( return_removed_seq ) {
+ return new BasicSequence( new String( s.getIdentifier() ), sb.toString(), s.getType() );
+ }
+ else {
+ return null;
+ }
}
+ /**
+  * Returns the identifier of a visible row, translating the index through
+  * the row mapping before asking the superclass.
+  *
+  * @param row index among the rows not yet deleted
+  * @return the identifier the superclass holds at the mapped row
+  * @throws IllegalArgumentException if row is negative or not below the current number of sequences
+  */
@Override
- public String getIdentifier( final int row ) {
+ final public String getIdentifier( final int row ) {
+ checkRow( row );
return super.getIdentifier( _mapped_row_positions[ row ] );
}
+ /**
+  * Returns the current number of visible columns, which shrinks by one with
+  * every column deletion.
+  *
+  * @return the value of _length
+  */
@Override
- public int getLength() {
+ final public int getLength() {
return _length;
}
+ /**
+  * Returns the current number of visible rows, which shrinks by one with
+  * every row deletion.
+  *
+  * @return the value of _seqs
+  */
@Override
- public int getNumberOfSequences() {
+ final public int getNumberOfSequences() {
return _seqs;
}
+ /**
+  * Returns the residue at a visible position, translating both indices
+  * through the row and column mappings before asking the superclass.
+  *
+  * @param row index among the rows not yet deleted
+  * @param col index among the columns not yet deleted
+  * @return the residue the superclass holds at the mapped position
+  * @throws IllegalArgumentException if row or col is negative or beyond the current dimensions
+  */
@Override
- public char getResidueAt( final int row, final int col ) {
+ final public char getResidueAt( final int row, final int col ) {
+ checkRow( row );
+ checkColumn( col );
return super.getResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ] );
}
+ /**
+  * Builds a new BasicSequence for a visible row from its identifier, its
+  * residues as produced by getSequenceAsString, and the type of this
+  * alignment.
+  *
+  * @param row index among the rows not yet deleted
+  * @return a freshly constructed sequence object for that row
+  * @throws IllegalArgumentException if row is negative or not below the current number of sequences
+  */
@Override
- public void setIdentifier( final int row, final String id ) {
+ public Sequence getSequence( final int row ) {
+ checkRow( row );
+ return new BasicSequence( getIdentifier( row ), getSequenceAsString( row ).toString(), getType() );
+ }
+
+ /**
+  * Tells whether a visible column holds a gap in every visible row.
+  *
+  * @param col index among the columns not yet deleted
+  * @return true if no visible row has a non-gap residue at col (also true when there are no rows)
+  * @throws IllegalArgumentException if col is negative or not below the current length
+  */
+ final public boolean isAllGap( final int col ) {
+     // Validate like getResidueAt/setResidueAt do: deleteColumn only shifts the
+     // mapping and lowers _length, so slots at or beyond _length still hold stale
+     // entries that would otherwise be read without complaint.
+     checkColumn( col );
+     final int m_col = _mapped_col_positions[ col ];
+     for( int j = 0; j < getNumberOfSequences(); ++j ) {
+         if ( super.getResidueAt( _mapped_row_positions[ j ], m_col ) != Sequence.GAP ) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /**
+  * Sets the identifier of a visible row, translating the index through the
+  * row mapping before delegating to the superclass.
+  *
+  * @param row index among the rows not yet deleted
+  * @param id the identifier passed on to the superclass
+  * @throws IllegalArgumentException if row is negative or not below the current number of sequences
+  */
+ @Override
+ final public void setIdentifier( final int row, final String id ) {
+ checkRow( row );
super.setIdentifier( _mapped_row_positions[ row ], id );
}
+ /**
+  * Sets the residue at a visible position, translating both indices through
+  * the row and column mappings before delegating to the superclass.
+  *
+  * @param row index among the rows not yet deleted
+  * @param col index among the columns not yet deleted
+  * @param residue the character passed on to the superclass
+  * @throws IllegalArgumentException if row or col is negative or beyond the current dimensions
+  */
@Override
- public void setResidueAt( final int row, final int col, final char residue ) {
+ final public void setResidueAt( final int row, final int col, final char residue ) {
+ checkRow( row );
+ checkColumn( col );
super.setResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ], residue );
}
+
+ /**
+  * Guards column access: accepts only indices from 0 up to, but excluding,
+  * the current length.
+  *
+  * @param col the visible column index to validate
+  * @throws IllegalArgumentException if col falls outside that range
+  */
+ private final void checkColumn( final int col ) {
+     final boolean in_range = ( col >= 0 ) && ( col < _length );
+     if ( !in_range ) {
+         throw new IllegalArgumentException( "column " + col + " is out of range" );
+     }
+ }
+
+ /**
+  * Guards row access: accepts only indices from 0 up to, but excluding, the
+  * current number of sequences.
+  *
+  * @param row the visible row index to validate
+  * @throws IllegalArgumentException if row falls outside that range
+  */
+ private final void checkRow( final int row ) {
+     final boolean in_range = ( row >= 0 ) && ( row < _seqs );
+     if ( !in_range ) {
+         throw new IllegalArgumentException( "row " + row + " is out of range" );
+     }
+ }
+
+ /**
+  * Hides one visible column by closing the gap in the column mapping and
+  * shrinking the visible length by one. The underlying data is not touched.
+  *
+  * @param col the visible column index to remove
+  * @throws IllegalArgumentException if col is not a currently visible column
+  */
+ private final void deleteColumn( final int col ) {
+     checkColumn( col );
+     // Slide every mapping entry to the right of col one slot to the left.
+     final int entries_after = _length - 1 - col;
+     System.arraycopy( _mapped_col_positions, col + 1, _mapped_col_positions, col, entries_after );
+     _length -= 1;
+ }
+
+ /**
+  * Hides one visible row by closing the gap in the row mapping and shrinking
+  * the visible sequence count by one. The underlying data is not touched.
+  *
+  * @param row the visible row index to remove
+  * @throws IllegalArgumentException if row is not a currently visible row
+  */
+ private final void deleteRow( final int row ) {
+     checkRow( row );
+     // Slide every mapping entry below row one slot up.
+     final int entries_after = _seqs - 1 - row;
+     System.arraycopy( _mapped_row_positions, row + 1, _mapped_row_positions, row, entries_after );
+     _seqs -= 1;
+ }
+
+ /**
+  * Creates a deleteable alignment from a list of sequences by first building
+  * a BasicMsa from them and then wrapping it.
+  *
+  * @param seqs the sequences passed to BasicMsa.createInstance
+  * @return a new DeleteableMsa over the resulting alignment
+  */
+ public final static DeleteableMsa createInstance( final List<Sequence> seqs ) {
+     final BasicMsa basic = ( BasicMsa ) BasicMsa.createInstance( seqs );
+     return new DeleteableMsa( basic );
+ }
+
+ /**
+  * Creates a deleteable alignment from an existing alignment, which is cast
+  * to BasicMsa before being wrapped.
+  *
+  * @param msa the alignment to wrap
+  * @return a new DeleteableMsa over msa
+  * @throws ClassCastException if msa is not a BasicMsa
+  */
+ public final static DeleteableMsa createInstance( final Msa msa ) {
+     final BasicMsa basic = ( BasicMsa ) msa;
+     return new DeleteableMsa( basic );
+ }
}