2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2014 Christian M. Zmasek
6 // Copyright (C) 2014 Sanford-Burnham Medical Research Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25 package org.forester.msa;
27 import java.util.List;
29 import org.forester.sequence.BasicSequence;
30 import org.forester.sequence.Sequence;
// A BasicMsa subclass that keeps its own column/row counts together with
// row and column index-mapping arrays; accessors translate the caller's
// row/column through these arrays before delegating to the superclass.
32 public final class DeleteableMsa extends BasicMsa {
// Current number of columns; checkColumn() rejects indices >= this value.
34 private int _length = 0;
// _mapped_col_positions[ c ] is the column index handed to the superclass for column c.
35 private int _mapped_col_positions[] = null;
// _mapped_row_positions[ r ] is the row index handed to the superclass for row r.
36 private int _mapped_row_positions[] = null;
// Current number of rows; checkRow() rejects indices >= this value.
37 private int _seqs = 0;
// Private constructor; instances are obtained through the static
// createInstance factories. Sizes both mapping arrays from msa, fills them
// with the identity mapping, and records msa's dimensions as the current ones.
// NOTE(review): not every line of this body is visible in this excerpt
// (any superclass initialization in particular) -- confirm against the full file.
39 private DeleteableMsa( final BasicMsa msa ) {
41 _mapped_col_positions = new int[ msa.getLength() ];
42 _mapped_row_positions = new int[ msa.getNumberOfSequences() ];
// Identity mapping: column i initially refers to underlying column i.
43 for( int i = 0; i < _mapped_col_positions.length; ++i ) {
44 _mapped_col_positions[ i ] = i;
// Identity mapping: row i initially refers to underlying row i.
46 for( int i = 0; i < _mapped_row_positions.length; ++i ) {
47 _mapped_row_positions[ i ] = i;
49 _length = msa.getLength();
50 _seqs = msa.getNumberOfSequences();
// Allocates a double array with one slot per current column and loops over
// every (row, col) cell of the alignment.
// NOTE(review): the inner loop body and the return statement are not visible
// in this excerpt, so how the slots are filled cannot be confirmed from here.
53 public final double[] calcGappiness() {
54 final int length = getLength();
55 final double gappiness[] = new double[ length ];
56 final int seqs = getNumberOfSequences();
57 for( int row = 0; row < seqs; ++row ) {
58 for( int col = 0; col < length; ++col ) {
// Static helper: visits every sequence j of msa and tests msa.isGapAt( j, col )
// for the given column.
// NOTE(review): presumably returns the number of gaps found in that column --
// the counter and the return statement are not visible in this excerpt; confirm.
64 public static int calcGapSumPerColumn( final Msa msa, final int col ) {
66 for( int j = 0; j < msa.getNumberOfSequences(); ++j ) {
67 if ( msa.isGapAt( j, col ) ) {
// Visits every row and reads the length of its identifier as a short.
// NOTE(review): presumably returns the largest length seen -- the comparison
// and the return statement are not visible in this excerpt; confirm.
74 public short determineMaxIdLength() {
76 for( int row = 0; row < getNumberOfSequences(); ++row ) {
// NOTE(review): narrowing cast from int; an identifier longer than
// Short.MAX_VALUE characters would not be represented correctly.
77 final short l = ( short ) getIdentifier( row ).length();
// Walks the columns from last to first and flags a column when its gap
// fraction (MsaMethods.calcGapSumPerColumn divided by the number of
// sequences) is strictly greater than max_allowed_gap_ratio.
// Throws IllegalArgumentException when max_allowed_gap_ratio is below 0 or above 1.
// NOTE(review): what is done with a flagged column is not visible in this
// excerpt (presumably deleteColumn( col )) -- confirm against the full file.
85 final public void deleteGapColumns( final double max_allowed_gap_ratio ) {
86 if ( ( max_allowed_gap_ratio < 0 ) || ( max_allowed_gap_ratio > 1 ) ) {
// NOTE(review): the message says "ration"; looks like a typo for "ratio".
87 throw new IllegalArgumentException( "max allowed gap ration is out of range: " + max_allowed_gap_ratio );
// Descending order: presumably so that removing a column does not disturb
// the indices still to be visited -- confirm.
89 for( int col = getLength() - 1; col >= 0; --col ) {
// The ( double ) cast forces floating-point division of the two int counts.
90 final boolean delete = ( ( double ) MsaMethods.calcGapSumPerColumn( this, col ) / getNumberOfSequences() ) > max_allowed_gap_ratio;
// Walks the columns from last to first and tests each one with isAllGap( col ).
// NOTE(review): the action taken for an all-gap column is not visible in this
// excerpt (presumably deleteColumn( col )) -- confirm against the full file.
97 final public void deleteGapOnlyColumns() {
98 for( int col = getLength() - 1; col >= 0; --col ) {
99 if ( isAllGap( col ) ) {
// Searches the rows for one whose identifier equals id. The not-found path
// throws IllegalArgumentException ("id [...] not found"); its guard condition
// is not visible in this excerpt.
// When return_removed_seq is true, a new BasicSequence is returned carrying
// the row's identifier and type, with residues taken from a StringBuilder.
// NOTE(review): the row removal itself, the append into the StringBuilder, and
// the return for the return_removed_seq == false case are not visible in this
// excerpt -- confirm against the full file.
105 final public Sequence deleteRow( final String id, final boolean return_removed_seq ) {
107 for( int r = 0; r < getNumberOfSequences(); ++r ) {
108 if ( getIdentifier( r ).equals( id ) ) {
114 throw new IllegalArgumentException( "id [" + id + "] not found" );
117 StringBuilder sb = null;
118 if ( return_removed_seq ) {
// Snapshot the row as a Sequence; getSequence( row ) builds a fresh object.
119 s = getSequence( row );
120 final char[] x = s.getMolecularSequence();
// Capacity x.length is an upper bound for the characters examined below.
121 sb = new StringBuilder( x.length );
122 for( int i = 0; i < x.length; ++i ) {
// Only non-gap characters pass this test (presumably they are appended to sb).
123 if ( x[ i ] != Sequence.GAP ) {
129 if ( return_removed_seq ) {
// NOTE(review): new String( ... ) copies the identifier; likely redundant.
130 return new BasicSequence( new String( s.getIdentifier() ), sb.toString(), s.getType() );
// Returns the superclass identifier stored at the mapped row index
// _mapped_row_positions[ row ].
// NOTE(review): any range check on row before this lookup is not visible in
// this excerpt.
138 final public String getIdentifier( final int row ) {
140 return super.getIdentifier( _mapped_row_positions[ row ] );
// Column-count accessor.
// NOTE(review): the body is not visible in this excerpt; presumably it returns
// the _length field -- confirm.
144 final public int getLength() {
// Row-count accessor.
// NOTE(review): the body is not visible in this excerpt; presumably it returns
// the _seqs field -- confirm.
149 final public int getNumberOfSequences() {
// Returns the residue at (row, col), translating both indices through the
// mapping arrays before delegating to the superclass.
// NOTE(review): any range checks preceding the lookup are not visible in this
// excerpt.
154 final public char getResidueAt( final int row, final int col ) {
157 return super.getResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ] );
// Builds and returns a new BasicSequence for the given row from its
// identifier, the result of getSequenceAsString( row ), and getType().
// A new object is created on every call.
161 public Sequence getSequence( final int row ) {
163 return new BasicSequence( getIdentifier( row ), getSequenceAsString( row ).toString(), getType() );
// Examines one column: compares every row's underlying residue in that
// column against Sequence.GAP.
// NOTE(review): presumably returns false at the first non-gap residue and true
// otherwise -- the return statements are not visible in this excerpt; confirm.
166 final public boolean isAllGap( final int col ) {
// Translate the column once, outside the loop.
167 final int m_col = _mapped_col_positions[ col ];
168 for( int j = 0; j < getNumberOfSequences(); ++j ) {
// Reads the superclass directly with mapped indices rather than going
// through this class's getResidueAt.
169 if ( super.getResidueAt( _mapped_row_positions[ j ], m_col ) != Sequence.GAP ) {
// Sets the identifier of the given row by delegating to the superclass with
// the mapped row index _mapped_row_positions[ row ].
// NOTE(review): any range check on row is not visible in this excerpt.
177 final public void setIdentifier( final int row, final String id ) {
179 super.setIdentifier( _mapped_row_positions[ row ], id );
// Writes residue at (row, col), translating both indices through the mapping
// arrays before delegating to the superclass.
// NOTE(review): any range checks preceding the write are not visible in this
// excerpt.
183 final public void setResidueAt( final int row, final int col, final char residue ) {
186 super.setResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ], residue );
// Validates a column index against the current column count: throws
// IllegalArgumentException when col is negative or >= _length.
189 final private void checkColumn( final int col ) {
190 if ( ( col >= _length ) || ( col < 0 ) ) {
191 throw new IllegalArgumentException( "column " + col + " is out of range" );
// Validates a row index against the current row count: throws
// IllegalArgumentException when row is negative or >= _seqs.
195 final private void checkRow( final int row ) {
196 if ( ( row >= _seqs ) || ( row < 0 ) ) {
197 throw new IllegalArgumentException( "row " + row + " is out of range" );
// Removes column col from the mapping by shifting every later entry of
// _mapped_col_positions one slot to the left; the underlying data is not
// touched by the visible lines.
// NOTE(review): the lines before and after the loop are not visible in this
// excerpt (presumably a range check and a decrement of _length) -- confirm.
201 final private void deleteColumn( final int col ) {
// Stops at _length - 1 so that c + 1 stays within the current column count.
203 for( int c = col; c < _length - 1; ++c ) {
204 _mapped_col_positions[ c ] = _mapped_col_positions[ c + 1 ];
// Removes row from the mapping by shifting every later entry of
// _mapped_row_positions one slot to the left; the underlying data is not
// touched by the visible lines.
// NOTE(review): the lines before and after the loop are not visible in this
// excerpt (presumably a range check and a decrement of _seqs) -- confirm.
209 final private void deleteRow( final int row ) {
// Stops at _seqs - 1 so that r + 1 stays within the current row count.
211 for( int r = row; r < _seqs - 1; ++r ) {
212 _mapped_row_positions[ r ] = _mapped_row_positions[ r + 1 ];
// Factory: builds an alignment from seqs via BasicMsa.createInstance and wraps
// the result in a new DeleteableMsa.
// The result of BasicMsa.createInstance is downcast to BasicMsa before wrapping.
217 public final static DeleteableMsa createInstance( final List<Sequence> seqs ) {
218 return new DeleteableMsa( ( BasicMsa ) BasicMsa.createInstance( seqs ) );
// Factory: wraps an existing alignment in a new DeleteableMsa.
// NOTE(review): msa is downcast to BasicMsa without an instanceof check, so
// passing any other Msa implementation fails with ClassCastException.
221 public final static DeleteableMsa createInstance( final Msa msa ) {
222 return new DeleteableMsa( ( BasicMsa ) msa );