public class BasicMsa implements Msa {
- private final char[][] _data;
- private final String[] _identifiers;
- private final TYPE _type;
+ private final char[][] _data;
+ private final String[] _identifiers;
+ private final Set<String> _identifiers_set;
+ private final TYPE _type;
public BasicMsa( final int rows, final int columns, final TYPE type ) {
if ( ( rows < 1 ) || ( columns < 1 ) ) {
}
_data = new char[ rows ][ columns ];
_identifiers = new String[ rows ];
+ _identifiers_set = new HashSet<String>();
_type = type;
}
_data = msa._data;
_identifiers = msa._identifiers;
_type = msa._type;
+ _identifiers_set = msa._identifiers_set;
}
@Override
return seqs;
}
- private int determineMaxIdLength() {
- int max = 0;
+ @Override
+ public List<Character> getColumnAt( final int col ) { // collects the residue of every row at column col, in row order
+ final List<Character> column = new ArrayList<Character>();
for( int row = 0; row < getNumberOfSequences(); ++row ) {
- final int l = getIdentifier( row ).length();
- if ( l > max ) {
- max = l;
- }
+ column.add( getResidueAt( row, col ) ); // the char residue is autoboxed to Character
}
- return max;
+ return column;
}
@Override
}
@Override
+ public Sequence getSequence( final int row ) {
+     // Build a sequence object from this row's identifier, its residue array and the msa type.
+     final String row_id = getIdentifier( row );
+     final char[] residues = _data[ row ];
+     return new BasicSequence( row_id, residues, getType() );
+ }
+
+ @Override
public Sequence getSequence( final String id ) {
for( int i = 0; i < getNumberOfSequences(); ++i ) {
if ( getIdentifier( i ).equals( id ) ) {
}
@Override
- public Sequence getSequence( final int row ) {
- return new BasicSequence( getIdentifier( row ), _data[ row ], getType() );
- }
-
- @Override
public StringBuffer getSequenceAsString( final int row ) {
final StringBuffer sb = new StringBuffer( getLength() );
for( int col = 0; col < getLength(); ++col ) {
}
@Override
+ public boolean isGapAt( final int row, final int col ) {
+     // A cell is a gap when its residue equals the Sequence.GAP character.
+     final char residue = getResidueAt( row, col );
+     return Sequence.GAP == residue;
+ }
+
+ /**
+  * Assigns an identifier to a row while keeping identifiers unique within this msa.
+  * <p>
+  * The identifier previously stored for the row (if any) is released from the
+  * uniqueness set, so a replaced identifier can be used again later. Assigning a
+  * row the identifier it already carries is a no-op.
+  *
+  * @param row index of the row to label
+  * @param id the new identifier
+  * @throws IllegalArgumentException if id is empty according to ForesterUtil.isEmpty,
+  *         or if a different row already uses it
+  */
+ @Override
public void setIdentifier( final int row, final String id ) {
+     if ( ForesterUtil.isEmpty( id ) ) {
+         throw new IllegalArgumentException( "illegal attempt to create msa with empty identifier" );
+     }
+     // Reading the current value first also validates the row index before the set is touched.
+     final String previous = _identifiers[ row ];
+     if ( id.equals( previous ) ) {
+         // Re-assigning the identifier a row already has is not a uniqueness violation.
+         return;
+     }
+     if ( _identifiers_set.contains( id ) ) {
+         throw new IllegalArgumentException( "illegal attempt to create msa with non-unique identifiers [" + id
+                 + "]" );
+     }
+     if ( previous != null ) {
+         // Release the replaced identifier; otherwise it would linger in the set and block later reuse.
+         _identifiers_set.remove( previous );
+     }
+     _identifiers_set.add( id );
_identifiers[ row ] = id;
}
}
}
+ private int determineMaxIdLength() {
+     // Length of the longest identifier over all rows; stays 0 when the loop body never runs.
+     int longest = 0;
+     for( int r = 0; r < getNumberOfSequences(); ++r ) {
+         longest = Math.max( longest, getIdentifier( r ).length() );
+     }
+     return longest;
+ }
+
private void writeToFasta( final Writer w ) throws IOException {
SequenceWriter.writeSeqs( asSequenceList(), w, SEQ_FORMAT.FASTA, 100 );
}
public static Msa createInstance( final List<Sequence> seqs ) {
if ( seqs.size() < 1 ) {
- throw new IllegalArgumentException( "cannot create basic msa from less than one sequence" );
+ throw new IllegalArgumentException( "cannot create msa from less than one sequence" );
}
- final Set<String> ids = new HashSet<String>();
final int length = seqs.get( 0 ).getLength();
final BasicMsa msa = new BasicMsa( seqs.size(), length, seqs.get( 0 ).getType() );
for( int row = 0; row < seqs.size(); ++row ) {
throw new IllegalArgumentException( "illegal attempt to build msa from sequences of different type ["
+ seq.getIdentifier() + "]" );
}
- if ( ids.contains( seq.getIdentifier() ) ) {
- throw new IllegalArgumentException( "illegal attempt to create msa with non-unique identifiers ["
- + seq.getIdentifier() + "]" );
- }
- ids.add( seq.getIdentifier() );
msa.setIdentifier( row, seq.getIdentifier() );
for( int col = 0; col < length; ++col ) {
msa._data[ row ][ col ] = seq.getResidueAt( col );
}
return msa;
}
-
- @Override
- public List<Character> getColumnAt( final int col ) {
- final List<Character> column = new ArrayList<Character>();
- for( int row = 0; row < getNumberOfSequences(); ++row ) {
- column.add( getResidueAt( row, col ) );
- }
- return column;
- }
-
- @Override
- public boolean isGapAt( final int row, final int col ) {
- return getResidueAt( row, col ) == Sequence.GAP;
- }
}
package org.forester.msa;
-import java.util.HashMap;
import java.util.List;
import org.forester.sequence.BasicSequence;
public final class DeleteableMsa extends BasicMsa {
- private int _length = 0;
- private int _mapped_col_positions[] = null;
- private int _mapped_row_positions[] = null;
- private HashMap<String, Integer> _seq_id_to_row_map = null;
- private int _seqs = 0;
+ private int _length = 0;
+ private int _mapped_col_positions[] = null;
+ private int _mapped_row_positions[] = null;
+ private int _seqs = 0;
private DeleteableMsa( final BasicMsa msa ) {
super( msa );
for( int i = 0; i < _mapped_row_positions.length; ++i ) {
_mapped_row_positions[ i ] = i;
}
- _seq_id_to_row_map = new HashMap<String, Integer>();
- for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
- _seq_id_to_row_map.put( msa.getIdentifier( row ), row );
- }
_length = msa.getLength();
_seqs = msa.getNumberOfSequences();
}
@Override
final public String getIdentifier( final int row ) {
+ checkRow( row ); // throws IllegalArgumentException unless 0 <= row < _seqs, before the row mapping is indexed
return super.getIdentifier( _mapped_row_positions[ row ] );
}
@Override
final public char getResidueAt( final int row, final int col ) {
+ checkRow( row ); // throws IllegalArgumentException unless 0 <= row < _seqs
+ checkColumn( col ); // throws IllegalArgumentException unless 0 <= col < _length
return super.getResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ] );
}
@Override
public Sequence getSequence( final int row ) {
+ checkRow( row ); // throws IllegalArgumentException unless 0 <= row < _seqs, before any sequence data is read
return new BasicSequence( getIdentifier( row ), getSequenceAsString( row ).toString(), getType() );
}
@Override
final public void setIdentifier( final int row, final String id ) {
+ checkRow( row ); // throws IllegalArgumentException unless 0 <= row < _seqs, before the row mapping is indexed
super.setIdentifier( _mapped_row_positions[ row ], id );
}
@Override
final public void setResidueAt( final int row, final int col, final char residue ) {
+ checkRow( row ); // throws IllegalArgumentException unless 0 <= row < _seqs
+ checkColumn( col ); // throws IllegalArgumentException unless 0 <= col < _length
super.setResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ], residue );
}
- final private void deleteColumn( final int col ) {
+ final private void checkColumn( final int col ) { // range guard: valid columns are 0 .. _length - 1, anything else is rejected
if ( ( col >= _length ) || ( col < 0 ) ) {
throw new IllegalArgumentException( "column " + col + " is out of range" );
}
+ }
+
+ final private void checkRow( final int row ) {
+     // Valid rows are 0 .. _seqs - 1; anything else is rejected.
+     final boolean in_range = ( row >= 0 ) && ( row < _seqs );
+     if ( !in_range ) {
+         throw new IllegalArgumentException( "row " + row + " is out of range" );
+     }
+ }
+
+ final private void deleteColumn( final int col ) {
+ checkColumn( col );
for( int c = col; c < _length - 1; ++c ) {
_mapped_col_positions[ c ] = _mapped_col_positions[ c + 1 ];
}
}
final private void deleteRow( final int row ) {
- if ( ( row >= _seqs ) || ( row < 0 ) ) {
- throw new IllegalArgumentException( "row " + row + " is out of range" );
- }
+ checkRow( row );
for( int r = row; r < _seqs - 1; ++r ) {
_mapped_row_positions[ r ] = _mapped_row_positions[ r + 1 ];
}
System.out.println( "failed." );
failed++;
}
- System.exit( 0 );
if ( PERFORM_DB_TESTS ) {
System.out.print( "Uniprot Entry Retrieval: " );
if ( Test.testUniprotEntryRetrieval() ) {
if ( !dmsa0.getIdentifier( 1 ).equals( "c" ) ) {
return false;
}
- System.out.println();
- System.out.println( dmsa0.toString() );
dmsa0.deleteRow( "e" );
- System.out.println();
- System.out.println( dmsa0.toString() );
dmsa0.deleteRow( "a" );
- System.out.println();
- System.out.println( dmsa0.toString() );
dmsa0.deleteRow( "f" );
- System.out.println();
- System.out.println( dmsa0.toString() );
if ( dmsa0.getLength() != 4 ) {
return false;
}
l1.add( s_4 );
l1.add( s_5 );
final DeleteableMsa dmsa1 = DeleteableMsa.createInstance( l1 );
- System.out.println( dmsa1.toString() );
dmsa1.deleteGapOnlyColumns();
- System.out.println( dmsa1.toString() );
dmsa1.deleteRow( "a" );
dmsa1.deleteRow( "f" );
dmsa1.deleteRow( "d" );
- System.out.println( dmsa1.toString() );
dmsa1.deleteGapOnlyColumns();
- System.out.println( dmsa1.toString() );
if ( !dmsa1.getSequenceAsString( 0 ).toString().equals( "B--C-" ) ) {
return false;
}
dmsa1.deleteGapOnlyColumns();
final Writer w0 = new StringWriter();
dmsa1.write( w0, MSA_FORMAT.FASTA );
- System.out.println( w0.toString() );
final Writer w1 = new StringWriter();
dmsa1.write( w1, MSA_FORMAT.PHYLIP );
- System.out.println( w1.toString() );
if ( !dmsa1.getSequenceAsString( 0 ).toString().equals( "B--C" ) ) {
return false;
}
l2.add( s__4 );
l2.add( s__5 );
final DeleteableMsa dmsa2 = DeleteableMsa.createInstance( l2 );
- System.out.println( dmsa2.toString() );
dmsa2.deleteGapColumns( 0.5 );
- System.out.println( dmsa2.toString() );
+ if ( !dmsa2.getSequenceAsString( 0 ).toString().equals( "A---" ) ) {
+ return false;
+ }
+ if ( !dmsa2.getSequenceAsString( 1 ).toString().equals( "BB--" ) ) {
+ return false;
+ }
+ if ( !dmsa2.getSequenceAsString( 2 ).toString().equals( "CCC-" ) ) {
+ return false;
+ }
+ dmsa2.deleteGapColumns( 0.2 );
+ if ( !dmsa2.getSequenceAsString( 0 ).toString().equals( "A-" ) ) {
+ return false;
+ }
+ if ( !dmsa2.getSequenceAsString( 1 ).toString().equals( "BB" ) ) {
+ return false;
+ }
+ if ( !dmsa2.getSequenceAsString( 2 ).toString().equals( "CC" ) ) {
+ return false;
+ }
+ dmsa2.deleteGapColumns( 0 );
+ dmsa2.deleteRow( "a" );
+ dmsa2.deleteRow( "b" );
+ dmsa2.deleteRow( "f" );
+ dmsa2.deleteRow( "e" );
+ dmsa2.setIdentifier( 0, "new_c" );
+ dmsa2.setIdentifier( 1, "new_d" );
+ dmsa2.setResidueAt( 0, 0, 'x' );
+ dmsa2.deleteRow( "new_d" );
+ final Writer w = new StringWriter();
+ dmsa2.write( w, MSA_FORMAT.PHYLIP );
+ final String phylip = w.toString();
+ if ( !phylip.equals( "new_c x" + ForesterUtil.LINE_SEPARATOR ) ) {
+ return false;
+ }
+ final Writer w2 = new StringWriter();
+ dmsa2.write( w2, MSA_FORMAT.FASTA );
+ final String fasta = w2.toString();
+ if ( !fasta.equals( ">new_c" + ForesterUtil.LINE_SEPARATOR + "x" + ForesterUtil.LINE_SEPARATOR ) ) {
+ return false;
+ }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );