From ca9937e02d1d293eee015765ecb8564e563f591c Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Tue, 22 Apr 2014 23:01:22 +0000 Subject: [PATCH] inprogress (not working) --- forester/java/src/org/forester/msa/BasicMsa.java | 77 +++++++++++--------- .../java/src/org/forester/msa/DeleteableMsa.java | 37 ++++++---- forester/java/src/org/forester/test/Test.java | 57 ++++++++++----- 3 files changed, 104 insertions(+), 67 deletions(-) diff --git a/forester/java/src/org/forester/msa/BasicMsa.java b/forester/java/src/org/forester/msa/BasicMsa.java index 4c00a30..395a16d 100644 --- a/forester/java/src/org/forester/msa/BasicMsa.java +++ b/forester/java/src/org/forester/msa/BasicMsa.java @@ -42,9 +42,10 @@ import org.forester.util.ForesterUtil; public class BasicMsa implements Msa { - private final char[][] _data; - private final String[] _identifiers; - private final TYPE _type; + private final char[][] _data; + private final String[] _identifiers; + private final Set _identifiers_set; + private final TYPE _type; public BasicMsa( final int rows, final int columns, final TYPE type ) { if ( ( rows < 1 ) || ( columns < 1 ) ) { @@ -52,6 +53,7 @@ public class BasicMsa implements Msa { } _data = new char[ rows ][ columns ]; _identifiers = new String[ rows ]; + _identifiers_set = new HashSet(); _type = type; } @@ -59,6 +61,7 @@ public class BasicMsa implements Msa { _data = msa._data; _identifiers = msa._identifiers; _type = msa._type; + _identifiers_set = msa._identifiers_set; } @Override @@ -70,15 +73,13 @@ public class BasicMsa implements Msa { return seqs; } - private int determineMaxIdLength() { - int max = 0; + @Override + public List getColumnAt( final int col ) { + final List column = new ArrayList(); for( int row = 0; row < getNumberOfSequences(); ++row ) { - final int l = getIdentifier( row ).length(); - if ( l > max ) { - max = l; - } + column.add( getResidueAt( row, col ) ); } - return max; + return column; } @Override @@ -102,6 +103,11 @@ public class BasicMsa implements Msa { } @Override + public Sequence getSequence( final int row ) { + return new BasicSequence( getIdentifier( row ), _data[ row ], getType() ); + } + + @Override public Sequence getSequence( final String id ) { for( int i = 0; i < getNumberOfSequences(); ++i ) { if ( getIdentifier( i ).equals( id ) ) { @@ -112,11 +118,6 @@ public class BasicMsa implements Msa { } @Override - public Sequence getSequence( final int row ) { - return new BasicSequence( getIdentifier( row ), _data[ row ], getType() ); - } - - @Override public StringBuffer getSequenceAsString( final int row ) { final StringBuffer sb = new StringBuffer( getLength() ); for( int col = 0; col < getLength(); ++col ) { @@ -131,7 +132,20 @@ public class BasicMsa implements Msa { } @Override + public boolean isGapAt( final int row, final int col ) { + return getResidueAt( row, col ) == Sequence.GAP; + } + + @Override public void setIdentifier( final int row, final String id ) { + if ( ForesterUtil.isEmpty( id ) ) { + throw new IllegalArgumentException( "illegal attempt to create msa with empty identifier" ); + } + if ( _identifiers_set.contains( id ) ) { + throw new IllegalArgumentException( "illegal attempt to create msa with non-unique identifiers [" + id + + "]" ); + } + _identifiers_set.add( id ); _identifiers[ row ] = id; } @@ -166,6 +180,17 @@ public class BasicMsa implements Msa { } } + private int determineMaxIdLength() { + int max = 0; + for( int row = 0; row < getNumberOfSequences(); ++row ) { + final int l = getIdentifier( row ).length(); + if ( l > max ) { + max = l; + } + } + return max; + } + private void writeToFasta( final Writer w ) throws IOException { SequenceWriter.writeSeqs( asSequenceList(), w, SEQ_FORMAT.FASTA, 100 ); } @@ -183,9 +208,8 @@ public class BasicMsa implements Msa { public static Msa createInstance( final List seqs ) { if ( seqs.size() < 1 ) { - throw new IllegalArgumentException( "cannot create basic msa from less than one sequence" ); + throw new IllegalArgumentException( "cannot create msa from less than one sequence" ); } - final Set ids = new HashSet(); final int length = seqs.get( 0 ).getLength(); final BasicMsa msa = new BasicMsa( seqs.size(), length, seqs.get( 0 ).getType() ); for( int row = 0; row < seqs.size(); ++row ) { @@ -198,11 +222,6 @@ public class BasicMsa implements Msa { throw new IllegalArgumentException( "illegal attempt to build msa from sequences of different type [" + seq.getIdentifier() + "]" ); } - if ( ids.contains( seq.getIdentifier() ) ) { - throw new IllegalArgumentException( "illegal attempt to create msa with non-unique identifiers [" - + seq.getIdentifier() + "]" ); - } - ids.add( seq.getIdentifier() ); msa.setIdentifier( row, seq.getIdentifier() ); for( int col = 0; col < length; ++col ) { msa._data[ row ][ col ] = seq.getResidueAt( col ); @@ -210,18 +229,4 @@ public class BasicMsa implements Msa { } return msa; } - - @Override - public List getColumnAt( final int col ) { - final List column = new ArrayList(); - for( int row = 0; row < getNumberOfSequences(); ++row ) { - column.add( getResidueAt( row, col ) ); - } - return column; - } - - @Override - public boolean isGapAt( final int row, final int col ) { - return getResidueAt( row, col ) == Sequence.GAP; - } } diff --git a/forester/java/src/org/forester/msa/DeleteableMsa.java b/forester/java/src/org/forester/msa/DeleteableMsa.java index 17f72bb..da781b8 100644 --- a/forester/java/src/org/forester/msa/DeleteableMsa.java +++ b/forester/java/src/org/forester/msa/DeleteableMsa.java @@ -24,7 +24,6 @@ package org.forester.msa; -import java.util.HashMap; import java.util.List; import org.forester.sequence.BasicSequence; @@ -32,11 +31,10 @@ import org.forester.sequence.Sequence; public final class DeleteableMsa extends BasicMsa { - private int _length = 0; - private int _mapped_col_positions[] = null; - private int _mapped_row_positions[] = null; - private HashMap _seq_id_to_row_map = null; - private int _seqs = 0; + private int _length = 0; + private int _mapped_col_positions[] = null; + private int _mapped_row_positions[] = null; + private int _seqs = 0; private DeleteableMsa( final BasicMsa msa ) { super( msa ); @@ -48,10 +46,6 @@ public final class DeleteableMsa extends BasicMsa { for( int i = 0; i < _mapped_row_positions.length; ++i ) { _mapped_row_positions[ i ] = i; } - _seq_id_to_row_map = new HashMap(); - for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { - _seq_id_to_row_map.put( msa.getIdentifier( row ), row ); - } _length = msa.getLength(); _seqs = msa.getNumberOfSequences(); } @@ -92,6 +86,7 @@ public final class DeleteableMsa extends BasicMsa { @Override final public String getIdentifier( final int row ) { + checkRow( row ); return super.getIdentifier( _mapped_row_positions[ row ] ); } @@ -107,28 +102,44 @@ public final class DeleteableMsa extends BasicMsa { @Override final public char getResidueAt( final int row, final int col ) { + checkRow( row ); + checkColumn( col ); return super.getResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ] ); } @Override public Sequence getSequence( final int row ) { + checkRow( row ); return new BasicSequence( getIdentifier( row ), getSequenceAsString( row ).toString(), getType() ); } @Override final public void setIdentifier( final int row, final String id ) { + checkRow( row ); super.setIdentifier( _mapped_row_positions[ row ], id ); } @Override final public void setResidueAt( final int row, final int col, final char residue ) { + checkRow( row ); + checkColumn( col ); super.setResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ], residue ); } - final private void deleteColumn( final int col ) { + final private void checkColumn( final int col ) { if ( ( col >= _length ) || ( col < 0 ) ) { throw new IllegalArgumentException( "column " + col + " is out of range" ); } + } + + final private void checkRow( final int row ) { + if ( ( row >= _seqs ) || ( row < 0 ) ) { + throw new IllegalArgumentException( "row " + row + " is out of range" ); + } + } + + final private void deleteColumn( final int col ) { + checkColumn( col ); for( int c = col; c < _length - 1; ++c ) { _mapped_col_positions[ c ] = _mapped_col_positions[ c + 1 ]; } @@ -136,9 +147,7 @@ public final class DeleteableMsa extends BasicMsa { } final private void deleteRow( final int row ) { - if ( ( row >= _seqs ) || ( row < 0 ) ) { - throw new IllegalArgumentException( "row " + row + " is out of range" ); - } + checkRow( row ); for( int r = row; r < _seqs - 1; ++r ) { _mapped_row_positions[ r ] = _mapped_row_positions[ r + 1 ]; } diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 73c1c1f..3753c8b 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -912,7 +912,6 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.exit( 0 ); if ( PERFORM_DB_TESTS ) { System.out.print( "Uniprot Entry Retrieval: " ); if ( Test.testUniprotEntryRetrieval() ) { @@ -6109,17 +6108,9 @@ public final class Test { if ( !dmsa0.getIdentifier( 1 ).equals( "c" ) ) { return false; } - System.out.println(); - System.out.println( dmsa0.toString() ); dmsa0.deleteRow( "e" ); - System.out.println(); - System.out.println( dmsa0.toString() ); dmsa0.deleteRow( "a" ); - System.out.println(); - System.out.println( dmsa0.toString() ); dmsa0.deleteRow( "f" ); - System.out.println(); - System.out.println( dmsa0.toString() ); if ( dmsa0.getLength() != 4 ) { return false; } @@ -6161,15 +6152,11 @@ public final class Test { l1.add( s_4 ); l1.add( s_5 ); final DeleteableMsa dmsa1 = DeleteableMsa.createInstance( l1 ); - System.out.println( dmsa1.toString() ); dmsa1.deleteGapOnlyColumns(); - System.out.println( dmsa1.toString() ); dmsa1.deleteRow( "a" ); dmsa1.deleteRow( "f" ); dmsa1.deleteRow( "d" ); - System.out.println( dmsa1.toString() ); dmsa1.deleteGapOnlyColumns(); - System.out.println( dmsa1.toString() ); if ( !dmsa1.getSequenceAsString( 0 ).toString().equals( "B--C-" ) ) { return false; } @@ -6183,10 +6170,8 @@ public final class Test { dmsa1.deleteGapOnlyColumns(); final Writer w0 = new StringWriter(); dmsa1.write( w0, MSA_FORMAT.FASTA ); - System.out.println( w0.toString() ); final Writer w1 = new StringWriter(); dmsa1.write( w1, MSA_FORMAT.PHYLIP ); - System.out.println( w1.toString() ); if ( !dmsa1.getSequenceAsString( 0 ).toString().equals( "B--C" ) ) { return false; } @@ -6208,9 +6193,47 @@ public final class Test { l2.add( s__4 ); l2.add( s__5 ); final DeleteableMsa dmsa2 = DeleteableMsa.createInstance( l2 ); - System.out.println( dmsa2.toString() ); dmsa2.deleteGapColumns( 0.5 ); - System.out.println( dmsa2.toString() ); + if ( !dmsa2.getSequenceAsString( 0 ).toString().equals( "A---" ) ) { + return false; + } + if ( !dmsa2.getSequenceAsString( 1 ).toString().equals( "BB--" ) ) { + return false; + } + if ( !dmsa2.getSequenceAsString( 2 ).toString().equals( "CCC-" ) ) { + return false; + } + dmsa2.deleteGapColumns( 0.2 ); + if ( !dmsa2.getSequenceAsString( 0 ).toString().equals( "A-" ) ) { + return false; + } + if ( !dmsa2.getSequenceAsString( 1 ).toString().equals( "BB" ) ) { + return false; + } + if ( !dmsa2.getSequenceAsString( 2 ).toString().equals( "CC" ) ) { + return false; + } + dmsa2.deleteGapColumns( 0 ); + dmsa2.deleteRow( "a" ); + dmsa2.deleteRow( "b" ); + dmsa2.deleteRow( "f" ); + dmsa2.deleteRow( "e" ); + dmsa2.setIdentifier( 0, "new_c" ); + dmsa2.setIdentifier( 1, "new_d" ); + dmsa2.setResidueAt( 0, 0, 'x' ); + dmsa2.deleteRow( "new_d" ); + final Writer w = new StringWriter(); + dmsa2.write( w, MSA_FORMAT.PHYLIP ); + final String phylip = w.toString(); + if ( !phylip.equals( "new_c x" + ForesterUtil.LINE_SEPARATOR ) ) { + return false; + } + final Writer w2 = new StringWriter(); + dmsa2.write( w2, MSA_FORMAT.FASTA ); + final String fasta = w2.toString(); + if ( !fasta.equals( ">new_c" + ForesterUtil.LINE_SEPARATOR + "x" + ForesterUtil.LINE_SEPARATOR ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); -- 1.7.10.2