From 36376d95fc170ff20eb86705056b2d60438bcafb Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Tue, 22 Apr 2014 04:33:41 +0000 Subject: [PATCH] in progress --- forester/java/src/org/forester/msa/BasicMsa.java | 27 +++--- .../java/src/org/forester/msa/DeleteableMsa.java | 15 ++- forester/java/src/org/forester/msa/Msa.java | 2 + forester/java/src/org/forester/msa/MsaMethods.java | 2 +- forester/java/src/org/forester/test/Test.java | 99 ++++++++++++++++++++ 5 files changed, 131 insertions(+), 14 deletions(-) diff --git a/forester/java/src/org/forester/msa/BasicMsa.java b/forester/java/src/org/forester/msa/BasicMsa.java index a7367a2..7d90c3c 100644 --- a/forester/java/src/org/forester/msa/BasicMsa.java +++ b/forester/java/src/org/forester/msa/BasicMsa.java @@ -71,8 +71,8 @@ public class BasicMsa implements Msa { private int determineMaxIdLength() { int max = 0; - for( int row = 0; row < _data.length; ++row ) { - final int l = _identifiers[ row ].toString().length(); + for( int row = 0; row < getNumberOfSequences(); ++row ) { + final int l = getIdentifier(row).length(); if ( l > max ) { max = l; } @@ -112,17 +112,22 @@ public class BasicMsa implements Msa { @Override public Sequence getSequence( final int row ) { - return new BasicSequence( getIdentifier( row ), _data[ row ], getType() ); + return new BasicSequence( getIdentifier( row ), getSequenceAsArray( row ), getType() ); } @Override public StringBuffer getSequenceAsString( final int row ) { - final StringBuffer sb = new StringBuffer( _data[ 0 ].length ); - for( int col = 0; col < _data[ 0 ].length; ++col ) { + final StringBuffer sb = new StringBuffer(getLength() ); + for( int col = 0; col < getLength(); ++col ) { sb.append( getResidueAt( row, col ) ); } return sb; } + + @Override + public char[] getSequenceAsArray( final int row ) { + return _data[ row ]; + } @Override public TYPE getType() { @@ -143,9 +148,9 @@ public class BasicMsa implements Msa { public String toString() { final int max = determineMaxIdLength() + 1; final StringBuffer sb = new StringBuffer(); - for( int row = 0; row < _data.length; ++row ) { - sb.append( ForesterUtil.pad( _identifiers[ row ].toString(), max, ' ', false ) ); - for( int col = 0; col < _data[ 0 ].length; ++col ) { + for( int row = 0; row < getNumberOfSequences(); ++row ) { + sb.append( ForesterUtil.pad( getIdentifier( row ).toString(), max, ' ', false ) ); + for( int col = 0; col < getLength(); ++col ) { sb.append( getResidueAt( row, col ) ); } sb.append( ForesterUtil.LINE_SEPARATOR ); @@ -173,9 +178,9 @@ public class BasicMsa implements Msa { private void writeToPhylip( final Writer w ) throws IOException { final int max = determineMaxIdLength() + 1; - for( int row = 0; row < _data.length; ++row ) { - w.write( ForesterUtil.pad( _identifiers[ row ].toString(), max, ' ', false ).toString() ); - for( int col = 0; col < _data[ 0 ].length; ++col ) { + for( int row = 0; row < getNumberOfSequences(); ++row ) { + w.write( ForesterUtil.pad( getIdentifier( row ), max, ' ', false ).toString() ); + for( int col = 0; col < getLength(); ++col ) { w.write( getResidueAt( row, col ) ); } w.write( ForesterUtil.LINE_SEPARATOR ); diff --git a/forester/java/src/org/forester/msa/DeleteableMsa.java b/forester/java/src/org/forester/msa/DeleteableMsa.java index 3041994..392f239 100644 --- a/forester/java/src/org/forester/msa/DeleteableMsa.java +++ b/forester/java/src/org/forester/msa/DeleteableMsa.java @@ -26,6 +26,8 @@ package org.forester.msa; import java.util.HashMap; +import org.forester.util.ForesterUtil; + public final class DeleteableMsa extends BasicMsa { private int _length = 0; @@ -51,6 +53,12 @@ public final class DeleteableMsa extends BasicMsa { _length = msa.getLength(); _seqs = msa.getNumberOfSequences(); } + + + @Override + public char[] getSequenceAsArray( final int row ) { + return super.getSequenceAsArray( _mapped_row_positions[ row ] ); + } public void deleteColumn( final int col ) { if ( col >= _length || col < 0 ) { @@ -62,7 +70,9 @@ public final class DeleteableMsa extends BasicMsa { --_length; } - public void deleteRow( final int row ) { + + + private void deleteRow( final int row ) { if ( row >= _seqs || row < 0 ) { throw new IllegalArgumentException( "row " + row + " is out of range" ); } @@ -101,9 +111,10 @@ public final class DeleteableMsa extends BasicMsa { return _seqs; } + @Override public char getResidueAt( final int row, final int col ) { - return super.getResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ] ); + return super.getResidueAt( _mapped_row_positions[ row ], _mapped_col_positions[ col ] ); } @Override diff --git a/forester/java/src/org/forester/msa/Msa.java b/forester/java/src/org/forester/msa/Msa.java index 648006a..ff2b110 100644 --- a/forester/java/src/org/forester/msa/Msa.java +++ b/forester/java/src/org/forester/msa/Msa.java @@ -65,4 +65,6 @@ public interface Msa { public void setResidueAt( final int row, final int col, final char residue ); public void write( Writer w, MSA_FORMAT format ) throws IOException; + + char[] getSequenceAsArray( int row ); } diff --git a/forester/java/src/org/forester/msa/MsaMethods.java b/forester/java/src/org/forester/msa/MsaMethods.java index 2280320..9975ab2 100644 --- a/forester/java/src/org/forester/msa/MsaMethods.java +++ b/forester/java/src/org/forester/msa/MsaMethods.java @@ -164,7 +164,7 @@ public final class MsaMethods { throw new IllegalArgumentException( "max allowed gap ration is out of range: " + max_allowed_gap_ratio ); } // final boolean ignore_too_short_seqs = min_allowed_length > 0; - for( int col = 0; col < msa.getLength(); ++col ) { + for( int col = msa.getLength() - 1; col >= 0 ; --col ) { final boolean delete = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) >= max_allowed_gap_ratio; if ( delete ) { msa.deleteColumn( col ); diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 924b643..51024d5 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -61,6 +61,7 @@ import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.io.writers.SequenceWriter; import org.forester.msa.BasicMsa; +import org.forester.msa.DeleteableMsa; import org.forester.msa.Mafft; import org.forester.msa.Msa; import org.forester.msa.MsaInferrer; @@ -899,6 +900,16 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "Deleteable MSA: " ); + if ( Test.testDeleteableMsa() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } + System.exit( 0 ); if ( PERFORM_DB_TESTS ) { System.out.print( "Uniprot Entry Retrieval: " ); if ( Test.testUniprotEntryRetrieval() ) { @@ -6074,6 +6085,94 @@ public final class Test { } return true; } + + private static boolean testDeleteableMsa() { + try { + final Sequence s0 = BasicSequence.createAaSequence( "a", "AAAA" ); + final Sequence s1 = BasicSequence.createAaSequence( "b", "BAAA" ); + final Sequence s2 = BasicSequence.createAaSequence( "c", "CAAA" ); + final Sequence s3 = BasicSequence.createAaSequence( "d", "DAAA" ); + final Sequence s4 = BasicSequence.createAaSequence( "e", "EAAA" ); + final Sequence s5 = BasicSequence.createAaSequence( "f", "FAAA" ); + final List l0 = new ArrayList(); + l0.add( s0 ); + l0.add( s1 ); + l0.add( s2 ); + l0.add( s3 ); + l0.add( s4 ); + l0.add( s5 ); + final Msa msa0 = BasicMsa.createInstance( l0 ); + final DeleteableMsa dmsa0 = new DeleteableMsa( ( BasicMsa ) msa0 ); + dmsa0.deleteRow( "b" ); + if ( !dmsa0.getIdentifier( 1 ).equals( "c" ) ) { + return false; + } + System.out.println(); + System.out.println( dmsa0.toString() ); + dmsa0.deleteRow( "e" ); + System.out.println(); + System.out.println( dmsa0.toString() ); + dmsa0.deleteRow( "a" ); + System.out.println(); + System.out.println( dmsa0.toString() ); + dmsa0.deleteRow( "f" ); + System.out.println(); + System.out.println( dmsa0.toString() ); + + if ( dmsa0.getLength() != 4 ) { + return false; + } + if ( dmsa0.getNumberOfSequences() != 2 ) { + return false; + } + + if ( !dmsa0.getIdentifier( 0 ).equals( "c" ) ) { + return false; + } + if ( !dmsa0.getIdentifier( 1 ).equals( "d" ) ) { + return false; + } + if ( dmsa0.getResidueAt( 0, 0 ) != 'C') { + return false; + } + if ( !dmsa0.getSequenceAsString( 0 ).toString().equals( "CAAA" )) { + return false; + } + if ( dmsa0.getColumnAt( 0 ).size() !=2 ) { + return false; + } + dmsa0.deleteRow( "c" ); + dmsa0.deleteRow( "d" ); + if ( dmsa0.getNumberOfSequences() != 0 ) { + return false; + } + // + final Sequence s_0 = BasicSequence.createAaSequence( "a", "--A---A-A---" ); + final Sequence s_1 = BasicSequence.createAaSequence( "b", "--B-----A---" ); + final Sequence s_2 = BasicSequence.createAaSequence( "c", "--C--AA-A---" ); + final Sequence s_3 = BasicSequence.createAaSequence( "d", "--D--AA-A---" ); + final Sequence s_4 = BasicSequence.createAaSequence( "e", "--E--AA-A---" ); + final Sequence s_5 = BasicSequence.createAaSequence( "f", "--F--AA-AA--" ); + final List l1 = new ArrayList(); + l1.add( s_0 ); + l1.add( s_1 ); + l1.add( s_2 ); + l1.add( s_3 ); + l1.add( s_4 ); + l1.add( s_5 ); + final Msa msa1 = BasicMsa.createInstance( l1 ); + final DeleteableMsa dmsa1 = new DeleteableMsa( ( BasicMsa ) msa1 ); + System.out.println( dmsa1.toString() ); + MsaMethods.removeGapColumns( 1, dmsa1 ); + System.out.println( dmsa1.toString() ); + + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } private static boolean testNextNodeWithCollapsing() { try { -- 1.7.10.2