From 0465b8ebe16d57902022bba6f90f25dd1a18e656 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Mon, 28 Apr 2014 20:40:02 +0000 Subject: [PATCH] inprogress --- .../java/src/org/forester/msa/DeleteableMsa.java | 22 +++++++++++-- .../org/forester/msa_compactor/MsaCompactor.java | 12 +++---- forester/java/src/org/forester/test/Test.java | 33 +++++++++++--------- 3 files changed, 43 insertions(+), 24 deletions(-) diff --git a/forester/java/src/org/forester/msa/DeleteableMsa.java b/forester/java/src/org/forester/msa/DeleteableMsa.java index a781bf6..350aae2 100644 --- a/forester/java/src/org/forester/msa/DeleteableMsa.java +++ b/forester/java/src/org/forester/msa/DeleteableMsa.java @@ -81,7 +81,7 @@ public final class DeleteableMsa extends BasicMsa { } } - final public Sequence deleteRow( final String id ) { + final public Sequence deleteRow( final String id, final boolean return_removed_seq ) { int row = -1; for( int r = 0; r < getNumberOfSequences(); ++r ) { if ( getIdentifier( r ).equals( id ) ) { @@ -92,9 +92,25 @@ public final class DeleteableMsa extends BasicMsa { if ( row < 0 ) { throw new IllegalArgumentException( "id [" + id + "] not found" ); } - final Sequence s = getSequence( row ); + Sequence s = null; + StringBuilder sb = null; + if ( return_removed_seq ) { + s = getSequence( row ); + final char[] x = s.getMolecularSequence(); + sb = new StringBuilder( x.length ); + for( int i = 0; i < x.length; ++i ) { + if ( x[ i ] != Sequence.GAP ) { + sb.append( x[ i ] ); + } + } + } deleteRow( row ); - return s; + if ( return_removed_seq ) { + return new BasicSequence( new String( s.getIdentifier() ), sb.toString(), s.getType() ); + } + else { + return null; + } } @Override diff --git a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java index 374d3bb..3ade90f 100644 --- a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java +++ b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java @@ -114,7 +114,7 @@ public class MsaCompactor { while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) { final String id = to_remove_ids.get( i ); _removed_seq_ids.add( id ); - final Sequence deleted = _msa.deleteRow( id ); + final Sequence deleted = _msa.deleteRow( id, true ); _removed_seqs.add( deleted ); removeGapColumns(); if ( ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) ) @@ -145,7 +145,7 @@ public class MsaCompactor { while ( _msa.getLength() > length ) { final String id = to_remove_ids.get( i ); _removed_seq_ids.add( id ); - final Sequence deleted = _msa.deleteRow( id ); + final Sequence deleted = _msa.deleteRow( id, true ); _removed_seqs.add( deleted ); removeGapColumns(); if ( ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) ) || ( _msa.getLength() <= length ) ) { @@ -173,7 +173,7 @@ public class MsaCompactor { for( int i = 0; i < to_remove_ids.size(); ++i ) { final String id = to_remove_ids.get( i ); _removed_seq_ids.add( id ); - final Sequence deleted = _msa.deleteRow( id ); + final Sequence deleted = _msa.deleteRow( id, true ); _removed_seqs.add( deleted ); removeGapColumns(); if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( i == ( to_remove_ids.size() - 1 ) ) ) { @@ -209,7 +209,7 @@ public class MsaCompactor { System.out.println(); while ( _msa.getNumberOfSequences() > x ) { final String id = to_remove_ids.get( i ); - _msa.deleteRow( id ); + _msa.deleteRow( id, false ); if ( realign && isPrintMsaStatsWriteOutfileAndRealign( i ) ) { removeGapColumns(); realignWithMafft(); @@ -291,7 +291,7 @@ public class MsaCompactor { final StringBuilder msg = new StringBuilder(); final String n = _removed_seqs_out_base + "_" + _removed_seqs.size() + ".fasta"; SequenceWriter.writeSeqs( _removed_seqs, new File( n ), SEQ_FORMAT.FASTA, 100 ); - msg.append( "wrote " + _removed_seqs.size() + " removed sequences to " + n ); + msg.append( "wrote " + _removed_seqs.size() + " removed sequences to " + " to \"" + n + "\"" ); if ( _realign ) { final MsaInferrer mafft = Mafft.createInstance( _path_to_mafft ); final List opts = new ArrayList(); @@ -311,7 +311,7 @@ public class MsaCompactor { } s += suffix; writeMsa( removed_msa, s, _output_format ); - msg.append( ", and as MSA of length " + removed_msa.getLength() + " to " + s ); + msg.append( ", and as MSA of length " + removed_msa.getLength() + " to \"" + s + "\"" ); } return msg.toString(); } diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 3753c8b..df8fce2 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -6104,13 +6104,13 @@ public final class Test { l0.add( s4 ); l0.add( s5 ); final DeleteableMsa dmsa0 = DeleteableMsa.createInstance( l0 ); - dmsa0.deleteRow( "b" ); + dmsa0.deleteRow( "b", false ); if ( !dmsa0.getIdentifier( 1 ).equals( "c" ) ) { return false; } - dmsa0.deleteRow( "e" ); - dmsa0.deleteRow( "a" ); - dmsa0.deleteRow( "f" ); + dmsa0.deleteRow( "e", false ); + dmsa0.deleteRow( "a", false ); + dmsa0.deleteRow( "f", false ); if ( dmsa0.getLength() != 4 ) { return false; } @@ -6132,8 +6132,8 @@ public final class Test { if ( dmsa0.getColumnAt( 0 ).size() != 2 ) { return false; } - dmsa0.deleteRow( "c" ); - dmsa0.deleteRow( "d" ); + dmsa0.deleteRow( "c", false ); + dmsa0.deleteRow( "d", false ); if ( dmsa0.getNumberOfSequences() != 0 ) { return false; } @@ -6153,9 +6153,9 @@ public final class Test { l1.add( s_5 ); final DeleteableMsa dmsa1 = DeleteableMsa.createInstance( l1 ); dmsa1.deleteGapOnlyColumns(); - dmsa1.deleteRow( "a" ); - dmsa1.deleteRow( "f" ); - dmsa1.deleteRow( "d" ); + dmsa1.deleteRow( "a", false ); + dmsa1.deleteRow( "f", false ); + dmsa1.deleteRow( "d", false ); dmsa1.deleteGapOnlyColumns(); if ( !dmsa1.getSequenceAsString( 0 ).toString().equals( "B--C-" ) ) { return false; @@ -6166,7 +6166,7 @@ public final class Test { if ( !dmsa1.getSequenceAsString( 2 ).toString().equals( "EAAC-" ) ) { return false; } - dmsa1.deleteRow( "c" ); + dmsa1.deleteRow( "c", false ); dmsa1.deleteGapOnlyColumns(); final Writer w0 = new StringWriter(); dmsa1.write( w0, MSA_FORMAT.FASTA ); @@ -6214,14 +6214,17 @@ public final class Test { return false; } dmsa2.deleteGapColumns( 0 ); - dmsa2.deleteRow( "a" ); - dmsa2.deleteRow( "b" ); - dmsa2.deleteRow( "f" ); - dmsa2.deleteRow( "e" ); + dmsa2.deleteRow( "a", false ); + dmsa2.deleteRow( "b", false ); + dmsa2.deleteRow( "f", false ); + dmsa2.deleteRow( "e", false ); dmsa2.setIdentifier( 0, "new_c" ); dmsa2.setIdentifier( 1, "new_d" ); dmsa2.setResidueAt( 0, 0, 'x' ); - dmsa2.deleteRow( "new_d" ); + final Sequence s = dmsa2.deleteRow( "new_d", true ); + if ( !s.getMolecularSequenceAsString().equals( "D" ) ) { + return false; + } final Writer w = new StringWriter(); dmsa2.write( w, MSA_FORMAT.PHYLIP ); final String phylip = w.toString(); -- 1.7.10.2