}
}
- final public Sequence deleteRow( final String id ) {
+ final public Sequence deleteRow( final String id, final boolean return_removed_seq ) {
int row = -1;
for( int r = 0; r < getNumberOfSequences(); ++r ) {
if ( getIdentifier( r ).equals( id ) ) {
if ( row < 0 ) {
throw new IllegalArgumentException( "id [" + id + "] not found" );
}
- final Sequence s = getSequence( row );
+ Sequence s = null;
+ StringBuilder sb = null;
+ if ( return_removed_seq ) {
+ s = getSequence( row );
+ final char[] x = s.getMolecularSequence();
+ sb = new StringBuilder( x.length );
+ for( int i = 0; i < x.length; ++i ) {
+ if ( x[ i ] != Sequence.GAP ) {
+ sb.append( x[ i ] );
+ }
+ }
+ }
deleteRow( row );
- return s;
+ if ( return_removed_seq ) {
+ return new BasicSequence( new String( s.getIdentifier() ), sb.toString(), s.getType() );
+ }
+ else {
+ return null;
+ }
}
@Override
while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) {
final String id = to_remove_ids.get( i );
_removed_seq_ids.add( id );
- final Sequence deleted = _msa.deleteRow( id );
+ final Sequence deleted = _msa.deleteRow( id, true );
_removed_seqs.add( deleted );
removeGapColumns();
if ( ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) )
while ( _msa.getLength() > length ) {
final String id = to_remove_ids.get( i );
_removed_seq_ids.add( id );
- final Sequence deleted = _msa.deleteRow( id );
+ final Sequence deleted = _msa.deleteRow( id, true );
_removed_seqs.add( deleted );
removeGapColumns();
if ( ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) ) || ( _msa.getLength() <= length ) ) {
for( int i = 0; i < to_remove_ids.size(); ++i ) {
final String id = to_remove_ids.get( i );
_removed_seq_ids.add( id );
- final Sequence deleted = _msa.deleteRow( id );
+ final Sequence deleted = _msa.deleteRow( id, true );
_removed_seqs.add( deleted );
removeGapColumns();
if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( i == ( to_remove_ids.size() - 1 ) ) ) {
System.out.println();
while ( _msa.getNumberOfSequences() > x ) {
final String id = to_remove_ids.get( i );
- _msa.deleteRow( id );
+ _msa.deleteRow( id, false );
if ( realign && isPrintMsaStatsWriteOutfileAndRealign( i ) ) {
removeGapColumns();
realignWithMafft();
final StringBuilder msg = new StringBuilder();
final String n = _removed_seqs_out_base + "_" + _removed_seqs.size() + ".fasta";
SequenceWriter.writeSeqs( _removed_seqs, new File( n ), SEQ_FORMAT.FASTA, 100 );
- msg.append( "wrote " + _removed_seqs.size() + " removed sequences to " + n );
+ msg.append( "wrote " + _removed_seqs.size() + " removed sequences to \"" + n + "\"" );
if ( _realign ) {
final MsaInferrer mafft = Mafft.createInstance( _path_to_mafft );
final List<String> opts = new ArrayList<String>();
}
s += suffix;
writeMsa( removed_msa, s, _output_format );
- msg.append( ", and as MSA of length " + removed_msa.getLength() + " to " + s );
+ msg.append( ", and as MSA of length " + removed_msa.getLength() + " to \"" + s + "\"" );
}
return msg.toString();
}
l0.add( s4 );
l0.add( s5 );
final DeleteableMsa dmsa0 = DeleteableMsa.createInstance( l0 );
- dmsa0.deleteRow( "b" );
+ dmsa0.deleteRow( "b", false );
if ( !dmsa0.getIdentifier( 1 ).equals( "c" ) ) {
return false;
}
- dmsa0.deleteRow( "e" );
- dmsa0.deleteRow( "a" );
- dmsa0.deleteRow( "f" );
+ dmsa0.deleteRow( "e", false );
+ dmsa0.deleteRow( "a", false );
+ dmsa0.deleteRow( "f", false );
if ( dmsa0.getLength() != 4 ) {
return false;
}
if ( dmsa0.getColumnAt( 0 ).size() != 2 ) {
return false;
}
- dmsa0.deleteRow( "c" );
- dmsa0.deleteRow( "d" );
+ dmsa0.deleteRow( "c", false );
+ dmsa0.deleteRow( "d", false );
if ( dmsa0.getNumberOfSequences() != 0 ) {
return false;
}
l1.add( s_5 );
final DeleteableMsa dmsa1 = DeleteableMsa.createInstance( l1 );
dmsa1.deleteGapOnlyColumns();
- dmsa1.deleteRow( "a" );
- dmsa1.deleteRow( "f" );
- dmsa1.deleteRow( "d" );
+ dmsa1.deleteRow( "a", false );
+ dmsa1.deleteRow( "f", false );
+ dmsa1.deleteRow( "d", false );
dmsa1.deleteGapOnlyColumns();
if ( !dmsa1.getSequenceAsString( 0 ).toString().equals( "B--C-" ) ) {
return false;
if ( !dmsa1.getSequenceAsString( 2 ).toString().equals( "EAAC-" ) ) {
return false;
}
- dmsa1.deleteRow( "c" );
+ dmsa1.deleteRow( "c", false );
dmsa1.deleteGapOnlyColumns();
final Writer w0 = new StringWriter();
dmsa1.write( w0, MSA_FORMAT.FASTA );
return false;
}
dmsa2.deleteGapColumns( 0 );
- dmsa2.deleteRow( "a" );
- dmsa2.deleteRow( "b" );
- dmsa2.deleteRow( "f" );
- dmsa2.deleteRow( "e" );
+ dmsa2.deleteRow( "a", false );
+ dmsa2.deleteRow( "b", false );
+ dmsa2.deleteRow( "f", false );
+ dmsa2.deleteRow( "e", false );
dmsa2.setIdentifier( 0, "new_c" );
dmsa2.setIdentifier( 1, "new_d" );
dmsa2.setResidueAt( 0, 0, 'x' );
- dmsa2.deleteRow( "new_d" );
+ final Sequence s = dmsa2.deleteRow( "new_d", true );
+ if ( !s.getMolecularSequenceAsString().equals( "D" ) ) {
+ return false;
+ }
final Writer w = new StringWriter();
dmsa2.write( w, MSA_FORMAT.PHYLIP );
final String phylip = w.toString();