X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fmsa%2FBasicMsa.java;h=02da257664a0817e2a58a9a5a094e847a0dd8a99;hb=2320da44a171273ec44c0adcaf991687a4780b6e;hp=d913531043ced4e5ddff2baf79e65307bc8e96ad;hpb=eb38568d7687f10934bef10881474845946777dc;p=jalview.git diff --git a/forester/java/src/org/forester/msa/BasicMsa.java b/forester/java/src/org/forester/msa/BasicMsa.java index d913531..02da257 100644 --- a/forester/java/src/org/forester/msa/BasicMsa.java +++ b/forester/java/src/org/forester/msa/BasicMsa.java @@ -36,8 +36,8 @@ import java.util.Set; import org.forester.io.writers.SequenceWriter; import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; import org.forester.sequence.BasicSequence; -import org.forester.sequence.Sequence; -import org.forester.sequence.Sequence.TYPE; +import org.forester.sequence.MolecularSequence; +import org.forester.sequence.MolecularSequence.TYPE; import org.forester.util.ForesterUtil; public class BasicMsa implements Msa { @@ -65,8 +65,8 @@ public class BasicMsa implements Msa { } @Override - public List asSequenceList() { - final List seqs = new ArrayList(); + public List asSequenceList() { + final List seqs = new ArrayList(); for( int i = 0; i < getNumberOfSequences(); ++i ) { seqs.add( getSequence( i ) ); } @@ -103,12 +103,12 @@ public class BasicMsa implements Msa { } @Override - public Sequence getSequence( final int row ) { + public MolecularSequence getSequence( final int row ) { return new BasicSequence( getIdentifier( row ), _data[ row ], getType() ); } @Override - public Sequence getSequence( final String id ) { + public MolecularSequence getSequence( final String id ) { for( int i = 0; i < getNumberOfSequences(); ++i ) { if ( getIdentifier( i ).equals( id ) ) { return getSequence( i ); @@ -133,7 +133,7 @@ public class BasicMsa implements Msa { @Override public boolean isGapAt( final int row, final int col ) { - return getResidueAt( row, col ) == Sequence.GAP; + return getResidueAt( row, col ) == MolecularSequence.GAP; } @Override @@ -143,7 +143,7 @@ public class BasicMsa implements Msa { } if ( _identifiers_set.contains( id ) ) { throw new IllegalArgumentException( "illegal attempt to create msa with non-unique identifiers [" + id - + "]" ); + + "]" ); } _identifiers_set.add( id ); _identifiers[ row ] = id; @@ -175,6 +175,9 @@ public class BasicMsa implements Msa { case FASTA: writeToFasta( w ); break; + case NEXUS: + writeToNexus( w ); + break; default: throw new RuntimeException( "unknown format " + format ); } @@ -195,10 +198,39 @@ public class BasicMsa implements Msa { SequenceWriter.writeSeqs( asSequenceList(), w, SEQ_FORMAT.FASTA, 100 ); } + private void writeToNexus( final Writer w ) throws IOException { + final int max = determineMaxIdLength() + 1; + w.write( "Begin Data;" ); + w.write( ForesterUtil.LINE_SEPARATOR ); + w.write( " Dimensions NTax=" + getNumberOfSequences() ); + w.write( " NChar=" + getLength() ); + w.write( ";" ); + w.write( ForesterUtil.LINE_SEPARATOR ); + w.write( " Format DataType=Protein Interleave=No gap=-;" ); + w.write( ForesterUtil.LINE_SEPARATOR ); + w.write( " Matrix" ); + w.write( ForesterUtil.LINE_SEPARATOR ); + for( int row = 0; row < getNumberOfSequences(); ++row ) { + final MolecularSequence seq = getSequence( row ); + final String s = seq.getMolecularSequenceAsString(); + w.write( " " ); + w.write( ForesterUtil.pad( getIdentifier( row ).replace( ' ', '_' ), max, ' ', false ).toString() ); + w.write( " " ); + w.write( s ); + w.write( ForesterUtil.LINE_SEPARATOR ); + } + w.write( " ;" ); + w.write( ForesterUtil.LINE_SEPARATOR ); + w.write( "End;" ); + w.write( ForesterUtil.LINE_SEPARATOR ); + } + private void writeToPhylip( final Writer w ) throws IOException { final int max = determineMaxIdLength() + 1; + w.write( getNumberOfSequences() + " " + getLength() ); + w.write( ForesterUtil.LINE_SEPARATOR ); for( int row = 0; row < getNumberOfSequences(); ++row ) { - w.write( ForesterUtil.pad( getIdentifier( row ), max, ' ', false ).toString() ); + w.write( ForesterUtil.pad( getIdentifier( row ).replace( ' ', '_' ), max, ' ', false ).toString() ); for( int col = 0; col < getLength(); ++col ) { w.write( getResidueAt( row, col ) ); } @@ -206,14 +238,14 @@ public class BasicMsa implements Msa { } } - public static Msa createInstance( final List seqs ) { + public static Msa createInstance( final List seqs ) { if ( seqs.size() < 1 ) { throw new IllegalArgumentException( "cannot create msa from less than one sequence" ); } final int length = seqs.get( 0 ).getLength(); final BasicMsa msa = new BasicMsa( seqs.size(), length, seqs.get( 0 ).getType() ); for( int row = 0; row < seqs.size(); ++row ) { - final Sequence seq = seqs.get( row ); + final MolecularSequence seq = seqs.get( row ); if ( seq.getLength() != length ) { throw new IllegalArgumentException( "illegal attempt to build msa from sequences of unequal length [" + seq.getIdentifier() + "]" );