X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fmsa%2FBasicMsa.java;h=907d212402a501c41e0610d675a972f72a868090;hb=862ca59d36af9ccbed3ef284b497f9c04263ba97;hp=d806c9011be0ecc70d9bf22f8d160ab2d259077d;hpb=505270dd6bd8892fe00658607de06e5f030b11db;p=jalview.git diff --git a/forester/java/src/org/forester/msa/BasicMsa.java b/forester/java/src/org/forester/msa/BasicMsa.java index d806c90..907d212 100644 --- a/forester/java/src/org/forester/msa/BasicMsa.java +++ b/forester/java/src/org/forester/msa/BasicMsa.java @@ -28,8 +28,13 @@ package org.forester.msa; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import org.forester.io.writers.SequenceWriter; +import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; +import org.forester.sequence.BasicSequence; import org.forester.sequence.Sequence; import org.forester.sequence.Sequence.TYPE; import org.forester.util.ForesterUtil; @@ -37,7 +42,7 @@ import org.forester.util.ForesterUtil; public class BasicMsa implements Msa { private final char[][] _data; - private final Object[] _identifiers; + private final String[] _identifiers; private final TYPE _type; public BasicMsa( final int rows, final int columns, final TYPE type ) { @@ -45,7 +50,7 @@ public class BasicMsa implements Msa { throw new IllegalArgumentException( "basic msa of size zero are illegal" ); } _data = new char[ rows ][ columns ]; - _identifiers = new Object[ rows ]; + _identifiers = new String[ rows ]; _type = type; } @@ -55,6 +60,15 @@ public class BasicMsa implements Msa { _type = msa._type; } + @Override + public List asSequenceList() { + final List seqs = new ArrayList(); + for( int i = 0; i < getNumberOfSequences(); ++i ) { + seqs.add( getSequence( i ) ); + } + return seqs; + } + private int determineMaxIdLength() { int max = 0; for( int row = 0; row < _data.length; ++row ) { @@ -67,7 +81,7 @@ public class BasicMsa implements Msa { } @Override - public Object getIdentifier( final int row ) { + public String getIdentifier( final int row ) { return _identifiers[ row ]; } @@ -87,6 +101,21 @@ public class BasicMsa implements Msa { } @Override + public Sequence getSequence( final String id ) { + for( int i = 0; i < getNumberOfSequences(); ++i ) { + if ( getIdentifier( i ).equals( id ) ) { + return getSequence( i ); + } + } + return null; + } + + @Override + public Sequence getSequence( final int row ) { + return new BasicSequence( getIdentifier( row ), _data[ row ], getType() ); + } + + @Override public StringBuffer getSequenceAsString( final int row ) { final StringBuffer sb = new StringBuffer( _data[ 0 ].length ); for( int col = 0; col < _data[ 0 ].length; ++col ) { @@ -101,7 +130,7 @@ public class BasicMsa implements Msa { } @Override - public void setIdentifier( final int row, final Object id ) { + public void setIdentifier( final int row, final String id ) { _identifiers[ row ] = id; } @@ -125,7 +154,24 @@ public class BasicMsa implements Msa { } @Override - public void write( final Writer w ) throws IOException { + public void write( final Writer w, final MSA_FORMAT format ) throws IOException { + switch ( format ) { + case PHYLIP: + writeToPhylip( w ); + break; + case FASTA: + writeToFasta( w ); + break; + default: + throw new RuntimeException( "unknown format " + format ); + } + } + + private void writeToFasta( final Writer w ) throws IOException { + SequenceWriter.writeSeqs( asSequenceList(), w, SEQ_FORMAT.FASTA, 100 ); + } + + private void writeToPhylip( final Writer w ) throws IOException { final int max = determineMaxIdLength() + 1; for( int row = 0; row < _data.length; ++row ) { w.write( ForesterUtil.pad( _identifiers[ row ].toString(), max, ' ', false ).toString() ); @@ -140,6 +186,7 @@ public class BasicMsa implements Msa { if ( seqs.size() < 1 ) { throw new IllegalArgumentException( "cannot create basic msa from less than one sequence" ); } + final Set ids = new HashSet(); final int length = seqs.get( 0 ).getLength(); final BasicMsa msa = new BasicMsa( seqs.size(), length, seqs.get( 0 ).getType() ); for( int row = 0; row < seqs.size(); ++row ) { @@ -150,6 +197,11 @@ public class BasicMsa implements Msa { if ( seq.getType() != msa.getType() ) { throw new IllegalArgumentException( "illegal attempt to build msa from sequences of different type" ); } + if ( ids.contains( seq.getIdentifier() ) ) { + throw new IllegalArgumentException( "illegal attempt to create msa with non-unique identifiers [" + + seq.getIdentifier() + "]" ); + } + ids.add( seq.getIdentifier() ); msa.setIdentifier( row, seq.getIdentifier() ); for( int col = 0; col < length; ++col ) { msa._data[ row ][ col ] = seq.getResidueAt( col );