X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fmsa%2FBasicMsa.java;h=907d212402a501c41e0610d675a972f72a868090;hb=862ca59d36af9ccbed3ef284b497f9c04263ba97;hp=6e407f83070b7ca48b076374f41b6aff05cd07fc;hpb=48f7a89be9d34f1930a1f863e608235cc27184c5;p=jalview.git diff --git a/forester/java/src/org/forester/msa/BasicMsa.java b/forester/java/src/org/forester/msa/BasicMsa.java index 6e407f8..907d212 100644 --- a/forester/java/src/org/forester/msa/BasicMsa.java +++ b/forester/java/src/org/forester/msa/BasicMsa.java @@ -5,7 +5,7 @@ // Copyright (C) 2010 Christian M Zmasek // Copyright (C) 2010 Sanford-Burnham Medical Research Institute // All rights reserved -// +// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either @@ -15,7 +15,7 @@ // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. -// +// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA @@ -27,8 +27,14 @@ package org.forester.msa; import java.io.IOException; import java.io.Writer; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import org.forester.io.writers.SequenceWriter; +import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; +import org.forester.sequence.BasicSequence; import org.forester.sequence.Sequence; import org.forester.sequence.Sequence.TYPE; import org.forester.util.ForesterUtil; @@ -36,7 +42,7 @@ import org.forester.util.ForesterUtil; public class BasicMsa implements Msa { private final char[][] _data; - private final Object[] _identifiers; + private final String[] _identifiers; private final TYPE _type; public BasicMsa( final int rows, final int columns, final TYPE type ) { @@ -44,7 +50,7 @@ public class BasicMsa implements Msa { throw new IllegalArgumentException( "basic msa of size zero are illegal" ); } _data = new char[ rows ][ columns ]; - _identifiers = new Object[ rows ]; + _identifiers = new String[ rows ]; _type = type; } @@ -54,6 +60,15 @@ public class BasicMsa implements Msa { _type = msa._type; } + @Override + public List asSequenceList() { + final List seqs = new ArrayList(); + for( int i = 0; i < getNumberOfSequences(); ++i ) { + seqs.add( getSequence( i ) ); + } + return seqs; + } + private int determineMaxIdLength() { int max = 0; for( int row = 0; row < _data.length; ++row ) { @@ -66,7 +81,7 @@ public class BasicMsa implements Msa { } @Override - public Object getIdentifier( final int row ) { + public String getIdentifier( final int row ) { return _identifiers[ row ]; } @@ -86,6 +101,21 @@ public class BasicMsa implements Msa { } @Override + public Sequence getSequence( final String id ) { + for( int i = 0; i < getNumberOfSequences(); ++i ) { + if ( getIdentifier( i ).equals( id ) ) { + return getSequence( i ); + } + } + return null; + } + + @Override + public Sequence getSequence( final int row ) { + return new BasicSequence( getIdentifier( row ), _data[ row ], getType() ); + } + + @Override public StringBuffer getSequenceAsString( final int row ) { final StringBuffer sb = new StringBuffer( _data[ 0 ].length ); for( int col = 0; col < _data[ 0 ].length; ++col ) { @@ -99,10 +129,12 @@ public class BasicMsa implements Msa { return _type; } - public void setIdentifier( final int row, final Object id ) { + @Override + public void setIdentifier( final int row, final String id ) { _identifiers[ row ] = id; } + @Override public void setResidueAt( final int row, final int col, final char residue ) { _data[ row ][ col ] = residue; } @@ -121,7 +153,25 @@ public class BasicMsa implements Msa { return sb.toString(); } - public void write( final Writer w ) throws IOException { + @Override + public void write( final Writer w, final MSA_FORMAT format ) throws IOException { + switch ( format ) { + case PHYLIP: + writeToPhylip( w ); + break; + case FASTA: + writeToFasta( w ); + break; + default: + throw new RuntimeException( "unknown format " + format ); + } + } + + private void writeToFasta( final Writer w ) throws IOException { + SequenceWriter.writeSeqs( asSequenceList(), w, SEQ_FORMAT.FASTA, 100 ); + } + + private void writeToPhylip( final Writer w ) throws IOException { final int max = determineMaxIdLength() + 1; for( int row = 0; row < _data.length; ++row ) { w.write( ForesterUtil.pad( _identifiers[ row ].toString(), max, ' ', false ).toString() ); @@ -136,6 +186,7 @@ public class BasicMsa implements Msa { if ( seqs.size() < 1 ) { throw new IllegalArgumentException( "cannot create basic msa from less than one sequence" ); } + final Set ids = new HashSet(); final int length = seqs.get( 0 ).getLength(); final BasicMsa msa = new BasicMsa( seqs.size(), length, seqs.get( 0 ).getType() ); for( int row = 0; row < seqs.size(); ++row ) { @@ -146,6 +197,11 @@ public class BasicMsa implements Msa { if ( seq.getType() != msa.getType() ) { throw new IllegalArgumentException( "illegal attempt to build msa from sequences of different type" ); } + if ( ids.contains( seq.getIdentifier() ) ) { + throw new IllegalArgumentException( "illegal attempt to create msa with non-unique identifiers [" + + seq.getIdentifier() + "]" ); + } + ids.add( seq.getIdentifier() ); msa.setIdentifier( row, seq.getIdentifier() ); for( int col = 0; col < length; ++col ) { msa._data[ row ][ col ] = seq.getResidueAt( col ); @@ -153,4 +209,13 @@ public class BasicMsa implements Msa { } return msa; } + + @Override + public List getColumnAt( final int col ) { + final List column = new ArrayList(); + for( int row = 0; row < getNumberOfSequences(); ++row ) { + column.add( getResidueAt( row, col ) ); + } + return column; + } }