2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
26 package org.forester.evoinference.matrix.character;
28 import java.io.IOException;
29 import java.io.Writer;
30 import java.util.HashMap;
31 import java.util.List;
34 import org.forester.io.parsers.nexus.NexusConstants;
35 import org.forester.util.ForesterUtil;
36 import org.forester.util.IllegalFormatUseException;
38 public class BasicCharacterStateMatrix<S> implements CharacterStateMatrix<S> {
40 final Object[][] _states;
41 final String[] _identifiers;
42 final String[] _characters;
43 final Map<String, Integer> _identifier_index_map;
44 final Map<String, Integer> _character_index_map;
/**
 * Creates a matrix of the given dimensions. All states, identifier names and
 * character names are initially {@code null}.
 *
 * @param number_of_identifiers size of the first (identifier) axis
 * @param number_of_characters size of the second (character) axis
 */
public BasicCharacterStateMatrix( final int number_of_identifiers, final int number_of_characters ) {
    // Name storage for both axes.
    _identifiers = new String[ number_of_identifiers ];
    _characters = new String[ number_of_characters ];
    // State grid, addressed as [identifier][character].
    _states = new Object[ number_of_identifiers ][ number_of_characters ];
    // Name -> index lookups, pre-sized to the axis lengths.
    _identifier_index_map = new HashMap<String, Integer>( number_of_identifiers );
    _character_index_map = new HashMap<String, Integer>( number_of_characters );
}
/**
 * Creates a matrix of the given dimensions with every cell set to
 * {@code default_state}. Identifier and character names start out {@code null}.
 *
 * @param number_of_identifiers size of the first (identifier) axis
 * @param number_of_characters size of the second (character) axis
 * @param default_state value stored in every cell; may be {@code null}
 */
public BasicCharacterStateMatrix( final int number_of_identifiers,
                                  final int number_of_characters,
                                  final S default_state ) {
    // The second argument has to be the character count. Passing the identifier
    // count twice allocated a square grid, so the fill loop below ran out of
    // bounds whenever there were more characters than identifiers (and the
    // matrix had the wrong width otherwise).
    this( number_of_identifiers, number_of_characters );
    for( int identifier = 0; identifier < number_of_identifiers; ++identifier ) {
        for( int character = 0; character < number_of_characters; ++character ) {
            setState( identifier, character, default_state );
        }
    }
}
/**
 * Creates a matrix from a list of rows, one inner list per identifier.
 * The size of the first row determines the number of characters.
 *
 * @param states the states, outer list indexed by identifier, inner lists by character
 * @throws IllegalArgumentException if {@code states} is {@code null}, empty,
 *             or its first element is {@code null}
 */
public BasicCharacterStateMatrix( final List<List<S>> states ) {
    final boolean unusable = ( states == null ) || ( states.size() < 1 ) || ( states.get( 0 ) == null );
    if ( unusable ) {
        throw new IllegalArgumentException( "attempt to create character state matrix from empty list" );
    }
    final int row_count = states.size();
    final int column_count = states.get( 0 ).size();
    _states = new Object[ row_count ][ column_count ];
    _identifiers = new String[ row_count ];
    _characters = new String[ column_count ];
    _identifier_index_map = new HashMap<String, Integer>( row_count );
    _character_index_map = new HashMap<String, Integer>( column_count );
    // Copy cell by cell; only the first column_count entries of each row are read.
    for( int row = 0; row < row_count; ++row ) {
        final List<S> row_states = states.get( row );
        for( int column = 0; column < column_count; ++column ) {
            setState( row, column, row_states.get( column ) );
        }
    }
}
83 public BasicCharacterStateMatrix( final S[][] states ) {
84 this( states.length, states[ 0 ].length );
85 for( int identifier = 0; identifier < states.length; ++identifier ) {
86 for( int character = 0; character < states[ 0 ].length; ++character ) {
87 setState( identifier, character, states[ identifier ][ character ] );
93 public boolean containsCharacter( final String character ) {
94 return _character_index_map.containsKey( character );
98 public boolean containsIdentifier( final String identifier ) {
99 return _identifier_index_map.containsKey( identifier );
102 public CharacterStateMatrix<S> copy() {
103 final CharacterStateMatrix<S> new_matrix = new BasicCharacterStateMatrix<S>( getNumberOfIdentifiers(),
104 getNumberOfCharacters() );
105 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
106 if ( getCharacter( character ) != null ) {
107 new_matrix.setCharacter( character, getCharacter( character ) );
110 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
111 if ( getIdentifier( identifier ) != null ) {
112 new_matrix.setIdentifier( identifier, getIdentifier( identifier ) );
114 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
115 new_matrix.setState( identifier, character, getState( identifier, character ) );
122 public boolean equals( final Object o ) {
126 else if ( o == null ) {
127 throw new IllegalArgumentException( "attempt to check character state matrix equality to null" );
129 else if ( o.getClass() != this.getClass() ) {
130 throw new IllegalArgumentException( "attempt to check character state matrix to " + o + " [" + o.getClass()
134 final CharacterStateMatrix<S> other = ( CharacterStateMatrix<S> ) o;
135 if ( ( getNumberOfIdentifiers() != other.getNumberOfIdentifiers() )
136 || ( getNumberOfCharacters() != other.getNumberOfCharacters() ) ) {
138 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
139 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
140 final S s = getState( identifier, character );
141 final S os = other.getState( identifier, character );
145 else if ( ( s == null ) && ( os != null ) ) {
148 else if ( ( s != null ) && ( os == null ) ) {
151 else if ( !s.equals( other.getState( identifier, character ) ) ) {
161 public String getCharacter( final int character_index ) {
162 return _characters[ character_index ];
166 public int getCharacterIndex( final String character ) {
167 if ( !_character_index_map.containsKey( character ) ) {
168 throw new IllegalArgumentException( "character [" + character + "] not found" );
170 return _character_index_map.get( character );
174 public String getIdentifier( final int identifier_index ) {
175 return _identifiers[ identifier_index ];
179 public int getIdentifierIndex( final String identifier ) {
180 if ( !_identifier_index_map.containsKey( identifier ) ) {
181 throw new IllegalArgumentException( "indentifier [" + identifier + "] not found" );
183 return _identifier_index_map.get( identifier );
186 private int getLengthOfLongestState() {
188 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
189 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
190 final S s = getState( identifier, character );
192 final int l = getState( identifier, character ).toString().length();
203 public int getNumberOfCharacters() {
205 return _states[ 0 ].length;
213 public int getNumberOfIdentifiers() {
214 return _states.length;
218 public S getState( final int identifier_index, final int character_index ) {
219 return ( S ) _states[ identifier_index ][ character_index ];
223 public S getState( final String identifier, final int character_index ) {
224 if ( !containsIdentifier( identifier ) ) {
225 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
227 return getState( _identifier_index_map.get( identifier ), character_index );
231 public S getState( final String identifier, final String character ) {
232 if ( !containsIdentifier( identifier ) ) {
233 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
235 if ( !containsCharacter( character ) ) {
236 throw new IllegalArgumentException( "character [" + character + "] not found" );
238 return getState( _identifier_index_map.get( identifier ), _character_index_map.get( character ) );
242 public boolean isEmpty() {
243 return getNumberOfIdentifiers() <= 0;
246 public CharacterStateMatrix<S> pivot() {
247 final CharacterStateMatrix<S> new_matrix = new BasicCharacterStateMatrix<S>( getNumberOfCharacters(),
248 getNumberOfIdentifiers() );
249 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
250 if ( getCharacter( character ) != null ) {
251 new_matrix.setIdentifier( character, getCharacter( character ) );
254 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
255 if ( getIdentifier( identifier ) != null ) {
256 new_matrix.setCharacter( identifier, getIdentifier( identifier ) );
258 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
259 new_matrix.setState( character, identifier, getState( identifier, character ) );
266 public void setCharacter( final int character_index, final String character ) {
267 if ( character == null ) {
268 throw new IllegalArgumentException( "attempt to use null character" );
270 _characters[ character_index ] = character;
271 if ( _character_index_map.containsKey( character ) ) {
272 throw new IllegalArgumentException( "character [" + character + "] is not unique" );
274 _character_index_map.put( character, character_index );
278 public void setIdentifier( final int identifier_index, final String identifier ) {
279 if ( identifier == null ) {
280 throw new IllegalArgumentException( "attempt to use null identifier" );
282 _identifiers[ identifier_index ] = identifier;
283 if ( _identifier_index_map.containsKey( identifier ) ) {
284 throw new IllegalArgumentException( "identifier [" + identifier + "] is not unique" );
286 _identifier_index_map.put( identifier, identifier_index );
290 public void setState( final int identifier_index, final int character_index, final S state ) {
291 _states[ identifier_index ][ character_index ] = state;
295 public void setState( final String identifier, final int character_index, final S state ) {
296 if ( !_identifier_index_map.containsKey( identifier ) ) {
297 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
299 setState( _identifier_index_map.get( identifier ), character_index, state );
303 public void setState( final String identifier, final String character, final S state ) {
304 if ( !containsIdentifier( identifier ) ) {
305 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
307 if ( !containsCharacter( character ) ) {
308 throw new IllegalArgumentException( "character [" + character + "] not found" );
310 setState( _identifier_index_map.get( identifier ), _character_index_map.get( character ), state );
// Writes this matrix as a plain-text table: an "Identifiers: " count line, a
// "Characters : " count line, a header row of character names, then one row per
// identifier holding its name followed by its states.
// Column width for names of characters and for states is the longest state
// string plus 5; the identifier column is padded to 20.
// NOTE(review): several short lines of this method (e.g. the bodies of the
// "character < getNumberOfCharacters() - 1" branches) are not visible in this
// excerpt, so the separator written between columns cannot be stated here.
313 private void toForester( final Writer writer ) throws IOException {
314 final int longest = getLengthOfLongestState() + 5;
// Two summary lines with the matrix dimensions.
315 writer.write( "Identifiers: " );
316 writer.write( String.valueOf( getNumberOfIdentifiers() ) );
317 writer.write( ForesterUtil.LINE_SEPARATOR );
318 writer.write( "Characters : " );
319 writer.write( String.valueOf( getNumberOfCharacters() ) );
320 writer.write( ForesterUtil.LINE_SEPARATOR );
// Header row: blank cell above the identifier column, then the character names.
321 writer.write( ForesterUtil.pad( "", 20, ' ', false ).toString() );
323 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
324 final String c = getCharacter( character );
// Unnamed characters are rendered as an empty padded cell.
325 writer.write( c != null ? ForesterUtil.pad( c, longest, ' ', false ).toString() : ForesterUtil
326 .pad( "", longest, ' ', false ).toString() );
327 if ( character < getNumberOfCharacters() - 1 ) {
331 writer.write( ForesterUtil.LINE_SEPARATOR );
// Data rows, one per identifier.
332 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
333 if ( getIdentifier( identifier ) != null ) {
334 writer.write( ForesterUtil.pad( getIdentifier( identifier ), 20, ' ', false ).toString() );
337 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
338 final S state = getState( identifier, character );
// Null states are rendered as an empty padded cell.
339 writer.write( state != null ? ForesterUtil.pad( state.toString(), longest, ' ', false ).toString()
340 : ForesterUtil.pad( "", longest, ' ', false ).toString() );
341 if ( character < getNumberOfCharacters() - 1 ) {
// No line separator after the last row.
345 if ( identifier < getNumberOfIdentifiers() - 1 ) {
346 writer.write( ForesterUtil.LINE_SEPARATOR );
// Writes this matrix in NEXUS form: the NexusConstants.NEXUS header and a line
// separator, followed by the taxa block and the binary characters block, each
// produced by its own helper method.
// NOTE(review): the lines between the signature and the first write are not
// visible in this excerpt.
351 private void toNexus( final Writer writer ) throws IOException {
355 writer.write( NexusConstants.NEXUS );
356 writer.write( ForesterUtil.LINE_SEPARATOR );
357 writeNexusTaxaBlock( writer );
358 writeNexusBinaryChractersBlock( writer );
// Writes this matrix in Phylip style: a first line built from the two
// dimensions, then one line per identifier with the padded identifier name
// followed by its padded states. Empty identifier names are rejected with an
// IllegalFormatUseException.
// NOTE(review): the declaration of "pad" and several short lines are not
// visible in this excerpt.
361 private void toPhylip( final Writer writer ) throws IOException {
// NOTE(review): Writer.write( int ) emits the single character with that code,
// not the decimal number; toForester uses String.valueOf( ... ) for the same
// counts, which is probably what is intended here as well -- confirm.
367 writer.write( getNumberOfIdentifiers() );
369 writer.write( getNumberOfCharacters() );
370 writer.write( ForesterUtil.LINE_SEPARATOR );
371 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
372 if ( !ForesterUtil.isEmpty( getIdentifier( identifier ) ) ) {
373 writer.write( ForesterUtil.pad( getIdentifier( identifier ), pad, ' ', false ).toString() );
378 throw new IllegalFormatUseException( "Phylip format does not allow empty identifiers" );
381 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
// NOTE(review): toString() is called before the null test on the next line, so
// a null state raises a NullPointerException here and the "" fallback below is
// never used.
382 final String state = getState( identifier, character ).toString();
383 writer.write( state != null ? ForesterUtil.pad( state, pad, ' ', false ).toString() : ForesterUtil
384 .pad( "", pad, ' ', false ).toString() );
385 if ( character < getNumberOfCharacters() - 1 ) {
// No line separator after the last row.
390 if ( identifier < getNumberOfIdentifiers() - 1 ) {
391 writer.write( ForesterUtil.LINE_SEPARATOR );
397 //to format for microarray-style clustering
398 // states are ints in this case
400 public void toWriter( final Writer writer ) throws IOException {
401 toForester( writer );
// Writes this matrix to the given writer in the requested format.
// NOTE(review): the dispatch on "format" is only partly visible in this
// excerpt. The visible lines show toForester( writer ) as one of the targets
// and an IllegalArgumentException ("Unknown format:" + format) for a format
// that is not handled.
405 public void toWriter( final Writer writer, final Format format ) throws IOException {
411 toForester( writer );
417 throw new IllegalArgumentException( "Unknown format:" + format );
// Writes a NEXUS characters block for binary data: the BEGIN_CHARACTERS line,
// a DIMENSIONS/NCHAR entry with the number of characters, the character state
// labels (via writeNexusCharstatelabels), a FORMAT entry declaring DATATYPE
// STANDARD with the symbols for BinaryStates.ABSENT and BinaryStates.PRESENT,
// the matrix itself (via writeNexusMatrix) and the END line.
// NOTE(review): the short writes that sit between the visible ones (presumably
// spaces, "=" and ";") are not visible in this excerpt.
421 public void writeNexusBinaryChractersBlock( final Writer w ) throws IOException {
423 // DIMENSIONS NCHAR=x;
424 //BEGIN CHARSTATELABELS
428 // FORMAT DATATYPE=STANDARD SYMBOLS=;
434 w.write( NexusConstants.BEGIN_CHARACTERS );
435 w.write( ForesterUtil.LINE_SEPARATOR );
// DIMENSIONS entry carrying the character count.
437 w.write( NexusConstants.DIMENSIONS );
439 w.write( NexusConstants.NCHAR );
441 w.write( String.valueOf( getNumberOfCharacters() ) );
443 w.write( ForesterUtil.LINE_SEPARATOR );
444 writeNexusCharstatelabels( w );
// FORMAT entry: data type and the two symbols used for absent/present.
446 w.write( NexusConstants.FORMAT );
448 w.write( NexusConstants.DATATYPE );
450 w.write( NexusConstants.STANDARD );
452 w.write( NexusConstants.SYMBOLS );
454 w.write( String.valueOf( BinaryStates.ABSENT ) );
455 w.write( String.valueOf( BinaryStates.PRESENT ) );
457 w.write( ForesterUtil.LINE_SEPARATOR );
458 writeNexusMatrix( w );
459 w.write( ForesterUtil.LINE_SEPARATOR );
460 w.write( NexusConstants.END );
461 w.write( ForesterUtil.LINE_SEPARATOR );
// Writes the NEXUS CHARSTATELABELS section: the CHARSTATELABELS keyword, then
// one entry per character consisting of its 1-based position and its name
// after an opening single quote.
// NOTE(review): the writes that close each entry and the section are not
// visible in this excerpt.
464 public void writeNexusCharstatelabels( final Writer w ) throws IOException {
466 w.write( NexusConstants.CHARSTATELABELS );
467 w.write( ForesterUtil.LINE_SEPARATOR );
468 for( int i = 0; i < getNumberOfCharacters(); ++i ) {
// NEXUS numbering is 1-based, hence i + 1.
469 w.write( " " + ( i + 1 ) + " '" );
// NOTE(review): getCharacter( i ) is null for an unnamed character, and
// Writer.write( String ) does not accept null -- confirm callers always name
// every character before exporting.
470 w.write( getCharacter( i ) );
472 if ( i < getNumberOfCharacters() - 1 ) {
474 w.write( ForesterUtil.LINE_SEPARATOR );
478 w.write( ForesterUtil.LINE_SEPARATOR );
// Writes the NEXUS MATRIX section: the MATRIX keyword, then one line per
// identifier with its name padded to 20 followed by the toString() form of
// each of its states.
// Every state must be a non-null BinaryStates value other than
// BinaryStates.UNKNOWN; anything else raises an IllegalFormatUseException.
// NOTE(review): some short lines of this method are not visible in this excerpt.
481 public void writeNexusMatrix( final Writer w ) throws IOException {
483 w.write( NexusConstants.MATRIX );
484 w.write( ForesterUtil.LINE_SEPARATOR );
485 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
486 if ( getIdentifier( identifier ) != null ) {
488 w.write( ForesterUtil.pad( getIdentifier( identifier ), 20, ' ', false ).toString() );
491 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
492 final S state = getState( identifier, character );
493 if ( state == null ) {
494 throw new IllegalFormatUseException( "character state matrix cannot contain null if to be represented in nexus format" );
496 if ( !( state instanceof BinaryStates ) ) {
// NOTE(review): the message reports the class of cell (0, 0), not of the
// offending "state"; if cell (0, 0) is null this line itself throws a
// NullPointerException. state.getClass() looks like the intended expression.
497 throw new IllegalFormatUseException( "nexus format representation expects binary character data - got ["
498 + getState( 0, 0 ).getClass() + "] instead" );
500 if ( state == BinaryStates.UNKNOWN ) {
501 throw new IllegalFormatUseException( "character state matrix cannot contain unknown states if to be represented in nexus format" );
503 w.write( state.toString() );
// No line separator after the last row.
505 if ( identifier < getNumberOfIdentifiers() - 1 ) {
506 w.write( ForesterUtil.LINE_SEPARATOR );
// Writes a NEXUS taxa block: the BEGIN_TAXA line, a DIMENSIONS/NTAX entry with
// the number of identifiers, a TAXLABELS entry listing every identifier name,
// and the END line.
// NOTE(review): the short writes between the visible ones (presumably spaces,
// "=" and ";") are not visible in this excerpt.
512 public void writeNexusTaxaBlock( final Writer w ) throws IOException {
514 // DIMENSIONS NTAX=n;
515 // TAXLABELS fish frog snake;
517 w.write( NexusConstants.BEGIN_TAXA );
518 w.write( ForesterUtil.LINE_SEPARATOR );
// DIMENSIONS entry carrying the identifier count.
520 w.write( NexusConstants.DIMENSIONS );
522 w.write( NexusConstants.NTAX );
524 w.write( String.valueOf( getNumberOfIdentifiers() ) );
526 w.write( ForesterUtil.LINE_SEPARATOR );
528 w.write( NexusConstants.TAXLABELS );
529 for( int i = 0; i < getNumberOfIdentifiers(); ++i ) {
// NOTE(review): getIdentifier( i ) is null for an unnamed identifier, and
// Writer.write( String ) does not accept null -- confirm callers always name
// every identifier before exporting.
531 w.write( getIdentifier( i ) );
534 w.write( ForesterUtil.LINE_SEPARATOR );
535 w.write( NexusConstants.END );
536 w.write( ForesterUtil.LINE_SEPARATOR );