2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.evoinference.matrix.character;
28 import java.io.IOException;
29 import java.io.StringWriter;
30 import java.io.Writer;
31 import java.util.HashMap;
32 import java.util.List;
35 import org.forester.io.parsers.nexus.NexusConstants;
36 import org.forester.util.ForesterUtil;
37 import org.forester.util.IllegalFormatUseException;
39 public class BasicCharacterStateMatrix<S> implements CharacterStateMatrix<S> {
// Cell values, addressed as [identifier index][character index]; read back through a cast to S in getState().
41 final Object[][] _states;
// Identifier (row) labels by index; an slot stays null until setIdentifier() fills it.
42 final String[] _identifiers;
// Character (column) labels by index; a slot stays null until setCharacter() fills it.
43 final String[] _characters;
// Reverse lookup from identifier label to its index.
44 final Map<String, Integer> _identifier_index_map;
// Reverse lookup from character label to its index.
45 final Map<String, Integer> _character_index_map;
/**
 * Creates a matrix of the given dimensions in which every state and every
 * label is still unset (null).
 *
 * @param number_of_identifiers number of identifiers (rows)
 * @param number_of_characters number of characters (columns)
 */
public BasicCharacterStateMatrix( final int number_of_identifiers, final int number_of_characters ) {
    // Label storage plus the label -> index lookup, one pair per axis.
    _identifiers = new String[ number_of_identifiers ];
    _identifier_index_map = new HashMap<String, Integer>( number_of_identifiers );
    _characters = new String[ number_of_characters ];
    _character_index_map = new HashMap<String, Integer>( number_of_characters );
    // State grid: one row per identifier, one column per character.
    _states = new Object[ number_of_identifiers ][ number_of_characters ];
}
/**
 * Creates a matrix of the given dimensions with every cell preset to
 * {@code default_state}. Labels are left unset.
 *
 * @param number_of_identifiers number of identifiers (rows)
 * @param number_of_characters number of characters (columns)
 * @param default_state value stored in every cell (may be null)
 */
public BasicCharacterStateMatrix( final int number_of_identifiers,
                                  final int number_of_characters,
                                  final S default_state ) {
    // The column count must be number_of_characters; passing the identifier
    // count twice produced a square matrix and made the fill loop below
    // overrun (or under-fill) the columns.
    this( number_of_identifiers, number_of_characters );
    for( int identifier = 0; identifier < number_of_identifiers; ++identifier ) {
        for( int character = 0; character < number_of_characters; ++character ) {
            setState( identifier, character, default_state );
        }
    }
}
/**
 * Creates a matrix from a list of rows; each inner list holds the states of
 * one identifier. The number of characters is taken from the first row.
 *
 * @param states rows of states; must be non-null, non-empty and have a non-null first row
 * @throws IllegalArgumentException if {@code states} is null, empty, or its first row is null
 */
public BasicCharacterStateMatrix( final List<List<S>> states ) {
    final boolean unusable = ( states == null ) || ( states.size() < 1 ) || ( states.get( 0 ) == null );
    if ( unusable ) {
        throw new IllegalArgumentException( "attempt to create character state matrix from empty list" );
    }
    final int number_of_identifiers = states.size();
    final int number_of_characters = states.get( 0 ).size();
    _identifiers = new String[ number_of_identifiers ];
    _identifier_index_map = new HashMap<String, Integer>( number_of_identifiers );
    _characters = new String[ number_of_characters ];
    _character_index_map = new HashMap<String, Integer>( number_of_characters );
    _states = new Object[ number_of_identifiers ][ number_of_characters ];
    // Copy cell by cell; only the first number_of_characters entries of each row are read.
    for( int row = 0; row < number_of_identifiers; ++row ) {
        for( int column = 0; column < number_of_characters; ++column ) {
            final S state = states.get( row ).get( column );
            setState( row, column, state );
        }
    }
}
84 public BasicCharacterStateMatrix( final S[][] states ) {
85 this( states.length, states[ 0 ].length );
86 for( int identifier = 0; identifier < states.length; ++identifier ) {
87 for( int character = 0; character < states[ 0 ].length; ++character ) {
88 setState( identifier, character, states[ identifier ][ character ] );
94 public boolean containsCharacter( final String character ) {
95 return _character_index_map.containsKey( character );
99 public boolean containsIdentifier( final String identifier ) {
100 return _identifier_index_map.containsKey( identifier );
104 public CharacterStateMatrix<S> copy() {
105 final CharacterStateMatrix<S> new_matrix = new BasicCharacterStateMatrix<S>( getNumberOfIdentifiers(),
106 getNumberOfCharacters() );
107 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
108 if ( getCharacter( character ) != null ) {
109 new_matrix.setCharacter( character, getCharacter( character ) );
112 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
113 if ( getIdentifier( identifier ) != null ) {
114 new_matrix.setIdentifier( identifier, getIdentifier( identifier ) );
116 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
117 new_matrix.setState( identifier, character, getState( identifier, character ) );
124 @SuppressWarnings("unchecked")
125 public boolean equals( final Object o ) {
129 else if ( o == null ) {
130 throw new IllegalArgumentException( "attempt to check character state matrix equality to null" );
132 else if ( o.getClass() != this.getClass() ) {
133 throw new IllegalArgumentException( "attempt to check character state matrix to " + o + " [" + o.getClass()
137 final CharacterStateMatrix<S> other = ( CharacterStateMatrix<S> ) o;
138 if ( ( getNumberOfIdentifiers() != other.getNumberOfIdentifiers() )
139 || ( getNumberOfCharacters() != other.getNumberOfCharacters() ) ) {
141 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
142 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
143 final S s = getState( identifier, character );
144 final S os = other.getState( identifier, character );
148 else if ( ( s == null ) && ( os != null ) ) {
151 else if ( ( s != null ) && ( os == null ) ) {
154 else if ( !s.equals( other.getState( identifier, character ) ) ) {
164 public String getCharacter( final int character_index ) {
165 return _characters[ character_index ];
169 public int getCharacterIndex( final String character ) {
170 if ( !_character_index_map.containsKey( character ) ) {
171 throw new IllegalArgumentException( "character [" + character + "] not found" );
173 return _character_index_map.get( character );
177 public String getIdentifier( final int identifier_index ) {
178 return _identifiers[ identifier_index ];
182 public int getIdentifierIndex( final String identifier ) {
183 if ( !_identifier_index_map.containsKey( identifier ) ) {
184 throw new IllegalArgumentException( "indentifier [" + identifier + "] not found" );
186 return _identifier_index_map.get( identifier );
189 private int getLengthOfLongestState() {
191 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
192 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
193 final S s = getState( identifier, character );
195 final int l = getState( identifier, character ).toString().length();
206 public int getNumberOfCharacters() {
208 return _states[ 0 ].length;
216 public int getNumberOfIdentifiers() {
217 return _states.length;
221 @SuppressWarnings("unchecked")
222 public S getState( final int identifier_index, final int character_index ) {
223 return ( S ) _states[ identifier_index ][ character_index ];
227 public S getState( final String identifier, final int character_index ) {
228 if ( !containsIdentifier( identifier ) ) {
229 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
231 return getState( _identifier_index_map.get( identifier ), character_index );
235 public S getState( final String identifier, final String character ) {
236 if ( !containsIdentifier( identifier ) ) {
237 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
239 if ( !containsCharacter( character ) ) {
240 throw new IllegalArgumentException( "character [" + character + "] not found" );
242 return getState( _identifier_index_map.get( identifier ), _character_index_map.get( character ) );
246 public boolean isEmpty() {
247 return getNumberOfIdentifiers() <= 0;
251 public CharacterStateMatrix<S> pivot() {
252 final CharacterStateMatrix<S> new_matrix = new BasicCharacterStateMatrix<S>( getNumberOfCharacters(),
253 getNumberOfIdentifiers() );
254 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
255 if ( getCharacter( character ) != null ) {
256 new_matrix.setIdentifier( character, getCharacter( character ) );
259 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
260 if ( getIdentifier( identifier ) != null ) {
261 new_matrix.setCharacter( identifier, getIdentifier( identifier ) );
263 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
264 new_matrix.setState( character, identifier, getState( identifier, character ) );
271 public void setCharacter( final int character_index, final String character ) {
272 if ( character == null ) {
273 throw new IllegalArgumentException( "attempt to use null character" );
275 _characters[ character_index ] = character;
276 if ( _character_index_map.containsKey( character ) ) {
277 throw new IllegalArgumentException( "character [" + character + "] is not unique" );
279 _character_index_map.put( character, character_index );
283 public void setIdentifier( final int identifier_index, final String identifier ) {
284 if ( identifier == null ) {
285 throw new IllegalArgumentException( "attempt to use null identifier" );
287 _identifiers[ identifier_index ] = identifier;
288 if ( _identifier_index_map.containsKey( identifier ) ) {
289 throw new IllegalArgumentException( "identifier [" + identifier + "] is not unique" );
291 _identifier_index_map.put( identifier, identifier_index );
295 public void setState( final int identifier_index, final int character_index, final S state ) {
296 _states[ identifier_index ][ character_index ] = state;
300 public void setState( final String identifier, final int character_index, final S state ) {
301 if ( !_identifier_index_map.containsKey( identifier ) ) {
302 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
304 setState( _identifier_index_map.get( identifier ), character_index, state );
308 public void setState( final String identifier, final String character, final S state ) {
309 if ( !containsIdentifier( identifier ) ) {
310 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
312 if ( !containsCharacter( character ) ) {
313 throw new IllegalArgumentException( "character [" + character + "] not found" );
315 setState( _identifier_index_map.get( identifier ), _character_index_map.get( character ), state );
319 public String toString() {
320 StringWriter w = new StringWriter();
324 catch ( IOException e ) {
325 // TODO Auto-generated catch block
/**
 * Writes this matrix as a plain-text table: two header lines with the
 * identifier and character counts, one line of character labels, then one
 * line per identifier holding its label followed by its states.
 * Null labels and null states are written as padded blanks.
 *
 * NOTE(review): this listing appears to omit some short lines (for example
 * the bodies of the "not the last column" checks, presumably separator
 * writes) -- confirm against the complete file before changing the layout.
 *
 * @param writer destination of the output
 * @throws IOException if the writer fails
 */
331 private void toForester( final Writer writer ) throws IOException {
// Width of every state/character column: longest state text plus 5.
332 final int longest = getLengthOfLongestState() + 5;
333 writer.write( "Identifiers: " );
334 writer.write( String.valueOf( getNumberOfIdentifiers() ) );
335 writer.write( ForesterUtil.LINE_SEPARATOR );
336 writer.write( "Characters : " );
337 writer.write( String.valueOf( getNumberOfCharacters() ) );
338 writer.write( ForesterUtil.LINE_SEPARATOR );
// Blank cell of width 20 above the identifier-label column (presumably ForesterUtil.pad pads to the given width -- confirm).
339 writer.write( ForesterUtil.pad( "", 20, ' ', false ).toString() );
// Header row: one padded cell per character label.
341 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
342 final String c = getCharacter( character );
343 writer.write( c != null ? ForesterUtil.pad( c, longest, ' ', false ).toString() : ForesterUtil
344 .pad( "", longest, ' ', false ).toString() );
// True for every column except the last one.
345 if ( character < ( getNumberOfCharacters() - 1 ) ) {
349 writer.write( ForesterUtil.LINE_SEPARATOR );
// Data rows: identifier label (width 20) followed by that row's states.
350 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
351 if ( getIdentifier( identifier ) != null ) {
352 writer.write( ForesterUtil.pad( getIdentifier( identifier ), 20, ' ', false ).toString() );
355 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
356 final S state = getState( identifier, character );
357 writer.write( state != null ? ForesterUtil.pad( state.toString(), longest, ' ', false ).toString()
358 : ForesterUtil.pad( "", longest, ' ', false ).toString() );
359 if ( character < ( getNumberOfCharacters() - 1 ) ) {
// Line break between rows, but none after the last row.
363 if ( identifier < ( getNumberOfIdentifiers() - 1 ) ) {
364 writer.write( ForesterUtil.LINE_SEPARATOR );
/**
 * Writes this matrix in Nexus format: the Nexus header constant, a line
 * break, then the taxa block followed by the binary characters block.
 *
 * NOTE(review): a few lines between the signature and the first write are
 * not visible in this listing -- confirm whether a precondition check lives there.
 *
 * @param writer destination of the output
 * @throws IOException if the writer fails
 */
369 private void toNexus( final Writer writer ) throws IOException {
373 writer.write( NexusConstants.NEXUS );
374 writer.write( ForesterUtil.LINE_SEPARATOR );
// Taxa (identifier labels) first, then the characters block with labels and matrix.
375 writeNexusTaxaBlock( writer );
376 writeNexusBinaryChractersBlock( writer );
/**
 * Writes this matrix in Phylip format: a first line with the identifier and
 * character counts, then one line per identifier holding its padded label
 * followed by its padded states. Empty identifier labels are rejected.
 *
 * NOTE(review): several short lines are not visible in this listing,
 * including the declaration of `pad` and any separator writes -- confirm
 * against the complete file.
 *
 * @param writer destination of the output
 * @throws IOException if the writer fails
 * @throws IllegalFormatUseException if an identifier label is empty
 */
379 private void toPhylip( final Writer writer ) throws IOException {
// NOTE(review): Writer.write(int) emits the single character with that code, not the decimal
// number; String.valueOf(...) as used in toForester() looks like what was intended -- confirm.
385 writer.write( getNumberOfIdentifiers() );
// NOTE(review): same concern as above for the character count.
387 writer.write( getNumberOfCharacters() );
388 writer.write( ForesterUtil.LINE_SEPARATOR );
389 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
390 if ( !ForesterUtil.isEmpty( getIdentifier( identifier ) ) ) {
391 writer.write( ForesterUtil.pad( getIdentifier( identifier ), pad, ' ', false ).toString() );
// Reached when the identifier label is empty (the enclosing else is not visible in this listing).
396 throw new IllegalFormatUseException( "Phylip format does not allow empty identifiers" );
399 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
// NOTE(review): toString() is invoked before the null test below, so a null state raises a
// NullPointerException here and the blank-padding branch is effectively unreachable.
400 final String state = getState( identifier, character ).toString();
401 writer.write( state != null ? ForesterUtil.pad( state, pad, ' ', false ).toString() : ForesterUtil
402 .pad( "", pad, ' ', false ).toString() );
// True for every column except the last one.
403 if ( character < ( getNumberOfCharacters() - 1 ) ) {
// Line break between rows, but none after the last row.
408 if ( identifier < ( getNumberOfIdentifiers() - 1 ) ) {
409 writer.write( ForesterUtil.LINE_SEPARATOR );
416 public void toWriter( final Writer writer ) throws IOException {
417 toForester( writer );
/**
 * Writes this matrix to the given writer in the requested format.
 *
 * NOTE(review): the dispatch on `format` is not visible in this listing;
 * only the toForester() path and the failure path can be seen. Presumably
 * the missing lines select between toForester(), toPhylip() and toNexus() -- confirm.
 *
 * @param writer destination of the output
 * @param format the output format to use
 * @throws IOException if the writer fails
 * @throws IllegalArgumentException if the format is not recognized
 */
421 public void toWriter( final Writer writer, final Format format ) throws IOException {
427 toForester( writer );
// Fallback for a format value that none of the handled cases matched.
433 throw new IllegalArgumentException( "Unknown format:" + format );
/**
 * Writes the Nexus characters block for binary data: block opener,
 * DIMENSIONS/NCHAR with the character count, the character state labels,
 * the FORMAT line declaring the standard datatype with the absent/present
 * symbols, the matrix itself, and the block terminator.
 *
 * NOTE(review): punctuation writes between the keyword writes (spaces,
 * "=", ";" and the like) are not visible in this listing -- confirm against
 * the complete file before editing the output.
 *
 * @param w destination of the output
 * @throws IOException if the writer fails
 */
437 public void writeNexusBinaryChractersBlock( final Writer w ) throws IOException {
439 // DIMENSIONS NCHAR=x;
440 //BEGIN CHARSTATELABELS
444 // FORMAT DATATYPE=STANDARD SYMBOLS=;
450 w.write( NexusConstants.BEGIN_CHARACTERS );
451 w.write( ForesterUtil.LINE_SEPARATOR );
// DIMENSIONS line carrying the number of characters.
453 w.write( NexusConstants.DIMENSIONS );
455 w.write( NexusConstants.NCHAR );
457 w.write( String.valueOf( getNumberOfCharacters() ) );
459 w.write( ForesterUtil.LINE_SEPARATOR );
460 writeNexusCharstatelabels( w );
// FORMAT line: datatype plus the two symbols used for binary states.
462 w.write( NexusConstants.FORMAT );
464 w.write( NexusConstants.DATATYPE );
466 w.write( NexusConstants.STANDARD );
468 w.write( NexusConstants.SYMBOLS );
470 w.write( String.valueOf( BinaryStates.ABSENT ) );
471 w.write( String.valueOf( BinaryStates.PRESENT ) );
473 w.write( ForesterUtil.LINE_SEPARATOR );
474 writeNexusMatrix( w );
475 w.write( ForesterUtil.LINE_SEPARATOR );
476 w.write( NexusConstants.END );
477 w.write( ForesterUtil.LINE_SEPARATOR );
/**
 * Writes the Nexus CHARSTATELABELS section: the keyword, then one entry per
 * character consisting of its 1-based position and its label, the label
 * being opened with a single quote.
 *
 * NOTE(review): the closing quote and the entry/section terminators are not
 * visible in this listing -- confirm against the complete file.
 * NOTE(review): getCharacter(i) returns null for an unlabeled character and
 * Writer.write(String) does not accept null -- confirm callers always label
 * every character first.
 *
 * @param w destination of the output
 * @throws IOException if the writer fails
 */
480 public void writeNexusCharstatelabels( final Writer w ) throws IOException {
482 w.write( NexusConstants.CHARSTATELABELS );
483 w.write( ForesterUtil.LINE_SEPARATOR );
484 for( int i = 0; i < getNumberOfCharacters(); ++i ) {
// Nexus positions are 1-based, hence i + 1.
485 w.write( " " + ( i + 1 ) + " '" );
486 w.write( getCharacter( i ) );
// True for every character except the last one.
488 if ( i < ( getNumberOfCharacters() - 1 ) ) {
490 w.write( ForesterUtil.LINE_SEPARATOR );
494 w.write( ForesterUtil.LINE_SEPARATOR );
/**
 * Writes the Nexus MATRIX section: the keyword, then one line per
 * identifier with its label padded to width 20 (when non-null) followed by
 * the text of each of its states. Every state must be a non-null
 * BinaryStates value other than UNKNOWN.
 *
 * NOTE(review): some short lines (separators, terminators) are not visible
 * in this listing -- confirm against the complete file.
 *
 * @param w destination of the output
 * @throws IOException if the writer fails
 * @throws IllegalFormatUseException if a state is null, not a BinaryStates value, or UNKNOWN
 */
497 public void writeNexusMatrix( final Writer w ) throws IOException {
499 w.write( NexusConstants.MATRIX );
500 w.write( ForesterUtil.LINE_SEPARATOR );
501 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
502 if ( getIdentifier( identifier ) != null ) {
504 w.write( ForesterUtil.pad( getIdentifier( identifier ), 20, ' ', false ).toString() );
507 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
508 final S state = getState( identifier, character );
509 if ( state == null ) {
510 throw new IllegalFormatUseException( "character state matrix cannot contain null if to be represented in nexus format" );
512 if ( !( state instanceof BinaryStates ) ) {
// NOTE(review): the message reports the class of cell (0, 0) rather than of the offending
// `state`; getState( 0, 0 ) may also be null here, which would raise a NullPointerException.
513 throw new IllegalFormatUseException( "nexus format representation expects binary character data - got ["
514 + getState( 0, 0 ).getClass() + "] instead" );
516 if ( state == BinaryStates.UNKNOWN ) {
517 throw new IllegalFormatUseException( "character state matrix cannot contain unknown states if to be represented in nexus format" );
519 w.write( state.toString() );
// Line break between rows, but none after the last row.
521 if ( identifier < ( getNumberOfIdentifiers() - 1 ) ) {
522 w.write( ForesterUtil.LINE_SEPARATOR );
/**
 * Writes the Nexus taxa block: block opener, DIMENSIONS/NTAX with the
 * identifier count, TAXLABELS followed by every identifier label, and the
 * block terminator.
 *
 * NOTE(review): punctuation writes (spaces, "=", ";") and the end of this
 * method are not visible in this listing -- confirm against the complete file.
 * NOTE(review): getIdentifier(i) returns null for an unlabeled identifier
 * and Writer.write(String) does not accept null -- confirm callers always
 * label every identifier first.
 *
 * @param w destination of the output
 * @throws IOException if the writer fails
 */
528 public void writeNexusTaxaBlock( final Writer w ) throws IOException {
530 // DIMENSIONS NTAX=n;
531 // TAXLABELS fish frog snake;
533 w.write( NexusConstants.BEGIN_TAXA );
534 w.write( ForesterUtil.LINE_SEPARATOR );
// DIMENSIONS line carrying the number of taxa (identifiers).
536 w.write( NexusConstants.DIMENSIONS );
538 w.write( NexusConstants.NTAX );
540 w.write( String.valueOf( getNumberOfIdentifiers() ) );
542 w.write( ForesterUtil.LINE_SEPARATOR );
// TAXLABELS line listing every identifier label in index order.
544 w.write( NexusConstants.TAXLABELS );
545 for( int i = 0; i < getNumberOfIdentifiers(); ++i ) {
547 w.write( getIdentifier( i ) );
550 w.write( ForesterUtil.LINE_SEPARATOR );
551 w.write( NexusConstants.END );
552 w.write( ForesterUtil.LINE_SEPARATOR );