2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.evoinference.matrix.character;
28 import java.io.IOException;
29 import java.io.Writer;
30 import java.util.HashMap;
31 import java.util.List;
34 import org.forester.io.parsers.nexus.NexusConstants;
35 import org.forester.util.ForesterUtil;
36 import org.forester.util.IllegalFormatUseException;
// Array-backed implementation of CharacterStateMatrix: one row per identifier
// and one column per character, with optional unique names for rows and columns.
38 public class BasicCharacterStateMatrix<S> implements CharacterStateMatrix<S> {
// Cell values indexed as [ identifier ][ character ]; held as Object and cast
// back to S when read (see getState).
40 final Object[][] _states;
// Identifier (row) names by row index; an entry stays null until setIdentifier names it.
41 final String[] _identifiers;
// Character (column) names by column index; an entry stays null until setCharacter names it.
42 final String[] _characters;
// Reverse lookup: identifier name -> row index.
43 final Map<String, Integer> _identifier_index_map;
// Reverse lookup: character name -> column index.
44 final Map<String, Integer> _character_index_map;
/**
 * Creates a matrix of the given dimensions. Every state starts out as
 * {@code null} and no identifier or character has a name yet.
 *
 * @param number_of_identifiers number of rows (identifiers)
 * @param number_of_characters number of columns (characters)
 */
public BasicCharacterStateMatrix( final int number_of_identifiers, final int number_of_characters ) {
    // Cell storage plus the row and column label arrays.
    this._states = new Object[ number_of_identifiers ][ number_of_characters ];
    this._identifiers = new String[ number_of_identifiers ];
    this._characters = new String[ number_of_characters ];
    // Name -> index lookups, each given the matching dimension as initial capacity.
    this._identifier_index_map = new HashMap<String, Integer>( number_of_identifiers );
    this._character_index_map = new HashMap<String, Integer>( number_of_characters );
}
/**
 * Creates a matrix of the given dimensions in which every cell holds
 * {@code default_state}.
 *
 * @param number_of_identifiers number of rows (identifiers)
 * @param number_of_characters number of columns (characters)
 * @param default_state the state written into every cell (may be null)
 */
public BasicCharacterStateMatrix( final int number_of_identifiers,
                                  final int number_of_characters,
                                  final S default_state ) {
    // The column count must be the number of characters; passing the number
    // of identifiers twice produced a square matrix regardless of the
    // requested shape.
    this( number_of_identifiers, number_of_characters );
    for( int identifier = 0; identifier < number_of_identifiers; ++identifier ) {
        for( int character = 0; character < number_of_characters; ++character ) {
            setState( identifier, character, default_state );
        }
    }
}
/**
 * Creates a matrix from a list of rows. The outer list supplies the
 * identifiers (rows); the size of the first row determines the number of
 * characters (columns) read from every row.
 *
 * @param states the rows of states; must be non-null, non-empty and have a
 *               non-null first row
 * @throws IllegalArgumentException if {@code states} is null, empty, or its
 *                                  first row is null
 */
public BasicCharacterStateMatrix( final List<List<S>> states ) {
    final boolean unusable = ( states == null ) || ( states.size() < 1 ) || ( states.get( 0 ) == null );
    if ( unusable ) {
        throw new IllegalArgumentException( "attempt to create character state matrix from empty list" );
    }
    final int rows = states.size();
    final int columns = states.get( 0 ).size();
    _states = new Object[ rows ][ columns ];
    _identifiers = new String[ rows ];
    _characters = new String[ columns ];
    _identifier_index_map = new HashMap<String, Integer>( rows );
    _character_index_map = new HashMap<String, Integer>( columns );
    // Copy cell by cell; only the first 'columns' entries of each row are read.
    for( int row = 0; row < rows; ++row ) {
        final List<S> row_states = states.get( row );
        for( int column = 0; column < columns; ++column ) {
            setState( row, column, row_states.get( column ) );
        }
    }
}
83 public BasicCharacterStateMatrix( final S[][] states ) {
84 this( states.length, states[ 0 ].length );
85 for( int identifier = 0; identifier < states.length; ++identifier ) {
86 for( int character = 0; character < states[ 0 ].length; ++character ) {
87 setState( identifier, character, states[ identifier ][ character ] );
93 public boolean containsCharacter( final String character ) {
94 return _character_index_map.containsKey( character );
98 public boolean containsIdentifier( final String identifier ) {
99 return _identifier_index_map.containsKey( identifier );
103 public CharacterStateMatrix<S> copy() {
104 final CharacterStateMatrix<S> new_matrix = new BasicCharacterStateMatrix<S>( getNumberOfIdentifiers(),
105 getNumberOfCharacters() );
106 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
107 if ( getCharacter( character ) != null ) {
108 new_matrix.setCharacter( character, getCharacter( character ) );
111 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
112 if ( getIdentifier( identifier ) != null ) {
113 new_matrix.setIdentifier( identifier, getIdentifier( identifier ) );
115 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
116 new_matrix.setState( identifier, character, getState( identifier, character ) );
123 public boolean equals( final Object o ) {
127 else if ( o == null ) {
128 throw new IllegalArgumentException( "attempt to check character state matrix equality to null" );
130 else if ( o.getClass() != this.getClass() ) {
131 throw new IllegalArgumentException( "attempt to check character state matrix to " + o + " [" + o.getClass()
135 final CharacterStateMatrix<S> other = ( CharacterStateMatrix<S> ) o;
136 if ( ( getNumberOfIdentifiers() != other.getNumberOfIdentifiers() )
137 || ( getNumberOfCharacters() != other.getNumberOfCharacters() ) ) {
139 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
140 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
141 final S s = getState( identifier, character );
142 final S os = other.getState( identifier, character );
146 else if ( ( s == null ) && ( os != null ) ) {
149 else if ( ( s != null ) && ( os == null ) ) {
152 else if ( !s.equals( other.getState( identifier, character ) ) ) {
162 public String getCharacter( final int character_index ) {
163 return _characters[ character_index ];
167 public int getCharacterIndex( final String character ) {
168 if ( !_character_index_map.containsKey( character ) ) {
169 throw new IllegalArgumentException( "character [" + character + "] not found" );
171 return _character_index_map.get( character );
175 public String getIdentifier( final int identifier_index ) {
176 return _identifiers[ identifier_index ];
180 public int getIdentifierIndex( final String identifier ) {
181 if ( !_identifier_index_map.containsKey( identifier ) ) {
182 throw new IllegalArgumentException( "indentifier [" + identifier + "] not found" );
184 return _identifier_index_map.get( identifier );
187 private int getLengthOfLongestState() {
189 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
190 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
191 final S s = getState( identifier, character );
193 final int l = getState( identifier, character ).toString().length();
204 public int getNumberOfCharacters() {
206 return _states[ 0 ].length;
214 public int getNumberOfIdentifiers() {
215 return _states.length;
219 public S getState( final int identifier_index, final int character_index ) {
220 return ( S ) _states[ identifier_index ][ character_index ];
224 public S getState( final String identifier, final int character_index ) {
225 if ( !containsIdentifier( identifier ) ) {
226 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
228 return getState( _identifier_index_map.get( identifier ), character_index );
232 public S getState( final String identifier, final String character ) {
233 if ( !containsIdentifier( identifier ) ) {
234 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
236 if ( !containsCharacter( character ) ) {
237 throw new IllegalArgumentException( "character [" + character + "] not found" );
239 return getState( _identifier_index_map.get( identifier ), _character_index_map.get( character ) );
243 public boolean isEmpty() {
244 return getNumberOfIdentifiers() <= 0;
248 public CharacterStateMatrix<S> pivot() {
249 final CharacterStateMatrix<S> new_matrix = new BasicCharacterStateMatrix<S>( getNumberOfCharacters(),
250 getNumberOfIdentifiers() );
251 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
252 if ( getCharacter( character ) != null ) {
253 new_matrix.setIdentifier( character, getCharacter( character ) );
256 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
257 if ( getIdentifier( identifier ) != null ) {
258 new_matrix.setCharacter( identifier, getIdentifier( identifier ) );
260 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
261 new_matrix.setState( character, identifier, getState( identifier, character ) );
268 public void setCharacter( final int character_index, final String character ) {
269 if ( character == null ) {
270 throw new IllegalArgumentException( "attempt to use null character" );
272 _characters[ character_index ] = character;
273 if ( _character_index_map.containsKey( character ) ) {
274 throw new IllegalArgumentException( "character [" + character + "] is not unique" );
276 _character_index_map.put( character, character_index );
280 public void setIdentifier( final int identifier_index, final String identifier ) {
281 if ( identifier == null ) {
282 throw new IllegalArgumentException( "attempt to use null identifier" );
284 _identifiers[ identifier_index ] = identifier;
285 if ( _identifier_index_map.containsKey( identifier ) ) {
286 throw new IllegalArgumentException( "identifier [" + identifier + "] is not unique" );
288 _identifier_index_map.put( identifier, identifier_index );
292 public void setState( final int identifier_index, final int character_index, final S state ) {
293 _states[ identifier_index ][ character_index ] = state;
297 public void setState( final String identifier, final int character_index, final S state ) {
298 if ( !_identifier_index_map.containsKey( identifier ) ) {
299 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
301 setState( _identifier_index_map.get( identifier ), character_index, state );
305 public void setState( final String identifier, final String character, final S state ) {
306 if ( !containsIdentifier( identifier ) ) {
307 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
309 if ( !containsCharacter( character ) ) {
310 throw new IllegalArgumentException( "character [" + character + "] not found" );
312 setState( _identifier_index_map.get( identifier ), _character_index_map.get( character ), state );
// Writes the matrix as a plain-text table: two header lines with the
// identifier and character counts, one line of character names, then one
// line per identifier holding its name and states.
// Throws IOException if the writer fails.
315 private void toForester( final Writer writer ) throws IOException {
// Width used for every character name and state: longest state string plus 5.
316 final int longest = getLengthOfLongestState() + 5;
// Header: number of identifiers, then number of characters, one per line.
317 writer.write( "Identifiers: " );
318 writer.write( String.valueOf( getNumberOfIdentifiers() ) );
319 writer.write( ForesterUtil.LINE_SEPARATOR );
320 writer.write( "Characters : " );
321 writer.write( String.valueOf( getNumberOfCharacters() ) );
322 writer.write( ForesterUtil.LINE_SEPARATOR );
// Empty cell of width 20 above the identifier-name column
// (presumably ForesterUtil.pad fills up to the given width -- confirm).
323 writer.write( ForesterUtil.pad( "", 20, ' ', false ).toString() );
// Character-name row; an unnamed character is written as an empty padded cell.
325 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
326 final String c = getCharacter( character );
327 writer.write( c != null ? ForesterUtil.pad( c, longest, ' ', false ).toString() : ForesterUtil
328 .pad( "", longest, ' ', false ).toString() );
// Something is emitted between cells but not after the last one.
329 if ( character < ( getNumberOfCharacters() - 1 ) ) {
333 writer.write( ForesterUtil.LINE_SEPARATOR );
// One output line per identifier.
334 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
// NOTE(review): the name cell is written only for named identifiers; an
// unnamed row may therefore not line up with the header -- confirm.
335 if ( getIdentifier( identifier ) != null ) {
336 writer.write( ForesterUtil.pad( getIdentifier( identifier ), 20, ' ', false ).toString() );
// States of this identifier; a null state is written as an empty padded cell.
339 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
340 final S state = getState( identifier, character );
341 writer.write( state != null ? ForesterUtil.pad( state.toString(), longest, ' ', false ).toString()
342 : ForesterUtil.pad( "", longest, ' ', false ).toString() );
343 if ( character < ( getNumberOfCharacters() - 1 ) ) {
// Line break between rows, but none after the final row.
347 if ( identifier < ( getNumberOfIdentifiers() - 1 ) ) {
348 writer.write( ForesterUtil.LINE_SEPARATOR );
// Writes the matrix as a Nexus document: the Nexus header constant, a line
// break, then the taxa block followed by the binary characters block.
// Throws IOException if the writer fails.
353 private void toNexus( final Writer writer ) throws IOException {
357 writer.write( NexusConstants.NEXUS );
358 writer.write( ForesterUtil.LINE_SEPARATOR );
// Identifiers are emitted as taxa, states as a characters block.
359 writeNexusTaxaBlock( writer );
360 writeNexusBinaryChractersBlock( writer );
363 private void toPhylip( final Writer writer ) throws IOException {
369 writer.write( getNumberOfIdentifiers() );
371 writer.write( getNumberOfCharacters() );
372 writer.write( ForesterUtil.LINE_SEPARATOR );
373 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
374 if ( !ForesterUtil.isEmpty( getIdentifier( identifier ) ) ) {
375 writer.write( ForesterUtil.pad( getIdentifier( identifier ), pad, ' ', false ).toString() );
380 throw new IllegalFormatUseException( "Phylip format does not allow empty identifiers" );
383 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
384 final String state = getState( identifier, character ).toString();
385 writer.write( state != null ? ForesterUtil.pad( state, pad, ' ', false ).toString() : ForesterUtil
386 .pad( "", pad, ' ', false ).toString() );
387 if ( character < ( getNumberOfCharacters() - 1 ) ) {
392 if ( identifier < ( getNumberOfIdentifiers() - 1 ) ) {
393 writer.write( ForesterUtil.LINE_SEPARATOR );
399 //to format for microarray-style clustering
400 // states are ints in this case
403 public void toWriter( final Writer writer ) throws IOException {
404 toForester( writer );
// Writes the matrix to the given writer in the requested format.
// Throws IOException if the writer fails and IllegalArgumentException if
// the format is not handled.
408 public void toWriter( final Writer writer, final Format format ) throws IOException {
// Branch producing the forester table layout.
414 toForester( writer );
// Reached when no branch matches the requested format.
420 throw new IllegalArgumentException( "Unknown format:" + format );
// Writes a Nexus characters block for binary states: block opener,
// dimensions with the number of characters, the character state labels, a
// format line naming the absent/present symbols, the matrix, and the block
// terminator. The commented lines below sketch the intended output.
// Throws IOException if the writer fails.
424 public void writeNexusBinaryChractersBlock( final Writer w ) throws IOException {
426 // DIMENSIONS NCHAR=x;
427 //BEGIN CHARSTATELABELS
431 // FORMAT DATATYPE=STANDARD SYMBOLS=;
// Block opener.
437 w.write( NexusConstants.BEGIN_CHARACTERS );
438 w.write( ForesterUtil.LINE_SEPARATOR );
// Dimensions line: NCHAR is the number of characters (columns).
440 w.write( NexusConstants.DIMENSIONS );
442 w.write( NexusConstants.NCHAR );
444 w.write( String.valueOf( getNumberOfCharacters() ) );
446 w.write( ForesterUtil.LINE_SEPARATOR );
// One label per character.
447 writeNexusCharstatelabels( w );
// Format line: standard datatype with the two binary symbols, absent first.
449 w.write( NexusConstants.FORMAT );
451 w.write( NexusConstants.DATATYPE );
453 w.write( NexusConstants.STANDARD );
455 w.write( NexusConstants.SYMBOLS );
457 w.write( String.valueOf( BinaryStates.ABSENT ) );
458 w.write( String.valueOf( BinaryStates.PRESENT ) );
460 w.write( ForesterUtil.LINE_SEPARATOR );
// State rows, then the block terminator.
461 writeNexusMatrix( w );
462 w.write( ForesterUtil.LINE_SEPARATOR );
463 w.write( NexusConstants.END );
464 w.write( ForesterUtil.LINE_SEPARATOR );
// Writes the Nexus character state labels: the section keyword followed by
// one numbered, quoted label per character.
// Throws IOException if the writer fails.
467 public void writeNexusCharstatelabels( final Writer w ) throws IOException {
469 w.write( NexusConstants.CHARSTATELABELS );
470 w.write( ForesterUtil.LINE_SEPARATOR );
471 for( int i = 0; i < getNumberOfCharacters(); ++i ) {
// Labels are numbered from 1; the opening quote is written here.
472 w.write( " " + ( i + 1 ) + " '" );
// NOTE(review): getCharacter( i ) is null for a character that was never
// named, and Writer.write( String ) does not accept null -- confirm that
// callers always name every character first.
473 w.write( getCharacter( i ) );
// Every label except the last is handled differently from the last one.
475 if ( i < ( getNumberOfCharacters() - 1 ) ) {
477 w.write( ForesterUtil.LINE_SEPARATOR );
481 w.write( ForesterUtil.LINE_SEPARATOR );
// Writes the Nexus matrix section: the section keyword, then one row per
// identifier with its padded name followed by its states.
// Throws IOException if the writer fails and IllegalFormatUseException if
// any state is null, not a BinaryStates value, or BinaryStates.UNKNOWN.
484 public void writeNexusMatrix( final Writer w ) throws IOException {
486 w.write( NexusConstants.MATRIX );
487 w.write( ForesterUtil.LINE_SEPARATOR );
488 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
// The name column (width 20) is written only for named identifiers.
489 if ( getIdentifier( identifier ) != null ) {
491 w.write( ForesterUtil.pad( getIdentifier( identifier ), 20, ' ', false ).toString() );
494 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
495 final S state = getState( identifier, character );
// Every cell is validated before it is written.
496 if ( state == null ) {
497 throw new IllegalFormatUseException( "character state matrix cannot contain null if to be represented in nexus format" );
499 if ( !( state instanceof BinaryStates ) ) {
// NOTE(review): the message reports the class of cell (0, 0), not of the
// offending state -- state.getClass() was probably intended.
500 throw new IllegalFormatUseException( "nexus format representation expects binary character data - got ["
501 + getState( 0, 0 ).getClass() + "] instead" );
503 if ( state == BinaryStates.UNKNOWN ) {
504 throw new IllegalFormatUseException( "character state matrix cannot contain unknown states if to be represented in nexus format" );
506 w.write( state.toString() );
// Line break between rows, but none after the final row.
508 if ( identifier < ( getNumberOfIdentifiers() - 1 ) ) {
509 w.write( ForesterUtil.LINE_SEPARATOR );
// Writes a Nexus taxa block: block opener, dimensions with the number of
// identifiers, the taxon labels (one per identifier), and the block
// terminator. The commented lines below sketch the intended output.
// Throws IOException if the writer fails.
515 public void writeNexusTaxaBlock( final Writer w ) throws IOException {
517 // DIMENSIONS NTAX=n;
518 // TAXLABELS fish frog snake;
// Block opener.
520 w.write( NexusConstants.BEGIN_TAXA );
521 w.write( ForesterUtil.LINE_SEPARATOR );
// Dimensions line: NTAX is the number of identifiers (rows).
523 w.write( NexusConstants.DIMENSIONS );
525 w.write( NexusConstants.NTAX );
527 w.write( String.valueOf( getNumberOfIdentifiers() ) );
529 w.write( ForesterUtil.LINE_SEPARATOR );
// Taxon labels: the identifier names in row order.
531 w.write( NexusConstants.TAXLABELS );
532 for( int i = 0; i < getNumberOfIdentifiers(); ++i ) {
// NOTE(review): getIdentifier( i ) is null for a row that was never named,
// and Writer.write( String ) does not accept null -- confirm that callers
// always name every identifier first.
534 w.write( getIdentifier( i ) );
// Block terminator.
537 w.write( ForesterUtil.LINE_SEPARATOR );
538 w.write( NexusConstants.END );
539 w.write( ForesterUtil.LINE_SEPARATOR );