2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.evoinference.matrix.character;
28 import java.io.IOException;
29 import java.io.Writer;
30 import java.util.HashMap;
31 import java.util.List;
34 import org.forester.io.parsers.nexus.NexusConstants;
35 import org.forester.util.ForesterUtil;
36 import org.forester.util.IllegalFormatUseException;
38 public class BasicCharacterStateMatrix<S> implements CharacterStateMatrix<S> {
// State grid: first index is the identifier (row), second index is the character (column).
// Cells are stored as Object and cast back to S when read.
40 final Object[][] _states;
// Identifier (row) names by row index; an entry stays null until setIdentifier is called for it.
41 final String[] _identifiers;
// Character (column) names by column index; an entry stays null until setCharacter is called for it.
42 final String[] _characters;
// Reverse lookup: identifier name -> row index.
43 final Map<String, Integer> _identifier_index_map;
// Reverse lookup: character name -> column index.
44 final Map<String, Integer> _character_index_map;
/**
 * Creates a matrix of the given dimensions. All states and all
 * identifier/character names start out as null.
 *
 * @param number_of_identifiers number of rows (identifiers)
 * @param number_of_characters number of columns (characters)
 */
public BasicCharacterStateMatrix( final int number_of_identifiers, final int number_of_characters ) {
    // Name tables together with their reverse-lookup maps, then the state grid.
    _identifiers = new String[ number_of_identifiers ];
    _identifier_index_map = new HashMap<String, Integer>( number_of_identifiers );
    _characters = new String[ number_of_characters ];
    _character_index_map = new HashMap<String, Integer>( number_of_characters );
    _states = new Object[ number_of_identifiers ][ number_of_characters ];
}
/**
 * Creates a matrix of the given dimensions with every cell set to the
 * same initial state.
 *
 * @param number_of_identifiers number of rows (identifiers)
 * @param number_of_characters number of columns (characters)
 * @param default_state the state stored in every cell; may be null
 */
public BasicCharacterStateMatrix( final int number_of_identifiers,
                                  final int number_of_characters,
                                  final S default_state ) {
    // The column count must be number_of_characters. Passing the row count
    // here would allocate a square matrix, so the fill loop below would
    // either run past the end of a row or leave the matrix the wrong width.
    this( number_of_identifiers, number_of_characters );
    for( int identifier = 0; identifier < number_of_identifiers; ++identifier ) {
        for( int character = 0; character < number_of_characters; ++character ) {
            setState( identifier, character, default_state );
        }
    }
}
/**
 * Creates a matrix from a list of rows. The number of rows is the number of
 * identifiers; the size of the first row is the number of characters.
 *
 * @param states one inner list per identifier, each holding that identifier's states
 * @throws IllegalArgumentException if states is null, empty, or its first row is null
 */
public BasicCharacterStateMatrix( final List<List<S>> states ) {
    final boolean usable = ( states != null ) && ( states.size() >= 1 ) && ( states.get( 0 ) != null );
    if ( !usable ) {
        throw new IllegalArgumentException( "attempt to create character state matrix from empty list" );
    }
    final int columns = states.get( 0 ).size();
    final int rows = states.size();
    _states = new Object[ rows ][ columns ];
    _identifiers = new String[ rows ];
    _characters = new String[ columns ];
    _identifier_index_map = new HashMap<String, Integer>( rows );
    _character_index_map = new HashMap<String, Integer>( columns );
    // Copy cell by cell; every row is read up to the width of the first row.
    for( int row = 0; row < rows; ++row ) {
        for( int column = 0; column < columns; ++column ) {
            setState( row, column, states.get( row ).get( column ) );
        }
    }
}
83 public BasicCharacterStateMatrix( final S[][] states ) {
84 this( states.length, states[ 0 ].length );
85 for( int identifier = 0; identifier < states.length; ++identifier ) {
86 for( int character = 0; character < states[ 0 ].length; ++character ) {
87 setState( identifier, character, states[ identifier ][ character ] );
93 public boolean containsCharacter( final String character ) {
94 return _character_index_map.containsKey( character );
98 public boolean containsIdentifier( final String identifier ) {
99 return _identifier_index_map.containsKey( identifier );
103 public CharacterStateMatrix<S> copy() {
104 final CharacterStateMatrix<S> new_matrix = new BasicCharacterStateMatrix<S>( getNumberOfIdentifiers(),
105 getNumberOfCharacters() );
106 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
107 if ( getCharacter( character ) != null ) {
108 new_matrix.setCharacter( character, getCharacter( character ) );
111 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
112 if ( getIdentifier( identifier ) != null ) {
113 new_matrix.setIdentifier( identifier, getIdentifier( identifier ) );
115 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
116 new_matrix.setState( identifier, character, getState( identifier, character ) );
123 @SuppressWarnings("unchecked")
124 public boolean equals( final Object o ) {
128 else if ( o == null ) {
129 throw new IllegalArgumentException( "attempt to check character state matrix equality to null" );
131 else if ( o.getClass() != this.getClass() ) {
132 throw new IllegalArgumentException( "attempt to check character state matrix to " + o + " [" + o.getClass()
136 final CharacterStateMatrix<S> other = ( CharacterStateMatrix<S> ) o;
137 if ( ( getNumberOfIdentifiers() != other.getNumberOfIdentifiers() )
138 || ( getNumberOfCharacters() != other.getNumberOfCharacters() ) ) {
140 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
141 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
142 final S s = getState( identifier, character );
143 final S os = other.getState( identifier, character );
147 else if ( ( s == null ) && ( os != null ) ) {
150 else if ( ( s != null ) && ( os == null ) ) {
153 else if ( !s.equals( other.getState( identifier, character ) ) ) {
163 public String getCharacter( final int character_index ) {
164 return _characters[ character_index ];
168 public int getCharacterIndex( final String character ) {
169 if ( !_character_index_map.containsKey( character ) ) {
170 throw new IllegalArgumentException( "character [" + character + "] not found" );
172 return _character_index_map.get( character );
176 public String getIdentifier( final int identifier_index ) {
177 return _identifiers[ identifier_index ];
181 public int getIdentifierIndex( final String identifier ) {
182 if ( !_identifier_index_map.containsKey( identifier ) ) {
183 throw new IllegalArgumentException( "indentifier [" + identifier + "] not found" );
185 return _identifier_index_map.get( identifier );
188 private int getLengthOfLongestState() {
190 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
191 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
192 final S s = getState( identifier, character );
194 final int l = getState( identifier, character ).toString().length();
// Returns the number of characters (columns), taken from the length of the first row of the state grid.
// NOTE(review): the lines surrounding this return are not visible here; presumably a guard
// covers the case of a matrix with zero rows - confirm before relying on it.
205 public int getNumberOfCharacters() {
207 return _states[ 0 ].length;
215 public int getNumberOfIdentifiers() {
216 return _states.length;
220 @SuppressWarnings("unchecked")
221 public S getState( final int identifier_index, final int character_index ) {
222 return ( S ) _states[ identifier_index ][ character_index ];
226 public S getState( final String identifier, final int character_index ) {
227 if ( !containsIdentifier( identifier ) ) {
228 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
230 return getState( _identifier_index_map.get( identifier ), character_index );
234 public S getState( final String identifier, final String character ) {
235 if ( !containsIdentifier( identifier ) ) {
236 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
238 if ( !containsCharacter( character ) ) {
239 throw new IllegalArgumentException( "character [" + character + "] not found" );
241 return getState( _identifier_index_map.get( identifier ), _character_index_map.get( character ) );
245 public boolean isEmpty() {
246 return getNumberOfIdentifiers() <= 0;
250 public CharacterStateMatrix<S> pivot() {
251 final CharacterStateMatrix<S> new_matrix = new BasicCharacterStateMatrix<S>( getNumberOfCharacters(),
252 getNumberOfIdentifiers() );
253 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
254 if ( getCharacter( character ) != null ) {
255 new_matrix.setIdentifier( character, getCharacter( character ) );
258 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
259 if ( getIdentifier( identifier ) != null ) {
260 new_matrix.setCharacter( identifier, getIdentifier( identifier ) );
262 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
263 new_matrix.setState( character, identifier, getState( identifier, character ) );
270 public void setCharacter( final int character_index, final String character ) {
271 if ( character == null ) {
272 throw new IllegalArgumentException( "attempt to use null character" );
274 _characters[ character_index ] = character;
275 if ( _character_index_map.containsKey( character ) ) {
276 throw new IllegalArgumentException( "character [" + character + "] is not unique" );
278 _character_index_map.put( character, character_index );
282 public void setIdentifier( final int identifier_index, final String identifier ) {
283 if ( identifier == null ) {
284 throw new IllegalArgumentException( "attempt to use null identifier" );
286 _identifiers[ identifier_index ] = identifier;
287 if ( _identifier_index_map.containsKey( identifier ) ) {
288 throw new IllegalArgumentException( "identifier [" + identifier + "] is not unique" );
290 _identifier_index_map.put( identifier, identifier_index );
294 public void setState( final int identifier_index, final int character_index, final S state ) {
295 _states[ identifier_index ][ character_index ] = state;
299 public void setState( final String identifier, final int character_index, final S state ) {
300 if ( !_identifier_index_map.containsKey( identifier ) ) {
301 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
303 setState( _identifier_index_map.get( identifier ), character_index, state );
307 public void setState( final String identifier, final String character, final S state ) {
308 if ( !containsIdentifier( identifier ) ) {
309 throw new IllegalArgumentException( "identifier [" + identifier + "] not found" );
311 if ( !containsCharacter( character ) ) {
312 throw new IllegalArgumentException( "character [" + character + "] not found" );
314 setState( _identifier_index_map.get( identifier ), _character_index_map.get( character ), state );
// Writes the matrix as a plain-text table: two summary lines with the identifier and
// character counts, a header row of character names, then one row per identifier.
// NOTE(review): several short lines of this method (the bodies of the "not last column"
// branches, among others) are not visible in this section, so any separators they write
// are not described here.
317 private void toForester( final Writer writer ) throws IOException {
// Cell width: the longest state string plus 5.
318 final int longest = getLengthOfLongestState() + 5;
319 writer.write( "Identifiers: " );
320 writer.write( String.valueOf( getNumberOfIdentifiers() ) );
321 writer.write( ForesterUtil.LINE_SEPARATOR );
322 writer.write( "Characters : " );
323 writer.write( String.valueOf( getNumberOfCharacters() ) );
324 writer.write( ForesterUtil.LINE_SEPARATOR );
// Header row starts with a padded empty string where data rows have the identifier name
// (presumably 20 is the pad width - confirm against ForesterUtil.pad).
325 writer.write( ForesterUtil.pad( "", 20, ' ', false ).toString() );
// One padded header cell per character; an unset (null) name is written as a padded empty string.
327 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
328 final String c = getCharacter( character );
329 writer.write( c != null ? ForesterUtil.pad( c, longest, ' ', false ).toString() : ForesterUtil
330 .pad( "", longest, ' ', false ).toString() );
331 if ( character < ( getNumberOfCharacters() - 1 ) ) {
335 writer.write( ForesterUtil.LINE_SEPARATOR );
// Data rows: the padded identifier name (when set), then one padded cell per state.
336 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
337 if ( getIdentifier( identifier ) != null ) {
338 writer.write( ForesterUtil.pad( getIdentifier( identifier ), 20, ' ', false ).toString() );
341 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
342 final S state = getState( identifier, character );
// A null state is written as a padded empty string.
343 writer.write( state != null ? ForesterUtil.pad( state.toString(), longest, ' ', false ).toString()
344 : ForesterUtil.pad( "", longest, ' ', false ).toString() );
345 if ( character < ( getNumberOfCharacters() - 1 ) ) {
// A line separator follows every row except the last.
349 if ( identifier < ( getNumberOfIdentifiers() - 1 ) ) {
350 writer.write( ForesterUtil.LINE_SEPARATOR );
// Writes the matrix in NEXUS form: the NEXUS header constant and a line separator,
// followed by the taxa block and the binary characters block.
// NOTE(review): the lines between the signature and the first write are not visible here.
355 private void toNexus( final Writer writer ) throws IOException {
359 writer.write( NexusConstants.NEXUS );
360 writer.write( ForesterUtil.LINE_SEPARATOR );
361 writeNexusTaxaBlock( writer );
362 writeNexusBinaryChractersBlock( writer );
// Writes the matrix in Phylip-style form: a first line with the two dimensions, then one
// line per identifier consisting of the padded identifier name and the padded states.
// Throws IllegalFormatUseException when an identifier is empty.
// NOTE(review): the declaration of 'pad' and several short lines are not visible in this section.
365 private void toPhylip( final Writer writer ) throws IOException {
// NOTE(review): Writer.write(int) writes a single character whose code is the argument,
// not the decimal text of the number. toForester uses String.valueOf( ... ) for the same
// counts; this looks like a bug - confirm and wrap both counts in String.valueOf.
371 writer.write( getNumberOfIdentifiers() );
373 writer.write( getNumberOfCharacters() );
374 writer.write( ForesterUtil.LINE_SEPARATOR );
375 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
376 if ( !ForesterUtil.isEmpty( getIdentifier( identifier ) ) ) {
377 writer.write( ForesterUtil.pad( getIdentifier( identifier ), pad, ' ', false ).toString() );
382 throw new IllegalFormatUseException( "Phylip format does not allow empty identifiers" );
385 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
// NOTE(review): toString() is called before the null test on the next line, so a null
// state raises NullPointerException here and the "state != null" branch can never be false.
386 final String state = getState( identifier, character ).toString();
387 writer.write( state != null ? ForesterUtil.pad( state, pad, ' ', false ).toString() : ForesterUtil
388 .pad( "", pad, ' ', false ).toString() );
389 if ( character < ( getNumberOfCharacters() - 1 ) ) {
// A line separator follows every row except the last.
394 if ( identifier < ( getNumberOfIdentifiers() - 1 ) ) {
395 writer.write( ForesterUtil.LINE_SEPARATOR );
401 //to format for microarray-style clustering
402 // states are ints in this case
405 public void toWriter( final Writer writer ) throws IOException {
406 toForester( writer );
// Writes this matrix to the given writer in the requested format.
// Throws IllegalArgumentException ("Unknown format:" + format) for a format that is not handled.
// NOTE(review): the dispatch lines (which format selects which private writer) are not
// visible in this section; only the forester branch and the fallback can be seen.
410 public void toWriter( final Writer writer, final Format format ) throws IOException {
416 toForester( writer );
422 throw new IllegalArgumentException( "Unknown format:" + format );
// Writes a NEXUS characters block for binary data: the block opener, a dimensions line
// with the character count, the character state labels, a format line naming the
// datatype and the two symbols (BinaryStates.ABSENT, BinaryStates.PRESENT), the matrix, and the block end.
// NOTE(review): short writes between the visible lines (presumably spaces, '=' and ';'
// separators) are not visible in this section - confirm the exact punctuation in the full file.
426 public void writeNexusBinaryChractersBlock( final Writer w ) throws IOException {
// The comment lines below sketch the target layout (only part of the sketch is visible here).
428 // DIMENSIONS NCHAR=x;
429 //BEGIN CHARSTATELABELS
433 // FORMAT DATATYPE=STANDARD SYMBOLS=;
439 w.write( NexusConstants.BEGIN_CHARACTERS );
440 w.write( ForesterUtil.LINE_SEPARATOR );
// Dimensions line: number of characters.
442 w.write( NexusConstants.DIMENSIONS );
444 w.write( NexusConstants.NCHAR );
446 w.write( String.valueOf( getNumberOfCharacters() ) );
448 w.write( ForesterUtil.LINE_SEPARATOR );
449 writeNexusCharstatelabels( w );
// Format line: datatype and the symbols used for absent/present.
451 w.write( NexusConstants.FORMAT );
453 w.write( NexusConstants.DATATYPE );
455 w.write( NexusConstants.STANDARD );
457 w.write( NexusConstants.SYMBOLS );
459 w.write( String.valueOf( BinaryStates.ABSENT ) );
460 w.write( String.valueOf( BinaryStates.PRESENT ) );
462 w.write( ForesterUtil.LINE_SEPARATOR );
463 writeNexusMatrix( w );
464 w.write( ForesterUtil.LINE_SEPARATOR );
465 w.write( NexusConstants.END );
466 w.write( ForesterUtil.LINE_SEPARATOR );
// Writes the NEXUS character state labels section: the section keyword, then one entry per
// character consisting of its 1-based position and its name after an opening single quote.
// NOTE(review): the writes that close the quote and terminate entries are not visible in
// this section. getCharacter( i ) is written unchecked; presumably every character name is
// set before this is called, otherwise a null name would be passed to Writer.write - confirm.
469 public void writeNexusCharstatelabels( final Writer w ) throws IOException {
471 w.write( NexusConstants.CHARSTATELABELS );
472 w.write( ForesterUtil.LINE_SEPARATOR );
473 for( int i = 0; i < getNumberOfCharacters(); ++i ) {
// Positions in the output are 1-based.
474 w.write( " " + ( i + 1 ) + " '" );
475 w.write( getCharacter( i ) );
// Entries other than the last are followed by a line separator.
477 if ( i < ( getNumberOfCharacters() - 1 ) ) {
479 w.write( ForesterUtil.LINE_SEPARATOR );
483 w.write( ForesterUtil.LINE_SEPARATOR );
// Writes the NEXUS matrix section: the section keyword, then one row per identifier with
// the padded identifier name (when set) followed by the string form of each state.
// Throws IllegalFormatUseException if a state is null, is not a BinaryStates value, or is
// BinaryStates.UNKNOWN.
// NOTE(review): several short lines of this method are not visible in this section.
486 public void writeNexusMatrix( final Writer w ) throws IOException {
488 w.write( NexusConstants.MATRIX );
489 w.write( ForesterUtil.LINE_SEPARATOR );
490 for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) {
491 if ( getIdentifier( identifier ) != null ) {
493 w.write( ForesterUtil.pad( getIdentifier( identifier ), 20, ' ', false ).toString() );
496 for( int character = 0; character < getNumberOfCharacters(); ++character ) {
497 final S state = getState( identifier, character );
498 if ( state == null ) {
499 throw new IllegalFormatUseException( "character state matrix cannot contain null if to be represented in nexus format" );
501 if ( !( state instanceof BinaryStates ) ) {
// NOTE(review): the message reports the class of the state at (0, 0), not of the
// offending 'state' that failed the instanceof test - likely unintended; confirm.
502 throw new IllegalFormatUseException( "nexus format representation expects binary character data - got ["
503 + getState( 0, 0 ).getClass() + "] instead" );
505 if ( state == BinaryStates.UNKNOWN ) {
506 throw new IllegalFormatUseException( "character state matrix cannot contain unknown states if to be represented in nexus format" );
508 w.write( state.toString() );
// A line separator follows every row except the last.
510 if ( identifier < ( getNumberOfIdentifiers() - 1 ) ) {
511 w.write( ForesterUtil.LINE_SEPARATOR );
// Writes a NEXUS taxa block: the block opener, a dimensions line with the identifier
// count, the taxon labels keyword followed by every identifier name, and the block end.
// NOTE(review): short writes between the visible lines (presumably spaces, '=' and ';'
// separators) are not visible in this section. getIdentifier( i ) is written unchecked;
// presumably every identifier name is set before this is called, otherwise a null name
// would be passed to Writer.write - confirm.
517 public void writeNexusTaxaBlock( final Writer w ) throws IOException {
// The comment lines below sketch the target layout (only part of the sketch is visible here).
519 // DIMENSIONS NTAX=n;
520 // TAXLABELS fish frog snake;
522 w.write( NexusConstants.BEGIN_TAXA );
523 w.write( ForesterUtil.LINE_SEPARATOR );
// Dimensions line: number of taxa (identifiers).
525 w.write( NexusConstants.DIMENSIONS );
527 w.write( NexusConstants.NTAX );
529 w.write( String.valueOf( getNumberOfIdentifiers() ) );
531 w.write( ForesterUtil.LINE_SEPARATOR );
// Taxon labels: one identifier name per taxon, in row order.
533 w.write( NexusConstants.TAXLABELS );
534 for( int i = 0; i < getNumberOfIdentifiers(); ++i ) {
536 w.write( getIdentifier( i ) );
539 w.write( ForesterUtil.LINE_SEPARATOR );
540 w.write( NexusConstants.END );
541 w.write( ForesterUtil.LINE_SEPARATOR );