4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
7 // Copyright (C) 2009-2010 Christian M. Zmasek
8 // Copyright (C) 2009-2010 Burnham Institute for Medical Research
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester/
28 package org.forester.io.parsers.nexus;
30 import java.io.BufferedReader;
31 import java.io.IOException;
33 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
34 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
35 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
36 import org.forester.io.parsers.util.ParserUtils;
37 import org.forester.io.parsers.util.PhylogenyParserException;
39 public class NexusBinaryStatesMatrixParser {
41 private static final String UTF_8 = "UTF-8";
42 private Object _nexus_source;
43 private CharacterStateMatrix<BinaryStates> _matrix;
47 public CharacterStateMatrix<BinaryStates> getMatrix() {
51 public int getNChar() {
55 private Object getNexusSource() {
59 public int getNTax() {
63 public void parse() throws IOException {
65 final BufferedReader reader = ParserUtils.createReader( getNexusSource(), UTF_8 );
67 boolean in_matrix = false;
68 int identifier_index = 0;
69 int max_character_index = -1;
70 while ( ( line = reader.readLine() ) != null ) {
72 if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) {
73 if ( line.toLowerCase().indexOf( NexusConstants.NCHAR.toLowerCase() ) >= 0 ) {
74 final int i = line.toLowerCase().indexOf( NexusConstants.NCHAR.toLowerCase() );
75 String s = line.toLowerCase().substring( i + 6 );
76 s = s.replace( ';', ' ' ).trim();
77 setNChar( Integer.parseInt( s ) );
79 else if ( line.toLowerCase().indexOf( NexusConstants.NTAX.toLowerCase() ) >= 0 ) {
80 final int i = line.toLowerCase().indexOf( NexusConstants.NTAX.toLowerCase() );
81 String s = line.toLowerCase().substring( i + 5 );
82 s = s.replace( ';', ' ' ).trim();
83 setNTax( Integer.parseInt( s ) );
85 else if ( line.toLowerCase().startsWith( NexusConstants.MATRIX.toLowerCase() ) ) {
87 if ( getNTax() < 1 ) {
88 throw new NexusFormatException( "did not encounter " + NexusConstants.NTAX );
90 if ( getNChar() < 1 ) {
91 throw new NexusFormatException( "did not encounter " + NexusConstants.NCHAR );
93 if ( getMatrix() != null ) {
94 throw new NexusFormatException( "more than one matrix present" );
96 setMatrix( new BasicCharacterStateMatrix<BinaryStates>( getNTax(), getNChar() ) );
98 else if ( line.toLowerCase().startsWith( NexusConstants.END.toLowerCase() ) ) {
101 else if ( in_matrix ) {
102 final String[] line_ary = line.split( "\\s+" );
103 final String label = line_ary[ 0 ].trim();
104 String states_str = line_ary[ 1 ].trim();
105 if ( states_str.endsWith( ";" ) ) {
107 states_str = states_str.substring( 0, states_str.length() - 1 );
109 final char[] states = states_str.toCharArray();
110 getMatrix().setIdentifier( identifier_index, label );
111 int character_index = 0;
112 for( final char state : states ) {
113 if ( state == BinaryStates.PRESENT.toChar() ) {
115 getMatrix().setState( identifier_index, character_index, BinaryStates.PRESENT );
117 catch ( final ArrayIndexOutOfBoundsException ex ) {
118 throw new NexusFormatException( "problem at line " + line + " [" + ex + "]" );
121 else if ( state == BinaryStates.ABSENT.toChar() ) {
123 getMatrix().setState( identifier_index, character_index, BinaryStates.ABSENT );
125 catch ( final ArrayIndexOutOfBoundsException ex ) {
126 throw new NexusFormatException( "problem at line " + line + " [" + ex + "]" );
130 throw new NexusFormatException( "illegal state " + state );
134 if ( ( max_character_index > 0 ) && ( max_character_index != character_index ) ) {
135 throw new NexusFormatException( "unequal number of characters at line " + line );
137 max_character_index = character_index;
144 private void reset() {
150 private void setMatrix( final CharacterStateMatrix<BinaryStates> matrix ) {
154 private void setNChar( final int nchar ) {
158 private void setNTax( final int ntax ) {
162 public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException {
163 if ( nexus_source == null ) {
164 throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
166 _nexus_source = nexus_source;