4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
7 // Copyright (C) 2009-2010 Christian M. Zmasek
8 // Copyright (C) 2009-2010 Burnham Institute for Medical Research
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester/
28 package org.forester.io.parsers.nexus;
30 import java.io.BufferedReader;
31 import java.io.IOException;
33 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
34 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
35 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
36 import org.forester.io.parsers.util.ParserUtils;
37 import org.forester.io.parsers.util.PhylogenyParserException;
// Parser for a character-state matrix of BinaryStates values read from a Nexus-formatted
// source. Usage visible in this class: setSource(), then parse(), then getMatrix() /
// getNChar() / getNTax().
39 public class NexusBinaryStatesMatrixParser {
// The input object assigned by setSource(). NOTE(review): presumably what getNexusSource()
// returns and what parse() hands to ParserUtils.createReader() -- accessor body not visible here.
41 private Object _nexus_source;
// NOTE(review): presumably the matrix created in parse() via setMatrix() and returned by
// getMatrix(); the accessor bodies are not visible in this excerpt -- confirm.
42 private CharacterStateMatrix<BinaryStates> _matrix;
// Accessor for the binary-states matrix. parse() treats a non-null result as "a matrix has
// already been created" and writes identifiers and states through the returned object.
// NOTE(review): body not visible in this excerpt; presumably returns _matrix -- confirm.
46 public CharacterStateMatrix<BinaryStates> getMatrix() {
// Accessor for the character count. parse() requires it to be >= 1 before creating the matrix
// and passes it as the second constructor argument of BasicCharacterStateMatrix.
// NOTE(review): body not visible in this excerpt; presumably the value stored by setNChar() -- confirm.
50 public int getNChar() {
// Accessor for the input source; parse() passes its result to ParserUtils.createReader().
// NOTE(review): body not visible in this excerpt; presumably returns _nexus_source -- confirm.
54 private Object getNexusSource() {
// Accessor for the taxon count. parse() requires it to be >= 1 before creating the matrix
// and passes it as the first constructor argument of BasicCharacterStateMatrix.
// NOTE(review): body not visible in this excerpt; presumably the value stored by setNTax() -- confirm.
58 public int getNTax() {
// Reads the source line by line and fills a BinaryStates character matrix.
//
// Per non-skipped line, the first matching branch wins:
//   1. line contains NexusConstants.NCHAR (case-insensitive) -> parse and store the character count
//   2. line contains NexusConstants.NTAX  (case-insensitive) -> parse and store the taxon count
//   3. line starts with NexusConstants.MATRIX -> validate counts, create the matrix
//   4. line starts with NexusConstants.END    -> (branch body not visible in this excerpt)
//   5. while in_matrix is true -> treat the line as one "<label> <states>" matrix row
//
// Throws IOException (declared); the visible code throws NexusFormatException for missing
// counts, a second matrix, illegal state characters, out-of-range indices and rows of unequal
// length. Integer.parseInt may additionally raise an unchecked NumberFormatException.
//
// NOTE(review): the embedded line numbers show that several original lines are missing from
// this excerpt (e.g. the declaration of `line`, the `try {` openers, closing braces, the
// updates of in_matrix / character_index / identifier_index); comments below only describe
// what is visible.
62 public void parse() throws IOException {
// NOTE(review): no close() on this reader is visible in this excerpt -- confirm it is released.
64 final BufferedReader reader = ParserUtils.createReader( getNexusSource() );
// True while matrix rows are expected; where it is switched on/off is not visible here.
66 boolean in_matrix = false;
// Row index used for setIdentifier()/setState() on the current matrix row.
67 int identifier_index = 0;
// Character count of the previously processed row; -1 means "no row seen yet".
68 int max_character_index = -1;
69 while ( ( line = reader.readLine() ) != null ) {
// Ignore empty lines and lines beginning with '#' or '>'.
71 if ( ( line.length() > 0 ) && !line.startsWith( "#" ) && !line.startsWith( ">" ) ) {
// NOTE(review): because this is an if / else-if chain, a line that contains both the
// NCHAR and the NTAX keyword is handled by this first branch only.
72 if ( line.toLowerCase().indexOf( NexusConstants.NCHAR.toLowerCase() ) >= 0 ) {
73 final int i = line.toLowerCase().indexOf( NexusConstants.NCHAR.toLowerCase() );
// Skip 6 characters from the keyword start. NOTE(review): hard-coded offset, presumably
// the keyword length plus a '=' -- confirm against NexusConstants.NCHAR.
74 String s = line.toLowerCase().substring( i + 6 );
// Drop any ';' and surrounding whitespace; everything that remains must be an integer,
// otherwise Integer.parseInt throws NumberFormatException.
75 s = s.replace( ';', ' ' ).trim();
76 setNChar( Integer.parseInt( s ) );
78 else if ( line.toLowerCase().indexOf( NexusConstants.NTAX.toLowerCase() ) >= 0 ) {
79 final int i = line.toLowerCase().indexOf( NexusConstants.NTAX.toLowerCase() );
// Same scheme as above with an offset of 5. NOTE(review): hard-coded -- confirm against
// NexusConstants.NTAX.
80 String s = line.toLowerCase().substring( i + 5 );
81 s = s.replace( ';', ' ' ).trim();
82 setNTax( Integer.parseInt( s ) );
// Start of the matrix section: both counts must already be known and no matrix may exist yet.
84 else if ( line.toLowerCase().startsWith( NexusConstants.MATRIX.toLowerCase() ) ) {
86 if ( getNTax() < 1 ) {
87 throw new NexusFormatException( "did not encounter " + NexusConstants.NTAX );
89 if ( getNChar() < 1 ) {
90 throw new NexusFormatException( "did not encounter " + NexusConstants.NCHAR );
92 if ( getMatrix() != null ) {
93 throw new NexusFormatException( "more than one matrix present" );
// Allocate the matrix with the announced dimensions (taxa, characters).
95 setMatrix( new BasicCharacterStateMatrix<BinaryStates>( getNTax(), getNChar() ) );
// NOTE(review): body of this branch is not visible; presumably it leaves matrix mode -- confirm.
97 else if ( line.toLowerCase().startsWith( NexusConstants.END.toLowerCase() ) ) {
// Matrix row: whitespace-separated "<label> <states>".
100 else if ( in_matrix ) {
101 final String[] line_ary = line.split( "\\s+" );
102 final String label = line_ary[ 0 ].trim();
// NOTE(review): a row with a single token makes this index access throw
// ArrayIndexOutOfBoundsException; no guard for that is visible here.
103 String states_str = line_ary[ 1 ].trim();
// A trailing ';' (end of the matrix command) is not part of the state string.
104 if ( states_str.endsWith( ";" ) ) {
106 states_str = states_str.substring( 0, states_str.length() - 1 );
108 final char[] states = states_str.toCharArray();
109 getMatrix().setIdentifier( identifier_index, label );
// Column index within the current row; its increment is not visible in this excerpt.
110 int character_index = 0;
// Map every state character to BinaryStates.PRESENT or BinaryStates.ABSENT.
111 for( final char state : states ) {
112 if ( state == BinaryStates.PRESENT.toChar() ) {
114 getMatrix().setState( identifier_index, character_index, BinaryStates.PRESENT );
// An out-of-range row/column index is reported as a format error quoting the offending line.
116 catch ( final ArrayIndexOutOfBoundsException ex ) {
117 throw new NexusFormatException( "problem at line " + line + " [" + ex + "]" );
120 else if ( state == BinaryStates.ABSENT.toChar() ) {
122 getMatrix().setState( identifier_index, character_index, BinaryStates.ABSENT );
124 catch ( final ArrayIndexOutOfBoundsException ex ) {
125 throw new NexusFormatException( "problem at line " + line + " [" + ex + "]" );
// Any character other than the PRESENT/ABSENT symbols is rejected.
129 throw new NexusFormatException( "illegal state " + state );
// All rows must have the same number of characters as the previous row.
// NOTE(review): the "> 0" guard skips the comparison when the previous value is 0 (or -1).
133 if ( ( max_character_index > 0 ) && ( max_character_index != character_index ) ) {
134 throw new NexusFormatException( "unequal number of characters at line " + line );
136 max_character_index = character_index;
// NOTE(review): body and call sites are not visible in this excerpt; presumably clears the
// parser state (counts and matrix) before a new parse -- confirm.
143 private void reset() {
// Stores the matrix; parse() calls this with a freshly created BasicCharacterStateMatrix
// when the matrix keyword is encountered.
// NOTE(review): body not visible in this excerpt; presumably assigns _matrix -- confirm.
149 private void setMatrix( final CharacterStateMatrix<BinaryStates> matrix ) {
// Stores the character count; parse() calls this with the integer parsed from the NCHAR line.
// NOTE(review): body not visible in this excerpt -- confirm which field it writes.
153 private void setNChar( final int nchar ) {
// Stores the taxon count; parse() calls this with the integer parsed from the NTAX line.
// NOTE(review): body not visible in this excerpt -- confirm which field it writes.
157 private void setNTax( final int ntax ) {
// Sets the object to be parsed.
//
// nexus_source: the input; must not be null.
// Throws PhylogenyParserException if nexus_source is null.
// NOTE(review): IOException is declared, but no visible statement here throws it.
161 public void setSource( final Object nexus_source ) throws PhylogenyParserException, IOException {
// Reject a null source up front instead of failing later.
162 if ( nexus_source == null ) {
163 throw new PhylogenyParserException( getClass() + ": attempt to parse null object." );
// Remember the source in the _nexus_source field.
165 _nexus_source = nexus_source;