3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 // Contact: phylosoft @ gmail . com
25 // WWW: www.phylosoft.org/forester
27 package org.forester.io.parsers;
29 import java.io.IOException;
30 import java.util.List;
32 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
33 import org.forester.evoinference.matrix.distance.DistanceMatrix;
34 import org.forester.util.BasicTable;
35 import org.forester.util.BasicTableParser;
36 import org.forester.util.ForesterUtil;
39 * This can read full, lower triangular, and upper triangular distance matrices.
40 * In the case of a full matrix, the lower triangular values are used. Format
41 * (by example): id1 0 id2 0.3 0 id3 0.4 0.4 0
45 * id1 id2 0.3 id3 0.4 0.4
47 * Numbers before are after the data are ignored.
52 * @author Christian M Zmasek
54 public class SymmetricalDistanceMatrixParser {
56 private final static InputMatrixType INPUT_MATRIX_TYPE_DEFAULT = InputMatrixType.LOWER_TRIANGLE;
57 private final static String COMMENT = "#";
58 private final static String VALUE_SEPARATOR = " ";
59 private int _matrix_size;
60 private InputMatrixType _input_matrix_type;
62 private SymmetricalDistanceMatrixParser() {
66 private void checkValueIsZero( final BasicTable<String> table, final int row, final int i, final int start_row )
69 final String table_value = table.getValue( i, row + start_row );
70 if ( ForesterUtil.isEmpty( table_value ) ) {
71 throw new IOException( "value is null or empty at [" + ( i - 1 ) + ", " + row + "]" );
74 d = Double.parseDouble( table_value );
76 catch ( final NumberFormatException e ) {
77 throw new IOException( "illegal format for distance [" + table_value + "] at [" + ( i - 1 ) + ", " + row
80 if ( !ForesterUtil.isEqual( 0.0, d ) ) {
81 throw new IOException( "attempt to use non-zero diagonal value [" + table_value + "] at [" + ( i - 1 )
86 private InputMatrixType getInputMatrixType() {
87 return _input_matrix_type;
90 private int getMatrixSize() {
95 setInputMatrixType( INPUT_MATRIX_TYPE_DEFAULT );
99 public DistanceMatrix[] parse( final Object source ) throws IOException {
101 final List<BasicTable<String>> tables = BasicTableParser.parse( source, VALUE_SEPARATOR, false, COMMENT, true );
102 final DistanceMatrix[] distance_matrices = new DistanceMatrix[ tables.size() ];
104 for( final BasicTable<String> table : tables ) {
105 distance_matrices[ i++ ] = transform( table );
107 return distance_matrices;
110 private void reset() {
114 public void setInputMatrixType( final InputMatrixType input_matrix_type ) {
115 _input_matrix_type = input_matrix_type;
118 private void setMatrixSize( final int matrix_size ) {
119 _matrix_size = matrix_size;
122 private void transferValue( final BasicTable<String> table,
123 final DistanceMatrix distance_matrix,
127 final int col_offset ) throws IOException {
129 final String table_value = table.getValue( col, row + start_row );
130 if ( ForesterUtil.isEmpty( table_value ) ) {
131 throw new IOException( "value is null or empty at [" + ( col - 1 ) + ", " + row + "]" );
134 d = Double.parseDouble( table_value );
136 catch ( final NumberFormatException e ) {
137 throw new IOException( "illegal format for distance [" + table_value + "] at [" + ( col - 1 ) + ", " + row
140 distance_matrix.setValue( col - 1 + col_offset, row, d );
143 private DistanceMatrix transform( final BasicTable<String> table ) throws IllegalArgumentException, IOException {
144 boolean first_line_is_size = false;
145 if ( table.getNumberOfColumns() < 3 ) {
146 throw new IllegalArgumentException( "attempt to create distance matrix with with less than 3 columns [columns: "
147 + table.getNumberOfColumns() + ", rows: " + table.getNumberOfRows() + "]" );
149 if ( table.getNumberOfColumns() == table.getNumberOfRows() ) {
150 first_line_is_size = true;
152 else if ( table.getNumberOfColumns() != table.getNumberOfRows() + 1 ) {
153 throw new IllegalArgumentException( "attempt to create distance matrix with illegal dimensions [columns: "
154 + table.getNumberOfColumns() + ", rows: " + table.getNumberOfRows() + "]" );
156 final DistanceMatrix distance_matrix = new BasicSymmetricalDistanceMatrix( table.getNumberOfColumns() - 1 );
158 if ( first_line_is_size ) {
161 for( int row = 0; row < table.getNumberOfRows() - start_row; row++ ) {
162 distance_matrix.setIdentifier( row, table.getValue( 0, row + start_row ) );
163 switch ( getInputMatrixType() ) {
165 for( int col = 1; col <= row; ++col ) {
166 transferValue( table, distance_matrix, row, col, start_row, 0 );
168 checkValueIsZero( table, row, row + 1, start_row );
171 for( int col = 1; col < ( table.getNumberOfColumns() - row ); ++col ) {
172 transferValue( table, distance_matrix, row, col, start_row, row );
176 throw new AssertionError( "unkwnown input matrix type [" + getInputMatrixType() + "]" );
179 if ( getMatrixSize() < 1 ) {
180 setMatrixSize( distance_matrix.getSize() );
182 else if ( getMatrixSize() != distance_matrix.getSize() ) {
183 throw new IOException( "attempt to use matrices of unequal size: [" + getMatrixSize() + "] vs ["
184 + distance_matrix.getSize() + "]" );
186 return distance_matrix;
189 public static SymmetricalDistanceMatrixParser createInstance() {
190 return new SymmetricalDistanceMatrixParser();
193 public enum InputMatrixType {
194 UPPER_TRIANGLE, LOWER_TRIANGLE