2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.io.parsers.phyloxml;
28 import java.util.ArrayList;
29 import java.util.Collection;
30 import java.util.HashMap;
31 import java.util.List;
34 import org.forester.io.parsers.phyloxml.data.BinaryCharactersParser;
35 import org.forester.io.parsers.phyloxml.data.BranchWidthParser;
36 import org.forester.io.parsers.phyloxml.data.ColorParser;
37 import org.forester.io.parsers.phyloxml.data.ConfidenceParser;
38 import org.forester.io.parsers.phyloxml.data.DateParser;
39 import org.forester.io.parsers.phyloxml.data.DistributionParser;
40 import org.forester.io.parsers.phyloxml.data.EventParser;
41 import org.forester.io.parsers.phyloxml.data.IdentifierParser;
42 import org.forester.io.parsers.phyloxml.data.PropertyParser;
43 import org.forester.io.parsers.phyloxml.data.ReferenceParser;
44 import org.forester.io.parsers.phyloxml.data.SequenceParser;
45 import org.forester.io.parsers.phyloxml.data.SequenceRelationParser;
46 import org.forester.io.parsers.phyloxml.data.TaxonomyParser;
47 import org.forester.io.parsers.util.PhylogenyParserException;
48 import org.forester.phylogeny.Phylogeny;
49 import org.forester.phylogeny.PhylogenyNode;
50 import org.forester.phylogeny.data.BinaryCharacters;
51 import org.forester.phylogeny.data.BranchColor;
52 import org.forester.phylogeny.data.BranchWidth;
53 import org.forester.phylogeny.data.Confidence;
54 import org.forester.phylogeny.data.Date;
55 import org.forester.phylogeny.data.Distribution;
56 import org.forester.phylogeny.data.Event;
57 import org.forester.phylogeny.data.Identifier;
58 import org.forester.phylogeny.data.NodeVisualData;
59 import org.forester.phylogeny.data.PhylogenyDataUtil;
60 import org.forester.phylogeny.data.PropertiesMap;
61 import org.forester.phylogeny.data.Property;
62 import org.forester.phylogeny.data.Property.AppliesTo;
63 import org.forester.phylogeny.data.Reference;
64 import org.forester.phylogeny.data.Sequence;
65 import org.forester.phylogeny.data.SequenceRelation;
66 import org.forester.phylogeny.data.SequenceRelation.SEQUENCE_RELATION_TYPE;
67 import org.forester.phylogeny.data.Taxonomy;
68 import org.forester.util.FailedConditionCheckException;
69 import org.forester.util.ForesterConstants;
70 import org.forester.util.ForesterUtil;
71 import org.xml.sax.Attributes;
72 import org.xml.sax.SAXException;
73 import org.xml.sax.helpers.DefaultHandler;
75 public final class PhyloXmlHandler extends DefaultHandler {
// Local name of the document root element; startElement/endElement test for it explicitly.
77 private static final String PHYLOXML = "phyloxml";
// Local name of the element most recently opened by startElement; set back to null at the end of endElement.
78 private String _current_element_name;
// Phylogeny currently being built; null when no phylogeny is open (see reset()).
79 private Phylogeny _current_phylogeny;
// Phylogenies completed so far; finishPhylogeny() appends to this list.
80 private List<Phylogeny> _phylogenies;
// XmlElement that currently receives child elements and character data.
81 private XmlElement _current_xml_element;
// Tree node that corresponds to the clade currently being read.
82 private PhylogenyNode _current_node;
// Per phylogeny: sequences keyed by their source ID, filled while clades are mapped and
// removed again in finishPhylogeny().
// NOTE(review): this map is static and a plain HashMap, so it is shared by every handler
// instance — confirm that handlers are never used concurrently.
83 private static Map<Phylogeny, HashMap<String, Sequence>> phylogenySequencesById = new HashMap<Phylogeny, HashMap<String, Sequence>>();
89 private void addNode() {
90 final PhylogenyNode new_node = new PhylogenyNode();
91 getCurrentNode().addAsChild( new_node );
92 setCurrentNode( new_node );
96 public void characters( final char[] chars, final int start_index, final int end_index ) {
97 if ( ( ( getCurrentXmlElement() != null ) && ( getCurrentElementName() != null ) )
98 && !getCurrentElementName().equals( PhyloXmlMapping.CLADE )
99 && !getCurrentElementName().equals( PhyloXmlMapping.PHYLOGENY ) ) {
100 if ( !ForesterUtil.isEmpty( getCurrentXmlElement().getValueAsString() ) ) {
101 getCurrentXmlElement().appendValue( new String( chars, start_index, end_index ) );
104 getCurrentXmlElement().setValue( new String( chars, start_index, end_index ) );
/**
 * SAX callback for the end of an element. Only elements whose namespace is
 * empty or starts with the phyloXML location are processed.
 * <ul>
 * <li>clade: the collected XML element is mapped onto the current node, the
 * current node moves up to its parent (unless it is the root), and the
 * current XML element is cleared and replaced by its parent;</li>
 * <li>sequence relation: the relation is parsed and attached to the
 * sequences it references (looked up by source ID), and its type is
 * registered with the current phylogeny;</li>
 * <li>phylogeny: the collected XML element is mapped onto the current
 * phylogeny;</li>
 * <li>any other element inside a phylogeny: the current XML element is
 * replaced by its parent.</li>
 * </ul>
 * In every processed case the current element name is cleared at the end.
 *
 * @throws SAXException carrying the message of a PhylogenyParserException
 *                      or PhyloXmlDataFormatException raised while mapping
 */
110 public void endElement( final String namespace_uri, final String local_name, final String qualified_name )
111 throws SAXException {
112 if ( ForesterUtil.isEmpty( namespace_uri ) || namespace_uri.startsWith( ForesterConstants.PHYLO_XML_LOCATION ) ) {
113 if ( local_name.equals( PhyloXmlMapping.CLADE ) ) {
115 mapElementToPhylogenyNode( getCurrentXmlElement(), getCurrentNode() );
// step back up the tree; the root has no parent to return to
116 if ( !getCurrentNode().isRoot() ) {
117 setCurrentNode( getCurrentNode().getParent() );
119 getCurrentXmlElement().setValue( null );
120 setCurrentXmlElement( getCurrentXmlElement().getParent() );
// only the message is forwarded; the original exception is not attached as cause
122 catch ( final PhylogenyParserException ex ) {
123 throw new SAXException( ex.getMessage() );
125 catch ( final PhyloXmlDataFormatException e ) {
126 throw new SAXException( e.getMessage() );
129 else if ( local_name.equals( PhyloXmlMapping.SEQUENCE_RELATION ) ) {
131 if ( getCurrentPhylogeny() != null ) {
132 final SequenceRelation seqRelation = ( SequenceRelation ) SequenceRelationParser
133 .getInstance( getCurrentPhylogeny() ).parse( getCurrentXmlElement() );
// resolve both ends of the relation via the per-phylogeny source-ID map; either may be null
134 final Map<String, Sequence> sequencesById = getSequenceMapByIdForPhylogeny( getCurrentPhylogeny() );
135 final Sequence ref0 = sequencesById.get( seqRelation.getRef0().getSourceId() ), ref1 = sequencesById
136 .get( seqRelation.getRef1().getSourceId() );
137 if ( ref0 != null ) {
138 // check for reverse relation
139 boolean fFoundReverse = false;
140 for( final SequenceRelation sr : ref0.getSequenceRelations() ) {
// same type and same pair of sequences, in either order
141 if ( sr.getType().equals( seqRelation.getType() )
142 && ( ( sr.getRef0().isEqual( ref1 ) && sr.getRef1().isEqual( ref0 ) ) || ( sr
143 .getRef0().isEqual( ref0 ) && sr.getRef1().isEqual( ref1 ) ) ) ) {
144 // in this case we don't need to re-add it, but we make sure we don't lose the confidence value
145 fFoundReverse = true;
146 if ( ( sr.getConfidence() == null ) && ( seqRelation.getConfidence() != null ) ) {
147 sr.setConfidence( seqRelation.getConfidence() );
151 if ( !fFoundReverse ) {
152 ref0.addSequenceRelation( seqRelation );
// same procedure for the second referenced sequence
155 if ( ref1 != null ) {
156 // check for reverse relation
157 boolean fFoundReverse = false;
158 for( final SequenceRelation sr : ref1.getSequenceRelations() ) {
159 if ( sr.getType().equals( seqRelation.getType() )
160 && ( ( sr.getRef0().isEqual( ref1 ) && sr.getRef1().isEqual( ref0 ) ) || ( sr
161 .getRef0().isEqual( ref0 ) && sr.getRef1().isEqual( ref1 ) ) ) ) {
162 // in this case we don't need to re-add it, but we make sure we don't lose the confidence value
163 fFoundReverse = true;
164 if ( ( sr.getConfidence() == null ) && ( seqRelation.getConfidence() != null ) ) {
165 sr.setConfidence( seqRelation.getConfidence() );
169 if ( !fFoundReverse ) {
170 ref1.addSequenceRelation( seqRelation );
173 // we add the type to the current phylogeny so we can know it needs to be displayed in the combo
174 final Collection<SEQUENCE_RELATION_TYPE> relationTypesForCurrentPhylogeny = getCurrentPhylogeny()
175 .getRelevantSequenceRelationTypes();
176 if ( !relationTypesForCurrentPhylogeny.contains( seqRelation.getType() ) ) {
177 relationTypesForCurrentPhylogeny.add( seqRelation.getType() );
181 catch ( final PhyloXmlDataFormatException ex ) {
182 throw new SAXException( ex.getMessage() );
185 else if ( local_name.equals( PhyloXmlMapping.PHYLOGENY ) ) {
187 PhyloXmlHandler.mapElementToPhylogeny( getCurrentXmlElement(), getCurrentPhylogeny() );
189 catch ( final PhylogenyParserException e ) {
190 throw new SAXException( e.getMessage() );
192 catch ( final PhyloXmlDataFormatException e ) {
193 throw new SAXException( e.getMessage() );
198 else if ( local_name.equals( PHYLOXML ) ) {
// any other element inside a phylogeny: pop back to the enclosing XML element
201 else if ( ( getCurrentPhylogeny() != null ) && ( getCurrentXmlElement().getParent() != null ) ) {
202 setCurrentXmlElement( getCurrentXmlElement().getParent() );
204 setCurrentElementName( null );
208 private void finishPhylogeny() throws SAXException {
209 getCurrentPhylogeny().recalculateNumberOfExternalDescendants( false );
210 getPhylogenies().add( getCurrentPhylogeny() );
211 final HashMap<String, Sequence> phyloSequences = phylogenySequencesById.get( getCurrentPhylogeny() );
212 if ( phyloSequences != null ) {
213 getCurrentPhylogeny().setSequenceRelationQueries( phyloSequences.values() );
214 phylogenySequencesById.remove( getCurrentPhylogeny() );
218 private String getCurrentElementName() {
219 return _current_element_name;
222 private PhylogenyNode getCurrentNode() {
223 return _current_node;
226 private Phylogeny getCurrentPhylogeny() {
227 return _current_phylogeny;
230 private XmlElement getCurrentXmlElement() {
231 return _current_xml_element;
// Package-private accessor for the list of phylogenies; finishPhylogeny() appends each
// completed phylogeny to the list obtained here.
// NOTE(review): the body is not visible in this listing — confirm it returns _phylogenies.
234 List<Phylogeny> getPhylogenies() {
// Prepares the handler for parsing by installing a new, empty list of phylogenies.
// NOTE(review): the embedded numbering skips a line between the signature and the call
// below — confirm whether further state is initialised here.
238 private void init() {
240 setPhylogenies( new ArrayList<Phylogeny>() );
243 private void initCurrentNode() {
244 if ( getCurrentNode() != null ) {
245 throw new FailedConditionCheckException( "attempt to create new current node when current node already exists" );
247 if ( getCurrentPhylogeny() == null ) {
248 throw new FailedConditionCheckException( "attempt to create new current node for non-existing phylogeny" );
250 final PhylogenyNode node = new PhylogenyNode();
251 getCurrentPhylogeny().setRoot( node );
252 setCurrentNode( getCurrentPhylogeny().getRoot() );
/**
 * Transfers the content of a clade's XML element onto a tree node.
 * <p>
 * The branch length and collapse attributes are read first; then each
 * child element is dispatched on its qualified name to the matching
 * parser and the result is stored in the node's node data or branch data.
 *
 * @param xml_element the XML element collected for the clade
 * @param node        the node to populate
 * @throws PhylogenyParserException    if the branch length attribute is not a
 *                                     number, or a branch length is supplied
 *                                     when the node's distance is already set
 * @throws PhyloXmlDataFormatException declared for the data parsers invoked here
 */
255 private void mapElementToPhylogenyNode( final XmlElement xml_element, final PhylogenyNode node )
256 throws PhylogenyParserException, PhyloXmlDataFormatException {
// branch length given as attribute of the clade element
257 if ( xml_element.isHasAttribute( PhyloXmlMapping.BRANCH_LENGTH ) ) {
260 d = Double.parseDouble( xml_element.getAttribute( PhyloXmlMapping.BRANCH_LENGTH ) );
262 catch ( final NumberFormatException e ) {
263 throw new PhylogenyParserException( "ill formatted distance in clade attribute ["
264 + xml_element.getAttribute( PhyloXmlMapping.BRANCH_LENGTH ) + "]: " + e.getMessage() );
266 node.setDistanceToParent( d );
// collapse flag: only a (trimmed, case-insensitive) "true" collapses the node
268 if ( xml_element.isHasAttribute( PhyloXmlMapping.NODE_COLLAPSE ) ) {
269 final String collapse_str = xml_element.getAttribute( PhyloXmlMapping.NODE_COLLAPSE );
270 if ( !ForesterUtil.isEmpty( collapse_str ) && collapse_str.trim().equalsIgnoreCase( "true" ) ) {
271 node.setCollapse( true );
274 for( int i = 0; i < xml_element.getNumberOfChildElements(); ++i ) {
275 final XmlElement element = xml_element.getChildElement( i );
276 final String qualified_name = element.getQualifiedName();
277 if ( qualified_name.equals( PhyloXmlMapping.BRANCH_LENGTH ) ) {
// a distance other than the default means it was already set, e.g. by the attribute above
278 if ( node.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
279 throw new PhylogenyParserException( "ill advised attempt to set distance twice for the same clade (probably via element and via attribute)" );
281 node.setDistanceToParent( element.getValueAsDouble() );
283 if ( qualified_name.equals( PhyloXmlMapping.NODE_NAME ) ) {
284 node.setName( element.getValueAsString() );
286 // else if ( qualified_name.equals( PhyloXmlMapping.NODE_IDENTIFIER ) ) {
287 // node.getNodeData().setNodeIdentifier( ( Identifier ) IdentifierParser.getInstance().parse( element ) );
289 else if ( qualified_name.equals( PhyloXmlMapping.TAXONOMY ) ) {
290 node.getNodeData().addTaxonomy( ( Taxonomy ) TaxonomyParser.getInstance().parse( element ) );
292 else if ( qualified_name.equals( PhyloXmlMapping.SEQUENCE ) ) {
293 final Sequence sequence = ( Sequence ) SequenceParser.getInstance().parse( element );
294 node.getNodeData().addSequence( sequence );
295 // we temporarily store all sequences that have a source ID so we can access them easily when we need to attach relations to them
296 final String sourceId = sequence.getSourceId();
297 if ( ( getCurrentPhylogeny() != null ) && !ForesterUtil.isEmpty( sourceId ) ) {
298 getSequenceMapByIdForPhylogeny( getCurrentPhylogeny() ).put( sourceId, sequence );
301 else if ( qualified_name.equals( PhyloXmlMapping.DISTRIBUTION ) ) {
302 node.getNodeData().addDistribution( ( Distribution ) DistributionParser.getInstance().parse( element ) );
304 else if ( qualified_name.equals( PhyloXmlMapping.CLADE_DATE ) ) {
305 node.getNodeData().setDate( ( Date ) DateParser.getInstance().parse( element ) );
307 else if ( qualified_name.equals( PhyloXmlMapping.REFERENCE ) ) {
308 node.getNodeData().addReference( ( Reference ) ReferenceParser.getInstance().parse( element ) );
310 else if ( qualified_name.equals( PhyloXmlMapping.BINARY_CHARACTERS ) ) {
311 node.getNodeData().setBinaryCharacters( ( BinaryCharacters ) BinaryCharactersParser.getInstance()
// the remaining simple elements are stored in the node's branch data or node data
314 else if ( qualified_name.equals( PhyloXmlMapping.COLOR ) ) {
315 node.getBranchData().setBranchColor( ( BranchColor ) ColorParser.getInstance().parse( element ) );
317 else if ( qualified_name.equals( PhyloXmlMapping.CONFIDENCE ) ) {
318 node.getBranchData().addConfidence( ( Confidence ) ConfidenceParser.getInstance().parse( element ) );
320 else if ( qualified_name.equals( PhyloXmlMapping.WIDTH ) ) {
321 node.getBranchData().setBranchWidth( ( BranchWidth ) BranchWidthParser.getInstance().parse( element ) );
323 else if ( qualified_name.equals( PhyloXmlMapping.EVENTS ) ) {
324 node.getNodeData().setEvent( ( Event ) EventParser.getInstance().parse( element ) );
326 else if ( qualified_name.equals( PhyloXmlMapping.PROPERTY ) ) {
327 final Property prop = ( Property ) PropertyParser.getInstance().parse( element );
// node-level properties whose ref starts with the visualization prefix go into NodeVisualData
328 if ( prop.getRef().startsWith( NodeVisualData.APTX_VISUALIZATION_REF )
329 && ( prop.getAppliesTo() == AppliesTo.NODE ) ) {
330 if ( node.getNodeData().getNodeVisualData() == null ) {
331 node.getNodeData().setNodeVisualData( new NodeVisualData() );
333 node.getNodeData().getNodeVisualData().parseProperty( prop );
// properties map is created on first use
336 if ( !node.getNodeData().isHasProperties() ) {
337 node.getNodeData().setProperties( new PropertiesMap() );
339 node.getNodeData().getProperties().addProperty( prop );
// Begins a new clade; the behaviour branches on whether a current node exists yet.
// NOTE(review): the branch bodies are not visible in this listing — presumably
// initCurrentNode() when there is no current node and addNode() otherwise; confirm.
345 private void newClade() {
346 if ( getCurrentNode() == null ) {
354 private void newPhylogeny() {
355 setCurrentPhylogeny( new Phylogeny() );
358 private void reset() {
359 setCurrentPhylogeny( null );
360 setCurrentNode( null );
361 setCurrentElementName( null );
362 setCurrentXmlElement( null );
365 private void setCurrentElementName( final String element_name ) {
366 _current_element_name = element_name;
369 private void setCurrentNode( final PhylogenyNode current_node ) {
370 _current_node = current_node;
373 private void setCurrentPhylogeny( final Phylogeny phylogeny ) {
374 _current_phylogeny = phylogeny;
377 private void setCurrentXmlElement( final XmlElement element ) {
378 _current_xml_element = element;
381 private void setPhylogenies( final List<Phylogeny> phylogenies ) {
382 _phylogenies = phylogenies;
// SAX callback for the start of a document.
// NOTE(review): the body is not visible in this listing — presumably it calls init(); confirm.
386 public void startDocument() throws SAXException {
/**
 * SAX callback for the start of an element. Only elements whose namespace
 * is empty or starts with the phyloXML location are processed; for those
 * the local name is recorded as the current element name.
 * <ul>
 * <li>clade: a new XML element is added as child of the current XML
 * element and becomes the current one;</li>
 * <li>phylogeny: the current XML element is replaced by a fresh, unnamed
 * one and the rerootable, branch length unit, rooted and type attributes
 * (when present) are copied onto the current phylogeny;</li>
 * <li>any other element while a phylogeny is open: handled like a clade
 * with respect to the XML element tree.</li>
 * </ul>
 * Note that the local name is used for both the local and the qualified
 * name of every XmlElement created here.
 */
391 public void startElement( final String namespace_uri,
392 final String local_name,
393 final String qualified_name,
394 final Attributes attributes ) throws SAXException {
395 if ( ForesterUtil.isEmpty( namespace_uri ) || namespace_uri.startsWith( ForesterConstants.PHYLO_XML_LOCATION ) ) {
396 setCurrentElementName( local_name );
397 if ( local_name.equals( PhyloXmlMapping.CLADE ) ) {
398 final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes );
399 getCurrentXmlElement().addChildElement( element );
400 setCurrentXmlElement( element );
403 else if ( local_name.equals( PhyloXmlMapping.PHYLOGENY ) ) {
// unnamed placeholder that will collect the phylogeny's child elements
404 setCurrentXmlElement( new XmlElement( "", "", "", null ) );
// this element is only used to read the phylogeny's attributes
406 final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes );
407 if ( element.isHasAttribute( PhyloXmlMapping.PHYLOGENY_IS_REROOTABLE_ATTR ) ) {
408 getCurrentPhylogeny().setRerootable( Boolean.parseBoolean( element
409 .getAttribute( PhyloXmlMapping.PHYLOGENY_IS_REROOTABLE_ATTR ) ) );
411 if ( element.isHasAttribute( PhyloXmlMapping.PHYLOGENY_BRANCHLENGTH_UNIT_ATTR ) ) {
412 getCurrentPhylogeny()
413 .setDistanceUnit( element.getAttribute( PhyloXmlMapping.PHYLOGENY_BRANCHLENGTH_UNIT_ATTR ) );
415 if ( element.isHasAttribute( PhyloXmlMapping.PHYLOGENY_IS_ROOTED_ATTR ) ) {
416 getCurrentPhylogeny().setRooted( Boolean.parseBoolean( element
417 .getAttribute( PhyloXmlMapping.PHYLOGENY_IS_ROOTED_ATTR ) ) );
419 if ( element.isHasAttribute( PhyloXmlMapping.PHYLOGENY_TYPE_ATTR ) ) {
420 getCurrentPhylogeny().setType( ( element.getAttribute( PhyloXmlMapping.PHYLOGENY_TYPE_ATTR ) ) );
423 else if ( local_name.equals( PHYLOXML ) ) {
// elements outside of any phylogeny are not tracked
425 else if ( getCurrentPhylogeny() != null ) {
426 final XmlElement element = new XmlElement( namespace_uri, local_name, local_name, attributes );
427 getCurrentXmlElement().addChildElement( element );
428 setCurrentXmlElement( element );
433 public static boolean attributeEqualsValue( final XmlElement element,
434 final String attributeName,
435 final String attributeValue ) {
436 final String attr = element.getAttribute( attributeName );
437 return ( ( attr != null ) && attr.equals( attributeValue ) );
// Looks up the named attribute on the given element and branches on whether it is non-null.
// NOTE(review): the return statements are not visible in this listing — presumably the
// attribute value is returned when present and null otherwise; confirm.
// The method name carries three 't's ("Atttribute"); it is public, so renaming would affect callers.
440 public static String getAtttributeValue( final XmlElement element, final String attributeName ) {
441 final String attr = element.getAttribute( attributeName );
442 if ( attr != null ) {
// Looks up the source-ID-to-sequence map recorded for the given phylogeny in the static
// phylogenySequencesById map, creating and registering an empty map on first request.
// NOTE(review): the return statement is not visible in this listing — presumably seqMap
// is returned; confirm.
450 static public Map<String, Sequence> getSequenceMapByIdForPhylogeny( final Phylogeny ph ) {
451 HashMap<String, Sequence> seqMap = phylogenySequencesById.get( ph );
452 if ( seqMap == null ) {
453 seqMap = new HashMap<String, Sequence>();
454 phylogenySequencesById.put( ph, seqMap );
459 private static void mapElementToPhylogeny( final XmlElement xml_element, final Phylogeny phylogeny )
460 throws PhylogenyParserException, PhyloXmlDataFormatException {
461 for( int i = 0; i < xml_element.getNumberOfChildElements(); ++i ) {
462 final XmlElement element = xml_element.getChildElement( i );
463 final String qualified_name = element.getQualifiedName();
464 if ( qualified_name.equals( PhyloXmlMapping.PHYLOGENY_NAME ) ) {
465 phylogeny.setName( element.getValueAsString() );
467 else if ( qualified_name.equals( PhyloXmlMapping.PHYLOGENY_DESCRIPTION ) ) {
468 phylogeny.setDescription( element.getValueAsString() );
470 else if ( qualified_name.equals( PhyloXmlMapping.IDENTIFIER ) ) {
471 phylogeny.setIdentifier( ( Identifier ) IdentifierParser.getInstance().parse( element ) );
473 else if ( qualified_name.equals( PhyloXmlMapping.CONFIDENCE ) ) {
474 phylogeny.setConfidence( ( Confidence ) ConfidenceParser.getInstance().parse( element ) );