// $Id:
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.phylogeny.data;

import java.io.IOException;
import java.io.Writer;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedMap;
import java.util.StringTokenizer;
import java.util.TreeMap;

import org.forester.io.parsers.nhx.NHXtags;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.util.ForesterUtil;

/**
 * A collection of {@link ProteinDomain}s plus a total length.
 * <p>
 * Domains are stored in a sorted map keyed by their start position
 * ({@code getFrom()}), so every positional accessor and every serialization
 * method visits them in ascending start order. Domains sharing the same start
 * position are kept in insertion order (see {@link #addDomain(ProteinDomain)}).
 */
public class DomainArchitecture implements PhylogenyData {

    /** Field separator of the textual form parsed by {@link #DomainArchitecture(String)} and written by {@link #toNHX()}. */
    public final static String                         NHX_SEPARATOR = ">";
    /** Amount added to a map key until it no longer collides with an existing key. */
    private static final BigDecimal                    INCREASE_KEY  = new BigDecimal( "0.00001" );
    private final SortedMap<BigDecimal, PhylogenyData> _domains      = new TreeMap<BigDecimal, PhylogenyData>();
    private int                                        _total_length;

    /**
     * Creates an architecture without domains and with a total length of 0.
     */
    public DomainArchitecture() {
        _total_length = 0;
    }

    /**
     * Creates an architecture from existing domains.
     *
     * @param domains the domains to add; every element must be a {@link ProteinDomain}
     * @param total_length the total length to store (not validated against the domains)
     * @throws ClassCastException if an element of {@code domains} is not a {@link ProteinDomain}
     */
    public DomainArchitecture( final List<? extends PhylogenyData> domains, final int total_length ) {
        for( final PhylogenyData phylogenyData : domains ) {
            addDomain( ( ProteinDomain ) phylogenyData );
        }
        _total_length = total_length;
    }

    /**
     * Parses an architecture from its separator-delimited textual form:
     * the total length, followed by any number of
     * {@code from}, {@code to}, {@code support}, {@code name} quadruples, all
     * separated by {@link #NHX_SEPARATOR}.
     *
     * @param da_str the string to parse
     * @throws IllegalArgumentException if {@code da_str} cannot be parsed (the
     *         original failure is attached as the cause), or if the end of
     *         the last parsed domain exceeds the total length
     */
    public DomainArchitecture( final String da_str ) {
        int total_length = 0;
        int to = -1;
        try {
            final StringTokenizer st = new StringTokenizer( da_str, DomainArchitecture.NHX_SEPARATOR );
            total_length = Integer.parseInt( st.nextToken() );
            while ( st.hasMoreTokens() ) {
                // Consume the complete quadruple before converting anything, so
                // that a truncated record is reported as such.
                final String from_str = st.nextToken();
                final String to_str = st.nextToken();
                final String support_str = st.nextToken();
                final String name = st.nextToken();
                to = Integer.parseInt( to_str );
                final int from = Integer.parseInt( from_str );
                final double support = Double.parseDouble( support_str );
                addDomain( new ProteinDomain( name, from, to, support ) );
            }
        }
        catch ( final Exception e ) {
            // Keep the historic message text, but do not lose the stack trace of the root cause.
            throw new IllegalArgumentException( "malformed format for domain structure \"" + da_str + "\": "
                    + e.getMessage(), e );
        }
        // NOTE(review): only the end position of the LAST parsed domain is
        // compared here; an earlier domain reaching past total_length is not
        // detected. Left unchanged because tightening it could reject input
        // that is accepted today -- confirm the intended contract.
        if ( to > total_length ) {
            throw new IllegalArgumentException( "total length of domain structure is too short" );
        }
        _total_length = total_length;
    }

    /**
     * Adds a domain, keyed by its start position. If another domain already
     * occupies that key, the key is increased in steps of {@code 0.00001}
     * until it is free, so the new domain sorts directly after the domains
     * with the same start that were added before it.
     *
     * @param pd the domain to add
     */
    public void addDomain( final ProteinDomain pd ) {
        BigDecimal key = new BigDecimal( String.valueOf( pd.getFrom() ) );
        while ( _domains.containsKey( key ) ) {
            // Exact decimal arithmetic; avoids the rounding noise of going through double.
            key = key.add( DomainArchitecture.INCREASE_KEY );
        }
        _domains.put( key, pd );
    }

    /**
     * Returns the {@code asSimpleText()} forms of all domains, in order,
     * joined by {@code "~"}.
     */
    @Override
    public StringBuffer asSimpleText() {
        return joinDomains( true );
    }

    /**
     * Returns the {@code asText()} forms of all domains, in order, joined by
     * {@code "~"}.
     */
    @Override
    public StringBuffer asText() {
        return joinDomains( false );
    }

    /**
     * Returns a new architecture holding a {@code copy()} of every domain and
     * the same total length.
     */
    @Override
    public PhylogenyData copy() {
        final List<PhylogenyData> domains = new ArrayList<PhylogenyData>( _domains.size() );
        for( final PhylogenyData domain : _domains.values() ) {
            domains.add( domain.copy() );
        }
        return new DomainArchitecture( domains, getTotalLength() );
    }

    /**
     * Returns the i-th domain in map order (0-based).
     * <p>
     * Each call copies the values into an array first, so prefer iterating
     * {@code getDomains().values()} when visiting all domains.
     *
     * @param i the 0-based position of the domain
     * @throws ArrayIndexOutOfBoundsException if {@code i} is out of range
     */
    public ProteinDomain getDomain( final int i ) {
        return ( ProteinDomain ) _domains.values().toArray()[ i ];
    }

    /**
     * Returns the backing map itself (not a copy); changes made through it
     * affect this architecture.
     */
    public SortedMap<BigDecimal, PhylogenyData> getDomains() {
        return _domains;
    }

    public int getNumberOfDomains() {
        return _domains.size();
    }

    public int getTotalLength() {
        return _total_length;
    }

    /**
     * Returns true if the names and the order of the domains match (domain and
     * linker lengths are ignored).
     *
     * @param domain_architecture the object to compare with; {@code null} or a
     *        non-{@code DomainArchitecture} yields {@code false}
     */
    @Override
    public boolean isEqual( final PhylogenyData domain_architecture ) {
        // instanceof is false for null, so this guard covers both rejections.
        if ( !( domain_architecture instanceof DomainArchitecture ) ) {
            return false;
        }
        final DomainArchitecture other = ( DomainArchitecture ) domain_architecture;
        if ( _domains.size() != other._domains.size() ) {
            return false;
        }
        // Snapshot the other side once, then walk both sequences in lock step.
        final Object[] theirs = other._domains.values().toArray();
        int pos = 0;
        for( final PhylogenyData mine : _domains.values() ) {
            final String my_name = ( ( ProteinDomain ) mine ).getName();
            final String their_name = ( ( ProteinDomain ) theirs[ pos++ ] ).getName();
            if ( !my_name.equals( their_name ) ) {
                return false;
            }
        }
        return true;
    }

    public void setTotalLength( final int total_length ) {
        _total_length = total_length;
    }

    /**
     * Serializes this architecture as {@code ":"}, the
     * {@code NHXtags.DOMAIN_STRUCTURE} tag and the total length, followed, for
     * every domain, by its start, end, confidence and (NHX-sanitized) name,
     * each preceded by {@link #NHX_SEPARATOR}.
     */
    @Override
    public StringBuffer toNHX() {
        final StringBuffer sb = new StringBuffer();
        sb.append( ":" );
        sb.append( NHXtags.DOMAIN_STRUCTURE );
        sb.append( getTotalLength() );
        for( final PhylogenyData data : _domains.values() ) {
            final ProteinDomain domain = ( ProteinDomain ) data;
            sb.append( DomainArchitecture.NHX_SEPARATOR );
            sb.append( domain.getFrom() );
            sb.append( DomainArchitecture.NHX_SEPARATOR );
            sb.append( domain.getTo() );
            sb.append( DomainArchitecture.NHX_SEPARATOR );
            sb.append( domain.getConfidence() );
            sb.append( DomainArchitecture.NHX_SEPARATOR );
            sb.append( ForesterUtil.replaceIllegalNhxCharacters( domain.getName() ) );
        }
        return sb;
    }

    /**
     * Writes the domain-architecture element (carrying the total length as an
     * attribute), then every domain one indentation step deeper, then the
     * closing tag.
     *
     * @param writer the destination
     * @param level passed through unchanged to each domain
     * @param indentation the indentation written before the opening and closing tags
     * @throws IOException if writing fails
     */
    @Override
    public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendOpen( writer,
                                      PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECURE,
                                      PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH,
                                      getTotalLength() + "" );
        final String ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
        for( final PhylogenyData domain : _domains.values() ) {
            domain.toPhyloXML( writer, level, ind );
        }
        writer.write( ForesterUtil.LINE_SEPARATOR );
        writer.write( indentation );
        PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECURE );
    }

    @Override
    public String toString() {
        return asText().toString();
    }

    /**
     * Joins the textual form of every domain with {@code "~"}.
     *
     * @param simple {@code true} to use {@code asSimpleText()}, {@code false} to use {@code asText()}
     */
    private StringBuffer joinDomains( final boolean simple ) {
        final StringBuffer sb = new StringBuffer();
        boolean first = true;
        for( final PhylogenyData domain : _domains.values() ) {
            if ( !first ) {
                sb.append( "~" );
            }
            first = false;
            sb.append( simple ? domain.asSimpleText() : domain.asText() );
        }
        return sb;
    }
}