2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
26 package org.forester.phylogeny.data;
28 import java.io.IOException;
29 import java.io.Writer;
30 import java.math.BigDecimal;
31 import java.util.ArrayList;
32 import java.util.List;
33 import java.util.SortedMap;
34 import java.util.StringTokenizer;
35 import java.util.TreeMap;
37 import org.forester.io.parsers.nhx.NHXtags;
38 import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
39 import org.forester.io.writers.PhylogenyWriter;
40 import org.forester.util.ForesterUtil;
/**
 * Holds a collection of {@link ProteinDomain}s together with a total length,
 * and implements {@link PhylogenyData}.
 */
42 public class DomainArchitecture implements PhylogenyData {
// Separator written between fields by toNHX() and used as the tokenizer delimiter by the String constructor.
44 public final static String NHX_SEPARATOR = ">";
// Step that addDomain() adds to a candidate map key for as long as that key is already in use.
45 private static final BigDecimal INCREASE_KEY = new BigDecimal( "0.00001" );
// Domains in ascending key order (init() creates a TreeMap); addDomain() derives each key from ProteinDomain.getFrom().
46 private SortedMap<BigDecimal, ProteinDomain> _domains;
// Total length as supplied to a constructor or to setTotalLength().
47 private int _total_length;
/**
 * No-argument constructor.
 * NOTE(review): the body (original lines 50-51) is missing from this listing;
 * presumably it calls init() so that the domain map exists -- confirm.
 */
49 public DomainArchitecture() {
/**
 * Creates a domain architecture from a list of domains and a total length.
 *
 * @param domains      every element is cast to ProteinDomain, so an element of
 *                     any other type raises a ClassCastException
 * @param total_length stored unchanged as the total length
 */
53 public DomainArchitecture( final List<PhylogenyData> domains, final int total_length ) {
// NOTE(review): original line 54 is missing from this listing; presumably init() -- confirm.
55 for( final PhylogenyData phylogenyData : domains ) {
// Unchecked downcast: the list is typed as PhylogenyData but ProteinDomain is required.
56 final ProteinDomain pd = ( ProteinDomain ) phylogenyData;
// NOTE(review): original line 57 is missing; presumably pd is handed to addDomain() -- confirm.
// No check is made here that total_length covers the given domains.
59 _total_length = total_length;
/**
 * Creates a domain architecture by parsing a string whose tokens are separated
 * by NHX_SEPARATOR: the first token is the total length, followed by groups of
 * four tokens (from, to, support, name), one group per domain. This is the same
 * field order that toNHX() writes after its tag.
 *
 * NOTE(review): original lines 63-66 (declarations of 'total_length' and 'to',
 * and presumably the opening "try"), lines 79-81 and line 84 (the end of the
 * error message) are missing from this listing -- confirm against the full file.
 *
 * @param da_str the string to parse
 * @throws IllegalArgumentException if an exception is caught while parsing, or
 *         if the last parsed "to" value is greater than the total length
 */
62 public DomainArchitecture( final String da_str ) {
67 final StringTokenizer st = new StringTokenizer( da_str, DomainArchitecture.NHX_SEPARATOR );
// First token: total length.
68 final String length_str = ( String ) st.nextElement();
// 'total_length' is assigned, not declared, here; its declaration is on a missing line.
69 total_length = new Integer( length_str ).intValue();
// Each iteration consumes four tokens; nextElement() throws NoSuchElementException
// when a group is incomplete.
70 while ( st.hasMoreElements() ) {
71 final String from_str = ( String ) st.nextElement();
72 final String to_str = ( String ) st.nextElement();
73 final String support_str = ( String ) st.nextElement();
74 final String name = ( String ) st.nextElement();
// 'to' is also declared on a missing line; it is overwritten on every iteration.
75 to = new Integer( to_str ).intValue();
76 final int from = new Integer( from_str ).intValue();
77 final double support = new Double( support_str ).doubleValue();
78 final ProteinDomain pd = new ProteinDomain( name, from, to, support );
// NOTE(review): pd is presumably passed to addDomain() on missing line 79 -- confirm.
// Every exception type is converted to IllegalArgumentException.
// NOTE(review): the visible part of the throw does not pass 'e' as the cause -- confirm on line 84.
82 catch ( final Exception e ) {
83 throw new IllegalArgumentException( "malformed format for domain structure \"" + da_str + "\": "
// Only the value left in 'to' by the last loop iteration is compared here.
// NOTE(review): an earlier domain with a larger "to" would not be caught -- confirm whether that is intended.
86 if ( to > total_length ) {
87 throw new IllegalArgumentException( "total length of domain structure is too short" );
89 _total_length = total_length;
92 public void addDomain( final ProteinDomain pd ) {
93 BigDecimal key = new BigDecimal( "" + pd.getFrom() );
94 while ( _domains.containsKey( key ) ) {
95 key = new BigDecimal( "" + ( key.doubleValue() + DomainArchitecture.INCREASE_KEY.doubleValue() ) );
97 _domains.put( key, pd );
/**
 * Builds a text form of this architecture by appending the asSimpleText()
 * result of every domain, visited by index through getDomain(i).
 * NOTE(review): original lines 104-106 (inside the loop, before the append) and
 * the return statement are missing from this listing; presumably a separator is
 * written between domains and the buffer is returned -- confirm.
 */
101 public StringBuffer asSimpleText() {
102 final StringBuffer sb = new StringBuffer();
// getDomains().size() is re-evaluated on every iteration.
103 for( int i = 0; i < getDomains().size(); ++i ) {
107 sb.append( getDomain( i ).asSimpleText() );
/**
 * Builds a text form of this architecture by appending the asText() result of
 * every domain, visited by index through getDomain(i). toString() returns this
 * text.
 * NOTE(review): original lines 116-118 (inside the loop, before the append) and
 * the return statement are missing from this listing; presumably a separator is
 * written between domains and the buffer is returned -- confirm.
 */
113 public StringBuffer asText() {
114 final StringBuffer sb = new StringBuffer();
// getDomains().size() is re-evaluated on every iteration.
115 for( int i = 0; i < getDomains().size(); ++i ) {
119 sb.append( getDomain( i ).asText() );
125 public PhylogenyData copy() {
126 final List<PhylogenyData> domains = new ArrayList<PhylogenyData>( getDomains().size() );
127 for( int i = 0; i < getDomains().size(); ++i ) {
128 domains.add( getDomain( i ).copy() );
130 return new DomainArchitecture( domains, getTotalLength() );
133 public ProteinDomain getDomain( final int i ) {
134 return ( ProteinDomain ) _domains.values().toArray()[ i ];
/**
 * Accessor for the domains as a sorted map with BigDecimal keys.
 * NOTE(review): the body (original line 138) is missing from this listing;
 * presumably it returns the internal _domains map itself rather than a copy --
 * confirm before relying on either behaviour.
 */
137 public SortedMap<BigDecimal, ProteinDomain> getDomains() {
141 public int getNumberOfDomains() {
142 return _domains.size();
145 public int getTotalLength() {
146 return _total_length;
// Replaces the domain map with a new, empty TreeMap, so entries are ordered by their BigDecimal keys.
// NOTE(review): original lines 151-153 are missing from this listing; a further
// short statement (for example resetting the total length) may follow -- confirm.
149 private void init() {
150 _domains = new TreeMap<BigDecimal, ProteinDomain>();
// Compares this architecture with another PhylogenyData: first by type, then by
// number of domains, then by domain name position by position. Only getName()
// is read from each domain; positions, confidence and total length are not read.
// NOTE(review): the statement inside every "if" below (original lines 163, 166,
// 170, 174) and the final return (line 177) are missing from this listing;
// presumably "return false" and "return true" respectively -- confirm.
155 * Returns true if the names and the order of the domains match (domain and
156 * linker lengths are ignored).
161 public boolean isEqual( final PhylogenyData domain_architecture ) {
162 if ( domain_architecture == null ) {
// The instanceof check guards the cast below.
165 if ( !( domain_architecture instanceof DomainArchitecture ) ) {
168 final DomainArchitecture d = ( DomainArchitecture ) domain_architecture;
169 if ( getDomains().size() != d.getDomains().size() ) {
// Both sides are visited in map (key) order via getDomain(i).
172 for( int i = 0; i < getDomains().size(); ++i ) {
173 if ( !getDomain( i ).getName().equals( d.getDomain( i ).getName() ) ) {
180 public void setTotalLength( final int total_length ) {
181 _total_length = total_length;
/**
 * Builds the NHX text for this architecture: NHXtags.DOMAIN_STRUCTURE, then the
 * total length, then for every domain four fields (from, to, confidence, name),
 * each preceded by NHX_SEPARATOR. The String constructor reads the same field
 * order.
 * NOTE(review): original line 187 (a statement before the first visible append)
 * and the return statement are missing from this listing -- confirm.
 */
185 public StringBuffer toNHX() {
186 final StringBuffer sb = new StringBuffer();
188 sb.append( NHXtags.DOMAIN_STRUCTURE );
189 sb.append( getTotalLength() );
190 if ( getDomains() != null ) {
// getDomain(i) is called four times per iteration, once for each field.
191 for( int i = 0; i < getDomains().size(); ++i ) {
192 sb.append( DomainArchitecture.NHX_SEPARATOR );
193 sb.append( getDomain( i ).getFrom() );
194 sb.append( DomainArchitecture.NHX_SEPARATOR );
195 sb.append( getDomain( i ).getTo() );
196 sb.append( DomainArchitecture.NHX_SEPARATOR );
197 sb.append( getDomain( i ).getConfidence() );
198 sb.append( DomainArchitecture.NHX_SEPARATOR );
// The name is passed through ForesterUtil.replaceIllegalNhxCharacters() before it is appended.
199 sb.append( ForesterUtil.replaceIllegalNhxCharacters( getDomain( i ).getName() ) );
206 public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
207 writer.write( ForesterUtil.LINE_SEPARATOR );
208 writer.write( indentation );
209 PhylogenyDataUtil.appendOpen( writer,
210 PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECURE,
211 PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH,
212 getTotalLength() + "" );
213 if ( getDomains() != null ) {
214 final String ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
215 for( int i = 0; i < getDomains().size(); ++i ) {
216 getDomain( i ).toPhyloXML( writer, level, ind );
219 writer.write( ForesterUtil.LINE_SEPARATOR );
220 writer.write( indentation );
221 PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECURE );
225 public String toString() {
226 return asText().toString();