2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
26 package org.forester.phylogeny.data;
28 import java.io.IOException;
29 import java.io.Writer;
30 import java.util.ArrayList;
31 import java.util.List;
32 import java.util.SortedMap;
33 import java.util.StringTokenizer;
34 import java.util.TreeMap;
36 import org.forester.io.parsers.nhx.NHXtags;
37 import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
38 import org.forester.io.writers.PhylogenyWriter;
39 import org.forester.util.ForesterUtil;
// A PhylogenyData implementation that holds a set of ProteinDomain objects
// together with a total length.
41 public class DomainArchitecture implements PhylogenyData {
// Field delimiter of the NHX string form: used as the tokenizer delimiter in the
// String constructor and written between fields by toNHX().
43 public final static String NHX_SEPARATOR = ">";
// Step added to a map key by addDomain() until the key is unused, so that domains
// with the same start position do not overwrite each other.
44 private static final double INCREASE_KEY = 0.0001;
// Domains keyed by Double; addDomain() derives the key from ProteinDomain.getFrom().
// init() backs this with a TreeMap, so iteration is in ascending key order.
45 private SortedMap<Double, ProteinDomain> _domains;
// Total length, as supplied to a constructor or to setTotalLength().
46 private int _total_length;
// No-argument constructor.
// NOTE(review): the body is not visible in this view; presumably it calls init() -- confirm.
48 public DomainArchitecture() {
// Builds an architecture from a list of domains and a total length.
// Every list element is cast to ProteinDomain, so a list holding any other
// PhylogenyData implementation fails with a ClassCastException.
// NOTE(review): the statements that set up the map and store each pd are not
// visible in this view -- confirm against the full file.
52 public DomainArchitecture( final List<PhylogenyData> domains, final int total_length ) {
54 for( final PhylogenyData phylogenyData : domains ) {
55 final ProteinDomain pd = ( ProteinDomain ) phylogenyData;
58 _total_length = total_length;
// Parses an architecture from its NHX_SEPARATOR-delimited string form: the first
// token is the total length, followed by repeating groups of four tokens
// (from, to, support, name), one group per domain.
// Throws IllegalArgumentException when parsing fails or when the last parsed
// "to" value exceeds the total length.
// NOTE(review): the declarations of total_length and to, the opening of the try
// block, and the statement that stores each pd are not visible in this view.
61 public DomainArchitecture( final String da_str ) {
66 final StringTokenizer st = new StringTokenizer( da_str, DomainArchitecture.NHX_SEPARATOR );
// First token: total length.
67 final String length_str = ( String ) st.nextElement();
68 total_length = new Integer( length_str ).intValue();
// Remaining tokens are consumed four at a time; a truncated group makes
// nextElement() throw, which is handled by the catch below.
69 while ( st.hasMoreElements() ) {
70 final String from_str = ( String ) st.nextElement();
71 final String to_str = ( String ) st.nextElement();
72 final String support_str = ( String ) st.nextElement();
73 final String name = ( String ) st.nextElement();
74 to = new Integer( to_str ).intValue();
75 final int from = new Integer( from_str ).intValue();
76 final double support = new Double( support_str ).doubleValue();
77 final ProteinDomain pd = new ProteinDomain( name, from, to, support );
// Any exception raised while parsing is reported as a malformed-format error
// that quotes the offending input string.
81 catch ( final Exception e ) {
82 throw new IllegalArgumentException( "Malformed format for domain structure \"" + da_str + "\": "
// "to" is reassigned on every loop pass, so this compares only the most recently
// parsed end position with the total length, not the maximum over all domains.
85 if ( to > total_length ) {
86 throw new IllegalArgumentException( "total length of domain structure is too short" );
88 _total_length = total_length;
91 public void addDomain( final ProteinDomain pd ) {
92 Double key = new Double( pd.getFrom() );
93 while ( _domains.containsKey( key ) ) {
94 key = new Double( key.doubleValue() + DomainArchitecture.INCREASE_KEY );
96 _domains.put( key, pd );
// Builds a StringBuffer by appending asSimpleText() of every domain, visited by
// index in map order.
// NOTE(review): the lines between the loop header and the append (possibly a
// separator) and the return statement are not visible in this view -- confirm.
100 public StringBuffer asSimpleText() {
101 final StringBuffer sb = new StringBuffer();
102 for( int i = 0; i < getDomains().size(); ++i ) {
106 sb.append( getDomain( i ).asSimpleText() );
// Builds a StringBuffer by appending asText() of every domain, visited by index
// in map order. toString() delegates to this method.
// NOTE(review): the lines between the loop header and the append (possibly a
// separator) and the return statement are not visible in this view -- confirm.
112 public StringBuffer asText() {
113 final StringBuffer sb = new StringBuffer();
114 for( int i = 0; i < getDomains().size(); ++i ) {
118 sb.append( getDomain( i ).asText() );
124 public PhylogenyData copy() {
125 final List<PhylogenyData> domains = new ArrayList<PhylogenyData>( getDomains().size() );
126 for( int i = 0; i < getDomains().size(); ++i ) {
127 domains.add( getDomain( i ).copy() );
129 return new DomainArchitecture( domains, getTotalLength() );
132 public ProteinDomain getDomain( final int i ) {
133 return ( ProteinDomain ) _domains.values().toArray()[ i ];
// Accessor for the domains as a SortedMap keyed by Double.
// NOTE(review): the body is not visible in this view; presumably it returns the
// internal _domains map itself rather than a copy -- confirm.
136 public SortedMap<Double, ProteinDomain> getDomains() {
140 public int getNumberOfDomains() {
141 return _domains.size();
144 public int getTotalLength() {
145 return _total_length;
// Creates the empty TreeMap that backs _domains.
// NOTE(review): the remainder of this method is not visible in this view.
148 private void init() {
149 _domains = new TreeMap<Double, ProteinDomain>();
154 * Returns true if the names and the order of the domains match (domain and
155 * linker lengths are ignored).
// Compares this architecture with another PhylogenyData. The visible checks are,
// in order: null, type, number of domains, then domain names position by position.
// NOTE(review): the return statements of each branch are not visible in this
// view -- confirm against the full file.
160 public boolean isEqual( final PhylogenyData domain_architecture ) {
161 if ( domain_architecture == null ) {
164 if ( !( domain_architecture instanceof DomainArchitecture ) ) {
// The cast cannot fail: the instanceof check above has already been applied.
167 final DomainArchitecture d = ( DomainArchitecture ) domain_architecture;
168 if ( getDomains().size() != d.getDomains().size() ) {
// Only names are compared; from/to positions and confidence are not consulted.
171 for( int i = 0; i < getDomains().size(); ++i ) {
172 if ( !getDomain( i ).getName().equals( d.getDomain( i ).getName() ) ) {
179 public void setTotalLength( final int total_length ) {
180 _total_length = total_length;
// Builds the NHX form of this architecture: NHXtags.DOMAIN_STRUCTURE, the total
// length, then for every domain four NHX_SEPARATOR-prefixed fields in the order
// from, to, confidence, name. This is the same field order that the String
// constructor reads back.
// NOTE(review): one line after the StringBuffer creation and the return
// statement are not visible in this view -- confirm.
184 public StringBuffer toNHX() {
185 final StringBuffer sb = new StringBuffer();
187 sb.append( NHXtags.DOMAIN_STRUCTURE );
188 sb.append( getTotalLength() );
189 if ( getDomains() != null ) {
190 for( int i = 0; i < getDomains().size(); ++i ) {
191 sb.append( DomainArchitecture.NHX_SEPARATOR );
192 sb.append( getDomain( i ).getFrom() );
193 sb.append( DomainArchitecture.NHX_SEPARATOR );
194 sb.append( getDomain( i ).getTo() );
195 sb.append( DomainArchitecture.NHX_SEPARATOR );
196 sb.append( getDomain( i ).getConfidence() );
197 sb.append( DomainArchitecture.NHX_SEPARATOR );
// The name is passed through ForesterUtil.replaceIllegalNhxCharacters before being written.
198 sb.append( ForesterUtil.replaceIllegalNhxCharacters( getDomain( i ).getName() ) );
205 public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
206 writer.write( ForesterUtil.LINE_SEPARATOR );
207 writer.write( indentation );
208 PhylogenyDataUtil.appendOpen( writer,
209 PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECURE,
210 PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH,
211 getTotalLength() + "" );
212 if ( getDomains() != null ) {
213 final String ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
214 for( int i = 0; i < getDomains().size(); ++i ) {
215 getDomain( i ).toPhyloXML( writer, level, ind );
218 writer.write( ForesterUtil.LINE_SEPARATOR );
219 writer.write( indentation );
220 PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_DOMAIN_ARCHITECURE );
224 public String toString() {
225 return asText().toString();