2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.phylogeny.data;
28 import java.io.IOException;
29 import java.io.Writer;
30 import java.util.ArrayList;
31 import java.util.List;
32 import java.util.SortedSet;
33 import java.util.TreeSet;
35 import org.forester.io.parsers.nhx.NHXtags;
36 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
37 import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
38 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
39 import org.forester.io.writers.PhylogenyWriter;
40 import org.forester.util.ForesterUtil;
42 public class Sequence implements PhylogenyData, MultipleUris {
// Molecular sequence text and the flag recording whether it is aligned.
44 private String _mol_sequence;
45 private boolean _mol_sequence_is_aligned;
// Scalar attributes of the sequence.
47 private String _source_id;
48 private Accession _accession;
49 private String _symbol;
50 private String _location;
// Nested data and collections. _annotations and _seq_relations are created on
// demand by their getters and can be reset to null through their setters.
52 private SortedSet<Annotation> _annotations;
53 private DomainArchitecture _da;
54 private List<Uri> _uris;
55 private List<SequenceRelation> _seq_relations;
56 private SortedSet<Accession> _xrefs;
62 public boolean isEmpty() {
63 return ( getAccession() == null ) && ForesterUtil.isEmpty( getName() ) && ForesterUtil.isEmpty( getSymbol() )
64 && ForesterUtil.isEmpty( getType() ) && ForesterUtil.isEmpty( getLocation() )
65 && ForesterUtil.isEmpty( getSourceId() ) && ForesterUtil.isEmpty( getMolecularSequence() )
66 && ( getDomainArchitecture() == null ) && ForesterUtil.isEmpty( _annotations )
67 && ForesterUtil.isEmpty( _uris ) && ForesterUtil.isEmpty( _seq_relations )
68 && ( getCrossReferences() == null || getCrossReferences().isEmpty() );
71 public void addAnnotation( final Annotation annotation ) {
72 getAnnotations().add( annotation );
75 public void addCrossReference( Accession cross_reference ) {
76 if ( getCrossReferences() == null ) {
77 setCrossReferences( new TreeSet<Accession>() );
79 getCrossReferences().add( cross_reference );
/**
 * Accessor for the cross references of this sequence. Callers in this class
 * (isEmpty, addCrossReference, copy, toPhyloXML) check the result for null
 * before using it.
 */
82 public SortedSet<Accession> getCrossReferences() {
86 private void setCrossReferences( TreeSet<Accession> cross_references ) {
87 _xrefs = cross_references;
/**
 * Entry point for attaching a URI to this sequence. When getUris() yields
 * null, a new ArrayList is first installed through setUris().
 */
91 public void addUri( final Uri uri ) {
92 if ( getUris() == null ) {
93 setUris( new ArrayList<Uri>() );
98 public void addSequenceRelation( final SequenceRelation sr ) {
99 _seq_relations.add( sr );
/**
 * Assembles a short text form of this sequence in a new StringBuffer: the
 * accession is appended when it is non-null, then the name and the location,
 * each only when ForesterUtil.isEmpty does not report it as empty.
 */
103 public StringBuffer asSimpleText() {
104 final StringBuffer sb = new StringBuffer();
105 if ( getAccession() != null ) {
107 sb.append( getAccession() );
110 if ( !ForesterUtil.isEmpty( getName() ) ) {
111 sb.append( getName() );
114 if ( !ForesterUtil.isEmpty( getLocation() ) ) {
115 sb.append( getLocation() );
121 public StringBuffer asText() {
122 return asSimpleText();
/**
 * Creates a new Sequence and transfers this sequence's data to it.
 * Accession and domain architecture are duplicated through their own copy()
 * methods (or set to null when absent). URIs and cross references go into
 * newly created collections, but the Uri and Accession elements themselves
 * are added as-is.
 */
130 public PhylogenyData copy() {
131 final Sequence seq = new Sequence();
// NOTE(review): the annotation set itself is handed over, not duplicated, so
// the new sequence and this one reference the same SortedSet instance.
132 seq.setAnnotations( getAnnotations() );
133 seq.setName( getName() );
// setSymbol declares PhyloXmlDataFormatException, hence the handler below.
135 seq.setSymbol( getSymbol() );
137 catch ( final PhyloXmlDataFormatException e ) {
140 seq.setMolecularSequence( getMolecularSequence() );
141 seq.setMolecularSequenceAligned( isMolecularSequenceAligned() );
142 seq.setLocation( getLocation() );
143 if ( getAccession() != null ) {
144 seq.setAccession( ( Accession ) getAccession().copy() );
147 seq.setAccession( null );
// setType declares PhyloXmlDataFormatException as well.
150 seq.setType( getType() );
152 catch ( final PhyloXmlDataFormatException e ) {
155 if ( getUris() != null ) {
156 seq.setUris( new ArrayList<Uri>() );
157 for( final Uri uri : getUris() ) {
159 seq.getUris().add( uri );
163 if ( getDomainArchitecture() != null ) {
164 seq.setDomainArchitecture( ( DomainArchitecture ) getDomainArchitecture().copy() );
167 seq.setDomainArchitecture( null );
169 if ( getCrossReferences() != null ) {
170 seq.setCrossReferences( new TreeSet<Accession>() );
171 for( final Accession x : getCrossReferences() ) {
173 seq.getCrossReferences().add( x);
/**
 * Equality check that delegates to isEqual( PhylogenyData ) once the argument
 * is known to be of this object's class.
 * NOTE(review): an argument of a different class is rejected with an
 * IllegalArgumentException instead of producing a boolean result.
 */
181 public boolean equals( final Object o ) {
185 else if ( o == null ) {
188 else if ( o.getClass() != this.getClass() ) {
189 throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " ["
190 + o.getClass() + "]" );
// Same class as this object, so the cast cannot fail here.
193 return isEqual( ( Sequence ) o );
/**
 * Accessor for this sequence's accession. Callers in this class treat a null
 * result as "no accession".
 */
197 public Accession getAccession() {
201 public Annotation getAnnotation( final int i ) {
202 return ( Annotation ) getAnnotations().toArray()[ i ];
/**
 * Accessor for the annotation set with on-demand creation: when the backing
 * field is null, a new TreeSet is assigned to it.
 */
205 public SortedSet<Annotation> getAnnotations() {
206 if ( _annotations == null ) {
207 _annotations = new TreeSet<Annotation>();
/**
 * Accessor for the domain architecture. isEmpty, copy, toNHX and toPhyloXML
 * treat a null result as "not present".
 */
212 public DomainArchitecture getDomainArchitecture() {
/**
 * Accessor for the location string. asSimpleText and toPhyloXML emit it only
 * when ForesterUtil.isEmpty does not report it as empty.
 */
216 public String getLocation() {
220 public String getMolecularSequence() {
221 return _mol_sequence;
224 public boolean isMolecularSequenceAligned() {
225 return _mol_sequence_is_aligned;
/**
 * Accessor for the sequence name. Note that hashCode and toNHX call
 * length() on the result without a null check.
 */
228 public String getName() {
232 public List<SequenceRelation> getSequenceRelations() {
233 if ( _seq_relations == null ) {
234 _seq_relations = new ArrayList<SequenceRelation>();
236 return _seq_relations;
239 private void setSequenceRelations( final List<SequenceRelation> seq_relations ) {
240 _seq_relations = seq_relations;
/**
 * Accessor for the source id; isEmpty() requires it to be empty for the
 * sequence to count as empty.
 */
243 public String getSourceId() {
/**
 * Accessor for the sequence symbol. hashCode and isEqual dereference the
 * result without a null check.
 */
247 public String getSymbol() {
/**
 * Accessor for the sequence type; toPhyloXML passes it as the
 * PhyloXmlMapping.SEQUENCE_TYPE value when opening the sequence element.
 */
251 public String getType() {
/**
 * Accessor for the URI list. addUri, copy and toPhyloXML check the result
 * for null; getUri( int ) does not.
 */
256 public List<Uri> getUris() {
261 public Uri getUri( final int index ) {
262 return getUris().get( index );
/**
 * Hash code taken from the accession when one is set. Otherwise it starts
 * from the symbol's hash and XORs in the hashes of the name and of the
 * molecular sequence, each only when its length is greater than zero.
 * NOTE(review): the non-accession path dereferences getSymbol(), getName()
 * and getMolecularSequence() without null checks.
 */
266 public int hashCode() {
267 if ( getAccession() != null ) {
268 return getAccession().hashCode();
270 int result = getSymbol().hashCode();
271 if ( getName().length() > 0 ) {
272 result ^= getName().hashCode();
274 if ( getMolecularSequence().length() > 0 ) {
275 result ^= getMolecularSequence().hashCode();
280 public boolean hasSequenceRelations() {
281 return _seq_relations.size() > 0;
285 setAnnotations( null );
287 setMolecularSequence( "" );
288 setMolecularSequenceAligned( false );
290 setAccession( null );
294 catch ( final PhyloXmlDataFormatException e ) {
300 catch ( final PhyloXmlDataFormatException e ) {
303 setDomainArchitecture( null );
305 setSequenceRelations( null );
307 setCrossReferences(null);
/**
 * Compares this sequence with another PhylogenyData object.
 * When both sequences have an accession, the accessions alone decide the
 * result; otherwise molecular sequence, name and symbol must all be equal.
 */
311 public boolean isEqual( final PhylogenyData data ) {
312 if ( this == data ) {
// Throws ClassCastException if data is not a Sequence.
315 final Sequence s = ( Sequence ) data;
316 if ( ( getAccession() != null ) && ( s.getAccession() != null ) ) {
317 return getAccession().isEqual( s.getAccession() );
// NOTE(review): the three getters on s are dereferenced without null checks.
319 return s.getMolecularSequence().equals( getMolecularSequence() ) && s.getName().equals( getName() )
320 && s.getSymbol().equals( getSymbol() );
323 public void setAccession( final Accession accession ) {
324 _accession = accession;
327 private void setAnnotations( final SortedSet<Annotation> annotations ) {
328 _annotations = annotations;
/**
 * Mutator for the domain architecture; copy() also passes null here when the
 * source sequence has none.
 */
331 public void setDomainArchitecture( final DomainArchitecture ds ) {
335 public void setLocation( final String description ) {
336 _location = description;
339 public void setMolecularSequence( final String mol_sequence ) {
340 _mol_sequence = mol_sequence;
343 public void setMolecularSequenceAligned( final boolean aligned ) {
344 _mol_sequence_is_aligned = aligned;
/**
 * Mutator for the sequence name. Unlike setSymbol and setType it declares no
 * checked exception.
 */
347 public void setName( final String name ) {
351 public void setSourceId( final String source_id ) {
352 _source_id = source_id;
/**
 * Mutator for the sequence symbol, with validation: a symbol that
 * ForesterUtil.isEmpty does not report as empty must fully match
 * PhyloXmlUtil.SEQUENCE_SYMBOL_PATTERN.
 *
 * @throws PhyloXmlDataFormatException if a non-empty symbol fails that match
 */
355 public void setSymbol( final String symbol ) throws PhyloXmlDataFormatException {
356 if ( !ForesterUtil.isEmpty( symbol ) && !PhyloXmlUtil.SEQUENCE_SYMBOL_PATTERN.matcher( symbol ).matches() ) {
357 throw new PhyloXmlDataFormatException( "illegal sequence symbol: [" + symbol + "]" );
/**
 * Mutator for the sequence type, with validation: a type that
 * ForesterUtil.isEmpty does not report as empty must be contained in
 * PhyloXmlUtil.SEQUENCE_TYPES.
 *
 * @throws PhyloXmlDataFormatException if a non-empty type is not in that collection
 */
362 public void setType( final String type ) throws PhyloXmlDataFormatException {
363 if ( !ForesterUtil.isEmpty( type ) && !PhyloXmlUtil.SEQUENCE_TYPES.contains( type ) ) {
364 throw new PhyloXmlDataFormatException( "illegal sequence type: [" + type + "]" );
/**
 * Mutator for the URI list; addUri and copy use it to install a new ArrayList.
 */
370 public void setUris( final List<Uri> uris ) {
/**
 * Builds the NHX form of this sequence in a new StringBuffer: the gene-name
 * tag followed by the name with illegal NHX characters replaced (only when
 * the name is non-empty), and the domain architecture's NHX output when a
 * domain architecture is present.
 */
375 public StringBuffer toNHX() {
376 final StringBuffer sb = new StringBuffer();
// getName() is dereferenced directly, so a null name would throw here.
377 if ( getName().length() > 0 ) {
379 sb.append( NHXtags.GENE_NAME );
380 sb.append( ForesterUtil.replaceIllegalNhxCharacters( getName() ) );
382 if ( getAccession() != null ) {
// NOTE(review): whatever this call returns is not appended to sb or otherwise
// used — confirm whether the accession was meant to be part of the output.
383 getAccession().toNHX();
385 if ( getDomainArchitecture() != null ) {
386 sb.append( getDomainArchitecture().toNHX() );
/**
 * Writes this sequence as a phyloXML sequence element to the given writer.
 * Symbol, accession, name, location, molecular sequence, URIs, annotations,
 * domain architecture and cross references are each written only when present.
 *
 * @param writer destination of the XML text
 * @param level passed through unchanged to the nested toPhyloXML calls
 * @param indentation text written before the opening and closing sequence tags
 * @throws IOException declared for failures while writing
 */
392 public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
// Indentation for nested content: one PHYLO_XML_INTENDATION_BASE deeper.
396 final String my_ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
397 writer.write( ForesterUtil.LINE_SEPARATOR );
398 writer.write( indentation );
// Opening tag carries the sequence type as the SEQUENCE_TYPE value.
399 PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE, PhyloXmlMapping.SEQUENCE_TYPE, getType() );
400 if ( !ForesterUtil.isEmpty( getSymbol() ) ) {
401 PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_SYMBOL, getSymbol(), indentation );
// The accession is written only when it exists and its value is non-empty.
403 if ( ( getAccession() != null ) && !ForesterUtil.isEmpty( getAccession().getValue() ) ) {
404 getAccession().toPhyloXML( writer, level, indentation );
406 if ( !ForesterUtil.isEmpty( getName() ) ) {
407 PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_NAME, getName(), indentation );
409 if ( !ForesterUtil.isEmpty( getLocation() ) ) {
410 PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_LOCATION, getLocation(), indentation );
// The molecular sequence element also records the "aligned" flag as an attribute.
412 if ( !ForesterUtil.isEmpty( getMolecularSequence() ) ) {
413 PhylogenyDataUtil.appendElement( writer,
414 PhyloXmlMapping.SEQUENCE_MOL_SEQ,
415 getMolecularSequence(),
416 PhyloXmlMapping.SEQUENCE_MOL_SEQ_ALIGNED_ATTR,
417 String.valueOf( isMolecularSequenceAligned() ),
420 if ( getUris() != null && !getUris().isEmpty() ) {
421 for( final Uri uri : getUris() ) {
423 uri.toPhyloXML( writer, level, indentation );
427 if ( getAnnotations() != null && !getAnnotations().isEmpty() ) {
428 for( final PhylogenyData annotation : getAnnotations() ) {
429 annotation.toPhyloXML( writer, level, my_ind );
432 if ( getDomainArchitecture() != null ) {
433 getDomainArchitecture().toPhyloXML( writer, level, my_ind );
// Cross references are wrapped in their own SEQUENCE_X_REFS element.
435 if ( getCrossReferences() != null && !getCrossReferences().isEmpty() ) {
436 writer.write( ForesterUtil.LINE_SEPARATOR );
437 writer.write( my_ind );
438 PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE_X_REFS );
439 for( final PhylogenyData x : getCrossReferences() ) {
440 x.toPhyloXML( writer, level, my_ind );
442 writer.write( ForesterUtil.LINE_SEPARATOR );
443 writer.write( my_ind );
444 PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_X_REFS );
// Closing sequence tag, at the same indentation as the opening tag.
446 writer.write( ForesterUtil.LINE_SEPARATOR );
447 writer.write( indentation );
448 PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE );
452 public String toString() {
453 return asText().toString();