2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.phylogeny.data;
28 import java.io.IOException;
29 import java.io.Writer;
30 import java.util.ArrayList;
31 import java.util.List;
32 import java.util.SortedSet;
33 import java.util.TreeSet;
35 import org.forester.io.parsers.nhx.NHXtags;
36 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
37 import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
38 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
39 import org.forester.io.writers.PhylogenyWriter;
40 import org.forester.sequence.MolecularSequence;
41 import org.forester.sequence.MolecularSequence.TYPE;
42 import org.forester.util.ForesterUtil;
/**
 * Holds sequence-related data: accession, names, molecular sequence, annotations,
 * URIs, cross-references, domain architecture and sequence relations.
 * Instances are serialized by {@code toPhyloXML} and ordered by {@code compareTo}.
 */
44 public class Sequence implements PhylogenyData, MultipleUris, Comparable<Sequence> {
// Primary accession; null-checked wherever it is read in this class.
46 private Accession _accession;
// Sorted annotations; created lazily by getAnnotations().
47 private SortedSet<Annotation> _annotations;
// Domain architecture; may be null.
48 private DomainArchitecture _da;
49 private String _gene_name;
50 private String _location;
// Molecular sequence as a plain string, plus a flag telling whether it is aligned.
51 private String _mol_sequence;
52 private boolean _mol_sequence_is_aligned;
// Sequence relations; created lazily by getSequenceRelations().
54 private List<SequenceRelation> _seq_relations;
55 private String _source_id;
// Validated against PhyloXmlUtil.SEQUENCE_SYMBOL_PATTERN in setSymbol().
56 private String _symbol;
// addUri() installs a new list when getUris() yields null.
58 private List<Uri> _uris;
// Cross-references; addCrossReference() installs a new TreeSet when none exists.
59 private SortedSet<Accession> _xrefs;
/**
 * Creates a sequence from a {@link MolecularSequence}: the sequence string and the
 * identifier (used as name) are taken over, then the molecular type is dispatched on
 * (AA, DNA, RNA). Any other type raises an {@link IllegalArgumentException}.
 *
 * @param mol_seq the molecular sequence to take data from
 * @throws IllegalArgumentException for an unknown type, or when a
 *         PhyloXmlDataFormatException is caught
 */
65 public Sequence( final MolecularSequence mol_seq ) {
67 setMolecularSequence( mol_seq.getMolecularSequenceAsString() );
68 setName( mol_seq.getIdentifier() );
// The statements executed inside each type branch are not visible here.
70 if ( mol_seq.getType() == TYPE.AA ) {
73 else if ( mol_seq.getType() == TYPE.DNA ) {
76 else if ( mol_seq.getType() == TYPE.RNA ) {
80 throw new IllegalArgumentException( "unknown sequence type " + mol_seq.getType() );
// NOTE(review): the rethrow below does not chain `e` as the cause, so the original
// format-error detail is lost - consider passing `e` as second constructor argument.
85 catch ( final PhyloXmlDataFormatException e ) {
86 throw new IllegalArgumentException( "don't know how to handle type " + mol_seq.getType() );
/**
 * Adds an annotation to the set returned by {@code getAnnotations()}
 * (which creates the set on first use).
 *
 * @param annotation the annotation to add
 */
90 public void addAnnotation( final Annotation annotation ) {
91 getAnnotations().add( annotation );
94 public void addCrossReference( final Accession cross_reference ) {
95 if ( getCrossReferences() == null ) {
96 setCrossReferences( new TreeSet<Accession>() );
98 getCrossReferences().add( cross_reference );
/**
 * Appends a sequence relation to the list returned by {@code getSequenceRelations()}
 * (which creates the list on first use).
 *
 * @param sr the relation to add
 */
101 public void addSequenceRelation( final SequenceRelation sr ) {
102 getSequenceRelations().add( sr );
106 public void addUri( final Uri uri ) {
107 if ( getUris() == null ) {
108 setUris( new ArrayList<Uri>() );
110 getUris().add( uri );
/**
 * Builds a short textual form from the accession (when non-null), the name
 * (when non-empty) and the location (when non-empty), in that order.
 * NOTE(review): any separator text between the parts is not visible here.
 *
 * @return a new buffer holding the text
 */
114 public StringBuffer asSimpleText() {
115 final StringBuffer sb = new StringBuffer();
116 if ( getAccession() != null ) {
// Appends the accession via StringBuffer.append(Object), i.e. its toString().
118 sb.append( getAccession() );
121 if ( !ForesterUtil.isEmpty( getName() ) ) {
122 sb.append( getName() );
125 if ( !ForesterUtil.isEmpty( getLocation() ) ) {
126 sb.append( getLocation() );
/**
 * Returns the same text as {@code asSimpleText()}; there is no separate long form.
 */
132 public StringBuffer asText() {
133 return asSimpleText();
/**
 * Orders sequences by the first attribute that is non-empty on BOTH sides, tried in
 * this order: name, symbol, gene name, accession value, molecular sequence.
 * Each comparison is a plain {@code String.compareTo}.
 * NOTE(review): the result when no attribute qualifies is not visible here.
 *
 * @param o the sequence to compare with
 */
137 public int compareTo( final Sequence o ) {
138 if ( ( !ForesterUtil.isEmpty( getName() ) ) && ( !ForesterUtil.isEmpty( o.getName() ) ) ) {
139 return getName().compareTo( o.getName() );
141 if ( ( !ForesterUtil.isEmpty( getSymbol() ) ) && ( !ForesterUtil.isEmpty( o.getSymbol() ) ) ) {
142 return getSymbol().compareTo( o.getSymbol() );
144 if ( ( !ForesterUtil.isEmpty( getGeneName() ) ) && ( !ForesterUtil.isEmpty( o.getGeneName() ) ) ) {
145 return getGeneName().compareTo( o.getGeneName() );
// The accession is used only when both objects have one with a non-empty value.
147 if ( ( getAccession() != null ) && ( o.getAccession() != null )
148 && !ForesterUtil.isEmpty( getAccession().getValue() )
149 && !ForesterUtil.isEmpty( o.getAccession().getValue() ) ) {
150 return getAccession().getValue().compareTo( o.getAccession().getValue() );
152 if ( ( !ForesterUtil.isEmpty( getMolecularSequence() ) )
153 && ( !ForesterUtil.isEmpty( o.getMolecularSequence() ) ) ) {
154 return getMolecularSequence().compareTo( o.getMolecularSequence() );
/**
 * Creates a new Sequence and transfers this object's data to it. This is not a
 * fully deep copy:
 * - the annotation set is handed over by reference, so it is shared with the copy;
 * - accession and domain architecture are duplicated via their own copy();
 * - URIs and cross-references go into new collections that hold the same elements.
 * NOTE(review): no statement here transfers the source id or the sequence
 * relations - confirm whether that is intended.
 */
164 public PhylogenyData copy() {
165 final Sequence seq = new Sequence();
// Same SortedSet instance as this object: later additions affect both.
166 seq.setAnnotations( getAnnotations() );
167 seq.setName( getName() );
168 seq.setGeneName( getGeneName() );
170 seq.setSymbol( getSymbol() );
// NOTE(review): handling inside this catch is not visible; the symbol was already
// validated when it was set on this object, so a failure here is unexpected.
172 catch ( final PhyloXmlDataFormatException e ) {
175 seq.setMolecularSequence( getMolecularSequence() );
176 seq.setMolecularSequenceAligned( isMolecularSequenceAligned() );
177 seq.setLocation( getLocation() );
178 if ( getAccession() != null ) {
179 seq.setAccession( ( Accession ) getAccession().copy() );
182 seq.setAccession( null );
185 seq.setType( getType() );
187 catch ( final PhyloXmlDataFormatException e ) {
190 if ( getUris() != null ) {
191 seq.setUris( new ArrayList<Uri>() );
192 for( final Uri uri : getUris() ) {
// The Uri objects themselves are not copied.
194 seq.getUris().add( uri );
198 if ( getDomainArchitecture() != null ) {
199 seq.setDomainArchitecture( ( DomainArchitecture ) getDomainArchitecture().copy() );
202 seq.setDomainArchitecture( null );
204 if ( getCrossReferences() != null ) {
205 seq.setCrossReferences( new TreeSet<Accession>() );
206 for( final Accession x : getCrossReferences() ) {
// The Accession objects themselves are not copied.
208 seq.getCrossReferences().add( x );
216 public boolean equals( final Object o ) {
220 else if ( o == null ) {
223 else if ( o.getClass() != this.getClass() ) {
224 throw new IllegalArgumentException( "attempt to check [" + this.getClass() + "] equality to " + o + " ["
225 + o.getClass() + "]" );
228 return isEqual( ( Sequence ) o );
// Accessor for the accession; callers in this class null-check the result.
232 public Accession getAccession() {
236 public Annotation getAnnotation( final int i ) {
237 return ( Annotation ) getAnnotations().toArray()[ i ];
/**
 * Accessor for the annotation set. When the field is still null a new empty
 * TreeSet is installed first, so the field is non-null afterwards.
 */
240 public SortedSet<Annotation> getAnnotations() {
241 if ( _annotations == null ) {
242 _annotations = new TreeSet<Annotation>();
// Accessor for the cross-reference set; callers in this class treat null as "none yet".
247 public SortedSet<Accession> getCrossReferences() {
// Accessor for the domain architecture; null-checked by copy() and toPhyloXML().
251 public DomainArchitecture getDomainArchitecture() {
// Accessor for the gene name.
255 public String getGeneName() {
// Accessor for the location.
259 public String getLocation() {
/**
 * Returns the molecular sequence string exactly as stored.
 */
263 public String getMolecularSequence() {
264 return _mol_sequence;
// Accessor for the sequence name.
267 public String getName() {
271 public List<SequenceRelation> getSequenceRelations() {
272 if ( _seq_relations == null ) {
273 _seq_relations = new ArrayList<SequenceRelation>();
275 return _seq_relations;
// Accessor for the source id.
278 public String getSourceId() {
// Accessor for the symbol.
282 public String getSymbol() {
// Accessor for the sequence type; written as the type attribute by toPhyloXML().
286 public String getType() {
/**
 * Returns the URI at the given position of the list returned by {@code getUris()}.
 * NOTE(review): addUri() treats a null result of getUris() as "no list yet"; in
 * that state this method would fail with a NullPointerException - confirm callers
 * check first.
 *
 * @param index zero-based position
 */
291 public Uri getUri( final int index ) {
292 return getUris().get( index );
// Accessor for the URI list; callers in this class null-check the result.
296 public List<Uri> getUris() {
301 public int hashCode() {
302 if ( getAccession() != null ) {
303 return getAccession().hashCode();
305 int result = getName().hashCode();
306 if ( getSymbol().length() > 0 ) {
307 result ^= getName().hashCode();
309 if ( getGeneName().length() > 0 ) {
310 result ^= getGeneName().hashCode();
312 if ( getMolecularSequence().length() > 0 ) {
313 result ^= getMolecularSequence().hashCode();
318 public boolean isHasSequenceRelations() {
319 return _seq_relations != null && _seq_relations.size() > 0;
// Field reset sequence: molecular sequence to "", aligned flag to false, and
// accession, domain architecture, sequence relations, cross-references and
// annotations to null.
// NOTE(review): the enclosing method header and the statements guarded by the two
// catch clauses are not visible here.
325 setMolecularSequence( "" );
326 setMolecularSequenceAligned( false );
328 setAccession( null );
332 catch ( final PhyloXmlDataFormatException e ) {
338 catch ( final PhyloXmlDataFormatException e ) {
341 setDomainArchitecture( null );
343 setSequenceRelations( null );
345 setCrossReferences( null );
// Null here is fine: getAnnotations() re-creates the set on next access.
346 setAnnotations( null );
349 public boolean isEmpty() {
350 return ( getAccession() == null ) && ForesterUtil.isEmpty( getName() ) && ForesterUtil.isEmpty( getSymbol() )
351 && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getType() )
352 && ForesterUtil.isEmpty( getLocation() ) && ForesterUtil.isEmpty( getSourceId() )
353 && ForesterUtil.isEmpty( getMolecularSequence() ) && ( getDomainArchitecture() == null )
354 && ForesterUtil.isEmpty( _annotations ) && ForesterUtil.isEmpty( _uris )
355 && ForesterUtil.isEmpty( _seq_relations )
356 && ( ( getCrossReferences() == null ) || getCrossReferences().isEmpty() );
360 public boolean isEqual( final PhylogenyData data ) {
361 if ( this == data ) {
364 final Sequence s = ( Sequence ) data;
365 if ( ( getAccession() != null ) && ( s.getAccession() != null ) ) {
366 return getAccession().isEqual( s.getAccession() );
368 return s.getMolecularSequence().equals( getMolecularSequence() ) && s.getName().equals( getName() )
369 && s.getSymbol().equals( getSymbol() ) && s.getGeneName().equals( getGeneName() );
/**
 * Returns the flag telling whether the stored molecular sequence is aligned;
 * toPhyloXML() writes it as an attribute of the sequence element.
 */
372 public boolean isMolecularSequenceAligned() {
373 return _mol_sequence_is_aligned;
/**
 * Stores the accession as given; null is accepted and means "no accession".
 */
376 public void setAccession( final Accession accession ) {
377 _accession = accession;
// Mutator for the domain architecture; called with null by copy() and the reset code.
380 public void setDomainArchitecture( final DomainArchitecture ds ) {
/**
 * Stores the gene name as given, without validation.
 */
384 public void setGeneName( final String gene_name ) {
385 _gene_name = gene_name;
/**
 * Stores the location as given, without validation.
 * (The parameter is named "description" but is written to the location field.)
 */
388 public void setLocation( final String description ) {
389 _location = description;
/**
 * Stores the molecular sequence string as given, without validation.
 */
392 public void setMolecularSequence( final String mol_sequence ) {
393 _mol_sequence = mol_sequence;
/**
 * Sets the flag telling whether the molecular sequence is aligned.
 */
396 public void setMolecularSequenceAligned( final boolean aligned ) {
397 _mol_sequence_is_aligned = aligned;
// Mutator for the sequence name.
400 public void setName( final String name ) {
/**
 * Stores the source id as given, without validation.
 */
404 public void setSourceId( final String source_id ) {
405 _source_id = source_id;
/**
 * Mutator for the symbol. A non-empty symbol must fully match
 * PhyloXmlUtil.SEQUENCE_SYMBOL_PATTERN; null and empty values skip the check.
 *
 * @throws PhyloXmlDataFormatException if a non-empty symbol does not match the pattern
 */
408 public void setSymbol( final String symbol ) throws PhyloXmlDataFormatException {
409 if ( !ForesterUtil.isEmpty( symbol ) && !PhyloXmlUtil.SEQUENCE_SYMBOL_PATTERN.matcher( symbol ).matches() ) {
410 throw new PhyloXmlDataFormatException( "illegal sequence symbol: [" + symbol + "]" );
/**
 * Mutator for the sequence type. A non-empty type must be contained in
 * PhyloXmlUtil.SEQUENCE_TYPES; null and empty values skip the check.
 *
 * @throws PhyloXmlDataFormatException if a non-empty type is not a known type
 */
415 public void setType( final String type ) throws PhyloXmlDataFormatException {
416 if ( !ForesterUtil.isEmpty( type ) && !PhyloXmlUtil.SEQUENCE_TYPES.contains( type ) ) {
417 throw new PhyloXmlDataFormatException( "illegal sequence type: [" + type + "]" );
// Mutator for the URI list; addUri() and copy() pass a fresh ArrayList.
423 public void setUris( final List<Uri> uris ) {
/**
 * Builds the NHX representation: for a non-empty name, the gene-name tag followed
 * by the name with illegal NHX characters replaced.
 * NOTE(review): getName().length() fails with a NullPointerException for a null name.
 */
428 public StringBuffer toNHX() {
429 final StringBuffer sb = new StringBuffer();
430 if ( getName().length() > 0 ) {
432 sb.append( NHXtags.GENE_NAME );
433 sb.append( ForesterUtil.replaceIllegalNhxCharacters( getName() ) );
435 if ( getAccession() != null ) {
// NOTE(review): the value returned by toNHX() is discarded, so the accession never
// reaches `sb` - this looks like a missing sb.append( ... ); confirm intent.
436 getAccession().toNHX();
/**
 * Writes this sequence as a phyloXML sequence element. The opening tag carries the
 * type attribute. Child content follows in this order, each part only when present:
 * symbol, accession, name, gene name, location, molecular sequence (with the aligned
 * attribute), URIs, annotations, cross-references (wrapped in their own element) and
 * domain architecture. The closing tag comes last.
 *
 * @param writer destination
 * @param level passed through unchanged to nested elements
 * @param indentation indentation of the sequence element itself
 * @throws IOException if writing fails
 */
442 public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
// Indentation one step deeper, used for annotations, cross-references and domains.
446 final String my_ind = indentation + PhylogenyWriter.PHYLO_XML_INTENDATION_BASE;
447 writer.write( ForesterUtil.LINE_SEPARATOR );
448 writer.write( indentation );
449 PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE, PhyloXmlMapping.SEQUENCE_TYPE, getType() );
450 if ( !ForesterUtil.isEmpty( getSymbol() ) ) {
451 PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_SYMBOL, getSymbol(), indentation );
// An accession without a value is skipped entirely.
453 if ( ( getAccession() != null ) && !ForesterUtil.isEmpty( getAccession().getValue() ) ) {
454 getAccession().toPhyloXML( writer, level, indentation );
456 if ( !ForesterUtil.isEmpty( getName() ) ) {
457 PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_NAME, getName(), indentation );
459 if ( !ForesterUtil.isEmpty( getGeneName() ) ) {
460 PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_GENE_NAME, getGeneName(), indentation );
462 if ( !ForesterUtil.isEmpty( getLocation() ) ) {
463 PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_LOCATION, getLocation(), indentation );
465 if ( !ForesterUtil.isEmpty( getMolecularSequence() ) ) {
466 PhylogenyDataUtil.appendElement( writer,
467 PhyloXmlMapping.SEQUENCE_MOL_SEQ,
468 getMolecularSequence(),
469 PhyloXmlMapping.SEQUENCE_MOL_SEQ_ALIGNED_ATTR,
470 String.valueOf( isMolecularSequenceAligned() ),
473 if ( ( getUris() != null ) && !getUris().isEmpty() ) {
474 for( final Uri uri : getUris() ) {
476 uri.toPhyloXML( writer, level, indentation );
// getAnnotations() never yields null (it creates the set), so only emptiness matters.
480 if ( ( getAnnotations() != null ) && !getAnnotations().isEmpty() ) {
481 for( final PhylogenyData annotation : getAnnotations() ) {
482 annotation.toPhyloXML( writer, level, my_ind );
// Cross-references are enclosed in a dedicated wrapper element.
485 if ( ( getCrossReferences() != null ) && !getCrossReferences().isEmpty() ) {
486 writer.write( ForesterUtil.LINE_SEPARATOR );
487 writer.write( my_ind );
488 PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE_X_REFS );
489 for( final PhylogenyData x : getCrossReferences() ) {
490 x.toPhyloXML( writer, level, my_ind );
492 writer.write( ForesterUtil.LINE_SEPARATOR );
493 writer.write( my_ind );
494 PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_X_REFS );
496 if ( getDomainArchitecture() != null ) {
497 getDomainArchitecture().toPhyloXML( writer, level, my_ind );
499 writer.write( ForesterUtil.LINE_SEPARATOR );
500 writer.write( indentation );
501 PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE );
/**
 * Returns the text produced by {@code asText()} as a String.
 */
505 public String toString() {
506 return asText().toString();
/**
 * Replaces the annotation set with the given instance (no copy is made);
 * null is allowed because getAnnotations() re-creates the set on demand.
 */
509 private void setAnnotations( final SortedSet<Annotation> annotations ) {
510 _annotations = annotations;
/**
 * Replaces the cross-reference set with the given instance (no copy is made);
 * null means "no cross-references".
 */
513 private void setCrossReferences( final TreeSet<Accession> cross_references ) {
514 _xrefs = cross_references;
/**
 * Replaces the relation list with the given instance (no copy is made);
 * null is allowed because getSequenceRelations() re-creates the list on demand.
 */
517 private void setSequenceRelations( final List<SequenceRelation> seq_relations ) {
518 _seq_relations = seq_relations;