X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fphylogeny%2Fdata%2FSequence.java;h=fd366c618af33de2a6c4fe6a54712243ee521cbd;hb=10297bd8b8a4b4ab198a17a42fc6ff24ae2ed49b;hp=27b3b0c4c2dcfd4fddf2e1fc2ce866de0604e3ef;hpb=48f7a89be9d34f1930a1f863e608235cc27184c5;p=jalview.git diff --git a/forester/java/src/org/forester/phylogeny/data/Sequence.java b/forester/java/src/org/forester/phylogeny/data/Sequence.java index 27b3b0c..fd366c6 100644 --- a/forester/java/src/org/forester/phylogeny/data/Sequence.java +++ b/forester/java/src/org/forester/phylogeny/data/Sequence.java @@ -5,7 +5,7 @@ // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved -// +// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either @@ -15,13 +15,13 @@ // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. -// +// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.phylogeny.data; @@ -37,39 +37,72 @@ import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.io.writers.PhylogenyWriter; +import org.forester.sequence.MolecularSequence; +import org.forester.sequence.MolecularSequence.TYPE; import org.forester.util.ForesterUtil; -public class Sequence implements PhylogenyData, MultipleUris { +public class Sequence implements PhylogenyData, MultipleUris, Comparable { + private Accession _accession; + private SortedSet _annotations; + private DomainArchitecture _da; + private String _gene_name; + private String _location; private String _mol_sequence; private boolean _mol_sequence_is_aligned; private String _name; + private List _seq_relations; private String _source_id; - private Accession _accession; private String _symbol; - private String _location; private String _type; - private SortedSet _annotations; - private DomainArchitecture _da; private List _uris; - private List _seq_relations; + private SortedSet _xrefs; public Sequence() { init(); } - public boolean isEmpty() { - return ( getAccession() == null ) && ForesterUtil.isEmpty( getName() ) && ForesterUtil.isEmpty( getSymbol() ) - && ForesterUtil.isEmpty( getType() ) && ForesterUtil.isEmpty( getLocation() ) - && ForesterUtil.isEmpty( getSourceId() ) && ForesterUtil.isEmpty( getMolecularSequence() ) - && ( getDomainArchitecture() == null ) && ForesterUtil.isEmpty( _annotations ) - && ForesterUtil.isEmpty( _uris ) && ForesterUtil.isEmpty( _seq_relations ); + public Sequence( final MolecularSequence mol_seq ) { + init(); + setMolecularSequence( mol_seq.getMolecularSequenceAsString() ); + setName( mol_seq.getIdentifier() ); + String type; + if ( mol_seq.getType() == TYPE.AA ) { + type = "protein"; + } + else if ( mol_seq.getType() == TYPE.DNA ) { + type = "dna"; + } + else if ( mol_seq.getType() == TYPE.RNA ) { + type = "rna"; + } + else { + throw new IllegalArgumentException( "unknown sequence type " + mol_seq.getType() ); + } + try { + setType( type ); + } + catch ( final PhyloXmlDataFormatException e ) { + throw new IllegalArgumentException( "don't know how to handle type " + mol_seq.getType() ); + } } public void addAnnotation( final Annotation annotation ) { getAnnotations().add( annotation ); } + public void addCrossReference( final Accession cross_reference ) { + if ( getCrossReferences() == null ) { + setCrossReferences( new TreeSet() ); + } + getCrossReferences().add( cross_reference ); + } + + public void addSequenceRelation( final SequenceRelation sr ) { + getSequenceRelations().add( sr ); + } + + @Override public void addUri( final Uri uri ) { if ( getUris() == null ) { setUris( new ArrayList() ); @@ -77,10 +110,7 @@ public class Sequence implements PhylogenyData, MultipleUris { getUris().add( uri ); } - public void addSequenceRelation( final SequenceRelation sr ) { - _seq_relations.add( sr ); - } - + @Override public StringBuffer asSimpleText() { final StringBuffer sb = new StringBuffer(); if ( getAccession() != null ) { @@ -98,19 +128,50 @@ public class Sequence implements PhylogenyData, MultipleUris { return sb; } + @Override public StringBuffer asText() { return asSimpleText(); } + @Override + public int compareTo( final Sequence o ) { + if ( ( !ForesterUtil.isEmpty( getName() ) ) && ( !ForesterUtil.isEmpty( o.getName() ) ) ) { + return getName().compareTo( o.getName() ); + } + if ( ( !ForesterUtil.isEmpty( getSymbol() ) ) && ( !ForesterUtil.isEmpty( o.getSymbol() ) ) ) { + return getSymbol().compareTo( o.getSymbol() ); + } + if ( ( !ForesterUtil.isEmpty( getGeneName() ) ) && ( !ForesterUtil.isEmpty( o.getGeneName() ) ) ) { + return getGeneName().compareTo( o.getGeneName() ); + } + if ( ( getAccession() != null ) && ( o.getAccession() != null ) + && !ForesterUtil.isEmpty( getAccession().getValue() ) + && !ForesterUtil.isEmpty( o.getAccession().getValue() ) ) { + return getAccession().getValue().compareTo( o.getAccession().getValue() ); + } + if ( ( !ForesterUtil.isEmpty( getMolecularSequence() ) ) + && ( !ForesterUtil.isEmpty( o.getMolecularSequence() ) ) ) { + return getMolecularSequence().compareTo( o.getMolecularSequence() ); + } + return 0; + } + /** * Not a deep copy. - * + * */ + @Override public PhylogenyData copy() { final Sequence seq = new Sequence(); seq.setAnnotations( getAnnotations() ); seq.setName( getName() ); - seq.setSymbol( getSymbol() ); + seq.setGeneName( getGeneName() ); + try { + seq.setSymbol( getSymbol() ); + } + catch ( final PhyloXmlDataFormatException e ) { + e.printStackTrace(); + } seq.setMolecularSequence( getMolecularSequence() ); seq.setMolecularSequenceAligned( isMolecularSequenceAligned() ); seq.setLocation( getLocation() ); @@ -120,7 +181,12 @@ public class Sequence implements PhylogenyData, MultipleUris { else { seq.setAccession( null ); } - seq.setType( getType() ); + try { + seq.setType( getType() ); + } + catch ( final PhyloXmlDataFormatException e ) { + e.printStackTrace(); + } if ( getUris() != null ) { seq.setUris( new ArrayList() ); for( final Uri uri : getUris() ) { @@ -135,6 +201,14 @@ public class Sequence implements PhylogenyData, MultipleUris { else { seq.setDomainArchitecture( null ); } + if ( getCrossReferences() != null ) { + seq.setCrossReferences( new TreeSet() ); + for( final Accession x : getCrossReferences() ) { + if ( x != null ) { + seq.getCrossReferences().add( x ); + } + } + } return seq; } @@ -170,10 +244,18 @@ public class Sequence implements PhylogenyData, MultipleUris { return _annotations; } + public SortedSet getCrossReferences() { + return _xrefs; + } + public DomainArchitecture getDomainArchitecture() { return _da; } + public String getGeneName() { + return _gene_name; + } + public String getLocation() { return _location; } @@ -182,10 +264,6 @@ public class Sequence implements PhylogenyData, MultipleUris { return _mol_sequence; } - public boolean isMolecularSequenceAligned() { - return _mol_sequence_is_aligned; - } - public String getName() { return _name; } @@ -197,10 +275,6 @@ public class Sequence implements PhylogenyData, MultipleUris { return _seq_relations; } - private void setSequenceRelations( final List seq_relations ) { - _seq_relations = seq_relations; - } - public String getSourceId() { return _source_id; } @@ -213,23 +287,28 @@ public class Sequence implements PhylogenyData, MultipleUris { return _type; } - public List getUris() { - return _uris; - } - + @Override public Uri getUri( final int index ) { return getUris().get( index ); } @Override + public List getUris() { + return _uris; + } + + @Override public int hashCode() { if ( getAccession() != null ) { return getAccession().hashCode(); } - int result = getSymbol().hashCode(); - if ( getName().length() > 0 ) { + int result = getName().hashCode(); + if ( getSymbol().length() > 0 ) { result ^= getName().hashCode(); } + if ( getGeneName().length() > 0 ) { + result ^= getGeneName().hashCode(); + } if ( getMolecularSequence().length() > 0 ) { result ^= getMolecularSequence().hashCode(); } @@ -241,20 +320,43 @@ public class Sequence implements PhylogenyData, MultipleUris { } public void init() { - setAnnotations( null ); setName( "" ); + setGeneName( "" ); setMolecularSequence( "" ); setMolecularSequenceAligned( false ); setLocation( "" ); setAccession( null ); - setSymbol( "" ); - setType( "" ); + try { + setSymbol( "" ); + } + catch ( final PhyloXmlDataFormatException e ) { + e.printStackTrace(); + } + try { + setType( "" ); + } + catch ( final PhyloXmlDataFormatException e ) { + e.printStackTrace(); + } setDomainArchitecture( null ); setUris( null ); setSequenceRelations( null ); setSourceId( null ); + setCrossReferences( null ); + setAnnotations( null ); } + public boolean isEmpty() { + return ( getAccession() == null ) && ForesterUtil.isEmpty( getName() ) && ForesterUtil.isEmpty( getSymbol() ) + && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getType() ) + && ForesterUtil.isEmpty( getLocation() ) && ForesterUtil.isEmpty( getSourceId() ) + && ForesterUtil.isEmpty( getMolecularSequence() ) && ( getDomainArchitecture() == null ) + && ForesterUtil.isEmpty( _annotations ) && ForesterUtil.isEmpty( _uris ) + && ForesterUtil.isEmpty( _seq_relations ) + && ( ( getCrossReferences() == null ) || getCrossReferences().isEmpty() ); + } + + @Override public boolean isEqual( final PhylogenyData data ) { if ( this == data ) { return true; @@ -264,21 +366,25 @@ public class Sequence implements PhylogenyData, MultipleUris { return getAccession().isEqual( s.getAccession() ); } return s.getMolecularSequence().equals( getMolecularSequence() ) && s.getName().equals( getName() ) - && s.getSymbol().equals( getSymbol() ); + && s.getSymbol().equals( getSymbol() ) && s.getGeneName().equals( getGeneName() ); } - public void setAccession( final Accession accession ) { - _accession = accession; + public boolean isMolecularSequenceAligned() { + return _mol_sequence_is_aligned; } - private void setAnnotations( final SortedSet annotations ) { - _annotations = annotations; + public void setAccession( final Accession accession ) { + _accession = accession; } public void setDomainArchitecture( final DomainArchitecture ds ) { _da = ds; } + public void setGeneName( final String gene_name ) { + _gene_name = gene_name; + } + public void setLocation( final String description ) { _location = description; } @@ -299,24 +405,26 @@ public class Sequence implements PhylogenyData, MultipleUris { _source_id = source_id; } - public void setSymbol( final String symbol ) { + public void setSymbol( final String symbol ) throws PhyloXmlDataFormatException { if ( !ForesterUtil.isEmpty( symbol ) && !PhyloXmlUtil.SEQUENCE_SYMBOL_PATTERN.matcher( symbol ).matches() ) { throw new PhyloXmlDataFormatException( "illegal sequence symbol: [" + symbol + "]" ); } _symbol = symbol; } - public void setType( final String type ) { + public void setType( final String type ) throws PhyloXmlDataFormatException { if ( !ForesterUtil.isEmpty( type ) && !PhyloXmlUtil.SEQUENCE_TYPES.contains( type ) ) { throw new PhyloXmlDataFormatException( "illegal sequence type: [" + type + "]" ); } _type = type; } + @Override public void setUris( final List uris ) { _uris = uris; } + @Override public StringBuffer toNHX() { final StringBuffer sb = new StringBuffer(); if ( getName().length() > 0 ) { @@ -327,12 +435,10 @@ public class Sequence implements PhylogenyData, MultipleUris { if ( getAccession() != null ) { getAccession().toNHX(); } - if ( getDomainArchitecture() != null ) { - sb.append( getDomainArchitecture().toNHX() ); - } return sb; } + @Override public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException { if ( isEmpty() ) { return; @@ -350,6 +456,9 @@ public class Sequence implements PhylogenyData, MultipleUris { if ( !ForesterUtil.isEmpty( getName() ) ) { PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_NAME, getName(), indentation ); } + if ( !ForesterUtil.isEmpty( getGeneName() ) ) { + PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_GENE_NAME, getGeneName(), indentation ); + } if ( !ForesterUtil.isEmpty( getLocation() ) ) { PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_LOCATION, getLocation(), indentation ); } @@ -361,18 +470,29 @@ public class Sequence implements PhylogenyData, MultipleUris { String.valueOf( isMolecularSequenceAligned() ), indentation ); } - if ( getUris() != null ) { + if ( ( getUris() != null ) && !getUris().isEmpty() ) { for( final Uri uri : getUris() ) { if ( uri != null ) { uri.toPhyloXML( writer, level, indentation ); } } } - if ( _annotations != null ) { + if ( ( getAnnotations() != null ) && !getAnnotations().isEmpty() ) { for( final PhylogenyData annotation : getAnnotations() ) { annotation.toPhyloXML( writer, level, my_ind ); } } + if ( ( getCrossReferences() != null ) && !getCrossReferences().isEmpty() ) { + writer.write( ForesterUtil.LINE_SEPARATOR ); + writer.write( my_ind ); + PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE_X_REFS ); + for( final PhylogenyData x : getCrossReferences() ) { + x.toPhyloXML( writer, level, my_ind ); + } + writer.write( ForesterUtil.LINE_SEPARATOR ); + writer.write( my_ind ); + PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_X_REFS ); + } if ( getDomainArchitecture() != null ) { getDomainArchitecture().toPhyloXML( writer, level, my_ind ); } @@ -385,4 +505,16 @@ public class Sequence implements PhylogenyData, MultipleUris { public String toString() { return asText().toString(); } + + private void setAnnotations( final SortedSet annotations ) { + _annotations = annotations; + } + + private void setCrossReferences( final TreeSet cross_references ) { + _xrefs = cross_references; + } + + private void setSequenceRelations( final List seq_relations ) { + _seq_relations = seq_relations; + } }