From a648fae3c8d0402dbdafa379ff3d42bbea96633d Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Mon, 30 Sep 2013 04:40:10 +0000 Subject: [PATCH] in progress --- .../io/parsers/phyloxml/PhyloXmlMapping.java | 2 + .../forester/io/parsers/phyloxml/PhyloXmlUtil.java | 2 +- .../io/parsers/phyloxml/data/SequenceParser.java | 7 ++++ .../src/org/forester/phylogeny/data/Accession.java | 22 ++++++---- .../src/org/forester/phylogeny/data/Sequence.java | 43 ++++++++++++++++++-- forester/java/src/org/forester/test/Test.java | 35 +++++++++++++++- .../org/forester/ws/seqdb/SequenceDbWsTools.java | 3 +- 7 files changed, 100 insertions(+), 14 deletions(-) diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java index 9e72a30..bdf5889 100644 --- a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java +++ b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java @@ -84,6 +84,7 @@ public final class PhyloXmlMapping { public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM = "from"; public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO = "to"; public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE = "confidence"; + public final static String SEQUENCE_X_REFS = "cross_references"; // public final static String NODE_IDENTIFIER = "node_id"; public final static String IDENTIFIER = "id"; public final static String IDENTIFIER_PROVIDER_ATTR = "provider"; @@ -130,6 +131,7 @@ public final class PhyloXmlMapping { public final static String SEQUENCE_RELATION_DISTANCE = "distance"; public final static String SEQUENCE_SOURCE_ID = "id_source"; public final static String POLYGON = "polygon"; + private PhyloXmlMapping() { } diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java index e585cb3..95d5e9b 100644 --- a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java +++ b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java @@ -37,7 +37,7 @@ public final class PhyloXmlUtil { public static final String OTHER = "other"; public static final String UNKNOWN = "unknown"; - public final static Pattern SEQUENCE_SYMBOL_PATTERN = Pattern.compile( "\\S{1,20}" ); + public final static Pattern SEQUENCE_SYMBOL_PATTERN = Pattern.compile( "\\S{1,30}" ); public final static Pattern TAXOMONY_CODE_PATTERN = Pattern .compile( ParserUtils.TAX_CODE ); public final static Pattern LIT_REF_DOI_PATTERN = Pattern diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java index fa1caad..680cb4d 100644 --- a/forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java +++ b/forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java @@ -89,6 +89,13 @@ public class SequenceParser implements PhylogenyDataPhyloXmlParser { else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.URI ) ) { sequence.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) ); } + else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_X_REFS ) ) { + for( int j = 0; j < child_element.getNumberOfChildElements(); ++j ) { + // final XmlElement c = child_element.getChildElement( j ); + sequence.addCrossReference( ( Accession ) AccessionParser.getInstance().parse( child_element.getChildElement( j ) ) ); + } + //sequence.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) ); + } } return sequence; } diff --git a/forester/java/src/org/forester/phylogeny/data/Accession.java b/forester/java/src/org/forester/phylogeny/data/Accession.java index 069978b..6e9f158 100644 --- a/forester/java/src/org/forester/phylogeny/data/Accession.java +++ b/forester/java/src/org/forester/phylogeny/data/Accession.java @@ -32,20 +32,20 @@ import org.forester.io.parsers.nhx.NHXtags; import org.forester.io.parsers.phyloxml.PhyloXmlMapping; import org.forester.util.ForesterUtil; -public final class Accession implements PhylogenyData { +public final class Accession implements PhylogenyData, Comparable { final private String _value; final private String _source; - final private String _value_source; + final private String _source_value; public Accession( final String value, final String source ) { _value = value; _source = source; if ( source != null ) { - _value_source = value + source; + _source_value = source + value; } else { - _value_source = value; + _source_value = value; } } @@ -98,10 +98,8 @@ public final class Accession implements PhylogenyData { @Override public int hashCode() { - //if ( getSource() != null ) { - // return ( getSource() + getValue() ).hashCode(); - // } - return _value_source.hashCode(); + + return _source_value.hashCode(); } @Override @@ -152,4 +150,12 @@ public final class Accession implements PhylogenyData { public String toString() { return asText().toString(); } + + @Override + public int compareTo( Accession o ) { + if ( equals( o ) ) { + return 0; + } + return _source_value.compareTo( o._source_value ); + } } diff --git a/forester/java/src/org/forester/phylogeny/data/Sequence.java b/forester/java/src/org/forester/phylogeny/data/Sequence.java index b9e6aa3..ac4be9e 100644 --- a/forester/java/src/org/forester/phylogeny/data/Sequence.java +++ b/forester/java/src/org/forester/phylogeny/data/Sequence.java @@ -53,6 +53,7 @@ public class Sequence implements PhylogenyData, MultipleUris { private DomainArchitecture _da; private List _uris; private List _seq_relations; + private SortedSet _xrefs; public Sequence() { init(); @@ -63,12 +64,28 @@ public class Sequence implements PhylogenyData, MultipleUris { && ForesterUtil.isEmpty( getType() ) && ForesterUtil.isEmpty( getLocation() ) && ForesterUtil.isEmpty( getSourceId() ) && ForesterUtil.isEmpty( getMolecularSequence() ) && ( getDomainArchitecture() == null ) && ForesterUtil.isEmpty( _annotations ) - && ForesterUtil.isEmpty( _uris ) && ForesterUtil.isEmpty( _seq_relations ); + && ForesterUtil.isEmpty( _uris ) && ForesterUtil.isEmpty( _seq_relations ) + && ( getCrossReferences() == null || getCrossReferences().isEmpty() ); } public void addAnnotation( final Annotation annotation ) { getAnnotations().add( annotation ); } + + public void addCrossReference( Accession cross_reference ) { + if ( getCrossReferences() == null ) { + setCrossReferences( new TreeSet() ); + } + getCrossReferences().add( cross_reference ); + } + + public SortedSet getCrossReferences() { + return _xrefs; + } + + private void setCrossReferences( TreeSet cross_references ) { + _xrefs = cross_references; + } @Override public void addUri( final Uri uri ) { @@ -149,6 +166,14 @@ public class Sequence implements PhylogenyData, MultipleUris { else { seq.setDomainArchitecture( null ); } + if ( getCrossReferences() != null ) { + seq.setCrossReferences( new TreeSet() ); + for( final Accession x : getCrossReferences() ) { + if ( x != null ) { + seq.getCrossReferences().add( x); + } + } + } return seq; } @@ -279,6 +304,7 @@ public class Sequence implements PhylogenyData, MultipleUris { setUris( null ); setSequenceRelations( null ); setSourceId( null ); + setCrossReferences(null); } @Override @@ -391,14 +417,14 @@ public class Sequence implements PhylogenyData, MultipleUris { String.valueOf( isMolecularSequenceAligned() ), indentation ); } - if ( getUris() != null ) { + if ( getUris() != null && !getUris().isEmpty() ) { for( final Uri uri : getUris() ) { if ( uri != null ) { uri.toPhyloXML( writer, level, indentation ); } } } - if ( _annotations != null ) { + if ( getAnnotations() != null && !getAnnotations().isEmpty() ) { for( final PhylogenyData annotation : getAnnotations() ) { annotation.toPhyloXML( writer, level, my_ind ); } @@ -406,6 +432,17 @@ public class Sequence implements PhylogenyData, MultipleUris { if ( getDomainArchitecture() != null ) { getDomainArchitecture().toPhyloXML( writer, level, my_ind ); } + if ( getCrossReferences() != null && !getCrossReferences().isEmpty() ) { + writer.write( ForesterUtil.LINE_SEPARATOR ); + writer.write( my_ind ); + PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE_X_REFS ); + for( final PhylogenyData x : getCrossReferences() ) { + x.toPhyloXML( writer, level, my_ind ); + } + writer.write( ForesterUtil.LINE_SEPARATOR ); + writer.write( my_ind ); + PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_X_REFS ); + } writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( indentation ); PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE ); diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 609b7df..02efa90 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -36,6 +36,7 @@ import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Set; +import java.util.SortedSet; import org.forester.application.support_transfer; import org.forester.archaeopteryx.TreePanelUtil; @@ -1208,6 +1209,22 @@ public final class Test { if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) { return false; } + SortedSet x = t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences(); + if ( x.size() != 4 ) { + return false; + } + int c = 0; + for( Accession acc : x ) { + if ( c == 0 ) { + if ( !acc.getSource().equals( "KEGG" ) ) { + return false; + } + if ( !acc.getValue().equals( "hsa:596" ) ) { + return false; + } + } + c++; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -1487,7 +1504,7 @@ public final class Test { } if ( ( ( BinaryCharacters ) t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().copy() ) .getLostCount() != BinaryCharacters.COUNT_DEFAULT ) { - ; + return false; } if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCount() != 1 ) { @@ -1536,6 +1553,22 @@ public final class Test { .equalsIgnoreCase( "433" ) ) { return false; } + SortedSet x = t3_rt.getNode( "root node" ).getNodeData().getSequence().getCrossReferences(); + if ( x.size() != 4 ) { + return false; + } + int c = 0; + for( Accession acc : x ) { + if ( c == 0 ) { + if ( !acc.getSource().equals( "KEGG" ) ) { + return false; + } + if ( !acc.getValue().equals( "hsa:596" ) ) { + return false; + } + } + c++; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index f28a8f1..f370e3d 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -241,8 +241,9 @@ public final class SequenceDbWsTools { seq.setName( db_entry.getSequenceName() ); } if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) { + final String gn = db_entry.getGeneName().replace( ' ', '_' ); try { - seq.setSymbol( db_entry.getGeneName() ); + seq.setSymbol( gn ); } catch ( PhyloXmlDataFormatException e ) { // Eat this exception. -- 1.7.10.2