inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Mon, 30 Sep 2013 22:50:02 +0000 (22:50 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Mon, 30 Sep 2013 22:50:02 +0000 (22:50 +0000)
17 files changed:
forester/java/src/org/forester/archaeopteryx/NodePanel.java
forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java
forester/java/src/org/forester/go/GoNameSpace.java
forester/java/src/org/forester/io/parsers/nhx/NHXParser.java
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java
forester/java/src/org/forester/io/parsers/phyloxml/data/AccessionParser.java
forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java
forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
forester/java/src/org/forester/phylogeny/data/Accession.java
forester/java/src/org/forester/phylogeny/data/Annotation.java
forester/java/src/org/forester/phylogeny/data/Sequence.java
forester/java/src/org/forester/test/Test.java
forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java
forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java
forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java
forester/java/src/org/forester/ws/seqdb/UniProtEntry.java

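diff --git a/forester/java/src/org/forester/archaeopteryx/NodePanel.java b/forester/java/src/org/forester/archaeopteryx/NodePanel.java
index 686b348..90c7010 100644 (file)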
@@ -43,6 +43,7 @@ import javax.swing.tree.TreePath;
 
 import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
 import org.forester.phylogeny.data.Annotation;
 import org.forester.phylogeny.data.BinaryCharacters;
 import org.forester.phylogeny.data.BranchWidth;
@@ -62,56 +63,57 @@ import org.forester.util.ForesterUtil;
 
 class NodePanel extends JPanel implements TreeSelectionListener {
 
-    static final String       DIST_ALTITUDE            = "Altitude";
-    static final String       DIST_ALT_UNIT            = "Altitude unit";
-    static final String       DIST_LONGITUDE           = "Longitude";
-    static final String       DIST_LATITUDE            = "Latitude";
-    static final String       DIST_GEODETIC_DATUM      = "Geodetic datum";
-    static final String       DIST_DESCRIPTION         = "Description";
-    static final String       DATE_UNIT                = "Unit";
-    static final String       DATE_MAX                 = "Max";
-    static final String       DATE_MIN                 = "Min";
-    static final String       DATE_VALUE               = "Value";
-    static final String       DATE_DESCRIPTION         = "Description";
-    static final String       TAXONOMY_IDENTIFIER      = "Identifier";
-    static final String       SEQ_ACCESSION            = "Accession";
-    static final String       CONFIDENCE               = "Confidence";
-    static final String       PROP                     = "Properties";
-    static final String       BINARY_CHARACTERS        = "Binary characters";
-    static final String       REFERENCE                = "Reference";
-    static final String       LIT_REFERENCE            = "Reference";
-    static final String       LIT_REFERENCE_DESC       = "Description";
-    static final String       LIT_REFERENCE_DOI        = "DOI";
-    static final String       DISTRIBUTION             = "Distribution";
-    static final String       DATE                     = "Date";
-    static final String       EVENTS                   = "Events";
-    static final String       SEQUENCE                 = "Sequence";
-    static final String       TAXONOMY                 = "Taxonomy";
-    static final String       BASIC                    = "Basic";
-    static final String       TAXONOMY_SCIENTIFIC_NAME = "Scientific name";
-    static final String       SEQ_MOL_SEQ              = "Mol seq";
-    static final String       SEQ_TYPE                 = "Type";
-    static final String       SEQ_LOCATION             = "Location";
-    static final String       SEQ_SYMBOL               = "Symbol";
-    static final String       SEQ_URI                  = "URI";
-    static final String       NODE_BRANCH_LENGTH       = "Branch length";
-    static final String       NODE_BRANCH_WIDTH        = "Branch width";
-    static final String       NODE_BRANCH_COLOR        = "Branch color";
-    static final String       NODE_NAME                = "Name";
-    static final String       TAXONOMY_URI             = "URI";
-    static final String       TAXONOMY_RANK            = "Rank";
-    static final String       TAXONOMY_SYNONYM         = "Synonym";
-    static final String       TAXONOMY_COMMON_NAME     = "Common name";
-    static final String       TAXONOMY_AUTHORITY       = "Authority";
-    static final String       TAXONOMY_CODE            = "Code";
-    static final String       SEQ_NAME                 = "Name";
-    static final String       EVENTS_GENE_LOSSES       = "Gene losses";
-    static final String       EVENTS_SPECIATIONS       = "Speciations";
-    static final String       EVENTS_DUPLICATIONS      = "Duplications";
-    private static final long serialVersionUID         = 5120159904388100771L;
-    static final String       CONFIDENCE_TYPE          = "type";
-    private final JTree       _tree;
-    private final JEditorPane _pane;
+    static final String         BASIC                    = "Basic";
+    static final String         BINARY_CHARACTERS        = "Binary characters";
+    static final String         CONFIDENCE               = "Confidence";
+    static final String         CONFIDENCE_TYPE          = "type";
+    static final String         DATE                     = "Date";
+    static final String         DATE_DESCRIPTION         = "Description";
+    static final String         DATE_MAX                 = "Max";
+    static final String         DATE_MIN                 = "Min";
+    static final String         DATE_UNIT                = "Unit";
+    static final String         DATE_VALUE               = "Value";
+    static final String         DIST_ALT_UNIT            = "Altitude unit";
+    static final String         DIST_ALTITUDE            = "Altitude";
+    static final String         DIST_DESCRIPTION         = "Description";
+    static final String         DIST_GEODETIC_DATUM      = "Geodetic datum";
+    static final String         DIST_LATITUDE            = "Latitude";
+    static final String         DIST_LONGITUDE           = "Longitude";
+    static final String         DISTRIBUTION             = "Distribution";
+    static final String         EVENTS                   = "Events";
+    static final String         EVENTS_DUPLICATIONS      = "Duplications";
+    static final String         EVENTS_GENE_LOSSES       = "Gene losses";
+    static final String         EVENTS_SPECIATIONS       = "Speciations";
+    static final String         LIT_REFERENCE            = "Reference";
+    static final String         LIT_REFERENCE_DESC       = "Description";
+    static final String         LIT_REFERENCE_DOI        = "DOI";
+    static final String         NODE_BRANCH_COLOR        = "Branch color";
+    static final String         NODE_BRANCH_LENGTH       = "Branch length";
+    static final String         NODE_BRANCH_WIDTH        = "Branch width";
+    static final String         NODE_NAME                = "Name";
+    static final String         PROP                     = "Properties";
+    static final String         REFERENCE                = "Reference";
+    static final String         SEQ_ACCESSION            = "Accession";
+    static final String         SEQ_LOCATION             = "Location";
+    static final String         SEQ_MOL_SEQ              = "Mol seq";
+    static final String         SEQ_NAME                 = "Name";
+    static final String         SEQ_SYMBOL               = "Symbol";
+    static final String         SEQ_TYPE                 = "Type";
+    static final String         SEQ_URI                  = "URI";
+    static final String         SEQUENCE                 = "Sequence";
+    static final String         TAXONOMY                 = "Taxonomy";
+    static final String         TAXONOMY_AUTHORITY       = "Authority";
+    static final String         TAXONOMY_CODE            = "Code";
+    static final String         TAXONOMY_COMMON_NAME     = "Common name";
+    static final String         TAXONOMY_IDENTIFIER      = "Identifier";
+    static final String         TAXONOMY_RANK            = "Rank";
+    static final String         TAXONOMY_SCIENTIFIC_NAME = "Scientific name";
+    static final String         TAXONOMY_SYNONYM         = "Synonym";
+    static final String         TAXONOMY_URI             = "URI";
+    private static final String SEQ_GENE_NAME            = "Gene name";
+    private static final long   serialVersionUID         = 5120159904388100771L;
+    private final JEditorPane   _pane;
+    private final JTree         _tree;
 
     public NodePanel( final PhylogenyNode phylogeny_node ) {
         String node_name = "";
@@ -141,6 +143,11 @@ class NodePanel extends JPanel implements TreeSelectionListener {
         add( split_pane );
     }
 
+    @Override
+    public void valueChanged( final TreeSelectionEvent e ) {
+        // Do nothing.
+    }
+
     private void expandPath( final String name ) {
         final TreePath tp = getJTree().getNextMatch( name, 0, Position.Bias.Forward );
         if ( tp != null ) {
@@ -152,17 +159,10 @@ class NodePanel extends JPanel implements TreeSelectionListener {
         return _tree;
     }
 
-    @Override
-    public void valueChanged( final TreeSelectionEvent e ) {
-        // Do nothing.
-    }
-
     private static void addAnnotation( final DefaultMutableTreeNode top, final Annotation ann, final String name ) {
         DefaultMutableTreeNode category;
         category = new DefaultMutableTreeNode( name );
         top.add( category );
-        addSubelement( category, REFERENCE, ann.getRef() );
-        addSubelement( category, "Description", ann.getDesc() );
         addSubelement( category, "Source", ann.getSource() );
         addSubelement( category, "Type", ann.getType() );
         addSubelement( category, "Evidence", ann.getEvidence() );
@@ -174,60 +174,14 @@ class NodePanel extends JPanel implements TreeSelectionListener {
         }
     }
 
-    private static void addUri( final DefaultMutableTreeNode top, final Uri uri, final String name ) {
-        DefaultMutableTreeNode category;
-        category = new DefaultMutableTreeNode( name );
-        top.add( category );
-        addSubelement( category, "Description", uri.getDescription() );
-        addSubelement( category, "Type", uri.getType() );
-        addSubelement( category, "URI", uri.getValue().toString() );
-    }
-
     private static void addAnnotations( final DefaultMutableTreeNode top,
                                         final SortedSet<Annotation> annotations,
                                         final DefaultMutableTreeNode category ) {
         if ( ( annotations != null ) && ( annotations.size() > 0 ) ) {
             category.add( new DefaultMutableTreeNode( "Annotations" ) );
             final DefaultMutableTreeNode last = top.getLastLeaf();
-            int i = 0;
-            for( final PhylogenyData ann : annotations ) {
-                addAnnotation( last, ( Annotation ) ann, "Annotation " + ( i++ ) );
-            }
-        }
-    }
-
-    private static void addUris( final DefaultMutableTreeNode top,
-                                 final List<Uri> uris,
-                                 final DefaultMutableTreeNode category ) {
-        if ( ( uris != null ) && ( uris.size() > 0 ) ) {
-            category.add( new DefaultMutableTreeNode( "URIs" ) );
-            final DefaultMutableTreeNode last = top.getLastLeaf();
-            int i = 0;
-            for( final Uri uri : uris ) {
-                if ( uri != null ) {
-                    addUri( last, uri, "URI " + ( i++ ) );
-                }
-            }
-        }
-    }
-
-    private static void addLineage( final DefaultMutableTreeNode top,
-                                    final List<String> lineage,
-                                    final DefaultMutableTreeNode category ) {
-        if ( ( lineage != null ) && ( lineage.size() > 0 ) ) {
-            final StringBuilder sb = new StringBuilder();
-            for( final String lin : lineage ) {
-                if ( !ForesterUtil.isEmpty( lin ) ) {
-                    sb.append( lin );
-                    sb.append( " > " );
-                }
-            }
-            String str = null;
-            if ( sb.length() > 1 ) {
-                str = sb.substring( 0, sb.length() - 3 );
-            }
-            if ( !ForesterUtil.isEmpty( str ) ) {
-                addSubelement( category, "Lineage", str );
+            for( final Annotation ann : annotations ) {
+                addAnnotation( last, ann, ann.asText().toString() );
             }
         }
     }
@@ -299,6 +253,24 @@ class NodePanel extends JPanel implements TreeSelectionListener {
         addSubelement( chars, "Present", bc.getPresentCharactersAsStringBuffer().toString() );
     }
 
+    private static void addCrossReference( final DefaultMutableTreeNode top, final Accession x, final String name ) {
+        DefaultMutableTreeNode category;
+        category = new DefaultMutableTreeNode( name );
+        top.add( category );
+    }
+
+    private static void addCrossReferences( final DefaultMutableTreeNode top,
+                                            final SortedSet<Accession> xs,
+                                            final DefaultMutableTreeNode category ) {
+        if ( ( xs != null ) && ( xs.size() > 0 ) ) {
+            category.add( new DefaultMutableTreeNode( "Cross references" ) );
+            final DefaultMutableTreeNode last = top.getLastLeaf();
+            for( final Accession x : xs ) {
+                addCrossReference( last, x, x.asText().toString() );
+            }
+        }
+    }
+
     private static void addDate( final DefaultMutableTreeNode top, final Date date, final String name ) {
         DefaultMutableTreeNode category;
         category = new DefaultMutableTreeNode( name );
@@ -349,6 +321,27 @@ class NodePanel extends JPanel implements TreeSelectionListener {
         }
     }
 
+    private static void addLineage( final DefaultMutableTreeNode top,
+                                    final List<String> lineage,
+                                    final DefaultMutableTreeNode category ) {
+        if ( ( lineage != null ) && ( lineage.size() > 0 ) ) {
+            final StringBuilder sb = new StringBuilder();
+            for( final String lin : lineage ) {
+                if ( !ForesterUtil.isEmpty( lin ) ) {
+                    sb.append( lin );
+                    sb.append( " > " );
+                }
+            }
+            String str = null;
+            if ( sb.length() > 1 ) {
+                str = sb.substring( 0, sb.length() - 3 );
+            }
+            if ( !ForesterUtil.isEmpty( str ) ) {
+                addSubelement( category, "Lineage", str );
+            }
+        }
+    }
+
     private static void addProperties( final DefaultMutableTreeNode top,
                                        final PropertiesMap properties,
                                        final String string ) {
@@ -374,16 +367,22 @@ class NodePanel extends JPanel implements TreeSelectionListener {
         top.add( category );
         addSubelement( category, SEQ_NAME, seq.getName() );
         addSubelement( category, SEQ_SYMBOL, seq.getSymbol() );
+        addSubelement( category, SEQ_GENE_NAME, seq.getGeneName() );
         if ( seq.getAccession() != null ) {
             addSubelement( category, SEQ_ACCESSION, seq.getAccession().asText().toString() );
         }
         addSubelement( category, SEQ_LOCATION, seq.getLocation() );
         addSubelement( category, SEQ_TYPE, seq.getType() );
         addSubelement( category, SEQ_MOL_SEQ, seq.getMolecularSequence() );
+        if ( ( seq.getAnnotations() != null ) && !seq.getAnnotations().isEmpty() ) {
+            addAnnotations( top, seq.getAnnotations(), category );
+        }
+        if ( ( seq.getCrossReferences() != null ) && !seq.getCrossReferences().isEmpty() ) {
+            addCrossReferences( top, seq.getCrossReferences(), category );
+        }
         if ( ( seq.getUris() != null ) && !seq.getUris().isEmpty() ) {
             addUris( top, seq.getUris(), category );
         }
-        addAnnotations( top, seq.getAnnotations(), category );
     }
 
     private static void addSubelement( final DefaultMutableTreeNode node, final String name, final String value ) {
@@ -414,6 +413,30 @@ class NodePanel extends JPanel implements TreeSelectionListener {
         }
     }
 
+    private static void addUri( final DefaultMutableTreeNode top, final Uri uri, final String name ) {
+        DefaultMutableTreeNode category;
+        category = new DefaultMutableTreeNode( name );
+        top.add( category );
+        addSubelement( category, "Description", uri.getDescription() );
+        addSubelement( category, "Type", uri.getType() );
+        addSubelement( category, "URI", uri.getValue().toString() );
+    }
+
+    private static void addUris( final DefaultMutableTreeNode top,
+                                 final List<Uri> uris,
+                                 final DefaultMutableTreeNode category ) {
+        if ( ( uris != null ) && ( uris.size() > 0 ) ) {
+            category.add( new DefaultMutableTreeNode( "URIs" ) );
+            final DefaultMutableTreeNode last = top.getLastLeaf();
+            int i = 0;
+            for( final Uri uri : uris ) {
+                if ( uri != null ) {
+                    addUri( last, uri, "URI " + ( i++ ) );
+                }
+            }
+        }
+    }
+
     private static void createNodes( final DefaultMutableTreeNode top, final PhylogenyNode phylogeny_node ) {
         addBasics( top, phylogeny_node, BASIC );
         // Taxonomy
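diff --git a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java
index 6b9b88b..9f3bcb1 100644 (file)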
@@ -38,7 +38,7 @@ import org.forester.ws.seqdb.SequenceDbWsTools;
 
 public final class SequenceDataRetriver extends RunnableProcess {
 
-    private final static int           DEFAULT_LINES_TO_RETURN = 50;
+    private final static int           DEFAULT_LINES_TO_RETURN = 4000;
     private final Phylogeny            _phy;
     private final MainFrameApplication _mf;
     private final TreePanel            _treepanel;
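diff --git a/forester/java/src/org/forester/go/GoNameSpace.java b/forester/java/src/org/forester/go/GoNameSpace.java
index b487ea8..51932ec 100644 (file)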
@@ -27,10 +27,10 @@ package org.forester.go;
 
 public class GoNameSpace {
 
-    public final static String           MOLECULAR_FUNCTION_STR = "molecular_function";
-    public final static String           BIOLOGICAL_PROCESS_STR = "biological_process";
-    public final static String           CELLULAR_COMPONENT_STR = "cellular_component";
-    public final static  String           UNASSIGNED_STR         = "unassigned";
+    public final static String    MOLECULAR_FUNCTION_STR = "molecular_function";
+    public final static String    BIOLOGICAL_PROCESS_STR = "biological_process";
+    public final static String    CELLULAR_COMPONENT_STR = "cellular_component";
+    public final static String    UNASSIGNED_STR         = "unassigned";
     private final GoNamespaceType _type;
 
     public GoNameSpace( final GoNamespaceType type ) {
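diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java
index bebc238..d40e3f8 100644 (file)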
@@ -46,7 +46,6 @@ import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.data.Accession;
 import org.forester.phylogeny.data.Confidence;
-import org.forester.phylogeny.data.DomainArchitecture;
 import org.forester.phylogeny.data.Event;
 import org.forester.phylogeny.data.Identifier;
 import org.forester.phylogeny.data.PhylogenyDataUtil;
@@ -596,13 +595,6 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
                         }
                         node_to_annotate.getNodeData().getTaxonomy().setIdentifier( new Identifier( s.substring( 2 ) ) );
                     }
-                    else if ( s.startsWith( NHXtags.DOMAIN_STRUCTURE ) ) {
-                        if ( !node_to_annotate.getNodeData().isHasSequence() ) {
-                            node_to_annotate.getNodeData().setSequence( new Sequence() );
-                        }
-                        node_to_annotate.getNodeData().getSequence()
-                                .setDomainArchitecture( new DomainArchitecture( s.substring( 3 ) ) );
-                    }
                     else if ( s.startsWith( NHXtags.SEQUENCE_ACCESSION ) ) {
                         if ( !node_to_annotate.getNodeData().isHasSequence() ) {
                             node_to_annotate.getNodeData().setSequence( new Sequence() );
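diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java
index bdf5889..643a401 100644 (file)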
@@ -33,105 +33,106 @@ package org.forester.io.parsers.phyloxml;
  */
 public final class PhyloXmlMapping {
 
-    public static final String PHYLOGENY                                           = "phylogeny";
-    public static final String PHYLOGENY_NAME                                      = "name";
-    public static final String PHYLOGENY_DESCRIPTION                               = "description";
-    public static final String PHYLOGENY_IS_REROOTABLE_ATTR                        = "rerootable";
-    public static final String PHYLOGENY_BRANCHLENGTH_UNIT_ATTR                    = "branch_length_unit";
-    public static final String PHYLOGENY_IS_ROOTED_ATTR                            = "rooted";
-    public static final String PHYLOGENY_TYPE_ATTR                                 = "type";
-    public static final String CLADE                                               = "clade";
-    public static final String NODE_NAME                                           = "name";
-    public static final String NODE_COLLAPSE                                       = "collapse";
-    public static final String SEQUENCE                                            = "sequence";
-    public static final String SEQUENCE_NAME                                       = "name";
-    public static final String SEQUENCE_SYMBOL                                     = "symbol";
     public static final String ACCESSION                                           = "accession";
+    public static final String ACCESSION_COMMENT_ATTR                              = "comment";
     public static final String ACCESSION_SOURCE_ATTR                               = "source";
-    public static final String SEQUENCE_LOCATION                                   = "location";
-    public static final String SEQUENCE_MOL_SEQ                                    = "mol_seq";
-    public static final String SEQUENCE_MOL_SEQ_ALIGNED_ATTR                       = "is_aligned";
     public static final String ANNOTATION                                          = "annotation";
     public static final String ANNOTATION_DESC                                     = "desc";
-    public static final String ANNOTATION_REF_ATTR                                 = "ref";
     public static final String ANNOTATION_EVIDENCE_ATTR                            = "evidence";
+    public static final String ANNOTATION_REF_ATTR                                 = "ref";
+    public static final String ANNOTATION_SOURCE_ATTR                              = "source";
     public static final String ANNOTATION_TYPE_ATTR                                = "type";
-    public static final String TAXONOMY                                            = "taxonomy";
-    public static final String TAXONOMY_SCIENTIFIC_NAME                            = "scientific_name";
-    public static final String TAXONOMY_COMMON_NAME                                = "common_name";
-    public static final String TAXONOMY_CODE                                       = "code";
-    public static final String TAXONOMY_RANK                                       = "rank";
-    public static final String TAXONOMY_SYNONYM                                    = "synonym";
-    public static final String TAXONOMY_AUTHORITY                                  = "authority";
-    public static final String DISTRIBUTION                                        = "distribution";
+    public static final String BINARY_CHARACTER                                    = "bc";
     public static final String BINARY_CHARACTERS                                   = "binary_characters";
-    public static final String BINARY_CHARACTERS_PRESENT                           = "present";
     public static final String BINARY_CHARACTERS_GAINED                            = "gained";
-    public static final String BINARY_CHARACTERS_LOST                              = "lost";
-    public static final String BINARY_CHARACTERS_TYPE_ATTR                         = "type";
-    public static final String BINARY_CHARACTERS_PRESENT_COUNT_ATTR                = "present_count";
     public static final String BINARY_CHARACTERS_GAINED_COUNT_ATTR                 = "gained_count";
+    public static final String BINARY_CHARACTERS_LOST                              = "lost";
     public static final String BINARY_CHARACTERS_LOST_COUNT_ATTR                   = "lost_count";
+    public static final String BINARY_CHARACTERS_PRESENT                           = "present";
+    public static final String BINARY_CHARACTERS_PRESENT_COUNT_ATTR                = "present_count";
+    public static final String BINARY_CHARACTERS_TYPE_ATTR                         = "type";
     public static final String BRANCH_LENGTH                                       = "branch_length";
+    public static final String CLADE                                               = "clade";
+    public static final String CLADE_DATE                                          = "date";
+    public static final String CLADE_DATE_DESC                                     = "desc";
+    public static final String CLADE_DATE_MAX                                      = "maximum";
+    public static final String CLADE_DATE_MIN                                      = "minimum";
+    public static final String CLADE_DATE_UNIT                                     = "unit";
+    public static final String CLADE_DATE_VALUE                                    = "value";
+    public static final String COLOR                                               = "color";
+    public static final String COLOR_BLUE                                          = "blue";
+    public static final String COLOR_GREEN                                         = "green";
+    public static final String COLOR_RED                                           = "red";
     public static final String CONFIDENCE                                          = "confidence";
     public static final String CONFIDENCE_SD_ATTR                                  = "stddev";
     public static final String CONFIDENCE_TYPE_ATTR                                = "type";
-    public static final String COLOR                                               = "color";
-    public static final String COLOR_RED                                           = "red";
-    public static final String COLOR_GREEN                                         = "green";
-    public static final String COLOR_BLUE                                          = "blue";
-    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN                 = "domain";
-    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM       = "from";
-    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO         = "to";
-    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE = "confidence";
-    public final static String SEQUENCE_X_REFS                                     = "cross_references";
-    // public final static String NODE_IDENTIFIER                                     = "node_id";
-    public final static String IDENTIFIER                                          = "id";
-    public final static String IDENTIFIER_PROVIDER_ATTR                            = "provider";
-    public static final String URI                                                 = "uri";
-    public static final String WIDTH                                               = "width";
-    public final static String EVENTS                                              = "events";
-    public final static String EVENT_TYPE                                          = "type";
+    public static final String DISTRIBUTION                                        = "distribution";
+    public static final String DISTRIBUTION_DESC                                   = "desc";
     public final static String EVENT_DUPLICATIONS                                  = "duplications";
-    public final static String EVENT_SPECIATIONS                                   = "speciations";
     public final static String EVENT_LOSSES                                        = "losses";
-    public final static String SEQUENCE_DOMAIN_ARCHITECURE                         = "domain_architecture";
-    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH                 = "length";
-    public final static String SEQUENCE_TYPE                                       = "type";
-    public static final String BINARY_CHARACTER                                    = "bc";
-    public static final String URI_DESC_ATTR                                       = "desc";
-    public static final String TYPE_ATTR                                           = "type";
-    public static final String REFERENCE                                           = "reference";
-    public static final String REFERENCE_DOI_ATTR                                  = "doi";
-    public static final String REFERENCE_DESC                                      = "desc";
-    public static final String PROPERTY                                            = "property";
-    public static final String PROPERTY_REF                                        = "ref";
-    public static final String PROPERTY_UNIT                                       = "unit";
-    public static final String PROPERTY_DATATYPE                                   = "datatype";
-    public static final String PROPERTY_APPLIES_TO                                 = "applies_to";
+    public final static String EVENT_SPECIATIONS                                   = "speciations";
+    public final static String EVENT_TYPE                                          = "type";
+    public final static String EVENTS                                              = "events";
     public static final String ID_REF                                              = "id_ref";
-    public static final String ANNOTATION_SOURCE_ATTR                              = "source";
-    public static final String DISTRIBUTION_DESC                                   = "desc";
+    // public final static String NODE_IDENTIFIER                                     = "node_id";
+    public final static String IDENTIFIER                                          = "id";
+    public final static String IDENTIFIER_PROVIDER_ATTR                            = "provider";
+    public static final String NODE_COLLAPSE                                       = "collapse";
+    public static final String NODE_NAME                                           = "name";
+    public static final String PHYLOGENY                                           = "phylogeny";
+    public static final String PHYLOGENY_BRANCHLENGTH_UNIT_ATTR                    = "branch_length_unit";
+    public static final String PHYLOGENY_DESCRIPTION                               = "description";
+    public static final String PHYLOGENY_IS_REROOTABLE_ATTR                        = "rerootable";
+    public static final String PHYLOGENY_IS_ROOTED_ATTR                            = "rooted";
+    public static final String PHYLOGENY_NAME                                      = "name";
+    public static final String PHYLOGENY_TYPE_ATTR                                 = "type";
     public static final String POINT                                               = "point";
-    public static final String POINT_LONGITUDE                                     = "long";
-    public static final String POINT_LATITUDE                                      = "lat";
     public static final String POINT_ALTITUDE                                      = "alt";
     public static final String POINT_ALTITUDE_UNIT_ATTR                            = "alt_unit";
     public static final String POINT_GEODETIC_DATUM                                = "geodetic_datum";
-    public static final String CLADE_DATE                                          = "date";
-    public static final String CLADE_DATE_UNIT                                     = "unit";
-    public static final String CLADE_DATE_DESC                                     = "desc";
-    public static final String CLADE_DATE_MIN                                      = "minimum";
-    public static final String CLADE_DATE_MAX                                      = "maximum";
-    public static final String CLADE_DATE_VALUE                                    = "value";
+    public static final String POINT_LATITUDE                                      = "lat";
+    public static final String POINT_LONGITUDE                                     = "long";
+    public final static String POLYGON                                             = "polygon";
+    public static final String PROPERTY                                            = "property";
+    public static final String PROPERTY_APPLIES_TO                                 = "applies_to";
+    public static final String PROPERTY_DATATYPE                                   = "datatype";
+    public static final String PROPERTY_REF                                        = "ref";
+    public static final String PROPERTY_UNIT                                       = "unit";
+    public static final String REFERENCE                                           = "reference";
+    public static final String REFERENCE_DESC                                      = "desc";
+    public static final String REFERENCE_DOI_ATTR                                  = "doi";
+    public static final String SEQUENCE                                            = "sequence";
+    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN                 = "domain";
+    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH                 = "length";
+    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE = "confidence";
+    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM       = "from";
+    public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO         = "to";
+    public final static String SEQUENCE_DOMAIN_ARCHITECURE                         = "domain_architecture";
+    public static final String SEQUENCE_GENE_NAME                                  = "gene_name";
+    public static final String SEQUENCE_LOCATION                                   = "location";
+    public static final String SEQUENCE_MOL_SEQ                                    = "mol_seq";
+    public static final String SEQUENCE_MOL_SEQ_ALIGNED_ATTR                       = "is_aligned";
+    public static final String SEQUENCE_NAME                                       = "name";
     public final static String SEQUENCE_RELATION                                   = "sequence_relation";
-    public final static String SEQUENCE_RELATION_TYPE                              = "type";
+    public final static String SEQUENCE_RELATION_DISTANCE                          = "distance";
     public final static String SEQUENCE_RELATION_ID_REF0                           = "id_ref_0";
     public final static String SEQUENCE_RELATION_ID_REF1                           = "id_ref_1";
-    public final static String SEQUENCE_RELATION_DISTANCE                          = "distance";
+    public final static String SEQUENCE_RELATION_TYPE                              = "type";
     public final static String SEQUENCE_SOURCE_ID                                  = "id_source";
-    public final static String POLYGON                                             = "polygon";
-  
+    public static final String SEQUENCE_SYMBOL                                     = "symbol";
+    public final static String SEQUENCE_TYPE                                       = "type";
+    public final static String SEQUENCE_X_REFS                                     = "cross_references";
+    public static final String TAXONOMY                                            = "taxonomy";
+    public static final String TAXONOMY_AUTHORITY                                  = "authority";
+    public static final String TAXONOMY_CODE                                       = "code";
+    public static final String TAXONOMY_COMMON_NAME                                = "common_name";
+    public static final String TAXONOMY_RANK                                       = "rank";
+    public static final String TAXONOMY_SCIENTIFIC_NAME                            = "scientific_name";
+    public static final String TAXONOMY_SYNONYM                                    = "synonym";
+    public static final String TYPE_ATTR                                           = "type";
+    public static final String URI                                                 = "uri";
+    public static final String URI_DESC_ATTR                                       = "desc";
+    public static final String WIDTH                                               = "width";
 
     private PhyloXmlMapping() {
     }
index 95d5e9b..e585cb3 100644 (file)
@@ -37,7 +37,7 @@ public final class PhyloXmlUtil {
 
     public static final String       OTHER                                      = "other";
     public static final String       UNKNOWN                                    = "unknown";
-    public final static Pattern      SEQUENCE_SYMBOL_PATTERN                    = Pattern.compile( "\\S{1,30}" );
+    public final static Pattern      SEQUENCE_SYMBOL_PATTERN                    = Pattern.compile( "\\S{1,20}" );
     public final static Pattern      TAXOMONY_CODE_PATTERN                      = Pattern
                                                                                         .compile( ParserUtils.TAX_CODE );
     public final static Pattern      LIT_REF_DOI_PATTERN                        = Pattern
index e70b11c..64d5539 100644 (file)
@@ -48,10 +48,21 @@ public class AccessionParser implements PhylogenyDataPhyloXmlParser {
 
     @Override
     public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException {
-        if ( element.isHasAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ) ) {
+        if ( element.isHasAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR )
+                && element.isHasAttribute( PhyloXmlMapping.ACCESSION_COMMENT_ATTR ) ) {
+            return new Accession( element.getValueAsString(),
+                                  element.getAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ),
+                                  element.getAttribute( PhyloXmlMapping.ACCESSION_COMMENT_ATTR ) );
+        }
+        else if ( element.isHasAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ) ) {
             return new Accession( element.getValueAsString(),
                                   element.getAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ) );
         }
+        else if ( element.isHasAttribute( PhyloXmlMapping.ACCESSION_COMMENT_ATTR ) ) {
+            return new Accession( element.getValueAsString(),
+                                  "?",
+                                  element.getAttribute( PhyloXmlMapping.ACCESSION_COMMENT_ATTR ) );
+        }
         else {
             return new Accession( element.getValueAsString(), "?" );
         }
index 680cb4d..56a7ea1 100644 (file)
@@ -66,6 +66,9 @@ public class SequenceParser implements PhylogenyDataPhyloXmlParser {
             else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_NAME ) ) {
                 sequence.setName( child_element.getValueAsString() );
             }
+            else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_GENE_NAME ) ) {
+                sequence.setGeneName( child_element.getValueAsString() );
+            }
             else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_MOL_SEQ ) ) {
                 if ( child_element.isHasAttribute( PhyloXmlMapping.SEQUENCE_MOL_SEQ_ALIGNED_ATTR ) ) {
                     sequence.setMolecularSequenceAligned( Boolean.parseBoolean( child_element
@@ -91,8 +94,9 @@ public class SequenceParser implements PhylogenyDataPhyloXmlParser {
             }
             else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_X_REFS ) ) {
                 for( int j = 0; j < child_element.getNumberOfChildElements(); ++j ) {
-                //    final XmlElement c = child_element.getChildElement( j );
-                    sequence.addCrossReference( ( Accession ) AccessionParser.getInstance().parse( child_element.getChildElement( j ) ) );
+                    //    final XmlElement c = child_element.getChildElement( j );
+                    sequence.addCrossReference( ( Accession ) AccessionParser.getInstance().parse( child_element
+                            .getChildElement( j ) ) );
                 }
                 //sequence.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) );
             }
index cc140e4..9c83603 100644 (file)
@@ -44,6 +44,8 @@ import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
 import org.forester.io.parsers.util.PhylogenyParserException;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.Annotation;
 import org.forester.phylogeny.data.BranchColor;
 import org.forester.phylogeny.data.BranchWidth;
 import org.forester.phylogeny.data.Confidence;
@@ -918,6 +920,10 @@ public class PhylogenyMethods {
                 match = true;
             }
             if ( !match && node.getNodeData().isHasSequence()
+                    && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) {
+                match = true;
+            }
+            if ( !match && node.getNodeData().isHasSequence()
                     && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
                 match = true;
             }
@@ -940,6 +946,38 @@ public class PhylogenyMethods {
                     }
                 }
             }
+            //
+            if ( !match && node.getNodeData().isHasSequence()
+                    && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
+                for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
+                    if ( match( ann.getDesc(), query, case_sensitive, partial ) ) {
+                        match = true;
+                        break;
+                    }
+                    if ( match( ann.getRef(), query, case_sensitive, partial ) ) {
+                        match = true;
+                        break;
+                    }
+                }
+            }
+            if ( !match && node.getNodeData().isHasSequence()
+                    && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
+                for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
+                    if ( match( x.getComment(), query, case_sensitive, partial ) ) {
+                        match = true;
+                        break;
+                    }
+                    if ( match( x.getSource(), query, case_sensitive, partial ) ) {
+                        match = true;
+                        break;
+                    }
+                    if ( match( x.getValue(), query, case_sensitive, partial ) ) {
+                        match = true;
+                        break;
+                    }
+                }
+            }
+            //
             if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
                 Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
                 I: while ( it.hasNext() ) {
@@ -1018,6 +1056,10 @@ public class PhylogenyMethods {
                     match = true;
                 }
                 if ( !match && node.getNodeData().isHasSequence()
+                        && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) {
+                    match = true;
+                }
+                if ( !match && node.getNodeData().isHasSequence()
                         && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) {
                     match = true;
                 }
@@ -1040,6 +1082,38 @@ public class PhylogenyMethods {
                         }
                     }
                 }
+                //
+                if ( !match && node.getNodeData().isHasSequence()
+                        && ( node.getNodeData().getSequence().getAnnotations() != null ) ) {
+                    for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) {
+                        if ( match( ann.getDesc(), query, case_sensitive, partial ) ) {
+                            match = true;
+                            break;
+                        }
+                        if ( match( ann.getRef(), query, case_sensitive, partial ) ) {
+                            match = true;
+                            break;
+                        }
+                    }
+                }
+                if ( !match && node.getNodeData().isHasSequence()
+                        && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) {
+                    for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) {
+                        if ( match( x.getComment(), query, case_sensitive, partial ) ) {
+                            match = true;
+                            break;
+                        }
+                        if ( match( x.getSource(), query, case_sensitive, partial ) ) {
+                            match = true;
+                            break;
+                        }
+                        if ( match( x.getValue(), query, case_sensitive, partial ) ) {
+                            match = true;
+                            break;
+                        }
+                    }
+                }
+                //
                 if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) {
                     Iterator<String> it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator();
                     I: while ( it.hasNext() ) {
index 6e9f158..5fb3afe 100644 (file)
@@ -34,13 +34,27 @@ import org.forester.util.ForesterUtil;
 
 public final class Accession implements PhylogenyData, Comparable<Accession> {
 
-    final private String _value;
+    final private String _comment;
     final private String _source;
     final private String _source_value;
+    final private String _value;
 
     public Accession( final String value, final String source ) {
         _value = value;
         _source = source;
+        _comment = "";
+        if ( source != null ) {
+            _source_value = source + value;
+        }
+        else {
+            _source_value = value;
+        }
+    }
+
+    public Accession( final String value, final String source, final String comment ) {
+        _value = value;
+        _source = source;
+        _comment = comment;
         if ( source != null ) {
             _source_value = source + value;
         }
@@ -58,15 +72,27 @@ public final class Accession implements PhylogenyData, Comparable<Accession> {
     public StringBuffer asText() {
         final StringBuffer sb = new StringBuffer();
         if ( !ForesterUtil.isEmpty( getSource() ) ) {
-            sb.append( "[" );
             sb.append( getSource() );
-            sb.append( "] " );
+            sb.append( ": " );
         }
         sb.append( getValue() );
+        if ( !ForesterUtil.isEmpty( getComment() ) ) {
+            sb.append( " (" );
+            sb.append( getComment() );
+            sb.append( ")" );
+        }
         return sb;
     }
 
     @Override
+    public int compareTo( final Accession o ) {
+        if ( equals( o ) ) {
+            return 0;
+        }
+        return _source_value.compareTo( o._source_value );
+    }
+
+    @Override
     public PhylogenyData copy() {
         return new Accession( getValue(), getSource() );
     }
@@ -88,6 +114,10 @@ public final class Accession implements PhylogenyData, Comparable<Accession> {
         }
     }
 
+    public String getComment() {
+        return _comment;
+    }
+
     public String getSource() {
         return _source;
     }
@@ -98,7 +128,6 @@ public final class Accession implements PhylogenyData, Comparable<Accession> {
 
     @Override
     public int hashCode() {
-      
         return _source_value.hashCode();
     }
 
@@ -129,20 +158,44 @@ public final class Accession implements PhylogenyData, Comparable<Accession> {
     @Override
     public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException {
         if ( ForesterUtil.isEmpty( getSource() ) ) {
-            PhylogenyDataUtil.appendElement( writer,
-                                             PhyloXmlMapping.ACCESSION,
-                                             getValue(),
-                                             PhyloXmlMapping.ACCESSION_SOURCE_ATTR,
-                                             "unknown",
-                                             indentation );
+            if ( ForesterUtil.isEmpty( getComment() ) ) {
+                PhylogenyDataUtil.appendElement( writer,
+                                                 PhyloXmlMapping.ACCESSION,
+                                                 getValue(),
+                                                 PhyloXmlMapping.ACCESSION_SOURCE_ATTR,
+                                                 "unknown",
+                                                 indentation );
+            }
+            else {
+                PhylogenyDataUtil.appendElement( writer,
+                                                 PhyloXmlMapping.ACCESSION,
+                                                 getValue(),
+                                                 PhyloXmlMapping.ACCESSION_SOURCE_ATTR,
+                                                 "unknown",
+                                                 PhyloXmlMapping.ACCESSION_COMMENT_ATTR,
+                                                 getComment(),
+                                                 indentation );
+            }
         }
         else {
-            PhylogenyDataUtil.appendElement( writer,
-                                             PhyloXmlMapping.ACCESSION,
-                                             getValue(),
-                                             PhyloXmlMapping.ACCESSION_SOURCE_ATTR,
-                                             getSource(),
-                                             indentation );
+            if ( ForesterUtil.isEmpty( getComment() ) ) {
+                PhylogenyDataUtil.appendElement( writer,
+                                                 PhyloXmlMapping.ACCESSION,
+                                                 getValue(),
+                                                 PhyloXmlMapping.ACCESSION_SOURCE_ATTR,
+                                                 getSource(),
+                                                 indentation );
+            }
+            else {
+                PhylogenyDataUtil.appendElement( writer,
+                                                 PhyloXmlMapping.ACCESSION,
+                                                 getValue(),
+                                                 PhyloXmlMapping.ACCESSION_SOURCE_ATTR,
+                                                 getSource(),
+                                                 PhyloXmlMapping.ACCESSION_COMMENT_ATTR,
+                                                 getComment(),
+                                                 indentation );
+            }
         }
     }
 
@@ -150,12 +203,4 @@ public final class Accession implements PhylogenyData, Comparable<Accession> {
     public String toString() {
         return asText().toString();
     }
-
-    @Override
-    public int compareTo( Accession o ) {
-        if ( equals( o ) ) {
-            return 0;
-        }
-        return  _source_value.compareTo( o._source_value );
-    }
 }
index 99f9691..652a034 100644 (file)
@@ -89,7 +89,20 @@ public class Annotation implements PhylogenyData, MultipleUris, Comparable<Annot
 
     @Override
     public StringBuffer asText() {
-        return asSimpleText();
+        final StringBuffer sb = new StringBuffer();
+        if ( !ForesterUtil.isEmpty( getDesc() ) && !ForesterUtil.isEmpty( getRef() ) ) {
+            sb.append( getDesc() );
+            sb.append( " (" );
+            sb.append( getRef() );
+            sb.append( ")" );
+        }
+        else if ( !ForesterUtil.isEmpty( getDesc() ) ) {
+            sb.append( getDesc() );
+        }
+        else if ( !ForesterUtil.isEmpty( getRef() ) ) {
+            sb.append( getRef() );
+        }
+        return sb;
     }
 
     @Override
index ac4be9e..25f7e70 100644 (file)
@@ -44,6 +44,7 @@ public class Sequence implements PhylogenyData, MultipleUris {
     private String                 _mol_sequence;
     private boolean                _mol_sequence_is_aligned;
     private String                 _name;
+    private String                 _gene_name;
     private String                 _source_id;
     private Accession              _accession;
     private String                 _symbol;
@@ -65,25 +66,25 @@ public class Sequence implements PhylogenyData, MultipleUris {
                 && ForesterUtil.isEmpty( getSourceId() ) && ForesterUtil.isEmpty( getMolecularSequence() )
                 && ( getDomainArchitecture() == null ) && ForesterUtil.isEmpty( _annotations )
                 && ForesterUtil.isEmpty( _uris ) && ForesterUtil.isEmpty( _seq_relations )
-                && ( getCrossReferences() == null || getCrossReferences().isEmpty() );
+                && ( ( getCrossReferences() == null ) || getCrossReferences().isEmpty() );
     }
 
     public void addAnnotation( final Annotation annotation ) {
         getAnnotations().add( annotation );
     }
-    
-    public void addCrossReference( Accession cross_reference ) {
+
+    public void addCrossReference( final Accession cross_reference ) {
         if ( getCrossReferences() == null ) {
             setCrossReferences( new TreeSet<Accession>() );
         }
-        getCrossReferences().add( cross_reference  );
+        getCrossReferences().add( cross_reference );
     }
-    
+
     public SortedSet<Accession> getCrossReferences() {
         return _xrefs;
     }
-    
-    private void setCrossReferences( TreeSet<Accession> cross_references ) {
+
+    private void setCrossReferences( final TreeSet<Accession> cross_references ) {
         _xrefs = cross_references;
     }
 
@@ -131,6 +132,7 @@ public class Sequence implements PhylogenyData, MultipleUris {
         final Sequence seq = new Sequence();
         seq.setAnnotations( getAnnotations() );
         seq.setName( getName() );
+        seq.setGeneName( getGeneName() );
         try {
             seq.setSymbol( getSymbol() );
         }
@@ -170,7 +172,7 @@ public class Sequence implements PhylogenyData, MultipleUris {
             seq.setCrossReferences( new TreeSet<Accession>() );
             for( final Accession x : getCrossReferences() ) {
                 if ( x != null ) {
-                    seq.getCrossReferences().add( x);
+                    seq.getCrossReferences().add( x );
                 }
             }
         }
@@ -229,6 +231,10 @@ public class Sequence implements PhylogenyData, MultipleUris {
         return _name;
     }
 
+    public String getGeneName() {
+        return _gene_name;
+    }
+
     public List<SequenceRelation> getSequenceRelations() {
         if ( _seq_relations == null ) {
             _seq_relations = new ArrayList<SequenceRelation>();
@@ -282,8 +288,8 @@ public class Sequence implements PhylogenyData, MultipleUris {
     }
 
     public void init() {
-        setAnnotations( null );
         setName( "" );
+        setGeneName( "" );
         setMolecularSequence( "" );
         setMolecularSequenceAligned( false );
         setLocation( "" );
@@ -304,7 +310,8 @@ public class Sequence implements PhylogenyData, MultipleUris {
         setUris( null );
         setSequenceRelations( null );
         setSourceId( null );
-        setCrossReferences(null);
+        setCrossReferences( null );
+        setAnnotations( null );
     }
 
     @Override
@@ -317,7 +324,7 @@ public class Sequence implements PhylogenyData, MultipleUris {
             return getAccession().isEqual( s.getAccession() );
         }
         return s.getMolecularSequence().equals( getMolecularSequence() ) && s.getName().equals( getName() )
-                && s.getSymbol().equals( getSymbol() );
+                && s.getSymbol().equals( getSymbol() ) && s.getGeneName().equals( getGeneName() );
     }
 
     public void setAccession( final Accession accession ) {
@@ -348,6 +355,10 @@ public class Sequence implements PhylogenyData, MultipleUris {
         _name = name;
     }
 
+    public void setGeneName( final String gene_name ) {
+        _gene_name = gene_name;
+    }
+
     public void setSourceId( final String source_id ) {
         _source_id = source_id;
     }
@@ -382,9 +393,6 @@ public class Sequence implements PhylogenyData, MultipleUris {
         if ( getAccession() != null ) {
             getAccession().toNHX();
         }
-        if ( getDomainArchitecture() != null ) {
-            sb.append( getDomainArchitecture().toNHX() );
-        }
         return sb;
     }
 
@@ -406,6 +414,9 @@ public class Sequence implements PhylogenyData, MultipleUris {
         if ( !ForesterUtil.isEmpty( getName() ) ) {
             PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_NAME, getName(), indentation );
         }
+        if ( !ForesterUtil.isEmpty( getGeneName() ) ) {
+            PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_GENE_NAME, getGeneName(), indentation );
+        }
         if ( !ForesterUtil.isEmpty( getLocation() ) ) {
             PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_LOCATION, getLocation(), indentation );
         }
@@ -417,14 +428,14 @@ public class Sequence implements PhylogenyData, MultipleUris {
                                              String.valueOf( isMolecularSequenceAligned() ),
                                              indentation );
         }
-        if ( getUris() != null && !getUris().isEmpty() ) {
+        if ( ( getUris() != null ) && !getUris().isEmpty() ) {
             for( final Uri uri : getUris() ) {
                 if ( uri != null ) {
                     uri.toPhyloXML( writer, level, indentation );
                 }
             }
         }
-        if ( getAnnotations() != null && !getAnnotations().isEmpty() ) {
+        if ( ( getAnnotations() != null ) && !getAnnotations().isEmpty() ) {
             for( final PhylogenyData annotation : getAnnotations() ) {
                 annotation.toPhyloXML( writer, level, my_ind );
             }
@@ -432,7 +443,7 @@ public class Sequence implements PhylogenyData, MultipleUris {
         if ( getDomainArchitecture() != null ) {
             getDomainArchitecture().toPhyloXML( writer, level, my_ind );
         }
-        if ( getCrossReferences() != null && !getCrossReferences().isEmpty() ) {
+        if ( ( getCrossReferences() != null ) && !getCrossReferences().isEmpty() ) {
             writer.write( ForesterUtil.LINE_SEPARATOR );
             writer.write( my_ind );
             PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE_X_REFS );
@@ -442,7 +453,7 @@ public class Sequence implements PhylogenyData, MultipleUris {
             writer.write( ForesterUtil.LINE_SEPARATOR );
             writer.write( my_ind );
             PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_X_REFS );
-        } 
+        }
         writer.write( ForesterUtil.LINE_SEPARATOR );
         writer.write( indentation );
         PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE );
index 02efa90..d1b94ed 100644 (file)
@@ -1209,12 +1209,12 @@ public final class Test {
             if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) {
                 return false;
             }
-            SortedSet<Accession> x =  t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences();
+            final SortedSet<Accession> x = t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences();
             if ( x.size() != 4 ) {
                 return false;
             }
             int c = 0;
-            for( Accession acc : x ) {
+            for( final Accession acc : x ) {
                 if ( c == 0 ) {
                     if ( !acc.getSource().equals( "KEGG" ) ) {
                         return false;
@@ -1504,7 +1504,6 @@ public final class Test {
             }
             if ( ( ( BinaryCharacters ) t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().copy() )
                     .getLostCount() != BinaryCharacters.COUNT_DEFAULT ) {
-                
                 return false;
             }
             if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCount() != 1 ) {
@@ -1553,12 +1552,13 @@ public final class Test {
                     .equalsIgnoreCase( "433" ) ) {
                 return false;
             }
-            SortedSet<Accession> x =  t3_rt.getNode( "root node" ).getNodeData().getSequence().getCrossReferences();
+            final SortedSet<Accession> x = t3_rt.getNode( "root node" ).getNodeData().getSequence()
+                    .getCrossReferences();
             if ( x.size() != 4 ) {
                 return false;
             }
             int c = 0;
-            for( Accession acc : x ) {
+            for( final Accession acc : x ) {
                 if ( c == 0 ) {
                     if ( !acc.getSource().equals( "KEGG" ) ) {
                         return false;
index 8e319f2..8172d3d 100644 (file)
@@ -28,6 +28,7 @@ package org.forester.ws.seqdb;
 import java.util.List;
 
 import org.forester.go.GoTerm;
+import org.forester.phylogeny.data.Accession;
 import org.forester.util.ForesterUtil;
 
 public final class EbiDbEntry implements SequenceDatabaseEntry {
@@ -175,4 +176,9 @@ public final class EbiDbEntry implements SequenceDatabaseEntry {
     public List<GoTerm> getGoTerms() {
         return null;
     }
+
+    @Override
+    public List<Accession> getCrossReferences() {
+        return null;
+    }
 }
index 70ff7b4..3a28d6a 100644 (file)
@@ -28,24 +28,27 @@ package org.forester.ws.seqdb;
 import java.util.List;
 
 import org.forester.go.GoTerm;
+import org.forester.phylogeny.data.Accession;
 
 public interface SequenceDatabaseEntry {
 
-    public String getGeneName();
+    public String getAccession();
 
-    public boolean isEmpty();
+    public String getGeneName();
 
-    public String getAccession();
+    public List<GoTerm> getGoTerms();
 
     public String getProvider();
 
     public String getSequenceName();
 
-    public String getTaxonomyScientificName();
+    public String getSequenceSymbol();
 
     public String getTaxonomyIdentifier();
 
-    public String getSequenceSymbol();
+    public String getTaxonomyScientificName();
 
-    public List<GoTerm> getGoTerms();
+    public boolean isEmpty();
+
+    public List<Accession> getCrossReferences();
 }
\ No newline at end of file
index f370e3d..111fa68 100644 (file)
@@ -241,23 +241,28 @@ public final class SequenceDbWsTools {
                     seq.setName( db_entry.getSequenceName() );
                 }
                 if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) {
-                    final String gn = db_entry.getGeneName().replace( ' ', '_' );
+                    seq.setGeneName( db_entry.getGeneName() );
+                }
+                if ( !ForesterUtil.isEmpty( db_entry.getSequenceSymbol() ) ) {
                     try {
-                        seq.setSymbol( gn );
+                        seq.setSymbol( db_entry.getSequenceSymbol() );
                     }
-                    catch ( PhyloXmlDataFormatException e ) {
+                    catch ( final PhyloXmlDataFormatException e ) {
                         // Eat this exception.
                     }
                 }
-                if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) {
-                  //  seq.addAnnotation( new Annotation( "GN", db_entry.getGeneName() ) );
-                }
-                if ( db_entry.getGoTerms() != null &&  !db_entry.getGoTerms().isEmpty() ) {
+                if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) {
                     for( final GoTerm go : db_entry.getGoTerms() ) {
-                        seq.addAnnotation( new Annotation( go.getGoId().getId(), go.getName() ) );
+                        final Annotation ann = new Annotation( go.getGoId().getId() );
+                        ann.setDesc( go.getName() );
+                        seq.addAnnotation( ann );
+                    }
+                }
+                if ( ( db_entry.getCrossReferences() != null ) && !db_entry.getCrossReferences().isEmpty() ) {
+                    for( final Accession x : db_entry.getCrossReferences() ) {
+                        seq.addCrossReference( x );
                     }
                 }
-                
                 final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy()
                         : new Taxonomy();
                 if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) {
index aeabf63..2eaa720 100644 (file)
@@ -33,18 +33,30 @@ import java.util.regex.Pattern;
 import org.forester.go.BasicGoTerm;
 import org.forester.go.GoNameSpace;
 import org.forester.go.GoTerm;
+import org.forester.phylogeny.data.Accession;
 import org.forester.util.ForesterUtil;
 
 public final class UniProtEntry implements SequenceDatabaseEntry {
 
-    public final static Pattern GO_PATTERN = Pattern.compile( "GO;\\s+(GO:\\d+);\\s+([PF]):([^;]+);" );
-    private String              _ac;
-    private String              _name;
-    private String              _symbol;
-    private String              _gene_name;
-    private String              _os_scientific_name;
-    private String              _tax_id;
-    private List<GoTerm> _go_terms;
+    public final static Pattern  BindingDB_PATTERN = Pattern.compile( "BindingDB;\\s+([0-9A-Z]+);" );
+    public final static Pattern  CTD_PATTERN       = Pattern.compile( "CTD;\\s+(\\d+);" );
+    public final static Pattern  DrugBank_PATTERN  = Pattern.compile( "DrugBank;\\s+([0-9A-Z]+);\\s+([^\\.]+)" );
+    public final static Pattern  GO_PATTERN        = Pattern.compile( "GO;\\s+(GO:\\d+);\\s+([PFC]):([^;]+);" );
+    public final static Pattern  KEGG_PATTERN      = Pattern.compile( "KEGG;\\s+([a-z]+:[0-9]+);" );
+    public final static Pattern  MIM_PATTERN       = Pattern.compile( "MIM;\\s+(\\d+);" );
+    public final static Pattern  NextBio_PATTERN   = Pattern.compile( "NextBio;\\s+(\\d+);" );
+    public final static Pattern  Orphanet_PATTERN  = Pattern.compile( "Orphanet;\\s+(\\d+);\\s+([^\\.]+)" );
+    public final static Pattern  PDB_PATTERN       = Pattern.compile( "PDB;\\s+([0-9A-Z]{4});\\s+([^;]+)" );
+    public final static Pattern  PharmGKB_PATTERN  = Pattern.compile( "PharmGKB;\\s+([0-9A-Z]+);" );
+    public final static Pattern  Reactome_PATTERN  = Pattern.compile( "Reactome;\\s+([0-9A-Z]+);\\s+([^\\.]+)" );
+    private String               _ac;
+    private ArrayList<Accession> _cross_references;
+    private String               _gene_name;
+    private List<GoTerm>         _go_terms;
+    private String               _name;
+    private String               _os_scientific_name;
+    private String               _symbol;
+    private String               _tax_id;
 
     private UniProtEntry() {
     }
@@ -54,6 +66,110 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
         throw new CloneNotSupportedException();
     }
 
+    @Override
+    public String getAccession() {
+        return _ac;
+    }
+
+    @Override
+    public List<Accession> getCrossReferences() {
+        return _cross_references;
+    }
+
+    @Override
+    public String getGeneName() {
+        return _gene_name;
+    }
+
+    @Override
+    public List<GoTerm> getGoTerms() {
+        return _go_terms;
+    }
+
+    @Override
+    public String getProvider() {
+        return "uniprot";
+    }
+
+    @Override
+    public String getSequenceName() {
+        return _name;
+    }
+
+    @Override
+    public String getSequenceSymbol() {
+        return _symbol;
+    }
+
+    @Override
+    public String getTaxonomyIdentifier() {
+        return _tax_id;
+    }
+
+    @Override
+    public String getTaxonomyScientificName() {
+        return _os_scientific_name;
+    }
+
+    @Override
+    public boolean isEmpty() {
+        return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
+                && ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getSequenceSymbol() )
+                && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() )
+                && ForesterUtil.isEmpty( getSequenceSymbol() ) && ( ( getGoTerms() == null ) || getGoTerms().isEmpty() ) && ( ( getCrossReferences() == null ) || getCrossReferences()
+                .isEmpty() ) );
+    }
+
+    private void addCrossReference( final Accession accession ) {
+        if ( _cross_references == null ) {
+            _cross_references = new ArrayList<Accession>();
+        }
+        System.out.println( "XREF ADDED: " + accession );
+        _cross_references.add( accession );
+    }
+
+    private void addGoTerm( final BasicGoTerm g ) {
+        if ( _go_terms == null ) {
+            _go_terms = new ArrayList<GoTerm>();
+        }
+        System.out.println( "GOTERM ADDED: " + g );
+        _go_terms.add( g );
+    }
+
+    private void setAc( final String ac ) {
+        if ( _ac == null ) {
+            _ac = ac;
+        }
+    }
+
+    private void setGeneName( final String gene_name ) {
+        if ( _gene_name == null ) {
+            _gene_name = gene_name;
+        }
+    }
+
+    private void setOsScientificName( final String os_scientific_name ) {
+        if ( _os_scientific_name == null ) {
+            _os_scientific_name = os_scientific_name;
+        }
+    }
+
+    private void setSequenceName( final String name ) {
+        if ( _name == null ) {
+            _name = name;
+        }
+    }
+
+    private void setSequenceSymbol( final String symbol ) {
+        _symbol = symbol;
+    }
+
+    private void setTaxId( final String tax_id ) {
+        if ( _tax_id == null ) {
+            _tax_id = tax_id;
+        }
+    }
+
     public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
         final UniProtEntry e = new UniProtEntry();
         for( final String line : lines ) {
@@ -81,19 +197,80 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
             }
             else if ( line.startsWith( "DR" ) ) {
                 if ( line.indexOf( "GO;" ) > 0 ) {
-                    Matcher m = GO_PATTERN.matcher( line );
+                    final Matcher m = GO_PATTERN.matcher( line );
                     if ( m.find() ) {
-                        String id = m.group( 1 );
-                        String ns_str = m.group( 2 );
-                        String desc = m.group( 3 );
+                        final String id = m.group( 1 );
+                        final String ns_str = m.group( 2 );
+                        final String desc = m.group( 3 );
                         String gns = GoNameSpace.BIOLOGICAL_PROCESS_STR;
-                        if ( ns_str.equals( "F" ) ) { 
-                            gns =  GoNameSpace.MOLECULAR_FUNCTION_STR;
-                        }    
-                        
+                        if ( ns_str.equals( "F" ) ) {
+                            gns = GoNameSpace.MOLECULAR_FUNCTION_STR;
+                        }
+                        else if ( ns_str.equals( "C" ) ) {
+                            gns = GoNameSpace.CELLULAR_COMPONENT_STR;
+                        }
                         System.out.println( "GO:" + id + " " + desc + " " + ns_str );
-                      
-                        e.addGoTerm( new BasicGoTerm( id, desc, gns, false ) ); 
+                        e.addGoTerm( new BasicGoTerm( id, desc, gns, false ) );
+                    }
+                }
+                else if ( line.indexOf( "PDB;" ) > 0 ) {
+                    final Matcher m = PDB_PATTERN.matcher( line );
+                    if ( m.find() ) {
+                        e.addCrossReference( new Accession( m.group( 1 ), "PDB", m.group( 2 ) ) );
+                    }
+                }
+                else if ( line.indexOf( "KEGG;" ) > 0 ) {
+                    final Matcher m = KEGG_PATTERN.matcher( line );
+                    if ( m.find() ) {
+                        e.addCrossReference( new Accession( m.group( 1 ), "KEGG" ) );
+                    }
+                }
+                else if ( line.indexOf( "CTD;" ) > 0 ) {
+                    final Matcher m = CTD_PATTERN.matcher( line );
+                    if ( m.find() ) {
+                        e.addCrossReference( new Accession( m.group( 1 ), "CTD" ) );
+                    }
+                }
+                else if ( line.indexOf( "MIM;" ) > 0 ) {
+                    final Matcher m = MIM_PATTERN.matcher( line );
+                    if ( m.find() ) {
+                        e.addCrossReference( new Accession( m.group( 1 ), "MIM" ) );
+                    }
+                }
+                else if ( line.indexOf( "Orphanet;" ) > 0 ) {
+                    final Matcher m = Orphanet_PATTERN.matcher( line );
+                    if ( m.find() ) {
+                        e.addCrossReference( new Accession( m.group( 1 ), "Orphanet", m.group( 2 ) ) );
+                    }
+                }
+                else if ( line.indexOf( "PharmGKB;" ) > 0 ) {
+                    final Matcher m = PharmGKB_PATTERN.matcher( line );
+                    if ( m.find() ) {
+                        e.addCrossReference( new Accession( m.group( 1 ), "PharmGKB" ) );
+                    }
+                }
+                else if ( line.indexOf( "BindingDB;" ) > 0 ) {
+                    final Matcher m = BindingDB_PATTERN.matcher( line );
+                    if ( m.find() ) {
+                        e.addCrossReference( new Accession( m.group( 1 ), "BindingDB" ) );
+                    }
+                }
+                else if ( line.indexOf( "DrugBank;" ) > 0 ) {
+                    final Matcher m = DrugBank_PATTERN.matcher( line );
+                    if ( m.find() ) {
+                        e.addCrossReference( new Accession( m.group( 1 ), "DrugBank", m.group( 2 ) ) );
+                    }
+                }
+                else if ( line.indexOf( "NextBio;" ) > 0 ) {
+                    final Matcher m = NextBio_PATTERN.matcher( line );
+                    if ( m.find() ) {
+                        e.addCrossReference( new Accession( m.group( 1 ), "NextBio" ) );
+                    }
+                }
+                else if ( line.indexOf( "Reactome;" ) > 0 ) {
+                    final Matcher m = Reactome_PATTERN.matcher( line );
+                    if ( m.find() ) {
+                        e.addCrossReference( new Accession( m.group( 1 ), "Reactome", m.group( 2 ) ) );
                     }
                 }
             }
@@ -113,95 +290,4 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
         }
         return e;
     }
-
-    private void addGoTerm( BasicGoTerm g ) {
-        if ( _go_terms == null ) {
-            _go_terms = new ArrayList<GoTerm>();
-        }
-        _go_terms.add( g );
-        
-    }
-
-    private void setSequenceSymbol( String symbol ) {
-        _symbol = symbol;
-    }
-
-    @Override
-    public String getAccession() {
-        return _ac;
-    }
-
-    private void setAc( final String ac ) {
-        if ( _ac == null ) {
-            _ac = ac;
-        }
-    }
-
-    @Override
-    public String getSequenceName() {
-        return _name;
-    }
-
-    private void setSequenceName( final String name ) {
-        if ( _name == null ) {
-            _name = name;
-        }
-    }
-
-    @Override
-    public String getTaxonomyScientificName() {
-        return _os_scientific_name;
-    }
-
-    private void setOsScientificName( final String os_scientific_name ) {
-        if ( _os_scientific_name == null ) {
-            _os_scientific_name = os_scientific_name;
-        }
-    }
-
-    @Override
-    public String getTaxonomyIdentifier() {
-        return _tax_id;
-    }
-
-    private void setTaxId( final String tax_id ) {
-        if ( _tax_id == null ) {
-            _tax_id = tax_id;
-        }
-    }
-
-    private void setGeneName( final String gene_name ) {
-        if ( _gene_name == null ) {
-            _gene_name = gene_name;
-        }
-    }
-    
-    @Override
-    public List<GoTerm> getGoTerms() {
-        return _go_terms;
-    }
-    
-
-    @Override
-    public String getSequenceSymbol() {
-        return _symbol;
-    }
-
-    @Override
-    public boolean isEmpty() {
-        return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
-                && ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getSequenceSymbol() )
-                && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil
-                .isEmpty( getSequenceSymbol() ) && ( getGoTerms() == null || getGoTerms().isEmpty() ) );
-    }
-
-    @Override
-    public String getProvider() {
-        return "uniprot";
-    }
-
-    @Override
-    public String getGeneName() {
-        return _gene_name;
-    }
 }