From: cmzmasek@gmail.com Date: Mon, 30 Sep 2013 22:50:02 +0000 (+0000) Subject: inprogress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=7e2a839d55608212fed645ce9ffe3a3f4952fb17;p=jalview.git inprogress --- diff --git a/forester/java/src/org/forester/archaeopteryx/NodePanel.java b/forester/java/src/org/forester/archaeopteryx/NodePanel.java index 686b348..90c7010 100644 --- a/forester/java/src/org/forester/archaeopteryx/NodePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/NodePanel.java @@ -43,6 +43,7 @@ import javax.swing.tree.TreePath; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Annotation; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.data.BranchWidth; @@ -62,56 +63,57 @@ import org.forester.util.ForesterUtil; class NodePanel extends JPanel implements TreeSelectionListener { - static final String DIST_ALTITUDE = "Altitude"; - static final String DIST_ALT_UNIT = "Altitude unit"; - static final String DIST_LONGITUDE = "Longitude"; - static final String DIST_LATITUDE = "Latitude"; - static final String DIST_GEODETIC_DATUM = "Geodetic datum"; - static final String DIST_DESCRIPTION = "Description"; - static final String DATE_UNIT = "Unit"; - static final String DATE_MAX = "Max"; - static final String DATE_MIN = "Min"; - static final String DATE_VALUE = "Value"; - static final String DATE_DESCRIPTION = "Description"; - static final String TAXONOMY_IDENTIFIER = "Identifier"; - static final String SEQ_ACCESSION = "Accession"; - static final String CONFIDENCE = "Confidence"; - static final String PROP = "Properties"; - static final String BINARY_CHARACTERS = "Binary characters"; - static final String REFERENCE = "Reference"; - static final String LIT_REFERENCE = "Reference"; - static final String LIT_REFERENCE_DESC = "Description"; - static final String LIT_REFERENCE_DOI = "DOI"; - static final String DISTRIBUTION = "Distribution"; - static final String DATE = "Date"; - static final String EVENTS = "Events"; - static final String SEQUENCE = "Sequence"; - static final String TAXONOMY = "Taxonomy"; - static final String BASIC = "Basic"; - static final String TAXONOMY_SCIENTIFIC_NAME = "Scientific name"; - static final String SEQ_MOL_SEQ = "Mol seq"; - static final String SEQ_TYPE = "Type"; - static final String SEQ_LOCATION = "Location"; - static final String SEQ_SYMBOL = "Symbol"; - static final String SEQ_URI = "URI"; - static final String NODE_BRANCH_LENGTH = "Branch length"; - static final String NODE_BRANCH_WIDTH = "Branch width"; - static final String NODE_BRANCH_COLOR = "Branch color"; - static final String NODE_NAME = "Name"; - static final String TAXONOMY_URI = "URI"; - static final String TAXONOMY_RANK = "Rank"; - static final String TAXONOMY_SYNONYM = "Synonym"; - static final String TAXONOMY_COMMON_NAME = "Common name"; - static final String TAXONOMY_AUTHORITY = "Authority"; - static final String TAXONOMY_CODE = "Code"; - static final String SEQ_NAME = "Name"; - static final String EVENTS_GENE_LOSSES = "Gene losses"; - static final String EVENTS_SPECIATIONS = "Speciations"; - static final String EVENTS_DUPLICATIONS = "Duplications"; - private static final long serialVersionUID = 5120159904388100771L; - static final String CONFIDENCE_TYPE = "type"; - private final JTree _tree; - private final JEditorPane _pane; + static final String BASIC = "Basic"; + static final String BINARY_CHARACTERS = "Binary characters"; + static final String CONFIDENCE = "Confidence"; + static final String CONFIDENCE_TYPE = "type"; + static final String DATE = "Date"; + static final String DATE_DESCRIPTION = "Description"; + static final String DATE_MAX = "Max"; + static final String DATE_MIN = "Min"; + static final String DATE_UNIT = "Unit"; + static final String DATE_VALUE = "Value"; + static final String DIST_ALT_UNIT = "Altitude unit"; + static final String DIST_ALTITUDE = "Altitude"; + static final String DIST_DESCRIPTION = "Description"; + static final String DIST_GEODETIC_DATUM = "Geodetic datum"; + static final String DIST_LATITUDE = "Latitude"; + static final String DIST_LONGITUDE = "Longitude"; + static final String DISTRIBUTION = "Distribution"; + static final String EVENTS = "Events"; + static final String EVENTS_DUPLICATIONS = "Duplications"; + static final String EVENTS_GENE_LOSSES = "Gene losses"; + static final String EVENTS_SPECIATIONS = "Speciations"; + static final String LIT_REFERENCE = "Reference"; + static final String LIT_REFERENCE_DESC = "Description"; + static final String LIT_REFERENCE_DOI = "DOI"; + static final String NODE_BRANCH_COLOR = "Branch color"; + static final String NODE_BRANCH_LENGTH = "Branch length"; + static final String NODE_BRANCH_WIDTH = "Branch width"; + static final String NODE_NAME = "Name"; + static final String PROP = "Properties"; + static final String REFERENCE = "Reference"; + static final String SEQ_ACCESSION = "Accession"; + static final String SEQ_LOCATION = "Location"; + static final String SEQ_MOL_SEQ = "Mol seq"; + static final String SEQ_NAME = "Name"; + static final String SEQ_SYMBOL = "Symbol"; + static final String SEQ_TYPE = "Type"; + static final String SEQ_URI = "URI"; + static final String SEQUENCE = "Sequence"; + static final String TAXONOMY = "Taxonomy"; + static final String TAXONOMY_AUTHORITY = "Authority"; + static final String TAXONOMY_CODE = "Code"; + static final String TAXONOMY_COMMON_NAME = "Common name"; + static final String TAXONOMY_IDENTIFIER = "Identifier"; + static final String TAXONOMY_RANK = "Rank"; + static final String TAXONOMY_SCIENTIFIC_NAME = "Scientific name"; + static final String TAXONOMY_SYNONYM = "Synonym"; + static final String TAXONOMY_URI = "URI"; + private static final String SEQ_GENE_NAME = "Gene name"; + private static final long serialVersionUID = 5120159904388100771L; + private final JEditorPane _pane; + private final JTree _tree; public NodePanel( final PhylogenyNode phylogeny_node ) { String node_name = ""; @@ -141,6 +143,11 @@ class NodePanel extends JPanel implements TreeSelectionListener { add( split_pane ); } + @Override + public void valueChanged( final TreeSelectionEvent e ) { + // Do nothing. + } + private void expandPath( final String name ) { final TreePath tp = getJTree().getNextMatch( name, 0, Position.Bias.Forward ); if ( tp != null ) { @@ -152,17 +159,10 @@ class NodePanel extends JPanel implements TreeSelectionListener { return _tree; } - @Override - public void valueChanged( final TreeSelectionEvent e ) { - // Do nothing. - } - private static void addAnnotation( final DefaultMutableTreeNode top, final Annotation ann, final String name ) { DefaultMutableTreeNode category; category = new DefaultMutableTreeNode( name ); top.add( category ); - addSubelement( category, REFERENCE, ann.getRef() ); - addSubelement( category, "Description", ann.getDesc() ); addSubelement( category, "Source", ann.getSource() ); addSubelement( category, "Type", ann.getType() ); addSubelement( category, "Evidence", ann.getEvidence() ); @@ -174,60 +174,14 @@ class NodePanel extends JPanel implements TreeSelectionListener { } } - private static void addUri( final DefaultMutableTreeNode top, final Uri uri, final String name ) { - DefaultMutableTreeNode category; - category = new DefaultMutableTreeNode( name ); - top.add( category ); - addSubelement( category, "Description", uri.getDescription() ); - addSubelement( category, "Type", uri.getType() ); - addSubelement( category, "URI", uri.getValue().toString() ); - } - private static void addAnnotations( final DefaultMutableTreeNode top, final SortedSet annotations, final DefaultMutableTreeNode category ) { if ( ( annotations != null ) && ( annotations.size() > 0 ) ) { category.add( new DefaultMutableTreeNode( "Annotations" ) ); final DefaultMutableTreeNode last = top.getLastLeaf(); - int i = 0; - for( final PhylogenyData ann : annotations ) { - addAnnotation( last, ( Annotation ) ann, "Annotation " + ( i++ ) ); - } - } - } - - private static void addUris( final DefaultMutableTreeNode top, - final List uris, - final DefaultMutableTreeNode category ) { - if ( ( uris != null ) && ( uris.size() > 0 ) ) { - category.add( new DefaultMutableTreeNode( "URIs" ) ); - final DefaultMutableTreeNode last = top.getLastLeaf(); - int i = 0; - for( final Uri uri : uris ) { - if ( uri != null ) { - addUri( last, uri, "URI " + ( i++ ) ); - } - } - } - } - - private static void addLineage( final DefaultMutableTreeNode top, - final List lineage, - final DefaultMutableTreeNode category ) { - if ( ( lineage != null ) && ( lineage.size() > 0 ) ) { - final StringBuilder sb = new StringBuilder(); - for( final String lin : lineage ) { - if ( !ForesterUtil.isEmpty( lin ) ) { - sb.append( lin ); - sb.append( " > " ); - } - } - String str = null; - if ( sb.length() > 1 ) { - str = sb.substring( 0, sb.length() - 3 ); - } - if ( !ForesterUtil.isEmpty( str ) ) { - addSubelement( category, "Lineage", str ); + for( final Annotation ann : annotations ) { + addAnnotation( last, ann, ann.asText().toString() ); } } } @@ -299,6 +253,24 @@ class NodePanel extends JPanel implements TreeSelectionListener { addSubelement( chars, "Present", bc.getPresentCharactersAsStringBuffer().toString() ); } + private static void addCrossReference( final DefaultMutableTreeNode top, final Accession x, final String name ) { + DefaultMutableTreeNode category; + category = new DefaultMutableTreeNode( name ); + top.add( category ); + } + + private static void addCrossReferences( final DefaultMutableTreeNode top, + final SortedSet xs, + final DefaultMutableTreeNode category ) { + if ( ( xs != null ) && ( xs.size() > 0 ) ) { + category.add( new DefaultMutableTreeNode( "Cross references" ) ); + final DefaultMutableTreeNode last = top.getLastLeaf(); + for( final Accession x : xs ) { + addCrossReference( last, x, x.asText().toString() ); + } + } + } + private static void addDate( final DefaultMutableTreeNode top, final Date date, final String name ) { DefaultMutableTreeNode category; category = new DefaultMutableTreeNode( name ); @@ -349,6 +321,27 @@ class NodePanel extends JPanel implements TreeSelectionListener { } } + private static void addLineage( final DefaultMutableTreeNode top, + final List lineage, + final DefaultMutableTreeNode category ) { + if ( ( lineage != null ) && ( lineage.size() > 0 ) ) { + final StringBuilder sb = new StringBuilder(); + for( final String lin : lineage ) { + if ( !ForesterUtil.isEmpty( lin ) ) { + sb.append( lin ); + sb.append( " > " ); + } + } + String str = null; + if ( sb.length() > 1 ) { + str = sb.substring( 0, sb.length() - 3 ); + } + if ( !ForesterUtil.isEmpty( str ) ) { + addSubelement( category, "Lineage", str ); + } + } + } + private static void addProperties( final DefaultMutableTreeNode top, final PropertiesMap properties, final String string ) { @@ -374,16 +367,22 @@ class NodePanel extends JPanel implements TreeSelectionListener { top.add( category ); addSubelement( category, SEQ_NAME, seq.getName() ); addSubelement( category, SEQ_SYMBOL, seq.getSymbol() ); + addSubelement( category, SEQ_GENE_NAME, seq.getGeneName() ); if ( seq.getAccession() != null ) { addSubelement( category, SEQ_ACCESSION, seq.getAccession().asText().toString() ); } addSubelement( category, SEQ_LOCATION, seq.getLocation() ); addSubelement( category, SEQ_TYPE, seq.getType() ); addSubelement( category, SEQ_MOL_SEQ, seq.getMolecularSequence() ); + if ( ( seq.getAnnotations() != null ) && !seq.getAnnotations().isEmpty() ) { + addAnnotations( top, seq.getAnnotations(), category ); + } + if ( ( seq.getCrossReferences() != null ) && !seq.getCrossReferences().isEmpty() ) { + addCrossReferences( top, seq.getCrossReferences(), category ); + } if ( ( seq.getUris() != null ) && !seq.getUris().isEmpty() ) { addUris( top, seq.getUris(), category ); } - addAnnotations( top, seq.getAnnotations(), category ); } private static void addSubelement( final DefaultMutableTreeNode node, final String name, final String value ) { @@ -414,6 +413,30 @@ class NodePanel extends JPanel implements TreeSelectionListener { } } + private static void addUri( final DefaultMutableTreeNode top, final Uri uri, final String name ) { + DefaultMutableTreeNode category; + category = new DefaultMutableTreeNode( name ); + top.add( category ); + addSubelement( category, "Description", uri.getDescription() ); + addSubelement( category, "Type", uri.getType() ); + addSubelement( category, "URI", uri.getValue().toString() ); + } + + private static void addUris( final DefaultMutableTreeNode top, + final List uris, + final DefaultMutableTreeNode category ) { + if ( ( uris != null ) && ( uris.size() > 0 ) ) { + category.add( new DefaultMutableTreeNode( "URIs" ) ); + final DefaultMutableTreeNode last = top.getLastLeaf(); + int i = 0; + for( final Uri uri : uris ) { + if ( uri != null ) { + addUri( last, uri, "URI " + ( i++ ) ); + } + } + } + } + private static void createNodes( final DefaultMutableTreeNode top, final PhylogenyNode phylogeny_node ) { addBasics( top, phylogeny_node, BASIC ); // Taxonomy diff --git a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java index 6b9b88b..9f3bcb1 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/SequenceDataRetriver.java @@ -38,7 +38,7 @@ import org.forester.ws.seqdb.SequenceDbWsTools; public final class SequenceDataRetriver extends RunnableProcess { - private final static int DEFAULT_LINES_TO_RETURN = 50; + private final static int DEFAULT_LINES_TO_RETURN = 4000; private final Phylogeny _phy; private final MainFrameApplication _mf; private final TreePanel _treepanel; diff --git a/forester/java/src/org/forester/go/GoNameSpace.java b/forester/java/src/org/forester/go/GoNameSpace.java index b487ea8..51932ec 100644 --- a/forester/java/src/org/forester/go/GoNameSpace.java +++ b/forester/java/src/org/forester/go/GoNameSpace.java @@ -27,10 +27,10 @@ package org.forester.go; public class GoNameSpace { - public final static String MOLECULAR_FUNCTION_STR = "molecular_function"; - public final static String BIOLOGICAL_PROCESS_STR = "biological_process"; - public final static String CELLULAR_COMPONENT_STR = "cellular_component"; - public final static String UNASSIGNED_STR = "unassigned"; + public final static String MOLECULAR_FUNCTION_STR = "molecular_function"; + public final static String BIOLOGICAL_PROCESS_STR = "biological_process"; + public final static String CELLULAR_COMPONENT_STR = "cellular_component"; + public final static String UNASSIGNED_STR = "unassigned"; private final GoNamespaceType _type; public GoNameSpace( final GoNamespaceType type ) { diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java index bebc238..d40e3f8 100644 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java +++ b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java @@ -46,7 +46,6 @@ import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Confidence; -import org.forester.phylogeny.data.DomainArchitecture; import org.forester.phylogeny.data.Event; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.PhylogenyDataUtil; @@ -596,13 +595,6 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse } node_to_annotate.getNodeData().getTaxonomy().setIdentifier( new Identifier( s.substring( 2 ) ) ); } - else if ( s.startsWith( NHXtags.DOMAIN_STRUCTURE ) ) { - if ( !node_to_annotate.getNodeData().isHasSequence() ) { - node_to_annotate.getNodeData().setSequence( new Sequence() ); - } - node_to_annotate.getNodeData().getSequence() - .setDomainArchitecture( new DomainArchitecture( s.substring( 3 ) ) ); - } else if ( s.startsWith( NHXtags.SEQUENCE_ACCESSION ) ) { if ( !node_to_annotate.getNodeData().isHasSequence() ) { node_to_annotate.getNodeData().setSequence( new Sequence() ); diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java index bdf5889..643a401 100644 --- a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java +++ b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlMapping.java @@ -33,105 +33,106 @@ package org.forester.io.parsers.phyloxml; */ public final class PhyloXmlMapping { - public static final String PHYLOGENY = "phylogeny"; - public static final String PHYLOGENY_NAME = "name"; - public static final String PHYLOGENY_DESCRIPTION = "description"; - public static final String PHYLOGENY_IS_REROOTABLE_ATTR = "rerootable"; - public static final String PHYLOGENY_BRANCHLENGTH_UNIT_ATTR = "branch_length_unit"; - public static final String PHYLOGENY_IS_ROOTED_ATTR = "rooted"; - public static final String PHYLOGENY_TYPE_ATTR = "type"; - public static final String CLADE = "clade"; - public static final String NODE_NAME = "name"; - public static final String NODE_COLLAPSE = "collapse"; - public static final String SEQUENCE = "sequence"; - public static final String SEQUENCE_NAME = "name"; - public static final String SEQUENCE_SYMBOL = "symbol"; public static final String ACCESSION = "accession"; + public static final String ACCESSION_COMMENT_ATTR = "comment"; public static final String ACCESSION_SOURCE_ATTR = "source"; - public static final String SEQUENCE_LOCATION = "location"; - public static final String SEQUENCE_MOL_SEQ = "mol_seq"; - public static final String SEQUENCE_MOL_SEQ_ALIGNED_ATTR = "is_aligned"; public static final String ANNOTATION = "annotation"; public static final String ANNOTATION_DESC = "desc"; - public static final String ANNOTATION_REF_ATTR = "ref"; public static final String ANNOTATION_EVIDENCE_ATTR = "evidence"; + public static final String ANNOTATION_REF_ATTR = "ref"; + public static final String ANNOTATION_SOURCE_ATTR = "source"; public static final String ANNOTATION_TYPE_ATTR = "type"; - public static final String TAXONOMY = "taxonomy"; - public static final String TAXONOMY_SCIENTIFIC_NAME = "scientific_name"; - public static final String TAXONOMY_COMMON_NAME = "common_name"; - public static final String TAXONOMY_CODE = "code"; - public static final String TAXONOMY_RANK = "rank"; - public static final String TAXONOMY_SYNONYM = "synonym"; - public static final String TAXONOMY_AUTHORITY = "authority"; - public static final String DISTRIBUTION = "distribution"; + public static final String BINARY_CHARACTER = "bc"; public static final String BINARY_CHARACTERS = "binary_characters"; - public static final String BINARY_CHARACTERS_PRESENT = "present"; public static final String BINARY_CHARACTERS_GAINED = "gained"; - public static final String BINARY_CHARACTERS_LOST = "lost"; - public static final String BINARY_CHARACTERS_TYPE_ATTR = "type"; - public static final String BINARY_CHARACTERS_PRESENT_COUNT_ATTR = "present_count"; public static final String BINARY_CHARACTERS_GAINED_COUNT_ATTR = "gained_count"; + public static final String BINARY_CHARACTERS_LOST = "lost"; public static final String BINARY_CHARACTERS_LOST_COUNT_ATTR = "lost_count"; + public static final String BINARY_CHARACTERS_PRESENT = "present"; + public static final String BINARY_CHARACTERS_PRESENT_COUNT_ATTR = "present_count"; + public static final String BINARY_CHARACTERS_TYPE_ATTR = "type"; public static final String BRANCH_LENGTH = "branch_length"; + public static final String CLADE = "clade"; + public static final String CLADE_DATE = "date"; + public static final String CLADE_DATE_DESC = "desc"; + public static final String CLADE_DATE_MAX = "maximum"; + public static final String CLADE_DATE_MIN = "minimum"; + public static final String CLADE_DATE_UNIT = "unit"; + public static final String CLADE_DATE_VALUE = "value"; + public static final String COLOR = "color"; + public static final String COLOR_BLUE = "blue"; + public static final String COLOR_GREEN = "green"; + public static final String COLOR_RED = "red"; public static final String CONFIDENCE = "confidence"; public static final String CONFIDENCE_SD_ATTR = "stddev"; public static final String CONFIDENCE_TYPE_ATTR = "type"; - public static final String COLOR = "color"; - public static final String COLOR_RED = "red"; - public static final String COLOR_GREEN = "green"; - public static final String COLOR_BLUE = "blue"; - public final static String SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN = "domain"; - public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM = "from"; - public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO = "to"; - public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE = "confidence"; - public final static String SEQUENCE_X_REFS = "cross_references"; - // public final static String NODE_IDENTIFIER = "node_id"; - public final static String IDENTIFIER = "id"; - public final static String IDENTIFIER_PROVIDER_ATTR = "provider"; - public static final String URI = "uri"; - public static final String WIDTH = "width"; - public final static String EVENTS = "events"; - public final static String EVENT_TYPE = "type"; + public static final String DISTRIBUTION = "distribution"; + public static final String DISTRIBUTION_DESC = "desc"; public final static String EVENT_DUPLICATIONS = "duplications"; - public final static String EVENT_SPECIATIONS = "speciations"; public final static String EVENT_LOSSES = "losses"; - public final static String SEQUENCE_DOMAIN_ARCHITECURE = "domain_architecture"; - public final static String SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH = "length"; - public final static String SEQUENCE_TYPE = "type"; - public static final String BINARY_CHARACTER = "bc"; - public static final String URI_DESC_ATTR = "desc"; - public static final String TYPE_ATTR = "type"; - public static final String REFERENCE = "reference"; - public static final String REFERENCE_DOI_ATTR = "doi"; - public static final String REFERENCE_DESC = "desc"; - public static final String PROPERTY = "property"; - public static final String PROPERTY_REF = "ref"; - public static final String PROPERTY_UNIT = "unit"; - public static final String PROPERTY_DATATYPE = "datatype"; - public static final String PROPERTY_APPLIES_TO = "applies_to"; + public final static String EVENT_SPECIATIONS = "speciations"; + public final static String EVENT_TYPE = "type"; + public final static String EVENTS = "events"; public static final String ID_REF = "id_ref"; - public static final String ANNOTATION_SOURCE_ATTR = "source"; - public static final String DISTRIBUTION_DESC = "desc"; + // public final static String NODE_IDENTIFIER = "node_id"; + public final static String IDENTIFIER = "id"; + public final static String IDENTIFIER_PROVIDER_ATTR = "provider"; + public static final String NODE_COLLAPSE = "collapse"; + public static final String NODE_NAME = "name"; + public static final String PHYLOGENY = "phylogeny"; + public static final String PHYLOGENY_BRANCHLENGTH_UNIT_ATTR = "branch_length_unit"; + public static final String PHYLOGENY_DESCRIPTION = "description"; + public static final String PHYLOGENY_IS_REROOTABLE_ATTR = "rerootable"; + public static final String PHYLOGENY_IS_ROOTED_ATTR = "rooted"; + public static final String PHYLOGENY_NAME = "name"; + public static final String PHYLOGENY_TYPE_ATTR = "type"; public static final String POINT = "point"; - public static final String POINT_LONGITUDE = "long"; - public static final String POINT_LATITUDE = "lat"; public static final String POINT_ALTITUDE = "alt"; public static final String POINT_ALTITUDE_UNIT_ATTR = "alt_unit"; public static final String POINT_GEODETIC_DATUM = "geodetic_datum"; - public static final String CLADE_DATE = "date"; - public static final String CLADE_DATE_UNIT = "unit"; - public static final String CLADE_DATE_DESC = "desc"; - public static final String CLADE_DATE_MIN = "minimum"; - public static final String CLADE_DATE_MAX = "maximum"; - public static final String CLADE_DATE_VALUE = "value"; + public static final String POINT_LATITUDE = "lat"; + public static final String POINT_LONGITUDE = "long"; + public final static String POLYGON = "polygon"; + public static final String PROPERTY = "property"; + public static final String PROPERTY_APPLIES_TO = "applies_to"; + public static final String PROPERTY_DATATYPE = "datatype"; + public static final String PROPERTY_REF = "ref"; + public static final String PROPERTY_UNIT = "unit"; + public static final String REFERENCE = "reference"; + public static final String REFERENCE_DESC = "desc"; + public static final String REFERENCE_DOI_ATTR = "doi"; + public static final String SEQUENCE = "sequence"; + public final static String SEQUENCE_DOMAIN_ARCHITECTURE_DOMAIN = "domain"; + public final static String SEQUENCE_DOMAIN_ARCHITECTURE_LENGTH = "length"; + public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_CONFIDENCE = "confidence"; + public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_FROM = "from"; + public final static String SEQUENCE_DOMAIN_ARCHITECTURE_PROT_DOMAIN_TO = "to"; + public final static String SEQUENCE_DOMAIN_ARCHITECURE = "domain_architecture"; + public static final String SEQUENCE_GENE_NAME = "gene_name"; + public static final String SEQUENCE_LOCATION = "location"; + public static final String SEQUENCE_MOL_SEQ = "mol_seq"; + public static final String SEQUENCE_MOL_SEQ_ALIGNED_ATTR = "is_aligned"; + public static final String SEQUENCE_NAME = "name"; public final static String SEQUENCE_RELATION = "sequence_relation"; - public final static String SEQUENCE_RELATION_TYPE = "type"; + public final static String SEQUENCE_RELATION_DISTANCE = "distance"; public final static String SEQUENCE_RELATION_ID_REF0 = "id_ref_0"; public final static String SEQUENCE_RELATION_ID_REF1 = "id_ref_1"; - public final static String SEQUENCE_RELATION_DISTANCE = "distance"; + public final static String SEQUENCE_RELATION_TYPE = "type"; public final static String SEQUENCE_SOURCE_ID = "id_source"; - public final static String POLYGON = "polygon"; - + public static final String SEQUENCE_SYMBOL = "symbol"; + public final static String SEQUENCE_TYPE = "type"; + public final static String SEQUENCE_X_REFS = "cross_references"; + public static final String TAXONOMY = "taxonomy"; + public static final String TAXONOMY_AUTHORITY = "authority"; + public static final String TAXONOMY_CODE = "code"; + public static final String TAXONOMY_COMMON_NAME = "common_name"; + public static final String TAXONOMY_RANK = "rank"; + public static final String TAXONOMY_SCIENTIFIC_NAME = "scientific_name"; + public static final String TAXONOMY_SYNONYM = "synonym"; + public static final String TYPE_ATTR = "type"; + public static final String URI = "uri"; + public static final String URI_DESC_ATTR = "desc"; + public static final String WIDTH = "width"; private PhyloXmlMapping() { } diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java index 95d5e9b..e585cb3 100644 --- a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java +++ b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java @@ -37,7 +37,7 @@ public final class PhyloXmlUtil { public static final String OTHER = "other"; public static final String UNKNOWN = "unknown"; - public final static Pattern SEQUENCE_SYMBOL_PATTERN = Pattern.compile( "\\S{1,30}" ); + public final static Pattern SEQUENCE_SYMBOL_PATTERN = Pattern.compile( "\\S{1,20}" ); public final static Pattern TAXOMONY_CODE_PATTERN = Pattern .compile( ParserUtils.TAX_CODE ); public final static Pattern LIT_REF_DOI_PATTERN = Pattern diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/AccessionParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/AccessionParser.java index e70b11c..64d5539 100644 --- a/forester/java/src/org/forester/io/parsers/phyloxml/data/AccessionParser.java +++ b/forester/java/src/org/forester/io/parsers/phyloxml/data/AccessionParser.java @@ -48,10 +48,21 @@ public class AccessionParser implements PhylogenyDataPhyloXmlParser { @Override public PhylogenyData parse( final XmlElement element ) throws PhyloXmlDataFormatException { - if ( element.isHasAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ) ) { + if ( element.isHasAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ) + && element.isHasAttribute( PhyloXmlMapping.ACCESSION_COMMENT_ATTR ) ) { + return new Accession( element.getValueAsString(), + element.getAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ), + element.getAttribute( PhyloXmlMapping.ACCESSION_COMMENT_ATTR ) ); + } + else if ( element.isHasAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ) ) { return new Accession( element.getValueAsString(), element.getAttribute( PhyloXmlMapping.ACCESSION_SOURCE_ATTR ) ); } + else if ( element.isHasAttribute( PhyloXmlMapping.ACCESSION_COMMENT_ATTR ) ) { + return new Accession( element.getValueAsString(), + "?", + element.getAttribute( PhyloXmlMapping.ACCESSION_COMMENT_ATTR ) ); + } else { return new Accession( element.getValueAsString(), "?" ); } diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java b/forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java index 680cb4d..56a7ea1 100644 --- a/forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java +++ b/forester/java/src/org/forester/io/parsers/phyloxml/data/SequenceParser.java @@ -66,6 +66,9 @@ public class SequenceParser implements PhylogenyDataPhyloXmlParser { else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_NAME ) ) { sequence.setName( child_element.getValueAsString() ); } + else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_GENE_NAME ) ) { + sequence.setGeneName( child_element.getValueAsString() ); + } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_MOL_SEQ ) ) { if ( child_element.isHasAttribute( PhyloXmlMapping.SEQUENCE_MOL_SEQ_ALIGNED_ATTR ) ) { sequence.setMolecularSequenceAligned( Boolean.parseBoolean( child_element @@ -91,8 +94,9 @@ public class SequenceParser implements PhylogenyDataPhyloXmlParser { } else if ( child_element.getQualifiedName().equals( PhyloXmlMapping.SEQUENCE_X_REFS ) ) { for( int j = 0; j < child_element.getNumberOfChildElements(); ++j ) { - // final XmlElement c = child_element.getChildElement( j ); - sequence.addCrossReference( ( Accession ) AccessionParser.getInstance().parse( child_element.getChildElement( j ) ) ); + // final XmlElement c = child_element.getChildElement( j ); + sequence.addCrossReference( ( Accession ) AccessionParser.getInstance().parse( child_element + .getChildElement( j ) ) ); } //sequence.addUri( ( Uri ) UriParser.getInstance().parse( child_element ) ); } diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index cc140e4..9c83603 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -44,6 +44,8 @@ import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.io.parsers.util.PhylogenyParserException; +import org.forester.phylogeny.data.Accession; +import org.forester.phylogeny.data.Annotation; import org.forester.phylogeny.data.BranchColor; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.data.Confidence; @@ -918,6 +920,10 @@ public class PhylogenyMethods { match = true; } if ( !match && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) { + match = true; + } + if ( !match && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) { match = true; } @@ -940,6 +946,38 @@ public class PhylogenyMethods { } } } + // + if ( !match && node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getAnnotations() != null ) ) { + for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) { + if ( match( ann.getDesc(), query, case_sensitive, partial ) ) { + match = true; + break; + } + if ( match( ann.getRef(), query, case_sensitive, partial ) ) { + match = true; + break; + } + } + } + if ( !match && node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) { + for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) { + if ( match( x.getComment(), query, case_sensitive, partial ) ) { + match = true; + break; + } + if ( match( x.getSource(), query, case_sensitive, partial ) ) { + match = true; + break; + } + if ( match( x.getValue(), query, case_sensitive, partial ) ) { + match = true; + break; + } + } + } + // if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) { Iterator it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator(); I: while ( it.hasNext() ) { @@ -1018,6 +1056,10 @@ public class PhylogenyMethods { match = true; } if ( !match && node.getNodeData().isHasSequence() + && match( node.getNodeData().getSequence().getGeneName(), query, case_sensitive, partial ) ) { + match = true; + } + if ( !match && node.getNodeData().isHasSequence() && match( node.getNodeData().getSequence().getSymbol(), query, case_sensitive, partial ) ) { match = true; } @@ -1040,6 +1082,38 @@ public class PhylogenyMethods { } } } + // + if ( !match && node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getAnnotations() != null ) ) { + for( final Annotation ann : node.getNodeData().getSequence().getAnnotations() ) { + if ( match( ann.getDesc(), query, case_sensitive, partial ) ) { + match = true; + break; + } + if ( match( ann.getRef(), query, case_sensitive, partial ) ) { + match = true; + break; + } + } + } + if ( !match && node.getNodeData().isHasSequence() + && ( node.getNodeData().getSequence().getCrossReferences() != null ) ) { + for( final Accession x : node.getNodeData().getSequence().getCrossReferences() ) { + if ( match( x.getComment(), query, case_sensitive, partial ) ) { + match = true; + break; + } + if ( match( x.getSource(), query, case_sensitive, partial ) ) { + match = true; + break; + } + if ( match( x.getValue(), query, case_sensitive, partial ) ) { + match = true; + break; + } + } + } + // if ( !match && ( node.getNodeData().getBinaryCharacters() != null ) ) { Iterator it = node.getNodeData().getBinaryCharacters().getPresentCharacters().iterator(); I: while ( it.hasNext() ) { diff --git a/forester/java/src/org/forester/phylogeny/data/Accession.java b/forester/java/src/org/forester/phylogeny/data/Accession.java index 6e9f158..5fb3afe 100644 --- a/forester/java/src/org/forester/phylogeny/data/Accession.java +++ b/forester/java/src/org/forester/phylogeny/data/Accession.java @@ -34,13 +34,27 @@ import org.forester.util.ForesterUtil; public final class Accession implements PhylogenyData, Comparable { - final private String _value; + final private String _comment; final private String _source; final private String _source_value; + final private String _value; public Accession( final String value, final String source ) { _value = value; _source = source; + _comment = ""; + if ( source != null ) { + _source_value = source + value; + } + else { + _source_value = value; + } + } + + public Accession( final String value, final String source, final String comment ) { + _value = value; + _source = source; + _comment = comment; if ( source != null ) { _source_value = source + value; } @@ -58,15 +72,27 @@ public final class Accession implements PhylogenyData, Comparable { public StringBuffer asText() { final StringBuffer sb = new StringBuffer(); if ( !ForesterUtil.isEmpty( getSource() ) ) { - sb.append( "[" ); sb.append( getSource() ); - sb.append( "] " ); + sb.append( ": " ); } sb.append( getValue() ); + if ( !ForesterUtil.isEmpty( getComment() ) ) { + sb.append( " (" ); + sb.append( getComment() ); + sb.append( ")" ); + } return sb; } @Override + public int compareTo( final Accession o ) { + if ( equals( o ) ) { + return 0; + } + return _source_value.compareTo( o._source_value ); + } + + @Override public PhylogenyData copy() { return new Accession( getValue(), getSource() ); } @@ -88,6 +114,10 @@ public final class Accession implements PhylogenyData, Comparable { } } + public String getComment() { + return _comment; + } + public String getSource() { return _source; } @@ -98,7 +128,6 @@ public final class Accession implements PhylogenyData, Comparable { @Override public int hashCode() { - return _source_value.hashCode(); } @@ -129,20 +158,44 @@ public final class Accession implements PhylogenyData, Comparable { @Override public void toPhyloXML( final Writer writer, final int level, final String indentation ) throws IOException { if ( ForesterUtil.isEmpty( getSource() ) ) { - PhylogenyDataUtil.appendElement( writer, - PhyloXmlMapping.ACCESSION, - getValue(), - PhyloXmlMapping.ACCESSION_SOURCE_ATTR, - "unknown", - indentation ); + if ( ForesterUtil.isEmpty( getComment() ) ) { + PhylogenyDataUtil.appendElement( writer, + PhyloXmlMapping.ACCESSION, + getValue(), + PhyloXmlMapping.ACCESSION_SOURCE_ATTR, + "unknown", + indentation ); + } + else { + PhylogenyDataUtil.appendElement( writer, + PhyloXmlMapping.ACCESSION, + getValue(), + PhyloXmlMapping.ACCESSION_SOURCE_ATTR, + "unknown", + PhyloXmlMapping.ACCESSION_COMMENT_ATTR, + getComment(), + indentation ); + } } else { - PhylogenyDataUtil.appendElement( writer, - PhyloXmlMapping.ACCESSION, - getValue(), - PhyloXmlMapping.ACCESSION_SOURCE_ATTR, - getSource(), - indentation ); + if ( ForesterUtil.isEmpty( getComment() ) ) { + PhylogenyDataUtil.appendElement( writer, + PhyloXmlMapping.ACCESSION, + getValue(), + PhyloXmlMapping.ACCESSION_SOURCE_ATTR, + getSource(), + indentation ); + } + else { + PhylogenyDataUtil.appendElement( writer, + PhyloXmlMapping.ACCESSION, + getValue(), + PhyloXmlMapping.ACCESSION_SOURCE_ATTR, + getSource(), + PhyloXmlMapping.ACCESSION_COMMENT_ATTR, + getComment(), + indentation ); + } } } @@ -150,12 +203,4 @@ public final class Accession implements PhylogenyData, Comparable { public String toString() { return asText().toString(); } - - @Override - public int compareTo( Accession o ) { - if ( equals( o ) ) { - return 0; - } - return _source_value.compareTo( o._source_value ); - } } diff --git a/forester/java/src/org/forester/phylogeny/data/Annotation.java b/forester/java/src/org/forester/phylogeny/data/Annotation.java index 99f9691..652a034 100644 --- a/forester/java/src/org/forester/phylogeny/data/Annotation.java +++ b/forester/java/src/org/forester/phylogeny/data/Annotation.java @@ -89,7 +89,20 @@ public class Annotation implements PhylogenyData, MultipleUris, Comparable() ); } - getCrossReferences().add( cross_reference ); + getCrossReferences().add( cross_reference ); } - + public SortedSet getCrossReferences() { return _xrefs; } - - private void setCrossReferences( TreeSet cross_references ) { + + private void setCrossReferences( final TreeSet cross_references ) { _xrefs = cross_references; } @@ -131,6 +132,7 @@ public class Sequence implements PhylogenyData, MultipleUris { final Sequence seq = new Sequence(); seq.setAnnotations( getAnnotations() ); seq.setName( getName() ); + seq.setGeneName( getGeneName() ); try { seq.setSymbol( getSymbol() ); } @@ -170,7 +172,7 @@ public class Sequence implements PhylogenyData, MultipleUris { seq.setCrossReferences( new TreeSet() ); for( final Accession x : getCrossReferences() ) { if ( x != null ) { - seq.getCrossReferences().add( x); + seq.getCrossReferences().add( x ); } } } @@ -229,6 +231,10 @@ public class Sequence implements PhylogenyData, MultipleUris { return _name; } + public String getGeneName() { + return _gene_name; + } + public List getSequenceRelations() { if ( _seq_relations == null ) { _seq_relations = new ArrayList(); @@ -282,8 +288,8 @@ public class Sequence implements PhylogenyData, MultipleUris { } public void init() { - setAnnotations( null ); setName( "" ); + setGeneName( "" ); setMolecularSequence( "" ); setMolecularSequenceAligned( false ); setLocation( "" ); @@ -304,7 +310,8 @@ public class Sequence implements PhylogenyData, MultipleUris { setUris( null ); setSequenceRelations( null ); setSourceId( null ); - setCrossReferences(null); + setCrossReferences( null ); + setAnnotations( null ); } @Override @@ -317,7 +324,7 @@ public class Sequence implements PhylogenyData, MultipleUris { return getAccession().isEqual( s.getAccession() ); } return s.getMolecularSequence().equals( getMolecularSequence() ) && s.getName().equals( getName() ) - && s.getSymbol().equals( getSymbol() ); + && s.getSymbol().equals( getSymbol() ) && s.getGeneName().equals( getGeneName() ); } public void setAccession( final Accession accession ) { @@ -348,6 +355,10 @@ public class Sequence implements PhylogenyData, MultipleUris { _name = name; } + public void setGeneName( final String gene_name ) { + _gene_name = gene_name; + } + public void setSourceId( final String source_id ) { _source_id = source_id; } @@ -382,9 +393,6 @@ public class Sequence implements PhylogenyData, MultipleUris { if ( getAccession() != null ) { getAccession().toNHX(); } - if ( getDomainArchitecture() != null ) { - sb.append( getDomainArchitecture().toNHX() ); - } return sb; } @@ -406,6 +414,9 @@ public class Sequence implements PhylogenyData, MultipleUris { if ( !ForesterUtil.isEmpty( getName() ) ) { PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_NAME, getName(), indentation ); } + if ( !ForesterUtil.isEmpty( getGeneName() ) ) { + PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_GENE_NAME, getGeneName(), indentation ); + } if ( !ForesterUtil.isEmpty( getLocation() ) ) { PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.SEQUENCE_LOCATION, getLocation(), indentation ); } @@ -417,14 +428,14 @@ public class Sequence implements PhylogenyData, MultipleUris { String.valueOf( isMolecularSequenceAligned() ), indentation ); } - if ( getUris() != null && !getUris().isEmpty() ) { + if ( ( getUris() != null ) && !getUris().isEmpty() ) { for( final Uri uri : getUris() ) { if ( uri != null ) { uri.toPhyloXML( writer, level, indentation ); } } } - if ( getAnnotations() != null && !getAnnotations().isEmpty() ) { + if ( ( getAnnotations() != null ) && !getAnnotations().isEmpty() ) { for( final PhylogenyData annotation : getAnnotations() ) { annotation.toPhyloXML( writer, level, my_ind ); } @@ -432,7 +443,7 @@ public class Sequence implements PhylogenyData, MultipleUris { if ( getDomainArchitecture() != null ) { getDomainArchitecture().toPhyloXML( writer, level, my_ind ); } - if ( getCrossReferences() != null && !getCrossReferences().isEmpty() ) { + if ( ( getCrossReferences() != null ) && !getCrossReferences().isEmpty() ) { writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( my_ind ); PhylogenyDataUtil.appendOpen( writer, PhyloXmlMapping.SEQUENCE_X_REFS ); @@ -442,7 +453,7 @@ public class Sequence implements PhylogenyData, MultipleUris { writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( my_ind ); PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE_X_REFS ); - } + } writer.write( ForesterUtil.LINE_SEPARATOR ); writer.write( indentation ); PhylogenyDataUtil.appendClose( writer, PhyloXmlMapping.SEQUENCE ); diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 02efa90..d1b94ed 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -1209,12 +1209,12 @@ public final class Test { if ( !( t3.getNode( "root node" ).getNodeData().getSequence().getLocation().equals( "12p13-p12" ) ) ) { return false; } - SortedSet x = t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences(); + final SortedSet x = t3.getNode( "root node" ).getNodeData().getSequence().getCrossReferences(); if ( x.size() != 4 ) { return false; } int c = 0; - for( Accession acc : x ) { + for( final Accession acc : x ) { if ( c == 0 ) { if ( !acc.getSource().equals( "KEGG" ) ) { return false; @@ -1504,7 +1504,6 @@ public final class Test { } if ( ( ( BinaryCharacters ) t3_rt.getNode( "node bb" ).getNodeData().getBinaryCharacters().copy() ) .getLostCount() != BinaryCharacters.COUNT_DEFAULT ) { - return false; } if ( t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getGainedCount() != 1 ) { @@ -1553,12 +1552,13 @@ public final class Test { .equalsIgnoreCase( "433" ) ) { return false; } - SortedSet x = t3_rt.getNode( "root node" ).getNodeData().getSequence().getCrossReferences(); + final SortedSet x = t3_rt.getNode( "root node" ).getNodeData().getSequence() + .getCrossReferences(); if ( x.size() != 4 ) { return false; } int c = 0; - for( Accession acc : x ) { + for( final Accession acc : x ) { if ( c == 0 ) { if ( !acc.getSource().equals( "KEGG" ) ) { return false; diff --git a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java index 8e319f2..8172d3d 100644 --- a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java @@ -28,6 +28,7 @@ package org.forester.ws.seqdb; import java.util.List; import org.forester.go.GoTerm; +import org.forester.phylogeny.data.Accession; import org.forester.util.ForesterUtil; public final class EbiDbEntry implements SequenceDatabaseEntry { @@ -175,4 +176,9 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { public List getGoTerms() { return null; } + + @Override + public List getCrossReferences() { + return null; + } } diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java b/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java index 70ff7b4..3a28d6a 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java @@ -28,24 +28,27 @@ package org.forester.ws.seqdb; import java.util.List; import org.forester.go.GoTerm; +import org.forester.phylogeny.data.Accession; public interface SequenceDatabaseEntry { - public String getGeneName(); + public String getAccession(); - public boolean isEmpty(); + public String getGeneName(); - public String getAccession(); + public List getGoTerms(); public String getProvider(); public String getSequenceName(); - public String getTaxonomyScientificName(); + public String getSequenceSymbol(); public String getTaxonomyIdentifier(); - public String getSequenceSymbol(); + public String getTaxonomyScientificName(); - public List getGoTerms(); + public boolean isEmpty(); + + public List getCrossReferences(); } \ No newline at end of file diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index f370e3d..111fa68 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -241,23 +241,28 @@ public final class SequenceDbWsTools { seq.setName( db_entry.getSequenceName() ); } if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) { - final String gn = db_entry.getGeneName().replace( ' ', '_' ); + seq.setGeneName( db_entry.getGeneName() ); + } + if ( !ForesterUtil.isEmpty( db_entry.getSequenceSymbol() ) ) { try { - seq.setSymbol( gn ); + seq.setSymbol( db_entry.getSequenceSymbol() ); } - catch ( PhyloXmlDataFormatException e ) { + catch ( final PhyloXmlDataFormatException e ) { // Eat this exception. } } - if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) { - // seq.addAnnotation( new Annotation( "GN", db_entry.getGeneName() ) ); - } - if ( db_entry.getGoTerms() != null && !db_entry.getGoTerms().isEmpty() ) { + if ( ( db_entry.getGoTerms() != null ) && !db_entry.getGoTerms().isEmpty() ) { for( final GoTerm go : db_entry.getGoTerms() ) { - seq.addAnnotation( new Annotation( go.getGoId().getId(), go.getName() ) ); + final Annotation ann = new Annotation( go.getGoId().getId() ); + ann.setDesc( go.getName() ); + seq.addAnnotation( ann ); + } + } + if ( ( db_entry.getCrossReferences() != null ) && !db_entry.getCrossReferences().isEmpty() ) { + for( final Accession x : db_entry.getCrossReferences() ) { + seq.addCrossReference( x ); } } - final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy(); if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) { diff --git a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java index aeabf63..2eaa720 100644 --- a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java @@ -33,18 +33,30 @@ import java.util.regex.Pattern; import org.forester.go.BasicGoTerm; import org.forester.go.GoNameSpace; import org.forester.go.GoTerm; +import org.forester.phylogeny.data.Accession; import org.forester.util.ForesterUtil; public final class UniProtEntry implements SequenceDatabaseEntry { - public final static Pattern GO_PATTERN = Pattern.compile( "GO;\\s+(GO:\\d+);\\s+([PF]):([^;]+);" ); - private String _ac; - private String _name; - private String _symbol; - private String _gene_name; - private String _os_scientific_name; - private String _tax_id; - private List _go_terms; + public final static Pattern BindingDB_PATTERN = Pattern.compile( "BindingDB;\\s+([0-9A-Z]+);" ); + public final static Pattern CTD_PATTERN = Pattern.compile( "CTD;\\s+(\\d+);" ); + public final static Pattern DrugBank_PATTERN = Pattern.compile( "DrugBank;\\s+([0-9A-Z]+);\\s+([^\\.]+)" ); + public final static Pattern GO_PATTERN = Pattern.compile( "GO;\\s+(GO:\\d+);\\s+([PFC]):([^;]+);" ); + public final static Pattern KEGG_PATTERN = Pattern.compile( "KEGG;\\s+([a-z]+:[0-9]+);" ); + public final static Pattern MIM_PATTERN = Pattern.compile( "MIM;\\s+(\\d+);" ); + public final static Pattern NextBio_PATTERN = Pattern.compile( "NextBio;\\s+(\\d+);" ); + public final static Pattern Orphanet_PATTERN = Pattern.compile( "Orphanet;\\s+(\\d+);\\s+([^\\.]+)" ); + public final static Pattern PDB_PATTERN = Pattern.compile( "PDB;\\s+([0-9A-Z]{4});\\s+([^;]+)" ); + public final static Pattern PharmGKB_PATTERN = Pattern.compile( "PharmGKB;\\s+([0-9A-Z]+);" ); + public final static Pattern Reactome_PATTERN = Pattern.compile( "Reactome;\\s+([0-9A-Z]+);\\s+([^\\.]+)" ); + private String _ac; + private ArrayList _cross_references; + private String _gene_name; + private List _go_terms; + private String _name; + private String _os_scientific_name; + private String _symbol; + private String _tax_id; private UniProtEntry() { } @@ -54,6 +66,110 @@ public final class UniProtEntry implements SequenceDatabaseEntry { throw new CloneNotSupportedException(); } + @Override + public String getAccession() { + return _ac; + } + + @Override + public List getCrossReferences() { + return _cross_references; + } + + @Override + public String getGeneName() { + return _gene_name; + } + + @Override + public List getGoTerms() { + return _go_terms; + } + + @Override + public String getProvider() { + return "uniprot"; + } + + @Override + public String getSequenceName() { + return _name; + } + + @Override + public String getSequenceSymbol() { + return _symbol; + } + + @Override + public String getTaxonomyIdentifier() { + return _tax_id; + } + + @Override + public String getTaxonomyScientificName() { + return _os_scientific_name; + } + + @Override + public boolean isEmpty() { + return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() ) + && ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) + && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) + && ForesterUtil.isEmpty( getSequenceSymbol() ) && ( ( getGoTerms() == null ) || getGoTerms().isEmpty() ) && ( ( getCrossReferences() == null ) || getCrossReferences() + .isEmpty() ) ); + } + + private void addCrossReference( final Accession accession ) { + if ( _cross_references == null ) { + _cross_references = new ArrayList(); + } + System.out.println( "XREF ADDED: " + accession ); + _cross_references.add( accession ); + } + + private void addGoTerm( final BasicGoTerm g ) { + if ( _go_terms == null ) { + _go_terms = new ArrayList(); + } + System.out.println( "GOTERM ADDED: " + g ); + _go_terms.add( g ); + } + + private void setAc( final String ac ) { + if ( _ac == null ) { + _ac = ac; + } + } + + private void setGeneName( final String gene_name ) { + if ( _gene_name == null ) { + _gene_name = gene_name; + } + } + + private void setOsScientificName( final String os_scientific_name ) { + if ( _os_scientific_name == null ) { + _os_scientific_name = os_scientific_name; + } + } + + private void setSequenceName( final String name ) { + if ( _name == null ) { + _name = name; + } + } + + private void setSequenceSymbol( final String symbol ) { + _symbol = symbol; + } + + private void setTaxId( final String tax_id ) { + if ( _tax_id == null ) { + _tax_id = tax_id; + } + } + public static SequenceDatabaseEntry createInstanceFromPlainText( final List lines ) { final UniProtEntry e = new UniProtEntry(); for( final String line : lines ) { @@ -81,19 +197,80 @@ public final class UniProtEntry implements SequenceDatabaseEntry { } else if ( line.startsWith( "DR" ) ) { if ( line.indexOf( "GO;" ) > 0 ) { - Matcher m = GO_PATTERN.matcher( line ); + final Matcher m = GO_PATTERN.matcher( line ); if ( m.find() ) { - String id = m.group( 1 ); - String ns_str = m.group( 2 ); - String desc = m.group( 3 ); + final String id = m.group( 1 ); + final String ns_str = m.group( 2 ); + final String desc = m.group( 3 ); String gns = GoNameSpace.BIOLOGICAL_PROCESS_STR; - if ( ns_str.equals( "F" ) ) { - gns = GoNameSpace.MOLECULAR_FUNCTION_STR; - } - + if ( ns_str.equals( "F" ) ) { + gns = GoNameSpace.MOLECULAR_FUNCTION_STR; + } + else if ( ns_str.equals( "C" ) ) { + gns = GoNameSpace.CELLULAR_COMPONENT_STR; + } System.out.println( "GO:" + id + " " + desc + " " + ns_str ); - - e.addGoTerm( new BasicGoTerm( id, desc, gns, false ) ); + e.addGoTerm( new BasicGoTerm( id, desc, gns, false ) ); + } + } + else if ( line.indexOf( "PDB;" ) > 0 ) { + final Matcher m = PDB_PATTERN.matcher( line ); + if ( m.find() ) { + e.addCrossReference( new Accession( m.group( 1 ), "PDB", m.group( 2 ) ) ); + } + } + else if ( line.indexOf( "KEGG;" ) > 0 ) { + final Matcher m = KEGG_PATTERN.matcher( line ); + if ( m.find() ) { + e.addCrossReference( new Accession( m.group( 1 ), "KEGG" ) ); + } + } + else if ( line.indexOf( "CTD;" ) > 0 ) { + final Matcher m = CTD_PATTERN.matcher( line ); + if ( m.find() ) { + e.addCrossReference( new Accession( m.group( 1 ), "CTD" ) ); + } + } + else if ( line.indexOf( "MIM;" ) > 0 ) { + final Matcher m = MIM_PATTERN.matcher( line ); + if ( m.find() ) { + e.addCrossReference( new Accession( m.group( 1 ), "MIM" ) ); + } + } + else if ( line.indexOf( "Orphanet;" ) > 0 ) { + final Matcher m = Orphanet_PATTERN.matcher( line ); + if ( m.find() ) { + e.addCrossReference( new Accession( m.group( 1 ), "Orphanet", m.group( 2 ) ) ); + } + } + else if ( line.indexOf( "PharmGKB;" ) > 0 ) { + final Matcher m = PharmGKB_PATTERN.matcher( line ); + if ( m.find() ) { + e.addCrossReference( new Accession( m.group( 1 ), "PharmGKB" ) ); + } + } + else if ( line.indexOf( "BindingDB;" ) > 0 ) { + final Matcher m = BindingDB_PATTERN.matcher( line ); + if ( m.find() ) { + e.addCrossReference( new Accession( m.group( 1 ), "BindingDB" ) ); + } + } + else if ( line.indexOf( "DrugBank;" ) > 0 ) { + final Matcher m = DrugBank_PATTERN.matcher( line ); + if ( m.find() ) { + e.addCrossReference( new Accession( m.group( 1 ), "DrugBank", m.group( 2 ) ) ); + } + } + else if ( line.indexOf( "NextBio;" ) > 0 ) { + final Matcher m = NextBio_PATTERN.matcher( line ); + if ( m.find() ) { + e.addCrossReference( new Accession( m.group( 1 ), "NextBio" ) ); + } + } + else if ( line.indexOf( "Reactome;" ) > 0 ) { + final Matcher m = Reactome_PATTERN.matcher( line ); + if ( m.find() ) { + e.addCrossReference( new Accession( m.group( 1 ), "Reactome", m.group( 2 ) ) ); } } } @@ -113,95 +290,4 @@ public final class UniProtEntry implements SequenceDatabaseEntry { } return e; } - - private void addGoTerm( BasicGoTerm g ) { - if ( _go_terms == null ) { - _go_terms = new ArrayList(); - } - _go_terms.add( g ); - - } - - private void setSequenceSymbol( String symbol ) { - _symbol = symbol; - } - - @Override - public String getAccession() { - return _ac; - } - - private void setAc( final String ac ) { - if ( _ac == null ) { - _ac = ac; - } - } - - @Override - public String getSequenceName() { - return _name; - } - - private void setSequenceName( final String name ) { - if ( _name == null ) { - _name = name; - } - } - - @Override - public String getTaxonomyScientificName() { - return _os_scientific_name; - } - - private void setOsScientificName( final String os_scientific_name ) { - if ( _os_scientific_name == null ) { - _os_scientific_name = os_scientific_name; - } - } - - @Override - public String getTaxonomyIdentifier() { - return _tax_id; - } - - private void setTaxId( final String tax_id ) { - if ( _tax_id == null ) { - _tax_id = tax_id; - } - } - - private void setGeneName( final String gene_name ) { - if ( _gene_name == null ) { - _gene_name = gene_name; - } - } - - @Override - public List getGoTerms() { - return _go_terms; - } - - - @Override - public String getSequenceSymbol() { - return _symbol; - } - - @Override - public boolean isEmpty() { - return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() ) - && ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) - && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil - .isEmpty( getSequenceSymbol() ) && ( getGoTerms() == null || getGoTerms().isEmpty() ) ); - } - - @Override - public String getProvider() { - return "uniprot"; - } - - @Override - public String getGeneName() { - return _gene_name; - } }