From 9c2c59a078617685b118ac8bb56cb6abb9a3d3f8 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Sun, 29 Sep 2013 22:06:38 +0000 Subject: [PATCH] --- forester/java/src/org/forester/go/GoNameSpace.java | 8 ++--- .../java/src/org/forester/ws/seqdb/EbiDbEntry.java | 6 ++++ .../forester/ws/seqdb/SequenceDatabaseEntry.java | 6 ++++ .../org/forester/ws/seqdb/SequenceDbWsTools.java | 9 +++++- .../src/org/forester/ws/seqdb/UniProtEntry.java | 32 ++++++++++++++++---- 5 files changed, 50 insertions(+), 11 deletions(-) diff --git a/forester/java/src/org/forester/go/GoNameSpace.java b/forester/java/src/org/forester/go/GoNameSpace.java index 2d943c1..b487ea8 100644 --- a/forester/java/src/org/forester/go/GoNameSpace.java +++ b/forester/java/src/org/forester/go/GoNameSpace.java @@ -27,10 +27,10 @@ package org.forester.go; public class GoNameSpace { - public final String MOLECULAR_FUNCTION_STR = "molecular_function"; - public final String BIOLOGICAL_PROCESS_STR = "biological_process"; - public final String CELLULAR_COMPONENT_STR = "cellular_component"; - public final String UNASSIGNED_STR = "unassigned"; + public final static String MOLECULAR_FUNCTION_STR = "molecular_function"; + public final static String BIOLOGICAL_PROCESS_STR = "biological_process"; + public final static String CELLULAR_COMPONENT_STR = "cellular_component"; + public final static String UNASSIGNED_STR = "unassigned"; private final GoNamespaceType _type; public GoNameSpace( final GoNamespaceType type ) { diff --git a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java index 091ec83..8e319f2 100644 --- a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java @@ -27,6 +27,7 @@ package org.forester.ws.seqdb; import java.util.List; +import org.forester.go.GoTerm; import org.forester.util.ForesterUtil; public final class EbiDbEntry implements SequenceDatabaseEntry { @@ -169,4 +170,9 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { public String getGeneName() { return null; } + + @Override + public List getGoTerms() { + return null; + } } diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java b/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java index da3a5c2..70ff7b4 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java @@ -25,6 +25,10 @@ package org.forester.ws.seqdb; +import java.util.List; + +import org.forester.go.GoTerm; + public interface SequenceDatabaseEntry { public String getGeneName(); @@ -42,4 +46,6 @@ public interface SequenceDatabaseEntry { public String getTaxonomyIdentifier(); public String getSequenceSymbol(); + + public List getGoTerms(); } \ No newline at end of file diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index eb80141..f28a8f1 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -37,6 +37,7 @@ import java.util.List; import java.util.SortedSet; import java.util.TreeSet; +import org.forester.go.GoTerm; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; @@ -248,8 +249,14 @@ public final class SequenceDbWsTools { } } if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) { - seq.addAnnotation( new Annotation( "GN", db_entry.getGeneName() ) ); + // seq.addAnnotation( new Annotation( "GN", db_entry.getGeneName() ) ); } + if ( db_entry.getGoTerms() != null && !db_entry.getGoTerms().isEmpty() ) { + for( final GoTerm go : db_entry.getGoTerms() ) { + seq.addAnnotation( new Annotation( go.getGoId().getId(), go.getName() ) ); + } + } + final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy(); if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) { diff --git a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java index 2565339..aeabf63 100644 --- a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java @@ -25,23 +25,26 @@ package org.forester.ws.seqdb; +import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.go.BasicGoTerm; +import org.forester.go.GoNameSpace; import org.forester.go.GoTerm; import org.forester.util.ForesterUtil; public final class UniProtEntry implements SequenceDatabaseEntry { - public final static Pattern GO_PATTERN = Pattern.compile( "GO;\\s+GO:(\\d+);\\s+([PF]):([^;]+);" ); + public final static Pattern GO_PATTERN = Pattern.compile( "GO;\\s+(GO:\\d+);\\s+([PF]):([^;]+);" ); private String _ac; private String _name; private String _symbol; private String _gene_name; private String _os_scientific_name; private String _tax_id; + private List _go_terms; private UniProtEntry() { } @@ -80,14 +83,17 @@ public final class UniProtEntry implements SequenceDatabaseEntry { if ( line.indexOf( "GO;" ) > 0 ) { Matcher m = GO_PATTERN.matcher( line ); if ( m.find() ) { - String n = m.group( 1 ); + String id = m.group( 1 ); String ns_str = m.group( 2 ); String desc = m.group( 3 ); + String gns = GoNameSpace.BIOLOGICAL_PROCESS_STR; if ( ns_str.equals( "F" ) ) { + gns = GoNameSpace.MOLECULAR_FUNCTION_STR; + } - System.out.println( "GO:" + n + " " + desc + " " + ns ); - GoTerm go = new BasicGoTerm( n, desc, ns, false ); - // e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) ); + System.out.println( "GO:" + id + " " + desc + " " + ns_str ); + + e.addGoTerm( new BasicGoTerm( id, desc, gns, false ) ); } } } @@ -108,6 +114,14 @@ public final class UniProtEntry implements SequenceDatabaseEntry { return e; } + private void addGoTerm( BasicGoTerm g ) { + if ( _go_terms == null ) { + _go_terms = new ArrayList(); + } + _go_terms.add( g ); + + } + private void setSequenceSymbol( String symbol ) { _symbol = symbol; } @@ -161,6 +175,12 @@ public final class UniProtEntry implements SequenceDatabaseEntry { _gene_name = gene_name; } } + + @Override + public List getGoTerms() { + return _go_terms; + } + @Override public String getSequenceSymbol() { @@ -172,7 +192,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry { return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() ) && ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getSequenceSymbol() ) && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil - .isEmpty( getSequenceSymbol() ) ); + .isEmpty( getSequenceSymbol() ) && ( getGoTerms() == null || getGoTerms().isEmpty() ) ); } @Override -- 1.7.10.2