(no commit message)
author cmzmasek <cmzmasek@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sun, 29 Sep 2013 22:06:38 +0000 (22:06 +0000)
committer cmzmasek <cmzmasek@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sun, 29 Sep 2013 22:06:38 +0000 (22:06 +0000)
forester/java/src/org/forester/go/GoNameSpace.java
forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java
forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java
forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java
forester/java/src/org/forester/ws/seqdb/UniProtEntry.java

index 2d943c1..b487ea8 100644 (file)
@@ -27,10 +27,10 @@ package org.forester.go;
 
 public class GoNameSpace {
 
-    public final String           MOLECULAR_FUNCTION_STR = "molecular_function";
-    public final String           BIOLOGICAL_PROCESS_STR = "biological_process";
-    public final String           CELLULAR_COMPONENT_STR = "cellular_component";
-    public final String           UNASSIGNED_STR         = "unassigned";
+    public final static String           MOLECULAR_FUNCTION_STR = "molecular_function";
+    public final static String           BIOLOGICAL_PROCESS_STR = "biological_process";
+    public final static String           CELLULAR_COMPONENT_STR = "cellular_component";
+    public final static  String           UNASSIGNED_STR         = "unassigned";
     private final GoNamespaceType _type;
 
     public GoNameSpace( final GoNamespaceType type ) {
index 091ec83..8e319f2 100644 (file)
@@ -27,6 +27,7 @@ package org.forester.ws.seqdb;
 
 import java.util.List;
 
+import org.forester.go.GoTerm;
 import org.forester.util.ForesterUtil;
 
 public final class EbiDbEntry implements SequenceDatabaseEntry {
@@ -169,4 +170,9 @@ public final class EbiDbEntry implements SequenceDatabaseEntry {
     public String getGeneName() {
         return null;
     }
+
+    @Override
+    public List<GoTerm> getGoTerms() {
+        return null;
+    }
 }
index da3a5c2..70ff7b4 100644 (file)
 
 package org.forester.ws.seqdb;
 
+import java.util.List;
+
+import org.forester.go.GoTerm;
+
 public interface SequenceDatabaseEntry {
 
     public String getGeneName();
@@ -42,4 +46,6 @@ public interface SequenceDatabaseEntry {
     public String getTaxonomyIdentifier();
 
     public String getSequenceSymbol();
+
+    public List<GoTerm> getGoTerms();
 }
\ No newline at end of file
index eb80141..f28a8f1 100644 (file)
@@ -37,6 +37,7 @@ import java.util.List;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import org.forester.go.GoTerm;
 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyNode;
@@ -248,8 +249,14 @@ public final class SequenceDbWsTools {
                     }
                 }
                 if ( !ForesterUtil.isEmpty( db_entry.getGeneName() ) ) {
-                    seq.addAnnotation( new Annotation( "GN", db_entry.getGeneName() ) );
+                  //  seq.addAnnotation( new Annotation( "GN", db_entry.getGeneName() ) );
                 }
+                if ( db_entry.getGoTerms() != null &&  !db_entry.getGoTerms().isEmpty() ) {
+                    for( final GoTerm go : db_entry.getGoTerms() ) {
+                        seq.addAnnotation( new Annotation( go.getGoId().getId(), go.getName() ) );
+                    }
+                }
+                
                 final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy()
                         : new Taxonomy();
                 if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) {
index 2565339..aeabf63 100644 (file)
 
 package org.forester.ws.seqdb;
 
+import java.util.ArrayList;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.forester.go.BasicGoTerm;
+import org.forester.go.GoNameSpace;
 import org.forester.go.GoTerm;
 import org.forester.util.ForesterUtil;
 
 public final class UniProtEntry implements SequenceDatabaseEntry {
 
-    public final static Pattern GO_PATTERN = Pattern.compile( "GO;\\s+GO:(\\d+);\\s+([PF]):([^;]+);" );
+    public final static Pattern GO_PATTERN = Pattern.compile( "GO;\\s+(GO:\\d+);\\s+([PF]):([^;]+);" );
     private String              _ac;
     private String              _name;
     private String              _symbol;
     private String              _gene_name;
     private String              _os_scientific_name;
     private String              _tax_id;
+    private List<GoTerm> _go_terms;
 
     private UniProtEntry() {
     }
@@ -80,14 +83,17 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
                 if ( line.indexOf( "GO;" ) > 0 ) {
                     Matcher m = GO_PATTERN.matcher( line );
                     if ( m.find() ) {
-                        String n = m.group( 1 );
+                        String id = m.group( 1 );
                         String ns_str = m.group( 2 );
                         String desc = m.group( 3 );
+                        String gns = GoNameSpace.BIOLOGICAL_PROCESS_STR;
                         if ( ns_str.equals( "F" ) ) { 
+                            gns =  GoNameSpace.MOLECULAR_FUNCTION_STR;
+                        }    
                         
-                        System.out.println( "GO:" + n + " " + desc + " " + ns );
-                        GoTerm go = new BasicGoTerm( n, desc, ns, false );
-                        //  e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) );
+                        System.out.println( "GO:" + id + " " + desc + " " + ns_str );
+                      
+                        e.addGoTerm( new BasicGoTerm( id, desc, gns, false ) ); 
                     }
                 }
             }
@@ -108,6 +114,14 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
         return e;
     }
 
+    private void addGoTerm( BasicGoTerm g ) {
+        if ( _go_terms == null ) {
+            _go_terms = new ArrayList<GoTerm>();
+        }
+        _go_terms.add( g );
+        
+    }
+
     private void setSequenceSymbol( String symbol ) {
         _symbol = symbol;
     }
@@ -161,6 +175,12 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
             _gene_name = gene_name;
         }
     }
+    
+    @Override
+    public List<GoTerm> getGoTerms() {
+        return _go_terms;
+    }
+    
 
     @Override
     public String getSequenceSymbol() {
@@ -172,7 +192,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
         return ( ForesterUtil.isEmpty( getAccession() ) && ForesterUtil.isEmpty( getSequenceName() )
                 && ForesterUtil.isEmpty( getTaxonomyScientificName() ) && ForesterUtil.isEmpty( getSequenceSymbol() )
                 && ForesterUtil.isEmpty( getGeneName() ) && ForesterUtil.isEmpty( getTaxonomyIdentifier() ) && ForesterUtil
-                .isEmpty( getSequenceSymbol() ) );
+                .isEmpty( getSequenceSymbol() ) && ( getGoTerms() == null || getGoTerms().isEmpty() ) );
     }
 
     @Override