inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 28 Sep 2013 03:58:56 +0000 (03:58 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 28 Sep 2013 03:58:56 +0000 (03:58 +0000)
forester/java/src/org/forester/ws/seqdb/UniProtEntry.java

index c30dc4c..2565339 100644 (file)
 package org.forester.ws.seqdb;
 
 import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
+import org.forester.go.BasicGoTerm;
+import org.forester.go.GoTerm;
 import org.forester.util.ForesterUtil;
 
 public final class UniProtEntry implements SequenceDatabaseEntry {
 
-    private String _ac;
-    private String _name;
-    private String _symbol;
-    private String _gene_name;
-    private String _os_scientific_name;
-    private String _tax_id;
+    public final static Pattern GO_PATTERN = Pattern.compile( "GO;\\s+GO:(\\d+);\\s+([PF]):([^;]+);" );
+    private String              _ac;
+    private String              _name;
+    private String              _symbol;
+    private String              _gene_name;
+    private String              _os_scientific_name;
+    private String              _tax_id;
 
     // Private on purpose: the static factory createInstanceFromPlainText in
     // this class constructs an empty entry and then fills it in via setters.
     private UniProtEntry() {
     }
@@ -49,7 +54,7 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
     public static SequenceDatabaseEntry createInstanceFromPlainText( final List<String> lines ) {
         final UniProtEntry e = new UniProtEntry();
         for( final String line : lines ) {
-            System.out.println( line );
+            //System.out.println( line );
             if ( line.startsWith( "AC" ) ) {
                 e.setAc( DatabaseTools.extract( line, "AC", ";" ) );
             }
@@ -71,6 +76,21 @@ public final class UniProtEntry implements SequenceDatabaseEntry {
                     e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) );
                 }
             }
+            else if ( line.startsWith( "DR" ) ) {
+                if ( line.indexOf( "GO;" ) > 0 ) {
+                    Matcher m = GO_PATTERN.matcher( line );
+                    if ( m.find() ) {
+                        String n = m.group( 1 );
+                        String ns_str = m.group( 2 );
+                        String desc = m.group( 3 );
+                        if ( ns_str.equals( "F" ) ) { 
+                        
+                        System.out.println( "GO:" + n + " " + desc + " " + ns );
+                        GoTerm go = new BasicGoTerm( n, desc, ns, false );
+                        //  e.setGeneName( DatabaseTools.extract( line, "Name=", ";" ) );
+                    }
+                }
+            }
             else if ( line.startsWith( "OS" ) ) {
                 if ( line.indexOf( "(" ) > 0 ) {
                     e.setOsScientificName( DatabaseTools.extract( line, "OS", "(" ) );