JAL-1705 various refactoring towards Uniprot-to-Ensembl fetching
[jalview.git] / src / jalview / ext / ensembl / EnsemblSeqProxy.java
index e77051d..a2be17b 100644 (file)
@@ -30,6 +30,8 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map.Entry;
 
+import com.stevesoft.pat.Regex;
+
 /**
  * Base class for Ensembl sequence fetchers
  * 
@@ -37,6 +39,10 @@ import java.util.Map.Entry;
  */
 public abstract class EnsemblSeqProxy extends EnsemblRestClient
 {
+  // matches text ending in ENST + 11 digits; TODO other species e.g. ENSMUSTnnn
+  private static final Regex TRANSCRIPT_REGEX = new Regex(
+          "(ENST)[0-9]{11}$");
+
   private static final List<String> CROSS_REFERENCES = Arrays
           .asList(new String[] { "CCDS" });
 
@@ -154,14 +160,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
                 + ")";
         System.err.println(msg);
         break;
-        // if (alignment != null)
-        // {
-        // break; // return what we got
-        // }
-        // else
-        // {
-        // throw new JalviewException(msg, r);
-        // }
       }
     }
 
@@ -294,8 +292,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   }
 
   /**
-   * Get Uniprot and PDB xrefs from Ensembl, and attach them to the protein
-   * sequence
+   * Get database xrefs from Ensembl, and attach them to the sequence
    * 
    * @param seq
    */
@@ -719,18 +716,18 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       /*
        * for sequence_variant, make an additional feature with consequence
        */
-      if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
-              SequenceOntologyI.SEQUENCE_VARIANT))
-      {
-        String consequence = (String) sf.getValue(CONSEQUENCE_TYPE);
-        if (consequence != null)
-        {
-          SequenceFeature sf2 = new SequenceFeature("consequence",
-                  consequence, copy.getBegin(), copy.getEnd(), 0f,
-                  null);
-          targetSequence.addSequenceFeature(sf2);
-        }
-      }
+      // if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+      // SequenceOntologyI.SEQUENCE_VARIANT))
+      // {
+      // String consequence = (String) sf.getValue(CONSEQUENCE_TYPE);
+      // if (consequence != null)
+      // {
+      // SequenceFeature sf2 = new SequenceFeature("consequence",
+      // consequence, copy.getBegin(), copy.getEnd(), 0f,
+      // null);
+      // targetSequence.addSequenceFeature(sf2);
+      // }
+      // }
     }
   }
 
@@ -750,6 +747,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       return false;
     }
 
+    // long start = System.currentTimeMillis();
     SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
     MapList mapping = getGenomicRangesFromFeatures(sourceSequence, accessionId,
             targetSequence.getStart());
@@ -758,7 +756,13 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       return false;
     }
 
-    return transferFeatures(sfs, targetSequence, mapping, accessionId);
+    boolean result = transferFeatures(sfs, targetSequence, mapping,
+            accessionId);
+    // System.out.println("transferFeatures (" + (sfs.length) + " --> "
+    // + targetSequence.getSequenceFeatures().length + ") to "
+    // + targetSequence.getName()
+    // + " took " + (System.currentTimeMillis() - start) + "ms");
+    return result;
   }
 
   /**
@@ -1125,4 +1129,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
             || SequenceOntologyFactory.getInstance().isA(featureType,
                     SequenceOntologyI.TRANSCRIPT);
   }
+
+  public static boolean isTranscriptIdentifier(String query)
+  {
+    return query != null && TRANSCRIPT_REGEX.search(query); // null => false
+  }
 }