JAL-1705 JAL-1191 SequenceOntologyLite added as hard-coded alternative
[jalview.git] / src / jalview / ext / ensembl / EnsemblSeqProxy.java
index 8698b78..744aa49 100644 (file)
@@ -11,7 +11,8 @@ import jalview.datamodel.SequenceI;
 import jalview.exceptions.JalviewException;
 import jalview.io.FastaFile;
 import jalview.io.FileParse;
-import jalview.io.gff.SequenceOntology;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyI;
 import jalview.schemes.ResidueProperties;
 import jalview.util.DBRefUtils;
 import jalview.util.MapList;
@@ -36,6 +37,9 @@ import java.util.Map.Entry;
  */
 public abstract class EnsemblSeqProxy extends EnsemblRestClient
 {
+  private static final List<String> CROSS_REFERENCES = Arrays
+          .asList(new String[] { "CCDS" });
+
   protected static final String CONSEQUENCE_TYPE = "consequence_type";
 
   protected static final String PARENT = "Parent";
@@ -124,7 +128,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   @Override
   public AlignmentI getSequenceRecords(String query) throws Exception
   {
-    long now = System.currentTimeMillis();
     // TODO use a String... query vararg instead?
 
     // danger: accession separator used as a regex here, a string elsewhere
@@ -153,14 +156,15 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
                 + " chunks. Unexpected problem (" + r.getLocalizedMessage()
                 + ")";
         System.err.println(msg);
-        if (alignment != null)
-        {
-          break; // return what we got
-        }
-        else
-        {
-          throw new JalviewException(msg, r);
-        }
+        break;
+        // if (alignment != null)
+        // {
+        // break; // return what we got
+        // }
+        // else
+        // {
+        // throw new JalviewException(msg, r);
+        // }
       }
     }
 
@@ -173,9 +177,11 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       addFeaturesAndProduct(accId, alignment);
     }
 
-    inProgress = false;
-    System.out.println(getClass().getName() + " took "
-            + (System.currentTimeMillis() - now) + "ms to fetch");
+    for (SequenceI seq : alignment.getSequences())
+    {
+      getCrossReferences(seq);
+    }
+
     return alignment;
   }
 
@@ -265,8 +271,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       proteinSeq.createDatasetSequence();
       querySeq.createDatasetSequence();
 
-      getProteinCrossReferences(proteinSeq);
-
       MapList mapList = mapCdsToProtein(querySeq, proteinSeq);
       if (mapList != null)
       {
@@ -293,26 +297,35 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
    * Get Uniprot and PDB xrefs from Ensembl, and attach them to the protein
    * sequence
    * 
-   * @param proteinSeq
+   * @param seq
    */
-  protected void getProteinCrossReferences(SequenceI proteinSeq)
+  protected void getCrossReferences(SequenceI seq)
   {
-    while (proteinSeq.getDatasetSequence() != null)
+    while (seq.getDatasetSequence() != null)
     {
-      proteinSeq = proteinSeq.getDatasetSequence();
+      seq = seq.getDatasetSequence();
     }
 
     EnsemblXref xrefFetcher = new EnsemblXref();
-    List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(
-            proteinSeq.getName(), "PDB", "Uniprot/SPTREMBL",
-            "Uniprot/SWISSPROT");
+    List<DBRefEntry> xrefs = xrefFetcher.getCrossReferences(seq.getName(),
+            getCrossReferenceDatabases());
     for (DBRefEntry xref : xrefs)
     {
-      proteinSeq.addDBRef(xref);
+      seq.addDBRef(xref);
     }
   }
 
   /**
+   * Returns a list of database names to be used when fetching cross-references.
+   * 
+   * @return the database names to query when fetching cross-references
+   */
+  protected List<String> getCrossReferenceDatabases()
+  {
+    return CROSS_REFERENCES;
+  }
+
+  /**
    * Returns a mapping from dna to protein by inspecting sequence features of
    * type "CDS" on the dna.
    * 
@@ -354,11 +367,12 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   }
 
   /**
-   * Adds CDS ranges to the ranges list, and returns the total length mapped.
+   * Adds CDS ranges to the ranges list, and returns the total length mapped
+   * from.
    * 
-   * No need to worry about reverse strand dna here since the retrieved sequence
-   * is as transcribed (reverse complement for reverse strand), i.e in the same
-   * sense as the peptide.
+   * No need to worry about reverse strand dna here, since the retrieved
+   * sequence is as transcribed (reverse complement for reverse strand), i.e in
+   * the same sense as the peptide.
    * 
    * @param dnaSeq
    * @param ranges
@@ -377,7 +391,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       /*
        * process a CDS feature (or a sub-type of CDS)
        */
-      if (SequenceOntology.getInstance().isA(sf.getType(), SequenceOntology.CDS))
+      if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+              SequenceOntologyI.CDS))
       {
         int phase = 0;
         try {
@@ -565,7 +580,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
    *          the start position of the sequence we are mapping to
    * @return
    */
-  protected MapList getGenomicRanges(SequenceI sourceSequence,
+  protected MapList getGenomicRangesFromFeatures(SequenceI sourceSequence,
           String accId, int start)
   {
     SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
@@ -591,11 +606,12 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
        */
       if (identifiesSequence(sf, accId))
       {
-          int strand = sf.getStrand();
-  
-          if (directionSet && strand != direction)
-          {
-            // abort - mix of forward and backward
+        int strand = sf.getStrand();
+        strand = strand == 0 ? 1 : strand; // treat unknown as forward
+
+        if (directionSet && strand != direction)
+        {
+          // abort - mix of forward and backward
           System.err.println("Error: forward and backward strand for "
                   + accId);
             return null;
@@ -640,8 +656,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
      */
     Collections.sort(regions, new RangeSorter(direction == 1));
   
-    List<int[]> to = new ArrayList<int[]>();
-    to.add(new int[] { start, start + mappedLength - 1 });
+    List<int[]> to = Arrays.asList(new int[] { start,
+        start + mappedLength - 1 });
   
     return new MapList(regions, to, 1, 1);
   }
@@ -696,7 +712,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       /*
        * for sequence_variant, make an additional feature with consequence
        */
-      if (SequenceOntology.getInstance().isSequenceVariant(sf.getType()))
+      if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+              SequenceOntologyI.SEQUENCE_VARIANT))
       {
         String consequence = (String) sf.getValue(CONSEQUENCE_TYPE);
         if (consequence != null)
@@ -727,7 +744,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     }
 
     SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
-    MapList mapping = getGenomicRanges(sourceSequence, accessionId,
+    MapList mapping = getGenomicRangesFromFeatures(sourceSequence, accessionId,
             targetSequence.getStart());
     if (mapping == null)
     {
@@ -836,7 +853,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     
     SequenceFeature[] sfs = sequence.getSequenceFeatures();
     if (sfs != null) {
-      SequenceOntology so = SequenceOntology.getInstance();
+      SequenceOntologyI so = SequenceOntologyFactory.getInstance();
       for (SequenceFeature sf :sfs) {
         if (so.isA(sf.getType(), type))
         {
@@ -874,7 +891,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     }
   
     AlignmentUtils.transferFeatures(dnaSeq, peptide, dnaToProtein,
-            SequenceOntology.EXON);
+            SequenceOntologyI.EXON);
 
     LinkedHashMap<Integer, String[][]> variants = buildDnaVariantsMap(
             dnaSeq, dnaToProtein);
@@ -895,7 +912,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
         String desc = StringUtils.listToDelimitedString(peptideVariants,
                 ", ");
         SequenceFeature sf = new SequenceFeature(
-                SequenceOntology.SEQUENCE_VARIANT, desc, peptidePos,
+                SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos,
                 peptidePos, 0f, null);
         peptide.addSequenceFeature(sf);
         count++;
@@ -920,7 +937,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
      * LinkedHashMap ensures we add the peptide features in sequence order
      */
     LinkedHashMap<Integer, String[][]> variants = new LinkedHashMap<Integer, String[][]>();
-    SequenceOntology so = SequenceOntology.getInstance();
+    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
   
     SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures();
     if (dnaFeatures == null)
@@ -943,7 +960,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
         // not handling multi-locus variant features
         continue;
       }
-      if (so.isSequenceVariant(sf.getType()))
+      if (so.isA(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT))
       {
         int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
         if (mapsTo == null)
@@ -1082,6 +1099,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   public static boolean isTranscript(String featureType)
   {
     return NMD_VARIANT.equals(featureType)
-            || SequenceOntology.getInstance().isA(featureType, SequenceOntology.TRANSCRIPT);
+            || SequenceOntologyFactory.getInstance().isA(featureType,
+                    SequenceOntologyI.TRANSCRIPT);
   }
 }