Merge branch 'develop' into features/JAL-2094_colourInterface
[jalview.git] / src / jalview / analysis / CrossRef.java
index c027742..1295b46 100644 (file)
@@ -222,6 +222,9 @@ public class CrossRef
       boolean found = false;
       DBRefEntry[] xrfs = DBRefUtils
               .selectDbRefs(!fromDna, dss.getDBRefs());
+      // ENST & ENSP come in to both protein and nucleotide,
+      // so we need to filter them out
+      // later.
       if ((xrfs == null || xrfs.length == 0) && dataset != null)
       {
         /*
@@ -249,11 +252,15 @@ public class CrossRef
       List<DBRefEntry> sourceRefs = DBRefUtils.searchRefsForSource(xrfs,
               source);
       Iterator<DBRefEntry> refIterator = sourceRefs.iterator();
+      // At this point, if we are retrieving Ensembl, we still don't filter out
+      // ENST when looking for protein crossrefs.
       while (refIterator.hasNext())
       {
         DBRefEntry xref = refIterator.next();
         found = false;
-        if (xref.hasMap())
+        // we're only interested in coding cross-references, not
+        // locus->transcript
+        if (xref.hasMap() && xref.getMap().getMap().isTripletMap())
         {
           SequenceI mappedTo = xref.getMap().getTo();
           if (mappedTo != null)
@@ -332,7 +339,9 @@ public class CrossRef
         {
           SequenceI matchedSeq = matcher.findIdMatch(xref.getSource() + "|"
                   + xref.getAccessionId());
-          if (matchedSeq != null)
+          // if there was a match, check it's at least the right type of
+          // molecule!
+          if (matchedSeq != null && matchedSeq.isProtein() == fromDna)
           {
             if (constructMapping(seq, matchedSeq, xref, cf, fromDna))
             {
@@ -381,6 +390,37 @@ public class CrossRef
     SequenceI[] retrieved = null;
     SequenceI dss = seq.getDatasetSequence() == null ? seq : seq
             .getDatasetSequence();
+    // First, filter out any crossrefs that have already been retrieved.
+    // This happens in cases where a database record doesn't yield
+    // protein products for CDS.
+    DBRefEntry[] dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
+    for (SequenceI sq : dataset.getSequences())
+    {
+      boolean dupeFound = false;
+      // !fromDna means we are looking only for nucleotide sequences, not
+      // protein
+      if (sq.isProtein() == fromDna)
+      {
+        for (DBRefEntry dbr : sq.getPrimaryDBRefs())
+        {
+          for (DBRefEntry found : DBRefUtils.searchRefs(dbrSourceSet, dbr))
+          {
+            sourceRefs.remove(found);
+            dupeFound = true;
+          }
+        }
+      }
+      if (dupeFound)
+      {
+        dbrSourceSet = sourceRefs.toArray(new DBRefEntry[0]);
+      }
+    }
+    if (sourceRefs.size() == 0)
+    {
+      // no more work to do! We already had all requested sequence records in
+      // the dataset.
+      return;
+    }
     try
     {
       retrieved = sftch.getSequences(sourceRefs, !fromDna);
@@ -696,7 +736,7 @@ public class CrossRef
     MapList mapping = null;
     SequenceI dsmapFrom = mapFrom.getDatasetSequence() == null ? mapFrom
             : mapFrom.getDatasetSequence();
-    SequenceI dsmapTo = mapFrom.getDatasetSequence() == null ? mapTo
+    SequenceI dsmapTo = mapTo.getDatasetSequence() == null ? mapTo
             : mapTo.getDatasetSequence();
     /*
      * look for a reverse mapping, if found make its inverse. 
@@ -827,8 +867,8 @@ public class CrossRef
    *          </ul>
    * @return true if relationship found and sequence added.
    */
-  boolean searchDataset(boolean fromDna, SequenceI fromSeq,
-          DBRefEntry xrf, List<SequenceI> foundSeqs, AlignedCodonFrame mappings,
+  boolean searchDataset(boolean fromDna, SequenceI fromSeq, DBRefEntry xrf,
+          List<SequenceI> foundSeqs, AlignedCodonFrame mappings,
           boolean direct)
   {
     boolean found = false;
@@ -889,37 +929,38 @@ public class CrossRef
           // }
           if (!cands.isEmpty())
           {
-            if (!foundSeqs.contains(nxt))
+            if (foundSeqs.contains(nxt))
             {
-              found = true;
-              foundSeqs.add(nxt);
-              if (mappings != null && !direct)
+              continue;
+            }
+            found = true;
+            foundSeqs.add(nxt);
+            if (mappings != null && !direct)
+            {
+              /*
+               * if the matched sequence has mapped dbrefs to
+               * protein product / cdna, add equivalent mappings to
+               * our source sequence
+               */
+              for (DBRefEntry candidate : cands)
               {
-                /*
-                 * if the matched sequence has mapped dbrefs to
-                 * protein product / cdna, add equivalent mappings to
-                 * our source sequence
-                 */
-                for (DBRefEntry candidate : cands)
+                Mapping mapping = candidate.getMap();
+                if (mapping != null)
                 {
-                  Mapping mapping = candidate.getMap();
-                  if (mapping != null)
+                  MapList map = mapping.getMap();
+                  if (mapping.getTo() != null
+                          && map.getFromRatio() != map.getToRatio())
                   {
-                    MapList map = mapping.getMap();
-                    if (mapping.getTo() != null
-                            && map.getFromRatio() != map.getToRatio())
+                    /*
+                     * add a mapping, as from dna to peptide sequence
+                     */
+                    if (map.getFromRatio() == 3)
                     {
-                      /*
-                       * add a mapping, as from dna to peptide sequence
-                       */
-                      if (map.getFromRatio() == 3)
-                      {
-                        mappings.addMap(nxt, fromSeq, map);
-                      }
-                      else
-                      {
-                        mappings.addMap(nxt, fromSeq, map.getInverse());
-                      }
+                      mappings.addMap(nxt, fromSeq, map);
+                    }
+                    else
+                    {
+                      mappings.addMap(nxt, fromSeq, map.getInverse());
                     }
                   }
                 }