Merge branch 'trailm' into trial_fixMakeCDSDBRefPropagation
authorJim Procter <jprocter@issues.jalview.org>
Thu, 25 Aug 2016 11:13:56 +0000 (12:13 +0100)
committerJim Procter <jprocter@issues.jalview.org>
Thu, 25 Aug 2016 11:13:56 +0000 (12:13 +0100)
src/jalview/analysis/AlignmentUtils.java
test/jalview/analysis/AlignmentUtilsTests.java

index dc57f6e..bed685a 100644 (file)
@@ -1700,7 +1700,8 @@ public class AlignmentUtils
           MapList cdsToProteinMap = new MapList(cdsRange, mapList.getToRanges(),
                   mapList.getFromRatio(), mapList.getToRatio());
           AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
-          cdsToProteinMapping.addMap(cdsSeq, proteinProduct, cdsToProteinMap);
+          cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
+                  cdsToProteinMap);
 
           /*
            * guard against duplicating the mapping if repeating this action
@@ -1710,23 +1711,8 @@ public class AlignmentUtils
             mappings.add(cdsToProteinMapping);
           }
 
-          /*
-           * copy protein's dbrefs to CDS sequence
-           * this enables Get Cross-References from CDS alignment
-           */
-          DBRefEntry[] proteinRefs = DBRefUtils.selectDbRefs(false,
-                  proteinProduct.getDBRefs());
-          if (proteinRefs != null)
-          {
-            for (DBRefEntry ref : proteinRefs)
-            {
-              DBRefEntry cdsToProteinRef = new DBRefEntry(ref);
-              cdsToProteinRef.setMap(new Mapping(proteinProduct,
-                      cdsToProteinMap));
-              cdsSeqDss.addDBRef(cdsToProteinRef);
-            }
-          }
-
+          propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),
+                  proteinProduct, aMapping);
           /*
            * add another mapping from original 'from' range to CDS
            */
@@ -1734,7 +1720,7 @@ public class AlignmentUtils
           MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),
                   cdsRange, 1,
                   1);
-          dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeq,
+          dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,
                   dnaToCdsMap);
           if (!mappings.contains(dnaToCdsMapping))
           {
@@ -1900,7 +1886,7 @@ public class AlignmentUtils
         }
       }
     }
-
+    
     /*
      * assign 'from id' held in the mapping if set (e.g. EMBL protein_id),
      * else generate a sequence name
@@ -1914,6 +1900,84 @@ public class AlignmentUtils
   }
 
   /**
+   * Adds to cdsSeq a re-mapped copy of every DBRefEntry on contig whose triplet
+   * MapList equals the MapList of the given mapping.
+   * @param cdsSeq sequence that receives the new DBRefEntrys
+   * @param contig sequence whose mapped DBRefEntrys are examined
+   * @param proteinProduct set as a new mapping's 'to' if it has a matching dbref
+   * @param mapping mapping whose MapList the contig's dbref maps must equal
+   * @return list of DBRefEntrys added.
+   */
+  public static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
+          SequenceI contig, SequenceI proteinProduct, Mapping mapping)
+  {
+
+    // gather direct refs from contig congruent with mapping
+    List<DBRefEntry> direct = new ArrayList<DBRefEntry>();
+    HashSet<String> directSources = new HashSet<String>();
+    if (contig.getDBRefs() != null)
+    {
+      for (DBRefEntry dbr : contig.getDBRefs())
+      {
+        if (dbr.hasMap() && dbr.getMap().getMap().isTripletMap())
+        {
+          MapList map = dbr.getMap().getMap();
+          // keep this ref only if its map equals the supplied mapping's map
+          if (mapping.getMap().equals(map))
+          {
+            direct.add(dbr);
+            directSources.add(dbr.getSource());
+          }
+        }
+      }
+    }
+    DBRefEntry[] onSource = DBRefUtils.selectRefs(
+            proteinProduct.getDBRefs(),
+            directSources.toArray(new String[0]));
+    List<DBRefEntry> propagated = new ArrayList<DBRefEntry>();
+
+    // and generate appropriate mappings for each gathered ref
+    for (DBRefEntry cdsref : direct)
+    {
+      // build a 3:1 MapList from cdsSeq's start-end to the ref's 'to' ranges
+      MapList cdsposmap = new MapList(Arrays.asList(new int[][] { new int[]
+      { cdsSeq.getStart(), cdsSeq.getEnd() } }), cdsref.getMap().getMap()
+              .getToRanges(), 3, 1);
+      Mapping cdsmap = new Mapping(cdsref.getMap().getTo(), cdsref.getMap()
+              .getMap());
+
+      // create dbref with the same source, version and accession as cdsref
+      DBRefEntry newref = new DBRefEntry(cdsref.getSource(),
+              cdsref.getVersion(), cdsref.getAccessionId(), new Mapping(
+                      cdsmap.getTo(), cdsposmap));
+
+      // and see if we can map to the protein product for this mapping.
+      // onSource is the filtered set of accessions on protein that we are
+      // transferring, so we assume accession is the same.
+      if (cdsmap.getTo() == null && onSource != null)
+      {
+        List<DBRefEntry> sourceRefs = DBRefUtils.searchRefs(onSource,
+                cdsref.getAccessionId());
+        if (sourceRefs != null)
+        {
+          for (DBRefEntry srcref : sourceRefs)
+          {
+            if (srcref.getSource().equalsIgnoreCase(cdsref.getSource()))
+            {
+              // we have found a complementary dbref on the protein product, so
+              // update mapping's getTo
+              newref.getMap().setTo(proteinProduct);
+            }
+          }
+        }
+      }
+      cdsSeq.addDBRef(newref);
+      propagated.add(newref);
+    }
+    return propagated;
+  }
+
+  /**
    * Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the
    * feature start/end ranges, optionally omitting specified feature types.
    * Returns the number of features copied.
index 0426091..a0ce475 100644 (file)
@@ -994,7 +994,7 @@ public class AlignmentUtilsTests
 
     /*
      * need a sourceDbRef if we are to construct dbrefs to the CDS
-     * sequence
+     * sequence from the dna contig sequences
      */
     DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
     dna1.getDatasetSequence().addDBRef(dbref);
@@ -1007,18 +1007,31 @@ public class AlignmentUtilsTests
      * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
      * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
      */
-    MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+    MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 },
             new int[] { 1, 2 }, 3, 1);
     AlignedCodonFrame acf = new AlignedCodonFrame();
-    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
+            mapfordna1);
     dna.addCodonFrame(acf);
-    map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
+    MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
+            new int[] { 1, 3 },
             3, 1);
     acf = new AlignedCodonFrame();
-    acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+    acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(),
+            mapfordna2);
     dna.addCodonFrame(acf);
 
     /*
+     * In this case, the mappings originally came from matching Uniprot accessions, so we need an xref on the dna involving those regions. These are normally constructed from CDS annotation.
+     */
+    DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
+            new Mapping(mapfordna1));
+    dna1.getDatasetSequence().addDBRef(dna1xref);
+    DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
+            new Mapping(mapfordna2));
+    dna2.getDatasetSequence().addDBRef(dna2xref);
+
+    /*
      * execute method under test:
      */
     AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
@@ -1046,9 +1059,10 @@ public class AlignmentUtilsTests
     assertNotNull(cds1Dss.getDBRefs());
     assertEquals(1, cds1Dss.getDBRefs().length);
     dbref = cds1Dss.getDBRefs()[0];
-    assertEquals("UNIPROT", dbref.getSource());
-    assertEquals("0", dbref.getVersion());
-    assertEquals("pep1", dbref.getAccessionId());
+    assertEquals(dna1xref.getSource(), dbref.getSource());
+    // version is via ensembl's primary ref
+    assertEquals(dna1xref.getVersion(), dbref.getVersion());
+    assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId());
     assertNotNull(dbref.getMap());
     assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
     MapList cdsMapping = new MapList(new int[] { 1, 6 },