JAL-2110 fixes to dbref resolution and mappings, use same dataset for
[jalview.git] / src / jalview / analysis / AlignmentUtils.java
index 949c47a..ead4ef8 100644 (file)
@@ -24,6 +24,7 @@ import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE;
 
 import jalview.datamodel.AlignedCodon;
 import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
@@ -1400,16 +1401,15 @@ public class AlignmentUtils
    * 
    * @param dna
    *          aligned dna sequences
-   * @param mappings
-   *          from dna to protein
-   * @param al
+   * @param dataset source of the codon frame mappings; new CDS dataset sequences are added to it
    * @return an alignment whose sequences are the cds-only parts of the dna
    *         sequences (or null if no mappings are found)
    */
   public static AlignmentI makeCdsAlignment(SequenceI[] dna,
-          List<AlignedCodonFrame> mappings, AlignmentI al)
+          AlignmentI dataset)
   {
     List<SequenceI> cdsSeqs = new ArrayList<SequenceI>();
+    List<AlignedCodonFrame> mappings = dataset.getCodonFrames();
     
     /*
      * construct CDS sequences from the (cds-to-protein) mappings made earlier;
@@ -1419,18 +1419,78 @@ public class AlignmentUtils
      */
     for (SequenceI seq : dna)
     {
-      AlignedCodonFrame cdsMappings = new AlignedCodonFrame();
+      SequenceI seqDss = seq.getDatasetSequence() == null ? seq : seq
+              .getDatasetSequence();
       List<AlignedCodonFrame> seqMappings = MappingUtils
               .findMappingsForSequence(seq, mappings);
-      List<AlignedCodonFrame> alignmentMappings = al.getCodonFrames();
       for (AlignedCodonFrame mapping : seqMappings)
       {
-        for (Mapping aMapping : mapping.getMappingsFromSequence(seq))
+        List<Mapping> mappingsFromSequence = mapping.getMappingsFromSequence(seq);
+
+        for (Mapping aMapping : mappingsFromSequence)
         {
-          SequenceI cdsSeq = makeCdsSequence(seq.getDatasetSequence(),
-                  aMapping);
+          if (aMapping.getMap().getFromRatio() == 1)
+          {
+            /*
+             * not a dna-to-protein mapping (likely dna-to-cds)
+             */
+            continue;
+          }
+
+          /*
+           * check for an existing CDS sequence i.e. a 3:1 mapping to 
+           * the dna mapping's product
+           */
+          SequenceI cdsSeq = null;
+          // TODO better mappings collection data model so we can do
+          // a table lookup instead of double loops to find mappings
+          SequenceI proteinProduct = aMapping.getTo();
+          for (AlignedCodonFrame acf : MappingUtils
+                  .findMappingsForSequence(proteinProduct, mappings))
+          {
+            for (SequenceToSequenceMapping map : acf.getMappings())
+            {
+              if (map.getMapping().getMap().getFromRatio() == 3
+                      && proteinProduct == map.getMapping().getTo()
+                      && seqDss != map.getFromSeq())
+              {
+                /*
+                 * found a 3:1 mapping to the protein product which is not
+                 * from the dna sequence...assume it is from the CDS sequence
+                 * TODO mappings data model that brings together related
+                 * dna-cds-protein mappings in one object
+                 */
+                cdsSeq = map.getFromSeq();
+              }
+            }
+          }
+          if (cdsSeq != null)
+          {
+            /*
+             * mappings are always to dataset sequences so create an aligned
+             * sequence to own it; add the dataset sequence to the dataset
+             */
+            SequenceI derivedSequence = cdsSeq.deriveSequence();
+            cdsSeqs.add(derivedSequence);
+            if (!dataset.getSequences().contains(cdsSeq))
+            {
+              dataset.addSequence(cdsSeq);
+            }
+            continue;
+          }
+
+          /*
+           * didn't find mapped CDS sequence - construct it and add
+           * its dataset sequence to the dataset
+           */
+          cdsSeq = makeCdsSequence(seq.getDatasetSequence(), aMapping);
+          SequenceI cdsSeqDss = cdsSeq.createDatasetSequence();
           cdsSeqs.add(cdsSeq);
-    
+          if (!dataset.getSequences().contains(cdsSeqDss))
+          {
+            dataset.addSequence(cdsSeqDss);
+          }
+
           /*
            * add a mapping from CDS to the (unchanged) mapped to range
            */
@@ -1439,16 +1499,29 @@ public class AlignmentUtils
           MapList map = new MapList(cdsRange, aMapping.getMap()
                   .getToRanges(), aMapping.getMap().getFromRatio(),
                   aMapping.getMap().getToRatio());
-          cdsMappings.addMap(cdsSeq, aMapping.getTo(), map);
+          AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
+          cdsToProteinMapping.addMap(cdsSeq, proteinProduct, map);
+
+          /*
+           * guard against duplicating the mapping if repeating this action
+           */
+          if (!mappings.contains(cdsToProteinMapping))
+          {
+            mappings.add(cdsToProteinMapping);
+          }
 
           /*
            * add another mapping from original 'from' range to CDS
            */
+          AlignedCodonFrame dnaToProteinMapping = new AlignedCodonFrame();
           map = new MapList(aMapping.getMap().getFromRanges(), cdsRange, 1,
                   1);
-          cdsMappings.addMap(seq.getDatasetSequence(), cdsSeq, map);
+          dnaToProteinMapping.addMap(seq.getDatasetSequence(), cdsSeq, map);
+          if (!mappings.contains(dnaToProteinMapping))
+          {
+            mappings.add(dnaToProteinMapping);
+          }
 
-          alignmentMappings.add(cdsMappings);
 
           /*
            * transfer any features on dna that overlap the CDS
@@ -1458,20 +1531,9 @@ public class AlignmentUtils
       }
     }
 
-    /*
-     * add CDS seqs to shared dataset
-     */
-    Alignment dataset = al.getDataset();
-    for (SequenceI seq : cdsSeqs)
-    {
-      if (!dataset.getSequences().contains(seq.getDatasetSequence()))
-      {
-        dataset.addSequence(seq.getDatasetSequence());
-      }
-    }
     AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs
             .size()]));
-    cds.setDataset(dataset);
+    cds.setDataset((Alignment) dataset);
 
     return cds;
   }
@@ -1483,7 +1545,7 @@ public class AlignmentUtils
    * 
    * @param seq
    * @param mapping
-   * @return
+   * @return CDS sequence (as a dataset sequence)
    */
   static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping)
   {
@@ -1515,7 +1577,6 @@ public class AlignmentUtils
 
     SequenceI newSeq = new Sequence(seq.getName() + "|"
             + mapping.getTo().getName(), newSeqChars, 1, newPos);
-    newSeq.createDatasetSequence();
     return newSeq;
   }