JAL-1705 ensure the right mapping is used to align CDS to cDNA

[jalview.git] / src / jalview / analysis / AlignmentUtils.java
diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java

index eb1ee4b..14e3907 100644 (file)
--- a/src/jalview/analysis/AlignmentUtils.java
+++ b/src/jalview/analysis/AlignmentUtils.java
@@ -1366,12 +1366,13 @@ public class AlignmentUtils
     * Constructs an alignment consisting of the mapped (CDS) regions in the given
     * nucleotide sequences, and updates mappings to match. The CDS sequences are
     * added to the original alignment's dataset, which is shared by the new
-   * alignment.
+   * alignment. Mappings from nucleotide to CDS, and from CDS to protein, are
+   * added to the alignment dataset.
     * 
     * @param dna
     *          aligned dna sequences
     * @param mappings
-   *          from dna to protein; these are replaced with new mappings
+   *          from dna to protein
     * @param al
     * @return an alignment whose sequences are the cds-only parts of the dna
     *         sequences (or null if no mappings are found)
@@ -2093,7 +2094,7 @@ public class AlignmentUtils
      Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>();
  
      /*
-     * report any sequences that have no mapping so can't be realigned
+     * record any sequences that have no mapping so can't be realigned
       */
      unmapped.addAll(unaligned.getSequences());
  
@@ -2106,7 +2107,7 @@ public class AlignmentUtils
          SequenceI fromSeq = mapping.findAlignedSequence(seq, aligned);
          if (fromSeq != null)
          {
-          Mapping seqMap = mapping.getMappingForSequence(seq);
+          Mapping seqMap = mapping.getMappingBetween(fromSeq, seq);
            if (addMappedPositions(seq, fromSeq, seqMap, map))
            {
              unmapped.remove(seq);
@@ -2137,6 +2138,11 @@ public class AlignmentUtils
    static boolean addMappedPositions(SequenceI seq, SequenceI fromSeq,
            Mapping seqMap, Map<Integer, Map<SequenceI, Character>> map)
    {
+    if (seqMap == null)
+    {
+      return false;
+    }
+
      char[] fromChars = fromSeq.getSequence();
      int toStart = seq.getStart();
      char[] toChars = seq.getSequence();
@@ -2193,4 +2199,19 @@ public class AlignmentUtils
      }
      return true;
    }
+
+  // Strictly temporary hack until proper criteria for aligning protein to cds are in place; this is
+  // so Ensembl -> fetch xrefs Uniprot aligns the Uniprot. True unless a name lacks an ENSG/ENST prefix.
+  public static boolean looksLikeEnsembl(AlignmentI alignment)
+  {
+    for (SequenceI seq : alignment.getSequences())
+    {
+      String name = seq.getName(); // NOTE(review): a null name would throw on the next line - confirm names are never null
+      if (!name.startsWith("ENSG") && !name.startsWith("ENST"))
+      {
+        return false; // a single name with neither prefix rejects the whole alignment
+      }
+    }
+    return true; // also reached if the loop never runs, i.e. getSequences() yields nothing
+  }
  }