Jalview.isJS() --> Platform.isJS(), DBRefEntry[] --> List<DBRefEntry>

[jalview.git] / src / jalview / analysis / AlignmentUtils.java
diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java

index 3d22115..8fd961e 100644 (file)
--- a/src/jalview/analysis/AlignmentUtils.java
+++ b/src/jalview/analysis/AlignmentUtils.java
@@ -465,7 +465,7 @@ public class AlignmentUtils
      {
        String lastCodon = String.valueOf(cdnaSeqChars,
                cdnaLength - CODON_LENGTH, CODON_LENGTH).toUpperCase();
-      for (String stop : ResidueProperties.STOP)
+      for (String stop : ResidueProperties.STOP_CODONS)
        {
          if (lastCodon.equals(stop))
          {
@@ -536,7 +536,8 @@ public class AlignmentUtils
         * allow * in protein to match untranslatable in dna
         */
        final char aaRes = aaSeqChars[aaPos];
-      if ((translated == null || "STOP".equals(translated)) && aaRes == '*')
+      if ((translated == null || ResidueProperties.STOP.equals(translated))
+              && aaRes == '*')
        {
          continue;
        }
@@ -568,7 +569,8 @@ public class AlignmentUtils
      if (dnaPos == cdnaSeqChars.length - CODON_LENGTH)
      {
        String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);
-      if ("STOP".equals(ResidueProperties.codonTranslate(codon)))
+      if (ResidueProperties.STOP
+              .equals(ResidueProperties.codonTranslate(codon)))
        {
          return true;
        }
@@ -1597,11 +1599,12 @@ public class AlignmentUtils
        return false;
      }
      String name = seq2.getName();
-    final DBRefEntry[] xrefs = seq1.getDBRefs();
+    final List<DBRefEntry> xrefs = seq1.getDBRefs();
      if (xrefs != null)
      {
-      for (DBRefEntry xref : xrefs)
+      for (int ix = 0, nx = xrefs.size(); ix < nx; ix++)
        {
+         DBRefEntry xref = xrefs.get(ix);
          String xrefName = xref.getSource() + "|" + xref.getAccessionId();
          // case-insensitive test, consistent with DBRefEntry.equalRef()
          if (xrefName.equalsIgnoreCase(name))
@@ -1797,8 +1800,10 @@ public class AlignmentUtils
            // need to
            // synthesize an xref.
  
-          for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs())
+          List<DBRefEntry> primrefs = dnaDss.getPrimaryDBRefs();
+          for (int ip = 0, np = primrefs.size(); ip < np; ip++)
            {
+                 DBRefEntry primRef = primrefs.get(ip);
              /*
               * create a cross-reference from CDS to the source sequence's
               * primary reference and vice versa
@@ -1812,7 +1817,6 @@ public class AlignmentUtils
  
              dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq
                      .getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));
-
              // problem here is that the cross-reference is synthesized -
              // cdsSeq.getName() may be like 'CDS|dnaaccession' or
              // 'CDS|emblcdsacc'
@@ -1825,7 +1829,6 @@ public class AlignmentUtils
                      .getInverse()));
              proteinProduct.addDBRef(proteinToCdsRef);
            }
-
            /*
             * transfer any features on dna that overlap the CDS
             */
@@ -1886,7 +1889,7 @@ public class AlignmentUtils
     * @param seqMappings
     *          the set of mappings involving dnaSeq
     * @param aMapping
-   *          an initial candidate from seqMappings
+   *          a transcript-to-peptide mapping
     * @return
     */
    static SequenceI findCdsForProtein(List<AlignedCodonFrame> mappings,
@@ -1911,7 +1914,15 @@ public class AlignmentUtils
      if (mappedFromLength == dnaLength
              || mappedFromLength == dnaLength - CODON_LENGTH)
      {
-      return seqDss;
+      /*
+       * if sequence has CDS features, this is a transcript with no UTR
+       * - do not take this as the CDS sequence! (JAL-2789)
+       */
+      if (seqDss.getFeatures().getFeaturesByOntology(SequenceOntologyI.CDS)
+              .isEmpty())
+      {
+        return seqDss;
+      }
      }
  
      /*
@@ -1936,10 +1947,12 @@ public class AlignmentUtils
            {
              /*
              * found a 3:1 mapping to the protein product which covers
-            * the whole dna sequence i.e. is from CDS; finally check it
-            * is from the dna start sequence
+            * the whole dna sequence i.e. is from CDS; finally check the CDS
+            * is mapped from the given dna start sequence
              */
              SequenceI cdsSeq = map.getFromSeq();
+            // TODO: this test is weak if seqMappings contains multiple mappings;
+            // we get away with it while the transcript:cds relationship is 1:1
              List<AlignedCodonFrame> dnaToCdsMaps = MappingUtils
                      .findMappingsForSequence(cdsSeq, seqMappings);
              if (!dnaToCdsMaps.isEmpty())
@@ -2054,16 +2067,20 @@ public class AlignmentUtils
    protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
            SequenceI contig, SequenceI proteinProduct, Mapping mapping)
    {
+
      // gather direct refs from contig congruent with mapping
      List<DBRefEntry> direct = new ArrayList<>();
      HashSet<String> directSources = new HashSet<>();
-    if (contig.getDBRefs() != null)
+
+    List<DBRefEntry> refs = contig.getDBRefs();
+    if (refs != null)
      {
-      for (DBRefEntry dbr : contig.getDBRefs())
+      for (int ib = 0, nb = refs.size(); ib < nb; ib++)
        {
-        if (dbr.hasMap() && dbr.getMap().getMap().isTripletMap())
+         DBRefEntry dbr = refs.get(ib);
+         MapList map;
+        if (dbr.hasMap() && (map = dbr.getMap().getMap()).isTripletMap())
          {
-          MapList map = dbr.getMap().getMap();
            // check if map is the CDS mapping
            if (mapping.getMap().equals(map))
            {
@@ -2073,21 +2090,22 @@ public class AlignmentUtils
          }
        }
      }
-    DBRefEntry[] onSource = DBRefUtils.selectRefs(
+    List<DBRefEntry> onSource = DBRefUtils.selectRefs(
              proteinProduct.getDBRefs(),
              directSources.toArray(new String[0]));
      List<DBRefEntry> propagated = new ArrayList<>();
  
      // and generate appropriate mappings
-    for (DBRefEntry cdsref : direct)
+    for (int ic = 0, nc = direct.size(); ic < nc; ic++)
      {
+       DBRefEntry cdsref = direct.get(ic);
+       Mapping m = cdsref.getMap();
        // clone maplist and mapping
        MapList cdsposmap = new MapList(
                Arrays.asList(new int[][]
                { new int[] { cdsSeq.getStart(), cdsSeq.getEnd() } }),
-              cdsref.getMap().getMap().getToRanges(), 3, 1);
-      Mapping cdsmap = new Mapping(cdsref.getMap().getTo(),
-              cdsref.getMap().getMap());
+              m.getMap().getToRanges(), 3, 1);
+      Mapping cdsmap = new Mapping(m.getTo(),m.getMap());
  
        // create dbref
        DBRefEntry newref = new DBRefEntry(cdsref.getSource(),
@@ -2233,12 +2251,13 @@ public class AlignmentUtils
      int mappedDnaLength = MappingUtils.getLength(ranges);
  
      /*
-     * if not a whole number of codons, something is wrong,
-     * abort mapping
+     * if not a whole number of codons, truncate mapping
       */
-    if (mappedDnaLength % CODON_LENGTH > 0)
+    int codonRemainder = mappedDnaLength % CODON_LENGTH;
+    if (codonRemainder > 0)
      {
-      return null;
+      mappedDnaLength -= codonRemainder;
+      MappingUtils.removeEndPositions(codonRemainder, ranges);
      }
  
      int proteinLength = proteinSeq.getLength();
@@ -2305,10 +2324,14 @@ public class AlignmentUtils
        int phase = 0;
        try
        {
-        phase = Integer.parseInt(sf.getPhase());
+       String s = sf.getPhase();
+       if (s != null) 
+       {
+               phase = Integer.parseInt(s);
+       }
        } catch (NumberFormatException e)
        {
-        // ignore
+        // unparseable phase is ignored (phase stays 0); SwingJS -- need to avoid NumberFormatException, hence the null check above
        }
        /*
         * phase > 0 on first codon means 5' incomplete - skip to the start
@@ -2419,20 +2442,23 @@ public class AlignmentUtils
      /*
       * variants in first codon base
       */
-    for (DnaVariant var : codonVariants[0])
+    for (DnaVariant dnavar : codonVariants[0])
      {
-      if (var.variant != null)
+      if (dnavar.variant != null)
        {
-        String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);
+        String alleles = (String) dnavar.variant.getValue(Gff3Helper.ALLELES);
          if (alleles != null)
          {
            for (String base : alleles.split(","))
            {
-            if (!base1.equals(base))
+            if (!base1.equalsIgnoreCase(base))
              {
-              String codon = base + base2 + base3;
-              if (addPeptideVariant(peptide, peptidePos, residue, var,
-                      codon))
+              String codon = base.toUpperCase() + base2.toLowerCase()
+                      + base3.toLowerCase();
+              String canonical = base1.toUpperCase() + base2.toLowerCase()
+                      + base3.toLowerCase();
+              if (addPeptideVariant(peptide, peptidePos, residue, dnavar,
+                      codon, canonical))
                {
                  count++;
                }
@@ -2454,11 +2480,14 @@ public class AlignmentUtils
          {
            for (String base : alleles.split(","))
            {
-            if (!base2.equals(base))
+            if (!base2.equalsIgnoreCase(base))
              {
-              String codon = base1 + base + base3;
+              String codon = base1.toLowerCase() + base.toUpperCase()
+                      + base3.toLowerCase();
+              String canonical = base1.toLowerCase() + base2.toUpperCase()
+                      + base3.toLowerCase();
                if (addPeptideVariant(peptide, peptidePos, residue, var,
-                      codon))
+                      codon, canonical))
                {
                  count++;
                }
@@ -2480,11 +2509,14 @@ public class AlignmentUtils
          {
            for (String base : alleles.split(","))
            {
-            if (!base3.equals(base))
+            if (!base3.equalsIgnoreCase(base))
              {
-              String codon = base1 + base2 + base;
+              String codon = base1.toLowerCase() + base2.toLowerCase()
+                      + base.toUpperCase();
+              String canonical = base1.toLowerCase() + base2.toLowerCase()
+                      + base3.toUpperCase();
                if (addPeptideVariant(peptide, peptidePos, residue, var,
-                      codon))
+                      codon, canonical))
                {
                  count++;
                }
@@ -2498,20 +2530,22 @@ public class AlignmentUtils
    }
  
    /**
-   * Helper method that adds a peptide variant feature, provided the given codon
-   * translates to a value different to the current residue (is a non-synonymous
-   * variant). ID and clinical_significance attributes of the dna variant (if
-   * present) are copied to the new feature.
+   * Helper method that adds a peptide variant feature. ID and
+   * clinical_significance attributes of the dna variant (if present) are copied
+   * to the new feature.
     * 
     * @param peptide
     * @param peptidePos
     * @param residue
     * @param var
     * @param codon
+   *          the variant codon e.g. aCg
+   * @param canonical
+   *          the 'normal' codon e.g. aTg
     * @return true if a feature was added, else false
     */
    static boolean addPeptideVariant(SequenceI peptide, int peptidePos,
-          String residue, DnaVariant var, String codon)
+          String residue, DnaVariant var, String codon, String canonical)
    {
      /*
       * get peptide translation of codon e.g. GAT -> D
@@ -2526,12 +2560,16 @@ public class AlignmentUtils
      {
        return false;
      }
-    String desc = codon;
+    String desc = canonical + "/" + codon;
      String featureType = "";
      if (trans.equals(residue))
      {
        featureType = SequenceOntologyI.SYNONYMOUS_VARIANT;
      }
+    else if (ResidueProperties.STOP.equals(trans))
+    {
+      featureType = SequenceOntologyI.STOP_GAINED;
+    }
      else
      {
        String residue3Char = StringUtils
@@ -2733,19 +2771,25 @@ public class AlignmentUtils
      SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);
      if (xrefs != null)
      {
-      for (SequenceI xref : xrefs)
+       // BH 2019.01.25 streamlined this triply nested loop to remove all iterators
+       
+      for (int ix = 0, nx = xrefs.length; ix < nx; ix++)
        {
-        DBRefEntry[] dbrefs = xref.getDBRefs();
+       SequenceI xref = xrefs[ix];
+        List<DBRefEntry> dbrefs = xref.getDBRefs();
          if (dbrefs != null)
          {
-          for (DBRefEntry dbref : dbrefs)
+          for (int ir = 0, nir = dbrefs.size(); ir < nir; ir++)
            {
-            if (dbref.getMap() == null || dbref.getMap().getTo() == null
-                    || dbref.getMap().getTo().isProtein() != isProtein)
+                 DBRefEntry dbref = dbrefs.get(ir);
+                 Mapping map = dbref.getMap();
+                 SequenceI mto;
+            if (map == null || (mto = map.getTo()) == null
+                    || mto.isProtein() != isProtein)
              {
                continue;
              }
-            SequenceI mappedTo = dbref.getMap().getTo();
+            SequenceI mappedTo = mto;
              SequenceI match = matcher.findIdMatch(mappedTo);
              if (match == null)
              {