X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;h=2946ba2416922a157fdcebf25164c07f9b77c24a;hb=80634498762666e6acc92368716bf1a4d4f42f7b;hp=aa1a98b98dd4a91d7f2811e89da76b9997b29a09;hpb=241bd0223b016b5ad5ec78520310a8de32842722;p=jalview.git

diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java
index aa1a98b..2946ba2 100644
--- a/src/jalview/analysis/AlignmentUtils.java
+++ b/src/jalview/analysis/AlignmentUtils.java
@@ -74,12 +74,15 @@ import java.util.TreeMap;
  */
 public class AlignmentUtils
 {
-
   private static final int CODON_LENGTH = 3;
 
   private static final String SEQUENCE_VARIANT = "sequence_variant:";
 
-  private static final String ID = "ID";
+  /*
+   * the 'id' attribute is provided for variant features fetched from
+   * Ensembl using its REST service with JSON format 
+   */
+  public static final String VARIANT_ID = "id";
 
   /**
    * A data model to hold the 'normal' base value at a position, and an optional
@@ -1599,11 +1602,12 @@ public class AlignmentUtils
       return false;
     }
     String name = seq2.getName();
-    final DBRefEntry[] xrefs = seq1.getDBRefs();
+    final List<DBRefEntry> xrefs = seq1.getDBRefs();
     if (xrefs != null)
     {
-      for (DBRefEntry xref : xrefs)
+      for (int ix = 0, nx = xrefs.size(); ix < nx; ix++)
       {
+    	  DBRefEntry xref = xrefs.get(ix);
         String xrefName = xref.getSource() + "|" + xref.getAccessionId();
         // case-insensitive test, consistent with DBRefEntry.equalRef()
         if (xrefName.equalsIgnoreCase(name))
@@ -1799,8 +1803,10 @@ public class AlignmentUtils
           // need to
           // synthesize an xref.
 
-          for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs())
+          List<DBRefEntry> primrefs = dnaDss.getPrimaryDBRefs();
+          for (int ip = 0, np = primrefs.size(); ip < np; ip++)
           {
+        	  DBRefEntry primRef = primrefs.get(ip);
             /*
              * create a cross-reference from CDS to the source sequence's
              * primary reference and vice versa
@@ -1814,7 +1820,6 @@ public class AlignmentUtils
 
             dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq
                     .getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));
-
             // problem here is that the cross-reference is synthesized -
             // cdsSeq.getName() may be like 'CDS|dnaaccession' or
             // 'CDS|emblcdsacc'
@@ -1827,7 +1832,6 @@ public class AlignmentUtils
                     .getInverse()));
             proteinProduct.addDBRef(proteinToCdsRef);
           }
-
           /*
            * transfer any features on dna that overlap the CDS
            */
@@ -1888,7 +1892,7 @@ public class AlignmentUtils
    * @param seqMappings
    *          the set of mappings involving dnaSeq
    * @param aMapping
-   *          an initial candidate from seqMappings
+   *          a transcript-to-peptide mapping
    * @return
    */
   static SequenceI findCdsForProtein(List<AlignedCodonFrame> mappings,
@@ -1913,7 +1917,15 @@ public class AlignmentUtils
     if (mappedFromLength == dnaLength
             || mappedFromLength == dnaLength - CODON_LENGTH)
     {
-      return seqDss;
+      /*
+       * if sequence has CDS features, this is a transcript with no UTR
+       * - do not take this as the CDS sequence! (JAL-2789)
+       */
+      if (seqDss.getFeatures().getFeaturesByOntology(SequenceOntologyI.CDS)
+              .isEmpty())
+      {
+        return seqDss;
+      }
     }
 
     /*
@@ -1938,10 +1950,12 @@ public class AlignmentUtils
           {
             /*
             * found a 3:1 mapping to the protein product which covers
-            * the whole dna sequence i.e. is from CDS; finally check it
-            * is from the dna start sequence
+            * the whole dna sequence i.e. is from CDS; finally check the CDS
+            * is mapped from the given dna start sequence
             */
             SequenceI cdsSeq = map.getFromSeq();
+            // todo this test is weak if seqMappings contains multiple mappings;
+            // we get away with it if transcript:cds relationship is 1:1
             List<AlignedCodonFrame> dnaToCdsMaps = MappingUtils
                     .findMappingsForSequence(cdsSeq, seqMappings);
             if (!dnaToCdsMaps.isEmpty())
@@ -2056,16 +2070,20 @@ public class AlignmentUtils
   protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
           SequenceI contig, SequenceI proteinProduct, Mapping mapping)
   {
+
     // gather direct refs from contig congruent with mapping
     List<DBRefEntry> direct = new ArrayList<>();
     HashSet<String> directSources = new HashSet<>();
-    if (contig.getDBRefs() != null)
+
+    List<DBRefEntry> refs = contig.getDBRefs();
+    if (refs != null)
     {
-      for (DBRefEntry dbr : contig.getDBRefs())
+      for (int ib = 0, nb = refs.size(); ib < nb; ib++)
       {
-        if (dbr.hasMap() && dbr.getMap().getMap().isTripletMap())
+    	  DBRefEntry dbr = refs.get(ib);
+    	  MapList map;
+        if (dbr.hasMap() && (map = dbr.getMap().getMap()).isTripletMap())
         {
-          MapList map = dbr.getMap().getMap();
           // check if map is the CDS mapping
           if (mapping.getMap().equals(map))
           {
@@ -2075,21 +2093,22 @@ public class AlignmentUtils
         }
       }
     }
-    DBRefEntry[] onSource = DBRefUtils.selectRefs(
+    List<DBRefEntry> onSource = DBRefUtils.selectRefs(
             proteinProduct.getDBRefs(),
             directSources.toArray(new String[0]));
     List<DBRefEntry> propagated = new ArrayList<>();
 
     // and generate appropriate mappings
-    for (DBRefEntry cdsref : direct)
+    for (int ic = 0, nc = direct.size(); ic < nc; ic++)
     {
+    	DBRefEntry cdsref = direct.get(ic);
+    	Mapping m = cdsref.getMap();
       // clone maplist and mapping
       MapList cdsposmap = new MapList(
               Arrays.asList(new int[][]
               { new int[] { cdsSeq.getStart(), cdsSeq.getEnd() } }),
-              cdsref.getMap().getMap().getToRanges(), 3, 1);
-      Mapping cdsmap = new Mapping(cdsref.getMap().getTo(),
-              cdsref.getMap().getMap());
+              m.getMap().getToRanges(), 3, 1);
+      Mapping cdsmap = new Mapping(m.getTo(),m.getMap());
 
       // create dbref
       DBRefEntry newref = new DBRefEntry(cdsref.getSource(),
@@ -2235,12 +2254,13 @@ public class AlignmentUtils
     int mappedDnaLength = MappingUtils.getLength(ranges);
 
     /*
-     * if not a whole number of codons, something is wrong,
-     * abort mapping
+     * if not a whole number of codons, truncate mapping
      */
-    if (mappedDnaLength % CODON_LENGTH > 0)
+    int codonRemainder = mappedDnaLength % CODON_LENGTH;
+    if (codonRemainder > 0)
     {
-      return null;
+      mappedDnaLength -= codonRemainder;
+      MappingUtils.removeEndPositions(codonRemainder, ranges);
     }
 
     int proteinLength = proteinSeq.getLength();
@@ -2307,10 +2327,14 @@ public class AlignmentUtils
       int phase = 0;
       try
       {
-        phase = Integer.parseInt(sf.getPhase());
+    	String s = sf.getPhase();
+    	if (s != null) 
+    	{
+    		phase = Integer.parseInt(s);
+    	}
       } catch (NumberFormatException e)
       {
-        // ignore
+        // SwingJS -- need to avoid these.
       }
       /*
        * phase > 0 on first codon means 5' incomplete - skip to the start
@@ -2421,20 +2445,23 @@ public class AlignmentUtils
     /*
      * variants in first codon base
      */
-    for (DnaVariant var : codonVariants[0])
+    for (DnaVariant dnavar : codonVariants[0])
     {
-      if (var.variant != null)
+      if (dnavar.variant != null)
       {
-        String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES);
+        String alleles = (String) dnavar.variant.getValue(Gff3Helper.ALLELES);
         if (alleles != null)
         {
           for (String base : alleles.split(","))
           {
-            if (!base1.equals(base))
+            if (!base1.equalsIgnoreCase(base))
             {
-              String codon = base + base2 + base3;
-              if (addPeptideVariant(peptide, peptidePos, residue, var,
-                      codon))
+              String codon = base.toUpperCase() + base2.toLowerCase()
+                      + base3.toLowerCase();
+              String canonical = base1.toUpperCase() + base2.toLowerCase()
+                      + base3.toLowerCase();
+              if (addPeptideVariant(peptide, peptidePos, residue, dnavar,
+                      codon, canonical))
               {
                 count++;
               }
@@ -2456,11 +2483,14 @@ public class AlignmentUtils
         {
           for (String base : alleles.split(","))
           {
-            if (!base2.equals(base))
+            if (!base2.equalsIgnoreCase(base))
             {
-              String codon = base1 + base + base3;
+              String codon = base1.toLowerCase() + base.toUpperCase()
+                      + base3.toLowerCase();
+              String canonical = base1.toLowerCase() + base2.toUpperCase()
+                      + base3.toLowerCase();
               if (addPeptideVariant(peptide, peptidePos, residue, var,
-                      codon))
+                      codon, canonical))
               {
                 count++;
               }
@@ -2482,11 +2512,14 @@ public class AlignmentUtils
         {
           for (String base : alleles.split(","))
           {
-            if (!base3.equals(base))
+            if (!base3.equalsIgnoreCase(base))
             {
-              String codon = base1 + base2 + base;
+              String codon = base1.toLowerCase() + base2.toLowerCase()
+                      + base.toUpperCase();
+              String canonical = base1.toLowerCase() + base2.toLowerCase()
+                      + base3.toUpperCase();
               if (addPeptideVariant(peptide, peptidePos, residue, var,
-                      codon))
+                      codon, canonical))
               {
                 count++;
               }
@@ -2509,10 +2542,13 @@ public class AlignmentUtils
    * @param residue
    * @param var
    * @param codon
+   *          the variant codon e.g. aCg
+   * @param canonical
+   *          the 'normal' codon e.g. aTg
    * @return true if a feature was added, else false
    */
   static boolean addPeptideVariant(SequenceI peptide, int peptidePos,
-          String residue, DnaVariant var, String codon)
+          String residue, DnaVariant var, String codon, String canonical)
   {
     /*
      * get peptide translation of codon e.g. GAT -> D
@@ -2527,7 +2563,7 @@ public class AlignmentUtils
     {
       return false;
     }
-    String desc = codon;
+    String desc = canonical + "/" + codon;
     String featureType = "";
     if (trans.equals(residue))
     {
@@ -2550,15 +2586,15 @@ public class AlignmentUtils
             peptidePos, var.getSource());
 
     StringBuilder attributes = new StringBuilder(32);
-    String id = (String) var.variant.getValue(ID);
+    String id = (String) var.variant.getValue(VARIANT_ID);
     if (id != null)
     {
       if (id.startsWith(SEQUENCE_VARIANT))
       {
         id = id.substring(SEQUENCE_VARIANT.length());
       }
-      sf.setValue(ID, id);
-      attributes.append(ID).append("=").append(id);
+      sf.setValue(VARIANT_ID, id);
+      attributes.append(VARIANT_ID).append("=").append(id);
       // TODO handle other species variants JAL-2064
       StringBuilder link = new StringBuilder(32);
       try
@@ -2738,19 +2774,25 @@ public class AlignmentUtils
     SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);
     if (xrefs != null)
     {
-      for (SequenceI xref : xrefs)
+    	// BH 2019.01.25 streamlined this triply nested loop to remove all iterators
+    	
+      for (int ix = 0, nx = xrefs.length; ix < nx; ix++)
       {
-        DBRefEntry[] dbrefs = xref.getDBRefs();
+    	SequenceI xref = xrefs[ix];
+        List<DBRefEntry> dbrefs = xref.getDBRefs();
         if (dbrefs != null)
         {
-          for (DBRefEntry dbref : dbrefs)
+          for (int ir = 0, nir = dbrefs.size(); ir < nir; ir++)
           {
-            if (dbref.getMap() == null || dbref.getMap().getTo() == null
-                    || dbref.getMap().getTo().isProtein() != isProtein)
+        	  DBRefEntry dbref = dbrefs.get(ir);
+        	  Mapping map = dbref.getMap();
+        	  SequenceI mto;
+            if (map == null || (mto = map.getTo()) == null
+                    || mto.isProtein() != isProtein)
             {
               continue;
             }
-            SequenceI mappedTo = dbref.getMap().getTo();
+            SequenceI mappedTo = mto;
             SequenceI match = matcher.findIdMatch(mappedTo);
             if (match == null)
             {