Merge branch 'develop' into update_212_Dec_merge_with_21125_chamges
[jalview.git] / src / jalview / analysis / AlignmentUtils.java
index 7c1f4d7..6ac59ba 100644 (file)
@@ -22,6 +22,23 @@ package jalview.analysis;
 
 import java.util.Locale;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.NoSuchElementException;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import jalview.bin.Console;
 import jalview.commands.RemoveGapColCommand;
 import jalview.datamodel.AlignedCodon;
 import jalview.datamodel.AlignedCodonFrame;
@@ -46,22 +63,6 @@ import jalview.util.IntRangeComparator;
 import jalview.util.MapList;
 import jalview.util.MappingUtils;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.NoSuchElementException;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
 /**
  * grab bag of useful alignment manipulation operations Expect these to be
  * refactored elsewhere at some point.
@@ -185,8 +186,8 @@ public class AlignmentUtils
               .getSequence(s.getStart() - 1 - ustream_ds, s.getStart() - 1))
                       .toLowerCase(Locale.ROOT).toCharArray();
       char[] downstream = new String(
-              ds.getSequence(s_end - 1, s_end + dstream_ds)).toLowerCase(Locale.ROOT)
-                      .toCharArray();
+              ds.getSequence(s_end - 1, s_end + dstream_ds))
+                      .toLowerCase(Locale.ROOT).toCharArray();
       char[] coreseq = s.getSequence();
       char[] nseq = new char[offset + upstream.length + downstream.length
               + coreseq.length];
@@ -464,7 +465,8 @@ public class AlignmentUtils
     if (cdnaLength != mappedLength && cdnaLength > 2)
     {
       String lastCodon = String.valueOf(cdnaSeqChars,
-              cdnaLength - CODON_LENGTH, CODON_LENGTH).toUpperCase(Locale.ROOT);
+              cdnaLength - CODON_LENGTH, CODON_LENGTH)
+              .toUpperCase(Locale.ROOT);
       for (String stop : ResidueProperties.STOP_CODONS)
       {
         if (lastCodon.equals(stop))
@@ -481,7 +483,8 @@ public class AlignmentUtils
      */
     int startOffset = 0;
     if (cdnaLength != mappedLength && cdnaLength > 2
-            && String.valueOf(cdnaSeqChars, 0, CODON_LENGTH).toUpperCase(Locale.ROOT)
+            && String.valueOf(cdnaSeqChars, 0, CODON_LENGTH)
+                    .toUpperCase(Locale.ROOT)
                     .equals(ResidueProperties.START))
     {
       startOffset += CODON_LENGTH;
@@ -975,8 +978,8 @@ public class AlignmentUtils
             mapList = mapList.getInverse();
           }
           final int cdsLength = cdsDss.getLength();
-          int mappedFromLength = MappingUtils.getLength(mapList
-                  .getFromRanges());
+          int mappedFromLength = MappingUtils
+                  .getLength(mapList.getFromRanges());
           int mappedToLength = MappingUtils
                   .getLength(mapList.getToRanges());
           boolean addStopCodon = (cdsLength == mappedFromLength
@@ -1653,8 +1656,8 @@ public class AlignmentUtils
       productSeqs = new HashSet<>();
       for (SequenceI seq : products)
       {
-        productSeqs.add(seq.getDatasetSequence() == null ? seq : seq
-                .getDatasetSequence());
+        productSeqs.add(seq.getDatasetSequence() == null ? seq
+                : seq.getDatasetSequence());
       }
     }
 
@@ -1758,7 +1761,7 @@ public class AlignmentUtils
             dataset.addSequence(cdsSeqDss);
             AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
             cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
-                  cdsToProteinMap);
+                    cdsToProteinMap);
 
             /*
              * guard against duplicating the mapping if repeating this action
@@ -1811,20 +1814,21 @@ public class AlignmentUtils
           List<DBRefEntry> primrefs = dnaDss.getPrimaryDBRefs();
           for (int ip = 0, np = primrefs.size(); ip < np; ip++)
           {
-                 DBRefEntry primRef = primrefs.get(ip);
+            DBRefEntry primRef = primrefs.get(ip);
             /*
              * create a cross-reference from CDS to the source sequence's
              * primary reference and vice versa
              */
             String source = primRef.getSource();
             String version = primRef.getVersion();
-            DBRefEntry cdsCrossRef = new DBRefEntry(source, source + ":"
-                    + version, primRef.getAccessionId());
-            cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));
+            DBRefEntry cdsCrossRef = new DBRefEntry(source,
+                    source + ":" + version, primRef.getAccessionId());
+            cdsCrossRef
+                    .setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));
             cdsSeqDss.addDBRef(cdsCrossRef);
 
-            dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq
-                    .getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));
+            dnaSeq.addDBRef(new DBRefEntry(source, version,
+                    cdsSeq.getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));
             // problem here is that the cross-reference is synthesized -
             // cdsSeq.getName() may be like 'CDS|dnaaccession' or
             // 'CDS|emblcdsacc'
@@ -1833,8 +1837,8 @@ public class AlignmentUtils
             DBRefEntry proteinToCdsRef = new DBRefEntry(source, version,
                     cdsSeq.getName());
             //
-            proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
-                    .getInverse()));
+            proteinToCdsRef.setMap(
+                    new Mapping(cdsSeqDss, cdsToProteinMap.getInverse()));
             proteinProduct.addDBRef(proteinToCdsRef);
           }
           /*
@@ -1846,8 +1850,8 @@ public class AlignmentUtils
       }
     }
 
-    AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs
-            .size()]));
+    AlignmentI cds = new Alignment(
+            cdsSeqs.toArray(new SequenceI[cdsSeqs.size()]));
     cds.setDataset(dataset);
 
     return cds;
@@ -2000,45 +2004,31 @@ public class AlignmentUtils
 
     SequenceI newSeq = null;
 
-    final MapList maplist = mapping.getMap();
-    if (maplist.isContiguous() && maplist.isFromForwardStrand())
-    {
-      /*
-       * just a subsequence, keep same dataset sequence
-       */
-      int start = maplist.getFromLowest();
-      int end = maplist.getFromHighest();
-      newSeq = seq.getSubSequence(start - 1, end);
-      newSeq.setName(seqId);
-    }
-    else
-    {
-      /*
-       * construct by splicing mapped from ranges
-       */
-      char[] seqChars = seq.getSequence();
-      List<int[]> fromRanges = maplist.getFromRanges();
-      int cdsWidth = MappingUtils.getLength(fromRanges);
-      char[] newSeqChars = new char[cdsWidth];
+    /*
+     * construct CDS sequence by splicing mapped from ranges
+     */
+    char[] seqChars = seq.getSequence();
+    List<int[]> fromRanges = mapping.getMap().getFromRanges();
+    int cdsWidth = MappingUtils.getLength(fromRanges);
+    char[] newSeqChars = new char[cdsWidth];
 
-      int newPos = 0;
-      for (int[] range : fromRanges)
+    int newPos = 0;
+    for (int[] range : fromRanges)
+    {
+      if (range[0] <= range[1])
       {
-        if (range[0] <= range[1])
-        {
-          // forward strand mapping - just copy the range
-          int length = range[1] - range[0] + 1;
-          System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos,
-                  length);
-          newPos += length;
-        }
-        else
+        // forward strand mapping - just copy the range
+        int length = range[1] - range[0] + 1;
+        System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos,
+                length);
+        newPos += length;
+      }
+      else
+      {
+        // reverse strand mapping - copy and complement one by one
+        for (int i = range[0]; i >= range[1]; i--)
         {
-          // reverse strand mapping - copy and complement one by one
-          for (int i = range[0]; i >= range[1]; i--)
-          {
-            newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]);
-          }
+          newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]);
         }
       }
 
@@ -2072,8 +2062,8 @@ public class AlignmentUtils
           }
           else
           {
-            System.err.println(
-                    "JAL-2154 regression: warning - found (and ignnored a duplicate CDS sequence):"
+            Console.error(
+                    "JAL-2154 regression: warning - found (and ignored) a duplicate CDS sequence:"
                             + mtch.toString());
           }
         }
@@ -2198,9 +2188,9 @@ public class AlignmentUtils
     /*
      * get features, optionally restricted by an ontology term
      */
-    List<SequenceFeature> sfs = select == null ? fromSeq.getFeatures()
-            .getPositionalFeatures() : fromSeq.getFeatures()
-            .getFeaturesByOntology(select);
+    List<SequenceFeature> sfs = select == null
+            ? fromSeq.getFeatures().getPositionalFeatures()
+            : fromSeq.getFeatures().getFeaturesByOntology(select);
 
     int count = 0;
     for (SequenceFeature sf : sfs)
@@ -2345,8 +2335,8 @@ public class AlignmentUtils
   {
     List<int[]> result = new ArrayList<>();
 
-    List<SequenceFeature> sfs = dnaSeq.getFeatures().getFeaturesByOntology(
-            SequenceOntologyI.CDS);
+    List<SequenceFeature> sfs = dnaSeq.getFeatures()
+            .getFeaturesByOntology(SequenceOntologyI.CDS);
     if (sfs.isEmpty())
     {
       return result;
@@ -2358,11 +2348,11 @@ public class AlignmentUtils
       int phase = 0;
       try
       {
-       String s = sf.getPhase();
-       if (s != null) 
-       {
-               phase = Integer.parseInt(s);
-       }
+        String s = sf.getPhase();
+        if (s != null)
+        {
+          phase = Integer.parseInt(s);
+        }
       } catch (NumberFormatException e)
       {
         // leave as zero
@@ -2418,11 +2408,11 @@ public class AlignmentUtils
     SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);
     if (xrefs != null)
     {
-       // BH 2019.01.25 recoded to remove iterators
-       
+      // BH 2019.01.25 recoded to remove iterators
+
       for (int ix = 0, nx = xrefs.length; ix < nx; ix++)
       {
-       SequenceI xref = xrefs[ix];
+        SequenceI xref = xrefs[ix];
         List<DBRefEntry> dbrefs = xref.getDBRefs();
         if (dbrefs != null)
         {
@@ -2537,10 +2527,10 @@ public class AlignmentUtils
    * true; else returns false
    * 
    * @param unaligned
-   *                    - sequences to be aligned based on aligned
+   *          - sequences to be aligned based on aligned
    * @param aligned
-   *                    - 'guide' alignment containing sequences derived from same
-   *                    dataset as unaligned
+   *          - 'guide' alignment containing sequences derived from same dataset
+   *          as unaligned
    * @return
    */
   static boolean alignAsSameSequences(AlignmentI unaligned,
@@ -2576,8 +2566,7 @@ public class AlignmentUtils
       {
         return false;
       }
-      SequenceI alignedSeq = alignedDatasets.get(ds)
-              .get(0);
+      SequenceI alignedSeq = alignedDatasets.get(ds).get(0);
       int startCol = alignedSeq.findIndex(seq.getStart()); // 1..
       leftmost = Math.min(leftmost, startCol);
     }