X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;h=d8c1cdff513f4bd63ec26f58c1d18a1e134941c6;hb=68ab92fb78438a63d70043b1b7f5740380668d7a;hp=fdf66d0a910c79d80f50dbfd444e9b9e2b4676ba;hpb=89f9ad40a3e1d7f1765f3bf5369dcc3636a824ff;p=jalview.git diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index fdf66d0..d8c1cdf 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -20,8 +20,23 @@ */ package jalview.analysis; -import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import jalview.bin.Cache; import jalview.commands.RemoveGapColCommand; import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; @@ -38,7 +53,6 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.datamodel.features.SequenceFeatures; -import jalview.io.gff.Gff3Helper; import jalview.io.gff.SequenceOntologyI; import jalview.schemes.ResidueProperties; import jalview.util.Comparison; @@ -46,25 +60,6 @@ import jalview.util.DBRefUtils; import jalview.util.IntRangeComparator; import jalview.util.MapList; import jalview.util.MappingUtils; -import jalview.util.StringUtils; - -import java.io.UnsupportedEncodingException; -import java.net.URLEncoder; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.NoSuchElementException; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; /** * grab bag of useful alignment manipulation operations Expect these to be @@ -966,11 +961,12 @@ public class AlignmentUtils .findMappingsForSequence(cdsSeq, mappings); for (AlignedCodonFrame mapping : dnaMappings) { - SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein); + List foundMap=new ArrayList<>(); + SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein,foundMap); if (peptide != null) { final int peptideLength = peptide.getLength(); - Mapping map = mapping.getMappingBetween(cdsSeq, peptide); + Mapping map = foundMap.get(0).getMapping(); if (map != null) { MapList mapList = map.getMap(); @@ -1737,15 +1733,8 @@ public class AlignmentUtils cdsSeqs.add(cdsSeq); - if (!dataset.getSequences().contains(cdsSeqDss)) - { - // check if this sequence is a newly created one - // so needs adding to the dataset - dataset.addSequence(cdsSeqDss); - } - /* - * add a mapping from CDS to the (unchanged) mapped to range + * build the mapping from CDS to protein */ List cdsRange = Collections .singletonList(new int[] @@ -1754,16 +1743,26 @@ public class AlignmentUtils MapList cdsToProteinMap = new MapList(cdsRange, mapList.getToRanges(), mapList.getFromRatio(), mapList.getToRatio()); - AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame(); - cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct, - cdsToProteinMap); - /* - * guard against duplicating the mapping if repeating this action - */ - if (!mappings.contains(cdsToProteinMapping)) + if (!dataset.getSequences().contains(cdsSeqDss)) { - mappings.add(cdsToProteinMapping); + /* + * if this sequence is a newly created one, add it to the dataset + * and made a CDS to protein mapping (if sequence already exists, + * CDS-to-protein mapping _is_ the transcript-to-protein mapping) + */ + dataset.addSequence(cdsSeqDss); + AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame(); + cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct, + cdsToProteinMap); + + /* + * guard against duplicating the mapping if repeating this action + */ + if (!mappings.contains(cdsToProteinMapping)) + { + mappings.add(cdsToProteinMapping); + } } propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(), @@ -1997,45 +1996,31 @@ public class AlignmentUtils SequenceI newSeq = null; - final MapList maplist = mapping.getMap(); - if (maplist.isContiguous() && maplist.isFromForwardStrand()) - { - /* - * just a subsequence, keep same dataset sequence - */ - int start = maplist.getFromLowest(); - int end = maplist.getFromHighest(); - newSeq = seq.getSubSequence(start - 1, end); - newSeq.setName(seqId); - } - else - { - /* - * construct by splicing mapped from ranges - */ - char[] seqChars = seq.getSequence(); - List fromRanges = maplist.getFromRanges(); - int cdsWidth = MappingUtils.getLength(fromRanges); - char[] newSeqChars = new char[cdsWidth]; + /* + * construct CDS sequence by splicing mapped from ranges + */ + char[] seqChars = seq.getSequence(); + List fromRanges = mapping.getMap().getFromRanges(); + int cdsWidth = MappingUtils.getLength(fromRanges); + char[] newSeqChars = new char[cdsWidth]; - int newPos = 0; - for (int[] range : fromRanges) + int newPos = 0; + for (int[] range : fromRanges) + { + if (range[0] <= range[1]) { - if (range[0] <= range[1]) - { - // forward strand mapping - just copy the range - int length = range[1] - range[0] + 1; - System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos, - length); - newPos += length; - } - else + // forward strand mapping - just copy the range + int length = range[1] - range[0] + 1; + System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos, + length); + newPos += length; + } + else + { + // reverse strand mapping - copy and complement one by one + for (int i = range[0]; i >= range[1]; i--) { - // reverse strand mapping - copy and complement one by one - for (int i = range[0]; i >= range[1]; i--) - { - newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]); - } + newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]); } } @@ -2069,9 +2054,8 @@ public class AlignmentUtils } else { - System.err.println( - "JAL-2154 regression: warning - found (and ignnored a duplicate CDS sequence):" - + mtch.toString()); + Cache.log.error( + "JAL-2154 regression: warning - found (and ignored) a duplicate CDS sequence:" + mtch.toString()); } } } @@ -2576,6 +2560,13 @@ public class AlignmentUtils { List alignedSequences = alignedDatasets .get(seq.getDatasetSequence()); + if (alignedSequences.isEmpty()) + { + /* + * defensive check - shouldn't happen! (JAL-3536) + */ + continue; + } SequenceI alignedSeq = alignedSequences.get(0); /*