From b22d623e48f01e7c0ab274e30b1b05e289afc34f Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 11 Jun 2019 11:31:12 +0100 Subject: [PATCH] JAL-3187 CDS has same dataset sequence as transcript --- src/jalview/analysis/AlignmentUtils.java | 134 +++++++++++++------- src/jalview/datamodel/AlignedCodonFrame.java | 10 +- src/jalview/util/MapList.java | 10 ++ .../seqfeatures/FeatureRendererModel.java | 5 +- 4 files changed, 101 insertions(+), 58 deletions(-) diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 4b4e2a7..77cc4d6 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -1107,7 +1107,7 @@ public class AlignmentUtils SequenceI prot = mapping.findAlignedSequence(dnaSeq, protein); if (prot != null) { - Mapping seqMap = mapping.getMappingForSequence(dnaSeq, false); + Mapping seqMap = mapping.getMappingForSequence(dnaSeq); addCodonPositions(dnaSeq, prot, protein.getGapCharacter(), seqMap, alignedCodons); unmappedProtein.remove(prot); @@ -1746,8 +1746,10 @@ public class AlignmentUtils /* * add a mapping from CDS to the (unchanged) mapped to range */ - List cdsRange = Collections.singletonList(new int[] { 1, - cdsSeq.getLength() }); + List cdsRange = Collections + .singletonList(new int[] + { cdsSeq.getStart(), + cdsSeq.getLength() + cdsSeq.getStart() - 1 }); MapList cdsToProteinMap = new MapList(cdsRange, mapList.getToRanges(), mapList.getFromRatio(), mapList.getToRatio()); @@ -1984,39 +1986,61 @@ public class AlignmentUtils static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping, AlignmentI dataset) { - char[] seqChars = seq.getSequence(); - List fromRanges = mapping.getMap().getFromRanges(); - int cdsWidth = MappingUtils.getLength(fromRanges); - char[] newSeqChars = new char[cdsWidth]; + /* + * construct CDS sequence name as "CDS|" with 'from id' held in the mapping + * if set (e.g. EMBL protein_id), else sequence name appended + */ + String mapFromId = mapping.getMappedFromId(); + final String seqId = "CDS|" + + (mapFromId != null ? mapFromId : seq.getName()); - int newPos = 0; - for (int[] range : fromRanges) + SequenceI newSeq = null; + + final MapList maplist = mapping.getMap(); + if (maplist.isContiguous() && maplist.isFromForwardStrand()) { - if (range[0] <= range[1]) - { - // forward strand mapping - just copy the range - int length = range[1] - range[0] + 1; - System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos, - length); - newPos += length; - } - else + /* + * just a subsequence, keep same dataset sequence + */ + int start = maplist.getFromLowest(); + int end = maplist.getFromHighest(); + newSeq = seq.getSubSequence(start - 1, end); + newSeq.setName(seqId); + } + else + { + /* + * construct by splicing mapped from ranges + */ + char[] seqChars = seq.getSequence(); + List fromRanges = maplist.getFromRanges(); + int cdsWidth = MappingUtils.getLength(fromRanges); + char[] newSeqChars = new char[cdsWidth]; + + int newPos = 0; + for (int[] range : fromRanges) { - // reverse strand mapping - copy and complement one by one - for (int i = range[0]; i >= range[1]; i--) + if (range[0] <= range[1]) + { + // forward strand mapping - just copy the range + int length = range[1] - range[0] + 1; + System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos, + length); + newPos += length; + } + else { - newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]); + // reverse strand mapping - copy and complement one by one + for (int i = range[0]; i >= range[1]; i--) + { + newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]); + } } } + + newSeq = new Sequence(seqId, newSeqChars, 1, newPos); } - /* - * assign 'from id' held in the mapping if set (e.g. EMBL protein_id), - * else generate a sequence name - */ - String mapFromId = mapping.getMappedFromId(); - String seqId = "CDS|" + (mapFromId != null ? mapFromId : seq.getName()); - SequenceI newSeq = new Sequence(seqId, newSeqChars, 1, newPos); if (dataset != null) { SequenceI[] matches = dataset.findSequenceMatch(newSeq.getName()); @@ -2159,6 +2183,10 @@ public class AlignmentUtils { copyTo = copyTo.getDatasetSequence(); } + if (fromSeq == copyTo || fromSeq.getDatasetSequence() == copyTo) + { + return 0; // shared dataset sequence + } /* * get features, optionally restricted by an ontology term @@ -2809,17 +2837,6 @@ public class AlignmentUtils */ public static int alignAs(AlignmentI unaligned, AlignmentI aligned) { - /* - * easy case - aligning a copy of aligned sequences - */ - if (alignAsSameSequences(unaligned, aligned)) - { - return unaligned.getHeight(); - } - - /* - * fancy case - aligning via mappings between sequences - */ List unmapped = new ArrayList<>(); Map> columnMap = buildMappedColumnsMap( unaligned, aligned, unmapped); @@ -2880,12 +2897,14 @@ public class AlignmentUtils * true; else returns false * * @param unaligned - * - sequences to be aligned based on aligned + * - sequences to be aligned based on aligned * @param aligned - * - 'guide' alignment containing sequences derived from same dataset - * as unaligned + * - 'guide' alignment containing sequences derived from same + * dataset as unaligned * @return + * @deprecated probably obsolete and incomplete */ + @Deprecated static boolean alignAsSameSequences(AlignmentI unaligned, AlignmentI aligned) { @@ -2907,15 +2926,22 @@ public class AlignmentUtils } /* - * first pass - check whether all sequences to be aligned share a dataset - * sequence with an aligned sequence + * first pass - check whether all sequences to be aligned share a + * dataset sequence with an aligned sequence; also note the leftmost + * ungapped column from which to copy */ + int leftmost = Integer.MAX_VALUE; for (SequenceI seq : unaligned.getSequences()) { - if (!alignedDatasets.containsKey(seq.getDatasetSequence())) + final SequenceI ds = seq.getDatasetSequence(); + if (!alignedDatasets.containsKey(ds)) { return false; } + SequenceI alignedSeq = alignedDatasets.get(ds) + .get(0); + int startCol = alignedSeq.findIndex(seq.getStart()); // 1.. + leftmost = Math.min(leftmost, startCol); } /* @@ -2923,13 +2949,25 @@ public class AlignmentUtils * heuristic rule: pair off sequences in order for the case where * more than one shares the same dataset sequence */ + final char gapCharacter = aligned.getGapCharacter(); for (SequenceI seq : unaligned.getSequences()) { List alignedSequences = alignedDatasets .get(seq.getDatasetSequence()); - // TODO: getSequenceAsString() will be deprecated in the future - // TODO: need to leave to SequenceI implementor to update gaps - seq.setSequence(alignedSequences.get(0).getSequenceAsString()); + SequenceI alignedSeq = alignedSequences.get(0); + + /* + * gap fill for leading (5') UTR if any + */ + // TODO this copies intron columns - wrong! + int startCol = alignedSeq.findIndex(seq.getStart()); // 1.. + int endCol = alignedSeq.findIndex(seq.getEnd()); + char[] seqchars = new char[endCol - leftmost + 1]; + Arrays.fill(seqchars, gapCharacter); + char[] toCopy = alignedSeq.getSequence(startCol - 1, endCol); + System.arraycopy(toCopy, 0, seqchars, startCol - leftmost, + toCopy.length); + seq.setSequence(String.valueOf(seqchars)); if (alignedSequences.size() > 0) { // pop off aligned sequences (except the last one) diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index 26f6e2a..fffa137 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -220,12 +220,12 @@ public class AlignedCodonFrame /** * Returns the first mapping found which is to or from the given sequence, or - * null. + * null if none is found * * @param seq * @return */ - public Mapping getMappingForSequence(SequenceI seq, boolean cdsOnly) + public Mapping getMappingForSequence(SequenceI seq) { SequenceI seqDs = seq.getDatasetSequence(); seqDs = seqDs != null ? seqDs : seq; @@ -234,11 +234,7 @@ public class AlignedCodonFrame { if (ssm.fromSeq == seqDs || ssm.mapping.to == seqDs) { - if (!cdsOnly || ssm.fromSeq.getName().startsWith("CDS") - || ssm.mapping.to.getName().startsWith("CDS")) - { - return ssm.mapping; - } + return ssm.mapping; } } return null; diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index 9f28ee1..731e976 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -1236,4 +1236,14 @@ public class MapList return new MapList(getFromRanges(), toRanges, outFromRatio, outToRatio); } + /** + * Answers true if the mapping is from one contiguous range to another, else + * false + * + * @return + */ + public boolean isContiguous() + { + return fromShifts.size() == 1 && toShifts.size() == 1; + } } diff --git a/src/jalview/viewmodel/seqfeatures/FeatureRendererModel.java b/src/jalview/viewmodel/seqfeatures/FeatureRendererModel.java index 2a17bf2..838c41a 100644 --- a/src/jalview/viewmodel/seqfeatures/FeatureRendererModel.java +++ b/src/jalview/viewmodel/seqfeatures/FeatureRendererModel.java @@ -1195,9 +1195,8 @@ public abstract class FeatureRendererModel for (AlignedCodonFrame acf : mappings) { - mapping = acf.getMappingForSequence(sequence, true); - if (mapping == null || mapping.getMap().getFromRatio() == mapping - .getMap().getToRatio()) + mapping = acf.getMappingForSequence(sequence); + if (mapping == null || !mapping.getMap().isTripletMap()) { continue; // we are only looking for 3:1 or 1:3 mappings } -- 1.7.10.2