X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FAlignedCodonFrame.java;h=2f33e43ceb7ae9ceaa866fa728220fe868fcdd54;hb=3af9017e1f6c6d045c6c0d41245fcd7bfdae32a6;hp=4fbfd62c26796f64fb6d9b1ab1cb335554ae7322;hpb=52402128d98c6744eb6348dd788fe6e8cd34575c;p=jalview.git diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index 4fbfd62..2f33e43 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -20,13 +20,13 @@ */ package jalview.datamodel; -import jalview.util.MapList; -import jalview.util.MappingUtils; - import java.util.AbstractList; import java.util.ArrayList; import java.util.List; +import jalview.util.MapList; +import jalview.util.MappingUtils; + /** * Stores mapping between the columns of a protein alignment and a DNA alignment * and a list of individual codon to amino acid mappings between sequences. @@ -90,10 +90,11 @@ public class AlignedCodonFrame return that.mapping == null; } // TODO: can simplify by asserting fromSeq is a dataset sequence - return (this.fromSeq == that.fromSeq || (this.fromSeq != null - && that.fromSeq != null - && this.fromSeq.getDatasetSequence() != null && this.fromSeq - .getDatasetSequence() == that.fromSeq.getDatasetSequence())) + return (this.fromSeq == that.fromSeq + || (this.fromSeq != null && that.fromSeq != null + && this.fromSeq.getDatasetSequence() != null + && this.fromSeq.getDatasetSequence() == that.fromSeq + .getDatasetSequence())) && this.mapping.equals(that.mapping); } @@ -106,6 +107,93 @@ public class AlignedCodonFrame { return mapping; } + + /** + * Returns true if the mapping covers the full length of the given sequence. + * This allows us to distinguish the CDS that codes for a protein from + * another overlapping CDS in the parent dna sequence. + * + * @param seq + * @return + */ + public boolean covers(SequenceI seq) + { + List mappedRanges = null; + MapList mapList = mapping.getMap(); + if (fromSeq == seq || fromSeq == seq.getDatasetSequence()) + { + mappedRanges = mapList.getFromRanges(); + } + else if (mapping.to == seq || mapping.to == seq.getDatasetSequence()) + { + mappedRanges = mapList.getToRanges(); + } + else + { + return false; + } + + /* + * check that each mapped range lies within the sequence range + * (necessary for circular CDS - example EMBL:J03321:AAA91567) + * and mapped length covers (at least) sequence length + */ + int length = 0; + for (int[] range : mappedRanges) + { + int from = Math.min(range[0], range[1]); + int to = Math.max(range[0], range[1]); + if (from < seq.getStart() || to > seq.getEnd()) + { + return false; + } + length += (to - from + 1); + } + // add 1 to mapped length to allow for a mapped stop codon + if (length + 1 < (seq.getEnd() - seq.getStart() + 1)) + { + return false; + } + return true; + } + + /** + * Adds any regions mapped to or from position {@code pos} in sequence + * {@code seq} to the given search results + * + * @param seq + * @param pos + * @param sr + */ + public void markMappedRegion(SequenceI seq, int pos, SearchResultsI sr) + { + int[] codon = null; + SequenceI mappedSeq = null; + SequenceI ds = seq.getDatasetSequence(); + if (ds == null) + { + ds = seq; + } + + if (this.fromSeq == seq || this.fromSeq == ds) + { + codon = this.mapping.map.locateInTo(pos, pos); + mappedSeq = this.mapping.to; + } + else if (this.mapping.to == seq || this.mapping.to == ds) + { + codon = this.mapping.map.locateInFrom(pos, pos); + mappedSeq = this.fromSeq; + } + + if (codon != null) + { + for (int i = 0; i < codon.length; i += 2) + { + sr.addResult(mappedSeq, codon[i], codon[i + 1]); + } + } + } } private List mappings; @@ -115,7 +203,7 @@ public class AlignedCodonFrame */ public AlignedCodonFrame() { - mappings = new ArrayList(); + mappings = new ArrayList<>(); } /** @@ -149,8 +237,8 @@ public class AlignedCodonFrame SequenceI fromSeq = (dnaseq.getDatasetSequence() == null) ? dnaseq : dnaseq.getDatasetSequence(); - SequenceI toSeq = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq - .getDatasetSequence(); + SequenceI toSeq = (aaseq.getDatasetSequence() == null) ? aaseq + : aaseq.getDatasetSequence(); /* * if we already hold a mapping between these sequences, just add to it @@ -178,7 +266,7 @@ public class AlignedCodonFrame { // TODO return a list instead? // return dnaSeqs; - List seqs = new ArrayList(); + List seqs = new ArrayList<>(); for (SequenceToSequenceMapping ssm : mappings) { seqs.add(ssm.fromSeq); @@ -189,7 +277,7 @@ public class AlignedCodonFrame public SequenceI[] getAaSeqs() { // TODO not used - remove? - List seqs = new ArrayList(); + List seqs = new ArrayList<>(); for (SequenceToSequenceMapping ssm : mappings) { seqs.add(ssm.mapping.to); @@ -199,7 +287,7 @@ public class AlignedCodonFrame public MapList[] getdnaToProt() { - List maps = new ArrayList(); + List maps = new ArrayList<>(); for (SequenceToSequenceMapping ssm : mappings) { maps.add(ssm.mapping.map); @@ -209,7 +297,7 @@ public class AlignedCodonFrame public Mapping[] getProtMappings() { - List maps = new ArrayList(); + List maps = new ArrayList<>(); for (SequenceToSequenceMapping ssm : mappings) { maps.add(ssm.mapping); @@ -219,7 +307,7 @@ public class AlignedCodonFrame /** * Returns the first mapping found which is to or from the given sequence, or - * null. + * null if none is found * * @param seq * @return @@ -260,9 +348,12 @@ public class AlignedCodonFrame } /** + * Return the corresponding aligned or dataset dna sequence for given amino + * acid sequence, or null if not found. returns the sequence from the first + * mapping found that involves the protein sequence. * - * @param sequenceRef - * @return null or corresponding aaSeq entry for dnaSeq entry + * @param aaSeqRef + * @return */ public SequenceI getDnaForAaSeq(SequenceI aaSeqRef) { @@ -292,7 +383,8 @@ public class AlignedCodonFrame /** * Add search results for regions in other sequences that translate or are - * translated from a particular position in seq + * translated from a particular position in seq (which may be an aligned or + * dataset sequence) * * @param seq * @param index @@ -303,69 +395,15 @@ public class AlignedCodonFrame public void markMappedRegion(SequenceI seq, int index, SearchResultsI results) { - int[] codon; SequenceI ds = seq.getDatasetSequence(); - for (SequenceToSequenceMapping ssm : mappings) + if (ds == null) { - if (ssm.fromSeq == seq || ssm.fromSeq == ds) - { - codon = ssm.mapping.map.locateInTo(index, index); - if (codon != null) - { - for (int i = 0; i < codon.length; i += 2) - { - results.addResult(ssm.mapping.to, codon[i], codon[i + 1]); - } - } - } - else if (ssm.mapping.to == seq || ssm.mapping.to == ds) - { - { - codon = ssm.mapping.map.locateInFrom(index, index); - if (codon != null) - { - for (int i = 0; i < codon.length; i += 2) - { - results.addResult(ssm.fromSeq, codon[i], codon[i + 1]); - } - } - } - } + ds = seq; } - } - - /** - * Returns the DNA codon positions (base 1) for the given position (base 1) in - * a mapped protein sequence, or null if no mapping is found. - * - * Intended for use in aligning cDNA to match aligned protein. Only the first - * mapping found is returned, so not suitable for use if multiple protein - * sequences are mapped to the same cDNA (but aligning cDNA as protein is - * ill-defined for this case anyway). - * - * @param seq - * the DNA dataset sequence - * @param aaPos - * residue position (base 1) in a protein sequence - * @return - */ - public int[] getDnaPosition(SequenceI seq, int aaPos) - { - /* - * Adapted from markMappedRegion(). - */ - MapList ml = null; - int i = 0; for (SequenceToSequenceMapping ssm : mappings) { - if (ssm.fromSeq == seq) - { - ml = getdnaToProt()[i]; - break; - } - i++; + ssm.markMappedRegion(ds, index, results); } - return ml == null ? null : ml.locateInFrom(aaPos, aaPos); } /** @@ -434,8 +472,8 @@ public class AlignedCodonFrame { SequenceI targetDs = target.getDatasetSequence() == null ? target : target.getDatasetSequence(); - SequenceI queryDs = query.getDatasetSequence() == null ? query : query - .getDatasetSequence(); + SequenceI queryDs = query.getDatasetSequence() == null ? query + : query.getDatasetSequence(); if (targetDs == null || queryDs == null /*|| dnaToProt == null*/) { return null; @@ -484,7 +522,7 @@ public class AlignedCodonFrame { MapList ml = null; SequenceI dnaSeq = null; - List result = new ArrayList(); + List result = new ArrayList<>(); for (SequenceToSequenceMapping ssm : mappings) { @@ -504,10 +542,11 @@ public class AlignedCodonFrame * Read off the mapped nucleotides (converting to position base 0) */ codonPos = MappingUtils.flattenRanges(codonPos); - char[] dna = dnaSeq.getSequence(); int start = dnaSeq.getStart(); - result.add(new char[] { dna[codonPos[0] - start], - dna[codonPos[1] - start], dna[codonPos[2] - start] }); + char c1 = dnaSeq.getCharAt(codonPos[0] - start); + char c2 = dnaSeq.getCharAt(codonPos[1] - start); + char c3 = dnaSeq.getCharAt(codonPos[2] - start); + result.add(new char[] { c1, c2, c3 }); } } return result.isEmpty() ? null : result; @@ -522,8 +561,8 @@ public class AlignedCodonFrame */ public List getMappingsFromSequence(SequenceI seq) { - List result = new ArrayList(); - List related = new ArrayList(); + List result = new ArrayList<>(); + List related = new ArrayList<>(); SequenceI seqDs = seq.getDatasetSequence(); seqDs = seqDs != null ? seqDs : seq; @@ -579,8 +618,9 @@ public class AlignedCodonFrame */ protected int realiseWith(SequenceI seq, boolean doUpdate) { - SequenceI ds = seq.getDatasetSequence() != null ? seq - .getDatasetSequence() : seq; + SequenceI ds = seq.getDatasetSequence() != null + ? seq.getDatasetSequence() + : seq; int count = 0; /* @@ -654,8 +694,8 @@ public class AlignedCodonFrame { int start = replacement.getStart(); int end = replacement.getEnd(); - boolean mappingOverlapsSequence = (mapStart >= start && mapStart <= end) - || (mapEnd >= start && mapEnd <= end); + boolean mappingOverlapsSequence = (mapStart >= start + && mapStart <= end) || (mapEnd >= start && mapEnd <= end); if (mappingOverlapsSequence) { return true; @@ -732,8 +772,8 @@ public class AlignedCodonFrame { SequenceI dssFrom = fromSeq.getDatasetSequence() == null ? fromSeq : fromSeq.getDatasetSequence(); - SequenceI dssTo = toSeq.getDatasetSequence() == null ? toSeq : toSeq - .getDatasetSequence(); + SequenceI dssTo = toSeq.getDatasetSequence() == null ? toSeq + : toSeq.getDatasetSequence(); for (SequenceToSequenceMapping mapping : mappings) { @@ -764,7 +804,7 @@ public class AlignedCodonFrame * Two AlignedCodonFrame objects are equal if they hold the same ordered list * of mappings * - * @see SequenceToSequenceMapping# + * @see SequenceToSequenceMapping#equals */ @Override public boolean equals(Object obj) @@ -780,4 +820,55 @@ public class AlignedCodonFrame { return mappings; } + + /** + * Returns the first mapping found which is between the two given sequences, + * and covers the full extent of both. + * + * @param seq1 + * @param seq2 + * @return + */ + public SequenceToSequenceMapping getCoveringMapping(SequenceI seq1, + SequenceI seq2) + { + for (SequenceToSequenceMapping mapping : mappings) + { + if (mapping.covers(seq2) && mapping.covers(seq1)) + { + return mapping; + } + } + return null; + } + + /** + * Returns the first mapping found which is between the given dataset sequence + * and another, is a triplet mapping (3:1 or 1:3), and covers the full extent + * of both sequences involved + * + * @param seq + * @return + */ + public SequenceToSequenceMapping getCoveringCodonMapping(SequenceI seq) + { + for (SequenceToSequenceMapping mapping : mappings) + { + if (mapping.getMapping().getMap().isTripletMap() + && mapping.covers(seq)) + { + if (mapping.fromSeq == seq + && mapping.covers(mapping.getMapping().getTo())) + { + return mapping; + } + else if (mapping.getMapping().getTo() == seq + && mapping.covers(mapping.fromSeq)) + { + return mapping; + } + } + } + return null; + } }