From da768251d307c7ce11283d72e0e522b2c5fac526 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 22 Dec 2015 09:03:03 +0000 Subject: [PATCH] JAL-653 AlignedCodonFrame internal refactor, merge ranges with MapList.addMapList --- src/jalview/analysis/AAFrequency.java | 17 +- src/jalview/datamodel/AlignedCodonFrame.java | 367 ++++++++++++--------- src/jalview/util/MapList.java | 110 +++++- test/jalview/datamodel/AlignedCodonFrameTest.java | 202 ++++++++++-- test/jalview/util/MapListTest.java | 121 +++++++ 5 files changed, 601 insertions(+), 216 deletions(-) diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java index 5227795..3d61b11 100755 --- a/src/jalview/analysis/AAFrequency.java +++ b/src/jalview/analysis/AAFrequency.java @@ -32,7 +32,6 @@ import jalview.util.QuickSort; import java.util.Arrays; import java.util.Hashtable; import java.util.List; -import java.util.Set; /** * Takes in a vector or array of sequences and column start and column end and @@ -520,7 +519,7 @@ public class AAFrequency Hashtable[] hconsensus) { final char gapCharacter = alignment.getGapCharacter(); - Set mappings = alignment.getCodonFrames(); + List mappings = alignment.getCodonFrames(); if (mappings == null || mappings.isEmpty()) { return; @@ -541,12 +540,16 @@ public class AAFrequency { continue; } - char[] codon = MappingUtils.findCodonFor(seq, col, mappings); - int codonEncoded = CodingUtils.encodeCodon(codon); - if (codonEncoded >= 0) + List codons = MappingUtils + .findCodonsFor(seq, col, mappings); + for (char[] codon : codons) { - codonCounts[codonEncoded + 2]++; - ungappedCount++; + int codonEncoded = CodingUtils.encodeCodon(codon); + if (codonEncoded >= 0) + { + codonCounts[codonEncoded + 2]++; + ungappedCount++; + } } } codonCounts[1] = ungappedCount; diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index d24593a..10632c5 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ 
b/src/jalview/datamodel/AlignedCodonFrame.java @@ -33,23 +33,30 @@ import java.util.List; public class AlignedCodonFrame { - /** - * tied array of na Sequence objects. + /* + * Data bean to hold mappings from one sequence to another */ - private SequenceI[] dnaSeqs = null; + private class SequenceToSequenceMapping + { + private SequenceI fromSeq; - /** - * tied array of Mappings to protein sequence Objects and SequenceI[] - * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs - * element to corresponding aaSeqs element - */ - private Mapping[] dnaToProt = null; + private Mapping mapping; + + SequenceToSequenceMapping(SequenceI from, Mapping map) + { + this.fromSeq = from; + this.mapping = map; + } + } + + private List mappings; /** * Constructor */ public AlignedCodonFrame() { + mappings = new ArrayList(); } /** @@ -62,68 +69,75 @@ public class AlignedCodonFrame */ public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map) { - int nlen = 1; - if (dnaSeqs != null) - { - nlen = dnaSeqs.length + 1; - } - SequenceI[] ndna = new SequenceI[nlen]; - Mapping[] ndtp = new Mapping[nlen]; - if (dnaSeqs != null) - { - System.arraycopy(dnaSeqs, 0, ndna, 0, dnaSeqs.length); - System.arraycopy(dnaToProt, 0, ndtp, 0, dnaSeqs.length); - } - dnaSeqs = ndna; - dnaToProt = ndtp; - nlen--; - dnaSeqs[nlen] = (dnaseq.getDatasetSequence() == null) ? dnaseq : dnaseq - .getDatasetSequence(); - Mapping mp = new Mapping(map); // JBPNote DEBUG! THIS ! // dnaseq.transferAnnotation(aaseq, mp); // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse())); - mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq + + SequenceI fromSeq = (dnaseq.getDatasetSequence() == null) ? dnaseq + : dnaseq.getDatasetSequence(); + SequenceI toSeq = (aaseq.getDatasetSequence() == null) ? 
aaseq : aaseq .getDatasetSequence(); - dnaToProt[nlen] = mp; + + /* + * if we already hold a mapping between these sequences, just add to it + */ + for (SequenceToSequenceMapping ssm : mappings) + { + if (ssm.fromSeq == fromSeq && ssm.mapping.to == toSeq) + { + ssm.mapping.map.addMapList(map); + return; + } + } + + /* + * otherwise, add a new sequence mapping + */ + Mapping mp = new Mapping(toSeq, map); + mappings.add(new SequenceToSequenceMapping(fromSeq, mp)); } public SequenceI[] getdnaSeqs() { - return dnaSeqs; + // TODO return a list instead? + // return dnaSeqs; + List seqs = new ArrayList(); + for (SequenceToSequenceMapping ssm : mappings) + { + seqs.add(ssm.fromSeq); + } + return seqs.toArray(new SequenceI[seqs.size()]); } public SequenceI[] getAaSeqs() { - if (dnaToProt == null) + // TODO not used - remove? + List seqs = new ArrayList(); + for (SequenceToSequenceMapping ssm : mappings) { - return null; - } - SequenceI[] sqs = new SequenceI[dnaToProt.length]; - for (int sz = 0; sz < dnaToProt.length; sz++) - { - sqs[sz] = dnaToProt[sz].to; + seqs.add(ssm.mapping.to); } - return sqs; + return seqs.toArray(new SequenceI[seqs.size()]); } public MapList[] getdnaToProt() { - if (dnaToProt == null) + List maps = new ArrayList(); + for (SequenceToSequenceMapping ssm : mappings) { - return null; + maps.add(ssm.mapping.map); } - MapList[] sqs = new MapList[dnaToProt.length]; - for (int sz = 0; sz < dnaToProt.length; sz++) - { - sqs[sz] = dnaToProt[sz].map; - } - return sqs; + return maps.toArray(new MapList[maps.size()]); } public Mapping[] getProtMappings() { - return dnaToProt; + List maps = new ArrayList(); + for (SequenceToSequenceMapping ssm : mappings) + { + maps.add(ssm.mapping); + } + return maps.toArray(new Mapping[maps.size()]); } /** @@ -135,18 +149,14 @@ public class AlignedCodonFrame */ public Mapping getMappingForSequence(SequenceI seq) { - if (dnaSeqs == null) - { - return null; - } SequenceI seqDs = seq.getDatasetSequence(); seqDs = seqDs != null ? 
seqDs : seq; - for (int ds = 0; ds < dnaSeqs.length; ds++) + for (SequenceToSequenceMapping ssm : mappings) { - if (dnaSeqs[ds] == seqDs || dnaToProt[ds].to == seqDs) + if (ssm.fromSeq == seqDs || ssm.mapping.to == seqDs) { - return dnaToProt[ds]; + return ssm.mapping; } } return null; @@ -161,16 +171,12 @@ public class AlignedCodonFrame */ public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef) { - if (dnaSeqs == null) - { - return null; - } SequenceI dnads = dnaSeqRef.getDatasetSequence(); - for (int ds = 0; ds < dnaSeqs.length; ds++) + for (SequenceToSequenceMapping ssm : mappings) { - if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads) + if (ssm.fromSeq == dnaSeqRef || ssm.fromSeq == dnads) { - return dnaToProt[ds].to; + return ssm.mapping.to; } } return null; @@ -183,16 +189,12 @@ public class AlignedCodonFrame */ public SequenceI getDnaForAaSeq(SequenceI aaSeqRef) { - if (dnaToProt == null) - { - return null; - } SequenceI aads = aaSeqRef.getDatasetSequence(); - for (int as = 0; as < dnaToProt.length; as++) + for (SequenceToSequenceMapping ssm : mappings) { - if (dnaToProt[as].to == aaSeqRef || dnaToProt[as].to == aads) + if (ssm.mapping.to == aaSeqRef || ssm.mapping.to == aads) { - return dnaSeqs[as]; + return ssm.fromSeq; } } return null; @@ -224,36 +226,30 @@ public class AlignedCodonFrame public void markMappedRegion(SequenceI seq, int index, SearchResults results) { - if (dnaToProt == null) - { - return; - } int[] codon; SequenceI ds = seq.getDatasetSequence(); - for (int mi = 0; mi < dnaToProt.length; mi++) + for (SequenceToSequenceMapping ssm : mappings) { - if (dnaSeqs[mi] == seq || dnaSeqs[mi] == ds) + if (ssm.fromSeq == seq || ssm.fromSeq == ds) { - // DEBUG System.err.println("dna pos "+index); - codon = dnaToProt[mi].map.locateInTo(index, index); + codon = ssm.mapping.map.locateInTo(index, index); if (codon != null) { for (int i = 0; i < codon.length; i += 2) { - results.addResult(dnaToProt[mi].to, codon[i], codon[i + 1]); + 
results.addResult(ssm.mapping.to, codon[i], codon[i + 1]); } } } - else if (dnaToProt[mi].to == seq || dnaToProt[mi].to == ds) + else if (ssm.mapping.to == seq || ssm.mapping.to == ds) { - // DEBUG System.err.println("aa pos "+index); { - codon = dnaToProt[mi].map.locateInFrom(index, index); + codon = ssm.mapping.map.locateInFrom(index, index); if (codon != null) { for (int i = 0; i < codon.length; i += 2) { - results.addResult(dnaSeqs[mi], codon[i], codon[i + 1]); + results.addResult(ssm.fromSeq, codon[i], codon[i + 1]); } } } @@ -282,13 +278,15 @@ public class AlignedCodonFrame * Adapted from markMappedRegion(). */ MapList ml = null; - for (int i = 0; i < dnaToProt.length; i++) + int i = 0; + for (SequenceToSequenceMapping ssm : mappings) { - if (dnaSeqs[i] == seq) + if (ssm.fromSeq == seq) { ml = getdnaToProt()[i]; break; } + i++; } return ml == null ? null : ml.locateInFrom(aaPos, aaPos); } @@ -307,18 +305,15 @@ public class AlignedCodonFrame /* * Search mapped protein ('to') sequences first. */ - if (this.dnaToProt != null) + for (SequenceToSequenceMapping ssm : mappings) { - for (int i = 0; i < dnaToProt.length; i++) + if (ssm.fromSeq == seq) { - if (this.dnaSeqs[i] == seq) + for (SequenceI sourceAligned : al.getSequences()) { - for (SequenceI sourceAligned : al.getSequences()) + if (ssm.mapping.to == sourceAligned.getDatasetSequence()) { - if (this.dnaToProt[i].to == sourceAligned.getDatasetSequence()) - { - return sourceAligned; - } + return sourceAligned; } } } @@ -327,18 +322,15 @@ public class AlignedCodonFrame /* * Then try mapped dna sequences. 
*/ - if (this.dnaToProt != null) + for (SequenceToSequenceMapping ssm : mappings) { - for (int i = 0; i < dnaToProt.length; i++) + if (ssm.mapping.to == seq) { - if (this.dnaToProt[i].to == seq) + for (SequenceI sourceAligned : al.getSequences()) { - for (SequenceI sourceAligned : al.getSequences()) + if (ssm.fromSeq == sourceAligned.getDatasetSequence()) { - if (this.dnaSeqs[i] == sourceAligned.getDatasetSequence()) - { - return sourceAligned; - } + return sourceAligned; } } } @@ -348,31 +340,45 @@ public class AlignedCodonFrame } /** - * Returns the region in the 'mappedFrom' sequence's dataset that is mapped to - * position 'pos' (base 1) in the 'mappedTo' sequence's dataset. The region is - * a set of start/end position pairs. + * Returns the region in the target sequence's dataset that is mapped to the + * given position (base 1) in the query sequence's dataset. The region is a + * set of start/end position pairs. * - * @param mappedFrom - * @param mappedTo - * @param pos + * @param target + * @param query + * @param queryPos * @return */ - public int[] getMappedRegion(SequenceI mappedFrom, SequenceI mappedTo, - int pos) + public int[] getMappedRegion(SequenceI target, SequenceI query, + int queryPos) { - SequenceI targetDs = mappedFrom.getDatasetSequence() == null ? mappedFrom - : mappedFrom.getDatasetSequence(); - SequenceI sourceDs = mappedTo.getDatasetSequence() == null ? mappedTo - : mappedTo.getDatasetSequence(); - if (targetDs == null || sourceDs == null || dnaToProt == null) + SequenceI targetDs = target.getDatasetSequence() == null ? target + : target.getDatasetSequence(); + SequenceI queryDs = query.getDatasetSequence() == null ? 
query : query + .getDatasetSequence(); + if (targetDs == null || queryDs == null /*|| dnaToProt == null*/) { return null; } - for (int mi = 0; mi < dnaToProt.length; mi++) + for (SequenceToSequenceMapping ssm : mappings) { - if (dnaSeqs[mi] == targetDs && dnaToProt[mi].to == sourceDs) + /* + * try mapping from target to query + */ + if (ssm.fromSeq == targetDs && ssm.mapping.to == queryDs) + { + int[] codon = ssm.mapping.map.locateInFrom(queryPos, queryPos); + if (codon != null) + { + return codon; + } + } + /* + * else try mapping from query to target + */ + else if (ssm.fromSeq == queryDs && ssm.mapping.to == targetDs) { - int[] codon = dnaToProt[mi].map.locateInFrom(pos, pos); + int[] codon = ssm.mapping.map.locateInTo(queryPos, queryPos); if (codon != null) { return codon; @@ -383,8 +389,10 @@ public class AlignedCodonFrame } /** - * Returns the DNA codon for the given position (base 1) in a mapped protein - * sequence, or null if no mapping is found. + * Returns the mapped DNA codons for the given position in a protein sequence, + * or null if no mapping is found. Returns a list of (e.g.) ['g', 'c', 't'] + * codons. There may be more than one codon mapped to the protein if (for + * example), there are mappings to cDNA variants. 
* * @param protein * the peptide dataset sequence @@ -392,41 +400,36 @@ public class AlignedCodonFrame * residue position (base 1) in the peptide sequence * @return */ - public char[] getMappedCodon(SequenceI protein, int aaPos) + public List getMappedCodons(SequenceI protein, int aaPos) { - if (dnaToProt == null) - { - return null; - } MapList ml = null; SequenceI dnaSeq = null; - for (int i = 0; i < dnaToProt.length; i++) + List result = new ArrayList(); + + for (SequenceToSequenceMapping ssm : mappings) { - if (dnaToProt[i].to == protein) + if (ssm.mapping.to == protein) { - ml = getdnaToProt()[i]; - dnaSeq = dnaSeqs[i]; - break; + ml = ssm.mapping.map; + dnaSeq = ssm.fromSeq; + + int[] codonPos = ml.locateInFrom(aaPos, aaPos); + if (codonPos == null) + { + return null; + } + + /* + * Read off the mapped nucleotides (converting to position base 0) + */ + codonPos = MappingUtils.flattenRanges(codonPos); + char[] dna = dnaSeq.getSequence(); + int start = dnaSeq.getStart(); + result.add(new char[] { dna[codonPos[0] - start], + dna[codonPos[1] - start], dna[codonPos[2] - start] }); } } - if (ml == null) - { - return null; - } - int[] codonPos = ml.locateInFrom(aaPos, aaPos); - if (codonPos == null) - { - return null; - } - - /* - * Read off the mapped nucleotides (converting to position base 0) - */ - codonPos = MappingUtils.flattenRanges(codonPos); - char[] dna = dnaSeq.getSequence(); - int start = dnaSeq.getStart(); - return new char[] { dna[codonPos[0] - start], dna[codonPos[1] - start], - dna[codonPos[2] - start] }; + return result.isEmpty() ? null : result; } /** @@ -439,18 +442,14 @@ public class AlignedCodonFrame public List getMappingsForSequence(SequenceI seq) { List result = new ArrayList(); - if (dnaSeqs == null) - { - return result; - } List related = new ArrayList(); SequenceI seqDs = seq.getDatasetSequence(); seqDs = seqDs != null ? 
seqDs : seq; - for (int ds = 0; ds < dnaSeqs.length; ds++) + for (SequenceToSequenceMapping ssm : mappings) { - final Mapping mapping = dnaToProt[ds]; - if (dnaSeqs[ds] == seqDs || mapping.to == seqDs) + final Mapping mapping = ssm.mapping; + if (ssm.fromSeq == seqDs || mapping.to == seqDs) { if (!related.contains(mapping.to)) { @@ -506,22 +505,27 @@ public class AlignedCodonFrame /* * check for replaceable DNA ('map from') sequences */ - for (int i = 0; i < dnaSeqs.length; i++) + for (SequenceToSequenceMapping ssm : mappings) { - SequenceI dna = dnaSeqs[i]; + SequenceI dna = ssm.fromSeq; if (dna instanceof SequenceDummy && dna.getName().equals(ds.getName())) { - Mapping mapping = dnaToProt[i]; + Mapping mapping = ssm.mapping; int mapStart = mapping.getMap().getFromLowest(); int mapEnd = mapping.getMap().getFromHighest(); - boolean mappable = couldReplaceSequence(dna, ds, mapStart, mapEnd); + boolean mappable = couldRealiseSequence(dna, ds, mapStart, mapEnd); if (mappable) { count++; if (doUpdate) { - dnaSeqs[i] = ds; + // TODO: new method ? ds.realise(dna); + // might want to copy database refs as well + ds.setSequenceFeatures(dna.getSequenceFeatures()); + // dnaSeqs[i] = ds; + ssm.fromSeq = ds; + System.out.println("Realised mapped sequence " + ds.getName()); } } } @@ -529,17 +533,20 @@ public class AlignedCodonFrame /* * check for replaceable protein ('map to') sequences */ - SequenceI prot = dnaToProt[i].getTo(); - Mapping mapping = dnaToProt[i]; + Mapping mapping = ssm.mapping; + SequenceI prot = mapping.getTo(); int mapStart = mapping.getMap().getToLowest(); int mapEnd = mapping.getMap().getToHighest(); - boolean mappable = couldReplaceSequence(prot, ds, mapStart, mapEnd); + boolean mappable = couldRealiseSequence(prot, ds, mapStart, mapEnd); if (mappable) { count++; if (doUpdate) { - dnaToProt[i].setTo(ds); + // TODO: new method ? 
ds.realise(dna); + // might want to copy database refs as well + ds.setSequenceFeatures(dna.getSequenceFeatures()); + ssm.mapping.setTo(ds); } } } @@ -557,10 +564,11 @@ public class AlignedCodonFrame * @param mapEnd * @return */ - protected static boolean couldReplaceSequence(SequenceI existing, + protected static boolean couldRealiseSequence(SequenceI existing, SequenceI replacement, int mapStart, int mapEnd) { if (existing instanceof SequenceDummy + && !(replacement instanceof SequenceDummy) && existing.getName().equals(replacement.getName())) { int start = replacement.getStart(); @@ -574,4 +582,39 @@ public class AlignedCodonFrame } return false; } + + /** + * Change any mapping to the given sequence to be to its dataset sequence + * instead. For use when mappings are created before their referenced + * sequences are instantiated, for example when parsing GFF data. + * + * @param seq + */ + public void updateToDataset(SequenceI seq) + { + if (seq == null || seq.getDatasetSequence() == null) + { + return; + } + SequenceI ds = seq.getDatasetSequence(); + + for (SequenceToSequenceMapping ssm : mappings) + /* + * 'from' sequences + */ + { + if (ssm.fromSeq == seq) + { + ssm.fromSeq = ds; + } + + /* + * 'to' sequences + */ + if (ssm.mapping.to == seq) + { + ssm.mapping.to = ds; + } + } + } } diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index 8ff640b..e0cad6e 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -190,7 +190,8 @@ public class MapList /** * Constructor given from and to ranges as [start1, end1, start2, end2,...]. - * If any end is equal to the next start, the ranges will be merged. + * If any end is equal to the next start, the ranges will be merged. There is + * no validation check that the ranges do not overlap each other. * * @param from * contiguous regions as [start1, end1, start2, end2, ...] 
@@ -206,14 +207,18 @@ public class MapList this(); this.fromRatio = fromRatio; this.toRatio = toRatio; - fromLowest = from[0]; - fromHighest = from[1]; + fromLowest = Integer.MAX_VALUE; + fromHighest = Integer.MIN_VALUE; int added = 0; for (int i = 0; i < from.length; i += 2) { - fromLowest = Math.min(fromLowest, from[i]); - fromHighest = Math.max(fromHighest, from[i + 1]); + /* + * note lowest and highest values - bearing in mind the + * direction may be reversed + */ + fromLowest = Math.min(fromLowest, Math.min(from[i], from[i + 1])); + fromHighest = Math.max(fromHighest, Math.max(from[i], from[i + 1])); if (added > 0 && from[i] == fromShifts.get(added - 1)[1]) { /* @@ -228,13 +233,13 @@ public class MapList } } - toLowest = to[0]; - toHighest = to[1]; + toLowest = Integer.MAX_VALUE; + toHighest = Integer.MIN_VALUE; added = 0; for (int i = 0; i < to.length; i += 2) { - toLowest = Math.min(toLowest, to[i]); - toHighest = Math.max(toHighest, to[i + 1]); + toLowest = Math.min(toLowest, Math.min(to[i], to[i + 1])); + toHighest = Math.max(toHighest, Math.max(to[i], to[i + 1])); if (added > 0 && to[i] == toShifts.get(added - 1)[1]) { toShifts.get(added - 1)[1] = to[i + 1]; @@ -280,7 +285,8 @@ public class MapList } /** - * Constructor given ranges as lists of [start, end] positions + * Constructor given ranges as lists of [start, end] positions. There is no + * validation check that the ranges do not overlap each other. 
* * @param fromRange * @param toRange @@ -297,19 +303,19 @@ public class MapList this.toRatio = toRatio; fromLowest = Integer.MAX_VALUE; - fromHighest = 0; + fromHighest = Integer.MIN_VALUE; for (int[] range : fromRange) { - fromLowest = Math.min(fromLowest, range[0]); - fromHighest = Math.max(fromHighest, range[1]); + fromLowest = Math.min(fromLowest, Math.min(range[0], range[1])); + fromHighest = Math.max(fromHighest, Math.max(range[0], range[1])); } toLowest = Integer.MAX_VALUE; - toHighest = 0; + toHighest = Integer.MIN_VALUE; for (int[] range : toRange) { - toLowest = Math.min(toLowest, range[0]); - toHighest = Math.max(toHighest, range[1]); + toLowest = Math.min(toLowest, Math.min(range[0], range[1])); + toHighest = Math.max(toHighest, Math.max(range[0], range[1])); } } @@ -897,4 +903,76 @@ public class MapList sb.append(" ]"); return sb.toString(); } + + /** + * Extend this map list by adding the given map's ranges. There is no + * validation check that the ranges do not overlap existing ranges (or each + * other), but contiguous ranges are merged. + * + * @param map + */ + public void addMapList(MapList map) + { + this.fromLowest = Math.min(fromLowest, map.fromLowest); + this.toLowest = Math.min(toLowest, map.toLowest); + this.fromHighest = Math.max(fromHighest, map.fromHighest); + this.toHighest = Math.max(toHighest, map.toHighest); + + for (int[] range : map.getFromRanges()) + { + addRange(range, fromShifts); + } + for (int[] range : map.getToRanges()) + { + addRange(range, toShifts); + } + } + + public static void addRange(int[] range, List addTo) + { + /* + * list is empty - add to it! 
+ */ + if (addTo.size() == 0) + { + addTo.add(range); + return; + } + + int[] last = addTo.get(addTo.size() - 1); + boolean lastForward = last[1] >= last[0]; + boolean newForward = range[1] >= range[0]; + + /* + * contiguous range in the same direction - just update endpoint + */ + if (lastForward == newForward && last[1] == range[0]) + { + last[1] = range[1]; + return; + } + + /* + * next range starts at +1 in forward sense - update endpoint + */ + if (lastForward && newForward && range[0] == last[1] + 1) + { + last[1] = range[1]; + return; + } + + /* + * next range starts at -1 in reverse sense - update endpoint + */ + if (!lastForward && !newForward && range[0] == last[1] - 1) + { + last[1] = range[1]; + return; + } + + /* + * just add the new range + */ + addTo.add(range); + } } diff --git a/test/jalview/datamodel/AlignedCodonFrameTest.java b/test/jalview/datamodel/AlignedCodonFrameTest.java index 4984e5e..989ed7c 100644 --- a/test/jalview/datamodel/AlignedCodonFrameTest.java +++ b/test/jalview/datamodel/AlignedCodonFrameTest.java @@ -25,10 +25,12 @@ import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; +import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; import jalview.util.MapList; import java.util.Arrays; +import java.util.List; import org.testng.annotations.Test; @@ -91,7 +93,7 @@ public class AlignedCodonFrameTest final Sequence aseq1 = new Sequence("Seq1", "-P-R"); aseq1.createDatasetSequence(); - final Sequence aseq2 = new Sequence("Seq2", "-LY-"); + final Sequence aseq2 = new Sequence("Seq2", "-LY-Q"); aseq2.createDatasetSequence(); /* @@ -103,6 +105,7 @@ public class AlignedCodonFrameTest /* * Set up the mappings for the exons (upper-case bases) + * Note residue Q is unmapped */ MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] { 1, 2 }, 3, 1); @@ -111,14 +114,19 @@ public 
class AlignedCodonFrameTest 3, 1); acf.addMap(seq2.getDatasetSequence(), aseq2.getDatasetSequence(), map); - assertEquals("[2, 4]", - Arrays.toString(acf.getMappedRegion(seq1, aseq1, 1))); - assertEquals("[6, 6, 8, 9]", - Arrays.toString(acf.getMappedRegion(seq1, aseq1, 2))); - assertEquals("[1, 2, 4, 4]", - Arrays.toString(acf.getMappedRegion(seq2, aseq2, 1))); - assertEquals("[5, 5, 7, 8]", - Arrays.toString(acf.getMappedRegion(seq2, aseq2, 2))); + assertArrayEquals(new int[] { 2, 4 }, + acf.getMappedRegion(seq1, aseq1, 1)); + assertArrayEquals(new int[] { 6, 6, 8, 9 }, + acf.getMappedRegion(seq1, aseq1, 2)); + assertArrayEquals(new int[] { 1, 2, 4, 4 }, + acf.getMappedRegion(seq2, aseq2, 1)); + assertArrayEquals(new int[] { 5, 5, 7, 8 }, + acf.getMappedRegion(seq2, aseq2, 2)); + + /* + * No mapping from seq2 to Q + */ + assertNull(acf.getMappedRegion(seq2, aseq2, 3)); /* * No mapping from sequence 1 to sequence 2 @@ -127,11 +135,11 @@ public class AlignedCodonFrameTest } @Test(groups = { "Functional" }) - public void testGetMappedCodon() + public void testGetMappedCodons() { final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T"); seq1.createDatasetSequence(); - final Sequence aseq1 = new Sequence("Seq1", "-P-R"); + final Sequence aseq1 = new Sequence("Seq1", "-V-L"); aseq1.createDatasetSequence(); /* @@ -139,7 +147,7 @@ public class AlignedCodonFrameTest */ AlignedCodonFrame acf = new AlignedCodonFrame(); - assertNull(acf.getMappedCodon(seq1.getDatasetSequence(), 0)); + assertNull(acf.getMappedCodons(seq1.getDatasetSequence(), 0)); /* * Set up the mappings for the exons (upper-case bases) @@ -148,22 +156,64 @@ public class AlignedCodonFrameTest 1, 2 }, 3, 1); acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); - assertEquals("[G, T, A]", Arrays.toString(acf.getMappedCodon( - aseq1.getDatasetSequence(), 1))); - assertEquals("[C, T, T]", Arrays.toString(acf.getMappedCodon( - aseq1.getDatasetSequence(), 2))); + assertEquals(1, 
acf.getMappedCodons(aseq1.getDatasetSequence(), 1) + .size()); + assertEquals( + "[G, T, A]", + Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(), + 1).get(0))); + assertEquals( + "[C, T, T]", + Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(), + 2).get(0))); + } + + /** + * Test for the case where there is more than one variant of the DNA mapping + * to a protein sequence + */ + @Test(groups = { "Functional" }) + public void testGetMappedCodons_dnaVariants() + { + final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T"); + seq1.createDatasetSequence(); + final Sequence seq2 = new Sequence("Seq2", "c-G-TT-gT-gT-A"); + seq2.createDatasetSequence(); + final Sequence aseq1 = new Sequence("Seq1", "-V-L"); + aseq1.createDatasetSequence(); + + AlignedCodonFrame acf = new AlignedCodonFrame(); + + /* + * Set up the mappings for the exons (upper-case bases) + */ + MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] { + 1, 2 }, 3, 1); + acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); + acf.addMap(seq2.getDatasetSequence(), aseq1.getDatasetSequence(), map); + + assertEquals(2, acf.getMappedCodons(aseq1.getDatasetSequence(), 1) + .size()); + List codonsForV = acf.getMappedCodons( + aseq1.getDatasetSequence(), 1); + assertEquals("[G, T, A]", Arrays.toString(codonsForV.get(0))); + assertEquals("[G, T, T]", Arrays.toString(codonsForV.get(1))); + List codonsForL = acf.getMappedCodons( + aseq1.getDatasetSequence(), 2); + assertEquals("[C, T, T]", Arrays.toString(codonsForL.get(0))); + assertEquals("[T, T, A]", Arrays.toString(codonsForL.get(1))); } /** * Test for the case where sequences have start > 1 */ @Test(groups = { "Functional" }) - public void testGetMappedCodon_forSubSequences() + public void testGetMappedCodons_forSubSequences() { final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T", 27, 35); seq1.createDatasetSequence(); - final Sequence aseq1 = new Sequence("Seq1", "-P-R", 12, 13); + final 
Sequence aseq1 = new Sequence("Seq1", "-V-L", 12, 13); aseq1.createDatasetSequence(); /* @@ -174,44 +224,66 @@ public class AlignedCodonFrameTest new int[] { 12, 13 }, 3, 1); acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); - assertEquals("[G, T, A]", Arrays.toString(acf.getMappedCodon( - aseq1.getDatasetSequence(), 12))); - assertEquals("[C, T, T]", Arrays.toString(acf.getMappedCodon( - aseq1.getDatasetSequence(), 13))); + assertEquals( + "[G, T, A]", + Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(), + 12).get(0))); + assertEquals( + "[C, T, T]", + Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(), + 13).get(0))); } @Test(groups = { "Functional" }) public void testCouldReplaceSequence() { SequenceI seq1 = new Sequence("Seq1/10-21", "aaacccgggttt"); - SequenceI seq2 = new Sequence("Seq2", "PG"); SequenceI seq1proxy = new SequenceDummy("Seq1"); // map to region within sequence is ok - assertTrue(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 12, + assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12, 17)); // map to region overlapping sequence is ok - assertTrue(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 5, + assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 5, 10)); - assertTrue(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 21, + assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 21, 26)); // map to region before sequence is not ok - assertFalse(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 4, + assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 4, 9)); // map to region after sequence is not ok - assertFalse(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 22, + assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 22, 27)); /* * test should fail if name doesn't match */ seq1proxy.setName("Seq1a"); - assertFalse(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 12, + 
assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12, 17)); seq1proxy.setName("Seq1"); seq1.setName("Seq1a"); - assertFalse(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 12, + assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12, 17)); + + /* + * a dummy sequence can't replace a real one + */ + assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1, seq1proxy, 12, + 17)); + + /* + * a dummy sequence can't replace a dummy sequence + */ + SequenceI seq1proxy2 = new SequenceDummy("Seq1"); + assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, + seq1proxy2, 12, 17)); + + /* + * a real sequence can't replace a real one + */ + SequenceI seq1a = new Sequence("Seq1/10-21", "aaacccgggttt"); + assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1, seq1a, 12, 17)); } /** @@ -278,12 +350,14 @@ public class AlignedCodonFrameTest { SequenceI seq1 = new Sequence("Seq1", "tttCAACCCGGGtttaaa"); SequenceI seq2 = new Sequence("Seq2", "QPG"); + SequenceI seq2a = new Sequence("Seq2a", "QPG"); SequenceI seq1proxy = new SequenceDummy("Seq1"); seq1.createDatasetSequence(); seq2.createDatasetSequence(); + seq2a.createDatasetSequence(); /* - * Make two mappings from Seq2 peptide to dummy sequence Seq1 + * Make mappings from Seq2 and Seq2a peptides to dummy sequence Seq1 */ AlignedCodonFrame acf = new AlignedCodonFrame(); @@ -291,12 +365,17 @@ public class AlignedCodonFrameTest MapList mapping1 = new MapList(new int[] { 7, 12 }, new int[] { 2, 3 }, 3, 1); acf.addMap(seq1proxy, seq2, mapping1); + acf.addMap(seq1proxy, seq2a, mapping1); // map QP to codons 4-9 (CAACCC) MapList mapping2 = new MapList(new int[] { 4, 9 }, new int[] { 1, 2 }, 3, 1); acf.addMap(seq1proxy, seq2, mapping2); + acf.addMap(seq1proxy, seq2a, mapping2); + /* + * acf now has two mappings one from Seq1 to Seq2, one from Seq1 to Seq2a + */ assertEquals(2, acf.getdnaSeqs().length); assertSame(seq1proxy, acf.getdnaSeqs()[0]); assertSame(seq1proxy, acf.getdnaSeqs()[1]); 
@@ -308,4 +387,65 @@ public class AlignedCodonFrameTest assertSame(seq1.getDatasetSequence(), acf.getdnaSeqs()[0]); assertSame(seq1.getDatasetSequence(), acf.getdnaSeqs()[1]); } + + /** + * Test the method that locates the mapped region of one sequence for a position in another, following the mapping in either direction. + */ + @Test(groups = { "Functional" }) + public void testGetMappedRegion_eitherWay() + { + final Sequence seq1 = new Sequence("Seq1", "AAACCCGGGTTT"); + seq1.createDatasetSequence(); + final Sequence seq2 = new Sequence("Seq2", "KPGF"); + seq2.createDatasetSequence(); + final Sequence seq3 = new Sequence("Seq3", "QYKPGFSW"); + seq3.createDatasetSequence(); + + /* + * map Seq1 to all of Seq2 and part of Seq3 + */ + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1); + acf.addMap(seq1.getDatasetSequence(), seq2.getDatasetSequence(), map); + map = new MapList(new int[] { 1, 12 }, new int[] { 3, 6 }, 3, 1); + acf.addMap(seq1.getDatasetSequence(), seq3.getDatasetSequence(), map); + + /* + * map part of Seq3 to Seq2 + */ + map = new MapList(new int[] { 3, 6 }, new int[] { 1, 4 }, 1, 1); + acf.addMap(seq3.getDatasetSequence(), seq2.getDatasetSequence(), map); + + /* + * original case - locate mapped codon for protein position + */ + assertArrayEquals(new int[] { 4, 6 }, + acf.getMappedRegion(seq1, seq2, 2)); + assertArrayEquals(new int[] { 7, 9 }, + acf.getMappedRegion(seq1, seq3, 5)); + assertNull(acf.getMappedRegion(seq1, seq3, 1)); + + /* + * locate mapped protein for protein position + */ + assertArrayEquals(new int[] { 4, 4 }, + acf.getMappedRegion(seq3, seq2, 2)); + + /* + * reverse location protein-to-protein + */ + assertArrayEquals(new int[] { 2, 2 }, + acf.getMappedRegion(seq2, seq3, 4)); + + /* + * reverse location protein-from-nucleotide + * any of codon [4, 5, 6] positions map to seq2/2 + */ + assertArrayEquals(new int[] { 2, 2 }, + acf.getMappedRegion(seq2, seq1, 4)); + assertArrayEquals(new int[] { 2, 2 }, + 
acf.getMappedRegion(seq2, seq1, 5)); + assertArrayEquals(new int[] { 2, 2 }, + acf.getMappedRegion(seq2, seq1, 6)); + } } diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index e0a2d30..54e8311 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -23,6 +23,7 @@ package jalview.util; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertNull; +import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; import java.util.ArrayList; @@ -450,6 +451,17 @@ public class MapListTest assertEquals("{[2, 3], [5, 7], [9, 10], [12, 12], [14, 14], [16, 18]}", prettyPrint(ml2.getFromRanges())); assertEquals("{[1, 1], [3, 4], [6, 6]}", prettyPrint(ml2.getToRanges())); + + /* + * reverse direction + */ + codons = new int[] { 9, 6 }; + protein = new int[] { 100, 91, 80, 79 }; + ml = new MapList(codons, protein, 3, 1); + assertEquals(6, ml.getFromLowest()); + assertEquals(9, ml.getFromHighest()); + assertEquals(79, ml.getToLowest()); + assertEquals(100, ml.getToHighest()); } /** @@ -525,4 +537,113 @@ public class MapListTest assertEquals("From (1:3) [ [1, 5] [10, 15] [25, 20] ] To [ [51, 1] ]", s); } + + @Test(groups = { "Functional" }) + public void testAddMapList() + { + MapList ml = new MapList(new int[] { 11, 15, 20, 25, 35, 30 }, + new int[] { 72, 22 }, 1, 3); + assertEquals(11, ml.getFromLowest()); + assertEquals(35, ml.getFromHighest()); + assertEquals(22, ml.getToLowest()); + assertEquals(72, ml.getToHighest()); + + MapList ml2 = new MapList(new int[] { 2, 4, 37, 40 }, new int[] { 12, + 17, 78, 83, 88, 96 }, 1, 3); + ml.addMapList(ml2); + assertEquals(2, ml.getFromLowest()); + assertEquals(40, ml.getFromHighest()); + assertEquals(12, ml.getToLowest()); + assertEquals(96, ml.getToHighest()); + + String s = ml.toString(); + assertEquals( + "From (1:3) [ [11, 15] [20, 25] [35, 30] 
[2, 4] [37, 40] ] To [ [72, 22] [12, 17] [78, 83] [88, 96] ]", + s); + } + + @Test(groups = { "Functional" }) + public void testAddMapList_contiguous() + { + MapList ml = new MapList(new int[] { 11, 15 }, new int[] { 72, 58 }, 1, + 3); + + MapList ml2 = new MapList(new int[] { 15, 16 }, new int[] { 58, 53 }, + 1, 3); + ml.addMapList(ml2); + assertEquals("From (1:3) [ [11, 16] ] To [ [72, 53] ]", ml.toString()); + } + + @Test(groups = "Functional") + public void testAddRange() + { + int[] range = { 1, 5 }; + List<int[]> ranges = new ArrayList<int[]>(); + + // add to empty list: + MapList.addRange(range, ranges); + assertEquals(1, ranges.size()); + assertSame(range, ranges.get(0)); + + // extend contiguous (same position): + MapList.addRange(new int[] { 5, 10 }, ranges); + assertEquals(1, ranges.size()); + assertEquals(1, ranges.get(0)[0]); + assertEquals(10, ranges.get(0)[1]); + + // extend contiguous (next position): + MapList.addRange(new int[] { 11, 15 }, ranges); + assertEquals(1, ranges.size()); + assertEquals(1, ranges.get(0)[0]); + assertEquals(15, ranges.get(0)[1]); + + // change direction: range is not merged: + MapList.addRange(new int[] { 16, 10 }, ranges); + assertEquals(2, ranges.size()); + assertEquals(16, ranges.get(1)[0]); + assertEquals(10, ranges.get(1)[1]); + + // extend reverse contiguous (same position): + MapList.addRange(new int[] { 10, 8 }, ranges); + assertEquals(2, ranges.size()); + assertEquals(16, ranges.get(1)[0]); + assertEquals(8, ranges.get(1)[1]); + + // extend reverse contiguous (next position): + MapList.addRange(new int[] { 7, 6 }, ranges); + assertEquals(2, ranges.size()); + assertEquals(16, ranges.get(1)[0]); + assertEquals(6, ranges.get(1)[1]); + + // change direction: range is not merged: + MapList.addRange(new int[] { 6, 9 }, ranges); + assertEquals(3, ranges.size()); + assertEquals(6, ranges.get(2)[0]); + assertEquals(9, ranges.get(2)[1]); + + // not contiguous: not merged + MapList.addRange(new int[] { 11, 12 }, ranges); + 
assertEquals(4, ranges.size()); + assertEquals(11, ranges.get(3)[0]); + assertEquals(12, ranges.get(3)[1]); + } + + /** + * Check state after construction + */ + @Test(groups = { "Functional" }) + public void testConstructor_withLists() + { + /* + * reverse direction + */ + int[][] codons = new int[][] { { 9, 6 } }; + int[][] protein = new int[][] { { 100, 91 }, { 80, 79 } }; + MapList ml = new MapList(Arrays.asList(codons), Arrays.asList(protein), + 3, 1); + assertEquals(6, ml.getFromLowest()); + assertEquals(9, ml.getFromHighest()); + assertEquals(79, ml.getToLowest()); + assertEquals(100, ml.getToHighest()); + } } -- 1.7.10.2