From 61f1a8b75ea5ce352d6214c34fbdcd58bafbbb73 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 22 Jan 2015 15:50:54 +0000 Subject: [PATCH] JAL-1619 refactoring / tests to support 'align linked dna as protein' --- resources/lang/Messages.properties | 9 +- src/jalview/analysis/AlignmentUtils.java | 189 +++++++++++++ src/jalview/datamodel/AlignedCodonFrame.java | 163 +++++++----- src/jalview/datamodel/Alignment.java | 143 +++------- src/jalview/datamodel/AlignmentI.java | 8 + src/jalview/datamodel/Mapping.java | 36 ++- src/jalview/datamodel/SearchResults.java | 61 ++--- src/jalview/gui/AlignFrame.java | 59 +++- src/jalview/gui/AlignViewport.java | 179 +++++++++++++ src/jalview/gui/CutAndPasteTransfer.java | 43 +-- src/jalview/gui/Desktop.java | 2 - src/jalview/gui/SequenceFetcher.java | 62 +++-- src/jalview/io/FileLoader.java | 8 +- src/jalview/jbgui/GAlignFrame.java | 6 - src/jalview/util/MapList.java | 296 ++++++++++----------- src/jalview/ws/AWSThread.java | 2 +- test/jalview/analysis/AlignmentUtilsTests.java | 158 +++++++++++ test/jalview/datamodel/AlignedCodonFrameTest.java | 100 ++++++- test/jalview/datamodel/AlignmentTest.java | 96 ++++++- test/jalview/util/MapListTest.java | 266 +++++++++++++++++- 20 files changed, 1412 insertions(+), 474 deletions(-) diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index 46b424c..8cf8797 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -705,6 +705,7 @@ label.get_cross_refs = Get Cross References label.sort_alignment_new_tree = Sort Alignment With New Tree label.add_sequences = Add Sequences label.new_window = New Window +label.split_window = Split Window label.refresh_available_sources = Refresh Available Sources label.use_registry = Use Registry label.add_local_source = Add Local Source @@ -1188,5 +1189,9 @@ label.show_logo = Show Logo label.normalise_logo = Normalise Logo label.no_colour_selection_in_scheme = Please, make a colour selection before to apply colour scheme label.no_colour_selection_warn = Error saving colour scheme -label.nonstandard_translation = Non-standard translation -warn.nonstandard_translation = Non-standard translation(s) detected at {0}.
Do you wish to proceed? +label.open_linked_alignment? = Would you like to open as a separate alignment, with cDNA and protein linked? +label.open_linked_alignment = Open linked alignment +label.no_mappings = No mappings found +label.mapping_failed = No sequence mapping could be made between the alignments.
A mapping requires sequence names to match, and equivalent sequence lengths. +action.no = No +label.for = for diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index b38ed30..2dbe015 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -359,4 +359,193 @@ public class AlignmentUtils return null; } } + + /** + * Align sequence 'seq' to match the alignment of a mapped sequence. Note this + * currently assumes that we are aligning cDNA to match protein. + * + * @param seq + * the sequence to be realigned + * @param al + * the alignment whose sequence alignment is to be 'copied' + * @param gap + * character string represent a gap in the realigned sequence + * @param preserveUnmappedGaps + * @param preserveMappedGaps + * @return true if the sequence was realigned, false if it could not be + */ + public static boolean alignSequenceAs(SequenceI seq, AlignmentI al, + String gap, boolean preserveMappedGaps, + boolean preserveUnmappedGaps) + { + /* + * Get any mappings from the source alignment to the target (dataset) sequence. + */ + // TODO there may be one AlignedCodonFrame per dataset sequence, or one with + // all mappings. Would it help to constrain this? + AlignedCodonFrame[] mappings = al.getCodonFrame(seq); + if (mappings == null) + { + return false; + } + + /* + * Locate the aligned source sequence whose dataset sequence is mapped. We + * just take the first match here (as we can't align cDNA like more than one + * protein sequence). + */ + SequenceI alignFrom = null; + AlignedCodonFrame mapping = null; + for (AlignedCodonFrame mp : mappings) + { + alignFrom = mp.findAlignedSequence(seq.getDatasetSequence(), al); + if (alignFrom != null) + { + mapping = mp; + break; + } + } + + if (alignFrom == null) + { + return false; + } + alignSequenceAs(seq, alignFrom, mapping, gap, al.getGapCharacter(), + preserveMappedGaps, preserveUnmappedGaps); + return true; + } + + /** + * Align sequence 'alignTo' the same way as 'alignFrom', using the mapping to + * match residues and codons. + * + * @param alignTo + * @param alignFrom + * @param mapping + * @param myGap + * @param sourceGap + * @param preserveUnmappedGaps + * @param preserveMappedGaps + */ + public static void alignSequenceAs(SequenceI alignTo, + SequenceI alignFrom, + AlignedCodonFrame mapping, String myGap, char sourceGap, + boolean preserveMappedGaps, boolean preserveUnmappedGaps) + { + // TODO generalise to work for Protein-Protein, dna-dna, dna-protein + final char[] thisSeq = alignTo.getSequence(); + final char[] thatAligned = alignFrom.getSequence(); + StringBuilder thisAligned = new StringBuilder(2 * thisSeq.length); + + // aligned and dataset sequence positions, all base zero + int thisSeqPos = 0; + int sourceDsPos = 0; + + int basesWritten = 0; + char myGapChar = myGap.charAt(0); + int ratio = myGap.length(); + + /* + * Traverse the aligned protein sequence. + */ + int sourceGapLength = 0; + for (char sourceChar : thatAligned) + { + if (sourceChar == sourceGap) + { + sourceGapLength++; + continue; + } + + /* + * Found a residue. Locate its mapped codon (start) position. + */ + sourceDsPos++; + // Note mapping positions are base 1, our sequence positions base 0 + int[] mappedPos = mapping.getMappedRegion(alignTo, alignFrom, + sourceDsPos); + if (mappedPos == null) + { + /* + * Abort realignment if unmapped protein. Or could ignore it?? + */ + System.err.println("Can't align: no codon mapping to residue " + + sourceDsPos + "(" + sourceChar + ")"); + return; + } + + int mappedCodonStart = mappedPos[0]; // position (1...) of codon start + int mappedCodonEnd = mappedPos[mappedPos.length - 1]; // codon end pos + int trailingCopiedGapLength = 0; + + /* + * Copy dna sequence up to and including this codon. Optionally, include + * gaps before the codon starts (in introns) and/or after the codon starts + * (in exons). + * + * Note this only works for 'linear' splicing, not reverse or interleaved. + * But then 'align dna as protein' doesn't make much sense otherwise. + */ + boolean inCodon = false; + while (basesWritten < mappedCodonEnd && thisSeqPos < thisSeq.length) + { + final char c = thisSeq[thisSeqPos++]; + if (c != myGapChar) + { + basesWritten++; + + /* + * Is this the start of the mapped codon? If so, add in any extra gap + * due to the protein alignment. + */ + if (basesWritten == mappedCodonStart) + { + inCodon = true; + int gapsToAdd = Math.max(0, ratio * sourceGapLength + - trailingCopiedGapLength); + for (int i = 0; i < gapsToAdd; i++) + { + thisAligned.append(myGapChar); + } + sourceGapLength = 0; + } + thisAligned.append(c); + trailingCopiedGapLength = 0; + } + else if ((!inCodon && preserveUnmappedGaps) + || (inCodon && preserveMappedGaps)) + { + thisAligned.append(c); + trailingCopiedGapLength++; + } + } + + /* + * Expand (if necessary) the trailing gap to the size of the aligned gap. + */ + int gapsToAdd = (ratio * sourceGapLength - trailingCopiedGapLength); + for (int i = 0; i < gapsToAdd; i++) + { + thisAligned.append(myGapChar); + } + } + + /* + * At end of protein sequence. Copy any remaining dna sequence, optionally + * including (intron) gaps. We do not copy trailing gaps in protein. + */ + while (thisSeqPos < thisSeq.length) + { + final char c = thisSeq[thisSeqPos++]; + if (c != myGapChar || preserveUnmappedGaps) + { + thisAligned.append(c); + } + } + + /* + * All done aligning, set the aligned sequence. + */ + alignTo.setSequence(new String(thisAligned)); + } } diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index 417363a..d3f6ad5 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -22,9 +22,6 @@ package jalview.datamodel; import jalview.util.MapList; -import java.util.ArrayList; -import java.util.List; - /** * Stores mapping between the columns of a protein alignment and a DNA alignment * and a list of individual codon to amino acid mappings between sequences. @@ -33,13 +30,6 @@ public class AlignedCodonFrame { /* - * TODO: not an ideal solution - we reference the aligned amino acid sequences - * in order to make insertions on them Better would be dnaAlignment and - * aaAlignment reference.... - */ - private List a_aaSeqs = new ArrayList(); - - /* * tied array of na Sequence objects. */ private SequenceI[] dnaSeqs = null; @@ -61,45 +51,6 @@ public class AlignedCodonFrame } /** - * Construct a 'near copy' of the given AlignedCodonFrame, that references the - * same dataset sequences, but the given protein aligned sequences. - * - * @param acf - * @param alignment - * @throws IllegalStateException - * if the copied mapping references any dataset not in the alignment - */ - public AlignedCodonFrame(AlignedCodonFrame acf, SequenceI[] alignment) - { - this.dnaSeqs = acf.dnaSeqs; - this.dnaToProt = acf.dnaToProt; - - for (SequenceI seq : acf.a_aaSeqs) - { - boolean found = false; - // TODO may not correctly handle the case where the same sequence appears - // twice in the source alignment i.e. same dataset sequence - // the copy will reference the first aligned sequence for both - // ?not solvable if realignment may reorder the sequences - // or check on sequence name as well???? - for (SequenceI newseq : alignment) - { - if (seq.getDatasetSequence() == newseq.getDatasetSequence()) - { - this.a_aaSeqs.add(newseq); - found = true; - break; - } - } - if (!found) - { - throw new IllegalStateException("Copying codon mapping for" - + seq.getSequenceAsString()); - } - } - } - - /** * add a mapping between the dataset sequences for the associated dna and * protein sequence objects * @@ -132,7 +83,6 @@ public class AlignedCodonFrame // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse())); mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq .getDatasetSequence(); - a_aaSeqs.add(aaseq); dnaToProt[nlen] = mp; } @@ -175,25 +125,13 @@ public class AlignedCodonFrame } /** - * - * @param sequenceRef - * @return null or corresponding aaSeq dataset sequence for dnaSeq entry - */ - public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef) - { - return getAaForDnaSeq(dnaSeqRef, true); - } - - /** * Return the corresponding aligned or dataset aa sequence for given dna * sequence, null if not found. * * @param sequenceRef - * @param returnDataset - * if true, return the aa dataset, else the aligned sequence * @return */ - public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef, boolean returnDataset) + public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef) { if (dnaSeqs == null) { @@ -204,16 +142,7 @@ public class AlignedCodonFrame { if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads) { - if (returnDataset) - { - return dnaToProt[ds].to; - } - else - { - // TODO very fragile - depends on dnaSeqs, dnaToProt, a_aaSeqs moving - // in parallel; revise data model to guarantee this - return a_aaSeqs.get(ds); - } + return dnaToProt[ds].to; } } return null; @@ -335,4 +264,92 @@ public class AlignedCodonFrame } return ml == null ? null : ml.locateInFrom(aaPos, aaPos); } + + /** + * Convenience method to return the first aligned sequence in the given + * alignment whose dataset has a mapping with the given dataset sequence. + * + * @param seq + * + * @param al + * @return + */ + public SequenceI findAlignedSequence(SequenceI seq, AlignmentI al) + { + /* + * Search mapped protein ('to') sequences first. + */ + if (this.dnaToProt != null) + { + for (int i = 0; i < dnaToProt.length; i++) + { + if (this.dnaSeqs[i] == seq) + { + for (SequenceI sourceAligned : al.getSequences()) + { + if (this.dnaToProt[i].to == sourceAligned.getDatasetSequence()) + { + return sourceAligned; + } + } + } + } + } + + /* + * Then try mapped dna sequences. + */ + if (this.dnaToProt != null) + { + for (int i = 0; i < dnaToProt.length; i++) + { + if (this.dnaToProt[i].to == seq) + { + for (SequenceI sourceAligned : al.getSequences()) + { + if (this.dnaSeqs[i] == sourceAligned.getDatasetSequence()) + { + return sourceAligned; + } + } + } + } + } + + return null; + } + + /** + * Returns the region in the 'mappedFrom' sequence's dataset that is mapped to + * position 'pos' (base 1) in the 'mappedTo' sequence's dataset. The region is + * a set of start/end position pairs. + * + * @param mappedFrom + * @param mappedTo + * @param pos + * @return + */ + public int[] getMappedRegion(SequenceI mappedFrom, SequenceI mappedTo, + int pos) + { + SequenceI targetDs = mappedFrom.getDatasetSequence() == null ? mappedFrom + : mappedFrom.getDatasetSequence(); + SequenceI sourceDs = mappedTo.getDatasetSequence() == null ? mappedTo + : mappedTo.getDatasetSequence(); + if (targetDs == null || sourceDs == null || dnaToProt == null) + { + return null; + } + for (int mi = 0; mi < dnaToProt.length; mi++) + { + if (dnaSeqs[mi] == targetDs && dnaToProt[mi].to == sourceDs) + { + int[] codon = dnaToProt[mi].map.locateInFrom(pos, pos); + if (codon != null) { + return codon; + } + } + } + return null; + } } diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 5d11a20..cea5956 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -21,13 +21,16 @@ package jalview.datamodel; import jalview.analysis.AlignmentUtils; +import jalview.io.FastaFile; import jalview.util.MessageManager; import java.util.ArrayList; import java.util.Enumeration; +import java.util.HashSet; import java.util.Hashtable; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.Vector; /** @@ -1607,6 +1610,12 @@ public class Alignment implements AlignmentI return dataset; } + @Override + public int alignAs(AlignmentI al) + { + return alignAs(al, true, true); + } + /** * Align this alignment 'the same as' the given one. Mapped sequences only are * realigned. If both of the same type (nucleotide/protein) then align both @@ -1618,9 +1627,11 @@ public class Alignment implements AlignmentI * * @param al */ - @Override - public int alignAs(AlignmentI al) +// @Override + public int alignAs(AlignmentI al, boolean preserveMappedGaps, + boolean preserveUnmappedGaps) { + // TODO should this method signature be the one in the interface? int count = 0; boolean thisIsNucleotide = this.isNucleotide(); boolean thatIsProtein = !al.isNucleotide(); @@ -1631,133 +1642,45 @@ public class Alignment implements AlignmentI return 0; // todo: build it - a variant of Dna.CdnaTranslate() } + char thisGapChar = this.getGapCharacter(); - char thatGapChar = al.getGapCharacter(); String gap = thisIsNucleotide && thatIsProtein ? String .valueOf(new char[] { thisGapChar, thisGapChar, thisGapChar }) : String .valueOf(thisGapChar); - int ratio = thisIsNucleotide && thatIsProtein ? 3 : 1; /* * Get mappings from 'that' alignment's sequences to this. */ for (SequenceI alignTo : getSequences()) { - AlignedCodonFrame[] mappings = al.getCodonFrame(alignTo); - if (mappings != null) - { - for (AlignedCodonFrame mapping : mappings) - { - count += alignSequenceAs(alignTo, mapping, thatGapChar, gap, - ratio) ? 1 : 0; - } - } + count += AlignmentUtils.alignSequenceAs(alignTo, al, gap, preserveMappedGaps, + preserveUnmappedGaps) ? 1 : 0; } return count; } /** - * Align sequence 'seq' the same way as 'other'. Note this currently assumes - * that we are aligned cDNA to match protein. - * - * @param seq - * the sequence to be realigned - * @param mapping - * holds mapping from the sequence whose alignment is to be 'copied' - * @param thatGapChar - * gap character used in the 'other' sequence - * @param gap - * character string represent a gap in the realigned sequence - * @param ratio - * the number of positions in the realigned sequence corresponding to - * one in the 'other' - * @return true if the sequence was realigned, false if it could not be + * Returns the alignment in Fasta format. Behaviour of this method is not + * guaranteed between versions. */ - protected boolean alignSequenceAs(SequenceI seq, - AlignedCodonFrame mapping, - char thatGapChar, - String gap, int ratio) - { - char myGapChar = gap.charAt(0); - // TODO rework this to use the mapping to match 'this' to 'that' residue - // position, to handle introns and exons correctly. - // TODO generalise to work for Protein-Protein, dna-dna, dna-protein - SequenceI alignFrom = mapping.getAaForDnaSeq(seq, false); - if (alignFrom == null) - { - return false; - } - final char[] thisSeq = seq.getSequence(); - final char[] thisDs = seq.getDatasetSequence().getSequence(); - final char[] thatAligned = alignFrom.getSequence(); - StringBuilder thisAligned = new StringBuilder(2 * thisDs.length); - - /* - * Find the DNA dataset position that corresponds to the first protein - * residue (e.g. ignoring start codon in cDNA). - */ - int[] dnaStart = mapping.getDnaPosition(seq.getDatasetSequence(), 1); - int thisDsPosition = dnaStart == null ? 0 : dnaStart[0] - 1; - int thisSeqPos = 0; - - /* - * Copy aligned cDNA up to (excluding) the first mapped base. - */ - int basesWritten = 0; - while (basesWritten < thisDsPosition && thisSeqPos < thisSeq.length) - { - char c = thisSeq[thisSeqPos++]; - thisAligned.append(c); - if (c != myGapChar) - { - basesWritten++; - } - } - - /* - * Now traverse the aligned protein mirroring its gaps in cDNA. - */ - for (char thatChar : thatAligned) - { - if (thatChar == thatGapChar) - { - /* - * Add (equivalent of) a gap - */ - thisAligned.append(gap); - } - else - { - /* - * Add (equivalent of) a residue - */ - for (int j = 0; j < ratio && thisDsPosition < thisDs.length; j++) - { - thisAligned.append(thisDs[thisDsPosition++]); - - /* - * Also advance over any gaps and the next residue in the old aligned - * sequence - */ - while (thisSeq[thisSeqPos] == myGapChar - && thisSeqPos < thisSeq.length) - { - thisSeqPos++; - } - thisSeqPos++; - } - } - } + @Override + public String toString() + { + return new FastaFile().print(getSequencesArray()); + } - /* - * Finally copy any 'extra' aligned cDNA (e.g. stop codon, introns). - */ - while (thisSeqPos < thisSeq.length) + /** + * Returns the set of distinct sequence names. No ordering is guaranteed. + */ + @Override + public Set getSequenceNames() + { + Set names = new HashSet(); + for (SequenceI seq : getSequences()) { - thisAligned.append(thisSeq[thisSeqPos++]); + names.add(seq.getName()); } - seq.setSequence(new String(thisAligned)); - return true; + return names; } } diff --git a/src/jalview/datamodel/AlignmentI.java b/src/jalview/datamodel/AlignmentI.java index bd9ba9e..c526f2a 100755 --- a/src/jalview/datamodel/AlignmentI.java +++ b/src/jalview/datamodel/AlignmentI.java @@ -23,6 +23,7 @@ package jalview.datamodel; import java.util.Hashtable; import java.util.List; import java.util.Map; +import java.util.Set; /** * Data structure to hold and manipulate a multiple sequence alignment @@ -503,4 +504,11 @@ public interface AlignmentI extends AnnotatedCollectionI * @return */ public int alignAs(AlignmentI al); + + /** + * Returns the set of distinct sequence names in the alignment. + * + * @return + */ + public Set getSequenceNames(); } diff --git a/src/jalview/datamodel/Mapping.java b/src/jalview/datamodel/Mapping.java index cb87719..f2c16d0 100644 --- a/src/jalview/datamodel/Mapping.java +++ b/src/jalview/datamodel/Mapping.java @@ -20,21 +20,21 @@ */ package jalview.datamodel; -import java.util.Vector; - import jalview.util.MapList; +import java.util.Vector; + public class Mapping { /** * Contains the start-end pairs mapping from the associated sequence to the - * sequence in the database coordinate system it also takes care of step - * difference between coordinate systems + * sequence in the database coordinate system. It also takes care of step + * difference between coordinate systems. */ MapList map = null; /** - * The seuqence that map maps the associated seuqence to (if any). + * The sequence that map maps the associated sequence to (if any). */ SequenceI to = null; @@ -111,19 +111,31 @@ public class Mapping * @param other * @return */ - public boolean equals(Mapping other) + @Override + public boolean equals(Object o) { - if (other == null) + if (o == null || !(o instanceof Mapping)) + { return false; + } + Mapping other = (Mapping) o; if (other == this) + { return true; + } if (other.to != to) + { return false; + } if ((map != null && other.map == null) || (map == null && other.map != null)) + { return false; + } if (map.equals(other.map)) + { return true; + } return false; } @@ -251,7 +263,9 @@ public class Mapping vf[v].setBegin(frange[i]); vf[v].setEnd(frange[i + 1]); if (frange.length > 2) + { vf[v].setDescription(f.getDescription() + "\nPart " + (v + 1)); + } } return vf; } @@ -300,14 +314,18 @@ public class Mapping from = (map.getToLowest() < from) ? from : map.getToLowest(); to = (map.getToHighest() > to) ? to : map.getToHighest(); if (from > to) + { return null; + } } else { from = (map.getToHighest() > from) ? from : map.getToHighest(); to = (map.getToLowest() < to) ? to : map.getToLowest(); if (from < to) + { return null; + } } return map.locateInFrom(from, to); } @@ -333,14 +351,18 @@ public class Mapping from = (map.getFromLowest() < from) ? from : map.getFromLowest(); to = (map.getFromHighest() > to) ? to : map.getFromHighest(); if (from > to) + { return null; + } } else { from = (map.getFromHighest() > from) ? from : map.getFromHighest(); to = (map.getFromLowest() < to) ? to : map.getFromLowest(); if (from < to) + { return null; + } } return map.locateInTo(from, to); } diff --git a/src/jalview/datamodel/SearchResults.java b/src/jalview/datamodel/SearchResults.java index 6b7a3eb..d36c872 100755 --- a/src/jalview/datamodel/SearchResults.java +++ b/src/jalview/datamodel/SearchResults.java @@ -20,10 +20,13 @@ */ package jalview.datamodel; +import java.util.ArrayList; +import java.util.List; + public class SearchResults { - Match[] matches; + private List matches = new ArrayList(); /** * This method replaces the old search results which merely held an alignment @@ -39,25 +42,7 @@ public class SearchResults */ public void addResult(SequenceI seq, int start, int end) { - if (matches == null) - { - matches = new Match[] - { new Match(seq, start, end) }; - return; - } - - int mSize = matches.length; - - Match[] tmp = new Match[mSize + 1]; - int m; - for (m = 0; m < mSize; m++) - { - tmp[m] = matches[m]; - } - - tmp[m] = new Match(seq, start, end); - - matches = tmp; + matches.add(new Match(seq, start, end)); } /** @@ -69,15 +54,11 @@ public class SearchResults */ public boolean involvesSequence(SequenceI sequence) { - if (matches == null || matches.length == 0) - { - return false; - } SequenceI ds = sequence.getDatasetSequence(); - for (int m = 0; m < matches.length; m++) + for (Match m : matches) { - if (matches[m].sequence != null - && (matches[m].sequence == sequence || matches[m].sequence == ds)) + if (m.sequence != null + && (m.sequence == sequence || m.sequence == ds)) { return true; } @@ -92,7 +73,7 @@ public class SearchResults */ public int[] getResults(SequenceI sequence, int start, int end) { - if (matches == null) + if (matches.isEmpty()) { return null; } @@ -101,22 +82,22 @@ public class SearchResults int[] tmp = null; int resultLength, matchStart = 0, matchEnd = 0; boolean mfound; - for (int m = 0; m < matches.length; m++) + for (Match m : matches) { mfound = false; - if (matches[m].sequence == sequence) + if (m.sequence == sequence) { mfound = true; // locate aligned position - matchStart = sequence.findIndex(matches[m].start) - 1; - matchEnd = sequence.findIndex(matches[m].end) - 1; + matchStart = sequence.findIndex(m.start) - 1; + matchEnd = sequence.findIndex(m.end) - 1; } - else if (matches[m].sequence == sequence.getDatasetSequence()) + else if (m.sequence == sequence.getDatasetSequence()) { mfound = true; // locate region in local context - matchStart = sequence.findIndex(matches[m].start) - 1; - matchEnd = sequence.findIndex(matches[m].end) - 1; + matchStart = sequence.findIndex(m.start) - 1; + matchEnd = sequence.findIndex(m.end) - 1; } if (mfound) { @@ -160,22 +141,22 @@ public class SearchResults public int getSize() { - return matches == null ? 0 : matches.length; + return matches.size(); } public SequenceI getResultSequence(int index) { - return matches[index].sequence; + return matches.get(index).sequence; } public int getResultStart(int index) { - return matches[index].start; + return matches.get(index).start; } public int getResultEnd(int index) { - return matches[index].end; + return matches.get(index).end; } class Match @@ -201,6 +182,6 @@ public class SearchResults */ public boolean isEmpty() { - return (matches == null) || (matches.length == 0); + return matches.isEmpty(); } } diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 918b156..3f0bb8c 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -745,7 +745,8 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, final StructureSelectionManager ssm = StructureSelectionManager .getStructureSelectionManager(Desktop.instance); ssm.addMappings(thisAlignment.getCodonFrames()); - ssm.addCommandListener(af.getViewport()); + // enable the next line to enable linked editing + // ssm.addCommandListener(af.getViewport()); linkedCount++; } } @@ -4903,7 +4904,8 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, Desktop.addInternalFrame(af, MessageManager.formatMessage( "label.translation_of_params", new String[] { this.getTitle() }), DEFAULT_WIDTH, DEFAULT_HEIGHT); - viewport.getStructureSelectionManager().addCommandListener(viewport); + // enable next line for linked editing + // viewport.getStructureSelectionManager().addCommandListener(viewport); } } @@ -5874,11 +5876,60 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, /** * - * @return alignment panels in this alignemnt frame + * @return alignment panels in this alignment frame */ public List getAlignPanels() { - return alignPanels == null ? Arrays.asList(alignPanel) : alignPanels; + return alignPanels == null ? Arrays.asList(alignPanel) + : alignPanels; + } + + /** + * Open a new alignment window, with the cDNA associated with this (protein) + * alignment, aligned as is the protein. + */ + @Override + protected void viewAsCdna_actionPerformed() + { + final AlignmentI alignment = getViewport().getAlignment(); + AlignedCodonFrame[] mappings = alignment.getCodonFrames(); + if (mappings == null) + { + return; + } + List cdnaSeqs = new ArrayList(); + for (SequenceI aaSeq : alignment.getSequences()) { + for (AlignedCodonFrame acf : mappings) { + SequenceI dnaSeq = acf.getDnaForAaSeq(aaSeq.getDatasetSequence()); + if (dnaSeq != null) + { + /* + * There is a cDNA mapping for this protein sequence - add to new + * alignment. It will share the same dataset sequence as other mapped + * cDNA (no new mappings need to be created). + */ + final Sequence newSeq = new Sequence(dnaSeq); + newSeq.setDatasetSequence(dnaSeq); + cdnaSeqs.add(newSeq); + } + } + } + if (cdnaSeqs.size() == 0) + { + // show a warning dialog no mapped cDNA + return; + } + AlignmentI cdna = new Alignment(cdnaSeqs.toArray(new SequenceI[cdnaSeqs + .size()])); + AlignFrame alignFrame = new AlignFrame(cdna, AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + cdna.alignAs(alignment); + String newtitle = "cDNA " + MessageManager.getString("label.for") + " " + + this.title; + Desktop.addInternalFrame(alignFrame, newtitle, + AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + } } diff --git a/src/jalview/gui/AlignViewport.java b/src/jalview/gui/AlignViewport.java index ff3e329..705f53a 100644 --- a/src/jalview/gui/AlignViewport.java +++ b/src/jalview/gui/AlignViewport.java @@ -38,6 +38,8 @@ */ package jalview.gui; +import jalview.analysis.AlignmentUtils; +import jalview.analysis.AlignmentUtils.MappingResult; import jalview.analysis.AnnotationSorter.SequenceAnnotationOrder; import jalview.analysis.NJTree; import jalview.api.AlignViewportI; @@ -56,6 +58,7 @@ import jalview.structure.CommandListener; import jalview.structure.SelectionSource; import jalview.structure.StructureSelectionManager; import jalview.structure.VamsasSource; +import jalview.util.MessageManager; import jalview.viewmodel.AlignmentViewport; import jalview.ws.params.AutoCalcSetting; @@ -68,8 +71,13 @@ import java.util.ArrayList; import java.util.Deque; import java.util.Hashtable; import java.util.List; +import java.util.Set; import java.util.Vector; +import javax.swing.JInternalFrame; +import javax.swing.JOptionPane; +import javax.swing.JSplitPane; + /** * DOCUMENT ME! * @@ -1405,4 +1413,175 @@ public class AlignViewport extends AlignmentViewport implements { return this.redoList; } + + /** + * Add the sequences from the given alignment to this viewport. Optionally, + * may give the user the option to open a new frame or panel linking cDNA and + * protein. + * + * @param al + * @param title + */ + public void addAlignment(AlignmentI al, String title) + { + // TODO: promote to AlignViewportI? applet CutAndPasteTransfer is different + + // refactored from FileLoader / CutAndPasteTransfer / SequenceFetcher with + // this comment: + // TODO: create undo object for this JAL-1101 + + /* + * If one alignment is protein and one nucleotide, with at least one + * sequence name in common, offer to open a linked alignment. + */ + if (getAlignment().isNucleotide() != al.isNucleotide()) + { + final Set sequenceNames = getAlignment().getSequenceNames(); + sequenceNames.retainAll(al.getSequenceNames()); + if (!sequenceNames.isEmpty()) // at least one sequence name in both + { + if (openLinkedAlignment(al, title)) + { + return; + } + } + } + + for (int i = 0; i < al.getHeight(); i++) + { + getAlignment().addSequence(al.getSequenceAt(i)); + } + // TODO this call was done by SequenceFetcher but not FileLoader or + // CutAndPasteTransfer. Is it needed? + setEndSeq(getAlignment().getHeight()); + firePropertyChange("alignment", null, getAlignment().getSequences()); + } + + /** + * Show a dialog with the option to open and link (cDNA <-> protein) as a new + * alignment. Returns true if the new alignment was opened, false if not - + * either because the user declined the offer, or because no mapping could be + * made. + * + * @param title + */ + protected boolean openLinkedAlignment(AlignmentI al, String title) + { + String[] options = new String[] + { MessageManager.getString("action.no"), + MessageManager.getString("label.split_window"), + MessageManager.getString("label.new_window"), }; + final String question = JvSwingUtils.wrapTooltip(true, + MessageManager.getString("label.open_linked_alignment?")); + int response = JOptionPane.showOptionDialog(Desktop.desktop, question, + MessageManager.getString("label.open_linked_alignment"), + JOptionPane.DEFAULT_OPTION, JOptionPane.PLAIN_MESSAGE, null, + options, options[0]); + // int reply = JOptionPane.showInternalConfirmDialog(Desktop.desktop, + // question, + // MessageManager.getString("label.open_linked_alignment"), + // JOptionPane.YES_NO_OPTION, + // JOptionPane.QUESTION_MESSAGE); + + if (response != 1 && response != 2) + { + return false; + } + final boolean openSplitPane = (response == 1); + final boolean openInNewWindow = (response == 2); + + /* + * Create the AlignFrame first (which creates the new alignment's datasets), + * before attempting sequence mapping. + */ + AlignFrame alignFrame = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + + final AlignmentI protein = al.isNucleotide() ? getAlignment() : al; + final AlignmentI cdna = al.isNucleotide() ? al : getAlignment(); + + alignFrame.statusBar.setText(MessageManager.formatMessage( + "label.successfully_loaded_file", new Object[] + { title })); + + // TODO if we want this (e.g. to enable reload of the alignment from file), + // we will need to add parameters to the stack. + // if (!protocol.equals(AppletFormatAdapter.PASTE)) + // { + // alignFrame.setFileName(file, format); + // } + if (openInNewWindow) + { + /* + * open in new window + */ + Desktop.addInternalFrame(alignFrame, title, AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + } + + /* + * Try to find mappings for at least one sequence. + */ + MappingResult mapped = AlignmentUtils.mapProteinToCdna(protein, cdna); + if (mapped == MappingResult.Mapped) + { + + /* + * Register the mappings (held on the protein alignment) with the + * StructureSelectionManager (for mouseover linking). + */ + final StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); + ssm.addMappings(protein.getCodonFrames()); + + /* + * Set the cDNA to listen for edits on the protein. + */ + ssm.addCommandListener(al.isNucleotide() ? alignFrame.getViewport() + : this); + } + else + { + + /* + * No mapping possible - warn the user, but leave window open. + */ + final String msg = JvSwingUtils.wrapTooltip(true, + MessageManager.getString("label.mapping_failed")); + JOptionPane.showInternalMessageDialog(Desktop.desktop, msg, + MessageManager.getString("label.no_mappings"), + JOptionPane.WARNING_MESSAGE); + } + + try + { + alignFrame.setMaximum(jalview.bin.Cache.getDefault("SHOW_FULLSCREEN", + false)); + } catch (java.beans.PropertyVetoException ex) + { + } + + if (openSplitPane) + { + /* + * Open in split pane. Original sequence above, new one below. + */ + JInternalFrame splitFrame = new JInternalFrame(); + splitFrame.setSize(AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + // TODO not quite right to 'move' AlignPanel from 'this' to the split + // pane + // TODO probably want linked editing set up here + JSplitPane splitPane = new JSplitPane(JSplitPane.VERTICAL_SPLIT, + getAlignPanel(), alignFrame.alignPanel); + splitPane.setDividerLocation(0.5d); + splitFrame.setSize(AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + splitFrame.add(splitPane); + Desktop.addInternalFrame(splitFrame, title, AlignFrame.DEFAULT_WIDTH, + AlignFrame.DEFAULT_HEIGHT); + } + + return true; + } } diff --git a/src/jalview/gui/CutAndPasteTransfer.java b/src/jalview/gui/CutAndPasteTransfer.java index 0c22b14..6705f42 100644 --- a/src/jalview/gui/CutAndPasteTransfer.java +++ b/src/jalview/gui/CutAndPasteTransfer.java @@ -20,16 +20,28 @@ */ package jalview.gui; -import java.awt.*; -import java.awt.datatransfer.*; -import java.awt.event.*; -import javax.swing.*; - -import jalview.datamodel.*; -import jalview.io.*; -import jalview.jbgui.*; +import jalview.datamodel.Alignment; +import jalview.io.FormatAdapter; +import jalview.io.IdentifyFile; +import jalview.io.JalviewFileChooser; +import jalview.io.JalviewFileView; +import jalview.jbgui.GCutAndPasteTransfer; import jalview.util.MessageManager; +import java.awt.Toolkit; +import java.awt.datatransfer.Clipboard; +import java.awt.datatransfer.DataFlavor; +import java.awt.datatransfer.StringSelection; +import java.awt.datatransfer.Transferable; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.MouseEvent; + +import javax.swing.JMenuItem; +import javax.swing.JOptionPane; +import javax.swing.JPopupMenu; +import javax.swing.SwingUtilities; + /** * Cut'n'paste files into the desktop See JAL-1105 * @@ -190,24 +202,19 @@ public class CutAndPasteTransfer extends GCutAndPasteTransfer if (al != null) { + String title = MessageManager.formatMessage( + "label.input_cut_paste_params", new String[] + { format }); if (viewport != null) { - for (int i = 0; i < al.getHeight(); i++) - { - viewport.getAlignment().addSequence(al.getSequenceAt(i)); - } - - viewport.firePropertyChange("alignment", null, viewport - .getAlignment().getSequences()); + viewport.addAlignment(al, title); } else { AlignFrame af = new AlignFrame(al, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); af.currentFileFormat = format; - Desktop.addInternalFrame(af, MessageManager.formatMessage( - "label.input_cut_paste_params", new String[] - { format }), AlignFrame.DEFAULT_WIDTH, + Desktop.addInternalFrame(af, title, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); af.statusBar.setText(MessageManager .getString("label.successfully_pasted_alignment_file")); diff --git a/src/jalview/gui/Desktop.java b/src/jalview/gui/Desktop.java index a1d4492..6eeb0c2 100644 --- a/src/jalview/gui/Desktop.java +++ b/src/jalview/gui/Desktop.java @@ -21,7 +21,6 @@ package jalview.gui; import jalview.bin.Cache; -import jalview.datamodel.AlignmentI; import jalview.io.FileLoader; import jalview.io.FormatAdapter; import jalview.io.IdentifyFile; @@ -69,7 +68,6 @@ import java.lang.reflect.Constructor; import java.net.URL; import java.util.ArrayList; import java.util.Hashtable; -import java.util.List; import java.util.StringTokenizer; import java.util.Vector; import java.util.concurrent.ExecutorService; diff --git a/src/jalview/gui/SequenceFetcher.java b/src/jalview/gui/SequenceFetcher.java index 35bc29a..e159d41 100755 --- a/src/jalview/gui/SequenceFetcher.java +++ b/src/jalview/gui/SequenceFetcher.java @@ -20,24 +20,42 @@ */ package jalview.gui; -import java.util.*; -import java.util.List; - -import java.awt.*; -import java.awt.event.*; - -import javax.swing.*; -import javax.swing.tree.DefaultMutableTreeNode; - -import com.stevesoft.pat.Regex; - -import jalview.datamodel.*; -import jalview.io.*; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.io.FormatAdapter; +import jalview.io.IdentifyFile; import jalview.util.DBRefUtils; import jalview.util.MessageManager; import jalview.ws.dbsources.das.api.DasSourceRegistryI; import jalview.ws.seqfetcher.DbSourceProxy; + import java.awt.BorderLayout; +import java.awt.Font; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.KeyAdapter; +import java.awt.event.KeyEvent; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import javax.swing.JButton; +import javax.swing.JCheckBox; +import javax.swing.JInternalFrame; +import javax.swing.JLabel; +import javax.swing.JOptionPane; +import javax.swing.JPanel; +import javax.swing.JScrollPane; +import javax.swing.JTextArea; +import javax.swing.SwingConstants; +import javax.swing.tree.DefaultMutableTreeNode; + +import com.stevesoft.pat.Regex; public class SequenceFetcher extends JPanel implements Runnable { @@ -283,7 +301,9 @@ public class SequenceFetcher extends JPanel implements Runnable public void keyPressed(KeyEvent e) { if (e.getKeyCode() == KeyEvent.VK_ENTER) + { ok_actionPerformed(); + } } }); jPanel3.setLayout(borderLayout1); @@ -821,21 +841,7 @@ public class SequenceFetcher extends JPanel implements Runnable } else { - for (int i = 0; i < al.getHeight(); i++) - { - alignFrame.viewport.getAlignment().addSequence( - al.getSequenceAt(i)); // this - // also - // creates - // dataset - // sequence - // entries - } - alignFrame.viewport.setEndSeq(alignFrame.viewport.getAlignment() - .getHeight()); - alignFrame.viewport.getAlignment().getWidth(); - alignFrame.viewport.firePropertyChange("alignment", null, - alignFrame.viewport.getAlignment().getSequences()); + alignFrame.viewport.addAlignment(al, title); } } return al; diff --git a/src/jalview/io/FileLoader.java b/src/jalview/io/FileLoader.java index 82b94c3..1d85e0a 100755 --- a/src/jalview/io/FileLoader.java +++ b/src/jalview/io/FileLoader.java @@ -331,13 +331,7 @@ public class FileLoader implements Runnable } if (viewport != null) { - // TODO: create undo object for this JAL-1101 - for (int i = 0; i < al.getHeight(); i++) - { - viewport.getAlignment().addSequence(al.getSequenceAt(i)); - } - viewport.firePropertyChange("alignment", null, viewport - .getAlignment().getSequences()); + viewport.addAlignment(al, title); } else { diff --git a/src/jalview/jbgui/GAlignFrame.java b/src/jalview/jbgui/GAlignFrame.java index f633bf6..630c962 100755 --- a/src/jalview/jbgui/GAlignFrame.java +++ b/src/jalview/jbgui/GAlignFrame.java @@ -2449,20 +2449,14 @@ public class GAlignFrame extends JInternalFrame protected void viewAsCdna_actionPerformed() { - // TODO Auto-generated method stub - } protected void alignCdna_actionPerformed() { - // TODO Auto-generated method stub - } protected void linkCdna_actionPerformed() { - // TODO Auto-generated method stub - } /** diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index 5fbc956..d688f23 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -20,8 +20,9 @@ */ package jalview.util; -import java.util.Enumeration; -import java.util.Vector; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; /** * MapList Simple way of bijectively mapping a non-contiguous linear range to @@ -32,64 +33,57 @@ import java.util.Vector; */ public class MapList { + + private List fromShifts = new ArrayList(); + + private List toShifts = new ArrayList(); + + private int fromRatio; // number of steps in fromShifts to one toRatio unit + + private int toRatio; // number of steps in toShifts to one fromRatio + /* - * (non-Javadoc) - * - * @see java.lang.Object#equals(java.lang.Object) + * lowest and highest value in the from Map */ - public boolean equals(MapList obj) + private int fromLowest; + + private int fromHighest; + + /* + * lowest and highest value in the to Map + */ + private int toLowest; + + private int toHighest; + + /** + * Two MapList objects are equal if they are the same object, or they both + * have populated shift ranges and all values are the same. + */ + @Override + public boolean equals(Object o) { - // TODO should have @Override and arg0 of type Object + if (o == null || !(o instanceof MapList)) + { + return false; + } + + MapList obj = (MapList) o; if (obj == this) { return true; } - if (obj != null && obj.fromRatio == fromRatio && obj.toRatio == toRatio - && obj.fromShifts != null && obj.toShifts != null) + if (obj.fromRatio != fromRatio || obj.toRatio != toRatio + || obj.fromShifts == null || obj.toShifts == null) { - int i, iSize = fromShifts.size(), j, jSize = obj.fromShifts.size(); - if (iSize != jSize) - { - return false; - } - for (i = 0, iSize = fromShifts.size(), j = 0, jSize = obj.fromShifts - .size(); i < iSize;) - { - int[] mi = (int[]) fromShifts.elementAt(i++); - int[] mj = (int[]) obj.fromShifts.elementAt(j++); - if (mi[0] != mj[0] || mi[1] != mj[1]) - { - return false; - } - } - iSize = toShifts.size(); - jSize = obj.toShifts.size(); - if (iSize != jSize) - { - return false; - } - for (i = 0, j = 0; i < iSize;) - { - int[] mi = (int[]) toShifts.elementAt(i++); - int[] mj = (int[]) obj.toShifts.elementAt(j++); - if (mi[0] != mj[0] || mi[1] != mj[1]) - { - return false; - } - } - return true; + return false; } - return false; + return Arrays + .deepEquals(fromShifts.toArray(), obj.fromShifts.toArray()) + && Arrays + .deepEquals(toShifts.toArray(), obj.toShifts.toArray()); } - public Vector fromShifts; - - public Vector toShifts; - - int fromRatio; // number of steps in fromShifts to one toRatio unit - - int toRatio; // number of steps in toShifts to one fromRatio - /** * * @return series of intervals mapped in from @@ -104,14 +98,19 @@ public class MapList return getRanges(toShifts); } - private int[] getRanges(Vector shifts) + /** + * Flattens a list of [start, end] into a single [start1, end1, start2, + * end2,...] array. + * + * @param shifts + * @return + */ + protected static int[] getRanges(List shifts) { int[] rnges = new int[2 * shifts.size()]; - Enumeration e = shifts.elements(); int i = 0; - while (e.hasMoreElements()) + for (int[] r : shifts) { - int r[] = (int[]) e.nextElement(); rnges[i++] = r[0]; rnges[i++] = r[1]; } @@ -119,16 +118,6 @@ public class MapList } /** - * lowest and highest value in the from Map - */ - int[] fromRange = null; - - /** - * lowest and highest value in the to Map - */ - int[] toRange = null; - - /** * * @return length of mapped phrase in from */ @@ -148,22 +137,22 @@ public class MapList public int getFromLowest() { - return fromRange[0]; + return fromLowest; } public int getFromHighest() { - return fromRange[1]; + return fromHighest; } public int getToLowest() { - return toRange[0]; + return toLowest; } public int getToHighest() { - return toRange[1]; + return toHighest; } private void ensureRange(int[] limits, int pos) @@ -180,26 +169,24 @@ public class MapList public MapList(int from[], int to[], int fromRatio, int toRatio) { - fromRange = new int[] - { from[0], from[1] }; - toRange = new int[] - { to[0], to[1] }; - - fromShifts = new Vector(); + fromLowest = from[0]; + fromHighest = from[1]; for (int i = 0; i < from.length; i += 2) { - ensureRange(fromRange, from[i]); - ensureRange(fromRange, from[i + 1]); + fromLowest = Math.min(fromLowest, from[i]); + fromHighest = Math.max(fromHighest, from[i + 1]); - fromShifts.addElement(new int[] + fromShifts.add(new int[] { from[i], from[i + 1] }); } - toShifts = new Vector(); + + toLowest = to[0]; + toHighest = to[1]; for (int i = 0; i < to.length; i += 2) { - ensureRange(toRange, to[i]); - ensureRange(toRange, to[i + 1]); - toShifts.addElement(new int[] + toLowest = Math.min(toLowest, to[i]); + toHighest = Math.max(toHighest, to[i + 1]); + toShifts.add(new int[] { to[i], to[i + 1] }); } this.fromRatio = fromRatio; @@ -208,32 +195,27 @@ public class MapList public MapList(MapList map) { - this.fromRange = new int[] - { map.fromRange[0], map.fromRange[1] }; - this.toRange = new int[] - { map.toRange[0], map.toRange[1] }; + this.fromLowest = map.fromLowest; + this.fromHighest = map.fromHighest; + this.toLowest = map.toLowest; + this.toHighest = map.toHighest; + this.fromRatio = map.fromRatio; this.toRatio = map.toRatio; if (map.fromShifts != null) { - this.fromShifts = new Vector(); - Enumeration e = map.fromShifts.elements(); - while (e.hasMoreElements()) + for (int[] r : map.fromShifts) { - int[] el = (int[]) e.nextElement(); - fromShifts.addElement(new int[] - { el[0], el[1] }); + fromShifts.add(new int[] + { r[0], r[1] }); } } if (map.toShifts != null) { - this.toShifts = new Vector(); - Enumeration e = map.toShifts.elements(); - while (e.hasMoreElements()) + for (int[] r : map.toShifts) { - int[] el = (int[]) e.nextElement(); - toShifts.addElement(new int[] - { el[0], el[1] }); + toShifts.add(new int[] + { r[0], r[1] }); } } } @@ -244,8 +226,9 @@ public class MapList * @return int[][] { int[] { fromStart, fromFinish, toStart, toFinish }, int * [fromFinish-fromStart+2] { toStart..toFinish mappings}} */ - public int[][] makeFromMap() + protected int[][] makeFromMap() { + // TODO not used - remove?? return posMap(fromShifts, fromRatio, toShifts, toRatio); } @@ -254,27 +237,30 @@ public class MapList * * @return int[to position]=position mapped in from */ - public int[][] makeToMap() + protected int[][] makeToMap() { + // TODO not used - remove?? return posMap(toShifts, toRatio, fromShifts, fromRatio); } /** * construct an int map for intervals in intVals * - * @param intVals + * @param shiftTo * @return int[] { from, to pos in range }, int[range.to-range.from+1] * returning mapped position */ - private int[][] posMap(Vector intVals, int ratio, Vector toIntVals, + private int[][] posMap(List shiftTo, int ratio, + List shiftFrom, int toRatio) { - int iv = 0, ivSize = intVals.size(); + // TODO not used - remove?? + int iv = 0, ivSize = shiftTo.size(); if (iv >= ivSize) { return null; } - int[] intv = (int[]) intVals.elementAt(iv++); + int[] intv = shiftTo.get(iv++); int from = intv[0], to = intv[1]; if (from > to) { @@ -283,7 +269,7 @@ public class MapList } while (iv < ivSize) { - intv = (int[]) intVals.elementAt(iv++); + intv = shiftTo.get(iv++); if (intv[0] < from) { from = intv[0]; @@ -305,7 +291,7 @@ public class MapList int mp[][] = new int[to - from + 2][]; for (int i = 0; i < mp.length; i++) { - int[] m = shift(i + from, intVals, ratio, toIntVals, toRatio); + int[] m = shift(i + from, shiftTo, ratio, shiftFrom, toRatio); if (m != null) { if (i == 0) @@ -361,6 +347,7 @@ public class MapList * shifts.insertElementAt(new int[] { pos, shift}, sidx); else * rshift[1]+=shift; } */ + /** * shift from pos to To(pos) * @@ -389,23 +376,23 @@ public class MapList /** * - * @param fromShifts + * @param shiftTo * @param fromRatio - * @param toShifts + * @param shiftFrom * @param toRatio * @return */ - private int[] shift(int pos, Vector fromShifts, int fromRatio, - Vector toShifts, int toRatio) + private static int[] shift(int pos, List shiftTo, int fromRatio, + List shiftFrom, int toRatio) { - int[] fromCount = countPos(fromShifts, pos); + int[] fromCount = countPos(shiftTo, pos); if (fromCount == null) { return null; } int fromRemainder = (fromCount[0] - 1) % fromRatio; int toCount = 1 + (((fromCount[0] - 1) / fromRatio) * toRatio); - int[] toPos = countToPos(toShifts, toCount); + int[] toPos = countToPos(shiftFrom, toCount); if (toPos == null) { return null; // throw new Error("Bad Mapping!"); @@ -418,16 +405,16 @@ public class MapList /** * count how many positions pos is along the series of intervals. * - * @param intVals + * @param shiftTo * @param pos * @return number of positions or null if pos is not within intervals */ - private int[] countPos(Vector intVals, int pos) + protected static int[] countPos(List shiftTo, int pos) { - int count = 0, intv[], iv = 0, ivSize = intVals.size(); + int count = 0, intv[], iv = 0, ivSize = shiftTo.size(); while (iv < ivSize) { - intv = (int[]) intVals.elementAt(iv++); + intv = shiftTo.get(iv++); if (intv[0] <= intv[1]) { if (pos >= intv[0] && pos <= intv[1]) @@ -459,17 +446,18 @@ public class MapList /** * count out pos positions into a series of intervals and return the position * - * @param intVals + * @param shiftFrom * @param pos * @return position pos in interval set */ - private int[] countToPos(Vector intVals, int pos) + protected static int[] countToPos(List shiftFrom, int pos) { - int count = 0, diff = 0, iv = 0, ivSize = intVals.size(), intv[] = + int count = 0, diff = 0, iv = 0, ivSize = shiftFrom.size(); + int[] intv = { 0, 0 }; while (iv < ivSize) { - intv = (int[]) intVals.elementAt(iv++); + intv = shiftFrom.get(iv++); diff = intv[1] - intv[0]; if (diff >= 0) { @@ -503,73 +491,68 @@ public class MapList * find series of intervals mapping from start-end in the From map. * * @param start - * position in to map + * position mapped 'to' * @param end - * position in to map - * @return series of ranges in from map + * position mapped 'to' + * @return series of [start, end] ranges in sequence mapped 'from' */ public int[] locateInFrom(int start, int end) { // inefficient implementation int fromStart[] = shiftTo(start); - int fromEnd[] = shiftTo(end); // needs to be inclusive of end of symbol - // position - if (fromStart == null || fromEnd == null) - { - return null; - } - int iv[] = getIntervals(fromShifts, fromStart, fromEnd, fromRatio); - return iv; + // needs to be inclusive of end of symbol position + int fromEnd[] = shiftTo(end); + + return getIntervals(fromShifts, fromStart, fromEnd, fromRatio); } /** * find series of intervals mapping from start-end in the to map. * * @param start - * position in from map + * position mapped 'from' * @param end - * position in from map - * @return series of ranges in to map + * position mapped 'from' + * @return series of [start, end] ranges in sequence mapped 'to' */ public int[] locateInTo(int start, int end) { - // inefficient implementation int toStart[] = shiftFrom(start); int toEnd[] = shiftFrom(end); - if (toStart == null || toEnd == null) - { - return null; - } - int iv[] = getIntervals(toShifts, toStart, toEnd, toRatio); - return iv; + return getIntervals(toShifts, toStart, toEnd, toRatio); } /** * like shift - except returns the intervals in the given vector of shifts * which were spanned in traversing fromStart to fromEnd * - * @param fromShifts2 + * @param shiftFrom * @param fromStart * @param fromEnd * @param fromRatio2 * @return series of from,to intervals from from first position of starting * region to final position of ending region inclusive */ - private int[] getIntervals(Vector fromShifts2, int[] fromStart, + protected static int[] getIntervals(List shiftFrom, + int[] fromStart, int[] fromEnd, int fromRatio2) { + if (fromStart == null || fromEnd == null) + { + return null; + } int startpos, endpos; startpos = fromStart[0]; // first position in fromStart endpos = fromEnd[0]; // last position in fromEnd int endindx = (fromRatio2 - 1); // additional positions to get to last // position from endpos - int intv = 0, intvSize = fromShifts2.size(); + int intv = 0, intvSize = shiftFrom.size(); int iv[], i = 0, fs = -1, fe_s = -1, fe = -1; // containing intervals // search intervals to locate ones containing startpos and count endindx // positions on from endpos while (intv < intvSize && (fs == -1 || fe == -1)) { - iv = (int[]) fromShifts2.elementAt(intv++); + iv = shiftFrom.get(intv++); if (fe_s > -1) { endpos = iv[0]; // start counting from beginning of interval @@ -635,13 +618,13 @@ public class MapList { return null; } - Vector ranges = new Vector(); + List ranges = new ArrayList(); if (fs <= fe) { intv = fs; i = fs; // truncate initial interval - iv = (int[]) fromShifts2.elementAt(intv++); + iv = shiftFrom.get(intv++); iv = new int[] { iv[0], iv[1] };// clone if (i == fs) @@ -650,8 +633,8 @@ public class MapList } while (i != fe) { - ranges.addElement(iv); // add initial range - iv = (int[]) fromShifts2.elementAt(intv++); // get next interval + ranges.add(iv); // add initial range + iv = shiftFrom.get(intv++); // get next interval iv = new int[] { iv[0], iv[1] };// clone i++; @@ -660,17 +643,17 @@ public class MapList { iv[1] = endpos; } - ranges.addElement(iv); // add only - or final range + ranges.add(iv); // add only - or final range } else { // walk from end of interval. - i = fromShifts2.size() - 1; + i = shiftFrom.size() - 1; while (i > fs) { i--; } - iv = (int[]) fromShifts2.elementAt(i); + iv = shiftFrom.get(i); iv = new int[] { iv[1], iv[0] };// reverse and clone // truncate initial interval @@ -680,8 +663,8 @@ public class MapList } while (--i != fe) { // fix apparent logic bug when fe==-1 - ranges.addElement(iv); // add (truncated) reversed interval - iv = (int[]) fromShifts2.elementAt(i); + ranges.add(iv); // add (truncated) reversed interval + iv = shiftFrom.get(i); iv = new int[] { iv[1], iv[0] }; // reverse and clone } @@ -690,7 +673,7 @@ public class MapList // interval is already reversed iv[1] = endpos; } - ranges.addElement(iv); // add only - or final range + ranges.add(iv); // add only - or final range } // create array of start end intervals. int[] range = null; @@ -702,10 +685,10 @@ public class MapList i = 0; while (intv < intvSize) { - iv = (int[]) ranges.elementAt(intv); + iv = ranges.get(intv); range[i++] = iv[0]; range[i++] = iv[1]; - ranges.setElementAt(null, intv++); // remove + ranges.set(intv++, null); // remove } } return range; @@ -720,6 +703,7 @@ public class MapList */ public int getToPosition(int mpos) { + // TODO not used - remove?? int[] mp = shiftTo(mpos); if (mp != null) { @@ -756,6 +740,7 @@ public class MapList */ public int getMappedPosition(int pos) { + // TODO not used - remove?? int[] mp = shiftFrom(pos); if (mp != null) { @@ -766,6 +751,7 @@ public class MapList public int[] getMappedWord(int pos) { + // TODO not used - remove?? int[] mp = shiftFrom(pos); if (mp != null) { diff --git a/src/jalview/ws/AWSThread.java b/src/jalview/ws/AWSThread.java index 7f05e90..dd7ef7c 100644 --- a/src/jalview/ws/AWSThread.java +++ b/src/jalview/ws/AWSThread.java @@ -301,7 +301,7 @@ public abstract class AWSThread extends Thread final SequenceI seq = alignment[sq]; if (acf != null && acf.involvesSequence(seq)) { - al.addCodonFrame(new AlignedCodonFrame(acf, alignment)); + al.addCodonFrame(acf); codonframe[i] = null; break; } diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 3ada6fa..c436818 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -317,4 +317,162 @@ public class AlignmentUtilsTests assertTrue(Arrays.equals(new int[] { 1, 3 }, mapList.getToRanges())); } + + /** + * Test for the alignSequenceAs method that takes two sequences and a mapping. + */ + @Test + public void testAlignSequenceAs_withMapping_noIntrons() + { + /* + * Simple case: no gaps in dna + */ + SequenceI dna = new Sequence("Seq1", "GGGAAA"); + dna.createDatasetSequence(); + SequenceI protein = new Sequence("Seq1", "-A-L-"); + protein.createDatasetSequence(); + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] + { 1, 6 }, new int[] + { 1, 2 }, 3, 1); + acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map); + + /* + * No existing gaps in dna: + */ + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + false); + assertEquals("---GGG---AAA", dna.getSequenceAsString()); + + /* + * Now introduce gaps in dna but ignore them when realigning. + */ + dna.setSequence("-G-G-G-A-A-A-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + false); + assertEquals("---GGG---AAA", dna.getSequenceAsString()); + + /* + * Now include gaps in dna when realigning. First retaining 'mapped' gaps + * only, i.e. those within the exon region. + */ + dna.setSequence("-G-G--G-A--A-A-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', true, + false); + assertEquals("---G-G--G---A--A-A", dna.getSequenceAsString()); + + /* + * Include all gaps in dna when realigning (within and without the exon + * region). The leading gap, and the gaps between codons, are subsumed by + * the protein alignment gap. + */ + dna.setSequence("-G-GG--AA-A-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', true, + true); + assertEquals("---G-GG---AA-A-", dna.getSequenceAsString()); + + /* + * Include only unmapped gaps in dna when realigning (outside the exon + * region). The leading gap, and the gaps between codons, are subsumed by + * the protein alignment gap. + */ + dna.setSequence("-G-GG--AA-A-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + true); + assertEquals("---GGG---AAA-", dna.getSequenceAsString()); + } + + /** + * Test for the alignSequenceAs method that takes two sequences and a mapping. + */ + @Test + public void testAlignSequenceAs_withMapping_withIntrons() + { + /* + * Simple case: no gaps in dna + */ + SequenceI dna = new Sequence("Seq1", "GGGAAACCCTTTGGG"); + dna.createDatasetSequence(); + SequenceI protein = new Sequence("Seq1", "-A-L-"); + protein.createDatasetSequence(); + AlignedCodonFrame acf = new AlignedCodonFrame(); + + /* + * Exons at codon 2 (AAA) and 4 (TTT) + */ + MapList map = new MapList(new int[] + { 4, 6, 10, 12 }, new int[] + { 1, 2 }, 3, 1); + acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map); + + /* + * Align dna as "-A-L-". The protein 'gaps' follow the introns, i.e are + * placed immediately before the mapped codons. + */ + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + false); + assertEquals("GGG---AAACCC---TTTGGG", dna.getSequenceAsString()); + + /* + * Add gaps to dna - but ignore when realigning. + */ + dna.setSequence("-G-G-G--A--A---AC-CC-T-TT-GG-G-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + false); + assertEquals("GGG---AAACCC---TTTGGG", dna.getSequenceAsString()); + + /* + * Add gaps to dna - include within exons only when realigning. + */ + dna.setSequence("-G-G-G--A--A---A-C-CC-T-TT-GG-G-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', true, + false); + assertEquals("GGG---A--A---ACCC---T-TTGGG", dna.getSequenceAsString()); + + /* + * Include gaps outside exons only when realigning. + */ + dna.setSequence("-G-G-G--A--A---A-C-CC-T-TT-GG-G-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + true); + assertEquals("-G-G-G---AAA-C-CC---TTT-GG-G-", dna.getSequenceAsString()); + + /* + * Include all gaps in dna when realigning. + */ + dna.setSequence("-G-G-G--A--A---A-C-CC-T-TT-GG-G-"); + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', true, + true); + assertEquals("-G-G-G---A--A---A-C-CC---T-TT-GG-G-", + dna.getSequenceAsString()); + } + + /** + * Test for the case where not all of the protein sequence is mapped to cDNA. + */ + @Test + public void testAlignSequenceAs_withMapping_withUnmappedProtein() + { + SequenceI dna = new Sequence("Seq1", "GGGAAACCCTTTGGG"); + dna.createDatasetSequence(); + SequenceI protein = new Sequence("Seq1", "-A-L-P-"); + protein.createDatasetSequence(); + AlignedCodonFrame acf = new AlignedCodonFrame(); + + /* + * Exons at codon 2 (AAA) and 4 (TTT) mapped to A and P + */ + MapList map = new MapList(new int[] + { 4, 6, 10, 12 }, new int[] + { 1, 1, 3, 3 }, 3, 1); + acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map); + + /* + * Align dna as "-A-L-P-". Currently, does nothing (aborts realignment). + * Change this test first if different behaviour wanted. + */ + AlignmentUtils.alignSequenceAs(dna, protein, acf, "---", '-', false, + false); + assertEquals("GGGAAACCCTTTGGG", dna.getSequenceAsString()); + } } diff --git a/test/jalview/datamodel/AlignedCodonFrameTest.java b/test/jalview/datamodel/AlignedCodonFrameTest.java index c73eb0b..e782cef 100644 --- a/test/jalview/datamodel/AlignedCodonFrameTest.java +++ b/test/jalview/datamodel/AlignedCodonFrameTest.java @@ -1,31 +1,103 @@ package jalview.datamodel; -import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; import jalview.util.MapList; +import java.util.Arrays; + import org.junit.Test; public class AlignedCodonFrameTest { /** - * Test the constructor which copies all except the aligned protein sequences. + * Test the method that locates the first aligned sequence that has a mapping. */ @Test - public void testConstructor_copyWithSequence() + public void testFindAlignedSequence() + { + AlignmentI cdna = new Alignment(new SequenceI[] + {}); + final Sequence seq1 = new Sequence("Seq1", "C-G-TA-GC"); + seq1.createDatasetSequence(); + cdna.addSequence(seq1); + final Sequence seq2 = new Sequence("Seq2", "-TA-GG-GG"); + seq2.createDatasetSequence(); + cdna.addSequence(seq2); + + AlignmentI aa = new Alignment(new SequenceI[] + {}); + final Sequence aseq1 = new Sequence("Seq1", "-P-R"); + aseq1.createDatasetSequence(); + aa.addSequence(aseq1); + final Sequence aseq2 = new Sequence("Seq2", "-LY-"); + aseq2.createDatasetSequence(); + aa.addSequence(aseq2); + + /* + * Mapping from first DNA sequence to second AA sequence. + */ + AlignedCodonFrame acf = new AlignedCodonFrame(); + + assertNull(acf.findAlignedSequence(seq1, aa)); + + MapList map = new MapList(new int[] + { 1, 6 }, new int[] + { 1, 2 }, 3, 1); + acf.addMap(seq1.getDatasetSequence(), aseq2.getDatasetSequence(), map); + + /* + * DNA seq1 maps to AA seq2 + */ + assertEquals(aa.getSequenceAt(1), + acf.findAlignedSequence(cdna + .getSequenceAt(0).getDatasetSequence(), aa)); + + assertEquals(cdna.getSequenceAt(0), + acf.findAlignedSequence(aa + .getSequenceAt(1).getDatasetSequence(), cdna)); + } + + /** + * Test the method that locates the mapped codon for a protein position. + */ + @Test + public void testGetMappedRegion() { + // introns lower case + final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T"); + seq1.createDatasetSequence(); + final Sequence seq2 = new Sequence("Seq2", "-TA-gG-Gg-CG-a"); + seq2.createDatasetSequence(); + + final Sequence aseq1 = new Sequence("Seq1", "-P-R"); + aseq1.createDatasetSequence(); + final Sequence aseq2 = new Sequence("Seq2", "-LY-"); + aseq2.createDatasetSequence(); + AlignedCodonFrame acf = new AlignedCodonFrame(); + + assertNull(acf.getMappedRegion(seq1, aseq1, 1)); + MapList map = new MapList(new int[] - { 1, 3 }, new int[] - { 1, 1 }, 3, 1); - SequenceI aaseq = new Sequence("", "FKQ"); - SequenceI dnaseq = new Sequence("", "ATTCGTACGGAC"); - acf.addMap(dnaseq, aaseq, map); - SequenceI[] newaligned = new SequenceI[1]; - newaligned[0] = new Sequence("", "-F-K-Q"); - newaligned[0].setDatasetSequence(aaseq.getDatasetSequence()); - AlignedCodonFrame copy = new AlignedCodonFrame(acf, newaligned); - assertSame(copy.getdnaSeqs(), acf.getdnaSeqs()); - assertSame(newaligned[0], copy.getAaForDnaSeq(dnaseq, false)); + { 2, 4, 6, 6, 8, 9 }, new int[] + { 1, 2 }, 3, 1); + acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); + map = new MapList(new int[] + { 1, 2, 4, 5, 7, 8 }, new int[] + { 1, 2 }, 3, 1); + acf.addMap(seq2.getDatasetSequence(), aseq2.getDatasetSequence(), map); + + assertEquals("[2, 4]", + Arrays.toString(acf.getMappedRegion(seq1, aseq1, 1))); + assertEquals("[6, 6, 8, 9]", + Arrays.toString(acf.getMappedRegion(seq1, aseq1, 2))); + assertEquals("[1, 2, 4, 4]", + Arrays.toString(acf.getMappedRegion(seq2, aseq2, 1))); + assertEquals("[5, 5, 7, 8]", + Arrays.toString(acf.getMappedRegion(seq2, aseq2, 2))); + + assertNull(acf.getMappedRegion(seq1, aseq2, 1)); } } diff --git a/test/jalview/datamodel/AlignmentTest.java b/test/jalview/datamodel/AlignmentTest.java index 8912155..dbd063c 100644 --- a/test/jalview/datamodel/AlignmentTest.java +++ b/test/jalview/datamodel/AlignmentTest.java @@ -60,11 +60,13 @@ public class AlignmentTest * Helper method to load an alignment and ensure dataset sequences are set up. * * @param data - * @param format TODO + * @param format + * TODO * @return * @throws IOException */ - protected AlignmentI loadAlignment(final String data, String format) throws IOException + protected AlignmentI loadAlignment(final String data, String format) + throws IOException { Alignment a = new FormatAdapter().readFile(data, AppletFormatAdapter.PASTE, format); @@ -83,8 +85,7 @@ public class AlignmentTest int i = 0; for (AlignmentAnnotation ann : al.getAlignmentAnnotation()) { - ann.setCalcId("CalcIdFor" - + al.getSequenceAt(i).getName()); + ann.setCalcId("CalcIdFor" + al.getSequenceAt(i).getName()); i++; } } @@ -180,9 +181,92 @@ public class AlignmentTest al2.addCodonFrame(acf); al1.alignAs(al2); - assertEquals("ACG---GCUCCA------ACT", al1.getSequenceAt(0) + assertEquals("AC-G---G--CUC-CA------A-CT", al1.getSequenceAt(0) .getSequenceAsString()); - assertEquals("---CGT---TAACGA---AGT---", al1.getSequenceAt(1) + assertEquals("---CG-T---TA--ACG---A---AGT", al1.getSequenceAt(1) + .getSequenceAsString()); + } + + /** + * Test aligning cdna (with introns) as per protein alignment. + * + * @throws IOException + */ + @Test + public void testAlignAs_cdnaAsProteinWithIntrons() throws IOException + { + /* + * Load alignments and add mappings for cDNA to protein + */ + AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA"); + AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA"); + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList ml = new MapList(new int[] + { 1, 12 }, new int[] + { 1, 4 }, 3, 1); + acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml); + acf.addMap(al1.getSequenceAt(1), al2.getSequenceAt(1), ml); + al2.addCodonFrame(acf); + + al1.alignAs(al2); + assertEquals("AC-G---G--CUC-CA------A-CT", al1.getSequenceAt(0) + .getSequenceAsString()); + assertEquals("---CG-T---TA--ACG---A---AGT", al1.getSequenceAt(1) + .getSequenceAsString()); + } + + /** + * Test aligning dna as per protein alignment, for the case where there are + * introns (i.e. some dna sites have no mapping from a peptide). + * + * @throws IOException + */ + @Test + public void testAlignAs_dnaAsProtein_withIntrons() throws IOException + { + /* + * Load alignments and add mappings for cDNA to protein + */ + String dna1 = "A-Aa-gG-GCC-cT-TT"; + String dna2 = "c--CCGgg-TT--T-AA-A"; + AlignmentI al1 = loadAlignment(">Seq1\n" + dna1 + "\n>Seq2\n" + dna2 + + "\n", "FASTA"); + AlignmentI al2 = loadAlignment(">Seq1\n-P--YK\n>Seq2\nG-T--F\n", + "FASTA"); + AlignedCodonFrame acf = new AlignedCodonFrame(); + // Seq1 has intron at dna positions 3,4,9 so splice is AAG GCC TTT + // Seq2 has intron at dna positions 1,5,6 so splice is CCG TTT AAA + MapList ml1 = new MapList(new int[] + { 1, 2, 5, 8, 10, 12 }, new int[] + { 1, 3 }, 3, 1); + acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml1); + MapList ml2 = new MapList(new int[] + { 2, 4, 7, 12 }, new int[] + { 1, 3 }, 3, 1); + acf.addMap(al1.getSequenceAt(1), al2.getSequenceAt(1), ml2); + al2.addCodonFrame(acf); + + /* + * Align ignoring gaps in dna introns and exons + */ + ((Alignment) al1).alignAs(al2, false, false); + assertEquals("---AAagG------GCCcTTT", al1.getSequenceAt(0) + .getSequenceAsString()); + assertEquals("cCCGgg---TTT------AAA", al1.getSequenceAt(1) + .getSequenceAsString()); + + /* + * Reset and realign, preserving gaps in dna introns and exons + */ + al1.getSequenceAt(0).setSequence(dna1); + al1.getSequenceAt(1).setSequence(dna2); + ((Alignment) al1).alignAs(al2, true, true); + // String dna1 = "A-Aa-gG-GCC-cT-TT"; + // String dna2 = "c--CCGgg-TT--T-AA-A"; + // assumption: we include 'the greater of' protein/dna gap lengths, not both + assertEquals("---A-Aa-gG------GCC-cT-TT", al1.getSequenceAt(0) + .getSequenceAsString()); + assertEquals("c--CCGgg---TT--T------AA-A", al1.getSequenceAt(1) .getSequenceAsString()); } } diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index 2c8e207..1913a70 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -1,6 +1,13 @@ package jalview.util; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import org.junit.Assert; import org.junit.Test; @@ -207,7 +214,264 @@ public class MapListTest System.out.print("\n"); } } - } + /** + * Tests for method that locates ranges in the 'from' map for given range in + * the 'to' map. + */ + @Test + public void testLocateInFrom_noIntrons() + { + /* + * Simple mapping with no introns + */ + int[] codons = new int[] + { 1, 12 }; + int[] protein = new int[] + { 1, 4 }; + MapList ml = new MapList(codons, protein, 3, 1); + assertEquals("[1, 3]", Arrays.toString(ml.locateInFrom(1, 1))); + assertEquals("[4, 6]", Arrays.toString(ml.locateInFrom(2, 2))); + assertEquals("[7, 9]", Arrays.toString(ml.locateInFrom(3, 3))); + assertEquals("[10, 12]", Arrays.toString(ml.locateInFrom(4, 4))); + assertEquals("[1, 6]", Arrays.toString(ml.locateInFrom(1, 2))); + assertEquals("[1, 9]", Arrays.toString(ml.locateInFrom(1, 3))); + assertEquals("[1, 12]", Arrays.toString(ml.locateInFrom(1, 4))); + assertEquals("[4, 9]", Arrays.toString(ml.locateInFrom(2, 3))); + assertEquals("[4, 12]", Arrays.toString(ml.locateInFrom(2, 4))); + assertEquals("[7, 12]", Arrays.toString(ml.locateInFrom(3, 4))); + assertEquals("[10, 12]", Arrays.toString(ml.locateInFrom(4, 4))); + + assertNull(ml.locateInFrom(0, 0)); + assertNull(ml.locateInFrom(1, 5)); + assertNull(ml.locateInFrom(-1, 1)); + } + + /** + * Tests for method that locates ranges in the 'from' map for given range in + * the 'to' map. + */ + @Test + public void testLocateInFrom_withIntrons() + { + /* + * Exons at positions [2, 3, 5] [6, 7, 9] [10, 12, 14] [16, 17, 18] i.e. + * 2-3, 5-7, 9-10, 12-12, 14-14, 16-18 + */ + int[] codons = + { 2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18 }; + int[] protein = + { 1, 4 }; + MapList ml = new MapList(codons, protein, 3, 1); + assertEquals("[2, 3, 5, 5]", Arrays.toString(ml.locateInFrom(1, 1))); + assertEquals("[6, 7, 9, 9]", Arrays.toString(ml.locateInFrom(2, 2))); + assertEquals("[10, 10, 12, 12, 14, 14]", + Arrays.toString(ml.locateInFrom(3, 3))); + assertEquals("[16, 18]", Arrays.toString(ml.locateInFrom(4, 4))); + } + + /** + * Tests for method that locates ranges in the 'to' map for given range in the + * 'from' map. + */ + @Test + public void testLocateInTo_noIntrons() + { + /* + * Simple mapping with no introns + */ + int[] codons = new int[] + { 1, 12 }; + int[] protein = new int[] + { 1, 4 }; + MapList ml = new MapList(codons, protein, 3, 1); + assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(1, 3))); + assertEquals("[2, 2]", Arrays.toString(ml.locateInTo(4, 6))); + assertEquals("[3, 3]", Arrays.toString(ml.locateInTo(7, 9))); + assertEquals("[4, 4]", Arrays.toString(ml.locateInTo(10, 12))); + assertEquals("[1, 2]", Arrays.toString(ml.locateInTo(1, 6))); + assertEquals("[1, 3]", Arrays.toString(ml.locateInTo(1, 9))); + assertEquals("[1, 4]", Arrays.toString(ml.locateInTo(1, 12))); + assertEquals("[2, 2]", Arrays.toString(ml.locateInTo(4, 6))); + assertEquals("[2, 4]", Arrays.toString(ml.locateInTo(4, 12))); + + /* + * A part codon is treated as if a whole one. + */ + assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(1, 1))); + assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(1, 2))); + assertEquals("[1, 2]", Arrays.toString(ml.locateInTo(1, 4))); + assertEquals("[1, 3]", Arrays.toString(ml.locateInTo(2, 8))); + assertEquals("[1, 4]", Arrays.toString(ml.locateInTo(3, 11))); + assertEquals("[2, 4]", Arrays.toString(ml.locateInTo(5, 11))); + + assertNull(ml.locateInTo(0, 0)); + assertNull(ml.locateInTo(1, 13)); + assertNull(ml.locateInTo(-1, 1)); + } + + /** + * Tests for method that locates ranges in the 'to' map for given range in the + * 'from' map. + */ + @Test + public void testLocateInTo_withIntrons() + { + /* + * Exons at positions [2, 3, 5] [6, 7, 9] [10, 12, 14] [16, 17, 18] i.e. + * 2-3, 5-7, 9-10, 12-12, 14-14, 16-18 + */ + int[] codons = + { 2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18 }; + /* + * Mapped proteins at positions 1, 3, 4, 6 in the sequence + */ + int[] protein = + { 1, 1, 3, 4, 6, 6 }; + MapList ml = new MapList(codons, protein, 3, 1); + + /* + * Can't map from an unmapped position + */ + assertNull(ml.locateInTo(1, 2)); + assertNull(ml.locateInTo(2, 4)); + assertNull(ml.locateInTo(4, 4)); + + /* + * Valid range or subrange of codon1 maps to protein1. + */ + assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(2, 2))); + assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(3, 3))); + assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(3, 5))); + assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(2, 3))); + assertEquals("[1, 1]", Arrays.toString(ml.locateInTo(2, 5))); + + // codon position 6 starts the next protein: + assertEquals("[1, 1, 3, 3]", Arrays.toString(ml.locateInTo(3, 6))); + + // codon positions 7 to 17 (part) cover proteins 2/3/4 at positions 3/4/6 + assertEquals("[3, 4, 6, 6]", Arrays.toString(ml.locateInTo(7, 17))); + + } + + /** + * Test equals method. + */ + @Test + public void testEquals() + { + int[] codons = new int[] + { 2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18 }; + int[] protein = new int[] + { 1, 4 }; + MapList ml = new MapList(codons, protein, 3, 1); + MapList ml1 = new MapList(codons, protein, 3, 1); // same values + MapList ml2 = new MapList(codons, protein, 2, 1); // fromRatio differs + MapList ml3 = new MapList(codons, protein, 3, 2); // toRatio differs + codons[2] = 4; + MapList ml6 = new MapList(codons, protein, 3, 1); // fromShifts differ + protein[1] = 3; + MapList ml7 = new MapList(codons, protein, 3, 1); // toShifts differ + + assertTrue(ml.equals(ml)); + assertTrue(ml.equals(ml1)); + assertTrue(ml1.equals(ml)); + + assertFalse(ml.equals(null)); + assertFalse(ml.equals("hello")); + assertFalse(ml.equals(ml2)); + assertFalse(ml.equals(ml3)); + assertFalse(ml.equals(ml6)); + assertFalse(ml.equals(ml7)); + assertFalse(ml6.equals(ml7)); + + try + { + MapList ml4 = new MapList(codons, null, 3, 1); // toShifts null + assertFalse(ml.equals(ml4)); + } catch (NullPointerException e) + { + // actually thrown by constructor before equals can be called + } + try + { + MapList ml5 = new MapList(null, protein, 3, 1); // fromShifts null + assertFalse(ml.equals(ml5)); + } catch (NullPointerException e) + { + // actually thrown by constructor before equals can be called + } + } + + /** + * Test for the method that flattens a list of ranges into a single array. + */ + @Test + public void testGetRanges() + { + List ranges = new ArrayList(); + ranges.add(new int[] + { 2, 3 }); + ranges.add(new int[] + { 5, 6 }); + assertEquals("[2, 3, 5, 6]", Arrays.toString(MapList.getRanges(ranges))); + } + + /** + * Check state after construction + */ + @Test + public void testConstructor() + { + int[] codons = + { 2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18 }; + int[] protein = + { 1, 1, 3, 4, 6, 6 }; + MapList ml = new MapList(codons, protein, 3, 1); + assertEquals(3, ml.getFromRatio()); + assertEquals(2, ml.getFromLowest()); + assertEquals(18, ml.getFromHighest()); + assertEquals(1, ml.getToLowest()); + assertEquals(6, ml.getToHighest()); + assertEquals("[2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18]", + Arrays.toString(ml.getFromRanges())); + assertEquals("[1, 1, 3, 4, 6, 6]", Arrays.toString(ml.getToRanges())); + + /* + * Also copy constructor + */ + MapList ml2 = new MapList(ml); + assertEquals(3, ml2.getFromRatio()); + assertEquals(2, ml2.getFromLowest()); + assertEquals(18, ml2.getFromHighest()); + assertEquals(1, ml2.getToLowest()); + assertEquals(6, ml2.getToHighest()); + assertEquals("[2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18]", + Arrays.toString(ml2.getFromRanges())); + assertEquals("[1, 1, 3, 4, 6, 6]", Arrays.toString(ml2.getToRanges())); + } + + /** + * Test the method that creates an inverse mapping + */ + @Test + public void testGetInverse() + { + int[] codons = + { 2, 3, 5, 7, 9, 10, 12, 12, 14, 14, 16, 18 }; + int[] protein = + { 1, 1, 3, 4, 6, 6 }; + + MapList ml = new MapList(codons, protein, 3, 1); + MapList ml2 = ml.getInverse(); + assertEquals(ml.getFromRatio(), ml2.getToRatio()); + assertEquals(ml.getFromRatio(), ml2.getToRatio()); + assertEquals(ml.getToHighest(), ml2.getFromHighest()); + assertEquals(ml.getFromHighest(), ml2.getToHighest()); + assertEquals(Arrays.toString(ml.getFromRanges()), + Arrays.toString(ml2.getToRanges())); + assertEquals(Arrays.toString(ml.getToRanges()), + Arrays.toString(ml2.getFromRanges())); + } } -- 1.7.10.2