X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FMapping.java;h=4d90e3e0b6c042533ef42f020ccbb232c9b7094c;hb=HEAD;hp=e54b53ec1e9917a8da23c3aa789903c99743105e;hpb=385e63c78d289d4beae9c1ad2b187c0ea311ffbc;p=jalview.git diff --git a/src/jalview/datamodel/Mapping.java b/src/jalview/datamodel/Mapping.java index e54b53e..4d90e3e 100644 --- a/src/jalview/datamodel/Mapping.java +++ b/src/jalview/datamodel/Mapping.java @@ -1,23 +1,299 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.datamodel; +import java.util.Iterator; +import java.util.NoSuchElementException; import java.util.Vector; +import jalview.util.Comparison; import jalview.util.MapList; public class Mapping { /** + * An iterator that serves the aligned codon positions (with their protein + * products). + * + * @author gmcarstairs + * + */ + public class AlignedCodonIterator implements Iterator + { + /* + * The gap character used in the aligned sequence + */ + private final char gap; + + /* + * The characters of the aligned sequence e.g. "-cGT-ACgTG-" + */ + private final SequenceI alignedSeq; + + /* + * the sequence start residue + */ + private int start; + + /* + * Next position (base 0) in the aligned sequence + */ + private int alignedColumn = 0; + + /* + * Count of bases up to and including alignedColumn position + */ + private int alignedBases = 0; + + /* + * [start, end] from ranges (base 1) + */ + private Iterator fromRanges; + + /* + * [start, end] to ranges (base 1) + */ + private Iterator toRanges; + + /* + * The current [start, end] (base 1) from range + */ + private int[] currentFromRange = null; + + /* + * The current [start, end] (base 1) to range + */ + private int[] currentToRange = null; + + /* + * The next 'from' position (base 1) to process + */ + private int fromPosition = 0; + + /* + * The next 'to' position (base 1) to process + */ + private int toPosition = 0; + + /** + * Constructor + * + * @param seq + * the aligned sequence + * @param gapChar + */ + public AlignedCodonIterator(SequenceI seq, char gapChar) + { + this.alignedSeq = seq; + this.start = seq.getStart(); + this.gap = gapChar; + fromRanges = map.getFromRanges().iterator(); + toRanges = map.getToRanges().iterator(); + if (fromRanges.hasNext()) + { + currentFromRange = fromRanges.next(); + fromPosition = currentFromRange[0]; + } + if (toRanges.hasNext()) + { + currentToRange = toRanges.next(); + toPosition = currentToRange[0]; + } + } + + /** + * Returns true unless we have already traversed the whole mapping. + */ + @Override + public boolean hasNext() + { + if (fromRanges.hasNext()) + { + return true; + } + if (currentFromRange == null || fromPosition >= currentFromRange[1]) + { + return false; + } + return true; + } + + /** + * Returns the next codon's aligned positions, and translated value. + * + * @throws NoSuchElementException + * if hasNext() would have returned false + * @throws IncompleteCodonException + * if not enough mapped bases are left to make up a codon + */ + @Override + public AlignedCodon next() throws IncompleteCodonException + { + if (!hasNext()) + { + throw new NoSuchElementException(); + } + + int[] codon = getNextCodon(); + int[] alignedCodon = getAlignedCodon(codon); + + String peptide = getPeptide(); + int peptideCol = toPosition - 1 - Mapping.this.to.getStart(); + return new AlignedCodon(alignedCodon[0], alignedCodon[1], + alignedCodon[2], peptide, peptideCol); + } + + /** + * Retrieve the translation as the 'mapped to' position in the mapped to + * sequence. + * + * @return + * @throws NoSuchElementException + * if the 'toRange' is exhausted (nothing to map to) + */ + private String getPeptide() + { + // TODO should ideally handle toRatio other than 1 as well... + // i.e. code like getNextCodon() + if (toPosition <= currentToRange[1]) + { + SequenceI seq = Mapping.this.to; + char pep = seq.getCharAt(toPosition - seq.getStart()); + toPosition++; + return String.valueOf(pep); + } + if (!toRanges.hasNext()) + { + throw new NoSuchElementException( + "Ran out of peptide at position " + toPosition); + } + currentToRange = toRanges.next(); + toPosition = currentToRange[0]; + return getPeptide(); + } + + /** + * Get the (base 1) dataset positions for the next codon in the mapping. + * + * @throws IncompleteCodonException + * if less than 3 remaining bases are mapped + */ + private int[] getNextCodon() + { + int[] codon = new int[3]; + int codonbase = 0; + + while (codonbase < 3) + { + if (fromPosition <= currentFromRange[1]) + { + /* + * Add next position from the current start-end range + */ + codon[codonbase++] = fromPosition++; + } + else + { + /* + * Move to the next range - if there is one + */ + if (!fromRanges.hasNext()) + { + throw new IncompleteCodonException(); + } + currentFromRange = fromRanges.next(); + fromPosition = currentFromRange[0]; + } + } + return codon; + } + + /** + * Get the aligned column positions (base 0) for the given sequence + * positions (base 1), by counting ungapped characters in the aligned + * sequence. + * + * @param codon + * @return + */ + private int[] getAlignedCodon(int[] codon) + { + int[] aligned = new int[codon.length]; + for (int i = 0; i < codon.length; i++) + { + aligned[i] = getAlignedColumn(codon[i]); + } + return aligned; + } + + /** + * Get the aligned column position (base 0) for the given sequence position + * (base 1). + * + * @param sequencePos + * @return + */ + private int getAlignedColumn(int sequencePos) + { + /* + * allow for offset e.g. treat pos 8 as 2 if sequence starts at 7 + */ + int truePos = sequencePos - (start - 1); + int length = alignedSeq.getLength(); + while (alignedBases < truePos && alignedColumn < length) + { + char c = alignedSeq.getCharAt(alignedColumn++); + if (c != gap && !Comparison.isGap(c)) + { + alignedBases++; + } + } + return alignedColumn - 1; + } + + @Override + public void remove() + { + // ignore + } + + } + + /* * Contains the start-end pairs mapping from the associated sequence to the - * sequence in the database coordinate system it also takes care of step - * difference between coordinate systems + * sequence in the database coordinate system. It also takes care of step + * difference between coordinate systems. */ MapList map = null; - /** - * The seuqence that map maps the associated seuqence to (if any). + /* + * The sequence that map maps the associated sequence to (if any). */ SequenceI to = null; + /* + * optional sequence id for the 'from' ranges + */ + private String mappedFromId; + public Mapping(MapList map) { super(); @@ -65,6 +341,7 @@ public class Mapping map = new MapList(map2.map); } to = map2.to; + mappedFromId = map2.mappedFromId; } } @@ -88,26 +365,54 @@ public class Mapping /** * Equals that compares both the to references and MapList mappings. * - * @param other + * @param o * @return + * @see MapList#equals */ - public boolean equals(Mapping other) + @Override + public boolean equals(Object o) { - if (other == null) + if (o == null || !(o instanceof Mapping)) + { return false; + } + Mapping other = (Mapping) o; if (other == this) + { return true; + } if (other.to != to) + { return false; + } if ((map != null && other.map == null) || (map == null && other.map != null)) + { return false; - if (map.equals(other.map)) + } + if ((map == null && other.map == null) || map.equals(other.map)) + { return true; + } return false; } /** + * Returns a hashCode made from the sequence and maplist + */ + @Override + public int hashCode() + { + int hashCode = (this.to == null ? 1 : this.to.hashCode()); + if (this.map != null) + { + hashCode = hashCode * 31 + this.map.hashCode(); + } + + return hashCode; + } + + /** * get the 'initial' position in the associated sequence for a position in the * mapped reference frame * @@ -128,23 +433,6 @@ public class Mapping } /** - * gets boundary in direction of mapping - * - * @param position - * in mapped reference frame - * @return int{start, end} positions in associated sequence (in direction of - * mapped word) - */ - public int[] getWord(int mpos) - { - if (map != null) - { - return map.getToWord(mpos); - } - return null; - } - - /** * width of mapped unit in associated sequence * */ @@ -198,8 +486,7 @@ public class Mapping int[] mp = map.shiftFrom(pos); if (mp != null) { - return new int[] - { mp[0], mp[0] + mp[2] * (map.getToRatio() - 1) }; + return new int[] { mp[0], mp[0] + mp[2] * (map.getToRatio() - 1) }; } } return null; @@ -219,42 +506,27 @@ public class Mapping if (map != null) { int[] frange = map.locateInFrom(f.getBegin(), f.getEnd()); + if (frange == null) + { + // JBPNote - this isprobably not the right thing to doJBPHack + return null; + } SequenceFeature[] vf = new SequenceFeature[frange.length / 2]; for (int i = 0, v = 0; i < frange.length; i += 2, v++) { - vf[v] = new SequenceFeature(f); - vf[v].setBegin(frange[i]); - vf[v].setEnd(frange[i + 1]); + vf[v] = new SequenceFeature(f, frange[i], frange[i + 1], + f.getFeatureGroup(), f.getScore()); if (frange.length > 2) - vf[v].setDescription(f.getDescription() + "\nPart " + v); + { + vf[v].setDescription(f.getDescription() + "\nPart " + (v + 1)); + } } return vf; } } - if (false) // else - { - int[] word = getWord(f.getBegin()); - if (word[0] < word[1]) - { - f.setBegin(word[0]); - } - else - { - f.setBegin(word[1]); - } - word = getWord(f.getEnd()); - if (word[0] > word[1]) - { - f.setEnd(word[0]); - } - else - { - f.setEnd(word[1]); - } - } + // give up and just return the feature. - return new SequenceFeature[] - { f }; + return new SequenceFeature[] { f }; } /** @@ -275,19 +547,22 @@ public class Mapping from = (map.getToLowest() < from) ? from : map.getToLowest(); to = (map.getToHighest() > to) ? to : map.getToHighest(); if (from > to) + { return null; + } } else { from = (map.getToHighest() > from) ? from : map.getToHighest(); to = (map.getToLowest() < to) ? to : map.getToLowest(); if (from < to) + { return null; + } } return map.locateInFrom(from, to); } - return new int[] - { from, to }; + return new int[] { from, to }; } /** @@ -308,19 +583,22 @@ public class Mapping from = (map.getFromLowest() < from) ? from : map.getFromLowest(); to = (map.getFromHighest() > to) ? to : map.getFromHighest(); if (from > to) + { return null; + } } else { from = (map.getFromHighest() > from) ? from : map.getFromHighest(); to = (map.getFromLowest() < to) ? to : map.getFromLowest(); if (from < to) + { return null; + } } return map.locateInTo(from, to); } - return new int[] - { from, to }; + return new int[] { from, to }; } /** @@ -342,65 +620,55 @@ public class Mapping for (int vc = 0; vc < viscontigs.length; vc += 2) { // find a mapped range in this visible region - int[] mpr = locateMappedRange(1+viscontigs[vc], viscontigs[vc + 1]-1); + int[] mpr = locateMappedRange(1 + viscontigs[vc], + viscontigs[vc + 1] - 1); if (mpr != null) { for (int m = 0; m < mpr.length; m += 2) { - toRange.addElement(new int[] - { mpr[m], mpr[m + 1] }); + toRange.addElement(new int[] { mpr[m], mpr[m + 1] }); int[] xpos = locateRange(mpr[m], mpr[m + 1]); for (int x = 0; x < xpos.length; x += 2) { - fromRange.addElement(new int[] - { xpos[x], xpos[x + 1] }); + fromRange.addElement(new int[] { xpos[x], xpos[x + 1] }); } } } } - int[] from = new int[fromRange.size()*2]; - int[] to = new int[toRange.size()*2]; + int[] from = new int[fromRange.size() * 2]; + int[] to = new int[toRange.size() * 2]; int[] r; - for (int f=0,fSize=fromRange.size(); f Protein exon map and a range of visContigs - */ - MapList fk = new MapList(new int[] { 1,6,8,13,15,23}, new int[] { 1,7}, 3, 1); - Mapping m = new Mapping(fk); - Mapping m_1 = m.intersectVisContigs(new int[] {fk.getFromLowest(), fk.getFromHighest()}); - Mapping m_2 = m.intersectVisContigs(new int[] {1,7,11,20}); - System.out.println(""+m_1.map.getFromRanges()); - - - } + /** * get the sequence being mapped to - if any + * * @return null or a dataset sequence */ public SequenceI getTo() { return to; } + /** * set the dataset sequence being mapped to if any + * * @param tto */ public void setTo(SequenceI tto) @@ -408,14 +676,47 @@ public class Mapping to = tto; } - /* (non-Javadoc) - * @see java.lang.Object#finalize() + /** + * Returns an iterator which can serve up the aligned codon column positions + * and their corresponding peptide products + * + * @param seq + * an aligned (i.e. possibly gapped) sequence + * @param gapChar + * @return + */ + public Iterator getCodonIterator(SequenceI seq, + char gapChar) + { + return new AlignedCodonIterator(seq, gapChar); + } + + /** + * Readable representation for debugging only, not guaranteed not to change + */ + @Override + public String toString() + { + return String.format("%s %s", this.map.toString(), + this.to == null ? "" : this.to.getName()); + } + + /** + * Returns the identifier for the 'from' range sequence, or null if not set + * + * @return */ - protected void finalize() throws Throwable + public String getMappedFromId() { - map = null; - to = null; - super.finalize(); + return mappedFromId; } - + + /** + * Sets the identifier for the 'from' range sequence + */ + public void setMappedFromId(String mappedFromId) + { + this.mappedFromId = mappedFromId; + } + }