X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FMapping.java;h=7f2c24c99b22d706ebeea602df4a2d40908768fc;hb=c19d2a91ca05e052e3408bf5852d88eb5d0608f1;hp=705cb15987292d9153f37480a5c994994230fa15;hpb=a878db688154da91ca7cc75a90c75d39bd193b3c;p=jalview.git diff --git a/src/jalview/datamodel/Mapping.java b/src/jalview/datamodel/Mapping.java index 705cb15..7f2c24c 100644 --- a/src/jalview/datamodel/Mapping.java +++ b/src/jalview/datamodel/Mapping.java @@ -1,119 +1,436 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9.0b2) + * Copyright (C) 2015 The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.datamodel; import jalview.util.MapList; -public class Mapping { +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.Vector; + +public class Mapping +{ /** - * Contains the - * start-end pairs mapping from - * the associated sequence to the - * sequence in the database - * coordinate system - * it also takes care of step difference between coordinate systems + * An iterator that serves the aligned codon positions (with their protein + * products). + * + * @author gmcarstairs + * */ - MapList map=null; + public class AlignedCodonIterator implements Iterator + { + /* + * The gap character used in the aligned sequence + */ + private final char gap; + + /* + * The characters of the aligned sequence e.g. "-cGT-ACgTG-" + */ + private final char[] alignedSeq; + + /* + * the sequence start residue + */ + private int start; + + /* + * Next position (base 0) in the aligned sequence + */ + private int alignedColumn = 0; + + /* + * Count of bases up to and including alignedColumn position + */ + private int alignedBases = 0; + + /* + * [start, end] from ranges (base 1) + */ + private Iterator fromRanges; + + /* + * [start, end] to ranges (base 1) + */ + private Iterator toRanges; + + /* + * The current [start, end] (base 1) from range + */ + private int[] currentFromRange = null; + + /* + * The current [start, end] (base 1) to range + */ + private int[] currentToRange = null; + + /* + * The next 'from' position (base 1) to process + */ + private int fromPosition = 0; + + /* + * The next 'to' position (base 1) to process + */ + private int toPosition = 0; + + /** + * Constructor + * + * @param seq + * the aligned sequence + * @param gapChar + */ + public AlignedCodonIterator(SequenceI seq, char gapChar) + { + this.alignedSeq = seq.getSequence(); + this.start = seq.getStart(); + this.gap = gapChar; + fromRanges = map.getFromRanges().iterator(); + toRanges = map.getToRanges().iterator(); + if (fromRanges.hasNext()) + { + currentFromRange = fromRanges.next(); + fromPosition = currentFromRange[0]; + } + if (toRanges.hasNext()) + { + currentToRange = toRanges.next(); + toPosition = currentToRange[0]; + } + } + + /** + * Returns true unless we have already traversed the whole mapping. + */ + @Override + public boolean hasNext() + { + if (fromRanges.hasNext()) + { + return true; + } + if (currentFromRange == null || fromPosition >= currentFromRange[1]) + { + return false; + } + return true; + } + + /** + * Returns the next codon's aligned positions, and translated value. + * + * @throws NoSuchElementException + * if hasNext() would have returned false + * @throws IncompleteCodonException + * if not enough mapped bases are left to make up a codon + */ + @Override + public AlignedCodon next() throws IncompleteCodonException + { + if (!hasNext()) + { + throw new NoSuchElementException(); + } + + int[] codon = getNextCodon(); + int[] alignedCodon = getAlignedCodon(codon); + + String peptide = getPeptide(); + return new AlignedCodon(alignedCodon[0], alignedCodon[1], + alignedCodon[2], peptide); + } + + /** + * Retrieve the translation as the 'mapped to' position in the mapped to + * sequence. + * + * @return + */ + private String getPeptide() + { + // TODO should ideally handle toRatio other than 1 as well... + // i.e. code like getNextCodon() + if (toPosition <= currentToRange[1]) + { + SequenceI seq = Mapping.this.to; + char pep = seq.getSequence()[toPosition - seq.getStart()]; + toPosition++; + return String.valueOf(pep); + } + if (!toRanges.hasNext()) + { + throw new NoSuchElementException("Ran out of peptide at position " + + toPosition); + } + currentToRange = toRanges.next(); + toPosition = currentToRange[0]; + return getPeptide(); + } + + /** + * Get the (base 1) dataset positions for the next codon in the mapping. + * + * @throws IncompleteCodonException + * if less than 3 remaining bases are mapped + */ + private int[] getNextCodon() + { + int[] codon = new int[3]; + int codonbase = 0; + + while (codonbase < 3) + { + if (fromPosition <= currentFromRange[1]) + { + /* + * Add next position from the current start-end range + */ + codon[codonbase++] = fromPosition++; + } + else + { + /* + * Move to the next range - if there is one + */ + if (!fromRanges.hasNext()) + { + throw new IncompleteCodonException(); + } + currentFromRange = fromRanges.next(); + fromPosition = currentFromRange[0]; + } + } + return codon; + } + + /** + * Get the aligned column positions (base 0) for the given sequence + * positions (base 1), by counting ungapped characters in the aligned + * sequence. + * + * @param codon + * @return + */ + private int[] getAlignedCodon(int[] codon) + { + int[] aligned = new int[codon.length]; + for (int i = 0; i < codon.length; i++) + { + aligned[i] = getAlignedColumn(codon[i]); + } + return aligned; + } + + /** + * Get the aligned column position (base 0) for the given sequence position + * (base 1). + * + * @param sequencePos + * @return + */ + private int getAlignedColumn(int sequencePos) + { + /* + * allow for offset e.g. treat pos 8 as 2 if sequence starts at 7 + */ + int truePos = sequencePos - (start - 1); + while (alignedBases < truePos && alignedColumn < alignedSeq.length) + { + if (alignedSeq[alignedColumn++] != gap) + { + alignedBases++; + } + } + return alignedColumn - 1; + } + + @Override + public void remove() + { + // ignore + } + + } + + /** + * Contains the start-end pairs mapping from the associated sequence to the + * sequence in the database coordinate system. It also takes care of step + * difference between coordinate systems. + */ + MapList map = null; + /** - * The seuqence that map maps the associated seuqence to (if any). + * The sequence that map maps the associated sequence to (if any). */ - SequenceI to=null; - public Mapping(MapList map) { + SequenceI to = null; + + public Mapping(MapList map) + { super(); this.map = map; } - public Mapping(SequenceI to, MapList map) { + + public Mapping(SequenceI to, MapList map) + { this(map); this.to = to; } + /** * create a new mapping from - * @param to the sequence being mapped - * @param exon int[] {start,end,start,end} series on associated sequence - * @param is int[] {start,end,...} ranges on the reference frame being mapped to - * @param i step size on associated sequence - * @param j step size on mapped frame + * + * @param to + * the sequence being mapped + * @param exon + * int[] {start,end,start,end} series on associated sequence + * @param is + * int[] {start,end,...} ranges on the reference frame being mapped + * to + * @param i + * step size on associated sequence + * @param j + * step size on mapped frame */ public Mapping(SequenceI to, int[] exon, int[] is, int i, int j) { this(to, new MapList(exon, is, i, j)); } + /** - * create a duplicate (and independent) mapping object with - * the same reference to any SequenceI being mapped to. + * create a duplicate (and independent) mapping object with the same reference + * to any SequenceI being mapped to. + * * @param map2 */ public Mapping(Mapping map2) { - if (map2!=this && map2!=null) { - if (map2.map!=null) + if (map2 != this && map2 != null) + { + if (map2.map != null) { - map=new MapList(map2.map); + map = new MapList(map2.map); } to = map2.to; } } + /** * @return the map */ - public MapList getMap() { + public MapList getMap() + { return map; } /** - * @param map the map to set + * @param map + * the map to set */ - public void setMap(MapList map) { + public void setMap(MapList map) + { this.map = map; } + /** * Equals that compares both the to references and MapList mappings. + * * @param other - * @return + * @return */ - public boolean equals(Mapping other) { - if (other==null) + @Override + public boolean equals(Object o) + { + // TODO should override Object.hashCode() to ensure that equal objects have + // equal hashcodes + if (o == null || !(o instanceof Mapping)) + { return false; - if (other==this) + } + Mapping other = (Mapping) o; + if (other == this) + { return true; - if (other.to!=to) + } + if (other.to != to) + { return false; - if ((map!=null && other.map==null) || (map==null && other.map!=null)) + } + if ((map != null && other.map == null) + || (map == null && other.map != null)) + { return false; - if (map.equals(other.map)) + } + if ((map == null && other.map == null) || map.equals(other.map)) + { return true; + } return false; } + /** - * get the 'initial' position in the associated - * sequence for a position in the mapped reference frame + * get the 'initial' position in the associated sequence for a position in the + * mapped reference frame + * * @param mpos * @return */ - public int getPosition(int mpos) + public int getPosition(int mpos) { - if (map!=null) { + if (map != null) + { int[] mp = map.shiftTo(mpos); - if (mp!=null) + if (mp != null) { return mp[0]; } } return mpos; } - public int[] getWord(int mpos) { - if (map!=null) { - int[] mp=map.shiftTo(mpos); - if (mp!=null) { - return new int[] {mp[0], mp[0]+mp[2]*(map.getFromRatio()-1)}; - } + + /** + * gets boundary in direction of mapping + * + * @param position + * in mapped reference frame + * @return int{start, end} positions in associated sequence (in direction of + * mapped word) + */ + public int[] getWord(int mpos) + { + if (map != null) + { + return map.getToWord(mpos); } return null; } + /** - * width of mapped unit in associated sequence + * width of mapped unit in associated sequence * */ - public int getWidth() { - if (map!=null) { + public int getWidth() + { + if (map != null) + { return map.getFromRatio(); } return 1; @@ -121,107 +438,271 @@ public class Mapping { /** * width of unit in mapped reference frame + * * @return */ - public int getMappedWidth() { - if (map!=null) { + public int getMappedWidth() + { + if (map != null) + { return map.getToRatio(); } return 1; } + /** - * get mapped position in the associated - * reference frame for position pos in the - * associated sequence. + * get mapped position in the associated reference frame for position pos in + * the associated sequence. + * * @param pos * @return */ - public int getMappedPosition(int pos) { - if (map!=null) { + public int getMappedPosition(int pos) + { + if (map != null) + { int[] mp = map.shiftFrom(pos); - if (mp!=null) + if (mp != null) { return mp[0]; } } return pos; } - public int[] getMappedWord(int pos) { - if (map!=null) { + + public int[] getMappedWord(int pos) + { + if (map != null) + { int[] mp = map.shiftFrom(pos); - if (mp!=null) + if (mp != null) { - return new int[] { mp[0], mp[0]+mp[2]*(map.getToRatio()-1)}; + return new int[] { mp[0], mp[0] + mp[2] * (map.getToRatio() - 1) }; } } return null; } + /** - * locates the region of feature f in the associated sequence's reference frame + * locates the region of feature f in the associated sequence's reference + * frame + * * @param f * @return one or more features corresponding to f */ public SequenceFeature[] locateFeature(SequenceFeature f) { - // this is a stopgap - features broken over exon boundaries will not be - // broken into a collection of feature fragments. - // TODO: implement creation of several features from a single feature on a discontinuously mapped seuqence - // need a function like int [] fromrange = map.getRange(from,to) - // need to make subgrouped sequence features. - if (true) { - if (map!=null) { + if (true) + { // f.getBegin()!=f.getEnd()) { + if (map != null) + { int[] frange = map.locateInFrom(f.getBegin(), f.getEnd()); - SequenceFeature[] vf = new SequenceFeature[frange.length/2]; - for (int i=0,v=0;i2) - vf[v].setDescription(f.getDescription() +"\nPart "+v); + vf[v].setEnd(frange[i + 1]); + if (frange.length > 2) + { + vf[v].setDescription(f.getDescription() + "\nPart " + (v + 1)); + } } return vf; } } - if (false){ + if (false) // else + { int[] word = getWord(f.getBegin()); - if (word[0]word[1]) + if (word[0] > word[1]) { f.setEnd(word[0]); - } else { + } + else + { f.setEnd(word[1]); } } // give up and just return the feature. return new SequenceFeature[] { f }; } - - /** - * return a series of contigs on the associated sequence corresponding to - * the from,to interval on the mapped reference frame + + /** + * return a series of contigs on the associated sequence corresponding to the + * from,to interval on the mapped reference frame + * * @param from * @param to - * @return + * @return int[] { from_i, to_i for i=1 to n contiguous regions in the + * associated sequence} */ - public int[] locateRange(int from, int to) { - //TODO - return null; + public int[] locateRange(int from, int to) + { + if (map != null) + { + if (from <= to) + { + from = (map.getToLowest() < from) ? from : map.getToLowest(); + to = (map.getToHighest() > to) ? to : map.getToHighest(); + if (from > to) + { + return null; + } + } + else + { + from = (map.getToHighest() > from) ? from : map.getToHighest(); + to = (map.getToLowest() < to) ? to : map.getToLowest(); + if (from < to) + { + return null; + } + } + return map.locateInFrom(from, to); + } + return new int[] { from, to }; } + /** - * return a series of contigs on the mapped reference frame corresponding to - * the from,to interval on the associated sequence + * return a series of mapped contigs mapped from a range on the associated + * sequence + * * @param from * @param to * @return */ - public int[] locateMappedRange(int from, int to) { - //TODO - return null; + public int[] locateMappedRange(int from, int to) + { + if (map != null) + { + + if (from <= to) + { + from = (map.getFromLowest() < from) ? from : map.getFromLowest(); + to = (map.getFromHighest() > to) ? to : map.getFromHighest(); + if (from > to) + { + return null; + } + } + else + { + from = (map.getFromHighest() > from) ? from : map.getFromHighest(); + to = (map.getFromLowest() < to) ? to : map.getFromLowest(); + if (from < to) + { + return null; + } + } + return map.locateInTo(from, to); + } + return new int[] { from, to }; + } + + /** + * return a new mapping object with a maplist modifed to only map the visible + * regions defined by viscontigs. + * + * @param viscontigs + * @return + */ + public Mapping intersectVisContigs(int[] viscontigs) + { + Mapping copy = new Mapping(this); + if (map != null) + { + int vpos = 0; + int apos = 0; + Vector toRange = new Vector(); + Vector fromRange = new Vector(); + for (int vc = 0; vc < viscontigs.length; vc += 2) + { + // find a mapped range in this visible region + int[] mpr = locateMappedRange(1 + viscontigs[vc], + viscontigs[vc + 1] - 1); + if (mpr != null) + { + for (int m = 0; m < mpr.length; m += 2) + { + toRange.addElement(new int[] { mpr[m], mpr[m + 1] }); + int[] xpos = locateRange(mpr[m], mpr[m + 1]); + for (int x = 0; x < xpos.length; x += 2) + { + fromRange.addElement(new int[] { xpos[x], xpos[x + 1] }); + } + } + } + } + int[] from = new int[fromRange.size() * 2]; + int[] to = new int[toRange.size() * 2]; + int[] r; + for (int f = 0, fSize = fromRange.size(); f < fSize; f++) + { + r = (int[]) fromRange.elementAt(f); + from[f * 2] = r[0]; + from[f * 2 + 1] = r[1]; + } + for (int f = 0, fSize = toRange.size(); f < fSize; f++) + { + r = (int[]) toRange.elementAt(f); + to[f * 2] = r[0]; + to[f * 2 + 1] = r[1]; + } + copy.setMap(new MapList(from, to, map.getFromRatio(), map + .getToRatio())); + } + return copy; + } + + /** + * get the sequence being mapped to - if any + * + * @return null or a dataset sequence + */ + public SequenceI getTo() + { + return to; + } + + /** + * set the dataset sequence being mapped to if any + * + * @param tto + */ + public void setTo(SequenceI tto) + { + to = tto; } + + /* + * (non-Javadoc) + * + * @see java.lang.Object#finalize() + */ + protected void finalize() throws Throwable + { + map = null; + to = null; + super.finalize(); + } + + public Iterator getCodonIterator(SequenceI seq, char gapChar) + { + return new AlignedCodonIterator(seq, gapChar); + } + }