2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.util.Comparison;
24 import jalview.util.MapList;
26 import java.util.ArrayList;
27 import java.util.Iterator;
28 import java.util.List;
29 import java.util.NoSuchElementException;
30 import java.util.Vector;
35 * An iterator that serves the aligned codon positions (with their protein
41 public class AlignedCodonIterator implements Iterator<AlignedCodon>
44 * The gap character used in the aligned sequence
46 private final char gap;
49 * The characters of the aligned sequence e.g. "-cGT-ACgTG-"
51 private final SequenceI alignedSeq;
54 * the sequence start residue
59 * Next position (base 0) in the aligned sequence
61 private int alignedColumn = 0;
64 * Count of bases up to and including alignedColumn position
66 private int alignedBases = 0;
69 * [start, end] from ranges (base 1)
71 private Iterator<int[]> fromRanges;
74 * [start, end] to ranges (base 1)
76 private Iterator<int[]> toRanges;
79 * The current [start, end] (base 1) from range
81 private int[] currentFromRange = null;
84 * The current [start, end] (base 1) to range
86 private int[] currentToRange = null;
89 * The next 'from' position (base 1) to process
91 private int fromPosition = 0;
94 * The next 'to' position (base 1) to process
96 private int toPosition = 0;
102 * the aligned sequence
105 public AlignedCodonIterator(SequenceI seq, char gapChar)
107 this.alignedSeq = seq;
108 this.start = seq.getStart();
110 fromRanges = map.getFromRanges().iterator();
111 toRanges = map.getToRanges().iterator();
112 if (fromRanges.hasNext())
114 currentFromRange = fromRanges.next();
115 fromPosition = currentFromRange[0];
117 if (toRanges.hasNext())
119 currentToRange = toRanges.next();
120 toPosition = currentToRange[0];
125 * Returns true unless we have already traversed the whole mapping.
128 public boolean hasNext()
130 if (fromRanges.hasNext())
134 if (currentFromRange == null || fromPosition >= currentFromRange[1])
142 * Returns the next codon's aligned positions, and translated value.
144 * @throws NoSuchElementException
145 * if hasNext() would have returned false
146 * @throws IncompleteCodonException
147 * if not enough mapped bases are left to make up a codon
150 public AlignedCodon next() throws IncompleteCodonException
154 throw new NoSuchElementException();
157 int[] codon = getNextCodon();
158 int[] alignedCodon = getAlignedCodon(codon);
160 String peptide = getPeptide();
161 int peptideCol = toPosition - 1 - Mapping.this.to.getStart();
162 return new AlignedCodon(alignedCodon[0], alignedCodon[1],
163 alignedCodon[2], peptide, peptideCol);
167 * Retrieve the translation as the 'mapped to' position in the mapped to
171 * @throws NoSuchElementException
172 * if the 'toRange' is exhausted (nothing to map to)
174 private String getPeptide()
176 // TODO should ideally handle toRatio other than 1 as well...
177 // i.e. code like getNextCodon()
178 if (toPosition <= currentToRange[1])
180 SequenceI seq = Mapping.this.to;
181 char pep = seq.getCharAt(toPosition - seq.getStart());
183 return String.valueOf(pep);
185 if (!toRanges.hasNext())
187 throw new NoSuchElementException(
188 "Ran out of peptide at position " + toPosition);
190 currentToRange = toRanges.next();
191 toPosition = currentToRange[0];
196 * Get the (base 1) dataset positions for the next codon in the mapping.
198 * @throws IncompleteCodonException
199 * if less than 3 remaining bases are mapped
201 private int[] getNextCodon()
203 int[] codon = new int[3];
206 while (codonbase < 3)
208 if (fromPosition <= currentFromRange[1])
211 * Add next position from the current start-end range
213 codon[codonbase++] = fromPosition++;
218 * Move to the next range - if there is one
220 if (!fromRanges.hasNext())
222 throw new IncompleteCodonException();
224 currentFromRange = fromRanges.next();
225 fromPosition = currentFromRange[0];
232 * Get the aligned column positions (base 0) for the given sequence
233 * positions (base 1), by counting ungapped characters in the aligned
239 private int[] getAlignedCodon(int[] codon)
241 int[] aligned = new int[codon.length];
242 for (int i = 0; i < codon.length; i++)
244 aligned[i] = getAlignedColumn(codon[i]);
250 * Get the aligned column position (base 0) for the given sequence position
256 private int getAlignedColumn(int sequencePos)
259 * allow for offset e.g. treat pos 8 as 2 if sequence starts at 7
261 int truePos = sequencePos - (start - 1);
262 int length = alignedSeq.getLength();
263 while (alignedBases < truePos && alignedColumn < length)
265 char c = alignedSeq.getCharAt(alignedColumn++);
266 if (c != gap && !Comparison.isGap(c))
271 return alignedColumn - 1;
283 * Contains the start-end pairs mapping from the associated sequence to the
284 * sequence in the database coordinate system. It also takes care of step
285 * difference between coordinate systems.
290 * The sequence that map maps the associated sequence to (if any).
295 * optional sequence id for the 'from' ranges
297 private String mappedFromId;
299 public Mapping(MapList map)
305 public Mapping(SequenceI to, MapList map)
/**
 * Creates a new mapping to the given sequence, building the MapList from the
 * supplied ranges and step sizes and delegating to the (SequenceI, MapList)
 * constructor.
 *
 * @param to
 *          the sequence being mapped
 * @param exon
 *          int[] {start,end,start,end} series on associated sequence
 * @param is
 *          int[] {start,end,...} ranges on the reference frame being mapped
 *          to
 * @param i
 *          step size on associated sequence
 * @param j
 *          step size on mapped frame
 */
public Mapping(SequenceI to, int[] exon, int[] is, int i, int j)
{
  this(to, new MapList(exon, is, i, j));
}
332 * create a duplicate (and independent) mapping object with the same reference
333 * to any SequenceI being mapped to.
337 public Mapping(Mapping map2)
339 if (map2 != this && map2 != null)
341 if (map2.map != null)
343 map = new MapList(map2.map);
346 mappedFromId = map2.mappedFromId;
353 public MapList getMap()
362 public void setMap(MapList map)
368 * Equals that compares both the to references and MapList mappings.
372 * @see MapList#equals
375 public boolean equals(Object o)
377 if (o == null || !(o instanceof Mapping))
381 Mapping other = (Mapping) o;
390 if ((map != null && other.map == null)
391 || (map == null && other.map != null))
395 if ((map == null && other.map == null) || map.equals(other.map))
403 * Returns a hashCode made from the sequence and maplist
406 public int hashCode()
408 int hashCode = (this.to == null ? 1 : this.to.hashCode());
409 if (this.map != null)
411 hashCode = hashCode * 31 + this.map.hashCode();
418 // * gets boundary in direction of mapping
421 // * in mapped reference frame
422 // * @return int{start, end} positions in associated sequence (in direction of
425 // public int[] getWord(int mpos)
427 // // BH never called
430 // return map.getToWord(mpos);
436 * width of mapped unit in associated sequence
439 public int getWidth()
443 return map.getFromRatio();
449 * width of unit in mapped reference frame
453 public int getMappedWidth()
457 return map.getToRatio();
463 * get the 'initial' position in the associated sequence for a position in the
464 * mapped reference frame
466 * or the mapped position in the associated reference frame for position pos in
467 * the associated sequence.
470 * @param reg reg[POS]
473 * @return position or mapped position
475 public int getPosition(int[] reg, boolean isMapped)
477 int pos = reg[MapList.POS];
480 reg = (isMapped ? map.shiftFrom(reg) : map.shiftTo(reg));
483 return reg[MapList.POS_TO]; // was newArray[0], but shift puts the result in COUNT_TO
490 //* get mapped position in the associated reference frame for position pos in
491 //* the associated sequence.
496 // public int getMappedPosition(int[] reg)
498 // int mpos = reg[MapList.POS];
501 // reg = map.shiftFrom(reg);
504 // return reg[MapList.POS_TO]; // was newArray[0], but shift puts the result in COUNT_TO
510 // public int[] getMappedWord(int pos)
515 // reg = map.shiftFrom(reg);
519 // return new int[] { mp[0], mp[0] + mp[2] * (map.getToRatio() - 1) };
526 * locates the region of feature f in the associated sequence's reference
530 * @return one or more features corresponding to f
532 public SequenceFeature[] locateFeature(SequenceFeature f)
535 { // f.getBegin()!=f.getEnd()) {
538 int[] frange = map.locateInFrom(f.getBegin(), f.getEnd());
541 // JBPNote - this is probably not the right thing to do (JBPHack)
544 SequenceFeature[] vf = new SequenceFeature[frange.length / 2];
545 for (int i = 0, v = 0; i < frange.length; i += 2, v++)
547 vf[v] = new SequenceFeature(f, frange[i], frange[i + 1],
548 f.getFeatureGroup(), f.getScore());
549 if (frange.length > 2)
551 vf[v].setDescription(f.getDescription() + "\nPart " + (v + 1));
558 // give up and just return the feature.
559 return new SequenceFeature[] { f };
563 * return a series of contigs on the associated sequence corresponding to the
564 * from,to interval on the mapped reference frame
568 * @return int[] { from_i, to_i for i=1 to n contiguous regions in the
569 * associated sequence}
571 public int[] locateRange(int from, int to)
577 from = (map.getToLowest() < from) ? from : map.getToLowest();
578 to = (map.getToHighest() > to) ? to : map.getToHighest();
586 from = (map.getToHighest() > from) ? from : map.getToHighest();
587 to = (map.getToLowest() < to) ? to : map.getToLowest();
593 return map.locateInFrom(from, to);
595 return new int[] { from, to };
599 * return a series of mapped contigs mapped from a range on the associated
606 public int[] locateMappedRange(int from, int to)
613 from = (map.getFromLowest() < from) ? from : map.getFromLowest();
614 to = (map.getFromHighest() > to) ? to : map.getFromHighest();
622 from = (map.getFromHighest() > from) ? from : map.getFromHighest();
623 to = (map.getFromLowest() < to) ? to : map.getFromLowest();
629 return map.locateInTo(from, to);
631 return new int[] { from, to };
635 * return a new mapping object with a maplist modified to only map the visible
636 * regions defined by viscontigs.
641 public Mapping intersectVisContigs(int[] viscontigs)
643 Mapping copy = new Mapping(this);
648 List<int[]> toRange = new ArrayList<int[]>();
649 List<int[]> fromRange = new ArrayList<int[]>();
650 for (int vc = 0; vc < viscontigs.length; vc += 2)
652 // find a mapped range in this visible region
653 int[] mpr = locateMappedRange(1 + viscontigs[vc],
654 viscontigs[vc + 1] - 1);
657 for (int m = 0; m < mpr.length; m += 2)
659 toRange.add(new int[] { mpr[m], mpr[m + 1] });
660 int[] xpos = locateRange(mpr[m], mpr[m + 1]);
661 for (int x = 0; x < xpos.length; x += 2)
663 fromRange.add(new int[] { xpos[x], xpos[x + 1] });
668 int[] from = new int[fromRange.size() * 2];
669 int[] to = new int[toRange.size() * 2];
671 for (int f = 0, fSize = fromRange.size(); f < fSize; f++)
673 r = fromRange.get(f);
675 from[f * 2 + 1] = r[1];
677 for (int f = 0, fSize = toRange.size(); f < fSize; f++)
681 to[f * 2 + 1] = r[1];
684 new MapList(from, to, map.getFromRatio(), map.getToRatio()));
690 * get the sequence being mapped to - if any
692 * @return null or a dataset sequence
694 public SequenceI getTo()
700 * set the dataset sequence being mapped to if any
704 public void setTo(SequenceI tto)
710 * Returns an iterator which can serve up the aligned codon column positions
711 * and their corresponding peptide products
714 * an aligned (i.e. possibly gapped) sequence
718 public Iterator<AlignedCodon> getCodonIterator(SequenceI seq,
721 return new AlignedCodonIterator(seq, gapChar);
725 * Readable representation for debugging only, not guaranteed not to change
728 public String toString()
730 return String.format("%s %s", this.map.toString(),
731 this.to == null ? "" : this.to.getName());
735 * Returns the identifier for the 'from' range sequence, or null if not set
739 public String getMappedFromId()
745 * Sets the identifier for the 'from' range sequence
747 public void setMappedFromId(String mappedFromId)
749 this.mappedFromId = mappedFromId;