/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.datamodel;
import jalview.util.Comparison;
import jalview.util.MapList;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Vector;
public class Mapping
{
/**
* An iterator that serves the aligned codon positions (with their protein
* products).
*
* @author gmcarstairs
*
*/
public class AlignedCodonIterator implements Iterator
{
/*
* The gap character used in the aligned sequence
*/
private final char gap;
/*
* The characters of the aligned sequence e.g. "-cGT-ACgTG-"
*/
private final SequenceI alignedSeq;
/*
* the sequence start residue
*/
private int start;
/*
* Next position (base 0) in the aligned sequence
*/
private int alignedColumn = 0;
/*
* Count of bases up to and including alignedColumn position
*/
private int alignedBases = 0;
/*
* [start, end] from ranges (base 1)
*/
private Iterator fromRanges;
/*
* [start, end] to ranges (base 1)
*/
private Iterator toRanges;
/*
* The current [start, end] (base 1) from range
*/
private int[] currentFromRange = null;
/*
* The current [start, end] (base 1) to range
*/
private int[] currentToRange = null;
/*
* The next 'from' position (base 1) to process
*/
private int fromPosition = 0;
/*
* The next 'to' position (base 1) to process
*/
private int toPosition = 0;
/**
* Constructor
*
* @param seq
* the aligned sequence
* @param gapChar
*/
public AlignedCodonIterator(SequenceI seq, char gapChar)
{
this.alignedSeq = seq;
this.start = seq.getStart();
this.gap = gapChar;
fromRanges = map.getFromRanges().iterator();
toRanges = map.getToRanges().iterator();
if (fromRanges.hasNext())
{
currentFromRange = fromRanges.next();
fromPosition = currentFromRange[0];
}
if (toRanges.hasNext())
{
currentToRange = toRanges.next();
toPosition = currentToRange[0];
}
}
/**
* Returns true unless we have already traversed the whole mapping.
*/
@Override
public boolean hasNext()
{
if (fromRanges.hasNext())
{
return true;
}
if (currentFromRange == null || fromPosition >= currentFromRange[1])
{
return false;
}
return true;
}
/**
* Returns the next codon's aligned positions, and translated value.
*
* @throws NoSuchElementException
* if hasNext() would have returned false
* @throws IncompleteCodonException
* if not enough mapped bases are left to make up a codon
*/
@Override
public AlignedCodon next() throws IncompleteCodonException
{
if (!hasNext())
{
throw new NoSuchElementException();
}
int[] codon = getNextCodon();
int[] alignedCodon = getAlignedCodon(codon);
String peptide = getPeptide();
int peptideCol = toPosition - 1 - Mapping.this.to.getStart();
return new AlignedCodon(alignedCodon[0], alignedCodon[1],
alignedCodon[2], peptide, peptideCol);
}
/**
* Retrieve the translation as the 'mapped to' position in the mapped to
* sequence.
*
* @return
* @throws NoSuchElementException
* if the 'toRange' is exhausted (nothing to map to)
*/
private String getPeptide()
{
// TODO should ideally handle toRatio other than 1 as well...
// i.e. code like getNextCodon()
if (toPosition <= currentToRange[1])
{
SequenceI seq = Mapping.this.to;
char pep = seq.getCharAt(toPosition - seq.getStart());
toPosition++;
return String.valueOf(pep);
}
if (!toRanges.hasNext())
{
throw new NoSuchElementException(
"Ran out of peptide at position " + toPosition);
}
currentToRange = toRanges.next();
toPosition = currentToRange[0];
return getPeptide();
}
/**
* Get the (base 1) dataset positions for the next codon in the mapping.
*
* @throws IncompleteCodonException
* if less than 3 remaining bases are mapped
*/
private int[] getNextCodon()
{
int[] codon = new int[3];
int codonbase = 0;
while (codonbase < 3)
{
if (fromPosition <= currentFromRange[1])
{
/*
* Add next position from the current start-end range
*/
codon[codonbase++] = fromPosition++;
}
else
{
/*
* Move to the next range - if there is one
*/
if (!fromRanges.hasNext())
{
throw new IncompleteCodonException();
}
currentFromRange = fromRanges.next();
fromPosition = currentFromRange[0];
}
}
return codon;
}
/**
* Get the aligned column positions (base 0) for the given sequence
* positions (base 1), by counting ungapped characters in the aligned
* sequence.
*
* @param codon
* @return
*/
private int[] getAlignedCodon(int[] codon)
{
int[] aligned = new int[codon.length];
for (int i = 0; i < codon.length; i++)
{
aligned[i] = getAlignedColumn(codon[i]);
}
return aligned;
}
/**
* Get the aligned column position (base 0) for the given sequence position
* (base 1).
*
* @param sequencePos
* @return
*/
private int getAlignedColumn(int sequencePos)
{
/*
* allow for offset e.g. treat pos 8 as 2 if sequence starts at 7
*/
int truePos = sequencePos - (start - 1);
int length = alignedSeq.getLength();
while (alignedBases < truePos && alignedColumn < length)
{
char c = alignedSeq.getCharAt(alignedColumn++);
if (c != gap && !Comparison.isGap(c))
{
alignedBases++;
}
}
return alignedColumn - 1;
}
@Override
public void remove()
{
// ignore
}
}
/*
* Contains the start-end pairs mapping from the associated sequence to the
* sequence in the database coordinate system. It also takes care of step
* difference between coordinate systems.
*/
MapList map = null;
/*
* The sequence that map maps the associated sequence to (if any).
*/
SequenceI to = null;
/*
* optional sequence id for the 'from' ranges
*/
private String mappedFromId;
public Mapping(MapList map)
{
super();
this.map = map;
}
public Mapping(SequenceI to, MapList map)
{
this(map);
this.to = to;
}
/**
* create a new mapping from
*
* @param to
* the sequence being mapped
* @param exon
* int[] {start,end,start,end} series on associated sequence
* @param is
* int[] {start,end,...} ranges on the reference frame being mapped
* to
* @param i
* step size on associated sequence
* @param j
* step size on mapped frame
*/
public Mapping(SequenceI to, int[] exon, int[] is, int i, int j)
{
this(to, new MapList(exon, is, i, j));
}
/**
* create a duplicate (and independent) mapping object with the same reference
* to any SequenceI being mapped to.
*
* @param map2
*/
public Mapping(Mapping map2)
{
if (map2 != this && map2 != null)
{
if (map2.map != null)
{
map = new MapList(map2.map);
}
to = map2.to;
mappedFromId = map2.mappedFromId;
}
}
/**
* @return the map
*/
public MapList getMap()
{
return map;
}
/**
* @param map
* the map to set
*/
public void setMap(MapList map)
{
this.map = map;
}
/**
* Equals that compares both the to references and MapList mappings.
*
* @param o
* @return
* @see MapList#equals
*/
@Override
public boolean equals(Object o)
{
if (o == null || !(o instanceof Mapping))
{
return false;
}
Mapping other = (Mapping) o;
if (other == this)
{
return true;
}
if (other.to != to)
{
return false;
}
if ((map != null && other.map == null)
|| (map == null && other.map != null))
{
return false;
}
if ((map == null && other.map == null) || map.equals(other.map))
{
return true;
}
return false;
}
/**
* Returns a hashCode made from the sequence and maplist
*/
@Override
public int hashCode()
{
int hashCode = (this.to == null ? 1 : this.to.hashCode());
if (this.map != null)
{
hashCode = hashCode * 31 + this.map.hashCode();
}
return hashCode;
}
// /**
// * gets boundary in direction of mapping
// *
// * @param position
// * in mapped reference frame
// * @return int{start, end} positions in associated sequence (in direction of
// * mapped word)
// */
// public int[] getWord(int mpos)
// {
// // BH never called
// if (map != null)
// {
// return map.getToWord(mpos);
// }
// return null;
// }
/**
* width of mapped unit in associated sequence
*
*/
public int getWidth()
{
if (map != null)
{
return map.getFromRatio();
}
return 1;
}
/**
* width of unit in mapped reference frame
*
* @return
*/
public int getMappedWidth()
{
if (map != null)
{
return map.getToRatio();
}
return 1;
}
/**
* get the 'initial' position in the associated sequence for a position in the
* mapped reference frame
*
* or the mapped position in the associated reference frame for position pos in
* the associated sequence.
*
*
* @param reg reg[POS]
* @param isMapped
*
* @return position or mapped position
*/
public int getPosition(int[] reg, boolean isMapped)
{
int pos = reg[MapList.POS];
if (map != null)
{
reg = (isMapped ? map.shiftFrom(reg) : map.shiftTo(reg));
if (reg != null)
{
return reg[MapList.POS_TO]; // was newArray[0], but shift puts the result in COUNT_TO
}
}
return pos;
}
// /**
//* get mapped position in the associated reference frame for position pos in
//* the associated sequence.
// *
// * @param pos
// * @return
// */
// public int getMappedPosition(int[] reg)
// {
// int mpos = reg[MapList.POS];
// if (map != null)
// {
// reg = map.shiftFrom(reg);
// if (reg != null)
// {
// return reg[MapList.POS_TO]; // was newArray[0], but shift puts the result in COUNT_TO
// }
// }
// return mpos;
// }
// public int[] getMappedWord(int pos)
// {
// // BH Not used?
// if (map != null)
// {
// reg = map.shiftFrom(reg);
// if (reg != null)
// {
// reg[MP_0] =
// return new int[] { mp[0], mp[0] + mp[2] * (map.getToRatio() - 1) };
// }
// }
// return null;
// }
/**
* locates the region of feature f in the associated sequence's reference
* frame
*
* @param f
* @return one or more features corresponding to f
*/
public SequenceFeature[] locateFeature(SequenceFeature f)
{
if (true)
{ // f.getBegin()!=f.getEnd()) {
if (map != null)
{
int[] frange = map.locateInFrom(f.getBegin(), f.getEnd());
if (frange == null)
{
// JBPNote - this isprobably not the right thing to doJBPHack
return null;
}
SequenceFeature[] vf = new SequenceFeature[frange.length / 2];
for (int i = 0, v = 0; i < frange.length; i += 2, v++)
{
vf[v] = new SequenceFeature(f, frange[i], frange[i + 1],
f.getFeatureGroup(), f.getScore());
if (frange.length > 2)
{
vf[v].setDescription(f.getDescription() + "\nPart " + (v + 1));
}
}
return vf;
}
}
// give up and just return the feature.
return new SequenceFeature[] { f };
}
/**
* return a series of contigs on the associated sequence corresponding to the
* from,to interval on the mapped reference frame
*
* @param from
* @param to
* @return int[] { from_i, to_i for i=1 to n contiguous regions in the
* associated sequence}
*/
public int[] locateRange(int from, int to)
{
if (map != null)
{
if (from <= to)
{
from = (map.getToLowest() < from) ? from : map.getToLowest();
to = (map.getToHighest() > to) ? to : map.getToHighest();
if (from > to)
{
return null;
}
}
else
{
from = (map.getToHighest() > from) ? from : map.getToHighest();
to = (map.getToLowest() < to) ? to : map.getToLowest();
if (from < to)
{
return null;
}
}
return map.locateInFrom(from, to);
}
return new int[] { from, to };
}
/**
* return a series of mapped contigs mapped from a range on the associated
* sequence
*
* @param from
* @param to
* @return
*/
public int[] locateMappedRange(int from, int to)
{
if (map != null)
{
if (from <= to)
{
from = (map.getFromLowest() < from) ? from : map.getFromLowest();
to = (map.getFromHighest() > to) ? to : map.getFromHighest();
if (from > to)
{
return null;
}
}
else
{
from = (map.getFromHighest() > from) ? from : map.getFromHighest();
to = (map.getFromLowest() < to) ? to : map.getFromLowest();
if (from < to)
{
return null;
}
}
return map.locateInTo(from, to);
}
return new int[] { from, to };
}
/**
* return a new mapping object with a maplist modifed to only map the visible
* regions defined by viscontigs.
*
* @param viscontigs
* @return
*/
public Mapping intersectVisContigs(int[] viscontigs)
{
Mapping copy = new Mapping(this);
if (map != null)
{
// int vpos = 0;
// int apos = 0;
List toRange = new ArrayList();
List fromRange = new ArrayList();
for (int vc = 0; vc < viscontigs.length; vc += 2)
{
// find a mapped range in this visible region
int[] mpr = locateMappedRange(1 + viscontigs[vc],
viscontigs[vc + 1] - 1);
if (mpr != null)
{
for (int m = 0; m < mpr.length; m += 2)
{
toRange.add(new int[] { mpr[m], mpr[m + 1] });
int[] xpos = locateRange(mpr[m], mpr[m + 1]);
for (int x = 0; x < xpos.length; x += 2)
{
fromRange.add(new int[] { xpos[x], xpos[x + 1] });
}
}
}
}
int[] from = new int[fromRange.size() * 2];
int[] to = new int[toRange.size() * 2];
int[] r;
for (int f = 0, fSize = fromRange.size(); f < fSize; f++)
{
r = fromRange.get(f);
from[f * 2] = r[0];
from[f * 2 + 1] = r[1];
}
for (int f = 0, fSize = toRange.size(); f < fSize; f++)
{
r = toRange.get(f);
to[f * 2] = r[0];
to[f * 2 + 1] = r[1];
}
copy.setMap(
new MapList(from, to, map.getFromRatio(), map.getToRatio()));
}
return copy;
}
/**
* get the sequence being mapped to - if any
*
* @return null or a dataset sequence
*/
public SequenceI getTo()
{
return to;
}
/**
* set the dataset sequence being mapped to if any
*
* @param tto
*/
public void setTo(SequenceI tto)
{
to = tto;
}
/**
* Returns an iterator which can serve up the aligned codon column positions
* and their corresponding peptide products
*
* @param seq
* an aligned (i.e. possibly gapped) sequence
* @param gapChar
* @return
*/
public Iterator getCodonIterator(SequenceI seq,
char gapChar)
{
return new AlignedCodonIterator(seq, gapChar);
}
/**
* Readable representation for debugging only, not guaranteed not to change
*/
@Override
public String toString()
{
return String.format("%s %s", this.map.toString(),
this.to == null ? "" : this.to.getName());
}
/**
* Returns the identifier for the 'from' range sequence, or null if not set
*
* @return
*/
public String getMappedFromId()
{
return mappedFromId;
}
/**
* Sets the identifier for the 'from' range sequence
*/
public void setMappedFromId(String mappedFromId)
{
this.mappedFromId = mappedFromId;
}
}