import java.util.Arrays;
import java.util.Hashtable;
import java.util.List;
-import java.util.Set;
/**
* Takes in a vector or array of sequences and column start and column end and
Hashtable[] hconsensus)
{
final char gapCharacter = alignment.getGapCharacter();
- Set<AlignedCodonFrame> mappings = alignment.getCodonFrames();
+ List<AlignedCodonFrame> mappings = alignment.getCodonFrames();
if (mappings == null || mappings.isEmpty())
{
return;
{
continue;
}
- char[] codon = MappingUtils.findCodonFor(seq, col, mappings);
- int codonEncoded = CodingUtils.encodeCodon(codon);
- if (codonEncoded >= 0)
+ List<char[]> codons = MappingUtils
+ .findCodonsFor(seq, col, mappings);
+ for (char[] codon : codons)
{
- codonCounts[codonEncoded + 2]++;
- ungappedCount++;
+ int codonEncoded = CodingUtils.encodeCodon(codon);
+ if (codonEncoded >= 0)
+ {
+ codonCounts[codonEncoded + 2]++;
+ ungappedCount++;
+ }
}
}
codonCounts[1] = ungappedCount;
public class AlignedCodonFrame
{
- /**
- * tied array of na Sequence objects.
+ /*
+ * Data bean to hold mappings from one sequence to another
*/
- private SequenceI[] dnaSeqs = null;
+ private static class SequenceToSequenceMapping
+ {
+ /*
+ * the sequence mapped from; not final, as it is reassigned when the
+ * held sequence is swapped for another
+ */
+ private SequenceI fromSeq;
- /**
- * tied array of Mappings to protein sequence Objects and SequenceI[]
- * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs
- * element to corresponding aaSeqs element
- */
- private Mapping[] dnaToProt = null;
+ /*
+ * the mapping for fromSeq: holds the sequence mapped to (mapping.to)
+ * and the map list of ranges (mapping.map)
+ */
+ private Mapping mapping;
+
+ /*
+ * the class is static because it uses nothing from the enclosing
+ * instance
+ */
+ SequenceToSequenceMapping(SequenceI from, Mapping map)
+ {
+ this.fromSeq = from;
+ this.mapping = map;
+ }
+ }
+
+ private List<SequenceToSequenceMapping> mappings;
/**
* Constructor
*/
public AlignedCodonFrame()
{
+ mappings = new ArrayList<SequenceToSequenceMapping>();
}
/**
*/
public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map)
{
- int nlen = 1;
- if (dnaSeqs != null)
- {
- nlen = dnaSeqs.length + 1;
- }
- SequenceI[] ndna = new SequenceI[nlen];
- Mapping[] ndtp = new Mapping[nlen];
- if (dnaSeqs != null)
- {
- System.arraycopy(dnaSeqs, 0, ndna, 0, dnaSeqs.length);
- System.arraycopy(dnaToProt, 0, ndtp, 0, dnaSeqs.length);
- }
- dnaSeqs = ndna;
- dnaToProt = ndtp;
- nlen--;
- dnaSeqs[nlen] = (dnaseq.getDatasetSequence() == null) ? dnaseq : dnaseq
- .getDatasetSequence();
- Mapping mp = new Mapping(map);
// JBPNote DEBUG! THIS !
// dnaseq.transferAnnotation(aaseq, mp);
// aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse()));
- mp.to = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq
+
+ /*
+ * mappings are held between dataset sequences: use each sequence's
+ * dataset sequence if it has one, otherwise the sequence itself
+ */
+ SequenceI fromSeq = (dnaseq.getDatasetSequence() == null) ? dnaseq
+ : dnaseq.getDatasetSequence();
+ SequenceI toSeq = (aaseq.getDatasetSequence() == null) ? aaseq : aaseq
.getDatasetSequence();
- dnaToProt[nlen] = mp;
+
+ /*
+ * if we already hold a mapping between this pair of sequences, extend
+ * its map list with the new ranges rather than adding a second entry
+ */
+ for (SequenceToSequenceMapping ssm : mappings)
+ {
+ if (ssm.fromSeq == fromSeq && ssm.mapping.to == toSeq)
+ {
+ ssm.mapping.map.addMapList(map);
+ return;
+ }
+ }
+
+ /*
+ * otherwise, add a new sequence-to-sequence mapping for the pair
+ */
+ Mapping mp = new Mapping(toSeq, map);
+ mappings.add(new SequenceToSequenceMapping(fromSeq, mp));
}
+ /**
+ * Returns the 'from' sequence of each held mapping, in the order the
+ * mappings are held. The array is empty if there are no mappings.
+ *
+ * @return
+ */
public SequenceI[] getdnaSeqs()
{
- return dnaSeqs;
+ // TODO return a list instead?
+ SequenceI[] fromSeqs = new SequenceI[mappings.size()];
+ int next = 0;
+ for (SequenceToSequenceMapping held : mappings)
+ {
+ fromSeqs[next++] = held.fromSeq;
+ }
+ return fromSeqs;
}
+ /**
+ * Returns the 'to' sequence of each held mapping, in the order the mappings
+ * are held. The array is empty if there are no mappings.
+ *
+ * @return
+ */
public SequenceI[] getAaSeqs()
{
- if (dnaToProt == null)
+ // TODO not used - remove?
+ SequenceI[] toSeqs = new SequenceI[mappings.size()];
+ int next = 0;
+ for (SequenceToSequenceMapping held : mappings)
{
- return null;
- }
- SequenceI[] sqs = new SequenceI[dnaToProt.length];
- for (int sz = 0; sz < dnaToProt.length; sz++)
- {
- sqs[sz] = dnaToProt[sz].to;
+ toSeqs[next++] = held.mapping.to;
}
- return sqs;
+ return toSeqs;
}
+ /**
+ * Returns the map list of each held mapping, in the order the mappings are
+ * held. The array is empty if there are no mappings.
+ *
+ * @return
+ */
public MapList[] getdnaToProt()
{
- if (dnaToProt == null)
+ MapList[] mapLists = new MapList[mappings.size()];
+ int next = 0;
+ for (SequenceToSequenceMapping held : mappings)
{
- return null;
+ mapLists[next++] = held.mapping.map;
}
- MapList[] sqs = new MapList[dnaToProt.length];
- for (int sz = 0; sz < dnaToProt.length; sz++)
- {
- sqs[sz] = dnaToProt[sz].map;
- }
- return sqs;
+ return mapLists;
}
+ /**
+ * Returns the Mapping object of each held mapping, in the order the mappings
+ * are held. The array is empty if there are no mappings.
+ *
+ * @return
+ */
public Mapping[] getProtMappings()
{
- return dnaToProt;
+ Mapping[] held = new Mapping[mappings.size()];
+ int next = 0;
+ for (SequenceToSequenceMapping ssm : mappings)
+ {
+ held[next++] = ssm.mapping;
+ }
+ return held;
}
/**
*/
public Mapping getMappingForSequence(SequenceI seq)
{
- if (dnaSeqs == null)
- {
- return null;
- }
SequenceI seqDs = seq.getDatasetSequence();
seqDs = seqDs != null ? seqDs : seq;
- for (int ds = 0; ds < dnaSeqs.length; ds++)
+ for (SequenceToSequenceMapping ssm : mappings)
{
- if (dnaSeqs[ds] == seqDs || dnaToProt[ds].to == seqDs)
+ if (ssm.fromSeq == seqDs || ssm.mapping.to == seqDs)
{
- return dnaToProt[ds];
+ return ssm.mapping;
}
}
return null;
*/
public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef)
{
- if (dnaSeqs == null)
- {
- return null;
- }
SequenceI dnads = dnaSeqRef.getDatasetSequence();
- for (int ds = 0; ds < dnaSeqs.length; ds++)
+ for (SequenceToSequenceMapping ssm : mappings)
{
- if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads)
+ if (ssm.fromSeq == dnaSeqRef || ssm.fromSeq == dnads)
{
- return dnaToProt[ds].to;
+ return ssm.mapping.to;
}
}
return null;
*/
public SequenceI getDnaForAaSeq(SequenceI aaSeqRef)
{
- if (dnaToProt == null)
- {
- return null;
- }
SequenceI aads = aaSeqRef.getDatasetSequence();
- for (int as = 0; as < dnaToProt.length; as++)
+ for (SequenceToSequenceMapping ssm : mappings)
{
- if (dnaToProt[as].to == aaSeqRef || dnaToProt[as].to == aads)
+ if (ssm.mapping.to == aaSeqRef || ssm.mapping.to == aads)
{
- return dnaSeqs[as];
+ return ssm.fromSeq;
}
}
return null;
public void markMappedRegion(SequenceI seq, int index,
SearchResults results)
{
- if (dnaToProt == null)
- {
- return;
- }
int[] codon;
SequenceI ds = seq.getDatasetSequence();
- for (int mi = 0; mi < dnaToProt.length; mi++)
+ for (SequenceToSequenceMapping ssm : mappings)
{
- if (dnaSeqs[mi] == seq || dnaSeqs[mi] == ds)
+ if (ssm.fromSeq == seq || ssm.fromSeq == ds)
{
- // DEBUG System.err.println("dna pos "+index);
- codon = dnaToProt[mi].map.locateInTo(index, index);
+ codon = ssm.mapping.map.locateInTo(index, index);
if (codon != null)
{
for (int i = 0; i < codon.length; i += 2)
{
- results.addResult(dnaToProt[mi].to, codon[i], codon[i + 1]);
+ results.addResult(ssm.mapping.to, codon[i], codon[i + 1]);
}
}
}
- else if (dnaToProt[mi].to == seq || dnaToProt[mi].to == ds)
+ else if (ssm.mapping.to == seq || ssm.mapping.to == ds)
{
- // DEBUG System.err.println("aa pos "+index);
{
- codon = dnaToProt[mi].map.locateInFrom(index, index);
+ codon = ssm.mapping.map.locateInFrom(index, index);
if (codon != null)
{
for (int i = 0; i < codon.length; i += 2)
{
- results.addResult(dnaSeqs[mi], codon[i], codon[i + 1]);
+ results.addResult(ssm.fromSeq, codon[i], codon[i + 1]);
}
}
}
* Adapted from markMappedRegion().
*/
MapList ml = null;
- for (int i = 0; i < dnaToProt.length; i++)
+ int i = 0;
+ for (SequenceToSequenceMapping ssm : mappings)
{
- if (dnaSeqs[i] == seq)
+ if (ssm.fromSeq == seq)
{
ml = getdnaToProt()[i];
break;
}
+ i++;
}
return ml == null ? null : ml.locateInFrom(aaPos, aaPos);
}
/*
* Search mapped protein ('to') sequences first.
*/
- if (this.dnaToProt != null)
+ for (SequenceToSequenceMapping ssm : mappings)
{
- for (int i = 0; i < dnaToProt.length; i++)
+ if (ssm.fromSeq == seq)
{
- if (this.dnaSeqs[i] == seq)
+ for (SequenceI sourceAligned : al.getSequences())
{
- for (SequenceI sourceAligned : al.getSequences())
+ if (ssm.mapping.to == sourceAligned.getDatasetSequence())
{
- if (this.dnaToProt[i].to == sourceAligned.getDatasetSequence())
- {
- return sourceAligned;
- }
+ return sourceAligned;
}
}
}
/*
* Then try mapped dna sequences.
*/
- if (this.dnaToProt != null)
+ for (SequenceToSequenceMapping ssm : mappings)
{
- for (int i = 0; i < dnaToProt.length; i++)
+ if (ssm.mapping.to == seq)
{
- if (this.dnaToProt[i].to == seq)
+ for (SequenceI sourceAligned : al.getSequences())
{
- for (SequenceI sourceAligned : al.getSequences())
+ if (ssm.fromSeq == sourceAligned.getDatasetSequence())
{
- if (this.dnaSeqs[i] == sourceAligned.getDatasetSequence())
- {
- return sourceAligned;
- }
+ return sourceAligned;
}
}
}
}
/**
- * Returns the region in the 'mappedFrom' sequence's dataset that is mapped to
- * position 'pos' (base 1) in the 'mappedTo' sequence's dataset. The region is
- * a set of start/end position pairs.
+ * Returns the region in the target sequence's dataset that is mapped to the
+ * given position (base 1) in the query sequence's dataset. The region is a
+ * set of start/end position pairs.
*
- * @param mappedFrom
- * @param mappedTo
- * @param pos
+ * @param target
+ * @param query
+ * @param queryPos
* @return
*/
- public int[] getMappedRegion(SequenceI mappedFrom, SequenceI mappedTo,
- int pos)
+ public int[] getMappedRegion(SequenceI target, SequenceI query,
+ int queryPos)
{
- SequenceI targetDs = mappedFrom.getDatasetSequence() == null ? mappedFrom
- : mappedFrom.getDatasetSequence();
- SequenceI sourceDs = mappedTo.getDatasetSequence() == null ? mappedTo
- : mappedTo.getDatasetSequence();
- if (targetDs == null || sourceDs == null || dnaToProt == null)
+ SequenceI targetDs = target.getDatasetSequence() == null ? target
+ : target.getDatasetSequence();
+ SequenceI queryDs = query.getDatasetSequence() == null ? query : query
+ .getDatasetSequence();
+ if (targetDs == null || queryDs == null /*|| dnaToProt == null*/)
{
return null;
}
- for (int mi = 0; mi < dnaToProt.length; mi++)
+ for (SequenceToSequenceMapping ssm : mappings)
{
- if (dnaSeqs[mi] == targetDs && dnaToProt[mi].to == sourceDs)
+ /*
+ * try mapping from target to query
+ */
+ if (ssm.fromSeq == targetDs && ssm.mapping.to == queryDs)
+ {
+ int[] codon = ssm.mapping.map.locateInFrom(queryPos, queryPos);
+ if (codon != null)
+ {
+ return codon;
+ }
+ }
+ /*
+ * else try mapping from query to target
+ */
+ else if (ssm.fromSeq == queryDs && ssm.mapping.to == targetDs)
{
- int[] codon = dnaToProt[mi].map.locateInFrom(pos, pos);
+ int[] codon = ssm.mapping.map.locateInTo(queryPos, queryPos);
if (codon != null)
{
return codon;
}
/**
- * Returns the DNA codon for the given position (base 1) in a mapped protein
- * sequence, or null if no mapping is found.
+ * Returns the mapped DNA codons for the given position in a protein sequence,
+ * or null if no mapping is found. Returns a list of (e.g.) ['g', 'c', 't']
+ * codons. There may be more than one codon mapped to the protein if, for
+ * example, there are mappings to cDNA variants.
*
* @param protein
* the peptide dataset sequence
* residue position (base 1) in the peptide sequence
* @return
*/
- public char[] getMappedCodon(SequenceI protein, int aaPos)
+ public List<char[]> getMappedCodons(SequenceI protein, int aaPos)
{
- if (dnaToProt == null)
- {
- return null;
- }
MapList ml = null;
SequenceI dnaSeq = null;
- for (int i = 0; i < dnaToProt.length; i++)
+ List<char[]> result = new ArrayList<char[]>();
+
+ /*
+ * collect one codon from every mapping whose 'to' sequence is the protein
+ */
+ for (SequenceToSequenceMapping ssm : mappings)
{
- if (dnaToProt[i].to == protein)
+ if (ssm.mapping.to == protein)
{
- ml = getdnaToProt()[i];
- dnaSeq = dnaSeqs[i];
- break;
+ ml = ssm.mapping.map;
+ dnaSeq = ssm.fromSeq;
+
+ int[] codonPos = ml.locateInFrom(aaPos, aaPos);
+ if (codonPos == null)
+ {
+ /*
+ * this mapping does not cover aaPos - keep any codons already
+ * found and carry on with the remaining mappings
+ */
+ continue;
+ }
+
+ /*
+ * Read off the mapped nucleotides (converting to position base 0)
+ */
+ codonPos = MappingUtils.flattenRanges(codonPos);
+ if (codonPos == null || codonPos.length < 3)
+ {
+ /*
+ * fewer than three mapped positions, so not a codon (for example
+ * a 1:1 mapping to the protein) - skip rather than index past
+ * the end of the array
+ */
+ continue;
+ }
+ char[] dna = dnaSeq.getSequence();
+ int start = dnaSeq.getStart();
+ result.add(new char[] { dna[codonPos[0] - start],
+ dna[codonPos[1] - start], dna[codonPos[2] - start] });
}
}
- if (ml == null)
- {
- return null;
- }
- int[] codonPos = ml.locateInFrom(aaPos, aaPos);
- if (codonPos == null)
- {
- return null;
- }
-
- /*
- * Read off the mapped nucleotides (converting to position base 0)
- */
- codonPos = MappingUtils.flattenRanges(codonPos);
- char[] dna = dnaSeq.getSequence();
- int start = dnaSeq.getStart();
- return new char[] { dna[codonPos[0] - start], dna[codonPos[1] - start],
- dna[codonPos[2] - start] };
+ /*
+ * null (not an empty list) signals that no codon was found
+ */
+ return result.isEmpty() ? null : result;
}
/**
public List<Mapping> getMappingsForSequence(SequenceI seq)
{
List<Mapping> result = new ArrayList<Mapping>();
- if (dnaSeqs == null)
- {
- return result;
- }
List<SequenceI> related = new ArrayList<SequenceI>();
SequenceI seqDs = seq.getDatasetSequence();
seqDs = seqDs != null ? seqDs : seq;
- for (int ds = 0; ds < dnaSeqs.length; ds++)
+ for (SequenceToSequenceMapping ssm : mappings)
{
- final Mapping mapping = dnaToProt[ds];
- if (dnaSeqs[ds] == seqDs || mapping.to == seqDs)
+ final Mapping mapping = ssm.mapping;
+ if (ssm.fromSeq == seqDs || mapping.to == seqDs)
{
if (!related.contains(mapping.to))
{
/*
* check for replaceable DNA ('map from') sequences
*/
- for (int i = 0; i < dnaSeqs.length; i++)
+ for (SequenceToSequenceMapping ssm : mappings)
{
- SequenceI dna = dnaSeqs[i];
+ SequenceI dna = ssm.fromSeq;
if (dna instanceof SequenceDummy
&& dna.getName().equals(ds.getName()))
{
- Mapping mapping = dnaToProt[i];
+ Mapping mapping = ssm.mapping;
int mapStart = mapping.getMap().getFromLowest();
int mapEnd = mapping.getMap().getFromHighest();
- boolean mappable = couldReplaceSequence(dna, ds, mapStart, mapEnd);
+ boolean mappable = couldRealiseSequence(dna, ds, mapStart, mapEnd);
if (mappable)
{
count++;
if (doUpdate)
{
- dnaSeqs[i] = ds;
+ // TODO: new method ? ds.realise(dna);
+ // might want to copy database refs as well
+ ds.setSequenceFeatures(dna.getSequenceFeatures());
+ // dnaSeqs[i] = ds;
+ ssm.fromSeq = ds;
+ System.out.println("Realised mapped sequence " + ds.getName());
}
}
}
/*
* check for replaceable protein ('map to') sequences
*/
- SequenceI prot = dnaToProt[i].getTo();
- Mapping mapping = dnaToProt[i];
+ Mapping mapping = ssm.mapping;
+ SequenceI prot = mapping.getTo();
int mapStart = mapping.getMap().getToLowest();
int mapEnd = mapping.getMap().getToHighest();
- boolean mappable = couldReplaceSequence(prot, ds, mapStart, mapEnd);
+ boolean mappable = couldRealiseSequence(prot, ds, mapStart, mapEnd);
if (mappable)
{
count++;
if (doUpdate)
{
- dnaToProt[i].setTo(ds);
+ // TODO: new method ? ds.realise(dna);
+ // might want to copy database refs as well
+ ds.setSequenceFeatures(dna.getSequenceFeatures());
+ ssm.mapping.setTo(ds);
}
}
}
* @param mapEnd
* @return
*/
- protected static boolean couldReplaceSequence(SequenceI existing,
+ protected static boolean couldRealiseSequence(SequenceI existing,
SequenceI replacement, int mapStart, int mapEnd)
{
if (existing instanceof SequenceDummy
+ && !(replacement instanceof SequenceDummy)
&& existing.getName().equals(replacement.getName()))
{
int start = replacement.getStart();
}
return false;
}
+
+ /**
+ * Re-points held mappings at a sequence's dataset sequence: wherever the
+ * given sequence itself is held as the 'from' or the 'to' end of a mapping,
+ * it is replaced by its dataset sequence. Does nothing if the sequence is
+ * null or has no dataset sequence. For use when mappings are created before
+ * their referenced sequences are instantiated, for example when parsing GFF
+ * data.
+ *
+ * @param seq
+ * the sequence to be replaced by its dataset sequence
+ */
+ public void updateToDataset(SequenceI seq)
+ {
+ if (seq == null)
+ {
+ return;
+ }
+ SequenceI dataset = seq.getDatasetSequence();
+ if (dataset == null)
+ {
+ return;
+ }
+
+ for (SequenceToSequenceMapping held : mappings)
+ {
+ /*
+ * swap the 'from' end, then the 'to' end, where either is seq
+ */
+ if (held.fromSeq == seq)
+ {
+ held.fromSeq = dataset;
+ }
+ if (held.mapping.to == seq)
+ {
+ held.mapping.to = dataset;
+ }
+ }
+ }
}
/**
* Constructor given from and to ranges as [start1, end1, start2, end2,...].
- * If any end is equal to the next start, the ranges will be merged.
+ * If any end is equal to the next start, the ranges will be merged. There is
+ * no validation check that the ranges do not overlap each other.
*
* @param from
* contiguous regions as [start1, end1, start2, end2, ...]
this();
this.fromRatio = fromRatio;
this.toRatio = toRatio;
- fromLowest = from[0];
- fromHighest = from[1];
+ fromLowest = Integer.MAX_VALUE;
+ fromHighest = Integer.MIN_VALUE;
int added = 0;
for (int i = 0; i < from.length; i += 2)
{
- fromLowest = Math.min(fromLowest, from[i]);
- fromHighest = Math.max(fromHighest, from[i + 1]);
+ /*
+ * note lowest and highest values - bearing in mind the
+ * direction may be reversed
+ */
+ fromLowest = Math.min(fromLowest, Math.min(from[i], from[i + 1]));
+ fromHighest = Math.max(fromHighest, Math.max(from[i], from[i + 1]));
if (added > 0 && from[i] == fromShifts.get(added - 1)[1])
{
/*
}
}
- toLowest = to[0];
- toHighest = to[1];
+ toLowest = Integer.MAX_VALUE;
+ toHighest = Integer.MIN_VALUE;
added = 0;
for (int i = 0; i < to.length; i += 2)
{
- toLowest = Math.min(toLowest, to[i]);
- toHighest = Math.max(toHighest, to[i + 1]);
+ toLowest = Math.min(toLowest, Math.min(to[i], to[i + 1]));
+ toHighest = Math.max(toHighest, Math.max(to[i], to[i + 1]));
if (added > 0 && to[i] == toShifts.get(added - 1)[1])
{
toShifts.get(added - 1)[1] = to[i + 1];
}
/**
- * Constructor given ranges as lists of [start, end] positions
+ * Constructor given ranges as lists of [start, end] positions. There is no
+ * validation check that the ranges do not overlap each other.
*
* @param fromRange
* @param toRange
this.toRatio = toRatio;
fromLowest = Integer.MAX_VALUE;
- fromHighest = 0;
+ fromHighest = Integer.MIN_VALUE;
for (int[] range : fromRange)
{
- fromLowest = Math.min(fromLowest, range[0]);
- fromHighest = Math.max(fromHighest, range[1]);
+ fromLowest = Math.min(fromLowest, Math.min(range[0], range[1]));
+ fromHighest = Math.max(fromHighest, Math.max(range[0], range[1]));
}
toLowest = Integer.MAX_VALUE;
- toHighest = 0;
+ toHighest = Integer.MIN_VALUE;
for (int[] range : toRange)
{
- toLowest = Math.min(toLowest, range[0]);
- toHighest = Math.max(toHighest, range[1]);
+ toLowest = Math.min(toLowest, Math.min(range[0], range[1]));
+ toHighest = Math.max(toHighest, Math.max(range[0], range[1]));
}
}
sb.append(" ]");
return sb.toString();
}
+
+ /**
+ * Extend this map list by adding the given map's ranges. There is no
+ * validation check that the ranges do not overlap existing ranges (or each
+ * other), but contiguous ranges are merged. The lowest and highest 'from'
+ * and 'to' values are widened to include those of the added map.
+ * <p>
+ * Ranges are copied before being added, so that a later merge (which
+ * rewrites a held range's end point in place) cannot alter the map that
+ * supplied them.
+ *
+ * @param map
+ * the map whose from and to ranges are appended to this one
+ */
+ public void addMapList(MapList map)
+ {
+ this.fromLowest = Math.min(fromLowest, map.fromLowest);
+ this.toLowest = Math.min(toLowest, map.toLowest);
+ this.fromHighest = Math.max(fromHighest, map.fromHighest);
+ this.toHighest = Math.max(toHighest, map.toHighest);
+
+ for (int[] range : map.getFromRanges())
+ {
+ /*
+ * add a copy - addRange stores the array it is given and may later
+ * update it in place
+ */
+ addRange(new int[] { range[0], range[1] }, fromShifts);
+ }
+ for (int[] range : map.getToRanges())
+ {
+ addRange(new int[] { range[0], range[1] }, toShifts);
+ }
+ }
+
+ /**
+ * Adds a [start, end] range to the end of a list of ranges, merging it into
+ * the last range in the list where the two are contiguous. A range is
+ * contiguous if it runs in the same direction as the last range and starts
+ * either at the last range's end position or at the next position in that
+ * direction (+1 if forward, -1 if reverse). Otherwise the range is appended
+ * as a new entry. Note the supplied array itself (not a copy) is stored when
+ * it is appended, and a merge updates the last held range in place.
+ *
+ * @param range
+ * the [start, end] range to add; end may be less than start
+ * @param addTo
+ * the list to add to
+ */
+ public static void addRange(int[] range, List<int[]> addTo)
+ {
+ if (addTo.isEmpty())
+ {
+ /*
+ * nothing to merge with
+ */
+ addTo.add(range);
+ return;
+ }
+
+ int[] tail = addTo.get(addTo.size() - 1);
+ boolean tailForward = tail[1] >= tail[0];
+ boolean rangeForward = range[1] >= range[0];
+
+ if (tailForward == rangeForward)
+ {
+ /*
+ * same direction: merge if the new range starts where the tail ends,
+ * or one step beyond it in the direction of travel
+ */
+ int step = tailForward ? 1 : -1;
+ if (range[0] == tail[1] || range[0] == tail[1] + step)
+ {
+ tail[1] = range[1];
+ return;
+ }
+ }
+
+ /*
+ * change of direction, or a gap - hold as a separate range
+ */
+ addTo.add(range);
+ }
}
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
import jalview.util.MapList;
import java.util.Arrays;
+import java.util.List;
import org.testng.annotations.Test;
final Sequence aseq1 = new Sequence("Seq1", "-P-R");
aseq1.createDatasetSequence();
- final Sequence aseq2 = new Sequence("Seq2", "-LY-");
+ final Sequence aseq2 = new Sequence("Seq2", "-LY-Q");
aseq2.createDatasetSequence();
/*
/*
* Set up the mappings for the exons (upper-case bases)
+ * Note residue Q is unmapped
*/
MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
1, 2 }, 3, 1);
3, 1);
acf.addMap(seq2.getDatasetSequence(), aseq2.getDatasetSequence(), map);
- assertEquals("[2, 4]",
- Arrays.toString(acf.getMappedRegion(seq1, aseq1, 1)));
- assertEquals("[6, 6, 8, 9]",
- Arrays.toString(acf.getMappedRegion(seq1, aseq1, 2)));
- assertEquals("[1, 2, 4, 4]",
- Arrays.toString(acf.getMappedRegion(seq2, aseq2, 1)));
- assertEquals("[5, 5, 7, 8]",
- Arrays.toString(acf.getMappedRegion(seq2, aseq2, 2)));
+ assertArrayEquals(new int[] { 2, 4 },
+ acf.getMappedRegion(seq1, aseq1, 1));
+ assertArrayEquals(new int[] { 6, 6, 8, 9 },
+ acf.getMappedRegion(seq1, aseq1, 2));
+ assertArrayEquals(new int[] { 1, 2, 4, 4 },
+ acf.getMappedRegion(seq2, aseq2, 1));
+ assertArrayEquals(new int[] { 5, 5, 7, 8 },
+ acf.getMappedRegion(seq2, aseq2, 2));
+
+ /*
+ * No mapping from seq2 to Q
+ */
+ assertNull(acf.getMappedRegion(seq2, aseq2, 3));
/*
* No mapping from sequence 1 to sequence 2
}
@Test(groups = { "Functional" })
- public void testGetMappedCodon()
+ public void testGetMappedCodons()
{
final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
seq1.createDatasetSequence();
- final Sequence aseq1 = new Sequence("Seq1", "-P-R");
+ final Sequence aseq1 = new Sequence("Seq1", "-V-L");
aseq1.createDatasetSequence();
/*
*/
AlignedCodonFrame acf = new AlignedCodonFrame();
- assertNull(acf.getMappedCodon(seq1.getDatasetSequence(), 0));
+ assertNull(acf.getMappedCodons(seq1.getDatasetSequence(), 0));
/*
* Set up the mappings for the exons (upper-case bases)
1, 2 }, 3, 1);
acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
- assertEquals("[G, T, A]", Arrays.toString(acf.getMappedCodon(
- aseq1.getDatasetSequence(), 1)));
- assertEquals("[C, T, T]", Arrays.toString(acf.getMappedCodon(
- aseq1.getDatasetSequence(), 2)));
+ assertEquals(1, acf.getMappedCodons(aseq1.getDatasetSequence(), 1)
+ .size());
+ assertEquals(
+ "[G, T, A]",
+ Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
+ 1).get(0)));
+ assertEquals(
+ "[C, T, T]",
+ Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
+ 2).get(0)));
+ }
+
+ /**
+ * Test for the case where there is more than one variant of the DNA mapping
+ * to a protein sequence. Expects one codon per mapped DNA sequence for each
+ * residue, returned in the order in which the mappings were added.
+ */
+ @Test(groups = { "Functional" })
+ public void testGetMappedCodons_dnaVariants()
+ {
+ final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
+ seq1.createDatasetSequence();
+ final Sequence seq2 = new Sequence("Seq2", "c-G-TT-gT-gT-A");
+ seq2.createDatasetSequence();
+ final Sequence aseq1 = new Sequence("Seq1", "-V-L");
+ aseq1.createDatasetSequence();
+
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+
+ /*
+ * Set up the mappings for the exons (upper-case bases);
+ * both DNA variants are mapped to the same protein with the same ranges
+ */
+ MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
+ 1, 2 }, 3, 1);
+ acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
+ acf.addMap(seq2.getDatasetSequence(), aseq1.getDatasetSequence(), map);
+
+ /*
+ * two codons per residue: first from Seq1, then from Seq2
+ */
+ assertEquals(2, acf.getMappedCodons(aseq1.getDatasetSequence(), 1)
+ .size());
+ List<char[]> codonsForV = acf.getMappedCodons(
+ aseq1.getDatasetSequence(), 1);
+ assertEquals("[G, T, A]", Arrays.toString(codonsForV.get(0)));
+ assertEquals("[G, T, T]", Arrays.toString(codonsForV.get(1)));
+ List<char[]> codonsForL = acf.getMappedCodons(
+ aseq1.getDatasetSequence(), 2);
+ assertEquals("[C, T, T]", Arrays.toString(codonsForL.get(0)));
+ assertEquals("[T, T, A]", Arrays.toString(codonsForL.get(1)));
}
/**
* Test for the case where sequences have start > 1
*/
@Test(groups = { "Functional" })
- public void testGetMappedCodon_forSubSequences()
+ public void testGetMappedCodons_forSubSequences()
{
final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T", 27, 35);
seq1.createDatasetSequence();
- final Sequence aseq1 = new Sequence("Seq1", "-P-R", 12, 13);
+ final Sequence aseq1 = new Sequence("Seq1", "-V-L", 12, 13);
aseq1.createDatasetSequence();
/*
new int[] { 12, 13 }, 3, 1);
acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
- assertEquals("[G, T, A]", Arrays.toString(acf.getMappedCodon(
- aseq1.getDatasetSequence(), 12)));
- assertEquals("[C, T, T]", Arrays.toString(acf.getMappedCodon(
- aseq1.getDatasetSequence(), 13)));
+ assertEquals(
+ "[G, T, A]",
+ Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
+ 12).get(0)));
+ assertEquals(
+ "[C, T, T]",
+ Arrays.toString(acf.getMappedCodons(aseq1.getDatasetSequence(),
+ 13).get(0)));
}
+ /**
+ * Tests couldRealiseSequence (formerly couldReplaceSequence, hence this
+ * test's name). A dummy (placeholder) sequence can be realised only by a real
+ * sequence of the same name, and only if the mapped range lies within or
+ * overlaps the real sequence's range.
+ */
@Test(groups = { "Functional" })
public void testCouldReplaceSequence()
{
SequenceI seq1 = new Sequence("Seq1/10-21", "aaacccgggttt");
- SequenceI seq2 = new Sequence("Seq2", "PG");
SequenceI seq1proxy = new SequenceDummy("Seq1");
// map to region within sequence is ok
- assertTrue(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 12,
+ assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12,
17));
// map to region overlapping sequence is ok
- assertTrue(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 5,
+ assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 5,
10));
- assertTrue(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 21,
+ assertTrue(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 21,
26));
// map to region before sequence is not ok
- assertFalse(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 4,
+ assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 4,
9));
// map to region after sequence is not ok
- assertFalse(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 22,
+ assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 22,
27));
/*
* test should fail if name doesn't match
*/
seq1proxy.setName("Seq1a");
- assertFalse(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 12,
+ assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12,
17));
seq1proxy.setName("Seq1");
seq1.setName("Seq1a");
- assertFalse(AlignedCodonFrame.couldReplaceSequence(seq1proxy, seq1, 12,
+ assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy, seq1, 12,
17));
+
+ /*
+ * a dummy sequence can't replace a real one
+ */
+ assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1, seq1proxy, 12,
+ 17));
+
+ /*
+ * a dummy sequence can't replace a dummy sequence
+ */
+ SequenceI seq1proxy2 = new SequenceDummy("Seq1");
+ assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1proxy,
+ seq1proxy2, 12, 17));
+
+ /*
+ * a real sequence can't replace a real one
+ */
+ SequenceI seq1a = new Sequence("Seq1/10-21", "aaacccgggttt");
+ assertFalse(AlignedCodonFrame.couldRealiseSequence(seq1, seq1a, 12, 17));
}
/**
{
SequenceI seq1 = new Sequence("Seq1", "tttCAACCCGGGtttaaa");
SequenceI seq2 = new Sequence("Seq2", "QPG");
+ SequenceI seq2a = new Sequence("Seq2a", "QPG");
SequenceI seq1proxy = new SequenceDummy("Seq1");
seq1.createDatasetSequence();
seq2.createDatasetSequence();
+ seq2a.createDatasetSequence();
/*
- * Make two mappings from Seq2 peptide to dummy sequence Seq1
+ * Make mappings from Seq2 and Seq2a peptides to dummy sequence Seq1
*/
AlignedCodonFrame acf = new AlignedCodonFrame();
MapList mapping1 = new MapList(new int[] { 7, 12 }, new int[] { 2, 3 },
3, 1);
acf.addMap(seq1proxy, seq2, mapping1);
+ acf.addMap(seq1proxy, seq2a, mapping1);
// map QP to codons 4-9 (CAACCC)
MapList mapping2 = new MapList(new int[] { 4, 9 }, new int[] { 1, 2 },
3, 1);
acf.addMap(seq1proxy, seq2, mapping2);
+ acf.addMap(seq1proxy, seq2a, mapping2);
+ /*
+ * acf now has two mappings: one from Seq1 to Seq2, and one from Seq1 to Seq2a
+ */
assertEquals(2, acf.getdnaSeqs().length);
assertSame(seq1proxy, acf.getdnaSeqs()[0]);
assertSame(seq1proxy, acf.getdnaSeqs()[1]);
assertSame(seq1.getDatasetSequence(), acf.getdnaSeqs()[0]);
assertSame(seq1.getDatasetSequence(), acf.getdnaSeqs()[1]);
}
+
+ /**
+ * Test that getMappedRegion finds the mapped region whichever way round the
+ * mapping between the two sequences is held: from the target to the query
+ * sequence (the original case), or from the query to the target sequence
+ * (the reverse lookup).
+ */
+ @Test(groups = { "Functional" })
+ public void testGetMappedRegion_eitherWay()
+ {
+ final Sequence seq1 = new Sequence("Seq1", "AAACCCGGGTTT");
+ seq1.createDatasetSequence();
+ final Sequence seq2 = new Sequence("Seq2", "KPGF");
+ seq2.createDatasetSequence();
+ final Sequence seq3 = new Sequence("Seq3", "QYKPGFSW");
+ seq3.createDatasetSequence();
+
+ /*
+ * map Seq1 to all of Seq2 and part of Seq3
+ */
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);
+ acf.addMap(seq1.getDatasetSequence(), seq2.getDatasetSequence(), map);
+ map = new MapList(new int[] { 1, 12 }, new int[] { 3, 6 }, 3, 1);
+ acf.addMap(seq1.getDatasetSequence(), seq3.getDatasetSequence(), map);
+
+ /*
+ * map part of Seq3 to Seq2
+ */
+ map = new MapList(new int[] { 3, 6 }, new int[] { 1, 4 }, 1, 1);
+ acf.addMap(seq3.getDatasetSequence(), seq2.getDatasetSequence(), map);
+
+ /*
+ * original case - locate mapped codon for protein position
+ */
+ assertArrayEquals(new int[] { 4, 6 },
+ acf.getMappedRegion(seq1, seq2, 2));
+ assertArrayEquals(new int[] { 7, 9 },
+ acf.getMappedRegion(seq1, seq3, 5));
+ assertNull(acf.getMappedRegion(seq1, seq3, 1));
+
+ /*
+ * locate mapped protein for protein position
+ */
+ assertArrayEquals(new int[] { 4, 4 },
+ acf.getMappedRegion(seq3, seq2, 2));
+
+ /*
+ * reverse location protein-to-protein
+ */
+ assertArrayEquals(new int[] { 2, 2 },
+ acf.getMappedRegion(seq2, seq3, 4));
+
+ /*
+ * reverse location protein-from-nucleotide
+ * any of codon [4, 5, 6] positions map to seq2/2
+ */
+ assertArrayEquals(new int[] { 2, 2 },
+ acf.getMappedRegion(seq2, seq1, 4));
+ assertArrayEquals(new int[] { 2, 2 },
+ acf.getMappedRegion(seq2, seq1, 5));
+ assertArrayEquals(new int[] { 2, 2 },
+ acf.getMappedRegion(seq2, seq1, 6));
+ }
}
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertNull;
+import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
import java.util.ArrayList;
assertEquals("{[2, 3], [5, 7], [9, 10], [12, 12], [14, 14], [16, 18]}",
prettyPrint(ml2.getFromRanges()));
assertEquals("{[1, 1], [3, 4], [6, 6]}", prettyPrint(ml2.getToRanges()));
+
+ /*
+ * reverse direction
+ */
+ codons = new int[] { 9, 6 };
+ protein = new int[] { 100, 91, 80, 79 };
+ ml = new MapList(codons, protein, 3, 1);
+ assertEquals(6, ml.getFromLowest());
+ assertEquals(9, ml.getFromHighest());
+ assertEquals(79, ml.getToLowest());
+ assertEquals(100, ml.getToHighest());
}
/**
assertEquals("From (1:3) [ [1, 5] [10, 15] [25, 20] ] To [ [51, 1] ]",
s);
}
+
+ /**
+ * Test that addMapList widens the lowest and highest from/to values to
+ * include the added map, and appends its (non-contiguous) ranges after the
+ * existing ones.
+ */
+ @Test(groups = { "Functional" })
+ public void testAddMapList()
+ {
+ MapList ml = new MapList(new int[] { 11, 15, 20, 25, 35, 30 },
+ new int[] { 72, 22 }, 1, 3);
+ assertEquals(11, ml.getFromLowest());
+ assertEquals(35, ml.getFromHighest());
+ assertEquals(22, ml.getToLowest());
+ assertEquals(72, ml.getToHighest());
+
+ MapList ml2 = new MapList(new int[] { 2, 4, 37, 40 }, new int[] { 12,
+ 17, 78, 83, 88, 96 }, 1, 3);
+ ml.addMapList(ml2);
+ assertEquals(2, ml.getFromLowest());
+ assertEquals(40, ml.getFromHighest());
+ assertEquals(12, ml.getToLowest());
+ assertEquals(96, ml.getToHighest());
+
+ String s = ml.toString();
+ assertEquals(
+ "From (1:3) [ [11, 15] [20, 25] [35, 30] [2, 4] [37, 40] ] To [ [72, 22] [12, 17] [78, 83] [88, 96] ]",
+ s);
+ }
+
+ /**
+ * Test that a range added by addMapList is merged into the preceding range
+ * when it starts where that range ends, for both a forward ('from') and a
+ * reverse ('to') range.
+ */
+ @Test(groups = { "Functional" })
+ public void testAddMapList_contiguous()
+ {
+ MapList ml = new MapList(new int[] { 11, 15 }, new int[] { 72, 58 }, 1,
+ 3);
+
+ MapList ml2 = new MapList(new int[] { 15, 16 }, new int[] { 58, 53 },
+ 1, 3);
+ ml.addMapList(ml2);
+ assertEquals("From (1:3) [ [11, 16] ] To [ [72, 53] ]", ml.toString());
+ }
+
+ /**
+ * Test the static addRange method: the first range is added to an empty list
+ * as the same array object; a range that continues the last one in the same
+ * direction, from the same or the adjacent position, extends it; a change of
+ * direction or a gap results in a new range being added.
+ */
+ @Test(groups = "Functional")
+ public void testAddRange()
+ {
+ int[] range = { 1, 5 };
+ List<int[]> ranges = new ArrayList<int[]>();
+
+ // add to empty list:
+ MapList.addRange(range, ranges);
+ assertEquals(1, ranges.size());
+ assertSame(range, ranges.get(0));
+
+ // extend contiguous (same position):
+ MapList.addRange(new int[] { 5, 10 }, ranges);
+ assertEquals(1, ranges.size());
+ assertEquals(1, ranges.get(0)[0]);
+ assertEquals(10, ranges.get(0)[1]);
+
+ // extend contiguous (next position):
+ MapList.addRange(new int[] { 11, 15 }, ranges);
+ assertEquals(1, ranges.size());
+ assertEquals(1, ranges.get(0)[0]);
+ assertEquals(15, ranges.get(0)[1]);
+
+ // change direction: range is not merged:
+ MapList.addRange(new int[] { 16, 10 }, ranges);
+ assertEquals(2, ranges.size());
+ assertEquals(16, ranges.get(1)[0]);
+ assertEquals(10, ranges.get(1)[1]);
+
+ // extend reverse contiguous (same position):
+ MapList.addRange(new int[] { 10, 8 }, ranges);
+ assertEquals(2, ranges.size());
+ assertEquals(16, ranges.get(1)[0]);
+ assertEquals(8, ranges.get(1)[1]);
+
+ // extend reverse contiguous (next position):
+ MapList.addRange(new int[] { 7, 6 }, ranges);
+ assertEquals(2, ranges.size());
+ assertEquals(16, ranges.get(1)[0]);
+ assertEquals(6, ranges.get(1)[1]);
+
+ // change direction: range is not merged:
+ MapList.addRange(new int[] { 6, 9 }, ranges);
+ assertEquals(3, ranges.size());
+ assertEquals(6, ranges.get(2)[0]);
+ assertEquals(9, ranges.get(2)[1]);
+
+ // not contiguous: not merged
+ MapList.addRange(new int[] { 11, 12 }, ranges);
+ assertEquals(4, ranges.size());
+ assertEquals(11, ranges.get(3)[0]);
+ assertEquals(12, ranges.get(3)[1]);
+ }
+
+ /**
+ * Check the lowest and highest from/to values after construction from lists
+ * of [start, end] ranges, where the ranges run in the reverse direction
+ * (start greater than end)
+ */
+ @Test(groups = { "Functional" })
+ public void testConstructor_withLists()
+ {
+ /*
+ * reverse direction
+ */
+ int[][] codons = new int[][] { { 9, 6 } };
+ int[][] protein = new int[][] { { 100, 91 }, { 80, 79 } };
+ MapList ml = new MapList(Arrays.asList(codons), Arrays.asList(protein),
+ 3, 1);
+ assertEquals(6, ml.getFromLowest());
+ assertEquals(9, ml.getFromHighest());
+ assertEquals(79, ml.getToLowest());
+ assertEquals(100, ml.getToHighest());
+ }
}