import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.GeneLociI;
import jalview.datamodel.IncompleteCodonException;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
* Answers true if the mappings include one between the given (dataset)
* sequences.
*/
- public static boolean mappingExists(List<AlignedCodonFrame> mappings,
+ protected static boolean mappingExists(List<AlignedCodonFrame> mappings,
SequenceI aaSeq, SequenceI cdnaSeq)
{
if (mappings != null)
AlignmentI dataset, SequenceI[] products)
{
if (dataset == null || dataset.getDataset() != null)
- {
+ {
throw new IllegalArgumentException(
"IMPLEMENTATION ERROR: dataset.getDataset() must be null!");
}
{
productSeqs = new HashSet<SequenceI>();
for (SequenceI seq : products)
- {
+ {
productSeqs.add(seq.getDatasetSequence() == null ? seq : seq
.getDatasetSequence());
- }
+ }
}
/*
List<AlignedCodonFrame> seqMappings = MappingUtils
.findMappingsForSequence(dnaSeq, mappings);
for (AlignedCodonFrame mapping : seqMappings)
- {
+ {
List<Mapping> mappingsFromSequence = mapping
.getMappingsFromSequence(dnaSeq);
for (Mapping aMapping : mappingsFromSequence)
- {
+ {
MapList mapList = aMapping.getMap();
if (mapList.getFromRatio() == 1)
- {
+ {
/*
* not a dna-to-protein mapping (likely dna-to-cds)
*/
if (cdsSeq != null)
{
if (!foundSeqs.contains(cdsSeq))
- {
+ {
foundSeqs.add(cdsSeq);
SequenceI derivedSequence = cdsSeq.deriveSequence();
cdsSeqs.add(derivedSequence);
if (!dataset.getSequences().contains(cdsSeq))
- {
+ {
dataset.addSequence(cdsSeq);
+ }
}
- }
continue;
}
* add another mapping from original 'from' range to CDS
*/
AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame();
- MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),
+ final MapList dnaToCdsMap = new MapList(mapList.getFromRanges(),
cdsRange, 1, 1);
dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss,
dnaToCdsMap);
}
/*
+ * transfer dna chromosomal loci (if known) to the CDS
+ * sequence (via the mapping)
+ */
+ final MapList cdsToDnaMap = dnaToCdsMap.getInverse();
+ transferGeneLoci(dnaSeq, cdsToDnaMap, cdsSeq);
+
+ /*
* add DBRef with mapping from protein to CDS
* (this enables Get Cross-References from protein alignment)
* This is tricky because we can't have two DBRefs with the
* create a cross-reference from CDS to the source sequence's
* primary reference and vice versa
*/
-
String source = primRef.getSource();
String version = primRef.getVersion();
DBRefEntry cdsCrossRef = new DBRefEntry(source, source + ":"
+ version, primRef.getAccessionId());
- cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(dnaToCdsMap
- .getInverse())));
+ cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));
cdsSeqDss.addDBRef(cdsCrossRef);
dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq
proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
.getInverse()));
proteinProduct.addDBRef(proteinToCdsRef);
- }
+ }
/*
* transfer any features on dna that overlap the CDS
*/
transferFeatures(dnaSeq, cdsSeq, dnaToCdsMap, null,
SequenceOntologyI.CDS);
+ }
}
}
- }
AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs
.size()]));
}
/**
+   * Copies gene loci (species, assembly, chromosome and a map to chromosomal
+   * positions) from {@code fromSeq} to {@code targetSeq}. The map stored on the
+   * target is the composition of {@code targetToFrom} with the loci map held by
+   * {@code fromSeq}, as computed by {@code MapList.traverse}.
+   * <p>
+   * Nothing is changed if {@code targetSeq} already has gene loci, if
+   * {@code fromSeq} has none, or if the two maps cannot be composed.
+   *
+   * @param fromSeq
+   *          the sequence whose gene loci (if any) are the source
+   * @param targetToFrom
+   *          mapping from positions of {@code targetSeq} to positions of
+   *          {@code fromSeq}
+   * @param targetSeq
+   *          the sequence to receive gene loci
+   */
+  protected static void transferGeneLoci(SequenceI fromSeq,
+          MapList targetToFrom, SequenceI targetSeq)
+  {
+    // loci already present on the target always win
+    if (targetSeq.getGeneLoci() != null)
+    {
+      return;
+    }
+
+    final GeneLociI sourceLoci = fromSeq.getGeneLoci();
+
+    // compose target->from with from->loci; null if there is nothing to
+    // transfer or the maps cannot be composed
+    final MapList targetToLoci = sourceLoci == null ? null
+            : targetToFrom.traverse(sourceLoci.getMap());
+
+    if (targetToLoci != null)
+    {
+      targetSeq.setGeneLoci(sourceLoci.getSpeciesId(),
+              sourceLoci.getAssemblyId(), sourceLoci.getChromosomeId(),
+              targetToLoci);
+    }
+  }
+
+ /**
* A helper method that finds a CDS sequence in the alignment dataset that is
* mapped to the given protein sequence, and either is, or has a mapping from,
* the given dna sequence.
}
/**
- * add any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to
+ * Adds any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to
* the given mapping.
*
* @param cdsSeq
* @param contig
+ * @param proteinProduct
* @param mapping
- * @return list of DBRefEntrys added.
+ * @return list of DBRefEntrys added
*/
- public static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
+ protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
SequenceI contig, SequenceI proteinProduct, Mapping mapping)
{
-
- // gather direct refs from contig congrent with mapping
+ // gather direct refs from contig congruent with mapping
List<DBRefEntry> direct = new ArrayList<DBRefEntry>();
HashSet<String> directSources = new HashSet<String>();
if (contig.getDBRefs() != null)
* subtypes in the Sequence Ontology)
* @param omitting
*/
- public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
+ protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
MapList mapping, String select, String... omitting)
{
SequenceI copyTo = toSeq;
* @param dnaSeq
* @return
*/
- public static List<int[]> findCdsPositions(SequenceI dnaSeq)
+ protected static List<int[]> findCdsPositions(SequenceI dnaSeq)
{
List<int[]> result = new ArrayList<int[]>();
{
sb.append(String.format("%s %d-%d %s", type, begin, end, description));
}
+ if (!Float.isNaN(score) && score != 0f)
+ {
+ sb.append(" score=").append(score);
+ }
if (featureGroup != null)
{
sb.append(" (").append(featureGroup).append(")");
{
desc = String.format("%s %d-%d", sf.getType(), start, end);
}
+ String description = sf.getDescription();
+ if (description != null)
+ {
+ if (description.length() <= 6)
+ {
+ desc = desc + " " + description;
+ }
+ else
+ {
+ desc = desc + " " + description.substring(0, 6) + "..";
+ }
+ }
if (sf.getFeatureGroup() != null)
{
desc = desc + " (" + sf.getFeatureGroup() + ")";
GeneLociI seqCoords = seq.getGeneLoci();
if (seqCoords == null)
{
+ System.out.println(String.format(
+ "Can't query VCF for %s as chromosome coordinates not known",
+ seq.getName()));
return 0;
}
|| (fromRatio == 3 && toRatio == 1);
}
+ /**
+ * Returns a map which is the composite of this one and the input map. That
+ * is, the output map has the fromRanges of this map, and its toRanges are the
+ * toRanges of this map as transformed by the input map.
+ * <p>
+ * Returns null if the input map is null, or if the mappings cannot be
+ * traversed (some toRange of this map cannot be located by the input map's
+ * locateInTo). The output ratio is the product of the two maps' ratios,
+ * reduced by their greatest common divisor.
+ *
+ * <pre>
+ * Example 1:
+ * this: from [10-40] to [60-90]
+ * input: from [1-100] to [501-600]
+ * output: from [10-40] to [560-590]
+ * Example 2 ('reverse strand exons'):
+ * this: from [1-50] to [41-90] // CDS to transcript
+ * input: from [1-100] to [2000-1951], [1000-951] // transcript to loci
+ * output: from [1-50] to [1960-1951], [1000-961] // CDS to gene loci
+ * </pre>
+ *
+ * @param map
+ * the mapping to apply to the toRanges of this map
+ * @return the composite mapping, or null if it cannot be constructed
+ */
+ public MapList traverse(MapList map)
+ {
+ if (map == null)
+ {
+ return null;
+ }
+
+ /*
+ * compound the ratios by this rule:
+ * A:B with M:N gives A*M:B*N
+ * reduced by greatest common divisor
+ * so 1:3 with 3:3 is 3:9 or 1:3
+ * 1:3 with 3:1 is 3:3 or 1:1
+ * 1:3 with 1:3 is 1:9
+ * 2:5 with 3:7 is 6:35
+ */
+ int outFromRatio = getFromRatio() * map.getFromRatio();
+ int outToRatio = getToRatio() * map.getToRatio();
+ int gcd = MathUtils.gcd(outFromRatio, outToRatio);
+ outFromRatio /= gcd;
+ outToRatio /= gcd;
+
+ List<int[]> toRanges = new ArrayList<>();
+ for (int[] range : getToRanges())
+ {
+ // pass each of this map's 'to' ranges through the input map
+ int[] transferred = map.locateInTo(range[0], range[1]);
+ if (transferred == null)
+ {
+ // this range is not covered by the input map, so no composite exists
+ return null;
+ }
+ // NOTE(review): the located result is added as one array even when it
+ // spans several intervals (start1, end1, start2, end2, ...) - confirm
+ // that consumers of getToRanges() accept entries longer than two
+ toRanges.add(transferred);
+ }
+
+ return new MapList(getFromRanges(), toRanges, outFromRatio, outToRatio);
+ }
+
}
--- /dev/null
+package jalview.util;
+
+public class MathUtils
+{
+
+  /**
+   * Computes the greatest common divisor of two integers by Euclid's
+   * algorithm. The result is the absolute value of the last non-zero
+   * remainder, so the signs of the arguments do not affect it, and
+   * {@code gcd(0, 0)} is 0.
+   *
+   * @param a
+   *          first value (may be zero or negative)
+   * @param b
+   *          second value (may be zero or negative)
+   * @return the greatest common divisor of a and b
+   */
+  public static int gcd(int a, int b)
+  {
+    int dividend = a;
+    int divisor = b;
+    while (divisor != 0)
+    {
+      // replace (dividend, divisor) by (divisor, dividend mod divisor)
+      final int remainder = dividend % divisor;
+      dividend = divisor;
+      divisor = remainder;
+    }
+    return Math.abs(dividend);
+  }
+
+}
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.GeneLociI;
import jalview.datamodel.Mapping;
import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SearchResultsI;
JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
}
- public static Sequence ts = new Sequence("short",
+ private static Sequence ts = new Sequence("short",
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
@Test(groups = { "Functional" })
assertEquals(s_as3, uas3.getSequenceAsString());
}
+  /**
+   * Tests AlignmentUtils.transferGeneLoci: nothing is transferred when the
+   * source has no gene loci; loci are transferred (with the map composed via
+   * the CDS-to-transcript mapping) when the source has them; and loci already
+   * present on the target are not overridden by a later transfer.
+   */
+  @Test(groups = { "Functional" })
+  public void testTransferGeneLoci()
+  {
+    SequenceI from = new Sequence("transcript",
+            "aaacccgggTTTAAACCCGGGtttaaacccgggttt");
+    SequenceI to = new Sequence("CDS", "TTTAAACCCGGG");
+    MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1,
+            1);
+
+    /*
+     * first with nothing to transfer
+     */
+    AlignmentUtils.transferGeneLoci(from, map, to);
+    assertNull(to.getGeneLoci());
+
+    /*
+     * next with gene loci set on 'from' sequence
+     */
+    int[] exons = new int[] { 100, 105, 155, 164, 210, 229 };
+    MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1);
+    from.setGeneLoci("human", "GRCh38", "7", geneMap);
+    AlignmentUtils.transferGeneLoci(from, map, to);
+
+    GeneLociI toLoci = to.getGeneLoci();
+    assertNotNull(toLoci);
+    // DBRefEntry constructor upper-cases 'source'
+    assertEquals("HUMAN", toLoci.getSpeciesId());
+    assertEquals("GRCh38", toLoci.getAssemblyId());
+    assertEquals("7", toLoci.getChromosomeId());
+
+    /*
+     * transcript 'exons' are 1-6, 7-16, 17-36
+     * CDS 1:12 is transcript 10-21
+     * transcript 'CDS' is 10-16, 17-21
+     * which is 'gene' 158-164, 210-214
+     */
+    MapList toMap = toLoci.getMap();
+    assertEquals(1, toMap.getFromRanges().size());
+    assertEquals(2, toMap.getFromRanges().get(0).length);
+    assertEquals(1, toMap.getFromRanges().get(0)[0]);
+    assertEquals(12, toMap.getFromRanges().get(0)[1]);
+    assertEquals(1, toMap.getToRanges().size());
+    assertEquals(4, toMap.getToRanges().get(0).length);
+    assertEquals(158, toMap.getToRanges().get(0)[0]);
+    assertEquals(164, toMap.getToRanges().get(0)[1]);
+    assertEquals(210, toMap.getToRanges().get(0)[2]);
+    assertEquals(214, toMap.getToRanges().get(0)[3]);
+    // or summarised as (but toString might change in future):
+    assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]",
+            toMap.toString());
+
+    /*
+     * an existing value is not overridden
+     */
+    geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1);
+    from.setGeneLoci("inhuman", "GRCh37", "6", geneMap);
+    AlignmentUtils.transferGeneLoci(from, map, to);
+    // re-read the loci from the sequence, so that the assertions check its
+    // state after the call rather than the object fetched before it
+    toLoci = to.getGeneLoci();
+    assertNotNull(toLoci);
+    assertEquals("HUMAN", toLoci.getSpeciesId());
+    assertEquals("GRCh38", toLoci.getAssemblyId());
+    assertEquals("7", toLoci.getChromosomeId());
+    toMap = toLoci.getMap();
+    assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]",
+            toMap.toString());
+  }
}
assertEquals(1, merged.size());
assertArrayEquals(new int[] { 9, 0 }, merged.get(0));
}
+
+ /**
+ * Test the method that compounds ('traverses') two mappings. In each case
+ * below, ml1.traverse(ml2) is expected to keep the 'from' ranges of ml1, and
+ * to hold the 'to' ranges of ml1 as located in the 'to' ranges of ml2. Each
+ * 'to' range of ml1 gives one entry in the result, which may hold more than
+ * one [start, end] pair.
+ */
+ @Test
+ public void testTraverse()
+ {
+ /*
+ * simple 1:1 plus 1:1 forwards
+ */
+ MapList ml1 = new MapList(new int[] { 3, 4, 8, 12 }, new int[] { 5, 8,
+ 11, 13 }, 1, 1);
+ MapList ml2 = new MapList(new int[] { 1, 50 }, new int[] { 40, 45, 70,
+ 75, 90, 127 }, 1, 1);
+ MapList compound = ml1.traverse(ml2);
+
+ assertEquals(compound.getFromRatio(), 1);
+ assertEquals(compound.getToRatio(), 1);
+ // 'from' ranges are those of ml1, unchanged
+ List<int[]> fromRanges = compound.getFromRanges();
+ assertEquals(fromRanges.size(), 2);
+ assertArrayEquals(new int[] { 3, 4 }, fromRanges.get(0));
+ assertArrayEquals(new int[] { 8, 12 }, fromRanges.get(1));
+ List<int[]> toRanges = compound.getToRanges();
+ assertEquals(toRanges.size(), 2);
+ // 5-8 maps to 44-45,70-71
+ // 11-13 maps to 74-75,90
+ assertArrayEquals(new int[] { 44, 45, 70, 71 }, toRanges.get(0));
+ assertArrayEquals(new int[] { 74, 75, 90, 90 }, toRanges.get(1));
+
+ /*
+ * 1:1 over 1:1 backwards ('reverse strand')
+ */
+ ml1 = new MapList(new int[] { 1, 50 }, new int[] { 70, 119 }, 1, 1);
+ ml2 = new MapList(new int[] { 1, 500 },
+ new int[] { 1000, 901, 600, 201 }, 1, 1);
+ compound = ml1.traverse(ml2);
+
+ assertEquals(compound.getFromRatio(), 1);
+ assertEquals(compound.getToRatio(), 1);
+ fromRanges = compound.getFromRanges();
+ assertEquals(fromRanges.size(), 1);
+ assertArrayEquals(new int[] { 1, 50 }, fromRanges.get(0));
+ // 70-119 maps to 931-901,600-582 (descending, as ml2 maps to descending
+ // ranges)
+ toRanges = compound.getToRanges();
+ assertEquals(toRanges.size(), 1);
+ assertArrayEquals(new int[] { 931, 901, 600, 582 }, toRanges.get(0));
+
+ /*
+ * 1:1 plus 1:3 should result in 1:3
+ */
+ ml1 = new MapList(new int[] { 1, 30 }, new int[] { 11, 40 }, 1, 1);
+ ml2 = new MapList(new int[] { 1, 100 }, new int[] { 1, 50, 91, 340 },
+ 1, 3);
+ compound = ml1.traverse(ml2);
+
+ assertEquals(compound.getFromRatio(), 1);
+ assertEquals(compound.getToRatio(), 3);
+ fromRanges = compound.getFromRanges();
+ assertEquals(fromRanges.size(), 1);
+ assertArrayEquals(new int[] { 1, 30 }, fromRanges.get(0));
+ // 11-40 maps to 31-50,91-160
+ toRanges = compound.getToRanges();
+ assertEquals(toRanges.size(), 1);
+ assertArrayEquals(new int[] { 31, 50, 91, 160 }, toRanges.get(0));
+
+ /*
+ * 3:1 plus 1:1 should result in 3:1
+ */
+ ml1 = new MapList(new int[] { 1, 30 }, new int[] { 11, 20 }, 3, 1);
+ ml2 = new MapList(new int[] { 1, 100 }, new int[] { 1, 15, 91, 175 },
+ 1, 1);
+ compound = ml1.traverse(ml2);
+
+ assertEquals(compound.getFromRatio(), 3);
+ assertEquals(compound.getToRatio(), 1);
+ fromRanges = compound.getFromRanges();
+ assertEquals(fromRanges.size(), 1);
+ assertArrayEquals(new int[] { 1, 30 }, fromRanges.get(0));
+ // 11-20 maps to 11-15, 91-95
+ toRanges = compound.getToRanges();
+ assertEquals(toRanges.size(), 1);
+ assertArrayEquals(new int[] { 11, 15, 91, 95 }, toRanges.get(0));
+
+ /*
+ * 1:3 plus 3:1 should result in 1:1
+ */
+ ml1 = new MapList(new int[] { 21, 40 }, new int[] { 13, 72 }, 1, 3);
+ ml2 = new MapList(new int[] { 1, 300 }, new int[] { 51, 70, 121, 200 },
+ 3, 1);
+ compound = ml1.traverse(ml2);
+
+ assertEquals(compound.getFromRatio(), 1);
+ assertEquals(compound.getToRatio(), 1);
+ fromRanges = compound.getFromRanges();
+ assertEquals(fromRanges.size(), 1);
+ assertArrayEquals(new int[] { 21, 40 }, fromRanges.get(0));
+ // 13-72 maps 3:1 to 55-70, 121-124
+ toRanges = compound.getToRanges();
+ assertEquals(toRanges.size(), 1);
+ assertArrayEquals(new int[] { 55, 70, 121, 124 }, toRanges.get(0));
+
+ /*
+ * 3:1 plus 1:3 should result in 1:1
+ */
+ ml1 = new MapList(new int[] { 31, 90 }, new int[] { 13, 32 }, 3, 1);
+ ml2 = new MapList(new int[] { 11, 40 }, new int[] { 41, 50, 71, 150 },
+ 1, 3);
+ compound = ml1.traverse(ml2);
+
+ assertEquals(compound.getFromRatio(), 1);
+ assertEquals(compound.getToRatio(), 1);
+ fromRanges = compound.getFromRanges();
+ assertEquals(fromRanges.size(), 1);
+ assertArrayEquals(new int[] { 31, 90 }, fromRanges.get(0));
+ // 13-32 maps to 47-50,71-126
+ toRanges = compound.getToRanges();
+ assertEquals(toRanges.size(), 1);
+ assertArrayEquals(new int[] { 47, 50, 71, 126 }, toRanges.get(0));
+ }
}
--- /dev/null
+package jalview.util;
+
+import static org.testng.Assert.assertEquals;
+
+import org.testng.annotations.Test;
+
+public class MathUtilsTest
+{
+  /**
+   * Checks MathUtils.gcd for zero, unit, negative and composite arguments
+   */
+  @Test
+  public void testGcd()
+  {
+    // each row holds { a, b, expected gcd(a, b) }
+    final int[][] cases = new int[][] {
+        { 0, 0, 0 },
+        { 0, 1, 1 },
+        { 1, 0, 1 },
+        { 1, 1, 1 },
+        { 1, -1, 1 },
+        { -1, 1, 1 },
+        { 2, 3, 1 },
+        { 4, 2, 2 },
+        { 2, 4, 2 },
+        { 2, -4, 2 },
+        { -2, 4, 2 },
+        { -2, -4, 2 },
+        { 2 * 3 * 5 * 7 * 11, 3 * 7 * 13 * 17, 3 * 7 } };
+
+    for (int[] row : cases)
+    {
+      assertEquals(MathUtils.gcd(row[0], row[1]), row[2]);
+    }
+  }
+}