From e464da0ea20b0322846ce623598a16f2a1cba7ae Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 5 Oct 2017 15:36:38 +0100 Subject: [PATCH] JAL-2738 update spikes/mungo --- src/jalview/analysis/AlignmentUtils.java | 84 ++++++++++++----- src/jalview/datamodel/SequenceFeature.java | 4 + src/jalview/gui/PopupMenu.java | 12 +++ src/jalview/io/vcf/VCFLoader.java | 3 + src/jalview/util/MapList.java | 59 ++++++++++++ src/jalview/util/MathUtils.java | 22 +++++ test/jalview/analysis/AlignmentUtilsTests.java | 66 ++++++++++++- test/jalview/util/MapListTest.java | 118 ++++++++++++++++++++++++ test/jalview/util/MathUtilsTest.java | 26 ++++++ 9 files changed, 370 insertions(+), 24 deletions(-) create mode 100644 src/jalview/util/MathUtils.java create mode 100644 test/jalview/util/MathUtilsTest.java diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 9a46a91..228446b 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -29,6 +29,7 @@ import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GeneLociI; import jalview.datamodel.IncompleteCodonException; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; @@ -394,7 +395,7 @@ public class AlignmentUtils * Answers true if the mappings include one between the given (dataset) * sequences. */ - public static boolean mappingExists(List mappings, + protected static boolean mappingExists(List mappings, SequenceI aaSeq, SequenceI cdnaSeq) { if (mappings != null) @@ -1633,7 +1634,7 @@ public class AlignmentUtils AlignmentI dataset, SequenceI[] products) { if (dataset == null || dataset.getDataset() != null) - { + { throw new IllegalArgumentException( "IMPLEMENTATION ERROR: dataset.getDataset() must be null!"); } @@ -1645,10 +1646,10 @@ public class AlignmentUtils { productSeqs = new HashSet(); for (SequenceI seq : products) - { + { productSeqs.add(seq.getDatasetSequence() == null ? seq : seq .getDatasetSequence()); - } + } } /* @@ -1670,15 +1671,15 @@ public class AlignmentUtils List seqMappings = MappingUtils .findMappingsForSequence(dnaSeq, mappings); for (AlignedCodonFrame mapping : seqMappings) - { + { List mappingsFromSequence = mapping .getMappingsFromSequence(dnaSeq); for (Mapping aMapping : mappingsFromSequence) - { + { MapList mapList = aMapping.getMap(); if (mapList.getFromRatio() == 1) - { + { /* * not a dna-to-protein mapping (likely dna-to-cds) */ @@ -1704,15 +1705,15 @@ public class AlignmentUtils if (cdsSeq != null) { if (!foundSeqs.contains(cdsSeq)) - { + { foundSeqs.add(cdsSeq); SequenceI derivedSequence = cdsSeq.deriveSequence(); cdsSeqs.add(derivedSequence); if (!dataset.getSequences().contains(cdsSeq)) - { + { dataset.addSequence(cdsSeq); + } } - } continue; } @@ -1763,7 +1764,7 @@ public class AlignmentUtils * add another mapping from original 'from' range to CDS */ AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame(); - MapList dnaToCdsMap = new MapList(mapList.getFromRanges(), + final MapList dnaToCdsMap = new MapList(mapList.getFromRanges(), cdsRange, 1, 1); dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss, dnaToCdsMap); @@ -1773,6 +1774,13 @@ public class AlignmentUtils } /* + * transfer dna chromosomal loci (if known) to the CDS + * sequence (via the mapping) + */ + final MapList cdsToDnaMap = dnaToCdsMap.getInverse(); + transferGeneLoci(dnaSeq, cdsToDnaMap, cdsSeq); + + /* * add DBRef with mapping from protein to CDS * (this enables Get Cross-References from protein alignment) * This is tricky because we can't have two DBRefs with the @@ -1795,13 +1803,11 @@ public class AlignmentUtils * create a cross-reference from CDS to the source sequence's * primary reference and vice versa */ - String source = primRef.getSource(); String version = primRef.getVersion(); DBRefEntry cdsCrossRef = new DBRefEntry(source, source + ":" + version, primRef.getAccessionId()); - cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(dnaToCdsMap - .getInverse()))); + cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap))); cdsSeqDss.addDBRef(cdsCrossRef); dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq @@ -1818,16 +1824,16 @@ public class AlignmentUtils proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap .getInverse())); proteinProduct.addDBRef(proteinToCdsRef); - } + } /* * transfer any features on dna that overlap the CDS */ transferFeatures(dnaSeq, cdsSeq, dnaToCdsMap, null, SequenceOntologyI.CDS); + } } } - } AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs .size()])); @@ -1837,6 +1843,38 @@ public class AlignmentUtils } /** + * Tries to transfer gene loci (dbref to chromosome positions) from fromSeq to + * toSeq, mediated by the given mapping between the sequences + * + * @param fromSeq + * @param targetToFrom + * Map + * @param targetSeq + */ + protected static void transferGeneLoci(SequenceI fromSeq, + MapList targetToFrom, SequenceI targetSeq) + { + if (targetSeq.getGeneLoci() != null) + { + // already have - don't override + return; + } + GeneLociI fromLoci = fromSeq.getGeneLoci(); + if (fromLoci == null) + { + return; + } + + MapList newMap = targetToFrom.traverse(fromLoci.getMap()); + + if (newMap != null) + { + targetSeq.setGeneLoci(fromLoci.getSpeciesId(), + fromLoci.getAssemblyId(), fromLoci.getChromosomeId(), newMap); + } + } + + /** * A helper method that finds a CDS sequence in the alignment dataset that is * mapped to the given protein sequence, and either is, or has a mapping from, * the given dna sequence. @@ -2004,19 +2042,19 @@ public class AlignmentUtils } /** - * add any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to + * Adds any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to * the given mapping. * * @param cdsSeq * @param contig + * @param proteinProduct * @param mapping - * @return list of DBRefEntrys added. + * @return list of DBRefEntrys added */ - public static List propagateDBRefsToCDS(SequenceI cdsSeq, + protected static List propagateDBRefsToCDS(SequenceI cdsSeq, SequenceI contig, SequenceI proteinProduct, Mapping mapping) { - - // gather direct refs from contig congrent with mapping + // gather direct refs from contig congruent with mapping List direct = new ArrayList(); HashSet directSources = new HashSet(); if (contig.getDBRefs() != null) @@ -2096,7 +2134,7 @@ public class AlignmentUtils * subtypes in the Sequence Ontology) * @param omitting */ - public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq, + protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq, MapList mapping, String select, String... omitting) { SequenceI copyTo = toSeq; @@ -2234,7 +2272,7 @@ public class AlignmentUtils * @param dnaSeq * @return */ - public static List findCdsPositions(SequenceI dnaSeq) + protected static List findCdsPositions(SequenceI dnaSeq) { List result = new ArrayList(); diff --git a/src/jalview/datamodel/SequenceFeature.java b/src/jalview/datamodel/SequenceFeature.java index 0352918..4208ce1 100755 --- a/src/jalview/datamodel/SequenceFeature.java +++ b/src/jalview/datamodel/SequenceFeature.java @@ -563,6 +563,10 @@ public class SequenceFeature implements FeatureLocationI { sb.append(String.format("%s %d-%d %s", type, begin, end, description)); } + if (!Float.isNaN(score) && score != 0f) + { + sb.append(" score=").append(score); + } if (featureGroup != null) { sb.append(" (").append(featureGroup).append(")"); diff --git a/src/jalview/gui/PopupMenu.java b/src/jalview/gui/PopupMenu.java index 8e9bade..33c86bc 100644 --- a/src/jalview/gui/PopupMenu.java +++ b/src/jalview/gui/PopupMenu.java @@ -549,6 +549,18 @@ public class PopupMenu extends JPopupMenu implements ColourChangeListener { desc = String.format("%s %d-%d", sf.getType(), start, end); } + String description = sf.getDescription(); + if (description != null) + { + if (description.length() <= 6) + { + desc = desc + " " + description; + } + else + { + desc = desc + " " + description.substring(0, 6) + ".."; + } + } if (sf.getFeatureGroup() != null) { desc = desc + " (" + sf.getFeatureGroup() + ")"; diff --git a/src/jalview/io/vcf/VCFLoader.java b/src/jalview/io/vcf/VCFLoader.java index a3e7e5a..c1c84fb 100644 --- a/src/jalview/io/vcf/VCFLoader.java +++ b/src/jalview/io/vcf/VCFLoader.java @@ -351,6 +351,9 @@ public class VCFLoader GeneLociI seqCoords = seq.getGeneLoci(); if (seqCoords == null) { + System.out.println(String.format( + "Can't query VCF for %s as chromosome coordinates not known", + seq.getName())); return 0; } diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index 4658724..3ce0bb3 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -1120,4 +1120,63 @@ public class MapList || (fromRatio == 3 && toRatio == 1); } + /** + * Returns a map which is the composite of this one and the input map. That + * is, the output map has the fromRanges of this map, and its toRanges are the + * toRanges of this map as transformed by the input map. + *

+ * Returns null if the mappings cannot be traversed (not all toRanges of this + * map correspond to fromRanges of the input), or if this.toRatio does not + * match map.fromRatio. + * + *

+   * Example 1:
+   *    this:   from [1-100] to [501-600]
+   *    input:  from [10-40] to [60-90]
+   *    output: from [10-40] to [560-590]
+   * Example 2 ('reverse strand exons'):
+   *    this:   from [1-100] to [2000-1951], [1000-951] // transcript to loci
+   *    input:  from [1-50]  to [41-90] // CDS to transcript
+   *    output: from [10-40] to [1960-1951], [1000-971] // CDS to gene loci
+   * 
+ * + * @param map + * @return + */ + public MapList traverse(MapList map) + { + if (map == null) + { + return null; + } + + /* + * compound the ratios by this rule: + * A:B with M:N gives A*M:B*N + * reduced by greatest common divisor + * so 1:3 with 3:3 is 3:9 or 1:3 + * 1:3 with 3:1 is 3:3 or 1:1 + * 1:3 with 1:3 is 1:9 + * 2:5 with 3:7 is 6:35 + */ + int outFromRatio = getFromRatio() * map.getFromRatio(); + int outToRatio = getToRatio() * map.getToRatio(); + int gcd = MathUtils.gcd(outFromRatio, outToRatio); + outFromRatio /= gcd; + outToRatio /= gcd; + + List toRanges = new ArrayList<>(); + for (int[] range : getToRanges()) + { + int[] transferred = map.locateInTo(range[0], range[1]); + if (transferred == null) + { + return null; + } + toRanges.add(transferred); + } + + return new MapList(getFromRanges(), toRanges, outFromRatio, outToRatio); + } + } diff --git a/src/jalview/util/MathUtils.java b/src/jalview/util/MathUtils.java new file mode 100644 index 0000000..72d46a2 --- /dev/null +++ b/src/jalview/util/MathUtils.java @@ -0,0 +1,22 @@ +package jalview.util; + +public class MathUtils +{ + + /** + * Returns the greatest common divisor of two integers + * + * @param a + * @param b + * @return + */ + public static int gcd(int a, int b) + { + if (b == 0) + { + return Math.abs(a); + } + return gcd(b, a % b); + } + +} diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 7c64193..d229a39 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -34,6 +34,7 @@ import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GeneLociI; import jalview.datamodel.Mapping; import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResultsI; @@ -71,7 +72,7 @@ public class AlignmentUtilsTests JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); } - public static Sequence ts = new Sequence("short", + private static Sequence ts = new Sequence("short", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"); @Test(groups = { "Functional" }) @@ -2569,4 +2570,67 @@ public class AlignmentUtilsTests assertEquals(s_as3, uas3.getSequenceAsString()); } + @Test(groups = { "Functional" }) + public void testTransferGeneLoci() + { + SequenceI from = new Sequence("transcript", + "aaacccgggTTTAAACCCGGGtttaaacccgggttt"); + SequenceI to = new Sequence("CDS", "TTTAAACCCGGG"); + MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1, + 1); + + /* + * first with nothing to transfer + */ + AlignmentUtils.transferGeneLoci(from, map, to); + assertNull(to.getGeneLoci()); + + /* + * next with gene loci set on 'from' sequence + */ + int[] exons = new int[] { 100, 105, 155, 164, 210, 229 }; + MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1); + from.setGeneLoci("human", "GRCh38", "7", geneMap); + AlignmentUtils.transferGeneLoci(from, map, to); + + GeneLociI toLoci = to.getGeneLoci(); + assertNotNull(toLoci); + // DBRefEntry constructor upper-cases 'source' + assertEquals("HUMAN", toLoci.getSpeciesId()); + assertEquals("GRCh38", toLoci.getAssemblyId()); + assertEquals("7", toLoci.getChromosomeId()); + + /* + * transcript 'exons' are 1-6, 7-16, 17-36 + * CDS 1:12 is transcript 10-21 + * transcript 'CDS' is 10-16, 17-21 + * which is 'gene' 158-164, 210-214 + */ + MapList toMap = toLoci.getMap(); + assertEquals(1, toMap.getFromRanges().size()); + assertEquals(2, toMap.getFromRanges().get(0).length); + assertEquals(1, toMap.getFromRanges().get(0)[0]); + assertEquals(12, toMap.getFromRanges().get(0)[1]); + assertEquals(1, toMap.getToRanges().size()); + assertEquals(4, toMap.getToRanges().get(0).length); + assertEquals(158, toMap.getToRanges().get(0)[0]); + assertEquals(164, toMap.getToRanges().get(0)[1]); + assertEquals(210, toMap.getToRanges().get(0)[2]); + assertEquals(214, toMap.getToRanges().get(0)[3]); + // or summarised as (but toString might change in future): + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]", + toMap.toString()); + + /* + * an existing value is not overridden + */ + geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1); + from.setGeneLoci("inhuman", "GRCh37", "6", geneMap); + AlignmentUtils.transferGeneLoci(from, map, to); + assertEquals("GRCh38", toLoci.getAssemblyId()); + assertEquals("7", toLoci.getChromosomeId()); + toMap = toLoci.getMap(); + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]", + toMap.toString()); + } } diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index a2f38e2..f3395ca 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -814,4 +814,122 @@ public class MapListTest assertEquals(1, merged.size()); assertArrayEquals(new int[] { 9, 0 }, merged.get(0)); } + + /** + * Test the method that compounds ('traverses') two mappings + */ + @Test + public void testTraverse() + { + /* + * simple 1:1 plus 1:1 forwards + */ + MapList ml1 = new MapList(new int[] { 3, 4, 8, 12 }, new int[] { 5, 8, + 11, 13 }, 1, 1); + MapList ml2 = new MapList(new int[] { 1, 50 }, new int[] { 40, 45, 70, + 75, 90, 127 }, 1, 1); + MapList compound = ml1.traverse(ml2); + + assertEquals(compound.getFromRatio(), 1); + assertEquals(compound.getToRatio(), 1); + List fromRanges = compound.getFromRanges(); + assertEquals(fromRanges.size(), 2); + assertArrayEquals(new int[] { 3, 4 }, fromRanges.get(0)); + assertArrayEquals(new int[] { 8, 12 }, fromRanges.get(1)); + List toRanges = compound.getToRanges(); + assertEquals(toRanges.size(), 2); + // 5-8 maps to 44-45,70-71 + // 11-13 maps to 74-75,90 + assertArrayEquals(new int[] { 44, 45, 70, 71 }, toRanges.get(0)); + assertArrayEquals(new int[] { 74, 75, 90, 90 }, toRanges.get(1)); + + /* + * 1:1 over 1:1 backwards ('reverse strand') + */ + ml1 = new MapList(new int[] { 1, 50 }, new int[] { 70, 119 }, 1, 1); + ml2 = new MapList(new int[] { 1, 500 }, + new int[] { 1000, 901, 600, 201 }, 1, 1); + compound = ml1.traverse(ml2); + + assertEquals(compound.getFromRatio(), 1); + assertEquals(compound.getToRatio(), 1); + fromRanges = compound.getFromRanges(); + assertEquals(fromRanges.size(), 1); + assertArrayEquals(new int[] { 1, 50 }, fromRanges.get(0)); + toRanges = compound.getToRanges(); + assertEquals(toRanges.size(), 1); + assertArrayEquals(new int[] { 931, 901, 600, 582 }, toRanges.get(0)); + + /* + * 1:1 plus 1:3 should result in 1:3 + */ + ml1 = new MapList(new int[] { 1, 30 }, new int[] { 11, 40 }, 1, 1); + ml2 = new MapList(new int[] { 1, 100 }, new int[] { 1, 50, 91, 340 }, + 1, 3); + compound = ml1.traverse(ml2); + + assertEquals(compound.getFromRatio(), 1); + assertEquals(compound.getToRatio(), 3); + fromRanges = compound.getFromRanges(); + assertEquals(fromRanges.size(), 1); + assertArrayEquals(new int[] { 1, 30 }, fromRanges.get(0)); + // 11-40 maps to 31-50,91-160 + toRanges = compound.getToRanges(); + assertEquals(toRanges.size(), 1); + assertArrayEquals(new int[] { 31, 50, 91, 160 }, toRanges.get(0)); + + /* + * 3:1 plus 1:1 should result in 3:1 + */ + ml1 = new MapList(new int[] { 1, 30 }, new int[] { 11, 20 }, 3, 1); + ml2 = new MapList(new int[] { 1, 100 }, new int[] { 1, 15, 91, 175 }, + 1, 1); + compound = ml1.traverse(ml2); + + assertEquals(compound.getFromRatio(), 3); + assertEquals(compound.getToRatio(), 1); + fromRanges = compound.getFromRanges(); + assertEquals(fromRanges.size(), 1); + assertArrayEquals(new int[] { 1, 30 }, fromRanges.get(0)); + // 11-20 maps to 11-15, 91-95 + toRanges = compound.getToRanges(); + assertEquals(toRanges.size(), 1); + assertArrayEquals(new int[] { 11, 15, 91, 95 }, toRanges.get(0)); + + /* + * 1:3 plus 3:1 should result in 1:1 + */ + ml1 = new MapList(new int[] { 21, 40 }, new int[] { 13, 72 }, 1, 3); + ml2 = new MapList(new int[] { 1, 300 }, new int[] { 51, 70, 121, 200 }, + 3, 1); + compound = ml1.traverse(ml2); + + assertEquals(compound.getFromRatio(), 1); + assertEquals(compound.getToRatio(), 1); + fromRanges = compound.getFromRanges(); + assertEquals(fromRanges.size(), 1); + assertArrayEquals(new int[] { 21, 40 }, fromRanges.get(0)); + // 13-72 maps 3:1 to 55-70, 121-124 + toRanges = compound.getToRanges(); + assertEquals(toRanges.size(), 1); + assertArrayEquals(new int[] { 55, 70, 121, 124 }, toRanges.get(0)); + + /* + * 3:1 plus 1:3 should result in 1:1 + */ + ml1 = new MapList(new int[] { 31, 90 }, new int[] { 13, 32 }, 3, 1); + ml2 = new MapList(new int[] { 11, 40 }, new int[] { 41, 50, 71, 150 }, + 1, 3); + compound = ml1.traverse(ml2); + + assertEquals(compound.getFromRatio(), 1); + assertEquals(compound.getToRatio(), 1); + fromRanges = compound.getFromRanges(); + assertEquals(fromRanges.size(), 1); + assertArrayEquals(new int[] { 31, 90 }, fromRanges.get(0)); + // 13-32 maps to 47-50,71-126 + toRanges = compound.getToRanges(); + assertEquals(toRanges.size(), 1); + assertArrayEquals(new int[] { 47, 50, 71, 126 }, toRanges.get(0)); + } } diff --git a/test/jalview/util/MathUtilsTest.java b/test/jalview/util/MathUtilsTest.java new file mode 100644 index 0000000..fc84741 --- /dev/null +++ b/test/jalview/util/MathUtilsTest.java @@ -0,0 +1,26 @@ +package jalview.util; + +import static org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + +public class MathUtilsTest +{ + @Test + public void testGcd() + { + assertEquals(MathUtils.gcd(0, 0), 0); + assertEquals(MathUtils.gcd(0, 1), 1); + assertEquals(MathUtils.gcd(1, 0), 1); + assertEquals(MathUtils.gcd(1, 1), 1); + assertEquals(MathUtils.gcd(1, -1), 1); + assertEquals(MathUtils.gcd(-1, 1), 1); + assertEquals(MathUtils.gcd(2, 3), 1); + assertEquals(MathUtils.gcd(4, 2), 2); + assertEquals(MathUtils.gcd(2, 4), 2); + assertEquals(MathUtils.gcd(2, -4), 2); + assertEquals(MathUtils.gcd(-2, 4), 2); + assertEquals(MathUtils.gcd(-2, -4), 2); + assertEquals(MathUtils.gcd(2 * 3 * 5 * 7 * 11, 3 * 7 * 13 * 17), 3 * 7); + } +} -- 1.7.10.2