From 29d4809e01ace4e6ac449ec944bfeec70070ac81 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 4 Jul 2016 15:21:17 +0100 Subject: [PATCH] JAL-2110 bug fix / test for searchDataset mappings created --- src/jalview/analysis/CrossRef.java | 50 ++++++------- test/jalview/analysis/CrossRefTest.java | 120 +++++++++++++++++-------------- 2 files changed, 92 insertions(+), 78 deletions(-) diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index e73912d..2b5a0e2 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -145,18 +145,18 @@ public class CrossRef * find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs */ DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs()); - List rseqs = new ArrayList(); + List foundSeqs = new ArrayList(); /* * find sequences in the alignment which xref one of these DBRefs * i.e. is xref-ed to a common sequence identifier */ - searchDatasetXrefs(fromDna, seq, lrfs, rseqs, null); + searchDatasetXrefs(fromDna, seq, lrfs, foundSeqs, null); /* * add those sequences' (dna-to-peptide or peptide-to-dna) dbref sources */ - for (SequenceI rs : rseqs) + for (SequenceI rs : foundSeqs) { DBRefEntry[] xrs = DBRefUtils .selectDbRefs(!fromDna, rs.getDBRefs()); @@ -728,11 +728,11 @@ public class CrossRef * context was searching from Protein sequences * @param sequenceI * @param lrfs - * @param rseqs + * @param foundSeqs * @return true if matches were found. 
*/ private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI, - DBRefEntry[] lrfs, List rseqs, AlignedCodonFrame cf) + DBRefEntry[] lrfs, List foundSeqs, AlignedCodonFrame cf) { boolean found = false; if (lrfs == null) @@ -745,7 +745,7 @@ public class CrossRef // add in wildcards xref.setVersion(null); xref.setMap(null); - found |= searchDataset(fromDna, sequenceI, xref, rseqs, cf, false); + found |= searchDataset(fromDna, sequenceI, xref, foundSeqs, cf, false); } return found; } @@ -757,13 +757,13 @@ public class CrossRef * @param fromDna * true if context was searching for refs *from* dna sequence, false * if context was searching for refs *from* protein sequence - * @param sequenceI + * @param fromSeq * a sequence to ignore (start point of search) * @param xrf * a cross-reference to try to match - * @param rseqs + * @param foundSeqs * result list to add to - * @param cf + * @param mappings * a set of sequence mappings to add to * @param direct * - indicates the type of relationship between returned sequences, @@ -778,8 +778,8 @@ public class CrossRef * * @return true if relationship found and sequence added. 
*/ - boolean searchDataset(boolean fromDna, SequenceI sequenceI, - DBRefEntry xrf, List rseqs, AlignedCodonFrame cf, + boolean searchDataset(boolean fromDna, SequenceI fromSeq, + DBRefEntry xrf, List foundSeqs, AlignedCodonFrame mappings, boolean direct) { boolean found = false; @@ -808,7 +808,7 @@ public class CrossRef + nxt.getDatasetSequence().getDisplayId(true) + ")"); } - if (nxt == sequenceI || nxt == sequenceI.getDatasetSequence()) + if (nxt == fromSeq || nxt == fromSeq.getDatasetSequence()) { continue; } @@ -840,13 +840,17 @@ public class CrossRef // } if (!cands.isEmpty()) { - if (!rseqs.contains(nxt)) + if (!foundSeqs.contains(nxt)) { found = true; - rseqs.add(nxt); - if (cf != null) + foundSeqs.add(nxt); + if (mappings != null && !direct) { - // don't search if we aren't given a codon map object + /* + * if the matched sequence has mapped dbrefs to + * protein product / cdna, add equivalent mappings to + * our source sequence + */ for (DBRefEntry candidate : cands) { Mapping mapping = candidate.getMap(); @@ -856,23 +860,21 @@ public class CrossRef if (mapping.getTo() != null && map.getFromRatio() != map.getToRatio()) { - // get sense of map correct for adding to product - // alignment. 
- if (fromDna) + /* + * add a mapping, as from dna to peptide sequence + */ + if (map.getFromRatio() == 3) { - // map is from dna seq to a protein product - cf.addMap(sequenceI, nxt, map); + mappings.addMap(nxt, fromSeq, map); } else { - // map should be from protein seq to its coding dna - cf.addMap(nxt, sequenceI, map.getInverse()); + mappings.addMap(nxt, fromSeq, map.getInverse()); } } } } } - // TODO: add mapping between sequences if necessary } } } diff --git a/test/jalview/analysis/CrossRefTest.java b/test/jalview/analysis/CrossRefTest.java index 0c3e4d5..62bcae8 100644 --- a/test/jalview/analysis/CrossRefTest.java +++ b/test/jalview/analysis/CrossRefTest.java @@ -27,8 +27,9 @@ import static org.testng.AssertJUnit.assertNotSame; import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; -import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; +import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; @@ -248,9 +249,14 @@ public class CrossRefTest * peptide sequence with UNIPROT dbref */ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"), new MapList( + new int[] { 1, 21 }, new int[] { + 1, 7 }, 3, 1)); + DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map); + dna1.addDBRef(dbref); dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ"); + dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 }); @@ -259,76 +265,100 @@ public class CrossRefTest /* * first search for a dbref nowhere on the 
alignment: */ - DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419"); + dbref = new DBRefEntry("UNIPROT", "0", "P30419"); CrossRef testee = new CrossRef(al.getSequencesArray(), al); - boolean found = testee.searchDataset(true, dna1, dbref, result, null, + AlignedCodonFrame acf = new AlignedCodonFrame(); + boolean found = testee.searchDataset(true, dna1, dbref, result, acf, true); assertFalse(found); assertTrue(result.isEmpty()); - - // TODO we are setting direct=true here but it is set to - // false in Jalview code... + assertTrue(acf.isEmpty()); /* * search for a protein sequence with dbref UNIPROT:Q9ZTS2 */ + acf = new AlignedCodonFrame(); dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result, - null, false); // search dataset with a protein xref from a dna + acf, false); // search dataset with a protein xref from a dna // sequence to locate the protein product assertTrue(found); assertEquals(1, result.size()); assertSame(pep1, result.get(0)); + assertTrue(acf.isEmpty()); /* * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2 */ result.clear(); + acf = new AlignedCodonFrame(); dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result, - null, false); // search dataset with a protein's direct dbref to + acf, false); // search dataset with a protein's direct dbref to // locate dna sequences with matching xref assertTrue(found); assertEquals(1, result.size()); assertSame(dna1, result.get(0)); + // should now have a mapping from dna to pep1 + List mappings = acf.getMappings(); + assertEquals(1, mappings.size()); + SequenceToSequenceMapping mapping = mappings.get(0); + assertSame(dna1, mapping.getFromSeq()); + assertSame(pep1, mapping.getMapping().getTo()); + MapList mapList = mapping.getMapping().getMap(); + assertEquals(1, mapList.getToRatio()); + assertEquals(3, mapList.getFromRatio()); + assertEquals(1, 
mapList.getFromRanges().size()); + assertEquals(1, mapList.getFromRanges().get(0)[0]); + assertEquals(21, mapList.getFromRanges().get(0)[1]); + assertEquals(1, mapList.getToRanges().size()); + assertEquals(1, mapList.getToRanges().get(0)[0]); + assertEquals(7, mapList.getToRanges().get(0)[1]); } /** * Test for finding 'product' sequences for the case where the selected - * sequence has a dbref with a mapping to a sequence + * sequence has a dbref with a mapping to a sequence. This represents the case + * where either + *
<ul>
+ * <li>a fetched sequence is already decorated with its cross-reference (e.g. + * EMBL + translation), or</li>
+ * <li>Get Cross-References has been done once resulting in instantiated + * cross-reference mappings</li>
+ * </ul>
*/ @Test(groups = { "Functional" }) public void testFindXrefSequences_fromDbRefMap() { /* - * two peptide sequences each with a DBRef and SequenceFeature + * scenario: nucleotide sequence AF039662 + * with dbref + mapping to Q9ZTS2 and P30419 + * which themselves each have a dbref and feature */ + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV"); - pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111")); + SequenceI pep2 = new Sequence("P30419", "MTRRSQIF"); + dna1.createDatasetSequence(); + pep1.createDatasetSequence(); + pep2.createDatasetSequence(); + + pep1.getDatasetSequence().addDBRef( + new DBRefEntry("Pfam", "0", "PF00111")); pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f, "group")); - SequenceI pep2 = new Sequence("P30419", "MTRRSQIF"); - pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK")); + pep2.getDatasetSequence().addDBRef(new DBRefEntry("PDB", "0", "3JTK")); pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15, 12f, "group2")); - /* - * nucleotide sequence (to go in the alignment) - */ - SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - - /* - * add DBRefEntry's to dna1 with mappings from dna to both peptides - */ MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1); Mapping map = new Mapping(pep1, mapList); DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map); - dna1.addDBRef(dbRef1); + dna1.getDatasetSequence().addDBRef(dbRef1); mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1); map = new Mapping(pep2, mapList); DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map); - dna1.addDBRef(dbRef2); + dna1.getDatasetSequence().addDBRef(dbRef2); /* * find UNIPROT xrefs for nucleotide sequence - it should pick up @@ -348,39 +378,21 @@ public class CrossRefTest } /** - * Helper method to assert seq1 looks like a copy of seq2 + * Helper method that verifies that 'copy' has the 
same name, start, end, + * sequence and dataset sequence object as 'original' (but is not the same + * object) * - * @param seq1 - * @param seq2 + * @param copy + * @param original */ - private void checkCopySequence(SequenceI seq1, SequenceI seq2) + private void checkCopySequence(SequenceI copy, SequenceI original) { - assertNotSame(seq1, seq2); - assertEquals(seq1.getName(), seq2.getName()); - assertEquals(seq1.getStart(), seq2.getStart()); - assertEquals(seq1.getEnd(), seq2.getEnd()); - assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString()); - - /* - * compare dbrefs - */ - assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs()); - // check one to verify a copy, not the same object - if (seq1.getDBRefs().length > 0) - { - assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]); - } - - /* - * compare features - */ - assertArrayEquals(seq1.getSequenceFeatures(), - seq2.getSequenceFeatures()); - if (seq1.getSequenceFeatures().length > 0) - { - assertNotSame(seq1.getSequenceFeatures()[0], - seq2.getSequenceFeatures()[0]); - } + assertNotSame(copy, original); + assertSame(copy.getDatasetSequence(), original.getDatasetSequence()); + assertEquals(copy.getName(), original.getName()); + assertEquals(copy.getStart(), original.getStart()); + assertEquals(copy.getEnd(), original.getEnd()); + assertEquals(copy.getSequenceAsString(), original.getSequenceAsString()); } /** -- 1.7.10.2