From: Jim Procter Date: Wed, 22 Jun 2016 10:41:24 +0000 (+0100) Subject: JAL-2110 remove hardwired detection of search context from the dataset ‘isNucleotide... X-Git-Tag: Release_2_10_0~140^2~5^2~48 X-Git-Url: http://source.jalview.org/gitweb/?p=jalview.git;a=commitdiff_plain;h=b10330c2e330e409d2fd3812f1001f9542dc1b36 JAL-2110 remove hardwired detection of search context from the dataset ‘isNucleotide()’ flag --- diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 9fd87df..d96ab58 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -54,12 +54,6 @@ public class CrossRef private AlignmentI dataset; /* - * true if we are searching for cross-references from nucleotide, - * i.e. for protein sequences, false if the reverse - */ - private boolean fromDna; - - /* * the sequences for which we are seeking cross-references */ private SequenceI[] fromSeqs; @@ -76,7 +70,6 @@ public class CrossRef public CrossRef(SequenceI[] seqs, AlignmentI ds) { fromSeqs = seqs; - fromDna = ds.isNucleotide(); dataset = ds.getDataset() == null ? ds : ds.getDataset(); } @@ -88,16 +81,20 @@ public class CrossRef * reference from another sequence in the dataset which has a cross-reference * to a direct DBRefEntry on the given sequence * + * + * @param dna + * - when true, cross-references *from* dna returned. When false, + * cross-references *from* protein are returned * @return */ - public List findXrefSourcesForSequences() + public List findXrefSourcesForSequences(boolean dna) { List sources = new ArrayList(); for (SequenceI seq : fromSeqs) { if (seq != null) { - findXrefSourcesForSequence(seq, sources); + findXrefSourcesForSequence(seq, dna, sources); } } return sources; @@ -117,7 +114,8 @@ public class CrossRef * @param sources * a list of sources to add matches to */ - void findXrefSourcesForSequence(SequenceI seq, List sources) + void findXrefSourcesForSequence(SequenceI seq, boolean fromDna, + List sources) { /* * first find seq's xrefs (dna-to-peptide or peptide-to-dna) @@ -136,7 +134,7 @@ public class CrossRef * find sequences in the alignment which xref one of these DBRefs * i.e. is xref-ed to a common sequence identifier */ - searchDatasetXrefs(seq, lrfs, rseqs, null); + searchDatasetXrefs(fromDna, seq, lrfs, rseqs, null); /* * add those sequences' (dna-to-peptide or peptide-to-dna) dbref sources @@ -194,7 +192,7 @@ public class CrossRef * @param source * @return cross-referenced sequences (as dataset sequences) */ - public Alignment findXrefSequences(String source) + public Alignment findXrefSequences(String source, boolean fromDna) { List rseqs = new ArrayList(); @@ -226,7 +224,7 @@ public class CrossRef * which have a dbref to an accession id for this sequence, * and add them to the results */ - found = searchDatasetXrefs(dss, lrfs, rseqs, cf); + found = searchDatasetXrefs(fromDna, dss, lrfs, rseqs, cf); } if (xrfs == null && !found) { @@ -297,7 +295,7 @@ public class CrossRef + xref.getAccessionId()); if (matchedSeq != null) { - if (constructMapping(seq, matchedSeq, xref, cf)) + if (constructMapping(seq, matchedSeq, xref, cf, fromDna)) { found = true; } @@ -308,7 +306,7 @@ public class CrossRef { // do a bit more work - search for sequences with references matching // xrefs on this sequence. - found = searchDataset(dss, xref, rseqs, cf, false); + found = searchDataset(fromDna, dss, xref, rseqs, cf, false); } if (found) { @@ -337,7 +335,7 @@ public class CrossRef if (retrieved != null) { - updateDbrefMappings(seq, xrfs, retrieved, cf); + updateDbrefMappings(seq, xrfs, retrieved, cf, fromDna); for (SequenceI retrievedSequence : retrieved) { SequenceI retrievedDss = retrievedSequence.getDatasetSequence() == null ? retrievedSequence @@ -595,8 +593,8 @@ public class CrossRef * @param retrieved * @param acf */ - void updateDbrefMappings(SequenceI mapFrom, - DBRefEntry[] xrefs, SequenceI[] retrieved, AlignedCodonFrame acf) + void updateDbrefMappings(SequenceI mapFrom, DBRefEntry[] xrefs, + SequenceI[] retrieved, AlignedCodonFrame acf, boolean fromDna) { SequenceIdMatcher matcher = new SequenceIdMatcher(retrieved); for (DBRefEntry xref : xrefs) @@ -612,7 +610,7 @@ public class CrossRef } for (SequenceI seq : matches) { - constructMapping(mapFrom, seq, xref, acf); + constructMapping(mapFrom, seq, xref, acf, fromDna); } } } @@ -639,7 +637,7 @@ public class CrossRef * @return */ boolean constructMapping(SequenceI mapFrom, SequenceI mapTo, - DBRefEntry xref, AlignedCodonFrame mappings) + DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna) { MapList mapping = null; @@ -700,12 +698,15 @@ public class CrossRef * dataset (that is not equal to sequenceI) Identifies matching DBRefEntry * based on source and accession string only - Map and Version are nulled. * + * @param fromDna + * - true if context was searching from Dna sequences, false if + * context was searching from Protein sequences * @param sequenceI * @param lrfs * @param rseqs * @return true if matches were found. */ - private boolean searchDatasetXrefs(SequenceI sequenceI, + private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI, DBRefEntry[] lrfs, List rseqs, AlignedCodonFrame cf) { boolean found = false; @@ -719,7 +720,7 @@ public class CrossRef // add in wildcards xref.setVersion(null); xref.setMap(null); - found |= searchDataset(sequenceI, xref, rseqs, cf, false); + found |= searchDataset(fromDna, sequenceI, xref, rseqs, cf, false); } return found; } @@ -728,6 +729,9 @@ public class CrossRef * Searches dataset for DBRefEntrys matching the given one (xrf) and adds the * associated sequence to rseqs * + * @param fromDna + * true if context was searching for refs *from* dna sequence, false + * if context was searching for refs *from* protein sequence * @param sequenceI * a sequence to ignore (start point of search) * @param xrf @@ -740,8 +744,9 @@ public class CrossRef * - search all references or only subset * @return true if relationship found and sequence added. */ - boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf, - List rseqs, AlignedCodonFrame cf, boolean direct) + boolean searchDataset(boolean fromDna, SequenceI sequenceI, + DBRefEntry xrf, List rseqs, AlignedCodonFrame cf, + boolean direct) { boolean found = false; if (dataset == null) diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 18ee912..c5fb11f 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -4651,7 +4651,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, final boolean dna = viewport.getAlignment().isNucleotide(); List ptypes = (seqs == null || seqs.length == 0) ? null : new CrossRef(seqs, dataset) - .findXrefSourcesForSequences(); + .findXrefSourcesForSequences(dna); for (final String source : ptypes) { @@ -4691,7 +4691,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, * @param source * the database to show cross-references for */ - protected void showProductsFor(final SequenceI[] sel, final boolean dna, + protected void showProductsFor(final SequenceI[] sel, final boolean _odna, final String source) { Runnable foo = new Runnable() @@ -4710,8 +4710,18 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, .getAlignment(); AlignmentI dataset = alignment.getDataset() == null ? alignment : alignment.getDataset(); - AlignmentI xrefs = new CrossRef(sel, alignment) - .findXrefSequences(source); + boolean dna = alignment.isNucleotide(); + if (_odna!=dna) + { + System.err + .println("Conflict: showProducts for alignment originally " + + "thought to be " + + (_odna ? "DNA" : "Protein") + + " now searching for " + + (dna ? "DNA" : "Protein") + " Context."); + } + AlignmentI xrefs = new CrossRef(sel, dataset) + .findXrefSequences(source, dna); if (xrefs == null) { return; @@ -4891,7 +4901,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, sprods[s].updatePDBIds(); } Alignment al = new Alignment(sprods); - al.setDataset((Alignment) dataset); + al.setDataset(dataset); return al; } diff --git a/test/jalview/analysis/CrossRefTest.java b/test/jalview/analysis/CrossRefTest.java index ecfedb1..eab8d5d 100644 --- a/test/jalview/analysis/CrossRefTest.java +++ b/test/jalview/analysis/CrossRefTest.java @@ -103,7 +103,7 @@ public class CrossRefTest * first with no dbrefs to search */ sources = new CrossRef(new SequenceI[] { seq }, al) - .findXrefSourcesForSequences(); + .findXrefSourcesForSequences(false); assertTrue(sources.isEmpty()); /* @@ -120,7 +120,7 @@ public class CrossRefTest seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349")); seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350")); sources = new CrossRef(new SequenceI[] { seq }, al) - .findXrefSourcesForSequences(); + .findXrefSourcesForSequences(false); assertEquals(4, sources.size()); assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]", sources.toString()); @@ -140,7 +140,7 @@ public class CrossRefTest // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ? al.addSequence(seq2); sources = new CrossRef(new SequenceI[] { seq }, al) - .findXrefSourcesForSequences(); + .findXrefSourcesForSequences(false); assertEquals(3, sources.size()); assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString()); } @@ -171,7 +171,7 @@ public class CrossRefTest */ AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al) - .findXrefSequences("UNIPROT"); + .findXrefSequences("UNIPROT", true); assertEquals(1, xrefs.getHeight()); assertSame(uniprotSeq, xrefs.getSequenceAt(0)); } @@ -206,7 +206,8 @@ public class CrossRefTest */ AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); Alignment xrefs = new CrossRef(new SequenceI[] { uniprotSeq }, - al).findXrefSequences("EMBL"); + al) + .findXrefSequences("EMBL", true); assertEquals(1, xrefs.getHeight()); assertSame(emblSeq, xrefs.getSequenceAt(0)); } @@ -233,7 +234,7 @@ public class CrossRefTest */ AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 }); Alignment xrefs = new CrossRef(new SequenceI[] { dna2 }, al) - .findXrefSequences("UNIPROT"); + .findXrefSequences("UNIPROT", true); assertNull(xrefs); } @@ -262,7 +263,8 @@ public class CrossRefTest */ DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419"); CrossRef testee = new CrossRef(al.getSequencesArray(), al); - boolean found = testee.searchDataset(dna1, dbref, result, null, true); + boolean found = testee.searchDataset(true, dna1, dbref, result, null, + true); assertFalse(found); assertTrue(result.isEmpty()); @@ -273,7 +275,7 @@ public class CrossRefTest * search for a protein sequence with dbref UNIPROT:Q9ZTS2 */ dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); - found = testee.searchDataset(dna1, dbref, result, null, true); + found = testee.searchDataset(true, dna1, dbref, result, null, true); assertTrue(found); assertEquals(1, result.size()); assertSame(pep1, result.get(0)); @@ -283,7 +285,7 @@ public class CrossRefTest */ result.clear(); dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); - found = testee.searchDataset(pep1, dbref, result, null, false); + found = testee.searchDataset(false, pep1, dbref, result, null, false); assertTrue(found); assertEquals(1, result.size()); assertSame(dna1, result.get(0)); @@ -332,7 +334,7 @@ public class CrossRefTest */ AlignmentI al = new Alignment(new SequenceI[] { dna1 }); Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al) - .findXrefSequences("UNIPROT"); + .findXrefSequences("UNIPROT", true); assertEquals(2, xrefs.getHeight()); /* @@ -418,7 +420,7 @@ public class CrossRefTest */ AlignmentI al = new Alignment(new SequenceI[] { dna1 }); Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al) - .findXrefSequences("UNIPROT"); + .findXrefSequences("UNIPROT", true); assertEquals(2, xrefs.getHeight()); assertSame(pep1, xrefs.getSequenceAt(0)); assertSame(pep2, xrefs.getSequenceAt(1)); @@ -500,7 +502,8 @@ public class CrossRefTest SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 }; AlignmentI al = new Alignment(seqs); Alignment xrefs = new CrossRef(seqs, al) - .findXrefSequences("UNIPROT"); +.findXrefSequences("UNIPROT", + true); assertEquals(2, xrefs.getHeight()); assertSame(pep1, xrefs.getSequenceAt(0)); assertSame(pep2, xrefs.getSequenceAt(1)); @@ -652,8 +655,8 @@ public class CrossRefTest */ SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 }; AlignmentI al = new Alignment(seqs); - Alignment xrefs = new CrossRef(seqs, al) - .findXrefSequences("EMBL"); + Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("EMBL", + false); /* * verify retrieved sequences diff --git a/test/jalview/ws/SequenceFetcherTest.java b/test/jalview/ws/SequenceFetcherTest.java index 50fb3c0..4e08430 100644 --- a/test/jalview/ws/SequenceFetcherTest.java +++ b/test/jalview/ws/SequenceFetcherTest.java @@ -108,7 +108,7 @@ public class SequenceFetcherTest // try and find products CrossRef crossRef = new CrossRef(al.getSequencesArray(), al); - List types = crossRef.findXrefSourcesForSequences(); + List types = crossRef.findXrefSourcesForSequences(dna); if (types != null) { System.out.println("Xref Types for: " @@ -116,7 +116,7 @@ public class SequenceFetcherTest for (String source : types) { System.out.println("Type: " + source); - SequenceI[] prod = crossRef.findXrefSequences(source) + SequenceI[] prod = crossRef.findXrefSequences(source, dna) .getSequencesArray(); System.out.println("Found " + ((prod == null) ? "no" : "" + prod.length) @@ -199,8 +199,8 @@ public class SequenceFetcherTest // have a bash at finding the products amongst all the retrieved // sequences. SequenceI[] seqs = al.getSequencesArray(); - Alignment prodal = new CrossRef(seqs, ds) - .findXrefSequences(null); + Alignment prodal = new CrossRef(seqs, ds).findXrefSequences(null, + dna); System.out.println("Found " + ((prodal == null) ? "no" : "" + prodal.getHeight()) + " products"); diff --git a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java index 8ee4f40..b3c7e10 100644 --- a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java +++ b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java @@ -179,7 +179,7 @@ public class DbRefFetcherTest assertEquals("Expected local reference map to be 3 nucleotides", dr[0] .getMap().getWidth(), 3); AlignmentI sprods = new CrossRef(alsq.getSequencesArray(), alsq) - .findXrefSequences(dr[0].getSource()); + .findXrefSequences(dr[0].getSource(), true); assertNotNull( "Couldn't recover cross reference sequence from dataset. Was it ever added ?", sprods);