* find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs
*/
DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs());
- List<SequenceI> rseqs = new ArrayList<SequenceI>();
+ List<SequenceI> foundSeqs = new ArrayList<SequenceI>();
/*
* find sequences in the alignment which xref one of these DBRefs
* i.e. are xref-ed to a common sequence identifier
*/
- searchDatasetXrefs(fromDna, seq, lrfs, rseqs, null);
+ searchDatasetXrefs(fromDna, seq, lrfs, foundSeqs, null);
/*
* add those sequences' (dna-to-peptide or peptide-to-dna) dbref sources
*/
- for (SequenceI rs : rseqs)
+ for (SequenceI rs : foundSeqs)
{
DBRefEntry[] xrs = DBRefUtils
.selectDbRefs(!fromDna, rs.getDBRefs());
* context was searching from Protein sequences
* @param sequenceI
* @param lrfs
- * @param rseqs
+ * @param foundSeqs
* @return true if matches were found.
*/
private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI,
- DBRefEntry[] lrfs, List<SequenceI> rseqs, AlignedCodonFrame cf)
+ DBRefEntry[] lrfs, List<SequenceI> foundSeqs, AlignedCodonFrame cf)
{
boolean found = false;
if (lrfs == null)
// add in wildcards
xref.setVersion(null);
xref.setMap(null);
- found |= searchDataset(fromDna, sequenceI, xref, rseqs, cf, false);
+ found |= searchDataset(fromDna, sequenceI, xref, foundSeqs, cf, false);
}
return found;
}
* @param fromDna
* true if context was searching for refs *from* dna sequence, false
* if context was searching for refs *from* protein sequence
- * @param sequenceI
+ * @param fromSeq
* a sequence to ignore (start point of search)
* @param xrf
* a cross-reference to try to match
- * @param rseqs
+ * @param foundSeqs
* result list to add to
- * @param cf
+ * @param mappings
* a set of sequence mappings to add to
* @param direct
* - indicates the type of relationship between returned sequences,
* </ul>
* @return true if relationship found and sequence added.
*/
- boolean searchDataset(boolean fromDna, SequenceI sequenceI,
- DBRefEntry xrf, List<SequenceI> rseqs, AlignedCodonFrame cf,
+ boolean searchDataset(boolean fromDna, SequenceI fromSeq,
+ DBRefEntry xrf, List<SequenceI> foundSeqs, AlignedCodonFrame mappings,
boolean direct)
{
boolean found = false;
+ nxt.getDatasetSequence().getDisplayId(true)
+ ")");
}
- if (nxt == sequenceI || nxt == sequenceI.getDatasetSequence())
+ if (nxt == fromSeq || nxt == fromSeq.getDatasetSequence())
{
continue;
}
// }
if (!cands.isEmpty())
{
- if (!rseqs.contains(nxt))
+ if (!foundSeqs.contains(nxt))
{
found = true;
- rseqs.add(nxt);
- if (cf != null)
+ foundSeqs.add(nxt);
+ if (mappings != null && !direct)
{
- // don't search if we aren't given a codon map object
+ /*
+ * if the matched sequence has mapped dbrefs to
+ * protein product / cdna, add equivalent mappings to
+ * our source sequence
+ */
for (DBRefEntry candidate : cands)
{
Mapping mapping = candidate.getMap();
if (mapping.getTo() != null
&& map.getFromRatio() != map.getToRatio())
{
- // get sense of map correct for adding to product
- // alignment.
- if (fromDna)
+ /*
+ * add a mapping in the dna-to-peptide sense: use the map as is
+ * if its 'from' ratio is 3, otherwise use its inverse
+ */
+ if (map.getFromRatio() == 3)
{
- // map is from dna seq to a protein product
- cf.addMap(sequenceI, nxt, map);
+ mappings.addMap(nxt, fromSeq, map);
}
else
{
- // map should be from protein seq to its coding dna
- cf.addMap(nxt, sequenceI, map.getInverse());
+ mappings.addMap(nxt, fromSeq, map.getInverse());
}
}
}
}
}
- // TODO: add mapping between sequences if necessary
}
}
}
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
-import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
* peptide sequence with UNIPROT dbref
*/
SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
- dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"), new MapList(
+ new int[] { 1, 21 }, new int[] {
+ 1, 7 }, 3, 1));
+ DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
+ dna1.addDBRef(dbref);
dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
+ dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
/*
* first search for a dbref nowhere on the alignment:
*/
- DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419");
+ dbref = new DBRefEntry("UNIPROT", "0", "P30419");
CrossRef testee = new CrossRef(al.getSequencesArray(), al);
- boolean found = testee.searchDataset(true, dna1, dbref, result, null,
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ boolean found = testee.searchDataset(true, dna1, dbref, result, acf,
true);
assertFalse(found);
assertTrue(result.isEmpty());
-
- // TODO we are setting direct=true here but it is set to
- // false in Jalview code...
+ assertTrue(acf.isEmpty());
/*
* search for a protein sequence with dbref UNIPROT:Q9ZTS2
*/
+ acf = new AlignedCodonFrame();
dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result,
- null, false); // search dataset with a protein xref from a dna
+ acf, false); // search dataset with a protein xref from a dna
// sequence to locate the protein product
assertTrue(found);
assertEquals(1, result.size());
assertSame(pep1, result.get(0));
+ assertTrue(acf.isEmpty());
/*
* search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
*/
result.clear();
+ acf = new AlignedCodonFrame();
dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result,
- null, false); // search dataset with a protein's direct dbref to
+ acf, false); // search dataset with a protein's direct dbref to
// locate dna sequences with matching xref
assertTrue(found);
assertEquals(1, result.size());
assertSame(dna1, result.get(0));
+ // should now have a mapping from dna to pep1
+ List<SequenceToSequenceMapping> mappings = acf.getMappings();
+ assertEquals(1, mappings.size());
+ SequenceToSequenceMapping mapping = mappings.get(0);
+ assertSame(dna1, mapping.getFromSeq());
+ assertSame(pep1, mapping.getMapping().getTo());
+ MapList mapList = mapping.getMapping().getMap();
+ assertEquals(1, mapList.getToRatio());
+ assertEquals(3, mapList.getFromRatio());
+ assertEquals(1, mapList.getFromRanges().size());
+ assertEquals(1, mapList.getFromRanges().get(0)[0]);
+ assertEquals(21, mapList.getFromRanges().get(0)[1]);
+ assertEquals(1, mapList.getToRanges().size());
+ assertEquals(1, mapList.getToRanges().get(0)[0]);
+ assertEquals(7, mapList.getToRanges().get(0)[1]);
}
/**
* Test for finding 'product' sequences for the case where the selected
- * sequence has a dbref with a mapping to a sequence
+ * sequence has a dbref with a mapping to a sequence. This represents the case
+ * where either
+ * <ul>
+ * <li>a fetched sequence is already decorated with its cross-reference (e.g.
+ * EMBL + translation), or</li>
+ * <li>Get Cross-References has been done once resulting in instantiated
+ * cross-reference mappings</li>
+ * </ul>
*/
@Test(groups = { "Functional" })
public void testFindXrefSequences_fromDbRefMap()
{
/*
- * two peptide sequences each with a DBRef and SequenceFeature
+ * scenario: nucleotide sequence AF039662
+ * with dbref + mapping to Q9ZTS2 and P30419
+ * which themselves each have a dbref and feature
*/
+ SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
- pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
+ SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
+ dna1.createDatasetSequence();
+ pep1.createDatasetSequence();
+ pep2.createDatasetSequence();
+
+ pep1.getDatasetSequence().addDBRef(
+ new DBRefEntry("Pfam", "0", "PF00111"));
pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
"group"));
- SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
- pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
+ pep2.getDatasetSequence().addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
12f, "group2"));
- /*
- * nucleotide sequence (to go in the alignment)
- */
- SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
-
- /*
- * add DBRefEntry's to dna1 with mappings from dna to both peptides
- */
MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
3, 1);
Mapping map = new Mapping(pep1, mapList);
DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
- dna1.addDBRef(dbRef1);
+ dna1.getDatasetSequence().addDBRef(dbRef1);
mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
map = new Mapping(pep2, mapList);
DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
- dna1.addDBRef(dbRef2);
+ dna1.getDatasetSequence().addDBRef(dbRef2);
/*
* find UNIPROT xrefs for nucleotide sequence - it should pick up
}
/**
- * Helper method to assert seq1 looks like a copy of seq2
+ * Helper method that verifies that 'copy' is a distinct object from
+ * 'original' but has the same name, start, end and sequence string, and
+ * shares the very same dataset sequence object. Each check is a test
+ * assertion, so the calling test fails at the first check that does not hold.
*
- * @param seq1
- * @param seq2
+ * @param copy
+ *          the sequence expected to be a copy
+ * @param original
+ *          the sequence that 'copy' is checked against
*/
- private void checkCopySequence(SequenceI seq1, SequenceI seq2)
+ private void checkCopySequence(SequenceI copy, SequenceI original)
{
- assertNotSame(seq1, seq2);
- assertEquals(seq1.getName(), seq2.getName());
- assertEquals(seq1.getStart(), seq2.getStart());
- assertEquals(seq1.getEnd(), seq2.getEnd());
- assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString());
-
- /*
- * compare dbrefs
- */
- assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs());
- // check one to verify a copy, not the same object
- if (seq1.getDBRefs().length > 0)
- {
- assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]);
- }
-
- /*
- * compare features
- */
- assertArrayEquals(seq1.getSequenceFeatures(),
- seq2.getSequenceFeatures());
- if (seq1.getSequenceFeatures().length > 0)
- {
- assertNotSame(seq1.getSequenceFeatures()[0],
- seq2.getSequenceFeatures()[0]);
- }
+ assertNotSame(copy, original);
+ assertSame(copy.getDatasetSequence(), original.getDatasetSequence());
+ assertEquals(copy.getName(), original.getName());
+ assertEquals(copy.getStart(), original.getStart());
+ assertEquals(copy.getEnd(), original.getEnd());
+ assertEquals(copy.getSequenceAsString(), original.getSequenceAsString());
}
/**