import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE;
-import jalview.api.DBRefEntryI;
import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
* its dataset sequence to the dataset
*/
cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping);
+ // cdsSeq has a name constructed as CDS|<dbref>
+ // <dbref> will be either the accession for the coding sequence,
+ // marked in the /via/ dbref to the protein product accession
+ // or it will be the original nucleotide accession.
SequenceI cdsSeqDss = cdsSeq.createDatasetSequence();
cdsSeqs.add(cdsSeq);
if (!dataset.getSequences().contains(cdsSeqDss))
* same source and accession, so need a different accession for
* the CDS from the dna sequence
*/
- DBRefEntryI dnaRef = dnaDss.getSourceDBRef();
- if (dnaRef != null)
- {
- // assuming cds version same as dna ?!?
- DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(),
- dnaRef.getVersion(), cdsSeq.getName());
- proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
- .getInverse()));
- proteinProduct.addDBRef(proteinToCdsRef);
- }
+ // specific use case:
+ // Genomic contig ENSCHR:1, contains coding regions for ENSG01,
+ // ENSG02, ENSG03, with transcripts and products similarly named.
+ // cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01
+ // JBPNote: ?? can't actually create an example that demonstrates we
+ // need to
+ // synthesize an xref.
+ // TODO: merge conflicts from JAL-2154 branch and use PrimaryDBRefs()
+ // for (DBRefEntry primRef:dnaDss.getPrimaryDBRefs())
+ // {
+ // creates a complementary cross-reference to the source sequence's
+ // primary reference.
+
+ // // problem here is that the cross-reference is synthesized -
+ // cdsSeq.getName() may be like 'CDS|dnaaccession' or 'CDS|emblcdsacc'
+ // // assuming cds version same as dna ?!?
+ // DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(),
+ // dnaRef.getVersion(), cdsSeq.getName());
+ // proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
+ // .getInverse()));
+ // proteinProduct.addDBRef(proteinToCdsRef);
+ // }
/*
* transfer any features on dna that overlap the CDS
return false; // should only pass alignments with datasets here
}
- // map from dataset sequence to alignment sequence
- Map<SequenceI, SequenceI> alignedDatasets = new HashMap<SequenceI, SequenceI>();
+ // map from dataset sequence to alignment sequence(s)
+ Map<SequenceI, List<SequenceI>> alignedDatasets = new HashMap<SequenceI, List<SequenceI>>();
for (SequenceI seq : aligned.getSequences())
{
- // JAL-2110: fail if two or more alignment sequences have a common dataset
- // sequence.
- alignedDatasets.put(seq.getDatasetSequence(), seq);
+ SequenceI ds = seq.getDatasetSequence();
+ if (alignedDatasets.get(ds) == null)
+ {
+ alignedDatasets.put(ds, new ArrayList<SequenceI>());
+ }
+ alignedDatasets.get(ds).add(seq);
}
/*
}
/*
- * second pass - copy aligned sequences
+ * second pass - copy aligned sequences;
+ * heuristic rule: pair off sequences in order for the case where
+ * more than one shares the same dataset sequence
*/
for (SequenceI seq : unaligned.getSequences())
{
- SequenceI alignedSequence = alignedDatasets.get(seq
+ List<SequenceI> alignedSequences = alignedDatasets.get(seq
.getDatasetSequence());
- // JAL-2110: fail if two or more alignment sequences have common dataset
- // sequence.
// TODO: getSequenceAsString() will be deprecated in the future
// TODO: need to leave to SequenceI implementor to update gaps
- seq.setSequence(alignedSequence.getSequenceAsString());
+ // copy the gapped string of the first remaining aligned sequence that
+ // shares this sequence's dataset sequence
+ seq.setSequence(alignedSequences.get(0).getSequenceAsString());
+ if (alignedSequences.size() > 1)
+ {
+ // pop off aligned sequences (except the last one): the last one is
+ // kept so it can be reused if 'unaligned' holds more sequences for
+ // this dataset sequence than 'aligned' does; removing it would make
+ // the next get(0) throw IndexOutOfBoundsException
+ alignedSequences.remove(0);
+ }
}
return true;
* sequence
*/
DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
- dna1.getDatasetSequence().setSourceDBRef(dbref);
+ dna1.getDatasetSequence().addDBRef(dbref);
+ org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
- dna2.getDatasetSequence().setSourceDBRef(dbref);
+ dna2.getDatasetSequence().addDBRef(dbref);
+ org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));
/*
* CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
* verify peptide has added a dbref with reverse mapping to CDS
*/
assertNotNull(pep1.getDBRefs());
+ // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?
assertEquals(2, pep1.getDBRefs().length);
dbref = pep1.getDBRefs()[1];
assertEquals("ENSEMBL", dbref.getSource());
{
SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
- SequenceI as1 = dna1.deriveSequence(), as2 = dna1.deriveSequence()
- .getSubSequence(3, 7), as3 = dna2.deriveSequence();
+ SequenceI as1 = dna1.deriveSequence();
+ SequenceI as2 = dna1.deriveSequence().getSubSequence(3, 7);
+ SequenceI as3 = dna2.deriveSequence();
as1.insertCharAt(6, 5, '-');
String s_as1 = as1.getSequenceAsString();
as2.insertCharAt(6, 5, '-');
// why do we need to cast this still ?
((Alignment) aligned).createDatasetAlignment();
- SequenceI uas1 = dna1.deriveSequence(), uas2 = dna1.deriveSequence()
- .getSubSequence(3, 7), uas3 = dna2.deriveSequence();
+ SequenceI uas1 = dna1.deriveSequence();
+ SequenceI uas2 = dna1.deriveSequence().getSubSequence(3, 7);
+ SequenceI uas3 = dna2.deriveSequence();
AlignmentI tobealigned = new Alignment(new SequenceI[] { uas1, uas2,
uas3 });
((Alignment) tobealigned).createDatasetAlignment();
{
AlignmentAnnotation ann1 = addAnnotation("label1", "desc1", "calcId1",
1f);
- AlignmentAnnotation ann2 = addAnnotation("label2", "desc2", "calcId2",
- 1f);
+ addAnnotation("label2", "desc2", "calcId2", 1f);
AlignmentAnnotation ann3 = addAnnotation("label1", "desc3", "calcId3",
1f);
AlignmentAnnotation[] anns = seq.getAnnotation("label1");
@Test(groups = { "Functional" })
public void testGetAlignmentAnnotations_forCalcIdAndLabel()
{
- AlignmentAnnotation ann1 = addAnnotation("label1", "desc1", "calcId1",
- 1f);
+ addAnnotation("label1", "desc1", "calcId1", 1f);
AlignmentAnnotation ann2 = addAnnotation("label2", "desc2", "calcId2",
1f);
- AlignmentAnnotation ann3 = addAnnotation("label2", "desc3", "calcId3",
- 1f);
+ addAnnotation("label2", "desc3", "calcId3", 1f);
AlignmentAnnotation ann4 = addAnnotation("label2", "desc3", "calcId2",
1f);
- AlignmentAnnotation ann5 = addAnnotation("label5", "desc3", null, 1f);
- AlignmentAnnotation ann6 = addAnnotation(null, "desc3", "calcId3", 1f);
+ addAnnotation("label5", "desc3", null, 1f);
+ addAnnotation(null, "desc3", "calcId3", 1f);
+
List<AlignmentAnnotation> anns = seq.getAlignmentAnnotations("calcId2",
"label2");
assertEquals(2, anns.size());
sq.setDescription("Test sequence description..");
sq.setVamsasId("TestVamsasId");
- sq.setSourceDBRef(new DBRefEntry("PDB", "version0", "1TST"));
+ sq.addDBRef(new DBRefEntry("PDB", "version0", "1TST"));
- sq.addDBRef(new DBRefEntry("PDB", "version1", "1Tst"));
- sq.addDBRef(new DBRefEntry("PDB", "version2", "2Tst"));
- sq.addDBRef(new DBRefEntry("PDB", "version3", "3Tst"));
- sq.addDBRef(new DBRefEntry("PDB", "version4", "4Tst"));
+ sq.addDBRef(new DBRefEntry("PDB", "version1", "1PDB"));
+ sq.addDBRef(new DBRefEntry("PDB", "version2", "2PDB"));
+ sq.addDBRef(new DBRefEntry("PDB", "version3", "3PDB"));
+ sq.addDBRef(new DBRefEntry("PDB", "version4", "4PDB"));
sq.addPDBId(new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"));
sq.addPDBId(new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"));
sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
-
- sq.getDatasetSequence().addDBRef(
- new DBRefEntry("PDB", "version1", "1Tst"));
+
+ DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB");
+ DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version1", "2PDB");
+ List<DBRefEntry> primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb,
+ pdb2pdb });
+
+ sq.getDatasetSequence().addDBRef(pdb1pdb);
+ sq.getDatasetSequence().addDBRef(pdb2pdb);
sq.getDatasetSequence().addDBRef(
- new DBRefEntry("PDB", "version2", "2Tst"));
+ new DBRefEntry("PDB", "version3", "3PDB"));
sq.getDatasetSequence().addDBRef(
- new DBRefEntry("PDB", "version3", "3Tst"));
- sq.getDatasetSequence().addDBRef(
- new DBRefEntry("PDB", "version4", "4Tst"));
-
+ new DBRefEntry("PDB", "version4", "4PDB"));
+
+ PDBEntry pdbe1a=new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1");
+ PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1");
+ PDBEntry pdbe2a=new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2");
+ PDBEntry pdbe2b = new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2");
sq.getDatasetSequence().addPDBId(
- new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"));
+ pdbe1a);
sq.getDatasetSequence().addPDBId(
- new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"));
- sq.getDatasetSequence().addPDBId(
- new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
- sq.getDatasetSequence().addPDBId(
- new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
+ pdbe1b);
+ sq.getDatasetSequence().addPDBId(pdbe2a);
+ sq.getDatasetSequence().addPDBId(pdbe2b);
+
+ /*
+ * test we added pdb entries to the dataset sequence
+ */
+ Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries(), Arrays
+ .asList(new PDBEntry[] { pdbe1a, pdbe1b, pdbe2a, pdbe2b }),
+ "PDB Entries were not found on dataset sequence.");
+ /*
+ * we should recover a pdb entry that is on the dataset sequence via getPDBEntry
+ */
+ Assert.assertEquals(pdbe1a,
+ sq.getDatasetSequence().getPDBEntry("1PDB"),
+ "PDB Entry '1PDB' not found on dataset sequence via getPDBEntry.");
ArrayList<Annotation> annotsList = new ArrayList<Annotation>();
System.out.println(">>>>>> " + sq.getSequenceAsString().length());
annotsList.add(new Annotation("A", "A", 'X', 0.1f));
new AlignmentAnnotation("Test annot", "Test annot description",
annots));
Assert.assertEquals(sq.getDescription(), "Test sequence description..");
- Assert.assertEquals(sq.getDBRefs().length, 4);
+ Assert.assertEquals(sq.getDBRefs().length, 5);
Assert.assertEquals(sq.getAllPDBEntries().size(), 4);
Assert.assertNotNull(sq.getAnnotation());
Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2);
Assert.assertEquals(derived.getDescription(),
"Test sequence description..");
- Assert.assertEquals(derived.getDBRefs().length, 4);
+ Assert.assertEquals(derived.getDBRefs().length, 4); // come from dataset
Assert.assertEquals(derived.getAllPDBEntries().size(), 4);
Assert.assertNotNull(derived.getAnnotation());
Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2);
assertNotNull(sq.getSequenceFeatures());
assertArrayEquals(sq.getSequenceFeatures(),
derived.getSequenceFeatures());
+
+ /*
+ * verify we have primary db refs *just* for PDB IDs with associated
+ * PDBEntry objects
+ */
+
+ assertEquals(primRefs, sq.getPrimaryDBRefs());
+ assertEquals(primRefs, sq.getDatasetSequence().getPrimaryDBRefs());
+
+ assertEquals(sq.getPrimaryDBRefs(), derived.getPrimaryDBRefs());
+
}
/**