import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE;
-import jalview.api.DBRefEntryI;
import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
* its dataset sequence to the dataset
*/
cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping);
+ // cdsSeq has a name constructed as CDS|<dbref>
+ // <dbref> will be either the accession for the coding sequence,
+ // marked in the /via/ dbref to the protein product accession
+ // or it will be the original nucleotide accession.
SequenceI cdsSeqDss = cdsSeq.createDatasetSequence();
cdsSeqs.add(cdsSeq);
if (!dataset.getSequences().contains(cdsSeqDss))
* same source and accession, so need a different accession for
* the CDS from the dna sequence
*/
- DBRefEntryI dnaRef = dnaDss.getSourceDBRef();
- if (dnaRef != null)
- {
- // assuming cds version same as dna ?!?
- DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(),
- dnaRef.getVersion(), cdsSeq.getName());
- proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
- .getInverse()));
- proteinProduct.addDBRef(proteinToCdsRef);
- }
+ // specific use case:
+ // Genomic contig ENSCHR:1, contains coding regions for ENSG01,
+ // ENSG02, ENSG03, with transcripts and products similarly named.
+ // cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01
+ // JBPNote: ?? can't actually create an example that demonstrates we
+ // need to synthesize an xref.
+ // TODO: merge conflicts from JAL-2154 branch and use PrimaryDBRefs()
+ // for (DBRefEntry primRef:dnaDss.getPrimaryDBRefs())
+ // {
+ // creates a complementary cross-reference to the source sequence's
+ // primary reference.
+
+ // // problem here is that the cross-reference is synthesized -
+ // cdsSeq.getName() may be like 'CDS|dnaaccession' or 'CDS|emblcdsacc'
+ // // assuming cds version same as dna ?!?
+ // DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(),
+ // dnaRef.getVersion(), cdsSeq.getName());
+ // proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
+ // .getInverse()));
+ // proteinProduct.addDBRef(proteinToCdsRef);
+ // }
/*
* transfer any features on dna that overlap the CDS
/*
* and add a reverse DbRef with the inverse mapping
*/
- if (mapFrom.getDatasetSequence() != null
- && mapFrom.getDatasetSequence().getSourceDBRef() != null)
+ if (mapFrom.getDatasetSequence() != null && false)
+ // && mapFrom.getDatasetSequence().getSourceDBRef() != null)
{
- DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence()
- .getSourceDBRef());
- dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping
- .getInverse()));
- mapTo.addDBRef(dbref);
+ // possible need to search primary references... except, why doesn't xref
+ // == getSourceDBRef ??
+ // DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence()
+ // .getSourceDBRef());
+ // dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping
+ // .getInverse()));
+ // mapTo.addDBRef(dbref);
}
if (fromDna)
import jalview.analysis.AlignSeq;
import jalview.api.DBRefEntryI;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
import jalview.util.StringUtils;
import java.util.ArrayList;
seq.getEnd());
}
description = seq.getDescription();
- sourceDBRef = seq.getSourceDBRef() == null ? null : new DBRefEntry(
- seq.getSourceDBRef());
if (seq != datasetSequence)
{
setDatasetSequence(seq.getDatasetSequence());
return null;
}
- @Override
- public void setSourceDBRef(DBRefEntryI dbRef)
- {
- this.sourceDBRef = dbRef;
- }
@Override
- public DBRefEntryI getSourceDBRef()
+ public List<DBRefEntry> getPrimaryDBRefs()
{
- return this.sourceDBRef;
+ if (datasetSequence!=null)
+ {
+ return datasetSequence.getPrimaryDBRefs();
+ }
+ if (dbrefs==null || dbrefs.length==0)
+ {
+ return Arrays.asList(new DBRefEntry[0]);
+ }
+ synchronized (dbrefs)
+ {
+ List<DBRefEntry> primaries = new ArrayList<DBRefEntry>();
+ DBRefEntry tmp[] = new DBRefEntry[1], res[] = null;
+ for (DBRefEntry ref : dbrefs)
+ {
+ if (!ref.isPrimary())
+ {
+ continue;
+ }
+ if (ref.hasMap())
+ {
+ MapList mp = ref.getMap().getMap();
+ if (mp.getFromLowest() > start || mp.getFromHighest() < end)
+ {
+ // map only involves a subsequence, so cannot be primary
+ continue;
+ }
+ }
+ // whilst it looks like it is a primary ref, we also sanity check type
+ if (DBRefUtils.getCanonicalName(DBRefSource.PDB).equals(
+ DBRefUtils.getCanonicalName(ref.getSource())))
+ {
+ // PDB dbrefs imply there should be a PDBEntry associated
+ if (getPDBEntry(ref.getAccessionId()) != null)
+ {
+ primaries.add(ref);
+ }
+ continue;
+ }
+ // check standard protein or dna sources
+ tmp[0] = ref;
+ res = DBRefUtils.selectDbRefs(!isProtein(), tmp);
+ if (res != null && res[0] == tmp[0])
+ {
+ primaries.add(ref);
+ continue;
+ }
+ }
+ return primaries;
+ }
}
}
*/
package jalview.datamodel;
-import jalview.api.DBRefEntryI;
-
import java.util.List;
import java.util.Vector;
*/
public PDBEntry getPDBEntry(String pdbId);
- /**
- * Set the distinct source database, and accession number from which a
- * sequence and its start-end data were derived from. This is very important
- * for SIFTS mappings and must be set prior to performing SIFTS mapping.
- *
- * @param dbRef
- * the source dbRef for the sequence
- */
- public void setSourceDBRef(DBRefEntryI dbRef);
/**
- * Get the distinct source database, and accession number from which a
- * sequence and its start-end data were derived from.
+ * Get all primary database references (source database and accession) for
+ * this sequence's data. Each returned DBRefEntry is expected to resolve to a
+ * valid record in the associated external database, either directly or via
+ * a provided 1:1 Mapping.
*
- * @return
+ * @return just the primary references (if any) for this sequence, or an empty
+ * list if it has none
*/
- public DBRefEntryI getSourceDBRef();
+ public List<DBRefEntry> getPrimaryDBRefs();
}
DBRefEntry retrievedref = new DBRefEntry(sourceDb,
getSequenceVersion(), accession);
dna.addDBRef(retrievedref);
- dna.setSourceDBRef(retrievedref);
// add map to indicate the sequence is a valid coordinate frame for the
// dbref
retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() },
dnaToProteinMapping.setTo(proteinSeq);
dnaToProteinMapping.setMappedFromId(proteinId);
proteinSeq.addDBRef(proteinDbRef);
- proteinSeq.setSourceDBRef(proteinDbRef);
ref.setMap(dnaToProteinMapping);
}
hasUniprotDbref = true;
DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId);
}
product.addDBRef(proteinToEmblProteinRef);
- product.setSourceDBRef(proteinToEmblProteinRef);
if (dnaToProteinMapping != null
&& dnaToProteinMapping.getTo() != null)
{
// clunky: ensure Uniprot xref if we have one is on mapped sequence
SequenceI ds = proteinSeq.getDatasetSequence();
- ds.setSourceDBRef(proteinSeq.getSourceDBRef());
-
+ // TODO: Verify ensp primary ref is on proteinSeq.getDatasetSequence()
Mapping map = new Mapping(ds, mapList);
DBRefEntry dbr = new DBRefEntry(getDbSource(),
getEnsemblDataVersion(), proteinSeq.getName(), map);
DBRefEntry self = new DBRefEntry(getDbSource(),
getEnsemblDataVersion(), seq.getName());
seq.addDBRef(self);
- seq.setSourceDBRef(self);
}
/**
{
DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(),
getEnsemblDataVersion(), name);
- sq.setSourceDBRef(dbref);
+ sq.addDBRef(dbref);
}
}
if (alignment == null)
ArrayList<SequenceI> seqsWithoutSourceDBRef = new ArrayList<SequenceI>();
for (SequenceI seq : sequences)
{
- if (seq.getSourceDBRef() == null && seq.getDBRefs() == null)
+ if (seq.getPrimaryDBRefs().size() == 0)
{
seqsWithoutSourceDBRef.add(seq);
continue;
DBRefEntry sourceDBRef = new DBRefEntry();
sourceDBRef.setAccessionId(getId());
sourceDBRef.setSource(DBRefSource.PDB);
- pdbSequence.setSourceDBRef(sourceDBRef);
+ // TODO: specify version for 'PDB' database ref if it is read from a file.
+ // TODO: decide if jalview.io should be creating primary refs!
+ sourceDBRef.setVersion("");
pdbSequence.addPDBId(entry);
pdbSequence.addDBRef(sourceDBRef);
SequenceI chainseq = pdbSequence;
{
DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
accessionId);
+
+ // mark dbRef as a primary reference for this sequence
dbRefs.add(dbRef);
}
- sequence.setSourceDBRef((dbRefs != null && dbRefs.size() > 0) ? dbRefs
- .get(0) : null);
Vector<PDBEntry> onlyPdbEntries = new Vector<PDBEntry>();
for (PDBEntry pdb : entry.getDbReference())
public DBRefEntryI getValidSourceDBRef(SequenceI seq)
throws SiftsException
{
- DBRefEntryI sourceDBRef = null;
- sourceDBRef = seq.getSourceDBRef();
- if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
+ DBRefEntry[] dbRefs = seq.getDBRefs(); // candidates: every dbref held by seq
+ if (dbRefs == null || dbRefs.length < 1)
{
- return sourceDBRef;
+ throw new SiftsException(
+ "Source DBRef could not be determined. DBRefs might not have been retrieved."); // nothing to choose from
}
- else
+
+ for (DBRefEntryI dbRef : dbRefs) // the first acceptable ref, in array order, is returned
{
- DBRefEntry[] dbRefs = seq.getDBRefs();
- if (dbRefs == null || dbRefs.length < 1)
+ if (dbRef == null || dbRef.getAccessionId() == null
+ || dbRef.getSource() == null)
{
- throw new SiftsException(
- "Source DBRef could not be determined. DBRefs might not have been retrieved.");
+ continue; // skip null refs and refs lacking an accession or source
}
-
- for (DBRefEntryI dbRef : dbRefs)
+ if (isValidDBRefEntry(dbRef) // must pass the validity check
+ && dbRef.isPrimary() // must be flagged as a primary reference
+ && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef
+ .getSource().equalsIgnoreCase(DBRefSource.PDB))) // source must be UNIPROT or PDB, ignoring case
{
- if (dbRef == null || dbRef.getAccessionId() == null
- || dbRef.getSource() == null)
- {
- continue;
- }
- if (isFoundInSiftsEntry(dbRef.getAccessionId())
- && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef
- .getSource().equalsIgnoreCase(DBRefSource.PDB)))
- {
- seq.setSourceDBRef(dbRef);
- return dbRef;
- }
+ return dbRef;
}
}
- if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
- {
- return sourceDBRef;
- }
throw new SiftsException("Could not get source DB Ref");
}
String originalSeq = AlignSeq.extractGaps(
jalview.util.Comparison.GapChars, seq.getSequenceAsString());
HashMap<Integer, int[]> mapping = new HashMap<Integer, int[]>();
- DBRefEntryI sourceDBRef = seq.getSourceDBRef();
+ DBRefEntryI sourceDBRef;
sourceDBRef = getValidSourceDBRef(seq);
// TODO ensure sequence start/end is in the same coordinate system and
// consistent with the chosen sourceDBRef
* sequence
*/
DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
- dna1.getDatasetSequence().setSourceDBRef(dbref);
+ dna1.getDatasetSequence().addDBRef(dbref);
+ org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
- dna2.getDatasetSequence().setSourceDBRef(dbref);
+ dna2.getDatasetSequence().addDBRef(dbref);
+ org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));
/*
* CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
new AlignmentAnnotation("Test annot", "Test annot description",
annots));
Assert.assertEquals(sq.getDescription(), "Test sequence description..");
- Assert.assertEquals(sq.getDBRefs().length, 4);
+ Assert.assertEquals(sq.getDBRefs().length, 5);
Assert.assertEquals(sq.getAllPDBEntries().size(), 4);
Assert.assertNotNull(sq.getAnnotation());
Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2);
Assert.assertEquals(derived.getDescription(),
"Test sequence description..");
- Assert.assertEquals(derived.getDBRefs().length, 4);
+ Assert.assertEquals(derived.getDBRefs().length, 4); // come from dataset
Assert.assertEquals(derived.getAllPDBEntries().size(), 4);
Assert.assertNotNull(derived.getAnnotation());
Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2);
assertNotNull(sq.getSequenceFeatures());
assertArrayEquals(sq.getSequenceFeatures(),
derived.getSequenceFeatures());
+
+ /*
+ * verify we have primary db refs *just* for PDB IDs with associated
+ * PDBEntry objects
+ */
+
+ assertEquals(primRefs, sq.getPrimaryDBRefs());
+ assertEquals(primRefs, sq.getDatasetSequence().getPrimaryDBRefs());
+
+ assertEquals(sq.getPrimaryDBRefs(), derived.getPrimaryDBRefs());
+
}
/**