import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE;
-import jalview.api.DBRefEntryI;
import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
* its dataset sequence to the dataset
*/
cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping);
+ // cdsSeq has a name constructed as CDS|<dbref>
+ // <dbref> will be either the accession for the coding sequence,
+ // marked in the /via/ dbref to the protein product accession
+ // or it will be the original nucleotide accession.
SequenceI cdsSeqDss = cdsSeq.createDatasetSequence();
cdsSeqs.add(cdsSeq);
if (!dataset.getSequences().contains(cdsSeqDss))
* same source and accession, so need a different accession for
* the CDS from the dna sequence
*/
- DBRefEntryI dnaRef = dnaDss.getSourceDBRef();
- if (dnaRef != null)
- {
- // assuming cds version same as dna ?!?
- DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(),
- dnaRef.getVersion(), cdsSeq.getName());
- proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
- .getInverse()));
- proteinProduct.addDBRef(proteinToCdsRef);
- }
+ // specific use case:
+ // Genomic contig ENSCHR:1, contains coding regions for ENSG01,
+ // ENSG02, ENSG03, with transcripts and products similarly named.
+ // cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01
+ // JBPNote: ?? can't actually create an example that demonstrates we
+ // need to
+ // synthesize an xref.
+ // TODO: merge conflicts from JAL-2154 branch and use PrimaryDBRefs()
+ // for (DBRefEntry primRef:dnaDss.getPrimaryDBRefs())
+ // {
+ // creates a complementary cross-reference to the source sequence's
+ // primary reference.
+
+ // // problem here is that the cross-reference is synthesized -
+ // cdsSeq.getName() may be like 'CDS|dnaaccession' or 'CDS|emblcdsacc'
+ // // assuming cds version same as dna ?!?
+ // DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(),
+ // dnaRef.getVersion(), cdsSeq.getName());
+ // proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
+ // .getInverse()));
+ // proteinProduct.addDBRef(proteinToCdsRef);
+ // }
/*
* transfer any features on dna that overlap the CDS
/*
* and add a reverse DbRef with the inverse mapping
*/
- if (mapFrom.getDatasetSequence() != null
- && mapFrom.getDatasetSequence().getSourceDBRef() != null)
+ if (mapFrom.getDatasetSequence() != null && false)
+ // && mapFrom.getDatasetSequence().getSourceDBRef() != null)
{
- DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence()
- .getSourceDBRef());
- dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping
- .getInverse()));
- mapTo.addDBRef(dbref);
+ // possible need to search primary references... except, why doesn't xref
+ // == getSourceDBRef ??
+ // DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence()
+ // .getSourceDBRef());
+ // dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping
+ // .getInverse()));
+ // mapTo.addDBRef(dbref);
}
if (fromDna)
* @return
*/
public boolean updateFrom(DBRefEntryI otherEntry);
+
+ /**
+ * Method to distinguish between direct and indirect database references
+ *
+ * primary references indicate the local sequence data directly corresponds
+ * with the database record. All other references are secondary. direct
+ * references indicate that part or all of the local sequence data can be
+ * mapped with another sequence, enabling annotation transfer.
+ * cross-references indicate the local sequence data can be corresponded to
+ * some other linear coordinate system via a transformation.
+ *
+ * This method is also sufficient to distinguish direct DBRefEntry mappings
+ * from other relationships - e.g. coding relationships (imply a 1:3/3:1
+ * mapping), but not transcript relationships, which imply a (possibly
+ * non-contiguous) 1:1 mapping
+ *
+ * The only way a dbref's mappings can be fully verified is via the local
+ * sequence frame, so rather than use isPrimary directly, please use
+ * SequenceI.getPrimaryDbRefs()
+ *
+ * @return true if this reference provides a primary accession for the
+ * associated sequence object
+ */
+ public boolean isPrimary();
}
import jalview.api.DBRefEntryI;
+import java.util.Arrays;
+
public class DBRefEntry implements DBRefEntryI
{
String source = "", version = "", accessionId = "";
+
/**
* maps from associated sequence to the database sequence's coordinate system
*/
}
-
public DBRefEntry(String source, String version, String accessionId)
{
this(source, version, accessionId, null);
String otherAccession = other.getAccessionId();
if ((accessionId == null && otherAccession != null)
|| (accessionId != null && otherAccession == null)
- || (accessionId != null && !accessionId.equalsIgnoreCase(otherAccession)))
+ || (accessionId != null && !accessionId
+ .equalsIgnoreCase(otherAccession)))
{
return false;
}
* otherwise the versions have to match
*/
String otherVersion = other.getVersion();
-
+
if ((version == null || version.equals("0") || version.endsWith(":0"))
&& otherVersion != null)
{
return accessionId;
}
-
@Override
public void setAccessionId(String accessionId)
{
this.accessionId = accessionId;
}
-
@Override
public void setSource(String source)
{
this.source = source;
}
-
@Override
public void setVersion(String version)
{
this.version = version;
}
-
@Override
public Mapping getMap()
{
{
return getSrcAccString();
}
+
+ @Override
+ public boolean isPrimary()
+ {
+ /*
+ * if a map is present, unless it is 1:1 and has no SequenceI mate, it cannot be a primary reference.
+ */
+ if (map != null)
+ {
+ if (map.getTo() != null)
+ {
+ return false;
+ }
+ if (map.getMap().getFromRatio() != map.getMap().getToRatio()
+ || map.getMap().getFromRatio() != 1)
+ {
+ return false;
+ }
+ // check map is really 1:1, no shifts allowed.
+ if (map.getMap().getFromHighest() != map.getMap().getToHighest()
+ && map.getMap().getFromLowest() != map.getMap().getToLowest()
+ && !Arrays.equals(
+ map.getMap().getFromRanges().toArray(new int[0][]),
+ map.getMap().getToRanges().toArray(new int[0][])))
+ {
+ return false;
+ }
+ }
+ if (version == null)
+ {
+ // no version string implies the reference has not been verified at all.
+ return false;
+ }
+ // tricky - this test really needs to search the sequence's set of dbrefs to
+ // see if there is a primary reference that derived this reference.
+ String ucv = version.toUpperCase();
+ for (String primsrc : Arrays.asList(DBRefSource.allSources()))
+ {
+ if (ucv.startsWith(primsrc.toUpperCase()))
+ {
+ // by convention, many secondary references inherit the primary
+ // reference's
+ // source string as a prefix for any version information from the
+ // secondary reference.
+ return false;
+ }
+ }
+ return true;
+ }
}
*/
package jalview.datamodel;
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* Defines internal constants for unambiguous annotation of DbRefEntry source
* strings and describing the data retrieved from external database sources (see
/**
* UNIPROT Accession Number
*/
- public static String UNIPROT = "UNIPROT";
+ public static final String UNIPROT = "UNIPROT";
/**
* UNIPROT Entry Name
*/
- public static String UP_NAME = "UNIPROT_NAME".toUpperCase();
+ public static final String UP_NAME = "UNIPROT_NAME".toUpperCase();
/**
* Uniprot Knowledgebase/TrEMBL as served from EMBL protein products.
/**
* PDB Entry Code
*/
- public static String PDB = "PDB";
+ public static final String PDB = "PDB";
/**
* EMBL ID
*/
- public static String EMBL = "EMBL";
+ public static final String EMBL = "EMBL";
/**
* EMBLCDS ID
*/
- public static String EMBLCDS = "EMBLCDS";
+ public static final String EMBLCDS = "EMBLCDS";
/**
* PFAM ID
*/
- public static String PFAM = "PFAM";
+ public static final String PFAM = "PFAM";
/**
* RFAM ID
*/
- public static String RFAM = "RFAM";
+ public static final String RFAM = "RFAM";
/**
* GeneDB ID
public static final String[] PROTEINDBS = { UNIPROT, PDB, UNIPROTKB,
EMBLCDSProduct, ENSEMBL }; // Ensembl ENSP* entries are protein
+
+ public static String[] allSources()
+ {
+ List<String> src = new ArrayList<String>();
+ for (Field f : DBRefSource.class.getFields())
+ {
+ if (String.class.equals(f.getType()))
+ {
+ try
+ {
+ src.add((String) f.get(null));
+ } catch (Exception x)
+ {
+ x.printStackTrace();
+ }
+ }
+ }
+ return src.toArray(new String[0]);
+ }
}
import jalview.analysis.AlignSeq;
import jalview.api.DBRefEntryI;
+import jalview.util.DBRefUtils;
+import jalview.util.MapList;
import jalview.util.StringUtils;
import java.util.ArrayList;
seq.getEnd());
}
description = seq.getDescription();
- sourceDBRef = seq.getSourceDBRef() == null ? null : new DBRefEntry(
- seq.getSourceDBRef());
if (seq != datasetSequence)
{
setDatasetSequence(seq.getDatasetSequence());
@Override
public PDBEntry getPDBEntry(String pdbIdStr)
{
- if (getDatasetSequence() == null
- || getDatasetSequence().getAllPDBEntries() == null)
+ if (getDatasetSequence() != null)
+ {
+ return getDatasetSequence().getPDBEntry(pdbIdStr);
+ }
+ if (pdbIds == null)
{
return null;
}
- List<PDBEntry> entries = getDatasetSequence().getAllPDBEntries();
+ List<PDBEntry> entries = getAllPDBEntries();
for (PDBEntry entry : entries)
{
if (entry.getId().equalsIgnoreCase(pdbIdStr))
return null;
}
- @Override
- public void setSourceDBRef(DBRefEntryI dbRef)
- {
- this.sourceDBRef = dbRef;
- }
@Override
- public DBRefEntryI getSourceDBRef()
+ public List<DBRefEntry> getPrimaryDBRefs()
{
- return this.sourceDBRef;
+ if (datasetSequence!=null)
+ {
+ return datasetSequence.getPrimaryDBRefs();
+ }
+ if (dbrefs==null || dbrefs.length==0)
+ {
+ return Arrays.asList(new DBRefEntry[0]);
+ }
+ synchronized (dbrefs)
+ {
+ List<DBRefEntry> primaries = new ArrayList<DBRefEntry>();
+ DBRefEntry tmp[] = new DBRefEntry[1], res[] = null;
+ for (DBRefEntry ref : dbrefs)
+ {
+ if (!ref.isPrimary())
+ {
+ continue;
+ }
+ if (ref.hasMap())
+ {
+ MapList mp = ref.getMap().getMap();
+ if (mp.getFromLowest() > start || mp.getFromHighest() < end)
+ {
+ // map only involves a subsequence, so cannot be primary
+ continue;
+ }
+ }
+ // whilst it looks like it is a primary ref, we also sanity check type
+ if (DBRefUtils.getCanonicalName(DBRefSource.PDB).equals(
+ DBRefUtils.getCanonicalName(ref.getSource())))
+ {
+ // PDB dbrefs imply there should be a PDBEntry associated
+ if (getPDBEntry(ref.getAccessionId()) != null)
+ {
+ primaries.add(ref);
+ }
+ continue;
+ }
+ // check standard protein or dna sources
+ tmp[0] = ref;
+ res = DBRefUtils.selectDbRefs(!isProtein(), tmp);
+ if (res != null && res[0] == tmp[0])
+ {
+ primaries.add(ref);
+ continue;
+ }
+ }
+ return primaries;
+ }
}
}
*/
package jalview.datamodel;
-import jalview.api.DBRefEntryI;
-
import java.util.List;
import java.util.Vector;
*/
public PDBEntry getPDBEntry(String pdbId);
- /**
- * Set the distinct source database, and accession number from which a
- * sequence and its start-end data were derived from. This is very important
- * for SIFTS mappings and must be set prior to performing SIFTS mapping.
- *
- * @param dbRef
- * the source dbRef for the sequence
- */
- public void setSourceDBRef(DBRefEntryI dbRef);
/**
- * Get the distinct source database, and accession number from which a
- * sequence and its start-end data were derived from.
+ * Get all primary database/accessions for this sequence's data. These
+ * DBRefEntry are expected to resolve to a valid record in the associated
+ * external database, either directly or via a provided 1:1 Mapping.
*
- * @return
+ * @return just the primary references (if any) for this sequence, or an empty
+ * list
*/
- public DBRefEntryI getSourceDBRef();
+ public List<DBRefEntry> getPrimaryDBRefs();
}
DBRefEntry retrievedref = new DBRefEntry(sourceDb,
getSequenceVersion(), accession);
dna.addDBRef(retrievedref);
- dna.setSourceDBRef(retrievedref);
// add map to indicate the sequence is a valid coordinate frame for the
// dbref
retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() },
dnaToProteinMapping.setTo(proteinSeq);
dnaToProteinMapping.setMappedFromId(proteinId);
proteinSeq.addDBRef(proteinDbRef);
- proteinSeq.setSourceDBRef(proteinDbRef);
ref.setMap(dnaToProteinMapping);
}
hasUniprotDbref = true;
DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId);
}
product.addDBRef(proteinToEmblProteinRef);
- product.setSourceDBRef(proteinToEmblProteinRef);
if (dnaToProteinMapping != null
&& dnaToProteinMapping.getTo() != null)
{
// clunky: ensure Uniprot xref if we have one is on mapped sequence
SequenceI ds = proteinSeq.getDatasetSequence();
- ds.setSourceDBRef(proteinSeq.getSourceDBRef());
-
+ // TODO: Verify ensp primary ref is on proteinSeq.getDatasetSequence()
Mapping map = new Mapping(ds, mapList);
DBRefEntry dbr = new DBRefEntry(getDbSource(),
getEnsemblDataVersion(), proteinSeq.getName(), map);
DBRefEntry self = new DBRefEntry(getDbSource(),
getEnsemblDataVersion(), seq.getName());
seq.addDBRef(self);
- seq.setSourceDBRef(self);
}
/**
{
DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(),
getEnsemblDataVersion(), name);
- sq.setSourceDBRef(dbref);
+ sq.addDBRef(dbref);
}
}
if (alignment == null)
ArrayList<SequenceI> seqsWithoutSourceDBRef = new ArrayList<SequenceI>();
for (SequenceI seq : sequences)
{
- if (seq.getSourceDBRef() == null && seq.getDBRefs() == null)
+ if (seq.getPrimaryDBRefs().size() == 0)
{
seqsWithoutSourceDBRef.add(seq);
continue;
DBRefEntry sourceDBRef = new DBRefEntry();
sourceDBRef.setAccessionId(getId());
sourceDBRef.setSource(DBRefSource.PDB);
- pdbSequence.setSourceDBRef(sourceDBRef);
+ // TODO: specify version for 'PDB' database ref if it is read from a file.
+ // TODO: decide if jalview.io should be creating primary refs!
+ sourceDBRef.setVersion("");
pdbSequence.addPDBId(entry);
pdbSequence.addDBRef(sourceDBRef);
SequenceI chainseq = pdbSequence;
}
ArrayList<StructureMapping> seqToStrucMapping = new ArrayList<StructureMapping>();
- if (isMapUsingSIFTs)
+ if (isMapUsingSIFTs && seq.isProtein())
{
setProgressBar(null);
setProgressBar(MessageManager
return "cif".equalsIgnoreCase(fileExt);
}
+ /**
+ * retrieve a mapping for seq from SIFTs using associated DBRefEntry for
+ * uniprot or PDB
+ *
+ * @param seq
+ * @param pdbFile
+ * @param targetChainId
+ * @param pdb
+ * @param maxChain
+ * @param sqmpping
+ * @param maxAlignseq
+ * @return
+ * @throws SiftsException
+ */
private StructureMapping getStructureMapping(SequenceI seq,
String pdbFile, String targetChainId, StructureFile pdb,
PDBChain maxChain, jalview.datamodel.Mapping sqmpping,
final int sequenceStart = sequence.getStart();
if (absStart == -1)
{
- // Is local sequence contained in dataset sequence?
+ // couldn't find local sequence in sequence from database, so check if
+ // the database sequence is a subsequence of local sequence
absStart = nonGapped.indexOf(entrySeq);
if (absStart == -1)
- { // verification failed.
+ {
+ // verification failed. couldn't find any relationship between
+ // entrySeq and local sequence
messages.append(sequence.getName()
+ " SEQUENCE NOT %100 MATCH \n");
continue;
}
+ /*
+ * found match for the whole of the database sequence within the local
+ * sequence's reference frame.
+ */
transferred = true;
sbuffer.append(sequence.getName() + " HAS " + absStart
+ " PREFIXED RESIDUES COMPARED TO " + dbSource + "\n");
- //
- // + " - ANY SEQUENCE FEATURES"
- // + " HAVE BEEN ADJUSTED ACCORDINGLY \n");
- // absStart = 0;
- // create valid mapping between matching region of local sequence and
- // the mapped sequence
+
+ /*
+ * So create a mapping to the external entry from the matching region of
+ * the local sequence, and leave local start/end untouched.
+ */
mp = new Mapping(null, new int[] { sequenceStart + absStart,
sequenceStart + absStart + entrySeq.length() - 1 }, new int[]
{ entry.getStart(), entry.getStart() + entrySeq.length() - 1 },
1, 1);
- updateRefFrame = false; // mapping is based on current start/end so
- // don't modify start and end
+ updateRefFrame = false;
}
else
{
+ /*
+ * found a match for the local sequence within sequence from
+ * the external database
+ */
transferred = true;
+
// update start and end of local sequence to place it in entry's
// reference frame.
// apply identity map map from whole of local sequence to matching
// absStart+sequence.getStart()+entrySeq.length()-1},
// new int[] { entry.getStart(), entry.getEnd() }, 1, 1);
// relocate local features for updated start
+
if (updateRefFrame)
{
if (sequence.getSequenceFeatures() != null)
{
+ /*
+ * relocate existing sequence features by offset
+ */
SequenceFeature[] sf = sequence.getSequenceFeatures();
int start = sequenceStart;
int end = sequence.getEnd();
System.out.println("Adding dbrefs to " + sequence.getName()
+ " from " + dbSource + " sequence : " + entry.getName());
sequence.transferAnnotation(entry, mp);
- // unknownSequences.remove(sequence);
+
absStart += entry.getStart();
int absEnd = absStart + nonGapped.length() - 1;
if (!trimDatasetSeqs)
{
DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
accessionId);
+
+ // mark dbRef as a primary reference for this sequence
dbRefs.add(dbRef);
}
- sequence.setSourceDBRef((dbRefs != null && dbRefs.size() > 0) ? dbRefs
- .get(0) : null);
Vector<PDBEntry> onlyPdbEntries = new Vector<PDBEntry>();
for (PDBEntry pdb : entry.getDbReference())
public DBRefEntryI getValidSourceDBRef(SequenceI seq)
throws SiftsException
{
- DBRefEntryI sourceDBRef = null;
- sourceDBRef = seq.getSourceDBRef();
- if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
+ DBRefEntry[] dbRefs = seq.getDBRefs();
+ if (dbRefs == null || dbRefs.length < 1)
{
- return sourceDBRef;
+ throw new SiftsException(
+ "Source DBRef could not be determined. DBRefs might not have been retrieved.");
}
- else
+
+ for (DBRefEntryI dbRef : dbRefs)
{
- DBRefEntry[] dbRefs = seq.getDBRefs();
- if (dbRefs == null || dbRefs.length < 1)
+ if (dbRef == null || dbRef.getAccessionId() == null
+ || dbRef.getSource() == null)
{
- throw new SiftsException(
- "Source DBRef could not be determined. DBRefs might not have been retrieved.");
+ continue;
}
-
- for (DBRefEntryI dbRef : dbRefs)
+ if (isValidDBRefEntry(dbRef)
+ && dbRef.isPrimary()
+ && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef
+ .getSource().equalsIgnoreCase(DBRefSource.PDB)))
{
- if (dbRef == null || dbRef.getAccessionId() == null
- || dbRef.getSource() == null)
- {
- continue;
- }
- if (isFoundInSiftsEntry(dbRef.getAccessionId())
- && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef
- .getSource().equalsIgnoreCase(DBRefSource.PDB)))
- {
- seq.setSourceDBRef(dbRef);
- return dbRef;
- }
+ return dbRef;
}
}
- if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef))
- {
- return sourceDBRef;
- }
throw new SiftsException("Could not get source DB Ref");
}
String originalSeq = AlignSeq.extractGaps(
jalview.util.Comparison.GapChars, seq.getSequenceAsString());
HashMap<Integer, int[]> mapping = new HashMap<Integer, int[]>();
- DBRefEntryI sourceDBRef = seq.getSourceDBRef();
+ DBRefEntryI sourceDBRef;
sourceDBRef = getValidSourceDBRef(seq);
// TODO ensure sequence start/end is in the same coordinate system and
// consistent with the choosen sourceDBRef
* sequence
*/
DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
- dna1.getDatasetSequence().setSourceDBRef(dbref);
+ dna1.getDatasetSequence().addDBRef(dbref);
+ org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
- dna2.getDatasetSequence().setSourceDBRef(dbref);
+ dna2.getDatasetSequence().addDBRef(dbref);
+ org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));
/*
* CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
* verify peptide has added a dbref with reverse mapping to CDS
*/
assertNotNull(pep1.getDBRefs());
+ // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?
assertEquals(2, pep1.getDBRefs().length);
dbref = pep1.getDBRefs()[1];
assertEquals("ENSEMBL", dbref.getSource());
assertFalse(ref1.updateFrom(ref2));
assertEquals("10", ref1.getVersion());
}
+
+ @Test(groups = { "Functional" })
+ public void testIsPrimary()
+ {
+ DBRefEntry dbr = new DBRefEntry(DBRefSource.UNIPROT, "", "Q12345");
+ assertTrue(dbr.isPrimary());
+ /*
+ * 1:1 mapping
+ */
+ dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 1,
+ 1));
+ assertTrue(dbr.isPrimary());
+ /*
+ * Version string is prefixed with another dbref source string (fail)
+ */
+ dbr.setVersion(DBRefSource.EMBL + ":0");
+ assertFalse(dbr.isPrimary());
+
+ /*
+ * Version string is alphanumeric
+ */
+ dbr.setVersion("0.1.b");
+ assertTrue(dbr.isPrimary());
+
+ /*
+ * 1:1 mapping with shift (fail)
+ */
+ dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 2, 4 }, 1,
+ 1));
+ assertFalse(dbr.isPrimary());
+
+ /*
+ * 1:1 mapping and sequenceRef (fail)
+ */
+ dbr.setMap(new Mapping(new Sequence("foo", "ASDF"), new int[] { 1, 3 },
+ new int[] { 1, 3 }, 1, 1));
+ assertFalse(dbr.isPrimary());
+
+ /*
+ * 1:3 mapping (fail)
+ */
+ dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 1,
+ 3));
+ assertFalse(dbr.isPrimary());
+ /*
+ * 2:2 mapping with shift (expected fail, but maybe use case for a pass)
+ */
+ dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 2,
+ 2));
+ assertFalse(dbr.isPrimary());
+
+ /*
+ * Version string is prefixed with another dbref source string
+ */
+ dbr.setVersion(DBRefSource.EMBL + ":0");
+ assertFalse(dbr.isPrimary());
+
+ }
}
sq.setDescription("Test sequence description..");
sq.setVamsasId("TestVamsasId");
- sq.setSourceDBRef(new DBRefEntry("PDB", "version0", "1TST"));
+ sq.addDBRef(new DBRefEntry("PDB", "version0", "1TST"));
- sq.addDBRef(new DBRefEntry("PDB", "version1", "1Tst"));
- sq.addDBRef(new DBRefEntry("PDB", "version2", "2Tst"));
- sq.addDBRef(new DBRefEntry("PDB", "version3", "3Tst"));
- sq.addDBRef(new DBRefEntry("PDB", "version4", "4Tst"));
+ sq.addDBRef(new DBRefEntry("PDB", "version1", "1PDB"));
+ sq.addDBRef(new DBRefEntry("PDB", "version2", "2PDB"));
+ sq.addDBRef(new DBRefEntry("PDB", "version3", "3PDB"));
+ sq.addDBRef(new DBRefEntry("PDB", "version4", "4PDB"));
sq.addPDBId(new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"));
sq.addPDBId(new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"));
sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
+
+ DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB");
+ DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version1", "2PDB");
+ List<DBRefEntry> primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb,
+ pdb2pdb });
+ sq.getDatasetSequence().addDBRef(pdb1pdb);
+ sq.getDatasetSequence().addDBRef(pdb2pdb);
sq.getDatasetSequence().addDBRef(
- new DBRefEntry("PDB", "version1", "1Tst"));
- sq.getDatasetSequence().addDBRef(
- new DBRefEntry("PDB", "version2", "2Tst"));
- sq.getDatasetSequence().addDBRef(
- new DBRefEntry("PDB", "version3", "3Tst"));
+ new DBRefEntry("PDB", "version3", "3PDB"));
sq.getDatasetSequence().addDBRef(
- new DBRefEntry("PDB", "version4", "4Tst"));
-
- sq.getDatasetSequence().addPDBId(
- new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"));
- sq.getDatasetSequence().addPDBId(
- new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"));
+ new DBRefEntry("PDB", "version4", "4PDB"));
+
+ PDBEntry pdbe1a=new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1");
+ PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1");
+ PDBEntry pdbe2a=new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2");
+ PDBEntry pdbe2b = new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2");
sq.getDatasetSequence().addPDBId(
- new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"));
+ pdbe1a);
sq.getDatasetSequence().addPDBId(
- new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"));
+ pdbe1b);
+ sq.getDatasetSequence().addPDBId(pdbe2a);
+ sq.getDatasetSequence().addPDBId(pdbe2b);
+
+ /*
+ * test we added pdb entries to the dataset sequence
+ */
+ Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries(), Arrays
+ .asList(new PDBEntry[] { pdbe1a, pdbe1b, pdbe2a, pdbe2b }),
+ "PDB Entries were not found on dataset sequence.");
+ /*
+ * we should recover a pdb entry that is on the dataset sequence via PDBEntry
+ */
+ Assert.assertEquals(pdbe1a,
+ sq.getDatasetSequence().getPDBEntry("1PDB"),
+ "PDB Entry '1PDB' not found on dataset sequence via getPDBEntry.");
ArrayList<Annotation> annotsList = new ArrayList<Annotation>();
System.out.println(">>>>>> " + sq.getSequenceAsString().length());
annotsList.add(new Annotation("A", "A", 'X', 0.1f));
new AlignmentAnnotation("Test annot", "Test annot description",
annots));
Assert.assertEquals(sq.getDescription(), "Test sequence description..");
- Assert.assertEquals(sq.getDBRefs().length, 4);
+ Assert.assertEquals(sq.getDBRefs().length, 5);
Assert.assertEquals(sq.getAllPDBEntries().size(), 4);
Assert.assertNotNull(sq.getAnnotation());
Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2);
Assert.assertEquals(derived.getDescription(),
"Test sequence description..");
- Assert.assertEquals(derived.getDBRefs().length, 4);
+ Assert.assertEquals(derived.getDBRefs().length, 4); // come from dataset
Assert.assertEquals(derived.getAllPDBEntries().size(), 4);
Assert.assertNotNull(derived.getAnnotation());
Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2);
assertNotNull(sq.getSequenceFeatures());
assertArrayEquals(sq.getSequenceFeatures(),
derived.getSequenceFeatures());
+
+ /*
+ * verify we have primary db refs *just* for PDB IDs with associated
+ * PDBEntry objects
+ */
+
+ assertEquals(primRefs, sq.getPrimaryDBRefs());
+ assertEquals(primRefs, sq.getDatasetSequence().getPrimaryDBRefs());
+
+ assertEquals(sq.getPrimaryDBRefs(), derived.getPrimaryDBRefs());
+
}
/**
assertEquals(5, dbrefs.length);
assertEquals(DBRefSource.EMBL, dbrefs[0].getSource());
assertEquals("CAA30420.1", dbrefs[0].getAccessionId());
+ // TODO: verify getPrimaryDBRefs() for peptide products
assertEquals(cds1Map.getInverse(), dbrefs[0].getMap().getMap());
assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource());
assertEquals("CAA30420.1", dbrefs[1].getAccessionId());