From: Jim Procter Date: Thu, 25 Aug 2016 11:12:06 +0000 (+0100) Subject: Merge branch 'refactor/JAL-2106_sourceDbRef_revision' into trailm X-Git-Tag: Release_2_10_0~47^2~4^2~43^2~15^2 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=75f46f30f01d025e5696dfeead2c19aba316a1bf;hp=d63bbc2d56c86d7dca68e76104a8682b5e38c763;p=jalview.git Merge branch 'refactor/JAL-2106_sourceDbRef_revision' into trailm --- diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index d93f42f..dc57f6e 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -22,7 +22,6 @@ package jalview.analysis; import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE; -import jalview.api.DBRefEntryI; import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; @@ -1682,6 +1681,10 @@ public class AlignmentUtils * its dataset sequence to the dataset */ cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping); + // cdsSeq has a name constructed as CDS| + // will be either the accession for the coding sequence, + // marked in the /via/ dbref to the protein product accession + // or it will be the original nucleotide accession. SequenceI cdsSeqDss = cdsSeq.createDatasetSequence(); cdsSeqs.add(cdsSeq); if (!dataset.getSequences().contains(cdsSeqDss)) @@ -1745,16 +1748,28 @@ public class AlignmentUtils * same source and accession, so need a different accession for * the CDS from the dna sequence */ - DBRefEntryI dnaRef = dnaDss.getSourceDBRef(); - if (dnaRef != null) - { - // assuming cds version same as dna ?!? - DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(), - dnaRef.getVersion(), cdsSeq.getName()); - proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap - .getInverse())); - proteinProduct.addDBRef(proteinToCdsRef); - } + // specific use case: + // Genomic contig ENSCHR:1, contains coding regions for ENSG01, + // ENSG02, ENSG03, with transcripts and products similarly named. + // cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01 + // JBPNote: ?? can't actually create an example that demonstrates we + // need to + // synthesize an xref. + // TODO: merge conflicts from JAL-2154 branch and use PrimaryDBRefs() + // for (DBRefEntry primRef:dnaDss.getPrimaryDBRefs()) + // { + // creates a complementary cross-reference to the source sequence's + // primary reference. + + // // problem here is that the cross-reference is synthesized - + // cdsSeq.getName() may be like 'CDS|dnaaccession' or 'CDS|emblcdsacc' + // // assuming cds version same as dna ?!? + // DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(), + // dnaRef.getVersion(), cdsSeq.getName()); + // proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap + // .getInverse())); + // proteinProduct.addDBRef(proteinToCdsRef); + // } /* * transfer any features on dna that overlap the CDS diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index ddfd7ff..c027742 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -742,14 +742,16 @@ public class CrossRef /* * and add a reverse DbRef with the inverse mapping */ - if (mapFrom.getDatasetSequence() != null - && mapFrom.getDatasetSequence().getSourceDBRef() != null) + if (mapFrom.getDatasetSequence() != null && false) + // && mapFrom.getDatasetSequence().getSourceDBRef() != null) { - DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence() - .getSourceDBRef()); - dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping - .getInverse())); - mapTo.addDBRef(dbref); + // possible need to search primary references... except, why doesn't xref + // == getSourceDBRef ?? + // DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence() + // .getSourceDBRef()); + // dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping + // .getInverse())); + // mapTo.addDBRef(dbref); } if (fromDna) diff --git a/src/jalview/api/DBRefEntryI.java b/src/jalview/api/DBRefEntryI.java index 32245b3..701acb6 100644 --- a/src/jalview/api/DBRefEntryI.java +++ b/src/jalview/api/DBRefEntryI.java @@ -70,4 +70,28 @@ public interface DBRefEntryI * @return */ public boolean updateFrom(DBRefEntryI otherEntry); + + /** + * Method to distinguish between direct and indirect database references + * + * primary references indicate the local sequence data directly corresponds + * with the database record. All other references are secondary. direct + * references indicate that part or all of the local sequence data can be + * mapped with another sequence, enabling annotation transfer. + * cross-references indicate the local sequence data can be corresponded to + * some other linear coordinate system via a transformation. + * + * This method is also sufficient to distinguish direct DBRefEntry mappings + * from other relationships - e.g. coding relationships (imply a 1:3/3:1 + * mapping), but not transcript relationships, which imply a (possibly + * non-contiguous) 1:1 mapping + * + * The only way a dbref's mappings can be fully verified is via the local + * sequence frame, so rather than use isPrimary directly, please use + * SequenceI.getPrimaryDbRefs() + * + * @return true if this reference provides a primary accession for the + * associated sequence object + */ + public boolean isPrimary(); } diff --git a/src/jalview/datamodel/DBRefEntry.java b/src/jalview/datamodel/DBRefEntry.java index a641b1b..11e77d8 100755 --- a/src/jalview/datamodel/DBRefEntry.java +++ b/src/jalview/datamodel/DBRefEntry.java @@ -22,9 +22,12 @@ package jalview.datamodel; import jalview.api.DBRefEntryI; +import java.util.Arrays; + public class DBRefEntry implements DBRefEntryI { String source = "", version = "", accessionId = ""; + /** * maps from associated sequence to the database sequence's coordinate system */ @@ -35,7 +38,6 @@ public class DBRefEntry implements DBRefEntryI } - public DBRefEntry(String source, String version, String accessionId) { this(source, version, accessionId, null); @@ -138,7 +140,8 @@ public class DBRefEntry implements DBRefEntryI String otherAccession = other.getAccessionId(); if ((accessionId == null && otherAccession != null) || (accessionId != null && otherAccession == null) - || (accessionId != null && !accessionId.equalsIgnoreCase(otherAccession))) + || (accessionId != null && !accessionId + .equalsIgnoreCase(otherAccession))) { return false; } @@ -148,7 +151,7 @@ public class DBRefEntry implements DBRefEntryI * otherwise the versions have to match */ String otherVersion = other.getVersion(); - + if ((version == null || version.equals("0") || version.endsWith(":0")) && otherVersion != null) { @@ -223,28 +226,24 @@ public class DBRefEntry implements DBRefEntryI return accessionId; } - @Override public void setAccessionId(String accessionId) { this.accessionId = accessionId; } - @Override public void setSource(String source) { this.source = source; } - @Override public void setVersion(String version) { this.version = version; } - @Override public Mapping getMap() { @@ -280,4 +279,53 @@ public class DBRefEntry implements DBRefEntryI { return getSrcAccString(); } + + @Override + public boolean isPrimary() + { + /* + * if a map is present, unless it is 1:1 and has no SequenceI mate, it cannot be a primary reference. + */ + if (map != null) + { + if (map.getTo() != null) + { + return false; + } + if (map.getMap().getFromRatio() != map.getMap().getToRatio() + || map.getMap().getFromRatio() != 1) + { + return false; + } + // check map is really 1:1, no shifts allowed. + if (map.getMap().getFromHighest() != map.getMap().getToHighest() + && map.getMap().getFromLowest() != map.getMap().getToLowest() + && !Arrays.equals( + map.getMap().getFromRanges().toArray(new int[0][]), + map.getMap().getToRanges().toArray(new int[0][]))) + { + return false; + } + } + if (version == null) + { + // no version string implies the reference has not been verified at all. + return false; + } + // tricky - this test really needs to search the sequence's set of dbrefs to + // see if there is a primary reference that derived this reference. + String ucv = version.toUpperCase(); + for (String primsrc : Arrays.asList(DBRefSource.allSources())) + { + if (ucv.startsWith(primsrc.toUpperCase())) + { + // by convention, many secondary references inherit the primary + // reference's + // source string as a prefix for any version information from the + // secondary reference. + return false; + } + } + return true; + } } diff --git a/src/jalview/datamodel/DBRefSource.java b/src/jalview/datamodel/DBRefSource.java index fba9211..064764c 100755 --- a/src/jalview/datamodel/DBRefSource.java +++ b/src/jalview/datamodel/DBRefSource.java @@ -20,6 +20,10 @@ */ package jalview.datamodel; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.List; + /** * Defines internal constants for unambiguous annotation of DbRefEntry source * strings and describing the data retrieved from external database sources (see @@ -36,12 +40,12 @@ public class DBRefSource /** * UNIPROT Accession Number */ - public static String UNIPROT = "UNIPROT"; + public static final String UNIPROT = "UNIPROT"; /** * UNIPROT Entry Name */ - public static String UP_NAME = "UNIPROT_NAME".toUpperCase(); + public static final String UP_NAME = "UNIPROT_NAME".toUpperCase(); /** * Uniprot Knowledgebase/TrEMBL as served from EMBL protein products. @@ -54,27 +58,27 @@ public class DBRefSource /** * PDB Entry Code */ - public static String PDB = "PDB"; + public static final String PDB = "PDB"; /** * EMBL ID */ - public static String EMBL = "EMBL"; + public static final String EMBL = "EMBL"; /** * EMBLCDS ID */ - public static String EMBLCDS = "EMBLCDS"; + public static final String EMBLCDS = "EMBLCDS"; /** * PFAM ID */ - public static String PFAM = "PFAM"; + public static final String PFAM = "PFAM"; /** * RFAM ID */ - public static String RFAM = "RFAM"; + public static final String RFAM = "RFAM"; /** * GeneDB ID @@ -98,4 +102,23 @@ public class DBRefSource public static final String[] PROTEINDBS = { UNIPROT, PDB, UNIPROTKB, EMBLCDSProduct, ENSEMBL }; // Ensembl ENSP* entries are protein + + public static String[] allSources() + { + List src = new ArrayList(); + for (Field f : DBRefSource.class.getFields()) + { + if (String.class.equals(f.getType())) + { + try + { + src.add((String) f.get(null)); + } catch (Exception x) + { + x.printStackTrace(); + } + } + } + return src.toArray(new String[0]); + } } diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index 88f4308..620bcc1 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -22,6 +22,8 @@ package jalview.datamodel; import jalview.analysis.AlignSeq; import jalview.api.DBRefEntryI; +import jalview.util.DBRefUtils; +import jalview.util.MapList; import jalview.util.StringUtils; import java.util.ArrayList; @@ -235,8 +237,6 @@ public class Sequence extends ASequence implements SequenceI seq.getEnd()); } description = seq.getDescription(); - sourceDBRef = seq.getSourceDBRef() == null ? null : new DBRefEntry( - seq.getSourceDBRef()); if (seq != datasetSequence) { setDatasetSequence(seq.getDatasetSequence()); @@ -1395,12 +1395,15 @@ public class Sequence extends ASequence implements SequenceI @Override public PDBEntry getPDBEntry(String pdbIdStr) { - if (getDatasetSequence() == null - || getDatasetSequence().getAllPDBEntries() == null) + if (getDatasetSequence() != null) + { + return getDatasetSequence().getPDBEntry(pdbIdStr); + } + if (pdbIds == null) { return null; } - List entries = getDatasetSequence().getAllPDBEntries(); + List entries = getAllPDBEntries(); for (PDBEntry entry : entries) { if (entry.getId().equalsIgnoreCase(pdbIdStr)) @@ -1411,16 +1414,59 @@ public class Sequence extends ASequence implements SequenceI return null; } - @Override - public void setSourceDBRef(DBRefEntryI dbRef) - { - this.sourceDBRef = dbRef; - } @Override - public DBRefEntryI getSourceDBRef() + public List getPrimaryDBRefs() { - return this.sourceDBRef; + if (datasetSequence!=null) + { + return datasetSequence.getPrimaryDBRefs(); + } + if (dbrefs==null || dbrefs.length==0) + { + return Arrays.asList(new DBRefEntry[0]); + } + synchronized (dbrefs) + { + List primaries = new ArrayList(); + DBRefEntry tmp[] = new DBRefEntry[1], res[] = null; + for (DBRefEntry ref : dbrefs) + { + if (!ref.isPrimary()) + { + continue; + } + if (ref.hasMap()) + { + MapList mp = ref.getMap().getMap(); + if (mp.getFromLowest() > start || mp.getFromHighest() < end) + { + // map only involves a subsequence, so cannot be primary + continue; + } + } + // whilst it looks like it is a primary ref, we also sanity check type + if (DBRefUtils.getCanonicalName(DBRefSource.PDB).equals( + DBRefUtils.getCanonicalName(ref.getSource()))) + { + // PDB dbrefs imply there should be a PDBEntry associated + if (getPDBEntry(ref.getAccessionId()) != null) + { + primaries.add(ref); + } + continue; + } + // check standard protein or dna sources + tmp[0] = ref; + res = DBRefUtils.selectDbRefs(!isProtein(), tmp); + if (res != null && res[0] == tmp[0]) + { + primaries.add(ref); + continue; + } + } + return primaries; + } } } diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index 45a767c..ec7520b 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -20,8 +20,6 @@ */ package jalview.datamodel; -import jalview.api.DBRefEntryI; - import java.util.List; import java.util.Vector; @@ -443,21 +441,14 @@ public interface SequenceI extends ASequenceI */ public PDBEntry getPDBEntry(String pdbId); - /** - * Set the distinct source database, and accession number from which a - * sequence and its start-end data were derived from. This is very important - * for SIFTS mappings and must be set prior to performing SIFTS mapping. - * - * @param dbRef - * the source dbRef for the sequence - */ - public void setSourceDBRef(DBRefEntryI dbRef); /** - * Get the distinct source database, and accession number from which a - * sequence and its start-end data were derived from. + * Get all primary database/accessions for this sequence's data. These + * DBRefEntry are expected to resolve to a valid record in the associated + * external database, either directly or via a provided 1:1 Mapping. * - * @return + * @return just the primary references (if any) for this sequence, or an empty + * list */ - public DBRefEntryI getSourceDBRef(); + public List getPrimaryDBRefs(); } diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index 06e929d..3ba36ca 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -195,7 +195,6 @@ public class EmblEntry DBRefEntry retrievedref = new DBRefEntry(sourceDb, getSequenceVersion(), accession); dna.addDBRef(retrievedref); - dna.setSourceDBRef(retrievedref); // add map to indicate the sequence is a valid coordinate frame for the // dbref retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() }, @@ -504,7 +503,6 @@ public class EmblEntry dnaToProteinMapping.setTo(proteinSeq); dnaToProteinMapping.setMappedFromId(proteinId); proteinSeq.addDBRef(proteinDbRef); - proteinSeq.setSourceDBRef(proteinDbRef); ref.setMap(dnaToProteinMapping); } hasUniprotDbref = true; @@ -549,7 +547,6 @@ public class EmblEntry DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId); } product.addDBRef(proteinToEmblProteinRef); - product.setSourceDBRef(proteinToEmblProteinRef); if (dnaToProteinMapping != null && dnaToProteinMapping.getTo() != null) diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 31552af..e44b610 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -276,8 +276,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient { // clunky: ensure Uniprot xref if we have one is on mapped sequence SequenceI ds = proteinSeq.getDatasetSequence(); - ds.setSourceDBRef(proteinSeq.getSourceDBRef()); - + // TODO: Verify ensp primary ref is on proteinSeq.getDatasetSequence() Mapping map = new Mapping(ds, mapList); DBRefEntry dbr = new DBRefEntry(getDbSource(), getEnsemblDataVersion(), proteinSeq.getName(), map); @@ -322,7 +321,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient DBRefEntry self = new DBRefEntry(getDbSource(), getEnsemblDataVersion(), seq.getName()); seq.addDBRef(self); - seq.setSourceDBRef(self); } /** @@ -382,7 +380,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient { DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(), getEnsemblDataVersion(), name); - sq.setSourceDBRef(dbref); + sq.addDBRef(dbref); } } if (alignment == null) diff --git a/src/jalview/gui/StructureChooser.java b/src/jalview/gui/StructureChooser.java index 13fa460..b2cc70f 100644 --- a/src/jalview/gui/StructureChooser.java +++ b/src/jalview/gui/StructureChooser.java @@ -867,7 +867,7 @@ public class StructureChooser extends GStructureChooser implements ArrayList seqsWithoutSourceDBRef = new ArrayList(); for (SequenceI seq : sequences) { - if (seq.getSourceDBRef() == null && seq.getDBRefs() == null) + if (seq.getPrimaryDBRefs().size() == 0) { seqsWithoutSourceDBRef.add(seq); continue; diff --git a/src/jalview/io/StructureFile.java b/src/jalview/io/StructureFile.java index fc0e207..f095383 100644 --- a/src/jalview/io/StructureFile.java +++ b/src/jalview/io/StructureFile.java @@ -117,7 +117,9 @@ public abstract class StructureFile extends AlignFile DBRefEntry sourceDBRef = new DBRefEntry(); sourceDBRef.setAccessionId(getId()); sourceDBRef.setSource(DBRefSource.PDB); - pdbSequence.setSourceDBRef(sourceDBRef); + // TODO: specify version for 'PDB' database ref if it is read from a file. + // TODO: decide if jalview.io should be creating primary refs! + sourceDBRef.setVersion(""); pdbSequence.addPDBId(entry); pdbSequence.addDBRef(sourceDBRef); SequenceI chainseq = pdbSequence; diff --git a/src/jalview/structure/StructureSelectionManager.java b/src/jalview/structure/StructureSelectionManager.java index be042e6..182a48f 100644 --- a/src/jalview/structure/StructureSelectionManager.java +++ b/src/jalview/structure/StructureSelectionManager.java @@ -502,7 +502,7 @@ public class StructureSelectionManager } ArrayList seqToStrucMapping = new ArrayList(); - if (isMapUsingSIFTs) + if (isMapUsingSIFTs && seq.isProtein()) { setProgressBar(null); setProgressBar(MessageManager @@ -585,6 +585,20 @@ public class StructureSelectionManager return "cif".equalsIgnoreCase(fileExt); } + /** + * retrieve a mapping for seq from SIFTs using associated DBRefEntry for + * uniprot or PDB + * + * @param seq + * @param pdbFile + * @param targetChainId + * @param pdb + * @param maxChain + * @param sqmpping + * @param maxAlignseq + * @return + * @throws SiftsException + */ private StructureMapping getStructureMapping(SequenceI seq, String pdbFile, String targetChainId, StructureFile pdb, PDBChain maxChain, jalview.datamodel.Mapping sqmpping, diff --git a/src/jalview/ws/DBRefFetcher.java b/src/jalview/ws/DBRefFetcher.java index 3ba0e34..6213568 100644 --- a/src/jalview/ws/DBRefFetcher.java +++ b/src/jalview/ws/DBRefFetcher.java @@ -622,33 +622,43 @@ public class DBRefFetcher implements Runnable final int sequenceStart = sequence.getStart(); if (absStart == -1) { - // Is local sequence contained in dataset sequence? + // couldn't find local sequence in sequence from database, so check if + // the database sequence is a subsequence of local sequence absStart = nonGapped.indexOf(entrySeq); if (absStart == -1) - { // verification failed. + { + // verification failed. couldn't find any relationship between + // entrySeq and local sequence messages.append(sequence.getName() + " SEQUENCE NOT %100 MATCH \n"); continue; } + /* + * found match for the whole of the database sequence within the local + * sequence's reference frame. + */ transferred = true; sbuffer.append(sequence.getName() + " HAS " + absStart + " PREFIXED RESIDUES COMPARED TO " + dbSource + "\n"); - // - // + " - ANY SEQUENCE FEATURES" - // + " HAVE BEEN ADJUSTED ACCORDINGLY \n"); - // absStart = 0; - // create valid mapping between matching region of local sequence and - // the mapped sequence + + /* + * So create a mapping to the external entry from the matching region of + * the local sequence, and leave local start/end untouched. + */ mp = new Mapping(null, new int[] { sequenceStart + absStart, sequenceStart + absStart + entrySeq.length() - 1 }, new int[] { entry.getStart(), entry.getStart() + entrySeq.length() - 1 }, 1, 1); - updateRefFrame = false; // mapping is based on current start/end so - // don't modify start and end + updateRefFrame = false; } else { + /* + * found a match for the local sequence within sequence from + * the external database + */ transferred = true; + // update start and end of local sequence to place it in entry's // reference frame. // apply identity map map from whole of local sequence to matching @@ -660,10 +670,14 @@ public class DBRefFetcher implements Runnable // absStart+sequence.getStart()+entrySeq.length()-1}, // new int[] { entry.getStart(), entry.getEnd() }, 1, 1); // relocate local features for updated start + if (updateRefFrame) { if (sequence.getSequenceFeatures() != null) { + /* + * relocate existing sequence features by offset + */ SequenceFeature[] sf = sequence.getSequenceFeatures(); int start = sequenceStart; int end = sequence.getEnd(); @@ -686,7 +700,7 @@ public class DBRefFetcher implements Runnable System.out.println("Adding dbrefs to " + sequence.getName() + " from " + dbSource + " sequence : " + entry.getName()); sequence.transferAnnotation(entry, mp); - // unknownSequences.remove(sequence); + absStart += entry.getStart(); int absEnd = absStart + nonGapped.length() - 1; if (!trimDatasetSeqs) diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index 8cc0ce4..81b4caf 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@ -205,10 +205,10 @@ public class Uniprot extends DbSourceProxyImpl { DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion, accessionId); + + // mark dbRef as a primary reference for this sequence dbRefs.add(dbRef); } - sequence.setSourceDBRef((dbRefs != null && dbRefs.size() > 0) ? dbRefs - .get(0) : null); Vector onlyPdbEntries = new Vector(); for (PDBEntry pdb : entry.getDbReference()) diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 6c94723..0ab6e7d 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -323,41 +323,28 @@ public class SiftsClient implements SiftsClientI public DBRefEntryI getValidSourceDBRef(SequenceI seq) throws SiftsException { - DBRefEntryI sourceDBRef = null; - sourceDBRef = seq.getSourceDBRef(); - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) + DBRefEntry[] dbRefs = seq.getDBRefs(); + if (dbRefs == null || dbRefs.length < 1) { - return sourceDBRef; + throw new SiftsException( + "Source DBRef could not be determined. DBRefs might not have been retrieved."); } - else + + for (DBRefEntryI dbRef : dbRefs) { - DBRefEntry[] dbRefs = seq.getDBRefs(); - if (dbRefs == null || dbRefs.length < 1) + if (dbRef == null || dbRef.getAccessionId() == null + || dbRef.getSource() == null) { - throw new SiftsException( - "Source DBRef could not be determined. DBRefs might not have been retrieved."); + continue; } - - for (DBRefEntryI dbRef : dbRefs) + if (isValidDBRefEntry(dbRef) + && dbRef.isPrimary() + && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef + .getSource().equalsIgnoreCase(DBRefSource.PDB))) { - if (dbRef == null || dbRef.getAccessionId() == null - || dbRef.getSource() == null) - { - continue; - } - if (isFoundInSiftsEntry(dbRef.getAccessionId()) - && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef - .getSource().equalsIgnoreCase(DBRefSource.PDB))) - { - seq.setSourceDBRef(dbRef); - return dbRef; - } + return dbRef; } } - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) - { - return sourceDBRef; - } throw new SiftsException("Could not get source DB Ref"); } @@ -440,7 +427,7 @@ public class SiftsClient implements SiftsClientI String originalSeq = AlignSeq.extractGaps( jalview.util.Comparison.GapChars, seq.getSequenceAsString()); HashMap mapping = new HashMap(); - DBRefEntryI sourceDBRef = seq.getSourceDBRef(); + DBRefEntryI sourceDBRef; sourceDBRef = getValidSourceDBRef(seq); // TODO ensure sequence start/end is in the same coordinate system and // consistent with the choosen sourceDBRef diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 22bb680..0426091 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -997,9 +997,11 @@ public class AlignmentUtilsTests * sequence */ DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1"); - dna1.getDatasetSequence().setSourceDBRef(dbref); + dna1.getDatasetSequence().addDBRef(dbref); + org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0)); dbref = new DBRefEntry("ENSEMBL", "0", "dna2"); - dna2.getDatasetSequence().setSourceDBRef(dbref); + dna2.getDatasetSequence().addDBRef(dbref); + org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0)); /* * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment @@ -1057,6 +1059,7 @@ public class AlignmentUtilsTests * verify peptide has added a dbref with reverse mapping to CDS */ assertNotNull(pep1.getDBRefs()); + // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ? assertEquals(2, pep1.getDBRefs().length); dbref = pep1.getDBRefs()[1]; assertEquals("ENSEMBL", dbref.getSource()); diff --git a/test/jalview/datamodel/DBRefEntryTest.java b/test/jalview/datamodel/DBRefEntryTest.java index ae6dcda..09d9df1 100644 --- a/test/jalview/datamodel/DBRefEntryTest.java +++ b/test/jalview/datamodel/DBRefEntryTest.java @@ -138,4 +138,62 @@ public class DBRefEntryTest assertFalse(ref1.updateFrom(ref2)); assertEquals("10", ref1.getVersion()); } + + @Test(groups = { "Functional" }) + public void testIsPrimary() + { + DBRefEntry dbr = new DBRefEntry(DBRefSource.UNIPROT, "", "Q12345"); + assertTrue(dbr.isPrimary()); + /* + * 1:1 mapping + */ + dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 1, + 1)); + assertTrue(dbr.isPrimary()); + /* + * Version string is prefixed with another dbref source string (fail) + */ + dbr.setVersion(DBRefSource.EMBL + ":0"); + assertFalse(dbr.isPrimary()); + + /* + * Version string is alphanumeric + */ + dbr.setVersion("0.1.b"); + assertTrue(dbr.isPrimary()); + + /* + * 1:1 mapping with shift (fail) + */ + dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 2, 4 }, 1, + 1)); + assertFalse(dbr.isPrimary()); + + /* + * 1:1 mapping and sequenceRef (fail) + */ + dbr.setMap(new Mapping(new Sequence("foo", "ASDF"), new int[] { 1, 3 }, + new int[] { 1, 3 }, 1, 1)); + assertFalse(dbr.isPrimary()); + + /* + * 1:3 mapping (fail) + */ + dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 1, + 3)); + assertFalse(dbr.isPrimary()); + /* + * 2:2 mapping with shift (expected fail, but maybe use case for a pass) + */ + dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 2, + 2)); + assertFalse(dbr.isPrimary()); + + /* + * Version string is prefixed with another dbref source string + */ + dbr.setVersion(DBRefSource.EMBL + ":0"); + assertFalse(dbr.isPrimary()); + + } } diff --git a/test/jalview/datamodel/SequenceTest.java b/test/jalview/datamodel/SequenceTest.java index cfc4cbb..fcd24dd 100644 --- a/test/jalview/datamodel/SequenceTest.java +++ b/test/jalview/datamodel/SequenceTest.java @@ -438,36 +438,54 @@ public class SequenceTest sq.setDescription("Test sequence description.."); sq.setVamsasId("TestVamsasId"); - sq.setSourceDBRef(new DBRefEntry("PDB", "version0", "1TST")); + sq.addDBRef(new DBRefEntry("PDB", "version0", "1TST")); - sq.addDBRef(new DBRefEntry("PDB", "version1", "1Tst")); - sq.addDBRef(new DBRefEntry("PDB", "version2", "2Tst")); - sq.addDBRef(new DBRefEntry("PDB", "version3", "3Tst")); - sq.addDBRef(new DBRefEntry("PDB", "version4", "4Tst")); + sq.addDBRef(new DBRefEntry("PDB", "version1", "1PDB")); + sq.addDBRef(new DBRefEntry("PDB", "version2", "2PDB")); + sq.addDBRef(new DBRefEntry("PDB", "version3", "3PDB")); + sq.addDBRef(new DBRefEntry("PDB", "version4", "4PDB")); sq.addPDBId(new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1")); sq.addPDBId(new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1")); sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2")); sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2")); + + DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB"); + DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version1", "2PDB"); + List primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb, + pdb2pdb }); + sq.getDatasetSequence().addDBRef(pdb1pdb); + sq.getDatasetSequence().addDBRef(pdb2pdb); sq.getDatasetSequence().addDBRef( - new DBRefEntry("PDB", "version1", "1Tst")); - sq.getDatasetSequence().addDBRef( - new DBRefEntry("PDB", "version2", "2Tst")); - sq.getDatasetSequence().addDBRef( - new DBRefEntry("PDB", "version3", "3Tst")); + new DBRefEntry("PDB", "version3", "3PDB")); sq.getDatasetSequence().addDBRef( - new DBRefEntry("PDB", "version4", "4Tst")); - - sq.getDatasetSequence().addPDBId( - new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1")); - sq.getDatasetSequence().addPDBId( - new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1")); + new DBRefEntry("PDB", "version4", "4PDB")); + + PDBEntry pdbe1a=new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"); + PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"); + PDBEntry pdbe2a=new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"); + PDBEntry pdbe2b = new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"); sq.getDatasetSequence().addPDBId( - new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2")); + pdbe1a); sq.getDatasetSequence().addPDBId( - new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2")); + pdbe1b); + sq.getDatasetSequence().addPDBId(pdbe2a); + sq.getDatasetSequence().addPDBId(pdbe2b); + + /* + * test we added pdb entries to the dataset sequence + */ + Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries(), Arrays + .asList(new PDBEntry[] { pdbe1a, pdbe1b, pdbe2a, pdbe2b }), + "PDB Entries were not found on dataset sequence."); + /* + * we should recover a pdb entry that is on the dataset sequence via PDBEntry + */ + Assert.assertEquals(pdbe1a, + sq.getDatasetSequence().getPDBEntry("1PDB"), + "PDB Entry '1PDB' not found on dataset sequence via getPDBEntry."); ArrayList annotsList = new ArrayList(); System.out.println(">>>>>> " + sq.getSequenceAsString().length()); annotsList.add(new Annotation("A", "A", 'X', 0.1f)); @@ -479,7 +497,7 @@ public class SequenceTest new AlignmentAnnotation("Test annot", "Test annot description", annots)); Assert.assertEquals(sq.getDescription(), "Test sequence description.."); - Assert.assertEquals(sq.getDBRefs().length, 4); + Assert.assertEquals(sq.getDBRefs().length, 5); Assert.assertEquals(sq.getAllPDBEntries().size(), 4); Assert.assertNotNull(sq.getAnnotation()); Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2); @@ -492,7 +510,7 @@ public class SequenceTest Assert.assertEquals(derived.getDescription(), "Test sequence description.."); - Assert.assertEquals(derived.getDBRefs().length, 4); + Assert.assertEquals(derived.getDBRefs().length, 4); // come from dataset Assert.assertEquals(derived.getAllPDBEntries().size(), 4); Assert.assertNotNull(derived.getAnnotation()); Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2); @@ -510,6 +528,17 @@ public class SequenceTest assertNotNull(sq.getSequenceFeatures()); assertArrayEquals(sq.getSequenceFeatures(), derived.getSequenceFeatures()); + + /* + * verify we have primary db refs *just* for PDB IDs with associated + * PDBEntry objects + */ + + assertEquals(primRefs, sq.getPrimaryDBRefs()); + assertEquals(primRefs, sq.getDatasetSequence().getPrimaryDBRefs()); + + assertEquals(sq.getPrimaryDBRefs(), derived.getPrimaryDBRefs()); + } /** diff --git a/test/jalview/datamodel/xdb/embl/EmblEntryTest.java b/test/jalview/datamodel/xdb/embl/EmblEntryTest.java index 4b71417..abe5099 100644 --- a/test/jalview/datamodel/xdb/embl/EmblEntryTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblEntryTest.java @@ -128,6 +128,7 @@ public class EmblEntryTest assertEquals(5, dbrefs.length); assertEquals(DBRefSource.EMBL, dbrefs[0].getSource()); assertEquals("CAA30420.1", dbrefs[0].getAccessionId()); + // TODO: verify getPrimaryDBRefs() for peptide products assertEquals(cds1Map.getInverse(), dbrefs[0].getMap().getMap()); assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource()); assertEquals("CAA30420.1", dbrefs[1].getAccessionId());