From: Jim Procter Date: Fri, 26 Aug 2016 14:37:36 +0000 (+0100) Subject: Merge branch 'bug/JAL-2154projectMappings' into merge/develop_bug/JAL-2154projectMappings X-Git-Tag: Release_2_10_0~47^2~4^2~43 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=3fc77551d3029adc03f3d110e818c5c49c74787a;hp=45a15211723195c363f1798dd9f0907ea6c28f08;p=jalview.git Merge branch 'bug/JAL-2154projectMappings' into merge/develop_bug/JAL-2154projectMappings --- diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index d93f42f..e0ec22b 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -22,7 +22,6 @@ package jalview.analysis; import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE; -import jalview.api.DBRefEntryI; import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; @@ -1682,6 +1681,10 @@ public class AlignmentUtils * its dataset sequence to the dataset */ cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping); + // cdsSeq has a name constructed as CDS| + // will be either the accession for the coding sequence, + // marked in the /via/ dbref to the protein product accession + // or it will be the original nucleotide accession. 
SequenceI cdsSeqDss = cdsSeq.createDatasetSequence(); cdsSeqs.add(cdsSeq); if (!dataset.getSequences().contains(cdsSeqDss)) @@ -1697,7 +1700,8 @@ public class AlignmentUtils MapList cdsToProteinMap = new MapList(cdsRange, mapList.getToRanges(), mapList.getFromRatio(), mapList.getToRatio()); AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame(); - cdsToProteinMapping.addMap(cdsSeq, proteinProduct, cdsToProteinMap); + cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct, + cdsToProteinMap); /* * guard against duplicating the mapping if repeating this action @@ -1707,23 +1711,8 @@ public class AlignmentUtils mappings.add(cdsToProteinMapping); } - /* - * copy protein's dbrefs to CDS sequence - * this enables Get Cross-References from CDS alignment - */ - DBRefEntry[] proteinRefs = DBRefUtils.selectDbRefs(false, - proteinProduct.getDBRefs()); - if (proteinRefs != null) - { - for (DBRefEntry ref : proteinRefs) - { - DBRefEntry cdsToProteinRef = new DBRefEntry(ref); - cdsToProteinRef.setMap(new Mapping(proteinProduct, - cdsToProteinMap)); - cdsSeqDss.addDBRef(cdsToProteinRef); - } - } - + propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(), + proteinProduct, aMapping); /* * add another mapping from original 'from' range to CDS */ @@ -1731,7 +1720,7 @@ public class AlignmentUtils MapList dnaToCdsMap = new MapList(mapList.getFromRanges(), cdsRange, 1, 1); - dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeq, + dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeqDss, dnaToCdsMap); if (!mappings.contains(dnaToCdsMapping)) { @@ -1745,12 +1734,37 @@ public class AlignmentUtils * same source and accession, so need a different accession for * the CDS from the dna sequence */ - DBRefEntryI dnaRef = dnaDss.getSourceDBRef(); - if (dnaRef != null) + + // specific use case: + // Genomic contig ENSCHR:1, contains coding regions for ENSG01, + // ENSG02, ENSG03, with transcripts and products similarly named. 
+ // cannot add distinct dbrefs mapping location on ENSCHR:1 to ENSG01 + + // JBPNote: ?? can't actually create an example that demonstrates we + // need to + // synthesize an xref. + + for (DBRefEntry primRef : dnaDss.getPrimaryDBRefs()) { + // creates a complementary cross-reference to the source sequence's + // primary reference. + + DBRefEntry cdsCrossRef = new DBRefEntry(primRef.getSource(), + primRef.getSource() + ":" + primRef.getVersion(), + primRef.getAccessionId()); + cdsCrossRef + .setMap(new Mapping(dnaDss, new MapList(dnaToCdsMap))); + cdsSeqDss.addDBRef(cdsCrossRef); + + // problem here is that the cross-reference is synthesized - + // cdsSeq.getName() may be like 'CDS|dnaaccession' or + // 'CDS|emblcdsacc' // assuming cds version same as dna ?!? - DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(), - dnaRef.getVersion(), cdsSeq.getName()); + + DBRefEntry proteinToCdsRef = new DBRefEntry( + primRef.getSource(), primRef.getVersion(), + cdsSeq.getName()); + // proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap .getInverse())); proteinProduct.addDBRef(proteinToCdsRef); @@ -1885,7 +1899,7 @@ public class AlignmentUtils } } } - + /* * assign 'from id' held in the mapping if set (e.g. EMBL protein_id), * else generate a sequence name @@ -1899,6 +1913,84 @@ public class AlignmentUtils } /** + * add any DBRefEntrys to cdsSeq from contig that have a Mapping congruent to + * the given mapping. + * + * @param cdsSeq + * @param contig + * @param mapping + * @return list of DBRefEntrys added. 
+ */ + public static List propagateDBRefsToCDS(SequenceI cdsSeq, + SequenceI contig, SequenceI proteinProduct, Mapping mapping) + { + + // gather direct refs from contig congrent with mapping + List direct = new ArrayList(); + HashSet directSources = new HashSet(); + if (contig.getDBRefs() != null) + { + for (DBRefEntry dbr : contig.getDBRefs()) + { + if (dbr.hasMap() && dbr.getMap().getMap().isTripletMap()) + { + MapList map = dbr.getMap().getMap(); + // check if map is the CDS mapping + if (mapping.getMap().equals(map)) + { + direct.add(dbr); + directSources.add(dbr.getSource()); + } + } + } + } + DBRefEntry[] onSource = DBRefUtils.selectRefs( + proteinProduct.getDBRefs(), + directSources.toArray(new String[0])); + List propagated = new ArrayList(); + + // and generate appropriate mappings + for (DBRefEntry cdsref : direct) + { + // clone maplist and mapping + MapList cdsposmap = new MapList(Arrays.asList(new int[][] { new int[] + { cdsSeq.getStart(), cdsSeq.getEnd() } }), cdsref.getMap().getMap() + .getToRanges(), 3, 1); + Mapping cdsmap = new Mapping(cdsref.getMap().getTo(), cdsref.getMap() + .getMap()); + + // create dbref + DBRefEntry newref = new DBRefEntry(cdsref.getSource(), + cdsref.getVersion(), cdsref.getAccessionId(), new Mapping( + cdsmap.getTo(), cdsposmap)); + + // and see if we can map to the protein product for this mapping. + // onSource is the filtered set of accessions on protein that we are + // tranferring, so we assume accession is the same. 
+ if (cdsmap.getTo() == null && onSource != null) + { + List sourceRefs = DBRefUtils.searchRefs(onSource, + cdsref.getAccessionId()); + if (sourceRefs != null) + { + for (DBRefEntry srcref : sourceRefs) + { + if (srcref.getSource().equalsIgnoreCase(cdsref.getSource())) + { + // we have found a complementary dbref on the protein product, so + // update mapping's getTo + newref.getMap().setTo(proteinProduct); + } + } + } + } + cdsSeq.addDBRef(newref); + propagated.add(newref); + } + return propagated; + } + + /** * Transfers co-located features on 'fromSeq' to 'toSeq', adjusting the * feature start/end ranges, optionally omitting specified feature types. * Returns the number of features copied. @@ -2504,7 +2596,7 @@ public class AlignmentUtils { AlignmentI copy = new Alignment(new Alignment(seqs)); copy.setDataset(dataset); - + boolean isProtein = !copy.isNucleotide(); SequenceIdMatcher matcher = new SequenceIdMatcher(seqs); if (xrefs != null) { @@ -2515,7 +2607,8 @@ public class AlignmentUtils { for (DBRefEntry dbref : dbrefs) { - if (dbref.getMap() == null || dbref.getMap().getTo() == null) + if (dbref.getMap() == null || dbref.getMap().getTo() == null + || dbref.getMap().getTo().isProtein() != isProtein) { continue; } diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 288d60e..aefc6f8 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -222,6 +222,9 @@ public class CrossRef boolean found = false; DBRefEntry[] xrfs = DBRefUtils .selectDbRefs(!fromDna, dss.getDBRefs()); + // ENST & ENSP comes in to both Protein and nucleotide, so we need to + // filter them + // out later. 
if ((xrfs == null || xrfs.length == 0) && dataset != null) { /* @@ -249,11 +252,15 @@ public class CrossRef List sourceRefs = DBRefUtils.searchRefsForSource(xrfs, source); Iterator refIterator = sourceRefs.iterator(); + // At this point, if we are retrieving Ensembl, we still don't filter out + // ENST when looking for protein crossrefs. while (refIterator.hasNext()) { DBRefEntry xref = refIterator.next(); found = false; - if (xref.hasMap()) + // we're only interested in coding cross-references, not + // locus->transcript + if (xref.hasMap() && xref.getMap().getMap().isTripletMap()) { SequenceI mappedTo = xref.getMap().getTo(); if (mappedTo != null) @@ -271,20 +278,45 @@ public class CrossRef * but findInDataset() matches ENSP when looking for Uniprot... */ SequenceI matchInDataset = findInDataset(xref); + if (matchInDataset != null && xref.getMap().getTo() != null + && matchInDataset != xref.getMap().getTo()) + { + System.err + .println("Implementation problem (reopen JAL-2154): CrossRef.findInDataset seems to have recovered a different sequence than the one explicitly mapped for xref." 
+ + "Found:" + + matchInDataset + + "\nExpected:" + + xref.getMap().getTo() + + "\nFor xref:" + + xref); + } /*matcher.findIdMatch(mappedTo);*/ if (matchInDataset != null) { if (!rseqs.contains(matchInDataset)) { rseqs.add(matchInDataset); + // need to try harder to only add unique mappings + if (xref.getMap().getMap().isTripletMap() + && dataset.getMapping(seq, matchInDataset) == null + && cf.getMappingBetween(seq, matchInDataset) == null) + { + // materialise a mapping for highlighting between these sequences + if (fromDna) + { + cf.addMap(dss, matchInDataset, xref.getMap().getMap(), xref.getMap().getMappedFromId()); + } else { + cf.addMap(matchInDataset, dss, xref.getMap().getMap().getInverse(), xref.getMap().getMappedFromId()); + } + } } refIterator.remove(); continue; } + // TODO: need to determine if this should be a deriveSequence SequenceI rsq = new Sequence(mappedTo); rseqs.add(rsq); - if (xref.getMap().getMap().getFromRatio() != xref.getMap() - .getMap().getToRatio()) + if (xref.getMap().getMap().isTripletMap()) { // get sense of map correct for adding to product alignment. if (fromDna) @@ -307,7 +339,9 @@ public class CrossRef { SequenceI matchedSeq = matcher.findIdMatch(xref.getSource() + "|" + xref.getAccessionId()); - if (matchedSeq != null) + // if there was a match, check it's at least the right type of + // molecule! 
+ if (matchedSeq != null && matchedSeq.isProtein() == fromDna) { if (constructMapping(seq, matchedSeq, xref, cf, fromDna)) { @@ -413,7 +447,11 @@ public class CrossRef } else { - matcher.add(map.getTo()); + if (dataset.findIndex(map.getTo()) == -1) + { + dataset.addSequence(map.getTo()); + matcher.add(map.getTo()); + } } try { @@ -483,8 +521,11 @@ public class CrossRef } retrievedSequence.updatePDBIds(); rseqs.add(retrievedDss); - dataset.addSequence(retrievedDss); - matcher.add(retrievedDss); + if (dataset.findIndex(retrievedDss) == -1) + { + dataset.addSequence(retrievedDss); + matcher.add(retrievedDss); + } } } } @@ -662,24 +703,28 @@ public class CrossRef DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna) { MapList mapping = null; - + SequenceI dsmapFrom = mapFrom.getDatasetSequence() == null ? mapFrom + : mapFrom.getDatasetSequence(); + SequenceI dsmapTo = mapTo.getDatasetSequence() == null ? mapTo + : mapTo.getDatasetSequence(); /* - * look for a reverse mapping, if found make its inverse + * look for a reverse mapping, if found make its inverse. + * Note - we do this on dataset sequences only. 
*/ - if (mapTo.getDBRefs() != null) + if (dsmapTo.getDBRefs() != null) { - for (DBRefEntry dbref : mapTo.getDBRefs()) + for (DBRefEntry dbref : dsmapTo.getDBRefs()) { String name = dbref.getSource() + "|" + dbref.getAccessionId(); - if (dbref.hasMap() && mapFrom.getName().startsWith(name)) + if (dbref.hasMap() && dsmapFrom.getName().startsWith(name)) { /* * looks like we've found a map from 'mapTo' to 'mapFrom' * - invert it to make the mapping the other way */ MapList reverse = dbref.getMap().getMap().getInverse(); - xref.setMap(new Mapping(mapTo, reverse)); - mappings.addMap(mapFrom, mapTo, reverse); + xref.setMap(new Mapping(dsmapTo, reverse)); + mappings.addMap(mapFrom, dsmapTo, reverse); return true; } } @@ -706,14 +751,16 @@ public class CrossRef /* * and add a reverse DbRef with the inverse mapping */ - if (mapFrom.getDatasetSequence() != null - && mapFrom.getDatasetSequence().getSourceDBRef() != null) + if (mapFrom.getDatasetSequence() != null && false) + // && mapFrom.getDatasetSequence().getSourceDBRef() != null) { - DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence() - .getSourceDBRef()); - dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping - .getInverse())); - mapTo.addDBRef(dbref); + // possible need to search primary references... except, why doesn't xref + // == getSourceDBRef ?? 
+ // DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence() + // .getSourceDBRef()); + // dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping + // .getInverse())); + // mapTo.addDBRef(dbref); } if (fromDna) diff --git a/src/jalview/api/DBRefEntryI.java b/src/jalview/api/DBRefEntryI.java index 32245b3..701acb6 100644 --- a/src/jalview/api/DBRefEntryI.java +++ b/src/jalview/api/DBRefEntryI.java @@ -70,4 +70,28 @@ public interface DBRefEntryI * @return */ public boolean updateFrom(DBRefEntryI otherEntry); + + /** + * Method to distinguish between direct and indirect database references + * + * primary references indicate the local sequence data directly corresponds + * with the database record. All other references are secondary. direct + * references indicate that part or all of the local sequence data can be + * mapped with another sequence, enabling annotation transfer. + * cross-references indicate the local sequence data can be corresponded to + * some other linear coordinate system via a transformation. + * + * This method is also sufficient to distinguish direct DBRefEntry mappings + * from other relationships - e.g. 
coding relationships (imply a 1:3/3:1 + * mapping), but not transcript relationships, which imply a (possibly + * non-contiguous) 1:1 mapping + * + * The only way a dbref's mappings can be fully verified is via the local + * sequence frame, so rather than use isPrimary directly, please use + * SequenceI.getPrimaryDbRefs() + * + * @return true if this reference provides a primary accession for the + * associated sequence object + */ + public boolean isPrimary(); } diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 32bb761..2f64759 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -21,6 +21,7 @@ package jalview.datamodel; import jalview.analysis.AlignmentUtils; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.io.FastaFile; import jalview.util.Comparison; import jalview.util.MessageManager; @@ -225,18 +226,21 @@ public class Alignment implements AlignmentI { if (dataset != null) { + // maintain dataset integrity - if (snew.getDatasetSequence() != null) - { - getDataset().addSequence(snew.getDatasetSequence()); - } - else + SequenceI dsseq = snew.getDatasetSequence(); + if (dsseq == null) { // derive new sequence SequenceI adding = snew.deriveSequence(); - getDataset().addSequence(adding.getDatasetSequence()); snew = adding; + dsseq = snew.getDatasetSequence(); } + if (getDataset().findIndex(dsseq) == -1) + { + getDataset().addSequence(dsseq); + } + } if (sequences == null) { @@ -255,18 +259,22 @@ public class Alignment implements AlignmentI } } - /** - * Adds a sequence to the alignment. Recalculates maxLength and size. 
- * - * @param snew - */ @Override - public void setSequenceAt(int i, SequenceI snew) + public SequenceI replaceSequenceAt(int i, SequenceI snew) { synchronized (sequences) { - deleteSequence(i); - sequences.set(i, snew); + if (sequences.size() > i) + { + return sequences.set(i, snew); + + } + else + { + sequences.add(snew); + hiddenSequences.adjustHeightSequenceAdded(); + } + return null; } } @@ -1029,6 +1037,62 @@ public class Alignment implements AlignmentI } /** + * add dataset sequences to seq for currentSeq and any sequences it references + */ + private void resolveAndAddDatasetSeq(SequenceI currentSeq, + Set seqs, boolean createDatasetSequence) + { + if (currentSeq.getDatasetSequence() != null) + { + currentSeq = currentSeq.getDatasetSequence(); + } + else + { + if (createDatasetSequence) + { + currentSeq = currentSeq.createDatasetSequence(); + } + } + if (seqs.contains(currentSeq)) + { + return; + } + List toProcess = new ArrayList(); + toProcess.add(currentSeq); + while (toProcess.size() > 0) + { + // use a queue ? + SequenceI curDs = toProcess.remove(0); + if (seqs.contains(curDs)) + { + continue; + } + seqs.add(curDs); + // iterate over database references, making sure we add forward referenced + // sequences + if (curDs.getDBRefs() != null) + { + for (DBRefEntry dbr : curDs.getDBRefs()) + { + if (dbr.getMap() != null && dbr.getMap().getTo() != null) + { + if (dbr.getMap().getTo().getDatasetSequence() != null) + { + throw new Error("Implementation error: Map.getTo() for dbref" + + dbr + " is not a dataset sequence."); + // TODO: if this happens, could also rewrite the reference to + // point to new dataset sequence + } + // we recurse to add all forward references to dataset sequences via + // DBRefs/etc + toProcess.add(dbr.getMap().getTo()); + } + } + } + } + } + + /** * Creates a new dataset for this alignment. Can only be done once - if * dataset is not null this will not be performed. 
*/ @@ -1038,22 +1102,32 @@ public class Alignment implements AlignmentI { return; } - SequenceI[] seqs = new SequenceI[getHeight()]; - SequenceI currentSeq; + // try to avoid using SequenceI.equals at this stage, it will be expensive + Set seqs = new jalview.util.LinkedIdentityHashSet(); + for (int i = 0; i < getHeight(); i++) { - currentSeq = getSequenceAt(i); - if (currentSeq.getDatasetSequence() != null) - { - seqs[i] = currentSeq.getDatasetSequence(); - } - else + SequenceI currentSeq = getSequenceAt(i); + resolveAndAddDatasetSeq(currentSeq, seqs, true); + } + + // verify all mappings are in dataset + for (AlignedCodonFrame cf : codonFrameList) + { + for (SequenceToSequenceMapping ssm : cf.getMappings()) { - seqs[i] = currentSeq.createDatasetSequence(); + if (!seqs.contains(ssm.getFromSeq())) + { + resolveAndAddDatasetSeq(ssm.getFromSeq(), seqs, false); + } + if (!seqs.contains(ssm.getMapping().getTo())) + { + resolveAndAddDatasetSeq(ssm.getMapping().getTo(), seqs, false); + } } } - - dataset = new Alignment(seqs); + // finally construct dataset + dataset = new Alignment(seqs.toArray(new SequenceI[seqs.size()])); // move mappings to the dataset alignment dataset.codonFrameList = this.codonFrameList; this.codonFrameList = null; diff --git a/src/jalview/datamodel/AlignmentI.java b/src/jalview/datamodel/AlignmentI.java index f1db4c0..1d37fa6 100755 --- a/src/jalview/datamodel/AlignmentI.java +++ b/src/jalview/datamodel/AlignmentI.java @@ -108,11 +108,14 @@ public interface AlignmentI extends AnnotatedCollectionI * Used to set a particular index of the alignment with the given sequence. * * @param i - * Index of sequence to be updated. + * Index of sequence to be updated. if i>length, sequence will be + * added to end, with no intervening positions. * @param seq - * New sequence to be inserted. + * New sequence to be inserted. The existing sequence at position i + * will be replaced. 
+ * @return existing sequence (or null if i>current length) */ - void setSequenceAt(int i, SequenceI seq); + SequenceI replaceSequenceAt(int i, SequenceI seq); /** * Deletes a sequence from the alignment diff --git a/src/jalview/datamodel/DBRefEntry.java b/src/jalview/datamodel/DBRefEntry.java index a641b1b..11e77d8 100755 --- a/src/jalview/datamodel/DBRefEntry.java +++ b/src/jalview/datamodel/DBRefEntry.java @@ -22,9 +22,12 @@ package jalview.datamodel; import jalview.api.DBRefEntryI; +import java.util.Arrays; + public class DBRefEntry implements DBRefEntryI { String source = "", version = "", accessionId = ""; + /** * maps from associated sequence to the database sequence's coordinate system */ @@ -35,7 +38,6 @@ public class DBRefEntry implements DBRefEntryI } - public DBRefEntry(String source, String version, String accessionId) { this(source, version, accessionId, null); @@ -138,7 +140,8 @@ public class DBRefEntry implements DBRefEntryI String otherAccession = other.getAccessionId(); if ((accessionId == null && otherAccession != null) || (accessionId != null && otherAccession == null) - || (accessionId != null && !accessionId.equalsIgnoreCase(otherAccession))) + || (accessionId != null && !accessionId + .equalsIgnoreCase(otherAccession))) { return false; } @@ -148,7 +151,7 @@ public class DBRefEntry implements DBRefEntryI * otherwise the versions have to match */ String otherVersion = other.getVersion(); - + if ((version == null || version.equals("0") || version.endsWith(":0")) && otherVersion != null) { @@ -223,28 +226,24 @@ public class DBRefEntry implements DBRefEntryI return accessionId; } - @Override public void setAccessionId(String accessionId) { this.accessionId = accessionId; } - @Override public void setSource(String source) { this.source = source; } - @Override public void setVersion(String version) { this.version = version; } - @Override public Mapping getMap() { @@ -280,4 +279,53 @@ public class DBRefEntry implements DBRefEntryI { return 
getSrcAccString(); } + + @Override + public boolean isPrimary() + { + /* + * if a map is present, unless it is 1:1 and has no SequenceI mate, it cannot be a primary reference. + */ + if (map != null) + { + if (map.getTo() != null) + { + return false; + } + if (map.getMap().getFromRatio() != map.getMap().getToRatio() + || map.getMap().getFromRatio() != 1) + { + return false; + } + // check map is really 1:1, no shifts allowed. + if (map.getMap().getFromHighest() != map.getMap().getToHighest() + && map.getMap().getFromLowest() != map.getMap().getToLowest() + && !Arrays.equals( + map.getMap().getFromRanges().toArray(new int[0][]), + map.getMap().getToRanges().toArray(new int[0][]))) + { + return false; + } + } + if (version == null) + { + // no version string implies the reference has not been verified at all. + return false; + } + // tricky - this test really needs to search the sequence's set of dbrefs to + // see if there is a primary reference that derived this reference. + String ucv = version.toUpperCase(); + for (String primsrc : Arrays.asList(DBRefSource.allSources())) + { + if (ucv.startsWith(primsrc.toUpperCase())) + { + // by convention, many secondary references inherit the primary + // reference's + // source string as a prefix for any version information from the + // secondary reference. 
+ return false; + } + } + return true; + } } diff --git a/src/jalview/datamodel/DBRefSource.java b/src/jalview/datamodel/DBRefSource.java index fba9211..0ac14e5 100755 --- a/src/jalview/datamodel/DBRefSource.java +++ b/src/jalview/datamodel/DBRefSource.java @@ -20,6 +20,10 @@ */ package jalview.datamodel; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.List; + /** * Defines internal constants for unambiguous annotation of DbRefEntry source * strings and describing the data retrieved from external database sources (see @@ -36,12 +40,12 @@ public class DBRefSource /** * UNIPROT Accession Number */ - public static String UNIPROT = "UNIPROT"; + public static final String UNIPROT = "UNIPROT"; /** * UNIPROT Entry Name */ - public static String UP_NAME = "UNIPROT_NAME".toUpperCase(); + public static final String UP_NAME = "UNIPROT_NAME".toUpperCase(); /** * Uniprot Knowledgebase/TrEMBL as served from EMBL protein products. @@ -54,27 +58,27 @@ public class DBRefSource /** * PDB Entry Code */ - public static String PDB = "PDB"; + public static final String PDB = "PDB"; /** * EMBL ID */ - public static String EMBL = "EMBL"; + public static final String EMBL = "EMBL"; /** * EMBLCDS ID */ - public static String EMBLCDS = "EMBLCDS"; + public static final String EMBLCDS = "EMBLCDS"; /** * PFAM ID */ - public static String PFAM = "PFAM"; + public static final String PFAM = "PFAM"; /** * RFAM ID */ - public static String RFAM = "RFAM"; + public static final String RFAM = "RFAM"; /** * GeneDB ID @@ -96,6 +100,25 @@ public class DBRefSource public static final String[] CODINGDBS = { EMBLCDS, GENEDB, ENSEMBL }; - public static final String[] PROTEINDBS = { UNIPROT, PDB, UNIPROTKB, + public static final String[] PROTEINDBS = { UNIPROT, UNIPROTKB, EMBLCDSProduct, ENSEMBL }; // Ensembl ENSP* entries are protein + + public static String[] allSources() + { + List src = new ArrayList(); + for (Field f : DBRefSource.class.getFields()) + { + if 
(String.class.equals(f.getType())) + { + try + { + src.add((String) f.get(null)); + } catch (Exception x) + { + x.printStackTrace(); + } + } + } + return src.toArray(new String[0]); + } } diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index a857712..2bbc278 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -22,6 +22,8 @@ package jalview.datamodel; import jalview.analysis.AlignSeq; import jalview.api.DBRefEntryI; +import jalview.util.DBRefUtils; +import jalview.util.MapList; import jalview.util.StringUtils; import java.util.ArrayList; @@ -57,8 +59,6 @@ public class Sequence extends ASequence implements SequenceI String vamsasId; - DBRefEntryI sourceDBRef; - DBRefEntry[] dbrefs; RNA rna; @@ -235,8 +235,6 @@ public class Sequence extends ASequence implements SequenceI seq.getEnd()); } description = seq.getDescription(); - sourceDBRef = seq.getSourceDBRef() == null ? null : new DBRefEntry( - seq.getSourceDBRef()); if (seq != datasetSequence) { setDatasetSequence(seq.getDatasetSequence()); @@ -307,8 +305,9 @@ public class Sequence extends ASequence implements SequenceI && datasetSequence.getSequenceFeatures() != null && datasetSequence.getSequenceFeatures().length > 0) { - System.err - .println("Warning: JAL-2046 side effect ? Possible implementation error: overwriting dataset sequence features by setting sequence features on alignment"); + new Exception( + "Warning: JAL-2046 side effect ? 
Possible implementation error: overwriting dataset sequence features by setting sequence features on alignment") + .printStackTrace(); } datasetSequence.setSequenceFeatures(features); } @@ -1394,12 +1393,15 @@ public class Sequence extends ASequence implements SequenceI @Override public PDBEntry getPDBEntry(String pdbIdStr) { - if (getDatasetSequence() == null - || getDatasetSequence().getAllPDBEntries() == null) + if (getDatasetSequence() != null) + { + return getDatasetSequence().getPDBEntry(pdbIdStr); + } + if (pdbIds == null) { return null; } - List entries = getDatasetSequence().getAllPDBEntries(); + List entries = getAllPDBEntries(); for (PDBEntry entry : entries) { if (entry.getId().equalsIgnoreCase(pdbIdStr)) @@ -1410,16 +1412,66 @@ public class Sequence extends ASequence implements SequenceI return null; } - @Override - public void setSourceDBRef(DBRefEntryI dbRef) - { - this.sourceDBRef = dbRef; - } @Override - public DBRefEntryI getSourceDBRef() + public List getPrimaryDBRefs() { - return this.sourceDBRef; + if (datasetSequence!=null) + { + return datasetSequence.getPrimaryDBRefs(); + } + if (dbrefs==null || dbrefs.length==0) + { + return Arrays.asList(new DBRefEntry[0]); + } + synchronized (dbrefs) + { + List primaries = new ArrayList(); + DBRefEntry tmp[] = new DBRefEntry[1], res[] = null; + for (DBRefEntry ref : dbrefs) + { + if (!ref.isPrimary()) + { + continue; + } + if (ref.hasMap()) + { + MapList mp = ref.getMap().getMap(); + if (mp.getFromLowest() > start || mp.getFromHighest() < end) + { + // map only involves a subsequence, so cannot be primary + continue; + } + } + // whilst it looks like it is a primary ref, we also sanity check type + if (DBRefUtils.getCanonicalName(DBRefSource.PDB).equals( + DBRefUtils.getCanonicalName(ref.getSource()))) + { + // PDB dbrefs imply there should be a PDBEntry associated + // TODO: tighten PDB dbrefs + // formally imply Jalview has actually downlaoded and + // parsed the pdb file. 
That means there should be a cached file + // handle on the PDBEntry, and a real mapping between sequence and + // extracted sequence from PDB file + PDBEntry pdbentry = getPDBEntry(ref.getAccessionId()); + if (pdbentry != null && pdbentry.getType() != null + && pdbentry.getType().equalsIgnoreCase("PDB")) + { + primaries.add(ref); + } + continue; + } + // check standard protein or dna sources + tmp[0] = ref; + res = DBRefUtils.selectDbRefs(!isProtein(), tmp); + if (res != null && res[0] == tmp[0]) + { + primaries.add(ref); + continue; + } + } + return primaries; + } } } diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index 45a767c..ec7520b 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -20,8 +20,6 @@ */ package jalview.datamodel; -import jalview.api.DBRefEntryI; - import java.util.List; import java.util.Vector; @@ -443,21 +441,14 @@ public interface SequenceI extends ASequenceI */ public PDBEntry getPDBEntry(String pdbId); - /** - * Set the distinct source database, and accession number from which a - * sequence and its start-end data were derived from. This is very important - * for SIFTS mappings and must be set prior to performing SIFTS mapping. - * - * @param dbRef - * the source dbRef for the sequence - */ - public void setSourceDBRef(DBRefEntryI dbRef); /** - * Get the distinct source database, and accession number from which a - * sequence and its start-end data were derived from. + * Get all primary database/accessions for this sequence's data. These + * DBRefEntry are expected to resolve to a valid record in the associated + * external database, either directly or via a provided 1:1 Mapping. 
* - * @return + * @return just the primary references (if any) for this sequence, or an empty + * list */ - public DBRefEntryI getSourceDBRef(); + public List getPrimaryDBRefs(); } diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index 06e929d..3ba36ca 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -195,7 +195,6 @@ public class EmblEntry DBRefEntry retrievedref = new DBRefEntry(sourceDb, getSequenceVersion(), accession); dna.addDBRef(retrievedref); - dna.setSourceDBRef(retrievedref); // add map to indicate the sequence is a valid coordinate frame for the // dbref retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() }, @@ -504,7 +503,6 @@ public class EmblEntry dnaToProteinMapping.setTo(proteinSeq); dnaToProteinMapping.setMappedFromId(proteinId); proteinSeq.addDBRef(proteinDbRef); - proteinSeq.setSourceDBRef(proteinDbRef); ref.setMap(dnaToProteinMapping); } hasUniprotDbref = true; @@ -549,7 +547,6 @@ public class EmblEntry DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId); } product.addDBRef(proteinToEmblProteinRef); - product.setSourceDBRef(proteinToEmblProteinRef); if (dnaToProteinMapping != null && dnaToProteinMapping.getTo() != null) diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index b4d2783..50e1032 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -174,7 +174,8 @@ public class EnsemblGene extends EnsemblSeqProxy */ else { - List ids = new EnsemblSymbol(getDomain()).getIds(acc); + List ids = new EnsemblSymbol(getDomain(), getDbSource(), + getDbVersion()).getIds(acc); for (String geneId : ids) { if (!geneIds.contains(geneId)) @@ -196,7 +197,8 @@ public class EnsemblGene extends EnsemblSeqProxy */ protected String getGeneIdentifiersForName(String query) { - List ids = new 
EnsemblSymbol(getDomain()).getIds(query); + List ids = new EnsemblSymbol(getDomain(), getDbSource(), + getDbVersion()).getIds(query); if (ids != null) { for (String id : ids) diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 31552af..5a32736 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -276,8 +276,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient { // clunky: ensure Uniprot xref if we have one is on mapped sequence SequenceI ds = proteinSeq.getDatasetSequence(); - ds.setSourceDBRef(proteinSeq.getSourceDBRef()); - + // TODO: Verify ensp primary ref is on proteinSeq.getDatasetSequence() Mapping map = new Mapping(ds, mapList); DBRefEntry dbr = new DBRefEntry(getDbSource(), getEnsemblDataVersion(), proteinSeq.getName(), map); @@ -309,7 +308,8 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient seq = seq.getDatasetSequence(); } - EnsemblXref xrefFetcher = new EnsemblXref(getDomain()); + EnsemblXref xrefFetcher = new EnsemblXref(getDomain(), getDbSource(), + getEnsemblDataVersion()); List xrefs = xrefFetcher.getCrossReferences(seq.getName()); for (DBRefEntry xref : xrefs) { @@ -322,7 +322,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient DBRefEntry self = new DBRefEntry(getDbSource(), getEnsemblDataVersion(), seq.getName()); seq.addDBRef(self); - seq.setSourceDBRef(self); } /** @@ -382,7 +381,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient { DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(), getEnsemblDataVersion(), name); - sq.setSourceDBRef(dbref); + sq.addDBRef(dbref); } } if (alignment == null) diff --git a/src/jalview/ext/ensembl/EnsemblSymbol.java b/src/jalview/ext/ensembl/EnsemblSymbol.java index 1c47f11..b8c8c54 100644 --- a/src/jalview/ext/ensembl/EnsemblSymbol.java +++ b/src/jalview/ext/ensembl/EnsemblSymbol.java @@ -25,11 +25,13 @@ public class 
EnsemblSymbol extends EnsemblXref /** * Constructor given the target domain to fetch data from * - * @param d + * @param domain + * @param dbName + * @param dbVersion */ - public EnsemblSymbol(String d) + public EnsemblSymbol(String domain, String dbName, String dbVersion) { - super(d); + super(domain, dbName, dbVersion); } /** diff --git a/src/jalview/ext/ensembl/EnsemblXref.java b/src/jalview/ext/ensembl/EnsemblXref.java index fa86865..313572f 100644 --- a/src/jalview/ext/ensembl/EnsemblXref.java +++ b/src/jalview/ext/ensembl/EnsemblXref.java @@ -29,20 +29,25 @@ class EnsemblXref extends EnsemblRestClient private static final String GO_GENE_ONTOLOGY = "GO"; + private String dbName = "ENSEMBL (xref)"; + /** * Constructor given the target domain to fetch data from * * @param d */ - public EnsemblXref(String d) + public EnsemblXref(String d, String dbSource, String version) { super(d); + dbName = dbSource; + xrefVersion = dbSource + ":" + version; + } @Override public String getDbName() { - return "ENSEMBL (xref)"; + return dbName; } @Override @@ -152,7 +157,7 @@ class EnsemblXref extends EnsemblRestClient if (dbName != null && id != null) { dbName = DBRefUtils.getCanonicalName(dbName); - DBRefEntry dbref = new DBRefEntry(dbName, "0", id); + DBRefEntry dbref = new DBRefEntry(dbName, getXRefVersion(), id); result.add(dbref); } } @@ -163,6 +168,18 @@ class EnsemblXref extends EnsemblRestClient return result; } + private String xrefVersion = "ENSEMBL:0"; + + /** + * version string for Xrefs - for 2.10, hardwired for ENSEMBL:0 + * + * @return + */ + public String getXRefVersion() + { + return xrefVersion; + } + /** * Returns the URL for the REST endpoint to fetch all cross-references for an * identifier. Note this may return protein cross-references for nucleotide. 
diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index f6268c0..dd8fb7a 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -32,7 +32,6 @@ import jalview.api.AlignViewControllerI; import jalview.api.AlignViewportI; import jalview.api.AlignmentViewPanel; import jalview.api.FeatureSettingsControllerI; -import jalview.api.FeatureSettingsModelI; import jalview.api.SplitContainerI; import jalview.api.ViewStyleI; import jalview.api.analysis.ScoreModelI; @@ -54,7 +53,6 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentOrder; import jalview.datamodel.AlignmentView; import jalview.datamodel.ColumnSelection; -import jalview.datamodel.DBRefSource; import jalview.datamodel.HiddenSequences; import jalview.datamodel.PDBEntry; import jalview.datamodel.SeqCigar; @@ -74,7 +72,6 @@ import jalview.io.JalviewFileView; import jalview.io.JnetAnnotationMaker; import jalview.io.NewickFile; import jalview.io.TCoffeeScoreFile; -import jalview.io.gff.SequenceOntologyI; import jalview.jbgui.GAlignFrame; import jalview.schemes.Blosum62ColourScheme; import jalview.schemes.BuriedColourScheme; @@ -94,12 +91,10 @@ import jalview.schemes.TaylorColourScheme; import jalview.schemes.TurnColourScheme; import jalview.schemes.UserColourScheme; import jalview.schemes.ZappoColourScheme; -import jalview.structure.StructureSelectionManager; import jalview.util.MessageManager; import jalview.viewmodel.AlignmentViewport; import jalview.ws.DBRefFetcher; import jalview.ws.DBRefFetcher.FetchFinishedListenerI; -import jalview.ws.SequenceFetcher; import jalview.ws.jws1.Discoverer; import jalview.ws.jws2.Jws2Discoverer; import jalview.ws.jws2.jabaws2.Jws2Instance; @@ -4676,236 +4671,8 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, protected void showProductsFor(final SequenceI[] sel, final boolean _odna, final String source) { - Runnable foo = new Runnable() - { - - @Override - public void run() - { 
- final long sttime = System.currentTimeMillis(); - AlignFrame.this.setProgressBar(MessageManager.formatMessage( - "status.searching_for_sequences_from", - new Object[] { source }), sttime); - try - { - AlignmentI alignment = AlignFrame.this.getViewport() - .getAlignment(); - AlignmentI dataset = alignment.getDataset() == null ? alignment - : alignment.getDataset(); - boolean dna = alignment.isNucleotide(); - if (_odna != dna) - { - System.err - .println("Conflict: showProducts for alignment originally " - + "thought to be " - + (_odna ? "DNA" : "Protein") - + " now searching for " - + (dna ? "DNA" : "Protein") + " Context."); - } - AlignmentI xrefs = new CrossRef(sel, dataset).findXrefSequences( - source, dna); - if (xrefs == null) - { - return; - } - /* - * get display scheme (if any) to apply to features - */ - FeatureSettingsModelI featureColourScheme = new SequenceFetcher() - .getFeatureColourScheme(source); - - AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset, - xrefs); - if (!dna) - { - xrefsAlignment = AlignmentUtils.makeCdsAlignment( - xrefsAlignment.getSequencesArray(), dataset, sel); - xrefsAlignment.alignAs(alignment); - } - - /* - * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset - * sequences). 
If we are DNA, drop introns and update mappings - */ - AlignmentI copyAlignment = null; - - if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) - { - boolean copyAlignmentIsAligned = false; - if (dna) - { - copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset, - xrefsAlignment.getSequencesArray()); - if (copyAlignment.getHeight() == 0) - { - JOptionPane.showMessageDialog(AlignFrame.this, - MessageManager.getString("label.cant_map_cds"), - MessageManager.getString("label.operation_failed"), - JOptionPane.OK_OPTION); - System.err.println("Failed to make CDS alignment"); - } - - /* - * pending getting Embl transcripts to 'align', - * we are only doing this for Ensembl - */ - // TODO proper criteria for 'can align as cdna' - if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) - || AlignmentUtils.looksLikeEnsembl(alignment)) - { - copyAlignment.alignAs(alignment); - copyAlignmentIsAligned = true; - } - } - else - { - copyAlignment = AlignmentUtils.makeCopyAlignment(sel, - xrefs.getSequencesArray(), dataset); - } - copyAlignment.setGapCharacter(AlignFrame.this.viewport - .getGapCharacter()); - - StructureSelectionManager ssm = StructureSelectionManager - .getStructureSelectionManager(Desktop.instance); - - /* - * register any new mappings for sequence mouseover etc - * (will not duplicate any previously registered mappings) - */ - ssm.registerMappings(dataset.getCodonFrames()); - - if (copyAlignment.getHeight() <= 0) - { - System.err.println("No Sequences generated for xRef type " - + source); - return; - } - /* - * align protein to dna - */ - if (dna && copyAlignmentIsAligned) - { - xrefsAlignment.alignAs(copyAlignment); - } - else - { - /* - * align cdna to protein - currently only if - * fetching and aligning Ensembl transcripts! 
- */ - // TODO: generalise for other sources of locus/transcript/cds data - if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source)) - { - copyAlignment.alignAs(xrefsAlignment); - } - } - } - /* - * build AlignFrame(s) according to available alignment data - */ - AlignFrame newFrame = new AlignFrame(xrefsAlignment, - DEFAULT_WIDTH, DEFAULT_HEIGHT); - if (Cache.getDefault("HIDE_INTRONS", true)) - { - newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false); - } - String newtitle = String.format("%s %s %s", - dna ? MessageManager.getString("label.proteins") - : MessageManager.getString("label.nucleotides"), - MessageManager.getString("label.for"), getTitle()); - newFrame.setTitle(newtitle); - - if (copyAlignment == null) - { - /* - * split frame display is turned off in preferences file - */ - Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH, - DEFAULT_HEIGHT); - return; // via finally clause - } - AlignFrame copyThis = new AlignFrame(copyAlignment, - AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); - copyThis.setTitle(AlignFrame.this.getTitle()); - - boolean showSequenceFeatures = viewport.isShowSequenceFeatures(); - newFrame.setShowSeqFeatures(showSequenceFeatures); - copyThis.setShowSeqFeatures(showSequenceFeatures); - FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer(); - - /* - * copy feature rendering settings to split frame - */ - newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer() - .transferSettings(myFeatureStyling); - copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer() - .transferSettings(myFeatureStyling); - - /* - * apply 'database source' feature configuration - * if any was found - */ - // TODO is this the feature colouring for the original - // alignment or the fetched xrefs? 
either could be Ensembl - newFrame.getViewport().applyFeaturesStyle(featureColourScheme); - copyThis.getViewport().applyFeaturesStyle(featureColourScheme); - - SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame, - dna ? newFrame : copyThis); - newFrame.setVisible(true); - copyThis.setVisible(true); - String linkedTitle = MessageManager - .getString("label.linked_view_title"); - Desktop.addInternalFrame(sf, linkedTitle, -1, -1); - sf.adjustDivider(); - } catch (OutOfMemoryError e) - { - new OOMWarning("whilst fetching crossreferences", e); - } catch (Throwable e) - { - Cache.log.error("Error when finding crossreferences", e); - } finally - { - AlignFrame.this.setProgressBar(MessageManager.formatMessage( - "status.finished_searching_for_sequences_from", - new Object[] { source }), sttime); - } - } - - /** - * Makes an alignment containing the given sequences, and adds them to the - * given dataset, which is also set as the dataset for the new alignment - * - * TODO: refactor to DatasetI method - * - * @param dataset - * @param seqs - * @return - */ - protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset, - AlignmentI seqs) - { - SequenceI[] sprods = new SequenceI[seqs.getHeight()]; - for (int s = 0; s < sprods.length; s++) - { - sprods[s] = (seqs.getSequenceAt(s)).deriveSequence(); - if (dataset.getSequences() == null - || !dataset.getSequences().contains( - sprods[s].getDatasetSequence())) - { - dataset.addSequence(sprods[s].getDatasetSequence()); - } - sprods[s].updatePDBIds(); - } - Alignment al = new Alignment(sprods); - al.setDataset(dataset); - return al; - } - - }; - Thread frunner = new Thread(foo); - frunner.start(); + new Thread(CrossRefAction.showProductsFor(sel, _odna, source, this)) + .start(); } /** diff --git a/src/jalview/gui/CrossRefAction.java b/src/jalview/gui/CrossRefAction.java new file mode 100644 index 0000000..32af226 --- /dev/null +++ b/src/jalview/gui/CrossRefAction.java @@ -0,0 +1,312 @@ +/* + * Jalview - A Sequence 
Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.gui; + +import jalview.analysis.AlignmentUtils; +import jalview.analysis.CrossRef; +import jalview.api.AlignmentViewPanel; +import jalview.api.FeatureSettingsModelI; +import jalview.bin.Cache; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceI; +import jalview.io.gff.SequenceOntologyI; +import jalview.structure.StructureSelectionManager; +import jalview.util.MessageManager; +import jalview.ws.SequenceFetcher; + +import java.util.ArrayList; +import java.util.List; + +import javax.swing.JOptionPane; + +/** + * Factory constructor and runnable for discovering and displaying + * cross-references for a set of aligned sequences + * + * @author jprocter + * + */ +public class CrossRefAction implements Runnable +{ + private AlignFrame alignFrame; + + private SequenceI[] sel; + + private boolean _odna; + + private String source; + + List xrefViews = new ArrayList(); + + public List getXrefViews() + { + return xrefViews; + } + + @Override + public void run() + { + final long sttime = System.currentTimeMillis(); + alignFrame.setProgressBar( + 
MessageManager.formatMessage( + "status.searching_for_sequences_from", + new Object[] { source }), sttime); + try + { + AlignmentI alignment = alignFrame.getViewport().getAlignment(); + AlignmentI dataset = alignment.getDataset() == null ? alignment + : alignment.getDataset(); + boolean dna = alignment.isNucleotide(); + if (_odna != dna) + { + System.err + .println("Conflict: showProducts for alignment originally " + + "thought to be " + (_odna ? "DNA" : "Protein") + + " now searching for " + (dna ? "DNA" : "Protein") + + " Context."); + } + AlignmentI xrefs = new CrossRef(sel, dataset).findXrefSequences( + source, dna); + if (xrefs == null) + { + return; + } + /* + * get display scheme (if any) to apply to features + */ + FeatureSettingsModelI featureColourScheme = new SequenceFetcher() + .getFeatureColourScheme(source); + + AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset, + xrefs); + if (!dna) + { + xrefsAlignment = AlignmentUtils.makeCdsAlignment( + xrefsAlignment.getSequencesArray(), dataset, sel); + xrefsAlignment.alignAs(alignment); + } + + /* + * If we are opening a splitframe, make a copy of this alignment (sharing the same dataset + * sequences). 
If we are DNA, drop introns and update mappings + */ + AlignmentI copyAlignment = null; + + if (Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) + { + boolean copyAlignmentIsAligned = false; + if (dna) + { + copyAlignment = AlignmentUtils.makeCdsAlignment(sel, dataset, + xrefsAlignment.getSequencesArray()); + if (copyAlignment.getHeight() == 0) + { + JOptionPane.showMessageDialog(alignFrame, + MessageManager.getString("label.cant_map_cds"), + MessageManager.getString("label.operation_failed"), + JOptionPane.OK_OPTION); + System.err.println("Failed to make CDS alignment"); + } + + /* + * pending getting Embl transcripts to 'align', + * we are only doing this for Ensembl + */ + // TODO proper criteria for 'can align as cdna' + if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) + || AlignmentUtils.looksLikeEnsembl(alignment)) + { + copyAlignment.alignAs(alignment); + copyAlignmentIsAligned = true; + } + } + else + { + copyAlignment = AlignmentUtils.makeCopyAlignment(sel, + xrefs.getSequencesArray(), dataset); + } + copyAlignment + .setGapCharacter(alignFrame.viewport.getGapCharacter()); + + StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); + + /* + * register any new mappings for sequence mouseover etc + * (will not duplicate any previously registered mappings) + */ + ssm.registerMappings(dataset.getCodonFrames()); + + if (copyAlignment.getHeight() <= 0) + { + System.err.println("No Sequences generated for xRef type " + + source); + return; + } + /* + * align protein to dna + */ + if (dna && copyAlignmentIsAligned) + { + xrefsAlignment.alignAs(copyAlignment); + } + else + { + /* + * align cdna to protein - currently only if + * fetching and aligning Ensembl transcripts! 
+ */ + // TODO: generalise for other sources of locus/transcript/cds data + if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source)) + { + copyAlignment.alignAs(xrefsAlignment); + } + } + } + /* + * build AlignFrame(s) according to available alignment data + */ + AlignFrame newFrame = new AlignFrame(xrefsAlignment, + AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); + if (Cache.getDefault("HIDE_INTRONS", true)) + { + newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false); + } + String newtitle = String.format("%s %s %s", + dna ? MessageManager.getString("label.proteins") + : MessageManager.getString("label.nucleotides"), + MessageManager.getString("label.for"), alignFrame.getTitle()); + newFrame.setTitle(newtitle); + + if (copyAlignment == null) + { + /* + * split frame display is turned off in preferences file + */ + Desktop.addInternalFrame(newFrame, newtitle, + AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); + xrefViews.add(newFrame.alignPanel); + return; // via finally clause + } + AlignFrame copyThis = new AlignFrame(copyAlignment, + AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); + copyThis.setTitle(alignFrame.getTitle()); + + boolean showSequenceFeatures = alignFrame.getViewport() + .isShowSequenceFeatures(); + newFrame.setShowSeqFeatures(showSequenceFeatures); + copyThis.setShowSeqFeatures(showSequenceFeatures); + FeatureRenderer myFeatureStyling = alignFrame.alignPanel + .getSeqPanel().seqCanvas.getFeatureRenderer(); + + /* + * copy feature rendering settings to split frame + */ + newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer() + .transferSettings(myFeatureStyling); + copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer() + .transferSettings(myFeatureStyling); + + /* + * apply 'database source' feature configuration + * if any was found + */ + // TODO is this the feature colouring for the original + // alignment or the fetched xrefs? 
either could be Ensembl + newFrame.getViewport().applyFeaturesStyle(featureColourScheme); + copyThis.getViewport().applyFeaturesStyle(featureColourScheme); + + SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame, + dna ? newFrame : copyThis); + newFrame.setVisible(true); + copyThis.setVisible(true); + String linkedTitle = MessageManager + .getString("label.linked_view_title"); + Desktop.addInternalFrame(sf, linkedTitle, -1, -1); + sf.adjustDivider(); + + // finally add the top, then bottom frame to the view list + xrefViews.add(dna ? copyThis.alignPanel : newFrame.alignPanel); + xrefViews.add(!dna ? copyThis.alignPanel : newFrame.alignPanel); + + } catch (OutOfMemoryError e) + { + new OOMWarning("whilst fetching crossreferences", e); + } catch (Throwable e) + { + Cache.log.error("Error when finding crossreferences", e); + } finally + { + alignFrame.setProgressBar(MessageManager.formatMessage( + "status.finished_searching_for_sequences_from", + new Object[] { source }), sttime); + } + } + + /** + * Makes an alignment containing the given sequences, and adds them to the + * given dataset, which is also set as the dataset for the new alignment + * + * TODO: refactor to DatasetI method + * + * @param dataset + * @param seqs + * @return + */ + protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset, + AlignmentI seqs) + { + SequenceI[] sprods = new SequenceI[seqs.getHeight()]; + for (int s = 0; s < sprods.length; s++) + { + sprods[s] = (seqs.getSequenceAt(s)).deriveSequence(); + if (dataset.getSequences() == null + || !dataset.getSequences().contains( + sprods[s].getDatasetSequence())) + { + dataset.addSequence(sprods[s].getDatasetSequence()); + } + sprods[s].updatePDBIds(); + } + Alignment al = new Alignment(sprods); + al.setDataset(dataset); + return al; + } + + public CrossRefAction(AlignFrame alignFrame, SequenceI[] sel, + boolean _odna, String source) + { + this.alignFrame = alignFrame; + this.sel = sel; + this._odna = _odna; + this.source = 
source; + } + + public static CrossRefAction showProductsFor(final SequenceI[] sel, + final boolean _odna, final String source, + final AlignFrame alignFrame) + { + return new CrossRefAction(alignFrame, sel, _odna, source); + } + +} diff --git a/src/jalview/gui/Jalview2XML.java b/src/jalview/gui/Jalview2XML.java index 68245b6..c80f3de 100644 --- a/src/jalview/gui/Jalview2XML.java +++ b/src/jalview/gui/Jalview2XML.java @@ -365,6 +365,12 @@ public class Jalview2XML public jalview.datamodel.Mapping mp = _jmap; @Override + public boolean isResolvable() + { + return super.isResolvable() && mp.getTo() != null; + }; + + @Override boolean resolve() { SequenceI seq = getSrefDatasetSeq(); @@ -787,37 +793,42 @@ public class Jalview2XML JSeq jseq; Set calcIdSet = new HashSet(); - + // record the set of vamsas sequence XML POJO we create. + HashMap vamsasSetIds = new HashMap(); // SAVE SEQUENCES for (final SequenceI jds : rjal.getSequences()) { final SequenceI jdatasq = jds.getDatasetSequence() == null ? jds : jds.getDatasetSequence(); String id = seqHash(jds); - - if (seqRefIds.get(id) != null) - { - // This happens for two reasons: 1. multiple views are being serialised. - // 2. the hashCode has collided with another sequence's code. This DOES - // HAPPEN! (PF00072.15.stk does this) - // JBPNote: Uncomment to debug writing out of files that do not read - // back in due to ArrayOutOfBoundExceptions. 
- // System.err.println("vamsasSeq backref: "+id+""); - // System.err.println(jds.getName()+" - // "+jds.getStart()+"-"+jds.getEnd()+" "+jds.getSequenceAsString()); - // System.err.println("Hashcode: "+seqHash(jds)); - // SequenceI rsq = (SequenceI) seqRefIds.get(id + ""); - // System.err.println(rsq.getName()+" - // "+rsq.getStart()+"-"+rsq.getEnd()+" "+rsq.getSequenceAsString()); - // System.err.println("Hashcode: "+seqHash(rsq)); - } - else - { - vamsasSeq = createVamsasSequence(id, jds); - vamsasSet.addSequence(vamsasSeq); - seqRefIds.put(id, jds); + if (vamsasSetIds.get(id) == null) + { + if (seqRefIds.get(id) != null && !storeDS) + { + // This happens for two reasons: 1. multiple views are being + // serialised. + // 2. the hashCode has collided with another sequence's code. This + // DOES + // HAPPEN! (PF00072.15.stk does this) + // JBPNote: Uncomment to debug writing out of files that do not read + // back in due to ArrayOutOfBoundExceptions. + // System.err.println("vamsasSeq backref: "+id+""); + // System.err.println(jds.getName()+" + // "+jds.getStart()+"-"+jds.getEnd()+" "+jds.getSequenceAsString()); + // System.err.println("Hashcode: "+seqHash(jds)); + // SequenceI rsq = (SequenceI) seqRefIds.get(id + ""); + // System.err.println(rsq.getName()+" + // "+rsq.getStart()+"-"+rsq.getEnd()+" "+rsq.getSequenceAsString()); + // System.err.println("Hashcode: "+seqHash(rsq)); + } + else + { + vamsasSeq = createVamsasSequence(id, jds); + vamsasSet.addSequence(vamsasSeq); + vamsasSetIds.put(id, vamsasSeq); + seqRefIds.put(id, jds); + } } - jseq = new JSeq(); jseq.setStart(jds.getStart()); jseq.setEnd(jds.getEnd()); @@ -2808,15 +2819,28 @@ public class Jalview2XML { System.err .println("Warning JAL-2154 regression: updating start/end for sequence " - + tmpSeq.toString()); + + tmpSeq.toString() + " to " + jseqs[i]); } } else { incompleteSeqs.remove(seqId); } + if (vamsasSeq.length > vi && vamsasSeq[vi].getId().equals(seqId)) + { + // most likely we are reading a 
dataset XML document so + // update from vamsasSeq section of XML for this sequence + tmpSeq.setName(vamsasSeq[vi].getName()); + tmpSeq.setDescription(vamsasSeq[vi].getDescription()); + tmpSeq.setSequence(vamsasSeq[vi].getSequence()); + vi++; + } + else + { + // reading multiple views, so vamsasSeq set is a subset of JSeq + multipleView = true; + } tmpSeq.setStart(jseqs[i].getStart()); tmpSeq.setEnd(jseqs[i].getEnd()); tmpseqs.add(tmpSeq); - multipleView = true; } else { @@ -2905,6 +2929,12 @@ public class Jalview2XML { // load sequence features, database references and any associated PDB // structures for the alignment + // + // prior to 2.10, this part would only be executed the first time a + // sequence was encountered, but not afterwards. + // now, for 2.10 projects, this is also done if the xml doc includes + // dataset sequences not actually present in any particular view. + // for (int i = 0; i < vamsasSeq.length; i++) { if (jseqs[i].getFeaturesCount() > 0) @@ -2931,13 +2961,17 @@ public class Jalview2XML } } - - al.getSequenceAt(i).getDatasetSequence().addSequenceFeature(sf); + // adds feature to datasequence's feature set (since Jalview 2.10) + al.getSequenceAt(i).addSequenceFeature(sf); } } if (vamsasSeq[i].getDBRefCount() > 0) { - addDBRefs(al.getSequenceAt(i).getDatasetSequence(), vamsasSeq[i]); + // adds dbrefs to datasequence's set (since Jalview 2.10) + addDBRefs( + al.getSequenceAt(i).getDatasetSequence() == null ? 
al.getSequenceAt(i) + : al.getSequenceAt(i).getDatasetSequence(), + vamsasSeq[i]); } if (jseqs[i].getPdbidsCount() > 0) { @@ -2970,7 +3004,15 @@ public class Jalview2XML } StructureSelectionManager.getStructureSelectionManager( Desktop.instance).registerPDBEntry(entry); - al.getSequenceAt(i).getDatasetSequence().addPDBId(entry); + // adds PDBEntry to datasequence's set (since Jalview 2.10) + if (al.getSequenceAt(i).getDatasetSequence() != null) + { + al.getSequenceAt(i).getDatasetSequence().addPDBId(entry); + } + else + { + al.getSequenceAt(i).addPDBId(entry); + } } } } @@ -2999,16 +3041,16 @@ public class Jalview2XML if (maps[m].getMapping() != null) { mapping = addMapping(maps[m].getMapping()); - } - if (dnaseq != null && mapping.getTo() != null) - { - cf.addMap(dnaseq, mapping.getTo(), mapping.getMap()); - } - else - { - // defer to later - frefedSequence.add(newAlcodMapRef(maps[m].getDnasq(), cf, - mapping)); + if (dnaseq != null && mapping.getTo() != null) + { + cf.addMap(dnaseq, mapping.getTo(), mapping.getMap()); + } + else + { + // defer to later + frefedSequence.add(newAlcodMapRef(maps[m].getDnasq(), cf, + mapping)); + } } } al.addCodonFrame(cf); @@ -4898,7 +4940,7 @@ public class Jalview2XML for (int i = 0, iSize = vamsasSet.getSequenceCount(); i < iSize; i++) { Sequence vamsasSeq = vamsasSet.getSequence(i); - ensureJalviewDatasetSequence(vamsasSeq, ds, dseqs, ignoreUnrefed); + ensureJalviewDatasetSequence(vamsasSeq, ds, dseqs, ignoreUnrefed, i); } // create a new dataset if (ds == null) @@ -4925,18 +4967,29 @@ public class Jalview2XML * dataset alignment * @param dseqs * vector to add new dataset sequence to + * @param ignoreUnrefed + * - when true, don't create new sequences from vamsasSeq if its id + * doesn't already have an associated Jalview sequence. 
+ * @param vseqpos + * - used to reorder the sequence in the alignment according to the + * vamsasSeq array ordering, to preserve ordering of dataset */ private void ensureJalviewDatasetSequence(Sequence vamsasSeq, - AlignmentI ds, Vector dseqs, boolean ignoreUnrefed) + AlignmentI ds, Vector dseqs, boolean ignoreUnrefed, int vseqpos) { // JBP TODO: Check this is called for AlCodonFrames to support recovery of // xRef Codon Maps SequenceI sq = seqRefIds.get(vamsasSeq.getId()); + boolean reorder = false; SequenceI dsq = null; if (sq != null && sq.getDatasetSequence() != null) { dsq = sq.getDatasetSequence(); } + else + { + reorder = true; + } if (sq == null && ignoreUnrefed) { return; @@ -5032,6 +5085,35 @@ public class Jalview2XML // + (post ? "appended" : "")); } } + else + { + // sequence refs are identical. We may need to update the existing dataset + // alignment with this one, though. + if (ds != null && dseqs == null) + { + int opos = ds.findIndex(dsq); + SequenceI tseq = null; + if (opos != -1 && vseqpos != opos) + { + // remove from old position + ds.deleteSequence(dsq); + } + if (vseqpos < ds.getHeight()) + { + if (vseqpos != opos) + { + // save sequence at destination position + tseq = ds.getSequenceAt(vseqpos); + ds.replaceSequenceAt(vseqpos, dsq); + ds.addSequence(tseq); + } + } + else + { + ds.addSequence(dsq); + } + } + } } /* diff --git a/src/jalview/gui/SequenceFetcher.java b/src/jalview/gui/SequenceFetcher.java index 828a2aa..a875053 100755 --- a/src/jalview/gui/SequenceFetcher.java +++ b/src/jalview/gui/SequenceFetcher.java @@ -43,6 +43,7 @@ import java.awt.event.KeyAdapter; import java.awt.event.KeyEvent; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -202,8 +203,19 @@ public class SequenceFetcher extends JPanel implements Runnable private IProgressIndicator progressIndicator; + private volatile boolean _isConstructing = false; + + private List newAlframes = 
null; + public SequenceFetcher(IProgressIndicator guiIndic) { + this(guiIndic, null, null); + } + + public SequenceFetcher(IProgressIndicator guiIndic, + final String selectedDb, final String queryString) + { + this._isConstructing=true; this.progressIndicator = guiIndic; final SequenceFetcher us = this; // launch initialiser thread @@ -215,7 +227,8 @@ public class SequenceFetcher extends JPanel implements Runnable { if (getSequenceFetcherSingleton(progressIndicator) != null) { - us.initGui(progressIndicator); + us.initGui(progressIndicator, selectedDb, queryString); + us._isConstructing=false; } else { @@ -241,6 +254,26 @@ public class SequenceFetcher extends JPanel implements Runnable }); sf.start(); } + /** + * blocking call which creates a new sequence fetcher panel, configures it and presses the OK button with the given database and query. + * @param database + * @param query + */ + public static List fetchAndShow(String database, String query) + { + final SequenceFetcher sf = new SequenceFetcher(Desktop.instance, database, query); + while (sf._isConstructing) + { + try { Thread.sleep(50); + } catch (Exception q) + { + return Collections.emptyList(); + } + } + sf.newAlframes = new ArrayList(); + sf.run(); + return sf.newAlframes; + } private class DatabaseAuthority extends DefaultMutableTreeNode { @@ -251,13 +284,59 @@ public class SequenceFetcher extends JPanel implements Runnable { }; + + /** + * initialise the database and query for this fetcher panel + * + * @param selectedDb + * - string that should correspond to a sequence fetcher + * @param queryString + * - string that will be entered in the query dialog + * @return true if UI was configured with valid database and query string + */ + protected boolean setInitialQuery(String selectedDb, String queryString) + { + if (selectedDb == null || selectedDb.trim().length() == 0) + { + return false; + } + try + { + List sp = sfetch.getSourceProxy(selectedDb); + for (DbSourceProxy sourcep : sp) + { + if 
(sourcep.getTier() == 0) + { + database.selection = Arrays + .asList(new DbSourceProxy[] { sourcep }); + break; + } + } + if (database.selection == null || database.selection.size() == 0) + { + System.err.println("Ignoring fetch parameter db='" + selectedDb + + "'"); + return false; + } + textArea.setText(queryString); + } catch (Exception q) + { + System.err.println("Ignoring fetch parameter db='" + selectedDb + + "' and query='" + queryString + "'"); + return false; + } + return true; + } /** * called by thread spawned by constructor * * @param guiWindow + * @param queryString + * @param selectedDb */ - private void initGui(IProgressIndicator guiWindow) + private void initGui(IProgressIndicator guiWindow, String selectedDb, + String queryString) { this.guiWindow = guiWindow; if (guiWindow instanceof AlignFrame) @@ -268,6 +347,16 @@ public class SequenceFetcher extends JPanel implements Runnable try { jbInit(); + /* + * configure the UI with any query parameters we were called with + */ + if (!setInitialQuery(selectedDb, queryString)) + { + /* + * none provided, so show the database chooser + */ + database.waitForInput(); + } } catch (Exception ex) { ex.printStackTrace(); @@ -425,11 +514,6 @@ public class SequenceFetcher extends JPanel implements Runnable this.add(jPanel3, java.awt.BorderLayout.CENTER); this.add(jPanel2, java.awt.BorderLayout.NORTH); jScrollPane1.getViewport().add(textArea); - - /* - * open the database tree - */ - database.waitForInput(); } private void pdbSourceAction() @@ -942,7 +1026,10 @@ public class SequenceFetcher extends JPanel implements Runnable { af.hideFeatureColumns(SequenceOntologyI.EXON, false); } - + if (newAlframes != null) + { + newAlframes.add(af); + } Desktop.addInternalFrame(af, title, AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); diff --git a/src/jalview/gui/StructureChooser.java b/src/jalview/gui/StructureChooser.java index 13fa460..b2cc70f 100644 --- a/src/jalview/gui/StructureChooser.java +++ 
b/src/jalview/gui/StructureChooser.java @@ -867,7 +867,7 @@ public class StructureChooser extends GStructureChooser implements ArrayList seqsWithoutSourceDBRef = new ArrayList(); for (SequenceI seq : sequences) { - if (seq.getSourceDBRef() == null && seq.getDBRefs() == null) + if (seq.getPrimaryDBRefs().size() == 0) { seqsWithoutSourceDBRef.add(seq); continue; diff --git a/src/jalview/io/StructureFile.java b/src/jalview/io/StructureFile.java index fc0e207..f095383 100644 --- a/src/jalview/io/StructureFile.java +++ b/src/jalview/io/StructureFile.java @@ -117,7 +117,9 @@ public abstract class StructureFile extends AlignFile DBRefEntry sourceDBRef = new DBRefEntry(); sourceDBRef.setAccessionId(getId()); sourceDBRef.setSource(DBRefSource.PDB); - pdbSequence.setSourceDBRef(sourceDBRef); + // TODO: specify version for 'PDB' database ref if it is read from a file. + // TODO: decide if jalview.io should be creating primary refs! + sourceDBRef.setVersion(""); pdbSequence.addPDBId(entry); pdbSequence.addDBRef(sourceDBRef); SequenceI chainseq = pdbSequence; diff --git a/src/jalview/structure/StructureSelectionManager.java b/src/jalview/structure/StructureSelectionManager.java index be042e6..182a48f 100644 --- a/src/jalview/structure/StructureSelectionManager.java +++ b/src/jalview/structure/StructureSelectionManager.java @@ -502,7 +502,7 @@ public class StructureSelectionManager } ArrayList seqToStrucMapping = new ArrayList(); - if (isMapUsingSIFTs) + if (isMapUsingSIFTs && seq.isProtein()) { setProgressBar(null); setProgressBar(MessageManager @@ -585,6 +585,20 @@ public class StructureSelectionManager return "cif".equalsIgnoreCase(fileExt); } + /** + * retrieve a mapping for seq from SIFTs using associated DBRefEntry for + * uniprot or PDB + * + * @param seq + * @param pdbFile + * @param targetChainId + * @param pdb + * @param maxChain + * @param sqmpping + * @param maxAlignseq + * @return + * @throws SiftsException + */ private StructureMapping getStructureMapping(SequenceI 
seq, String pdbFile, String targetChainId, StructureFile pdb, PDBChain maxChain, jalview.datamodel.Mapping sqmpping, diff --git a/src/jalview/util/LinkedIdentityHashSet.java b/src/jalview/util/LinkedIdentityHashSet.java new file mode 100644 index 0000000..5cdbeb1 --- /dev/null +++ b/src/jalview/util/LinkedIdentityHashSet.java @@ -0,0 +1,109 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.util; + +import java.util.AbstractSet; +import java.util.Iterator; +import java.util.LinkedHashMap; + +/** + * Order preserving Set based on System.identityHashCode() for an object, which + * also supports Object->index lookup. 
+ * + * @author Jim Procter (2016) based on Evgeniy Dorofeev's response: via + * https://stackoverflow.com/questions/17276658/linkedidentityhashset + * + */ +public class LinkedIdentityHashSet extends AbstractSet +{ + LinkedHashMap set = new LinkedHashMap(); + + static class IdentityWrapper + { + Object obj; + + public int p; + + IdentityWrapper(Object obj, int p) + { + this.obj = obj; + this.p = p; + } + + @Override + public boolean equals(Object obj) + { + return this.obj == obj; + } + + @Override + public int hashCode() + { + return System.identityHashCode(obj); + } + } + + @Override + public boolean add(E e) + { + IdentityWrapper el = (new IdentityWrapper(e, set.size())); + return set.putIfAbsent(el, el) == null; + } + + @Override + public Iterator iterator() + { + return new Iterator() + { + final Iterator se = set.keySet().iterator(); + + @Override + public boolean hasNext() + { + return se.hasNext(); + } + + @SuppressWarnings("unchecked") + @Override + public E next() + { + return (E) se.next().obj; + } + }; + } + + @Override + public int size() + { + return set.size(); + } + + /** + * Lookup the index for e in the set + * + * @param e + * @return position of e in the set when it was added. 
+ */ + public int indexOf(E e) + { + return set.get(e).p; + } +} diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index cae968e..dc5bee8 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -1103,4 +1103,14 @@ public class MapList return forwardStrand; } + /** + * + * @return true if from, or to is a three to 1 mapping + */ + public boolean isTripletMap() + { + return (toRatio == 3 && fromRatio == 1) + || (fromRatio == 3 && toRatio == 1); + } + } diff --git a/src/jalview/ws/DBRefFetcher.java b/src/jalview/ws/DBRefFetcher.java index 3ba0e34..6213568 100644 --- a/src/jalview/ws/DBRefFetcher.java +++ b/src/jalview/ws/DBRefFetcher.java @@ -622,33 +622,43 @@ public class DBRefFetcher implements Runnable final int sequenceStart = sequence.getStart(); if (absStart == -1) { - // Is local sequence contained in dataset sequence? + // couldn't find local sequence in sequence from database, so check if + // the database sequence is a subsequence of local sequence absStart = nonGapped.indexOf(entrySeq); if (absStart == -1) - { // verification failed. + { + // verification failed. couldn't find any relationship between + // entrySeq and local sequence messages.append(sequence.getName() + " SEQUENCE NOT %100 MATCH \n"); continue; } + /* + * found match for the whole of the database sequence within the local + * sequence's reference frame. + */ transferred = true; sbuffer.append(sequence.getName() + " HAS " + absStart + " PREFIXED RESIDUES COMPARED TO " + dbSource + "\n"); - // - // + " - ANY SEQUENCE FEATURES" - // + " HAVE BEEN ADJUSTED ACCORDINGLY \n"); - // absStart = 0; - // create valid mapping between matching region of local sequence and - // the mapped sequence + + /* + * So create a mapping to the external entry from the matching region of + * the local sequence, and leave local start/end untouched. 
+ */ mp = new Mapping(null, new int[] { sequenceStart + absStart, sequenceStart + absStart + entrySeq.length() - 1 }, new int[] { entry.getStart(), entry.getStart() + entrySeq.length() - 1 }, 1, 1); - updateRefFrame = false; // mapping is based on current start/end so - // don't modify start and end + updateRefFrame = false; } else { + /* + * found a match for the local sequence within sequence from + * the external database + */ transferred = true; + // update start and end of local sequence to place it in entry's // reference frame. // apply identity map map from whole of local sequence to matching @@ -660,10 +670,14 @@ public class DBRefFetcher implements Runnable // absStart+sequence.getStart()+entrySeq.length()-1}, // new int[] { entry.getStart(), entry.getEnd() }, 1, 1); // relocate local features for updated start + if (updateRefFrame) { if (sequence.getSequenceFeatures() != null) { + /* + * relocate existing sequence features by offset + */ SequenceFeature[] sf = sequence.getSequenceFeatures(); int start = sequenceStart; int end = sequence.getEnd(); @@ -686,7 +700,7 @@ public class DBRefFetcher implements Runnable System.out.println("Adding dbrefs to " + sequence.getName() + " from " + dbSource + " sequence : " + entry.getName()); sequence.transferAnnotation(entry, mp); - // unknownSequences.remove(sequence); + absStart += entry.getStart(); int absEnd = absStart + nonGapped.length() - 1; if (!trimDatasetSeqs) diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java index 8cc0ce4..81b4caf 100644 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@ -205,10 +205,10 @@ public class Uniprot extends DbSourceProxyImpl { DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion, accessionId); + + // mark dbRef as a primary reference for this sequence dbRefs.add(dbRef); } - sequence.setSourceDBRef((dbRefs != null && dbRefs.size() > 0) ? 
dbRefs - .get(0) : null); Vector onlyPdbEntries = new Vector(); for (PDBEntry pdb : entry.getDbReference()) diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index 6c94723..0ab6e7d 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -323,41 +323,28 @@ public class SiftsClient implements SiftsClientI public DBRefEntryI getValidSourceDBRef(SequenceI seq) throws SiftsException { - DBRefEntryI sourceDBRef = null; - sourceDBRef = seq.getSourceDBRef(); - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) + DBRefEntry[] dbRefs = seq.getDBRefs(); + if (dbRefs == null || dbRefs.length < 1) { - return sourceDBRef; + throw new SiftsException( + "Source DBRef could not be determined. DBRefs might not have been retrieved."); } - else + + for (DBRefEntryI dbRef : dbRefs) { - DBRefEntry[] dbRefs = seq.getDBRefs(); - if (dbRefs == null || dbRefs.length < 1) + if (dbRef == null || dbRef.getAccessionId() == null + || dbRef.getSource() == null) { - throw new SiftsException( - "Source DBRef could not be determined. 
DBRefs might not have been retrieved."); + continue; } - - for (DBRefEntryI dbRef : dbRefs) + if (isValidDBRefEntry(dbRef) + && dbRef.isPrimary() + && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef + .getSource().equalsIgnoreCase(DBRefSource.PDB))) { - if (dbRef == null || dbRef.getAccessionId() == null - || dbRef.getSource() == null) - { - continue; - } - if (isFoundInSiftsEntry(dbRef.getAccessionId()) - && (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) || dbRef - .getSource().equalsIgnoreCase(DBRefSource.PDB))) - { - seq.setSourceDBRef(dbRef); - return dbRef; - } + return dbRef; } } - if (sourceDBRef != null && isValidDBRefEntry(sourceDBRef)) - { - return sourceDBRef; - } throw new SiftsException("Could not get source DB Ref"); } @@ -440,7 +427,7 @@ public class SiftsClient implements SiftsClientI String originalSeq = AlignSeq.extractGaps( jalview.util.Comparison.GapChars, seq.getSequenceAsString()); HashMap mapping = new HashMap(); - DBRefEntryI sourceDBRef = seq.getSourceDBRef(); + DBRefEntryI sourceDBRef; sourceDBRef = getValidSourceDBRef(seq); // TODO ensure sequence start/end is in the same coordinate system and // consistent with the choosen sourceDBRef diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 22bb680..7e8442d 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -994,29 +994,44 @@ public class AlignmentUtilsTests /* * need a sourceDbRef if we are to construct dbrefs to the CDS - * sequence + * sequence from the dna contig sequences */ DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1"); - dna1.getDatasetSequence().setSourceDBRef(dbref); + dna1.getDatasetSequence().addDBRef(dbref); + org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0)); dbref = new DBRefEntry("ENSEMBL", "0", "dna2"); - dna2.getDatasetSequence().setSourceDBRef(dbref); + 
dna2.getDatasetSequence().addDBRef(dbref); + org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0)); /* * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences) */ - MapList map = new MapList(new int[] { 4, 6, 10, 12 }, + MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 }, new int[] { 1, 2 }, 3, 1); AlignedCodonFrame acf = new AlignedCodonFrame(); - acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); + acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), + mapfordna1); dna.addCodonFrame(acf); - map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 }, + MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, + new int[] { 1, 3 }, 3, 1); acf = new AlignedCodonFrame(); - acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); + acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), + mapfordna2); dna.addCodonFrame(acf); /* + * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. 
These are normally constructed from CDS annotation + */ + DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1", + new Mapping(mapfordna1)); + dna1.getDatasetSequence().addDBRef(dna1xref); + DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2", + new Mapping(mapfordna2)); + dna2.getDatasetSequence().addDBRef(dna2xref); + + /* * execute method under test: */ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { @@ -1042,11 +1057,12 @@ public class AlignmentUtilsTests * verify CDS has a dbref with mapping to peptide */ assertNotNull(cds1Dss.getDBRefs()); - assertEquals(1, cds1Dss.getDBRefs().length); + assertEquals(2, cds1Dss.getDBRefs().length); dbref = cds1Dss.getDBRefs()[0]; - assertEquals("UNIPROT", dbref.getSource()); - assertEquals("0", dbref.getVersion()); - assertEquals("pep1", dbref.getAccessionId()); + assertEquals(dna1xref.getSource(), dbref.getSource()); + // version is via ensembl's primary ref + assertEquals(dna1xref.getVersion(), dbref.getVersion()); + assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId()); assertNotNull(dbref.getMap()); assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo()); MapList cdsMapping = new MapList(new int[] { 1, 6 }, @@ -1057,6 +1073,7 @@ public class AlignmentUtilsTests * verify peptide has added a dbref with reverse mapping to CDS */ assertNotNull(pep1.getDBRefs()); + // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ? 
assertEquals(2, pep1.getDBRefs().length); dbref = pep1.getDBRefs()[1]; assertEquals("ENSEMBL", dbref.getSource()); diff --git a/test/jalview/datamodel/AlignmentTest.java b/test/jalview/datamodel/AlignmentTest.java index b75ef50..2863340 100644 --- a/test/jalview/datamodel/AlignmentTest.java +++ b/test/jalview/datamodel/AlignmentTest.java @@ -27,6 +27,7 @@ import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.io.AppletFormatAdapter; import jalview.io.FormatAdapter; import jalview.util.MapList; @@ -37,6 +38,7 @@ import java.util.Arrays; import java.util.Iterator; import java.util.List; +import org.testng.Assert; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -101,6 +103,324 @@ public class AlignmentTest return a; } + /** + * assert wrapper: tests all references in the given alignment are consistent + * + * @param alignment + */ + public static void assertAlignmentDatasetRefs(AlignmentI alignment) + { + verifyAlignmentDatasetRefs(alignment, true, null); + } + + /** + * assert wrapper: tests all references in the given alignment are consistent + * + * @param alignment + * @param message + * - prefixed to any assert failed messages + */ + public static void assertAlignmentDatasetRefs(AlignmentI alignment, + String message) + { + verifyAlignmentDatasetRefs(alignment, true, message); + } + + /** + * verify sequence and dataset references are properly contained within + * dataset + * + * @param alignment + * - the alignmentI object to verify (either alignment or dataset) + * @param raiseAssert + * - when set, testng assertions are raised. + * @param message + * - null or a string message to prepend to the assert failed messages. + * @return true if alignment references were in order, otherwise false. 
+ */ + public static boolean verifyAlignmentDatasetRefs(AlignmentI alignment, + boolean raiseAssert, String message) + { + if (message==null) { message = ""; } + if (alignment == null) + { + if (raiseAssert) + { + Assert.fail(message+"Alignment for verification was null."); + } + return false; + } + if (alignment.getDataset() != null) + { + AlignmentI dataset = alignment.getDataset(); + // check all alignment sequences have their dataset within the dataset + for (SequenceI seq : alignment.getSequences()) + { + SequenceI seqds = seq.getDatasetSequence(); + if (seqds.getDatasetSequence() != null) + { + if (raiseAssert) + { + Assert.fail(message+" Alignment contained a sequence who's dataset sequence has a second dataset reference."); + } + return false; + } + if (dataset.findIndex(seqds) == -1) + { + if (raiseAssert) + { + Assert.fail(message+" Alignment contained a sequence who's dataset sequence was not in the dataset."); + } + return false; + } + } + return verifyAlignmentDatasetRefs(alignment.getDataset(), raiseAssert, message); + } + else + { + int dsp = -1; + // verify all dataset sequences + for (SequenceI seqds : alignment.getSequences()) + { + dsp++; + if (seqds.getDatasetSequence() != null) + { + if (raiseAssert) + { + Assert.fail(message+" Dataset contained a sequence with non-null dataset reference (ie not a dataset sequence!)"); + } + return false; + } + int foundp = alignment.findIndex(seqds); + if (foundp != dsp) + { + if (raiseAssert) + { + Assert.fail(message + + " Dataset sequence array contains a reference at " + + dsp + " to a sequence first seen at " + foundp + " (" + + seqds.toString() + ")"); + } + return false; + } + if (seqds.getDBRefs() != null) + { + for (DBRefEntry dbr : seqds.getDBRefs()) + { + if (dbr.getMap() != null) + { + SequenceI seqdbrmapto = dbr.getMap().getTo(); + if (seqdbrmapto != null) + { + if (seqdbrmapto.getDatasetSequence() != null) + { + if (raiseAssert) + { + Assert.fail(message+" DBRefEntry for sequence in alignment had 
map to sequence which was not a dataset sequence"); + } + return false; + + } + if (alignment.findIndex(dbr.getMap().getTo()) == -1) + { + if (raiseAssert) + { + Assert.fail(message+" DBRefEntry for sequence in alignment had map to sequence not in dataset"); + } + return false; + } + } + } + } + } + } + // finally, verify codonmappings involve only dataset sequences. + if (alignment.getCodonFrames() != null) + { + for (AlignedCodonFrame alc : alignment.getCodonFrames()) + { + for (SequenceToSequenceMapping ssm : alc.getMappings()) + { + if (ssm.getFromSeq().getDatasetSequence() != null) + { + if (raiseAssert) + { + Assert.fail(message+" CodonFrame-SSM-FromSeq is not a dataset sequence"); + } + return false; + } + if (alignment.findIndex(ssm.getFromSeq()) == -1) + { + + if (raiseAssert) + { + Assert.fail(message+" CodonFrame-SSM-FromSeq is not contained in dataset"); + } + return false; + } + if (ssm.getMapping().getTo().getDatasetSequence() != null) + { + if (raiseAssert) + { + Assert.fail(message+" CodonFrame-SSM-Mapping-ToSeq is not a dataset sequence"); + } + return false; + } + if (alignment.findIndex(ssm.getMapping().getTo()) == -1) + { + + if (raiseAssert) + { + Assert.fail(message+" CodonFrame-SSM-Mapping-ToSeq is not contained in dataset"); + } + return false; + } + } + } + } + } + return true; // all relationships verified! 
+ } + + /** + * call verifyAlignmentDatasetRefs with and without assertion raising enabled, + * to check expected pass/fail actually occurs in both conditions + * + * @param al + * @param expected + * @param msg + */ + private void assertVerifyAlignment(AlignmentI al, boolean expected, + String msg) + { + if (expected) + { + try + { + + Assert.assertTrue(verifyAlignmentDatasetRefs(al, true, null), + "Valid test alignment failed when raiseAsserts enabled:" + + msg); + } catch (AssertionError ae) + { + ae.printStackTrace(); + Assert.fail( + "Valid test alignment raised assertion errors when raiseAsserts enabled: " + + msg, ae); + } + // also check validation passes with asserts disabled + Assert.assertTrue(verifyAlignmentDatasetRefs(al, false, null), + "Valid test alignment tested false when raiseAsserts disabled:" + + msg); + } + else + { + boolean assertRaised = false; + try + { + verifyAlignmentDatasetRefs(al, true, null); + } catch (AssertionError ae) + { + // expected behaviour + assertRaised = true; + } + if (!assertRaised) + { + Assert.fail("Invalid test alignment passed when raiseAsserts enabled:" + + msg); + } + // also check validation passes with asserts disabled + Assert.assertFalse(verifyAlignmentDatasetRefs(al, false, null), + "Invalid test alignment tested true when raiseAsserts disabled:" + + msg); + } + } + @Test(groups = { "Functional" }) + public void testVerifyAlignmentDatasetRefs() + { + SequenceI sq1 = new Sequence("sq1", "ASFDD"), sq2 = new Sequence("sq2", + "TTTTTT"); + + // construct simple valid alignment dataset + Alignment al = new Alignment(new SequenceI[] { + sq1, sq2 }); + // expect this to pass + assertVerifyAlignment(al, true, "Simple valid alignment didn't verify"); + + // check test for sequence->datasetSequence validity + sq1.setDatasetSequence(sq2); + assertVerifyAlignment( + al, + false, + "didn't detect dataset sequence with a dataset sequence reference."); + + sq1.setDatasetSequence(null); + assertVerifyAlignment( + al, + true, 
+ "didn't reinstate validity after nulling dataset sequence dataset reference"); + + // now create dataset and check again + al.createDatasetAlignment(); + assertNotNull(al.getDataset()); + + assertVerifyAlignment(al, true, + "verify failed after createDatasetAlignment"); + + // create a dbref on sq1 with a sequence ref to sq2 + DBRefEntry dbrs1tos2 = new DBRefEntry("UNIPROT", "1", "Q111111"); + dbrs1tos2.setMap(new Mapping(sq2.getDatasetSequence(), + new int[] { 1, 5 }, new int[] { 2, 6 }, 1, 1)); + sq1.getDatasetSequence().addDBRef(dbrs1tos2); + assertVerifyAlignment(al, true, + "verify failed after addition of valid DBRefEntry/map"); + // now create a dbref on a new sequence which maps to another sequence + // outside of the dataset + SequenceI sqout = new Sequence("sqout", "ututututucagcagcag"), sqnew = new Sequence( + "sqnew", "EEERRR"); + DBRefEntry sqnewsqout = new DBRefEntry("ENAFOO", "1", "R000001"); + sqnewsqout.setMap(new Mapping(sqout, new int[] { 1, 6 }, new int[] { 1, + 18 }, 1, 3)); + al.getDataset().addSequence(sqnew); + + assertVerifyAlignment(al, true, + "verify failed after addition of new sequence to dataset"); + // now start checking exception conditions + sqnew.addDBRef(sqnewsqout); + assertVerifyAlignment( + al, + false, + "verify passed when a dbref with map to sequence outside of dataset was added"); + // make the verify pass by adding the outsider back in + al.getDataset().addSequence(sqout); + assertVerifyAlignment(al, true, + "verify should have passed after adding dbref->to sequence in to dataset"); + // and now the same for a codon mapping... 
+ SequenceI sqanotherout = new Sequence("sqanotherout", + "aggtutaggcagcagcag"); + + AlignedCodonFrame alc = new AlignedCodonFrame(); + alc.addMap(sqanotherout, sqnew, new MapList(new int[] { 1, 6 }, + new int[] { 1, 18 }, 3, 1)); + + al.addCodonFrame(alc); + Assert.assertEquals(al.getDataset().getCodonFrames().size(), 1); + + assertVerifyAlignment( + al, + false, + "verify passed when alCodonFrame mapping to sequence outside of dataset was added"); + // make the verify pass by adding the outsider back in + al.getDataset().addSequence(sqanotherout); + assertVerifyAlignment( + al, + true, + "verify should have passed once all sequences involved in alCodonFrame were added to dataset"); + al.getDataset().addSequence(sqanotherout); + assertVerifyAlignment(al, false, + "verify should have failed when a sequence was added twice to the dataset"); + + } /* * Read in Stockholm format test data including secondary structure * annotations. @@ -460,6 +780,60 @@ public class AlignmentTest assertTrue(ds.getCodonFrames().contains(acf)); } + /** + * tests the addition of *all* sequences referred to by a sequence being added + * to the dataset + */ + @Test(groups = "Functional") + public void testCreateDatasetAlignmentWithMappedToSeqs() + { + // Alignment with two sequences, gapped. + SequenceI sq1 = new Sequence("sq1", "A--SDF"); + SequenceI sq2 = new Sequence("sq2", "G--TRQ"); + + // cross-references to two more sequences. + DBRefEntry dbr = new DBRefEntry("SQ1", "", "sq3"); + SequenceI sq3 = new Sequence("sq3", "VWANG"); + dbr.setMap(new Mapping(sq3, new MapList(new int[] { 1, 4 }, new int[] { + 2, 5 }, 1, 1))); + sq1.addDBRef(dbr); + + SequenceI sq4 = new Sequence("sq4", "ERKWI"); + DBRefEntry dbr2 = new DBRefEntry("SQ2", "", "sq4"); + dbr2.setMap(new Mapping(sq4, new MapList(new int[] { 1, 4 }, new int[] { + 2, 5 }, 1, 1))); + sq2.addDBRef(dbr2); + // and a 1:1 codonframe mapping between them. 
+ AlignedCodonFrame alc = new AlignedCodonFrame(); + alc.addMap(sq1, sq2, new MapList(new int[] { 1, 4 }, + new int[] { 1, 4 }, 1, 1)); + + AlignmentI protein = new Alignment(new SequenceI[] { sq1, sq2 }); + + /* + * create the alignment dataset + * note this creates sequence datasets where missing + * as a side-effect (in this case, on seq2 + */ + + // TODO promote this method to AlignmentI + ((Alignment) protein).createDatasetAlignment(); + + AlignmentI ds = protein.getDataset(); + + // should be 4 sequences in dataset - two materialised, and two propagated + // from dbref + assertEquals(4, ds.getHeight()); + assertTrue(ds.getSequences().contains(sq1.getDatasetSequence())); + assertTrue(ds.getSequences().contains(sq2.getDatasetSequence())); + assertTrue(ds.getSequences().contains(sq3)); + assertTrue(ds.getSequences().contains(sq4)); + // Should have one codon frame mapping between sq1 and sq2 via dataset + // sequences + assertEquals(ds.getCodonFrame(sq1.getDatasetSequence()), + ds.getCodonFrame(sq2.getDatasetSequence())); + } + @Test(groups = "Functional") public void testAddCodonFrame() { @@ -483,6 +857,27 @@ public class AlignmentTest } @Test(groups = "Functional") + public void testAddSequencePreserveDatasetIntegrity() + { + Sequence seq = new Sequence("testSeq", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + Alignment align = new Alignment(new SequenceI[] { seq }); + align.createDatasetAlignment(); + AlignmentI ds = align.getDataset(); + SequenceI copy = new Sequence(seq); + copy.insertCharAt(3, 5, '-'); + align.addSequence(copy); + Assert.assertEquals(align.getDataset().getHeight(), 1, + "Dataset shouldn't have more than one sequence."); + + Sequence seq2 = new Sequence("newtestSeq", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + align.addSequence(seq2); + Assert.assertEquals(align.getDataset().getHeight(), 2, + "Dataset should now have two sequences."); + + assertAlignmentDatasetRefs(align, + "addSequence broke dataset reference integrity"); + } + @Test(groups = "Functional") public 
void getVisibleStartAndEndIndexTest() { Sequence seq = new Sequence("testSeq", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); diff --git a/test/jalview/datamodel/DBRefEntryTest.java b/test/jalview/datamodel/DBRefEntryTest.java index ae6dcda..09d9df1 100644 --- a/test/jalview/datamodel/DBRefEntryTest.java +++ b/test/jalview/datamodel/DBRefEntryTest.java @@ -138,4 +138,62 @@ public class DBRefEntryTest assertFalse(ref1.updateFrom(ref2)); assertEquals("10", ref1.getVersion()); } + + @Test(groups = { "Functional" }) + public void testIsPrimary() + { + DBRefEntry dbr = new DBRefEntry(DBRefSource.UNIPROT, "", "Q12345"); + assertTrue(dbr.isPrimary()); + /* + * 1:1 mapping + */ + dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 1, + 1)); + assertTrue(dbr.isPrimary()); + /* + * Version string is prefixed with another dbref source string (fail) + */ + dbr.setVersion(DBRefSource.EMBL + ":0"); + assertFalse(dbr.isPrimary()); + + /* + * Version string is alphanumeric + */ + dbr.setVersion("0.1.b"); + assertTrue(dbr.isPrimary()); + + /* + * 1:1 mapping with shift (fail) + */ + dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 2, 4 }, 1, + 1)); + assertFalse(dbr.isPrimary()); + + /* + * 1:1 mapping and sequenceRef (fail) + */ + dbr.setMap(new Mapping(new Sequence("foo", "ASDF"), new int[] { 1, 3 }, + new int[] { 1, 3 }, 1, 1)); + assertFalse(dbr.isPrimary()); + + /* + * 1:3 mapping (fail) + */ + dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 1, + 3)); + assertFalse(dbr.isPrimary()); + /* + * 2:2 mapping with shift (expected fail, but maybe use case for a pass) + */ + dbr.setMap(new Mapping(null, new int[] { 1, 3 }, new int[] { 1, 3 }, 2, + 2)); + assertFalse(dbr.isPrimary()); + + /* + * Version string is prefixed with another dbref source string + */ + dbr.setVersion(DBRefSource.EMBL + ":0"); + assertFalse(dbr.isPrimary()); + + } } diff --git a/test/jalview/datamodel/SequenceTest.java b/test/jalview/datamodel/SequenceTest.java index 
cfc4cbb..3ad309e 100644 --- a/test/jalview/datamodel/SequenceTest.java +++ b/test/jalview/datamodel/SequenceTest.java @@ -438,36 +438,56 @@ public class SequenceTest sq.setDescription("Test sequence description.."); sq.setVamsasId("TestVamsasId"); - sq.setSourceDBRef(new DBRefEntry("PDB", "version0", "1TST")); + sq.addDBRef(new DBRefEntry("PDB", "version0", "1TST")); - sq.addDBRef(new DBRefEntry("PDB", "version1", "1Tst")); - sq.addDBRef(new DBRefEntry("PDB", "version2", "2Tst")); - sq.addDBRef(new DBRefEntry("PDB", "version3", "3Tst")); - sq.addDBRef(new DBRefEntry("PDB", "version4", "4Tst")); + sq.addDBRef(new DBRefEntry("PDB", "version1", "1PDB")); + sq.addDBRef(new DBRefEntry("PDB", "version2", "2PDB")); + sq.addDBRef(new DBRefEntry("PDB", "version3", "3PDB")); + sq.addDBRef(new DBRefEntry("PDB", "version4", "4PDB")); sq.addPDBId(new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1")); sq.addPDBId(new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1")); sq.addPDBId(new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2")); sq.addPDBId(new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2")); + + DBRefEntry pdb1pdb = new DBRefEntry("PDB", "version1", "1PDB"); + DBRefEntry pdb2pdb = new DBRefEntry("PDB", "version1", "2PDB"); + //FIXME pdb2pdb's matching PDBEntry has Type.MMCIF - but 2.10 only has PDBEntry with type==PDB to indicate ID is a real PDB entry + + List primRefs = Arrays.asList(new DBRefEntry[] { pdb1pdb }); + + sq.getDatasetSequence().addDBRef(pdb1pdb); + sq.getDatasetSequence().addDBRef(pdb2pdb); sq.getDatasetSequence().addDBRef( - new DBRefEntry("PDB", "version1", "1Tst")); - sq.getDatasetSequence().addDBRef( - new DBRefEntry("PDB", "version2", "2Tst")); - sq.getDatasetSequence().addDBRef( - new DBRefEntry("PDB", "version3", "3Tst")); + new DBRefEntry("PDB", "version3", "3PDB")); sq.getDatasetSequence().addDBRef( - new DBRefEntry("PDB", "version4", "4Tst")); - - sq.getDatasetSequence().addPDBId( - new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1")); 
- sq.getDatasetSequence().addPDBId( - new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1")); + new DBRefEntry("PDB", "version4", "4PDB")); + + PDBEntry pdbe1a=new PDBEntry("1PDB", "A", Type.PDB, "filePath/test1"); + PDBEntry pdbe1b = new PDBEntry("1PDB", "B", Type.PDB, "filePath/test1"); + PDBEntry pdbe2a=new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2"); + PDBEntry pdbe2b = new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2"); sq.getDatasetSequence().addPDBId( - new PDBEntry("2PDB", "A", Type.MMCIF, "filePath/test2")); + pdbe1a); sq.getDatasetSequence().addPDBId( - new PDBEntry("2PDB", "B", Type.MMCIF, "filePath/test2")); + pdbe1b); + sq.getDatasetSequence().addPDBId(pdbe2a); + sq.getDatasetSequence().addPDBId(pdbe2b); + + /* + * test we added pdb entries to the dataset sequence + */ + Assert.assertEquals(sq.getDatasetSequence().getAllPDBEntries(), Arrays + .asList(new PDBEntry[] { pdbe1a, pdbe1b, pdbe2a, pdbe2b }), + "PDB Entries were not found on dataset sequence."); + /* + * we should recover a pdb entry that is on the dataset sequence via PDBEntry + */ + Assert.assertEquals(pdbe1a, + sq.getDatasetSequence().getPDBEntry("1PDB"), + "PDB Entry '1PDB' not found on dataset sequence via getPDBEntry."); ArrayList annotsList = new ArrayList(); System.out.println(">>>>>> " + sq.getSequenceAsString().length()); annotsList.add(new Annotation("A", "A", 'X', 0.1f)); @@ -479,7 +499,7 @@ public class SequenceTest new AlignmentAnnotation("Test annot", "Test annot description", annots)); Assert.assertEquals(sq.getDescription(), "Test sequence description.."); - Assert.assertEquals(sq.getDBRefs().length, 4); + Assert.assertEquals(sq.getDBRefs().length, 5); Assert.assertEquals(sq.getAllPDBEntries().size(), 4); Assert.assertNotNull(sq.getAnnotation()); Assert.assertEquals(sq.getAnnotation()[0].annotations.length, 2); @@ -492,7 +512,7 @@ public class SequenceTest Assert.assertEquals(derived.getDescription(), "Test sequence description.."); - 
Assert.assertEquals(derived.getDBRefs().length, 4); + Assert.assertEquals(derived.getDBRefs().length, 4); // come from dataset Assert.assertEquals(derived.getAllPDBEntries().size(), 4); Assert.assertNotNull(derived.getAnnotation()); Assert.assertEquals(derived.getAnnotation()[0].annotations.length, 2); @@ -510,6 +530,17 @@ public class SequenceTest assertNotNull(sq.getSequenceFeatures()); assertArrayEquals(sq.getSequenceFeatures(), derived.getSequenceFeatures()); + + /* + * verify we have primary db refs *just* for PDB IDs with associated + * PDBEntry objects + */ + + assertEquals(primRefs, sq.getPrimaryDBRefs()); + assertEquals(primRefs, sq.getDatasetSequence().getPrimaryDBRefs()); + + assertEquals(sq.getPrimaryDBRefs(), derived.getPrimaryDBRefs()); + } /** @@ -734,4 +765,30 @@ public class SequenceTest assertSame(dbref3, sq.getDBRefs()[2]); assertEquals("3", dbref2.getVersion()); } + + @Test(groups = { "Functional" }) + public void testGetPrimaryDBRefs() + { + /* + * test PDB relationships for getPrimaryDBRefs + */ + SequenceI seq = new Sequence("aseq", "ASDF"); + DBRefEntry upentry = new DBRefEntry("UNIPROT", "0", "1qip"); + // primary - uniprot + seq.addDBRef(upentry); + // primary - type is PDB + DBRefEntry pdbentry = new DBRefEntry("PDB", "0", "1qip"); + seq.addDBRef(pdbentry); + // not primary - type of PDBEntry is not PDB + seq.addDBRef(new DBRefEntry("PDB", "0", "1AAA")); + // not primary - no PDBEntry + seq.addDBRef(new DBRefEntry("PDB", "0", "1DDD")); + seq.addPDBId(new PDBEntry("1QIP", null, Type.PDB, null)); + seq.addPDBId(new PDBEntry("1AAA", null, null, null)); + assertTrue("Couldn't find simple primary reference (UNIPROT)", seq + .getPrimaryDBRefs().contains(upentry)); + assertTrue("Couldn't find expected PDB primary reference", seq + .getPrimaryDBRefs().contains(pdbentry)); + assertEquals(2, seq.getPrimaryDBRefs().size()); + } } diff --git a/test/jalview/datamodel/xdb/embl/EmblEntryTest.java b/test/jalview/datamodel/xdb/embl/EmblEntryTest.java 
index 4b71417..abe5099 100644 --- a/test/jalview/datamodel/xdb/embl/EmblEntryTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblEntryTest.java @@ -128,6 +128,7 @@ public class EmblEntryTest assertEquals(5, dbrefs.length); assertEquals(DBRefSource.EMBL, dbrefs[0].getSource()); assertEquals("CAA30420.1", dbrefs[0].getAccessionId()); + // TODO: verify getPrimaryDBRefs() for peptide products assertEquals(cds1Map.getInverse(), dbrefs[0].getMap().getMap()); assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource()); assertEquals("CAA30420.1", dbrefs[1].getAccessionId()); diff --git a/test/jalview/ext/ensembl/EnsemblXrefTest.java b/test/jalview/ext/ensembl/EnsemblXrefTest.java index 1dc9b8d..4dc8ab2 100644 --- a/test/jalview/ext/ensembl/EnsemblXrefTest.java +++ b/test/jalview/ext/ensembl/EnsemblXrefTest.java @@ -1,6 +1,7 @@ package jalview.ext.ensembl; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; import jalview.datamodel.DBRefEntry; @@ -24,8 +25,11 @@ public class EnsemblXrefTest @Test(groups = "Functional") public void testGetCrossReferences() { + String dbName = "ENSEMBL"; + String dbVers = "0.6.2b1"; System.out.println(JSON); - EnsemblXref testee = new EnsemblXref("http://rest.ensembl.org") + EnsemblXref testee = new EnsemblXref("http://rest.ensembl.org", dbName, + dbVers) { @Override protected BufferedReader getHttpResponse(URL url, List ids) @@ -40,8 +44,12 @@ public class EnsemblXrefTest assertEquals(2, dbrefs.size()); assertEquals("CCDS", dbrefs.get(0).getSource()); assertEquals("CCDS5863", dbrefs.get(0).getAccessionId()); + assertFalse(dbrefs.get(0).isPrimary()); + assertEquals(dbName + ":" + dbVers, dbrefs.get(0).getVersion()); // Uniprot name should get converted to Jalview canonical form assertEquals("UNIPROT", dbrefs.get(1).getSource()); assertEquals("P15056", dbrefs.get(1).getAccessionId()); + assertEquals(dbName + ":" + dbVers, dbrefs.get(1).getVersion()); + assertFalse(dbrefs.get(1).isPrimary()); } } 
diff --git a/test/jalview/io/CrossRef2xmlTests.java b/test/jalview/io/CrossRef2xmlTests.java new file mode 100644 index 0000000..e7a6950 --- /dev/null +++ b/test/jalview/io/CrossRef2xmlTests.java @@ -0,0 +1,538 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ +package jalview.io; + +import jalview.analysis.CrossRef; +import jalview.api.AlignmentViewPanel; +import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.AlignmentTest; +import jalview.datamodel.SequenceI; +import jalview.gui.AlignFrame; +import jalview.gui.CrossRefAction; +import jalview.gui.Desktop; +import jalview.gui.Jalview2XML; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import org.testng.Assert; +import org.testng.annotations.Test; + +@Test(singleThreaded = true) +public class CrossRef2xmlTests extends Jalview2xmlBase +{ + + /** + * test store and recovery of expanded views + * + * @throws Exception + */ + @Test(groups = { "Operational" }, enabled = true) + public void testRetrieveAndShowCrossref() throws Exception + { + + List failedDBRetr = new ArrayList(); + List failedXrefMenuItems = new ArrayList(); + List failedProjectRecoveries = new ArrayList(); + + // for every set of db queries + // retrieve db query + // verify presence of expected xrefs + // show xrefs - verify expected type of frame is shown for each xref + // show xrefs again + // - verify original -> xref -> xref(original) recovers frame containing at + // least the first retrieved sequence + // store + // 1. whole project + // 2. individual frames + // 3. load each one back and verify + // . aligned sequences (.toString() ) + // . xrefs (.toString() ) + // . 
codonframes + // + // + HashMap dbtoviewBit = new HashMap(); + List keyseq = new ArrayList(); + HashMap savedProjects = new HashMap(); + + for (String[] did : new String[][] { { "ENSEMBL", "ENSG00000157764" }, + { "UNIPROT", "P01731" } }) + { + // pass counters - 0 - first pass, 1 means retrieve project rather than + // perform action + int pass1 = 0, pass2 = 0, pass3 = 0; + // each do loop performs two iterations in the first outer loop pass, but + // only performs one iteration on the second outer loop + // ie. pass 1 = 0 {pass 2= 0 { pass 3 = 0,1 }, pass 2=1 { pass 3 = 0 }}, 1 + // { pass 2 = 0 { pass 3 = 0 } } + do + { + String first = did[0] + " " + did[1]; + AlignFrame af = null; + boolean dna; + AlignmentI retral; + AlignmentI dataset; + SequenceI[] seqs; + List ptypes = null; + if (pass1 == 0) + { + // retrieve dbref + + List afs = jalview.gui.SequenceFetcher.fetchAndShow( + did[0], did[1]); + if (afs.size() == 0) + { + failedDBRetr.add("Didn't retrieve " + first); + break; + } + keyseq.add(first); + af = afs.get(0); + + // verify references for retrieved data + AlignmentTest.assertAlignmentDatasetRefs(af.getViewport() + .getAlignment(), "Pass (" + pass1 + "," + pass2 + "," + + pass3 + "): Fetch " + first + ":"); + dna = af.getViewport().getAlignment().isNucleotide(); + retral = af.getViewport().getAlignment(); + dataset = retral.getDataset(); + seqs = retral.getSequencesArray(); + + } + else + { + Desktop.instance.closeAll_actionPerformed(null); + // recover stored project + af = new FileLoader(false).LoadFileWaitTillLoaded(savedProjects + .get(first).toString(), FormatAdapter.FILE); + System.out.println("Recovered view for '" + first + "' from '" + + savedProjects.get(first).toString() + "'"); + dna = af.getViewport().getAlignment().isNucleotide(); + retral = af.getViewport().getAlignment(); + dataset = retral.getDataset(); + seqs = retral.getSequencesArray(); + + // verify references for recovered data + 
AlignmentTest.assertAlignmentDatasetRefs(af.getViewport() + .getAlignment(), "Pass (" + pass1 + "," + pass2 + "," + + pass3 + "): Recover " + first + ":"); + + } + + // store project on first pass, compare next pass + stringify(dbtoviewBit, savedProjects, first, af.alignPanel); + + ptypes = (seqs == null || seqs.length == 0) ? null : new CrossRef( + seqs, dataset).findXrefSourcesForSequences(dna); + + // start of pass2: retrieve each cross-ref for fetched or restored + // project. + do // first cross ref and recover crossref loop + { + + for (String db : ptypes) + { + // counter for splitframe views retrieved via crossref + int firstcr_ap = 0; + // build next key so we can retrieve all views + String nextxref = first + " -> " + db + "{" + firstcr_ap + "}"; + // perform crossref action, or retrieve stored project + List cra_views = new ArrayList(); + CrossRefAction cra = null; + + if (pass2 == 0) + { // retrieve and show cross-refs in this thread + cra = new CrossRefAction(af, seqs, dna, db); + cra.run(); + if (cra.getXrefViews().size() == 0) + { + failedXrefMenuItems.add("No crossrefs retrieved for " + + first + " -> " + db); + continue; + } + cra_views = cra.getXrefViews(); + assertNucleotide(cra_views.get(0), + "Nucleotide panel included proteins for " + first + + " -> " + db); + assertProtein(cra_views.get(1), + "Protein panel included nucleotides for " + first + + " -> " + db); + } + else + { + Desktop.instance.closeAll_actionPerformed(null); + pass3 = 0; + // recover stored project + File storedProject = savedProjects.get(nextxref); + if (storedProject == null) + { + failedProjectRecoveries.add("Failed to store a view for '" + + nextxref + "'"); + continue; + } + + // recover stored project + AlignFrame af2 = new FileLoader(false) + .LoadFileWaitTillLoaded(savedProjects.get(nextxref) + .toString(), FormatAdapter.FILE); + System.out.println("Recovered view for '" + nextxref + + "' from '" + savedProjects.get(nextxref).toString() + + "'"); + // gymnastics to 
recover the alignPanel/Complementary alignPanel + if (af2.getViewport().isNucleotide()) + { + // top view, then bottom + cra_views.add(af2.getViewport().getAlignPanel()); + cra_views.add(((jalview.gui.AlignViewport) af2 + .getViewport().getCodingComplement()) + .getAlignPanel()); + + } + else + { + // bottom view, then top + cra_views.add(((jalview.gui.AlignViewport) af2 + .getViewport().getCodingComplement()) + .getAlignPanel()); + cra_views.add(af2.getViewport().getAlignPanel()); + + } + } + HashMap> xrptypes = new HashMap>(); + // first save/verify views. + for (AlignmentViewPanel avp : cra_views) + { + nextxref = first + " -> " + db + "{" + firstcr_ap++ + "}"; + // verify references for this panel + AlignmentTest.assertAlignmentDatasetRefs(avp.getAlignment(), + "Pass (" + pass1 + "," + pass2 + "," + pass3 + + "): before start of pass3: " + nextxref + + ":"); + + SequenceI[] xrseqs = avp.getAlignment().getSequencesArray(); + + List _xrptypes = (seqs == null || seqs.length == 0) ? null + : new CrossRef(xrseqs, dataset) + .findXrefSourcesForSequences(avp + .getAlignViewport().isNucleotide()); + + stringify(dbtoviewBit, savedProjects, nextxref, avp); + xrptypes.put(nextxref, _xrptypes); + + } + + // now do the second xref pass starting from either saved or just + // recovered split pane, in sequence + do // retrieve second set of cross refs or recover and verify + { + firstcr_ap = 0; + for (AlignmentViewPanel avp : cra_views) + { + nextxref = first + " -> " + db + "{" + firstcr_ap++ + "}"; + for (String xrefdb : xrptypes.get(nextxref)) + { + List cra_views2 = new ArrayList(); + int q = 0; + String nextnextxref = nextxref + + " -> " + xrefdb + "{" + q + "}"; + + if (pass3 == 0) + { + + SequenceI[] xrseqs = avp.getAlignment() + .getSequencesArray(); + AlignFrame nextaf = Desktop.getAlignFrameFor(avp + .getAlignViewport()); + + cra = new CrossRefAction(nextaf, xrseqs, avp + .getAlignViewport().isNucleotide(), xrefdb); + cra.run(); + if (cra.getXrefViews().size() == 0) 
+ { + failedXrefMenuItems + .add("No crossrefs retrieved for '" + + nextxref + "' to " + xrefdb + " via '" + + nextaf.getTitle() + "'"); + continue; + } + cra_views2 = cra.getXrefViews(); + assertNucleotide(cra_views2.get(0), + "Nucleotide panel included proteins for '" + + nextxref + "' to " + xrefdb + + " via '" + nextaf.getTitle() + "'"); + assertProtein(cra_views2.get(1), + "Protein panel included nucleotides for '" + + nextxref + "' to " + xrefdb + + " via '" + nextaf.getTitle() + "'"); + + } + else + { + Desktop.instance.closeAll_actionPerformed(null); + // recover stored project + File storedProject = savedProjects.get(nextnextxref); + if (storedProject == null) + { + failedProjectRecoveries + .add("Failed to store a view for '" + + nextnextxref + "'"); + continue; + } + AlignFrame af2 = new FileLoader(false) + .LoadFileWaitTillLoaded( + savedProjects.get(nextnextxref) + .toString(), FormatAdapter.FILE); + System.out.println("Recovered view for '" + + nextnextxref + "' from '" + + savedProjects.get(nextnextxref).toString() + + "'"); + // gymnastics to recover the alignPanel/Complementary + // alignPanel + if (af2.getViewport().isNucleotide()) + { + // top view, then bottom + cra_views2.add(af2.getViewport().getAlignPanel()); + cra_views2.add(((jalview.gui.AlignViewport) af2 + .getViewport().getCodingComplement()) + .getAlignPanel()); + + } + else + { + // bottom view, then top + cra_views2.add(((jalview.gui.AlignViewport) af2 + .getViewport().getCodingComplement()) + .getAlignPanel()); + cra_views2.add(af2.getViewport().getAlignPanel()); + } + Assert.assertEquals(cra_views2.size(), 2); + Assert.assertNotNull(cra_views2.get(0)); + Assert.assertNotNull(cra_views2.get(1)); + } + + for (AlignmentViewPanel nextavp : cra_views2) + { + nextnextxref = nextxref + + " -> " + xrefdb + "{" + q++ + "}"; + + // verify references for this panel + AlignmentTest.assertAlignmentDatasetRefs( + nextavp.getAlignment(), "" + "Pass (" + pass1 + + "," + pass2 + "): For " + + 
nextnextxref + ":"); + + stringify(dbtoviewBit, savedProjects, nextnextxref, + nextavp); + keyseq.add(nextnextxref); + } + } // end of loop around showing all xrefdb for crossrf2 + + } // end of loop around all viewpanels from crossrf1 + } while (pass2 == 2 && pass3++ < 2); + // fetchdb->crossref1->crossref-2->verify for xrefs we + // either loop twice when pass2=0, or just once when pass2=1 + // (recovered project from previous crossref) + + } // end of loop over db-xrefs for crossref-2 + + // fetchdb-->crossref1 + // for each xref we try to retrieve xref, store and verify when + // pass1=0, or just retrieve and verify when pass1=1 + } while (pass1 == 1 && pass2++ < 2); + // fetchdb + // for each ref we + // loop twice: first, do the retrieve, second recover from saved project + + // increment pass counters, so we repeat traversal starting from the + // oldest saved project first. + if (pass1 == 0) + { + // verify stored projects for first set of cross references + pass1 = 1; + // and verify cross-references retrieved from stored projects + pass2 = 0; + pass3 = 0; + } + else + { + pass1++; + if (pass1 < 2) + { + // verify stored projects for second set of cross references + pass2 = 1; + // and verify cross-references retrievable from those stored projects. + pass3 = 0; + } + } + } while (pass3 < 2); + } + if (failedXrefMenuItems.size() > 0) + { + for (String s : failedXrefMenuItems) + { + System.err.println(s); + } + Assert.fail("Faulty xref menu (" + failedXrefMenuItems.size() + + " counts)"); + } + if (failedProjectRecoveries.size() > 0) + { + + for (String s : failedProjectRecoveries) + { + System.err.println(s); + } + Assert.fail("Didn't recover projects for some retrievals (did they retrieve ?) 
(" + + failedProjectRecoveries.size() + " counts)"); + } + if (failedDBRetr.size() > 0) + { + for (String s : failedDBRetr) // print the failed retrievals counted below + { + System.err.println(s); + } + Assert.fail("Didn't retrieve some db refs for checking cross-refs (" + + failedDBRetr.size() + " counts)"); + } + } + + private void assertProtein(AlignmentViewPanel alignmentViewPanel, + String message) + { + assertType(true, alignmentViewPanel, message); + } + + private void assertNucleotide(AlignmentViewPanel alignmentViewPanel, + String message) + { + assertType(false, alignmentViewPanel, message); + } + + private void assertType(boolean expectProtein, + AlignmentViewPanel alignmentViewPanel, String message) + { + List nonType = new ArrayList(); + for (SequenceI sq : alignmentViewPanel.getAlignViewport() + .getAlignment() + .getSequences()) + { + if (sq.isProtein() != expectProtein) + { + nonType.add(sq); + } + } + if (nonType.size() > 0) + { + Assert.fail(message + " [ " + + (expectProtein ? "nucleotides were " : "proteins were ") + + nonType.toString() + + " ]"); + } + } + + /** + * first time called, record strings derived from alignment and + * alignedcodonframes, and save view to a project file. Second time called, + * compare strings to existing ones. 
org.testng.Assert.assertTrue on + * stringmatch + * + * @param dbtoviewBit + * map between xrefpath and view string + * @param savedProjects + * - map from xrefpath to saved project filename (createTempFile) + * @param xrefpath + * - xrefpath - unique ID for this context (composed of sequence of + * db-fetch/cross-ref actions preceding state) + * @param avp + * - viewpanel to store (for viewpanels in splitframe, the same + * project should be written for both panels, only one needs + * recovering for comparison on the next stringify call, but each + * viewpanel needs to be called with a distinct xrefpath to ensure + * each one's strings are compared) + */ + private void stringify(HashMap dbtoviewBit, + HashMap savedProjects, String xrefpath, + AlignmentViewPanel avp) + { + if (savedProjects != null) + { + if (savedProjects.get(xrefpath) == null) + { + // write a project file for this view. On the second pass, this will be + // recovered and cross-references verified + try + { + File prfile = File.createTempFile("crossRefTest", ".jvp"); + AlignFrame af = Desktop.getAlignFrameFor(avp.getAlignViewport()); + new Jalview2XML(false).saveAlignment(af, prfile.toString(), + af.getTitle()); + System.out.println("Written view from '" + xrefpath + "' as '" + + prfile.getAbsolutePath() + "'"); + savedProjects.put(xrefpath, prfile); + } catch (IOException q) + { + Assert.fail("Unexpected IO Exception", q); + } + } + else + { + System.out.println("Stringify check on view from '" + xrefpath + + "' [ possibly retrieved from '" + + savedProjects.get(xrefpath).getAbsolutePath() + "' ]"); + + } + } + + StringBuilder sbr = new StringBuilder(); + sbr.append(avp.getAlignment().toString()); + sbr.append("\n"); + sbr.append(""); + sbr.append("\n"); + sbr.append(avp.getAlignment().getDataset()); + sbr.append("\n"); + sbr.append(""); + sbr.append("\n"); + int p = 0; + if (avp.getAlignment().getCodonFrames() != null) + { + for (AlignedCodonFrame ac : avp.getAlignment().getCodonFrames()) + { 
+ sbr.append(""); + sbr.append("\n"); + sbr.append(ac.toString()); + sbr.append("\n"); + } + } + String dbt = dbtoviewBit.get(xrefpath); + if (dbt == null) + { + dbtoviewBit.put(xrefpath, sbr.toString()); + } + else + { + Assert.assertEquals(sbr.toString(), dbt, "stringify mismatch for " + + xrefpath); + } + } +} diff --git a/test/jalview/io/Jalview2xmlBase.java b/test/jalview/io/Jalview2xmlBase.java new file mode 100644 index 0000000..379fd68 --- /dev/null +++ b/test/jalview/io/Jalview2xmlBase.java @@ -0,0 +1,76 @@ +package jalview.io; + +import jalview.bin.Cache; +import jalview.bin.Jalview; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.SequenceI; +import jalview.gui.Desktop; + +import java.util.Date; + +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.BeforeTest; + +public class Jalview2xmlBase +{ + + /** + * @throws java.lang.Exception + */ + @BeforeClass(alwaysRun = true) + public static void setUpBeforeClass() throws Exception + { + /* + * use read-only test properties file + */ + Cache.loadProperties("test/jalview/io/testProps.jvprops"); + + /* + * set news feed last read to a future time to ensure no + * 'unread' news item is displayed + */ + Date oneHourFromNow = new Date(System.currentTimeMillis() + 3600 * 1000); + Cache.setDateProperty("JALVIEW_NEWS_RSS_LASTMODIFIED", oneHourFromNow); + + Jalview.main(new String[] {}); + } + + /** + * @throws java.lang.Exception + */ + @AfterClass(alwaysRun = true) + public static void tearDownAfterClass() throws Exception + { + jalview.gui.Desktop.instance.closeAll_actionPerformed(null); + } + + @BeforeTest(alwaysRun = true) + public static void clearDesktop() + { + if (Desktop.instance != null && Desktop.getAlignFrames() != null) + { + Desktop.instance.closeAll_actionPerformed(null); + } + } + + public int countDsAnn(jalview.viewmodel.AlignmentViewport avp) + { + int numdsann = 0; + for (SequenceI sq : 
avp.getAlignment().getDataset().getSequences()) + { + if (sq.getAnnotation() != null) + { + for (AlignmentAnnotation dssa : sq.getAnnotation()) + { + if (dssa.isValidStruc()) + { + numdsann++; + } + } + } + } + return numdsann; + } + +} diff --git a/test/jalview/io/Jalview2xmlTests.java b/test/jalview/io/Jalview2xmlTests.java index 784f3dd..f7853ff 100644 --- a/test/jalview/io/Jalview2xmlTests.java +++ b/test/jalview/io/Jalview2xmlTests.java @@ -29,8 +29,6 @@ import static org.testng.AssertJUnit.assertTrue; import jalview.api.AlignViewportI; import jalview.api.AlignmentViewPanel; import jalview.api.ViewStyleI; -import jalview.bin.Cache; -import jalview.bin.Jalview; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.HiddenSequences; @@ -49,70 +47,18 @@ import jalview.viewmodel.AlignmentViewport; import java.io.File; import java.util.ArrayList; -import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import org.testng.Assert; import org.testng.AssertJUnit; -import org.testng.annotations.AfterClass; -import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @Test(singleThreaded = true) -public class Jalview2xmlTests +public class Jalview2xmlTests extends Jalview2xmlBase { - /** - * @throws java.lang.Exception - */ - @BeforeClass(alwaysRun = true) - public static void setUpBeforeClass() throws Exception - { - /* - * use read-only test properties file - */ - Cache.loadProperties("test/jalview/io/testProps.jvprops"); - - /* - * set news feed last read to a future time to ensure no - * 'unread' news item is displayed - */ - Date oneHourFromNow = new Date(System.currentTimeMillis() + 3600 * 1000); - Cache.setDateProperty("JALVIEW_NEWS_RSS_LASTMODIFIED", oneHourFromNow); - - Jalview.main(new String[] {}); - } - - /** - * @throws java.lang.Exception - */ - @AfterClass(alwaysRun = true) - public static void tearDownAfterClass() throws Exception - { - 
Desktop.instance.closeAll_actionPerformed(null); - } - - int countDsAnn(jalview.viewmodel.AlignmentViewport avp) - { - int numdsann = 0; - for (SequenceI sq : avp.getAlignment().getDataset().getSequences()) - { - if (sq.getAnnotation() != null) - { - for (AlignmentAnnotation dssa : sq.getAnnotation()) - { - if (dssa.isValidStruc()) - { - numdsann++; - } - } - } - } - return numdsann; - } - @Test(groups = { "Functional" }) public void testRNAStructureRecovery() throws Exception { diff --git a/test/jalview/io/testProps_nodas.jvprops b/test/jalview/io/testProps_nodas.jvprops new file mode 100644 index 0000000..da95549 --- /dev/null +++ b/test/jalview/io/testProps_nodas.jvprops @@ -0,0 +1,83 @@ +#---JalviewX Properties File--- +#Fri Apr 25 09:54:25 BST 2014 +SCREEN_Y=768 +SCREEN_X=936 +SHOW_WSDISCOVERY_ERRORS=true +LATEST_VERSION=2.8.0b1 +SHOW_CONSERVATION=true +JALVIEW_RSS_WINDOW_SCREEN_WIDTH=550 +JAVA_CONSOLE_SCREEN_WIDTH=450 +LAST_DIRECTORY=/Volumes/Data/Users/jimp/Documents/testing/Jalview/examples +ID_ITALICS=true +SORT_ALIGNMENT=No sort +SHOW_IDENTITY=true +WSMENU_BYHOST=false +SEQUENCE_LINKS=EMBL-EBI Search|http\://www.ebi.ac.uk/ebisearch/search.ebi?db\=allebi&query\=$SEQUENCE_ID$ +SHOW_FULLSCREEN=false +RECENT_URL=http\://www.jalview.org/examples/exampleFile_2_7.jar +FONT_NAME=SansSerif +BLC_JVSUFFIX=true +VERSION_CHECK=false +YEAR=2011 +SHOW_DBREFS_TOOLTIP=true +MSF_JVSUFFIX=true +SCREENGEOMETRY_HEIGHT=1600 +JAVA_CONSOLE_SCREEN_Y=475 +JAVA_CONSOLE_SCREEN_X=830 +PFAM_JVSUFFIX=true +PIR_JVSUFFIX=true +STARTUP_FILE=http\://www.jalview.org/examples/exampleFile_2_3.jar +JAVA_CONSOLE_SCREEN_HEIGHT=162 +PIR_MODELLER=false +GAP_SYMBOL=- +SHOW_QUALITY=true +SHOW_GROUP_CONSERVATION=false +SHOW_JWS2_SERVICES=true +SHOW_NPFEATS_TOOLTIP=true +FONT_STYLE=plain +ANTI_ALIAS=false +SORT_BY_TREE=false +RSBS_SERVICES=|Multi-Harmony|Analysis|Sequence Harmony and Multi-Relief (Brandt et al. 
2010)|hseparable,gapCharacter\='-',returns\='ANNOTATION'|?tool\=jalview|http\://zeus.few.vu.nl/programs/shmrwww/index.php?tool\=jalview&groups\=$PARTITION\:min\='2',minsize\='2',sep\=' '$&ali_file\=$ALIGNMENT\:format\='FASTA',writeasfile$ +AUTHORFNAMES=Jim Procter, Andrew Waterhouse, Jan Engelhardt, Lauren Lui, Michele Clamp, James Cuff, Steve Searle, David Martin & Geoff Barton +JALVIEW_RSS_WINDOW_SCREEN_HEIGHT=328 +SHOW_GROUP_CONSENSUS=false +SHOW_CONSENSUS_HISTOGRAM=true +SHOW_OVERVIEW=false +AUTHORS=J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle +FIGURE_AUTOIDWIDTH=false +SCREEN_WIDTH=900 +ANNOTATIONCOLOUR_MIN=ffc800 +SHOW_STARTUP_FILE=false +RECENT_FILE=examples/uniref50.fa\t/Volumes/Data/Users/jimp/Documents/testing/Jalview/examples/RF00031_folded.stk\t/Volumes/Data/Users/jimp/bs_ig_mult.out +DEFAULT_FILE_FORMAT=FASTA +SHOW_JAVA_CONSOLE=false +VERSION=2.8b1 +FIGURE_USERIDWIDTH= +WSMENU_BYTYPE=false +DEFAULT_COLOUR=None +NOQUESTIONNAIRES=true +JALVIEW_NEWS_RSS_LASTMODIFIED=Apr 23, 2014 2\:53\:26 PM +BUILD_DATE=01 November 2013 +PILEUP_JVSUFFIX=true +SHOW_CONSENSUS_LOGO=false +SCREENGEOMETRY_WIDTH=2560 +SHOW_ANNOTATIONS=true +JALVIEW_RSS_WINDOW_SCREEN_Y=0 +USAGESTATS=false +JALVIEW_RSS_WINDOW_SCREEN_X=0 +SHOW_UNCONSERVED=false +SHOW_JVSUFFIX=true +SCREEN_HEIGHT=650 +ANNOTATIONCOLOUR_MAX=ff0000 +AUTO_CALC_CONSENSUS=true +FASTA_JVSUFFIX=true +DAS_ACTIVE_SOURCE= +JWS2HOSTURLS=http\://www.compbio.dundee.ac.uk/jabaws +PAD_GAPS=false +CLUSTAL_JVSUFFIX=true +SHOW_ENFIN_SERVICES=true +FONT_SIZE=10 +RIGHT_ALIGN_IDS=false +USE_PROXY=false +WRAP_ALIGNMENT=false +DAS_REGISTRY_URL=http\://www.nowhere/