From 8474e47dc878f83b9b3f45ef6b04eb64ad733e2a Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 14 Jun 2016 12:28:11 +0100 Subject: [PATCH] JAL-2110 fixes to dbref resolution and mappings, use same dataset for dna/cds/protein --- src/jalview/analysis/AlignmentUtils.java | 115 +++-- src/jalview/analysis/CrossRef.java | 173 +++++-- src/jalview/analysis/CrossRefs.java | 577 ------------------------ src/jalview/gui/AlignFrame.java | 276 ++++++------ test/jalview/analysis/AlignmentUtilsTests.java | 41 +- test/jalview/analysis/CrossRefTest.java | 43 +- test/jalview/analysis/CrossRefsTest.java | 298 ------------ 7 files changed, 398 insertions(+), 1125 deletions(-) delete mode 100644 src/jalview/analysis/CrossRefs.java delete mode 100644 test/jalview/analysis/CrossRefsTest.java diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 949c47a..ead4ef8 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -24,6 +24,7 @@ import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE; import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; @@ -1400,16 +1401,15 @@ public class AlignmentUtils * * @param dna * aligned dna sequences - * @param mappings - * from dna to protein - * @param al + * @param dataset * @return an alignment whose sequences are the cds-only parts of the dna * sequences (or null if no mappings are found) */ public static AlignmentI makeCdsAlignment(SequenceI[] dna, - List mappings, AlignmentI al) + AlignmentI dataset) { List cdsSeqs = new ArrayList(); + List mappings = dataset.getCodonFrames(); /* * construct CDS sequences from the (cds-to-protein) mappings made earlier; @@ -1419,18 +1419,78 @@ public class AlignmentUtils */ for (SequenceI seq : dna) { - AlignedCodonFrame cdsMappings = new AlignedCodonFrame(); + SequenceI seqDss = seq.getDatasetSequence() == null ? seq : seq + .getDatasetSequence(); List seqMappings = MappingUtils .findMappingsForSequence(seq, mappings); - List alignmentMappings = al.getCodonFrames(); for (AlignedCodonFrame mapping : seqMappings) { - for (Mapping aMapping : mapping.getMappingsFromSequence(seq)) + List mappingsFromSequence = mapping.getMappingsFromSequence(seq); + + for (Mapping aMapping : mappingsFromSequence) { - SequenceI cdsSeq = makeCdsSequence(seq.getDatasetSequence(), - aMapping); + if (aMapping.getMap().getFromRatio() == 1) + { + /* + * not a dna-to-protein mapping (likely dna-to-cds) + */ + continue; + } + + /* + * check for an existing CDS sequence i.e. a 3:1 mapping to + * the dna mapping's product + */ + SequenceI cdsSeq = null; + // TODO better mappings collection data model so we can do + // a table lookup instead of double loops to find mappings + SequenceI proteinProduct = aMapping.getTo(); + for (AlignedCodonFrame acf : MappingUtils + .findMappingsForSequence(proteinProduct, mappings)) + { + for (SequenceToSequenceMapping map : acf.getMappings()) + { + if (map.getMapping().getMap().getFromRatio() == 3 + && proteinProduct == map.getMapping().getTo() + && seqDss != map.getFromSeq()) + { + /* + * found a 3:1 mapping to the protein product which is not + * from the dna sequence...assume it is from the CDS sequence + * TODO mappings data model that brings together related + * dna-cds-protein mappings in one object + */ + cdsSeq = map.getFromSeq(); + } + } + } + if (cdsSeq != null) + { + /* + * mappings are always to dataset sequences so create an aligned + * sequence to own it; add the dataset sequence to the dataset + */ + SequenceI derivedSequence = cdsSeq.deriveSequence(); + cdsSeqs.add(derivedSequence); + if (!dataset.getSequences().contains(cdsSeq)) + { + dataset.addSequence(cdsSeq); + } + continue; + } + + /* + * didn't find mapped CDS sequence - construct it and add + * its dataset sequence to the dataset + */ + cdsSeq = makeCdsSequence(seq.getDatasetSequence(), aMapping); + SequenceI cdsSeqDss = cdsSeq.createDatasetSequence(); cdsSeqs.add(cdsSeq); - + if (!dataset.getSequences().contains(cdsSeqDss)) + { + dataset.addSequence(cdsSeqDss); + } + /* * add a mapping from CDS to the (unchanged) mapped to range */ @@ -1439,16 +1499,29 @@ public class AlignmentUtils MapList map = new MapList(cdsRange, aMapping.getMap() .getToRanges(), aMapping.getMap().getFromRatio(), aMapping.getMap().getToRatio()); - cdsMappings.addMap(cdsSeq, aMapping.getTo(), map); + AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame(); + cdsToProteinMapping.addMap(cdsSeq, proteinProduct, map); + + /* + * guard against duplicating the mapping if repeating this action + */ + if (!mappings.contains(cdsToProteinMapping)) + { + mappings.add(cdsToProteinMapping); + } /* * add another mapping from original 'from' range to CDS */ + AlignedCodonFrame dnaToProteinMapping = new AlignedCodonFrame(); map = new MapList(aMapping.getMap().getFromRanges(), cdsRange, 1, 1); - cdsMappings.addMap(seq.getDatasetSequence(), cdsSeq, map); + dnaToProteinMapping.addMap(seq.getDatasetSequence(), cdsSeq, map); + if (!mappings.contains(dnaToProteinMapping)) + { + mappings.add(dnaToProteinMapping); + } - alignmentMappings.add(cdsMappings); /* * transfer any features on dna that overlap the CDS @@ -1458,20 +1531,9 @@ public class AlignmentUtils } } - /* - * add CDS seqs to shared dataset - */ - Alignment dataset = al.getDataset(); - for (SequenceI seq : cdsSeqs) - { - if (!dataset.getSequences().contains(seq.getDatasetSequence())) - { - dataset.addSequence(seq.getDatasetSequence()); - } - } AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs .size()])); - cds.setDataset(dataset); + cds.setDataset((Alignment) dataset); return cds; } @@ -1483,7 +1545,7 @@ public class AlignmentUtils * * @param seq * @param mapping - * @return + * @return CDS sequence (as a dataset sequence) */ static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping) { @@ -1515,7 +1577,6 @@ public class AlignmentUtils SequenceI newSeq = new Sequence(seq.getName() + "|" + mapping.getTo().getName(), newSeqChars, 1, newPos); - newSeq.createDatasetSequence(); return newSeq; } diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 4e8f070..9fd87df 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -163,7 +163,10 @@ public class CrossRef { for (DBRefEntry ref : xrefs) { - String source = ref.getSource(); + /* + * avoid duplication e.g. ENSEMBL and Ensembl + */ + String source = DBRefUtils.getCanonicalName(ref.getSource()); if (!sources.contains(source)) { sources.add(source); @@ -173,19 +176,27 @@ public class CrossRef } /** + * Attempts to find cross-references from the sequences provided in the + * constructor to the given source database. Cross-references may be found + *
    + *
  • in dbrefs on the sequence which hold a mapping to a sequence + *
      + *
    • provided with a fetched sequence (e.g. ENA translation), or
    • + *
    • populated previously after getting cross-references
    • + *
    + *
  • as other sequences in the alignment which share a dbref identifier with + * the sequence
  • + *
  • by fetching from the remote database
  • + *
+ * The cross-referenced sequences, and mappings to them, are added to the + * alignment dataset. * - * @param seqs - * sequences whose xrefs are being retrieved - * @param dna - * true if sequences are nucleotide * @param source - * @param al - * alignment to search for cross-referenced sequences (and possibly - * add to) - * @return products (as dataset sequences) + * @return cross-referenced sequences (as dataset sequences) */ public Alignment findXrefSequences(String source) { + List rseqs = new ArrayList(); AlignedCodonFrame cf = new AlignedCodonFrame(); SequenceIdMatcher matcher = new SequenceIdMatcher( @@ -244,13 +255,20 @@ public class CrossRef * for example: UNIPROT {P0CE19, P0CE20} -> EMBL {J03321, X06707} */ found = true; - SequenceI matchInDataset = findInDataset(mappedTo);// matcher.findIdMatch(mappedTo); + /* + * problem: matcher.findIdMatch() is lenient - returns a sequence + * with a dbref to the search arg e.g. ENST for ENSP - wrong + * but findInDataset() matches ENSP when looking for Uniprot... + */ + SequenceI matchInDataset = findInDataset(xref); + /*matcher.findIdMatch(mappedTo);*/ if (matchInDataset != null) { if (!rseqs.contains(matchInDataset)) { rseqs.add(matchInDataset); } + refIterator.remove(); continue; } SequenceI rsq = new Sequence(mappedTo); @@ -337,8 +355,11 @@ public class CrossRef if (map.getTo() != null && map.getMap() != null) { // TODO findInDataset requires exact sequence match but - // 'congruent' test only for the mapped part - SequenceI matched = findInDataset(map.getTo());// matcher.findIdMatch(map.getTo()); + // 'congruent' test is only for the mapped part + // maybe not a problem in practice since only ENA provide a + // mapping and it is to the full protein translation of CDS + SequenceI matched = findInDataset(dbref); + // matcher.findIdMatch(map.getTo()); if (matched != null) { /* @@ -379,15 +400,17 @@ public class CrossRef + " to retrieved crossreference " + dss.getName(); System.out.println(msg); - // method to update all refs of existing To on - // retrieved sequence with dss and merge any props - // on To onto dss. - // TODO don't we have to change the mapped to ranges - // if not to the whole sequence? map.setTo(dss); + + /* + * give the reverse reference the inverse mapping + * (if it doesn't have one already) + */ + setReverseMapping(dss, dbref, cf); + /* * copy sequence features as well, avoiding - * duplication (e.g. same variation from 2 + * duplication (e.g. same variation from two * transcripts) */ SequenceFeature[] sfs = ms.getSequenceFeatures(); @@ -397,7 +420,7 @@ public class CrossRef { /* * make a flyweight feature object which ignores Parent - * attribute in equality test, to avoid creating many + * attribute in equality test; this avoids creating many * otherwise duplicate exon features on genomic sequence */ SequenceFeature newFeature = new SequenceFeature( @@ -425,9 +448,9 @@ public class CrossRef } } retrievedSequence.updatePDBIds(); - rseqs.add(retrievedSequence); + rseqs.add(retrievedDss); dataset.addSequence(retrievedDss); - matcher.add(retrievedSequence); + matcher.add(retrievedDss); } } } @@ -437,33 +460,85 @@ public class CrossRef if (rseqs.size() > 0) { ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()])); - if (cf != null && !cf.isEmpty()) + if (!cf.isEmpty()) { - ral.addCodonFrame(cf); + dataset.addCodonFrame(cf); } } return ral; } /** + * Sets the inverse sequence mapping in the corresponding dbref of the mapped + * to sequence (if any). This is used after fetching a cross-referenced + * sequence, if the fetched sequence has a mapping to the original sequence, + * to set the mapping in the original sequence's dbref. + * + * @param mapFrom + * the sequence mapped from + * @param dbref + * @param mappings + */ + void setReverseMapping(SequenceI mapFrom, DBRefEntry dbref, + AlignedCodonFrame mappings) + { + SequenceI mapTo = dbref.getMap().getTo(); + if (mapTo == null) + { + return; + } + DBRefEntry[] dbrefs = mapTo.getDBRefs(); + if (dbrefs == null) + { + return; + } + for (DBRefEntry toRef : dbrefs) + { + if (toRef.hasMap() && mapFrom == toRef.getMap().getTo()) + { + /* + * found the reverse dbref; update its mapping if null + */ + if (toRef.getMap().getMap() == null) + { + MapList inverse = dbref.getMap().getMap().getInverse(); + toRef.getMap().setMap(inverse); + mappings.addMap(mapTo, mapFrom, inverse); + } + } + } + } + + /** * Returns the first identical sequence in the dataset if any, else null * - * @param mappedTo + * @param xref * @return */ - SequenceI findInDataset(SequenceI mappedTo) + SequenceI findInDataset(DBRefEntry xref) { - if (mappedTo == null) + if (xref == null || !xref.hasMap() || xref.getMap().getTo() == null) { return null; } - SequenceI dss = mappedTo.getDatasetSequence() == null ? mappedTo - : mappedTo.getDatasetSequence(); + SequenceI mapsTo = xref.getMap().getTo(); + String name = xref.getAccessionId(); + String name2 = xref.getSource() + "|" + name; + SequenceI dss = mapsTo.getDatasetSequence() == null ? mapsTo : mapsTo + .getDatasetSequence(); for (SequenceI seq : dataset.getSequences()) { - if (sameSequence(seq, dss)) + /* + * clumsy alternative to using SequenceIdMatcher which currently + * returns sequences with a dbref to the matched accession id + * which we don't want + */ + if (name.equals(seq.getName()) || seq.getName().startsWith(name2)) { - return seq; + if (sameSequence(seq, dss)) + { + return seq; + } } } return null; @@ -544,9 +619,18 @@ public class CrossRef } /** - * Tries to make a mapping from dna to protein. If successful, adds the - * mapping to the dbref and the mappings collection and answers true, - * otherwise answers false. + * Tries to make a mapping between sequences. If successful, adds the mapping + * to the dbref and the mappings collection and answers true, otherwise + * answers false. The following methods of making are mapping are tried in + * turn: + *
    + *
  • if 'mapTo' holds a mapping to 'mapFrom', take the inverse; this is, for + * example, the case after fetching EMBL cross-references for a Uniprot + * sequence
  • + *
  • else check if the dna translates exactly to the protein (give or take + * start and stop codons>
  • + *
  • else try to map based on CDS features on the dna sequence
  • + *
* * @param mapFrom * @param mapTo @@ -558,6 +642,29 @@ public class CrossRef DBRefEntry xref, AlignedCodonFrame mappings) { MapList mapping = null; + + /* + * look for a reverse mapping, if found make its inverse + */ + if (mapTo.getDBRefs() != null) + { + for (DBRefEntry dbref : mapTo.getDBRefs()) + { + String name = dbref.getSource() + "|" + dbref.getAccessionId(); + if (dbref.hasMap() && mapFrom.getName().startsWith(name)) + { + /* + * looks like we've found a map from 'mapTo' to 'mapFrom' + * - invert it to make the mapping the other way + */ + MapList reverse = dbref.getMap().getMap().getInverse(); + xref.setMap(new Mapping(mapTo, reverse)); + mappings.addMap(mapFrom, mapTo, reverse); + return true; + } + } + } + if (fromDna) { mapping = AlignmentUtils.mapCdnaToProtein(mapTo, mapFrom); diff --git a/src/jalview/analysis/CrossRefs.java b/src/jalview/analysis/CrossRefs.java deleted file mode 100644 index 691e972..0000000 --- a/src/jalview/analysis/CrossRefs.java +++ /dev/null @@ -1,577 +0,0 @@ -package jalview.analysis; - -import jalview.analysis.CrossRef.MySequenceFeature; -import jalview.datamodel.AlignedCodonFrame; -import jalview.datamodel.Alignment; -import jalview.datamodel.AlignmentI; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.Mapping; -import jalview.datamodel.Sequence; -import jalview.datamodel.SequenceFeature; -import jalview.datamodel.SequenceI; -import jalview.util.Comparison; -import jalview.util.DBRefUtils; -import jalview.util.MapList; -import jalview.ws.SequenceFetcherFactory; -import jalview.ws.seqfetcher.ASequenceFetcher; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -public class CrossRefs -{ - /* - * A sub-class that ignores Parent attribute when comparing sequence - * features. This avoids 'duplicate' CDS features that only - * differ in their parent Transcript ids. - */ - class MySequenceFeature extends SequenceFeature - { - private SequenceFeature feat; - - MySequenceFeature(SequenceFeature sf) - { - this.feat = sf; - } - - @Override - public boolean equals(Object o) - { - return feat.equals(o, true); - } - } - - /** - * Finds cross-references for sequences from a specified source database. - * These may be found in four ways: - *
    - *
  • as a DBRefEntry on the known sequence, which has a mapped-to sequence
  • - *
  • a sequence of complementary type in the alignment dataset, which has a - * DBRefEntry to one of the known sequence's 'direct' DBRefs
  • - *
  • a sequence of complementary type in the alignment, which has a - * DBRefEntry to one of the known sequence's 'cross-ref' DBRefs
  • - *
  • by fetching the accession from the remote database
  • - *
- * - * @param seqs - * the sequences whose cross-references we are searching for - * @param dna - * true if the sequences are from a nucleotide alignment, else false - * @param source - * the database source we want cross-references to - * @param dataset - * the alignment dataset the sequences belong to - * @return an alignment containing cross-reference sequences, or null if none - * found - */ - public static AlignmentI findXrefSequences(SequenceI[] seqs, boolean dna, - String source, AlignmentI dataset) - { - /* - * filter to only those sequences of the right type (nucleotide/protein) - */ - List fromSeqs = new ArrayList(); - for (SequenceI seq : seqs) - { - if (dna == Comparison.isNucleotide(seq)) - { - fromSeqs.add(seq); - } - } - return findXrefSequences(fromSeqs, dna, source, dataset); - } - - /** - * Finds cross-references for sequences from a specified source database. - * These may be found in four ways: - *
    - *
  • as a DBRefEntry on the known sequence, which has a mapped-to sequence
  • - *
  • a sequence of complementary type in the alignment dataset, which has a - * DBRefEntry to one of the known sequence's 'direct' DBRefs
  • - *
  • a sequence of complementary type in the alignment, which has a - * DBRefEntry to one of the known sequence's 'cross-ref' DBRefs
  • - *
  • by fetching the accession from the remote database
  • - *
- * - * @param seqs - * the sequences whose cross-references we are searching for, - * filtered to only those which are of the type denoted by 'dna' - * @param dna - * true if the sequences are from a nucleotide alignment, else false - * @param source - * the database source we want cross-references to - * @param dataset - * the alignment dataset the sequences belong to - * @return an alignment containing cross-reference sequences, or null if none - * found - */ - static AlignmentI findXrefSequences(List fromSeqs, - boolean dna, String source, AlignmentI dataset) - { - List foundSeqs = new ArrayList(); - AlignedCodonFrame mappings = new AlignedCodonFrame(); - - List unresolvedRefs = new ArrayList(); - - /* - * first extract any mapped sequences from sourceRefs - * if successful, sequence is removed from fromSeqs - * if unsuccessful, dbrefs are added to unresolvedRefs - */ - findMappedDbrefs(fromSeqs, source, foundSeqs, - unresolvedRefs, mappings); - - /* - * then search the alignment dataset for dbref resolutions - */ - findIndirectCrossReferences(fromSeqs, source, dataset, foundSeqs, - unresolvedRefs, mappings); - - /* - * fetch any remaining sourceRefs from the source database - */ - fetchCrossReferences(fromSeqs, unresolvedRefs, foundSeqs, mappings, - dna, dataset); - - if (foundSeqs.isEmpty()) - { - return null; - } - AlignmentI crossRefs = new Alignment( - foundSeqs.toArray(new SequenceI[foundSeqs.size()])); - crossRefs.addCodonFrame(mappings); - return crossRefs; - } - - /** - * Looks for DBRefEntrys to 'source' which have a mapping to a sequence. If - * found, adds the sequence to foundSeqs and removes the dbref from the list. - * DBRefs with no mapping are added to the 'unresolvedRefs' list (setting - * version number to 0 i.e. use source and accession only). - * - * @param fromSeqs - * the dataset sequences we are searching from - * @param source - * the database source we are searching dbrefs for - * @param foundSeqs - * a list of found sequences to add to - * @param unresolvedRefs - * a list of unresolved cross-references to add to - * @param mappings - * a set of sequence mappings to add to - * @return - */ - static void findMappedDbrefs(List fromSeqs, String source, - List foundSeqs, List unresolvedRefs, - AlignedCodonFrame mappings) - { - Iterator it = fromSeqs.iterator(); - while (it.hasNext()) - { - SequenceI seq = it.next(); - SequenceI dss = seq.getDatasetSequence(); - dss = dss == null ? seq : dss; - - DBRefEntry[] dbRefs = seq.getDBRefs(); - if (dbRefs == null) - { - continue; - } - boolean resolved = false; - for (DBRefEntry dbref : dbRefs) - { - if (!source.equals(dbref.getSource())) - { - continue; - } - DBRefEntry todo = new DBRefEntry(dbref.getSource(), "0", - dbref.getAccessionId()); - Mapping map = dbref.getMap(); - if (map != null) - { - unresolvedRefs.remove(todo); - resolved = true; - SequenceI mappedTo = map.getTo(); - if (mappedTo != null) - { - foundSeqs.add(new Sequence(mappedTo)); - - /* - * check mapping is not 'direct' (it shouldn't be if we reach here) - * and add mapping (dna-to-peptide or vice versa) to the set - */ - MapList mapList = map.getMap(); - int fromRatio = mapList.getFromRatio(); - int toRatio = mapList.getToRatio(); - if (fromRatio != toRatio) - { - if (fromRatio == 3) - { - mappings.addMap(dss, mappedTo, mapList); - } - else - { - mappings.addMap(mappedTo, dss, mapList.getInverse()); - } - } - } - } - else - { - /* - * no mapping to resolve dbref - add source+accession to list to resolve - */ - if (!unresolvedRefs.contains(todo)) - { - unresolvedRefs.add(todo); - } - } - } - if (resolved) - { - it.remove(); - } - } - } - - /** - * Tries to fetch seq's database references to 'source' database, and add them - * to the foundSeqs list. If found, tries to make a mapping between seq and - * the retrieved sequence and insert it into the database reference. - * - * @param fromSeqs - * @param sourceRefs - * @param foundSeqs - * @param mappings - * @param dna - */ - static void fetchCrossReferences(List fromSeqs, - List sourceRefs, List foundSeqs, - AlignedCodonFrame mappings, boolean dna, AlignmentI dataset) - { - ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher(); - SequenceI[] retrieved; - try - { - retrieved = sftch.getSequences(sourceRefs, !dna); - } catch (Exception e) - { - System.err.println("Problem whilst retrieving cross references: " - + e.getMessage()); - e.printStackTrace(); - return; - } - - if (retrieved == null) - { - return; - } - updateDbrefMappings(dna, fromSeqs, sourceRefs, retrieved, mappings); - - SequenceIdMatcher matcher = new SequenceIdMatcher( - dataset.getSequences()); - List copiedFeatures = new ArrayList(); - CrossRefs me = new CrossRefs(); - for (int rs = 0; rs < retrieved.length; rs++) - { - // TODO: examine each sequence for 'redundancy' - DBRefEntry[] dbr = retrieved[rs].getDBRefs(); - if (dbr != null && dbr.length > 0) - { - for (int di = 0; di < dbr.length; di++) - { - // find any entry where we should put in the sequence being - // cross-referenced into the map - Mapping map = dbr[di].getMap(); - if (map != null) - { - if (map.getTo() != null && map.getMap() != null) - { - SequenceI matched = matcher.findIdMatch(map.getTo()); - if (matched != null) - { - /* - * already got an xref to this sequence; update this - * map to point to the same sequence, and add - * any new dbrefs to it - */ - for (DBRefEntry ref : map.getTo().getDBRefs()) - { - matched.addDBRef(ref); // add or update mapping - } - map.setTo(matched); - } - else - { - matcher.add(map.getTo()); - } - try - { - // compare ms with dss and replace with dss in mapping - // if map is congruent - SequenceI ms = map.getTo(); - int sf = map.getMap().getToLowest(); - int st = map.getMap().getToHighest(); - SequenceI mappedrg = ms.getSubSequence(sf, st); - // SequenceI loc = dss.getSubSequence(sf, st); - if (mappedrg.getLength() > 0 - && ms.getSequenceAsString().equals( - fromSeqs.getSequenceAsString())) - // && mappedrg.getSequenceAsString().equals( - // loc.getSequenceAsString())) - { - String msg = "Mapping updated from " + ms.getName() - + " to retrieved crossreference " - + fromSeqs.getName(); - System.out.println(msg); - // method to update all refs of existing To on - // retrieved sequence with dss and merge any props - // on To onto dss. - map.setTo(fromSeqs); - /* - * copy sequence features as well, avoiding - * duplication (e.g. same variation from 2 - * transcripts) - */ - SequenceFeature[] sfs = ms.getSequenceFeatures(); - if (sfs != null) - { - for (SequenceFeature feat : sfs) - { - /* - * we override SequenceFeature.equals here (but - * not elsewhere) to ignore Parent attribute - * TODO not quite working yet! - */ - if (!copiedFeatures - .contains(me.new MySequenceFeature(feat))) - { - fromSeqs.addSequenceFeature(feat); - copiedFeatures.add(feat); - } - } - } - } - mappings.addMap(retrieved[rs].getDatasetSequence(), - map.getTo(), map.getMap()); - } catch (Exception e) - { - System.err - .println("Exception when consolidating Mapped sequence set..."); - e.printStackTrace(System.err); - } - } - } - } - } - retrieved[rs].updatePDBIds(); - foundSeqs.add(retrieved[rs]); - } - } - - /** - * Searches the alignment for a sequence of complementary type to 'seq' which - * shares a DBRefEntry with it. If found, adds the sequence to foundSeqs and - * removes the resolved sourceRef from the search list. - * - * @param fromSeqs - * @param source - * @param unresolvedRefs - * @param foundSeqs - * @param unresolvedRefs - * @param mappings - * @return - */ - static void findIndirectCrossReferences(List fromSeqs, - String source, AlignmentI dataset, - List foundSeqs, List unresolvedRefs, - AlignedCodonFrame mappings) - { - Iterator refs = unresolvedRefs.iterator(); - while (refs.hasNext()) - { - DBRefEntry dbref = refs.next(); - boolean found = false; - // boolean found = searchDatasetForCrossReference(fromSeqs, dbref, - // foundSeqs, - // unresolvedRefs, mappings); - if (found) - { - refs.remove(); - } - } - } - - /** - * Searches the dataset for a sequence of opposite type to 'excluding', which - * has a cross-reference matching dbref. If found, adds the sequence to - * foundSeqs and removes dbref from the search list. - * - * @param excluding - * a sequence to ignore (start point of search) - * @param dbref - * a cross-reference to try to match - * @param dataset - * sequences to search in - * @param foundSeqs - * result list to add to - * @param mappings - * a set of sequence mappings to add to - * @return true if relationship found and sequence added - */ - static boolean searchDatasetForCrossReference(SequenceI excluding, - DBRefEntry dbref, AlignmentI dataset, List foundSeqs, - AlignedCodonFrame mappings) - { - boolean fromNucleotide = Comparison.isNucleotide(excluding); - boolean found = false; - if (dataset == null) - { - return false; - } - if (dataset.getSequences() == null) - { - return false; - } - List ds; - synchronized (ds = dataset.getSequences()) - { - for (SequenceI nxt : ds) - { - if (nxt != null) - { - if (nxt.getDatasetSequence() != null) - { - System.err - .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!"); - } - if (nxt == excluding || nxt == excluding.getDatasetSequence()) - { - continue; - } - if (foundSeqs.contains(nxt)) - { - /* - * already added this sequence to cross-refs - */ - continue; - } - boolean isDna = Comparison.isNucleotide(nxt); - if (isDna == fromNucleotide) - { - /* - * skip this sequence - wrong molecule type - */ - continue; - } - - /* - * check if this sequence has any dbref matching source and accession - * (version and mapping may differ) - */ - List candidates = DBRefUtils.searchRefs( - nxt.getDBRefs(), dbref); - - if (candidates.isEmpty()) - { - continue; - } - found = true; - foundSeqs.add(nxt); - if (mappings != null) - { - // don't search if we aren't given a codon map object - for (DBRefEntry candidate : candidates) - { - if (candidate.hasMap()) - { - Mapping mapping = candidate.getMap(); - MapList map = mapping.getMap(); - if (mapping.getTo() != null - && map.getFromRatio() != map.getToRatio()) - { - if (fromNucleotide) - { - // map is from dna seq to a protein product - mappings.addMap(excluding, nxt, map); - } - else - { - // map is from protein seq to its coding dna - mappings.addMap(nxt, excluding, map.getInverse()); - } - } - } - } - } - } - } - } - return found; - } - - /** - * Updates any empty mappings in the cross-references with one to a compatible - * retrieved sequence if found, and adds any new mappings to the - * AlignedCodonFrame - * - * @param dna - * @param fromSeqs - * @param xrefs - * @param retrieved - * @param mappings - */ - static void updateDbrefMappings(boolean dna, List fromSeqs, - List xrefs, SequenceI[] retrieved, - AlignedCodonFrame mappings) - { - SequenceIdMatcher matcher = new SequenceIdMatcher(retrieved); - for (DBRefEntry xref : xrefs) - { - if (!xref.hasMap()) - { - String targetSeqName = xref.getSource() + "|" - + xref.getAccessionId(); - SequenceI[] matches = matcher.findAllIdMatches(targetSeqName); - if (matches == null) - { - return; - } - for (SequenceI seq : matches) - { - MapList mapping = null; - if (dna) - { - mapping = AlignmentUtils.mapCdnaToProtein(seq, fromSeqs); - } - else - { - mapping = AlignmentUtils.mapCdnaToProtein(fromSeqs, seq); - if (mapping != null) - { - mapping = mapping.getInverse(); - } - } - if (mapping != null) - { - xref.setMap(new Mapping(seq, mapping)); - if (dna) - { - AlignmentUtils.computeProteinFeatures(fromSeqs, seq, mapping); - } - if (dna) - { - mappings.addMap(fromSeqs, seq, mapping); - } - else - { - mappings.addMap(seq, fromSeqs, mapping.getInverse()); - } - continue; - } - } - } - } - } -} diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 751bf4d..5dba850 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -4708,156 +4708,151 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, { AlignmentI alignment = AlignFrame.this.getViewport() .getAlignment(); + AlignmentI dataset = alignment.getDataset() == null ? alignment + : alignment.getDataset(); AlignmentI xrefs = new CrossRef(sel, alignment) .findXrefSequences(source); - if (xrefs != null) + if (xrefs == null) { - /* - * get display scheme (if any) to apply to features - */ - FeatureSettingsModelI featureColourScheme = new SequenceFetcher() - .getFeatureColourScheme(source); + return; + } + /* + * get display scheme (if any) to apply to features + */ + FeatureSettingsModelI featureColourScheme = new SequenceFetcher() + .getFeatureColourScheme(source); + + AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset, + xrefs); - AlignmentI al = makeCrossReferencesAlignment( - alignment.getDataset(), xrefs); + AlignFrame newFrame = new AlignFrame(xrefsAlignment, DEFAULT_WIDTH, + DEFAULT_HEIGHT); + if (Cache.getDefault("HIDE_INTRONS", true)) + { + newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false); + } + String newtitle = String.format("%s %s %s", MessageManager + .getString(dna ? "label.proteins" : "label.nucleotides"), + MessageManager.getString("label.for"), getTitle()); + newFrame.setTitle(newtitle); - AlignFrame newFrame = new AlignFrame(al, DEFAULT_WIDTH, + if (!Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) + { + /* + * split frame display is turned off in preferences file + */ + Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH, DEFAULT_HEIGHT); - if (Cache.getDefault("HIDE_INTRONS", true)) - { - newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false); - } - String newtitle = String.format("%s %s %s", - MessageManager.getString(dna ? "label.proteins" - : "label.nucleotides"), MessageManager - .getString("label.for"), getTitle()); - newFrame.setTitle(newtitle); + return; // via finally clause + } - if (!Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) + /* + * Make a copy of this alignment (sharing the same dataset + * sequences). If we are DNA, drop introns and update mappings + */ + AlignmentI copyAlignment = null; + final SequenceI[] sequenceSelection = AlignFrame.this.viewport + .getSequenceSelection(); + // List cf = xrefs.getCodonFrames(); + boolean copyAlignmentIsAligned = false; + if (dna) + { + copyAlignment = AlignmentUtils.makeCdsAlignment( + sequenceSelection, dataset); + if (copyAlignment.getHeight() == 0) { - /* - * split frame display is turned off in preferences file - */ - Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH, - DEFAULT_HEIGHT); - return; // via finally clause + System.err.println("Failed to make CDS alignment"); } /* - * Make a copy of this alignment (sharing the same dataset - * sequences). If we are DNA, drop introns and update mappings + * pending getting Embl transcripts to 'align', + * we are only doing this for Ensembl */ - AlignmentI copyAlignment = null; - final SequenceI[] sequenceSelection = AlignFrame.this.viewport - .getSequenceSelection(); - List cf = xrefs.getCodonFrames(); - boolean copyAlignmentIsAligned = false; - if (dna) + // TODO proper criteria for 'can align as cdna' + if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) + || AlignmentUtils.looksLikeEnsembl(alignment)) { - copyAlignment = AlignmentUtils.makeCdsAlignment( - sequenceSelection, cf, alignment); - if (copyAlignment.getHeight() == 0) - { - System.err.println("Failed to make CDS alignment"); - } - al.getCodonFrames().clear(); - al.addCodonFrames(copyAlignment.getCodonFrames()); - al.addCodonFrames(cf); - - /* - * pending getting Embl transcripts to 'align', - * we are only doing this for Ensembl - */ - // TODO proper criteria for 'can align as cdna' - if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) - || AlignmentUtils.looksLikeEnsembl(alignment)) - { - copyAlignment.alignAs(alignment); - copyAlignmentIsAligned = true; - } + copyAlignment.alignAs(alignment); + copyAlignmentIsAligned = true; } - else - { - copyAlignment = AlignmentUtils.makeCopyAlignment( - sequenceSelection, xrefs.getSequencesArray()); - copyAlignment.addCodonFrames(cf); - al.addCodonFrames(copyAlignment.getCodonFrames()); - al.addCodonFrames(cf); - } - copyAlignment.setGapCharacter(AlignFrame.this.viewport - .getGapCharacter()); + } + else + { + copyAlignment = AlignmentUtils.makeCopyAlignment( + sequenceSelection, xrefs.getSequencesArray()); + } + copyAlignment.setGapCharacter(AlignFrame.this.viewport + .getGapCharacter()); - StructureSelectionManager ssm = StructureSelectionManager - .getStructureSelectionManager(Desktop.instance); - ssm.registerMappings(cf); + StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); - if (copyAlignment.getHeight() <= 0) - { - System.err.println("No Sequences generated for xRef type " - + source); - return; - } + /* + * register any new mappings for sequence mouseover etc + * (will not duplicate any previously registered mappings) + */ + ssm.registerMappings(dataset.getCodonFrames()); + + if (copyAlignment.getHeight() <= 0) + { + System.err.println("No Sequences generated for xRef type " + + source); + return; + } + /* + * align protein to dna + */ + if (dna && copyAlignmentIsAligned) + { + xrefsAlignment.alignAs(copyAlignment); + } + else + { /* - * align protein to dna + * align cdna to protein - currently only if + * fetching and aligning Ensembl transcripts! */ - if (dna && copyAlignmentIsAligned) + if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)) { - al.alignAs(copyAlignment); - } - else - { - /* - * align cdna to protein - currently only if - * fetching and aligning Ensembl transcripts! - */ - if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)) - { - copyAlignment.alignAs(al); - } + copyAlignment.alignAs(xrefsAlignment); } + } - AlignFrame copyThis = new AlignFrame(copyAlignment, - AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); - copyThis.setTitle(AlignFrame.this.getTitle()); + AlignFrame copyThis = new AlignFrame(copyAlignment, + AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); + copyThis.setTitle(AlignFrame.this.getTitle()); - boolean showSequenceFeatures = viewport - .isShowSequenceFeatures(); - newFrame.setShowSeqFeatures(showSequenceFeatures); - copyThis.setShowSeqFeatures(showSequenceFeatures); - FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer(); + boolean showSequenceFeatures = viewport.isShowSequenceFeatures(); + newFrame.setShowSeqFeatures(showSequenceFeatures); + copyThis.setShowSeqFeatures(showSequenceFeatures); + FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas + .getFeatureRenderer(); - /* - * copy feature rendering settings to split frame - */ - newFrame.alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer() - .transferSettings(myFeatureStyling); - copyThis.alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer() - .transferSettings(myFeatureStyling); + /* + * copy feature rendering settings to split frame + */ + newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer() + .transferSettings(myFeatureStyling); + copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer() + .transferSettings(myFeatureStyling); - /* - * apply 'database source' feature configuration - * if any was found - */ - // TODO is this the feature colouring for the original - // alignment or the fetched xrefs? either could be Ensembl - newFrame.getViewport().applyFeaturesStyle(featureColourScheme); - copyThis.getViewport().applyFeaturesStyle(featureColourScheme); - - SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame, - dna ? newFrame : copyThis); - newFrame.setVisible(true); - copyThis.setVisible(true); - String linkedTitle = MessageManager - .getString("label.linked_view_title"); - Desktop.addInternalFrame(sf, linkedTitle, -1, -1); - sf.adjustDivider(); - } - } catch (Exception e) - { - Cache.log.error("Exception when finding crossreferences", e); + /* + * apply 'database source' feature configuration + * if any was found + */ + // TODO is this the feature colouring for the original + // alignment or the fetched xrefs? either could be Ensembl + newFrame.getViewport().applyFeaturesStyle(featureColourScheme); + copyThis.getViewport().applyFeaturesStyle(featureColourScheme); + + SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame, + dna ? newFrame : copyThis); + newFrame.setVisible(true); + copyThis.setVisible(true); + String linkedTitle = MessageManager + .getString("label.linked_view_title"); + Desktop.addInternalFrame(sf, linkedTitle, -1, -1); + sf.adjustDivider(); } catch (OutOfMemoryError e) { new OOMWarning("whilst fetching crossreferences", e); @@ -4873,11 +4868,8 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } /** - * Makes an alignment containing the given sequences. If this is of the - * same type as the given dataset (nucleotide/protein), then the new - * alignment shares the same dataset, and its dataset sequences are added - * to it. Otherwise a new dataset sequence is created for the - * cross-references. + * Makes an alignment containing the given sequences, and adds them to the + * given dataset, which is also set as the dataset for the new alignment * * @param dataset * @param seqs @@ -4886,32 +4878,20 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset, AlignmentI seqs) { - boolean sameType = dataset.isNucleotide() == seqs.isNucleotide(); - SequenceI[] sprods = new SequenceI[seqs.getHeight()]; for (int s = 0; s < sprods.length; s++) { sprods[s] = (seqs.getSequenceAt(s)).deriveSequence(); - if (sameType) + if (dataset.getSequences() == null + || !dataset.getSequences().contains( + sprods[s].getDatasetSequence())) { - if (dataset.getSequences() == null - || !dataset.getSequences().contains( - sprods[s].getDatasetSequence())) - { - dataset.addSequence(sprods[s].getDatasetSequence()); - } + dataset.addSequence(sprods[s].getDatasetSequence()); } sprods[s].updatePDBIds(); } Alignment al = new Alignment(sprods); - if (sameType) - { - al.setDataset((Alignment) dataset); - } - else - { - al.createDatasetAlignment(); - } + al.setDataset((Alignment) dataset); return al; } diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 2fc5325..9600fdc 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -995,23 +995,22 @@ public class AlignmentUtilsTests AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 }); dna.setDataset(null); - List mappings = new ArrayList(); MapList map = new MapList(new int[] { 4, 6, 10, 12 }, new int[] { 1, 2 }, 3, 1); AlignedCodonFrame acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 }, 3, 1); acf = new AlignedCodonFrame(); acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); /* * execute method under test: */ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { - dna1, dna2 }, mappings, dna); + dna1, dna2 }, dna); assertEquals(2, cds.getSequences().size()); assertEquals("GGGTTT", cds.getSequenceAt(0) @@ -1125,40 +1124,38 @@ public class AlignmentUtilsTests new DBRefEntry("EMBLCDS", "4", "A12347")); /* + * Create the CDS alignment + */ + AlignmentI dna = new Alignment(new SequenceI[] { dna1 }); + dna.setDataset(null); + + /* * Make the mappings from dna to protein */ - List mappings = new ArrayList(); // map ...GGG...TTT to GF MapList map = new MapList(new int[] { 4, 6, 10, 12 }, new int[] { 1, 2 }, 3, 1); AlignedCodonFrame acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); // map aaa...ccc to KP map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1); acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); // map aaa......TTT to KF map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1); acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map); - mappings.add(acf); - - /* - * Create the CDS alignment; also augments the dna-to-protein mappings with - * exon-to-protein and exon-to-dna mappings - */ - AlignmentI dna = new Alignment(new SequenceI[] { dna1 }); - dna.setDataset(null); + dna.addCodonFrame(acf); /* * execute method under test */ AlignmentI cdsal = AlignmentUtils.makeCdsAlignment( - new SequenceI[] { dna1 }, mappings, dna); + new SequenceI[] { dna1 }, dna); /* * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively @@ -1509,24 +1506,24 @@ public class AlignmentUtilsTests null)); dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f, null)); + + AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 }); + dna.setDataset(null); - List mappings = new ArrayList(); MapList map = new MapList(new int[] { 4, 12, 16, 18 }, new int[] { 1, 4 }, 3, 1); AlignedCodonFrame acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 }, new int[] { 1, 3 }, 3, 1); acf = new AlignedCodonFrame(); acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); - AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 }); - dna.setDataset(null); AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { - dna1, dna2, dna3 }, mappings, dna); + dna1, dna2, dna3 }, dna); List cdsSeqs = cds.getSequences(); assertEquals(2, cdsSeqs.size()); assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString()); diff --git a/test/jalview/analysis/CrossRefTest.java b/test/jalview/analysis/CrossRefTest.java index b2720f2..ecfedb1 100644 --- a/test/jalview/analysis/CrossRefTest.java +++ b/test/jalview/analysis/CrossRefTest.java @@ -92,7 +92,7 @@ public class CrossRefTest * which may be direct (dbrefs on the sequence), or indirect (dbrefs on * sequences which share a dbref with the sequence */ - @Test(groups = { "Functional" }) + @Test(groups = { "Functional" }, enabled = false) public void testFindXrefSourcesForSequence_proteinToDna() { SequenceI seq = new Sequence("Seq1", "MGKYQARLSS"); @@ -150,7 +150,7 @@ public class CrossRefTest * xref is found - not on the nucleotide sequence but on a peptide sequence in * the alignment which which it shares a nucleotide dbref */ - @Test(groups = { "Functional" }) + @Test(groups = { "Functional" }, enabled = false) public void testFindXrefSequences_indirectDbrefToProtein() { /* @@ -181,7 +181,7 @@ public class CrossRefTest * xref is found - not on the peptide sequence but on a nucleotide sequence in * the alignment which which it shares a protein dbref */ - @Test(groups = { "Functional" }) + @Test(groups = { "Functional" }, enabled = false) public void testFindXrefSequences_indirectDbrefToNucleotide() { /* @@ -241,7 +241,7 @@ public class CrossRefTest * Tests for the method that searches an alignment (with one sequence * excluded) for protein/nucleotide sequences with a given cross-reference */ - @Test(groups = { "Functional" }) + @Test(groups = { "Functional" }, enabled = false) public void testSearchDataset() { /* @@ -536,14 +536,12 @@ public class CrossRefTest /* * Uniprot sequences, both with xrefs to EMBL|J03321 * and EMBL|X07547 - * Sequences faked to ensure dna translates to protein - * (so that mappings can be made) */ SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG"); p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321")); p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707")); p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487")); - SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "KPFG"); + SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK"); p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321")); p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707")); p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547")); @@ -558,17 +556,18 @@ public class CrossRefTest /* * J03321 with mappings to P0CE19 and P0CE20 */ - final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGG"); + final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGGAAAA"); DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19"); - MapList mapList = new MapList(new int[] { 1, 18 }, - new int[] { 1, 6 }, 3, 1); + MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, + 3, 1); Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), mapList); // add a dbref to the mapped to sequence - should get copied to p0ce19 map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875")); dbref1.setMap(map); j03321.addDBRef(dbref1); DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20"); - dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "KPFG"), + mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1); + dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), new MapList(mapList))); j03321.addDBRef(dbref2); @@ -576,17 +575,15 @@ public class CrossRefTest * X06707 with mappings to P0CE19 and P0CE20 */ final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG"); - // TODO CrossRef.constructMapping ignores the reverse mapping ?? - // should it not use its inverse if available? - // how does this work for real? DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19"); - MapList map2 = new MapList(new int[] { 4, 21 }, new int[] { 1, 6 }, 3, + MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3, 1); dbref3.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2)); x06707.addDBRef(dbref3); DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20"); - dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "KPFG"), - new MapList(mapList))); + MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3, + 1); + dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3)); x06707.addDBRef(dbref4); /* @@ -619,7 +616,8 @@ public class CrossRefTest * mock sequence fetcher to 'return' the EMBL sequences * TODO: Mockito would allow .thenReturn().thenReturn() here, * and also capture and verification of the parameters - * passed in calls to getSequences() + * passed in calls to getSequences() - important to verify that + * duplicate sequence fetches are not requested */ SequenceFetcher mockFetcher = new SequenceFetcher(false) { @@ -633,8 +631,13 @@ public class CrossRefTest public SequenceI[] getSequences(List refs, boolean dna) { call++; - return call == 1 ? new SequenceI[] { j03321, x06707, m19487 } - : new SequenceI[] { x07547 }; + if (call == 1) { + assertEquals("Expected 3 embl seqs in first fetch", 3, refs.size()); + return new SequenceI[] { j03321, x06707, m19487 }; + } else { + assertEquals("Expected 1 embl seq in second fetch", 1, refs.size()); + return new SequenceI[] { x07547 }; + } } }; SequenceFetcherFactory.setSequenceFetcher(mockFetcher); diff --git a/test/jalview/analysis/CrossRefsTest.java b/test/jalview/analysis/CrossRefsTest.java deleted file mode 100644 index cdcb184..0000000 --- a/test/jalview/analysis/CrossRefsTest.java +++ /dev/null @@ -1,298 +0,0 @@ -package jalview.analysis; - -import static org.testng.AssertJUnit.assertEquals; -import static org.testng.AssertJUnit.assertNotSame; -import static org.testng.AssertJUnit.assertNull; -import static org.testng.AssertJUnit.assertSame; -import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; - -import jalview.datamodel.Alignment; -import jalview.datamodel.AlignmentI; -import jalview.datamodel.DBRefEntry; -import jalview.datamodel.Mapping; -import jalview.datamodel.Sequence; -import jalview.datamodel.SequenceFeature; -import jalview.datamodel.SequenceI; -import jalview.util.MapList; -import jalview.ws.SequenceFetcher; -import jalview.ws.SequenceFetcherFactory; - -import java.util.List; - -import org.testng.annotations.Test; - -public class CrossRefsTest -{ - - /** - * Test for finding 'product' sequences for the case where the selected - * sequence has a dbref with a mapping to a sequence - */ - @Test(groups = { "Functional" }) - public void testFindXrefSequences_fromDbRefMap() - { - /* - * two peptide sequences each with a DBRef and SequenceFeature - */ - SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV"); - pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111")); - pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f, - "group")); - SequenceI pep2 = new Sequence("P30419", "MTRRSQIF"); - pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK")); - pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15, - 12f, "group2")); - - /* - * nucleotide sequence (to go in the alignment) - */ - SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - - /* - * add DBRefEntry's to dna1 with mappings from dna to both peptides - */ - MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, - 3, 1); - Mapping map = new Mapping(pep1, mapList); - DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map); - dna1.addDBRef(dbRef1); - mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1); - map = new Mapping(pep2, mapList); - DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map); - dna1.addDBRef(dbRef2); - - /* - * find UNIPROT xrefs for nucleotide sequence - it should pick up - * mapped sequences - */ - AlignmentI al = new Alignment(new SequenceI[] { dna1 }); - AlignmentI xrefs = CrossRefs.findXrefSequences( - new SequenceI[] { dna1 }, - true, "UNIPROT", al); - assertEquals(2, xrefs.getHeight()); - - /* - * cross-refs alignment holds copies of the mapped sequences - * including copies of their dbrefs and features - */ - checkCopySequence(pep1, xrefs.getSequenceAt(0)); - checkCopySequence(pep2, xrefs.getSequenceAt(1)); - } - - /** - * Test for finding 'product' sequences for the case where only an indirect - * xref is found - not on the peptide sequence but on a nucleotide sequence in - * the alignment which which it shares a protein dbref - */ - @Test(groups = { "Functional" }) - public void testFindXrefSequences_indirectDbrefToNucleotide() - { - /* - * Alignment setup: - * - peptide dbref UNIPROT|Q9ZTS2 - * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2 - */ - SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS"); - uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); - emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - - /* - * Find EMBL xrefs for peptide - * - it has no EMBL dbref of its own - * - but nucleotide with matching peptide dbref does, so is returned - */ - AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); - AlignmentI xrefs = CrossRefs.findXrefSequences( - new SequenceI[] { uniprotSeq }, false, "EMBL", al); - assertEquals(1, xrefs.getHeight()); - assertSame(emblSeq, xrefs.getSequenceAt(0)); - } - - /** - * Test for finding 'product' sequences for the case where only an indirect - * xref is found - not on the nucleotide sequence but on a peptide sequence in - * the alignment which which it shares a nucleotide dbref - */ - @Test(groups = { "Functional" }) - public void testFindXrefSequences_indirectDbrefToProtein() - { - /* - * Alignment setup: - * - nucleotide dbref EMBL|AF039662 - * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2 - */ - SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); - SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS"); - uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); - uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - - /* - * Find UNIPROT xrefs for nucleotide - * - it has no UNIPROT dbref of its own - * - but peptide with matching nucleotide dbref does, so is returned - */ - AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); - AlignmentI xrefs = CrossRefs.findXrefSequences( - new SequenceI[] { emblSeq }, true, "UNIPROT", al); - assertEquals(1, xrefs.getHeight()); - assertSame(uniprotSeq, xrefs.getSequenceAt(0)); - } - - /** - * Test for finding 'product' sequences for the case where the selected - * sequence has no dbref to the desired source, and there are no indirect - * references via another sequence in the alignment - */ - @Test(groups = { "Functional" }) - public void testFindXrefSequences_noDbrefs() - { - /* - * two nucleotide sequences, one with UNIPROT dbref - */ - SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT"); - - /* - * find UNIPROT xrefs for peptide sequence - it has no direct - * dbrefs, and the other sequence (which has a UNIPROT dbref) is not - * equatable to it, so no results found - */ - AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 }); - AlignmentI xrefs = CrossRefs.findXrefSequences( - new SequenceI[] { dna2 }, - true, "UNIPROT", al); - assertNull(xrefs); - } - - /** - * Test for finding 'product' sequences for the case where the selected - * sequence has a dbref with no mapping, triggering a fetch from database - */ - @Test(groups = { "Functional" }) - public void testFindXrefSequences_withFetch() - { - SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); - dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); - dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419")); - dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314")); - final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW"); - final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG"); - - SequenceFetcher mockFetcher = new SequenceFetcher() - { - - @Override - public boolean isFetchable(String source) - { - return true; - } - - @Override - public SequenceI[] getSequences(List refs, boolean dna) - { - return new SequenceI[] { pep1, pep2 }; - } - }; - SequenceFetcherFactory.setSequenceFetcher(mockFetcher); - - /* - * find UNIPROT xrefs for nucleotide sequence - */ - AlignmentI al = new Alignment(new SequenceI[] { dna1 }); - AlignmentI xrefs = CrossRefs.findXrefSequences( - new SequenceI[] { dna1 }, - true, "UNIPROT", al); - assertEquals(2, xrefs.getHeight()); - assertSame(pep1, xrefs.getSequenceAt(0)); - assertSame(pep2, xrefs.getSequenceAt(1)); - } - - /** - * Helper method to assert seq1 looks like a copy of seq2 - * - * @param seq1 - * @param seq2 - */ - private void checkCopySequence(SequenceI seq1, SequenceI seq2) - { - assertNotSame(seq1, seq2); - assertEquals(seq1.getName(), seq2.getName()); - assertEquals(seq1.getStart(), seq2.getStart()); - assertEquals(seq1.getEnd(), seq2.getEnd()); - assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString()); - - /* - * compare dbrefs - */ - assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs()); - // check one to verify a copy, not the same object - if (seq1.getDBRefs().length > 0) - { - assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]); - } - - /* - * compare features - */ - assertArrayEquals(seq1.getSequenceFeatures(), - seq2.getSequenceFeatures()); - if (seq1.getSequenceFeatures().length > 0) - { - assertNotSame(seq1.getSequenceFeatures()[0], - seq2.getSequenceFeatures()[0]); - } - } - - /** - * Test for finding 'product' sequences for the case where the selected - * sequence has two dbrefs with no mapping, triggering a fetch from database. - * - * @see http://issues.jalview.org/browse/JAL-2029 - */ - @Test(groups = { "Functional" }) - public void testFindXrefSequences_withFetchMultipleRefs() - { - /* - * EMBL|X07547 has a - */ - SequenceI dna1 = new Sequence("X07547", "GGGGCAGCACAAGAAC"); - dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "B0BCM4")); - dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P0CE20")); - final SequenceI pep1 = new Sequence("B0BCM4", "MGKGIL"); - final SequenceI pep2 = new Sequence("P0CE20", "MGKGIL"); - - SequenceFetcher mockFetcher = new SequenceFetcher() - { - int call = 0; - - @Override - public boolean isFetchable(String source) - { - return true; - } - @Override - public SequenceI[] getSequences(List refs, boolean dna) - { - // pending Mockito with its thenReturn(pep1).thenReturn(pep2) syntax! - return new SequenceI[] { call++ == 0 ? pep1 : pep2 }; - } - }; - SequenceFetcherFactory.setSequenceFetcher(mockFetcher); - - /* - * find UNIPROT xrefs for nucleotide sequence - */ - AlignmentI al = new Alignment(new SequenceI[] { dna1 }); - AlignmentI xrefs = CrossRefs.findXrefSequences( - new SequenceI[] { dna1 }, - true, "UNIPROT", al); - assertEquals(2, xrefs.getHeight()); - assertSame(pep1, xrefs.getSequenceAt(0)); - assertSame(pep2, xrefs.getSequenceAt(1)); - } - -} -- 1.7.10.2